(charset-chars): Fix typo.
[bpt/emacs.git] / lisp / international / mule-conf.el
CommitLineData
08c19a27
KH
1;;; mule-conf.el --- configure multilingual environment
2
08c19a27 3;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
c0e17dd8
KH
5;; Copyright (C) 2001, 2002
6;; National Institute of Advanced Industrial Science and Technology (AIST)
7;; Registration Number H13PRO009
08c19a27
KH
8
9;; Keywords: mule, multilingual, character set, coding system
10
11;; This file is part of GNU Emacs.
12
13;; GNU Emacs is free software; you can redistribute it and/or modify
14;; it under the terms of the GNU General Public License as published by
15;; the Free Software Foundation; either version 2, or (at your option)
16;; any later version.
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
24;; along with GNU Emacs; see the file COPYING. If not, write to the
25;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26;; Boston, MA 02111-1307, USA.
27
28;;; Commentary:
29
30;; Don't byte-compile this file.
31
32;;; Code:
33
34;;; Definitions of character sets.
35
c0e17dd8
KH
36;; The charsets `ascii' and `unicode' are already defined in charset.c
37;; as below:
38;;
39;; (define-charset 'ascii
40;; ""
41;; :dimension 1
42;; :code-space [0 127]
43;; :iso-final-char ?A
44;; :ascii-compatible-p t
45;; :emacs-mule-id 0
46;; :code-offset 0)
47;;
48;; (define-charset 'unicode
49;; ""
50;; :dimension 3
51;; :code-space [0 255 0 255 0 16]
52;; :ascii-compatible-p t
53;; :code-offset 0)
54;;
55;; We now set :docstring, :short-name, and :long-name properties.
56
57(put-charset-property
58 'ascii :docstring "ASCII (ISO646 IRV)")
59(put-charset-property
60 'ascii :short-name "ASCII")
61(put-charset-property
62 'ascii :long-name "ASCII (ISO646 IRV)")
63(put-charset-property
64 'unicode :docstring "Unicode (ISO10646)")
65(put-charset-property
66 'unicode :short-name "Unicode")
67(put-charset-property
68 'unicode :long-name "Unicode (ISO10646)")
69
70(define-charset-alias 'ucs 'unicode)
71
72(define-charset 'emacs
73 "Full Emacs characters."
74 :ascii-compatible-p t
75 :code-space [ 0 255 0 255 0 63 ]
76 :code-offset 0
77 :supplementary-p t)
78
;; The full 8-bit ISO/IEC 8859-1 charset, declared over the whole
;; single-byte code space [0 255] with :code-offset 0.
(define-charset 'iso-8859-1
  "Latin-1 (ISO/IEC 8859-1)"
  :short-name "Latin-1"
  :ascii-compatible-p t
  :code-space [0 255]
  :code-offset 0)
85
86(define-charset 'latin-iso8859-1
87 "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
88 :short-name "RHP of Latin-1"
89 :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
90 :iso-final-char ?A
91 :emacs-mule-id 129
92 :code-space [32 127]
93 :code-offset 160)
94
95(define-charset 'eight-bit-control
96 "8-bit control code (0x80..0x9F)"
97 :short-name "8-bit control code"
98 :code-space [128 159]
99 :code-offset 128)
100
101(define-charset 'eight-bit-graphic
102 "8-bit graphic code (0xA0..0xFF)"
103 :short-name "8-bit graphic code"
104 :code-space [160 255]
105 :code-offset 160)
106
(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
                                                 iso-ir iso-final
                                                 emacs-mule-id map)
  "For internal use only.
Expand into a definition of the 8-bit charset SYMBOL and, when
ISO-SYMBOL evaluates to non-nil, of a second charset ISO-SYMBOL for
its right-hand part.

SYMBOL gets NAME as docstring and long name, NICKNAME as short name,
code space [0 255] and mapping MAP.  ISO-SYMBOL gets code space
[32 127], ISO-FINAL as ISO final character, EMACS-MULE-ID, and SYMBOL
\(paired with 128) as its only parent.  When ISO-IR is non-nil it is
appended to the docstring of ISO-SYMBOL as the ISO-IR number.

The argument forms are spliced into the expansion as they are, so
some of them are evaluated more than once."
  (let ((whole-charset-form
         ;; Definition of the complete single-byte charset.
         `(define-charset ,symbol
            ,name
            :short-name ,nickname
            :long-name ,name
            :ascii-compatible-p t
            :code-space [0 255]
            :map ,map))
        (rhp-docstring-form
         ;; Docstring of the right-hand-part charset; mentions the
         ;; ISO-IR number only when one was supplied.
         `(if ,iso-ir
              (format "Right-Hand Part of %s (%s): ISO-IR-%d"
                      ,name ,nickname ,iso-ir)
            (format "Right-Hand Part of %s (%s)" ,name ,nickname))))
    `(progn
       ,whole-charset-form
       (if ,iso-symbol
           (define-charset ,iso-symbol
             ,rhp-docstring-form
             :short-name (format "RHP of %s" ,name)
             :long-name (format "RHP of %s (%s)" ,name ,nickname)
             :iso-final-char ,iso-final
             :emacs-mule-id ,emacs-mule-id
             :code-space [32 127]
             :parents (list (cons ,symbol 128)))))))
131
132(define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
133 "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
134
135(define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
136 "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
137
138(define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
139 "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
140
141(define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
142 "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
143
144(define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
145 "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
146
147(define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
148 "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
149
150(define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
151 "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
152
153(define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
154 "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
155
156(define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
157 "ISO/IEC 8859/13" "Latin-7" nil nil nil "8859-13")
158
159(define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
160 "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
161
162(define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
163 "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
164
165(define-charset 'thai-tis620
166 "TIS620.2533"
167 :short-name "TIS620.2533"
168 :iso-final-char ?T
169 :emacs-mule-id 133
170 :code-space [32 127]
171 :code-offset #x0E00)
172
173(define-charset 'tis620-2533
174 "TIS620.2533"
175 :short-name "TIS620.2533"
176 :ascii-compatible-p t
177 :code-space [0 255]
178 :parents '(ascii eight-bit-control (thai-tis620 . -128)))
179
180(define-charset 'jisx0201
181 "JISX0201"
182 :short-name "JISX0201"
183 :long-name "JISX0201"
184 :code-space [33 254]
185 :map "jisx0201")
186
187(define-charset 'latin-jisx0201
188 "Roman Part of JISX0201.1976"
189 :short-name "JISX0201 Roman"
190 :long-name "Japanese Roman (JISX0201.1976)"
191 :iso-final-char ?J
192 :emacs-mule-id 138
193 :code-space [33 126]
194 :parents '(jisx0201))
195
196(define-charset 'katakana-jisx0201
197 "Katakana Part of JISX0201.1976"
198 :short-name "JISX0201 Katakana"
199 :long-name "Japanese Katakana (JISX0201.1976)"
200 :iso-final-char ?I
201 :emacs-mule-id 137
202 :code-space [33 126]
203 :parents '((jisx0201 . #x80)))
204
205(define-charset 'chinese-gb2312
206 "GB2312 Chinese simplified: ISO-IR-58"
207 :short-name "GB2312"
208 :long-name "GB2312: ISO-IR-58"
209 :iso-final-char ?A
210 :emacs-mule-id 145
211 :code-space [33 126 33 126]
212 :code-offset #x110000
213 :unify-map "gb2312-1980")
214
215(define-charset 'chinese-cns11643-1
216 "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
217 :short-name "CNS11643-1"
218 :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
219 :iso-final-char ?G
220 :emacs-mule-id 149
221 :code-space [33 126 33 126]
222 :code-offset #x114000
223 :unify-map "cns11643-1")
224
225(define-charset 'chinese-cns11643-2
226 "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
227 :short-name "CNS11643-2"
228 :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
229 :iso-final-char ?H
230 :emacs-mule-id 150
231 :code-space [33 126 33 126]
232 :code-offset #x118000
233 :unify-map "cns11643-2")
234
235(define-charset 'chinese-cns11643-3
236 "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
237 :short-name "CNS11643-3"
238 :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
239 :iso-final-char ?I
240 :code-space [33 126 33 126]
241 :emacs-mule-id 246
242 :code-offset #x11C000)
243
244(define-charset 'chinese-cns11643-4
245 "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
246 :short-name "CNS11643-4"
247 :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
248 :iso-final-char ?J
249 :emacs-mule-id 247
250 :code-space [33 126 33 126]
251 :code-offset #x120000)
252
253(define-charset 'chinese-cns11643-5
254 "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
255 :short-name "CNS11643-5"
256 :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
257 :iso-final-char ?K
258 :emacs-mule-id 248
259 :code-space [33 126 33 126]
260 :code-offset #x124000)
261
262(define-charset 'chinese-cns11643-6
263 "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
264 :short-name "CNS11643-6"
265 :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
266 :iso-final-char ?L
267 :emacs-mule-id 249
268 :code-space [33 126 33 126]
269 :code-offset #x128000)
270
271(define-charset 'chinese-cns11643-7
272 "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
273 :short-name "CNS11643-7"
274 :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
275 :iso-final-char ?M
276 :emacs-mule-id 250
277 :code-space [33 126 33 126]
278 :code-offset #x12C000)
279
280(define-charset 'big5
281 "Big5 (Chinese traditional)"
282 :short-name "Big5"
283 :long-name "Big5"
284 :code-space [#x40 #xFE #xA1 #xFE]
285 :code-offset #x130000
286 :unify-map "big5")
287
;; First of the two ISO-2022 compatible halves into which Big5 is split.
;; NOTE(review): the docstring gives the range as A141-C67E while
;; :long-name says A141-C67F -- confirm which end point is intended.
(define-charset 'chinese-big5-1
  "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-1)"
  :long-name "Big5 (Level-1) A141-C67F"
  :iso-final-char ?0
  :emacs-mule-id 152
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x134000
  :unify-map "big5-1")
297
;; Second of the two ISO-2022 compatible halves into which Big5 is split.
(define-charset 'chinese-big5-2
  "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-2)"
  :long-name "Big5 (Level-2) C940-FEFE"
  :iso-final-char ?1
  :emacs-mule-id 153
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x138000
  :unify-map "big5-2")
307
308(define-charset 'japanese-jisx0208
309 "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
310 :short-name "JISX0208"
311 :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
312 :iso-final-char ?B
313 :emacs-mule-id 146
314 :code-space [33 126 33 126]
315 :code-offset #x140000
316 :unify-map "jisx0208-1990")
317
318(define-charset 'japanese-jisx0208-1978
319 "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
320 :short-name "JISX0208.1978"
321 :long-name "JISX0208.1978 (Japanese): ISO-IR-42"
322 :iso-final-char ?@
323 :emacs-mule-id 144
324 :code-space [33 126 33 126]
325 :code-offset #x144000
326 :unify-map "jisx0208-1978")
327
328(define-charset 'japanese-jisx0212
329 "JISX0212 Japanese supplement: ISO-IR-159"
330 :short-name "JISX0212"
331 :long-name "JISX0212 (Japanese): ISO-IR-159"
332 :iso-final-char ?D
333 :emacs-mule-id 148
334 :code-space [33 126 33 126]
335 :code-offset #x148000
336 :unify-map "jisx0212-1990")
337
338(define-charset 'japanese-jisx0213-1
339 "JISX0213 Plane 1 (Japanese)"
340 :short-name "JISX0213-1"
341 :long-name "JISX0213-1"
342 :iso-final-char ?O
343 :emacs-mule-id 151
344 :code-space [33 126 33 126]
345 :code-offset #x14C000)
346
347(define-charset 'japanese-jisx0213-2
348 "JISX0213 Plane 2 (Japanese)"
349 :short-name "JISX0213-2"
350 :long-name "JISX0213-2"
351 :iso-final-char ?P
352 :emacs-mule-id 254
353 :code-space [33 126 33 126]
354 :code-offset #x150000)
355
356(define-charset 'korean-ksc5601
357 "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
358 :short-name "KSC5601"
359 :long-name "KSC5601 (Korean): ISO-IR-149"
360 :iso-final-char ?C
361 :emacs-mule-id 147
362 :code-space [33 126 33 126]
363 :map "ksc5601-1987")
364
365(define-charset 'chinese-sisheng
366 "SiSheng characters for PinYin/ZhuYin"
367 :short-name "SiSheng"
368 :long-name "SiSheng (PinYin/ZhuYin)"
369 :iso-final-char ?0
370 :emacs-mule-id 160
371 :code-space [33 126]
372 :code-offset #x200000)
373
374(define-charset 'ipa
375 "IPA (International Phonetic Association)"
376 :short-name "IPA"
377 :long-name "IPA"
378 :iso-final-char ?0
379 :emacs-mule-id 161
380 :code-space [32 127]
381 :code-offset #x200080)
382
383(define-charset 'viscii
384 "VISCII1.1"
385 :short-name "VISCII"
386 :long-name "VISCII 1.1"
387 :code-space [0 255]
388 :map "viscii")
389
390(define-charset 'vietnamese-viscii-lower
391 "VISCII1.1 lower-case"
392 :short-name "VISCII lower"
393 :long-name "VISCII lower-case"
394 :iso-final-char ?1
395 :emacs-mule-id 162
396 :code-space [32 127]
397 :map "viscii-lower")
398
399(define-charset 'vietnamese-viscii-upper
400 "VISCII1.1 upper-case"
401 :short-name "VISCII upper"
402 :long-name "VISCII upper-case"
403 :iso-final-char ?2
404 :emacs-mule-id 163
405 :code-space [32 127]
406 :map "viscii-upper")
407
408(define-charset 'vscii
409 "VSCII1.1"
410 :short-name "VSCII"
411 :long-name "VSCII"
412 :code-space [0 255]
413 :map "vscii")
414
415(define-charset 'koi8-r
416 "KOI8-R"
417 :short-name "KOI8-R"
418 :long-name "KOI8-R"
419 :ascii-compatible-p t
420 :code-space [0 255]
421 :map "koi8-r")
422
423(define-charset-alias 'koi8 'koi8-r)
424
425(define-charset 'alternativnyj
426 "ALTERNATIVNYJ"
427 :short-name "alternativnyj"
428 :long-name "alternativnyj"
429 :ascii-compatible-p t
430 :code-space [0 255]
431 :map "ibm866")
08c19a27
KH
432
433;; For Arabic, we need three different types of character sets.
434;; Digits are of direction left-to-right and of width 1-column.
435;; Others are of direction right-to-left and of width 1-column or
436;; 2-column.
c0e17dd8
KH
437(define-charset 'arabic-digit
438 "Arabic digit"
439 :short-name "Arabic digit"
440 :long-name "Arabic digit"
441 :iso-final-char ?2
442 :emacs-mule-id 164
443 :code-space [34 42]
444 :code-offset #x0600)
445
446(define-charset 'arabic-1-column
447 "Arabic 1-column"
448 :short-name "Arabic 1-col"
449 :long-name "Arabic 1-column"
450 :iso-final-char ?3
451 :emacs-mule-id 165
452 :code-space [33 126]
453 :code-offset #x200100)
454
455(define-charset 'arabic-2-column
456 "Arabic 2-column"
457 :short-name "Arabic 2-col"
458 :long-name "Arabic 2-column"
459 :iso-final-char ?4
460 :emacs-mule-id 224
461 :code-space [33 126]
462 :code-offset #x200180)
7153b1f1
KH
463
464;; Lao script.
c0e17dd8
KH
465;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
466(define-charset 'lao
467 "Lao characters (ISO10646 0E81..0EDF)"
468 :short-name "Lao"
469 :long-name "Lao"
470 :iso-final-char ?1
471 :emacs-mule-id 167
472 :code-space [33 126]
473 :code-offset #x0E81)
474
475(define-charset 'mule-lao
476 "Lao characters (ISO10646 0E81..0EDF)"
477 :short-name "Lao"
478 :long-name "Lao"
479 :code-space [0 255]
480 :parents '(ascii eight-bit-control (lao . -128)))
7153b1f1 481
08c19a27 482
7153b1f1
KH
483;; Indian scripts. Symbolic charset for data exchange. Glyphs are
484;; not assigned. They are automatically converted to each Indian
485;; script which IS-13194 supports.
486
c0e17dd8
KH
487(define-charset 'indian-is13194
488 "Generic Indian charset for data exchange with IS 13194"
489 :short-name "IS 13194"
490 :long-name "Indian IS 13194"
491 :iso-final-char ?5
492 :emacs-mule-id 225
493 :code-space [33 126]
494 :code-offset #x180000)
495
;; Two-dimensional charset holding glyphs for Indian characters.
;; NOTE(review): this uses :emacs-mule-id 240, the same id that
;; `indian-1-column' declares further down -- confirm that sharing the
;; id is intended.
496(define-charset 'indian-glyph
497 "Glyphs for Indian characters."
498 :short-name "Indian glyph"
499 :long-name "Indian glyph"
500 :iso-final-char ?4
501 :emacs-mule-id 240
502 :code-space [32 127 32 127]
503 :code-offset #x180100)
7153b1f1
KH
504
505;; Actual Glyph for 1-column width.
c0e17dd8
KH
506(define-charset 'indian-1-column
507 "Indian charset for 1-column width glyphs"
508 :short-name "Indian 1-col"
509 :long-name "Indian 1 Column"
510 :iso-final-char ?6
511 :emacs-mule-id 240
512 :code-space [33 126 33 126]
513 :code-offset #x184000)
08c19a27 514
08c19a27 515;; Actual Glyph for 2-column width.
c0e17dd8
KH
516(define-charset 'indian-2-column
517 "Indian charset for 2-column width glyphs"
518 :short-name "Indian 2-col"
519 :long-name "Indian 2 Column"
520 :iso-final-char ?5
521 :emacs-mule-id 251
522 :code-space [33 126 33 126]
523 :parents '(indian-1-column))
524
;; Tibetan characters; `tibetan-1-column' names this charset as its
;; parent.  The :iso-final-char property is given exactly once.
(define-charset 'tibetan
  "Tibetan characters"
  :short-name "Tibetan 2-col"
  :long-name "Tibetan 2 column"
  :iso-final-char ?7
  :emacs-mule-id 252
  :code-space [33 126 33 126]
  :code-offset #x190000)
534
535(define-charset 'tibetan-1-column
536 "Tibetan 1 column glyph"
537 :short-name "Tibetan 1-col"
538 :long-name "Tibetan 1 column"
539 :iso-final-char ?8
540 :emacs-mule-id 241
541 :code-space [33 126 33 37]
542 :parents '(tibetan))
08c19a27 543
c0e17dd8
KH
544;; Subsets of Unicode.
545(define-charset 'mule-unicode-2500-33ff
546 "Unicode characters of the range U+2500..U+33FF."
547 :short-name "Unicode subset 2"
548 :long-name "Unicode subset (U+2500..U+33FF)"
549 :iso-final-char ?2
550 :emacs-mule-id 242
551 :code-space [#x20 #x7f #x20 #x47]
552 :code-offset #x2500)
553
;; Unicode subset U+E000..U+FFFF; the long name uses the same
;; "U+XXXX..U+YYYY" notation as the other mule-unicode-* subsets.
(define-charset 'mule-unicode-e000-ffff
  "Unicode characters of the range U+E000..U+FFFF."
  :short-name "Unicode subset 3"
  :long-name "Unicode subset (U+E000..U+FFFF)"
  :iso-final-char ?3
  :emacs-mule-id 243
  :code-space [#x20 #x7F #x20 #x75]
  :code-offset #xE000)
562
563(define-charset 'mule-unicode-0100-24ff
564 "Unicode characters of the range U+0100..U+24FF."
565 :short-name "Unicode subset"
566 :long-name "Unicode subset (U+0100..U+24FF)"
567 :iso-final-char ?1
568 :emacs-mule-id 244
569 :code-space [#x20 #x7F #x20 #x7F]
570 :code-offset #x100)
571
;; Two-dimensional charset for Ethiopic script.
(define-charset 'ethiopic
  "Ethiopic characters for Amharic and Tigrigna."
  :short-name "Ethiopic"
  :long-name "Ethiopic characters"
  :iso-final-char ?3
  :emacs-mule-id 245
  :code-space [33 126 33 126]
  :code-offset #x1A0000)
580
581(define-charset 'mac-roman
582 "Mac Roman charset"
583 :short-name "Mac Roman"
584 :long-name "Mac Roman"
585 :ascii-compatible-p t
586 :code-space [0 255]
587 :map "mac-roman")
588
589(unify-charset 'chinese-gb2312)
590(unify-charset 'chinese-cns11643-1)
591(unify-charset 'chinese-cns11643-2)
592(unify-charset 'big5)
593(unify-charset 'chinese-big5-1)
594(unify-charset 'chinese-big5-2)
85f789f7 595\f
d2a1ee18
KH
596;; These are tables for translating characters on decoding and
597;; encoding.
c0e17dd8 598(setq standard-translation-table-for-decode nil)
08c19a27 599
f967223b 600(setq standard-translation-table-for-encode nil)
08c19a27 601
bdf74bef
DL
602(defvar translation-table-for-input nil
603 "If non-nil, a char table used to translate characters from input methods.
604\(Currently only used by Quail.)")
08c19a27
KH
605\f
606;;; Make fundamental coding systems.
607
c0e17dd8
KH
608;; The coding system `no-conversion' is already defined in coding.c as
609;; below:
610;;
611;; (define-coding-system 'no-conversion
612;; "Do no conversion."
613;; :coding-type 'raw-text
614;; :mnemonic ?=)
08c19a27 615
c0e17dd8
KH
616(define-coding-system 'raw-text
617 "Raw text, which means text contains random 8-bit codes.
618Encoding text with this coding system produces the actual byte
619sequence of the text in buffers and strings. An exception is made for
620eight-bit-control characters. Each of them is encoded into a single
621byte.
bc6a0946
KH
622
623When you visit a file with this coding, the file is read into a
c0e17dd8
KH
624unibyte buffer as is (except for EOL format), thus each byte of a file
625is treated as a character."
626 :coding-type 'raw-text
627 :mnemonic ?t)
628
629(define-coding-system 'undecided
630 "No conversion on encoding, automatic conversion on decoding"
631 :coding-type 'undecided
632 :mnemonic ?-
633 :charset-list '(ascii))
08c19a27 634
8d969bf6 635(define-coding-system-alias 'unix 'undecided-unix)
1c445211
RS
636(define-coding-system-alias 'dos 'undecided-dos)
637(define-coding-system-alias 'mac 'undecided-mac)
638
c0e17dd8
KH
639(define-coding-system 'iso-latin-1
640 "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
641 :coding-type 'iso-2022
642 :mnemonic ?1
643 :charset-list '(ascii latin-iso8859-1)
644 :designation [ascii latin-iso8859-1 nil nil]
645 :mime-charset 'iso-8859-1)
bc6a0946 646
c0e17dd8
KH
647(define-coding-system-alias 'iso-8859-1 'iso-latin-1)
648(define-coding-system-alias 'latin-1 'iso-latin-1)
08c19a27 649
c0e17dd8 650;; Coding systems not specific to each language environment.
bc6a0946 651
c0e17dd8
KH
652(define-coding-system 'emacs-mule
653 "Emacs 21 internal format used in buffer and string."
654 :coding-type 'emacs-mule
655 :mnemonic ?M)
656
657(define-coding-system 'utf-8
658 "UTF-8."
659 :coding-type 'utf-8
660 :mnemonic ?U
661 :charset-list '(unicode))
662
663(define-coding-system-alias 'mule-utf-8 'utf-8)
664
;; Same coding type as `utf-8', but its charset list is the `emacs'
;; charset instead of `unicode'.
(define-coding-system 'utf-8-emacs
  "UTF-8 with full support for Emacs characters."
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(emacs))
670
671(define-coding-system 'utf-16
672 "UTF-16"
673 :coding-type 'utf-16
674 :mnemonic ?U
675 :charset-list '(unicode))
676
677(define-coding-system 'utf-16-le-nosig
678 "UTF-16, little endian, no signature"
679 :coding-type 'utf-16
680 :mnemonic ?U
681 :charset-list '(unicode)
682 :endian 'little)
683
684(define-coding-system 'utf-16-be-nosig
685 "UTF-16, big endian, no signature"
686 :coding-type 'utf-16
687 :mnemonic ?U
688 :charset-list '(unicode)
689 :endian 'big)
690
691(define-coding-system 'utf-16-le
692 "UTF-16, little endian, with signature"
693 :coding-type 'utf-16
694 :mnemonic ?U
695 :charset-list '(unicode)
a44cf41b 696 :bom t
c0e17dd8
KH
697 :endian 'little)
698
699(define-coding-system 'utf-16-be
700 "UTF-16, big endian, with signature"
701 :coding-type 'utf-16
702 :mnemonic ?U
703 :charset-list '(unicode)
a44cf41b 704 :bom t
c0e17dd8
KH
705 :endian 'big)
706
707(define-coding-system 'iso-2022-7bit
708 "ISO 2022 based 7-bit encoding using only G0"
709 :coding-type 'iso-2022
710 :mnemonic ?J
711 :charset-list 'iso-2022
712 :designation [(ascii t) nil nil nil]
713 :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
714
715(define-coding-system 'iso-2022-7bit-ss2
716 "ISO 2022 based 7-bit encoding using SS2 for 96-charset"
717 :coding-type 'iso-2022
718 :mnemonic ?$
719 :charset-list 'iso-2022
720 :designation [(ascii 94) nil (nil 96) nil]
721 :flags '(short ascii-at-eol ascii-at-cntl 7-bit
722 designation single-shift composition))
723
724(define-coding-system 'iso-2022-7bit-lock
725 "ISO-2022 coding system using Locking-Shift for 96-charset"
726 :coding-type 'iso-2022
727 :mnemonic ?&
728 :charset-list 'iso-2022
729 :designation [(ascii 94) (nil 96) nil nil]
730 :flags '(ascii-at-eol ascii-at-cntl 7-bit
731 designation locking-shift composition))
4951a271 732
2e21aa27 733(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
08c19a27 734
c0e17dd8
KH
735(define-coding-system 'iso-2022-7bit-lock-ss2
736 "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN"
737 :coding-type 'iso-2022
738 :mnemonic ?i
739 :charset-list '(ascii
740 japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
741 korean-ksc5601
742 chinese-gb2312
743 chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
744 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
745 chinese-cns11643-7)
746 :designation [(ascii 94)
747 (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
748 (nil chinese-cns11643-2)
749 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
750 chinese-cns11643-6 chinese-cns11643-7)]
751 :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
752 single-shift init-bol))
08c19a27 753
2e21aa27 754(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
08c19a27 755
c0e17dd8
KH
756(define-coding-system 'iso-2022-8bit-ss2
757 "ISO 2022 based 8-bit encoding using SS2 for 96-charset"
758 :coding-type 'iso-2022
759 :mnemonic ?@
760 :charset-list 'iso-2022
761 :designation [(ascii 94) nil (nil 96) nil]
762 :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
08c19a27 763
c0e17dd8
KH
764(define-coding-system 'compound-text
765 "Compound text based generic encoding for decoding unknown messages.
73066974
EZ
766
767This coding system does not support ICCCM Extended Segments."
c0e17dd8
KH
768 :coding-type 'iso-2022
769 :mnemonic ?x
770 :charset-list 'iso-2022
771 :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
772 :flags '(ascii-at-eol ascii-at-cntl
773 designation locking-shift single-shift composition)
774 :mime-charset 'x-ctext)
d49a4835 775
cb5be6c9
EZ
776(define-coding-system-alias 'x-ctext 'compound-text)
777(define-coding-system-alias 'ctext 'compound-text)
73066974 778
cb5be6c9
EZ
779;; Same as compound-text, but doesn't produce composition escape
780;; sequences. Used in post-read and pre-write conversions of
781;; compound-text-with-extensions, see mule.el. Note that this should
782;; not have a mime-charset property, to prevent it from showing up
783;; close to the beginning of coding systems ordered by priority.
;; Unlike `compound-text', the flags below omit `composition' and no
;; :mime-charset is set.  The docstring must directly follow the
;; coding system name; everything after it is a keyword/value list.
(define-coding-system 'ctext-no-compositions
  "Compound text based generic encoding for decoding unknown messages.

Like `compound-text', but does not produce escape sequences for compositions."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
           designation locking-shift single-shift))
794
795(define-coding-system 'compound-text-with-extensions
73066974
EZ
796 "Compound text encoding with ICCCM Extended Segment extensions.
797
798This coding system should be used only for X selections. It is inappropriate
799for decoding and encoding files, process I/O, etc."
c0e17dd8
KH
800 :coding-type 'raw-text
801 :mnemonic ?x
802 :post-read-conversion 'ctext-post-read-conversion
803 :pre-write-conversion 'ctext-pre-write-conversion)
73066974 804
cb5be6c9
EZ
805(define-coding-system-alias
806 'x-ctext-with-extensions 'compound-text-with-extensions)
807(define-coding-system-alias
808 'ctext-with-extensions 'compound-text-with-extensions)
75b6fb58 809
c0e17dd8
KH
810(define-coding-system 'us-ascii
811 "Convert all characters but ASCII to `?'."
812 :coding-type 'charset
813 :mnemonic ?-
814 :charset-list '(ascii)
815 :default-char ??
816 :mime-charset 'us-ascii)
817
818(define-coding-system-alias 'iso-safe 'us-ascii)
f6eb8ace 819
c0e17dd8 820;; Use us-ascii for terminal output if some other coding system is not
e8dd0160 821;; specified explicitly.
c0e17dd8 822(set-safe-terminal-coding-system-internal 'us-ascii)
f6eb8ace 823
08c19a27 824;; The other coding-systems are defined in each language specific
c0e17dd8 825;; files under lisp/language.
08c19a27 826
678dc7ec
RS
827;; Normally, set coding system to `undecided' before reading a file.
828;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
829;; but we regard them as containing multibyte characters.
830;; Tar files are not decoded at all, but we treat them as raw bytes.
08c19a27 831
;; Default association of file-name regexps with coding systems.  Each
;; value is either a single coding system or a cons of two coding
;; systems; the final entry, with an empty regexp, matches every name.
(setq file-coding-system-alist
      '(("\\.elc\\'" . (emacs-mule . emacs-mule))
        ("\\.utf\\(-8\\)?\\'" . utf-8)
        ;; We use raw-text for reading loaddefs.el so that if it
        ;; happens to have DOS or Mac EOLs, they are converted to
        ;; newlines.  This is required to make the special treatment
        ;; of the "\ newline" combination in loaddefs.el, which marks
        ;; the beginning of a doc string, work.
        ;; The dot before "el" is escaped so that only a literal
        ;; "loaddefs.el" matches.
        ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
        ("\\.tar\\'" . (no-conversion . no-conversion))
        ("" . (undecided . nil))))
08c19a27
KH
843
844\f
845;;; Setting coding categories and their priorities.
846
847;; This setting is just for reading Emacs Lisp source files which
848;; contain multilingual text while dumping Emacs. More appropriate
2792ce16 849;; values are set by the command `set-language-environment' for each
08c19a27
KH
850;; language environment.
851
c0e17dd8
KH
852(set-coding-system-priority
853 'iso-latin-1
854 'utf-8
855 'iso-2022-7bit
856 )
08c19a27 857
c1b628eb
KH
858\f
859;;; Miscellaneous settings.
c1b628eb 860
c0e17dd8
KH
861;; Make all multibyte characters self-insert.
862(set-char-table-range (nth 1 global-map)
863 (cons (make-char 'unicode 128) (max-char))
864 'self-insert-command)
865
866(aset latin-extra-code-table ?\222 t)
4cb4b388 867
08c19a27 868;;; mule-conf.el ends here