(chinese-gbk): New charset.
[bpt/emacs.git] / lisp / international / mule-conf.el
1 ;;; mule-conf.el --- configure multilingual environment
2
3 ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 ;; Registration Number H13PRO009
8
9 ;; Keywords: mule, multilingual, character set, coding system
10
11 ;; This file is part of GNU Emacs.
12
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; any later version.
17
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 ;; Boston, MA 02111-1307, USA.
27
28 ;;; Commentary:
29
30 ;; Don't byte-compile this file.
31
32 ;;; Code:
33
34 ;;; Definitions of character sets.
35
;; The charsets `ascii' and `unicode' are already defined in charset.c
;; as below:
;;
;; (define-charset 'ascii
;;   ""
;;   :dimension 1
;;   :code-space [0 127]
;;   :iso-final-char ?A
;;   :ascii-compatible-p t
;;   :emacs-mule-id 0
;;   :code-offset 0)
;;
;; (define-charset 'unicode
;;   ""
;;   :dimension 3
;;   :code-space [0 255 0 255 0 16]
;;   :ascii-compatible-p t
;;   :code-offset 0)
;;
;; We now set :docstring, :short-name, and :long-name properties.

;; Each entry is (CHARSET PROPERTY VALUE), applied in the order listed.
(dolist (spec '((ascii   :docstring  "ASCII (ISO646 IRV)")
                (ascii   :short-name "ASCII")
                (ascii   :long-name  "ASCII (ISO646 IRV)")
                (unicode :docstring  "Unicode (ISO10646)")
                (unicode :short-name "Unicode")
                (unicode :long-name  "Unicode (ISO10646)")))
  (apply 'put-charset-property spec))

;; `ucs' is another name for `unicode'.
(define-charset-alias 'ucs 'unicode)
71
;; Three-dimensional charset whose code space is wider than that of
;; `unicode' in its last dimension (0..63 instead of 0..16).
(define-charset 'emacs
  "Full Emacs characters."
  :code-space [0 255 0 255 0 63]
  :code-offset 0
  :ascii-compatible-p t
  :supplementary-p t)
78
;; Full 8-bit Latin-1: codes 0..255 map directly onto characters 0..255
;; (code offset 0).
(define-charset 'iso-8859-1
  "Latin-1 (ISO/IEC 8859-1)"
  :short-name "Latin-1"
  :ascii-compatible-p t
  :code-space [0 255]
  :code-offset 0)
85
;; The 96-character right-hand half of Latin-1, as designated in ISO 2022
;; streams by final character `A'.
(define-charset 'latin-iso8859-1
  "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  :short-name "RHP of Latin-1"
  :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  :code-space [32 127]
  :code-offset 160
  :iso-final-char ?A
  :emacs-mule-id 129)
94
;; Raw 8-bit bytes: the C1 control range ...
(define-charset 'eight-bit-control
  "8-bit control code (0x80..0x9F)"
  :short-name "8-bit control code"
  :code-offset 128
  :code-space [128 159])

;; ... and the graphic range above it.
(define-charset 'eight-bit-graphic
  "8-bit graphic code (0xA0..0xFF)"
  :short-name "8-bit graphic code"
  :code-offset 160
  :code-space [160 255])
106
(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
                                                 iso-ir iso-final
                                                 emacs-mule-id map)
  "Define the 8-bit charset SYMBOL and, optionally, its ISO right-hand part.
For internal use only.

SYMBOL is defined with code space [0 255], docstring and long name NAME,
short name NICKNAME, and mapping table MAP.

If ISO-SYMBOL is non-nil, it is defined as a second charset with code
space [32 127] whose parent is SYMBOL at offset 128.  ISO-FINAL and
EMACS-MULE-ID become its :iso-final-char and :emacs-mule-id.  A non-nil
ISO-IR (an integer) is appended to its docstring as \"ISO-IR-<n>\".

Each argument form may be evaluated more than once in the expansion."
  `(progn
     ;; The complete single-byte charset.
     (define-charset ,symbol
       ,name
       :short-name ,nickname
       :long-name ,name
       :ascii-compatible-p t
       :code-space [0 255]
       :map ,map)
     ;; The right-hand part, for use with ISO 2022 designations.
     (if ,iso-symbol
         (define-charset ,iso-symbol
           (if ,iso-ir
               (format "Right-Hand Part of %s (%s): ISO-IR-%d"
                       ,name ,nickname ,iso-ir)
             (format "Right-Hand Part of %s (%s)" ,name ,nickname))
           ;; Use the nickname here so the short name matches the
           ;; hand-written `latin-iso8859-1' ("RHP of Latin-1") and
           ;; stays shorter than the long name.
           :short-name (format "RHP of %s" ,nickname)
           :long-name (format "RHP of %s (%s)" ,name ,nickname)
           :iso-final-char ,iso-final
           :emacs-mule-id ,emacs-mule-id
           :code-space [32 127]
           :parents (list (cons ,symbol 128))))))
131
;; ISO/IEC 8859 parts 2..15.  Arguments, in order:
;;   SYMBOL ISO-SYMBOL NAME NICKNAME ISO-IR ISO-FINAL EMACS-MULE-ID MAP

(define-iso-single-byte-charset
 'iso-8859-2 'latin-iso8859-2
 "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")

(define-iso-single-byte-charset
 'iso-8859-3 'latin-iso8859-3
 "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")

(define-iso-single-byte-charset
 'iso-8859-4 'latin-iso8859-4
 "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")

(define-iso-single-byte-charset
 'iso-8859-5 'cyrillic-iso8859-5
 "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")

(define-iso-single-byte-charset
 'iso-8859-6 'arabic-iso8859-6
 "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")

(define-iso-single-byte-charset
 'iso-8859-7 'greek-iso8859-7
 "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")

(define-iso-single-byte-charset
 'iso-8859-8 'hebrew-iso8859-8
 "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")

(define-iso-single-byte-charset
 'iso-8859-9 'latin-iso8859-9
 "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")

;; No ISO-IR number, final character or emacs-mule id is given for
;; part 13.
(define-iso-single-byte-charset
 'iso-8859-13 'latin-iso8859-13
 "ISO/IEC 8859/13" "Latin-7" nil nil nil "8859-13")

(define-iso-single-byte-charset
 'iso-8859-14 'latin-iso8859-14
 "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")

(define-iso-single-byte-charset
 'iso-8859-15 'latin-iso8859-15
 "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
164
;; The 96-character Thai set, based at #x0E00.
(define-charset 'thai-tis620
  "TIS620.2533"
  :short-name "TIS620.2533"
  :code-space [32 127]
  :code-offset #x0E00
  :iso-final-char ?T
  :emacs-mule-id 133)

;; The full 8-bit form, assembled from `ascii', `eight-bit-control' and
;; `thai-tis620' (the latter with a parent offset of -128).
(define-charset 'tis620-2533
  "TIS620.2533"
  :short-name "TIS620.2533"
  :code-space [0 255]
  :parents '(ascii eight-bit-control (thai-tis620 . -128))
  :ascii-compatible-p t)
179
;; The whole JISX0201 table, loaded from the "jisx0201" map.
(define-charset 'jisx0201
  "JISX0201"
  :short-name "JISX0201"
  :long-name "JISX0201"
  :code-space [33 254]
  :map "jisx0201")

;; Roman half: a direct child of `jisx0201'.
(define-charset 'latin-jisx0201
  "Roman Part of JISX0201.1976"
  :short-name "JISX0201 Roman"
  :long-name "Japanese Roman (JISX0201.1976)"
  :code-space [33 126]
  :parents '(jisx0201)
  :iso-final-char ?J
  :emacs-mule-id 138)

;; Katakana half: child of `jisx0201' with a parent offset of #x80.
(define-charset 'katakana-jisx0201
  "Katakana Part of JISX0201.1976"
  :short-name "JISX0201 Katakana"
  :long-name "Japanese Katakana (JISX0201.1976)"
  :code-space [33 126]
  :parents '((jisx0201 . #x80))
  :iso-final-char ?I
  :emacs-mule-id 137)
204
;; 94x94 simplified Chinese set; unified through the "gb2312-1980" map.
(define-charset 'chinese-gb2312
  "GB2312 Chinese simplified: ISO-IR-58"
  :short-name "GB2312"
  :long-name "GB2312: ISO-IR-58"
  :code-space [33 126 33 126]
  :code-offset #x110000
  :unify-map "gb2312-1980"
  :iso-final-char ?A
  :emacs-mule-id 145)
214
;; GBK: two-byte codes, first byte #x81..#xFE, second byte #x40..#xFE.
;;
;; The code offset must not be #x150000: that is already the base of
;; `japanese-jisx0213-2' in this file, and two charsets sharing a base
;; would map different characters to the same character codes.
;; #x160000 is unused by any other charset defined here, and the
;; 191 x 126 code points of GBK end well before the next base
;; (#x180000, `indian-is13194').
(define-charset 'chinese-gbk
  "GBK Chinese simplified."
  :short-name "GBK"
  :long-name "GBK"
  :code-space [#x40 #xFE #x81 #xFE]
  :code-offset #x160000
  :unify-map "gbk")
222
;; CNS11643 planes 1..7.  Every plane is a 94x94 set with its own
;; #x4000-spaced code offset; only planes 1 and 2 have a unify map.

(define-charset 'chinese-cns11643-1
  "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
  :short-name "CNS11643-1"
  :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
  :code-space [33 126 33 126]
  :code-offset #x114000
  :unify-map "cns11643-1"
  :iso-final-char ?G
  :emacs-mule-id 149)

(define-charset 'chinese-cns11643-2
  "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
  :short-name "CNS11643-2"
  :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
  :code-space [33 126 33 126]
  :code-offset #x118000
  :unify-map "cns11643-2"
  :iso-final-char ?H
  :emacs-mule-id 150)

(define-charset 'chinese-cns11643-3
  "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
  :short-name "CNS11643-3"
  :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
  :code-space [33 126 33 126]
  :code-offset #x11C000
  :iso-final-char ?I
  :emacs-mule-id 246)

(define-charset 'chinese-cns11643-4
  "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
  :short-name "CNS11643-4"
  :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
  :code-space [33 126 33 126]
  :code-offset #x120000
  :iso-final-char ?J
  :emacs-mule-id 247)

(define-charset 'chinese-cns11643-5
  "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
  :short-name "CNS11643-5"
  :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
  :code-space [33 126 33 126]
  :code-offset #x124000
  :iso-final-char ?K
  :emacs-mule-id 248)

(define-charset 'chinese-cns11643-6
  "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
  :short-name "CNS11643-6"
  :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
  :code-space [33 126 33 126]
  :code-offset #x128000
  :iso-final-char ?L
  :emacs-mule-id 249)

(define-charset 'chinese-cns11643-7
  "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
  :short-name "CNS11643-7"
  :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
  :code-space [33 126 33 126]
  :code-offset #x12C000
  :iso-final-char ?M
  :emacs-mule-id 250)
287
;; Big5 in its native two-byte form: first byte #xA1..#xFE, second
;; byte #x40..#xFE.
(define-charset 'big5
  "Big5 (Chinese traditional)"
  :short-name "Big5"
  :long-name "Big5"
  :code-offset #x130000
  :code-space [#x40 #xFE #xA1 #xFE]
  :unify-map "big5")
295
;; Big5 split into two 94x94 sets.  The Big5 ranges covered by each
;; level are the ones quoted in the docstrings.

(define-charset 'chinese-big5-1
  "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-1)"
  :long-name "Big5 (Level-1) A141-C67F"
  :iso-final-char ?0
  :emacs-mule-id 152
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x135000
  :unify-map "big5-1")

(define-charset 'chinese-big5-2
  "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-2)"
  :long-name "Big5 (Level-2) C940-FEFE"
  :iso-final-char ?1
  :emacs-mule-id 153
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x137800
  :unify-map "big5-2")
315
;; Japanese 94x94 sets that carry a unify map.

(define-charset 'japanese-jisx0208
  "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
  :short-name "JISX0208"
  :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
  :code-space [33 126 33 126]
  :code-offset #x140000
  :unify-map "jisx0208-1990"
  :iso-final-char ?B
  :emacs-mule-id 146)

(define-charset 'japanese-jisx0208-1978
  "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
  :short-name "JISX0208.1978"
  :long-name "JISX0208.1978 (Japanese): ISO-IR-42"
  :code-space [33 126 33 126]
  :code-offset #x144000
  :unify-map "jisx0208-1978"
  :iso-final-char ?@
  :emacs-mule-id 144)

(define-charset 'japanese-jisx0212
  "JISX0212 Japanese supplement: ISO-IR-159"
  :short-name "JISX0212"
  :long-name "JISX0212 (Japanese): ISO-IR-159"
  :code-space [33 126 33 126]
  :code-offset #x148000
  :unify-map "jisx0212-1990"
  :iso-final-char ?D
  :emacs-mule-id 148)
345
;; JISX0213 planes 1 and 2 (94x94 each, no unify map).

(define-charset 'japanese-jisx0213-1
  "JISX0213 Plane 1 (Japanese)"
  :short-name "JISX0213-1"
  :long-name "JISX0213-1"
  :code-space [33 126 33 126]
  :code-offset #x14C000
  :iso-final-char ?O
  :emacs-mule-id 151)

(define-charset 'japanese-jisx0213-2
  "JISX0213 Plane 2 (Japanese)"
  :short-name "JISX0213-2"
  :long-name "JISX0213-2"
  :code-space [33 126 33 126]
  :code-offset #x150000
  :iso-final-char ?P
  :emacs-mule-id 254)
363
;; Korean: defined through the "ksc5601-1987" map rather than an offset.
(define-charset 'korean-ksc5601
  "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
  :short-name "KSC5601"
  :long-name "KSC5601 (Korean): ISO-IR-149"
  :code-space [33 126 33 126]
  :map "ksc5601-1987"
  :iso-final-char ?C
  :emacs-mule-id 147)

;; Two single-byte sets placed from #x200000 upward.

(define-charset 'chinese-sisheng
  "SiSheng characters for PinYin/ZhuYin"
  :short-name "SiSheng"
  :long-name "SiSheng (PinYin/ZhuYin)"
  :code-space [33 126]
  :code-offset #x200000
  :iso-final-char ?0
  :emacs-mule-id 160)

(define-charset 'ipa
  "IPA (International Phonetic Association)"
  :short-name "IPA"
  :long-name "IPA"
  :code-space [32 127]
  :code-offset #x200080
  :iso-final-char ?0
  :emacs-mule-id 161)
390
;; Vietnamese.  All four charsets are table-driven (:map).

(define-charset 'viscii
  "VISCII1.1"
  :short-name "VISCII"
  :long-name "VISCII 1.1"
  :map "viscii"
  :code-space [0 255])

(define-charset 'vietnamese-viscii-lower
  "VISCII1.1 lower-case"
  :short-name "VISCII lower"
  :long-name "VISCII lower-case"
  :map "viscii-lower"
  :code-space [32 127]
  :iso-final-char ?1
  :emacs-mule-id 162)

(define-charset 'vietnamese-viscii-upper
  "VISCII1.1 upper-case"
  :short-name "VISCII upper"
  :long-name "VISCII upper-case"
  :map "viscii-upper"
  :code-space [32 127]
  :iso-final-char ?2
  :emacs-mule-id 163)

(define-charset 'vscii
  "VSCII1.1"
  :short-name "VSCII"
  :long-name "VSCII"
  :map "vscii"
  :code-space [0 255])
422
;; Cyrillic 8-bit charsets.

(define-charset 'koi8-r
  "KOI8-R"
  :short-name "KOI8-R"
  :long-name "KOI8-R"
  :code-space [0 255]
  :map "koi8-r"
  :ascii-compatible-p t)

(define-charset-alias 'koi8 'koi8-r)

;; `alternativnyj' is loaded from the "ibm866" map.
(define-charset 'alternativnyj
  "ALTERNATIVNYJ"
  :short-name "alternativnyj"
  :long-name "alternativnyj"
  :code-space [0 255]
  :map "ibm866"
  :ascii-compatible-p t)
440
;; Arabic needs three different kinds of character sets.  Digits run
;; left-to-right and are one column wide.  All other characters run
;; right-to-left and are either one or two columns wide.

(define-charset 'arabic-digit
  "Arabic digit"
  :short-name "Arabic digit"
  :long-name "Arabic digit"
  :code-space [34 42]
  :code-offset #x0600
  :iso-final-char ?2
  :emacs-mule-id 164)

(define-charset 'arabic-1-column
  "Arabic 1-column"
  :short-name "Arabic 1-col"
  :long-name "Arabic 1-column"
  :code-space [33 126]
  :code-offset #x200100
  :iso-final-char ?3
  :emacs-mule-id 165)

(define-charset 'arabic-2-column
  "Arabic 2-column"
  :short-name "Arabic 2-col"
  :long-name "Arabic 2-column"
  :code-space [33 126]
  :code-offset #x200180
  :iso-final-char ?4
  :emacs-mule-id 224)
471
;; Lao script.
;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
(define-charset 'lao
  "Lao characters (ISO10646 0E81..0EDF)"
  :short-name "Lao"
  :long-name "Lao"
  :code-space [33 126]
  :code-offset #x0E81
  :iso-final-char ?1
  :emacs-mule-id 167)

;; 8-bit form built from `ascii', `eight-bit-control' and `lao' (the
;; latter with a parent offset of -128).
(define-charset 'mule-lao
  "Lao characters (ISO10646 0E81..0EDF)"
  :short-name "Lao"
  :long-name "Lao"
  :parents '(ascii eight-bit-control (lao . -128))
  :code-space [0 255])
489
490
;; Indian scripts.  `indian-is13194' is a symbolic charset for data
;; exchange: no glyphs are assigned to it, and its characters are
;; automatically converted to each Indian script that IS-13194 supports.

(define-charset 'indian-is13194
  "Generic Indian charset for data exchange with IS 13194"
  :short-name "IS 13194"
  :long-name "Indian IS 13194"
  :code-space [33 126]
  :code-offset #x180000
  :iso-final-char ?5
  :emacs-mule-id 225)

;; NOTE(review): `indian-glyph' and `indian-1-column' below both claim
;; :emacs-mule-id 240 -- confirm which one should own that id.
(define-charset 'indian-glyph
  "Glyphs for Indian characters."
  :short-name "Indian glyph"
  :long-name "Indian glyph"
  :code-space [32 127 32 127]
  :code-offset #x180100
  :iso-final-char ?4
  :emacs-mule-id 240)

;; Actual glyphs of 1-column width.
(define-charset 'indian-1-column
  "Indian charset for 1-column width glyphs"
  :short-name "Indian 1-col"
  :long-name "Indian 1 Column"
  :code-space [33 126 33 126]
  :code-offset #x184000
  :iso-final-char ?6
  :emacs-mule-id 240)

;; Actual glyphs of 2-column width; defined as a child of
;; `indian-1-column' rather than with its own code offset.
(define-charset 'indian-2-column
  "Indian charset for 2-column width glyphs"
  :short-name "Indian 2-col"
  :long-name "Indian 2 Column"
  :code-space [33 126 33 126]
  :parents '(indian-1-column)
  :iso-final-char ?5
  :emacs-mule-id 251)
532
;; Tibetan.  The property list of `tibetan' used to name
;; :iso-final-char twice (both times ?7); it is given once here.
(define-charset 'tibetan
  "Tibetan characters"
  :short-name "Tibetan 2-col"
  :long-name "Tibetan 2 column"
  :iso-final-char ?7
  :emacs-mule-id 252
  :code-space [33 126 33 126]
  :code-offset #x190000)

;; 1-column glyphs: a child of `tibetan' whose second dimension is
;; restricted to 33..37.
(define-charset 'tibetan-1-column
  "Tibetan 1 column glyph"
  :short-name "Tibetan 1-col"
  :long-name "Tibetan 1 column"
  :iso-final-char ?8
  :emacs-mule-id 241
  :code-space [33 126 33 37]
  :parents '(tibetan))
551
;; Subsets of Unicode.  Each is a 2-dimensional charset whose code
;; offset is the first Unicode code point of the range it covers.

(define-charset 'mule-unicode-2500-33ff
  "Unicode characters of the range U+2500..U+33FF."
  :short-name "Unicode subset 2"
  :long-name "Unicode subset (U+2500..U+33FF)"
  :iso-final-char ?2
  :emacs-mule-id 242
  :code-space [#x20 #x7f #x20 #x47]
  :code-offset #x2500)

(define-charset 'mule-unicode-e000-ffff
  "Unicode characters of the range U+E000..U+FFFF."
  :short-name "Unicode subset 3"
  ;; Range written the same way as in the two sibling charsets.
  :long-name "Unicode subset (U+E000..U+FFFF)"
  :iso-final-char ?3
  :emacs-mule-id 243
  :code-space [#x20 #x7F #x20 #x75]
  :code-offset #xE000)

(define-charset 'mule-unicode-0100-24ff
  "Unicode characters of the range U+0100..U+24FF."
  :short-name "Unicode subset"
  :long-name "Unicode subset (U+0100..U+24FF)"
  :iso-final-char ?1
  :emacs-mule-id 244
  :code-space [#x20 #x7F #x20 #x7F]
  :code-offset #x100)
579
;; Ethiopic 94x94 set, based at #x1A0000.
(define-charset 'ethiopic
  "Ethiopic characters for Amharic and Tigrigna."
  :short-name "Ethiopic"
  :long-name "Ethiopic characters"
  :iso-final-char ?3
  :emacs-mule-id 245
  :code-space [33 126 33 126]
  :code-offset #x1A0000)
588
;; 8-bit Mac Roman, loaded from the "mac-roman" map.
(define-charset 'mac-roman
  "Mac Roman charset"
  :short-name "Mac Roman"
  :long-name "Mac Roman"
  :code-space [0 255]
  :map "mac-roman"
  :ascii-compatible-p t)
596
;; Unify these charsets, in this order.
(dolist (charset '(chinese-gb2312
                   chinese-gbk
                   chinese-cns11643-1
                   chinese-cns11643-2
                   big5
                   chinese-big5-1
                   chinese-big5-2))
  (unify-charset charset))
604 \f
;; Tables for translating characters on decoding and on encoding.
;; Both start out empty.
(setq standard-translation-table-for-decode nil
      standard-translation-table-for-encode nil)

(defvar translation-table-for-input nil
  "If non-nil, a char table used to translate characters from input methods.
\(Currently only used by Quail.)")
614 \f
615 ;;; Make fundamental coding systems.
616
;; `no-conversion' needs no definition here; coding.c already defines
;; it as follows:
;;
;; (define-coding-system 'no-conversion
;;   "Do no conversion."
;;   :coding-type 'raw-text
;;   :mnemonic ?=)

(define-coding-system 'raw-text
  "Raw text, which means text contains random 8-bit codes.
Encoding text with this coding system produces the actual byte
sequence of the text in buffers and strings. An exception is made for
eight-bit-control characters. Each of them is encoded into a single
byte.

When you visit a file with this coding, the file is read into a
unibyte buffer as is (except for EOL format), thus each byte of a file
is treated as a character."
  :mnemonic ?t
  :coding-type 'raw-text)

(define-coding-system 'undecided
  "No conversion on encoding, automatic conversion on decoding"
  :mnemonic ?-
  :coding-type 'undecided
  :charset-list '(ascii))

;; Short names for the three end-of-line variants of `undecided'.
(define-coding-system-alias 'unix 'undecided-unix)
(define-coding-system-alias 'dos 'undecided-dos)
(define-coding-system-alias 'mac 'undecided-mac)
647
;; Latin-1 as an ISO 2022 coding system: `ascii' is designated to G0
;; and `latin-iso8859-1' to G1.
(define-coding-system 'iso-latin-1
  "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
  :mnemonic ?1
  :coding-type 'iso-2022
  :charset-list '(ascii latin-iso8859-1)
  :designation [ascii latin-iso8859-1 nil nil]
  :mime-charset 'iso-8859-1)

(define-coding-system-alias 'iso-8859-1 'iso-latin-1)
(define-coding-system-alias 'latin-1 'iso-latin-1)
658
;; Coding systems that do not belong to a particular language
;; environment.

(define-coding-system 'emacs-mule
  "Emacs 21 internal format used in buffer and string."
  :mnemonic ?M
  :coding-type 'emacs-mule)

(define-coding-system 'utf-8
  "UTF-8."
  :mnemonic ?U
  :coding-type 'utf-8
  :charset-list '(unicode))

(define-coding-system-alias 'mule-utf-8 'utf-8)
673
;; Like `utf-8', but its charset list is `emacs' instead of `unicode'.
(define-coding-system 'utf-8-emacs
  "UTF-8 with full support for Emacs characters."
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(emacs))
679
;; UTF-16 family.  The variants differ only in :endian and in whether
;; :bom is set.

(define-coding-system 'utf-16
  "UTF-16"
  :mnemonic ?U
  :coding-type 'utf-16
  :charset-list '(unicode))

(define-coding-system 'utf-16-le-nosig
  "UTF-16, little endian, no signature"
  :mnemonic ?U
  :coding-type 'utf-16
  :endian 'little
  :charset-list '(unicode))

(define-coding-system 'utf-16-be-nosig
  "UTF-16, big endian, no signature"
  :mnemonic ?U
  :coding-type 'utf-16
  :endian 'big
  :charset-list '(unicode))

(define-coding-system 'utf-16-le
  "UTF-16, little endian, with signature"
  :mnemonic ?U
  :coding-type 'utf-16
  :endian 'little
  :bom t
  :charset-list '(unicode))

(define-coding-system 'utf-16-be
  "UTF-16, big endian, with signature"
  :mnemonic ?U
  :coding-type 'utf-16
  :endian 'big
  :bom t
  :charset-list '(unicode))
715
;; Generic 7-bit ISO 2022 coding systems.  They share the `iso-2022'
;; charset list and differ in designation and flags.

(define-coding-system 'iso-2022-7bit
  "ISO 2022 based 7-bit encoding using only G0"
  :mnemonic ?J
  :coding-type 'iso-2022
  :charset-list 'iso-2022
  :designation [(ascii t) nil nil nil]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))

(define-coding-system 'iso-2022-7bit-ss2
  "ISO 2022 based 7-bit encoding using SS2 for 96-charset"
  :mnemonic ?$
  :coding-type 'iso-2022
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit
           designation single-shift composition))

(define-coding-system 'iso-2022-7bit-lock
  "ISO-2022 coding system using Locking-Shift for 96-charset"
  :mnemonic ?&
  :coding-type 'iso-2022
  :charset-list 'iso-2022
  :designation [(ascii 94) (nil 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl 7-bit
           designation locking-shift composition))

(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
743
;; 7-bit CJK coding system with an explicit charset list; the
;; :designation vector says which charsets may go to each of G0..G3.
(define-coding-system 'iso-2022-7bit-lock-ss2
  "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN"
  :mnemonic ?i
  :coding-type 'iso-2022
  :charset-list '(ascii
                  japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
                  korean-ksc5601
                  chinese-gb2312
                  chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
                  chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
                  chinese-cns11643-7)
  :designation [;; G0
                (ascii 94)
                ;; G1
                (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
                ;; G2
                (nil chinese-cns11643-2)
                ;; G3
                (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
                     chinese-cns11643-6 chinese-cns11643-7)]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
           single-shift init-bol))

(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
764
;; Same designation as `iso-2022-7bit-ss2', but the flags contain
;; neither `short' nor `7-bit'.
(define-coding-system 'iso-2022-8bit-ss2
  "ISO 2022 based 8-bit encoding using SS2 for 96-charset"
  :mnemonic ?@
  :coding-type 'iso-2022
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
772
;; Compound text, registered under MIME charset `x-ctext'.
(define-coding-system 'compound-text
  "Compound text based generic encoding for decoding unknown messages.

This coding system does not support ICCCM Extended Segments."
  :mnemonic ?x
  :coding-type 'iso-2022
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
           designation locking-shift single-shift composition)
  :mime-charset 'x-ctext)

(define-coding-system-alias 'x-ctext 'compound-text)
(define-coding-system-alias 'ctext 'compound-text)
787
;; Same as compound-text, but doesn't produce composition escape
;; sequences.  Used in post-read and pre-write conversions of
;; compound-text-with-extensions, see mule.el.  Note that this should
;; not have a mime-charset property, to prevent it from showing up
;; close to the beginning of coding systems ordered by priority.
;;
;; Every other `define-coding-system' call in this file passes the
;; docstring directly after the name, followed by keyword properties.
;; The stray `2 ?x' (type and mnemonic in the old positional style)
;; made `2' the docstring and shifted the whole property list, so it
;; is removed; the same information is carried by :coding-type and
;; :mnemonic below.
(define-coding-system 'ctext-no-compositions
  "Compound text based generic encoding for decoding unknown messages.

Like `compound-text', but does not produce escape sequences for compositions."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
           designation locking-shift single-shift))
803
;; A raw-text coding system; the compound-text handling is done by the
;; post-read and pre-write conversion functions named below.
(define-coding-system 'compound-text-with-extensions
  "Compound text encoding with ICCCM Extended Segment extensions.

This coding system should be used only for X selections. It is inappropriate
for decoding and encoding files, process I/O, etc."
  :mnemonic ?x
  :coding-type 'raw-text
  :pre-write-conversion 'ctext-pre-write-conversion
  :post-read-conversion 'ctext-post-read-conversion)

(define-coding-system-alias 'x-ctext-with-extensions
  'compound-text-with-extensions)
(define-coding-system-alias 'ctext-with-extensions
  'compound-text-with-extensions)
818
;; ASCII-only coding system; `?' is the default character.
(define-coding-system 'us-ascii
  "Convert all characters but ASCII to `?'."
  :mnemonic ?-
  :coding-type 'charset
  :charset-list '(ascii)
  :default-char ??
  :mime-charset 'us-ascii)

(define-coding-system-alias 'iso-safe 'us-ascii)

;; Terminal output falls back to us-ascii unless another coding system
;; has been specified explicitly.
(set-safe-terminal-coding-system-internal 'us-ascii)
832
833 ;; The other coding-systems are defined in each language specific
834 ;; files under lisp/language.
835
;; Normally, set coding system to `undecided' before reading a file.
;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
;; but we regard them as containing multibyte characters.
;; Tar files are not decoded at all, but we treat them as raw bytes.

(setq file-coding-system-alist
      '(("\\.elc\\'" . (emacs-mule . emacs-mule))
        ("\\.utf\\(-8\\)?\\'" . utf-8)
        ;; We use raw-text for reading loaddefs.el so that if it
        ;; happens to have DOS or Mac EOLs, they are converted to
        ;; newlines.  This is required to make the special treatment
        ;; of the "\ newline" combination in loaddefs.el, which marks
        ;; the beginning of a doc string, work.
        ;;
        ;; The dot is escaped so that only a literal "loaddefs.el"
        ;; matches, as in the other patterns of this list.
        ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
        ("\\.tar\\'" . (no-conversion . no-conversion))
        ;; Catch-all entry: must stay last.
        ("" . (undecided . nil))))
852
853 \f
854 ;;; Setting coding categories and their priorities.
855
;; This setting only serves to read Emacs Lisp source files that
;; contain multilingual text while Emacs is being dumped.  The command
;; `set-language-environment' installs more appropriate values for
;; each language environment.

(set-coding-system-priority 'iso-latin-1 'utf-8 'iso-2022-7bit)
866
867 \f
868 ;;; Miscellaneous settings.
869
;; Make all multibyte characters self-insert: bind the character range
;; from (make-char 'unicode 128) up to (max-char) in the char table
;; stored as the second element of `global-map'.
(let ((first-char (make-char 'unicode 128))
      (key-table (car (cdr global-map))))
  (set-char-table-range key-table
                        (cons first-char (max-char))
                        'self-insert-command))

;; Set the entry for code ?\222 in `latin-extra-code-table'.
(aset latin-extra-code-table ?\222 t)
876
877 ;;; mule-conf.el ends here