Replaced.
[bpt/emacs.git] / lisp / international / mule-conf.el
CommitLineData
08c19a27
KH
1;;; mule-conf.el --- configure multilingual environment
2
08c19a27 3;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
c0e17dd8
KH
5;; Copyright (C) 2001, 2002
6;; National Institute of Advanced Industrial Science and Technology (AIST)
7;; Registration Number H13PRO009
9fea1ee1 8;; Copyright (C) 2002 Free Software Foundation, Inc.
08c19a27 9
24adcac1 10;; Keywords: i18n, mule, multilingual, character set, coding system
08c19a27
KH
11
12;; This file is part of GNU Emacs.
13
14;; GNU Emacs is free software; you can redistribute it and/or modify
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
25;; along with GNU Emacs; see the file COPYING. If not, write to the
26;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27;; Boston, MA 02111-1307, USA.
28
29;;; Commentary:
30
3803079c
DL
31;; This file defines the Emacs charsets and some basic coding systems.
32;; Other coding systems are defined in the files in directory
33;; lisp/language.
08c19a27
KH
34
35;;; Code:
36
155b256a
DL
37;;; Remarks
38
3803079c
DL
39;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/.
40;; Standards docs equivalent to iso-2022 and iso-8859 are at
41;; http://www.ecma.ch/.
42
32fefe80
DL
43;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
44;; MS Windows, which are presumably the only charsets we really need
45;; to worry about on such systems:
155b256a
DL
46;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
47;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
48;; 1258, 874, 932, 936, 949, 950
49
08c19a27
KH
50;;; Definitions of character sets.
51
24adcac1 52;; The charsets `ascii' and `unicode' are already defined in charset.c
c0e17dd8
KH
53;; as below:
54;;
55;; (define-charset 'ascii
56;; ""
57;; :dimension 1
58;; :code-space [0 127]
59;; :iso-final-char ?A
60;; :ascii-compatible-p t
61;; :emacs-mule-id 0
62;; :code-offset 0)
63;;
64;; (define-charset 'unicode
65;; ""
66;; :dimension 3
67;; :code-space [0 255 0 255 0 16]
68;; :ascii-compatible-p t
69;; :code-offset 0)
70;;
71;; We now set :docstring, :short-name, and :long-name properties.
72
73(put-charset-property
74 'ascii :docstring "ASCII (ISO646 IRV)")
75(put-charset-property
76 'ascii :short-name "ASCII")
77(put-charset-property
78 'ascii :long-name "ASCII (ISO646 IRV)")
79(put-charset-property
80 'unicode :docstring "Unicode (ISO10646)")
81(put-charset-property
82 'unicode :short-name "Unicode")
83(put-charset-property
84 'unicode :long-name "Unicode (ISO10646)")
85
86(define-charset-alias 'ucs 'unicode)
87
88(define-charset 'emacs
cb269bb1 89 "Full Emacs characters"
c0e17dd8
KH
90 :ascii-compatible-p t
91 :code-space [ 0 255 0 255 0 63 ]
92 :code-offset 0
93 :supplementary-p t)
94
95(define-charset 'iso-8859-1
24adcac1 96 "Latin-1 (ISO/IEC 8859-1)"
c0e17dd8
KH
97 :short-name "Latin-1"
98 :ascii-compatible-p t
99 :code-space [0 255]
100 :code-offset 0)
101
102(define-charset 'latin-iso8859-1
103 "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
104 :short-name "RHP of Latin-1"
105 :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
106 :iso-final-char ?A
107 :emacs-mule-id 129
108 :code-space [32 127]
109 :code-offset 160)
110
b714f5c2
DL
111;; Name perhaps not ideal, but is XEmacs-compatible.
112(define-charset 'control-1
c0e17dd8
KH
113 "8-bit control code (0x80..0x9F)"
114 :short-name "8-bit control code"
115 :code-space [128 159]
116 :code-offset 128)
117
b714f5c2
DL
118(define-charset 'eight-bit-control
119 "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
120 :short-name "Raw bytes 0x80..0x9F"
121 :code-space [128 159]
122 :code-offset #x3FFF80) ; see character.h
123
c0e17dd8 124(define-charset 'eight-bit-graphic
b714f5c2
DL
125 "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
126 :short-name "Raw bytes 0xA0..0xFF"
c0e17dd8 127 :code-space [160 255]
b714f5c2 128 :code-offset #x3FFFA0) ; see character.h
c0e17dd8
KH
129
(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
                                                 iso-ir iso-final
                                                 emacs-mule-id map)
  "Define an 8-bit charset and, optionally, its ISO 2022 right-hand part.
For internal use in this file only.

All arguments are forms that are evaluated in the expansion:
SYMBOL is the name of the full charset (code space 0..255) whose
  mapping is read from MAP.
ISO-SYMBOL, if non-nil, names a second charset with code space
  32..127 that is defined as the subset 160..255 of SYMBOL, shifted
  by -128.
NAME is the formal name; it is used as the docstring and long name
  of SYMBOL.
NICKNAME is the short name of SYMBOL.
ISO-IR, if non-nil, is a number appended to the docstring of
  ISO-SYMBOL as \"ISO-IR-%d\".
ISO-FINAL and EMACS-MULE-ID become the :iso-final-char and
  :emacs-mule-id properties of ISO-SYMBOL.
MAP is passed as the :map property of SYMBOL."
  `(progn
     ;; The complete single-byte charset.
     (define-charset ,symbol
       ,name
       :short-name ,nickname
       :long-name ,name
       :ascii-compatible-p t
       :code-space [0 255]
       :map ,map)
     ;; The right-hand part, usable as an ISO 2022 graphic set.
     (if ,iso-symbol
         (define-charset ,iso-symbol
           (if ,iso-ir
               (format "Right-Hand Part of %s (%s): ISO-IR-%d"
                       ,name ,nickname ,iso-ir)
             (format "Right-Hand Part of %s (%s)" ,name ,nickname))
           ;; The short name is built from NICKNAME so that it matches
           ;; the hand-written `latin-iso8859-1' definition
           ;; ("RHP of Latin-1"); NAME only goes into the long name.
           :short-name (format "RHP of %s" ,nickname)
           :long-name (format "RHP of %s (%s)" ,name ,nickname)
           :iso-final-char ,iso-final
           :emacs-mule-id ,emacs-mule-id
           :code-space [32 127]
           :subset (list ,symbol 160 255 -128)))))
c0e17dd8
KH
153
154(define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
155 "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
156
157(define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
158 "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
159
160(define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
161 "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
162
163(define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
164 "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
165
166(define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
167 "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
168
169(define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
170 "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
171
172(define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
173 "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
174
175(define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
176 "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
177
3803079c 178(define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
bbe3715c 179 "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")
3803079c 180
bbe3715c
DL
181;; http://www.nectec.or.th/it-standards/iso8859-11/
182;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
183;; plus nbsp
184(define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
185 "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")
186
187;; 8859-12 doesn't (yet?) exist.
3803079c 188
c0e17dd8 189(define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
3803079c 190 "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
c0e17dd8
KH
191
192(define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
193 "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
194
195(define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
196 "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
197
7e8b4d67 198(define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
08a2119c 199 "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
7e8b4d67 200
cb269bb1
DL
201;; No point in keeping it around.
202(fmakunbound 'define-iso-single-byte-charset)
203
bbe3715c 204;; Can this be shared with 8859-11?
c0e17dd8
KH
205(define-charset 'thai-tis620
206 "TIS620.2533"
207 :short-name "TIS620.2533"
208 :iso-final-char ?T
209 :emacs-mule-id 133
210 :code-space [32 127]
211 :code-offset #x0E00)
212
bbe3715c 213;; Fixme: doc for this, c.f. above
c0e17dd8
KH
214(define-charset 'tis620-2533
215 "TIS620.2533"
216 :short-name "TIS620.2533"
217 :ascii-compatible-p t
218 :code-space [0 255]
fd6c8fc1 219 :superset '(ascii eight-bit-control (thai-tis620 . 128)))
c0e17dd8
KH
220
221(define-charset 'jisx0201
222 "JISX0201"
223 :short-name "JISX0201"
c0e17dd8
KH
224 :code-space [33 254]
225 :map "jisx0201")
226
227(define-charset 'latin-jisx0201
228 "Roman Part of JISX0201.1976"
229 :short-name "JISX0201 Roman"
230 :long-name "Japanese Roman (JISX0201.1976)"
231 :iso-final-char ?J
232 :emacs-mule-id 138
233 :code-space [33 126]
fd6c8fc1 234 :subset '(jisx0201 33 126 0))
c0e17dd8
KH
235
236(define-charset 'katakana-jisx0201
237 "Katakana Part of JISX0201.1976"
238 :short-name "JISX0201 Katakana"
239 :long-name "Japanese Katakana (JISX0201.1976)"
240 :iso-final-char ?I
241 :emacs-mule-id 137
242 :code-space [33 126]
fd6c8fc1 243 :subset '(jisx0201 161 254 -128))
c0e17dd8
KH
244
245(define-charset 'chinese-gb2312
246 "GB2312 Chinese simplified: ISO-IR-58"
247 :short-name "GB2312"
248 :long-name "GB2312: ISO-IR-58"
249 :iso-final-char ?A
250 :emacs-mule-id 145
251 :code-space [33 126 33 126]
252 :code-offset #x110000
253 :unify-map "gb2312-1980")
254
7c9e1024 255(define-charset 'chinese-gbk
24adcac1 256 "GBK Chinese simplified."
7c9e1024 257 :short-name "GBK"
7c9e1024 258 :code-space [#x40 #xFE #x81 #xFE]
64762f77 259 :code-offset #x160000
7c9e1024 260 :unify-map "gbk")
155b256a 261(define-charset-alias 'cp936 'chinese-gbk)
32fefe80 262(define-charset-alias 'windows-936 'chinese-gbk)
7c9e1024 263
c0e17dd8
KH
264(define-charset 'chinese-cns11643-1
265 "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
266 :short-name "CNS11643-1"
267 :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
268 :iso-final-char ?G
269 :emacs-mule-id 149
270 :code-space [33 126 33 126]
271 :code-offset #x114000
272 :unify-map "cns11643-1")
273
274(define-charset 'chinese-cns11643-2
275 "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
276 :short-name "CNS11643-2"
277 :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
278 :iso-final-char ?H
279 :emacs-mule-id 150
280 :code-space [33 126 33 126]
281 :code-offset #x118000
282 :unify-map "cns11643-2")
283
284(define-charset 'chinese-cns11643-3
285 "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
286 :short-name "CNS11643-3"
287 :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
288 :iso-final-char ?I
289 :code-space [33 126 33 126]
290 :emacs-mule-id 246
291 :code-offset #x11C000)
292
293(define-charset 'chinese-cns11643-4
294 "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
295 :short-name "CNS11643-4"
296 :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
297 :iso-final-char ?J
298 :emacs-mule-id 247
299 :code-space [33 126 33 126]
300 :code-offset #x120000)
301
302(define-charset 'chinese-cns11643-5
303 "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
304 :short-name "CNS11643-5"
305 :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
306 :iso-final-char ?K
307 :emacs-mule-id 248
308 :code-space [33 126 33 126]
309 :code-offset #x124000)
310
311(define-charset 'chinese-cns11643-6
312 "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
313 :short-name "CNS11643-6"
314 :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
315 :iso-final-char ?L
316 :emacs-mule-id 249
317 :code-space [33 126 33 126]
318 :code-offset #x128000)
319
320(define-charset 'chinese-cns11643-7
321 "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
322 :short-name "CNS11643-7"
323 :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
324 :iso-final-char ?M
325 :emacs-mule-id 250
326 :code-space [33 126 33 126]
327 :code-offset #x12C000)
328
329(define-charset 'big5
330 "Big5 (Chinese traditional)"
331 :short-name "Big5"
c0e17dd8
KH
332 :code-space [#x40 #xFE #xA1 #xFE]
333 :code-offset #x130000
334 :unify-map "big5")
155b256a
DL
335;; Fixme: AKA cp950 according to
336;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>. Is
337;; that correct?
c0e17dd8
KH
338
;; Level-1 half of Big5, re-encoded into a 94x94 code space.
;; The range quoted in :long-name is kept in agreement with the
;; docstring (A141-C67E).
(define-charset 'chinese-big5-1
  "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-1)"
  :long-name "Big5 (Level-1) A141-C67E"
  :iso-final-char ?0
  :emacs-mule-id 152
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x135000
  :unify-map "big5-1")
348
349(define-charset 'chinese-big5-2
24adcac1 350 "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
c0e17dd8
KH
351 :short-name "Big5 (Level-2)"
352 :long-name "Big5 (Level-2) C940-FEFE"
353 :iso-final-char ?1
354 :emacs-mule-id 153
355 :code-space [#x21 #x7E #x21 #x7E]
7c9e1024 356 :code-offset #x137800
c0e17dd8
KH
357 :unify-map "big5-2")
358
359(define-charset 'japanese-jisx0208
360 "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
361 :short-name "JISX0208"
362 :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
363 :iso-final-char ?B
364 :emacs-mule-id 146
365 :code-space [33 126 33 126]
366 :code-offset #x140000
367 :unify-map "jisx0208-1990")
368
369(define-charset 'japanese-jisx0208-1978
370 "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
371 :short-name "JISX0208.1978"
372 :long-name "JISX0208.1978 (Japanese): ISO-IR-42"
373 :iso-final-char ?@
374 :emacs-mule-id 144
375 :code-space [33 126 33 126]
376 :code-offset #x144000
377 :unify-map "jisx0208-1978")
378
379(define-charset 'japanese-jisx0212
380 "JISX0212 Japanese supplement: ISO-IR-159"
381 :short-name "JISX0212"
382 :long-name "JISX0212 (Japanese): ISO-IR-159"
383 :iso-final-char ?D
384 :emacs-mule-id 148
385 :code-space [33 126 33 126]
386 :code-offset #x148000
387 :unify-map "jisx0212-1990")
388
389(define-charset 'japanese-jisx0213-1
390 "JISX0213 Plane 1 (Japanese)"
391 :short-name "JISX0213-1"
c0e17dd8
KH
392 :iso-final-char ?O
393 :emacs-mule-id 151
394 :code-space [33 126 33 126]
395 :code-offset #x14C000)
396
397(define-charset 'japanese-jisx0213-2
398 "JISX0213 Plane 2 (Japanese)"
399 :short-name "JISX0213-2"
c0e17dd8
KH
400 :iso-final-char ?P
401 :emacs-mule-id 254
402 :code-space [33 126 33 126]
403 :code-offset #x150000)
404
405(define-charset 'korean-ksc5601
406 "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
407 :short-name "KSC5601"
408 :long-name "KSC5601 (Korean): ISO-IR-149"
409 :iso-final-char ?C
410 :emacs-mule-id 147
411 :code-space [33 126 33 126]
b714f5c2
DL
412 :code-offset #x279f94
413 :unify-map "ksc5601-1987")
c0e17dd8 414
155b256a
DL
415;; Fixme: Korean cp949/UHC
416
c0e17dd8
KH
417(define-charset 'chinese-sisheng
418 "SiSheng characters for PinYin/ZhuYin"
419 :short-name "SiSheng"
420 :long-name "SiSheng (PinYin/ZhuYin)"
421 :iso-final-char ?0
422 :emacs-mule-id 160
423 :code-space [33 126]
c4e44241 424 :unify-map "sisheng"
c0e17dd8
KH
425 :code-offset #x200000)
426
b714f5c2
DL
427;; A subset of the 1989 version of IPA. It consists of the consonant
428;; signs used in English, French, German and Italian, and all vowels
429;; signs in the table. [says old MULE doc]
c0e17dd8
KH
430(define-charset 'ipa
431 "IPA (International Phonetic Association)"
432 :short-name "IPA"
c0e17dd8
KH
433 :iso-final-char ?0
434 :emacs-mule-id 161
c4e44241 435 :unify-map "ipa"
c0e17dd8
KH
436 :code-space [32 127]
437 :code-offset #x200080)
438
439(define-charset 'viscii
440 "VISCII1.1"
441 :short-name "VISCII"
442 :long-name "VISCII 1.1"
443 :code-space [0 255]
444 :map "viscii")
445
446(define-charset 'vietnamese-viscii-lower
447 "VISCII1.1 lower-case"
448 :short-name "VISCII lower"
449 :long-name "VISCII lower-case"
450 :iso-final-char ?1
451 :emacs-mule-id 162
452 :code-space [32 127]
65076506
KH
453 :code-offset #x200200
454 :unify-map "viscii-lower")
c0e17dd8
KH
455
456(define-charset 'vietnamese-viscii-upper
457 "VISCII1.1 upper-case"
458 :short-name "VISCII upper"
459 :long-name "VISCII upper-case"
460 :iso-final-char ?2
461 :emacs-mule-id 163
462 :code-space [32 127]
65076506
KH
463 :code-offset #x200280
464 :unify-map "viscii-upper")
c0e17dd8
KH
465
466(define-charset 'vscii
467 "VSCII1.1"
468 :short-name "VSCII"
c0e17dd8
KH
469 :code-space [0 255]
470 :map "vscii")
471
69862ba6
DL
472;; Fixme: see note in tcvn.map about combining characters
473(define-charset 'tcvn-5712
474 "TCVN-5712"
475 :code-space [0 255]
476 :map "tcvn")
477
c0e17dd8
KH
478(define-charset 'koi8-r
479 "KOI8-R"
480 :short-name "KOI8-R"
c0e17dd8
KH
481 :ascii-compatible-p t
482 :code-space [0 255]
483 :map "koi8-r")
484
485(define-charset-alias 'koi8 'koi8-r)
486
487(define-charset 'alternativnyj
488 "ALTERNATIVNYJ"
489 :short-name "alternativnyj"
c0e17dd8
KH
490 :ascii-compatible-p t
491 :code-space [0 255]
492 :map "ibm866")
bbe3715c
DL
493;; Fixme: http://czyborra.com/charsets/cyrillic.html says the
494;; following, but the iconv map for cp866 isn't the same as his chart
495;; for alternativnyj. I can't find anything that looks like an
496;; official definition of alternativnyj.
007eef16 497(define-charset-alias 'cp866 'alternativnyj)
08a2119c 498(define-charset-alias 'ibm866 'alternativnyj)
08c19a27 499
6ef462e0
DL
500(define-charset 'koi8-u
501 "KOI8-U"
502 :short-name "KOI8-U"
6ef462e0
DL
503 :ascii-compatible-p t
504 :code-space [0 255]
505 :map "koi8-u")
506
507(define-charset 'koi8-t
508 "KOI8-T"
509 :short-name "KOI8-T"
6ef462e0
DL
510 :ascii-compatible-p t
511 :code-space [0 255]
512 :map "koi8-t")
513
514(define-charset 'georgian-ps
515 "GEORGIAN-PS"
516 :short-name "GEORGIAN-PS"
6ef462e0
DL
517 :ascii-compatible-p t
518 :code-space [0 255]
519 :map "georgian-ps")
520
521(define-charset 'windows-1250
9fea1ee1 522 "WINDOWS-1250 (Central Europe)"
6ef462e0 523 :short-name "WINDOWS-1250"
6ef462e0
DL
524 :ascii-compatible-p t
525 :code-space [0 255]
526 :map "windows-1250")
527(define-charset-alias 'cp1250 'windows-1250)
528
529(define-charset 'windows-1251
155b256a 530 "WINDOWS-1251 (Cyrillic)"
6ef462e0 531 :short-name "WINDOWS-1251"
6ef462e0
DL
532 :ascii-compatible-p t
533 :code-space [0 255]
534 :map "windows-1251")
535(define-charset-alias 'cp1251 'windows-1251)
536
537(define-charset 'windows-1252
155b256a 538 "WINDOWS-1252 (Latin I)"
6ef462e0 539 :short-name "WINDOWS-1252"
6ef462e0
DL
540 :ascii-compatible-p t
541 :code-space [0 255]
542 :map "windows-1252")
543(define-charset-alias 'cp1252 'windows-1252)
544
9fea1ee1 545(define-charset 'windows-1253
155b256a 546 "WINDOWS-1253 (Greek)"
9fea1ee1 547 :short-name "WINDOWS-1253"
9fea1ee1
DL
548 :ascii-compatible-p t
549 :code-space [0 255]
550 :map "windows-1253")
551(define-charset-alias 'cp1253 'windows-1253)
552
553(define-charset 'windows-1254
155b256a 554 "WINDOWS-1254 (Turkish)"
9fea1ee1 555 :short-name "WINDOWS-1254"
9fea1ee1
DL
556 :ascii-compatible-p t
557 :code-space [0 255]
558 :map "windows-1254")
559(define-charset-alias 'cp1254 'windows-1254)
560
561(define-charset 'windows-1255
562 "WINDOWS-1255 (Hebrew)"
563 :short-name "WINDOWS-1255"
9fea1ee1
DL
564 :ascii-compatible-p t
565 :code-space [0 255]
566 :map "windows-1255")
567(define-charset-alias 'cp1255 'windows-1255)
568
569(define-charset 'windows-1256
570 "WINDOWS-1256 (Arabic)"
571 :short-name "WINDOWS-1256"
9fea1ee1
DL
572 :ascii-compatible-p t
573 :code-space [0 255]
574 :map "windows-1256")
575(define-charset-alias 'cp1256 'windows-1256)
576
577(define-charset 'windows-1257
578 "WINDOWS-1257 (Baltic)"
579 :short-name "WINDOWS-1257"
9fea1ee1
DL
580 :ascii-compatible-p t
581 :code-space [0 255]
582 :map "windows-1257")
583(define-charset-alias 'cp1257 'windows-1257)
584
585(define-charset 'windows-1258
155b256a 586 "WINDOWS-1258 (Viet Nam)"
9fea1ee1 587 :short-name "WINDOWS-1258"
9fea1ee1
DL
588 :ascii-compatible-p t
589 :code-space [0 255]
590 :map "windows-1258")
591(define-charset-alias 'cp1258 'windows-1258)
592
593(define-charset 'next
594 "NEXT"
595 :short-name "NEXT"
9fea1ee1
DL
596 :ascii-compatible-p t
597 :code-space [0 255]
598 :map "next")
599
6ef462e0
DL
600(define-charset 'cp1125
601 "CP1125"
602 :short-name "CP1125"
6ef462e0
DL
603 :code-space [0 255]
604 :map "cp1125")
605(define-charset-alias 'ruscii 'cp1125)
606;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
607(define-charset-alias 'cp866u 'cp1125)
608
bbe3715c
DL
609;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html
610;; shows this as not ASCII compatible, with various graphics in
611;; 0x01-0x1F.
007eef16 612(define-charset 'cp437
bbe3715c 613 "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
007eef16 614 :short-name "CP437"
007eef16
DL
615 :code-space [0 255]
616 :ascii-compatible-p t
617 :map "cp437")
618
155b256a
DL
619(define-charset 'cp720
620 "CP720 (Arabic)"
621 :short-name "CP720"
622 :code-space [0 255]
623 :ascii-compatible-p t
624 :map "cp720")
625
007eef16 626(define-charset 'cp737
bbe3715c 627 "CP737 (PC Greek)"
007eef16 628 :short-name "CP737"
007eef16
DL
629 :code-space [0 255]
630 :ascii-compatible-p t
631 :map "cp737")
632
633(define-charset 'cp775
bbe3715c 634 "CP775 (PC Baltic)"
007eef16 635 :short-name "CP775"
007eef16
DL
636 :code-space [0 255]
637 :ascii-compatible-p t
638 :map "cp775")
639
640(define-charset 'cp851
641 "CP851"
642 :short-name "CP851"
007eef16
DL
643 :code-space [0 255]
644 :ascii-compatible-p t
645 :map "cp851")
646
647(define-charset 'cp852
bbe3715c 648 "CP852 (MS-DOS Latin-2)"
007eef16 649 :short-name "CP852"
007eef16
DL
650 :code-space [0 255]
651 :ascii-compatible-p t
652 :map "cp852")
653
654(define-charset 'cp855
bbe3715c 655 "CP855 (IBM Cyrillic)"
007eef16 656 :short-name "CP855"
007eef16
DL
657 :code-space [0 255]
658 :ascii-compatible-p t
659 :map "cp855")
660
661(define-charset 'cp857
bbe3715c 662 "CP857 (IBM Turkish)"
007eef16 663 :short-name "CP857"
007eef16
DL
664 :code-space [0 255]
665 :ascii-compatible-p t
666 :map "cp857")
667
155b256a
DL
668(define-charset 'cp858
669 "CP858 (Multilingual Latin I + Euro)"
670 :short-name "CP858"
671 :code-space [0 255]
672 :ascii-compatible-p t
673 :map "cp858")
bbe3715c 674(define-charset-alias 'cp00858 'cp858) ; IANA has IBM00858/CP00858
155b256a 675
007eef16 676(define-charset 'cp860
bbe3715c 677 "CP860 (MS-DOS Portuguese)"
007eef16 678 :short-name "CP860"
007eef16
DL
679 :code-space [0 255]
680 :ascii-compatible-p t
681 :map "cp860")
682
683(define-charset 'cp861
bbe3715c 684 "CP861 (MS-DOS Icelandic)"
007eef16 685 :short-name "CP861"
007eef16
DL
686 :code-space [0 255]
687 :ascii-compatible-p t
688 :map "cp861")
689
690(define-charset 'cp862
bbe3715c 691 "CP862 (PC Hebrew)"
007eef16 692 :short-name "CP862"
007eef16
DL
693 :code-space [0 255]
694 :ascii-compatible-p t
695 :map "cp862")
696
697(define-charset 'cp863
bbe3715c 698 "CP863 (MS-DOS Canadian French)"
007eef16 699 :short-name "CP863"
007eef16
DL
700 :code-space [0 255]
701 :ascii-compatible-p t
702 :map "cp863")
703
704(define-charset 'cp864
bbe3715c 705 "CP864 (PC Arabic)"
007eef16 706 :short-name "CP864"
007eef16
DL
707 :code-space [0 255]
708 :ascii-compatible-p t
709 :map "cp864")
710
711(define-charset 'cp865
bbe3715c 712 "CP865 (MS-DOS Nordic)"
007eef16 713 :short-name "CP865"
007eef16
DL
714 :code-space [0 255]
715 :ascii-compatible-p t
716 :map "cp865")
717
718(define-charset 'cp869
bbe3715c 719 "CP869 (IBM Modern Greek)"
007eef16 720 :short-name "CP869"
007eef16
DL
721 :code-space [0 255]
722 :ascii-compatible-p t
723 :map "cp869")
724
725(define-charset 'cp874
bbe3715c 726 "CP874 (IBM Thai)"
007eef16 727 :short-name "CP874"
007eef16
DL
728 :code-space [0 255]
729 :ascii-compatible-p t
730 :map "cp874")
731
08c19a27
KH
732;; For Arabic, we need three different types of character sets.
733;; Digits are of direction left-to-right and of width 1-column.
734;; Others are of direction right-to-left and of width 1-column or
735;; 2-column.
c0e17dd8
KH
736(define-charset 'arabic-digit
737 "Arabic digit"
738 :short-name "Arabic digit"
c0e17dd8
KH
739 :iso-final-char ?2
740 :emacs-mule-id 164
741 :code-space [34 42]
742 :code-offset #x0600)
743
744(define-charset 'arabic-1-column
745 "Arabic 1-column"
746 :short-name "Arabic 1-col"
747 :long-name "Arabic 1-column"
748 :iso-final-char ?3
749 :emacs-mule-id 165
750 :code-space [33 126]
751 :code-offset #x200100)
752
753(define-charset 'arabic-2-column
754 "Arabic 2-column"
755 :short-name "Arabic 2-col"
756 :long-name "Arabic 2-column"
757 :iso-final-char ?4
758 :emacs-mule-id 224
759 :code-space [33 126]
760 :code-offset #x200180)
7153b1f1
KH
761
762;; Lao script.
c0e17dd8
KH
763;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
764(define-charset 'lao
765 "Lao characters (ISO10646 0E81..0EDF)"
766 :short-name "Lao"
c0e17dd8
KH
767 :iso-final-char ?1
768 :emacs-mule-id 167
769 :code-space [33 126]
770 :code-offset #x0E81)
771
772(define-charset 'mule-lao
773 "Lao characters (ISO10646 0E81..0EDF)"
774 :short-name "Lao"
c0e17dd8 775 :code-space [0 255]
fd6c8fc1 776 :superset '(ascii eight-bit-control (lao . 128)))
7153b1f1 777
08c19a27 778
7153b1f1
KH
779;; Indian scripts. Symbolic charset for data exchange. Glyphs are
780;; not assigned. They are automatically converted to each Indian
781;; script which IS-13194 supports.
782
c0e17dd8
KH
783(define-charset 'indian-is13194
784 "Generic Indian charset for data exchange with IS 13194"
785 :short-name "IS 13194"
786 :long-name "Indian IS 13194"
787 :iso-final-char ?5
788 :emacs-mule-id 225
789 :code-space [33 126]
c4e44241 790 :unify-map "is13194"
c0e17dd8
KH
791 :code-offset #x180000)
792
793(define-charset 'indian-glyph
794 "Glyphs for Indian characters."
795 :short-name "Indian glyph"
c0e17dd8
KH
796 :iso-final-char ?4
797 :emacs-mule-id 240
798 :code-space [32 127 32 127]
799 :code-offset #x180100)
7153b1f1
KH
800
801;; Actual Glyph for 1-column width.
c0e17dd8
KH
802(define-charset 'indian-1-column
803 "Indian charset for 1-column width glyphs"
804 :short-name "Indian 1-col"
805 :long-name "Indian 1 Column"
806 :iso-final-char ?6
807 :emacs-mule-id 240
808 :code-space [33 126 33 126]
809 :code-offset #x184000)
08c19a27 810
08c19a27 811;; Actual Glyph for 2-column width.
c0e17dd8
KH
812(define-charset 'indian-2-column
813 "Indian charset for 2-column width glyphs"
814 :short-name "Indian 2-col"
815 :long-name "Indian 2 Column"
816 :iso-final-char ?5
817 :emacs-mule-id 251
818 :code-space [33 126 33 126]
fd6c8fc1 819 :superset '(indian-1-column))
c0e17dd8
KH
820
;; Two-column Tibetan glyphs.  Each property is given exactly once
;; (:iso-final-char used to be listed twice with the same value).
(define-charset 'tibetan
  "Tibetan characters"
  :short-name "Tibetan 2-col"
  :long-name "Tibetan 2 column"
  :iso-final-char ?7
  :emacs-mule-id 252
  :unify-map "tibetan"
  :code-space [33 126 33 126]
  :code-offset #x190000)
831
832(define-charset 'tibetan-1-column
833 "Tibetan 1 column glyph"
834 :short-name "Tibetan 1-col"
835 :long-name "Tibetan 1 column"
836 :iso-final-char ?8
837 :emacs-mule-id 241
838 :code-space [33 126 33 37]
fd6c8fc1 839 :superset '(tibetan))
08c19a27 840
c0e17dd8
KH
841;; Subsets of Unicode.
842(define-charset 'mule-unicode-2500-33ff
843 "Unicode characters of the range U+2500..U+33FF."
844 :short-name "Unicode subset 2"
845 :long-name "Unicode subset (U+2500..U+33FF)"
846 :iso-final-char ?2
847 :emacs-mule-id 242
848 :code-space [#x20 #x7f #x20 #x47]
849 :code-offset #x2500)
850
;; Third Unicode subset.  The long name spells the range the same
;; way as the docstring and the sibling mule-unicode-* charsets.
(define-charset 'mule-unicode-e000-ffff
  "Unicode characters of the range U+E000..U+FFFF."
  :short-name "Unicode subset 3"
  :long-name "Unicode subset (U+E000..U+FFFF)"
  :iso-final-char ?3
  :emacs-mule-id 243
  :code-space [#x20 #x7F #x20 #x75]
  :code-offset #xE000)
859
860(define-charset 'mule-unicode-0100-24ff
861 "Unicode characters of the range U+0100..U+24FF."
862 :short-name "Unicode subset"
863 :long-name "Unicode subset (U+0100..U+24FF)"
864 :iso-final-char ?1
865 :emacs-mule-id 244
866 :code-space [#x20 #x7F #x20 #x7F]
867 :code-offset #x100)
868
869(define-charset 'ethiopic
24adcac1 870 "Ethiopic characters for Amharic and Tigrigna."
c0e17dd8
KH
871 :short-name "Ethiopic"
872 :long-name "Ethiopic characters"
873 :iso-final-char ?3
874 :emacs-mule-id 245
c4e44241 875 :unify-map "ethiopic"
c0e17dd8
KH
876 :code-space [33 126 33 126]
877 :code-offset #x1A0000)
878
879(define-charset 'mac-roman
880 "Mac Roman charset"
881 :short-name "Mac Roman"
c0e17dd8
KH
882 :ascii-compatible-p t
883 :code-space [0 255]
884 :map "mac-roman")
885
6ef462e0
DL
886;; Fixme: modern EBCDIC variants, e.g. IBM00924?
887(define-charset 'ebcdic-us
888 "US version of EBCDIC"
889 :short-name "EBCDIC-US"
6ef462e0
DL
890 :code-space [0 255]
891 :mime-charset 'ebcdic-us
892 :map "ebcdic-us")
893
894(define-charset 'ebcdic-uk
895 "UK version of EBCDIC"
896 :short-name "EBCDIC-UK"
6ef462e0
DL
897 :code-space [0 255]
898 :mime-charset 'ebcdic-uk
899 :map "ebcdic-uk")
900
3803079c
DL
;; HP Roman-8: ASCII-compatible 8-bit charset whose mapping is read
;; from the "hp-roman8" map.
(define-charset 'hp-roman8
  "Encoding used by Hewlett-Packard printer software"
  :short-name "HP-ROMAN8"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "hp-roman8")
907
bbe3715c
DL
908;; To make a coding system with this, a pre-write-conversion should
909;; account for the commented-out multi-valued code points in
910;; stdenc.map.
3803079c
DL
911(define-charset 'adobe-standard-encoding
912 "Adobe `standard encoding' used in PostScript"
913 :short-name "ADOBE-STANDARD-ENCODING"
6584daf1 914 :code-space [#x20 255]
3803079c
DL
915 :map "stdenc")
916
917(define-charset 'symbol
918 "Adobe symbol encoding used in PostScript"
919 :short-name "ADOBE-SYMBOL"
6584daf1 920 :code-space [#x20 255]
3803079c
DL
921 :map "symbol")
922
923(define-charset 'ibm850
bbe3715c 924 "DOS codepage 850 (Latin-1)"
3803079c 925 :short-name "IBM850"
6584daf1 926 :ascii-compatible-p t
3803079c
DL
927 :code-space [0 255]
928 :map "ibm850")
929(define-charset-alias 'cp850 'ibm850)
930
64762f77
KH
931(define-charset 'gb18030-2-byte
932 "GB18030 2-byte (0x814E..0xFEFE)"
933 :code-space [#x40 #xFE #x81 #xFE]
934 :supplementary-p t
935 :map "gb18030-2")
936
937(define-charset 'gb18030-4-byte-bmp
938 "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
939 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
940 :supplementary-p t
941 :map "gb18030-4")
942
943(define-charset 'gb18030-4-byte-smp
944 "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
945 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
946 :min-code '(#x9030 . #x8130)
947 :max-code '(#xE332 . #x9A35)
948 :supplementary-p t
949 :code-offset #x10000)
950
951(define-charset 'gb18030-4-byte-ext-1
952 "GB18030 4-byte (0x8431A530-0x8F39FE39)"
953 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
954 :min-code '(#x8431 . #xA530)
955 :max-code '(#x8F39 . #xFE39)
956 :supplementary-p t
957 :code-offset #x200000 ; ... #x22484B
958 )
959
960(define-charset 'gb18030-4-byte-ext-2
961 "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
962 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
963 :min-code '(#xE332 . #x9A36)
964 :max-code '(#xFE39 . #xFE39)
965 :supplementary-p t
966 :code-offset #X22484C ; ... #x279f93
967 )
968
969(define-charset 'gb18030
970 "GB18030"
971 :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
972 :min-code 0
973 :max-code '(#xFE39 . #xFE39)
fd6c8fc1
KH
974 :superset '(ascii gb18030-2-byte
975 gb18030-4-byte-bmp gb18030-4-byte-smp
976 gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
64762f77 977
c0e17dd8 978(unify-charset 'chinese-gb2312)
7c9e1024 979(unify-charset 'chinese-gbk)
c0e17dd8
KH
980(unify-charset 'chinese-cns11643-1)
981(unify-charset 'chinese-cns11643-2)
982(unify-charset 'big5)
983(unify-charset 'chinese-big5-1)
984(unify-charset 'chinese-big5-2)
65076506
KH
985(unify-charset 'vietnamese-viscii-lower)
986(unify-charset 'vietnamese-viscii-upper)
c4e44241
DL
987;; (unify-charset 'chinese-sisheng)
988;; (unify-charset 'ipa)
989;; (unify-charset 'indian-is13194)
990;; (unify-charset 'tibetan)
991;; (unify-charset 'ethiopic)
992;; (unify-charset 'japanese-jisx0208-1978)
993;; (unify-charset 'japanese-jisx0208)
994;; (unify-charset 'japanese-jisx0212)
65076506 995
85f789f7 996\f
d2a1ee18
KH
997;; These are tables for translating characters on decoding and
998;; encoding.  Both standard tables are initialized to nil here.
c0e17dd8 999(setq standard-translation-table-for-decode nil)
08c19a27 1000
f967223b 1001(setq standard-translation-table-for-encode nil)
08c19a27 1002
bdf74bef
DL
;; Separate table for input methods; it also defaults to nil.
1003(defvar translation-table-for-input nil
1004 "If non-nil, a char table used to translate characters from input methods.
1005\(Currently only used by Quail.)")
08c19a27
KH
1006\f
1007;;; Make fundamental coding systems.
1008
c0e17dd8
KH
1009;; The coding system `no-conversion' is already defined in coding.c;
1010;; its definition there is equivalent to:
1011;;
1012;; (define-coding-system 'no-conversion
1013;; "Do no conversion."
1014;; :coding-type 'raw-text
1015;; :mnemonic ?=)
08c19a27 1016
cb269bb1
DL
;; `binary' is an alias for `no-conversion'.
1017(define-coding-system-alias 'binary 'no-conversion)
1018
c0e17dd8
KH
;; `raw-text': :coding-type raw-text, mnemonic character `t'.  The
;; docstring below describes its encoding and file-visiting behavior.
1019(define-coding-system 'raw-text
1020 "Raw text, which means text contains random 8-bit codes.
1021Encoding text with this coding system produces the actual byte
1022sequence of the text in buffers and strings. An exception is made for
1023eight-bit-control characters. Each of them is encoded into a single
1024byte.
bc6a0946
KH
1025
1026When you visit a file with this coding, the file is read into a
c0e17dd8
KH
1027unibyte buffer as is (except for EOL format), thus each byte of a file
1028is treated as a character."
1029 :coding-type 'raw-text
1030 :mnemonic ?t)
1031
;; `undecided' lists only `ascii' in its :charset-list.
1032(define-coding-system 'undecided
cb269bb1 1033 "No conversion on encoding, automatic conversion on decoding."
c0e17dd8
KH
1034 :coding-type 'undecided
1035 :mnemonic ?-
1036 :charset-list '(ascii))
08c19a27 1037
;; `unix', `dos' and `mac' alias `undecided-unix', `undecided-dos' and
;; `undecided-mac'.  Those three are not defined explicitly in this
;; part of the file -- presumably they are the EOL variants created
;; together with `undecided' (TODO confirm).
8d969bf6 1038(define-coding-system-alias 'unix 'undecided-unix)
1c445211
RS
1039(define-coding-system-alias 'dos 'undecided-dos)
1040(define-coding-system-alias 'mac 'undecided-mac)
1041
c0e17dd8
KH
;; Latin-1 as an ISO 2022 coding system: `ascii' and `latin-iso8859-1'
;; fill the first two :designation slots, the remaining two are nil.
;; Registered with MIME charset `iso-8859-1'.
1042(define-coding-system 'iso-latin-1
1043 "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
1044 :coding-type 'iso-2022
1045 :mnemonic ?1
1046 :charset-list '(ascii latin-iso8859-1)
1047 :designation [ascii latin-iso8859-1 nil nil]
1048 :mime-charset 'iso-8859-1)
bc6a0946 1049
c0e17dd8
KH
;; `iso-8859-1' and `latin-1' are aliases for `iso-latin-1'.
1050(define-coding-system-alias 'iso-8859-1 'iso-latin-1)
1051(define-coding-system-alias 'latin-1 'iso-latin-1)
08c19a27 1052
c0e17dd8 1053;; Coding systems not specific to each language environment.
bc6a0946 1054
c0e17dd8
KH
;; `emacs-mule': note that :charset-list is the symbol `emacs-mule'
;; rather than a list of charsets.
1055(define-coding-system 'emacs-mule
1056 "Emacs 21 internal format used in buffer and string."
1057 :coding-type 'emacs-mule
7e8b4d67 1058 :charset-list 'emacs-mule
c0e17dd8
KH
1059 :mnemonic ?M)
1060
;; `utf-8' restricted to the `unicode' charset.  No :mime-charset is
;; given in this form.
1061(define-coding-system 'utf-8
1062 "UTF-8."
1063 :coding-type 'utf-8
1064 :mnemonic ?U
1065 :charset-list '(unicode))
1066
;; `mule-utf-8' is an alias for `utf-8'.
1067(define-coding-system-alias 'mule-utf-8 'utf-8)
1068
;; Same :coding-type and mnemonic as `utf-8', but with the `emacs'
;; charset in :charset-list and with :mime-charset set to `utf-8'.
1069(define-coding-system 'utf-8-emacs
26dbea99 1070 "Support for all Emacs characters (including non-Unicode characters)."
c0e17dd8
KH
1071 :coding-type 'utf-8
1072 :mnemonic ?U
ebc563df
DL
1073 :charset-list '(emacs)
1074 :mime-charset 'utf-8)
c0e17dd8
KH
1075
;; Generic `utf-16'.  Neither :endian nor :bom is specified here, so
;; whatever defaults `define-coding-system' applies are used
;; (NOTE(review): the defaults are not visible in this file).
1076(define-coding-system 'utf-16
1077 "UTF-16"
1078 :coding-type 'utf-16
1079 :mnemonic ?U
ebc563df
DL
1080 :charset-list '(unicode)
1081 :mime-charset 'utf-16)
c0e17dd8
KH
1082
;; Little-endian UTF-16; no :bom property and no :mime-charset are
;; given.
1083(define-coding-system 'utf-16-le-nosig
cb269bb1 1084 "UTF-16, little endian, no signature."
c0e17dd8
KH
1085 :coding-type 'utf-16
1086 :mnemonic ?U
1087 :charset-list '(unicode)
1088 :endian 'little)
1089
;; Big-endian UTF-16; no :bom property and no :mime-charset are given.
1090(define-coding-system 'utf-16-be-nosig
cb269bb1 1091 "UTF-16, big endian, no signature."
c0e17dd8
KH
1092 :coding-type 'utf-16
1093 :mnemonic ?U
1094 :charset-list '(unicode)
1095 :endian 'big)
1096
;; Little-endian UTF-16 with :bom t (the "signature" of the
;; docstring), registered with MIME charset `utf-16-le'.
1097(define-coding-system 'utf-16-le
cb269bb1 1098 "UTF-16, little endian, with signature."
c0e17dd8
KH
1099 :coding-type 'utf-16
1100 :mnemonic ?U
1101 :charset-list '(unicode)
a44cf41b 1102 :bom t
ebc563df
DL
1103 :endian 'little
1104 :mime-charset 'utf-16-le)
c0e17dd8
KH
1105
;; Big-endian UTF-16 with :bom t (the "signature" of the docstring),
;; registered with MIME charset `utf-16-be'.
1106(define-coding-system 'utf-16-be
cb269bb1 1107 "UTF-16, big endian, with signature."
c0e17dd8
KH
1108 :coding-type 'utf-16
1109 :mnemonic ?U
1110 :charset-list '(unicode)
a44cf41b 1111 :bom t
ebc563df
DL
1112 :endian 'big
1113 :mime-charset 'utf-16-be)
c0e17dd8
KH
1114
;; 7-bit ISO 2022.  Only the first :designation slot (G0, per the
;; docstring) is populated; the other three are nil.  :charset-list is
;; the symbol `iso-2022' rather than an explicit list.
1115(define-coding-system 'iso-2022-7bit
cb269bb1 1116 "ISO 2022 based 7-bit encoding using only G0."
c0e17dd8
KH
1117 :coding-type 'iso-2022
1118 :mnemonic ?J
1119 :charset-list 'iso-2022
1120 :designation [(ascii t) nil nil nil]
1121 :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
1122
;; 7-bit ISO 2022 variant.  Compared with `iso-2022-7bit' it fills the
;; third :designation slot with (nil 96) and adds `single-shift' to
;; :flags.
1123(define-coding-system 'iso-2022-7bit-ss2
cb269bb1 1124 "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
c0e17dd8
KH
1125 :coding-type 'iso-2022
1126 :mnemonic ?$
1127 :charset-list 'iso-2022
1128 :designation [(ascii 94) nil (nil 96) nil]
1129 :flags '(short ascii-at-eol ascii-at-cntl 7-bit
1130 designation single-shift composition))
1131
;; 7-bit ISO 2022 variant with `locking-shift' in :flags; the second
;; :designation slot is (nil 96).  Unlike the two preceding 7-bit
;; definitions, :flags does not include `short'.
1132(define-coding-system 'iso-2022-7bit-lock
cb269bb1 1133 "ISO-2022 coding system using Locking-Shift for 96-charset."
c0e17dd8
KH
1134 :coding-type 'iso-2022
1135 :mnemonic ?&
1136 :charset-list 'iso-2022
1137 :designation [(ascii 94) (nil 96) nil nil]
1138 :flags '(ascii-at-eol ascii-at-cntl 7-bit
1139 designation locking-shift composition))
4951a271 1140
;; `iso-2022-int-1' is an alias for `iso-2022-7bit-lock'.
2e21aa27 1141(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
08c19a27 1142
;; Unlike the neighbouring ISO 2022 coding systems, :charset-list here
;; is an explicit list of charsets rather than the symbol `iso-2022',
;; and each :designation slot after the first names the specific
;; charsets assigned to it.  :flags combines `locking-shift' and
;; `single-shift' with `init-bol', and omits `designation' and
;; `composition'.
c0e17dd8 1143(define-coding-system 'iso-2022-7bit-lock-ss2
cb269bb1 1144 "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
c0e17dd8
KH
1145 :coding-type 'iso-2022
1146 :mnemonic ?i
1147 :charset-list '(ascii
1148 japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
1149 korean-ksc5601
1150 chinese-gb2312
1151 chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
1152 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
1153 chinese-cns11643-7)
1154 :designation [(ascii 94)
1155 (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
1156 (nil chinese-cns11643-2)
1157 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
1158 chinese-cns11643-6 chinese-cns11643-7)]
1159 :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
1160 single-shift init-bol))
08c19a27 1161
;; `iso-2022-cjk' is an alias for `iso-2022-7bit-lock-ss2'.
2e21aa27 1162(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
08c19a27 1163
;; Same :designation as `iso-2022-7bit-ss2', but :flags contains
;; neither `7-bit' nor `short'.
c0e17dd8 1164(define-coding-system 'iso-2022-8bit-ss2
cb269bb1 1165 "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
c0e17dd8
KH
1166 :coding-type 'iso-2022
1167 :mnemonic ?@
1168 :charset-list 'iso-2022
1169 :designation [(ascii 94) nil (nil 96) nil]
1170 :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
08c19a27 1171
c0e17dd8
KH
;; Compound text as an ISO 2022 coding system.  The second
;; :designation slot names `latin-iso8859-1' and `katakana-jisx0201';
;; :flags includes `composition', which `ctext-no-compositions' (the
;; variant defined after the aliases) leaves out.
1172(define-coding-system 'compound-text
1173 "Compound text based generic encoding for decoding unknown messages.
73066974
EZ
1174
1175This coding system does not support ICCCM Extended Segments."
c0e17dd8
KH
1176 :coding-type 'iso-2022
1177 :mnemonic ?x
1178 :charset-list 'iso-2022
1179 :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1180 :flags '(ascii-at-eol ascii-at-cntl
1181 designation locking-shift single-shift composition)
ebc563df
DL
1182 ;; Fixme: this isn't a valid MIME charset and has to be
1183 ;; special-cased elsewhere -- fx
c0e17dd8 1184 :mime-charset 'x-ctext)
d49a4835 1185
cb5be6c9
EZ
;; `x-ctext' and `ctext' are aliases for `compound-text'.
1186(define-coding-system-alias 'x-ctext 'compound-text)
1187(define-coding-system-alias 'ctext 'compound-text)
73066974 1188
cb5be6c9
EZ
1189;; Same as compound-text, but doesn't produce composition escape
1190;; sequences. Used in post-read and pre-write conversions of
1191;; compound-text-with-extensions, see mule.el. Note that this should
1192;; not have a mime-charset property, to prevent it from showing up
1193;; close to the beginning of coding systems ordered by priority.
;; `define-coding-system' is called as NAME DOCSTRING followed by
;; keyword properties, so the coding type and mnemonic are supplied
;; only through :coding-type and :mnemonic below, never as positional
;; arguments between NAME and DOCSTRING.
c0e17dd8 1194(define-coding-system 'ctext-no-compositions
73066974
EZ
1195 "Compound text based generic encoding for decoding unknown messages.
1196
cb5be6c9 1197Like `compound-text', but does not produce escape sequences for compositions."
c0e17dd8
KH
1198 :coding-type 'iso-2022
1199 :mnemonic ?x
1200 :charset-list 'iso-2022
1201 :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1202 :flags '(ascii-at-eol ascii-at-cntl
1203 designation locking-shift single-shift))
1204
;; Declared with :coding-type raw-text; the conversion work is
;; delegated to the functions named by :post-read-conversion and
;; :pre-write-conversion (`ctext-post-read-conversion' and
;; `ctext-pre-write-conversion', which are not defined in this part of
;; the file).
1205(define-coding-system 'compound-text-with-extensions
73066974
EZ
1206 "Compound text encoding with ICCCM Extended Segment extensions.
1207
1208This coding system should be used only for X selections. It is inappropriate
1209for decoding and encoding files, process I/O, etc."
c0e17dd8
KH
1210 :coding-type 'raw-text
1211 :mnemonic ?x
1212 :post-read-conversion 'ctext-post-read-conversion
1213 :pre-write-conversion 'ctext-pre-write-conversion)
73066974 1214
cb5be6c9
EZ
;; `x-ctext-with-extensions' and `ctext-with-extensions' are aliases
;; for `compound-text-with-extensions'.
1215(define-coding-system-alias
1216 'x-ctext-with-extensions 'compound-text-with-extensions)
1217(define-coding-system-alias
1218 'ctext-with-extensions 'compound-text-with-extensions)
75b6fb58 1219
c0e17dd8
KH
;; Charset-based coding system limited to `ascii'.  :default-char is
;; `?', the replacement character mentioned in the docstring.
1220(define-coding-system 'us-ascii
1221 "Convert all characters but ASCII to `?'."
1222 :coding-type 'charset
1223 :mnemonic ?-
1224 :charset-list '(ascii)
1225 :default-char ??
1226 :mime-charset 'us-ascii)
1227
;; `iso-safe' is an alias for `us-ascii'.
1228(define-coding-system-alias 'iso-safe 'us-ascii)
f6eb8ace 1229
c0e17dd8 1230;; Use us-ascii for terminal output unless some other coding system is
e8dd0160 1231;; specified explicitly.
c0e17dd8 1232(set-safe-terminal-coding-system-internal 'us-ascii)
f6eb8ace 1233
08c19a27 1234;; The other coding systems are defined in the language-specific
c0e17dd8 1235;; files under lisp/language.
08c19a27 1236
678dc7ec
RS
1237;; Normally, set coding system to `undecided' before reading a file.
1238;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
1239;; but we regard them as containing multibyte characters.
1240;; Tar files are not decoded at all, but we treat them as raw bytes.
08c19a27 1241
;; Each element pairs a file-name regexp with either a single coding
;; system or a cons of two coding systems (presumably one for reading
;; and one for writing -- confirm against the documentation of
;; `file-coding-system-alist').  The final "" entry matches every file
;; name and so acts as the default.
4951a271 1242(setq file-coding-system-alist
2238f751 1243 '(("\\.elc\\'" . (emacs-mule . emacs-mule))
bdf74bef 1244 ("\\.utf\\(-8\\)?\\'" . utf-8)
3803079c
DL
1245 ;; This is the defined default for XML documents. It may be
1246 ;; overridden by a charset specification in the header. That
1247 ;; should be grokked by the auto-coding mechanism, but rms
1248 ;; vetoed that. -- fx
1249 ("\\.xml\\'" . utf-8)
3e88bb50
EZ
1250 ;; We use raw-text for reading loaddefs.el so that if it
1251 ;; happens to have DOS or Mac EOLs, they are converted to
1252 ;; newlines. This is required to make the special treatment
1253 ;; of the "\ newline" combination in loaddefs.el, which marks
1254 ;; the beginning of a doc string, work.
 ;; The dot is escaped so that only a literal "loaddefs.el" matches.
1255 ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
2238f751 1256 ("\\.tar\\'" . (no-conversion . no-conversion))
4951a271 1257 ("" . (undecided . nil))))
08c19a27
KH
1258
1259\f
1260;;; Setting coding categories and their priorities.
1261
1262;; This setting is just for reading Emacs Lisp source files which
1263;; contain multilingual text while dumping Emacs. More appropriate
2792ce16 1264;; values are set by the command `set-language-environment' for each
08c19a27
KH
1265;; language environment.
1266
c0e17dd8
KH
;; NOTE(review): the arguments are presumably listed from highest to
;; lowest priority -- confirm against `set-coding-system-priority'.
1267(set-coding-system-priority
1268 'iso-latin-1
1269 'utf-8
1270 'iso-2022-7bit
1271 )
08c19a27 1272
c1b628eb
KH
1273\f
1274;;; Miscellaneous settings.
c1b628eb 1275
c0e17dd8
KH
1276;; Make all multibyte characters self-insert.
;; The range covers character codes 128 through (max-char) in the
;; char-table stored as the second element of `global-map'.
1277(set-char-table-range (nth 1 global-map)
007eef16 1278 (cons 128 (max-char))
c0e17dd8
KH
1279 'self-insert-command)
1280
;; Set the `latin-extra-code-table' entry for code ?\222 (octal 222)
;; to t.
1281(aset latin-extra-code-table ?\222 t)
4cb4b388 1282
cb269bb1
DL
1283;; Move least specific charsets to end of priority list
1284
;; `emacs' and `unicode' are removed from the current priority list
;; and the remaining charsets are passed to `set-charset-priority'.
;; NOTE(review): this relies on charsets that are not passed ending up
;; after those that are -- confirm against `set-charset-priority'.
1285(apply #'set-charset-priority
1286 (delq 'unicode (delq 'emacs (charset-priority-list))))
1287
6820ed3f
DL
1288;; The old code-pages library is obsoleted by coding systems based on
1289;; the charsets defined in this file but might be required by user
1290;; code.
;; Providing the feature here marks `code-pages' as already loaded.
1291(provide 'code-pages)
1292
3803079c
DL
1293;; Local variables:
1294;; no-byte-compile: t
1295;; End:
1296
08c19a27 1297;;; mule-conf.el ends here