(auto-coding-regexp-alist): Recognize
[bpt/emacs.git] / lisp / international / mule-conf.el
CommitLineData
08c19a27
KH
1;;; mule-conf.el --- configure multilingual environment
2
08c19a27 3;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
c0e17dd8
KH
5;; Copyright (C) 2001, 2002
6;; National Institute of Advanced Industrial Science and Technology (AIST)
7;; Registration Number H13PRO009
9fea1ee1 8;; Copyright (C) 2002 Free Software Foundation, Inc.
08c19a27 9
24adcac1 10;; Keywords: i18n, mule, multilingual, character set, coding system
08c19a27
KH
11
12;; This file is part of GNU Emacs.
13
14;; GNU Emacs is free software; you can redistribute it and/or modify
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
25;; along with GNU Emacs; see the file COPYING. If not, write to the
26;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27;; Boston, MA 02111-1307, USA.
28
29;;; Commentary:
30
3803079c
DL
31;; This file defines the Emacs charsets and some basic coding systems.
32;; Other coding systems are defined in the files in directory
33;; lisp/language.
08c19a27
KH
34
35;;; Code:
36
155b256a
DL
37;;; Remarks
38
3803079c
DL
39;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/.
40;; Standards docs equivalent to iso-2022 and iso-8859 are at
41;; http://www.ecma.ch/.
42
32fefe80
DL
43;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
44;; MS Windows, which are presumably the only charsets we really need
45;; to worry about on such systems:
155b256a
DL
46;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
47;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
48;; 1258, 874, 932, 936, 949, 950
49
08c19a27
KH
50;;; Definitions of character sets.
51
24adcac1 52;; The charsets `ascii' and `unicode' are already defined in charset.c
c0e17dd8
KH
53;; as below:
54;;
55;; (define-charset 'ascii
56;; ""
57;; :dimension 1
58;; :code-space [0 127]
59;; :iso-final-char ?A
60;; :ascii-compatible-p t
61;; :emacs-mule-id 0
62;; :code-offset 0)
63;;
64;; (define-charset 'unicode
65;; ""
66;; :dimension 3
67;; :code-space [0 255 0 255 0 16]
68;; :ascii-compatible-p t
69;; :code-offset 0)
70;;
71;; We now set :docstring, :short-name, and :long-name properties.
72
73(put-charset-property
74 'ascii :docstring "ASCII (ISO646 IRV)")
75(put-charset-property
76 'ascii :short-name "ASCII")
77(put-charset-property
78 'ascii :long-name "ASCII (ISO646 IRV)")
79(put-charset-property
80 'unicode :docstring "Unicode (ISO10646)")
81(put-charset-property
82 'unicode :short-name "Unicode")
83(put-charset-property
84 'unicode :long-name "Unicode (ISO10646)")
85
86(define-charset-alias 'ucs 'unicode)
87
88(define-charset 'emacs
cb269bb1 89 "Full Emacs characters"
c0e17dd8
KH
90 :ascii-compatible-p t
91 :code-space [ 0 255 0 255 0 63 ]
92 :code-offset 0
93 :supplementary-p t)
94
95(define-charset 'iso-8859-1
24adcac1 96 "Latin-1 (ISO/IEC 8859-1)"
c0e17dd8
KH
97 :short-name "Latin-1"
98 :ascii-compatible-p t
99 :code-space [0 255]
100 :code-offset 0)
101
102(define-charset 'latin-iso8859-1
103 "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
104 :short-name "RHP of Latin-1"
105 :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
106 :iso-final-char ?A
107 :emacs-mule-id 129
108 :code-space [32 127]
109 :code-offset 160)
110
b714f5c2
DL
111;; Name perhaps not ideal, but is XEmacs-compatible.
112(define-charset 'control-1
c0e17dd8
KH
113 "8-bit control code (0x80..0x9F)"
114 :short-name "8-bit control code"
115 :code-space [128 159]
116 :code-offset 128)
117
b714f5c2
DL
118(define-charset 'eight-bit-control
119 "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
120 :short-name "Raw bytes 0x80..0x9F"
121 :code-space [128 159]
122 :code-offset #x3FFF80) ; see character.h
123
c0e17dd8 124(define-charset 'eight-bit-graphic
b714f5c2
DL
125 "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
126 :short-name "Raw bytes 0xA0..0xFF"
c0e17dd8 127 :code-space [160 255]
b714f5c2 128 :code-offset #x3FFFA0) ; see character.h
c0e17dd8
KH
129
(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
                                                 iso-ir iso-final
                                                 emacs-mule-id map)
  "Define an 8-bit charset SYMBOL and, optionally, its right-hand part.
The expansion first defines SYMBOL with code space [0 255], marked
ASCII compatible, with MAP as its `:map', NAME as its docstring and
long name, and NICKNAME as its short name.

If ISO-SYMBOL is non-nil, the expansion also defines ISO-SYMBOL with
code space [32 127] as the subset of SYMBOL covering codes 160..255
shifted by -128.  ISO-FINAL and EMACS-MULE-ID become its
`:iso-final-char' and `:emacs-mule-id'; a non-nil ISO-IR is appended
to its docstring as the ISO-IR registration number.

Every argument form is spliced into the expansion unevaluated, and
some appear more than once, so pass only constants or other forms
that are safe to evaluate repeatedly."
  (let* (;; Docstring expression for the right-hand-part charset; the
         ;; ISO-IR number is mentioned only when one was supplied.
         (rhp-docstring
          `(if ,iso-ir
               (format "Right-Hand Part of %s (%s): ISO-IR-%d"
                       ,name ,nickname ,iso-ir)
             (format "Right-Hand Part of %s (%s)" ,name ,nickname)))
         ;; The full 256-code charset.
         (whole-charset
          `(define-charset ,symbol
             ,name
             :short-name ,nickname
             :long-name ,name
             :ascii-compatible-p t
             :code-space [0 255]
             :map ,map))
         ;; The 96-code charset carved out of the upper half of SYMBOL.
         (rhp-charset
          `(define-charset ,iso-symbol
             ,rhp-docstring
             :short-name (format "RHP of %s" ,name)
             :long-name (format "RHP of %s (%s)" ,name ,nickname)
             :iso-final-char ,iso-final
             :emacs-mule-id ,emacs-mule-id
             :code-space [32 127]
             :subset (list ,symbol 160 255 -128))))
    `(progn
       ,whole-charset
       (if ,iso-symbol
           ,rhp-charset))))
c0e17dd8
KH
153
154(define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
155 "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
156
157(define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
158 "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
159
160(define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
161 "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
162
163(define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
164 "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
165
166(define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
167 "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
168
169(define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
170 "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
171
172(define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
173 "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
174
175(define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
176 "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
177
3803079c 178(define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
bbe3715c 179 "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")
3803079c 180
bbe3715c
DL
181;; http://www.nectec.or.th/it-standards/iso8859-11/
182;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
183;; plus nbsp
184(define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
185 "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")
186
187;; 8859-12 doesn't (yet?) exist.
3803079c 188
c0e17dd8 189(define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
3803079c 190 "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
c0e17dd8
KH
191
192(define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
193 "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
194
195(define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
196 "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
197
7e8b4d67 198(define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
08a2119c 199 "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
7e8b4d67 200
cb269bb1
DL
201;; No point in keeping it around.
202(fmakunbound 'define-iso-single-byte-charset)
203
bbe3715c 204;; Can this be shared with 8859-11?
c0e17dd8
KH
205(define-charset 'thai-tis620
206 "TIS620.2533"
207 :short-name "TIS620.2533"
208 :iso-final-char ?T
209 :emacs-mule-id 133
210 :code-space [32 127]
211 :code-offset #x0E00)
212
bbe3715c 213;; Fixme: doc for this, c.f. above
c0e17dd8
KH
214(define-charset 'tis620-2533
215 "TIS620.2533"
216 :short-name "TIS620.2533"
217 :ascii-compatible-p t
218 :code-space [0 255]
fd6c8fc1 219 :superset '(ascii eight-bit-control (thai-tis620 . 128)))
c0e17dd8
KH
220
221(define-charset 'jisx0201
222 "JISX0201"
223 :short-name "JISX0201"
c0e17dd8
KH
224 :code-space [33 254]
225 :map "jisx0201")
226
227(define-charset 'latin-jisx0201
228 "Roman Part of JISX0201.1976"
229 :short-name "JISX0201 Roman"
230 :long-name "Japanese Roman (JISX0201.1976)"
231 :iso-final-char ?J
232 :emacs-mule-id 138
233 :code-space [33 126]
fd6c8fc1 234 :subset '(jisx0201 33 126 0))
c0e17dd8
KH
235
236(define-charset 'katakana-jisx0201
237 "Katakana Part of JISX0201.1976"
238 :short-name "JISX0201 Katakana"
239 :long-name "Japanese Katakana (JISX0201.1976)"
240 :iso-final-char ?I
241 :emacs-mule-id 137
242 :code-space [33 126]
fd6c8fc1 243 :subset '(jisx0201 161 254 -128))
c0e17dd8
KH
244
245(define-charset 'chinese-gb2312
246 "GB2312 Chinese simplified: ISO-IR-58"
247 :short-name "GB2312"
248 :long-name "GB2312: ISO-IR-58"
249 :iso-final-char ?A
250 :emacs-mule-id 145
251 :code-space [33 126 33 126]
252 :code-offset #x110000
253 :unify-map "gb2312-1980")
254
7c9e1024 255(define-charset 'chinese-gbk
24adcac1 256 "GBK Chinese simplified."
7c9e1024 257 :short-name "GBK"
7c9e1024 258 :code-space [#x40 #xFE #x81 #xFE]
64762f77 259 :code-offset #x160000
7c9e1024 260 :unify-map "gbk")
155b256a 261(define-charset-alias 'cp936 'chinese-gbk)
32fefe80 262(define-charset-alias 'windows-936 'chinese-gbk)
7c9e1024 263
c0e17dd8
KH
264(define-charset 'chinese-cns11643-1
265 "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
266 :short-name "CNS11643-1"
267 :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
268 :iso-final-char ?G
269 :emacs-mule-id 149
270 :code-space [33 126 33 126]
271 :code-offset #x114000
272 :unify-map "cns11643-1")
273
274(define-charset 'chinese-cns11643-2
275 "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
276 :short-name "CNS11643-2"
277 :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
278 :iso-final-char ?H
279 :emacs-mule-id 150
280 :code-space [33 126 33 126]
281 :code-offset #x118000
282 :unify-map "cns11643-2")
283
284(define-charset 'chinese-cns11643-3
285 "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
286 :short-name "CNS11643-3"
287 :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
288 :iso-final-char ?I
289 :code-space [33 126 33 126]
290 :emacs-mule-id 246
291 :code-offset #x11C000)
292
293(define-charset 'chinese-cns11643-4
294 "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
295 :short-name "CNS11643-4"
296 :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
297 :iso-final-char ?J
298 :emacs-mule-id 247
299 :code-space [33 126 33 126]
300 :code-offset #x120000)
301
302(define-charset 'chinese-cns11643-5
303 "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
304 :short-name "CNS11643-5"
305 :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
306 :iso-final-char ?K
307 :emacs-mule-id 248
308 :code-space [33 126 33 126]
309 :code-offset #x124000)
310
311(define-charset 'chinese-cns11643-6
312 "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
313 :short-name "CNS11643-6"
314 :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
315 :iso-final-char ?L
316 :emacs-mule-id 249
317 :code-space [33 126 33 126]
318 :code-offset #x128000)
319
320(define-charset 'chinese-cns11643-7
321 "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
322 :short-name "CNS11643-7"
323 :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
324 :iso-final-char ?M
325 :emacs-mule-id 250
326 :code-space [33 126 33 126]
327 :code-offset #x12C000)
328
329(define-charset 'big5
330 "Big5 (Chinese traditional)"
331 :short-name "Big5"
c0e17dd8
KH
332 :code-space [#x40 #xFE #xA1 #xFE]
333 :code-offset #x130000
334 :unify-map "big5")
155b256a
DL
335;; Fixme: AKA cp950 according to
336;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>. Is
337;; that correct?
c0e17dd8
KH
338
;; Level 1 (frequently used) part of Big5, re-encoded as a 94x94
;; charset.  The long name now ends the range at C67E, matching the
;; docstring: 0x7F is not a valid Big5 trail byte.
(define-charset 'chinese-big5-1
  "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-1)"
  :long-name "Big5 (Level-1) A141-C67E"
  :iso-final-char ?0
  :emacs-mule-id 152
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x135000
  :unify-map "big5-1")
348
349(define-charset 'chinese-big5-2
24adcac1 350 "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
c0e17dd8
KH
351 :short-name "Big5 (Level-2)"
352 :long-name "Big5 (Level-2) C940-FEFE"
353 :iso-final-char ?1
354 :emacs-mule-id 153
355 :code-space [#x21 #x7E #x21 #x7E]
7c9e1024 356 :code-offset #x137800
c0e17dd8
KH
357 :unify-map "big5-2")
358
359(define-charset 'japanese-jisx0208
360 "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
361 :short-name "JISX0208"
362 :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
363 :iso-final-char ?B
364 :emacs-mule-id 146
365 :code-space [33 126 33 126]
366 :code-offset #x140000
367 :unify-map "jisx0208-1990")
368
369(define-charset 'japanese-jisx0208-1978
370 "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
371 :short-name "JISX0208.1978"
372 :long-name "JISX0208.1978 (Japanese): ISO-IR-42"
373 :iso-final-char ?@
374 :emacs-mule-id 144
375 :code-space [33 126 33 126]
376 :code-offset #x144000
377 :unify-map "jisx0208-1978")
378
379(define-charset 'japanese-jisx0212
380 "JISX0212 Japanese supplement: ISO-IR-159"
381 :short-name "JISX0212"
382 :long-name "JISX0212 (Japanese): ISO-IR-159"
383 :iso-final-char ?D
384 :emacs-mule-id 148
385 :code-space [33 126 33 126]
386 :code-offset #x148000
387 :unify-map "jisx0212-1990")
388
389(define-charset 'japanese-jisx0213-1
390 "JISX0213 Plane 1 (Japanese)"
391 :short-name "JISX0213-1"
c0e17dd8
KH
392 :iso-final-char ?O
393 :emacs-mule-id 151
3adbd46f 394 :unify-map "jisx0213-1"
c0e17dd8
KH
395 :code-space [33 126 33 126]
396 :code-offset #x14C000)
397
398(define-charset 'japanese-jisx0213-2
399 "JISX0213 Plane 2 (Japanese)"
400 :short-name "JISX0213-2"
c0e17dd8
KH
401 :iso-final-char ?P
402 :emacs-mule-id 254
3adbd46f 403 :unify-map "jisx0213-2"
c0e17dd8
KH
404 :code-space [33 126 33 126]
405 :code-offset #x150000)
406
407(define-charset 'korean-ksc5601
408 "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
409 :short-name "KSC5601"
410 :long-name "KSC5601 (Korean): ISO-IR-149"
411 :iso-final-char ?C
412 :emacs-mule-id 147
413 :code-space [33 126 33 126]
b714f5c2
DL
414 :code-offset #x279f94
415 :unify-map "ksc5601-1987")
c0e17dd8 416
155b256a
DL
417;; Fixme: Korean cp949/UHC
418
c0e17dd8
KH
419(define-charset 'chinese-sisheng
420 "SiSheng characters for PinYin/ZhuYin"
421 :short-name "SiSheng"
422 :long-name "SiSheng (PinYin/ZhuYin)"
423 :iso-final-char ?0
424 :emacs-mule-id 160
425 :code-space [33 126]
c4e44241 426 :unify-map "sisheng"
c0e17dd8
KH
427 :code-offset #x200000)
428
b714f5c2
DL
429;; A subset of the 1989 version of IPA. It consists of the consonant
430;; signs used in English, French, German and Italian, and all vowels
431;; signs in the table. [says old MULE doc]
c0e17dd8
KH
432(define-charset 'ipa
433 "IPA (International Phonetic Association)"
434 :short-name "IPA"
c0e17dd8
KH
435 :iso-final-char ?0
436 :emacs-mule-id 161
c4e44241 437 :unify-map "ipa"
c0e17dd8
KH
438 :code-space [32 127]
439 :code-offset #x200080)
440
441(define-charset 'viscii
442 "VISCII1.1"
443 :short-name "VISCII"
444 :long-name "VISCII 1.1"
445 :code-space [0 255]
446 :map "viscii")
447
448(define-charset 'vietnamese-viscii-lower
449 "VISCII1.1 lower-case"
450 :short-name "VISCII lower"
451 :long-name "VISCII lower-case"
452 :iso-final-char ?1
453 :emacs-mule-id 162
454 :code-space [32 127]
65076506
KH
455 :code-offset #x200200
456 :unify-map "viscii-lower")
c0e17dd8
KH
457
458(define-charset 'vietnamese-viscii-upper
459 "VISCII1.1 upper-case"
460 :short-name "VISCII upper"
461 :long-name "VISCII upper-case"
462 :iso-final-char ?2
463 :emacs-mule-id 163
464 :code-space [32 127]
65076506
KH
465 :code-offset #x200280
466 :unify-map "viscii-upper")
c0e17dd8
KH
467
468(define-charset 'vscii
469 "VSCII1.1"
470 :short-name "VSCII"
c0e17dd8
KH
471 :code-space [0 255]
472 :map "vscii")
473
69862ba6
DL
474;; Fixme: see note in tcvn.map about combining characters
475(define-charset 'tcvn-5712
476 "TCVN-5712"
477 :code-space [0 255]
478 :map "tcvn")
479
c0e17dd8
KH
480(define-charset 'koi8-r
481 "KOI8-R"
482 :short-name "KOI8-R"
c0e17dd8
KH
483 :ascii-compatible-p t
484 :code-space [0 255]
485 :map "koi8-r")
486
487(define-charset-alias 'koi8 'koi8-r)
488
489(define-charset 'alternativnyj
490 "ALTERNATIVNYJ"
491 :short-name "alternativnyj"
c0e17dd8
KH
492 :ascii-compatible-p t
493 :code-space [0 255]
494 :map "ibm866")
bbe3715c
DL
495;; Fixme: http://czyborra.com/charsets/cyrillic.html says the
496;; following, but the iconv map for cp866 isn't the same as his chart
497;; for alternativnyj. I can't find anything that looks like an
498;; official definition of alternativnyj.
007eef16 499(define-charset-alias 'cp866 'alternativnyj)
08a2119c 500(define-charset-alias 'ibm866 'alternativnyj)
08c19a27 501
6ef462e0
DL
502(define-charset 'koi8-u
503 "KOI8-U"
504 :short-name "KOI8-U"
6ef462e0
DL
505 :ascii-compatible-p t
506 :code-space [0 255]
507 :map "koi8-u")
508
509(define-charset 'koi8-t
510 "KOI8-T"
511 :short-name "KOI8-T"
6ef462e0
DL
512 :ascii-compatible-p t
513 :code-space [0 255]
514 :map "koi8-t")
515
516(define-charset 'georgian-ps
517 "GEORGIAN-PS"
518 :short-name "GEORGIAN-PS"
6ef462e0
DL
519 :ascii-compatible-p t
520 :code-space [0 255]
521 :map "georgian-ps")
522
523(define-charset 'windows-1250
9fea1ee1 524 "WINDOWS-1250 (Central Europe)"
6ef462e0 525 :short-name "WINDOWS-1250"
6ef462e0
DL
526 :ascii-compatible-p t
527 :code-space [0 255]
528 :map "windows-1250")
529(define-charset-alias 'cp1250 'windows-1250)
530
531(define-charset 'windows-1251
155b256a 532 "WINDOWS-1251 (Cyrillic)"
6ef462e0 533 :short-name "WINDOWS-1251"
6ef462e0
DL
534 :ascii-compatible-p t
535 :code-space [0 255]
536 :map "windows-1251")
537(define-charset-alias 'cp1251 'windows-1251)
538
539(define-charset 'windows-1252
155b256a 540 "WINDOWS-1252 (Latin I)"
6ef462e0 541 :short-name "WINDOWS-1252"
6ef462e0
DL
542 :ascii-compatible-p t
543 :code-space [0 255]
544 :map "windows-1252")
545(define-charset-alias 'cp1252 'windows-1252)
546
9fea1ee1 547(define-charset 'windows-1253
155b256a 548 "WINDOWS-1253 (Greek)"
9fea1ee1 549 :short-name "WINDOWS-1253"
9fea1ee1
DL
550 :ascii-compatible-p t
551 :code-space [0 255]
552 :map "windows-1253")
553(define-charset-alias 'cp1253 'windows-1253)
554
555(define-charset 'windows-1254
155b256a 556 "WINDOWS-1254 (Turkish)"
9fea1ee1 557 :short-name "WINDOWS-1254"
9fea1ee1
DL
558 :ascii-compatible-p t
559 :code-space [0 255]
560 :map "windows-1254")
561(define-charset-alias 'cp1254 'windows-1254)
562
563(define-charset 'windows-1255
564 "WINDOWS-1255 (Hebrew)"
565 :short-name "WINDOWS-1255"
9fea1ee1
DL
566 :ascii-compatible-p t
567 :code-space [0 255]
568 :map "windows-1255")
569(define-charset-alias 'cp1255 'windows-1255)
570
571(define-charset 'windows-1256
572 "WINDOWS-1256 (Arabic)"
573 :short-name "WINDOWS-1256"
9fea1ee1
DL
574 :ascii-compatible-p t
575 :code-space [0 255]
576 :map "windows-1256")
577(define-charset-alias 'cp1256 'windows-1256)
578
579(define-charset 'windows-1257
580 "WINDOWS-1257 (Baltic)"
581 :short-name "WINDOWS-1257"
9fea1ee1
DL
582 :ascii-compatible-p t
583 :code-space [0 255]
584 :map "windows-1257")
585(define-charset-alias 'cp1257 'windows-1257)
586
587(define-charset 'windows-1258
155b256a 588 "WINDOWS-1258 (Viet Nam)"
9fea1ee1 589 :short-name "WINDOWS-1258"
9fea1ee1
DL
590 :ascii-compatible-p t
591 :code-space [0 255]
592 :map "windows-1258")
593(define-charset-alias 'cp1258 'windows-1258)
594
595(define-charset 'next
596 "NEXT"
597 :short-name "NEXT"
9fea1ee1
DL
598 :ascii-compatible-p t
599 :code-space [0 255]
600 :map "next")
601
6ef462e0
DL
602(define-charset 'cp1125
603 "CP1125"
604 :short-name "CP1125"
6ef462e0
DL
605 :code-space [0 255]
606 :map "cp1125")
607(define-charset-alias 'ruscii 'cp1125)
608;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
609(define-charset-alias 'cp866u 'cp1125)
610
bbe3715c
DL
611;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html
612;; shows this as not ASCII compatible, with various graphics in
613;; 0x01-0x1F.
007eef16 614(define-charset 'cp437
bbe3715c 615 "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
007eef16 616 :short-name "CP437"
007eef16
DL
617 :code-space [0 255]
618 :ascii-compatible-p t
619 :map "cp437")
620
155b256a
DL
621(define-charset 'cp720
622 "CP720 (Arabic)"
623 :short-name "CP720"
624 :code-space [0 255]
625 :ascii-compatible-p t
626 :map "cp720")
627
007eef16 628(define-charset 'cp737
bbe3715c 629 "CP737 (PC Greek)"
007eef16 630 :short-name "CP737"
007eef16
DL
631 :code-space [0 255]
632 :ascii-compatible-p t
633 :map "cp737")
634
635(define-charset 'cp775
bbe3715c 636 "CP775 (PC Baltic)"
007eef16 637 :short-name "CP775"
007eef16
DL
638 :code-space [0 255]
639 :ascii-compatible-p t
640 :map "cp775")
641
642(define-charset 'cp851
643 "CP851"
644 :short-name "CP851"
007eef16
DL
645 :code-space [0 255]
646 :ascii-compatible-p t
647 :map "cp851")
648
649(define-charset 'cp852
bbe3715c 650 "CP852 (MS-DOS Latin-2)"
007eef16 651 :short-name "CP852"
007eef16
DL
652 :code-space [0 255]
653 :ascii-compatible-p t
654 :map "cp852")
655
656(define-charset 'cp855
bbe3715c 657 "CP855 (IBM Cyrillic)"
007eef16 658 :short-name "CP855"
007eef16
DL
659 :code-space [0 255]
660 :ascii-compatible-p t
661 :map "cp855")
662
663(define-charset 'cp857
bbe3715c 664 "CP857 (IBM Turkish)"
007eef16 665 :short-name "CP857"
007eef16
DL
666 :code-space [0 255]
667 :ascii-compatible-p t
668 :map "cp857")
669
155b256a
DL
670(define-charset 'cp858
671 "CP858 (Multilingual Latin I + Euro)"
672 :short-name "CP858"
673 :code-space [0 255]
674 :ascii-compatible-p t
675 :map "cp858")
bbe3715c 676(define-charset-alias 'cp00858 'cp858) ; IANA has IBM00858/CP00858
155b256a 677
007eef16 678(define-charset 'cp860
bbe3715c 679 "CP860 (MS-DOS Portuguese)"
007eef16 680 :short-name "CP860"
007eef16
DL
681 :code-space [0 255]
682 :ascii-compatible-p t
683 :map "cp860")
684
685(define-charset 'cp861
bbe3715c 686 "CP861 (MS-DOS Icelandic)"
007eef16 687 :short-name "CP861"
007eef16
DL
688 :code-space [0 255]
689 :ascii-compatible-p t
690 :map "cp861")
691
692(define-charset 'cp862
bbe3715c 693 "CP862 (PC Hebrew)"
007eef16 694 :short-name "CP862"
007eef16
DL
695 :code-space [0 255]
696 :ascii-compatible-p t
697 :map "cp862")
698
699(define-charset 'cp863
bbe3715c 700 "CP863 (MS-DOS Canadian French)"
007eef16 701 :short-name "CP863"
007eef16
DL
702 :code-space [0 255]
703 :ascii-compatible-p t
704 :map "cp863")
705
706(define-charset 'cp864
bbe3715c 707 "CP864 (PC Arabic)"
007eef16 708 :short-name "CP864"
007eef16
DL
709 :code-space [0 255]
710 :ascii-compatible-p t
711 :map "cp864")
712
713(define-charset 'cp865
bbe3715c 714 "CP865 (MS-DOS Nordic)"
007eef16 715 :short-name "CP865"
007eef16
DL
716 :code-space [0 255]
717 :ascii-compatible-p t
718 :map "cp865")
719
720(define-charset 'cp869
bbe3715c 721 "CP869 (IBM Modern Greek)"
007eef16 722 :short-name "CP869"
007eef16
DL
723 :code-space [0 255]
724 :ascii-compatible-p t
725 :map "cp869")
726
727(define-charset 'cp874
bbe3715c 728 "CP874 (IBM Thai)"
007eef16 729 :short-name "CP874"
007eef16
DL
730 :code-space [0 255]
731 :ascii-compatible-p t
732 :map "cp874")
733
08c19a27
KH
734;; For Arabic, we need three different types of character sets.
735;; Digits are of direction left-to-right and of width 1-column.
736;; Others are of direction right-to-left and of width 1-column or
737;; 2-column.
c0e17dd8
KH
738(define-charset 'arabic-digit
739 "Arabic digit"
740 :short-name "Arabic digit"
c0e17dd8
KH
741 :iso-final-char ?2
742 :emacs-mule-id 164
743 :code-space [34 42]
744 :code-offset #x0600)
745
746(define-charset 'arabic-1-column
747 "Arabic 1-column"
748 :short-name "Arabic 1-col"
749 :long-name "Arabic 1-column"
750 :iso-final-char ?3
751 :emacs-mule-id 165
752 :code-space [33 126]
753 :code-offset #x200100)
754
755(define-charset 'arabic-2-column
756 "Arabic 2-column"
757 :short-name "Arabic 2-col"
758 :long-name "Arabic 2-column"
759 :iso-final-char ?4
760 :emacs-mule-id 224
761 :code-space [33 126]
762 :code-offset #x200180)
7153b1f1
KH
763
764;; Lao script.
c0e17dd8
KH
765;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
766(define-charset 'lao
767 "Lao characters (ISO10646 0E81..0EDF)"
768 :short-name "Lao"
c0e17dd8
KH
769 :iso-final-char ?1
770 :emacs-mule-id 167
771 :code-space [33 126]
772 :code-offset #x0E81)
773
774(define-charset 'mule-lao
775 "Lao characters (ISO10646 0E81..0EDF)"
776 :short-name "Lao"
c0e17dd8 777 :code-space [0 255]
fd6c8fc1 778 :superset '(ascii eight-bit-control (lao . 128)))
7153b1f1 779
08c19a27 780
7153b1f1
KH
781;; Indian scripts. Symbolic charset for data exchange. Glyphs are
782;; not assigned. They are automatically converted to each Indian
783;; script which IS-13194 supports.
784
c0e17dd8
KH
785(define-charset 'indian-is13194
786 "Generic Indian charset for data exchange with IS 13194"
787 :short-name "IS 13194"
788 :long-name "Indian IS 13194"
789 :iso-final-char ?5
790 :emacs-mule-id 225
791 :code-space [33 126]
c4e44241 792 :unify-map "is13194"
c0e17dd8
KH
793 :code-offset #x180000)
794
795(define-charset 'indian-glyph
796 "Glyphs for Indian characters."
797 :short-name "Indian glyph"
c0e17dd8
KH
798 :iso-final-char ?4
799 :emacs-mule-id 240
800 :code-space [32 127 32 127]
801 :code-offset #x180100)
7153b1f1
KH
802
803;; Actual Glyph for 1-column width.
c0e17dd8
KH
804(define-charset 'indian-1-column
805 "Indian charset for 1-column width glyphs"
806 :short-name "Indian 1-col"
807 :long-name "Indian 1 Column"
808 :iso-final-char ?6
809 :emacs-mule-id 240
810 :code-space [33 126 33 126]
811 :code-offset #x184000)
08c19a27 812
08c19a27 813;; Actual Glyph for 2-column width.
c0e17dd8
KH
814(define-charset 'indian-2-column
815 "Indian charset for 2-column width glyphs"
816 :short-name "Indian 2-col"
817 :long-name "Indian 2 Column"
818 :iso-final-char ?5
819 :emacs-mule-id 251
820 :code-space [33 126 33 126]
fd6c8fc1 821 :superset '(indian-1-column))
c0e17dd8
KH
822
;; Two-dimensional charset for Tibetan (the short and long names call
;; it the 2-column set).  `:iso-final-char' used to be listed twice
;; with the same value; it is now given once.
(define-charset 'tibetan
  "Tibetan characters"
  :short-name "Tibetan 2-col"
  :long-name "Tibetan 2 column"
  :iso-final-char ?7
  :emacs-mule-id 252
  :unify-map "tibetan"
  :code-space [33 126 33 126]
  :code-offset #x190000)
833
834(define-charset 'tibetan-1-column
835 "Tibetan 1 column glyph"
836 :short-name "Tibetan 1-col"
837 :long-name "Tibetan 1 column"
838 :iso-final-char ?8
839 :emacs-mule-id 241
840 :code-space [33 126 33 37]
fd6c8fc1 841 :superset '(tibetan))
08c19a27 842
c0e17dd8
KH
843;; Subsets of Unicode.
844(define-charset 'mule-unicode-2500-33ff
845 "Unicode characters of the range U+2500..U+33FF."
846 :short-name "Unicode subset 2"
847 :long-name "Unicode subset (U+2500..U+33FF)"
848 :iso-final-char ?2
849 :emacs-mule-id 242
850 :code-space [#x20 #x7f #x20 #x47]
851 :code-offset #x2500)
852
;; Unicode subset U+E000..U+FFFF.  The long name now writes the range
;; with "..", like the other two Unicode subset charsets; it used to
;; read "U+E000+FFFF".
(define-charset 'mule-unicode-e000-ffff
  "Unicode characters of the range U+E000..U+FFFF."
  :short-name "Unicode subset 3"
  :long-name "Unicode subset (U+E000..U+FFFF)"
  :iso-final-char ?3
  :emacs-mule-id 243
  :code-space [#x20 #x7F #x20 #x75]
  :code-offset #xE000)
861
862(define-charset 'mule-unicode-0100-24ff
863 "Unicode characters of the range U+0100..U+24FF."
864 :short-name "Unicode subset"
865 :long-name "Unicode subset (U+0100..U+24FF)"
866 :iso-final-char ?1
867 :emacs-mule-id 244
868 :code-space [#x20 #x7F #x20 #x7F]
869 :code-offset #x100)
870
871(define-charset 'ethiopic
24adcac1 872 "Ethiopic characters for Amharic and Tigrigna."
c0e17dd8
KH
873 :short-name "Ethiopic"
874 :long-name "Ethiopic characters"
875 :iso-final-char ?3
876 :emacs-mule-id 245
c4e44241 877 :unify-map "ethiopic"
c0e17dd8
KH
878 :code-space [33 126 33 126]
879 :code-offset #x1A0000)
880
881(define-charset 'mac-roman
882 "Mac Roman charset"
883 :short-name "Mac Roman"
c0e17dd8
KH
884 :ascii-compatible-p t
885 :code-space [0 255]
886 :map "mac-roman")
887
6ef462e0
DL
888;; Fixme: modern EBCDIC variants, e.g. IBM00924?
889(define-charset 'ebcdic-us
890 "US version of EBCDIC"
891 :short-name "EBCDIC-US"
6ef462e0
DL
892 :code-space [0 255]
893 :mime-charset 'ebcdic-us
894 :map "ebcdic-us")
895
896(define-charset 'ebcdic-uk
897 "UK version of EBCDIC"
898 :short-name "EBCDIC-UK"
6ef462e0
DL
899 :code-space [0 255]
900 :mime-charset 'ebcdic-uk
901 :map "ebcdic-uk")
902
3803079c
DL
;; HP Roman-8: an ASCII-compatible single-byte charset read from the
;; "hp-roman8" map.  The docstring now spells the vendor name
;; correctly ("Hewlett-Packard").
(define-charset 'hp-roman8
  "Encoding used by Hewlett-Packard printer software"
  :short-name "HP-ROMAN8"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "hp-roman8")
909
bbe3715c
DL
910;; To make a coding system with this, a pre-write-conversion should
911;; account for the commented-out multi-valued code points in
912;; stdenc.map.
3803079c
DL
913(define-charset 'adobe-standard-encoding
914 "Adobe `standard encoding' used in PostScript"
915 :short-name "ADOBE-STANDARD-ENCODING"
6584daf1 916 :code-space [#x20 255]
3803079c
DL
917 :map "stdenc")
918
919(define-charset 'symbol
920 "Adobe symbol encoding used in PostScript"
921 :short-name "ADOBE-SYMBOL"
6584daf1 922 :code-space [#x20 255]
3803079c
DL
923 :map "symbol")
924
925(define-charset 'ibm850
bbe3715c 926 "DOS codepage 850 (Latin-1)"
3803079c 927 :short-name "IBM850"
6584daf1 928 :ascii-compatible-p t
3803079c
DL
929 :code-space [0 255]
930 :map "ibm850")
931(define-charset-alias 'cp850 'ibm850)
932
64762f77
KH
933(define-charset 'gb18030-2-byte
934 "GB18030 2-byte (0x814E..0xFEFE)"
935 :code-space [#x40 #xFE #x81 #xFE]
936 :supplementary-p t
937 :map "gb18030-2")
938
939(define-charset 'gb18030-4-byte-bmp
940 "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
941 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
942 :supplementary-p t
943 :map "gb18030-4")
944
945(define-charset 'gb18030-4-byte-smp
946 "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
947 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
948 :min-code '(#x9030 . #x8130)
949 :max-code '(#xE332 . #x9A35)
950 :supplementary-p t
951 :code-offset #x10000)
952
953(define-charset 'gb18030-4-byte-ext-1
954 "GB18030 4-byte (0x8431A530-0x8F39FE39)"
955 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
956 :min-code '(#x8431 . #xA530)
957 :max-code '(#x8F39 . #xFE39)
958 :supplementary-p t
959 :code-offset #x200000 ; ... #x22484B
960 )
961
962(define-charset 'gb18030-4-byte-ext-2
963 "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
964 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
965 :min-code '(#xE332 . #x9A36)
966 :max-code '(#xFE39 . #xFE39)
967 :supplementary-p t
968 :code-offset #X22484C ; ... #x279f93
969 )
970
971(define-charset 'gb18030
972 "GB18030"
973 :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
974 :min-code 0
975 :max-code '(#xFE39 . #xFE39)
fd6c8fc1
KH
976 :superset '(ascii gb18030-2-byte
977 gb18030-4-byte-bmp gb18030-4-byte-smp
978 gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
64762f77 979
;; Call `unify-charset' on each charset listed below.  The calls that
;; are commented out (ipa, tibetan, ethiopic and the Japanese JIS X
;; charsets) are not executed.
;; NOTE(review): presumably "unify" means mapping the charset onto
;; Unicode code points -- confirm against `unify-charset'.
c0e17dd8 980(unify-charset 'chinese-gb2312)
7c9e1024 981(unify-charset 'chinese-gbk)
c0e17dd8
KH
982(unify-charset 'chinese-cns11643-1)
983(unify-charset 'chinese-cns11643-2)
984(unify-charset 'big5)
985(unify-charset 'chinese-big5-1)
986(unify-charset 'chinese-big5-2)
65076506
KH
987(unify-charset 'vietnamese-viscii-lower)
988(unify-charset 'vietnamese-viscii-upper)
3adbd46f
DL
989;; Fixme: unifying sucks in the charset tables, which may be large.
990;; (Can we avoid that and do it anyhow?)
991(unify-charset 'chinese-sisheng)
992(unify-charset 'indian-is13194)
c4e44241 993;; (unify-charset 'ipa)
c4e44241
DL
994;; (unify-charset 'tibetan)
995;; (unify-charset 'ethiopic)
996;; (unify-charset 'japanese-jisx0208-1978)
997;; (unify-charset 'japanese-jisx0208)
998;; (unify-charset 'japanese-jisx0212)
3adbd46f
DL
999;; (unify-charset 'japanese-jisx0213-1)
1000;; (unify-charset 'japanese-jisx0213-2)
65076506 1001
85f789f7 1002\f
d2a1ee18
KH
1003;; These are tables for translating characters on decoding and
1004;; encoding.
3adbd46f 1005;; Fixme: these aren't used now -- should they be?
;; Both standard tables are set to nil here.
c0e17dd8 1006(setq standard-translation-table-for-decode nil)
08c19a27 1007

f967223b 1008(setq standard-translation-table-for-encode nil)
08c19a27 1009
3adbd46f
DL
1010;; Fixme: should this be retained? I guess it could be useful for
1011;; non-unified charsets.
bdf74bef
DL
;; Declared with a default value of nil; nothing in this part of the
;; file assigns it another value.
1012(defvar translation-table-for-input nil
1013 "If non-nil, a char table used to translate characters from input methods.
1014\(Currently only used by Quail.)")
08c19a27
KH
1015\f
1016;;; Make fundamental coding systems.
1017
c0e17dd8
KH
1018;; The coding system `no-conversion' is already defined in coding.c as
1019;; below:
1020;;
1021;; (define-coding-system 'no-conversion
1022;; "Do no conversion."
1023;; :coding-type 'raw-text
1024;; :mnemonic ?=)
08c19a27 1025
cb269bb1
DL
;; Make `binary' another name for the `no-conversion' coding system.
1026(define-coding-system-alias 'binary 'no-conversion)
1027
c0e17dd8
KH
;; Define the `raw-text' coding system: coding type `raw-text',
;; mnemonic character ?t.  The docstring describes its behavior.
1028(define-coding-system 'raw-text
1029 "Raw text, which means text contains random 8-bit codes.
1030Encoding text with this coding system produces the actual byte
1031sequence of the text in buffers and strings. An exception is made for
1032eight-bit-control characters. Each of them is encoded into a single
1033byte.
bc6a0946
KH
1034
1035When you visit a file with this coding, the file is read into a
c0e17dd8
KH
1036unibyte buffer as is (except for EOL format), thus each byte of a file
1037is treated as a character."
1038 :coding-type 'raw-text
1039 :mnemonic ?t)
1040
;; Define the `undecided' coding system: coding type `undecided',
;; mnemonic character ?-, with only `ascii' in its charset list.
1041(define-coding-system 'undecided
cb269bb1 1042 "No conversion on encoding, automatic conversion on decoding."
c0e17dd8
KH
1043 :coding-type 'undecided
1044 :mnemonic ?-
1045 :charset-list '(ascii))
08c19a27 1046
;; Short aliases `unix', `dos' and `mac' for the three
;; `undecided-unix', `undecided-dos' and `undecided-mac' coding
;; systems.
;; NOTE(review): those three targets are not defined explicitly in
;; this part of the file; presumably they are created as EOL variants
;; of `undecided' -- confirm.
8d969bf6 1047(define-coding-system-alias 'unix 'undecided-unix)
1c445211
RS
1048(define-coding-system-alias 'dos 'undecided-dos)
1049(define-coding-system-alias 'mac 'undecided-mac)
1050
c0e17dd8
KH
;; Define `iso-latin-1' as an ISO 2022 type coding system whose
;; :designation vector lists `ascii' first and `latin-iso8859-1'
;; second, with the other two slots nil.  Its MIME charset is
;; `iso-8859-1'.  The two aliases after the definition give it the
;; names `iso-8859-1' and `latin-1'.
1051(define-coding-system 'iso-latin-1
1052 "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
1053 :coding-type 'iso-2022
1054 :mnemonic ?1
1055 :charset-list '(ascii latin-iso8859-1)
1056 :designation [ascii latin-iso8859-1 nil nil]
1057 :mime-charset 'iso-8859-1)
bc6a0946 1058
c0e17dd8
KH
1059(define-coding-system-alias 'iso-8859-1 'iso-latin-1)
1060(define-coding-system-alias 'latin-1 'iso-latin-1)
08c19a27 1061
c0e17dd8 1062;; Coding systems not specific to each language environment.
bc6a0946 1063
c0e17dd8
KH
;; Define `emacs-mule' (coding type `emacs-mule', mnemonic ?M).
;; :charset-list is the symbol `emacs-mule' rather than a list of
;; charsets.
;; NOTE(review): presumably that symbol selects the set of charsets
;; the emacs-mule format supports -- confirm.
1064(define-coding-system 'emacs-mule
1065 "Emacs 21 internal format used in buffer and string."
1066 :coding-type 'emacs-mule
7e8b4d67 1067 :charset-list 'emacs-mule
c0e17dd8
KH
1068 :mnemonic ?M)
1069
;; Define `utf-8' (coding type `utf-8', mnemonic ?U) over the
;; `unicode' charset, and make `mule-utf-8' an alias for it.
1070(define-coding-system 'utf-8
1071 "UTF-8."
1072 :coding-type 'utf-8
1073 :mnemonic ?U
1074 :charset-list '(unicode))
1075
1076(define-coding-system-alias 'mule-utf-8 'utf-8)
1077
;; Define `utf-8-emacs'.  It has the same coding type and mnemonic as
;; `utf-8', but its charset list is `(emacs)' instead of `(unicode)';
;; it also declares `utf-8' as its MIME charset.
1078(define-coding-system 'utf-8-emacs
26dbea99 1079 "Support for all Emacs characters (including non-Unicode characters)."
c0e17dd8
KH
1080 :coding-type 'utf-8
1081 :mnemonic ?U
ebc563df
DL
1082 :charset-list '(emacs)
1083 :mime-charset 'utf-8)
c0e17dd8
KH
1084
;; Define the generic `utf-16' coding system.  Unlike the -le/-be
;; variants defined after it, this form gives neither :endian nor
;; :bom.
;; NOTE(review): presumably the defaults of `define-coding-system'
;; then apply -- confirm what they are.
1085(define-coding-system 'utf-16
1086 "UTF-16"
1087 :coding-type 'utf-16
1088 :mnemonic ?U
ebc563df
DL
1089 :charset-list '(unicode)
1090 :mime-charset 'utf-16)
c0e17dd8
KH
1091
;; Define `utf-16-le-nosig': UTF-16 with :endian `little' and no :bom
;; property.  No MIME charset is declared.
1092(define-coding-system 'utf-16-le-nosig
cb269bb1 1093 "UTF-16, little endian, no signature."
c0e17dd8
KH
1094 :coding-type 'utf-16
1095 :mnemonic ?U
1096 :charset-list '(unicode)
1097 :endian 'little)
1098
;; Define `utf-16-be-nosig': UTF-16 with :endian `big' and no :bom
;; property.  No MIME charset is declared.
1099(define-coding-system 'utf-16-be-nosig
cb269bb1 1100 "UTF-16, big endian, no signature."
c0e17dd8
KH
1101 :coding-type 'utf-16
1102 :mnemonic ?U
1103 :charset-list '(unicode)
1104 :endian 'big)
1105
;; Define `utf-16-le': UTF-16 with :endian `little' and :bom t (the
;; docstring calls the BOM a "signature").
;; NOTE(review): RFC 2781 registers the label as "UTF-16LE" and says
;; text so labelled carries no BOM, whereas this definition pairs
;; :bom t with the MIME charset `utf-16-le' -- confirm this is
;; intended.
1106(define-coding-system 'utf-16-le
cb269bb1 1107 "UTF-16, little endian, with signature."
c0e17dd8
KH
1108 :coding-type 'utf-16
1109 :mnemonic ?U
1110 :charset-list '(unicode)
a44cf41b 1111 :bom t
ebc563df
DL
1112 :endian 'little
1113 :mime-charset 'utf-16-le)
c0e17dd8
KH
1114
;; Define `utf-16-be': UTF-16 with :endian `big' and :bom t (the
;; docstring calls the BOM a "signature").
;; NOTE(review): RFC 2781 registers the label as "UTF-16BE" and says
;; text so labelled carries no BOM, whereas this definition pairs
;; :bom t with the MIME charset `utf-16-be' -- confirm this is
;; intended.
1115(define-coding-system 'utf-16-be
cb269bb1 1116 "UTF-16, big endian, with signature."
c0e17dd8
KH
1117 :coding-type 'utf-16
1118 :mnemonic ?U
1119 :charset-list '(unicode)
a44cf41b 1120 :bom t
ebc563df
DL
1121 :endian 'big
1122 :mime-charset 'utf-16-be)
c0e17dd8
KH
1123
;; Define `iso-2022-7bit'.  Only the first slot of the :designation
;; vector is non-nil, matching the docstring's "using only G0".
;; :charset-list is the symbol `iso-2022' rather than an explicit
;; list.
1124(define-coding-system 'iso-2022-7bit
cb269bb1 1125 "ISO 2022 based 7-bit encoding using only G0."
c0e17dd8
KH
1126 :coding-type 'iso-2022
1127 :mnemonic ?J
1128 :charset-list 'iso-2022
1129 :designation [(ascii t) nil nil nil]
1130 :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
1131
;; Define `iso-2022-7bit-ss2'.  The :designation vector puts
;; (ascii 94) in the first slot and (nil 96) in the third, and :flags
;; includes `single-shift'; the docstring describes this as using SS2
;; for 96-charsets.
1132(define-coding-system 'iso-2022-7bit-ss2
cb269bb1 1133 "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
c0e17dd8
KH
1134 :coding-type 'iso-2022
1135 :mnemonic ?$
1136 :charset-list 'iso-2022
1137 :designation [(ascii 94) nil (nil 96) nil]
1138 :flags '(short ascii-at-eol ascii-at-cntl 7-bit
1139 designation single-shift composition))
1140
;; Define `iso-2022-7bit-lock'.  The :designation vector puts
;; (ascii 94) in the first slot and (nil 96) in the second, and :flags
;; includes `locking-shift'.  The alias after the definition makes
;; `iso-2022-int-1' another name for it.
1141(define-coding-system 'iso-2022-7bit-lock
cb269bb1 1142 "ISO-2022 coding system using Locking-Shift for 96-charset."
c0e17dd8
KH
1143 :coding-type 'iso-2022
1144 :mnemonic ?&
1145 :charset-list 'iso-2022
1146 :designation [(ascii 94) (nil 96) nil nil]
1147 :flags '(ascii-at-eol ascii-at-cntl 7-bit
1148 designation locking-shift composition))
4951a271 1149
2e21aa27 1150(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
08c19a27 1151
;; Define `iso-2022-7bit-lock-ss2'.  Unlike the other iso-2022 coding
;; systems in this file it lists its charsets explicitly (ASCII plus
;; Japanese, Korean and Chinese sets), and each of the last three
;; :designation slots names specific charsets.  :flags has both
;; `locking-shift' and `single-shift', and has `init-bol'.  The alias
;; after the definition makes `iso-2022-cjk' another name for it.
c0e17dd8 1152(define-coding-system 'iso-2022-7bit-lock-ss2
cb269bb1 1153 "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
c0e17dd8
KH
1154 :coding-type 'iso-2022
1155 :mnemonic ?i
1156 :charset-list '(ascii
1157 japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
1158 korean-ksc5601
1159 chinese-gb2312
1160 chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
1161 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
1162 chinese-cns11643-7)
1163 :designation [(ascii 94)
1164 (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
1165 (nil chinese-cns11643-2)
1166 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
1167 chinese-cns11643-6 chinese-cns11643-7)]
1168 :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
1169 single-shift init-bol))
08c19a27 1170

2e21aa27 1171(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
08c19a27 1172
;; Define `iso-2022-8bit-ss2'.  It has the same :designation vector as
;; `iso-2022-7bit-ss2', but its :flags list omits `short' and `7-bit'.
c0e17dd8 1173(define-coding-system 'iso-2022-8bit-ss2
cb269bb1 1174 "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
c0e17dd8
KH
1175 :coding-type 'iso-2022
1176 :mnemonic ?@
1177 :charset-list 'iso-2022
1178 :designation [(ascii 94) nil (nil 96) nil]
1179 :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
08c19a27 1180
c0e17dd8
KH
;; Define `compound-text' as an iso-2022 type coding system with
;; (ascii 94) in the first :designation slot and
;; (latin-iso8859-1 katakana-jisx0201 96) in the second.  :flags
;; includes `composition'.  The two aliases after the definition give
;; it the names `x-ctext' and `ctext'.
1181(define-coding-system 'compound-text
1182 "Compound text based generic encoding for decoding unknown messages.
73066974
EZ
1183
1184This coding system does not support ICCCM Extended Segments."
c0e17dd8
KH
1185 :coding-type 'iso-2022
1186 :mnemonic ?x
1187 :charset-list 'iso-2022
1188 :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1189 :flags '(ascii-at-eol ascii-at-cntl
1190 designation locking-shift single-shift composition)
ebc563df
DL
1191 ;; Fixme: this isn't a valid MIME charset and has to be
1192 ;; special-cased elsewhere -- fx
c0e17dd8 1193 :mime-charset 'x-ctext)
d49a4835 1194
cb5be6c9
EZ
1195(define-coding-system-alias 'x-ctext 'compound-text)
1196(define-coding-system-alias 'ctext 'compound-text)
73066974 1197
cb5be6c9
EZ
1198;; Same as compound-text, but doesn't produce composition escape
1199;; sequences. Used in post-read and pre-write conversions of
1200;; compound-text-with-extensions, see mule.el. Note that this should
1201;; not have a mime-charset property, to prevent it from showing up
1202;; close to the beginning of coding systems ordered by priority.
;; `ctext-no-compositions' takes the same properties as `compound-text'
;; except that `composition' is left out of :flags and no
;; :mime-charset is given.
;;
;; The call uses the (NAME DOCSTRING &rest PROPS) convention shared by
;; every other `define-coding-system' form in this file.  The stale
;; positional arguments `2 ?x' (coding type number and mnemonic of the
;; old `make-coding-system' interface) have been removed; they put `2'
;; in the docstring slot and shifted the property list.
(define-coding-system 'ctext-no-compositions
  "Compound text based generic encoding for decoding unknown messages.

Like `compound-text', but does not produce escape sequences for compositions."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
           designation locking-shift single-shift))
1213
;; Define `compound-text-with-extensions'.  Its coding type is
;; `raw-text'; the work is delegated to the two conversion functions
;; named in :post-read-conversion and :pre-write-conversion, which are
;; not defined in this file.  The two aliases after the definition
;; give it the names `x-ctext-with-extensions' and
;; `ctext-with-extensions'.
1214(define-coding-system 'compound-text-with-extensions
73066974
EZ
1215 "Compound text encoding with ICCCM Extended Segment extensions.
1216
1217This coding system should be used only for X selections. It is inappropriate
1218for decoding and encoding files, process I/O, etc."
c0e17dd8
KH
1219 :coding-type 'raw-text
1220 :mnemonic ?x
1221 :post-read-conversion 'ctext-post-read-conversion
1222 :pre-write-conversion 'ctext-pre-write-conversion)
73066974 1223

cb5be6c9
EZ
1224(define-coding-system-alias
1225 'x-ctext-with-extensions 'compound-text-with-extensions)
1226(define-coding-system-alias
1227 'ctext-with-extensions 'compound-text-with-extensions)
75b6fb58 1228
c0e17dd8
KH
;; Define `us-ascii' as a `charset' type coding system over `ascii'
;; only.  :default-char is ?? (a question mark), matching the
;; docstring's statement that other characters become `?'.  The alias
;; after the definition makes `iso-safe' another name for it.
1229(define-coding-system 'us-ascii
1230 "Convert all characters but ASCII to `?'."
1231 :coding-type 'charset
1232 :mnemonic ?-
1233 :charset-list '(ascii)
1234 :default-char ??
1235 :mime-charset 'us-ascii)
1236
1237(define-coding-system-alias 'iso-safe 'us-ascii)
f6eb8ace 1238
c0e17dd8 1239;; Use us-ascii for terminal output if some other coding system is not
e8dd0160 1240;; specified explicitly.
;; NOTE(review): `set-safe-terminal-coding-system-internal' is not
;; defined in this file; presumably it records the fallback terminal
;; coding system -- confirm.
c0e17dd8 1241(set-safe-terminal-coding-system-internal 'us-ascii)
f6eb8ace 1242
08c19a27 1243;; The other coding systems are defined in the language-specific
c0e17dd8 1244;; files under lisp/language.
08c19a27 1245
678dc7ec
RS
1246;; Normally, set coding system to `undecided' before reading a file.
1247;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
1248;; but we regard them as containing multibyte characters.
1249;; Tar files are not decoded at all, but we treat them as raw bytes.
08c19a27 1250
;; Default coding systems selected by file name.  Each element is
;; (REGEXP . CODING) or (REGEXP . (DECODING . ENCODING)).  The final
;; element's empty regexp matches every file name, so it acts as the
;; catch-all entry.
(setq file-coding-system-alist
      '(("\\.elc\\'" . (emacs-mule . emacs-mule))
        ("\\.utf\\(-8\\)?\\'" . utf-8)
        ;; This is the defined default for XML documents.  It may be
        ;; overridden by a charset specification in the header.  That
        ;; should be grokked by the auto-coding mechanism, but rms
        ;; vetoed that.  -- fx
        ("\\.xml\\'" . utf-8)
        ;; We use raw-text for reading loaddefs.el so that if it
        ;; happens to have DOS or Mac EOLs, they are converted to
        ;; newlines.  This is required to make the special treatment
        ;; of the "\ newline" combination in loaddefs.el, which marks
        ;; the beginning of a doc string, work.
        ;; The dot before "el" is escaped so that only a literal
        ;; "loaddefs.el" matches, not e.g. "loaddefs-el".
        ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
        ("\\.tar\\'" . (no-conversion . no-conversion))
        ("" . (undecided . nil))))
08c19a27
KH
1267
1268\f
1269;;; Setting coding categories and their priorities.
1270
1271;; This setting is just to read Emacs Lisp source files which
1272;; contain multilingual text while dumping Emacs. More appropriate
2792ce16 1273;; values are set by the command `set-language-environment' for each
08c19a27
KH
1274;; language environment.
1275
c0e17dd8
KH
;; Pass `iso-latin-1', `utf-8' and `iso-2022-7bit', in that order, to
;; `set-coding-system-priority'.
;; NOTE(review): presumably the arguments are in decreasing priority
;; -- confirm against `set-coding-system-priority'.
1276(set-coding-system-priority
1277 'iso-latin-1
1278 'utf-8
1279 'iso-2022-7bit
1280 )
08c19a27 1281
c1b628eb
KH
1282\f
1283;;; Miscellaneous settings.
c1b628eb 1284
c0e17dd8
KH
1285;; Make all multibyte characters self-insert.
;; The range set is every character code from 128 up to the value
;; returned by `max-char'.
;; NOTE(review): (nth 1 global-map) is presumably the char-table
;; inside the keymap -- confirm.
1286(set-char-table-range (nth 1 global-map)
007eef16 1287 (cons 128 (max-char))
c0e17dd8
KH
1288 'self-insert-command)
1289
;; Set the `latin-extra-code-table' entry for the code ?\222 to t.
1290(aset latin-extra-code-table ?\222 t)
4cb4b388 1291
cb269bb1
DL
1292;; Move least specific charsets to end of priority list
;; The current priority list, with `emacs' and `unicode' removed, is
;; passed as the argument list to `set-charset-priority'.
;; NOTE(review): presumably charsets not named in the call keep their
;; place after the named ones, which is what moves those two to the
;; end -- confirm.
1293
1294(apply #'set-charset-priority
1295 (delq 'unicode (delq 'emacs (charset-priority-list))))
1296
6820ed3f
DL
1297;; The old code-pages library is obsoleted by coding systems based on
1298;; the charsets defined in this file but might be required by user
1299;; code.
;; Providing the feature here means a (require 'code-pages) in such
;; code finds it already present.
1300(provide 'code-pages)
1301
3803079c
DL
1302;; Local variables:
1303;; no-byte-compile: t
1304;; End:
1305
08c19a27 1306;;; mule-conf.el ends here