Setup the default fontset by the new
[bpt/emacs.git] / lisp / international / mule-conf.el
CommitLineData
08c19a27
KH
1;;; mule-conf.el --- configure multilingual environment
2
08c19a27 3;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
c0e17dd8
KH
5;; Copyright (C) 2001, 2002
6;; National Institute of Advanced Industrial Science and Technology (AIST)
7;; Registration Number H13PRO009
9fea1ee1 8;; Copyright (C) 2002 Free Software Foundation, Inc.
08c19a27 9
24adcac1 10;; Keywords: i18n, mule, multilingual, character set, coding system
08c19a27
KH
11
12;; This file is part of GNU Emacs.
13
14;; GNU Emacs is free software; you can redistribute it and/or modify
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
25;; along with GNU Emacs; see the file COPYING. If not, write to the
26;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27;; Boston, MA 02111-1307, USA.
28
29;;; Commentary:
30
3803079c
DL
31;; This file defines the Emacs charsets and some basic coding systems.
32;; Other coding systems are defined in the files in directory
33;; lisp/language.
08c19a27
KH
34
35;;; Code:
36
155b256a
DL
37;;; Remarks
38
3803079c
DL
39;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/.
40;; Standards docs equivalent to iso-2022 and iso-8859 are at
41;; http://www.ecma.ch/.
42
32fefe80
DL
43;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
44;; MS Windows, which are presumably the only charsets we really need
45;; to worry about on such systems:
155b256a
DL
46;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
47;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
48;; 1258, 874, 932, 936, 949, 950
49
08c19a27
KH
50;;; Definitions of character sets.
51
24adcac1 52;; The charsets `ascii' and `unicode' are already defined in charset.c
c0e17dd8
KH
53;; as below:
54;;
55;; (define-charset 'ascii
56;; ""
57;; :dimension 1
58;; :code-space [0 127]
59;; :iso-final-char ?A
60;; :ascii-compatible-p t
61;; :emacs-mule-id 0
62;; :code-offset 0)
63;;
64;; (define-charset 'unicode
65;; ""
66;; :dimension 3
67;; :code-space [0 255 0 255 0 16]
68;; :ascii-compatible-p t
69;; :code-offset 0)
70;;
71;; We now set :docstring, :short-name, and :long-name properties.
72
;; `ascii' and `unicode' come predefined from charset.c without any
;; descriptive text, so attach their :docstring, :short-name and
;; :long-name properties here.  Each entry is (CHARSET PROPERTY VALUE).
(dolist (entry '((ascii   :docstring  "ASCII (ISO646 IRV)")
                 (ascii   :short-name "ASCII")
                 (ascii   :long-name  "ASCII (ISO646 IRV)")
                 (unicode :docstring  "Unicode (ISO10646)")
                 (unicode :short-name "Unicode")
                 (unicode :long-name  "Unicode (ISO10646)")))
  (apply 'put-charset-property entry))
85
86(define-charset-alias 'ucs 'unicode)
87
88(define-charset 'emacs
cb269bb1 89 "Full Emacs characters"
c0e17dd8
KH
90 :ascii-compatible-p t
91 :code-space [ 0 255 0 255 0 63 ]
92 :code-offset 0
93 :supplementary-p t)
94
95(define-charset 'iso-8859-1
24adcac1 96 "Latin-1 (ISO/IEC 8859-1)"
c0e17dd8
KH
97 :short-name "Latin-1"
98 :ascii-compatible-p t
99 :code-space [0 255]
100 :code-offset 0)
101
102(define-charset 'latin-iso8859-1
103 "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
104 :short-name "RHP of Latin-1"
105 :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
106 :iso-final-char ?A
107 :emacs-mule-id 129
108 :code-space [32 127]
109 :code-offset 160)
110
b714f5c2
DL
111;; Name perhaps not ideal, but is XEmacs-compatible.
112(define-charset 'control-1
c0e17dd8
KH
113 "8-bit control code (0x80..0x9F)"
114 :short-name "8-bit control code"
115 :code-space [128 159]
116 :code-offset 128)
117
b714f5c2
DL
118(define-charset 'eight-bit-control
119 "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
120 :short-name "Raw bytes 0x80..0x9F"
121 :code-space [128 159]
122 :code-offset #x3FFF80) ; see character.h
123
c0e17dd8 124(define-charset 'eight-bit-graphic
b714f5c2
DL
125 "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
126 :short-name "Raw bytes 0xA0..0xFF"
c0e17dd8 127 :code-space [160 255]
b714f5c2 128 :code-offset #x3FFFA0) ; see character.h
c0e17dd8
KH
129
(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
                                          iso-ir iso-final
                                          emacs-mule-id map)
  "Define an 8-bit charset and, optionally, its right-hand-part charset.
For internal use in this file only.

SYMBOL names a charset with code space [0 255] that is defined from
the map MAP, with NAME as its docstring and long name and NICKNAME as
its short name.

If ISO-SYMBOL is non-nil, it names a second charset with code space
[32 127], defined as the subset of SYMBOL's codes 160..255 shifted
by -128.  ISO-FINAL and EMACS-MULE-ID become its :iso-final-char and
:emacs-mule-id properties.  If ISO-IR is non-nil, it is the number
appended as \"ISO-IR-N\" to that charset's docstring.

The argument forms SYMBOL, ISO-SYMBOL, NAME, NICKNAME and ISO-IR are
each evaluated exactly once, before either charset is defined."
  ;; Uninterned symbols hold the values that the expansion uses more
  ;; than once, so the bindings cannot capture or shadow any variable
  ;; visible to the argument forms or to `define-charset'.
  (let ((full (make-symbol "full"))
        (rhp (make-symbol "rhp"))
        (long (make-symbol "long"))
        (short (make-symbol "short"))
        (ir (make-symbol "ir")))
    `(let ((,full ,symbol)
           (,rhp ,iso-symbol)
           (,long ,name)
           (,short ,nickname)
           (,ir ,iso-ir))
       (define-charset ,full
         ,long
         :short-name ,short
         :long-name ,long
         :ascii-compatible-p t
         :code-space [0 255]
         :map ,map)
       (if ,rhp
           (define-charset ,rhp
             (if ,ir
                 (format "Right-Hand Part of %s (%s): ISO-IR-%d"
                         ,long ,short ,ir)
               (format "Right-Hand Part of %s (%s)" ,long ,short))
             :short-name (format "RHP of %s" ,long)
             :long-name (format "RHP of %s (%s)" ,long ,short)
             :iso-final-char ,iso-final
             :emacs-mule-id ,emacs-mule-id
             :code-space [32 127]
             :subset (list ,full 160 255 -128))))))
c0e17dd8
KH
153
154(define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
155 "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
156
157(define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
158 "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
159
160(define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
161 "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
162
163(define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
164 "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
165
166(define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
167 "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
168
169(define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
170 "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
171
172(define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
173 "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
174
175(define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
176 "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
177
3803079c 178(define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
bbe3715c 179 "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")
3803079c 180
bbe3715c
DL
181;; http://www.nectec.or.th/it-standards/iso8859-11/
182;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
183;; plus nbsp
184(define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
185 "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")
186
187;; 8859-12 doesn't (yet?) exist.
3803079c 188
c0e17dd8 189(define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
3803079c 190 "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
c0e17dd8
KH
191
192(define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
193 "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
194
195(define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
196 "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
197
7e8b4d67 198(define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
08a2119c 199 "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
7e8b4d67 200
cb269bb1
DL
201;; No point in keeping it around.
202(fmakunbound 'define-iso-single-byte-charset)
203
bbe3715c 204;; Can this be shared with 8859-11?
c0e17dd8
KH
205(define-charset 'thai-tis620
206 "TIS620.2533"
207 :short-name "TIS620.2533"
208 :iso-final-char ?T
209 :emacs-mule-id 133
210 :code-space [32 127]
211 :code-offset #x0E00)
212
bbe3715c 213;; Fixme: doc for this, c.f. above
c0e17dd8
KH
214(define-charset 'tis620-2533
215 "TIS620.2533"
216 :short-name "TIS620.2533"
217 :ascii-compatible-p t
218 :code-space [0 255]
fd6c8fc1 219 :superset '(ascii eight-bit-control (thai-tis620 . 128)))
c0e17dd8
KH
220
221(define-charset 'jisx0201
222 "JISX0201"
223 :short-name "JISX0201"
c0e17dd8
KH
224 :code-space [33 254]
225 :map "jisx0201")
226
227(define-charset 'latin-jisx0201
228 "Roman Part of JISX0201.1976"
229 :short-name "JISX0201 Roman"
230 :long-name "Japanese Roman (JISX0201.1976)"
231 :iso-final-char ?J
232 :emacs-mule-id 138
233 :code-space [33 126]
fd6c8fc1 234 :subset '(jisx0201 33 126 0))
c0e17dd8
KH
235
236(define-charset 'katakana-jisx0201
237 "Katakana Part of JISX0201.1976"
238 :short-name "JISX0201 Katakana"
239 :long-name "Japanese Katakana (JISX0201.1976)"
240 :iso-final-char ?I
241 :emacs-mule-id 137
242 :code-space [33 126]
fd6c8fc1 243 :subset '(jisx0201 161 254 -128))
c0e17dd8
KH
244
245(define-charset 'chinese-gb2312
246 "GB2312 Chinese simplified: ISO-IR-58"
247 :short-name "GB2312"
248 :long-name "GB2312: ISO-IR-58"
249 :iso-final-char ?A
250 :emacs-mule-id 145
251 :code-space [33 126 33 126]
252 :code-offset #x110000
253 :unify-map "gb2312-1980")
254
7c9e1024 255(define-charset 'chinese-gbk
24adcac1 256 "GBK Chinese simplified."
7c9e1024 257 :short-name "GBK"
7c9e1024 258 :code-space [#x40 #xFE #x81 #xFE]
64762f77 259 :code-offset #x160000
7c9e1024 260 :unify-map "gbk")
155b256a 261(define-charset-alias 'cp936 'chinese-gbk)
32fefe80 262(define-charset-alias 'windows-936 'chinese-gbk)
7c9e1024 263
c0e17dd8
KH
264(define-charset 'chinese-cns11643-1
265 "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
266 :short-name "CNS11643-1"
267 :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
268 :iso-final-char ?G
269 :emacs-mule-id 149
270 :code-space [33 126 33 126]
271 :code-offset #x114000
272 :unify-map "cns11643-1")
273
274(define-charset 'chinese-cns11643-2
275 "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
276 :short-name "CNS11643-2"
277 :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
278 :iso-final-char ?H
279 :emacs-mule-id 150
280 :code-space [33 126 33 126]
281 :code-offset #x118000
282 :unify-map "cns11643-2")
283
284(define-charset 'chinese-cns11643-3
285 "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
286 :short-name "CNS11643-3"
287 :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
288 :iso-final-char ?I
289 :code-space [33 126 33 126]
290 :emacs-mule-id 246
291 :code-offset #x11C000)
292
293(define-charset 'chinese-cns11643-4
294 "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
295 :short-name "CNS11643-4"
296 :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
297 :iso-final-char ?J
298 :emacs-mule-id 247
299 :code-space [33 126 33 126]
300 :code-offset #x120000)
301
302(define-charset 'chinese-cns11643-5
303 "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
304 :short-name "CNS11643-5"
305 :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
306 :iso-final-char ?K
307 :emacs-mule-id 248
308 :code-space [33 126 33 126]
309 :code-offset #x124000)
310
311(define-charset 'chinese-cns11643-6
312 "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
313 :short-name "CNS11643-6"
314 :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
315 :iso-final-char ?L
316 :emacs-mule-id 249
317 :code-space [33 126 33 126]
318 :code-offset #x128000)
319
320(define-charset 'chinese-cns11643-7
321 "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
322 :short-name "CNS11643-7"
323 :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
324 :iso-final-char ?M
325 :emacs-mule-id 250
326 :code-space [33 126 33 126]
327 :code-offset #x12C000)
328
329(define-charset 'big5
330 "Big5 (Chinese traditional)"
331 :short-name "Big5"
c0e17dd8
KH
332 :code-space [#x40 #xFE #xA1 #xFE]
333 :code-offset #x130000
334 :unify-map "big5")
155b256a
DL
335;; Fixme: AKA cp950 according to
336;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>. Is
337;; that correct?
c0e17dd8
KH
338
339(define-charset 'chinese-big5-1
24adcac1 340 "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
c0e17dd8
KH
341 :short-name "Big5 (Level-1)"
342 :long-name "Big5 (Level-1) A141-C67F"
343 :iso-final-char ?0
344 :emacs-mule-id 152
345 :code-space [#x21 #x7E #x21 #x7E]
7c9e1024 346 :code-offset #x135000
c0e17dd8
KH
347 :unify-map "big5-1")
348
349(define-charset 'chinese-big5-2
24adcac1 350 "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
c0e17dd8
KH
351 :short-name "Big5 (Level-2)"
352 :long-name "Big5 (Level-2) C940-FEFE"
353 :iso-final-char ?1
354 :emacs-mule-id 153
355 :code-space [#x21 #x7E #x21 #x7E]
7c9e1024 356 :code-offset #x137800
c0e17dd8
KH
357 :unify-map "big5-2")
358
359(define-charset 'japanese-jisx0208
360 "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
361 :short-name "JISX0208"
362 :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
363 :iso-final-char ?B
364 :emacs-mule-id 146
365 :code-space [33 126 33 126]
366 :code-offset #x140000
367 :unify-map "jisx0208-1990")
368
369(define-charset 'japanese-jisx0208-1978
370 "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
371 :short-name "JISX0208.1978"
372 :long-name "JISX0208.1978 (Japanese): ISO-IR-42"
373 :iso-final-char ?@
374 :emacs-mule-id 144
375 :code-space [33 126 33 126]
376 :code-offset #x144000
377 :unify-map "jisx0208-1978")
378
379(define-charset 'japanese-jisx0212
380 "JISX0212 Japanese supplement: ISO-IR-159"
381 :short-name "JISX0212"
382 :long-name "JISX0212 (Japanese): ISO-IR-159"
383 :iso-final-char ?D
384 :emacs-mule-id 148
385 :code-space [33 126 33 126]
386 :code-offset #x148000
387 :unify-map "jisx0212-1990")
388
389(define-charset 'japanese-jisx0213-1
390 "JISX0213 Plane 1 (Japanese)"
391 :short-name "JISX0213-1"
c0e17dd8
KH
392 :iso-final-char ?O
393 :emacs-mule-id 151
3adbd46f 394 :unify-map "jisx0213-1"
c0e17dd8
KH
395 :code-space [33 126 33 126]
396 :code-offset #x14C000)
397
398(define-charset 'japanese-jisx0213-2
399 "JISX0213 Plane 2 (Japanese)"
400 :short-name "JISX0213-2"
c0e17dd8
KH
401 :iso-final-char ?P
402 :emacs-mule-id 254
3adbd46f 403 :unify-map "jisx0213-2"
c0e17dd8
KH
404 :code-space [33 126 33 126]
405 :code-offset #x150000)
406
407(define-charset 'korean-ksc5601
408 "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
409 :short-name "KSC5601"
410 :long-name "KSC5601 (Korean): ISO-IR-149"
411 :iso-final-char ?C
412 :emacs-mule-id 147
413 :code-space [33 126 33 126]
b714f5c2
DL
414 :code-offset #x279f94
415 :unify-map "ksc5601-1987")
c0e17dd8 416
155b256a
DL
417;; Fixme: Korean cp949/UHC
418
c0e17dd8
KH
419(define-charset 'chinese-sisheng
420 "SiSheng characters for PinYin/ZhuYin"
421 :short-name "SiSheng"
422 :long-name "SiSheng (PinYin/ZhuYin)"
423 :iso-final-char ?0
424 :emacs-mule-id 160
425 :code-space [33 126]
c4e44241 426 :unify-map "sisheng"
c0e17dd8
KH
427 :code-offset #x200000)
428
b714f5c2
DL
429;; A subset of the 1989 version of IPA. It consists of the consonant
430;; signs used in English, French, German and Italian, and all vowels
431;; signs in the table. [says old MULE doc]
c0e17dd8
KH
432(define-charset 'ipa
433 "IPA (International Phonetic Association)"
434 :short-name "IPA"
c0e17dd8
KH
435 :iso-final-char ?0
436 :emacs-mule-id 161
c4e44241 437 :unify-map "ipa"
c0e17dd8
KH
438 :code-space [32 127]
439 :code-offset #x200080)
440
441(define-charset 'viscii
442 "VISCII1.1"
443 :short-name "VISCII"
444 :long-name "VISCII 1.1"
445 :code-space [0 255]
446 :map "viscii")
447
448(define-charset 'vietnamese-viscii-lower
449 "VISCII1.1 lower-case"
450 :short-name "VISCII lower"
451 :long-name "VISCII lower-case"
452 :iso-final-char ?1
453 :emacs-mule-id 162
454 :code-space [32 127]
65076506
KH
455 :code-offset #x200200
456 :unify-map "viscii-lower")
c0e17dd8
KH
457
458(define-charset 'vietnamese-viscii-upper
459 "VISCII1.1 upper-case"
460 :short-name "VISCII upper"
461 :long-name "VISCII upper-case"
462 :iso-final-char ?2
463 :emacs-mule-id 163
464 :code-space [32 127]
65076506
KH
465 :code-offset #x200280
466 :unify-map "viscii-upper")
c0e17dd8
KH
467
468(define-charset 'vscii
469 "VSCII1.1"
470 :short-name "VSCII"
c0e17dd8
KH
471 :code-space [0 255]
472 :map "vscii")
473
69862ba6
DL
474;; Fixme: see note in tcvn.map about combining characters
475(define-charset 'tcvn-5712
476 "TCVN-5712"
477 :code-space [0 255]
478 :map "tcvn")
479
c0e17dd8
KH
480(define-charset 'koi8-r
481 "KOI8-R"
482 :short-name "KOI8-R"
c0e17dd8
KH
483 :ascii-compatible-p t
484 :code-space [0 255]
485 :map "koi8-r")
486
487(define-charset-alias 'koi8 'koi8-r)
488
489(define-charset 'alternativnyj
490 "ALTERNATIVNYJ"
491 :short-name "alternativnyj"
c0e17dd8
KH
492 :ascii-compatible-p t
493 :code-space [0 255]
494 :map "ibm866")
bbe3715c
DL
495;; Fixme: http://czyborra.com/charsets/cyrillic.html says the
496;; following, but the iconv map for cp866 isn't the same as his chart
497;; for alternativnyj. I can't find anything that looks like an
498;; official definition of alternativnyj.
007eef16 499(define-charset-alias 'cp866 'alternativnyj)
08a2119c 500(define-charset-alias 'ibm866 'alternativnyj)
08c19a27 501
6ef462e0
DL
502(define-charset 'koi8-u
503 "KOI8-U"
504 :short-name "KOI8-U"
6ef462e0
DL
505 :ascii-compatible-p t
506 :code-space [0 255]
507 :map "koi8-u")
508
509(define-charset 'koi8-t
510 "KOI8-T"
511 :short-name "KOI8-T"
6ef462e0
DL
512 :ascii-compatible-p t
513 :code-space [0 255]
514 :map "koi8-t")
515
516(define-charset 'georgian-ps
517 "GEORGIAN-PS"
518 :short-name "GEORGIAN-PS"
6ef462e0
DL
519 :ascii-compatible-p t
520 :code-space [0 255]
521 :map "georgian-ps")
522
523(define-charset 'windows-1250
9fea1ee1 524 "WINDOWS-1250 (Central Europe)"
6ef462e0 525 :short-name "WINDOWS-1250"
6ef462e0
DL
526 :ascii-compatible-p t
527 :code-space [0 255]
528 :map "windows-1250")
529(define-charset-alias 'cp1250 'windows-1250)
530
531(define-charset 'windows-1251
155b256a 532 "WINDOWS-1251 (Cyrillic)"
6ef462e0 533 :short-name "WINDOWS-1251"
6ef462e0
DL
534 :ascii-compatible-p t
535 :code-space [0 255]
536 :map "windows-1251")
537(define-charset-alias 'cp1251 'windows-1251)
538
539(define-charset 'windows-1252
155b256a 540 "WINDOWS-1252 (Latin I)"
6ef462e0 541 :short-name "WINDOWS-1252"
6ef462e0
DL
542 :ascii-compatible-p t
543 :code-space [0 255]
544 :map "windows-1252")
545(define-charset-alias 'cp1252 'windows-1252)
546
9fea1ee1 547(define-charset 'windows-1253
155b256a 548 "WINDOWS-1253 (Greek)"
9fea1ee1 549 :short-name "WINDOWS-1253"
9fea1ee1
DL
550 :ascii-compatible-p t
551 :code-space [0 255]
552 :map "windows-1253")
553(define-charset-alias 'cp1253 'windows-1253)
554
555(define-charset 'windows-1254
155b256a 556 "WINDOWS-1254 (Turkish)"
9fea1ee1 557 :short-name "WINDOWS-1254"
9fea1ee1
DL
558 :ascii-compatible-p t
559 :code-space [0 255]
560 :map "windows-1254")
561(define-charset-alias 'cp1254 'windows-1254)
562
563(define-charset 'windows-1255
564 "WINDOWS-1255 (Hebrew)"
565 :short-name "WINDOWS-1255"
9fea1ee1
DL
566 :ascii-compatible-p t
567 :code-space [0 255]
568 :map "windows-1255")
569(define-charset-alias 'cp1255 'windows-1255)
570
571(define-charset 'windows-1256
572 "WINDOWS-1256 (Arabic)"
573 :short-name "WINDOWS-1256"
9fea1ee1
DL
574 :ascii-compatible-p t
575 :code-space [0 255]
576 :map "windows-1256")
577(define-charset-alias 'cp1256 'windows-1256)
578
579(define-charset 'windows-1257
580 "WINDOWS-1257 (Baltic)"
581 :short-name "WINDOWS-1257"
9fea1ee1
DL
582 :ascii-compatible-p t
583 :code-space [0 255]
584 :map "windows-1257")
585(define-charset-alias 'cp1257 'windows-1257)
586
587(define-charset 'windows-1258
155b256a 588 "WINDOWS-1258 (Viet Nam)"
9fea1ee1 589 :short-name "WINDOWS-1258"
9fea1ee1
DL
590 :ascii-compatible-p t
591 :code-space [0 255]
592 :map "windows-1258")
593(define-charset-alias 'cp1258 'windows-1258)
594
595(define-charset 'next
596 "NEXT"
597 :short-name "NEXT"
9fea1ee1
DL
598 :ascii-compatible-p t
599 :code-space [0 255]
600 :map "next")
601
6ef462e0
DL
602(define-charset 'cp1125
603 "CP1125"
604 :short-name "CP1125"
6ef462e0
DL
605 :code-space [0 255]
606 :map "cp1125")
607(define-charset-alias 'ruscii 'cp1125)
608;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
609(define-charset-alias 'cp866u 'cp1125)
610
bbe3715c
DL
611;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html
612;; shows this as not ASCII compatible, with various graphics in
613;; 0x01-0x1F.
007eef16 614(define-charset 'cp437
bbe3715c 615 "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
007eef16 616 :short-name "CP437"
007eef16
DL
617 :code-space [0 255]
618 :ascii-compatible-p t
619 :map "cp437")
620
155b256a
DL
621(define-charset 'cp720
622 "CP720 (Arabic)"
623 :short-name "CP720"
624 :code-space [0 255]
625 :ascii-compatible-p t
626 :map "cp720")
627
007eef16 628(define-charset 'cp737
bbe3715c 629 "CP737 (PC Greek)"
007eef16 630 :short-name "CP737"
007eef16
DL
631 :code-space [0 255]
632 :ascii-compatible-p t
633 :map "cp737")
634
635(define-charset 'cp775
bbe3715c 636 "CP775 (PC Baltic)"
007eef16 637 :short-name "CP775"
007eef16
DL
638 :code-space [0 255]
639 :ascii-compatible-p t
640 :map "cp775")
641
642(define-charset 'cp851
643 "CP851"
644 :short-name "CP851"
007eef16
DL
645 :code-space [0 255]
646 :ascii-compatible-p t
647 :map "cp851")
648
649(define-charset 'cp852
bbe3715c 650 "CP852 (MS-DOS Latin-2)"
007eef16 651 :short-name "CP852"
007eef16
DL
652 :code-space [0 255]
653 :ascii-compatible-p t
654 :map "cp852")
655
656(define-charset 'cp855
bbe3715c 657 "CP855 (IBM Cyrillic)"
007eef16 658 :short-name "CP855"
007eef16
DL
659 :code-space [0 255]
660 :ascii-compatible-p t
661 :map "cp855")
662
663(define-charset 'cp857
bbe3715c 664 "CP857 (IBM Turkish)"
007eef16 665 :short-name "CP857"
007eef16
DL
666 :code-space [0 255]
667 :ascii-compatible-p t
668 :map "cp857")
669
155b256a
DL
670(define-charset 'cp858
671 "CP858 (Multilingual Latin I + Euro)"
672 :short-name "CP858"
673 :code-space [0 255]
674 :ascii-compatible-p t
675 :map "cp858")
bbe3715c 676(define-charset-alias 'cp00858 'cp858) ; IANA has IBM00858/CP00858
155b256a 677
007eef16 678(define-charset 'cp860
bbe3715c 679 "CP860 (MS-DOS Portuguese)"
007eef16 680 :short-name "CP860"
007eef16
DL
681 :code-space [0 255]
682 :ascii-compatible-p t
683 :map "cp860")
684
685(define-charset 'cp861
bbe3715c 686 "CP861 (MS-DOS Icelandic)"
007eef16 687 :short-name "CP861"
007eef16
DL
688 :code-space [0 255]
689 :ascii-compatible-p t
690 :map "cp861")
691
692(define-charset 'cp862
bbe3715c 693 "CP862 (PC Hebrew)"
007eef16 694 :short-name "CP862"
007eef16
DL
695 :code-space [0 255]
696 :ascii-compatible-p t
697 :map "cp862")
698
699(define-charset 'cp863
bbe3715c 700 "CP863 (MS-DOS Canadian French)"
007eef16 701 :short-name "CP863"
007eef16
DL
702 :code-space [0 255]
703 :ascii-compatible-p t
704 :map "cp863")
705
706(define-charset 'cp864
bbe3715c 707 "CP864 (PC Arabic)"
007eef16 708 :short-name "CP864"
007eef16
DL
709 :code-space [0 255]
710 :ascii-compatible-p t
711 :map "cp864")
712
713(define-charset 'cp865
bbe3715c 714 "CP865 (MS-DOS Nordic)"
007eef16 715 :short-name "CP865"
007eef16
DL
716 :code-space [0 255]
717 :ascii-compatible-p t
718 :map "cp865")
719
720(define-charset 'cp869
bbe3715c 721 "CP869 (IBM Modern Greek)"
007eef16 722 :short-name "CP869"
007eef16
DL
723 :code-space [0 255]
724 :ascii-compatible-p t
725 :map "cp869")
726
727(define-charset 'cp874
bbe3715c 728 "CP874 (IBM Thai)"
007eef16 729 :short-name "CP874"
007eef16
DL
730 :code-space [0 255]
731 :ascii-compatible-p t
732 :map "cp874")
733
08c19a27
KH
734;; For Arabic, we need three different types of character sets.
735;; Digits are of direction left-to-right and of width 1-column.
736;; Others are of direction right-to-left and of width 1-column or
737;; 2-column.
c0e17dd8
KH
738(define-charset 'arabic-digit
739 "Arabic digit"
740 :short-name "Arabic digit"
c0e17dd8
KH
741 :iso-final-char ?2
742 :emacs-mule-id 164
743 :code-space [34 42]
744 :code-offset #x0600)
745
746(define-charset 'arabic-1-column
747 "Arabic 1-column"
748 :short-name "Arabic 1-col"
749 :long-name "Arabic 1-column"
750 :iso-final-char ?3
751 :emacs-mule-id 165
752 :code-space [33 126]
753 :code-offset #x200100)
754
755(define-charset 'arabic-2-column
756 "Arabic 2-column"
757 :short-name "Arabic 2-col"
758 :long-name "Arabic 2-column"
759 :iso-final-char ?4
760 :emacs-mule-id 224
761 :code-space [33 126]
762 :code-offset #x200180)
7153b1f1
KH
763
764;; Lao script.
c0e17dd8
KH
765;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
766(define-charset 'lao
767 "Lao characters (ISO10646 0E81..0EDF)"
768 :short-name "Lao"
c0e17dd8
KH
769 :iso-final-char ?1
770 :emacs-mule-id 167
771 :code-space [33 126]
772 :code-offset #x0E81)
773
774(define-charset 'mule-lao
775 "Lao characters (ISO10646 0E81..0EDF)"
776 :short-name "Lao"
c0e17dd8 777 :code-space [0 255]
fd6c8fc1 778 :superset '(ascii eight-bit-control (lao . 128)))
7153b1f1 779
08c19a27 780
7153b1f1
KH
781;; Indian scripts. Symbolic charset for data exchange. Glyphs are
782;; not assigned. They are automatically converted to each Indian
783;; script which IS-13194 supports.
784
c0e17dd8
KH
785(define-charset 'indian-is13194
786 "Generic Indian charset for data exchange with IS 13194"
787 :short-name "IS 13194"
788 :long-name "Indian IS 13194"
789 :iso-final-char ?5
790 :emacs-mule-id 225
791 :code-space [33 126]
c4e44241 792 :unify-map "is13194"
c0e17dd8
KH
793 :code-offset #x180000)
794
795(define-charset 'indian-glyph
796 "Glyphs for Indian characters."
797 :short-name "Indian glyph"
c0e17dd8
KH
798 :iso-final-char ?4
799 :emacs-mule-id 240
800 :code-space [32 127 32 127]
801 :code-offset #x180100)
7153b1f1
KH
802
803;; Actual Glyph for 1-column width.
c0e17dd8
KH
;; NOTE(review): this charset is given :emacs-mule-id 240, the same id
;; that `indian-glyph' is given earlier in this file.  Presumably only
;; one of the two should own that id -- TODO confirm.
804(define-charset 'indian-1-column
805 "Indian charset for 1-column width glyphs"
806 :short-name "Indian 1-col"
807 :long-name "Indian 1 Column"
808 :iso-final-char ?6
809 :emacs-mule-id 240
810 :code-space [33 126 33 126]
811 :code-offset #x184000)
08c19a27 812
08c19a27 813;; Actual Glyph for 2-column width.
c0e17dd8
KH
814(define-charset 'indian-2-column
815 "Indian charset for 2-column width glyphs"
816 :short-name "Indian 2-col"
817 :long-name "Indian 2 Column"
818 :iso-final-char ?5
819 :emacs-mule-id 251
820 :code-space [33 126 33 126]
fd6c8fc1 821 :superset '(indian-1-column))
c0e17dd8
KH
822
;; Two-dimensional Tibetan charset (code space 33..126 in each
;; dimension), placed at code offset #x190000 and carrying the unify
;; map "tibetan".  Each property is given exactly once.
(define-charset 'tibetan
  "Tibetan characters"
  :short-name "Tibetan 2-col"
  :long-name "Tibetan 2 column"
  :iso-final-char ?7
  :emacs-mule-id 252
  :unify-map "tibetan"
  :code-space [33 126 33 126]
  :code-offset #x190000)
833
834(define-charset 'tibetan-1-column
835 "Tibetan 1 column glyph"
836 :short-name "Tibetan 1-col"
837 :long-name "Tibetan 1 column"
838 :iso-final-char ?8
839 :emacs-mule-id 241
840 :code-space [33 126 33 37]
fd6c8fc1 841 :superset '(tibetan))
08c19a27 842
c0e17dd8
KH
843;; Subsets of Unicode.
844(define-charset 'mule-unicode-2500-33ff
845 "Unicode characters of the range U+2500..U+33FF."
846 :short-name "Unicode subset 2"
847 :long-name "Unicode subset (U+2500..U+33FF)"
848 :iso-final-char ?2
849 :emacs-mule-id 242
850 :code-space [#x20 #x7f #x20 #x47]
851 :code-offset #x2500)
852
;; Subset of Unicode covering U+E000..U+FFFF.  The code space alone
;; would run past the end of that range, so :max-code caps it at the
;; code point that corresponds to U+FFFF.
(define-charset 'mule-unicode-e000-ffff
  "Unicode characters of the range U+E000..U+FFFF."
  :short-name "Unicode subset 3"
  :long-name "Unicode subset (U+E000..U+FFFF)"
  :iso-final-char ?3
  :emacs-mule-id 243
  :code-space [#x20 #x7F #x20 #x75]
  :code-offset #xE000
  :max-code 30015)                      ; U+FFFF
c0e17dd8
KH
862
863(define-charset 'mule-unicode-0100-24ff
864 "Unicode characters of the range U+0100..U+24FF."
865 :short-name "Unicode subset"
866 :long-name "Unicode subset (U+0100..U+24FF)"
867 :iso-final-char ?1
868 :emacs-mule-id 244
869 :code-space [#x20 #x7F #x20 #x7F]
870 :code-offset #x100)
871
872(define-charset 'ethiopic
24adcac1 873 "Ethiopic characters for Amharic and Tigrigna."
c0e17dd8
KH
874 :short-name "Ethiopic"
875 :long-name "Ethiopic characters"
876 :iso-final-char ?3
877 :emacs-mule-id 245
c4e44241 878 :unify-map "ethiopic"
c0e17dd8
KH
879 :code-space [33 126 33 126]
880 :code-offset #x1A0000)
881
882(define-charset 'mac-roman
883 "Mac Roman charset"
884 :short-name "Mac Roman"
c0e17dd8
KH
885 :ascii-compatible-p t
886 :code-space [0 255]
887 :map "mac-roman")
888
6ef462e0
DL
889;; Fixme: modern EBCDIC variants, e.g. IBM00924?
890(define-charset 'ebcdic-us
891 "US version of EBCDIC"
892 :short-name "EBCDIC-US"
6ef462e0
DL
893 :code-space [0 255]
894 :mime-charset 'ebcdic-us
895 :map "ebcdic-us")
896
897(define-charset 'ebcdic-uk
898 "UK version of EBCDIC"
899 :short-name "EBCDIC-UK"
6ef462e0
DL
900 :code-space [0 255]
901 :mime-charset 'ebcdic-uk
902 :map "ebcdic-uk")
903
3803079c
DL
;; ASCII-compatible single-byte charset defined from the "hp-roman8" map.
(define-charset 'hp-roman8
  "Encoding used by Hewlett-Packard printer software"
  :short-name "HP-ROMAN8"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "hp-roman8")
910
bbe3715c
DL
911;; To make a coding system with this, a pre-write-conversion should
912;; account for the commented-out multi-valued code points in
913;; stdenc.map.
3803079c
DL
914(define-charset 'adobe-standard-encoding
915 "Adobe `standard encoding' used in PostScript"
916 :short-name "ADOBE-STANDARD-ENCODING"
6584daf1 917 :code-space [#x20 255]
3803079c
DL
918 :map "stdenc")
919
920(define-charset 'symbol
921 "Adobe symbol encoding used in PostScript"
922 :short-name "ADOBE-SYMBOL"
6584daf1 923 :code-space [#x20 255]
3803079c
DL
924 :map "symbol")
925
926(define-charset 'ibm850
bbe3715c 927 "DOS codepage 850 (Latin-1)"
3803079c 928 :short-name "IBM850"
6584daf1 929 :ascii-compatible-p t
3803079c
DL
930 :code-space [0 255]
931 :map "ibm850")
932(define-charset-alias 'cp850 'ibm850)
933
64762f77
KH
934(define-charset 'gb18030-2-byte
935 "GB18030 2-byte (0x814E..0xFEFE)"
936 :code-space [#x40 #xFE #x81 #xFE]
937 :supplementary-p t
938 :map "gb18030-2")
939
940(define-charset 'gb18030-4-byte-bmp
941 "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
942 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
943 :supplementary-p t
944 :map "gb18030-4")
945
946(define-charset 'gb18030-4-byte-smp
947 "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
948 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
949 :min-code '(#x9030 . #x8130)
950 :max-code '(#xE332 . #x9A35)
951 :supplementary-p t
952 :code-offset #x10000)
953
954(define-charset 'gb18030-4-byte-ext-1
955 "GB18030 4-byte (0x8431A530-0x8F39FE39)"
956 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
957 :min-code '(#x8431 . #xA530)
958 :max-code '(#x8F39 . #xFE39)
959 :supplementary-p t
960 :code-offset #x200000 ; ... #x22484B
961 )
962
963(define-charset 'gb18030-4-byte-ext-2
964 "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
965 :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
966 :min-code '(#xE332 . #x9A36)
967 :max-code '(#xFE39 . #xFE39)
968 :supplementary-p t
969 :code-offset #X22484C ; ... #x279f93
970 )
971
972(define-charset 'gb18030
973 "GB18030"
974 :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
975 :min-code 0
976 :max-code '(#xFE39 . #xFE39)
fd6c8fc1
KH
977 :superset '(ascii gb18030-2-byte
978 gb18030-4-byte-bmp gb18030-4-byte-smp
979 gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
64762f77 980
;; Unify the charsets below, one after another in the order listed.
(dolist (cs '(chinese-gb2312
              chinese-gbk
              chinese-cns11643-1
              chinese-cns11643-2
              big5
              chinese-big5-1
              chinese-big5-2
              vietnamese-viscii-lower
              vietnamese-viscii-upper
              ;; Fixme: unifying sucks in the charset tables, which may be large.
              ;; (Can we avoid that and do it anyhow (with lazy loading)?)
              chinese-sisheng
              indian-is13194))
  (unify-charset cs))
c4e44241 994;; (unify-charset 'ipa)
c4e44241
DL
995;; (unify-charset 'tibetan)
996;; (unify-charset 'ethiopic)
997;; (unify-charset 'japanese-jisx0208-1978)
998;; (unify-charset 'japanese-jisx0208)
999;; (unify-charset 'japanese-jisx0212)
3adbd46f
DL
1000;; (unify-charset 'japanese-jisx0213-1)
1001;; (unify-charset 'japanese-jisx0213-2)
ac80de11 1002;; (unify-charset 'korean-ksc5601)
65076506 1003
85f789f7 1004\f
d2a1ee18
KH
1005;; These are tables for translating characters on decoding and
1006;; encoding.
3adbd46f 1007;; Fixme: these aren't used now -- should they be?
;; Reset both standard translation tables (decoding and encoding) to nil.
(setq standard-translation-table-for-decode nil
      standard-translation-table-for-encode nil)
08c19a27 1011
3adbd46f
DL
1012;; Fixme: should this be retained? I guess it could be useful for
1013;; non-unified charsets.
bdf74bef
DL
;; Initial value is nil, i.e. no input translation table.
(defvar translation-table-for-input nil
  "If non-nil, a char table used to translate characters from input methods.
\(Currently only used by Quail.)")
08c19a27
KH
1017\f
1018;;; Make fundamental coding systems.
1019
c0e17dd8
KH
1020;; The coding system `no-conversion' is already defined in coding.c as
1021;; below:
1022;;
1023;; (define-coding-system 'no-conversion
1024;; "Do no conversion."
1025;; :coding-type 'raw-text
1026;; :mnemonic ?=)
08c19a27 1027
cb269bb1
DL
;; `binary' is another name for `no-conversion'.
(define-coding-system-alias 'binary 'no-conversion)

;; Coding system of type `raw-text', shown with mnemonic `t'.
(define-coding-system 'raw-text
  "Raw text, which means text contains random 8-bit codes.
Encoding text with this coding system produces the actual byte
sequence of the text in buffers and strings. An exception is made for
eight-bit-control characters. Each of them is encoded into a single
byte.

When you visit a file with this coding, the file is read into a
unibyte buffer as is (except for EOL format), thus each byte of a file
is treated as a character."
  :coding-type 'raw-text
  :mnemonic ?t)
1042
;; Coding system of type `undecided'; its charset list holds only `ascii'.
(define-coding-system 'undecided
  "No conversion on encoding, automatic conversion on decoding."
  :coding-type 'undecided
  :mnemonic ?-
  :charset-list '(ascii))

;; Short names for the three EOL-specific variants of `undecided'.
(define-coding-system-alias 'unix 'undecided-unix)
(define-coding-system-alias 'dos 'undecided-dos)
(define-coding-system-alias 'mac 'undecided-mac)
1052
c0e17dd8
KH
;; Latin-1 as an `iso-2022' type coding system: `ascii' and
;; `latin-iso8859-1' fill the first two designation slots, the other
;; two are nil.
(define-coding-system 'iso-latin-1
  "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
  :coding-type 'iso-2022
  :mnemonic ?1
  :charset-list '(ascii latin-iso8859-1)
  :designation [ascii latin-iso8859-1 nil nil]
  :mime-charset 'iso-8859-1)

;; Alternative names for `iso-latin-1'.
(define-coding-system-alias 'iso-8859-1 'iso-latin-1)
(define-coding-system-alias 'latin-1 'iso-latin-1)
08c19a27 1063
c0e17dd8 1064;; Coding systems not specific to each language environment.
bc6a0946 1065
c0e17dd8
KH
;; Coding system of type `emacs-mule'; the charset list is given as the
;; symbol `emacs-mule' rather than an explicit list.
(define-coding-system 'emacs-mule
  "Emacs 21 internal format used in buffer and string."
  :coding-type 'emacs-mule
  :charset-list 'emacs-mule
  :mnemonic ?M)
1071
;; UTF-8 over the `unicode' charset.
(define-coding-system 'utf-8
  "UTF-8."
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(unicode))

;; `mule-utf-8' is an alias for `utf-8'.
(define-coding-system-alias 'mule-utf-8 'utf-8)
1079
;; Same coding type as `utf-8', but the charset list is `emacs'
;; instead of `unicode'.
(define-coding-system 'utf-8-emacs
  "Support for all Emacs characters (including non-Unicode characters)."
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(emacs)
  :mime-charset 'utf-8)
c0e17dd8
KH
1086
;; Generic UTF-16: neither `:endian' nor `:bom' is specified here.
(define-coding-system 'utf-16
  "UTF-16"
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :mime-charset 'utf-16)
c0e17dd8
KH
1093
;; UTF-16 variants with a fixed byte order and no `:bom' property.

(define-coding-system 'utf-16-le-nosig
  "UTF-16, little endian, no signature."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :endian 'little)

(define-coding-system 'utf-16-be-nosig
  "UTF-16, big endian, no signature."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :endian 'big)
1107
;; UTF-16 variants with a fixed byte order and `:bom' set to t.

(define-coding-system 'utf-16-le
  "UTF-16, little endian, with signature."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :bom t
  :endian 'little
  :mime-charset 'utf-16-le)

(define-coding-system 'utf-16-be
  "UTF-16, big endian, with signature."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :bom t
  :endian 'big
  :mime-charset 'utf-16-be)
c0e17dd8
KH
1125
;; Only the first designation slot is filled; the other three are nil.
(define-coding-system 'iso-2022-7bit
  "ISO 2022 based 7-bit encoding using only G0."
  :coding-type 'iso-2022
  :mnemonic ?J
  :charset-list 'iso-2022
  :designation [(ascii t) nil nil nil]
  :flags '(short
	   ascii-at-eol ascii-at-cntl
	   7-bit designation composition))
1133
;; Like `iso-2022-7bit', plus a third designation slot (nil 96) and
;; the `single-shift' flag.
(define-coding-system 'iso-2022-7bit-ss2
  "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?$
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(short
	   ascii-at-eol ascii-at-cntl
	   7-bit designation single-shift composition))
1142
;; 7-bit ISO-2022 variant with the `locking-shift' flag; the second
;; designation slot is (nil 96).
(define-coding-system 'iso-2022-7bit-lock
  "ISO-2022 coding system using Locking-Shift for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?&
  :charset-list 'iso-2022
  :designation [(ascii 94) (nil 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
	   7-bit designation locking-shift composition))

;; `iso-2022-int-1' is an alias for `iso-2022-7bit-lock'.
(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
08c19a27 1153
;; 7-bit ISO-2022 variant with an explicit CJK charset list and both
;; the `locking-shift' and `single-shift' flags.
(define-coding-system 'iso-2022-7bit-lock-ss2
  "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
  :coding-type 'iso-2022
  :mnemonic ?i
  :charset-list '(ascii
		  japanese-jisx0208
		  japanese-jisx0208-1978
		  latin-jisx0201
		  korean-ksc5601
		  chinese-gb2312
		  chinese-cns11643-1
		  chinese-cns11643-2
		  chinese-cns11643-3
		  chinese-cns11643-4
		  chinese-cns11643-5
		  chinese-cns11643-6
		  chinese-cns11643-7)
  ;; One entry per designation slot, four in all.
  :designation [(ascii 94)
		(nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
		(nil chinese-cns11643-2)
		(nil chinese-cns11643-3
		     chinese-cns11643-4
		     chinese-cns11643-5
		     chinese-cns11643-6
		     chinese-cns11643-7)]
  :flags '(short
	   ascii-at-eol ascii-at-cntl
	   7-bit locking-shift single-shift init-bol))

;; `iso-2022-cjk' is an alias for `iso-2022-7bit-lock-ss2'.
(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
08c19a27 1174
;; Same designation as `iso-2022-7bit-ss2', but the flags contain
;; neither `short' nor `7-bit'.
(define-coding-system 'iso-2022-8bit-ss2
  "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?@
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(ascii-at-eol ascii-at-cntl
	   designation single-shift composition))
08c19a27 1182
c0e17dd8
KH
;; Compound text as an `iso-2022' type coding system whose flags
;; include `composition'.
(define-coding-system 'compound-text
  "Compound text based generic encoding for decoding unknown messages.

This coding system does not support ICCCM Extended Segments."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
	   designation locking-shift single-shift composition)
  ;; Fixme: this isn't a valid MIME charset and has to be
  ;; special-cased elsewhere -- fx
  :mime-charset 'x-ctext)

;; Alternative names for `compound-text'.
(define-coding-system-alias 'x-ctext 'compound-text)
(define-coding-system-alias 'ctext 'compound-text)
73066974 1199
cb5be6c9
EZ
1200;; Same as compound-text, but doesn't produce composition escape
1201;; sequences. Used in post-read and pre-write conversions of
1202;; compound-text-with-extensions, see mule.el. Note that this should
1203;; not have a mime-charset property, to prevent it from showing up
1204;; close to the beginning of coding systems ordered by priority.
;; The argument list is NAME, DOCSTRING, then keyword/value pairs, as in
;; every other `define-coding-system' call in this file.  The stray
;; positional arguments `2 ?x' that used to follow NAME have been
;; removed: they put 2 in the docstring slot and turned the real
;; docstring into the value of a bogus `?x' property.
;; The flags match `compound-text' minus `composition', and no
;; `:mime-charset' is given.
(define-coding-system 'ctext-no-compositions
  "Compound text based generic encoding for decoding unknown messages.

Like `compound-text', but does not produce escape sequences for compositions."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
	   designation locking-shift single-shift))
1215
;; A `raw-text' type coding system that hooks in
;; `ctext-post-read-conversion' and `ctext-pre-write-conversion'.
(define-coding-system 'compound-text-with-extensions
  "Compound text encoding with ICCCM Extended Segment extensions.

This coding system should be used only for X selections. It is inappropriate
for decoding and encoding files, process I/O, etc."
  :coding-type 'raw-text
  :mnemonic ?x
  :post-read-conversion 'ctext-post-read-conversion
  :pre-write-conversion 'ctext-pre-write-conversion)

;; Alternative names for `compound-text-with-extensions'.
(define-coding-system-alias 'x-ctext-with-extensions
  'compound-text-with-extensions)
(define-coding-system-alias 'ctext-with-extensions
  'compound-text-with-extensions)
75b6fb58 1230
c0e17dd8
KH
;; `charset' type coding system limited to `ascii', with `?' as the
;; default character.
(define-coding-system 'us-ascii
  "Convert all characters but ASCII to `?'."
  :coding-type 'charset
  :mnemonic ?-
  :charset-list '(ascii)
  :default-char ??
  :mime-charset 'us-ascii)

;; `iso-safe' is an alias for `us-ascii'.
(define-coding-system-alias 'iso-safe 'us-ascii)
f6eb8ace 1240
;; Unless another coding system is specified explicitly, terminal
;; output uses us-ascii.
(set-safe-terminal-coding-system-internal 'us-ascii)
f6eb8ace 1244
08c19a27 1245;; The other coding-systems are defined in each language specific
c0e17dd8 1246;; files under lisp/language.
08c19a27 1247
678dc7ec
RS
1248;; Normally, set coding system to `undecided' before reading a file.
1249;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
1250;; but we regard them as containing multibyte characters.
1251;; Tar files are not decoded at all, but we treat them as raw bytes.
08c19a27 1252
;; Each element is (REGEXP . CODING), where CODING is either a single
;; coding system or a (DECODING . ENCODING) pair.  The final entry has
;; an empty regexp.
(setq file-coding-system-alist
      '(("\\.elc\\'" . utf-8-emacs)
	("\\.utf\\(-8\\)?\\'" . utf-8)
	;; This is the defined default for XML documents.  It may be
	;; overridden by a charset specification in the header.  That
	;; should be grokked by the auto-coding mechanism, but rms
	;; vetoed that.  -- fx
	("\\.xml\\'" . utf-8)
	;; We use raw-text for reading loaddefs.el so that if it
	;; happens to have DOS or Mac EOLs, they are converted to
	;; newlines.  This is required to make the special treatment
	;; of the "\ newline" combination in loaddefs.el, which marks
	;; the beginning of a doc string, work.
	;; The dot before "el" is escaped so that it matches only a
	;; literal period, as in the other entries.
	("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
	("\\.tar\\'" . (no-conversion . no-conversion))
	("" . (undecided . nil))))
08c19a27
KH
1269
1270\f
1271;;; Setting coding categories and their priorities.
1272
1273;; This setting is just to read an Emacs Lisp source files which
1274;; contain multilingual text while dumping Emacs. More appropriate
2792ce16 1275;; values are set by the command `set-language-environment' for each
08c19a27
KH
1276;; language environment.
1277
c0e17dd8
KH
;; Arguments are passed in this order: iso-latin-1, utf-8, iso-2022-7bit.
(set-coding-system-priority 'iso-latin-1 'utf-8 'iso-2022-7bit)
08c19a27 1283
c1b628eb
KH
1284\f
1285;;; Miscellaneous settings.
c1b628eb 1286
c0e17dd8
KH
1287;; Make all multibyte characters self-insert.
;; The char table is the second element of `global-map'.  Bind every
;; character from 128 up to (max-char) to `self-insert-command'.
(let ((keymap-char-table (nth 1 global-map))
      (char-range (cons 128 (max-char))))
  (set-char-table-range keymap-char-table char-range 'self-insert-command))

;; Set the entry for character ?\222 in `latin-extra-code-table' to t.
(aset latin-extra-code-table ?\222 t)
4cb4b388 1293
cb269bb1
DL
1294;; Move least specific charsets to end of priority list
1295
;; Remove `emacs' and then `unicode' from the current priority list and
;; pass what is left to `set-charset-priority'.
(let ((prioritized (charset-priority-list)))
  (setq prioritized (delq 'emacs prioritized))
  (setq prioritized (delq 'unicode prioritized))
  (apply #'set-charset-priority prioritized))
1298
6820ed3f
DL
;; Coding systems based on the charsets defined in this file obsolete
;; the old code-pages library, but user code might still require it,
;; so provide the feature here.
(provide 'code-pages)
1303
3803079c
DL
1304;; Local variables:
1305;; no-byte-compile: t
1306;; End:
1307
08c19a27 1308;;; mule-conf.el ends here