(ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
[bpt/emacs.git] / lisp / international / mule-conf.el
CommitLineData
0a10297a 1;;; mule-conf.el --- configure multilingual environment -*- no-byte-compile: t -*-
08c19a27 2
08c19a27 3;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
08c19a27
KH
5
6;; Keywords: mule, multilingual, character set, coding system
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
24
25;;; Commentary:
26
27;; Don't byte-compile this file.
28
29;;; Code:
30
31;;; Definitions of character sets.
32
33;; Basic (official) character sets. These character sets are treated
7a860cf2 34;; efficiently with respect to buffer memory.
08c19a27
KH
35
36;; Syntax:
37;; (define-charset CHARSET-ID CHARSET
38;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
39;; SHORT-NAME LONG-NAME DESCRIPTION ])
40;; ASCII charset is defined in src/charset.c as below.
41;; (define-charset 0 ascii
42;; [1 94 1 0 ?B 0 "ASCII" "ASCII" "ASCII (ISO646 IRV)"])
43
7153b1f1
KH
44;; 1-byte charsets. Valid range of CHARSET-ID is 128..143.
45
46;; CHARSET-ID 128 is not used.
47
08c19a27 48(define-charset 129 'latin-iso8859-1
cead26f6 49 [1 96 1 0 ?A 1 "RHP of Latin-1" "RHP of Latin-1 (ISO 8859-1): ISO-IR-100"
5a6b038a 50 "Right-Hand Part of Latin Alphabet 1 (ISO/IEC 8859-1): ISO-IR-100."])
08c19a27 51(define-charset 130 'latin-iso8859-2
cead26f6 52 [1 96 1 0 ?B 1 "RHP of Latin-2" "RHP of Latin-2 (ISO 8859-2): ISO-IR-101"
5a6b038a 53 "Right-Hand Part of Latin Alphabet 2 (ISO/IEC 8859-2): ISO-IR-101."])
08c19a27 54(define-charset 131 'latin-iso8859-3
cead26f6 55 [1 96 1 0 ?C 1 "RHP of Latin-3" "RHP of Latin-3 (ISO 8859-3): ISO-IR-109"
5a6b038a 56 "Right-Hand Part of Latin Alphabet 3 (ISO/IEC 8859-3): ISO-IR-109."])
08c19a27 57(define-charset 132 'latin-iso8859-4
cead26f6 58 [1 96 1 0 ?D 1 "RHP of Latin-4" "RHP of Latin-4 (ISO 8859-4): ISO-IR-110"
5a6b038a 59 "Right-Hand Part of Latin Alphabet 4 (ISO/IEC 8859-4): ISO-IR-110."])
08c19a27 60(define-charset 133 'thai-tis620
cead26f6 61 [1 96 1 0 ?T 1 "RHP of TIS620" "RHP of Thai (TIS620): ISO-IR-166"
5a6b038a 62 "Right-Hand Part of TIS620.2533 (Thai): ISO-IR-166."])
08c19a27 63(define-charset 134 'greek-iso8859-7
cead26f6 64 [1 96 1 0 ?F 1 "RHP of ISO8859/7" "RHP of Greek (ISO 8859-7): ISO-IR-126"
5a6b038a 65 "Right-Hand Part of Latin/Greek Alphabet (ISO/IEC 8859-7): ISO-IR-126."])
08c19a27 66(define-charset 135 'arabic-iso8859-6
cead26f6 67 [1 96 1 1 ?G 1 "RHP of ISO8859/6" "RHP of Arabic (ISO 8859-6): ISO-IR-127"
5a6b038a 68 "Right-Hand Part of Latin/Arabic Alphabet (ISO/IEC 8859-6): ISO-IR-127."])
08c19a27 69(define-charset 136 'hebrew-iso8859-8
cead26f6 70 [1 96 1 1 ?H 1 "RHP of ISO8859/8" "RHP of Hebrew (ISO 8859-8): ISO-IR-138"
5a6b038a 71 "Right-Hand Part of Latin/Hebrew Alphabet (ISO/IEC 8859-8): ISO-IR-138."])
08c19a27 72(define-charset 137 'katakana-jisx0201
cead26f6 73 [1 94 1 0 ?I 1 "JISX0201 Katakana" "Japanese Katakana (JISX0201.1976)"
5a6b038a 74 "Katakana Part of JISX0201.1976."])
08c19a27 75(define-charset 138 'latin-jisx0201
cead26f6 76 [1 94 1 0 ?J 0 "JISX0201 Roman" "Japanese Roman (JISX0201.1976)"
5a6b038a 77 "Roman Part of JISX0201.1976."])
7153b1f1
KH
78
79;; CHARSET-ID 139 is not used.
80
08c19a27 81(define-charset 140 'cyrillic-iso8859-5
cead26f6 82 [1 96 1 0 ?L 1 "RHP of ISO8859/5" "RHP of Cyrillic (ISO 8859-5): ISO-IR-144"
5a6b038a 83 "Right-Hand Part of Latin/Cyrillic Alphabet (ISO/IEC 8859-5): ISO-IR-144."])
08c19a27 84(define-charset 141 'latin-iso8859-9
cead26f6 85 [1 96 1 0 ?M 1 "RHP of Latin-5" "RHP of Latin-5 (ISO 8859-9): ISO-IR-148"
5a6b038a 86 "Right-Hand Part of Latin Alphabet 5 (ISO/IEC 8859-9): ISO-IR-148."])
7a860cf2 87(define-charset 142 'latin-iso8859-15
cead26f6 88 [1 96 1 0 ?b 1 "RHP of Latin-9" "RHP of Latin-9 (ISO 8859-15): ISO-IR-203"
5a6b038a 89 "Right-Hand Part of Latin Alphabet 9 (ISO/IEC 8859-15): ISO-IR-203."])
7a860cf2 90(define-charset 143 'latin-iso8859-14
bdf74bef 91 [1 96 1 0 ?_ 1 "RHP of Latin-8" "RHP of Latin-8 (ISO 8859-14): ISO-IR-199"
5a6b038a 92 "Right-Hand Part of Latin Alphabet 8 (ISO/IEC 8859-14): ISO-IR-199."])
7153b1f1
KH
93
94;; 2-byte charsets. Valid range of CHARSET-ID is 144..153.
95
08c19a27 96(define-charset 144 'japanese-jisx0208-1978
cead26f6 97 [2 94 2 0 ?@ 0 "JISX0208.1978" "JISX0208.1978 (Japanese): ISO-IR-42"
5a6b038a 98 "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42."])
08c19a27 99(define-charset 145 'chinese-gb2312
cead26f6 100 [2 94 2 0 ?A 0 "GB2312" "GB2312: ISO-IR-58"
5a6b038a 101 "GB2312 Chinese simplified: ISO-IR-58."])
08c19a27 102(define-charset 146 'japanese-jisx0208
cead26f6 103 [2 94 2 0 ?B 0 "JISX0208" "JISX0208.1983/1990 (Japanese): ISO-IR-87"
5a6b038a 104 "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87."])
08c19a27 105(define-charset 147 'korean-ksc5601
cead26f6 106 [2 94 2 0 ?C 0 "KSC5601" "KSC5601 (Korean): ISO-IR-149"
5a6b038a 107 "KSC5601 Korean Hangul and Hanja: ISO-IR-149."])
08c19a27 108(define-charset 148 'japanese-jisx0212
cead26f6 109 [2 94 2 0 ?D 0 "JISX0212" "JISX0212 (Japanese): ISO-IR-159"
5a6b038a 110 "JISX0212 Japanese supplement: ISO-IR-159."])
08c19a27 111(define-charset 149 'chinese-cns11643-1
cead26f6 112 [2 94 2 0 ?G 0 "CNS11643-1" "CNS11643-1 (Chinese traditional): ISO-IR-171"
5a6b038a 113 "CNS11643 Plane 1 Chinese traditional: ISO-IR-171."])
08c19a27 114(define-charset 150 'chinese-cns11643-2
cead26f6 115 [2 94 2 0 ?H 0 "CNS11643-2" "CNS11643-2 (Chinese traditional): ISO-IR-172"
5a6b038a 116 "CNS11643 Plane 2 Chinese traditional: ISO-IR-172."])
7153b1f1
KH
117(define-charset 151 'japanese-jisx0213-1
118 [2 94 2 0 ?O 0 "JISX0213-1" "JISX0213-1" "JISX0213 Plane 1 (Japanese)"])
08c19a27 119(define-charset 152 'chinese-big5-1
cead26f6 120 [2 94 2 0 ?0 0 "Big5 (Level-1)" "Big5 (Level-1) A141-C67F"
5a6b038a 121 "Frequently used part (A141-C67F) of Big5 (Chinese traditional)."])
08c19a27 122(define-charset 153 'chinese-big5-2
cead26f6 123 [2 94 2 0 ?1 0 "Big5 (Level-2)" "Big5 (Level-2) C940-FEFE"
5a6b038a 124 "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)."])
08c19a27
KH
125
126;; Additional (private) character sets. These character sets are
7a860cf2 127;; treated less space-efficiently in the buffer.
08c19a27
KH
128
129;; Syntax:
7153b1f1 130;; (define-charset CHARSET-ID CHARSET
08c19a27
KH
131;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
132;; SHORT-NAME LONG-NAME DESCRIPTION ])
133
134;; ISO-2022 allows the use of character sets not registered in ISO with
135;; final characters `0' (0x30) through `?' (0x3F). Among them, Emacs
136;; reserves `0' through `9' to support several private character sets.
137;; The remaining final characters `:' through `?' are for users.
138
7153b1f1
KH
139;; 1-byte 1-column charsets. Valid range of CHARSET-ID is 160..223.
140
141(define-charset 160 'chinese-sisheng
08c19a27 142 [1 94 1 0 ?0 0 "SiSheng" "SiSheng (PinYin/ZhuYin)"
5a6b038a 143 "Sisheng characters (vowels with tone marks) for Pinyin/Zhuyin."])
08c19a27
KH
144
145;; IPA characters for phonetic symbols.
7153b1f1 146(define-charset 161 'ipa
5a6b038a
WL
147 [1 96 1 0 ?0 1 "IPA" "IPA"
148 "IPA (International Phonetic Association) characters."])
08c19a27
KH
149
150;; Vietnamese VISCII. VISCII is a 1-byte character set which contains
151;; more than 96 characters. Since Emacs can't handle it as one
152;; character set, it is divided into two: lower case letters and upper
153;; case letters.
7153b1f1 154(define-charset 162 'vietnamese-viscii-lower
5a6b038a
WL
155 [1 96 1 0 ?1 1 "VISCII lower" "VISCII lower-case"
156 "Vietnamese VISCII1.1 lower-case characters."])
7153b1f1 157(define-charset 163 'vietnamese-viscii-upper
5a6b038a
WL
158 [1 96 1 0 ?2 1 "VISCII upper" "VISCII upper-case"
159 "Vietnamese VISCII1.1 upper-case characters."])
08c19a27
KH
160
161;; For Arabic, we need three different types of character sets.
162;; Digits are of direction left-to-right and of width 1-column.
163;; Others are of direction right-to-left and of width 1-column or
164;; 2-column.
7153b1f1 165(define-charset 164 'arabic-digit
5a6b038a
WL
166 [1 94 1 0 ?2 0 "Arabic digit" "Arabic digit"
167 "Arabic digits."])
7153b1f1 168(define-charset 165 'arabic-1-column
5a6b038a
WL
169 [1 94 1 1 ?3 0 "Arabic 1-col" "Arabic 1-column"
170 "Arabic 1-column width glyphs."])
7153b1f1
KH
171
172;; ASCII with right-to-left direction.
173(define-charset 166 'ascii-right-to-left
174 [1 94 1 1 ?B 0 "rev ASCII" "ASCII with right-to-left direction"
5a6b038a 175 "ASCII (left half of ISO 8859-1) with right-to-left direction."])
7153b1f1
KH
176
177;; Lao script.
178;; ISO10646's 0x0E80..0x0EDF are mapped to 0x20..0x7F.
179(define-charset 167 'lao
5a6b038a
WL
180 [1 94 1 0 ?1 0 "Lao" "Lao"
181 "Lao characters (U+0E80..U+0EDF)."])
7153b1f1 182
e7f614fc 183;; CHARSET-IDs 168..223 are not used.
7153b1f1
KH
184
185;; 1-byte 2-column charsets. Valid range of CHARSET-ID is 224..239.
186
187(define-charset 224 'arabic-2-column
5a6b038a
WL
188 [1 94 2 1 ?4 0 "Arabic 2-col" "Arabic 2-column"
189 "Arabic 2-column glyphs."])
08c19a27 190
7153b1f1
KH
191;; Indian scripts. Symbolic charset for data exchange. Glyphs are
192;; not assigned. They are automatically converted to each Indian
193;; script which IS-13194 supports.
194
195(define-charset 225 'indian-is13194
196 [1 94 2 0 ?5 1 "IS 13194" "Indian IS 13194"
5a6b038a 197 "Generic Indian character set for data exchange with IS 13194."])
7153b1f1
KH
198
199;; CHARSET-IDs 226..239 are not used.
200
16808556
RS
201(define-charset 240 'indian-glyph
202 [2 96 1 0 ?4 0 "Indian glyph" "Indian glyph"
203 "Glyphs for Indian characters."])
204;; 240 used to be [2 94 1 0 ?6 0 "Indian 1-col" "Indian 1 Column"]
205
7153b1f1
KH
206;; 2-byte 1-column charsets. Valid range of CHARSET-ID is 240..244.
207
208;; Actual Glyph for 1-column width.
7153b1f1 209(define-charset 241 'tibetan-1-column
5a6b038a
WL
210 [2 94 1 0 ?8 0 "Tibetan 1-col" "Tibetan 1 column"
211 "Tibetan 1-column glyphs."])
7153b1f1 212
e98a6f1c
KH
213;; Subsets of Unicode.
214
215(define-charset 242 'mule-unicode-2500-33ff
216 [2 96 1 0 ?2 0 "Unicode subset 2" "Unicode subset (U+2500..U+33FF)"
217 "Unicode characters of the range U+2500..U+33FF."])
218
219(define-charset 243 'mule-unicode-e000-ffff
220 [2 96 1 0 ?3 0 "Unicode subset 3" "Unicode subset (U+E000..U+FFFF)"
221 "Unicode characters of the range U+E000..U+FFFF."])
7153b1f1 222
7153b1f1
KH
223(define-charset 244 'mule-unicode-0100-24ff
224 [2 96 1 0 ?1 0 "Unicode subset" "Unicode subset (U+0100..U+24FF)"
225 "Unicode characters of the range U+0100..U+24FF."])
226
227;; 2-byte 2-column charsets. Valid range of CHARSET-ID is 245..254.
228
7a362b93 229;; Ethiopic characters (Amharic and Tigrigna).
7153b1f1 230(define-charset 245 'ethiopic
5a6b038a
WL
231 [2 94 2 0 ?3 0 "Ethiopic" "Ethiopic characters"
232 "Ethiopic characters."])
08c19a27
KH
233
234;; Chinese CNS11643 Plane3 thru Plane7. Although these are official
235;; character sets, their use is rare and they don't have to be treated
7a860cf2 236;; space-efficiently in the buffer.
7153b1f1 237(define-charset 246 'chinese-cns11643-3
cead26f6 238 [2 94 2 0 ?I 0 "CNS11643-3" "CNS11643-3 (Chinese traditional): ISO-IR-183"
5a6b038a 239 "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183."])
7153b1f1 240(define-charset 247 'chinese-cns11643-4
cead26f6 241 [2 94 2 0 ?J 0 "CNS11643-4" "CNS11643-4 (Chinese traditional): ISO-IR-184"
5a6b038a 242 "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184."])
7153b1f1 243(define-charset 248 'chinese-cns11643-5
cead26f6 244 [2 94 2 0 ?K 0 "CNS11643-5" "CNS11643-5 (Chinese traditional): ISO-IR-185"
5a6b038a 245 "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185."])
7153b1f1 246(define-charset 249 'chinese-cns11643-6
cead26f6 247 [2 94 2 0 ?L 0 "CNS11643-6" "CNS11643-6 (Chinese traditional): ISO-IR-186"
5a6b038a 248 "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186."])
7153b1f1 249(define-charset 250 'chinese-cns11643-7
cead26f6 250 [2 94 2 0 ?M 0 "CNS11643-7" "CNS11643-7 (Chinese traditional): ISO-IR-187"
5a6b038a 251 "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187."])
08c19a27 252
08c19a27 253;; Actual Glyph for 2-column width.
7153b1f1 254(define-charset 251 'indian-2-column
2aa72de7 255 [2 94 2 0 ?5 0 "Indian 2-col" "Indian 2 Column"
5a6b038a 256 "Indian character set for 2-column width glyphs."])
16808556
RS
257 ;; old indian-1-column characters will be translated to indian-2-column.
258(declare-equiv-charset 2 94 ?6 'indian-2-column)
08c19a27 259
2aa72de7 260;; Tibetan script.
7153b1f1 261(define-charset 252 'tibetan
5a6b038a
WL
262 [2 94 2 0 ?7 0 "Tibetan 2-col" "Tibetan 2 column"
263 "Tibetan 2-column width glyphs."])
7153b1f1
KH
264
265;; CHARSET-ID 253 is not used.
266
267;; JISX0213 Plane 2
268(define-charset 254 'japanese-jisx0213-2
5a6b038a
WL
269 [2 94 2 0 ?P 0 "JISX0213-2" "JISX0213-2"
270 "JISX0213 Plane 2 (Japanese)."])
2aa72de7 271
85f789f7
KH
272;; Tell C code charset ID's of several charsets.
273(setup-special-charsets)
08c19a27 274
85f789f7 275\f
d2a1ee18
KH
276;; These are tables for translating characters on decoding and
277;; encoding.
f967223b 278(define-translation-table
40e98681
KH
279 'oldjis-newjis-jisroman-ascii
280 (list (cons (make-char 'japanese-jisx0208-1978)
281 (make-char 'japanese-jisx0208))
282 (cons (make-char 'latin-jisx0201) (make-char 'ascii))))
3cd01a6c
KH
283(aset (get 'oldjis-newjis-jisroman-ascii 'translation-table)
284 (make-char 'latin-jisx0201 92) (make-char 'latin-jisx0201 92))
285(aset (get 'oldjis-newjis-jisroman-ascii 'translation-table)
286 (make-char 'latin-jisx0201 126) (make-char 'latin-jisx0201 126))
40e98681 287
f967223b
KH
288(setq standard-translation-table-for-decode
289 (get 'oldjis-newjis-jisroman-ascii 'translation-table))
08c19a27 290
f967223b 291(setq standard-translation-table-for-encode nil)
08c19a27 292
bdf74bef
DL
293(defvar translation-table-for-input nil
294 "If non-nil, a char table used to translate characters from input methods.
295\(Currently only used by Quail.)")
08c19a27
KH
296\f
297;;; Make fundamental coding systems.
298
299;; Miscellaneous coding systems which can't be made by
300;; `make-coding-system'.
301
302(put 'no-conversion 'coding-system
bc6a0946
KH
303 (vector nil ?= "Do no conversion.
304
305When you visit a file with this coding, the file is read into a
306unibyte buffer as is, thus each byte of a file is treated as a
307character."
3fcbab9a 308 (list 'coding-category 'coding-category-binary
701117d5
KH
309 'alias-coding-systems '(no-conversion)
310 'safe-charsets t 'safe-chars t)
3fcbab9a 311 nil))
08c19a27 312(put 'no-conversion 'eol-type 0)
3fcbab9a
KH
313(put 'coding-category-binary 'coding-systems '(no-conversion))
314(setq coding-system-list '(no-conversion))
315(setq coding-system-alist '(("no-conversion")))
701117d5 316(define-coding-system-internal 'no-conversion)
08c19a27 317
8da035d0
RS
318(define-coding-system-alias 'binary 'no-conversion)
319
426591c3 320(put 'undecided 'coding-system
76810f60 321 (vector t ?- "No conversion on encoding, automatic conversion on decoding"
08dcf825
KH
322 (list 'alias-coding-systems '(undecided)
323 'safe-charsets '(ascii))
3fcbab9a
KH
324 nil))
325(setq coding-system-list (cons 'undecided coding-system-list))
326(setq coding-system-alist (cons '("undecided") coding-system-alist))
426591c3 327(put 'undecided 'eol-type
3fcbab9a 328 (make-subsidiary-coding-system 'undecided))
08c19a27 329
8d969bf6 330(define-coding-system-alias 'unix 'undecided-unix)
1c445211
RS
331(define-coding-system-alias 'dos 'undecided-dos)
332(define-coding-system-alias 'mac 'undecided-mac)
333
08c19a27
KH
334;; Coding systems not specific to each language environment.
335
336(make-coding-system
426591c3 337 'emacs-mule 0 ?=
695ac440 338 "Emacs internal format used in buffer and string.
bc6a0946 339
695ac440
KH
340Encoding text with this coding system produces the actual byte
341sequence of the text in buffers and strings. An exception is made for
342eight-bit-control characters. Each of them is encoded into a single
343byte."
40e98681 344 nil
06859d9e
KH
345 '((safe-charsets . t)
346 (composition . t)))
08c19a27 347
2cb30410
KH
348(make-coding-system
349 'raw-text 5 ?t
695ac440
KH
350 "Raw text, which means text contains random 8-bit codes.
351Encoding text with this coding system produces the actual byte
352sequence of the text in buffers and strings. An exception is made for
353eight-bit-control characters. Each of them is encoded into a single
bc6a0946
KH
354byte.
355
356When you visit a file with this coding, the file is read into a
357unibyte buffer as is (except for EOL format), thus each byte of a file
358is treated as a character."
40e98681
KH
359 nil
360 '((safe-charsets . t)))
2cb30410 361
08c19a27 362(make-coding-system
4951a271
KH
363 'iso-2022-7bit 2 ?J
364 "ISO 2022 based 7-bit encoding using only G0"
08c19a27 365 '((ascii t) nil nil nil
fec0e02d 366 short ascii-eol ascii-cntl seven)
ccac3d77
KH
367 '((safe-charsets . t)
368 (composition . t)))
08c19a27
KH
369
370(make-coding-system
2792ce16 371 'iso-2022-7bit-ss2 2 ?$
4951a271
KH
372 "ISO 2022 based 7-bit encoding using SS2 for 96-charset"
373 '((ascii t) nil t nil
fec0e02d 374 short ascii-eol ascii-cntl seven nil single-shift)
ccac3d77
KH
375 '((safe-charsets . t)
376 (composition . t)))
4951a271
KH
377
378(make-coding-system
2792ce16 379 'iso-2022-7bit-lock 2 ?&
4951a271
KH
380 "ISO-2022 coding system using Locking-Shift for 96-charset"
381 '((ascii t) t nil nil
fec0e02d 382 nil ascii-eol ascii-cntl seven locking-shift)
ccac3d77
KH
383 '((safe-charsets . t)
384 (composition . t)))
4951a271 385
2e21aa27 386(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
08c19a27
KH
387
388(make-coding-system
4951a271 389 'iso-2022-7bit-lock-ss2 2 ?i
d0c5d809 390 "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN"
08c19a27
KH
391 '((ascii t)
392 (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 t)
393 (nil chinese-cns11643-2)
394 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
395 chinese-cns11643-6 chinese-cns11643-7)
d0c5d809 396 short ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil
fec0e02d 397 init-bol)
3374b353 398 '((safe-charsets ascii japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
40e98681
KH
399 korean-ksc5601 chinese-gb2312 chinese-cns11643-1
400 chinese-cns11643-2 chinese-cns11643-3 chinese-cns11643-4
ccac3d77
KH
401 chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7)
402 (composition . t)))
08c19a27 403
2e21aa27 404(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
08c19a27
KH
405
406(make-coding-system
4951a271
KH
407 'iso-2022-8bit-ss2 2 ?@
408 "ISO 2022 based 8-bit encoding using SS2 for 96-charset"
08c19a27 409 '((ascii t) nil t nil
fec0e02d 410 nil ascii-eol ascii-cntl nil nil single-shift)
ccac3d77
KH
411 '((safe-charsets . t)
412 (composition . t)))
08c19a27 413
d49a4835 414(make-coding-system
cb5be6c9 415 'compound-text 2 ?x
73066974
EZ
416 "Compound text based generic encoding for decoding unknown messages.
417
e11cf111 418This coding system does not support extended segments."
5259f34b 419 '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t
d49a4835
KH
420 nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil
421 init-bol nil nil)
422 '((safe-charsets . t)
ccac3d77
KH
423 (mime-charset . x-ctext)
424 (composition . t)))
d49a4835 425
cb5be6c9
EZ
426(define-coding-system-alias 'x-ctext 'compound-text)
427(define-coding-system-alias 'ctext 'compound-text)
73066974 428
cb5be6c9
EZ
429;; Same as compound-text, but doesn't produce composition escape
430;; sequences. Used in post-read and pre-write conversions of
431;; compound-text-with-extensions, see mule.el. Note that this should
432;; not have a mime-charset property, to prevent it from showing up
433;; close to the beginning of coding systems ordered by priority.
73066974
EZ
434(make-coding-system
435 'ctext-no-compositions 2 ?x
436 "Compound text based generic encoding for decoding unknown messages.
437
cb5be6c9 438Like `compound-text', but does not produce escape sequences for compositions."
73066974
EZ
439 '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t
440 nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil
441 init-bol nil nil)
cb5be6c9 442 '((safe-charsets . t)))
73066974
EZ
443
444(make-coding-system
1e056d48 445 'compound-text-with-extensions 2 ?x
e11cf111 446 "Compound text encoding with extended segments.
73066974 447
1e056d48
KH
448See the variable `ctext-non-standard-encodings-alist' for the
449detail about how extended segments are handled.
450
73066974
EZ
451This coding system should be used only for X selections. It is inappropriate
452for decoding and encoding files, process I/O, etc."
1e056d48
KH
453 '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t
454 nil ascii-eol ascii-cntl)
73066974
EZ
455 '((post-read-conversion . ctext-post-read-conversion)
456 (pre-write-conversion . ctext-pre-write-conversion)))
457
cb5be6c9
EZ
458(define-coding-system-alias
459 'x-ctext-with-extensions 'compound-text-with-extensions)
460(define-coding-system-alias
461 'ctext-with-extensions 'compound-text-with-extensions)
75b6fb58 462
f6eb8ace
KH
463(make-coding-system
464 'iso-safe 2 ?-
e11cf111 465 "Encode ASCII asis and encode non-ASCII characters to `?'."
f6eb8ace 466 '(ascii nil nil nil
fec0e02d 467 nil ascii-eol ascii-cntl nil nil nil nil nil nil nil nil t)
40e98681 468 '((safe-charsets ascii)))
f6eb8ace 469
9f13685a
KH
470(define-coding-system-alias
471 'us-ascii 'iso-safe)
472
435c388e
KH
473(make-coding-system
474 'iso-latin-1 2 ?1
475 "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
476 '(ascii latin-iso8859-1 nil nil
477 nil nil nil nil nil nil nil nil nil nil nil t t)
478 '((safe-charsets ascii latin-iso8859-1)
479 (mime-charset . iso-8859-1)))
480
481(define-coding-system-alias 'iso-8859-1 'iso-latin-1)
482(define-coding-system-alias 'latin-1 'iso-latin-1)
483
5b984695 484;; Use iso-safe for terminal output if some other coding system is not
e8dd0160 485;; specified explicitly.
f6eb8ace
KH
486(set-safe-terminal-coding-system-internal 'iso-safe)
487
08c19a27
KH
488;; The other coding-systems are defined in each language specific
489;; section of languages.el.
490
678dc7ec
RS
491;; Normally, set coding system to `undecided' before reading a file.
492;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
493;; but we regard them as containing multibyte characters.
494;; Tar files are not decoded at all, but we treat them as raw bytes.
08c19a27 495
4951a271 496(setq file-coding-system-alist
2238f751 497 '(("\\.elc\\'" . (emacs-mule . emacs-mule))
bdf74bef 498 ("\\.utf\\(-8\\)?\\'" . utf-8)
3e88bb50
EZ
499 ;; We use raw-text for reading loaddefs.el so that if it
500 ;; happens to have DOS or Mac EOLs, they are converted to
501 ;; newlines. This is required to make the special treatment
502 ;; of the "\ newline" combination in loaddefs.el, which marks
503 ;; the beginning of a doc string, work.
504 ("\\(\\`\\|/\\)loaddefs.el\\'" . (raw-text . raw-text-unix))
2238f751 505 ("\\.tar\\'" . (no-conversion . no-conversion))
518a0981 506 ( "\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
4951a271 507 ("" . (undecided . nil))))
08c19a27
KH
508
509\f
510;;; Setting coding categories and their priorities.
511
512;; This setting is just to read Emacs Lisp source files which
513;; contain multilingual text while dumping Emacs. More appropriate
2792ce16 514;; values are set by the command `set-language-environment' for each
08c19a27
KH
515;; language environment.
516
29742cb5
KH
517(setq coding-category-emacs-mule 'emacs-mule
518 coding-category-sjis 'japanese-shift-jis
519 coding-category-iso-7 'iso-2022-7bit
40e98681 520 coding-category-iso-7-tight 'iso-2022-jp
29742cb5 521 coding-category-iso-8-1 'iso-latin-1
5b984695 522 coding-category-iso-8-2 'iso-latin-1
29742cb5
KH
523 coding-category-iso-7-else 'iso-2022-7bit-lock
524 coding-category-iso-8-else 'iso-2022-8bit-ss2
a1223d6c 525 coding-category-ccl nil
f8499efb 526 coding-category-utf-8 'mule-utf-8
9ef9b28e
KH
527 coding-category-utf-16-be 'mule-utf-16be-with-signature
528 coding-category-utf-16-le 'mule-utf-16le-with-signature
29742cb5 529 coding-category-big5 'chinese-big5
2cb30410 530 coding-category-raw-text 'raw-text
29742cb5 531 coding-category-binary 'no-conversion)
08c19a27
KH
532
533(set-coding-priority
5b984695 534 '(coding-category-iso-8-1
69b73c8a 535 coding-category-iso-8-2
60ec9367
KH
536 coding-category-utf-8
537 coding-category-utf-16-be
538 coding-category-utf-16-le
5b984695
KH
539 coding-category-iso-7-tight
540 coding-category-iso-7
29742cb5 541 coding-category-iso-7-else
2cb30410 542 coding-category-iso-8-else
e4e6cfa0 543 coding-category-emacs-mule
2cb30410 544 coding-category-raw-text
a1506d29 545 coding-category-sjis
08c19a27 546 coding-category-big5
a1223d6c 547 coding-category-ccl
2c7794b0 548 coding-category-binary
60ec9367 549 ))
08c19a27 550
c1b628eb
KH
551\f
552;;; Miscellaneous settings.
e97c2306 553(aset latin-extra-code-table ?\221 t)
c1b628eb 554(aset latin-extra-code-table ?\222 t)
40232452
KH
555(aset latin-extra-code-table ?\223 t)
556(aset latin-extra-code-table ?\224 t)
b4026917 557(aset latin-extra-code-table ?\225 t)
e97c2306 558(aset latin-extra-code-table ?\226 t)
c1b628eb 559
4cb4b388
KH
560(update-coding-systems-internal)
561
ab5796a9 562;;; arch-tag: 7d5fed55-b6df-42f6-8d3d-0011190551f5
08c19a27 563;;; mule-conf.el ends here