Update AIST copyright years.
[bpt/emacs.git] / lisp / international / mule-conf.el
CommitLineData
0a10297a 1;;; mule-conf.el --- configure multilingual environment -*- no-byte-compile: t -*-
08c19a27 2
d4877ac1
GM
3;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4;; 2004, 2005, 2006 Free Software Foundation, Inc.
7976eda0 5;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
2fd125a3
KH
6;; National Institute of Advanced Industrial Science and Technology (AIST)
7;; Registration Number H14PRO021
08c19a27
KH
8
9;; Keywords: mule, multilingual, character set, coding system
10
11;; This file is part of GNU Emacs.
12
13;; GNU Emacs is free software; you can redistribute it and/or modify
14;; it under the terms of the GNU General Public License as published by
15;; the Free Software Foundation; either version 2, or (at your option)
16;; any later version.
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
24;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
25;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26;; Boston, MA 02110-1301, USA.
08c19a27
KH
27
28;;; Commentary:
29
30;; Don't byte-compile this file.
31
32;;; Code:
33
34;;; Definitions of character sets.
35
36;; Basic (official) character sets. These character sets are treated
7a860cf2 37;; efficiently with respect to buffer memory.
08c19a27
KH
38
39;; Syntax:
40;; (define-charset CHARSET-ID CHARSET
41;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
42;; SHORT-NAME LONG-NAME DESCRIPTION ])
43;; ASCII charset is defined in src/charset.c as below.
44;; (define-charset 0 ascii
45;; [1 94 1 0 ?B 0 "ASCII" "ASCII" "ASCII (ISO646 IRV)"])
46
7153b1f1
KH
47;; 1-byte charsets. Valid range of CHARSET-ID is 128..143.
48
49;; CHARSET-ID 128 is not used.
50
08c19a27 51(define-charset 129 'latin-iso8859-1
cead26f6 52 [1 96 1 0 ?A 1 "RHP of Latin-1" "RHP of Latin-1 (ISO 8859-1): ISO-IR-100"
5a6b038a 53 "Right-Hand Part of Latin Alphabet 1 (ISO/IEC 8859-1): ISO-IR-100."])
08c19a27 54(define-charset 130 'latin-iso8859-2
cead26f6 55 [1 96 1 0 ?B 1 "RHP of Latin-2" "RHP of Latin-2 (ISO 8859-2): ISO-IR-101"
5a6b038a 56 "Right-Hand Part of Latin Alphabet 2 (ISO/IEC 8859-2): ISO-IR-101."])
08c19a27 57(define-charset 131 'latin-iso8859-3
cead26f6 58 [1 96 1 0 ?C 1 "RHP of Latin-3" "RHP of Latin-3 (ISO 8859-3): ISO-IR-109"
5a6b038a 59 "Right-Hand Part of Latin Alphabet 3 (ISO/IEC 8859-3): ISO-IR-109."])
08c19a27 60(define-charset 132 'latin-iso8859-4
cead26f6 61 [1 96 1 0 ?D 1 "RHP of Latin-4" "RHP of Latin-4 (ISO 8859-4): ISO-IR-110"
5a6b038a 62 "Right-Hand Part of Latin Alphabet 4 (ISO/IEC 8859-4): ISO-IR-110."])
08c19a27 63(define-charset 133 'thai-tis620
cead26f6 64 [1 96 1 0 ?T 1 "RHP of TIS620" "RHP of Thai (TIS620): ISO-IR-166"
5a6b038a 65 "Right-Hand Part of TIS620.2533 (Thai): ISO-IR-166."])
08c19a27 66(define-charset 134 'greek-iso8859-7
cead26f6 67 [1 96 1 0 ?F 1 "RHP of ISO8859/7" "RHP of Greek (ISO 8859-7): ISO-IR-126"
5a6b038a 68 "Right-Hand Part of Latin/Greek Alphabet (ISO/IEC 8859-7): ISO-IR-126."])
08c19a27 69(define-charset 135 'arabic-iso8859-6
cead26f6 70 [1 96 1 1 ?G 1 "RHP of ISO8859/6" "RHP of Arabic (ISO 8859-6): ISO-IR-127"
5a6b038a 71 "Right-Hand Part of Latin/Arabic Alphabet (ISO/IEC 8859-6): ISO-IR-127."])
08c19a27 72(define-charset 136 'hebrew-iso8859-8
cead26f6 73 [1 96 1 1 ?H 1 "RHP of ISO8859/8" "RHP of Hebrew (ISO 8859-8): ISO-IR-138"
5a6b038a 74 "Right-Hand Part of Latin/Hebrew Alphabet (ISO/IEC 8859-8): ISO-IR-138."])
08c19a27 75(define-charset 137 'katakana-jisx0201
cead26f6 76 [1 94 1 0 ?I 1 "JISX0201 Katakana" "Japanese Katakana (JISX0201.1976)"
5a6b038a 77 "Katakana Part of JISX0201.1976."])
08c19a27 78(define-charset 138 'latin-jisx0201
cead26f6 79 [1 94 1 0 ?J 0 "JISX0201 Roman" "Japanese Roman (JISX0201.1976)"
5a6b038a 80 "Roman Part of JISX0201.1976."])
7153b1f1
KH
81
82;; CHARSET-ID 139 is not used.
83
08c19a27 84(define-charset 140 'cyrillic-iso8859-5
cead26f6 85 [1 96 1 0 ?L 1 "RHP of ISO8859/5" "RHP of Cyrillic (ISO 8859-5): ISO-IR-144"
5a6b038a 86 "Right-Hand Part of Latin/Cyrillic Alphabet (ISO/IEC 8859-5): ISO-IR-144."])
08c19a27 87(define-charset 141 'latin-iso8859-9
cead26f6 88 [1 96 1 0 ?M 1 "RHP of Latin-5" "RHP of Latin-5 (ISO 8859-9): ISO-IR-148"
5a6b038a 89 "Right-Hand Part of Latin Alphabet 5 (ISO/IEC 8859-9): ISO-IR-148."])
7a860cf2 90(define-charset 142 'latin-iso8859-15
cead26f6 91 [1 96 1 0 ?b 1 "RHP of Latin-9" "RHP of Latin-9 (ISO 8859-15): ISO-IR-203"
5a6b038a 92 "Right-Hand Part of Latin Alphabet 9 (ISO/IEC 8859-15): ISO-IR-203."])
7a860cf2 93(define-charset 143 'latin-iso8859-14
bdf74bef 94 [1 96 1 0 ?_ 1 "RHP of Latin-8" "RHP of Latin-8 (ISO 8859-14): ISO-IR-199"
5a6b038a 95 "Right-Hand Part of Latin Alphabet 8 (ISO/IEC 8859-14): ISO-IR-199."])
7153b1f1
KH
96
97;; 2-byte charsets. Valid range of CHARSET-ID is 144..153.
98
08c19a27 99(define-charset 144 'japanese-jisx0208-1978
cead26f6 100 [2 94 2 0 ?@ 0 "JISX0208.1978" "JISX0208.1978 (Japanese): ISO-IR-42"
5a6b038a 101 "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42."])
08c19a27 102(define-charset 145 'chinese-gb2312
cead26f6 103 [2 94 2 0 ?A 0 "GB2312" "GB2312: ISO-IR-58"
5a6b038a 104 "GB2312 Chinese simplified: ISO-IR-58."])
08c19a27 105(define-charset 146 'japanese-jisx0208
cead26f6 106 [2 94 2 0 ?B 0 "JISX0208" "JISX0208.1983/1990 (Japanese): ISO-IR-87"
5a6b038a 107 "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87."])
08c19a27 108(define-charset 147 'korean-ksc5601
cead26f6 109 [2 94 2 0 ?C 0 "KSC5601" "KSC5601 (Korean): ISO-IR-149"
5a6b038a 110 "KSC5601 Korean Hangul and Hanja: ISO-IR-149."])
08c19a27 111(define-charset 148 'japanese-jisx0212
cead26f6 112 [2 94 2 0 ?D 0 "JISX0212" "JISX0212 (Japanese): ISO-IR-159"
5a6b038a 113 "JISX0212 Japanese supplement: ISO-IR-159."])
08c19a27 114(define-charset 149 'chinese-cns11643-1
cead26f6 115 [2 94 2 0 ?G 0 "CNS11643-1" "CNS11643-1 (Chinese traditional): ISO-IR-171"
5a6b038a 116 "CNS11643 Plane 1 Chinese traditional: ISO-IR-171."])
08c19a27 117(define-charset 150 'chinese-cns11643-2
cead26f6 118 [2 94 2 0 ?H 0 "CNS11643-2" "CNS11643-2 (Chinese traditional): ISO-IR-172"
5a6b038a 119 "CNS11643 Plane 2 Chinese traditional: ISO-IR-172."])
7153b1f1
KH
120(define-charset 151 'japanese-jisx0213-1
121 [2 94 2 0 ?O 0 "JISX0213-1" "JISX0213-1" "JISX0213 Plane 1 (Japanese)"])
08c19a27 122(define-charset 152 'chinese-big5-1
cead26f6 123 [2 94 2 0 ?0 0 "Big5 (Level-1)" "Big5 (Level-1) A141-C67F"
5a6b038a 124 "Frequently used part (A141-C67F) of Big5 (Chinese traditional)."])
08c19a27 125(define-charset 153 'chinese-big5-2
cead26f6 126 [2 94 2 0 ?1 0 "Big5 (Level-2)" "Big5 (Level-2) C940-FEFE"
5a6b038a 127 "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)."])
08c19a27
KH
128
129;; Additional (private) character sets. These character sets are
7a860cf2 130;; treated less space-efficiently in the buffer.
08c19a27
KH
131
132;; Syntax:
7153b1f1 133;; (define-charset CHARSET-ID CHARSET
08c19a27
KH
134;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
135;; SHORT-NAME LONG-NAME DESCRIPTION ])
136
137;; ISO-2022 allows the use of character sets not registered in ISO with
138;; final characters `0' (0x30) through `?' (0x3F). Among them, Emacs
139;; reserves `0' through `9' to support several private character sets.
140;; The remaining final characters `:' through `?' are for users.
141
7153b1f1
KH
142;; 1-byte 1-column charsets. Valid range of CHARSET-ID is 160..223.
143
144(define-charset 160 'chinese-sisheng
08c19a27 145 [1 94 1 0 ?0 0 "SiSheng" "SiSheng (PinYin/ZhuYin)"
5a6b038a 146 "Sisheng characters (vowels with tone marks) for Pinyin/Zhuyin."])
08c19a27
KH
147
148;; IPA characters for phonetic symbols.
7153b1f1 149(define-charset 161 'ipa
5a6b038a
WL
150 [1 96 1 0 ?0 1 "IPA" "IPA"
151 "IPA (International Phonetic Association) characters."])
08c19a27
KH
152
153;; Vietnamese VISCII. VISCII is a 1-byte character set which contains
154;; more than 96 characters. Since Emacs can't handle it as one
155;; character set, it is divided into two: lower case letters and upper
156;; case letters.
7153b1f1 157(define-charset 162 'vietnamese-viscii-lower
5a6b038a
WL
158 [1 96 1 0 ?1 1 "VISCII lower" "VISCII lower-case"
159 "Vietnamese VISCII1.1 lower-case characters."])
7153b1f1 160(define-charset 163 'vietnamese-viscii-upper
5a6b038a
WL
161 [1 96 1 0 ?2 1 "VISCII upper" "VISCII upper-case"
162 "Vietnamese VISCII1.1 upper-case characters."])
08c19a27
KH
163
164;; For Arabic, we need three different types of character sets.
165;; Digits are of direction left-to-right and of width 1-column.
166;; Others are of direction right-to-left and of width 1-column or
167;; 2-column.
7153b1f1 168(define-charset 164 'arabic-digit
5a6b038a
WL
169 [1 94 1 0 ?2 0 "Arabic digit" "Arabic digit"
170 "Arabic digits."])
7153b1f1 171(define-charset 165 'arabic-1-column
5a6b038a
WL
172 [1 94 1 1 ?3 0 "Arabic 1-col" "Arabic 1-column"
173 "Arabic 1-column width glyphs."])
7153b1f1
KH
174
175;; ASCII with right-to-left direction.
176(define-charset 166 'ascii-right-to-left
177 [1 94 1 1 ?B 0 "rev ASCII" "ASCII with right-to-left direction"
5a6b038a 178 "ASCII (left half of ISO 8859-1) with right-to-left direction."])
7153b1f1
KH
179
180;; Lao script.
181;; ISO10646's 0x0E80..0x0EDF are mapped to 0x20..0x7F.
182(define-charset 167 'lao
5a6b038a
WL
183 [1 94 1 0 ?1 0 "Lao" "Lao"
184 "Lao characters (U+0E80..U+0EDF)."])
7153b1f1 185
e7f614fc 186;; CHARSET-IDs 168..223 are not used.
7153b1f1
KH
187
188;; 1-byte 2-column charsets. Valid range of CHARSET-ID is 224..239.
189
190(define-charset 224 'arabic-2-column
5a6b038a
WL
191 [1 94 2 1 ?4 0 "Arabic 2-col" "Arabic 2-column"
192 "Arabic 2-column glyphs."])
08c19a27 193
7153b1f1
KH
194;; Indian scripts. Symbolic charset for data exchange. Glyphs are
195;; not assigned. They are automatically converted to each Indian
196;; script which IS-13194 supports.
197
198(define-charset 225 'indian-is13194
199 [1 94 2 0 ?5 1 "IS 13194" "Indian IS 13194"
5a6b038a 200 "Generic Indian character set for data exchange with IS 13194."])
7153b1f1
KH
201
202;; CHARSET-IDs 226..239 are not used.
203
16808556
RS
204(define-charset 240 'indian-glyph
205 [2 96 1 0 ?4 0 "Indian glyph" "Indian glyph"
206 "Glyphs for Indian characters."])
207;; 240 used to be [2 94 1 0 ?6 0 "Indian 1-col" "Indian 1 Column"]
208
7153b1f1
KH
209;; 2-byte 1-column charsets. Valid range of CHARSET-ID is 240..244.
210
211;; Actual Glyph for 1-column width.
7153b1f1 212(define-charset 241 'tibetan-1-column
5a6b038a
WL
213 [2 94 1 0 ?8 0 "Tibetan 1-col" "Tibetan 1 column"
214 "Tibetan 1-column glyphs."])
7153b1f1 215
e98a6f1c
KH
216;; Subsets of Unicode.
217
218(define-charset 242 'mule-unicode-2500-33ff
219 [2 96 1 0 ?2 0 "Unicode subset 2" "Unicode subset (U+2500..U+33FF)"
220 "Unicode characters of the range U+2500..U+33FF."])
221
;; Subset of Unicode covering U+E000..U+FFFF.  The vector fields are
;; [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
;;  SHORT-NAME LONG-NAME DESCRIPTION].
;; The LONG-NAME writes the range as "U+E000..U+FFFF", matching the
;; DESCRIPTION below and the other mule-unicode-* charsets.
(define-charset 243 'mule-unicode-e000-ffff
  [2 96 1 0 ?3 0 "Unicode subset 3" "Unicode subset (U+E000..U+FFFF)"
   "Unicode characters of the range U+E000..U+FFFF."])
7153b1f1 225
7153b1f1
KH
226(define-charset 244 'mule-unicode-0100-24ff
227 [2 96 1 0 ?1 0 "Unicode subset" "Unicode subset (U+0100..U+24FF)"
228 "Unicode characters of the range U+0100..U+24FF."])
229
230;; 2-byte 2-column charsets. Valid range of CHARSET-ID is 245..254.
231
7a362b93 232;; Ethiopic characters (Amharic and Tigrigna).
7153b1f1 233(define-charset 245 'ethiopic
5a6b038a
WL
234 [2 94 2 0 ?3 0 "Ethiopic" "Ethiopic characters"
235 "Ethiopic characters."])
08c19a27
KH
236
237;; Chinese CNS11643 Plane3 thru Plane7. Although these are official
238;; character sets, their use is rare and they don't have to be treated
7a860cf2 239;; space-efficiently in the buffer.
7153b1f1 240(define-charset 246 'chinese-cns11643-3
cead26f6 241 [2 94 2 0 ?I 0 "CNS11643-3" "CNS11643-3 (Chinese traditional): ISO-IR-183"
5a6b038a 242 "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183."])
7153b1f1 243(define-charset 247 'chinese-cns11643-4
cead26f6 244 [2 94 2 0 ?J 0 "CNS11643-4" "CNS11643-4 (Chinese traditional): ISO-IR-184"
5a6b038a 245 "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184."])
7153b1f1 246(define-charset 248 'chinese-cns11643-5
cead26f6 247 [2 94 2 0 ?K 0 "CNS11643-5" "CNS11643-5 (Chinese traditional): ISO-IR-185"
5a6b038a 248 "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185."])
7153b1f1 249(define-charset 249 'chinese-cns11643-6
cead26f6 250 [2 94 2 0 ?L 0 "CNS11643-6" "CNS11643-6 (Chinese traditional): ISO-IR-186"
5a6b038a 251 "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186."])
7153b1f1 252(define-charset 250 'chinese-cns11643-7
cead26f6 253 [2 94 2 0 ?M 0 "CNS11643-7" "CNS11643-7 (Chinese traditional): ISO-IR-187"
5a6b038a 254 "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187."])
08c19a27 255
08c19a27 256;; Actual Glyph for 2-column width.
7153b1f1 257(define-charset 251 'indian-2-column
2aa72de7 258 [2 94 2 0 ?5 0 "Indian 2-col" "Indian 2 Column"
5a6b038a 259 "Indian character set for 2-column width glyphs."])
16808556
RS
260 ;; old indian-1-column characters will be translated to indian-2-column.
261(declare-equiv-charset 2 94 ?6 'indian-2-column)
08c19a27 262
2aa72de7 263;; Tibetan script.
7153b1f1 264(define-charset 252 'tibetan
5a6b038a
WL
265 [2 94 2 0 ?7 0 "Tibetan 2-col" "Tibetan 2 column"
266 "Tibetan 2-column width glyphs."])
7153b1f1
KH
267
268;; CHARSET-ID 253 is not used.
269
270;; JISX0213 Plane 2
271(define-charset 254 'japanese-jisx0213-2
5a6b038a
WL
272 [2 94 2 0 ?P 0 "JISX0213-2" "JISX0213-2"
273 "JISX0213 Plane 2 (Japanese)."])
2aa72de7 274
85f789f7
KH
275;; Tell the C code the charset IDs of several charsets.
276(setup-special-charsets)
08c19a27 277
85f789f7 278\f
d2a1ee18
KH
279;; These are tables for translating characters on decoding and
280;; encoding.
f967223b 281(define-translation-table
40e98681
KH
282 'oldjis-newjis-jisroman-ascii
283 (list (cons (make-char 'japanese-jisx0208-1978)
284 (make-char 'japanese-jisx0208))
285 (cons (make-char 'latin-jisx0201) (make-char 'ascii))))
3cd01a6c
KH
286(aset (get 'oldjis-newjis-jisroman-ascii 'translation-table)
287 (make-char 'latin-jisx0201 92) (make-char 'latin-jisx0201 92))
288(aset (get 'oldjis-newjis-jisroman-ascii 'translation-table)
289 (make-char 'latin-jisx0201 126) (make-char 'latin-jisx0201 126))
40e98681 290
f967223b
KH
291(setq standard-translation-table-for-decode
292 (get 'oldjis-newjis-jisroman-ascii 'translation-table))
08c19a27 293
f967223b 294(setq standard-translation-table-for-encode nil)
08c19a27
KH
295\f
296;;; Make fundamental coding systems.
297
298;; Miscellaneous coding systems which can't be made by
299;; `make-coding-system'.
300
301(put 'no-conversion 'coding-system
bc6a0946
KH
302 (vector nil ?= "Do no conversion.
303
304When you visit a file with this coding, the file is read into a
305unibyte buffer as is, thus each byte of a file is treated as a
306character."
3fcbab9a 307 (list 'coding-category 'coding-category-binary
701117d5
KH
308 'alias-coding-systems '(no-conversion)
309 'safe-charsets t 'safe-chars t)
3fcbab9a 310 nil))
08c19a27 311(put 'no-conversion 'eol-type 0)
3fcbab9a
KH
312(put 'coding-category-binary 'coding-systems '(no-conversion))
313(setq coding-system-list '(no-conversion))
314(setq coding-system-alist '(("no-conversion")))
701117d5 315(define-coding-system-internal 'no-conversion)
08c19a27 316
8da035d0
RS
317(define-coding-system-alias 'binary 'no-conversion)
318
426591c3 319(put 'undecided 'coding-system
76810f60 320 (vector t ?- "No conversion on encoding, automatic conversion on decoding"
08dcf825
KH
321 (list 'alias-coding-systems '(undecided)
322 'safe-charsets '(ascii))
3fcbab9a
KH
323 nil))
324(setq coding-system-list (cons 'undecided coding-system-list))
325(setq coding-system-alist (cons '("undecided") coding-system-alist))
426591c3 326(put 'undecided 'eol-type
3fcbab9a 327 (make-subsidiary-coding-system 'undecided))
08c19a27 328
8d969bf6 329(define-coding-system-alias 'unix 'undecided-unix)
1c445211
RS
330(define-coding-system-alias 'dos 'undecided-dos)
331(define-coding-system-alias 'mac 'undecided-mac)
332
08c19a27
KH
333;; Coding systems not specific to each language environment.
334
335(make-coding-system
426591c3 336 'emacs-mule 0 ?=
695ac440 337 "Emacs internal format used in buffer and string.
bc6a0946 338
695ac440
KH
339Encoding text with this coding system produces the actual byte
340sequence of the text in buffers and strings. An exception is made for
341eight-bit-control characters. Each of them is encoded into a single
342byte."
40e98681 343 nil
06859d9e
KH
344 '((safe-charsets . t)
345 (composition . t)))
08c19a27 346
2cb30410
KH
347(make-coding-system
348 'raw-text 5 ?t
695ac440
KH
349 "Raw text, which means text contains random 8-bit codes.
350Encoding text with this coding system produces the actual byte
351sequence of the text in buffers and strings. An exception is made for
352eight-bit-control characters. Each of them is encoded into a single
bc6a0946
KH
353byte.
354
355When you visit a file with this coding, the file is read into a
356unibyte buffer as is (except for EOL format), thus each byte of a file
357is treated as a character."
40e98681
KH
358 nil
359 '((safe-charsets . t)))
2cb30410 360
08c19a27 361(make-coding-system
4951a271
KH
362 'iso-2022-7bit 2 ?J
363 "ISO 2022 based 7-bit encoding using only G0"
08c19a27 364 '((ascii t) nil nil nil
fec0e02d 365 short ascii-eol ascii-cntl seven)
ccac3d77
KH
366 '((safe-charsets . t)
367 (composition . t)))
08c19a27
KH
368
369(make-coding-system
2792ce16 370 'iso-2022-7bit-ss2 2 ?$
4951a271
KH
371 "ISO 2022 based 7-bit encoding using SS2 for 96-charset"
372 '((ascii t) nil t nil
fec0e02d 373 short ascii-eol ascii-cntl seven nil single-shift)
ccac3d77
KH
374 '((safe-charsets . t)
375 (composition . t)))
4951a271
KH
376
377(make-coding-system
2792ce16 378 'iso-2022-7bit-lock 2 ?&
4951a271
KH
379 "ISO-2022 coding system using Locking-Shift for 96-charset"
380 '((ascii t) t nil nil
fec0e02d 381 nil ascii-eol ascii-cntl seven locking-shift)
ccac3d77
KH
382 '((safe-charsets . t)
383 (composition . t)))
4951a271 384
2e21aa27 385(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
08c19a27
KH
386
387(make-coding-system
4951a271 388 'iso-2022-7bit-lock-ss2 2 ?i
d0c5d809 389 "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN"
08c19a27
KH
390 '((ascii t)
391 (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 t)
392 (nil chinese-cns11643-2)
393 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
394 chinese-cns11643-6 chinese-cns11643-7)
d0c5d809 395 short ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil
fec0e02d 396 init-bol)
3374b353 397 '((safe-charsets ascii japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
40e98681
KH
398 korean-ksc5601 chinese-gb2312 chinese-cns11643-1
399 chinese-cns11643-2 chinese-cns11643-3 chinese-cns11643-4
ccac3d77
KH
400 chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7)
401 (composition . t)))
08c19a27 402
2e21aa27 403(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
08c19a27
KH
404
405(make-coding-system
4951a271
KH
406 'iso-2022-8bit-ss2 2 ?@
407 "ISO 2022 based 8-bit encoding using SS2 for 96-charset"
08c19a27 408 '((ascii t) nil t nil
fec0e02d 409 nil ascii-eol ascii-cntl nil nil single-shift)
ccac3d77
KH
410 '((safe-charsets . t)
411 (composition . t)))
08c19a27 412
d49a4835 413(make-coding-system
cb5be6c9 414 'compound-text 2 ?x
73066974
EZ
415 "Compound text based generic encoding for decoding unknown messages.
416
e11cf111 417This coding system does not support extended segments."
5259f34b 418 '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t
d49a4835
KH
419 nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil
420 init-bol nil nil)
421 '((safe-charsets . t)
ccac3d77
KH
422 (mime-charset . x-ctext)
423 (composition . t)))
d49a4835 424
cb5be6c9
EZ
425(define-coding-system-alias 'x-ctext 'compound-text)
426(define-coding-system-alias 'ctext 'compound-text)
73066974 427
cb5be6c9
EZ
428;; Same as compound-text, but doesn't produce composition escape
429;; sequences. Used in post-read and pre-write conversions of
430;; compound-text-with-extensions, see mule.el. Note that this should
431;; not have a mime-charset property, to prevent it from showing up
432;; close to the beginning of coding systems ordered by priority.
73066974
EZ
433(make-coding-system
434 'ctext-no-compositions 2 ?x
435 "Compound text based generic encoding for decoding unknown messages.
436
cb5be6c9 437Like `compound-text', but does not produce escape sequences for compositions."
73066974
EZ
438 '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t
439 nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil
440 init-bol nil nil)
cb5be6c9 441 '((safe-charsets . t)))
73066974
EZ
442
443(make-coding-system
1e056d48 444 'compound-text-with-extensions 2 ?x
e11cf111 445 "Compound text encoding with extended segments.
73066974 446
1e056d48
KH
447See the variable `ctext-non-standard-encodings-alist' for the
448detail about how extended segments are handled.
449
73066974
EZ
450This coding system should be used only for X selections. It is inappropriate
451for decoding and encoding files, process I/O, etc."
1e056d48
KH
452 '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t
453 nil ascii-eol ascii-cntl)
73066974
EZ
454 '((post-read-conversion . ctext-post-read-conversion)
455 (pre-write-conversion . ctext-pre-write-conversion)))
456
cb5be6c9
EZ
457(define-coding-system-alias
458 'x-ctext-with-extensions 'compound-text-with-extensions)
459(define-coding-system-alias
460 'ctext-with-extensions 'compound-text-with-extensions)
75b6fb58 461
f6eb8ace
KH
462(make-coding-system
463 'iso-safe 2 ?-
e11cf111 464 "Encode ASCII asis and encode non-ASCII characters to `?'."
f6eb8ace 465 '(ascii nil nil nil
fec0e02d 466 nil ascii-eol ascii-cntl nil nil nil nil nil nil nil nil t)
40e98681 467 '((safe-charsets ascii)))
f6eb8ace 468
9f13685a
KH
469(define-coding-system-alias
470 'us-ascii 'iso-safe)
471
435c388e
KH
472(make-coding-system
473 'iso-latin-1 2 ?1
474 "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
475 '(ascii latin-iso8859-1 nil nil
476 nil nil nil nil nil nil nil nil nil nil nil t t)
477 '((safe-charsets ascii latin-iso8859-1)
478 (mime-charset . iso-8859-1)))
479
480(define-coding-system-alias 'iso-8859-1 'iso-latin-1)
481(define-coding-system-alias 'latin-1 'iso-latin-1)
482
5b984695 483;; Use iso-safe for terminal output if some other coding system is not
e8dd0160 484;; specified explicitly.
f6eb8ace
KH
485(set-safe-terminal-coding-system-internal 'iso-safe)
486
08c19a27
KH
487;; The other coding-systems are defined in each language specific
488;; section of languages.el.
489
678dc7ec
RS
490;; Normally, set coding system to `undecided' before reading a file.
491;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
492;; but we regard them as containing multibyte characters.
493;; Tar files are not decoded at all, but we treat them as raw bytes.
08c19a27 494
4951a271 495(setq file-coding-system-alist
2238f751 496 '(("\\.elc\\'" . (emacs-mule . emacs-mule))
bdf74bef 497 ("\\.utf\\(-8\\)?\\'" . utf-8)
3e88bb50
EZ
498 ;; We use raw-text for reading loaddefs.el so that if it
499 ;; happens to have DOS or Mac EOLs, they are converted to
500 ;; newlines. This is required to make the special treatment
501 ;; of the "\ newline" combination in loaddefs.el, which marks
502 ;; the beginning of a doc string, work.
503 ("\\(\\`\\|/\\)loaddefs.el\\'" . (raw-text . raw-text-unix))
2238f751 504 ("\\.tar\\'" . (no-conversion . no-conversion))
518a0981 505 ( "\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
2d5e5eb8 506 ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
4951a271 507 ("" . (undecided . nil))))
08c19a27
KH
508
509\f
510;;; Setting coding categories and their priorities.
511
512;; This setting is just for reading Emacs Lisp source files which
513;; contain multilingual text while dumping Emacs. More appropriate
2792ce16 514;; values are set by the command `set-language-environment' for each
08c19a27
KH
515;; language environment.
516
29742cb5
KH
517(setq coding-category-emacs-mule 'emacs-mule
518 coding-category-sjis 'japanese-shift-jis
519 coding-category-iso-7 'iso-2022-7bit
40e98681 520 coding-category-iso-7-tight 'iso-2022-jp
29742cb5 521 coding-category-iso-8-1 'iso-latin-1
5b984695 522 coding-category-iso-8-2 'iso-latin-1
29742cb5
KH
523 coding-category-iso-7-else 'iso-2022-7bit-lock
524 coding-category-iso-8-else 'iso-2022-8bit-ss2
a1223d6c 525 coding-category-ccl nil
f8499efb 526 coding-category-utf-8 'mule-utf-8
9ef9b28e
KH
527 coding-category-utf-16-be 'mule-utf-16be-with-signature
528 coding-category-utf-16-le 'mule-utf-16le-with-signature
29742cb5 529 coding-category-big5 'chinese-big5
2cb30410 530 coding-category-raw-text 'raw-text
29742cb5 531 coding-category-binary 'no-conversion)
08c19a27
KH
532
533(set-coding-priority
5b984695 534 '(coding-category-iso-8-1
69b73c8a 535 coding-category-iso-8-2
60ec9367
KH
536 coding-category-utf-8
537 coding-category-utf-16-be
538 coding-category-utf-16-le
5b984695
KH
539 coding-category-iso-7-tight
540 coding-category-iso-7
29742cb5 541 coding-category-iso-7-else
2cb30410 542 coding-category-iso-8-else
e4e6cfa0 543 coding-category-emacs-mule
2cb30410 544 coding-category-raw-text
a1506d29 545 coding-category-sjis
08c19a27 546 coding-category-big5
a1223d6c 547 coding-category-ccl
2c7794b0 548 coding-category-binary
60ec9367 549 ))
08c19a27 550
c1b628eb
KH
551\f
552;;; Miscellaneous settings.
e97c2306 553(aset latin-extra-code-table ?\221 t)
c1b628eb 554(aset latin-extra-code-table ?\222 t)
40232452
KH
555(aset latin-extra-code-table ?\223 t)
556(aset latin-extra-code-table ?\224 t)
b4026917 557(aset latin-extra-code-table ?\225 t)
e97c2306 558(aset latin-extra-code-table ?\226 t)
c1b628eb 559
4cb4b388
KH
560(update-coding-systems-internal)
561
c791cb54 562;; arch-tag: 7d5fed55-b6df-42f6-8d3d-0011190551f5
08c19a27 563;;; mule-conf.el ends here