| 1 | ;;; mule-conf.el --- configure multilingual environment |
| 2 | |
| 3 | ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. |
| 4 | ;; Licensed to the Free Software Foundation. |
| 5 | |
| 6 | ;; Keywords: mule, multilingual, character set, coding system |
| 7 | |
| 8 | ;; This file is part of GNU Emacs. |
| 9 | |
| 10 | ;; GNU Emacs is free software; you can redistribute it and/or modify |
| 11 | ;; it under the terms of the GNU General Public License as published by |
| 12 | ;; the Free Software Foundation; either version 2, or (at your option) |
| 13 | ;; any later version. |
| 14 | |
| 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ;; GNU General Public License for more details. |
| 19 | |
| 20 | ;; You should have received a copy of the GNU General Public License |
| 21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
| 22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 23 | ;; Boston, MA 02111-1307, USA. |
| 24 | |
| 25 | ;;; Commentary: |
| 26 | |
| 27 | ;; Don't byte-compile this file. |
| 28 | |
| 29 | ;;; Code: |
| 30 | |
| 31 | ;;; Definitions of character sets. |
| 32 | |
| 33 | ;; Basic (official) character sets. These character sets are treated |
| 34 | ;; efficiently with respect to buffer memory. |
| 35 | |
| 36 | ;; Syntax: |
| 37 | ;; (define-charset CHARSET-ID CHARSET |
| 38 | ;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE |
| 39 | ;; SHORT-NAME LONG-NAME DESCRIPTION ]) |
| 40 | ;; ASCII charset is defined in src/charset.c as below. |
| 41 | ;; (define-charset 0 ascii |
| 42 | ;; [1 94 1 0 ?B 0 "ASCII" "ASCII" "ASCII (ISO646 IRV)"]) |
| 43 | |
| 44 | ;; 1-byte charsets. Valid range of CHARSET-ID is 128..143. |
| 45 | ;; Every charset in this range has DIMENSION 1 and WIDTH 1; DIRECTION is 1 only for the Arabic and Hebrew sets. |
| 46 | ;; CHARSET-ID 128 is not used. |
| 47 | |
| 48 | (define-charset 129 'latin-iso8859-1 |
| 49 | [1 96 1 0 ?A 1 "RHP of Latin-1" "RHP of Latin-1 (ISO 8859-1): ISO-IR-100" |
| 50 | "Right-Hand Part of Latin Alphabet 1 (ISO/IEC 8859-1): ISO-IR-100."]) |
| 51 | (define-charset 130 'latin-iso8859-2 |
| 52 | [1 96 1 0 ?B 1 "RHP of Latin-2" "RHP of Latin-2 (ISO 8859-2): ISO-IR-101" |
| 53 | "Right-Hand Part of Latin Alphabet 2 (ISO/IEC 8859-2): ISO-IR-101."]) |
| 54 | (define-charset 131 'latin-iso8859-3 |
| 55 | [1 96 1 0 ?C 1 "RHP of Latin-3" "RHP of Latin-3 (ISO 8859-3): ISO-IR-109" |
| 56 | "Right-Hand Part of Latin Alphabet 3 (ISO/IEC 8859-3): ISO-IR-109."]) |
| 57 | (define-charset 132 'latin-iso8859-4 |
| 58 | [1 96 1 0 ?D 1 "RHP of Latin-4" "RHP of Latin-4 (ISO 8859-4): ISO-IR-110" |
| 59 | "Right-Hand Part of Latin Alphabet 4 (ISO/IEC 8859-4): ISO-IR-110."]) |
| 60 | (define-charset 133 'thai-tis620 |
| 61 | [1 96 1 0 ?T 1 "RHP of TIS620" "RHP of Thai (TIS620): ISO-IR-166" |
| 62 | "Right-Hand Part of TIS620.2533 (Thai): ISO-IR-166."]) |
| 63 | (define-charset 134 'greek-iso8859-7 |
| 64 | [1 96 1 0 ?F 1 "RHP of ISO8859/7" "RHP of Greek (ISO 8859-7): ISO-IR-126" |
| 65 | "Right-Hand Part of Latin/Greek Alphabet (ISO/IEC 8859-7): ISO-IR-126."]) |
| 66 | (define-charset 135 'arabic-iso8859-6 |
| 67 | [1 96 1 1 ?G 1 "RHP of ISO8859/6" "RHP of Arabic (ISO 8859-6): ISO-IR-127" |
| 68 | "Right-Hand Part of Latin/Arabic Alphabet (ISO/IEC 8859-6): ISO-IR-127."]) |
| 69 | (define-charset 136 'hebrew-iso8859-8 |
| 70 | [1 96 1 1 ?H 1 "RHP of ISO8859/8" "RHP of Hebrew (ISO 8859-8): ISO-IR-138" |
| 71 | "Right-Hand Part of Latin/Hebrew Alphabet (ISO/IEC 8859-8): ISO-IR-138."]) |
| 72 | (define-charset 137 'katakana-jisx0201 |
| 73 | [1 94 1 0 ?I 1 "JISX0201 Katakana" "Japanese Katakana (JISX0201.1976)" |
| 74 | "Katakana Part of JISX0201.1976."]) |
| 75 | (define-charset 138 'latin-jisx0201 |
| 76 | [1 94 1 0 ?J 0 "JISX0201 Roman" "Japanese Roman (JISX0201.1976)" |
| 77 | "Roman Part of JISX0201.1976."]) |
| 78 | |
| 79 | ;; CHARSET-ID 139 is not used. |
| 80 | |
| 81 | (define-charset 140 'cyrillic-iso8859-5 |
| 82 | [1 96 1 0 ?L 1 "RHP of ISO8859/5" "RHP of Cyrillic (ISO 8859-5): ISO-IR-144" |
| 83 | "Right-Hand Part of Latin/Cyrillic Alphabet (ISO/IEC 8859-5): ISO-IR-144."]) |
| 84 | (define-charset 141 'latin-iso8859-9 |
| 85 | [1 96 1 0 ?M 1 "RHP of Latin-5" "RHP of Latin-5 (ISO 8859-9): ISO-IR-148" |
| 86 | "Right-Hand Part of Latin Alphabet 5 (ISO/IEC 8859-9): ISO-IR-148."]) |
| 87 | (define-charset 142 'latin-iso8859-15 |
| 88 | [1 96 1 0 ?b 1 "RHP of Latin-9" "RHP of Latin-9 (ISO 8859-15): ISO-IR-203" |
| 89 | "Right-Hand Part of Latin Alphabet 9 (ISO/IEC 8859-15): ISO-IR-203."]) |
| 90 | (define-charset 143 'latin-iso8859-14 |
| 91 | [1 96 1 0 ?_ 1 "RHP of Latin-8" "RHP of Latin-8 (ISO 8859-14): ISO-IR-199" |
| 92 | "Right-Hand Part of Latin Alphabet 8 (ISO/IEC 8859-14): ISO-IR-199."]) |
| 93 | |
| 94 | ;; 2-byte charsets. Valid range of CHARSET-ID is 144..153. |
| 95 | ;; Every charset in this range has DIMENSION 2, CHARS 94, WIDTH 2 and DIRECTION 0. |
| 96 | (define-charset 144 'japanese-jisx0208-1978 |
| 97 | [2 94 2 0 ?@ 0 "JISX0208.1978" "JISX0208.1978 (Japanese): ISO-IR-42" |
| 98 | "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42."]) |
| 99 | (define-charset 145 'chinese-gb2312 |
| 100 | [2 94 2 0 ?A 0 "GB2312" "GB2312: ISO-IR-58" |
| 101 | "GB2312 Chinese simplified: ISO-IR-58."]) |
| 102 | (define-charset 146 'japanese-jisx0208 |
| 103 | [2 94 2 0 ?B 0 "JISX0208" "JISX0208.1983/1990 (Japanese): ISO-IR-87" |
| 104 | "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87."]) |
| 105 | (define-charset 147 'korean-ksc5601 |
| 106 | [2 94 2 0 ?C 0 "KSC5601" "KSC5601 (Korean): ISO-IR-149" |
| 107 | "KSC5601 Korean Hangul and Hanja: ISO-IR-149."]) |
| 108 | (define-charset 148 'japanese-jisx0212 |
| 109 | [2 94 2 0 ?D 0 "JISX0212" "JISX0212 (Japanese): ISO-IR-159" |
| 110 | "JISX0212 Japanese supplement: ISO-IR-159."]) |
| 111 | (define-charset 149 'chinese-cns11643-1 |
| 112 | [2 94 2 0 ?G 0 "CNS11643-1" "CNS11643-1 (Chinese traditional): ISO-IR-171" |
| 113 | "CNS11643 Plane 1 Chinese traditional: ISO-IR-171."]) |
| 114 | (define-charset 150 'chinese-cns11643-2 |
| 115 | [2 94 2 0 ?H 0 "CNS11643-2" "CNS11643-2 (Chinese traditional): ISO-IR-172" |
| 116 | "CNS11643 Plane 2 Chinese traditional: ISO-IR-172."]) |
| 117 | (define-charset 151 'japanese-jisx0213-1 |
| 118 | [2 94 2 0 ?O 0 "JISX0213-1" "JISX0213-1" "JISX0213 Plane 1 (Japanese)"]) |
| 119 | (define-charset 152 'chinese-big5-1 |
| 120 | [2 94 2 0 ?0 0 "Big5 (Level-1)" "Big5 (Level-1) A141-C67F" |
| 121 | "Frequently used part (A141-C67F) of Big5 (Chinese traditional)."]) |
| 122 | (define-charset 153 'chinese-big5-2 |
| 123 | [2 94 2 0 ?1 0 "Big5 (Level-2)" "Big5 (Level-2) C940-FEFE" |
| 124 | "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)."]) |
| 125 | |
| 126 | ;; Additional (private) character sets. These character sets are |
| 127 | ;; treated less space-efficiently in the buffer. |
| 128 | |
| 129 | ;; Syntax: |
| 130 | ;; (define-charset CHARSET-ID CHARSET |
| 131 | ;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE |
| 132 | ;; SHORT-NAME LONG-NAME DESCRIPTION ]) |
| 133 | |
| 134 | ;; ISO-2022 allows the use of character sets not registered in ISO, with |
| 135 | ;; final characters `0' (0x30) through `?' (0x3F). Among them, Emacs |
| 136 | ;; reserves `0' through `9' to support several private character sets. |
| 137 | ;; The remaining final characters `:' through `?' are for users. |
| 138 | |
| 139 | ;; 1-byte 1-column charsets. Valid range of CHARSET-ID is 160..223. |
| 140 | ;; Every charset defined in this section has DIMENSION 1 and WIDTH 1. |
| 141 | (define-charset 160 'chinese-sisheng |
| 142 | [1 94 1 0 ?0 0 "SiSheng" "SiSheng (PinYin/ZhuYin)" |
| 143 | "Sisheng characters (vowels with tone marks) for Pinyin/Zhuyin."]) |
| 144 | |
| 145 | ;; IPA characters for phonetic symbols. |
| 146 | (define-charset 161 'ipa |
| 147 | [1 96 1 0 ?0 1 "IPA" "IPA" |
| 148 | "IPA (International Phonetic Association) characters."]) |
| 149 | |
| 150 | ;; Vietnamese VISCII. VISCII is 1-byte character set which contains |
| 151 | ;; more than 96 characters. Since Emacs can't handle it as one |
| 152 | ;; character set, it is divided into two: lower case letters and upper |
| 153 | ;; case letters. |
| 154 | (define-charset 162 'vietnamese-viscii-lower |
| 155 | [1 96 1 0 ?1 1 "VISCII lower" "VISCII lower-case" |
| 156 | "Vietnamese VISCII1.1 lower-case characters."]) |
| 157 | (define-charset 163 'vietnamese-viscii-upper |
| 158 | [1 96 1 0 ?2 1 "VISCII upper" "VISCII upper-case" |
| 159 | "Vietnamese VISCII1.1 upper-case characters."]) |
| 160 | |
| 161 | ;; For Arabic, we need three different types of character sets. |
| 162 | ;; Digits are of direction left-to-right and of width 1-column. |
| 163 | ;; Others are of direction right-to-left and of width 1-column or |
| 164 | ;; 2-column. The 2-column set (`arabic-2-column', CHARSET-ID 224) is defined further below. |
| 165 | (define-charset 164 'arabic-digit |
| 166 | [1 94 1 0 ?2 0 "Arabic digit" "Arabic digit" |
| 167 | "Arabic digits."]) |
| 168 | (define-charset 165 'arabic-1-column |
| 169 | [1 94 1 1 ?3 0 "Arabic 1-col" "Arabic 1-column" |
| 170 | "Arabic 1-column width glyphs."]) |
| 171 | |
| 172 | ;; ASCII with right-to-left direction. |
| 173 | (define-charset 166 'ascii-right-to-left |
| 174 | [1 94 1 1 ?B 0 "rev ASCII" "ASCII with right-to-left direction" |
| 175 | "ASCII (left half of ISO 8859-1) with right-to-left direction."]) |
| 176 | |
| 177 | ;; Lao script. |
| 178 | ;; ISO10646's 0x0E80..0x0EDF are mapped to 0x20..0x7F. |
| 179 | (define-charset 167 'lao |
| 180 | [1 94 1 0 ?1 0 "Lao" "Lao" |
| 181 | "Lao characters (U+0E80..U+0EDF)."]) |
| 182 | |
| 183 | ;; CHARSET-IDs 168..223 are not used. |
| 184 | |
| 185 | ;; 1-byte 2-column charsets. Valid range of CHARSET-ID is 224..239. |
| 186 | ;; Both charsets defined in this section have DIMENSION 1 and WIDTH 2. |
| 187 | (define-charset 224 'arabic-2-column |
| 188 | [1 94 2 1 ?4 0 "Arabic 2-col" "Arabic 2-column" |
| 189 | "Arabic 2-column glyphs."]) |
| 190 | |
| 191 | ;; Indian scripts. Symbolic charset for data exchange. Glyphs are |
| 192 | ;; not assigned. They are automatically converted to each Indian |
| 193 | ;; script which IS-13194 supports. |
| 194 | |
| 195 | (define-charset 225 'indian-is13194 |
| 196 | [1 94 2 0 ?5 1 "IS 13194" "Indian IS 13194" |
| 197 | "Generic Indian character set for data exchange with IS 13194."]) |
| 198 | |
| 199 | ;; CHARSET-IDs 226..239 are not used. |
| 200 | |
| 201 | (define-charset 240 'indian-glyph |
| 202 | [2 96 1 0 ?4 0 "Indian glyph" "Indian glyph" |
| 203 | "Glyphs for Indian characters."]) |
| 204 | ;; 240 used to be [2 94 1 0 ?6 0 "Indian 1-col" "Indian 1 Column"] |
| 205 | |
| 206 | ;; 2-byte 1-column charsets. Valid range of CHARSET-ID is 240..244. Note that 240 (`indian-glyph') is defined just above this heading. |
| 207 | |
| 208 | ;; Actual glyphs for 1-column width (Tibetan). |
| 209 | (define-charset 241 'tibetan-1-column |
| 210 | [2 94 1 0 ?8 0 "Tibetan 1-col" "Tibetan 1 column" |
| 211 | "Tibetan 1-column glyphs."]) |
| 212 | |
| 213 | ;; Subsets of Unicode. Each subset has DIMENSION 2, CHARS 96 and WIDTH 1; the three differ in ISO-FINAL-CHAR (?1, ?2, ?3). |
| 214 | |
| 215 | (define-charset 242 'mule-unicode-2500-33ff |
| 216 | [2 96 1 0 ?2 0 "Unicode subset 2" "Unicode subset (U+2500..U+33FF)" |
| 217 | "Unicode characters of the range U+2500..U+33FF."]) |
| 218 | ;; Unicode subset U+E000..U+FFFF; LONG-NAME and DESCRIPTION spell the range the same way. |
| 219 | (define-charset 243 'mule-unicode-e000-ffff |
| 220 | [2 96 1 0 ?3 0 "Unicode subset 3" "Unicode subset (U+E000..U+FFFF)" |
| 221 | "Unicode characters of the range U+E000..U+FFFF."]) |
| 222 | ;; The lowest range, U+0100..U+24FF, is defined last and has the highest CHARSET-ID (244) of the three Unicode subsets. |
| 223 | (define-charset 244 'mule-unicode-0100-24ff |
| 224 | [2 96 1 0 ?1 0 "Unicode subset" "Unicode subset (U+0100..U+24FF)" |
| 225 | "Unicode characters of the range U+0100..U+24FF."]) |
| 226 | |
| 227 | ;; 2-byte 2-column charsets. Valid range of CHARSET-ID is 245..254. |
| 228 | |
| 229 | ;; Ethiopic characters (Amharic and Tigrigna). |
| 230 | (define-charset 245 'ethiopic |
| 231 | [2 94 2 0 ?3 0 "Ethiopic" "Ethiopic characters" |
| 232 | "Ethiopic characters."]) |
| 233 | |
| 234 | ;; Chinese CNS11643 Plane 3 through Plane 7. Although these are official |
| 235 | ;; character sets, their use is rare and they don't have to be treated |
| 236 | ;; space-efficiently in the buffer. |
| 237 | (define-charset 246 'chinese-cns11643-3 |
| 238 | [2 94 2 0 ?I 0 "CNS11643-3" "CNS11643-3 (Chinese traditional): ISO-IR-183" |
| 239 | "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183."]) |
| 240 | (define-charset 247 'chinese-cns11643-4 |
| 241 | [2 94 2 0 ?J 0 "CNS11643-4" "CNS11643-4 (Chinese traditional): ISO-IR-184" |
| 242 | "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184."]) |
| 243 | (define-charset 248 'chinese-cns11643-5 |
| 244 | [2 94 2 0 ?K 0 "CNS11643-5" "CNS11643-5 (Chinese traditional): ISO-IR-185" |
| 245 | "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185."]) |
| 246 | (define-charset 249 'chinese-cns11643-6 |
| 247 | [2 94 2 0 ?L 0 "CNS11643-6" "CNS11643-6 (Chinese traditional): ISO-IR-186" |
| 248 | "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186."]) |
| 249 | (define-charset 250 'chinese-cns11643-7 |
| 250 | [2 94 2 0 ?M 0 "CNS11643-7" "CNS11643-7 (Chinese traditional): ISO-IR-187" |
| 251 | "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187."]) |
| 252 | |
| 253 | ;; Actual glyphs for 2-column width (Indian scripts). |
| 254 | (define-charset 251 'indian-2-column |
| 255 | [2 94 2 0 ?5 0 "Indian 2-col" "Indian 2 Column" |
| 256 | "Indian character set for 2-column width glyphs."]) |
| 257 | ;; Old indian-1-column characters will be translated to indian-2-column (the 2 94 ?6 arguments match the DIMENSION, CHARS and final character of the old definition of charset 240). |
| 258 | (declare-equiv-charset 2 94 ?6 'indian-2-column) |
| 259 | |
| 260 | ;; Tibetan script. Actual glyphs for 2-column width. |
| 261 | (define-charset 252 'tibetan |
| 262 | [2 94 2 0 ?7 0 "Tibetan 2-col" "Tibetan 2 column" |
| 263 | "Tibetan 2-column width glyphs."]) |
| 264 | |
| 265 | ;; CHARSET-ID 253 is not used. |
| 266 | |
| 267 | ;; JISX0213 Plane 2. (Plane 1 is `japanese-jisx0213-1', CHARSET-ID 151.) |
| 268 | (define-charset 254 'japanese-jisx0213-2 |
| 269 | [2 94 2 0 ?P 0 "JISX0213-2" "JISX0213-2" |
| 270 | "JISX0213 Plane 2 (Japanese)."]) |
| 271 | |
| 272 | ;; Tell the C code the charset IDs of several charsets. |
| 273 | (setup-special-charsets) |
| 274 | |
| 275 | \f |
| 276 | ;; These are tables for translating characters on decoding and |
| 277 | ;; encoding. The two `aset' calls below make latin-jisx0201 codes 92 and 126 map to themselves in the table. |
| 278 | (define-translation-table |
| 279 | 'oldjis-newjis-jisroman-ascii |
| 280 | (list (cons (make-char 'japanese-jisx0208-1978) |
| 281 | (make-char 'japanese-jisx0208)) |
| 282 | (cons (make-char 'latin-jisx0201) (make-char 'ascii)))) |
| 283 | (aset (get 'oldjis-newjis-jisroman-ascii 'translation-table) |
| 284 | (make-char 'latin-jisx0201 92) (make-char 'latin-jisx0201 92)) |
| 285 | (aset (get 'oldjis-newjis-jisroman-ascii 'translation-table) |
| 286 | (make-char 'latin-jisx0201 126) (make-char 'latin-jisx0201 126)) |
| 287 | ;; That table becomes the standard table for decoding; the standard table for encoding is left nil. |
| 288 | (setq standard-translation-table-for-decode |
| 289 | (get 'oldjis-newjis-jisroman-ascii 'translation-table)) |
| 290 | |
| 291 | (setq standard-translation-table-for-encode nil) |
| 292 | |
| 293 | (defvar translation-table-for-input nil |
| 294 | "If non-nil, a char table used to translate characters from input methods. |
| 295 | \(Currently only used by Quail.)") |
| 296 | \f |
| 297 | ;;; Make fundamental coding systems. |
| 298 | |
| 299 | ;; Miscellaneous coding systems which can't be made by |
| 300 | ;; `make-coding-system'. |
| 301 | ;; `no-conversion' is set up by hand: its `coding-system' and `eol-type' properties are put directly, and it seeds `coding-system-list' and `coding-system-alist'. |
| 302 | (put 'no-conversion 'coding-system |
| 303 | (vector nil ?= "Do no conversion. |
| 304 | |
| 305 | When you visit a file with this coding, the file is read into a |
| 306 | unibyte buffer as is, thus each byte of a file is treated as a |
| 307 | character." |
| 308 | (list 'coding-category 'coding-category-binary |
| 309 | 'alias-coding-systems '(no-conversion)) |
| 310 | nil)) |
| 311 | (put 'no-conversion 'eol-type 0) |
| 312 | (put 'coding-category-binary 'coding-systems '(no-conversion)) |
| 313 | (setq coding-system-list '(no-conversion)) |
| 314 | (setq coding-system-alist '(("no-conversion"))) |
| 315 | (register-char-codings 'no-conversion t) |
| 316 | |
| 317 | (define-coding-system-alias 'binary 'no-conversion) |
| 318 | ;; `undecided' is set up the same way and consed onto the two lists started above. |
| 319 | (put 'undecided 'coding-system |
| 320 | (vector t ?- "No conversion on encoding, automatic conversion on decoding" |
| 321 | (list 'alias-coding-systems '(undecided) |
| 322 | 'safe-charsets '(ascii)) |
| 323 | nil)) |
| 324 | (setq coding-system-list (cons 'undecided coding-system-list)) |
| 325 | (setq coding-system-alist (cons '("undecided") coding-system-alist)) |
| 326 | (put 'undecided 'eol-type |
| 327 | (make-subsidiary-coding-system 'undecided)) |
| 328 | ;; Short aliases `unix', `dos' and `mac' for the `undecided-*' coding systems. |
| 329 | (define-coding-system-alias 'unix 'undecided-unix) |
| 330 | (define-coding-system-alias 'dos 'undecided-dos) |
| 331 | (define-coding-system-alias 'mac 'undecided-mac) |
| 332 | |
| 333 | ;; Coding systems not specific to any one language environment. |
| 334 | ;; `emacs-mule' and `raw-text' below both pass nil after the doc string and declare (safe-charsets . t). |
| 335 | (make-coding-system |
| 336 | 'emacs-mule 0 ?= |
| 337 | "Emacs internal format used in buffer and string. |
| 338 | |
| 339 | Encoding text with this coding system produces the actual byte |
| 340 | sequence of the text in buffers and strings. An exception is made for |
| 341 | eight-bit-control characters. Each of them is encoded into a single |
| 342 | byte." |
| 343 | nil |
| 344 | '((safe-charsets . t))) |
| 345 | |
| 346 | (make-coding-system |
| 347 | 'raw-text 5 ?t |
| 348 | "Raw text, which means text contains random 8-bit codes. |
| 349 | Encoding text with this coding system produces the actual byte |
| 350 | sequence of the text in buffers and strings. An exception is made for |
| 351 | eight-bit-control characters. Each of them is encoded into a single |
| 352 | byte. |
| 353 | |
| 354 | When you visit a file with this coding, the file is read into a |
| 355 | unibyte buffer as is (except for EOL format), thus each byte of a file |
| 356 | is treated as a character." |
| 357 | nil |
| 358 | '((safe-charsets . t))) |
| 359 | ;; ISO 2022 based coding systems; each passes 2 as its second argument. All but `iso-2022-7bit-lock-ss2' declare (safe-charsets . t); that one lists its safe charsets explicitly. |
| 360 | (make-coding-system |
| 361 | 'iso-2022-7bit 2 ?J |
| 362 | "ISO 2022 based 7-bit encoding using only G0" |
| 363 | '((ascii t) nil nil nil |
| 364 | short ascii-eol ascii-cntl seven) |
| 365 | '((safe-charsets . t) |
| 366 | (composition . t))) |
| 367 | |
| 368 | (make-coding-system |
| 369 | 'iso-2022-7bit-ss2 2 ?$ |
| 370 | "ISO 2022 based 7-bit encoding using SS2 for 96-charset" |
| 371 | '((ascii t) nil t nil |
| 372 | short ascii-eol ascii-cntl seven nil single-shift) |
| 373 | '((safe-charsets . t) |
| 374 | (composition . t))) |
| 375 | |
| 376 | (make-coding-system |
| 377 | 'iso-2022-7bit-lock 2 ?& |
| 378 | "ISO-2022 coding system using Locking-Shift for 96-charset" |
| 379 | '((ascii t) t nil nil |
| 380 | nil ascii-eol ascii-cntl seven locking-shift) |
| 381 | '((safe-charsets . t) |
| 382 | (composition . t))) |
| 383 | |
| 384 | (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock) |
| 385 | |
| 386 | (make-coding-system |
| 387 | 'iso-2022-7bit-lock-ss2 2 ?i |
| 388 | "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN" |
| 389 | '((ascii t) |
| 390 | (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 t) |
| 391 | (nil chinese-cns11643-2) |
| 392 | (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 |
| 393 | chinese-cns11643-6 chinese-cns11643-7) |
| 394 | short ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil |
| 395 | init-bol) |
| 396 | '((safe-charsets ascii japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201 |
| 397 | korean-ksc5601 chinese-gb2312 chinese-cns11643-1 |
| 398 | chinese-cns11643-2 chinese-cns11643-3 chinese-cns11643-4 |
| 399 | chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7) |
| 400 | (composition . t))) |
| 401 | |
| 402 | (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2) |
| 403 | |
| 404 | (make-coding-system |
| 405 | 'iso-2022-8bit-ss2 2 ?@ |
| 406 | "ISO 2022 based 8-bit encoding using SS2 for 96-charset" |
| 407 | '((ascii t) nil t nil |
| 408 | nil ascii-eol ascii-cntl nil nil single-shift) |
| 409 | '((safe-charsets . t) |
| 410 | (composition . t))) |
| 411 | ;; Compound text. `compound-text' and `ctext-no-compositions' are given identical fifth arguments; only `compound-text' has the mime-charset and composition properties. |
| 412 | (make-coding-system |
| 413 | 'compound-text 2 ?x |
| 414 | "Compound text based generic encoding for decoding unknown messages. |
| 415 | |
| 416 | This coding system does not support ICCCM Extended Segments." |
| 417 | '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t |
| 418 | nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil |
| 419 | init-bol nil nil) |
| 420 | '((safe-charsets . t) |
| 421 | (mime-charset . x-ctext) |
| 422 | (composition . t))) |
| 423 | |
| 424 | (define-coding-system-alias 'x-ctext 'compound-text) |
| 425 | (define-coding-system-alias 'ctext 'compound-text) |
| 426 | |
| 427 | ;; Same as compound-text, but doesn't produce composition escape |
| 428 | ;; sequences. Used in post-read and pre-write conversions of |
| 429 | ;; compound-text-with-extensions, see mule.el. Note that this should |
| 430 | ;; not have a mime-charset property, to prevent it from showing up |
| 431 | ;; close to the beginning of coding systems ordered by priority. |
| 432 | (make-coding-system |
| 433 | 'ctext-no-compositions 2 ?x |
| 434 | "Compound text based generic encoding for decoding unknown messages. |
| 435 | |
| 436 | Like `compound-text', but does not produce escape sequences for compositions." |
| 437 | '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t |
| 438 | nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil |
| 439 | init-bol nil nil) |
| 440 | '((safe-charsets . t))) |
| 441 | |
| 442 | (make-coding-system |
| 443 | 'compound-text-with-extensions 5 ?x |
| 444 | "Compound text encoding with ICCCM Extended Segment extensions. |
| 445 | |
| 446 | This coding system should be used only for X selections. It is inappropriate |
| 447 | for decoding and encoding files, process I/O, etc." |
| 448 | nil |
| 449 | '((post-read-conversion . ctext-post-read-conversion) |
| 450 | (pre-write-conversion . ctext-pre-write-conversion))) |
| 451 | |
| 452 | (define-coding-system-alias |
| 453 | 'x-ctext-with-extensions 'compound-text-with-extensions) |
| 454 | (define-coding-system-alias |
| 455 | 'ctext-with-extensions 'compound-text-with-extensions) |
| 456 | ;; `iso-safe' (also reachable through the alias `us-ascii') lists only `ascii' in its safe-charsets property. |
| 457 | (make-coding-system |
| 458 | 'iso-safe 2 ?- |
| 459 | "Convert all characters but ASCII to `?'." |
| 460 | '(ascii nil nil nil |
| 461 | nil ascii-eol ascii-cntl nil nil nil nil nil nil nil nil t) |
| 462 | '((safe-charsets ascii))) |
| 463 | |
| 464 | (define-coding-system-alias |
| 465 | 'us-ascii 'iso-safe) |
| 466 | |
| 467 | ;; Use iso-safe for terminal output if some other coding system is not |
| 468 | ;; specified explicitly. |
| 469 | (set-safe-terminal-coding-system-internal 'iso-safe) |
| 470 | |
| 471 | ;; The other coding-systems are defined in each language specific |
| 472 | ;; section of languages.el. |
| 473 | |
| 474 | ;; Normally, set coding system to `undecided' before reading a file. |
| 475 | ;; Compiled Emacs Lisp files (*.elc) are not decoded at all, |
| 476 | ;; but we regard them as containing multibyte characters. |
| 477 | ;; Tar files are not decoded at all, but we treat them as raw bytes. |
| 478 | ;; The last entry's empty regexp matches every file name, so it supplies the default (presumably entries are tried in order -- keep it last). |
| 479 | (setq file-coding-system-alist |
| 480 | '(("\\.elc\\'" . (emacs-mule . emacs-mule)) |
| 481 | ("\\.utf\\(-8\\)?\\'" . utf-8) |
| 482 | ;; We use raw-text for reading loaddefs.el so that if it |
| 483 | ;; happens to have DOS or Mac EOLs, they are converted to |
| 484 | ;; newlines. This is required to make the special treatment |
| 485 | ;; of the "\ newline" combination in loaddefs.el, which marks |
| 486 | ;; the beginning of a doc string, work. |
| 487 | ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix)) |
| 488 | ("\\.tar\\'" . (no-conversion . no-conversion)) |
| 489 | ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system) |
| 490 | ("" . (undecided . nil)))) |
| 491 | |
| 492 | \f |
| 493 | ;;; Setting coding categories and their priorities. |
| 494 | |
| 495 | ;; This setting is just to read Emacs Lisp source files which |
| 496 | ;; contain multilingual text while dumping Emacs. More appropriate |
| 497 | ;; values are set by the command `set-language-environment' for each |
| 498 | ;; language environment. |
| 499 | ;; The ccl, utf-16-be and utf-16-le categories are set to nil here. |
| 500 | (setq coding-category-emacs-mule 'emacs-mule |
| 501 | coding-category-sjis 'japanese-shift-jis |
| 502 | coding-category-iso-7 'iso-2022-7bit |
| 503 | coding-category-iso-7-tight 'iso-2022-jp |
| 504 | coding-category-iso-8-1 'iso-latin-1 |
| 505 | coding-category-iso-8-2 'iso-latin-1 |
| 506 | coding-category-iso-7-else 'iso-2022-7bit-lock |
| 507 | coding-category-iso-8-else 'iso-2022-8bit-ss2 |
| 508 | coding-category-ccl nil |
| 509 | coding-category-utf-8 'mule-utf-8 |
| 510 | coding-category-utf-16-be nil |
| 511 | coding-category-utf-16-le nil |
| 512 | coding-category-big5 'chinese-big5 |
| 513 | coding-category-raw-text 'raw-text |
| 514 | coding-category-binary 'no-conversion) |
| 515 | ;; Initial priority order of the coding categories (presumably highest priority first -- confirm against `set-coding-priority'). |
| 516 | (set-coding-priority |
| 517 | '(coding-category-iso-8-1 |
| 518 | coding-category-iso-8-2 |
| 519 | coding-category-iso-7-tight |
| 520 | coding-category-iso-7 |
| 521 | coding-category-iso-7-else |
| 522 | coding-category-iso-8-else |
| 523 | coding-category-emacs-mule |
| 524 | coding-category-raw-text |
| 525 | coding-category-sjis |
| 526 | coding-category-big5 |
| 527 | coding-category-ccl |
| 528 | coding-category-binary |
| 529 | coding-category-utf-8 |
| 530 | coding-category-utf-16-be |
| 531 | coding-category-utf-16-le)) |
| 532 | |
| 533 | \f |
| 534 | ;;; Miscellaneous settings. |
| 535 | (aset latin-extra-code-table ?\222 t) |
| 536 | (aset latin-extra-code-table ?\223 t) |
| 537 | (aset latin-extra-code-table ?\224 t) |
| 538 | ;; The three lines above set the `latin-extra-code-table' entries for octal codes 222, 223 and 224 to t. |
| 539 | (update-coding-systems-internal) |
| 540 | |
| 541 | ;;; mule-conf.el ends here |