X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/97941b05253cdac11a83c12dd4fec72e82a189b0..c0e17dd87eb56bb2de52a0f7699fc33596c035f0:/lisp/international/mule-conf.el?ds=sidebyside diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el dissimilarity index 76% index fe3d6a2e49..31452b3408 100644 --- a/lisp/international/mule-conf.el +++ b/lisp/international/mule-conf.el @@ -1,537 +1,868 @@ -;;; mule-conf.el --- configure multilingual environment - -;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. - -;; Keywords: mule, multilingual, character set, coding system - -;; This file is part of GNU Emacs. - -;; GNU Emacs is free software; you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. - -;; GNU Emacs is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. - -;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. - -;;; Commentary: - -;; Don't byte-compile this file. - -;;; Code: - -;;; Definitions of character sets. - -;; Basic (official) character sets. These character sets are treated -;; efficiently with respect to buffer memory. - -;; Syntax: -;; (define-charset CHARSET-ID CHARSET -;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE -;; SHORT-NAME LONG-NAME DESCRIPTION ]) -;; ASCII charset is defined in src/charset.c as below. -;; (define-charset 0 ascii -;; [1 94 1 0 ?B 0 "ASCII" "ASCII" "ASCII (ISO646 IRV)"]) - -;; 1-byte charsets. Valid range of CHARSET-ID is 128..143. - -;; CHARSET-ID 128 is not used. - -(define-charset 129 'latin-iso8859-1 - [1 96 1 0 ?A 1 "RHP of Latin-1" "RHP of Latin-1 (ISO 8859-1): ISO-IR-100" - "Right-Hand Part of Latin Alphabet 1 (ISO/IEC 8859-1): ISO-IR-100."]) -(define-charset 130 'latin-iso8859-2 - [1 96 1 0 ?B 1 "RHP of Latin-2" "RHP of Latin-2 (ISO 8859-2): ISO-IR-101" - "Right-Hand Part of Latin Alphabet 2 (ISO/IEC 8859-2): ISO-IR-101."]) -(define-charset 131 'latin-iso8859-3 - [1 96 1 0 ?C 1 "RHP of Latin-3" "RHP of Latin-3 (ISO 8859-3): ISO-IR-109" - "Right-Hand Part of Latin Alphabet 3 (ISO/IEC 8859-3): ISO-IR-109."]) -(define-charset 132 'latin-iso8859-4 - [1 96 1 0 ?D 1 "RHP of Latin-4" "RHP of Latin-4 (ISO 8859-4): ISO-IR-110" - "Right-Hand Part of Latin Alphabet 4 (ISO/IEC 8859-4): ISO-IR-110."]) -(define-charset 133 'thai-tis620 - [1 96 1 0 ?T 1 "RHP of TIS620" "RHP of Thai (TIS620): ISO-IR-166" - "Right-Hand Part of TIS620.2533 (Thai): ISO-IR-166."]) -(define-charset 134 'greek-iso8859-7 - [1 96 1 0 ?F 1 "RHP of ISO8859/7" "RHP of Greek (ISO 8859-7): ISO-IR-126" - "Right-Hand Part of Latin/Greek Alphabet (ISO/IEC 8859-7): ISO-IR-126."]) -(define-charset 135 'arabic-iso8859-6 - [1 96 1 1 ?G 1 "RHP of ISO8859/6" "RHP of Arabic (ISO 8859-6): ISO-IR-127" - "Right-Hand Part of Latin/Arabic Alphabet (ISO/IEC 8859-6): ISO-IR-127."]) -(define-charset 136 'hebrew-iso8859-8 - [1 96 1 1 ?H 1 "RHP of ISO8859/8" "RHP of Hebrew (ISO 8859-8): ISO-IR-138" - "Right-Hand Part of Latin/Hebrew Alphabet (ISO/IEC 8859-8): ISO-IR-138."]) -(define-charset 137 'katakana-jisx0201 - [1 94 1 0 ?I 1 "JISX0201 Katakana" "Japanese Katakana (JISX0201.1976)" - "Katakana Part of JISX0201.1976."]) -(define-charset 138 'latin-jisx0201 - [1 94 1 0 ?J 0 "JISX0201 Roman" "Japanese Roman (JISX0201.1976)" - "Roman Part of JISX0201.1976."]) - -;; CHARSET-ID is not used 139. - -(define-charset 140 'cyrillic-iso8859-5 - [1 96 1 0 ?L 1 "RHP of ISO8859/5" "RHP of Cyrillic (ISO 8859-5): ISO-IR-144" - "Right-Hand Part of Latin/Cyrillic Alphabet (ISO/IEC 8859-5): ISO-IR-144."]) -(define-charset 141 'latin-iso8859-9 - [1 96 1 0 ?M 1 "RHP of Latin-5" "RHP of Latin-5 (ISO 8859-9): ISO-IR-148" - "Right-Hand Part of Latin Alphabet 5 (ISO/IEC 8859-9): ISO-IR-148."]) -(define-charset 142 'latin-iso8859-15 - [1 96 1 0 ?b 1 "RHP of Latin-9" "RHP of Latin-9 (ISO 8859-15): ISO-IR-203" - "Right-Hand Part of Latin Alphabet 9 (ISO/IEC 8859-15): ISO-IR-203."]) -(define-charset 143 'latin-iso8859-14 - [1 96 1 0 ?_ 1 "RHP of Latin-8" "RHP of Latin-8 (ISO 8859-14): ISO-IR-199" - "Right-Hand Part of Latin Alphabet 8 (ISO/IEC 8859-14): ISO-IR-199."]) - -;; 2-byte charsets. Valid range of CHARSET-ID is 144..153. - -(define-charset 144 'japanese-jisx0208-1978 - [2 94 2 0 ?@ 0 "JISX0208.1978" "JISX0208.1978 (Japanese): ISO-IR-42" - "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42."]) -(define-charset 145 'chinese-gb2312 - [2 94 2 0 ?A 0 "GB2312" "GB2312: ISO-IR-58" - "GB2312 Chinese simplified: ISO-IR-58."]) -(define-charset 146 'japanese-jisx0208 - [2 94 2 0 ?B 0 "JISX0208" "JISX0208.1983/1990 (Japanese): ISO-IR-87" - "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87."]) -(define-charset 147 'korean-ksc5601 - [2 94 2 0 ?C 0 "KSC5601" "KSC5601 (Korean): ISO-IR-149" - "KSC5601 Korean Hangul and Hanja: ISO-IR-149."]) -(define-charset 148 'japanese-jisx0212 - [2 94 2 0 ?D 0 "JISX0212" "JISX0212 (Japanese): ISO-IR-159" - "JISX0212 Japanese supplement: ISO-IR-159."]) -(define-charset 149 'chinese-cns11643-1 - [2 94 2 0 ?G 0 "CNS11643-1" "CNS11643-1 (Chinese traditional): ISO-IR-171" - "CNS11643 Plane 1 Chinese traditional: ISO-IR-171."]) -(define-charset 150 'chinese-cns11643-2 - [2 94 2 0 ?H 0 "CNS11643-2" "CNS11643-2 (Chinese traditional): ISO-IR-172" - "CNS11643 Plane 2 Chinese traditional: ISO-IR-172."]) -(define-charset 151 'japanese-jisx0213-1 - [2 94 2 0 ?O 0 "JISX0213-1" "JISX0213-1" "JISX0213 Plane 1 (Japanese)"]) -(define-charset 152 'chinese-big5-1 - [2 94 2 0 ?0 0 "Big5 (Level-1)" "Big5 (Level-1) A141-C67F" - "Frequently used part (A141-C67F) of Big5 (Chinese traditional)."]) -(define-charset 153 'chinese-big5-2 - [2 94 2 0 ?1 0 "Big5 (Level-2)" "Big5 (Level-2) C940-FEFE" - "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)."]) - -;; Additional (private) character sets. These character sets are -;; treated less space-efficiently in the buffer. - -;; Syntax: -;; (define-charset CHARSET-ID CHARSET -;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE -;; SHORT-NAME LONG-NAME DESCRIPTION ]) - -;; ISO-2022 allows a use of character sets not registered in ISO with -;; final characters `0' (0x30) through `?' (0x3F). Among them, Emacs -;; reserves `0' through `9' to support several private character sets. -;; The remaining final characters `:' through `?' are for users. - -;; 1-byte 1-column charsets. Valid range of CHARSET-ID is 160..223. - -(define-charset 160 'chinese-sisheng - [1 94 1 0 ?0 0 "SiSheng" "SiSheng (PinYin/ZhuYin)" - "Sisheng characters (vowels with tone marks) for Pinyin/Zhuyin."]) - -;; IPA characters for phonetic symbols. -(define-charset 161 'ipa - [1 96 1 0 ?0 1 "IPA" "IPA" - "IPA (International Phonetic Association) characters."]) - -;; Vietnamese VISCII. VISCII is 1-byte character set which contains -;; more than 96 characters. Since Emacs can't handle it as one -;; character set, it is divided into two: lower case letters and upper -;; case letters. -(define-charset 162 'vietnamese-viscii-lower - [1 96 1 0 ?1 1 "VISCII lower" "VISCII lower-case" - "Vietnamese VISCII1.1 lower-case characters."]) -(define-charset 163 'vietnamese-viscii-upper - [1 96 1 0 ?2 1 "VISCII upper" "VISCII upper-case" - "Vietnamese VISCII1.1 upper-case characters."]) - -;; For Arabic, we need three different types of character sets. -;; Digits are of direction left-to-right and of width 1-column. -;; Others are of direction right-to-left and of width 1-column or -;; 2-column. -(define-charset 164 'arabic-digit - [1 94 1 0 ?2 0 "Arabic digit" "Arabic digit" - "Arabic digits."]) -(define-charset 165 'arabic-1-column - [1 94 1 1 ?3 0 "Arabic 1-col" "Arabic 1-column" - "Arabic 1-column width glyphs."]) - -;; ASCII with right-to-left direction. -(define-charset 166 'ascii-right-to-left - [1 94 1 1 ?B 0 "rev ASCII" "ASCII with right-to-left direction" - "ASCII (left half of ISO 8859-1) with right-to-left direction."]) - -;; Lao script. -;; ISO10646's 0x0E80..0x0EDF are mapped to 0x20..0x7F. -(define-charset 167 'lao - [1 94 1 0 ?1 0 "Lao" "Lao" - "Lao characters (U+0E80..U+0EDF)."]) - -;; CHARSET-IDs 168..223 are not used. - -;; 1-byte 2-column charsets. Valid range of CHARSET-ID is 224..239. - -(define-charset 224 'arabic-2-column - [1 94 2 1 ?4 0 "Arabic 2-col" "Arabic 2-column" - "Arabic 2-column glyphs."]) - -;; Indian scripts. Symbolic charset for data exchange. Glyphs are -;; not assigned. They are automatically converted to each Indian -;; script which IS-13194 supports. - -(define-charset 225 'indian-is13194 - [1 94 2 0 ?5 1 "IS 13194" "Indian IS 13194" - "Generic Indian character set for data exchange with IS 13194."]) - -;; CHARSET-IDs 226..239 are not used. - -(define-charset 240 'indian-glyph - [2 96 1 0 ?4 0 "Indian glyph" "Indian glyph" - "Glyphs for Indian characters."]) -;; 240 used to be [2 94 1 0 ?6 0 "Indian 1-col" "Indian 1 Column"] - -;; 2-byte 1-column charsets. Valid range of CHARSET-ID is 240..244. - -;; Actual Glyph for 1-column width. -(define-charset 241 'tibetan-1-column - [2 94 1 0 ?8 0 "Tibetan 1-col" "Tibetan 1 column" - "Tibetan 1-column glyphs."]) - -;; Subsets of Unicode. - -(define-charset 242 'mule-unicode-2500-33ff - [2 96 1 0 ?2 0 "Unicode subset 2" "Unicode subset (U+2500..U+33FF)" - "Unicode characters of the range U+2500..U+33FF."]) - -(define-charset 243 'mule-unicode-e000-ffff - [2 96 1 0 ?3 0 "Unicode subset 3" "Unicode subset (U+E000+FFFF)" - "Unicode characters of the range U+E000..U+FFFF."]) - -(define-charset 244 'mule-unicode-0100-24ff - [2 96 1 0 ?1 0 "Unicode subset" "Unicode subset (U+0100..U+24FF)" - "Unicode characters of the range U+0100..U+24FF."]) - -;; 2-byte 2-column charsets. Valid range of CHARSET-ID is 245..254. - -;; Ethiopic characters (Amahric and Tigrigna). -(define-charset 245 'ethiopic - [2 94 2 0 ?3 0 "Ethiopic" "Ethiopic characters" - "Ethiopic characters."]) - -;; Chinese CNS11643 Plane3 thru Plane7. Although these are official -;; character sets, the use is rare and don't have to be treated -;; space-efficiently in the buffer. -(define-charset 246 'chinese-cns11643-3 - [2 94 2 0 ?I 0 "CNS11643-3" "CNS11643-3 (Chinese traditional): ISO-IR-183" - "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183."]) -(define-charset 247 'chinese-cns11643-4 - [2 94 2 0 ?J 0 "CNS11643-4" "CNS11643-4 (Chinese traditional): ISO-IR-184" - "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184."]) -(define-charset 248 'chinese-cns11643-5 - [2 94 2 0 ?K 0 "CNS11643-5" "CNS11643-5 (Chinese traditional): ISO-IR-185" - "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185."]) -(define-charset 249 'chinese-cns11643-6 - [2 94 2 0 ?L 0 "CNS11643-6" "CNS11643-6 (Chinese traditional): ISO-IR-186" - "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186."]) -(define-charset 250 'chinese-cns11643-7 - [2 94 2 0 ?M 0 "CNS11643-7" "CNS11643-7 (Chinese traditional): ISO-IR-187" - "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187."]) - -;; Actual Glyph for 2-column width. -(define-charset 251 'indian-2-column - [2 94 2 0 ?5 0 "Indian 2-col" "Indian 2 Column" - "Indian character set for 2-column width glyphs."]) - ;; old indian-1-column characters will be translated to indian-2-column. -(declare-equiv-charset 2 94 ?6 'indian-2-column) - -;; Tibetan script. -(define-charset 252 'tibetan - [2 94 2 0 ?7 0 "Tibetan 2-col" "Tibetan 2 column" - "Tibetan 2-column width glyphs."]) - -;; CHARSET-ID 253 is not used. - -;; JISX0213 Plane 2 -(define-charset 254 'japanese-jisx0213-2 - [2 94 2 0 ?P 0 "JISX0213-2" "JISX0213-2" - "JISX0213 Plane 2 (Japanese)."]) - -;; Tell C code charset ID's of several charsets. -(setup-special-charsets) - - -;; These are tables for translating characters on decoding and -;; encoding. -(define-translation-table - 'oldjis-newjis-jisroman-ascii - (list (cons (make-char 'japanese-jisx0208-1978) - (make-char 'japanese-jisx0208)) - (cons (make-char 'latin-jisx0201) (make-char 'ascii)))) -(aset (get 'oldjis-newjis-jisroman-ascii 'translation-table) - (make-char 'latin-jisx0201 92) (make-char 'latin-jisx0201 92)) -(aset (get 'oldjis-newjis-jisroman-ascii 'translation-table) - (make-char 'latin-jisx0201 126) (make-char 'latin-jisx0201 126)) - -(setq standard-translation-table-for-decode - (get 'oldjis-newjis-jisroman-ascii 'translation-table)) - -(setq standard-translation-table-for-encode nil) - -(defvar translation-table-for-input nil - "If non-nil, a char table used to translate characters from input methods. -\(Currently only used by Quail.)") - -;;; Make fundamental coding systems. - -;; Miscellaneous coding systems which can't be made by -;; `make-coding-system'. - -(put 'no-conversion 'coding-system - (vector nil ?= "Do no conversion. - -When you visit a file with this coding, the file is read into a -unibyte buffer as is, thus each byte of a file is treated as a -character." - (list 'coding-category 'coding-category-binary - 'alias-coding-systems '(no-conversion)) - nil)) -(put 'no-conversion 'eol-type 0) -(put 'coding-category-binary 'coding-systems '(no-conversion)) -(setq coding-system-list '(no-conversion)) -(setq coding-system-alist '(("no-conversion"))) -(register-char-codings 'no-conversion t) - -(define-coding-system-alias 'binary 'no-conversion) - -(put 'undecided 'coding-system - (vector t ?- "No conversion on encoding, automatic conversion on decoding" - (list 'alias-coding-systems '(undecided) - 'safe-charsets '(ascii)) - nil)) -(setq coding-system-list (cons 'undecided coding-system-list)) -(setq coding-system-alist (cons '("undecided") coding-system-alist)) -(put 'undecided 'eol-type - (make-subsidiary-coding-system 'undecided)) - -(define-coding-system-alias 'unix 'undecided-unix) -(define-coding-system-alias 'dos 'undecided-dos) -(define-coding-system-alias 'mac 'undecided-mac) - -;; Coding systems not specific to each language environment. - -(make-coding-system - 'emacs-mule 0 ?= - "Emacs internal format used in buffer and string. - -Encoding text with this coding system produces the actual byte -sequence of the text in buffers and strings. An exception is made for -eight-bit-control characters. Each of them is encoded into a single -byte." - nil - '((safe-charsets . t))) - -(make-coding-system - 'raw-text 5 ?t - "Raw text, which means text contains random 8-bit codes. -Encoding text with this coding system produces the actual byte -sequence of the text in buffers and strings. An exception is made for -eight-bit-control characters. Each of them is encoded into a single -byte. - -When you visit a file with this coding, the file is read into a -unibyte buffer as is (except for EOL format), thus each byte of a file -is treated as a character." - nil - '((safe-charsets . t))) - -(make-coding-system - 'iso-2022-7bit 2 ?J - "ISO 2022 based 7-bit encoding using only G0" - '((ascii t) nil nil nil - short ascii-eol ascii-cntl seven) - '((safe-charsets . t) - (composition . t))) - -(make-coding-system - 'iso-2022-7bit-ss2 2 ?$ - "ISO 2022 based 7-bit encoding using SS2 for 96-charset" - '((ascii t) nil t nil - short ascii-eol ascii-cntl seven nil single-shift) - '((safe-charsets . t) - (composition . t))) - -(make-coding-system - 'iso-2022-7bit-lock 2 ?& - "ISO-2022 coding system using Locking-Shift for 96-charset" - '((ascii t) t nil nil - nil ascii-eol ascii-cntl seven locking-shift) - '((safe-charsets . t) - (composition . t))) - -(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock) - -(make-coding-system - 'iso-2022-7bit-lock-ss2 2 ?i - "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN" - '((ascii t) - (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 t) - (nil chinese-cns11643-2) - (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 - chinese-cns11643-6 chinese-cns11643-7) - short ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil - init-bol) - '((safe-charsets ascii japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201 - korean-ksc5601 chinese-gb2312 chinese-cns11643-1 - chinese-cns11643-2 chinese-cns11643-3 chinese-cns11643-4 - chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7) - (composition . t))) - -(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2) - -(make-coding-system - 'iso-2022-8bit-ss2 2 ?@ - "ISO 2022 based 8-bit encoding using SS2 for 96-charset" - '((ascii t) nil t nil - nil ascii-eol ascii-cntl nil nil single-shift) - '((safe-charsets . t) - (composition . t))) - -(make-coding-system - 'compound-text 2 ?x - "Compound text based generic encoding for decoding unknown messages. - -This coding system does not support ICCCM Extended Segments." - '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t - nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil - init-bol nil nil) - '((safe-charsets . t) - (mime-charset . x-ctext) - (composition . t))) - -(define-coding-system-alias 'x-ctext 'compound-text) -(define-coding-system-alias 'ctext 'compound-text) - -;; Same as compound-text, but doesn't produce composition escape -;; sequences. Used in post-read and pre-write conversions of -;; compound-text-with-extensions, see mule.el. Note that this should -;; not have a mime-charset property, to prevent it from showing up -;; close to the beginning of coding systems ordered by priority. -(make-coding-system - 'ctext-no-compositions 2 ?x - "Compound text based generic encoding for decoding unknown messages. - -Like `compound-text', but does not produce escape sequences for compositions." - '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t - nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil - init-bol nil nil) - '((safe-charsets . t))) - -(make-coding-system - 'compound-text-with-extensions 5 ?x - "Compound text encoding with ICCCM Extended Segment extensions. - -This coding system should be used only for X selections. It is inappropriate -for decoding and encoding files, process I/O, etc." - nil - '((post-read-conversion . ctext-post-read-conversion) - (pre-write-conversion . ctext-pre-write-conversion))) - -(define-coding-system-alias - 'x-ctext-with-extensions 'compound-text-with-extensions) -(define-coding-system-alias - 'ctext-with-extensions 'compound-text-with-extensions) - -(make-coding-system - 'iso-safe 2 ?- - "Convert all characters but ASCII to `?'." - '(ascii nil nil nil - nil ascii-eol ascii-cntl nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii))) - -;; Use iso-safe for terminal output if some other coding system is not -;; specified explicitly. -(set-safe-terminal-coding-system-internal 'iso-safe) - -;; The other coding-systems are defined in each language specific -;; section of languages.el. - -;; Normally, set coding system to `undecided' before reading a file. -;; Compiled Emacs Lisp files (*.elc) are not decoded at all, -;; but we regard them as containing multibyte characters. -;; Tar files are not decoded at all, but we treat them as raw bytes. - -(setq file-coding-system-alist - '(("\\.elc\\'" . (emacs-mule . emacs-mule)) - ("\\.utf\\(-8\\)?\\'" . utf-8) - ;; We use raw-text for reading loaddefs.el so that if it - ;; happens to have DOS or Mac EOLs, they are converted to - ;; newlines. This is required to make the special treatment - ;; of the "\ newline" combination in loaddefs.el, which marks - ;; the beginning of a doc string, work. - ("\\(\\`\\|/\\)loaddefs.el\\'" . (raw-text . raw-text-unix)) - ("\\.tar\\'" . (no-conversion . no-conversion)) - ("" . (undecided . nil)))) - - -;;; Setting coding categories and their priorities. - -;; This setting is just to read an Emacs Lisp source files which -;; contain multilingual text while dumping Emacs. More appropriate -;; values are set by the command `set-language-environment' for each -;; language environment. - -(setq coding-category-emacs-mule 'emacs-mule - coding-category-sjis 'japanese-shift-jis - coding-category-iso-7 'iso-2022-7bit - coding-category-iso-7-tight 'iso-2022-jp - coding-category-iso-8-1 'iso-latin-1 - coding-category-iso-8-2 'iso-latin-1 - coding-category-iso-7-else 'iso-2022-7bit-lock - coding-category-iso-8-else 'iso-2022-8bit-ss2 - coding-category-ccl nil - coding-category-utf-8 'mule-utf-8 - coding-category-utf-16-be nil - coding-category-utf-16-le nil - coding-category-big5 'chinese-big5 - coding-category-raw-text 'raw-text - coding-category-binary 'no-conversion) - -(set-coding-priority - '(coding-category-iso-8-1 - coding-category-iso-8-2 - coding-category-iso-7-tight - coding-category-iso-7 - coding-category-iso-7-else - coding-category-iso-8-else - coding-category-emacs-mule - coding-category-raw-text - coding-category-sjis - coding-category-big5 - coding-category-ccl - coding-category-binary - coding-category-utf-8 - coding-category-utf-16-be - coding-category-utf-16-le)) - - -;;; Miscellaneous settings. -(aset latin-extra-code-table ?\222 t) -(aset latin-extra-code-table ?\223 t) -(aset latin-extra-code-table ?\224 t) - -(update-coding-systems-internal) - -;;; mule-conf.el ends here +;;; mule-conf.el --- configure multilingual environment + +;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. +;; Licensed to the Free Software Foundation. +;; Copyright (C) 2001, 2002 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 + +;; Keywords: mule, multilingual, character set, coding system + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; Don't byte-compile this file. + +;;; Code: + +;;; Definitions of character sets. + +;; The charsets `ascii' and `unicoe' are aleady defined in charset.c +;; as below: +;; +;; (define-charset 'ascii +;; "" +;; :dimension 1 +;; :code-space [0 127] +;; :iso-final-char ?A +;; :ascii-compatible-p t +;; :emacs-mule-id 0 +;; :code-offset 0) +;; +;; (define-charset 'unicode +;; "" +;; :dimension 3 +;; :code-space [0 255 0 255 0 16] +;; :ascii-compatible-p t +;; :code-offset 0) +;; +;; We now set :docstring, :short-name, and :long-name properties. + +(put-charset-property + 'ascii :docstring "ASCII (ISO646 IRV)") +(put-charset-property + 'ascii :short-name "ASCII") +(put-charset-property + 'ascii :long-name "ASCII (ISO646 IRV)") +(put-charset-property + 'unicode :docstring "Unicode (ISO10646)") +(put-charset-property + 'unicode :short-name "Unicode") +(put-charset-property + 'unicode :long-name "Unicode (ISO10646)") + +(define-charset-alias 'ucs 'unicode) + +(define-charset 'emacs + "Full Emacs characters." + :ascii-compatible-p t + :code-space [ 0 255 0 255 0 63 ] + :code-offset 0 + :supplementary-p t) + +(define-charset 'iso-8859-1 + "Laint-1 (ISO/IEC 8859-1)" + :short-name "Latin-1" + :ascii-compatible-p t + :code-space [0 255] + :code-offset 0) + +(define-charset 'latin-iso8859-1 + "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100" + :short-name "RHP of Latin-1" + :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100" + :iso-final-char ?A + :emacs-mule-id 129 + :code-space [32 127] + :code-offset 160) + +(define-charset 'eight-bit-control + "8-bit control code (0x80..0x9F)" + :short-name "8-bit control code" + :code-space [128 159] + :code-offset 128) + +(define-charset 'eight-bit-graphic + "8-bit graphic code (0xA0..0xFF)" + :short-name "8-bit graphic code" + :code-space [160 255] + :code-offset 160) + +(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname + iso-ir iso-final + emacs-mule-id map) + "For internal use only." + `(progn + (define-charset ,symbol + ,name + :short-name ,nickname + :long-name ,name + :ascii-compatible-p t + :code-space [0 255] + :map ,map) + (if ,iso-symbol + (define-charset ,iso-symbol + (if ,iso-ir + (format "Right-Hand Part of %s (%s): ISO-IR-%d" + ,name ,nickname ,iso-ir) + (format "Right-Hand Part of %s (%s)" ,name ,nickname)) + :short-name (format "RHP of %s" ,name) + :long-name (format "RHP of %s (%s)" ,name ,nickname) + :iso-final-char ,iso-final + :emacs-mule-id ,emacs-mule-id + :code-space [32 127] + :parents (list (cons ,symbol 128)))))) + +(define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2 + "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2") + +(define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3 + "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3") + +(define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4 + "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4") + +(define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5 + "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5") + +(define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6 + "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6") + +(define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7 + "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7") + +(define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8 + "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8") + +(define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9 + "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9") + +(define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13 + "ISO/IEC 8859/13" "Latin-7" nil nil nil "8859-13") + +(define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14 + "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14") + +(define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15 + "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15") + +(define-charset 'thai-tis620 + "TIS620.2533" + :short-name "TIS620.2533" + :iso-final-char ?T + :emacs-mule-id 133 + :code-space [32 127] + :code-offset #x0E00) + +(define-charset 'tis620-2533 + "TIS620.2533" + :short-name "TIS620.2533" + :ascii-compatible-p t + :code-space [0 255] + :parents '(ascii eight-bit-control (thai-tis620 . -128))) + +(define-charset 'jisx0201 + "JISX0201" + :short-name "JISX0201" + :long-name "JISX0201" + :code-space [33 254] + :map "jisx0201") + +(define-charset 'latin-jisx0201 + "Roman Part of JISX0201.1976" + :short-name "JISX0201 Roman" + :long-name "Japanese Roman (JISX0201.1976)" + :iso-final-char ?J + :emacs-mule-id 138 + :code-space [33 126] + :parents '(jisx0201)) + +(define-charset 'katakana-jisx0201 + "Katakana Part of JISX0201.1976" + :short-name "JISX0201 Katakana" + :long-name "Japanese Katakana (JISX0201.1976)" + :iso-final-char ?I + :emacs-mule-id 137 + :code-space [33 126] + :parents '((jisx0201 . #x80))) + +(define-charset 'chinese-gb2312 + "GB2312 Chinese simplified: ISO-IR-58" + :short-name "GB2312" + :long-name "GB2312: ISO-IR-58" + :iso-final-char ?A + :emacs-mule-id 145 + :code-space [33 126 33 126] + :code-offset #x110000 + :unify-map "gb2312-1980") + +(define-charset 'chinese-cns11643-1 + "CNS11643 Plane 1 Chinese traditional: ISO-IR-171" + :short-name "CNS11643-1" + :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171" + :iso-final-char ?G + :emacs-mule-id 149 + :code-space [33 126 33 126] + :code-offset #x114000 + :unify-map "cns11643-1") + +(define-charset 'chinese-cns11643-2 + "CNS11643 Plane 2 Chinese traditional: ISO-IR-172" + :short-name "CNS11643-2" + :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172" + :iso-final-char ?H + :emacs-mule-id 150 + :code-space [33 126 33 126] + :code-offset #x118000 + :unify-map "cns11643-2") + +(define-charset 'chinese-cns11643-3 + "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183" + :short-name "CNS11643-3" + :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183" + :iso-final-char ?I + :code-space [33 126 33 126] + :emacs-mule-id 246 + :code-offset #x11C000) + +(define-charset 'chinese-cns11643-4 + "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184" + :short-name "CNS11643-4" + :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184" + :iso-final-char ?J + :emacs-mule-id 247 + :code-space [33 126 33 126] + :code-offset #x120000) + +(define-charset 'chinese-cns11643-5 + "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185" + :short-name "CNS11643-5" + :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185" + :iso-final-char ?K + :emacs-mule-id 248 + :code-space [33 126 33 126] + :code-offset #x124000) + +(define-charset 'chinese-cns11643-6 + "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186" + :short-name "CNS11643-6" + :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186" + :iso-final-char ?L + :emacs-mule-id 249 + :code-space [33 126 33 126] + :code-offset #x128000) + +(define-charset 'chinese-cns11643-7 + "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187" + :short-name "CNS11643-7" + :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187" + :iso-final-char ?M + :emacs-mule-id 250 + :code-space [33 126 33 126] + :code-offset #x12C000) + +(define-charset 'big5 + "Big5 (Chinese traditional)" + :short-name "Big5" + :long-name "Big5" + :code-space [#x40 #xFE #xA1 #xFE] + :code-offset #x130000 + :unify-map "big5") + +(define-charset 'chinese-big5-1 + "Frequentry used part (A141-C67E) of Big5 (Chinese traditional)" + :short-name "Big5 (Level-1)" + :long-name "Big5 (Level-1) A141-C67F" + :iso-final-char ?0 + :emacs-mule-id 152 + :code-space [#x21 #x7E #x21 #x7E] + :code-offset #x134000 + :unify-map "big5-1") + +(define-charset 'chinese-big5-2 + "Less frequentry used part (C940-FEFE) of Big5 (Chinese traditional)" + :short-name "Big5 (Level-2)" + :long-name "Big5 (Level-2) C940-FEFE" + :iso-final-char ?1 + :emacs-mule-id 153 + :code-space [#x21 #x7E #x21 #x7E] + :code-offset #x138000 + :unify-map "big5-2") + +(define-charset 'japanese-jisx0208 + "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87" + :short-name "JISX0208" + :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87" + :iso-final-char ?B + :emacs-mule-id 146 + :code-space [33 126 33 126] + :code-offset #x140000 + :unify-map "jisx0208-1990") + +(define-charset 'japanese-jisx0208-1978 + "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42" + :short-name "JISX0208.1978" + :long-name "JISX0208.1978 (Japanese): ISO-IR-42" + :iso-final-char ?@ + :emacs-mule-id 144 + :code-space [33 126 33 126] + :code-offset #x144000 + :unify-map "jisx0208-1978") + +(define-charset 'japanese-jisx0212 + "JISX0212 Japanese supplement: ISO-IR-159" + :short-name "JISX0212" + :long-name "JISX0212 (Japanese): ISO-IR-159" + :iso-final-char ?D + :emacs-mule-id 148 + :code-space [33 126 33 126] + :code-offset #x148000 + :unify-map "jisx0212-1990") + +(define-charset 'japanese-jisx0213-1 + "JISX0213 Plane 1 (Japanese)" + :short-name "JISX0213-1" + :long-name "JISX0213-1" + :iso-final-char ?O + :emacs-mule-id 151 + :code-space [33 126 33 126] + :code-offset #x14C000) + +(define-charset 'japanese-jisx0213-2 + "JISX0213 Plane 2 (Japanese)" + :short-name "JISX0213-2" + :long-name "JISX0213-2" + :iso-final-char ?P + :emacs-mule-id 254 + :code-space [33 126 33 126] + :code-offset #x150000) + +(define-charset 'korean-ksc5601 + "KSC5601 Korean Hangul and Hanja: ISO-IR-149" + :short-name "KSC5601" + :long-name "KSC5601 (Korean): ISO-IR-149" + :iso-final-char ?C + :emacs-mule-id 147 + :code-space [33 126 33 126] + :map "ksc5601-1987") + +(define-charset 'chinese-sisheng + "SiSheng characters for PinYin/ZhuYin" + :short-name "SiSheng" + :long-name "SiSheng (PinYin/ZhuYin)" + :iso-final-char ?0 + :emacs-mule-id 160 + :code-space [33 126] + :code-offset #x200000) + +(define-charset 'ipa + "IPA (International Phonetic Association)" + :short-name "IPA" + :long-name "IPA" + :iso-final-char ?0 + :emacs-mule-id 161 + :code-space [32 127] + :code-offset #x200080) + +(define-charset 'viscii + "VISCII1.1" + :short-name "VISCII" + :long-name "VISCII 1.1" + :code-space [0 255] + :map "viscii") + +(define-charset 'vietnamese-viscii-lower + "VISCII1.1 lower-case" + :short-name "VISCII lower" + :long-name "VISCII lower-case" + :iso-final-char ?1 + :emacs-mule-id 162 + :code-space [32 127] + :map "viscii-lower") + +(define-charset 'vietnamese-viscii-upper + "VISCII1.1 upper-case" + :short-name "VISCII upper" + :long-name "VISCII upper-case" + :iso-final-char ?2 + :emacs-mule-id 163 + :code-space [32 127] + :map "viscii-upper") + +(define-charset 'vscii + "VSCII1.1" + :short-name "VSCII" + :long-name "VSCII" + :code-space [0 255] + :map "vscii") + +(define-charset 'koi8-r + "KOI8-R" + :short-name "KOI8-R" + :long-name "KOI8-R" + :ascii-compatible-p t + :code-space [0 255] + :map "koi8-r") + +(define-charset-alias 'koi8 'koi8-r) + +(define-charset 'alternativnyj + "ALTERNATIVNYJ" + :short-name "alternativnyj" + :long-name "alternativnyj" + :ascii-compatible-p t + :code-space [0 255] + :map "ibm866") + +;; For Arabic, we need three different types of character sets. +;; Digits are of direction left-to-right and of width 1-column. +;; Others are of direction right-to-left and of width 1-column or +;; 2-column. +(define-charset 'arabic-digit + "Arabic digit" + :short-name "Arabic digit" + :long-name "Arabic digit" + :iso-final-char ?2 + :emacs-mule-id 164 + :code-space [34 42] + :code-offset #x0600) + +(define-charset 'arabic-1-column + "Arabic 1-column" + :short-name "Arabic 1-col" + :long-name "Arabic 1-column" + :iso-final-char ?3 + :emacs-mule-id 165 + :code-space [33 126] + :code-offset #x200100) + +(define-charset 'arabic-2-column + "Arabic 2-column" + :short-name "Arabic 2-col" + :long-name "Arabic 2-column" + :iso-final-char ?4 + :emacs-mule-id 224 + :code-space [33 126] + :code-offset #x200180) + +;; Lao script. +;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF. +(define-charset 'lao + "Lao characters (ISO10646 0E81..0EDF)" + :short-name "Lao" + :long-name "Lao" + :iso-final-char ?1 + :emacs-mule-id 167 + :code-space [33 126] + :code-offset #x0E81) + +(define-charset 'mule-lao + "Lao characters (ISO10646 0E81..0EDF)" + :short-name "Lao" + :long-name "Lao" + :code-space [0 255] + :parents '(ascii eight-bit-control (lao . -128))) + + +;; Indian scripts. Symbolic charset for data exchange. Glyphs are +;; not assigned. They are automatically converted to each Indian +;; script which IS-13194 supports. + +(define-charset 'indian-is13194 + "Generic Indian charset for data exchange with IS 13194" + :short-name "IS 13194" + :long-name "Indian IS 13194" + :iso-final-char ?5 + :emacs-mule-id 225 + :code-space [33 126] + :code-offset #x180000) + +(define-charset 'indian-glyph + "Glyphs for Indian characters." + :short-name "Indian glyph" + :long-name "Indian glyph" + :iso-final-char ?4 + :emacs-mule-id 240 + :code-space [32 127 32 127] + :code-offset #x180100) + +;; Actual Glyph for 1-column width. +(define-charset 'indian-1-column + "Indian charset for 1-column width glyphs" + :short-name "Indian 1-col" + :long-name "Indian 1 Column" + :iso-final-char ?6 + :emacs-mule-id 240 + :code-space [33 126 33 126] + :code-offset #x184000) + +;; Actual Glyph for 2-column width. +(define-charset 'indian-2-column + "Indian charset for 2-column width glyphs" + :short-name "Indian 2-col" + :long-name "Indian 2 Column" + :iso-final-char ?5 + :emacs-mule-id 251 + :code-space [33 126 33 126] + :parents '(indian-1-column)) + +(define-charset 'tibetan + "Tibetan characters" + :iso-final-char ?7 + :short-name "Tibetan 2-col" + :long-name "Tibetan 2 column" + :iso-final-char ?7 + :emacs-mule-id 252 + :code-space [33 126 33 126] + :code-offset #x190000) + +(define-charset 'tibetan-1-column + "Tibetan 1 column glyph" + :short-name "Tibetan 1-col" + :long-name "Tibetan 1 column" + :iso-final-char ?8 + :emacs-mule-id 241 + :code-space [33 126 33 37] + :parents '(tibetan)) + +;; Subsets of Unicode. +(define-charset 'mule-unicode-2500-33ff + "Unicode characters of the range U+2500..U+33FF." + :short-name "Unicode subset 2" + :long-name "Unicode subset (U+2500..U+33FF)" + :iso-final-char ?2 + :emacs-mule-id 242 + :code-space [#x20 #x7f #x20 #x47] + :code-offset #x2500) + +(define-charset 'mule-unicode-e000-ffff + "Unicode characters of the range U+E000..U+FFFF." + :short-name "Unicode subset 3" + :long-name "Unicode subset (U+E000+FFFF)" + :iso-final-char ?3 + :emacs-mule-id 243 + :code-space [#x20 #x7F #x20 #x75] + :code-offset #xE000) + +(define-charset 'mule-unicode-0100-24ff + "Unicode characters of the range U+0100..U+24FF." + :short-name "Unicode subset" + :long-name "Unicode subset (U+0100..U+24FF)" + :iso-final-char ?1 + :emacs-mule-id 244 + :code-space [#x20 #x7F #x20 #x7F] + :code-offset #x100) + +(define-charset 'ethiopic + "Ethiopic characters for Amahric and Tigrigna." + :short-name "Ethiopic" + :long-name "Ethiopic characters" + :iso-final-char ?3 + :emacs-mule-id 245 + :code-space [33 126 33 126] + :code-offset #x1A0000) + +(define-charset 'mac-roman + "Mac Roman charset" + :short-name "Mac Roman" + :long-name "Mac Roman" + :ascii-compatible-p t + :code-space [0 255] + :map "mac-roman") + +(unify-charset 'chinese-gb2312) +(unify-charset 'chinese-cns11643-1) +(unify-charset 'chinese-cns11643-2) +(unify-charset 'big5) +(unify-charset 'chinese-big5-1) +(unify-charset 'chinese-big5-2) + +;; These are tables for translating characters on decoding and +;; encoding. +(setq standard-translation-table-for-decode nil) + +(setq standard-translation-table-for-encode nil) + +(defvar translation-table-for-input nil + "If non-nil, a char table used to translate characters from input methods. +\(Currently only used by Quail.)") + +;;; Make fundamental coding systems. + +;; The coding system `no-conversion' is already defined in coding.c as +;; below: +;; +;; (define-coding-system 'no-conversion +;; "Do no conversion." +;; :coding-type 'raw-text +;; :mnemonic ?=) + +(define-coding-system 'raw-text + "Raw text, which means text contains random 8-bit codes. +Encoding text with this coding system produces the actual byte +sequence of the text in buffers and strings. An exception is made for +eight-bit-control characters. Each of them is encoded into a single +byte. + +When you visit a file with this coding, the file is read into a +unibyte buffer as is (except for EOL format), thus each byte of a file +is treated as a character." + :coding-type 'raw-text + :mnemonic ?t) + +(define-coding-system 'undecided + "No conversion on encoding, automatic conversion on decoding" + :coding-type 'undecided + :mnemonic ?- + :charset-list '(ascii)) + +(define-coding-system-alias 'unix 'undecided-unix) +(define-coding-system-alias 'dos 'undecided-dos) +(define-coding-system-alias 'mac 'undecided-mac) + +(define-coding-system 'iso-latin-1 + "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)." + :coding-type 'iso-2022 + :mnemonic ?1 + :charset-list '(ascii latin-iso8859-1) + :designation [ascii latin-iso8859-1 nil nil] + :mime-charset 'iso-8859-1) + +(define-coding-system-alias 'iso-8859-1 'iso-latin-1) +(define-coding-system-alias 'latin-1 'iso-latin-1) + +;; Coding systems not specific to each language environment. + +(define-coding-system 'emacs-mule + "Emacs 21 internal format used in buffer and string." + :coding-type 'emacs-mule + :mnemonic ?M) + +(define-coding-system 'utf-8 + "UTF-8." + :coding-type 'utf-8 + :mnemonic ?U + :charset-list '(unicode)) + +(define-coding-system-alias 'mule-utf-8 'utf-8) + +(define-coding-system 'utf-8-emacs + "UTF-8 will full support for Emacs characters." + :coding-type 'utf-8 + :mnemonic ?U + :charset-list '(emacs)) + +(define-coding-system 'utf-16 + "UTF-16" + :coding-type 'utf-16 + :mnemonic ?U + :charset-list '(unicode)) + +(define-coding-system 'utf-16-le-nosig + "UTF-16, little endian, no signature" + :coding-type 'utf-16 + :mnemonic ?U + :charset-list '(unicode) + :endian 'little) + +(define-coding-system 'utf-16-be-nosig + "UTF-16, big endian, no signature" + :coding-type 'utf-16 + :mnemonic ?U + :charset-list '(unicode) + :endian 'big) + +(define-coding-system 'utf-16-le + "UTF-16, little endian, with signature" + :coding-type 'utf-16 + :mnemonic ?U + :charset-list '(unicode) + :signature t + :endian 'little) + +(define-coding-system 'utf-16-be + "UTF-16, big endian, with signature" + :coding-type 'utf-16 + :mnemonic ?U + :charset-list '(unicode) + :signature t + :endian 'big) + +(define-coding-system 'iso-2022-7bit + "ISO 2022 based 7-bit encoding using only G0" + :coding-type 'iso-2022 + :mnemonic ?J + :charset-list 'iso-2022 + :designation [(ascii t) nil nil nil] + :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition)) + +(define-coding-system 'iso-2022-7bit-ss2 + "ISO 2022 based 7-bit encoding using SS2 for 96-charset" + :coding-type 'iso-2022 + :mnemonic ?$ + :charset-list 'iso-2022 + :designation [(ascii 94) nil (nil 96) nil] + :flags '(short ascii-at-eol ascii-at-cntl 7-bit + designation single-shift composition)) + +(define-coding-system 'iso-2022-7bit-lock + "ISO-2022 coding system using Locking-Shift for 96-charset" + :coding-type 'iso-2022 + :mnemonic ?& + :charset-list 'iso-2022 + :designation [(ascii 94) (nil 96) nil nil] + :flags '(ascii-at-eol ascii-at-cntl 7-bit + designation locking-shift composition)) + +(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock) + +(define-coding-system 'iso-2022-7bit-lock-ss2 + "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN" + :coding-type 'iso-2022 + :mnemonic ?i + :charset-list '(ascii + japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201 + korean-ksc5601 + chinese-gb2312 + chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3 + chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6 + chinese-cns11643-7) + :designation [(ascii 94) + (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96) + (nil chinese-cns11643-2) + (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 + chinese-cns11643-6 chinese-cns11643-7)] + :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift + single-shift init-bol)) + +(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2) + +(define-coding-system 'iso-2022-8bit-ss2 + "ISO 2022 based 8-bit encoding using SS2 for 96-charset" + :coding-type 'iso-2022 + :mnemonic ?@ + :charset-list 'iso-2022 + :designation [(ascii 94) nil (nil 96) nil] + :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)) + +(define-coding-system 'compound-text + "Compound text based generic encoding for decoding unknown messages. + +This coding system does not support ICCCM Extended Segments." + :coding-type 'iso-2022 + :mnemonic ?x + :charset-list 'iso-2022 + :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil] + :flags '(ascii-at-eol ascii-at-cntl + designation locking-shift single-shift composition) + :mime-charset 'x-ctext) + +(define-coding-system-alias 'x-ctext 'compound-text) +(define-coding-system-alias 'ctext 'compound-text) + +;; Same as compound-text, but doesn't produce composition escape +;; sequences. Used in post-read and pre-write conversions of +;; compound-text-with-extensions, see mule.el. Note that this should +;; not have a mime-charset property, to prevent it from showing up +;; close to the beginning of coding systems ordered by priority. +(define-coding-system 'ctext-no-compositions 2 ?x + "Compound text based generic encoding for decoding unknown messages. + +Like `compound-text', but does not produce escape sequences for compositions." + :coding-type 'iso-2022 + :mnemonic ?x + :charset-list 'iso-2022 + :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil] + :flags '(ascii-at-eol ascii-at-cntl + designation locking-shift single-shift)) + +(define-coding-system 'compound-text-with-extensions + "Compound text encoding with ICCCM Extended Segment extensions. + +This coding system should be used only for X selections. It is inappropriate +for decoding and encoding files, process I/O, etc." + :coding-type 'raw-text + :mnemonic ?x + :post-read-conversion 'ctext-post-read-conversion + :pre-write-conversion 'ctext-pre-write-conversion) + +(define-coding-system-alias + 'x-ctext-with-extensions 'compound-text-with-extensions) +(define-coding-system-alias + 'ctext-with-extensions 'compound-text-with-extensions) + +(define-coding-system 'us-ascii + "Convert all characters but ASCII to `?'." + :coding-type 'charset + :mnemonic ?- + :charset-list '(ascii) + :default-char ?? + :mime-charset 'us-ascii) + +(define-coding-system-alias 'iso-safe 'us-ascii) + +;; Use us-ascii for terminal output if some other coding system is not +;; specified explicitly. +(set-safe-terminal-coding-system-internal 'us-ascii) + +;; The other coding-systems are defined in each language specific +;; files under lisp/language. + +;; Normally, set coding system to `undecided' before reading a file. +;; Compiled Emacs Lisp files (*.elc) are not decoded at all, +;; but we regard them as containing multibyte characters. +;; Tar files are not decoded at all, but we treat them as raw bytes. + +(setq file-coding-system-alist + '(("\\.elc\\'" . (emacs-mule . emacs-mule)) + ("\\.utf\\(-8\\)?\\'" . utf-8) + ;; We use raw-text for reading loaddefs.el so that if it + ;; happens to have DOS or Mac EOLs, they are converted to + ;; newlines. This is required to make the special treatment + ;; of the "\ newline" combination in loaddefs.el, which marks + ;; the beginning of a doc string, work. + ("\\(\\`\\|/\\)loaddefs.el\\'" . (raw-text . raw-text-unix)) + ("\\.tar\\'" . (no-conversion . no-conversion)) + ("" . (undecided . nil)))) + + +;;; Setting coding categories and their priorities. + +;; This setting is just to read an Emacs Lisp source files which +;; contain multilingual text while dumping Emacs. More appropriate +;; values are set by the command `set-language-environment' for each +;; language environment. + +(set-coding-system-priority + 'iso-latin-1 + 'utf-8 + 'iso-2022-7bit + ) + + +;;; Miscellaneous settings. + +;; Make all multibyte characters self-insert. +(set-char-table-range (nth 1 global-map) + (cons (make-char 'unicode 128) (max-char)) + 'self-insert-command) + +(aset latin-extra-code-table ?\222 t) + +;;; mule-conf.el ends here