X-Git-Url: https://git.hcoop.net/bpt/emacs.git/blobdiff_plain/b89a6b600b0f0acac4466bef69c9820b51574c4f..4936186e9c0135544ad00de67a9a8579fb637476:/lisp/international/mule-conf.el diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el dissimilarity index 77% index 05f259b038..6b87374f2f 100644 --- a/lisp/international/mule-conf.el +++ b/lisp/international/mule-conf.el @@ -1,563 +1,1521 @@ -;;; mule-conf.el --- configure multilingual environment -*- no-byte-compile: t -*- - -;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, -;; 2004, 2005, 2006, 2007 Free Software Foundation, Inc. -;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 -;; National Institute of Advanced Industrial Science and Technology (AIST) -;; Registration Number H14PRO021 - -;; Keywords: mule, multilingual, character set, coding system - -;; This file is part of GNU Emacs. - -;; GNU Emacs is free software; you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. - -;; GNU Emacs is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. - -;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -;; Boston, MA 02110-1301, USA. - -;;; Commentary: - -;; Don't byte-compile this file. - -;;; Code: - -;;; Definitions of character sets. - -;; Basic (official) character sets. These character sets are treated -;; efficiently with respect to buffer memory. 
- -;; Syntax: -;; (define-charset CHARSET-ID CHARSET -;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE -;; SHORT-NAME LONG-NAME DESCRIPTION ]) -;; ASCII charset is defined in src/charset.c as below. -;; (define-charset 0 ascii -;; [1 94 1 0 ?B 0 "ASCII" "ASCII" "ASCII (ISO646 IRV)"]) - -;; 1-byte charsets. Valid range of CHARSET-ID is 128..143. - -;; CHARSET-ID 128 is not used. - -(define-charset 129 'latin-iso8859-1 - [1 96 1 0 ?A 1 "RHP of Latin-1" "RHP of Latin-1 (ISO 8859-1): ISO-IR-100" - "Right-Hand Part of Latin Alphabet 1 (ISO/IEC 8859-1): ISO-IR-100."]) -(define-charset 130 'latin-iso8859-2 - [1 96 1 0 ?B 1 "RHP of Latin-2" "RHP of Latin-2 (ISO 8859-2): ISO-IR-101" - "Right-Hand Part of Latin Alphabet 2 (ISO/IEC 8859-2): ISO-IR-101."]) -(define-charset 131 'latin-iso8859-3 - [1 96 1 0 ?C 1 "RHP of Latin-3" "RHP of Latin-3 (ISO 8859-3): ISO-IR-109" - "Right-Hand Part of Latin Alphabet 3 (ISO/IEC 8859-3): ISO-IR-109."]) -(define-charset 132 'latin-iso8859-4 - [1 96 1 0 ?D 1 "RHP of Latin-4" "RHP of Latin-4 (ISO 8859-4): ISO-IR-110" - "Right-Hand Part of Latin Alphabet 4 (ISO/IEC 8859-4): ISO-IR-110."]) -(define-charset 133 'thai-tis620 - [1 96 1 0 ?T 1 "RHP of TIS620" "RHP of Thai (TIS620): ISO-IR-166" - "Right-Hand Part of TIS620.2533 (Thai): ISO-IR-166."]) -(define-charset 134 'greek-iso8859-7 - [1 96 1 0 ?F 1 "RHP of ISO8859/7" "RHP of Greek (ISO 8859-7): ISO-IR-126" - "Right-Hand Part of Latin/Greek Alphabet (ISO/IEC 8859-7): ISO-IR-126."]) -(define-charset 135 'arabic-iso8859-6 - [1 96 1 1 ?G 1 "RHP of ISO8859/6" "RHP of Arabic (ISO 8859-6): ISO-IR-127" - "Right-Hand Part of Latin/Arabic Alphabet (ISO/IEC 8859-6): ISO-IR-127."]) -(define-charset 136 'hebrew-iso8859-8 - [1 96 1 1 ?H 1 "RHP of ISO8859/8" "RHP of Hebrew (ISO 8859-8): ISO-IR-138" - "Right-Hand Part of Latin/Hebrew Alphabet (ISO/IEC 8859-8): ISO-IR-138."]) -(define-charset 137 'katakana-jisx0201 - [1 94 1 0 ?I 1 "JISX0201 Katakana" "Japanese Katakana (JISX0201.1976)" - 
"Katakana Part of JISX0201.1976."]) -(define-charset 138 'latin-jisx0201 - [1 94 1 0 ?J 0 "JISX0201 Roman" "Japanese Roman (JISX0201.1976)" - "Roman Part of JISX0201.1976."]) - -;; CHARSET-ID is not used 139. - -(define-charset 140 'cyrillic-iso8859-5 - [1 96 1 0 ?L 1 "RHP of ISO8859/5" "RHP of Cyrillic (ISO 8859-5): ISO-IR-144" - "Right-Hand Part of Latin/Cyrillic Alphabet (ISO/IEC 8859-5): ISO-IR-144."]) -(define-charset 141 'latin-iso8859-9 - [1 96 1 0 ?M 1 "RHP of Latin-5" "RHP of Latin-5 (ISO 8859-9): ISO-IR-148" - "Right-Hand Part of Latin Alphabet 5 (ISO/IEC 8859-9): ISO-IR-148."]) -(define-charset 142 'latin-iso8859-15 - [1 96 1 0 ?b 1 "RHP of Latin-9" "RHP of Latin-9 (ISO 8859-15): ISO-IR-203" - "Right-Hand Part of Latin Alphabet 9 (ISO/IEC 8859-15): ISO-IR-203."]) -(define-charset 143 'latin-iso8859-14 - [1 96 1 0 ?_ 1 "RHP of Latin-8" "RHP of Latin-8 (ISO 8859-14): ISO-IR-199" - "Right-Hand Part of Latin Alphabet 8 (ISO/IEC 8859-14): ISO-IR-199."]) - -;; 2-byte charsets. Valid range of CHARSET-ID is 144..153. 
- -(define-charset 144 'japanese-jisx0208-1978 - [2 94 2 0 ?@ 0 "JISX0208.1978" "JISX0208.1978 (Japanese): ISO-IR-42" - "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42."]) -(define-charset 145 'chinese-gb2312 - [2 94 2 0 ?A 0 "GB2312" "GB2312: ISO-IR-58" - "GB2312 Chinese simplified: ISO-IR-58."]) -(define-charset 146 'japanese-jisx0208 - [2 94 2 0 ?B 0 "JISX0208" "JISX0208.1983/1990 (Japanese): ISO-IR-87" - "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87."]) -(define-charset 147 'korean-ksc5601 - [2 94 2 0 ?C 0 "KSC5601" "KSC5601 (Korean): ISO-IR-149" - "KSC5601 Korean Hangul and Hanja: ISO-IR-149."]) -(define-charset 148 'japanese-jisx0212 - [2 94 2 0 ?D 0 "JISX0212" "JISX0212 (Japanese): ISO-IR-159" - "JISX0212 Japanese supplement: ISO-IR-159."]) -(define-charset 149 'chinese-cns11643-1 - [2 94 2 0 ?G 0 "CNS11643-1" "CNS11643-1 (Chinese traditional): ISO-IR-171" - "CNS11643 Plane 1 Chinese traditional: ISO-IR-171."]) -(define-charset 150 'chinese-cns11643-2 - [2 94 2 0 ?H 0 "CNS11643-2" "CNS11643-2 (Chinese traditional): ISO-IR-172" - "CNS11643 Plane 2 Chinese traditional: ISO-IR-172."]) -(define-charset 151 'japanese-jisx0213-1 - [2 94 2 0 ?O 0 "JISX0213-1" "JISX0213-1" "JISX0213 Plane 1 (Japanese)"]) -(define-charset 152 'chinese-big5-1 - [2 94 2 0 ?0 0 "Big5 (Level-1)" "Big5 (Level-1) A141-C67F" - "Frequently used part (A141-C67F) of Big5 (Chinese traditional)."]) -(define-charset 153 'chinese-big5-2 - [2 94 2 0 ?1 0 "Big5 (Level-2)" "Big5 (Level-2) C940-FEFE" - "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)."]) - -;; Additional (private) character sets. These character sets are -;; treated less space-efficiently in the buffer. - -;; Syntax: -;; (define-charset CHARSET-ID CHARSET -;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE -;; SHORT-NAME LONG-NAME DESCRIPTION ]) - -;; ISO-2022 allows a use of character sets not registered in ISO with -;; final characters `0' (0x30) through `?' (0x3F). 
Among them, Emacs -;; reserves `0' through `9' to support several private character sets. -;; The remaining final characters `:' through `?' are for users. - -;; 1-byte 1-column charsets. Valid range of CHARSET-ID is 160..223. - -(define-charset 160 'chinese-sisheng - [1 94 1 0 ?0 0 "SiSheng" "SiSheng (PinYin/ZhuYin)" - "Sisheng characters (vowels with tone marks) for Pinyin/Zhuyin."]) - -;; IPA characters for phonetic symbols. -(define-charset 161 'ipa - [1 96 1 0 ?0 1 "IPA" "IPA" - "IPA (International Phonetic Association) characters."]) - -;; Vietnamese VISCII. VISCII is 1-byte character set which contains -;; more than 96 characters. Since Emacs can't handle it as one -;; character set, it is divided into two: lower case letters and upper -;; case letters. -(define-charset 162 'vietnamese-viscii-lower - [1 96 1 0 ?1 1 "VISCII lower" "VISCII lower-case" - "Vietnamese VISCII1.1 lower-case characters."]) -(define-charset 163 'vietnamese-viscii-upper - [1 96 1 0 ?2 1 "VISCII upper" "VISCII upper-case" - "Vietnamese VISCII1.1 upper-case characters."]) - -;; For Arabic, we need three different types of character sets. -;; Digits are of direction left-to-right and of width 1-column. -;; Others are of direction right-to-left and of width 1-column or -;; 2-column. -(define-charset 164 'arabic-digit - [1 94 1 0 ?2 0 "Arabic digit" "Arabic digit" - "Arabic digits."]) -(define-charset 165 'arabic-1-column - [1 94 1 1 ?3 0 "Arabic 1-col" "Arabic 1-column" - "Arabic 1-column width glyphs."]) - -;; ASCII with right-to-left direction. -(define-charset 166 'ascii-right-to-left - [1 94 1 1 ?B 0 "rev ASCII" "ASCII with right-to-left direction" - "ASCII (left half of ISO 8859-1) with right-to-left direction."]) - -;; Lao script. -;; ISO10646's 0x0E80..0x0EDF are mapped to 0x20..0x7F. -(define-charset 167 'lao - [1 94 1 0 ?1 0 "Lao" "Lao" - "Lao characters (U+0E80..U+0EDF)."]) - -;; CHARSET-IDs 168..223 are not used. - -;; 1-byte 2-column charsets. 
Valid range of CHARSET-ID is 224..239. - -(define-charset 224 'arabic-2-column - [1 94 2 1 ?4 0 "Arabic 2-col" "Arabic 2-column" - "Arabic 2-column glyphs."]) - -;; Indian scripts. Symbolic charset for data exchange. Glyphs are -;; not assigned. They are automatically converted to each Indian -;; script which IS-13194 supports. - -(define-charset 225 'indian-is13194 - [1 94 2 0 ?5 1 "IS 13194" "Indian IS 13194" - "Generic Indian character set for data exchange with IS 13194."]) - -;; CHARSET-IDs 226..239 are not used. - -(define-charset 240 'indian-glyph - [2 96 1 0 ?4 0 "Indian glyph" "Indian glyph" - "Glyphs for Indian characters."]) -;; 240 used to be [2 94 1 0 ?6 0 "Indian 1-col" "Indian 1 Column"] - -;; 2-byte 1-column charsets. Valid range of CHARSET-ID is 240..244. - -;; Actual Glyph for 1-column width. -(define-charset 241 'tibetan-1-column - [2 94 1 0 ?8 0 "Tibetan 1-col" "Tibetan 1 column" - "Tibetan 1-column glyphs."]) - -;; Subsets of Unicode. - -(define-charset 242 'mule-unicode-2500-33ff - [2 96 1 0 ?2 0 "Unicode subset 2" "Unicode subset (U+2500..U+33FF)" - "Unicode characters of the range U+2500..U+33FF."]) - -(define-charset 243 'mule-unicode-e000-ffff - [2 96 1 0 ?3 0 "Unicode subset 3" "Unicode subset (U+E000+FFFF)" - "Unicode characters of the range U+E000..U+FFFF."]) - -(define-charset 244 'mule-unicode-0100-24ff - [2 96 1 0 ?1 0 "Unicode subset" "Unicode subset (U+0100..U+24FF)" - "Unicode characters of the range U+0100..U+24FF."]) - -;; 2-byte 2-column charsets. Valid range of CHARSET-ID is 245..254. - -;; Ethiopic characters (Amharic and Tigrigna). -(define-charset 245 'ethiopic - [2 94 2 0 ?3 0 "Ethiopic" "Ethiopic characters" - "Ethiopic characters."]) - -;; Chinese CNS11643 Plane3 thru Plane7. Although these are official -;; character sets, the use is rare and don't have to be treated -;; space-efficiently in the buffer. 
-(define-charset 246 'chinese-cns11643-3 - [2 94 2 0 ?I 0 "CNS11643-3" "CNS11643-3 (Chinese traditional): ISO-IR-183" - "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183."]) -(define-charset 247 'chinese-cns11643-4 - [2 94 2 0 ?J 0 "CNS11643-4" "CNS11643-4 (Chinese traditional): ISO-IR-184" - "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184."]) -(define-charset 248 'chinese-cns11643-5 - [2 94 2 0 ?K 0 "CNS11643-5" "CNS11643-5 (Chinese traditional): ISO-IR-185" - "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185."]) -(define-charset 249 'chinese-cns11643-6 - [2 94 2 0 ?L 0 "CNS11643-6" "CNS11643-6 (Chinese traditional): ISO-IR-186" - "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186."]) -(define-charset 250 'chinese-cns11643-7 - [2 94 2 0 ?M 0 "CNS11643-7" "CNS11643-7 (Chinese traditional): ISO-IR-187" - "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187."]) - -;; Actual Glyph for 2-column width. -(define-charset 251 'indian-2-column - [2 94 2 0 ?5 0 "Indian 2-col" "Indian 2 Column" - "Indian character set for 2-column width glyphs."]) - ;; old indian-1-column characters will be translated to indian-2-column. -(declare-equiv-charset 2 94 ?6 'indian-2-column) - -;; Tibetan script. -(define-charset 252 'tibetan - [2 94 2 0 ?7 0 "Tibetan 2-col" "Tibetan 2 column" - "Tibetan 2-column width glyphs."]) - -;; CHARSET-ID 253 is not used. - -;; JISX0213 Plane 2 -(define-charset 254 'japanese-jisx0213-2 - [2 94 2 0 ?P 0 "JISX0213-2" "JISX0213-2" - "JISX0213 Plane 2 (Japanese)."]) - -;; Tell C code charset ID's of several charsets. -(setup-special-charsets) - - -;; These are tables for translating characters on decoding and -;; encoding. 
-(define-translation-table - 'oldjis-newjis-jisroman-ascii - (list (cons (make-char 'japanese-jisx0208-1978) - (make-char 'japanese-jisx0208)) - (cons (make-char 'latin-jisx0201) (make-char 'ascii)))) -(aset (get 'oldjis-newjis-jisroman-ascii 'translation-table) - (make-char 'latin-jisx0201 92) (make-char 'latin-jisx0201 92)) -(aset (get 'oldjis-newjis-jisroman-ascii 'translation-table) - (make-char 'latin-jisx0201 126) (make-char 'latin-jisx0201 126)) - -(setq standard-translation-table-for-decode - (get 'oldjis-newjis-jisroman-ascii 'translation-table)) - -(setq standard-translation-table-for-encode nil) - -;;; Make fundamental coding systems. - -;; Miscellaneous coding systems which can't be made by -;; `make-coding-system'. - -(put 'no-conversion 'coding-system - (vector nil ?= "Do no conversion. - -When you visit a file with this coding, the file is read into a -unibyte buffer as is, thus each byte of a file is treated as a -character." - (list 'coding-category 'coding-category-binary - 'alias-coding-systems '(no-conversion) - 'safe-charsets t 'safe-chars t) - nil)) -(put 'no-conversion 'eol-type 0) -(put 'coding-category-binary 'coding-systems '(no-conversion)) -(setq coding-system-list '(no-conversion)) -(setq coding-system-alist '(("no-conversion"))) -(define-coding-system-internal 'no-conversion) - -(define-coding-system-alias 'binary 'no-conversion) - -(put 'undecided 'coding-system - (vector t ?- "No conversion on encoding, automatic conversion on decoding" - (list 'alias-coding-systems '(undecided) - 'safe-charsets '(ascii)) - nil)) -(setq coding-system-list (cons 'undecided coding-system-list)) -(setq coding-system-alist (cons '("undecided") coding-system-alist)) -(put 'undecided 'eol-type - (make-subsidiary-coding-system 'undecided)) - -(define-coding-system-alias 'unix 'undecided-unix) -(define-coding-system-alias 'dos 'undecided-dos) -(define-coding-system-alias 'mac 'undecided-mac) - -;; Coding systems not specific to each language environment. 
- -(make-coding-system - 'emacs-mule 0 ?= - "Emacs internal format used in buffer and string. - -Encoding text with this coding system produces the actual byte -sequence of the text in buffers and strings. An exception is made for -eight-bit-control characters. Each of them is encoded into a single -byte." - nil - '((safe-charsets . t) - (composition . t))) - -(make-coding-system - 'raw-text 5 ?t - "Raw text, which means text contains random 8-bit codes. -Encoding text with this coding system produces the actual byte -sequence of the text in buffers and strings. An exception is made for -eight-bit-control characters. Each of them is encoded into a single -byte. - -When you visit a file with this coding, the file is read into a -unibyte buffer as is (except for EOL format), thus each byte of a file -is treated as a character." - nil - '((safe-charsets . t))) - -(make-coding-system - 'iso-2022-7bit 2 ?J - "ISO 2022 based 7-bit encoding using only G0" - '((ascii t) nil nil nil - short ascii-eol ascii-cntl seven) - '((safe-charsets . t) - (composition . t))) - -(make-coding-system - 'iso-2022-7bit-ss2 2 ?$ - "ISO 2022 based 7-bit encoding using SS2 for 96-charset" - '((ascii t) nil t nil - short ascii-eol ascii-cntl seven nil single-shift) - '((safe-charsets . t) - (composition . t))) - -(make-coding-system - 'iso-2022-7bit-lock 2 ?& - "ISO-2022 coding system using Locking-Shift for 96-charset" - '((ascii t) t nil nil - nil ascii-eol ascii-cntl seven locking-shift) - '((safe-charsets . t) - (composition . 
t))) - -(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock) - -(make-coding-system - 'iso-2022-7bit-lock-ss2 2 ?i - "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN" - '((ascii t) - (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 t) - (nil chinese-cns11643-2) - (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 - chinese-cns11643-6 chinese-cns11643-7) - short ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil - init-bol) - '((safe-charsets ascii japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201 - korean-ksc5601 chinese-gb2312 chinese-cns11643-1 - chinese-cns11643-2 chinese-cns11643-3 chinese-cns11643-4 - chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7) - (composition . t))) - -(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2) - -(make-coding-system - 'iso-2022-8bit-ss2 2 ?@ - "ISO 2022 based 8-bit encoding using SS2 for 96-charset" - '((ascii t) nil t nil - nil ascii-eol ascii-cntl nil nil single-shift) - '((safe-charsets . t) - (composition . t))) - -(make-coding-system - 'compound-text 2 ?x - "Compound text based generic encoding for decoding unknown messages. - -This coding system does not support extended segments." - '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t - nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil - init-bol nil nil) - '((safe-charsets . t) - (mime-charset . x-ctext) - (composition . t))) - -(define-coding-system-alias 'x-ctext 'compound-text) -(define-coding-system-alias 'ctext 'compound-text) - -;; Same as compound-text, but doesn't produce composition escape -;; sequences. Used in post-read and pre-write conversions of -;; compound-text-with-extensions, see mule.el. Note that this should -;; not have a mime-charset property, to prevent it from showing up -;; close to the beginning of coding systems ordered by priority. 
-(make-coding-system - 'ctext-no-compositions 2 ?x - "Compound text based generic encoding for decoding unknown messages. - -Like `compound-text', but does not produce escape sequences for compositions." - '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t - nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil - init-bol nil nil) - '((safe-charsets . t))) - -(make-coding-system - 'compound-text-with-extensions 2 ?x - "Compound text encoding with extended segments. - -See the variable `ctext-non-standard-encodings-alist' for the -detail about how extended segments are handled. - -This coding system should be used only for X selections. It is inappropriate -for decoding and encoding files, process I/O, etc." - '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t - nil ascii-eol ascii-cntl) - '((post-read-conversion . ctext-post-read-conversion) - (pre-write-conversion . ctext-pre-write-conversion))) - -(define-coding-system-alias - 'x-ctext-with-extensions 'compound-text-with-extensions) -(define-coding-system-alias - 'ctext-with-extensions 'compound-text-with-extensions) - -(make-coding-system - 'iso-safe 2 ?- - "Encode ASCII asis and encode non-ASCII characters to `?'." - '(ascii nil nil nil - nil ascii-eol ascii-cntl nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii))) - -(define-coding-system-alias - 'us-ascii 'iso-safe) - -(make-coding-system - 'iso-latin-1 2 ?1 - "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)." - '(ascii latin-iso8859-1 nil nil - nil nil nil nil nil nil nil nil nil nil nil t t) - '((safe-charsets ascii latin-iso8859-1) - (mime-charset . iso-8859-1))) - -(define-coding-system-alias 'iso-8859-1 'iso-latin-1) -(define-coding-system-alias 'latin-1 'iso-latin-1) - -;; Use iso-safe for terminal output if some other coding system is not -;; specified explicitly. 
-(set-safe-terminal-coding-system-internal 'iso-safe) - -;; The other coding-systems are defined in each language specific -;; section of languages.el. - -;; Normally, set coding system to `undecided' before reading a file. -;; Compiled Emacs Lisp files (*.elc) are not decoded at all, -;; but we regard them as containing multibyte characters. -;; Tar files are not decoded at all, but we treat them as raw bytes. - -(setq file-coding-system-alist - '(("\\.elc\\'" . (emacs-mule . emacs-mule)) - ("\\.utf\\(-8\\)?\\'" . utf-8) - ;; We use raw-text for reading loaddefs.el so that if it - ;; happens to have DOS or Mac EOLs, they are converted to - ;; newlines. This is required to make the special treatment - ;; of the "\ newline" combination in loaddefs.el, which marks - ;; the beginning of a doc string, work. - ("\\(\\`\\|/\\)loaddefs.el\\'" . (raw-text . raw-text-unix)) - ("\\.tar\\'" . (no-conversion . no-conversion)) - ( "\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system) - ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system) - ("" . (undecided . nil)))) - - -;;; Setting coding categories and their priorities. - -;; This setting is just to read an Emacs Lisp source files which -;; contain multilingual text while dumping Emacs. More appropriate -;; values are set by the command `set-language-environment' for each -;; language environment. 
- -(setq coding-category-emacs-mule 'emacs-mule - coding-category-sjis 'japanese-shift-jis - coding-category-iso-7 'iso-2022-7bit - coding-category-iso-7-tight 'iso-2022-jp - coding-category-iso-8-1 'iso-latin-1 - coding-category-iso-8-2 'iso-latin-1 - coding-category-iso-7-else 'iso-2022-7bit-lock - coding-category-iso-8-else 'iso-2022-8bit-ss2 - coding-category-ccl nil - coding-category-utf-8 'mule-utf-8 - coding-category-utf-16-be 'mule-utf-16be-with-signature - coding-category-utf-16-le 'mule-utf-16le-with-signature - coding-category-big5 'chinese-big5 - coding-category-raw-text 'raw-text - coding-category-binary 'no-conversion) - -(set-coding-priority - '(coding-category-iso-8-1 - coding-category-iso-8-2 - coding-category-utf-8 - coding-category-utf-16-be - coding-category-utf-16-le - coding-category-iso-7-tight - coding-category-iso-7 - coding-category-iso-7-else - coding-category-iso-8-else - coding-category-emacs-mule - coding-category-raw-text - coding-category-sjis - coding-category-big5 - coding-category-ccl - coding-category-binary - )) - - -;;; Miscellaneous settings. -(aset latin-extra-code-table ?\221 t) -(aset latin-extra-code-table ?\222 t) -(aset latin-extra-code-table ?\223 t) -(aset latin-extra-code-table ?\224 t) -(aset latin-extra-code-table ?\225 t) -(aset latin-extra-code-table ?\226 t) - -(update-coding-systems-internal) - -;; arch-tag: 7d5fed55-b6df-42f6-8d3d-0011190551f5 -;;; mule-conf.el ends here +;;; mule-conf.el --- configure multilingual environment + +;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, +;; 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. 
+;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H14PRO021 +;; Copyright (C) 2003 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 + +;; Keywords: i18n, mule, multilingual, character set, coding system + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;; This file defines the Emacs charsets and some basic coding systems. +;; Other coding systems are defined in the files in directory +;; lisp/language. + +;;; Code: + +;;; Remarks + +;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/. +;; Standards docs equivalent to iso-2022 and iso-8859 are at +;; http://www.ecma.ch/. + +;; FWIW, http://www.microsoft.com/globaldev/ lists the following for +;; MS Windows, which are presumably the only charsets we really need +;; to worry about on such systems: +;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866 +;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, +;; 1258, 874, 932, 936, 949, 950 + +;;; Definitions of character sets. 
+ +;; The charsets `ascii', `unicode' and `eight-bit' are already defined +;; in charset.c as below: +;; +;; (define-charset 'ascii +;; "" +;; :dimension 1 +;; :code-space [0 127] +;; :iso-final-char ?B +;; :ascii-compatible-p t +;; :emacs-mule-id 0 +;; :code-offset 0) +;; +;; (define-charset 'unicode +;; "" +;; :dimension 3 +;; :code-space [0 255 0 255 0 16] +;; :ascii-compatible-p t +;; :code-offset 0) +;; +;; (define-charset 'eight-bit +;; "" +;; :dimension 1 +;; :code-space [128 255] +;; :code-offset #x3FFF80) +;; +;; We now set :docstring, :short-name, and :long-name properties. + +(put-charset-property + 'ascii :docstring "ASCII (ISO646 IRV)") +(put-charset-property + 'ascii :short-name "ASCII") +(put-charset-property + 'ascii :long-name "ASCII (ISO646 IRV)") +(put-charset-property + 'iso-8859-1 :docstring "Latin-1 (ISO/IEC 8859-1)") +(put-charset-property + 'iso-8859-1 :short-name "Latin-1") +(put-charset-property + 'iso-8859-1 :long-name "Latin-1") +(put-charset-property + 'unicode :docstring "Unicode (ISO10646)") +(put-charset-property + 'unicode :short-name "Unicode") +(put-charset-property + 'unicode :long-name "Unicode (ISO10646)") +(put-charset-property 'eight-bit :docstring "Raw bytes 0-255") +(put-charset-property 'eight-bit :short-name "Raw bytes") + +(define-charset-alias 'ucs 'unicode) + +(define-charset 'emacs + "Full Emacs characters" + :ascii-compatible-p t + :code-space [ 0 255 0 255 0 63 ] + :code-offset 0 + :supplementary-p t) + +(define-charset 'latin-iso8859-1 + "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100" + :short-name "RHP of Latin-1" + :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100" + :iso-final-char ?A + :emacs-mule-id 129 + :code-space [32 127] + :code-offset 160) + +;; Name perhaps not ideal, but is XEmacs-compatible. 
+(define-charset 'control-1 + "8-bit control code (0x80..0x9F)" + :short-name "8-bit control code" + :code-space [128 159] + :code-offset 128) + +(define-charset 'eight-bit-control + "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)" + :short-name "Raw bytes 0x80..0x9F" + :supplementary-p t + :code-space [128 159] + :code-offset #x3FFF80) ; see character.h + +(define-charset 'eight-bit-graphic + "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)" + :short-name "Raw bytes 0xA0..0xFF" + :supplementary-p t + :code-space [160 255] + :code-offset #x3FFFA0) ; see character.h + +(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname + iso-ir iso-final + emacs-mule-id map) + `(progn + (define-charset ,symbol + ,name + :short-name ,nickname + :long-name ,name + :ascii-compatible-p t + :code-space [0 255] + :map ,map) + (if ,iso-symbol + (define-charset ,iso-symbol + (if ,iso-ir + (format "Right-Hand Part of %s (%s): ISO-IR-%d" + ,name ,nickname ,iso-ir) + (format "Right-Hand Part of %s (%s)" ,name ,nickname)) + :short-name (format "RHP of %s" ,name) + :long-name (format "RHP of %s (%s)" ,name ,nickname) + :iso-final-char ,iso-final + :emacs-mule-id ,emacs-mule-id + :code-space [32 127] + :subset (list ,symbol 160 255 -128))))) + +(define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2 + "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2") + +(define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3 + "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3") + +(define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4 + "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4") + +(define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5 + "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5") + +(define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6 + "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6") + +(define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7 + "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 
134 "8859-7") + +(define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8 + "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8") + +(define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9 + "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9") + +(define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10 + "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10") + +;; http://www.nectec.or.th/it-standards/iso8859-11/ +;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620 +;; plus nbsp +(define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11 + "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11") + +;; 8859-12 doesn't (yet?) exist. + +(define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13 + "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13") + +(define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14 + "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14") + +(define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15 + "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15") + +(define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16 + "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16") + +;; No point in keeping it around. +(fmakunbound 'define-iso-single-byte-charset) + +;; Can this be shared with 8859-11? +;; N.b. not all of these are defined unicodes. +(define-charset 'thai-tis620 + "TIS620.2533" + :short-name "TIS620.2533" + :iso-final-char ?T + :emacs-mule-id 133 + :code-space [32 127] + :code-offset #x0E00) + +;; Fixme: doc for this, c.f. above +(define-charset 'tis620-2533 + "TIS620.2533" + :short-name "TIS620.2533" + :ascii-compatible-p t + :code-space [0 255] + :superset '(ascii eight-bit-control (thai-tis620 . 
128))) + +(define-charset 'jisx0201 + "JISX0201" + :short-name "JISX0201" + :code-space [0 #xDF] + :map "JISX0201") + +(define-charset 'latin-jisx0201 + "Roman Part of JISX0201.1976" + :short-name "JISX0201 Roman" + :long-name "Japanese Roman (JISX0201.1976)" + :iso-final-char ?J + :emacs-mule-id 138 + :supplementary-p t + :code-space [33 126] + :subset '(jisx0201 33 126 0)) + +(define-charset 'katakana-jisx0201 + "Katakana Part of JISX0201.1976" + :short-name "JISX0201 Katakana" + :long-name "Japanese Katakana (JISX0201.1976)" + :iso-final-char ?I + :emacs-mule-id 137 + :supplementary-p t + :code-space [33 126] + :subset '(jisx0201 161 254 -128)) + +(define-charset 'chinese-gb2312 + "GB2312 Chinese simplified: ISO-IR-58" + :short-name "GB2312" + :long-name "GB2312: ISO-IR-58" + :iso-final-char ?A + :emacs-mule-id 145 + :code-space [33 126 33 126] + :code-offset #x110000 + :unify-map "GB2312") + +(define-charset 'chinese-gbk + "GBK Chinese simplified." + :short-name "GBK" + :code-space [#x40 #xFE #x81 #xFE] + :code-offset #x160000 + :unify-map "GBK") +(define-charset-alias 'cp936 'chinese-gbk) +(define-charset-alias 'windows-936 'chinese-gbk) + +(define-charset 'chinese-cns11643-1 + "CNS11643 Plane 1 Chinese traditional: ISO-IR-171" + :short-name "CNS11643-1" + :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171" + :iso-final-char ?G + :emacs-mule-id 149 + :code-space [33 126 33 126] + :code-offset #x114000 + :unify-map "CNS-1") + +(define-charset 'chinese-cns11643-2 + "CNS11643 Plane 2 Chinese traditional: ISO-IR-172" + :short-name "CNS11643-2" + :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172" + :iso-final-char ?H + :emacs-mule-id 150 + :code-space [33 126 33 126] + :code-offset #x118000 + :unify-map "CNS-2") + +(define-charset 'chinese-cns11643-3 + "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183" + :short-name "CNS11643-3" + :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183" + :iso-final-char ?I + :code-space [33 126 33 126] + 
:emacs-mule-id 246 + :code-offset #x11C000 + :unify-map "CNS-3") + +(define-charset 'chinese-cns11643-4 + "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184" + :short-name "CNS11643-4" + :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184" + :iso-final-char ?J + :emacs-mule-id 247 + :code-space [33 126 33 126] + :code-offset #x120000 + :unify-map "CNS-4") + +(define-charset 'chinese-cns11643-5 + "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185" + :short-name "CNS11643-5" + :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185" + :iso-final-char ?K + :emacs-mule-id 248 + :code-space [33 126 33 126] + :code-offset #x124000 + :unify-map "CNS-5") + +(define-charset 'chinese-cns11643-6 + "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186" + :short-name "CNS11643-6" + :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186" + :iso-final-char ?L + :emacs-mule-id 249 + :code-space [33 126 33 126] + :code-offset #x128000 + :unify-map "CNS-6") + +(define-charset 'chinese-cns11643-7 + "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187" + :short-name "CNS11643-7" + :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187" + :iso-final-char ?M + :emacs-mule-id 250 + :code-space [33 126 33 126] + :code-offset #x12C000 + :unify-map "CNS-7") + +(define-charset 'big5 + "Big5 (Chinese traditional)" + :short-name "Big5" + :code-space [#x40 #xFE #xA1 #xFE] + :code-offset #x130000 + :unify-map "BIG5") +;; Fixme: AKA cp950 according to +;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.mspx>. Is +;; that correct? 
+
+;; Big5 split into two 2-dimensional charsets, each with a #x21..#x7E
+;; code space in both dimensions.  The Big5 ranges they cover are the
+;; ones named in their docstrings.
+;; NOTE(review): the docstring of `chinese-big5-1' says A141-C67E while
+;; its :long-name says A141-C67F -- confirm which bound is right.
+(define-charset 'chinese-big5-1
+  "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
+  :short-name "Big5 (Level-1)"
+  :long-name "Big5 (Level-1) A141-C67F"
+  :iso-final-char ?0
+  :emacs-mule-id 152
+  :supplementary-p t
+  :code-space [#x21 #x7E #x21 #x7E]
+  :code-offset #x135000
+  :unify-map "BIG5-1")
+
+(define-charset 'chinese-big5-2
+  "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
+  :short-name "Big5 (Level-2)"
+  :long-name "Big5 (Level-2) C940-FEFE"
+  :iso-final-char ?1
+  :emacs-mule-id 153
+  :supplementary-p t
+  :code-space [#x21 #x7E #x21 #x7E]
+  :code-offset #x137800
+  :unify-map "BIG5-2")
+
+;; Japanese 2-dimensional charsets.  Their :code-offset values are
+;; spaced #x4000 apart (#x140000, #x144000, #x148000), and each names a
+;; :unify-map.
+(define-charset 'japanese-jisx0208
+  "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
+  :short-name "JISX0208"
+  :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
+  :iso-final-char ?B
+  :emacs-mule-id 146
+  :code-space [33 126 33 126]
+  :code-offset #x140000
+  :unify-map "JISX0208")
+
+(define-charset 'japanese-jisx0208-1978
+  "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
+  :short-name "JISX0208.1978"
+  :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
+  :iso-final-char ?@
+  :emacs-mule-id 144
+  :code-space [33 126 33 126]
+  :code-offset #x144000
+  :unify-map "JISC6226")
+
+(define-charset 'japanese-jisx0212
+  "JISX0212 Japanese supplement: ISO-IR-159"
+  :short-name "JISX0212"
+  :long-name "JISX0212 (Japanese): ISO-IR-159"
+  :iso-final-char ?D
+  :emacs-mule-id 148
+  :code-space [33 126 33 126]
+  :code-offset #x148000
+  :unify-map "JISX0212")
+
+;; Note that jisx0213 contains characters not in Unicode (3.2?).  It's
+;; arguable whether it should have a unify-map.
+(define-charset 'japanese-jisx0213-1
+  "JISX0213.2000 Plane 1 (Japanese)"
+  :short-name "JISX0213-1"
+  :iso-final-char ?O
+  :emacs-mule-id 151
+  :unify-map "JISX2131"
+  :code-space [33 126 33 126]
+  :code-offset #x14C000)
+
+(define-charset 'japanese-jisx0213-2
+  "JISX0213.2000 Plane 2 (Japanese)"
+  :short-name "JISX0213-2"
+  :iso-final-char ?P
+  :emacs-mule-id 254
+  :unify-map "JISX2132"
+  :code-space [33 126 33 126]
+  :code-offset #x150000)
+
+;; `japanese-jisx0213-a' is defined by a :map rather than a
+;; :code-offset; `japanese-jisx0213.2004-1' is then declared as the
+;; :superset of it and `japanese-jisx0213-1'.
+(define-charset 'japanese-jisx0213-a
+  "JISX0213.2004 adds these characters to JISX0213.2000."
+  :short-name "JISX0213A"
+  :dimension 2
+  :code-space [33 126 33 126]
+  :supplementary-p t
+  :map "JISX213A")
+
+(define-charset 'japanese-jisx0213.2004-1
+  "JISX0213.2004 Plane1 (Japanese)"
+  :short-name "JISX0213.2004-1"
+  :dimension 2
+  :code-space [33 126 33 126]
+  :iso-final-char ?Q
+  :superset '(japanese-jisx0213-a japanese-jisx0213-1))
+
+;; CP932 and its components: `cp932' is the :superset of `ascii', the
+;; #xA1..#xDF range of `jisx0201' (`katakana-sjis'), and the mapped
+;; 2-byte part (`cp932-2-byte').
+(define-charset 'katakana-sjis
+  "Katakana part of Shift-JIS"
+  :dimension 1
+  :code-space [#xA1 #xDF]
+  :subset '(jisx0201 #xA1 #xDF 0)
+  :supplementary-p t)
+
+(define-charset 'cp932-2-byte
+  "2-byte part of CP932"
+  :dimension 2
+  :map "CP932-2BYTE"
+  :code-space [#x40 #xFC #x81 #xFC]
+  :supplementary-p t)
+
+(define-charset 'cp932
+  "CP932 (Microsoft shift-jis)"
+  :code-space [#x00 #xFF #x00 #xFE]
+  :short-name "CP932"
+  :superset '(ascii katakana-sjis cp932-2-byte))
+
+;; The trailing comments on the next two :code-offset lines record the
+;; last code point of each range; `big5-hkscs' starts right after
+;; `korean-ksc5601' ends.
+(define-charset 'korean-ksc5601
+  "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
+  :short-name "KSC5601"
+  :long-name "KSC5601 (Korean): ISO-IR-149"
+  :iso-final-char ?C
+  :emacs-mule-id 147
+  :code-space [33 126 33 126]
+  :code-offset #x279f94  ; ... #x27c217
+  :unify-map "KSC5601")
+
+(define-charset 'big5-hkscs
+  "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
+  :short-name "Big5"
+  :code-space [#x40 #xFE #xA1 #xFE]
+  :code-offset #x27c218  ; ... #x280839
+  :unify-map "BIG5-HKSCS")
+
+;; Fixme: Korean cp949/UHC
+
+(define-charset 'chinese-sisheng
+  "SiSheng characters for PinYin/ZhuYin"
+  :short-name "SiSheng"
+  :long-name "SiSheng (PinYin/ZhuYin)"
+  :iso-final-char ?0
+  :emacs-mule-id 160
+  :code-space [33 126]
+  :unify-map "MULE-sisheng"
+  :supplementary-p t
+  :code-offset #x200000)
+
+;; A subset of the 1989 version of IPA.  It consists of the consonant
+;; signs used in English, French, German and Italian, and all vowel
+;; signs in the table.  [says old MULE doc]
+(define-charset 'ipa
+  "IPA (International Phonetic Association)"
+  :short-name "IPA"
+  :iso-final-char ?0
+  :emacs-mule-id 161
+  :unify-map "MULE-ipa"
+  :code-space [32 127]
+  :supplementary-p t
+  :code-offset #x200080)
+
+;; Vietnamese.  `viscii' is a full [0 255] charset defined by a :map;
+;; the lower/upper-case charsets are separate [32 127] charsets with
+;; their own :code-offset and :unify-map.
+(define-charset 'viscii
+  "VISCII1.1"
+  :short-name "VISCII"
+  :long-name "VISCII 1.1"
+  :code-space [0 255]
+  :map "VISCII")
+
+(define-charset 'vietnamese-viscii-lower
+  "VISCII1.1 lower-case"
+  :short-name "VISCII lower"
+  :long-name "VISCII lower-case"
+  :iso-final-char ?1
+  :emacs-mule-id 162
+  :code-space [32 127]
+  :code-offset #x200200
+  :supplementary-p t
+  :unify-map "MULE-lviscii")
+
+(define-charset 'vietnamese-viscii-upper
+  "VISCII1.1 upper-case"
+  :short-name "VISCII upper"
+  :long-name "VISCII upper-case"
+  :iso-final-char ?2
+  :emacs-mule-id 163
+  :code-space [32 127]
+  :code-offset #x200280
+  :supplementary-p t
+  :unify-map "MULE-uviscii")
+
+(define-charset 'vscii
+  "VSCII1.1 (TCVN-5712 VN1)"
+  :short-name "VSCII"
+  :code-space [0 255]
+  :map "VSCII")
+
+(define-charset-alias 'tcvn-5712 'vscii)
+
+;; Fixme: see note in tcvn.map about combining characters
+(define-charset 'vscii-2
+  "VSCII-2 (TCVN-5712 VN2)"
+  :code-space [0 255]
+  :map "VSCII-2")
+
+;; Single-byte charsets defined purely by a :map.  Every charset from
+;; here through `cp1125' uses :code-space [0 255] and is marked
+;; :ascii-compatible-p.
+(define-charset 'koi8-r
+  "KOI8-R"
+  :short-name "KOI8-R"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "KOI8-R")
+
+(define-charset-alias 'koi8 'koi8-r)
+
+(define-charset 'alternativnyj
+  "ALTERNATIVNYJ"
+  :short-name "alternativnyj"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "ALTERNATIVNYJ")
+
+(define-charset 'cp866
+  "CP866"
+  :short-name "cp866"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "IBM866")
+(define-charset-alias 'ibm866 'cp866)
+
+(define-charset 'koi8-u
+  "KOI8-U"
+  :short-name "KOI8-U"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "KOI8-U")
+
+(define-charset 'koi8-t
+  "KOI8-T"
+  :short-name "KOI8-T"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "KOI8-T")
+
+(define-charset 'georgian-ps
+  "GEORGIAN-PS"
+  :short-name "GEORGIAN-PS"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "KA-PS")
+
+(define-charset 'georgian-academy
+  "GEORGIAN-ACADEMY"
+  :short-name "GEORGIAN-ACADEMY"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "KA-ACADEMY")
+
+;; Windows code pages 1250-1258.  Each `windows-125N' charset also gets
+;; a `cp125N' alias.
+(define-charset 'windows-1250
+  "WINDOWS-1250 (Central Europe)"
+  :short-name "WINDOWS-1250"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "CP1250")
+(define-charset-alias 'cp1250 'windows-1250)
+
+(define-charset 'windows-1251
+  "WINDOWS-1251 (Cyrillic)"
+  :short-name "WINDOWS-1251"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "CP1251")
+(define-charset-alias 'cp1251 'windows-1251)
+
+(define-charset 'windows-1252
+  "WINDOWS-1252 (Latin I)"
+  :short-name "WINDOWS-1252"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "CP1252")
+(define-charset-alias 'cp1252 'windows-1252)
+
+(define-charset 'windows-1253
+  "WINDOWS-1253 (Greek)"
+  :short-name "WINDOWS-1253"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "CP1253")
+(define-charset-alias 'cp1253 'windows-1253)
+
+(define-charset 'windows-1254
+  "WINDOWS-1254 (Turkish)"
+  :short-name "WINDOWS-1254"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "CP1254")
+(define-charset-alias 'cp1254 'windows-1254)
+
+(define-charset 'windows-1255
+  "WINDOWS-1255 (Hebrew)"
+  :short-name "WINDOWS-1255"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "CP1255")
+(define-charset-alias 'cp1255 'windows-1255)
+
+(define-charset 'windows-1256
+  "WINDOWS-1256 (Arabic)"
+  :short-name "WINDOWS-1256"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "CP1256")
+(define-charset-alias 'cp1256 'windows-1256)
+
+(define-charset 'windows-1257
+  "WINDOWS-1257 (Baltic)"
+  :short-name "WINDOWS-1257"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "CP1257")
+(define-charset-alias 'cp1257 'windows-1257)
+
+(define-charset 'windows-1258
+  "WINDOWS-1258 (Viet Nam)"
+  :short-name "WINDOWS-1258"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "CP1258")
+(define-charset-alias 'cp1258 'windows-1258)
+
+(define-charset 'next
+  "NEXT"
+  :short-name "NEXT"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "NEXTSTEP")
+
+(define-charset 'cp1125
+  "CP1125"
+  :short-name "CP1125"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "CP1125")
+(define-charset-alias 'ruscii 'cp1125)
+;; Original name for cp1125, says Serhii Hlodin
+(define-charset-alias 'cp866u 'cp1125)
+
+;; Fixme: Cf. iconv, http://czyborra.com/charsets/codepages.html
+;; shows this as not ASCII compatible, with various graphics in
+;; 0x01-0x1F.
+(define-charset 'cp437
+  "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
+  :short-name "CP437"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM437")
+
+;; The remaining DOS/IBM code pages, through `cp874', follow the same
+;; pattern as `cp437': a [0 255] code space, :ascii-compatible-p, and a
+;; :map naming the mapping.
+(define-charset 'cp720
+  "CP720 (Arabic)"
+  :short-name "CP720"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "CP720")
+
+(define-charset 'cp737
+  "CP737 (PC Greek)"
+  :short-name "CP737"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "CP737")
+
+(define-charset 'cp775
+  "CP775 (PC Baltic)"
+  :short-name "CP775"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "CP775")
+
+(define-charset 'cp851
+  "CP851 (Greek)"
+  :short-name "CP851"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM851")
+
+(define-charset 'cp852
+  "CP852 (MS-DOS Latin-2)"
+  :short-name "CP852"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM852")
+
+(define-charset 'cp855
+  "CP855 (IBM Cyrillic)"
+  :short-name "CP855"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM855")
+
+(define-charset 'cp857
+  "CP857 (IBM Turkish)"
+  :short-name "CP857"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM857")
+
+(define-charset 'cp858
+  "CP858 (Multilingual Latin I + Euro)"
+  :short-name "CP858"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "CP858")
+(define-charset-alias 'cp00858 'cp858)  ; IANA has IBM00858/CP00858
+
+(define-charset 'cp860
+  "CP860 (MS-DOS Portuguese)"
+  :short-name "CP860"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM860")
+
+(define-charset 'cp861
+  "CP861 (MS-DOS Icelandic)"
+  :short-name "CP861"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM861")
+
+(define-charset 'cp862
+  "CP862 (PC Hebrew)"
+  :short-name "CP862"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM862")
+
+(define-charset 'cp863
+  "CP863 (MS-DOS Canadian French)"
+  :short-name "CP863"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM863")
+
+(define-charset 'cp864
+  "CP864 (PC Arabic)"
+  :short-name "CP864"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM864")
+
+(define-charset 'cp865
+  "CP865 (MS-DOS Nordic)"
+  :short-name "CP865"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM865")
+
+(define-charset 'cp869
+  "CP869 (IBM Modern Greek)"
+  :short-name "CP869"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM869")
+
+(define-charset 'cp874
+  "CP874 (IBM Thai)"
+  :short-name "CP874"
+  :code-space [0 255]
+  :ascii-compatible-p t
+  :map "IBM874")
+
+;; For Arabic, we need three different types of character sets.
+;; Digits are of direction left-to-right and of width 1-column.
+;; Others are of direction right-to-left and of width 1-column or
+;; 2-column.
+(define-charset 'arabic-digit
+  "Arabic digit"
+  :short-name "Arabic digit"
+  :iso-final-char ?2
+  :emacs-mule-id 164
+  :supplementary-p t
+  :code-space [34 42]
+  :code-offset #x0600)
+
+(define-charset 'arabic-1-column
+  "Arabic 1-column"
+  :short-name "Arabic 1-col"
+  :long-name "Arabic 1-column"
+  :iso-final-char ?3
+  :emacs-mule-id 165
+  :supplementary-p t
+  :code-space [33 126]
+  :code-offset #x200100)
+
+(define-charset 'arabic-2-column
+  "Arabic 2-column"
+  :short-name "Arabic 2-col"
+  :long-name "Arabic 2-column"
+  :iso-final-char ?4
+  :emacs-mule-id 224
+  :supplementary-p t
+  :code-space [33 126]
+  :code-offset #x200180)
+
+;; Lao script.
+;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
+;; Not all of them are defined Unicode characters.
+;; NOTE(review): 0x21..0x7E is 94 codes, which starting from U+0E81
+;; would end at U+0EDE -- confirm the 0EDF upper bound quoted here and
+;; in the docstrings.
+(define-charset 'lao
+  "Lao characters (ISO10646 0E81..0EDF)"
+  :short-name "Lao"
+  :iso-final-char ?1
+  :emacs-mule-id 167
+  :supplementary-p t
+  :code-space [33 126]
+  :code-offset #x0E81)
+
+;; `mule-lao' is a [0 255] charset declared as the :superset of `ascii',
+;; `eight-bit-control', and `lao' paired with 128 (presumably an offset
+;; placing Lao in the upper half -- confirm against `define-charset').
+(define-charset 'mule-lao
+  "Lao characters (ISO10646 0E81..0EDF)"
+  :short-name "Lao"
+  :code-space [0 255]
+  :supplementary-p t
+  :superset '(ascii eight-bit-control (lao . 128)))
+
+
+;; Indian scripts.  Symbolic charset for data exchange.  Glyphs are
+;; not assigned.  They are automatically converted to each Indian
+;; script which IS-13194 supports.
+
+(define-charset 'indian-is13194
+  "Generic Indian charset for data exchange with IS 13194"
+  :short-name "IS 13194"
+  :long-name "Indian IS 13194"
+  :iso-final-char ?5
+  :emacs-mule-id 225
+  :supplementary-p t
+  :code-space [33 126]
+  :code-offset #x180000)
+
+;; Define one [0 255] glyph charset per script and font family, named
+;; SCRIPT-cdac and SCRIPT-akruti.  `code-offset' starts at #x180100 and
+;; advances by #x100 after every definition, so the 11 CDAC charsets and
+;; the 9 AKRUTI charsets occupy consecutive #x100-sized slots.  The
+;; start value is the same as the :code-offset of `indian-glyph' below,
+;; which the generated docstrings name as the containing charset.
+(let ((code-offset #x180100))
+  (dolist (script '(devanagari sanskrit bengali tamil telugu assamese
+                    oriya kannada malayalam gujarati punjabi))
+    (define-charset (intern (format "%s-cdac" script))
+      (format "Glyphs of %s script for CDAC font. Subset of `indian-glyph'."
+              (capitalize (symbol-name script)))
+      :short-name (format "CDAC %s glyphs" (capitalize (symbol-name script)))
+      :supplementary-p t
+      :code-space [0 255]
+      :code-offset code-offset)
+    (setq code-offset (+ code-offset #x100)))
+
+  (dolist (script '(devanagari bengali punjabi gujarati
+                    oriya tamil telugu kannada malayalam))
+    (define-charset (intern (format "%s-akruti" script))
+      (format "Glyphs of %s script for AKRUTI font. Subset of `indian-glyph'."
+              (capitalize (symbol-name script)))
+      :short-name (format "AKRUTI %s glyphs" (capitalize (symbol-name script)))
+      :supplementary-p t
+      :code-space [0 255]
+      :code-offset code-offset)
+    (setq code-offset (+ code-offset #x100))))
+
+(define-charset 'indian-glyph
+  "Glyphs for Indian characters."
+  :short-name "Indian glyph"
+  :iso-final-char ?4
+  :emacs-mule-id 240
+  :supplementary-p t
+  :code-space [32 127 32 127]
+  :code-offset #x180100)
+
+;; NOTE(review): `indian-1-column' and `indian-2-column' below share
+;; :emacs-mule-id 251 and :code-offset #x184000 -- confirm that this
+;; aliasing is intended before changing either value.
+
+;; Actual Glyph for 1-column width.
+(define-charset 'indian-1-column
+  "Indian charset for 1-column width glyphs."
+  :short-name "Indian 1-col"
+  :long-name "Indian 1 Column"
+  :iso-final-char ?6
+  :emacs-mule-id 251
+  :supplementary-p t
+  :code-space [33 126 33 126]
+  :code-offset #x184000)
+
+;; Actual Glyph for 2-column width.
+(define-charset 'indian-2-column
+  "Indian charset for 2-column width glyphs."
+  :short-name "Indian 2-col"
+  :long-name "Indian 2 Column"
+  :iso-final-char ?5
+  :emacs-mule-id 251
+  :supplementary-p t
+  :code-space [33 126 33 126]
+  :code-offset #x184000)
+
+;; Tibetan.  The 2-column and 1-column charsets use the same
+;; :code-space and the same :code-offset (#x190000); only `tibetan'
+;; names a :unify-map.
+(define-charset 'tibetan
+  "Tibetan characters"
+  :short-name "Tibetan 2-col"
+  :long-name "Tibetan 2 column"
+  :iso-final-char ?7
+  :emacs-mule-id 252
+  :unify-map "MULE-tibetan"
+  :supplementary-p t
+  :code-space [33 126 33 37]
+  :code-offset #x190000)
+
+(define-charset 'tibetan-1-column
+  "Tibetan 1 column glyph"
+  :short-name "Tibetan 1-col"
+  :long-name "Tibetan 1 column"
+  :iso-final-char ?8
+  :emacs-mule-id 241
+  :supplementary-p t
+  :code-space [33 126 33 37]
+  :code-offset #x190000)
+
+;; Subsets of Unicode.
+(define-charset 'mule-unicode-2500-33ff
+  "Unicode characters of the range U+2500..U+33FF."
+  :short-name "Unicode subset 2"
+  :long-name "Unicode subset (U+2500..U+33FF)"
+  :iso-final-char ?2
+  :emacs-mule-id 242
+  :supplementary-p t
+  :code-space [#x20 #x7f #x20 #x47]
+  :code-offset #x2500)
+
+(define-charset 'mule-unicode-e000-ffff
+  "Unicode characters of the range U+E000..U+FFFF."
+  :short-name "Unicode subset 3"
+  :long-name "Unicode subset (U+E000..U+FFFF)"
+  :iso-final-char ?3
+  :emacs-mule-id 243
+  :supplementary-p t
+  :code-space [#x20 #x7F #x20 #x75]
+  :code-offset #xE000
+  :max-code 30015)  ; U+FFFF
+
+(define-charset 'mule-unicode-0100-24ff
+  "Unicode characters of the range U+0100..U+24FF."
+  :short-name "Unicode subset"
+  :long-name "Unicode subset (U+0100..U+24FF)"
+  :iso-final-char ?1
+  :emacs-mule-id 244
+  :supplementary-p t
+  :code-space [#x20 #x7F #x20 #x7F]
+  :code-offset #x100)
+
+;; Whole Unicode planes, each a [0 255 0 255] charset whose :code-offset
+;; is the first code point of the plane named in its docstring.
+(define-charset 'unicode-bmp
+  "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
+  :short-name "Unicode BMP"
+  :code-space [0 255 0 255]
+  :code-offset 0)
+
+(define-charset 'unicode-smp
+  "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
+  :short-name "Unicode SMP"
+  :code-space [0 255 0 255]
+  :code-offset #x10000)
+
+(define-charset 'unicode-sip
+  "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
+  :short-name "Unicode SIP"
+  :code-space [0 255 0 255]
+  :code-offset #x20000)
+
+(define-charset 'unicode-ssp
+  "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
+  :short-name "Unicode SSP"
+  :code-space [0 255 0 255]
+  :code-offset #xE0000)
+
+(define-charset 'ethiopic
+  "Ethiopic characters for Amharic and Tigrigna."
+  :short-name "Ethiopic"
+  :long-name "Ethiopic characters"
+  :iso-final-char ?3
+  :emacs-mule-id 245
+  :supplementary-p t
+  :unify-map "MULE-ethiopic"
+  :code-space [33 126 33 126]
+  :code-offset #x1A0000)
+
+(define-charset 'mac-roman
+  "Mac Roman charset"
+  :short-name "Mac Roman"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "MACINTOSH")
+
+;; EBCDIC variants.  These are [0 255] map-based charsets that set a
+;; :mime-charset and are not marked :ascii-compatible-p.
+;; Fixme: modern EBCDIC variants, e.g. IBM00924?
+(define-charset 'ebcdic-us
+  "US version of EBCDIC"
+  :short-name "EBCDIC-US"
+  :code-space [0 255]
+  :mime-charset 'ebcdic-us
+  :map "EBCDICUS")
+
+(define-charset 'ebcdic-uk
+  "UK version of EBCDIC"
+  :short-name "EBCDIC-UK"
+  :code-space [0 255]
+  :mime-charset 'ebcdic-uk
+  :map "EBCDICUK")
+
+(define-charset 'ibm1047
+  ;; Says groff:
+  "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
+  :short-name "IBM1047"
+  :code-space [0 255]
+  :mime-charset 'ibm1047
+  :map "IBM1047")
+(define-charset-alias 'cp1047 'ibm1047)
+
+(define-charset 'hp-roman8
+  "Encoding used by Hewlett-Packard printer software"
+  :short-name "HP-ROMAN8"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "HP-ROMAN8")
+
+;; To make a coding system with this, a pre-write-conversion should
+;; account for the commented-out multi-valued code points in
+;; stdenc.map.
+(define-charset 'adobe-standard-encoding
+  "Adobe `standard encoding' used in PostScript"
+  :short-name "ADOBE-STANDARD-ENCODING"
+  :code-space [#x20 255]
+  :map "stdenc")
+
+(define-charset 'symbol
+  "Adobe symbol encoding used in PostScript"
+  :short-name "ADOBE-SYMBOL"
+  :code-space [#x20 255]
+  :map "symbol")
+
+(define-charset 'ibm850
+  "DOS codepage 850 (Latin-1)"
+  :short-name "IBM850"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "IBM850")
+(define-charset-alias 'cp850 'ibm850)
+
+(define-charset 'mik
+  "Bulgarian DOS codepage"
+  :short-name "MIK"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :map "MIK")
+
+(define-charset 'ptcp154
+  "`Paratype' codepage (Asian Cyrillic)"
+  :short-name "PT154"
+  :ascii-compatible-p t
+  :code-space [0 255]
+  :mime-charset 'pt154
+  :map "PTCP154")
+(define-charset-alias 'pt154 'ptcp154)
+(define-charset-alias 'cp154 'ptcp154)
+
+;; GB18030 is assembled from components: a mapped 2-byte part, a mapped
+;; 4-byte part for the BMP, and three 4-byte ranges placed by
+;; :code-offset and bounded by :min-code/:max-code.  `gb18030' itself is
+;; the :superset of `ascii' and those five charsets.
+(define-charset 'gb18030-2-byte
+  "GB18030 2-byte (0x814E..0xFEFE)"
+  :code-space [#x40 #xFE #x81 #xFE]
+  :supplementary-p t
+  :map "GB180302")
+
+(define-charset 'gb18030-4-byte-bmp
+  "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
+  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
+  :supplementary-p t
+  :map "GB180304")
+
+(define-charset 'gb18030-4-byte-smp
+  "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
+  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
+  :min-code '(#x9030 . #x8130)
+  :max-code '(#xE332 . #x9A35)
+  :supplementary-p t
+  :code-offset #x10000)
+
+(define-charset 'gb18030-4-byte-ext-1
+  "GB18030 4-byte (0x8431A530-0x8F39FE39)"
+  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
+  :min-code '(#x8431 . #xA530)
+  :max-code '(#x8F39 . #xFE39)
+  :supplementary-p t
+  :code-offset #x200000 ; ... #x22484B
+  )
+
+(define-charset 'gb18030-4-byte-ext-2
+  "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
+  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
+  :min-code '(#xE332 . #x9A36)
+  :max-code '(#xFE39 . #xFE39)
+  :supplementary-p t
+  :code-offset #x22484C ; ... #x279f93
+  )
+
+(define-charset 'gb18030
+  "GB18030"
+  :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
+  :min-code 0
+  :max-code '(#xFE39 . #xFE39)
+  :superset '(ascii gb18030-2-byte
+              gb18030-4-byte-bmp gb18030-4-byte-smp
+              gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
+
+;; NOTE(review): #x27A000 lies inside the range annotated for
+;; `korean-ksc5601' earlier in this file (#x279f94 ... #x27c217) --
+;; confirm that the overlap is intended.
+(define-charset 'chinese-cns11643-15
+  "CNS11643 Plane 15 Chinese Traditional"
+  :short-name "CNS11643-15"
+  :long-name "CNS11643-15 (Chinese traditional)"
+  :code-space [33 126 33 126]
+  :code-offset #x27A000)
+
+;; Call `unify-charset' on every charset above that was defined with a
+;; :unify-map.
+(unify-charset 'chinese-gb2312)
+(unify-charset 'chinese-gbk)
+(unify-charset 'chinese-cns11643-1)
+(unify-charset 'chinese-cns11643-2)
+(unify-charset 'chinese-cns11643-3)
+(unify-charset 'chinese-cns11643-4)
+(unify-charset 'chinese-cns11643-5)
+(unify-charset 'chinese-cns11643-6)
+(unify-charset 'chinese-cns11643-7)
+(unify-charset 'big5)
+(unify-charset 'chinese-big5-1)
+(unify-charset 'chinese-big5-2)
+(unify-charset 'big5-hkscs)
+(unify-charset 'korean-ksc5601)
+(unify-charset 'vietnamese-viscii-lower)
+(unify-charset 'vietnamese-viscii-upper)
+(unify-charset 'chinese-sisheng)
+(unify-charset 'ipa)
+(unify-charset 'tibetan)
+(unify-charset 'ethiopic)
+(unify-charset 'japanese-jisx0208-1978)
+(unify-charset 'japanese-jisx0208)
+(unify-charset 'japanese-jisx0212)
+(unify-charset 'japanese-jisx0213-1)
+(unify-charset 'japanese-jisx0213-2)
+
+
+;; These are tables for translating characters on decoding and
+;; encoding.
+;; Fixme: these aren't used now -- should they be?
+(setq standard-translation-table-for-decode nil)
+
+(setq standard-translation-table-for-encode nil)
+
+;;; Make fundamental coding systems.
+
+;; The coding systems `no-conversion' and `undecided' are already
+;; defined in coding.c as below:
+;;
+;; (define-coding-system 'no-conversion
+;;   "..."
+;;   :coding-type 'raw-text
+;;   ...)
+;; (define-coding-system 'undecided
+;;   "..."
+;;   :coding-type 'undecided
+;;   ...)
+
+;; Short aliases for the predefined coding systems and for the three
+;; EOL variants of `undecided'.
+(define-coding-system-alias 'binary 'no-conversion)
+(define-coding-system-alias 'unix 'undecided-unix)
+(define-coding-system-alias 'dos 'undecided-dos)
+(define-coding-system-alias 'mac 'undecided-mac)
+
+(define-coding-system 'raw-text
+  "Raw text, which means text contains random 8-bit codes.
+Encoding text with this coding system produces the actual byte
+sequence of the text in buffers and strings. An exception is made for
+eight-bit-control characters. Each of them is encoded into a single
+byte.
+
+When you visit a file with this coding, the file is read into a
+unibyte buffer as is (except for EOL format), thus each byte of a file
+is treated as a character."
+  :coding-type 'raw-text
+  :for-unibyte t
+  :mnemonic ?t)
+
+(define-coding-system 'no-conversion-multibyte
+  "Like `no-conversion' but don't read a file into a unibyte buffer."
+  :coding-type 'raw-text
+  :eol-type 'unix
+  :mnemonic ?=)
+
+(define-coding-system 'iso-latin-1
+  "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
+  :coding-type 'charset
+  :mnemonic ?1
+  :charset-list '(iso-8859-1)
+  :mime-charset 'iso-8859-1)
+
+(define-coding-system-alias 'iso-8859-1 'iso-latin-1)
+(define-coding-system-alias 'latin-1 'iso-latin-1)
+
+;; Coding systems not specific to each language environment.
+
+(define-coding-system 'emacs-mule
+  "Emacs 21 internal format used in buffer and string."
+  :coding-type 'emacs-mule
+  :charset-list 'emacs-mule
+  :mnemonic ?M)
+
+;; Unicode coding systems.  `utf-8' is restricted to the `unicode'
+;; charset; `utf-8-emacs' uses the `emacs' charset instead.
+(define-coding-system 'utf-8
+  "UTF-8."
+  :coding-type 'utf-8
+  :mnemonic ?U
+  :charset-list '(unicode)
+  :mime-charset 'utf-8)
+
+(define-coding-system-alias 'mule-utf-8 'utf-8)
+
+(define-coding-system 'utf-8-emacs
+  "Support for all Emacs characters (including non-Unicode characters)."
+  :coding-type 'utf-8
+  :mnemonic ?U
+  :charset-list '(emacs))
+
+;; UTF-16 family.  The variants differ only in :endian, :bom, and
+;; :mime-charset; all are flagged :mime-text-unsuitable.
+(define-coding-system 'utf-16le
+  "UTF-16LE (little endian, no signature (BOM))."
+  :coding-type 'utf-16
+  :mnemonic ?U
+  :charset-list '(unicode)
+  :endian 'little
+  :mime-text-unsuitable t
+  :mime-charset 'utf-16le)
+
+(define-coding-system 'utf-16be
+  "UTF-16BE (big endian, no signature (BOM))."
+  :coding-type 'utf-16
+  :mnemonic ?U
+  :charset-list '(unicode)
+  :endian 'big
+  :mime-text-unsuitable t
+  :mime-charset 'utf-16be)
+
+(define-coding-system 'utf-16le-with-signature
+  "UTF-16 (little endian, with signature (BOM))."
+  :coding-type 'utf-16
+  :mnemonic ?U
+  :charset-list '(unicode)
+  :bom t
+  :endian 'little
+  :mime-text-unsuitable t
+  :mime-charset 'utf-16)
+
+(define-coding-system 'utf-16be-with-signature
+  "UTF-16 (big endian, with signature)."
+  :coding-type 'utf-16
+  :mnemonic ?U
+  :charset-list '(unicode)
+  :bom t
+  :endian 'big
+  :mime-text-unsuitable t
+  :mime-charset 'utf-16)
+
+(define-coding-system 'utf-16
+  "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
+  :coding-type 'utf-16
+  :mnemonic ?U
+  :charset-list '(unicode)
+  :bom '(utf-16le-with-signature . utf-16be-with-signature)
+  :endian 'big
+  :mime-text-unsuitable t
+  :mime-charset 'utf-16)
+
+;; Backwards compatibility (old names, also used by Mule-UCS).  We
+;; prefer the MIME names.
+(define-coding-system-alias 'utf-16-le 'utf-16le-with-signature)
+(define-coding-system-alias 'utf-16-be 'utf-16be-with-signature)
+
+
+;; Generic ISO 2022 coding systems.  They differ in their :designation
+;; vector and in the :flags list.
+(define-coding-system 'iso-2022-7bit
+  "ISO 2022 based 7-bit encoding using only G0."
+  :coding-type 'iso-2022
+  :mnemonic ?J
+  :charset-list 'iso-2022
+  :designation [(ascii t) nil nil nil]
+  :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
+
+(define-coding-system 'iso-2022-7bit-ss2
+  "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
+  :coding-type 'iso-2022
+  :mnemonic ?$
+  :charset-list 'iso-2022
+  :designation [(ascii 94) nil (nil 96) nil]
+  :flags '(short ascii-at-eol ascii-at-cntl 7-bit
+           designation single-shift composition))
+
+(define-coding-system 'iso-2022-7bit-lock
+  "ISO-2022 coding system using Locking-Shift for 96-charset."
+  :coding-type 'iso-2022
+  :mnemonic ?&
+  :charset-list 'iso-2022
+  :designation [(ascii 94) (nil 96) nil nil]
+  :flags '(ascii-at-eol ascii-at-cntl 7-bit
+           designation locking-shift composition))
+
+(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
+
+(define-coding-system 'iso-2022-7bit-lock-ss2
+  "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
+  :coding-type 'iso-2022
+  :mnemonic ?i
+  :charset-list '(ascii
+                  japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
+                  korean-ksc5601
+                  chinese-gb2312
+                  chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
+                  chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
+                  chinese-cns11643-7)
+  :designation [(ascii 94)
+                (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
+                (nil chinese-cns11643-2)
+                (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
+                     chinese-cns11643-6 chinese-cns11643-7)]
+  :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
+           single-shift init-bol))
+
+(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
+
+(define-coding-system 'iso-2022-8bit-ss2
+  "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
+  :coding-type 'iso-2022
+  :mnemonic ?@
+  :charset-list 'iso-2022
+  :designation [(ascii 94) nil (nil 96) nil]
+  :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
+
+(define-coding-system 'compound-text
+  "Compound text based generic encoding for decoding unknown messages.
+
+This coding system does not support extended segments of CTEXT."
+  :coding-type 'iso-2022
+  :mnemonic ?x
+  :charset-list 'iso-2022
+  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
+  :flags '(ascii-at-eol ascii-at-cntl long-form
+           designation locking-shift single-shift composition)
+  ;; Fixme: this isn't a valid MIME charset and has to be
+  ;; special-cased elsewhere  -- fx
+  :mime-charset 'x-ctext)
+
+(define-coding-system-alias 'x-ctext 'compound-text)
+(define-coding-system-alias 'ctext 'compound-text)
+
+;; Same as compound-text, but doesn't produce composition escape
+;; sequences.  Used in post-read and pre-write conversions of
+;; compound-text-with-extensions, see mule.el.  Note that this should
+;; not have a mime-charset property, to prevent it from showing up
+;; close to the beginning of coding systems ordered by priority.
+(define-coding-system 'ctext-no-compositions
+  "Compound text based generic encoding for decoding unknown messages.
+
+Like `compound-text', but does not produce escape sequences for compositions."
+  :coding-type 'iso-2022
+  :mnemonic ?x
+  :charset-list 'iso-2022
+  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
+  :flags '(ascii-at-eol ascii-at-cntl
+           designation locking-shift single-shift))
+
+(define-coding-system 'compound-text-with-extensions
+  "Compound text encoding with ICCCM Extended Segment extensions.
+
+See the variable `ctext-non-standard-encodings-alist' for the
+detail about how extended segments are handled.
+
+This coding system should be used only for X selections. It is inappropriate
+for decoding and encoding files, process I/O, etc."
+  :coding-type 'iso-2022
+  :mnemonic ?x
+  :charset-list 'iso-2022
+  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
+  :flags '(ascii-at-eol ascii-at-cntl long-form
+           designation locking-shift single-shift)
+  :post-read-conversion 'ctext-post-read-conversion
+  :pre-write-conversion 'ctext-pre-write-conversion)
+
+(define-coding-system-alias
+  'x-ctext-with-extensions 'compound-text-with-extensions)
+(define-coding-system-alias
+  'ctext-with-extensions 'compound-text-with-extensions)
+
+(define-coding-system 'us-ascii
+  "Encode ASCII as-is and encode non-ASCII characters to `?'."
+  :coding-type 'charset
+  :mnemonic ?-
+  :charset-list '(ascii)
+  :default-char ??
+  :mime-charset 'us-ascii)
+
+(define-coding-system-alias 'iso-safe 'us-ascii)
+
+;; The UTF-7 coding systems are declared with :coding-type 'utf-8 plus
+;; pre-write/post-read conversion functions named after each variant.
+(define-coding-system 'utf-7
+  "UTF-7 encoding of Unicode (RFC 2152)."
+  :coding-type 'utf-8
+  :mnemonic ?U
+  :mime-charset 'utf-7
+  :charset-list '(unicode)
+  :pre-write-conversion 'utf-7-pre-write-conversion
+  :post-read-conversion 'utf-7-post-read-conversion)
+
+(define-coding-system 'utf-7-imap
+  "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
+  :coding-type 'utf-8
+  :mnemonic ?u
+  :charset-list '(unicode)
+  :pre-write-conversion 'utf-7-imap-pre-write-conversion
+  :post-read-conversion 'utf-7-imap-post-read-conversion)
+
+;; Use us-ascii for terminal output if some other coding system is not
+;; specified explicitly.
+(set-safe-terminal-coding-system-internal 'us-ascii)
+
+;; The other coding-systems are defined in each language specific
+;; files under lisp/language.
+
+;; Normally, set coding system to `undecided' before reading a file.
+;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
+;; but we regard them as containing multibyte characters.
+;; Tar files are not decoded at all, but we treat them as raw bytes.
+;; NOTE(review): the list below maps *.elc to `utf-8-emacs', so the
+;; "not decoded at all" remark about compiled files looks stale --
+;; confirm and update.
+;;
+;; Each entry pairs a file-name regexp with a coding system, a
+;; (DECODING . ENCODING) pair, or a function symbol.  The final ""
+;; entry matches every file name, so it serves as the default.
+
+(setq file-coding-system-alist
+      '(("\\.elc\\'" . utf-8-emacs)
+        ("\\.utf\\(-8\\)?\\'" . utf-8)
+        ("\\.xml\\'" . xml-find-file-coding-system)
+        ;; We use raw-text for reading loaddefs.el so that if it
+        ;; happens to have DOS or Mac EOLs, they are converted to
+        ;; newlines.  This is required to make the special treatment
+        ;; of the "\ newline" combination in loaddefs.el, which marks
+        ;; the beginning of a doc string, work.
+        ;; The dot is escaped so that only a literal "loaddefs.el"
+        ;; matches, like the other extension patterns in this list.
+        ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
+        ("\\.tar\\'" . (no-conversion . no-conversion))
+        ( "\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
+        ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
+        ("" . (undecided . nil))))
+
+
+;;; Setting coding categories and their priorities.
+
+;; This setting is just to read Emacs Lisp source files which
+;; contain multilingual text while dumping Emacs.  More appropriate
+;; values are set by the command `set-language-environment' for each
+;; language environment.
+
+(set-coding-system-priority
+ 'iso-latin-1
+ 'utf-8
+ 'iso-2022-7bit
+ )
+
+
+;;; Miscellaneous settings.
+
+;; Make all multibyte characters self-insert.
+(set-char-table-range (nth 1 global-map)
+                      (cons 128 (max-char))
+                      'self-insert-command)
+
+;; Set the `latin-extra-code-table' entries for octal codes 221-226
+;; to t.
+(aset latin-extra-code-table ?\221 t)
+(aset latin-extra-code-table ?\222 t)
+(aset latin-extra-code-table ?\223 t)
+(aset latin-extra-code-table ?\224 t)
+(aset latin-extra-code-table ?\225 t)
+(aset latin-extra-code-table ?\226 t)
+
+;; The old code-pages library is obsoleted by coding systems based on
+;; the charsets defined in this file but might be required by user
+;; code.
+(provide 'code-pages)
+
+;; Local variables:
+;; no-byte-compile: t
+;; End:
+
+;; arch-tag: 7d5fed55-b6df-42f6-8d3d-0011190551f5
+;;; mule-conf.el ends here