lisp/international/mule-conf.el

   1 ;;; mule-conf.el --- configure multilingual environment
   2
   3 ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN.
   4 ;;   Licensed to the Free Software Foundation.
   5 ;; Copyright (C) 2002, 2003 Free Software Foundation, Inc.
   6 ;; Copyright (C) 2003
   7 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   8 ;;   Registration Number H13PRO009
   9
  10 ;; Keywords: i18n, mule, multilingual, character set, coding system
  11
  12 ;; This file is part of GNU Emacs.
  13
  14 ;; GNU Emacs is free software; you can redistribute it and/or modify
  15 ;; it under the terms of the GNU General Public License as published by
  16 ;; the Free Software Foundation; either version 2, or (at your option)
  17 ;; any later version.
  18
  19 ;; GNU Emacs is distributed in the hope that it will be useful,
  20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 ;; GNU General Public License for more details.
  23
  24 ;; You should have received a copy of the GNU General Public License
  25 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  27 ;; Boston, MA 02111-1307, USA.
  28
  29 ;;; Commentary:
  30
  31 ;; This file defines the Emacs charsets and some basic coding systems.
  32 ;; Other coding systems are defined in the files in directory
  33 ;; lisp/language.
  34
  35 ;;; Code:
  36
  37 ;;; Remarks
  38
  39 ;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/.
  40 ;; Standards docs equivalent to iso-2022 and iso-8859 are at
  41 ;; http://www.ecma.ch/.
  42
  43 ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
  44 ;; MS Windows, which are presumably the only charsets we really need
  45 ;; to worry about on such systems:
  46 ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
  47 ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
  48 ;;                      1258, 874, 932, 936, 949, 950
  49
  50 ;;; Definitions of character sets.
  51
  52 ;; The charsets `ascii', `unicode' and `eight-bit' are already defined
  53 ;; in charset.c as below:
  54 ;;
  55 ;; (define-charset 'ascii
  56 ;;   ""
  57 ;;   :dimension 1
  58 ;;   :code-space [0 127]
  59 ;;   :iso-final-char ?B
  60 ;;   :ascii-compatible-p t
  61 ;;   :emacs-mule-id 0
  62 ;;   :code-offset 0)
  63 ;;
  64 ;; (define-charset 'unicode
  65 ;;   ""
  66 ;;   :dimension 3
  67 ;;   :code-space [0 255 0 255 0 16]
  68 ;;   :ascii-compatible-p t
  69 ;;   :code-offset 0)
  70 ;;
  71 ;; (define-charset 'eight-bit
  72 ;;   ""
  73 ;;   :dimension 1
  74 ;;   :code-space [128 255]
  75 ;;   :code-offset #x3FFF80)
  76 ;;
  77 ;; We now set :docstring, :short-name, and :long-name properties.
  78
  79 (put-charset-property               ; `ascii' is already defined; only descriptive properties are set here
  80  'ascii :docstring "ASCII (ISO646 IRV)")
  81 (put-charset-property
  82  'ascii :short-name "ASCII")
  83 (put-charset-property
  84  'ascii :long-name "ASCII (ISO646 IRV)")
  85 (put-charset-property               ; NOTE(review): `iso-8859-1' is not defined in this file -- presumably predefined as well; confirm
  86  'iso-8859-1 :docstring "Latin-1 (ISO/IEC 8859-1)")
  87 (put-charset-property
  88  'iso-8859-1 :short-name "Latin-1")
  89 (put-charset-property
  90  'iso-8859-1 :long-name "Latin-1")
  91 (put-charset-property               ; same three properties for `unicode'
  92  'unicode :docstring "Unicode (ISO10646)")
  93 (put-charset-property
  94  'unicode :short-name "Unicode")
  95 (put-charset-property
  96  'unicode :long-name "Unicode (ISO10646)")
  97 (put-charset-property 'eight-bit :docstring "Raw bytes 128-255") ; agrees with the charset's code space [128 255]
  98 (put-charset-property 'eight-bit :short-name "Raw bytes")        ; no separate :long-name is set for `eight-bit'
  99
 100 (define-charset-alias 'ucs 'unicode)   ; `ucs' is registered as another name for `unicode'
 101
 102 (define-charset 'emacs
 103   "Full Emacs characters"
 104   :ascii-compatible-p t
 105   :code-space [ 0 255 0 255 0 63 ]     ; three dimensions: 256 * 256 * 64 codes, i.e. 0 .. #x3FFFFF
 106   :code-offset 0                       ; code N denotes character N
 107   :supplementary-p t)
 108
 109 (define-charset 'latin-iso8859-1
 110   "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
 111   :short-name "RHP of Latin-1"
 112   :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
 113   :iso-final-char ?A
 114   :emacs-mule-id 129
 115   :code-space [32 127]
 116   :code-offset 160)                     ; code 32 -> character 160, ..., code 127 -> character 255
 117
 118 ;; Name perhaps not ideal, but is XEmacs-compatible.
 119 (define-charset 'control-1
 120   "8-bit control code (0x80..0x9F)"
 121   :short-name "8-bit control code"
 122   :code-space [128 159]
 123   :code-offset 128)                     ; code N denotes character N (128..159)
 124
 125 (define-charset 'eight-bit-control
 126   "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
 127   :short-name "Raw bytes 0x80..0x9F"
 128   :code-space [128 159]                 ; same codes as `control-1', but a different :code-offset
 129   :code-offset #x3FFF80)                ; see character.h
 130
 131 (define-charset 'eight-bit-graphic
 132   "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
 133   :short-name "Raw bytes 0xA0..0xFF"
 134   :code-space [160 255]                 ; continues directly after `eight-bit-control' (#x3FFF80 + 32 = #x3FFFA0)
 135   :code-offset #x3FFFA0)                ; see character.h
 136
 137 (defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
 138                                                  iso-ir iso-final
 139                                                  emacs-mule-id map)
 140   `(progn                               ; expands to one or two `define-charset' calls; arguments are spliced in several times
 141      (define-charset ,symbol            ; 1. the full single-byte charset SYMBOL, described by MAP
 142        ,name
 143        :short-name ,nickname
 144        :long-name ,name
 145        :ascii-compatible-p t
 146        :code-space [0 255]
 147        :map ,map)
 148      (if ,iso-symbol                    ; 2. only when ISO-SYMBOL is non-nil: its right-hand part
 149          (define-charset ,iso-symbol
 150            (if ,iso-ir                  ; the docstring mentions the ISO-IR number only when one is given
 151                (format "Right-Hand Part of %s (%s): ISO-IR-%d"
 152                        ,name ,nickname ,iso-ir)
 153              (format "Right-Hand Part of %s (%s)" ,name ,nickname))
 154            :short-name (format "RHP of %s" ,name)
 155            :long-name (format "RHP of %s (%s)" ,name ,nickname)
 156            :iso-final-char ,iso-final
 157            :emacs-mule-id ,emacs-mule-id  ; may be nil (several callers below pass nil)
 158            :code-space [32 127]
 159            :subset (list ,symbol 160 255 -128)))))  ; codes 160..255 of SYMBOL, shifted by -128 to 32..127
 160
 161 (define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
 162   "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")   ; args: name nickname iso-ir iso-final emacs-mule-id map
 163
 164 (define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
 165   "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
 166
 167 (define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
 168   "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
 169
 170 (define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
 171   "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
 172
 173 (define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
 174   "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
 175
 176 (define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
 177   "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
 178
 179 (define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
 180   "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
 181
 182 (define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
 183   "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
 184
 185 (define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
 186   "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")  ; nil: no emacs-mule-id is given for this charset
 187
 188 ;; http://www.nectec.or.th/it-standards/iso8859-11/
 189 ;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
 190 ;; plus nbsp
 191 (define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
 192   "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")  ; final char ?T is also used by `thai-tis620' below
 193
 194 ;; 8859-12 doesn't (yet?) exist.
 195
 196 (define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
 197   "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
 198
 199 (define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
 200   "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
 201
 202 (define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
 203   "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
 204
 205 (define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
 206   "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
 207
 208 ;; No point in keeping it around.
 209 (fmakunbound 'define-iso-single-byte-charset)   ; drops the helper macro after its last use above
 210
 211 ;; Can this be shared with 8859-11?
 212 ;; N.b. not all of these are defined unicodes.
 213 (define-charset 'thai-tis620
 214   "TIS620.2533"
 215   :short-name "TIS620.2533"
 216   :iso-final-char ?T
 217   :emacs-mule-id 133
 218   :code-space [32 127]
 219   :code-offset #x0E00)                  ; code 32 -> U+0E00, ..., code 127 -> U+0E5F
 220
 221 ;; Fixme: doc for this, c.f. above
 222 (define-charset 'tis620-2533            ; 8-bit charset assembled from three existing charsets
 223   "TIS620.2533"
 224   :short-name "TIS620.2533"
 225   :ascii-compatible-p t
 226   :code-space [0 255]
 227   :superset '(ascii eight-bit-control (thai-tis620 . 128)))  ; `thai-tis620' codes are offset by 128
 228
 229 (define-charset 'jisx0201               ; parent charset for the two :subset definitions below
 230   "JISX0201"
 231   :short-name "JISX0201"
 232   :code-space [0 #xDF]
 233   :map "JISX0201")
 234
 235 (define-charset 'latin-jisx0201
 236   "Roman Part of JISX0201.1976"
 237   :short-name "JISX0201 Roman"
 238   :long-name "Japanese Roman (JISX0201.1976)"
 239   :iso-final-char ?J
 240   :emacs-mule-id  138
 241   :code-space [33 126]
 242   :subset '(jisx0201 33 126 0))         ; codes 33..126 of `jisx0201', unshifted
 243
 244 (define-charset 'katakana-jisx0201
 245   "Katakana Part of JISX0201.1976"
 246   :short-name "JISX0201 Katakana"
 247   :long-name "Japanese Katakana (JISX0201.1976)"
 248   :iso-final-char ?I
 249   :emacs-mule-id  137
 250   :code-space [33 126]
 251   :subset '(jisx0201 161 254 -128))     ; codes 161..254 of `jisx0201', shifted by -128 to 33..126
 252
 253 (define-charset 'chinese-gb2312
 254   "GB2312 Chinese simplified: ISO-IR-58"
 255   :short-name "GB2312"
 256   :long-name "GB2312: ISO-IR-58"
 257   :iso-final-char ?A
 258   :emacs-mule-id 145
 259   :code-space [33 126 33 126]           ; two dimensions, 94 codes each
 260   :code-offset #x110000                 ; first character code used by this charset
 261   :unify-map "GB2312")
 262
 263 (define-charset 'chinese-gbk
 264   "GBK Chinese simplified."
 265   :short-name "GBK"
 266   :code-space [#x40 #xFE #x81 #xFE]     ; listed as second-byte range, then first-byte range
 267   :code-offset #x160000
 268   :unify-map "GBK")
 269 (define-charset-alias 'cp936 'chinese-gbk)        ; both codepage names refer to `chinese-gbk'
 270 (define-charset-alias 'windows-936 'chinese-gbk)
 271
 272 (define-charset 'chinese-cns11643-1     ; planes 1..7 follow one pattern; :code-offset grows by #x4000 per plane
 273   "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
 274   :short-name "CNS11643-1"
 275   :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
 276   :iso-final-char ?G
 277   :emacs-mule-id  149
 278   :code-space [33 126 33 126]
 279   :code-offset #x114000
 280   :unify-map "CNS-1")
 281
 282 (define-charset 'chinese-cns11643-2
 283   "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
 284   :short-name "CNS11643-2"
 285   :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
 286   :iso-final-char ?H
 287   :emacs-mule-id  150
 288   :code-space [33 126 33 126]
 289   :code-offset #x118000
 290   :unify-map "CNS-2")
 291
 292 (define-charset 'chinese-cns11643-3
 293   "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
 294   :short-name  "CNS11643-3"
 295   :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
 296   :iso-final-char ?I
 297   :code-space [33 126 33 126]
 298   :emacs-mule-id  246
 299   :code-offset #x11C000
 300   :unify-map "CNS-3")
 301
 302 (define-charset 'chinese-cns11643-4
 303   "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
 304   :short-name  "CNS11643-4"
 305   :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
 306   :iso-final-char ?J
 307   :emacs-mule-id  247
 308   :code-space [33 126 33 126]
 309   :code-offset #x120000
 310   :unify-map "CNS-4")
 311
 312 (define-charset 'chinese-cns11643-5
 313   "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
 314   :short-name  "CNS11643-5"
 315   :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
 316   :iso-final-char ?K
 317   :emacs-mule-id  248
 318   :code-space [33 126 33 126]
 319   :code-offset #x124000
 320   :unify-map "CNS-5")
 321
 322 (define-charset 'chinese-cns11643-6
 323   "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
 324   :short-name  "CNS11643-6"
 325   :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
 326   :iso-final-char ?L
 327   :emacs-mule-id 249
 328   :code-space [33 126 33 126]
 329   :code-offset #x128000
 330   :unify-map "CNS-6")
 331
 332 (define-charset 'chinese-cns11643-7
 333   "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
 334   :short-name  "CNS11643-7"
 335   :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
 336   :iso-final-char ?M
 337   :emacs-mule-id 250
 338   :code-space [33 126 33 126]
 339   :code-offset #x12C000
 340   :unify-map "CNS-7")
 341
 342 (define-charset 'big5
 343   "Big5 (Chinese traditional)"
 344   :short-name "Big5"
 345   :code-space [#x40 #xFE #xA1 #xFE]     ; same code space as `big5-hkscs' further down
 346   :code-offset #x130000
 347   :unify-map "BIG5")
 348 ;; Fixme: AKA cp950 according to
 349 ;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>.  Is
 350 ;; that correct?
 351
 352 (define-charset 'chinese-big5-1
 353   "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
 354   :short-name "Big5 (Level-1)"
 355   :long-name "Big5 (Level-1) A141-C67E" ; range kept identical to the one in the docstring
 356   :iso-final-char ?0
 357   :emacs-mule-id 152
 358   :code-space [#x21 #x7E #x21 #x7E]     ; 94 x 94 re-coding of the Big5 range named above
 359   :code-offset #x135000
 360   :unify-map "BIG5-1")
 361
 362 (define-charset 'chinese-big5-2
 363   "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
 364   :short-name "Big5 (Level-2)"
 365   :long-name "Big5 (Level-2) C940-FEFE"
 366   :iso-final-char ?1
 367   :emacs-mule-id  153
 368   :code-space [#x21 #x7E #x21 #x7E]     ; 94 x 94, like `chinese-big5-1'
 369   :code-offset #x137800
 370   :unify-map "BIG5-2")
 371
 372 (define-charset 'japanese-jisx0208      ; the three JIS charsets here take consecutive #x4000-sized offset slots
 373   "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
 374   :short-name "JISX0208"
 375   :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
 376   :iso-final-char ?B
 377   :emacs-mule-id 146
 378   :code-space [33 126 33 126]
 379   :code-offset #x140000
 380   :unify-map "JISX0208")
 381
 382 (define-charset 'japanese-jisx0208-1978
 383   "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
 384   :short-name "JISX0208.1978"
 385   :long-name  "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
 386   :iso-final-char ?@
 387   :emacs-mule-id  144
 388   :code-space [33 126 33 126]
 389   :code-offset #x144000
 390   :unify-map "JISC6226")
 391
 392 (define-charset 'japanese-jisx0212
 393   "JISX0212 Japanese supplement: ISO-IR-159"
 394   :short-name "JISX0212"
 395   :long-name "JISX0212 (Japanese): ISO-IR-159"
 396   :iso-final-char ?D
 397   :emacs-mule-id 148
 398   :code-space [33 126 33 126]
 399   :code-offset #x148000
 400   :unify-map "JISX0212")
 401
 402 ;; Note that jisx0213 contains characters not in Unicode (3.2?).  It's
 403 ;; arguable whether it should have a unify-map.
 404 (define-charset 'japanese-jisx0213-1
 405   "JISX0213.2000 Plane 1 (Japanese)"
 406   :short-name "JISX0213-1"
 407   :iso-final-char ?O
 408   :emacs-mule-id  151
 409   :unify-map "JISX2131"
 410   :code-space [33 126 33 126]
 411   :code-offset #x14C000)
 412
 413 (define-charset 'japanese-jisx0213-2
 414   "JISX0213.2000 Plane 2 (Japanese)"
 415   :short-name "JISX0213-2"
 416   :iso-final-char ?P
 417   :emacs-mule-id 254
 418   :unify-map "JISX2132"
 419   :code-space [33 126 33 126]
 420   :code-offset #x150000)
 421
 422 (define-charset 'japanese-jisx0213-a    ; defined via :map rather than :code-offset; used by the superset below
 423   "JISX0213.2004 adds these characters to JISX0213.2000."
 424   :short-name "JISX0213A"
 425   :dimension 2
 426   :code-space [33 126 33 126]
 427   :supplementary-p t
 428   :map "JISX213A")
 429
 430 (define-charset 'japanese-jisx0213.2004-1
 431   "JISX0213.2004 Plane1 (Japanese)"
 432   :short-name "JISX0213.2004-1"
 433   :dimension 2
 434   :code-space [33 126 33 126]
 435   :iso-final-char ?Q
 436   :superset '(japanese-jisx0213-a japanese-jisx0213-1))  ; the 2004 additions combined with 2000 plane 1
 437
 438 (define-charset 'katakana-sjis          ; building block for `cp932' below
 439   "Katakana part of Shift-JIS"
 440   :dimension 1
 441   :code-space [#xA1 #xDF]
 442   :subset '(jisx0201 #xA1 #xDF 0)       ; codes #xA1..#xDF of `jisx0201', unshifted
 443   :supplementary-p t)
 444
 445 (define-charset 'cp932-2-byte           ; building block for `cp932' below
 446   "2-byte part of CP932"
 447   :dimension 2
 448   :map "CP932-2BYTE"
 449   :code-space [#x40 #xFC #x81 #xEF]
 450   :supplementary-p t)
 451
 452 (define-charset 'cp932
 453   "CP932 (Microsoft shift-jis)"
 454   :code-space [#x00 #xFF #x00 #xFE]
 455   :short-name "CP932"
 456   :superset '(ascii katakana-sjis cp932-2-byte))  ; combines ASCII with the two charsets defined just above
 457
 458 (define-charset 'korean-ksc5601
 459   "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
 460   :short-name "KSC5601"
 461   :long-name "KSC5601 (Korean): ISO-IR-149"
 462   :iso-final-char ?C
 463   :emacs-mule-id 147
 464   :code-space [33 126 33 126]           ; 94 * 94 = 8836 codes, ending at the character noted on the next line
 465   :code-offset #x279f94                 ; ... #x27c217
 466   :unify-map "KSC5601")
 467
 468 (define-charset 'big5-hkscs
 469   "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
 470   :short-name "Big5-HKSCS"              ; distinct from the short name "Big5" of the `big5' charset
 471   :code-space [#x40 #xFE #xA1 #xFE]     ; same code space as `big5'
 472   :code-offset #x27c218                 ; ... #x280839
 473   :unify-map "BIG5-HKSCS")
 474
 475 ;; Fixme: Korean cp949/UHC
 476
 477 (define-charset 'chinese-sisheng
 478   "SiSheng characters for PinYin/ZhuYin"
 479   :short-name "SiSheng"
 480   :long-name "SiSheng (PinYin/ZhuYin)"
 481   :iso-final-char ?0                    ; `ipa' below also uses ?0, but with code space [32 127]
 482   :emacs-mule-id 160
 483   :code-space [33 126]
 484   :unify-map "MULE-sisheng"
 485   :code-offset #x200000)
 486
 487 ;; A subset of the 1989 version of IPA.  It consists of the consonant
 488 ;; signs used in English, French, German and Italian, and all vowels
 489 ;; signs in the table.  [says old MULE doc]
 490 (define-charset 'ipa
 491   "IPA (International Phonetic Association)"
 492   :short-name "IPA"
 493   :iso-final-char ?0
 494   :emacs-mule-id  161
 495   :unify-map "MULE-ipa"
 496   :code-space [32 127]
 497   :code-offset #x200080)
 498
 499 (define-charset 'viscii                 ; full 8-bit charset taken from a map
 500   "VISCII1.1"
 501   :short-name "VISCII"
 502   :long-name "VISCII 1.1"
 503   :code-space [0 255]
 504   :map "VISCII")
 505
 506 (define-charset 'vietnamese-viscii-lower
 507   "VISCII1.1 lower-case"
 508   :short-name "VISCII lower"
 509   :long-name "VISCII lower-case"
 510   :iso-final-char ?1
 511   :emacs-mule-id  162
 512   :code-space [32 127]
 513   :code-offset #x200200
 514   :unify-map "MULE-lviscii")
 515
 516 (define-charset 'vietnamese-viscii-upper
 517   "VISCII1.1 upper-case"
 518   :short-name "VISCII upper"
 519   :long-name "VISCII upper-case"
 520   :iso-final-char ?2
 521   :emacs-mule-id  163
 522   :code-space [32 127]
 523   :code-offset #x200280                 ; #x80 above the lower-case charset's offset
 524   :unify-map "MULE-uviscii")
 525
 526 (define-charset 'vscii
 527   "VSCII1.1 (TCVN-5712 VN1)"
 528   :short-name "VSCII"
 529   :code-space [0 255]
 530   :map "VSCII")
 531
 532 (define-charset-alias 'tcvn-5712 'vscii)
 533
 534 ;; Fixme: see note in tcvn.map about combining characters
 535 (define-charset 'vscii-2                ; unlike its neighbours, no :short-name is given here
 536   "VSCII-2 (TCVN-5712 VN2)"
 537   :code-space [0 255]
 538   :map "VSCII-2")
 539
 540 (define-charset 'koi8-r                 ; this and the following charsets: ASCII-compatible, 8-bit, taken from a map
 541   "KOI8-R"
 542   :short-name "KOI8-R"
 543   :ascii-compatible-p t
 544   :code-space [0 255]
 545   :map "KOI8-R")
 546
 547 (define-charset-alias 'koi8 'koi8-r)
 548
 549 (define-charset 'alternativnyj
 550   "ALTERNATIVNYJ"
 551   :short-name "alternativnyj"
 552   :ascii-compatible-p t
 553   :code-space [0 255]
 554   :map "ALTERNATIVNYJ")
 555
 556 (define-charset 'cp866
 557   "CP866"
 558   :short-name "cp866"
 559   :ascii-compatible-p t
 560   :code-space [0 255]
 561   :map "IBM866")                        ; the map name differs from the charset name; see the alias below
 562 (define-charset-alias 'ibm866 'cp866)
 563
 564 (define-charset 'koi8-u
 565   "KOI8-U"
 566   :short-name "KOI8-U"
 567   :ascii-compatible-p t
 568   :code-space [0 255]
 569   :map "KOI8-U")
 570
 571 (define-charset 'koi8-t
 572   "KOI8-T"
 573   :short-name "KOI8-T"
 574   :ascii-compatible-p t
 575   :code-space [0 255]
 576   :map "KOI8-T")
 577
 578 (define-charset 'georgian-ps
 579   "GEORGIAN-PS"
 580   :short-name "GEORGIAN-PS"
 581   :ascii-compatible-p t
 582   :code-space [0 255]
 583   :map "KA-PS")
 584
 585 (define-charset 'georgian-academy
 586   "GEORGIAN-ACADEMY"
 587   :short-name "GEORGIAN-ACADEMY"
 588   :ascii-compatible-p t
 589   :code-space [0 255]
 590   :map "KA-ACADEMY")
 591
 592 (define-charset 'windows-1250           ; windows-1250..1258 share one pattern: 8-bit, map "CP125x", alias `cp125x'
 593   "WINDOWS-1250 (Central Europe)"
 594   :short-name "WINDOWS-1250"
 595   :ascii-compatible-p t
 596   :code-space [0 255]
 597   :map "CP1250")
 598 (define-charset-alias 'cp1250 'windows-1250)
 599
 600 (define-charset 'windows-1251
 601   "WINDOWS-1251 (Cyrillic)"
 602   :short-name "WINDOWS-1251"
 603   :ascii-compatible-p t
 604   :code-space [0 255]
 605   :map "CP1251")
 606 (define-charset-alias 'cp1251 'windows-1251)
 607
 608 (define-charset 'windows-1252
 609   "WINDOWS-1252 (Latin I)"
 610   :short-name "WINDOWS-1252"
 611   :ascii-compatible-p t
 612   :code-space [0 255]
 613   :map "CP1252")
 614 (define-charset-alias 'cp1252 'windows-1252)
 615
 616 (define-charset 'windows-1253
 617   "WINDOWS-1253 (Greek)"
 618   :short-name "WINDOWS-1253"
 619   :ascii-compatible-p t
 620   :code-space [0 255]
 621   :map "CP1253")
 622 (define-charset-alias 'cp1253 'windows-1253)
 623
 624 (define-charset 'windows-1254
 625   "WINDOWS-1254 (Turkish)"
 626   :short-name "WINDOWS-1254"
 627   :ascii-compatible-p t
 628   :code-space [0 255]
 629   :map "CP1254")
 630 (define-charset-alias 'cp1254 'windows-1254)
 631
 632 (define-charset 'windows-1255
 633   "WINDOWS-1255 (Hebrew)"
 634   :short-name "WINDOWS-1255"
 635   :ascii-compatible-p t
 636   :code-space [0 255]
 637   :map "CP1255")
 638 (define-charset-alias 'cp1255 'windows-1255)
 639
 640 (define-charset 'windows-1256
 641   "WINDOWS-1256 (Arabic)"
 642   :short-name "WINDOWS-1256"
 643   :ascii-compatible-p t
 644   :code-space [0 255]
 645   :map "CP1256")
 646 (define-charset-alias 'cp1256 'windows-1256)
 647
 648 (define-charset 'windows-1257
 649   "WINDOWS-1257 (Baltic)"
 650   :short-name "WINDOWS-1257"
 651   :ascii-compatible-p t
 652   :code-space [0 255]
 653   :map "CP1257")
 654 (define-charset-alias 'cp1257 'windows-1257)
 655
 656 (define-charset 'windows-1258
 657   "WINDOWS-1258 (Viet Nam)"
 658   :short-name "WINDOWS-1258"
 659   :ascii-compatible-p t
 660   :code-space [0 255]
 661   :map "CP1258")
 662 (define-charset-alias 'cp1258 'windows-1258)
 663
 664 (define-charset 'next
 665   "NEXT"
 666   :short-name "NEXT"
 667   :ascii-compatible-p t
 668   :code-space [0 255]
 669   :map "NEXTSTEP")
 670
 671 (define-charset 'cp1125                 ; NOTE(review): no :ascii-compatible-p here, unlike the neighbouring codepages -- confirm intent
 672   "CP1125"
 673   :short-name "CP1125"
 674   :code-space [0 255]
 675   :map "CP1125")
 676 (define-charset-alias 'ruscii 'cp1125)
 677 ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
 678 (define-charset-alias 'cp866u 'cp1125)
 679
 680 ;; Fixme: Cf. iconv, http://czyborra.com/charsets/codepages.html
 681 ;; shows this as not ASCII compatible, with various graphics in
 682 ;; 0x01-0x1F.
 683 (define-charset 'cp437                  ; cp437..cp874 share one pattern: 8-bit, ASCII-compatible, taken from a map
 684   "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
 685   :short-name "CP437"
 686   :code-space [0 255]
 687   :ascii-compatible-p t
 688   :map "IBM437")
 689
 690 (define-charset 'cp720
 691   "CP720 (Arabic)"
 692   :short-name "CP720"
 693   :code-space [0 255]
 694   :ascii-compatible-p t
 695   :map "CP720")
 696
 697 (define-charset 'cp737
 698   "CP737 (PC Greek)"
 699   :short-name "CP737"
 700   :code-space [0 255]
 701   :ascii-compatible-p t
 702   :map "CP737")
 703
 704 (define-charset 'cp775
 705   "CP775 (PC Baltic)"
 706   :short-name "CP775"
 707   :code-space [0 255]
 708   :ascii-compatible-p t
 709   :map "CP775")
 710
 711 (define-charset 'cp851
 712   "CP851 (Greek)"
 713   :short-name "CP851"
 714   :code-space [0 255]
 715   :ascii-compatible-p t
 716   :map "IBM851")
 717
 718 (define-charset 'cp852
 719   "CP852 (MS-DOS Latin-2)"
 720   :short-name "CP852"
 721   :code-space [0 255]
 722   :ascii-compatible-p t
 723   :map "IBM852")
 724
 725 (define-charset 'cp855
 726   "CP855 (IBM Cyrillic)"
 727   :short-name "CP855"
 728   :code-space [0 255]
 729   :ascii-compatible-p t
 730   :map "IBM855")
 731
 732 (define-charset 'cp857
 733   "CP857 (IBM Turkish)"
 734   :short-name "CP857"
 735   :code-space [0 255]
 736   :ascii-compatible-p t
 737   :map "IBM857")
 738
 739 (define-charset 'cp858
 740   "CP858 (Multilingual Latin I + Euro)"
 741   :short-name "CP858"
 742   :code-space [0 255]
 743   :ascii-compatible-p t
 744   :map "CP858")
 745 (define-charset-alias 'cp00858 'cp858)  ; IANA has IBM00858/CP00858
 746
 747 (define-charset 'cp860
 748   "CP860 (MS-DOS Portuguese)"
 749   :short-name "CP860"
 750   :code-space [0 255]
 751   :ascii-compatible-p t
 752   :map "IBM860")
 753
 754 (define-charset 'cp861
 755   "CP861 (MS-DOS Icelandic)"
 756   :short-name "CP861"
 757   :code-space [0 255]
 758   :ascii-compatible-p t
 759   :map "IBM861")
 760
 761 (define-charset 'cp862
 762   "CP862 (PC Hebrew)"
 763   :short-name "CP862"
 764   :code-space [0 255]
 765   :ascii-compatible-p t
 766   :map "IBM862")
 767
 768 (define-charset 'cp863
 769   "CP863 (MS-DOS Canadian French)"
 770   :short-name "CP863"
 771   :code-space [0 255]
 772   :ascii-compatible-p t
 773   :map "IBM863")
 774
 775 (define-charset 'cp864
 776   "CP864 (PC Arabic)"
 777   :short-name "CP864"
 778   :code-space [0 255]
 779   :ascii-compatible-p t
 780   :map "IBM864")
 781
 782 (define-charset 'cp865
 783   "CP865 (MS-DOS Nordic)"
 784   :short-name "CP865"
 785   :code-space [0 255]
 786   :ascii-compatible-p t
 787   :map "IBM865")
 788
 789 (define-charset 'cp869
 790   "CP869 (IBM Modern Greek)"
 791   :short-name "CP869"
 792   :code-space [0 255]
 793   :ascii-compatible-p t
 794   :map "IBM869")
 795
 796 (define-charset 'cp874
 797   "CP874 (IBM Thai)"
 798   :short-name "CP874"
 799   :code-space [0 255]
 800   :ascii-compatible-p t
 801   :map "IBM874")
 802
 803 ;; For Arabic, we need three different types of character sets.
 804 ;; Digits are of direction left-to-right and of width 1-column.
 805 ;; Others are of direction right-to-left and of width 1-column or
 806 ;; 2-column.
 807 (define-charset 'arabic-digit
 808   "Arabic digit"
 809   :short-name "Arabic digit"
 810   :iso-final-char ?2
 811   :emacs-mule-id 164
 812   :code-space [34 42]                   ; only nine codes
 813   :code-offset #x0600)                  ; NOTE(review): code 34 lands on U+0600 with this offset -- confirm that is the intended target
 814
 815 (define-charset 'arabic-1-column
 816   "Arabic 1-column"
 817   :short-name "Arabic 1-col"
 818   :long-name "Arabic 1-column"
 819   :iso-final-char ?3
 820   :emacs-mule-id 165
 821   :code-space [33 126]
 822   :code-offset #x200100)
 823
 824 (define-charset 'arabic-2-column
 825   "Arabic 2-column"
 826   :short-name "Arabic 2-col"
 827   :long-name "Arabic 2-column"
 828   :iso-final-char ?4
 829   :emacs-mule-id 224
 830   :code-space [33 126]
 831   :code-offset #x200180)                ; #x80 above the 1-column charset's offset
 832
 833 ;; Lao script.
 834 ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
 835 ;; Not all of them are defined unicodes.
 836 (define-charset 'lao
 837   "Lao characters (ISO10646 0E81..0EDF)"
 838   :short-name "Lao"
 839   :iso-final-char ?1
 840   :emacs-mule-id 167
 841   :code-space [33 126]
 842   :code-offset #x0E81)                  ; code 33 -> U+0E81
 843
 844 (define-charset 'mule-lao               ; 8-bit charset assembled from three existing charsets
 845   "Lao characters (ISO10646 0E81..0EDF)"
 846   :short-name "Lao"
 847   :code-space [0 255]
 848   :superset '(ascii eight-bit-control (lao . 128)))  ; `lao' codes are offset by 128
 849
 850
 851 ;; Indian scripts.  Symbolic charset for data exchange.  Glyphs are
 852 ;; not assigned.  They are automatically converted to each Indian
 853 ;; script which IS-13194 supports.
 854
 855 (define-charset 'indian-is13194
 856   "Generic Indian charset for data exchange with IS 13194"
 857   :short-name "IS 13194"
 858   :long-name "Indian IS 13194"
 859   :iso-final-char ?5
 860   :emacs-mule-id 225
 861   :code-space [33 126]
 862   :code-offset #x180000)                ; sits #x100 below the first glyph charset defined next
 863
 864 (let ((next-offset #x180100))
 865   ;; Each entry: name format, docstring format, short-name format, scripts...
 866   (dolist (family
 867            '(("%s-cdac"
 868               "Glyphs of %s script for CDAC font.  Subset of `indian-glyph'."
 869               "CDAC %s glyphs"
 870               devanagari sanskrit bengali tamil telugu assamese
 871               oriya kannada malayalam gujarati punjabi)
 872              ("%s-akruti"
 873               "Glyphs of %s script for AKRUTI font.  Subset of `indian-glyph'."
 874               "AKRUTI %s glyphs"
 875               devanagari bengali punjabi gujarati
 876               oriya tamil telugu kannada malayalam)))
 877     (dolist (script (nthcdr 3 family))  ; one 256-code charset per script, at consecutive offsets
 878       (let ((script-name (capitalize (symbol-name script))))
 879         (define-charset (intern (format (car family) script))
 880           (format (nth 1 family) script-name)
 881           :short-name (format (nth 2 family) script-name)
 882           :code-space [0 255] :code-offset next-offset))
 883       (setq next-offset (+ next-offset #x100)))))
 884
 885 (define-charset 'indian-glyph
 886   "Glyphs for Indian characters."
 887   :short-name "Indian glyph"
 888   :iso-final-char ?4
 889   :emacs-mule-id 240
 890   :code-space [32 127 32 127]           ; 96 * 96 codes
 891   :code-offset #x180100)                ; same starting offset as the first CDAC glyph charset above
 892
 893 ;; Actual Glyph for 1-column width.
 894 (define-charset 'indian-1-column
 895   "Indian charset for 1-column width glyphs."
 896   :short-name "Indian 1-col"
 897   :long-name "Indian 1 Column"
 898   :iso-final-char ?6
 899   :emacs-mule-id  251                   ; NOTE(review): `indian-2-column' below uses 251 as well -- confirm this is intended
 900   :code-space [33 126 33 126]
 901   :code-offset #x184000)
 902
 903 ;; Actual Glyph for 2-column width.
 904 (define-charset 'indian-2-column
 905   "Indian charset for 2-column width glyphs."
 906   :short-name "Indian 2-col"
 907   :long-name "Indian 2 Column"
 908   :iso-final-char ?5
 909   :emacs-mule-id  251
 910   :code-space [33 126 33 126]
 911   :code-offset #x184000)                ; same :code-offset as `indian-1-column'
 912
 913 ;; 2-column Tibetan glyphs; `tibetan-1-column' uses the same :code-offset.
 914 (define-charset 'tibetan
 915   "Tibetan characters"
 916   :short-name "Tibetan 2-col"
 917   :long-name "Tibetan 2 column"
 918   :iso-final-char ?7
 919   :emacs-mule-id 252
 920   :unify-map "MULE-tibetan"
 921   :code-space [33 126 33 37]
 922   :code-offset #x190000)
 923
 924 (define-charset 'tibetan-1-column
 925   "Tibetan 1 column glyph"
 926   :short-name "Tibetan 1-col"
 927   :long-name "Tibetan 1 column"
 928   :iso-final-char ?8
 929   :emacs-mule-id 241
 930   :code-space [33 126 33 37]            ; same code space as `tibetan'
 931   :code-offset #x190000)                ; same :code-offset as `tibetan'
 932
 933 ;; Subsets of Unicode.
 934 (define-charset 'mule-unicode-2500-33ff
 935   "Unicode characters of the range U+2500..U+33FF."
 936   :short-name "Unicode subset 2"
 937   :long-name "Unicode subset (U+2500..U+33FF)"
 938   :iso-final-char ?2
 939   :emacs-mule-id 242
 940   :code-space [#x20 #x7f #x20 #x47]     ; 96 * 40 = #xF00 codes, exactly the size of U+2500..U+33FF
 941   :code-offset #x2500)
 942
 943 (define-charset 'mule-unicode-e000-ffff
 944   "Unicode characters of the range U+E000..U+FFFF."
 945   :short-name "Unicode subset 3"
 946   :long-name "Unicode subset (U+E000..U+FFFF)"
 947   :iso-final-char ?3
 948   :emacs-mule-id 243
 949   :code-space [#x20 #x7F #x20 #x75]     ; 96 * 86 codes, more than the #x2000 needed; capped by :max-code
 950   :code-offset #xE000
 951   :max-code 30015)                      ; U+FFFF
 952
 953 (define-charset 'mule-unicode-0100-24ff
 954   "Unicode characters of the range U+0100..U+24FF."
 955   :short-name "Unicode subset"
 956   :long-name "Unicode subset (U+0100..U+24FF)"
 957   :iso-final-char ?1
 958   :emacs-mule-id 244
 959   :code-space [#x20 #x7F #x20 #x7F]     ; 96 * 96 = #x2400 codes, exactly the size of U+0100..U+24FF
 960   :code-offset #x100)
 961
 962 (define-charset 'unicode-bmp
 963   "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
 964   :short-name "Unicode BMP"
 965   :code-space [0 255 0 255]             ; 256 * 256 codes: one full plane
 966   :code-offset 0)
 967
 968 (define-charset 'unicode-smp
 969   "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
 970   :short-name "Unicode SMP"             ; no trailing blank, like the other plane short names
 971   :code-space [0 255 0 255]             ; 256 * 256 codes: one full plane
 972   :code-offset #x10000)
 973
 974 (define-charset 'unicode-sip
 975   "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
 976   :short-name "Unicode SIP"
 977   :code-space [0 255 0 255]             ; 256 x 256 = 65536 code points
 978   :code-offset #x20000)                 ; plane starts at U+20000
 979
 980 (define-charset 'unicode-ssp
 981   "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
 982   :short-name "Unicode SSP"
 983   :code-space [0 255 0 255]             ; 256 x 256 = 65536 code points
 984   :code-offset #xE0000)                 ; plane starts at U+E0000
 985
 986 (define-charset 'ethiopic
 987   "Ethiopic characters for Amharic and Tigrigna."
 988   :short-name "Ethiopic"
 989   :long-name "Ethiopic characters"
 990   :iso-final-char ?3
 991   :emacs-mule-id  245
 992   :unify-map "MULE-ethiopic"            ; this charset is also passed to `unify-charset' below
 993   :code-space [33 126 33 126]           ; 94 x 94 code points
 994   :code-offset #x1A0000)
 995
 996 (define-charset 'mac-roman
 997   "Mac Roman charset"
 998   :short-name "Mac Roman"
 999   :ascii-compatible-p t
1000   :code-space [0 255]                   ; single byte, all 256 values
1001   :map "MACINTOSH")                     ; defined by a map rather than a :code-offset
1002
1003 ;; Fixme: should modern EBCDIC variants, e.g. IBM00924, be added?
1004 (define-charset 'ebcdic-us
1005   "US version of EBCDIC"
1006   :short-name "EBCDIC-US"
1007   :code-space [0 255]                   ; single byte; note: no :ascii-compatible-p
1008   :mime-charset 'ebcdic-us
1009   :map "EBCDICUS")
1010
1011 (define-charset 'ebcdic-uk
1012   "UK version of EBCDIC"
1013   :short-name "EBCDIC-UK"
1014   :code-space [0 255]                   ; single byte; note: no :ascii-compatible-p
1015   :mime-charset 'ebcdic-uk
1016   :map "EBCDICUK")
1017
1018 (define-charset 'ibm1047
1019   ;; According to groff:
1020   "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
1021   :short-name "IBM1047"
1022   :code-space [0 255]                   ; single byte, all 256 values
1023   :mime-charset 'ibm1047
1024   :map "IBM1047")
1025 (define-charset-alias 'cp1047 'ibm1047) ; `cp1047' names the same charset
1026
1027 (define-charset 'hp-roman8
1028   "Encoding used by Hewlett-Packard printer software"
1029   :short-name "HP-ROMAN8"
1030   :ascii-compatible-p t
1031   :code-space [0 255]                   ; single byte, all 256 values
1032   :map "HP-ROMAN8")
1033
1034 ;; To make a coding system from this charset, a pre-write-conversion
1035 ;; would have to account for the commented-out multi-valued code
1036 ;; points in stdenc.map.
1037 (define-charset 'adobe-standard-encoding
1038   "Adobe `standard encoding' used in PostScript"
1039   :short-name "ADOBE-STANDARD-ENCODING"
1040   :code-space [#x20 255]                ; codes below #x20 are outside the code space
1041   :map "stdenc")
1042
1043 (define-charset 'symbol
1044   "Adobe symbol encoding used in PostScript"
1045   :short-name "ADOBE-SYMBOL"
1046   :code-space [#x20 255]                ; codes below #x20 are outside the code space
1047   :map "symbol")
1048
1049 (define-charset 'ibm850
1050   "DOS codepage 850 (Latin-1)"
1051   :short-name "IBM850"
1052   :ascii-compatible-p t
1053   :code-space [0 255]                   ; single byte, all 256 values
1054   :map "IBM850")
1055 (define-charset-alias 'cp850 'ibm850)   ; `cp850' names the same charset
1056
1057 (define-charset 'mik
1058   "Bulgarian DOS codepage"
1059   :short-name "MIK"
1060   :ascii-compatible-p t
1061   :code-space [0 255]                   ; single byte, all 256 values
1062   :map "MIK")
1063
1064 (define-charset 'ptcp154
1065   "`Paratype' codepage (Asian Cyrillic)"
1066   :short-name "PT154"
1067   :ascii-compatible-p t
1068   :code-space [0 255]                   ; single byte, all 256 values
1069   :mime-charset 'pt154                  ; MIME name differs from the charset name
1070   :map "PTCP154")
1071 (define-charset-alias 'pt154 'ptcp154)  ; alias matching the :mime-charset
1072 (define-charset-alias 'cp154 'ptcp154)  ; further alias for the same charset
1073
1074 (define-charset 'gb18030-2-byte
1075   "GB18030 2-byte (0x814E..0xFEFE)"
1076   :code-space [#x40 #xFE #x81 #xFE]     ; low byte #x40..#xFE, high byte #x81..#xFE
1077   :supplementary-p t
1078   :map "GB180302")
1079
1080 (define-charset 'gb18030-4-byte-bmp
1081   "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
1082   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84] ; byte ranges, last byte first
1083   :supplementary-p t
1084   :map "GB180304")
1085
1086 (define-charset 'gb18030-4-byte-smp
1087   "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
1088   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3] ; byte ranges, last byte first
1089   :min-code '(#x9030 . #x8130)          ; (high 16 bits . low 16 bits) of 0x90308130
1090   :max-code '(#xE332 . #x9A35)          ; (high 16 bits . low 16 bits) of 0xE3329A35
1091   :supplementary-p t
1092   :code-offset #x10000)
1093
1094 (define-charset 'gb18030-4-byte-ext-1
1095   "GB18030 4-byte (0x8431A530-0x8F39FE39)"
1096   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F] ; byte ranges, last byte first
1097   :min-code '(#x8431 . #xA530)          ; (high 16 bits . low 16 bits) of 0x8431A530
1098   :max-code '(#x8F39 . #xFE39)          ; (high 16 bits . low 16 bits) of 0x8F39FE39
1099   :supplementary-p t
1100   :code-offset #x200000                 ; characters #x200000 ... #x22484B
1101   )
1102
1103 (define-charset 'gb18030-4-byte-ext-2
1104   "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
1105   :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE] ; byte ranges, last byte first
1106   :min-code '(#xE332 . #x9A36)          ; (high 16 bits . low 16 bits) of 0xE3329A36
1107   :max-code '(#xFE39 . #xFE39)          ; (high 16 bits . low 16 bits) of 0xFE39FE39
1108   :supplementary-p t
1109   :code-offset #x22484C                 ; follows ext-1: #x22484C ... #x279f93
1110   )
1111
1112 (define-charset 'gb18030
1113   "GB18030"
1114   :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
1115   :min-code 0
1116   :max-code '(#xFE39 . #xFE39)          ; same upper bound as gb18030-4-byte-ext-2
1117   :superset '(ascii gb18030-2-byte      ; built from ascii plus the GB18030 pieces above
1118                     gb18030-4-byte-bmp gb18030-4-byte-smp
1119                     gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
1120
1121 (define-charset 'chinese-cns11643-15
1122   "CNS11643 Plane 15 Chinese Traditional"
1123   :short-name  "CNS11643-15"
1124   :long-name "CNS11643-15 (Chinese traditional)"
1125   :code-space [33 126 33 126]           ; 94 x 94 code points
1126   :code-offset #x27A000)                ; above #x279f93, the end noted for gb18030-4-byte-ext-2
1127
1128 ;; Call `unify-charset' once per charset, in the order listed.
1129 (dolist (cs
1130          '(chinese-gb2312
1131            chinese-gbk
1132            chinese-cns11643-1
1133            chinese-cns11643-2
1134            chinese-cns11643-3
1135            chinese-cns11643-4
1136            chinese-cns11643-5
1137            chinese-cns11643-6
1138            chinese-cns11643-7
1139            big5
1140            chinese-big5-1 chinese-big5-2
1141            big5-hkscs
1142            korean-ksc5601
1143            vietnamese-viscii-lower vietnamese-viscii-upper
1144            chinese-sisheng
1145            ipa
1146            tibetan
1147            ethiopic
1148            japanese-jisx0208-1978
1149            japanese-jisx0208
1150            japanese-jisx0212
1151            japanese-jisx0213-1 japanese-jisx0213-2))
1152   (unify-charset cs))
1153
1154 \f
1155 ;; Tables for translating characters on decoding and on encoding;
1156 ;; both are set to nil here.
1157 ;; Fixme: these aren't used now -- should they be?
1158 (setq standard-translation-table-for-decode nil
1159       ;; ... and likewise for the encoding direction.
1160       standard-translation-table-for-encode nil)
1161
1162 ;; Fixme: should this be retained?  It might still be useful for
1163 ;; charsets that are not unified.
1164 (defvar translation-table-for-input nil
1165   "If non-nil, a char table used to translate characters from input methods.
1166 \(Currently only used by Quail.)")
1167 \f
1168 ;;; Make fundamental coding systems.
1169
1170 ;; The coding system `no-conversion' is not defined in this file: it
1171 ;; is already defined in coding.c, as shown below:
1172 ;;
1173 ;; (define-coding-system 'no-conversion
1174 ;;   "Do no conversion."
1175 ;;   :coding-type 'raw-text
1176 ;;   :mnemonic ?=)
1177
1178 (define-coding-system-alias 'binary 'no-conversion) ; `binary' is another name for it
1179
1180 (define-coding-system 'raw-text
1181   "Raw text, which means text contains random 8-bit codes.
1182 Encoding text with this coding system produces the actual byte
1183 sequence of the text in buffers and strings.  An exception is made for
1184 eight-bit-control characters.  Each of them is encoded into a single
1185 byte.
1186
1187 When you visit a file with this coding, the file is read into a
1188 unibyte buffer as is (except for EOL format), thus each byte of a file
1189 is treated as a character."
1190   :coding-type 'raw-text
1191   :for-unibyte t                        ; presumably the source of the unibyte-buffer behavior described above
1192   :mnemonic ?t)
1193
1194 (define-coding-system 'no-conversion-multibyte
1195   "Like `no-conversion' but don't read a file into a unibyte buffer."
1196   :coding-type 'raw-text                ; same type as `raw-text', but without :for-unibyte
1197   :eol-type 'unix
1198   :mnemonic ?=)                         ; same mnemonic as shown for `no-conversion'
1199
1200 (define-coding-system 'undecided
1201   "No conversion on encoding, automatic conversion on decoding."
1202   :coding-type 'undecided
1203   :mnemonic ?-
1204   :charset-list '(ascii))
1205
1206 ;; The undecided-unix/-dos/-mac targets are not defined explicitly in
1207 ;; this part of the file; presumably they come with `undecided'.
1208 (define-coding-system-alias 'unix 'undecided-unix)
1209 (define-coding-system-alias 'dos 'undecided-dos)
1210 (define-coding-system-alias 'mac 'undecided-mac)
1209
1210 (define-coding-system 'iso-latin-1
1211   "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
1212   :coding-type 'charset                 ; NOTE(review): docstring says ISO 2022 based, type is `charset'
1213   :mnemonic ?1
1214   :charset-list '(iso-8859-1)
1215   :mime-charset 'iso-8859-1)
1216
1217 (define-coding-system-alias 'iso-8859-1 'iso-latin-1) ; alias under the MIME name
1218 (define-coding-system-alias 'latin-1 'iso-latin-1)    ; short alias
1219
1220 ;; Coding systems that are not specific to any one language environment.
1221
1222 (define-coding-system 'emacs-mule
1223  "Emacs 21 internal format used in buffer and string."
1224  :coding-type 'emacs-mule
1225  :charset-list 'emacs-mule              ; a symbol here, not a list of charsets
1226  :mnemonic ?M)
1227
1228 (define-coding-system 'utf-8
1229   "UTF-8."
1230   :coding-type 'utf-8
1231   :mnemonic ?U                          ; all UTF coding systems in this file use ?U
1232   :charset-list '(unicode)
1233   :mime-charset 'utf-8)
1234
1235 (define-coding-system-alias 'mule-utf-8 'utf-8) ; `mule-utf-8' is another name for `utf-8'
1236
1237 (define-coding-system 'utf-8-emacs
1238   "Support for all Emacs characters (including non-Unicode characters)."
1239   :coding-type 'utf-8
1240   :mnemonic ?U
1241   :charset-list '(emacs))               ; `emacs' rather than `unicode'; no :mime-charset
1242
1243 (define-coding-system 'utf-16le
1244   "UTF-16LE (little endian, no signature (BOM))."
1245   :coding-type 'utf-16
1246   :mnemonic ?U
1247   :charset-list '(unicode)
1248   :endian 'little                       ; no :bom property, matching "no signature"
1249   :mime-text-unsuitable t
1250   :mime-charset 'utf-16le)
1251
1252 (define-coding-system 'utf-16be
1253   "UTF-16BE (big endian, no signature (BOM))."
1254   :coding-type 'utf-16
1255   :mnemonic ?U
1256   :charset-list '(unicode)
1257   :endian 'big                          ; no :bom property, matching "no signature"
1258   :mime-text-unsuitable t
1259   :mime-charset 'utf-16be)
1260
1261 (define-coding-system 'utf-16le-with-signature
1262   "UTF-16 (little endian, with signature (BOM))."
1263   :coding-type 'utf-16
1264   :mnemonic ?U
1265   :charset-list '(unicode)
1266   :bom t                                ; the "with signature" part
1267   :endian 'little
1268   :mime-text-unsuitable t
1269   :mime-charset 'utf-16)                ; shares this MIME name with the other BOM variants
1270
1271 (define-coding-system 'utf-16be-with-signature
1272   "UTF-16 (big endian, with signature (BOM))."
1273   :coding-type 'utf-16
1274   :mnemonic ?U
1275   :charset-list '(unicode)
1276   :bom t                                ; the "with signature" part
1277   :endian 'big
1278   :mime-text-unsuitable t
1279   :mime-charset 'utf-16)                ; shares this MIME name with the other BOM variants
1280
1281 (define-coding-system 'utf-16
1282   "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
1283   :coding-type 'utf-16
1284   :mnemonic ?U
1285   :charset-list '(unicode)
1286   :bom '(utf-16le-with-signature . utf-16be-with-signature) ; (little . big) variants
1287   :endian 'big                          ; big endian on encoding, per the docstring
1288   :mime-text-unsuitable t
1289   :mime-charset 'utf-16)
1290
1291 ;; Backwards compatibility (old names, also used by Mule-UCS).  We
1292 ;; prefer the MIME names.  Note that both old names map to the
1293 ;; variants with a signature (BOM).
1293 (define-coding-system-alias 'utf-16-le 'utf-16le-with-signature)
1294 (define-coding-system-alias 'utf-16-be 'utf-16be-with-signature)
1295
1296
1297 (define-coding-system 'iso-2022-7bit
1298   "ISO 2022 based 7-bit encoding using only G0."
1299   :coding-type 'iso-2022
1300   :mnemonic ?J
1301   :charset-list 'iso-2022
1302   :designation [(ascii t) nil nil nil]  ; only the first of the four slots (G0) is populated
1303   :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
1304
1305 (define-coding-system 'iso-2022-7bit-ss2
1306   "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
1307   :coding-type 'iso-2022
1308   :mnemonic ?$
1309   :charset-list 'iso-2022
1310   :designation [(ascii 94) nil (nil 96) nil] ; 96-charsets go in the third slot
1311   :flags '(short ascii-at-eol ascii-at-cntl 7-bit
1312                  designation single-shift composition)) ; `single-shift' matches the SS2 use
1313
1314 (define-coding-system 'iso-2022-7bit-lock
1315   "ISO-2022 coding system using Locking-Shift for 96-charset."
1316   :coding-type 'iso-2022
1317   :mnemonic ?&
1318   :charset-list 'iso-2022
1319   :designation [(ascii 94) (nil 96) nil nil] ; 96-charsets go in the second slot
1320   :flags '(ascii-at-eol ascii-at-cntl 7-bit
1321                         designation locking-shift composition)) ; no `short' flag here
1322
1323 (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock) ; alternative name
1324
1325 (define-coding-system 'iso-2022-7bit-lock-ss2
1326   "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
1327   :coding-type 'iso-2022
1328   :mnemonic ?i
1329   :charset-list '(ascii                 ; explicit list, unlike the `iso-2022' symbol used above
1330                   japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
1331                   korean-ksc5601
1332                   chinese-gb2312
1333                   chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
1334                   chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
1335                   chinese-cns11643-7)
1336   :designation [(ascii 94)              ; first slot
1337                 (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96) ; second slot
1338                 (nil chinese-cns11643-2) ; third slot
1339                 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 ; fourth slot
1340                      chinese-cns11643-6 chinese-cns11643-7)]
1341   :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
1342                  single-shift init-bol)) ; no `designation' or `composition' flag here
1343
1344 (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2) ; alternative name
1345
1346 (define-coding-system 'iso-2022-8bit-ss2
1347   "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
1348   :coding-type 'iso-2022
1349   :mnemonic ?@
1350   :charset-list 'iso-2022
1351   :designation [(ascii 94) nil (nil 96) nil] ; same designation as iso-2022-7bit-ss2
1352   :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)) ; no `7-bit' flag
1353
1354 (define-coding-system 'compound-text
1355   "Compound text based generic encoding for decoding unknown messages.
1356
1357 This coding system does not support extended segments of CTEXT."
1358   :coding-type 'iso-2022
1359   :mnemonic ?x
1360   :charset-list 'iso-2022
1361   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1362   :flags '(ascii-at-eol ascii-at-cntl
1363                         designation locking-shift single-shift composition)
1364   ;; Fixme: `x-ctext' isn't a valid MIME charset and has to be
1365   ;; special-cased elsewhere  -- fx
1366   :mime-charset 'x-ctext)
1367
1368 (define-coding-system-alias  'x-ctext 'compound-text) ; alias matching the :mime-charset
1369 (define-coding-system-alias  'ctext 'compound-text)   ; short alias
1370
1371 ;; Same as compound-text, but doesn't produce composition escape
1372 ;; sequences (no `composition' flag).  Used in post-read and pre-write
1373 ;; conversions of compound-text-with-extensions, see mule.el.  This
1374 ;; should not have a mime-charset property, to prevent it from showing
1375 ;; up close to the beginning of coding systems ordered by priority.
1376 (define-coding-system 'ctext-no-compositions
1377  "Compound text based generic encoding for decoding unknown messages.
1378
1379 Like `compound-text', but does not produce escape sequences for compositions."
1380   :coding-type 'iso-2022
1381   :mnemonic ?x
1382   :charset-list 'iso-2022
1383   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1384   :flags '(ascii-at-eol ascii-at-cntl
1385                         designation locking-shift single-shift))
1386
1387 (define-coding-system 'compound-text-with-extensions
1388  "Compound text encoding with ICCCM Extended Segment extensions.
1389
1390 See the variable `ctext-non-standard-encodings-alist' for the
1391 detail about how extended segments are handled.
1392
1393 This coding system should be used only for X selections.  It is inappropriate
1394 for decoding and encoding files, process I/O, etc."
1395   :coding-type 'iso-2022
1396   :mnemonic ?x
1397   :charset-list 'iso-2022
1398   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
1399   :flags '(ascii-at-eol ascii-at-cntl
1400                         designation locking-shift single-shift) ; no `composition' flag
1401   :post-read-conversion 'ctext-post-read-conversion   ; not defined in this file
1402   :pre-write-conversion 'ctext-pre-write-conversion)  ; not defined in this file
1403
1404 (define-coding-system-alias
1405   'x-ctext-with-extensions 'compound-text-with-extensions)
1406 (define-coding-system-alias
1407   'ctext-with-extensions 'compound-text-with-extensions)
1408
1409 (define-coding-system 'us-ascii
1410   "Encode ASCII as-is and encode non-ASCII characters to `?'."
1411   :coding-type 'charset
1412   :mnemonic ?-
1413   :charset-list '(ascii)
1414   :default-char ??                      ; the `?' substitute named in the docstring
1415   :mime-charset 'us-ascii)
1416
1417 (define-coding-system-alias 'iso-safe 'us-ascii) ; `iso-safe' is another name for `us-ascii'
1418
1419 (define-coding-system 'utf-7
1420   "UTF-7 encoding of Unicode (RFC 2152)."
1421   :coding-type 'utf-8                   ; NOTE(review): base type is utf-8; presumably the UTF-7 work is in the two conversions below
1422   :mnemonic ?U
1423   :mime-charset 'utf-7
1424   :charset-list '(unicode)
1425   :pre-write-conversion 'utf-7-pre-write-conversion   ; not defined in this file
1426   :post-read-conversion 'utf-7-post-read-conversion)  ; not defined in this file
1427
1428 ;; Use `us-ascii' for terminal output unless some other coding system
1429 ;; is specified explicitly.
1430 (set-safe-terminal-coding-system-internal 'us-ascii)
1431
1432 ;; The other coding-systems are defined in each language specific
1433 ;; files under lisp/language.
1434
1435 ;; Normally, set coding system to `undecided' before reading a file.
1436 ;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
1437 ;; but we regard them as containing multibyte characters.
1438 ;; Tar files are not decoded at all, but we treat them as raw bytes.
1439
1440 (setq file-coding-system-alist
1441       '(("\\.elc\\'" . utf-8-emacs)
1442         ("\\.utf\\(-8\\)?\\'" . utf-8)
1443         ;; This is the defined default for XML documents.  It may be
1444         ;; overridden by a charset specification in the header.  That
1445         ;; should be grokked by the auto-coding mechanism, but rms
1446         ;; vetoed that.  -- fx
1447         ("\\.xml\\'" . utf-8)
1448         ;; We use raw-text for reading loaddefs.el so that if it
1449         ;; happens to have DOS or Mac EOLs, they are converted to
1450         ;; newlines.  This is required to make the special treatment
1451         ;; of the "\ newline" combination in loaddefs.el, which marks
1452         ;; the beginning of a doc string, work.  The dot is matched literally.
1453         ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
1454         ("\\.tar\\'" . (no-conversion . no-conversion))
1455         ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
1456         ("" . (undecided . nil))))      ; "" matches any file name: the fallback entry
1457
1458 \f
1459 ;;; Setting coding categories and their priorities.
1460
1461 ;; This setting is just for reading Emacs Lisp source files which
1462 ;; contain multilingual text while dumping Emacs.  More appropriate
1463 ;; values are set by the command `set-language-environment' for each
1464 ;; language environment.
1465
1466 (set-coding-system-priority
1467  'iso-latin-1
1468  'utf-8
1469  'iso-2022-7bit
1470  )
1471
1472 \f
1473 ;;; Miscellaneous settings.
1474
1475 ;; Bind every character from code 128 up to (max-char) to
1476 ;; `self-insert-command' in the char-table element of `global-map'.
1477 (let ((non-ascii-range (cons 128 (max-char))))
1478   (set-char-table-range (cadr global-map) non-ascii-range 'self-insert-command))
1479
1480 ;; Set the `latin-extra-code-table' slots for the six consecutive
1481 ;; codes ?\221 through ?\226 to t.
1482 (dolist (extra-code '(?\221 ?\222
1483                       ?\223 ?\224
1484                       ?\225 ?\226))
1485   (aset latin-extra-code-table extra-code t))
1486
1487 ;; Move the least specific charsets (`emacs' and `unicode') to the end
1488 ;; of the priority list: every other charset is given priority again.
1489 (let ((specific (delq 'emacs (charset-priority-list))))
1490   (apply #'set-charset-priority (delq 'unicode specific)))
1491
1492 ;; The old code-pages library is obsoleted by coding systems based on
1493 ;; the charsets defined in this file, but user code might still
1494 ;; require it, so the feature is provided here.
1495 (provide 'code-pages)
1496
1497 ;; Local variables:
1498 ;; no-byte-compile: t
1499 ;; End:
1500
1501 ;;; arch-tag: 7d5fed55-b6df-42f6-8d3d-0011190551f5
1502 ;;; mule-conf.el ends here