lisp/international/mule-conf.el

   1 ;;; mule-conf.el --- configure multilingual environment
   2
   3 ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN.
   4 ;; Licensed to the Free Software Foundation.
   5 ;; Copyright (C) 2001, 2002
   6 ;;   National Institute of Advanced Industrial Science and Technology (AIST)
   7 ;;   Registration Number H13PRO009
   8
   9 ;; Keywords: mule, multilingual, character set, coding system
  10
  11 ;; This file is part of GNU Emacs.
  12
  13 ;; GNU Emacs is free software; you can redistribute it and/or modify
  14 ;; it under the terms of the GNU General Public License as published by
  15 ;; the Free Software Foundation; either version 2, or (at your option)
  16 ;; any later version.
  17
  18 ;; GNU Emacs is distributed in the hope that it will be useful,
  19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 ;; GNU General Public License for more details.
  22
  23 ;; You should have received a copy of the GNU General Public License
  24 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  26 ;; Boston, MA 02111-1307, USA.
  27
  28 ;;; Commentary:
  29
  30 ;; Don't byte-compile this file.
  31
  32 ;;; Code:
  33
  34 ;;; Definitions of character sets.
  35
  36 ;; The charsets `ascii' and `unicode' are already defined in charset.c
  37 ;; as below:
  38 ;;
  39 ;; (define-charset 'ascii
  40 ;;   ""
  41 ;;   :dimension 1
  42 ;;   :code-space [0 127]
  43 ;;   :iso-final-char ?A
  44 ;;   :ascii-compatible-p t
  45 ;;   :emacs-mule-id 0
  46 ;;   :code-offset 0)
  47 ;;
  48 ;; (define-charset 'unicode
  49 ;;   ""
  50 ;;   :dimension 3
  51 ;;   :code-space [0 255 0 255 0 16]
  52 ;;   :ascii-compatible-p t
  53 ;;   :code-offset 0)
  54 ;;
  55 ;; We now set :docstring, :short-name, and :long-name properties.
  56
  57 (put-charset-property
  58  'ascii :docstring "ASCII (ISO646 IRV)")
  59 (put-charset-property
  60  'ascii :short-name "ASCII")
  61 (put-charset-property
  62  'ascii :long-name "ASCII (ISO646 IRV)")
  63 (put-charset-property
  64  'unicode :docstring "Unicode (ISO10646)")
  65 (put-charset-property
  66  'unicode :short-name "Unicode")
  67 (put-charset-property
  68  'unicode :long-name "Unicode (ISO10646)")
  69
  70 (define-charset-alias 'ucs 'unicode)
  71
  72 (define-charset 'emacs
  73   "Full Emacs characters."
  74   :ascii-compatible-p t
  75   :code-space [ 0 255 0 255 0 63 ]
  76   :code-offset 0
  77   :supplementary-p t)
  78
  79 (define-charset 'iso-8859-1
  80   "Latin-1 (ISO/IEC 8859-1)"
  81   :short-name "Latin-1"
  82   :ascii-compatible-p t
  83   :code-space [0 255]                   ; full single-byte range
  84   :code-offset 0)
  85
  86 (define-charset 'latin-iso8859-1
  87   "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  88   :short-name "RHP of Latin-1"
  89   :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  90   :iso-final-char ?A
  91   :emacs-mule-id 129
  92   :code-space [32 127]
  93   :code-offset 160)
  94
  95 (define-charset 'eight-bit-control
  96   "8-bit control code (0x80..0x9F)"
  97   :short-name "8-bit control code"
  98   :code-space [128 159]
  99   :code-offset 128)
 100
 101 (define-charset 'eight-bit-graphic
 102   "8-bit graphic code (0xA0..0xFF)"
 103   :short-name "8-bit graphic code"
 104   :code-space [160 255]
 105   :code-offset 160)
 106
 107 (defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
 108                                                  iso-ir iso-final
 109                                                  emacs-mule-id map)
 110   "For internal use only."
 111   `(progn
 112      (define-charset ,symbol            ; 8-bit charset covering codes 0..255
 113        ,name
 114        :short-name ,nickname
 115        :long-name ,name
 116        :ascii-compatible-p t
 117        :code-space [0 255]
 118        :map ,map)                       ; MAP is handed to define-charset as :map
 119      (if ,iso-symbol                    ; nil ISO-SYMBOL => no second charset
 120          (define-charset ,iso-symbol
 121            (if ,iso-ir                  ; cite ISO-IR-nnn only when ISO-IR is non-nil
 122                (format "Right-Hand Part of %s (%s): ISO-IR-%d"
 123                        ,name ,nickname ,iso-ir)
 124              (format "Right-Hand Part of %s (%s)" ,name ,nickname))
 125            :short-name (format "RHP of %s" ,name)
 126            :long-name (format "RHP of %s (%s)" ,name ,nickname)
 127            :iso-final-char ,iso-final
 128            :emacs-mule-id ,emacs-mule-id
 129            :code-space [32 127]
 130            :parents (list (cons ,symbol 128))))))  ; NOTE(review): 128 presumably shifts codes into SYMBOL's upper half -- confirm
 131
 132 (define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
 133   "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
 134
 135 (define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
 136   "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
 137
 138 (define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
 139   "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
 140
 141 (define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
 142   "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
 143
 144 (define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
 145   "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
 146
 147 (define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
 148   "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
 149
 150 (define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
 151   "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
 152
 153 (define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
 154   "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
 155
 156 (define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
 157   "ISO/IEC 8859/13" "Latin-7" nil nil nil "8859-13")
 158
 159 (define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
 160   "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
 161
 162 (define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
 163   "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
 164
 165 (define-charset 'thai-tis620
 166   "TIS620.2533"
 167   :short-name "TIS620.2533"
 168   :iso-final-char ?T
 169   :emacs-mule-id 133
 170   :code-space [32 127]
 171   :code-offset #x0E00)
 172
 173 (define-charset 'tis620-2533
 174   "TIS620.2533"
 175   :short-name "TIS620.2533"
 176   :ascii-compatible-p t
 177   :code-space [0 255]
 178   :parents '(ascii eight-bit-control (thai-tis620 . -128)))
 179
 180 (define-charset 'jisx0201
 181   "JISX0201"
 182   :short-name "JISX0201"
 183   :long-name "JISX0201"
 184   :code-space [33 254]
 185   :map "jisx0201")
 186
 187 (define-charset 'latin-jisx0201
 188   "Roman Part of JISX0201.1976"
 189   :short-name "JISX0201 Roman"
 190   :long-name "Japanese Roman (JISX0201.1976)"
 191   :iso-final-char ?J
 192   :emacs-mule-id  138
 193   :code-space [33 126]
 194   :parents '(jisx0201))
 195
 196 (define-charset 'katakana-jisx0201
 197   "Katakana Part of JISX0201.1976"
 198   :short-name "JISX0201 Katakana"
 199   :long-name "Japanese Katakana (JISX0201.1976)"
 200   :iso-final-char ?I
 201   :emacs-mule-id  137
 202   :code-space [33 126]
 203   :parents '((jisx0201 . #x80)))
 204
 205 (define-charset 'chinese-gb2312
 206   "GB2312 Chinese simplified: ISO-IR-58"
 207   :short-name "GB2312"
 208   :long-name "GB2312: ISO-IR-58"
 209   :iso-final-char ?A
 210   :emacs-mule-id 145
 211   :code-space [33 126 33 126]
 212   :code-offset #x110000
 213   :unify-map "gb2312-1980")
 214
 215 (define-charset 'chinese-gbk
 216   "GBK Chinese simplified."
 217   :short-name "GBK"
 218   :long-name "GBK"
 219   :code-space [#x40 #xFE #x81 #xFE]
 220   :code-offset #x160000                 ; #x150000 belongs to japanese-jisx0213-2
 221   :unify-map "gbk")
 222
 223 (define-charset 'chinese-cns11643-1
 224   "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
 225   :short-name "CNS11643-1"
 226   :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
 227   :iso-final-char ?G
 228   :emacs-mule-id  149
 229   :code-space [33 126 33 126]
 230   :code-offset #x114000
 231   :unify-map "cns11643-1")
 232
 233 (define-charset 'chinese-cns11643-2
 234   "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
 235   :short-name "CNS11643-2"
 236   :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
 237   :iso-final-char ?H
 238   :emacs-mule-id  150
 239   :code-space [33 126 33 126]
 240   :code-offset #x118000
 241   :unify-map "cns11643-2")
 242
 243 (define-charset 'chinese-cns11643-3
 244   "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
 245   :short-name  "CNS11643-3"
 246   :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
 247   :iso-final-char ?I
 248   :code-space [33 126 33 126]
 249   :emacs-mule-id  246
 250   :code-offset #x11C000)
 251
 252 (define-charset 'chinese-cns11643-4
 253   "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
 254   :short-name  "CNS11643-4"
 255   :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
 256   :iso-final-char ?J
 257   :emacs-mule-id  247
 258   :code-space [33 126 33 126]
 259   :code-offset #x120000)
 260
 261 (define-charset 'chinese-cns11643-5
 262   "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
 263   :short-name  "CNS11643-5"
 264   :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
 265   :iso-final-char ?K
 266   :emacs-mule-id  248
 267   :code-space [33 126 33 126]
 268   :code-offset #x124000)
 269
 270 (define-charset 'chinese-cns11643-6
 271   "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
 272   :short-name  "CNS11643-6"
 273   :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
 274   :iso-final-char ?L
 275   :emacs-mule-id 249
 276   :code-space [33 126 33 126]
 277   :code-offset #x128000)
 278
 279 (define-charset 'chinese-cns11643-7
 280   "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
 281   :short-name  "CNS11643-7"
 282   :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
 283   :iso-final-char ?M
 284   :emacs-mule-id 250
 285   :code-space [33 126 33 126]
 286   :code-offset #x12C000)
 287
 288 (define-charset 'big5
 289   "Big5 (Chinese traditional)"
 290   :short-name "Big5"
 291   :long-name "Big5"
 292   :code-space [#x40 #xFE #xA1 #xFE]
 293   :code-offset #x130000
 294   :unify-map "big5")
 295
 296 (define-charset 'chinese-big5-1
 297   "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
 298   :short-name "Big5 (Level-1)"
 299   :long-name "Big5 (Level-1) A141-C67F"
 300   :iso-final-char ?0
 301   :emacs-mule-id 152
 302   :code-space [#x21 #x7E #x21 #x7E]
 303   :code-offset #x135000
 304   :unify-map "big5-1")
 305
 306 (define-charset 'chinese-big5-2
 307   "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
 308   :short-name "Big5 (Level-2)"
 309   :long-name "Big5 (Level-2) C940-FEFE"
 310   :iso-final-char ?1
 311   :emacs-mule-id  153
 312   :code-space [#x21 #x7E #x21 #x7E]
 313   :code-offset #x137800
 314   :unify-map "big5-2")
 315
 316 (define-charset 'japanese-jisx0208
 317   "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
 318   :short-name "JISX0208"
 319   :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
 320   :iso-final-char ?B
 321   :emacs-mule-id 146
 322   :code-space [33 126 33 126]
 323   :code-offset #x140000
 324   :unify-map "jisx0208-1990")
 325
 326 (define-charset 'japanese-jisx0208-1978
 327   "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
 328   :short-name "JISX0208.1978"
 329   :long-name  "JISX0208.1978 (Japanese): ISO-IR-42"
 330   :iso-final-char ?@
 331   :emacs-mule-id  144
 332   :code-space [33 126 33 126]
 333   :code-offset #x144000
 334   :unify-map "jisx0208-1978")
 335
 336 (define-charset 'japanese-jisx0212
 337   "JISX0212 Japanese supplement: ISO-IR-159"
 338   :short-name "JISX0212"
 339   :long-name "JISX0212 (Japanese): ISO-IR-159"
 340   :iso-final-char ?D
 341   :emacs-mule-id 148
 342   :code-space [33 126 33 126]
 343   :code-offset #x148000
 344   :unify-map "jisx0212-1990")
 345
 346 (define-charset 'japanese-jisx0213-1
 347   "JISX0213 Plane 1 (Japanese)"
 348   :short-name "JISX0213-1"
 349   :long-name "JISX0213-1"
 350   :iso-final-char ?O
 351   :emacs-mule-id  151
 352   :code-space [33 126 33 126]
 353   :code-offset #x14C000)
 354
 355 (define-charset 'japanese-jisx0213-2
 356   "JISX0213 Plane 2 (Japanese)"
 357   :short-name "JISX0213-2"
 358   :long-name "JISX0213-2"
 359   :iso-final-char ?P
 360   :emacs-mule-id 254
 361   :code-space [33 126 33 126]
 362   :code-offset #x150000)
 363
 364 (define-charset 'korean-ksc5601
 365   "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
 366   :short-name "KSC5601"
 367   :long-name "KSC5601 (Korean): ISO-IR-149"
 368   :iso-final-char ?C
 369   :emacs-mule-id 147
 370   :code-space [33 126 33 126]
 371   :map "ksc5601-1987")
 372
 373 (define-charset 'chinese-sisheng
 374   "SiSheng characters for PinYin/ZhuYin"
 375   :short-name "SiSheng"
 376   :long-name "SiSheng (PinYin/ZhuYin)"
 377   :iso-final-char ?0
 378   :emacs-mule-id 160
 379   :code-space [33 126]
 380   :code-offset #x200000)
 381
 382 (define-charset 'ipa
 383   "IPA (International Phonetic Association)"
 384   :short-name "IPA"
 385   :long-name "IPA"
 386   :iso-final-char ?0
 387   :emacs-mule-id  161
 388   :code-space [32 127]
 389   :code-offset #x200080)
 390
 391 (define-charset 'viscii
 392   "VISCII1.1"
 393   :short-name "VISCII"
 394   :long-name "VISCII 1.1"
 395   :code-space [0 255]
 396   :map "viscii")
 397
 398 (define-charset 'vietnamese-viscii-lower
 399   "VISCII1.1 lower-case"
 400   :short-name "VISCII lower"
 401   :long-name "VISCII lower-case"
 402   :iso-final-char ?1
 403   :emacs-mule-id  162
 404   :code-space [32 127]
 405   :map "viscii-lower")
 406
 407 (define-charset 'vietnamese-viscii-upper
 408   "VISCII1.1 upper-case"
 409   :short-name "VISCII upper"
 410   :long-name "VISCII upper-case"
 411   :iso-final-char ?2
 412   :emacs-mule-id  163
 413   :code-space [32 127]
 414   :map "viscii-upper")
 415
 416 (define-charset 'vscii
 417   "VSCII1.1"
 418   :short-name "VSCII"
 419   :long-name "VSCII"
 420   :code-space [0 255]
 421   :map "vscii")
 422
 423 (define-charset 'koi8-r
 424   "KOI8-R"
 425   :short-name "KOI8-R"
 426   :long-name "KOI8-R"
 427   :ascii-compatible-p t
 428   :code-space [0 255]
 429   :map "koi8-r")
 430
 431 (define-charset-alias 'koi8 'koi8-r)
 432
 433 (define-charset 'alternativnyj
 434   "ALTERNATIVNYJ"
 435   :short-name "alternativnyj"
 436   :long-name "alternativnyj"
 437   :ascii-compatible-p t
 438   :code-space [0 255]
 439   :map "ibm866")
 440
 441 ;; For Arabic, we need three different types of character sets.
 442 ;; Digits are of direction left-to-right and of width 1-column.
 443 ;; Others are of direction right-to-left and of width 1-column or
 444 ;; 2-column.
 445 (define-charset 'arabic-digit
 446   "Arabic digit"
 447   :short-name "Arabic digit"
 448   :long-name "Arabic digit"
 449   :iso-final-char ?2
 450   :emacs-mule-id 164
 451   :code-space [34 42]
 452   :code-offset #x0600)
 453
 454 (define-charset 'arabic-1-column
 455   "Arabic 1-column"
 456   :short-name "Arabic 1-col"
 457   :long-name "Arabic 1-column"
 458   :iso-final-char ?3
 459   :emacs-mule-id 165
 460   :code-space [33 126]
 461   :code-offset #x200100)
 462
 463 (define-charset 'arabic-2-column
 464   "Arabic 2-column"
 465   :short-name "Arabic 2-col"
 466   :long-name "Arabic 2-column"
 467   :iso-final-char ?4
 468   :emacs-mule-id 224
 469   :code-space [33 126]
 470   :code-offset #x200180)
 471
 472 ;; Lao script.
 473 ;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
 474 (define-charset 'lao
 475   "Lao characters (ISO10646 0E81..0EDF)"
 476   :short-name "Lao"
 477   :long-name "Lao"
 478   :iso-final-char ?1
 479   :emacs-mule-id 167
 480   :code-space [33 126]
 481   :code-offset #x0E81)
 482
 483 (define-charset 'mule-lao
 484   "Lao characters (ISO10646 0E81..0EDF)"
 485   :short-name "Lao"
 486   :long-name "Lao"
 487   :code-space [0 255]
 488   :parents '(ascii eight-bit-control (lao . -128)))
 489
 490
 491 ;; Indian scripts.  Symbolic charset for data exchange.  Glyphs are
 492 ;; not assigned.  They are automatically converted to each Indian
 493 ;; script which IS-13194 supports.
 494
 495 (define-charset 'indian-is13194
 496   "Generic Indian charset for data exchange with IS 13194"
 497   :short-name "IS 13194"
 498   :long-name "Indian IS 13194"
 499   :iso-final-char ?5
 500   :emacs-mule-id 225
 501   :code-space [33 126]
 502   :code-offset #x180000)
 503
 504 (define-charset  'indian-glyph
 505   "Glyphs for Indian characters."
 506   :short-name "Indian glyph"
 507   :long-name "Indian glyph"
 508   :iso-final-char ?4
 509   :emacs-mule-id 240
 510   :code-space [32 127 32 127]
 511   :code-offset #x180100)
 512
 513 ;; Actual Glyph for 1-column width.
 514 (define-charset 'indian-1-column
 515   "Indian charset for 1-column width glyphs"
 516   :short-name "Indian 1-col"
 517   :long-name "Indian 1 Column"
 518   :iso-final-char ?6
 519   :emacs-mule-id  240
 520   :code-space [33 126 33 126]
 521   :code-offset #x184000)
 522
 523 ;; Actual Glyph for 2-column width.
 524 (define-charset 'indian-2-column
 525   "Indian charset for 2-column width glyphs"
 526   :short-name "Indian 2-col"
 527   :long-name "Indian 2 Column"
 528   :iso-final-char ?5
 529   :emacs-mule-id  251
 530   :code-space [33 126 33 126]
 531   :parents '(indian-1-column))
 532
 533 (define-charset 'tibetan
 534   "Tibetan characters"
 535   ;; Base set; `tibetan-1-column' names this charset in its :parents.
 536   :short-name "Tibetan 2-col"
 537   :long-name "Tibetan 2 column"
 538   :iso-final-char ?7
 539   :emacs-mule-id 252
 540   :code-space [33 126 33 126]
 541   :code-offset #x190000)
 542
 543 (define-charset 'tibetan-1-column
 544   "Tibetan 1 column glyph"
 545   :short-name "Tibetan 1-col"
 546   :long-name "Tibetan 1 column"
 547   :iso-final-char ?8
 548   :emacs-mule-id 241
 549   :code-space [33 126 33 37]
 550   :parents '(tibetan))
 551
 552 ;; Subsets of Unicode.
 553 (define-charset 'mule-unicode-2500-33ff
 554   "Unicode characters of the range U+2500..U+33FF."
 555   :short-name "Unicode subset 2"
 556   :long-name "Unicode subset (U+2500..U+33FF)"
 557   :iso-final-char ?2
 558   :emacs-mule-id 242
 559   :code-space [#x20 #x7f #x20 #x47]
 560   :code-offset #x2500)
 561
 562 (define-charset 'mule-unicode-e000-ffff
 563   "Unicode characters of the range U+E000..U+FFFF."
 564   :short-name "Unicode subset 3"
 565   :long-name "Unicode subset (U+E000..U+FFFF)"
 566   :iso-final-char ?3
 567   :emacs-mule-id 243
 568   :code-space [#x20 #x7F #x20 #x75]
 569   :code-offset #xE000)
 570
 571 (define-charset 'mule-unicode-0100-24ff
 572   "Unicode characters of the range U+0100..U+24FF."
 573   :short-name "Unicode subset"
 574   :long-name "Unicode subset (U+0100..U+24FF)"
 575   :iso-final-char ?1
 576   :emacs-mule-id 244
 577   :code-space [#x20 #x7F #x20 #x7F]
 578   :code-offset #x100)
 579
 580 (define-charset 'ethiopic
 581   "Ethiopic characters for Amharic and Tigrigna."
 582   :short-name "Ethiopic"
 583   :long-name "Ethiopic characters"
 584   :iso-final-char ?3
 585   :emacs-mule-id  245
 586   :code-space [33 126 33 126]
 587   :code-offset #x1A0000)
 588
 589 (define-charset 'mac-roman
 590   "Mac Roman charset"
 591   :short-name "Mac Roman"
 592   :long-name "Mac Roman"
 593   :ascii-compatible-p t
 594   :code-space [0 255]
 595   :map "mac-roman")
 596
 597 (unify-charset 'chinese-gb2312)
 598 (unify-charset 'chinese-gbk)
 599 (unify-charset 'chinese-cns11643-1)
 600 (unify-charset 'chinese-cns11643-2)
 601 (unify-charset 'big5)
 602 (unify-charset 'chinese-big5-1)
 603 (unify-charset 'chinese-big5-2)
 604 \f
 605 ;; These are tables for translating characters on decoding and
 606 ;; encoding.
 607 (setq standard-translation-table-for-decode nil)
 608
 609 (setq standard-translation-table-for-encode nil)
 610
 611 (defvar translation-table-for-input nil
 612   "If non-nil, a char table used to translate characters from input methods.
 613 \(Currently only used by Quail.)")
 614 \f
 615 ;;; Make fundamental coding systems.
 616
 617 ;; The coding system `no-conversion' is already defined in coding.c as
 618 ;; below:
 619 ;;
 620 ;; (define-coding-system 'no-conversion
 621 ;;   "Do no conversion."
 622 ;;   :coding-type 'raw-text
 623 ;;   :mnemonic ?=)
 624
 625 (define-coding-system 'raw-text
 626  "Raw text, which means text contains random 8-bit codes.
 627 Encoding text with this coding system produces the actual byte
 628 sequence of the text in buffers and strings.  An exception is made for
 629 eight-bit-control characters.  Each of them is encoded into a single
 630 byte.
 631
 632 When you visit a file with this coding, the file is read into a
 633 unibyte buffer as is (except for EOL format), thus each byte of a file
 634 is treated as a character."
 635  :coding-type 'raw-text
 636  :mnemonic ?t)
 637
 638 (define-coding-system 'undecided
 639   "No conversion on encoding, automatic conversion on decoding"
 640   :coding-type 'undecided
 641   :mnemonic ?-
 642   :charset-list '(ascii))
 643
 644 (define-coding-system-alias 'unix 'undecided-unix)
 645 (define-coding-system-alias 'dos 'undecided-dos)
 646 (define-coding-system-alias 'mac 'undecided-mac)
 647
 648 (define-coding-system 'iso-latin-1
 649   "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
 650   :coding-type 'iso-2022
 651   :mnemonic ?1
 652   :charset-list '(ascii latin-iso8859-1)
 653   :designation [ascii latin-iso8859-1 nil nil]
 654   :mime-charset 'iso-8859-1)
 655
 656 (define-coding-system-alias 'iso-8859-1 'iso-latin-1)
 657 (define-coding-system-alias 'latin-1 'iso-latin-1)
 658
 659 ;; Coding systems not specific to each language environment.
 660
 661 (define-coding-system 'emacs-mule
 662  "Emacs 21 internal format used in buffer and string."
 663  :coding-type 'emacs-mule
 664  :mnemonic ?M)
 665
 666 (define-coding-system 'utf-8
 667   "UTF-8."
 668   :coding-type 'utf-8
 669   :mnemonic ?U
 670   :charset-list '(unicode))
 671
 672 (define-coding-system-alias 'mule-utf-8 'utf-8)
 673
 674 (define-coding-system 'utf-8-emacs
 675   "UTF-8 with full support for Emacs characters."
 676   :coding-type 'utf-8
 677   :mnemonic ?U
 678   :charset-list '(emacs))               ; the `emacs' charset, not just `unicode'
 679
 680 (define-coding-system 'utf-16
 681   "UTF-16"
 682   :coding-type 'utf-16
 683   :mnemonic ?U
 684   :charset-list '(unicode))
 685
 686 (define-coding-system 'utf-16-le-nosig
 687   "UTF-16, little endian, no signature"
 688   :coding-type 'utf-16
 689   :mnemonic ?U
 690   :charset-list '(unicode)
 691   :endian 'little)
 692
 693 (define-coding-system 'utf-16-be-nosig
 694   "UTF-16, big endian, no signature"
 695   :coding-type 'utf-16
 696   :mnemonic ?U
 697   :charset-list '(unicode)
 698   :endian 'big)
 699
 700 (define-coding-system 'utf-16-le
 701   "UTF-16, little endian, with signature"
 702   :coding-type 'utf-16
 703   :mnemonic ?U
 704   :charset-list '(unicode)
 705   :bom t
 706   :endian 'little)
 707
 708 (define-coding-system 'utf-16-be
 709   "UTF-16, big endian, with signature"
 710   :coding-type 'utf-16
 711   :mnemonic ?U
 712   :charset-list '(unicode)
 713   :bom t
 714   :endian 'big)
 715
 716 (define-coding-system 'iso-2022-7bit
 717   "ISO 2022 based 7-bit encoding using only G0"
 718   :coding-type 'iso-2022
 719   :mnemonic ?J
 720   :charset-list 'iso-2022
 721   :designation [(ascii t) nil nil nil]
 722   :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
 723
 724 (define-coding-system 'iso-2022-7bit-ss2
 725   "ISO 2022 based 7-bit encoding using SS2 for 96-charset"
 726   :coding-type 'iso-2022
 727   :mnemonic ?$
 728   :charset-list 'iso-2022
 729   :designation [(ascii 94) nil (nil 96) nil]
 730   :flags '(short ascii-at-eol ascii-at-cntl 7-bit
 731                  designation single-shift composition))
 732
 733 (define-coding-system 'iso-2022-7bit-lock
 734   "ISO-2022 coding system using Locking-Shift for 96-charset"
 735   :coding-type 'iso-2022
 736   :mnemonic ?&
 737   :charset-list 'iso-2022
 738   :designation [(ascii 94) (nil 96) nil nil]
 739   :flags '(ascii-at-eol ascii-at-cntl 7-bit
 740                         designation locking-shift composition))
 741
 742 (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
 743
 744 (define-coding-system 'iso-2022-7bit-lock-ss2
 745   "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN"
 746   :coding-type 'iso-2022
 747   :mnemonic ?i
 748   :charset-list '(ascii
 749                   japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
 750                   korean-ksc5601
 751                   chinese-gb2312
 752                   chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
 753                   chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
 754                   chinese-cns11643-7)
 755   :designation [(ascii 94)
 756                 (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
 757                 (nil chinese-cns11643-2)
 758                 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
 759                      chinese-cns11643-6 chinese-cns11643-7)]
 760   :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
 761                  single-shift init-bol))
 762
 763 (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
 764
 765 (define-coding-system 'iso-2022-8bit-ss2
 766   "ISO 2022 based 8-bit encoding using SS2 for 96-charset"
 767   :coding-type 'iso-2022
 768   :mnemonic ?@
 769   :charset-list 'iso-2022
 770   :designation [(ascii 94) nil (nil 96) nil]
 771   :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
 772
 773 (define-coding-system 'compound-text
 774   "Compound text based generic encoding for decoding unknown messages.
 775
 776 This coding system does not support ICCCM Extended Segments."
 777   :coding-type 'iso-2022
 778   :mnemonic ?x
 779   :charset-list 'iso-2022
 780   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
 781   :flags '(ascii-at-eol ascii-at-cntl
 782                         designation locking-shift single-shift composition)
 783   :mime-charset 'x-ctext)
 784
 785 (define-coding-system-alias  'x-ctext 'compound-text)
 786 (define-coding-system-alias  'ctext 'compound-text)
 787
 788 ;; Same as compound-text, but doesn't produce composition escape
 789 ;; sequences.  Used in post-read and pre-write conversions of
 790 ;; compound-text-with-extensions, see mule.el.  Note that this should
 791 ;; not have a mime-charset property, to prevent it from showing up
 792 ;; close to the beginning of coding systems ordered by priority.
 793 (define-coding-system 'ctext-no-compositions
 794  "Compound text based generic encoding for decoding unknown messages.
 795
 796 Like `compound-text', but does not produce escape sequences for compositions."
 797   :coding-type 'iso-2022
 798   :mnemonic ?x
 799   :charset-list 'iso-2022
 800   :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
 801   :flags '(ascii-at-eol ascii-at-cntl   ; same as `compound-text' minus `composition'
 802                         designation locking-shift single-shift))
 803
 804 (define-coding-system 'compound-text-with-extensions
 805  "Compound text encoding with ICCCM Extended Segment extensions.
 806
 807 This coding system should be used only for X selections.  It is inappropriate
 808 for decoding and encoding files, process I/O, etc."
 809   :coding-type 'raw-text
 810   :mnemonic ?x
 811   :post-read-conversion 'ctext-post-read-conversion
 812   :pre-write-conversion 'ctext-pre-write-conversion)
 813
 814 (define-coding-system-alias
 815   'x-ctext-with-extensions 'compound-text-with-extensions)
 816 (define-coding-system-alias
 817   'ctext-with-extensions 'compound-text-with-extensions)
 818
 819 (define-coding-system 'us-ascii
 820   "Convert all characters but ASCII to `?'."
 821   :coding-type 'charset
 822   :mnemonic ?-
 823   :charset-list '(ascii)
 824   :default-char ??
 825   :mime-charset 'us-ascii)
 826
 827 (define-coding-system-alias 'iso-safe 'us-ascii)
 828
 829 ;; Use us-ascii for terminal output if some other coding system is not
 830 ;; specified explicitly.
 831 (set-safe-terminal-coding-system-internal 'us-ascii)
 832
 833 ;; The other coding-systems are defined in each language specific
 834 ;; files under lisp/language.
 835
 836 ;; Normally, set coding system to `undecided' before reading a file.
 837 ;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
 838 ;; but we regard them as containing multibyte characters.
 839 ;; Tar files are not decoded at all, but we treat them as raw bytes.
 840
 841 (setq file-coding-system-alist
 842       '(("\\.elc\\'" . (emacs-mule . emacs-mule))
 843         ("\\.utf\\(-8\\)?\\'" . utf-8)
 844         ;; We use raw-text for reading loaddefs.el so that if it
 845         ;; happens to have DOS or Mac EOLs, they are converted to
 846         ;; newlines.  This is required to make the special treatment
 847         ;; of the "\ newline" combination in loaddefs.el, which marks
 848         ;; the beginning of a doc string, work.
 849         ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
 850         ("\\.tar\\'" . (no-conversion . no-conversion))
 851         ("" . (undecided . nil))))
 852
 853 \f
 854 ;;; Setting coding categories and their priorities.
 855
 856 ;; This setting is just for reading Emacs Lisp source files that
 857 ;; contain multilingual text while dumping Emacs.  More appropriate
 858 ;; values are set by the command `set-language-environment' for each
 859 ;; language environment.
 860
 861 (set-coding-system-priority
 862  'iso-latin-1
 863  'utf-8
 864  'iso-2022-7bit
 865  )
 866
 867 \f
 868 ;;; Miscellaneous settings.
 869
 870 ;; Make all multibyte characters self-insert.
 871 (set-char-table-range (nth 1 global-map)
 872                       (cons (make-char 'unicode 128) (max-char))
 873                       'self-insert-command)
 874
 875 (aset latin-extra-code-table ?\222 t)
 876
 877 ;;; mule-conf.el ends here