Merge from emacs-23

[bpt/emacs.git] / lisp / international / mule-conf.el
diff --git a/lisp/international/mule-conf.el b/lisp/international/mule-conf.el

index 8b78952..c0a3932 100644 (file)
--- a/lisp/international/mule-conf.el
+++ b/lisp/international/mule-conf.el
@@ -1,8 +1,8 @@
  ;;; mule-conf.el --- configure multilingual environment
  
  ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-;;   2004, 2005, 2006, 2007  Free Software Foundation, Inc.
-;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+;;   2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011  Free Software Foundation, Inc.
+;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
  ;;   National Institute of Advanced Industrial Science and Technology (AIST)
  ;;   Registration Number H14PRO021
  ;; Copyright (C) 2003
@@ -13,10 +13,10 @@
  
  ;; This file is part of GNU Emacs.
  
-;; GNU Emacs is free software; you can redistribute it and/or modify
+;; GNU Emacs is free software: you can redistribute it and/or modify
  ;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 3, or (at your option)
-;; any later version.
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
  
  ;; GNU Emacs is distributed in the hope that it will be useful,
  ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -24,9 +24,7 @@
  ;; GNU General Public License for more details.
  
  ;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING.  If not, write to the
-;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-;; Boston, MA 02110-1301, USA.
+;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
  
  ;;; Commentary:
  
@@ -70,6 +68,14 @@
  ;;   :ascii-compatible-p t
  ;;   :code-offset 0)
  ;;
+;; (define-charset 'emacs
+;;   ""
+;;   :dimension 3
+;;   :code-space [0 255 0 255 0 63]
+;;   :ascii-compatible-p t
+;;   :supplementary-p t
+;;   :code-offset 0)
+;;
  ;; (define-charset 'eight-bit
  ;;   ""
  ;;   :dimension 1
@@ -96,18 +102,18 @@
   'unicode :short-name "Unicode")
  (put-charset-property
   'unicode :long-name "Unicode (ISO10646)")
-(put-charset-property 'eight-bit :docstring "Raw bytes 0-255")
+(put-charset-property
+ 'emacs :docstring "Full Emacs charset (excluding eight bit chars)")
+(put-charset-property
+ 'emacs :short-name "Emacs")
+(put-charset-property
+ 'emacs :long-name "Emacs")
+
+(put-charset-property 'eight-bit :docstring "Raw bytes 128-255")
  (put-charset-property 'eight-bit :short-name "Raw bytes")
  
  (define-charset-alias 'ucs 'unicode)
  
-(define-charset 'emacs
-  "Full Emacs characters"
-  :ascii-compatible-p t
-  :code-space [ 0 255 0 255 0 63 ]
-  :code-offset 0
-  :supplementary-p t)
-
  (define-charset 'latin-iso8859-1
    "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
    :short-name "RHP of Latin-1"
@@ -127,12 +133,14 @@
  (define-charset 'eight-bit-control
    "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
    :short-name "Raw bytes 0x80..0x9F"
+  :supplementary-p t
    :code-space [128 159]
    :code-offset #x3FFF80)               ; see character.h
  
  (define-charset 'eight-bit-graphic
    "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
    :short-name "Raw bytes 0xA0..0xFF"
+  :supplementary-p t
    :code-space [160 255]
    :code-offset #x3FFFA0)               ; see character.h
  
@@ -240,6 +248,7 @@
    :long-name "Japanese Roman (JISX0201.1976)"
    :iso-final-char ?J
    :emacs-mule-id  138
+  :supplementary-p t
    :code-space [33 126]
    :subset '(jisx0201 33 126 0))
  
@@ -249,6 +258,7 @@
    :long-name "Japanese Katakana (JISX0201.1976)"
    :iso-final-char ?I
    :emacs-mule-id  137
+  :supplementary-p t
    :code-space [33 126]
    :subset '(jisx0201 161 254 -128))
  
@@ -357,6 +367,7 @@
    :long-name "Big5 (Level-1) A141-C67F"
    :iso-final-char ?0
    :emacs-mule-id 152
+  :supplementary-p t
    :code-space [#x21 #x7E #x21 #x7E]
    :code-offset #x135000
    :unify-map "BIG5-1")
@@ -367,6 +378,7 @@
    :long-name "Big5 (Level-2) C940-FEFE"
    :iso-final-char ?1
    :emacs-mule-id  153
+  :supplementary-p t
    :code-space [#x21 #x7E #x21 #x7E]
    :code-offset #x137800
    :unify-map "BIG5-2")
@@ -474,7 +486,19 @@
    :code-offset #x27c218                        ; ... #x280839
    :unify-map "BIG5-HKSCS")
  
-;; Fixme: Korean cp949/UHC
+(define-charset 'cp949-2-byte
+  "2-byte part of CP949"
+  :dimension 2
+  :map "CP949-2BYTE"
+  :code-space [#x41 #xFE #x81 #xFD]
+  :supplementary-p t)
+
+(define-charset 'cp949
+  "CP949 (Korean)"
+  :short-name "CP949"
+  :long-name  "CP949 (Korean)"
+  :code-space [#x00 #xFE #x00 #xFD]
+  :superset '(ascii cp949-2-byte))
  
  (define-charset 'chinese-sisheng
    "SiSheng characters for PinYin/ZhuYin"
@@ -484,6 +508,7 @@
    :emacs-mule-id 160
    :code-space [33 126]
    :unify-map "MULE-sisheng"
+  :supplementary-p t
    :code-offset #x200000)
  
  ;; A subset of the 1989 version of IPA.  It consists of the consonant
@@ -496,6 +521,7 @@
    :emacs-mule-id  161
    :unify-map "MULE-ipa"
    :code-space [32 127]
+  :supplementary-p t
    :code-offset #x200080)
  
  (define-charset 'viscii
@@ -513,6 +539,7 @@
    :emacs-mule-id  162
    :code-space [32 127]
    :code-offset #x200200
+  :supplementary-p t
    :unify-map "MULE-lviscii")
  
  (define-charset 'vietnamese-viscii-upper
@@ -523,6 +550,7 @@
    :emacs-mule-id  163
    :code-space [32 127]
    :code-offset #x200280
+  :supplementary-p t
    :unify-map "MULE-uviscii")
  
  (define-charset 'vscii
@@ -812,6 +840,7 @@
    :short-name "Arabic digit"
    :iso-final-char ?2
    :emacs-mule-id 164
+  :supplementary-p t
    :code-space [34 42]
    :code-offset #x0600)
  
@@ -821,6 +850,7 @@
    :long-name "Arabic 1-column"
    :iso-final-char ?3
    :emacs-mule-id 165
+  :supplementary-p t
    :code-space [33 126]
    :code-offset #x200100)
  
@@ -830,6 +860,7 @@
    :long-name "Arabic 2-column"
    :iso-final-char ?4
    :emacs-mule-id 224
+  :supplementary-p t
    :code-space [33 126]
    :code-offset #x200180)
  
@@ -841,6 +872,7 @@
    :short-name "Lao"
    :iso-final-char ?1
    :emacs-mule-id 167
+  :supplementary-p t
    :code-space [33 126]
    :code-offset #x0E81)
  
@@ -848,6 +880,7 @@
    "Lao characters (ISO10646 0E81..0EDF)"
    :short-name "Lao"
    :code-space [0 255]
+  :supplementary-p t
    :superset '(ascii eight-bit-control (lao . 128)))
  
  
@@ -861,6 +894,7 @@
    :long-name "Indian IS 13194"
    :iso-final-char ?5
    :emacs-mule-id 225
+  :supplementary-p t
    :code-space [33 126]
    :code-offset #x180000)
  
@@ -871,6 +905,7 @@
        (format "Glyphs of %s script for CDAC font.  Subset of `indian-glyph'."
               (capitalize (symbol-name script)))
        :short-name (format "CDAC %s glyphs" (capitalize (symbol-name script)))
+      :supplementary-p t
        :code-space [0 255]
        :code-offset code-offset)
      (setq code-offset (+ code-offset #x100)))
@@ -881,6 +916,7 @@
        (format "Glyphs of %s script for AKRUTI font.  Subset of `indian-glyph'."
               (capitalize (symbol-name script)))
        :short-name (format "AKRUTI %s glyphs" (capitalize (symbol-name script)))
+      :supplementary-p t
        :code-space [0 255]
        :code-offset code-offset)
      (setq code-offset (+ code-offset #x100))))
@@ -890,6 +926,7 @@
    :short-name "Indian glyph"
    :iso-final-char ?4
    :emacs-mule-id 240
+  :supplementary-p t
    :code-space [32 127 32 127]
    :code-offset #x180100)
  
@@ -900,6 +937,7 @@
    :long-name "Indian 1 Column"
    :iso-final-char ?6
    :emacs-mule-id  251
+  :supplementary-p t
    :code-space [33 126 33 126]
    :code-offset #x184000)
  
@@ -910,6 +948,7 @@
    :long-name "Indian 2 Column"
    :iso-final-char ?5
    :emacs-mule-id  251
+  :supplementary-p t
    :code-space [33 126 33 126]
    :code-offset #x184000)
  
@@ -921,6 +960,7 @@
    :iso-final-char ?7
    :emacs-mule-id 252
    :unify-map "MULE-tibetan"
+  :supplementary-p t
    :code-space [33 126 33 37]
    :code-offset #x190000)
  
@@ -930,6 +970,7 @@
    :long-name "Tibetan 1 column"
    :iso-final-char ?8
    :emacs-mule-id 241
+  :supplementary-p t
    :code-space [33 126 33 37]
    :code-offset #x190000)
  
@@ -940,6 +981,7 @@
    :long-name "Unicode subset (U+2500..U+33FF)"
    :iso-final-char ?2
    :emacs-mule-id 242
+  :supplementary-p t
    :code-space [#x20 #x7f #x20 #x47]
    :code-offset #x2500)
  
@@ -949,6 +991,7 @@
    :long-name "Unicode subset (U+E000+FFFF)"
    :iso-final-char ?3
    :emacs-mule-id 243
+  :supplementary-p t
    :code-space [#x20 #x7F #x20 #x75]
    :code-offset #xE000
    :max-code 30015)                     ; U+FFFF
@@ -959,6 +1002,7 @@
    :long-name "Unicode subset (U+0100..U+24FF)"
    :iso-final-char ?1
    :emacs-mule-id 244
+  :supplementary-p t
    :code-space [#x20 #x7F #x20 #x7F]
    :code-offset #x100)
  
@@ -992,6 +1036,7 @@
    :long-name "Ethiopic characters"
    :iso-final-char ?3
    :emacs-mule-id  245
+  :supplementary-p t
    :unify-map "MULE-ethiopic"
    :code-space [33 126 33 126]
    :code-offset #x1A0000)
@@ -1185,8 +1230,8 @@
    "Raw text, which means text contains random 8-bit codes.
  Encoding text with this coding system produces the actual byte
  sequence of the text in buffers and strings.  An exception is made for
-eight-bit-control characters.  Each of them is encoded into a single
-byte.
+characters from the `eight-bit' character set.  Each of them is encoded
+into a single byte.
  
  When you visit a file with this coding, the file is read into a
  unibyte buffer as is (except for EOL format), thus each byte of a file
@@ -1200,7 +1245,7 @@ is treated as a character."
    :coding-type 'raw-text
    :eol-type 'unix
    :mnemonic ?=)
-  
+
  (define-coding-system 'iso-latin-1
    "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
    :coding-type 'charset
@@ -1220,12 +1265,26 @@ is treated as a character."
   :mnemonic ?M)
  
  (define-coding-system 'utf-8
-  "UTF-8."
+  "UTF-8 (no signature (BOM))"
    :coding-type 'utf-8
    :mnemonic ?U
    :charset-list '(unicode)
    :mime-charset 'utf-8)
  
+(define-coding-system 'utf-8-with-signature
+  "UTF-8 (with signature (BOM))"
+  :coding-type 'utf-8
+  :mnemonic ?U
+  :charset-list '(unicode)
+  :bom t)
+
+(define-coding-system 'utf-8-auto
+  "UTF-8 (auto-detect signature (BOM))"
+  :coding-type 'utf-8
+  :mnemonic ?U
+  :charset-list '(unicode)
+  :bom '(utf-8-with-signature . utf-8))
+
  (define-coding-system-alias 'mule-utf-8 'utf-8)
  
  (define-coding-system 'utf-8-emacs
@@ -1234,6 +1293,11 @@ is treated as a character."
    :mnemonic ?U
    :charset-list '(emacs))
  
+;; The encoding used internally.  This encoding is meant to be able to save
+;; any multibyte buffer without losing information.  It can change between
+;; Emacs releases, though, so it should only be used for internal files.
+(define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
+
  (define-coding-system 'utf-16le
    "UTF-16LE (little endian, no signature (BOM))."
    :coding-type 'utf-16
@@ -1263,7 +1327,7 @@ is treated as a character."
    :mime-charset 'utf-16)
  
  (define-coding-system 'utf-16be-with-signature
-  "UTF-16 (big endian, with signature)."
+  "UTF-16 (big endian, with signature (BOM))."
    :coding-type 'utf-16
    :mnemonic ?U
    :charset-list '(unicode)
@@ -1346,9 +1410,10 @@ is treated as a character."
    :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
  
  (define-coding-system 'compound-text
-  "Compound text based generic encoding for decoding unknown messages.
-
-This coding system does not support extended segments of CTEXT."
+  "Compound text based generic encoding.
+This coding system is an extension of X's \"Compound Text Encoding\".
+It encodes many characters using the normal ISO-2022 designation sequences,
+but it doesn't support extended segments of CTEXT."
    :coding-type 'iso-2022
    :mnemonic ?x
    :charset-list 'iso-2022
@@ -1368,7 +1433,7 @@ This coding system does not support extended segments of CTEXT."
  ;; not have a mime-charset property, to prevent it from showing up
  ;; close to the beginning of coding systems ordered by priority.
  (define-coding-system 'ctext-no-compositions
- "Compound text based generic encoding for decoding unknown messages.
+ "Compound text based generic encoding.
  
  Like `compound-text', but does not produce escape sequences for compositions."
    :coding-type 'iso-2022
@@ -1381,8 +1446,9 @@ Like `compound-text', but does not produce escape sequences for compositions."
  (define-coding-system 'compound-text-with-extensions
   "Compound text encoding with ICCCM Extended Segment extensions.
  
-See the variable `ctext-non-standard-encodings-alist' for the
-detail about how extended segments are handled.
+See the variables `ctext-standard-encodings' and
+`ctext-non-standard-encodings-alist' for the detail about how
+extended segments are handled.
  
  This coding system should be used only for X selections.  It is inappropriate
  for decoding and encoding files, process I/O, etc."
@@ -1419,6 +1485,14 @@ for decoding and encoding files, process I/O, etc."
    :pre-write-conversion 'utf-7-pre-write-conversion
    :post-read-conversion 'utf-7-post-read-conversion)
  
+(define-coding-system 'utf-7-imap
+  "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
+  :coding-type 'utf-8
+  :mnemonic ?u
+  :charset-list '(unicode)
+  :pre-write-conversion 'utf-7-imap-pre-write-conversion
+  :post-read-conversion 'utf-7-imap-post-read-conversion)
+
  ;; Use us-ascii for terminal output if some other coding system is not
  ;; specified explicitly.
  (set-safe-terminal-coding-system-internal 'us-ascii)
@@ -1432,13 +1506,10 @@ for decoding and encoding files, process I/O, etc."
  ;; Tar files are not decoded at all, but we treat them as raw bytes.
  
  (setq file-coding-system-alist
+      (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
        '(("\\.elc\\'" . utf-8-emacs)
         ("\\.utf\\(-8\\)?\\'" . utf-8)
-       ;; This is the defined default for XML documents.  It may be
-       ;; overridden by a charset specification in the header.  That
-       ;; should be grokked by the auto-coding mechanism, but rms
-       ;; vetoed that.  -- fx
-       ("\\.xml\\'" . utf-8)
+       ("\\.xml\\'" . xml-find-file-coding-system)
         ;; We use raw-text for reading loaddefs.el so that if it
         ;; happens to have DOS or Mac EOLs, they are converted to
         ;; newlines.  This is required to make the special treatment
@@ -1448,7 +1519,7 @@ for decoding and encoding files, process I/O, etc."
         ("\\.tar\\'" . (no-conversion . no-conversion))
         ( "\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
         ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
-       ("" . (undecided . nil))))
+       ("" . (undecided . nil)))))
  
  \f
  ;;; Setting coding categories and their priorities.
@@ -1479,19 +1550,10 @@ for decoding and encoding files, process I/O, etc."
  (aset latin-extra-code-table ?\225 t)
  (aset latin-extra-code-table ?\226 t)
  
-;; Move least specific charsets to end of priority list
-
-(apply #'set-charset-priority
-       (delq 'unicode (delq 'emacs (charset-priority-list))))
-
  ;; The old code-pages library is obsoleted by coding systems based on
  ;; the charsets defined in this file but might be required by user
  ;; code.
  (provide 'code-pages)
  
-;; Local variables:
-;; no-byte-compile: t
-;; End:
-
  ;; arch-tag: 7d5fed55-b6df-42f6-8d3d-0011190551f5
  ;;; mule-conf.el ends here