;;; mule-conf.el --- configure multilingual environment
;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-;; 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
-;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+;; 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
;; National Institute of Advanced Industrial Science and Technology (AIST)
;; Registration Number H14PRO021
;; Copyright (C) 2003
;; This file is part of GNU Emacs.
-;; GNU Emacs is free software; you can redistribute it and/or modify
+;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 3, or (at your option)
-;; any later version.
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-;; Boston, MA 02110-1301, USA.
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; :ascii-compatible-p t
;; :code-offset 0)
;;
+;; (define-charset 'emacs
+;; ""
+;; :dimension 3
+;; :code-space [0 255 0 255 0 63]
+;; :ascii-compatible-p t
+;; :supplementary-p t
+;; :code-offset 0)
+;;
;; (define-charset 'eight-bit
;; ""
;; :dimension 1
'unicode :short-name "Unicode")
(put-charset-property
'unicode :long-name "Unicode (ISO10646)")
-(put-charset-property 'eight-bit :docstring "Raw bytes 0-255")
+;; Attach the docstring and the short and long display names to the
+;; `emacs' charset.
+(put-charset-property
+ 'emacs :docstring "Full Emacs charset (excluding eight bit chars)")
+(put-charset-property
+ 'emacs :short-name "Emacs")
+(put-charset-property
+ 'emacs :long-name "Emacs")
+
+(put-charset-property 'eight-bit :docstring "Raw bytes 128-255")
(put-charset-property 'eight-bit :short-name "Raw bytes")
(define-charset-alias 'ucs 'unicode)
-(define-charset 'emacs
- "Full Emacs characters"
- :ascii-compatible-p t
- :code-space [ 0 255 0 255 0 63 ]
- :code-offset 0
- :supplementary-p t)
-
(define-charset 'latin-iso8859-1
"Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
:short-name "RHP of Latin-1"
(define-charset 'eight-bit-control
"Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
:short-name "Raw bytes 0x80..0x9F"
+ :supplementary-p t
:code-space [128 159]
:code-offset #x3FFF80) ; see character.h
(define-charset 'eight-bit-graphic
"Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
:short-name "Raw bytes 0xA0..0xFF"
+ :supplementary-p t
:code-space [160 255]
:code-offset #x3FFFA0) ; see character.h
:long-name "Japanese Roman (JISX0201.1976)"
:iso-final-char ?J
:emacs-mule-id 138
+ :supplementary-p t
:code-space [33 126]
:subset '(jisx0201 33 126 0))
:long-name "Japanese Katakana (JISX0201.1976)"
:iso-final-char ?I
:emacs-mule-id 137
+ :supplementary-p t
:code-space [33 126]
:subset '(jisx0201 161 254 -128))
:long-name "Big5 (Level-1) A141-C67F"
:iso-final-char ?0
:emacs-mule-id 152
+ :supplementary-p t
:code-space [#x21 #x7E #x21 #x7E]
:code-offset #x135000
:unify-map "BIG5-1")
:long-name "Big5 (Level-2) C940-FEFE"
:iso-final-char ?1
:emacs-mule-id 153
+ :supplementary-p t
:code-space [#x21 #x7E #x21 #x7E]
:code-offset #x137800
:unify-map "BIG5-2")
:code-offset #x27c218 ; ... #x280839
:unify-map "BIG5-HKSCS")
-;; Fixme: Korean cp949/UHC
+;; Two-dimensional charset for the double-byte part of CP949. Its
+;; code points come from the map named "CP949-2BYTE", and it is
+;; flagged :supplementary-p. It is one of the members of the
+;; `cp949' superset charset.
+(define-charset 'cp949-2-byte
+ "2-byte part of CP949"
+ :dimension 2
+ :map "CP949-2BYTE"
+ :code-space [#x41 #xFE #x81 #xFD]
+ :supplementary-p t)
+
+;; Complete CP949 (Korean) charset, declared as the superset of
+;; `ascii' and `cp949-2-byte'.
+(define-charset 'cp949
+ "CP949 (Korean)"
+ :short-name "CP949"
+ :long-name "CP949 (Korean)"
+ :code-space [#x00 #xFE #x00 #xFD]
+ :superset '(ascii cp949-2-byte))
(define-charset 'chinese-sisheng
"SiSheng characters for PinYin/ZhuYin"
:emacs-mule-id 160
:code-space [33 126]
:unify-map "MULE-sisheng"
+ :supplementary-p t
:code-offset #x200000)
;; A subset of the 1989 version of IPA. It consists of the consonant
:emacs-mule-id 161
:unify-map "MULE-ipa"
:code-space [32 127]
+ :supplementary-p t
:code-offset #x200080)
(define-charset 'viscii
:emacs-mule-id 162
:code-space [32 127]
:code-offset #x200200
+ :supplementary-p t
:unify-map "MULE-lviscii")
(define-charset 'vietnamese-viscii-upper
:emacs-mule-id 163
:code-space [32 127]
:code-offset #x200280
+ :supplementary-p t
:unify-map "MULE-uviscii")
(define-charset 'vscii
:short-name "Arabic digit"
:iso-final-char ?2
:emacs-mule-id 164
+ :supplementary-p t
:code-space [34 42]
:code-offset #x0600)
:long-name "Arabic 1-column"
:iso-final-char ?3
:emacs-mule-id 165
+ :supplementary-p t
:code-space [33 126]
:code-offset #x200100)
:long-name "Arabic 2-column"
:iso-final-char ?4
:emacs-mule-id 224
+ :supplementary-p t
:code-space [33 126]
:code-offset #x200180)
:short-name "Lao"
:iso-final-char ?1
:emacs-mule-id 167
+ :supplementary-p t
:code-space [33 126]
:code-offset #x0E81)
"Lao characters (ISO10646 0E81..0EDF)"
:short-name "Lao"
:code-space [0 255]
+ :supplementary-p t
:superset '(ascii eight-bit-control (lao . 128)))
:long-name "Indian IS 13194"
:iso-final-char ?5
:emacs-mule-id 225
+ :supplementary-p t
:code-space [33 126]
:code-offset #x180000)
(format "Glyphs of %s script for CDAC font. Subset of `indian-glyph'."
(capitalize (symbol-name script)))
:short-name (format "CDAC %s glyphs" (capitalize (symbol-name script)))
+ :supplementary-p t
:code-space [0 255]
:code-offset code-offset)
(setq code-offset (+ code-offset #x100)))
(format "Glyphs of %s script for AKRUTI font. Subset of `indian-glyph'."
(capitalize (symbol-name script)))
:short-name (format "AKRUTI %s glyphs" (capitalize (symbol-name script)))
+ :supplementary-p t
:code-space [0 255]
:code-offset code-offset)
(setq code-offset (+ code-offset #x100))))
:short-name "Indian glyph"
:iso-final-char ?4
:emacs-mule-id 240
+ :supplementary-p t
:code-space [32 127 32 127]
:code-offset #x180100)
:long-name "Indian 1 Column"
:iso-final-char ?6
:emacs-mule-id 251
+ :supplementary-p t
:code-space [33 126 33 126]
:code-offset #x184000)
:long-name "Indian 2 Column"
:iso-final-char ?5
:emacs-mule-id 251
+ :supplementary-p t
:code-space [33 126 33 126]
:code-offset #x184000)
:iso-final-char ?7
:emacs-mule-id 252
:unify-map "MULE-tibetan"
+ :supplementary-p t
:code-space [33 126 33 37]
:code-offset #x190000)
:long-name "Tibetan 1 column"
:iso-final-char ?8
:emacs-mule-id 241
+ :supplementary-p t
:code-space [33 126 33 37]
:code-offset #x190000)
:long-name "Unicode subset (U+2500..U+33FF)"
:iso-final-char ?2
:emacs-mule-id 242
+ :supplementary-p t
:code-space [#x20 #x7f #x20 #x47]
:code-offset #x2500)
:long-name "Unicode subset (U+E000+FFFF)"
:iso-final-char ?3
:emacs-mule-id 243
+ :supplementary-p t
:code-space [#x20 #x7F #x20 #x75]
:code-offset #xE000
:max-code 30015) ; U+FFFF
:long-name "Unicode subset (U+0100..U+24FF)"
:iso-final-char ?1
:emacs-mule-id 244
+ :supplementary-p t
:code-space [#x20 #x7F #x20 #x7F]
:code-offset #x100)
:long-name "Ethiopic characters"
:iso-final-char ?3
:emacs-mule-id 245
+ :supplementary-p t
:unify-map "MULE-ethiopic"
:code-space [33 126 33 126]
:code-offset #x1A0000)
"Raw text, which means text contains random 8-bit codes.
Encoding text with this coding system produces the actual byte
sequence of the text in buffers and strings. An exception is made for
-eight-bit-control characters. Each of them is encoded into a single
-byte.
+characters from the `eight-bit' character set. Each of them is encoded
+into a single byte.
When you visit a file with this coding, the file is read into a
unibyte buffer as is (except for EOL format), thus each byte of a file
:coding-type 'raw-text
:eol-type 'unix
:mnemonic ?=)
-
+
(define-coding-system 'iso-latin-1
"ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
:coding-type 'charset
:mnemonic ?M)
+;; Plain UTF-8 coding system: no signature (BOM) per its docstring,
+;; restricted to the `unicode' charset, with MIME charset `utf-8'.
(define-coding-system 'utf-8
- "UTF-8."
+ "UTF-8 (no signature (BOM))"
:coding-type 'utf-8
:mnemonic ?U
:charset-list '(unicode)
:mime-charset 'utf-8)
+;; UTF-8 variant defined with `:bom t'; per its docstring this is the
+;; form that carries a signature (BOM).
+(define-coding-system 'utf-8-with-signature
+ "UTF-8 (with signature (BOM))"
+ :coding-type 'utf-8
+ :mnemonic ?U
+ :charset-list '(unicode)
+ :bom t)
+
+;; UTF-8 variant whose :bom value is the pair
+;; (utf-8-with-signature . utf-8); per its docstring the signature
+;; (BOM) is auto-detected.
+;; NOTE(review): presumably the car is used when a BOM is found and
+;; the cdr otherwise -- confirm against `define-coding-system'.
+(define-coding-system 'utf-8-auto
+ "UTF-8 (auto-detect signature (BOM))"
+ :coding-type 'utf-8
+ :mnemonic ?U
+ :charset-list '(unicode)
+ :bom '(utf-8-with-signature . utf-8))
+
(define-coding-system-alias 'mule-utf-8 'utf-8)
(define-coding-system 'utf-8-emacs
:mnemonic ?U
:charset-list '(emacs))
+;; The encoding used internally. This encoding is meant to be able to save
+;; any multibyte buffer without losing information. It can change between
+;; Emacs releases, though, so it should only be used for internal files.
+(define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
+
(define-coding-system 'utf-16le
"UTF-16LE (little endian, no signature (BOM))."
:coding-type 'utf-16
:mime-charset 'utf-16)
(define-coding-system 'utf-16be-with-signature
- "UTF-16 (big endian, with signature)."
+ "UTF-16 (big endian, with signature (BOM))."
:coding-type 'utf-16
:mnemonic ?U
:charset-list '(unicode)
:flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
(define-coding-system 'compound-text
- "Compound text based generic encoding for decoding unknown messages.
-
-This coding system does not support extended segments of CTEXT."
+ "Compound text based generic encoding.
+This coding system is an extension of X's \"Compound Text Encoding\".
+It encodes many characters using the normal ISO-2022 designation sequences,
+but it doesn't support extended segments of CTEXT."
:coding-type 'iso-2022
:mnemonic ?x
:charset-list 'iso-2022
;; not have a mime-charset property, to prevent it from showing up
;; close to the beginning of coding systems ordered by priority.
(define-coding-system 'ctext-no-compositions
- "Compound text based generic encoding for decoding unknown messages.
+ "Compound text based generic encoding.
Like `compound-text', but does not produce escape sequences for compositions."
:coding-type 'iso-2022
(define-coding-system 'compound-text-with-extensions
"Compound text encoding with ICCCM Extended Segment extensions.
-See the variable `ctext-non-standard-encodings-alist' for the
-detail about how extended segments are handled.
+See the variables `ctext-standard-encodings' and
+`ctext-non-standard-encodings-alist' for the detail about how
+extended segments are handled.
This coding system should be used only for X selections. It is inappropriate
for decoding and encoding files, process I/O, etc."
:pre-write-conversion 'utf-7-pre-write-conversion
:post-read-conversion 'utf-7-post-read-conversion)
+;; IMAP flavour of UTF-7 (RFC 2060 per the docstring). The coding
+;; type is declared as `utf-8'; the UTF-7 transformation itself is
+;; presumably performed by the pre-write and post-read conversion
+;; functions named below -- confirm in their definitions.
+(define-coding-system 'utf-7-imap
+ "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
+ :coding-type 'utf-8
+ :mnemonic ?u
+ :charset-list '(unicode)
+ :pre-write-conversion 'utf-7-imap-pre-write-conversion
+ :post-read-conversion 'utf-7-imap-post-read-conversion)
+
;; Use us-ascii for terminal output if some other coding system is not
;; specified explicitly.
(set-safe-terminal-coding-system-internal 'us-ascii)
;; Tar files are not decoded at all, but we treat them as raw bytes.
(setq file-coding-system-alist
+ (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
'(("\\.elc\\'" . utf-8-emacs)
("\\.utf\\(-8\\)?\\'" . utf-8)
- ;; This is the defined default for XML documents. It may be
- ;; overridden by a charset specification in the header. That
- ;; should be grokked by the auto-coding mechanism, but rms
- ;; vetoed that. -- fx
- ("\\.xml\\'" . utf-8)
+ ("\\.xml\\'" . xml-find-file-coding-system)
;; We use raw-text for reading loaddefs.el so that if it
;; happens to have DOS or Mac EOLs, they are converted to
;; newlines. This is required to make the special treatment
("\\.tar\\'" . (no-conversion . no-conversion))
( "\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
- ("" . (undecided . nil))))
+ ("" . (undecided . nil)))))
\f
;;; Setting coding categories and their priorities.
(aset latin-extra-code-table ?\225 t)
(aset latin-extra-code-table ?\226 t)
-;; Move least specific charsets to end of priority list
-
-(apply #'set-charset-priority
- (delq 'unicode (delq 'emacs (charset-priority-list))))
-
;; The old code-pages library is obsoleted by coding systems based on
;; the charsets defined in this file but might be required by user
;; code.
(provide 'code-pages)
-;; Local variables:
-;; no-byte-compile: t
-;; End:
-
;; arch-tag: 7d5fed55-b6df-42f6-8d3d-0011190551f5
;;; mule-conf.el ends here