HCoop Git - bpt/emacs.git/blame_incremental - lisp/international/mule-conf.el

... / ...

Commit	Line	Data
	1	;;; mule-conf.el --- configure multilingual environment
	2
	3	;; Copyright (C) 1997-2011 Free Software Foundation, Inc.
	4	;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
	5	;; National Institute of Advanced Industrial Science and Technology (AIST)
	6	;; Registration Number H14PRO021
	7	;; Copyright (C) 2003
	8	;; National Institute of Advanced Industrial Science and Technology (AIST)
	9	;; Registration Number H13PRO009
	10
	11	;; Keywords: i18n, mule, multilingual, character set, coding system
	12
	13	;; This file is part of GNU Emacs.
	14
	15	;; GNU Emacs is free software: you can redistribute it and/or modify
	16	;; it under the terms of the GNU General Public License as published by
	17	;; the Free Software Foundation, either version 3 of the License, or
	18	;; (at your option) any later version.
	19
	20	;; GNU Emacs is distributed in the hope that it will be useful,
	21	;; but WITHOUT ANY WARRANTY; without even the implied warranty of
	22	;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	23	;; GNU General Public License for more details.
	24
	25	;; You should have received a copy of the GNU General Public License
	26	;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
	27
	28	;;; Commentary:
	29
	30	;; This file defines the Emacs charsets and some basic coding systems.
	31	;; Other coding systems are defined in the files in directory
	32	;; lisp/language.
	33
	34	;;; Code:
	35
	36	;;; Remarks
	37
	38	;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/.
	39	;; Standards docs equivalent to iso-2022 and iso-8859 are at
	40	;; http://www.ecma.ch/.
	41
	42	;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
	43	;; MS Windows, which are presumably the only charsets we really need
	44	;; to worry about on such systems:
	45	;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
	46	;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
	47	;; 1258, 874, 932, 936, 949, 950
	48
	49	;;; Definitions of character sets.
	50
	51	;; The charsets `ascii', `unicode', `emacs' and `eight-bit' are already
	52	;; defined in charset.c as below:
	53	;;
	54	;; (define-charset 'ascii
	55	;; ""
	56	;; :dimension 1
	57	;; :code-space [0 127]
	58	;; :iso-final-char ?B
	59	;; :ascii-compatible-p t
	60	;; :emacs-mule-id 0
	61	;; :code-offset 0)
	62	;;
	63	;; (define-charset 'unicode
	64	;; ""
	65	;; :dimension 3
	66	;; :code-space [0 255 0 255 0 16]
	67	;; :ascii-compatible-p t
	68	;; :code-offset 0)
	69	;;
	70	;; (define-charset 'emacs
	71	;; ""
	72	;; :dimension 3
	73	;; :code-space [0 255 0 255 0 63]
	74	;; :ascii-compatible-p t
	75	;; :supplementary-p t
	76	;; :code-offset 0)
	77	;;
	78	;; (define-charset 'eight-bit
	79	;; ""
	80	;; :dimension 1
	81	;; :code-space [128 255]
	82	;; :code-offset #x3FFF80)
	83	;;
	84	;; We now set :docstring, :short-name, and :long-name properties.
	85
	86	;; Attach descriptive properties to the charsets that already exist
	87	;; when this file is loaded.  Each entry is (CHARSET PROP VALUE ...);
	88	;; the properties are stored in the order in which they are listed.
	89	(dolist (spec
	90	         '((ascii
	91	            :docstring "ASCII (ISO646 IRV)"
	92	            :short-name "ASCII"
	93	            :long-name "ASCII (ISO646 IRV)")
	94	           (iso-8859-1
	95	            :docstring "Latin-1 (ISO/IEC 8859-1)"
	96	            :short-name "Latin-1"
	97	            :long-name "Latin-1")
	98	           (unicode
	99	            :docstring "Unicode (ISO10646)"
	100	            :short-name "Unicode"
	101	            :long-name "Unicode (ISO10646)")
	102	           (emacs
	103	            :docstring "Full Emacs charset (excluding eight bit chars)"
	104	            :short-name "Emacs"
	105	            :long-name "Emacs")
	106	           (eight-bit
	107	            :docstring "Raw bytes 128-255"
	108	            :short-name "Raw bytes")))
	109	  (let ((cs (car spec)) (plist (cdr spec)))
	110	    (while plist
	111	      (put-charset-property cs (car plist) (cadr plist))
	112	      (setq plist (cddr plist)))))
	113
	114	(define-charset-alias 'ucs 'unicode) ; `ucs' is a second name for `unicode'
	115
	116	(define-charset 'latin-iso8859-1
	117	"Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
	118	:short-name "RHP of Latin-1"
	119	:long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
	120	:iso-final-char ?A
	121	:emacs-mule-id 129
	122	:code-space [32 127] ; 96 code points
	123	:code-offset 160) ; code 32 + n is character 160 + n
	124
	125	;; Name perhaps not ideal, but is XEmacs-compatible.
	126	(define-charset 'control-1
	127	"8-bit control code (0x80..0x9F)"
	128	:short-name "8-bit control code"
	129	:code-space [128 159]
	130	:code-offset 128) ; identity mapping: code N is character N
	131
	132	(define-charset 'eight-bit-control ; same code range as `control-1', different characters
	133	"Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
	134	:short-name "Raw bytes 0x80..0x9F"
	135	:supplementary-p t
	136	:code-space [128 159]
	137	:code-offset #x3FFF80) ; see character.h
	138
	139	(define-charset 'eight-bit-graphic ; continues right after `eight-bit-control' (#x3FFF80 + 32)
	140	"Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
	141	:short-name "Raw bytes 0xA0..0xFF"
	142	:supplementary-p t
	143	:code-space [160 255]
	144	:code-offset #x3FFFA0) ; see character.h
	145
	146	(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
	147	                                          iso-ir iso-final emacs-mule-id map)
	148	  ;; Define the 8-bit charset SYMBOL from MAP and, if ISO-SYMBOL is non-nil,
	149	  ;; ISO-SYMBOL as the subset of SYMBOL's codes 160..255 moved by -128.
	150	  (list 'progn
	151	        `(define-charset ,symbol
	152	           ,name
	153	           :short-name ,nickname
	154	           :long-name ,name
	155	           :ascii-compatible-p t
	156	           :code-space [0 255]
	157	           :map ,map)
	158	        `(when ,iso-symbol
	159	           (define-charset ,iso-symbol
	160	             (if ,iso-ir
	161	                 (format "Right-Hand Part of %s (%s): ISO-IR-%d" ,name ,nickname ,iso-ir)
	162	               (format "Right-Hand Part of %s (%s)" ,name ,nickname))
	163	             :short-name (format "RHP of %s" ,name)
	164	             :long-name (format "RHP of %s (%s)" ,name ,nickname)
	165	             :iso-final-char ,iso-final
	166	             :emacs-mule-id ,emacs-mule-id
	167	             :code-space [32 127]
	168	             :subset (list ,symbol 160 255 -128)))))
	169
	170	(define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2 ; SYMBOL ISO-SYMBOL
	171	"ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2") ; NAME NICKNAME ISO-IR ISO-FINAL EMACS-MULE-ID MAP
	172
	173	(define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
	174	"ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
	175
	176	(define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
	177	"ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
	178
	179	(define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
	180	"ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
	181
	182	(define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
	183	"ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
	184
	185	(define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
	186	"ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
	187
	188	(define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
	189	"ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
	190
	191	(define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
	192	"ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
	193
	194	(define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
	195	"ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10") ; nil: no EMACS-MULE-ID is passed
	196
	197	;; http://www.nectec.or.th/it-standards/iso8859-11/
	198	;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
	199	;; plus nbsp
	200	(define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
	201	"ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11") ; nil: no EMACS-MULE-ID is passed
	202
	203	;; 8859-12 doesn't (yet?) exist.
	204
	205	(define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
	206	"ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13") ; nil: no EMACS-MULE-ID is passed
	207
	208	(define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
	209	"ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
	210
	211	(define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
	212	"ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
	213
	214	(define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
	215	"ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16") ; nil: no EMACS-MULE-ID is passed
	216
	217	;; No point in keeping it around.
	218	(fmakunbound 'define-iso-single-byte-charset) ; the macro is undefined once all its uses are done
	219
	220	;; Can this be shared with 8859-11?
	221	;; N.b. not all of these are defined in Unicode.
	222	(define-charset 'thai-tis620
	223	"TIS620.2533"
	224	:short-name "TIS620.2533"
	225	:iso-final-char ?T
	226	:emacs-mule-id 133
	227	:code-space [32 127]
	228	:code-offset #x0E00) ; code 32 + n is character #x0E00 + n
	229
	230	;; Fixme: doc for this, c.f. above
	231	(define-charset 'tis620-2533 ; 8-bit charset assembled from three parent charsets
	232	"TIS620.2533"
	233	:short-name "TIS620.2533"
	234	:ascii-compatible-p t
	235	:code-space [0 255]
	236	:superset '(ascii eight-bit-control (thai-tis620 . 128))) ; thai-tis620 codes are used with 128 added
	237
	238	(define-charset 'jisx0201 ; parent of the two :subset charsets below
	239	"JISX0201"
	240	:short-name "JISX0201"
	241	:code-space [0 #xDF]
	242	:map "JISX0201")
	243
	244	(define-charset 'latin-jisx0201
	245	"Roman Part of JISX0201.1976"
	246	:short-name "JISX0201 Roman"
	247	:long-name "Japanese Roman (JISX0201.1976)"
	248	:iso-final-char ?J
	249	:emacs-mule-id 138
	250	:supplementary-p t
	251	:code-space [33 126]
	252	:subset '(jisx0201 33 126 0)) ; parent codes 33..126, unshifted
	253
	254	(define-charset 'katakana-jisx0201
	255	"Katakana Part of JISX0201.1976"
	256	:short-name "JISX0201 Katakana"
	257	:long-name "Japanese Katakana (JISX0201.1976)"
	258	:iso-final-char ?I
	259	:emacs-mule-id 137
	260	:supplementary-p t
	261	:code-space [33 126]
	262	:subset '(jisx0201 161 254 -128)) ; parent codes 161..254, moved down by 128
	263
	264	(define-charset 'chinese-gb2312
	265	"GB2312 Chinese simplified: ISO-IR-58"
	266	:short-name "GB2312"
	267	:long-name "GB2312: ISO-IR-58"
	268	:iso-final-char ?A
	269	:emacs-mule-id 145
	270	:code-space [33 126 33 126] ; two bytes, 94 values each
	271	:code-offset #x110000
	272	:unify-map "GB2312")
	273
	274	(define-charset 'chinese-gbk
	275	"GBK Chinese simplified."
	276	:short-name "GBK"
	277	:code-space [#x40 #xFE #x81 #xFE]
	278	:code-offset #x160000
	279	:unify-map "GBK")
	280	(define-charset-alias 'cp936 'chinese-gbk) ; both aliases name the charset defined just above
	281	(define-charset-alias 'windows-936 'chinese-gbk)
	282
	283	(define-charset 'chinese-cns11643-1 ; planes 1-7 follow; each :code-offset is #x4000 above the previous
	284	"CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
	285	:short-name "CNS11643-1"
	286	:long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
	287	:iso-final-char ?G
	288	:emacs-mule-id 149
	289	:code-space [33 126 33 126]
	290	:code-offset #x114000
	291	:unify-map "CNS-1")
	292
	293	(define-charset 'chinese-cns11643-2
	294	"CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
	295	:short-name "CNS11643-2"
	296	:long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
	297	:iso-final-char ?H
	298	:emacs-mule-id 150
	299	:code-space [33 126 33 126]
	300	:code-offset #x118000
	301	:unify-map "CNS-2")
	302
	303	(define-charset 'chinese-cns11643-3
	304	"CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
	305	:short-name "CNS11643-3"
	306	:long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
	307	:iso-final-char ?I
	308	:code-space [33 126 33 126]
	309	:emacs-mule-id 246
	310	:code-offset #x11C000
	311	:unify-map "CNS-3")
	312
	313	(define-charset 'chinese-cns11643-4
	314	"CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
	315	:short-name "CNS11643-4"
	316	:long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
	317	:iso-final-char ?J
	318	:emacs-mule-id 247
	319	:code-space [33 126 33 126]
	320	:code-offset #x120000
	321	:unify-map "CNS-4")
	322
	323	(define-charset 'chinese-cns11643-5
	324	"CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
	325	:short-name "CNS11643-5"
	326	:long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
	327	:iso-final-char ?K
	328	:emacs-mule-id 248
	329	:code-space [33 126 33 126]
	330	:code-offset #x124000
	331	:unify-map "CNS-5")
	332
	333	(define-charset 'chinese-cns11643-6
	334	"CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
	335	:short-name "CNS11643-6"
	336	:long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
	337	:iso-final-char ?L
	338	:emacs-mule-id 249
	339	:code-space [33 126 33 126]
	340	:code-offset #x128000
	341	:unify-map "CNS-6")
	342
	343	(define-charset 'chinese-cns11643-7
	344	"CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
	345	:short-name "CNS11643-7"
	346	:long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
	347	:iso-final-char ?M
	348	:emacs-mule-id 250
	349	:code-space [33 126 33 126]
	350	:code-offset #x12C000
	351	:unify-map "CNS-7")
	352
	353	(define-charset 'big5 ; no :iso-final-char or :emacs-mule-id is given for this one
	354	"Big5 (Chinese traditional)"
	355	:short-name "Big5"
	356	:code-space [#x40 #xFE #xA1 #xFE]
	357	:code-offset #x130000
	358	:unify-map "BIG5")
	359	;; Fixme: AKA cp950 according to
	360	;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>. Is
	361	;; that correct?
	362
	363	(define-charset 'chinese-big5-1 ; Big5 level 1 re-coded into a 94x94 code space
	364	"Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
	365	:short-name "Big5 (Level-1)"
	366	:long-name "Big5 (Level-1) A141-C67E"
	367	:iso-final-char ?0
	368	:emacs-mule-id 152
	369	:supplementary-p t
	370	:code-space [#x21 #x7E #x21 #x7E]
	371	:code-offset #x135000
	372	:unify-map "BIG5-1")
	373
	374	(define-charset 'chinese-big5-2 ; same 94x94 code space as `chinese-big5-1', separate :code-offset
	375	"Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
	376	:short-name "Big5 (Level-2)"
	377	:long-name "Big5 (Level-2) C940-FEFE"
	378	:iso-final-char ?1
	379	:emacs-mule-id 153
	380	:supplementary-p t
	381	:code-space [#x21 #x7E #x21 #x7E]
	382	:code-offset #x137800
	383	:unify-map "BIG5-2")
	384
	385	(define-charset 'japanese-jisx0208 ; the Japanese 94x94 sets below are spaced #x4000 apart
	386	"JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
	387	:short-name "JISX0208"
	388	:long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
	389	:iso-final-char ?B
	390	:emacs-mule-id 146
	391	:code-space [33 126 33 126]
	392	:code-offset #x140000
	393	:unify-map "JISX0208")
	394
	395	(define-charset 'japanese-jisx0208-1978
	396	"JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
	397	:short-name "JISX0208.1978"
	398	:long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
	399	:iso-final-char ?@
	400	:emacs-mule-id 144
	401	:code-space [33 126 33 126]
	402	:code-offset #x144000
	403	:unify-map "JISC6226")
	404
	405	(define-charset 'japanese-jisx0212
	406	"JISX0212 Japanese supplement: ISO-IR-159"
	407	:short-name "JISX0212"
	408	:long-name "JISX0212 (Japanese): ISO-IR-159"
	409	:iso-final-char ?D
	410	:emacs-mule-id 148
	411	:code-space [33 126 33 126]
	412	:code-offset #x148000
	413	:unify-map "JISX0212")
	414
	415	;; Note that jisx0213 contains characters not in Unicode (3.2?). It's
	416	;; arguable whether it should have a unify-map.
	417	(define-charset 'japanese-jisx0213-1
	418	"JISX0213.2000 Plane 1 (Japanese)"
	419	:short-name "JISX0213-1"
	420	:iso-final-char ?O
	421	:emacs-mule-id 151
	422	:unify-map "JISX2131"
	423	:code-space [33 126 33 126]
	424	:code-offset #x14C000)
	425
	426	(define-charset 'japanese-jisx0213-2
	427	"JISX0213.2000 Plane 2 (Japanese)"
	428	:short-name "JISX0213-2"
	429	:iso-final-char ?P
	430	:emacs-mule-id 254
	431	:unify-map "JISX2132"
	432	:code-space [33 126 33 126]
	433	:code-offset #x150000)
	434
	435	(define-charset 'japanese-jisx0213-a ; defined by :map instead of :code-offset
	436	"JISX0213.2004 adds these characters to JISX0213.2000."
	437	:short-name "JISX0213A"
	438	:dimension 2
	439	:code-space [33 126 33 126]
	440	:supplementary-p t
	441	:map "JISX213A")
	442
	443	(define-charset 'japanese-jisx0213.2004-1
	444	"JISX0213.2004 Plane1 (Japanese)"
	445	:short-name "JISX0213.2004-1"
	446	:dimension 2
	447	:code-space [33 126 33 126]
	448	:iso-final-char ?Q
	449	:superset '(japanese-jisx0213-a japanese-jisx0213-1)) ; the 2004 additions plus the 2000 plane 1
	450
	451	(define-charset 'katakana-sjis
	452	"Katakana part of Shift-JIS"
	453	:dimension 1
	454	:code-space [#xA1 #xDF]
	455	:subset '(jisx0201 #xA1 #xDF 0) ; jisx0201 codes #xA1..#xDF, unshifted
	456	:supplementary-p t)
	457
	458	(define-charset 'cp932-2-byte
	459	"2-byte part of CP932"
	460	:dimension 2
	461	:map "CP932-2BYTE"
	462	:code-space [#x40 #xFC #x81 #xFC]
	463	:supplementary-p t)
	464
	465	(define-charset 'cp932 ; union of the two charsets above and `ascii'
	466	"CP932 (Microsoft shift-jis)"
	467	:code-space [#x00 #xFF #x00 #xFE]
	468	:short-name "CP932"
	469	:superset '(ascii katakana-sjis cp932-2-byte))
	470
	471	(define-charset 'korean-ksc5601
	472	"KSC5601 Korean Hangul and Hanja: ISO-IR-149"
	473	:short-name "KSC5601"
	474	:long-name "KSC5601 (Korean): ISO-IR-149"
	475	:iso-final-char ?C
	476	:emacs-mule-id 147
	477	:code-space [33 126 33 126]
	478	:code-offset #x279f94 ; ... #x27c217
	479	:unify-map "KSC5601")
	480
	481	(define-charset 'big5-hkscs ; :code-offset starts right after the korean-ksc5601 range above
	482	"Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
	483	:short-name "Big5"
	484	:code-space [#x40 #xFE #xA1 #xFE]
	485	:code-offset #x27c218 ; ... #x280839
	486	:unify-map "BIG5-HKSCS")
	487
	488	(define-charset 'cp949-2-byte
	489	"2-byte part of CP949"
	490	:dimension 2
	491	:map "CP949-2BYTE"
	492	:code-space [#x41 #xFE #x81 #xFD]
	493	:supplementary-p t)
	494
	495	(define-charset 'cp949 ; union of `ascii' and `cp949-2-byte'
	496	"CP949 (Korean)"
	497	:short-name "CP949"
	498	:long-name "CP949 (Korean)"
	499	:code-space [#x00 #xFE #x00 #xFD]
	500	:superset '(ascii cp949-2-byte))
	501
	502	(define-charset 'chinese-sisheng
	503	"SiSheng characters for PinYin/ZhuYin"
	504	:short-name "SiSheng"
	505	:long-name "SiSheng (PinYin/ZhuYin)"
	506	:iso-final-char ?0
	507	:emacs-mule-id 160
	508	:code-space [33 126]
	509	:unify-map "MULE-sisheng"
	510	:supplementary-p t
	511	:code-offset #x200000)
	512
	513	;; A subset of the 1989 version of IPA. It consists of the consonant
	514	;; signs used in English, French, German and Italian, and all vowels
	515	;; signs in the table. [says old MULE doc]
	516	(define-charset 'ipa
	517	"IPA (International Phonetic Association)"
	518	:short-name "IPA"
	519	:iso-final-char ?0
	520	:emacs-mule-id 161
	521	:unify-map "MULE-ipa"
	522	:code-space [32 127]
	523	:supplementary-p t
	524	:code-offset #x200080) ; #x80 above the `chinese-sisheng' offset
	525
	526	(define-charset 'viscii ; full 8-bit charset, defined by :map
	527	"VISCII1.1"
	528	:short-name "VISCII"
	529	:long-name "VISCII 1.1"
	530	:code-space [0 255]
	531	:map "VISCII")
	532
	533	(define-charset 'vietnamese-viscii-lower
	534	"VISCII1.1 lower-case"
	535	:short-name "VISCII lower"
	536	:long-name "VISCII lower-case"
	537	:iso-final-char ?1
	538	:emacs-mule-id 162
	539	:code-space [32 127]
	540	:code-offset #x200200
	541	:supplementary-p t
	542	:unify-map "MULE-lviscii")
	543
	544	(define-charset 'vietnamese-viscii-upper
	545	"VISCII1.1 upper-case"
	546	:short-name "VISCII upper"
	547	:long-name "VISCII upper-case"
	548	:iso-final-char ?2
	549	:emacs-mule-id 163
	550	:code-space [32 127]
	551	:code-offset #x200280 ; #x80 above the lower-case charset's offset
	552	:supplementary-p t
	553	:unify-map "MULE-uviscii")
	554
	555	(define-charset 'vscii
	556	"VSCII1.1 (TCVN-5712 VN1)"
	557	:short-name "VSCII"
	558	:code-space [0 255]
	559	:map "VSCII")
	560
	561	(define-charset-alias 'tcvn-5712 'vscii) ; `tcvn-5712' is a second name for `vscii'
	562
	563	;; Fixme: see note in tcvn.map about combining characters
	564	(define-charset 'vscii-2 ; no :short-name is given here
	565	"VSCII-2 (TCVN-5712 VN2)"
	566	:code-space [0 255]
	567	:map "VSCII-2")
	568
	569	(define-charset 'koi8-r ; this and the following forms: 8-bit charsets defined by a named :map
	570	"KOI8-R"
	571	:short-name "KOI8-R"
	572	:ascii-compatible-p t
	573	:code-space [0 255]
	574	:map "KOI8-R")
	575
	576	(define-charset-alias 'koi8 'koi8-r) ; `koi8' is a second name for `koi8-r'
	577
	578	(define-charset 'alternativnyj
	579	"ALTERNATIVNYJ"
	580	:short-name "alternativnyj"
	581	:ascii-compatible-p t
	582	:code-space [0 255]
	583	:map "ALTERNATIVNYJ")
	584
	585	(define-charset 'cp866
	586	"CP866"
	587	:short-name "cp866"
	588	:ascii-compatible-p t
	589	:code-space [0 255]
	590	:map "IBM866")
	591	(define-charset-alias 'ibm866 'cp866) ; alias matching the map name
	592
	593	(define-charset 'koi8-u
	594	"KOI8-U"
	595	:short-name "KOI8-U"
	596	:ascii-compatible-p t
	597	:code-space [0 255]
	598	:map "KOI8-U")
	599
	600	(define-charset 'koi8-t
	601	"KOI8-T"
	602	:short-name "KOI8-T"
	603	:ascii-compatible-p t
	604	:code-space [0 255]
	605	:map "KOI8-T")
	606
	607	(define-charset 'georgian-ps
	608	"GEORGIAN-PS"
	609	:short-name "GEORGIAN-PS"
	610	:ascii-compatible-p t
	611	:code-space [0 255]
	612	:map "KA-PS")
	613
	614	(define-charset 'georgian-academy
	615	"GEORGIAN-ACADEMY"
	616	:short-name "GEORGIAN-ACADEMY"
	617	:ascii-compatible-p t
	618	:code-space [0 255]
	619	:map "KA-ACADEMY")
	620
	621	(define-charset 'windows-1250 ; each windows-125N charset below gets a cp125N alias
	622	"WINDOWS-1250 (Central Europe)"
	623	:short-name "WINDOWS-1250"
	624	:ascii-compatible-p t
	625	:code-space [0 255]
	626	:map "CP1250")
	627	(define-charset-alias 'cp1250 'windows-1250)
	628
	629	(define-charset 'windows-1251
	630	"WINDOWS-1251 (Cyrillic)"
	631	:short-name "WINDOWS-1251"
	632	:ascii-compatible-p t
	633	:code-space [0 255]
	634	:map "CP1251")
	635	(define-charset-alias 'cp1251 'windows-1251)
	636
	637	(define-charset 'windows-1252
	638	"WINDOWS-1252 (Latin I)"
	639	:short-name "WINDOWS-1252"
	640	:ascii-compatible-p t
	641	:code-space [0 255]
	642	:map "CP1252")
	643	(define-charset-alias 'cp1252 'windows-1252)
	644
	645	(define-charset 'windows-1253
	646	"WINDOWS-1253 (Greek)"
	647	:short-name "WINDOWS-1253"
	648	:ascii-compatible-p t
	649	:code-space [0 255]
	650	:map "CP1253")
	651	(define-charset-alias 'cp1253 'windows-1253)
	652
	653	(define-charset 'windows-1254
	654	"WINDOWS-1254 (Turkish)"
	655	:short-name "WINDOWS-1254"
	656	:ascii-compatible-p t
	657	:code-space [0 255]
	658	:map "CP1254")
	659	(define-charset-alias 'cp1254 'windows-1254)
	660
	661	(define-charset 'windows-1255
	662	"WINDOWS-1255 (Hebrew)"
	663	:short-name "WINDOWS-1255"
	664	:ascii-compatible-p t
	665	:code-space [0 255]
	666	:map "CP1255")
	667	(define-charset-alias 'cp1255 'windows-1255)
	668
	669	(define-charset 'windows-1256
	670	"WINDOWS-1256 (Arabic)"
	671	:short-name "WINDOWS-1256"
	672	:ascii-compatible-p t
	673	:code-space [0 255]
	674	:map "CP1256")
	675	(define-charset-alias 'cp1256 'windows-1256)
	676
	677	(define-charset 'windows-1257
	678	"WINDOWS-1257 (Baltic)"
	679	:short-name "WINDOWS-1257"
	680	:ascii-compatible-p t
	681	:code-space [0 255]
	682	:map "CP1257")
	683	(define-charset-alias 'cp1257 'windows-1257)
	684
	685	(define-charset 'windows-1258
	686	"WINDOWS-1258 (Viet Nam)"
	687	:short-name "WINDOWS-1258"
	688	:ascii-compatible-p t
	689	:code-space [0 255]
	690	:map "CP1258")
	691	(define-charset-alias 'cp1258 'windows-1258)
	692
	693	(define-charset 'next
	694	"NEXT"
	695	:short-name "NEXT"
	696	:ascii-compatible-p t
	697	:code-space [0 255]
	698	:map "NEXTSTEP")
	699
	700	(define-charset 'cp1125
	701	"CP1125"
	702	:short-name "CP1125"
	703	:code-space [0 255]
	704	:ascii-compatible-p t
	705	:map "CP1125")
	706	(define-charset-alias 'ruscii 'cp1125) ; `ruscii' and `cp866u' both name `cp1125'
	707	;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
	708	(define-charset-alias 'cp866u 'cp1125)
	709
	710	;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html
	711	;; shows this as not ASCII compatible, with various graphics in
	712	;; 0x01-0x1F.
	713	(define-charset 'cp437 ; declared ASCII-compatible despite the Fixme above
	714	"CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
	715	:short-name "CP437"
	716	:code-space [0 255]
	717	:ascii-compatible-p t
	718	:map "IBM437")
	719
	720	(define-charset 'cp720
	721	"CP720 (Arabic)"
	722	:short-name "CP720"
	723	:code-space [0 255]
	724	:ascii-compatible-p t
	725	:map "CP720")
	726
	727	(define-charset 'cp737
	728	"CP737 (PC Greek)"
	729	:short-name "CP737"
	730	:code-space [0 255]
	731	:ascii-compatible-p t
	732	:map "CP737")
	733
	734	(define-charset 'cp775
	735	"CP775 (PC Baltic)"
	736	:short-name "CP775"
	737	:code-space [0 255]
	738	:ascii-compatible-p t
	739	:map "CP775")
	740
	741	(define-charset 'cp851
	742	"CP851 (Greek)"
	743	:short-name "CP851"
	744	:code-space [0 255]
	745	:ascii-compatible-p t
	746	:map "IBM851")
	747
	748	(define-charset 'cp852
	749	"CP852 (MS-DOS Latin-2)"
	750	:short-name "CP852"
	751	:code-space [0 255]
	752	:ascii-compatible-p t
	753	:map "IBM852")
	754
	755	(define-charset 'cp855
	756	"CP855 (IBM Cyrillic)"
	757	:short-name "CP855"
	758	:code-space [0 255]
	759	:ascii-compatible-p t
	760	:map "IBM855")
	761
	762	(define-charset 'cp857
	763	"CP857 (IBM Turkish)"
	764	:short-name "CP857"
	765	:code-space [0 255]
	766	:ascii-compatible-p t
	767	:map "IBM857")
	768
	769	(define-charset 'cp858
	770	"CP858 (Multilingual Latin I + Euro)"
	771	:short-name "CP858"
	772	:code-space [0 255]
	773	:ascii-compatible-p t
	774	:map "CP858")
	775	(define-charset-alias 'cp00858 'cp858) ; IANA has IBM00858/CP00858
	776
	777	(define-charset 'cp860
	778	"CP860 (MS-DOS Portuguese)"
	779	:short-name "CP860"
	780	:code-space [0 255]
	781	:ascii-compatible-p t
	782	:map "IBM860")
	783
	784	(define-charset 'cp861
	785	"CP861 (MS-DOS Icelandic)"
	786	:short-name "CP861"
	787	:code-space [0 255]
	788	:ascii-compatible-p t
	789	:map "IBM861")
	790
	791	(define-charset 'cp862
	792	"CP862 (PC Hebrew)"
	793	:short-name "CP862"
	794	:code-space [0 255]
	795	:ascii-compatible-p t
	796	:map "IBM862")
	797
	798	(define-charset 'cp863
	799	"CP863 (MS-DOS Canadian French)"
	800	:short-name "CP863"
	801	:code-space [0 255]
	802	:ascii-compatible-p t
	803	:map "IBM863")
	804
	805	(define-charset 'cp864
	806	"CP864 (PC Arabic)"
	807	:short-name "CP864"
	808	:code-space [0 255]
	809	:ascii-compatible-p t
	810	:map "IBM864")
	811
	812	(define-charset 'cp865
	813	"CP865 (MS-DOS Nordic)"
	814	:short-name "CP865"
	815	:code-space [0 255]
	816	:ascii-compatible-p t
	817	:map "IBM865")
	818
	819	(define-charset 'cp869
	820	"CP869 (IBM Modern Greek)"
	821	:short-name "CP869"
	822	:code-space [0 255]
	823	:ascii-compatible-p t
	824	:map "IBM869")
	825
	826	(define-charset 'cp874
	827	"CP874 (IBM Thai)"
	828	:short-name "CP874"
	829	:code-space [0 255]
	830	:ascii-compatible-p t
	831	:map "IBM874")
	832
	833	;; For Arabic, we need three different types of character sets.
	834	;; Digits are of direction left-to-right and of width 1-column.
	835	;; Others are of direction right-to-left and of width 1-column or
	836	;; 2-column.
	837	(define-charset 'arabic-digit
	838	"Arabic digit"
	839	:short-name "Arabic digit"
	840	:iso-final-char ?2
	841	:emacs-mule-id 164
	842	:supplementary-p t
	843	:code-space [34 42] ; only nine code points
	844	:code-offset #x0600)
	845
	846	(define-charset 'arabic-1-column
	847	"Arabic 1-column"
	848	:short-name "Arabic 1-col"
	849	:long-name "Arabic 1-column"
	850	:iso-final-char ?3
	851	:emacs-mule-id 165
	852	:supplementary-p t
	853	:code-space [33 126]
	854	:code-offset #x200100)
	855
	856	(define-charset 'arabic-2-column
	857	"Arabic 2-column"
	858	:short-name "Arabic 2-col"
	859	:long-name "Arabic 2-column"
	860	:iso-final-char ?4
	861	:emacs-mule-id 224
	862	:supplementary-p t
	863	:code-space [33 126]
	864	:code-offset #x200180) ; #x80 above the 1-column charset's offset
	865
	866	;; Lao script.
	867	;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
	868	;; Not all of them are defined in Unicode.
	869	(define-charset 'lao
	870	"Lao characters (ISO10646 0E81..0EDF)"
	871	:short-name "Lao"
	872	:iso-final-char ?1
	873	:emacs-mule-id 167
	874	:supplementary-p t
	875	:code-space [33 126]
	876	:code-offset #x0E81)
	877
	878	(define-charset 'mule-lao ; 8-bit charset assembled from three parent charsets
	879	"Lao characters (ISO10646 0E81..0EDF)"
	880	:short-name "Lao"
	881	:code-space [0 255]
	882	:supplementary-p t
	883	:superset '(ascii eight-bit-control (lao . 128))) ; lao codes are used with 128 added
	884
	885
	886	;; Indian scripts. Symbolic charset for data exchange. Glyphs are
	887	;; not assigned. They are automatically converted to each Indian
	888	;; script which IS-13194 supports.
	889
	890	(define-charset 'indian-is13194
	891	"Generic Indian charset for data exchange with IS 13194"
	892	:short-name "IS 13194"
	893	:long-name "Indian IS 13194"
	894	:iso-final-char ?5
	895	:emacs-mule-id 225
	896	:supplementary-p t
	897	:code-space [33 126]
	898	:code-offset #x180000)
	899
	900	;; Define one 256-code glyph charset per script and font.  The
	901	;; charsets are laid out back to back, #x100 apart, starting at
	902	;; #x180100: first every CDAC charset, then every AKRUTI charset.
	903	;; Each table entry is (NAME-SUFFIX FONT-LABEL SCRIPT...).
	904	(let ((code-offset #x180100))
	905	  (dolist (glyph-font
	906	           '(("cdac" "CDAC"
	907	              devanagari sanskrit bengali tamil telugu assamese
	908	              oriya kannada malayalam gujarati punjabi)
	909	             ("akruti" "AKRUTI"
	910	              devanagari bengali punjabi gujarati
	911	              oriya tamil telugu kannada malayalam)))
	912	    (dolist (script (nthcdr 2 glyph-font))
	913	      (let ((script-label (capitalize (symbol-name script))))
	914	        (define-charset (intern (format "%s-%s" script (car glyph-font)))
	915	          (format "Glyphs of %s script for %s font. Subset of `indian-glyph'."
	916	                  script-label (nth 1 glyph-font))
	917	          :short-name (format "%s %s glyphs" (nth 1 glyph-font) script-label)
	918	          :supplementary-p t
	919	          :code-space [0 255]
	920	          :code-offset code-offset))
	921	      (setq code-offset (+ code-offset #x100)))))
	922
	923	(define-charset 'indian-glyph
	924	"Glyphs for Indian characters."
	925	:short-name "Indian glyph"
	926	:iso-final-char ?4
	927	:emacs-mule-id 240
	928	:supplementary-p t
	929	:code-space [32 127 32 127] ; two bytes, 96 values each
	930	:code-offset #x180100) ; same offset at which the per-script glyph charsets begin
	931
	932	;; Actual Glyph for 1-column width.
	933	(define-charset 'indian-1-column
	934	"Indian charset for 1-column width glyphs."
	935	:short-name "Indian 1-col"
	936	:long-name "Indian 1 Column"
	937	:iso-final-char ?6
	938	:emacs-mule-id 240 ; legacy emacs-mule ID of this charset; 251 belongs to `indian-2-column'
	939	:supplementary-p t
	940	:code-space [33 126 33 126]
	941	:code-offset #x184000)
	942
	943	;; Actual Glyph for 2-column width.
	944	(define-charset 'indian-2-column
	945	"Indian charset for 2-column width glyphs."
	946	:short-name "Indian 2-col"
	947	:long-name "Indian 2 Column"
	948	:iso-final-char ?5
	949	:emacs-mule-id 251
	950	:supplementary-p t
	951	:code-space [33 126 33 126]
	952	:code-offset #x184000) ; NOTE(review): same offset as `indian-1-column' -- confirm this overlap is intended
	953
	954	;; Tibetan glyphs of 2-column width (see the short and long names).
	955	(define-charset 'tibetan
	956	"Tibetan characters"
	957	:short-name "Tibetan 2-col"
	958	:long-name "Tibetan 2 column"
	959	:iso-final-char ?7
	960	:emacs-mule-id 252
	961	:unify-map "MULE-tibetan"
	962	:supplementary-p t
	963	:code-space [33 126 33 37]
	964	:code-offset #x190000)
	965
	966	(define-charset 'tibetan-1-column
	967	"Tibetan 1 column glyph"
	968	:short-name "Tibetan 1-col"
	969	:long-name "Tibetan 1 column"
	970	:iso-final-char ?8
	971	:emacs-mule-id 241
	972	:supplementary-p t
	973	:code-space [33 126 33 37]
	974	:code-offset #x190000) ; NOTE(review): same offset and code space as `tibetan' -- confirm intended
	975
	976	;; Subsets of Unicode.
	977	(define-charset 'mule-unicode-2500-33ff
	978	"Unicode characters of the range U+2500..U+33FF."
	979	:short-name "Unicode subset 2"
	980	:long-name "Unicode subset (U+2500..U+33FF)"
	981	:iso-final-char ?2
	982	:emacs-mule-id 242
	983	:supplementary-p t
	984	:code-space [#x20 #x7f #x20 #x47] ; 96 x 40 = #xF00 code points, i.e. U+2500..U+33FF
	985	:code-offset #x2500)
	986
	987	(define-charset 'mule-unicode-e000-ffff
	988	"Unicode characters of the range U+E000..U+FFFF."
	989	:short-name "Unicode subset 3"
	990	:long-name "Unicode subset (U+E000..U+FFFF)"
	991	:iso-final-char ?3
	992	:emacs-mule-id 243
	993	:supplementary-p t
	994	:code-space [#x20 #x7F #x20 #x75]
	995	:code-offset #xE000
	996	:max-code 30015) ; U+FFFF; 30015 = #x753F, the code point of index #x1FFF
	997
	998	(define-charset 'mule-unicode-0100-24ff
	999	"Unicode characters of the range U+0100..U+24FF."
	1000	:short-name "Unicode subset"
	1001	:long-name "Unicode subset (U+0100..U+24FF)"
	1002	:iso-final-char ?1
	1003	:emacs-mule-id 244
	1004	:supplementary-p t
	1005	:code-space [#x20 #x7F #x20 #x7F] ; 96 x 96 = #x2400 code points, i.e. U+0100..U+24FF
	1006	:code-offset #x100)
	1007
	1008	(define-charset 'unicode-bmp
	1009	"Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
	1010	:short-name "Unicode BMP"
	1011	:code-space [0 255 0 255] ; two bytes covering one full 65536-character plane
	1012	:code-offset 0)
	1013
	1014	(define-charset 'unicode-smp
	1015	"Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
	1016	:short-name "Unicode SMP"
	1017	:code-space [0 255 0 255] ; two bytes covering one full 65536-character plane
	1018	:code-offset #x10000)
	1019
	1020	(define-charset 'unicode-sip
	1021	"Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
	1022	:short-name "Unicode SIP"
	1023	:code-space [0 255 0 255]
	1024	:code-offset #x20000)
	1025
	1026	(define-charset 'unicode-ssp
	1027	"Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
	1028	:short-name "Unicode SSP"
	1029	:code-space [0 255 0 255]
	1030	:code-offset #xE0000)
	1031
	1032	(define-charset 'ethiopic
	1033	"Ethiopic characters for Amharic and Tigrigna."
	1034	:short-name "Ethiopic"
	1035	:long-name "Ethiopic characters"
	1036	:iso-final-char ?3
	1037	:emacs-mule-id 245
	1038	:supplementary-p t
	1039	:unify-map "MULE-ethiopic"
	1040	:code-space [33 126 33 126]
	1041	:code-offset #x1A0000)
	1042
	1043	(define-charset 'mac-roman
	1044	"Mac Roman charset"
	1045	:short-name "Mac Roman"
	1046	:ascii-compatible-p t
	1047	:code-space [0 255] ; one dimension covering every byte value
	1048	:map "MACINTOSH") ; characters come from a named map instead of a :code-offset
	1049
	1050	;; Fixme: should modern EBCDIC variants, e.g. IBM00924, be defined too?
	1051	(define-charset 'ebcdic-us
	1052	"US version of EBCDIC"
	1053	:short-name "EBCDIC-US"
	1054	:code-space [0 255] ; one dimension covering every byte value; no :ascii-compatible-p here
	1055	:mime-charset 'ebcdic-us ; same symbol as the charset name
	1056	:map "EBCDICUS")
	1057
	1058	(define-charset 'ebcdic-uk
	1059	"UK version of EBCDIC"
	1060	:short-name "EBCDIC-UK"
	1061	:code-space [0 255] ; one dimension covering every byte value; no :ascii-compatible-p here
	1062	:mime-charset 'ebcdic-uk ; same symbol as the charset name
	1063	:map "EBCDICUK")
	1064
	1065	(define-charset 'ibm1047
	1066	;; The description below is taken from groff:
	1067	"IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
	1068	:short-name "IBM1047"
	1069	:code-space [0 255] ; one dimension covering every byte value
	1070	:mime-charset 'ibm1047
	1071	:map "IBM1047")
	1072	(define-charset-alias 'cp1047 'ibm1047) ; `cp1047' becomes a second name for `ibm1047'
	1073
	1074	(define-charset 'hp-roman8
	1075	"Encoding used by Hewlett-Packard printer software"
	1076	:short-name "HP-ROMAN8"
	1077	:ascii-compatible-p t
	1078	:code-space [0 255] ; one dimension covering every byte value
	1079	:map "HP-ROMAN8") ; characters come from a named map instead of a :code-offset
	1080
	1081	;; To make a coding system from this charset, a pre-write-conversion
	1082	;; would have to account for the commented-out multi-valued code
	1083	;; points in stdenc.map.
	1084	(define-charset 'adobe-standard-encoding
	1085	"Adobe `standard encoding' used in PostScript"
	1086	:short-name "ADOBE-STANDARD-ENCODING"
	1087	:code-space [#x20 255] ; codes below #x20 are not part of the charset
	1088	:map "stdenc")
	1089
	1090	(define-charset 'symbol
	1091	"Adobe symbol encoding used in PostScript"
	1092	:short-name "ADOBE-SYMBOL"
	1093	:code-space [#x20 255] ; codes below #x20 are not part of the charset
	1094	:map "symbol") ; characters come from a named map instead of a :code-offset
	1095
	1096	(define-charset 'ibm850
	1097	"DOS codepage 850 (Latin-1)"
	1098	:short-name "IBM850"
	1099	:ascii-compatible-p t
	1100	:code-space [0 255] ; one dimension covering every byte value
	1101	:map "IBM850")
	1102	(define-charset-alias 'cp850 'ibm850) ; `cp850' becomes a second name for `ibm850'
	1103
	1104	(define-charset 'mik
	1105	"Bulgarian DOS codepage"
	1106	:short-name "MIK"
	1107	:ascii-compatible-p t
	1108	:code-space [0 255] ; one dimension covering every byte value
	1109	:map "MIK") ; characters come from a named map instead of a :code-offset
	1110
	1111	(define-charset 'ptcp154
	1112	"`Paratype' codepage (Asian Cyrillic)"
	1113	:short-name "PT154"
	1114	:ascii-compatible-p t
	1115	:code-space [0 255] ; one dimension covering every byte value
	1116	:mime-charset 'pt154 ; the same symbol is also registered as a charset alias below
	1117	:map "PTCP154")
	1118	(define-charset-alias 'pt154 'ptcp154) ; two further names for `ptcp154'
	1119	(define-charset-alias 'cp154 'ptcp154)
	1120
	1121	(define-charset 'gb18030-2-byte
	1122	"GB18030 2-byte (0x814E..0xFEFE)"
	1123	:code-space [#x40 #xFE #x81 #xFE] ; last byte #x40..#xFE, first byte #x81..#xFE
	1124	:supplementary-p t ; NOTE(review): code space starts at #x8140, docstring says 0x814E -- confirm
	1125	:map "GB180302")
	1126
	1127	(define-charset 'gb18030-4-byte-bmp
	1128	"GB18030 4-byte for BMP (0x81308130-0x8431A439)"
	1129	:code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84] ; byte ranges, last byte first
	1130	:supplementary-p t
	1131	:map "GB180304") ; characters come from a named map instead of a :code-offset
	1132
	1133	(define-charset 'gb18030-4-byte-smp
	1134	"GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
	1135	:code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3] ; byte ranges, last byte first
	1136	:min-code '(#x9030 . #x8130) ; (high 16 bits . low 16 bits) = #x90308130
	1137	:max-code '(#xE332 . #x9A35) ; = #xE3329A35, the upper bound given in the docstring
	1138	:supplementary-p t
	1139	:code-offset #x10000) ; the first code maps to U+10000
	1140
	1141	(define-charset 'gb18030-4-byte-ext-1
	1142	"GB18030 4-byte (0x8431A530-0x8F39FE39)"
	1143	:code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F] ; byte ranges, last byte first
	1144	:min-code '(#x8431 . #xA530) ; (high 16 bits . low 16 bits) = #x8431A530
	1145	:max-code '(#x8F39 . #xFE39) ; = #x8F39FE39
	1146	:supplementary-p t
	1147	:code-offset #x200000 ; characters #x200000 ... #x22484B, beyond the Unicode range
	1148	)
	1149
	1150	(define-charset 'gb18030-4-byte-ext-2
	1151	"GB18030 4-byte (0xE3329A36-0xFE39FE39)"
	1152	:code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE] ; byte ranges, last byte first
	1153	:min-code '(#xE332 . #x9A36) ; one past the :max-code of `gb18030-4-byte-smp'
	1154	:max-code '(#xFE39 . #xFE39) ; = #xFE39FE39
	1155	:supplementary-p t
	1156	:code-offset #x22484C ; characters #x22484C ... #x279F93; starts right after ext-1's #x22484B
	1157	)
	1158
	1159	(define-charset 'gb18030
	1160	"GB18030"
	1161	:code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
	1162	:min-code 0
	1163	:max-code '(#xFE39 . #xFE39) ; same upper bound as `gb18030-4-byte-ext-2'
	1164	:superset '(ascii gb18030-2-byte ; ASCII plus the five GB18030 pieces defined above
	1165	gb18030-4-byte-bmp gb18030-4-byte-smp
	1166	gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
	1167
	1168	(define-charset 'chinese-cns11643-15
	1169	"CNS11643 Plane 15 Chinese Traditional"
	1170	:short-name "CNS11643-15"
	1171	:long-name "CNS11643-15 (Chinese traditional)"
	1172	:code-space [33 126 33 126] ; 94 x 94 code points
	1173	:code-offset #x27A000) ; above #x279F93, where `gb18030-4-byte-ext-2' ends
	1174
	1175	(unify-charset 'chinese-gb2312) ; one `unify-charset' call per charset, 25 in all
	1176	(unify-charset 'chinese-gbk)
	1177	(unify-charset 'chinese-cns11643-1)
	1178	(unify-charset 'chinese-cns11643-2)
	1179	(unify-charset 'chinese-cns11643-3)
	1180	(unify-charset 'chinese-cns11643-4)
	1181	(unify-charset 'chinese-cns11643-5)
	1182	(unify-charset 'chinese-cns11643-6)
	1183	(unify-charset 'chinese-cns11643-7)
	1184	(unify-charset 'big5)
	1185	(unify-charset 'chinese-big5-1)
	1186	(unify-charset 'chinese-big5-2)
	1187	(unify-charset 'big5-hkscs)
	1188	(unify-charset 'korean-ksc5601)
	1189	(unify-charset 'vietnamese-viscii-lower)
	1190	(unify-charset 'vietnamese-viscii-upper)
	1191	(unify-charset 'chinese-sisheng)
	1192	(unify-charset 'ipa)
	1193	(unify-charset 'tibetan)
	1194	(unify-charset 'ethiopic) ; its definition above names the :unify-map "MULE-ethiopic"
	1195	(unify-charset 'japanese-jisx0208-1978)
	1196	(unify-charset 'japanese-jisx0208)
	1197	(unify-charset 'japanese-jisx0212)
	1198	(unify-charset 'japanese-jisx0213-1)
	1199	(unify-charset 'japanese-jisx0213-2)
	1200
	1201	\f
	1202	;; These are tables for translating characters on decoding and
	1203	;; encoding.
	1204	;; Fixme: these aren't used now -- should they be?
	1205	(setq standard-translation-table-for-decode nil) ; no table installed for decoding
	1206
	1207	(setq standard-translation-table-for-encode nil) ; no table installed for encoding
	1208	\f
	1209	;;; Make fundamental coding systems.
	1210
	1211	;; The coding systems `no-conversion' and `undecided' are already
	1212	;; defined in coding.c as below:
	1213	;;
	1214	;; (define-coding-system 'no-conversion
	1215	;; "..."
	1216	;; :coding-type 'raw-text
	1217	;; ...)
	1218	;; (define-coding-system 'undecided
	1219	;; "..."
	1220	;; :coding-type 'undecided
	1221	;; ...)
	1222
	1223	(define-coding-system-alias 'binary 'no-conversion) ; `binary' is another name for `no-conversion'
	1224	(define-coding-system-alias 'unix 'undecided-unix) ; the bare names `unix', `dos' and `mac'
	1225	(define-coding-system-alias 'dos 'undecided-dos) ; stand for the matching `undecided-*'
	1226	(define-coding-system-alias 'mac 'undecided-mac) ; coding systems
	1227
	1228	(define-coding-system 'raw-text
	1229	"Raw text, which means text contains random 8-bit codes.
	1230	Encoding text with this coding system produces the actual byte
	1231	sequence of the text in buffers and strings. An exception is made for
	1232	characters from the `eight-bit' character set. Each of them is encoded
	1233	into a single byte.
	1234
	1235	When you visit a file with this coding, the file is read into a
	1236	unibyte buffer as is (except for EOL format), thus each byte of a file
	1237	is treated as a character."
	1238	:coding-type 'raw-text
	1239	:for-unibyte t ; presumably what selects the unibyte buffer described in the docstring
	1240	:mnemonic ?t) ; no :eol-type given, unlike `no-conversion-multibyte'
	1241
	1242	(define-coding-system 'no-conversion-multibyte
	1243	"Like `no-conversion' but don't read a file into a unibyte buffer."
	1244	:coding-type 'raw-text ; same coding type as `raw-text', but without :for-unibyte
	1245	:eol-type 'unix ; EOL format is fixed rather than left open
	1246	:mnemonic ?=)
	1247
	1248	(define-coding-system 'iso-latin-1
	1249	"ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
	1250	:coding-type 'charset
	1251	:mnemonic ?1
	1252	:charset-list '(iso-8859-1) ; the only charset this coding system handles
	1253	:mime-charset 'iso-8859-1) ; also registered as an alias just below
	1254
	1255	(define-coding-system-alias 'iso-8859-1 'iso-latin-1) ; MIME-style name
	1256	(define-coding-system-alias 'latin-1 'iso-latin-1) ; short name
	1257
	1258	;; Coding systems not specific to each language environment.
	1259
	1260	(define-coding-system 'emacs-mule
	1261	"Emacs 21 internal format used in buffer and string."
	1262	:coding-type 'emacs-mule
	1263	:charset-list 'emacs-mule ; a symbol here, not a list of charsets as in e.g. `iso-latin-1'
	1264	:mnemonic ?M)
	1265
	1266	(define-coding-system 'utf-8
	1267	"UTF-8 (no signature (BOM))"
	1268	:coding-type 'utf-8
	1269	:mnemonic ?U
	1270	:charset-list '(unicode)
	1271	:mime-charset 'utf-8) ; no :bom property, unlike the two variants defined next
	1272
	1273	(define-coding-system 'utf-8-with-signature
	1274	"UTF-8 (with signature (BOM))"
	1275	:coding-type 'utf-8
	1276	:mnemonic ?U
	1277	:charset-list '(unicode)
	1278	:bom t) ; the signature named in the docstring; no :mime-charset is set here
	1279
	1280	(define-coding-system 'utf-8-auto
	1281	"UTF-8 (auto-detect signature (BOM))"
	1282	:coding-type 'utf-8
	1283	:mnemonic ?U
	1284	:charset-list '(unicode)
	1285	:bom '(utf-8-with-signature . utf-8)) ; pair of the with-BOM and without-BOM coding systems
	1286
	1287	(define-coding-system-alias 'mule-utf-8 'utf-8) ; `mule-utf-8' is another name for plain `utf-8'
	1288
	1289	(define-coding-system 'utf-8-emacs
	1290	"Support for all Emacs characters (including non-Unicode characters)."
	1291	:coding-type 'utf-8
	1292	:mnemonic ?U
	1293	:charset-list '(emacs)) ; `emacs' rather than `unicode', matching the docstring
	1294
	1295	;; The encoding used internally. This encoding is meant to be able to save
	1296	;; any multibyte buffer without losing information. It can change between
	1297	;; Emacs releases, though, so it should only be used for internal files.
	1298	(define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix) ; currently the -unix variant of `utf-8-emacs'
	1299
	1300	(define-coding-system 'utf-16le
	1301	"UTF-16LE (little endian, no signature (BOM))."
	1302	:coding-type 'utf-16
	1303	:mnemonic ?U
	1304	:charset-list '(unicode)
	1305	:endian 'little ; byte order is fixed; no :bom property
	1306	:mime-text-unsuitable t
	1307	:mime-charset 'utf-16le)
	1308
	1309	(define-coding-system 'utf-16be
	1310	"UTF-16BE (big endian, no signature (BOM))."
	1311	:coding-type 'utf-16
	1312	:mnemonic ?U
	1313	:charset-list '(unicode)
	1314	:endian 'big ; byte order is fixed; no :bom property
	1315	:mime-text-unsuitable t
	1316	:mime-charset 'utf-16be)
	1317
	1318	(define-coding-system 'utf-16le-with-signature
	1319	"UTF-16 (little endian, with signature (BOM))."
	1320	:coding-type 'utf-16
	1321	:mnemonic ?U
	1322	:charset-list '(unicode)
	1323	:bom t ; the signature named in the docstring
	1324	:endian 'little
	1325	:mime-text-unsuitable t
	1326	:mime-charset 'utf-16) ; same MIME name as `utf-16be-with-signature' and `utf-16'
	1327
	1328	(define-coding-system 'utf-16be-with-signature
	1329	"UTF-16 (big endian, with signature (BOM))."
	1330	:coding-type 'utf-16
	1331	:mnemonic ?U
	1332	:charset-list '(unicode)
	1333	:bom t ; the signature named in the docstring
	1334	:endian 'big
	1335	:mime-text-unsuitable t
	1336	:mime-charset 'utf-16) ; same MIME name as `utf-16le-with-signature' and `utf-16'
	1337
	1338	(define-coding-system 'utf-16
	1339	"UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
	1340	:coding-type 'utf-16
	1341	:mnemonic ?U
	1342	:charset-list '(unicode)
	1343	:bom '(utf-16le-with-signature . utf-16be-with-signature) ; (little-endian . big-endian) pair used for detection
	1344	:endian 'big ; the encoding-side byte order stated in the docstring
	1345	:mime-text-unsuitable t
	1346	:mime-charset 'utf-16)
	1347
	1348	;; Backwards compatibility (old names, also used by Mule-UCS). We
	1349	;; prefer the MIME names.  Note that the old hyphenated names map to
	1350	(define-coding-system-alias 'utf-16-le 'utf-16le-with-signature) ; the with-signature variants,
	1351	(define-coding-system-alias 'utf-16-be 'utf-16be-with-signature) ; not to `utf-16le' / `utf-16be'
	1352
	1353
	1354	(define-coding-system 'iso-2022-7bit
	1355	"ISO 2022 based 7-bit encoding using only G0."
	1356	:coding-type 'iso-2022
	1357	:mnemonic ?J
	1358	:charset-list 'iso-2022
	1359	:designation [(ascii t) nil nil nil] ; only the first of the four slots (G0) is populated
	1360	:flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition)) ; no shift flags
	1361
	1362	(define-coding-system 'iso-2022-7bit-ss2
	1363	"ISO 2022 based 7-bit encoding using SS2 for 96-charset."
	1364	:coding-type 'iso-2022
	1365	:mnemonic ?$
	1366	:charset-list 'iso-2022
	1367	:designation [(ascii 94) nil (nil 96) nil] ; 96-charsets go in the third slot (G2)
	1368	:flags '(short ascii-at-eol ascii-at-cntl 7-bit
	1369	designation single-shift composition)) ; `single-shift' is the SS2 of the docstring
	1370
	1371	(define-coding-system 'iso-2022-7bit-lock
	1372	"ISO-2022 coding system using Locking-Shift for 96-charset."
	1373	:coding-type 'iso-2022
	1374	:mnemonic ?&
	1375	:charset-list 'iso-2022
	1376	:designation [(ascii 94) (nil 96) nil nil] ; 96-charsets go in the second slot (G1)
	1377	:flags '(ascii-at-eol ascii-at-cntl 7-bit
	1378	designation locking-shift composition)) ; no `short' flag, unlike `iso-2022-7bit-ss2'
	1379
	1380	(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock) ; second name for the same coding system
	1381
	1382	(define-coding-system 'iso-2022-7bit-lock-ss2
	1383	"Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
	1384	:coding-type 'iso-2022
	1385	:mnemonic ?i
	1386	:charset-list '(ascii ; an explicit charset list, where the other ISO 2022 systems use 'iso-2022
	1387	japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
	1388	korean-ksc5601
	1389	chinese-gb2312
	1390	chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
	1391	chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
	1392	chinese-cns11643-7)
	1393	:designation [(ascii 94) ; one element per slot, four in all
	1394	(nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
	1395	(nil chinese-cns11643-2)
	1396	(nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
	1397	chinese-cns11643-6 chinese-cns11643-7)]
	1398	:flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
	1399	single-shift init-bol)) ; neither `designation' nor `composition' is listed here
	1400
	1401	(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2) ; second name for the same coding system
	1402
	1403	(define-coding-system 'iso-2022-8bit-ss2
	1404	"ISO 2022 based 8-bit encoding using SS2 for 96-charset."
	1405	:coding-type 'iso-2022
	1406	:mnemonic ?@
	1407	:charset-list 'iso-2022
	1408	:designation [(ascii 94) nil (nil 96) nil] ; identical to `iso-2022-7bit-ss2'
	1409	:flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)) ; 7bit-ss2 flags minus `short' and `7-bit'
	1410
	1411	(define-coding-system 'compound-text
	1412	"Compound text based generic encoding.
	1413	This coding system is an extension of X's \"Compound Text Encoding\".
	1414	It encodes many characters using the normal ISO-2022 designation sequences,
	1415	but it doesn't support extended segments of CTEXT."
	1416	:coding-type 'iso-2022
	1417	:mnemonic ?x
	1418	:charset-list 'iso-2022
	1419	:designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil] ; only the first two slots are used
	1420	:flags '(ascii-at-eol ascii-at-cntl long-form
	1421	designation locking-shift single-shift composition)
	1422	;; Fixme: this isn't a valid MIME charset and has to be
	1423	;; special-cased elsewhere -- fx
	1424	:mime-charset 'x-ctext) ; the same symbol is registered as an alias just below
	1425
	1426	(define-coding-system-alias 'x-ctext 'compound-text) ; two further names for `compound-text'
	1427	(define-coding-system-alias 'ctext 'compound-text)
	1428
	1429	;; Same as compound-text, but doesn't produce composition escape
	1430	;; sequences. Used in post-read and pre-write conversions of
	1431	;; compound-text-with-extensions, see mule.el. Note that this should
	1432	;; not have a mime-charset property, to prevent it from showing up
	1433	;; close to the beginning of coding systems ordered by priority.
	1434	(define-coding-system 'ctext-no-compositions
	1435	"Compound text based generic encoding.
	1436
	1437	Like `compound-text', but does not produce escape sequences for compositions."
	1438	:coding-type 'iso-2022
	1439	:mnemonic ?x
	1440	:charset-list 'iso-2022
	1441	:designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil] ; identical to `compound-text'
	1442	:flags '(ascii-at-eol ascii-at-cntl
	1443	designation locking-shift single-shift)) ; `compound-text' flags minus `long-form' and `composition'
	1444
	1445	(define-coding-system 'compound-text-with-extensions
	1446	"Compound text encoding with ICCCM Extended Segment extensions.
	1447
	1448	See the variables `ctext-standard-encodings' and
	1449	`ctext-non-standard-encodings-alist' for the detail about how
	1450	extended segments are handled.
	1451
	1452	This coding system should be used only for X selections. It is inappropriate
	1453	for decoding and encoding files, process I/O, etc."
	1454	:coding-type 'iso-2022
	1455	:mnemonic ?x
	1456	:charset-list 'iso-2022
	1457	:designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil] ; identical to `compound-text'
	1458	:flags '(ascii-at-eol ascii-at-cntl long-form
	1459	designation locking-shift single-shift) ; `compound-text' flags minus `composition'
	1460	:post-read-conversion 'ctext-post-read-conversion ; both conversion functions are defined
	1461	:pre-write-conversion 'ctext-pre-write-conversion) ; outside this file
	1462
	1463	(define-coding-system-alias ; two further names for `compound-text-with-extensions'
	1464	'x-ctext-with-extensions 'compound-text-with-extensions)
	1465	(define-coding-system-alias
	1466	'ctext-with-extensions 'compound-text-with-extensions)
	1467
	1468	(define-coding-system 'us-ascii
	1469	"Encode ASCII as-is and encode non-ASCII characters to `?'."
	1470	:coding-type 'charset
	1471	:mnemonic ?-
	1472	:charset-list '(ascii) ; ASCII is the only charset handled
	1473	:default-char ?? ; the replacement character `?' named in the docstring
	1474	:mime-charset 'us-ascii)
	1475
	1476	(define-coding-system-alias 'iso-safe 'us-ascii) ; `iso-safe' is another name for `us-ascii'
	1477
	1478	(define-coding-system 'utf-7
	1479	"UTF-7 encoding of Unicode (RFC 2152)."
	1480	:coding-type 'utf-8 ; NOTE(review): the UTF-7 transformation presumably lives in the two conversion functions -- confirm
	1481	:mnemonic ?U
	1482	:mime-charset 'utf-7
	1483	:charset-list '(unicode)
	1484	:pre-write-conversion 'utf-7-pre-write-conversion ; both functions are defined outside this file
	1485	:post-read-conversion 'utf-7-post-read-conversion)
	1486
	1487	(define-coding-system 'utf-7-imap
	1488	"UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
	1489	:coding-type 'utf-8 ; NOTE(review): as for `utf-7', the real work is presumably done by the conversion functions -- confirm
	1490	:mnemonic ?u
	1491	:charset-list '(unicode) ; no :mime-charset here, unlike `utf-7'
	1492	:pre-write-conversion 'utf-7-imap-pre-write-conversion ; both functions are defined outside this file
	1493	:post-read-conversion 'utf-7-imap-post-read-conversion)
	1494
	1495	;; Use us-ascii for terminal output unless some other coding system
	1496	;; is specified explicitly.
	1497	(set-safe-terminal-coding-system-internal 'us-ascii) ; `us-ascii' is defined above in this file
	1498
	1499	;; The other coding-systems are defined in each language specific
	1500	;; files under lisp/language.
	1501
	1502	;; Normally, set coding system to `undecided' before reading a file.
	1503	;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
	1504	;; but we regard them as containing multibyte characters.
	1505	;; Tar files are not decoded at all, but we treat them as raw bytes.
	1506
	1507	(setq file-coding-system-alist ; alist of (FILE-NAME-REGEXP . CODING-SPEC-OR-FUNCTION)
	1508	(mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg))) ; purecopy each regexp, keep the cdr as is
	1509	'(("\\.elc\\'" . utf-8-emacs)
	1510	("\\.utf\\(-8\\)?\\'" . utf-8) ; matches both ".utf" and ".utf-8"
	1511	("\\.xml\\'" . xml-find-file-coding-system)
	1512	;; We use raw-text for reading loaddefs.el so that if it
	1513	;; happens to have DOS or Mac EOLs, they are converted to
	1514	;; newlines. This is required to make the special treatment
	1515	;; of the "\ newline" combination in loaddefs.el, which marks
	1516	;; the beginning of a doc string, work.
	1517	("\\(\\`\\|/\\)loaddefs.el\\'" . (raw-text . raw-text-unix)) ; bare name or last path component
	1518	("\\.tar\\'" . (no-conversion . no-conversion))
	1519	("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
	1520	("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
	1521	("" . (undecided . nil))))) ; catch-all: the empty regexp matches every file name
	1522
	1523	\f
	1524	;;; Setting coding categories and their priorities.
	1525
	1526	;; This setting is just for reading Emacs Lisp source files which
	1527	;; contain multilingual text while dumping Emacs. More appropriate
	1528	;; values are set by the command `set-language-environment' for each
	1529	;; language environment.
	1530
	1531	(set-coding-system-priority ; arguments are listed highest priority first
	1532	'iso-latin-1
	1533	'utf-8
	1534	'iso-2022-7bit
	1535	)
	1536
	1537	\f
	1538	;;; Miscellaneous settings.
	1539
	1540	;; Make all multibyte characters self-insert.
	1541	(set-char-table-range (nth 1 global-map) ; the second element of `global-map' is used as the char-table
	1542	(cons 128 (max-char)) ; every character code from 128 up to the largest one
	1543	'self-insert-command)
	1544
	1545	(aset latin-extra-code-table ?\221 t) ; octal 221..226 = #x91..#x96: six consecutive
	1546	(aset latin-extra-code-table ?\222 t) ; entries of the table are set to t
	1547	(aset latin-extra-code-table ?\223 t)
	1548	(aset latin-extra-code-table ?\224 t)
	1549	(aset latin-extra-code-table ?\225 t)
	1550	(aset latin-extra-code-table ?\226 t)
	1551
	1552	;; The old code-pages library is obsoleted by coding systems based on
	1553	;; the charsets defined in this file, but user code might still
	1554	;; `require' it, so the feature is provided from here.
	1555	(provide 'code-pages)
	1556
	1557	;;; mule-conf.el ends here