Fix up comment convention on the arch-tag lines.
[bpt/emacs.git] / lisp / language / cyrillic.el
CommitLineData
60370d40 1;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*-
4ed46869 2
38141d20 3;; Copyright (C) 1997, 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
d4877ac1 4;; Free Software Foundation, Inc.
7976eda0 5;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
38141d20 6;; 2005, 2006, 2007, 2008
eaa61218
KH
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H14PRO021
8f924df7
KH
9;; Copyright (C) 2003
10;; National Institute of Advanced Industrial Science and Technology (AIST)
11;; Registration Number H13PRO009
4ed46869 12
6f27e0f9
DL
13;; Author: Kenichi Handa <handa@etl.go.jp>
14;; Keywords: multilingual, Cyrillic, i18n
4ed46869
KH
15
16;; This file is part of GNU Emacs.
17
18;; GNU Emacs is free software; you can redistribute it and/or modify
19;; it under the terms of the GNU General Public License as published by
d7142f3e 20;; the Free Software Foundation; either version 3, or (at your option)
4ed46869
KH
21;; any later version.
22
23;; GNU Emacs is distributed in the hope that it will be useful,
24;; but WITHOUT ANY WARRANTY; without even the implied warranty of
25;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26;; GNU General Public License for more details.
27
28;; You should have received a copy of the GNU General Public License
369314dc 29;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
30;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
31;; Boston, MA 02110-1301, USA.
4ed46869
KH
32
33;;; Commentary:
34
6f27e0f9
DL
35;; The character set ISO8859-5 is supported. KOI-8 and ALTERNATIVNYJ
36;; are converted to Unicode internally. See
37;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>. For more info
38;; on Cyrillic charsets, see
39;; <URL:http://czyborra.com/charsets/cyrillic.html>. The KOI and
40;; Alternativnyj coding systems should live in code-pages.el, but
41;; they've always been preloaded and the coding system autoload
42;; mechanism didn't get accepted, so they have to stay here and
43;; duplicate code-pages stuff.
44
45;; Note that 8859-5 maps directly onto the Unicode Cyrillic block,
46;; apart from codepoints 160 (NBSP, c.f. U+0400), 173 (soft hyphen,
47;; c.f. U+040D) and 253 (section sign, c.f. U+045D). The KOI-8 and
48;; Alternativnyj coding systems encode both 8859-5 and Unicode.
49;; ucs-tables.el provides unification for cyrillic-iso-8bit.
50
d98276be 51;; Customizing `utf-fragment-on-decoding' allows decoding characters
6f27e0f9
DL
52;; from KOI and Alternativnyj into 8859-5 where that's possible.
53;; cyrillic-iso8859-5 characters take half as much space in the buffer
54;; as the mule-unicode-0100-24ff equivalents, though that's probably
55;; not normally a big deal.
4ed46869
KH
56
57;;; Code:
58
5f1d80c7
KH
59;; Cyrillic (general)
60
6f27e0f9 61;; ISO-8859-5 stuff
2b01336d 62
e1915ab3
KH
;; Charset-based coding system for ISO-8859-5 (mnemonic character `5',
;; MIME charset iso-8859-5).
(define-coding-system 'cyrillic-iso-8bit
  "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)."
  :coding-type 'charset
  :mnemonic ?5
  :charset-list '(iso-8859-5)
  :mime-charset 'iso-8859-5)

;; Make the plain name `iso-8859-5' refer to the coding system above.
(define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit)
efbc7e89 71
;; Register the "Cyrillic-ISO" language environment (ISO-8859-5 based)
;; with "Cyrillic" as its parent group.
;; NOTE(review): the `\e' sequences in sample-text presumably stand for
;; the raw ESC bytes of the iso-2022-7bit encoded file -- confirm.
(set-language-info-alist
 "Cyrillic-ISO" '((charset iso-8859-5)
                  (coding-system cyrillic-iso-8bit)
                  (coding-priority cyrillic-iso-8bit)
                  (input-method . "cyrillic-yawerty") ; fixme
                  (nonascii-translation . iso-8859-5)
                  (unibyte-display . cyrillic-iso-8bit)
                  (features cyril-util)
                  (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
                  (documentation . "Support for Cyrillic ISO-8859-5."))
 '("Cyrillic"))
5f1d80c7 83
6f27e0f9 84;; KOI-8R stuff
2b01336d 85
e1915ab3
KH
;; Charset-based coding system for KOI8-R, built on the `koi8' charset.
(define-coding-system 'cyrillic-koi8
  "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)."
  :coding-type 'charset
  ;; We used to use ?K.  It is true that ?K is more strictly correct,
  ;; but it is also used for Korean.  So people who use koi8 for
  ;; languages other than Russian will have to forgive us.
  :mnemonic ?R
  :charset-list '(koi8)
  :mime-charset 'koi8-r)

;; Alternative names that all resolve to `cyrillic-koi8'.
(define-coding-system-alias 'koi8-r 'cyrillic-koi8)
(define-coding-system-alias 'koi8 'cyrillic-koi8)
(define-coding-system-alias 'cp878 'cyrillic-koi8)
99
;; Register the "Cyrillic-KOI8" language environment with "Cyrillic" as
;; its parent group.  KOI8 is preferred over ISO-8859-5 in the coding
;; priority list.
;; NOTE(review): the `\e' sequences in sample-text presumably stand for
;; the raw ESC bytes of the iso-2022-7bit encoded file -- confirm.
(set-language-info-alist
 "Cyrillic-KOI8" `((charset koi8)
                   (coding-system cyrillic-koi8)
                   (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
                   (ctext-non-standard-encodings "koi8-r")
                   (nonascii-translation . koi8)
                   (input-method . "russian-typewriter")
                   (features cyril-util)
                   (unibyte-display . cyrillic-koi8)
                   (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
                   (documentation . "Support for Cyrillic KOI8-R."))
 '("Cyrillic"))
5f1d80c7 112
76509389
DL
;; Register the "Russian" language environment (KOI8-R coding, the
;; "russian-computer" input method and the Russian tutorial) with
;; "Cyrillic" as its parent group.
;; NOTE(review): `get' yields nil when the symbol
;; `cyrillic-koi8-r-nonascii-translation-table' has no
;; `translation-table' property; it is not defined in this file --
;; confirm it still exists, otherwise nonascii-translation is nil.
;; NOTE(review): the `\e' sequences in sample-text presumably stand for
;; the raw ESC bytes of the iso-2022-7bit encoded file -- confirm.
(set-language-info-alist
 "Russian" `((charset cyrillic-iso8859-5)
             (nonascii-translation
              . ,(get 'cyrillic-koi8-r-nonascii-translation-table
                      'translation-table))
             (coding-system cyrillic-koi8)
             (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
             (input-method . "russian-computer")
             (features cyril-util)
             (unibyte-display . cyrillic-koi8)
             (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
             (documentation . "\
Support for Russian using koi8-r and the russian-computer input method.")
             (tutorial . "TUTORIAL.ru"))
 '("Cyrillic"))
128
6b61353c
KH
;; Charset-based coding system for KOI8-U, built on the `koi8-u' charset.
(define-coding-system 'koi8-u
  "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
  :coding-type 'charset
  :mnemonic ?U
  :charset-list '(koi8-u)
  :mime-charset 'koi8-u)
135
;; Register the "Ukrainian" language environment (KOI8-U coding and the
;; "ukrainian-computer" input method) with "Cyrillic" as its parent group.
(set-language-info-alist
 "Ukrainian" `((charset koi8-u)
               (coding-system koi8-u)
               (coding-priority koi8-u)
               (nonascii-translation . koi8-u)
               (input-method . "ukrainian-computer")
               (documentation
                . "Support for Ukrainian with KOI8-U character set."))
 '("Cyrillic"))
145
6f27e0f9 146;;; ALTERNATIVNYJ stuff
2b01336d 147
e1915ab3
KH
;; Charset-based coding system for ALTERNATIVNYJ.  No :mime-charset is
;; given for this one.
(define-coding-system 'cyrillic-alternativnyj
  "ALTERNATIVNYJ 8-bit encoding for Cyrillic."
  :coding-type 'charset
  :mnemonic ?A
  :charset-list '(alternativnyj))

;; Make the short name `alternativnyj' refer to the coding system above.
(define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj)
efbc7e89 155
;; Register the "Cyrillic-ALT" language environment (ALTERNATIVNYJ
;; coding) with "Cyrillic" as its parent group.
;; NOTE(review): the `\e' sequences in sample-text presumably stand for
;; the raw ESC bytes of the iso-2022-7bit encoded file -- confirm.
(set-language-info-alist
 "Cyrillic-ALT" `((charset alternativnyj)
                  (coding-system cyrillic-alternativnyj)
                  (coding-priority cyrillic-alternativnyj)
                  (nonascii-translation . alternativnyj)
                  (input-method . "russian-typewriter")
                  (features cyril-util)
                  (unibyte-display . cyrillic-alternativnyj)
                  (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
                  (documentation . "Support for Cyrillic ALTERNATIVNYJ."))
 '("Cyrillic"))
4ed46869 167
d7a9db1c
DL
;; Charset-based coding system for codepage 866, built on the `ibm866'
;; charset.
(define-coding-system 'cp866
  "CP866 encoding for Cyrillic."
  :coding-type 'charset
  :mnemonic ?*
  :charset-list '(ibm866)
  :mime-charset 'cp866)
174
a07aa680
DL
;; `koi8-u' is already defined earlier in this file with exactly the
;; same arguments, so it is not defined a second time here.

;; Charset-based coding system for KOI8-T, built on the `koi8-t' charset.
(define-coding-system 'koi8-t
  "KOI8-T 8-bit encoding for Cyrillic"
  :coding-type 'charset
  :mnemonic ?*
  :charset-list '(koi8-t)
  :mime-charset 'koi8-t)
188
;; Charset-based coding system for Windows codepage 1251.
(define-coding-system 'windows-1251
  "windows-1251 8-bit encoding for Cyrillic (MIME: WINDOWS-1251)"
  :coding-type 'charset
  :mnemonic ?b
  :charset-list '(windows-1251)
  :mime-charset 'windows-1251)

;; `cp1251' is another name for the same coding system.
(define-coding-system-alias 'cp1251 'windows-1251)
196
;; Charset-based coding system for codepage 1125.  No :mime-charset is
;; given for this one.
(define-coding-system 'cp1125
  "cp1125 8-bit encoding for Cyrillic"
  :coding-type 'charset
  :mnemonic ?*
  :charset-list '(cp1125))

(define-coding-system-alias 'ruscii 'cp1125)
;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-coding-system-alias 'cp866u 'cp1125)
205
515a4f82
DL
;; Charset-based coding system for DOS codepage 855.
(define-coding-system 'cp855
  "DOS codepage 855 (Russian)"
  :coding-type 'charset
  :mnemonic ?D
  :charset-list '(cp855)
  :mime-charset 'cp855)

;; `ibm855' is another name for the same coding system.
(define-coding-system-alias 'ibm855 'cp855)
213
de223a74
DL
;; Charset-based coding system built on the `mik' charset.  No
;; :mime-charset is given for this one.
(define-coding-system 'mik
  "Bulgarian DOS codepage"
  :coding-type 'charset
  :mnemonic ?D
  :charset-list '(mik))
219
;; Charset-based coding system built on the `pt154' charset.  No
;; :mime-charset is given for this one.
(define-coding-system 'pt154
  "ParaType Asian Cyrillic codepage"
  :coding-type 'charset
  :mnemonic ?D
  :charset-list '(pt154))
225
05b99d65
DL
226;; (set-language-info-alist
227;; "Windows-1251" `((coding-system windows-1251)
228;; (coding-priority windows-1251)
05b99d65
DL
229;; (input-method . "russian-typewriter") ; fixme?
230;; (features code-pages)
231;; (documentation . "Support for windows-1251 character set."))
232;; '("Cyrillic"))
6f27e0f9
DL
233
;; Register the "Tajik" language environment (KOI8-T coding) with
;; "Cyrillic" as its parent group.
;; NOTE(review): the other environments in this file give a charset
;; name for nonascii-translation (e.g. koi8-u, windows-1251);
;; `cyrillic-koi8-t' is not defined in this file -- confirm it resolves.
(set-language-info-alist
 "Tajik" `((coding-system koi8-t)
           (coding-priority koi8-t)
           (nonascii-translation . cyrillic-koi8-t)
           (charset koi8-t)
           (input-method . "russian-typewriter") ; fixme?
           (features code-pages)
           (documentation . "Support for Tajik using KOI8-T."))
 '("Cyrillic"))
243
6b61353c
KH
;; Make sure `ctext-non-standard-encodings-alist' has an entry for
;; "microsoft-cp1251": update the existing entry in place when there is
;; one, otherwise push a new entry onto the list.
(let ((entry `("microsoft-cp1251" windows-1251 1
               ,(get 'encode-windows-1251 'translation-table)))
      (existing (assoc "microsoft-cp1251" ctext-non-standard-encodings-alist)))
  (if existing
      ;; Keep the existing cons cell and replace everything after its key.
      (setcdr existing (cdr entry))
    (push entry ctext-non-standard-encodings-alist)))
250
6f27e0f9
DL
;; Register the "Bulgarian" language environment (windows-1251 coding
;; and the "bulgarian-bds" input method) with "Cyrillic" as its parent
;; group.
(set-language-info-alist
 "Bulgarian" `((coding-system windows-1251)
               (coding-priority windows-1251)
               (nonascii-translation . windows-1251)
               (charset windows-1251)
               (ctext-non-standard-encodings "microsoft-cp1251")
               (input-method . "bulgarian-bds")
               (documentation
                . "Support for Bulgarian with windows-1251 character set."))
 '("Cyrillic"))
261
;; Register the "Belarusian" language environment (windows-1251 coding
;; and the "belarusian" input method) with "Cyrillic" as its parent group.
(set-language-info-alist
 "Belarusian" `((coding-system windows-1251)
                (coding-priority windows-1251)
                (nonascii-translation . windows-1251)
                (charset windows-1251)
                (ctext-non-standard-encodings "microsoft-cp1251")
                (input-method . "belarusian")
                (documentation
                 . "Support for Belarusian with windows-1251 character set.
\(The name Belarusian replaced Byelorussian in the early 1990s.)"))
 '("Cyrillic"))
273
0a3fa714
DL
;; Register "Ukrainian" again with a shorter alist.
;; NOTE(review): "Ukrainian" is also registered earlier in this file
;; with the same coding-system, coding-priority and input-method plus
;; charset and nonascii-translation entries; only the documentation
;; text differs here.  This looks like a leftover duplicate -- confirm
;; how `set-language-info-alist' treats a repeated environment name and
;; consolidate the two calls.
(set-language-info-alist
 "Ukrainian" `((coding-system koi8-u)
               (coding-priority koi8-u)
               (input-method . "ukrainian-computer")
               (documentation
                . "Support for Ukrainian with koi8-u character set."))
 '("Cyrillic"))
281
41da80b1
DL
;; Announce the `cyrillic' feature so `require' can find it.
(provide 'cyrillic)
283
cbee283d 284;; arch-tag: bda71ae0-ba41-4cb6-a6e0-1dff542313d3
4ed46869 285;;; cyrillic.el ends here