Commit | Line | Data |
---|---|---|
60370d40 | 1 | ;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*- |
4ed46869 | 2 | |
38141d20 | 3 | ;; Copyright (C) 1997, 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 |
d4877ac1 | 4 | ;; Free Software Foundation, Inc. |
7976eda0 | 5 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
38141d20 | 6 | ;; 2005, 2006, 2007, 2008 |
eaa61218 KH |
7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
8 | ;; Registration Number H14PRO021 | |
8f924df7 KH |
9 | ;; Copyright (C) 2003 |
10 | ;; National Institute of Advanced Industrial Science and Technology (AIST) | |
11 | ;; Registration Number H13PRO009 | |
4ed46869 | 12 | |
6f27e0f9 DL |
13 | ;; Author: Kenichi Handa <handa@etl.go.jp> |
14 | ;; Keywords: multilingual, Cyrillic, i18n | |
4ed46869 KH |
15 | |
16 | ;; This file is part of GNU Emacs. | |
17 | ||
18 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
19 | ;; it under the terms of the GNU General Public License as published by | |
d7142f3e | 20 | ;; the Free Software Foundation; either version 3, or (at your option) |
4ed46869 KH |
21 | ;; any later version. |
22 | ||
23 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
24 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
25 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
26 | ;; GNU General Public License for more details. | |
27 | ||
28 | ;; You should have received a copy of the GNU General Public License | |
369314dc | 29 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
3a35cf56 LK |
30 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
31 | ;; Boston, MA 02110-1301, USA. | |
4ed46869 KH |
32 | |
33 | ;;; Commentary: | |
34 | ||
6f27e0f9 DL |
35 | ;; The character set ISO8859-5 is supported. KOI-8 and ALTERNATIVNYJ |
36 | ;; are converted to Unicode internally. See | |
37 | ;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>. For more info | |
38 | ;; on Cyrillic charsets, see | |
39 | ;; <URL:http://czyborra.com/charsets/cyrillic.html>. The KOI and | |
40 | ;; Alternativnyj coding systems should live in code-pages.el, but | |
41 | ;; they've always been preloaded and the coding system autoload | |
42 | ;; mechanism didn't get accepted, so they have to stay here and | |
43 | ;; duplicate code-pages stuff. | |
44 | ||
45 | ;; Note that 8859-5 maps directly onto the Unicode Cyrillic block, | |
46 | ;; apart from codepoints 160 (NBSP, cf. U+0400), 173 (soft hyphen, | |
47 | ;; cf. U+040D) and 253 (section sign, cf. U+045D). The KOI-8 and | |
48 | ;; Alternativnyj coding systems encode both 8859-5 and Unicode. | |
49 | ;; ucs-tables.el provides unification for cyrillic-iso-8bit. | |
50 | ||
d98276be | 51 | ;; Customizing `utf-fragment-on-decoding' allows decoding characters |
6f27e0f9 DL |
52 | ;; from KOI and Alternativnyj into 8859-5 where that's possible. |
53 | ;; cyrillic-iso8859-5 characters take half as much space in the buffer | |
54 | ;; as the mule-unicode-0100-24ff equivalents, though that's probably | |
55 | ;; not normally a big deal. | |
4ed46869 KH |
56 | |
57 | ;;; Code: | |
58 | ||
5f1d80c7 KH |
59 | ;; Cyrillic (general) |
60 | ||
6f27e0f9 | 61 | ;; ISO-8859-5 stuff |
2b01336d | 62 | |
e1915ab3 KH |
63 | (define-coding-system 'cyrillic-iso-8bit |
64 | "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)." | |
65 | :coding-type 'charset | |
66 | :mnemonic ?5 | |
67 | :charset-list '(iso-8859-5) | |
30cbd69a | 68 | :mime-charset 'iso-8859-5) |
4ed46869 | 69 | |
71eabd24 | 70 | (define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit) |
efbc7e89 | 71 | |
5f1d80c7 | 72 | (set-language-info-alist |
e1915ab3 | 73 | "Cyrillic-ISO" '((charset iso-8859-5) |
11b77f14 KH |
74 | (coding-system cyrillic-iso-8bit) |
75 | (coding-priority cyrillic-iso-8bit) | |
6f27e0f9 | 76 | (input-method . "cyrillic-yawerty") ; fixme |
e1915ab3 | 77 | (nonascii-translation . iso-8859-5) |
a564ccf9 KH |
78 | (unibyte-display . cyrillic-iso-8bit) |
79 | (features cyril-util) | |
5f1d80c7 | 80 | (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") |
11b77f14 KH |
81 | (documentation . "Support for Cyrillic ISO-8859-5.")) |
82 | '("Cyrillic")) | |
5f1d80c7 | 83 | |
6f27e0f9 | 84 | ;; KOI8-R stuff |
2b01336d | 85 | |
e1915ab3 KH |
86 | (define-coding-system 'cyrillic-koi8 |
87 | "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)." | |
88 | :coding-type 'charset | |
89 | ;; We used to use ?K. It is true that ?K is more strictly correct, | |
90 | ;; but it is also used for Korean. So people who use koi8 for | |
91 | ;; languages other than Russian will have to forgive us. | |
92 | :mnemonic ?R | |
93 | :charset-list '(koi8) | |
30cbd69a | 94 | :mime-charset 'koi8-r) |
2b01336d | 95 | |
71eabd24 RS |
96 | (define-coding-system-alias 'koi8-r 'cyrillic-koi8) |
97 | (define-coding-system-alias 'koi8 'cyrillic-koi8) | |
6f27e0f9 DL |
98 | (define-coding-system-alias 'cp878 'cyrillic-koi8) |
99 | ||
5f1d80c7 | 100 | (set-language-info-alist |
e1915ab3 | 101 | "Cyrillic-KOI8" `((charset koi8) |
11b77f14 | 102 | (coding-system cyrillic-koi8) |
6f27e0f9 | 103 | (coding-priority cyrillic-koi8 cyrillic-iso-8bit) |
a503f8b7 | 104 | (ctext-non-standard-encodings "koi8-r") |
e1915ab3 | 105 | (nonascii-translation . koi8) |
76509389 | 106 | (input-method . "russian-typewriter") |
a564ccf9 KH |
107 | (features cyril-util) |
108 | (unibyte-display . cyrillic-koi8) | |
5f1d80c7 | 109 | (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") |
70c58a1f | 110 | (documentation . "Support for Cyrillic KOI8-R.")) |
11b77f14 | 111 | '("Cyrillic")) |
5f1d80c7 | 112 | |
76509389 DL |
113 | (set-language-info-alist |
114 | "Russian" `((charset cyrillic-iso8859-5) | |
115 | (nonascii-translation | |
116 | . ,(get 'cyrillic-koi8-r-nonascii-translation-table | |
117 | 'translation-table)) | |
118 | (coding-system cyrillic-koi8) | |
119 | (coding-priority cyrillic-koi8 cyrillic-iso-8bit) | |
120 | (input-method . "russian-computer") | |
121 | (features cyril-util) | |
122 | (unibyte-display . cyrillic-koi8) | |
123 | (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") | |
124 | (documentation . "\ | |
125 | Support for Russian using koi8-r and the russian-computer input method.") | |
126 | (tutorial . "TUTORIAL.ru")) | |
127 | '("Cyrillic")) | |
128 | ||
6b61353c KH |
129 | (define-coding-system 'koi8-u |
130 | "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)" | |
131 | :coding-type 'charset | |
132 | :mnemonic ?U | |
133 | :charset-list '(koi8-u) | |
134 | :mime-charset 'koi8-u) | |
135 | ||
136 | (set-language-info-alist | |
137 | "Ukrainian" `((charset koi8-u) | |
138 | (coding-system koi8-u) | |
139 | (coding-priority koi8-u) | |
140 | (nonascii-translation . koi8-u) | |
141 | (input-method . "ukrainian-computer") | |
142 | (documentation | |
143 | . "Support for Ukrainian with KOI8-U character set.")) | |
144 | '("Cyrillic")) | |
145 | ||
6f27e0f9 | 146 | ;;; ALTERNATIVNYJ stuff |
2b01336d | 147 | |
e1915ab3 KH |
148 | (define-coding-system 'cyrillic-alternativnyj |
149 | "ALTERNATIVNYJ 8-bit encoding for Cyrillic." | |
150 | :coding-type 'charset | |
151 | :mnemonic ?A | |
d7a9db1c | 152 | :charset-list '(alternativnyj)) |
2b01336d | 153 | |
71eabd24 | 154 | (define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj) |
efbc7e89 | 155 | |
4ed46869 | 156 | (set-language-info-alist |
e1915ab3 | 157 | "Cyrillic-ALT" `((charset alternativnyj) |
11b77f14 KH |
158 | (coding-system cyrillic-alternativnyj) |
159 | (coding-priority cyrillic-alternativnyj) | |
e1915ab3 | 160 | (nonascii-translation . alternativnyj) |
6f27e0f9 | 161 | (input-method . "russian-typewriter") |
a564ccf9 KH |
162 | (features cyril-util) |
163 | (unibyte-display . cyrillic-alternativnyj) | |
5f1d80c7 | 164 | (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") |
11b77f14 KH |
165 | (documentation . "Support for Cyrillic ALTERNATIVNYJ.")) |
166 | '("Cyrillic")) | |
4ed46869 | 167 | |
d7a9db1c DL |
168 | (define-coding-system 'cp866 |
169 | "CP866 encoding for Cyrillic." | |
170 | :coding-type 'charset | |
171 | :mnemonic ?* | |
172 | :charset-list '(ibm866) | |
173 | :mime-charset 'cp866) | |
174 | ||
a07aa680 DL |
175 | (define-coding-system 'koi8-u |
176 | "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)" | |
177 | :coding-type 'charset | |
178 | :mnemonic ?U | |
179 | :charset-list '(koi8-u) | |
180 | :mime-charset 'koi8-u) | |
181 | ||
182 | (define-coding-system 'koi8-t | |
057eca09 | 183 | "KOI8-T 8-bit encoding for Cyrillic" |
a07aa680 DL |
184 | :coding-type 'charset |
185 | :mnemonic ?* | |
186 | :charset-list '(koi8-t) | |
187 | :mime-charset 'koi8-t) | |
188 | ||
189 | (define-coding-system 'windows-1251 | |
190 | "windows-1251 8-bit encoding for Cyrillic (MIME: WINDOWS-1251)" | |
191 | :coding-type 'charset | |
192 | :mnemonic ?b | |
193 | :charset-list '(windows-1251) | |
194 | :mime-charset 'windows-1251) | |
195 | (define-coding-system-alias 'cp1251 'windows-1251) | |
196 | ||
197 | (define-coding-system 'cp1125 | |
057eca09 | 198 | "cp1125 8-bit encoding for Cyrillic" |
a07aa680 DL |
199 | :coding-type 'charset |
200 | :mnemonic ?* | |
201 | :charset-list '(cp1125)) | |
202 | (define-coding-system-alias 'ruscii 'cp1125) | |
203 | ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua> | |
204 | (define-coding-system-alias 'cp866u 'cp1125) | |
205 | ||
515a4f82 DL |
206 | (define-coding-system 'cp855 |
207 | "DOS codepage 855 (Russian)" | |
208 | :coding-type 'charset | |
209 | :mnemonic ?D | |
210 | :charset-list '(cp855) | |
211 | :mime-charset 'cp855) | |
212 | (define-coding-system-alias 'ibm855 'cp855) | |
213 | ||
de223a74 DL |
214 | (define-coding-system 'mik |
215 | "Bulgarian DOS codepage" | |
216 | :coding-type 'charset | |
217 | :mnemonic ?D | |
218 | :charset-list '(mik)) | |
219 | ||
220 | (define-coding-system 'pt154 | |
221 | "Parattype Asian Cyrillic codepage" | |
222 | :coding-type 'charset | |
223 | :mnemonic ?D | |
224 | :charset-list '(pt154)) | |
225 | ||
05b99d65 DL |
226 | ;; (set-language-info-alist |
227 | ;; "Windows-1251" `((coding-system windows-1251) | |
228 | ;; (coding-priority windows-1251) | |
05b99d65 DL |
229 | ;; (input-method . "russian-typewriter") ; fixme? |
230 | ;; (features code-pages) | |
231 | ;; (documentation . "Support for windows-1251 character set.")) | |
232 | ;; '("Cyrillic")) | |
6f27e0f9 DL |
233 | |
234 | (set-language-info-alist | |
0a3fa714 DL |
235 | "Tajik" `((coding-system koi8-t) |
236 | (coding-priority koi8-t) | |
05b99d65 | 237 | (nonascii-translation . cyrillic-koi8-t) |
d709c451 | 238 | (charset koi8-t) |
76509389 DL |
239 | (input-method . "russian-typewriter") ; fixme? |
240 | (features code-pages) | |
241 | (documentation . "Support for Tajik using KOI8-T.")) | |
6f27e0f9 DL |
242 | '("Cyrillic")) |
243 | ||
6b61353c KH |
244 | (let ((elt `("microsoft-cp1251" windows-1251 1 |
245 | ,(get 'encode-windows-1251 'translation-table))) | |
246 | (slot (assoc "microsoft-cp1251" ctext-non-standard-encodings-alist))) | |
247 | (if slot | |
248 | (setcdr slot (cdr elt)) | |
249 | (push elt ctext-non-standard-encodings-alist))) | |
250 | ||
6f27e0f9 DL |
251 | (set-language-info-alist |
252 | "Bulgarian" `((coding-system windows-1251) | |
05b99d65 DL |
253 | (coding-priority windows-1251) |
254 | (nonascii-translation . windows-1251) | |
d709c451 | 255 | (charset windows-1251) |
6b61353c | 256 | (ctext-non-standard-encodings "microsoft-cp1251") |
1d5f8bfc | 257 | (input-method . "bulgarian-bds") |
6f27e0f9 | 258 | (documentation |
05b99d65 | 259 | . "Support for Bulgrian with windows-1251 character set.")) |
6f27e0f9 DL |
260 | '("Cyrillic")) |
261 | ||
262 | (set-language-info-alist | |
263 | "Belarusian" `((coding-system windows-1251) | |
264 | (coding-priority windows-1251) | |
05b99d65 | 265 | (nonascii-translation . windows-1251) |
d709c451 | 266 | (charset windows-1251) |
6b61353c | 267 | (ctext-non-standard-encodings "microsoft-cp1251") |
6f27e0f9 | 268 | (input-method . "belarusian") |
6f27e0f9 DL |
269 | (documentation |
270 | . "Support for Belarusian with windows-1251 character set. | |
271 | \(The name Belarusian replaced Byelorussian in the early 1990s.)")) | |
272 | '("Cyrillic")) | |
273 | ||
0a3fa714 DL |
274 | (set-language-info-alist |
275 | "Ukrainian" `((coding-system koi8-u) | |
276 | (coding-priority koi8-u) | |
277 | (input-method . "ukrainian-computer") | |
278 | (documentation | |
279 | . "Support for Ukrainian with koi8-u character set.")) | |
280 | '("Cyrillic")) | |
281 | ||
41da80b1 DL |
282 | (provide 'cyrillic) |
283 | ||
cbee283d | 284 | ;; arch-tag: bda71ae0-ba41-4cb6-a6e0-1dff542313d3 |
4ed46869 | 285 | ;;; cyrillic.el ends here |