Commit | Line | Data |
---|---|---|
60370d40 | 1 | ;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*- |
4ed46869 | 2 | |
73b0cd50 | 3 | ;; Copyright (C) 1997-1998, 2001-2011 |
d4877ac1 | 4 | ;; Free Software Foundation, Inc. |
7976eda0 | 5 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
5df4f04c | 6 | ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
eaa61218 KH |
7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
8 | ;; Registration Number H14PRO021 | |
8f924df7 KH |
9 | ;; Copyright (C) 2003 |
10 | ;; National Institute of Advanced Industrial Science and Technology (AIST) | |
11 | ;; Registration Number H13PRO009 | |
4ed46869 | 12 | |
6f27e0f9 DL |
13 | ;; Author: Kenichi Handa <handa@etl.go.jp> |
14 | ;; Keywords: multilingual, Cyrillic, i18n | |
4ed46869 KH |
15 | |
16 | ;; This file is part of GNU Emacs. | |
17 | ||
4936186e | 18 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
4ed46869 | 19 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
20 | ;; the Free Software Foundation, either version 3 of the License, or |
21 | ;; (at your option) any later version. | |
4ed46869 KH |
22 | |
23 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
24 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
25 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
26 | ;; GNU General Public License for more details. | |
27 | ||
28 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 29 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
4ed46869 KH |
30 | |
31 | ;;; Commentary: | |
32 | ||
6f27e0f9 DL |
33 | ;; The character set ISO8859-5 is supported. KOI-8 and ALTERNATIVNYJ |
34 | ;; are converted to Unicode internally. See | |
35 | ;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>. For more info | |
36 | ;; on Cyrillic charsets, see | |
37 | ;; <URL:http://czyborra.com/charsets/cyrillic.html>. The KOI and | |
38 | ;; Alternativnyj coding systems should live in code-pages.el, but | |
39 | ;; they've always been preloaded and the coding system autoload | |
40 | ;; mechanism didn't get accepted, so they have to stay here and | |
41 | ;; duplicate code-pages stuff. | |
42 | ||
43 | ;; Note that 8859-5 maps directly onto the Unicode Cyrillic block, | |
44 | ;; apart from codepoints 160 (NBSP, cf. U+0400), 173 (soft hyphen, | |
45 | ;; cf. U+040D) and 253 (section sign, cf. U+045D). The KOI-8 and | |
46 | ;; Alternativnyj coding systems encode both 8859-5 and Unicode. | |
47 | ;; ucs-tables.el provides unification for cyrillic-iso-8bit. | |
48 | ||
d98276be | 49 | ;; Customizing `utf-fragment-on-decoding' allows decoding characters |
6f27e0f9 DL |
50 | ;; from KOI and Alternativnyj into 8859-5 where that's possible. |
51 | ;; cyrillic-iso8859-5 characters take half as much space in the buffer | |
52 | ;; as the mule-unicode-0100-24ff equivalents, though that's probably | |
53 | ;; not normally a big deal. | |
4ed46869 KH |
54 | |
55 | ;;; Code: | |
56 | ||
5f1d80c7 KH |
57 | ;; Cyrillic (general) |
58 | ||
6f27e0f9 | 59 | ;; ISO-8859-5 stuff |
2b01336d | 60 | |
e1915ab3 KH |
61 | (define-coding-system 'cyrillic-iso-8bit |
62 | "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)." | |
63 | :coding-type 'charset | |
64 | :mnemonic ?5 | |
65 | :charset-list '(iso-8859-5) | |
30cbd69a | 66 | :mime-charset 'iso-8859-5) |
4ed46869 | 67 | |
71eabd24 | 68 | (define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit) |
efbc7e89 | 69 | |
5f1d80c7 | 70 | (set-language-info-alist |
e1915ab3 | 71 | "Cyrillic-ISO" '((charset iso-8859-5) |
11b77f14 KH |
72 | (coding-system cyrillic-iso-8bit) |
73 | (coding-priority cyrillic-iso-8bit) | |
6f27e0f9 | 74 | (input-method . "cyrillic-yawerty") ; fixme |
e1915ab3 | 75 | (nonascii-translation . iso-8859-5) |
a564ccf9 KH |
76 | (unibyte-display . cyrillic-iso-8bit) |
77 | (features cyril-util) | |
5f1d80c7 | 78 | (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") |
11b77f14 KH |
79 | (documentation . "Support for Cyrillic ISO-8859-5.")) |
80 | '("Cyrillic")) | |
5f1d80c7 | 81 | |
6f27e0f9 | 82 | ;; KOI-8R stuff |
2b01336d | 83 | |
e1915ab3 KH |
84 | (define-coding-system 'cyrillic-koi8 |
85 | "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)." | |
86 | :coding-type 'charset | |
87 | ;; We used to use ?K. It is true that ?K is more strictly correct, | |
88 | ;; but it is also used for Korean. So people who use koi8 for | |
89 | ;; languages other than Russian will have to forgive us. | |
90 | :mnemonic ?R | |
91 | :charset-list '(koi8) | |
30cbd69a | 92 | :mime-charset 'koi8-r) |
2b01336d | 93 | |
71eabd24 RS |
94 | (define-coding-system-alias 'koi8-r 'cyrillic-koi8) |
95 | (define-coding-system-alias 'koi8 'cyrillic-koi8) | |
6f27e0f9 DL |
96 | (define-coding-system-alias 'cp878 'cyrillic-koi8) |
97 | ||
5f1d80c7 | 98 | (set-language-info-alist |
e1915ab3 | 99 | "Cyrillic-KOI8" `((charset koi8) |
11b77f14 | 100 | (coding-system cyrillic-koi8) |
6f27e0f9 | 101 | (coding-priority cyrillic-koi8 cyrillic-iso-8bit) |
a503f8b7 | 102 | (ctext-non-standard-encodings "koi8-r") |
e1915ab3 | 103 | (nonascii-translation . koi8) |
76509389 | 104 | (input-method . "russian-typewriter") |
a564ccf9 KH |
105 | (features cyril-util) |
106 | (unibyte-display . cyrillic-koi8) | |
5f1d80c7 | 107 | (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") |
70c58a1f | 108 | (documentation . "Support for Cyrillic KOI8-R.")) |
11b77f14 | 109 | '("Cyrillic")) |
5f1d80c7 | 110 | |
76509389 DL |
111 | (set-language-info-alist |
112 | "Russian" `((charset cyrillic-iso8859-5) | |
113 | (nonascii-translation | |
114 | . ,(get 'cyrillic-koi8-r-nonascii-translation-table | |
115 | 'translation-table)) | |
116 | (coding-system cyrillic-koi8) | |
117 | (coding-priority cyrillic-koi8 cyrillic-iso-8bit) | |
118 | (input-method . "russian-computer") | |
119 | (features cyril-util) | |
120 | (unibyte-display . cyrillic-koi8) | |
121 | (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") | |
122 | (documentation . "\ | |
123 | Support for Russian using koi8-r and the russian-computer input method.") | |
124 | (tutorial . "TUTORIAL.ru")) | |
125 | '("Cyrillic")) | |
126 | ||
6b61353c KH |
127 | (define-coding-system 'koi8-u |
128 | "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)" | |
129 | :coding-type 'charset | |
130 | :mnemonic ?U | |
131 | :charset-list '(koi8-u) | |
132 | :mime-charset 'koi8-u) | |
133 | ||
134 | (set-language-info-alist | |
135 | "Ukrainian" `((charset koi8-u) | |
136 | (coding-system koi8-u) | |
137 | (coding-priority koi8-u) | |
138 | (nonascii-translation . koi8-u) | |
139 | (input-method . "ukrainian-computer") | |
140 | (documentation | |
141 | . "Support for Ukrainian with KOI8-U character set.")) | |
142 | '("Cyrillic")) | |
143 | ||
6f27e0f9 | 144 | ;;; ALTERNATIVNYJ stuff |
2b01336d | 145 | |
e1915ab3 KH |
146 | (define-coding-system 'cyrillic-alternativnyj |
147 | "ALTERNATIVNYJ 8-bit encoding for Cyrillic." | |
148 | :coding-type 'charset | |
149 | :mnemonic ?A | |
d7a9db1c | 150 | :charset-list '(alternativnyj)) |
2b01336d | 151 | |
71eabd24 | 152 | (define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj) |
efbc7e89 | 153 | |
4ed46869 | 154 | (set-language-info-alist |
e1915ab3 | 155 | "Cyrillic-ALT" `((charset alternativnyj) |
11b77f14 KH |
156 | (coding-system cyrillic-alternativnyj) |
157 | (coding-priority cyrillic-alternativnyj) | |
e1915ab3 | 158 | (nonascii-translation . alternativnyj) |
6f27e0f9 | 159 | (input-method . "russian-typewriter") |
a564ccf9 KH |
160 | (features cyril-util) |
161 | (unibyte-display . cyrillic-alternativnyj) | |
5f1d80c7 | 162 | (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") |
11b77f14 KH |
163 | (documentation . "Support for Cyrillic ALTERNATIVNYJ.")) |
164 | '("Cyrillic")) | |
4ed46869 | 165 | |
d7a9db1c DL |
166 | (define-coding-system 'cp866 |
167 | "CP866 encoding for Cyrillic." | |
168 | :coding-type 'charset | |
169 | :mnemonic ?* | |
170 | :charset-list '(ibm866) | |
171 | :mime-charset 'cp866) | |
172 | ||
a07aa680 DL |
173 | (define-coding-system 'koi8-u |
174 | "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)" | |
175 | :coding-type 'charset | |
176 | :mnemonic ?U | |
177 | :charset-list '(koi8-u) | |
178 | :mime-charset 'koi8-u) | |
179 | ||
180 | (define-coding-system 'koi8-t | |
057eca09 | 181 | "KOI8-T 8-bit encoding for Cyrillic" |
a07aa680 DL |
182 | :coding-type 'charset |
183 | :mnemonic ?* | |
184 | :charset-list '(koi8-t) | |
185 | :mime-charset 'koi8-t) | |
186 | ||
187 | (define-coding-system 'windows-1251 | |
188 | "windows-1251 8-bit encoding for Cyrillic (MIME: WINDOWS-1251)" | |
189 | :coding-type 'charset | |
190 | :mnemonic ?b | |
191 | :charset-list '(windows-1251) | |
192 | :mime-charset 'windows-1251) | |
193 | (define-coding-system-alias 'cp1251 'windows-1251) | |
194 | ||
195 | (define-coding-system 'cp1125 | |
057eca09 | 196 | "cp1125 8-bit encoding for Cyrillic" |
a07aa680 DL |
197 | :coding-type 'charset |
198 | :mnemonic ?* | |
199 | :charset-list '(cp1125)) | |
200 | (define-coding-system-alias 'ruscii 'cp1125) | |
201 | ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua> | |
202 | (define-coding-system-alias 'cp866u 'cp1125) | |
203 | ||
515a4f82 DL |
204 | (define-coding-system 'cp855 |
205 | "DOS codepage 855 (Russian)" | |
206 | :coding-type 'charset | |
207 | :mnemonic ?D | |
208 | :charset-list '(cp855) | |
209 | :mime-charset 'cp855) | |
210 | (define-coding-system-alias 'ibm855 'cp855) | |
211 | ||
de223a74 DL |
212 | (define-coding-system 'mik |
213 | "Bulgarian DOS codepage" | |
214 | :coding-type 'charset | |
215 | :mnemonic ?D | |
216 | :charset-list '(mik)) | |
217 | ||
218 | (define-coding-system 'pt154 | |
219 | "ParaType Asian Cyrillic codepage" | |
220 | :coding-type 'charset | |
221 | :mnemonic ?D | |
222 | :charset-list '(pt154)) | |
223 | ||
05b99d65 DL |
224 | ;; (set-language-info-alist |
225 | ;; "Windows-1251" `((coding-system windows-1251) | |
226 | ;; (coding-priority windows-1251) | |
05b99d65 DL |
227 | ;; (input-method . "russian-typewriter") ; fixme? |
228 | ;; (features code-pages) | |
229 | ;; (documentation . "Support for windows-1251 character set.")) | |
230 | ;; '("Cyrillic")) | |
6f27e0f9 DL |
231 | |
232 | (set-language-info-alist | |
0a3fa714 DL |
233 | "Tajik" `((coding-system koi8-t) |
234 | (coding-priority koi8-t) | |
05b99d65 | 235 | (nonascii-translation . cyrillic-koi8-t) |
d709c451 | 236 | (charset koi8-t) |
76509389 DL |
237 | (input-method . "russian-typewriter") ; fixme? |
238 | (features code-pages) | |
239 | (documentation . "Support for Tajik using KOI8-T.")) | |
6f27e0f9 DL |
240 | '("Cyrillic")) |
241 | ||
242 | (set-language-info-alist | |
243 | "Bulgarian" `((coding-system windows-1251) | |
05b99d65 DL |
244 | (coding-priority windows-1251) |
245 | (nonascii-translation . windows-1251) | |
d709c451 | 246 | (charset windows-1251) |
6b61353c | 247 | (ctext-non-standard-encodings "microsoft-cp1251") |
1d5f8bfc | 248 | (input-method . "bulgarian-bds") |
6f27e0f9 | 249 | (documentation |
05b99d65 | 250 | . "Support for Bulgarian with windows-1251 character set.")) |
6f27e0f9 DL |
251 | '("Cyrillic")) |
252 | ||
253 | (set-language-info-alist | |
254 | "Belarusian" `((coding-system windows-1251) | |
255 | (coding-priority windows-1251) | |
05b99d65 | 256 | (nonascii-translation . windows-1251) |
d709c451 | 257 | (charset windows-1251) |
6b61353c | 258 | (ctext-non-standard-encodings "microsoft-cp1251") |
6f27e0f9 | 259 | (input-method . "belarusian") |
6f27e0f9 DL |
260 | (documentation |
261 | . "Support for Belarusian with windows-1251 character set. | |
262 | \(The name Belarusian replaced Byelorussian in the early 1990s.)")) | |
263 | '("Cyrillic")) | |
264 | ||
0a3fa714 DL |
265 | (set-language-info-alist |
266 | "Ukrainian" `((coding-system koi8-u) | |
267 | (coding-priority koi8-u) | |
268 | (input-method . "ukrainian-computer") | |
269 | (documentation | |
270 | . "Support for Ukrainian with koi8-u character set.")) | |
271 | '("Cyrillic")) | |
272 | ||
41da80b1 DL |
273 | (provide 'cyrillic) |
274 | ||
4ed46869 | 275 | ;;; cyrillic.el ends here |