Commit | Line | Data |
---|---|---|
60370d40 | 1 | ;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*- |
4ed46869 | 2 | |
95df8112 | 3 | ;; Copyright (C) 1997-1998, 2001-2011 Free Software Foundation, Inc. |
7976eda0 | 4 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
5df4f04c | 5 | ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
eaa61218 KH |
6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
7 | ;; Registration Number H14PRO021 | |
8f924df7 KH |
8 | ;; Copyright (C) 2003 |
9 | ;; National Institute of Advanced Industrial Science and Technology (AIST) | |
10 | ;; Registration Number H13PRO009 | |
4ed46869 | 11 | |
6f27e0f9 DL |
12 | ;; Author: Kenichi Handa <handa@etl.go.jp> |
13 | ;; Keywords: multilingual, Cyrillic, i18n | |
4ed46869 KH |
14 | |
15 | ;; This file is part of GNU Emacs. | |
16 | ||
4936186e | 17 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
4ed46869 | 18 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
19 | ;; the Free Software Foundation, either version 3 of the License, or |
20 | ;; (at your option) any later version. | |
4ed46869 KH |
21 | |
22 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
23 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 | ;; GNU General Public License for more details. | |
26 | ||
27 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 28 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
4ed46869 KH |
29 | |
30 | ;;; Commentary: | |
31 | ||
6f27e0f9 DL |
32 | ;; The character set ISO8859-5 is supported. KOI-8 and ALTERNATIVNYJ |
33 | ;; are converted to Unicode internally. See | |
34 | ;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>. For more info | |
35 | ;; on Cyrillic charsets, see | |
36 | ;; <URL:http://czyborra.com/charsets/cyrillic.html>. The KOI and | |
37 | ;; Alternativnyj coding systems should live in code-pages.el, but | |
38 | ;; they've always been preloaded and the coding system autoload | |
39 | ;; mechanism didn't get accepted, so they have to stay here and | |
40 | ;; duplicate code-pages stuff. | |
41 | ||
42 | ;; Note that 8859-5 maps directly onto the Unicode Cyrillic block, | |
43 | ;; apart from codepoints 160 (NBSP, cf. U+0400), 173 (soft hyphen, | |
44 | ;; cf. U+040D) and 253 (section sign, cf. U+045D). The KOI-8 and | |
45 | ;; Alternativnyj coding systems encode both 8859-5 and Unicode. | |
46 | ;; ucs-tables.el provides unification for cyrillic-iso-8bit. | |
47 | ||
d98276be | 48 | ;; Customizing `utf-fragment-on-decoding' allows decoding characters |
6f27e0f9 DL |
49 | ;; from KOI and Alternativnyj into 8859-5 where that's possible. |
50 | ;; cyrillic-iso8859-5 characters take half as much space in the buffer | |
51 | ;; as the mule-unicode-0100-24ff equivalents, though that's probably | |
52 | ;; not normally a big deal. | |
4ed46869 KH |
53 | |
54 | ;;; Code: | |
55 | ||
5f1d80c7 KH |
56 | ;; Cyrillic (general) |
57 | ||
6f27e0f9 | 58 | ;; ISO-8859-5 stuff |
2b01336d | 59 | |
e1915ab3 KH |
60 | (define-coding-system 'cyrillic-iso-8bit |
61 | "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)." | |
62 | :coding-type 'charset | |
63 | :mnemonic ?5 | |
64 | :charset-list '(iso-8859-5) | |
30cbd69a | 65 | :mime-charset 'iso-8859-5) |
4ed46869 | 66 | |
71eabd24 | 67 | (define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit) |
efbc7e89 | 68 | |
5f1d80c7 | 69 | (set-language-info-alist |
e1915ab3 | 70 | "Cyrillic-ISO" '((charset iso-8859-5) |
11b77f14 KH |
71 | (coding-system cyrillic-iso-8bit) |
72 | (coding-priority cyrillic-iso-8bit) | |
6f27e0f9 | 73 | (input-method . "cyrillic-yawerty") ; fixme |
e1915ab3 | 74 | (nonascii-translation . iso-8859-5) |
a564ccf9 KH |
75 | (unibyte-display . cyrillic-iso-8bit) |
76 | (features cyril-util) | |
5f1d80c7 | 77 | (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") |
11b77f14 KH |
78 | (documentation . "Support for Cyrillic ISO-8859-5.")) |
79 | '("Cyrillic")) | |
5f1d80c7 | 80 | |
6f27e0f9 | 81 | ;; KOI-8R stuff |
2b01336d | 82 | |
e1915ab3 KH |
83 | (define-coding-system 'cyrillic-koi8 |
84 | "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)." | |
85 | :coding-type 'charset | |
86 | ;; We used to use ?K. It is true that ?K is more strictly correct, | |
87 | ;; but it is also used for Korean. So people who use koi8 for | |
88 | ;; languages other than Russian will have to forgive us. | |
89 | :mnemonic ?R | |
90 | :charset-list '(koi8) | |
30cbd69a | 91 | :mime-charset 'koi8-r) |
2b01336d | 92 | |
71eabd24 RS |
93 | (define-coding-system-alias 'koi8-r 'cyrillic-koi8) |
94 | (define-coding-system-alias 'koi8 'cyrillic-koi8) | |
6f27e0f9 DL |
95 | (define-coding-system-alias 'cp878 'cyrillic-koi8) |
96 | ||
5f1d80c7 | 97 | (set-language-info-alist |
e1915ab3 | 98 | "Cyrillic-KOI8" `((charset koi8) |
11b77f14 | 99 | (coding-system cyrillic-koi8) |
6f27e0f9 | 100 | (coding-priority cyrillic-koi8 cyrillic-iso-8bit) |
a503f8b7 | 101 | (ctext-non-standard-encodings "koi8-r") |
e1915ab3 | 102 | (nonascii-translation . koi8) |
76509389 | 103 | (input-method . "russian-typewriter") |
a564ccf9 KH |
104 | (features cyril-util) |
105 | (unibyte-display . cyrillic-koi8) | |
5f1d80c7 | 106 | (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") |
70c58a1f | 107 | (documentation . "Support for Cyrillic KOI8-R.")) |
11b77f14 | 108 | '("Cyrillic")) |
5f1d80c7 | 109 | |
76509389 DL |
110 | (set-language-info-alist |
111 | "Russian" `((charset cyrillic-iso8859-5) | |
112 | (nonascii-translation | |
113 | . ,(get 'cyrillic-koi8-r-nonascii-translation-table | |
114 | 'translation-table)) | |
115 | (coding-system cyrillic-koi8) | |
116 | (coding-priority cyrillic-koi8 cyrillic-iso-8bit) | |
117 | (input-method . "russian-computer") | |
118 | (features cyril-util) | |
119 | (unibyte-display . cyrillic-koi8) | |
120 | (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") | |
121 | (documentation . "\ | |
122 | Support for Russian using koi8-r and the russian-computer input method.") | |
123 | (tutorial . "TUTORIAL.ru")) | |
124 | '("Cyrillic")) | |
125 | ||
6b61353c KH |
126 | (define-coding-system 'koi8-u |
127 | "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)" | |
128 | :coding-type 'charset | |
129 | :mnemonic ?U | |
130 | :charset-list '(koi8-u) | |
131 | :mime-charset 'koi8-u) | |
132 | ||
133 | (set-language-info-alist | |
134 | "Ukrainian" `((charset koi8-u) | |
135 | (coding-system koi8-u) | |
136 | (coding-priority koi8-u) | |
137 | (nonascii-translation . koi8-u) | |
138 | (input-method . "ukrainian-computer") | |
139 | (documentation | |
140 | . "Support for Ukrainian with KOI8-U character set.")) | |
141 | '("Cyrillic")) | |
142 | ||
6f27e0f9 | 143 | ;;; ALTERNATIVNYJ stuff |
2b01336d | 144 | |
e1915ab3 KH |
145 | (define-coding-system 'cyrillic-alternativnyj |
146 | "ALTERNATIVNYJ 8-bit encoding for Cyrillic." | |
147 | :coding-type 'charset | |
148 | :mnemonic ?A | |
d7a9db1c | 149 | :charset-list '(alternativnyj)) |
2b01336d | 150 | |
71eabd24 | 151 | (define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj) |
efbc7e89 | 152 | |
4ed46869 | 153 | (set-language-info-alist |
e1915ab3 | 154 | "Cyrillic-ALT" `((charset alternativnyj) |
11b77f14 KH |
155 | (coding-system cyrillic-alternativnyj) |
156 | (coding-priority cyrillic-alternativnyj) | |
e1915ab3 | 157 | (nonascii-translation . alternativnyj) |
6f27e0f9 | 158 | (input-method . "russian-typewriter") |
a564ccf9 KH |
159 | (features cyril-util) |
160 | (unibyte-display . cyrillic-alternativnyj) | |
5f1d80c7 | 161 | (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!") |
11b77f14 KH |
162 | (documentation . "Support for Cyrillic ALTERNATIVNYJ.")) |
163 | '("Cyrillic")) | |
4ed46869 | 164 | |
d7a9db1c DL |
165 | (define-coding-system 'cp866 |
166 | "CP866 encoding for Cyrillic." | |
167 | :coding-type 'charset | |
168 | :mnemonic ?* | |
169 | :charset-list '(ibm866) | |
170 | :mime-charset 'cp866) | |
171 | ||
a07aa680 DL |
172 | (define-coding-system 'koi8-u |
173 | "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)" | |
174 | :coding-type 'charset | |
175 | :mnemonic ?U | |
176 | :charset-list '(koi8-u) | |
177 | :mime-charset 'koi8-u) | |
178 | ||
179 | (define-coding-system 'koi8-t | |
057eca09 | 180 | "KOI8-T 8-bit encoding for Cyrillic" |
a07aa680 DL |
181 | :coding-type 'charset |
182 | :mnemonic ?* | |
183 | :charset-list '(koi8-t) | |
184 | :mime-charset 'koi8-t) | |
185 | ||
186 | (define-coding-system 'windows-1251 | |
187 | "windows-1251 8-bit encoding for Cyrillic (MIME: WINDOWS-1251)" | |
188 | :coding-type 'charset | |
189 | :mnemonic ?b | |
190 | :charset-list '(windows-1251) | |
191 | :mime-charset 'windows-1251) | |
192 | (define-coding-system-alias 'cp1251 'windows-1251) | |
193 | ||
194 | (define-coding-system 'cp1125 | |
057eca09 | 195 | "cp1125 8-bit encoding for Cyrillic" |
a07aa680 DL |
196 | :coding-type 'charset |
197 | :mnemonic ?* | |
198 | :charset-list '(cp1125)) | |
199 | (define-coding-system-alias 'ruscii 'cp1125) | |
200 | ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua> | |
201 | (define-coding-system-alias 'cp866u 'cp1125) | |
202 | ||
515a4f82 DL |
203 | (define-coding-system 'cp855 |
204 | "DOS codepage 855 (Russian)" | |
205 | :coding-type 'charset | |
206 | :mnemonic ?D | |
207 | :charset-list '(cp855) | |
208 | :mime-charset 'cp855) | |
209 | (define-coding-system-alias 'ibm855 'cp855) | |
210 | ||
de223a74 DL |
211 | (define-coding-system 'mik |
212 | "Bulgarian DOS codepage" | |
213 | :coding-type 'charset | |
214 | :mnemonic ?D | |
215 | :charset-list '(mik)) | |
216 | ||
217 | (define-coding-system 'pt154 | |
218 | "ParaType Asian Cyrillic codepage" | |
219 | :coding-type 'charset | |
220 | :mnemonic ?D | |
221 | :charset-list '(pt154)) | |
222 | ||
05b99d65 DL |
223 | ;; (set-language-info-alist |
224 | ;; "Windows-1251" `((coding-system windows-1251) | |
225 | ;; (coding-priority windows-1251) | |
05b99d65 DL |
226 | ;; (input-method . "russian-typewriter") ; fixme? |
227 | ;; (features code-pages) | |
228 | ;; (documentation . "Support for windows-1251 character set.")) | |
229 | ;; '("Cyrillic")) | |
6f27e0f9 DL |
230 | |
231 | (set-language-info-alist | |
0a3fa714 DL |
232 | "Tajik" `((coding-system koi8-t) |
233 | (coding-priority koi8-t) | |
05b99d65 | 234 | (nonascii-translation . cyrillic-koi8-t) |
d709c451 | 235 | (charset koi8-t) |
76509389 DL |
236 | (input-method . "russian-typewriter") ; fixme? |
237 | (features code-pages) | |
238 | (documentation . "Support for Tajik using KOI8-T.")) | |
6f27e0f9 DL |
239 | '("Cyrillic")) |
240 | ||
241 | (set-language-info-alist | |
242 | "Bulgarian" `((coding-system windows-1251) | |
05b99d65 DL |
243 | (coding-priority windows-1251) |
244 | (nonascii-translation . windows-1251) | |
d709c451 | 245 | (charset windows-1251) |
6b61353c | 246 | (ctext-non-standard-encodings "microsoft-cp1251") |
1d5f8bfc | 247 | (input-method . "bulgarian-bds") |
6f27e0f9 | 248 | (documentation |
05b99d65 | 249 | . "Support for Bulgarian with windows-1251 character set.")) |
6f27e0f9 DL |
250 | '("Cyrillic")) |
251 | ||
252 | (set-language-info-alist | |
253 | "Belarusian" `((coding-system windows-1251) | |
254 | (coding-priority windows-1251) | |
05b99d65 | 255 | (nonascii-translation . windows-1251) |
d709c451 | 256 | (charset windows-1251) |
6b61353c | 257 | (ctext-non-standard-encodings "microsoft-cp1251") |
6f27e0f9 | 258 | (input-method . "belarusian") |
6f27e0f9 DL |
259 | (documentation |
260 | . "Support for Belarusian with windows-1251 character set. | |
261 | \(The name Belarusian replaced Byelorussian in the early 1990s.)")) | |
262 | '("Cyrillic")) | |
263 | ||
0a3fa714 DL |
264 | (set-language-info-alist |
265 | "Ukrainian" `((coding-system koi8-u) | |
266 | (coding-priority koi8-u) | |
267 | (input-method . "ukrainian-computer") | |
268 | (documentation | |
269 | . "Support for Ukrainian with koi8-u character set.")) | |
270 | '("Cyrillic")) | |
271 | ||
41da80b1 DL |
272 | (provide 'cyrillic) |
273 | ||
4ed46869 | 274 | ;;; cyrillic.el ends here |