New directory
[bpt/emacs.git] / lisp / language / cyrillic.el
CommitLineData
60370d40 1;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*-
4ed46869 2
4ed46869 3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
b79e7b7b 4;; Licensed to the Free Software Foundation.
76509389 5;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
4ed46869 6
6f27e0f9
DL
7;; Author: Kenichi Handa <handa@etl.go.jp>
8;; Keywords: multilingual, Cyrillic, i18n
4ed46869
KH
9
10;; This file is part of GNU Emacs.
11
12;; GNU Emacs is free software; you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation; either version 2, or (at your option)
15;; any later version.
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
369314dc
KH
23;; along with GNU Emacs; see the file COPYING. If not, write to the
24;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25;; Boston, MA 02111-1307, USA.
4ed46869
KH
26
27;;; Commentary:
28
6f27e0f9
DL
29;; The character set ISO8859-5 is supported. KOI-8 and ALTERNATIVNYJ
30;; are converted to Unicode internally. See
31;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>. For more info
32;; on Cyrillic charsets, see
33;; <URL:http://czyborra.com/charsets/cyrillic.html>. The KOI and
34;; Alternativnyj coding systems should live in code-pages.el, but
35;; they've always been preloaded and the coding system autoload
36;; mechanism didn't get accepted, so they have to stay here and
37;; duplicate code-pages stuff.
38
39;; Note that 8859-5 maps directly onto the Unicode Cyrillic block,
40;; apart from codepoints 160 (NBSP, cf. U+0400), 173 (soft hyphen,
41;; cf. U+040D) and 253 (section sign, cf. U+045D). The KOI-8 and
42;; Alternativnyj coding systems encode both 8859-5 and Unicode.
43;; ucs-tables.el provides unification for cyrillic-iso-8bit.
44
d98276be 45;; Customizing `utf-fragment-on-decoding' allows decoding characters
6f27e0f9
DL
46;; from KOI and Alternativnyj into 8859-5 where that's possible.
47;; cyrillic-iso8859-5 characters take half as much space in the buffer
48;; as the mule-unicode-0100-24ff equivalents, though that's probably
49;; not normally a big deal.
4ed46869
KH
50
51;;; Code:
52
5f1d80c7
KH
53;; Cyrillic (general)
54
6f27e0f9 55;; ISO-8859-5 stuff
2b01336d 56
4ed46869 57(make-coding-system
efbc7e89 58 'cyrillic-iso-8bit 2 ?5
5ef35063 59 "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)."
11b77f14 60 '(ascii cyrillic-iso8859-5 nil nil
6de22aea 61 nil nil nil nil nil nil nil nil nil nil nil t)
11b77f14
KH
62 '((safe-charsets ascii cyrillic-iso8859-5)
63 (mime-charset . iso-8859-5)))
4ed46869 64
71eabd24 65(define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit)
efbc7e89 66
5f1d80c7 67(set-language-info-alist
a564ccf9 68 "Cyrillic-ISO" '((charset cyrillic-iso8859-5)
11b77f14
KH
69 (coding-system cyrillic-iso-8bit)
70 (coding-priority cyrillic-iso-8bit)
6f27e0f9 71 (input-method . "cyrillic-yawerty") ; fixme
a564ccf9
KH
72 (nonascii-translation . cyrillic-iso8859-5)
73 (unibyte-display . cyrillic-iso-8bit)
74 (features cyril-util)
5f1d80c7 75 (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
11b77f14
KH
76 (documentation . "Support for Cyrillic ISO-8859-5."))
77 '("Cyrillic"))
5f1d80c7 78
6f27e0f9 79;; KOI-8R stuff
2b01336d 80
6f27e0f9
DL
81;; The mule-unicode portion of this is from
82;; http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT,
83;; which references RFC 1489.
70c58a1f
KH
84(defvar cyrillic-koi8-r-decode-table
85 [
86 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
87 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
88 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
89 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
90 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
91 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
92 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
93 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
6f27e0f9
DL
94 ;; 8859-5 plus Unicode
95 ?\e$,2 \e(B ?\e$,2 "\e(B ?\e$,2 ,\e(B ?\e$,2 0\e(B ?\e$,2 4\e(B ?\e$,2 8\e(B ?\e$,2 <\e(B ?\e$,2 D\e(B ?\e$,2 L\e(B ?\e$,2 T\e(B ?\e$,2 \\e(B ?\e$,2!@\e(B ?\e$,2!D\e(B ?\e$,2!H\e(B ?\e$,2!L\e(B ?\e$,2!P\e(B
96 ?\e$,2!Q\e(B ?\e$,2!R\e(B ?\e$,2!S\e(B ?\e$,1{ \e(B ?\e$,2!`\e(B ?\e$,1s"\e(B ?\e$,1x:\e(B ?\e$,1xh\e(B ?\e$,1y$\e(B ?\e$,1y%\e(B ?\e,L \e(B ?\e$,1{!\e(B ?\e,A0\e(B ?\e,A2\e(B ?\e,A7\e(B ?\e,Aw\e(B
97 ?\e$,2 p\e(B ?\e$,2 q\e(B ?\e$,2 r\e(B ?\e,Lq\e(B ?\e$,2 s\e(B ?\e$,2 t\e(B ?\e$,2 u\e(B ?\e$,2 v\e(B ?\e$,2 w\e(B ?\e$,2 x\e(B ?\e$,2 y\e(B ?\e$,2 z\e(B ?\e$,2 {\e(B ?\e$,2 |\e(B ?\e$,2 }\e(B ?\e$,2 ~\e(B
98 ?\e$,2 \7f\e(B ?\e$,2! \e(B ?\e$,2!!\e(B ?\e,L!\e(B ?\e$,2!"\e(B ?\e$,2!#\e(B ?\e$,2!$\e(B ?\e$,2!%\e(B ?\e$,2!&\e(B ?\e$,2!'\e(B ?\e$,2!(\e(B ?\e$,2!)\e(B ?\e$,2!*\e(B ?\e$,2!+\e(B ?\e$,2!,\e(B ?\e,A)\e(B
99 ?\e,Ln\e(B ?\e,LP\e(B ?\e,LQ\e(B ?\e,Lf\e(B ?\e,LT\e(B ?\e,LU\e(B ?\e,Ld\e(B ?\e,LS\e(B ?\e,Le\e(B ?\e,LX\e(B ?\e,LY\e(B ?\e,LZ\e(B ?\e,L[\e(B ?\e,L\\e(B ?\e,L]\e(B ?\e,L^\e(B
100 ?\e,L_\e(B ?\e,Lo\e(B ?\e,L`\e(B ?\e,La\e(B ?\e,Lb\e(B ?\e,Lc\e(B ?\e,LV\e(B ?\e,LR\e(B ?\e,Ll\e(B ?\e,Lk\e(B ?\e,LW\e(B ?\e,Lh\e(B ?\e,Lm\e(B ?\e,Li\e(B ?\e,Lg\e(B ?\e,Lj\e(B
101 ?\e,LN\e(B ?\e,L0\e(B ?\e,L1\e(B ?\e,LF\e(B ?\e,L4\e(B ?\e,L5\e(B ?\e,LD\e(B ?\e,L3\e(B ?\e,LE\e(B ?\e,L8\e(B ?\e,L9\e(B ?\e,L:\e(B ?\e,L;\e(B ?\e,L<\e(B ?\e,L=\e(B ?\e,L>\e(B
102 ?\e,L?\e(B ?\e,LO\e(B ?\e,L@\e(B ?\e,LA\e(B ?\e,LB\e(B ?\e,LC\e(B ?\e,L6\e(B ?\e,L2\e(B ?\e,LL\e(B ?\e,LK\e(B ?\e,L7\e(B ?\e,LH\e(B ?\e,LM\e(B ?\e,LI\e(B ?\e,LG\e(B ?\e,LJ\e(B
103 ;; All Unicode:
104;; ?\e$,2 \e(B ?\e$,2 "\e(B ?\e$,2 ,\e(B ?\e$,2 0\e(B ?\e$,2 4\e(B ?\e$,2 8\e(B ?\e$,2 <\e(B ?\e$,2 D\e(B ?\e$,2 L\e(B ?\e$,2 T\e(B ?\e$,2 \\e(B ?\e$,2!@\e(B ?\e$,2!D\e(B ?\e$,2!H\e(B ?\e$,2!L\e(B ?\e$,2!P\e(B
105;; ?\e$,2!Q\e(B ?\e$,2!R\e(B ?\e$,2!S\e(B ?\e$,1{ \e(B ?\e$,2!`\e(B ?\e$,1s"\e(B ?\e$,1x:\e(B ?\e$,1xh\e(B ?\e$,1y$\e(B ?\e$,1y%\e(B ?\e,A \e(B ?\e$,1{!\e(B ?\e,A0\e(B ?\e,A2\e(B ?\e,A7\e(B ?\e,Aw\e(B
106;; ?\e$,2 p\e(B ?\e$,2 q\e(B ?\e$,2 r\e(B ?\e$,1(q\e(B ?\e$,2 s\e(B ?\e$,2 t\e(B ?\e$,2 u\e(B ?\e$,2 v\e(B ?\e$,2 w\e(B ?\e$,2 x\e(B ?\e$,2 y\e(B ?\e$,2 z\e(B ?\e$,2 {\e(B ?\e$,2 |\e(B ?\e$,2 }\e(B ?\e$,2 ~\e(B
107;; ?\e$,2 \7f\e(B ?\e$,2! \e(B ?\e$,2!!\e(B ?\e$,1(!\e(B ?\e$,2!"\e(B ?\e$,2!#\e(B ?\e$,2!$\e(B ?\e$,2!%\e(B ?\e$,2!&\e(B ?\e$,2!'\e(B ?\e$,2!(\e(B ?\e$,2!)\e(B ?\e$,2!*\e(B ?\e$,2!+\e(B ?\e$,2!,\e(B ?\e,A)\e(B
108;; ?\e$,1(n\e(B ?\e$,1(P\e(B ?\e$,1(Q\e(B ?\e$,1(f\e(B ?\e$,1(T\e(B ?\e$,1(U\e(B ?\e$,1(d\e(B ?\e$,1(S\e(B ?\e$,1(e\e(B ?\e$,1(X\e(B ?\e$,1(Y\e(B ?\e$,1(Z\e(B ?\e$,1([\e(B ?\e$,1(\\e(B ?\e$,1(]\e(B ?\e$,1(^\e(B
109;; ?\e$,1(_\e(B ?\e$,1(o\e(B ?\e$,1(`\e(B ?\e$,1(a\e(B ?\e$,1(b\e(B ?\e$,1(c\e(B ?\e$,1(V\e(B ?\e$,1(R\e(B ?\e$,1(l\e(B ?\e$,1(k\e(B ?\e$,1(W\e(B ?\e$,1(h\e(B ?\e$,1(m\e(B ?\e$,1(i\e(B ?\e$,1(g\e(B ?\e$,1(j\e(B
110;; ?\e$,1(N\e(B ?\e$,1(0\e(B ?\e$,1(1\e(B ?\e$,1(F\e(B ?\e$,1(4\e(B ?\e$,1(5\e(B ?\e$,1(D\e(B ?\e$,1(3\e(B ?\e$,1(E\e(B ?\e$,1(8\e(B ?\e$,1(9\e(B ?\e$,1(:\e(B ?\e$,1(;\e(B ?\e$,1(<\e(B ?\e$,1(=\e(B ?\e$,1(>\e(B
111;; ?\e$,1(?\e(B ?\e$,1(O\e(B ?\e$,1(@\e(B ?\e$,1(A\e(B ?\e$,1(B\e(B ?\e$,1(C\e(B ?\e$,1(6\e(B ?\e$,1(2\e(B ?\e$,1(L\e(B ?\e$,1(K\e(B ?\e$,1(7\e(B ?\e$,1(H\e(B ?\e$,1(M\e(B ?\e$,1(I\e(B ?\e$,1(G\e(B ?\e$,1(J\e(B
112 ]
70c58a1f
KH
113 "Cyrillic KOI8-R decoding table.")
114
1f487ef8
KH
115(let ((table (make-translation-table-from-vector
116 cyrillic-koi8-r-decode-table)))
117 (define-translation-table 'cyrillic-koi8-r-nonascii-translation-table table)
118 (define-translation-table 'cyrillic-koi8-r-encode-table
119 (char-table-extra-slot table 0)))
70c58a1f 120
6f27e0f9
DL
121;; No point in keeping it around. (It can't be let-bound, since it's
122;; needed for macro expansion.)
123(makunbound 'cyrillic-koi8-r-decode-table)
124
2b01336d 125(define-ccl-program ccl-decode-koi8
6f27e0f9 126 `(4
1f487ef8
KH
127 ((loop
128 (r0 = 0)
129 (read r1)
130 (if (r1 < 128)
131 (write-repeat r1)
132 ((translate-character cyrillic-koi8-r-nonascii-translation-table r0 r1)
d98276be 133 (translate-character ucs-translation-table-for-decode r0 r1)
1f487ef8
KH
134 (write-multibyte-character r0 r1)
135 (repeat))))))
6f27e0f9 136 "CCL program to decode KOI8-R.")
2b01336d
KH
137
138(define-ccl-program ccl-encode-koi8
139 `(1 ; first element of a CCL program: buffer magnification
1f487ef8
KH
140 ((loop
141 (read-multibyte-character r0 r1) ; r0 = charset id, r1 = code
6f27e0f9 142 (translate-character cyrillic-koi8-r-encode-table r0 r1) ; map through the encode table
58566dff
KH
143 (if (r0 != ,(charset-id 'ascii)) ; after translation, only ascii and
144 (if (r0 != ,(charset-id 'eight-bit-graphic)) ; eight-bit-graphic / eight-bit-control
145 (if (r0 != ,(charset-id 'eight-bit-control)) ; results are written as-is;
146 (r1 = ??)))) ; any other charset is replaced by a literal `?'
1f487ef8 147 (write-repeat r1)))) ; emit r1, then run the loop again
6f27e0f9
DL
148 "CCL program to encode KOI8-R.")
149
150(defun cyrillic-unify-encoding (table)
151 "Set up equivalent characters in the encoding TABLE.
d98276be 152This works whether or not the table is Unicode-based or
6f27e0f9
DL
1538859-5-based. (Only appropriate for Cyrillic.)"
154 (let ((table (get table 'translation-table))) ; TABLE is a symbol; use the table stored on its `translation-table' property
155 (dotimes (i 96) ; i = 0..95, i.e. cyrillic-iso8859-5 positions 32..127
156 (let* ((c (make-char 'cyrillic-iso8859-5 (+ i 32)))
157 (u ; equivalent Unicode char
158 (cond ((eq c ?\e,L \e(B) ?\e,A \e(B)
159 ((eq c ?\e,L-\e(B) ?\e,A-\e(B)
160 ((eq c ?\e,L}\e(B) ?\e,A'\e(B)
161 (t (decode-char 'ucs (+ #x400 i))))) ; all other positions: U+0400 + i
162 (ec (aref table c)) ; encoding of 8859-5
163 (uc (aref table u))) ; encoding of Unicode
164 (unless (memq c '(?\e,L \e(B ?\e,L-\e(B ?\e,L}\e(B)) ; 8859-5 exceptions
165 (unless uc ; Unicode char has no entry yet:
166 (aset table u ec)) ; give it the 8859-5 char's entry
167 (unless ec ; 8859-5 char has no entry yet:
168 (aset table c uc))))))) ; give it the Unicode char's entry
169
170(cyrillic-unify-encoding 'cyrillic-koi8-r-encode-table)
171
2b01336d 172(make-coding-system
4b9121fc 173 'cyrillic-koi8 4
49716ecd
RS
174 ;; We used to use ?K. It is true that ?K is more strictly correct,
175 ;; but it is also used for Korean.
176 ;; So people who use koi8 for languages other than Russian
177 ;; will have to forgive us.
6f27e0f9 178 ?R "KOI8-R 8-bit encoding for Cyrillic (MIME: KOI8-R)."
7fbf766f 179 '(ccl-decode-koi8 . ccl-encode-koi8)
6f27e0f9 180 `((safe-chars . cyrillic-koi8-r-encode-table)
70c58a1f 181 (mime-charset . koi8-r)
d98276be
KH
182 (valid-codes (0 . 255))
183 (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode)))
2b01336d 184
71eabd24
RS
185(define-coding-system-alias 'koi8-r 'cyrillic-koi8)
186(define-coding-system-alias 'koi8 'cyrillic-koi8)
6f27e0f9
DL
187(define-coding-system-alias 'cp878 'cyrillic-koi8)
188
189;; Allow displaying some of KOI & al with an 8859-5-encoded font. We
190;; won't bother about the exceptions when encoding the font, since
191;; NBSP will fall through below and work anyhow, and we'll have
192;; avoided setting the fontset for the other two to 8859-5 -- they're
193;; not in KOI and Alternativnyj anyhow.
194(define-ccl-program ccl-encode-8859-5-font
195 `(0 ; first element of a CCL program: buffer magnification
196 ((if (r0 == ,(charset-id 'cyrillic-iso8859-5))
197 (r1 += 128) ; 8859-5 char: font code is r1 + 128
198 (if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
199 (r1 = (r2 + 128)))))) ; mule-unicode char: font code is r2 + 128 (presumably r2 is the position within the Cyrillic row -- confirm); other charsets leave r1 unchanged
200 "Encode ISO 8859-5 and Cyrillic Unicode chars to 8859-5 font.")
201
202(add-to-list 'font-ccl-encoder-alist '("iso8859-5" . ccl-encode-8859-5-font))
2b01336d 203
6f27e0f9 204;; The table is set up later to encode both Unicode and 8859-5.
2b01336d 205(define-ccl-program ccl-encode-koi8-font
70c58a1f 206 `(0
6f27e0f9 207 (translate-character cyrillic-koi8-r-encode-table r0 r1))
2b01336d
KH
208 "CCL program to encode Cyrillic chars to KOI font.")
209
6f27e0f9 210(add-to-list 'font-ccl-encoder-alist '("koi8" . ccl-encode-koi8-font))
70c58a1f 211
5f1d80c7 212(set-language-info-alist
a564ccf9
KH
213 "Cyrillic-KOI8" `((charset cyrillic-iso8859-5)
214 (nonascii-translation
1f487ef8
KH
215 . ,(get 'cyrillic-koi8-r-nonascii-translation-table
216 'translation-table))
11b77f14 217 (coding-system cyrillic-koi8)
6f27e0f9 218 (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
76509389 219 (input-method . "russian-typewriter")
a564ccf9
KH
220 (features cyril-util)
221 (unibyte-display . cyrillic-koi8)
5f1d80c7 222 (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
70c58a1f 223 (documentation . "Support for Cyrillic KOI8-R."))
11b77f14 224 '("Cyrillic"))
5f1d80c7 225
76509389
DL
226(set-language-info-alist
227 "Russian" `((charset cyrillic-iso8859-5)
228 (nonascii-translation
229 . ,(get 'cyrillic-koi8-r-nonascii-translation-table
230 'translation-table))
231 (coding-system cyrillic-koi8)
232 (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
233 (input-method . "russian-computer")
234 (features cyril-util)
235 (unibyte-display . cyrillic-koi8)
236 (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
237 (documentation . "\
238Support for Russian using koi8-r and the russian-computer input method.")
239 (tutorial . "TUTORIAL.ru"))
240 '("Cyrillic"))
241
6f27e0f9
DL
242
243(defvar cyrillic-koi8-u-decode-table
244 [
245 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
246 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
247 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
248 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
249 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
250 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
251 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
252 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
253 ;; All Unicode:
254;; ?\e$,2 \e(B ?\e$,2 "\e(B ?\e$,2 ,\e(B ?\e$,2 0\e(B ?\e$,2 4\e(B ?\e$,2 8\e(B ?\e$,2 <\e(B ?\e$,2 D\e(B ?\e$,2 L\e(B ?\e$,2 T\e(B ?\e$,2 \\e(B ?\e$,2!@\e(B ?\e$,2!D\e(B ?\e$,2!H\e(B ?\e$,2!L\e(B ?\e$,2!P\e(B
255;; ?\e$,2!Q\e(B ?\e$,2!R\e(B ?\e$,2!S\e(B ?\e$,1{ \e(B ?\e$,2!`\e(B ?\e$,1x9\e(B ?\e$,1x:\e(B ?\e$,1xh\e(B ?\e$,1y$\e(B ?\e$,1y%\e(B ?\e,L \e(B ?\e$,1{!\e(B ?\e,A0\e(B ?\e,A2\e(B ?\e,A7\e(B ?\e,Aw\e(B
256;; ?\e$,2 p\e(B ?\e$,2 q\e(B ?\e$,2 r\e(B ?\e$,1(q\e(B ?\e$,1(t\e(B ?\e$,2 t\e(B ?\e$,1(v\e(B ?\e$,1(w\e(B ?\e$,2 w\e(B ?\e$,2 x\e(B ?\e$,2 y\e(B ?\e$,2 z\e(B ?\e$,2 {\e(B ?\e$,1)Q\e(B ?\e$,2 }\e(B ?\e$,2 ~\e(B
257;; ?\e$,2 \7f\e(B ?\e$,2! \e(B ?\e$,2!!\e(B ?\e$,1(!\e(B ?\e$,1($\e(B ?\e$,2!#\e(B ?\e$,1(&\e(B ?\e$,1('\e(B ?\e$,2!&\e(B ?\e$,2!'\e(B ?\e$,2!(\e(B ?\e$,2!)\e(B ?\e$,2!*\e(B ?\e$,1)P\e(B ?\e$,2!,\e(B ?\e,A)\e(B
258;; ?\e$,1(n\e(B ?\e$,1(P\e(B ?\e$,1(Q\e(B ?\e$,1(f\e(B ?\e$,1(T\e(B ?\e$,1(U\e(B ?\e$,1(d\e(B ?\e$,1(S\e(B ?\e$,1(e\e(B ?\e$,1(X\e(B ?\e$,1(Y\e(B ?\e$,1(Z\e(B ?\e$,1([\e(B ?\e$,1(\\e(B ?\e$,1(]\e(B ?\e$,1(^\e(B
259;; ?\e$,1(_\e(B ?\e$,1(o\e(B ?\e$,1(`\e(B ?\e$,1(a\e(B ?\e$,1(b\e(B ?\e$,1(c\e(B ?\e$,1(V\e(B ?\e$,1(R\e(B ?\e$,1(l\e(B ?\e$,1(k\e(B ?\e$,1(W\e(B ?\e$,1(h\e(B ?\e$,1(m\e(B ?\e$,1(i\e(B ?\e$,1(g\e(B ?\e$,1(j\e(B
260;; ?\e$,1(N\e(B ?\e$,1(0\e(B ?\e$,1(1\e(B ?\e$,1(F\e(B ?\e$,1(4\e(B ?\e$,1(5\e(B ?\e$,1(D\e(B ?\e$,1(3\e(B ?\e$,1(E\e(B ?\e$,1(8\e(B ?\e$,1(9\e(B ?\e$,1(:\e(B ?\e$,1(;\e(B ?\e$,1(<\e(B ?\e$,1(=\e(B ?\e$,1(>\e(B
261;; ?\e$,1(?\e(B ?\e$,1(O\e(B ?\e$,1(@\e(B ?\e$,1(A\e(B ?\e$,1(B\e(B ?\e$,1(C\e(B ?\e$,1(6\e(B ?\e$,1(2\e(B ?\e$,1(L\e(B ?\e$,1(K\e(B ?\e$,1(7\e(B ?\e$,1(H\e(B ?\e$,1(M\e(B ?\e$,1(I\e(B ?\e$,1(G\e(B ?\e$,1(J\e(B
262;; 8859-5 plus Unicode:
263 ?\e$,2 \e(B ?\e$,2 "\e(B ?\e$,2 ,\e(B ?\e$,2 0\e(B ?\e$,2 4\e(B ?\e$,2 8\e(B ?\e$,2 <\e(B ?\e$,2 D\e(B ?\e$,2 L\e(B ?\e$,2 T\e(B ?\e$,2 \\e(B ?\e$,2!@\e(B ?\e$,2!D\e(B ?\e$,2!H\e(B ?\e$,2!L\e(B ?\e$,2!P\e(B
264 ?\e$,2!Q\e(B ?\e$,2!R\e(B ?\e$,2!S\e(B ?\e$,1{ \e(B ?\e$,2!`\e(B ?\e$,1x9\e(B ?\e$,1x:\e(B ?\e$,1xh\e(B ?\e$,1y$\e(B ?\e$,1y%\e(B ?\e,L \e(B ?\e$,1{!\e(B ?\e,A0\e(B ?\e,A2\e(B ?\e,A7\e(B ?\e,Aw\e(B
265 ?\e$,2 p\e(B ?\e$,2 q\e(B ?\e$,2 r\e(B ?\e,Lq\e(B ?\e,Lt\e(B ?\e$,2 t\e(B ?\e,Lv\e(B ?\e,Lw\e(B ?\e$,2 w\e(B ?\e$,2 x\e(B ?\e$,2 y\e(B ?\e$,2 z\e(B ?\e$,2 {\e(B ?\e$,1)Q\e(B ?\e$,2 }\e(B ?\e$,2 ~\e(B
266 ?\e$,2 \7f\e(B ?\e$,2! \e(B ?\e$,2!!\e(B ?\e,L!\e(B ?\e,L$\e(B ?\e$,2!#\e(B ?\e,L&\e(B ?\e,L'\e(B ?\e$,2!&\e(B ?\e$,2!'\e(B ?\e$,2!(\e(B ?\e$,2!)\e(B ?\e$,2!*\e(B ?\e$,1)P\e(B ?\e$,2!,\e(B ?\e,A)\e(B
267 ?\e,Ln\e(B ?\e,LP\e(B ?\e,LQ\e(B ?\e,Lf\e(B ?\e,LT\e(B ?\e,LU\e(B ?\e,Ld\e(B ?\e,LS\e(B ?\e,Le\e(B ?\e,LX\e(B ?\e,LY\e(B ?\e,LZ\e(B ?\e,L[\e(B ?\e,L\\e(B ?\e,L]\e(B ?\e,L^\e(B
268 ?\e,L_\e(B ?\e,Lo\e(B ?\e,L`\e(B ?\e,La\e(B ?\e,Lb\e(B ?\e,Lc\e(B ?\e,LV\e(B ?\e,LR\e(B ?\e,Ll\e(B ?\e,Lk\e(B ?\e,LW\e(B ?\e,Lh\e(B ?\e,Lm\e(B ?\e,Li\e(B ?\e,Lg\e(B ?\e,Lj\e(B
269 ?\e,LN\e(B ?\e,L0\e(B ?\e,L1\e(B ?\e,LF\e(B ?\e,L4\e(B ?\e,L5\e(B ?\e,LD\e(B ?\e,L3\e(B ?\e,LE\e(B ?\e,L8\e(B ?\e,L9\e(B ?\e,L:\e(B ?\e,L;\e(B ?\e,L<\e(B ?\e,L=\e(B ?\e,L>\e(B
270 ?\e,L?\e(B ?\e,LO\e(B ?\e,L@\e(B ?\e,LA\e(B ?\e,LB\e(B ?\e,LC\e(B ?\e,L6\e(B ?\e,L2\e(B ?\e,LL\e(B ?\e,LK\e(B ?\e,L7\e(B ?\e,LH\e(B ?\e,LM\e(B ?\e,LI\e(B ?\e,LG\e(B ?\e,LJ\e(B
271 ]
272 "Cyrillic KOI8-U decoding table.")
273
274(let ((table (make-translation-table-from-vector
275 cyrillic-koi8-u-decode-table)))
276 (define-translation-table 'cyrillic-koi8-u-nonascii-translation-table table)
277 (define-translation-table 'cyrillic-koi8-u-encode-table
278 (char-table-extra-slot table 0)))
279
280(makunbound 'cyrillic-koi8-u-decode-table)
281
282(define-ccl-program ccl-decode-koi8-u
283 `(4
284 ((loop
285 (r0 = 0)
286 (read r1)
287 (if (r1 < 128)
288 (write-repeat r1)
289 ((translate-character cyrillic-koi8-u-nonascii-translation-table r0 r1)
d98276be 290 (translate-character ucs-translation-table-for-decode r0 r1)
6f27e0f9
DL
291 (write-multibyte-character r0 r1)
292 (repeat))))))
293 "CCL program to decode KOI8-U.")
294
295(define-ccl-program ccl-encode-koi8-u
296 `(1
297 ((loop
298 (read-multibyte-character r0 r1)
299 (translate-character cyrillic-koi8-u-encode-table r0 r1)
58566dff
KH
300 (if (r0 != ,(charset-id 'ascii))
301 (if (r0 != ,(charset-id 'eight-bit-graphic))
302 (if (r0 != ,(charset-id 'eight-bit-control))
303 (r1 = ??))))
6f27e0f9
DL
304 (write-repeat r1))))
305 "CCL program to encode KOI8-U.")
306
307(cyrillic-unify-encoding 'cyrillic-koi8-u-encode-table)
308
309(make-coding-system
310 'koi8-u 4
311 ?U "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
312 '(ccl-decode-koi8-u . ccl-encode-koi8-u)
313 `((safe-chars . cyrillic-koi8-u-encode-table)
314 (mime-charset . koi8-u)
d98276be
KH
315 (valid-codes (0 . 255))
316 (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode)))
6f27e0f9
DL
317
318(define-ccl-program ccl-encode-koi8-u-font
319 `(0
320 (translate-character cyrillic-koi8-u-encode-table r0 r1))
321 "CCL program to encode Cyrillic chars to KOI-U font.")
322
323(add-to-list 'font-ccl-encoder-alist '("koi8-u" . ccl-encode-koi8-u-font))
324
325(set-language-info-alist
326 "Ukrainian" `((coding-system koi8-u)
d98276be 327 (coding-priority koi8-u)
6f27e0f9 328 (nonascii-translation
b9e98de5 329 . ,(get 'cyrillic-koi8-u-nonascii-translation-table
6f27e0f9
DL
330 'translation-table))
331 (input-method . "ukrainian-computer")
d98276be
KH
332 (features code-pages)
333 (documentation
334 . "Support for Ukrainian with KOI8-U character set."))
6f27e0f9
DL
335 '("Cyrillic"))
336
337;;; ALTERNATIVNYJ stuff
2b01336d 338
b9e98de5
DL
339;; Fixme: It's unclear what's the correct table. I've found
340;; statements both that it's the same as cp866 and somewhat different,
1d5f8bfc 341;; but nothing that looks really definitive.
70c58a1f
KH
342(defvar cyrillic-alternativnyj-decode-table
343 [
344 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
345 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
346 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
347 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
348 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
349 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
350 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
351 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
6f27e0f9
DL
352;; ?\e$,1(0\e(B ?\e$,1(1\e(B ?\e$,1(2\e(B ?\e$,1(3\e(B ?\e$,1(4\e(B ?\e$,1(5\e(B ?\e$,1(6\e(B ?\e$,1(7\e(B ?\e$,1(8\e(B ?\e$,1(9\e(B ?\e$,1(:\e(B ?\e$,1(;\e(B ?\e$,1(<\e(B ?\e$,1(=\e(B ?\e$,1(>\e(B ?\e$,1(?\e(B
353;; ?\e$,1(@\e(B ?\e$,1(A\e(B ?\e$,1(B\e(B ?\e$,1(C\e(B ?\e$,1(D\e(B ?\e$,1(E\e(B ?\e$,1(F\e(B ?\e$,1(G\e(B ?\e$,1(H\e(B ?\e$,1(I\e(B ?\e$,1(J\e(B ?\e$,1(K\e(B ?\e$,1(L\e(B ?\e$,1(M\e(B ?\e$,1(N\e(B ?\e$,1(O\e(B
354;; ?\e$,1(P\e(B ?\e$,1(Q\e(B ?\e$,1(R\e(B ?\e$,1(S\e(B ?\e$,1(T\e(B ?\e$,1(U\e(B ?\e$,1(V\e(B ?\e$,1(W\e(B ?\e$,1(X\e(B ?\e$,1(Y\e(B ?\e$,1(Z\e(B ?\e$,1([\e(B ?\e$,1(\\e(B ?\e$,1(]\e(B ?\e$,1(^\e(B ?\e$,1(_\e(B
355;; ?\e$,2!Q\e(B ?\e$,2!R\e(B ?\e$,2!S\e(B ?\e$,2 "\e(B ?\e$,2 D\e(B ?\e$,2!!\e(B ?\e$,2!"\e(B ?\e$,2 v\e(B ?\e$,2 u\e(B ?\e$,2!#\e(B ?\e$,2 q\e(B ?\e$,2 w\e(B ?\e$,2 }\e(B ?\e$,2 |\e(B ?\e$,2 {\e(B ?\e$,2 0\e(B
356;; ?\e$,2 4\e(B ?\e$,2 T\e(B ?\e$,2 L\e(B ?\e$,2 <\e(B ?\e$,2 \e(B ?\e$,2 \\e(B ?\e$,2 ~\e(B ?\e$,2 \7f\e(B ?\e$,2 z\e(B ?\e$,2 t\e(B ?\e$,2!)\e(B ?\e$,2!&\e(B ?\e$,2! \e(B ?\e$,2 p\e(B ?\e$,2!,\e(B ?\e$,2!'\e(B
357;; ?\e$,2!(\e(B ?\e$,2!$\e(B ?\e$,2!%\e(B ?\e$,2 y\e(B ?\e$,2 x\e(B ?\e$,2 r\e(B ?\e$,2 s\e(B ?\e$,2!+\e(B ?\e$,2!*\e(B ?\e$,2 8\e(B ?\e$,2 ,\e(B ?\e$,2!H\e(B ?\e$,2!D\e(B ?\e$,2!L\e(B ?\e$,2!P\e(B ?\e$,2!@\e(B
358;; ?\e$,1(`\e(B ?\e$,1(a\e(B ?\e$,1(b\e(B ?\e$,1(c\e(B ?\e$,1(d\e(B ?\e$,1(e\e(B ?\e$,1(f\e(B ?\e$,1(g\e(B ?\e$,1(h\e(B ?\e$,1(i\e(B ?\e$,1(j\e(B ?\e$,1(k\e(B ?\e$,1(l\e(B ?\e$,1(m\e(B ?\e$,1(n\e(B ?\e$,1(o\e(B
1d5f8bfc 359;; ?\e$,1(!\e(B ?\e$,1(q\e(B ?\e$,1ry\e(B ?\e$,1rx\e(B ?\e$,1%A\e(B ?\e$,1%@\e(B ?\e$,1s:\e(B ?\e$,1s9\e(B ?\e$,1vq\e(B ?\e$,1vs\e(B ?\e,A1\e(B ?\e,Aw\e(B ?\e$,1uV\e(B ?\e,A$\e(B ?\e$,2!`\e(B ?\e,A \e(B ;
6f27e0f9 360;; 8859+Unicode
70c58a1f
KH
361 ?\e,L0\e(B ?\e,L1\e(B ?\e,L2\e(B ?\e,L3\e(B ?\e,L4\e(B ?\e,L5\e(B ?\e,L6\e(B ?\e,L7\e(B ?\e,L8\e(B ?\e,L9\e(B ?\e,L:\e(B ?\e,L;\e(B ?\e,L<\e(B ?\e,L=\e(B ?\e,L>\e(B ?\e,L?\e(B
362 ?\e,L@\e(B ?\e,LA\e(B ?\e,LB\e(B ?\e,LC\e(B ?\e,LD\e(B ?\e,LE\e(B ?\e,LF\e(B ?\e,LG\e(B ?\e,LH\e(B ?\e,LI\e(B ?\e,LJ\e(B ?\e,LK\e(B ?\e,LL\e(B ?\e,LM\e(B ?\e,LN\e(B ?\e,LO\e(B
363 ?\e,LP\e(B ?\e,LQ\e(B ?\e,LR\e(B ?\e,LS\e(B ?\e,LT\e(B ?\e,LU\e(B ?\e,LV\e(B ?\e,LW\e(B ?\e,LX\e(B ?\e,LY\e(B ?\e,LZ\e(B ?\e,L[\e(B ?\e,L\\e(B ?\e,L]\e(B ?\e,L^\e(B ?\e,L_\e(B
6f27e0f9
DL
364 ?\e$,2!Q\e(B ?\e$,2!R\e(B ?\e$,2!S\e(B ?\e$,2 "\e(B ?\e$,2 D\e(B ?\e$,2!!\e(B ?\e$,2!"\e(B ?\e$,2 v\e(B ?\e$,2 u\e(B ?\e$,2!#\e(B ?\e$,2 q\e(B ?\e$,2 w\e(B ?\e$,2 }\e(B ?\e$,2 |\e(B ?\e$,2 {\e(B ?\e$,2 0\e(B
365 ?\e$,2 4\e(B ?\e$,2 T\e(B ?\e$,2 L\e(B ?\e$,2 <\e(B ?\e$,2 \e(B ?\e$,2 \\e(B ?\e$,2 ~\e(B ?\e$,2 \7f\e(B ?\e$,2 z\e(B ?\e$,2 t\e(B ?\e$,2!)\e(B ?\e$,2!&\e(B ?\e$,2! \e(B ?\e$,2 p\e(B ?\e$,2!,\e(B ?\e$,2!'\e(B
366 ?\e$,2!(\e(B ?\e$,2!$\e(B ?\e$,2!%\e(B ?\e$,2 y\e(B ?\e$,2 x\e(B ?\e$,2 r\e(B ?\e$,2 s\e(B ?\e$,2!+\e(B ?\e$,2!*\e(B ?\e$,2 8\e(B ?\e$,2 ,\e(B ?\e$,2!H\e(B ?\e$,2!D\e(B ?\e$,2!L\e(B ?\e$,2!P\e(B ?\e$,2!@\e(B
70c58a1f 367 ?\e,L`\e(B ?\e,La\e(B ?\e,Lb\e(B ?\e,Lc\e(B ?\e,Ld\e(B ?\e,Le\e(B ?\e,Lf\e(B ?\e,Lg\e(B ?\e,Lh\e(B ?\e,Li\e(B ?\e,Lj\e(B ?\e,Lk\e(B ?\e,Ll\e(B ?\e,Lm\e(B ?\e,Ln\e(B ?\e,Lo\e(B
1d5f8bfc
KH
368 ;; Taken from http://www.cyrillic.com/ref/cyrillic/koi-8alt.html
369 ;; with guesses for the Unicodes of the glyphs in the absence of a
370 ;; table.
371 ?\e,L!\e(B ?\e,Lq\e(B ?\e$,1ry\e(B ?\e$,1rx\e(B ?\e$,1%A\e(B ?\e$,1%@\e(B ?\e$,1s:\e(B ?\e$,1s9\e(B ?\e$,1vq\e(B ?\e$,1vs\e(B ?\e,A1\e(B ?\e,Aw\e(B ?\e,Lp\e(B ?\e,A$\e(B ?\e$,2!`\e(B ?\e,L \e(B]
70c58a1f
KH
372 "Cyrillic ALTERNATIVNYJ decoding table.")
373
1f487ef8
KH
374(let ((table (make-translation-table-from-vector
375 cyrillic-alternativnyj-decode-table)))
376 (define-translation-table 'cyrillic-alternativnyj-nonascii-translation-table
70c58a1f 377 table)
1f487ef8
KH
378 (define-translation-table 'cyrillic-alternativnyj-encode-table
379 (char-table-extra-slot table 0)))
70c58a1f 380
6f27e0f9 381(makunbound 'cyrillic-alternativnyj-decode-table)
70c58a1f 382
2b01336d 383(define-ccl-program ccl-decode-alternativnyj
6f27e0f9 384 `(4
1f487ef8
KH
385 ((loop
386 (r0 = 0)
387 (read r1)
388 (if (r1 < 128)
389 (write-repeat r1)
390 ((translate-character cyrillic-alternativnyj-nonascii-translation-table
391 r0 r1)
d98276be 392 (translate-character ucs-translation-table-for-decode r0 r1)
1f487ef8
KH
393 (write-multibyte-character r0 r1)
394 (repeat))))))
2b01336d
KH
395 "CCL program to decode Alternativnyj.")
396
397(define-ccl-program ccl-encode-alternativnyj
398 `(1
1f487ef8
KH
399 ((loop
400 (read-multibyte-character r0 r1)
53a1db80 401 (translate-character cyrillic-alternativnyj-encode-table r0 r1)
58566dff
KH
402 (if (r0 != ,(charset-id 'ascii))
403 (if (r0 != ,(charset-id 'eight-bit-graphic))
404 (if (r0 != ,(charset-id 'eight-bit-control))
405 (r1 = ??))))
1f487ef8 406 (write-repeat r1))))
2b01336d 407 "CCL program to encode Alternativnyj.")
6f27e0f9
DL
408
409(cyrillic-unify-encoding 'cyrillic-alternativnyj-encode-table)
410
2b01336d 411(make-coding-system
efbc7e89 412 'cyrillic-alternativnyj 4 ?A
1d5f8bfc 413 "ALTERNATIVNYJ 8-bit encoding for Cyrillic."
7fbf766f 414 '(ccl-decode-alternativnyj . ccl-encode-alternativnyj)
6f27e0f9
DL
415 `((safe-chars . cyrillic-alternativnyj-encode-table)
416 (valid-codes (0 . 255))
d98276be 417 (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode)))
2b01336d 418
71eabd24 419(define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj)
efbc7e89 420
2b01336d 421(define-ccl-program ccl-encode-alternativnyj-font
6f27e0f9
DL
422 `(0
423 (translate-character cyrillic-alternativnyj-encode-table r0 r1))
2b01336d
KH
424 "CCL program to encode Cyrillic chars to Alternativnyj font.")
425
6f27e0f9
DL
426(add-to-list 'font-ccl-encoder-alist
427 '("alternativnyj" . ccl-encode-alternativnyj-font))
2b01336d 428
4ed46869 429(set-language-info-alist
a564ccf9
KH
430 "Cyrillic-ALT" `((charset cyrillic-iso8859-5)
431 (nonascii-translation
1f487ef8
KH
432 . ,(get 'cyrillic-alternativnyj-nonascii-translation-table
433 'translation-table))
11b77f14
KH
434 (coding-system cyrillic-alternativnyj)
435 (coding-priority cyrillic-alternativnyj)
6f27e0f9 436 (input-method . "russian-typewriter")
a564ccf9
KH
437 (features cyril-util)
438 (unibyte-display . cyrillic-alternativnyj)
5f1d80c7 439 (sample-text . "Russian (\e,L@caaZXY\e(B) \e,L7T`PRabRcYbU\e(B!")
11b77f14
KH
440 (documentation . "Support for Cyrillic ALTERNATIVNYJ."))
441 '("Cyrillic"))
4ed46869 442
6f27e0f9
DL
443(set-language-info-alist
444 "Windows-1251" `((coding-system windows-1251)
445 (coding-priority windows-1251)
446 (nonascii-translation ; decode table for the same code page as the coding system
447 . ,(get 'decode-windows-1251 'translation-table))
448 (input-method . "russian-typewriter") ; fixme?
449 (features code-pages) ; presumably code-pages defines `decode-windows-1251' -- confirm
450 (documentation . "Support for windows-1251 character set."))
451 '("Cyrillic"))
452
453(set-language-info-alist
454 "Tajik" `((coding-system cyrillic-koi8-t)
76509389
DL
455 (coding-priority cyrillic-koi8-t)
456 (nonascii-translation
457 . ,(get 'decode-koi8-t 'translation-table))
458 (input-method . "russian-typewriter") ; fixme?
459 (features code-pages)
460 (documentation . "Support for Tajik using KOI8-T."))
6f27e0f9
DL
461 '("Cyrillic"))
462
463(set-language-info-alist
464 "Bulgarian" `((coding-system windows-1251)
465 (coding-priority windows-1251)
466 (nonascii-translation
467 . ,(get 'decode-windows-1251 'translation-table))
1d5f8bfc 468 (input-method . "bulgarian-bds")
6f27e0f9
DL
469 (features code-pages)
470 (documentation
76509389
DL
471 . "Support for Bulgarian with windows-1251 character set.")
472 (tutorial . "TUTORIAL.bg"))
6f27e0f9
DL
473 '("Cyrillic"))
474
475(set-language-info-alist
476 "Belarusian" `((coding-system windows-1251)
477 (coding-priority windows-1251)
478 (nonascii-translation
479 . ,(get 'decode-windows-1251 'translation-table))
480 (input-method . "belarusian")
481 (features code-pages)
482 (documentation
483 . "Support for Belarusian with windows-1251 character set.
484\(The name Belarusian replaced Byelorussian in the early 1990s.)"))
485 '("Cyrillic"))
486
41da80b1
DL
487(provide 'cyrillic)
488
4ed46869 489;;; cyrillic.el ends here