(latexenc-find-file-coding-system):
[bpt/emacs.git] / lisp / international / codepage.el
CommitLineData
55535639 1;;; codepage.el --- MS-DOS/MS-Windows specific coding systems
75e98450 2
2fd125a3
KH
3;; Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
4;; Copyright (C) 2000
5;; National Institute of Advanced Industrial Science and Technology (AIST)
6;; Registration Number H14PRO021
75e98450
EZ
7
8;; Author: Eli Zaretskii
9;; Maintainer: FSF
0ac646aa 10;; Keywords: i18n ms-dos ms-windows codepage
75e98450
EZ
11
12;; This file is part of GNU Emacs.
13
14;; GNU Emacs is free software; you can redistribute it and/or modify
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
25;; along with GNU Emacs; see the file COPYING. If not, write to the
26;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27;; Boston, MA 02111-1307, USA.
28
29;;; Commentary:
30
0ac646aa 31;; Special coding systems for DOS/Windows codepage support.
75e98450 32;;
0ac646aa
EZ
33;; These coding systems perform conversion from the DOS/Windows
34;; codepage encoding to one of the ISO-8859 character sets. Each
35;; codepage has its corresponding ISO-8859 charset, chosen so as to be
36;; able to convert all (or most) of the characters. The idea is that
37;; Emacs internally works with the usual MULE charsets, and the
38;; conversion to and from the DOS codepage is performed on I/O only.
39;; See term/internal.el for the complementary setup of the DOS
40;; terminal display and input methods.
75e98450
EZ
41;;
42;; Thanks to Ken'ichi Handa <handa@etl.go.jp> for writing the CCL
a1506d29 43;; encoders/decoders, and for help in debugging this code.
75e98450
EZ
44
45;;; Code:
46
(defun cp-coding-system-for-codepage-1 (coding mnemonic iso-name
                                               decoder encoder)
  "Make coding system CODING for a DOS codepage using translation tables.
CODING is the symbol naming the new coding system.
MNEMONIC is the character shown on the mode line for the coding system.
ISO-NAME is the symbol of the ISO-8859 charset that corresponds to this
codepage.
DECODER is the symbol of a translation table that converts characters in
the DOS codepage encoding to Emacs multibyte characters.
ENCODER is the symbol of a translation table that converts Emacs
multibyte characters into external DOS codepage codes."
  (save-match-data
    (let* ((ascii-id (charset-id 'ascii))
           (jisx0208-id (charset-id 'japanese-jisx0208))
           ;; Byte written for a character the codepage cannot express.
           (fallback-byte
            (cond ((not (eq system-type 'ms-dos)) ??)
                  (dos-unsupported-char-glyph
                   (logand dos-unsupported-char-glyph 255))
                  (t 127)))
           (decode-program
            (ccl-compile
             ;; The leading 4 is the buf_magnification parameter of the
             ;; CCL program: one input byte may become a multibyte
             ;; character of up to 4 bytes.
             `(4 (loop (read r1)
                       (if (r1 >= 128)
                           ((r0 = ,ascii-id)
                            (translate-character ,decoder r0 r1)
                            (write-multibyte-character r0 r1))
                         (write r1))
                       (repeat)))))
           (encode-program
            (ccl-compile
             ;; The leading 2 is the buf_magnification parameter.  The
             ;; -dos variant of the coding system emits \r\n for each
             ;; \n, so a factor of 2 covers even the worst case of
             ;; empty lines consisting of a single \n.
             `(2 (loop (read-multibyte-character r0 r1)
                       (if (r0 != ,ascii-id)
                           ((translate-character ,encoder r0 r1)
                            (if (r0 == ,jisx0208-id)
                                ((r1 = ,fallback-byte)
                                 (write r1)))))
                       (write-repeat r1)))))
           (decode-map (get decoder 'translation-table))
           (safe (make-char-table 'safe-chars))
           (byte 128))
      ;; Mark every character that an 8-bit codepage byte decodes to as
      ;; safe for this coding system.
      (while (< byte 256)
        (let ((decoded (aref decode-map byte)))
          (if decoded
              (aset safe decoded t)))
        (setq byte (1+ byte)))
      ;; Finally define CODING as a CCL-based (type 4) coding system.
      (make-coding-system
       coding 4 mnemonic
       (concat "8-bit encoding of " (symbol-name iso-name)
               " characters using IBM codepage " (symbol-name coding))
       (cons decode-program encode-program)
       (list (list 'safe-charsets
                   'ascii 'eight-bit-control 'eight-bit-graphic iso-name)
             (cons 'safe-chars safe)
             '(valid-codes (0 . 255)))))))
75e98450
EZ
110
(defun cp-decoding-vector-for-codepage (table charset offset)
  "Return a 256-element vector for decoding codepage bytes into CHARSET.
TABLE is a vector such as `cp850-decode-table': its Nth element is the
codepage byte whose glyph shows the character made by
\(make-char CHARSET (+ N OFFSET)), or nil if the codepage has no such glyph.
CHARSET is the ISO-8859 charset and OFFSET the code of its first
non-ASCII character.

In the returned vector, element B is the Emacs character that codepage
byte B decodes to.  Every byte that TABLE does not mention decodes to
itself."
  (let ((decode-vec (make-vector 256 nil)))
    ;; Start from the identity mapping for all 256 byte values.  (The
    ;; initial fill value is irrelevant because every slot is assigned
    ;; here, so no special \"undefined\" glyph needs to be computed.)
    (dotimes (byte 256)
      (aset decode-vec byte byte))
    ;; Invert TABLE: wherever the codepage has a glyph for the Nth
    ;; character of CHARSET, make that glyph's byte decode to it.
    (dotimes (n (length table))
      (let ((byte (aref table n)))
        (if byte
            (aset decode-vec byte (make-char charset (+ n offset))))))
    decode-vec))
133
134;;; You don't think I created all these tables below by hand, do you?
135;;; The following Awk script will create the table for cp850-to-Latin-1
136;;; conversion from the RFC 1345 file (the other tables are left as an
;;; exercise):
138;;; BEGIN { n_pages = 11;
139;;; pn["IBM437"] = 0; pn["IBM850"] = 1; pn["IBM851"] = 2;
140;;; pn["IBM852"] = 3; pn["IBM855"] = 4; pn["IBM860"] = 5;
141;;; pn["IBM861"] = 6; pn["IBM862"] = 7; pn["IBM863"] = 8;
142;;; pn["IBM864"] = 9; pn["IBM865"] = 10;
143;;; }
144;;; $1 == "&charset" { charset = $2; }
145;;; $1 == "&code" { code = $2; }
146;;; /^ [^&]/ {
147;;; if ((charset ~ /^IBM(437|8(5[0125]|6[0-5]))$/) || (charset ~ /^ISO_8859-1/))
148;;; {
149;;; for (i = 1; i <= NF; i++)
150;;; chars[charset,code++] = $i;
151;;; }
152;;; }
153;;;
154;;; END {
155;;; for (i = 160; i < 256; i++)
156;;; {
157;;; c = chars["ISO_8859-1:1987",i];
158;;; if (c == "??") # skip unused positions
159;;; {
160;;; printf " nil";
161;;; if ((i - 159)%16 == 0)
162;;; printf "\n";
163;;; continue;
164;;; }
165;;; found = 0;
166;;; for (j in pn)
167;;; map[j] = "nil";
168;;; for (combined in chars)
169;;; {
170;;; candidate = chars[combined];
171;;; split (combined, separate, SUBSEP);
172;;; if (separate[1] == "IBM850" && candidate == c)
173;;; {
174;;; found = 1;
175;;; map[separate[1]] = separate[2];
176;;; }
177;;; }
178;;; printf " %s", map["IBM850"];
179;;; if ((i - 159)%16 == 0)
180;;; printf "\n";
181;;; }
182;;; }
183
184;;; WARNING WARNING WARNING!!!
185;;;
186;;; If you want to get fancy with these tables, remember that the inverse
187;;; tables, created by `cp-decoding-vector-for-codepage' above, are installed
188;;; on MS-DOS as nonascii-translation-table (see `dos-codepage-setup' on
189;;; internal.el). Therefore, you should NOT put any codes below 128 in
190;;; these tables! Otherwise, various Emacs commands and functions will
191;;; mysteriously fail! For example, a typical screwup is to map the Latin-N
192;;; acute accent character to the apostrophe, and have all regexps which
193;;; end with "\\'" begin to fail (e.g., the automatic setting of the major
194;;; mode by file name extension will stop working).
195;;;
196;;; You HAVE BEEN warned!
197
198;; US/English/PC-8/IBM-2. This doesn't support Latin-1 characters very
199;; well, but why not use what we can salvage?
;; US/English/PC-8/IBM-2.
(defvar cp437-decode-table
  ;; Nth element is the code of a cp437 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp437 glyph.
  [
   255 173 155 156 nil 157 179 nil nil nil 166 174 170 196 nil nil
   248 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil
   nil 165 nil nil nil nil 153 nil nil nil nil nil 154 nil nil 225
   133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139
   nil 164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 152]
  "Table for converting ISO-8859-1 characters into codepage 437 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp437-decode-table 'charset 'latin-iso8859-1)
(put 'cp437-decode-table 'language "Latin-1")
(put 'cp437-decode-table 'offset 160)
214
215;; Multilingual (Latin-1)
(defvar cp850-decode-table
  ;; Nth element is the code of a cp850 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp850 glyph.
  [
   255 173 189 156 207 190 221 245 249 184 166 174 170 240 169 nil
   248 241 253 252 239 230 244 250 247 251 167 175 172 171 243 168
   183 181 182 199 142 143 146 128 212 144 210 211 222 214 215 216
   209 165 227 224 226 229 153 158 157 235 233 234 154 237 231 225
   133 160 131 198 132 134 145 135 138 130 136 137 141 161 140 139
   208 164 149 162 147 228 148 246 155 151 163 150 129 236 232 152]
  "Table for converting ISO-8859-1 characters into codepage 850 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp850-decode-table 'charset 'latin-iso8859-1)
(put 'cp850-decode-table 'language "Latin-1")
(put 'cp850-decode-table 'offset 160)
230
231;; Greek
(defvar cp851-decode-table
  ;; Nth element is the code of a cp851 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp851 glyph.
  [
   255 nil nil 156 nil nil nil 245 249 nil nil 174 nil 240 nil nil
   248 241 nil nil 239 nil 134 nil 141 143 144 175 146 171 149 152
   161 164 165 166 167 168 169 170 172 173 181 182 184 183 189 190
   198 199 nil 207 208 209 210 211 212 213 nil nil 155 157 158 159
   252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233
   234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil]
  "Table for converting ISO-8859-7 characters into codepage 851 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp851-decode-table 'charset 'greek-iso8859-7)
(put 'cp851-decode-table 'language "Greek")
(put 'cp851-decode-table 'offset 160)
243
244;; Slavic/Eastern Europe (Latin-2)
(defvar cp852-decode-table
  ;; Nth element is the code of a cp852 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-2 (+ N 160)).
  [
   255 164 244 157 207 149 151 245 249 230 184 155 141 240 166 189
   248 165 247 136 239 150 152 243 242 231 173 156 171 241 167 190
   232 181 182 198 142 145 143 128 172 144 168 211 183 214 215 210
   209 227 213 224 226 138 153 158 252 222 233 235 154 237 221 225
   234 160 131 199 132 146 134 135 159 130 169 137 216 161 140 212
   208 228 229 162 147 139 148 246 253 133 163 251 129 236 238 250]
  "Table for converting ISO-8859-2 characters into codepage 852 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp852-decode-table 'charset 'latin-iso8859-2)
(put 'cp852-decode-table 'language "Latin-2")
(put 'cp852-decode-table 'offset 160)
256
257;; Russian
(defvar cp855-decode-table
  ;; Nth element is the code of a cp855 glyph for the multibyte
  ;; character created by (make-char 'cyrillic-iso8859-5 (+ N 160)).
  [
   255 133 129 131 135 137 139 141 143 145 147 149 151 240 153 155
   161 163 236 173 167 169 234 244 184 190 199 209 211 213 215 221
   226 228 230 232 171 182 165 252 246 250 159 242 238 248 157 224
   160 162 235 172 166 168 233 243 183 189 198 208 210 212 214 216
   225 227 229 231 170 181 164 251 245 249 158 241 237 247 156 222
   239 132 128 130 134 136 138 140 142 144 146 148 150 253 152 154]
  "Table for converting ISO-8859-5 characters into codepage 855 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp855-decode-table 'charset 'cyrillic-iso8859-5)
(put 'cp855-decode-table 'language "Cyrillic-ISO")
(put 'cp855-decode-table 'offset 160)
269
270;; Turkish
(defvar cp857-decode-table
  ;; Nth element is the code of a cp857 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-3 (+ N 160)).
  ;; The element nil means there's no corresponding cp857 glyph.
  ;;
  ;; ISO-8859-3 leaves codes 165, 174, 190, 195, 208, 227 and 240
  ;; unassigned.  Each of them must still occupy a (nil) slot here, so
  ;; that every row has 16 entries and index N stays aligned with the
  ;; character code (+ N 160).
  [
   255 nil nil 156 207 nil nil 245 249 152 158 166 nil 240 nil nil
   248 nil 253 252 239 nil nil nil nil 141 159 167 nil 171 nil nil
   183 181 182 nil 142 nil nil 128 212 144 210 211 222 214 215 216
   nil 165 227 224 226 nil 153 232 nil 235 233 234 154 nil nil 225
   133 160 131 nil 132 nil nil 135 138 130 136 137 236 161 140 139
   nil 164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 250]
  "Table for converting ISO-8859-3 characters into codepage 857 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp857-decode-table 'charset 'latin-iso8859-3)
(put 'cp857-decode-table 'language "Latin-3")
(put 'cp857-decode-table 'offset 160)
282
283;; Portuguese
(defvar cp860-decode-table
  ;; Nth element is the code of a cp860 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp860 glyph.
  [
   255 173 155 156 nil nil 179 nil nil nil 166 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   145 134 143 142 nil nil nil 128 146 144 137 nil 152 nil 139 nil
   nil 165 159 169 140 153 nil nil nil 157 150 nil 154 nil nil nil
   133 160 131 132 nil nil nil 135 138 130 136 nil 141 161 nil nil
   nil 164 149 162 147 148 nil 246 nil 151 163 nil 129 nil nil nil]
  "Table for converting ISO-8859-1 characters into codepage 860 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp860-decode-table 'charset 'latin-iso8859-1)
(put 'cp860-decode-table 'language "Latin-1")
(put 'cp860-decode-table 'offset 160)
295
296;; Icelandic
(defvar cp861-decode-table
  ;; Nth element is the code of a cp861 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp861 glyph.
  [
   255 173 nil 156 nil nil nil nil nil nil nil 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil nil 175 172 171 nil 168
   nil 164 nil nil 142 143 146 128 nil 144 nil nil nil 165 nil nil
   139 nil 159 166 nil nil 153 nil 157 nil 167 nil 154 151 141 nil
   133 160 131 nil 132 134 145 135 138 130 136 137 nil 161 nil nil
   140 nil nil 162 147 nil 148 246 155 nil 163 150 129 152 149 nil]
  "Table for converting ISO-8859-1 characters into codepage 861 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp861-decode-table 'charset 'latin-iso8859-1)
(put 'cp861-decode-table 'language "Latin-1")
(put 'cp861-decode-table 'offset 160)
308
309;; Hebrew
(defvar cp862-decode-table
  ;; Nth element is the code of a cp862 glyph for the multibyte
  ;; character created by (make-char 'hebrew-iso8859-8 (+ N 160)).
  ;; The element nil means there's no corresponding cp862 glyph.
  [
   255 173 155 156 nil 157 179 nil nil nil nil 174 170 196 nil nil
   248 241 253 nil nil 230 nil 249 nil nil 246 175 172 171 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil 205
   128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
   144 145 146 147 148 149 150 151 152 153 154 nil nil nil nil nil]
  "Table for converting ISO-8859-8 characters into codepage 862 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp862-decode-table 'charset 'hebrew-iso8859-8)
(put 'cp862-decode-table 'language "Hebrew")
(put 'cp862-decode-table 'offset 160)
324
325;; French Canadian
(defvar cp863-decode-table
  ;; Nth element is the code of a cp863 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp863 glyph.
  [
   255 nil 155 156 152 nil 160 143 164 nil nil 174 170 nil nil 167
   nil 241 253 166 161 nil 134 249 165 nil nil 175 172 171 173 nil
   142 nil 132 nil nil nil nil 128 145 144 146 148 nil nil 168 149
   nil nil nil nil 153 nil nil nil nil 157 nil 158 154 nil nil nil
   133 nil 131 nil nil nil nil 135 138 130 136 137 141 nil 140 139
   nil nil nil 162 147 nil nil 246 nil 151 163 150 129 nil nil nil]
  "Table for converting ISO-8859-1 characters into codepage 863 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp863-decode-table 'charset 'latin-iso8859-1)
(put 'cp863-decode-table 'language "Latin-1")
(put 'cp863-decode-table 'offset 160)
337
338;; Arabic
339;; FIXME: Emacs doesn't seem to support the "Arabic" language
340;; environment yet. So this is only partially usable, for now
(defvar cp864-decode-table
  ;; Nth element is the code of a cp864 glyph for the multibyte
  ;; character created by (make-char 'arabic-iso8859-6 (+ N 160)).
  ;; The element nil means there's no corresponding cp864 glyph.
  [
   255 nil nil nil 164 nil nil nil nil nil nil nil 172 161 nil nil
   nil nil nil nil nil nil nil nil nil nil nil 187 nil nil nil 191
   nil 193 194 195 196 nil 198 199 169 201 170 171 173 174 175 207
   208 209 210 188 189 190 235 215 216 223 238 nil nil nil nil nil
   224 247 248 252 251 239 242 243 232 233 253 nil nil nil nil nil
   nil 241 nil nil nil nil nil nil nil nil nil nil nil nil nil nil]
  "Table for converting ISO-8859-6 characters into codepage 864 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
;; The language is nil because there is no matching language
;; environment for this codepage.
(put 'cp864-decode-table 'charset 'arabic-iso8859-6)
(put 'cp864-decode-table 'language nil)
(put 'cp864-decode-table 'offset 160)
352
0ac646aa
EZ
353;; Arabic OEM codepage used by Windows
354;; FIXME: Emacs doesn't seem to support the "Arabic" language
355;; environment yet. So this is only partially usable, for now
(defvar cp720-decode-table
  ;; Nth element is the code of a cp720 glyph for the multibyte
  ;; character created by (make-char 'arabic-iso8859-6 (+ N 160)).
  ;; The element nil means there's no corresponding cp720 glyph.
  [
   255 nil nil nil 148 nil nil nil nil nil nil nil nil 196 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
   nil 152 153 154 155 157 158 159 160 161 162 163 164 165 166 167
   168 169 170 171 172 173 224 225 226 227 228 nil nil nil nil nil
   149 229 231 232 233 234 235 236 237 238 239 241 242 243 244 245
   246 145 146 nil nil nil nil nil nil nil nil nil nil nil nil nil]
  "Table for converting ISO-8859-6 characters into codepage 720 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
;; The language is nil because there is no matching language
;; environment for this codepage.
(put 'cp720-decode-table 'charset 'arabic-iso8859-6)
(put 'cp720-decode-table 'language nil)
(put 'cp720-decode-table 'offset 160)
367
368
75e98450
EZ
369;; Nordic (Norwegian/Danish)
(defvar cp865-decode-table
  ;; Nth element is the code of a cp865 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp865 glyph.
  [
   255 173 nil 156 nil nil nil nil nil nil 166 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil
   nil 165 nil nil nil nil 153 nil 157 nil nil nil 154 nil nil nil
   133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139
   nil 164 149 162 147 nil 148 246 155 151 163 150 129 nil nil 152]
  "Table for converting ISO-8859-1 characters into codepage 865 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp865-decode-table 'charset 'latin-iso8859-1)
(put 'cp865-decode-table 'language "Latin-1")
(put 'cp865-decode-table 'offset 160)
381
573191a1
EZ
382;; Russian (Yes, another one! This one's supposed to be used
383;; on Windows as the Russian OEM code page.)
(defvar cp866-decode-table
  ;; Nth element is the code of a cp866 glyph for the multibyte
  ;; character created by (make-char 'cyrillic-iso8859-5 (+ N 160)).
  ;; The element nil means there's no corresponding cp866 glyph.
  [
   255 240 nil nil 242 nil nil 244 nil nil nil nil nil nil 246 nil
   128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
   144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
   160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
   224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
   252 241 nil nil 243 nil nil 245 nil nil nil nil nil nil 247 nil]
  "Table for converting ISO-8859-5 characters into codepage 866 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp866-decode-table 'charset 'cyrillic-iso8859-5)
(put 'cp866-decode-table 'language "Cyrillic-ISO")
(put 'cp866-decode-table 'offset 160)
395
75e98450
EZ
396;; Greek (yes, another one!)
(defvar cp869-decode-table
  ;; Nth element is the code of a cp869 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp869 glyph.
  [
   255 139 140 156 nil nil 138 245 249 151 nil 174 137 240 nil 142
   248 241 153 154 239 247 134 136 141 143 144 175 146 171 149 152
   161 164 165 166 167 168 169 170 172 173 181 182 183 184 189 190
   198 199 nil 207 208 209 210 211 212 213 145 150 155 157 158 159
   252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233
   234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil]
  "Table for converting ISO-8859-7 characters into codepage 869 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp869-decode-table 'charset 'greek-iso8859-7)
(put 'cp869-decode-table 'language "Greek")
(put 'cp869-decode-table 'offset 160)
408
0ac646aa
EZ
409;; Greek OEM codepage used by Windows
(defvar cp737-decode-table
  ;; Nth element is the code of a cp737 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp737 glyph.
  [
   255 nil nil nil nil nil 179 nil nil nil nil nil nil 196 nil nil
   248 241 253 nil nil nil 234 250 235 236 237 nil 238 nil 239 240
   nil 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
   143 144 nil 145 146 147 148 149 150 151 244 245 225 226 227 229
   nil 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
   167 168 170 169 171 172 173 174 175 224 228 232 230 231 233 nil]
  "Table for converting ISO-8859-7 characters into codepage 737 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp737-decode-table 'charset 'greek-iso8859-7)
(put 'cp737-decode-table 'language "Greek")
(put 'cp737-decode-table 'offset 160)
421
106e96bd
EZ
422;; Conversion from codepages 770-775 to Latin-4 for Baltic countries.
423;; FIXME: Once we support Latin-7, these should be remapped into it.
(defvar cp770-decode-table
  ;; Nth element is the code of a cp770 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp770 glyph.
  [
   255 143 nil nil 155 nil 156 nil 157 159 137 168 nil 196 146 nil
   248 133 nil nil nil nil 134 nil nil 158 136 152 nil nil 145 nil
   160 nil nil nil 142 nil nil 173 128 nil 139 nil 144 nil nil 161
   nil nil nil 163 nil 149 153 nil nil 167 nil nil 154 nil 166 225
   131 nil nil nil 132 nil nil 141 135 nil 138 nil 130 nil nil 140
   nil nil nil 162 nil 147 148 247 nil 151 nil nil 129 nil 150 nil]
  "Table for converting ISO-8859-4 characters into codepage 770 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp770-decode-table 'charset 'latin-iso8859-4)
(put 'cp770-decode-table 'language "Latin-4")
(put 'cp770-decode-table 'offset 160)
435
(defvar cp773-decode-table
  ;; Nth element is the code of a cp773 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp773 glyph.
  [
   255 220 nil 138 150 nil 234 190 166 246 237 149 173 196 252 nil
   208 nil nil 139 239 nil 235 nil nil 247 137 133 136 nil 253 nil
   160 nil nil nil 142 143 146 244 222 144 240 nil 242 nil nil 161
   nil 238 226 232 nil 229 153 158 157 248 nil nil 154 nil 250 225
   131 nil nil nil 132 134 145 245 223 130 241 nil 243 nil nil 140
   nil 236 147 233 nil 228 148 198 155 249 nil nil 129 nil 251 nil]
  "Table for converting ISO-8859-4 characters into codepage 773 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp773-decode-table 'charset 'latin-iso8859-4)
(put 'cp773-decode-table 'language "Latin-4")
(put 'cp773-decode-table 'offset 160)
447
(defvar cp774-decode-table
  ;; Nth element is the code of a cp774 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp774 glyph.
  [
   255 181 nil nil 155 nil nil nil 245 190 nil nil nil 196 207 nil
   248 208 nil nil nil nil nil nil nil 213 nil nil nil nil 216 nil
   nil nil nil nil 142 143 146 189 182 144 183 nil 184 nil nil nil
   nil nil nil nil nil nil 153 nil nil 198 nil nil 154 nil 199 225
   nil 160 nil nil 132 134 145 212 209 130 210 137 211 161 140 nil
   nil nil nil nil 147 nil 148 246 237 214 163 150 129 nil 215 248]
  "Table for converting ISO-8859-4 characters into codepage 774 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp774-decode-table 'charset 'latin-iso8859-4)
(put 'cp774-decode-table 'language "Latin-4")
(put 'cp774-decode-table 'offset 160)
459
75e98450
EZ
(defvar cp775-decode-table
  ;; Nth element is the code of a cp775 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp775 glyph.
  [
   255 181 nil 138 150 nil 234 245 166 190 237 149 173 240 207 nil
   248 208 nil 139 239 nil 235 nil nil 213 137 133 136 nil 216 nil
   160 nil nil nil 142 143 146 189 182 144 183 nil 184 nil nil 161
   nil 238 226 232 nil 229 153 158 157 198 nil nil 154 nil 199 225
   131 nil nil nil 132 134 145 212 209 130 210 nil 211 nil nil 140
   nil 236 147 233 nil 228 148 247 155 214 nil nil 129 nil 215 nil]
  "Table for converting ISO-8859-4 characters into codepage 775 glyphs.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp775-decode-table 'charset 'latin-iso8859-4)
(put 'cp775-decode-table 'language "Latin-4")
(put 'cp775-decode-table 'offset 160)
471
7e37faa3
EZ
472;; Support for the Windows 12xx series of codepages that MS has
473;; butchered from the ISO-8859 specs. This does not add support for
474;; the extended characters that MS has added in the 128 - 159 coding
475;; range, only translates those characters that can be expressed in
0ac646aa 476;; the corresponding iso-8859 charset.
7e37faa3
EZ
477
478;; Codepage Mapping:
479;;
480;; Windows-1250: ISO-8859-2 (Central Europe) - differs in some positions
481;; Windows-1251: ISO-8859-5 (Cyrillic) - differs wildly
482;; Windows-1252: ISO-8859-1 (West Europe) - exact match
483;; Windows-1253: ISO-8859-7 (Greek) - differs in some positions
484;; Windows-1254: ISO-8859-9 (Turkish) - exact match
485;; Windows-1255: ISO-8859-8 (Hebrew) - exact match
486;; Windows-1256: ISO-8859-6 (Arabic) - half match
487;; Windows-1257: ISO-8859-4 (Baltic) - differs, future Latin-7
488;; Windows-1258: VISCII (Vietnamese) - Completely different
489
(defvar cp1250-decode-table
  ;; Nth element is the Windows-1250 code for the multibyte character
  ;; created by (make-char 'latin-iso8859-2 (+ N 160)).
  [
   160 165 162 163 164 188 140 167 168 138 170 141 143 173 142 175
   176 185 178 179 180 190 156 161 184 154 186 157 159 189 158 191
   192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
   208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
   224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
   240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255]
  "ISO-8859-2 to Windows-1250 (Central Europe) codepage decoding table.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp1250-decode-table 'charset 'latin-iso8859-2)
(put 'cp1250-decode-table 'language "Latin-2")
(put 'cp1250-decode-table 'offset 160)
501
(defvar cp1251-decode-table
  ;; Nth element is the Windows-1251 code for the multibyte character
  ;; created by (make-char 'cyrillic-iso8859-5 (+ N 160)).
  [
   160 168 128 129 170 189 178 175 163 138 140 142 141 173 161 143
   192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
   208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
   224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
   240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
   185 184 144 131 186 190 179 191 188 154 156 158 157 167 162 159]
  "ISO-8859-5 to Windows-1251 (Cyrillic) codepage decoding table.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp1251-decode-table 'charset 'cyrillic-iso8859-5)
(put 'cp1251-decode-table 'language "Cyrillic-ISO")
(put 'cp1251-decode-table 'offset 160)
513
514;; cp1253 is missing nbsp so we cannot quite translate perfectly. It
515;; also has two micro/mu characters which would require more complex
;; processing to accommodate.
(defvar cp1253-decode-table
  ;; Nth element is the Windows-1253 code for the multibyte character
  ;; created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp1253 code.
  [
   nil 145 146 163 nil nil 166 167 168 169 nil 171 172 173 nil 151
   176 177 178 179 180 161 162 183 184 185 186 187 188 189 190 191
   192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
   208 209 nil 211 212 213 214 215 216 217 218 219 220 221 222 223
   224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
   240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 nil]
  "ISO-8859-7 to Windows-1253 (Greek) codepage decoding table.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp1253-decode-table 'charset 'greek-iso8859-7)
(put 'cp1253-decode-table 'language "Greek")
(put 'cp1253-decode-table 'offset 160)
528
529;; Since Latin-7 is not yet official, and Emacs does not support it,
530;; provide translation between Windows-1257 and Latin-4 the best we
531;; can.
(defvar cp1257-decode-table
  ;; Nth element is the Windows-1257 code for the multibyte character
  ;; created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp1257 code.
  [
   160 192 nil 170 164 nil 207 167 nil 208 199 204 nil 173 222 nil
   176 224 nil 186 nil nil 239 nil nil 240 231 236 nil nil 254 nil
   194 nil nil nil 196 197 175 193 200 201 198 nil 203 nil nil 206
   nil 210 212 205 nil 213 214 215 168 216 nil nil 220 nil 219 223
   226 nil nil nil 228 229 191 225 232 233 230 nil 235 nil nil 238
   nil 242 244 237 nil 245 246 247 184 248 nil nil 252 nil 251 nil]
  "ISO-8859-4 to Windows-1257 (Baltic) codepage decoding table.")
;; Attach the metadata with `put' rather than `setplist': replacing the
;; whole property list would discard the `variable-documentation'
;; property that `defvar' has just stored for the docstring.
(put 'cp1257-decode-table 'charset 'latin-iso8859-4)
(put 'cp1257-decode-table 'language "Latin-4")
(put 'cp1257-decode-table 'offset 160)
543
75e98450
EZ
544;;;###autoload
(defun cp-make-coding-systems-for-codepage (codepage iso-name offset)
  "Create a coding system to convert IBM CODEPAGE into charset ISO-NAME
whose first character is at offset OFFSET from the beginning of 8-bit
ASCII table.

CODEPAGE is a string such as \"cp850\"; the variable
`CODEPAGE-decode-table' must hold its decode table.

The created coding system has the usual 3 subsidiary systems: for Unix-,
DOS- and Mac-style EOL conversion.  However, unlike built-in coding
systems, the Mac-style EOL conversion is currently not supported by the
decoder and encoder created by this function."
  (let* ((table-sym (intern (format "%s-decode-table" codepage)))
         (nonascii-sym
          (intern (format "%s-nonascii-translation-table" codepage)))
         (decode-sym
          (intern (format "%s-decode-translation-table" codepage)))
         (encode-sym
          (intern (format "%s-encode-translation-table" codepage))))
    ;; Build the byte -> character translation table from the
    ;; character -> byte vector stored in `CODEPAGE-decode-table'.
    (set nonascii-sym
         (make-translation-table-from-vector
          (cp-decoding-vector-for-codepage
           (symbol-value table-sym) iso-name offset)))
    ;; Extra slot 0 of that table is used as the table for encoding.
    (define-translation-table encode-sym
      (char-table-extra-slot (symbol-value nonascii-sym) 0))
    ;; For every charset other than ascii, eight-bit-* and ISO-NAME, map
    ;; one-column charsets to the replacement byte and wide-column
    ;; charsets to a Japanese character.  The CCL encoder converts that
    ;; Japanese character to either dos-unsupported-char-glyph or "??".
    (let ((reverse-map (char-table-extra-slot (symbol-value nonascii-sym) 0))
          (narrow-fallback
           (cond ((not (eq system-type 'ms-dos)) ??)
                 (dos-unsupported-char-glyph
                  (logand dos-unsupported-char-glyph 255))
                 (t 127)))
          (wide-fallback (make-char 'japanese-jisx0208 32 32))
          (foreign-charsets (copy-sequence charset-list)))
      (dolist (handled (list iso-name
                             'eight-bit-graphic 'eight-bit-control 'ascii))
        (setq foreign-charsets (delq handled foreign-charsets)))
      (dolist (cs foreign-charsets)
        (aset reverse-map (make-char cs)
              (if (= (charset-width cs) 1)
                  narrow-fallback
                wide-fallback))))
    (define-translation-table decode-sym
      (symbol-value nonascii-sym))
    (cp-coding-system-for-codepage-1
     (intern codepage) ?D iso-name decode-sym encode-sym)))
75e98450
EZ
592
(defun cp-codepage-decoder (codepage)
  "If CODEPAGE is the name of a supported codepage, return its decode table.
CODEPAGE may be a string or a symbol.  The value returned is the symbol
named `CODEPAGE-decode-table', provided such a symbol is already
interned.  Otherwise return nil."
  (let ((name (cond ((symbolp codepage) (symbol-name codepage))
                    ((stringp codepage) codepage))))
    ;; NAME is nil when CODEPAGE is neither a symbol nor a string.
    (and name
         (intern-soft (format "%s-decode-table" name)))))
601
602;;;###autoload
(defun cp-charset-for-codepage (codepage)
  "Return the charset for which there is a translation table to DOS CODEPAGE.
CODEPAGE must be the name of a DOS codepage, a string.
Signal an error if CODEPAGE has no decode table."
  (let ((table-sym (cp-codepage-decoder codepage)))
    (unless table-sym
      (error "Unsupported codepage %s" codepage))
    ;; The charset is recorded as a property of the decode-table symbol.
    (get table-sym 'charset)))
610
611;;;###autoload
(defun cp-language-for-codepage (codepage)
  "Return the name of the MULE language environment for CODEPAGE.
CODEPAGE must be the name of a DOS codepage, a string.
Signal an error if CODEPAGE has no decode table."
  (let ((table-sym (cp-codepage-decoder codepage)))
    (unless table-sym
      (error "Unsupported codepage %s" codepage))
    ;; The language is recorded as a property of the decode-table symbol.
    (get table-sym 'language)))
619
620;;;###autoload
(defun cp-offset-for-codepage (codepage)
  "Return the offset to be used in setting up coding systems for CODEPAGE.
CODEPAGE must be the name of a DOS codepage, a string.
Signal an error if CODEPAGE has no decode table."
  (let ((table-sym (cp-codepage-decoder codepage)))
    (unless table-sym
      (error "Unsupported codepage %s" codepage))
    ;; The offset is recorded as a property of the decode-table symbol.
    (get table-sym 'offset)))
628
629;;;###autoload
(defun cp-supported-codepages ()
  "Return an alist of supported codepages.

Each association in the alist has the form (NNN . CHARSET), where NNN is the
codepage number (as a string), and CHARSET is the MULE charset which is the
closest match for the character set supported by that codepage.

A codepage NNN is supported if a variable called `cpNNN-decode-table' exists,
is a vector, and has a charset property."
  (save-match-data
    (let ((found nil))
      (mapatoms
       (lambda (sym)
         (let ((name (symbol-name sym)))
           ;; `string-match' is the last form here to touch the match
           ;; data, so `match-string' below still refers to NAME.
           (when (and (boundp sym)
                      (string-match
                       "\\`cp\\([1-9][0-9][0-9][0-9]?\\)-decode-table\\'"
                       name)
                      (vectorp (symbol-value sym))
                      (get sym 'charset))
             (setq found
                   (cons (cons (match-string 1 name) (get sym 'charset))
                         found))))))
      found)))
652
653;;;###autoload
(defun codepage-setup (codepage)
  "Create a coding system cpCODEPAGE to support the IBM codepage CODEPAGE.

These coding systems are meant for encoding and decoding 8-bit non-ASCII
characters used by the IBM codepages, typically in conjunction with files
read/written by MS-DOS software, or for display on the MS-DOS terminal.

Interactively, read CODEPAGE with completion over the supported
codepages.  Do nothing if the coding system is already defined."
  (interactive
   (let ((completion-ignore-case t)
         (known (cp-supported-codepages)))
     (list (completing-read "Setup DOS Codepage: (default 437) " known
                            nil t nil nil "437"))))
  (let* ((cp-name (format "cp%s" codepage))
         (cp-sym (intern-soft cp-name)))
    (cond
     ;; Already defined: don't define it again.
     ((and cp-sym (coding-system-p cp-sym)))
     (t
      (cp-make-coding-systems-for-codepage
       cp-name
       (cp-charset-for-codepage cp-name)
       (cp-offset-for-codepage cp-name))))))
75e98450 671
2c3245ae
DL
672;; Add DOS codepages to `non-iso-charset-alist'.
;; Once mule-diag is loaded, register every supported codepage in
;; `non-iso-charset-alist', unless an entry for it is already present.
(eval-after-load "mule-diag"
  '(let ((codepages (cp-supported-codepages)))
     (while codepages
       ;; Each element is (CODEPAGE . CHARSET), where CODEPAGE is a
       ;; string (e.g. "850") and CHARSET is the charset that the
       ;; characters in CODEPAGE are mapped to.
       (let* ((entry (car codepages))
              (number (car entry))
              (cp-sym (intern (concat "cp" number))))
         (unless (assq cp-sym non-iso-charset-alist)
           (setq non-iso-charset-alist
                 (cons (list cp-sym
                             (list 'ascii (cdr entry))
                             ;; `string-to-number' replaces the
                             ;; obsolete alias `string-to-int'.
                             `(lambda (code)
                                (decode-codepage-char
                                 ,(string-to-number number)
                                 code))
                             (list (list 0 255)))
                       non-iso-charset-alist))))
       (setq codepages (cdr codepages)))))
690
75e98450
EZ
;; Announce the `codepage' feature to `require' and `featurep'.
(provide 'codepage)
692
ab5796a9 693;;; arch-tag: 80328de8-b94e-4386-be26-5876105731f0
55535639 694;;; codepage.el ends here