Commit | Line | Data |
---|---|---|
e8af40ee | 1 | ;;; european.el --- support for European languages -*- coding: iso-2022-7bit; -*- |
4ed46869 | 2 | |
891258f2 | 3 | ;; Copyright (C) 1995, 1997, 2001 Electrotechnical Laboratory, JAPAN. |
fa526c4a | 4 | ;; Licensed to the Free Software Foundation. |
5f3648c4 | 5 | ;; Copyright (C) 2001 Free Software Foundation, Inc. |
4ed46869 KH |
6 | |
7 | ;; Keywords: multilingual, European | |
8 | ||
9 | ;; This file is part of GNU Emacs. | |
10 | ||
11 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 | ;; it under the terms of the GNU General Public License as published by | |
13 | ;; the Free Software Foundation; either version 2, or (at your option) | |
14 | ;; any later version. | |
15 | ||
16 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | ;; GNU General Public License for more details. | |
20 | ||
21 | ;; You should have received a copy of the GNU General Public License | |
369314dc KH |
22 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
23 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
24 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 KH |
25 | |
26 | ;;; Commentary: | |
27 | ||
5f3648c4 DL |
28 | ;; For European scripts, character sets ISO8859-1,2,3,4,9,14,15 are |
29 | ;; supported. | |
4ed46869 KH |
30 | |
31 | ;;; Code: | |
32 | ||
5c4794f3 | 33 | ;; Latin-1 (ISO-8859-1) |
335a7ad7 | 34 | |
;; Coding system for ISO-8859-1 together with its two alias names.
;; Arguments to `make-coding-system', in order: name, numeric type,
;; mode-line mnemonic character, doc string, flags list, property alist.
;; NOTE(review): the trailing `t' in the 17-element flags list is
;; presumably the ACCEPT-LATIN-EXTRA-CODE flag -- confirm against the
;; `make-coding-system' documentation.
(make-coding-system
 'iso-latin-1
 2
 ?1
 "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
 '(ascii latin-iso8859-1 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil t)
 '((safe-charsets ascii latin-iso8859-1)
   (mime-charset . iso-8859-1)))

(define-coding-system-alias 'iso-8859-1 'iso-latin-1)
(define-coding-system-alias 'latin-1 'iso-latin-1)
3617715a | 45 | |
;; Generic language environment for the ISO-8859-1 charset, registered
;; with ("European") as its parents argument.
(set-language-info-alist
 "Latin-1"
 '((charset ascii latin-iso8859-1)
   (coding-system iso-latin-1)
   (coding-priority iso-latin-1)
   (nonascii-translation . latin-iso8859-1)
   (unibyte-syntax . "latin-1")
   (unibyte-display . iso-latin-1)
   (input-method . "latin-1-prefix")
   (sample-text
    . "Hello, Hej, Tere, Hei, Bonjour, Gr\e,A|_\e(B Gott, Ciao, \e,A!\e(BHola!")
   (documentation . "\
This language environment is a generic one for the Latin-1 (ISO-8859-1)
character set which supports the following European languages:
Albanian, Basque, Breton, Catalan, Danish, Dutch, English, Faeroese,
Finnish, French (with restrictions -- see Latin-9), Frisian, Galician,
German, Greenlandic, Icelandic, Irish Gaelic (new orthography),
Italian, Latin, Luxemburgish, Norwegian, Portuguese, Rhaeto-Romanic,
Scottish Gaelic, Spanish, and Swedish.
We also have specific language environments for the following languages:
For Dutch, \"Dutch\".
For German, \"German\".
For Spanish, \"Spanish\".
For French, \"French\".

Latin-1 also covers several written languages outside Europe, including
Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans."))
 '("European"))
73 | ||
335a7ad7 | 74 | \f |
5c4794f3 | 75 | ;; Latin-2 (ISO-8859-2) |
335a7ad7 | 76 | |
;; Coding system for ISO-8859-2 and its alias names.  Unlike the
;; Latin-1 definition, the flags list here has only 11 elements.
(make-coding-system
 'iso-latin-2
 2
 ?2
 "ISO 2022 based 8-bit encoding for Latin-2 (MIME:ISO-8859-2)."
 '(ascii latin-iso8859-2 nil nil
   nil nil nil nil nil nil nil)
 '((safe-charsets ascii latin-iso8859-2)
   (mime-charset . iso-8859-2)))

(define-coding-system-alias 'iso-8859-2 'iso-latin-2)
(define-coding-system-alias 'latin-2 'iso-latin-2)
4b9121fc | 87 | |
;; Generic language environment for the ISO-8859-2 charset.
(set-language-info-alist
 "Latin-2"
 '((charset ascii latin-iso8859-2)
   (coding-system iso-latin-2)
   (coding-priority iso-latin-2)
   (nonascii-translation . latin-iso8859-2)
   (unibyte-syntax . "latin-2")
   (unibyte-display . iso-latin-2)
   (input-method . "latin-2-prefix")
   (documentation . "\
This language environment is a generic one for the Latin-2 (ISO-8859-2)
character set which supports the following languages:
Albanian, Czech, English, German, Hungarian, Polish, Romanian,
Serbo-Croatian or Croatian, Slovak, Slovene, Sorbian (upper and lower),
and Swedish.
We also have specific language environments for the following languages:
For Czech, \"Czech\".
For Romanian, \"Romanian\".
For Slovak, \"Slovak\"."))
 '("European"))
107 | ||
335a7ad7 | 108 | \f |
5c4794f3 | 109 | ;; Latin-3 (ISO-8859-3) |
335a7ad7 | 110 | |
;; Coding system for ISO-8859-3 and its alias names.
(make-coding-system
 'iso-latin-3
 2
 ?3
 "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)."
 '(ascii latin-iso8859-3 nil nil
   nil nil nil nil nil nil nil)
 '((safe-charsets ascii latin-iso8859-3)
   (mime-charset . iso-8859-3)))

(define-coding-system-alias 'iso-8859-3 'iso-latin-3)
(define-coding-system-alias 'latin-3 'iso-latin-3)
4b9121fc | 121 | |
;; Generic language environment for the ISO-8859-3 charset.
(set-language-info-alist
 "Latin-3"
 '((charset ascii latin-iso8859-3)
   (coding-system iso-latin-3)
   (coding-priority iso-latin-3)
   (nonascii-translation . latin-iso8859-3)
   (unibyte-syntax . "latin-3")
   (unibyte-display . iso-latin-3)
   (input-method . "latin-3-prefix")
   (documentation . "\
These languages are supported with the Latin-3 (ISO-8859-3) character set:
Afrikaans, Catalan, Dutch, English, Esperanto, French, Galician,
German, Italian, Maltese, Spanish, and Turkish."))
 '("European"))
135 | ||
335a7ad7 | 136 | \f |
5c4794f3 | 137 | ;; Latin-4 (ISO-8859-4) |
335a7ad7 | 138 | |
;; Coding system for ISO-8859-4 and its alias names.
(make-coding-system
 'iso-latin-4
 2
 ?4
 "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)."
 '(ascii latin-iso8859-4 nil nil
   nil nil nil nil nil nil nil)
 '((safe-charsets ascii latin-iso8859-4)
   (mime-charset . iso-8859-4)))

(define-coding-system-alias 'iso-8859-4 'iso-latin-4)
(define-coding-system-alias 'latin-4 'iso-latin-4)
4b9121fc | 149 | |
;; Generic language environment for the ISO-8859-4 charset.  Note that
;; this entry refers to the coding system through the name
;; `iso-8859-4' rather than `iso-latin-4'.
(set-language-info-alist
 "Latin-4"
 '((charset ascii latin-iso8859-4)
   (coding-system iso-8859-4)
   (coding-priority iso-8859-4)
   (nonascii-translation . latin-iso8859-4)
   (unibyte-syntax . "latin-4")
   (unibyte-display . iso-8859-4)
   (input-method . "latin-4-postfix")
   (documentation . "\
These languages are supported with the Latin-4 (ISO-8859-4) character set:
Danish, English, Estonian, Finnish, German, Greenlandic, Lappish,
Latvian, Lithuanian, and Norwegian."))
 '("European"))
163 | ||
335a7ad7 | 164 | \f |
5c4794f3 | 165 | ;; Latin-5 (ISO-8859-9) |
335a7ad7 | 166 | |
;; Coding system for ISO-8859-9 (named Latin-5) and its alias names.
;; The mnemonic is ?9, following the ISO-8859 part number rather than
;; the Latin-N number.
(make-coding-system
 'iso-latin-5
 2
 ?9
 "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)."
 '(ascii latin-iso8859-9 nil nil
   nil nil nil nil nil nil nil)
 '((safe-charsets ascii latin-iso8859-9)
   (mime-charset . iso-8859-9)))

(define-coding-system-alias 'iso-8859-9 'iso-latin-5)
(define-coding-system-alias 'latin-5 'iso-latin-5)
4b9121fc | 177 | |
;; Generic language environment for the ISO-8859-9 charset.
;; The documentation previously listed Bulgarian, Byelorussian,
;; Macedonian, Russian, Serbian and Ukrainian, which are written in
;; Cyrillic and are not covered by ISO-8859-9.  This file's own
;; "Turkish" environment uses the same charset and coding system, so
;; Turkish is the language named here.
(set-language-info-alist
 "Latin-5"
 '((charset ascii latin-iso8859-9)
   (coding-system iso-latin-5)
   (coding-priority iso-latin-5)
   (nonascii-translation . latin-iso8859-9)
   (unibyte-syntax . "latin-5")
   (unibyte-display . iso-latin-5)
   (input-method . "latin-5-postfix")
   (documentation . "\
These languages are supported with the Latin-5 (ISO-8859-9) character set:
Turkish."))
 '("European"))
191 | ||
192 | \f | |
193 | ;; Latin-8 (ISO-8859-14) | |
194 | ||
;; Coding system for ISO-8859-14 (Latin-8) and its alias names.
;; The mnemonic is `W' for `Welsh', since `C' for `Celtic' is taken.
(make-coding-system
 'iso-latin-8
 2
 ?W
 "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)."
 '(ascii latin-iso8859-14 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil t)
 '((safe-charsets ascii latin-iso8859-14)
   (mime-charset . iso-8859-14)))

(define-coding-system-alias 'iso-8859-14 'iso-latin-8)
(define-coding-system-alias 'latin-8 'iso-latin-8)
206 | ||
cce3ba48 DL |
;; Generic language environment for the ISO-8859-14 charset.
;; NOTE(review): the sample text is shown here with its ISO-2022
;; escape sequences spelled out; the double-quote character right
;; after the first escape sequence is presumably an encoded character
;; byte rather than a string terminator -- confirm against the raw
;; iso-2022-7bit file before evaluating this form.
(set-language-info-alist
 "Latin-8"
 '((charset ascii latin-iso8859-14)
   (coding-system iso-latin-8)
   (coding-priority iso-latin-8)
   (nonascii-translation . latin-iso8859-14)
   (unibyte-syntax . "latin-8")
   (unibyte-display . iso-latin-8)
   (input-method . "latin-8-prefix")
   ;; Fixme: Welsh/Ga{e}lic greetings
   (sample-text . "\e,_"\e(B \e,_p\e(B \e,_^\e(B")
   (documentation . "\
This language environment is a generic one for the Latin-8 (ISO-8859-14)
character set which supports the Celtic languages, including those not
covered by other ISO-8859 character sets:
Welsh, Manx Gaelic and Irish Gaelic (old orthography)."))
 '("European"))
cce3ba48 DL |
223 | \f |
224 | ;; Latin-9 (ISO-8859-15) | |
225 | ||
;; Coding system for ISO-8859-15 (Latin-9) and its alias names,
;; including the `latin-0' nickname.  The mnemonic `0' stands for
;; `Latin-0'.
(make-coding-system
 'iso-latin-9
 2
 ?0
 "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)."
 '(ascii latin-iso8859-15 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil t)
 '((safe-charsets ascii latin-iso8859-15)
   (mime-charset . iso-8859-15)))

(define-coding-system-alias 'iso-8859-15 'iso-latin-9)
(define-coding-system-alias 'latin-9 'iso-latin-9)
(define-coding-system-alias 'latin-0 'iso-latin-9)
3617715a | 237 | |
cce3ba48 DL |
;; Generic language environment for the ISO-8859-15 charset.
(set-language-info-alist
 "Latin-9"
 '((charset ascii latin-iso8859-15)
   (coding-system iso-latin-9)
   (coding-priority iso-latin-9)
   (nonascii-translation . latin-iso8859-15)
   (unibyte-syntax . "latin-9")
   (unibyte-display . iso-latin-9)
   (input-method . "latin-9-prefix")
   (sample-text
    . "AVE. \e,b&(48<=>\e(B \e,b$\e(B")
   (documentation . "\
This language environment is a generic one for the Latin-9 (ISO-8859-15)
character set which supports the same languages as Latin-1 with the
addition of the Euro sign and some additional French and Finnish letters.
Latin-9 is sometimes nicknamed `Latin-0'."))
 '("European"))
3617715a | 254 | \f |
;; German language environment: Latin-1 settings plus a German input
;; method and tutorial.
(set-language-info-alist
 "German"
 '((tutorial . "TUTORIAL.de")
   (charset ascii latin-iso8859-1)
   (coding-system iso-latin-1)
   (coding-priority iso-latin-1)
   (input-method . "german-postfix")
   ;; The value must name the charset, as the "Latin-1" and "French"
   ;; entries do; `iso-latin-1' is the coding system, not a charset.
   (nonascii-translation . latin-iso8859-1)
   (unibyte-syntax . "latin-1")
   (unibyte-display . iso-latin-1)
   (sample-text . "\
German (Deutsch Nord) Guten Tag
German (Deutsch S\e,A|\e(Bd) Gr\e,A|_\e(B Gott")
   (documentation . "\
This language environment is almost the same as Latin-1,
but the default input method is set to \"german-postfix\".
Additionally, the tutorial is set to \"TUTORIAL.de\"."))
 '("European"))
8dfef503 | 272 | |
fdd2d14d EZ |
;; French language environment: Latin-1 settings plus the French
;; tutorial.
(set-language-info-alist
 "French"
 '((tutorial . "TUTORIAL.fr")
   (charset ascii latin-iso8859-1)
   (coding-system iso-latin-1)
   (coding-priority iso-latin-1)
   (nonascii-translation . latin-iso8859-1)
   (unibyte-syntax . "latin-1")
   (unibyte-display . iso-latin-1)
   (input-method . "latin-1-prefix")
   (sample-text . "French (Fran\e,Ag\e(Bais) Bonjour, Salut")
   (documentation . "\
This language environment is almost the same as Latin-1,
but the tutorial is set to \"TUTORIAL.fr\"."))
 '("European"))
287 | ||
e0980142 RS |
;; Slovenian language environment, based on the ISO-8859-2 charset.
;; The list-valued entries are written in plain list notation, which
;; reads identically to the dotted form `(key . (a b))'.
(set-language-info-alist
 "Slovenian"
 '((charset ascii latin-iso8859-2)
   (coding-system iso-8859-2)
   (coding-priority iso-8859-2)
   (nonascii-translation . latin-iso8859-2)
   (input-method . "latin-2-postfix")
   (unibyte-syntax . "latin-2")
   (unibyte-display . iso-8859-2)
   (tutorial . "TUTORIAL.sl")
   (sample-text . "\e,B.\e(Belimo vam uspe\e,B9\e(Ben dan!")
   (documentation . t))
 '("European"))
300 | ||
891258f2 GM |
;; Spanish language environment: Latin-1 settings plus a Spanish input
;; method and tutorial.
(set-language-info-alist
 "Spanish"
 '((tutorial . "TUTORIAL.es")
   (charset ascii latin-iso8859-1)
   (coding-system iso-latin-1)
   (coding-priority iso-latin-1)
   (input-method . "spanish-postfix")
   ;; The value must name the charset, as the "Latin-1" and "French"
   ;; entries do; `iso-latin-1' is the coding system, not a charset.
   (nonascii-translation . latin-iso8859-1)
   (unibyte-syntax . "latin-1")
   (unibyte-display . iso-latin-1)
   (sample-text . "Spanish (Espa\e,Aq\e(Bol) \e,A!\e(BHola!")
   (documentation . "\
This language environment is almost the same as Latin-1,
but the default input method is set to \"spanish-postfix\",
and it selects the Spanish tutorial."))
 '("European"))
316 | ||
;; Dutch language environment: Latin-1 settings plus the Dutch
;; tutorial.
(set-language-info-alist
 "Dutch"
 '((tutorial . "TUTORIAL.nl")
   (charset ascii latin-iso8859-1)
   (coding-system iso-latin-1)
   (coding-priority iso-latin-1)
   ;; The value must name the charset, as the "Latin-1" and "French"
   ;; entries do; `iso-latin-1' is the coding system, not a charset.
   (nonascii-translation . latin-iso8859-1)
   (unibyte-syntax . "latin-1")
   (unibyte-display . iso-latin-1)
   (sample-text . "Er is een aantal manieren waarop je dit kan doen")
   (documentation . "\
This language environment is almost the same as Latin-1,
but the Dutch tutorial is selected."))
 '("European"))
330 | ||
d22c4fb9 KH |
331 | ;; For Turkish, the character set ISO-8859-9 (Latin-5) is used. But, |
332 | ;; before the introduction of ISO-8859-9 in 1988, ISO-8859-3 (Latin-3) | |
333 | ;; was used for Turkish. Those who use Latin-3 for Turkish should use | |
334 | ;; "Latin-3" language environment. | |
335 | ||
;; Turkish language environment, based on the ISO-8859-9 charset.
;; The ("European") parents argument was missing here although every
;; other environment in this file passes it.
(set-language-info-alist
 "Turkish"
 '((charset ascii latin-iso8859-9)
   (coding-system iso-latin-5)
   (coding-priority iso-latin-5)
   (nonascii-translation . latin-iso8859-9)
   (unibyte-syntax . "latin-5")
   (unibyte-display . iso-latin-5)
   (input-method . "turkish-postfix")
   (sample-text . "Turkish (T\e,M|\e(Brk\e,Mg\e(Be) Merhaba")
   (documentation . t))
 '("European"))
346 | ||
b9427ca6 KH |
347 | ;; Polish ISO 8859-2 environment. |
348 | ;; Maintainer: Wlodek Bzyl <matwb@univ.gda.pl> | |
349 | ;; Keywords: multilingual, Polish | |
350 | ||
b9427ca6 KH |
;; Polish language environment, based on the ISO-8859-2 charset.
;; The list-valued entries are written in plain list notation, which
;; reads identically to the dotted form `(key . (a b))'.
(set-language-info-alist
 "Polish"
 '((charset ascii latin-iso8859-2)
   (coding-system iso-8859-2)
   (coding-priority iso-8859-2)
   (input-method . "polish-slash")
   (nonascii-translation . latin-iso8859-2)
   (unibyte-syntax . "latin-2")
   (unibyte-display . iso-8859-2)
   (tutorial . "TUTORIAL.pl")
   (sample-text . "P\e,Bs\e(Bjd\e,B<\e(B, ki\e,Bq\e(B-\e,B?\e(Be t\e,Bj\e(B chmurno\e,B6f\e(B w g\e,B31\e(Bb flaszy")
   (documentation . t))
 '("European"))
363 | ||
3286933f DL |
;; Welsh language environment.  UTF-8 is listed ahead of latin-8
;; because the input method is Unicode-based.  The alist holds no
;; unquoted forms, so a plain quote is used.
(set-language-info-alist
 "Welsh"
 '((coding-system utf-8 latin-8)
   (coding-priority utf-8 latin-8)
   (input-method . "welsh")
   (documentation . "Support for Welsh, using Unicode."))
 '("European"))
370 | ||
;; Latin-7 language environment.  It lists `code-pages' under
;; `features'.  The alist holds no unquoted forms, so a plain quote is
;; used.
(set-language-info-alist
 "Latin-7"
 '((coding-system latin-7)
   (coding-priority latin-7)
   ;; Fixme: input-method
   (features code-pages)
   (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian."))
 '("European"))
378 | ||
;; Lithuanian language environment: the latin-7 coding system with the
;; "lithuanian-keyboard" input method.
(set-language-info-alist
 "Lithuanian"
 '((coding-system latin-7)
   (coding-priority latin-7)
   (input-method . "lithuanian-keyboard")
   (features code-pages)
   (documentation . "Support for Lithuanian."))
 '("European"))
386 | ||
;; Latvian language environment: the latin-7 coding system with the
;; "latvian-keyboard" input method.
(set-language-info-alist
 "Latvian"
 '((coding-system latin-7)
   (coding-priority latin-7)
   (input-method . "latvian-keyboard")
   (features code-pages)
   (documentation . "Support for Latvian."))
 '("European"))
71070f12 KH |
394 | \f |
395 | ;; Definitions for the Mac Roman character sets and coding system. | |
396 | ;; The Mac Roman encoding uses all 128 code points in the range 128 to | |
a75f7176 | 397 | ;; 255 for actual characters. Emacs decodes them to one of the |
71070f12 KH |
398 | ;; following character sets. |
399 | ;; ascii, latin-iso8859-1, mule-unicode-0100-24ff, | |
400 | ;; mule-unicode-2500-33ff, mule-unicode-e000-ffff | |
401 | ||
;; Build the Mac Roman translation tables.
;; A 256-entry vector maps each Mac Roman byte to an Emacs character:
;; bytes 0..127 map to themselves and bytes 128..255 are looked up in
;; UCS-MAP and converted with `decode-char'.  The vector is turned into
;; the translation table `mac-roman-decoder'; `mac-roman-encoder' is
;; whatever `make-translation-table-from-vector' stored in extra slot 0
;; of that table.
(let* ((ucs-map				; mac-roman (128..255) -> UCS mapping
	[ #x00C4 ;; 128:LATIN CAPITAL LETTER A WITH DIAERESIS
	  #x00C5 ;; 129:LATIN CAPITAL LETTER A WITH RING ABOVE
	  #x00C7 ;; 130:LATIN CAPITAL LETTER C WITH CEDILLA
	  #x00C9 ;; 131:LATIN CAPITAL LETTER E WITH ACUTE
	  #x00D1 ;; 132:LATIN CAPITAL LETTER N WITH TILDE
	  #x00D6 ;; 133:LATIN CAPITAL LETTER O WITH DIAERESIS
	  #x00DC ;; 134:LATIN CAPITAL LETTER U WITH DIAERESIS
	  #x00E1 ;; 135:LATIN SMALL LETTER A WITH ACUTE
	  #x00E0 ;; 136:LATIN SMALL LETTER A WITH GRAVE
	  #x00E2 ;; 137:LATIN SMALL LETTER A WITH CIRCUMFLEX
	  #x00E4 ;; 138:LATIN SMALL LETTER A WITH DIAERESIS
	  #x00E3 ;; 139:LATIN SMALL LETTER A WITH TILDE
	  #x00E5 ;; 140:LATIN SMALL LETTER A WITH RING ABOVE
	  #x00E7 ;; 141:LATIN SMALL LETTER C WITH CEDILLA
	  #x00E9 ;; 142:LATIN SMALL LETTER E WITH ACUTE
	  #x00E8 ;; 143:LATIN SMALL LETTER E WITH GRAVE
	  #x00EA ;; 144:LATIN SMALL LETTER E WITH CIRCUMFLEX
	  #x00EB ;; 145:LATIN SMALL LETTER E WITH DIAERESIS
	  #x00ED ;; 146:LATIN SMALL LETTER I WITH ACUTE
	  #x00EC ;; 147:LATIN SMALL LETTER I WITH GRAVE
	  #x00EE ;; 148:LATIN SMALL LETTER I WITH CIRCUMFLEX
	  #x00EF ;; 149:LATIN SMALL LETTER I WITH DIAERESIS
	  #x00F1 ;; 150:LATIN SMALL LETTER N WITH TILDE
	  #x00F3 ;; 151:LATIN SMALL LETTER O WITH ACUTE
	  #x00F2 ;; 152:LATIN SMALL LETTER O WITH GRAVE
	  #x00F4 ;; 153:LATIN SMALL LETTER O WITH CIRCUMFLEX
	  #x00F6 ;; 154:LATIN SMALL LETTER O WITH DIAERESIS
	  #x00F5 ;; 155:LATIN SMALL LETTER O WITH TILDE
	  #x00FA ;; 156:LATIN SMALL LETTER U WITH ACUTE
	  #x00F9 ;; 157:LATIN SMALL LETTER U WITH GRAVE
	  #x00FB ;; 158:LATIN SMALL LETTER U WITH CIRCUMFLEX
	  #x00FC ;; 159:LATIN SMALL LETTER U WITH DIAERESIS
	  #x2020 ;; 160:DAGGER
	  #x00B0 ;; 161:DEGREE SIGN
	  #x00A2 ;; 162:CENT SIGN
	  #x00A3 ;; 163:POUND SIGN
	  #x00A7 ;; 164:SECTION SIGN
	  #x2022 ;; 165:BULLET
	  #x00B6 ;; 166:PILCROW SIGN
	  #x00DF ;; 167:LATIN SMALL LETTER SHARP S
	  #x00AE ;; 168:REGISTERED SIGN
	  #x00A9 ;; 169:COPYRIGHT SIGN
	  #x2122 ;; 170:TRADE MARK SIGN
	  #x00B4 ;; 171:ACUTE ACCENT
	  #x00A8 ;; 172:DIAERESIS
	  #x2260 ;; 173:NOT EQUAL TO
	  #x00C6 ;; 174:LATIN CAPITAL LETTER AE
	  #x00D8 ;; 175:LATIN CAPITAL LETTER O WITH STROKE
	  #x221E ;; 176:INFINITY
	  #x00B1 ;; 177:PLUS-MINUS SIGN
	  #x2264 ;; 178:LESS-THAN OR EQUAL TO
	  #x2265 ;; 179:GREATER-THAN OR EQUAL TO
	  #x00A5 ;; 180:YEN SIGN
	  #x00B5 ;; 181:MICRO SIGN
	  #x2202 ;; 182:PARTIAL DIFFERENTIAL
	  #x2211 ;; 183:N-ARY SUMMATION
	  #x220F ;; 184:N-ARY PRODUCT
	  #x03C0 ;; 185:GREEK SMALL LETTER PI
	  #x222B ;; 186:INTEGRAL
	  #x00AA ;; 187:FEMININE ORDINAL INDICATOR
	  #x00BA ;; 188:MASCULINE ORDINAL INDICATOR
	  #x03A9 ;; 189:GREEK CAPITAL LETTER OMEGA
	  #x00E6 ;; 190:LATIN SMALL LETTER AE
	  #x00F8 ;; 191:LATIN SMALL LETTER O WITH STROKE
	  #x00BF ;; 192:INVERTED QUESTION MARK
	  #x00A1 ;; 193:INVERTED EXCLAMATION MARK
	  #x00AC ;; 194:NOT SIGN
	  #x221A ;; 195:SQUARE ROOT
	  #x0192 ;; 196:LATIN SMALL LETTER F WITH HOOK
	  #x2248 ;; 197:ALMOST EQUAL TO
	  #x2206 ;; 198:INCREMENT
	  #x00AB ;; 199:LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
	  #x00BB ;; 200:RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
	  #x2026 ;; 201:HORIZONTAL ELLIPSIS
	  #x00A0 ;; 202:NO-BREAK SPACE
	  #x00C0 ;; 203:LATIN CAPITAL LETTER A WITH GRAVE
	  #x00C3 ;; 204:LATIN CAPITAL LETTER A WITH TILDE
	  #x00D5 ;; 205:LATIN CAPITAL LETTER O WITH TILDE
	  #x0152 ;; 206:LATIN CAPITAL LIGATURE OE
	  #x0153 ;; 207:LATIN SMALL LIGATURE OE
	  #x2013 ;; 208:EN DASH
	  #x2014 ;; 209:EM DASH
	  #x201C ;; 210:LEFT DOUBLE QUOTATION MARK
	  #x201D ;; 211:RIGHT DOUBLE QUOTATION MARK
	  #x2018 ;; 212:LEFT SINGLE QUOTATION MARK
	  #x2019 ;; 213:RIGHT SINGLE QUOTATION MARK
	  #x00F7 ;; 214:DIVISION SIGN
	  #x25CA ;; 215:LOZENGE
	  #x00FF ;; 216:LATIN SMALL LETTER Y WITH DIAERESIS
	  #x0178 ;; 217:LATIN CAPITAL LETTER Y WITH DIAERESIS
	  #x2044 ;; 218:FRACTION SLASH
	  #x20AC ;; 219:EURO SIGN
	  #x2039 ;; 220:SINGLE LEFT-POINTING ANGLE QUOTATION MARK
	  #x203A ;; 221:SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
	  #xFB01 ;; 222:LATIN SMALL LIGATURE FI
	  #xFB02 ;; 223:LATIN SMALL LIGATURE FL
	  #x2021 ;; 224:DOUBLE DAGGER
	  #x00B7 ;; 225:MIDDLE DOT
	  #x201A ;; 226:SINGLE LOW-9 QUOTATION MARK
	  #x201E ;; 227:DOUBLE LOW-9 QUOTATION MARK
	  #x2030 ;; 228:PER MILLE SIGN
	  #x00C2 ;; 229:LATIN CAPITAL LETTER A WITH CIRCUMFLEX
	  #x00CA ;; 230:LATIN CAPITAL LETTER E WITH CIRCUMFLEX
	  #x00C1 ;; 231:LATIN CAPITAL LETTER A WITH ACUTE
	  #x00CB ;; 232:LATIN CAPITAL LETTER E WITH DIAERESIS
	  #x00C8 ;; 233:LATIN CAPITAL LETTER E WITH GRAVE
	  #x00CD ;; 234:LATIN CAPITAL LETTER I WITH ACUTE
	  #x00CE ;; 235:LATIN CAPITAL LETTER I WITH CIRCUMFLEX
	  #x00CF ;; 236:LATIN CAPITAL LETTER I WITH DIAERESIS
	  #x00CC ;; 237:LATIN CAPITAL LETTER I WITH GRAVE
	  #x00D3 ;; 238:LATIN CAPITAL LETTER O WITH ACUTE
	  #x00D4 ;; 239:LATIN CAPITAL LETTER O WITH CIRCUMFLEX
	  #xF8FF ;; 240:Apple logo
	  #x00D2 ;; 241:LATIN CAPITAL LETTER O WITH GRAVE
	  #x00DA ;; 242:LATIN CAPITAL LETTER U WITH ACUTE
	  #x00DB ;; 243:LATIN CAPITAL LETTER U WITH CIRCUMFLEX
	  #x00D9 ;; 244:LATIN CAPITAL LETTER U WITH GRAVE
	  #x0131 ;; 245:LATIN SMALL LETTER DOTLESS I
	  #x02C6 ;; 246:MODIFIER LETTER CIRCUMFLEX ACCENT
	  #x02DC ;; 247:SMALL TILDE
	  #x00AF ;; 248:MACRON
	  #x02D8 ;; 249:BREVE
	  #x02D9 ;; 250:DOT ABOVE
	  #x02DA ;; 251:RING ABOVE
	  #x00B8 ;; 252:CEDILLA
	  #x02DD ;; 253:DOUBLE ACUTE ACCENT
	  #x02DB ;; 254:OGONEK
	  #x02C7 ;; 255:CARON
	  ])
       (byte-map (make-vector 256 nil))
       (code 0)
       decoder)
  ;; One pass over all 256 byte values: the lower half maps to itself,
  ;; the upper half goes through UCS-MAP.
  (while (< code 256)
    (aset byte-map code
	  (if (< code 128)
	      code
	    (decode-char 'ucs (aref ucs-map (- code 128)))))
    (setq code (1+ code)))
  (setq decoder (make-translation-table-from-vector byte-map))
  (define-translation-table 'mac-roman-decoder decoder)
  (define-translation-table 'mac-roman-encoder
    (char-table-extra-slot decoder 0)))
548 | ||
;; CCL program that decodes a Mac Roman byte stream.
;; Each iteration reads one byte into r1 and picks a charset id for r0:
;; `ascii' when r1 < 128, `eight-bit-control' when r1 < 160, and
;; `eight-bit-graphic' otherwise.  The (r0, r1) pair is then mapped
;; through the `mac-roman-decoder' translation table and written out
;; as a multibyte character before the loop repeats.
;; NOTE(review): the leading 4 is presumably the CCL buffer
;; magnification factor -- confirm against the CCL documentation.
549 | (define-ccl-program decode-mac-roman | |
550 | `(4 | |
551 | ((loop | |
552 | (read r1) | |
553 | (if (r1 < 128) ;; ASCII | |
554 | (r0 = ,(charset-id 'ascii)) | |
546790cb | 555 | (if (r1 < 160) |
71070f12 KH |
556 | (r0 = ,(charset-id 'eight-bit-control)) |
557 | (r0 = ,(charset-id 'eight-bit-graphic)))) | |
558 | (translate-character mac-roman-decoder r0 r1) | |
559 | (write-multibyte-character r0 r1) | |
560 | (repeat)))) | |
561 | "CCL program to decode Mac Roman") | |
562 | ||
;; CCL program that encodes text as Mac Roman.
;; Each iteration reads one multibyte character into (r0, r1), maps
;; it through the `mac-roman-encoder' translation table, then writes
;; r1 and repeats the loop.
;; NOTE(review): the leading 1 is presumably the CCL buffer
;; magnification factor -- confirm against the CCL documentation.
563 | (define-ccl-program encode-mac-roman | |
564 | `(1 | |
565 | ((loop | |
566 | (read-multibyte-character r0 r1) | |
567 | (translate-character mac-roman-encoder r0 r1) | |
568 | (write-repeat r1)))) | |
569 | "CCL program to encode Mac Roman") | |
570 | ||
;; The `mac-roman' coding system.  Its fifth argument pairs the CCL
;; decoder and encoder programs defined in this file, and
;; `mac-roman-encoder' is given as the `safe-chars' table.
(make-coding-system
 'mac-roman
 4
 ?M
 "Mac Roman Encoding (MIME:MACINTOSH)."
 '(decode-mac-roman . encode-mac-roman)
 '((safe-chars . mac-roman-encoder)
   (valid-codes (0 . 255))
   (mime-charset . macintosh)))		; per IANA, rfc1345
71070f12 | 578 | |
5180cc01 DL |
(defconst diacritic-composition-pattern
  "\\C^\\c^+"
  "Regexp matching a sequence to compose into one glyph.
It matches one character that is not in category `^' followed by one
or more characters that are in category `^'.")
580 | ||
5180cc01 DL |
(defun diacritic-compose-region (beg end)
  "Compose diacritic characters in the region.
When called from a program, expects two arguments,
positions (integers or markers) specifying the region.

Every match of `diacritic-composition-pattern' between BEG and END is
passed to `compose-region'.  Point is moved by the search and is not
restored afterwards."
  (interactive "r")
  (save-restriction
    (narrow-to-region beg end)
    (goto-char (point-min))
    (let (from to)
      (while (re-search-forward diacritic-composition-pattern nil t)
	(setq from (match-beginning 0)
	      to (match-end 0))
	(compose-region from to)))))
591 | ||
5180cc01 DL |
(defun diacritic-compose-string (string)
  "Compose diacritic characters in STRING and return the resulting string.
Each match of `diacritic-composition-pattern' is composed in place with
`compose-string'; the returned object is STRING itself."
  (let ((pos (string-match diacritic-composition-pattern string 0)))
    (while pos
      (compose-string string pos (match-end 0))
      ;; Resume the search right after the sequence just composed.
      (setq pos (string-match diacritic-composition-pattern
			      string (match-end 0))))
    string))
599 | ||
5180cc01 DL |
(defun diacritic-compose-buffer ()
  "Compose diacritic characters in the current buffer.
This runs `diacritic-compose-region' over the accessible portion of
the buffer, from `point-min' to `point-max'."
  (interactive)
  (let ((start (point-min))
	(finish (point-max)))
    (diacritic-compose-region start finish)))
604 | ||
5180cc01 DL |
(defun diacritic-post-read-conversion (len)
  "Compose diacritic characters in the LEN characters following point.
The text is handed to `diacritic-compose-region'.  Return LEN unchanged."
  (let ((start (point)))
    (diacritic-compose-region start (+ start len)))
  len)
608 | ||
5180cc01 DL |
(defun diacritic-composition-function (from to pattern &optional string)
  "Compose diacritic text in the region FROM and TO.
The text matches the regular expression PATTERN; PATTERN itself is not
used by this function.
Optional 4th argument STRING, if non-nil, is a string containing text
to compose; it is composed with `compose-string'.  Otherwise the text
between FROM and TO is composed with `compose-region'.

The return value is the number of composed characters (TO minus FROM),
or nil if the range holds fewer than two characters, in which case
nothing is composed."
  (if (< (1+ from) to)
      ;; `prog1' yields the count computed before composing; the
      ;; original repeated `(- to from)' as a last form whose value was
      ;; discarded, so it has been dropped.
      (prog1 (- to from)
	(if string
	    (compose-string string from to)
	  (compose-region from to)))))
622 | ||
;; Register a function to compose Unicode diacritics and marks.
;; The same pattern list is stored in `composition-function-table' for
;; every code point in two inclusive UCS ranges: #x300..#x362 and
;; #x20d0..#x20e3.
(let ((patterns '(("\\C^\\c^+" . diacritic-composition-function)))
      (ranges '((#x300 . #x362) (#x20d0 . #x20e3)))
      range c)
  (while ranges
    (setq range (car ranges)
	  ranges (cdr ranges)
	  c (car range))
    (while (<= c (cdr range))
      (aset composition-function-table (decode-char 'ucs c) patterns)
      (setq c (1+ c)))))
633 | ||
27ff18c9 DL |
634 | (provide 'european) |
635 | ||
4ed46869 | 636 | ;;; european.el ends here |