| 1 | ;;; european.el --- support for European languages -*- coding: iso-2022-7bit; -*- |
| 2 | |
| 3 | ;; Copyright (C) 1995, 1997, 2001 Electrotechnical Laboratory, JAPAN. |
| 4 | ;; Licensed to the Free Software Foundation. |
| 5 | ;; Copyright (C) 2001 Free Software Foundation, Inc. |
| 6 | |
| 7 | ;; Keywords: multilingual, European |
| 8 | |
| 9 | ;; This file is part of GNU Emacs. |
| 10 | |
| 11 | ;; GNU Emacs is free software; you can redistribute it and/or modify |
| 12 | ;; it under the terms of the GNU General Public License as published by |
| 13 | ;; the Free Software Foundation; either version 2, or (at your option) |
| 14 | ;; any later version. |
| 15 | |
| 16 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 19 | ;; GNU General Public License for more details. |
| 20 | |
| 21 | ;; You should have received a copy of the GNU General Public License |
| 22 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
| 23 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 24 | ;; Boston, MA 02111-1307, USA. |
| 25 | |
| 26 | ;;; Commentary: |
| 27 | |
| 28 | ;; For European scripts, character sets ISO8859-1,2,3,4,9,14,15 are |
| 29 | ;; supported. |
| 30 | |
| 31 | ;;; Code: |
| 32 | |
| 33 | ;; Latin-1 (ISO-8859-1) |
| 34 | |
;; Define the coding system `iso-latin-1' (type 2, mnemonic character `1').
;; The flags list starts with the charsets `ascii' and `latin-iso8859-1';
;; its last element is t, as it also is for Latin-8 and Latin-9 below
;; (the Latin-2..Latin-5 definitions use a shorter, all-nil flag tail).
;; NOTE(review): the meaning of each positional flag is given by the
;; `make-coding-system' docstring, not by this file -- check it before
;; editing the list.
(make-coding-system
 'iso-latin-1 2 ?1
 "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
 '(ascii latin-iso8859-1 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil t)
 '((safe-charsets ascii latin-iso8859-1)
   (mime-charset . iso-8859-1)))

;; Additional names that resolve to `iso-latin-1'.
(define-coding-system-alias 'iso-8859-1 'iso-latin-1)
(define-coding-system-alias 'latin-1 'iso-latin-1)
| 45 | |
;; Register the "Latin-1" language environment; the final argument
;; names "European" as its parent group.
(set-language-info-alist
 "Latin-1"
 '(;; Charsets and coding systems tied to this environment.
   (charset . (ascii latin-iso8859-1))
   (coding-system . (iso-latin-1))
   (coding-priority . (iso-latin-1))
   ;; `nonascii-translation' names a charset (not a coding system).
   (nonascii-translation . latin-iso8859-1)
   (unibyte-syntax . "latin-1")
   (unibyte-display . iso-latin-1)
   (input-method . "latin-1-prefix")
   ;; The sample text embeds ISO-2022 escape sequences; keep it verbatim.
   (sample-text
    . "Hello, Hej, Tere, Hei, Bonjour, Gr\e,A|_\e(B Gott, Ciao, \e,A!\e(BHola!")
   (documentation . "\
This language environment is a generic one for the Latin-1 (ISO-8859-1)
character set which supports the following European languages:
Albanian, Basque, Breton, Catalan, Danish, Dutch, English, Faeroese,
Finnish, French (with restrictions -- see Latin-9), Frisian, Galician,
German, Greenlandic, Icelandic, Irish Gaelic (new orthography),
Italian, Latin, Luxemburgish, Norwegian, Portuguese, Rhaeto-Romanic,
Scottish Gaelic, Spanish, and Swedish.
We also have specific language environments for the following languages:
For Dutch, \"Dutch\".
For German, \"German\".
For Spanish, \"Spanish\".
For French, \"French\".

Latin-1 also covers several written languages outside Europe, including
Indonesian/Malay, Tagalog (Philippines), Swahili and Afrikaans."))
 '("European"))
| 73 | |
| 74 | \f |
| 75 | ;; Latin-2 (ISO-8859-2) |
| 76 | |
;; Define the coding system `iso-latin-2' (type 2, mnemonic character `2').
;; The flags list starts with the charsets `ascii' and `latin-iso8859-2';
;; every remaining flag given here is nil.
(make-coding-system
 'iso-latin-2 2 ?2
 "ISO 2022 based 8-bit encoding for Latin-2 (MIME:ISO-8859-2)."
 '(ascii latin-iso8859-2 nil nil
   nil nil nil nil nil nil nil)
 '((safe-charsets ascii latin-iso8859-2)
   (mime-charset . iso-8859-2)))

;; Additional names that resolve to `iso-latin-2'.
(define-coding-system-alias 'iso-8859-2 'iso-latin-2)
(define-coding-system-alias 'latin-2 'iso-latin-2)
| 87 | |
;; Register the "Latin-2" language environment; the final argument
;; names "European" as its parent group.
(set-language-info-alist
 "Latin-2"
 '(;; Charsets and coding systems tied to this environment.
   (charset . (ascii latin-iso8859-2))
   (coding-system . (iso-latin-2))
   (coding-priority . (iso-latin-2))
   ;; `nonascii-translation' names a charset (not a coding system).
   (nonascii-translation . latin-iso8859-2)
   (unibyte-syntax . "latin-2")
   (unibyte-display . iso-latin-2)
   (input-method . "latin-2-prefix")
   (documentation . "\
This language environment is a generic one for the Latin-2 (ISO-8859-2)
character set which supports the following languages:
Albanian, Czech, English, German, Hungarian, Polish, Romanian,
Serbo-Croatian or Croatian, Slovak, Slovene, Sorbian (upper and lower),
and Swedish.
We also have specific language environments for the following languages:
For Czech, \"Czech\".
For Romanian, \"Romanian\".
For Slovak, \"Slovak\"."))
 '("European"))
| 107 | |
| 108 | \f |
| 109 | ;; Latin-3 (ISO-8859-3) |
| 110 | |
;; Define the coding system `iso-latin-3' (type 2, mnemonic character `3').
;; The flags list starts with the charsets `ascii' and `latin-iso8859-3';
;; every remaining flag given here is nil.
(make-coding-system
 'iso-latin-3 2 ?3
 "ISO 2022 based 8-bit encoding for Latin-3 (MIME:ISO-8859-3)."
 '(ascii latin-iso8859-3 nil nil
   nil nil nil nil nil nil nil)
 '((safe-charsets ascii latin-iso8859-3)
   (mime-charset . iso-8859-3)))

;; Additional names that resolve to `iso-latin-3'.
(define-coding-system-alias 'iso-8859-3 'iso-latin-3)
(define-coding-system-alias 'latin-3 'iso-latin-3)
| 121 | |
;; Register the "Latin-3" language environment; the final argument
;; names "European" as its parent group.
(set-language-info-alist
 "Latin-3"
 '(;; Charsets and coding systems tied to this environment.
   (charset . (ascii latin-iso8859-3))
   (coding-system . (iso-latin-3))
   (coding-priority . (iso-latin-3))
   ;; `nonascii-translation' names a charset (not a coding system).
   (nonascii-translation . latin-iso8859-3)
   (unibyte-syntax . "latin-3")
   (unibyte-display . iso-latin-3)
   (input-method . "latin-3-prefix")
   (documentation . "\
These languages are supported with the Latin-3 (ISO-8859-3) character set:
Afrikaans, Catalan, Dutch, English, Esperanto, French, Galician,
German, Italian, Maltese, Spanish, and Turkish."))
 '("European"))
| 135 | |
| 136 | \f |
| 137 | ;; Latin-4 (ISO-8859-4) |
| 138 | |
;; Define the coding system `iso-latin-4' (type 2, mnemonic character `4').
;; The flags list starts with the charsets `ascii' and `latin-iso8859-4';
;; every remaining flag given here is nil.
(make-coding-system
 'iso-latin-4 2 ?4
 "ISO 2022 based 8-bit encoding for Latin-4 (MIME:ISO-8859-4)."
 '(ascii latin-iso8859-4 nil nil
   nil nil nil nil nil nil nil)
 '((safe-charsets ascii latin-iso8859-4)
   (mime-charset . iso-8859-4)))

;; Additional names that resolve to `iso-latin-4'.
(define-coding-system-alias 'iso-8859-4 'iso-latin-4)
(define-coding-system-alias 'latin-4 'iso-latin-4)
| 149 | |
;; Register the "Latin-4" language environment; the final argument
;; names "European" as its parent group.  This entry refers to the
;; coding system by the name `iso-8859-4'.
(set-language-info-alist
 "Latin-4"
 '(;; Charsets and coding systems tied to this environment.
   (charset . (ascii latin-iso8859-4))
   (coding-system . (iso-8859-4))
   (coding-priority . (iso-8859-4))
   ;; `nonascii-translation' names a charset (not a coding system).
   (nonascii-translation . latin-iso8859-4)
   (unibyte-syntax . "latin-4")
   (unibyte-display . iso-8859-4)
   (input-method . "latin-4-postfix")
   (documentation . "\
These languages are supported with the Latin-4 (ISO-8859-4) character set:
Danish, English, Estonian, Finnish, German, Greenlandic, Lappish,
Latvian, Lithuanian, and Norwegian."))
 '("European"))
| 163 | |
| 164 | \f |
| 165 | ;; Latin-5 (ISO-8859-9) |
| 166 | |
;; Define the coding system `iso-latin-5' (type 2, mnemonic character `9').
;; Note the numbering: Latin-5 is ISO-8859-9, so the charset is
;; `latin-iso8859-9'.  Every remaining flag given here is nil.
(make-coding-system
 'iso-latin-5 2 ?9
 "ISO 2022 based 8-bit encoding for Latin-5 (MIME:ISO-8859-9)."
 '(ascii latin-iso8859-9 nil nil
   nil nil nil nil nil nil nil)
 '((safe-charsets ascii latin-iso8859-9)
   (mime-charset . iso-8859-9)))

;; Additional names that resolve to `iso-latin-5'.
(define-coding-system-alias 'iso-8859-9 'iso-latin-5)
(define-coding-system-alias 'latin-5 'iso-latin-5)
| 177 | |
;; Register the "Latin-5" language environment; the final argument
;; names "European" as its parent group.
;;
;; Latin-5 is ISO-8859-9, the Turkish variant of Latin-1.  The
;; documentation string used to list the Cyrillic languages of
;; ISO-8859-5, which this character set cannot represent.
(set-language-info-alist
 "Latin-5" '((charset ascii latin-iso8859-9)
             (coding-system iso-latin-5)
             (coding-priority iso-latin-5)
             (nonascii-translation . latin-iso8859-9)
             (unibyte-syntax . "latin-5")
             (unibyte-display . iso-latin-5)
             (input-method . "latin-5-postfix")
             (documentation . "\
These languages are supported with the Latin-5 (ISO-8859-9) character set:
Turkish, and the languages covered by Latin-1 that do not need the
Icelandic letters (which Latin-5 replaces with Turkish ones).
We also have a specific language environment for Turkish: \"Turkish\"."))
 '("European"))
| 191 | |
| 192 | \f |
| 193 | ;; Latin-8 (ISO-8859-14) |
| 194 | |
;; Define the coding system `iso-latin-8' (type 2) for ISO-8859-14.
;; The flags list starts with the charsets `ascii' and `latin-iso8859-14'
;; and, like the Latin-1 and Latin-9 definitions, ends with t.
(make-coding-system
 'iso-latin-8 2 ?W                      ; `W' for `Welsh', since `C'
                                        ; for `Celtic' is taken.
 "ISO 2022 based 8-bit encoding for Latin-8 (MIME:ISO-8859-14)."
 '(ascii latin-iso8859-14 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil t)
 '((safe-charsets ascii latin-iso8859-14)
   (mime-charset . iso-8859-14)))

;; Additional names that resolve to `iso-latin-8'.
(define-coding-system-alias 'iso-8859-14 'iso-latin-8)
(define-coding-system-alias 'latin-8 'iso-latin-8)
| 206 | |
;; Register the "Latin-8" language environment; the final argument
;; names "European" as its parent group.
(set-language-info-alist
 "Latin-8" '((charset ascii latin-iso8859-14)
             (coding-system iso-latin-8)
             (coding-priority iso-latin-8)
             (nonascii-translation . latin-iso8859-14)
             (unibyte-syntax . "latin-8")
             (unibyte-display . iso-latin-8)
             (input-method . "latin-8-prefix")
             ;; Fixme: Welsh/Ga{e}lic greetings
             ;; The sample text is ISO-2022-7bit encoded (see the file's
             ;; coding cookie): the double quote after the first escape
             ;; sequence is a character code, not a string delimiter.
             ;; Do not edit this string as plain text.
             (sample-text . "\e,_"\e(B \e,_p\e(B \e,_^\e(B")
             (documentation . "\
This language environment is a generic one for the Latin-8 (ISO-8859-14)
character set which supports the Celtic languages, including those not
covered by other ISO-8859 character sets:
Welsh, Manx Gaelic and Irish Gaelic (old orthography)."))
 '("European"))
| 223 | \f |
| 224 | ;; Latin-9 (ISO-8859-15) |
| 225 | |
;; Define the coding system `iso-latin-9' (type 2) for ISO-8859-15.
;; The flags list starts with the charsets `ascii' and `latin-iso8859-15'
;; and, like the Latin-1 and Latin-8 definitions, ends with t.
(make-coding-system
 'iso-latin-9 2 ?0                      ; `0' for `Latin-0'
 "ISO 2022 based 8-bit encoding for Latin-9 (MIME:ISO-8859-15)."
 '(ascii latin-iso8859-15 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil t)
 '((safe-charsets ascii latin-iso8859-15)
   (mime-charset . iso-8859-15)))

;; Additional names that resolve to `iso-latin-9', including the
;; `latin-0' nickname.
(define-coding-system-alias 'iso-8859-15 'iso-latin-9)
(define-coding-system-alias 'latin-9 'iso-latin-9)
(define-coding-system-alias 'latin-0 'iso-latin-9)
| 237 | |
;; Register the "Latin-9" language environment; the final argument
;; names "European" as its parent group.
(set-language-info-alist
 "Latin-9"
 '(;; Charsets and coding systems tied to this environment.
   (charset . (ascii latin-iso8859-15))
   (coding-system . (iso-latin-9))
   (coding-priority . (iso-latin-9))
   ;; `nonascii-translation' names a charset (not a coding system).
   (nonascii-translation . latin-iso8859-15)
   (unibyte-syntax . "latin-9")
   (unibyte-display . iso-latin-9)
   (input-method . "latin-9-prefix")
   ;; The sample text embeds ISO-2022 escape sequences; keep it verbatim.
   (sample-text
    . "AVE. \e,b&(48<=>\e(B \e,b$\e(B")
   (documentation . "\
This language environment is a generic one for the Latin-9 (ISO-8859-15)
character set which supports the same languages as Latin-1 with the
addition of the Euro sign and some additional French and Finnish letters.
Latin-9 is sometimes nicknamed `Latin-0'."))
 '("European"))
| 254 | \f |
;; Register the "German" language environment; the final argument
;; names "European" as its parent group.
(set-language-info-alist
 "German" '((tutorial . "TUTORIAL.de")
            (charset ascii latin-iso8859-1)
            (coding-system iso-latin-1)
            (coding-priority iso-latin-1)
            (input-method . "german-postfix")
            ;; Must be a charset, as in the "Latin-1" and "French"
            ;; entries; the coding-system name `iso-latin-1' is not a
            ;; charset and would leave the translation unset.
            (nonascii-translation . latin-iso8859-1)
            (unibyte-syntax . "latin-1")
            (unibyte-display . iso-latin-1)
            ;; The sample text embeds ISO-2022 escape sequences; keep
            ;; it verbatim.
            (sample-text . "\
German (Deutsch Nord) Guten Tag
German (Deutsch S\e,A|\e(Bd) Gr\e,A|_\e(B Gott")
            (documentation . "\
This language environment is almost the same as Latin-1,
but the default input method is set to \"german-postfix\".
Additionally, the tutorial is set to \"TUTORIAL.de\"."))
 '("European"))
| 272 | |
;; Register the "French" language environment; the final argument
;; names "European" as its parent group.
(set-language-info-alist
 "French"
 '((tutorial . "TUTORIAL.fr")
   ;; Charsets and coding systems tied to this environment.
   (charset . (ascii latin-iso8859-1))
   (coding-system . (iso-latin-1))
   (coding-priority . (iso-latin-1))
   ;; `nonascii-translation' names a charset (not a coding system).
   (nonascii-translation . latin-iso8859-1)
   (unibyte-syntax . "latin-1")
   (unibyte-display . iso-latin-1)
   (input-method . "latin-1-prefix")
   ;; The sample text embeds ISO-2022 escape sequences; keep it verbatim.
   (sample-text . "French (Fran\e,Ag\e(Bais) Bonjour, Salut")
   (documentation . "\
This language environment is almost the same as Latin-1,
but the tutorial is set to \"TUTORIAL.fr\"."))
 '("European"))
| 287 | |
;; Register the "Slovenian" language environment; the final argument
;; names "European" as its parent group.  This entry refers to the
;; coding system by the name `iso-8859-2'.
(set-language-info-alist
 "Slovenian"
 '((charset ascii latin-iso8859-2)
   (coding-system iso-8859-2)
   (coding-priority iso-8859-2)
   ;; `nonascii-translation' names a charset (not a coding system).
   (nonascii-translation . latin-iso8859-2)
   (input-method . "latin-2-postfix")
   (unibyte-syntax . "latin-2")
   (unibyte-display . iso-8859-2)
   (tutorial . "TUTORIAL.sl")
   ;; The sample text embeds ISO-2022 escape sequences; keep it verbatim.
   (sample-text . "\e,B.\e(Belimo vam uspe\e,B9\e(Ben dan!")
   (documentation . t))
 '("European"))
| 300 | |
;; Register the "Spanish" language environment; the final argument
;; names "European" as its parent group.
(set-language-info-alist
 "Spanish" '((tutorial . "TUTORIAL.es")
             (charset ascii latin-iso8859-1)
             (coding-system iso-latin-1)
             (coding-priority iso-latin-1)
             (input-method . "spanish-postfix")
             ;; Must be a charset, as in the "Latin-1" and "French"
             ;; entries; the coding-system name `iso-latin-1' is not a
             ;; charset and would leave the translation unset.
             (nonascii-translation . latin-iso8859-1)
             (unibyte-syntax . "latin-1")
             (unibyte-display . iso-latin-1)
             ;; The sample text embeds ISO-2022 escape sequences; keep
             ;; it verbatim.
             (sample-text . "Spanish (Espa\e,Aq\e(Bol) \e,A!\e(BHola!")
             (documentation . "\
This language environment is almost the same as Latin-1,
but the default input method is set to \"spanish-postfix\",
and it selects the Spanish tutorial."))
 '("European"))
| 316 | |
;; Register the "Dutch" language environment; the final argument
;; names "European" as its parent group.
(set-language-info-alist
 "Dutch" '((tutorial . "TUTORIAL.nl")
           (charset ascii latin-iso8859-1)
           (coding-system iso-latin-1)
           (coding-priority iso-latin-1)
           ;; Must be a charset, as in the "Latin-1" and "French"
           ;; entries; the coding-system name `iso-latin-1' is not a
           ;; charset and would leave the translation unset.
           (nonascii-translation . latin-iso8859-1)
           (unibyte-syntax . "latin-1")
           (unibyte-display . iso-latin-1)
           (sample-text . "Er is een aantal manieren waarop je dit kan doen")
           (documentation . "\
This language environment is almost the same as Latin-1,
but the Dutch tutorial is selected."))
 '("European"))
| 330 | |
;; For Turkish, the character set ISO-8859-9 (Latin-5) is used.  But,
;; before the introduction of ISO-8859-9 in 1988, ISO-8859-3 (Latin-3)
;; was used for Turkish.  Those who use Latin-3 for Turkish should use
;; the "Latin-3" language environment.
| 335 | |
;; Register the "Turkish" language environment.  Like every other
;; environment defined in this file it names "European" as its parent
;; group; the parents argument was previously missing here.
(set-language-info-alist
 "Turkish" '((charset ascii latin-iso8859-9)
             (coding-system iso-latin-5)
             (coding-priority iso-latin-5)
             (nonascii-translation . latin-iso8859-9)
             (unibyte-syntax . "latin-5")
             (unibyte-display . iso-latin-5)
             (input-method . "turkish-postfix")
             ;; The sample text embeds ISO-2022 escape sequences; keep
             ;; it verbatim.
             (sample-text . "Turkish (T\e,M|\e(Brk\e,Mg\e(Be) Merhaba")
             (documentation . t))
 '("European"))
| 346 | |
| 347 | ;; Polish ISO 8859-2 environment. |
| 348 | ;; Maintainer: Wlodek Bzyl <matwb@univ.gda.pl> |
| 349 | ;; Keywords: multilingual, Polish |
| 350 | |
;; Register the "Polish" language environment; the final argument
;; names "European" as its parent group.  This entry refers to the
;; coding system by the name `iso-8859-2'.
(set-language-info-alist
 "Polish"
 '((charset ascii latin-iso8859-2)
   (coding-system iso-8859-2)
   (coding-priority iso-8859-2)
   (input-method . "polish-slash")
   ;; `nonascii-translation' names a charset (not a coding system).
   (nonascii-translation . latin-iso8859-2)
   (unibyte-syntax . "latin-2")
   (unibyte-display . iso-8859-2)
   (tutorial . "TUTORIAL.pl")
   ;; The sample text embeds ISO-2022 escape sequences; keep it verbatim.
   (sample-text . "P\e,Bs\e(Bjd\e,B<\e(B, ki\e,Bq\e(B-\e,B?\e(Be t\e,Bj\e(B chmurno\e,B6f\e(B w g\e,B31\e(Bb flaszy")
   (documentation . t))
 '("European"))
| 363 | |
;; Register the "Welsh" language environment; the final argument names
;; "European" as its parent group.
(set-language-info-alist
 "Welsh"
 '(;; utf-8 is listed first: the input method is Unicode-based.
   (coding-system . (utf-8 latin-8))
   (coding-priority . (utf-8 latin-8))
   (input-method . "welsh")
   (documentation . "Support for Welsh, using Unicode."))
 '("European"))
| 370 | |
;; Register the "Latin-7" language environment; the final argument
;; names "European" as its parent group.
(set-language-info-alist
 "Latin-7"
 '((coding-system . (latin-7))
   (coding-priority . (latin-7))
   ;; Fixme: input-method
   (features . (code-pages))
   (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian."))
 '("European"))
| 378 | |
;; Register the "Lithuanian" language environment; the final argument
;; names "European" as its parent group.
(set-language-info-alist
 "Lithuanian"
 '((coding-system . (latin-7))
   (coding-priority . (latin-7))
   (input-method . "lithuanian-keyboard")
   (features . (code-pages))
   (documentation . "Support for Lithuanian."))
 '("European"))
| 386 | |
;; Register the "Latvian" language environment; the final argument
;; names "European" as its parent group.
(set-language-info-alist
 "Latvian"
 '((coding-system . (latin-7))
   (coding-priority . (latin-7))
   (input-method . "latvian-keyboard")
   (features . (code-pages))
   (documentation . "Support for Latvian."))
 '("European"))
| 394 | \f |
| 395 | ;; Definitions for the Mac Roman character sets and coding system. |
| 396 | ;; The Mac Roman encoding uses all 128 code points in the range 128 to |
| 397 | ;; 255 for actual characters. Emacs decodes them to one of the |
| 398 | ;; following character sets. |
| 399 | ;; ascii, latin-iso8859-1, mule-unicode-0100-24ff, |
| 400 | ;; mule-unicode-2500-33ff, mule-unicode-e000-ffff |
| 401 | |
;; Build the translation tables `mac-roman-decoder' and
;; `mac-roman-encoder'.  A 256-slot vector is filled so that codes
;; 0..127 map to themselves and codes 128..255 map to the character
;; obtained by decoding the matching UCS code point from the table
;; below.  The decoder is the translation table made from that vector;
;; the encoder is the table stored in the decoder's extra slot 0.
(let ((decoded (make-vector 256 nil))
      (upper-half ;; mac-roman (128..255) -> UCS mapping
       [ #x00C4 ;; 128:LATIN CAPITAL LETTER A WITH DIAERESIS
         #x00C5 ;; 129:LATIN CAPITAL LETTER A WITH RING ABOVE
         #x00C7 ;; 130:LATIN CAPITAL LETTER C WITH CEDILLA
         #x00C9 ;; 131:LATIN CAPITAL LETTER E WITH ACUTE
         #x00D1 ;; 132:LATIN CAPITAL LETTER N WITH TILDE
         #x00D6 ;; 133:LATIN CAPITAL LETTER O WITH DIAERESIS
         #x00DC ;; 134:LATIN CAPITAL LETTER U WITH DIAERESIS
         #x00E1 ;; 135:LATIN SMALL LETTER A WITH ACUTE
         #x00E0 ;; 136:LATIN SMALL LETTER A WITH GRAVE
         #x00E2 ;; 137:LATIN SMALL LETTER A WITH CIRCUMFLEX
         #x00E4 ;; 138:LATIN SMALL LETTER A WITH DIAERESIS
         #x00E3 ;; 139:LATIN SMALL LETTER A WITH TILDE
         #x00E5 ;; 140:LATIN SMALL LETTER A WITH RING ABOVE
         #x00E7 ;; 141:LATIN SMALL LETTER C WITH CEDILLA
         #x00E9 ;; 142:LATIN SMALL LETTER E WITH ACUTE
         #x00E8 ;; 143:LATIN SMALL LETTER E WITH GRAVE
         #x00EA ;; 144:LATIN SMALL LETTER E WITH CIRCUMFLEX
         #x00EB ;; 145:LATIN SMALL LETTER E WITH DIAERESIS
         #x00ED ;; 146:LATIN SMALL LETTER I WITH ACUTE
         #x00EC ;; 147:LATIN SMALL LETTER I WITH GRAVE
         #x00EE ;; 148:LATIN SMALL LETTER I WITH CIRCUMFLEX
         #x00EF ;; 149:LATIN SMALL LETTER I WITH DIAERESIS
         #x00F1 ;; 150:LATIN SMALL LETTER N WITH TILDE
         #x00F3 ;; 151:LATIN SMALL LETTER O WITH ACUTE
         #x00F2 ;; 152:LATIN SMALL LETTER O WITH GRAVE
         #x00F4 ;; 153:LATIN SMALL LETTER O WITH CIRCUMFLEX
         #x00F6 ;; 154:LATIN SMALL LETTER O WITH DIAERESIS
         #x00F5 ;; 155:LATIN SMALL LETTER O WITH TILDE
         #x00FA ;; 156:LATIN SMALL LETTER U WITH ACUTE
         #x00F9 ;; 157:LATIN SMALL LETTER U WITH GRAVE
         #x00FB ;; 158:LATIN SMALL LETTER U WITH CIRCUMFLEX
         #x00FC ;; 159:LATIN SMALL LETTER U WITH DIAERESIS
         #x2020 ;; 160:DAGGER
         #x00B0 ;; 161:DEGREE SIGN
         #x00A2 ;; 162:CENT SIGN
         #x00A3 ;; 163:POUND SIGN
         #x00A7 ;; 164:SECTION SIGN
         #x2022 ;; 165:BULLET
         #x00B6 ;; 166:PILCROW SIGN
         #x00DF ;; 167:LATIN SMALL LETTER SHARP S
         #x00AE ;; 168:REGISTERED SIGN
         #x00A9 ;; 169:COPYRIGHT SIGN
         #x2122 ;; 170:TRADE MARK SIGN
         #x00B4 ;; 171:ACUTE ACCENT
         #x00A8 ;; 172:DIAERESIS
         #x2260 ;; 173:NOT EQUAL TO
         #x00C6 ;; 174:LATIN CAPITAL LETTER AE
         #x00D8 ;; 175:LATIN CAPITAL LETTER O WITH STROKE
         #x221E ;; 176:INFINITY
         #x00B1 ;; 177:PLUS-MINUS SIGN
         #x2264 ;; 178:LESS-THAN OR EQUAL TO
         #x2265 ;; 179:GREATER-THAN OR EQUAL TO
         #x00A5 ;; 180:YEN SIGN
         #x00B5 ;; 181:MICRO SIGN
         #x2202 ;; 182:PARTIAL DIFFERENTIAL
         #x2211 ;; 183:N-ARY SUMMATION
         #x220F ;; 184:N-ARY PRODUCT
         #x03C0 ;; 185:GREEK SMALL LETTER PI
         #x222B ;; 186:INTEGRAL
         #x00AA ;; 187:FEMININE ORDINAL INDICATOR
         #x00BA ;; 188:MASCULINE ORDINAL INDICATOR
         #x03A9 ;; 189:GREEK CAPITAL LETTER OMEGA
         #x00E6 ;; 190:LATIN SMALL LETTER AE
         #x00F8 ;; 191:LATIN SMALL LETTER O WITH STROKE
         #x00BF ;; 192:INVERTED QUESTION MARK
         #x00A1 ;; 193:INVERTED EXCLAMATION MARK
         #x00AC ;; 194:NOT SIGN
         #x221A ;; 195:SQUARE ROOT
         #x0192 ;; 196:LATIN SMALL LETTER F WITH HOOK
         #x2248 ;; 197:ALMOST EQUAL TO
         #x2206 ;; 198:INCREMENT
         #x00AB ;; 199:LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
         #x00BB ;; 200:RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
         #x2026 ;; 201:HORIZONTAL ELLIPSIS
         #x00A0 ;; 202:NO-BREAK SPACE
         #x00C0 ;; 203:LATIN CAPITAL LETTER A WITH GRAVE
         #x00C3 ;; 204:LATIN CAPITAL LETTER A WITH TILDE
         #x00D5 ;; 205:LATIN CAPITAL LETTER O WITH TILDE
         #x0152 ;; 206:LATIN CAPITAL LIGATURE OE
         #x0153 ;; 207:LATIN SMALL LIGATURE OE
         #x2013 ;; 208:EN DASH
         #x2014 ;; 209:EM DASH
         #x201C ;; 210:LEFT DOUBLE QUOTATION MARK
         #x201D ;; 211:RIGHT DOUBLE QUOTATION MARK
         #x2018 ;; 212:LEFT SINGLE QUOTATION MARK
         #x2019 ;; 213:RIGHT SINGLE QUOTATION MARK
         #x00F7 ;; 214:DIVISION SIGN
         #x25CA ;; 215:LOZENGE
         #x00FF ;; 216:LATIN SMALL LETTER Y WITH DIAERESIS
         #x0178 ;; 217:LATIN CAPITAL LETTER Y WITH DIAERESIS
         #x2044 ;; 218:FRACTION SLASH
         #x20AC ;; 219:EURO SIGN
         #x2039 ;; 220:SINGLE LEFT-POINTING ANGLE QUOTATION MARK
         #x203A ;; 221:SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
         #xFB01 ;; 222:LATIN SMALL LIGATURE FI
         #xFB02 ;; 223:LATIN SMALL LIGATURE FL
         #x2021 ;; 224:DOUBLE DAGGER
         #x00B7 ;; 225:MIDDLE DOT
         #x201A ;; 226:SINGLE LOW-9 QUOTATION MARK
         #x201E ;; 227:DOUBLE LOW-9 QUOTATION MARK
         #x2030 ;; 228:PER MILLE SIGN
         #x00C2 ;; 229:LATIN CAPITAL LETTER A WITH CIRCUMFLEX
         #x00CA ;; 230:LATIN CAPITAL LETTER E WITH CIRCUMFLEX
         #x00C1 ;; 231:LATIN CAPITAL LETTER A WITH ACUTE
         #x00CB ;; 232:LATIN CAPITAL LETTER E WITH DIAERESIS
         #x00C8 ;; 233:LATIN CAPITAL LETTER E WITH GRAVE
         #x00CD ;; 234:LATIN CAPITAL LETTER I WITH ACUTE
         #x00CE ;; 235:LATIN CAPITAL LETTER I WITH CIRCUMFLEX
         #x00CF ;; 236:LATIN CAPITAL LETTER I WITH DIAERESIS
         #x00CC ;; 237:LATIN CAPITAL LETTER I WITH GRAVE
         #x00D3 ;; 238:LATIN CAPITAL LETTER O WITH ACUTE
         #x00D4 ;; 239:LATIN CAPITAL LETTER O WITH CIRCUMFLEX
         #xF8FF ;; 240:Apple logo
         #x00D2 ;; 241:LATIN CAPITAL LETTER O WITH GRAVE
         #x00DA ;; 242:LATIN CAPITAL LETTER U WITH ACUTE
         #x00DB ;; 243:LATIN CAPITAL LETTER U WITH CIRCUMFLEX
         #x00D9 ;; 244:LATIN CAPITAL LETTER U WITH GRAVE
         #x0131 ;; 245:LATIN SMALL LETTER DOTLESS I
         #x02C6 ;; 246:MODIFIER LETTER CIRCUMFLEX ACCENT
         #x02DC ;; 247:SMALL TILDE
         #x00AF ;; 248:MACRON
         #x02D8 ;; 249:BREVE
         #x02D9 ;; 250:DOT ABOVE
         #x02DA ;; 251:RING ABOVE
         #x00B8 ;; 252:CEDILLA
         #x02DD ;; 253:DOUBLE ACUTE ACCENT
         #x02DB ;; 254:OGONEK
         #x02C7 ;; 255:CARON
         ]))
  ;; One pass over all 256 codes: identity below 128, table lookup above.
  (dotimes (code 256)
    (aset decoded code
          (if (< code 128)
              code
            (decode-char 'ucs (aref upper-half (- code 128))))))
  (let ((decoder (make-translation-table-from-vector decoded)))
    (define-translation-table 'mac-roman-decoder decoder)
    ;; Extra slot 0 of the decoder holds the table used for encoding.
    (define-translation-table 'mac-roman-encoder
      (char-table-extra-slot decoder 0))))
| 548 | |
;; CCL program that decodes Mac Roman.  Each loop iteration reads one
;; code into r1, sets r0 to a charset id chosen from r1's range,
;; translates the (r0, r1) pair through `mac-roman-decoder', writes the
;; resulting character, and repeats.
;; NOTE(review): the leading 4 is presumably the CCL buffer
;; magnification -- confirm against the CCL documentation.
(define-ccl-program decode-mac-roman
  `(4
    ((loop
      (read r1)
      (if (r1 < 128) ;; ASCII
          (r0 = ,(charset-id 'ascii))
        (if (r1 < 160)
            ;; codes 128..159
            (r0 = ,(charset-id 'eight-bit-control))
          ;; codes 160 and above
          (r0 = ,(charset-id 'eight-bit-graphic))))
      (translate-character mac-roman-decoder r0 r1)
      (write-multibyte-character r0 r1)
      (repeat))))
  "CCL program to decode Mac Roman")
| 562 | |
;; CCL program that encodes to Mac Roman.  Each loop iteration reads one
;; multibyte character into (r0, r1), translates it through
;; `mac-roman-encoder', then writes r1 and repeats.
;; NOTE(review): the leading 1 is presumably the CCL buffer
;; magnification -- confirm against the CCL documentation.
(define-ccl-program encode-mac-roman
  `(1
    ((loop
      (read-multibyte-character r0 r1)
      (translate-character mac-roman-encoder r0 r1)
      (write-repeat r1))))
  "CCL program to encode Mac Roman")
| 570 | |
;; Define the coding system `mac-roman' (type 4, mnemonic character `M').
;; For this type the flags argument is a pair of CCL programs:
;; `decode-mac-roman' for decoding and `encode-mac-roman' for encoding.
;; The safe characters are taken from the `mac-roman-encoder'
;; translation table, and every code 0..255 is declared valid.
(make-coding-system
 'mac-roman 4 ?M
 "Mac Roman Encoding (MIME:MACINTOSH)."
 '(decode-mac-roman . encode-mac-roman)
 '((safe-chars . mac-roman-encoder)
   (valid-codes (0 . 255))
   (mime-charset . macintosh)))         ; per IANA, rfc1345
| 578 | |
;; Regexp used by the diacritic-compose-* functions below: one character
;; that is not in category `^', followed by one or more characters that
;; are in category `^'.
;; NOTE(review): category `^' is presumably the combining diacritic/mark
;; category -- confirm with `describe-categories'.
(defconst diacritic-composition-pattern "\\C^\\c^+")
| 580 | |
(defun diacritic-compose-region (beg end)
  "Compose diacritic characters between BEG and END.
Interactively, operate on the region.  From Lisp, BEG and END are
positions (integers or markers) delimiting the text to process.
Every match of `diacritic-composition-pattern' inside that text is
turned into a composition."
  (interactive "r")
  (save-restriction
    (narrow-to-region beg end)
    (goto-char (point-min))
    (let ((pattern diacritic-composition-pattern))
      ;; Searching with no bound is safe: narrowing limits the search.
      (while (re-search-forward pattern nil t)
        (let ((start (match-beginning 0))
              (stop (match-end 0)))
          (compose-region start stop))))))
| 591 | |
(defun diacritic-compose-string (string)
  "Return STRING after composing its diacritic sequences.
Each match of `diacritic-composition-pattern' in STRING is composed in
place, so the value returned is STRING itself."
  (let ((start (string-match diacritic-composition-pattern string 0)))
    (while start
      (compose-string string start (match-end 0))
      ;; Resume scanning right after the sequence just composed.
      (setq start (string-match diacritic-composition-pattern
                                string (match-end 0)))))
  string)
| 599 | |
(defun diacritic-compose-buffer ()
  "Run `diacritic-compose-region' over the accessible part of the buffer."
  (interactive)
  (let ((start (point-min))
        (end (point-max)))
    (diacritic-compose-region start end)))
| 604 | |
(defun diacritic-post-read-conversion (len)
  "Compose diacritics in the LEN characters after point; return LEN."
  (let ((start (point)))
    (diacritic-compose-region start (+ (point) len)))
  len)
| 608 | |
(defun diacritic-composition-function (from to pattern &optional string)
  "Compose diacritic text between FROM and TO.
PATTERN is the regular expression that the text matches; this function
does not examine it.
Optional 4th argument STRING, if non-nil, is a string containing the
text to compose; otherwise the text is taken from the current buffer.

Return the number of composed characters, or nil if FROM..TO spans
fewer than two characters, in which case nothing is composed."
  (when (< (1+ from) to)
    (if string
        (compose-string string from to)
      (compose-region from to))
    ;; Composing does not move FROM or TO, so the count can be computed
    ;; once, after the fact.
    (- to from)))
| 622 | |
;; Register `diacritic-composition-function' in
;; `composition-function-table' for the Unicode diacritics and marks in
;; the ranges U+0300..U+0362 and U+20D0..U+20E3.  Every entry shares the
;; same pattern list object.
(let ((patterns '(("\\C^\\c^+" . diacritic-composition-function))))
  (dolist (range '((#x300 . #x362) (#x20d0 . #x20e3)))
    (let ((code (car range))
          (last (cdr range)))
      (while (<= code last)
        (aset composition-function-table (decode-char 'ucs code) patterns)
        (setq code (1+ code))))))
| 633 | |
| 634 | (provide 'european) |
| 635 | |
| 636 | ;;; european.el ends here |