Fix up comment convention on the arch-tag lines.
[bpt/emacs.git] / lisp / international / ogonek.el
1 ;;; ogonek.el --- change the encoding of Polish diacritics
2
3 ;; Copyright (C) 1997, 1998, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 ;; Free Software Foundation, Inc.
5
6 ;; Author: W{\l}odek Bzyl, Ryszard Kubiak
7 ;; Maintainer: rysiek@ipipan.gda.pl (Ryszard Kubiak)
8 ;; Keywords: i18n
9
10 ;; This file is part of GNU Emacs.
11
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 3, or (at your option)
15 ;; any later version.
16
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 ;; Boston, MA 02110-1301, USA.
26
27 ;;; Commentary:
28
29 ;; To use this library load it using
30 ;; M-x load-library [enter] ogonek
31 ;; Then, you may get a short info by calling one of
32 ;; M-x ogonek-jak -- in Polish
33 ;; M-x ogonek-how -- in English "
34
35 ;;; Code:
36
37 (defgroup ogonek nil
38 "Change the encoding of Polish diacritic characters."
39 :prefix "ogonek-"
40 :group 'i18n)
41
42 (defconst ogonek-name-encoding-alist
43 '(("ascii" . (?A ?C ?E ?L ?N ?O ?S ?Z ?Z
44 ?a ?c ?e ?l ?n ?o ?s ?z ?z))
45 ("iso8859-2" . (161 198 202 163 209 211 166 172 175
46 177 230 234 179 241 243 182 188 191))
47 ("mazovia" . (143 149 144 156 165 163 152 160 161
48 134 141 145 146 164 162 158 166 167))
49 ("windows-EE" . (165 198 202 163 209 211 140 143 175
50 185 230 234 179 241 243 156 159 191))
51 ("windows-PL" . (165 198 202 163 209 211 140 143 175
52 185 230 234 179 241 243 156 159 191))
53 ("latin-2" . (164 143 168 157 227 224 151 141 189
54 165 134 169 136 228 162 152 171 190))
55 ("CP852" . (164 143 168 157 227 224 151 141 189
56 165 134 169 136 228 162 152 171 190))
57 ("MeX" . (129 130 134 138 139 211 145 153 155
58 161 162 166 170 171 243 177 185 187))
59 ("CorelDraw" . (197 242 201 163 209 211 255 225 237
60 229 236 230 198 241 243 165 170 186))
61 ("Amiga" . (194 202 203 206 207 211 212 218 219
62 226 234 235 238 239 243 244 250 251))
63 ("Mac" . (132 140 162 252 193 238 229 143 251
64 136 141 171 184 196 151 230 144 253))
65 )
66 "The constant `ogonek-name-encoding-alist' is a list of (NAME.LIST) pairs.
67 Each LIST contains codes for 18 Polish diacritic characters. The codes
68 are given in the following order:
69 Aogonek Cacute Eogonek Lslash Nacute Oacute Sacute Zacute Zdotaccent
70 aogonek cacute eogonek lslash nacute oacute sacute zacute zdotaccent.")
71
72 ;; ------ A Little Info in Polish ---------------
73
74 (defconst ogonek-informacja
75 " FUNKCJE INTERAKCYJNE UDOST/EPNIANE PRZEZ BIBLIOTEK/E `ogonek'.
76
77 Je/sli czytasz ten tekst, to albo przegl/adasz plik /xr/od/lowy
78 biblioteki `ogonek.el', albo wywo/la/le/s polecenie `ogonek-jak'.
79 W drugim przypadku mo/zesz usun/a/c tekst z ekranu, stosuj/ac
80 polecenie `M-x kill-buffer'.
81
82 Niniejsza biblioteka dostarcza funkcji do zmiany kodowania polskich
83 znak/ow diakrytycznych. Funkcje te mo/zna pogrupowa/c nast/epuj/aco.
84
85 1. `ogonek-recode-region' oraz `ogonek-recode-buffer'
86 przekodowuj/a zaznaczony fragment wzgl/ednie ca/ly buffor.
87 Po wywo/laniu interakcyjnym funkcji zadawane s/a
88 pytania o parametry przekodowania: nazw/e kodowania
89 w tek/scie /xr/od/lowym i nazw/e kodowania docelowego.
90 Poni/zsze przyk/lady powinny wyja/sni/c, jakich parametr/ow
91 oczekuj/a wymienione funkcje:
92
93 (ogonek-recode-region (poczatek-fragmentu) (koniec-fragmentu)
94 nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa)
95 (ogonek-recode-buffer
96 nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa)
97
98 2. `ogonek-prefixify-region' oraz `ogonek-prefixify-buffer'
99 s/lu/z/a do wprowadzania notacji prefiksowej.
100
101 (ogonek-prefixify-region (poczatek-fragmentu) (koniec-fragmentu)
102 nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu)
103 (ogonek-prefixify-buffer
104 nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu)
105
106 3. `ogonek-deprefixify-region' oraz `ogonek-deprefixify-buffer'
107 s/lu/z/a do usuwania notacji prefiksowej.
108
109 (ogonek-deprefixify-region (poczatek-fragmentu) (koniec-fragmentu)
110 znak-prefiksu nazwa-kodowania-docelowa)
111 (ogonek-prefixify-buffer
112 znak-prefiksu nazwa-kodowania-docelowa)
113
114 U/zycie klawisza TAB w trybie interakcyjnym powoduje wy/swietlenie
115 listy dopuszczalnych nazw kod/ow, pami/etanych w sta/lej
116 `ogonek-name-encoding-alist'.
117
118 Funkcje biblioteki odwo/luj/a si/e do pi/eciu zmiennych, kt/ore
119 przechowuj/a podpowiedzi do zadawanych pyta/n. Nazwy tych zmiennych
120 oraz ich warto/sci domy/slne s/a nast/epuj/ace:
121
122 ogonek-from-encoding iso8859-2
123 ogonek-to-encoding ascii
124 ogonek-prefix-char /
125 ogonek-prefix-from-encoding iso8859-2
126 ogonek-prefix-to-encoding iso8859-2
127
128 Powy/zsze warto/sci domy/slne mo/zna zmieni/c przez umieszczenie w pliku
129 konfiguracyjnym `~/.emacs' odpowiednich przypisa/n, na przyk/lad:
130
131 (setq ogonek-prefix-char ?/)
132 (setq ogonek-prefix-to-encoding \"iso8859-2\")
133
134 Zamiast wczytywania ca/lej biblioteki `ogonek.el' mo/zna w pliku
135 `~/.emacs' za/z/ada/c wczytania wybranych funkcji, na dodatek dopiero
136 w chwili ich rzeczywistego u/zycia:
137
138 (autoload 'ogonek-jak \"ogonek\")
139 (autoload 'ogonek-recode-region \"ogonek\")
140 (autoload 'ogonek-prefixify-region \"ogonek\")
141 (autoload 'ogonek-deprefixify-region \"ogonek\")
142
143 Cz/esto wyst/epuj/ace kombinacje wywo/la/n funkcji mo/zna dla wygody
144 skr/oci/c i przypisa/c klawiszom. Oto praktyczne przyk/lady:
145
146 (defun deprefixify-iso8859-2-region (start end)
147 (interactive \"*r\")
148 (ogonek-deprefixify-region start end ?/ \"iso8859-2\"))
149 (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d
150
151 (defun mazovia-to-iso8859-2 (start end)
152 (interactive \"*r\")
153 (ogonek-recode-region start end \"mazovia\" \"iso8859-2\"))
154 (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r
155
156 (defun prefixify-iso8859-2-region (start end)
157 (interactive \"*r\")
158 (ogonek-prefixify-region start end \"iso8859-2\" ?/))
159 (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p
160
161 Ka/zd/a operacj/e przekodowania mo/zna w ca/lo/sci odwo/la/c
162 przez wykonanie polecenia `undo'.")
163
164 (defun ogonek-jak ()
165 "Display `ogonek-informacja' in an auxiliary *ogonek-jak* buffer."
166 (interactive)
167 (set-buffer (get-buffer-create " *ogonek-jak*"))
168 (insert ogonek-informacja)
169 (switch-to-buffer " *ogonek-jak*")
170 (goto-char (point-min)))
171
172 ;; ------ A Little Info in English --------
173
174 (defconst ogonek-information
175 " THE INTERACTIVE FUNCTIONS PROVIDED BY THE LIBRARY `ogonek'.
176
177 If you read this text then you are either looking at the library's
178 source text or you have called the `ogonek-how' command. In the
179 latter case you may remove this text using `M-x kill-buffer'.
180
181 The library provides functions for changing the encoding of Polish
182 diacritic characters, the ones with an `ogonek' below or above them.
183 The functions come in the following groups.
184
185 1. `ogonek-recode-region' and `ogonek-recode-buffer' to change
186 between one-character encodings, such as `iso-8859-2', `mazovia',
187 plain `ascii' or `TeX'. As the names suggest you may recode
188 either the entire current buffer or just a marked region
189 in it. You may use the functions interactively as commands.
190 Once you call a command you will be asked about the code
191 currently used in your text and the target encoding, the one
192 you want to get. The following example shows a non-interactive
193 use of the functions in a program. This also illustrates what
194 type of parameters the functions expect to be called with:
195
196 (ogonek-recode-region
197 (region-beginning) (region-end) from-code-name to-code-name)
198 (ogonek-recode-buffer from-code-name to-code-name)
199
200 2. `ogonek-prefixify-region' and `ogonek-prefixify-buffer' for
201 introducing prefix notation:
202
203 (ogonek-prefixify-region
204 (region-beginning) (region-end) from-code-name prefix-char)
205 (ogonek-prefixify-buffer from-code-name prefix-char)
206
207 3. `ogonek-deprefixify-region' and `ogonek-deprefixify-buffer' for
208 removing prefix notation:
209
210 (ogonek-deprefixify-region
211 (region-beginning) (region-end) prefix-char to-code-name)
212 (ogonek-prefixify-buffer prefix-char to-code-name)
213
214 The TAB character used in interactive mode makes `emacs'
215 display the list of encodings recognized by the library. The list
216 is stored in the constant `ogonek-name-encoding-alist'.
217
218 The `ogonek' functions refer to five variables in which the suggested
219 answers to dialogue questions are stored. The variables and their
220 default values are:
221
222 ogonek-from-encoding iso8859-2
223 ogonek-to-encoding ascii
224 ogonek-prefix-char /
225 ogonek-prefix-from-encoding iso8859-2
226 ogonek-prefix-to-encoding iso8859-2
227
228 The above default values can be changed by placing appropriate settings
229 in the '~/.emacs' file:
230
231 (setq ogonek-prefix-char ?/)
232 (setq ogonek-prefix-to-encoding \"iso8859-2\")
233
234 Instead of loading the whole library `ogonek' it may be better to
235 autoload the needed functions, for example by placing in `~/.emacs':
236
237 (autoload 'ogonek-how \"ogonek\")
238 (autoload 'ogonek-recode-region \"ogonek\")
239 (autoload 'ogonek-prefixify-region \"ogonek\")
240 (autoload 'ogonek-deprefixify-region \"ogonek\")
241
242 The most frequent function calls can be abbreviated and assigned to
243 keyboard keys. Here are a few practical examples:
244
245 (defun deprefixify-iso8859-2-region (start end)
246 (interactive \"*r\")
247 (ogonek-deprefixify-region start end ?/ \"iso8859-2\"))
248 (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d
249
250 (defun mazovia-to-iso8859-2 (start end)
251 (interactive \"*r\")
252 (ogonek-recode-region start end \"mazovia\" \"iso8859-2\"))
253 (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r
254
255 (defun prefixify-iso8859-2-region (start end)
256 (interactive \"*r\")
257 (ogonek-prefixify-region start end \"iso8859-2\" ?/))
258 (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p
259
260 Each recoding operation can be called off using the `undo' command.")
261
262 (defun ogonek-how ()
263 "Display `ogonek-information' in an auxiliary *recode-how* buffer."
264 (interactive "*")
265 (set-buffer (get-buffer-create " *ogonek-how*"))
266 (insert ogonek-information)
267 (switch-to-buffer " *ogonek-how*")
268 (goto-char (point-min)))
269
270 ;; ---- Variables keeping the suggested answers to dialogue questions -----
271 (defvar ogonek-encoding-choices
272 (cons 'choice
273 (mapcar (lambda (x) (list 'const (car x)))
274 ogonek-name-encoding-alist))
275 "List of ogonek encodings. Used only for customization.")
276 (defcustom ogonek-from-encoding "iso8859-2"
277 "*Encoding in the source file of recoding."
278 :type ogonek-encoding-choices
279 :group 'ogonek)
280 (defcustom ogonek-to-encoding "ascii"
281 "*Encoding in the target file of recoding."
282 :type ogonek-encoding-choices
283 :group 'ogonek)
284 (defcustom ogonek-prefix-char ?/
285 "*Prefix character for prefix encodings."
286 :type 'character
287 :group 'ogonek)
288 (defcustom ogonek-prefix-from-encoding "iso8859-2"
289 "*Encoding in the source file subject to prefixifation."
290 :type ogonek-encoding-choices
291 :group 'ogonek)
292 (defcustom ogonek-prefix-to-encoding "iso8859-2"
293 "*Encoding in the target file subject to deprefixifation."
294 :type ogonek-encoding-choices
295 :group 'ogonek)
296
297 ;; ---- Auxiliary functions for reading parameters in interactive mode ----
298
299 (defun ogonek-read-encoding (prompt default-name-var)
300 "Read encoding name with completion based on `ogonek-name-encoding-alist'.
301 Store the name in the parameter-variable DEFAULT-NAME-VAR.
302 PROMPT is a string to be shown when the user is asked for a name."
303 (let ((encoding
304 (completing-read
305 (format "%s (default %s): " prompt (eval default-name-var))
306 ogonek-name-encoding-alist nil t)))
307 ;; change the default name to the one just read
308 (set default-name-var
309 (if (string= encoding "") (eval default-name-var) encoding))
310 ;; return the new default as the name you read
311 (eval default-name-var)))
312
313 (defun ogonek-read-prefix (prompt default-prefix-var)
314 "Read a prefix character for prefix notation.
315 The result is stored in the variable DEFAULT-PREFIX-VAR.
316 PROMPT is a string to be shown when the user is asked for a new prefix."
317 (let ((prefix-string
318 (read-string
319 (format "%s (default %s): " prompt
320 (char-to-string (eval default-prefix-var))))))
321 (if (> (length prefix-string) 1)
322 (error "! Only one character expected")
323 ;; set the default prefix character to the one just read
324 (set default-prefix-var
325 (if (string= prefix-string "")
326 (eval default-prefix-var)
327 (string-to-char prefix-string)))
328 ;; the new default prefix is the function's result:
329 (eval default-prefix-var))))
330
331 (defun ogonek-lookup-encoding (encoding)
332 "Pick up an association for ENCODING in `ogonek-name-encoding-alist'.
333 Before returning a result test whether the string ENCODING is in
334 the list `ogonek-name-encoding-alist'"
335 (let ((code-list (assoc encoding ogonek-name-encoding-alist)))
336 (if (null code-list)
337 (error "! Name `%s' not known in `ogonek-name-encoding-alist'"
338 encoding)
339 (cdr code-list))))
340
341 ;; ---- An auxiliary function for zipping two lists of equal length ----
342
343 (defun ogonek-zip-lists (xs ys)
344 "Build a list of pairs from lists XS and YS of the same length."
345 (let ((pairs nil))
346 (while xs
347 (setq pairs (cons (cons (car xs) (car ys)) pairs))
348 (setq xs (cdr xs))
349 (setq ys (cdr ys)))
350 ;; `pairs' are the function's result
351 pairs))
352
353 ;; ---- An auxiliary function building a one-to-one recoding table -----
354
355 (defun ogonek-build-table (recoding-pairs)
356 "Build a table required by Emacs's `translate-region' function.
357 RECODING-PAIRS is a list of character pairs for which recoding
358 is not an identity.
359 By using the built-in `translate-region' function
360 we gain better performance compared to converting characters
361 by a hand-written routine as it is done for prefix encodings."
362 (let ((table (make-string 256 0))
363 (i 0))
364 (while (< i 256)
365 (aset table i i)
366 (setq i (1+ i)))
367 ;; make changes in `table' according to `recoding-pairs'
368 (while recoding-pairs
369 (aset table (car (car recoding-pairs)) (cdr (car recoding-pairs)))
370 (setq recoding-pairs (cdr recoding-pairs)))
371 ;; return the table just built
372 table))
373
374 ;; ---- Commands for one-to-one recoding -------------------------------
375
376 (defun ogonek-recode-region (start end from-encoding to-encoding)
377 "Recode text in a marked region in one-to-one manner.
378 When called interactively ask the user for the names of the FROM-
379 and TO- encodings."
380 (interactive (progn (barf-if-buffer-read-only)
381 (list
382 (region-beginning)
383 (region-end)
384 (ogonek-read-encoding "From code" 'ogonek-from-encoding)
385 (ogonek-read-encoding "To code" 'ogonek-to-encoding))))
386 (save-excursion
387 (translate-region
388 start end
389 (ogonek-build-table
390 (ogonek-zip-lists
391 (ogonek-lookup-encoding from-encoding)
392 (ogonek-lookup-encoding to-encoding))))))
393
394 (defun ogonek-recode-buffer (from-encoding to-encoding)
395 "Call `ogonek-recode-region' on the entire buffer.
396 When called interactively ask the user for the names of the FROM-
397 and TO- encodings."
398 (interactive (progn (barf-if-buffer-read-only)
399 (list
400 (ogonek-read-encoding "From code" 'ogonek-from-encoding)
401 (ogonek-read-encoding "To code" 'ogonek-to-encoding))))
402 (ogonek-recode-region
403 (point-min) (point-max) from-encoding to-encoding))
404
405 ;; ---- Recoding with prefix notation -------------------------------
406
407 (defconst ogonek-prefix-code '(?A ?C ?E ?L ?N ?O ?S ?X ?Z
408 ?a ?c ?e ?l ?n ?o ?s ?x ?z))
409
410 (defun ogonek-prefixify-region (start end from-encoding prefix-char)
411 "In a region, replace FROM-encoded Polish characters with PREFIX pairs.
412 A PREFIX pair generated consists of PREFIX-CHAR and the respective
413 character listed in the `ogonek-prefix-code' constant.
414 PREFIX-CHAR itself gets doubled."
415 (interactive (progn (barf-if-buffer-read-only)
416 (list
417 (region-beginning)
418 (region-end)
419 (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding)
420 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char))))
421 (let*
422 ((from-code (ogonek-lookup-encoding from-encoding))
423 (to-code ogonek-prefix-code)
424 (recoding-pairs ; `ogonek-prefix-char' added for doubling
425 (ogonek-zip-lists
426 (cons prefix-char from-code)
427 (cons prefix-char to-code))))
428 (save-excursion
429 (goto-char start)
430 (while (< (point) end)
431 (let ((pair (assoc (following-char) recoding-pairs)))
432 (if (null pair)
433 ;; not a Polish character -- skip it
434 (forward-char 1)
435 ;; Polish character -- insert a prefix pair instead
436 (delete-char 1)
437 (insert ogonek-prefix-char)
438 (insert (cdr pair))
439 ;; the region is now one character longer
440 (setq end (1+ end))))))))
441
442 (defun ogonek-prefixify-buffer (from-encoding prefix-char)
443 "Call `ogonek-prefixify-region' on the entire buffer."
444 (interactive (progn (barf-if-buffer-read-only)
445 (list
446 (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding)
447 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char))))
448 (ogonek-prefixify-region
449 (point-min) (point-max) from-encoding prefix-char))
450
451 (defun ogonek-deprefixify-region (start end prefix-char to-encoding)
452 "In a region, replace PREFIX pairs with their corresponding TO-encodings.
453 PREFIX-CHAR followed by a Polish character from the `ogonek-prefix-code'
454 list is replaced with the corresponding TO-encoded character. A doubled
455 PREFIX-CHAR gets replaced with a single one. A combination of PREFIX-CHAR
456 followed by a non-Polish character, that is one not listed in the
457 `ogonek-prefix-code' constant, is left unchanged."
458 (interactive (progn (barf-if-buffer-read-only)
459 (list (region-beginning)
460 (region-end)
461 (ogonek-read-prefix
462 "Prefix character" 'ogonek-prefix-char)
463 (ogonek-read-encoding
464 "To code" 'ogonek-prefix-to-encoding))))
465 (let*
466 ((from-code ogonek-prefix-code)
467 (to-code (ogonek-lookup-encoding to-encoding))
468 (recoding-pairs
469 (ogonek-zip-lists
470 (cons prefix-char from-code)
471 (cons prefix-char to-code))))
472 (save-excursion
473 (goto-char start)
474 (while (< (point) end)
475 (forward-char 1)
476 (if (or (not (= (preceding-char) prefix-char)) (= (point) end))
477 ;; non-prefix character or the end-of-region -- do nothing
478 ()
479 ;; now, we can check the next character
480 (let ((pair (assoc (following-char) recoding-pairs)))
481 (if (null pair)
482 ;; `following-char' is not a Polish character nor it is
483 ;; `prefix-char' since the one is among `recoding-pairs'
484 (forward-char 1)
485 ;; else prefix followed by a Polish character has been found
486 ;; replace it by the corresponding Polish character
487 (backward-char 1)
488 (delete-char 2)
489 (insert (cdr pair))
490 ;; the region got shorter by one character
491 (setq end (1- end)))))))))
492
493 (defun ogonek-deprefixify-buffer (prefix-char to-encoding)
494 "Call `ogonek-deprefixify-region' on the entire buffer."
495 (interactive (progn (barf-if-buffer-read-only)
496 (list
497 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char)
498 (ogonek-read-encoding "To code" 'ogonek-prefix-to-encoding))))
499 (ogonek-deprefixify-region
500 (point-min) (point-max) prefix-char to-encoding))
501
502 (provide 'ogonek)
503
504 ;; arch-tag: 672d7744-28ac-412b-965e-06a27e50d1d7
505 ;;; ogonek.el ends here