(setup-english-environment): Adjusted for the change of coding
[bpt/emacs.git] / lisp / international / ogonek.el
1 ;;; ogonek.el --- basic editing commands for Emacs
2
3 ;; Copyright (C) 1997 Free Software Foundation, Inc.
4
5 ;; Author: W{\l}odek Bzyl, Ryszard Kubiak
6 ;; Maintainer: rysiek@ipipan.gda.pl (Ryszard Kubiak)
7 ;; Keywords: i18n
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; To use this library load it using
29 ;; M-x load-library [enter] ogonek
30 ;; Then, you may get a short info by calling one of
31 ;; M-x ogonek-jak -- in Polish
32 ;; M-x ogonek-how -- in English "
33
34 (defconst ogonek-name-encoding-alist
35 '(("ascii" . (?A ?C ?E ?L ?N ?O ?S ?Z ?Z
36 ?a ?c ?e ?l ?n ?o ?s ?z ?z))
37 ("iso8859-2" . (161 198 202 163 209 211 166 172 175
38 177 230 234 179 241 243 182 188 191))
39 ("mazovia" . (143 149 144 156 165 163 152 160 161
40 134 141 145 146 164 162 158 166 167))
41 ("windows-EE" . (165 198 202 163 209 211 140 143 175
42 185 230 234 179 241 243 156 159 191))
43 ("windows-PL" . (165 198 202 163 209 211 140 143 175
44 185 230 234 179 241 243 156 159 191))
45 ("latin-2" . (164 143 168 157 227 224 151 141 189
46 165 134 169 136 228 162 152 171 190))
47 ("CP852" . (164 143 168 157 227 224 151 141 189
48 165 134 169 136 228 162 152 171 190))
49 ("MeX" . (129 130 134 138 139 211 145 153 155
50 161 162 166 170 171 243 177 185 187))
51 ("CorelDraw" . (197 242 201 163 209 211 255 225 237
52 229 236 230 198 241 243 165 170 186))
53 ("Amiga" . (194 202 203 206 207 211 212 218 219
54 226 234 235 238 239 243 244 250 251))
55 ("Mac" . (132 140 162 252 193 238 229 143 251
56 136 141 171 184 196 151 230 144 253))
57 )
58 "The constant `ogonek-name-encoding-alist' is a list of (NAME.LIST) pairs.
59 Each LIST contains codes for 18 Polish diacritic characters. The codes
60 are given in the following order:
61 Aogonek Cacute Eogonek Lslash Nacute Oacute Sacute Zacute Zdotaccent
62 aogonek cacute eogonek lslash nacute oacute sacute zacute zdotaccent.")
63
64 ;; ------ A Little Info in Polish ---------------
65
66 (defconst ogonek-informacja
67 " FUNKCJE INTERAKCYJNE UDOST/EPNIANE PRZEZ BIBLIOTEK/E `ogonek'.
68
69 Je/sli czytasz ten tekst, to albo przegl/adasz plik /xr/od/lowy
70 biblioteki `ogonek.el', albo wywo/la/le/s polecenie `ogonek-jak'.
71 W drugim przypadku mo/zesz usun/a/c tekst z ekranu, stosuj/ac
72 polecenie `M-x kill-buffer'.
73
74 Niniejsza biblioteka dostarcza funkcji do zmiany kodowania polskich
75 znak/ow diakrytycznych. Funkcje te mo/zna pogrupowa/c nast/epuj/aco.
76
77 1. `ogonek-recode-region' oraz `ogonek-recode-buffer'
78 przekodowuj/a zaznaczony fragment wzgl/ednie ca/ly buffor.
79 Po wywo/laniu interakcyjnym funkcji zadawane s/a
80 pytania o parametry przekodowania: nazw/e kodowania
81 w tek/scie /xr/od/lowym i nazw/e kodowania docelowego.
82 Poni/zsze przyk/lady powinny wyja/sni/c, jakich parametr/ow
83 oczekuj/a wymienione funkcje:
84
85 (ogonek-recode-region (poczatek-fragmentu) (koniec-fragmentu)
86 nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa)
87 (ogonek-recode-buffer
88 nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa)
89
90 2. `ogonek-prefixify-region' oraz `ogonek-prefixify-buffer'
91 s/lu/z/a do wprowadzania notacji prefiksowej.
92
93 (ogonek-prefixify-region (poczatek-fragmentu) (koniec-fragmentu)
94 nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu)
95 (ogonek-prefixify-buffer
96 nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu)
97
98 3. `ogonek-deprefixify-region' oraz `ogonek-deprefixify-buffer'
99 s/lu/z/a do usuwania notacji prefiksowej.
100
101 (ogonek-deprefixify-region (poczatek-fragmentu) (koniec-fragmentu)
102 znak-prefiksu nazwa-kodowania-docelowa)
103 (ogonek-prefixify-buffer
104 znak-prefiksu nazwa-kodowania-docelowa)
105
106 U/zycie klawisza TAB w trybie interakcyjnym powoduje wy/swietlenie
107 listy dopuszczalnych nazw kod/ow, pami/etanych w sta/lej
108 `ogonek-name-encoding-alist'.
109
110 Funkcje biblioteki odwo/luj/a si/e do pi/eciu zmiennych, kt/ore
111 przechowuj/a podpowiedzi do zadawanych pyta/n. Nazwy tych zmiennych
112 oraz ich warto/sci domy/slne s/a nast/epuj/ace:
113
114 ogonek-from-encoding iso8859-2
115 ogonek-to-encoding ascii
116 ogonek-prefix-char /
117 ogonek-prefix-from-encoding iso8859-2
118 ogonek-prefix-to-encoding iso8859-2
119
120 Powy/zsze warto/sci domy/slne mo/zna zmieni/c przez umieszczenie w pliku
121 konfiguracyjnym `~/.emacs' odpowiednich przypisa/n, na przyk/lad:
122
123 (setq ogonek-prefix-char ?/)
124 (setq ogonek-prefix-to-encoding \"iso8859-2\")
125
126 Zamiast wczytywania ca/lej biblioteki `ogonek.el' mo/zna w pliku
127 `~/.emacs' za/z/ada/c wczytania wybranych funkcji, na dodatek dopiero
128 w chwili ich rzeczywistego u/zycia:
129
130 (autoload 'ogonek-jak \"ogonek\")
131 (autoload 'ogonek-recode-region \"ogonek\")
132 (autoload 'ogonek-prefixify-region \"ogonek\")
133 (autoload 'ogonek-deprefixify-region \"ogonek\")
134
135 Cz/esto wyst/epuj/ace kombinacje wywo/la/n funkcji mo/zna dla wygody
136 skr/oci/c i przypisa/c klawiszom. Oto praktyczne przyk/lady:
137
138 (defun deprefixify-iso8859-2-region (start end)
139 (interactive \"*r\")
140 (ogonek-deprefixify-region start end ?/ \"iso8859-2\"))
141 (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d
142
143 (defun mazovia-to-iso8859-2 (start end)
144 (interactive \"*r\")
145 (ogonek-recode-region start end \"mazovia\" \"iso8859-2\"))
146 (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r
147
148 (defun prefixify-iso8859-2-region (start end)
149 (interactive \"*r\")
150 (ogonek-prefixify-region start end \"iso8859-2\" ?/))
151 (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p
152
153 Ka/zd/a operacj/e przekodowania mo/zna w ca/lo/sci odwo/la/c
154 przez wykonanie polecenia `undo'.")
155
156 (defun ogonek-jak ()
157 "Display `ogonek-informacja' in an auxiliary *ogonek-jak* buffer."
158 (interactive)
159 (set-buffer (get-buffer-create " *ogonek-jak*"))
160 (insert ogonek-informacja)
161 (switch-to-buffer " *ogonek-jak*")
162 (beginning-of-buffer))
163
164 ;; ------ A Little Info in English --------
165
166 (defconst ogonek-information
167 " THE INTERACTIVE FUNCTIONS PROVIDED BY THE LIBRARY `ogonek'.
168
169 If you read this text then you are either looking at the library's
170 source text or you have called the `ogonek-how' command. In the
171 latter case you may remove this text using `M-x kill-buffer'.
172
173 The library provides functions for changing the encoding of Polish
174 diacritic characters, the ones with an `ogonek' below or above them.
175 The functions come in the following groups.
176
177 1. `ogonek-recode-region' and `ogonek-recode-buffer' to change
178 between one-character encodings, such as `iso-8859-2', `mazovia',
179 plain `ascii' or `TeX'. As the names suggest you may recode
180 either the entire current buffer or just a marked region
181 in it. You may use the functions interactively as commands.
182 Once you call a command you will be asked about the code
183 currently used in your text and the target encoding, the one
184 you want to get. The following example shows a non-interactive
185 use of the functions in a program. This also illustrates what
186 type of parameters the functions expect to be called with:
187
188 (ogonek-recode-region
189 (region-beginning) (region-end) from-code-name to-code-name)
190 (ogonek-recode-buffer from-code-name to-code-name)
191
192 2. `ogonek-prefixify-region' and `ogonek-prefixify-buffer' for
193 introducing prefix notation:
194
195 (ogonek-prefixify-region
196 (region-beginning) (region-end) from-code-name prefix-char)
197 (ogonek-prefixify-buffer from-code-name prefix-char)
198
199 3. `ogonek-deprefixify-region' and `ogonek-deprefixify-buffer' for
200 removing prefix notation:
201
202 (ogonek-deprefixify-region
203 (region-beginning) (region-end) prefix-char to-code-name)
204 (ogonek-prefixify-buffer prefix-char to-code-name)
205
206 The TAB character used in interactive mode makes `emacs'
207 display the list of encodings recognized by the library. The list
208 is stored in the constant `ogonek-name-encoding-alist'.
209
210 The `ogonek' functions refer to five variables in which the suggested
211 answers to dialogue questions are stored. The variables and their
212 default values are:
213
214 ogonek-from-encoding iso8859-2
215 ogonek-to-encoding ascii
216 ogonek-prefix-char /
217 ogonek-prefix-from-encoding iso8859-2
218 ogonek-prefix-to-encoding iso8859-2
219
220 The above default values can be changed by placing appropriate settings
221 in the '~/.emacs' file:
222
223 (setq ogonek-prefix-char ?/)
224 (setq ogonek-prefix-to-encoding \"iso8859-2\")
225
226 Instead of loading the whole library `ogonek' it may be better to
227 autoload the needed functions, for example by placing in `~/.emacs':
228
229 (autoload 'ogonek-how \"ogonek\")
230 (autoload 'ogonek-recode-region \"ogonek\")
231 (autoload 'ogonek-prefixify-region \"ogonek\")
232 (autoload 'ogonek-deprefixify-region \"ogonek\")
233
234 The most frequent function calls can be abbreviated and assigned to
235 keyboard keys. Here are a few practical examples:
236
237 (defun deprefixify-iso8859-2-region (start end)
238 (interactive \"*r\")
239 (ogonek-deprefixify-region start end ?/ \"iso8859-2\"))
240 (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d
241
242 (defun mazovia-to-iso8859-2 (start end)
243 (interactive \"*r\")
244 (ogonek-recode-region start end \"mazovia\" \"iso8859-2\"))
245 (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r
246
247 (defun prefixify-iso8859-2-region (start end)
248 (interactive \"*r\")
249 (ogonek-prefixify-region start end \"iso8859-2\" ?/))
250 (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p
251
252 Each recoding operation can be called off using the `undo' command.")
253
254 (defun ogonek-how ()
255 "Display `ogonek-information' in an auxiliary *recode-how* buffer."
256 (interactive "*")
257 (set-buffer (get-buffer-create " *ogonek-how*"))
258 (insert ogonek-information)
259 (switch-to-buffer " *ogonek-how*")
260 (beginning-of-buffer))
261
262 ;; ---- Variables keeping the suggested answers to dialogue questions -----
263
264 (defvar ogonek-from-encoding "iso8859-2"
265 "*Encoding in the source file of recoding.")
266 (defvar ogonek-to-encoding "ascii"
267 "*Encoding in the target file of recoding.")
268 (defvar ogonek-prefix-char ?/
269 "*Prefix character for prefix encodings.")
270 (defvar ogonek-prefix-from-encoding "iso8859-2"
271 "*Encoding in the source file subject to prefixifation.")
272 (defvar ogonek-prefix-to-encoding "iso8859-2"
273 "*Encoding in the target file subject to deprefixifation.")
274
275 ;; ---- Auxiliary functions for reading parameters in interactive mode ----
276
277 (defun ogonek-read-encoding (prompt default-name-var)
278 "Read encoding name with completion based on `ogonek-name-encoding-alist'.
279 Store the name in the the parameter-variable DEFAULT-NAME-VAR.
280 PROMPT is a string to be shown when the user is asked for a name."
281 (let ((encoding
282 (completing-read
283 (format "%s (default %s): " prompt (eval default-name-var))
284 ogonek-name-encoding-alist nil t)))
285 ;; change the default name to the one just read
286 (set default-name-var
287 (if (string= encoding "") (eval default-name-var) encoding))
288 ;; return the new default as the name you read
289 (eval default-name-var)))
290
291 (defun ogonek-read-prefix (prompt default-prefix-var)
292 "Read a prefix character for prefix notation.
293 The result is stored in the variable DEFAULT-PREFIX-VAR.
294 PROMPT is a string to be shown when the user is asked for a new prefix."
295 (let ((prefix-string
296 (read-string
297 (format "%s (default %s): " prompt
298 (char-to-string (eval default-prefix-var))))))
299 (if (> (length prefix-string) 1)
300 (error "! Only one character expected.")
301 ;; set the default prefix character to the one just read
302 (set default-prefix-var
303 (if (string= prefix-string "")
304 (eval default-prefix-var)
305 (string-to-char prefix-string)))
306 ;; the new default prefix is the function's result:
307 (eval default-prefix-var))))
308
309 (defun ogonek-lookup-encoding (encoding)
310 "Pick up an association for ENCODING in `ogonek-name-encoding-alist'.
311 Before returning a result test whether the string ENCODING is in
312 the list `ogonek-name-encoding-alist'"
313 (let ((code-list (assoc encoding ogonek-name-encoding-alist)))
314 (if (null code-list)
315 (error "! Name `%s' not known in `ogonek-name-encoding-alist'."
316 encoding)
317 (cdr code-list))))
318
319 ;; ---- An auxiliary function for zipping two lists of equal length ----
320
321 (defun ogonek-zip-lists (xs ys)
322 "Build a list of pairs from lists XS and YS of the same length."
323 (let ((pairs nil))
324 (while xs
325 (setq pairs (cons (cons (car xs) (car ys)) pairs))
326 (setq xs (cdr xs))
327 (setq ys (cdr ys)))
328 ;; `pairs' are the function's result
329 pairs))
330
331 ;; ---- An auxiliary function building a one-to-one recoding table -----
332
333 (defun ogonek-build-table (recoding-pairs)
334 "Build a table required by Emacs's `translate-region' function.
335 RECODING-PAIRS is a list of character pairs for which recoding
336 is not an identity.
337 By using the built-in `translate-region' function
338 we gain better performance compared to converting characters
339 by a hand-written routine as it is done for prefix encodings."
340 (let ((table (make-string 256 0))
341 (i 0))
342 (while (< i 256)
343 (aset table i i)
344 (setq i (1+ i)))
345 ;; make changes in `table' according to `recoding-pairs'
346 (while recoding-pairs
347 (aset table (car (car recoding-pairs)) (cdr (car recoding-pairs)))
348 (setq recoding-pairs (cdr recoding-pairs)))
349 ;; return the table just built
350 table))
351
352 ;; ---- Commands for one-to-one recoding -------------------------------
353
354 (defun ogonek-recode-region (start end from-encoding to-encoding)
355 "Recode text in a marked region in one-to-one manner.
356 When called interactively ask the user for the names of the FROM-
357 and TO- encodings."
358 (interactive (progn (barf-if-buffer-read-only)
359 (list
360 (region-beginning)
361 (region-end)
362 (ogonek-read-encoding "From code" 'ogonek-from-encoding)
363 (ogonek-read-encoding "To code" 'ogonek-to-encoding))))
364 (save-excursion
365 (translate-region
366 start end
367 (ogonek-build-table
368 (ogonek-zip-lists
369 (ogonek-lookup-encoding from-encoding)
370 (ogonek-lookup-encoding to-encoding))))))
371
372 (defun ogonek-recode-buffer (from-encoding to-encoding)
373 "Call `ogonek-recode-region' on the entire buffer.
374 When called interactively ask the user for the names of the FROM-
375 and TO- encodings."
376 (interactive (progn (barf-if-buffer-read-only)
377 (list
378 (ogonek-read-encoding "From code" 'ogonek-from-encoding)
379 (ogonek-read-encoding "To code" 'ogonek-to-encoding))))
380 (ogonek-recode-region
381 (point-min) (point-max) from-encoding to-encoding))
382
383 ;; ---- Recoding with prefix notation -------------------------------
384
385 (defconst ogonek-prefix-code '(?A ?C ?E ?L ?N ?O ?S ?X ?Z
386 ?a ?c ?e ?l ?n ?o ?s ?x ?z))
387
388 (defun ogonek-prefixify-region (start end from-encoding prefix-char)
389 "In a region, replace FROM-encoded Polish characters with PREFIX pairs.
390 A PREFIX pair generated consists of PREFIX-CHAR and the respective
391 character listed in the `ogonek-prefix-code' constant.
392 PREFIX-CHAR itself gets doubled."
393 (interactive (progn (barf-if-buffer-read-only)
394 (list
395 (region-beginning)
396 (region-end)
397 (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding)
398 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char))))
399 (let*
400 ((from-code (ogonek-lookup-encoding from-encoding))
401 (to-code ogonek-prefix-code)
402 (recoding-pairs ; `ogonek-prefix-char' added for doubling
403 (ogonek-zip-lists
404 (cons prefix-char from-code)
405 (cons prefix-char to-code))))
406 (save-excursion
407 (goto-char start)
408 (while (< (point) end)
409 (let ((pair (assoc (following-char) recoding-pairs)))
410 (if (null pair)
411 ;; not a Polish character -- skip it
412 (forward-char 1)
413 ;; Polish character -- insert a prefix pair instead
414 (delete-char 1)
415 (insert ogonek-prefix-char)
416 (insert (cdr pair))
417 ;; the region is now one character longer
418 (setq end (1+ end))))))))
419
420 (defun ogonek-prefixify-buffer (from-encoding prefix-char)
421 "Call `ogonek-prefixify-region' on the entire buffer."
422 (interactive (progn (barf-if-buffer-read-only)
423 (list
424 (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding)
425 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char))))
426 (ogonek-prefixify-region
427 (point-min) (point-max) from-encoding prefix-char))
428
429 (defun ogonek-deprefixify-region (start end prefix-char to-encoding)
430 "In a region, replace PREFIX pairs with their corresponding TO-encodings.
431 PREFIX-CHAR followed by a Polish character from the `ogonek-prefix-code'
432 list is replaced with the corresponding TO-encoded character. A doubled
433 PREFIX-CHAR gets replaced with a single one. A combination of PREFIX-CHAR
434 followed by a non-Polish character, that is one not listed in the
435 `ogonek-prefix-code' constant, is left unchanged."
436 (interactive (progn (barf-if-buffer-read-only)
437 (list (region-beginning)
438 (region-end)
439 (ogonek-read-prefix
440 "Prefix character" 'ogonek-prefix-char)
441 (ogonek-read-encoding
442 "To code" 'ogonek-prefix-to-encoding))))
443 (let*
444 ((from-code ogonek-prefix-code)
445 (to-code (ogonek-lookup-encoding to-encoding))
446 (recoding-pairs
447 (ogonek-zip-lists
448 (cons prefix-char from-code)
449 (cons prefix-char to-code))))
450 (save-excursion
451 (goto-char start)
452 (while (< (point) end)
453 (forward-char 1)
454 (if (or (not (= (preceding-char) prefix-char)) (= (point) end))
455 ;; non-prefix character or the end-of-region -- do nothing
456 ()
457 ;; now, we can check the next character
458 (let ((pair (assoc (following-char) recoding-pairs)))
459 (if (null pair)
460 ;; `following-char' is not a Polish character nor it is
461 ;; `prefix-char' since the one is among `recoding-pairs'
462 (forward-char 1)
463 ;; else prefix followed by a Polish character has been found
464 ;; replace it by the corresponding Polish character
465 (backward-char 1)
466 (delete-char 2)
467 (insert (cdr pair))
468 ;; the region got shorter by one character
469 (setq end (1- end)))))))))
470
471 (defun ogonek-deprefixify-buffer (prefix-char to-encoding)
472 "Call `ogonek-deprefixify-region' on the entire buffer."
473 (interactive (progn (barf-if-buffer-read-only)
474 (list
475 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char)
476 (ogonek-read-encoding "To code" 'ogonek-prefix-to-encoding))))
477 (ogonek-deprefixify-region
478 (point-min) (point-max) prefix-char to-encoding))
479
480 (provide 'ogonek)
481
482 ;;; ogonek.el ends here