Commit | Line | Data |
---|---|---|
d5c3f902 GM |
1 | ;;; tildify.el --- adding hard spaces into texts |
2 | ||
ab422c4d | 3 | ;; Copyright (C) 1997-2013 Free Software Foundation, Inc. |
d5c3f902 | 4 | |
dffb6224 | 5 | ;; Author: Milan Zamazal <pdm@zamazal.org> |
905a39b8 | 6 | ;; Version: 4.5 |
e5bed401 | 7 | ;; Keywords: text, TeX, SGML, wp |
d5c3f902 GM |
8 | |
9 | ;; This file is part of GNU Emacs. | |
10 | ||
1fecc8fe | 11 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
d5c3f902 | 12 | ;; it under the terms of the GNU General Public License as published by |
1fecc8fe GM |
13 | ;; the Free Software Foundation, either version 3 of the License, or |
14 | ;; (at your option) any later version. | |
d5c3f902 GM |
15 | |
16 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | ;; GNU General Public License for more details. | |
20 | ||
21 | ;; You should have received a copy of the GNU General Public License | |
1fecc8fe | 22 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
d5c3f902 GM |
23 | |
24 | ;;; Commentary: | |
25 | ||
26 | ;; This package can be typically used for adding forgotten tildes in TeX | |
27 | ;; sources or adding `&nbsp;' sequences in SGML (e.g. HTML) texts. | |
28 | ;; | |
8c722a81 | 29 | ;; For example, the Czech orthography requires avoiding one letter |
e5bed401 DL |
30 | ;; prepositions at line endings. So they should be connected with the |
31 | ;; following words by a tilde. Some users forget to do this all the | |
32 | ;; time. The purpose of this program is to check the text and suggest | |
33 | ;; adding missing tildes in some places. It works in a similar | |
34 | ;; manner to `query-replace-regexp'. | |
d5c3f902 | 35 | ;; |
e5bed401 DL |
36 | ;; The functionality of this program is actually performing query |
37 | ;; replace on certain regions, but for historical reasons explained | |
38 | ;; above it is called `tildify'. | |
d5c3f902 GM |
39 | ;; |
40 | ;; The default variable settings are suited for Czech, so do not try to | |
41 | ;; understand them if you are not familiar with Czech grammar and spelling. | |
42 | ;; | |
c38e0c97 | 43 | ;; The algorithm was inspired by Petr Olšák's program `vlna'. Abilities of |
d5c3f902 GM |
44 | ;; `tildify.el' are a little limited; if you have improvement suggestions, let |
45 | ;; me know. | |
46 | ||
47 | ;;; Code: | |
48 | ||
49 | ||
50 | ;;; *** User configuration variables *** | |
51 | ||
52 | ||
;; Customization group that collects the user options defined below.
(defgroup tildify ()
  "Add hard spaces or other text fragments to text buffers."
  :group 'wp
  :version "21.1")
57 | ||
(defcustom tildify-pattern-alist
  '((t
     "\\([,:;(][ \t]*[a]\\|\\<[AIKOSUVZikosuvz]\\)\\([ \t]+\\|[ \t]*\n[ \t]*\\)\\(\\w\\|[([{\\]\\|<[a-zA-Z]\\)"
     2))
  "Alist saying where hard spaces should be inserted, per major mode.

Every element has the shape (MAJOR-MODE REGEXP NUMBER), or the alias
shape (MAJOR-MODE . SYMBOL).

MAJOR-MODE selects the major mode the element applies to.  It is either
  - the symbol naming the major mode of the buffer being processed, or
  - t, marking the default element used for every major mode that has
    no element of its own.

REGEXP matches a piece of text in which a hard space is missing.  It is
always matched case-sensitively, whatever `case-fold-search' is set to.

NUMBER is the index of the REGEXP subexpression that gets replaced by
the hard space string.

An alias element (MAJOR-MODE . SYMBOL) makes MAJOR-MODE use whatever
element is registered for the mode SYMBOL."
  :type '(repeat (choice (list symbol regexp integer) (cons symbol symbol)))
  :group 'tildify)
81 | ||
(defcustom tildify-string-alist
  '((latex-mode . "~")
    (tex-mode . latex-mode)
    (plain-tex-mode . latex-mode)
    ;; SGML-family markup needs the character entity; a literal space here
    ;; would make tildifying such buffers a no-op.
    (sgml-mode . "&nbsp;")
    (xml-mode . sgml-mode)
    (html-mode . sgml-mode)
    (t . " "))
  "Alist specifying what is a hard space in the current major mode.

Each alist item is of the form (MAJOR-MODE . STRING) or
\(MAJOR-MODE . SYMBOL).

MAJOR-MODE defines major mode, for which the item applies.  It can be either:
- a symbol equal to the major mode of the buffer to be fixed
- t for default item, this applies to all major modes not defined in another
  alist item

STRING defines the hard space, which is inserted at places defined by
`tildify-pattern-alist'.  For example it can be \"~\" for TeX or \"&nbsp;\"
for SGML.

The form (MAJOR-MODE . SYMBOL) defines alias item for MAJOR-MODE.  For this
mode, the item for the mode SYMBOL is looked up in the alist instead."
  :group 'tildify
  :type '(repeat (cons symbol (choice string symbol))))
db95369b | 108 | |
d5c3f902 GM |
(defcustom tildify-ignored-environments-alist
  '(;; LaTeX: escaped backslash, verbatim text, math, macro names, comments.
    (latex-mode
     ("\\\\\\\\" . "")		; do not remove this
     ("\\\\begin{verbatim}" . "\\\\end{verbatim}")
     ("\\\\verb\\*?\\(.\\)" . (1))
     ("\\$\\$" . "\\$\\$")
     ("\\$" . "\\$")
     ("\\\\(" . "\\\\)")
     ("\\\\[[]" . "\\\\[]]")
     ("\\\\begin{math}" . "\\\\end{math}")
     ("\\\\begin{displaymath}" . "\\\\end{displaymath}")
     ("\\\\begin{equation}" . "\\\\end{equation}")
     ("\\\\begin{eqnarray\\*?}" . "\\\\end{eqnarray\\*?}")
     ("\\\\[a-zA-Z]+\\( +\\|{}\\)[a-zA-Z]*" . "")
     ("%" . "$"))
    (plain-tex-mode . latex-mode)
    ;; HTML: literal-text elements (both letter cases), comments, tags.
    (html-mode
     ("<pre[^>]*>" . "</pre>")
     ("<dfn>" . "</dfn>")
     ("<code>" . "</code>")
     ("<samp>" . "</samp>")
     ("<kbd>" . "</kbd>")
     ("<var>" . "</var>")
     ("<PRE[^>]*>" . "</PRE>")
     ("<DFN>" . "</DFN>")
     ("<CODE>" . "</CODE>")
     ("<SAMP>" . "</SAMP>")
     ("<KBD>" . "</KBD>")
     ("<VAR>" . "</VAR>")
     ("<! *--" . "-- *>")
     ("<" . ">"))
    (sgml-mode . html-mode)
    ;; Any other mode: nothing is skipped.
    (t nil))
  "Alist of structured text environments that must be left untouched.
Text inside the environments listed here is skipped; no hard spaces are
inserted there.  These settings make it possible to skip things such as
verbatim or math environments in TeX, or preformatted text in SGML.

Every element has the shape
  (MAJOR-MODE (BEG-REGEX . END-REGEX) (BEG-REGEX . END-REGEX) ... )

MAJOR-MODE selects the major mode the element applies to.  It is either
  - the symbol naming the major mode of the buffer being processed, or
  - t, marking the default element used for every major mode that has
    no element of its own.

BEG-REGEX is a regexp matching the start of a text part to skip.
END-REGEX describes where that text part ends and is either
  - a regexp matching the end of the skipped part, or
  - a list of regexps and numbers; the ending regexp is built by
    concatenating the list elements, with every number replaced by the
    text of the corresponding BEG-REGEX subexpression (this handles
    cases like \\verb<character> in TeX)."
  :type '(repeat (cons symbol (choice symbol (repeat sexp))))
  :group 'tildify)
164 | ||
165 | ||
166 | ;;; *** Internal variables *** | |
167 | ||
;; Set to 0 at the start of `tildify-region' and incremented by
;; `tildify-tildify' after every replacement it performs.
(defvar tildify-count nil
  "Counter for replacements.")
170 | ||
171 | ||
172 | ;;; *** Interactive functions *** | |
173 | ||
;;;###autoload
(defun tildify-region (beg end)
  "Add hard spaces in the region between BEG and END.
See variables `tildify-pattern-alist', `tildify-string-alist', and
`tildify-ignored-environments-alist' for information about configuration
parameters.
This function performs no refilling of the changed text.

The number of replacements is accumulated in `tildify-count' and
reported in the echo area when the function finishes."
  (interactive "*r")
  (setq tildify-count 0)
  (let ((ask t)
        (case-fold-search nil)          ; patterns are always case sensitive
        (regexp (tildify-build-regexp)) ; beginnings of environments
        ;; END as a marker: replacements change the text length.
        (marker-end (copy-marker end))
        ;; End of the chunk currently being processed.  A single marker is
        ;; reused for every chunk instead of allocating one per iteration.
        (z (make-marker))
        a                               ; start of the current chunk
        end-env                         ; regexp ending the found environment
        finish
        aux)
    (unwind-protect
        (if regexp
            ;; Yes, ignored environments exist for the current major mode,
            ;; tildify just texts outside them
            (save-excursion
              (save-restriction
                (widen)
                (goto-char (point-min))
                (while (not finish)
                  (setq a (point))
                  (setq end-env (tildify-find-env regexp))
                  ;; The chunk ends one character before the end of the
                  ;; environment opener, or at the end of the buffer when
                  ;; no further environment exists.
                  (set-marker z (if end-env (1- (point)) (point-max)))
                  (when (>= (marker-position z) beg)
                    ;; Clip the chunk to the requested region.
                    (or (>= a beg) (setq a beg))
                    (or (<= (marker-position z) (marker-position marker-end))
                        (set-marker z marker-end))
                    (setq aux (tildify-tildify a (marker-position z) ask))
                    (cond ((eq aux 'force) (setq ask nil))
                          ((null aux) (setq finish t))))
                  (if (>= (marker-position z) (marker-position marker-end))
                      (setq finish t))
                  (or (>= (point) (marker-position z))
                      (goto-char (marker-position z)))
                  ;; Skip over the body of the ignored environment.
                  (if (not finish)
                      (if (re-search-forward end-env nil t)
                          (if (> (point) (marker-position marker-end))
                              (setq finish t))
                        (message
                         "End of environment not found: %s" end-env)
                        (setq finish t))))))
          ;; No ignored environments, tildify directly
          (tildify-tildify beg end ask))
      ;; Detach the markers so they stop being updated on every later
      ;; buffer modification.
      (set-marker z nil)
      (set-marker marker-end nil)))
  (message "%d spaces replaced." tildify-count))
db95369b | 227 | |
d5c3f902 GM |
;;;###autoload
(defun tildify-buffer ()
  "Add hard spaces throughout the accessible portion of the current buffer.
The work is delegated to `tildify-region'; see `tildify-pattern-alist',
`tildify-string-alist', and `tildify-ignored-environments-alist' for the
configuration parameters.
The changed text is not refilled."
  (interactive "*")
  (let ((start (point-min))
        (stop (point-max)))
    (tildify-region start stop)))
237 | ||
238 | ||
239 | ;;; *** Auxiliary functions *** | |
240 | ||
(defun tildify-build-regexp ()
  "Build the regexp matching the start of any ignored environment.
The opening regexps registered for the current major mode in
`tildify-ignored-environments-alist' are joined as alternatives.
Return nil when the mode has no ignored environments."
  (let* ((pairs (tildify-mode-alist tildify-ignored-environments-alist))
         ;; Seed with the first opener; this stays nil when there is none.
         (joined (car (car pairs))))
    (dolist (pair (cdr pairs) joined)
      (setq joined (concat joined "\\|" (car pair))))))
252 | ||
(defun tildify-mode-alist (mode-alist &optional mode)
  "Return the MODE-ALIST entry value that applies to MODE.
MODE defaults to the current `major-mode'.  When MODE-ALIST has no entry
for MODE, the entry for t is used instead.  A non-nil symbol value is an
alias and is resolved by looking that symbol up in MODE-ALIST in turn."
  (let* ((key (or mode major-mode))
         (value (cdr (or (assoc key mode-alist)
                         (assoc t mode-alist)))))
    (cond
     ((null value) nil)
     ;; Alias: continue the lookup under the aliased mode.
     ((symbolp value) (tildify-mode-alist mode-alist value))
     (t value))))
db95369b | 263 | |
d5c3f902 GM |
(defun tildify-find-env (regexp)
  "Find environment using REGEXP.
Search forward from point for REGEXP, the alternation of all environment
openers for the current major mode, and leave point after the match.
Return regexp for the end of the environment or nil if no environment was
found.

When the END-REGEX registered in `tildify-ignored-environments-alist' is a
list, its string elements are used verbatim and its integer elements are
replaced by the quoted text of the corresponding subexpression of the
BEG-REGEX that matched."
  ;; Find environment
  (when (re-search-forward regexp nil t)
    ;; The string matching below must not disturb the match data of the
    ;; buffer search.
    (save-match-data
      (let ((match (match-string 0))
            (alist (tildify-mode-alist tildify-ignored-environments-alist)))
        ;; Identify which opener produced the match.  Matching it against
        ;; the matched text also makes the subexpression numbers relative
        ;; to that single opener rather than to the combined REGEXP.
        (while (not (eq (string-match (caar alist) match) 0))
          (setq alist (cdr alist)))
        ;; Build end-env regexp
        (let ((expression (cdar alist)))
          (if (stringp expression)
              expression
            (mapconcat
             (lambda (part)
               (if (stringp part)
                   part
                 ;; A subexpression that did not take part in the match
                 ;; contributes nothing.
                 (regexp-quote (or (match-string part match) ""))))
             expression
             "")))))))
290 | ||
(defun tildify-tildify (beg end ask)
  "Add tilde characters in the region between BEG and END.
This function does not do any further checking except of for comments and
macros.

The pattern and the subexpression to replace come from
`tildify-pattern-alist', the replacement text from `tildify-string-alist'.
Every replacement increments `tildify-count'.

If ASK is nil, perform replace without asking user for confirmation.
Otherwise the user is queried for each match: y or SPC replaces, n skips,
! replaces this and all remaining matches, q quits.  Any other key
repeats the question.

Returns one of symbols: t (all right), nil (quit), force (replace without
further questions)."
  (save-excursion
    (goto-char beg)
    (let* ((alist (tildify-mode-alist tildify-pattern-alist))
           (regexp (car alist))
           (match-number (cadr alist))
           (tilde (tildify-mode-alist tildify-string-alist))
           ;; Marker, because replacements change the text length.
           (end-marker (copy-marker end))
           answer
           bad-answer
           prompt
           replace
           quit
           (message-log-max nil))       ; keep prompts out of *Messages*
      (unwind-protect
          (while (and (not quit)
                      (re-search-forward regexp (marker-position end-marker) t))
            (when (or (not ask)
                      (progn
                        ;; Show the user where the replacement would go.
                        (goto-char (match-beginning match-number))
                        (setq bad-answer t
                              prompt "Replace? (yn!q) ")
                        ;; `read-event' can run timers; protect the match
                        ;; data that `replace-match' relies on.
                        (save-match-data
                          ;; Keep asking until a recognized key is pressed.
                          (while bad-answer
                            (setq bad-answer nil)
                            (message "%s" prompt)
                            (setq answer (read-event))
                            (cond
                             ((or (eq answer ?y) (eq answer ?\s)
                                  (eq answer 'space))
                              (setq replace t))
                             ((eq answer ?n)
                              (setq replace nil))
                             ((eq answer ?!)
                              (setq replace t
                                    ask nil))
                             ((eq answer ?q)
                              (setq replace nil
                                    quit t))
                             (t
                              (setq bad-answer t
                                    prompt (concat "Press y, n, !, or q.  "
                                                   "Replace? (yn!q) "))))))
                        replace))
              (replace-match tilde t t nil match-number)
              (setq tildify-count (1+ tildify-count))))
        ;; Detach the marker so it no longer has to be kept up to date.
        (set-marker end-marker nil))
      ;; Return value
      (cond
       (quit nil)
       ((not ask) 'force)
       (t t)))))
344 | ||
345 | ||
346 | ;;; *** Announce *** | |
347 | ||
;; Announce the `tildify' feature so that (require 'tildify) is satisfied
;; once this file has been loaded.
(provide 'tildify)
349 | ||
350 | ||
351 | ;; Local variables: | |
c38e0c97 | 352 | ;; coding: utf-8 |
d5c3f902 GM |
353 | ;; End: |
354 | ||
355 | ;;; tildify.el ends here |