Commit | Line | Data |
---|---|---|
d5c3f902 GM |
1 | ;;; tildify.el --- adding hard spaces into texts |
2 | ||
ba318903 | 3 | ;; Copyright (C) 1997-2014 Free Software Foundation, Inc. |
d5c3f902 | 4 | |
dffb6224 | 5 | ;; Author: Milan Zamazal <pdm@zamazal.org> |
03d7d160 MN |
6 | ;; Michal Nazarewicz <mina86@mina86.com> |
7 | ;; Version: 4.5.3 | |
e5bed401 | 8 | ;; Keywords: text, TeX, SGML, wp |
d5c3f902 GM |
9 | |
10 | ;; This file is part of GNU Emacs. | |
11 | ||
1fecc8fe | 12 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
d5c3f902 | 13 | ;; it under the terms of the GNU General Public License as published by |
1fecc8fe GM |
14 | ;; the Free Software Foundation, either version 3 of the License, or |
15 | ;; (at your option) any later version. | |
d5c3f902 GM |
16 | |
17 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;; GNU General Public License for more details. | |
21 | ||
22 | ;; You should have received a copy of the GNU General Public License | |
1fecc8fe | 23 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
d5c3f902 GM |
24 | |
25 | ;;; Commentary: | |
26 | ||
27 | ;; This package can be typically used for adding forgotten tildes in TeX | |
28 | ;; sources or adding `&nbsp;' sequences in SGML (e.g. HTML) texts. | |
29 | ;; | |
8c722a81 | 30 | ;; For example, the Czech orthography requires avoiding one letter |
e5bed401 DL |
31 | ;; prepositions at line endings. So they should be connected with the |
32 | ;; following words by a tilde. Some users forget to do this all the | |
33 | ;; time. The purpose of this program is to check the text and suggest | |
34 | ;; adding of missing tildes on some places. It works in a similar | |
35 | ;; manner to `query-replace-regexp'. | |
d5c3f902 | 36 | ;; |
e5bed401 DL |
37 | ;; The functionality of this program is actually performing query |
38 | ;; replace on certain regions, but for historical reasons explained | |
39 | ;; above it is called `tildify'. | |
d5c3f902 GM |
40 | ;; |
41 | ;; The default variable settings are suited for Czech, so do not try to | |
42 | ;; understand them if you are not familiar with Czech grammar and spelling. | |
43 | ;; | |
c38e0c97 | 44 | ;; The algorithm was inspired by Petr Olšák's program `vlna'. Abilities of |
d5c3f902 GM |
45 | ;; `tildify.el' are a little limited; if you have improvement suggestions, let |
46 | ;; me know. | |
47 | ||
48 | ;;; Code: | |
49 | ||
50 | ||
51 | ;;; *** User configuration variables *** | |
52 | ||
53 | ||
;; Customization group holding every user option of this package; it is
;; filed under the word-processing (`wp') parent group.
(defgroup tildify nil
  "Add hard spaces or other text fragments to text buffers."
  :group 'wp
  :version "21.1")
58 | ||
;; Anatomy of the default (t) regexp below:
;;   group 1 - either one of `,:;(' followed by optional blanks and the
;;             letter `a', or a single letter from the set AIKOSUVZikosuvz
;;             standing at the beginning of a word;
;;   group 2 - the whitespace after it: a run of spaces/tabs, or blanks
;;             surrounding exactly one newline;
;;   group 3 - the start of what follows: a word character, one of `([{\',
;;             or `<' followed by an ASCII letter.
;; NUMBER is 2, so it is the whitespace (group 2) that gets replaced.
(defcustom tildify-pattern-alist
  '((t "\\([,:;(][ \t]*[a]\\|\\<[AIKOSUVZikosuvz]\\)\\([ \t]+\\|[ \t]*\n[ \t]*\\)\\(\\w\\|[([{\\]\\|<[a-zA-Z]\\)" 2))
  "Alist specifying where to insert hard spaces.

Each alist item is of the form (MAJOR-MODE REGEXP NUMBER) or
\(MAJOR-MODE . SYMBOL).

MAJOR-MODE defines major mode, for which the item applies.  It can be either:
- a symbol equal to the major mode of the buffer to be fixed
- t for default item, this applies to all major modes not defined in another
  alist item

REGEXP is a regular expression matching the part of a text, where a hard space
is missing.  The regexp is always case sensitive, regardless of the current
`case-fold-search' setting.

NUMBER defines the number of the REGEXP subexpression which should be replaced
by the hard space character.

The form (MAJOR-MODE . SYMBOL) defines alias item for MAJOR-MODE.  For this
mode, the item for the mode SYMBOL is looked up in the alist instead."
  :group 'tildify
  :type '(repeat (cons :tag "Entry for major mode"
                       (choice (const :tag "Default" t)
                               (symbol :tag "Major mode"))
                       (choice (list :tag "Regexp"
                                     regexp
                                     (integer :tag "Group "))
                               (symbol :tag "Like other")))))
d5c3f902 GM |
88 | |
;; The hard-space strings are spelled out explicitly (entity names and the
;; \u00A0 escape) so that they survive copying through tools which decode
;; HTML entities or normalize whitespace.
(defcustom tildify-string-alist
  '((latex-mode . "~")
    (tex-mode . latex-mode)
    (plain-tex-mode . latex-mode)
    (sgml-mode . "&nbsp;")
    (html-mode . sgml-mode)
    (xml-mode . "&#160;") ; XML does not define &nbsp; use numeric reference
    (nxml-mode . xml-mode)
    (t . "\u00A0"))                     ; literal no-break space (U+00A0)
  "Alist specifying what is a hard space in the current major mode.

Each alist item is of the form (MAJOR-MODE . STRING) or
\(MAJOR-MODE . SYMBOL).

MAJOR-MODE defines major mode, for which the item applies.  It can be either:
- a symbol equal to the major mode of the buffer to be fixed
- t for default item, this applies to all major modes not defined in another
  alist item

STRING defines the hard space, which is inserted at places defined by
`tildify-pattern-alist'.  For example it can be \"~\" for TeX or \"&nbsp;\"
for SGML.

The form (MAJOR-MODE . SYMBOL) defines alias item for MAJOR-MODE.  For this
mode, the item for the mode SYMBOL is looked up in the alist instead."
  :group 'tildify
  :type '(repeat (cons :tag "Entry for major mode"
                       (choice (const :tag "Default" t)
                               (symbol :tag "Major mode"))
                       (choice (const :tag "No-break space (U+00A0)" "\u00A0")
                               (string :tag "String    ")
                               (symbol :tag "Like other")))))
db95369b | 121 | |
(defcustom tildify-ignored-environments-alist
  `((latex-mode
     ;; A literal double backslash; the empty end regexp matches right
     ;; away, so only the two backslashes themselves are skipped.
     ;; NOTE(review): presumably this keeps `\\' followed by `(' or `['
     ;; from being taken for a math opener -- confirm.
     ("\\\\\\\\" . "")     ; do not remove this
     ;; \begin{NAME} ... \end{NAME} for the verbatim and math environments
     ;; listed; group 1 captures NAME and is reused in the end regexp.
     (,(eval-when-compile (concat
                           "\\\\begin{\\("
                           (regexp-opt '("verbatim" "math" "displaymath"
                                         "equation" "eqnarray" "eqnarray*"))
                           "\\)}"))
      . ("\\\\end{" 1 "}"))
     ;; \verb or \verb* followed by any delimiter character; the text is
     ;; skipped up to the next occurrence of that same character.
     ("\\\\verb\\*?\\(.\\)" . (1))
     ;; $ or $$; ends at the same string that opened it.
     ("\\$\\$?" . (0))
     ;; \( ... \)
     ("\\\\(" . "\\\\)")
     ;; \[ ... \]
     ("\\\\[[]" . "\\\\[]]")
     ;; A control word, the spaces or `{}' after it, and any letters
     ;; directly following; nothing beyond that is skipped.
     ("\\\\[a-zA-Z]+\\( +\\|{}\\)[a-zA-Z]*" . "")
     ;; From `%' up to the end of the line.
     ("%" . "$"))
    (plain-tex-mode . latex-mode)
    (html-mode
     ;; <pre>, <code>, etc. (lower- or upper-case spelling) up to the
     ;; matching closing tag; group 1 captures the tag name.
     (,(eval-when-compile (concat
                           "<\\("
                           (regexp-opt '("pre" "dfn" "code" "samp" "kbd" "var"
                                         "PRE" "DFN" "CODE" "SAMP" "KBD" "VAR"))
                           "\\)\\>[^>]*>"))
      . ("</" 1 ">"))
     ;; <!-- ... -->
     ("<! *--" . "-- *>")
     ;; Anything between `<' and the next `>'.
     ("<" . ">"))
    (sgml-mode . html-mode)
    (xml-mode
     ;; <!-- ... -->
     ("<! *--" . "-- *>")
     ;; Anything between `<' and the next `>'.
     ("<" . ">"))
    (nxml-mode . xml-mode))
  "Alist specifying ignored structured text environments.
Parts of text defined in this alist are skipped without performing hard space
insertion on them.  These settings allow skipping text parts like verbatim or
math environments in TeX or preformatted text in SGML.

Each list element is of the form
  (MAJOR-MODE (BEG-REGEX . END-REGEX) (BEG-REGEX . END-REGEX) ... )
or
  (MAJOR-MODE . SYMBOL)

MAJOR-MODE defines major mode, for which the item applies.  It can be either:
- a symbol equal to the major mode of the buffer to be fixed
- t for default item, this applies to all major modes not defined in another
  alist item

BEG-REGEX is a regexp matching beginning of a text part to be skipped.
END-REGEX defines end of the corresponding text part and can be either:
- a regexp matching the end of the skipped text part
- a list of regexps and numbers, which will compose the ending regexp by
  concatenating themselves, while replacing the numbers with corresponding
  subexpressions of BEG-REGEX (this is used to solve cases like
  \\\\verb<character> in TeX).

The form (MAJOR-MODE . SYMBOL) defines alias item for MAJOR-MODE.  For this
mode, the item for the mode SYMBOL is looked up in the alist instead."
  :group 'tildify
  :type '(repeat
          (cons :tag "Entry for major mode"
                (choice (const :tag "Default" t)
                        (symbol :tag "Major mode"))
                (choice
                 (const :tag "None")
                 (repeat
                  :tag "Environments"
                  (cons :tag "Regexp pair"
                        (regexp :tag "Open ")
                        (choice :tag "Close"
                                (regexp :tag "Regexp")
                                (list :tag "Regexp and groups (concatenated)"
                                      (choice (regexp :tag "Regexp")
                                              (integer :tag "Group "))))))
                 (symbol :tag "Like other")))))
d5c3f902 GM |
189 | |
190 | ||
d5c3f902 GM |
191 | ;;; *** Interactive functions *** |
192 | ||
193 | ;;;###autoload | |
(defun tildify-region (beg end &optional dont-ask)
  "Insert hard spaces into the text between BEG and END.
The behavior is configured through `tildify-pattern-alist',
`tildify-string-alist', and `tildify-ignored-environments-alist'.
The modified text is not refilled.
When DONT-ASK is non-nil (interactively: with a prefix argument), all
substitutions are made without asking for confirmation of each one."
  (interactive "*rP")
  ;; Patterns are documented as case sensitive, hence `case-fold-search'
  ;; is forced to nil for the whole operation.
  (let ((case-fold-search nil)
        (confirm-each (not dont-ask))
        (replaced-total 0))
    (tildify-foreach-region-outside-env beg end
      (lambda (chunk-beg chunk-end)
        (let* ((outcome (tildify-tildify chunk-beg chunk-end confirm-each))
               (verdict (cdr outcome)))
          (setq replaced-total (+ replaced-total (car outcome)))
          (cond
           ;; The user asked to replace everything from now on: stop
           ;; querying in later chunks and keep scanning.
           ((eq verdict 'force)
            (setq confirm-each nil)
            t)
           ;; Otherwise pass on t (continue) or nil (quit) unchanged.
           (t verdict)))))
    (message "%d spaces replaced." replaced-total)))
db95369b | 213 | |
d5c3f902 | 214 | ;;;###autoload |
(defun tildify-buffer (&optional dont-ask)
  "Insert hard spaces throughout the current buffer.
The behavior is configured through `tildify-pattern-alist',
`tildify-string-alist', and `tildify-ignored-environments-alist'.
The modified text is not refilled.
When DONT-ASK is non-nil (interactively: with a prefix argument), all
substitutions are made without asking for confirmation of each one."
  (interactive "*P")
  ;; Simply run `tildify-region' over the accessible portion of the buffer.
  (let ((whole-start (point-min))
        (whole-end (point-max)))
    (tildify-region whole-start whole-end dont-ask)))
d5c3f902 GM |
225 | |
226 | ||
227 | ;;; *** Auxiliary functions *** | |
228 | ||
d5c3f902 GM |
(defun tildify-mode-alist (mode-alist &optional mode)
  "Return the value stored in MODE-ALIST for MODE.
MODE defaults to the current `major-mode'.  When MODE-ALIST has no entry
for the mode, the default entry (the one whose key is t) is used instead.

A value that is a non-nil symbol is an alias: the lookup is repeated with
that symbol as the mode until a non-symbol value (or nil) is reached, and
that value is returned.

Signal an error if the aliases form a cycle, instead of recursing without
bound."
  (let ((key (or mode major-mode))
        (seen nil)
        (value nil)
        (resolved nil))
    (while (not resolved)
      ;; Looking up the same key twice means the alias chain loops.
      (when (memq key seen)
        (error "Circular mode alias for `%s' in tildify alist" key))
      (push key seen)
      (setq value (cdr (or (assoc key mode-alist)
                           (assoc t mode-alist))))
      (if (and value (symbolp value))
          ;; Alias entry: follow it.
          (setq key value)
        (setq resolved t)))
    value))
db95369b | 237 | |
03d7d160 MN |
(defun tildify-foreach-region-outside-env (beg end callback)
  "Scan region from BEG to END calling CALLBACK on portions out of environments.
Call CALLBACK on each region outside of environment to ignore.
CALLBACK will only be called for regions which have intersection
with [BEG END].  It must be a function that takes two point
arguments specifying the region to operate on.  Stop scanning the
region as soon as CALLBACK returns nil.  Environments to ignore
are determined from `tildify-ignored-environments-alist'.

CALLBACK may change the buffer text; END is tracked with a marker so
that the scanned region keeps ending at the same text.

If no environments are configured for the current major mode, CALLBACK
is called once on the whole region and its value is returned; otherwise
the return value is nil."
  (declare (indent 2))
  (let ((pairs (tildify-mode-alist tildify-ignored-environments-alist)))
    (if (not pairs)
        (funcall callback beg end)
      (let* (;; A marker rather than the raw position, because replacements
             ;; done by CALLBACK may grow or shrink the text before END.
             (limit (copy-marker end))
             ;; Clip a chunk to [BEG, LIMIT]; empty chunks are skipped and
             ;; count as "keep going".
             (func (lambda (b e)
                     (let ((b (max b beg)) (e (min e limit)))
                       (if (< b e) (funcall callback b e) t))))
             ;; One regexp matching the opening of any ignored environment.
             (beg-re (concat "\\(?:"
                             (mapconcat 'car pairs "\\)\\|\\(?:")
                             "\\)"))
             p end-re)
        (unwind-protect
            (save-excursion
              (save-restriction
                ;; Environments are searched for from the very beginning
                ;; of the buffer, so that one opened before BEG is seen.
                (widen)
                (goto-char (point-min))
                (while (and (< (setq p (point)) limit)
                            (if (setq end-re (tildify-find-env beg-re pairs))
                                ;; Process the text before the environment,
                                ;; honoring a nil (quit) result, then jump
                                ;; past the end of the environment.
                                (and (funcall func p (match-beginning 0))
                                     (< (point) limit)
                                     (re-search-forward end-re nil t))
                              ;; No more environments: process the tail.
                              (funcall func p (marker-position limit))
                              nil)))))
          ;; Detach the marker so it no longer has to be maintained.
          (set-marker limit nil))))))
269 | ||
(defun tildify-find-env (regexp pairs)
  "Search forward from point for an environment opening matching REGEXP.
PAIRS is a list of (BEG-REGEX . END-SPEC) elements.  If an opening is
found, point is left after it and the regexp matching the end of that
environment is returned; it is built from the END-SPEC of the first
element of PAIRS whose BEG-REGEX matches at the start of the opening
text.  Return nil if no opening is found."
  (if (not (re-search-forward regexp nil t))
      nil
    ;; The match data of the buffer search is preserved for the caller;
    ;; the `string-match' calls below work on a copy of the matched text.
    (save-match-data
      (let ((found (match-string 0))
            (rest pairs))
        ;; Advance to the first pair whose opening regexp matches FOUND
        ;; right at its beginning.
        (while (not (eql 0 (string-match (car (car rest)) found)))
          (setq rest (cdr rest)))
        (let ((closer (cdr (car rest))))
          (cond
           ((stringp closer) closer)
           (t
            ;; A list of regexps and group numbers: numbers stand for the
            ;; (quoted) text of the corresponding group of the opening.
            (mapconcat
             (lambda (piece)
               (if (stringp piece)
                   piece
                 (regexp-quote (match-string piece found))))
             closer
             ""))))))))
d5c3f902 GM |
290 | |
(defun tildify-tildify (beg end ask)
  "Add tilde characters in the region between BEG and END.
The pattern and the replacement string are taken from
`tildify-pattern-alist' and `tildify-string-alist' for the current major
mode.  No check for ignored environments is done here; the caller is
expected to pass only text where replacing is allowed.

If ASK is nil, perform replace without asking user for confirmation.
Otherwise each match is queried: `y' or SPC replaces, `n' skips, `!'
replaces this and all remaining matches without asking, `q' quits.  Any
other key makes the question be asked again.

Returns (count . response) cons where count is number of string
replacements done and response is one of symbols: t (all right), nil
\(quit), force (replace without further questions)."
  (save-excursion
    (goto-char beg)
    (let* ((alist (tildify-mode-alist tildify-pattern-alist))
           (regexp (car alist))
           (match-number (cadr alist))
           (tilde (tildify-mode-alist tildify-string-alist))
           ;; A marker, since replacements change the length of the text.
           (end-marker (copy-marker end))
           answer
           bad-answer
           replace
           quit
           ;; Keep the query prompts out of the *Messages* buffer.
           (message-log-max nil)
           (count 0))
      (while (and (not quit)
                  (re-search-forward regexp (marker-position end-marker) t))
        (when (or (not ask)
                  (let ((prompt "Replace? (yn!q) "))
                    ;; Show the user where the replacement would happen.
                    (goto-char (match-beginning match-number))
                    (setq bad-answer t)
                    ;; Keep asking until a valid key is pressed.  The
                    ;; answer is interpreted inside the loop so that an
                    ;; invalid key can never reuse the decision taken for
                    ;; a previous match.
                    (while bad-answer
                      (setq bad-answer nil)
                      (message "%s" prompt)
                      (setq answer (read-event))
                      (cond
                       ((or (eq answer ?y) (eq answer ? ) (eq answer 'space))
                        (setq replace t))
                       ((eq answer ?n)
                        (setq replace nil))
                       ((eq answer ?!)
                        (setq replace t
                              ask nil))
                       ((eq answer ?q)
                        (setq replace nil
                              quit t))
                       (t
                        ;; Put the hint into the next prompt; a separate
                        ;; message would be overwritten at once.
                        (setq prompt "Press y, n, !, or q.  Replace? (yn!q) "
                              bad-answer t))))
                    replace))
          (replace-match tilde t t nil match-number)
          (setq count (1+ count))))
      ;; Detach the marker so it no longer has to be maintained.
      (set-marker end-marker nil)
      ;; Return value
      (cons count (cond (quit nil)
                        ((not ask) 'force)
                        (t t))))))
d5c3f902 GM |
345 | |
346 | ||
347 | ;;; *** Announce *** | |
348 | ||
349 | (provide 'tildify) | |
350 | ||
351 | ||
352 | ;; Local variables: | |
c38e0c97 | 353 | ;; coding: utf-8 |
d5c3f902 GM |
354 | ;; End: |
355 | ||
356 | ;;; tildify.el ends here |