don't require grep in vc-git
[bpt/emacs.git] / lisp / textmodes / tildify.el
CommitLineData
d5c3f902
GM
1;;; tildify.el --- adding hard spaces into texts
2
ba318903 3;; Copyright (C) 1997-2014 Free Software Foundation, Inc.
d5c3f902 4
dffb6224 5;; Author: Milan Zamazal <pdm@zamazal.org>
03d7d160
MN
6;; Michal Nazarewicz <mina86@mina86.com>
7;; Version: 4.5.3
e5bed401 8;; Keywords: text, TeX, SGML, wp
d5c3f902
GM
9
10;; This file is part of GNU Emacs.
11
1fecc8fe 12;; GNU Emacs is free software: you can redistribute it and/or modify
d5c3f902 13;; it under the terms of the GNU General Public License as published by
1fecc8fe
GM
14;; the Free Software Foundation, either version 3 of the License, or
15;; (at your option) any later version.
d5c3f902
GM
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
1fecc8fe 23;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
d5c3f902
GM
24
25;;; Commentary:
26
27;; This package can be typically used for adding forgotten tildes in TeX
28;; sources or adding `&nbsp;' sequences in SGML (e.g. HTML) texts.
29;;
;; For example, the Czech orthography requires avoiding one-letter
;; prepositions at line endings.  So they should be connected with the
;; following words by a tilde.  Some users forget to do this all the
;; time.  The purpose of this program is to check the text and suggest
;; adding missing tildes in some places.  It works in a similar
;; manner to `query-replace-regexp'.
d5c3f902 36;;
e5bed401
DL
37;; The functionality of this program is actually performing query
38;; replace on certain regions, but for historical reasons explained
39;; above it is called `tildify'.
d5c3f902
GM
40;;
41;; The default variable settings are suited for Czech, so do not try to
42;; understand them if you are not familiar with Czech grammar and spelling.
43;;
c38e0c97 44;; The algorithm was inspired by Petr Olšák's program `vlna'. Abilities of
d5c3f902
GM
45;; `tildify.el' are a little limited; if you have improvement suggestions, let
46;; me know.
47
48;;; Code:
49
50
51;;; *** User configuration variables ***
52
53
;; Customization group holding all user options of this package.
(defgroup tildify nil
  "Add hard spaces or other text fragments to text buffers."
  :group 'wp
  :version "21.1")
58
(defcustom tildify-pattern-alist
  `((t ,(eval-when-compile
          (concat
           ;; Group 1: punctuation followed by the letter `a', or a
           ;; one-letter word from the listed set.
           "\\([,:;(][ \t]*[a]\\|\\<[AIKOSUVZikosuvz]\\)"
           ;; Group 2: the whitespace (possibly spanning one line break)
           ;; that gets replaced by the hard space.
           "\\([ \t]+\\|[ \t]*\n[ \t]*\\)"
           ;; Group 3: start of the following text.
           "\\(\\w\\|[([{\\]\\|<[a-zA-Z]\\)"))
        2))
  "Alist describing the places where a hard space should be inserted.

Every element has one of two shapes: (MAJOR-MODE REGEXP NUMBER), or
the alias form (MAJOR-MODE . SYMBOL).

MAJOR-MODE selects the major mode the element applies to.  It is either
- a symbol equal to the major mode of the buffer being processed, or
- t, marking the default element used for every major mode that has no
  element of its own.

REGEXP matches a piece of text in which a hard space is missing.  It is
always matched case sensitively, whatever the current value of
`case-fold-search' is.

NUMBER is the number of the REGEXP subexpression that gets replaced by
the hard space.

The alias form (MAJOR-MODE . SYMBOL) says that for MAJOR-MODE the
element for the mode SYMBOL is looked up in the alist instead."
  :group 'tildify
  :type '(repeat (cons :tag "Entry for major mode"
                       (choice (const :tag "Default" t)
                               (symbol :tag "Major mode"))
                       (choice (list :tag "Regexp"
                                     regexp
                                     (integer :tag "Group "))
                               (symbol :tag "Like other")))))
d5c3f902
GM
88
(defcustom tildify-string-alist
  '((latex-mode . "~")
    (tex-mode . latex-mode)
    (plain-tex-mode . latex-mode)
    (sgml-mode . "&nbsp;")
    (html-mode . sgml-mode)
    (xml-mode . "&#160;") ; XML does not define &nbsp; use numeric reference
    (nxml-mode . xml-mode)
    ;; Default: NO-BREAK SPACE.  Spelled as an escape because the literal
    ;; character cannot be told apart from an ordinary space when reading
    ;; the source and is easily lost by whitespace normalization.
    (t . "\u00A0"))
  "Alist specifying what is a hard space in the current major mode.

Each alist item is of the form (MAJOR-MODE . STRING) or the alias form
\(MAJOR-MODE . SYMBOL).

MAJOR-MODE defines major mode, for which the item applies.  It can be either:
- a symbol equal to the major mode of the buffer to be fixed
- t for default item, this applies to all major modes not defined in another
  alist item

STRING defines the hard space, which is inserted at places defined by
`tildify-pattern-alist'.  For example it can be \"~\" for TeX or \"&nbsp;\"
for SGML.

The form (MAJOR-MODE . SYMBOL) defines alias item for MAJOR-MODE.  For this
mode, the item for the mode SYMBOL is looked up in the alist instead."
  :group 'tildify
  :type '(repeat (cons :tag "Entry for major mode"
                       (choice (const :tag "Default" t)
                               (symbol :tag "Major mode"))
                       (choice (const :tag "No-break space (U+00A0)" "\u00A0")
                               (string :tag "String ")
                               (symbol :tag "Like other")))))
db95369b 121
(defcustom tildify-ignored-environments-alist
  `((latex-mode
     ("\\\\\\\\" . "") ; do not remove this
     (,(eval-when-compile (concat
                           "\\\\begin{\\("
                           (regexp-opt '("verbatim" "math" "displaymath"
                                         "equation" "eqnarray" "eqnarray*"))
                           "\\)}"))
      . ("\\\\end{" 1 "}"))
     ("\\\\verb\\*?\\(.\\)" . (1))
     ("\\$\\$?" . (0))
     ("\\\\(" . "\\\\)")
     ("\\\\[[]" . "\\\\[]]")
     ("\\\\[a-zA-Z]+\\( +\\|{}\\)[a-zA-Z]*" . "")
     ("%" . "$"))
    (plain-tex-mode . latex-mode)
    (html-mode
     (,(eval-when-compile (concat
                           "<\\("
                           (regexp-opt '("pre" "dfn" "code" "samp" "kbd" "var"
                                         "PRE" "DFN" "CODE" "SAMP" "KBD" "VAR"))
                           "\\)\\>[^>]*>"))
      . ("</" 1 ">"))
     ("<! *--" . "-- *>")
     ("<" . ">"))
    (sgml-mode . html-mode)
    (xml-mode
     ("<! *--" . "-- *>")
     ("<" . ">"))
    (nxml-mode . xml-mode))
  "Alist specifying ignored structured text environments.
Parts of text defined in this alist are skipped without performing hard space
insertion on them.  These settings allow skipping text parts like verbatim or
math environments in TeX or preformatted text in SGML.

Each list element is either of the form
  (MAJOR-MODE (BEG-REGEX . END-REGEX) (BEG-REGEX . END-REGEX) ... )
or an alias of the form (MAJOR-MODE . SYMBOL), in which case the element
for the mode SYMBOL is used instead.

MAJOR-MODE defines major mode, for which the item applies.  It can be either:
- a symbol equal to the major mode of the buffer to be fixed
- t for default item, this applies to all major modes not defined in another
  alist item

BEG-REGEX is a regexp matching beginning of a text part to be skipped.
END-REGEX defines end of the corresponding text part and can be either:
- a regexp matching the end of the skipped text part
- a list of regexps and numbers, which will compose the ending regexp by
  concatenating themselves, while replacing the numbers with corresponding
  subexpressions of BEG-REGEX (this is used to solve cases like
  \\\\verb<character> in TeX)."
  :group 'tildify
  :type '(repeat
          (cons :tag "Entry for major mode"
                (choice (const :tag "Default" t)
                        (symbol :tag "Major mode"))
                (choice
                 (const :tag "None" nil)
                 (repeat
                  :tag "Environments"
                  (cons :tag "Regexp pair"
                        (regexp :tag "Open ")
                        (choice :tag "Close"
                                (regexp :tag "Regexp")
                                ;; `repeat', not `list': the closing
                                ;; specification may hold any number of
                                ;; regexps and group numbers, whereas
                                ;; `list' would accept exactly one element.
                                (repeat :tag "Regexp and groups (concatenated)"
                                        (choice (regexp :tag "Regexp")
                                                (integer :tag "Group "))))))
                 (symbol :tag "Like other")))))
d5c3f902
GM
189
190
d5c3f902
GM
191;;; *** Interactive functions ***
192
193;;;###autoload
(defun tildify-region (beg end &optional dont-ask)
  "Insert hard spaces into the text between BEG and END.
The variables `tildify-pattern-alist', `tildify-string-alist' and
`tildify-ignored-environments-alist' control where the hard spaces go,
what they look like and which parts of the text are left alone.
The changed text is not refilled.
When DONT-ASK is non-nil (interactively: with a prefix argument) every
substitution is made without asking for confirmation."
  (interactive "*rP")
  (let ((case-fold-search nil)          ; patterns are case sensitive
        (ask (null dont-ask))
        (count 0))
    (tildify-foreach-region-outside-env beg end
      (lambda (start stop)
        (let* ((result (tildify-tildify start stop ask))
               (response (cdr result)))
          (setq count (+ count (car result)))
          (cond ((eq response 'force)
                 ;; The user chose to replace everything that is left:
                 ;; stop asking, but keep scanning.
                 (setq ask nil)
                 t)
                (t response)))))
    (message "%d spaces replaced." count)))
db95369b 213
d5c3f902 214;;;###autoload
(defun tildify-buffer (&optional dont-ask)
  "Insert hard spaces into the accessible portion of the current buffer.
The variables `tildify-pattern-alist', `tildify-string-alist' and
`tildify-ignored-environments-alist' control where the hard spaces go,
what they look like and which parts of the text are left alone.
The changed text is not refilled.
When DONT-ASK is non-nil (interactively: with a prefix argument) every
substitution is made without asking for confirmation."
  (interactive "*P")
  (let ((whole-beg (point-min))
        (whole-end (point-max)))
    (tildify-region whole-beg whole-end dont-ask)))
d5c3f902
GM
225
226
227;;; *** Auxiliary functions ***
228
d5c3f902
GM
(defun tildify-mode-alist (mode-alist &optional mode)
  "Return the value stored in MODE-ALIST for MODE.
MODE defaults to the current `major-mode'.  When MODE-ALIST has no
element for MODE, the default element (the one whose key is t) is used.
A non-nil symbol value is an alias: the lookup is repeated for the mode
that symbol names."
  (let* ((key (or mode major-mode))
         (entry (or (assoc key mode-alist)
                    (assoc t mode-alist)))
         (value (cdr entry)))
    (cond ((null value) value)
          ;; Alias entry: follow it.
          ((symbolp value) (tildify-mode-alist mode-alist value))
          (t value))))
db95369b 237
03d7d160
MN
(defun tildify-foreach-region-outside-env (beg end callback)
  "Scan region from BEG to END calling CALLBACK on portions out of environments.
Call CALLBACK on each region outside of environment to ignore.
CALLBACK will only be called for regions which have intersection
with [BEG END].  It must be a function that takes two point
arguments specifying the region to operate on.  Stop scanning the
region as soon as CALLBACK returns nil.  CALLBACK may change the
length of the text it is given; END is adjusted accordingly.
Environments to ignore are determined from
`tildify-ignored-environments-alist'."
  (declare (indent 2))
  (let ((pairs (tildify-mode-alist tildify-ignored-environments-alist)))
    (if (not pairs)
        ;; No environments are defined for this mode: one single region.
        (funcall callback beg end)
      (let* (;; CALLBACK may insert or delete text, which shifts the end
             ;; of the region; a marker follows those changes.
             (limit (copy-marker end))
             ;; Clip a candidate region to [BEG, LIMIT] and hand it to
             ;; CALLBACK; regions that end up empty count as "go on".
             (func (lambda (b e)
                     (let ((b (max b beg)) (e (min e limit)))
                       (if (< b e) (funcall callback b e) t))))
             ;; One regexp matching the beginning of any environment.
             (beg-re (concat "\\(?:"
                             (mapconcat 'car pairs "\\)\\|\\(?:")
                             "\\)"))
             p end-re)
        (unwind-protect
            (save-excursion
              (save-restriction
                ;; Scan from the very beginning of the buffer so that an
                ;; environment opened before BEG is recognized.
                (widen)
                (goto-char (point-min))
                (while (and (< (setq p (point)) limit)
                            (if (not (setq end-re
                                           (tildify-find-env beg-re pairs)))
                                ;; No more environments: the rest of the
                                ;; region is plain text, then finish.
                                (progn (funcall func p limit) nil)
                              ;; Process the text preceding the environment
                              ;; and stop when CALLBACK asks for it;
                              ;; otherwise skip to the environment's end.
                              (and (funcall func p (match-beginning 0))
                                   (< (point) limit)
                                   (re-search-forward end-re nil t)))))))
          ;; Do not leave a stray marker behind in the buffer.
          (set-marker limit nil))))))
269
(defun tildify-find-env (regexp pairs)
  "Search forward from point for the beginning of an environment.
REGEXP is the regexp to search for.  PAIRS is a list of
\(BEG-REGEX . END-SPEC) elements; the first one whose BEG-REGEX matches
the beginning of the text found by REGEXP supplies the result.
Return the regexp matching the end of the environment that was found,
or nil when REGEXP does not match.  On success point is left after the
match and the match data describes it."
  (when (re-search-forward regexp nil t)
    ;; The lookups below match against a string; keep the match data of
    ;; the buffer search intact for the caller.
    (save-match-data
      (let ((text (match-string 0))
            (candidates pairs))
        ;; Skip the pairs whose opening regexp does not match at the very
        ;; beginning of the text that was found.
        (while (not (eq 0 (string-match (caar candidates) text)))
          (setq candidates (cdr candidates)))
        (let ((closer (cdar candidates)))
          (if (not (stringp closer))
              ;; A list of regexps and group numbers: numbers stand for
              ;; the (quoted) text of that group of the opening match.
              (mapconcat (lambda (part)
                           (if (stringp part)
                               part
                             (regexp-quote (match-string part text))))
                         closer
                         "")
            closer))))))
d5c3f902
GM
290
(defun tildify-tildify (beg end ask)
  "Add tilde characters in the region between BEG and END.
This function does not do any further checking except for comments and
macros.

If ASK is nil, perform replace without asking user for confirmation.
Otherwise prompt for each match: y or SPC replaces it, n skips it, !
replaces it and all the remaining ones without asking, q quits.  Any
other key repeats the prompt.

Return a (COUNT . RESPONSE) cons where COUNT is the number of string
replacements done and RESPONSE is one of the symbols: t (all right),
nil (quit), force (replace without further questions)."
  (save-excursion
    (goto-char beg)
    (let* ((alist (tildify-mode-alist tildify-pattern-alist))
           (regexp (car alist))
           (match-number (cadr alist))
           (tilde (tildify-mode-alist tildify-string-alist))
           ;; Replacements may change the length of the text, so the end
           ;; of the region is tracked with a marker.
           (end-marker (copy-marker end))
           answer
           bad-answer
           replace
           quit
           ;; Do not log the prompts in the message log.
           (message-log-max nil)
           (count 0))
      (while (and (not quit)
                  (re-search-forward regexp (marker-position end-marker) t))
        (when (or (not ask)
                  (progn
                    ;; Show the user where the replacement would happen.
                    (goto-char (match-beginning match-number))
                    (setq bad-answer nil)
                    ;; `replace-match' below needs the match data of the
                    ;; search above; protect it in case something run
                    ;; while waiting for the key changes it.
                    (save-match-data
                      ;; Ask until a recognized key is pressed.  The key
                      ;; is interpreted inside the loop, so an unknown
                      ;; key prompts again instead of silently reusing
                      ;; the decision made for the previous match.
                      (while (progn
                               (message "%s"
                                        (if bad-answer
                                            "Press y, n, !, or q.  Replace? (yn!q) "
                                          "Replace? (yn!q) "))
                               (setq answer (read-event)
                                     bad-answer nil)
                               (cond
                                ((or (eq answer ?y)
                                     (eq answer ?\s)
                                     (eq answer 'space))
                                 (setq replace t))
                                ((eq answer ?n)
                                 (setq replace nil))
                                ((eq answer ?!)
                                 (setq replace t
                                       ask nil))
                                ((eq answer ?q)
                                 (setq replace nil
                                       quit t))
                                (t
                                 (setq bad-answer t)))
                               bad-answer)))
                    replace))
          (replace-match tilde t t nil match-number)
          (setq count (1+ count))))
      ;; The marker is no longer needed; detach it from the buffer.
      (set-marker end-marker nil)
      ;; Return value
      (cons count (cond (quit nil)
                        ((not ask) 'force)
                        (t t))))))
d5c3f902
GM
345
346
347;;; *** Announce ***
348
;; Announce the `tildify' feature so that (require 'tildify) succeeds
;; once this file has been loaded.
(provide 'tildify)
350
351
352;; Local variables:
c38e0c97 353;; coding: utf-8
d5c3f902
GM
354;; End:
355
356;;; tildify.el ends here