Commit | Line | Data |
---|---|---|
12c64503 GM |
1 | ;;; rx.el --- sexp notation for regular expressions |
2 | ||
ceb4c4d3 | 3 | ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, |
f0fa15c5 | 4 | ;; 2006, 2007 Free Software Foundation, Inc. |
12c64503 GM |
5 | |
6 | ;; Author: Gerd Moellmann <gerd@gnu.org> | |
7 | ;; Maintainer: FSF | |
8 | ;; Keywords: strings, regexps, extensions | |
9 | ||
10 | ;; This file is part of GNU Emacs. | |
11 | ||
12 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
13 | ;; it under the terms of the GNU General Public License as published by | |
14 | ;; the Free Software Foundation; either version 2, or (at your option) | |
15 | ;; any later version. | |
16 | ||
17 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;; GNU General Public License for more details. | |
21 | ||
22 | ;; You should have received a copy of the GNU General Public License | |
23 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
24 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
25 | ;; Boston, MA 02110-1301, USA. | |
12c64503 GM |
26 | |
27 | ;;; Commentary: | |
28 | ||
29 | ;; This is another implementation of sexp-form regular expressions. | |
30 | ;; It was unfortunately written without being aware of the Sregex | |
31 | ;; package coming with Emacs, but as things stand, Rx completely | |
32 | ;; covers all regexp features, which Sregex doesn't, doesn't suffer | |
33 | ;; from the bugs mentioned in the commentary section of Sregex, and | |
34 | ;; uses a nicer syntax (IMHO, of course :-). | |
35 | ||
ccfbe679 SM |
36 | ;; This significantly extended version of the original is almost |
37 | ;; compatible with Sregex. The only incompatibility I (fx) know of is | |
38 | ;; that the `repeat' form can't have multiple regexp args. | |
39 | ||
40 | ;; Now alternative forms are provided for a degree of compatibility | |
41 | ;; with Shivers' attempted definitive SRE notation | |
42 | ;; <URL:http://www.ai.mit.edu/~shivers/sre.txt>. SRE forms not | |
43 | ;; catered for include: dsm, uncase, w/case, w/nocase, ,@<exp>, | |
44 | ;; ,<exp>, (word ...), word+, posix-string, and character class forms. | |
45 | ;; Some forms are inconsistent with SRE, either for historical reasons | |
46 | ;; or because of the implementation -- simple translation into Emacs | |
47 | ;; regexp strings. These include: any, word. Also, case-sensitivity | |
48 | ;; and greediness are controlled by variables external to the regexp, | |
49 | ;; and you need to feed the forms to the `posix-' functions to get | |
50 | ;; SRE's POSIX semantics. There are probably more difficulties. | |
51 | ||
12c64503 GM |
52 | ;; Rx translates a sexp notation for regular expressions into the |
53 | ;; usual string notation. The translation can be done at compile-time | |
54 | ;; by using the `rx' macro. It can be done at run-time by calling | |
55 | ;; function `rx-to-string'. See the documentation of `rx' for a | |
56 | ;; complete description of the sexp notation. | |
57 | ;; | |
58 | ;; Some examples of string regexps and their sexp counterparts: | |
59 | ;; | |
60 | ;; "^[a-z]*" | |
61 | ;; (rx (and line-start (0+ (in "a-z")))) | |
62 | ;; | |
63 | ;; "\n[^ \t]" | |
64 | ;; (rx (and "\n" (not blank))), or | |
65 | ;; (rx (and "\n" (not (any " \t")))) | |
66 | ;; | |
67 | ;; "\\*\\*\\* EOOH \\*\\*\\*\n" | |
68 | ;; (rx "*** EOOH ***\n") | |
69 | ;; | |
70 | ;; "\\<\\(catch\\|finally\\)\\>[^_]" | |
71 | ;; (rx (and word-start (submatch (or "catch" "finally")) word-end | |
72 | ;; (not (any ?_)))) | |
73 | ;; | |
74 | ;; "[ \t\n]*:\\([^:]+\\|$\\)" | |
75 | ;; (rx (and (zero-or-more (in " \t\n")) ":" | |
76 | ;; (submatch (or line-end (one-or-more (not (any ?:))))))) | |
77 | ;; | |
78 | ;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" | |
79 | ;; (rx (and line-start | |
80 | ;; "content-transfer-encoding:" | |
c53f9b3b | 81 | ;; (* (? ?\n) blank) |
12c64503 | 82 | ;; "quoted-printable" |
c53f9b3b | 83 | ;; (* (? ?\n) blank))) |
12c64503 GM |
84 | ;; |
85 | ;; (concat "^\\(?:" something-else "\\)") | |
86 | ;; (rx (and line-start (eval something-else))), statically or | |
87 | ;; (rx-to-string `(and line-start ,something-else)), dynamically. | |
88 | ;; | |
89 | ;; (regexp-opt '(STRING1 STRING2 ...)) | |
90 | ;; (rx (or STRING1 STRING2 ...)), or in other words, `or' automatically | |
91 | ;; calls `regexp-opt' as needed. | |
92 | ;; | |
93 | ;; "^;;\\s-*\n\\|^\n" | |
94 | ;; (rx (or (and line-start ";;" (0+ space) ?\n) | |
95 | ;; (and line-start ?\n))) | |
96 | ;; | |
97 | ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " | |
a1506d29 JB |
98 | ;; (rx (and "$Id: " |
99 | ;; (1+ (not (in " "))) | |
12c64503 GM |
100 | ;; " " |
101 | ;; (submatch (1+ (not (in " ")))) | |
c53f9b3b | 102 | ;; " ")) |
12c64503 GM |
103 | ;; |
104 | ;; "\\\\\\\\\\[\\w+" | |
105 | ;; (rx (and ?\\ ?\\ ?\[ (1+ word))) | |
106 | ;; | |
107 | ;; etc. | |
108 | ||
109 | ;;; History: | |
a1506d29 | 110 | ;; |
12c64503 GM |
111 | |
112 | ;;; Code: | |
113 | ||
12c64503 GM |
(defconst rx-constituents
  '(;; Sequencing and alternation.  A function entry written as the
    ;; plain list (SYMBOL FUNCTION MIN MAX ...) is the very same object
    ;; as the dotted pair (SYMBOL . (FUNCTION MIN MAX ...)).
    (and rx-and 1 nil)
    (seq . and)                         ; SRE
    (: . and)                           ; SRE
    (sequence . and)                    ; sregex
    (or rx-or 1 nil)
    (| . or)                            ; SRE

    ;; Single characters and character sets.
    (not-newline . ".")
    (nonl . not-newline)                ; SRE
    (anything . "\\(?:.\\|\n\\)")
    (any rx-any 1 nil rx-check-any)     ; inconsistent with SRE
    (in . any)
    (char . any)                        ; sregex
    (not-char rx-not-char 1 nil rx-check-any) ; sregex
    (not rx-not 1 1 rx-check-not)

    ;; Counted repetition.  `repeat' is only partially consistent with
    ;; sregex, whose `repeat' behaves like our `**': a `repeat' taking
    ;; both an optional max arg and multiple sexp forms is ambiguous.
    (repeat rx-repeat 2 3)
    (= rx-= 2 nil)                      ; SRE
    (>= rx->= 2 nil)                    ; SRE
    (** rx-** 2 nil)                    ; SRE

    ;; Groups.
    (submatch rx-submatch 1 nil)        ; SRE
    (group . submatch)

    ;; Kleene operators and their aliases.
    (zero-or-more rx-kleene 1 nil)
    (one-or-more rx-kleene 1 nil)
    (zero-or-one rx-kleene 1 nil)
    (\? . zero-or-one)                  ; SRE
    (\?? . zero-or-one)
    (* . zero-or-more)                  ; SRE
    (*? . zero-or-more)
    (0+ . zero-or-more)
    (+ . one-or-more)                   ; SRE
    (+? . one-or-more)
    (1+ . one-or-more)
    (optional . zero-or-one)
    (opt . zero-or-one)                 ; sregex
    (minimal-match rx-greedy 1 1)
    (maximal-match rx-greedy 1 1)
    (backref rx-backref 1 1 rx-check-backref)

    ;; Zero-width assertions.
    (line-start . "^")
    (bol . line-start)                  ; SRE
    (line-end . "$")
    (eol . line-end)                    ; SRE
    (string-start . "\\`")
    (bos . string-start)                ; SRE
    (bot . string-start)                ; sregex
    (string-end . "\\'")
    (eos . string-end)                  ; SRE
    (eot . string-end)                  ; sregex
    (buffer-start . "\\`")
    (buffer-end . "\\'")
    (point . "\\=")
    (word-start . "\\<")
    (bow . word-start)                  ; SRE
    (word-end . "\\>")
    (eow . word-end)                    ; SRE
    (word-boundary . "\\b")
    (not-word-boundary . "\\B")         ; sregex
    (symbol-start . "\\_<")
    (symbol-end . "\\_>")

    ;; Syntax, category and escape hatches.
    (syntax rx-syntax 1 1)
    (not-syntax rx-not-syntax 1 1)      ; sregex
    (category rx-category 1 1 rx-check-category)
    (eval rx-eval 1 1)
    (regexp rx-regexp 1 1 stringp)

    ;; Named character classes.
    (digit . "[[:digit:]]")
    (numeric . digit)                   ; SRE
    (num . digit)                       ; SRE
    (control . "[[:cntrl:]]")           ; SRE
    (cntrl . control)                   ; SRE
    (hex-digit . "[[:xdigit:]]")        ; SRE
    (hex . hex-digit)                   ; SRE
    (xdigit . hex-digit)                ; SRE
    (blank . "[[:blank:]]")             ; SRE
    (graphic . "[[:graph:]]")           ; SRE
    (graph . graphic)                   ; SRE
    (printing . "[[:print:]]")          ; SRE
    (print . printing)                  ; SRE
    (alphanumeric . "[[:alnum:]]")      ; SRE
    (alnum . alphanumeric)              ; SRE
    (letter . "[[:alpha:]]")
    (alphabetic . letter)               ; SRE
    (alpha . letter)                    ; SRE
    (ascii . "[[:ascii:]]")             ; SRE
    (nonascii . "[[:nonascii:]]")
    (lower . "[[:lower:]]")             ; SRE
    (lower-case . lower)                ; SRE
    (punctuation . "[[:punct:]]")       ; SRE
    (punct . punctuation)               ; SRE
    (space . "[[:space:]]")             ; SRE
    (whitespace . space)                ; SRE
    (white . space)                     ; SRE
    (upper . "[[:upper:]]")             ; SRE
    (upper-case . upper)                ; SRE
    (word . "[[:word:]]")               ; inconsistent with SRE
    (wordchar . word)                   ; sregex
    (not-wordchar . "[^[:word:]]")      ; sregex (use \\W?)
    )
  "Alist describing the constituents of sexp regular expressions.
Every element looks like (SYMBOL . DEFN), SYMBOL being a name that
may appear in a sexp regexp.  DEFN takes one of three shapes:

  a string: SYMBOL translates directly into that string;
  a symbol: SYMBOL is an alias, so look up DEFN in turn;
  a list (FUNCTION MIN-ARGS MAX-ARGS PREDICATE): FUNCTION produces
    the code for forms headed by SYMBOL.

MIN-ARGS and MAX-ARGS bound the number of arguments such a form may
have; a nil MAX-ARGS means there is no upper limit.  PREDICATE is
optional; when present, every argument must satisfy it.")
224 | ||
225 | ||
(defconst rx-syntax
  '((whitespace . ?-)
    (punctuation . ?.)
    (word . ?w)
    (symbol . ?_)
    (open-parenthesis . ?\()
    (close-parenthesis . ?\))
    (expression-prefix . ?\')
    (string-quote . ?\")
    (paired-delimiter . ?$)
    (escape . ?\\)
    (character-quote . ?/)
    (comment-start . ?<)
    (comment-end . ?>)
    (string-delimiter . ?|)
    (comment-delimiter . ?!))
  "Alist mapping Rx syntax symbols to syntax characters.
Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid
symbol in `(syntax SYMBOL)', and CHAR is the syntax character
corresponding to SYMBOL, as it would be used with \\s or \\S in
regular expressions.

The function `rx-syntax' looks entries up by SYMBOL and, failing
that, by CHAR when given a symbol whose name is a single character.")
247 | ||
248 | ||
(defconst rx-categories
  '((consonant . ?0)
    (base-vowel . ?1)
    (upper-diacritical-mark . ?2)
    (lower-diacritical-mark . ?3)
    (tone-mark . ?4)
    (symbol . ?5)
    (digit . ?6)
    (vowel-modifying-diacritical-mark . ?7)
    (vowel-sign . ?8)
    (semivowel-lower . ?9)
    (not-at-end-of-line . ?<)
    (not-at-beginning-of-line . ?>)
    (alpha-numeric-two-byte . ?A)
    (chinese-two-byte . ?C)
    ;; Historical misspelling, kept so that existing callers which
    ;; use it continue to work.
    (chinse-two-byte . ?C)
    (greek-two-byte . ?G)
    (japanese-hiragana-two-byte . ?H)
    (indian-two-byte . ?I)
    (japanese-katakana-two-byte . ?K)
    (korean-hangul-two-byte . ?N)
    (cyrillic-two-byte . ?Y)
    (combining-diacritic . ?^)
    (ascii . ?a)
    (arabic . ?b)
    (chinese . ?c)
    (ethiopic . ?e)
    (greek . ?g)
    (korean . ?h)
    (indian . ?i)
    (japanese . ?j)
    (japanese-katakana . ?k)
    (latin . ?l)
    (lao . ?o)
    (tibetan . ?q)
    (japanese-roman . ?r)
    (thai . ?t)
    (vietnamese . ?v)
    (hebrew . ?w)
    (cyrillic . ?y)
    (can-break . ?|))
  "Alist mapping symbols to category characters.
Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid
symbol in `(category SYMBOL)', and CHAR is the category character
corresponding to SYMBOL, as it would be used with `\\c' or `\\C' in
regular expression strings.

`chinse-two-byte' is a misspelled alias of `chinese-two-byte' that
is retained for backward compatibility.")
294 | ||
295 | ||
(defvar rx-greedy-flag t
  "Non-nil means produce greedy regular expressions for `zero-or-one',
`zero-or-more', and `one-or-more'.  Dynamically bound.
`rx-greedy' binds this variable while translating the argument of
`minimal-match' and `maximal-match', and `rx-kleene' consults it
for operators that do not fix greediness themselves.")
299 | ||
300 | ||
(defun rx-info (op)
  "Look up the parsing/code generation info for OP.
The space character (ASCII 32) stands for the symbol `?' and the
character `?' stands for the symbol `??'; both are mapped to those
symbols first.  Alias definitions in `rx-constituents' are then
followed until the definition is no longer a symbol.

Return the final definition, a string or a list, or nil when OP
has no definition."
  (let ((entry (cond ((eq op ? ) '\?)
                     ((eq op ??) '\??)
                     (t op))))
    ;; Each step replaces an alias by what it is defined as.
    (while (and entry (symbolp entry))
      (setq entry (cdr (assq entry rx-constituents))))
    entry))
a1506d29 | 311 | |
12c64503 GM |
312 | |
(defun rx-check (form)
  "Validate the arguments of FORM against the info for its head.
The info comes from `rx-info' applied to the car of FORM.  Signal
an error if FORM is not a list, if it has fewer arguments than
the declared minimum or more than the declared maximum, or if a
predicate is declared and some argument does not satisfy it."
  (unless (listp form)
    (error "rx `%s' needs argument(s)" form))
  (let* ((head (car form))
         (info (rx-info head))
         (count (1- (length form)))
         (lower (nth 1 info))
         (upper (nth 2 info))
         (pred (nth 3 info)))
    (if (and lower (< count lower))
        (error "rx form `%s' requires at least %d args"
               head lower))
    (if (and upper (> count upper))
        (error "rx form `%s' accepts at most %d args"
               head upper))
    (if pred
        (dolist (arg (cdr form))
          (or (funcall pred arg)
              (error "rx form `%s' requires args satisfying `%s'"
                     head pred))))))
335 | ||
336 | ||
(defun rx-and (form)
  "Translate FORM, which is `(and FORM1 ...)', into a regexp string.
Each sub-form is translated without a group of its own, the
pieces are concatenated, and the whole is wrapped in a shy group."
  (rx-check form)
  (let ((pieces (mapcar (lambda (sub) (rx-to-string sub 'no-group))
                        (cdr form))))
    (concat "\\(?:" (apply #'concat pieces) "\\)")))
12c64503 GM |
346 | |
347 | ||
(defun rx-or (form)
  "Translate FORM, which is `(or FORM1 ...)', into a regexp string.
When every alternative is a string the body is built by
`regexp-opt'; otherwise each alternative is translated with
`rx-to-string' and the results are joined with \"\\\\|\".  The
result is always wrapped in a shy group."
  (rx-check form)
  (let* ((branches (cdr form))
         (body (if (memq nil (mapcar #'stringp branches))
                   ;; At least one alternative is not a plain string.
                   (mapconcat #'rx-to-string branches "\\|")
                 (regexp-opt branches))))
    (concat "\\(?:" body "\\)")))
12c64503 GM |
360 | |
361 | ||
;; `rx-check-any' sets this to "]" when an argument contains `]';
;; `rx-any' then emits it right after the opening bracket.
(defvar rx-bracket)			; dynamically bound in `rx-any'
12c64503 GM |
363 | |
(defun rx-check-any (arg)
  "Check arg ARG for Rx `any' and return it as a string.
ARG may be a character, a non-empty string, a symbol naming a
character class, or a cons (FROM . TO) of two characters.  The
returned string is the text to insert between the brackets of a
character alternative.  If ARG contains `]', that character is
removed from the result and `rx-bracket' is set to \"]\" instead.
Signal an error if ARG is of any other kind."
  ;; A character is handled like the one-character string.
  (if (integerp arg)
      (setq arg (string arg)))
  (when (stringp arg)
    (if (zerop (length arg))
	(error "String arg for Rx `any' must not be empty"))
    ;; Quote ^ at start; don't bother to check whether this is first arg.
    ;; NOTE(review): the Emacs regexp syntax documents no escape
    ;; character inside a character alternative, so this backslash
    ;; presumably becomes a member of the set -- confirm.
    (if (eq ?^ (aref arg 0))
	(setq arg (concat "\\" arg)))
    ;; Remove ] and set flag for adding it to start of overall result.
    (when (string-match "\\]" arg)
      (setq arg (replace-regexp-in-string "\\]" "" arg)
	    rx-bracket "]")))
  ;; A symbol is translated like any other Rx symbol; a failed
  ;; translation is reported as an invalid char class.
  (when (symbolp arg)
    (let ((translation (condition-case nil
			   (rx-to-string arg 'no-group)
			 (error nil))))
      (unless translation (error "Invalid char class `%s' in Rx `any'" arg))
      (setq arg (substring translation 1 -1)))) ; strip outer brackets
  ;; sregex compatibility
  ;; A pair of characters (FROM . TO) becomes the range "FROM-TO".
  (when (and (integerp (car-safe arg))
	     (integerp (cdr-safe arg)))
    (setq arg (string (car arg) ?- (cdr arg))))
  (unless (stringp arg)
    (error "rx `any' requires string, character, char pair or char class args"))
  arg)
12c64503 GM |
391 | |
(defun rx-any (form)
  "Translate FORM, which is `(any ARG ...)', into a regexp string.
Every ARG is converted by `rx-check-any' and the results are put
between brackets.  A lone \"-\" is moved to the front, and a `]'
found in any ARG is emitted directly after the opening bracket."
  (rx-check form)
  (let* ((rx-bracket nil)
         ;; `rx-check-any' may set `rx-bracket' as a side effect.
         (items (mapcar #'rx-check-any (cdr form))))
    ;; Keep a literal hyphen from forming an accidental range.
    (when (member "-" items)
      (setq items (cons "-" (delete "-" items))))
    (concat "[" rx-bracket (apply #'concat items) "]")))
12c64503 GM |
403 | |
404 | ||
740b7c2d EZ |
(defun rx-check-not (arg)
  "Check arg ARG for Rx `not'.
ARG is accepted when it is one of:

  a symbol that translates to a bracketed character class such
    as \"[[:digit:]]\";
  the symbol `word-boundary';
  a list whose head is `not', `any', `in', `syntax' or `category'.

Return t if ARG is acceptable, otherwise signal an error."
  (unless (or (and (symbolp arg)
                   ;; The class name between the colons is one or
                   ;; more characters long, hence the `+'.
                   (string-match "\\`\\[\\[:[-a-z]+:\\]\\]\\'"
                                 (condition-case nil
                                     (rx-to-string arg 'no-group)
                                   ;; An untranslatable symbol yields
                                   ;; "", which cannot match.
                                   (error ""))))
              (eq arg 'word-boundary)
              (and (consp arg)
                   (memq (car arg) '(not any in syntax category))))
    (error "rx `not' syntax error: %s" arg))
  t)
12c64503 GM |
417 | |
418 | ||
(defun rx-not (form)
  "Parse and produce code from FORM.  FORM is `(not ...)'.
The argument is translated first and the result is then negated:

  \"[^...]\" becomes \"[...]\", or the quoted character itself when
    the set holds exactly one character;
  \"[...]\" becomes \"[^...]\";
  a leading \\s, \\c or \\b is turned into \\S, \\C or \\B, and vice
    versa, so that a double negation gives back the original;
  anything else is wrapped as \"[^...]\"."
  (rx-check form)
  (let ((result (rx-to-string (cadr form) 'no-group))
        ;; The letter case of \\s versus \\S is significant below.
        case-fold-search)
    (cond ((string-match "\\`\\[^" result)
           (if (= (length result) 4)
               ;; \"[^x]\" negates to the single character x, which
               ;; must be quoted: a bare `.' or `^' would be special.
               (regexp-quote (substring result 2 3))
             (concat "[" (substring result 2))))
          ((eq ?\[ (aref result 0))
           (concat "[^" (substring result 1)))
          ((string-match "\\`\\\\[scb]" result)
           (concat (upcase (substring result 0 2)) (substring result 2)))
          ;; Negating an already negated backslash construct.
          ((string-match "\\`\\\\[SCB]" result)
           (concat (downcase (substring result 0 2)) (substring result 2)))
          (t
           (concat "[^" result "]")))))
434 | ||
435 | ||
ccfbe679 SM |
(defun rx-not-char (form)
  "Translate FORM, which is `(not-char ...)', into a regexp string.
The arguments are handed to `rx-not' as `(not (in ...))'."
  (rx-check form)
  (let ((set (cons 'in (cdr form))))
    (rx-not (list 'not set))))
440 | ||
441 | ||
(defun rx-not-syntax (form)
  "Translate FORM, which is `(not-syntax SYNTAX)', into a regexp string.
The argument is handed to `rx-not' as `(not (syntax SYNTAX))'."
  (rx-check form)
  (let ((inner (cons 'syntax (cdr form))))
    (rx-not (list 'not inner))))
446 | ||
447 | ||
(defun rx-trans-forms (form &optional skip)
  "Collapse the trailing sub-forms of FORM into a single `and' form.
SKIP is the number of items after the head that are left alone; it
defaults to 0.  When exactly one sub-form follows those items, FORM
is returned unchanged.  Otherwise a copy is returned in which the
remaining sub-forms are wrapped, so that (HEAD REST ...) becomes
\(HEAD (and REST ...)), and with SKIP equal to 1 `(= N REST ...)'
becomes `(= N (and REST ...))'.  FORM itself is never modified."
  (let* ((keep (or skip 0))
         (rest (nthcdr (1+ keep) form)))
    (cond ((= (length rest) 1)
           form)
          (t
           ;; Splice the wrapper into a copy, not into the caller's list.
           (let ((copy (copy-sequence form)))
             (setcdr (nthcdr keep copy) (list (cons 'and rest)))
             copy)))))
460 | ||
461 | ||
(defun rx-= (form)
  "Translate FORM, which is `(= N ...)', into a regexp string.
The sub-forms after N are combined by `rx-trans-forms' and the
result is followed by an exact repetition count.  Signal an error
unless N is a positive integer."
  (rx-check form)
  (let* ((normalized (rx-trans-forms form 1))
         (count (nth 1 normalized)))
    (if (and (integerp count) (> count 0))
        (format "%s\\{%d\\}" (rx-to-string (nth 2 normalized)) count)
      (error "rx `=' requires positive integer first arg"))))
470 | ||
471 | ||
(defun rx->= (form)
  "Translate FORM, which is `(>= N ...)', into a regexp string.
The sub-forms after N are combined by `rx-trans-forms' and the
result is followed by an open-ended repetition count.  Signal an
error unless N is a positive integer."
  (rx-check form)
  (let* ((normalized (rx-trans-forms form 1))
         (count (nth 1 normalized)))
    (if (and (integerp count) (> count 0))
        (format "%s\\{%d,\\}" (rx-to-string (nth 2 normalized)) count)
      (error "rx `>=' requires positive integer first arg"))))
480 | ||
481 | ||
(defun rx-** (form)
  "Translate FORM, which is `(** N M ...)', into a regexp string.
The sub-forms after N and M are combined by `rx-trans-forms' and
the result is translated as the equivalent `repeat' form."
  (rx-check form)
  (let ((args (cdr (rx-trans-forms form 2))))
    (rx-to-string (cons 'repeat args))))
487 | ||
488 | ||
12c64503 GM |
(defun rx-repeat (form)
  "Translate a `repeat' FORM into a regexp string.
FORM is either `(repeat N FORM1)', meaning exactly N repetitions,
or `(repeat N M FORM1)', meaning between N and M repetitions.
Signal an error if N is not a positive integer in the first shape,
or if N and M are not non-negative integers with N <= M in the
second."
  (rx-check form)
  (let ((first (nth 1 form))
        (second (nth 2 form)))
    (if (= (length form) 3)
        ;; (repeat N FORM1): SECOND is the sub-form here.
        (progn
          (unless (and (integerp first) (> first 0))
            (error "rx `repeat' requires positive integer first arg"))
          (format "%s\\{%d\\}" (rx-to-string second) first))
      ;; (repeat N M FORM1): FIRST and SECOND are the bounds.
      (when (or (not (integerp second))
                (< second 0)
                (not (integerp first))
                (< first 0)
                (< second first))
        (error "rx `repeat' range error"))
      (format "%s\\{%d,%d\\}" (rx-to-string (nth 3 form))
              first second))))
507 | ||
508 | ||
(defun rx-submatch (form)
  "Translate FORM, which is `(submatch ...)', into a regexp string.
Each sub-form is translated without a group of its own, the
pieces are concatenated, and the whole is wrapped in a capturing
group."
  (let ((pieces (mapcar (lambda (sub) (rx-to-string sub 'no-group))
                        (cdr form))))
    (concat "\\(" (apply #'concat pieces) "\\)")))
12c64503 | 515 | |
740b7c2d EZ |
(defun rx-backref (form)
  "Translate FORM, which is `(backref N)', into a regexp string.
The result is a backslash followed by the number N."
  (rx-check form)
  (let ((group (nth 1 form)))
    (format "\\%d" group)))
520 | ||
(defun rx-check-backref (arg)
  "Check arg ARG for Rx `backref'.
Return t if ARG is an integer between 1 and 9 inclusive, otherwise
signal an error."
  (if (and (integerp arg)
           (<= 1 arg)
           (<= arg 9))
      t
    (error "rx `backref' requires numeric 1<=arg<=9: %s" arg)))
525 | ||
12c64503 GM |
(defun rx-kleene (form)
  "Translate a repetition FORM into a regexp string.
FORM is `(OP FORM1 ...)', where OP is `zero-or-one', `zero-or-more',
`one-or-more' or one of their aliases.
The operators `*', `+' and `?' always give a greedy regexp, and
`*?', `+?' and `??' always give a non-greedy one.  For any other
OP the result is greedy if `rx-greedy-flag' is non-nil.
The operand is wrapped in a shy group unless `rx-atomic-p' accepts
its translation."
  (rx-check form)
  (let* ((form (rx-trans-forms form))
         (head (car form))
         (operator (cond ((memq head '(* *? 0+ zero-or-more)) "*")
                         ((memq head '(+ +? 1+ one-or-more)) "+")
                         (t "?")))
         ;; In the two lists below, `? ' is the space character and
         ;; `??' is the character `?', not symbols.
         (laziness (cond ((memq head '(* + ? )) "")
                         ((memq head '(*? +? ??)) "?")
                         (rx-greedy-flag "")
                         (t "?")))
         (operand (rx-to-string (cadr form) 'no-group)))
    (concat (if (rx-atomic-p operand)
                operand
              (concat "\\(?:" operand "\\)"))
            operator
            laziness)))
547 | ||
(defun rx-atomic-p--bracket-end (r start)
  "Return the index just past the bracket expression at START in R.
R is a regexp string whose character at START is `['.  Return nil
if the bracket expression is not terminated."
  (let ((len (length r))
        (i (1+ start))
        (end nil)
        (case-fold-search nil))
    ;; A `^' directly after the opening bracket negates the set, and
    ;; a `]' in first position is an ordinary member, not the end.
    (when (and (< i len) (eq (aref r i) ?^))
      (setq i (1+ i)))
    (when (and (< i len) (eq (aref r i) ?\]))
      (setq i (1+ i)))
    (while (and (not end) (< i len))
      (cond ((eq (aref r i) ?\])
             (setq end (1+ i)))
            ;; Step over a class such as `[:alpha:]' as a unit so its
            ;; `]' is not taken for the end of the expression.
            ((and (eq (aref r i) ?\[)
                  (eq (string-match "\\[:[a-z]+:\\]" r i) i))
             (setq i (match-end 0)))
            (t
             (setq i (1+ i)))))
    end))

(defun rx-atomic-p--group-end (r)
  "Return the index just past the group that opens regexp string R.
R must start with a backslash followed by `('.  Return nil if that
group is never closed."
  (let ((len (length r))
        (i 2)
        (depth 1))
    (while (and (> depth 0) (< i len))
      (let ((c (aref r i)))
        (cond ((eq c ?\\)
               ;; A backslash always pairs with the next character;
               ;; only `\\(' and `\\)' change the nesting depth.
               (when (< (1+ i) len)
                 (let ((next (aref r (1+ i))))
                   (cond ((eq next ?\() (setq depth (1+ depth)))
                         ((eq next ?\)) (setq depth (1- depth))))))
               (setq i (+ i 2)))
              ((eq c ?\[)
               ;; Nothing inside a bracket expression opens or closes
               ;; a group.  An unterminated one ends the scan.
               (setq i (or (rx-atomic-p--bracket-end r i) len)))
              (t
               (setq i (1+ i))))))
    (and (= depth 0) i)))

(defun rx-atomic-p (r)
  "Return non-nil if regexp string R is atomic.
An atomic regexp R is one such that a suffix operator
appended to R will apply to all of R.  For example, \"a\"
\"[abc]\" and \"\\(ab\\|ab*c\\)\" are atomic and \"ab\",
\"[ab]c\", and \"ab\\|ab*c\" are not atomic.

This function may return false negatives, but it is meant not to
return false positives.  It detects the following categories of
atomic regexp:

  a group or shy group that spans all of R:  \\(...\\)
  a bracket expression that spans all of R:  [...]
  a single character:                        a

R is scanned to make sure that the group or bracket expression
opened at its start is closed by its very last characters, so
that neither \"\\(a\\)\\|\\(b\\)\" nor \"[ab]c[de]\" is taken to be
atomic.

False negatives are returned for regexps that are atomic but end
in operators, such as \"a+\", and for two-character backslash
constructs.  The match data is preserved."
  (save-match-data
    (let ((len (length r)))
      (cond ((= len 1) t)
            ((< len 2) nil)
            ((eq (aref r 0) ?\[)
             (eq (rx-atomic-p--bracket-end r 0) len))
            ((and (eq (aref r 0) ?\\)
                  (eq (aref r 1) ?\())
             (eq (rx-atomic-p--group-end r) len))
            (t nil)))))
12c64503 GM |
580 | |
581 | ||
(defun rx-syntax (form)
  "Translate FORM, which is `(syntax SYMBOL)', into a regexp string.
SYMBOL is looked up in the alist `rx-syntax'.  For sregex
compatibility, a SYMBOL whose name is a single character is also
accepted when that character is one of the alist's syntax
characters.  Signal an error for any other SYMBOL."
  (rx-check form)
  (let* ((sym (cadr form))
         (entry (or (assq sym rx-syntax)
                    ;; Try sregex compatibility.
                    (let ((name (symbol-name sym)))
                      (and (= 1 (length name))
                           (rassq (aref name 0) rx-syntax)))
                    (error "Unknown rx syntax `%s'" (cadr form)))))
    (format "\\s%c" (cdr entry))))
595 | ||
596 | ||
(defun rx-check-category (form)
  "Check FORM, the argument of a `(category FORM)'.
Return t if FORM is an integer or a symbol listed in
`rx-categories', otherwise signal an error."
  (if (or (integerp form)
          (cdr (assq form rx-categories)))
      t
    (error "Unknown category `%s'" form)))
a1506d29 | 603 | |
12c64503 GM |
604 | |
(defun rx-category (form)
  "Translate FORM, which is `(category SYMBOL)', into a regexp string.
An integer argument is used as the category character itself; a
symbol is looked up in `rx-categories'."
  (rx-check form)
  (let* ((arg (cadr form))
         (char (if (integerp arg)
                   arg
                 (cdr (assq arg rx-categories)))))
    (format "\\c%c" char)))
612 | ||
613 | ||
(defun rx-eval (form)
  "Translate FORM, which is `(eval FORM)', into a regexp string.
The argument is evaluated with `eval' and the value is translated
by `rx-to-string'."
  (rx-check form)
  (let ((value (eval (cadr form))))
    (rx-to-string value)))
618 | ||
619 | ||
(defun rx-greedy (form)
  "Translate a `minimal-match' or `maximal-match' FORM.
If FORM is `(minimal-match FORM1)', non-greedy versions of the
`*', `+' and `?' operators are used in FORM1.  If FORM is
`(maximal-match FORM1)', greedy operators are used.  This is done
by binding `rx-greedy-flag' while FORM1 is translated."
  (rx-check form)
  (let* ((greedy (eq (car form) 'maximal-match))
         (rx-greedy-flag greedy))
    (rx-to-string (cadr form))))
628 | ||
629 | ||
(defun rx-regexp (form)
  "Translate FORM, which is `(regexp STRING)', into a regexp string.
STRING is used as is, wrapped in a shy group."
  (rx-check form)
  (let ((body (cadr form)))
    (concat "\\(?:" body "\\)")))
634 | ||
635 | ||
636 | ;;;###autoload | |
(defun rx-to-string (form &optional no-group)
  "Translate the sexp regular expression FORM into a regexp string.
A string or a character is quoted so that it matches literally.
A symbol is looked up with `rx-info': a string definition is used
as is, and a list definition has its function called with the
symbol.  A list is handled by the function defined for its head.
NO-GROUP non-nil means don't put a shy group around the result of
a list FORM; otherwise one is added unless the result already
starts with a group.
Signal an error for unknown symbols and heads, and for any other
kind of FORM."
  (cond
   ((stringp form)
    (regexp-quote form))
   ((integerp form)
    (regexp-quote (char-to-string form)))
   ((symbolp form)
    (let ((info (rx-info form)))
      (if (stringp info)
          info
        (unless info
          (error "Unknown rx form `%s'" form))
        (funcall (car info) form))))
   ((consp form)
    (let* ((head (car form))
           (info (rx-info head)))
      (if (not (consp info))
          (error "Unknown rx form `%s'" head)
        (let ((regexp (funcall (car info) form)))
          (if (and (not no-group)
                   (not (string-match "\\`\\\\[(]" regexp)))
              (concat "\\(?:" regexp "\\)")
            regexp)))))
   (t
    (error "rx syntax error at `%s'" form))))
12c64503 GM |
663 | |
664 | ||
665 | ;;;###autoload | |
ccfbe679 SM |
666 | (defmacro rx (&rest regexps) |
667 | "Translate regular expressions REGEXPS in sexp form to a regexp string. | |
668 | REGEXPS is a non-empty sequence of forms of the sort listed below. | |
12c64503 GM |
669 | See also `rx-to-string' for how to do such a translation at run-time. |
670 | ||
671 | The following are valid subforms of regular expressions in sexp | |
672 | notation. | |
673 | ||
674 | STRING | |
675 | matches string STRING literally. | |
676 | ||
677 | CHAR | |
678 | matches character CHAR literally. | |
679 | ||
ccfbe679 | 680 | `not-newline', `nonl' |
12c64503 GM |
681 | matches any character except a newline. |
682 | . | |
683 | `anything' | |
684 | matches any character | |
685 | ||
ccfbe679 SM |
686 | `(any SET ...)' |
687 | `(in SET ...)' | |
688 | `(char SET ...)' | |
689 | matches any character in SET .... SET may be a character or string. | |
12c64503 | 690 | Ranges of characters can be specified as `A-Z' in strings. |
ccfbe679 | 691 | Ranges may also be specified as conses like `(?A . ?Z)'. |
12c64503 | 692 | |
ccfbe679 SM |
693 | SET may also be the name of a character class: `digit', |
694 | `control', `hex-digit', `blank', `graph', `print', `alnum', | |
695 | `alpha', `ascii', `nonascii', `lower', `punct', `space', `upper', | |
696 | `word', or one of their synonyms. | |
12c64503 | 697 | |
ccfbe679 SM |
698 | `(not (any SET ...))' |
699 | matches any character not in SET ... | |
12c64503 | 700 | |
ccfbe679 | 701 | `line-start', `bol' |
12c64503 GM |
702 | matches the empty string, but only at the beginning of a line |
703 | in the text being matched | |
704 | ||
ccfbe679 | 705 | `line-end', `eol' |
12c64503 GM |
706 | is similar to `line-start' but matches only at the end of a line |
707 | ||
ccfbe679 | 708 | `string-start', `bos', `bot' |
12c64503 GM |
709 | matches the empty string, but only at the beginning of the |
710 | string being matched against. | |
711 | ||
ccfbe679 | 712 | `string-end', `eos', `eot' |
12c64503 GM |
713 | matches the empty string, but only at the end of the |
714 | string being matched against. | |
715 | ||
716 | `buffer-start' | |
717 | matches the empty string, but only at the beginning of the | |
ccfbe679 | 718 | buffer being matched against. Actually equivalent to `string-start'. |
12c64503 GM |
719 | |
720 | `buffer-end' | |
721 | matches the empty string, but only at the end of the | |
ccfbe679 | 722 | buffer being matched against. Actually equivalent to `string-end'. |
12c64503 GM |
723 | |
724 | `point' | |
725 | matches the empty string, but only at point. | |
726 | ||
ccfbe679 | 727 | `word-start', `bow' |
5e3fc9eb | 728 | matches the empty string, but only at the beginning of a word. |
12c64503 | 729 | |
ccfbe679 | 730 | `word-end', `eow' |
12c64503 GM |
731 | matches the empty string, but only at the end of a word. |
732 | ||
733 | `word-boundary' | |
734 | matches the empty string, but only at the beginning or end of a | |
735 | word. | |
736 | ||
737 | `(not word-boundary)' | |
ccfbe679 | 738 | `not-word-boundary' |
12c64503 GM |
739 | matches the empty string, but not at the beginning or end of a |
740 | word. | |
741 | ||
5e3fc9eb GM |
742 | `symbol-start' |
743 | matches the empty string, but only at the beginning of a symbol. | |
744 | ||
745 | `symbol-end' | |
746 | matches the empty string, but only at the end of a symbol. | |
747 | ||
ccfbe679 | 748 | `digit', `numeric', `num' |
12c64503 GM |
749 | matches 0 through 9. |
750 | ||
ccfbe679 | 751 | `control', `cntrl' |
12c64503 GM |
752 | matches ASCII control characters. |
753 | ||
ccfbe679 | 754 | `hex-digit', `hex', `xdigit' |
12c64503 GM |
755 | matches 0 through 9, a through f and A through F. |
756 | ||
757 | `blank' | |
758 | matches space and tab only. | |
759 | ||
ccfbe679 | 760 | `graphic', `graph' |
12c64503 GM |
761 | matches graphic characters--everything except ASCII control chars, |
762 | space, and DEL. | |
763 | ||
ccfbe679 | 764 | `printing', `print' |
12c64503 GM |
765 | matches printing characters--everything except ASCII control chars |
766 | and DEL. | |
767 | ||
ccfbe679 | 768 | `alphanumeric', `alnum' |
12c64503 GM |
769 | matches letters and digits. (But at present, for multibyte characters, |
770 | it matches anything that has word syntax.) | |
771 | ||
ccfbe679 | 772 | `letter', `alphabetic', `alpha' |
12c64503 GM |
773 | matches letters. (But at present, for multibyte characters, |
774 | it matches anything that has word syntax.) | |
775 | ||
776 | `ascii' | |
777 | matches ASCII (unibyte) characters. | |
778 | ||
779 | `nonascii' | |
780 | matches non-ASCII (multibyte) characters. | |
781 | ||
ccfbe679 | 782 | `lower', `lower-case' |
12c64503 GM |
783 | matches anything lower-case. |
784 | ||
ccfbe679 | 785 | `upper', `upper-case' |
12c64503 GM |
786 | matches anything upper-case. |
787 | ||
ccfbe679 | 788 | `punctuation', `punct' |
12c64503 GM |
789 | matches punctuation. (But at present, for multibyte characters, |
790 | it matches anything that has non-word syntax.) | |
791 | ||
ccfbe679 | 792 | `space', `whitespace', `white' |
12c64503 GM |
793 | matches anything that has whitespace syntax. |
794 | ||
ccfbe679 | 795 | `word', `wordchar' |
12c64503 GM |
796 | matches anything that has word syntax. |
797 | ||
ccfbe679 SM |
798 | `not-wordchar' |
799 | matches anything that has non-word syntax. | |
800 | ||
12c64503 GM |
801 | `(syntax SYNTAX)' |
802 | matches a character with syntax SYNTAX. SYNTAX must be one | |
ccfbe679 SM |
803 | of the following symbols, or a symbol corresponding to the syntax |
804 | character, e.g. `\\.' for `\\s.'. | |
12c64503 GM |
805 | |
806 | `whitespace' (\\s- in string notation) | |
807 | `punctuation' (\\s.) | |
808 | `word' (\\sw) | |
809 | `symbol' (\\s_) | |
810 | `open-parenthesis' (\\s() | |
811 | `close-parenthesis' (\\s)) | |
812 | `expression-prefix' (\\s') | |
813 | `string-quote' (\\s\") | |
814 | `paired-delimiter' (\\s$) | |
815 | `escape' (\\s\\) | |
816 | `character-quote' (\\s/) | |
817 | `comment-start' (\\s<) | |
818 | `comment-end' (\\s>) | |
740b7c2d EZ |
819 | `string-delimiter' (\\s|) |
820 | `comment-delimiter' (\\s!) | |
12c64503 GM |
821 | |
822 | `(not (syntax SYNTAX))' | |
ccfbe679 | 823 | matches a character that doesn't have syntax SYNTAX. |
12c64503 GM |
824 | |
825 | `(category CATEGORY)' | |
826 | matches a character with category CATEGORY. CATEGORY must be | |
827 | either a character to use for C, or one of the following symbols. | |
828 | ||
829 | `consonant' (\\c0 in string notation) | |
830 | `base-vowel' (\\c1) | |
831 | `upper-diacritical-mark' (\\c2) | |
832 | `lower-diacritical-mark' (\\c3) | |
833 | `tone-mark' (\\c4) | |
834 | `symbol' (\\c5) | |
835 | `digit' (\\c6) | |
836 | `vowel-modifying-diacritical-mark' (\\c7) | |
837 | `vowel-sign' (\\c8) | |
838 | `semivowel-lower' (\\c9) | |
839 | `not-at-end-of-line' (\\c<) | |
840 | `not-at-beginning-of-line' (\\c>) | |
841 | `alpha-numeric-two-byte' (\\cA) | |
842 | `chinse-two-byte' (\\cC) | |
843 | `greek-two-byte' (\\cG) | |
844 | `japanese-hiragana-two-byte' (\\cH) | |
845 | `indian-tow-byte' (\\cI) | |
846 | `japanese-katakana-two-byte' (\\cK) | |
847 | `korean-hangul-two-byte' (\\cN) | |
848 | `cyrillic-two-byte' (\\cY) | |
ccfbe679 | 849 | `combining-diacritic' (\\c^) |
12c64503 GM |
850 | `ascii' (\\ca) |
851 | `arabic' (\\cb) | |
852 | `chinese' (\\cc) | |
853 | `ethiopic' (\\ce) | |
854 | `greek' (\\cg) | |
855 | `korean' (\\ch) | |
856 | `indian' (\\ci) | |
857 | `japanese' (\\cj) | |
858 | `japanese-katakana' (\\ck) | |
859 | `latin' (\\cl) | |
860 | `lao' (\\co) | |
861 | `tibetan' (\\cq) | |
862 | `japanese-roman' (\\cr) | |
863 | `thai' (\\ct) | |
864 | `vietnamese' (\\cv) | |
865 | `hebrew' (\\cw) | |
866 | `cyrillic' (\\cy) | |
867 | `can-break' (\\c|) | |
868 | ||
869 | `(not (category CATEGORY))' | |
ccfbe679 | 870 | matches a character that doesn't have category CATEGORY. |
12c64503 GM |
871 | |
872 | `(and SEXP1 SEXP2 ...)' | |
ccfbe679 SM |
873 | `(: SEXP1 SEXP2 ...)' |
874 | `(seq SEXP1 SEXP2 ...)' | |
875 | `(sequence SEXP1 SEXP2 ...)' | |
12c64503 GM |
876 | matches what SEXP1 matches, followed by what SEXP2 matches, etc. |
877 | ||
878 | `(submatch SEXP1 SEXP2 ...)' | |
ccfbe679 | 879 | `(group SEXP1 SEXP2 ...)' |
12c64503 GM |
880 | like `and', but makes the match accessible with `match-end', |
881 | `match-beginning', and `match-string'. | |
882 | ||
883 | `(group SEXP1 SEXP2 ...)' | |
884 | another name for `submatch'. | |
885 | ||
886 | `(or SEXP1 SEXP2 ...)' | |
ccfbe679 | 887 | `(| SEXP1 SEXP2 ...)' |
12c64503 GM |
888 | matches anything that matches SEXP1 or SEXP2, etc. If all |
889 | args are strings, use `regexp-opt' to optimize the resulting | |
890 | regular expression. | |
891 | ||
892 | `(minimal-match SEXP)' | |
893 | produce a non-greedy regexp for SEXP. Normally, regexps matching | |
740b7c2d | 894 | zero or more occurrences of something are \"greedy\" in that they |
12c64503 GM |
895 | match as much as they can, as long as the overall regexp can |
896 | still match. A non-greedy regexp matches as little as possible. | |
897 | ||
898 | `(maximal-match SEXP)' | |
0a6cac62 | 899 | produce a greedy regexp for SEXP. This is the default. |
12c64503 | 900 | |
ccfbe679 SM |
901 | Below, `SEXP ...' represents a sequence of regexp forms, treated as if |
902 | enclosed in `(and ...)'. | |
12c64503 | 903 | |
ccfbe679 SM |
904 | `(zero-or-more SEXP ...)' |
905 | `(0+ SEXP ...)' | |
906 | matches zero or more occurrences of what SEXP ... matches. | |
12c64503 | 907 | |
ccfbe679 SM |
908 | `(* SEXP ...)' |
909 | like `zero-or-more', but always produces a greedy regexp, independent | |
910 | of `rx-greedy-flag'. | |
12c64503 | 911 | |
ccfbe679 SM |
912 | `(*? SEXP ...)' |
913 | like `zero-or-more', but always produces a non-greedy regexp, | |
914 | independent of `rx-greedy-flag'. | |
a1506d29 | 915 | |
ccfbe679 SM |
916 | `(one-or-more SEXP ...)' |
917 | `(1+ SEXP ...)' | |
918 | matches one or more occurrences of what SEXP ... matches. |
12c64503 | 919 | |
ccfbe679 | 920 | `(+ SEXP ...)' |
12c64503 GM |
921 | like `one-or-more', but always produces a greedy regexp. |
922 | ||
ccfbe679 | 923 | `(+? SEXP ...)' |
12c64503 GM |
924 | like `one-or-more', but always produces a non-greedy regexp. |
925 | ||
ccfbe679 SM |
926 | `(zero-or-one SEXP ...)' |
927 | `(optional SEXP ...)' | |
928 | `(opt SEXP ...)' | |
12c64503 | 929 | matches zero or one occurrences of what SEXP ... matches. |
a1506d29 | 930 | |
ccfbe679 | 931 | `(? SEXP ...)' |
12c64503 GM |
932 | like `zero-or-one', but always produces a greedy regexp. |
933 | ||
ccfbe679 | 934 | `(?? SEXP ...)' |
12c64503 GM |
935 | like `zero-or-one', but always produces a non-greedy regexp. |
936 | ||
937 | `(repeat N SEXP)' | |
ccfbe679 SM |
938 | `(= N SEXP ...)' |
939 | matches N occurrences. | |
940 | ||
941 | `(>= N SEXP ...)' | |
942 | matches N or more occurrences. | |
12c64503 GM |
943 | |
944 | `(repeat N M SEXP)' | |
ccfbe679 SM |
945 | `(** N M SEXP ...)' |
946 | matches N to M occurrences. | |
947 | ||
948 | `(backref N)' |
949 | matches what was matched previously by submatch N. |
12c64503 | 950 |
12c64503 | 957 | `(eval FORM)' |
942269e7 JB |
958 | evaluate FORM and insert result. If result is a string, |
959 | `regexp-quote' it. | |
12c64503 GM |
960 | |
961 | `(regexp REGEXP)' | |
942269e7 | 962 | include REGEXP in string notation in the result." |
ccfbe679 SM |
963 | (cond ((null regexps) |
964 | (error "No regexp")) | |
965 | ((cdr regexps) | |
966 | (rx-to-string `(and ,@regexps) t)) | |
967 | (t | |
968 | (rx-to-string (car regexps) t)))) | |
969 | \f | |
970 | ;; ;; sregex.el replacement | |
971 | ||
972 | ;; ;;;###autoload (provide 'sregex) | |
973 | ;; ;;;###autoload (autoload 'sregex "rx") | |
974 | ;; (defalias 'sregex 'rx-to-string) | |
975 | ;; ;;;###autoload (autoload 'sregexq "rx" nil nil 'macro) | |
976 | ;; (defalias 'sregexq 'rx) | |
977 | \f | |
12c64503 GM |
978 | (provide 'rx) |
979 | ||
b62c13c2 | 980 | ;; arch-tag: 12d01a63-0008-42bb-ab8c-1c7d63be370b |
12c64503 | 981 | ;;; rx.el ends here |