Update copyright notices for 2013.
[bpt/emacs.git] / lisp / emacs-lisp / rx.el
CommitLineData
12c64503
GM
1;;; rx.el --- sexp notation for regular expressions
2
ab422c4d 3;; Copyright (C) 2001-2013 Free Software Foundation, Inc.
12c64503
GM
4
5;; Author: Gerd Moellmann <gerd@gnu.org>
6;; Maintainer: FSF
7;; Keywords: strings, regexps, extensions
8
9;; This file is part of GNU Emacs.
10
d6cba7ae 11;; GNU Emacs is free software: you can redistribute it and/or modify
12c64503 12;; it under the terms of the GNU General Public License as published by
d6cba7ae
GM
13;; the Free Software Foundation, either version 3 of the License, or
14;; (at your option) any later version.
12c64503
GM
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
d6cba7ae 22;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
12c64503
GM
23
24;;; Commentary:
25
26;; This is another implementation of sexp-form regular expressions.
27;; It was unfortunately written without being aware of the Sregex
28;; package coming with Emacs, but as things stand, Rx completely
29;; covers all regexp features, which Sregex doesn't, doesn't suffer
30;; from the bugs mentioned in the commentary section of Sregex, and
31;; uses a nicer syntax (IMHO, of course :-).
32
ccfbe679
SM
33;; This significantly extended version of the original is almost
34;; compatible with Sregex. The only incompatibility I (fx) know of is
35;; that the `repeat' form can't have multiple regexp args.
36
37;; Now alternative forms are provided for a degree of compatibility
f47188fe
CY
38;; with Olin Shivers' attempted definitive SRE notation. SRE forms
39;; not catered for include: dsm, uncase, w/case, w/nocase, ,@<exp>,
ccfbe679
SM
40;; ,<exp>, (word ...), word+, posix-string, and character class forms.
41;; Some forms are inconsistent with SRE, either for historical reasons
42;; or because of the implementation -- simple translation into Emacs
43;; regexp strings. These include: any, word. Also, case-sensitivity
44;; and greediness are controlled by variables external to the regexp,
45;; and you need to feed the forms to the `posix-' functions to get
46;; SRE's POSIX semantics. There are probably more difficulties.
47
12c64503
GM
48;; Rx translates a sexp notation for regular expressions into the
49;; usual string notation. The translation can be done at compile-time
50;; by using the `rx' macro. It can be done at run-time by calling
51;; function `rx-to-string'. See the documentation of `rx' for a
52;; complete description of the sexp notation.
53;;
54;; Some examples of string regexps and their sexp counterparts:
55;;
56;; "^[a-z]*"
57;; (rx (and line-start (0+ (in "a-z"))))
58;;
59;; "\n[^ \t]"
60;; (rx (and "\n" (not blank))), or
61;; (rx (and "\n" (not (any " \t"))))
62;;
63;; "\\*\\*\\* EOOH \\*\\*\\*\n"
64;; (rx "*** EOOH ***\n")
65;;
66;; "\\<\\(catch\\|finally\\)\\>[^_]"
67;; (rx (and word-start (submatch (or "catch" "finally")) word-end
68;; (not (any ?_))))
69;;
70;; "[ \t\n]*:\\([^:]+\\|$\\)"
71;; (rx (and (zero-or-more (in " \t\n")) ":"
72;; (submatch (or line-end (one-or-more (not (any ?:)))))))
73;;
74;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*"
75;; (rx (and line-start
76;; "content-transfer-encoding:"
c53f9b3b 77;; (+ (? ?\n)) blank
12c64503 78;; "quoted-printable"
c53f9b3b 79;; (+ (? ?\n)) blank))
12c64503
GM
80;;
81;; (concat "^\\(?:" something-else "\\)")
82;; (rx (and line-start (eval something-else))), statically or
83;; (rx-to-string `(and line-start ,something-else)), dynamically.
84;;
85;; (regexp-opt '(STRING1 STRING2 ...))
86;; (rx (or STRING1 STRING2 ...)), or in other words, `or' automatically
87;; calls `regexp-opt' as needed.
88;;
89;; "^;;\\s-*\n\\|^\n"
90;; (rx (or (and line-start ";;" (0+ space) ?\n)
91;; (and line-start ?\n)))
92;;
93;; "\\$[I]d: [^ ]+ \\([^ ]+\\) "
a1506d29
JB
94;; (rx (and "$Id: "
95;; (1+ (not (in " ")))
12c64503
GM
96;; " "
97;; (submatch (1+ (not (in " "))))
c53f9b3b 98;; " "))
12c64503
GM
99;;
100;; "\\\\\\\\\\[\\w+"
101;; (rx (and ?\\ ?\\ ?\[ (1+ word)))
102;;
103;; etc.
104
105;;; History:
a1506d29 106;;
12c64503
GM
107
108;;; Code:
109
daa9f1a6
SM
110;; FIXME: support macros.
111
(defvar rx-constituents              ;Not `const' because some modes extend it.
  '((and                . (rx-and 1 nil))
    (seq                . and)                  ; SRE
    (:                  . and)                  ; SRE
    (sequence           . and)                  ; sregex
    (or                 . (rx-or 1 nil))
    (|                  . or)                   ; SRE
    (not-newline        . ".")
    (nonl               . not-newline)          ; SRE
    (anything           . (rx-anything 0 nil))
    (any                . (rx-any 1 nil rx-check-any)) ; inconsistent with SRE
    ;; Second, deliberate entry for `any': used when the symbol stands
    ;; alone instead of heading a form.
    (any                . ".")                  ; sregex
    (in                 . any)
    (char               . any)                  ; sregex
    (not-char           . (rx-not-char 1 nil rx-check-any)) ; sregex
    (not                . (rx-not 1 1 rx-check-not))
    (repeat             . (rx-repeat 2 nil))
    (=                  . (rx-= 2 nil))         ; SRE
    (>=                 . (rx->= 2 nil))        ; SRE
    (**                 . (rx-** 2 nil))        ; SRE
    (submatch           . (rx-submatch 1 nil))  ; SRE
    (group              . submatch)             ; sregex
    (submatch-n         . (rx-submatch-n 2 nil))
    (group-n            . submatch-n)
    (zero-or-more       . (rx-kleene 1 nil))
    (one-or-more        . (rx-kleene 1 nil))
    (zero-or-one        . (rx-kleene 1 nil))
    (\?                 . zero-or-one)          ; SRE
    (\??                . zero-or-one)
    (*                  . zero-or-more)         ; SRE
    (*?                 . zero-or-more)
    (0+                 . zero-or-more)
    (+                  . one-or-more)          ; SRE
    (+?                 . one-or-more)
    (1+                 . one-or-more)
    (optional           . zero-or-one)
    (opt                . zero-or-one)          ; sregex
    (minimal-match      . (rx-greedy 1 1))
    (maximal-match      . (rx-greedy 1 1))
    (backref            . (rx-backref 1 1 rx-check-backref))
    (line-start         . "^")
    (bol                . line-start)           ; SRE
    (line-end           . "$")
    (eol                . line-end)             ; SRE
    (string-start       . "\\`")
    (bos                . string-start)         ; SRE
    (bot                . string-start)         ; sregex
    (string-end         . "\\'")
    (eos                . string-end)           ; SRE
    (eot                . string-end)           ; sregex
    (buffer-start       . "\\`")
    (buffer-end         . "\\'")
    (point              . "\\=")
    (word-start         . "\\<")
    (bow                . word-start)           ; SRE
    (word-end           . "\\>")
    (eow                . word-end)             ; SRE
    (word-boundary      . "\\b")
    (not-word-boundary  . "\\B")                ; sregex
    (symbol-start       . "\\_<")
    (symbol-end         . "\\_>")
    (syntax             . (rx-syntax 1 1))
    (not-syntax         . (rx-not-syntax 1 1))  ; sregex
    (category           . (rx-category 1 1 rx-check-category))
    (eval               . (rx-eval 1 1))
    (regexp             . (rx-regexp 1 1 stringp))
    (regex              . regexp)               ; sregex
    (digit              . "[[:digit:]]")
    (numeric            . digit)                ; SRE
    (num                . digit)                ; SRE
    (control            . "[[:cntrl:]]")        ; SRE
    (cntrl              . control)              ; SRE
    (hex-digit          . "[[:xdigit:]]")       ; SRE
    (hex                . hex-digit)            ; SRE
    (xdigit             . hex-digit)            ; SRE
    (blank              . "[[:blank:]]")        ; SRE
    (graphic            . "[[:graph:]]")        ; SRE
    (graph              . graphic)              ; SRE
    (printing           . "[[:print:]]")        ; SRE
    (print              . printing)             ; SRE
    (alphanumeric       . "[[:alnum:]]")        ; SRE
    (alnum              . alphanumeric)         ; SRE
    (letter             . "[[:alpha:]]")
    (alphabetic         . letter)               ; SRE
    (alpha              . letter)               ; SRE
    (ascii              . "[[:ascii:]]")        ; SRE
    (nonascii           . "[[:nonascii:]]")
    (lower              . "[[:lower:]]")        ; SRE
    (lower-case         . lower)                ; SRE
    (punctuation        . "[[:punct:]]")        ; SRE
    (punct              . punctuation)          ; SRE
    (space              . "[[:space:]]")        ; SRE
    (whitespace         . space)                ; SRE
    (white              . space)                ; SRE
    (upper              . "[[:upper:]]")        ; SRE
    (upper-case         . upper)                ; SRE
    (word               . "[[:word:]]")         ; inconsistent with SRE
    (wordchar           . word)                 ; sregex
    (not-wordchar       . "\\W"))
  "Alist describing the constituents of sexp regular expressions.
Every element looks like (SYMBOL . DEFN), where SYMBOL is a name that
may appear in a sexp regexp.

A string DEFN is the regexp text SYMBOL translates to.
A symbol DEFN is an alias: the definition of DEFN is used instead,
recursively.
Any other DEFN is a list (FUNCTION MIN-ARGS MAX-ARGS PREDICATE).
FUNCTION generates the code for SYMBOL.  MIN-ARGS and MAX-ARGS bound
the number of arguments a form headed by SYMBOL may take; a nil
MAX-ARGS means there is no upper bound.  PREDICATE, when given, must
be satisfied by every argument.

A SYMBOL may be listed twice, once with a list DEFN and once with a
string DEFN (see `any'); `rx-info' picks the entry that fits the way
the symbol is used.")
222
223
(defconst rx-syntax
  '((whitespace         . ?-)
    (punctuation        . ?.)
    (word               . ?w)
    (symbol             . ?_)
    (open-parenthesis   . ?\()
    (close-parenthesis  . ?\))
    (expression-prefix  . ?\')
    (string-quote       . ?\")
    (paired-delimiter   . ?$)
    (escape             . ?\\)
    (character-quote    . ?/)
    (comment-start      . ?<)
    (comment-end        . ?>)
    (string-delimiter   . ?|)
    (comment-delimiter  . ?!))
  "Alist from Rx syntax-class names to syntax characters.
Elements are (SYMBOL . CHAR).  SYMBOL is what may be written in
`(syntax SYMBOL)'; CHAR is the matching syntax character, i.e. the
one that follows \\s or \\S in a regular expression string.")
245
246
(defconst rx-categories
  '((consonant                          . ?0)
    (base-vowel                         . ?1)
    (upper-diacritical-mark             . ?2)
    (lower-diacritical-mark             . ?3)
    (tone-mark                          . ?4)
    (symbol                             . ?5)
    (digit                              . ?6)
    (vowel-modifying-diacritical-mark   . ?7)
    (vowel-sign                         . ?8)
    (semivowel-lower                    . ?9)
    (not-at-end-of-line                 . ?<)
    (not-at-beginning-of-line           . ?>)
    (alpha-numeric-two-byte             . ?A)
    (chinese-two-byte                   . ?C)
    ;; Historical misspelling, kept so existing `(category
    ;; chinse-two-byte)' forms keep working.
    (chinse-two-byte                    . ?C)
    (greek-two-byte                     . ?G)
    (japanese-hiragana-two-byte         . ?H)
    (indian-two-byte                    . ?I)
    (japanese-katakana-two-byte         . ?K)
    (korean-hangul-two-byte             . ?N)
    (cyrillic-two-byte                  . ?Y)
    (combining-diacritic                . ?^)
    (ascii                              . ?a)
    (arabic                             . ?b)
    (chinese                            . ?c)
    (ethiopic                           . ?e)
    (greek                              . ?g)
    (korean                             . ?h)
    (indian                             . ?i)
    (japanese                           . ?j)
    (japanese-katakana                  . ?k)
    (latin                              . ?l)
    (lao                                . ?o)
    (tibetan                            . ?q)
    (japanese-roman                     . ?r)
    (thai                               . ?t)
    (vietnamese                         . ?v)
    (hebrew                             . ?w)
    (cyrillic                           . ?y)
    (can-break                          . ?|))
  "Alist mapping symbols to category characters.
Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid
symbol in `(category SYMBOL)', and CHAR is the category character
corresponding to SYMBOL, as it would be used with `\\c' or `\\C' in
regular expression strings.

`chinse-two-byte' is a misspelled alias of `chinese-two-byte' that
is retained for backward compatibility.")
292
293
(defvar rx-greedy-flag t
  "Whether `zero-or-one', `zero-or-more' and `one-or-more' are greedy.
A non-nil value makes those forms translate to greedy operators; nil
makes them translate to non-greedy ones.  Dynamically bound.")
297
298
(defun rx-info (op head)
  "Look up the parsing/code generation info for OP in `rx-constituents'.
The space character (ASCII 32) is looked up as the symbol `?', and
the character `?' as the symbol `??'.

HEAD non-nil says OP is the head of a sexp; nil says OP stands alone.
Aliases (symbol definitions) are followed until a non-symbol
definition or nil is reached, and that value is returned."
  ;; `(? X)' and `(?? X)' are read as forms headed by characters;
  ;; translate those characters to the symbols the alist uses.
  (setq op (cond ((eq op ?\s) '\?)
                 ((eq op ??) '\??)
                 (t op)))
  (while (and op (symbolp op))
    (let* ((entry (assq op rx-constituents))
           (defn (cdr entry)))
      ;; A head wants a list definition and a standalone symbol wants
      ;; a string.  When the first entry is of the other kind, try a
      ;; later entry for the same symbol.
      (when (if head (stringp defn) (consp defn))
        (let ((alt (cdr (assq op (cdr (memq entry rx-constituents))))))
          (when (and alt
                     (not (if head (stringp alt) (consp alt))))
            (setq defn alt))))
      (setq op defn)))
  op)
a1506d29 321
12c64503
GM
322
(defun rx-check (form)
  "Validate FORM against the parsing info of its head.
Signal an error if FORM is not a list, has fewer or more arguments
than the head's info allows, or has an argument that fails the
head's predicate."
  (unless (listp form)
    (error "rx `%s' needs argument(s)" form))
  (let* ((head (car form))
         (info (rx-info head 'head))
         (nargs (1- (length form)))
         (min-args (nth 1 info))
         (max-args (nth 2 info))
         (type-pred (nth 3 info)))
    (if (and min-args (< nargs min-args))
        (error "rx form `%s' requires at least %d args"
               head min-args))
    ;; A nil MAX-ARGS means "no upper limit".
    (if (and max-args (> nargs max-args))
        (error "rx form `%s' accepts at most %d args"
               head max-args))
    (if type-pred
        (dolist (arg (cdr form))
          (or (funcall type-pred arg)
              (error "rx form `%s' requires args satisfying `%s'"
                     head type-pred))))))
345
346
5dbe5c8f
CY
(defun rx-group-if (regexp group)
  "Return REGEXP, wrapped in a shy group if GROUP calls for it.
GROUP nil never wraps.  GROUP t always wraps.  GROUP `*' (operand
of a repetition) wraps unless REGEXP is atomic.  GROUP `:' (element
of a concatenation) wraps unless REGEXP, minus one trailing
repetition operator, is atomic.  GROUP `|' (branch of an
alternation) never wraps.  Any other GROUP wraps unless
`rx-atomic-p' in lax mode accepts REGEXP."
  (let ((wrap
         (cond
          ;; Operand of a repetition.
          ((eq group '*)
           (and (not (rx-atomic-p regexp)) group))
          ;; Element of a concatenation: ignore one postfix operator
          ;; at the end when testing for atomicity.
          ((eq group ':)
           (let ((base (if (string-match
                            "\\(?:[?*+]\\??\\|\\\\{[0-9]*,?[0-9]*\\\\}\\)\\'"
                            regexp)
                           (substring regexp 0 (match-beginning 0))
                         regexp)))
             (and (not (rx-atomic-p base)) group)))
          ;; Branch of an alternation.
          ((eq group '|) nil)
          ;; Unconditional.
          ((eq group t) t)
          ((rx-atomic-p regexp t) nil)
          (t group))))
    (if wrap
        (concat "\\(?:" regexp "\\)")
      regexp)))
369
370
;; Declared special without a value; it is dynamically bound by some
;; of the functions below (it is the second parameter of `rx-form').
(defvar rx-parent)
373
374
12c64503
GM
(defun rx-and (form)
  "Translate FORM, which is `(and FORM1 ...)', to a regexp string.
The translations of the sub-forms are concatenated.  The result is
passed through `rx-group-if' when `rx-parent' is `*' or t."
  (rx-check form)
  (let ((body (mapconcat (lambda (sub) (rx-form sub ':))
                         (cdr form)
                         nil))
        (group (if (memq rx-parent '(* t)) rx-parent)))
    (rx-group-if body group)))
12c64503
GM
382
383
(defun rx-or (form)
  "Translate FORM, which is `(or FORM1 ...)', to a regexp string.
When every alternative is a string the result comes from
`regexp-opt'; otherwise the translated alternatives are joined
with \"\\\\|\"."
  (rx-check form)
  (let* ((alts (cdr form))
         (regexp (if (memq nil (mapcar 'stringp alts))
                     ;; At least one alternative is not a string.
                     (mapconcat (lambda (alt) (rx-form alt '|)) alts "\\|")
                   (regexp-opt alts))))
    (rx-group-if regexp
                 (and (memq rx-parent '(: * t)) rx-parent))))
392
393
(defun rx-anything (form)
  "Translate the symbol `anything' to a regexp matching any character.
FORM must not be a cons; otherwise an error is signaled.  The
result is the translation of `(or not-newline ?\\n)'."
  (when (consp form)
    (error "rx `anything' syntax error: %s" form))
  (rx-or (list 'or 'not-newline ?\n)))
399
400
(defun rx-any-delete-from-range (char ranges)
  "Destructively remove character CHAR from RANGES and return RANGES.
RANGES is a list of characters and (FROM . TO) conses.  CHAR is
removed when it is an element itself, or the FROM or TO of a cons;
characters strictly inside a range are not looked at."
  (let (range)
    (cond
     ;; CHAR is a plain element.
     ((memq char ranges)
      (delq char ranges))
     ;; CHAR is the lower edge of a range: move the edge up, or
     ;; collapse a two-character range to the remaining character.
     ((setq range (assq char ranges))
      (let ((above (1+ char)))
        (if (eq above (cdr range))
            (setcar (memq range ranges) above)
          (setcar range above)))
      ranges)
     ;; CHAR is the upper edge of a range: likewise, downwards.
     ((setq range (rassq char ranges))
      (let ((below (1- char)))
        (if (eq below (car range))
            (setcar (memq range ranges) below)
          (setcdr range below)))
      ranges)
     (t ranges))))
416
1f6f7722 417
5dbe5c8f
CY
(defun rx-any-condense-range (args)
  "Normalize ARGS, the checked arguments of an Rx `any' form.
ARGS is a list of strings, characters and (FROM . TO) conses; the
conses may be modified.  Overlapping or adjacent ranges are merged.
The value lists single characters and remaining ranges in
ascending order, followed by the strings with duplicates removed."
  (let (strings ranges)
    ;; Split ARGS into strings and ranges; a lone character is
    ;; treated as a one-character range for now.
    (dolist (arg args)
      (cond ((stringp arg) (push arg strings))
            ((numberp arg) (push (cons arg arg) ranges))
            (t (push arg ranges))))
    (setq ranges (sort ranges #'car-less-than-car))
    ;; Merge each range into its predecessor when they overlap or
    ;; touch.
    (let ((cell ranges))
      (while (cdr cell)
        (let ((this (car cell))
              (next (cadr cell)))
          (if (< (cdr this) (1- (car next)))
              (setq cell (cdr cell))
            (setcdr this (max (cdr this) (cdr next)))
            (setcdr cell (cddr cell))))))
    ;; Ranges of one or two characters become plain characters.
    (let (items)
      (dolist (range ranges)
        (let ((from (car range))
              (to (cdr range)))
          (cond ((= from to) (push from items))
                ((= (1+ from) to) (push from items) (push to items))
                (t (push range items)))))
      (nconc (nreverse items) (delete-dups strings)))))
447
448
(defun rx-check-any-string (str)
  "Parse STR, a string argument of Rx `any', into a list.
Each `X-Y' sequence in STR yields the cons (X . Y) when X is less
than Y, the single character X when X equals Y, and nothing when X
is greater than Y.  Every other character yields itself.  Signal an
error if STR is empty."
  (when (zerop (length str))
    (error "String arg for Rx `any' must not be empty"))
  (let ((pos 0)
        (items nil))                    ; collected in reverse order
    (while (string-match ".-." str pos)
      (let* ((beg (match-beginning 0))
             (end (match-end 0))
             (from (aref str beg))
             (to (aref str (1- end))))
        ;; Plain characters in front of the range.
        (dolist (ch (append (substring str pos beg) nil))
          (push ch items))
        (cond ((< from to) (push (cons from to) items))
              ((= from to) (push from items)))
        (setq pos end)))
    ;; Plain characters after the last range.
    (dolist (ch (append (substring str pos) nil))
      (push ch items))
    (nreverse items)))
12c64503 472
12c64503
GM
473
(defun rx-check-any (arg)
  "Check ARG, one argument of an Rx `any' form, and return it as a list.
A character or a pair of characters yields a one-element list.  A
symbol must translate to a \"[[:CLASS:]]\" regexp and yields a
list holding \"[:CLASS:]\".  A string is parsed by
`rx-check-any-string'.  Anything else signals an error."
  (cond
   ((integerp arg)
    (list arg))
   ((symbolp arg)
    (let ((class (ignore-errors (rx-form arg))))
      (unless (and class
                   (string-match "\\`\\[\\[:[-a-z]+:\\]\\]\\'" class))
        (error "Invalid char class `%s' in Rx `any'" arg))
      ;; Drop the outer brackets: "[[:digit:]]" -> "[:digit:]".
      (list (substring class 1 -1))))
   ((and (integerp (car-safe arg)) (integerp (cdr-safe arg)))
    (list arg))
   ((stringp arg)
    (rx-check-any-string arg))
   (t
    (error
     "rx `any' requires string, character, char pair or char class args"))))
491
12c64503
GM
492
(defun rx-any (form)
  "Parse and produce code from FORM, which is `(any ARG ...)'.
Each ARG is checked and converted by `rx-check-any', and the
collected items are condensed by `rx-any-condense-range'.  The
items are then reordered so that `]', `-' and `^' end up in
positions where a bracket expression treats them literally.

A single character whose `regexp-quote'd form is one character long
is returned as that character.  A lone `^' is returned quoted
unless `rx-parent' is `!'.  Everything else is returned as a
bracket expression."
  (rx-check form)
  (let* ((args (rx-any-condense-range
                (apply
                 #'nconc
                 (mapcar #'rx-check-any (cdr form)))))
         m
         s)
    (cond
     ;; single close bracket
     ;;	=> "[]...-]" or "[]...--.]"
     ((memq ?\] args)
      ;; set ] at the beginning
      (setq args (cons ?\] (delq ?\] args)))
      ;; set - at the end
      (if (or (memq ?- args) (assq ?- args))
          (setq args (nconc (rx-any-delete-from-range ?- args)
                            (list ?-)))))
     ;; close bracket starts a range
     ;;  => "[]-....-]" or "[]-.--....]"
     ((setq m (assq ?\] args))
      ;; bring it to the beginning
      (setq args (cons m (delq m args)))
      (cond ((memq ?- args)
             ;; to the end
             (setq args (nconc (delq ?- args) (list ?-))))
            ((setq m (assq ?- args))
             ;; Next to the bracket's range, make the second range.
             ;; Only the tail is searched: passing ARGS itself to
             ;; `delq' would splice ARGS into its own cdr and make
             ;; the list circular.
             (setcdr args (cons m (delq m (cdr args)))))))
     ;; bracket in the end range
     ;;	=> "[]...-]"
     ((setq m (rassq ?\] args))
      ;; set ] at the beginning
      (setq args (cons ?\] (rx-any-delete-from-range ?\] args)))
      ;; set - at the end
      (if (or (memq ?- args) (assq ?- args))
          (setq args (nconc (rx-any-delete-from-range ?- args)
                            (list ?-)))))
     ;; {no close bracket appears}
     ;;
     ;; bring single bar to the beginning
     ((memq ?- args)
      (setq args (cons ?- (delq ?- args))))
     ;; bar start a range, bring it to the beginning
     ((setq m (assq ?- args))
      (setq args (cons m (delq m args))))
     ;;
     ;; hat at the beginning?
     ((or (eq (car args) ?^) (eq (car-safe (car args)) ?^))
      (setq args (if (cdr args)
                     ;; Swap the first two items so ^ is not first.
                     `(,(cadr args) ,(car args) ,@(cddr args))
                   (nconc (rx-any-delete-from-range ?^ args)
                          (list ?^))))))
    ;; some 1-char?
    (if (and (null (cdr args)) (numberp (car args))
             (or (= 1 (length
                       (setq s (regexp-quote (string (car args))))))
                 (and (equal (car args) ?^) ;; unnecessary predicate?
                      (null (eq rx-parent '!)))))
        s
      (concat "["
              (mapconcat
               (lambda (e)
                 (cond
                  ((numberp e) (string e))
                  ((consp e)
                   (if (and (= (1+ (car e)) (cdr e))
                            ;; rx-any-condense-range should
                            ;; prevent this case from happening.
                            (null (memq (car e) '(?\] ?-)))
                            (null (memq (cdr e) '(?\] ?-))))
                       (string (car e) (cdr e))
                     (string (car e) ?- (cdr e))))
                  ;; Character class strings such as "[:digit:]".
                  (e)))
               args
               nil)
              "]"))))
12c64503
GM
571
572
740b7c2d
EZ
(defun rx-check-not (arg)
  "Check ARG, the argument of an Rx `not' form.
Return t when ARG is a symbol translating to a \"[[:CLASS:]]\"
regexp, the symbol `word-boundary', or a form headed by `not',
`any', `in', `syntax' or `category'.  Otherwise signal an error."
  (cond
   ((and (symbolp arg)
         (string-match "\\`\\[\\[:[-a-z]+:\\]\\]\\'"
                       ;; An untranslatable symbol counts as "".
                       (condition-case nil
                           (rx-form arg)
                         (error "")))))
   ((eq arg 'word-boundary))
   ((and (consp arg)
         (memq (car arg) '(not any in syntax category))))
   (t
    (error "rx `not' syntax error: %s" arg)))
  t)
12c64503
GM
585
586
(defun rx-not (form)
  "Translate FORM, which is `(not ...)', to a regexp string.
The argument is translated with `rx-parent' bound to `!' and the
resulting regexp is complemented."
  (rx-check form)
  (let ((inner (rx-form (cadr form) '!))
        ;; The tests below tell \\s from \\S etc., so they must be
        ;; case-sensitive.
        (case-fold-search nil))
    (cond
     ;; Complement of the set containing only `^'.
     ((equal inner "[^]") "[^^]")
     ;; Already negated: drop the negation.
     ((string-match "\\`\\[^" inner)
      (if (and (= (length inner) 4)
               (not (eq rx-parent '!)))
          ;; "[^X]" outside another `not' is just the character X.
          (regexp-quote (substring inner 2 3))
        (concat "[" (substring inner 2))))
     ;; A bracket expression: negate it.
     ((eq ?\[ (aref inner 0))
      (concat "[^" (substring inner 1)))
     ;; Backslash constructs: flip the case of the letter.
     ((string-match "\\`\\\\[scbw]" inner)
      (concat (upcase (substring inner 0 2))
              (substring inner 2)))
     ((string-match "\\`\\\\[SCBW]" inner)
      (concat (downcase (substring inner 0 2))
              (substring inner 2)))
     (t
      (concat "[^" inner "]")))))
608
609
ccfbe679
SM
(defun rx-not-char (form)
  "Translate FORM, which is `(not-char ...)', to a regexp string.
The arguments are handed on as `(not (in ...))'."
  (rx-check form)
  (rx-not (list 'not (cons 'in (cdr form)))))
614
615
(defun rx-not-syntax (form)
  "Translate FORM, which is `(not-syntax SYNTAX)', to a regexp string.
The argument is handed on as `(not (syntax SYNTAX))'."
  (rx-check form)
  (rx-not (list 'not (cons 'syntax (cdr form)))))
620
621
(defun rx-trans-forms (form &optional skip)
  "Return FORM with its trailing sub-forms gathered into one `and' form.
A form (HEAD REST ...) becomes (HEAD (and REST ...)).  SKIP, if
non-nil, is the number of items after the head to leave alone, so
with SKIP 1 `(= N REST ...)' becomes `(= N (and REST ...))'.
FORM itself is returned when there is exactly one trailing
sub-form; otherwise a modified copy is returned."
  (let* ((offset (or skip 0))
         (rest (nthcdr (1+ offset) form)))
    (cond
     ((= 1 (length rest)) form)
     (t
      ;; Work on a copy so the caller's list is not modified.
      (let ((copy (copy-sequence form)))
        (setcdr (nthcdr offset copy) (list (cons 'and rest)))
        copy)))))
634
635
(defun rx-= (form)
  "Translate FORM, which is `(= N ...)', to a regexp string.
N must be a positive integer.  The sub-forms after N are treated as
one sequence, which is repeated exactly N times."
  (rx-check form)
  (let* ((norm (rx-trans-forms form 1))
         (times (nth 1 norm)))
    (or (and (integerp times) (> times 0))
        (error "rx `=' requires positive integer first arg"))
    (format "%s\\{%d\\}" (rx-form (nth 2 norm) '*) times)))
ccfbe679
SM
644
645
(defun rx->= (form)
  "Translate FORM, which is `(>= N ...)', to a regexp string.
N must be a positive integer.  The sub-forms after N are treated as
one sequence, which is repeated N or more times."
  (rx-check form)
  (let* ((norm (rx-trans-forms form 1))
         (times (nth 1 norm)))
    (or (and (integerp times) (> times 0))
        (error "rx `>=' requires positive integer first arg"))
    (format "%s\\{%d,\\}" (rx-form (nth 2 norm) '*) times)))
ccfbe679
SM
654
655
(defun rx-** (form)
  "Translate FORM, which is `(** N M ...)', to a regexp string.
The sub-forms after M are gathered into one `and' form and the
whole is translated as a `repeat' form."
  (rx-check form)
  (let ((norm (rx-trans-forms form 2)))
    (rx-form (cons 'repeat (cdr norm)) '*)))
ccfbe679
SM
660
661
12c64503
GM
(defun rx-repeat (form)
  "Translate a `repeat' FORM to a regexp string.
FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'.
The first kind repeats FORM1 exactly N times, N being a positive
integer.  The second kind repeats FORMS between N and M times,
where N and M are non-negative integers and M is not less than N."
  (rx-check form)
  ;; Several FORMS after N and M: wrap them in one `and'.
  (if (> (length form) 4)
      (setq form (rx-trans-forms form 2)))
  ;; A nil in the M position is dropped.
  (if (null (nth 2 form))
      (setq form (cons (nth 0 form) (cons (nth 1 form) (nthcdr 3 form)))))
  (let ((low (nth 1 form))
        (high (nth 2 form)))
    (cond
     ;; (repeat N FORM1)
     ((= (length form) 3)
      (or (and (integerp low) (> low 0))
          (error "rx `repeat' requires positive integer first arg"))
      (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) low))
     ;; (repeat N M FORM1) with usable bounds
     ((and (integerp high)
           (integerp low)
           (>= low 0)
           (>= high low))
      (format "%s\\{%d,%d\\}" (rx-form (nth 3 form) '*) low high))
     (t
      (error "rx `repeat' range error")))))
684
685
(defun rx-submatch (form)
  "Translate FORM, which is `(submatch ...)', to a regexp string.
The sub-forms are concatenated and enclosed in a capturing group."
  (let ((body (if (/= 2 (length form))
                  ;; Several sub-forms implicitly concatenated.
                  (mapconcat (lambda (sub) (rx-form sub ':))
                             (cdr form)
                             nil)
                ;; Only one sub-form.
                (rx-form (cadr form)))))
    (concat "\\(" body "\\)")))
5dbe5c8f 695
6420d28b
CY
(defun rx-submatch-n (form)
  "Translate FORM, which is `(submatch-n N ...)', to a regexp string.
The sub-forms after N are concatenated and enclosed in an
explicitly numbered group with number N."
  (let* ((number (nth 1 form))
         (body (if (/= 3 (length form))
                   ;; Several sub-forms implicitly concatenated.
                   (mapconcat (lambda (sub) (rx-form sub ':))
                              (cddr form)
                              nil)
                 ;; Only one sub-form.
                 (rx-form (nth 2 form)))))
    (concat "\\(?" (number-to-string number) ":" body "\\)")))
12c64503 706
740b7c2d
EZ
(defun rx-backref (form)
  "Translate FORM, which is `(backref N)', to a regexp string.
The result is a backslash followed by N."
  (rx-check form)
  (let ((number (nth 1 form)))
    (format "\\%d" number)))
711
(defun rx-check-backref (arg)
  "Check ARG, the argument of an Rx `backref' form.
Return t if ARG is an integer from 1 to 9; otherwise signal an
error."
  (if (and (integerp arg)
           (>= arg 1)
           (<= arg 9))
      t
    (error "rx `backref' requires numeric 1<=arg<=9: %s" arg)))
716
12c64503
GM
(defun rx-kleene (form)
  "Translate a repetition FORM to a regexp string.
FORM is `(OP FORM1 ...)', where OP is `zero-or-one',
`zero-or-more', `one-or-more' or one of their aliases.
With OP one of `*', `+', `?' the result is greedy.
With OP one of `*?', `+?', `??' the result is non-greedy.
With any other OP the result is greedy if `rx-greedy-flag' is
non-nil, and non-greedy otherwise."
  (rx-check form)
  (let* ((norm (rx-trans-forms form))
         (head (car norm))
         (operator (cond ((memq head '(* *? 0+ zero-or-more)) "*")
                         ((memq head '(+ +? 1+ one-or-more)) "+")
                         (t "?")))
         ;; `(? X)' and `(?? X)' are read with the characters 32 and
         ;; ?? as their heads, hence the characters in these lists.
         (suffix (cond ((memq head '(* + ?\s)) "")
                       ((memq head '(*? +? ??)) "?")
                       (rx-greedy-flag "")
                       (t "?"))))
    (rx-group-if
     (concat (rx-form (cadr norm) '*) operator suffix)
     (and (memq rx-parent '(t *)) rx-parent))))
c53f9b3b 737
5dbe5c8f
CY
738
(defun rx-atomic-p (r &optional lax)
  "Return non-nil if regexp string R is recognized as atomic.
A regexp is atomic when a suffix operator appended to it applies to
all of it.  For example \"a\", \"[abc]\" and \"\\(ab\\|ab*c\\)\"
are atomic, while \"ab\", \"[ab]c\" and \"ab\\|ab*c\" are not.

The test is conservative: it may return nil for an atomic regexp
\(for instance one that ends in an operator, such as \"a+\"), but
it is not meant to return non-nil for a non-atomic one.

Strings of at most one character are accepted, as are
two-character strings starting with a backslash.  Three-character
strings are accepted when they start with a backslash followed by
one of c, C, s, S or _, or are a bracket expression around one
character other than ^.  Longer strings are accepted only when LAX
is nil and they consist of one bracket expression or one group."
  (let ((len (length r)))
    (cond
     ((<= len 1) t)
     ((= len 2) (= (aref r 0) ?\\))
     ((= len 3)
      (string-match "\\`\\(?:\\\\[cCsS_]\\|\\[[^^]\\]\\)" r))
     ;; In lax mode nothing longer is examined.
     (lax nil)
     (t
      (or
       ;; One complete bracket expression.
       (string-match "\\`\\[^?\]?\\(?:\\[:[a-z]+:]\\|[^\]]\\)*\\]\\'" r)
       ;; One complete group.
       (string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^\)]\\)*\\\\)\\'" r))))))
12c64503
GM
772
773
(defun rx-syntax (form)
  "Translate FORM, which is `(syntax SYMBOL)', to a regexp string.
SYMBOL is looked up in the alist `rx-syntax'.  If it is not found
there, a character, or a symbol whose name is one character long,
is used as the syntax character itself.  Otherwise an error is
signaled."
  (rx-check form)
  (let* ((sym (cadr form))
         (code (or (cdr (assq sym rx-syntax))
                   ;; Try sregex compatibility.
                   (cond
                    ((characterp sym) sym)
                    ((symbolp sym)
                     (let ((name (symbol-name sym)))
                       (and (= 1 (length name))
                            (aref name 0)))))
                   (error "Unknown rx syntax `%s'" sym))))
    (format "\\s%c" code)))
12c64503
GM
790
791
(defun rx-check-category (form)
  "Check FORM, the argument of a `(category FORM)' form.
Return t if FORM is an integer or a symbol listed in
`rx-categories'; otherwise signal an error."
  (if (or (integerp form)
          (cdr (assq form rx-categories)))
      t
    (error "Unknown category `%s'" form)))
a1506d29 798
12c64503
GM
799
(defun rx-category (form)
  "Translate FORM, which is `(category SYMBOL)', to a regexp string.
An integer argument is used as the category character directly;
any other argument is looked up in `rx-categories'."
  (rx-check form)
  (let* ((arg (cadr form))
         (char (if (integerp arg)
                   arg
                 (cdr (assq arg rx-categories)))))
    (format "\\c%c" char)))
807
808
(defun rx-eval (form)
  "Translate FORM, which is `(eval FORM)', to a regexp string.
The argument is evaluated with `eval' and its value is translated
as an Rx form, under the current `rx-parent'."
  (rx-check form)
  (let ((expansion (eval (cadr form))))
    (rx-form expansion rx-parent)))
12c64503
GM
813
814
(defun rx-greedy (form)
  "Translate a `minimal-match' or `maximal-match' FORM to a regexp.
FORM is `(minimal-match FORM1)' or `(maximal-match FORM1)'.
FORM1 is translated with `rx-greedy-flag' bound to t for
`maximal-match' and to nil otherwise, which selects the greedy or
non-greedy versions of the `*', `+' and `?' operators."
  (rx-check form)
  (let ((rx-greedy-flag (eq (car form) 'maximal-match))
        (sub-form (cadr form)))
    (rx-form sub-form rx-parent)))
12c64503
GM
823
824
(defun rx-regexp (form)
  "Translate FORM, which is `(regexp STRING)', to a regexp string.
STRING is used as is, apart from the shy group `rx-group-if' may
put around it for the current `rx-parent'."
  (rx-check form)
  (let ((string (cadr form)))
    (rx-group-if string rx-parent)))
829
830
(defun rx-form (form &optional rx-parent)
  "Translate FORM, a regular expression in sexp form, to a string.
RX-PARENT names the kind of expression that asked for the
translation.  It decides whether shy groups are put around the
result and is consulted by some of the translation functions."
  (cond
   ;; A string matches itself literally.
   ((stringp form)
    (let ((group (if (and (eq rx-parent '*)
                          (< 1 (length form)))
                     rx-parent)))
      (rx-group-if (regexp-quote form) group)))
   ;; So does a character.
   ((integerp form)
    (regexp-quote (char-to-string form)))
   ;; A standalone symbol.
   ((symbolp form)
    (let ((info (rx-info form nil)))
      (cond ((null info)
             (error "Unknown rx form `%s'" form))
            ((stringp info)
             info)
            (t
             (funcall (nth 0 info) form)))))
   ;; A form headed by an operator.
   ((consp form)
    (let ((info (rx-info (car form) 'head)))
      (if (consp info)
          (funcall (nth 0 info) form)
        (error "Unknown rx form `%s'" (car form)))))
   (t
    (error "rx syntax error at `%s'" form))))
12c64503
GM
858
859
860;;;###autoload
(defun rx-to-string (form &optional no-group)
  "Translate FORM, a regular expression in sexp form, to a string.
A shy group is put around the result, unless NO-GROUP is non-nil;
in that case `rx-group-if' is asked not to add one."
  (let ((regexp (rx-form form)))
    (rx-group-if regexp (null no-group))))
12c64503
GM
866
867
868;;;###autoload
ccfbe679
SM
(defmacro rx (&rest regexps)
  "Translate regular expressions REGEXPS in sexp form to a regexp string.
REGEXPS is a non-empty sequence of forms of the sort listed below.

Note that `rx' is a Lisp macro; when used in a Lisp program being
compiled, the translation is performed by the compiler.
See `rx-to-string' for how to do such a translation at run-time.

The following are valid subforms of regular expressions in sexp
notation.

STRING
     matches string STRING literally.

CHAR
     matches character CHAR literally.

`not-newline', `nonl'
     matches any character except a newline.

`anything'
     matches any character.

`(any SET ...)'
`(in SET ...)'
`(char SET ...)'
     matches any character in SET ....  SET may be a character or string.
     Ranges of characters can be specified as `A-Z' in strings.
     Ranges may also be specified as conses like `(?A . ?Z)'.

     SET may also be the name of a character class: `digit',
     `control', `hex-digit', `blank', `graph', `print', `alnum',
     `alpha', `ascii', `nonascii', `lower', `punct', `space', `upper',
     `word', or one of their synonyms.

`(not (any SET ...))'
     matches any character not in SET ...

`line-start', `bol'
     matches the empty string, but only at the beginning of a line
     in the text being matched.

`line-end', `eol'
     is similar to `line-start' but matches only at the end of a line.

`string-start', `bos', `bot'
     matches the empty string, but only at the beginning of the
     string being matched against.

`string-end', `eos', `eot'
     matches the empty string, but only at the end of the
     string being matched against.

`buffer-start'
     matches the empty string, but only at the beginning of the
     buffer being matched against.  Actually equivalent to `string-start'.

`buffer-end'
     matches the empty string, but only at the end of the
     buffer being matched against.  Actually equivalent to `string-end'.

`point'
     matches the empty string, but only at point.

`word-start', `bow'
     matches the empty string, but only at the beginning of a word.

`word-end', `eow'
     matches the empty string, but only at the end of a word.

`word-boundary'
     matches the empty string, but only at the beginning or end of a
     word.

`(not word-boundary)'
`not-word-boundary'
     matches the empty string, but not at the beginning or end of a
     word.

`symbol-start'
     matches the empty string, but only at the beginning of a symbol.

`symbol-end'
     matches the empty string, but only at the end of a symbol.

`digit', `numeric', `num'
     matches 0 through 9.

`control', `cntrl'
     matches ASCII control characters.

`hex-digit', `hex', `xdigit'
     matches 0 through 9, a through f and A through F.

`blank'
     matches space and tab only.

`graphic', `graph'
     matches graphic characters--everything except ASCII control chars,
     space, and DEL.

`printing', `print'
     matches printing characters--everything except ASCII control chars
     and DEL.

`alphanumeric', `alnum'
     matches letters and digits.  (But at present, for multibyte characters,
     it matches anything that has word syntax.)

`letter', `alphabetic', `alpha'
     matches letters.  (But at present, for multibyte characters,
     it matches anything that has word syntax.)

`ascii'
     matches ASCII (unibyte) characters.

`nonascii'
     matches non-ASCII (multibyte) characters.

`lower', `lower-case'
     matches anything lower-case.

`upper', `upper-case'
     matches anything upper-case.

`punctuation', `punct'
     matches punctuation.  (But at present, for multibyte characters,
     it matches anything that has non-word syntax.)

`space', `whitespace', `white'
     matches anything that has whitespace syntax.

`word', `wordchar'
     matches anything that has word syntax.

`not-wordchar'
     matches anything that has non-word syntax.

`(syntax SYNTAX)'
     matches a character with syntax SYNTAX.  SYNTAX must be one
     of the following symbols, or a symbol corresponding to the syntax
     character, e.g. `\\.' for `\\s.'.

     `whitespace'          (\\s- in string notation)
     `punctuation'         (\\s.)
     `word'                (\\sw)
     `symbol'              (\\s_)
     `open-parenthesis'    (\\s()
     `close-parenthesis'   (\\s))
     `expression-prefix'   (\\s')
     `string-quote'        (\\s\")
     `paired-delimiter'    (\\s$)
     `escape'              (\\s\\)
     `character-quote'     (\\s/)
     `comment-start'       (\\s<)
     `comment-end'         (\\s>)
     `string-delimiter'    (\\s|)
     `comment-delimiter'   (\\s!)

`(not (syntax SYNTAX))'
     matches a character that doesn't have syntax SYNTAX.

`(category CATEGORY)'
     matches a character with category CATEGORY.  CATEGORY must be
     either a character to use for C, or one of the following symbols.

     `consonant'                          (\\c0 in string notation)
     `base-vowel'                         (\\c1)
     `upper-diacritical-mark'             (\\c2)
     `lower-diacritical-mark'             (\\c3)
     `tone-mark'                          (\\c4)
     `symbol'                             (\\c5)
     `digit'                              (\\c6)
     `vowel-modifying-diacritical-mark'   (\\c7)
     `vowel-sign'                         (\\c8)
     `semivowel-lower'                    (\\c9)
     `not-at-end-of-line'                 (\\c<)
     `not-at-beginning-of-line'           (\\c>)
     `alpha-numeric-two-byte'             (\\cA)
     `chinse-two-byte'                    (\\cC)
     `greek-two-byte'                     (\\cG)
     `japanese-hiragana-two-byte'         (\\cH)
     `indian-tow-byte'                    (\\cI)
     `japanese-katakana-two-byte'         (\\cK)
     `korean-hangul-two-byte'             (\\cN)
     `cyrillic-two-byte'                  (\\cY)
     `combining-diacritic'                (\\c^)
     `ascii'                              (\\ca)
     `arabic'                             (\\cb)
     `chinese'                            (\\cc)
     `ethiopic'                           (\\ce)
     `greek'                              (\\cg)
     `korean'                             (\\ch)
     `indian'                             (\\ci)
     `japanese'                           (\\cj)
     `japanese-katakana'                  (\\ck)
     `latin'                              (\\cl)
     `lao'                                (\\co)
     `tibetan'                            (\\cq)
     `japanese-roman'                     (\\cr)
     `thai'                               (\\ct)
     `vietnamese'                         (\\cv)
     `hebrew'                             (\\cw)
     `cyrillic'                           (\\cy)
     `can-break'                          (\\c|)

`(not (category CATEGORY))'
     matches a character that doesn't have category CATEGORY.

`(and SEXP1 SEXP2 ...)'
`(: SEXP1 SEXP2 ...)'
`(seq SEXP1 SEXP2 ...)'
`(sequence SEXP1 SEXP2 ...)'
     matches what SEXP1 matches, followed by what SEXP2 matches, etc.

`(submatch SEXP1 SEXP2 ...)'
`(group SEXP1 SEXP2 ...)'
     like `and', but makes the match accessible with `match-end',
     `match-beginning', and `match-string'.

`(submatch-n N SEXP1 SEXP2 ...)'
`(group-n N SEXP1 SEXP2 ...)'
     like `group', but makes it an explicitly-numbered group with
     group number N.

`(or SEXP1 SEXP2 ...)'
`(| SEXP1 SEXP2 ...)'
     matches anything that matches SEXP1 or SEXP2, etc.  If all
     args are strings, use `regexp-opt' to optimize the resulting
     regular expression.

`(minimal-match SEXP)'
     produces a non-greedy regexp for SEXP.  Normally, regexps matching
     zero or more occurrences of something are \"greedy\" in that they
     match as much as they can, as long as the overall regexp can
     still match.  A non-greedy regexp matches as little as possible.

`(maximal-match SEXP)'
     produces a greedy regexp for SEXP.  This is the default.

Below, `SEXP ...' represents a sequence of regexp forms, treated as if
enclosed in `(and ...)'.

`(zero-or-more SEXP ...)'
`(0+ SEXP ...)'
     matches zero or more occurrences of what SEXP ... matches.

`(* SEXP ...)'
     like `zero-or-more', but always produces a greedy regexp, independent
     of `rx-greedy-flag'.

`(*? SEXP ...)'
     like `zero-or-more', but always produces a non-greedy regexp,
     independent of `rx-greedy-flag'.

`(one-or-more SEXP ...)'
`(1+ SEXP ...)'
     matches one or more occurrences of SEXP ...

`(+ SEXP ...)'
     like `one-or-more', but always produces a greedy regexp.

`(+? SEXP ...)'
     like `one-or-more', but always produces a non-greedy regexp.

`(zero-or-one SEXP ...)'
`(optional SEXP ...)'
`(opt SEXP ...)'
     matches zero or one occurrences of what SEXP ... matches.

`(? SEXP ...)'
     like `zero-or-one', but always produces a greedy regexp.

`(?? SEXP ...)'
     like `zero-or-one', but always produces a non-greedy regexp.

`(repeat N SEXP)'
`(= N SEXP ...)'
     matches N occurrences.

`(>= N SEXP ...)'
     matches N or more occurrences.

`(repeat N M SEXP)'
`(** N M SEXP ...)'
     matches N to M occurrences.

`(backref N)'
     matches what was matched previously by submatch N.

`(eval FORM)'
     evaluate FORM and insert result.  If result is a string,
     `regexp-quote' it.

`(regexp REGEXP)'
     include REGEXP in string notation in the result."
  ;; NOTE(review): the category names `chinse-two-byte' and
  ;; `indian-tow-byte' in the docstring are reproduced verbatim from
  ;; the original text.  They look like misspellings, but the table of
  ;; accepted category symbols is not visible from here -- confirm
  ;; against it before renaming either entry.
  ;;
  ;; An empty argument list is an error: there is nothing to translate.
  (unless regexps
    (error "No regexp"))
  ;; Several forms are treated as an implicit `(and ...)' sequence; a
  ;; single form is translated as is.  The non-nil second argument asks
  ;; `rx-to-string' not to put shy groups around the result.
  (rx-to-string (if (cdr regexps)
                    `(and ,@regexps)
                  (car regexps))
                t))
1171\f
1172;; ;; sregex.el replacement
1173
1174;; ;;;###autoload (provide 'sregex)
1175;; ;;;###autoload (autoload 'sregex "rx")
1176;; (defalias 'sregex 'rx-to-string)
1177;; ;;;###autoload (autoload 'sregexq "rx" nil nil 'macro)
1178;; (defalias 'sregexq 'rx)
1179\f
12c64503
GM
1180(provide 'rx)
1181
1182;;; rx.el ends here