;;; rx.el --- sexp notation for regular expressions
-;; Copyright (C) 2001-2011 Free Software Foundation, Inc.
+;; Copyright (C) 2001-2014 Free Software Foundation, Inc.
;; Author: Gerd Moellmann <gerd@gnu.org>
-;; Maintainer: FSF
+;; Maintainer: emacs-devel@gnu.org
;; Keywords: strings, regexps, extensions
;; This file is part of GNU Emacs.
;; that the `repeat' form can't have multiple regexp args.
;; Now alternative forms are provided for a degree of compatibility
-;; with Shivers' attempted definitive SRE notation
-;; <URL:http://www.ai.mit.edu/~/shivers/sre.txt>. SRE forms not
-;; catered for include: dsm, uncase, w/case, w/nocase, ,@<exp>,
+;; with Olin Shivers' attempted definitive SRE notation. SRE forms
+;; not catered for include: dsm, uncase, w/case, w/nocase, ,@<exp>,
;; ,<exp>, (word ...), word+, posix-string, and character class forms.
;; Some forms are inconsistent with SRE, either for historical reasons
;; or because of the implementation -- simple translation into Emacs
;;; Code:
-(defconst rx-constituents
+;; FIXME: support macros.
+
+(defvar rx-constituents ;Not `const' because some modes extend it.
'((and . (rx-and 1 nil))
(seq . and) ; SRE
(: . and) ; SRE
(not-at-end-of-line . ?<)
(not-at-beginning-of-line . ?>)
(alpha-numeric-two-byte . ?A)
- (chinse-two-byte . ?C)
+ (chinese-two-byte . ?C)
+ (chinse-two-byte . ?C) ;; A typo in Emacs 21.1-24.3.
(greek-two-byte . ?G)
(japanese-hiragana-two-byte . ?H)
(indian-two-byte . ?I)
(defun rx-anything (form)
"Match any character."
(if (consp form)
- (error "rx `anythng' syntax error: %s" form))
+ (error "rx `anything' syntax error: %s" form))
(rx-or (list 'or 'not-newline ?\n)))
FORM is a regular expression in sexp form.
RX-PARENT shows which type of expression calls and controls putting of
shy groups around the result and some more in other functions."
- (if (stringp form)
- (rx-group-if (regexp-quote form)
- (if (and (eq rx-parent '*) (< 1 (length form)))
- rx-parent))
- (cond ((integerp form)
- (regexp-quote (char-to-string form)))
- ((symbolp form)
- (let ((info (rx-info form nil)))
- (cond ((stringp info)
- info)
- ((null info)
- (error "Unknown rx form `%s'" form))
- (t
- (funcall (nth 0 info) form)))))
- ((consp form)
- (let ((info (rx-info (car form) 'head)))
- (unless (consp info)
- (error "Unknown rx form `%s'" (car form)))
- (funcall (nth 0 info) form)))
- (t
- (error "rx syntax error at `%s'" form)))))
+ (cond
+ ((stringp form)
+ (rx-group-if (regexp-quote form)
+ (if (and (eq rx-parent '*) (< 1 (length form)))
+ rx-parent)))
+ ((integerp form)
+ (regexp-quote (char-to-string form)))
+ ((symbolp form)
+ (let ((info (rx-info form nil)))
+ (cond ((stringp info)
+ info)
+ ((null info)
+ (error "Unknown rx form `%s'" form))
+ (t
+ (funcall (nth 0 info) form)))))
+ ((consp form)
+ (let ((info (rx-info (car form) 'head)))
+ (unless (consp info)
+ (error "Unknown rx form `%s'" (car form)))
+ (funcall (nth 0 info) form)))
+ (t
+ (error "rx syntax error at `%s'" form))))
;;;###autoload
REGEXPS is a non-empty sequence of forms of the sort listed below.
Note that `rx' is a Lisp macro; when used in a Lisp program being
- compiled, the translation is performed by the compiler.
+compiled, the translation is performed by the compiler.
See `rx-to-string' for how to do such a translation at run-time.
The following are valid subforms of regular expressions in sexp
`not-at-end-of-line' (\\c<)
`not-at-beginning-of-line' (\\c>)
`alpha-numeric-two-byte' (\\cA)
- `chinse-two-byte' (\\cC)
+ `chinese-two-byte' (\\cC)
`greek-two-byte' (\\cG)
`japanese-hiragana-two-byte' (\\cH)
- `indian-tow-byte' (\\cI)
+ `indian-two-byte' (\\cI)