X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/2ec1b5ee3464999a18b8197101e8bf08a3c564a8..daa9f1a6076ee5e54c8b56b321bc1d2d991a15c6:/lisp/emacs-lisp/rx.el diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index 85fe3514b0..774c6cd2c3 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el @@ -1,7 +1,6 @@ ;;; rx.el --- sexp notation for regular expressions -;; Copyright (C) 2001, 2002, 2003, 2004, 2005, -;; 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +;; Copyright (C) 2001-2012 Free Software Foundation, Inc. ;; Author: Gerd Moellmann ;; Maintainer: FSF @@ -36,9 +35,8 @@ ;; that the `repeat' form can't have multiple regexp args. ;; Now alternative forms are provided for a degree of compatibility -;; with Shivers' attempted definitive SRE notation -;; . SRE forms not -;; catered for include: dsm, uncase, w/case, w/nocase, ,@, +;; with Olin Shivers' attempted definitive SRE notation. SRE forms +;; not catered for include: dsm, uncase, w/case, w/nocase, ,@, ;; ,, (word ...), word+, posix-string, and character class forms. ;; Some forms are inconsistent with SRE, either for historical reasons ;; or because of the implementation -- simple translation into Emacs @@ -109,7 +107,9 @@ ;;; Code: -(defconst rx-constituents +;; FIXME: support macros. + +(defvar rx-constituents ;Not `const' because some modes extend it. '((and . (rx-and 1 nil)) (seq . and) ; SRE (: . and) ; SRE @@ -120,19 +120,19 @@ (nonl . not-newline) ; SRE (anything . (rx-anything 0 nil)) (any . (rx-any 1 nil rx-check-any)) ; inconsistent with SRE + (any . ".") ; sregex (in . any) (char . any) ; sregex (not-char . (rx-not-char 1 nil rx-check-any)) ; sregex (not . (rx-not 1 1 rx-check-not)) - ;; Partially consistent with sregex, whose `repeat' is like our - ;; `**'. (`repeat' with optional max arg and multiple sexp forms - ;; is ambiguous.) - (repeat . (rx-repeat 2 3)) + (repeat . (rx-repeat 2 nil)) (= . (rx-= 2 nil)) ; SRE (>= . (rx->= 2 nil)) ; SRE (** . (rx-** 2 nil)) ; SRE (submatch . (rx-submatch 1 nil)) ; SRE - (group . submatch) + (group . submatch) ; sregex + (submatch-n . (rx-submatch-n 2 nil)) + (group-n . submatch-n) (zero-or-more . (rx-kleene 1 nil)) (one-or-more . (rx-kleene 1 nil)) (zero-or-one . (rx-kleene 1 nil)) @@ -175,6 +175,7 @@ (category . (rx-category 1 1 rx-check-category)) (eval . (rx-eval 1 1)) (regexp . (rx-regexp 1 1 stringp)) + (regex . regexp) ; sregex (digit . "[[:digit:]]") (numeric . digit) ; SRE (num . digit) ; SRE @@ -295,15 +296,27 @@ regular expression strings.") `zero-or-more', and `one-or-more'. Dynamically bound.") -(defun rx-info (op) +(defun rx-info (op head) "Return parsing/code generation info for OP. If OP is the space character ASCII 32, return info for the symbol `?'. If OP is the character `?', return info for the symbol `??'. -See also `rx-constituents'." +See also `rx-constituents'. +If HEAD is non-nil, then OP is the head of a sexp, otherwise it's +a standalone symbol." (cond ((eq op ? ) (setq op '\?)) ((eq op ??) (setq op '\??))) - (while (and (not (null op)) (symbolp op)) - (setq op (cdr (assq op rx-constituents)))) + (let (old-op) + (while (and (not (null op)) (symbolp op)) + (setq old-op op) + (setq op (cdr (assq op rx-constituents))) + (when (if head (stringp op) (consp op)) + ;; We found something but of the wrong kind. Let's look for an + ;; alternate definition for the other case. + (let ((new-op + (cdr (assq old-op (cdr (memq (assq old-op rx-constituents) + rx-constituents)))))) + (if (and new-op (not (if head (stringp new-op) (consp new-op)))) + (setq op new-op)))))) op) @@ -311,7 +324,7 @@ See also `rx-constituents'." "Check FORM according to its car's parsing info." (unless (listp form) (error "rx `%s' needs argument(s)" form)) - (let* ((rx (rx-info (car form))) + (let* ((rx (rx-info (car form) 'head)) (nargs (1- (length form))) (min-args (nth 1 rx)) (max-args (nth 2 rx)) @@ -381,7 +394,7 @@ FORM is of the form `(and FORM1 ...)'." (defun rx-anything (form) "Match any character." (if (consp form) - (error "rx `anythng' syntax error: %s" form)) + (error "rx `anything' syntax error: %s" form)) (rx-or (list 'or 'not-newline ?\n))) @@ -401,7 +414,7 @@ Only both edges of each range is checked." (setcdr m (1- char))))) ranges)) - + (defun rx-any-condense-range (args) "Condense by side effect ARGS as range for Rx `any'." (let (str @@ -427,7 +440,7 @@ Only both edges of each range is checked." (mapcar (lambda (e) (cond ((= (car e) (cdr e)) (list (car e))) - ;; ((= (1+ (car e)) (cdr e)) (list (car e) (cdr e))) + ((= (1+ (car e)) (cdr e)) (list (car e) (cdr e))) ((list e)))) l)) (delete-dups str)))) @@ -545,7 +558,10 @@ ARG is optional." ((numberp e) (string e)) ((consp e) (if (and (= (1+ (car e)) (cdr e)) - (null (memq (car e) '(?\] ?-)))) + ;; rx-any-condense-range should + ;; prevent this case from happening. + (null (memq (car e) '(?\] ?-))) + (null (memq (cdr e) '(?\] ?-)))) (string (car e) (cdr e)) (string (car e) ?- (cdr e)))) (e))) @@ -561,7 +577,7 @@ ARG is optional." (condition-case nil (rx-form arg) (error "")))) - (eq arg 'word-boundary) + (eq arg 'word-boundary) (and (consp arg) (memq (car arg) '(not any in syntax category)))) (error "rx `not' syntax error: %s" arg)) @@ -640,14 +656,17 @@ If SKIP is non-nil, allow that number of items after the head, i.e. (defun rx-** (form) "Parse and produce code from FORM `(** N M ...)'." (rx-check form) - (setq form (cons 'repeat (cdr (rx-trans-forms form 2)))) - (rx-form form '*)) + (rx-form (cons 'repeat (cdr (rx-trans-forms form 2))) '*)) (defun rx-repeat (form) "Parse and produce code from FORM. -FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'." +FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'." (rx-check form) + (if (> (length form) 4) + (setq form (rx-trans-forms form 2))) + (if (null (nth 2 form)) + (setq form (cons (nth 0 form) (cons (nth 1 form) (nthcdr 3 form))))) (cond ((= (length form) 3) (unless (and (integerp (nth 1 form)) (> (nth 1 form) 0)) @@ -674,6 +693,16 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'." (mapconcat (lambda (re) (rx-form re ':)) (cdr form) nil)) "\\)")) +(defun rx-submatch-n (form) + "Parse and produce code from FORM, which is `(submatch-n N ...)'." + (let ((n (nth 1 form))) + (concat "\\(?" (number-to-string n) ":" + (if (= 3 (length form)) + ;; Only one sub-form. + (rx-form (nth 2 form)) + ;; Several sub-forms implicitly concatenated. + (mapconcat (lambda (re) (rx-form re ':)) (cddr form) nil)) + "\\)"))) (defun rx-backref (form) "Parse and produce code from FORM, which is `(backref N)'." @@ -746,15 +775,18 @@ of all atomic regexps." "Parse and produce code from FORM, which is `(syntax SYMBOL)'." (rx-check form) (let* ((sym (cadr form)) - (syntax (assq sym rx-syntax))) + (syntax (cdr (assq sym rx-syntax)))) (unless syntax ;; Try sregex compatibility. - (let ((name (symbol-name sym))) - (if (= 1 (length name)) - (setq syntax (rassq (aref name 0) rx-syntax)))) + (cond + ((characterp sym) (setq syntax sym)) + ((symbolp sym) + (let ((name (symbol-name sym))) + (if (= 1 (length name)) + (setq syntax (aref name 0)))))) (unless syntax - (error "Unknown rx syntax `%s'" (cadr form)))) - (format "\\s%c" (cdr syntax)))) + (error "Unknown rx syntax `%s'" sym))) + (format "\\s%c" syntax))) (defun rx-check-category (form) @@ -801,27 +833,28 @@ If FORM is '(minimal-match FORM1)', non-greedy versions of `*', FORM is a regular expression in sexp form. RX-PARENT shows which type of expression calls and controls putting of shy groups around the result and some more in other functions." - (if (stringp form) - (rx-group-if (regexp-quote form) - (if (and (eq rx-parent '*) (< 1 (length form))) - rx-parent)) - (cond ((integerp form) - (regexp-quote (char-to-string form))) - ((symbolp form) - (let ((info (rx-info form))) - (cond ((stringp info) - info) - ((null info) - (error "Unknown rx form `%s'" form)) - (t - (funcall (nth 0 info) form))))) - ((consp form) - (let ((info (rx-info (car form)))) - (unless (consp info) - (error "Unknown rx form `%s'" (car form))) - (funcall (nth 0 info) form))) - (t - (error "rx syntax error at `%s'" form))))) + (cond + ((stringp form) + (rx-group-if (regexp-quote form) + (if (and (eq rx-parent '*) (< 1 (length form))) + rx-parent))) + ((integerp form) + (regexp-quote (char-to-string form))) + ((symbolp form) + (let ((info (rx-info form nil))) + (cond ((stringp info) + info) + ((null info) + (error "Unknown rx form `%s'" form)) + (t + (funcall (nth 0 info) form))))) + ((consp form) + (let ((info (rx-info (car form) 'head))) + (unless (consp info) + (error "Unknown rx form `%s'" (car form))) + (funcall (nth 0 info) form))) + (t + (error "rx syntax error at `%s'" form)))) ;;;###autoload @@ -1053,6 +1086,11 @@ CHAR like `and', but makes the match accessible with `match-end', `match-beginning', and `match-string'. +`(submatch-n N SEXP1 SEXP2 ...)' +`(group-n N SEXP1 SEXP2 ...)' + like `group', but make it an explicitly-numbered group with + group number N. + `(or SEXP1 SEXP2 ...)' `(| SEXP1 SEXP2 ...)' matches anything that matches SEXP1 or SEXP2, etc. If all @@ -1141,5 +1179,4 @@ enclosed in `(and ...)'. (provide 'rx) -;; arch-tag: 12d01a63-0008-42bb-ab8c-1c7d63be370b ;;; rx.el ends here