;;; regexp-opt.el --- generate efficient regexps to match strings
-;; Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-;; 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+;; Copyright (C) 1994-2012 Free Software Foundation, Inc.
;; Author: Simon Marshall <simon@gnu.org>
;; Maintainer: FSF
(concat open (mapconcat 'regexp-quote STRINGS \"\\\\|\") close))
If PAREN is `words', then the resulting regexp is additionally surrounded
-by \\=\\< and \\>."
+by \\=\\< and \\>.
+If PAREN is `symbols', then the resulting regexp is additionally surrounded
+by \\=\\_< and \\_>."
(save-match-data
;; Recurse on the sorted list.
(let* ((max-lisp-eval-depth 10000)
(max-specpdl-size 10000)
(completion-ignore-case nil)
(completion-regexp-list nil)
- (words (eq paren 'words))
(open (cond ((stringp paren) paren) (paren "\\(")))
(sorted-strings (delete-dups
(sort (copy-sequence strings) 'string-lessp)))
(re (regexp-opt-group sorted-strings (or open t) (not open))))
- (if words (concat "\\<" re "\\>") re))))
+ (cond ((eq paren 'words)
+ (concat "\\<" re "\\>"))
+ ((eq paren 'symbols)
+ (concat "\\_<" re "\\_>"))
+ (t re)))))
;;;###autoload
(defun regexp-opt-depth (regexp)
(string-match regexp "")
;; Count the number of open parentheses in REGEXP.
(let ((count 0) start last)
- (while (string-match "\\\\(\\(\\?:\\)?" regexp start)
+ (while (string-match "\\\\(\\(\\?[0-9]*:\\)?" regexp start)
(setq start (match-end 0)) ; Start of next search.
(when (and (not (match-beginning 1))
(subregexp-context-p regexp (match-beginning 0) last))
\f
;;; Workhorse functions.
-(eval-when-compile
- (require 'cl))
-
(defun regexp-opt-group (strings &optional paren lax)
"Return a regexp to match a string in the sorted list STRINGS.
If PAREN non-nil, output regexp parentheses around returned regexp.
If LAX non-nil, don't output parentheses if it doesn't require them.
-Merges keywords to avoid backtracking in Emacs' regexp matcher."
+Merges keywords to avoid backtracking in Emacs's regexp matcher."
;; The basic idea is to find the shortest common prefix or suffix, remove it
;; and recurse. If there is no prefix, we divide the list into two so that
;; \(at least) one half will have at least a one-character common prefix.
(defun regexp-opt-charset (chars)
- "Return a regexp to match a character in CHARS."
+ "Return a regexp to match a character in CHARS.
+CHARS should be a list of characters."
;; The basic idea is to find character ranges. Also we take care in the
;; position of character set meta characters in the character set regexp.
;;
;;
;; Make a character map but extract character set meta characters.
(dolist (char chars)
- (case char
- (?\]
- (setq bracket "]"))
- (?^
- (setq caret "^"))
- (?-
- (setq dash "-"))
- (otherwise
- (aset charmap char t))))
+ (cond
+ ((eq char ?\])
+ (setq bracket "]"))
+ ((eq char ?^)
+ (setq caret "^"))
+ ((eq char ?-)
+ (setq dash "-"))
+ (t
+ (aset charmap char t))))
;;
;; Make a character set from the map using ranges where applicable.
(map-char-table
(setq charset (format "%s%c-%c" charset start end))
(while (>= end start)
(setq charset (format "%s%c" charset start))
- (incf start)))
+ (setq start (1+ start))))
(setq start (car c) end (cdr c)))
(if (= (1- c) end) (setq end c)
(if (> end (+ start 2))
(setq charset (format "%s%c-%c" charset start end))
(while (>= end start)
(setq charset (format "%s%c" charset start))
- (incf start)))
+ (setq start (1+ start))))
(setq start c end c)))))
charmap)
(when (>= end start)
(setq charset (format "%s%c-%c" charset start end))
(while (>= end start)
(setq charset (format "%s%c" charset start))
- (incf start))))
+ (setq start (1+ start)))))
;;
;; Make sure a caret is not first and a dash is first or last.
(if (and (string-equal charset "") (string-equal bracket ""))
(provide 'regexp-opt)
-;; arch-tag: 6c5a66f4-29af-4fd6-8c3b-4b554d5b4370
;;; regexp-opt.el ends here