;;; regexp-opt.el --- generate efficient regexps to match strings
-;; Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-;; 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 1994-2011 Free Software Foundation, Inc.
;; Author: Simon Marshall <simon@gnu.org>
;; Maintainer: FSF
;; This file is part of GNU Emacs.
-;; GNU Emacs is free software; you can redistribute it and/or modify
+;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 3, or (at your option)
-;; any later version.
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-;; Boston, MA 02110-1301, USA.
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
(concat open (mapconcat 'regexp-quote STRINGS \"\\\\|\") close))
If PAREN is `words', then the resulting regexp is additionally surrounded
-by \\=\\< and \\>."
+by \\=\\< and \\>.
+If PAREN is `symbols', then the resulting regexp is additionally surrounded
+by \\=\\_< and \\_>."
(save-match-data
;; Recurse on the sorted list.
- (let* ((max-lisp-eval-depth (* 1024 1024))
- (max-specpdl-size (* 1024 1024))
+ (let* ((max-lisp-eval-depth 10000)
+ (max-specpdl-size 10000)
(completion-ignore-case nil)
(completion-regexp-list nil)
- (words (eq paren 'words))
(open (cond ((stringp paren) paren) (paren "\\(")))
(sorted-strings (delete-dups
(sort (copy-sequence strings) 'string-lessp)))
- (re (regexp-opt-group sorted-strings open)))
- (if words (concat "\\<" re "\\>") re))))
+ (re (regexp-opt-group sorted-strings (or open t) (not open))))
+ (cond ((eq paren 'words)
+ (concat "\\<" re "\\>"))
+ ((eq paren 'symbols)
+ (concat "\\_<" re "\\_>"))
+ (t re)))))
;;;###autoload
(defun regexp-opt-depth (regexp)
(string-match regexp "")
;; Count the number of open parentheses in REGEXP.
(let ((count 0) start last)
- (while (string-match "\\\\(\\(\\?:\\)?" regexp start)
+ (while (string-match "\\\\(\\(\\?[0-9]*:\\)?" regexp start)
(setq start (match-end 0)) ; Start of next search.
(when (and (not (match-beginning 1))
(subregexp-context-p regexp (match-beginning 0) last))
(require 'cl))
(defun regexp-opt-group (strings &optional paren lax)
- ;; Return a regexp to match a string in the sorted list STRINGS.
- ;; If PAREN non-nil, output regexp parentheses around returned regexp.
- ;; If LAX non-nil, don't output parentheses if it doesn't require them.
- ;; Merges keywords to avoid backtracking in Emacs' regexp matcher.
-
+ "Return a regexp to match a string in the sorted list STRINGS.
+If PAREN non-nil, output regexp parentheses around returned regexp.
+If LAX non-nil, don't output parentheses if it doesn't require them.
+Merges keywords to avoid backtracking in Emacs' regexp matcher."
;; The basic idea is to find the shortest common prefix or suffix, remove it
;; and recurse. If there is no prefix, we divide the list into two so that
;; \(at least) one half will have at least a one-character common prefix.
;; Otherwise, divide the list into those that start with a
;; particular letter and those that do not, and recurse on them.
- (let* ((char (char-to-string (string-to-char (car strings))))
+ (let* ((char (substring-no-properties (car strings) 0 1))
(half1 (all-completions char strings))
(half2 (nthcdr (length half1) strings)))
(concat open-group
(defun regexp-opt-charset (chars)
- ;;
- ;; Return a regexp to match a character in CHARS.
- ;;
+ "Return a regexp to match a character in CHARS."
;; The basic idea is to find character ranges. Also we take care in the
;; position of character set meta characters in the character set regexp.
;;
(provide 'regexp-opt)
-;; arch-tag: 6c5a66f4-29af-4fd6-8c3b-4b554d5b4370
;;; regexp-opt.el ends here