-;;;; Copyright (C) 1997, 1999 Free Software Foundation, Inc.
+;;;; Copyright (C) 1997, 1999, 2001, 2004, 2005, 2006 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 2.1 of the License, or (at your option) any later version.
;;;;
-;;;; This program is free software; you can redistribute it and/or modify
-;;;; it under the terms of the GNU General Public License as published by
-;;;; the Free Software Foundation; either version 2, or (at your option)
-;;;; any later version.
-;;;;
-;;;; This program is distributed in the hope that it will be useful,
+;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-;;;; GNU General Public License for more details.
-;;;;
-;;;; You should have received a copy of the GNU General Public License
-;;;; along with this software; see the file COPYING. If not, write to
-;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
-;;;; Boston, MA 02111-1307 USA
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;;; Lesser General Public License for more details.
;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;;;;
+
+;;; Commentary:
+
+;; These procedures are exported:
+;; (match:count match)
+;; (match:string match)
+;; (match:prefix match)
+;; (match:suffix match)
+;; (regexp-match? match)
+;; (regexp-quote string)
+;; (match:start match . submatch-num)
+;; (match:end match . submatch-num)
+;; (match:substring match . submatch-num)
+;; (string-match pattern str . start)
+;; (regexp-substitute port match . items)
+;; (fold-matches regexp string init proc . flags)
+;; (list-matches regexp string . flags)
+;; (regexp-substitute/global port regexp string . items)
+
+;;; Code:
\f
;;;; POSIX regex support functions.
-(define-module (ice-9 regex))
+(define-module (ice-9 regex)  ; public bindings are listed once here; each is a plain `define' below
+ :export (match:count match:string match:prefix match:suffix
+ regexp-match? regexp-quote match:start match:end match:substring
+ string-match regexp-substitute fold-matches list-matches
+ regexp-substitute/global))
+
+;; References:
+;;
+;; POSIX spec:
+;; http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html
;;; FIXME:
;;; It is not clear what should happen if a `match' function
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; These procedures are not defined in SCSH, but I found them useful.
-(define-public (match:count match)
+(define (match:count match)  ; number of MATCH vector slots after slot 0 (slot 0 is what match:string returns)
(- (vector-length match) 1))
-(define-public (match:string match)
+(define (match:string match)  ; returns slot 0 of the MATCH vector
(vector-ref match 0))
-(define-public (match:prefix match)
- (make-shared-substring (match:string match)
- 0
- (match:start match 0)))
+(define (match:prefix match)  ; part of the matched string that precedes submatch 0
+ (substring (match:string match) 0 (match:start match 0)))  ; from index 0 up to the start of submatch 0
-(define-public (match:suffix match)
- (make-shared-substring (match:string match)
- (match:end match 0)))
+(define (match:suffix match)  ; part of the matched string that follows submatch 0
+ (substring (match:string match) (match:end match 0)))  ; no end index is passed to substring
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; SCSH compatibility routines.
-(define-public (regexp-match? match)
+(define (regexp-match? match)
(and (vector? match)
(string? (vector-ref match 0))
(let loop ((i 1))
(loop (+ 1 i)))
(else #f)))))
-(define-public (regexp-quote regexp)
+;; * . \ ^ $ and [ are special in both regexp/basic and regexp/extended and
+;; can be backslash escaped.
+;;
+;; ( ) + ? { } and | are special in regexp/extended so must be quoted. But
+;; that can't be done with a backslash since in regexp/basic, where they're
+;; not special, adding a backslash makes them become special. Character
+;; class forms [(] etc are used instead.
+;;
+;; ) is not special when not preceded by a (, and * and ? are not special at
+;; the start of a string, but we quote all of these always, so the result
+;; can be concatenated or merged into some larger regexp.
+;;
+;; ] is not special outside a [ ] character class, so doesn't need to be
+;; quoted.
+;;
+(define (regexp-quote string)  ; writes a quoted copy of STRING, one character at a time, to port P
(call-with-output-string
(lambda (p)
- (let loop ((i 0))
- (and (< i (string-length regexp))
- (begin
- (case (string-ref regexp i)
- ((#\* #\. #\( #\) #\+ #\? #\\ #\^ #\$ #\{ #\})
- (write-char #\\ p)))
- (write-char (string-ref regexp i) p)
- (loop (1+ i))))))))
-
-(define-public (match:start match . args)
+ (string-for-each (lambda (c)
+ (case c
+ ((#\* #\. #\\ #\^ #\$ #\[)  ; these are emitted with a leading backslash
+ (write-char #\\ p)
+ (write-char c p))
+ ((#\( #\) #\+ #\? #\{ #\} #\|)  ; these are emitted wrapped in brackets, as [c]
+ (write-char #\[ p)
+ (write-char c p)
+ (write-char #\] p))
+ (else  ; every other character is copied unchanged
+ (write-char c p))))
+ string))))
+
+(define (match:start match . args)  ; car of the slot for submatch (car ARGS), default 0; #f when that value is -1
(let* ((matchnum (if (pair? args)
(+ 1 (car args))
1))
(start (car (vector-ref match matchnum))))
(if (= start -1) #f start)))
-(define-public (match:end match . args)
+(define (match:end match . args)  ; cdr of the slot for submatch (car ARGS), default 0; #f when that value is -1
(let* ((matchnum (if (pair? args)
(+ 1 (car args))
1))
(end (cdr (vector-ref match matchnum))))
(if (= end -1) #f end)))
-(define-public (match:substring match . args)
+(define (match:substring match . args)  ; text of submatch (car ARGS), default 0
(let* ((matchnum (if (pair? args)
(car args)
0))
(start (match:start match matchnum))
(end (match:end match matchnum)))
- (and start end (make-shared-substring (match:string match)
- start
- end))))
+ (and start end (substring (match:string match) start end))))  ; #f if match:start or match:end gave #f
-(define-public (string-match pattern str . args)
+(define (string-match pattern str . args)  ; make-regexp on PATTERN, then regexp-exec on STR from (car ARGS) or 0
(let ((rx (make-regexp pattern))
(start (if (pair? args) (car args) 0)))
(regexp-exec rx str start)))
-(define-public (regexp-substitute port match . items)
+(define (regexp-substitute port match . items)
;; If `port' is #f, send output to a string.
(if (not port)
(call-with-output-string
;;; `b'. Around or within `xxx', only the match covering all three
;;; x's counts, because the rest are not maximal.
-(define-public (fold-matches regexp string init proc . flags)
+(define (fold-matches regexp string init proc . flags)  ; NOTE(review): FLAGS is rebound below to 0 or to the whole rest list, not (car flags) -- confirm its users accept a list
(let ((regexp (if (regexp? regexp) regexp (make-regexp regexp)))
(flags (if (null? flags) 0 flags)))
(let loop ((start 0)
(else
(loop (match:end m) (proc m value) #t)))))))
-(define-public (list-matches regexp string . flags)
+(define (list-matches regexp string . flags)  ; folds with cons from '(), then reverses the accumulated list in place
(reverse! (apply fold-matches regexp string '() cons flags)))
-(define-public (regexp-substitute/global port regexp string . items)
+(define (regexp-substitute/global port regexp string . items)
;; If `port' is #f, send output to a string.
(if (not port)
;; Walk the set of non-overlapping, maximal matches.
(let next-match ((matches (list-matches regexp string))
(start 0))
- (if (pair? matches)
+ (if (null? matches)
+ (display (substring string start) port)
(let ((m (car matches)))
;; Process all of the items for this match. Don't use
;; for-each, because we need to make sure 'post at the
;; end of the item list is a tail call.
(let next-item ((items items))
-
+
(define (do-item item)
(cond
((string? item) (display item port))
((integer? item) (display (match:substring m item) port))
((procedure? item) (display (item m) port))
- ((eq? item 'pre)
+ ((eq? item 'pre)
(display
- (make-shared-substring string start (match:start m))
+ (substring string start (match:start m))
port))
((eq? item 'post)
- (if (pair? (cdr matches))
- (next-match (cdr matches) (match:end m))
- (display
- (make-shared-substring string (match:end m))
- port)))
+ (next-match (cdr matches) (match:end m)))
(else (error 'wrong-type-arg item))))
(if (pair? items)