1 ;;;; Copyright (C) 1997, 1999 Free Software Foundation, Inc.
3 ;;;; This program is free software; you can redistribute it and/or modify
4 ;;;; it under the terms of the GNU General Public License as published by
5 ;;;; the Free Software Foundation; either version 2, or (at your option)
6 ;;;; any later version.
8 ;;;; This program is distributed in the hope that it will be useful,
9 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 ;;;; GNU General Public License for more details.
13 ;;;; You should have received a copy of the GNU General Public License
14 ;;;; along with this software; see the file COPYING. If not, write to
15 ;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
16 ;;;; Boston, MA 02111-1307 USA
19 ;;;; POSIX regex support functions.
21 (define-module (ice-9 regex))
24 ;;; It is not clear what should happen if a `match' function
25 ;;; is passed a `match number' which is out of bounds for the
26 ;;; regexp match: return #f, or throw an error? These routines
27 ;;; throw an out-of-range error.
29 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30 ;;;; These procedures are not defined in SCSH, but I found them useful.
;;; Return the number of match slots held by MATCH, i.e. every element
;;; of the match vector except element 0 (which `regexp-match?' expects
;;; to be the target string).
(define-public (match:count match)
  (let ((slots (vector-length match)))
    (- slots 1)))
;;; Return the target string recorded in MATCH.  The string lives in
;;; element 0 of the match vector; the remaining elements are the
;;; (start . end) pairs.
(define-public (match:string match)
  (vector-ref match 0))
;;; Return the part of the target string that precedes the overall
;;; match (submatch 0), as a shared substring running from index 0 up
;;; to the start of the match.
(define-public (match:prefix match)
  (make-shared-substring (match:string match)
                         0
                         (match:start match 0)))
;;; Return the part of the target string that follows the overall
;;; match (submatch 0), as a shared substring running from the end of
;;; the match to the end of the string.
(define-public (match:suffix match)
  (make-shared-substring (match:string match)
                         (match:end match 0)))
47 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;;;; SCSH compatibility routines.
;;; Return #t if MATCH has the shape of a regexp match object: a
;;; non-empty vector whose element 0 is a string and whose remaining
;;; elements are all pairs of integers.  Return #f for anything else.
(define-public (regexp-match? match)
  (and (vector? match)
       ;; Guard against the empty vector, for which (vector-ref match 0)
       ;; would signal an error instead of answering #f.
       (> (vector-length match) 0)
       (string? (vector-ref match 0))
       ;; Every slot after the string must be an (integer . integer) pair.
       (let loop ((i 1))
         (cond ((>= i (vector-length match)) #t)
               ((let ((slot (vector-ref match i)))
                  (and (pair? slot)
                       (integer? (car slot))
                       (integer? (cdr slot))))
                (loop (+ i 1)))
               (else #f)))))
;;; Return a copy of the string REGEXP in which every character that
;;; is special in a regular expression is preceded by a backslash, so
;;; that the result, used as a pattern, matches REGEXP literally.
(define-public (regexp-quote regexp)
  (call-with-output-string
   (lambda (p)
     (let loop ((i 0))
       (if (< i (string-length regexp))
           (let ((c (string-ref regexp i)))
             (case c
               ;; `[' and `|' are regexp operators too; leaving them
               ;; unquoted would turn e.g. "a|b" into an alternation.
               ((#\* #\. #\( #\) #\+ #\? #\\ #\^ #\$ #\{ #\} #\[ #\|)
                (write-char #\\ p)))
             (write-char c p)
             (loop (+ i 1))))))))
;;; Return the start index of submatch N of MATCH, where N is the
;;; optional argument and defaults to 0.  Return #f if the recorded
;;; start is -1.  An N that is out of bounds for the match vector is
;;; not caught here; `vector-ref' signals the error.
(define-public (match:start match . args)
  ;; Element 0 of the vector is the target string, so the pair for
  ;; submatch N is stored at index N+1.
  (let* ((matchnum (if (pair? args)
                       (+ 1 (car args))
                       1))
         (start (car (vector-ref match matchnum))))
    (if (= start -1) #f start)))
;;; Return the end index of submatch N of MATCH, where N is the
;;; optional argument and defaults to 0.  Return #f if the recorded
;;; end is -1.  An N that is out of bounds for the match vector is
;;; not caught here; `vector-ref' signals the error.
(define-public (match:end match . args)
  ;; Element 0 of the vector is the target string, so the pair for
  ;; submatch N is stored at index N+1.
  (let* ((matchnum (if (pair? args)
                       (+ 1 (car args))
                       1))
         (end (cdr (vector-ref match matchnum))))
    (if (= end -1) #f end)))
;;; Return the text of submatch N of MATCH as a shared substring of
;;; the target string, where N is the optional argument and defaults
;;; to 0.  Return #f if either bound of the submatch is #f.
(define-public (match:substring match . args)
  ;; The submatch number is passed through unchanged; `match:start'
  ;; and `match:end' do the translation to a vector index.
  (let* ((matchnum (if (pair? args)
                       (car args)
                       0))
         (start (match:start match matchnum))
         (end   (match:end match matchnum)))
    (and start end (make-shared-substring (match:string match)
                                          start
                                          end))))
;;; Compile the string PATTERN into a regexp and run it against STR.
;;; The optional third argument is the index in STR at which to begin
;;; searching; it defaults to 0.  The result is whatever `regexp-exec'
;;; returns.
(define-public (string-match pattern str . args)
  (let* ((compiled (make-regexp pattern))
         (from (if (null? args) 0 (car args))))
    (regexp-exec compiled str from)))
;;; Write to PORT the text described by ITEMS, interpreted relative to
;;; MATCH.  Each item is one of:
;;;   a string   -- written as is;
;;;   an integer -- the text of that submatch is written;
;;;   'pre       -- the part of the target string before the match;
;;;   'post      -- the part of the target string after the match.
;;; Any other item signals a `wrong-type-arg' error.  If PORT is #f,
;;; the output is collected and returned as a string instead.
(define-public (regexp-substitute port match . items)
  ;; If `port' is #f, send output to a string.
  (if (not port)
      (call-with-output-string
       (lambda (p)
         (apply regexp-substitute p match items)))

      ;; Otherwise, process each substitution argument in `items'.
      (for-each (lambda (obj)
                  (cond ((string? obj) (display obj port))
                        ((integer? obj) (display (match:substring match obj) port))
                        ((eq? 'pre obj) (display (match:prefix match) port))
                        ((eq? 'post obj) (display (match:suffix match) port))
                        (else (error 'wrong-type-arg obj))))
                items)))
118 ;;; If we call fold-matches, below, with a regexp that can match the
119 ;;; empty string, it's not obvious what "all the matches" means. How
120 ;;; many empty strings are there in the string "a"? Our answer:
122 ;;; This function applies PROC to every non-overlapping, maximal
123 ;;; match of REGEXP in STRING.
125 ;;; "non-overlapping": There are two non-overlapping matches of "" in
126 ;;; "a" --- one before the `a', and one after. There are three
127 ;;; non-overlapping matches of "q|x*" in "aqb": the empty strings
128 ;;; before `a' and after `b', and `q'. The two empty strings before
;;; and after `q' don't count, because they overlap with the match of
;;; "q".
132 ;;; "maximal": There are three distinct maximal matches of "x*" in
133 ;;; "axxxb": one before the `a', one covering `xxx', and one after the
134 ;;; `b'. Around or within `xxx', only the match covering all three
135 ;;; x's counts, because the rest are not maximal.
;;; Apply PROC to every non-overlapping, maximal match of REGEXP in
;;; STRING, threading an accumulator through the calls: PROC receives
;;; the match object and the current value, and its result becomes the
;;; next value.  INIT is the initial value; the final value is
;;; returned.  REGEXP may be a compiled regexp or a pattern string.
;;; The optional extra argument is passed to `regexp-exec' as its
;;; flags argument and defaults to 0.
(define-public (fold-matches regexp string init proc . flags)
  (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp)))
        ;; `flags' is a rest list; hand `regexp-exec' the flag value
        ;; itself, not the one-element list wrapping it.
        (flags (if (null? flags) 0 (car flags))))
    (let loop ((start 0)
               (value init)
               (abuts #f))              ; True if start abuts a previous match.
      ;; `start' may equal the string length, so that an empty match
      ;; at the very end of the string can still be found.
      (let ((m (if (> start (string-length string)) #f
                   (regexp-exec regexp string start flags))))
        (cond
         ((not m) value)
         ((and (= (match:start m) (match:end m)) abuts)
          ;; We matched an empty string, but that would overlap the
          ;; match immediately before.  Try again at a position
          ;; further to the right.
          (loop (+ start 1) value #f))
         (else
          (loop (match:end m) (proc m value) #t)))))))
;;; Return a list of the match objects that `fold-matches' visits for
;;; REGEXP in STRING, in the order in which they are visited.  Any
;;; extra arguments are handed on to `fold-matches' unchanged.
(define-public (list-matches regexp string . flags)
  ;; Consing onto the accumulator collects the matches newest-first,
  ;; so the finished list is reversed in place.
  (let ((newest-first (apply fold-matches regexp string '() cons flags)))
    (reverse! newest-first)))
;;; For each non-overlapping, maximal match of REGEXP in STRING (as
;;; produced by `list-matches'), write to PORT the text described by
;;; ITEMS.  Each item is one of:
;;;   a string    -- written as is;
;;;   an integer  -- the text of that submatch is written;
;;;   a procedure -- called with the match object; its result is
;;;                  written;
;;;   'pre        -- the text between the end of the previous match
;;;                  (or the start of STRING) and this match;
;;;   'post       -- continue with the next match; once no matches
;;;                  remain, the rest of STRING is written.
;;; Any other item signals a `wrong-type-arg' error.  If PORT is #f,
;;; the output is collected and returned as a string instead.
(define-public (regexp-substitute/global port regexp string . items)

  ;; If `port' is #f, send output to a string.
  (if (not port)
      (call-with-output-string
       (lambda (p)
         (apply regexp-substitute/global p regexp string items)))

      ;; Walk the set of non-overlapping, maximal matches.
      (let next-match ((matches (list-matches regexp string))
                       (start 0))
        (if (null? matches)
            ;; No matches left: emit whatever follows the last match.
            (display (make-shared-substring string start) port)
            (let ((m (car matches)))

              ;; Process all of the items for this match.  Don't use
              ;; for-each, because we need to make sure 'post at the
              ;; end of the item list is a tail call.
              (let next-item ((items items))

                (define (do-item item)
                  (cond
                   ((string? item)    (display item port))
                   ((integer? item)   (display (match:substring m item) port))
                   ((procedure? item) (display (item m) port))
                   ((eq? item 'pre)
                    (display
                     (make-shared-substring string start (match:start m))
                     port))
                   ((eq? item 'post)
                    (next-match (cdr matches) (match:end m)))
                   (else (error 'wrong-type-arg item))))

                (if (pair? items)
                    (if (null? (cdr items))
                        (do-item (car items)) ; This is a tail call.
                        (begin
                          (do-item (car items)) ; This is not.
                          (next-item (cdr items)))))))))))