*** empty log message ***
[bpt/guile.git] / ice-9 / regex.scm
CommitLineData
;;;; Copyright (C) 1997, 1999, 2001 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 2.1 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
;;;;
17
;;; Commentary:

;; These procedures are exported:
;; (match:count match)
;; (match:string match)
;; (match:prefix match)
;; (match:suffix match)
;; (regexp-match? match)
;; (regexp-quote string)
;; (match:start match . submatch-num)
;; (match:end match . submatch-num)
;; (match:substring match . submatch-num)
;; (string-match pattern str . start)
;; (regexp-substitute port match . items)
;; (fold-matches regexp string init proc . flags)
;; (list-matches regexp string . flags)
;; (regexp-substitute/global port regexp string . items)

;;; Code:
400d7382
JB
37\f
38;;;; POSIX regex support functions.
39
;; Module declaration: everything listed under :export is the public
;; interface of (ice-9 regex).
(define-module (ice-9 regex)
  :export (;; Accessors on match structures.
           match:count match:string match:prefix match:suffix
           match:start match:end match:substring
           ;; Predicate and pattern quoting.
           regexp-match? regexp-quote
           ;; Matching and substitution.
           string-match regexp-substitute
           fold-matches list-matches
           regexp-substitute/global))
05817d9e 45
;;; FIXME:
;;; It is not clear what should happen if a `match' function
;;; is passed a `match number' which is out of bounds for the
;;; regexp match: return #f, or throw an error? These routines
;;; throw an out-of-range error.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; These procedures are not defined in SCSH, but I found them useful.
54
;; Return the number of (start . end) entries stored in MATCH, i.e.
;; the length of the match vector without its leading string slot.
(define (match:count match)
  (let ((slots (vector-length match)))
    (1- slots)))
57
;; Return the string stored in slot 0 of the match vector MATCH.
(define (match:string match)
  (let ((target (vector-ref match 0)))
    target))
60
;; Return the part of the matched string that precedes the start of
;; entry 0 of MATCH.
(define (match:prefix match)
  (let* ((target (match:string match))
         (match-start (match:start match 0)))
    (substring target 0 match-start)))
400d7382 63
;; Return the part of the matched string that follows the end of
;; entry 0 of MATCH.
(define (match:suffix match)
  (let* ((target (match:string match))
         (match-end (match:end match 0)))
    (substring target match-end)))
400d7382
JB
66
67;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
68;;;; SCSH compatibility routines.
69
;; Return #t if MATCH has the shape of a match structure: a vector
;; whose slot 0 is a string and whose remaining slots are all pairs
;; of integers.  Return #f otherwise.
(define (regexp-match? match)
  ;; True if OBJ is a pair whose car and cdr are both integers.
  (define (offset-pair? obj)
    (and (pair? obj)
         (integer? (car obj))
         (integer? (cdr obj))))
  (and (vector? match)
       (string? (vector-ref match 0))
       (let check ((idx 1))
         (or (>= idx (vector-length match))
             (and (offset-pair? (vector-ref match idx))
                  (check (+ 1 idx)))))))
80
;; Return a string built from STRING in which every character that
;; has a special meaning in a regular expression is quoted, so that
;; the result, used as a pattern, matches STRING literally.
;;
;; Two quoting styles are needed:
;;  - `*', `.', `\', `^', `$' and `[' are prefixed with a backslash.
;;  - `(', `)', `+', `?', `{', `}' and `|' are wrapped in a
;;    one-character bracket expression such as `[|]'.  A backslash
;;    in front of these would make them literal in extended syntax
;;    but can give them a special meaning in basic syntax; a bracket
;;    expression is literal in both.
;; All other characters, including `]', are copied unchanged.
(define (regexp-quote string)
  (call-with-output-string
   (lambda (p)
     (let ((len (string-length string)))
       (let loop ((i 0))
         (if (< i len)
             (let ((c (string-ref string i)))
               (case c
                 ((#\* #\. #\\ #\^ #\$ #\[)
                  (write-char #\\ p)
                  (write-char c p))
                 ((#\( #\) #\+ #\? #\{ #\} #\|)
                  (write-char #\[ p)
                  (write-char c p)
                  (write-char #\] p))
                 (else
                  (write-char c p)))
               (loop (1+ i)))))))))
92
;; Return the start offset recorded in MATCH for entry N, where N is
;; the optional first element of ARGS and defaults to 0.  Entry N
;; lives in vector slot N + 1.  An offset of -1 is reported as #f.
(define (match:start match . args)
  (let* ((slot (if (null? args) 1 (+ 1 (car args))))
         (offset (car (vector-ref match slot))))
    (and (not (= offset -1)) offset)))
99
;; Return the end offset recorded in MATCH for entry N, where N is
;; the optional first element of ARGS and defaults to 0.  Entry N
;; lives in vector slot N + 1.  An offset of -1 is reported as #f.
(define (match:end match . args)
  (let* ((slot (if (null? args) 1 (+ 1 (car args))))
         (offset (cdr (vector-ref match slot))))
    (and (not (= offset -1)) offset)))
106
;; Return the text covered by entry N of MATCH, where N is the
;; optional first element of ARGS and defaults to 0.  Return #f when
;; either offset of that entry is reported as #f.
(define (match:substring match . args)
  (let* ((n (if (null? args) 0 (car args)))
         (from (match:start match n))
         (to (match:end match n)))
    (if (and from to)
        (substring (match:string match) from to)
        #f)))
400d7382 114
;; Compile PATTERN with make-regexp and run it against STR with
;; regexp-exec.  The optional first element of ARGS is the start
;; position handed to regexp-exec; it defaults to 0.
(define (string-match pattern str . args)
  (let* ((compiled (make-regexp pattern))
         (from (if (null? args) 0 (car args))))
    (regexp-exec compiled str from)))
119
;; Write the text described by ITEMS to PORT, or return it as a
;; string when PORT is #f.  Each item is one of:
;;   a string   -- written as is;
;;   an integer -- the substring of MATCH for that entry;
;;   'pre       -- the text before the match;
;;   'post      -- the text after the match.
;; Any other item signals an error.
(define (regexp-substitute port match . items)
  ;; Work out what ITEM stands for and display it on OUT.
  (define (emit item out)
    (display (cond ((string? item) item)
                   ((integer? item) (match:substring match item))
                   ((eq? 'pre item) (match:prefix match))
                   ((eq? 'post item) (match:suffix match))
                   (else (error 'wrong-type-arg item)))
             out))
  (if port
      (for-each (lambda (item) (emit item port)) items)
      ;; No port given: collect the output in a string instead.
      (call-with-output-string
       (lambda (out)
         (for-each (lambda (item) (emit item out)) items)))))
135
;;; If we call fold-matches, below, with a regexp that can match the
;;; empty string, it's not obvious what "all the matches" means. How
;;; many empty strings are there in the string "a"? Our answer:
;;;
;;; This function applies PROC to every non-overlapping, maximal
;;; match of REGEXP in STRING.
;;;
;;; "non-overlapping": There are two non-overlapping matches of "" in
;;; "a" --- one before the `a', and one after. There are three
;;; non-overlapping matches of "q|x*" in "aqb": the empty strings
;;; before `a' and after `b', and `q'. The two empty strings before
;;; and after `q' don't count, because they overlap with the match of
;;; "q".
;;;
;;; "maximal": There are three distinct maximal matches of "x*" in
;;; "axxxb": one before the `a', one covering `xxx', and one after the
;;; `b'. Around or within `xxx', only the match covering all three
;;; x's counts, because the rest are not maximal.
154
;; Fold PROC over the non-overlapping, maximal matches of REGEXP in
;; STRING, from left to right.
;;
;; REGEXP is either a compiled regexp or a pattern string, which is
;; compiled with make-regexp.  PROC is called as (PROC match value),
;; where VALUE is INIT for the first match and the previous result of
;; PROC afterwards; the last result (or INIT if nothing matches) is
;; returned.  The optional first element of FLAGS is the integer flag
;; value given to regexp-exec; it defaults to 0.
(define (fold-matches regexp string init proc . flags)
  (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp)))
        ;; FLAGS is a rest list; regexp-exec needs the flag value
        ;; itself, not the list wrapping it.
        (flags (if (null? flags) 0 (car flags))))
    (let loop ((start 0)
               (value init)
               (abuts #f))              ; True if start abuts a previous match.
      ;; A search resumed in mid-string must not treat START as the
      ;; beginning of a line, or `^' would match again after every
      ;; previous match.
      (let* ((bol (if (zero? start) 0 regexp/notbol))
             (m (if (> start (string-length string))
                    #f
                    (regexp-exec regexp string start (logior flags bol)))))
        (cond
         ((not m) value)
         ((and (= (match:start m) (match:end m)) abuts)
          ;; We matched an empty string, but that would overlap the
          ;; match immediately before.  Try again at a position
          ;; further to the right.
          (loop (+ start 1) value #f))
         (else
          (loop (match:end m) (proc m value) #t)))))))
172
;; Return a list of the matches of REGEXP in STRING that fold-matches
;; visits, in the order it visits them.  FLAGS is passed through to
;; fold-matches.
(define (list-matches regexp string . flags)
  ;; Consing onto the accumulator collects the matches newest-first,
  ;; so the list is reversed before it is returned.
  (let ((newest-first (apply fold-matches regexp string '() cons flags)))
    (reverse! newest-first)))
175
;; Apply the substitution described by ITEMS to every match that
;; list-matches finds for REGEXP in STRING, writing the output to
;; PORT, or returning it as a string when PORT is #f.
;;
;; For each match, the items are processed in order:
;;   a string    -- written as is;
;;   an integer  -- the substring of the match for that entry;
;;   a procedure -- called with the match; its result is written;
;;   'pre        -- the text between the end of the previous match
;;                  (or the start of STRING) and this match;
;;   'post       -- continue with the next match; once no matches
;;                  remain, the rest of STRING is written.
;; Any other item signals an error.
(define (regexp-substitute/global port regexp string . items)
  (if (not port)
      ;; No port given: collect the output in a string instead.
      (call-with-output-string
       (lambda (out)
         (apply regexp-substitute/global out regexp string items)))

      ;; Walk the set of non-overlapping, maximal matches.
      (let walk ((remaining (list-matches regexp string))
                 (resume 0))            ; Where the previous match ended.
        (cond
         ((null? remaining)
          (display (substring string resume) port))
         (else
          (let ((m (car remaining)))

            ;; Handle a single item for the current match M.
            (define (emit item)
              (cond
               ((string? item) (display item port))
               ((integer? item) (display (match:substring m item) port))
               ((procedure? item) (display (item m) port))
               ((eq? item 'pre)
                (display (substring string resume (match:start m)) port))
               ((eq? item 'post)
                (walk (cdr remaining) (match:end m)))
               (else (error 'wrong-type-arg item))))

            ;; Process the items by hand rather than with for-each, so
            ;; that a 'post in last position is handled by a tail call.
            (let each ((todo items))
              (if (pair? todo)
                  (let ((item (car todo))
                        (rest (cdr todo)))
                    (cond
                     ((null? rest)
                      (emit item))      ; This is a tail call.
                     (else
                      (emit item)       ; This is not.
                      (each rest))))))))))))