* tests/syntax.test: Added test cases for 'cond =>' syntax with
[bpt/guile.git] / ice-9 / regex.scm
CommitLineData
50ff2ecb 1;;;; Copyright (C) 1997, 1999 Free Software Foundation, Inc.
400d7382
JB
2;;;;
3;;;; This program is free software; you can redistribute it and/or modify
4;;;; it under the terms of the GNU General Public License as published by
5;;;; the Free Software Foundation; either version 2, or (at your option)
6;;;; any later version.
7;;;;
8;;;; This program is distributed in the hope that it will be useful,
9;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11;;;; GNU General Public License for more details.
12;;;;
13;;;; You should have received a copy of the GNU General Public License
14;;;; along with this software; see the file COPYING. If not, write to
c6e23ea2
JB
15;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
16;;;; Boston, MA 02111-1307 USA
a482f2cc
MV
17;;;;
18;;;; As a special exception, the Free Software Foundation gives permission
19;;;; for additional uses of the text contained in its release of GUILE.
20;;;;
21;;;; The exception is that, if you link the GUILE library with other files
22;;;; to produce an executable, this does not by itself cause the
23;;;; resulting executable to be covered by the GNU General Public License.
24;;;; Your use of that executable is in no way restricted on account of
25;;;; linking the GUILE library code into it.
26;;;;
27;;;; This exception does not however invalidate any other reasons why
28;;;; the executable file might be covered by the GNU General Public License.
29;;;;
30;;;; This exception applies only to the code released by the
31;;;; Free Software Foundation under the name GUILE. If you copy
32;;;; code from other Free Software Foundation releases into a copy of
33;;;; GUILE, as the General Public License permits, the exception does
34;;;; not apply to the code that you add in this way. To avoid misleading
35;;;; anyone as to the status of such modified files, you must delete
36;;;; this exception notice from them.
37;;;;
38;;;; If you write modifications of your own for GUILE, it is your choice
39;;;; whether to permit this exception to apply to your modifications.
40;;;; If you do not wish that, delete this exception notice.
400d7382
JB
41;;;;
42\f
43;;;; POSIX regex support functions.
44
05817d9e
JB
45(define-module (ice-9 regex))
46
400d7382
JB
47;;; FIXME:
48;;; It is not clear what should happen if a `match' function
49;;; is passed a `match number' which is out of bounds for the
50;;; regexp match: return #f, or throw an error? These routines
51;;; throw an out-of-range error.
52
53;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
54;;;; These procedures are not defined in SCSH, but I found them useful.
55
05817d9e 56(define-public (match:count match)
400d7382
JB
57 (- (vector-length match) 1))
58
05817d9e 59(define-public (match:string match)
400d7382
JB
60 (vector-ref match 0))
61
05817d9e 62(define-public (match:prefix match)
4e15fee8 63 (substring (match:string match) 0 (match:start match 0)))
400d7382 64
05817d9e 65(define-public (match:suffix match)
4e15fee8 66 (substring (match:string match) (match:end match 0)))
400d7382
JB
67
68;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
69;;;; SCSH compatibility routines.
70
05817d9e 71(define-public (regexp-match? match)
400d7382
JB
72 (and (vector? match)
73 (string? (vector-ref match 0))
74 (let loop ((i 1))
75 (cond ((>= i (vector-length match)) #t)
76 ((and (pair? (vector-ref match i))
77 (integer? (car (vector-ref match i)))
78 (integer? (cdr (vector-ref match i))))
79 (loop (+ 1 i)))
80 (else #f)))))
81
05817d9e 82(define-public (regexp-quote regexp)
400d7382
JB
83 (call-with-output-string
84 (lambda (p)
85 (let loop ((i 0))
86 (and (< i (string-length regexp))
87 (begin
88 (case (string-ref regexp i)
89 ((#\* #\. #\( #\) #\+ #\? #\\ #\^ #\$ #\{ #\})
90 (write-char #\\ p)))
91 (write-char (string-ref regexp i) p)
92 (loop (1+ i))))))))
93
05817d9e 94(define-public (match:start match . args)
400d7382
JB
95 (let* ((matchnum (if (pair? args)
96 (+ 1 (car args))
97 1))
98 (start (car (vector-ref match matchnum))))
99 (if (= start -1) #f start)))
100
05817d9e 101(define-public (match:end match . args)
400d7382
JB
102 (let* ((matchnum (if (pair? args)
103 (+ 1 (car args))
104 1))
105 (end (cdr (vector-ref match matchnum))))
106 (if (= end -1) #f end)))
107
05817d9e 108(define-public (match:substring match . args)
400d7382
JB
109 (let* ((matchnum (if (pair? args)
110 (car args)
111 0))
112 (start (match:start match matchnum))
113 (end (match:end match matchnum)))
4e15fee8 114 (and start end (substring (match:string match) start end))))
400d7382 115
05817d9e 116(define-public (string-match pattern str . args)
400d7382
JB
117 (let ((rx (make-regexp pattern))
118 (start (if (pair? args) (car args) 0)))
119 (regexp-exec rx str start)))
120
05817d9e 121(define-public (regexp-substitute port match . items)
400d7382
JB
122 ;; If `port' is #f, send output to a string.
123 (if (not port)
124 (call-with-output-string
125 (lambda (p)
126 (apply regexp-substitute p match items)))
127
128 ;; Otherwise, process each substitution argument in `items'.
129 (for-each (lambda (obj)
130 (cond ((string? obj) (display obj port))
131 ((integer? obj) (display (match:substring match obj) port))
132 ((eq? 'pre obj) (display (match:prefix match) port))
133 ((eq? 'post obj) (display (match:suffix match) port))
134 (else (error 'wrong-type-arg obj))))
135 items)))
136
50ff2ecb
JB
137;;; If we call fold-matches, below, with a regexp that can match the
138;;; empty string, it's not obvious what "all the matches" means. How
139;;; many empty strings are there in the string "a"? Our answer:
140;;;
141;;; This function applies PROC to every non-overlapping, maximal
142;;; match of REGEXP in STRING.
143;;;
144;;; "non-overlapping": There are two non-overlapping matches of "" in
145;;; "a" --- one before the `a', and one after. There are three
146;;; non-overlapping matches of "q|x*" in "aqb": the empty strings
147;;; before `a' and after `b', and `q'. The two empty strings before
148;;; and after `q' don't count, because they overlap with the match of
149;;; "q".
150;;;
151;;; "maximal": There are three distinct maximal matches of "x*" in
152;;; "axxxb": one before the `a', one covering `xxx', and one after the
153;;; `b'. Around or within `xxx', only the match covering all three
154;;; x's counts, because the rest are not maximal.
155
156(define-public (fold-matches regexp string init proc . flags)
157 (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp)))
158 (flags (if (null? flags) 0 flags)))
159 (let loop ((start 0)
160 (value init)
161 (abuts #f)) ; True if start abuts a previous match.
162 (let ((m (if (> start (string-length string)) #f
163 (regexp-exec regexp string start flags))))
164 (cond
165 ((not m) value)
166 ((and (= (match:start m) (match:end m)) abuts)
167 ;; We matched an empty string, but that would overlap the
168 ;; match immediately before. Try again at a position
169 ;; further to the right.
170 (loop (+ start 1) value #f))
171 (else
172 (loop (match:end m) (proc m value) #t)))))))
173
174(define-public (list-matches regexp string . flags)
175 (reverse! (apply fold-matches regexp string '() cons flags)))
176
05817d9e 177(define-public (regexp-substitute/global port regexp string . items)
50ff2ecb 178
400d7382
JB
179 ;; If `port' is #f, send output to a string.
180 (if (not port)
181 (call-with-output-string
182 (lambda (p)
183 (apply regexp-substitute/global p regexp string items)))
184
50ff2ecb
JB
185 ;; Walk the set of non-overlapping, maximal matches.
186 (let next-match ((matches (list-matches regexp string))
187 (start 0))
f5641401 188 (if (null? matches)
4e15fee8 189 (display (substring string start) port)
50ff2ecb
JB
190 (let ((m (car matches)))
191
192 ;; Process all of the items for this match. Don't use
193 ;; for-each, because we need to make sure 'post at the
194 ;; end of the item list is a tail call.
195 (let next-item ((items items))
196
197 (define (do-item item)
198 (cond
199 ((string? item) (display item port))
200 ((integer? item) (display (match:substring m item) port))
201 ((procedure? item) (display (item m) port))
202 ((eq? item 'pre)
203 (display
4e15fee8 204 (substring string start (match:start m))
50ff2ecb
JB
205 port))
206 ((eq? item 'post)
f5641401 207 (next-match (cdr matches) (match:end m)))
50ff2ecb
JB
208 (else (error 'wrong-type-arg item))))
209
210 (if (pair? items)
211 (if (null? (cdr items))
212 (do-item (car items)) ; This is a tail call.
213 (begin
214 (do-item (car items)) ; This is not.
215 (next-item (cdr items)))))))))))