*** empty log message ***
[bpt/guile.git] / ice-9 / regex.scm
CommitLineData
50ff2ecb 1;;;; Copyright (C) 1997, 1999 Free Software Foundation, Inc.
400d7382
JB
2;;;;
3;;;; This program is free software; you can redistribute it and/or modify
4;;;; it under the terms of the GNU General Public License as published by
5;;;; the Free Software Foundation; either version 2, or (at your option)
6;;;; any later version.
7;;;;
8;;;; This program is distributed in the hope that it will be useful,
9;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11;;;; GNU General Public License for more details.
12;;;;
13;;;; You should have received a copy of the GNU General Public License
14;;;; along with this software; see the file COPYING. If not, write to
c6e23ea2
JB
15;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
16;;;; Boston, MA 02111-1307 USA
400d7382
JB
17;;;;
18\f
19;;;; POSIX regex support functions.
20
05817d9e
JB
21(define-module (ice-9 regex))
22
400d7382
JB
23;;; FIXME:
24;;; It is not clear what should happen if a `match' function
25;;; is passed a `match number' which is out of bounds for the
26;;; regexp match: return #f, or throw an error? These routines
27;;; throw an out-of-range error.
28
29;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30;;;; These procedures are not defined in SCSH, but I found them useful.
31
;;; Return the number of (start . end) entries held in MATCH: the
;;; length of the match vector, not counting slot 0 (the target string).
(define-public (match:count match)
  (let ((slots (vector-length match)))
    (- slots 1)))
34
;;; Return the target string of MATCH, which is stored in slot 0 of
;;; the match vector.
(define-public (match:string match)
  (let ((target (vector-ref match 0)))
    target))
37
;;; Return the part of MATCH's target string that precedes the whole
;;; match (submatch 0), as a shared substring of the target.
(define-public (match:prefix match)
  (let ((target (match:string match))
        (stop (match:start match 0)))
    (make-shared-substring target 0 stop)))
42
;;; Return the part of MATCH's target string that follows the whole
;;; match (submatch 0), as a shared substring of the target.
(define-public (match:suffix match)
  (let ((target (match:string match))
        (from (match:end match 0)))
    (make-shared-substring target from)))
46
47;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48;;;; SCSH compatibility routines.
49
;;; Return #t if MATCH has the shape of a regexp match object: a
;;; non-empty vector whose slot 0 is a string and whose remaining
;;; slots are all pairs of integers.  Return #f for anything else.
(define-public (regexp-match? match)
  (and (vector? match)
       ;; An empty vector has no slot 0; answer #f rather than letting
       ;; `vector-ref' signal an out-of-range error from a predicate.
       (> (vector-length match) 0)
       (string? (vector-ref match 0))
       (let loop ((i 1))
         (cond ((>= i (vector-length match)) #t)
               ((let ((entry (vector-ref match i)))
                  (and (pair? entry)
                       (integer? (car entry))
                       (integer? (cdr entry))))
                (loop (+ 1 i)))
               (else #f)))))
60
;;; Return a copy of the string REGEXP in which every character that is
;;; special in a POSIX extended regular expression is preceded by a
;;; backslash, so that the result matches REGEXP literally.
(define-public (regexp-quote regexp)
  (call-with-output-string
   (lambda (p)
     (let ((len (string-length regexp)))
       (let loop ((i 0))
         (if (< i len)
             (let ((c (string-ref regexp i)))
               (case c
                 ;; `[' and `|' are included: left unescaped, `[' opens
                 ;; a bracket expression and `|' is alternation.  Once
                 ;; `[' is escaped, a lone `]' is an ordinary character.
                 ((#\* #\. #\( #\) #\+ #\? #\\ #\^ #\$ #\{ #\} #\[ #\|)
                  (write-char #\\ p)))
               (write-char c p)
               (loop (1+ i)))))))))
72
;;; Return the start offset of submatch N of MATCH, where N is the
;;; optional first element of ARGS (default 0, the whole match).
;;; An offset stored as -1 is reported as #f.
(define-public (match:start match . args)
  (let* ((n (if (pair? args) (car args) 0))
         ;; Slot 0 holds the target string, so submatch N is in slot N+1.
         (entry (vector-ref match (+ n 1)))
         (from (car entry)))
    (and (not (= from -1)) from)))
79
;;; Return the end offset of submatch N of MATCH, where N is the
;;; optional first element of ARGS (default 0, the whole match).
;;; An offset stored as -1 is reported as #f.
(define-public (match:end match . args)
  (let* ((n (if (pair? args) (car args) 0))
         ;; Slot 0 holds the target string, so submatch N is in slot N+1.
         (entry (vector-ref match (+ n 1)))
         (to (cdr entry)))
    (and (not (= to -1)) to)))
86
;;; Return the text of submatch N of MATCH as a shared substring of the
;;; target string, where N is the optional first element of ARGS
;;; (default 0).  Return #f if either offset of that submatch is #f.
(define-public (match:substring match . args)
  (let ((n (if (pair? args) (car args) 0)))
    (let ((from (match:start match n))
          (to (match:end match n)))
      (if (and from to)
          (make-shared-substring (match:string match) from to)
          #f))))
96
;;; Compile PATTERN with `make-regexp' and run it against STR with
;;; `regexp-exec', returning whatever `regexp-exec' returns.  The
;;; optional first element of ARGS is the offset in STR at which to
;;; start searching (default 0).
(define-public (string-match pattern str . args)
  (let* ((offset (if (pair? args) (car args) 0))
         (rx (make-regexp pattern)))
    (regexp-exec rx str offset)))
101
;;; Write each element of ITEMS to PORT, interpreted against MATCH:
;;;   a string   -- written as is;
;;;   an integer -- the text of that submatch of MATCH;
;;;   'pre       -- the text before the match;
;;;   'post      -- the text after the match.
;;; Any other item signals a `wrong-type-arg' error.  If PORT is #f,
;;; the output is collected and returned as a string instead.
(define-public (regexp-substitute port match . items)

  ;; Work out the text that one item stands for and write it to OUT.
  (define (emit obj out)
    (display (cond ((string? obj) obj)
                   ((integer? obj) (match:substring match obj))
                   ((eq? 'pre obj) (match:prefix match))
                   ((eq? 'post obj) (match:suffix match))
                   (else (error 'wrong-type-arg obj)))
             out))

  (if port
      (for-each (lambda (obj) (emit obj port)) items)
      ;; No port given: redo the work against a string port.
      (call-with-output-string
       (lambda (p)
         (apply regexp-substitute p match items)))))
117
50ff2ecb
JB
118;;; If we call fold-matches, below, with a regexp that can match the
119;;; empty string, it's not obvious what "all the matches" means. How
120;;; many empty strings are there in the string "a"? Our answer:
121;;;
122;;; This function applies PROC to every non-overlapping, maximal
123;;; match of REGEXP in STRING.
124;;;
125;;; "non-overlapping": There are two non-overlapping matches of "" in
126;;; "a" --- one before the `a', and one after. There are three
127;;; non-overlapping matches of "q|x*" in "aqb": the empty strings
128;;; before `a' and after `b', and `q'. The two empty strings before
129;;; and after `q' don't count, because they overlap with the match of
130;;; "q".
131;;;
132;;; "maximal": There are three distinct maximal matches of "x*" in
133;;; "axxxb": one before the `a', one covering `xxx', and one after the
134;;; `b'. Around or within `xxx', only the match covering all three
135;;; x's counts, because the rest are not maximal.
136
;;; Fold PROC over every non-overlapping, maximal match of REGEXP in
;;; STRING, from left to right.  REGEXP may be a compiled regexp or a
;;; pattern string (compiled here with `make-regexp').  PROC is called
;;; as (PROC match value), starting with VALUE = INIT; its result
;;; becomes the next VALUE, and the final VALUE is returned.  The
;;; optional first element of FLAGS is the flags argument handed to
;;; `regexp-exec' (default 0).
(define-public (fold-matches regexp string init proc . flags)
  (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp)))
        ;; FLAGS is a rest list; `regexp-exec' wants the flag value
        ;; itself, so unwrap the first element rather than passing
        ;; the whole list along.
        (flags (if (null? flags) 0 (car flags))))
    (let loop ((start 0)
               (value init)
               (abuts #f))              ; True if start abuts a previous match.
      (let ((m (if (> start (string-length string)) #f
                   (regexp-exec regexp string start flags))))
        (cond
         ((not m) value)
         ((and (= (match:start m) (match:end m)) abuts)
          ;; We matched an empty string, but that would overlap the
          ;; match immediately before.  Try again at a position
          ;; further to the right.
          (loop (+ start 1) value #f))
         (else
          (loop (match:end m) (proc m value) #t)))))))
154
;;; Return a list of the matches of REGEXP in STRING found by
;;; `fold-matches', in left-to-right order.  FLAGS is passed through
;;; to `fold-matches' unchanged.
(define-public (list-matches regexp string . flags)
  ;; Folding with `cons' collects the matches newest-first, so the
  ;; accumulated list is reversed in place before it is returned.
  (let ((newest-first (apply fold-matches regexp string '() cons flags)))
    (reverse! newest-first)))
157
;;; For each non-overlapping, maximal match of REGEXP in STRING, write
;;; every element of ITEMS to PORT, interpreted against that match:
;;;   a string    -- written as is;
;;;   an integer  -- the text of that submatch;
;;;   a procedure -- called with the match; its result is written;
;;;   'pre        -- the text between the previous match (or the start
;;;                  of STRING) and this match;
;;;   'post       -- continue with the next match, or, at the last
;;;                  match, write the rest of STRING.
;;; Any other item signals a `wrong-type-arg' error.  If REGEXP does
;;; not match anywhere, STRING is written unchanged.  If PORT is #f,
;;; the output is collected and returned as a string instead.
(define-public (regexp-substitute/global port regexp string . items)

  ;; If `port' is #f, send output to a string.
  (if (not port)
      (call-with-output-string
       (lambda (p)
         (apply regexp-substitute/global p regexp string items)))

      ;; Walk the set of non-overlapping, maximal matches.
      (let next-match ((matches (list-matches regexp string))
                       (start 0))
        (if (pair? matches)
            (let ((m (car matches)))

              ;; Process all of the items for this match.  Don't use
              ;; for-each, because we need to make sure 'post at the
              ;; end of the item list is a tail call.
              (let next-item ((items items))

                (define (do-item item)
                  (cond
                   ((string? item)    (display item port))
                   ((integer? item)   (display (match:substring m item) port))
                   ((procedure? item) (display (item m) port))
                   ((eq? item 'pre)
                    (display
                     (make-shared-substring string start (match:start m))
                     port))
                   ((eq? item 'post)
                    (if (pair? (cdr matches))
                        (next-match (cdr matches) (match:end m))
                        (display
                         (make-shared-substring string (match:end m))
                         port)))
                   (else (error 'wrong-type-arg item))))

                (if (pair? items)
                    (if (null? (cdr items))
                        (do-item (car items)) ; This is a tail call.
                        (begin
                          (do-item (car items)) ; This is not.
                          (next-item (cdr items)))))))

            ;; `next-match' is only re-entered with a non-empty list,
            ;; so this branch is reached only when REGEXP matched
            ;; nowhere.  Nothing was substituted: pass STRING through
            ;; unchanged instead of silently dropping it.
            (display string port)))))