Added comments to functions that are used by "modules.c".
[bpt/guile.git] / ice-9 / regex.scm
1 ;;;; Copyright (C) 1997, 1999, 2001 Free Software Foundation, Inc.
2 ;;;;
3 ;;;; This program is free software; you can redistribute it and/or modify
4 ;;;; it under the terms of the GNU General Public License as published by
5 ;;;; the Free Software Foundation; either version 2, or (at your option)
6 ;;;; any later version.
7 ;;;;
8 ;;;; This program is distributed in the hope that it will be useful,
9 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 ;;;; GNU General Public License for more details.
12 ;;;;
13 ;;;; You should have received a copy of the GNU General Public License
14 ;;;; along with this software; see the file COPYING. If not, write to
15 ;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
16 ;;;; Boston, MA 02111-1307 USA
17 ;;;;
18 ;;;; As a special exception, the Free Software Foundation gives permission
19 ;;;; for additional uses of the text contained in its release of GUILE.
20 ;;;;
21 ;;;; The exception is that, if you link the GUILE library with other files
22 ;;;; to produce an executable, this does not by itself cause the
23 ;;;; resulting executable to be covered by the GNU General Public License.
24 ;;;; Your use of that executable is in no way restricted on account of
25 ;;;; linking the GUILE library code into it.
26 ;;;;
27 ;;;; This exception does not however invalidate any other reasons why
28 ;;;; the executable file might be covered by the GNU General Public License.
29 ;;;;
30 ;;;; This exception applies only to the code released by the
31 ;;;; Free Software Foundation under the name GUILE. If you copy
32 ;;;; code from other Free Software Foundation releases into a copy of
33 ;;;; GUILE, as the General Public License permits, the exception does
34 ;;;; not apply to the code that you add in this way. To avoid misleading
35 ;;;; anyone as to the status of such modified files, you must delete
36 ;;;; this exception notice from them.
37 ;;;;
38 ;;;; If you write modifications of your own for GUILE, it is your choice
39 ;;;; whether to permit this exception to apply to your modifications.
40 ;;;; If you do not wish that, delete this exception notice.
41 ;;;;
42 \f
43 ;;;; POSIX regex support functions.
44
45 (define-module (ice-9 regex)
46 :export (match:count match:string match:prefix match:suffix
47 regexp-match? regexp-quote match:start match:end match:substring
48 string-match regexp-substitute fold-matches list-matches
49 regexp-substitute/global))
50
51 ;;; FIXME:
52 ;;; It is not clear what should happen if a `match' function
53 ;;; is passed a `match number' which is out of bounds for the
54 ;;; regexp match: return #f, or throw an error? These routines
55 ;;; throw an out-of-range error.
56
57 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
58 ;;;; These procedures are not defined in SCSH, but I found them useful.
59
;; Return the number of (start . end) slots stored in MATCH, i.e. the
;; length of the match vector minus the slot holding the subject string.
(define (match:count match)
  (1- (vector-length match)))
62
;; Return the subject string recorded in MATCH (element 0 of the
;; match vector).
(define (match:string match)
  (let ((subject-slot 0))
    (vector-ref match subject-slot)))
65
;; Return the part of the subject string that precedes the start of
;; the whole match (submatch 0).
(define (match:prefix match)
  (let ((subject (match:string match))
        (match-begin (match:start match 0)))
    (substring subject 0 match-begin)))
68
;; Return the part of the subject string that follows the end of the
;; whole match (submatch 0).
(define (match:suffix match)
  (let ((subject (match:string match))
        (match-finish (match:end match 0)))
    (substring subject match-finish)))
71
72 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
73 ;;;; SCSH compatibility routines.
74
;; Return #t if MATCH has the shape of a match object as used by the
;; accessors in this file: a non-empty vector whose element 0 is a
;; string and whose remaining elements are all pairs of integers.
;; Return #f for anything else.
;;
;; An empty vector is rejected up front; without that check the
;; `vector-ref' of element 0 would signal an out-of-range error
;; instead of letting the predicate answer #f.
(define (regexp-match? match)
  (and (vector? match)
       (let ((len (vector-length match)))
         (and (> len 0)
              (string? (vector-ref match 0))
              ;; Every slot after the subject string must be an
              ;; (integer . integer) pair.
              (let loop ((i 1))
                (or (>= i len)
                    (let ((slot (vector-ref match i)))
                      (and (pair? slot)
                           (integer? (car slot))
                           (integer? (cdr slot))
                           (loop (+ 1 i))))))))))
85
;; Return a copy of the string REGEXP in which every character from
;; the special set below is preceded by a backslash; all other
;; characters are copied unchanged.
;;
;; The special set includes `[' and `|' in addition to the other
;; operators: left unescaped, `[' would open a bracket expression and
;; `|' would act as alternation, so the result would not match the
;; input literally.  `]' is left alone.
;; NOTE(review): backslash escaping of `(', `)', `+', `?', `{', `}'
;; and `|' presumably targets extended regexps only -- confirm before
;; using the result with basic regexps.
(define (regexp-quote regexp)
  (call-with-output-string
   (lambda (p)
     (let loop ((i 0))
       (and (< i (string-length regexp))
            (let ((c (string-ref regexp i)))
              (case c
                ((#\* #\. #\( #\) #\+ #\? #\\ #\^ #\$ #\{ #\} #\[ #\|)
                 (write-char #\\ p)))
              (write-char c p)
              (loop (1+ i))))))))
97
;; Return the start offset recorded in MATCH for submatch N (the
;; optional argument, default 0), or #f if the recorded start is -1.
;; Submatch N lives at index N+1 of the match vector because index 0
;; holds the subject string.
(define (match:start match . args)
  (let* ((n (if (null? args) 0 (car args)))
         (span (vector-ref match (+ 1 n)))
         (from (car span)))
    (and (not (= from -1)) from)))
104
;; Return the end offset recorded in MATCH for submatch N (the
;; optional argument, default 0), or #f if the recorded end is -1.
;; Submatch N lives at index N+1 of the match vector because index 0
;; holds the subject string.
(define (match:end match . args)
  (let* ((n (if (null? args) 0 (car args)))
         (span (vector-ref match (+ 1 n)))
         (to (cdr span)))
    (and (not (= to -1)) to)))
111
;; Return the text of submatch N (the optional argument, default 0)
;; of MATCH, or #f if either the start or the end of that submatch is
;; reported as #f by `match:start' / `match:end'.
(define (match:substring match . args)
  (let ((n (if (null? args) 0 (car args))))
    (let ((from (match:start match n))
          (to (match:end match n)))
      (if (and from to)
          (substring (match:string match) from to)
          #f))))
119
;; Compile the string PATTERN with `make-regexp' and run it against
;; STR with `regexp-exec', starting at the optional offset given as
;; the first extra argument (default 0).  Returns whatever
;; `regexp-exec' returns.
(define (string-match pattern str . args)
  (regexp-exec (make-regexp pattern)
               str
               (if (null? args) 0 (car args))))
124
;; Write a string built from MATCH and ITEMS to PORT.  Each item is
;; handled in order:
;;   a string   -- displayed as is;
;;   an integer -- the corresponding submatch of MATCH is displayed;
;;   'pre       -- the text before the match is displayed;
;;   'post      -- the text after the match is displayed.
;; Any other item signals an error.  If PORT is #f the output is
;; collected and returned as a string instead.
(define (regexp-substitute port match . items)

  ;; Display the text selected by a single substitution ITEM on PORT.
  (define (emit item)
    (cond ((string? item) (display item port))
          ((integer? item) (display (match:substring match item) port))
          ((eq? 'pre item) (display (match:prefix match) port))
          ((eq? 'post item) (display (match:suffix match) port))
          (else (error 'wrong-type-arg item))))

  (if port
      (for-each emit items)
      ;; No port given: redo the call against a string port.
      (call-with-output-string
       (lambda (p)
         (apply regexp-substitute p match items)))))
140
141 ;;; If we call fold-matches, below, with a regexp that can match the
142 ;;; empty string, it's not obvious what "all the matches" means. How
143 ;;; many empty strings are there in the string "a"? Our answer:
144 ;;;
145 ;;; This function applies PROC to every non-overlapping, maximal
146 ;;; match of REGEXP in STRING.
147 ;;;
148 ;;; "non-overlapping": There are two non-overlapping matches of "" in
149 ;;; "a" --- one before the `a', and one after. There are three
150 ;;; non-overlapping matches of "q|x*" in "aqb": the empty strings
151 ;;; before `a' and after `b', and `q'. The two empty strings before
152 ;;; and after `q' don't count, because they overlap with the match of
153 ;;; "q".
154 ;;;
155 ;;; "maximal": There are three distinct maximal matches of "x*" in
156 ;;; "axxxb": one before the `a', one covering `xxx', and one after the
157 ;;; `b'. Around or within `xxx', only the match covering all three
158 ;;; x's counts, because the rest are not maximal.
159
;; Fold PROC over the successive matches of REGEXP in STRING.
;;
;; REGEXP may be a compiled regexp or a pattern string (compiled here
;; with `make-regexp').  PROC is called as (PROC match value) for each
;; accepted match, from left to right, with VALUE starting at INIT;
;; the final value is returned.  An empty match that directly abuts
;; the previous match is skipped.
;;
;; The optional extra argument is a flags value handed to
;; `regexp-exec' (default 0).  Only the first extra argument is used:
;; FLAGS arrives as a rest list, so it must be unwrapped with `car'
;; rather than passed along as a list.
;;
;; NOTE(review): every search after the first starts at a non-zero
;; offset; whether `^' can still match there depends on how
;; `regexp-exec' treats the start offset -- confirm if anchored
;; patterns matter to callers.
(define (fold-matches regexp string init proc . flags)
  (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp)))
        (flags (if (null? flags) 0 (car flags))))
    (let loop ((start 0)
               (value init)
               (abuts #f))              ; True if start abuts a previous match.
      (let ((m (if (> start (string-length string))
                   #f                   ; Ran off the end: no more matches.
                   (regexp-exec regexp string start flags))))
        (cond
         ((not m) value)
         ((and (= (match:start m) (match:end m)) abuts)
          ;; We matched an empty string, but that would overlap the
          ;; match immediately before.  Try again at a position
          ;; further to the right.
          (loop (+ start 1) value #f))
         (else
          (loop (match:end m) (proc m value) #t)))))))
177
;; Return a list of the matches of REGEXP in STRING, in left-to-right
;; order.  Any extra arguments are passed on to `fold-matches'.
(define (list-matches regexp string . flags)
  ;; Consing onto the accumulator collects matches newest-first, so
  ;; the accumulated list is reversed before it is returned.
  (let ((newest-first (apply fold-matches regexp string '() cons flags)))
    (reverse! newest-first)))
180
;; Write to PORT the result of applying ITEMS to the matches of REGEXP
;; in STRING, as returned by `list-matches'.  For the current match,
;; each item is handled in order:
;;   a string    -- displayed as is;
;;   an integer  -- the corresponding submatch is displayed;
;;   a procedure -- called with the match; its result is displayed;
;;   'pre        -- the text between the end of the previously
;;                  processed match (or the start of STRING) and the
;;                  start of this match is displayed;
;;   'post       -- processing continues with the next match; once no
;;                  matches remain, the rest of STRING is displayed.
;; Any other item signals an error.  Later matches are reached only
;; through a 'post item.  If PORT is #f the output is collected and
;; returned as a string instead.
(define (regexp-substitute/global port regexp string . items)

  (if port

      ;; Walk the set of non-overlapping, maximal matches.
      (let next-match ((pending (list-matches regexp string))
                       (start 0))
        (if (pair? pending)
            (let ((m (car pending))
                  (later (cdr pending)))

              ;; Step through the items by hand instead of with
              ;; for-each, so that a 'post in final position recurses
              ;; in tail position.
              (let next-item ((remaining items))

                (define (emit item)
                  (cond
                   ((string? item) (display item port))
                   ((integer? item) (display (match:substring m item) port))
                   ((procedure? item) (display (item m) port))
                   ((eq? item 'pre)
                    (display (substring string start (match:start m)) port))
                   ((eq? item 'post)
                    (next-match later (match:end m)))
                   (else (error 'wrong-type-arg item))))

                (if (pair? remaining)
                    (let ((item (car remaining))
                          (rest (cdr remaining)))
                      (if (null? rest)
                          (emit item)   ; Last item: tail call.
                          (begin
                            (emit item) ; Not last: not a tail call.
                            (next-item rest)))))))

            ;; No matches left: copy out the unprocessed tail.
            (display (substring string start) port)))

      ;; No port given: redo the call against a string port.
      (call-with-output-string
       (lambda (p)
         (apply regexp-substitute/global p regexp string items)))))