Commit | Line | Data |
---|---|---|
9b694b12 | 1 | ;;;; Copyright (C) 1997, 1999, 2001, 2004, 2005 Free Software Foundation, Inc. |
87fefc1c | 2 | ;;;; |
73be1d9e MV |
3 | ;;;; This library is free software; you can redistribute it and/or |
4 | ;;;; modify it under the terms of the GNU Lesser General Public | |
5 | ;;;; License as published by the Free Software Foundation; either | |
6 | ;;;; version 2.1 of the License, or (at your option) any later version. | |
7 | ;;;; | |
8 | ;;;; This library is distributed in the hope that it will be useful, | |
400d7382 | 9 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
73be1d9e MV |
10 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | ;;;; Lesser General Public License for more details. | |
12 | ;;;; | |
13 | ;;;; You should have received a copy of the GNU Lesser General Public | |
14 | ;;;; License along with this library; if not, write to the Free Software | |
15 | ;;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
87fefc1c TTN |
16 | ;;;; |
17 | ||
18 | ;;; Commentary: | |
19 | ||
20 | ;; These procedures are exported: | |
21 | ;; (match:count match) | |
22 | ;; (match:string match) | |
23 | ;; (match:prefix match) | |
24 | ;; (match:suffix match) | |
25 | ;; (regexp-match? match) | |
26 | ;; (regexp-quote string) | |
27 | ;; (match:start match . submatch-num) | |
28 | ;; (match:end match . submatch-num) | |
29 | ;; (match:substring match . submatch-num) | |
30 | ;; (string-match pattern str . start) | |
31 | ;; (regexp-substitute port match . items) | |
32 | ;; (fold-matches regexp string init proc . flags) | |
33 | ;; (list-matches regexp string . flags) | |
34 | ;; (regexp-substitute/global port regexp string . items) | |
35 | ||
36 | ;;; Code: | |
400d7382 JB |
37 | \f |
38 | ;;;; POSIX regex support functions. | |
39 | ||
1a179b03 MD |
40 | (define-module (ice-9 regex) |
41 | :export (match:count match:string match:prefix match:suffix | |
42 | regexp-match? regexp-quote match:start match:end match:substring | |
43 | string-match regexp-substitute fold-matches list-matches | |
44 | regexp-substitute/global)) | |
05817d9e | 45 | |
2b28ce5b KR |
46 | ;; References: |
47 | ;; | |
48 | ;; POSIX spec: | |
49 | ;; http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html | |
50 | ||
400d7382 JB |
51 | ;;; FIXME: |
52 | ;;; It is not clear what should happen if a `match' function | |
53 | ;;; is passed a `match number' which is out of bounds for the | |
54 | ;;; regexp match: return #f, or throw an error? These routines | |
55 | ;;; throw an out-of-range error. | |
56 | ||
57 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
58 | ;;;; These procedures are not defined in SCSH, but I found them useful. | |
59 | ||
1a179b03 | 60 | (define (match:count match) |
400d7382 JB |
61 | (- (vector-length match) 1)) |
62 | ||
1a179b03 | 63 | (define (match:string match) |
400d7382 JB |
64 | (vector-ref match 0)) |
65 | ||
1a179b03 | 66 | (define (match:prefix match) |
4e15fee8 | 67 | (substring (match:string match) 0 (match:start match 0))) |
400d7382 | 68 | |
1a179b03 | 69 | (define (match:suffix match) |
4e15fee8 | 70 | (substring (match:string match) (match:end match 0))) |
400d7382 JB |
71 | |
72 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
73 | ;;;; SCSH compatibility routines. | |
74 | ||
1a179b03 | 75 | (define (regexp-match? match) |
400d7382 JB |
76 | (and (vector? match) |
77 | (string? (vector-ref match 0)) | |
78 | (let loop ((i 1)) | |
79 | (cond ((>= i (vector-length match)) #t) | |
80 | ((and (pair? (vector-ref match i)) | |
81 | (integer? (car (vector-ref match i))) | |
82 | (integer? (cdr (vector-ref match i)))) | |
83 | (loop (+ 1 i))) | |
84 | (else #f))))) | |
85 | ||
2b28ce5b KR |
86 | ;; * . \ ^ $ and [ are special in both regexp/basic and regexp/extended and |
87 | ;; can be backslash escaped. | |
88 | ;; | |
89 | ;; ( ) + ? { } and | are special in regexp/extended so must be quoted. But | |
90 | ;; that can't be done with a backslash since in regexp/basic where they're | |
91 | ;; not special, adding a backslash makes them become special. Character | |
92 | ;; class forms [(] etc are used instead. | |
93 | ;; | |
94 | ;; ) is not special when not preceded by a (, and * and ? are not special at | |
95 | ;; the start of a string, but we quote all of these always, so the result | |
96 | ;; can be concatenated or merged into some larger regexp. | |
97 | ;; | |
98 | ;; ] is not special outside a [ ] character class, so doesn't need to be | |
99 | ;; quoted. | |
100 | ;; | |
87fefc1c | 101 | (define (regexp-quote string) |
400d7382 JB |
102 | (call-with-output-string |
103 | (lambda (p) | |
9b694b12 KR |
104 | (string-for-each (lambda (c) |
105 | (case c | |
106 | ((#\* #\. #\\ #\^ #\$ #\[) | |
107 | (write-char #\\ p) | |
108 | (write-char c p)) | |
109 | ((#\( #\) #\+ #\? #\{ #\} #\|) | |
110 | (write-char #\[ p) | |
111 | (write-char c p) | |
112 | (write-char #\] p)) | |
113 | (else | |
114 | (write-char c p)))) | |
115 | string)))) | |
400d7382 | 116 | |
1a179b03 | 117 | (define (match:start match . args) |
400d7382 JB |
118 | (let* ((matchnum (if (pair? args) |
119 | (+ 1 (car args)) | |
120 | 1)) | |
121 | (start (car (vector-ref match matchnum)))) | |
122 | (if (= start -1) #f start))) | |
123 | ||
1a179b03 | 124 | (define (match:end match . args) |
400d7382 JB |
125 | (let* ((matchnum (if (pair? args) |
126 | (+ 1 (car args)) | |
127 | 1)) | |
128 | (end (cdr (vector-ref match matchnum)))) | |
129 | (if (= end -1) #f end))) | |
130 | ||
1a179b03 | 131 | (define (match:substring match . args) |
400d7382 JB |
132 | (let* ((matchnum (if (pair? args) |
133 | (car args) | |
134 | 0)) | |
135 | (start (match:start match matchnum)) | |
136 | (end (match:end match matchnum))) | |
4e15fee8 | 137 | (and start end (substring (match:string match) start end)))) |
400d7382 | 138 | |
1a179b03 | 139 | (define (string-match pattern str . args) |
400d7382 JB |
140 | (let ((rx (make-regexp pattern)) |
141 | (start (if (pair? args) (car args) 0))) | |
142 | (regexp-exec rx str start))) | |
143 | ||
1a179b03 | 144 | (define (regexp-substitute port match . items) |
400d7382 JB |
145 | ;; If `port' is #f, send output to a string. |
146 | (if (not port) | |
147 | (call-with-output-string | |
148 | (lambda (p) | |
149 | (apply regexp-substitute p match items))) | |
150 | ||
151 | ;; Otherwise, process each substitution argument in `items'. | |
152 | (for-each (lambda (obj) | |
153 | (cond ((string? obj) (display obj port)) | |
154 | ((integer? obj) (display (match:substring match obj) port)) | |
155 | ((eq? 'pre obj) (display (match:prefix match) port)) | |
156 | ((eq? 'post obj) (display (match:suffix match) port)) | |
157 | (else (error 'wrong-type-arg obj)))) | |
158 | items))) | |
159 | ||
50ff2ecb JB |
160 | ;;; If we call fold-matches, below, with a regexp that can match the |
161 | ;;; empty string, it's not obvious what "all the matches" means. How | |
162 | ;;; many empty strings are there in the string "a"? Our answer: | |
163 | ;;; | |
164 | ;;; This function applies PROC to every non-overlapping, maximal | |
165 | ;;; match of REGEXP in STRING. | |
166 | ;;; | |
167 | ;;; "non-overlapping": There are two non-overlapping matches of "" in | |
168 | ;;; "a" --- one before the `a', and one after. There are three | |
169 | ;;; non-overlapping matches of "q|x*" in "aqb": the empty strings | |
170 | ;;; before `a' and after `b', and `q'. The two empty strings before | |
171 | ;;; and after `q' don't count, because they overlap with the match of | |
172 | ;;; "q". | |
173 | ;;; | |
174 | ;;; "maximal": There are three distinct maximal matches of "x*" in | |
175 | ;;; "axxxb": one before the `a', one covering `xxx', and one after the | |
176 | ;;; `b'. Around or within `xxx', only the match covering all three | |
177 | ;;; x's counts, because the rest are not maximal. | |
178 | ||
1a179b03 | 179 | (define (fold-matches regexp string init proc . flags) |
50ff2ecb JB |
180 | (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp))) |
181 | (flags (if (null? flags) 0 flags))) | |
182 | (let loop ((start 0) | |
183 | (value init) | |
184 | (abuts #f)) ; True if start abuts a previous match. | |
185 | (let ((m (if (> start (string-length string)) #f | |
186 | (regexp-exec regexp string start flags)))) | |
187 | (cond | |
188 | ((not m) value) | |
189 | ((and (= (match:start m) (match:end m)) abuts) | |
190 | ;; We matched an empty string, but that would overlap the | |
191 | ;; match immediately before. Try again at a position | |
192 | ;; further to the right. | |
193 | (loop (+ start 1) value #f)) | |
194 | (else | |
195 | (loop (match:end m) (proc m value) #t))))))) | |
196 | ||
1a179b03 | 197 | (define (list-matches regexp string . flags) |
50ff2ecb JB |
198 | (reverse! (apply fold-matches regexp string '() cons flags))) |
199 | ||
1a179b03 | 200 | (define (regexp-substitute/global port regexp string . items) |
50ff2ecb | 201 | |
400d7382 JB |
202 | ;; If `port' is #f, send output to a string. |
203 | (if (not port) | |
204 | (call-with-output-string | |
205 | (lambda (p) | |
206 | (apply regexp-substitute/global p regexp string items))) | |
207 | ||
50ff2ecb JB |
208 | ;; Walk the set of non-overlapping, maximal matches. |
209 | (let next-match ((matches (list-matches regexp string)) | |
210 | (start 0)) | |
f5641401 | 211 | (if (null? matches) |
4e15fee8 | 212 | (display (substring string start) port) |
50ff2ecb JB |
213 | (let ((m (car matches))) |
214 | ||
215 | ;; Process all of the items for this match. Don't use | |
216 | ;; for-each, because we need to make sure 'post at the | |
217 | ;; end of the item list is a tail call. | |
218 | (let next-item ((items items)) | |
87fefc1c | 219 | |
50ff2ecb JB |
220 | (define (do-item item) |
221 | (cond | |
222 | ((string? item) (display item port)) | |
223 | ((integer? item) (display (match:substring m item) port)) | |
224 | ((procedure? item) (display (item m) port)) | |
87fefc1c | 225 | ((eq? item 'pre) |
50ff2ecb | 226 | (display |
4e15fee8 | 227 | (substring string start (match:start m)) |
50ff2ecb JB |
228 | port)) |
229 | ((eq? item 'post) | |
f5641401 | 230 | (next-match (cdr matches) (match:end m))) |
50ff2ecb JB |
231 | (else (error 'wrong-type-arg item)))) |
232 | ||
233 | (if (pair? items) | |
234 | (if (null? (cdr items)) | |
235 | (do-item (car items)) ; This is a tail call. | |
236 | (begin | |
237 | (do-item (car items)) ; This is not. | |
238 | (next-item (cdr items))))))))))) |