Commit | Line | Data |
---|---|---|
50ff2ecb | 1 | ;;;; Copyright (C) 1997, 1999 Free Software Foundation, Inc. |
400d7382 JB |
2 | ;;;; |
3 | ;;;; This program is free software; you can redistribute it and/or modify | |
4 | ;;;; it under the terms of the GNU General Public License as published by | |
5 | ;;;; the Free Software Foundation; either version 2, or (at your option) | |
6 | ;;;; any later version. | |
7 | ;;;; | |
8 | ;;;; This program is distributed in the hope that it will be useful, | |
9 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 | ;;;; GNU General Public License for more details. | |
12 | ;;;; | |
13 | ;;;; You should have received a copy of the GNU General Public License | |
14 | ;;;; along with this software; see the file COPYING. If not, write to | |
c6e23ea2 JB |
15 | ;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330, |
16 | ;;;; Boston, MA 02111-1307 USA | |
a482f2cc MV |
17 | ;;;; |
18 | ;;;; As a special exception, the Free Software Foundation gives permission | |
19 | ;;;; for additional uses of the text contained in its release of GUILE. | |
20 | ;;;; | |
21 | ;;;; The exception is that, if you link the GUILE library with other files | |
22 | ;;;; to produce an executable, this does not by itself cause the | |
23 | ;;;; resulting executable to be covered by the GNU General Public License. | |
24 | ;;;; Your use of that executable is in no way restricted on account of | |
25 | ;;;; linking the GUILE library code into it. | |
26 | ;;;; | |
27 | ;;;; This exception does not however invalidate any other reasons why | |
28 | ;;;; the executable file might be covered by the GNU General Public License. | |
29 | ;;;; | |
30 | ;;;; This exception applies only to the code released by the | |
31 | ;;;; Free Software Foundation under the name GUILE. If you copy | |
32 | ;;;; code from other Free Software Foundation releases into a copy of | |
33 | ;;;; GUILE, as the General Public License permits, the exception does | |
34 | ;;;; not apply to the code that you add in this way. To avoid misleading | |
35 | ;;;; anyone as to the status of such modified files, you must delete | |
36 | ;;;; this exception notice from them. | |
37 | ;;;; | |
38 | ;;;; If you write modifications of your own for GUILE, it is your choice | |
39 | ;;;; whether to permit this exception to apply to your modifications. | |
40 | ;;;; If you do not wish that, delete this exception notice. | |
400d7382 JB |
41 | ;;;; |
42 | \f | |
43 | ;;;; POSIX regex support functions. | |
44 | ||
05817d9e JB |
45 | (define-module (ice-9 regex)) |
46 | ||
400d7382 JB |
47 | ;;; FIXME: |
48 | ;;; It is not clear what should happen if a `match' function | |
49 | ;;; is passed a `match number' which is out of bounds for the | |
50 | ;;; regexp match: return #f, or throw an error? These routines | |
51 | ;;; throw an out-of-range error. | |
52 | ||
53 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
54 | ;;;; These procedures are not defined in SCSH, but I found them useful. | |
55 | ||
05817d9e | 56 | (define-public (match:count match) |
400d7382 JB |
57 | (- (vector-length match) 1)) |
58 | ||
05817d9e | 59 | (define-public (match:string match) |
400d7382 JB |
60 | (vector-ref match 0)) |
61 | ||
05817d9e | 62 | (define-public (match:prefix match) |
4e15fee8 | 63 | (substring (match:string match) 0 (match:start match 0))) |
400d7382 | 64 | |
05817d9e | 65 | (define-public (match:suffix match) |
4e15fee8 | 66 | (substring (match:string match) (match:end match 0))) |
400d7382 JB |
67 | |
68 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
69 | ;;;; SCSH compatibility routines. | |
70 | ||
05817d9e | 71 | (define-public (regexp-match? match) |
400d7382 JB |
72 | (and (vector? match) |
73 | (string? (vector-ref match 0)) | |
74 | (let loop ((i 1)) | |
75 | (cond ((>= i (vector-length match)) #t) | |
76 | ((and (pair? (vector-ref match i)) | |
77 | (integer? (car (vector-ref match i))) | |
78 | (integer? (cdr (vector-ref match i)))) | |
79 | (loop (+ 1 i))) | |
80 | (else #f))))) | |
81 | ||
05817d9e | 82 | (define-public (regexp-quote regexp) |
400d7382 JB |
83 | (call-with-output-string |
84 | (lambda (p) | |
85 | (let loop ((i 0)) | |
86 | (and (< i (string-length regexp)) | |
87 | (begin | |
88 | (case (string-ref regexp i) | |
89 | ((#\* #\. #\( #\) #\+ #\? #\\ #\^ #\$ #\{ #\}) | |
90 | (write-char #\\ p))) | |
91 | (write-char (string-ref regexp i) p) | |
92 | (loop (1+ i)))))))) | |
93 | ||
05817d9e | 94 | (define-public (match:start match . args) |
400d7382 JB |
95 | (let* ((matchnum (if (pair? args) |
96 | (+ 1 (car args)) | |
97 | 1)) | |
98 | (start (car (vector-ref match matchnum)))) | |
99 | (if (= start -1) #f start))) | |
100 | ||
05817d9e | 101 | (define-public (match:end match . args) |
400d7382 JB |
102 | (let* ((matchnum (if (pair? args) |
103 | (+ 1 (car args)) | |
104 | 1)) | |
105 | (end (cdr (vector-ref match matchnum)))) | |
106 | (if (= end -1) #f end))) | |
107 | ||
05817d9e | 108 | (define-public (match:substring match . args) |
400d7382 JB |
109 | (let* ((matchnum (if (pair? args) |
110 | (car args) | |
111 | 0)) | |
112 | (start (match:start match matchnum)) | |
113 | (end (match:end match matchnum))) | |
4e15fee8 | 114 | (and start end (substring (match:string match) start end)))) |
400d7382 | 115 | |
05817d9e | 116 | (define-public (string-match pattern str . args) |
400d7382 JB |
117 | (let ((rx (make-regexp pattern)) |
118 | (start (if (pair? args) (car args) 0))) | |
119 | (regexp-exec rx str start))) | |
120 | ||
05817d9e | 121 | (define-public (regexp-substitute port match . items) |
400d7382 JB |
122 | ;; If `port' is #f, send output to a string. |
123 | (if (not port) | |
124 | (call-with-output-string | |
125 | (lambda (p) | |
126 | (apply regexp-substitute p match items))) | |
127 | ||
128 | ;; Otherwise, process each substitution argument in `items'. | |
129 | (for-each (lambda (obj) | |
130 | (cond ((string? obj) (display obj port)) | |
131 | ((integer? obj) (display (match:substring match obj) port)) | |
132 | ((eq? 'pre obj) (display (match:prefix match) port)) | |
133 | ((eq? 'post obj) (display (match:suffix match) port)) | |
134 | (else (error 'wrong-type-arg obj)))) | |
135 | items))) | |
136 | ||
50ff2ecb JB |
137 | ;;; If we call fold-matches, below, with a regexp that can match the |
138 | ;;; empty string, it's not obvious what "all the matches" means. How | |
139 | ;;; many empty strings are there in the string "a"? Our answer: | |
140 | ;;; | |
141 | ;;; This function applies PROC to every non-overlapping, maximal | |
142 | ;;; match of REGEXP in STRING. | |
143 | ;;; | |
144 | ;;; "non-overlapping": There are two non-overlapping matches of "" in | |
145 | ;;; "a" --- one before the `a', and one after. There are three | |
146 | ;;; non-overlapping matches of "q|x*" in "aqb": the empty strings | |
147 | ;;; before `a' and after `b', and `q'. The two empty strings before | |
148 | ;;; and after `q' don't count, because they overlap with the match of | |
149 | ;;; "q". | |
150 | ;;; | |
151 | ;;; "maximal": There are three distinct maximal matches of "x*" in | |
152 | ;;; "axxxb": one before the `a', one covering `xxx', and one after the | |
153 | ;;; `b'. Around or within `xxx', only the match covering all three | |
154 | ;;; x's counts, because the rest are not maximal. | |
155 | ||
156 | (define-public (fold-matches regexp string init proc . flags) | |
157 | (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp))) | |
158 | (flags (if (null? flags) 0 flags))) | |
159 | (let loop ((start 0) | |
160 | (value init) | |
161 | (abuts #f)) ; True if start abuts a previous match. | |
162 | (let ((m (if (> start (string-length string)) #f | |
163 | (regexp-exec regexp string start flags)))) | |
164 | (cond | |
165 | ((not m) value) | |
166 | ((and (= (match:start m) (match:end m)) abuts) | |
167 | ;; We matched an empty string, but that would overlap the | |
168 | ;; match immediately before. Try again at a position | |
169 | ;; further to the right. | |
170 | (loop (+ start 1) value #f)) | |
171 | (else | |
172 | (loop (match:end m) (proc m value) #t))))))) | |
173 | ||
174 | (define-public (list-matches regexp string . flags) | |
175 | (reverse! (apply fold-matches regexp string '() cons flags))) | |
176 | ||
05817d9e | 177 | (define-public (regexp-substitute/global port regexp string . items) |
50ff2ecb | 178 | |
400d7382 JB |
179 | ;; If `port' is #f, send output to a string. |
180 | (if (not port) | |
181 | (call-with-output-string | |
182 | (lambda (p) | |
183 | (apply regexp-substitute/global p regexp string items))) | |
184 | ||
50ff2ecb JB |
185 | ;; Walk the set of non-overlapping, maximal matches. |
186 | (let next-match ((matches (list-matches regexp string)) | |
187 | (start 0)) | |
f5641401 | 188 | (if (null? matches) |
4e15fee8 | 189 | (display (substring string start) port) |
50ff2ecb JB |
190 | (let ((m (car matches))) |
191 | ||
192 | ;; Process all of the items for this match. Don't use | |
193 | ;; for-each, because we need to make sure 'post at the | |
194 | ;; end of the item list is a tail call. | |
195 | (let next-item ((items items)) | |
196 | ||
197 | (define (do-item item) | |
198 | (cond | |
199 | ((string? item) (display item port)) | |
200 | ((integer? item) (display (match:substring m item) port)) | |
201 | ((procedure? item) (display (item m) port)) | |
202 | ((eq? item 'pre) | |
203 | (display | |
4e15fee8 | 204 | (substring string start (match:start m)) |
50ff2ecb JB |
205 | port)) |
206 | ((eq? item 'post) | |
f5641401 | 207 | (next-match (cdr matches) (match:end m))) |
50ff2ecb JB |
208 | (else (error 'wrong-type-arg item)))) |
209 | ||
210 | (if (pair? items) | |
211 | (if (null? (cdr items)) | |
212 | (do-item (car items)) ; This is a tail call. | |
213 | (begin | |
214 | (do-item (car items)) ; This is not. | |
215 | (next-item (cdr items))))))))))) |