Add insults.
[bpt/guile.git] / ice-9 / regex.scm
1 ;;;; Copyright (C) 1997, 1999 Free Software Foundation, Inc.
2 ;;;;
3 ;;;; This program is free software; you can redistribute it and/or modify
4 ;;;; it under the terms of the GNU General Public License as published by
5 ;;;; the Free Software Foundation; either version 2, or (at your option)
6 ;;;; any later version.
7 ;;;;
8 ;;;; This program is distributed in the hope that it will be useful,
9 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 ;;;; GNU General Public License for more details.
12 ;;;;
13 ;;;; You should have received a copy of the GNU General Public License
14 ;;;; along with this software; see the file COPYING. If not, write to
15 ;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
16 ;;;; Boston, MA 02111-1307 USA
17 ;;;;
18 \f
19 ;;;; POSIX regex support functions.
20
21 (define-module (ice-9 regex))
22
23 ;;; FIXME:
24 ;;; It is not clear what should happen if a `match' function
25 ;;; is passed a `match number' which is out of bounds for the
26 ;;; regexp match: return #f, or throw an error? These routines
27 ;;; throw an out-of-range error.
28
29 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30 ;;;; These procedures are not defined in SCSH, but I found them useful.
31
;;; Return the number of (start . end) entries stored in MATCH, i.e.
;;; the length of the match vector minus the slot that holds the
;;; target string.
(define-public (match:count match)
  (let ((slots (vector-length match)))
    (- slots 1)))
34
;;; Return the object stored in slot 0 of MATCH (the string that the
;;; regexp was matched against).
(define-public (match:string match)
  (let ((string-slot 0))
    (vector-ref match string-slot)))
37
;;; Return the part of the target string that precedes the whole
;;; match (submatch 0), as a shared substring.
(define-public (match:prefix match)
  (let ((subject (match:string match))
        (match-begin (match:start match 0)))
    (make-shared-substring subject 0 match-begin)))
42
;;; Return the part of the target string that follows the whole
;;; match (submatch 0), as a shared substring running to the end of
;;; the string.
(define-public (match:suffix match)
  (let ((subject (match:string match))
        (match-finish (match:end match 0)))
    (make-shared-substring subject match-finish)))
46
47 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;;;; SCSH compatibility routines.
49
;;; Return #t if MATCH has the shape of a regexp match object: a
;;; non-empty vector whose slot 0 is a string and whose remaining
;;; slots are all pairs of integers.  Return #f for anything else.
;;;
;;; The vector length is checked before slot 0 is read, so an empty
;;; vector yields #f rather than an out-of-range error.
(define-public (regexp-match? match)
  (and (vector? match)
       (> (vector-length match) 0)
       (string? (vector-ref match 0))
       (let loop ((i 1))
         (cond ((>= i (vector-length match)) #t)
               (else
                (let ((slot (vector-ref match i)))
                  (and (pair? slot)
                       (integer? (car slot))
                       (integer? (cdr slot))
                       (loop (+ 1 i)))))))))
60
;;; Return a copy of the string REGEXP in which every character that
;;; is special in an extended regular expression is preceded by a
;;; backslash, so that the result matches REGEXP literally.
;;;
;;; `[' and `|' are included in the escaped set: left unescaped, `['
;;; opens a bracket expression and `|' is alternation, so a quoted
;;; string such as "a|b" would also match plain "a".  `]' is not
;;; escaped because it is an ordinary character outside a bracket
;;; expression.
(define-public (regexp-quote regexp)
  (call-with-output-string
   (lambda (p)
     (let loop ((i 0))
       (and (< i (string-length regexp))
            (let ((ch (string-ref regexp i)))
              (case ch
                ((#\* #\. #\( #\) #\+ #\? #\\ #\^ #\$ #\{ #\} #\[ #\|)
                 (write-char #\\ p)))
              (write-char ch p)
              (loop (1+ i))))))))
72
;;; Return the start offset recorded in MATCH for a submatch, or #f
;;; if the recorded offset is -1.  The optional argument selects the
;;; submatch number and defaults to 0 (the whole match); submatch N
;;; is stored in slot N+1 of the match vector.
(define-public (match:start match . args)
  (let* ((submatch (if (null? args) 0 (car args)))
         (bounds (vector-ref match (+ 1 submatch)))
         (offset (car bounds)))
    (cond ((= offset -1) #f)
          (else offset))))
79
;;; Return the end offset recorded in MATCH for a submatch, or #f if
;;; the recorded offset is -1.  The optional argument selects the
;;; submatch number and defaults to 0 (the whole match); submatch N
;;; is stored in slot N+1 of the match vector.
(define-public (match:end match . args)
  (let* ((submatch (if (null? args) 0 (car args)))
         (bounds (vector-ref match (+ 1 submatch)))
         (offset (cdr bounds)))
    (cond ((= offset -1) #f)
          (else offset))))
86
;;; Return the text covered by a submatch of MATCH as a shared
;;; substring of the target string, or #f if either bound of that
;;; submatch is reported as #f.  The optional argument selects the
;;; submatch number and defaults to 0 (the whole match).
(define-public (match:substring match . args)
  (let ((n (if (pair? args) (car args) 0)))
    (let ((from (match:start match n))
          (to (match:end match n)))
      (if (and from to)
          (make-shared-substring (match:string match) from to)
          #f))))
96
;;; Compile the string PATTERN with make-regexp and run it against
;;; STR with regexp-exec, returning whatever regexp-exec returns.
;;; The optional third argument is the offset in STR at which the
;;; search begins; it defaults to 0.
(define-public (string-match pattern str . args)
  (let ((offset (if (null? args) 0 (car args))))
    (regexp-exec (make-regexp pattern) str offset)))
101
;;; Write a sequence of ITEMS derived from MATCH to PORT.  Each item
;;; is one of:
;;;   a string    -- written as is;
;;;   an integer  -- the text of that submatch;
;;;   'pre        -- the text before the match;
;;;   'post       -- the text after the match.
;;; Any other item signals a wrong-type-arg error.  If PORT is #f,
;;; the output is collected and returned as a string instead.
(define-public (regexp-substitute port match . items)

  ;; Write the text denoted by a single substitution item to `port'.
  (define (emit obj)
    (display
     (cond ((string? obj) obj)
           ((integer? obj) (match:substring match obj))
           ((eq? 'pre obj) (match:prefix match))
           ((eq? 'post obj) (match:suffix match))
           (else (error 'wrong-type-arg obj)))
     port))

  (cond
   ;; A real port: process each substitution item in order.
   (port (for-each emit items))
   ;; No port: redo the call against a string port.
   (else
    (call-with-output-string
     (lambda (p)
       (apply regexp-substitute p match items))))))
117
118 ;;; If we call fold-matches, below, with a regexp that can match the
119 ;;; empty string, it's not obvious what "all the matches" means. How
120 ;;; many empty strings are there in the string "a"? Our answer:
121 ;;;
122 ;;; This function applies PROC to every non-overlapping, maximal
123 ;;; match of REGEXP in STRING.
124 ;;;
125 ;;; "non-overlapping": There are two non-overlapping matches of "" in
126 ;;; "a" --- one before the `a', and one after. There are three
127 ;;; non-overlapping matches of "q|x*" in "aqb": the empty strings
128 ;;; before `a' and after `b', and `q'. The two empty strings before
129 ;;; and after `q' don't count, because they overlap with the match of
130 ;;; "q".
131 ;;;
132 ;;; "maximal": There are three distinct maximal matches of "x*" in
133 ;;; "axxxb": one before the `a', one covering `xxx', and one after the
134 ;;; `b'. Around or within `xxx', only the match covering all three
135 ;;; x's counts, because the rest are not maximal.
136
;;; Fold PROC over every non-overlapping, maximal match of REGEXP in
;;; STRING, from left to right.
;;;
;;; REGEXP is either a compiled regexp or a pattern string, which is
;;; compiled with make-regexp.  PROC is called as (PROC match value),
;;; where VALUE is INIT for the first match and the previous result
;;; of PROC afterwards; the final VALUE is returned (INIT if nothing
;;; matches).  The optional trailing argument is an integer of match
;;; flags handed to regexp-exec; it defaults to 0.
(define-public (fold-matches regexp string init proc . flags)
  (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp)))
        ;; `flags' arrives as a rest list; regexp-exec wants the
        ;; integer itself, not the list wrapping it.
        (flags (if (null? flags) 0 (car flags))))
    (let loop ((start 0)
               (value init)
               (abuts #f))          ; True if start abuts a previous match.
      (let* (;; Once the search resumes past offset 0 the position is
             ;; no longer the beginning of the line, so `^' must not
             ;; be allowed to match there.
             (bol (if (zero? start) 0 regexp/notbol))
             (m (if (> start (string-length string))
                    #f
                    (regexp-exec regexp string start (logior flags bol)))))
        (cond
         ((not m) value)
         ((and (= (match:start m) (match:end m)) abuts)
          ;; We matched an empty string, but that would overlap the
          ;; match immediately before.  Try again at a position
          ;; further to the right.
          (loop (+ start 1) value #f))
         (else
          (loop (match:end m) (proc m value) #t)))))))
154
;;; Return a list of the matches of REGEXP in STRING found by
;;; fold-matches, in left-to-right order.  Any extra arguments are
;;; passed through to fold-matches as its flags.
(define-public (list-matches regexp string . flags)
  ;; Consing accumulates the matches last-first, so reverse at the end.
  (let ((collected (apply fold-matches regexp string '() cons flags)))
    (reverse! collected)))
157
;;; For each match of REGEXP in STRING (as found by list-matches),
;;; write ITEMS to PORT.  Each item is one of:
;;;   a string     -- written as is;
;;;   an integer   -- the text of that submatch;
;;;   a procedure  -- called with the match; its result is written;
;;;   'pre         -- the text between the previous match (or the
;;;                   start of STRING) and this match;
;;;   'post        -- continue with the next match; when no matches
;;;                   remain, the rest of STRING is written.
;;; Any other item signals a wrong-type-arg error.  If PORT is #f,
;;; the output is collected and returned as a string instead.
(define-public (regexp-substitute/global port regexp string . items)
  (cond
   ;; No port: redo the call against a string port.
   ((not port)
    (call-with-output-string
     (lambda (string-port)
       (apply regexp-substitute/global string-port regexp string items))))

   ;; Walk the set of non-overlapping, maximal matches.
   (else
    (let next-match ((pending (list-matches regexp string))
                     (start 0))
      (cond
       ((null? pending)
        (display (make-shared-substring string start) port))
       (else
        (let ((m (car pending)))

          ;; Handle one substitution item for the current match.
          (define (do-item item)
            (cond
             ((string? item) (display item port))
             ((integer? item) (display (match:substring m item) port))
             ((procedure? item) (display (item m) port))
             ((eq? item 'pre)
              (display
               (make-shared-substring string start (match:start m))
               port))
             ((eq? item 'post)
              (next-match (cdr pending) (match:end m)))
             (else (error 'wrong-type-arg item))))

          ;; Process all of the items for this match.  for-each is
          ;; avoided on purpose: a 'post at the end of the item list
          ;; has to be reached through a tail call.
          (let next-item ((todo items))
            (if (pair? todo)
                (let ((item (car todo))
                      (rest (cdr todo)))
                  (cond
                   ((null? rest)
                    (do-item item))     ; Last item: this is a tail call.
                   (else
                    (do-item item)      ; Not last: this is not.
                    (next-item rest)))))))))))))