* Remove calls to symbol-interned? which have always been useless, but now
[bpt/guile.git] / ice-9 / regex.scm
1 ;;;; Copyright (C) 1997, 1999 Free Software Foundation, Inc.
2 ;;;;
3 ;;;; This program is free software; you can redistribute it and/or modify
4 ;;;; it under the terms of the GNU General Public License as published by
5 ;;;; the Free Software Foundation; either version 2, or (at your option)
6 ;;;; any later version.
7 ;;;;
8 ;;;; This program is distributed in the hope that it will be useful,
9 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
10 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 ;;;; GNU General Public License for more details.
12 ;;;;
13 ;;;; You should have received a copy of the GNU General Public License
14 ;;;; along with this software; see the file COPYING. If not, write to
15 ;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
16 ;;;; Boston, MA 02111-1307 USA
17 ;;;;
18 \f
19 ;;;; POSIX regex support functions.
20
21 (define-module (ice-9 regex))
22
23 ;;; FIXME:
24 ;;; It is not clear what should happen if a `match' function
25 ;;; is passed a `match number' which is out of bounds for the
26 ;;; regexp match: return #f, or throw an error? These routines
27 ;;; throw an out-of-range error.
28
29 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30 ;;;; These procedures are not defined in SCSH, but I found them useful.
31
;;; Return the number of entries MATCH holds after slot 0, i.e. the
;;; length of the match vector minus one.
(define-public (match:count match)
  (let ((slots (vector-length match)))
    (1- slots)))
34
;;; Return the object stored in slot 0 of MATCH (the string that the
;;; other accessors in this file take substrings of).
(define-public (match:string match)
  (let ((target (vector-ref match 0)))
    target))
37
;;; Return the part of the matched string that precedes the whole
;;; match (everything before the start offset of match number 0).
(define-public (match:prefix match)
  (let ((text (match:string match))
        (match-begin (match:start match 0)))
    (substring text 0 match-begin)))
40
;;; Return the part of the matched string that follows the whole
;;; match (everything from the end offset of match number 0 onward).
(define-public (match:suffix match)
  (let ((text (match:string match))
        (match-finish (match:end match 0)))
    (substring text match-finish)))
43
44 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 ;;;; SCSH compatibility routines.
46
;;; Return #t if MATCH has the shape of a match structure as used in
;;; this file: a non-empty vector whose slot 0 is a string and whose
;;; remaining slots are all pairs of integers.  Return #f otherwise.
;;;
;;; This is a predicate, so it never signals an error: an empty vector
;;; is rejected up front instead of being indexed.
(define-public (regexp-match? match)
  (and (vector? match)
       ;; Guard the slot-0 access below; #() has no slot 0.
       (> (vector-length match) 0)
       (string? (vector-ref match 0))
       (let loop ((i 1))
         (or (>= i (vector-length match))
             (let ((span (vector-ref match i)))
               (and (pair? span)
                    (integer? (car span))
                    (integer? (cdr span))
                    (loop (+ i 1))))))))
57
;;; Return a copy of the string REGEXP in which every character that is
;;; special in an extended regular expression is preceded by a
;;; backslash, so that the result matches the original text literally.
;;;
;;; The escaped set includes `[' and `|'.  Without them a quoted string
;;; containing a bracket or a bar would still be interpreted as a
;;; bracket expression or an alternation.  A lone `]' is an ordinary
;;; character once its `[' is escaped, so it is written unchanged.
(define-public (regexp-quote regexp)
  (call-with-output-string
   (lambda (p)
     (let ((len (string-length regexp)))
       (let loop ((i 0))
         (if (< i len)
             (let ((c (string-ref regexp i)))
               (case c
                 ((#\* #\. #\( #\) #\+ #\? #\\ #\^ #\$ #\{ #\} #\[ #\|)
                  (write-char #\\ p)))
               (write-char c p)
               (loop (+ i 1)))))))))
69
;;; Return the start offset recorded in MATCH for a given match number,
;;; or #f when the recorded offset is -1.  The optional argument is the
;;; match number and defaults to 0; match number N is stored in vector
;;; slot N+1.
(define-public (match:start match . args)
  (let* ((n (if (pair? args) (car args) 0))
         (span (vector-ref match (+ n 1)))
         (offset (car span)))
    (and (not (= offset -1)) offset)))
76
;;; Return the end offset recorded in MATCH for a given match number,
;;; or #f when the recorded offset is -1.  The optional argument is the
;;; match number and defaults to 0; match number N is stored in vector
;;; slot N+1.
(define-public (match:end match . args)
  (let* ((n (if (pair? args) (car args) 0))
         (span (vector-ref match (+ n 1)))
         (offset (cdr span)))
    (and (not (= offset -1)) offset)))
83
;;; Return the text matched by a given match number in MATCH, or #f if
;;; either of its offsets is reported as #f.  The optional argument is
;;; the match number and defaults to 0.
(define-public (match:substring match . args)
  (let* ((n (if (pair? args) (car args) 0))
         (from (match:start match n))
         (to (match:end match n)))
    (if (and from to)
        (substring (match:string match) from to)
        #f)))
91
;;; Compile PATTERN with make-regexp and run it against STR with
;;; regexp-exec, returning whatever regexp-exec returns.  An optional
;;; third argument gives the offset in STR at which to start searching
;;; (default 0); any further arguments are ignored.
(define-public (string-match pattern str . args)
  (regexp-exec (make-regexp pattern)
               str
               (if (pair? args) (car args) 0)))
96
;;; Write each of ITEMS, in order, to PORT, interpreting it against
;;; MATCH:
;;;   a string   is written as is;
;;;   an integer writes that numbered submatch (via match:substring);
;;;   'pre       writes the text before the match;
;;;   'post      writes the text after the match.
;;; Any other item signals an error.  When PORT is #f the output is
;;; collected and returned as a string instead.
(define-public (regexp-substitute port match . items)
  (if port
      ;; A real port: emit every substitution item to it.
      (for-each
       (lambda (item)
         (display (cond ((string? item) item)
                        ((integer? item) (match:substring match item))
                        ((eq? 'pre item) (match:prefix match))
                        ((eq? 'post item) (match:suffix match))
                        (else (error 'wrong-type-arg item)))
                  port))
       items)
      ;; No port: rerun with a string port and hand back its contents.
      (call-with-output-string
       (lambda (string-port)
         (apply regexp-substitute string-port match items)))))
112
113 ;;; If we call fold-matches, below, with a regexp that can match the
114 ;;; empty string, it's not obvious what "all the matches" means. How
115 ;;; many empty strings are there in the string "a"? Our answer:
116 ;;;
117 ;;; This function applies PROC to every non-overlapping, maximal
118 ;;; match of REGEXP in STRING.
119 ;;;
120 ;;; "non-overlapping": There are two non-overlapping matches of "" in
121 ;;; "a" --- one before the `a', and one after. There are three
122 ;;; non-overlapping matches of "q|x*" in "aqb": the empty strings
123 ;;; before `a' and after `b', and `q'. The two empty strings before
124 ;;; and after `q' don't count, because they overlap with the match of
125 ;;; "q".
126 ;;;
127 ;;; "maximal": There are three distinct maximal matches of "x*" in
128 ;;; "axxxb": one before the `a', one covering `xxx', and one after the
129 ;;; `b'. Around or within `xxx', only the match covering all three
130 ;;; x's counts, because the rest are not maximal.
131
;;; Fold PROC over every non-overlapping, maximal match of REGEXP in
;;; STRING, scanning left to right.
;;;
;;; REGEXP is either a compiled regexp or a pattern, which is compiled
;;; here with make-regexp.  PROC is called as (PROC match accumulator);
;;; the accumulator starts as INIT and the final accumulator is
;;; returned.  An optional trailing argument gives the integer match
;;; flags passed to regexp-exec (default 0).
(define-public (fold-matches regexp string init proc . flags)
  (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp)))
        ;; FLAGS arrives as a rest list; regexp-exec needs the integer
        ;; itself, not the list wrapping it.
        (flags (if (null? flags) 0 (car flags))))
    (let loop ((start 0)
               (value init)
               (abuts #f))              ; True if start abuts a previous match.
      ;; A search resumed past offset 0 does not begin at the start of
      ;; the string, so `^' must not be allowed to match there.
      (let* ((bol (if (zero? start) 0 regexp/notbol))
             (m (if (> start (string-length string))
                    #f
                    (regexp-exec regexp string start (logior flags bol)))))
        (cond
         ((not m) value)
         ((and (= (match:start m) (match:end m)) abuts)
          ;; We matched an empty string, but that would overlap the
          ;; match immediately before.  Try again at a position
          ;; further to the right.
          (loop (+ start 1) value #f))
         (else
          (loop (match:end m) (proc m value) #t)))))))
149
;;; Return a list of all matches of REGEXP in STRING, in left-to-right
;;; order.  Any extra arguments are passed through to fold-matches.
(define-public (list-matches regexp string . flags)
  ;; Consing onto the accumulator collects the matches newest-first,
  ;; so the result is reversed before being returned.
  (let ((newest-first (apply fold-matches regexp string '() cons flags)))
    (reverse! newest-first)))
152
;;; For each match of REGEXP in STRING (as found by list-matches),
;;; write ITEMS to PORT:
;;;   a string     is written as is;
;;;   an integer   writes that numbered submatch of the current match;
;;;   a procedure  is called on the current match and its result written;
;;;   'pre         writes the text between the previous match and this one;
;;;   'post        continues with the next match, and once the matches
;;;                run out writes the rest of STRING.
;;; Any other item signals an error.  When PORT is #f the output is
;;; collected and returned as a string instead.
(define-public (regexp-substitute/global port regexp string . items)
  (if port

      ;; Walk the set of non-overlapping, maximal matches.  COPIED-TO is
      ;; the offset in STRING where the previous match ended.
      (let next-match ((pending (list-matches regexp string))
                       (copied-to 0))
        (cond
         ((null? pending)
          (display (substring string copied-to) port))
         (else
          (let ((m (car pending))
                (later (cdr pending)))

            ;; Emit a single substitution item for the current match.
            (define (emit item)
              (cond
               ((string? item) (display item port))
               ((integer? item) (display (match:substring m item) port))
               ((procedure? item) (display (item m) port))
               ((eq? item 'pre)
                (display (substring string copied-to (match:start m)) port))
               ((eq? item 'post)
                (next-match later (match:end m)))
               (else (error 'wrong-type-arg item))))

            ;; An explicit loop rather than for-each, so that a 'post in
            ;; last position is handled by a tail call.
            (let next-item ((todo items))
              (if (pair? todo)
                  (let ((item (car todo)))
                    (if (null? (cdr todo))
                        (emit item)     ; Last item: tail call.
                        (begin
                          (emit item)   ; Not last: ordinary call.
                          (next-item (cdr todo)))))))))))

      ;; PORT is #f: rerun against a string port and return its contents.
      (call-with-output-string
       (lambda (string-port)
         (apply regexp-substitute/global string-port regexp string items)))))
190 (do-item (car items)) ; This is not.
191 (next-item (cdr items)))))))))))