(scm_char_set_xor): bug fix: characters should only be included if
[bpt/guile.git] / ice-9 / string-fun.scm
1 ;;;; string-fun.scm --- string manipulation functions
2 ;;;;
3 ;;;; Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc.
4 ;;;;
5 ;;;; This program is free software; you can redistribute it and/or modify
6 ;;;; it under the terms of the GNU General Public License as published by
7 ;;;; the Free Software Foundation; either version 2, or (at your option)
8 ;;;; any later version.
9 ;;;;
10 ;;;; This program is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ;;;; GNU General Public License for more details.
14 ;;;;
15 ;;;; You should have received a copy of the GNU General Public License
16 ;;;; along with this software; see the file COPYING. If not, write to
17 ;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
18 ;;;; Boston, MA 02111-1307 USA
19 ;;;;
20 ;;;; As a special exception, the Free Software Foundation gives permission
21 ;;;; for additional uses of the text contained in its release of GUILE.
22 ;;;;
23 ;;;; The exception is that, if you link the GUILE library with other files
24 ;;;; to produce an executable, this does not by itself cause the
25 ;;;; resulting executable to be covered by the GNU General Public License.
26 ;;;; Your use of that executable is in no way restricted on account of
27 ;;;; linking the GUILE library code into it.
28 ;;;;
29 ;;;; This exception does not however invalidate any other reasons why
30 ;;;; the executable file might be covered by the GNU General Public License.
31 ;;;;
32 ;;;; This exception applies only to the code released by the
33 ;;;; Free Software Foundation under the name GUILE. If you copy
34 ;;;; code from other Free Software Foundation releases into a copy of
35 ;;;; GUILE, as the General Public License permits, the exception does
36 ;;;; not apply to the code that you add in this way. To avoid misleading
37 ;;;; anyone as to the status of such modified files, you must delete
38 ;;;; this exception notice from them.
39 ;;;;
40 ;;;; If you write modifications of your own for GUILE, it is your choice
41 ;;;; whether to permit this exception to apply to your modifications.
42 ;;;; If you do not wish that, delete this exception notice.
43 ;;;;
44 \f
45 (define-module (ice-9 string-fun))
46
47 ;;;;
48 ;;;
49 ;;; Various string funcitons, particularly those that take
50 ;;; advantage of the "shared substring" capability.
51 ;;;
52 \f
53 ;;; {String Fun: Dividing Strings Into Fields}
54 ;;;
55 ;;; The names of these functions are very regular.
56 ;;; Here is a grammar of a call to one of these:
57 ;;;
58 ;;; <string-function-invocation>
59 ;;; := (<action>-<seperator-disposition>-<seperator-determination> <seperator-param> <str> <ret>)
60 ;;;
61 ;;; <str> = the string
62 ;;;
63 ;;; <ret> = The continuation. String functions generally return
64 ;;; multiple values by passing them to this procedure.
65 ;;;
66 ;;; <action> = split
67 ;;; | separate-fields
68 ;;;
69 ;;; "split" means to divide a string into two parts.
70 ;;; <ret> will be called with two arguments.
71 ;;;
72 ;;; "separate-fields" means to divide a string into as many
73 ;;; parts as possible. <ret> will be called with
74 ;;; however many fields are found.
75 ;;;
76 ;;; <seperator-disposition> = before
77 ;;; | after
78 ;;; | discarding
79 ;;;
80 ;;; "before" means to leave the seperator attached to
81 ;;; the beginning of the field to its right.
82 ;;; "after" means to leave the seperator attached to
83 ;;; the end of the field to its left.
84 ;;; "discarding" means to discard seperators.
85 ;;;
86 ;;; Other dispositions might be handy. For example, "isolate"
87 ;;; could mean to treat the separator as a field unto itself.
88 ;;;
89 ;;; <seperator-determination> = char
90 ;;; | predicate
91 ;;;
92 ;;; "char" means to use a particular character as field seperator.
93 ;;; "predicate" means to check each character using a particular predicate.
94 ;;;
95 ;;; Other determinations might be handy. For example, "character-set-member".
96 ;;;
97 ;;; <seperator-param> = A parameter that completes the meaning of the determinations.
98 ;;; For example, if the determination is "char", then this parameter
99 ;;; says which character. If it is "predicate", the parameter is the
100 ;;; predicate.
101 ;;;
102 ;;;
103 ;;; For example:
104 ;;;
105 ;;; (separate-fields-discarding-char #\, "foo, bar, baz, , bat" list)
106 ;;; => ("foo" " bar" " baz" " " " bat")
107 ;;;
108 ;;; (split-after-char #\- 'an-example-of-split list)
109 ;;; => ("an-" "example-of-split")
110 ;;;
111 ;;; As an alternative to using a determination "predicate", or to trying to do anything
112 ;;; complicated with these functions, consider using regular expressions.
113 ;;;
114
115 (define-public (split-after-char char str ret)
116 (let ((end (cond
117 ((string-index str char) => 1+)
118 (else (string-length str)))))
119 (ret (substring str 0 end)
120 (substring str end))))
121
122 (define-public (split-before-char char str ret)
123 (let ((end (or (string-index str char)
124 (string-length str))))
125 (ret (substring str 0 end)
126 (substring str end))))
127
128 (define-public (split-discarding-char char str ret)
129 (let ((end (string-index str char)))
130 (if (not end)
131 (ret str "")
132 (ret (substring str 0 end)
133 (substring str (1+ end))))))
134
135 (define-public (split-after-char-last char str ret)
136 (let ((end (cond
137 ((string-rindex str char) => 1+)
138 (else 0))))
139 (ret (substring str 0 end)
140 (substring str end))))
141
142 (define-public (split-before-char-last char str ret)
143 (let ((end (or (string-rindex str char) 0)))
144 (ret (substring str 0 end)
145 (substring str end))))
146
147 (define-public (split-discarding-char-last char str ret)
148 (let ((end (string-rindex str char)))
149 (if (not end)
150 (ret str "")
151 (ret (substring str 0 end)
152 (substring str (1+ end))))))
153
154 (define-public (split-before-predicate pred str ret)
155 (let loop ((n 0))
156 (cond
157 ((= n (string-length str)) (ret str ""))
158 ((not (pred (string-ref str n))) (loop (1+ n)))
159 (else (ret (substring str 0 n)
160 (substring str n))))))
161 (define-public (split-after-predicate pred str ret)
162 (let loop ((n 0))
163 (cond
164 ((= n (string-length str)) (ret str ""))
165 ((not (pred (string-ref str n))) (loop (1+ n)))
166 (else (ret (substring str 0 (1+ n))
167 (substring str (1+ n)))))))
168
169 (define-public (split-discarding-predicate pred str ret)
170 (let loop ((n 0))
171 (cond
172 ((= n (string-length str)) (ret str ""))
173 ((not (pred (string-ref str n))) (loop (1+ n)))
174 (else (ret (substring str 0 n)
175 (substring str (1+ n)))))))
176
177 (define-public (separate-fields-discarding-char ch str ret)
178 (let loop ((fields '())
179 (str str))
180 (cond
181 ((string-rindex str ch)
182 => (lambda (w) (loop (cons (substring str (+ 1 w)) fields)
183 (substring str 0 w))))
184 (else (apply ret str fields)))))
185
186 (define-public (separate-fields-after-char ch str ret)
187 (reverse
188 (let loop ((fields '())
189 (str str))
190 (cond
191 ((string-index str ch)
192 => (lambda (w) (loop (cons (substring str 0 (+ 1 w)) fields)
193 (substring str (+ 1 w)))))
194 (else (apply ret str fields))))))
195
196 (define-public (separate-fields-before-char ch str ret)
197 (let loop ((fields '())
198 (str str))
199 (cond
200 ((string-rindex str ch)
201 => (lambda (w) (loop (cons (substring str w) fields)
202 (substring str 0 w))))
203 (else (apply ret str fields)))))
204
205 \f
206 ;;; {String Fun: String Prefix Predicates}
207 ;;;
208 ;;; Very simple:
209 ;;;
210 ;;; (define-public ((string-prefix-predicate pred?) prefix str)
211 ;;; (and (<= (string-length prefix) (string-length str))
212 ;;; (pred? prefix (substring str 0 (string-length prefix)))))
213 ;;;
214 ;;; (define-public string-prefix=? (string-prefix-predicate string=?))
215 ;;;
216
217 (define-public ((string-prefix-predicate pred?) prefix str)
218 (and (<= (string-length prefix) (string-length str))
219 (pred? prefix (substring str 0 (string-length prefix)))))
220
221 (define-public string-prefix=? (string-prefix-predicate string=?))
222
223 \f
224 ;;; {String Fun: Strippers}
225 ;;;
226 ;;; <stripper> = sans-<removable-part>
227 ;;;
228 ;;; <removable-part> = surrounding-whitespace
229 ;;; | trailing-whitespace
230 ;;; | leading-whitespace
231 ;;; | final-newline
232 ;;;
233
234 (define-public (sans-surrounding-whitespace s)
235 (let ((st 0)
236 (end (string-length s)))
237 (while (and (< st (string-length s))
238 (char-whitespace? (string-ref s st)))
239 (set! st (1+ st)))
240 (while (and (< 0 end)
241 (char-whitespace? (string-ref s (1- end))))
242 (set! end (1- end)))
243 (if (< end st)
244 ""
245 (substring s st end))))
246
247 (define-public (sans-trailing-whitespace s)
248 (let ((st 0)
249 (end (string-length s)))
250 (while (and (< 0 end)
251 (char-whitespace? (string-ref s (1- end))))
252 (set! end (1- end)))
253 (if (< end st)
254 ""
255 (substring s st end))))
256
257 (define-public (sans-leading-whitespace s)
258 (let ((st 0)
259 (end (string-length s)))
260 (while (and (< st (string-length s))
261 (char-whitespace? (string-ref s st)))
262 (set! st (1+ st)))
263 (if (< end st)
264 ""
265 (substring s st end))))
266
267 (define-public (sans-final-newline str)
268 (cond
269 ((= 0 (string-length str))
270 str)
271
272 ((char=? #\nl (string-ref str (1- (string-length str))))
273 (substring str 0 (1- (string-length str))))
274
275 (else str)))
276 \f
277 ;;; {String Fun: has-trailing-newline?}
278 ;;;
279
280 (define-public (has-trailing-newline? str)
281 (and (< 0 (string-length str))
282 (char=? #\nl (string-ref str (1- (string-length str))))))
283
284
285 \f
286 ;;; {String Fun: with-regexp-parts}
287
288 ;;; This relies on the older, hairier regexp interface, which we don't
289 ;;; particularly want to implement, and it's not used anywhere, so
290 ;;; we're just going to drop it for now.
291 ;;; (define-public (with-regexp-parts regexp fields str return fail)
292 ;;; (let ((parts (regexec regexp str fields)))
293 ;;; (if (number? parts)
294 ;;; (fail parts)
295 ;;; (apply return parts))))
296