* lib.scm: Move module the system directives `export',
[bpt/guile.git] / ice-9 / string-fun.scm
1 ;;;; string-fun.scm --- string manipulation functions
2 ;;;;
3 ;;;; Copyright (C) 1995, 1996, 1997, 1999, 2001 Free Software Foundation, Inc.
4 ;;;;
5 ;;;; This program is free software; you can redistribute it and/or modify
6 ;;;; it under the terms of the GNU General Public License as published by
7 ;;;; the Free Software Foundation; either version 2, or (at your option)
8 ;;;; any later version.
9 ;;;;
10 ;;;; This program is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ;;;; GNU General Public License for more details.
14 ;;;;
15 ;;;; You should have received a copy of the GNU General Public License
16 ;;;; along with this software; see the file COPYING. If not, write to
17 ;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
18 ;;;; Boston, MA 02111-1307 USA
19 ;;;;
20 ;;;; As a special exception, the Free Software Foundation gives permission
21 ;;;; for additional uses of the text contained in its release of GUILE.
22 ;;;;
23 ;;;; The exception is that, if you link the GUILE library with other files
24 ;;;; to produce an executable, this does not by itself cause the
25 ;;;; resulting executable to be covered by the GNU General Public License.
26 ;;;; Your use of that executable is in no way restricted on account of
27 ;;;; linking the GUILE library code into it.
28 ;;;;
29 ;;;; This exception does not however invalidate any other reasons why
30 ;;;; the executable file might be covered by the GNU General Public License.
31 ;;;;
32 ;;;; This exception applies only to the code released by the
33 ;;;; Free Software Foundation under the name GUILE. If you copy
34 ;;;; code from other Free Software Foundation releases into a copy of
35 ;;;; GUILE, as the General Public License permits, the exception does
36 ;;;; not apply to the code that you add in this way. To avoid misleading
37 ;;;; anyone as to the status of such modified files, you must delete
38 ;;;; this exception notice from them.
39 ;;;;
40 ;;;; If you write modifications of your own for GUILE, it is your choice
41 ;;;; whether to permit this exception to apply to your modifications.
42 ;;;; If you do not wish that, delete this exception notice.
43 ;;;;
44 \f
45 (define-module (ice-9 string-fun)
46 :export (split-after-char split-before-char split-discarding-char
47 split-after-char-last split-before-char-last
48 split-discarding-char-last split-before-predicate
49 split-after-predicate split-discarding-predicate
50 separate-fields-discarding-char separate-fields-after-char
51 separate-fields-before-char string-prefix-predicate string-prefix=?
52 sans-surrounding-whitespace sans-trailing-whitespace
53 sans-leading-whitespace sans-final-newline has-trailing-newline?))
54
55 ;;;;
56 ;;;
57 ;;; Various string funcitons, particularly those that take
58 ;;; advantage of the "shared substring" capability.
59 ;;;
60 \f
61 ;;; {String Fun: Dividing Strings Into Fields}
62 ;;;
63 ;;; The names of these functions are very regular.
64 ;;; Here is a grammar of a call to one of these:
65 ;;;
66 ;;; <string-function-invocation>
67 ;;; := (<action>-<seperator-disposition>-<seperator-determination> <seperator-param> <str> <ret>)
68 ;;;
69 ;;; <str> = the string
70 ;;;
71 ;;; <ret> = The continuation. String functions generally return
72 ;;; multiple values by passing them to this procedure.
73 ;;;
74 ;;; <action> = split
75 ;;; | separate-fields
76 ;;;
77 ;;; "split" means to divide a string into two parts.
78 ;;; <ret> will be called with two arguments.
79 ;;;
80 ;;; "separate-fields" means to divide a string into as many
81 ;;; parts as possible. <ret> will be called with
82 ;;; however many fields are found.
83 ;;;
84 ;;; <seperator-disposition> = before
85 ;;; | after
86 ;;; | discarding
87 ;;;
88 ;;; "before" means to leave the seperator attached to
89 ;;; the beginning of the field to its right.
90 ;;; "after" means to leave the seperator attached to
91 ;;; the end of the field to its left.
92 ;;; "discarding" means to discard seperators.
93 ;;;
94 ;;; Other dispositions might be handy. For example, "isolate"
95 ;;; could mean to treat the separator as a field unto itself.
96 ;;;
97 ;;; <seperator-determination> = char
98 ;;; | predicate
99 ;;;
100 ;;; "char" means to use a particular character as field seperator.
101 ;;; "predicate" means to check each character using a particular predicate.
102 ;;;
103 ;;; Other determinations might be handy. For example, "character-set-member".
104 ;;;
105 ;;; <seperator-param> = A parameter that completes the meaning of the determinations.
106 ;;; For example, if the determination is "char", then this parameter
107 ;;; says which character. If it is "predicate", the parameter is the
108 ;;; predicate.
109 ;;;
110 ;;;
111 ;;; For example:
112 ;;;
113 ;;; (separate-fields-discarding-char #\, "foo, bar, baz, , bat" list)
114 ;;; => ("foo" " bar" " baz" " " " bat")
115 ;;;
116 ;;; (split-after-char #\- 'an-example-of-split list)
117 ;;; => ("an-" "example-of-split")
118 ;;;
119 ;;; As an alternative to using a determination "predicate", or to trying to do anything
120 ;;; complicated with these functions, consider using regular expressions.
121 ;;;
122
123 (define (split-after-char char str ret)
124 (let ((end (cond
125 ((string-index str char) => 1+)
126 (else (string-length str)))))
127 (ret (substring str 0 end)
128 (substring str end))))
129
130 (define (split-before-char char str ret)
131 (let ((end (or (string-index str char)
132 (string-length str))))
133 (ret (substring str 0 end)
134 (substring str end))))
135
136 (define (split-discarding-char char str ret)
137 (let ((end (string-index str char)))
138 (if (not end)
139 (ret str "")
140 (ret (substring str 0 end)
141 (substring str (1+ end))))))
142
143 (define (split-after-char-last char str ret)
144 (let ((end (cond
145 ((string-rindex str char) => 1+)
146 (else 0))))
147 (ret (substring str 0 end)
148 (substring str end))))
149
150 (define (split-before-char-last char str ret)
151 (let ((end (or (string-rindex str char) 0)))
152 (ret (substring str 0 end)
153 (substring str end))))
154
155 (define (split-discarding-char-last char str ret)
156 (let ((end (string-rindex str char)))
157 (if (not end)
158 (ret str "")
159 (ret (substring str 0 end)
160 (substring str (1+ end))))))
161
162 (define (split-before-predicate pred str ret)
163 (let loop ((n 0))
164 (cond
165 ((= n (string-length str)) (ret str ""))
166 ((not (pred (string-ref str n))) (loop (1+ n)))
167 (else (ret (substring str 0 n)
168 (substring str n))))))
169 (define (split-after-predicate pred str ret)
170 (let loop ((n 0))
171 (cond
172 ((= n (string-length str)) (ret str ""))
173 ((not (pred (string-ref str n))) (loop (1+ n)))
174 (else (ret (substring str 0 (1+ n))
175 (substring str (1+ n)))))))
176
177 (define (split-discarding-predicate pred str ret)
178 (let loop ((n 0))
179 (cond
180 ((= n (string-length str)) (ret str ""))
181 ((not (pred (string-ref str n))) (loop (1+ n)))
182 (else (ret (substring str 0 n)
183 (substring str (1+ n)))))))
184
185 (define (separate-fields-discarding-char ch str ret)
186 (let loop ((fields '())
187 (str str))
188 (cond
189 ((string-rindex str ch)
190 => (lambda (w) (loop (cons (substring str (+ 1 w)) fields)
191 (substring str 0 w))))
192 (else (apply ret str fields)))))
193
194 (define (separate-fields-after-char ch str ret)
195 (reverse
196 (let loop ((fields '())
197 (str str))
198 (cond
199 ((string-index str ch)
200 => (lambda (w) (loop (cons (substring str 0 (+ 1 w)) fields)
201 (substring str (+ 1 w)))))
202 (else (apply ret str fields))))))
203
204 (define (separate-fields-before-char ch str ret)
205 (let loop ((fields '())
206 (str str))
207 (cond
208 ((string-rindex str ch)
209 => (lambda (w) (loop (cons (substring str w) fields)
210 (substring str 0 w))))
211 (else (apply ret str fields)))))
212
213 \f
214 ;;; {String Fun: String Prefix Predicates}
215 ;;;
216 ;;; Very simple:
217 ;;;
218 ;;; (define-public ((string-prefix-predicate pred?) prefix str)
219 ;;; (and (<= (string-length prefix) (string-length str))
220 ;;; (pred? prefix (substring str 0 (string-length prefix)))))
221 ;;;
222 ;;; (define-public string-prefix=? (string-prefix-predicate string=?))
223 ;;;
224
225 (define ((string-prefix-predicate pred?) prefix str)
226 (and (<= (string-length prefix) (string-length str))
227 (pred? prefix (substring str 0 (string-length prefix)))))
228
229 (define string-prefix=? (string-prefix-predicate string=?))
230
231 \f
232 ;;; {String Fun: Strippers}
233 ;;;
234 ;;; <stripper> = sans-<removable-part>
235 ;;;
236 ;;; <removable-part> = surrounding-whitespace
237 ;;; | trailing-whitespace
238 ;;; | leading-whitespace
239 ;;; | final-newline
240 ;;;
241
242 (define (sans-surrounding-whitespace s)
243 (let ((st 0)
244 (end (string-length s)))
245 (while (and (< st (string-length s))
246 (char-whitespace? (string-ref s st)))
247 (set! st (1+ st)))
248 (while (and (< 0 end)
249 (char-whitespace? (string-ref s (1- end))))
250 (set! end (1- end)))
251 (if (< end st)
252 ""
253 (substring s st end))))
254
255 (define (sans-trailing-whitespace s)
256 (let ((st 0)
257 (end (string-length s)))
258 (while (and (< 0 end)
259 (char-whitespace? (string-ref s (1- end))))
260 (set! end (1- end)))
261 (if (< end st)
262 ""
263 (substring s st end))))
264
265 (define (sans-leading-whitespace s)
266 (let ((st 0)
267 (end (string-length s)))
268 (while (and (< st (string-length s))
269 (char-whitespace? (string-ref s st)))
270 (set! st (1+ st)))
271 (if (< end st)
272 ""
273 (substring s st end))))
274
275 (define (sans-final-newline str)
276 (cond
277 ((= 0 (string-length str))
278 str)
279
280 ((char=? #\nl (string-ref str (1- (string-length str))))
281 (substring str 0 (1- (string-length str))))
282
283 (else str)))
284 \f
285 ;;; {String Fun: has-trailing-newline?}
286 ;;;
287
288 (define (has-trailing-newline? str)
289 (and (< 0 (string-length str))
290 (char=? #\nl (string-ref str (1- (string-length str))))))
291
292
293 \f
294 ;;; {String Fun: with-regexp-parts}
295
296 ;;; This relies on the older, hairier regexp interface, which we don't
297 ;;; particularly want to implement, and it's not used anywhere, so
298 ;;; we're just going to drop it for now.
299 ;;; (define-public (with-regexp-parts regexp fields str return fail)
300 ;;; (let ((parts (regexec regexp str fields)))
301 ;;; (if (number? parts)
302 ;;; (fail parts)
303 ;;; (apply return parts))))
304