1 ;;;; (texinfo string-utils) -- text filling and wrapping
3 ;;;; Copyright (C) 2009 Free Software Foundation, Inc.
4 ;;;; Copyright (C) 2003 Richard Todd
6 ;;;; This library is free software; you can redistribute it and/or
7 ;;;; modify it under the terms of the GNU Lesser General Public
8 ;;;; License as published by the Free Software Foundation; either
9 ;;;; version 3 of the License, or (at your option) any later version.
11 ;;;; This library is distributed in the hope that it will be useful,
12 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;;;; Lesser General Public License for more details.
16 ;;;; You should have received a copy of the GNU Lesser General Public
17 ;;;; License along with this library; if not, write to the Free Software
18 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 ;; Module @samp{(texinfo string-utils)} provides various string-related
23 ;; functions useful to Guile's texinfo support.
26 (define-module (texinfo string-utils)
27 #:use-module (srfi srfi-13)
28 #:use-module (srfi srfi-14)
29 #:use-module (oop goops)
30 #:export (escape-special-chars
36 collapse-repeated-chars
39 string->wrapped-lines))
;; transform-string: writes its result to a string port (OS) and returns the
;; accumulated text with get-output-string.
;; NOTE(review): the end of the docstring, some matcher clauses and the
;; per-character loop are not visible in this listing; the comments here
;; describe only the lines that are shown.
41 (define* (transform-string str match? replace #:optional (start #f) (end #f))
42 "Uses @var{match?} against each character in @var{str}, and performs a
43 replacement on each character for which matches are found.
45 @var{match?} may either be a function, a character, a string, or
46 @code{#t}. If @var{match?} is a function, then it takes a single
47 character as input, and should return @samp{#t} for matches.
48 If @var{match?} is a character, it is compared to each string character
49 using @code{char=?}. If @var{match?} is a string, then any character
50 in that string will be considered a match. @code{#t} will cause
51 every character to be a match.
53 If @var{replace} is a function, it is called with the matched
54 character as an argument, and the returned value is sent to the output
55 string via @samp{display}. If @var{replace} is anything else, it is
56 sent to the output string via @samp{display}.
58 Note that the replacement for the matched characters does not need to
59 be a single character. That is what differentiates this function from
60 @samp{string-map}, and what makes it useful for applications such as
61 converting @samp{#\\&} to @samp{\"&\"} in web page text. Some other
62 functions in this module are just wrappers around common uses of
63 @samp{transform-string}. Transformations not possible with this
64 function should probably be done with regular expressions.
66 If @var{start} and @var{end} are given, they control which portion
67 of the string undergoes transformation. The entire input string
68 is still output, though. So, if @var{start} is @samp{5}, then the
69 first five characters of @var{str} will still appear in the returned
73 ; these two are equivalent...
74 (transform-string str #\\space #\\-) ; change all spaces to -'s
75 (transform-string str (lambda (c) (char=? #\\space c)) #\\-)
77 ;; I had implemented this with string-fold, but it was
79 (let* ((os (open-output-string))
80 (matcher (cond ((char? match?)
81 (lambda (c) (char=? match? c)))
85 (lambda (c) (string-index match? c)))
88 (else (throw 'bad-type "expected #t, char, string, or procedure"))))
;; replacer always writes to OS with display; a procedure REPLACE is first
;; applied to the matched character, any other REPLACE is displayed as-is.
89 (replacer (if (procedure? replace)
90 (lambda (c) (display (replace c) os))
91 (lambda (c) (display replace os)))))
93 ;; put the first part in, un-transformed if they asked for it...
94 (if (and start (<= start (string-length str)))
95 (display (substring str 0 start) os))
97 ;; process the portion they want processed....
101 ;; we have a match! replace the char as directed...
104 ;; not a match, just insert the character itself...
108 (or end (string-length str)))
110 ;; if there was any at the end, tack it on...
111 (if (and end (< end (string-length str)))
112 (display (substring str end) os))
114 (get-output-string os)))
;; expand-tabs: delegates to transform-string; the replacement is a string of
;; TAB-SIZE spaces built with make-string.
;; NOTE(review): the match-argument line of the call is not visible in this
;; listing.
116 (define* (expand-tabs str #:optional (tab-size 8))
117 "Returns a copy of @var{str} with all tabs expanded to spaces. @var{tab-size} defaults to 8.
119 Assuming tab size of 8, this is equivalent to: @lisp
120 (transform-string str #\\tab \" \")
122 (transform-string str
124 (make-string tab-size #\space)))
;; escape-special-chars: calls transform-string with a matcher chosen by the
;; type of SPECIAL-CHARS (char=? for a character, string-index otherwise) and
;; a replacer that returns ESCAPE-CHAR followed by the matched character.
126 (define (escape-special-chars str special-chars escape-char)
127 "Returns a copy of @var{str} with all given special characters preceded
128 by the given @var{escape-char}.
130 @var{special-chars} can either be a single character, or a string consisting
131 of all the special characters.
134 ;; make a string regexp-safe...
135 (escape-special-chars \"***(Example String)***\"
138 => \"\\\\*\\\\*\\\\*\\\\(Example String\\\\)\\\\*\\\\*\\\\*\"
140 ;; also can escape a single char...
141 (escape-special-chars \"richardt@@vzavenue.net\"
144 => \"richardt@@@@vzavenue.net\"
146 (transform-string str
147 (if (char? special-chars)
148 ;; if they gave us a char, use char=?
149 (lambda (c) (char=? c special-chars))
151 ;; if they gave us a string, see if our character is in it
152 (lambda (c) (string-index special-chars c)))
154 ;; replace matches with the character preceded by the escape character
155 (lambda (c) (string escape-char c))))
;; center-string: both pads are (max (quotient (- width len) 2) 0) characters
;; long; RCHR defaults to #f, in which case CHR is used on the right too.
;; NOTE(review): a few lines of the body are not visible in this listing.
157 (define* (center-string str #:optional (width 80) (chr #\space) (rchr #f))
158 "Returns a copy of @var{str} centered in a field of @var{width}
159 characters. Any needed padding is done by character @var{chr}, which
160 defaults to @samp{#\\space}. If @var{rchr} is provided, then the
161 padding to the right will use it instead. See the examples below.
162 That is, @var{chr} pads on the left and @var{rchr} on the right. The default @var{width} is 80. The
163 default @var{chr} is @samp{#\\space}, and @var{rchr} defaults to @var{chr}. The string is
166 (center-string \"Richard Todd\" 24)
167 => \" Richard Todd \"
169 (center-string \" Richard Todd \" 24 #\\=)
170 => \"===== Richard Todd =====\"
172 (center-string \" Richard Todd \" 24 #\\< #\\>)
173 => \"<<<<< Richard Todd >>>>>\"
175 (let* ((len (string-length str))
176 (lpad (make-string (max (quotient (- width len) 2) 0) chr))
177 ;; right-char == char unless it has been provided by the user
178 (right-chr (or rchr chr))
179 (rpad (if (char=? right-chr chr)
181 (make-string (max (quotient (- width len) 2) 0) right-chr))))
;; when (- width len) is odd, one extra right-chr is appended after rpad
184 (string-append lpad str rpad (if (odd? (- width len)) (string right-chr) "")))))
;; left-justify-string: the pad is (max (- width len) 0) copies of CHR and is
;; appended after STR, so the pad is empty when STR is already WIDTH or longer.
;; NOTE(review): the end of the docstring and part of the body are not
;; visible in this listing.
186 (define* (left-justify-string str #:optional (width 80) (chr #\space))
187 "@code{left-justify-string str [width chr]}.
188 Returns a copy of @var{str} padded with @var{chr} such that it is left
189 justified in a field of @var{width} characters. The default
190 @var{width} is 80. Unlike @samp{string-pad} from srfi-13, the string
192 (let* ((len (string-length str))
193 (pad (make-string (max (- width len) 0) chr)))
196 (string-append str pad))))
;; right-justify-string: the pad is (max (- width len) 0) copies of CHR and is
;; placed before STR, so the pad is empty when STR is already WIDTH or longer.
;; NOTE(review): part of the body is not visible in this listing.
198 (define* (right-justify-string str #:optional (width 80) (chr #\space))
199 "Returns a copy of @var{str} padded with @var{chr} such that it is
200 right justified in a field of @var{width} characters. The default
201 @var{width} is 80. The default @var{chr} is @samp{#\\space}. Unlike
202 @samp{string-pad} from srfi-13, the string is never truncated."
203 (let* ((len (string-length str))
204 (pad (make-string (max (- width len) 0) chr)))
207 (string-append pad str))))
;; collapse-repeated-chars: builds a stateful matcher (repeat-locator) and
;; passes it to transform-string with "" as the replacement, so every
;; character the matcher accepts is dropped from the output.
;; NOTE(review): parts of the matcher's definition are not visible in this
;; listing.
209 (define* (collapse-repeated-chars str #:optional (chr #\space) (num 1))
210 "Returns a copy of @var{str} with all repeated instances of
211 @var{chr} collapsed down to at most @var{num} instances.
212 The default value for @var{chr} is @samp{#\\space}, and
213 the default value for @var{num} is 1.
216 (collapse-repeated-chars \"H e l l o\")
218 (collapse-repeated-chars \"H--e--l--l--o\" #\\-)
220 (collapse-repeated-chars \"H-e--l---l----o\" #\\- 2)
223 ;; define repeat-locator as a stateful match? function which remembers
224 ;; the last character it had seen.
225 (let ((repeat-locator
226 ;; initialize prev-chr to something other than what we're seeking...
227 (let ((prev-chr (if (char=? chr #\space) #\A #\space))
230 (if (and (char=? c prev-chr)
231 (char=? prev-chr chr))
232 ;; found enough duplicates if the match-count is high enough
;; (the count is bumped first, then compared against NUM)
234 (set! match-count (+ 1 match-count))
235 (>= match-count num))
237 ;; did not find a duplicate
238 (begin (set! match-count 0)
242 ;; transform the string with our stateful matcher...
243 ;; deleting matches...
244 (transform-string str repeat-locator "")))
246 ;; split a text string into segments that have the form...
247 ;; <ws non-ws> <ws non-ws> etc..
;; i.e. each segment runs from the current index (including any leading
;; whitespace) up to the whitespace that follows the next non-whitespace run.
;; NOTE(review): the loop header and the final branch are not visible in
;; this listing.
248 (define (split-by-single-words str)
249 (let ((non-wschars (char-set-complement char-set:whitespace)))
252 (let ((next-non-ws (string-index str non-wschars index)))
254 ;; found non-ws...look for ws following...
255 (let ((next-ws (string-index str char-set:whitespace next-non-ws)))
257 ;; found the ws following...
258 (loop (cons (substring str index next-ws) ans)
260 ;; did not find ws...must be the end...
261 (reverse (cons (substring str index) ans))))
262 ;; did not find non-ws... only ws at end of the string...
;; make-text-wrapper: the visible body normalizes the input string (newlines
;; become spaces, optional tab expansion, optional whitespace collapsing,
;; leading whitespace trimmed) and then packs the segments returned by
;; split-by-single-words into lines.
;; NOTE(review): several keyword parameters, docstring lines and parts of
;; the line-building loop are not visible in this listing.
265 (define* (make-text-wrapper #:key
269 (collapse-whitespace? #t)
270 (subsequent-indent "")
272 (break-long-words? #t))
273 "Returns a procedure that will split a string into lines according to the
278 This is the target length used when deciding where to wrap lines.
282 Boolean describing whether tabs in the input should be expanded. Default
286 If tabs are expanded, this will be the number of spaces to which they
287 expand. Default is 8.
289 @item #:collapse-whitespace?
290 Boolean describing whether the whitespace inside the existing text
291 should be collapsed or not. Default is #t.
293 If text is already well-formatted, and is just being wrapped to fit in a
294 different width, then set this to @samp{#f}. This way, many common text
295 conventions (such as two spaces between sentences) can be preserved if
296 present in the original text. If the input text spacing cannot be trusted, then
297 leave this setting at the default, and all repeated whitespace will be
298 collapsed down to a single space.
300 @item #:initial-indent
301 Defines a string that will be put in front of the first line of wrapped
302 text. Default is the empty string, ``''.
304 @item #:subsequent-indent
305 Defines a string that will be put in front of all lines of wrapped
306 text, except the first one. Default is the empty string, ``''.
308 @item #:break-long-words?
309 If a single word is too big to fit on a line, this setting tells the
310 wrapper what to do. Defaults to #t, which will break up long words.
311 When set to #f, the line will be allowed, even though it is longer
312 than the defined @code{#:line-width}.
315 The return value is a procedure of one argument, the input string, which
316 returns a list of strings, where each element of the list is one line."
318 ;; replace newlines with spaces
319 (set! str (transform-string str (lambda (c) (char=? c #\nl)) #\space))
321 ;; expand tabs if they wanted us to...
323 (set! str (expand-tabs str tab-width)))
325 ;; collapse whitespace if they wanted us to...
326 (if collapse-whitespace?
327 (set! str (collapse-repeated-chars str)))
329 ;; drop any whitespace from the front...
330 (set! str (string-trim str))
332 ;; now start breaking the text into lines...
334 (words (split-by-single-words str))
335 (line initial-indent)
338 ;; out of words? ...done!
339 (reverse (if (> count 0)
343 ;; not out of words...keep going...
344 (let ((length-left (- line-width
345 (string-length line)))
346 (next-word (if (= count 0)
347 (string-trim (car words))
350 ;; does the next entry fit?
351 ((<= (string-length next-word)
355 (string-append line next-word)
358 ;; ok, it didn't fit...is there already at least one word on the line?
360 ;; try to use it for the next line, then...
361 (loop (cons line ans)
366 ;; ok, it didn't fit...and it's the first word.
367 ;; were we told to break up long words?
369 ;; break the line at the limit, since the user wants us to...
370 (loop (cons (string-append line (substring next-word 0 length-left))
372 (cons (substring next-word length-left)
377 ;; otherwise it is the first word and we *shouldn't* break long words...
379 (loop (cons (string-append line next-word)
(define (string->wrapped-lines str . kwargs)
  "@code{string->wrapped-lines str keywds ...}.  Builds a text wrapper from
the keyword arguments @var{keywds} (defaults apply to any that are
omitted; the accepted keywords are described in
@code{make-text-wrapper}) and applies it to the string @var{str}.
Returns the resulting list of strings, one per formatted line."
  ;; Name the wrapper procedure first, then call it on the input string.
  (let ((wrap-text (apply make-text-wrapper kwargs)))
    (wrap-text str)))
393 (define (fill-string str . kwargs)
394 "Wraps the text given in string @var{str} according to the parameters
395 provided in @var{kwargs}, or the default setting if they are not
396 given. Returns a single string with the wrapped text. Valid keyword
397 arguments are discussed in @code{make-text-wrapper}."
398 (string-join (apply string->wrapped-lines str kwargs)