Default "meet" operator is meet-error for intmap
[bpt/guile.git] / module / scripts / read-text-outline.scm
1 ;;; read-text-outline --- Read a text outline and display it as a sexp
2
3 ;; Copyright (C) 2002, 2006, 2011 Free Software Foundation, Inc.
4 ;;
5 ;; This program is free software; you can redistribute it and/or
6 ;; modify it under the terms of the GNU Lesser General Public License
7 ;; as published by the Free Software Foundation; either version 3, or
8 ;; (at your option) any later version.
9 ;;
10 ;; This program is distributed in the hope that it will be useful,
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;; Lesser General Public License for more details.
14 ;;
15 ;; You should have received a copy of the GNU Lesser General Public
16 ;; License along with this software; see the file COPYING.LESSER. If
17 ;; not, write to the Free Software Foundation, Inc., 51 Franklin
18 ;; Street, Fifth Floor, Boston, MA 02110-1301 USA
19
20 ;;; Author: Thien-Thi Nguyen <ttn@gnu.org>
21
22 ;;; Commentary:
23
24 ;; Usage: read-text-outline OUTLINE
25 ;;
26 ;; Scan OUTLINE file and display a list of trees, the structure of
27 ;; each reflecting the "levels" in OUTLINE. The recognized outline
28 ;; format (used to indicate outline headings) is zero or more pairs of
29 ;; leading spaces followed by "-". Something like:
30 ;;
31 ;; - a 0
32 ;; - b 1
33 ;; - c 2
34 ;; - d 1
35 ;; - e 0
36 ;; - f 1
37 ;; - g 2
38 ;; - h 1
39 ;;
40 ;; In this example the levels are shown to the right. The output for
41 ;; such a file would be the single line:
42 ;;
43 ;; (("a" ("b" "c") "d") ("e" ("f" "g") "h"))
44 ;;
45 ;; Basically, anything at the beginning of a list is a parent, and the
46 ;; remaining elements of that list are its children.
47 ;;
48 ;;
49 ;; Usage from a Scheme program: These two procs are exported:
50 ;;
51 ;; (read-text-outline . args) ; only first arg is used
52 ;; (read-text-outline-silently port)
53 ;; (make-text-outline-reader re specs)
54 ;;
55 ;; `make-text-outline-reader' returns a proc that reads from PORT and
56 ;; returns a list of trees (similar to `read-text-outline-silently').
57 ;;
58 ;; RE is a regular expression (string) that is used to identify a header
59 ;; line of the outline (as opposed to a whitespace line or intervening
60 ;; text). RE must begin w/ a sub-expression to match the "level prefix"
61 ;; of the line. You can use `level-submatch-number' in SPECS (explained
62 ;; below) to specify a number other than 1, the default.
63 ;;
64 ;; Normally, the level of the line is taken directly as the length of
65 ;; its level prefix. This often results in adjacent levels not mapping
66 ;; to adjacent numbers, which confuses the tree-building portion of the
67 ;; program, which expects top-level to be 0, first sub-level to be 1,
68 ;; etc. You can use `level-substring-divisor' or `compute-level' in
69 ;; SPECS to specify a constant scaling factor or specify a completely
70 ;; alternative procedure, respectively.
71 ;;
72 ;; SPECS is an alist which may contain the following key/value pairs:
73 ;;
74 ;; - level-submatch-number NUMBER
75 ;; - level-substring-divisor NUMBER
76 ;; - compute-level PROC
77 ;; - body-submatch-number NUMBER
78 ;; - extra-fields ((FIELD-1 . SUBMATCH-1) (FIELD-2 . SUBMATCH-2) ...)
79 ;;
80 ;; The PROC value associated with key `compute-level' should take a
81 ;; Scheme match structure (as returned by `regexp-exec') and return a
82 ;; number, the normalized level for that line. If this is specified,
83 ;; it takes precedence over other level-computation methods.
84 ;;
85 ;; Use `body-submatch-number' if RE specifies the whole body, or if you
86 ;; want to make use of the extra fields parsing. The `extra-fields'
87 ;; value is a sub-alist, whose keys name additional fields that are to
88 ;; be recognized. These fields along with `level' are set as object
89 ;; properties of the final string ("body") that is consed into the tree.
90 ;; If a field name ends in "?" the field value is set to be #t if there
91 ;; is a match and the result is not an empty string, and #f otherwise.
92 ;;
93 ;;
94 ;; Bugs and caveats:
95 ;;
96 ;; (1) Only the first file specified on the command line is scanned.
97 ;; (2) TAB characters at the beginnings of lines are not recognized.
98 ;; (3) Outlines that "skip" levels signal an error. In other words,
99 ;; this will fail:
100 ;;
101 ;; - a 0
102 ;; - b 1
103 ;; - c 3 <-- skipped 2 -- error!
104 ;; - d 1
105 ;;
106 ;;
107 ;; TODO: Determine what's the right thing to do for skips.
108 ;; Handle TABs.
109 ;; Make line format customizable via longopts.
110
111 ;;; Code:
112
113 (define-module (scripts read-text-outline)
114 :export (read-text-outline
115 read-text-outline-silently
116 make-text-outline-reader)
117 :use-module (ice-9 regex)
118 :autoload (ice-9 rdelim) (read-line)
119 :autoload (ice-9 getopt-long) (getopt-long))
120
121 (define %include-in-guild-list #f)
122 (define %summary "Convert textual outlines to s-expressions.")
123
124 (define (?? symbol)
125 (let ((name (symbol->string symbol)))
126 (string=? "?" (substring name (1- (string-length name))))))
127
128 (define (msub n)
129 (lambda (m)
130 (match:substring m n)))
131
132 (define (??-predicates pair)
133 (cons (car pair)
134 (if (?? (car pair))
135 (lambda (m)
136 (not (string=? "" (match:substring m (cdr pair)))))
137 (msub (cdr pair)))))
138
139 (define (make-line-parser re specs)
140 (let* ((rx (let ((fc (substring re 0 1)))
141 (make-regexp (if (string=? "^" fc)
142 re
143 (string-append "^" re)))))
144 (check (lambda (key)
145 (assq-ref specs key)))
146 (level-substring (msub (or (check 'level-submatch-number) 1)))
147 (extract-level (cond ((check 'compute-level)
148 => (lambda (proc)
149 (lambda (m)
150 (proc m))))
151 ((check 'level-substring-divisor)
152 => (lambda (n)
153 (lambda (m)
154 (/ (string-length (level-substring m))
155 n))))
156 (else
157 (lambda (m)
158 (string-length (level-substring m))))))
159 (extract-body (cond ((check 'body-submatch-number)
160 => msub)
161 (else
162 (lambda (m) (match:suffix m)))))
163 (misc-props! (cond ((check 'extra-fields)
164 => (lambda (alist)
165 (let ((new (map ??-predicates alist)))
166 (lambda (obj m)
167 (for-each
168 (lambda (pair)
169 (set-object-property!
170 obj (car pair)
171 ((cdr pair) m)))
172 new)))))
173 (else
174 (lambda (obj m) #t)))))
175 ;; retval
176 (lambda (line)
177 (cond ((regexp-exec rx line)
178 => (lambda (m)
179 (let ((level (extract-level m))
180 (body (extract-body m)))
181 (set-object-property! body 'level level)
182 (misc-props! body m)
183 body)))
184 (else #f)))))
185
186 (define (make-text-outline-reader re specs)
187 (let ((parse-line (make-line-parser re specs)))
188 ;; retval
189 (lambda (port)
190 (let* ((all '(start))
191 (pchain (list))) ; parents chain
192 (let loop ((line (read-line port))
193 (prev-level -1) ; how this relates to the first input
194 ; level determines whether or not we
195 ; start in "sibling" or "child" mode.
196 ; in the end, `start' is ignored and
197 ; it's much easier to ignore parents
198 ; than siblings (sometimes). this is
199 ; not to encourage ignorance, however.
200 (tp all)) ; tail pointer
201 (or (eof-object? line)
202 (cond ((parse-line line)
203 => (lambda (w)
204 (let* ((words (list w))
205 (level (object-property w 'level))
206 (diff (- level prev-level)))
207 (cond
208
209 ;; sibling
210 ((zero? diff)
211 ;; just extend the chain
212 (set-cdr! tp words))
213
214 ;; child
215 ((positive? diff)
216 (or (= 1 diff)
217 (error "unhandled diff not 1:" diff line))
218 ;; parent may be contacted by uncle later (kids
219 ;; these days!) so save its level
220 (set-object-property! tp 'level prev-level)
221 (set! pchain (cons tp pchain))
222 ;; "push down" car into hierarchy
223 (set-car! tp (cons (car tp) words)))
224
225 ;; uncle
226 ((negative? diff)
227 ;; prune back to where levels match
228 (do ((p pchain (cdr p)))
229 ((= level (object-property (car p) 'level))
230 (set! pchain p)))
231 ;; resume at this level
232 (set-cdr! (car pchain) words)
233 (set! pchain (cdr pchain))))
234
235 (loop (read-line port) level words))))
236 (else (loop (read-line port) prev-level tp)))))
237 (set! all (car all))
238 (if (eq? 'start all)
239 '() ; wasteland
240 (cdr all))))))
241
242 (define read-text-outline-silently
243 (make-text-outline-reader "(([ ][ ])*)- *"
244 '((level-substring-divisor . 2))))
245
246 (define (read-text-outline . args)
247 (write (read-text-outline-silently (open-file (car args) "r")))
248 (newline)
249 #t) ; exit val
250
251 (define main read-text-outline)
252
253 ;;; read-text-outline ends here