Commit | Line | Data |
---|---|---|
773ea2fb TTN |
1 | #!/bin/sh |
2 | # aside from this initial boilerplate, this is actually -*- scheme -*- code | |
3 | main='(module-ref (resolve-module '\''(scripts read-text-outline)) '\'main')' | |
4 | exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@" | |
5 | !# | |
6 | ;;; read-text-outline --- Read a text outline and display it as a sexp | |
7 | ||
6e7d5622 | 8 | ;; Copyright (C) 2002, 2006 Free Software Foundation, Inc. |
773ea2fb TTN |
9 | ;; |
10 | ;; This program is free software; you can redistribute it and/or | |
11 | ;; modify it under the terms of the GNU General Public License as | |
12 | ;; published by the Free Software Foundation; either version 2, or | |
13 | ;; (at your option) any later version. | |
14 | ;; | |
15 | ;; This program is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | ;; General Public License for more details. | |
19 | ;; | |
20 | ;; You should have received a copy of the GNU General Public License | |
21 | ;; along with this software; see the file COPYING. If not, write to | |
92205699 MV |
22 | ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
23 | ;; Boston, MA 02110-1301 USA | |
773ea2fb TTN |
24 | |
25 | ;;; Author: Thien-Thi Nguyen <ttn@gnu.org> | |
26 | ||
27 | ;;; Commentary: | |
28 | ||
29 | ;; Usage: read-text-outline OUTLINE | |
30 | ;; | |
31 | ;; Scan OUTLINE file and display a list of trees, the structure of | |
32 | ;; each reflecting the "levels" in OUTLINE. The recognized outline | |
33 | ;; format (used to indicate outline headings) is zero or more pairs of | |
088b5285 | 34 | ;; leading spaces followed by "-". Something like: |
773ea2fb TTN |
35 | ;; |
36 | ;; - a 0 | |
37 | ;; - b 1 | |
38 | ;; - c 2 | |
39 | ;; - d 1 | |
40 | ;; - e 0 | |
41 | ;; - f 1 | |
42 | ;; - g 2 | |
088b5285 | 43 | ;; - h 1 |
773ea2fb TTN |
44 | ;; |
45 | ;; In this example the levels are shown to the right. The output for | |
46 | ;; such a file would be the single line: | |
47 | ;; | |
48 | ;; (("a" ("b" "c") "d") ("e" ("f" "g") "h")) | |
49 | ;; | |
088b5285 TTN |
50 | ;; Basically, anything at the beginning of a list is a parent, and the |
51 | ;; remaining elements of that list are its children. | |
773ea2fb | 52 | ;; |
088b5285 TTN |
53 | ;; |
54 | ;; Usage from a Scheme program: These two procs are exported: | |
773ea2fb TTN |
55 | ;; |
56 | ;; (read-text-outline . args) ; only first arg is used | |
57 | ;; (read-text-outline-silently port) | |
088b5285 TTN |
58 | ;; (make-text-outline-reader re specs) |
59 | ;; | |
60 | ;; `make-text-outline-reader' returns a proc that reads from PORT and | |
61 | ;; returns a list of trees (similar to `read-text-outline-silently'). | |
62 | ;; | |
63 | ;; RE is a regular expression (string) that is used to identify a header | |
64 | ;; line of the outline (as opposed to a whitespace line or intervening | |
65 | ;; text). RE must begin w/ a sub-expression to match the "level prefix" | |
66 | ;; of the line. You can use `level-submatch-number' in SPECS (explained | |
67 | ;; below) to specify a number other than 1, the default. | |
68 | ;; | |
69 | ;; Normally, the level of the line is taken directly as the length of | |
70 | ;; its level prefix. This often results in adjacent levels not mapping | |
71 | ;; to adjacent numbers, which confuses the tree-building portion of the | |
72 | ;; program, which expects top-level to be 0, first sub-level to be 1, | |
73 | ;; etc. You can use `level-substring-divisor' or `compute-level' in | |
74 | ;; SPECS to specify a constant scaling factor or specify a completely | |
75 | ;; alternative procedure, respectively. | |
76 | ;; | |
77 | ;; SPECS is an alist which may contain the following key/value pairs: | |
78 | ;; | |
79 | ;; - level-submatch-number NUMBER | |
80 | ;; - level-substring-divisor NUMBER | |
81 | ;; - compute-level PROC | |
82 | ;; - body-submatch-number NUMBER | |
83 | ;; - extra-fields ((FIELD-1 . SUBMATCH-1) (FIELD-2 . SUBMATCH-2) ...) | |
84 | ;; | |
85 | ;; The PROC value associated with key `compute-level' should take a | |
86 | ;; Scheme match structure (as returned by `regexp-exec') and return a | |
87 | ;; number, the normalized level for that line. If this is specified, | |
88 | ;; it takes precedence over other level-computation methods. | |
89 | ;; | |
90 | ;; Use `body-submatch-number' if RE specifies the whole body, or if you | |
91 | ;; want to make use of the extra fields parsing. The `extra-fields' | |
92 | ;; value is a sub-alist, whose keys name additional fields that are to | |
93 | ;; be recognized. These fields along with `level' are set as object | |
94 | ;; properties of the final string ("body") that is consed into the tree. | |
95 | ;; If a field name ends in "?" the field value is set to be #t if there | |
96 | ;; is a match and the result is not an empty string, and #f otherwise. | |
773ea2fb TTN |
97 | ;; |
98 | ;; | |
99 | ;; Bugs and caveats: | |
100 | ;; | |
101 | ;; (1) Only the first file specified on the command line is scanned. | |
102 | ;; (2) TAB characters at the beginnings of lines are not recognized. | |
103 | ;; (3) Outlines that "skip" levels signal an error. In other words, | |
104 | ;; this will fail: | |
105 | ;; | |
106 | ;; - a 0 | |
107 | ;; - b 1 | |
108 | ;; - c 3 <-- skipped 2 -- error! | |
109 | ;; - d 1 | |
110 | ;; | |
111 | ;; | |
112 | ;; TODO: Determine what's the right thing to do for skips. | |
113 | ;; Handle TABs. | |
088b5285 | 114 | ;; Make line format customizable via longopts. |
773ea2fb TTN |
115 | |
116 | ;;; Code: | |
117 | ||
118 | (define-module (scripts read-text-outline) | |
088b5285 TTN |
119 | :export (read-text-outline |
120 | read-text-outline-silently | |
121 | make-text-outline-reader) | |
773ea2fb | 122 | :use-module (ice-9 regex) |
088b5285 TTN |
123 | :autoload (ice-9 rdelim) (read-line) |
124 | :autoload (ice-9 getopt-long) (getopt-long)) | |
125 | ||
126 | (define (?? symbol) | |
127 | (let ((name (symbol->string symbol))) | |
128 | (string=? "?" (substring name (1- (string-length name)))))) | |
129 | ||
130 | (define (msub n) | |
131 | (lambda (m) | |
132 | (match:substring m n))) | |
133 | ||
134 | (define (??-predicates pair) | |
135 | (cons (car pair) | |
136 | (if (?? (car pair)) | |
137 | (lambda (m) | |
138 | (not (string=? "" (match:substring m (cdr pair))))) | |
139 | (msub (cdr pair))))) | |
140 | ||
141 | (define (make-line-parser re specs) | |
142 | (let* ((rx (let ((fc (substring re 0 1))) | |
143 | (make-regexp (if (string=? "^" fc) | |
144 | re | |
145 | (string-append "^" re))))) | |
146 | (check (lambda (key) | |
147 | (assq-ref specs key))) | |
148 | (level-substring (msub (or (check 'level-submatch-number) 1))) | |
149 | (extract-level (cond ((check 'compute-level) | |
150 | => (lambda (proc) | |
151 | (lambda (m) | |
152 | (proc m)))) | |
153 | ((check 'level-substring-divisor) | |
154 | => (lambda (n) | |
155 | (lambda (m) | |
156 | (/ (string-length (level-substring m)) | |
157 | n)))) | |
158 | (else | |
159 | (lambda (m) | |
160 | (string-length (level-substring m)))))) | |
161 | (extract-body (cond ((check 'body-submatch-number) | |
162 | => msub) | |
163 | (else | |
164 | (lambda (m) (match:suffix m))))) | |
165 | (misc-props! (cond ((check 'extra-fields) | |
166 | => (lambda (alist) | |
167 | (let ((new (map ??-predicates alist))) | |
168 | (lambda (obj m) | |
169 | (for-each | |
170 | (lambda (pair) | |
171 | (set-object-property! | |
172 | obj (car pair) | |
173 | ((cdr pair) m))) | |
174 | new))))) | |
175 | (else | |
176 | (lambda (obj m) #t))))) | |
177 | ;; retval | |
178 | (lambda (line) | |
179 | (cond ((regexp-exec rx line) | |
180 | => (lambda (m) | |
181 | (let ((level (extract-level m)) | |
182 | (body (extract-body m))) | |
183 | (set-object-property! body 'level level) | |
184 | (misc-props! body m) | |
185 | body))) | |
186 | (else #f))))) | |
187 | ||
188 | (define (make-text-outline-reader re specs) | |
189 | (let ((parse-line (make-line-parser re specs))) | |
190 | ;; retval | |
191 | (lambda (port) | |
192 | (let* ((all '(start)) | |
193 | (pchain (list))) ; parents chain | |
194 | (let loop ((line (read-line port)) | |
195 | (prev-level -1) ; how this relates to the first input | |
04ab3b74 TTN |
196 | ; level determines whether or not we |
197 | ; start in "sibling" or "child" mode. | |
198 | ; in the end, `start' is ignored and | |
199 | ; it's much easier to ignore parents | |
200 | ; than siblings (sometimes). this is | |
201 | ; not to encourage ignorance, however. | |
088b5285 TTN |
202 | (tp all)) ; tail pointer |
203 | (or (eof-object? line) | |
204 | (cond ((parse-line line) | |
205 | => (lambda (w) | |
206 | (let* ((words (list w)) | |
207 | (level (object-property w 'level)) | |
208 | (diff (- level prev-level))) | |
209 | (cond | |
210 | ||
211 | ;; sibling | |
212 | ((zero? diff) | |
213 | ;; just extend the chain | |
214 | (set-cdr! tp words)) | |
215 | ||
216 | ;; child | |
217 | ((positive? diff) | |
218 | (or (= 1 diff) | |
219 | (error "unhandled diff not 1:" diff line)) | |
220 | ;; parent may be contacted by uncle later (kids | |
221 | ;; these days!) so save its level | |
222 | (set-object-property! tp 'level prev-level) | |
223 | (set! pchain (cons tp pchain)) | |
224 | ;; "push down" car into hierarchy | |
225 | (set-car! tp (cons (car tp) words))) | |
226 | ||
227 | ;; uncle | |
228 | ((negative? diff) | |
229 | ;; prune back to where levels match | |
230 | (do ((p pchain (cdr p))) | |
231 | ((= level (object-property (car p) 'level)) | |
232 | (set! pchain p))) | |
233 | ;; resume at this level | |
234 | (set-cdr! (car pchain) words) | |
235 | (set! pchain (cdr pchain)))) | |
236 | ||
237 | (loop (read-line port) level words)))) | |
238 | (else (loop (read-line port) prev-level tp))))) | |
239 | (set! all (car all)) | |
240 | (if (eq? 'start all) | |
241 | '() ; wasteland | |
242 | (cdr all)))))) | |
243 | ||
244 | (define read-text-outline-silently | |
245 | (make-text-outline-reader "(([ ][ ])*)- *" | |
246 | '((level-substring-divisor . 2)))) | |
773ea2fb TTN |
247 | |
248 | (define (read-text-outline . args) | |
088b5285 TTN |
249 | (write (read-text-outline-silently (open-file (car args) "r"))) |
250 | (newline) | |
773ea2fb TTN |
251 | #t) ; exit val |
252 | ||
253 | (define main read-text-outline) | |
254 | ||
255 | ;;; read-text-outline ends here |