module-{ref,define}-submodule use the submodules table
[bpt/guile.git] / module / scripts / read-text-outline.scm
CommitLineData
773ea2fb
TTN
1;;; read-text-outline --- Read a text outline and display it as a sexp
2
6e7d5622 3;; Copyright (C) 2002, 2006 Free Software Foundation, Inc.
773ea2fb
TTN
4;;
5;; This program is free software; you can redistribute it and/or
83ba2d37
NJ
6;; modify it under the terms of the GNU Lesser General Public License
7;; as published by the Free Software Foundation; either version 3, or
773ea2fb
TTN
8;; (at your option) any later version.
9;;
10;; This program is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
83ba2d37 13;; Lesser General Public License for more details.
773ea2fb 14;;
83ba2d37
NJ
15;; You should have received a copy of the GNU Lesser General Public
16;; License along with this software; see the file COPYING.LESSER. If
17;; not, write to the Free Software Foundation, Inc., 51 Franklin
18;; Street, Fifth Floor, Boston, MA 02110-1301 USA
773ea2fb
TTN
19
20;;; Author: Thien-Thi Nguyen <ttn@gnu.org>
21
22;;; Commentary:
23
24;; Usage: read-text-outline OUTLINE
25;;
26;; Scan OUTLINE file and display a list of trees, the structure of
27;; each reflecting the "levels" in OUTLINE. The recognized outline
28;; format (used to indicate outline headings) is zero or more pairs of
088b5285 29;; leading spaces followed by "-". Something like:
773ea2fb
TTN
30;;
31;; - a 0
32;; - b 1
33;; - c 2
34;; - d 1
35;; - e 0
36;; - f 1
37;; - g 2
088b5285 38;; - h 1
773ea2fb
TTN
39;;
40;; In this example the levels are shown to the right. The output for
41;; such a file would be the single line:
42;;
43;; (("a" ("b" "c") "d") ("e" ("f" "g") "h"))
44;;
088b5285
TTN
45;; Basically, anything at the beginning of a list is a parent, and the
46;; remaining elements of that list are its children.
773ea2fb 47;;
088b5285
TTN
48;;
49;; Usage from a Scheme program: These two procs are exported:
773ea2fb
TTN
50;;
51;; (read-text-outline . args) ; only first arg is used
52;; (read-text-outline-silently port)
088b5285
TTN
53;; (make-text-outline-reader re specs)
54;;
55;; `make-text-outline-reader' returns a proc that reads from PORT and
56;; returns a list of trees (similar to `read-text-outline-silently').
57;;
58;; RE is a regular expression (string) that is used to identify a header
59;; line of the outline (as opposed to a whitespace line or intervening
60;; text). RE must begin w/ a sub-expression to match the "level prefix"
61;; of the line. You can use `level-submatch-number' in SPECS (explained
62;; below) to specify a number other than 1, the default.
63;;
64;; Normally, the level of the line is taken directly as the length of
65;; its level prefix. This often results in adjacent levels not mapping
66;; to adjacent numbers, which confuses the tree-building portion of the
67;; program, which expects top-level to be 0, first sub-level to be 1,
68;; etc. You can use `level-substring-divisor' or `compute-level' in
69;; SPECS to specify a constant scaling factor or specify a completely
70;; alternative procedure, respectively.
71;;
72;; SPECS is an alist which may contain the following key/value pairs:
73;;
74;; - level-submatch-number NUMBER
75;; - level-substring-divisor NUMBER
76;; - compute-level PROC
77;; - body-submatch-number NUMBER
78;; - extra-fields ((FIELD-1 . SUBMATCH-1) (FIELD-2 . SUBMATCH-2) ...)
79;;
80;; The PROC value associated with key `compute-level' should take a
81;; Scheme match structure (as returned by `regexp-exec') and return a
82;; number, the normalized level for that line. If this is specified,
83;; it takes precedence over other level-computation methods.
84;;
85;; Use `body-submatch-number' if RE specifies the whole body, or if you
86;; want to make use of the extra fields parsing. The `extra-fields'
87;; value is a sub-alist, whose keys name additional fields that are to
88;; be recognized. These fields along with `level' are set as object
89;; properties of the final string ("body") that is consed into the tree.
90;; If a field name ends in "?" the field value is set to be #t if there
91;; is a match and the result is not an empty string, and #f otherwise.
773ea2fb
TTN
92;;
93;;
94;; Bugs and caveats:
95;;
96;; (1) Only the first file specified on the command line is scanned.
97;; (2) TAB characters at the beginnings of lines are not recognized.
98;; (3) Outlines that "skip" levels signal an error. In other words,
99;; this will fail:
100;;
101;; - a 0
102;; - b 1
103;; - c 3 <-- skipped 2 -- error!
104;; - d 1
105;;
106;;
107;; TODO: Determine what's the right thing to do for skips.
108;; Handle TABs.
088b5285 109;; Make line format customizable via longopts.
773ea2fb
TTN
110
111;;; Code:
112
113(define-module (scripts read-text-outline)
088b5285
TTN
114 :export (read-text-outline
115 read-text-outline-silently
116 make-text-outline-reader)
773ea2fb 117 :use-module (ice-9 regex)
088b5285
TTN
118 :autoload (ice-9 rdelim) (read-line)
119 :autoload (ice-9 getopt-long) (getopt-long))
120
121(define (?? symbol)
122 (let ((name (symbol->string symbol)))
123 (string=? "?" (substring name (1- (string-length name))))))
124
125(define (msub n)
126 (lambda (m)
127 (match:substring m n)))
128
129(define (??-predicates pair)
130 (cons (car pair)
131 (if (?? (car pair))
132 (lambda (m)
133 (not (string=? "" (match:substring m (cdr pair)))))
134 (msub (cdr pair)))))
135
136(define (make-line-parser re specs)
137 (let* ((rx (let ((fc (substring re 0 1)))
138 (make-regexp (if (string=? "^" fc)
139 re
140 (string-append "^" re)))))
141 (check (lambda (key)
142 (assq-ref specs key)))
143 (level-substring (msub (or (check 'level-submatch-number) 1)))
144 (extract-level (cond ((check 'compute-level)
145 => (lambda (proc)
146 (lambda (m)
147 (proc m))))
148 ((check 'level-substring-divisor)
149 => (lambda (n)
150 (lambda (m)
151 (/ (string-length (level-substring m))
152 n))))
153 (else
154 (lambda (m)
155 (string-length (level-substring m))))))
156 (extract-body (cond ((check 'body-submatch-number)
157 => msub)
158 (else
159 (lambda (m) (match:suffix m)))))
160 (misc-props! (cond ((check 'extra-fields)
161 => (lambda (alist)
162 (let ((new (map ??-predicates alist)))
163 (lambda (obj m)
164 (for-each
165 (lambda (pair)
166 (set-object-property!
167 obj (car pair)
168 ((cdr pair) m)))
169 new)))))
170 (else
171 (lambda (obj m) #t)))))
172 ;; retval
173 (lambda (line)
174 (cond ((regexp-exec rx line)
175 => (lambda (m)
176 (let ((level (extract-level m))
177 (body (extract-body m)))
178 (set-object-property! body 'level level)
179 (misc-props! body m)
180 body)))
181 (else #f)))))
182
183(define (make-text-outline-reader re specs)
184 (let ((parse-line (make-line-parser re specs)))
185 ;; retval
186 (lambda (port)
187 (let* ((all '(start))
188 (pchain (list))) ; parents chain
189 (let loop ((line (read-line port))
190 (prev-level -1) ; how this relates to the first input
04ab3b74
TTN
191 ; level determines whether or not we
192 ; start in "sibling" or "child" mode.
193 ; in the end, `start' is ignored and
194 ; it's much easier to ignore parents
195 ; than siblings (sometimes). this is
196 ; not to encourage ignorance, however.
088b5285
TTN
197 (tp all)) ; tail pointer
198 (or (eof-object? line)
199 (cond ((parse-line line)
200 => (lambda (w)
201 (let* ((words (list w))
202 (level (object-property w 'level))
203 (diff (- level prev-level)))
204 (cond
205
206 ;; sibling
207 ((zero? diff)
208 ;; just extend the chain
209 (set-cdr! tp words))
210
211 ;; child
212 ((positive? diff)
213 (or (= 1 diff)
214 (error "unhandled diff not 1:" diff line))
215 ;; parent may be contacted by uncle later (kids
216 ;; these days!) so save its level
217 (set-object-property! tp 'level prev-level)
218 (set! pchain (cons tp pchain))
219 ;; "push down" car into hierarchy
220 (set-car! tp (cons (car tp) words)))
221
222 ;; uncle
223 ((negative? diff)
224 ;; prune back to where levels match
225 (do ((p pchain (cdr p)))
226 ((= level (object-property (car p) 'level))
227 (set! pchain p)))
228 ;; resume at this level
229 (set-cdr! (car pchain) words)
230 (set! pchain (cdr pchain))))
231
232 (loop (read-line port) level words))))
233 (else (loop (read-line port) prev-level tp)))))
234 (set! all (car all))
235 (if (eq? 'start all)
236 '() ; wasteland
237 (cdr all))))))
238
239(define read-text-outline-silently
240 (make-text-outline-reader "(([ ][ ])*)- *"
241 '((level-substring-divisor . 2))))
773ea2fb
TTN
242
243(define (read-text-outline . args)
088b5285
TTN
244 (write (read-text-outline-silently (open-file (car args) "r")))
245 (newline)
773ea2fb
TTN
246 #t) ; exit val
247
248(define main read-text-outline)
249
250;;; read-text-outline ends here