merge from 1.8 branch
[bpt/guile.git] / scripts / read-text-outline
CommitLineData
773ea2fb
TTN
1#!/bin/sh
2# aside from this initial boilerplate, this is actually -*- scheme -*- code
3main='(module-ref (resolve-module '\''(scripts read-text-outline)) '\'main')'
4exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@"
5!#
6;;; read-text-outline --- Read a text outline and display it as a sexp
7
6e7d5622 8;; Copyright (C) 2002, 2006 Free Software Foundation, Inc.
773ea2fb
TTN
9;;
10;; This program is free software; you can redistribute it and/or
11;; modify it under the terms of the GNU General Public License as
12;; published by the Free Software Foundation; either version 2, or
13;; (at your option) any later version.
14;;
15;; This program is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18;; General Public License for more details.
19;;
20;; You should have received a copy of the GNU General Public License
21;; along with this software; see the file COPYING. If not, write to
92205699
MV
22;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23;; Boston, MA 02110-1301 USA
773ea2fb
TTN
24
25;;; Author: Thien-Thi Nguyen <ttn@gnu.org>
26
27;;; Commentary:
28
29;; Usage: read-text-outline OUTLINE
30;;
31;; Scan OUTLINE file and display a list of trees, the structure of
32;; each reflecting the "levels" in OUTLINE. The recognized outline
33;; format (used to indicate outline headings) is zero or more pairs of
088b5285 34;; leading spaces followed by "-". Something like:
773ea2fb
TTN
35;;
36;; - a 0
37;; - b 1
38;; - c 2
39;; - d 1
40;; - e 0
41;; - f 1
42;; - g 2
088b5285 43;; - h 1
773ea2fb
TTN
44;;
45;; In this example the levels are shown to the right. The output for
46;; such a file would be the single line:
47;;
48;; (("a" ("b" "c") "d") ("e" ("f" "g") "h"))
49;;
088b5285
TTN
50;; Basically, anything at the beginning of a list is a parent, and the
51;; remaining elements of that list are its children.
773ea2fb 52;;
088b5285
TTN
53;;
54;; Usage from a Scheme program: These two procs are exported:
773ea2fb
TTN
55;;
56;; (read-text-outline . args) ; only first arg is used
57;; (read-text-outline-silently port)
088b5285
TTN
58;; (make-text-outline-reader re specs)
59;;
60;; `make-text-outline-reader' returns a proc that reads from PORT and
61;; returns a list of trees (similar to `read-text-outline-silently').
62;;
63;; RE is a regular expression (string) that is used to identify a header
64;; line of the outline (as opposed to a whitespace line or intervening
65;; text). RE must begin w/ a sub-expression to match the "level prefix"
66;; of the line. You can use `level-submatch-number' in SPECS (explained
67;; below) to specify a number other than 1, the default.
68;;
69;; Normally, the level of the line is taken directly as the length of
70;; its level prefix. This often results in adjacent levels not mapping
71;; to adjacent numbers, which confuses the tree-building portion of the
72;; program, which expects top-level to be 0, first sub-level to be 1,
73;; etc. You can use `level-substring-divisor' or `compute-level' in
74;; SPECS to specify a constant scaling factor or specify a completely
75;; alternative procedure, respectively.
76;;
77;; SPECS is an alist which may contain the following key/value pairs:
78;;
79;; - level-submatch-number NUMBER
80;; - level-substring-divisor NUMBER
81;; - compute-level PROC
82;; - body-submatch-number NUMBER
83;; - extra-fields ((FIELD-1 . SUBMATCH-1) (FIELD-2 . SUBMATCH-2) ...)
84;;
85;; The PROC value associated with key `compute-level' should take a
86;; Scheme match structure (as returned by `regexp-exec') and return a
87;; number, the normalized level for that line. If this is specified,
88;; it takes precedence over other level-computation methods.
89;;
90;; Use `body-submatch-number' if RE specifies the whole body, or if you
91;; want to make use of the extra fields parsing. The `extra-fields'
92;; value is a sub-alist, whose keys name additional fields that are to
93;; be recognized. These fields along with `level' are set as object
94;; properties of the final string ("body") that is consed into the tree.
95;; If a field name ends in "?" the field value is set to be #t if there
96;; is a match and the result is not an empty string, and #f otherwise.
773ea2fb
TTN
97;;
98;;
99;; Bugs and caveats:
100;;
101;; (1) Only the first file specified on the command line is scanned.
102;; (2) TAB characters at the beginnings of lines are not recognized.
103;; (3) Outlines that "skip" levels signal an error. In other words,
104;; this will fail:
105;;
106;; - a 0
107;; - b 1
108;; - c 3 <-- skipped 2 -- error!
109;; - d 1
110;;
111;;
112;; TODO: Determine what's the right thing to do for skips.
113;; Handle TABs.
088b5285 114;; Make line format customizable via longopts.
773ea2fb
TTN
115
116;;; Code:
117
118(define-module (scripts read-text-outline)
088b5285
TTN
119 :export (read-text-outline
120 read-text-outline-silently
121 make-text-outline-reader)
773ea2fb 122 :use-module (ice-9 regex)
088b5285
TTN
123 :autoload (ice-9 rdelim) (read-line)
124 :autoload (ice-9 getopt-long) (getopt-long))
125
126(define (?? symbol)
127 (let ((name (symbol->string symbol)))
128 (string=? "?" (substring name (1- (string-length name))))))
129
130(define (msub n)
131 (lambda (m)
132 (match:substring m n)))
133
134(define (??-predicates pair)
135 (cons (car pair)
136 (if (?? (car pair))
137 (lambda (m)
138 (not (string=? "" (match:substring m (cdr pair)))))
139 (msub (cdr pair)))))
140
141(define (make-line-parser re specs)
142 (let* ((rx (let ((fc (substring re 0 1)))
143 (make-regexp (if (string=? "^" fc)
144 re
145 (string-append "^" re)))))
146 (check (lambda (key)
147 (assq-ref specs key)))
148 (level-substring (msub (or (check 'level-submatch-number) 1)))
149 (extract-level (cond ((check 'compute-level)
150 => (lambda (proc)
151 (lambda (m)
152 (proc m))))
153 ((check 'level-substring-divisor)
154 => (lambda (n)
155 (lambda (m)
156 (/ (string-length (level-substring m))
157 n))))
158 (else
159 (lambda (m)
160 (string-length (level-substring m))))))
161 (extract-body (cond ((check 'body-submatch-number)
162 => msub)
163 (else
164 (lambda (m) (match:suffix m)))))
165 (misc-props! (cond ((check 'extra-fields)
166 => (lambda (alist)
167 (let ((new (map ??-predicates alist)))
168 (lambda (obj m)
169 (for-each
170 (lambda (pair)
171 (set-object-property!
172 obj (car pair)
173 ((cdr pair) m)))
174 new)))))
175 (else
176 (lambda (obj m) #t)))))
177 ;; retval
178 (lambda (line)
179 (cond ((regexp-exec rx line)
180 => (lambda (m)
181 (let ((level (extract-level m))
182 (body (extract-body m)))
183 (set-object-property! body 'level level)
184 (misc-props! body m)
185 body)))
186 (else #f)))))
187
188(define (make-text-outline-reader re specs)
189 (let ((parse-line (make-line-parser re specs)))
190 ;; retval
191 (lambda (port)
192 (let* ((all '(start))
193 (pchain (list))) ; parents chain
194 (let loop ((line (read-line port))
195 (prev-level -1) ; how this relates to the first input
04ab3b74
TTN
196 ; level determines whether or not we
197 ; start in "sibling" or "child" mode.
198 ; in the end, `start' is ignored and
199 ; it's much easier to ignore parents
200 ; than siblings (sometimes). this is
201 ; not to encourage ignorance, however.
088b5285
TTN
202 (tp all)) ; tail pointer
203 (or (eof-object? line)
204 (cond ((parse-line line)
205 => (lambda (w)
206 (let* ((words (list w))
207 (level (object-property w 'level))
208 (diff (- level prev-level)))
209 (cond
210
211 ;; sibling
212 ((zero? diff)
213 ;; just extend the chain
214 (set-cdr! tp words))
215
216 ;; child
217 ((positive? diff)
218 (or (= 1 diff)
219 (error "unhandled diff not 1:" diff line))
220 ;; parent may be contacted by uncle later (kids
221 ;; these days!) so save its level
222 (set-object-property! tp 'level prev-level)
223 (set! pchain (cons tp pchain))
224 ;; "push down" car into hierarchy
225 (set-car! tp (cons (car tp) words)))
226
227 ;; uncle
228 ((negative? diff)
229 ;; prune back to where levels match
230 (do ((p pchain (cdr p)))
231 ((= level (object-property (car p) 'level))
232 (set! pchain p)))
233 ;; resume at this level
234 (set-cdr! (car pchain) words)
235 (set! pchain (cdr pchain))))
236
237 (loop (read-line port) level words))))
238 (else (loop (read-line port) prev-level tp)))))
239 (set! all (car all))
240 (if (eq? 'start all)
241 '() ; wasteland
242 (cdr all))))))
243
244(define read-text-outline-silently
245 (make-text-outline-reader "(([ ][ ])*)- *"
246 '((level-substring-divisor . 2))))
773ea2fb
TTN
247
248(define (read-text-outline . args)
088b5285
TTN
249 (write (read-text-outline-silently (open-file (car args) "r")))
250 (newline)
773ea2fb
TTN
251 #t) ; exit val
252
253(define main read-text-outline)
254
255;;; read-text-outline ends here