| 1 | #!/bin/sh |
| 2 | # aside from this initial boilerplate, this is actually -*- scheme -*- code |
| 3 | main='(module-ref (resolve-module '\''(scripts read-text-outline)) '\'main')' |
| 4 | exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@" |
| 5 | !# |
| 6 | ;;; read-text-outline --- Read a text outline and display it as a sexp |
| 7 | |
| 8 | ;; Copyright (C) 2002 Free Software Foundation, Inc. |
| 9 | ;; |
| 10 | ;; This program is free software; you can redistribute it and/or |
| 11 | ;; modify it under the terms of the GNU General Public License as |
| 12 | ;; published by the Free Software Foundation; either version 2, or |
| 13 | ;; (at your option) any later version. |
| 14 | ;; |
| 15 | ;; This program is distributed in the hope that it will be useful, |
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 18 | ;; General Public License for more details. |
| 19 | ;; |
| 20 | ;; You should have received a copy of the GNU General Public License |
| 21 | ;; along with this software; see the file COPYING. If not, write to |
| 22 | ;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330, |
| 23 | ;; Boston, MA 02111-1307 USA |
| 24 | |
| 25 | ;;; Author: Thien-Thi Nguyen <ttn@gnu.org> |
| 26 | |
| 27 | ;;; Commentary: |
| 28 | |
| 29 | ;; Usage: read-text-outline OUTLINE |
| 30 | ;; |
| 31 | ;; Scan OUTLINE file and display a list of trees, the structure of |
| 32 | ;; each reflecting the "levels" in OUTLINE. The recognized outline |
| 33 | ;; format (used to indicate outline headings) is zero or more pairs of |
| 34 | ;; leading spaces followed by "-". Something like: |
| 35 | ;; |
| 36 | ;; - a 0 |
| 37 | ;; - b 1 |
| 38 | ;; - c 2 |
| 39 | ;; - d 1 |
| 40 | ;; - e 0 |
| 41 | ;; - f 1 |
| 42 | ;; - g 2 |
| 43 | ;; - h 1 |
| 44 | ;; |
| 45 | ;; In this example the levels are shown to the right. The output for |
| 46 | ;; such a file would be the single line: |
| 47 | ;; |
| 48 | ;; (("a" ("b" "c") "d") ("e" ("f" "g") "h")) |
| 49 | ;; |
| 50 | ;; Basically, anything at the beginning of a list is a parent, and the |
| 51 | ;; remaining elements of that list are its children. |
| 52 | ;; |
| 53 | ;; |
| 54 | ;; Usage from a Scheme program: These two procs are exported: |
| 55 | ;; |
| 56 | ;; (read-text-outline . args) ; only first arg is used |
| 57 | ;; (read-text-outline-silently port) |
| 58 | ;; (make-text-outline-reader re specs) |
| 59 | ;; |
| 60 | ;; `make-text-outline-reader' returns a proc that reads from PORT and |
| 61 | ;; returns a list of trees (similar to `read-text-outline-silently'). |
| 62 | ;; |
| 63 | ;; RE is a regular expression (string) that is used to identify a header |
| 64 | ;; line of the outline (as opposed to a whitespace line or intervening |
| 65 | ;; text). RE must begin w/ a sub-expression to match the "level prefix" |
| 66 | ;; of the line. You can use `level-submatch-number' in SPECS (explained |
| 67 | ;; below) to specify a number other than 1, the default. |
| 68 | ;; |
| 69 | ;; Normally, the level of the line is taken directly as the length of |
| 70 | ;; its level prefix. This often results in adjacent levels not mapping |
| 71 | ;; to adjacent numbers, which confuses the tree-building portion of the |
| 72 | ;; program, which expects top-level to be 0, first sub-level to be 1, |
| 73 | ;; etc. You can use `level-substring-divisor' or `compute-level' in |
| 74 | ;; SPECS to specify a constant scaling factor or specify a completely |
| 75 | ;; alternative procedure, respectively. |
| 76 | ;; |
| 77 | ;; SPECS is an alist which may contain the following key/value pairs: |
| 78 | ;; |
| 79 | ;; - level-submatch-number NUMBER |
| 80 | ;; - level-substring-divisor NUMBER |
| 81 | ;; - compute-level PROC |
| 82 | ;; - body-submatch-number NUMBER |
| 83 | ;; - extra-fields ((FIELD-1 . SUBMATCH-1) (FIELD-2 . SUBMATCH-2) ...) |
| 84 | ;; |
| 85 | ;; The PROC value associated with key `compute-level' should take a |
| 86 | ;; Scheme match structure (as returned by `regexp-exec') and return a |
| 87 | ;; number, the normalized level for that line. If this is specified, |
| 88 | ;; it takes precedence over other level-computation methods. |
| 89 | ;; |
| 90 | ;; Use `body-submatch-number' if RE specifies the whole body, or if you |
| 91 | ;; want to make use of the extra fields parsing. The `extra-fields' |
| 92 | ;; value is a sub-alist, whose keys name additional fields that are to |
| 93 | ;; be recognized. These fields along with `level' are set as object |
| 94 | ;; properties of the final string ("body") that is consed into the tree. |
| 95 | ;; If a field name ends in "?" the field value is set to be #t if there |
| 96 | ;; is a match and the result is not an empty string, and #f otherwise. |
| 97 | ;; |
| 98 | ;; |
| 99 | ;; Bugs and caveats: |
| 100 | ;; |
| 101 | ;; (1) Only the first file specified on the command line is scanned. |
| 102 | ;; (2) TAB characters at the beginnings of lines are not recognized. |
| 103 | ;; (3) Outlines that "skip" levels signal an error. In other words, |
| 104 | ;; this will fail: |
| 105 | ;; |
| 106 | ;; - a 0 |
| 107 | ;; - b 1 |
| 108 | ;; - c 3 <-- skipped 2 -- error! |
| 109 | ;; - d 1 |
| 110 | ;; |
| 111 | ;; |
| 112 | ;; TODO: Determine what's the right thing to do for skips. |
| 113 | ;; Handle TABs. |
| 114 | ;; Make line format customizable via longopts. |
| 115 | |
| 116 | ;;; Code: |
| 117 | |
| 118 | (define-module (scripts read-text-outline) |
| 119 | :export (read-text-outline |
| 120 | read-text-outline-silently |
| 121 | make-text-outline-reader) |
| 122 | :use-module (ice-9 regex) |
| 123 | :autoload (ice-9 rdelim) (read-line) |
| 124 | :autoload (ice-9 getopt-long) (getopt-long)) |
| 125 | |
| 126 | (define (?? symbol) |
| 127 | (let ((name (symbol->string symbol))) |
| 128 | (string=? "?" (substring name (1- (string-length name)))))) |
| 129 | |
| 130 | (define (msub n) |
| 131 | (lambda (m) |
| 132 | (match:substring m n))) |
| 133 | |
| 134 | (define (??-predicates pair) |
| 135 | (cons (car pair) |
| 136 | (if (?? (car pair)) |
| 137 | (lambda (m) |
| 138 | (not (string=? "" (match:substring m (cdr pair))))) |
| 139 | (msub (cdr pair))))) |
| 140 | |
| 141 | (define (make-line-parser re specs) |
| 142 | (let* ((rx (let ((fc (substring re 0 1))) |
| 143 | (make-regexp (if (string=? "^" fc) |
| 144 | re |
| 145 | (string-append "^" re))))) |
| 146 | (check (lambda (key) |
| 147 | (assq-ref specs key))) |
| 148 | (level-substring (msub (or (check 'level-submatch-number) 1))) |
| 149 | (extract-level (cond ((check 'compute-level) |
| 150 | => (lambda (proc) |
| 151 | (lambda (m) |
| 152 | (proc m)))) |
| 153 | ((check 'level-substring-divisor) |
| 154 | => (lambda (n) |
| 155 | (lambda (m) |
| 156 | (/ (string-length (level-substring m)) |
| 157 | n)))) |
| 158 | (else |
| 159 | (lambda (m) |
| 160 | (string-length (level-substring m)))))) |
| 161 | (extract-body (cond ((check 'body-submatch-number) |
| 162 | => msub) |
| 163 | (else |
| 164 | (lambda (m) (match:suffix m))))) |
| 165 | (misc-props! (cond ((check 'extra-fields) |
| 166 | => (lambda (alist) |
| 167 | (let ((new (map ??-predicates alist))) |
| 168 | (lambda (obj m) |
| 169 | (for-each |
| 170 | (lambda (pair) |
| 171 | (set-object-property! |
| 172 | obj (car pair) |
| 173 | ((cdr pair) m))) |
| 174 | new))))) |
| 175 | (else |
| 176 | (lambda (obj m) #t))))) |
| 177 | ;; retval |
| 178 | (lambda (line) |
| 179 | (cond ((regexp-exec rx line) |
| 180 | => (lambda (m) |
| 181 | (let ((level (extract-level m)) |
| 182 | (body (extract-body m))) |
| 183 | (set-object-property! body 'level level) |
| 184 | (misc-props! body m) |
| 185 | body))) |
| 186 | (else #f))))) |
| 187 | |
| 188 | (define (make-text-outline-reader re specs) |
| 189 | (let ((parse-line (make-line-parser re specs))) |
| 190 | ;; retval |
| 191 | (lambda (port) |
| 192 | (let* ((all '(start)) |
| 193 | (pchain (list))) ; parents chain |
| 194 | (let loop ((line (read-line port)) |
| 195 | (prev-level -1) ; how this relates to the first input |
| 196 | ; level determines whether or not we |
| 197 | ; start in "sibling" or "child" mode. |
| 198 | ; in the end, `start' is ignored and |
| 199 | ; it's much easier to ignore parents |
| 200 | ; than siblings (sometimes). this is |
| 201 | ; not to encourage ignorance, however. |
| 202 | (tp all)) ; tail pointer |
| 203 | (or (eof-object? line) |
| 204 | (cond ((parse-line line) |
| 205 | => (lambda (w) |
| 206 | (let* ((words (list w)) |
| 207 | (level (object-property w 'level)) |
| 208 | (diff (- level prev-level))) |
| 209 | (cond |
| 210 | |
| 211 | ;; sibling |
| 212 | ((zero? diff) |
| 213 | ;; just extend the chain |
| 214 | (set-cdr! tp words)) |
| 215 | |
| 216 | ;; child |
| 217 | ((positive? diff) |
| 218 | (or (= 1 diff) |
| 219 | (error "unhandled diff not 1:" diff line)) |
| 220 | ;; parent may be contacted by uncle later (kids |
| 221 | ;; these days!) so save its level |
| 222 | (set-object-property! tp 'level prev-level) |
| 223 | (set! pchain (cons tp pchain)) |
| 224 | ;; "push down" car into hierarchy |
| 225 | (set-car! tp (cons (car tp) words))) |
| 226 | |
| 227 | ;; uncle |
| 228 | ((negative? diff) |
| 229 | ;; prune back to where levels match |
| 230 | (do ((p pchain (cdr p))) |
| 231 | ((= level (object-property (car p) 'level)) |
| 232 | (set! pchain p))) |
| 233 | ;; resume at this level |
| 234 | (set-cdr! (car pchain) words) |
| 235 | (set! pchain (cdr pchain)))) |
| 236 | |
| 237 | (loop (read-line port) level words)))) |
| 238 | (else (loop (read-line port) prev-level tp))))) |
| 239 | (set! all (car all)) |
| 240 | (if (eq? 'start all) |
| 241 | '() ; wasteland |
| 242 | (cdr all)))))) |
| 243 | |
| 244 | (define read-text-outline-silently |
| 245 | (make-text-outline-reader "(([ ][ ])*)- *" |
| 246 | '((level-substring-divisor . 2)))) |
| 247 | |
| 248 | (define (read-text-outline . args) |
| 249 | (write (read-text-outline-silently (open-file (car args) "r"))) |
| 250 | (newline) |
| 251 | #t) ; exit val |
| 252 | |
| 253 | (define main read-text-outline) |
| 254 | |
| 255 | ;;; read-text-outline ends here |