Default "meet" operator is meet-error for intmap
[bpt/guile.git] / module / scripts / read-scheme-source.scm
1 ;;; read-scheme-source --- Read a file, recognizing scheme forms and comments
2
3 ;; Copyright (C) 2001, 2006, 2011 Free Software Foundation, Inc.
4 ;;
5 ;; This program is free software; you can redistribute it and/or
6 ;; modify it under the terms of the GNU Lesser General Public License
7 ;; as published by the Free Software Foundation; either version 3, or
8 ;; (at your option) any later version.
9 ;;
10 ;; This program is distributed in the hope that it will be useful,
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;; Lesser General Public License for more details.
14 ;;
15 ;; You should have received a copy of the GNU Lesser General Public
16 ;; License along with this software; see the file COPYING.LESSER. If
17 ;; not, write to the Free Software Foundation, Inc., 51 Franklin
18 ;; Street, Fifth Floor, Boston, MA 02110-1301 USA
19
20 ;;; Author: Thien-Thi Nguyen
21
22 ;;; Commentary:
23
24 ;; Usage: read-scheme-source FILE1 FILE2 ...
25 ;;
26 ;; This program parses each FILE and writes to stdout sexps that describe the
27 ;; top-level structures of the file: scheme forms, single-line comments, and
28 ;; hash-bang comments. You can further process these (to associate comments
29 ;; w/ scheme forms as a kind of documentation, for example).
30 ;;
31 ;; The output sexps have one of these forms:
32 ;;
33 ;; (quote (filename FILENAME))
34 ;;
35 ;; (quote (comment :leading-semicolons N
36 ;; :text LINE))
37 ;;
38 ;; (quote (whitespace :text LINE))
39 ;;
40 ;; (quote (hash-bang-comment :line LINUM
41 ;; :line-count N
42 ;; :text-list (LINE1 LINE2 ...)))
43 ;;
44 ;; (quote (following-form-properties :line LINUM
45 ;; :line-count N)
46 ;; :type TYPE
47 ;; :signature SIGNATURE
48 ;; :std-int-doc DOCSTRING))
49 ;;
50 ;; SEXP
51 ;;
52 ;; The first four are straightforward (both FILENAME and LINE are strings sans
53 ;; newline, while LINUM and N are integers). The last two always go together,
54 ;; in that order. SEXP is scheme code processed only by `read' and then
55 ;; `write'.
56 ;;
57 ;; The :type field may be omitted if the form is not recognized. Otherwise,
58 ;; TYPE may be one of: procedure, alias, define-module, variable.
59 ;;
60 ;; The :signature field may be omitted if the form is not a procedure.
61 ;; Otherwise, SIGNATURE is a list showing the procedure's signature.
62 ;;
63 ;; If the type is `procedure' and the form has a standard internal docstring
64 ;; (first body form a string), that is extracted in full -- including any
65 ;; embedded newlines -- and recorded by field :std-int-doc.
66 ;;
67 ;;
68 ;; Usage from a program: The output list of sexps can be retrieved by scheme
69 ;; programs w/o having to capture stdout, like so:
70 ;;
71 ;; (use-modules (scripts read-scheme-source))
72 ;; (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...))
73 ;;
74 ;; There are also two convenience procs exported for use by Scheme programs:
75 ;;
76 ;; (clump FORMS) --- filter FORMS combining contiguous comment forms that
77 ;; have the same number of leading semicolons.
78 ;;
79 ;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse
80 ;; the ":tags", and return alist of (TAG . VAL) elems.
81 ;;
82 ;; TODO: Add option "--clump-comments", maybe w/ different clumping styles.
83 ;; Make `annotate!' extensible.
84
85 ;;; Code:
86
87 (define-module (scripts read-scheme-source)
88 :use-module (ice-9 rdelim)
89 :export (read-scheme-source
90 read-scheme-source-silently
91 quoted?
92 clump))
93
94 (define %include-in-guild-list #f)
95 (define %summary "Print a parsed representation of a Scheme file.")
96
97 ;; Try to figure out what FORM is and its various attributes.
98 ;; Call proc NOTE! with key (a symbol) and value.
99 ;;
100 (define (annotate! form note!)
101 (cond ((and (list? form)
102 (< 2 (length form))
103 (eq? 'define (car form))
104 (pair? (cadr form))
105 (symbol? (caadr form)))
106 (note! ':type 'procedure)
107 (note! ':signature (cadr form))
108 (and (< 3 (length form))
109 (string? (caddr form))
110 (note! ':std-int-doc (caddr form))))
111 ((and (list? form)
112 (< 2 (length form))
113 (eq? 'define (car form))
114 (symbol? (cadr form))
115 (list? (caddr form))
116 (< 3 (length (caddr form)))
117 (eq? 'lambda (car (caddr form)))
118 (string? (caddr (caddr form))))
119 (note! ':type 'procedure)
120 (note! ':signature (cons (cadr form) (cadr (caddr form))))
121 (note! ':std-int-doc (caddr (caddr form))))
122 ((and (list? form)
123 (= 3 (length form))
124 (eq? 'define (car form))
125 (symbol? (cadr form))
126 (symbol? (caddr form)))
127 (note! ':type 'alias))
128 ((and (list? form)
129 (eq? 'define-module (car form)))
130 (note! ':type 'define-module))
131 ;; Add other types here.
132 (else (note! ':type 'variable))))
133
134 ;; Process FILE, calling NB! on parsed top-level elements.
135 ;; Recognized: #!-!# and regular comments in addition to normal forms.
136 ;;
137 (define (process file nb!)
138 (nb! `'(filename ,file))
139 (let ((hash-bang-rx (make-regexp "^#!"))
140 (bang-hash-rx (make-regexp "^!#"))
141 (all-comment-rx (make-regexp "^[ \t]*(;+)"))
142 (all-whitespace-rx (make-regexp "^[ \t]*$"))
143 (p (open-input-file file)))
144 (let loop ((n (1+ (port-line p))) (line (read-line p)))
145 (or (not n)
146 (eof-object? line)
147 (begin
148 (cond ((regexp-exec hash-bang-rx line)
149 (let loop ((line (read-line p))
150 (text (list line)))
151 (if (or (eof-object? line)
152 (regexp-exec bang-hash-rx line))
153 (nb! `'(hash-bang-comment
154 :line ,n
155 :line-count ,(1+ (length text))
156 :text-list ,(reverse
157 (cons line text))))
158 (loop (read-line p)
159 (cons line text)))))
160 ((regexp-exec all-whitespace-rx line)
161 (nb! `'(whitespace :text ,line)))
162 ((regexp-exec all-comment-rx line)
163 => (lambda (m)
164 (nb! `'(comment
165 :leading-semicolons
166 ,(let ((m1 (vector-ref m 1)))
167 (- (cdr m1) (car m1)))
168 :text ,line))))
169 (else
170 (unread-string line p)
171 (let* ((form (read p))
172 (count (- (port-line p) n))
173 (props (let* ((props '())
174 (prop+ (lambda args
175 (set! props
176 (append props args)))))
177 (annotate! form prop+)
178 props)))
179 (or (= count 1) ; ugh
180 (begin
181 (read-line p)
182 (set! count (1+ count))))
183 (nb! `'(following-form-properties
184 :line ,n
185 :line-count ,count
186 ,@props))
187 (nb! form))))
188 (loop (1+ (port-line p)) (read-line p)))))))
189
190 ;;; entry points
191
192 (define (read-scheme-source-silently . files)
193 "See commentary in module (scripts read-scheme-source)."
194 (let* ((res '()))
195 (for-each (lambda (file)
196 (process file (lambda (e) (set! res (cons e res)))))
197 files)
198 (reverse res)))
199
200 (define (read-scheme-source . files)
201 "See commentary in module (scripts read-scheme-source)."
202 (for-each (lambda (file)
203 (process file (lambda (e) (write e) (newline))))
204 files))
205
206 ;; Recognize: (quote (SYM :TAG1 VAL1 :TAG2 VAL2 ...))
207 ;; and return alist: ((TAG1 . VAL1) (TAG2 . VAL2) ...)
208 ;; where the tags are symbols.
209 ;;
210 (define (quoted? sym form)
211 (and (list? form)
212 (= 2 (length form))
213 (eq? 'quote (car form))
214 (let ((inside (cadr form)))
215 (and (list? inside)
216 (< 0 (length inside))
217 (eq? sym (car inside))
218 (let loop ((ls (cdr inside)) (alist '()))
219 (if (null? ls)
220 alist ; retval
221 (let ((first (car ls)))
222 (or (symbol? first)
223 (error "bad list!"))
224 (loop (cddr ls)
225 (acons (string->symbol
226 (substring (symbol->string first) 1))
227 (cadr ls)
228 alist)))))))))
229
230 ;; Filter FORMS, combining contiguous comment forms that have the same number
231 ;; of leading semicolons. Do not include in them whitespace lines.
232 ;; Whitespace lines outside of such comment groupings are ignored, as are
233 ;; hash-bang comments. All other forms are passed through unchanged.
234 ;;
235 (define (clump forms)
236 (let loop ((forms forms) (acc '()) (pass-this-one-through? #f))
237 (if (null? forms)
238 (reverse acc) ; retval
239 (let ((form (car forms)))
240 (cond (pass-this-one-through?
241 (loop (cdr forms) (cons form acc) #f))
242 ((quoted? 'following-form-properties form)
243 (loop (cdr forms) (cons form acc) #t))
244 ((quoted? 'whitespace form) ;;; ignore
245 (loop (cdr forms) acc #f))
246 ((quoted? 'hash-bang-comment form) ;;; ignore for now
247 (loop (cdr forms) acc #f))
248 ((quoted? 'comment form)
249 => (lambda (alist)
250 (let cloop ((inner-forms (cdr forms))
251 (level (assq-ref alist 'leading-semicolons))
252 (text (list (assq-ref alist 'text))))
253 (let ((up (lambda ()
254 (loop inner-forms
255 (cons (cons level (reverse text))
256 acc)
257 #f))))
258 (if (null? inner-forms)
259 (up)
260 (let ((inner-form (car inner-forms)))
261 (cond ((quoted? 'comment inner-form)
262 => (lambda (inner-alist)
263 (let ((new-level
264 (assq-ref
265 inner-alist
266 'leading-semicolons)))
267 (if (= new-level level)
268 (cloop (cdr inner-forms)
269 level
270 (cons (assq-ref
271 inner-alist
272 'text)
273 text))
274 (up)))))
275 (else (up)))))))))
276 (else (loop (cdr forms) (cons form acc) #f)))))))
277
278 ;;; script entry point
279
280 (define main read-scheme-source)
281
282 ;;; read-scheme-source ends here