1 ;;; read-scheme-source --- Read a file, recognizing scheme forms and comments
3 ;; Copyright (C) 2001, 2006, 2011 Free Software Foundation, Inc.
5 ;; This program is free software; you can redistribute it and/or
6 ;; modify it under the terms of the GNU Lesser General Public License
7 ;; as published by the Free Software Foundation; either version 3, or
8 ;; (at your option) any later version.
10 ;; This program is distributed in the hope that it will be useful,
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;; Lesser General Public License for more details.
15 ;; You should have received a copy of the GNU Lesser General Public
16 ;; License along with this software; see the file COPYING.LESSER. If
17 ;; not, write to the Free Software Foundation, Inc., 51 Franklin
18 ;; Street, Fifth Floor, Boston, MA 02110-1301 USA
20 ;;; Author: Thien-Thi Nguyen
24 ;; Usage: read-scheme-source FILE1 FILE2 ...
26 ;; This program parses each FILE and writes to stdout sexps that describe the
27 ;; top-level structures of the file: scheme forms, single-line comments, and
28 ;; hash-bang comments. You can further process these (to associate comments
29 ;; w/ scheme forms as a kind of documentation, for example).
31 ;; The output sexps have one of these forms:
33 ;; (quote (filename FILENAME))
35 ;; (quote (comment :leading-semicolons N
38 ;; (quote (whitespace :text LINE))
40 ;; (quote (hash-bang-comment :line LINUM
42 ;; :text-list (LINE1 LINE2 ...)))
44 ;; (quote (following-form-properties :line LINUM
47 ;; :signature SIGNATURE
48 ;; :std-int-doc DOCSTRING))
52 ;; The first four are straightforward (both FILENAME and LINE are strings sans
53 ;; newline, while LINUM and N are integers). The last two always go together,
54 ;; in that order. SEXP is scheme code processed only by `read' and then
57 ;; The :type field may be omitted if the form is not recognized. Otherwise,
58 ;; TYPE may be one of: procedure, alias, define-module, variable.
60 ;; The :signature field may be omitted if the form is not a procedure.
61 ;; Otherwise, SIGNATURE is a list showing the procedure's signature.
63 ;; If the type is `procedure' and the form has a standard internal docstring
64 ;; (first body form a string), that is extracted in full -- including any
65 ;; embedded newlines -- and recorded by field :std-int-doc.
68 ;; Usage from a program: The output list of sexps can be retrieved by scheme
69 ;; programs w/o having to capture stdout, like so:
71 ;; (use-modules (scripts read-scheme-source))
72 ;; (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...))
74 ;; There are also two convenience procs exported for use by Scheme programs:
76 ;; (clump FORMS) --- filter FORMS combining contiguous comment forms that
77 ;; have the same number of leading semicolons.
79 ;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse
80 ;; the ":tags", and return alist of (TAG . VAL) elems.
82 ;; TODO: Add option "--clump-comments", maybe w/ different clumping styles.
83 ;; Make `annotate!' extensible.
87 (define-module (scripts read-scheme-source)
88 :use-module (ice-9 rdelim)
89 :export (read-scheme-source
90 read-scheme-source-silently
94 (define %include-in-guild-list #f)
95 (define %summary "Print a parsed representation of a Scheme file.")
97 ;; Try to figure out what FORM is and its various attributes.
98 ;; Call proc NOTE! with key (a symbol) and value.
100 (define (annotate! form note!)
101 (cond ((and (list? form)
103 (eq? 'define (car form))
105 (symbol? (caadr form)))
106 (note! ':type 'procedure)
107 (note! ':signature (cadr form))
108 (and (< 3 (length form))
109 (string? (caddr form))
110 (note! ':std-int-doc (caddr form))))
113 (eq? 'define (car form))
114 (symbol? (cadr form))
116 (< 3 (length (caddr form)))
117 (eq? 'lambda (car (caddr form)))
118 (string? (caddr (caddr form))))
119 (note! ':type 'procedure)
120 (note! ':signature (cons (cadr form) (cadr (caddr form))))
121 (note! ':std-int-doc (caddr (caddr form))))
124 (eq? 'define (car form))
125 (symbol? (cadr form))
126 (symbol? (caddr form)))
127 (note! ':type 'alias))
129 (eq? 'define-module (car form)))
130 (note! ':type 'define-module))
131 ;; Add other types here.
132 (else (note! ':type 'variable))))
134 ;; Process FILE, calling NB! on parsed top-level elements.
135 ;; Recognized: #!-!# and regular comments in addition to normal forms.
137 (define (process file nb!)
138 (nb! `'(filename ,file))
139 (let ((hash-bang-rx (make-regexp "^#!"))
140 (bang-hash-rx (make-regexp "^!#"))
141 (all-comment-rx (make-regexp "^[ \t]*(;+)"))
142 (all-whitespace-rx (make-regexp "^[ \t]*$"))
143 (p (open-input-file file)))
144 (let loop ((n (1+ (port-line p))) (line (read-line p)))
148 (cond ((regexp-exec hash-bang-rx line)
149 (let loop ((line (read-line p))
151 (if (or (eof-object? line)
152 (regexp-exec bang-hash-rx line))
153 (nb! `'(hash-bang-comment
155 :line-count ,(1+ (length text))
160 ((regexp-exec all-whitespace-rx line)
161 (nb! `'(whitespace :text ,line)))
162 ((regexp-exec all-comment-rx line)
166 ,(let ((m1 (vector-ref m 1)))
167 (- (cdr m1) (car m1)))
170 (unread-string line p)
171 (let* ((form (read p))
172 (count (- (port-line p) n))
173 (props (let* ((props '())
176 (append props args)))))
177 (annotate! form prop+)
179 (or (= count 1) ; ugh
182 (set! count (1+ count))))
183 (nb! `'(following-form-properties
188 (loop (1+ (port-line p)) (read-line p)))))))
192 (define (read-scheme-source-silently . files)
193 "See commentary in module (scripts read-scheme-source)."
195 (for-each (lambda (file)
196 (process file (lambda (e) (set! res (cons e res)))))
200 (define (read-scheme-source . files)
201 "See commentary in module (scripts read-scheme-source)."
202 (for-each (lambda (file)
203 (process file (lambda (e) (write e) (newline))))
206 ;; Recognize: (quote (SYM :TAG1 VAL1 :TAG2 VAL2 ...))
207 ;; and return alist: ((TAG1 . VAL1) (TAG2 . VAL2) ...)
208 ;; where the tags are symbols.
210 (define (quoted? sym form)
213 (eq? 'quote (car form))
214 (let ((inside (cadr form)))
216 (< 0 (length inside))
217 (eq? sym (car inside))
218 (let loop ((ls (cdr inside)) (alist '()))
221 (let ((first (car ls)))
225 (acons (string->symbol
226 (substring (symbol->string first) 1))
230 ;; Filter FORMS, combining contiguous comment forms that have the same number
231 ;; of leading semicolons. Do not include in them whitespace lines.
232 ;; Whitespace lines outside of such comment groupings are ignored, as are
233 ;; hash-bang comments. All other forms are passed through unchanged.
235 (define (clump forms)
236 (let loop ((forms forms) (acc '()) (pass-this-one-through? #f))
238 (reverse acc) ; retval
239 (let ((form (car forms)))
240 (cond (pass-this-one-through?
241 (loop (cdr forms) (cons form acc) #f))
242 ((quoted? 'following-form-properties form)
243 (loop (cdr forms) (cons form acc) #t))
244 ((quoted? 'whitespace form) ;;; ignore
245 (loop (cdr forms) acc #f))
246 ((quoted? 'hash-bang-comment form) ;;; ignore for now
247 (loop (cdr forms) acc #f))
248 ((quoted? 'comment form)
250 (let cloop ((inner-forms (cdr forms))
251 (level (assq-ref alist 'leading-semicolons))
252 (text (list (assq-ref alist 'text))))
255 (cons (cons level (reverse text))
258 (if (null? inner-forms)
260 (let ((inner-form (car inner-forms)))
261 (cond ((quoted? 'comment inner-form)
262 => (lambda (inner-alist)
266 'leading-semicolons)))
267 (if (= new-level level)
268 (cloop (cdr inner-forms)
276 (else (loop (cdr forms) (cons form acc) #f)))))))
278 ;;; script entry point
280 (define main read-scheme-source)
282 ;;; read-scheme-source ends here