Reify bytevector? in the correct module
[bpt/guile.git] / module / scripts / read-scheme-source.scm
CommitLineData
bff56cdf
TTN
1;;; read-scheme-source --- Read a file, recognizing scheme forms and comments
2
a1a2ed53 3;; Copyright (C) 2001, 2006, 2011 Free Software Foundation, Inc.
bff56cdf
TTN
4;;
5;; This program is free software; you can redistribute it and/or
83ba2d37
NJ
6;; modify it under the terms of the GNU Lesser General Public License
7;; as published by the Free Software Foundation; either version 3, or
bff56cdf
TTN
8;; (at your option) any later version.
9;;
10;; This program is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
83ba2d37 13;; Lesser General Public License for more details.
bff56cdf 14;;
83ba2d37
NJ
15;; You should have received a copy of the GNU Lesser General Public
16;; License along with this software; see the file COPYING.LESSER. If
17;; not, write to the Free Software Foundation, Inc., 51 Franklin
18;; Street, Fifth Floor, Boston, MA 02110-1301 USA
bff56cdf 19
61897afe
TTN
20;;; Author: Thien-Thi Nguyen
21
bff56cdf
TTN
22;;; Commentary:
23
24;; Usage: read-scheme-source FILE1 FILE2 ...
25;;
26;; This program parses each FILE and writes to stdout sexps that describe the
27;; top-level structures of the file: scheme forms, single-line comments, and
28;; hash-bang comments. You can further process these (to associate comments
29;; w/ scheme forms as a kind of documentation, for example).
30;;
31;; The output sexps have one of these forms:
32;;
33;; (quote (filename FILENAME))
34;;
e8cd769d 35;; (quote (comment :leading-semicolons N
bff56cdf
TTN
36;; :text LINE))
37;;
38;; (quote (whitespace :text LINE))
39;;
40;; (quote (hash-bang-comment :line LINUM
41;; :line-count N
42;; :text-list (LINE1 LINE2 ...)))
43;;
44;; (quote (following-form-properties :line LINUM
45;; :line-count N)
46;; :type TYPE
47;; :signature SIGNATURE
48;; :std-int-doc DOCSTRING))
49;;
50;; SEXP
51;;
52;; The first four are straightforward (both FILENAME and LINE are strings sans
53;; newline, while LINUM and N are integers). The last two always go together,
54;; in that order. SEXP is scheme code processed only by `read' and then
55;; `write'.
56;;
57;; The :type field may be omitted if the form is not recognized. Otherwise,
58;; TYPE may be one of: procedure, alias, define-module, variable.
59;;
60;; The :signature field may be omitted if the form is not a procedure.
61;; Otherwise, SIGNATURE is a list showing the procedure's signature.
62;;
63;; If the type is `procedure' and the form has a standard internal docstring
64;; (first body form a string), that is extracted in full -- including any
65;; embedded newlines -- and recorded by field :std-int-doc.
66;;
67;;
68;; Usage from a program: The output list of sexps can be retrieved by scheme
69;; programs w/o having to capture stdout, like so:
70;;
71;; (use-modules (scripts read-scheme-source))
72;; (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...))
73;;
6f2ec1d1
TTN
74;; There are also two convenience procs exported for use by Scheme programs:
75;;
76;; (clump FORMS) --- filter FORMS combining contiguous comment forms that
77;; have the same number of leading semicolons.
78;;
79;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse
80;; the ":tags", and return alist of (TAG . VAL) elems.
bff56cdf
TTN
81;;
82;; TODO: Add option "--clump-comments", maybe w/ different clumping styles.
83;; Make `annotate!' extensible.
bff56cdf
TTN
84
85;;; Code:
86
87(define-module (scripts read-scheme-source)
88 :use-module (ice-9 rdelim)
6f2ec1d1
TTN
89 :export (read-scheme-source
90 read-scheme-source-silently
91 quoted?
92 clump))
bff56cdf 93
a1a2ed53
AW
94(define %include-in-guild-list #f)
95(define %summary "Print a parsed representation of a Scheme file.")
96
bff56cdf
TTN
97;; Try to figure out what FORM is and its various attributes.
98;; Call proc NOTE! with key (a symbol) and value.
99;;
100(define (annotate! form note!)
101 (cond ((and (list? form)
102 (< 2 (length form))
103 (eq? 'define (car form))
104 (pair? (cadr form))
105 (symbol? (caadr form)))
106 (note! ':type 'procedure)
107 (note! ':signature (cadr form))
108 (and (< 3 (length form))
109 (string? (caddr form))
110 (note! ':std-int-doc (caddr form))))
111 ((and (list? form)
112 (< 2 (length form))
113 (eq? 'define (car form))
114 (symbol? (cadr form))
115 (list? (caddr form))
116 (< 3 (length (caddr form)))
117 (eq? 'lambda (car (caddr form)))
118 (string? (caddr (caddr form))))
119 (note! ':type 'procedure)
120 (note! ':signature (cons (cadr form) (cadr (caddr form))))
121 (note! ':std-int-doc (caddr (caddr form))))
122 ((and (list? form)
123 (= 3 (length form))
124 (eq? 'define (car form))
125 (symbol? (cadr form))
126 (symbol? (caddr form)))
127 (note! ':type 'alias))
128 ((and (list? form)
129 (eq? 'define-module (car form)))
130 (note! ':type 'define-module))
131 ;; Add other types here.
132 (else (note! ':type 'variable))))
133
134;; Process FILE, calling NB! on parsed top-level elements.
135;; Recognized: #!-!# and regular comments in addition to normal forms.
136;;
137(define (process file nb!)
138 (nb! `'(filename ,file))
139 (let ((hash-bang-rx (make-regexp "^#!"))
140 (bang-hash-rx (make-regexp "^!#"))
141 (all-comment-rx (make-regexp "^[ \t]*(;+)"))
142 (all-whitespace-rx (make-regexp "^[ \t]*$"))
143 (p (open-input-file file)))
144 (let loop ((n (1+ (port-line p))) (line (read-line p)))
145 (or (not n)
146 (eof-object? line)
147 (begin
148 (cond ((regexp-exec hash-bang-rx line)
149 (let loop ((line (read-line p))
150 (text (list line)))
151 (if (or (eof-object? line)
152 (regexp-exec bang-hash-rx line))
153 (nb! `'(hash-bang-comment
154 :line ,n
155 :line-count ,(1+ (length text))
156 :text-list ,(reverse
157 (cons line text))))
158 (loop (read-line p)
159 (cons line text)))))
160 ((regexp-exec all-whitespace-rx line)
161 (nb! `'(whitespace :text ,line)))
162 ((regexp-exec all-comment-rx line)
163 => (lambda (m)
164 (nb! `'(comment
e8cd769d
TTN
165 :leading-semicolons
166 ,(let ((m1 (vector-ref m 1)))
167 (- (cdr m1) (car m1)))
bff56cdf
TTN
168 :text ,line))))
169 (else
170 (unread-string line p)
171 (let* ((form (read p))
172 (count (- (port-line p) n))
173 (props (let* ((props '())
174 (prop+ (lambda args
175 (set! props
176 (append props args)))))
177 (annotate! form prop+)
178 props)))
179 (or (= count 1) ; ugh
180 (begin
181 (read-line p)
182 (set! count (1+ count))))
183 (nb! `'(following-form-properties
184 :line ,n
185 :line-count ,count
186 ,@props))
187 (nb! form))))
188 (loop (1+ (port-line p)) (read-line p)))))))
189
6f2ec1d1
TTN
190;;; entry points
191
bff56cdf
TTN
192(define (read-scheme-source-silently . files)
193 "See commentary in module (scripts read-scheme-source)."
194 (let* ((res '()))
195 (for-each (lambda (file)
196 (process file (lambda (e) (set! res (cons e res)))))
197 files)
198 (reverse res)))
199
200(define (read-scheme-source . files)
201 "See commentary in module (scripts read-scheme-source)."
202 (for-each (lambda (file)
203 (process file (lambda (e) (write e) (newline))))
204 files))
205
6f2ec1d1
TTN
206;; Recognize: (quote (SYM :TAG1 VAL1 :TAG2 VAL2 ...))
207;; and return alist: ((TAG1 . VAL1) (TAG2 . VAL2) ...)
208;; where the tags are symbols.
209;;
210(define (quoted? sym form)
211 (and (list? form)
212 (= 2 (length form))
213 (eq? 'quote (car form))
214 (let ((inside (cadr form)))
215 (and (list? inside)
216 (< 0 (length inside))
217 (eq? sym (car inside))
218 (let loop ((ls (cdr inside)) (alist '()))
219 (if (null? ls)
220 alist ; retval
221 (let ((first (car ls)))
222 (or (symbol? first)
223 (error "bad list!"))
224 (loop (cddr ls)
225 (acons (string->symbol
226 (substring (symbol->string first) 1))
227 (cadr ls)
228 alist)))))))))
229
230;; Filter FORMS, combining contiguous comment forms that have the same number
231;; of leading semicolons. Do not include in them whitespace lines.
232;; Whitespace lines outside of such comment groupings are ignored, as are
233;; hash-bang comments. All other forms are passed through unchanged.
234;;
235(define (clump forms)
236 (let loop ((forms forms) (acc '()) (pass-this-one-through? #f))
237 (if (null? forms)
238 (reverse acc) ; retval
239 (let ((form (car forms)))
240 (cond (pass-this-one-through?
241 (loop (cdr forms) (cons form acc) #f))
242 ((quoted? 'following-form-properties form)
243 (loop (cdr forms) (cons form acc) #t))
244 ((quoted? 'whitespace form) ;;; ignore
245 (loop (cdr forms) acc #f))
246 ((quoted? 'hash-bang-comment form) ;;; ignore for now
247 (loop (cdr forms) acc #f))
248 ((quoted? 'comment form)
249 => (lambda (alist)
250 (let cloop ((inner-forms (cdr forms))
251 (level (assq-ref alist 'leading-semicolons))
252 (text (list (assq-ref alist 'text))))
253 (let ((up (lambda ()
254 (loop inner-forms
255 (cons (cons level (reverse text))
256 acc)
257 #f))))
258 (if (null? inner-forms)
259 (up)
260 (let ((inner-form (car inner-forms)))
261 (cond ((quoted? 'comment inner-form)
262 => (lambda (inner-alist)
263 (let ((new-level
264 (assq-ref
265 inner-alist
266 'leading-semicolons)))
267 (if (= new-level level)
268 (cloop (cdr inner-forms)
269 level
270 (cons (assq-ref
271 inner-alist
272 'text)
273 text))
274 (up)))))
275 (else (up)))))))))
276 (else (loop (cdr forms) (cons form acc) #f)))))))
277
278;;; script entry point
279
bff56cdf
TTN
280(define main read-scheme-source)
281
282;;; read-scheme-source ends here