module-{ref,define}-submodule use the submodules table
[bpt/guile.git] / module / scripts / read-scheme-source.scm
CommitLineData
bff56cdf
TTN
1;;; read-scheme-source --- Read a file, recognizing scheme forms and comments
2
6e7d5622 3;; Copyright (C) 2001, 2006 Free Software Foundation, Inc.
bff56cdf
TTN
4;;
5;; This program is free software; you can redistribute it and/or
83ba2d37
NJ
6;; modify it under the terms of the GNU Lesser General Public License
7;; as published by the Free Software Foundation; either version 3, or
bff56cdf
TTN
8;; (at your option) any later version.
9;;
10;; This program is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
83ba2d37 13;; Lesser General Public License for more details.
bff56cdf 14;;
83ba2d37
NJ
15;; You should have received a copy of the GNU Lesser General Public
16;; License along with this software; see the file COPYING.LESSER. If
17;; not, write to the Free Software Foundation, Inc., 51 Franklin
18;; Street, Fifth Floor, Boston, MA 02110-1301 USA
bff56cdf 19
61897afe
TTN
20;;; Author: Thien-Thi Nguyen
21
bff56cdf
TTN
22;;; Commentary:
23
24;; Usage: read-scheme-source FILE1 FILE2 ...
25;;
26;; This program parses each FILE and writes to stdout sexps that describe the
27;; top-level structures of the file: scheme forms, single-line comments, and
28;; hash-bang comments. You can further process these (to associate comments
29;; w/ scheme forms as a kind of documentation, for example).
30;;
31;; The output sexps have one of these forms:
32;;
33;; (quote (filename FILENAME))
34;;
e8cd769d 35;; (quote (comment :leading-semicolons N
bff56cdf
TTN
36;; :text LINE))
37;;
38;; (quote (whitespace :text LINE))
39;;
40;; (quote (hash-bang-comment :line LINUM
41;; :line-count N
42;; :text-list (LINE1 LINE2 ...)))
43;;
44;; (quote (following-form-properties :line LINUM
45;; :line-count N)
46;; :type TYPE
47;; :signature SIGNATURE
48;; :std-int-doc DOCSTRING))
49;;
50;; SEXP
51;;
52;; The first four are straightforward (both FILENAME and LINE are strings sans
53;; newline, while LINUM and N are integers). The last two always go together,
54;; in that order. SEXP is scheme code processed only by `read' and then
55;; `write'.
56;;
57;; The :type field may be omitted if the form is not recognized. Otherwise,
58;; TYPE may be one of: procedure, alias, define-module, variable.
59;;
60;; The :signature field may be omitted if the form is not a procedure.
61;; Otherwise, SIGNATURE is a list showing the procedure's signature.
62;;
63;; If the type is `procedure' and the form has a standard internal docstring
64;; (first body form a string), that is extracted in full -- including any
65;; embedded newlines -- and recorded by field :std-int-doc.
66;;
67;;
68;; Usage from a program: The output list of sexps can be retrieved by scheme
69;; programs w/o having to capture stdout, like so:
70;;
71;; (use-modules (scripts read-scheme-source))
72;; (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...))
73;;
6f2ec1d1
TTN
74;; There are also two convenience procs exported for use by Scheme programs:
75;;
76;; (clump FORMS) --- filter FORMS combining contiguous comment forms that
77;; have the same number of leading semicolons.
78;;
79;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse
80;; the ":tags", and return alist of (TAG . VAL) elems.
bff56cdf
TTN
81;;
82;; TODO: Add option "--clump-comments", maybe w/ different clumping styles.
83;; Make `annotate!' extensible.
bff56cdf
TTN
84
85;;; Code:
86
87(define-module (scripts read-scheme-source)
88 :use-module (ice-9 rdelim)
6f2ec1d1
TTN
89 :export (read-scheme-source
90 read-scheme-source-silently
91 quoted?
92 clump))
bff56cdf
TTN
93
94;; Try to figure out what FORM is and its various attributes.
95;; Call proc NOTE! with key (a symbol) and value.
96;;
97(define (annotate! form note!)
98 (cond ((and (list? form)
99 (< 2 (length form))
100 (eq? 'define (car form))
101 (pair? (cadr form))
102 (symbol? (caadr form)))
103 (note! ':type 'procedure)
104 (note! ':signature (cadr form))
105 (and (< 3 (length form))
106 (string? (caddr form))
107 (note! ':std-int-doc (caddr form))))
108 ((and (list? form)
109 (< 2 (length form))
110 (eq? 'define (car form))
111 (symbol? (cadr form))
112 (list? (caddr form))
113 (< 3 (length (caddr form)))
114 (eq? 'lambda (car (caddr form)))
115 (string? (caddr (caddr form))))
116 (note! ':type 'procedure)
117 (note! ':signature (cons (cadr form) (cadr (caddr form))))
118 (note! ':std-int-doc (caddr (caddr form))))
119 ((and (list? form)
120 (= 3 (length form))
121 (eq? 'define (car form))
122 (symbol? (cadr form))
123 (symbol? (caddr form)))
124 (note! ':type 'alias))
125 ((and (list? form)
126 (eq? 'define-module (car form)))
127 (note! ':type 'define-module))
128 ;; Add other types here.
129 (else (note! ':type 'variable))))
130
131;; Process FILE, calling NB! on parsed top-level elements.
132;; Recognized: #!-!# and regular comments in addition to normal forms.
133;;
134(define (process file nb!)
135 (nb! `'(filename ,file))
136 (let ((hash-bang-rx (make-regexp "^#!"))
137 (bang-hash-rx (make-regexp "^!#"))
138 (all-comment-rx (make-regexp "^[ \t]*(;+)"))
139 (all-whitespace-rx (make-regexp "^[ \t]*$"))
140 (p (open-input-file file)))
141 (let loop ((n (1+ (port-line p))) (line (read-line p)))
142 (or (not n)
143 (eof-object? line)
144 (begin
145 (cond ((regexp-exec hash-bang-rx line)
146 (let loop ((line (read-line p))
147 (text (list line)))
148 (if (or (eof-object? line)
149 (regexp-exec bang-hash-rx line))
150 (nb! `'(hash-bang-comment
151 :line ,n
152 :line-count ,(1+ (length text))
153 :text-list ,(reverse
154 (cons line text))))
155 (loop (read-line p)
156 (cons line text)))))
157 ((regexp-exec all-whitespace-rx line)
158 (nb! `'(whitespace :text ,line)))
159 ((regexp-exec all-comment-rx line)
160 => (lambda (m)
161 (nb! `'(comment
e8cd769d
TTN
162 :leading-semicolons
163 ,(let ((m1 (vector-ref m 1)))
164 (- (cdr m1) (car m1)))
bff56cdf
TTN
165 :text ,line))))
166 (else
167 (unread-string line p)
168 (let* ((form (read p))
169 (count (- (port-line p) n))
170 (props (let* ((props '())
171 (prop+ (lambda args
172 (set! props
173 (append props args)))))
174 (annotate! form prop+)
175 props)))
176 (or (= count 1) ; ugh
177 (begin
178 (read-line p)
179 (set! count (1+ count))))
180 (nb! `'(following-form-properties
181 :line ,n
182 :line-count ,count
183 ,@props))
184 (nb! form))))
185 (loop (1+ (port-line p)) (read-line p)))))))
186
6f2ec1d1
TTN
187;;; entry points
188
bff56cdf
TTN
189(define (read-scheme-source-silently . files)
190 "See commentary in module (scripts read-scheme-source)."
191 (let* ((res '()))
192 (for-each (lambda (file)
193 (process file (lambda (e) (set! res (cons e res)))))
194 files)
195 (reverse res)))
196
197(define (read-scheme-source . files)
198 "See commentary in module (scripts read-scheme-source)."
199 (for-each (lambda (file)
200 (process file (lambda (e) (write e) (newline))))
201 files))
202
6f2ec1d1
TTN
203;; Recognize: (quote (SYM :TAG1 VAL1 :TAG2 VAL2 ...))
204;; and return alist: ((TAG1 . VAL1) (TAG2 . VAL2) ...)
205;; where the tags are symbols.
206;;
207(define (quoted? sym form)
208 (and (list? form)
209 (= 2 (length form))
210 (eq? 'quote (car form))
211 (let ((inside (cadr form)))
212 (and (list? inside)
213 (< 0 (length inside))
214 (eq? sym (car inside))
215 (let loop ((ls (cdr inside)) (alist '()))
216 (if (null? ls)
217 alist ; retval
218 (let ((first (car ls)))
219 (or (symbol? first)
220 (error "bad list!"))
221 (loop (cddr ls)
222 (acons (string->symbol
223 (substring (symbol->string first) 1))
224 (cadr ls)
225 alist)))))))))
226
227;; Filter FORMS, combining contiguous comment forms that have the same number
228;; of leading semicolons. Do not include in them whitespace lines.
229;; Whitespace lines outside of such comment groupings are ignored, as are
230;; hash-bang comments. All other forms are passed through unchanged.
231;;
232(define (clump forms)
233 (let loop ((forms forms) (acc '()) (pass-this-one-through? #f))
234 (if (null? forms)
235 (reverse acc) ; retval
236 (let ((form (car forms)))
237 (cond (pass-this-one-through?
238 (loop (cdr forms) (cons form acc) #f))
239 ((quoted? 'following-form-properties form)
240 (loop (cdr forms) (cons form acc) #t))
241 ((quoted? 'whitespace form) ;;; ignore
242 (loop (cdr forms) acc #f))
243 ((quoted? 'hash-bang-comment form) ;;; ignore for now
244 (loop (cdr forms) acc #f))
245 ((quoted? 'comment form)
246 => (lambda (alist)
247 (let cloop ((inner-forms (cdr forms))
248 (level (assq-ref alist 'leading-semicolons))
249 (text (list (assq-ref alist 'text))))
250 (let ((up (lambda ()
251 (loop inner-forms
252 (cons (cons level (reverse text))
253 acc)
254 #f))))
255 (if (null? inner-forms)
256 (up)
257 (let ((inner-form (car inner-forms)))
258 (cond ((quoted? 'comment inner-form)
259 => (lambda (inner-alist)
260 (let ((new-level
261 (assq-ref
262 inner-alist
263 'leading-semicolons)))
264 (if (= new-level level)
265 (cloop (cdr inner-forms)
266 level
267 (cons (assq-ref
268 inner-alist
269 'text)
270 text))
271 (up)))))
272 (else (up)))))))))
273 (else (loop (cdr forms) (cons form acc) #f)))))))
274
275;;; script entry point
276
bff56cdf
TTN
277(define main read-scheme-source)
278
279;;; read-scheme-source ends here