Commit | Line | Data |
---|---|---|
bff56cdf TTN |
1 | ;;; read-scheme-source --- Read a file, recognizing scheme forms and comments |
2 | ||
a1a2ed53 | 3 | ;; Copyright (C) 2001, 2006, 2011 Free Software Foundation, Inc. |
bff56cdf TTN |
4 | ;; |
5 | ;; This program is free software; you can redistribute it and/or | |
83ba2d37 NJ |
6 | ;; modify it under the terms of the GNU Lesser General Public License |
7 | ;; as published by the Free Software Foundation; either version 3, or | |
bff56cdf TTN |
8 | ;; (at your option) any later version. |
9 | ;; | |
10 | ;; This program is distributed in the hope that it will be useful, | |
11 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
83ba2d37 | 13 | ;; Lesser General Public License for more details. |
bff56cdf | 14 | ;; |
83ba2d37 NJ |
15 | ;; You should have received a copy of the GNU Lesser General Public |
16 | ;; License along with this software; see the file COPYING.LESSER. If | |
17 | ;; not, write to the Free Software Foundation, Inc., 51 Franklin | |
18 | ;; Street, Fifth Floor, Boston, MA 02110-1301 USA | |
bff56cdf | 19 | |
61897afe TTN |
20 | ;;; Author: Thien-Thi Nguyen |
21 | ||
bff56cdf TTN |
22 | ;;; Commentary: |
23 | ||
24 | ;; Usage: read-scheme-source FILE1 FILE2 ... | |
25 | ;; | |
26 | ;; This program parses each FILE and writes to stdout sexps that describe the | |
27 | ;; top-level structures of the file: scheme forms, single-line comments, and | |
28 | ;; hash-bang comments. You can further process these (to associate comments | |
29 | ;; w/ scheme forms as a kind of documentation, for example). | |
30 | ;; | |
31 | ;; The output sexps have one of these forms: | |
32 | ;; | |
33 | ;; (quote (filename FILENAME)) | |
34 | ;; | |
e8cd769d | 35 | ;; (quote (comment :leading-semicolons N |
bff56cdf TTN |
36 | ;; :text LINE)) |
37 | ;; | |
38 | ;; (quote (whitespace :text LINE)) | |
39 | ;; | |
40 | ;; (quote (hash-bang-comment :line LINUM | |
41 | ;; :line-count N | |
42 | ;; :text-list (LINE1 LINE2 ...))) | |
43 | ;; | |
44 | ;; (quote (following-form-properties :line LINUM | |
45 | ;; :line-count N) | |
46 | ;; :type TYPE | |
47 | ;; :signature SIGNATURE | |
48 | ;; :std-int-doc DOCSTRING)) | |
49 | ;; | |
50 | ;; SEXP | |
51 | ;; | |
52 | ;; The first four are straightforward (both FILENAME and LINE are strings sans | |
53 | ;; newline, while LINUM and N are integers). The last two always go together, | |
54 | ;; in that order. SEXP is scheme code processed only by `read' and then | |
55 | ;; `write'. | |
56 | ;; | |
57 | ;; The :type field may be omitted if the form is not recognized. Otherwise, | |
58 | ;; TYPE may be one of: procedure, alias, define-module, variable. | |
59 | ;; | |
60 | ;; The :signature field may be omitted if the form is not a procedure. | |
61 | ;; Otherwise, SIGNATURE is a list showing the procedure's signature. | |
62 | ;; | |
63 | ;; If the type is `procedure' and the form has a standard internal docstring | |
64 | ;; (first body form a string), that is extracted in full -- including any | |
65 | ;; embedded newlines -- and recorded by field :std-int-doc. | |
66 | ;; | |
67 | ;; | |
68 | ;; Usage from a program: The output list of sexps can be retrieved by scheme | |
69 | ;; programs w/o having to capture stdout, like so: | |
70 | ;; | |
71 | ;; (use-modules (scripts read-scheme-source)) | |
72 | ;; (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...)) | |
73 | ;; | |
6f2ec1d1 TTN |
74 | ;; There are also two convenience procs exported for use by Scheme programs: |
75 | ;; | |
76 | ;; (clump FORMS) --- filter FORMS combining contiguous comment forms that | |
77 | ;; have the same number of leading semicolons. | |
78 | ;; | |
79 | ;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse | |
80 | ;; the ":tags", and return alist of (TAG . VAL) elems. | |
bff56cdf TTN |
81 | ;; |
82 | ;; TODO: Add option "--clump-comments", maybe w/ different clumping styles. | |
83 | ;; Make `annotate!' extensible. | |
bff56cdf TTN |
84 | |
85 | ;;; Code: | |
86 | ||
87 | (define-module (scripts read-scheme-source) | |
88 | :use-module (ice-9 rdelim) | |
6f2ec1d1 TTN |
89 | :export (read-scheme-source |
90 | read-scheme-source-silently | |
91 | quoted? | |
92 | clump)) | |
bff56cdf | 93 | |
a1a2ed53 AW |
94 | (define %include-in-guild-list #f) |
95 | (define %summary "Print a parsed representation of a Scheme file.") | |
96 | ||
bff56cdf TTN |
97 | ;; Try to figure out what FORM is and its various attributes. |
98 | ;; Call proc NOTE! with key (a symbol) and value. | |
99 | ;; | |
100 | (define (annotate! form note!) | |
101 | (cond ((and (list? form) | |
102 | (< 2 (length form)) | |
103 | (eq? 'define (car form)) | |
104 | (pair? (cadr form)) | |
105 | (symbol? (caadr form))) | |
106 | (note! ':type 'procedure) | |
107 | (note! ':signature (cadr form)) | |
108 | (and (< 3 (length form)) | |
109 | (string? (caddr form)) | |
110 | (note! ':std-int-doc (caddr form)))) | |
111 | ((and (list? form) | |
112 | (< 2 (length form)) | |
113 | (eq? 'define (car form)) | |
114 | (symbol? (cadr form)) | |
115 | (list? (caddr form)) | |
116 | (< 3 (length (caddr form))) | |
117 | (eq? 'lambda (car (caddr form))) | |
118 | (string? (caddr (caddr form)))) | |
119 | (note! ':type 'procedure) | |
120 | (note! ':signature (cons (cadr form) (cadr (caddr form)))) | |
121 | (note! ':std-int-doc (caddr (caddr form)))) | |
122 | ((and (list? form) | |
123 | (= 3 (length form)) | |
124 | (eq? 'define (car form)) | |
125 | (symbol? (cadr form)) | |
126 | (symbol? (caddr form))) | |
127 | (note! ':type 'alias)) | |
128 | ((and (list? form) | |
129 | (eq? 'define-module (car form))) | |
130 | (note! ':type 'define-module)) | |
131 | ;; Add other types here. | |
132 | (else (note! ':type 'variable)))) | |
133 | ||
134 | ;; Process FILE, calling NB! on parsed top-level elements. | |
135 | ;; Recognized: #!-!# and regular comments in addition to normal forms. | |
136 | ;; | |
137 | (define (process file nb!) | |
138 | (nb! `'(filename ,file)) | |
139 | (let ((hash-bang-rx (make-regexp "^#!")) | |
140 | (bang-hash-rx (make-regexp "^!#")) | |
141 | (all-comment-rx (make-regexp "^[ \t]*(;+)")) | |
142 | (all-whitespace-rx (make-regexp "^[ \t]*$")) | |
143 | (p (open-input-file file))) | |
144 | (let loop ((n (1+ (port-line p))) (line (read-line p))) | |
145 | (or (not n) | |
146 | (eof-object? line) | |
147 | (begin | |
148 | (cond ((regexp-exec hash-bang-rx line) | |
149 | (let loop ((line (read-line p)) | |
150 | (text (list line))) | |
151 | (if (or (eof-object? line) | |
152 | (regexp-exec bang-hash-rx line)) | |
153 | (nb! `'(hash-bang-comment | |
154 | :line ,n | |
155 | :line-count ,(1+ (length text)) | |
156 | :text-list ,(reverse | |
157 | (cons line text)))) | |
158 | (loop (read-line p) | |
159 | (cons line text))))) | |
160 | ((regexp-exec all-whitespace-rx line) | |
161 | (nb! `'(whitespace :text ,line))) | |
162 | ((regexp-exec all-comment-rx line) | |
163 | => (lambda (m) | |
164 | (nb! `'(comment | |
e8cd769d TTN |
165 | :leading-semicolons |
166 | ,(let ((m1 (vector-ref m 1))) | |
167 | (- (cdr m1) (car m1))) | |
bff56cdf TTN |
168 | :text ,line)))) |
169 | (else | |
170 | (unread-string line p) | |
171 | (let* ((form (read p)) | |
172 | (count (- (port-line p) n)) | |
173 | (props (let* ((props '()) | |
174 | (prop+ (lambda args | |
175 | (set! props | |
176 | (append props args))))) | |
177 | (annotate! form prop+) | |
178 | props))) | |
179 | (or (= count 1) ; ugh | |
180 | (begin | |
181 | (read-line p) | |
182 | (set! count (1+ count)))) | |
183 | (nb! `'(following-form-properties | |
184 | :line ,n | |
185 | :line-count ,count | |
186 | ,@props)) | |
187 | (nb! form)))) | |
188 | (loop (1+ (port-line p)) (read-line p))))))) | |
189 | ||
6f2ec1d1 TTN |
190 | ;;; entry points |
191 | ||
bff56cdf TTN |
192 | (define (read-scheme-source-silently . files) |
193 | "See commentary in module (scripts read-scheme-source)." | |
194 | (let* ((res '())) | |
195 | (for-each (lambda (file) | |
196 | (process file (lambda (e) (set! res (cons e res))))) | |
197 | files) | |
198 | (reverse res))) | |
199 | ||
200 | (define (read-scheme-source . files) | |
201 | "See commentary in module (scripts read-scheme-source)." | |
202 | (for-each (lambda (file) | |
203 | (process file (lambda (e) (write e) (newline)))) | |
204 | files)) | |
205 | ||
6f2ec1d1 TTN |
206 | ;; Recognize: (quote (SYM :TAG1 VAL1 :TAG2 VAL2 ...)) |
207 | ;; and return alist: ((TAG1 . VAL1) (TAG2 . VAL2) ...) | |
208 | ;; where the tags are symbols. | |
209 | ;; | |
210 | (define (quoted? sym form) | |
211 | (and (list? form) | |
212 | (= 2 (length form)) | |
213 | (eq? 'quote (car form)) | |
214 | (let ((inside (cadr form))) | |
215 | (and (list? inside) | |
216 | (< 0 (length inside)) | |
217 | (eq? sym (car inside)) | |
218 | (let loop ((ls (cdr inside)) (alist '())) | |
219 | (if (null? ls) | |
220 | alist ; retval | |
221 | (let ((first (car ls))) | |
222 | (or (symbol? first) | |
223 | (error "bad list!")) | |
224 | (loop (cddr ls) | |
225 | (acons (string->symbol | |
226 | (substring (symbol->string first) 1)) | |
227 | (cadr ls) | |
228 | alist))))))))) | |
229 | ||
230 | ;; Filter FORMS, combining contiguous comment forms that have the same number | |
231 | ;; of leading semicolons. Do not include in them whitespace lines. | |
232 | ;; Whitespace lines outside of such comment groupings are ignored, as are | |
233 | ;; hash-bang comments. All other forms are passed through unchanged. | |
234 | ;; | |
235 | (define (clump forms) | |
236 | (let loop ((forms forms) (acc '()) (pass-this-one-through? #f)) | |
237 | (if (null? forms) | |
238 | (reverse acc) ; retval | |
239 | (let ((form (car forms))) | |
240 | (cond (pass-this-one-through? | |
241 | (loop (cdr forms) (cons form acc) #f)) | |
242 | ((quoted? 'following-form-properties form) | |
243 | (loop (cdr forms) (cons form acc) #t)) | |
244 | ((quoted? 'whitespace form) ;;; ignore | |
245 | (loop (cdr forms) acc #f)) | |
246 | ((quoted? 'hash-bang-comment form) ;;; ignore for now | |
247 | (loop (cdr forms) acc #f)) | |
248 | ((quoted? 'comment form) | |
249 | => (lambda (alist) | |
250 | (let cloop ((inner-forms (cdr forms)) | |
251 | (level (assq-ref alist 'leading-semicolons)) | |
252 | (text (list (assq-ref alist 'text)))) | |
253 | (let ((up (lambda () | |
254 | (loop inner-forms | |
255 | (cons (cons level (reverse text)) | |
256 | acc) | |
257 | #f)))) | |
258 | (if (null? inner-forms) | |
259 | (up) | |
260 | (let ((inner-form (car inner-forms))) | |
261 | (cond ((quoted? 'comment inner-form) | |
262 | => (lambda (inner-alist) | |
263 | (let ((new-level | |
264 | (assq-ref | |
265 | inner-alist | |
266 | 'leading-semicolons))) | |
267 | (if (= new-level level) | |
268 | (cloop (cdr inner-forms) | |
269 | level | |
270 | (cons (assq-ref | |
271 | inner-alist | |
272 | 'text) | |
273 | text)) | |
274 | (up))))) | |
275 | (else (up))))))))) | |
276 | (else (loop (cdr forms) (cons form acc) #f))))))) | |
277 | ||
278 | ;;; script entry point | |
279 | ||
bff56cdf TTN |
280 | (define main read-scheme-source) |
281 | ||
282 | ;;; read-scheme-source ends here |