Commit | Line | Data |
---|---|---|
eb80072d LC |
1 | ;;; Guile Emacs Lisp |
2 | ||
c983a199 | 3 | ;;; Copyright (C) 2009, 2010 Free Software Foundation, Inc. |
eb80072d LC |
4 | ;;; |
5 | ;;; This library is free software; you can redistribute it and/or | |
6 | ;;; modify it under the terms of the GNU Lesser General Public | |
7 | ;;; License as published by the Free Software Foundation; either | |
8 | ;;; version 3 of the License, or (at your option) any later version. | |
9 | ;;; | |
10 | ;;; This library is distributed in the hope that it will be useful, | |
11 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | ;;; Lesser General Public License for more details. | |
14 | ;;; | |
15 | ;;; You should have received a copy of the GNU Lesser General Public | |
16 | ;;; License along with this library; if not, write to the Free Software | |
17 | ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
e840cc65 DK |
18 | |
19 | ;;; Code: | |
20 | ||
21 | (define-module (language elisp parser) | |
22 | #:use-module (language elisp lexer) | |
e840cc65 DK |
23 | #:export (read-elisp)) |
24 | ||
c983a199 BT |
25 | ;;; The parser (reader) for elisp expressions. |
26 | ;;; | |
27 | ;;; It is hand-written (just as the lexer is) instead of using some | |
28 | ;;; parser generator because this allows easier transfer of source | |
29 | ;;; properties from the lexer ((text parse-lalr) seems not to allow | |
30 | ;;; access to the original lexer token-pair) and is easy enough anyways. | |
9e90010f | 31 | |
c983a199 BT |
32 | ;;; Report a parse error. The first argument is some current lexer |
33 | ;;; token where source information is available should it be useful. | |
9e90010f DK |
34 | |
35 | (define (parse-error token msg . args) | |
36 | (apply error msg args)) | |
37 | ||
c983a199 BT |
38 | ;;; For parsing circular structures, we keep track of definitions in a |
39 | ;;; hash-map that maps the id's to their values. When defining a new | |
40 | ;;; id, though, we immediatly fill the slot with a promise before | |
41 | ;;; parsing and setting the real value, because it must already be | |
42 | ;;; available at that time in case of a circular reference. The promise | |
43 | ;;; refers to a local variable that will be set when the real value is | |
44 | ;;; available through a closure. After parsing the expression is | |
45 | ;;; completed, we work through it again and force all promises we find. | |
46 | ;;; The definitions themselves are stored in a fluid and their scope is | |
47 | ;;; one call to read-elisp (but not only the currently parsed | |
48 | ;;; expression!). | |
9a9f1231 DK |
49 | |
50 | (define circular-definitions (make-fluid)) | |
51 | ||
52 | (define (make-circular-definitions) | |
53 | (make-hash-table)) | |
54 | ||
55 | (define (circular-ref token) | |
56 | (if (not (eq? (car token) 'circular-ref)) | |
f4e5e411 | 57 | (error "invalid token for circular-ref" token)) |
9a9f1231 DK |
58 | (let* ((id (cdr token)) |
59 | (value (hashq-ref (fluid-ref circular-definitions) id))) | |
60 | (if value | |
f4e5e411 BT |
61 | value |
62 | (parse-error token "undefined circular reference" id)))) | |
9a9f1231 | 63 | |
c983a199 BT |
64 | ;;; Returned is a closure that, when invoked, will set the final value. |
65 | ;;; This means both the variable the promise will return and the | |
66 | ;;; hash-table slot so we don't generate promises any longer. | |
abcf4a9e | 67 | |
9a9f1231 DK |
68 | (define (circular-define! token) |
69 | (if (not (eq? (car token) 'circular-def)) | |
f4e5e411 | 70 | (error "invalid token for circular-define!" token)) |
9a9f1231 DK |
71 | (let ((value #f) |
72 | (table (fluid-ref circular-definitions)) | |
73 | (id (cdr token))) | |
74 | (hashq-set! table id (delay value)) | |
75 | (lambda (real-value) | |
76 | (set! value real-value) | |
77 | (hashq-set! table id real-value)))) | |
78 | ||
c983a199 BT |
79 | ;;; Work through a parsed data structure and force the promises there. |
80 | ;;; After a promise is forced, the resulting value must not be recursed | |
81 | ;;; on; this may lead to infinite recursion with a circular structure, | |
82 | ;;; and additionally this value was already processed when it was | |
83 | ;;; defined. All deep data structures that can be parsed must be | |
84 | ;;; handled here! | |
abcf4a9e | 85 | |
9a9f1231 DK |
86 | (define (force-promises! data) |
87 | (cond | |
f4e5e411 BT |
88 | ((pair? data) |
89 | (begin | |
90 | (if (promise? (car data)) | |
91 | (set-car! data (force (car data))) | |
92 | (force-promises! (car data))) | |
93 | (if (promise? (cdr data)) | |
94 | (set-cdr! data (force (cdr data))) | |
95 | (force-promises! (cdr data))))) | |
96 | ((vector? data) | |
97 | (let ((len (vector-length data))) | |
98 | (let iterate ((i 0)) | |
99 | (if (< i len) | |
100 | (let ((el (vector-ref data i))) | |
101 | (if (promise? el) | |
102 | (vector-set! data i (force el)) | |
103 | (force-promises! el)) | |
104 | (iterate (1+ i))))))) | |
105 | ;; Else nothing needs to be done. | |
106 | )) | |
9a9f1231 | 107 | |
c983a199 BT |
108 | ;;; We need peek-functionality for the next lexer token, this is done |
109 | ;;; with some single token look-ahead storage. This is handled by a | |
110 | ;;; closure which allows getting or peeking the next token. When one | |
111 | ;;; expression is fully parsed, we don't want a look-ahead stored here | |
112 | ;;; because it would miss from future parsing. This is verified by the | |
113 | ;;; finish action. | |
9e90010f DK |
114 | |
115 | (define (make-lexer-buffer lex) | |
116 | (let ((look-ahead #f)) | |
117 | (lambda (action) | |
118 | (if (eq? action 'finish) | |
f4e5e411 BT |
119 | (if look-ahead |
120 | (error "lexer-buffer is not empty when finished") | |
121 | #f) | |
122 | (begin | |
123 | (if (not look-ahead) | |
124 | (set! look-ahead (lex))) | |
125 | (case action | |
126 | ((peek) look-ahead) | |
127 | ((get) | |
128 | (let ((result look-ahead)) | |
129 | (set! look-ahead #f) | |
130 | result)) | |
131 | (else (error "invalid lexer-buffer action" action)))))))) | |
9e90010f | 132 | |
c983a199 BT |
133 | ;;; Get the contents of a list, where the opening parentheses has |
134 | ;;; already been found. The same code is used for vectors and lists, | |
135 | ;;; where lists allow the dotted tail syntax and vectors not; | |
136 | ;;; additionally, the closing parenthesis must of course match. The | |
137 | ;;; implementation here is not tail-recursive, but I think it is clearer | |
138 | ;;; and simpler this way. | |
9e90010f DK |
139 | |
140 | (define (get-list lex allow-dot close-square) | |
141 | (let* ((next (lex 'peek)) | |
142 | (type (car next))) | |
143 | (cond | |
f4e5e411 BT |
144 | ((eq? type (if close-square 'square-close 'paren-close)) |
145 | (begin | |
146 | (if (not (eq? (car (lex 'get)) type)) | |
147 | (error "got different token than peeked")) | |
148 | '())) | |
149 | ((and allow-dot (eq? type 'dot)) | |
150 | (begin | |
151 | (if (not (eq? (car (lex 'get)) type)) | |
152 | (error "got different token than peeked")) | |
153 | (let ((tail (get-list lex #f close-square))) | |
154 | (if (not (= (length tail) 1)) | |
155 | (parse-error next | |
156 | "expected exactly one element after dot")) | |
157 | (car tail)))) | |
158 | (else | |
159 | ;; Do both parses in exactly this sequence! | |
160 | (let* ((head (get-expression lex)) | |
161 | (tail (get-list lex allow-dot close-square))) | |
162 | (cons head tail)))))) | |
9e90010f | 163 | |
c983a199 BT |
164 | ;;; Parse a single expression from a lexer-buffer. This is the main |
165 | ;;; routine in our recursive-descent parser. | |
9e90010f DK |
166 | |
167 | (define quotation-symbols '((quote . quote) | |
0dbfdeef BT |
168 | (backquote . #{`}#) |
169 | (unquote . #{,}#) | |
170 | (unquote-splicing . #{,@}#))) | |
9e90010f DK |
171 | |
172 | (define (get-expression lex) | |
173 | (let* ((token (lex 'get)) | |
174 | (type (car token)) | |
175 | (return (lambda (result) | |
176 | (if (pair? result) | |
f4e5e411 BT |
177 | (set-source-properties! |
178 | result | |
179 | (source-properties token))) | |
9e90010f DK |
180 | result))) |
181 | (case type | |
1dfe5939 BT |
182 | ((eof) |
183 | (parse-error token "end of file during parsing")) | |
9e90010f DK |
184 | ((integer float symbol character string) |
185 | (return (cdr token))) | |
b7966c10 BT |
186 | ((function) |
187 | (return `(function ,(get-expression lex)))) | |
9e90010f | 188 | ((quote backquote unquote unquote-splicing) |
f4e5e411 BT |
189 | (return (list (assq-ref quotation-symbols type) |
190 | (get-expression lex)))) | |
9e90010f DK |
191 | ((paren-open) |
192 | (return (get-list lex #t #f))) | |
193 | ((square-open) | |
194 | (return (list->vector (get-list lex #f #t)))) | |
9a9f1231 DK |
195 | ((circular-ref) |
196 | (circular-ref token)) | |
197 | ((circular-def) | |
c983a199 | 198 | ;; The order of definitions is important! |
9a9f1231 DK |
199 | (let* ((setter (circular-define! token)) |
200 | (expr (get-expression lex))) | |
201 | (setter expr) | |
202 | (force-promises! expr) | |
203 | expr)) | |
03e00c5c BT |
204 | ((set-lexical-binding-mode!) |
205 | (return `(%set-lexical-binding-mode ,(cdr token)))) | |
9e90010f | 206 | (else |
f4e5e411 | 207 | (parse-error token "expected expression, got" token))))) |
9e90010f | 208 | |
c983a199 BT |
209 | ;;; Define the reader function based on this; build a lexer, a |
210 | ;;; lexer-buffer, and then parse a single expression to return. We also | |
211 | ;;; define a circular-definitions data structure to use. | |
e840cc65 DK |
212 | |
213 | (define (read-elisp port) | |
9a9f1231 DK |
214 | (with-fluids ((circular-definitions (make-circular-definitions))) |
215 | (let* ((lexer (get-lexer port)) | |
216 | (lexbuf (make-lexer-buffer lexer)) | |
1dfe5939 BT |
217 | (next (lexbuf 'peek))) |
218 | (if (eq? (car next) 'eof) | |
219 | (cdr next) | |
220 | (let ((result (get-expression lexbuf))) | |
221 | (lexbuf 'finish) | |
222 | result))))) |