execute top level require forms
[bpt/guile.git] / module / language / elisp / parser.scm
CommitLineData
eb80072d
LC
1;;; Guile Emacs Lisp
2
c983a199 3;;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
eb80072d
LC
4;;;
5;;; This library is free software; you can redistribute it and/or
6;;; modify it under the terms of the GNU Lesser General Public
7;;; License as published by the Free Software Foundation; either
8;;; version 3 of the License, or (at your option) any later version.
9;;;
10;;; This library is distributed in the hope that it will be useful,
11;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;;; Lesser General Public License for more details.
14;;;
15;;; You should have received a copy of the GNU Lesser General Public
16;;; License along with this library; if not, write to the Free Software
17;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e840cc65
DK
18
19;;; Code:
20
21(define-module (language elisp parser)
22 #:use-module (language elisp lexer)
e840cc65
DK
23 #:export (read-elisp))
24
c983a199
BT
25;;; The parser (reader) for elisp expressions.
26;;;
27;;; It is hand-written (just as the lexer is) instead of using some
28;;; parser generator because this allows easier transfer of source
29;;; properties from the lexer ((text parse-lalr) seems not to allow
30;;; access to the original lexer token-pair) and is easy enough anyways.
9e90010f 31
c983a199
BT
32;;; Report a parse error. The first argument is some current lexer
33;;; token where source information is available should it be useful.
9e90010f
DK
34
35(define (parse-error token msg . args)
36 (apply error msg args))
37
c983a199
BT
38;;; For parsing circular structures, we keep track of definitions in a
39;;; hash-map that maps the id's to their values. When defining a new
40;;; id, though, we immediatly fill the slot with a promise before
41;;; parsing and setting the real value, because it must already be
42;;; available at that time in case of a circular reference. The promise
43;;; refers to a local variable that will be set when the real value is
44;;; available through a closure. After parsing the expression is
45;;; completed, we work through it again and force all promises we find.
46;;; The definitions themselves are stored in a fluid and their scope is
47;;; one call to read-elisp (but not only the currently parsed
48;;; expression!).
9a9f1231
DK
49
50(define circular-definitions (make-fluid))
51
52(define (make-circular-definitions)
53 (make-hash-table))
54
55(define (circular-ref token)
56 (if (not (eq? (car token) 'circular-ref))
f4e5e411 57 (error "invalid token for circular-ref" token))
9a9f1231
DK
58 (let* ((id (cdr token))
59 (value (hashq-ref (fluid-ref circular-definitions) id)))
60 (if value
f4e5e411
BT
61 value
62 (parse-error token "undefined circular reference" id))))
9a9f1231 63
c983a199
BT
64;;; Returned is a closure that, when invoked, will set the final value.
65;;; This means both the variable the promise will return and the
66;;; hash-table slot so we don't generate promises any longer.
abcf4a9e 67
9a9f1231
DK
68(define (circular-define! token)
69 (if (not (eq? (car token) 'circular-def))
f4e5e411 70 (error "invalid token for circular-define!" token))
9a9f1231
DK
71 (let ((value #f)
72 (table (fluid-ref circular-definitions))
73 (id (cdr token)))
74 (hashq-set! table id (delay value))
75 (lambda (real-value)
76 (set! value real-value)
77 (hashq-set! table id real-value))))
78
c983a199
BT
79;;; Work through a parsed data structure and force the promises there.
80;;; After a promise is forced, the resulting value must not be recursed
81;;; on; this may lead to infinite recursion with a circular structure,
82;;; and additionally this value was already processed when it was
83;;; defined. All deep data structures that can be parsed must be
84;;; handled here!
abcf4a9e 85
9a9f1231
DK
86(define (force-promises! data)
87 (cond
f4e5e411
BT
88 ((pair? data)
89 (begin
90 (if (promise? (car data))
91 (set-car! data (force (car data)))
92 (force-promises! (car data)))
93 (if (promise? (cdr data))
94 (set-cdr! data (force (cdr data)))
95 (force-promises! (cdr data)))))
96 ((vector? data)
97 (let ((len (vector-length data)))
98 (let iterate ((i 0))
99 (if (< i len)
100 (let ((el (vector-ref data i)))
101 (if (promise? el)
102 (vector-set! data i (force el))
103 (force-promises! el))
104 (iterate (1+ i)))))))
105 ;; Else nothing needs to be done.
106 ))
9a9f1231 107
c983a199
BT
108;;; We need peek-functionality for the next lexer token, this is done
109;;; with some single token look-ahead storage. This is handled by a
110;;; closure which allows getting or peeking the next token. When one
111;;; expression is fully parsed, we don't want a look-ahead stored here
112;;; because it would miss from future parsing. This is verified by the
113;;; finish action.
9e90010f
DK
114
115(define (make-lexer-buffer lex)
116 (let ((look-ahead #f))
117 (lambda (action)
118 (if (eq? action 'finish)
f4e5e411
BT
119 (if look-ahead
120 (error "lexer-buffer is not empty when finished")
121 #f)
122 (begin
123 (if (not look-ahead)
124 (set! look-ahead (lex)))
125 (case action
126 ((peek) look-ahead)
127 ((get)
128 (let ((result look-ahead))
129 (set! look-ahead #f)
130 result))
131 (else (error "invalid lexer-buffer action" action))))))))
9e90010f 132
c983a199
BT
133;;; Get the contents of a list, where the opening parentheses has
134;;; already been found. The same code is used for vectors and lists,
135;;; where lists allow the dotted tail syntax and vectors not;
136;;; additionally, the closing parenthesis must of course match. The
137;;; implementation here is not tail-recursive, but I think it is clearer
138;;; and simpler this way.
9e90010f
DK
139
140(define (get-list lex allow-dot close-square)
141 (let* ((next (lex 'peek))
142 (type (car next)))
143 (cond
f4e5e411
BT
144 ((eq? type (if close-square 'square-close 'paren-close))
145 (begin
146 (if (not (eq? (car (lex 'get)) type))
147 (error "got different token than peeked"))
148 '()))
149 ((and allow-dot (eq? type 'dot))
150 (begin
151 (if (not (eq? (car (lex 'get)) type))
152 (error "got different token than peeked"))
153 (let ((tail (get-list lex #f close-square)))
154 (if (not (= (length tail) 1))
155 (parse-error next
156 "expected exactly one element after dot"))
157 (car tail))))
158 (else
159 ;; Do both parses in exactly this sequence!
160 (let* ((head (get-expression lex))
161 (tail (get-list lex allow-dot close-square)))
162 (cons head tail))))))
9e90010f 163
c983a199
BT
164;;; Parse a single expression from a lexer-buffer. This is the main
165;;; routine in our recursive-descent parser.
9e90010f
DK
166
167(define quotation-symbols '((quote . quote)
0dbfdeef
BT
168 (backquote . #{`}#)
169 (unquote . #{,}#)
170 (unquote-splicing . #{,@}#)))
9e90010f
DK
171
172(define (get-expression lex)
173 (let* ((token (lex 'get))
174 (type (car token))
175 (return (lambda (result)
176 (if (pair? result)
f4e5e411
BT
177 (set-source-properties!
178 result
179 (source-properties token)))
9e90010f
DK
180 result)))
181 (case type
1dfe5939
BT
182 ((eof)
183 (parse-error token "end of file during parsing"))
9e90010f
DK
184 ((integer float symbol character string)
185 (return (cdr token)))
b7966c10
BT
186 ((function)
187 (return `(function ,(get-expression lex))))
9e90010f 188 ((quote backquote unquote unquote-splicing)
f4e5e411
BT
189 (return (list (assq-ref quotation-symbols type)
190 (get-expression lex))))
9e90010f
DK
191 ((paren-open)
192 (return (get-list lex #t #f)))
193 ((square-open)
194 (return (list->vector (get-list lex #f #t))))
9a9f1231
DK
195 ((circular-ref)
196 (circular-ref token))
197 ((circular-def)
c983a199 198 ;; The order of definitions is important!
9a9f1231
DK
199 (let* ((setter (circular-define! token))
200 (expr (get-expression lex)))
201 (setter expr)
202 (force-promises! expr)
203 expr))
03e00c5c
BT
204 ((set-lexical-binding-mode!)
205 (return `(%set-lexical-binding-mode ,(cdr token))))
9e90010f 206 (else
f4e5e411 207 (parse-error token "expected expression, got" token)))))
9e90010f 208
c983a199
BT
209;;; Define the reader function based on this; build a lexer, a
210;;; lexer-buffer, and then parse a single expression to return. We also
211;;; define a circular-definitions data structure to use.
e840cc65
DK
212
213(define (read-elisp port)
9a9f1231
DK
214 (with-fluids ((circular-definitions (make-circular-definitions)))
215 (let* ((lexer (get-lexer port))
216 (lexbuf (make-lexer-buffer lexer))
1dfe5939
BT
217 (next (lexbuf 'peek)))
218 (if (eq? (car next) 'eof)
219 (cdr next)
220 (let ((result (get-expression lexbuf)))
221 (lexbuf 'finish)
222 result)))))