Commit | Line | Data |
---|---|---|
ca8a4d5e CY |
1 | ;;; semantic/wisent.el --- Wisent - Semantic gateway |
2 | ||
95df8112 | 3 | ;; Copyright (C) 2001-2007, 2009-2011 Free Software Foundation, Inc. |
ca8a4d5e CY |
4 | |
5 | ;; Author: David Ponce <david@dponce.com> | |
6 | ;; Maintainer: David Ponce <david@dponce.com> | |
7 | ;; Created: 30 Aug 2001 | |
8 | ;; Keywords: syntax | |
9 | ||
10 | ;; This file is part of GNU Emacs. | |
11 | ||
12 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
13 | ;; it under the terms of the GNU General Public License as published by | |
14 | ;; the Free Software Foundation, either version 3 of the License, or | |
15 | ;; (at your option) any later version. | |
16 | ||
17 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;; GNU General Public License for more details. | |
21 | ||
22 | ;; You should have received a copy of the GNU General Public License | |
23 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | |
24 | ||
25 | ;;; Commentary: | |
26 | ;; | |
27 | ;; Here are functions necessary to use the Wisent LALR parser from | |
28 | ;; Semantic environment. | |
29 | ||
30 | ;;; History: | |
31 | ;; | |
32 | ||
33 | ;;; Code: | |
34 | ||
35 | (require 'semantic) | |
36 | (require 'semantic/wisent/wisent) | |
37 | \f | |
38 | ;;; Lexical analysis | |
39 | ;; | |
40 | (defvar wisent-lex-istream nil | |
41 | "Input stream of `semantic-lex' syntactic tokens.") | |
42 | ||
43 | (defvar wisent-lex-lookahead nil | |
44 | "Extra lookahead token. | |
45 | When non-nil it is directly returned by `wisent-lex-function'.") | |
46 | ||
47 | ;; Maintain this alias for compatibility until all WY grammars have | |
48 | ;; been translated again to Elisp code. | |
49 | (semantic-alias-obsolete 'wisent-lex-make-token-table | |
eefa91db | 50 | 'semantic-lex-make-type-table "23.2") |
ca8a4d5e CY |
51 | |
52 | (defmacro wisent-lex-eoi () | |
53 | "Return an End-Of-Input lexical token. | |
9bf6c65c | 54 | The EOI token is like this: ($EOI \"\" POINT-MAX . POINT-MAX)." |
ca8a4d5e CY |
55 | `(cons ',wisent-eoi-term |
56 | (cons "" | |
57 | (cons (point-max) (point-max))))) | |
58 | ||
59 | (defmacro define-wisent-lexer (name doc &rest body) | |
60 | "Create a new lexical analyzer with NAME. | |
61 | DOC is a documentation string describing this analyzer. | |
62 | When a token is available in `wisent-lex-istream', eval BODY forms | |
63 | sequentially. BODY must return a lexical token for the LALR parser. | |
64 | ||
65 | Each token in input was produced by `semantic-lex', it is a list: | |
66 | ||
67 | (TOKSYM START . END) | |
68 | ||
69 | TOKSYM is a terminal symbol used in the grammar. | |
70 | START and END mark boundary in the current buffer of that token's | |
71 | value. | |
72 | ||
73 | Returned tokens must have the form: | |
74 | ||
75 | (TOKSYM VALUE START . END) | |
76 | ||
77 | where VALUE is the buffer substring between START and END positions." | |
78 | `(defun | |
79 | ,name () ,doc | |
80 | (cond | |
81 | (wisent-lex-lookahead | |
82 | (prog1 wisent-lex-lookahead | |
83 | (setq wisent-lex-lookahead nil))) | |
84 | (wisent-lex-istream | |
85 | ,@body) | |
86 | ((wisent-lex-eoi))))) | |
87 | ||
88 | (define-wisent-lexer wisent-lex | |
89 | "Return the next available lexical token in Wisent's form. | |
90 | The variable `wisent-lex-istream' contains the list of lexical tokens | |
91 | produced by `semantic-lex'. Pop the next token available and convert | |
92 | it to a form suitable for the Wisent's parser." | |
93 | (let* ((tk (car wisent-lex-istream))) | |
94 | ;; Eat input stream | |
95 | (setq wisent-lex-istream (cdr wisent-lex-istream)) | |
96 | (cons (semantic-lex-token-class tk) | |
97 | (cons (semantic-lex-token-text tk) | |
98 | (semantic-lex-token-bounds tk))))) | |
99 | \f | |
100 | ;;; Syntax analysis | |
101 | ;; | |
102 | (defvar wisent-error-function nil | |
103 | "Function used to report parse error. | |
104 | By default use the function `wisent-message'.") | |
105 | (make-variable-buffer-local 'wisent-error-function) | |
106 | ||
107 | (defvar wisent-lexer-function 'wisent-lex | |
108 | "Function used to obtain the next lexical token in input. | |
109 | Should be a lexical analyzer created with `define-wisent-lexer'.") | |
110 | (make-variable-buffer-local 'wisent-lexer-function) | |
111 | ||
112 | ;; Tag production | |
113 | ;; | |
114 | (defsubst wisent-raw-tag (semantic-tag) | |
115 | "Return raw form of given Semantic tag SEMANTIC-TAG. | |
116 | Should be used in semantic actions, in grammars, to build a Semantic | |
117 | parse tree." | |
118 | (nconc semantic-tag | |
119 | (if (or $region | |
120 | (setq $region (nthcdr 2 wisent-input))) | |
121 | (list (car $region) (cdr $region)) | |
122 | (list (point-max) (point-max))))) | |
123 | ||
124 | (defsubst wisent-cook-tag (raw-tag) | |
125 | "From raw form of Semantic tag RAW-TAG, return a list of cooked tags. | |
126 | Should be used in semantic actions, in grammars, to build a Semantic | |
127 | parse tree." | |
128 | (let* ((cooked (semantic--tag-expand raw-tag)) | |
129 | (l cooked)) | |
130 | (while l | |
131 | (semantic--tag-put-property (car l) 'reparse-symbol $nterm) | |
132 | (setq l (cdr l))) | |
133 | cooked)) | |
134 | ||
135 | ;; Unmatched syntax collector | |
136 | ;; | |
137 | (defun wisent-collect-unmatched-syntax (nomatch) | |
138 | "Add lexical token NOMATCH to the cache of unmatched tokens. | |
139 | See also the variable `semantic-unmatched-syntax-cache'. | |
140 | ||
141 | NOMATCH is in Wisent's form: (SYMBOL VALUE START . END) | |
142 | and will be collected in `semantic-lex' form: (SYMBOL START . END)." | |
143 | (let ((region (cddr nomatch))) | |
144 | (and (number-or-marker-p (car region)) | |
145 | (number-or-marker-p (cdr region)) | |
146 | (setq semantic-unmatched-syntax-cache | |
147 | (cons (cons (car nomatch) region) | |
148 | semantic-unmatched-syntax-cache))))) | |
149 | ||
150 | ;; Parser plug-ins | |
151 | ;; | |
152 | ;; The following functions permit to plug the Wisent LALR parser in | |
153 | ;; Semantic toolkit. They use the standard API provided by Semantic | |
154 | ;; to plug parsers in. | |
155 | ;; | |
156 | ;; Two plug-ins are available, BUT ONLY ONE MUST BE USED AT A TIME: | |
157 | ;; | |
158 | ;; - `wisent-parse-stream' designed to override the standard function | |
159 | ;; `semantic-parse-stream'. | |
160 | ;; | |
161 | ;; - `wisent-parse-region' designed to override the standard function | |
162 | ;; `semantic-parse-region'. | |
163 | ;; | |
164 | ;; Maybe the latter is faster because it eliminates a lot of function | |
165 | ;; call. | |
166 | ;; | |
167 | (defun wisent-parse-stream (stream goal) | |
168 | "Parse STREAM using the Wisent LALR parser. | |
169 | GOAL is a nonterminal symbol to start parsing at. | |
170 | Return the list (STREAM SEMANTIC-STREAM) where STREAM are those | |
171 | elements of STREAM that have not been used. SEMANTIC-STREAM is the | |
172 | list of semantic tags found. | |
173 | The LALR parser automaton must be available in buffer local variable | |
174 | `semantic--parse-table'. | |
175 | ||
176 | Must be installed by `semantic-install-function-overrides' to override | |
177 | the standard function `semantic-parse-stream'." | |
178 | (let (wisent-lex-istream wisent-lex-lookahead la-elt cache) | |
179 | ||
180 | ;; IMPLEMENTATION NOTES: | |
181 | ;; `wisent-parse' returns a lookahead token when it stopped | |
182 | ;; parsing before encountering the end of input. To re-enter the | |
183 | ;; parser it is necessary to push back in the lexical input stream | |
184 | ;; the last lookahead token issued. Because the format of | |
185 | ;; lookahead tokens and tokens in STREAM can be different the | |
186 | ;; lookahead token is put in the variable `wisent-lex-lookahead' | |
187 | ;; before calling `wisent-parse'. Wisent's lexers always pop the | |
188 | ;; next lexical token from that variable when non nil, then from | |
189 | ;; the lexical input stream. | |
190 | ;; | |
191 | ;; The first element of STREAM is used to keep lookahead tokens | |
192 | ;; across successive calls to `wisent-parse-stream'. In fact | |
193 | ;; what is kept is a stack of lookaheads encountered so far. It | |
194 | ;; is cleared when `wisent-parse' returns a valid semantic tag, | |
195 | ;; or twice the same lookahead token! The latter indicates that | |
196 | ;; there is a syntax error on that token. If so, tokens currently | |
197 | ;; in the lookahead stack have not been used, and are moved into | |
198 | ;; `semantic-unmatched-syntax-cache'. When the parser will be | |
199 | ;; re-entered, a new lexical token will be read from STREAM. | |
200 | ;; | |
201 | ;; The first element of STREAM that contains the lookahead stack | |
202 | ;; has this format (compatible with the format of `semantic-lex' | |
203 | ;; tokens): | |
204 | ;; | |
205 | ;; (LOOKAHEAD-STACK START . END) | |
206 | ;; | |
207 | ;; where LOOKAHEAD-STACK is a list of lookahead tokens. And | |
208 | ;; START/END are the bounds of the lookahead at top of stack. | |
209 | ||
210 | ;; Retrieve lookahead token from stack | |
211 | (setq la-elt (car stream)) | |
212 | (if (consp (car la-elt)) | |
213 | ;; The first elt of STREAM contains a lookahead stack | |
214 | (setq wisent-lex-lookahead (caar la-elt) | |
215 | stream (cdr stream)) | |
216 | (setq la-elt nil)) | |
217 | ;; Parse | |
218 | (setq wisent-lex-istream stream | |
219 | cache (semantic-safe "wisent-parse-stream: %s" | |
220 | (condition-case error-to-filter | |
221 | (wisent-parse semantic--parse-table | |
222 | wisent-lexer-function | |
223 | wisent-error-function | |
224 | goal) | |
225 | (args-out-of-range | |
226 | (if (and (not debug-on-error) | |
227 | (= wisent-parse-max-stack-size | |
228 | (nth 2 error-to-filter))) | |
229 | (progn | |
230 | (message "wisent-parse-stream: %s" | |
231 | (error-message-string error-to-filter)) | |
232 | (message "wisent-parse-max-stack-size \ | |
233 | might need to be increased")) | |
234 | (apply 'signal error-to-filter)))))) | |
235 | ;; Manage returned lookahead token | |
236 | (if wisent-lookahead | |
237 | (if (eq (caar la-elt) wisent-lookahead) | |
238 | ;; It is already at top of lookahead stack | |
239 | (progn | |
240 | (setq cache nil | |
241 | la-elt (car la-elt)) | |
242 | (while la-elt | |
243 | ;; Collect unmatched tokens from the stack | |
244 | (run-hook-with-args | |
245 | 'wisent-discarding-token-functions (car la-elt)) | |
246 | (setq la-elt (cdr la-elt)))) | |
247 | ;; New lookahead token | |
248 | (if (or (consp cache) ;; Clear the stack if parse succeeded | |
249 | (null la-elt)) | |
250 | (setq la-elt (cons nil nil))) | |
251 | ;; Push it into the stack | |
252 | (setcar la-elt (cons wisent-lookahead (car la-elt))) | |
253 | ;; Update START/END | |
254 | (setcdr la-elt (cddr wisent-lookahead)) | |
255 | ;; Push (LOOKAHEAD-STACK START . END) in STREAM | |
256 | (setq wisent-lex-istream (cons la-elt wisent-lex-istream)))) | |
257 | ;; Return (STREAM SEMANTIC-STREAM) | |
258 | (list wisent-lex-istream | |
259 | (if (consp cache) cache '(nil)) | |
260 | ))) | |
261 | ||
262 | (defun wisent-parse-region (start end &optional goal depth returnonerror) | |
263 | "Parse the area between START and END using the Wisent LALR parser. | |
264 | Return the list of semantic tags found. | |
265 | Optional arguments GOAL is a nonterminal symbol to start parsing at, | |
266 | DEPTH is the lexical depth to scan, and RETURNONERROR is a flag to | |
267 | stop parsing on syntax error, when non-nil. | |
268 | The LALR parser automaton must be available in buffer local variable | |
269 | `semantic--parse-table'. | |
270 | ||
271 | Must be installed by `semantic-install-function-overrides' to override | |
272 | the standard function `semantic-parse-region'." | |
273 | (if (or (< start (point-min)) (> end (point-max)) (< end start)) | |
274 | (error "Invalid bounds [%s %s] passed to `wisent-parse-region'" | |
275 | start end)) | |
276 | (let* ((case-fold-search semantic-case-fold) | |
277 | (wisent-lex-istream (semantic-lex start end depth)) | |
278 | ptree tag cooked lstack wisent-lex-lookahead) | |
279 | ;; Loop while there are lexical tokens available | |
280 | (while wisent-lex-istream | |
281 | ;; Parse | |
282 | (setq wisent-lex-lookahead (car lstack) | |
283 | tag (semantic-safe "wisent-parse-region: %s" | |
284 | (wisent-parse semantic--parse-table | |
285 | wisent-lexer-function | |
286 | wisent-error-function | |
287 | goal))) | |
288 | ;; Manage returned lookahead token | |
289 | (if wisent-lookahead | |
290 | (if (eq (car lstack) wisent-lookahead) | |
291 | ;; It is already at top of lookahead stack | |
292 | (progn | |
293 | (setq tag nil) | |
294 | (while lstack | |
295 | ;; Collect unmatched tokens from lookahead stack | |
296 | (run-hook-with-args | |
297 | 'wisent-discarding-token-functions (car lstack)) | |
298 | (setq lstack (cdr lstack)))) | |
299 | ;; Push new lookahead token into the stack | |
300 | (setq lstack (cons wisent-lookahead lstack)))) | |
301 | ;; Manage the parser result | |
302 | (cond | |
303 | ;; Parse succeeded, cook result | |
304 | ((consp tag) | |
305 | (setq lstack nil ;; Clear the lookahead stack | |
306 | cooked (semantic--tag-expand tag) | |
307 | ptree (append cooked ptree)) | |
308 | (while cooked | |
309 | (setq tag (car cooked) | |
310 | cooked (cdr cooked)) | |
311 | (or (semantic--tag-get-property tag 'reparse-symbol) | |
312 | (semantic--tag-put-property tag 'reparse-symbol goal))) | |
313 | ) | |
314 | ;; Return on error if requested | |
315 | (returnonerror | |
316 | (setq wisent-lex-istream nil) | |
317 | )) | |
318 | ;; Work in progress... | |
319 | (if wisent-lex-istream | |
320 | (and (eq semantic-working-type 'percent) | |
321 | (boundp 'semantic--progress-reporter) | |
322 | semantic--progress-reporter | |
323 | (progress-reporter-update | |
324 | semantic--progress-reporter | |
325 | (/ (* 100 (semantic-lex-token-start | |
326 | (car wisent-lex-istream))) | |
327 | (point-max)))))) | |
328 | ;; Return parse tree | |
329 | (nreverse ptree))) | |
330 | \f | |
331 | ;;; Interfacing with edebug | |
332 | ;; | |
333 | (add-hook | |
334 | 'edebug-setup-hook | |
335 | #'(lambda () | |
336 | ||
337 | (def-edebug-spec define-wisent-lexer | |
338 | (&define name stringp def-body) | |
339 | ) | |
340 | ||
341 | )) | |
342 | ||
343 | (provide 'semantic/wisent) | |
344 | ||
345 | ;;; semantic/wisent.el ends here |