Commit | Line | Data |
---|---|---|
bb051423 CY |
1 | ;;; semantic/wisent/wisent.el --- GNU Bison for Emacs - Runtime |
2 | ||
ab422c4d | 3 | ;;; Copyright (C) 2002-2007, 2009-2013 Free Software Foundation, Inc. |
bb051423 CY |
4 | |
5 | ;; Author: David Ponce <david@dponce.com> | |
6 | ;; Maintainer: David Ponce <david@dponce.com> | |
7 | ;; Created: 30 January 2002 | |
8 | ;; Keywords: syntax | |
bb051423 CY |
9 | |
10 | ;; This file is part of GNU Emacs. | |
11 | ||
12 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
13 | ;; it under the terms of the GNU General Public License as published by | |
14 | ;; the Free Software Foundation, either version 3 of the License, or | |
15 | ;; (at your option) any later version. | |
16 | ||
17 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;; GNU General Public License for more details. | |
21 | ||
22 | ;; You should have received a copy of the GNU General Public License | |
23 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | |
24 | ||
25 | ;;; Commentary: | |
26 | ;; | |
27 | ;; Parser engine and runtime of Wisent. | |
28 | ;; | |
29 | ;; Wisent (the European Bison ;-) is an Elisp implementation of the | |
30 | ;; GNU Compiler Compiler Bison. The Elisp code is a port of the C | |
31 | ;; code of GNU Bison 1.28 & 1.31. | |
32 | ;; | |
33 | ;; For more details on the basic concepts for understanding Wisent, | |
34 | ;; read the Bison manual ;) | |
35 | ;; | |
36 | ;; For more details on Wisent itself read the Wisent manual. | |
37 | ||
38 | ;;; History: | |
39 | ;; | |
40 | ||
41 | ;;; Code: | |
42 | ||
43 | (defgroup wisent nil | |
44 | " | |
45 | /\\_.-^^^-._/\\ The GNU | |
46 | \\_ _/ | |
47 | ( `o ` (European ;-) Bison | |
48 | \\ ` / | |
39004030 PE |
49 | ( D ,\" for Emacs! |
50 | ` ~ ,\" | |
bb051423 CY |
51 | `\"\"" |
52 | :group 'semantic) | |
53 | ||
54 | \f | |
55 | ;;;; ------------- | |
56 | ;;;; Runtime stuff | |
57 | ;;;; ------------- | |
58 | ||
59 | ;;; Compatibility | |
60 | (eval-and-compile | |
61 | (if (fboundp 'char-valid-p) | |
62 | (defalias 'wisent-char-p 'char-valid-p) | |
63 | (defalias 'wisent-char-p 'char-or-char-int-p))) | |
64 | ||
65 | ;;; Printed representation of terminals and nonterminals | |
66 | (defconst wisent-escape-sequence-strings | |
67 | '( | |
68 | (?\a . "'\\a'") ; C-g | |
69 | (?\b . "'\\b'") ; backspace, BS, C-h | |
70 | (?\t . "'\\t'") ; tab, TAB, C-i | |
71 | (?\n . "'\\n'") ; newline, C-j | |
72 | (?\v . "'\\v'") ; vertical tab, C-k | |
73 | (?\f . "'\\f'") ; formfeed character, C-l | |
74 | (?\r . "'\\r'") ; carriage return, RET, C-m | |
75 | (?\e . "'\\e'") ; escape character, ESC, C-[ | |
76 | (?\\ . "'\\'") ; backslash character, \ | |
77 | (?\d . "'\\d'") ; delete character, DEL | |
78 | ) | |
79 | "Printed representation of usual escape sequences.") | |
80 | ||
81 | (defsubst wisent-item-to-string (item) | |
82 | "Return a printed representation of ITEM. | |
83 | ITEM can be a nonterminal or terminal symbol, or a character literal." | |
84 | (if (wisent-char-p item) | |
85 | (or (cdr (assq item wisent-escape-sequence-strings)) | |
86 | (format "'%c'" item)) | |
87 | (symbol-name item))) | |
88 | ||
89 | (defsubst wisent-token-to-string (token) | |
90 | "Return a printed representation of lexical token TOKEN." | |
91 | (format "%s%s(%S)" (wisent-item-to-string (car token)) | |
92 | (if (nth 2 token) (format "@%s" (nth 2 token)) "") | |
93 | (nth 1 token))) | |
94 | ||
95 | ;;; Special symbols | |
96 | (defconst wisent-eoi-term '$EOI | |
97 | "End Of Input token.") | |
98 | ||
99 | (defconst wisent-error-term 'error | |
100 | "Error recovery token.") | |
101 | ||
102 | (defconst wisent-accept-tag 'accept | |
103 | "Accept result after input successfully parsed.") | |
104 | ||
105 | (defconst wisent-error-tag 'error | |
106 | "Process a syntax error.") | |
107 | ||
108 | ;;; Special functions | |
109 | (defun wisent-automaton-p (obj) | |
110 | "Return non-nil if OBJ is a LALR automaton. | |
111 | If OBJ is a symbol check its value." | |
112 | (and obj (symbolp obj) (boundp obj) | |
113 | (setq obj (symbol-value obj))) | |
114 | (and (vectorp obj) (= 4 (length obj)) | |
115 | (vectorp (aref obj 0)) (vectorp (aref obj 1)) | |
116 | (= (length (aref obj 0)) (length (aref obj 1))) | |
117 | (listp (aref obj 2)) (vectorp (aref obj 3)))) | |
118 | ||
119 | (defsubst wisent-region (&rest positions) | |
120 | "Return the start/end positions of the region including POSITIONS. | |
121 | Each element of POSITIONS is a pair (START-POS . END-POS) or nil. The | |
122 | returned value is the pair (MIN-START-POS . MAX-END-POS) or nil if no | |
123 | POSITIONS are available." | |
124 | (let ((pl (delq nil positions))) | |
125 | (if pl | |
126 | (cons (apply #'min (mapcar #'car pl)) | |
127 | (apply #'max (mapcar #'cdr pl)))))) | |
128 | ||
129 | ;;; Reporting | |
130 | (defvar wisent-parse-verbose-flag nil | |
131 | "*Non-nil means to issue more messages while parsing.") | |
132 | ||
133 | (defun wisent-parse-toggle-verbose-flag () | |
134 | "Toggle whether to issue more messages while parsing." | |
135 | (interactive) | |
136 | (setq wisent-parse-verbose-flag (not wisent-parse-verbose-flag)) | |
2054a44c | 137 | (when (called-interactively-p 'interactive) |
bb051423 CY |
138 | (message "More messages while parsing %sabled" |
139 | (if wisent-parse-verbose-flag "en" "dis")))) | |
140 | ||
141 | (defsubst wisent-message (string &rest args) | |
142 | "Print a one-line message if `wisent-parse-verbose-flag' is set. | |
143 | Pass STRING and ARGS arguments to `message'." | |
144 | (and wisent-parse-verbose-flag | |
145 | (apply 'message string args))) | |
146 | \f | |
147 | ;;;; -------------------- | |
148 | ;;;; The LR parser engine | |
149 | ;;;; -------------------- | |
150 | ||
151 | (defcustom wisent-parse-max-stack-size 500 | |
152 | "The parser stack size." | |
153 | :type 'integer | |
154 | :group 'wisent) | |
155 | ||
156 | (defcustom wisent-parse-max-recover 3 | |
157 | "Number of tokens to shift before turning off error status." | |
158 | :type 'integer | |
159 | :group 'wisent) | |
160 | ||
161 | (defvar wisent-discarding-token-functions nil | |
162 | "List of functions to be called when discarding a lexical token. | |
163 | These functions receive the lexical token discarded. | |
164 | When the parser encounters unexpected tokens, it can discards them, | |
165 | based on what directed by error recovery rules. Either when the | |
166 | parser reads tokens until one is found that can be shifted, or when an | |
167 | semantic action calls the function `wisent-skip-token' or | |
168 | `wisent-skip-block'. | |
169 | For language specific hooks, make sure you define this as a local | |
170 | hook.") | |
171 | ||
172 | (defvar wisent-pre-parse-hook nil | |
173 | "Normal hook run just before entering the LR parser engine.") | |
174 | ||
175 | (defvar wisent-post-parse-hook nil | |
176 | "Normal hook run just after the LR parser engine terminated.") | |
177 | ||
178 | (defvar wisent-loop nil | |
179 | "The current parser action. | |
180 | Stop parsing when set to nil. | |
181 | This variable only has meaning in the scope of `wisent-parse'.") | |
182 | ||
183 | (defvar wisent-nerrs nil | |
184 | "The number of parse errors encountered so far.") | |
185 | ||
186 | (defvar wisent-lookahead nil | |
187 | "The lookahead lexical token. | |
188 | This value is non-nil if the parser terminated because of an | |
189 | unrecoverable error.") | |
190 | ||
191 | ;; Variables and macros that are useful in semantic actions. | |
192 | (defvar wisent-parse-lexer-function nil | |
193 | "The user supplied lexer function. | |
194 | This function don't have arguments. | |
195 | This variable only has meaning in the scope of `wisent-parse'.") | |
196 | ||
197 | (defvar wisent-parse-error-function nil | |
198 | "The user supplied error function. | |
199 | This function must accept one argument, a message string. | |
200 | This variable only has meaning in the scope of `wisent-parse'.") | |
201 | ||
202 | (defvar wisent-input nil | |
203 | "The last token read. | |
204 | This variable only has meaning in the scope of `wisent-parse'.") | |
205 | ||
206 | (defvar wisent-recovering nil | |
207 | "Non-nil means that the parser is recovering. | |
208 | This variable only has meaning in the scope of `wisent-parse'.") | |
209 | ||
210 | ;; Variables that only have meaning in the scope of a semantic action. | |
211 | ;; These global definitions avoid byte-compiler warnings. | |
212 | (defvar $region nil) | |
213 | (defvar $nterm nil) | |
214 | (defvar $action nil) | |
215 | ||
216 | (defmacro wisent-lexer () | |
217 | "Obtain the next terminal in input." | |
218 | '(funcall wisent-parse-lexer-function)) | |
219 | ||
220 | (defmacro wisent-error (msg) | |
221 | "Call the user supplied error reporting function with message MSG." | |
222 | `(funcall wisent-parse-error-function ,msg)) | |
223 | ||
224 | (defmacro wisent-errok () | |
225 | "Resume generating error messages immediately for subsequent syntax errors. | |
226 | This is useful primarily in error recovery semantic actions." | |
227 | '(setq wisent-recovering nil)) | |
228 | ||
229 | (defmacro wisent-clearin () | |
230 | "Discard the current lookahead token. | |
231 | This will cause a new lexical token to be read. | |
232 | This is useful primarily in error recovery semantic actions." | |
233 | '(setq wisent-input nil)) | |
234 | ||
235 | (defmacro wisent-abort () | |
236 | "Abort parsing and save the lookahead token. | |
237 | This is useful primarily in error recovery semantic actions." | |
238 | '(setq wisent-lookahead wisent-input | |
239 | wisent-loop nil)) | |
240 | ||
241 | (defmacro wisent-set-region (start end) | |
242 | "Change the region of text matched by the current nonterminal. | |
243 | START and END are respectively the beginning and end positions of the | |
244 | region. If START or END values are not a valid positions the region | |
245 | is set to nil." | |
246 | `(setq $region (and (number-or-marker-p ,start) | |
247 | (number-or-marker-p ,end) | |
248 | (cons ,start ,end)))) | |
249 | ||
250 | (defun wisent-skip-token () | |
251 | "Skip the lookahead token in order to resume parsing. | |
252 | Return nil. | |
253 | Must be used in error recovery semantic actions." | |
254 | (if (eq (car wisent-input) wisent-eoi-term) | |
255 | ;; Does nothing at EOI to avoid infinite recovery loop. | |
256 | nil | |
257 | (wisent-message "%s: skip %s" $action | |
258 | (wisent-token-to-string wisent-input)) | |
259 | (run-hook-with-args | |
260 | 'wisent-discarding-token-functions wisent-input) | |
261 | (wisent-clearin) | |
262 | (wisent-errok))) | |
263 | ||
264 | (defun wisent-skip-block (&optional bounds) | |
265 | "Safely skip a parenthesized block in order to resume parsing. | |
266 | Return nil. | |
267 | Must be used in error recovery semantic actions. | |
268 | Optional argument BOUNDS is a pair (START . END) which indicates where | |
269 | the parenthesized block starts. Typically the value of a `$regionN' | |
a30e71ae JB |
270 | variable, where `N' is the Nth element of the current rule components |
271 | that match the block beginning. It defaults to the value of the | |
272 | `$region' variable." | |
bb051423 CY |
273 | (let ((start (car (or bounds $region))) |
274 | end input) | |
275 | (if (not (number-or-marker-p start)) | |
276 | ;; No nonterminal region available, skip the lookahead token. | |
277 | (wisent-skip-token) | |
278 | ;; Try to skip a block. | |
279 | (if (not (setq end (save-excursion | |
280 | (goto-char start) | |
281 | (and (looking-at "\\s(") | |
282 | (condition-case nil | |
283 | (1- (scan-lists (point) 1 0)) | |
284 | (error nil)))))) | |
285 | ;; Not actually a block, skip the lookahead token. | |
286 | (wisent-skip-token) | |
287 | ;; OK to safely skip the block, so read input until a matching | |
288 | ;; close paren or EOI is encountered. | |
289 | (setq input wisent-input) | |
290 | (while (and (not (eq (car input) wisent-eoi-term)) | |
291 | (< (nth 2 input) end)) | |
292 | (run-hook-with-args | |
293 | 'wisent-discarding-token-functions input) | |
294 | (setq input (wisent-lexer))) | |
295 | (wisent-message "%s: in enclosing block, skip from %s to %s" | |
296 | $action | |
297 | (wisent-token-to-string wisent-input) | |
298 | (wisent-token-to-string input)) | |
299 | (if (eq (car wisent-input) wisent-eoi-term) | |
300 | ;; Does nothing at EOI to avoid infinite recovery loop. | |
301 | nil | |
302 | (wisent-clearin) | |
303 | (wisent-errok)) | |
304 | ;; Set end of $region to end of block. | |
305 | (wisent-set-region (car $region) (1+ end)) | |
306 | nil)))) | |
307 | ||
308 | ;;; Core parser engine | |
309 | (defsubst wisent-production-bounds (stack i j) | |
310 | "Determine the start and end locations of a production value. | |
311 | Return a pair (START . END), where START is the first available start | |
312 | location, and END the last available end location, in components | |
313 | values of the rule currently reduced. | |
314 | Return nil when no component location is available. | |
315 | STACK is the parser stack. | |
316 | I and J are the indices in STACK of respectively the value of the | |
317 | first and last components of the current rule. | |
318 | This function is for internal use by semantic actions' generated | |
319 | lambda-expression." | |
320 | (let ((f (cadr (aref stack i))) | |
321 | (l (cddr (aref stack j)))) | |
322 | (while (/= i j) | |
323 | (cond | |
324 | ((not f) (setq f (cadr (aref stack (setq i (+ i 2)))))) | |
325 | ((not l) (setq l (cddr (aref stack (setq j (- j 2)))))) | |
326 | ((setq i j)))) | |
327 | (and f l (cons f l)))) | |
328 | ||
329 | (defmacro wisent-parse-action (i al) | |
330 | "Return the next parser action. | |
331 | I is a token item number and AL is the list of (item . action) | |
332 | available at current state. The first element of AL contains the | |
333 | default action for this state." | |
334 | `(cdr (or (assq ,i ,al) (car ,al)))) | |
335 | ||
336 | (defsubst wisent-parse-start (start starts) | |
337 | "Return the first lexical token to shift for START symbol. | |
338 | STARTS is the table of allowed start symbols or nil if the LALR | |
339 | automaton has only one entry point." | |
340 | (if (null starts) | |
341 | ;; Only one entry point, return the first lexical token | |
342 | ;; available in input. | |
343 | (wisent-lexer) | |
344 | ;; Multiple start symbols defined, return the internal lexical | |
345 | ;; token associated to START. By default START is the first | |
346 | ;; nonterminal defined in STARTS. | |
347 | (let ((token (cdr (if start (assq start starts) (car starts))))) | |
348 | (if token | |
349 | (list token (symbol-name token)) | |
350 | (error "Invalid start symbol %s" start))))) | |
351 | ||
352 | (defun wisent-parse (automaton lexer &optional error start) | |
353 | "Parse input using the automaton specified in AUTOMATON. | |
354 | ||
355 | - AUTOMATON is an LALR(1) automaton generated by | |
356 | `wisent-compile-grammar'. | |
357 | ||
358 | - LEXER is a function with no argument called by the parser to obtain | |
359 | the next terminal (token) in input. | |
360 | ||
361 | - ERROR is an optional reporting function called when a parse error | |
362 | occurs. It receives a message string to report. It defaults to the | |
363 | function `wisent-message'. | |
364 | ||
365 | - START specify the start symbol (nonterminal) used by the parser as | |
366 | its goal. It defaults to the start symbol defined in the grammar | |
367 | \(see also `wisent-compile-grammar')." | |
368 | (run-hooks 'wisent-pre-parse-hook) | |
369 | (let* ((actions (aref automaton 0)) | |
370 | (gotos (aref automaton 1)) | |
371 | (starts (aref automaton 2)) | |
372 | (stack (make-vector wisent-parse-max-stack-size nil)) | |
373 | (sp 0) | |
374 | (wisent-loop t) | |
375 | (wisent-parse-error-function (or error 'wisent-message)) | |
376 | (wisent-parse-lexer-function lexer) | |
377 | (wisent-recovering nil) | |
378 | (wisent-input (wisent-parse-start start starts)) | |
379 | state tokid choices choice) | |
380 | (setq wisent-nerrs 0 ;; Reset parse error counter | |
381 | wisent-lookahead nil) ;; and lookahead token | |
382 | (aset stack 0 0) ;; Initial state | |
383 | (while wisent-loop | |
384 | (setq state (aref stack sp) | |
385 | tokid (car wisent-input) | |
386 | wisent-loop (wisent-parse-action tokid (aref actions state))) | |
387 | (cond | |
388 | ||
389 | ;; Input successfully parsed | |
390 | ;; ------------------------- | |
391 | ((eq wisent-loop wisent-accept-tag) | |
392 | (setq wisent-loop nil)) | |
393 | ||
394 | ;; Syntax error in input | |
395 | ;; --------------------- | |
396 | ((eq wisent-loop wisent-error-tag) | |
397 | ;; Report this error if not already recovering from an error. | |
398 | (setq choices (aref actions state)) | |
399 | (or wisent-recovering | |
400 | (wisent-error | |
401 | (format "Syntax error, unexpected %s, expecting %s" | |
402 | (wisent-token-to-string wisent-input) | |
403 | (mapconcat 'wisent-item-to-string | |
404 | (delq wisent-error-term | |
405 | (mapcar 'car (cdr choices))) | |
406 | ", ")))) | |
407 | ;; Increment the error counter | |
408 | (setq wisent-nerrs (1+ wisent-nerrs)) | |
409 | ;; If just tried and failed to reuse lookahead token after an | |
410 | ;; error, discard it. | |
411 | (if (eq wisent-recovering wisent-parse-max-recover) | |
412 | (if (eq tokid wisent-eoi-term) | |
413 | (wisent-abort) ;; Terminate if at end of input. | |
414 | (wisent-message "Error recovery: skip %s" | |
415 | (wisent-token-to-string wisent-input)) | |
416 | (run-hook-with-args | |
417 | 'wisent-discarding-token-functions wisent-input) | |
418 | (setq wisent-input (wisent-lexer))) | |
419 | ||
420 | ;; Else will try to reuse lookahead token after shifting the | |
421 | ;; error token. | |
422 | ||
423 | ;; Each real token shifted decrements this. | |
424 | (setq wisent-recovering wisent-parse-max-recover) | |
425 | ;; Pop the value/state stack to see if an action associated | |
426 | ;; to special terminal symbol 'error exists. | |
427 | (while (and (>= sp 0) | |
428 | (not (and (setq state (aref stack sp) | |
429 | choices (aref actions state) | |
430 | choice (assq wisent-error-term choices)) | |
431 | (natnump (cdr choice))))) | |
432 | (setq sp (- sp 2))) | |
433 | ||
434 | (if (not choice) | |
435 | ;; No 'error terminal was found. Just terminate. | |
436 | (wisent-abort) | |
437 | ;; Try to recover and continue parsing. | |
438 | ;; Shift the error terminal. | |
439 | (setq state (cdr choice) ; new state | |
440 | sp (+ sp 2)) | |
441 | (aset stack (1- sp) nil) ; push value | |
442 | (aset stack sp state) ; push new state | |
443 | ;; Adjust input to error recovery state. Unless 'error | |
444 | ;; triggers a reduction, eat the input stream until an | |
445 | ;; expected terminal symbol is found, or EOI is reached. | |
446 | (if (cdr (setq choices (aref actions state))) | |
447 | (while (not (or (eq (car wisent-input) wisent-eoi-term) | |
448 | (assq (car wisent-input) choices))) | |
449 | (wisent-message "Error recovery: skip %s" | |
450 | (wisent-token-to-string wisent-input)) | |
451 | (run-hook-with-args | |
452 | 'wisent-discarding-token-functions wisent-input) | |
453 | (setq wisent-input (wisent-lexer))))))) | |
454 | ||
455 | ;; Shift current token on top of the stack | |
456 | ;; --------------------------------------- | |
457 | ((natnump wisent-loop) | |
458 | ;; Count tokens shifted since error; after | |
459 | ;; `wisent-parse-max-recover', turn off error status. | |
460 | (setq wisent-recovering (and (natnump wisent-recovering) | |
461 | (> wisent-recovering 1) | |
462 | (1- wisent-recovering))) | |
463 | (setq sp (+ sp 2)) | |
464 | (aset stack (1- sp) (cdr wisent-input)) | |
465 | (aset stack sp wisent-loop) | |
466 | (setq wisent-input (wisent-lexer))) | |
467 | ||
468 | ;; Reduce by rule (call semantic action) | |
469 | ;; ------------------------------------- | |
470 | (t | |
471 | (setq sp (funcall wisent-loop stack sp gotos)) | |
472 | (or wisent-input (setq wisent-input (wisent-lexer)))))) | |
473 | (run-hooks 'wisent-post-parse-hook) | |
474 | (car (aref stack 1)))) | |
475 | ||
476 | (provide 'semantic/wisent/wisent) | |
477 | ||
478 | ;;; semantic/wisent/wisent.el ends here |