Sync to HEAD
[bpt/emacs.git] / lisp / progmodes / ebnf-yac.el
CommitLineData
e8af40ee 1;;; ebnf-yac.el --- parser for Yacc/Bison
984ae001 2
6b61353c
KH
3;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
4;; Free Software Foundation, Inc.
984ae001 5
6b61353c
KH
6;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8;; Time-stamp: <2004/04/03 16:50:46 vinicius>
ae16d111 9;; Keywords: wp, ebnf, PostScript
6b61353c 10;; Version: 1.3
984ae001 11
8d9ea7b1 12;; This file is part of GNU Emacs.
984ae001 13
8d9ea7b1 14;; GNU Emacs is free software; you can redistribute it and/or modify
984ae001
GM
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
8d9ea7b1 19;; GNU Emacs is distributed in the hope that it will be useful,
984ae001
GM
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
25;; along with GNU Emacs; see the file COPYING. If not, write to the
26;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27;; Boston, MA 02111-1307, USA.
28
29;;; Commentary:
30
31;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32;;
33;;
34;; This is part of ebnf2ps package.
35;;
36;; This package defines a parser for Yacc/Bison.
37;;
38;; See ebnf2ps.el for documentation.
39;;
40;;
41;; Yacc/Bison Syntax
42;; -----------------
43;;
44;; YACC = { YACC-Definitions }* "%%" { YACC-Rule }* [ "%%" [ YACC-Code ] ].
45;;
6b61353c
KH
46;; YACC-Definitions = ( "%token" | "%left" | "%right" | "%nonassoc" )
47;; [ "<" Name ">" ] Name-List
48;; | "%prec" Name
984ae001
GM
49;; | "any other Yacc definition"
50;; .
51;;
52;; YACC-Code = "any C definition".
53;;
54;; YACC-Rule = Name ":" Alternative ";".
55;;
56;; Alternative = { Sequence || "|" }*.
57;;
58;; Sequence = { Factor }*.
59;;
60;; Factor = Name
61;; | "'" "character" "'"
62;; | "error"
63;; | "{" "C like commands" "}"
64;; .
65;;
66;; Name-List = { Name || "," }*.
67;;
68;; Name = "[A-Za-z][A-Za-z0-9_.]*".
69;;
70;; Comment = "/*" "any character, but the sequence \"*/\"" "*/"
6b61353c
KH
71;; | "//" "any character, but the newline \"\\n\"" "\\n".
72;;
73;;
74;; In other words, a valid Name begins with a letter (upper or lower case)
75;; followed by letters, decimal digits, underscore (_) or point (.). For
76;; example: this_is_a_valid.name, Another_EXAMPLE, mIxEd.CaSe.
77;;
78;;
79;; Acknowledgements
80;; ----------------
81;;
82;; Thanks to Matthew K. Junker <junker@alum.mit.edu> for the suggestion to deal
83;; with %right, %left and %prec pragmas. His suggestion was extended to deal
84;; with %nonassoc pragma too.
984ae001
GM
85;;
86;;
87;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
88
e8af40ee 89;;; Code:
984ae001
GM
90
91
92(require 'ebnf-otz)
93
94
(defvar ebnf-yac-lex nil
  "Text of the last lexeme read by function `ebnf-yac-lex'.
It is set when a string, a terminal or a non-terminal token is read.")


(defvar ebnf-yac-token-list nil
  "List of names declared by `%TOKEN' and similar definitions.
A name found in this list is reported as a terminal by `ebnf-yac-lex'.")


(defvar ebnf-yac-skip-char nil
  "Non-nil means skip printable characters with no grammatical meaning.")


(defvar ebnf-yac-error nil
  "Non-nil means the word \"error\" was read by `ebnf-yac-lex'.")
984ae001
GM
109
110\f
111;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
f504d516 112;; Syntactic analyzer
984ae001
GM
113
114
115;;; YACC = { YACC-Definitions }* "%%" { YACC-Rule }* [ "%%" [ YACC-Code ] ].
116;;;
117;;; YACC-Code = "any C definition".
118
(defun ebnf-yac-parser (start)
  "Yacc/Bison parser.

Parse the buffer text from START up to `ebnf-limit'.
Return the list of productions read, most recent first; a production for
which `ebnf-add-empty-rule-list' returns non-nil is left out of the list.
On normal exit point is moved back to where it was on entry."
  (let ((span (1+ (- ebnf-limit start)))
        (offset (1- start))
        (saved-point (point))
        productions token rule)
    (goto-char start)
    (setq token (ebnf-yac-lex))
    (when (eq token 'end-of-input)
      (error "Invalid Yacc/Bison file format"))
    ;; The definitions section must be closed by the `%%' separator.
    (unless (eq (ebnf-yac-definitions token) 'yac-separator)
      (error "Missing `%%%%'"))
    (setq token (ebnf-yac-lex))
    ;; Rules section: read rules until end of input or a second `%%'.
    (while (not (memq token '(end-of-input yac-separator)))
      (ebnf-message-float
       "Parsing...%s%%"
       (/ (* (- (point) offset) 100.0) span))
      ;; `ebnf-yac-rule' returns (NEXT-TOKEN . PRODUCTION).
      (let ((parsed (ebnf-yac-rule token)))
        (setq token (car parsed)
              rule (cdr parsed)))
      (unless (ebnf-add-empty-rule-list rule)
        (setq productions (cons rule productions))))
    (goto-char saved-point)
    productions))
143
144
6b61353c
KH
145;;; YACC-Definitions = ( "%token" | "%left" | "%right" | "%nonassoc" )
146;;; [ "<" Name ">" ] Name-List
147;;; | "%prec" Name
984ae001
GM
148;;; | "any other Yacc definition"
149;;; .
150
(defun ebnf-yac-definitions (token)
  "Parse the Yacc definitions section starting at TOKEN.

Names listed after a `yac-token' pragma are prepended to
`ebnf-yac-token-list'.  Return the token that ended the section, that is,
`yac-separator' or `end-of-input'."
  ;; While in the definitions section, printable characters without
  ;; grammatical meaning are skipped by the lexer.
  (let ((ebnf-yac-skip-char t))
    (while (not (memq token '(yac-separator end-of-input)))
      (setq token
            (cond
             ;; ( "%token" | "%left" | "%right" | "%nonassoc" )
             ;; [ "<" Name ">" ] Name-List
             ((eq token 'yac-token)
              (let ((next (ebnf-yac-lex))
                    parsed)
                (when (eq next 'open-angle)
                  (unless (eq (ebnf-yac-lex) 'non-terminal)
                    (error "Missing type name"))
                  (unless (eq (ebnf-yac-lex) 'close-angle)
                    (error "Missing `>'"))
                  (setq next (ebnf-yac-lex)))
                ;; PARSED is (NEXT-TOKEN . NAMES)
                (setq parsed (ebnf-yac-name-list next))
                (setq ebnf-yac-token-list
                      (nconc (cdr parsed) ebnf-yac-token-list))
                (car parsed)))
             ;; "%prec" Name
             ((eq token 'yac-prec)
              (unless (eq (ebnf-yac-lex) 'non-terminal)
                (error "Missing prec name"))
              (ebnf-yac-lex))
             ;; "any other Yacc definition"
             (t
              (ebnf-yac-lex)))))
    token))
180
181
182;;; YACC-Rule = Name ":" Alternative ";".
183
(defun ebnf-yac-rule (token)
  "Parse one rule whose name was just read as TOKEN.

The rule name is taken from variable `ebnf-yac-lex'.
Return a cons (NEXT-TOKEN . PRODUCTION), where NEXT-TOKEN is the token
read after the rule terminator."
  (let ((name ebnf-yac-lex)
        (pending-action ebnf-action)
        body)
    ;; The pending action was captured above; clear it for the next rule.
    (setq ebnf-action nil)
    (unless (eq token 'non-terminal)
      (error "Invalid rule name"))
    (unless (eq (ebnf-yac-lex) 'colon)
      (error "Invalid rule: missing `:'"))
    ;; BODY is (TOKEN . ALTERNATIVE); the rule must end with `;', which the
    ;; lexer reports as `period'.
    (setq body (ebnf-yac-alternative))
    (unless (eq (car body) 'period)
      (error "Invalid rule: missing `;'"))
    (ebnf-eps-add-production name)
    (cons (ebnf-yac-lex)
          (ebnf-make-production name (cdr body) pending-action))))
200
201
202;;; Alternative = { Sequence || "|" }*.
203
(defun ebnf-yac-alternative ()
  "Parse a list of sequences separated by `|'.

Non-empty sequences followed by `|' are collected (most recent first) and
handed, together with the last sequence read, to `ebnf-token-alternative',
whose value is returned."
  (let ((parsed (ebnf-yac-sequence))
        branches)
    ;; PARSED is (TOKEN . SEQUENCE); keep going while TOKEN is `|'.
    (while (eq (car parsed) 'alternative)
      (when (cdr parsed)
        (setq branches (cons (cdr parsed) branches)))
      (setq parsed (ebnf-yac-sequence)))
    (ebnf-token-alternative branches parsed)))
211
212
213;;; Sequence = { Factor }*.
214
(defun ebnf-yac-sequence ()
  "Parse a sequence of factors.

Return a cons (TOKEN . SEQUENCE), where TOKEN is the first token read that
is not a factor.  SEQUENCE is nil when `ebnf-yac-ignore-error-recovery' is
non-nil and the word \"error\" was read in this sequence."
  ;; `ebnf-yac-error' is rebound here so the lexer flags \"error\" only
  ;; for the sequence being read.
  (let (ebnf-yac-error token factors factor)
    (while (setq token (ebnf-yac-lex)
                 factor (ebnf-yac-factor token))
      (setq factors (cons factor factors)))
    (cons token
          (unless (and ebnf-yac-ignore-error-recovery ebnf-yac-error)
            ;; not an error recovery sequence (or they are not ignored)
            (ebnf-token-sequence factors)))))
984ae001
GM
225
226
227;;; Factor = Name
228;;; | "'" "character" "'"
229;;; | "error"
230;;; | "{" "C like commands" "}"
231;;; .
232
(defun ebnf-yac-factor (token)
  "Build the factor corresponding to TOKEN from variable `ebnf-yac-lex'.

Return nil if TOKEN is not `non-terminal', `terminal' or `yac-error'."
  (cond
   ;; Name
   ((eq token 'non-terminal)
    (ebnf-make-non-terminal ebnf-yac-lex))
   ;; 'character'
   ((eq token 'terminal)
    (ebnf-make-terminal ebnf-yac-lex))
   ;; "error"
   ((eq token 'yac-error)
    (ebnf-make-special ebnf-yac-lex))
   ;; anything else is not a factor
   (t
    nil)))
248
249
250;;; Name-List = { Name || "," }*.
251
(defun ebnf-yac-name-list (token)
  "Parse a comma separated list of names starting at TOKEN.

Return a cons (NEXT-TOKEN . NAMES), where NAMES holds the names read (most
recent first) and NEXT-TOKEN is the first token after the list.  If TOKEN
is not `non-terminal', nothing is read and NAMES is nil."
  (let ((more (eq token 'non-terminal))
        names)
    (while more
      ;; The name just read is in variable `ebnf-yac-lex'.
      (setq names (cons ebnf-yac-lex names)
            token (ebnf-yac-lex)
            more (eq token 'comma))
      ;; A comma must be followed by another name.
      (when more
        (unless (eq (ebnf-yac-lex) 'non-terminal)
          (error "Missing token name"))))
    (cons token names)))
262
263\f
264;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
265;; Lexical analyzer
266
267
268;;; Name = "[A-Za-z][A-Za-z0-9_.]*".
269;;;
270;;; Comment = "/*" "any character, but the sequence \"*/\"" "*/"
271;;; | "//" "any character, but the newline \"\\n\"" "\\n".
272
(defconst ebnf-yac-token-table
  ;; control character & 8-bit character are set to `error'
  (let ((table (make-vector 256 'error)))
    ;; upper & lower case letters:
    ;; (`mapc' is used because only the side effect on TABLE matters)
    (mapc
     #'(lambda (char)
         (aset table char 'non-terminal))
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
    ;; printable characters:
    (mapc
     #'(lambda (char)
         (aset table char 'character))
     "!#$&()*+-.0123456789=?@[\\]^_`~")
    ;; Override space characters:
    (aset table ?\n 'space)             ; [NL] linefeed
    (aset table ?\r 'space)             ; [CR] carriage return
    (aset table ?\t 'space)             ; [HT] horizontal tab
    (aset table ?\s 'space)             ; [SP] space
    ;; Override form feed character:
    (aset table ?\f 'form-feed)         ; [FF] form feed
    ;; Override other lexical characters:
    (aset table ?<  'open-angle)
    (aset table ?>  'close-angle)
    (aset table ?,  'comma)
    (aset table ?%  'yac-pragma)
    (aset table ?/  'slash)
    (aset table ?\{ 'yac-code)
    (aset table ?\" 'string)
    (aset table ?\' 'terminal)
    (aset table ?:  'colon)
    (aset table ?|  'alternative)
    (aset table ?\; 'period)
    table)
  "Vector used to map characters to a lexical token.
It has 256 entries, indexed by character code; characters not given a
token class above map to `error'.")
307
308
(defun ebnf-yac-initialize ()
  "Initializations for Yacc/Bison parser.

Empty `ebnf-yac-token-list', the list of names that `ebnf-yac-lex' reports
as terminals."
  (setq ebnf-yac-token-list nil))
312
313
(defun ebnf-yac-lex ()
  "Lexical analyser for Yacc/Bison.

Return a lexical token.

For string, terminal and non-terminal tokens the text read is stored in
variable `ebnf-yac-lex'.  Return `end-of-input' when point is at or
beyond `ebnf-limit'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let ((scanning t)
          token)
      ;; Skip spaces, code blocks and comments.  Scanning goes on while the
      ;; handler selected for the current character returns non-nil.
      (while scanning
        (let ((char (following-char)))
          (if (> char 255)
              ;; character outside `ebnf-yac-token-table'
              (setq token 'error
                    scanning nil)
            (setq token (aref ebnf-yac-token-table char)
                  scanning
                  (cond
                   ((eq token 'yac-code)
                    (ebnf-yac-skip-code))
                   ((eq token 'slash)
                    (ebnf-yac-handle-comment))
                   ((eq token 'form-feed)
                    (forward-char)
                    (setq ebnf-action 'form-feed))
                   ((or (eq token 'space)
                        (and ebnf-yac-skip-char
                             (eq token 'character)))
                    (ebnf-yac-skip-spaces))
                   (t nil))))))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Illegal character"))
       ;; "string"
       ((eq token 'string)
        (setq ebnf-yac-lex (ebnf-get-string))
        'string)
       ;; terminal: 'char'
       ((eq token 'terminal)
        (setq ebnf-yac-lex (ebnf-string " -&(-~" ?\' "terminal"))
        'terminal)
       ;; non-terminal, terminal or "error"
       ((eq token 'non-terminal)
        (setq ebnf-yac-lex (ebnf-buffer-substring "0-9A-Za-z_."))
        (if (member ebnf-yac-lex ebnf-yac-token-list)
            ;; a name declared in the definitions section
            'terminal
          (if (string= ebnf-yac-lex "error")
              (progn
                (setq ebnf-yac-error t)
                'yac-error)
            'non-terminal)))
       ;; %% and Yacc pragmas (%TOKEN, %START, etc).
       ((eq token 'yac-pragma)
        (forward-char)
        (if (eq (following-char) ?%)
            ;; Yacc separator
            (progn
              (forward-char)
              'yac-separator)
          ;; %TOKEN, %RIGHT, %LEFT, %PREC, %NONASSOC; any other pragma
          ;; is reported as `yac-pragma'.
          (or (cdr (assoc (upcase (ebnf-buffer-substring "0-9A-Za-z_"))
                          '(("TOKEN"    . yac-token)
                            ("RIGHT"    . yac-token)
                            ("LEFT"     . yac-token)
                            ("NONASSOC" . yac-token)
                            ("PREC"     . yac-prec))))
              'yac-pragma)))
       ;; miscellaneous
       (t
        (forward-char)
        token)))))
393
394
(defun ebnf-yac-skip-spaces ()
  "Skip white space forward, but not beyond `ebnf-limit'.

When `ebnf-yac-skip-char' is non-nil, also skip the printable characters
that have no grammatical meaning.
Return non-nil if point is before `ebnf-limit' after skipping."
  (skip-chars-forward
   (if ebnf-yac-skip-char
       ;; The dash is quoted: an unquoted \"+-.\" is a range in
       ;; `skip-chars-forward' syntax and would also skip the comma, which
       ;; is a token of its own (it separates names in a name list).
       "\n\r\t !#$&()*+\\-.0123456789=?@[\\\\]^_`~"
     "\n\r\t ")
   ebnf-limit)
  (< (point) ebnf-limit))
402
403
a1548b10
GM
404;; replace the range "\177-\377" (see `ebnf-range-regexp').
(defconst ebnf-yac-skip-chars
  (ebnf-range-regexp "^{}/'\"\000-\010\013\016-\037" ?\177 ?\377)
  "Character set given to `skip-chars-forward' by `ebnf-yac-skip-code'.
It is built by `ebnf-range-regexp'.")
407
408
984ae001
GM
(defun ebnf-yac-skip-code ()
  "Skip a brace delimited code block; point must be at the opening `{'.

Nested braces are balanced; comments, strings and character constants
inside the block are skipped as a whole.  After the block, spaces are
skipped too and the value of `ebnf-yac-skip-spaces' is returned.

Signal an error if `ebnf-limit' is reached before the closing `}' or if
an illegal character is found."
  (forward-char)
  (let ((depth 1))
    (while (> depth 0)
      (skip-chars-forward ebnf-yac-skip-chars ebnf-limit)
      (let ((char (following-char)))
        (cond
         ;; Never look at (or move over) text beyond `ebnf-limit'; this
         ;; mirrors the end of input check in `ebnf-yac-skip-comment'.
         ((>= (point) ebnf-limit)
          (error "Missing end of code block: `}'"))
         ((= char ?{)
          (forward-char)
          (setq depth (1+ depth)))
         ((= char ?})
          (forward-char)
          (setq depth (1- depth)))
         ;; comment, or a lone slash (which is just stepped over)
         ((= char ?/)
          (ebnf-yac-handle-comment))
         ((= char ?\")
          (ebnf-get-string))
         ((= char ?\')
          (ebnf-string " -&(-~" ?\' "character"))
         (t
          (error "Illegal character"))))))
  (ebnf-yac-skip-spaces))
431
432
(defun ebnf-yac-handle-comment ()
  "Handle a possible comment; point must be at a `/'.

The slash is always stepped over.  If it starts a `/*' or `//' comment,
skip the comment and the spaces after it, and return the value of
`ebnf-yac-skip-spaces'; otherwise return nil."
  (forward-char)
  (let ((char (following-char)))
    (cond
     ;; line comment: runs to the end of the line
     ((= char ?/)
      (end-of-line)
      (ebnf-yac-skip-spaces))
     ;; begin comment
     ((= char ?*)
      (ebnf-yac-skip-comment)
      (ebnf-yac-skip-spaces))
     ;; no comment
     (t nil))))
447
448
647a066c
GM
449;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-yac-comment-chars
  (ebnf-range-regexp "^*\000-\010\013\016-\037" ?\177 ?\237)
  "Character set given to `skip-chars-forward' when skipping a comment.
It is built by `ebnf-range-regexp' and is used by `ebnf-yac-skip-comment'
and `ebnf-yac-eps-filename'.")
984ae001
GM
452
453
(defun ebnf-yac-skip-comment ()
  "Skip a `/* ... */' comment; point must be at the `*' that opens it.

The first character of the comment text selects an action: when
`ebnf-eps-executing' is non-nil, `[' adds and `]' removes an EPS context
named by the text that follows; otherwise `ebnf-action' is set from
`ebnf-comment-table'.

Signal an error if the comment is not closed before `ebnf-limit' or if
it contains an illegal character."
  (forward-char)
  (let ((mark (following-char)))
    (if (and ebnf-eps-executing
             (memq mark '(?\[ ?\])))
        (if (= mark ?\[)
            ;; open EPS file
            (ebnf-eps-add-context (ebnf-yac-eps-filename))
          ;; close EPS file
          (ebnf-eps-remove-context (ebnf-yac-eps-filename)))
      ;; any other action in comment
      (setq ebnf-action (aref ebnf-comment-table mark))))
  (let ((inside t))
    (while inside
      (skip-chars-forward ebnf-yac-comment-chars ebnf-limit)
      (cond
       ((>= (point) ebnf-limit)
        (error "Missing end of comment: `*/'"))
       ((/= (following-char) ?*)
        (error "Illegal character"))
       (t
        ;; a run of `*' closes the comment only if a `/' follows it
        (skip-chars-forward "*" ebnf-limit)
        (when (= (following-char) ?/)
          (forward-char)
          (setq inside nil)))))))
481
482
(defun ebnf-yac-eps-filename ()
  "Return the text following the EPS mark character in a comment.

Point must be at the mark character, which is stepped over.  The text
returned, without text properties, ends at `ebnf-limit', just before the
last `*' of a closing `*/', or at the first other character that is not
skipped (a newline, for example), whichever comes first.  Point is left
at the end of that text."
  (forward-char)
  (let ((beg (point))
        (chars (concat ebnf-yac-comment-chars "\n"))
        end)
    (while (not end)
      (skip-chars-forward chars ebnf-limit)
      (setq end
            (cond
             ((>= (point) ebnf-limit)
              (point))
             ((= (following-char) ?*)
              (skip-chars-forward "*" ebnf-limit)
              ;; a `*' run not followed by `/' is part of the name
              (when (= (following-char) ?\/)
                (backward-char)
                (point)))
             (t
              (point)))))
    (buffer-substring-no-properties beg end)))
504
505\f
506;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
507
508
509(provide 'ebnf-yac)
510
511
6b61353c 512;;; arch-tag: 8a96989c-0b1d-42ba-a020-b2901f9a2a4d
984ae001 513;;; ebnf-yac.el ends here