1 ;;; ebnf-bnf.el --- parser for EBNF
3 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 ;; Free Software Foundation, Inc.
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Keywords: wp, ebnf, PostScript
12 ;; This file is part of GNU Emacs.
14 ;; GNU Emacs is free software: you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation, either version 3 of the License, or
17 ;; (at your option) any later version.
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
29 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32 ;; This is part of ebnf2ps package.
34 ;; This package defines a parser for EBNF.
36 ;; See ebnf2ps.el for documentation.
42 ;; The current EBNF that ebnf2ps accepts has the following constructions:
44 ;; ; comment (until end of line)
48 ;; $A default non-terminal
49 ;; $"C" default terminal
50 ;; $?C? default special
51 ;; A = B. production (A is the header and B the body)
52 ;; C D sequence (C occurs before D)
53 ;; C | D alternative (C or D occurs)
54 ;; A - B exception (A excluding B, B without any non-terminal)
55 ;; n * A repetition (A repeats at least n (integer) times)
56 ;; n * n A repetition (A repeats exactly n (integer) times)
57 ;; n * m A repetition (A repeats at least n (integer) and at most m (integer) times)
59 ;; (C) group (expression C is grouped together)
60 ;; [C] optional (C may or may not occur)
61 ;; C+ one or more occurrences of C
62 ;; {C}+ one or more occurrences of C
63 ;; {C}* zero or more occurrences of C
64 ;; {C} zero or more occurrences of C
65 ;; C / D equivalent to: C {D C}*
66 ;; {C || D}+ equivalent to: C {D C}*
67 ;; {C || D}* equivalent to: [C {D C}*]
68 ;; {C || D} equivalent to: [C {D C}*]
70 ;; The EBNF syntax written using the notation above is:
72 ;; EBNF = {production}+.
74 ;; production = non_terminal "=" body ".". ;; production
76 ;; body = {sequence || "|"}*. ;; alternative
78 ;; sequence = {exception}*. ;; sequence
80 ;; exception = repeat [ "-" repeat]. ;; exception
82 ;; repeat = [ integer "*" [ integer ]] term. ;; repetition
84 ;; term = factor
85 ;; | [factor] "+" ;; one-or-more
86 ;; | [factor] "/" [factor] ;; one-or-more
87 ;; .
89 ;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
90 ;; | [ "$" ] non_terminal ;; non-terminal
91 ;; | [ "$" ] "?" special "?" ;; special
92 ;; | "(" body ")" ;; group
93 ;; | "[" body "]" ;; zero-or-one
94 ;; | "{" body [ "||" body ] "}+" ;; one-or-more
95 ;; | "{" body [ "||" body ] "}*" ;; zero-or-more
96 ;; | "{" body [ "||" body ] "}" ;; zero-or-more
99 ;; non_terminal = "[!#%&'*-,0-:<>@-Z\\\\^-z~\\240-\\377]+".
100 ;; ;; that is, a valid non_terminal accepts decimal digits, letters (upper
101 ;; ;; and lower), 8-bit accented characters,
102 ;; ;; "!", "#", "%", "&", "'", "*", "+", ",", ":",
103 ;; ;; "<", ">", "@", "\", "^", "_", "`" and "~".
105 ;; terminal = "\\([^\"\\]\\|\\\\[ -~\\240-\\377]\\)+".
106 ;; ;; that is, a valid terminal accepts any printable character (including
107 ;; ;; 8-bit accented characters) except `"', as `"' is used to delimit a
108 ;; ;; terminal. Also, accepts escaped characters, that is, a character
109 ;; ;; pair starting with `\' followed by a printable character, for
110 ;; ;; example: \", \\.
112 ;; special = "[^?\\000-\\010\\012-\\037\\177-\\237]*".
113 ;; ;; that is, a valid special accepts any printable character (including
114 ;; ;; 8-bit accented characters) and tabs except `?', as `?' is used to
115 ;; ;; delimit a special.
117 ;; integer = "[0-9]+".
118 ;; ;; that is, an integer is a sequence of one or more decimal digits.
120 ;; comment = ";" "[^\\n\\000-\\010\\016-\\037\\177-\\237]*" "\\n".
121 ;; ;; that is, a comment starts with the character `;' and terminates at end
122 ;; ;; of line. Also, it only accepts printable characters (including 8-bit
123 ;; ;; accented characters) and tabs.
126 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Variable sharing its name with the lexer function `ebnf-bnf-lex'.
;; The lexer stores the text of the token it has just scanned here
;; (integer digits, terminal string, non-terminal name, special text) and
;; the parser functions read it right after receiving the token symbol.
134 (defvar ebnf-bnf-lex nil
135 "Value returned by `ebnf-bnf-lex' function.")
138 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
139 ;; Syntactic analyzer
142 ;;; EBNF = {production}+.
;; Entry point of the syntactic analyzer.
;; START is combined with `ebnf-limit' to compute `total', the divisor of
;; the percentage computed from point inside the loop (presumably START is
;; the buffer position where parsing begins -- confirm against callers).
;; An input whose first token is already `end-of-input' is rejected with
;; an error.  Otherwise `ebnf-production' is called repeatedly until the
;; lexer reports `end-of-input'; each `rule' is offered to
;; `ebnf-add-empty-rule-list' and, when that returns nil, is pushed onto
;; `prod-list'.
;; NOTE(review): several lines of this definition are absent from this
;; listing (e.g. where `bias' and `rule' receive their values and what is
;; finally returned); check the complete file before relying on this.
144 (defun ebnf-bnf-parser (start)
146 (let ((total (+ (- ebnf-limit start
) 1))
149 prod-list token rule
)
151 (setq token
(ebnf-bnf-lex))
152 (and (eq token
'end-of-input
)
153 (error "Invalid EBNF file format"))
154 (while (not (eq token
'end-of-input
))
157 (/ (* (- (point) bias
) 100.0) total
))
158 (setq token
(ebnf-production token
)
161 (or (ebnf-add-empty-rule-list rule
)
162 (setq prod-list
(cons rule prod-list
))))
167 ;;; production = non-terminal "=" body ".".
;; Parse one production whose first token, TOKEN, was already read by the
;; caller.  The header name is taken from the variable `ebnf-bnf-lex'
;; before any further token is read.  `ebnf-action' is reset to nil, then
;; the function checks in order: TOKEN is `non-terminal', the next token
;; is `equal', and the token that terminates the body returned by
;; `ebnf-body' (its car) is `period'; each failed check signals an error.
;; The header is registered with `ebnf-eps-add-production' and the node
;; is built by `ebnf-make-production'.
;; NOTE(review): the bindings of `body' and `action' and the outer form of
;; the return value are not visible in this listing.
169 (defun ebnf-production (token)
170 (let ((header ebnf-bnf-lex
)
173 (setq ebnf-action nil
)
174 (or (eq token
'non-terminal
)
175 (error "Invalid header production"))
176 (or (eq (ebnf-bnf-lex) 'equal
)
177 (error "Invalid production: missing `='"))
178 (setq body
(ebnf-body))
179 (or (eq (car body
) 'period
)
180 (error "Invalid production: missing `.'"))
181 (setq body
(cdr body
))
182 (ebnf-eps-add-production header
)
184 (ebnf-make-production header body action
))))
187 ;;; body = {sequence || "|"}*.
;; Loop of the body parser (NOTE(review): the defun and let lines that
;; open this definition are missing from this listing; the name
;; `ebnf-body' is taken from the calls made elsewhere in the file).
;; Each call to `ebnf-sequence' returns a pair whose car is the token that
;; ended the sequence.  While that token is `alternative', the sequence
;; (the cdr) is pushed onto `body' and another sequence is parsed.  The
;; last pair, still carrying its terminating token, is passed together
;; with the accumulated `body' to `ebnf-token-alternative'.
191 (while (eq (car (setq sequence
(ebnf-sequence))) 'alternative
)
192 (setq sequence
(cdr sequence
)
193 body
(cons sequence body
)))
194 (ebnf-token-alternative body sequence
)))
197 ;;; sequence = {exception}*.
;; Parse a sequence of exceptions.  Reads a token with the lexer, then
;; keeps calling `ebnf-exception'; every term obtained is pushed onto
;; `seq', and the accumulated list is handed to `ebnf-token-sequence'.
;; NOTE(review): the lines that bind `seq'/`term' and that update `token'
;; between iterations are missing from this listing.
199 (defun ebnf-sequence ()
200 (let ((token (ebnf-bnf-lex))
202 (while (setq term
(ebnf-exception token
)
205 (setq seq
(cons term seq
)))
207 (ebnf-token-sequence seq
))))
210 ;;; exception = repeat [ "-" repeat].
;; Parse `repeat [ "-" repeat ]'.  TOKEN is the current token; the first
;; operand comes from `ebnf-repeat'.  When the token following that
;; operand (car of the result) is `except', a second operand is parsed
;; from the next token, its node is checked by `ebnf-no-non-terminal',
;; and the two are combined by `ebnf-token-except'.
;; NOTE(review): the branch taken when no `except' token follows is not
;; visible in this listing.
212 (defun ebnf-exception (token)
213 (let ((term (ebnf-repeat token
)))
214 (if (not (eq (car term
) 'except
))
218 (let ((exception (ebnf-repeat (ebnf-bnf-lex))))
219 (ebnf-no-non-terminal (cdr exception
))
220 (ebnf-token-except (cdr term
) exception
)))))
;; Walk NODE recursively and signal an error if a non-terminal node is
;; found; used by `ebnf-exception' on the right-hand operand of `-'.
;; Dispatch is on the value of `ebnf-node-kind':
;;   ebnf-generate-non-terminal        -> error
;;   ebnf-generate-repeat              -> check the node separator
;;   ebnf-generate-optional / -except  -> check the node list
;;   ebnf-generate-one-or-more /
;;   ebnf-generate-zero-or-more        -> check node list and separator
;;   ebnf-generate-alternative /
;;   ebnf-generate-sequence            -> check each element of the list
;; NOTE(review): the cond and loop header lines are missing from this
;; listing.
223 (defun ebnf-no-non-terminal (node)
225 (let ((kind (ebnf-node-kind node
)))
227 ((eq kind
'ebnf-generate-non-terminal
)
228 (error "Exception sequence should not contain a non-terminal"))
229 ((eq kind
'ebnf-generate-repeat
)
230 (ebnf-no-non-terminal (ebnf-node-separator node
)))
231 ((memq kind
'(ebnf-generate-optional ebnf-generate-except
))
232 (ebnf-no-non-terminal (ebnf-node-list node
)))
233 ((memq kind
'(ebnf-generate-one-or-more ebnf-generate-zero-or-more
))
234 (ebnf-no-non-terminal (ebnf-node-list node
))
235 (ebnf-no-non-terminal (ebnf-node-separator node
)))
236 ((memq kind
'(ebnf-generate-alternative ebnf-generate-sequence
))
237 (let ((seq (ebnf-node-list node
)))
239 (ebnf-no-non-terminal (car seq
))
240 (setq seq
(cdr seq
)))))
244 ;;; repeat = [ integer "*" [ integer ]] term.
;; Parse `[ integer "*" [ integer ]] term'.  When TOKEN is `integer', its
;; text (variable `ebnf-bnf-lex') is saved as `times' and the next token
;; must be `repeat', otherwise an error is signaled.  An optional second
;; integer is saved as `upper'.  The term that follows is parsed by
;; `ebnf-term' and the pieces are combined by `ebnf-token-repeat'.
;; NOTE(review): the branch for a TOKEN that is not `integer' and the
;; binding of `upper' are not visible in this listing.
246 (defun ebnf-repeat (token)
247 (if (not (eq token
'integer
))
249 (let ((times ebnf-bnf-lex
)
251 (or (eq (ebnf-bnf-lex) 'repeat
)
252 (error "Missing `*'"))
253 (setq token
(ebnf-bnf-lex))
254 (when (eq token
'integer
)
255 (setq upper ebnf-bnf-lex
256 token
(ebnf-bnf-lex)))
257 (ebnf-token-repeat times
(ebnf-term token
) upper
))))
260 ;;; term = factor
261 ;;; | [factor] "+" ;; one-or-more
262 ;;; | [factor] "/" [factor] ;; one-or-more
;; Parse a term: a factor optionally followed by a `+' or `/' suffix.
;; TOKEN is the current token; the factor comes from `ebnf-factor'.
;; For the `one-or-more' token the result depends on the factor kind:
;; an existing zero-or-more or one-or-more node is matched by its own
;; clause (see the in-line notes), an optional node is wrapped by
;; `ebnf-make-zero-or-more', anything else by `ebnf-make-one-or-more'.
;; For the `/' form a separator factor is parsed from the next token, a
;; missing left factor is replaced by `ebnf-make-empty', and the node is
;; built by `ebnf-make-one-or-more' with the separator.
;; NOTE(review): several cond/test lines are missing from this listing,
;; including the test that selects the `/' branch.
265 (defun ebnf-term (token)
266 (let ((factor (ebnf-factor token
)))
268 (setq token
(ebnf-bnf-lex)))
271 ((eq token
'one-or-more
)
274 (let ((kind (ebnf-node-kind factor
)))
276 ;; { A }+ + ==> { A }+
277 ;; { A }* + ==> { A }*
278 ((memq kind
'(ebnf-generate-zero-or-more
279 ebnf-generate-one-or-more
))
281 ;; [ A ] + ==> { A }*
282 ((eq kind
'ebnf-generate-optional
)
283 (ebnf-make-zero-or-more (list factor
)))
286 (ebnf-make-one-or-more (list factor
)))
288 ;; [factor] / [factor]
290 (setq token
(ebnf-bnf-lex))
291 (let ((sep (ebnf-factor token
)))
293 (setq factor
(or factor
(ebnf-make-empty))))
298 (ebnf-make-one-or-more factor sep
)))))
305 ;;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
306 ;;; | [ "$" ] non_terminal ;; non-terminal
307 ;;; | [ "$" ] "?" special "?" ;; special
308 ;;; | "(" body ")" ;; group
309 ;;; | "[" body "]" ;; zero-or-one
310 ;;; | "{" body [ "||" body ] "}+" ;; one-or-more
311 ;;; | "{" body [ "||" body ] "}*" ;; zero-or-more
312 ;;; | "{" body [ "||" body ] "}" ;; zero-or-more
;; Parse a single factor selected by TOKEN:
;;   terminal        -> `ebnf-make-terminal' on the current lexeme
;;   non-terminal    -> `ebnf-make-non-terminal' on the current lexeme
;;   (special)       -> `ebnf-make-special' on the current lexeme
;;   begin-group     -> a body that must end with `end-group'
;;   begin-optional  -> a body that must end with `end-optional', wrapped
;;                      by `ebnf-token-optional'
;;   begin-list      -> a body, optionally followed by `list-separator'
;;                      and a separator body; the closing token selects
;;                      `ebnf-make-one-or-more' (`end-one-or-more') or
;;                      `ebnf-make-zero-or-more' (`end-zero-or-more');
;;                      otherwise an error is signaled.
;; The current lexeme is read from the variable `ebnf-bnf-lex'.
;; NOTE(review): the cond header, the test of the special clause, the
;; result of the group clause and the fall-through clause are missing
;; from this listing.
315 (defun ebnf-factor (token)
318 ((eq token
'terminal
)
319 (ebnf-make-terminal ebnf-bnf-lex
))
321 ((eq token
'non-terminal
)
322 (ebnf-make-non-terminal ebnf-bnf-lex
))
325 (ebnf-make-special ebnf-bnf-lex
))
327 ((eq token
'begin-group
)
328 (let ((body (ebnf-body)))
329 (or (eq (car body
) 'end-group
)
330 (error "Missing `)'"))
333 ((eq token
'begin-optional
)
334 (let ((body (ebnf-body)))
335 (or (eq (car body
) 'end-optional
)
336 (error "Missing `]'"))
337 (ebnf-token-optional (cdr body
))))
339 ((eq token
'begin-list
)
340 (let* ((body (ebnf-body))
342 (list-part (cdr body
))
344 (and (eq token
'list-separator
)
346 (setq body
(ebnf-body) ; get separator
348 sep-part
(cdr body
)))
351 ((eq token
'end-one-or-more
)
352 (ebnf-make-one-or-more list-part sep-part
))
354 ((eq token
'end-zero-or-more
)
355 (ebnf-make-zero-or-more list-part sep-part
))
357 (error "Missing `}+', `}*' or `}'"))
365 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-entry vector indexed by character code.  Every entry starts as the
;; symbol `error'; `ebnf-bnf-initialize' overwrites the entries of the
;; characters the lexer accepts.
369 (defconst ebnf-bnf-token-table
(make-vector 256 'error
)
370 "Vector used to map characters to a lexical token.")
;; Fill `ebnf-bnf-token-table'.  Four consecutive loops advance `char'
;; and assign a default class to each range: up to octal 060
;; `non-terminal', up to octal 072 `integer', up to octal 177
;; `non-terminal', and up to octal 400 `non-terminal'.  Individual
;; entries are then overridden: blank characters become `space', form
;; feed becomes `form-feed', and the punctuation of the grammar receives
;; its own token symbol.  Finally the characters held in
;; `ebnf-lex-comment-char' and `ebnf-lex-eop-char' are mapped to
;; `comment' and `period'.  Entries never assigned keep the value the
;; table was created with.
;; NOTE(review): the line that binds `char' (and any line that moves it
;; between the ranges) is missing from this listing, and the character
;; literals appear split across lines.
373 (defun ebnf-bnf-initialize ()
374 "Initialize EBNF token table."
375 ;; control character & control 8-bit character are set to `error'
377 ;; printable character:
378 (while (< char ?
\060)
379 (aset ebnf-bnf-token-table char
'non-terminal
)
380 (setq char
(1+ char
)))
382 (while (< char ?
\072)
383 (aset ebnf-bnf-token-table char
'integer
)
384 (setq char
(1+ char
)))
385 ;; printable character:
386 (while (< char ?
\177)
387 (aset ebnf-bnf-token-table char
'non-terminal
)
388 (setq char
(1+ char
)))
389 ;; European 8-bit accentuated characters:
391 (while (< char ?
\400)
392 (aset ebnf-bnf-token-table char
'non-terminal
)
393 (setq char
(1+ char
)))
394 ;; Override space characters:
395 (aset ebnf-bnf-token-table ?
\013 'space
) ; [VT] vertical tab
396 (aset ebnf-bnf-token-table ?
\n 'space
) ; [NL] linefeed
397 (aset ebnf-bnf-token-table ?
\r 'space
) ; [CR] carriage return
398 (aset ebnf-bnf-token-table ?
\t 'space
) ; [HT] horizontal tab
399 (aset ebnf-bnf-token-table ?\
'space
) ; [SP] space
400 ;; Override form feed character:
401 (aset ebnf-bnf-token-table ?
\f 'form-feed
) ; [FF] form feed
402 ;; Override other lexical characters:
403 (aset ebnf-bnf-token-table ?
\" 'terminal
)
404 (aset ebnf-bnf-token-table ?
\? 'special
)
405 (aset ebnf-bnf-token-table ?\
( 'begin-group
)
406 (aset ebnf-bnf-token-table ?\
) 'end-group
)
407 (aset ebnf-bnf-token-table ?
* 'repeat
)
408 (aset ebnf-bnf-token-table ?-
'except
)
409 (aset ebnf-bnf-token-table ?
= 'equal
)
410 (aset ebnf-bnf-token-table ?\
[ 'begin-optional
)
411 (aset ebnf-bnf-token-table ?\
] 'end-optional
)
412 (aset ebnf-bnf-token-table ?\
{ 'begin-list
)
413 (aset ebnf-bnf-token-table ?|
'alternative
)
414 (aset ebnf-bnf-token-table ?\
} 'end-list
)
415 (aset ebnf-bnf-token-table ?
/ 'list
)
416 (aset ebnf-bnf-token-table ?
+ 'one-or-more
)
417 (aset ebnf-bnf-token-table ?$
'default
)
418 ;; Override comment character:
419 (aset ebnf-bnf-token-table ebnf-lex-comment-char
'comment
)
420 ;; Override end of production character:
421 (aset ebnf-bnf-token-table ebnf-lex-eop-char
'period
)))
424 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
;; Character set accepted inside a non-terminal name.  It is built by
;; `ebnf-range-regexp' from the literal ASCII part of the set plus the
;; 8-bit range octal 240 to 377, and the lexer passes it to
;; `ebnf-buffer-substring' when it scans a non-terminal.
425 (defconst ebnf-bnf-non-terminal-chars
426 (ebnf-range-regexp "!#%&'*-,0-:<>@-Z\\\\^-z~" ?
\240 ?
\377))
;; Lexer.  What this listing shows:
;; - the end of input is detected by comparing point with `ebnf-limit';
;; - a loop classifies the following character through
;;   `ebnf-bnf-token-table' (characters above 255 are tested separately),
;;   skips blanks with `skip-chars-forward', skips comments with
;;   `ebnf-bnf-skip-comment', and records `form-feed' in `ebnf-action'
;;   when a form feed is met;
;; - `ebnf-default-p' is reset to nil, and set to t only when the
;;   character examined in the default-marker branch maps to terminal,
;;   non-terminal or special (otherwise an error is signaled);
;; - the token text is stored in the variable `ebnf-bnf-lex': digits for
;;   an integer, the text between `?' delimiters for a special (with the
;;   delimiters added when `ebnf-special-show-delimiter' is non-nil), the
;;   unescaped string for a terminal, and the name for a non-terminal;
;; - a non-terminal name is additionally matched against
;;   `ebnf-terminal-regexp' with `case-fold-search' bound to
;;   `ebnf-case-fold-search', and the match must cover the whole name;
;; - `end-list' and `alternative' are refined by looking at the character
;;   that follows (`+' or `*' after a closing brace, a second `|').
;; NOTE(review): most result expressions and several clause heads are
;; missing from this listing, so the returned symbols are not documented
;; here.
429 (defun ebnf-bnf-lex ()
430 "Lexical analyzer for EBNF.
432 Return a lexical token.
434 See documentation for variable `ebnf-bnf-lex'."
435 (if (>= (point) ebnf-limit
)
438 ;; skip spaces and comments
439 (while (if (> (following-char) 255)
443 (setq token
(aref ebnf-bnf-token-table
(following-char)))
446 (skip-chars-forward " \013\n\r\t" ebnf-limit
)
447 (< (point) ebnf-limit
))
449 (ebnf-bnf-skip-comment))
450 ((eq token
'form-feed
)
452 (setq ebnf-action
'form-feed
))
455 (setq ebnf-default-p nil
)
458 ((>= (point) ebnf-limit
)
462 (error "Invalid character"))
466 (if (memq (aref ebnf-bnf-token-table
(following-char))
467 '(terminal non-terminal special
))
470 (setq ebnf-default-p t
))
471 (error "Invalid `default' element")))
474 (setq ebnf-bnf-lex
(ebnf-buffer-substring "0-9"))
476 ;; special: ?special?
478 (setq ebnf-bnf-lex
(concat (and ebnf-special-show-delimiter
"?")
479 (ebnf-string " ->@-~" ?
\? "special")
480 (and ebnf-special-show-delimiter
"?")))
482 ;; terminal: "string"
483 ((eq token
'terminal
)
484 (setq ebnf-bnf-lex
(ebnf-unescape-string (ebnf-get-string)))
486 ;; non-terminal or terminal
487 ((eq token
'non-terminal
)
488 (setq ebnf-bnf-lex
(ebnf-buffer-substring ebnf-bnf-non-terminal-chars
))
489 (let ((case-fold-search ebnf-case-fold-search
)
491 (if (and ebnf-terminal-regexp
492 (setq match
(string-match ebnf-terminal-regexp
495 (= (match-end 0) (length ebnf-bnf-lex
)))
498 ;; end of list: }+, }*, }
499 ((eq token
'end-list
)
502 ((= (following-char) ?
+)
505 ((= (following-char) ?
*)
511 ;; alternative: |, ||
512 ((eq token
'alternative
)
514 (if (/= (following-char) ?|
)
518 ;; miscellaneous: {, (, ), [, ], ., =, /, +, -, *
525 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
;; Negated character set (note the leading `^') used to skip over the
;; text of a comment: it excludes newline, the control ranges octal
;; 000-010 and 016-037, and the range octal 177 to 237 supplied through
;; `ebnf-range-regexp'.  It is passed to `skip-chars-forward' by
;; `ebnf-bnf-skip-comment' and to `ebnf-buffer-substring' by
;; `ebnf-bnf-eps-filename'.
526 (defconst ebnf-bnf-comment-chars
527 (ebnf-range-regexp "^\n\000-\010\016-\037" ?
\177 ?
\237))
;; Skip a comment and process the action encoded in its first character.
;; When `ebnf-eps-executing' is non-nil, the characters `[', `]', `H' and
;; `F' trigger `ebnf-eps-add-context', `ebnf-eps-remove-context',
;; `ebnf-eps-header-comment' and `ebnf-eps-footer-comment' respectively,
;; each called with the value of `ebnf-bnf-eps-filename'.  Otherwise
;; `ebnf-action' is taken from `ebnf-comment-table' and the comment text
;; is skipped up to `ebnf-limit'.  Afterwards the end of the comment is
;; validated: reaching `ebnf-limit' or a newline is accepted by its own
;; clause, anything else signals an error.
;; NOTE(review): the cond header and the results of the accepting
;; clauses are missing from this listing.
530 (defun ebnf-bnf-skip-comment ()
534 ((and ebnf-eps-executing
(= (following-char) ?\
[))
535 (ebnf-eps-add-context (ebnf-bnf-eps-filename)))
537 ((and ebnf-eps-executing
(= (following-char) ?\
]))
538 (ebnf-eps-remove-context (ebnf-bnf-eps-filename)))
540 ((and ebnf-eps-executing
(= (following-char) ?H
))
541 (ebnf-eps-header-comment (ebnf-bnf-eps-filename)))
543 ((and ebnf-eps-executing
(= (following-char) ?F
))
544 (ebnf-eps-footer-comment (ebnf-bnf-eps-filename)))
545 ;; any other action in comment
547 (setq ebnf-action
(aref ebnf-comment-table
(following-char)))
548 (skip-chars-forward ebnf-bnf-comment-chars ebnf-limit
))
550 ;; check for a valid end of comment
551 (cond ((>= (point) ebnf-limit
)
553 ((= (following-char) ?
\n)
557 (error "Invalid character"))
;; Return the text obtained from `ebnf-buffer-substring' with the comment
;; character set; `ebnf-bnf-skip-comment' passes the result to the EPS
;; context/header/footer functions.
;; NOTE(review): one line of this definition is missing from this
;; listing (between the defun line and the call below).
561 (defun ebnf-bnf-eps-filename ()
563 (ebnf-buffer-substring ebnf-bnf-comment-chars
))
;; Remove backslash escapes from STR.  A first pass counts the escape
;; characters in `n-esc'.  When at least one was found, a new string of
;; length (length STR) minus `n-esc' is allocated and filled by copying
;; characters from STR while dropping the escapes; once no escape
;; remains, the rest is copied unchanged.
;; NOTE(review): the loop headers, index updates and the value returned
;; when STR has no escape are missing from this listing.
566 (defun ebnf-unescape-string (str)
567 (let* ((len (length str
))
571 ;; count number of escapes
574 (if (= (aref str istr
) ?
\\)
576 (setq n-esc
(1+ n-esc
))
582 ;; at least one escape
583 (let ((new (make-string (- len n-esc
) ?\
))
585 ;; eliminate all escapes
588 (and (= (aref str istr
) ?
\\)
591 (aset new inew
(aref str istr
))
594 ;; remaining string has no escape
596 (aset new inew
(aref str istr
))
602 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
608 ;; arch-tag: 3b1834d3-8367-475b-80d5-8e0bbd00ce50
609 ;;; ebnf-bnf.el ends here