1 ;;; ebnf-abn.el --- parser for ABNF (Augmented BNF)
3 ;; Copyright (C) 2004 Free Software Foundation, Inc.
5 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
6 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Time-stamp: <2004/03/18 23:49:58 vinicius>
8 ;; Keywords: wp, ebnf, PostScript
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 ;; Boston, MA 02111-1307, USA.
30 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33 ;; This is part of ebnf2ps package.
35 ;; This package defines a parser for ABNF (Augmented BNF).
37 ;; See ebnf2ps.el for documentation.
44 ;; `http://www.ietf.org/rfc/rfc2234.txt'
46 ;; `http://www.faqs.org/rfcs/rfc2234.html'
48 ;; `http://www.rnp.br/ietf/rfc/rfc2234.txt'
49 ;; ("Augmented BNF for Syntax Specifications: ABNF").
52 ;; rulelist = 1*( rule / (*c-wsp c-nl) )
54 ;; rule = rulename defined-as elements c-nl
55 ;; ; continues if next line starts with white space
57 ;; rulename = ALPHA *(ALPHA / DIGIT / "-")
59 ;; defined-as = *c-wsp ("=" / "=/") *c-wsp
60 ;; ; basic rules definition and incremental
63 ;; elements = alternation *c-wsp
65 ;; c-wsp = WSP / (c-nl WSP)
67 ;; c-nl = comment / CRLF
68 ;; ; comment or newline
70 ;; comment = ";" *(WSP / VCHAR) CRLF
72 ;; alternation = concatenation
73 ;; *(*c-wsp "/" *c-wsp concatenation)
75 ;; concatenation = repetition *(1*c-wsp repetition)
77 ;; repetition = [repeat] element
79 ;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
81 ;; element = rulename / group / option /
82 ;; char-val / num-val / prose-val
84 ;; group = "(" *c-wsp alternation *c-wsp ")"
86 ;; option = "[" *c-wsp alternation *c-wsp "]"
88 ;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
89 ;; ; quoted string of SP and VCHAR without DQUOTE
91 ;; num-val = "%" (bin-val / dec-val / hex-val)
93 ;; bin-val = "b" 1*BIT
94 ;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
95 ;; ; series of concatenated bit values
96 ;; ; or single ONEOF range
98 ;; dec-val = "d" 1*DIGIT
99 ;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
101 ;; hex-val = "x" 1*HEXDIG
102 ;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
104 ;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
105 ;; ; bracketed string of SP and VCHAR without
107 ;; ; prose description, to be used as last resort
109 ;; ; Core rules -- the coding depends on the system, here is used 7-bit ASCII
111 ;; ALPHA = %x41-5A / %x61-7A
117 ;; ; any 7-bit US-ASCII character, excluding NUL
123 ;; ; Internet standard newline
125 ;; CTL = %x00-1F / %x7F
132 ;; ; " (Double Quote)
134 ;; HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
142 ;; LWSP = *(WSP / CRLF WSP)
143 ;; ; linear white space (past newline)
152 ;; ; visible (printing) characters
160 ;; 1. Rule names and terminal strings are case INSENSITIVE.
161 ;; So, the following rule names are all equal:
162 ;; Rule-name, rule-Name, rule-name, RULE-NAME
163 ;; Also, the following strings are equal:
164 ;; "abc", "ABC", "aBc", "Abc", "aBC", etc.
166 ;; 2. To have a case SENSITIVE string, use the character notation.
167 ;; For example, to specify the lowercase string "abc", use:
170 ;; 3. There are no implicit spaces between elements, for example, the
177 ;; mumble = foo bar foo
179 ;; Are equivalent to the following rule:
181 ;; mumble = %x61.62.61
183 ;; If spaces are needed, they should be explicitly specified, like:
185 ;; spaces = 1*(%x20 / %x09) ; one or more spaces or tabs
187 ;; mumble = foo spaces bar spaces foo
189 ;; 4. Lines starting with space or tab are considered a continuation line.
190 ;; For example, the rule:
200 ;; Differences Between ABNF And ebnf2ps ABNF
201 ;; -----------------------------------------
203 ;; Besides the characters that ABNF accepts, ebnf2ps ABNF also accepts the
204 ;; underscore (_) in rule names and European 8-bit accented characters (from
205 ;; \240 to \377) in rule names, strings and comments.
208 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Text of the most recently scanned token.  The function `ebnf-abn-lex'
;; returns only the token type (a symbol such as `non-terminal' or
;; `integer'); for tokens that carry text it stores that text in this
;; variable with `setq', and the parser functions read it back afterwards
;; (for example as the rule name or as a repetition bound).
216 (defvar ebnf-abn-lex nil
217 "Value returned by `ebnf-abn-lex' function.")
220 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
221 ;; Syntactic analyzer
224 ;;; rulelist = 1*( rule / (*c-wsp c-nl) )
226 (defun ebnf-abn-parser (start)
228 (let ((total (+ (- ebnf-limit start
) 1))
231 rule-list token rule
)
233 (setq token
(ebnf-abn-lex))
234 (and (eq token
'end-of-input
)
235 (error "Invalid ABNF file format"))
236 (and (eq token
'end-of-rule
)
237 (setq token
(ebnf-abn-lex)))
238 (while (not (eq token
'end-of-input
))
241 (/ (* (- (point) bias
) 100.0) total
))
242 (setq token
(ebnf-abn-rule token
)
245 (or (ebnf-add-empty-rule-list rule
)
246 (setq rule-list
(cons rule rule-list
))))
251 ;;; rule = rulename defined-as elements c-nl
252 ;;; ; continues if next line starts with white space
254 ;;; rulename = ALPHA *(ALPHA / DIGIT / "-")
256 ;;; defined-as = *c-wsp ("=" / "=/") *c-wsp
257 ;;; ; basic rules definition and incremental
260 ;;; elements = alternation *c-wsp
262 ;;; c-wsp = WSP / (c-nl WSP)
264 ;;; c-nl = comment / CRLF
265 ;;; ; comment or newline
267 ;;; comment = ";" *(WSP / VCHAR) CRLF
270 (defun ebnf-abn-rule (token)
271 (let ((name ebnf-abn-lex
)
274 (setq ebnf-action nil
)
275 (or (eq token
'non-terminal
)
276 (error "Invalid rule name"))
277 (setq token
(ebnf-abn-lex))
278 (or (memq token
'(equal incremental-alternative
))
279 (error "Invalid rule: missing `=' or `=/'"))
280 (and (eq token
'incremental-alternative
)
281 (setq name
(concat name
" =/")))
282 (setq elements
(ebnf-abn-alternation))
283 (or (memq (car elements
) '(end-of-rule end-of-input
))
284 (error "Invalid rule: there is no end of rule"))
285 (setq elements
(cdr elements
))
286 (ebnf-eps-add-production name
)
288 (ebnf-make-production name elements action
))))
291 ;;; alternation = concatenation
292 ;;; *(*c-wsp "/" *c-wsp concatenation)
295 (defun ebnf-abn-alternation ()
296 (let (body concatenation
)
297 (while (eq (car (setq concatenation
298 (ebnf-abn-concatenation (ebnf-abn-lex))))
300 (setq body
(cons (cdr concatenation
) body
)))
301 (ebnf-token-alternative body concatenation
)))
304 ;;; concatenation = repetition *(1*c-wsp repetition)
307 (defun ebnf-abn-concatenation (token)
308 (let ((term (ebnf-abn-repetition token
))
310 (or (setq token
(car term
)
312 (error "Empty element"))
313 (setq seq
(cons term seq
))
314 (while (setq term
(ebnf-abn-repetition token
)
317 (setq seq
(cons term seq
)))
319 (if (= (length seq
) 1)
320 ;; sequence with only one element
323 (ebnf-make-sequence (nreverse seq
))))))
326 ;;; repetition = [repeat] element
328 ;;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
331 (defun ebnf-abn-repetition (token)
333 ;; INTEGER [ "*" [ INTEGER ] ]
334 (when (eq token
'integer
)
335 (setq lower ebnf-abn-lex
336 token
(ebnf-abn-lex))
337 (or (eq token
'repeat
)
340 (when (eq token
'repeat
)
341 ;; only * ==> lower & upper are empty string
345 (when (eq (setq token
(ebnf-abn-lex)) 'integer
)
346 (setq upper ebnf-abn-lex
347 token
(ebnf-abn-lex))))
348 (let ((element (ebnf-abn-element token
)))
350 ;; there is a repetition
353 (error "Missing element repetition"))
354 (setq token
(ebnf-abn-lex))
357 ((and (string= lower
"1") (null upper
))
358 (cons token
(ebnf-make-one-or-more element
)))
360 ((or (and (string= lower
"0") (null upper
))
361 (and (string= lower
"") (string= upper
"")))
362 (cons token
(ebnf-make-zero-or-more element
)))
365 (ebnf-token-repeat lower
(cons token element
) upper
))))
366 ;; there is an element
368 (cons (ebnf-abn-lex) element
))
369 ;; something that caller has to deal
371 (cons token nil
))))))
374 ;;; element = rulename / group / option /
375 ;;; char-val / num-val / prose-val
377 ;;; group = "(" *c-wsp alternation *c-wsp ")"
379 ;;; option = "[" *c-wsp alternation *c-wsp "]"
381 ;;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
382 ;;; ; quoted string of SP and VCHAR without DQUOTE
384 ;;; num-val = "%" (bin-val / dec-val / hex-val)
386 ;;; bin-val = "b" 1*BIT
387 ;;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
388 ;;; ; series of concatenated bit values
389 ;;; ; or single ONEOF range
391 ;;; dec-val = "d" 1*DIGIT
392 ;;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
394 ;;; hex-val = "x" 1*HEXDIG
395 ;;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
397 ;;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
398 ;;; ; bracketed string of SP and VCHAR without
400 ;;; ; prose description, to be used as last resort
403 (defun ebnf-abn-element (token)
406 ((eq token
'terminal
)
407 (ebnf-make-terminal ebnf-abn-lex
))
409 ((eq token
'non-terminal
)
410 (ebnf-make-non-terminal ebnf-abn-lex
))
412 ((eq token
'begin-group
)
413 (let ((body (ebnf-abn-alternation)))
414 (or (eq (car body
) 'end-group
)
415 (error "Missing `)'"))
418 ((eq token
'begin-optional
)
419 (let ((body (ebnf-abn-alternation)))
420 (or (eq (car body
) 'end-optional
)
421 (error "Missing `]'"))
422 (ebnf-token-optional (cdr body
))))
429 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Lookup table from character code (0-255) to a token type symbol.
;; Every slot starts out as `error'; `ebnf-abn-initialize' then overwrites
;; the slots of the characters it recognizes with `aset'.  The lexer
;; indexes the table with `following-char'.
433 (defconst ebnf-abn-token-table
(make-vector 256 'error
)
434 "Vector used to map characters to a lexical token.")
437 (defun ebnf-abn-initialize ()
438 "Initialize EBNF token table."
439 ;; control character & control 8-bit character are set to `error'
442 (while (< char ?
\072)
443 (aset ebnf-abn-token-table char
'integer
)
444 (setq char
(1+ char
)))
445 ;; printable character: A-Z
447 (while (< char ?
\133)
448 (aset ebnf-abn-token-table char
'non-terminal
)
449 (setq char
(1+ char
)))
450 ;; printable character: a-z
452 (while (< char ?
\173)
453 (aset ebnf-abn-token-table char
'non-terminal
)
454 (setq char
(1+ char
)))
455 ;; European 8-bit accentuated characters:
457 (while (< char ?
\400)
458 (aset ebnf-abn-token-table char
'non-terminal
)
459 (setq char
(1+ char
)))
460 ;; Override end of line characters:
461 (aset ebnf-abn-token-table ?
\n 'end-of-rule
) ; [NL] linefeed
462 (aset ebnf-abn-token-table ?
\r 'end-of-rule
) ; [CR] carriage return
463 ;; Override space characters:
464 (aset ebnf-abn-token-table ?
\013 'space
) ; [VT] vertical tab
465 (aset ebnf-abn-token-table ?
\t 'space
) ; [HT] horizontal tab
466 (aset ebnf-abn-token-table ?\
'space
) ; [SP] space
467 ;; Override form feed character:
468 (aset ebnf-abn-token-table ?
\f 'form-feed
) ; [FF] form feed
469 ;; Override other lexical characters:
470 (aset ebnf-abn-token-table ?
< 'non-terminal
)
471 (aset ebnf-abn-token-table ?%
'terminal
)
472 (aset ebnf-abn-token-table ?
\" 'terminal
)
473 (aset ebnf-abn-token-table ?\
( 'begin-group
)
474 (aset ebnf-abn-token-table ?\
) 'end-group
)
475 (aset ebnf-abn-token-table ?
* 'repeat
)
476 (aset ebnf-abn-token-table ?
= 'equal
)
477 (aset ebnf-abn-token-table ?\
[ 'begin-optional
)
478 (aset ebnf-abn-token-table ?\
] 'end-optional
)
479 (aset ebnf-abn-token-table ?
/ 'alternative
)
480 ;; Override comment character:
481 (aset ebnf-abn-token-table ?\
; 'comment)))
484 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
;; Character set for the characters of a rule name: `-', `_', digits and
;; ASCII letters, combined by `ebnf-range-regexp' (defined elsewhere) with
;; the character range \240-\377.  The lexer passes this value to
;; `ebnf-buffer-substring' when it scans a non-terminal.
485 (defconst ebnf-abn-non-terminal-chars
486 (ebnf-range-regexp "-_0-9A-Za-z" ?
\240 ?
\377))
;; Letters only: ASCII letters, combined by `ebnf-range-regexp' (defined
;; elsewhere) with the character range \240-\377.  The lexer uses this
;; value while checking a prose value (`<NAME>') and signals "Invalid prose
;; value" when the check fails.
;; NOTE(review): the lexer hands this value to `looking-at', which expects
;; a regexp, while the literal part here is written like a
;; `skip-chars-forward' set (no surrounding brackets).  Confirm what
;; `ebnf-range-regexp' returns before relying on that check.
487 (defconst ebnf-abn-non-terminal-letter-chars
488 (ebnf-range-regexp "A-Za-z" ?
\240 ?
\377))
491 (defun ebnf-abn-lex ()
492 "Lexical analyser for ABNF.
494 Return a lexical token.
496 See documentation for variable `ebnf-abn-lex'."
497 (if (>= (point) ebnf-limit
)
500 ;; skip spaces and comments
501 (while (if (> (following-char) 255)
505 (setq token
(aref ebnf-abn-token-table
(following-char)))
508 (skip-chars-forward " \013\t" ebnf-limit
)
509 (< (point) ebnf-limit
))
511 (ebnf-abn-skip-comment))
512 ((eq token
'form-feed
)
514 (setq ebnf-action
'form-feed
))
515 ((eq token
'end-of-rule
)
516 (ebnf-abn-skip-end-of-rule))
521 ((>= (point) ebnf-limit
)
525 (error "Illegal character"))
527 ((eq token
'end-of-rule
)
531 (setq ebnf-abn-lex
(ebnf-buffer-substring "0-9"))
533 ;; terminal: "string" or %[bdx]NNN((.NNN)+|-NNN)?
534 ((eq token
'terminal
)
536 (if (= (following-char) ?
\")
538 (ebnf-abn-character)))
540 ;; non-terminal: NAME or <NAME>
541 ((eq token
'non-terminal
)
542 (let ((prose-p (= (following-char) ?
<)))
545 (or (looking-at ebnf-abn-non-terminal-letter-chars
)
546 (error "Invalid prose value")))
548 (ebnf-buffer-substring ebnf-abn-non-terminal-chars
))
550 (or (= (following-char) ?
>)
551 (error "Invalid prose value"))
552 (setq ebnf-abn-lex
(concat "<" ebnf-abn-lex
">"))))
557 (if (/= (following-char) ?
/)
560 'incremental-alternative
))
561 ;; miscellaneous: (, ), [, ], /, *
568 (defun ebnf-abn-skip-end-of-rule ()
571 ;; end of rule ==> 2 or more consecutive end of lines
572 (setq eor-p
(or (> (skip-chars-forward "\r\n" ebnf-limit
) 1)
575 (skip-chars-forward " \013\t" ebnf-limit
)
577 (and (= (following-char) ?\
;)
578 (ebnf-abn-skip-comment))))
582 ;; replace the range "\177-\237" (see `ebnf-range-regexp').
;; Character set, in `skip-chars-forward' syntax, used to move over the
;; text of a comment.  The leading `^' negates the set, so the literal part
;; covers every character except newline and the control characters
;; \000-\010 and \016-\037; `ebnf-range-regexp' (defined elsewhere) is also
;; given the range \177-\237.  Used by `ebnf-abn-skip-comment' and
;; `ebnf-abn-eps-filename'.
583 (defconst ebnf-abn-comment-chars
584 (ebnf-range-regexp "^\n\000-\010\016-\037" ?
\177 ?
\237))
587 (defun ebnf-abn-skip-comment ()
591 ((and ebnf-eps-executing
(= (following-char) ?\
[))
592 (ebnf-eps-add-context (ebnf-abn-eps-filename)))
594 ((and ebnf-eps-executing
(= (following-char) ?\
]))
595 (ebnf-eps-remove-context (ebnf-abn-eps-filename)))
596 ;; any other action in comment
598 (setq ebnf-action
(aref ebnf-comment-table
(following-char)))
599 (skip-chars-forward ebnf-abn-comment-chars ebnf-limit
))
601 ;; check for a valid end of comment
602 (cond ((>= (point) ebnf-limit
)
604 ((= (following-char) ?
\n)
607 (error "Illegal character"))
611 (defun ebnf-abn-eps-filename ()
613 (ebnf-buffer-substring ebnf-abn-comment-chars
))
616 ;; replace the range "\240-\377" (see `ebnf-range-regexp').
;; Character set, in `skip-chars-forward' syntax, for the contents of a
;; quoted string: space through `!' and `#' through `~' (printable ASCII
;; without the double quote), combined by `ebnf-range-regexp' (defined
;; elsewhere) with the character range \240-\377.  `ebnf-abn-string' skips
;; over these characters and then requires a closing double quote.
617 (defconst ebnf-abn-string-chars
618 (ebnf-range-regexp " -!#-~" ?
\240 ?
\377))
621 (defun ebnf-abn-string ()
622 (buffer-substring-no-properties
627 (skip-chars-forward ebnf-abn-string-chars ebnf-limit
)
628 (or (= (following-char) ?
\")
629 (error "Missing `\"'"))
635 (defun ebnf-abn-character ()
636 ;; %[bdx]NNN((-NNN)|(.NNN)+)?
637 (buffer-substring-no-properties
641 (let* ((char (following-char))
642 (chars (cond ((or (= char ?B
) (= char ?b
)) "01")
643 ((or (= char ?D
) (= char ?d
)) "0-9")
644 ((or (= char ?X
) (= char ?x
)) "0-9A-Fa-f")
645 (t (error "Invalid terminal value")))))
647 (or (> (skip-chars-forward chars ebnf-limit
) 0)
648 (error "Invalid terminal value"))
649 (if (= (following-char) ?-
)
652 (or (> (skip-chars-forward chars ebnf-limit
) 0)
653 (error "Invalid terminal value range")))
654 (while (= (following-char) ?.
)
656 (or (> (skip-chars-forward chars ebnf-limit
) 0)
657 (error "Invalid terminal value")))))
661 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
666 ;;; arch-tag: 8d1b3c4d-4226-4393-b9ae-b7ccf07cf779
667 ;;; ebnf-abn.el ends here