(defgroup reftex): Update home page url-link.
[bpt/emacs.git] / lisp / progmodes / ebnf-abn.el
CommitLineData
da8f925e
VJL
1;;; ebnf-abn.el --- parser for ABNF (Augmented BNF)
2
d91362c9
NR
3;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006
4;; Free Software Foundation, Inc.
da8f925e
VJL
5
6;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
6411a60a 8;; Time-stamp: <2004/04/03 16:43:57 vinicius>
da8f925e 9;; Keywords: wp, ebnf, PostScript
6411a60a 10;; Version: 1.1
da8f925e
VJL
11
12;; This file is part of GNU Emacs.
13
14;; GNU Emacs is free software; you can redistribute it and/or modify
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
25;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
26;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27;; Boston, MA 02110-1301, USA.
da8f925e
VJL
28
29;;; Commentary:
30
31;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32;;
33;;
34;; This is part of ebnf2ps package.
35;;
36;; This package defines a parser for ABNF (Augmented BNF).
37;;
38;; See ebnf2ps.el for documentation.
39;;
40;;
41;; ABNF Syntax
42;; -----------
43;;
44;; See the URL:
7fd08a0a
VJL
45;; `http://www.ietf.org/rfc/rfc2234.txt'
46;; or
da8f925e
VJL
47;; `http://www.faqs.org/rfcs/rfc2234.html'
48;; or
49;; `http://www.rnp.br/ietf/rfc/rfc2234.txt'
50;; ("Augmented BNF for Syntax Specifications: ABNF").
51;;
52;;
53;; rulelist = 1*( rule / (*c-wsp c-nl) )
54;;
55;; rule = rulename defined-as elements c-nl
56;; ; continues if next line starts with white space
57;;
58;; rulename = ALPHA *(ALPHA / DIGIT / "-")
59;;
60;; defined-as = *c-wsp ("=" / "=/") *c-wsp
61;; ; basic rules definition and incremental
62;; ; alternatives
63;;
64;; elements = alternation *c-wsp
65;;
66;; c-wsp = WSP / (c-nl WSP)
67;;
68;; c-nl = comment / CRLF
69;; ; comment or newline
70;;
71;; comment = ";" *(WSP / VCHAR) CRLF
72;;
73;; alternation = concatenation
74;; *(*c-wsp "/" *c-wsp concatenation)
75;;
76;; concatenation = repetition *(1*c-wsp repetition)
77;;
78;; repetition = [repeat] element
79;;
80;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
81;;
82;; element = rulename / group / option /
83;; char-val / num-val / prose-val
84;;
85;; group = "(" *c-wsp alternation *c-wsp ")"
86;;
87;; option = "[" *c-wsp alternation *c-wsp "]"
88;;
89;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
90;; ; quoted string of SP and VCHAR without DQUOTE
91;;
92;; num-val = "%" (bin-val / dec-val / hex-val)
93;;
94;; bin-val = "b" 1*BIT
95;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
96;; ; series of concatenated bit values
97;; ; or single ONEOF range
98;;
99;; dec-val = "d" 1*DIGIT
100;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
101;;
102;; hex-val = "x" 1*HEXDIG
103;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
104;;
105;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
106;; ; bracketed string of SP and VCHAR without
107;; ; angles
108;; ; prose description, to be used as last resort
109;;
110;; ; Core rules -- the coding depends on the system, here is used 7-bit ASCII
111;;
112;; ALPHA = %x41-5A / %x61-7A
113;; ; A-Z / a-z
114;;
115;; BIT = "0" / "1"
116;;
117;; CHAR = %x01-7F
118;; ; any 7-bit US-ASCII character, excluding NUL
119;;
120;; CR = %x0D
121;; ; carriage return
122;;
123;; CRLF = CR LF
124;; ; Internet standard newline
125;;
126;; CTL = %x00-1F / %x7F
127;; ; controls
128;;
129;; DIGIT = %x30-39
130;; ; 0-9
131;;
132;; DQUOTE = %x22
133;; ; " (Double Quote)
134;;
135;; HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
136;;
137;; HTAB = %x09
138;; ; horizontal tab
139;;
140;; LF = %x0A
141;; ; linefeed
142;;
143;; LWSP = *(WSP / CRLF WSP)
144;; ; linear white space (past newline)
145;;
146;; OCTET = %x00-FF
147;; ; 8 bits of data
148;;
149;; SP = %x20
150;; ; space
151;;
152;; VCHAR = %x21-7E
153;; ; visible (printing) characters
154;;
155;; WSP = SP / HTAB
156;; ; white space
157;;
158;;
159;; NOTES:
160;;
161;; 1. Rule names and terminal strings are case INSENSITIVE.
162;; So, the following rule names are all equal:
163;; Rule-name, rule-Name, rule-name, RULE-NAME
164;; Also, the following strings are equal:
165;; "abc", "ABC", "aBc", "Abc", "aBC", etc.
166;;
167;; 2. To have a case SENSITIVE string, use the character notation.
168;; For example, to specify the lowercase string "abc", use:
169;; %d97.98.99
170;;
171;; 3. There are no implicit spaces between elements, for example, the
172;; following rules:
173;;
174;; foo = %x61 ; a
175;;
176;; bar = %x62 ; b
177;;
178;; mumble = foo bar foo
179;;
180;; Are equivalent to the following rule:
181;;
182;; mumble = %x61.62.61
183;;
184;; If spaces are needed, they must be specified explicitly, like:
185;;
186;; spaces = 1*(%x20 / %x09) ; one or more spaces or tabs
187;;
188;; mumble = foo spaces bar spaces foo
189;;
190;; 4. Lines starting with space or tab are considered a continuation line.
191;; For example, the rule:
192;;
193;; rule = foo
194;; bar
195;;
196;; Is equivalent to:
197;;
198;; rule = foo bar
199;;
200;;
201;; Differences Between ABNF And ebnf2ps ABNF
202;; -----------------------------------------
203;;
204;; Besides the characters that ABNF accepts, ebnf2ps ABNF also accepts the
205;; underscore (_) in rule names, and European 8-bit accented characters (from
206;; \240 to \377) in rule names, strings and comments.
207;;
208;;
209;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
210
211;;; Code:
212
213
214(require 'ebnf-otz)
215
216
(defvar ebnf-abn-lex nil
  "Text of the last value-carrying token read by function `ebnf-abn-lex'.

The lexer stores here the digits of an `integer' token, the text of a
`terminal' token and the name of a `non-terminal' token.  The parser
functions read it right after receiving such a token.")
219
220\f
221;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
222;; Syntactic analyzer
223
224
225;;; rulelist = 1*( rule / (*c-wsp c-nl) )
226
(defun ebnf-abn-parser (start)
  "Parse the ABNF rules found from buffer position START onwards.

Rules are read with `ebnf-abn-rule' until the lexer reports
`end-of-input'.  Return the list of parsed rules, most recently parsed
first; a rule for which `ebnf-add-empty-rule-list' returns non-nil is
not put on that list.

Signal an error if the very first token is `end-of-input'.  Point is
moved back to where it was on entry once parsing completes."
  (let ((span (1+ (- ebnf-limit start)))
        (offset (1- start))
        (saved-point (point))
        productions parsed production token)
    (goto-char start)
    (setq token (ebnf-abn-lex))
    (when (eq token 'end-of-input)
      (error "Invalid ABNF file format"))
    ;; Skip a leading `end-of-rule' token.
    (when (eq token 'end-of-rule)
      (setq token (ebnf-abn-lex)))
    (while (not (eq token 'end-of-input))
      ;; Progress is the share of the START..`ebnf-limit' span already read.
      (ebnf-message-float
       "Parsing...%s%%"
       (/ (* (- (point) offset) 100.0) span))
      ;; `ebnf-abn-rule' returns (NEXT-TOKEN . RULE).
      (setq parsed (ebnf-abn-rule token)
            token (car parsed)
            production (cdr parsed))
      (unless (ebnf-add-empty-rule-list production)
        (setq productions (cons production productions))))
    (goto-char saved-point)
    productions))
250
251
252;;; rule = rulename defined-as elements c-nl
253;;; ; continues if next line starts with white space
254;;;
255;;; rulename = ALPHA *(ALPHA / DIGIT / "-")
256;;;
257;;; defined-as = *c-wsp ("=" / "=/") *c-wsp
258;;; ; basic rules definition and incremental
259;;; ; alternatives
260;;;
261;;; elements = alternation *c-wsp
262;;;
263;;; c-wsp = WSP / (c-nl WSP)
264;;;
265;;; c-nl = comment / CRLF
266;;; ; comment or newline
267;;;
268;;; comment = ";" *(WSP / VCHAR) CRLF
269
270
(defun ebnf-abn-rule (token)
  "Parse one ABNF rule whose first token is TOKEN.

TOKEN must be `non-terminal'; the rule name is the value the variable
`ebnf-abn-lex' holds on entry.  For a rule defined with `=/' the
string \" =/\" is appended to the name.

The value of `ebnf-action' on entry is attached to the production and
`ebnf-action' is reset to nil.

Return a cons (NEXT-TOKEN . PRODUCTION), where NEXT-TOKEN is the token
read after the rule and PRODUCTION comes from `ebnf-make-production'.
Signal an error on a bad rule name, a missing `=' or `=/', or a rule
that does not stop at `end-of-rule' or `end-of-input'."
  (let ((rule-name ebnf-abn-lex)
        (pending-action ebnf-action)
        rule-body)
    (setq ebnf-action nil)
    (unless (eq token 'non-terminal)
      (error "Invalid rule name"))
    (setq token (ebnf-abn-lex))
    (cond
     ;; plain definition: nothing to adjust
     ((eq token 'equal))
     ;; incremental alternative: tag the name
     ((eq token 'incremental-alternative)
      (setq rule-name (concat rule-name " =/")))
     (t
      (error "Invalid rule: missing `=' or `=/'")))
    ;; The alternation comes back as (TOKEN . ELEMENTS).
    (setq rule-body (ebnf-abn-alternation))
    (unless (memq (car rule-body) '(end-of-rule end-of-input))
      (error "Invalid rule: there is no end of rule"))
    (setq rule-body (cdr rule-body))
    (ebnf-eps-add-production rule-name)
    (cons (ebnf-abn-lex)
          (ebnf-make-production rule-name rule-body pending-action))))
290
291
292;;; alternation = concatenation
293;;; *(*c-wsp "/" *c-wsp concatenation)
294
295
(defun ebnf-abn-alternation ()
  "Parse an alternation: concatenations separated by `/'.

Each concatenation that is followed by an `alternative' token is
collected, most recent first.  The collected list and the final
concatenation result are handed to `ebnf-token-alternative', whose
value is returned.  Callers in this file use that value as a cons
\(TOKEN . ELEMENT)."
  (let ((branches nil)
        (parsed (ebnf-abn-concatenation (ebnf-abn-lex))))
    (while (eq (car parsed) 'alternative)
      (setq branches (cons (cdr parsed) branches)
            parsed (ebnf-abn-concatenation (ebnf-abn-lex))))
    (ebnf-token-alternative branches parsed)))
303
304
305;;; concatenation = repetition *(1*c-wsp repetition)
306
307
(defun ebnf-abn-concatenation (token)
  "Parse a concatenation of repetitions starting at TOKEN.

Repetitions are read with `ebnf-abn-repetition' until one yields no
element.  Return a cons (NEXT-TOKEN . SEQUENCE), where SEQUENCE is what
`ebnf-token-sequence' builds from the collected elements (passed to it
most recent first).  Signal an error if there is not even one element."
  (let* ((parsed (ebnf-abn-repetition token))
         (piece (cdr parsed))
         pieces)
    (setq token (car parsed))
    (unless piece
      (error "Empty element"))
    (setq pieces (list piece))
    (while (progn
             (setq parsed (ebnf-abn-repetition token)
                   token (car parsed)
                   piece (cdr parsed))
             piece)
      (setq pieces (cons piece pieces)))
    (cons token
          (ebnf-token-sequence pieces))))
da8f925e
VJL
321
322
323;;; repetition = [repeat] element
324;;;
325;;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
326
327
(defun ebnf-abn-repetition (token)
  "Parse an optional repeat prefix followed by an element, starting at TOKEN.

The repeat bounds are kept as strings: a lone INTEGER sets both bounds
to it; INTEGER followed by `*' sets only the lower bound; a bare `*'
sets both to the empty string; an INTEGER after `*' sets the upper
bound.

Return a cons (NEXT-TOKEN . ELEMENT).  ELEMENT is nil, and NEXT-TOKEN is
the unconsumed token, when there is neither a repeat prefix nor an
element.  Signal an error if a repeat prefix is not followed by an
element."
  (let (min-count max-count unit)
    ;; INTEGER [ "*" [ INTEGER ] ]
    (when (eq token 'integer)
      (setq min-count ebnf-abn-lex
            token (ebnf-abn-lex))
      (unless (eq token 'repeat)
        (setq max-count min-count)))
    ;; "*" [ INTEGER ]
    (when (eq token 'repeat)
      ;; only * ==> both bounds are the empty string
      (unless min-count
        (setq min-count ""
              max-count ""))
      (setq token (ebnf-abn-lex))
      (when (eq token 'integer)
        (setq max-count ebnf-abn-lex
              token (ebnf-abn-lex))))
    (setq unit (ebnf-abn-element token))
    (cond
     ;; no repeat prefix
     ((null min-count)
      (if unit
          ;; a plain element
          (cons (ebnf-abn-lex) unit)
        ;; something that the caller has to deal with
        (cons token nil)))
     ;; repeat prefix without anything to repeat
     ((null unit)
      (error "Missing element repetition"))
     ;; repeat prefix and element
     (t
      (setq token (ebnf-abn-lex))
      (cond
       ;; one or more
       ((and (string= min-count "1") (null max-count))
        (cons token (ebnf-make-one-or-more unit)))
       ;; zero or more
       ((or (and (string= min-count "0") (null max-count))
            (and (string= min-count "") (string= max-count "")))
        (cons token (ebnf-make-zero-or-more unit)))
       ;; real repetition
       (t
        (ebnf-token-repeat min-count (cons token unit) max-count)))))))
369
370
371;;; element = rulename / group / option /
372;;; char-val / num-val / prose-val
373;;;
374;;; group = "(" *c-wsp alternation *c-wsp ")"
375;;;
376;;; option = "[" *c-wsp alternation *c-wsp "]"
377;;;
378;;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
379;;; ; quoted string of SP and VCHAR without DQUOTE
380;;;
381;;; num-val = "%" (bin-val / dec-val / hex-val)
382;;;
383;;; bin-val = "b" 1*BIT
384;;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
385;;; ; series of concatenated bit values
386;;; ; or single ONEOF range
387;;;
388;;; dec-val = "d" 1*DIGIT
389;;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
390;;;
391;;; hex-val = "x" 1*HEXDIG
392;;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
393;;;
394;;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
395;;; ; bracketed string of SP and VCHAR without
396;;; ; angles
397;;; ; prose description, to be used as last resort
398
399
(defun ebnf-abn-element (token)
  "Build the element introduced by TOKEN, or return nil if TOKEN starts none.

A `terminal' or `non-terminal' token becomes the corresponding node
made from the current value of the variable `ebnf-abn-lex'.  A
`begin-group' or `begin-optional' token makes this function parse the
enclosed alternation and check for the matching closing token,
signaling an error when it is missing.  An optional body is wrapped
with `ebnf-token-optional'; a group body is returned as is."
  (cond
   ;; terminal
   ((eq token 'terminal)
    (ebnf-make-terminal ebnf-abn-lex))
   ;; non-terminal
   ((eq token 'non-terminal)
    (ebnf-make-non-terminal ebnf-abn-lex))
   ;; group or optional: both enclose an alternation
   ((memq token '(begin-group begin-optional))
    (let* ((optional-p (eq token 'begin-optional))
           (inner (ebnf-abn-alternation)))
      (if optional-p
          (progn
            (unless (eq (car inner) 'end-optional)
              (error "Missing `]'"))
            (ebnf-token-optional (cdr inner)))
        (unless (eq (car inner) 'end-group)
          (error "Missing `)'"))
        (cdr inner))))
   ;; no element
   (t
    nil)))
424
425\f
426;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
427;; Lexical analyzer
428
429
(defconst ebnf-abn-token-table (make-vector 256 'error)
  "Vector mapping each character code from 0 to 255 to a lexical token.
Every entry starts out as the symbol `error'.")
432
433
(defun ebnf-abn-initialize ()
  "Fill `ebnf-abn-token-table' with the ABNF token of each character.
Entries for characters not mentioned here are left untouched."
  ;; Character ranges, as (FIRST LIMIT TOKEN); LIMIT itself is excluded.
  (dolist (range '((?\060 ?\072 integer)       ; digits: 0-9
                   (?\101 ?\133 non-terminal)  ; printable character: A-Z
                   (?\141 ?\173 non-terminal)  ; printable character: a-z
                   (?\240 ?\400 non-terminal))) ; European 8-bit characters
    (let ((code (car range))
          (limit (nth 1 range))
          (kind (nth 2 range)))
      (while (< code limit)
        (aset ebnf-abn-token-table code kind)
        (setq code (1+ code)))))
  ;; Single characters, as (CHAR . TOKEN).
  (dolist (entry '(;; end of line characters
                   (?\n . end-of-rule)         ; [NL] linefeed
                   (?\r . end-of-rule)         ; [CR] carriage return
                   ;; space characters
                   (?\013 . space)             ; [VT] vertical tab
                   (?\t . space)               ; [HT] horizontal tab
                   (?\040 . space)             ; [SP] space
                   ;; form feed character
                   (?\f . form-feed)           ; [FF] form feed
                   ;; other lexical characters
                   (?< . non-terminal)
                   (?% . terminal)
                   (?\" . terminal)
                   (?\( . begin-group)
                   (?\) . end-group)
                   (?* . repeat)
                   (?= . equal)
                   (?\[ . begin-optional)
                   (?\] . end-optional)
                   (?/ . alternative)))
    (aset ebnf-abn-token-table (car entry) (cdr entry)))
  ;; Comment character; kept as the last form so the function still
  ;; returns the value of this `aset'.
  (aset ebnf-abn-token-table ?\; 'comment))
479
480
481;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-abn-non-terminal-chars
  (ebnf-range-regexp "-_0-9A-Za-z" ?\240 ?\377)
  "Character set of a rule name, as built by `ebnf-range-regexp'.
`ebnf-abn-lex' passes it to `ebnf-buffer-substring' to read a name.")

(defconst ebnf-abn-non-terminal-letter-chars
  (ebnf-range-regexp "A-Za-z" ?\240 ?\377)
  "Character set of letters, as built by `ebnf-range-regexp'.
`ebnf-abn-lex' checks the text that follows the `<' of a prose value
against it.")
486
487
(defun ebnf-abn-lex ()
  "Lexical analyzer for ABNF.

Return a lexical token, one of the symbols `end-of-input',
`end-of-rule', `integer', `terminal', `non-terminal', `equal',
`incremental-alternative', or the `ebnf-abn-token-table' entry of a
single punctuation character (for instance `repeat' or `begin-group').

For `integer', `terminal' and `non-terminal' the token text is stored
in the variable `ebnf-abn-lex'; a prose value is stored with its angle
brackets, as \"<NAME>\".

Spaces, comments and form feeds are skipped; a form feed also sets
`ebnf-action' to `form-feed'.  Signal an error on a character that has
no token or on a malformed prose value.

See documentation for variable `ebnf-abn-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (if (> (following-char) 255)
                 ;; outside the token table: report it as an error below
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-abn-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \013\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ((eq token 'comment)
                 (ebnf-abn-skip-comment))
                ((eq token 'form-feed)
                 (forward-char)
                 (setq ebnf-action 'form-feed))
                ((eq token 'end-of-rule)
                 ;; non-nil means the line break was only a continuation
                 (ebnf-abn-skip-end-of-rule))
                (t nil))))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Invalid character"))
       ;; end of rule
       ((eq token 'end-of-rule)
        'end-of-rule)
       ;; integer
       ((eq token 'integer)
        (setq ebnf-abn-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; terminal: "string" or %[bdx]NNN((.NNN)+|-NNN)?
       ((eq token 'terminal)
        (setq ebnf-abn-lex
              (if (= (following-char) ?\")
                  (ebnf-abn-string)
                (ebnf-abn-character)))
        'terminal)
       ;; non-terminal: NAME or <NAME>
       ((eq token 'non-terminal)
        (let ((prose-p (= (following-char) ?<)))
          (when prose-p
            (forward-char)
            ;; The name must start with a letter.  The letter set is
            ;; built like the other character sets of this file, which
            ;; are all used with `skip-chars-forward', so it is tested
            ;; the same way instead of being matched as a regexp.
            (or (save-excursion
                  (> (skip-chars-forward ebnf-abn-non-terminal-letter-chars
                                         ebnf-limit)
                     0))
                (error "Invalid prose value")))
          (setq ebnf-abn-lex
                (ebnf-buffer-substring ebnf-abn-non-terminal-chars))
          (when prose-p
            (or (= (following-char) ?>)
                (error "Invalid prose value"))
            ;; Consume the closing `>': it has no entry of its own in
            ;; the token table, so leaving it in place would make the
            ;; next call fail with "Invalid character".
            (forward-char)
            (setq ebnf-abn-lex (concat "<" ebnf-abn-lex ">"))))
        'non-terminal)
       ;; equal: =, =/
       ((eq token 'equal)
        (forward-char)
        (if (/= (following-char) ?/)
            'equal
          (forward-char)
          'incremental-alternative))
       ;; miscellaneous: (, ), [, ], /, *
       (t
        (forward-char)
        token)))))
563
564
(defun ebnf-abn-skip-end-of-rule ()
  "Skip line breaks, the spaces after them and any comments that follow.

Return nil if some run of two or more consecutive end of line
characters was crossed, which marks the end of the rule; otherwise
return t."
  (let ((rule-ended nil)
        (again t))
    (while again
      ;; end of rule ==> 2 or more consecutive end of lines
      (when (> (skip-chars-forward "\r\n" ebnf-limit) 1)
        (setq rule-ended t))
      ;; skip spaces
      (skip-chars-forward " \013\t" ebnf-limit)
      ;; skip a comment; go round again only if it was skipped up to
      ;; a newline
      (setq again (and (= (following-char) ?\;)
                       (ebnf-abn-skip-comment))))
    (not rule-ended)))
577
578
579;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-abn-comment-chars
  (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237)
  "Character set of comment text, as built by `ebnf-range-regexp'.
`ebnf-abn-skip-comment' passes it to `skip-chars-forward' and
`ebnf-abn-eps-filename' passes it to `ebnf-buffer-substring'.")
582
583
(defun ebnf-abn-skip-comment ()
  "Skip the comment at point and act on what it contains.

The character at point (the `;' for both callers in this file) is
stepped over first.  While `ebnf-eps-executing' is non-nil, a comment
that continues with `[' or `]' adds or removes the EPS context named by
`ebnf-abn-eps-filename'.  For any other comment, `ebnf-action' is set
from the `ebnf-comment-table' entry of the next character and the
comment text is skipped.

Return t if point then stands before a newline, nil if `ebnf-limit'
was reached; signal an error otherwise."
  (forward-char)
  (let ((marker (following-char)))
    (cond
     ;; open EPS file
     ((and ebnf-eps-executing (= marker ?\[))
      (ebnf-eps-add-context (ebnf-abn-eps-filename)))
     ;; close EPS file
     ((and ebnf-eps-executing (= marker ?\]))
      (ebnf-eps-remove-context (ebnf-abn-eps-filename)))
     ;; any other action in comment
     (t
      (setq ebnf-action (aref ebnf-comment-table marker))
      (skip-chars-forward ebnf-abn-comment-chars ebnf-limit))))
  ;; check for a valid end of comment
  (and (< (point) ebnf-limit)
       (or (= (following-char) ?\n)
           (error "Invalid character"))))
606
607
(defun ebnf-abn-eps-filename ()
  "Return the EPS file name that follows the character at point.

The character at point is stepped over, then the run of
`ebnf-abn-comment-chars' characters after it is returned by way of
`ebnf-buffer-substring'."
  ;; `ebnf-abn-skip-comment' calls this with point on `[' or `]'.
  (forward-char)
  (ebnf-buffer-substring ebnf-abn-comment-chars))
611
612
613;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-abn-string-chars
  (ebnf-range-regexp " -!#-~" ?\240 ?\377)
  "Character set of quoted string text, as built by `ebnf-range-regexp'.
`ebnf-abn-string' passes it to `skip-chars-forward'.")
616
617
(defun ebnf-abn-string ()
  "Read the quoted string at point and return its text without the quotes.

The character at point is stepped over as the opening quote, then
`ebnf-abn-string-chars' characters are skipped up to `ebnf-limit'.
Point is left after the closing quote.  Signal an error if the skipped
text is not followed by a double quote."
  (forward-char)
  (let ((text-start (point))
        text-end)
    (skip-chars-forward ebnf-abn-string-chars ebnf-limit)
    (unless (= (following-char) ?\")
      (error "Missing `\"'"))
    (setq text-end (point))
    ;; step over the closing quote
    (forward-char)
    (buffer-substring-no-properties text-start text-end)))
630
631
(defun ebnf-abn-character ()
  "Read the numeric value at point and return its text.

The accepted shape is %[bdx]NNN((-NNN)|(.NNN)+)?, with the base letter
in either case; the returned text starts at the character at point on
entry.  Point is left after the value.  Signal an error on an unknown
base letter or when digits are missing."
  (let ((value-start (point))
        base-char digit-set)
    ;; step over the character at point, then read the base letter
    (forward-char)
    (setq base-char (following-char)
          digit-set (cond ((memq base-char '(?B ?b)) "01")
                          ((memq base-char '(?D ?d)) "0-9")
                          ((memq base-char '(?X ?x)) "0-9A-Fa-f")
                          (t (error "Invalid terminal value"))))
    (forward-char)
    ;; first number
    (unless (> (skip-chars-forward digit-set ebnf-limit) 0)
      (error "Invalid terminal value"))
    (if (/= (following-char) ?-)
        ;; concatenation: zero or more ".NNN"
        (while (= (following-char) ?.)
          (forward-char)
          (unless (> (skip-chars-forward digit-set ebnf-limit) 0)
            (error "Invalid terminal value")))
      ;; range: "-NNN"
      (forward-char)
      (unless (> (skip-chars-forward digit-set ebnf-limit) 0)
        (error "Invalid terminal value range")))
    (buffer-substring-no-properties value-start (point))))
656
657\f
658;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
659
660
661(provide 'ebnf-abn)
662
b9db4567 663;;; arch-tag: 8d1b3c4d-4226-4393-b9ae-b7ccf07cf779
da8f925e 664;;; ebnf-abn.el ends here