Handle initial comments.
[bpt/emacs.git] / lisp / progmodes / ebnf-abn.el
CommitLineData
da8f925e
VJL
1;;; ebnf-abn.el --- parser for ABNF (Augmented BNF)
2
3;; Copyright (C) 2004 Free Sofware Foundation, Inc.
4
5;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
6;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
9d59cbb0 7;; Time-stamp: <2004/03/18 23:49:58 vinicius>
da8f925e
VJL
8;; Keywords: wp, ebnf, PostScript
9;; Version: 1.0
10
11;; This file is part of GNU Emacs.
12
13;; GNU Emacs is free software; you can redistribute it and/or modify
14;; it under the terms of the GNU General Public License as published by
15;; the Free Software Foundation; either version 2, or (at your option)
16;; any later version.
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
24;; along with GNU Emacs; see the file COPYING. If not, write to the
25;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26;; Boston, MA 02111-1307, USA.
27
28;;; Commentary:
29
30;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31;;
32;;
33;; This is part of ebnf2ps package.
34;;
35;; This package defines a parser for ABNF (Augmented BNF).
36;;
37;; See ebnf2ps.el for documentation.
38;;
39;;
40;; ABNF Syntax
41;; -----------
42;;
43;; See the URL:
7fd08a0a
VJL
44;; `http://www.ietf.org/rfc/rfc2234.txt'
45;; or
da8f925e
VJL
46;; `http://www.faqs.org/rfcs/rfc2234.html'
47;; or
48;; `http://www.rnp.br/ietf/rfc/rfc2234.txt'
49;; ("Augmented BNF for Syntax Specifications: ABNF").
50;;
51;;
52;; rulelist = 1*( rule / (*c-wsp c-nl) )
53;;
54;; rule = rulename defined-as elements c-nl
55;; ; continues if next line starts with white space
56;;
57;; rulename = ALPHA *(ALPHA / DIGIT / "-")
58;;
59;; defined-as = *c-wsp ("=" / "=/") *c-wsp
60;; ; basic rules definition and incremental
61;; ; alternatives
62;;
63;; elements = alternation *c-wsp
64;;
65;; c-wsp = WSP / (c-nl WSP)
66;;
67;; c-nl = comment / CRLF
68;; ; comment or newline
69;;
70;; comment = ";" *(WSP / VCHAR) CRLF
71;;
72;; alternation = concatenation
73;; *(*c-wsp "/" *c-wsp concatenation)
74;;
75;; concatenation = repetition *(1*c-wsp repetition)
76;;
77;; repetition = [repeat] element
78;;
79;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
80;;
81;; element = rulename / group / option /
82;; char-val / num-val / prose-val
83;;
84;; group = "(" *c-wsp alternation *c-wsp ")"
85;;
86;; option = "[" *c-wsp alternation *c-wsp "]"
87;;
88;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
89;; ; quoted string of SP and VCHAR without DQUOTE
90;;
91;; num-val = "%" (bin-val / dec-val / hex-val)
92;;
93;; bin-val = "b" 1*BIT
94;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
95;; ; series of concatenated bit values
96;; ; or single ONEOF range
97;;
98;; dec-val = "d" 1*DIGIT
99;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
100;;
101;; hex-val = "x" 1*HEXDIG
102;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
103;;
104;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
105;; ; bracketed string of SP and VCHAR without
106;; ; angles
107;; ; prose description, to be used as last resort
108;;
109;; ; Core rules -- the coding depends on the system, here is used 7-bit ASCII
110;;
111;; ALPHA = %x41-5A / %x61-7A
112;; ; A-Z / a-z
113;;
114;; BIT = "0" / "1"
115;;
116;; CHAR = %x01-7F
117;; ; any 7-bit US-ASCII character, excluding NUL
118;;
119;; CR = %x0D
120;; ; carriage return
121;;
122;; CRLF = CR LF
123;; ; Internet standard newline
124;;
125;; CTL = %x00-1F / %x7F
126;; ; controls
127;;
128;; DIGIT = %x30-39
129;; ; 0-9
130;;
131;; DQUOTE = %x22
132;; ; " (Double Quote)
133;;
134;; HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
135;;
136;; HTAB = %x09
137;; ; horizontal tab
138;;
139;; LF = %x0A
140;; ; linefeed
141;;
142;; LWSP = *(WSP / CRLF WSP)
143;; ; linear white space (past newline)
144;;
145;; OCTET = %x00-FF
146;; ; 8 bits of data
147;;
148;; SP = %x20
149;; ; space
150;;
151;; VCHAR = %x21-7E
152;; ; visible (printing) characters
153;;
154;; WSP = SP / HTAB
155;; ; white space
156;;
157;;
158;; NOTES:
159;;
160;; 1. Rules name and terminal strings are case INSENSITIVE.
161;; So, the following rule names are all equals:
162;; Rule-name, rule-Name, rule-name, RULE-NAME
163;; Also, the following strings are equals:
164;; "abc", "ABC", "aBc", "Abc", "aBC", etc.
165;;
166;; 2. To have a case SENSITIVE string, use the character notation.
167;; For example, to specify the lowercase string "abc", use:
168;; %d97.98.99
169;;
170;; 3. There are no implicit spaces between elements, for example, the
171;; following rules:
172;;
173;; foo = %x61 ; a
174;;
175;; bar = %x62 ; b
176;;
177;; mumble = foo bar foo
178;;
179;; Are equivalent to the following rule:
180;;
181;; mumble = %x61.62.61
182;;
183;; If spaces are needed, it should be explicit specified, like:
184;;
185;; spaces = 1*(%x20 / %x09) ; one or more spaces or tabs
186;;
187;; mumble = foo spaces bar spaces foo
188;;
189;; 4. Lines starting with space or tab are considered a continuation line.
190;; For example, the rule:
191;;
192;; rule = foo
193;; bar
194;;
195;; Is equivalent to:
196;;
197;; rule = foo bar
198;;
199;;
200;; Differences Between ABNF And ebnf2ps ABNF
201;; -----------------------------------------
202;;
203;; Besides the characters that ABNF accepts, ebnf2ps ABNF accepts also the
204;; underscore (_) for rule name and european 8-bit accentuated characters (from
205;; \240 to \377) for rule name, string and comment.
206;;
207;;
208;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
209
210;;; Code:
211
212
213(require 'ebnf-otz)
214
215
216(defvar ebnf-abn-lex nil
217 "Value returned by `ebnf-abn-lex' function.")
218
219\f
220;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
221;; Syntactic analyzer
222
223
224;;; rulelist = 1*( rule / (*c-wsp c-nl) )
225
226(defun ebnf-abn-parser (start)
227 "ABNF parser."
228 (let ((total (+ (- ebnf-limit start) 1))
229 (bias (1- start))
230 (origin (point))
231 rule-list token rule)
232 (goto-char start)
233 (setq token (ebnf-abn-lex))
234 (and (eq token 'end-of-input)
235 (error "Invalid ABNF file format"))
9d59cbb0
VJL
236 (and (eq token 'end-of-rule)
237 (setq token (ebnf-abn-lex)))
da8f925e
VJL
238 (while (not (eq token 'end-of-input))
239 (ebnf-message-float
240 "Parsing...%s%%"
241 (/ (* (- (point) bias) 100.0) total))
242 (setq token (ebnf-abn-rule token)
243 rule (cdr token)
244 token (car token))
245 (or (ebnf-add-empty-rule-list rule)
246 (setq rule-list (cons rule rule-list))))
247 (goto-char origin)
248 rule-list))
249
250
251;;; rule = rulename defined-as elements c-nl
252;;; ; continues if next line starts with white space
253;;;
254;;; rulename = ALPHA *(ALPHA / DIGIT / "-")
255;;;
256;;; defined-as = *c-wsp ("=" / "=/") *c-wsp
257;;; ; basic rules definition and incremental
258;;; ; alternatives
259;;;
260;;; elements = alternation *c-wsp
261;;;
262;;; c-wsp = WSP / (c-nl WSP)
263;;;
264;;; c-nl = comment / CRLF
265;;; ; comment or newline
266;;;
267;;; comment = ";" *(WSP / VCHAR) CRLF
268
269
270(defun ebnf-abn-rule (token)
271 (let ((name ebnf-abn-lex)
272 (action ebnf-action)
273 elements)
274 (setq ebnf-action nil)
275 (or (eq token 'non-terminal)
276 (error "Invalid rule name"))
277 (setq token (ebnf-abn-lex))
278 (or (memq token '(equal incremental-alternative))
279 (error "Invalid rule: missing `=' or `=/'"))
280 (and (eq token 'incremental-alternative)
281 (setq name (concat name " =/")))
282 (setq elements (ebnf-abn-alternation))
283 (or (memq (car elements) '(end-of-rule end-of-input))
284 (error "Invalid rule: there is no end of rule"))
285 (setq elements (cdr elements))
286 (ebnf-eps-add-production name)
287 (cons (ebnf-abn-lex)
288 (ebnf-make-production name elements action))))
289
290
291;;; alternation = concatenation
292;;; *(*c-wsp "/" *c-wsp concatenation)
293
294
295(defun ebnf-abn-alternation ()
296 (let (body concatenation)
297 (while (eq (car (setq concatenation
298 (ebnf-abn-concatenation (ebnf-abn-lex))))
299 'alternative)
300 (setq body (cons (cdr concatenation) body)))
301 (ebnf-token-alternative body concatenation)))
302
303
304;;; concatenation = repetition *(1*c-wsp repetition)
305
306
307(defun ebnf-abn-concatenation (token)
308 (let ((term (ebnf-abn-repetition token))
309 seq)
310 (or (setq token (car term)
311 term (cdr term))
312 (error "Empty element"))
313 (setq seq (cons term seq))
314 (while (setq term (ebnf-abn-repetition token)
315 token (car term)
316 term (cdr term))
317 (setq seq (cons term seq)))
318 (cons token
319 (if (= (length seq) 1)
320 ;; sequence with only one element
321 (car seq)
322 ;; a real sequence
323 (ebnf-make-sequence (nreverse seq))))))
324
325
326;;; repetition = [repeat] element
327;;;
328;;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
329
330
331(defun ebnf-abn-repetition (token)
332 (let (lower upper)
333 ;; INTEGER [ "*" [ INTEGER ] ]
334 (when (eq token 'integer)
335 (setq lower ebnf-abn-lex
336 token (ebnf-abn-lex))
337 (or (eq token 'repeat)
338 (setq upper lower)))
339 ;; "*" [ INTEGER ]
340 (when (eq token 'repeat)
341 ;; only * ==> lower & upper are empty string
342 (or lower
343 (setq lower ""
344 upper ""))
345 (when (eq (setq token (ebnf-abn-lex)) 'integer)
346 (setq upper ebnf-abn-lex
347 token (ebnf-abn-lex))))
348 (let ((element (ebnf-abn-element token)))
349 (cond
350 ;; there is a repetition
351 (lower
352 (or element
353 (error "Missing element repetition"))
354 (setq token (ebnf-abn-lex))
355 (cond
356 ;; one or more
357 ((and (string= lower "1") (null upper))
358 (cons token (ebnf-make-one-or-more element)))
359 ;; zero or more
360 ((or (and (string= lower "0") (null upper))
361 (and (string= lower "") (string= upper "")))
362 (cons token (ebnf-make-zero-or-more element)))
363 ;; real repetition
364 (t
365 (ebnf-token-repeat lower (cons token element) upper))))
366 ;; there is an element
367 (element
368 (cons (ebnf-abn-lex) element))
369 ;; something that caller has to deal
370 (t
371 (cons token nil))))))
372
373
374;;; element = rulename / group / option /
375;;; char-val / num-val / prose-val
376;;;
377;;; group = "(" *c-wsp alternation *c-wsp ")"
378;;;
379;;; option = "[" *c-wsp alternation *c-wsp "]"
380;;;
381;;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
382;;; ; quoted string of SP and VCHAR without DQUOTE
383;;;
384;;; num-val = "%" (bin-val / dec-val / hex-val)
385;;;
386;;; bin-val = "b" 1*BIT
387;;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
388;;; ; series of concatenated bit values
389;;; ; or single ONEOF range
390;;;
391;;; dec-val = "d" 1*DIGIT
392;;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
393;;;
394;;; hex-val = "x" 1*HEXDIG
395;;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
396;;;
397;;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
398;;; ; bracketed string of SP and VCHAR without
399;;; ; angles
400;;; ; prose description, to be used as last resort
401
402
403(defun ebnf-abn-element (token)
404 (cond
405 ;; terminal
406 ((eq token 'terminal)
407 (ebnf-make-terminal ebnf-abn-lex))
408 ;; non-terminal
409 ((eq token 'non-terminal)
410 (ebnf-make-non-terminal ebnf-abn-lex))
411 ;; group
412 ((eq token 'begin-group)
413 (let ((body (ebnf-abn-alternation)))
414 (or (eq (car body) 'end-group)
415 (error "Missing `)'"))
416 (cdr body)))
417 ;; optional
418 ((eq token 'begin-optional)
419 (let ((body (ebnf-abn-alternation)))
420 (or (eq (car body) 'end-optional)
421 (error "Missing `]'"))
422 (ebnf-token-optional (cdr body))))
423 ;; no element
424 (t
425 nil)
426 ))
427
428\f
429;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
430;; Lexical analyzer
431
432
433(defconst ebnf-abn-token-table (make-vector 256 'error)
434 "Vector used to map characters to a lexical token.")
435
436
437(defun ebnf-abn-initialize ()
438 "Initialize EBNF token table."
439 ;; control character & control 8-bit character are set to `error'
440 (let ((char ?\060))
441 ;; digits: 0-9
442 (while (< char ?\072)
443 (aset ebnf-abn-token-table char 'integer)
444 (setq char (1+ char)))
445 ;; printable character: A-Z
446 (setq char ?\101)
447 (while (< char ?\133)
448 (aset ebnf-abn-token-table char 'non-terminal)
449 (setq char (1+ char)))
450 ;; printable character: a-z
451 (setq char ?\141)
452 (while (< char ?\173)
453 (aset ebnf-abn-token-table char 'non-terminal)
454 (setq char (1+ char)))
455 ;; European 8-bit accentuated characters:
456 (setq char ?\240)
457 (while (< char ?\400)
458 (aset ebnf-abn-token-table char 'non-terminal)
459 (setq char (1+ char)))
460 ;; Override end of line characters:
461 (aset ebnf-abn-token-table ?\n 'end-of-rule) ; [NL] linefeed
462 (aset ebnf-abn-token-table ?\r 'end-of-rule) ; [CR] carriage return
463 ;; Override space characters:
464 (aset ebnf-abn-token-table ?\013 'space) ; [VT] vertical tab
465 (aset ebnf-abn-token-table ?\t 'space) ; [HT] horizontal tab
466 (aset ebnf-abn-token-table ?\ 'space) ; [SP] space
467 ;; Override form feed character:
468 (aset ebnf-abn-token-table ?\f 'form-feed) ; [FF] form feed
469 ;; Override other lexical characters:
470 (aset ebnf-abn-token-table ?< 'non-terminal)
471 (aset ebnf-abn-token-table ?% 'terminal)
472 (aset ebnf-abn-token-table ?\" 'terminal)
473 (aset ebnf-abn-token-table ?\( 'begin-group)
474 (aset ebnf-abn-token-table ?\) 'end-group)
475 (aset ebnf-abn-token-table ?* 'repeat)
476 (aset ebnf-abn-token-table ?= 'equal)
477 (aset ebnf-abn-token-table ?\[ 'begin-optional)
478 (aset ebnf-abn-token-table ?\] 'end-optional)
479 (aset ebnf-abn-token-table ?/ 'alternative)
480 ;; Override comment character:
481 (aset ebnf-abn-token-table ?\; 'comment)))
482
483
484;; replace the range "\240-\377" (see `ebnf-range-regexp').
485(defconst ebnf-abn-non-terminal-chars
486 (ebnf-range-regexp "-_0-9A-Za-z" ?\240 ?\377))
487(defconst ebnf-abn-non-terminal-letter-chars
488 (ebnf-range-regexp "A-Za-z" ?\240 ?\377))
489
490
491(defun ebnf-abn-lex ()
492 "Lexical analyser for ABNF.
493
494Return a lexical token.
495
496See documentation for variable `ebnf-abn-lex'."
497 (if (>= (point) ebnf-limit)
498 'end-of-input
499 (let (token)
500 ;; skip spaces and comments
501 (while (if (> (following-char) 255)
502 (progn
503 (setq token 'error)
504 nil)
505 (setq token (aref ebnf-abn-token-table (following-char)))
506 (cond
507 ((eq token 'space)
508 (skip-chars-forward " \013\t" ebnf-limit)
509 (< (point) ebnf-limit))
510 ((eq token 'comment)
511 (ebnf-abn-skip-comment))
512 ((eq token 'form-feed)
513 (forward-char)
514 (setq ebnf-action 'form-feed))
515 ((eq token 'end-of-rule)
516 (ebnf-abn-skip-end-of-rule))
517 (t nil)
518 )))
519 (cond
520 ;; end of input
521 ((>= (point) ebnf-limit)
522 'end-of-input)
523 ;; error
524 ((eq token 'error)
525 (error "Illegal character"))
526 ;; end of rule
527 ((eq token 'end-of-rule)
528 'end-of-rule)
529 ;; integer
530 ((eq token 'integer)
531 (setq ebnf-abn-lex (ebnf-buffer-substring "0-9"))
532 'integer)
533 ;; terminal: "string" or %[bdx]NNN((.NNN)+|-NNN)?
534 ((eq token 'terminal)
535 (setq ebnf-abn-lex
536 (if (= (following-char) ?\")
537 (ebnf-abn-string)
538 (ebnf-abn-character)))
539 'terminal)
540 ;; non-terminal: NAME or <NAME>
541 ((eq token 'non-terminal)
542 (let ((prose-p (= (following-char) ?<)))
543 (when prose-p
544 (forward-char)
545 (or (looking-at ebnf-abn-non-terminal-letter-chars)
546 (error "Invalid prose value")))
547 (setq ebnf-abn-lex
548 (ebnf-buffer-substring ebnf-abn-non-terminal-chars))
549 (when prose-p
550 (or (= (following-char) ?>)
551 (error "Invalid prose value"))
552 (setq ebnf-abn-lex (concat "<" ebnf-abn-lex ">"))))
553 'non-terminal)
554 ;; equal: =, =/
555 ((eq token 'equal)
556 (forward-char)
557 (if (/= (following-char) ?/)
558 'equal
559 (forward-char)
560 'incremental-alternative))
561 ;; miscellaneous: (, ), [, ], /, *
562 (t
563 (forward-char)
564 token)
565 ))))
566
567
568(defun ebnf-abn-skip-end-of-rule ()
569 (let (eor-p)
570 (while (progn
571 ;; end of rule ==> 2 or more consecutive end of lines
572 (setq eor-p (or (> (skip-chars-forward "\r\n" ebnf-limit) 1)
573 eor-p))
574 ;; skip spaces
575 (skip-chars-forward " \013\t" ebnf-limit)
576 ;; skip comments
577 (and (= (following-char) ?\;)
578 (ebnf-abn-skip-comment))))
579 (not eor-p)))
580
581
582;; replace the range "\177-\237" (see `ebnf-range-regexp').
583(defconst ebnf-abn-comment-chars
584 (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237))
585
586
587(defun ebnf-abn-skip-comment ()
588 (forward-char)
589 (cond
590 ;; open EPS file
591 ((and ebnf-eps-executing (= (following-char) ?\[))
592 (ebnf-eps-add-context (ebnf-abn-eps-filename)))
593 ;; close EPS file
594 ((and ebnf-eps-executing (= (following-char) ?\]))
595 (ebnf-eps-remove-context (ebnf-abn-eps-filename)))
596 ;; any other action in comment
597 (t
598 (setq ebnf-action (aref ebnf-comment-table (following-char)))
599 (skip-chars-forward ebnf-abn-comment-chars ebnf-limit))
600 )
601 ;; check for a valid end of comment
602 (cond ((>= (point) ebnf-limit)
603 nil)
604 ((= (following-char) ?\n)
605 t)
606 (t
607 (error "Illegal character"))
608 ))
609
610
611(defun ebnf-abn-eps-filename ()
612 (forward-char)
613 (ebnf-buffer-substring ebnf-abn-comment-chars))
614
615
616;; replace the range "\240-\377" (see `ebnf-range-regexp').
617(defconst ebnf-abn-string-chars
618 (ebnf-range-regexp " -!#-~" ?\240 ?\377))
619
620
621(defun ebnf-abn-string ()
622 (buffer-substring-no-properties
623 (progn
624 (forward-char)
625 (point))
626 (progn
627 (skip-chars-forward ebnf-abn-string-chars ebnf-limit)
628 (or (= (following-char) ?\")
629 (error "Missing `\"'"))
630 (prog1
631 (point)
632 (forward-char)))))
633
634
635(defun ebnf-abn-character ()
636 ;; %[bdx]NNN((-NNN)|(.NNN)+)?
637 (buffer-substring-no-properties
638 (point)
639 (progn
640 (forward-char)
641 (let* ((char (following-char))
642 (chars (cond ((or (= char ?B) (= char ?b)) "01")
643 ((or (= char ?D) (= char ?d)) "0-9")
644 ((or (= char ?X) (= char ?x)) "0-9A-Fa-f")
645 (t (error "Invalid terminal value")))))
646 (forward-char)
647 (or (> (skip-chars-forward chars ebnf-limit) 0)
648 (error "Invalid terminal value"))
649 (if (= (following-char) ?-)
650 (progn
651 (forward-char)
652 (or (> (skip-chars-forward chars ebnf-limit) 0)
653 (error "Invalid terminal value range")))
654 (while (= (following-char) ?.)
655 (forward-char)
656 (or (> (skip-chars-forward chars ebnf-limit) 0)
657 (error "Invalid terminal value")))))
658 (point))))
659
660\f
661;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
662
663
664(provide 'ebnf-abn)
665
b9db4567 666;;; arch-tag: 8d1b3c4d-4226-4393-b9ae-b7ccf07cf779
da8f925e 667;;; ebnf-abn.el ends here