Copyright up-date.
[bpt/emacs.git] / lisp / progmodes / ebnf-bnf.el
CommitLineData
984ae001
GM
1;;; ebnf-bnf --- Parser for EBNF
2
8d9ea7b1 3;; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
984ae001
GM
4
5;; Author: Vinicius Jose Latorre <vinicius@cpqd.com.br>
6;; Maintainer: Vinicius Jose Latorre <vinicius@cpqd.com.br>
7;; Keywords: wp, ebnf, PostScript
8;; Time-stamp: <99/11/20 18:05:05 vinicius>
9;; Version: 1.4
10
8d9ea7b1 11;; This file is part of GNU Emacs.
984ae001 12
8d9ea7b1 13;; GNU Emacs is free software; you can redistribute it and/or modify
984ae001
GM
14;; it under the terms of the GNU General Public License as published by
15;; the Free Software Foundation; either version 2, or (at your option)
16;; any later version.
17
8d9ea7b1 18;; GNU Emacs is distributed in the hope that it will be useful,
984ae001
GM
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
24;; along with GNU Emacs; see the file COPYING. If not, write to the
25;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26;; Boston, MA 02111-1307, USA.
27
28;;; Commentary:
29
30;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31;;
32;;
33;; This is part of ebnf2ps package.
34;;
35;; This package defines a parser for EBNF.
36;;
37;; See ebnf2ps.el for documentation.
38;;
39;;
40;; EBNF Syntax
41;; -----------
42;;
43;; The current EBNF that ebnf2ps accepts has the following constructions:
44;;
45;; ; comment (until end of line)
46;; A non-terminal
47;; "C" terminal
48;; ?C? special
49;; $A default non-terminal
50;; $"C" default terminal
51;; $?C? default special
52;; A = B. production (A is the header and B the body)
53;; C D sequence (C occurs before D)
54;; C | D alternative (C or D occurs)
55;; A - B exception (A excluding B, B without any non-terminal)
56;; n * A repetition (A repeats n (integer) times)
57;; (C) group (expression C is grouped together)
58;; [C] optional (C may or may not occur)
59;; C+ one or more occurrences of C
60;; {C}+ one or more occurrences of C
61;; {C}* zero or more occurrences of C
62;; {C} zero or more occurrences of C
63;; C / D equivalent to: C {D C}*
64;; {C || D}+ equivalent to: C {D C}*
65;; {C || D}* equivalent to: [C {D C}*]
66;; {C || D} equivalent to: [C {D C}*]
67;;
68;; The EBNF syntax written using the notation above is:
69;;
70;; EBNF = {production}+.
71;;
72;; production = non_terminal "=" body ".". ;; production
73;;
74;; body = {sequence || "|"}*. ;; alternative
75;;
76;; sequence = {exception}*. ;; sequence
77;;
78;; exception = repeat [ "-" repeat]. ;; exception
79;;
80;; repeat = [ integer "*" ] term. ;; repetition
81;;
82;; term = factor
83;; | [factor] "+" ;; one-or-more
84;; | [factor] "/" [factor] ;; one-or-more
85;; .
86;;
87;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
88;; | [ "$" ] non_terminal ;; non-terminal
89;; | [ "$" ] "?" special "?" ;; special
90;; | "(" body ")" ;; group
91;; | "[" body "]" ;; zero-or-one
92;; | "{" body [ "||" body ] "}+" ;; one-or-more
93;; | "{" body [ "||" body ] "}*" ;; zero-or-more
94;; | "{" body [ "||" body ] "}" ;; zero-or-more
95;; .
96;;
97;; non_terminal = "[A-Za-z\\240-\\377][!#%&'*-,0-:<>@-Z\\^-z~\\240-\\377]*".
98;;
99;; terminal = "\\([^\"\\]\\|\\\\[ -~\\240-\\377]\\)+".
100;;
101;; special = "[^?\\n\\000-\\010\\016-\\037\\177-\\237]*".
102;;
103;; integer = "[0-9]+".
104;;
105;; comment = ";" "[^\\n\\000-\\010\\016-\\037\\177-\\237]*" "\\n".
106;;
107;;
108;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
109
110;;; code:
111
112
113(require 'ebnf-otz)
114
115
(defvar ebnf-bnf-lex
  nil
  "Text of the token most recently scanned by function `ebnf-bnf-lex'.

The function itself returns only the kind of token, as a symbol.  For
integer, special, terminal and non-terminal tokens it stores the
matching string in this variable.")
118
119\f
120;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
121;; Syntactic analyzer
122
123
124;;; EBNF = {production}+.
125
(defun ebnf-bnf-parser (start)
  "Parse the EBNF text lying between START and `ebnf-limit'.

Return the list of rules that were read, most recently parsed first,
leaving out every rule for which `ebnf-add-empty-rule-list' returns
non-nil.  Signal an error when the very first token is already
`end-of-input'.  Progress is reported through `ebnf-message-float'.
Once parsing completes, point is moved back to where it was on entry."
  (let ((span (1+ (- ebnf-limit start)))  ; number of positions to scan
        (offset (1- start))               ; makes the percentage start near 0
        (saved-point (point))
        rules lookahead)
    (goto-char start)
    (setq lookahead (ebnf-bnf-lex))
    (if (eq lookahead 'end-of-input)
        (error "Invalid EBNF file format."))
    (while (not (eq lookahead 'end-of-input))
      (ebnf-message-float
       "Parsing...%s%%"
       (/ (* (- (point) offset) 100.0) span))
      ;; `ebnf-production' hands back (NEXT-TOKEN . RULE).
      (let* ((parsed (ebnf-production lookahead))
             (rule (cdr parsed)))
        (setq lookahead (car parsed))
        (or (ebnf-add-empty-rule-list rule)
            (setq rules (cons rule rules)))))
    (goto-char saved-point)
    rules))
147
148
149;;; production = non_terminal "=" body ".".
150
(defun ebnf-production (token)
  "Parse one production whose first token, TOKEN, was already read.

Return a cons (NEXT-TOKEN . PRODUCTION): the token read right after
the production, and the value of `ebnf-make-production'.  The current
`ebnf-action' is passed on to the production and the variable is
reset to nil.  Signal an error when TOKEN is not `non-terminal', when
the next token is not `equal', or when the body is not followed by a
`period' token."
  (let ((name ebnf-bnf-lex)             ; text of the header token
        (pending-action ebnf-action))
    (setq ebnf-action nil)
    (if (not (eq token 'non-terminal))
        (error "Invalid header production."))
    (if (not (eq (ebnf-bnf-lex) 'equal))
        (error "Invalid production: missing `='."))
    ;; `ebnf-body' hands back (NEXT-TOKEN . BODY-NODE).
    (let ((parsed (ebnf-body)))
      (if (not (eq (car parsed) 'period))
          (error "Invalid production: missing `.'."))
      (ebnf-eps-add-production name)
      (cons (ebnf-bnf-lex)
            (ebnf-make-production name (cdr parsed) pending-action)))))
167
168
169;;; body = {sequence || "|"}*.
170
(defun ebnf-body ()
  "Parse a body: a series of sequences separated by `alternative' tokens.

The node of every sequence that is followed by an `alternative' token
is collected, last one first.  That list and the complete
\(TOKEN . NODE) result of the final sequence are given to
`ebnf-token-alternative', whose value is returned."
  (let ((current (ebnf-sequence))
        earlier)
    (while (eq (car current) 'alternative)
      (setq earlier (cons (cdr current) earlier)
            current (ebnf-sequence)))
    (ebnf-token-alternative earlier current)))
177
178
179;;; sequence = {exception}*.
180
(defun ebnf-sequence ()
  "Parse a sequence of exceptions, reading its first token itself.

Return a cons (NEXT-TOKEN . NODE).  NODE is the value of
`ebnf-make-empty' when nothing was parsed, the single element when
exactly one was parsed, and otherwise the value of
`ebnf-make-sequence' applied to the elements in source order."
  (let ((lookahead (ebnf-bnf-lex))
        (more t)
        items)
    ;; `ebnf-exception' hands back (NEXT-TOKEN . NODE); a nil NODE means
    ;; that the token could not start another element.
    (while more
      (let ((parsed (ebnf-exception lookahead)))
        (setq lookahead (car parsed))
        (if (cdr parsed)
            (setq items (cons (cdr parsed) items))
          (setq more nil))))
    (cons lookahead
          (cond
           ;; nothing at all
           ((null items)
            (ebnf-make-empty))
           ;; exactly one element: no wrapper node is needed
           ((null (cdr items))
            (car items))
           ;; two or more elements, gathered in reverse
           (t
            (ebnf-make-sequence (nreverse items)))))))
200
201
202;;; exception = repeat [ "-" repeat].
203
(defun ebnf-exception (token)
  "Parse `repeat [ \"-\" repeat ]' starting at TOKEN.

When the first repeat is not followed by an `except' token, return
its (NEXT-TOKEN . NODE) result unchanged.  Otherwise parse a second
repeat, check it with `ebnf-no-non-terminal', and return the value of
`ebnf-token-except'."
  (let ((base (ebnf-repeat token)))
    (cond
     ;; repeat - repeat
     ((eq (car base) 'except)
      (let ((excluded (ebnf-repeat (ebnf-bnf-lex))))
        (ebnf-no-non-terminal (cdr excluded))
        (ebnf-token-except (cdr base) excluded)))
     ;; plain repeat
     (t
      base))))
213
214
(defun ebnf-no-non-terminal (node)
  "Signal an error if a non-terminal is found in the tree rooted at NODE.

NODE is examined only when it is a vector.  Depending on its kind, as
given by `ebnf-node-kind', the check continues into the value of
`ebnf-node-list', of `ebnf-node-separator', or of both."
  (when (vectorp node)
    (let ((kind (ebnf-node-kind node)))
      (cond
       ;; the forbidden node itself
       ((eq kind 'ebnf-generate-non-terminal)
        (error "Exception sequence should not contain a non-terminal."))
       ;; repetition: only the separator slot is inspected
       ((eq kind 'ebnf-generate-repeat)
        (ebnf-no-non-terminal (ebnf-node-separator node)))
       ;; one child, held in the list slot
       ((or (eq kind 'ebnf-generate-optional)
            (eq kind 'ebnf-generate-except))
        (ebnf-no-non-terminal (ebnf-node-list node)))
       ;; a child in the list slot and another in the separator slot
       ((or (eq kind 'ebnf-generate-one-or-more)
            (eq kind 'ebnf-generate-zero-or-more))
        (ebnf-no-non-terminal (ebnf-node-list node))
        (ebnf-no-non-terminal (ebnf-node-separator node)))
       ;; the list slot holds a list of children
       ((or (eq kind 'ebnf-generate-alternative)
            (eq kind 'ebnf-generate-sequence))
        (let ((children (ebnf-node-list node)))
          (while children
            (ebnf-no-non-terminal (car children))
            (setq children (cdr children)))))))))
234
235
236;;; repeat = [ integer "*" ] term.
237
(defun ebnf-repeat (token)
  "Parse `[ integer \"*\" ] term' starting at TOKEN.

When TOKEN is not `integer', simply return the value of `ebnf-term'.
Otherwise the integer text found in variable `ebnf-bnf-lex' is kept,
a `repeat' token is required next (an error is signaled without it),
and the value of `ebnf-token-repeat' is returned."
  (cond
   ;; integer * term
   ((eq token 'integer)
    (let ((count ebnf-bnf-lex))
      (if (not (eq (ebnf-bnf-lex) 'repeat))
          (error "Missing `*'."))
      (ebnf-token-repeat count (ebnf-term (ebnf-bnf-lex)))))
   ;; term
   (t
    (ebnf-term token))))
245
246
247;;; term = factor
248;;; | [factor] "+" ;; one-or-more
249;;; | [factor] "/" [factor] ;; one-or-more
250;;; .
251
(defun ebnf-term (token)
  "Parse a term starting at TOKEN.

A term is a factor, an optional factor followed by `+', or two
optional factors separated by `/'.

Return a cons (NEXT-TOKEN . NODE).  NODE is nil when no factor was
found where one was needed; if in addition TOKEN is neither
`one-or-more' nor `list', NEXT-TOKEN is TOKEN itself."
  (let ((factor (ebnf-factor token)))
    ;; TOKEN belonged to the factor, so look at what follows it.
    (if factor
        (setq token (ebnf-bnf-lex)))
    (cond
     ;; [factor] +
     ((eq token 'one-or-more)
      (let ((next (ebnf-bnf-lex)))
        (cons next
              (if factor
                  (let ((kind (ebnf-node-kind factor)))
                    (cond
                     ;; [ A ] +  ==>  { A }*
                     ((eq kind 'ebnf-generate-optional)
                      (ebnf-make-zero-or-more (list factor)))
                     ;; { A }+ +  ==>  { A }+
                     ;; { A }* +  ==>  { A }*
                     ((or (eq kind 'ebnf-generate-zero-or-more)
                          (eq kind 'ebnf-generate-one-or-more))
                      factor)
                     ;; A +
                     (t
                      (ebnf-make-one-or-more (list factor)))))))))
     ;; [factor] / [factor]
     ((eq token 'list)
      (let* ((after-slash (ebnf-bnf-lex))
             (sep (ebnf-factor after-slash)))
        ;; A separator without a leading factor gets an empty one.
        (if (and sep (null factor))
            (setq factor (ebnf-make-empty)))
        (cons (if sep
                  (ebnf-bnf-lex)        ; the separator used AFTER-SLASH
                after-slash)
              (if factor
                  (ebnf-make-one-or-more factor sep)))))
     ;; factor
     (t
      (cons token factor)))))
290
291
292;;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
293;;; | [ "$" ] non_terminal ;; non-terminal
294;;; | [ "$" ] "?" special "?" ;; special
295;;; | "(" body ")" ;; group
296;;; | "[" body "]" ;; zero-or-one
297;;; | "{" body [ "||" body ] "}+" ;; one-or-more
298;;; | "{" body [ "||" body ] "}*" ;; zero-or-more
299;;; | "{" body [ "||" body ] "}" ;; zero-or-more
300;;; .
301
(defun ebnf-factor (token)
  "Parse the factor that starts with TOKEN and return its node.

Terminal, non-terminal and special nodes are built from the text in
variable `ebnf-bnf-lex'.  Groups, optionals and lists parse their
contents with `ebnf-body' and signal an error when the matching
closing token is absent.

Return nil, without reading any further token, when TOKEN does not
start a factor."
  (cond
   ;; terminal
   ((eq token 'terminal)
    (ebnf-make-terminal ebnf-bnf-lex))
   ;; non-terminal
   ((eq token 'non-terminal)
    (ebnf-make-non-terminal ebnf-bnf-lex))
   ;; special
   ((eq token 'special)
    (ebnf-make-special ebnf-bnf-lex))
   ;; group: ( body )
   ((eq token 'begin-group)
    (let ((parsed (ebnf-body)))
      (if (not (eq (car parsed) 'end-group))
          (error "Missing `)'."))
      (cdr parsed)))
   ;; optional: [ body ]
   ((eq token 'begin-optional)
    (let ((parsed (ebnf-body)))
      (if (not (eq (car parsed) 'end-optional))
          (error "Missing `]'."))
      (ebnf-token-optional (cdr parsed))))
   ;; list: { body [ || body ] } with an optional + or * suffix
   ((eq token 'begin-list)
    (let* ((parsed (ebnf-body))
           (closer (car parsed))
           (items (cdr parsed))
           separator)
      ;; { A || B }: a second body supplies the separator
      (when (eq closer 'list-separator)
        (setq parsed (ebnf-body)
              closer (car parsed)
              separator (cdr parsed)))
      (cond
       ;; { A }+
       ((eq closer 'end-one-or-more)
        (ebnf-make-one-or-more items separator))
       ;; { A }*
       ((eq closer 'end-zero-or-more)
        (ebnf-make-zero-or-more items separator))
       (t
        (error "Missing `}+', `}*' or `}'.")))))
   ;; TOKEN does not start a factor
   (t
    nil)))
350
351\f
352;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
353;; Lexical analyzer
354
355
(defconst ebnf-bnf-token-table
  (make-vector 256 'error)
  "Vector mapping each character code from 0 to 255 to a lexical token.

Every entry starts out as `error'; `ebnf-bnf-initialize' stores the
other tokens.")
358
359
(defun ebnf-bnf-initialize ()
  "Initialize EBNF token table.

Whole ranges of `ebnf-bnf-token-table' are filled first, then single
characters are overridden.  The entries for `ebnf-lex-comment-char'
and `ebnf-lex-eop-char' are stored last, so they win over any earlier
assignment.  Entries that are never stored here (control characters
and 8-bit control characters) keep whatever the table already held."
  (let ((ranges
         ;; (FROM TO TOKEN): FROM is included, TO is excluded.
         '((?\040 ?\060 non-terminal)   ; printable characters
           (?\060 ?\072 integer)        ; digits
           (?\072 ?\177 non-terminal)   ; printable characters
           (?\240 ?\400 non-terminal))) ; European 8-bit accentuated chars
        (overrides
         '(;; space characters
           (?\013 . space)              ; [VT] vertical tab
           (?\n   . space)              ; [NL] linefeed
           (?\r   . space)              ; [CR] carriage return
           (?\t   . space)              ; [HT] horizontal tab
           (?\040 . space)              ; [SP] space
           ;; form feed character
           (?\f   . form-feed)          ; [FF] form feed
           ;; other lexical characters
           (?\" . terminal)
           (?\? . special)
           (?\( . begin-group)
           (?\) . end-group)
           (?*  . repeat)
           (?-  . except)
           (?=  . equal)
           (?\[ . begin-optional)
           (?\] . end-optional)
           (?\{ . begin-list)
           (?|  . alternative)
           (?\} . end-list)
           (?/  . list)
           (?+  . one-or-more)
           (?$  . default))))
    (while ranges
      (let ((code (nth 0 (car ranges)))
            (end (nth 1 (car ranges)))
            (kind (nth 2 (car ranges))))
        (while (< code end)
          (aset ebnf-bnf-token-table code kind)
          (setq code (1+ code))))
      (setq ranges (cdr ranges)))
    (while overrides
      (aset ebnf-bnf-token-table (car (car overrides)) (cdr (car overrides)))
      (setq overrides (cdr overrides)))
    ;; comment character
    (aset ebnf-bnf-token-table ebnf-lex-comment-char 'comment)
    ;; end of production character
    (aset ebnf-bnf-token-table ebnf-lex-eop-char 'period)))
409
410
(defun ebnf-bnf-lex ()
  "Lexical analyser for EBNF.

Return a lexical token, as a symbol.  Spaces, comments and form feeds
found before the token are skipped.  Return `end-of-input' when point
is at or beyond `ebnf-limit'.

For integer, special, terminal and non-terminal tokens the text of
the token is stored in variable `ebnf-bnf-lex'.  `ebnf-default-p' is
set to t when the token was introduced by the `default' character and
to nil otherwise.

Signal an error on a character that maps to `error', on any character
whose code is above 255, and on a `default' character that is not
followed by the start of a terminal, non-terminal or special.

See documentation for variable `ebnf-bnf-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (if (> (following-char) 255)
                 ;; The token table has no entry for such a character.
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-bnf-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \013\n\r\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ((eq token 'comment)
                 (ebnf-bnf-skip-comment))
                ((eq token 'form-feed)
                 (forward-char)
                 ;; The stored symbol is non-nil, so the loop goes on.
                 (setq ebnf-action 'form-feed))
                (t nil)
                )))
      (setq ebnf-default-p nil)
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Illegal character."))
       ;; default
       ((eq token 'default)
        (forward-char)
        ;; Check the character code before indexing the token table,
        ;; exactly as the skipping loop above does; otherwise a
        ;; character above 255 would signal `args-out-of-range' instead
        ;; of the error intended for this case.
        (if (and (<= (following-char) 255)
                 (memq (aref ebnf-bnf-token-table (following-char))
                       '(terminal non-terminal special)))
            (prog1
                (ebnf-bnf-lex)
              ;; Set afterwards: the recursive call resets it to nil.
              (setq ebnf-default-p t))
          (error "Illegal `default' element.")))
       ;; integer
       ((eq token 'integer)
        (setq ebnf-bnf-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; special: ?special?
       ((eq token 'special)
        (setq ebnf-bnf-lex (concat "?"
                                   (ebnf-string " ->@-~" ?\? "special")
                                   "?"))
        'special)
       ;; terminal: "string"
       ((eq token 'terminal)
        (setq ebnf-bnf-lex (ebnf-unescape-string (ebnf-get-string)))
        'terminal)
       ;; non-terminal or terminal
       ((eq token 'non-terminal)
        (setq ebnf-bnf-lex (ebnf-buffer-substring
                            "!#%&'*-,0-:<>@-Z\\^-z~\240-\377"))
        ;; A name that `ebnf-terminal-regexp' matches from its first to
        ;; its last character is reported as a terminal.
        (let ((case-fold-search ebnf-case-fold-search)
              match)
          (if (and ebnf-terminal-regexp
                   (setq match (string-match ebnf-terminal-regexp
                                             ebnf-bnf-lex))
                   (zerop match)
                   (= (match-end 0) (length ebnf-bnf-lex)))
              'terminal
            'non-terminal)))
       ;; end of list: }+, }*, }
       ((eq token 'end-list)
        (forward-char)
        (cond
         ((= (following-char) ?+)
          (forward-char)
          'end-one-or-more)
         ((= (following-char) ?*)
          (forward-char)
          'end-zero-or-more)
         (t
          'end-zero-or-more)
         ))
       ;; alternative: |, ||
       ((eq token 'alternative)
        (forward-char)
        (if (/= (following-char) ?|)
            'alternative
          (forward-char)
          'list-separator))
       ;; miscellaneous: {, (, ), [, ], ., =, /, +, -, *
       (t
        (forward-char)
        token)
       ))))
506
507
(defconst ebnf-bnf-comment-chars
  "^\n\000-\010\016-\037\177-\237"
  "Character set describing the text of a comment.

It is passed to `skip-chars-forward' and to `ebnf-buffer-substring'.
The leading `^' negates the set: a newline and the control characters
in the listed ranges are not part of it.")
509
510
(defun ebnf-bnf-skip-comment ()
  "Skip the comment that starts at point.

The character right after the comment character selects what is done.
While `ebnf-eps-executing' is non-nil, `[' adds and `]' removes an
EPS context named by the value of `ebnf-bnf-eps-filename'.  In every
other case `ebnf-action' is set from `ebnf-comment-table' and the
comment text is skipped, going no further than `ebnf-limit'.

Return nil when point ends up at or beyond `ebnf-limit'.  Otherwise
move over the newline that ends the comment and return t, or signal
an error when the next character is not a newline."
  (forward-char)                        ; over the comment character
  (let ((lead (following-char)))
    (if (and ebnf-eps-executing
             (or (= lead ?\[) (= lead ?\])))
        (if (= lead ?\[)
            ;; open EPS file
            (ebnf-eps-add-context (ebnf-bnf-eps-filename))
          ;; close EPS file
          (ebnf-eps-remove-context (ebnf-bnf-eps-filename)))
      ;; any other action in comment
      (setq ebnf-action (aref ebnf-comment-table lead))
      (skip-chars-forward ebnf-bnf-comment-chars ebnf-limit)))
  ;; check for a valid end of comment
  (and (< (point) ebnf-limit)
       (if (= (following-char) ?\n)
           (progn
             (forward-char)
             t)
         (error "Illegal character."))))
534
535
(defun ebnf-bnf-eps-filename ()
  "Move forward one character, then return the text that follows.

The text is obtained by calling `ebnf-buffer-substring' with
`ebnf-bnf-comment-chars'."
  (forward-char 1)
  (ebnf-buffer-substring ebnf-bnf-comment-chars))
539
540
(defun ebnf-unescape-string (str)
  "Return STR with the backslash of each escape sequence removed.

An escape sequence is a backslash together with the character that
follows it; only that character is kept.  A backslash that is the
last character of STR has nothing to escape and is kept.  STR itself
is returned when it holds no escape sequence; otherwise the result is
a new string."
  (let* ((len (length str))
         (last (1- len))                ; index of the final character
         (escapes 0)
         (src 0))
    ;; First pass: count the escape sequences.  Scanning stops before
    ;; the final character, which cannot start a complete sequence.
    (while (< src last)
      (when (= (aref str src) ?\\)
        (setq escapes (1+ escapes)
              src (1+ src)))            ; step over the escaped character
      (setq src (1+ src)))
    (if (zerop escapes)
        str
      ;; Second pass: copy everything but the counted backslashes.
      (let ((result (make-string (- len escapes) ?\ ))
            (dst 0))
        (setq src 0)
        (while (< src len)
          ;; Once every counted escape is used up, a backslash is an
          ;; ordinary character again.
          (when (and (> escapes 0)
                     (= (aref str src) ?\\))
            (setq src (1+ src)
                  escapes (1- escapes)))
          (aset result dst (aref str src))
          (setq dst (1+ dst)
                src (1+ src)))
        result))))
575
576\f
577;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
578
579
580(provide 'ebnf-bnf)
581
582
583;;; ebnf-bnf.el ends here