Remove some function declarations, no longer needed or correct
[bpt/emacs.git] / lisp / progmodes / ebnf-iso.el
CommitLineData
e8af40ee 1;;; ebnf-iso.el --- parser for ISO EBNF
984ae001 2
ba318903 3;; Copyright (C) 1999-2014 Free Software Foundation, Inc.
984ae001 4
ac4780a1
VJL
5;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
6;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
ae16d111 7;; Keywords: wp, ebnf, PostScript
12059709 8;; Old-Version: 1.9
bd78fa1d 9;; Package: ebnf2ps
984ae001 10
8d9ea7b1 11;; This file is part of GNU Emacs.
984ae001 12
b1fc2b50 13;; GNU Emacs is free software: you can redistribute it and/or modify
984ae001 14;; it under the terms of the GNU General Public License as published by
b1fc2b50
GM
15;; the Free Software Foundation, either version 3 of the License, or
16;; (at your option) any later version.
984ae001 17
8d9ea7b1 18;; GNU Emacs is distributed in the hope that it will be useful,
984ae001
GM
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
b1fc2b50 24;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
984ae001
GM
25
26;;; Commentary:
27
28;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29;;
30;;
31;; This is part of ebnf2ps package.
32;;
33;; This package defines a parser for ISO EBNF.
34;;
35;; See ebnf2ps.el for documentation.
36;;
37;;
38;; ISO EBNF Syntax
39;; ---------------
40;;
41;; See the URL:
42;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
43;; ("International Standard of the ISO EBNF Notation").
44;;
45;;
46;; ISO EBNF = syntax rule, {syntax rule};
47;;
48;; syntax rule = meta identifier, '=', definition list, ';';
49;;
50;; definition list = single definition, {'|', single definition};
51;;
52;; single definition = term, {',', term};
53;;
54;; term = factor, ['-', exception];
55;;
56;; exception = factor (* without <meta identifier> *);
57;;
58;; factor = [integer, '*'], primary;
59;;
60;; primary = optional sequence | repeated sequence | special sequence
61;; | grouped sequence | meta identifier | terminal string
62;; | empty;
63;;
64;; empty = ;
65;;
66;; optional sequence = '[', definition list, ']';
67;;
68;; repeated sequence = '{', definition list, '}';
69;;
70;; grouped sequence = '(', definition list, ')';
71;;
72;; terminal string = "'", character - "'", {character - "'"}, "'"
73;; | '"', character - '"', {character - '"'}, '"';
74;;
75;; special sequence = '?', {character - '?'}, '?';
76;;
77;; meta identifier = letter, { letter | decimal digit | ' ' };
78;;
79;; integer = decimal digit, {decimal digit};
80;;
81;; comment = '(*', {comment symbol}, '*)';
82;;
83;; comment symbol = comment (* <== NESTED COMMENT *)
84;; | terminal string | special sequence | character;
85;;
86;; letter = ? A-Z a-z ?;
87;;
88;; decimal digit = ? 0-9 ?;
89;;
90;; character = letter | decimal digit
91;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
92;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
93;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
94;;
95;;
96;; There is also the following alternative representation:
97;;
98;; STANDARD ALTERNATIVE
99;; | ==> / or !
100;; [ ==> (/
101;; ] ==> /)
102;; { ==> (:
103;; } ==> :)
104;; ; ==> .
105;;
106;;
107;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
108;; -------------------------------------------------
109;;
110;; ISO EBNF accepts the characters given by <character> production above,
111;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
eac9c0ef 112;; (^L), any other characters are invalid. But ebnf2ps accepts also the
60df7255
VJL
113;; European 8-bit accented characters (from \240 to \377) and underscore
114;; (_).
984ae001
GM
115;;
116;;
117;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
118
e8af40ee 119;;; Code:
984ae001
GM
120
121
122(require 'ebnf-otz)
123
124
(defvar ebnf-iso-lex nil
  "Text of the token most recently scanned by function `ebnf-iso-lex'.
The lexer stores here the string it read for integer, special
sequence, terminal string and meta identifier tokens; the parser
functions read it right after receiving such a token.")
127
128
1002b9b5
VJL
(defvar ebnf-no-meta-identifier nil
  "Non-nil means function `ebnf-iso-lex' rejects meta identifiers.
`ebnf-iso-term' binds this to t while it parses the exception part
of a term, so that the lexer signals an error if it meets a meta
identifier there.")
984ae001
GM
131
132\f
133;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
f504d516 134;; Syntactic analyzer
984ae001
GM
135
136
137;;; ISO EBNF = syntax rule, {syntax rule};
138
(defun ebnf-iso-parser (start)
  "ISO EBNF parser.

Parse the buffer text from position START up to `ebnf-limit' as a
sequence of syntax rules and return the list of parsed rules, most
recently parsed first.  Rules for which `ebnf-add-empty-rule-list'
returns non-nil are left out of the result.

Signal an error if no token at all is found.  On normal exit, point
is moved back to where it was when the function was called."
  (let* ((saved-point (point))
         ;; OFFSET and SPAN are only used for the progress message.
         (offset (1- start))
         (span (1+ (- ebnf-limit start)))
         (tok (progn
                (goto-char start)
                (ebnf-iso-lex)))
         rules)
    (when (eq tok 'end-of-input)
      (error "Invalid ISO EBNF file format"))
    (while (not (eq tok 'end-of-input))
      (ebnf-message-float
       "Parsing...%s%%"
       (/ (* (- (point) offset) 100.0) span))
      ;; `ebnf-iso-syntax-rule' yields (NEXT-TOKEN . RULE).
      (let* ((result (ebnf-iso-syntax-rule tok))
             (rule (cdr result)))
        (setq tok (car result))
        (unless (ebnf-add-empty-rule-list rule)
          (push rule rules))))
    (goto-char saved-point)
    rules))
160
161
162;;; syntax rule = meta identifier, '=', definition list, ';';
163
(defun ebnf-iso-syntax-rule (token)
  "Parse one syntax rule whose first token, TOKEN, was already read.

TOKEN must be `non-terminal'; its text is taken from variable
`ebnf-iso-lex'.  The rule must continue with `=', a definition list
and a terminating `period' token.

Return a cons (NEXT-TOKEN . PRODUCTION), where NEXT-TOKEN is the
token following the rule and PRODUCTION is built by
`ebnf-make-production' from the rule name, its body and the value
`ebnf-action' had on entry (`ebnf-action' is reset to nil)."
  (let ((name ebnf-iso-lex)
        (pending-action ebnf-action))
    (setq ebnf-action nil)
    (unless (eq token 'non-terminal)
      (error "Invalid meta identifier syntax rule"))
    (unless (eq (ebnf-iso-lex) 'equal)
      (error "Invalid syntax rule: missing `='"))
    ;; PARSED is (TERMINATING-TOKEN . BODY).
    (let ((parsed (ebnf-iso-definition-list)))
      (unless (eq (car parsed) 'period)
        (error "Invalid syntax rule: missing `;' or `.'"))
      (ebnf-eps-add-production name)
      ;; Read the look-ahead token before building the production.
      (let ((next (ebnf-iso-lex)))
        (cons next
              (ebnf-make-production name (cdr parsed) pending-action))))))
180
181
182;;; definition list = single definition, {'|', single definition};
183
(defun ebnf-iso-definition-list ()
  "Parse a definition list: single definitions separated by `alternative'.

Each single definition that is followed by an `alternative' token is
collected (most recent first); the last one, still paired with the
token that ended it, is handed together with the collected ones to
`ebnf-token-alternative', whose value is returned."
  (let ((alternatives nil)
        (current (ebnf-iso-single-definition)))
    (while (eq (car current) 'alternative)
      (push (cdr current) alternatives)
      (setq current (ebnf-iso-single-definition)))
    (ebnf-token-alternative alternatives current)))
191
192
193;;; single definition = term, {',', term};
194
(defun ebnf-iso-single-definition ()
  "Parse a single definition: terms separated by `catenate' tokens.

Return a cons (NEXT-TOKEN . SEQUENCE), where NEXT-TOKEN is the token
that ended the definition and SEQUENCE is the value of
`ebnf-token-sequence' applied to the parsed terms (most recent
first).  Scanning stops at the first empty term or at the first term
not followed by `catenate'."
  (let ((terms nil)
        (scanning t)
        next element)
    (while scanning
      ;; `ebnf-iso-term' yields (NEXT-TOKEN . TERM); TERM is nil when empty.
      (let ((parsed (ebnf-iso-term (ebnf-iso-lex))))
        (setq next (car parsed)
              element (cdr parsed)))
      (if (and element (eq next 'catenate))
          (push element terms)
        (setq scanning nil)))
    (cons next
          (ebnf-token-sequence (if element
                                   (cons element terms)
                                 terms)))))
984ae001
GM
206
207
208;;; term = factor, ['-', exception];
209;;;
210;;; exception = factor (* without <meta identifier> *);
211
(defun ebnf-iso-term (token)
  "Parse a term starting at TOKEN: a factor, optionally `-' and an exception.

When the factor is not followed by an `except' token, return the
factor result unchanged.  Otherwise parse a second factor with
`ebnf-no-meta-identifier' bound to t and return the value of
`ebnf-token-except' applied to both."
  (let ((lhs (ebnf-iso-factor token)))
    (cond
     ;; factor - exception
     ((eq (car lhs) 'except)
      (let ((ebnf-no-meta-identifier t))
        (ebnf-token-except (cdr lhs)
                           (ebnf-iso-factor (ebnf-iso-lex)))))
     ;; plain factor
     (t lhs))))
220
221
222;;; factor = [integer, '*'], primary;
223
(defun ebnf-iso-factor (token)
  "Parse a factor starting at TOKEN: an optional `INTEGER *' and a primary.

Without a leading `integer' token this is just `ebnf-iso-primary'.
With one, the integer text (from variable `ebnf-iso-lex') must be
followed by a `repeat' token; the following primary is then wrapped
by `ebnf-token-repeat'."
  (cond
   ((not (eq token 'integer))
    (ebnf-iso-primary token))
   (t
    ;; Remember the integer text before the lexer overwrites it.
    (let ((count ebnf-iso-lex))
      (unless (eq (ebnf-iso-lex) 'repeat)
        (error "Missing `*'"))
      (ebnf-token-repeat count (ebnf-iso-primary (ebnf-iso-lex)))))))
231
232
233;;; primary = optional sequence | repeated sequence | special sequence
234;;; | grouped sequence | meta identifier | terminal string
235;;; | empty;
236;;;
237;;; empty = ;
238;;;
239;;; optional sequence = '[', definition list, ']';
240;;;
241;;; repeated sequence = '{', definition list, '}';
242;;;
243;;; grouped sequence = '(', definition list, ')';
244;;;
245;;; terminal string = "'", character - "'", {character - "'"}, "'"
246;;; | '"', character - '"', {character - '"'}, '"';
247;;;
248;;; special sequence = '?', {character - '?'}, '?';
249;;;
250;;; meta identifier = letter, {letter | decimal digit | ' '};
251
(defun ebnf-iso-primary (token)
  "Parse a primary whose first token is TOKEN.

Return a cons (NEXT-TOKEN . NODE).  For a terminal string, meta
identifier, special sequence, or a grouped, optional or repeated
sequence, NODE is the corresponding parse node and NEXT-TOKEN is
freshly read from the lexer.  For any other TOKEN the primary is
empty: NODE is nil and NEXT-TOKEN is TOKEN itself, left for the
caller to interpret."
  (let ((node
         (cond
          ;; terminal string
          ((eq token 'terminal)
           (ebnf-make-terminal ebnf-iso-lex))
          ;; meta identifier
          ((eq token 'non-terminal)
           (ebnf-make-non-terminal ebnf-iso-lex))
          ;; special sequence
          ((eq token 'special)
           (ebnf-make-special ebnf-iso-lex))
          ;; grouped sequence
          ((eq token 'begin-group)
           (let ((inner (ebnf-iso-definition-list)))
             (unless (eq (car inner) 'end-group)
               (error "Missing `)'"))
             (cdr inner)))
          ;; optional sequence
          ((eq token 'begin-optional)
           (let ((inner (ebnf-iso-definition-list)))
             (unless (eq (car inner) 'end-optional)
               (error "Missing `]' or `/)'"))
             (ebnf-token-optional (cdr inner))))
          ;; repeated sequence
          ((eq token 'begin-zero-or-more)
           (let ((inner (ebnf-iso-definition-list)))
             (unless (eq (car inner) 'end-zero-or-more)
               (error "Missing `}' or `:)'"))
             (ebnf-make-zero-or-more (cdr inner)))))))
    ;; No clause matched (or it produced nil): empty primary.
    (if node
        (cons (ebnf-iso-lex) node)
      (list token))))
291
292\f
293;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
294;; Lexical analyzer
295
296
(defconst ebnf-iso-token-table
  ;; Every entry starts as `error'; the ranges and overrides below
  ;; then assign a lexical class to each accepted character.
  (let ((table (make-vector 256 'error)))
    ;; Half-open code ranges [FROM, TO) and the class they receive.
    (dolist (range '((?\040 ?\060 character)    ; printable characters
                     (?\060 ?\072 integer)      ; digits
                     (?\072 ?\101 character)
                     (?\101 ?\133 non-terminal) ; upper case letters
                     (?\133 ?\141 character)
                     (?\141 ?\173 non-terminal) ; lower case letters
                     (?\173 ?\177 character)
                     ;; European 8-bit accented characters
                     (?\240 ?\400 non-terminal)))
      (let ((code (car range))
            (end (nth 1 range))
            (class (nth 2 range)))
        (while (< code end)
          (aset table code class)
          (setq code (1+ code)))))
    ;; Single-character overrides, applied after the ranges.
    (dolist (override '((?\013 . space)         ; [VT] vertical tab
                        (?\n . space)           ; [NL] linefeed
                        (?\r . space)           ; [CR] carriage return
                        (?\t . space)           ; [HT] horizontal tab
                        (?\s . space)           ; [SP] space
                        (?\f . form-feed)       ; [FF] form feed
                        (?_ . non-terminal)
                        (?\" . double-terminal)
                        (?\' . single-terminal)
                        (?\? . special)
                        (?* . repeat)
                        (?\, . catenate)
                        (?- . except)
                        (?= . equal)
                        (?\) . end-group)))
      (aset table (car override) (cdr override)))
    table)
  "Vector used to map characters to a lexical token.
It has 256 entries indexed by character code.  The entries for the
bracket, alternative and terminator characters are filled in later
by `ebnf-iso-initialize'.")
351
352
(defun ebnf-iso-initialize ()
  "Initialize ISO EBNF token table.

Store into `ebnf-iso-token-table' the token classes of the bracket,
alternative and terminator characters.  When `ebnf-iso-alternative-p'
is non-nil the alternative representation is installed (`(' becomes
`left-parenthesis', `/' `slash', `:' `colon', `!' `alternative' and
`.' `period'); otherwise the standard one is installed.  Return the
last class stored."
  (let ((entries
         (if ebnf-iso-alternative-p
             ;; alternative lexical characters
             '((?\( . left-parenthesis)
               (?\[ . character)
               (?\] . character)
               (?\{ . character)
               (?\} . character)
               (?| . character)
               (?\; . character)
               (?/ . slash)
               (?! . alternative)
               (?: . colon)
               (?\. . period))
           ;; standard lexical characters
           '((?\( . begin-parenthesis)
             (?\[ . begin-optional)
             (?\] . end-optional)
             (?\{ . begin-zero-or-more)
             (?\} . end-zero-or-more)
             (?| . alternative)
             (?\; . period)
             (?/ . character)
             (?! . character)
             (?: . character)
             (?\. . character))))
        stored)
    (dolist (entry entries)
      (setq stored (aset ebnf-iso-token-table (car entry) (cdr entry))))
    stored))
381
382
8a1e4eeb
GM
;; The range "\240-\377" is not written in the string itself; it is
;; passed separately to `ebnf-range-regexp' (which see).
(defconst ebnf-iso-non-terminal-chars
  (ebnf-range-regexp " 0-9A-Za-z_" ?\240 ?\377)
  "Character set scanned by `ebnf-iso-lex' to read a meta identifier.
It covers space, decimal digits, ASCII letters and underscore, with
the range from \\240 to \\377 added through `ebnf-range-regexp'.")
8a1e4eeb
GM
386
387
(defun ebnf-iso-lex ()
  "Lexical analyzer for ISO EBNF.

Return a lexical token.

Scanning starts at point and never goes beyond `ebnf-limit'; when
nothing is left the token is `end-of-input'.  Spaces, form feeds and
comments are skipped first (a form feed sets `ebnf-action' to
`form-feed').  For `integer', `special', `terminal' and
`non-terminal' tokens the text read is stored in variable
`ebnf-iso-lex'.

Signal an error on a character whose table entry is `error' (or
whose code is above 255), and on a meta identifier while
`ebnf-no-meta-identifier' is non-nil.

See documentation for variable `ebnf-iso-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; Skip spaces and comments.  The loop ends with TOKEN holding
      ;; the class of the first significant character.
      (while (if (> (following-char) 255)
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-iso-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \013\n\r\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ;; An opening parenthesis may start a comment.  Note
                ;; that point is moved past the `(' in both cases, so
                ;; the parenthesis branches below must not skip it
                ;; again.
                ((memq token '(begin-parenthesis left-parenthesis))
                 (forward-char)
                 (if (/= (following-char) ?*)
                     ;; no comment
                     nil
                   ;; comment
                   (ebnf-iso-skip-comment)
                   t))
                ((eq token 'form-feed)
                 (forward-char)
                 (setq ebnf-action 'form-feed))
                (t nil))))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Invalid character"))
       ;; integer
       ((eq token 'integer)
        (setq ebnf-iso-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; special: ?special?
       ((eq token 'special)
        (setq ebnf-iso-lex (concat (and ebnf-special-show-delimiter "?")
                                   (ebnf-string " ->@-~" ?\? "special")
                                   (and ebnf-special-show-delimiter "?")))
        'special)
       ;; terminal: "string"
       ((eq token 'double-terminal)
        (setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal"))
        'terminal)
       ;; terminal: 'string'
       ((eq token 'single-terminal)
        (setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal"))
        'terminal)
       ;; non-terminal
       ((eq token 'non-terminal)
        (setq ebnf-iso-lex
              (ebnf-iso-normalize
               (ebnf-trim-right
                (ebnf-buffer-substring ebnf-iso-non-terminal-chars))))
        (and ebnf-no-meta-identifier
             (error "Exception sequence should not contain a meta identifier"))
        'non-terminal)
       ;; begin optional, begin list or begin group (alternative
       ;; representation).  Point is already just after the `(' (see
       ;; the skip loop above), so the character at point is the one
       ;; that selects `(/', `(:' or a plain group.  Advancing once
       ;; more here would skip that character and `(/' and `(:' would
       ;; never be recognized.
       ((eq token 'left-parenthesis)
        (cond ((= (following-char) ?/)
               (forward-char)
               'begin-optional)
              ((= (following-char) ?:)
               (forward-char)
               'begin-zero-or-more)
              (t
               'begin-group)))
       ;; end optional or alternative
       ((eq token 'slash)
        (forward-char)
        (if (/= (following-char) ?\))
            'alternative
          (forward-char)
          'end-optional))
       ;; end list
       ((eq token 'colon)
        (forward-char)
        (if (/= (following-char) ?\))
            'character
          (forward-char)
          'end-zero-or-more))
       ;; begin group (standard representation); `(' already skipped
       ((eq token 'begin-parenthesis)
        'begin-group)
       ;; miscellaneous
       (t
        (forward-char)
        token)))))
489
490
2197ec3b
GM
;; The range "\177-\237" is not written in the string itself; it is
;; passed separately to `ebnf-range-regexp' (which see).
(defconst ebnf-iso-comment-chars
  (ebnf-range-regexp "^*(\000-\010\016-\037" ?\177 ?\237)
  "Character set skipped inside a comment by `ebnf-iso-skip-comment'.
It is a negated set: scanning stops at `*', at `(' and at the
control characters listed, the range from \\177 to \\237 being added
through `ebnf-range-regexp'.")
984ae001
GM
494
495
(defun ebnf-iso-skip-comment ()
  "Skip a comment, possibly containing nested comments.

Point must be on the `*' of the opening `(*'.  The character right
after it selects an action: while `ebnf-eps-executing' is non-nil,
`[', `]', `H' and `F' pass the name read by `ebnf-iso-eps-filename'
to the EPS context, header and footer functions; in every other case
`ebnf-action' is set from `ebnf-comment-table'.

Then move point just after the matching `*)'.  Signal an error if
`ebnf-limit' is reached first or if an invalid character is found."
  (forward-char)
  ;; MARKER is nil unless EPS handling is active, so that none of the
  ;; EPS clauses can match in that case.
  (let ((marker (and ebnf-eps-executing (following-char))))
    (cond
     ;; open EPS file
     ((eq marker ?\[)
      (ebnf-eps-add-context (ebnf-iso-eps-filename)))
     ;; close EPS file
     ((eq marker ?\])
      (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
     ;; EPS header
     ((eq marker ?H)
      (ebnf-eps-header-comment (ebnf-iso-eps-filename)))
     ;; EPS footer
     ((eq marker ?F)
      (ebnf-eps-footer-comment (ebnf-iso-eps-filename)))
     ;; any other action in comment
     (t
      (setq ebnf-action (aref ebnf-comment-table (following-char))))))
  ;; DEPTH counts the comments still open.
  (let ((depth 1))
    (while (> depth 0)
      (skip-chars-forward ebnf-iso-comment-chars ebnf-limit)
      (when (>= (point) ebnf-limit)
        (error "Missing end of comment: `*)'"))
      (let ((here (following-char)))
        (cond
         ((= here ?*)
          (skip-chars-forward "*" ebnf-limit)
          (when (= (following-char) ?\))
            ;; end of comment
            (forward-char)
            (setq depth (1- depth))))
         ((= here ?\()
          (skip-chars-forward "(" ebnf-limit)
          (when (= (following-char) ?*)
            ;; beginning of comment
            (forward-char)
            (setq depth (1+ depth))))
         (t
          (error "Invalid character")))))))
535
536
(defun ebnf-iso-eps-filename ()
  "Return the text following the action character of an EPS comment.

Move one character forward, then return, without text properties,
the buffer text from there up to the first of: `ebnf-limit', a
newline, the `*' that immediately precedes a `)', the `(' of a `(*',
or any other character excluded by `ebnf-iso-comment-chars'.  Point
is left at the end of the returned text."
  (forward-char)
  (let ((begin (point))
        ;; Same negated set as for comments, stopping at newline too.
        (stoppers (concat ebnf-iso-comment-chars "\n"))
        (end nil))
    (while (null end)
      (skip-chars-forward stoppers ebnf-limit)
      (cond
       ((>= (point) ebnf-limit)
        (setq end (point)))
       ((= (following-char) ?*)
        (skip-chars-forward "*" ebnf-limit)
        ;; Only a run of `*' ending in `)' closes the name; step back
        ;; onto the last `*' so the caller still sees `*)'.
        (when (= (following-char) ?\))
          (backward-char)
          (setq end (point))))
       ((= (following-char) ?\()
        (forward-char)
        ;; A `(*' starts a nested comment; step back onto the `('.
        (when (= (following-char) ?*)
          (backward-char)
          (setq end (point))))
       (t
        (setq end (point)))))
    (buffer-substring-no-properties begin end)))
564
565
(defun ebnf-iso-normalize (str)
  "Return STR with every run of spaces reduced to a single space.

When `ebnf-iso-normalize-p' is nil, or when STR has no run of two or
more spaces, STR itself is returned; otherwise a new string is
built."
  (if (not ebnf-iso-normalize-p)
      str
    (let ((len (length str))
          (extra 0)
          (prev-space nil)
          (i 0))
      ;; First pass: count the spaces that directly follow a space.
      (while (< i len)
        (let ((is-space (= (aref str i) ?\s)))
          (when (and is-space prev-space)
            (setq extra (1+ extra)))
          (setq prev-space is-space
                i (1+ i))))
      (if (zerop extra)
          ;; nothing to remove
          str
        ;; Second pass: copy everything but those extra spaces.
        (let ((out (make-string (- len extra) ?\s))
              (j 0))
          (setq i 0
                prev-space nil)
          (while (< i len)
            (let* ((ch (aref str i))
                   (is-space (= ch ?\s)))
              (unless (and is-space prev-space)
                (aset out j ch)
                (setq j (1+ j)))
              (setq prev-space is-space
                    i (1+ i))))
          out)))))
606
607\f
608;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
609
610
611(provide 'ebnf-iso)
612
613
614;;; ebnf-iso.el ends here