Fix up comment convention on the arch-tag lines.
[bpt/emacs.git] / lisp / progmodes / ebnf-iso.el
CommitLineData
e8af40ee 1;;; ebnf-iso.el --- parser for ISO EBNF
984ae001 2
4e643dd2 3;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
3ced5caa 4;; Free Software Foundation, Inc.
984ae001 5
ac4780a1
VJL
6;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
ae16d111 8;; Keywords: wp, ebnf, PostScript
3ced5caa 9;; Version: 1.9
984ae001 10
8d9ea7b1 11;; This file is part of GNU Emacs.
984ae001 12
8d9ea7b1 13;; GNU Emacs is free software; you can redistribute it and/or modify
984ae001 14;; it under the terms of the GNU General Public License as published by
1a484753 15;; the Free Software Foundation; either version 3, or (at your option)
984ae001
GM
16;; any later version.
17
8d9ea7b1 18;; GNU Emacs is distributed in the hope that it will be useful,
984ae001
GM
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
24;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
25;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26;; Boston, MA 02110-1301, USA.
984ae001
GM
27
28;;; Commentary:
29
30;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31;;
32;;
33;; This is part of ebnf2ps package.
34;;
35;; This package defines a parser for ISO EBNF.
36;;
37;; See ebnf2ps.el for documentation.
38;;
39;;
40;; ISO EBNF Syntax
41;; ---------------
42;;
43;; See the URL:
44;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
45;; ("International Standard of the ISO EBNF Notation").
46;;
47;;
48;; ISO EBNF = syntax rule, {syntax rule};
49;;
50;; syntax rule = meta identifier, '=', definition list, ';';
51;;
52;; definition list = single definition, {'|', single definition};
53;;
54;; single definition = term, {',', term};
55;;
56;; term = factor, ['-', exception];
57;;
58;; exception = factor (* without <meta identifier> *);
59;;
60;; factor = [integer, '*'], primary;
61;;
62;; primary = optional sequence | repeated sequence | special sequence
63;; | grouped sequence | meta identifier | terminal string
64;; | empty;
65;;
66;; empty = ;
67;;
68;; optional sequence = '[', definition list, ']';
69;;
70;; repeated sequence = '{', definition list, '}';
71;;
72;; grouped sequence = '(', definition list, ')';
73;;
74;; terminal string = "'", character - "'", {character - "'"}, "'"
75;; | '"', character - '"', {character - '"'}, '"';
76;;
77;; special sequence = '?', {character - '?'}, '?';
78;;
79;; meta identifier = letter, { letter | decimal digit | ' ' };
80;;
81;; integer = decimal digit, {decimal digit};
82;;
83;; comment = '(*', {comment symbol}, '*)';
84;;
85;; comment symbol = comment (* <== NESTED COMMENT *)
86;; | terminal string | special sequence | character;
87;;
88;; letter = ? A-Z a-z ?;
89;;
90;; decimal digit = ? 0-9 ?;
91;;
92;; character = letter | decimal digit
93;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
94;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
95;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
96;;
97;;
98;; There is also the following alternative representation:
99;;
100;; STANDARD ALTERNATIVE
101;; | ==> / or !
102;; [ ==> (/
103;; ] ==> /)
104;; { ==> (:
105;; } ==> :)
106;; ; ==> .
107;;
108;;
109;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
110;; -------------------------------------------------
111;;
112;; ISO EBNF accepts the characters given by <character> production above,
113;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
eac9c0ef 114;; (^L), any other characters are invalid. But ebnf2ps accepts also the
60df7255
VJL
115;; european 8-bit accentuated characters (from \240 to \377) and underscore
116;; (_).
984ae001
GM
117;;
118;;
119;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
120
e8af40ee 121;;; Code:
984ae001
GM
122
123
124(require 'ebnf-otz)
125
126
127(defvar ebnf-iso-lex nil
128 "Value returned by `ebnf-iso-lex' function.")
129
130
1002b9b5
VJL
131(defvar ebnf-no-meta-identifier nil
132 "Used by `ebnf-iso-term' and `ebnf-iso-lex' functions.")
984ae001
GM
133
134\f
135;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
f504d516 136;; Syntactic analyzer
984ae001
GM
137
138
139;;; ISO EBNF = syntax rule, {syntax rule};
140
141(defun ebnf-iso-parser (start)
142 "ISO EBNF parser."
143 (let ((total (+ (- ebnf-limit start) 1))
144 (bias (1- start))
145 (origin (point))
146 syntax-list token rule)
147 (goto-char start)
148 (setq token (ebnf-iso-lex))
149 (and (eq token 'end-of-input)
e8af40ee 150 (error "Invalid ISO EBNF file format"))
984ae001
GM
151 (while (not (eq token 'end-of-input))
152 (ebnf-message-float
153 "Parsing...%s%%"
154 (/ (* (- (point) bias) 100.0) total))
155 (setq token (ebnf-iso-syntax-rule token)
156 rule (cdr token)
157 token (car token))
158 (or (ebnf-add-empty-rule-list rule)
159 (setq syntax-list (cons rule syntax-list))))
160 (goto-char origin)
161 syntax-list))
162
163
164;;; syntax rule = meta identifier, '=', definition list, ';';
165
166(defun ebnf-iso-syntax-rule (token)
167 (let ((header ebnf-iso-lex)
168 (action ebnf-action)
169 body)
170 (setq ebnf-action nil)
171 (or (eq token 'non-terminal)
e8af40ee 172 (error "Invalid meta identifier syntax rule"))
984ae001 173 (or (eq (ebnf-iso-lex) 'equal)
e8af40ee 174 (error "Invalid syntax rule: missing `='"))
984ae001
GM
175 (setq body (ebnf-iso-definition-list))
176 (or (eq (car body) 'period)
e8af40ee 177 (error "Invalid syntax rule: missing `;' or `.'"))
984ae001
GM
178 (setq body (cdr body))
179 (ebnf-eps-add-production header)
180 (cons (ebnf-iso-lex)
181 (ebnf-make-production header body action))))
182
183
184;;; definition list = single definition, {'|', single definition};
185
186(defun ebnf-iso-definition-list ()
187 (let (body sequence)
188 (while (eq (car (setq sequence (ebnf-iso-single-definition)))
189 'alternative)
190 (setq sequence (cdr sequence)
191 body (cons sequence body)))
192 (ebnf-token-alternative body sequence)))
193
194
195;;; single definition = term, {',', term};
196
197(defun ebnf-iso-single-definition ()
198 (let (token seq term)
199 (while (and (setq term (ebnf-iso-term (ebnf-iso-lex))
200 token (car term)
201 term (cdr term))
202 (eq token 'catenate))
203 (setq seq (cons term seq)))
204 (cons token
6411a60a
VJL
205 (ebnf-token-sequence (if term
206 (cons term seq)
207 seq)))))
984ae001
GM
208
209
210;;; term = factor, ['-', exception];
211;;;
212;;; exception = factor (* without <meta identifier> *);
213
214(defun ebnf-iso-term (token)
215 (let ((factor (ebnf-iso-factor token)))
216 (if (not (eq (car factor) 'except))
217 ;; factor
218 factor
219 ;; factor - exception
220 (let ((ebnf-no-meta-identifier t))
221 (ebnf-token-except (cdr factor) (ebnf-iso-factor (ebnf-iso-lex)))))))
222
223
224;;; factor = [integer, '*'], primary;
225
226(defun ebnf-iso-factor (token)
227 (if (eq token 'integer)
228 (let ((times ebnf-iso-lex))
229 (or (eq (ebnf-iso-lex) 'repeat)
e8af40ee 230 (error "Missing `*'"))
984ae001
GM
231 (ebnf-token-repeat times (ebnf-iso-primary (ebnf-iso-lex))))
232 (ebnf-iso-primary token)))
233
234
235;;; primary = optional sequence | repeated sequence | special sequence
236;;; | grouped sequence | meta identifier | terminal string
237;;; | empty;
238;;;
239;;; empty = ;
240;;;
241;;; optional sequence = '[', definition list, ']';
242;;;
243;;; repeated sequence = '{', definition list, '}';
244;;;
245;;; grouped sequence = '(', definition list, ')';
246;;;
247;;; terminal string = "'", character - "'", {character - "'"}, "'"
248;;; | '"', character - '"', {character - '"'}, '"';
249;;;
250;;; special sequence = '?', {character - '?'}, '?';
251;;;
252;;; meta identifier = letter, {letter | decimal digit};
253
254(defun ebnf-iso-primary (token)
255 (let ((primary
256 (cond
257 ;; terminal string
258 ((eq token 'terminal)
259 (ebnf-make-terminal ebnf-iso-lex))
260 ;; meta identifier
261 ((eq token 'non-terminal)
262 (ebnf-make-non-terminal ebnf-iso-lex))
263 ;; special sequence
264 ((eq token 'special)
265 (ebnf-make-special ebnf-iso-lex))
266 ;; grouped sequence
267 ((eq token 'begin-group)
268 (let ((body (ebnf-iso-definition-list)))
269 (or (eq (car body) 'end-group)
e8af40ee 270 (error "Missing `)'"))
984ae001
GM
271 (cdr body)))
272 ;; optional sequence
273 ((eq token 'begin-optional)
274 (let ((body (ebnf-iso-definition-list)))
275 (or (eq (car body) 'end-optional)
e8af40ee 276 (error "Missing `]' or `/)'"))
984ae001
GM
277 (ebnf-token-optional (cdr body))))
278 ;; repeated sequence
279 ((eq token 'begin-zero-or-more)
280 (let* ((body (ebnf-iso-definition-list))
281 (repeat (cdr body)))
282 (or (eq (car body) 'end-zero-or-more)
e8af40ee 283 (error "Missing `}' or `:)'"))
984ae001
GM
284 (ebnf-make-zero-or-more repeat)))
285 ;; empty
286 (t
287 nil)
288 )))
289 (cons (if primary
290 (ebnf-iso-lex)
291 token)
292 primary)))
293
294\f
295;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
296;; Lexical analyzer
297
298
299(defconst ebnf-iso-token-table
300 ;; control character & 8-bit character are set to `error'
301 (let ((table (make-vector 256 'error))
302 (char ?\040))
303 ;; printable character
304 (while (< char ?\060)
305 (aset table char 'character)
306 (setq char (1+ char)))
307 ;; digits:
308 (while (< char ?\072)
309 (aset table char 'integer)
310 (setq char (1+ char)))
311 (while (< char ?\101)
312 (aset table char 'character)
313 (setq char (1+ char)))
314 ;; upper case letters:
315 (while (< char ?\133)
316 (aset table char 'non-terminal)
317 (setq char (1+ char)))
318 (while (< char ?\141)
319 (aset table char 'character)
320 (setq char (1+ char)))
321 ;; lower case letters:
322 (while (< char ?\173)
323 (aset table char 'non-terminal)
324 (setq char (1+ char)))
325 (while (< char ?\177)
326 (aset table char 'character)
327 (setq char (1+ char)))
328 ;; European 8-bit accentuated characters:
329 (setq char ?\240)
330 (while (< char ?\400)
331 (aset table char 'non-terminal)
332 (setq char (1+ char)))
333 ;; Override space characters:
334 (aset table ?\013 'space) ; [VT] vertical tab
335 (aset table ?\n 'space) ; [NL] linefeed
336 (aset table ?\r 'space) ; [CR] carriage return
337 (aset table ?\t 'space) ; [HT] horizontal tab
338 (aset table ?\ 'space) ; [SP] space
339 ;; Override form feed character:
340 (aset table ?\f 'form-feed) ; [FF] form feed
341 ;; Override other lexical characters:
ac4780a1 342 (aset table ?_ 'non-terminal)
984ae001
GM
343 (aset table ?\" 'double-terminal)
344 (aset table ?\' 'single-terminal)
345 (aset table ?\? 'special)
346 (aset table ?* 'repeat)
347 (aset table ?, 'catenate)
348 (aset table ?- 'except)
349 (aset table ?= 'equal)
350 (aset table ?\) 'end-group)
351 table)
352 "Vector used to map characters to a lexical token.")
353
354
355(defun ebnf-iso-initialize ()
356 "Initialize ISO EBNF token table."
357 (if ebnf-iso-alternative-p
358 ;; Override alternative lexical characters:
359 (progn
360 (aset ebnf-iso-token-table ?\( 'left-parenthesis)
361 (aset ebnf-iso-token-table ?\[ 'character)
362 (aset ebnf-iso-token-table ?\] 'character)
363 (aset ebnf-iso-token-table ?\{ 'character)
364 (aset ebnf-iso-token-table ?\} 'character)
365 (aset ebnf-iso-token-table ?| 'character)
366 (aset ebnf-iso-token-table ?\; 'character)
367 (aset ebnf-iso-token-table ?/ 'slash)
368 (aset ebnf-iso-token-table ?! 'alternative)
369 (aset ebnf-iso-token-table ?: 'colon)
370 (aset ebnf-iso-token-table ?. 'period))
371 ;; Override standard lexical characters:
372 (aset ebnf-iso-token-table ?\( 'begin-parenthesis)
373 (aset ebnf-iso-token-table ?\[ 'begin-optional)
374 (aset ebnf-iso-token-table ?\] 'end-optional)
375 (aset ebnf-iso-token-table ?\{ 'begin-zero-or-more)
376 (aset ebnf-iso-token-table ?\} 'end-zero-or-more)
377 (aset ebnf-iso-token-table ?| 'alternative)
378 (aset ebnf-iso-token-table ?\; 'period)
379 (aset ebnf-iso-token-table ?/ 'character)
380 (aset ebnf-iso-token-table ?! 'character)
381 (aset ebnf-iso-token-table ?: 'character)
382 (aset ebnf-iso-token-table ?. 'character)))
383
384
8a1e4eeb
GM
385;; replace the range "\240-\377" (see `ebnf-range-regexp').
386(defconst ebnf-iso-non-terminal-chars
ac4780a1 387 (ebnf-range-regexp " 0-9A-Za-z_" ?\240 ?\377))
8a1e4eeb
GM
388
389
984ae001 390(defun ebnf-iso-lex ()
fc6e6963 391 "Lexical analyzer for ISO EBNF.
984ae001
GM
392
393Return a lexical token.
394
395See documentation for variable `ebnf-iso-lex'."
396 (if (>= (point) ebnf-limit)
397 'end-of-input
398 (let (token)
399 ;; skip spaces and comments
400 (while (if (> (following-char) 255)
401 (progn
402 (setq token 'error)
403 nil)
404 (setq token (aref ebnf-iso-token-table (following-char)))
405 (cond
406 ((eq token 'space)
407 (skip-chars-forward " \013\n\r\t" ebnf-limit)
408 (< (point) ebnf-limit))
409 ((or (eq token 'begin-parenthesis)
410 (eq token 'left-parenthesis))
411 (forward-char)
412 (if (/= (following-char) ?*)
413 ;; no comment
414 nil
415 ;; comment
416 (ebnf-iso-skip-comment)
417 t))
418 ((eq token 'form-feed)
419 (forward-char)
420 (setq ebnf-action 'form-feed))
421 (t nil)
422 )))
423 (cond
424 ;; end of input
425 ((>= (point) ebnf-limit)
426 'end-of-input)
427 ;; error
428 ((eq token 'error)
eac9c0ef 429 (error "Invalid character"))
984ae001
GM
430 ;; integer
431 ((eq token 'integer)
432 (setq ebnf-iso-lex (ebnf-buffer-substring "0-9"))
433 'integer)
434 ;; special: ?special?
435 ((eq token 'special)
ac4780a1 436 (setq ebnf-iso-lex (concat (and ebnf-special-show-delimiter "?")
984ae001 437 (ebnf-string " ->@-~" ?\? "special")
ac4780a1 438 (and ebnf-special-show-delimiter "?")))
984ae001
GM
439 'special)
440 ;; terminal: "string"
441 ((eq token 'double-terminal)
442 (setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal"))
443 'terminal)
444 ;; terminal: 'string'
445 ((eq token 'single-terminal)
446 (setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal"))
447 'terminal)
448 ;; non-terminal
449 ((eq token 'non-terminal)
8a1e4eeb
GM
450 (setq ebnf-iso-lex
451 (ebnf-iso-normalize
452 (ebnf-trim-right
453 (ebnf-buffer-substring ebnf-iso-non-terminal-chars))))
984ae001 454 (and ebnf-no-meta-identifier
e8af40ee 455 (error "Exception sequence should not contain a meta identifier"))
984ae001
GM
456 'non-terminal)
457 ;; begin optional, begin list or begin group
458 ((eq token 'left-parenthesis)
459 (forward-char)
460 (cond ((= (following-char) ?/)
461 (forward-char)
462 'begin-optional)
463 ((= (following-char) ?:)
464 (forward-char)
465 'begin-zero-or-more)
466 (t
467 'begin-group)
468 ))
469 ;; end optional or alternative
470 ((eq token 'slash)
471 (forward-char)
472 (if (/= (following-char) ?\))
473 'alternative
474 (forward-char)
475 'end-optional))
476 ;; end list
477 ((eq token 'colon)
478 (forward-char)
479 (if (/= (following-char) ?\))
480 'character
481 (forward-char)
482 'end-zero-or-more))
483 ;; begin group
484 ((eq token 'begin-parenthesis)
485 'begin-group)
486 ;; miscellaneous
487 (t
488 (forward-char)
489 token)
490 ))))
491
492
2197ec3b
GM
493;; replace the range "\177-\237" (see `ebnf-range-regexp').
494(defconst ebnf-iso-comment-chars
495 (ebnf-range-regexp "^*(\000-\010\016-\037" ?\177 ?\237))
984ae001
GM
496
497
498(defun ebnf-iso-skip-comment ()
499 (forward-char)
500 (cond
501 ;; open EPS file
502 ((and ebnf-eps-executing (= (following-char) ?\[))
503 (ebnf-eps-add-context (ebnf-iso-eps-filename)))
504 ;; close EPS file
505 ((and ebnf-eps-executing (= (following-char) ?\]))
506 (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
3ced5caa
VJL
507 ;; EPS header
508 ((and ebnf-eps-executing (= (following-char) ?H))
509 (ebnf-eps-header-comment (ebnf-iso-eps-filename)))
510 ;; EPS footer
511 ((and ebnf-eps-executing (= (following-char) ?F))
512 (ebnf-eps-footer-comment (ebnf-iso-eps-filename)))
984ae001
GM
513 ;; any other action in comment
514 (t
515 (setq ebnf-action (aref ebnf-comment-table (following-char))))
516 )
517 (let ((pair 1))
518 (while (> pair 0)
519 (skip-chars-forward ebnf-iso-comment-chars ebnf-limit)
520 (cond ((>= (point) ebnf-limit)
e8af40ee 521 (error "Missing end of comment: `*)'"))
984ae001
GM
522 ((= (following-char) ?*)
523 (skip-chars-forward "*" ebnf-limit)
524 (when (= (following-char) ?\))
525 ;; end of comment
526 (forward-char)
527 (setq pair (1- pair))))
528 ((= (following-char) ?\()
529 (skip-chars-forward "(" ebnf-limit)
530 (when (= (following-char) ?*)
531 ;; beginning of comment
532 (forward-char)
533 (setq pair (1+ pair))))
534 (t
eac9c0ef 535 (error "Invalid character"))
984ae001
GM
536 ))))
537
538
539(defun ebnf-iso-eps-filename ()
540 (forward-char)
541 (buffer-substring-no-properties
542 (point)
543 (let ((chars (concat ebnf-iso-comment-chars "\n"))
544 found)
545 (while (not found)
546 (skip-chars-forward chars ebnf-limit)
547 (setq found
548 (cond ((>= (point) ebnf-limit)
549 (point))
550 ((= (following-char) ?*)
551 (skip-chars-forward "*" ebnf-limit)
552 (if (/= (following-char) ?\))
553 nil
554 (backward-char)
555 (point)))
556 ((= (following-char) ?\()
557 (forward-char)
558 (if (/= (following-char) ?*)
559 nil
560 (backward-char)
561 (point)))
562 (t
563 (point))
564 )))
565 found)))
566
567
568(defun ebnf-iso-normalize (str)
569 (if (not ebnf-iso-normalize-p)
570 str
571 (let ((len (length str))
572 (stri 0)
573 (spaces 0))
574 ;; count exceeding spaces
575 (while (< stri len)
576 (if (/= (aref str stri) ?\ )
577 (setq stri (1+ stri))
578 (setq stri (1+ stri))
579 (while (and (< stri len) (= (aref str stri) ?\ ))
580 (setq stri (1+ stri)
581 spaces (1+ spaces)))))
582 (if (zerop spaces)
583 ;; no exceeding space
584 str
585 ;; at least one exceeding space
586 (let ((new (make-string (- len spaces) ?\ ))
587 (newi 0))
588 ;; eliminate exceeding spaces
589 (setq stri 0)
590 (while (> spaces 0)
591 (if (/= (aref str stri) ?\ )
592 (progn
593 (aset new newi (aref str stri))
594 (setq stri (1+ stri)
595 newi (1+ newi)))
596 (aset new newi (aref str stri))
597 (setq stri (1+ stri)
598 newi (1+ newi))
599 (while (and (> spaces 0) (= (aref str stri) ?\ ))
600 (setq stri (1+ stri)
601 spaces (1- spaces)))))
602 ;; remaining is normalized
603 (while (< stri len)
604 (aset new newi (aref str stri))
605 (setq stri (1+ stri)
606 newi (1+ newi)))
607 new)))))
608
609\f
610;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
611
612
613(provide 'ebnf-iso)
614
615
cbee283d 616;; arch-tag: 03315eef-8f64-404a-bf9d-256d42442ee3
984ae001 617;;; ebnf-iso.el ends here