Convert consecutive FSF copyright years to ranges.
[bpt/emacs.git] / lisp / progmodes / ebnf-iso.el
CommitLineData
e8af40ee 1;;; ebnf-iso.el --- parser for ISO EBNF
984ae001 2
73b0cd50 3;; Copyright (C) 1999-2011
3ced5caa 4;; Free Software Foundation, Inc.
984ae001 5
ac4780a1
VJL
6;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
ae16d111 8;; Keywords: wp, ebnf, PostScript
3ced5caa 9;; Version: 1.9
bd78fa1d 10;; Package: ebnf2ps
984ae001 11
8d9ea7b1 12;; This file is part of GNU Emacs.
984ae001 13
b1fc2b50 14;; GNU Emacs is free software: you can redistribute it and/or modify
984ae001 15;; it under the terms of the GNU General Public License as published by
b1fc2b50
GM
16;; the Free Software Foundation, either version 3 of the License, or
17;; (at your option) any later version.
984ae001 18
8d9ea7b1 19;; GNU Emacs is distributed in the hope that it will be useful,
984ae001
GM
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
b1fc2b50 25;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
984ae001
GM
26
27;;; Commentary:
28
29;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30;;
31;;
32;; This is part of ebnf2ps package.
33;;
34;; This package defines a parser for ISO EBNF.
35;;
36;; See ebnf2ps.el for documentation.
37;;
38;;
39;; ISO EBNF Syntax
40;; ---------------
41;;
42;; See the URL:
43;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
44;; ("International Standard of the ISO EBNF Notation").
45;;
46;;
47;; ISO EBNF = syntax rule, {syntax rule};
48;;
49;; syntax rule = meta identifier, '=', definition list, ';';
50;;
51;; definition list = single definition, {'|', single definition};
52;;
53;; single definition = term, {',', term};
54;;
55;; term = factor, ['-', exception];
56;;
57;; exception = factor (* without <meta identifier> *);
58;;
59;; factor = [integer, '*'], primary;
60;;
61;; primary = optional sequence | repeated sequence | special sequence
62;; | grouped sequence | meta identifier | terminal string
63;; | empty;
64;;
65;; empty = ;
66;;
67;; optional sequence = '[', definition list, ']';
68;;
69;; repeated sequence = '{', definition list, '}';
70;;
71;; grouped sequence = '(', definition list, ')';
72;;
73;; terminal string = "'", character - "'", {character - "'"}, "'"
74;; | '"', character - '"', {character - '"'}, '"';
75;;
76;; special sequence = '?', {character - '?'}, '?';
77;;
78;; meta identifier = letter, { letter | decimal digit | ' ' };
79;;
80;; integer = decimal digit, {decimal digit};
81;;
82;; comment = '(*', {comment symbol}, '*)';
83;;
84;; comment symbol = comment (* <== NESTED COMMENT *)
85;; | terminal string | special sequence | character;
86;;
87;; letter = ? A-Z a-z ?;
88;;
89;; decimal digit = ? 0-9 ?;
90;;
91;; character = letter | decimal digit
92;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
93;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
94;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
95;;
96;;
97;; There is also the following alternative representation:
98;;
99;; STANDARD ALTERNATIVE
100;; | ==> / or !
101;; [ ==> (/
102;; ] ==> /)
103;; { ==> (:
104;; } ==> :)
105;; ; ==> .
106;;
107;;
108;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
109;; -------------------------------------------------
110;;
111;; ISO EBNF accepts the characters given by <character> production above,
112;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
eac9c0ef 113;; (^L), any other characters are invalid. But ebnf2ps accepts also the
60df7255
VJL
114;; European 8-bit accented characters (from \240 to \377) and underscore
115;; (_).
984ae001
GM
116;;
117;;
118;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
119
e8af40ee 120;;; Code:
984ae001
GM
121
122
123(require 'ebnf-otz)
124
125
(defvar ebnf-iso-lex nil
  "Text associated with the last token read by function `ebnf-iso-lex'.
The lexer stores a string here when it returns an `integer',
`special', `terminal' or `non-terminal' token; the parser functions
read it to build the corresponding node.")
128
129
1002b9b5
VJL
(defvar ebnf-no-meta-identifier nil
  "Non-nil means a meta identifier is not allowed at this point.
`ebnf-iso-term' binds this to t while it parses the exception part
of a term, and `ebnf-iso-lex' signals an error if it reads a
meta identifier while the value is non-nil.")
984ae001
GM
132
133\f
134;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
f504d516 135;; Syntactic analyzer
984ae001
GM
136
137
138;;; ISO EBNF = syntax rule, {syntax rule};
139
(defun ebnf-iso-parser (start)
  "ISO EBNF parser.

Parse the current buffer from position START up to `ebnf-limit' as a
sequence of ISO EBNF syntax rules.  Progress is reported through
`ebnf-message-float'.  Point is restored to where it was on entry.

Return the list of productions, most recently parsed first; a
production for which `ebnf-add-empty-rule-list' returns non-nil is
left out of the list.  Signal an error if no rule is found at all."
  (let ((span (1+ (- ebnf-limit start)))  ; number of characters to parse
        (offset (1- start))               ; makes progress relative to START
        (saved-point (point))
        productions lookahead parsed production)
    (goto-char start)
    (setq lookahead (ebnf-iso-lex))
    (when (eq lookahead 'end-of-input)
      (error "Invalid ISO EBNF file format"))
    (while (not (eq lookahead 'end-of-input))
      (ebnf-message-float "Parsing...%s%%"
                          (/ (* (- (point) offset) 100.0) span))
      ;; `ebnf-iso-syntax-rule' returns (NEXT-TOKEN . PRODUCTION).
      (setq parsed (ebnf-iso-syntax-rule lookahead)
            production (cdr parsed)
            lookahead (car parsed))
      (unless (ebnf-add-empty-rule-list production)
        (push production productions)))
    (goto-char saved-point)
    productions))
161
162
163;;; syntax rule = meta identifier, '=', definition list, ';';
164
(defun ebnf-iso-syntax-rule (token)
  "Parse one syntax rule whose first token, already read, is TOKEN.

The grammar is: meta identifier, \"=\", definition list, \";\".

TOKEN must be `non-terminal'; its text is taken from the variable
`ebnf-iso-lex'.  The value of `ebnf-action' on entry is attached to
the production and `ebnf-action' is reset to nil.

Return a cons cell whose car is the token following the rule and
whose cdr is the production built by `ebnf-make-production'.
Signal an error when the rule is malformed."
  (let ((name ebnf-iso-lex)
        (pending-action ebnf-action)
        definitions)
    (setq ebnf-action nil)
    (unless (eq token 'non-terminal)
      (error "Invalid meta identifier syntax rule"))
    (unless (eq (ebnf-iso-lex) 'equal)
      (error "Invalid syntax rule: missing `='"))
    ;; DEFINITIONS is (TERMINATING-TOKEN . BODY).
    (setq definitions (ebnf-iso-definition-list))
    (unless (eq (car definitions) 'period)
      (error "Invalid syntax rule: missing `;' or `.'"))
    (ebnf-eps-add-production name)
    ;; Read the lookahead token before building the production, as the
    ;; result is (NEXT-TOKEN . PRODUCTION).
    (let ((next-token (ebnf-iso-lex)))
      (cons next-token
            (ebnf-make-production name (cdr definitions) pending-action)))))
181
182
183;;; definition list = single definition, {'|', single definition};
184
(defun ebnf-iso-definition-list ()
  "Parse a definition list: single definitions separated by `alternative'.

Single definitions are read with `ebnf-iso-single-definition' for as
long as each one is terminated by an `alternative' token.  The
definitions collected so far (most recent first) and the final
\(TOKEN . DEFINITION) pair are handed to `ebnf-token-alternative',
whose value is returned."
  (let (alternatives current)
    (while (progn
             (setq current (ebnf-iso-single-definition))
             (eq (car current) 'alternative))
      (push (cdr current) alternatives))
    (ebnf-token-alternative alternatives current)))
192
193
194;;; single definition = term, {',', term};
195
(defun ebnf-iso-single-definition ()
  "Parse a single definition: terms separated by `catenate' tokens.

Terms are read with `ebnf-iso-term' while each one is non-nil and is
followed by a `catenate' token.

Return a cons cell whose car is the token that ended the definition
and whose cdr is the value of `ebnf-token-sequence' applied to the
collected terms (most recent first)."
  (let (separator terms item)
    (while (let ((parsed (ebnf-iso-term (ebnf-iso-lex))))
             ;; PARSED is (NEXT-TOKEN . TERM).
             (setq separator (car parsed)
                   item (cdr parsed))
             (and item (eq separator 'catenate)))
      (push item terms))
    ;; The last term read, if any, has not been collected yet.
    (cons separator
          (ebnf-token-sequence (if item
                                   (cons item terms)
                                 terms)))))
984ae001
GM
207
208
209;;; term = factor, ['-', exception];
210;;;
211;;; exception = factor (* without <meta identifier> *);
212
(defun ebnf-iso-term (token)
  "Parse a term starting at TOKEN: a factor, optionally \"-\" and an exception.

If the factor is not followed by an `except' token, return the value
of `ebnf-iso-factor' unchanged.  Otherwise parse the exception factor
with `ebnf-no-meta-identifier' bound to t, so that the lexer rejects
meta identifiers, and return the value of `ebnf-token-except'."
  (let ((factor (ebnf-iso-factor token)))
    (if (eq (car factor) 'except)
        ;; factor - exception
        (let ((ebnf-no-meta-identifier t))
          (ebnf-token-except (cdr factor)
                             (ebnf-iso-factor (ebnf-iso-lex))))
      ;; plain factor
      factor)))
221
222
223;;; factor = [integer, '*'], primary;
224
(defun ebnf-iso-factor (token)
  "Parse a factor starting at TOKEN: an optional integer and \"*\", then a primary.

When TOKEN is `integer', the repetition count is taken from the
variable `ebnf-iso-lex', a `repeat' token is required next, and the
value of `ebnf-token-repeat' is returned.  Otherwise the value of
`ebnf-iso-primary' for TOKEN is returned."
  (cond
   ((not (eq token 'integer))
    (ebnf-iso-primary token))
   (t
    (let ((count ebnf-iso-lex))
      (unless (eq (ebnf-iso-lex) 'repeat)
        (error "Missing `*'"))
      (ebnf-token-repeat count (ebnf-iso-primary (ebnf-iso-lex)))))))
232
233
234;;; primary = optional sequence | repeated sequence | special sequence
235;;; | grouped sequence | meta identifier | terminal string
236;;; | empty;
237;;;
238;;; empty = ;
239;;;
240;;; optional sequence = '[', definition list, ']';
241;;;
242;;; repeated sequence = '{', definition list, '}';
243;;;
244;;; grouped sequence = '(', definition list, ')';
245;;;
246;;; terminal string = "'", character - "'", {character - "'"}, "'"
247;;; | '"', character - '"', {character - '"'}, '"';
248;;;
249;;; special sequence = '?', {character - '?'}, '?';
250;;;
251;;; meta identifier = letter, { letter | decimal digit | ' ' };
252
(defun ebnf-iso-primary (token)
  "Parse a primary starting at TOKEN.

TOKEN may introduce a terminal string, a meta identifier, a special
sequence, or a grouped, optional or repeated sequence.  Any other
TOKEN is treated as the empty primary.

Return a cons cell (NEXT-TOKEN . NODE).  When a primary was
recognized, NEXT-TOKEN is freshly read with `ebnf-iso-lex'; for the
empty primary NODE is nil and NEXT-TOKEN is TOKEN itself."
  (let* ((bracket
          ;; (OPENING-TOKEN CLOSING-TOKEN ERROR-MESSAGE)
          (assq token
                '((begin-group end-group "Missing `)'")
                  (begin-optional end-optional "Missing `]' or `/)'")
                  (begin-zero-or-more end-zero-or-more
                                      "Missing `}' or `:)'"))))
         (node
          (cond
           ;; grouped, optional or repeated sequence
           (bracket
            (let ((body (ebnf-iso-definition-list)))
              (unless (eq (car body) (nth 1 bracket))
                (error (nth 2 bracket)))
              (cond
               ((eq token 'begin-optional)
                (ebnf-token-optional (cdr body)))
               ((eq token 'begin-zero-or-more)
                (ebnf-make-zero-or-more (cdr body)))
               (t
                (cdr body)))))
           ;; terminal string
           ((eq token 'terminal)
            (ebnf-make-terminal ebnf-iso-lex))
           ;; meta identifier
           ((eq token 'non-terminal)
            (ebnf-make-non-terminal ebnf-iso-lex))
           ;; special sequence
           ((eq token 'special)
            (ebnf-make-special ebnf-iso-lex))
           ;; anything else: empty primary, NODE stays nil
           )))
    (cons (if node
              (ebnf-iso-lex)
            token)
          node)))
292
293\f
294;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
295;; Lexical analyzer
296
297
(defconst ebnf-iso-token-table
  ;; Every slot starts out as `error', so control characters and the
  ;; 8-bit characters not classified below stay invalid.
  (let ((table (make-vector 256 'error)))
    ;; Contiguous ranges, as (FIRST LAST TOKEN) with inclusive bounds.
    (dolist (range '((?\040 ?\057 character)    ; printable characters
                     (?\060 ?\071 integer)      ; digits
                     (?\072 ?\100 character)
                     (?\101 ?\132 non-terminal) ; upper case letters
                     (?\133 ?\140 character)
                     (?\141 ?\172 non-terminal) ; lower case letters
                     (?\173 ?\176 character)
                     (?\240 ?\377 non-terminal))) ; European 8-bit accented
      (let ((char (car range))
            (last (nth 1 range))
            (token (nth 2 range)))
        (while (<= char last)
          (aset table char token)
          (setq char (1+ char)))))
    ;; Single-character overrides, as (CHAR . TOKEN).
    (dolist (override '((?\013 . space)       ; [VT] vertical tab
                        (?\n . space)         ; [NL] linefeed
                        (?\r . space)         ; [CR] carriage return
                        (?\t . space)         ; [HT] horizontal tab
                        (?\s . space)         ; [SP] space
                        (?\f . form-feed)     ; [FF] form feed
                        (?_ . non-terminal)
                        (?\" . double-terminal)
                        (?\' . single-terminal)
                        (?\? . special)
                        (?* . repeat)
                        (?\, . catenate)
                        (?- . except)
                        (?= . equal)
                        (?\) . end-group)))
      (aset table (car override) (cdr override)))
    table)
  "Vector used to map characters to a lexical token.
The vector has 256 slots indexed by character code.  The entries for
the characters whose meaning depends on `ebnf-iso-alternative-p' are
set by `ebnf-iso-initialize'.")
352
353
(defun ebnf-iso-initialize ()
  "Initialize ISO EBNF token table.

Store into `ebnf-iso-token-table' the tokens for the characters whose
role depends on `ebnf-iso-alternative-p': the alternative
representation is installed when that variable is non-nil, and the
standard one otherwise."
  (let ((overrides
         (if ebnf-iso-alternative-p
             ;; Override alternative lexical characters:
             '((?\( . left-parenthesis)
               (?\[ . character)
               (?\] . character)
               (?\{ . character)
               (?\} . character)
               (?| . character)
               (?\; . character)
               (?/ . slash)
               (?! . alternative)
               (?: . colon)
               (?\. . period))
           ;; Override standard lexical characters:
           '((?\( . begin-parenthesis)
             (?\[ . begin-optional)
             (?\] . end-optional)
             (?\{ . begin-zero-or-more)
             (?\} . end-zero-or-more)
             (?| . alternative)
             (?\; . period)
             (?/ . character)
             (?! . character)
             (?: . character)
             (?\. . character))))
        last-token)
    (while overrides
      (setq last-token (aset ebnf-iso-token-table
                             (car (car overrides))
                             (cdr (car overrides)))
            overrides (cdr overrides)))
    ;; The value is the last token stored.
    last-token))
382
383
8a1e4eeb
GM
(defconst ebnf-iso-non-terminal-chars
  (ebnf-range-regexp " 0-9A-Za-z_" ?\240 ?\377)
  "Characters that may appear in a meta identifier.
`ebnf-iso-lex' passes this value to `ebnf-buffer-substring' when it
reads a non-terminal.  The call to `ebnf-range-regexp' stands in for
writing the range \\240-\\377 literally (see `ebnf-range-regexp').")
8a1e4eeb
GM
387
388
(defun ebnf-iso-lex ()
  "Lexical analyzer for ISO EBNF.

Return a lexical token.

Spaces and comments are skipped first; a form feed sets `ebnf-action'
to `form-feed'.  The token is then read at point and point is left
after it.  For `integer', `special', `terminal' and `non-terminal'
tokens the token text is stored in the variable `ebnf-iso-lex'.
Return `end-of-input' when point reaches `ebnf-limit'.

Signal an error on an invalid character, or when a meta identifier
is read while `ebnf-no-meta-identifier' is non-nil.

See documentation for variable `ebnf-iso-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (if (> (following-char) 255)
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-iso-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \013\n\r\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ((or (eq token 'begin-parenthesis)
                     (eq token 'left-parenthesis))
                 ;; The opening parenthesis is consumed here in both
                 ;; cases; the token dispatch below relies on point
                 ;; already being after it.
                 (forward-char)
                 (if (/= (following-char) ?*)
                     ;; no comment
                     nil
                   ;; comment
                   (ebnf-iso-skip-comment)
                   t))
                ((eq token 'form-feed)
                 (forward-char)
                 (setq ebnf-action 'form-feed))
                (t nil)
                )))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Invalid character"))
       ;; integer
       ((eq token 'integer)
        (setq ebnf-iso-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; special: ?special?
       ((eq token 'special)
        (setq ebnf-iso-lex (concat (and ebnf-special-show-delimiter "?")
                                   (ebnf-string " ->@-~" ?\? "special")
                                   (and ebnf-special-show-delimiter "?")))
        'special)
       ;; terminal: "string"
       ((eq token 'double-terminal)
        (setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal"))
        'terminal)
       ;; terminal: 'string'
       ((eq token 'single-terminal)
        (setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal"))
        'terminal)
       ;; non-terminal
       ((eq token 'non-terminal)
        (setq ebnf-iso-lex
              (ebnf-iso-normalize
               (ebnf-trim-right
                (ebnf-buffer-substring ebnf-iso-non-terminal-chars))))
        (and ebnf-no-meta-identifier
             (error "Exception sequence should not contain a meta identifier"))
        'non-terminal)
       ;; begin optional, begin list or begin group
       ((eq token 'left-parenthesis)
        ;; Point is already after the `(' (see the skip loop above), so
        ;; the following character is examined directly.  Advancing
        ;; once more here would skip the `/' or `:' that selects the
        ;; alternative bracket and swallow one character of input.
        (cond ((= (following-char) ?/)
               (forward-char)
               'begin-optional)
              ((= (following-char) ?:)
               (forward-char)
               'begin-zero-or-more)
              (t
               'begin-group)
              ))
       ;; end optional or alternative
       ((eq token 'slash)
        (forward-char)
        (if (/= (following-char) ?\))
            'alternative
          (forward-char)
          'end-optional))
       ;; end list
       ((eq token 'colon)
        (forward-char)
        (if (/= (following-char) ?\))
            'character
          (forward-char)
          'end-zero-or-more))
       ;; begin group (the `(' was consumed by the skip loop above)
       ((eq token 'begin-parenthesis)
        'begin-group)
       ;; miscellaneous
       (t
        (forward-char)
        token)
       ))))
490
491
2197ec3b
GM
(defconst ebnf-iso-comment-chars
  (ebnf-range-regexp "^*(\000-\010\016-\037" ?\177 ?\237)
  "Character set skipped inside a comment.
`ebnf-iso-skip-comment' passes this value to `skip-chars-forward', and
`ebnf-iso-eps-filename' does the same after appending a newline.  The
call to `ebnf-range-regexp' stands in for writing the range
\\177-\\237 literally (see `ebnf-range-regexp').")
984ae001
GM
495
496
(defun ebnf-iso-skip-comment ()
  "Skip a comment, which may contain nested comments.

On entry point is at the \"*\" of the comment opener.  The character
right after it selects an action: while `ebnf-eps-executing' is
non-nil, \"[\", \"]\", \"H\" and \"F\" call the EPS context, header
and footer functions with the name read by `ebnf-iso-eps-filename';
in every other case `ebnf-action' is set from `ebnf-comment-table'.

Point is left after the closing delimiter.  Signal an error if the
comment is not closed before `ebnf-limit' or if it contains a
character that cannot be skipped."
  (forward-char)
  (let ((marker (following-char)))
    (cond
     ;; no EPS processing: any action in comment
     ((not ebnf-eps-executing)
      (setq ebnf-action (aref ebnf-comment-table marker)))
     ;; open EPS file
     ((= marker ?\[)
      (ebnf-eps-add-context (ebnf-iso-eps-filename)))
     ;; close EPS file
     ((= marker ?\])
      (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
     ;; EPS header
     ((= marker ?H)
      (ebnf-eps-header-comment (ebnf-iso-eps-filename)))
     ;; EPS footer
     ((= marker ?F)
      (ebnf-eps-footer-comment (ebnf-iso-eps-filename)))
     ;; any other action in comment
     (t
      (setq ebnf-action (aref ebnf-comment-table marker)))))
  ;; DEPTH counts the comment openers not yet matched by a closer.
  (let ((depth 1))
    (while (> depth 0)
      (skip-chars-forward ebnf-iso-comment-chars ebnf-limit)
      (if (>= (point) ebnf-limit)
          (error "Missing end of comment: `*)'")
        (let ((char (following-char)))
          (cond
           ((= char ?*)
            (skip-chars-forward "*" ebnf-limit)
            (when (= (following-char) ?\))
              ;; end of comment
              (forward-char)
              (setq depth (1- depth))))
           ((= char ?\()
            (skip-chars-forward "(" ebnf-limit)
            (when (= (following-char) ?*)
              ;; beginning of nested comment
              (forward-char)
              (setq depth (1+ depth))))
           (t
            (error "Invalid character"))))))))
536
537
(defun ebnf-iso-eps-filename ()
  "Return the EPS file name that follows the character at point.

Move one character forward, then scan up to `ebnf-limit'.  The name
ends at the first of: `ebnf-limit', a newline, the run of asterisks
that closes the comment, a nested comment opener, or any other
character outside `ebnf-iso-comment-chars'.  Point is left at the
end of the name, before the terminating delimiter.  The name is
returned without text properties."
  (forward-char)
  (let ((name-start (point))
        (skip-set (concat ebnf-iso-comment-chars "\n"))
        name-end)
    (while (not name-end)
      (skip-chars-forward skip-set ebnf-limit)
      (cond
       ((>= (point) ebnf-limit)
        (setq name-end (point)))
       ((= (following-char) ?*)
        (skip-chars-forward "*" ebnf-limit)
        ;; Only asterisks followed by a close parenthesis end the name;
        ;; step back so that the last asterisk is left unread.
        (when (= (following-char) ?\))
          (backward-char)
          (setq name-end (point))))
       ((= (following-char) ?\()
        (forward-char)
        ;; Only a nested comment opener ends the name.
        (when (= (following-char) ?*)
          (backward-char)
          (setq name-end (point))))
       (t
        (setq name-end (point)))))
    (buffer-substring-no-properties name-start name-end)))
565
566
(defun ebnf-iso-normalize (str)
  "Return STR with every run of consecutive spaces reduced to one space.

STR itself is returned when `ebnf-iso-normalize-p' is nil or when STR
has no two adjacent spaces; otherwise a new string is returned."
  (if (not ebnf-iso-normalize-p)
      str
    (let ((len (length str))
          (index 0)
          (excess 0)
          (after-space nil))
      ;; First pass: count the spaces that directly follow another space.
      (while (< index len)
        (let ((is-space (= (aref str index) ?\s)))
          (and is-space after-space
               (setq excess (1+ excess)))
          (setq after-space is-space
                index (1+ index))))
      (if (zerop excess)
          ;; nothing to remove
          str
        ;; Second pass: copy everything except those spaces.
        (let ((result (make-string (- len excess) ?\s))
              (out 0))
          (setq index 0
                after-space nil)
          (while (< index len)
            (let* ((char (aref str index))
                   (is-space (= char ?\s)))
              (unless (and is-space after-space)
                (aset result out char)
                (setq out (1+ out)))
              (setq after-space is-space
                    index (1+ index))))
          result)))))
607
608\f
609;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
610
611
612(provide 'ebnf-iso)
613
614
615;;; ebnf-iso.el ends here