Add arch taglines
[bpt/emacs.git] / lisp / progmodes / ebnf-iso.el
1 ;;; ebnf-iso.el --- parser for ISO EBNF
2
3 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
4
5 ;; Author: Vinicius Jose Latorre <vinicius@cpqd.com.br>
6 ;; Maintainer: Vinicius Jose Latorre <vinicius@cpqd.com.br>
7 ;; Keywords: wp, ebnf, PostScript
8 ;; Time-stamp: <2003/08/12 21:29:14 vinicius>
9 ;; Version: 1.6
10
11 ;; This file is part of GNU Emacs.
12
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; any later version.
17
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 ;; Boston, MA 02111-1307, USA.
27
28 ;;; Commentary:
29
30 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31 ;;
32 ;;
33 ;; This is part of ebnf2ps package.
34 ;;
35 ;; This package defines a parser for ISO EBNF.
36 ;;
37 ;; See ebnf2ps.el for documentation.
38 ;;
39 ;;
40 ;; ISO EBNF Syntax
41 ;; ---------------
42 ;;
43 ;; See the URL:
44 ;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
45 ;; ("International Standard of the ISO EBNF Notation").
46 ;;
47 ;;
48 ;; ISO EBNF = syntax rule, {syntax rule};
49 ;;
50 ;; syntax rule = meta identifier, '=', definition list, ';';
51 ;;
52 ;; definition list = single definition, {'|', single definition};
53 ;;
54 ;; single definition = term, {',', term};
55 ;;
56 ;; term = factor, ['-', exception];
57 ;;
58 ;; exception = factor (* without <meta identifier> *);
59 ;;
60 ;; factor = [integer, '*'], primary;
61 ;;
62 ;; primary = optional sequence | repeated sequence | special sequence
63 ;; | grouped sequence | meta identifier | terminal string
64 ;; | empty;
65 ;;
66 ;; empty = ;
67 ;;
68 ;; optional sequence = '[', definition list, ']';
69 ;;
70 ;; repeated sequence = '{', definition list, '}';
71 ;;
72 ;; grouped sequence = '(', definition list, ')';
73 ;;
74 ;; terminal string = "'", character - "'", {character - "'"}, "'"
75 ;; | '"', character - '"', {character - '"'}, '"';
76 ;;
77 ;; special sequence = '?', {character - '?'}, '?';
78 ;;
79 ;; meta identifier = letter, { letter | decimal digit | ' ' };
80 ;;
81 ;; integer = decimal digit, {decimal digit};
82 ;;
83 ;; comment = '(*', {comment symbol}, '*)';
84 ;;
85 ;; comment symbol = comment (* <== NESTED COMMENT *)
86 ;; | terminal string | special sequence | character;
87 ;;
88 ;; letter = ? A-Z a-z ?;
89 ;;
90 ;; decimal digit = ? 0-9 ?;
91 ;;
92 ;; character = letter | decimal digit
93 ;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
94 ;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
95 ;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
96 ;;
97 ;;
98 ;; There is also the following alternative representation:
99 ;;
100 ;; STANDARD ALTERNATIVE
101 ;; | ==> / or !
102 ;; [ ==> (/
103 ;; ] ==> /)
104 ;; { ==> (:
105 ;; } ==> :)
106 ;; ; ==> .
107 ;;
108 ;;
109 ;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
110 ;; -------------------------------------------------
111 ;;
;; ISO EBNF accepts the characters given by the <character> production above,
;; plus HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM
;; FEED (^L); any other character is invalid.  ebnf2ps additionally accepts
;; the European 8-bit accented characters (from \240 to \377).
116 ;;
117 ;;
118 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
119
120 ;;; Code:
121
122
123 (require 'ebnf-otz)
124
125
(defvar ebnf-iso-lex nil
  "Text of the last token scanned by function `ebnf-iso-lex'.

The function `ebnf-iso-lex' returns the lexical class of a token
as a symbol; for the `integer', `special', `terminal' and
`non-terminal' classes it also stores the token text here.  Other
token classes leave this variable unchanged.")
128
129
(defvar ebnf-no-meta-identifier nil
  "Non-nil means a meta identifier is an error for `ebnf-iso-lex'.

`ebnf-iso-term' binds this variable to t while it parses the
exception part of a term; `ebnf-iso-lex' signals an error when it
scans a meta identifier while the value is non-nil.")
132
133 \f
134 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
135 ;; Syntactic analyzer
136
137
138 ;;; ISO EBNF = syntax rule, {syntax rule};
139
(defun ebnf-iso-parser (start)
  "ISO EBNF parser.

Parse the text between START and `ebnf-limit' as a sequence of
syntax rules, reporting progress through `ebnf-message-float'.
Return the list of the rules for which `ebnf-add-empty-rule-list'
returned nil, most recently parsed rule first.  On normal return
point is moved back to where it was on entry.

Signal an error when no token at all is found."
  (let ((span (+ (- ebnf-limit start) 1))
        (offset (1- start))
        (saved-point (point))
        rule-list next-token parsed)
    (goto-char start)
    (setq next-token (ebnf-iso-lex))
    (when (eq next-token 'end-of-input)
      (error "Invalid ISO EBNF file format"))
    (while (not (eq next-token 'end-of-input))
      (ebnf-message-float
       "Parsing...%s%%"
       (/ (* (- (point) offset) 100.0) span))
      ;; `ebnf-iso-syntax-rule' returns (NEXT-TOKEN . RULE)
      (setq parsed (ebnf-iso-syntax-rule next-token)
            next-token (car parsed))
      (unless (ebnf-add-empty-rule-list (cdr parsed))
        (setq rule-list (cons (cdr parsed) rule-list))))
    (goto-char saved-point)
    rule-list))
161
162
163 ;;; syntax rule = meta identifier, '=', definition list, ';';
164
(defun ebnf-iso-syntax-rule (token)
  "Parse one syntax rule whose first token, TOKEN, is already scanned.

TOKEN must be `non-terminal'; the rule name is the value variable
`ebnf-iso-lex' holds on entry.  Return a cons (NEXT-TOKEN . PRODUCTION)
where NEXT-TOKEN is the token scanned after the rule terminator and
PRODUCTION is the value of `ebnf-make-production'.

The value of `ebnf-action' on entry is handed to the production and
`ebnf-action' is reset to nil.  Signal an error when the meta
identifier, the `=' or the terminator is missing."
  (let ((rule-name ebnf-iso-lex)
        (pending-action ebnf-action)
        definitions following)
    (setq ebnf-action nil)
    (unless (eq token 'non-terminal)
      (error "Invalid meta identifier syntax rule"))
    (unless (eq (ebnf-iso-lex) 'equal)
      (error "Invalid syntax rule: missing `='"))
    (setq definitions (ebnf-iso-definition-list))
    (unless (eq (car definitions) 'period)
      (error "Invalid syntax rule: missing `;' or `.'"))
    (ebnf-eps-add-production rule-name)
    ;; the lookahead token is scanned before the production is built
    (setq following (ebnf-iso-lex))
    (cons following
          (ebnf-make-production rule-name (cdr definitions) pending-action))))
181
182
183 ;;; definition list = single definition, {'|', single definition};
184
(defun ebnf-iso-definition-list ()
  "Parse a definition list: single definitions separated by `alternative'.

Each definition is read with `ebnf-iso-single-definition', whose
value is a cons (TOKEN . DEFINITION).  Definitions followed by an
`alternative' token are collected, most recent first, and passed
to `ebnf-token-alternative' together with the cons of the last
definition read; the value of that call is returned."
  (let ((current (ebnf-iso-single-definition))
        collected)
    (while (eq (car current) 'alternative)
      (setq collected (cons (cdr current) collected)
            current (ebnf-iso-single-definition)))
    (ebnf-token-alternative collected current)))
192
193
194 ;;; single definition = term, {',', term};
195
(defun ebnf-iso-single-definition ()
  "Parse a single definition: terms separated by `catenate' tokens.

Return a cons (TOKEN . DEFINITION) where TOKEN is the token that
ended the definition.  DEFINITION is the lone term when there is
only one, otherwise the value of `ebnf-make-sequence' applied to
the terms in source order.

Reading stops at the first term that is nil or that is not
followed by a `catenate' token."
  (let ((reading t)
        token terms current)
    (while reading
      ;; `ebnf-iso-term' returns (NEXT-TOKEN . TERM)
      (setq current (ebnf-iso-term (ebnf-iso-lex))
            token (car current)
            current (cdr current))
      (if (and current (eq token 'catenate))
          (setq terms (cons current terms))
        (setq reading nil)))
    (cons token
          (cond
           ;; nothing was collected: the last term stands alone
           ((null terms)
            current)
           ;; exactly one collected term followed by an empty one
           ((and (null current) (null (cdr terms)))
            (car terms))
           ;; a real sequence
           (t
            (ebnf-make-sequence (nreverse (cons current terms))))))))
215
216
217 ;;; term = factor, ['-', exception];
218 ;;;
219 ;;; exception = factor (* without <meta identifier> *);
220
(defun ebnf-iso-term (token)
  "Parse a term whose first token, TOKEN, is already scanned.

The factor is parsed with `ebnf-iso-factor'.  When the token that
follows it is `except', the exception factor is parsed with
`ebnf-no-meta-identifier' bound to t and both parts are handed to
`ebnf-token-except', whose value is returned.  Otherwise the value
of `ebnf-iso-factor' is returned as is."
  (let ((head (ebnf-iso-factor token)))
    (if (eq (car head) 'except)
        ;; factor - exception
        (let ((ebnf-no-meta-identifier t))
          (ebnf-token-except (cdr head)
                             (ebnf-iso-factor (ebnf-iso-lex))))
      ;; plain factor
      head)))
229
230
231 ;;; factor = [integer, '*'], primary;
232
(defun ebnf-iso-factor (token)
  "Parse a factor whose first token, TOKEN, is already scanned.

When TOKEN is `integer', the integer text is taken from variable
`ebnf-iso-lex', a `repeat' token must follow, and the value of
`ebnf-token-repeat' applied to that text and the parsed primary
is returned.  Otherwise return the value of `ebnf-iso-primary'
for TOKEN.

Signal an error when the `*' after the integer is missing."
  (cond
   ((not (eq token 'integer))
    (ebnf-iso-primary token))
   (t
    (let ((count-text ebnf-iso-lex))
      (unless (eq (ebnf-iso-lex) 'repeat)
        (error "Missing `*'"))
      (ebnf-token-repeat count-text
                         (ebnf-iso-primary (ebnf-iso-lex)))))))
240
241
242 ;;; primary = optional sequence | repeated sequence | special sequence
243 ;;; | grouped sequence | meta identifier | terminal string
244 ;;; | empty;
245 ;;;
246 ;;; empty = ;
247 ;;;
248 ;;; optional sequence = '[', definition list, ']';
249 ;;;
250 ;;; repeated sequence = '{', definition list, '}';
251 ;;;
252 ;;; grouped sequence = '(', definition list, ')';
253 ;;;
254 ;;; terminal string = "'", character - "'", {character - "'"}, "'"
255 ;;; | '"', character - '"', {character - '"'}, '"';
256 ;;;
257 ;;; special sequence = '?', {character - '?'}, '?';
258 ;;;
;;; meta identifier = letter, { letter | decimal digit | ' ' };
260
(defun ebnf-iso-primary (token)
  "Parse a primary whose first token, TOKEN, is already scanned.

Return a cons (NEXT-TOKEN . PRIMARY).  When the parsed PRIMARY is
nil, NEXT-TOKEN is TOKEN itself; otherwise NEXT-TOKEN is scanned
with `ebnf-iso-lex' after the primary.

Signal an error when a group, optional or repeated sequence is
not closed by the matching token."
  (let (node inner)
    (setq node
          (cond
           ;; terminal string
           ((eq token 'terminal)
            (ebnf-make-terminal ebnf-iso-lex))
           ;; meta identifier
           ((eq token 'non-terminal)
            (ebnf-make-non-terminal ebnf-iso-lex))
           ;; special sequence
           ((eq token 'special)
            (ebnf-make-special ebnf-iso-lex))
           ;; grouped sequence
           ((eq token 'begin-group)
            (setq inner (ebnf-iso-definition-list))
            (unless (eq (car inner) 'end-group)
              (error "Missing `)'"))
            (cdr inner))
           ;; optional sequence
           ((eq token 'begin-optional)
            (setq inner (ebnf-iso-definition-list))
            (unless (eq (car inner) 'end-optional)
              (error "Missing `]' or `/)'"))
            (ebnf-token-optional (cdr inner)))
           ;; repeated sequence
           ((eq token 'begin-zero-or-more)
            (setq inner (ebnf-iso-definition-list))
            (unless (eq (car inner) 'end-zero-or-more)
              (error "Missing `}' or `:)'"))
            (ebnf-make-zero-or-more (cdr inner)))
           ;; empty
           (t
            nil)))
    (if (null node)
        ;; nothing was consumed: TOKEN is still the lookahead
        (cons token nil)
      (cons (ebnf-iso-lex) node))))
300
301 \f
302 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
303 ;; Lexical analyzer
304
305
(defconst ebnf-iso-token-table
  ;; control character & 8-bit character are set to `error'
  (let ((table (make-vector 256 'error)))
    ;; Contiguous ranges, each given as (FIRST LAST TOKEN) with both
    ;; bounds inclusive.  \177 and \200-\237 stay `error'.
    (dolist (range '((?\040 ?\057 character)
                     (?\060 ?\071 integer)       ; digits
                     (?\072 ?\100 character)
                     (?\101 ?\132 non-terminal)  ; upper case letters
                     (?\133 ?\140 character)
                     (?\141 ?\172 non-terminal)  ; lower case letters
                     (?\173 ?\176 character)
                     ;; European 8-bit accentuated characters
                     (?\240 ?\377 non-terminal)))
      (let ((code (car range))
            (last-code (nth 1 range))
            (class (nth 2 range)))
        (while (<= code last-code)
          (aset table code class)
          (setq code (1+ code)))))
    ;; Single characters overriding the ranges above, as (CHAR . TOKEN).
    (dolist (entry '((?\013 . space)          ; [VT] vertical tab
                     (?\n . space)            ; [NL] linefeed
                     (?\r . space)            ; [CR] carriage return
                     (?\t . space)            ; [HT] horizontal tab
                     (?\040 . space)          ; [SP] space
                     (?\f . form-feed)        ; [FF] form feed
                     (?\" . double-terminal)
                     (?\' . single-terminal)
                     (?\? . special)
                     (?* . repeat)
                     (?, . catenate)
                     (?- . except)
                     (?= . equal)
                     (?\) . end-group)))
      (aset table (car entry) (cdr entry)))
    table)
  "Vector used to map characters to a lexical token.")
359
360
(defun ebnf-iso-initialize ()
  "Initialize ISO EBNF token table.

Store into `ebnf-iso-token-table' the tokens of the characters
whose meaning depends on `ebnf-iso-alternative-p': the tokens of
the alternative representation when it is non-nil, those of the
standard representation otherwise.  Return the token stored for
the `.' character, which is the last one stored."
  ;; each entry is (CHAR ALTERNATIVE-TOKEN STANDARD-TOKEN)
  (let ((entries '((?\( left-parenthesis begin-parenthesis)
                   (?\[ character begin-optional)
                   (?\] character end-optional)
                   (?\{ character begin-zero-or-more)
                   (?\} character end-zero-or-more)
                   (?| character alternative)
                   (?\; character period)
                   (?/ slash character)
                   (?! alternative character)
                   (?: colon character)
                   (?. period character)))
        (column (if ebnf-iso-alternative-p 1 2))
        stored)
    (while entries
      (setq stored (aset ebnf-iso-token-table
                         (car (car entries))
                         (nth column (car entries)))
            entries (cdr entries)))
    stored))
389
390
;; The range "\240-\377" is not written in the string itself; it is
;; supplied through `ebnf-range-regexp' (see that function).
(defconst ebnf-iso-non-terminal-chars
  (ebnf-range-regexp " 0-9A-Za-z" ?\240 ?\377)
  "Character set `ebnf-iso-lex' uses to read a meta identifier.

Built by `ebnf-range-regexp' from space, digits, letters and the
range \\240 to \\377, and passed to `ebnf-buffer-substring'.")
394
395
(defun ebnf-iso-lex ()
  "Lexical analyser for ISO EBNF.

Return a lexical token.

Starting at point, skip spaces, form feeds and comments, then scan
one token and return its class as a symbol.  Return `end-of-input'
once point has reached `ebnf-limit'.

For the `integer', `special', `terminal' and `non-terminal'
classes the token text is stored in variable `ebnf-iso-lex'.  A
form feed sets `ebnf-action' to `form-feed'.

Signal an error on a character above 255 or mapped to `error' by
`ebnf-iso-token-table', and on a meta identifier scanned while
`ebnf-no-meta-identifier' is non-nil.

See documentation for variable `ebnf-iso-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (and
              ;; A comment or form feed may end exactly at `ebnf-limit';
              ;; never classify the characters lying beyond it.
              (< (point) ebnf-limit)
              (if (> (following-char) 255)
                  (progn
                    (setq token 'error)
                    nil)
                (setq token (aref ebnf-iso-token-table (following-char)))
                (cond
                 ((eq token 'space)
                  (skip-chars-forward " \013\n\r\t" ebnf-limit)
                  (< (point) ebnf-limit))
                 ((or (eq token 'begin-parenthesis)
                      (eq token 'left-parenthesis))
                  ;; The `(' is consumed here, whether or not it turns
                  ;; out to open a comment.
                  (forward-char)
                  (if (/= (following-char) ?*)
                      ;; no comment
                      nil
                    ;; comment
                    (ebnf-iso-skip-comment)
                    t))
                 ((eq token 'form-feed)
                  (forward-char)
                  (setq ebnf-action 'form-feed))
                 (t nil)
                 ))))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Illegal character"))
       ;; integer
       ((eq token 'integer)
        (setq ebnf-iso-lex (ebnf-buffer-substring "0-9"))
        'integer)
       ;; special: ?special?
       ((eq token 'special)
        (setq ebnf-iso-lex (concat "?"
                                   (ebnf-string " ->@-~" ?\? "special")
                                   "?"))
        'special)
       ;; terminal: "string"
       ((eq token 'double-terminal)
        (setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal"))
        'terminal)
       ;; terminal: 'string'
       ((eq token 'single-terminal)
        (setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal"))
        'terminal)
       ;; non-terminal
       ((eq token 'non-terminal)
        (setq ebnf-iso-lex
              (ebnf-iso-normalize
               (ebnf-trim-right
                (ebnf-buffer-substring ebnf-iso-non-terminal-chars))))
        (and ebnf-no-meta-identifier
             (error "Exception sequence should not contain a meta identifier"))
        'non-terminal)
       ;; begin optional, begin list or begin group
       ((eq token 'left-parenthesis)
        ;; Point is already just after the `(' (it was consumed by the
        ;; skip loop above), so the following character is examined in
        ;; place.  Advancing once more here would step over the `/' or
        ;; `:' of `(/' and `(:' and misread them as a plain group.
        (cond ((= (following-char) ?/)
               (forward-char)
               'begin-optional)
              ((= (following-char) ?:)
               (forward-char)
               'begin-zero-or-more)
              (t
               'begin-group)
              ))
       ;; end optional or alternative
       ((eq token 'slash)
        (forward-char)
        (if (/= (following-char) ?\))
            'alternative
          (forward-char)
          'end-optional))
       ;; end list
       ((eq token 'colon)
        (forward-char)
        (if (/= (following-char) ?\))
            'character
          (forward-char)
          'end-zero-or-more))
       ;; begin group (the `(' was consumed by the skip loop above)
       ((eq token 'begin-parenthesis)
        'begin-group)
       ;; miscellaneous
       (t
        (forward-char)
        token)
       ))))
497
498
;; The range "\177-\237" is not written in the string itself; it is
;; supplied through `ebnf-range-regexp' (see that function).
(defconst ebnf-iso-comment-chars
  (ebnf-range-regexp "^*(\000-\010\016-\037" ?\177 ?\237)
  "Character set given to `skip-chars-forward' to move over comment text.

The set is negated: `*', `(' and the listed control characters are
excluded from it, so scanning stops on them.  The range \\177 to
\\237 is added by `ebnf-range-regexp'.")
502
503
(defun ebnf-iso-skip-comment ()
  "Skip a comment; point must be on the `*' of its opening `(*'.

The character right after the `(*' selects an action: when
`ebnf-eps-executing' is non-nil, `[' adds and `]' removes the EPS
context named by `ebnf-iso-eps-filename'; in every other case
`ebnf-action' is set from `ebnf-comment-table'.

Comments nest.  Point is left after the `*)' that closes the
outermost comment.  Signal an error when `ebnf-limit' is reached
first or when a character outside `ebnf-iso-comment-chars' other
than `*' and `(' is met."
  (forward-char)
  (let ((selector (following-char)))
    (cond
     ;; open EPS file
     ((and ebnf-eps-executing (= selector ?\[))
      (ebnf-eps-add-context (ebnf-iso-eps-filename)))
     ;; close EPS file
     ((and ebnf-eps-executing (= selector ?\]))
      (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
     ;; any other action in comment
     (t
      (setq ebnf-action (aref ebnf-comment-table selector)))))
  (let ((depth 1)
        stopper)
    (while (> depth 0)
      (skip-chars-forward ebnf-iso-comment-chars ebnf-limit)
      (when (>= (point) ebnf-limit)
        (error "Missing end of comment: `*)'"))
      (setq stopper (following-char))
      (cond
       ;; a run of `*' closes one level when followed by `)'
       ((= stopper ?*)
        (skip-chars-forward "*" ebnf-limit)
        (when (= (following-char) ?\))
          (forward-char)
          (setq depth (1- depth))))
       ;; a run of `(' opens one level when followed by `*'
       ((= stopper ?\()
        (skip-chars-forward "(" ebnf-limit)
        (when (= (following-char) ?*)
          (forward-char)
          (setq depth (1+ depth))))
       (t
        (error "Illegal character"))))))
537
538
(defun ebnf-iso-eps-filename ()
  "Return the EPS file name written after the current character.

Move one character forward, then return, without text properties,
the text from there up to the first of: `ebnf-limit', a newline, a
run of `*' followed by `)' (the last `*' of the run is left out),
a `(' followed by `*', or any other character excluded from
`ebnf-iso-comment-chars'.  Point is left at the end of the
returned text."
  (forward-char)
  (let ((begin (point))
        (stop-set (concat ebnf-iso-comment-chars "\n"))
        end)
    (while (null end)
      (skip-chars-forward stop-set ebnf-limit)
      (cond
       ((>= (point) ebnf-limit)
        (setq end (point)))
       ((= (following-char) ?*)
        (skip-chars-forward "*" ebnf-limit)
        (when (= (following-char) ?\))
          ;; step back onto the `*' of the closing `*)'
          (backward-char)
          (setq end (point))))
       ((= (following-char) ?\()
        (forward-char)
        (when (= (following-char) ?*)
          ;; step back onto the `(' of a nested `(*'
          (backward-char)
          (setq end (point))))
       (t
        (setq end (point)))))
    (buffer-substring-no-properties begin end)))
566
567
(defun ebnf-iso-normalize (str)
  "Return STR with each run of consecutive spaces reduced to one space.

STR itself is returned when `ebnf-iso-normalize-p' is nil or when
STR has no two adjacent spaces; otherwise a new string is built."
  (if (not ebnf-iso-normalize-p)
      str
    (let ((len (length str))
          (idx 0)
          (extra 0)
          after-space)
      ;; first pass: count the spaces that directly follow a space
      (while (< idx len)
        (if (/= (aref str idx) ?\ )
            (setq after-space nil)
          (and after-space
               (setq extra (1+ extra)))
          (setq after-space t))
        (setq idx (1+ idx)))
      (if (zerop extra)
          ;; nothing to drop
          str
        ;; second pass: copy everything but the counted spaces
        (let ((result (make-string (- len extra) ?\ ))
              (out 0)
              ch)
          (setq idx 0
                after-space nil)
          (while (< idx len)
            (setq ch (aref str idx))
            (if (and after-space (= ch ?\ ))
                nil
              (aset result out ch)
              (setq out (1+ out)))
            (setq after-space (= ch ?\ )
                  idx (1+ idx)))
          result)))))
608
609 \f
610 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
611
612
613 (provide 'ebnf-iso)
614
615
616 ;;; arch-tag: 03315eef-8f64-404a-bf9d-256d42442ee3
617 ;;; ebnf-iso.el ends here