* mh-search.el (mh-index-next-result-function): Add format to
[bpt/emacs.git] / lisp / progmodes / ebnf-iso.el
CommitLineData
e8af40ee 1;;; ebnf-iso.el --- parser for ISO EBNF
984ae001 2
eac9c0ef 3;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
ac4780a1 4;; Free Software Foundation, Inc.
984ae001 5
ac4780a1
VJL
6;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
6411a60a 8;; Time-stamp: <2004/04/03 16:48:52 vinicius>
ae16d111 9;; Keywords: wp, ebnf, PostScript
6411a60a 10;; Version: 1.8
984ae001 11
8d9ea7b1 12;; This file is part of GNU Emacs.
984ae001 13
8d9ea7b1 14;; GNU Emacs is free software; you can redistribute it and/or modify
984ae001
GM
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
8d9ea7b1 19;; GNU Emacs is distributed in the hope that it will be useful,
984ae001
GM
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
25;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
26;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27;; Boston, MA 02110-1301, USA.
984ae001
GM
28
29;;; Commentary:
30
31;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32;;
33;;
34;; This is part of ebnf2ps package.
35;;
36;; This package defines a parser for ISO EBNF.
37;;
38;; See ebnf2ps.el for documentation.
39;;
40;;
41;; ISO EBNF Syntax
42;; ---------------
43;;
44;; See the URL:
45;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
46;; ("International Standard of the ISO EBNF Notation").
47;;
48;;
49;; ISO EBNF = syntax rule, {syntax rule};
50;;
51;; syntax rule = meta identifier, '=', definition list, ';';
52;;
53;; definition list = single definition, {'|', single definition};
54;;
55;; single definition = term, {',', term};
56;;
57;; term = factor, ['-', exception];
58;;
59;; exception = factor (* without <meta identifier> *);
60;;
61;; factor = [integer, '*'], primary;
62;;
63;; primary = optional sequence | repeated sequence | special sequence
64;; | grouped sequence | meta identifier | terminal string
65;; | empty;
66;;
67;; empty = ;
68;;
69;; optional sequence = '[', definition list, ']';
70;;
71;; repeated sequence = '{', definition list, '}';
72;;
73;; grouped sequence = '(', definition list, ')';
74;;
75;; terminal string = "'", character - "'", {character - "'"}, "'"
76;; | '"', character - '"', {character - '"'}, '"';
77;;
78;; special sequence = '?', {character - '?'}, '?';
79;;
80;; meta identifier = letter, { letter | decimal digit | ' ' };
81;;
82;; integer = decimal digit, {decimal digit};
83;;
84;; comment = '(*', {comment symbol}, '*)';
85;;
86;; comment symbol = comment (* <== NESTED COMMENT *)
87;; | terminal string | special sequence | character;
88;;
89;; letter = ? A-Z a-z ?;
90;;
91;; decimal digit = ? 0-9 ?;
92;;
93;; character = letter | decimal digit
94;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
95;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
96;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
97;;
98;;
99;; There is also the following alternative representation:
100;;
101;; STANDARD ALTERNATIVE
102;; | ==> / or !
103;; [ ==> (/
104;; ] ==> /)
105;; { ==> (:
106;; } ==> :)
107;; ; ==> .
108;;
109;;
110;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
111;; -------------------------------------------------
112;;
113;; ISO EBNF accepts the characters given by <character> production above,
114;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
eac9c0ef 115;; (^L), any other characters are invalid. But ebnf2ps accepts also the
60df7255
VJL
116;; european 8-bit accentuated characters (from \240 to \377) and underscore
117;; (_).
984ae001
GM
118;;
119;;
120;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
121
e8af40ee 122;;; Code:
984ae001
GM
123
124
125(require 'ebnf-otz)
126
127
128(defvar ebnf-iso-lex nil
129 "Value returned by `ebnf-iso-lex' function.")
130
131
1002b9b5
VJL
132(defvar ebnf-no-meta-identifier nil
133 "Used by `ebnf-iso-term' and `ebnf-iso-lex' functions.")
984ae001
GM
134
135\f
136;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
f504d516 137;; Syntactic analyzer
984ae001
GM
138
139
140;;; ISO EBNF = syntax rule, {syntax rule};
141
142(defun ebnf-iso-parser (start)
143 "ISO EBNF parser."
144 (let ((total (+ (- ebnf-limit start) 1))
145 (bias (1- start))
146 (origin (point))
147 syntax-list token rule)
148 (goto-char start)
149 (setq token (ebnf-iso-lex))
150 (and (eq token 'end-of-input)
e8af40ee 151 (error "Invalid ISO EBNF file format"))
984ae001
GM
152 (while (not (eq token 'end-of-input))
153 (ebnf-message-float
154 "Parsing...%s%%"
155 (/ (* (- (point) bias) 100.0) total))
156 (setq token (ebnf-iso-syntax-rule token)
157 rule (cdr token)
158 token (car token))
159 (or (ebnf-add-empty-rule-list rule)
160 (setq syntax-list (cons rule syntax-list))))
161 (goto-char origin)
162 syntax-list))
163
164
165;;; syntax rule = meta identifier, '=', definition list, ';';
166
167(defun ebnf-iso-syntax-rule (token)
168 (let ((header ebnf-iso-lex)
169 (action ebnf-action)
170 body)
171 (setq ebnf-action nil)
172 (or (eq token 'non-terminal)
e8af40ee 173 (error "Invalid meta identifier syntax rule"))
984ae001 174 (or (eq (ebnf-iso-lex) 'equal)
e8af40ee 175 (error "Invalid syntax rule: missing `='"))
984ae001
GM
176 (setq body (ebnf-iso-definition-list))
177 (or (eq (car body) 'period)
e8af40ee 178 (error "Invalid syntax rule: missing `;' or `.'"))
984ae001
GM
179 (setq body (cdr body))
180 (ebnf-eps-add-production header)
181 (cons (ebnf-iso-lex)
182 (ebnf-make-production header body action))))
183
184
185;;; definition list = single definition, {'|', single definition};
186
187(defun ebnf-iso-definition-list ()
188 (let (body sequence)
189 (while (eq (car (setq sequence (ebnf-iso-single-definition)))
190 'alternative)
191 (setq sequence (cdr sequence)
192 body (cons sequence body)))
193 (ebnf-token-alternative body sequence)))
194
195
196;;; single definition = term, {',', term};
197
198(defun ebnf-iso-single-definition ()
199 (let (token seq term)
200 (while (and (setq term (ebnf-iso-term (ebnf-iso-lex))
201 token (car term)
202 term (cdr term))
203 (eq token 'catenate))
204 (setq seq (cons term seq)))
205 (cons token
6411a60a
VJL
206 (ebnf-token-sequence (if term
207 (cons term seq)
208 seq)))))
984ae001
GM
209
210
211;;; term = factor, ['-', exception];
212;;;
213;;; exception = factor (* without <meta identifier> *);
214
215(defun ebnf-iso-term (token)
216 (let ((factor (ebnf-iso-factor token)))
217 (if (not (eq (car factor) 'except))
218 ;; factor
219 factor
220 ;; factor - exception
221 (let ((ebnf-no-meta-identifier t))
222 (ebnf-token-except (cdr factor) (ebnf-iso-factor (ebnf-iso-lex)))))))
223
224
225;;; factor = [integer, '*'], primary;
226
227(defun ebnf-iso-factor (token)
228 (if (eq token 'integer)
229 (let ((times ebnf-iso-lex))
230 (or (eq (ebnf-iso-lex) 'repeat)
e8af40ee 231 (error "Missing `*'"))
984ae001
GM
232 (ebnf-token-repeat times (ebnf-iso-primary (ebnf-iso-lex))))
233 (ebnf-iso-primary token)))
234
235
236;;; primary = optional sequence | repeated sequence | special sequence
237;;; | grouped sequence | meta identifier | terminal string
238;;; | empty;
239;;;
240;;; empty = ;
241;;;
242;;; optional sequence = '[', definition list, ']';
243;;;
244;;; repeated sequence = '{', definition list, '}';
245;;;
246;;; grouped sequence = '(', definition list, ')';
247;;;
248;;; terminal string = "'", character - "'", {character - "'"}, "'"
249;;; | '"', character - '"', {character - '"'}, '"';
250;;;
251;;; special sequence = '?', {character - '?'}, '?';
252;;;
253;;; meta identifier = letter, {letter | decimal digit};
254
255(defun ebnf-iso-primary (token)
256 (let ((primary
257 (cond
258 ;; terminal string
259 ((eq token 'terminal)
260 (ebnf-make-terminal ebnf-iso-lex))
261 ;; meta identifier
262 ((eq token 'non-terminal)
263 (ebnf-make-non-terminal ebnf-iso-lex))
264 ;; special sequence
265 ((eq token 'special)
266 (ebnf-make-special ebnf-iso-lex))
267 ;; grouped sequence
268 ((eq token 'begin-group)
269 (let ((body (ebnf-iso-definition-list)))
270 (or (eq (car body) 'end-group)
e8af40ee 271 (error "Missing `)'"))
984ae001
GM
272 (cdr body)))
273 ;; optional sequence
274 ((eq token 'begin-optional)
275 (let ((body (ebnf-iso-definition-list)))
276 (or (eq (car body) 'end-optional)
e8af40ee 277 (error "Missing `]' or `/)'"))
984ae001
GM
278 (ebnf-token-optional (cdr body))))
279 ;; repeated sequence
280 ((eq token 'begin-zero-or-more)
281 (let* ((body (ebnf-iso-definition-list))
282 (repeat (cdr body)))
283 (or (eq (car body) 'end-zero-or-more)
e8af40ee 284 (error "Missing `}' or `:)'"))
984ae001
GM
285 (ebnf-make-zero-or-more repeat)))
286 ;; empty
287 (t
288 nil)
289 )))
290 (cons (if primary
291 (ebnf-iso-lex)
292 token)
293 primary)))
294
295\f
296;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
297;; Lexical analyzer
298
299
300(defconst ebnf-iso-token-table
301 ;; control character & 8-bit character are set to `error'
302 (let ((table (make-vector 256 'error))
303 (char ?\040))
304 ;; printable character
305 (while (< char ?\060)
306 (aset table char 'character)
307 (setq char (1+ char)))
308 ;; digits:
309 (while (< char ?\072)
310 (aset table char 'integer)
311 (setq char (1+ char)))
312 (while (< char ?\101)
313 (aset table char 'character)
314 (setq char (1+ char)))
315 ;; upper case letters:
316 (while (< char ?\133)
317 (aset table char 'non-terminal)
318 (setq char (1+ char)))
319 (while (< char ?\141)
320 (aset table char 'character)
321 (setq char (1+ char)))
322 ;; lower case letters:
323 (while (< char ?\173)
324 (aset table char 'non-terminal)
325 (setq char (1+ char)))
326 (while (< char ?\177)
327 (aset table char 'character)
328 (setq char (1+ char)))
329 ;; European 8-bit accentuated characters:
330 (setq char ?\240)
331 (while (< char ?\400)
332 (aset table char 'non-terminal)
333 (setq char (1+ char)))
334 ;; Override space characters:
335 (aset table ?\013 'space) ; [VT] vertical tab
336 (aset table ?\n 'space) ; [NL] linefeed
337 (aset table ?\r 'space) ; [CR] carriage return
338 (aset table ?\t 'space) ; [HT] horizontal tab
339 (aset table ?\ 'space) ; [SP] space
340 ;; Override form feed character:
341 (aset table ?\f 'form-feed) ; [FF] form feed
342 ;; Override other lexical characters:
ac4780a1 343 (aset table ?_ 'non-terminal)
984ae001
GM
344 (aset table ?\" 'double-terminal)
345 (aset table ?\' 'single-terminal)
346 (aset table ?\? 'special)
347 (aset table ?* 'repeat)
348 (aset table ?, 'catenate)
349 (aset table ?- 'except)
350 (aset table ?= 'equal)
351 (aset table ?\) 'end-group)
352 table)
353 "Vector used to map characters to a lexical token.")
354
355
356(defun ebnf-iso-initialize ()
357 "Initialize ISO EBNF token table."
358 (if ebnf-iso-alternative-p
359 ;; Override alternative lexical characters:
360 (progn
361 (aset ebnf-iso-token-table ?\( 'left-parenthesis)
362 (aset ebnf-iso-token-table ?\[ 'character)
363 (aset ebnf-iso-token-table ?\] 'character)
364 (aset ebnf-iso-token-table ?\{ 'character)
365 (aset ebnf-iso-token-table ?\} 'character)
366 (aset ebnf-iso-token-table ?| 'character)
367 (aset ebnf-iso-token-table ?\; 'character)
368 (aset ebnf-iso-token-table ?/ 'slash)
369 (aset ebnf-iso-token-table ?! 'alternative)
370 (aset ebnf-iso-token-table ?: 'colon)
371 (aset ebnf-iso-token-table ?. 'period))
372 ;; Override standard lexical characters:
373 (aset ebnf-iso-token-table ?\( 'begin-parenthesis)
374 (aset ebnf-iso-token-table ?\[ 'begin-optional)
375 (aset ebnf-iso-token-table ?\] 'end-optional)
376 (aset ebnf-iso-token-table ?\{ 'begin-zero-or-more)
377 (aset ebnf-iso-token-table ?\} 'end-zero-or-more)
378 (aset ebnf-iso-token-table ?| 'alternative)
379 (aset ebnf-iso-token-table ?\; 'period)
380 (aset ebnf-iso-token-table ?/ 'character)
381 (aset ebnf-iso-token-table ?! 'character)
382 (aset ebnf-iso-token-table ?: 'character)
383 (aset ebnf-iso-token-table ?. 'character)))
384
385
8a1e4eeb
GM
386;; replace the range "\240-\377" (see `ebnf-range-regexp').
387(defconst ebnf-iso-non-terminal-chars
ac4780a1 388 (ebnf-range-regexp " 0-9A-Za-z_" ?\240 ?\377))
8a1e4eeb
GM
389
390
984ae001 391(defun ebnf-iso-lex ()
fc6e6963 392 "Lexical analyzer for ISO EBNF.
984ae001
GM
393
394Return a lexical token.
395
396See documentation for variable `ebnf-iso-lex'."
397 (if (>= (point) ebnf-limit)
398 'end-of-input
399 (let (token)
400 ;; skip spaces and comments
401 (while (if (> (following-char) 255)
402 (progn
403 (setq token 'error)
404 nil)
405 (setq token (aref ebnf-iso-token-table (following-char)))
406 (cond
407 ((eq token 'space)
408 (skip-chars-forward " \013\n\r\t" ebnf-limit)
409 (< (point) ebnf-limit))
410 ((or (eq token 'begin-parenthesis)
411 (eq token 'left-parenthesis))
412 (forward-char)
413 (if (/= (following-char) ?*)
414 ;; no comment
415 nil
416 ;; comment
417 (ebnf-iso-skip-comment)
418 t))
419 ((eq token 'form-feed)
420 (forward-char)
421 (setq ebnf-action 'form-feed))
422 (t nil)
423 )))
424 (cond
425 ;; end of input
426 ((>= (point) ebnf-limit)
427 'end-of-input)
428 ;; error
429 ((eq token 'error)
eac9c0ef 430 (error "Invalid character"))
984ae001
GM
431 ;; integer
432 ((eq token 'integer)
433 (setq ebnf-iso-lex (ebnf-buffer-substring "0-9"))
434 'integer)
435 ;; special: ?special?
436 ((eq token 'special)
ac4780a1 437 (setq ebnf-iso-lex (concat (and ebnf-special-show-delimiter "?")
984ae001 438 (ebnf-string " ->@-~" ?\? "special")
ac4780a1 439 (and ebnf-special-show-delimiter "?")))
984ae001
GM
440 'special)
441 ;; terminal: "string"
442 ((eq token 'double-terminal)
443 (setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal"))
444 'terminal)
445 ;; terminal: 'string'
446 ((eq token 'single-terminal)
447 (setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal"))
448 'terminal)
449 ;; non-terminal
450 ((eq token 'non-terminal)
8a1e4eeb
GM
451 (setq ebnf-iso-lex
452 (ebnf-iso-normalize
453 (ebnf-trim-right
454 (ebnf-buffer-substring ebnf-iso-non-terminal-chars))))
984ae001 455 (and ebnf-no-meta-identifier
e8af40ee 456 (error "Exception sequence should not contain a meta identifier"))
984ae001
GM
457 'non-terminal)
458 ;; begin optional, begin list or begin group
459 ((eq token 'left-parenthesis)
460 (forward-char)
461 (cond ((= (following-char) ?/)
462 (forward-char)
463 'begin-optional)
464 ((= (following-char) ?:)
465 (forward-char)
466 'begin-zero-or-more)
467 (t
468 'begin-group)
469 ))
470 ;; end optional or alternative
471 ((eq token 'slash)
472 (forward-char)
473 (if (/= (following-char) ?\))
474 'alternative
475 (forward-char)
476 'end-optional))
477 ;; end list
478 ((eq token 'colon)
479 (forward-char)
480 (if (/= (following-char) ?\))
481 'character
482 (forward-char)
483 'end-zero-or-more))
484 ;; begin group
485 ((eq token 'begin-parenthesis)
486 'begin-group)
487 ;; miscellaneous
488 (t
489 (forward-char)
490 token)
491 ))))
492
493
2197ec3b
GM
494;; replace the range "\177-\237" (see `ebnf-range-regexp').
495(defconst ebnf-iso-comment-chars
496 (ebnf-range-regexp "^*(\000-\010\016-\037" ?\177 ?\237))
984ae001
GM
497
498
499(defun ebnf-iso-skip-comment ()
500 (forward-char)
501 (cond
502 ;; open EPS file
503 ((and ebnf-eps-executing (= (following-char) ?\[))
504 (ebnf-eps-add-context (ebnf-iso-eps-filename)))
505 ;; close EPS file
506 ((and ebnf-eps-executing (= (following-char) ?\]))
507 (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
508 ;; any other action in comment
509 (t
510 (setq ebnf-action (aref ebnf-comment-table (following-char))))
511 )
512 (let ((pair 1))
513 (while (> pair 0)
514 (skip-chars-forward ebnf-iso-comment-chars ebnf-limit)
515 (cond ((>= (point) ebnf-limit)
e8af40ee 516 (error "Missing end of comment: `*)'"))
984ae001
GM
517 ((= (following-char) ?*)
518 (skip-chars-forward "*" ebnf-limit)
519 (when (= (following-char) ?\))
520 ;; end of comment
521 (forward-char)
522 (setq pair (1- pair))))
523 ((= (following-char) ?\()
524 (skip-chars-forward "(" ebnf-limit)
525 (when (= (following-char) ?*)
526 ;; beginning of comment
527 (forward-char)
528 (setq pair (1+ pair))))
529 (t
eac9c0ef 530 (error "Invalid character"))
984ae001
GM
531 ))))
532
533
534(defun ebnf-iso-eps-filename ()
535 (forward-char)
536 (buffer-substring-no-properties
537 (point)
538 (let ((chars (concat ebnf-iso-comment-chars "\n"))
539 found)
540 (while (not found)
541 (skip-chars-forward chars ebnf-limit)
542 (setq found
543 (cond ((>= (point) ebnf-limit)
544 (point))
545 ((= (following-char) ?*)
546 (skip-chars-forward "*" ebnf-limit)
547 (if (/= (following-char) ?\))
548 nil
549 (backward-char)
550 (point)))
551 ((= (following-char) ?\()
552 (forward-char)
553 (if (/= (following-char) ?*)
554 nil
555 (backward-char)
556 (point)))
557 (t
558 (point))
559 )))
560 found)))
561
562
563(defun ebnf-iso-normalize (str)
564 (if (not ebnf-iso-normalize-p)
565 str
566 (let ((len (length str))
567 (stri 0)
568 (spaces 0))
569 ;; count exceeding spaces
570 (while (< stri len)
571 (if (/= (aref str stri) ?\ )
572 (setq stri (1+ stri))
573 (setq stri (1+ stri))
574 (while (and (< stri len) (= (aref str stri) ?\ ))
575 (setq stri (1+ stri)
576 spaces (1+ spaces)))))
577 (if (zerop spaces)
578 ;; no exceeding space
579 str
580 ;; at least one exceeding space
581 (let ((new (make-string (- len spaces) ?\ ))
582 (newi 0))
583 ;; eliminate exceeding spaces
584 (setq stri 0)
585 (while (> spaces 0)
586 (if (/= (aref str stri) ?\ )
587 (progn
588 (aset new newi (aref str stri))
589 (setq stri (1+ stri)
590 newi (1+ newi)))
591 (aset new newi (aref str stri))
592 (setq stri (1+ stri)
593 newi (1+ newi))
594 (while (and (> spaces 0) (= (aref str stri) ?\ ))
595 (setq stri (1+ stri)
596 spaces (1- spaces)))))
597 ;; remaining is normalized
598 (while (< stri len)
599 (aset new newi (aref str stri))
600 (setq stri (1+ stri)
601 newi (1+ newi)))
602 new)))))
603
604\f
605;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
606
607
608(provide 'ebnf-iso)
609
610
ab5796a9 611;;; arch-tag: 03315eef-8f64-404a-bf9d-256d42442ee3
984ae001 612;;; ebnf-iso.el ends here