Commit | Line | Data |
---|---|---|
f3c3dee6 VJL |
1 | ;;; ebnf-ebx.el --- parser for EBNF used to specify XML (EBNFX) |
2 | ||
3 | ;; Copyright (C) 2004 Free Sofware Foundation, Inc. | |
4 | ||
5 | ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br> | |
6 | ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br> | |
7 | ;; Time-stamp: <2004/03/22 08:53:21 vinicius> | |
8 | ;; Keywords: wp, ebnf, PostScript | |
9 | ;; Version: 1.0 | |
10 | ||
11 | ;; This file is part of GNU Emacs. | |
12 | ||
13 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
14 | ;; it under the terms of the GNU General Public License as published by | |
15 | ;; the Free Software Foundation; either version 2, or (at your option) | |
16 | ;; any later version. | |
17 | ||
18 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | ;; GNU General Public License for more details. | |
22 | ||
23 | ;; You should have received a copy of the GNU General Public License | |
24 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
25 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
26 | ;; Boston, MA 02111-1307, USA. | |
27 | ||
28 | ;;; Commentary: | |
29 | ||
30 | ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
31 | ;; | |
32 | ;; | |
33 | ;; This is part of ebnf2ps package. | |
34 | ;; | |
35 | ;; This package defines a parser for EBNF used to specify XML (EBNFX). | |
36 | ;; | |
37 | ;; See ebnf2ps.el for documentation. | |
38 | ;; | |
39 | ;; | |
40 | ;; EBNFX Syntax | |
41 | ;; ------------ | |
42 | ;; | |
43 | ;; See the URL: | |
44 | ;; `http://www.w3.org/TR/2004/REC-xml-20040204/#sec-notation' | |
45 | ;; (Extensible Markup Language (XML) 1.0 (Third Edition)) | |
46 | ;; | |
47 | ;; | |
48 | ;; rule ::= symbol '::=' expression | |
49 | ;; /* rules are separated by at least one blank line. */ | |
50 | ;; | |
51 | ;; expression ::= concatenation ('|' concatenation)* | |
52 | ;; | |
53 | ;; concatenation ::= exception* | |
54 | ;; | |
55 | ;; exception ::= term ('-' term)? | |
56 | ;; | |
57 | ;; term ::= factor ('*' | '+' | '?')? | |
58 | ;; | |
59 | ;; factor ::= hex-char+ | |
60 | ;; | '[' '^'? ( char ( '-' char )? )+ ']' | |
61 | ;; | '"' 'string' '"' | |
62 | ;; | "'" "string" "'" | |
63 | ;; | '(' expression ')' | |
64 | ;; | symbol | |
65 | ;; | |
66 | ;; symbol ::= 'upper or lower case letter' | |
67 | ;; ('upper or lower case letter' | '-' | '_')* | |
68 | ;; /* upper and lower 8-bit accentuated characters are included */ | |
69 | ;; | |
70 | ;; hex-char ::= '#x' [0-9A-Fa-f]+ | |
71 | ;; | |
72 | ;; char ::= hex-char | 'any character except control characters' | |
73 | ;; /* 8-bit accentuated characters are included */ | |
74 | ;; | |
75 | ;; any-char ::= char | 'newline' | 'tab' | |
76 | ;; | |
77 | ;; ignore ::= '[' ('wfc' | 'WFC' | 'vc' | 'VC') ':' ( any-char - ']' )* ']' | |
78 | ;; | |
79 | ;; comment ::= '/*' ( any-char - '*/' ) '*/' | |
80 | ;; | |
81 | ;; | |
82 | ;; Below is the Notation section extracted from the URL cited above. | |
83 | ;; | |
84 | ;; 6 Notation | |
85 | ;; | |
86 | ;; The formal grammar of XML is given in this specification using a simple | |
87 | ;; Extended Backus-Naur Form (EBNF) notation. Each rule in the grammar defines | |
88 | ;; one symbol, in the form | |
89 | ;; | |
90 | ;; symbol ::= expression | |
91 | ;; | |
92 | ;; Symbols are written with an initial capital letter if they are the start | |
93 | ;; symbol of a regular language, otherwise with an initial lowercase letter. | |
94 | ;; Literal strings are quoted. | |
95 | ;; | |
96 | ;; Within the expression on the right-hand side of a rule, the following | |
97 | ;; expressions are used to match strings of one or more characters: | |
98 | ;; | |
99 | ;; #xN | |
100 | ;; | |
101 | ;; where N is a hexadecimal integer, the expression matches the character | |
102 | ;; whose number (code point) in ISO/IEC 10646 is N. The number of leading | |
103 | ;; zeros in the #xN form is insignificant. | |
104 | ;; | |
105 | ;; [a-zA-Z], [#xN-#xN] | |
106 | ;; | |
107 | ;; matches any Char with a value in the range(s) indicated (inclusive). | |
108 | ;; | |
109 | ;; [abc], [#xN#xN#xN] | |
110 | ;; | |
111 | ;; matches any Char with a value among the characters enumerated. | |
112 | ;; Enumerations and ranges can be mixed in one set of brackets. | |
113 | ;; | |
114 | ;; [^a-z], [^#xN-#xN] | |
115 | ;; | |
116 | ;; matches any Char with a value outside the range indicated. | |
117 | ;; | |
118 | ;; [^abc], [^#xN#xN#xN] | |
119 | ;; | |
120 | ;; matches any Char with a value not among the characters given. | |
121 | ;; Enumerations and ranges of forbidden values can be mixed in one set of | |
122 | ;; brackets. | |
123 | ;; | |
124 | ;; "string" | |
125 | ;; | |
126 | ;; matches a literal string matching that given inside the double quotes. | |
127 | ;; | |
128 | ;; 'string' | |
129 | ;; | |
130 | ;; matches a literal string matching that given inside the single quotes. | |
131 | ;; | |
132 | ;; These symbols may be combined to match more complex patterns as follows, | |
133 | ;; where A and B represent simple expressions: | |
134 | ;; | |
135 | ;; (expression) | |
136 | ;; | |
137 | ;; expression is treated as a unit and may be combined as described in this | |
138 | ;; list. | |
139 | ;; | |
140 | ;; A? | |
141 | ;; | |
142 | ;; matches A or nothing; optional A. | |
143 | ;; | |
144 | ;; A B | |
145 | ;; | |
146 | ;; matches A followed by B. This operator has higher precedence than | |
147 | ;; alternation; thus A B | C D is identical to (A B) | (C D). | |
148 | ;; | |
149 | ;; A | B | |
150 | ;; | |
151 | ;; matches A or B. | |
152 | ;; | |
153 | ;; A - B | |
154 | ;; | |
155 | ;; matches any string that matches A but does not match B. | |
156 | ;; | |
157 | ;; A+ | |
158 | ;; | |
159 | ;; matches one or more occurrences of A. Concatenation has higher | |
160 | ;; precedence than alternation; thus A+ | B+ is identical to (A+) | (B+). | |
161 | ;; | |
162 | ;; A* | |
163 | ;; | |
164 | ;; matches zero or more occurrences of A. Concatenation has higher | |
165 | ;; precedence than alternation; thus A* | B* is identical to (A*) | (B*). | |
166 | ;; | |
167 | ;; Other notations used in the productions are: | |
168 | ;; | |
169 | ;; /* ... */ | |
170 | ;; | |
171 | ;; comment. | |
172 | ;; | |
173 | ;; [ wfc: ... ] | |
174 | ;; | |
175 | ;; well-formedness constraint; this identifies by name a constraint on | |
176 | ;; well-formed documents associated with a production. | |
177 | ;; | |
178 | ;; [ vc: ... ] | |
179 | ;; | |
180 | ;; validity constraint; this identifies by name a constraint on valid | |
181 | ;; documents associated with a production. | |
182 | ;; | |
183 | ;; | |
184 | ;; Differences Between EBNFX And ebnf2ps EBNFX | |
185 | ;; ------------------------------------------- | |
186 | ;; | |
187 | ;; Besides the characters that EBNFX accepts, ebnf2ps EBNFX accepts also the | |
188 | ;; underscore (_) and minus (-) for rule name and european 8-bit accentuated | |
189 | ;; characters (from \240 to \377) for rule name, string and comment. Also | |
190 | ;; rule name can start with upper case letter. | |
191 | ;; | |
192 | ;; | |
193 | ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
194 | ||
195 | ;;; Code: | |
196 | ||
197 | ||
198 | (require 'ebnf-otz) | |
199 | ||
200 | ||
201 | (defvar ebnf-ebx-lex nil | |
202 | "Value returned by `ebnf-ebx-lex' function.") | |
203 | ||
204 | \f | |
205 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
206 | ;; Syntactic analyzer | |
207 | ||
208 | ||
209 | ;;; rulelist ::= rule+ | |
210 | ||
211 | (defun ebnf-ebx-parser (start) | |
212 | "EBNFX parser." | |
213 | (let ((total (+ (- ebnf-limit start) 1)) | |
214 | (bias (1- start)) | |
215 | (origin (point)) | |
216 | rule-list token rule) | |
217 | (goto-char start) | |
218 | (setq token (ebnf-ebx-lex)) | |
219 | (and (eq token 'end-of-input) | |
220 | (error "Invalid EBNFX file format")) | |
221 | (and (eq token 'end-of-rule) | |
222 | (setq token (ebnf-ebx-lex))) | |
223 | (while (not (eq token 'end-of-input)) | |
224 | (ebnf-message-float | |
225 | "Parsing...%s%%" | |
226 | (/ (* (- (point) bias) 100.0) total)) | |
227 | (setq token (ebnf-ebx-rule token) | |
228 | rule (cdr token) | |
229 | token (car token)) | |
230 | (or (ebnf-add-empty-rule-list rule) | |
231 | (setq rule-list (cons rule rule-list)))) | |
232 | (goto-char origin) | |
233 | rule-list)) | |
234 | ||
235 | ||
236 | ;;; rule ::= symbol '::=' expression | |
237 | ||
238 | ||
239 | (defun ebnf-ebx-rule (token) | |
240 | (let ((name ebnf-ebx-lex) | |
241 | (action ebnf-action) | |
242 | elements) | |
243 | (setq ebnf-action nil) | |
244 | (or (eq token 'non-terminal) | |
245 | (error "Invalid rule name")) | |
246 | (setq token (ebnf-ebx-lex)) | |
247 | (or (eq token 'production) | |
248 | (error "Invalid rule: missing `::='")) | |
249 | (setq elements (ebnf-ebx-expression)) | |
250 | (or (memq (car elements) '(end-of-rule end-of-input)) | |
251 | (error "Invalid rule: there is no end of rule")) | |
252 | (setq elements (cdr elements)) | |
253 | (ebnf-eps-add-production name) | |
254 | (cons (ebnf-ebx-lex) | |
255 | (ebnf-make-production name elements action)))) | |
256 | ||
257 | ||
258 | ;; expression ::= concatenation ('|' concatenation)* | |
259 | ||
260 | ||
261 | (defun ebnf-ebx-expression () | |
262 | (let (body concatenation) | |
263 | (while (eq (car (setq concatenation | |
264 | (ebnf-ebx-concatenation (ebnf-ebx-lex)))) | |
265 | 'alternative) | |
266 | (setq body (cons (cdr concatenation) body))) | |
267 | (ebnf-token-alternative body concatenation))) | |
268 | ||
269 | ||
270 | ;; concatenation ::= exception* | |
271 | ||
272 | ||
273 | (defun ebnf-ebx-concatenation (token) | |
274 | (let ((term (ebnf-ebx-exception token)) | |
275 | seq) | |
276 | (or (setq token (car term) | |
277 | term (cdr term)) | |
278 | (error "Empty element")) | |
279 | (setq seq (cons term seq)) | |
280 | (while (setq term (ebnf-ebx-exception token) | |
281 | token (car term) | |
282 | term (cdr term)) | |
283 | (setq seq (cons term seq))) | |
284 | (cons token | |
285 | (if (= (length seq) 1) | |
286 | ;; sequence with only one element | |
287 | (car seq) | |
288 | ;; a real sequence | |
289 | (ebnf-make-sequence (nreverse seq)))))) | |
290 | ||
291 | ||
292 | ;;; exception ::= term ('-' term)? | |
293 | ||
294 | ||
295 | (defun ebnf-ebx-exception (token) | |
296 | (let ((term (ebnf-ebx-term token))) | |
297 | (if (eq (car term) 'exception) | |
298 | (let ((except (ebnf-ebx-term (ebnf-ebx-lex)))) | |
299 | (cons (car except) | |
300 | (ebnf-make-except (cdr term) (cdr except)))) | |
301 | term))) | |
302 | ||
303 | ||
304 | ||
305 | ;;; term ::= factor ('*' | '+' | '?')? | |
306 | ||
307 | ||
308 | (defun ebnf-ebx-term (token) | |
309 | (let ((factor (ebnf-ebx-factor token))) | |
310 | (when factor | |
311 | (setq token (ebnf-ebx-lex)) | |
312 | (cond ((eq token 'zero-or-more) | |
313 | (setq factor (ebnf-make-zero-or-more factor) | |
314 | token (ebnf-ebx-lex))) | |
315 | ((eq token 'one-or-more) | |
316 | (setq factor (ebnf-make-one-or-more factor) | |
317 | token (ebnf-ebx-lex))) | |
318 | ((eq token 'optional) | |
319 | (setq factor (ebnf-token-optional factor) | |
320 | token (ebnf-ebx-lex))))) | |
321 | (cons token factor))) | |
322 | ||
323 | ||
324 | ;;; factor ::= hex-char+ | |
325 | ;;; | '[' '^'? ( char ( '-' char )? )+ ']' | |
326 | ;;; | '"' 'string' '"' | |
327 | ;;; | "'" "string" "'" | |
328 | ;;; | '(' expression ')' | |
329 | ;;; | symbol | |
330 | ;;; | |
331 | ;;; symbol ::= 'upper or lower case letter' | |
332 | ;;; ('upper or lower case letter' | '-' | '_')* | |
333 | ;;; /* upper and lower 8-bit accentuated characters are included */ | |
334 | ;;; | |
335 | ;;; hex-char ::= '#x' [0-9A-Fa-f]+ | |
336 | ;;; | |
337 | ;;; char ::= hex-char | 'any character except control characters' | |
338 | ;;; /* 8-bit accentuated characters are included */ | |
339 | ;;; | |
340 | ;;; any-char ::= char | 'newline' | 'tab' | |
341 | ||
342 | ||
343 | (defun ebnf-ebx-factor (token) | |
344 | (cond | |
345 | ;; terminal | |
346 | ((eq token 'terminal) | |
347 | (ebnf-make-terminal ebnf-ebx-lex)) | |
348 | ;; non-terminal | |
349 | ((eq token 'non-terminal) | |
350 | (ebnf-make-non-terminal ebnf-ebx-lex)) | |
351 | ;; group | |
352 | ((eq token 'begin-group) | |
353 | (let ((body (ebnf-ebx-expression))) | |
354 | (or (eq (car body) 'end-group) | |
355 | (error "Missing `)'")) | |
356 | (cdr body))) | |
357 | ;; no element | |
358 | (t | |
359 | nil) | |
360 | )) | |
361 | ||
362 | \f | |
363 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
364 | ;; Lexical analyzer | |
365 | ||
366 | ||
367 | (defconst ebnf-ebx-token-table (make-vector 256 'error) | |
368 | "Vector used to map characters to a lexical token.") | |
369 | ||
370 | ||
371 | (defun ebnf-ebx-initialize () | |
372 | "Initialize EBNFX token table." | |
373 | ;; control character & control 8-bit character are set to `error' | |
374 | (let ((char ?\101)) | |
375 | ;; printable character: A-Z | |
376 | (while (< char ?\133) | |
377 | (aset ebnf-ebx-token-table char 'non-terminal) | |
378 | (setq char (1+ char))) | |
379 | ;; printable character: a-z | |
380 | (setq char ?\141) | |
381 | (while (< char ?\173) | |
382 | (aset ebnf-ebx-token-table char 'non-terminal) | |
383 | (setq char (1+ char))) | |
384 | ;; European 8-bit accentuated characters: | |
385 | (setq char ?\240) | |
386 | (while (< char ?\400) | |
387 | (aset ebnf-ebx-token-table char 'non-terminal) | |
388 | (setq char (1+ char))) | |
389 | ;; Override end of line characters: | |
390 | (aset ebnf-ebx-token-table ?\n 'end-of-rule) ; [NL] linefeed | |
391 | (aset ebnf-ebx-token-table ?\r 'end-of-rule) ; [CR] carriage return | |
392 | ;; Override space characters: | |
393 | (aset ebnf-ebx-token-table ?\013 'space) ; [VT] vertical tab | |
394 | (aset ebnf-ebx-token-table ?\t 'space) ; [HT] horizontal tab | |
395 | (aset ebnf-ebx-token-table ?\ 'space) ; [SP] space | |
396 | ;; Override form feed character: | |
397 | (aset ebnf-ebx-token-table ?\f 'form-feed) ; [FF] form feed | |
398 | ;; Override other lexical characters: | |
399 | (aset ebnf-ebx-token-table ?# 'hash) | |
400 | (aset ebnf-ebx-token-table ?\" 'double-quote) | |
401 | (aset ebnf-ebx-token-table ?\' 'single-quote) | |
402 | (aset ebnf-ebx-token-table ?\( 'begin-group) | |
403 | (aset ebnf-ebx-token-table ?\) 'end-group) | |
404 | (aset ebnf-ebx-token-table ?- 'exception) | |
405 | (aset ebnf-ebx-token-table ?: 'colon) | |
406 | (aset ebnf-ebx-token-table ?\[ 'begin-square) | |
407 | (aset ebnf-ebx-token-table ?| 'alternative) | |
408 | (aset ebnf-ebx-token-table ?* 'zero-or-more) | |
409 | (aset ebnf-ebx-token-table ?+ 'one-or-more) | |
410 | (aset ebnf-ebx-token-table ?\? 'optional) | |
411 | ;; Override comment character: | |
412 | (aset ebnf-ebx-token-table ?/ 'comment))) | |
413 | ||
414 | ||
415 | ;; replace the range "\240-\377" (see `ebnf-range-regexp'). | |
416 | (defconst ebnf-ebx-non-terminal-chars | |
417 | (ebnf-range-regexp "-_A-Za-z" ?\240 ?\377)) | |
418 | (defconst ebnf-ebx-non-terminal-letter-chars | |
419 | (ebnf-range-regexp "A-Za-z" ?\240 ?\377)) | |
420 | ||
421 | ||
422 | (defun ebnf-ebx-lex () | |
423 | "Lexical analyser for EBNFX. | |
424 | ||
425 | Return a lexical token. | |
426 | ||
427 | See documentation for variable `ebnf-ebx-lex'." | |
428 | (if (>= (point) ebnf-limit) | |
429 | 'end-of-input | |
430 | (let (token) | |
431 | ;; skip spaces and comments | |
432 | (while (if (> (following-char) 255) | |
433 | (progn | |
434 | (setq token 'error) | |
435 | nil) | |
436 | (setq token (aref ebnf-ebx-token-table (following-char))) | |
437 | (cond | |
438 | ((eq token 'space) | |
439 | (skip-chars-forward " \013\t" ebnf-limit) | |
440 | (< (point) ebnf-limit)) | |
441 | ((eq token 'comment) | |
442 | (ebnf-ebx-skip-comment)) | |
443 | ((eq token 'form-feed) | |
444 | (forward-char) | |
445 | (setq ebnf-action 'form-feed)) | |
446 | ((eq token 'end-of-rule) | |
447 | (ebnf-ebx-skip-end-of-rule)) | |
448 | ((and (eq token 'begin-square) | |
449 | (let ((case-fold-search t)) | |
450 | (looking-at "\\[\\(wfc\\|vc\\):"))) | |
451 | (ebnf-ebx-skip-constraint)) | |
452 | (t nil) | |
453 | ))) | |
454 | (cond | |
455 | ;; end of input | |
456 | ((>= (point) ebnf-limit) | |
457 | 'end-of-input) | |
458 | ;; error | |
459 | ((eq token 'error) | |
460 | (error "Illegal character")) | |
461 | ;; end of rule | |
462 | ((eq token 'end-of-rule) | |
463 | 'end-of-rule) | |
464 | ;; terminal: #x [0-9A-Fa-f]+ | |
465 | ((eq token 'hash) | |
466 | (setq ebnf-ebx-lex (ebnf-ebx-character)) | |
467 | 'terminal) | |
468 | ;; terminal: "string" | |
469 | ((eq token 'double-quote) | |
470 | (setq ebnf-ebx-lex (ebnf-ebx-string ?\")) | |
471 | 'terminal) | |
472 | ;; terminal: 'string' | |
473 | ((eq token 'single-quote) | |
474 | (setq ebnf-ebx-lex (ebnf-ebx-string ?\')) | |
475 | 'terminal) | |
476 | ;; terminal: [ ^? ( char ( - char )? )+ ] | |
477 | ((eq token 'begin-square) | |
478 | (setq ebnf-ebx-lex (ebnf-ebx-range)) | |
479 | 'terminal) | |
480 | ;; non-terminal: NAME | |
481 | ((eq token 'non-terminal) | |
482 | (setq ebnf-ebx-lex | |
483 | (ebnf-buffer-substring ebnf-ebx-non-terminal-chars)) | |
484 | 'non-terminal) | |
485 | ;; colon: ::= | |
486 | ((eq token 'colon) | |
487 | (or (looking-at "::=") | |
488 | (error "Missing `::=' token")) | |
489 | (forward-char 3) | |
490 | 'production) | |
491 | ;; miscellaneous: (, ), *, +, ?, |, - | |
492 | (t | |
493 | (forward-char) | |
494 | token) | |
495 | )))) | |
496 | ||
497 | ||
498 | ;; replace the range "\177-\237" (see `ebnf-range-regexp'). | |
499 | (defconst ebnf-ebx-constraint-chars | |
500 | (ebnf-range-regexp "^\000-\010\016-\037]" ?\177 ?\237)) | |
501 | ||
502 | ||
503 | (defun ebnf-ebx-skip-constraint () | |
504 | (or (> (skip-chars-forward ebnf-ebx-constraint-chars ebnf-limit) 0) | |
505 | (error "Invalid character")) | |
506 | (or (= (following-char) ?\]) | |
507 | (error "Missing end of constraint `]'")) | |
508 | (forward-char) | |
509 | t) | |
510 | ||
511 | ||
512 | ||
513 | (defun ebnf-ebx-skip-end-of-rule () | |
514 | (let (eor-p) | |
515 | (while (progn | |
516 | ;; end of rule ==> 2 or more consecutive end of lines | |
517 | (setq eor-p (or (> (skip-chars-forward "\r\n" ebnf-limit) 1) | |
518 | eor-p)) | |
519 | ;; skip spaces | |
520 | (skip-chars-forward " \013\t" ebnf-limit) | |
521 | ;; skip comments | |
522 | (and (= (following-char) ?/) | |
523 | (ebnf-ebx-skip-comment)))) | |
524 | (not eor-p))) | |
525 | ||
526 | ||
527 | ;; replace the range "\177-\237" (see `ebnf-range-regexp'). | |
528 | (defconst ebnf-ebx-comment-chars | |
529 | (ebnf-range-regexp "^\000-\010\016-\037\\*" ?\177 ?\237)) | |
530 | (defconst ebnf-ebx-filename-chars | |
531 | (ebnf-range-regexp "^\000-\037\\*" ?\177 ?\237)) | |
532 | ||
533 | ||
534 | (defun ebnf-ebx-skip-comment () | |
535 | (forward-char) | |
536 | (or (= (following-char) ?*) | |
537 | (error "Invalid beginning of comment")) | |
538 | (forward-char) | |
539 | (cond | |
540 | ;; open EPS file | |
541 | ((and ebnf-eps-executing (= (following-char) ?\[)) | |
542 | (ebnf-eps-add-context (ebnf-ebx-eps-filename))) | |
543 | ;; close EPS file | |
544 | ((and ebnf-eps-executing (= (following-char) ?\])) | |
545 | (ebnf-eps-remove-context (ebnf-ebx-eps-filename))) | |
546 | ;; any other action in comment | |
547 | (t | |
548 | (setq ebnf-action (aref ebnf-comment-table (following-char)))) | |
549 | ) | |
550 | (while (progn | |
551 | (skip-chars-forward ebnf-ebx-comment-chars ebnf-limit) | |
552 | (or (= (following-char) ?*) | |
553 | (error "Missing end of comment")) | |
554 | (forward-char) | |
555 | (and (/= (following-char) ?/) | |
556 | (< (point) ebnf-limit)))) | |
557 | ;; check for a valid end of comment | |
558 | (and (>= (point) ebnf-limit) | |
559 | (error "Missing end of comment")) | |
560 | (forward-char) | |
561 | t) | |
562 | ||
563 | ||
564 | (defun ebnf-ebx-eps-filename () | |
565 | (forward-char) | |
566 | (let (fname nchar) | |
567 | (while (progn | |
568 | (setq fname | |
569 | (concat fname | |
570 | (ebnf-buffer-substring ebnf-ebx-filename-chars))) | |
571 | (and (< (point) ebnf-limit) | |
572 | (> (setq nchar (skip-chars-forward "*" ebnf-limit)) 0) | |
573 | (< (point) ebnf-limit) | |
574 | (/= (following-char) ?/))) | |
575 | (setq fname (concat fname (make-string nchar ?*)) | |
576 | nchar nil)) | |
577 | (if (or (not nchar) (= nchar 0)) | |
578 | fname | |
579 | (and (< (point) ebnf-limit) | |
580 | (= (following-char) ?/) | |
581 | (setq nchar (1- nchar))) | |
582 | (concat fname (make-string nchar ?*))))) | |
583 | ||
584 | ||
585 | ;; replace the range "\240-\377" (see `ebnf-range-regexp'). | |
586 | (defconst ebnf-ebx-double-string-chars | |
587 | (ebnf-range-regexp "\t -!#-~" ?\240 ?\377)) | |
588 | (defconst ebnf-ebx-single-string-chars | |
589 | (ebnf-range-regexp "\t -&(-~" ?\240 ?\377)) | |
590 | ||
591 | ||
592 | (defun ebnf-ebx-string (delim) | |
593 | (buffer-substring-no-properties | |
594 | (progn | |
595 | (forward-char) | |
596 | (point)) | |
597 | (progn | |
598 | (skip-chars-forward (if (= delim ?\") | |
599 | ebnf-ebx-double-string-chars | |
600 | ebnf-ebx-single-string-chars) | |
601 | ebnf-limit) | |
602 | (or (= (following-char) delim) | |
603 | (error "Missing string delimiter `%c'" delim)) | |
604 | (prog1 | |
605 | (point) | |
606 | (forward-char))))) | |
607 | ||
608 | ||
609 | (defun ebnf-ebx-character () | |
610 | ;; #x [0-9A-Fa-f]+ | |
611 | (buffer-substring-no-properties | |
612 | (point) | |
613 | (progn | |
614 | (ebnf-ebx-hex-character) | |
615 | (point)))) | |
616 | ||
617 | ||
618 | (defun ebnf-ebx-range () | |
619 | ;; [ ^? ( char ( - char )? )+ ] | |
620 | (buffer-substring-no-properties | |
621 | (point) | |
622 | (progn | |
623 | (forward-char) | |
624 | (and (= (following-char) ?^) | |
625 | (forward-char)) | |
626 | (and (= (following-char) ?-) | |
627 | (forward-char)) | |
628 | (while (progn | |
629 | (ebnf-ebx-any-character) | |
630 | (when (= (following-char) ?-) | |
631 | (forward-char) | |
632 | (ebnf-ebx-any-character)) | |
633 | (and (/= (following-char) ?\]) | |
634 | (< (point) ebnf-limit)))) | |
635 | (and (>= (point) ebnf-limit) | |
636 | (error "Missing end of character range `]'")) | |
637 | (forward-char) | |
638 | (point)))) | |
639 | ||
640 | ||
641 | (defun ebnf-ebx-any-character () | |
642 | (let ((char (following-char))) | |
643 | (cond ((= char ?#) | |
644 | (ebnf-ebx-hex-character t)) | |
645 | ((or (and (<= ?\ char) (<= char ?\")) ; # | |
646 | (and (<= ?$ char) (<= char ?,)) ; - | |
647 | (and (<= ?. char) (<= char ?\\)) ; ] | |
648 | (and (<= ?^ char) (<= char ?~)) | |
649 | (and (<= ?\240 char) (<= char ?\377))) | |
650 | (forward-char)) | |
651 | (t | |
652 | (error "Invalid character `%c'" char))))) | |
653 | ||
654 | ||
655 | (defun ebnf-ebx-hex-character (&optional no-error) | |
656 | ;; #x [0-9A-Fa-f]+ | |
657 | (forward-char) | |
658 | (if (/= (following-char) ?x) | |
659 | (or no-error | |
660 | (error "Invalid hexadecimal character")) | |
661 | (forward-char) | |
662 | (or (> (skip-chars-forward "0-9A-Fa-f" ebnf-limit) 0) | |
663 | (error "Invalid hexadecimal character")))) | |
664 | ||
665 | \f | |
666 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
667 | ||
668 | ||
669 | (provide 'ebnf-ebx) | |
670 | ||
671 | ||
672 | ;;; arch-tag: | |
673 | ;;; ebnf-ebx.el ends here |