Commit | Line | Data |
---|---|---|
f3c3dee6 VJL |
1 | ;;; ebnf-ebx.el --- parser for EBNF used to specify XML (EBNFX) |
2 | ||
73b0cd50 | 3 | ;; Copyright (C) 2001-2011 |
d7a0267c | 4 | ;; Free Software Foundation, Inc. |
f3c3dee6 VJL |
5 | |
6 | ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br> | |
7 | ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br> | |
f3c3dee6 | 8 | ;; Keywords: wp, ebnf, PostScript |
3ced5caa | 9 | ;; Version: 1.2 |
bd78fa1d | 10 | ;; Package: ebnf2ps |
f3c3dee6 VJL |
11 | |
12 | ;; This file is part of GNU Emacs. | |
13 | ||
b1fc2b50 | 14 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
f3c3dee6 | 15 | ;; it under the terms of the GNU General Public License as published by |
b1fc2b50 GM |
16 | ;; the Free Software Foundation, either version 3 of the License, or |
17 | ;; (at your option) any later version. | |
f3c3dee6 VJL |
18 | |
19 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
20 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 | ;; GNU General Public License for more details. | |
23 | ||
24 | ;; You should have received a copy of the GNU General Public License | |
b1fc2b50 | 25 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
f3c3dee6 VJL |
26 | |
27 | ;;; Commentary: | |
28 | ||
29 | ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
30 | ;; | |
31 | ;; | |
32 | ;; This is part of ebnf2ps package. | |
33 | ;; | |
34 | ;; This package defines a parser for EBNF used to specify XML (EBNFX). | |
35 | ;; | |
36 | ;; See ebnf2ps.el for documentation. | |
37 | ;; | |
38 | ;; | |
39 | ;; EBNFX Syntax | |
40 | ;; ------------ | |
41 | ;; | |
42 | ;; See the URL: | |
43 | ;; `http://www.w3.org/TR/2004/REC-xml-20040204/#sec-notation' | |
44 | ;; (Extensible Markup Language (XML) 1.0 (Third Edition)) | |
45 | ;; | |
46 | ;; | |
47 | ;; rule ::= symbol '::=' expression | |
48 | ;; /* rules are separated by at least one blank line. */ | |
49 | ;; | |
50 | ;; expression ::= concatenation ('|' concatenation)* | |
51 | ;; | |
52 | ;; concatenation ::= exception* | |
53 | ;; | |
54 | ;; exception ::= term ('-' term)? | |
55 | ;; | |
56 | ;; term ::= factor ('*' | '+' | '?')? | |
57 | ;; | |
58 | ;; factor ::= hex-char+ | |
59 | ;; | '[' '^'? ( char ( '-' char )? )+ ']' | |
60 | ;; | '"' 'string' '"' | |
61 | ;; | "'" "string" "'" | |
62 | ;; | '(' expression ')' | |
63 | ;; | symbol | |
64 | ;; | |
65 | ;; symbol ::= 'upper or lower case letter' | |
66 | ;; ('upper or lower case letter' | '-' | '_')* | |
67 | ;; /* upper and lower 8-bit accentuated characters are included */ | |
68 | ;; | |
69 | ;; hex-char ::= '#x' [0-9A-Fa-f]+ | |
70 | ;; | |
71 | ;; char ::= hex-char | 'any character except control characters' | |
72 | ;; /* 8-bit accentuated characters are included */ | |
73 | ;; | |
74 | ;; any-char ::= char | 'newline' | 'tab' | |
75 | ;; | |
76 | ;; ignore ::= '[' ('wfc' | 'WFC' | 'vc' | 'VC') ':' ( any-char - ']' )* ']' | |
77 | ;; | |
78 | ;; comment ::= '/*' ( any-char - '*/' ) '*/' | |
79 | ;; | |
80 | ;; | |
81 | ;; Below is the Notation section extracted from the URL cited above. | |
82 | ;; | |
83 | ;; 6 Notation | |
84 | ;; | |
85 | ;; The formal grammar of XML is given in this specification using a simple | |
86 | ;; Extended Backus-Naur Form (EBNF) notation. Each rule in the grammar defines | |
87 | ;; one symbol, in the form | |
88 | ;; | |
89 | ;; symbol ::= expression | |
90 | ;; | |
91 | ;; Symbols are written with an initial capital letter if they are the start | |
92 | ;; symbol of a regular language, otherwise with an initial lowercase letter. | |
93 | ;; Literal strings are quoted. | |
94 | ;; | |
95 | ;; Within the expression on the right-hand side of a rule, the following | |
96 | ;; expressions are used to match strings of one or more characters: | |
97 | ;; | |
98 | ;; #xN | |
99 | ;; | |
100 | ;; where N is a hexadecimal integer, the expression matches the character | |
101 | ;; whose number (code point) in ISO/IEC 10646 is N. The number of leading | |
102 | ;; zeros in the #xN form is insignificant. | |
103 | ;; | |
104 | ;; [a-zA-Z], [#xN-#xN] | |
105 | ;; | |
106 | ;; matches any Char with a value in the range(s) indicated (inclusive). | |
107 | ;; | |
108 | ;; [abc], [#xN#xN#xN] | |
109 | ;; | |
110 | ;; matches any Char with a value among the characters enumerated. | |
111 | ;; Enumerations and ranges can be mixed in one set of brackets. | |
112 | ;; | |
113 | ;; [^a-z], [^#xN-#xN] | |
114 | ;; | |
115 | ;; matches any Char with a value outside the range indicated. | |
116 | ;; | |
117 | ;; [^abc], [^#xN#xN#xN] | |
118 | ;; | |
119 | ;; matches any Char with a value not among the characters given. | |
120 | ;; Enumerations and ranges of forbidden values can be mixed in one set of | |
121 | ;; brackets. | |
122 | ;; | |
123 | ;; "string" | |
124 | ;; | |
125 | ;; matches a literal string matching that given inside the double quotes. | |
126 | ;; | |
127 | ;; 'string' | |
128 | ;; | |
129 | ;; matches a literal string matching that given inside the single quotes. | |
130 | ;; | |
131 | ;; These symbols may be combined to match more complex patterns as follows, | |
132 | ;; where A and B represent simple expressions: | |
133 | ;; | |
134 | ;; (expression) | |
135 | ;; | |
136 | ;; expression is treated as a unit and may be combined as described in this | |
137 | ;; list. | |
138 | ;; | |
139 | ;; A? | |
140 | ;; | |
141 | ;; matches A or nothing; optional A. | |
142 | ;; | |
143 | ;; A B | |
144 | ;; | |
145 | ;; matches A followed by B. This operator has higher precedence than | |
146 | ;; alternation; thus A B | C D is identical to (A B) | (C D). | |
147 | ;; | |
148 | ;; A | B | |
149 | ;; | |
150 | ;; matches A or B. | |
151 | ;; | |
152 | ;; A - B | |
153 | ;; | |
154 | ;; matches any string that matches A but does not match B. | |
155 | ;; | |
156 | ;; A+ | |
157 | ;; | |
158 | ;; matches one or more occurrences of A. Concatenation has higher | |
159 | ;; precedence than alternation; thus A+ | B+ is identical to (A+) | (B+). | |
160 | ;; | |
161 | ;; A* | |
162 | ;; | |
163 | ;; matches zero or more occurrences of A. Concatenation has higher | |
164 | ;; precedence than alternation; thus A* | B* is identical to (A*) | (B*). | |
165 | ;; | |
166 | ;; Other notations used in the productions are: | |
167 | ;; | |
168 | ;; /* ... */ | |
169 | ;; | |
170 | ;; comment. | |
171 | ;; | |
172 | ;; [ wfc: ... ] | |
173 | ;; | |
174 | ;; well-formedness constraint; this identifies by name a constraint on | |
175 | ;; well-formed documents associated with a production. | |
176 | ;; | |
177 | ;; [ vc: ... ] | |
178 | ;; | |
179 | ;; validity constraint; this identifies by name a constraint on valid | |
180 | ;; documents associated with a production. | |
181 | ;; | |
182 | ;; | |
183 | ;; Differences Between EBNFX And ebnf2ps EBNFX | |
184 | ;; ------------------------------------------- | |
185 | ;; | |
186 | ;; Besides the characters that EBNFX accepts, ebnf2ps EBNFX accepts also the | |
187 | ;; underscore (_) and minus (-) for rule name and european 8-bit accentuated | |
188 | ;; characters (from \240 to \377) for rule name, string and comment. Also | |
189 | ;; rule name can start with upper case letter. | |
190 | ;; | |
191 | ;; | |
192 | ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
193 | ||
194 | ;;; Code: | |
195 | ||
196 | ||
197 | (require 'ebnf-otz) | |
198 | ||
199 | ||
200 | (defvar ebnf-ebx-lex nil | |
201 | "Value returned by `ebnf-ebx-lex' function.") | |
202 | ||
203 | \f | |
204 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
205 | ;; Syntactic analyzer | |
206 | ||
207 | ||
208 | ;;; rulelist ::= rule+ | |
209 | ||
210 | (defun ebnf-ebx-parser (start) | |
211 | "EBNFX parser." | |
212 | (let ((total (+ (- ebnf-limit start) 1)) | |
213 | (bias (1- start)) | |
214 | (origin (point)) | |
215 | rule-list token rule) | |
216 | (goto-char start) | |
217 | (setq token (ebnf-ebx-lex)) | |
218 | (and (eq token 'end-of-input) | |
219 | (error "Invalid EBNFX file format")) | |
220 | (and (eq token 'end-of-rule) | |
221 | (setq token (ebnf-ebx-lex))) | |
222 | (while (not (eq token 'end-of-input)) | |
223 | (ebnf-message-float | |
224 | "Parsing...%s%%" | |
225 | (/ (* (- (point) bias) 100.0) total)) | |
226 | (setq token (ebnf-ebx-rule token) | |
227 | rule (cdr token) | |
228 | token (car token)) | |
229 | (or (ebnf-add-empty-rule-list rule) | |
230 | (setq rule-list (cons rule rule-list)))) | |
231 | (goto-char origin) | |
232 | rule-list)) | |
233 | ||
234 | ||
235 | ;;; rule ::= symbol '::=' expression | |
236 | ||
237 | ||
238 | (defun ebnf-ebx-rule (token) | |
239 | (let ((name ebnf-ebx-lex) | |
240 | (action ebnf-action) | |
241 | elements) | |
242 | (setq ebnf-action nil) | |
243 | (or (eq token 'non-terminal) | |
244 | (error "Invalid rule name")) | |
245 | (setq token (ebnf-ebx-lex)) | |
246 | (or (eq token 'production) | |
247 | (error "Invalid rule: missing `::='")) | |
248 | (setq elements (ebnf-ebx-expression)) | |
249 | (or (memq (car elements) '(end-of-rule end-of-input)) | |
250 | (error "Invalid rule: there is no end of rule")) | |
251 | (setq elements (cdr elements)) | |
252 | (ebnf-eps-add-production name) | |
253 | (cons (ebnf-ebx-lex) | |
254 | (ebnf-make-production name elements action)))) | |
255 | ||
256 | ||
257 | ;; expression ::= concatenation ('|' concatenation)* | |
258 | ||
259 | ||
260 | (defun ebnf-ebx-expression () | |
261 | (let (body concatenation) | |
262 | (while (eq (car (setq concatenation | |
263 | (ebnf-ebx-concatenation (ebnf-ebx-lex)))) | |
264 | 'alternative) | |
265 | (setq body (cons (cdr concatenation) body))) | |
266 | (ebnf-token-alternative body concatenation))) | |
267 | ||
268 | ||
269 | ;; concatenation ::= exception* | |
270 | ||
271 | ||
272 | (defun ebnf-ebx-concatenation (token) | |
273 | (let ((term (ebnf-ebx-exception token)) | |
274 | seq) | |
275 | (or (setq token (car term) | |
276 | term (cdr term)) | |
277 | (error "Empty element")) | |
278 | (setq seq (cons term seq)) | |
279 | (while (setq term (ebnf-ebx-exception token) | |
280 | token (car term) | |
281 | term (cdr term)) | |
282 | (setq seq (cons term seq))) | |
283 | (cons token | |
6411a60a | 284 | (ebnf-token-sequence seq)))) |
f3c3dee6 VJL |
285 | |
286 | ||
287 | ;;; exception ::= term ('-' term)? | |
288 | ||
289 | ||
290 | (defun ebnf-ebx-exception (token) | |
291 | (let ((term (ebnf-ebx-term token))) | |
292 | (if (eq (car term) 'exception) | |
293 | (let ((except (ebnf-ebx-term (ebnf-ebx-lex)))) | |
294 | (cons (car except) | |
295 | (ebnf-make-except (cdr term) (cdr except)))) | |
296 | term))) | |
66bf9274 | 297 | |
f3c3dee6 VJL |
298 | |
299 | ||
300 | ;;; term ::= factor ('*' | '+' | '?')? | |
301 | ||
302 | ||
303 | (defun ebnf-ebx-term (token) | |
304 | (let ((factor (ebnf-ebx-factor token))) | |
305 | (when factor | |
306 | (setq token (ebnf-ebx-lex)) | |
307 | (cond ((eq token 'zero-or-more) | |
308 | (setq factor (ebnf-make-zero-or-more factor) | |
309 | token (ebnf-ebx-lex))) | |
310 | ((eq token 'one-or-more) | |
311 | (setq factor (ebnf-make-one-or-more factor) | |
312 | token (ebnf-ebx-lex))) | |
313 | ((eq token 'optional) | |
314 | (setq factor (ebnf-token-optional factor) | |
315 | token (ebnf-ebx-lex))))) | |
316 | (cons token factor))) | |
317 | ||
318 | ||
319 | ;;; factor ::= hex-char+ | |
320 | ;;; | '[' '^'? ( char ( '-' char )? )+ ']' | |
321 | ;;; | '"' 'string' '"' | |
322 | ;;; | "'" "string" "'" | |
323 | ;;; | '(' expression ')' | |
324 | ;;; | symbol | |
325 | ;;; | |
326 | ;;; symbol ::= 'upper or lower case letter' | |
327 | ;;; ('upper or lower case letter' | '-' | '_')* | |
328 | ;;; /* upper and lower 8-bit accentuated characters are included */ | |
329 | ;;; | |
330 | ;;; hex-char ::= '#x' [0-9A-Fa-f]+ | |
331 | ;;; | |
332 | ;;; char ::= hex-char | 'any character except control characters' | |
333 | ;;; /* 8-bit accentuated characters are included */ | |
334 | ;;; | |
335 | ;;; any-char ::= char | 'newline' | 'tab' | |
336 | ||
337 | ||
338 | (defun ebnf-ebx-factor (token) | |
339 | (cond | |
340 | ;; terminal | |
341 | ((eq token 'terminal) | |
342 | (ebnf-make-terminal ebnf-ebx-lex)) | |
343 | ;; non-terminal | |
344 | ((eq token 'non-terminal) | |
345 | (ebnf-make-non-terminal ebnf-ebx-lex)) | |
346 | ;; group | |
347 | ((eq token 'begin-group) | |
348 | (let ((body (ebnf-ebx-expression))) | |
349 | (or (eq (car body) 'end-group) | |
350 | (error "Missing `)'")) | |
351 | (cdr body))) | |
352 | ;; no element | |
353 | (t | |
354 | nil) | |
355 | )) | |
356 | ||
357 | \f | |
358 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
359 | ;; Lexical analyzer | |
360 | ||
361 | ||
362 | (defconst ebnf-ebx-token-table (make-vector 256 'error) | |
363 | "Vector used to map characters to a lexical token.") | |
364 | ||
365 | ||
366 | (defun ebnf-ebx-initialize () | |
367 | "Initialize EBNFX token table." | |
368 | ;; control character & control 8-bit character are set to `error' | |
369 | (let ((char ?\101)) | |
370 | ;; printable character: A-Z | |
371 | (while (< char ?\133) | |
372 | (aset ebnf-ebx-token-table char 'non-terminal) | |
373 | (setq char (1+ char))) | |
374 | ;; printable character: a-z | |
375 | (setq char ?\141) | |
376 | (while (< char ?\173) | |
377 | (aset ebnf-ebx-token-table char 'non-terminal) | |
378 | (setq char (1+ char))) | |
379 | ;; European 8-bit accentuated characters: | |
380 | (setq char ?\240) | |
381 | (while (< char ?\400) | |
382 | (aset ebnf-ebx-token-table char 'non-terminal) | |
383 | (setq char (1+ char))) | |
384 | ;; Override end of line characters: | |
385 | (aset ebnf-ebx-token-table ?\n 'end-of-rule) ; [NL] linefeed | |
386 | (aset ebnf-ebx-token-table ?\r 'end-of-rule) ; [CR] carriage return | |
387 | ;; Override space characters: | |
388 | (aset ebnf-ebx-token-table ?\013 'space) ; [VT] vertical tab | |
389 | (aset ebnf-ebx-token-table ?\t 'space) ; [HT] horizontal tab | |
390 | (aset ebnf-ebx-token-table ?\ 'space) ; [SP] space | |
391 | ;; Override form feed character: | |
392 | (aset ebnf-ebx-token-table ?\f 'form-feed) ; [FF] form feed | |
393 | ;; Override other lexical characters: | |
394 | (aset ebnf-ebx-token-table ?# 'hash) | |
395 | (aset ebnf-ebx-token-table ?\" 'double-quote) | |
396 | (aset ebnf-ebx-token-table ?\' 'single-quote) | |
397 | (aset ebnf-ebx-token-table ?\( 'begin-group) | |
398 | (aset ebnf-ebx-token-table ?\) 'end-group) | |
399 | (aset ebnf-ebx-token-table ?- 'exception) | |
400 | (aset ebnf-ebx-token-table ?: 'colon) | |
401 | (aset ebnf-ebx-token-table ?\[ 'begin-square) | |
402 | (aset ebnf-ebx-token-table ?| 'alternative) | |
403 | (aset ebnf-ebx-token-table ?* 'zero-or-more) | |
404 | (aset ebnf-ebx-token-table ?+ 'one-or-more) | |
405 | (aset ebnf-ebx-token-table ?\? 'optional) | |
406 | ;; Override comment character: | |
407 | (aset ebnf-ebx-token-table ?/ 'comment))) | |
408 | ||
409 | ||
410 | ;; replace the range "\240-\377" (see `ebnf-range-regexp'). | |
411 | (defconst ebnf-ebx-non-terminal-chars | |
412 | (ebnf-range-regexp "-_A-Za-z" ?\240 ?\377)) | |
413 | (defconst ebnf-ebx-non-terminal-letter-chars | |
414 | (ebnf-range-regexp "A-Za-z" ?\240 ?\377)) | |
415 | ||
416 | ||
417 | (defun ebnf-ebx-lex () | |
66bf9274 | 418 | "Lexical analyzer for EBNFX. |
f3c3dee6 VJL |
419 | |
420 | Return a lexical token. | |
421 | ||
422 | See documentation for variable `ebnf-ebx-lex'." | |
423 | (if (>= (point) ebnf-limit) | |
424 | 'end-of-input | |
425 | (let (token) | |
426 | ;; skip spaces and comments | |
427 | (while (if (> (following-char) 255) | |
428 | (progn | |
429 | (setq token 'error) | |
430 | nil) | |
431 | (setq token (aref ebnf-ebx-token-table (following-char))) | |
432 | (cond | |
433 | ((eq token 'space) | |
434 | (skip-chars-forward " \013\t" ebnf-limit) | |
435 | (< (point) ebnf-limit)) | |
436 | ((eq token 'comment) | |
437 | (ebnf-ebx-skip-comment)) | |
438 | ((eq token 'form-feed) | |
439 | (forward-char) | |
440 | (setq ebnf-action 'form-feed)) | |
441 | ((eq token 'end-of-rule) | |
442 | (ebnf-ebx-skip-end-of-rule)) | |
443 | ((and (eq token 'begin-square) | |
444 | (let ((case-fold-search t)) | |
445 | (looking-at "\\[\\(wfc\\|vc\\):"))) | |
446 | (ebnf-ebx-skip-constraint)) | |
447 | (t nil) | |
448 | ))) | |
449 | (cond | |
450 | ;; end of input | |
451 | ((>= (point) ebnf-limit) | |
452 | 'end-of-input) | |
453 | ;; error | |
454 | ((eq token 'error) | |
eac9c0ef | 455 | (error "Invalid character")) |
f3c3dee6 VJL |
456 | ;; end of rule |
457 | ((eq token 'end-of-rule) | |
458 | 'end-of-rule) | |
459 | ;; terminal: #x [0-9A-Fa-f]+ | |
460 | ((eq token 'hash) | |
461 | (setq ebnf-ebx-lex (ebnf-ebx-character)) | |
462 | 'terminal) | |
463 | ;; terminal: "string" | |
464 | ((eq token 'double-quote) | |
465 | (setq ebnf-ebx-lex (ebnf-ebx-string ?\")) | |
466 | 'terminal) | |
467 | ;; terminal: 'string' | |
468 | ((eq token 'single-quote) | |
469 | (setq ebnf-ebx-lex (ebnf-ebx-string ?\')) | |
470 | 'terminal) | |
471 | ;; terminal: [ ^? ( char ( - char )? )+ ] | |
472 | ((eq token 'begin-square) | |
473 | (setq ebnf-ebx-lex (ebnf-ebx-range)) | |
474 | 'terminal) | |
475 | ;; non-terminal: NAME | |
476 | ((eq token 'non-terminal) | |
477 | (setq ebnf-ebx-lex | |
478 | (ebnf-buffer-substring ebnf-ebx-non-terminal-chars)) | |
479 | 'non-terminal) | |
480 | ;; colon: ::= | |
481 | ((eq token 'colon) | |
482 | (or (looking-at "::=") | |
483 | (error "Missing `::=' token")) | |
484 | (forward-char 3) | |
485 | 'production) | |
486 | ;; miscellaneous: (, ), *, +, ?, |, - | |
487 | (t | |
488 | (forward-char) | |
489 | token) | |
490 | )))) | |
491 | ||
492 | ||
493 | ;; replace the range "\177-\237" (see `ebnf-range-regexp'). | |
494 | (defconst ebnf-ebx-constraint-chars | |
495 | (ebnf-range-regexp "^\000-\010\016-\037]" ?\177 ?\237)) | |
496 | ||
497 | ||
498 | (defun ebnf-ebx-skip-constraint () | |
499 | (or (> (skip-chars-forward ebnf-ebx-constraint-chars ebnf-limit) 0) | |
500 | (error "Invalid character")) | |
501 | (or (= (following-char) ?\]) | |
502 | (error "Missing end of constraint `]'")) | |
503 | (forward-char) | |
504 | t) | |
66bf9274 | 505 | |
f3c3dee6 VJL |
506 | |
507 | ||
508 | (defun ebnf-ebx-skip-end-of-rule () | |
509 | (let (eor-p) | |
510 | (while (progn | |
511 | ;; end of rule ==> 2 or more consecutive end of lines | |
512 | (setq eor-p (or (> (skip-chars-forward "\r\n" ebnf-limit) 1) | |
513 | eor-p)) | |
514 | ;; skip spaces | |
515 | (skip-chars-forward " \013\t" ebnf-limit) | |
516 | ;; skip comments | |
517 | (and (= (following-char) ?/) | |
518 | (ebnf-ebx-skip-comment)))) | |
519 | (not eor-p))) | |
520 | ||
521 | ||
522 | ;; replace the range "\177-\237" (see `ebnf-range-regexp'). | |
523 | (defconst ebnf-ebx-comment-chars | |
524 | (ebnf-range-regexp "^\000-\010\016-\037\\*" ?\177 ?\237)) | |
525 | (defconst ebnf-ebx-filename-chars | |
526 | (ebnf-range-regexp "^\000-\037\\*" ?\177 ?\237)) | |
527 | ||
528 | ||
529 | (defun ebnf-ebx-skip-comment () | |
530 | (forward-char) | |
531 | (or (= (following-char) ?*) | |
532 | (error "Invalid beginning of comment")) | |
533 | (forward-char) | |
534 | (cond | |
535 | ;; open EPS file | |
536 | ((and ebnf-eps-executing (= (following-char) ?\[)) | |
537 | (ebnf-eps-add-context (ebnf-ebx-eps-filename))) | |
538 | ;; close EPS file | |
539 | ((and ebnf-eps-executing (= (following-char) ?\])) | |
540 | (ebnf-eps-remove-context (ebnf-ebx-eps-filename))) | |
3ced5caa VJL |
541 | ;; EPS header |
542 | ((and ebnf-eps-executing (= (following-char) ?H)) | |
543 | (ebnf-eps-header-comment (ebnf-ebx-eps-filename))) | |
544 | ;; EPS footer | |
545 | ((and ebnf-eps-executing (= (following-char) ?F)) | |
546 | (ebnf-eps-footer-comment (ebnf-ebx-eps-filename))) | |
f3c3dee6 VJL |
547 | ;; any other action in comment |
548 | (t | |
549 | (setq ebnf-action (aref ebnf-comment-table (following-char)))) | |
550 | ) | |
551 | (while (progn | |
552 | (skip-chars-forward ebnf-ebx-comment-chars ebnf-limit) | |
553 | (or (= (following-char) ?*) | |
554 | (error "Missing end of comment")) | |
555 | (forward-char) | |
556 | (and (/= (following-char) ?/) | |
557 | (< (point) ebnf-limit)))) | |
558 | ;; check for a valid end of comment | |
559 | (and (>= (point) ebnf-limit) | |
560 | (error "Missing end of comment")) | |
561 | (forward-char) | |
562 | t) | |
563 | ||
564 | ||
565 | (defun ebnf-ebx-eps-filename () | |
566 | (forward-char) | |
567 | (let (fname nchar) | |
568 | (while (progn | |
569 | (setq fname | |
570 | (concat fname | |
571 | (ebnf-buffer-substring ebnf-ebx-filename-chars))) | |
572 | (and (< (point) ebnf-limit) | |
573 | (> (setq nchar (skip-chars-forward "*" ebnf-limit)) 0) | |
574 | (< (point) ebnf-limit) | |
575 | (/= (following-char) ?/))) | |
576 | (setq fname (concat fname (make-string nchar ?*)) | |
577 | nchar nil)) | |
578 | (if (or (not nchar) (= nchar 0)) | |
579 | fname | |
580 | (and (< (point) ebnf-limit) | |
581 | (= (following-char) ?/) | |
582 | (setq nchar (1- nchar))) | |
583 | (concat fname (make-string nchar ?*))))) | |
584 | ||
585 | ||
586 | ;; replace the range "\240-\377" (see `ebnf-range-regexp'). | |
587 | (defconst ebnf-ebx-double-string-chars | |
588 | (ebnf-range-regexp "\t -!#-~" ?\240 ?\377)) | |
589 | (defconst ebnf-ebx-single-string-chars | |
590 | (ebnf-range-regexp "\t -&(-~" ?\240 ?\377)) | |
591 | ||
592 | ||
593 | (defun ebnf-ebx-string (delim) | |
594 | (buffer-substring-no-properties | |
595 | (progn | |
596 | (forward-char) | |
597 | (point)) | |
598 | (progn | |
599 | (skip-chars-forward (if (= delim ?\") | |
600 | ebnf-ebx-double-string-chars | |
601 | ebnf-ebx-single-string-chars) | |
602 | ebnf-limit) | |
603 | (or (= (following-char) delim) | |
604 | (error "Missing string delimiter `%c'" delim)) | |
605 | (prog1 | |
606 | (point) | |
607 | (forward-char))))) | |
608 | ||
609 | ||
610 | (defun ebnf-ebx-character () | |
611 | ;; #x [0-9A-Fa-f]+ | |
612 | (buffer-substring-no-properties | |
613 | (point) | |
614 | (progn | |
615 | (ebnf-ebx-hex-character) | |
616 | (point)))) | |
617 | ||
618 | ||
619 | (defun ebnf-ebx-range () | |
620 | ;; [ ^? ( char ( - char )? )+ ] | |
621 | (buffer-substring-no-properties | |
622 | (point) | |
623 | (progn | |
624 | (forward-char) | |
625 | (and (= (following-char) ?^) | |
626 | (forward-char)) | |
627 | (and (= (following-char) ?-) | |
628 | (forward-char)) | |
629 | (while (progn | |
630 | (ebnf-ebx-any-character) | |
631 | (when (= (following-char) ?-) | |
632 | (forward-char) | |
633 | (ebnf-ebx-any-character)) | |
634 | (and (/= (following-char) ?\]) | |
635 | (< (point) ebnf-limit)))) | |
636 | (and (>= (point) ebnf-limit) | |
637 | (error "Missing end of character range `]'")) | |
638 | (forward-char) | |
639 | (point)))) | |
640 | ||
641 | ||
642 | (defun ebnf-ebx-any-character () | |
643 | (let ((char (following-char))) | |
644 | (cond ((= char ?#) | |
645 | (ebnf-ebx-hex-character t)) | |
646 | ((or (and (<= ?\ char) (<= char ?\")) ; # | |
647 | (and (<= ?$ char) (<= char ?,)) ; - | |
648 | (and (<= ?. char) (<= char ?\\)) ; ] | |
649 | (and (<= ?^ char) (<= char ?~)) | |
650 | (and (<= ?\240 char) (<= char ?\377))) | |
651 | (forward-char)) | |
652 | (t | |
653 | (error "Invalid character `%c'" char))))) | |
654 | ||
655 | ||
656 | (defun ebnf-ebx-hex-character (&optional no-error) | |
657 | ;; #x [0-9A-Fa-f]+ | |
658 | (forward-char) | |
659 | (if (/= (following-char) ?x) | |
660 | (or no-error | |
661 | (error "Invalid hexadecimal character")) | |
662 | (forward-char) | |
663 | (or (> (skip-chars-forward "0-9A-Fa-f" ebnf-limit) 0) | |
664 | (error "Invalid hexadecimal character")))) | |
665 | ||
666 | \f | |
667 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
668 | ||
669 | ||
670 | (provide 'ebnf-ebx) | |
671 | ||
f3c3dee6 | 672 | ;;; ebnf-ebx.el ends here |