Commit | Line | Data |
---|---|---|
f3c3dee6 VJL |
1 | ;;; ebnf-ebx.el --- parser for EBNF used to specify XML (EBNFX) |
2 | ||
4e643dd2 | 3 | ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 |
d7a0267c | 4 | ;; Free Software Foundation, Inc. |
f3c3dee6 VJL |
5 | |
6 | ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br> | |
7 | ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br> | |
f3c3dee6 | 8 | ;; Keywords: wp, ebnf, PostScript |
3ced5caa | 9 | ;; Version: 1.2 |
f3c3dee6 VJL |
10 | |
11 | ;; This file is part of GNU Emacs. | |
12 | ||
b1fc2b50 | 13 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
f3c3dee6 | 14 | ;; it under the terms of the GNU General Public License as published by |
b1fc2b50 GM |
15 | ;; the Free Software Foundation, either version 3 of the License, or |
16 | ;; (at your option) any later version. | |
f3c3dee6 VJL |
17 | |
18 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | ;; GNU General Public License for more details. | |
22 | ||
23 | ;; You should have received a copy of the GNU General Public License | |
b1fc2b50 | 24 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
f3c3dee6 VJL |
25 | |
26 | ;;; Commentary: | |
27 | ||
28 | ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ;; | |
30 | ;; | |
31 | ;; This is part of ebnf2ps package. | |
32 | ;; | |
33 | ;; This package defines a parser for EBNF used to specify XML (EBNFX). | |
34 | ;; | |
35 | ;; See ebnf2ps.el for documentation. | |
36 | ;; | |
37 | ;; | |
38 | ;; EBNFX Syntax | |
39 | ;; ------------ | |
40 | ;; | |
41 | ;; See the URL: | |
42 | ;; `http://www.w3.org/TR/2004/REC-xml-20040204/#sec-notation' | |
43 | ;; (Extensible Markup Language (XML) 1.0 (Third Edition)) | |
44 | ;; | |
45 | ;; | |
46 | ;; rule ::= symbol '::=' expression | |
47 | ;; /* rules are separated by at least one blank line. */ | |
48 | ;; | |
49 | ;; expression ::= concatenation ('|' concatenation)* | |
50 | ;; | |
51 | ;; concatenation ::= exception* | |
52 | ;; | |
53 | ;; exception ::= term ('-' term)? | |
54 | ;; | |
55 | ;; term ::= factor ('*' | '+' | '?')? | |
56 | ;; | |
57 | ;; factor ::= hex-char+ | |
58 | ;; | '[' '^'? ( char ( '-' char )? )+ ']' | |
59 | ;; | '"' 'string' '"' | |
60 | ;; | "'" "string" "'" | |
61 | ;; | '(' expression ')' | |
62 | ;; | symbol | |
63 | ;; | |
64 | ;; symbol ::= 'upper or lower case letter' | |
65 | ;; ('upper or lower case letter' | '-' | '_')* | |
66 | ;; /* upper and lower 8-bit accentuated characters are included */ | |
67 | ;; | |
68 | ;; hex-char ::= '#x' [0-9A-Fa-f]+ | |
69 | ;; | |
70 | ;; char ::= hex-char | 'any character except control characters' | |
71 | ;; /* 8-bit accentuated characters are included */ | |
72 | ;; | |
73 | ;; any-char ::= char | 'newline' | 'tab' | |
74 | ;; | |
75 | ;; ignore ::= '[' ('wfc' | 'WFC' | 'vc' | 'VC') ':' ( any-char - ']' )* ']' | |
76 | ;; | |
77 | ;; comment ::= '/*' ( any-char - '*/' ) '*/' | |
78 | ;; | |
79 | ;; | |
80 | ;; Below is the Notation section extracted from the URL cited above. | |
81 | ;; | |
82 | ;; 6 Notation | |
83 | ;; | |
84 | ;; The formal grammar of XML is given in this specification using a simple | |
85 | ;; Extended Backus-Naur Form (EBNF) notation. Each rule in the grammar defines | |
86 | ;; one symbol, in the form | |
87 | ;; | |
88 | ;; symbol ::= expression | |
89 | ;; | |
90 | ;; Symbols are written with an initial capital letter if they are the start | |
91 | ;; symbol of a regular language, otherwise with an initial lowercase letter. | |
92 | ;; Literal strings are quoted. | |
93 | ;; | |
94 | ;; Within the expression on the right-hand side of a rule, the following | |
95 | ;; expressions are used to match strings of one or more characters: | |
96 | ;; | |
97 | ;; #xN | |
98 | ;; | |
99 | ;; where N is a hexadecimal integer, the expression matches the character | |
100 | ;; whose number (code point) in ISO/IEC 10646 is N. The number of leading | |
101 | ;; zeros in the #xN form is insignificant. | |
102 | ;; | |
103 | ;; [a-zA-Z], [#xN-#xN] | |
104 | ;; | |
105 | ;; matches any Char with a value in the range(s) indicated (inclusive). | |
106 | ;; | |
107 | ;; [abc], [#xN#xN#xN] | |
108 | ;; | |
109 | ;; matches any Char with a value among the characters enumerated. | |
110 | ;; Enumerations and ranges can be mixed in one set of brackets. | |
111 | ;; | |
112 | ;; [^a-z], [^#xN-#xN] | |
113 | ;; | |
114 | ;; matches any Char with a value outside the range indicated. | |
115 | ;; | |
116 | ;; [^abc], [^#xN#xN#xN] | |
117 | ;; | |
118 | ;; matches any Char with a value not among the characters given. | |
119 | ;; Enumerations and ranges of forbidden values can be mixed in one set of | |
120 | ;; brackets. | |
121 | ;; | |
122 | ;; "string" | |
123 | ;; | |
124 | ;; matches a literal string matching that given inside the double quotes. | |
125 | ;; | |
126 | ;; 'string' | |
127 | ;; | |
128 | ;; matches a literal string matching that given inside the single quotes. | |
129 | ;; | |
130 | ;; These symbols may be combined to match more complex patterns as follows, | |
131 | ;; where A and B represent simple expressions: | |
132 | ;; | |
133 | ;; (expression) | |
134 | ;; | |
135 | ;; expression is treated as a unit and may be combined as described in this | |
136 | ;; list. | |
137 | ;; | |
138 | ;; A? | |
139 | ;; | |
140 | ;; matches A or nothing; optional A. | |
141 | ;; | |
142 | ;; A B | |
143 | ;; | |
144 | ;; matches A followed by B. This operator has higher precedence than | |
145 | ;; alternation; thus A B | C D is identical to (A B) | (C D). | |
146 | ;; | |
147 | ;; A | B | |
148 | ;; | |
149 | ;; matches A or B. | |
150 | ;; | |
151 | ;; A - B | |
152 | ;; | |
153 | ;; matches any string that matches A but does not match B. | |
154 | ;; | |
155 | ;; A+ | |
156 | ;; | |
157 | ;; matches one or more occurrences of A. Concatenation has higher | |
158 | ;; precedence than alternation; thus A+ | B+ is identical to (A+) | (B+). | |
159 | ;; | |
160 | ;; A* | |
161 | ;; | |
162 | ;; matches zero or more occurrences of A. Concatenation has higher | |
163 | ;; precedence than alternation; thus A* | B* is identical to (A*) | (B*). | |
164 | ;; | |
165 | ;; Other notations used in the productions are: | |
166 | ;; | |
167 | ;; /* ... */ | |
168 | ;; | |
169 | ;; comment. | |
170 | ;; | |
171 | ;; [ wfc: ... ] | |
172 | ;; | |
173 | ;; well-formedness constraint; this identifies by name a constraint on | |
174 | ;; well-formed documents associated with a production. | |
175 | ;; | |
176 | ;; [ vc: ... ] | |
177 | ;; | |
178 | ;; validity constraint; this identifies by name a constraint on valid | |
179 | ;; documents associated with a production. | |
180 | ;; | |
181 | ;; | |
182 | ;; Differences Between EBNFX And ebnf2ps EBNFX | |
183 | ;; ------------------------------------------- | |
184 | ;; | |
185 | ;; Besides the characters that EBNFX accepts, ebnf2ps EBNFX accepts also the | |
186 | ;; underscore (_) and minus (-) for rule name and european 8-bit accentuated | |
187 | ;; characters (from \240 to \377) for rule name, string and comment. Also | |
188 | ;; rule name can start with upper case letter. | |
189 | ;; | |
190 | ;; | |
191 | ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
192 | ||
193 | ;;; Code: | |
194 | ||
195 | ||
196 | (require 'ebnf-otz) | |
197 | ||
198 | ||
199 | (defvar ebnf-ebx-lex nil | |
200 | "Value returned by `ebnf-ebx-lex' function.") | |
201 | ||
202 | \f | |
203 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
204 | ;; Syntactic analyzer | |
205 | ||
206 | ||
207 | ;;; rulelist ::= rule+ | |
208 | ||
209 | (defun ebnf-ebx-parser (start) | |
210 | "EBNFX parser." | |
211 | (let ((total (+ (- ebnf-limit start) 1)) | |
212 | (bias (1- start)) | |
213 | (origin (point)) | |
214 | rule-list token rule) | |
215 | (goto-char start) | |
216 | (setq token (ebnf-ebx-lex)) | |
217 | (and (eq token 'end-of-input) | |
218 | (error "Invalid EBNFX file format")) | |
219 | (and (eq token 'end-of-rule) | |
220 | (setq token (ebnf-ebx-lex))) | |
221 | (while (not (eq token 'end-of-input)) | |
222 | (ebnf-message-float | |
223 | "Parsing...%s%%" | |
224 | (/ (* (- (point) bias) 100.0) total)) | |
225 | (setq token (ebnf-ebx-rule token) | |
226 | rule (cdr token) | |
227 | token (car token)) | |
228 | (or (ebnf-add-empty-rule-list rule) | |
229 | (setq rule-list (cons rule rule-list)))) | |
230 | (goto-char origin) | |
231 | rule-list)) | |
232 | ||
233 | ||
234 | ;;; rule ::= symbol '::=' expression | |
235 | ||
236 | ||
237 | (defun ebnf-ebx-rule (token) | |
238 | (let ((name ebnf-ebx-lex) | |
239 | (action ebnf-action) | |
240 | elements) | |
241 | (setq ebnf-action nil) | |
242 | (or (eq token 'non-terminal) | |
243 | (error "Invalid rule name")) | |
244 | (setq token (ebnf-ebx-lex)) | |
245 | (or (eq token 'production) | |
246 | (error "Invalid rule: missing `::='")) | |
247 | (setq elements (ebnf-ebx-expression)) | |
248 | (or (memq (car elements) '(end-of-rule end-of-input)) | |
249 | (error "Invalid rule: there is no end of rule")) | |
250 | (setq elements (cdr elements)) | |
251 | (ebnf-eps-add-production name) | |
252 | (cons (ebnf-ebx-lex) | |
253 | (ebnf-make-production name elements action)))) | |
254 | ||
255 | ||
256 | ;; expression ::= concatenation ('|' concatenation)* | |
257 | ||
258 | ||
259 | (defun ebnf-ebx-expression () | |
260 | (let (body concatenation) | |
261 | (while (eq (car (setq concatenation | |
262 | (ebnf-ebx-concatenation (ebnf-ebx-lex)))) | |
263 | 'alternative) | |
264 | (setq body (cons (cdr concatenation) body))) | |
265 | (ebnf-token-alternative body concatenation))) | |
266 | ||
267 | ||
268 | ;; concatenation ::= exception* | |
269 | ||
270 | ||
271 | (defun ebnf-ebx-concatenation (token) | |
272 | (let ((term (ebnf-ebx-exception token)) | |
273 | seq) | |
274 | (or (setq token (car term) | |
275 | term (cdr term)) | |
276 | (error "Empty element")) | |
277 | (setq seq (cons term seq)) | |
278 | (while (setq term (ebnf-ebx-exception token) | |
279 | token (car term) | |
280 | term (cdr term)) | |
281 | (setq seq (cons term seq))) | |
282 | (cons token | |
6411a60a | 283 | (ebnf-token-sequence seq)))) |
f3c3dee6 VJL |
284 | |
285 | ||
286 | ;;; exception ::= term ('-' term)? | |
287 | ||
288 | ||
289 | (defun ebnf-ebx-exception (token) | |
290 | (let ((term (ebnf-ebx-term token))) | |
291 | (if (eq (car term) 'exception) | |
292 | (let ((except (ebnf-ebx-term (ebnf-ebx-lex)))) | |
293 | (cons (car except) | |
294 | (ebnf-make-except (cdr term) (cdr except)))) | |
295 | term))) | |
66bf9274 | 296 | |
f3c3dee6 VJL |
297 | |
298 | ||
299 | ;;; term ::= factor ('*' | '+' | '?')? | |
300 | ||
301 | ||
302 | (defun ebnf-ebx-term (token) | |
303 | (let ((factor (ebnf-ebx-factor token))) | |
304 | (when factor | |
305 | (setq token (ebnf-ebx-lex)) | |
306 | (cond ((eq token 'zero-or-more) | |
307 | (setq factor (ebnf-make-zero-or-more factor) | |
308 | token (ebnf-ebx-lex))) | |
309 | ((eq token 'one-or-more) | |
310 | (setq factor (ebnf-make-one-or-more factor) | |
311 | token (ebnf-ebx-lex))) | |
312 | ((eq token 'optional) | |
313 | (setq factor (ebnf-token-optional factor) | |
314 | token (ebnf-ebx-lex))))) | |
315 | (cons token factor))) | |
316 | ||
317 | ||
318 | ;;; factor ::= hex-char+ | |
319 | ;;; | '[' '^'? ( char ( '-' char )? )+ ']' | |
320 | ;;; | '"' 'string' '"' | |
321 | ;;; | "'" "string" "'" | |
322 | ;;; | '(' expression ')' | |
323 | ;;; | symbol | |
324 | ;;; | |
325 | ;;; symbol ::= 'upper or lower case letter' | |
326 | ;;; ('upper or lower case letter' | '-' | '_')* | |
327 | ;;; /* upper and lower 8-bit accentuated characters are included */ | |
328 | ;;; | |
329 | ;;; hex-char ::= '#x' [0-9A-Fa-f]+ | |
330 | ;;; | |
331 | ;;; char ::= hex-char | 'any character except control characters' | |
332 | ;;; /* 8-bit accentuated characters are included */ | |
333 | ;;; | |
334 | ;;; any-char ::= char | 'newline' | 'tab' | |
335 | ||
336 | ||
337 | (defun ebnf-ebx-factor (token) | |
338 | (cond | |
339 | ;; terminal | |
340 | ((eq token 'terminal) | |
341 | (ebnf-make-terminal ebnf-ebx-lex)) | |
342 | ;; non-terminal | |
343 | ((eq token 'non-terminal) | |
344 | (ebnf-make-non-terminal ebnf-ebx-lex)) | |
345 | ;; group | |
346 | ((eq token 'begin-group) | |
347 | (let ((body (ebnf-ebx-expression))) | |
348 | (or (eq (car body) 'end-group) | |
349 | (error "Missing `)'")) | |
350 | (cdr body))) | |
351 | ;; no element | |
352 | (t | |
353 | nil) | |
354 | )) | |
355 | ||
356 | \f | |
357 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
358 | ;; Lexical analyzer | |
359 | ||
360 | ||
361 | (defconst ebnf-ebx-token-table (make-vector 256 'error) | |
362 | "Vector used to map characters to a lexical token.") | |
363 | ||
364 | ||
365 | (defun ebnf-ebx-initialize () | |
366 | "Initialize EBNFX token table." | |
367 | ;; control character & control 8-bit character are set to `error' | |
368 | (let ((char ?\101)) | |
369 | ;; printable character: A-Z | |
370 | (while (< char ?\133) | |
371 | (aset ebnf-ebx-token-table char 'non-terminal) | |
372 | (setq char (1+ char))) | |
373 | ;; printable character: a-z | |
374 | (setq char ?\141) | |
375 | (while (< char ?\173) | |
376 | (aset ebnf-ebx-token-table char 'non-terminal) | |
377 | (setq char (1+ char))) | |
378 | ;; European 8-bit accentuated characters: | |
379 | (setq char ?\240) | |
380 | (while (< char ?\400) | |
381 | (aset ebnf-ebx-token-table char 'non-terminal) | |
382 | (setq char (1+ char))) | |
383 | ;; Override end of line characters: | |
384 | (aset ebnf-ebx-token-table ?\n 'end-of-rule) ; [NL] linefeed | |
385 | (aset ebnf-ebx-token-table ?\r 'end-of-rule) ; [CR] carriage return | |
386 | ;; Override space characters: | |
387 | (aset ebnf-ebx-token-table ?\013 'space) ; [VT] vertical tab | |
388 | (aset ebnf-ebx-token-table ?\t 'space) ; [HT] horizontal tab | |
389 | (aset ebnf-ebx-token-table ?\ 'space) ; [SP] space | |
390 | ;; Override form feed character: | |
391 | (aset ebnf-ebx-token-table ?\f 'form-feed) ; [FF] form feed | |
392 | ;; Override other lexical characters: | |
393 | (aset ebnf-ebx-token-table ?# 'hash) | |
394 | (aset ebnf-ebx-token-table ?\" 'double-quote) | |
395 | (aset ebnf-ebx-token-table ?\' 'single-quote) | |
396 | (aset ebnf-ebx-token-table ?\( 'begin-group) | |
397 | (aset ebnf-ebx-token-table ?\) 'end-group) | |
398 | (aset ebnf-ebx-token-table ?- 'exception) | |
399 | (aset ebnf-ebx-token-table ?: 'colon) | |
400 | (aset ebnf-ebx-token-table ?\[ 'begin-square) | |
401 | (aset ebnf-ebx-token-table ?| 'alternative) | |
402 | (aset ebnf-ebx-token-table ?* 'zero-or-more) | |
403 | (aset ebnf-ebx-token-table ?+ 'one-or-more) | |
404 | (aset ebnf-ebx-token-table ?\? 'optional) | |
405 | ;; Override comment character: | |
406 | (aset ebnf-ebx-token-table ?/ 'comment))) | |
407 | ||
408 | ||
409 | ;; replace the range "\240-\377" (see `ebnf-range-regexp'). | |
410 | (defconst ebnf-ebx-non-terminal-chars | |
411 | (ebnf-range-regexp "-_A-Za-z" ?\240 ?\377)) | |
412 | (defconst ebnf-ebx-non-terminal-letter-chars | |
413 | (ebnf-range-regexp "A-Za-z" ?\240 ?\377)) | |
414 | ||
415 | ||
416 | (defun ebnf-ebx-lex () | |
66bf9274 | 417 | "Lexical analyzer for EBNFX. |
f3c3dee6 VJL |
418 | |
419 | Return a lexical token. | |
420 | ||
421 | See documentation for variable `ebnf-ebx-lex'." | |
422 | (if (>= (point) ebnf-limit) | |
423 | 'end-of-input | |
424 | (let (token) | |
425 | ;; skip spaces and comments | |
426 | (while (if (> (following-char) 255) | |
427 | (progn | |
428 | (setq token 'error) | |
429 | nil) | |
430 | (setq token (aref ebnf-ebx-token-table (following-char))) | |
431 | (cond | |
432 | ((eq token 'space) | |
433 | (skip-chars-forward " \013\t" ebnf-limit) | |
434 | (< (point) ebnf-limit)) | |
435 | ((eq token 'comment) | |
436 | (ebnf-ebx-skip-comment)) | |
437 | ((eq token 'form-feed) | |
438 | (forward-char) | |
439 | (setq ebnf-action 'form-feed)) | |
440 | ((eq token 'end-of-rule) | |
441 | (ebnf-ebx-skip-end-of-rule)) | |
442 | ((and (eq token 'begin-square) | |
443 | (let ((case-fold-search t)) | |
444 | (looking-at "\\[\\(wfc\\|vc\\):"))) | |
445 | (ebnf-ebx-skip-constraint)) | |
446 | (t nil) | |
447 | ))) | |
448 | (cond | |
449 | ;; end of input | |
450 | ((>= (point) ebnf-limit) | |
451 | 'end-of-input) | |
452 | ;; error | |
453 | ((eq token 'error) | |
eac9c0ef | 454 | (error "Invalid character")) |
f3c3dee6 VJL |
455 | ;; end of rule |
456 | ((eq token 'end-of-rule) | |
457 | 'end-of-rule) | |
458 | ;; terminal: #x [0-9A-Fa-f]+ | |
459 | ((eq token 'hash) | |
460 | (setq ebnf-ebx-lex (ebnf-ebx-character)) | |
461 | 'terminal) | |
462 | ;; terminal: "string" | |
463 | ((eq token 'double-quote) | |
464 | (setq ebnf-ebx-lex (ebnf-ebx-string ?\")) | |
465 | 'terminal) | |
466 | ;; terminal: 'string' | |
467 | ((eq token 'single-quote) | |
468 | (setq ebnf-ebx-lex (ebnf-ebx-string ?\')) | |
469 | 'terminal) | |
470 | ;; terminal: [ ^? ( char ( - char )? )+ ] | |
471 | ((eq token 'begin-square) | |
472 | (setq ebnf-ebx-lex (ebnf-ebx-range)) | |
473 | 'terminal) | |
474 | ;; non-terminal: NAME | |
475 | ((eq token 'non-terminal) | |
476 | (setq ebnf-ebx-lex | |
477 | (ebnf-buffer-substring ebnf-ebx-non-terminal-chars)) | |
478 | 'non-terminal) | |
479 | ;; colon: ::= | |
480 | ((eq token 'colon) | |
481 | (or (looking-at "::=") | |
482 | (error "Missing `::=' token")) | |
483 | (forward-char 3) | |
484 | 'production) | |
485 | ;; miscellaneous: (, ), *, +, ?, |, - | |
486 | (t | |
487 | (forward-char) | |
488 | token) | |
489 | )))) | |
490 | ||
491 | ||
492 | ;; replace the range "\177-\237" (see `ebnf-range-regexp'). | |
493 | (defconst ebnf-ebx-constraint-chars | |
494 | (ebnf-range-regexp "^\000-\010\016-\037]" ?\177 ?\237)) | |
495 | ||
496 | ||
497 | (defun ebnf-ebx-skip-constraint () | |
498 | (or (> (skip-chars-forward ebnf-ebx-constraint-chars ebnf-limit) 0) | |
499 | (error "Invalid character")) | |
500 | (or (= (following-char) ?\]) | |
501 | (error "Missing end of constraint `]'")) | |
502 | (forward-char) | |
503 | t) | |
66bf9274 | 504 | |
f3c3dee6 VJL |
505 | |
506 | ||
507 | (defun ebnf-ebx-skip-end-of-rule () | |
508 | (let (eor-p) | |
509 | (while (progn | |
510 | ;; end of rule ==> 2 or more consecutive end of lines | |
511 | (setq eor-p (or (> (skip-chars-forward "\r\n" ebnf-limit) 1) | |
512 | eor-p)) | |
513 | ;; skip spaces | |
514 | (skip-chars-forward " \013\t" ebnf-limit) | |
515 | ;; skip comments | |
516 | (and (= (following-char) ?/) | |
517 | (ebnf-ebx-skip-comment)))) | |
518 | (not eor-p))) | |
519 | ||
520 | ||
521 | ;; replace the range "\177-\237" (see `ebnf-range-regexp'). | |
522 | (defconst ebnf-ebx-comment-chars | |
523 | (ebnf-range-regexp "^\000-\010\016-\037\\*" ?\177 ?\237)) | |
524 | (defconst ebnf-ebx-filename-chars | |
525 | (ebnf-range-regexp "^\000-\037\\*" ?\177 ?\237)) | |
526 | ||
527 | ||
528 | (defun ebnf-ebx-skip-comment () | |
529 | (forward-char) | |
530 | (or (= (following-char) ?*) | |
531 | (error "Invalid beginning of comment")) | |
532 | (forward-char) | |
533 | (cond | |
534 | ;; open EPS file | |
535 | ((and ebnf-eps-executing (= (following-char) ?\[)) | |
536 | (ebnf-eps-add-context (ebnf-ebx-eps-filename))) | |
537 | ;; close EPS file | |
538 | ((and ebnf-eps-executing (= (following-char) ?\])) | |
539 | (ebnf-eps-remove-context (ebnf-ebx-eps-filename))) | |
3ced5caa VJL |
540 | ;; EPS header |
541 | ((and ebnf-eps-executing (= (following-char) ?H)) | |
542 | (ebnf-eps-header-comment (ebnf-ebx-eps-filename))) | |
543 | ;; EPS footer | |
544 | ((and ebnf-eps-executing (= (following-char) ?F)) | |
545 | (ebnf-eps-footer-comment (ebnf-ebx-eps-filename))) | |
f3c3dee6 VJL |
546 | ;; any other action in comment |
547 | (t | |
548 | (setq ebnf-action (aref ebnf-comment-table (following-char)))) | |
549 | ) | |
550 | (while (progn | |
551 | (skip-chars-forward ebnf-ebx-comment-chars ebnf-limit) | |
552 | (or (= (following-char) ?*) | |
553 | (error "Missing end of comment")) | |
554 | (forward-char) | |
555 | (and (/= (following-char) ?/) | |
556 | (< (point) ebnf-limit)))) | |
557 | ;; check for a valid end of comment | |
558 | (and (>= (point) ebnf-limit) | |
559 | (error "Missing end of comment")) | |
560 | (forward-char) | |
561 | t) | |
562 | ||
563 | ||
564 | (defun ebnf-ebx-eps-filename () | |
565 | (forward-char) | |
566 | (let (fname nchar) | |
567 | (while (progn | |
568 | (setq fname | |
569 | (concat fname | |
570 | (ebnf-buffer-substring ebnf-ebx-filename-chars))) | |
571 | (and (< (point) ebnf-limit) | |
572 | (> (setq nchar (skip-chars-forward "*" ebnf-limit)) 0) | |
573 | (< (point) ebnf-limit) | |
574 | (/= (following-char) ?/))) | |
575 | (setq fname (concat fname (make-string nchar ?*)) | |
576 | nchar nil)) | |
577 | (if (or (not nchar) (= nchar 0)) | |
578 | fname | |
579 | (and (< (point) ebnf-limit) | |
580 | (= (following-char) ?/) | |
581 | (setq nchar (1- nchar))) | |
582 | (concat fname (make-string nchar ?*))))) | |
583 | ||
584 | ||
585 | ;; replace the range "\240-\377" (see `ebnf-range-regexp'). | |
586 | (defconst ebnf-ebx-double-string-chars | |
587 | (ebnf-range-regexp "\t -!#-~" ?\240 ?\377)) | |
588 | (defconst ebnf-ebx-single-string-chars | |
589 | (ebnf-range-regexp "\t -&(-~" ?\240 ?\377)) | |
590 | ||
591 | ||
592 | (defun ebnf-ebx-string (delim) | |
593 | (buffer-substring-no-properties | |
594 | (progn | |
595 | (forward-char) | |
596 | (point)) | |
597 | (progn | |
598 | (skip-chars-forward (if (= delim ?\") | |
599 | ebnf-ebx-double-string-chars | |
600 | ebnf-ebx-single-string-chars) | |
601 | ebnf-limit) | |
602 | (or (= (following-char) delim) | |
603 | (error "Missing string delimiter `%c'" delim)) | |
604 | (prog1 | |
605 | (point) | |
606 | (forward-char))))) | |
607 | ||
608 | ||
609 | (defun ebnf-ebx-character () | |
610 | ;; #x [0-9A-Fa-f]+ | |
611 | (buffer-substring-no-properties | |
612 | (point) | |
613 | (progn | |
614 | (ebnf-ebx-hex-character) | |
615 | (point)))) | |
616 | ||
617 | ||
618 | (defun ebnf-ebx-range () | |
619 | ;; [ ^? ( char ( - char )? )+ ] | |
620 | (buffer-substring-no-properties | |
621 | (point) | |
622 | (progn | |
623 | (forward-char) | |
624 | (and (= (following-char) ?^) | |
625 | (forward-char)) | |
626 | (and (= (following-char) ?-) | |
627 | (forward-char)) | |
628 | (while (progn | |
629 | (ebnf-ebx-any-character) | |
630 | (when (= (following-char) ?-) | |
631 | (forward-char) | |
632 | (ebnf-ebx-any-character)) | |
633 | (and (/= (following-char) ?\]) | |
634 | (< (point) ebnf-limit)))) | |
635 | (and (>= (point) ebnf-limit) | |
636 | (error "Missing end of character range `]'")) | |
637 | (forward-char) | |
638 | (point)))) | |
639 | ||
640 | ||
641 | (defun ebnf-ebx-any-character () | |
642 | (let ((char (following-char))) | |
643 | (cond ((= char ?#) | |
644 | (ebnf-ebx-hex-character t)) | |
645 | ((or (and (<= ?\ char) (<= char ?\")) ; # | |
646 | (and (<= ?$ char) (<= char ?,)) ; - | |
647 | (and (<= ?. char) (<= char ?\\)) ; ] | |
648 | (and (<= ?^ char) (<= char ?~)) | |
649 | (and (<= ?\240 char) (<= char ?\377))) | |
650 | (forward-char)) | |
651 | (t | |
652 | (error "Invalid character `%c'" char))))) | |
653 | ||
654 | ||
655 | (defun ebnf-ebx-hex-character (&optional no-error) | |
656 | ;; #x [0-9A-Fa-f]+ | |
657 | (forward-char) | |
658 | (if (/= (following-char) ?x) | |
659 | (or no-error | |
660 | (error "Invalid hexadecimal character")) | |
661 | (forward-char) | |
662 | (or (> (skip-chars-forward "0-9A-Fa-f" ebnf-limit) 0) | |
663 | (error "Invalid hexadecimal character")))) | |
664 | ||
665 | \f | |
666 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
667 | ||
668 | ||
669 | (provide 'ebnf-ebx) | |
670 | ||
cbee283d | 671 | ;; arch-tag: bfe2f95b-66bc-4dc6-8b7e-b7831e68f5fb |
f3c3dee6 | 672 | ;;; ebnf-ebx.el ends here |