| 1 | ;;; ebnf-ebx.el --- parser for EBNF used to specify XML (EBNFX) |
| 2 | |
| 3 | ;; Copyright (C) 2001-2013 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br> |
| 6 | ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br> |
| 7 | ;; Keywords: wp, ebnf, PostScript |
| 8 | ;; Version: 1.2 |
| 9 | ;; Package: ebnf2ps |
| 10 | |
| 11 | ;; This file is part of GNU Emacs. |
| 12 | |
| 13 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 14 | ;; it under the terms of the GNU General Public License as published by |
| 15 | ;; the Free Software Foundation, either version 3 of the License, or |
| 16 | ;; (at your option) any later version. |
| 17 | |
| 18 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 21 | ;; GNU General Public License for more details. |
| 22 | |
| 23 | ;; You should have received a copy of the GNU General Public License |
| 24 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 25 | |
| 26 | ;;; Commentary: |
| 27 | |
| 28 | ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 29 | ;; |
| 30 | ;; |
| 31 | ;; This is part of ebnf2ps package. |
| 32 | ;; |
| 33 | ;; This package defines a parser for EBNF used to specify XML (EBNFX). |
| 34 | ;; |
| 35 | ;; See ebnf2ps.el for documentation. |
| 36 | ;; |
| 37 | ;; |
| 38 | ;; EBNFX Syntax |
| 39 | ;; ------------ |
| 40 | ;; |
| 41 | ;; See the URL: |
| 42 | ;; `http://www.w3.org/TR/2004/REC-xml-20040204/#sec-notation' |
| 43 | ;; (Extensible Markup Language (XML) 1.0 (Third Edition)) |
| 44 | ;; |
| 45 | ;; |
| 46 | ;; rule ::= symbol '::=' expression |
| 47 | ;; /* rules are separated by at least one blank line. */ |
| 48 | ;; |
| 49 | ;; expression ::= concatenation ('|' concatenation)* |
| 50 | ;; |
| 51 | ;; concatenation ::= exception* |
| 52 | ;; |
| 53 | ;; exception ::= term ('-' term)? |
| 54 | ;; |
| 55 | ;; term ::= factor ('*' | '+' | '?')? |
| 56 | ;; |
| 57 | ;; factor ::= hex-char+ |
| 58 | ;; | '[' '^'? ( char ( '-' char )? )+ ']' |
| 59 | ;; | '"' 'string' '"' |
| 60 | ;; | "'" "string" "'" |
| 61 | ;; | '(' expression ')' |
| 62 | ;; | symbol |
| 63 | ;; |
| 64 | ;; symbol ::= 'upper or lower case letter' |
| 65 | ;; ('upper or lower case letter' | '-' | '_')* |
| 66 | ;; /* upper and lower 8-bit accentuated characters are included */ |
| 67 | ;; |
| 68 | ;; hex-char ::= '#x' [0-9A-Fa-f]+ |
| 69 | ;; |
| 70 | ;; char ::= hex-char | 'any character except control characters' |
| 71 | ;; /* 8-bit accentuated characters are included */ |
| 72 | ;; |
| 73 | ;; any-char ::= char | 'newline' | 'tab' |
| 74 | ;; |
| 75 | ;; ignore ::= '[' ('wfc' | 'WFC' | 'vc' | 'VC') ':' ( any-char - ']' )* ']' |
| 76 | ;; |
| 77 | ;; comment ::= '/*' ( any-char - '*/' ) '*/' |
| 78 | ;; |
| 79 | ;; |
| 80 | ;; Below is the Notation section extracted from the URL cited above. |
| 81 | ;; |
| 82 | ;; 6 Notation |
| 83 | ;; |
| 84 | ;; The formal grammar of XML is given in this specification using a simple |
| 85 | ;; Extended Backus-Naur Form (EBNF) notation. Each rule in the grammar defines |
| 86 | ;; one symbol, in the form |
| 87 | ;; |
| 88 | ;; symbol ::= expression |
| 89 | ;; |
| 90 | ;; Symbols are written with an initial capital letter if they are the start |
| 91 | ;; symbol of a regular language, otherwise with an initial lowercase letter. |
| 92 | ;; Literal strings are quoted. |
| 93 | ;; |
| 94 | ;; Within the expression on the right-hand side of a rule, the following |
| 95 | ;; expressions are used to match strings of one or more characters: |
| 96 | ;; |
| 97 | ;; #xN |
| 98 | ;; |
| 99 | ;; where N is a hexadecimal integer, the expression matches the character |
| 100 | ;; whose number (code point) in ISO/IEC 10646 is N. The number of leading |
| 101 | ;; zeros in the #xN form is insignificant. |
| 102 | ;; |
| 103 | ;; [a-zA-Z], [#xN-#xN] |
| 104 | ;; |
| 105 | ;; matches any Char with a value in the range(s) indicated (inclusive). |
| 106 | ;; |
| 107 | ;; [abc], [#xN#xN#xN] |
| 108 | ;; |
| 109 | ;; matches any Char with a value among the characters enumerated. |
| 110 | ;; Enumerations and ranges can be mixed in one set of brackets. |
| 111 | ;; |
| 112 | ;; [^a-z], [^#xN-#xN] |
| 113 | ;; |
| 114 | ;; matches any Char with a value outside the range indicated. |
| 115 | ;; |
| 116 | ;; [^abc], [^#xN#xN#xN] |
| 117 | ;; |
| 118 | ;; matches any Char with a value not among the characters given. |
| 119 | ;; Enumerations and ranges of forbidden values can be mixed in one set of |
| 120 | ;; brackets. |
| 121 | ;; |
| 122 | ;; "string" |
| 123 | ;; |
| 124 | ;; matches a literal string matching that given inside the double quotes. |
| 125 | ;; |
| 126 | ;; 'string' |
| 127 | ;; |
| 128 | ;; matches a literal string matching that given inside the single quotes. |
| 129 | ;; |
| 130 | ;; These symbols may be combined to match more complex patterns as follows, |
| 131 | ;; where A and B represent simple expressions: |
| 132 | ;; |
| 133 | ;; (expression) |
| 134 | ;; |
| 135 | ;; expression is treated as a unit and may be combined as described in this |
| 136 | ;; list. |
| 137 | ;; |
| 138 | ;; A? |
| 139 | ;; |
| 140 | ;; matches A or nothing; optional A. |
| 141 | ;; |
| 142 | ;; A B |
| 143 | ;; |
| 144 | ;; matches A followed by B. This operator has higher precedence than |
| 145 | ;; alternation; thus A B | C D is identical to (A B) | (C D). |
| 146 | ;; |
| 147 | ;; A | B |
| 148 | ;; |
| 149 | ;; matches A or B. |
| 150 | ;; |
| 151 | ;; A - B |
| 152 | ;; |
| 153 | ;; matches any string that matches A but does not match B. |
| 154 | ;; |
| 155 | ;; A+ |
| 156 | ;; |
| 157 | ;; matches one or more occurrences of A. Concatenation has higher |
| 158 | ;; precedence than alternation; thus A+ | B+ is identical to (A+) | (B+). |
| 159 | ;; |
| 160 | ;; A* |
| 161 | ;; |
| 162 | ;; matches zero or more occurrences of A. Concatenation has higher |
| 163 | ;; precedence than alternation; thus A* | B* is identical to (A*) | (B*). |
| 164 | ;; |
| 165 | ;; Other notations used in the productions are: |
| 166 | ;; |
| 167 | ;; /* ... */ |
| 168 | ;; |
| 169 | ;; comment. |
| 170 | ;; |
| 171 | ;; [ wfc: ... ] |
| 172 | ;; |
| 173 | ;; well-formedness constraint; this identifies by name a constraint on |
| 174 | ;; well-formed documents associated with a production. |
| 175 | ;; |
| 176 | ;; [ vc: ... ] |
| 177 | ;; |
| 178 | ;; validity constraint; this identifies by name a constraint on valid |
| 179 | ;; documents associated with a production. |
| 180 | ;; |
| 181 | ;; |
| 182 | ;; Differences Between EBNFX And ebnf2ps EBNFX |
| 183 | ;; ------------------------------------------- |
| 184 | ;; |
| 185 | ;; Besides the characters that EBNFX accepts, ebnf2ps EBNFX accepts also the |
| 186 | ;; underscore (_) and minus (-) for rule name and european 8-bit accentuated |
| 187 | ;; characters (from \240 to \377) for rule name, string and comment. Also |
| 188 | ;; rule name can start with upper case letter. |
| 189 | ;; |
| 190 | ;; |
| 191 | ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 192 | |
| 193 | ;;; Code: |
| 194 | |
| 195 | |
| 196 | (require 'ebnf-otz) |
| 197 | |
| 198 | |
| 199 | (defvar ebnf-ebx-lex nil |
| 200 | "Value returned by `ebnf-ebx-lex' function.") |
| 201 | |
| 202 | \f |
| 203 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 204 | ;; Syntactic analyzer |
| 205 | |
| 206 | |
| 207 | ;;; rulelist ::= rule+ |
| 208 | |
| 209 | (defun ebnf-ebx-parser (start) |
| 210 | "EBNFX parser." |
| 211 | (let ((total (+ (- ebnf-limit start) 1)) |
| 212 | (bias (1- start)) |
| 213 | (origin (point)) |
| 214 | rule-list token rule) |
| 215 | (goto-char start) |
| 216 | (setq token (ebnf-ebx-lex)) |
| 217 | (and (eq token 'end-of-input) |
| 218 | (error "Invalid EBNFX file format")) |
| 219 | (and (eq token 'end-of-rule) |
| 220 | (setq token (ebnf-ebx-lex))) |
| 221 | (while (not (eq token 'end-of-input)) |
| 222 | (ebnf-message-float |
| 223 | "Parsing...%s%%" |
| 224 | (/ (* (- (point) bias) 100.0) total)) |
| 225 | (setq token (ebnf-ebx-rule token) |
| 226 | rule (cdr token) |
| 227 | token (car token)) |
| 228 | (or (ebnf-add-empty-rule-list rule) |
| 229 | (setq rule-list (cons rule rule-list)))) |
| 230 | (goto-char origin) |
| 231 | rule-list)) |
| 232 | |
| 233 | |
| 234 | ;;; rule ::= symbol '::=' expression |
| 235 | |
| 236 | |
| 237 | (defun ebnf-ebx-rule (token) |
| 238 | (let ((name ebnf-ebx-lex) |
| 239 | (action ebnf-action) |
| 240 | elements) |
| 241 | (setq ebnf-action nil) |
| 242 | (or (eq token 'non-terminal) |
| 243 | (error "Invalid rule name")) |
| 244 | (setq token (ebnf-ebx-lex)) |
| 245 | (or (eq token 'production) |
| 246 | (error "Invalid rule: missing `::='")) |
| 247 | (setq elements (ebnf-ebx-expression)) |
| 248 | (or (memq (car elements) '(end-of-rule end-of-input)) |
| 249 | (error "Invalid rule: there is no end of rule")) |
| 250 | (setq elements (cdr elements)) |
| 251 | (ebnf-eps-add-production name) |
| 252 | (cons (ebnf-ebx-lex) |
| 253 | (ebnf-make-production name elements action)))) |
| 254 | |
| 255 | |
| 256 | ;; expression ::= concatenation ('|' concatenation)* |
| 257 | |
| 258 | |
| 259 | (defun ebnf-ebx-expression () |
| 260 | (let (body concatenation) |
| 261 | (while (eq (car (setq concatenation |
| 262 | (ebnf-ebx-concatenation (ebnf-ebx-lex)))) |
| 263 | 'alternative) |
| 264 | (setq body (cons (cdr concatenation) body))) |
| 265 | (ebnf-token-alternative body concatenation))) |
| 266 | |
| 267 | |
| 268 | ;; concatenation ::= exception* |
| 269 | |
| 270 | |
| 271 | (defun ebnf-ebx-concatenation (token) |
| 272 | (let ((term (ebnf-ebx-exception token)) |
| 273 | seq) |
| 274 | (or (setq token (car term) |
| 275 | term (cdr term)) |
| 276 | (error "Empty element")) |
| 277 | (setq seq (cons term seq)) |
| 278 | (while (setq term (ebnf-ebx-exception token) |
| 279 | token (car term) |
| 280 | term (cdr term)) |
| 281 | (setq seq (cons term seq))) |
| 282 | (cons token |
| 283 | (ebnf-token-sequence seq)))) |
| 284 | |
| 285 | |
| 286 | ;;; exception ::= term ('-' term)? |
| 287 | |
| 288 | |
| 289 | (defun ebnf-ebx-exception (token) |
| 290 | (let ((term (ebnf-ebx-term token))) |
| 291 | (if (eq (car term) 'exception) |
| 292 | (let ((except (ebnf-ebx-term (ebnf-ebx-lex)))) |
| 293 | (cons (car except) |
| 294 | (ebnf-make-except (cdr term) (cdr except)))) |
| 295 | term))) |
| 296 | |
| 297 | |
| 298 | |
| 299 | ;;; term ::= factor ('*' | '+' | '?')? |
| 300 | |
| 301 | |
| 302 | (defun ebnf-ebx-term (token) |
| 303 | (let ((factor (ebnf-ebx-factor token))) |
| 304 | (when factor |
| 305 | (setq token (ebnf-ebx-lex)) |
| 306 | (cond ((eq token 'zero-or-more) |
| 307 | (setq factor (ebnf-make-zero-or-more factor) |
| 308 | token (ebnf-ebx-lex))) |
| 309 | ((eq token 'one-or-more) |
| 310 | (setq factor (ebnf-make-one-or-more factor) |
| 311 | token (ebnf-ebx-lex))) |
| 312 | ((eq token 'optional) |
| 313 | (setq factor (ebnf-token-optional factor) |
| 314 | token (ebnf-ebx-lex))))) |
| 315 | (cons token factor))) |
| 316 | |
| 317 | |
| 318 | ;;; factor ::= hex-char+ |
| 319 | ;;; | '[' '^'? ( char ( '-' char )? )+ ']' |
| 320 | ;;; | '"' 'string' '"' |
| 321 | ;;; | "'" "string" "'" |
| 322 | ;;; | '(' expression ')' |
| 323 | ;;; | symbol |
| 324 | ;;; |
| 325 | ;;; symbol ::= 'upper or lower case letter' |
| 326 | ;;; ('upper or lower case letter' | '-' | '_')* |
| 327 | ;;; /* upper and lower 8-bit accentuated characters are included */ |
| 328 | ;;; |
| 329 | ;;; hex-char ::= '#x' [0-9A-Fa-f]+ |
| 330 | ;;; |
| 331 | ;;; char ::= hex-char | 'any character except control characters' |
| 332 | ;;; /* 8-bit accentuated characters are included */ |
| 333 | ;;; |
| 334 | ;;; any-char ::= char | 'newline' | 'tab' |
| 335 | |
| 336 | |
| 337 | (defun ebnf-ebx-factor (token) |
| 338 | (cond |
| 339 | ;; terminal |
| 340 | ((eq token 'terminal) |
| 341 | (ebnf-make-terminal ebnf-ebx-lex)) |
| 342 | ;; non-terminal |
| 343 | ((eq token 'non-terminal) |
| 344 | (ebnf-make-non-terminal ebnf-ebx-lex)) |
| 345 | ;; group |
| 346 | ((eq token 'begin-group) |
| 347 | (let ((body (ebnf-ebx-expression))) |
| 348 | (or (eq (car body) 'end-group) |
| 349 | (error "Missing `)'")) |
| 350 | (cdr body))) |
| 351 | ;; no element |
| 352 | (t |
| 353 | nil) |
| 354 | )) |
| 355 | |
| 356 | \f |
| 357 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 358 | ;; Lexical analyzer |
| 359 | |
| 360 | |
| 361 | (defconst ebnf-ebx-token-table (make-vector 256 'error) |
| 362 | "Vector used to map characters to a lexical token.") |
| 363 | |
| 364 | |
| 365 | (defun ebnf-ebx-initialize () |
| 366 | "Initialize EBNFX token table." |
| 367 | ;; control character & control 8-bit character are set to `error' |
| 368 | (let ((char ?\101)) |
| 369 | ;; printable character: A-Z |
| 370 | (while (< char ?\133) |
| 371 | (aset ebnf-ebx-token-table char 'non-terminal) |
| 372 | (setq char (1+ char))) |
| 373 | ;; printable character: a-z |
| 374 | (setq char ?\141) |
| 375 | (while (< char ?\173) |
| 376 | (aset ebnf-ebx-token-table char 'non-terminal) |
| 377 | (setq char (1+ char))) |
| 378 | ;; European 8-bit accentuated characters: |
| 379 | (setq char ?\240) |
| 380 | (while (< char ?\400) |
| 381 | (aset ebnf-ebx-token-table char 'non-terminal) |
| 382 | (setq char (1+ char))) |
| 383 | ;; Override end of line characters: |
| 384 | (aset ebnf-ebx-token-table ?\n 'end-of-rule) ; [NL] linefeed |
| 385 | (aset ebnf-ebx-token-table ?\r 'end-of-rule) ; [CR] carriage return |
| 386 | ;; Override space characters: |
| 387 | (aset ebnf-ebx-token-table ?\013 'space) ; [VT] vertical tab |
| 388 | (aset ebnf-ebx-token-table ?\t 'space) ; [HT] horizontal tab |
| 389 | (aset ebnf-ebx-token-table ?\ 'space) ; [SP] space |
| 390 | ;; Override form feed character: |
| 391 | (aset ebnf-ebx-token-table ?\f 'form-feed) ; [FF] form feed |
| 392 | ;; Override other lexical characters: |
| 393 | (aset ebnf-ebx-token-table ?# 'hash) |
| 394 | (aset ebnf-ebx-token-table ?\" 'double-quote) |
| 395 | (aset ebnf-ebx-token-table ?\' 'single-quote) |
| 396 | (aset ebnf-ebx-token-table ?\( 'begin-group) |
| 397 | (aset ebnf-ebx-token-table ?\) 'end-group) |
| 398 | (aset ebnf-ebx-token-table ?- 'exception) |
| 399 | (aset ebnf-ebx-token-table ?: 'colon) |
| 400 | (aset ebnf-ebx-token-table ?\[ 'begin-square) |
| 401 | (aset ebnf-ebx-token-table ?| 'alternative) |
| 402 | (aset ebnf-ebx-token-table ?* 'zero-or-more) |
| 403 | (aset ebnf-ebx-token-table ?+ 'one-or-more) |
| 404 | (aset ebnf-ebx-token-table ?\? 'optional) |
| 405 | ;; Override comment character: |
| 406 | (aset ebnf-ebx-token-table ?/ 'comment))) |
| 407 | |
| 408 | |
| 409 | ;; replace the range "\240-\377" (see `ebnf-range-regexp'). |
| 410 | (defconst ebnf-ebx-non-terminal-chars |
| 411 | (ebnf-range-regexp "-_A-Za-z" ?\240 ?\377)) |
| 412 | (defconst ebnf-ebx-non-terminal-letter-chars |
| 413 | (ebnf-range-regexp "A-Za-z" ?\240 ?\377)) |
| 414 | |
| 415 | |
| 416 | (defun ebnf-ebx-lex () |
| 417 | "Lexical analyzer for EBNFX. |
| 418 | |
| 419 | Return a lexical token. |
| 420 | |
| 421 | See documentation for variable `ebnf-ebx-lex'." |
| 422 | (if (>= (point) ebnf-limit) |
| 423 | 'end-of-input |
| 424 | (let (token) |
| 425 | ;; skip spaces and comments |
| 426 | (while (if (> (following-char) 255) |
| 427 | (progn |
| 428 | (setq token 'error) |
| 429 | nil) |
| 430 | (setq token (aref ebnf-ebx-token-table (following-char))) |
| 431 | (cond |
| 432 | ((eq token 'space) |
| 433 | (skip-chars-forward " \013\t" ebnf-limit) |
| 434 | (< (point) ebnf-limit)) |
| 435 | ((eq token 'comment) |
| 436 | (ebnf-ebx-skip-comment)) |
| 437 | ((eq token 'form-feed) |
| 438 | (forward-char) |
| 439 | (setq ebnf-action 'form-feed)) |
| 440 | ((eq token 'end-of-rule) |
| 441 | (ebnf-ebx-skip-end-of-rule)) |
| 442 | ((and (eq token 'begin-square) |
| 443 | (let ((case-fold-search t)) |
| 444 | (looking-at "\\[\\(wfc\\|vc\\):"))) |
| 445 | (ebnf-ebx-skip-constraint)) |
| 446 | (t nil) |
| 447 | ))) |
| 448 | (cond |
| 449 | ;; end of input |
| 450 | ((>= (point) ebnf-limit) |
| 451 | 'end-of-input) |
| 452 | ;; error |
| 453 | ((eq token 'error) |
| 454 | (error "Invalid character")) |
| 455 | ;; end of rule |
| 456 | ((eq token 'end-of-rule) |
| 457 | 'end-of-rule) |
| 458 | ;; terminal: #x [0-9A-Fa-f]+ |
| 459 | ((eq token 'hash) |
| 460 | (setq ebnf-ebx-lex (ebnf-ebx-character)) |
| 461 | 'terminal) |
| 462 | ;; terminal: "string" |
| 463 | ((eq token 'double-quote) |
| 464 | (setq ebnf-ebx-lex (ebnf-ebx-string ?\")) |
| 465 | 'terminal) |
| 466 | ;; terminal: 'string' |
| 467 | ((eq token 'single-quote) |
| 468 | (setq ebnf-ebx-lex (ebnf-ebx-string ?\')) |
| 469 | 'terminal) |
| 470 | ;; terminal: [ ^? ( char ( - char )? )+ ] |
| 471 | ((eq token 'begin-square) |
| 472 | (setq ebnf-ebx-lex (ebnf-ebx-range)) |
| 473 | 'terminal) |
| 474 | ;; non-terminal: NAME |
| 475 | ((eq token 'non-terminal) |
| 476 | (setq ebnf-ebx-lex |
| 477 | (ebnf-buffer-substring ebnf-ebx-non-terminal-chars)) |
| 478 | 'non-terminal) |
| 479 | ;; colon: ::= |
| 480 | ((eq token 'colon) |
| 481 | (or (looking-at "::=") |
| 482 | (error "Missing `::=' token")) |
| 483 | (forward-char 3) |
| 484 | 'production) |
| 485 | ;; miscellaneous: (, ), *, +, ?, |, - |
| 486 | (t |
| 487 | (forward-char) |
| 488 | token) |
| 489 | )))) |
| 490 | |
| 491 | |
| 492 | ;; replace the range "\177-\237" (see `ebnf-range-regexp'). |
| 493 | (defconst ebnf-ebx-constraint-chars |
| 494 | (ebnf-range-regexp "^\000-\010\016-\037]" ?\177 ?\237)) |
| 495 | |
| 496 | |
| 497 | (defun ebnf-ebx-skip-constraint () |
| 498 | (or (> (skip-chars-forward ebnf-ebx-constraint-chars ebnf-limit) 0) |
| 499 | (error "Invalid character")) |
| 500 | (or (= (following-char) ?\]) |
| 501 | (error "Missing end of constraint `]'")) |
| 502 | (forward-char) |
| 503 | t) |
| 504 | |
| 505 | |
| 506 | |
| 507 | (defun ebnf-ebx-skip-end-of-rule () |
| 508 | (let (eor-p) |
| 509 | (while (progn |
| 510 | ;; end of rule ==> 2 or more consecutive end of lines |
| 511 | (setq eor-p (or (> (skip-chars-forward "\r\n" ebnf-limit) 1) |
| 512 | eor-p)) |
| 513 | ;; skip spaces |
| 514 | (skip-chars-forward " \013\t" ebnf-limit) |
| 515 | ;; skip comments |
| 516 | (and (= (following-char) ?/) |
| 517 | (ebnf-ebx-skip-comment)))) |
| 518 | (not eor-p))) |
| 519 | |
| 520 | |
| 521 | ;; replace the range "\177-\237" (see `ebnf-range-regexp'). |
| 522 | (defconst ebnf-ebx-comment-chars |
| 523 | (ebnf-range-regexp "^\000-\010\016-\037\\*" ?\177 ?\237)) |
| 524 | (defconst ebnf-ebx-filename-chars |
| 525 | (ebnf-range-regexp "^\000-\037\\*" ?\177 ?\237)) |
| 526 | |
| 527 | |
| 528 | (defun ebnf-ebx-skip-comment () |
| 529 | (forward-char) |
| 530 | (or (= (following-char) ?*) |
| 531 | (error "Invalid beginning of comment")) |
| 532 | (forward-char) |
| 533 | (cond |
| 534 | ;; open EPS file |
| 535 | ((and ebnf-eps-executing (= (following-char) ?\[)) |
| 536 | (ebnf-eps-add-context (ebnf-ebx-eps-filename))) |
| 537 | ;; close EPS file |
| 538 | ((and ebnf-eps-executing (= (following-char) ?\])) |
| 539 | (ebnf-eps-remove-context (ebnf-ebx-eps-filename))) |
| 540 | ;; EPS header |
| 541 | ((and ebnf-eps-executing (= (following-char) ?H)) |
| 542 | (ebnf-eps-header-comment (ebnf-ebx-eps-filename))) |
| 543 | ;; EPS footer |
| 544 | ((and ebnf-eps-executing (= (following-char) ?F)) |
| 545 | (ebnf-eps-footer-comment (ebnf-ebx-eps-filename))) |
| 546 | ;; any other action in comment |
| 547 | (t |
| 548 | (setq ebnf-action (aref ebnf-comment-table (following-char)))) |
| 549 | ) |
| 550 | (while (progn |
| 551 | (skip-chars-forward ebnf-ebx-comment-chars ebnf-limit) |
| 552 | (or (= (following-char) ?*) |
| 553 | (error "Missing end of comment")) |
| 554 | (forward-char) |
| 555 | (and (/= (following-char) ?/) |
| 556 | (< (point) ebnf-limit)))) |
| 557 | ;; check for a valid end of comment |
| 558 | (and (>= (point) ebnf-limit) |
| 559 | (error "Missing end of comment")) |
| 560 | (forward-char) |
| 561 | t) |
| 562 | |
| 563 | |
| 564 | (defun ebnf-ebx-eps-filename () |
| 565 | (forward-char) |
| 566 | (let (fname nchar) |
| 567 | (while (progn |
| 568 | (setq fname |
| 569 | (concat fname |
| 570 | (ebnf-buffer-substring ebnf-ebx-filename-chars))) |
| 571 | (and (< (point) ebnf-limit) |
| 572 | (> (setq nchar (skip-chars-forward "*" ebnf-limit)) 0) |
| 573 | (< (point) ebnf-limit) |
| 574 | (/= (following-char) ?/))) |
| 575 | (setq fname (concat fname (make-string nchar ?*)) |
| 576 | nchar nil)) |
| 577 | (if (or (not nchar) (= nchar 0)) |
| 578 | fname |
| 579 | (and (< (point) ebnf-limit) |
| 580 | (= (following-char) ?/) |
| 581 | (setq nchar (1- nchar))) |
| 582 | (concat fname (make-string nchar ?*))))) |
| 583 | |
| 584 | |
| 585 | ;; replace the range "\240-\377" (see `ebnf-range-regexp'). |
| 586 | (defconst ebnf-ebx-double-string-chars |
| 587 | (ebnf-range-regexp "\t -!#-~" ?\240 ?\377)) |
| 588 | (defconst ebnf-ebx-single-string-chars |
| 589 | (ebnf-range-regexp "\t -&(-~" ?\240 ?\377)) |
| 590 | |
| 591 | |
| 592 | (defun ebnf-ebx-string (delim) |
| 593 | (buffer-substring-no-properties |
| 594 | (progn |
| 595 | (forward-char) |
| 596 | (point)) |
| 597 | (progn |
| 598 | (skip-chars-forward (if (= delim ?\") |
| 599 | ebnf-ebx-double-string-chars |
| 600 | ebnf-ebx-single-string-chars) |
| 601 | ebnf-limit) |
| 602 | (or (= (following-char) delim) |
| 603 | (error "Missing string delimiter `%c'" delim)) |
| 604 | (prog1 |
| 605 | (point) |
| 606 | (forward-char))))) |
| 607 | |
| 608 | |
| 609 | (defun ebnf-ebx-character () |
| 610 | ;; #x [0-9A-Fa-f]+ |
| 611 | (buffer-substring-no-properties |
| 612 | (point) |
| 613 | (progn |
| 614 | (ebnf-ebx-hex-character) |
| 615 | (point)))) |
| 616 | |
| 617 | |
| 618 | (defun ebnf-ebx-range () |
| 619 | ;; [ ^? ( char ( - char )? )+ ] |
| 620 | (buffer-substring-no-properties |
| 621 | (point) |
| 622 | (progn |
| 623 | (forward-char) |
| 624 | (and (= (following-char) ?^) |
| 625 | (forward-char)) |
| 626 | (and (= (following-char) ?-) |
| 627 | (forward-char)) |
| 628 | (while (progn |
| 629 | (ebnf-ebx-any-character) |
| 630 | (when (= (following-char) ?-) |
| 631 | (forward-char) |
| 632 | (ebnf-ebx-any-character)) |
| 633 | (and (/= (following-char) ?\]) |
| 634 | (< (point) ebnf-limit)))) |
| 635 | (and (>= (point) ebnf-limit) |
| 636 | (error "Missing end of character range `]'")) |
| 637 | (forward-char) |
| 638 | (point)))) |
| 639 | |
| 640 | |
| 641 | (defun ebnf-ebx-any-character () |
| 642 | (let ((char (following-char))) |
| 643 | (cond ((= char ?#) |
| 644 | (ebnf-ebx-hex-character t)) |
| 645 | ((or (and (<= ?\ char) (<= char ?\")) ; # |
| 646 | (and (<= ?$ char) (<= char ?,)) ; - |
| 647 | (and (<= ?. char) (<= char ?\\)) ; ] |
| 648 | (and (<= ?^ char) (<= char ?~)) |
| 649 | (and (<= ?\240 char) (<= char ?\377))) |
| 650 | (forward-char)) |
| 651 | (t |
| 652 | (error "Invalid character `%c'" char))))) |
| 653 | |
| 654 | |
| 655 | (defun ebnf-ebx-hex-character (&optional no-error) |
| 656 | ;; #x [0-9A-Fa-f]+ |
| 657 | (forward-char) |
| 658 | (if (/= (following-char) ?x) |
| 659 | (or no-error |
| 660 | (error "Invalid hexadecimal character")) |
| 661 | (forward-char) |
| 662 | (or (> (skip-chars-forward "0-9A-Fa-f" ebnf-limit) 0) |
| 663 | (error "Invalid hexadecimal character")))) |
| 664 | |
| 665 | \f |
| 666 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 667 | |
| 668 | |
| 669 | (provide 'ebnf-ebx) |
| 670 | |
| 671 | ;;; ebnf-ebx.el ends here |