| 1 | ;;; rng-cmpct.el --- parsing of RELAX NG Compact Syntax schemas |
| 2 | |
| 3 | ;; Copyright (C) 2003, 2007-2014 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Author: James Clark |
| 6 | ;; Keywords: wp, hypermedia, languages, XML, RelaxNG |
| 7 | |
| 8 | ;; This file is part of GNU Emacs. |
| 9 | |
| 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 11 | ;; it under the terms of the GNU General Public License as published by |
| 12 | ;; the Free Software Foundation, either version 3 of the License, or |
| 13 | ;; (at your option) any later version. |
| 14 | |
| 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ;; GNU General Public License for more details. |
| 19 | |
| 20 | ;; You should have received a copy of the GNU General Public License |
| 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 22 | |
| 23 | ;;; Commentary: |
| 24 | |
| 25 | ;; This parses a RELAX NG Compact Syntax schema into the form |
| 26 | ;; specified in rng-pttrn.el. |
| 27 | ;; |
| 28 | ;; RELAX NG Compact Syntax is specified by |
| 29 | ;; http://relaxng.org/compact.html |
| 30 | ;; |
| 31 | ;; This file uses the prefix "rng-c-". |
| 32 | |
| 33 | ;;; Code: |
| 34 | |
| 35 | (require 'nxml-util) |
| 36 | (require 'rng-util) |
| 37 | (require 'rng-uri) |
| 38 | (require 'rng-pttrn) |
| 39 | |
| 40 | ;;;###autoload |
| 41 | (defun rng-c-load-schema (filename) |
| 42 | "Load a schema in RELAX NG compact syntax from FILENAME. |
| 43 | Return a pattern." |
| 44 | (rng-c-parse-file filename)) |
| 45 | |
| 46 | ;;; Error handling |
| 47 | |
| 48 | (define-error 'rng-c-incorrect-schema |
| 49 | "Incorrect schema" '(rng-error nxml-file-parse-error)) |
| 50 | |
| 51 | (defun rng-c-signal-incorrect-schema (filename pos message) |
| 52 | (nxml-signal-file-parse-error filename |
| 53 | pos |
| 54 | message |
| 55 | 'rng-c-incorrect-schema)) |
| 56 | |
| 57 | ;;; Lexing |
| 58 | |
| 59 | (defconst rng-c-keywords |
| 60 | '("attribute" |
| 61 | "default" |
| 62 | "datatypes" |
| 63 | "div" |
| 64 | "element" |
| 65 | "empty" |
| 66 | "external" |
| 67 | "grammar" |
| 68 | "include" |
| 69 | "inherit" |
| 70 | "list" |
| 71 | "mixed" |
| 72 | "namespace" |
| 73 | "notAllowed" |
| 74 | "parent" |
| 75 | "start" |
| 76 | "string" |
| 77 | "text" |
| 78 | "token") |
| 79 | "List of strings that are keywords in the compact syntax.") |
| 80 | |
| 81 | (defconst rng-c-anchored-keyword-re |
| 82 | (concat "\\`\\(" (regexp-opt rng-c-keywords) "\\)\\'") |
| 83 | "Regular expression to match a keyword in the compact syntax.") |
| 84 | |
| 85 | (defvar rng-c-syntax-table nil |
| 86 | "Syntax table for parsing the compact syntax.") |
| 87 | |
| 88 | (if rng-c-syntax-table |
| 89 | () |
| 90 | (setq rng-c-syntax-table (make-syntax-table)) |
| 91 | (modify-syntax-entry ?# "<" rng-c-syntax-table) |
| 92 | (modify-syntax-entry ?\n ">" rng-c-syntax-table) |
| 93 | (modify-syntax-entry ?- "w" rng-c-syntax-table) |
| 94 | (modify-syntax-entry ?. "w" rng-c-syntax-table) |
| 95 | (modify-syntax-entry ?_ "w" rng-c-syntax-table) |
| 96 | (modify-syntax-entry ?: "_" rng-c-syntax-table)) |
| 97 | |
| 98 | (defconst rng-c-literal-1-re |
| 99 | "'\\(''\\([^']\\|'[^']\\|''[^']\\)*''\\|[^'\n]*\\)'" |
| 100 | "Regular expression to match a single-quoted literal.") |
| 101 | |
| 102 | (defconst rng-c-literal-2-re |
| 103 | (replace-regexp-in-string "'" "\"" rng-c-literal-1-re) |
| 104 | "Regular expression to match a double-quoted literal.") |
| 105 | |
| 106 | (defconst rng-c-ncname-re "\\w+") |
| 107 | |
| 108 | (defconst rng-c-anchored-ncname-re |
| 109 | (concat "\\`" rng-c-ncname-re "\\'")) |
| 110 | |
| 111 | (defconst rng-c-token-re |
| 112 | (concat "[&|]=" "\\|" |
| 113 | "[][()|&,*+?{}~=-]" "\\|" |
| 114 | rng-c-literal-1-re "\\|" |
| 115 | rng-c-literal-2-re "\\|" |
| 116 | rng-c-ncname-re "\\(:\\(\\*\\|" rng-c-ncname-re "\\)\\)?" "\\|" |
| 117 | "\\\\" rng-c-ncname-re "\\|" |
| 118 | ">>") |
| 119 | "Regular expression to match a token in the compact syntax.") |
| 120 | |
| 121 | (defun rng-c-init-buffer () |
| 122 | (setq case-fold-search nil) ; automatically becomes buffer-local when set |
| 123 | (set-buffer-multibyte t) |
| 124 | (set-syntax-table rng-c-syntax-table)) |
| 125 | |
| 126 | (defvar rng-c-current-token nil) |
| 127 | (make-variable-buffer-local 'rng-c-current-token) |
| 128 | |
| 129 | (defun rng-c-advance () |
| 130 | (cond ((looking-at rng-c-token-re) |
| 131 | (setq rng-c-current-token (match-string 0)) |
| 132 | (goto-char (match-end 0)) |
| 133 | (forward-comment (point-max))) |
| 134 | ((= (point) (point-max)) |
| 135 | (setq rng-c-current-token "")) |
| 136 | (t (rng-c-error "Invalid token")))) |
| 137 | |
| 138 | (defconst rng-c-anchored-datatype-name-re |
| 139 | (concat "\\`" rng-c-ncname-re ":" rng-c-ncname-re "\\'")) |
| 140 | |
| 141 | (defsubst rng-c-current-token-keyword-p () |
| 142 | (string-match rng-c-anchored-keyword-re rng-c-current-token)) |
| 143 | |
| 144 | (defsubst rng-c-current-token-prefixed-name-p () |
| 145 | (string-match rng-c-anchored-datatype-name-re rng-c-current-token)) |
| 146 | |
| 147 | (defsubst rng-c-current-token-literal-p () |
| 148 | (string-match "\\`['\"]" rng-c-current-token)) |
| 149 | |
| 150 | (defsubst rng-c-current-token-quoted-identifier-p () |
| 151 | (string-match "\\`\\\\" rng-c-current-token)) |
| 152 | |
| 153 | (defsubst rng-c-current-token-ncname-p () |
| 154 | (string-match rng-c-anchored-ncname-re rng-c-current-token)) |
| 155 | |
| 156 | (defsubst rng-c-current-token-ns-name-p () |
| 157 | (let ((len (length rng-c-current-token))) |
| 158 | (and (> len 0) |
| 159 | (= (aref rng-c-current-token (- len 1)) ?*)))) |
| 160 | |
| 161 | ;;; Namespaces |
| 162 | |
| 163 | (defvar rng-c-inherit-namespace nil) |
| 164 | |
| 165 | (defvar rng-c-default-namespace nil) |
| 166 | |
| 167 | (defvar rng-c-default-namespace-declared nil) |
| 168 | |
| 169 | (defvar rng-c-namespace-decls nil |
| 170 | "Alist of namespace declarations.") |
| 171 | |
| 172 | (defconst rng-c-no-namespace nil) |
| 173 | |
| 174 | (defun rng-c-declare-standard-namespaces () |
| 175 | (setq rng-c-namespace-decls |
| 176 | (cons (cons "xml" nxml-xml-namespace-uri) |
| 177 | rng-c-namespace-decls)) |
| 178 | (when (and (not rng-c-default-namespace-declared) |
| 179 | rng-c-inherit-namespace) |
| 180 | (setq rng-c-default-namespace rng-c-inherit-namespace))) |
| 181 | |
| 182 | (defun rng-c-expand-name (prefixed-name) |
| 183 | (let ((i (string-match ":" prefixed-name))) |
| 184 | (rng-make-name (rng-c-lookup-prefix (substring prefixed-name |
| 185 | 0 |
| 186 | i)) |
| 187 | (substring prefixed-name (+ i 1))))) |
| 188 | |
| 189 | (defun rng-c-lookup-prefix (prefix) |
| 190 | (let ((binding (assoc prefix rng-c-namespace-decls))) |
| 191 | (or binding (rng-c-error "Undefined prefix %s" prefix)) |
| 192 | (cdr binding))) |
| 193 | |
| 194 | (defun rng-c-unqualified-namespace (attribute) |
| 195 | (if attribute |
| 196 | rng-c-no-namespace |
| 197 | rng-c-default-namespace)) |
| 198 | |
| 199 | (defun rng-c-make-context () |
| 200 | (cons rng-c-default-namespace rng-c-namespace-decls)) |
| 201 | |
| 202 | ;;; Datatypes |
| 203 | |
| 204 | (defconst rng-string-datatype |
| 205 | (rng-make-datatype rng-builtin-datatypes-uri "string")) |
| 206 | |
| 207 | (defconst rng-token-datatype |
| 208 | (rng-make-datatype rng-builtin-datatypes-uri "token")) |
| 209 | |
| 210 | (defvar rng-c-datatype-decls nil |
| 211 | "Alist of datatype declarations. |
| 212 | Contains a list of pairs (PREFIX . URI) where PREFIX is a string |
| 213 | and URI is a symbol.") |
| 214 | |
| 215 | (defun rng-c-declare-standard-datatypes () |
| 216 | (setq rng-c-datatype-decls |
| 217 | (cons (cons "xsd" rng-xsd-datatypes-uri) |
| 218 | rng-c-datatype-decls))) |
| 219 | |
| 220 | (defun rng-c-lookup-datatype-prefix (prefix) |
| 221 | (let ((binding (assoc prefix rng-c-datatype-decls))) |
| 222 | (or binding (rng-c-error "Undefined prefix %s" prefix)) |
| 223 | (cdr binding))) |
| 224 | |
| 225 | (defun rng-c-expand-datatype (prefixed-name) |
| 226 | (let ((i (string-match ":" prefixed-name))) |
| 227 | (rng-make-datatype |
| 228 | (rng-c-lookup-datatype-prefix (substring prefixed-name 0 i)) |
| 229 | (substring prefixed-name (+ i 1))))) |
| 230 | |
| 231 | ;;; Grammars |
| 232 | |
| 233 | (defvar rng-c-current-grammar nil) |
| 234 | (defvar rng-c-parent-grammar nil) |
| 235 | |
| 236 | (defun rng-c-make-grammar () |
| 237 | (make-hash-table :test 'equal)) |
| 238 | |
| 239 | (defconst rng-c-about-override-slot 0) |
| 240 | (defconst rng-c-about-combine-slot 1) |
| 241 | |
| 242 | (defun rng-c-lookup-create (name grammar) |
| 243 | "Return a def object for NAME. |
| 244 | A def object is a pair \(ABOUT . REF) where REF is returned by |
| 245 | `rng-make-ref'. |
| 246 | ABOUT is a two-element vector [OVERRIDE COMBINE]. |
| 247 | COMBINE is either nil, choice or interleave. |
| 248 | OVERRIDE is either nil, require or t." |
| 249 | (let ((def (gethash name grammar))) |
| 250 | (if def |
| 251 | def |
| 252 | (progn |
| 253 | (setq def (cons (vector nil nil) (rng-make-ref name))) |
| 254 | (puthash name def grammar) |
| 255 | def)))) |
| 256 | |
| 257 | (defun rng-c-make-ref (name) |
| 258 | (or rng-c-current-grammar |
| 259 | (rng-c-error "Reference not in a grammar")) |
| 260 | (cdr (rng-c-lookup-create name rng-c-current-grammar))) |
| 261 | |
| 262 | (defun rng-c-make-parent-ref (name) |
| 263 | (or rng-c-parent-grammar |
| 264 | (rng-c-error "Reference to non-existent parent grammar")) |
| 265 | (cdr (rng-c-lookup-create name rng-c-parent-grammar))) |
| 266 | |
| 267 | (defvar rng-c-overrides nil |
| 268 | "Contains a list of (NAME . DEF) pairs.") |
| 269 | |
| 270 | (defun rng-c-merge-combine (def combine name) |
| 271 | (let* ((about (car def)) |
| 272 | (current-combine (aref about rng-c-about-combine-slot))) |
| 273 | (if combine |
| 274 | (if current-combine |
| 275 | (or (eq combine current-combine) |
| 276 | (rng-c-error "Inconsistent combine for %s" name)) |
| 277 | (aset about rng-c-about-combine-slot combine)) |
| 278 | current-combine))) |
| 279 | |
| 280 | (defun rng-c-prepare-define (name combine in-include) |
| 281 | (let* ((def (rng-c-lookup-create name rng-c-current-grammar)) |
| 282 | (about (car def)) |
| 283 | (overridden (aref about rng-c-about-override-slot))) |
| 284 | (and in-include |
| 285 | (setq rng-c-overrides (cons (cons name def) rng-c-overrides))) |
| 286 | (cond (overridden (and (eq overridden 'require) |
| 287 | (aset about rng-c-about-override-slot t)) |
| 288 | nil) |
| 289 | (t (setq combine (rng-c-merge-combine def combine name)) |
| 290 | (and (rng-ref-get (cdr def)) |
| 291 | (not combine) |
| 292 | (rng-c-error "Duplicate definition of %s" name)) |
| 293 | def)))) |
| 294 | |
| 295 | (defun rng-c-start-include (overrides) |
| 296 | (mapcar (lambda (name-def) |
| 297 | (let* ((def (cdr name-def)) |
| 298 | (about (car def)) |
| 299 | (save (aref about rng-c-about-override-slot))) |
| 300 | (aset about rng-c-about-override-slot 'require) |
| 301 | (cons save name-def))) |
| 302 | overrides)) |
| 303 | |
| 304 | (defun rng-c-end-include (overrides) |
| 305 | (mapcar (lambda (o) |
| 306 | (let* ((saved (car o)) |
| 307 | (name-def (cdr o)) |
| 308 | (name (car name-def)) |
| 309 | (def (cdr name-def)) |
| 310 | (about (car def))) |
| 311 | (and (eq (aref about rng-c-about-override-slot) 'require) |
| 312 | (rng-c-error "Definition of %s in include did not override definition in included file" name)) |
| 313 | (aset about rng-c-about-override-slot saved))) |
| 314 | overrides)) |
| 315 | |
| 316 | (defun rng-c-define (def value) |
| 317 | (and def |
| 318 | (let ((current-value (rng-ref-get (cdr def)))) |
| 319 | (rng-ref-set (cdr def) |
| 320 | (if current-value |
| 321 | (if (eq (aref (car def) rng-c-about-combine-slot) |
| 322 | 'choice) |
| 323 | (rng-make-choice (list current-value value)) |
| 324 | (rng-make-interleave (list current-value value))) |
| 325 | value))))) |
| 326 | |
| 327 | (defun rng-c-finish-grammar () |
| 328 | (maphash (lambda (key def) |
| 329 | (or (rng-ref-get (cdr def)) |
| 330 | (rng-c-error "Reference to undefined pattern %s" key))) |
| 331 | rng-c-current-grammar) |
| 332 | (rng-ref-get (cdr (or (gethash 'start rng-c-current-grammar) |
| 333 | (rng-c-error "No definition of start"))))) |
| 334 | |
| 335 | ;;; Parsing |
| 336 | |
| 337 | (defvar rng-c-escape-positions nil) |
| 338 | (make-variable-buffer-local 'rng-c-escape-positions) |
| 339 | |
| 340 | (defvar rng-c-file-name nil) |
| 341 | (make-variable-buffer-local 'rng-c-file-name) |
| 342 | |
| 343 | (defvar rng-c-file-index nil) |
| 344 | |
| 345 | (defun rng-c-parse-file (filename &optional context) |
| 346 | (with-current-buffer (get-buffer-create (rng-c-buffer-name context)) |
| 347 | (erase-buffer) |
| 348 | (rng-c-init-buffer) |
| 349 | (setq rng-c-file-name |
| 350 | (car (insert-file-contents filename))) |
| 351 | (setq rng-c-escape-positions nil) |
| 352 | (rng-c-process-escapes) |
| 353 | (rng-c-parse-top-level context))) |
| 354 | |
| 355 | (defun rng-c-buffer-name (context) |
| 356 | (concat " *RNC Input" |
| 357 | (if context |
| 358 | (concat "<" |
| 359 | (number-to-string (setq rng-c-file-index |
| 360 | (1+ rng-c-file-index))) |
| 361 | ">*") |
| 362 | (setq rng-c-file-index 1) |
| 363 | "*"))) |
| 364 | |
| 365 | (defun rng-c-process-escapes () |
| 366 | ;; Check for any NULs, since we will use NUL chars |
| 367 | ;; for internal purposes. |
| 368 | (let ((pos (search-forward "\C-@" nil t))) |
| 369 | (and pos |
| 370 | (rng-c-error "Nul character found (binary file?)"))) |
| 371 | (let ((offset 0)) |
| 372 | (while (re-search-forward "\\\\x+{\\([0-9a-fA-F]+\\)}" |
| 373 | (point-max) |
| 374 | t) |
| 375 | (let* ((ch (decode-char 'ucs (string-to-number (match-string 1) 16)))) |
| 376 | (if (and ch (> ch 0)) |
| 377 | (let ((begin (match-beginning 0)) |
| 378 | (end (match-end 0))) |
| 379 | (delete-region begin end) |
| 380 | ;; Represent an escaped newline by nul, so |
| 381 | ;; that we can distinguish it from a literal newline. |
| 382 | ;; We will translate it back into a real newline later. |
| 383 | (insert (if (eq ch ?\n) 0 ch)) |
| 384 | (setq offset (+ offset (- end begin 1))) |
| 385 | (setq rng-c-escape-positions |
| 386 | (cons (cons (point) offset) |
| 387 | rng-c-escape-positions))) |
| 388 | (rng-c-error "Invalid character escape"))))) |
| 389 | (goto-char 1)) |
| 390 | |
| 391 | (defun rng-c-translate-position (pos) |
| 392 | (let ((tem rng-c-escape-positions)) |
| 393 | (while (and tem |
| 394 | (> (caar tem) pos)) |
| 395 | (setq tem (cdr tem))) |
| 396 | (if tem |
| 397 | (+ pos (cdar tem)) |
| 398 | pos))) |
| 399 | |
| 400 | (defun rng-c-error (&rest args) |
| 401 | (rng-c-signal-incorrect-schema rng-c-file-name |
| 402 | (rng-c-translate-position (point)) |
| 403 | (apply 'format args))) |
| 404 | |
| 405 | (defun rng-c-parse-top-level (context) |
| 406 | (let ((rng-c-namespace-decls nil) |
| 407 | (rng-c-default-namespace nil) |
| 408 | (rng-c-datatype-decls nil)) |
| 409 | (goto-char (point-min)) |
| 410 | (forward-comment (point-max)) |
| 411 | (rng-c-advance) |
| 412 | (rng-c-parse-decls) |
| 413 | (let ((p (if (eq context 'include) |
| 414 | (if (rng-c-implicit-grammar-p) |
| 415 | (rng-c-parse-grammar-body "") |
| 416 | (rng-c-parse-included-grammar)) |
| 417 | (if (rng-c-implicit-grammar-p) |
| 418 | (rng-c-parse-implicit-grammar) |
| 419 | (rng-c-parse-pattern))))) |
| 420 | (or (string-equal rng-c-current-token "") |
| 421 | (rng-c-error "Unexpected characters after pattern")) |
| 422 | p))) |
| 423 | |
| 424 | (defun rng-c-parse-included-grammar () |
| 425 | (or (string-equal rng-c-current-token "grammar") |
| 426 | (rng-c-error "Included schema is not a grammar")) |
| 427 | (rng-c-advance) |
| 428 | (rng-c-expect "{") |
| 429 | (rng-c-parse-grammar-body "}")) |
| 430 | |
| 431 | (defun rng-c-implicit-grammar-p () |
| 432 | (or (and (or (rng-c-current-token-prefixed-name-p) |
| 433 | (rng-c-current-token-quoted-identifier-p) |
| 434 | (and (rng-c-current-token-ncname-p) |
| 435 | (not (rng-c-current-token-keyword-p)))) |
| 436 | (looking-at "\\[")) |
| 437 | (and (string-equal rng-c-current-token "[") |
| 438 | (rng-c-parse-lead-annotation) |
| 439 | nil) |
| 440 | (member rng-c-current-token '("div" "include" "")) |
| 441 | (looking-at "[|&]?="))) |
| 442 | |
| 443 | (defun rng-c-parse-decls () |
| 444 | (setq rng-c-default-namespace-declared nil) |
| 445 | (while (progn |
| 446 | (let ((binding |
| 447 | (assoc rng-c-current-token |
| 448 | '(("namespace" . rng-c-parse-namespace) |
| 449 | ("datatypes" . rng-c-parse-datatypes) |
| 450 | ("default" . rng-c-parse-default))))) |
| 451 | (if binding |
| 452 | (progn |
| 453 | (rng-c-advance) |
| 454 | (funcall (cdr binding)) |
| 455 | t) |
| 456 | nil)))) |
| 457 | (rng-c-declare-standard-datatypes) |
| 458 | (rng-c-declare-standard-namespaces)) |
| 459 | |
| 460 | (defun rng-c-parse-datatypes () |
| 461 | (let ((prefix (rng-c-parse-identifier-or-keyword))) |
| 462 | (or (not (assoc prefix rng-c-datatype-decls)) |
| 463 | (rng-c-error "Duplicate datatypes declaration for prefix %s" prefix)) |
| 464 | (rng-c-expect "=") |
| 465 | (setq rng-c-datatype-decls |
| 466 | (cons (cons prefix |
| 467 | (rng-make-datatypes-uri (rng-c-parse-literal))) |
| 468 | rng-c-datatype-decls)))) |
| 469 | |
| 470 | (defun rng-c-parse-namespace () |
| 471 | (rng-c-declare-namespace nil |
| 472 | (rng-c-parse-identifier-or-keyword))) |
| 473 | |
| 474 | (defun rng-c-parse-default () |
| 475 | (rng-c-expect "namespace") |
| 476 | (rng-c-declare-namespace t |
| 477 | (if (string-equal rng-c-current-token "=") |
| 478 | nil |
| 479 | (rng-c-parse-identifier-or-keyword)))) |
| 480 | |
| 481 | (defun rng-c-declare-namespace (declare-default prefix) |
| 482 | (rng-c-expect "=") |
| 483 | (let ((ns (cond ((string-equal rng-c-current-token "inherit") |
| 484 | (rng-c-advance) |
| 485 | rng-c-inherit-namespace) |
| 486 | (t |
| 487 | (nxml-make-namespace (rng-c-parse-literal)))))) |
| 488 | (and prefix |
| 489 | (or (not (assoc prefix rng-c-namespace-decls)) |
| 490 | (rng-c-error "Duplicate namespace declaration for prefix %s" |
| 491 | prefix)) |
| 492 | (setq rng-c-namespace-decls |
| 493 | (cons (cons prefix ns) rng-c-namespace-decls))) |
| 494 | (and declare-default |
| 495 | (or (not rng-c-default-namespace-declared) |
| 496 | (rng-c-error "Duplicate default namespace declaration")) |
| 497 | (setq rng-c-default-namespace-declared t) |
| 498 | (setq rng-c-default-namespace ns)))) |
| 499 | |
| 500 | (defun rng-c-parse-implicit-grammar () |
| 501 | (let* ((rng-c-parent-grammar rng-c-current-grammar) |
| 502 | (rng-c-current-grammar (rng-c-make-grammar))) |
| 503 | (rng-c-parse-grammar-body "") |
| 504 | (rng-c-finish-grammar))) |
| 505 | |
| 506 | (defun rng-c-parse-grammar-body (close-token &optional in-include) |
| 507 | (while (not (string-equal rng-c-current-token close-token)) |
| 508 | (cond ((rng-c-current-token-keyword-p) |
| 509 | (let ((kw (intern rng-c-current-token))) |
| 510 | (cond ((eq kw 'start) |
| 511 | (rng-c-parse-define 'start in-include)) |
| 512 | ((eq kw 'div) |
| 513 | (rng-c-advance) |
| 514 | (rng-c-parse-div in-include)) |
| 515 | ((eq kw 'include) |
| 516 | (and in-include |
| 517 | (rng-c-error "Nested include")) |
| 518 | (rng-c-advance) |
| 519 | (rng-c-parse-include)) |
| 520 | (t (rng-c-error "Invalid grammar keyword"))))) |
| 521 | ((rng-c-current-token-ncname-p) |
| 522 | (if (looking-at "\\[") |
| 523 | (rng-c-parse-annotation-element) |
| 524 | (rng-c-parse-define rng-c-current-token |
| 525 | in-include))) |
| 526 | ((rng-c-current-token-quoted-identifier-p) |
| 527 | (if (looking-at "\\[") |
| 528 | (rng-c-parse-annotation-element) |
| 529 | (rng-c-parse-define (substring rng-c-current-token 1) |
| 530 | in-include))) |
| 531 | ((rng-c-current-token-prefixed-name-p) |
| 532 | (rng-c-parse-annotation-element)) |
| 533 | ((string-equal rng-c-current-token "[") |
| 534 | (rng-c-parse-lead-annotation) |
| 535 | (and (string-equal rng-c-current-token close-token) |
| 536 | (rng-c-error "Missing annotation subject")) |
| 537 | (and (looking-at "\\[") |
| 538 | (rng-c-error "Leading annotation applied to annotation"))) |
| 539 | (t (rng-c-error "Invalid grammar content")))) |
| 540 | (or (string-equal rng-c-current-token "") |
| 541 | (rng-c-advance))) |
| 542 | |
| 543 | (defun rng-c-parse-div (in-include) |
| 544 | (rng-c-expect "{") |
| 545 | (rng-c-parse-grammar-body "}" in-include)) |
| 546 | |
| 547 | (defun rng-c-parse-include () |
| 548 | (let* ((filename (rng-c-expand-file (rng-c-parse-literal))) |
| 549 | (rng-c-inherit-namespace (rng-c-parse-opt-inherit)) |
| 550 | overrides) |
| 551 | (cond ((string-equal rng-c-current-token "{") |
| 552 | (rng-c-advance) |
| 553 | (let ((rng-c-overrides nil)) |
| 554 | (rng-c-parse-grammar-body "}" t) |
| 555 | (setq overrides rng-c-overrides)) |
| 556 | (setq overrides (rng-c-start-include overrides)) |
| 557 | (rng-c-parse-file filename 'include) |
| 558 | (rng-c-end-include overrides)) |
| 559 | (t (rng-c-parse-file filename 'include))))) |
| 560 | |
| 561 | (defun rng-c-parse-define (name in-include) |
| 562 | (rng-c-advance) |
| 563 | (let ((assign (assoc rng-c-current-token |
| 564 | '(("=" . nil) |
| 565 | ("|=" . choice) |
| 566 | ("&=" . interleave))))) |
| 567 | (or assign |
| 568 | (rng-c-error "Expected assignment operator")) |
| 569 | (rng-c-advance) |
| 570 | (let ((ref (rng-c-prepare-define name (cdr assign) in-include))) |
| 571 | (rng-c-define ref (rng-c-parse-pattern))))) |
| 572 | |
| 573 | (defvar rng-c-had-except nil) |
| 574 | |
| 575 | (defun rng-c-parse-pattern () |
| 576 | (let* ((rng-c-had-except nil) |
| 577 | (p (rng-c-parse-repeated)) |
| 578 | (op (assoc rng-c-current-token |
| 579 | '(("|" . rng-make-choice) |
| 580 | ("," . rng-make-group) |
| 581 | ("&" . rng-make-interleave))))) |
| 582 | (if op |
| 583 | (if rng-c-had-except |
| 584 | (rng-c-error "Parentheses required around pattern using -") |
| 585 | (let* ((patterns (cons p nil)) |
| 586 | (tail patterns) |
| 587 | (connector rng-c-current-token)) |
| 588 | (while (progn |
| 589 | (rng-c-advance) |
| 590 | (let ((newcdr (cons (rng-c-parse-repeated) nil))) |
| 591 | (setcdr tail newcdr) |
| 592 | (setq tail newcdr)) |
| 593 | (string-equal rng-c-current-token connector))) |
| 594 | (funcall (cdr op) patterns))) |
| 595 | p))) |
| 596 | |
| 597 | (defun rng-c-parse-repeated () |
| 598 | (let ((p (rng-c-parse-follow-annotations |
| 599 | (rng-c-parse-primary))) |
| 600 | (op (assoc rng-c-current-token |
| 601 | '(("*" . rng-make-zero-or-more) |
| 602 | ("+" . rng-make-one-or-more) |
| 603 | ("?" . rng-make-optional))))) |
| 604 | (if op |
| 605 | (if rng-c-had-except |
| 606 | (rng-c-error "Parentheses required around pattern using -") |
| 607 | (rng-c-parse-follow-annotations |
| 608 | (progn |
| 609 | (rng-c-advance) |
| 610 | (funcall (cdr op) p)))) |
| 611 | p))) |
| 612 | |
| 613 | (defun rng-c-parse-primary () |
| 614 | "Parse a primary expression. |
| 615 | The current token must be the first token of the expression. |
| 616 | After parsing the current token should be the token following |
| 617 | the primary expression." |
| 618 | (cond ((rng-c-current-token-keyword-p) |
| 619 | (let ((parse-function (get (intern rng-c-current-token) |
| 620 | 'rng-c-pattern))) |
| 621 | (or parse-function |
| 622 | (rng-c-error "Keyword %s does not introduce a pattern" |
| 623 | rng-c-current-token)) |
| 624 | (rng-c-advance) |
| 625 | (funcall parse-function))) |
| 626 | ((rng-c-current-token-ncname-p) |
| 627 | (rng-c-advance-with (rng-c-make-ref rng-c-current-token))) |
| 628 | ((string-equal rng-c-current-token "(") |
| 629 | (rng-c-advance) |
| 630 | (let ((p (rng-c-parse-pattern))) |
| 631 | (rng-c-expect ")") |
| 632 | p)) |
| 633 | ((rng-c-current-token-prefixed-name-p) |
| 634 | (let ((name (rng-c-expand-datatype rng-c-current-token))) |
| 635 | (rng-c-advance) |
| 636 | (rng-c-parse-data name))) |
| 637 | ((rng-c-current-token-literal-p) |
| 638 | (rng-make-value rng-token-datatype (rng-c-parse-literal) nil)) |
| 639 | ((rng-c-current-token-quoted-identifier-p) |
| 640 | (rng-c-advance-with |
| 641 | (rng-c-make-ref (substring rng-c-current-token 1)))) |
| 642 | ((string-equal rng-c-current-token "[") |
| 643 | (rng-c-parse-lead-annotation) |
| 644 | (rng-c-parse-primary)) |
| 645 | (t (rng-c-error "Invalid pattern")))) |
| 646 | |
| 647 | (defun rng-c-parse-parent () |
| 648 | (and (rng-c-current-token-keyword-p) |
| 649 | (rng-c-error "Keyword following parent was not quoted" |
| 650 | rng-c-current-token)) |
| 651 | (rng-c-make-parent-ref (rng-c-parse-identifier-or-keyword))) |
| 652 | |
| 653 | (defun rng-c-parse-literal () |
| 654 | (rng-c-fix-escaped-newlines |
| 655 | (apply 'concat (rng-c-parse-literal-segments)))) |
| 656 | |
| 657 | (defun rng-c-parse-literal-segments () |
| 658 | (let ((str (rng-c-parse-literal-segment))) |
| 659 | (cons str |
| 660 | (cond ((string-equal rng-c-current-token "~") |
| 661 | (rng-c-advance) |
| 662 | (rng-c-parse-literal-segments)) |
| 663 | (t nil))))) |
| 664 | |
| 665 | (defun rng-c-parse-literal-segment () |
| 666 | (or (rng-c-current-token-literal-p) |
| 667 | (rng-c-error "Expected a literal")) |
| 668 | (rng-c-advance-with |
| 669 | (let ((n (if (and (>= (length rng-c-current-token) 6) |
| 670 | (eq (aref rng-c-current-token 0) |
| 671 | (aref rng-c-current-token 1))) |
| 672 | 3 |
| 673 | 1))) |
| 674 | (substring rng-c-current-token n (- n))))) |
| 675 | |
| 676 | (defun rng-c-fix-escaped-newlines (str) |
| 677 | (let ((pos 0)) |
| 678 | (while (progn |
| 679 | (let ((n (string-match "\C-@" str pos))) |
| 680 | (and n |
| 681 | (aset str n ?\n) |
| 682 | (setq pos (1+ n))))))) |
| 683 | str) |
| 684 | |
| 685 | (defun rng-c-parse-identifier-or-keyword () |
| 686 | (cond ((rng-c-current-token-ncname-p) |
| 687 | (rng-c-advance-with rng-c-current-token)) |
| 688 | ((rng-c-current-token-quoted-identifier-p) |
| 689 | (rng-c-advance-with (substring rng-c-current-token 1))) |
| 690 | (t (rng-c-error "Expected identifier or keyword")))) |
| 691 | |
| 692 | (put 'string 'rng-c-pattern 'rng-c-parse-string) |
| 693 | (put 'token 'rng-c-pattern 'rng-c-parse-token) |
| 694 | (put 'element 'rng-c-pattern 'rng-c-parse-element) |
| 695 | (put 'attribute 'rng-c-pattern 'rng-c-parse-attribute) |
| 696 | (put 'list 'rng-c-pattern 'rng-c-parse-list) |
| 697 | (put 'mixed 'rng-c-pattern 'rng-c-parse-mixed) |
| 698 | (put 'text 'rng-c-pattern 'rng-c-parse-text) |
| 699 | (put 'empty 'rng-c-pattern 'rng-c-parse-empty) |
| 700 | (put 'notAllowed 'rng-c-pattern 'rng-c-parse-not-allowed) |
| 701 | (put 'grammar 'rng-c-pattern 'rng-c-parse-grammar) |
| 702 | (put 'parent 'rng-c-pattern 'rng-c-parse-parent) |
| 703 | (put 'external 'rng-c-pattern 'rng-c-parse-external) |
| 704 | |
| 705 | (defun rng-c-parse-element () |
| 706 | (let ((name-class (rng-c-parse-name-class nil))) |
| 707 | (rng-c-expect "{") |
| 708 | (let ((pattern (rng-c-parse-pattern))) |
| 709 | (rng-c-expect "}") |
| 710 | (rng-make-element name-class pattern)))) |
| 711 | |
| 712 | (defun rng-c-parse-attribute () |
| 713 | (let ((name-class (rng-c-parse-name-class 'attribute))) |
| 714 | (rng-c-expect "{") |
| 715 | (let ((pattern (rng-c-parse-pattern))) |
| 716 | (rng-c-expect "}") |
| 717 | (rng-make-attribute name-class pattern)))) |
| 718 | |
| 719 | (defun rng-c-parse-name-class (attribute) |
| 720 | (let* ((rng-c-had-except nil) |
| 721 | (name-class |
| 722 | (rng-c-parse-follow-annotations |
| 723 | (rng-c-parse-primary-name-class attribute)))) |
| 724 | (if (string-equal rng-c-current-token "|") |
| 725 | (let* ((name-classes (cons name-class nil)) |
| 726 | (tail name-classes)) |
| 727 | (or (not rng-c-had-except) |
| 728 | (rng-c-error "Parentheses required around name-class using - operator")) |
| 729 | (while (progn |
| 730 | (rng-c-advance) |
| 731 | (let ((newcdr |
| 732 | (cons (rng-c-parse-follow-annotations |
| 733 | (rng-c-parse-primary-name-class attribute)) |
| 734 | nil))) |
| 735 | (setcdr tail newcdr) |
| 736 | (setq tail newcdr)) |
| 737 | (string-equal rng-c-current-token "|"))) |
| 738 | (rng-make-choice-name-class name-classes)) |
| 739 | name-class))) |
| 740 | |
| 741 | (defun rng-c-parse-primary-name-class (attribute) |
| 742 | (cond ((rng-c-current-token-ncname-p) |
| 743 | (rng-c-advance-with |
| 744 | (rng-make-name-name-class |
| 745 | (rng-make-name (rng-c-unqualified-namespace attribute) |
| 746 | rng-c-current-token)))) |
| 747 | ((rng-c-current-token-prefixed-name-p) |
| 748 | (rng-c-advance-with |
| 749 | (rng-make-name-name-class |
| 750 | (rng-c-expand-name rng-c-current-token)))) |
| 751 | ((string-equal rng-c-current-token "*") |
| 752 | (let ((except (rng-c-parse-opt-except-name-class attribute))) |
| 753 | (if except |
| 754 | (rng-make-any-name-except-name-class except) |
| 755 | (rng-make-any-name-name-class)))) |
| 756 | ((rng-c-current-token-ns-name-p) |
| 757 | (let* ((ns |
| 758 | (rng-c-lookup-prefix (substring rng-c-current-token |
| 759 | 0 |
| 760 | -2))) |
| 761 | (except (rng-c-parse-opt-except-name-class attribute))) |
| 762 | (if except |
| 763 | (rng-make-ns-name-except-name-class ns except) |
| 764 | (rng-make-ns-name-name-class ns)))) |
| 765 | ((string-equal rng-c-current-token "(") |
| 766 | (rng-c-advance) |
| 767 | (let ((name-class (rng-c-parse-name-class attribute))) |
| 768 | (rng-c-expect ")") |
| 769 | name-class)) |
| 770 | ((rng-c-current-token-quoted-identifier-p) |
| 771 | (rng-c-advance-with |
| 772 | (rng-make-name-name-class |
| 773 | (rng-make-name (rng-c-unqualified-namespace attribute) |
| 774 | (substring rng-c-current-token 1))))) |
| 775 | ((string-equal rng-c-current-token "[") |
| 776 | (rng-c-parse-lead-annotation) |
| 777 | (rng-c-parse-primary-name-class attribute)) |
| 778 | (t (rng-c-error "Bad name class")))) |
| 779 | |
| 780 | (defun rng-c-parse-opt-except-name-class (attribute) |
| 781 | (rng-c-advance) |
| 782 | (and (string-equal rng-c-current-token "-") |
| 783 | (or (not rng-c-had-except) |
| 784 | (rng-c-error "Parentheses required around name-class using - operator")) |
| 785 | (setq rng-c-had-except t) |
| 786 | (progn |
| 787 | (rng-c-advance) |
| 788 | (rng-c-parse-primary-name-class attribute)))) |
| 789 | |
| 790 | (defun rng-c-parse-mixed () |
| 791 | (rng-c-expect "{") |
| 792 | (let ((pattern (rng-make-mixed (rng-c-parse-pattern)))) |
| 793 | (rng-c-expect "}") |
| 794 | pattern)) |
| 795 | |
| 796 | (defun rng-c-parse-list () |
| 797 | (rng-c-expect "{") |
| 798 | (let ((pattern (rng-make-list (rng-c-parse-pattern)))) |
| 799 | (rng-c-expect "}") |
| 800 | pattern)) |
| 801 | |
| 802 | (defun rng-c-parse-text () |
| 803 | (rng-make-text)) |
| 804 | |
| 805 | (defun rng-c-parse-empty () |
| 806 | (rng-make-empty)) |
| 807 | |
| 808 | (defun rng-c-parse-not-allowed () |
| 809 | (rng-make-not-allowed)) |
| 810 | |
| 811 | (defun rng-c-parse-string () |
| 812 | (rng-c-parse-data rng-string-datatype)) |
| 813 | |
| 814 | (defun rng-c-parse-token () |
| 815 | (rng-c-parse-data rng-token-datatype)) |
| 816 | |
| 817 | (defun rng-c-parse-data (name) |
| 818 | (if (rng-c-current-token-literal-p) |
| 819 | (rng-make-value name |
| 820 | (rng-c-parse-literal) |
| 821 | (and (car name) |
| 822 | (rng-c-make-context))) |
| 823 | (let ((params (rng-c-parse-optional-params))) |
| 824 | (if (string-equal rng-c-current-token "-") |
| 825 | (progn |
| 826 | (if rng-c-had-except |
| 827 | (rng-c-error "Parentheses required around pattern using -") |
| 828 | (setq rng-c-had-except t)) |
| 829 | (rng-c-advance) |
| 830 | (rng-make-data-except name |
| 831 | params |
| 832 | (rng-c-parse-primary))) |
| 833 | (rng-make-data name params))))) |
| 834 | |
| 835 | (defun rng-c-parse-optional-params () |
| 836 | (and (string-equal rng-c-current-token "{") |
| 837 | (let* ((head (cons nil nil)) |
| 838 | (tail head)) |
| 839 | (rng-c-advance) |
| 840 | (while (not (string-equal rng-c-current-token "}")) |
| 841 | (and (string-equal rng-c-current-token "[") |
| 842 | (rng-c-parse-lead-annotation)) |
| 843 | (let ((name (rng-c-parse-identifier-or-keyword))) |
| 844 | (rng-c-expect "=") |
| 845 | (let ((newcdr (cons (cons (intern name) |
| 846 | (rng-c-parse-literal)) |
| 847 | nil))) |
| 848 | (setcdr tail newcdr) |
| 849 | (setq tail newcdr)))) |
| 850 | (rng-c-advance) |
| 851 | (cdr head)))) |
| 852 | |
| 853 | (defun rng-c-parse-external () |
| 854 | (let* ((filename (rng-c-expand-file (rng-c-parse-literal))) |
| 855 | (rng-c-inherit-namespace (rng-c-parse-opt-inherit))) |
| 856 | (rng-c-parse-file filename 'external))) |
| 857 | |
| 858 | (defun rng-c-expand-file (uri) |
| 859 | (condition-case err |
| 860 | (rng-uri-file-name (rng-uri-resolve uri |
| 861 | (rng-file-name-uri rng-c-file-name))) |
| 862 | (rng-uri-error |
| 863 | (rng-c-error (cadr err))))) |
| 864 | |
| 865 | (defun rng-c-parse-opt-inherit () |
| 866 | (cond ((string-equal rng-c-current-token "inherit") |
| 867 | (rng-c-advance) |
| 868 | (rng-c-expect "=") |
| 869 | (rng-c-lookup-prefix (rng-c-parse-identifier-or-keyword))) |
| 870 | (t rng-c-default-namespace))) |
| 871 | |
| 872 | (defun rng-c-parse-grammar () |
| 873 | (rng-c-expect "{") |
| 874 | (let* ((rng-c-parent-grammar rng-c-current-grammar) |
| 875 | (rng-c-current-grammar (rng-c-make-grammar))) |
| 876 | (rng-c-parse-grammar-body "}") |
| 877 | (rng-c-finish-grammar))) |
| 878 | |
| 879 | (defun rng-c-parse-lead-annotation () |
| 880 | (rng-c-parse-annotation-body) |
| 881 | (and (string-equal rng-c-current-token "[") |
| 882 | (rng-c-error "Multiple leading annotations"))) |
| 883 | |
| 884 | (defun rng-c-parse-follow-annotations (obj) |
| 885 | (while (string-equal rng-c-current-token ">>") |
| 886 | (rng-c-advance) |
| 887 | (if (rng-c-current-token-prefixed-name-p) |
| 888 | (rng-c-advance) |
| 889 | (rng-c-parse-identifier-or-keyword)) |
| 890 | (rng-c-parse-annotation-body t)) |
| 891 | obj) |
| 892 | |
| 893 | (defun rng-c-parse-annotation-element () |
| 894 | (rng-c-advance) |
| 895 | (rng-c-parse-annotation-body t)) |
| 896 | |
| 897 | ;; XXX need stricter checking of attribute names |
| 898 | ;; XXX don't allow attributes after text |
| 899 | |
| 900 | (defun rng-c-parse-annotation-body (&optional allow-text) |
| 901 | "Current token is [. Parse up to matching ]. |
| 902 | Current token after parse is token following ]." |
| 903 | (or (string-equal rng-c-current-token "[") |
| 904 | (rng-c-error "Expected [")) |
| 905 | (rng-c-advance) |
| 906 | (while (not (string-equal rng-c-current-token "]")) |
| 907 | (cond ((rng-c-current-token-literal-p) |
| 908 | (or allow-text |
| 909 | (rng-c-error "Out of place text within annotation")) |
| 910 | (rng-c-parse-literal)) |
| 911 | (t |
| 912 | (if (rng-c-current-token-prefixed-name-p) |
| 913 | (rng-c-advance) |
| 914 | (rng-c-parse-identifier-or-keyword)) |
| 915 | (cond ((string-equal rng-c-current-token "[") |
| 916 | (rng-c-parse-annotation-body t)) |
| 917 | ((string-equal rng-c-current-token "=") |
| 918 | (rng-c-advance) |
| 919 | (rng-c-parse-literal)) |
| 920 | (t (rng-c-error "Expected = or [")))))) |
| 921 | (rng-c-advance)) |
| 922 | |
| 923 | (defun rng-c-advance-with (pattern) |
| 924 | (rng-c-advance) |
| 925 | pattern) |
| 926 | |
| 927 | (defun rng-c-expect (str) |
| 928 | (or (string-equal rng-c-current-token str) |
| 929 | (rng-c-error "Expected `%s' but got `%s'" str rng-c-current-token)) |
| 930 | (rng-c-advance)) |
| 931 | |
| 932 | (provide 'rng-cmpct) |
| 933 | |
| 934 | ;;; rng-cmpct.el |