Commit | Line | Data |
---|---|---|
8cd39fb3 MH |
1 | ;;; rng-cmpct.el --- parsing of RELAX NG Compact Syntax schemas |
2 | ||
ba318903 | 3 | ;; Copyright (C) 2003, 2007-2014 Free Software Foundation, Inc. |
8cd39fb3 MH |
4 | |
5 | ;; Author: James Clark | |
3e77f05d | 6 | ;; Keywords: wp, hypermedia, languages, XML, RelaxNG |
8cd39fb3 | 7 | |
09aa73e6 | 8 | ;; This file is part of GNU Emacs. |
8cd39fb3 | 9 | |
4936186e | 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
09aa73e6 | 11 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
12 | ;; the Free Software Foundation, either version 3 of the License, or |
13 | ;; (at your option) any later version. | |
8cd39fb3 | 14 | |
09aa73e6 GM |
15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
8cd39fb3 MH |
22 | |
23 | ;;; Commentary: | |
24 | ||
25 | ;; This parses a RELAX NG Compact Syntax schema into the form | |
26 | ;; specified in rng-pttrn.el. | |
27 | ;; | |
28 | ;; RELAX NG Compact Syntax is specified by | |
29 | ;; http://relaxng.org/compact.html | |
30 | ;; | |
31 | ;; This file uses the prefix "rng-c-". | |
32 | ||
33 | ;;; Code: | |
34 | ||
35 | (require 'nxml-util) | |
36 | (require 'rng-util) | |
37 | (require 'rng-uri) | |
38 | (require 'rng-pttrn) | |
39 | ||
40 | ;;;###autoload | |
41 | (defun rng-c-load-schema (filename) | |
42 | "Load a schema in RELAX NG compact syntax from FILENAME. | |
43 | Return a pattern." | |
44 | (rng-c-parse-file filename)) | |
45 | ||
46 | ;;; Error handling | |
47 | ||
54bd972f SM |
48 | (define-error 'rng-c-incorrect-schema |
49 | "Incorrect schema" '(rng-error nxml-file-parse-error)) | |
8cd39fb3 MH |
50 | |
51 | (defun rng-c-signal-incorrect-schema (filename pos message) | |
52 | (nxml-signal-file-parse-error filename | |
53 | pos | |
54 | message | |
55 | 'rng-c-incorrect-schema)) | |
56 | ||
57 | ;;; Lexing | |
58 | ||
59 | (defconst rng-c-keywords | |
60 | '("attribute" | |
61 | "default" | |
62 | "datatypes" | |
63 | "div" | |
64 | "element" | |
65 | "empty" | |
66 | "external" | |
67 | "grammar" | |
68 | "include" | |
69 | "inherit" | |
70 | "list" | |
71 | "mixed" | |
72 | "namespace" | |
73 | "notAllowed" | |
74 | "parent" | |
75 | "start" | |
76 | "string" | |
77 | "text" | |
78 | "token") | |
79 | "List of strings that are keywords in the compact syntax.") | |
80 | ||
81 | (defconst rng-c-anchored-keyword-re | |
82 | (concat "\\`\\(" (regexp-opt rng-c-keywords) "\\)\\'") | |
83 | "Regular expression to match a keyword in the compact syntax.") | |
84 | ||
85 | (defvar rng-c-syntax-table nil | |
86 | "Syntax table for parsing the compact syntax.") | |
87 | ||
88 | (if rng-c-syntax-table | |
89 | () | |
90 | (setq rng-c-syntax-table (make-syntax-table)) | |
91 | (modify-syntax-entry ?# "<" rng-c-syntax-table) | |
92 | (modify-syntax-entry ?\n ">" rng-c-syntax-table) | |
93 | (modify-syntax-entry ?- "w" rng-c-syntax-table) | |
94 | (modify-syntax-entry ?. "w" rng-c-syntax-table) | |
95 | (modify-syntax-entry ?_ "w" rng-c-syntax-table) | |
96 | (modify-syntax-entry ?: "_" rng-c-syntax-table)) | |
97 | ||
98 | (defconst rng-c-literal-1-re | |
99 | "'\\(''\\([^']\\|'[^']\\|''[^']\\)*''\\|[^'\n]*\\)'" | |
100 | "Regular expression to match a single-quoted literal.") | |
101 | ||
102 | (defconst rng-c-literal-2-re | |
103 | (replace-regexp-in-string "'" "\"" rng-c-literal-1-re) | |
104 | "Regular expression to match a double-quoted literal.") | |
105 | ||
106 | (defconst rng-c-ncname-re "\\w+") | |
107 | ||
108 | (defconst rng-c-anchored-ncname-re | |
109 | (concat "\\`" rng-c-ncname-re "\\'")) | |
110 | ||
111 | (defconst rng-c-token-re | |
112 | (concat "[&|]=" "\\|" | |
113 | "[][()|&,*+?{}~=-]" "\\|" | |
114 | rng-c-literal-1-re "\\|" | |
115 | rng-c-literal-2-re "\\|" | |
116 | rng-c-ncname-re "\\(:\\(\\*\\|" rng-c-ncname-re "\\)\\)?" "\\|" | |
117 | "\\\\" rng-c-ncname-re "\\|" | |
118 | ">>") | |
119 | "Regular expression to match a token in the compact syntax.") | |
120 | ||
121 | (defun rng-c-init-buffer () | |
122 | (setq case-fold-search nil) ; automatically becomes buffer-local when set | |
123 | (set-buffer-multibyte t) | |
124 | (set-syntax-table rng-c-syntax-table)) | |
125 | ||
126 | (defvar rng-c-current-token nil) | |
127 | (make-variable-buffer-local 'rng-c-current-token) | |
128 | ||
129 | (defun rng-c-advance () | |
130 | (cond ((looking-at rng-c-token-re) | |
131 | (setq rng-c-current-token (match-string 0)) | |
132 | (goto-char (match-end 0)) | |
133 | (forward-comment (point-max))) | |
134 | ((= (point) (point-max)) | |
135 | (setq rng-c-current-token "")) | |
136 | (t (rng-c-error "Invalid token")))) | |
137 | ||
138 | (defconst rng-c-anchored-datatype-name-re | |
139 | (concat "\\`" rng-c-ncname-re ":" rng-c-ncname-re "\\'")) | |
140 | ||
141 | (defsubst rng-c-current-token-keyword-p () | |
142 | (string-match rng-c-anchored-keyword-re rng-c-current-token)) | |
143 | ||
144 | (defsubst rng-c-current-token-prefixed-name-p () | |
145 | (string-match rng-c-anchored-datatype-name-re rng-c-current-token)) | |
146 | ||
147 | (defsubst rng-c-current-token-literal-p () | |
148 | (string-match "\\`['\"]" rng-c-current-token)) | |
149 | ||
150 | (defsubst rng-c-current-token-quoted-identifier-p () | |
151 | (string-match "\\`\\\\" rng-c-current-token)) | |
152 | ||
153 | (defsubst rng-c-current-token-ncname-p () | |
154 | (string-match rng-c-anchored-ncname-re rng-c-current-token)) | |
155 | ||
156 | (defsubst rng-c-current-token-ns-name-p () | |
157 | (let ((len (length rng-c-current-token))) | |
158 | (and (> len 0) | |
159 | (= (aref rng-c-current-token (- len 1)) ?*)))) | |
160 | ||
161 | ;;; Namespaces | |
162 | ||
163 | (defvar rng-c-inherit-namespace nil) | |
164 | ||
165 | (defvar rng-c-default-namespace nil) | |
166 | ||
167 | (defvar rng-c-default-namespace-declared nil) | |
168 | ||
169 | (defvar rng-c-namespace-decls nil | |
170 | "Alist of namespace declarations.") | |
171 | ||
172 | (defconst rng-c-no-namespace nil) | |
173 | ||
174 | (defun rng-c-declare-standard-namespaces () | |
175 | (setq rng-c-namespace-decls | |
176 | (cons (cons "xml" nxml-xml-namespace-uri) | |
177 | rng-c-namespace-decls)) | |
178 | (when (and (not rng-c-default-namespace-declared) | |
179 | rng-c-inherit-namespace) | |
180 | (setq rng-c-default-namespace rng-c-inherit-namespace))) | |
181 | ||
182 | (defun rng-c-expand-name (prefixed-name) | |
183 | (let ((i (string-match ":" prefixed-name))) | |
184 | (rng-make-name (rng-c-lookup-prefix (substring prefixed-name | |
185 | 0 | |
186 | i)) | |
187 | (substring prefixed-name (+ i 1))))) | |
188 | ||
189 | (defun rng-c-lookup-prefix (prefix) | |
190 | (let ((binding (assoc prefix rng-c-namespace-decls))) | |
191 | (or binding (rng-c-error "Undefined prefix %s" prefix)) | |
192 | (cdr binding))) | |
193 | ||
194 | (defun rng-c-unqualified-namespace (attribute) | |
195 | (if attribute | |
196 | rng-c-no-namespace | |
197 | rng-c-default-namespace)) | |
198 | ||
199 | (defun rng-c-make-context () | |
200 | (cons rng-c-default-namespace rng-c-namespace-decls)) | |
201 | ||
202 | ;;; Datatypes | |
203 | ||
204 | (defconst rng-string-datatype | |
205 | (rng-make-datatype rng-builtin-datatypes-uri "string")) | |
206 | ||
207 | (defconst rng-token-datatype | |
208 | (rng-make-datatype rng-builtin-datatypes-uri "token")) | |
209 | ||
210 | (defvar rng-c-datatype-decls nil | |
211 | "Alist of datatype declarations. | |
212 | Contains a list of pairs (PREFIX . URI) where PREFIX is a string | |
213 | and URI is a symbol.") | |
214 | ||
215 | (defun rng-c-declare-standard-datatypes () | |
216 | (setq rng-c-datatype-decls | |
217 | (cons (cons "xsd" rng-xsd-datatypes-uri) | |
218 | rng-c-datatype-decls))) | |
219 | ||
220 | (defun rng-c-lookup-datatype-prefix (prefix) | |
221 | (let ((binding (assoc prefix rng-c-datatype-decls))) | |
222 | (or binding (rng-c-error "Undefined prefix %s" prefix)) | |
223 | (cdr binding))) | |
224 | ||
225 | (defun rng-c-expand-datatype (prefixed-name) | |
226 | (let ((i (string-match ":" prefixed-name))) | |
227 | (rng-make-datatype | |
228 | (rng-c-lookup-datatype-prefix (substring prefixed-name 0 i)) | |
229 | (substring prefixed-name (+ i 1))))) | |
230 | ||
231 | ;;; Grammars | |
232 | ||
233 | (defvar rng-c-current-grammar nil) | |
234 | (defvar rng-c-parent-grammar nil) | |
235 | ||
236 | (defun rng-c-make-grammar () | |
237 | (make-hash-table :test 'equal)) | |
238 | ||
239 | (defconst rng-c-about-override-slot 0) | |
240 | (defconst rng-c-about-combine-slot 1) | |
241 | ||
242 | (defun rng-c-lookup-create (name grammar) | |
10545bd8 JB |
243 | "Return a def object for NAME. |
244 | A def object is a pair \(ABOUT . REF) where REF is returned by | |
245 | `rng-make-ref'. | |
246 | ABOUT is a two-element vector [OVERRIDE COMBINE]. | |
247 | COMBINE is either nil, choice or interleave. | |
248 | OVERRIDE is either nil, require or t." | |
8cd39fb3 MH |
249 | (let ((def (gethash name grammar))) |
250 | (if def | |
251 | def | |
10545bd8 | 252 | (progn |
8cd39fb3 MH |
253 | (setq def (cons (vector nil nil) (rng-make-ref name))) |
254 | (puthash name def grammar) | |
255 | def)))) | |
256 | ||
257 | (defun rng-c-make-ref (name) | |
258 | (or rng-c-current-grammar | |
259 | (rng-c-error "Reference not in a grammar")) | |
260 | (cdr (rng-c-lookup-create name rng-c-current-grammar))) | |
261 | ||
262 | (defun rng-c-make-parent-ref (name) | |
263 | (or rng-c-parent-grammar | |
264 | (rng-c-error "Reference to non-existent parent grammar")) | |
265 | (cdr (rng-c-lookup-create name rng-c-parent-grammar))) | |
266 | ||
267 | (defvar rng-c-overrides nil | |
268 | "Contains a list of (NAME . DEF) pairs.") | |
269 | ||
270 | (defun rng-c-merge-combine (def combine name) | |
271 | (let* ((about (car def)) | |
272 | (current-combine (aref about rng-c-about-combine-slot))) | |
273 | (if combine | |
274 | (if current-combine | |
275 | (or (eq combine current-combine) | |
276 | (rng-c-error "Inconsistent combine for %s" name)) | |
277 | (aset about rng-c-about-combine-slot combine)) | |
278 | current-combine))) | |
279 | ||
280 | (defun rng-c-prepare-define (name combine in-include) | |
281 | (let* ((def (rng-c-lookup-create name rng-c-current-grammar)) | |
282 | (about (car def)) | |
283 | (overridden (aref about rng-c-about-override-slot))) | |
284 | (and in-include | |
285 | (setq rng-c-overrides (cons (cons name def) rng-c-overrides))) | |
286 | (cond (overridden (and (eq overridden 'require) | |
287 | (aset about rng-c-about-override-slot t)) | |
288 | nil) | |
289 | (t (setq combine (rng-c-merge-combine def combine name)) | |
290 | (and (rng-ref-get (cdr def)) | |
291 | (not combine) | |
292 | (rng-c-error "Duplicate definition of %s" name)) | |
293 | def)))) | |
294 | ||
295 | (defun rng-c-start-include (overrides) | |
296 | (mapcar (lambda (name-def) | |
297 | (let* ((def (cdr name-def)) | |
298 | (about (car def)) | |
299 | (save (aref about rng-c-about-override-slot))) | |
300 | (aset about rng-c-about-override-slot 'require) | |
301 | (cons save name-def))) | |
302 | overrides)) | |
303 | ||
304 | (defun rng-c-end-include (overrides) | |
305 | (mapcar (lambda (o) | |
306 | (let* ((saved (car o)) | |
307 | (name-def (cdr o)) | |
308 | (name (car name-def)) | |
309 | (def (cdr name-def)) | |
310 | (about (car def))) | |
311 | (and (eq (aref about rng-c-about-override-slot) 'require) | |
312 | (rng-c-error "Definition of %s in include did not override definition in included file" name)) | |
313 | (aset about rng-c-about-override-slot saved))) | |
314 | overrides)) | |
315 | ||
316 | (defun rng-c-define (def value) | |
317 | (and def | |
318 | (let ((current-value (rng-ref-get (cdr def)))) | |
319 | (rng-ref-set (cdr def) | |
320 | (if current-value | |
321 | (if (eq (aref (car def) rng-c-about-combine-slot) | |
322 | 'choice) | |
323 | (rng-make-choice (list current-value value)) | |
324 | (rng-make-interleave (list current-value value))) | |
325 | value))))) | |
326 | ||
327 | (defun rng-c-finish-grammar () | |
328 | (maphash (lambda (key def) | |
329 | (or (rng-ref-get (cdr def)) | |
330 | (rng-c-error "Reference to undefined pattern %s" key))) | |
331 | rng-c-current-grammar) | |
332 | (rng-ref-get (cdr (or (gethash 'start rng-c-current-grammar) | |
333 | (rng-c-error "No definition of start"))))) | |
334 | ||
335 | ;;; Parsing | |
336 | ||
337 | (defvar rng-c-escape-positions nil) | |
338 | (make-variable-buffer-local 'rng-c-escape-positions) | |
339 | ||
340 | (defvar rng-c-file-name nil) | |
341 | (make-variable-buffer-local 'rng-c-file-name) | |
342 | ||
343 | (defvar rng-c-file-index nil) | |
344 | ||
345 | (defun rng-c-parse-file (filename &optional context) | |
2adaf057 | 346 | (with-current-buffer (get-buffer-create (rng-c-buffer-name context)) |
8cd39fb3 MH |
347 | (erase-buffer) |
348 | (rng-c-init-buffer) | |
349 | (setq rng-c-file-name | |
350 | (car (insert-file-contents filename))) | |
351 | (setq rng-c-escape-positions nil) | |
352 | (rng-c-process-escapes) | |
353 | (rng-c-parse-top-level context))) | |
354 | ||
355 | (defun rng-c-buffer-name (context) | |
356 | (concat " *RNC Input" | |
357 | (if context | |
358 | (concat "<" | |
359 | (number-to-string (setq rng-c-file-index | |
360 | (1+ rng-c-file-index))) | |
361 | ">*") | |
362 | (setq rng-c-file-index 1) | |
363 | "*"))) | |
364 | ||
365 | (defun rng-c-process-escapes () | |
10ee3b3f | 366 | ;; Check for any NULs, since we will use NUL chars |
8cd39fb3 MH |
367 | ;; for internal purposes. |
368 | (let ((pos (search-forward "\C-@" nil t))) | |
369 | (and pos | |
370 | (rng-c-error "Nul character found (binary file?)"))) | |
371 | (let ((offset 0)) | |
372 | (while (re-search-forward "\\\\x+{\\([0-9a-fA-F]+\\)}" | |
373 | (point-max) | |
374 | t) | |
375 | (let* ((ch (decode-char 'ucs (string-to-number (match-string 1) 16)))) | |
376 | (if (and ch (> ch 0)) | |
377 | (let ((begin (match-beginning 0)) | |
378 | (end (match-end 0))) | |
379 | (delete-region begin end) | |
380 | ;; Represent an escaped newline by nul, so | |
381 | ;; that we can distinguish it from a literal newline. | |
382 | ;; We will translate it back into a real newline later. | |
383 | (insert (if (eq ch ?\n) 0 ch)) | |
384 | (setq offset (+ offset (- end begin 1))) | |
385 | (setq rng-c-escape-positions | |
386 | (cons (cons (point) offset) | |
387 | rng-c-escape-positions))) | |
388 | (rng-c-error "Invalid character escape"))))) | |
389 | (goto-char 1)) | |
390 | ||
391 | (defun rng-c-translate-position (pos) | |
392 | (let ((tem rng-c-escape-positions)) | |
393 | (while (and tem | |
394 | (> (caar tem) pos)) | |
395 | (setq tem (cdr tem))) | |
396 | (if tem | |
397 | (+ pos (cdar tem)) | |
398 | pos))) | |
399 | ||
400 | (defun rng-c-error (&rest args) | |
401 | (rng-c-signal-incorrect-schema rng-c-file-name | |
402 | (rng-c-translate-position (point)) | |
403 | (apply 'format args))) | |
404 | ||
405 | (defun rng-c-parse-top-level (context) | |
406 | (let ((rng-c-namespace-decls nil) | |
407 | (rng-c-default-namespace nil) | |
408 | (rng-c-datatype-decls nil)) | |
409 | (goto-char (point-min)) | |
410 | (forward-comment (point-max)) | |
411 | (rng-c-advance) | |
412 | (rng-c-parse-decls) | |
413 | (let ((p (if (eq context 'include) | |
414 | (if (rng-c-implicit-grammar-p) | |
415 | (rng-c-parse-grammar-body "") | |
416 | (rng-c-parse-included-grammar)) | |
417 | (if (rng-c-implicit-grammar-p) | |
418 | (rng-c-parse-implicit-grammar) | |
419 | (rng-c-parse-pattern))))) | |
420 | (or (string-equal rng-c-current-token "") | |
421 | (rng-c-error "Unexpected characters after pattern")) | |
422 | p))) | |
423 | ||
424 | (defun rng-c-parse-included-grammar () | |
425 | (or (string-equal rng-c-current-token "grammar") | |
426 | (rng-c-error "Included schema is not a grammar")) | |
427 | (rng-c-advance) | |
428 | (rng-c-expect "{") | |
429 | (rng-c-parse-grammar-body "}")) | |
430 | ||
431 | (defun rng-c-implicit-grammar-p () | |
432 | (or (and (or (rng-c-current-token-prefixed-name-p) | |
433 | (rng-c-current-token-quoted-identifier-p) | |
434 | (and (rng-c-current-token-ncname-p) | |
435 | (not (rng-c-current-token-keyword-p)))) | |
436 | (looking-at "\\[")) | |
437 | (and (string-equal rng-c-current-token "[") | |
438 | (rng-c-parse-lead-annotation) | |
439 | nil) | |
440 | (member rng-c-current-token '("div" "include" "")) | |
441 | (looking-at "[|&]?="))) | |
442 | ||
443 | (defun rng-c-parse-decls () | |
444 | (setq rng-c-default-namespace-declared nil) | |
445 | (while (progn | |
446 | (let ((binding | |
447 | (assoc rng-c-current-token | |
448 | '(("namespace" . rng-c-parse-namespace) | |
449 | ("datatypes" . rng-c-parse-datatypes) | |
450 | ("default" . rng-c-parse-default))))) | |
451 | (if binding | |
452 | (progn | |
453 | (rng-c-advance) | |
454 | (funcall (cdr binding)) | |
455 | t) | |
456 | nil)))) | |
457 | (rng-c-declare-standard-datatypes) | |
458 | (rng-c-declare-standard-namespaces)) | |
459 | ||
460 | (defun rng-c-parse-datatypes () | |
461 | (let ((prefix (rng-c-parse-identifier-or-keyword))) | |
462 | (or (not (assoc prefix rng-c-datatype-decls)) | |
463 | (rng-c-error "Duplicate datatypes declaration for prefix %s" prefix)) | |
464 | (rng-c-expect "=") | |
465 | (setq rng-c-datatype-decls | |
466 | (cons (cons prefix | |
467 | (rng-make-datatypes-uri (rng-c-parse-literal))) | |
468 | rng-c-datatype-decls)))) | |
10545bd8 | 469 | |
8cd39fb3 MH |
470 | (defun rng-c-parse-namespace () |
471 | (rng-c-declare-namespace nil | |
472 | (rng-c-parse-identifier-or-keyword))) | |
473 | ||
474 | (defun rng-c-parse-default () | |
475 | (rng-c-expect "namespace") | |
10545bd8 | 476 | (rng-c-declare-namespace t |
8cd39fb3 MH |
477 | (if (string-equal rng-c-current-token "=") |
478 | nil | |
479 | (rng-c-parse-identifier-or-keyword)))) | |
480 | ||
481 | (defun rng-c-declare-namespace (declare-default prefix) | |
482 | (rng-c-expect "=") | |
483 | (let ((ns (cond ((string-equal rng-c-current-token "inherit") | |
484 | (rng-c-advance) | |
485 | rng-c-inherit-namespace) | |
486 | (t | |
487 | (nxml-make-namespace (rng-c-parse-literal)))))) | |
488 | (and prefix | |
489 | (or (not (assoc prefix rng-c-namespace-decls)) | |
490 | (rng-c-error "Duplicate namespace declaration for prefix %s" | |
491 | prefix)) | |
492 | (setq rng-c-namespace-decls | |
493 | (cons (cons prefix ns) rng-c-namespace-decls))) | |
494 | (and declare-default | |
495 | (or (not rng-c-default-namespace-declared) | |
496 | (rng-c-error "Duplicate default namespace declaration")) | |
497 | (setq rng-c-default-namespace-declared t) | |
498 | (setq rng-c-default-namespace ns)))) | |
499 | ||
500 | (defun rng-c-parse-implicit-grammar () | |
501 | (let* ((rng-c-parent-grammar rng-c-current-grammar) | |
502 | (rng-c-current-grammar (rng-c-make-grammar))) | |
503 | (rng-c-parse-grammar-body "") | |
504 | (rng-c-finish-grammar))) | |
505 | ||
506 | (defun rng-c-parse-grammar-body (close-token &optional in-include) | |
507 | (while (not (string-equal rng-c-current-token close-token)) | |
508 | (cond ((rng-c-current-token-keyword-p) | |
509 | (let ((kw (intern rng-c-current-token))) | |
510 | (cond ((eq kw 'start) | |
511 | (rng-c-parse-define 'start in-include)) | |
512 | ((eq kw 'div) | |
513 | (rng-c-advance) | |
514 | (rng-c-parse-div in-include)) | |
515 | ((eq kw 'include) | |
516 | (and in-include | |
517 | (rng-c-error "Nested include")) | |
518 | (rng-c-advance) | |
519 | (rng-c-parse-include)) | |
520 | (t (rng-c-error "Invalid grammar keyword"))))) | |
521 | ((rng-c-current-token-ncname-p) | |
522 | (if (looking-at "\\[") | |
523 | (rng-c-parse-annotation-element) | |
524 | (rng-c-parse-define rng-c-current-token | |
525 | in-include))) | |
526 | ((rng-c-current-token-quoted-identifier-p) | |
527 | (if (looking-at "\\[") | |
528 | (rng-c-parse-annotation-element) | |
529 | (rng-c-parse-define (substring rng-c-current-token 1) | |
530 | in-include))) | |
531 | ((rng-c-current-token-prefixed-name-p) | |
532 | (rng-c-parse-annotation-element)) | |
533 | ((string-equal rng-c-current-token "[") | |
534 | (rng-c-parse-lead-annotation) | |
535 | (and (string-equal rng-c-current-token close-token) | |
536 | (rng-c-error "Missing annotation subject")) | |
537 | (and (looking-at "\\[") | |
538 | (rng-c-error "Leading annotation applied to annotation"))) | |
539 | (t (rng-c-error "Invalid grammar content")))) | |
540 | (or (string-equal rng-c-current-token "") | |
541 | (rng-c-advance))) | |
542 | ||
543 | (defun rng-c-parse-div (in-include) | |
544 | (rng-c-expect "{") | |
545 | (rng-c-parse-grammar-body "}" in-include)) | |
546 | ||
547 | (defun rng-c-parse-include () | |
548 | (let* ((filename (rng-c-expand-file (rng-c-parse-literal))) | |
549 | (rng-c-inherit-namespace (rng-c-parse-opt-inherit)) | |
550 | overrides) | |
551 | (cond ((string-equal rng-c-current-token "{") | |
552 | (rng-c-advance) | |
553 | (let ((rng-c-overrides nil)) | |
554 | (rng-c-parse-grammar-body "}" t) | |
555 | (setq overrides rng-c-overrides)) | |
556 | (setq overrides (rng-c-start-include overrides)) | |
557 | (rng-c-parse-file filename 'include) | |
558 | (rng-c-end-include overrides)) | |
559 | (t (rng-c-parse-file filename 'include))))) | |
560 | ||
561 | (defun rng-c-parse-define (name in-include) | |
562 | (rng-c-advance) | |
563 | (let ((assign (assoc rng-c-current-token | |
564 | '(("=" . nil) | |
565 | ("|=" . choice) | |
566 | ("&=" . interleave))))) | |
567 | (or assign | |
568 | (rng-c-error "Expected assignment operator")) | |
569 | (rng-c-advance) | |
570 | (let ((ref (rng-c-prepare-define name (cdr assign) in-include))) | |
571 | (rng-c-define ref (rng-c-parse-pattern))))) | |
572 | ||
573 | (defvar rng-c-had-except nil) | |
574 | ||
575 | (defun rng-c-parse-pattern () | |
576 | (let* ((rng-c-had-except nil) | |
577 | (p (rng-c-parse-repeated)) | |
578 | (op (assoc rng-c-current-token | |
579 | '(("|" . rng-make-choice) | |
580 | ("," . rng-make-group) | |
581 | ("&" . rng-make-interleave))))) | |
582 | (if op | |
583 | (if rng-c-had-except | |
584 | (rng-c-error "Parentheses required around pattern using -") | |
585 | (let* ((patterns (cons p nil)) | |
586 | (tail patterns) | |
587 | (connector rng-c-current-token)) | |
588 | (while (progn | |
589 | (rng-c-advance) | |
590 | (let ((newcdr (cons (rng-c-parse-repeated) nil))) | |
591 | (setcdr tail newcdr) | |
592 | (setq tail newcdr)) | |
593 | (string-equal rng-c-current-token connector))) | |
594 | (funcall (cdr op) patterns))) | |
595 | p))) | |
596 | ||
597 | (defun rng-c-parse-repeated () | |
598 | (let ((p (rng-c-parse-follow-annotations | |
599 | (rng-c-parse-primary))) | |
600 | (op (assoc rng-c-current-token | |
601 | '(("*" . rng-make-zero-or-more) | |
602 | ("+" . rng-make-one-or-more) | |
603 | ("?" . rng-make-optional))))) | |
604 | (if op | |
605 | (if rng-c-had-except | |
606 | (rng-c-error "Parentheses required around pattern using -") | |
607 | (rng-c-parse-follow-annotations | |
608 | (progn | |
609 | (rng-c-advance) | |
610 | (funcall (cdr op) p)))) | |
611 | p))) | |
612 | ||
613 | (defun rng-c-parse-primary () | |
10545bd8 JB |
614 | "Parse a primary expression. |
615 | The current token must be the first token of the expression. | |
616 | After parsing the current token should be the token following | |
617 | the primary expression." | |
8cd39fb3 MH |
618 | (cond ((rng-c-current-token-keyword-p) |
619 | (let ((parse-function (get (intern rng-c-current-token) | |
620 | 'rng-c-pattern))) | |
621 | (or parse-function | |
622 | (rng-c-error "Keyword %s does not introduce a pattern" | |
623 | rng-c-current-token)) | |
624 | (rng-c-advance) | |
625 | (funcall parse-function))) | |
626 | ((rng-c-current-token-ncname-p) | |
627 | (rng-c-advance-with (rng-c-make-ref rng-c-current-token))) | |
628 | ((string-equal rng-c-current-token "(") | |
629 | (rng-c-advance) | |
630 | (let ((p (rng-c-parse-pattern))) | |
631 | (rng-c-expect ")") | |
632 | p)) | |
633 | ((rng-c-current-token-prefixed-name-p) | |
634 | (let ((name (rng-c-expand-datatype rng-c-current-token))) | |
635 | (rng-c-advance) | |
636 | (rng-c-parse-data name))) | |
637 | ((rng-c-current-token-literal-p) | |
638 | (rng-make-value rng-token-datatype (rng-c-parse-literal) nil)) | |
639 | ((rng-c-current-token-quoted-identifier-p) | |
640 | (rng-c-advance-with | |
641 | (rng-c-make-ref (substring rng-c-current-token 1)))) | |
642 | ((string-equal rng-c-current-token "[") | |
643 | (rng-c-parse-lead-annotation) | |
644 | (rng-c-parse-primary)) | |
645 | (t (rng-c-error "Invalid pattern")))) | |
646 | ||
647 | (defun rng-c-parse-parent () | |
648 | (and (rng-c-current-token-keyword-p) | |
649 | (rng-c-error "Keyword following parent was not quoted" | |
650 | rng-c-current-token)) | |
651 | (rng-c-make-parent-ref (rng-c-parse-identifier-or-keyword))) | |
652 | ||
653 | (defun rng-c-parse-literal () | |
654 | (rng-c-fix-escaped-newlines | |
655 | (apply 'concat (rng-c-parse-literal-segments)))) | |
656 | ||
657 | (defun rng-c-parse-literal-segments () | |
658 | (let ((str (rng-c-parse-literal-segment))) | |
659 | (cons str | |
660 | (cond ((string-equal rng-c-current-token "~") | |
661 | (rng-c-advance) | |
662 | (rng-c-parse-literal-segments)) | |
663 | (t nil))))) | |
664 | ||
665 | (defun rng-c-parse-literal-segment () | |
666 | (or (rng-c-current-token-literal-p) | |
667 | (rng-c-error "Expected a literal")) | |
668 | (rng-c-advance-with | |
669 | (let ((n (if (and (>= (length rng-c-current-token) 6) | |
670 | (eq (aref rng-c-current-token 0) | |
671 | (aref rng-c-current-token 1))) | |
672 | 3 | |
673 | 1))) | |
674 | (substring rng-c-current-token n (- n))))) | |
675 | ||
676 | (defun rng-c-fix-escaped-newlines (str) | |
677 | (let ((pos 0)) | |
678 | (while (progn | |
679 | (let ((n (string-match "\C-@" str pos))) | |
680 | (and n | |
681 | (aset str n ?\n) | |
682 | (setq pos (1+ n))))))) | |
683 | str) | |
684 | ||
685 | (defun rng-c-parse-identifier-or-keyword () | |
686 | (cond ((rng-c-current-token-ncname-p) | |
687 | (rng-c-advance-with rng-c-current-token)) | |
688 | ((rng-c-current-token-quoted-identifier-p) | |
689 | (rng-c-advance-with (substring rng-c-current-token 1))) | |
690 | (t (rng-c-error "Expected identifier or keyword")))) | |
10545bd8 | 691 | |
8cd39fb3 MH |
692 | (put 'string 'rng-c-pattern 'rng-c-parse-string) |
693 | (put 'token 'rng-c-pattern 'rng-c-parse-token) | |
694 | (put 'element 'rng-c-pattern 'rng-c-parse-element) | |
695 | (put 'attribute 'rng-c-pattern 'rng-c-parse-attribute) | |
696 | (put 'list 'rng-c-pattern 'rng-c-parse-list) | |
697 | (put 'mixed 'rng-c-pattern 'rng-c-parse-mixed) | |
698 | (put 'text 'rng-c-pattern 'rng-c-parse-text) | |
699 | (put 'empty 'rng-c-pattern 'rng-c-parse-empty) | |
700 | (put 'notAllowed 'rng-c-pattern 'rng-c-parse-not-allowed) | |
701 | (put 'grammar 'rng-c-pattern 'rng-c-parse-grammar) | |
702 | (put 'parent 'rng-c-pattern 'rng-c-parse-parent) | |
703 | (put 'external 'rng-c-pattern 'rng-c-parse-external) | |
704 | ||
705 | (defun rng-c-parse-element () | |
706 | (let ((name-class (rng-c-parse-name-class nil))) | |
707 | (rng-c-expect "{") | |
708 | (let ((pattern (rng-c-parse-pattern))) | |
709 | (rng-c-expect "}") | |
710 | (rng-make-element name-class pattern)))) | |
711 | ||
712 | (defun rng-c-parse-attribute () | |
713 | (let ((name-class (rng-c-parse-name-class 'attribute))) | |
714 | (rng-c-expect "{") | |
715 | (let ((pattern (rng-c-parse-pattern))) | |
716 | (rng-c-expect "}") | |
717 | (rng-make-attribute name-class pattern)))) | |
718 | ||
719 | (defun rng-c-parse-name-class (attribute) | |
720 | (let* ((rng-c-had-except nil) | |
721 | (name-class | |
722 | (rng-c-parse-follow-annotations | |
723 | (rng-c-parse-primary-name-class attribute)))) | |
724 | (if (string-equal rng-c-current-token "|") | |
725 | (let* ((name-classes (cons name-class nil)) | |
726 | (tail name-classes)) | |
727 | (or (not rng-c-had-except) | |
728 | (rng-c-error "Parentheses required around name-class using - operator")) | |
729 | (while (progn | |
730 | (rng-c-advance) | |
731 | (let ((newcdr | |
732 | (cons (rng-c-parse-follow-annotations | |
733 | (rng-c-parse-primary-name-class attribute)) | |
734 | nil))) | |
735 | (setcdr tail newcdr) | |
736 | (setq tail newcdr)) | |
737 | (string-equal rng-c-current-token "|"))) | |
738 | (rng-make-choice-name-class name-classes)) | |
739 | name-class))) | |
10545bd8 | 740 | |
8cd39fb3 MH |
741 | (defun rng-c-parse-primary-name-class (attribute) |
742 | (cond ((rng-c-current-token-ncname-p) | |
743 | (rng-c-advance-with | |
744 | (rng-make-name-name-class | |
745 | (rng-make-name (rng-c-unqualified-namespace attribute) | |
746 | rng-c-current-token)))) | |
747 | ((rng-c-current-token-prefixed-name-p) | |
748 | (rng-c-advance-with | |
749 | (rng-make-name-name-class | |
750 | (rng-c-expand-name rng-c-current-token)))) | |
751 | ((string-equal rng-c-current-token "*") | |
752 | (let ((except (rng-c-parse-opt-except-name-class attribute))) | |
753 | (if except | |
754 | (rng-make-any-name-except-name-class except) | |
755 | (rng-make-any-name-name-class)))) | |
756 | ((rng-c-current-token-ns-name-p) | |
757 | (let* ((ns | |
758 | (rng-c-lookup-prefix (substring rng-c-current-token | |
759 | 0 | |
760 | -2))) | |
761 | (except (rng-c-parse-opt-except-name-class attribute))) | |
762 | (if except | |
763 | (rng-make-ns-name-except-name-class ns except) | |
764 | (rng-make-ns-name-name-class ns)))) | |
765 | ((string-equal rng-c-current-token "(") | |
766 | (rng-c-advance) | |
767 | (let ((name-class (rng-c-parse-name-class attribute))) | |
768 | (rng-c-expect ")") | |
769 | name-class)) | |
770 | ((rng-c-current-token-quoted-identifier-p) | |
771 | (rng-c-advance-with | |
772 | (rng-make-name-name-class | |
773 | (rng-make-name (rng-c-unqualified-namespace attribute) | |
774 | (substring rng-c-current-token 1))))) | |
775 | ((string-equal rng-c-current-token "[") | |
776 | (rng-c-parse-lead-annotation) | |
777 | (rng-c-parse-primary-name-class attribute)) | |
778 | (t (rng-c-error "Bad name class")))) | |
779 | ||
780 | (defun rng-c-parse-opt-except-name-class (attribute) | |
781 | (rng-c-advance) | |
782 | (and (string-equal rng-c-current-token "-") | |
783 | (or (not rng-c-had-except) | |
784 | (rng-c-error "Parentheses required around name-class using - operator")) | |
785 | (setq rng-c-had-except t) | |
786 | (progn | |
787 | (rng-c-advance) | |
788 | (rng-c-parse-primary-name-class attribute)))) | |
789 | ||
790 | (defun rng-c-parse-mixed () | |
791 | (rng-c-expect "{") | |
792 | (let ((pattern (rng-make-mixed (rng-c-parse-pattern)))) | |
793 | (rng-c-expect "}") | |
794 | pattern)) | |
795 | ||
796 | (defun rng-c-parse-list () | |
797 | (rng-c-expect "{") | |
798 | (let ((pattern (rng-make-list (rng-c-parse-pattern)))) | |
799 | (rng-c-expect "}") | |
800 | pattern)) | |
801 | ||
802 | (defun rng-c-parse-text () | |
803 | (rng-make-text)) | |
804 | ||
805 | (defun rng-c-parse-empty () | |
806 | (rng-make-empty)) | |
807 | ||
808 | (defun rng-c-parse-not-allowed () | |
809 | (rng-make-not-allowed)) | |
810 | ||
811 | (defun rng-c-parse-string () | |
812 | (rng-c-parse-data rng-string-datatype)) | |
813 | ||
814 | (defun rng-c-parse-token () | |
815 | (rng-c-parse-data rng-token-datatype)) | |
816 | ||
817 | (defun rng-c-parse-data (name) | |
818 | (if (rng-c-current-token-literal-p) | |
819 | (rng-make-value name | |
820 | (rng-c-parse-literal) | |
821 | (and (car name) | |
822 | (rng-c-make-context))) | |
823 | (let ((params (rng-c-parse-optional-params))) | |
824 | (if (string-equal rng-c-current-token "-") | |
825 | (progn | |
826 | (if rng-c-had-except | |
827 | (rng-c-error "Parentheses required around pattern using -") | |
828 | (setq rng-c-had-except t)) | |
829 | (rng-c-advance) | |
830 | (rng-make-data-except name | |
831 | params | |
832 | (rng-c-parse-primary))) | |
833 | (rng-make-data name params))))) | |
834 | ||
835 | (defun rng-c-parse-optional-params () | |
836 | (and (string-equal rng-c-current-token "{") | |
837 | (let* ((head (cons nil nil)) | |
838 | (tail head)) | |
839 | (rng-c-advance) | |
840 | (while (not (string-equal rng-c-current-token "}")) | |
841 | (and (string-equal rng-c-current-token "[") | |
842 | (rng-c-parse-lead-annotation)) | |
843 | (let ((name (rng-c-parse-identifier-or-keyword))) | |
844 | (rng-c-expect "=") | |
845 | (let ((newcdr (cons (cons (intern name) | |
846 | (rng-c-parse-literal)) | |
847 | nil))) | |
848 | (setcdr tail newcdr) | |
849 | (setq tail newcdr)))) | |
850 | (rng-c-advance) | |
851 | (cdr head)))) | |
852 | ||
853 | (defun rng-c-parse-external () | |
854 | (let* ((filename (rng-c-expand-file (rng-c-parse-literal))) | |
855 | (rng-c-inherit-namespace (rng-c-parse-opt-inherit))) | |
856 | (rng-c-parse-file filename 'external))) | |
857 | ||
858 | (defun rng-c-expand-file (uri) | |
859 | (condition-case err | |
860 | (rng-uri-file-name (rng-uri-resolve uri | |
861 | (rng-file-name-uri rng-c-file-name))) | |
862 | (rng-uri-error | |
863 | (rng-c-error (cadr err))))) | |
864 | ||
865 | (defun rng-c-parse-opt-inherit () | |
866 | (cond ((string-equal rng-c-current-token "inherit") | |
867 | (rng-c-advance) | |
868 | (rng-c-expect "=") | |
869 | (rng-c-lookup-prefix (rng-c-parse-identifier-or-keyword))) | |
870 | (t rng-c-default-namespace))) | |
871 | ||
872 | (defun rng-c-parse-grammar () | |
873 | (rng-c-expect "{") | |
874 | (let* ((rng-c-parent-grammar rng-c-current-grammar) | |
875 | (rng-c-current-grammar (rng-c-make-grammar))) | |
876 | (rng-c-parse-grammar-body "}") | |
877 | (rng-c-finish-grammar))) | |
878 | ||
879 | (defun rng-c-parse-lead-annotation () | |
880 | (rng-c-parse-annotation-body) | |
881 | (and (string-equal rng-c-current-token "[") | |
882 | (rng-c-error "Multiple leading annotations"))) | |
883 | ||
884 | (defun rng-c-parse-follow-annotations (obj) | |
885 | (while (string-equal rng-c-current-token ">>") | |
886 | (rng-c-advance) | |
887 | (if (rng-c-current-token-prefixed-name-p) | |
888 | (rng-c-advance) | |
889 | (rng-c-parse-identifier-or-keyword)) | |
890 | (rng-c-parse-annotation-body t)) | |
891 | obj) | |
892 | ||
893 | (defun rng-c-parse-annotation-element () | |
894 | (rng-c-advance) | |
895 | (rng-c-parse-annotation-body t)) | |
896 | ||
897 | ;; XXX need stricter checking of attribute names | |
898 | ;; XXX don't allow attributes after text | |
899 | ||
900 | (defun rng-c-parse-annotation-body (&optional allow-text) | |
10545bd8 JB |
901 | "Current token is [. Parse up to matching ]. |
902 | Current token after parse is token following ]." | |
8cd39fb3 MH |
903 | (or (string-equal rng-c-current-token "[") |
904 | (rng-c-error "Expected [")) | |
905 | (rng-c-advance) | |
906 | (while (not (string-equal rng-c-current-token "]")) | |
907 | (cond ((rng-c-current-token-literal-p) | |
908 | (or allow-text | |
909 | (rng-c-error "Out of place text within annotation")) | |
910 | (rng-c-parse-literal)) | |
911 | (t | |
912 | (if (rng-c-current-token-prefixed-name-p) | |
913 | (rng-c-advance) | |
914 | (rng-c-parse-identifier-or-keyword)) | |
915 | (cond ((string-equal rng-c-current-token "[") | |
916 | (rng-c-parse-annotation-body t)) | |
917 | ((string-equal rng-c-current-token "=") | |
918 | (rng-c-advance) | |
919 | (rng-c-parse-literal)) | |
920 | (t (rng-c-error "Expected = or [")))))) | |
921 | (rng-c-advance)) | |
10545bd8 | 922 | |
8cd39fb3 MH |
923 | (defun rng-c-advance-with (pattern) |
924 | (rng-c-advance) | |
925 | pattern) | |
926 | ||
927 | (defun rng-c-expect (str) | |
928 | (or (string-equal rng-c-current-token str) | |
929 | (rng-c-error "Expected `%s' but got `%s'" str rng-c-current-token)) | |
930 | (rng-c-advance)) | |
931 | ||
932 | (provide 'rng-cmpct) | |
933 | ||
934 | ;;; rng-cmpct.el |