declare smobs in alloc.c
[bpt/emacs.git] / lisp / nxml / rng-cmpct.el
1 ;;; rng-cmpct.el --- parsing of RELAX NG Compact Syntax schemas
2
3 ;; Copyright (C) 2003, 2007-2014 Free Software Foundation, Inc.
4
5 ;; Author: James Clark
6 ;; Keywords: wp, hypermedia, languages, XML, RelaxNG
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
22
23 ;;; Commentary:
24
25 ;; This parses a RELAX NG Compact Syntax schema into the form
26 ;; specified in rng-pttrn.el.
27 ;;
28 ;; RELAX NG Compact Syntax is specified by
29 ;; http://relaxng.org/compact.html
30 ;;
31 ;; This file uses the prefix "rng-c-".
32
33 ;;; Code:
34
35 (require 'nxml-util)
36 (require 'rng-util)
37 (require 'rng-uri)
38 (require 'rng-pttrn)
39
40 ;;;###autoload
41 (defun rng-c-load-schema (filename)
42 "Load a schema in RELAX NG compact syntax from FILENAME.
43 Return a pattern."
44 (rng-c-parse-file filename))
45
46 ;;; Error handling
47
48 (define-error 'rng-c-incorrect-schema
49 "Incorrect schema" '(rng-error nxml-file-parse-error))
50
51 (defun rng-c-signal-incorrect-schema (filename pos message)
52 (nxml-signal-file-parse-error filename
53 pos
54 message
55 'rng-c-incorrect-schema))
56
57 ;;; Lexing
58
59 (defconst rng-c-keywords
60 '("attribute"
61 "default"
62 "datatypes"
63 "div"
64 "element"
65 "empty"
66 "external"
67 "grammar"
68 "include"
69 "inherit"
70 "list"
71 "mixed"
72 "namespace"
73 "notAllowed"
74 "parent"
75 "start"
76 "string"
77 "text"
78 "token")
79 "List of strings that are keywords in the compact syntax.")
80
81 (defconst rng-c-anchored-keyword-re
82 (concat "\\`\\(" (regexp-opt rng-c-keywords) "\\)\\'")
83 "Regular expression to match a keyword in the compact syntax.")
84
85 (defvar rng-c-syntax-table nil
86 "Syntax table for parsing the compact syntax.")
87
88 (if rng-c-syntax-table
89 ()
90 (setq rng-c-syntax-table (make-syntax-table))
91 (modify-syntax-entry ?# "<" rng-c-syntax-table)
92 (modify-syntax-entry ?\n ">" rng-c-syntax-table)
93 (modify-syntax-entry ?- "w" rng-c-syntax-table)
94 (modify-syntax-entry ?. "w" rng-c-syntax-table)
95 (modify-syntax-entry ?_ "w" rng-c-syntax-table)
96 (modify-syntax-entry ?: "_" rng-c-syntax-table))
97
98 (defconst rng-c-literal-1-re
99 "'\\(''\\([^']\\|'[^']\\|''[^']\\)*''\\|[^'\n]*\\)'"
100 "Regular expression to match a single-quoted literal.")
101
102 (defconst rng-c-literal-2-re
103 (replace-regexp-in-string "'" "\"" rng-c-literal-1-re)
104 "Regular expression to match a double-quoted literal.")
105
106 (defconst rng-c-ncname-re "\\w+")
107
108 (defconst rng-c-anchored-ncname-re
109 (concat "\\`" rng-c-ncname-re "\\'"))
110
111 (defconst rng-c-token-re
112 (concat "[&|]=" "\\|"
113 "[][()|&,*+?{}~=-]" "\\|"
114 rng-c-literal-1-re "\\|"
115 rng-c-literal-2-re "\\|"
116 rng-c-ncname-re "\\(:\\(\\*\\|" rng-c-ncname-re "\\)\\)?" "\\|"
117 "\\\\" rng-c-ncname-re "\\|"
118 ">>")
119 "Regular expression to match a token in the compact syntax.")
120
121 (defun rng-c-init-buffer ()
122 (setq case-fold-search nil) ; automatically becomes buffer-local when set
123 (set-buffer-multibyte t)
124 (set-syntax-table rng-c-syntax-table))
125
126 (defvar rng-c-current-token nil)
127 (make-variable-buffer-local 'rng-c-current-token)
128
129 (defun rng-c-advance ()
130 (cond ((looking-at rng-c-token-re)
131 (setq rng-c-current-token (match-string 0))
132 (goto-char (match-end 0))
133 (forward-comment (point-max)))
134 ((= (point) (point-max))
135 (setq rng-c-current-token ""))
136 (t (rng-c-error "Invalid token"))))
137
138 (defconst rng-c-anchored-datatype-name-re
139 (concat "\\`" rng-c-ncname-re ":" rng-c-ncname-re "\\'"))
140
141 (defsubst rng-c-current-token-keyword-p ()
142 (string-match rng-c-anchored-keyword-re rng-c-current-token))
143
144 (defsubst rng-c-current-token-prefixed-name-p ()
145 (string-match rng-c-anchored-datatype-name-re rng-c-current-token))
146
147 (defsubst rng-c-current-token-literal-p ()
148 (string-match "\\`['\"]" rng-c-current-token))
149
150 (defsubst rng-c-current-token-quoted-identifier-p ()
151 (string-match "\\`\\\\" rng-c-current-token))
152
153 (defsubst rng-c-current-token-ncname-p ()
154 (string-match rng-c-anchored-ncname-re rng-c-current-token))
155
156 (defsubst rng-c-current-token-ns-name-p ()
157 (let ((len (length rng-c-current-token)))
158 (and (> len 0)
159 (= (aref rng-c-current-token (- len 1)) ?*))))
160
161 ;;; Namespaces
162
163 (defvar rng-c-inherit-namespace nil)
164
165 (defvar rng-c-default-namespace nil)
166
167 (defvar rng-c-default-namespace-declared nil)
168
169 (defvar rng-c-namespace-decls nil
170 "Alist of namespace declarations.")
171
172 (defconst rng-c-no-namespace nil)
173
174 (defun rng-c-declare-standard-namespaces ()
175 (setq rng-c-namespace-decls
176 (cons (cons "xml" nxml-xml-namespace-uri)
177 rng-c-namespace-decls))
178 (when (and (not rng-c-default-namespace-declared)
179 rng-c-inherit-namespace)
180 (setq rng-c-default-namespace rng-c-inherit-namespace)))
181
182 (defun rng-c-expand-name (prefixed-name)
183 (let ((i (string-match ":" prefixed-name)))
184 (rng-make-name (rng-c-lookup-prefix (substring prefixed-name
185 0
186 i))
187 (substring prefixed-name (+ i 1)))))
188
189 (defun rng-c-lookup-prefix (prefix)
190 (let ((binding (assoc prefix rng-c-namespace-decls)))
191 (or binding (rng-c-error "Undefined prefix %s" prefix))
192 (cdr binding)))
193
194 (defun rng-c-unqualified-namespace (attribute)
195 (if attribute
196 rng-c-no-namespace
197 rng-c-default-namespace))
198
199 (defun rng-c-make-context ()
200 (cons rng-c-default-namespace rng-c-namespace-decls))
201
202 ;;; Datatypes
203
204 (defconst rng-string-datatype
205 (rng-make-datatype rng-builtin-datatypes-uri "string"))
206
207 (defconst rng-token-datatype
208 (rng-make-datatype rng-builtin-datatypes-uri "token"))
209
210 (defvar rng-c-datatype-decls nil
211 "Alist of datatype declarations.
212 Contains a list of pairs (PREFIX . URI) where PREFIX is a string
213 and URI is a symbol.")
214
215 (defun rng-c-declare-standard-datatypes ()
216 (setq rng-c-datatype-decls
217 (cons (cons "xsd" rng-xsd-datatypes-uri)
218 rng-c-datatype-decls)))
219
220 (defun rng-c-lookup-datatype-prefix (prefix)
221 (let ((binding (assoc prefix rng-c-datatype-decls)))
222 (or binding (rng-c-error "Undefined prefix %s" prefix))
223 (cdr binding)))
224
225 (defun rng-c-expand-datatype (prefixed-name)
226 (let ((i (string-match ":" prefixed-name)))
227 (rng-make-datatype
228 (rng-c-lookup-datatype-prefix (substring prefixed-name 0 i))
229 (substring prefixed-name (+ i 1)))))
230
231 ;;; Grammars
232
233 (defvar rng-c-current-grammar nil)
234 (defvar rng-c-parent-grammar nil)
235
236 (defun rng-c-make-grammar ()
237 (make-hash-table :test 'equal))
238
239 (defconst rng-c-about-override-slot 0)
240 (defconst rng-c-about-combine-slot 1)
241
242 (defun rng-c-lookup-create (name grammar)
243 "Return a def object for NAME.
244 A def object is a pair \(ABOUT . REF) where REF is returned by
245 `rng-make-ref'.
246 ABOUT is a two-element vector [OVERRIDE COMBINE].
247 COMBINE is either nil, choice or interleave.
248 OVERRIDE is either nil, require or t."
249 (let ((def (gethash name grammar)))
250 (if def
251 def
252 (progn
253 (setq def (cons (vector nil nil) (rng-make-ref name)))
254 (puthash name def grammar)
255 def))))
256
257 (defun rng-c-make-ref (name)
258 (or rng-c-current-grammar
259 (rng-c-error "Reference not in a grammar"))
260 (cdr (rng-c-lookup-create name rng-c-current-grammar)))
261
262 (defun rng-c-make-parent-ref (name)
263 (or rng-c-parent-grammar
264 (rng-c-error "Reference to non-existent parent grammar"))
265 (cdr (rng-c-lookup-create name rng-c-parent-grammar)))
266
267 (defvar rng-c-overrides nil
268 "Contains a list of (NAME . DEF) pairs.")
269
270 (defun rng-c-merge-combine (def combine name)
271 (let* ((about (car def))
272 (current-combine (aref about rng-c-about-combine-slot)))
273 (if combine
274 (if current-combine
275 (or (eq combine current-combine)
276 (rng-c-error "Inconsistent combine for %s" name))
277 (aset about rng-c-about-combine-slot combine))
278 current-combine)))
279
280 (defun rng-c-prepare-define (name combine in-include)
281 (let* ((def (rng-c-lookup-create name rng-c-current-grammar))
282 (about (car def))
283 (overridden (aref about rng-c-about-override-slot)))
284 (and in-include
285 (setq rng-c-overrides (cons (cons name def) rng-c-overrides)))
286 (cond (overridden (and (eq overridden 'require)
287 (aset about rng-c-about-override-slot t))
288 nil)
289 (t (setq combine (rng-c-merge-combine def combine name))
290 (and (rng-ref-get (cdr def))
291 (not combine)
292 (rng-c-error "Duplicate definition of %s" name))
293 def))))
294
295 (defun rng-c-start-include (overrides)
296 (mapcar (lambda (name-def)
297 (let* ((def (cdr name-def))
298 (about (car def))
299 (save (aref about rng-c-about-override-slot)))
300 (aset about rng-c-about-override-slot 'require)
301 (cons save name-def)))
302 overrides))
303
304 (defun rng-c-end-include (overrides)
305 (mapcar (lambda (o)
306 (let* ((saved (car o))
307 (name-def (cdr o))
308 (name (car name-def))
309 (def (cdr name-def))
310 (about (car def)))
311 (and (eq (aref about rng-c-about-override-slot) 'require)
312 (rng-c-error "Definition of %s in include did not override definition in included file" name))
313 (aset about rng-c-about-override-slot saved)))
314 overrides))
315
316 (defun rng-c-define (def value)
317 (and def
318 (let ((current-value (rng-ref-get (cdr def))))
319 (rng-ref-set (cdr def)
320 (if current-value
321 (if (eq (aref (car def) rng-c-about-combine-slot)
322 'choice)
323 (rng-make-choice (list current-value value))
324 (rng-make-interleave (list current-value value)))
325 value)))))
326
327 (defun rng-c-finish-grammar ()
328 (maphash (lambda (key def)
329 (or (rng-ref-get (cdr def))
330 (rng-c-error "Reference to undefined pattern %s" key)))
331 rng-c-current-grammar)
332 (rng-ref-get (cdr (or (gethash 'start rng-c-current-grammar)
333 (rng-c-error "No definition of start")))))
334
335 ;;; Parsing
336
337 (defvar rng-c-escape-positions nil)
338 (make-variable-buffer-local 'rng-c-escape-positions)
339
340 (defvar rng-c-file-name nil)
341 (make-variable-buffer-local 'rng-c-file-name)
342
343 (defvar rng-c-file-index nil)
344
345 (defun rng-c-parse-file (filename &optional context)
346 (with-current-buffer (get-buffer-create (rng-c-buffer-name context))
347 (erase-buffer)
348 (rng-c-init-buffer)
349 (setq rng-c-file-name
350 (car (insert-file-contents filename)))
351 (setq rng-c-escape-positions nil)
352 (rng-c-process-escapes)
353 (rng-c-parse-top-level context)))
354
355 (defun rng-c-buffer-name (context)
356 (concat " *RNC Input"
357 (if context
358 (concat "<"
359 (number-to-string (setq rng-c-file-index
360 (1+ rng-c-file-index)))
361 ">*")
362 (setq rng-c-file-index 1)
363 "*")))
364
365 (defun rng-c-process-escapes ()
366 ;; Check for any NULs, since we will use NUL chars
367 ;; for internal purposes.
368 (let ((pos (search-forward "\C-@" nil t)))
369 (and pos
370 (rng-c-error "Nul character found (binary file?)")))
371 (let ((offset 0))
372 (while (re-search-forward "\\\\x+{\\([0-9a-fA-F]+\\)}"
373 (point-max)
374 t)
375 (let* ((ch (decode-char 'ucs (string-to-number (match-string 1) 16))))
376 (if (and ch (> ch 0))
377 (let ((begin (match-beginning 0))
378 (end (match-end 0)))
379 (delete-region begin end)
380 ;; Represent an escaped newline by nul, so
381 ;; that we can distinguish it from a literal newline.
382 ;; We will translate it back into a real newline later.
383 (insert (if (eq ch ?\n) 0 ch))
384 (setq offset (+ offset (- end begin 1)))
385 (setq rng-c-escape-positions
386 (cons (cons (point) offset)
387 rng-c-escape-positions)))
388 (rng-c-error "Invalid character escape")))))
389 (goto-char 1))
390
391 (defun rng-c-translate-position (pos)
392 (let ((tem rng-c-escape-positions))
393 (while (and tem
394 (> (caar tem) pos))
395 (setq tem (cdr tem)))
396 (if tem
397 (+ pos (cdar tem))
398 pos)))
399
400 (defun rng-c-error (&rest args)
401 (rng-c-signal-incorrect-schema rng-c-file-name
402 (rng-c-translate-position (point))
403 (apply 'format args)))
404
405 (defun rng-c-parse-top-level (context)
406 (let ((rng-c-namespace-decls nil)
407 (rng-c-default-namespace nil)
408 (rng-c-datatype-decls nil))
409 (goto-char (point-min))
410 (forward-comment (point-max))
411 (rng-c-advance)
412 (rng-c-parse-decls)
413 (let ((p (if (eq context 'include)
414 (if (rng-c-implicit-grammar-p)
415 (rng-c-parse-grammar-body "")
416 (rng-c-parse-included-grammar))
417 (if (rng-c-implicit-grammar-p)
418 (rng-c-parse-implicit-grammar)
419 (rng-c-parse-pattern)))))
420 (or (string-equal rng-c-current-token "")
421 (rng-c-error "Unexpected characters after pattern"))
422 p)))
423
424 (defun rng-c-parse-included-grammar ()
425 (or (string-equal rng-c-current-token "grammar")
426 (rng-c-error "Included schema is not a grammar"))
427 (rng-c-advance)
428 (rng-c-expect "{")
429 (rng-c-parse-grammar-body "}"))
430
431 (defun rng-c-implicit-grammar-p ()
432 (or (and (or (rng-c-current-token-prefixed-name-p)
433 (rng-c-current-token-quoted-identifier-p)
434 (and (rng-c-current-token-ncname-p)
435 (not (rng-c-current-token-keyword-p))))
436 (looking-at "\\["))
437 (and (string-equal rng-c-current-token "[")
438 (rng-c-parse-lead-annotation)
439 nil)
440 (member rng-c-current-token '("div" "include" ""))
441 (looking-at "[|&]?=")))
442
443 (defun rng-c-parse-decls ()
444 (setq rng-c-default-namespace-declared nil)
445 (while (progn
446 (let ((binding
447 (assoc rng-c-current-token
448 '(("namespace" . rng-c-parse-namespace)
449 ("datatypes" . rng-c-parse-datatypes)
450 ("default" . rng-c-parse-default)))))
451 (if binding
452 (progn
453 (rng-c-advance)
454 (funcall (cdr binding))
455 t)
456 nil))))
457 (rng-c-declare-standard-datatypes)
458 (rng-c-declare-standard-namespaces))
459
460 (defun rng-c-parse-datatypes ()
461 (let ((prefix (rng-c-parse-identifier-or-keyword)))
462 (or (not (assoc prefix rng-c-datatype-decls))
463 (rng-c-error "Duplicate datatypes declaration for prefix %s" prefix))
464 (rng-c-expect "=")
465 (setq rng-c-datatype-decls
466 (cons (cons prefix
467 (rng-make-datatypes-uri (rng-c-parse-literal)))
468 rng-c-datatype-decls))))
469
470 (defun rng-c-parse-namespace ()
471 (rng-c-declare-namespace nil
472 (rng-c-parse-identifier-or-keyword)))
473
474 (defun rng-c-parse-default ()
475 (rng-c-expect "namespace")
476 (rng-c-declare-namespace t
477 (if (string-equal rng-c-current-token "=")
478 nil
479 (rng-c-parse-identifier-or-keyword))))
480
481 (defun rng-c-declare-namespace (declare-default prefix)
482 (rng-c-expect "=")
483 (let ((ns (cond ((string-equal rng-c-current-token "inherit")
484 (rng-c-advance)
485 rng-c-inherit-namespace)
486 (t
487 (nxml-make-namespace (rng-c-parse-literal))))))
488 (and prefix
489 (or (not (assoc prefix rng-c-namespace-decls))
490 (rng-c-error "Duplicate namespace declaration for prefix %s"
491 prefix))
492 (setq rng-c-namespace-decls
493 (cons (cons prefix ns) rng-c-namespace-decls)))
494 (and declare-default
495 (or (not rng-c-default-namespace-declared)
496 (rng-c-error "Duplicate default namespace declaration"))
497 (setq rng-c-default-namespace-declared t)
498 (setq rng-c-default-namespace ns))))
499
500 (defun rng-c-parse-implicit-grammar ()
501 (let* ((rng-c-parent-grammar rng-c-current-grammar)
502 (rng-c-current-grammar (rng-c-make-grammar)))
503 (rng-c-parse-grammar-body "")
504 (rng-c-finish-grammar)))
505
506 (defun rng-c-parse-grammar-body (close-token &optional in-include)
507 (while (not (string-equal rng-c-current-token close-token))
508 (cond ((rng-c-current-token-keyword-p)
509 (let ((kw (intern rng-c-current-token)))
510 (cond ((eq kw 'start)
511 (rng-c-parse-define 'start in-include))
512 ((eq kw 'div)
513 (rng-c-advance)
514 (rng-c-parse-div in-include))
515 ((eq kw 'include)
516 (and in-include
517 (rng-c-error "Nested include"))
518 (rng-c-advance)
519 (rng-c-parse-include))
520 (t (rng-c-error "Invalid grammar keyword")))))
521 ((rng-c-current-token-ncname-p)
522 (if (looking-at "\\[")
523 (rng-c-parse-annotation-element)
524 (rng-c-parse-define rng-c-current-token
525 in-include)))
526 ((rng-c-current-token-quoted-identifier-p)
527 (if (looking-at "\\[")
528 (rng-c-parse-annotation-element)
529 (rng-c-parse-define (substring rng-c-current-token 1)
530 in-include)))
531 ((rng-c-current-token-prefixed-name-p)
532 (rng-c-parse-annotation-element))
533 ((string-equal rng-c-current-token "[")
534 (rng-c-parse-lead-annotation)
535 (and (string-equal rng-c-current-token close-token)
536 (rng-c-error "Missing annotation subject"))
537 (and (looking-at "\\[")
538 (rng-c-error "Leading annotation applied to annotation")))
539 (t (rng-c-error "Invalid grammar content"))))
540 (or (string-equal rng-c-current-token "")
541 (rng-c-advance)))
542
543 (defun rng-c-parse-div (in-include)
544 (rng-c-expect "{")
545 (rng-c-parse-grammar-body "}" in-include))
546
547 (defun rng-c-parse-include ()
548 (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
549 (rng-c-inherit-namespace (rng-c-parse-opt-inherit))
550 overrides)
551 (cond ((string-equal rng-c-current-token "{")
552 (rng-c-advance)
553 (let ((rng-c-overrides nil))
554 (rng-c-parse-grammar-body "}" t)
555 (setq overrides rng-c-overrides))
556 (setq overrides (rng-c-start-include overrides))
557 (rng-c-parse-file filename 'include)
558 (rng-c-end-include overrides))
559 (t (rng-c-parse-file filename 'include)))))
560
561 (defun rng-c-parse-define (name in-include)
562 (rng-c-advance)
563 (let ((assign (assoc rng-c-current-token
564 '(("=" . nil)
565 ("|=" . choice)
566 ("&=" . interleave)))))
567 (or assign
568 (rng-c-error "Expected assignment operator"))
569 (rng-c-advance)
570 (let ((ref (rng-c-prepare-define name (cdr assign) in-include)))
571 (rng-c-define ref (rng-c-parse-pattern)))))
572
573 (defvar rng-c-had-except nil)
574
575 (defun rng-c-parse-pattern ()
576 (let* ((rng-c-had-except nil)
577 (p (rng-c-parse-repeated))
578 (op (assoc rng-c-current-token
579 '(("|" . rng-make-choice)
580 ("," . rng-make-group)
581 ("&" . rng-make-interleave)))))
582 (if op
583 (if rng-c-had-except
584 (rng-c-error "Parentheses required around pattern using -")
585 (let* ((patterns (cons p nil))
586 (tail patterns)
587 (connector rng-c-current-token))
588 (while (progn
589 (rng-c-advance)
590 (let ((newcdr (cons (rng-c-parse-repeated) nil)))
591 (setcdr tail newcdr)
592 (setq tail newcdr))
593 (string-equal rng-c-current-token connector)))
594 (funcall (cdr op) patterns)))
595 p)))
596
597 (defun rng-c-parse-repeated ()
598 (let ((p (rng-c-parse-follow-annotations
599 (rng-c-parse-primary)))
600 (op (assoc rng-c-current-token
601 '(("*" . rng-make-zero-or-more)
602 ("+" . rng-make-one-or-more)
603 ("?" . rng-make-optional)))))
604 (if op
605 (if rng-c-had-except
606 (rng-c-error "Parentheses required around pattern using -")
607 (rng-c-parse-follow-annotations
608 (progn
609 (rng-c-advance)
610 (funcall (cdr op) p))))
611 p)))
612
613 (defun rng-c-parse-primary ()
614 "Parse a primary expression.
615 The current token must be the first token of the expression.
616 After parsing the current token should be the token following
617 the primary expression."
618 (cond ((rng-c-current-token-keyword-p)
619 (let ((parse-function (get (intern rng-c-current-token)
620 'rng-c-pattern)))
621 (or parse-function
622 (rng-c-error "Keyword %s does not introduce a pattern"
623 rng-c-current-token))
624 (rng-c-advance)
625 (funcall parse-function)))
626 ((rng-c-current-token-ncname-p)
627 (rng-c-advance-with (rng-c-make-ref rng-c-current-token)))
628 ((string-equal rng-c-current-token "(")
629 (rng-c-advance)
630 (let ((p (rng-c-parse-pattern)))
631 (rng-c-expect ")")
632 p))
633 ((rng-c-current-token-prefixed-name-p)
634 (let ((name (rng-c-expand-datatype rng-c-current-token)))
635 (rng-c-advance)
636 (rng-c-parse-data name)))
637 ((rng-c-current-token-literal-p)
638 (rng-make-value rng-token-datatype (rng-c-parse-literal) nil))
639 ((rng-c-current-token-quoted-identifier-p)
640 (rng-c-advance-with
641 (rng-c-make-ref (substring rng-c-current-token 1))))
642 ((string-equal rng-c-current-token "[")
643 (rng-c-parse-lead-annotation)
644 (rng-c-parse-primary))
645 (t (rng-c-error "Invalid pattern"))))
646
647 (defun rng-c-parse-parent ()
648 (and (rng-c-current-token-keyword-p)
649 (rng-c-error "Keyword following parent was not quoted"
650 rng-c-current-token))
651 (rng-c-make-parent-ref (rng-c-parse-identifier-or-keyword)))
652
653 (defun rng-c-parse-literal ()
654 (rng-c-fix-escaped-newlines
655 (apply 'concat (rng-c-parse-literal-segments))))
656
657 (defun rng-c-parse-literal-segments ()
658 (let ((str (rng-c-parse-literal-segment)))
659 (cons str
660 (cond ((string-equal rng-c-current-token "~")
661 (rng-c-advance)
662 (rng-c-parse-literal-segments))
663 (t nil)))))
664
665 (defun rng-c-parse-literal-segment ()
666 (or (rng-c-current-token-literal-p)
667 (rng-c-error "Expected a literal"))
668 (rng-c-advance-with
669 (let ((n (if (and (>= (length rng-c-current-token) 6)
670 (eq (aref rng-c-current-token 0)
671 (aref rng-c-current-token 1)))
672 3
673 1)))
674 (substring rng-c-current-token n (- n)))))
675
676 (defun rng-c-fix-escaped-newlines (str)
677 (let ((pos 0))
678 (while (progn
679 (let ((n (string-match "\C-@" str pos)))
680 (and n
681 (aset str n ?\n)
682 (setq pos (1+ n)))))))
683 str)
684
685 (defun rng-c-parse-identifier-or-keyword ()
686 (cond ((rng-c-current-token-ncname-p)
687 (rng-c-advance-with rng-c-current-token))
688 ((rng-c-current-token-quoted-identifier-p)
689 (rng-c-advance-with (substring rng-c-current-token 1)))
690 (t (rng-c-error "Expected identifier or keyword"))))
691
692 (put 'string 'rng-c-pattern 'rng-c-parse-string)
693 (put 'token 'rng-c-pattern 'rng-c-parse-token)
694 (put 'element 'rng-c-pattern 'rng-c-parse-element)
695 (put 'attribute 'rng-c-pattern 'rng-c-parse-attribute)
696 (put 'list 'rng-c-pattern 'rng-c-parse-list)
697 (put 'mixed 'rng-c-pattern 'rng-c-parse-mixed)
698 (put 'text 'rng-c-pattern 'rng-c-parse-text)
699 (put 'empty 'rng-c-pattern 'rng-c-parse-empty)
700 (put 'notAllowed 'rng-c-pattern 'rng-c-parse-not-allowed)
701 (put 'grammar 'rng-c-pattern 'rng-c-parse-grammar)
702 (put 'parent 'rng-c-pattern 'rng-c-parse-parent)
703 (put 'external 'rng-c-pattern 'rng-c-parse-external)
704
705 (defun rng-c-parse-element ()
706 (let ((name-class (rng-c-parse-name-class nil)))
707 (rng-c-expect "{")
708 (let ((pattern (rng-c-parse-pattern)))
709 (rng-c-expect "}")
710 (rng-make-element name-class pattern))))
711
712 (defun rng-c-parse-attribute ()
713 (let ((name-class (rng-c-parse-name-class 'attribute)))
714 (rng-c-expect "{")
715 (let ((pattern (rng-c-parse-pattern)))
716 (rng-c-expect "}")
717 (rng-make-attribute name-class pattern))))
718
719 (defun rng-c-parse-name-class (attribute)
720 (let* ((rng-c-had-except nil)
721 (name-class
722 (rng-c-parse-follow-annotations
723 (rng-c-parse-primary-name-class attribute))))
724 (if (string-equal rng-c-current-token "|")
725 (let* ((name-classes (cons name-class nil))
726 (tail name-classes))
727 (or (not rng-c-had-except)
728 (rng-c-error "Parentheses required around name-class using - operator"))
729 (while (progn
730 (rng-c-advance)
731 (let ((newcdr
732 (cons (rng-c-parse-follow-annotations
733 (rng-c-parse-primary-name-class attribute))
734 nil)))
735 (setcdr tail newcdr)
736 (setq tail newcdr))
737 (string-equal rng-c-current-token "|")))
738 (rng-make-choice-name-class name-classes))
739 name-class)))
740
741 (defun rng-c-parse-primary-name-class (attribute)
742 (cond ((rng-c-current-token-ncname-p)
743 (rng-c-advance-with
744 (rng-make-name-name-class
745 (rng-make-name (rng-c-unqualified-namespace attribute)
746 rng-c-current-token))))
747 ((rng-c-current-token-prefixed-name-p)
748 (rng-c-advance-with
749 (rng-make-name-name-class
750 (rng-c-expand-name rng-c-current-token))))
751 ((string-equal rng-c-current-token "*")
752 (let ((except (rng-c-parse-opt-except-name-class attribute)))
753 (if except
754 (rng-make-any-name-except-name-class except)
755 (rng-make-any-name-name-class))))
756 ((rng-c-current-token-ns-name-p)
757 (let* ((ns
758 (rng-c-lookup-prefix (substring rng-c-current-token
759 0
760 -2)))
761 (except (rng-c-parse-opt-except-name-class attribute)))
762 (if except
763 (rng-make-ns-name-except-name-class ns except)
764 (rng-make-ns-name-name-class ns))))
765 ((string-equal rng-c-current-token "(")
766 (rng-c-advance)
767 (let ((name-class (rng-c-parse-name-class attribute)))
768 (rng-c-expect ")")
769 name-class))
770 ((rng-c-current-token-quoted-identifier-p)
771 (rng-c-advance-with
772 (rng-make-name-name-class
773 (rng-make-name (rng-c-unqualified-namespace attribute)
774 (substring rng-c-current-token 1)))))
775 ((string-equal rng-c-current-token "[")
776 (rng-c-parse-lead-annotation)
777 (rng-c-parse-primary-name-class attribute))
778 (t (rng-c-error "Bad name class"))))
779
780 (defun rng-c-parse-opt-except-name-class (attribute)
781 (rng-c-advance)
782 (and (string-equal rng-c-current-token "-")
783 (or (not rng-c-had-except)
784 (rng-c-error "Parentheses required around name-class using - operator"))
785 (setq rng-c-had-except t)
786 (progn
787 (rng-c-advance)
788 (rng-c-parse-primary-name-class attribute))))
789
790 (defun rng-c-parse-mixed ()
791 (rng-c-expect "{")
792 (let ((pattern (rng-make-mixed (rng-c-parse-pattern))))
793 (rng-c-expect "}")
794 pattern))
795
796 (defun rng-c-parse-list ()
797 (rng-c-expect "{")
798 (let ((pattern (rng-make-list (rng-c-parse-pattern))))
799 (rng-c-expect "}")
800 pattern))
801
802 (defun rng-c-parse-text ()
803 (rng-make-text))
804
805 (defun rng-c-parse-empty ()
806 (rng-make-empty))
807
808 (defun rng-c-parse-not-allowed ()
809 (rng-make-not-allowed))
810
811 (defun rng-c-parse-string ()
812 (rng-c-parse-data rng-string-datatype))
813
814 (defun rng-c-parse-token ()
815 (rng-c-parse-data rng-token-datatype))
816
817 (defun rng-c-parse-data (name)
818 (if (rng-c-current-token-literal-p)
819 (rng-make-value name
820 (rng-c-parse-literal)
821 (and (car name)
822 (rng-c-make-context)))
823 (let ((params (rng-c-parse-optional-params)))
824 (if (string-equal rng-c-current-token "-")
825 (progn
826 (if rng-c-had-except
827 (rng-c-error "Parentheses required around pattern using -")
828 (setq rng-c-had-except t))
829 (rng-c-advance)
830 (rng-make-data-except name
831 params
832 (rng-c-parse-primary)))
833 (rng-make-data name params)))))
834
835 (defun rng-c-parse-optional-params ()
836 (and (string-equal rng-c-current-token "{")
837 (let* ((head (cons nil nil))
838 (tail head))
839 (rng-c-advance)
840 (while (not (string-equal rng-c-current-token "}"))
841 (and (string-equal rng-c-current-token "[")
842 (rng-c-parse-lead-annotation))
843 (let ((name (rng-c-parse-identifier-or-keyword)))
844 (rng-c-expect "=")
845 (let ((newcdr (cons (cons (intern name)
846 (rng-c-parse-literal))
847 nil)))
848 (setcdr tail newcdr)
849 (setq tail newcdr))))
850 (rng-c-advance)
851 (cdr head))))
852
853 (defun rng-c-parse-external ()
854 (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
855 (rng-c-inherit-namespace (rng-c-parse-opt-inherit)))
856 (rng-c-parse-file filename 'external)))
857
858 (defun rng-c-expand-file (uri)
859 (condition-case err
860 (rng-uri-file-name (rng-uri-resolve uri
861 (rng-file-name-uri rng-c-file-name)))
862 (rng-uri-error
863 (rng-c-error (cadr err)))))
864
865 (defun rng-c-parse-opt-inherit ()
866 (cond ((string-equal rng-c-current-token "inherit")
867 (rng-c-advance)
868 (rng-c-expect "=")
869 (rng-c-lookup-prefix (rng-c-parse-identifier-or-keyword)))
870 (t rng-c-default-namespace)))
871
872 (defun rng-c-parse-grammar ()
873 (rng-c-expect "{")
874 (let* ((rng-c-parent-grammar rng-c-current-grammar)
875 (rng-c-current-grammar (rng-c-make-grammar)))
876 (rng-c-parse-grammar-body "}")
877 (rng-c-finish-grammar)))
878
879 (defun rng-c-parse-lead-annotation ()
880 (rng-c-parse-annotation-body)
881 (and (string-equal rng-c-current-token "[")
882 (rng-c-error "Multiple leading annotations")))
883
884 (defun rng-c-parse-follow-annotations (obj)
885 (while (string-equal rng-c-current-token ">>")
886 (rng-c-advance)
887 (if (rng-c-current-token-prefixed-name-p)
888 (rng-c-advance)
889 (rng-c-parse-identifier-or-keyword))
890 (rng-c-parse-annotation-body t))
891 obj)
892
893 (defun rng-c-parse-annotation-element ()
894 (rng-c-advance)
895 (rng-c-parse-annotation-body t))
896
897 ;; XXX need stricter checking of attribute names
898 ;; XXX don't allow attributes after text
899
900 (defun rng-c-parse-annotation-body (&optional allow-text)
901 "Current token is [. Parse up to matching ].
902 Current token after parse is token following ]."
903 (or (string-equal rng-c-current-token "[")
904 (rng-c-error "Expected ["))
905 (rng-c-advance)
906 (while (not (string-equal rng-c-current-token "]"))
907 (cond ((rng-c-current-token-literal-p)
908 (or allow-text
909 (rng-c-error "Out of place text within annotation"))
910 (rng-c-parse-literal))
911 (t
912 (if (rng-c-current-token-prefixed-name-p)
913 (rng-c-advance)
914 (rng-c-parse-identifier-or-keyword))
915 (cond ((string-equal rng-c-current-token "[")
916 (rng-c-parse-annotation-body t))
917 ((string-equal rng-c-current-token "=")
918 (rng-c-advance)
919 (rng-c-parse-literal))
920 (t (rng-c-error "Expected = or ["))))))
921 (rng-c-advance))
922
923 (defun rng-c-advance-with (pattern)
924 (rng-c-advance)
925 pattern)
926
927 (defun rng-c-expect (str)
928 (or (string-equal rng-c-current-token str)
929 (rng-c-error "Expected `%s' but got `%s'" str rng-c-current-token))
930 (rng-c-advance))
931
932 (provide 'rng-cmpct)
933
934 ;;; rng-cmpct.el