Initial merge of nxml
[bpt/emacs.git] / lisp / nxml / rng-cmpct.el
CommitLineData
8cd39fb3
MH
1;;; rng-cmpct.el --- parsing of RELAX NG Compact Syntax schemas
2
3;; Copyright (C) 2003 Free Software Foundation, Inc.
4
5;; Author: James Clark
6;; Keywords: XML, RelaxNG
7
8;; This program is free software; you can redistribute it and/or
9;; modify it under the terms of the GNU General Public License as
10;; published by the Free Software Foundation; either version 2 of
11;; the License, or (at your option) any later version.
12
13;; This program is distributed in the hope that it will be
14;; useful, but WITHOUT ANY WARRANTY; without even the implied
15;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
16;; PURPOSE. See the GNU General Public License for more details.
17
18;; You should have received a copy of the GNU General Public
19;; License along with this program; if not, write to the Free
20;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
21;; MA 02111-1307 USA
22
23;;; Commentary:
24
25;; This parses a RELAX NG Compact Syntax schema into the form
26;; specified in rng-pttrn.el.
27;;
28;; RELAX NG Compact Syntax is specified by
29;; http://relaxng.org/compact.html
30;;
31;; This file uses the prefix "rng-c-".
32
33;;; Code:
34
35(require 'nxml-util)
36(require 'rng-util)
37(require 'rng-uri)
38(require 'rng-pttrn)
39
40;;;###autoload
41(defun rng-c-load-schema (filename)
42 "Load a schema in RELAX NG compact syntax from FILENAME.
43Return a pattern."
44 (rng-c-parse-file filename))
45
46;;; Error handling
47
48(put 'rng-c-incorrect-schema
49 'error-conditions
50 '(error rng-error nxml-file-parse-error rng-c-incorrect-schema))
51
52(put 'rng-c-incorrect-schema
53 'error-message
54 "Incorrect schema")
55
56(defun rng-c-signal-incorrect-schema (filename pos message)
57 (nxml-signal-file-parse-error filename
58 pos
59 message
60 'rng-c-incorrect-schema))
61
62;;; Lexing
63
64(defconst rng-c-keywords
65 '("attribute"
66 "default"
67 "datatypes"
68 "div"
69 "element"
70 "empty"
71 "external"
72 "grammar"
73 "include"
74 "inherit"
75 "list"
76 "mixed"
77 "namespace"
78 "notAllowed"
79 "parent"
80 "start"
81 "string"
82 "text"
83 "token")
84 "List of strings that are keywords in the compact syntax.")
85
86(defconst rng-c-anchored-keyword-re
87 (concat "\\`\\(" (regexp-opt rng-c-keywords) "\\)\\'")
88 "Regular expression to match a keyword in the compact syntax.")
89
90(defvar rng-c-syntax-table nil
91 "Syntax table for parsing the compact syntax.")
92
93(if rng-c-syntax-table
94 ()
95 (setq rng-c-syntax-table (make-syntax-table))
96 (modify-syntax-entry ?# "<" rng-c-syntax-table)
97 (modify-syntax-entry ?\n ">" rng-c-syntax-table)
98 (modify-syntax-entry ?- "w" rng-c-syntax-table)
99 (modify-syntax-entry ?. "w" rng-c-syntax-table)
100 (modify-syntax-entry ?_ "w" rng-c-syntax-table)
101 (modify-syntax-entry ?: "_" rng-c-syntax-table))
102
103(defconst rng-c-literal-1-re
104 "'\\(''\\([^']\\|'[^']\\|''[^']\\)*''\\|[^'\n]*\\)'"
105 "Regular expression to match a single-quoted literal.")
106
107(defconst rng-c-literal-2-re
108 (replace-regexp-in-string "'" "\"" rng-c-literal-1-re)
109 "Regular expression to match a double-quoted literal.")
110
111(defconst rng-c-ncname-re "\\w+")
112
113(defconst rng-c-anchored-ncname-re
114 (concat "\\`" rng-c-ncname-re "\\'"))
115
116(defconst rng-c-token-re
117 (concat "[&|]=" "\\|"
118 "[][()|&,*+?{}~=-]" "\\|"
119 rng-c-literal-1-re "\\|"
120 rng-c-literal-2-re "\\|"
121 rng-c-ncname-re "\\(:\\(\\*\\|" rng-c-ncname-re "\\)\\)?" "\\|"
122 "\\\\" rng-c-ncname-re "\\|"
123 ">>")
124 "Regular expression to match a token in the compact syntax.")
125
126(defun rng-c-init-buffer ()
127 (setq case-fold-search nil) ; automatically becomes buffer-local when set
128 (set-buffer-multibyte t)
129 (set-syntax-table rng-c-syntax-table))
130
131(defvar rng-c-current-token nil)
132(make-variable-buffer-local 'rng-c-current-token)
133
134(defun rng-c-advance ()
135 (cond ((looking-at rng-c-token-re)
136 (setq rng-c-current-token (match-string 0))
137 (goto-char (match-end 0))
138 (forward-comment (point-max)))
139 ((= (point) (point-max))
140 (setq rng-c-current-token ""))
141 (t (rng-c-error "Invalid token"))))
142
143(defconst rng-c-anchored-datatype-name-re
144 (concat "\\`" rng-c-ncname-re ":" rng-c-ncname-re "\\'"))
145
146(defsubst rng-c-current-token-keyword-p ()
147 (string-match rng-c-anchored-keyword-re rng-c-current-token))
148
149(defsubst rng-c-current-token-prefixed-name-p ()
150 (string-match rng-c-anchored-datatype-name-re rng-c-current-token))
151
152(defsubst rng-c-current-token-literal-p ()
153 (string-match "\\`['\"]" rng-c-current-token))
154
155(defsubst rng-c-current-token-quoted-identifier-p ()
156 (string-match "\\`\\\\" rng-c-current-token))
157
158(defsubst rng-c-current-token-ncname-p ()
159 (string-match rng-c-anchored-ncname-re rng-c-current-token))
160
161(defsubst rng-c-current-token-ns-name-p ()
162 (let ((len (length rng-c-current-token)))
163 (and (> len 0)
164 (= (aref rng-c-current-token (- len 1)) ?*))))
165
166;;; Namespaces
167
168(defvar rng-c-inherit-namespace nil)
169
170(defvar rng-c-default-namespace nil)
171
172(defvar rng-c-default-namespace-declared nil)
173
174(defvar rng-c-namespace-decls nil
175 "Alist of namespace declarations.")
176
177(defconst rng-c-no-namespace nil)
178
179(defun rng-c-declare-standard-namespaces ()
180 (setq rng-c-namespace-decls
181 (cons (cons "xml" nxml-xml-namespace-uri)
182 rng-c-namespace-decls))
183 (when (and (not rng-c-default-namespace-declared)
184 rng-c-inherit-namespace)
185 (setq rng-c-default-namespace rng-c-inherit-namespace)))
186
187(defun rng-c-expand-name (prefixed-name)
188 (let ((i (string-match ":" prefixed-name)))
189 (rng-make-name (rng-c-lookup-prefix (substring prefixed-name
190 0
191 i))
192 (substring prefixed-name (+ i 1)))))
193
194(defun rng-c-lookup-prefix (prefix)
195 (let ((binding (assoc prefix rng-c-namespace-decls)))
196 (or binding (rng-c-error "Undefined prefix %s" prefix))
197 (cdr binding)))
198
199(defun rng-c-unqualified-namespace (attribute)
200 (if attribute
201 rng-c-no-namespace
202 rng-c-default-namespace))
203
204(defun rng-c-make-context ()
205 (cons rng-c-default-namespace rng-c-namespace-decls))
206
207;;; Datatypes
208
209(defconst rng-string-datatype
210 (rng-make-datatype rng-builtin-datatypes-uri "string"))
211
212(defconst rng-token-datatype
213 (rng-make-datatype rng-builtin-datatypes-uri "token"))
214
215(defvar rng-c-datatype-decls nil
216 "Alist of datatype declarations.
217Contains a list of pairs (PREFIX . URI) where PREFIX is a string
218and URI is a symbol.")
219
220(defun rng-c-declare-standard-datatypes ()
221 (setq rng-c-datatype-decls
222 (cons (cons "xsd" rng-xsd-datatypes-uri)
223 rng-c-datatype-decls)))
224
225(defun rng-c-lookup-datatype-prefix (prefix)
226 (let ((binding (assoc prefix rng-c-datatype-decls)))
227 (or binding (rng-c-error "Undefined prefix %s" prefix))
228 (cdr binding)))
229
230(defun rng-c-expand-datatype (prefixed-name)
231 (let ((i (string-match ":" prefixed-name)))
232 (rng-make-datatype
233 (rng-c-lookup-datatype-prefix (substring prefixed-name 0 i))
234 (substring prefixed-name (+ i 1)))))
235
236;;; Grammars
237
238(defvar rng-c-current-grammar nil)
239(defvar rng-c-parent-grammar nil)
240
241(defun rng-c-make-grammar ()
242 (make-hash-table :test 'equal))
243
244(defconst rng-c-about-override-slot 0)
245(defconst rng-c-about-combine-slot 1)
246
247(defun rng-c-lookup-create (name grammar)
248 "Return a def object for NAME. A def object is a pair
249\(ABOUT . REF) where REF is returned by `rng-make-ref'. ABOUT is a
250two-element vector [OVERRIDE COMBINE]. COMBINE is either nil, choice
251or interleave. OVERRIDE is either nil, require or t."
252 (let ((def (gethash name grammar)))
253 (if def
254 def
255 (progn
256 (setq def (cons (vector nil nil) (rng-make-ref name)))
257 (puthash name def grammar)
258 def))))
259
260(defun rng-c-make-ref (name)
261 (or rng-c-current-grammar
262 (rng-c-error "Reference not in a grammar"))
263 (cdr (rng-c-lookup-create name rng-c-current-grammar)))
264
265(defun rng-c-make-parent-ref (name)
266 (or rng-c-parent-grammar
267 (rng-c-error "Reference to non-existent parent grammar"))
268 (cdr (rng-c-lookup-create name rng-c-parent-grammar)))
269
270(defvar rng-c-overrides nil
271 "Contains a list of (NAME . DEF) pairs.")
272
273(defun rng-c-merge-combine (def combine name)
274 (let* ((about (car def))
275 (current-combine (aref about rng-c-about-combine-slot)))
276 (if combine
277 (if current-combine
278 (or (eq combine current-combine)
279 (rng-c-error "Inconsistent combine for %s" name))
280 (aset about rng-c-about-combine-slot combine))
281 current-combine)))
282
283(defun rng-c-prepare-define (name combine in-include)
284 (let* ((def (rng-c-lookup-create name rng-c-current-grammar))
285 (about (car def))
286 (overridden (aref about rng-c-about-override-slot)))
287 (and in-include
288 (setq rng-c-overrides (cons (cons name def) rng-c-overrides)))
289 (cond (overridden (and (eq overridden 'require)
290 (aset about rng-c-about-override-slot t))
291 nil)
292 (t (setq combine (rng-c-merge-combine def combine name))
293 (and (rng-ref-get (cdr def))
294 (not combine)
295 (rng-c-error "Duplicate definition of %s" name))
296 def))))
297
298(defun rng-c-start-include (overrides)
299 (mapcar (lambda (name-def)
300 (let* ((def (cdr name-def))
301 (about (car def))
302 (save (aref about rng-c-about-override-slot)))
303 (aset about rng-c-about-override-slot 'require)
304 (cons save name-def)))
305 overrides))
306
307(defun rng-c-end-include (overrides)
308 (mapcar (lambda (o)
309 (let* ((saved (car o))
310 (name-def (cdr o))
311 (name (car name-def))
312 (def (cdr name-def))
313 (about (car def)))
314 (and (eq (aref about rng-c-about-override-slot) 'require)
315 (rng-c-error "Definition of %s in include did not override definition in included file" name))
316 (aset about rng-c-about-override-slot saved)))
317 overrides))
318
319(defun rng-c-define (def value)
320 (and def
321 (let ((current-value (rng-ref-get (cdr def))))
322 (rng-ref-set (cdr def)
323 (if current-value
324 (if (eq (aref (car def) rng-c-about-combine-slot)
325 'choice)
326 (rng-make-choice (list current-value value))
327 (rng-make-interleave (list current-value value)))
328 value)))))
329
330(defun rng-c-finish-grammar ()
331 (maphash (lambda (key def)
332 (or (rng-ref-get (cdr def))
333 (rng-c-error "Reference to undefined pattern %s" key)))
334 rng-c-current-grammar)
335 (rng-ref-get (cdr (or (gethash 'start rng-c-current-grammar)
336 (rng-c-error "No definition of start")))))
337
338;;; Parsing
339
340(defvar rng-c-escape-positions nil)
341(make-variable-buffer-local 'rng-c-escape-positions)
342
343(defvar rng-c-file-name nil)
344(make-variable-buffer-local 'rng-c-file-name)
345
346(defvar rng-c-file-index nil)
347
348(defun rng-c-parse-file (filename &optional context)
349 (save-excursion
350 (set-buffer (get-buffer-create (rng-c-buffer-name context)))
351 (erase-buffer)
352 (rng-c-init-buffer)
353 (setq rng-c-file-name
354 (car (insert-file-contents filename)))
355 (setq rng-c-escape-positions nil)
356 (rng-c-process-escapes)
357 (rng-c-parse-top-level context)))
358
359(defun rng-c-buffer-name (context)
360 (concat " *RNC Input"
361 (if context
362 (concat "<"
363 (number-to-string (setq rng-c-file-index
364 (1+ rng-c-file-index)))
365 ">*")
366 (setq rng-c-file-index 1)
367 "*")))
368
369(defun rng-c-process-escapes ()
370 ;; Check for any nuls, since we will use nul chars
371 ;; for internal purposes.
372 (let ((pos (search-forward "\C-@" nil t)))
373 (and pos
374 (rng-c-error "Nul character found (binary file?)")))
375 (let ((offset 0))
376 (while (re-search-forward "\\\\x+{\\([0-9a-fA-F]+\\)}"
377 (point-max)
378 t)
379 (let* ((ch (decode-char 'ucs (string-to-number (match-string 1) 16))))
380 (if (and ch (> ch 0))
381 (let ((begin (match-beginning 0))
382 (end (match-end 0)))
383 (delete-region begin end)
384 ;; Represent an escaped newline by nul, so
385 ;; that we can distinguish it from a literal newline.
386 ;; We will translate it back into a real newline later.
387 (insert (if (eq ch ?\n) 0 ch))
388 (setq offset (+ offset (- end begin 1)))
389 (setq rng-c-escape-positions
390 (cons (cons (point) offset)
391 rng-c-escape-positions)))
392 (rng-c-error "Invalid character escape")))))
393 (goto-char 1))
394
395(defun rng-c-translate-position (pos)
396 (let ((tem rng-c-escape-positions))
397 (while (and tem
398 (> (caar tem) pos))
399 (setq tem (cdr tem)))
400 (if tem
401 (+ pos (cdar tem))
402 pos)))
403
404(defun rng-c-error (&rest args)
405 (rng-c-signal-incorrect-schema rng-c-file-name
406 (rng-c-translate-position (point))
407 (apply 'format args)))
408
409(defun rng-c-parse-top-level (context)
410 (let ((rng-c-namespace-decls nil)
411 (rng-c-default-namespace nil)
412 (rng-c-datatype-decls nil))
413 (goto-char (point-min))
414 (forward-comment (point-max))
415 (rng-c-advance)
416 (rng-c-parse-decls)
417 (let ((p (if (eq context 'include)
418 (if (rng-c-implicit-grammar-p)
419 (rng-c-parse-grammar-body "")
420 (rng-c-parse-included-grammar))
421 (if (rng-c-implicit-grammar-p)
422 (rng-c-parse-implicit-grammar)
423 (rng-c-parse-pattern)))))
424 (or (string-equal rng-c-current-token "")
425 (rng-c-error "Unexpected characters after pattern"))
426 p)))
427
428(defun rng-c-parse-included-grammar ()
429 (or (string-equal rng-c-current-token "grammar")
430 (rng-c-error "Included schema is not a grammar"))
431 (rng-c-advance)
432 (rng-c-expect "{")
433 (rng-c-parse-grammar-body "}"))
434
435(defun rng-c-implicit-grammar-p ()
436 (or (and (or (rng-c-current-token-prefixed-name-p)
437 (rng-c-current-token-quoted-identifier-p)
438 (and (rng-c-current-token-ncname-p)
439 (not (rng-c-current-token-keyword-p))))
440 (looking-at "\\["))
441 (and (string-equal rng-c-current-token "[")
442 (rng-c-parse-lead-annotation)
443 nil)
444 (member rng-c-current-token '("div" "include" ""))
445 (looking-at "[|&]?=")))
446
447(defun rng-c-parse-decls ()
448 (setq rng-c-default-namespace-declared nil)
449 (while (progn
450 (let ((binding
451 (assoc rng-c-current-token
452 '(("namespace" . rng-c-parse-namespace)
453 ("datatypes" . rng-c-parse-datatypes)
454 ("default" . rng-c-parse-default)))))
455 (if binding
456 (progn
457 (rng-c-advance)
458 (funcall (cdr binding))
459 t)
460 nil))))
461 (rng-c-declare-standard-datatypes)
462 (rng-c-declare-standard-namespaces))
463
464(defun rng-c-parse-datatypes ()
465 (let ((prefix (rng-c-parse-identifier-or-keyword)))
466 (or (not (assoc prefix rng-c-datatype-decls))
467 (rng-c-error "Duplicate datatypes declaration for prefix %s" prefix))
468 (rng-c-expect "=")
469 (setq rng-c-datatype-decls
470 (cons (cons prefix
471 (rng-make-datatypes-uri (rng-c-parse-literal)))
472 rng-c-datatype-decls))))
473
474(defun rng-c-parse-namespace ()
475 (rng-c-declare-namespace nil
476 (rng-c-parse-identifier-or-keyword)))
477
478(defun rng-c-parse-default ()
479 (rng-c-expect "namespace")
480 (rng-c-declare-namespace t
481 (if (string-equal rng-c-current-token "=")
482 nil
483 (rng-c-parse-identifier-or-keyword))))
484
485(defun rng-c-declare-namespace (declare-default prefix)
486 (rng-c-expect "=")
487 (let ((ns (cond ((string-equal rng-c-current-token "inherit")
488 (rng-c-advance)
489 rng-c-inherit-namespace)
490 (t
491 (nxml-make-namespace (rng-c-parse-literal))))))
492 (and prefix
493 (or (not (assoc prefix rng-c-namespace-decls))
494 (rng-c-error "Duplicate namespace declaration for prefix %s"
495 prefix))
496 (setq rng-c-namespace-decls
497 (cons (cons prefix ns) rng-c-namespace-decls)))
498 (and declare-default
499 (or (not rng-c-default-namespace-declared)
500 (rng-c-error "Duplicate default namespace declaration"))
501 (setq rng-c-default-namespace-declared t)
502 (setq rng-c-default-namespace ns))))
503
504(defun rng-c-parse-implicit-grammar ()
505 (let* ((rng-c-parent-grammar rng-c-current-grammar)
506 (rng-c-current-grammar (rng-c-make-grammar)))
507 (rng-c-parse-grammar-body "")
508 (rng-c-finish-grammar)))
509
510(defun rng-c-parse-grammar-body (close-token &optional in-include)
511 (while (not (string-equal rng-c-current-token close-token))
512 (cond ((rng-c-current-token-keyword-p)
513 (let ((kw (intern rng-c-current-token)))
514 (cond ((eq kw 'start)
515 (rng-c-parse-define 'start in-include))
516 ((eq kw 'div)
517 (rng-c-advance)
518 (rng-c-parse-div in-include))
519 ((eq kw 'include)
520 (and in-include
521 (rng-c-error "Nested include"))
522 (rng-c-advance)
523 (rng-c-parse-include))
524 (t (rng-c-error "Invalid grammar keyword")))))
525 ((rng-c-current-token-ncname-p)
526 (if (looking-at "\\[")
527 (rng-c-parse-annotation-element)
528 (rng-c-parse-define rng-c-current-token
529 in-include)))
530 ((rng-c-current-token-quoted-identifier-p)
531 (if (looking-at "\\[")
532 (rng-c-parse-annotation-element)
533 (rng-c-parse-define (substring rng-c-current-token 1)
534 in-include)))
535 ((rng-c-current-token-prefixed-name-p)
536 (rng-c-parse-annotation-element))
537 ((string-equal rng-c-current-token "[")
538 (rng-c-parse-lead-annotation)
539 (and (string-equal rng-c-current-token close-token)
540 (rng-c-error "Missing annotation subject"))
541 (and (looking-at "\\[")
542 (rng-c-error "Leading annotation applied to annotation")))
543 (t (rng-c-error "Invalid grammar content"))))
544 (or (string-equal rng-c-current-token "")
545 (rng-c-advance)))
546
547(defun rng-c-parse-div (in-include)
548 (rng-c-expect "{")
549 (rng-c-parse-grammar-body "}" in-include))
550
551(defun rng-c-parse-include ()
552 (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
553 (rng-c-inherit-namespace (rng-c-parse-opt-inherit))
554 overrides)
555 (cond ((string-equal rng-c-current-token "{")
556 (rng-c-advance)
557 (let ((rng-c-overrides nil))
558 (rng-c-parse-grammar-body "}" t)
559 (setq overrides rng-c-overrides))
560 (setq overrides (rng-c-start-include overrides))
561 (rng-c-parse-file filename 'include)
562 (rng-c-end-include overrides))
563 (t (rng-c-parse-file filename 'include)))))
564
565(defun rng-c-parse-define (name in-include)
566 (rng-c-advance)
567 (let ((assign (assoc rng-c-current-token
568 '(("=" . nil)
569 ("|=" . choice)
570 ("&=" . interleave)))))
571 (or assign
572 (rng-c-error "Expected assignment operator"))
573 (rng-c-advance)
574 (let ((ref (rng-c-prepare-define name (cdr assign) in-include)))
575 (rng-c-define ref (rng-c-parse-pattern)))))
576
577(defvar rng-c-had-except nil)
578
579(defun rng-c-parse-pattern ()
580 (let* ((rng-c-had-except nil)
581 (p (rng-c-parse-repeated))
582 (op (assoc rng-c-current-token
583 '(("|" . rng-make-choice)
584 ("," . rng-make-group)
585 ("&" . rng-make-interleave)))))
586 (if op
587 (if rng-c-had-except
588 (rng-c-error "Parentheses required around pattern using -")
589 (let* ((patterns (cons p nil))
590 (tail patterns)
591 (connector rng-c-current-token))
592 (while (progn
593 (rng-c-advance)
594 (let ((newcdr (cons (rng-c-parse-repeated) nil)))
595 (setcdr tail newcdr)
596 (setq tail newcdr))
597 (string-equal rng-c-current-token connector)))
598 (funcall (cdr op) patterns)))
599 p)))
600
601(defun rng-c-parse-repeated ()
602 (let ((p (rng-c-parse-follow-annotations
603 (rng-c-parse-primary)))
604 (op (assoc rng-c-current-token
605 '(("*" . rng-make-zero-or-more)
606 ("+" . rng-make-one-or-more)
607 ("?" . rng-make-optional)))))
608 (if op
609 (if rng-c-had-except
610 (rng-c-error "Parentheses required around pattern using -")
611 (rng-c-parse-follow-annotations
612 (progn
613 (rng-c-advance)
614 (funcall (cdr op) p))))
615 p)))
616
617(defun rng-c-parse-primary ()
618 "Parse a primary expression. The current token must be the first
619token of the expression. After parsing the current token should be
620token following the primary expression."
621 (cond ((rng-c-current-token-keyword-p)
622 (let ((parse-function (get (intern rng-c-current-token)
623 'rng-c-pattern)))
624 (or parse-function
625 (rng-c-error "Keyword %s does not introduce a pattern"
626 rng-c-current-token))
627 (rng-c-advance)
628 (funcall parse-function)))
629 ((rng-c-current-token-ncname-p)
630 (rng-c-advance-with (rng-c-make-ref rng-c-current-token)))
631 ((string-equal rng-c-current-token "(")
632 (rng-c-advance)
633 (let ((p (rng-c-parse-pattern)))
634 (rng-c-expect ")")
635 p))
636 ((rng-c-current-token-prefixed-name-p)
637 (let ((name (rng-c-expand-datatype rng-c-current-token)))
638 (rng-c-advance)
639 (rng-c-parse-data name)))
640 ((rng-c-current-token-literal-p)
641 (rng-make-value rng-token-datatype (rng-c-parse-literal) nil))
642 ((rng-c-current-token-quoted-identifier-p)
643 (rng-c-advance-with
644 (rng-c-make-ref (substring rng-c-current-token 1))))
645 ((string-equal rng-c-current-token "[")
646 (rng-c-parse-lead-annotation)
647 (rng-c-parse-primary))
648 (t (rng-c-error "Invalid pattern"))))
649
650(defun rng-c-parse-parent ()
651 (and (rng-c-current-token-keyword-p)
652 (rng-c-error "Keyword following parent was not quoted"
653 rng-c-current-token))
654 (rng-c-make-parent-ref (rng-c-parse-identifier-or-keyword)))
655
656(defun rng-c-parse-literal ()
657 (rng-c-fix-escaped-newlines
658 (apply 'concat (rng-c-parse-literal-segments))))
659
660(defun rng-c-parse-literal-segments ()
661 (let ((str (rng-c-parse-literal-segment)))
662 (cons str
663 (cond ((string-equal rng-c-current-token "~")
664 (rng-c-advance)
665 (rng-c-parse-literal-segments))
666 (t nil)))))
667
668(defun rng-c-parse-literal-segment ()
669 (or (rng-c-current-token-literal-p)
670 (rng-c-error "Expected a literal"))
671 (rng-c-advance-with
672 (let ((n (if (and (>= (length rng-c-current-token) 6)
673 (eq (aref rng-c-current-token 0)
674 (aref rng-c-current-token 1)))
675 3
676 1)))
677 (substring rng-c-current-token n (- n)))))
678
679(defun rng-c-fix-escaped-newlines (str)
680 (let ((pos 0))
681 (while (progn
682 (let ((n (string-match "\C-@" str pos)))
683 (and n
684 (aset str n ?\n)
685 (setq pos (1+ n)))))))
686 str)
687
688(defun rng-c-parse-identifier-or-keyword ()
689 (cond ((rng-c-current-token-ncname-p)
690 (rng-c-advance-with rng-c-current-token))
691 ((rng-c-current-token-quoted-identifier-p)
692 (rng-c-advance-with (substring rng-c-current-token 1)))
693 (t (rng-c-error "Expected identifier or keyword"))))
694
695(put 'string 'rng-c-pattern 'rng-c-parse-string)
696(put 'token 'rng-c-pattern 'rng-c-parse-token)
697(put 'element 'rng-c-pattern 'rng-c-parse-element)
698(put 'attribute 'rng-c-pattern 'rng-c-parse-attribute)
699(put 'list 'rng-c-pattern 'rng-c-parse-list)
700(put 'mixed 'rng-c-pattern 'rng-c-parse-mixed)
701(put 'text 'rng-c-pattern 'rng-c-parse-text)
702(put 'empty 'rng-c-pattern 'rng-c-parse-empty)
703(put 'notAllowed 'rng-c-pattern 'rng-c-parse-not-allowed)
704(put 'grammar 'rng-c-pattern 'rng-c-parse-grammar)
705(put 'parent 'rng-c-pattern 'rng-c-parse-parent)
706(put 'external 'rng-c-pattern 'rng-c-parse-external)
707
708(defun rng-c-parse-element ()
709 (let ((name-class (rng-c-parse-name-class nil)))
710 (rng-c-expect "{")
711 (let ((pattern (rng-c-parse-pattern)))
712 (rng-c-expect "}")
713 (rng-make-element name-class pattern))))
714
715(defun rng-c-parse-attribute ()
716 (let ((name-class (rng-c-parse-name-class 'attribute)))
717 (rng-c-expect "{")
718 (let ((pattern (rng-c-parse-pattern)))
719 (rng-c-expect "}")
720 (rng-make-attribute name-class pattern))))
721
722(defun rng-c-parse-name-class (attribute)
723 (let* ((rng-c-had-except nil)
724 (name-class
725 (rng-c-parse-follow-annotations
726 (rng-c-parse-primary-name-class attribute))))
727 (if (string-equal rng-c-current-token "|")
728 (let* ((name-classes (cons name-class nil))
729 (tail name-classes))
730 (or (not rng-c-had-except)
731 (rng-c-error "Parentheses required around name-class using - operator"))
732 (while (progn
733 (rng-c-advance)
734 (let ((newcdr
735 (cons (rng-c-parse-follow-annotations
736 (rng-c-parse-primary-name-class attribute))
737 nil)))
738 (setcdr tail newcdr)
739 (setq tail newcdr))
740 (string-equal rng-c-current-token "|")))
741 (rng-make-choice-name-class name-classes))
742 name-class)))
743
744(defun rng-c-parse-primary-name-class (attribute)
745 (cond ((rng-c-current-token-ncname-p)
746 (rng-c-advance-with
747 (rng-make-name-name-class
748 (rng-make-name (rng-c-unqualified-namespace attribute)
749 rng-c-current-token))))
750 ((rng-c-current-token-prefixed-name-p)
751 (rng-c-advance-with
752 (rng-make-name-name-class
753 (rng-c-expand-name rng-c-current-token))))
754 ((string-equal rng-c-current-token "*")
755 (let ((except (rng-c-parse-opt-except-name-class attribute)))
756 (if except
757 (rng-make-any-name-except-name-class except)
758 (rng-make-any-name-name-class))))
759 ((rng-c-current-token-ns-name-p)
760 (let* ((ns
761 (rng-c-lookup-prefix (substring rng-c-current-token
762 0
763 -2)))
764 (except (rng-c-parse-opt-except-name-class attribute)))
765 (if except
766 (rng-make-ns-name-except-name-class ns except)
767 (rng-make-ns-name-name-class ns))))
768 ((string-equal rng-c-current-token "(")
769 (rng-c-advance)
770 (let ((name-class (rng-c-parse-name-class attribute)))
771 (rng-c-expect ")")
772 name-class))
773 ((rng-c-current-token-quoted-identifier-p)
774 (rng-c-advance-with
775 (rng-make-name-name-class
776 (rng-make-name (rng-c-unqualified-namespace attribute)
777 (substring rng-c-current-token 1)))))
778 ((string-equal rng-c-current-token "[")
779 (rng-c-parse-lead-annotation)
780 (rng-c-parse-primary-name-class attribute))
781 (t (rng-c-error "Bad name class"))))
782
783(defun rng-c-parse-opt-except-name-class (attribute)
784 (rng-c-advance)
785 (and (string-equal rng-c-current-token "-")
786 (or (not rng-c-had-except)
787 (rng-c-error "Parentheses required around name-class using - operator"))
788 (setq rng-c-had-except t)
789 (progn
790 (rng-c-advance)
791 (rng-c-parse-primary-name-class attribute))))
792
793(defun rng-c-parse-mixed ()
794 (rng-c-expect "{")
795 (let ((pattern (rng-make-mixed (rng-c-parse-pattern))))
796 (rng-c-expect "}")
797 pattern))
798
799(defun rng-c-parse-list ()
800 (rng-c-expect "{")
801 (let ((pattern (rng-make-list (rng-c-parse-pattern))))
802 (rng-c-expect "}")
803 pattern))
804
805(defun rng-c-parse-text ()
806 (rng-make-text))
807
808(defun rng-c-parse-empty ()
809 (rng-make-empty))
810
811(defun rng-c-parse-not-allowed ()
812 (rng-make-not-allowed))
813
814(defun rng-c-parse-string ()
815 (rng-c-parse-data rng-string-datatype))
816
817(defun rng-c-parse-token ()
818 (rng-c-parse-data rng-token-datatype))
819
820(defun rng-c-parse-data (name)
821 (if (rng-c-current-token-literal-p)
822 (rng-make-value name
823 (rng-c-parse-literal)
824 (and (car name)
825 (rng-c-make-context)))
826 (let ((params (rng-c-parse-optional-params)))
827 (if (string-equal rng-c-current-token "-")
828 (progn
829 (if rng-c-had-except
830 (rng-c-error "Parentheses required around pattern using -")
831 (setq rng-c-had-except t))
832 (rng-c-advance)
833 (rng-make-data-except name
834 params
835 (rng-c-parse-primary)))
836 (rng-make-data name params)))))
837
838(defun rng-c-parse-optional-params ()
839 (and (string-equal rng-c-current-token "{")
840 (let* ((head (cons nil nil))
841 (tail head))
842 (rng-c-advance)
843 (while (not (string-equal rng-c-current-token "}"))
844 (and (string-equal rng-c-current-token "[")
845 (rng-c-parse-lead-annotation))
846 (let ((name (rng-c-parse-identifier-or-keyword)))
847 (rng-c-expect "=")
848 (let ((newcdr (cons (cons (intern name)
849 (rng-c-parse-literal))
850 nil)))
851 (setcdr tail newcdr)
852 (setq tail newcdr))))
853 (rng-c-advance)
854 (cdr head))))
855
856(defun rng-c-parse-external ()
857 (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
858 (rng-c-inherit-namespace (rng-c-parse-opt-inherit)))
859 (rng-c-parse-file filename 'external)))
860
861(defun rng-c-expand-file (uri)
862 (condition-case err
863 (rng-uri-file-name (rng-uri-resolve uri
864 (rng-file-name-uri rng-c-file-name)))
865 (rng-uri-error
866 (rng-c-error (cadr err)))))
867
868(defun rng-c-parse-opt-inherit ()
869 (cond ((string-equal rng-c-current-token "inherit")
870 (rng-c-advance)
871 (rng-c-expect "=")
872 (rng-c-lookup-prefix (rng-c-parse-identifier-or-keyword)))
873 (t rng-c-default-namespace)))
874
875(defun rng-c-parse-grammar ()
876 (rng-c-expect "{")
877 (let* ((rng-c-parent-grammar rng-c-current-grammar)
878 (rng-c-current-grammar (rng-c-make-grammar)))
879 (rng-c-parse-grammar-body "}")
880 (rng-c-finish-grammar)))
881
882(defun rng-c-parse-lead-annotation ()
883 (rng-c-parse-annotation-body)
884 (and (string-equal rng-c-current-token "[")
885 (rng-c-error "Multiple leading annotations")))
886
887(defun rng-c-parse-follow-annotations (obj)
888 (while (string-equal rng-c-current-token ">>")
889 (rng-c-advance)
890 (if (rng-c-current-token-prefixed-name-p)
891 (rng-c-advance)
892 (rng-c-parse-identifier-or-keyword))
893 (rng-c-parse-annotation-body t))
894 obj)
895
896(defun rng-c-parse-annotation-element ()
897 (rng-c-advance)
898 (rng-c-parse-annotation-body t))
899
900;; XXX need stricter checking of attribute names
901;; XXX don't allow attributes after text
902
903(defun rng-c-parse-annotation-body (&optional allow-text)
904 "Current token is [. Parse up to matching ]. Current token after
905parse is token following ]."
906 (or (string-equal rng-c-current-token "[")
907 (rng-c-error "Expected ["))
908 (rng-c-advance)
909 (while (not (string-equal rng-c-current-token "]"))
910 (cond ((rng-c-current-token-literal-p)
911 (or allow-text
912 (rng-c-error "Out of place text within annotation"))
913 (rng-c-parse-literal))
914 (t
915 (if (rng-c-current-token-prefixed-name-p)
916 (rng-c-advance)
917 (rng-c-parse-identifier-or-keyword))
918 (cond ((string-equal rng-c-current-token "[")
919 (rng-c-parse-annotation-body t))
920 ((string-equal rng-c-current-token "=")
921 (rng-c-advance)
922 (rng-c-parse-literal))
923 (t (rng-c-error "Expected = or ["))))))
924 (rng-c-advance))
925
926(defun rng-c-advance-with (pattern)
927 (rng-c-advance)
928 pattern)
929
930(defun rng-c-expect (str)
931 (or (string-equal rng-c-current-token str)
932 (rng-c-error "Expected `%s' but got `%s'" str rng-c-current-token))
933 (rng-c-advance))
934
935(provide 'rng-cmpct)
936
937;;; rng-cmpct.el