Merge from emacs--rel--22
[bpt/emacs.git] / lisp / nxml / rng-cmpct.el
1 ;;; rng-cmpct.el --- parsing of RELAX NG Compact Syntax schemas
2
3 ;; Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc.
4
5 ;; Author: James Clark
6 ;; Keywords: XML, RelaxNG
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 3, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02110-1301, USA.
24
25 ;;; Commentary:
26
27 ;; This parses a RELAX NG Compact Syntax schema into the form
28 ;; specified in rng-pttrn.el.
29 ;;
30 ;; RELAX NG Compact Syntax is specified by
31 ;; http://relaxng.org/compact.html
32 ;;
33 ;; This file uses the prefix "rng-c-".
34
35 ;;; Code:
36
37 (require 'nxml-util)
38 (require 'rng-util)
39 (require 'rng-uri)
40 (require 'rng-pttrn)
41
42 ;;;###autoload
43 (defun rng-c-load-schema (filename)
44 "Load a schema in RELAX NG compact syntax from FILENAME.
45 Return a pattern."
46 (rng-c-parse-file filename))
47
48 ;;; Error handling
49
50 (put 'rng-c-incorrect-schema
51 'error-conditions
52 '(error rng-error nxml-file-parse-error rng-c-incorrect-schema))
53
54 (put 'rng-c-incorrect-schema
55 'error-message
56 "Incorrect schema")
57
58 (defun rng-c-signal-incorrect-schema (filename pos message)
59 (nxml-signal-file-parse-error filename
60 pos
61 message
62 'rng-c-incorrect-schema))
63
64 ;;; Lexing
65
66 (defconst rng-c-keywords
67 '("attribute"
68 "default"
69 "datatypes"
70 "div"
71 "element"
72 "empty"
73 "external"
74 "grammar"
75 "include"
76 "inherit"
77 "list"
78 "mixed"
79 "namespace"
80 "notAllowed"
81 "parent"
82 "start"
83 "string"
84 "text"
85 "token")
86 "List of strings that are keywords in the compact syntax.")
87
88 (defconst rng-c-anchored-keyword-re
89 (concat "\\`\\(" (regexp-opt rng-c-keywords) "\\)\\'")
90 "Regular expression to match a keyword in the compact syntax.")
91
92 (defvar rng-c-syntax-table nil
93 "Syntax table for parsing the compact syntax.")
94
95 (if rng-c-syntax-table
96 ()
97 (setq rng-c-syntax-table (make-syntax-table))
98 (modify-syntax-entry ?# "<" rng-c-syntax-table)
99 (modify-syntax-entry ?\n ">" rng-c-syntax-table)
100 (modify-syntax-entry ?- "w" rng-c-syntax-table)
101 (modify-syntax-entry ?. "w" rng-c-syntax-table)
102 (modify-syntax-entry ?_ "w" rng-c-syntax-table)
103 (modify-syntax-entry ?: "_" rng-c-syntax-table))
104
105 (defconst rng-c-literal-1-re
106 "'\\(''\\([^']\\|'[^']\\|''[^']\\)*''\\|[^'\n]*\\)'"
107 "Regular expression to match a single-quoted literal.")
108
109 (defconst rng-c-literal-2-re
110 (replace-regexp-in-string "'" "\"" rng-c-literal-1-re)
111 "Regular expression to match a double-quoted literal.")
112
113 (defconst rng-c-ncname-re "\\w+")
114
115 (defconst rng-c-anchored-ncname-re
116 (concat "\\`" rng-c-ncname-re "\\'"))
117
118 (defconst rng-c-token-re
119 (concat "[&|]=" "\\|"
120 "[][()|&,*+?{}~=-]" "\\|"
121 rng-c-literal-1-re "\\|"
122 rng-c-literal-2-re "\\|"
123 rng-c-ncname-re "\\(:\\(\\*\\|" rng-c-ncname-re "\\)\\)?" "\\|"
124 "\\\\" rng-c-ncname-re "\\|"
125 ">>")
126 "Regular expression to match a token in the compact syntax.")
127
128 (defun rng-c-init-buffer ()
129 (setq case-fold-search nil) ; automatically becomes buffer-local when set
130 (set-buffer-multibyte t)
131 (set-syntax-table rng-c-syntax-table))
132
133 (defvar rng-c-current-token nil)
134 (make-variable-buffer-local 'rng-c-current-token)
135
136 (defun rng-c-advance ()
137 (cond ((looking-at rng-c-token-re)
138 (setq rng-c-current-token (match-string 0))
139 (goto-char (match-end 0))
140 (forward-comment (point-max)))
141 ((= (point) (point-max))
142 (setq rng-c-current-token ""))
143 (t (rng-c-error "Invalid token"))))
144
145 (defconst rng-c-anchored-datatype-name-re
146 (concat "\\`" rng-c-ncname-re ":" rng-c-ncname-re "\\'"))
147
148 (defsubst rng-c-current-token-keyword-p ()
149 (string-match rng-c-anchored-keyword-re rng-c-current-token))
150
151 (defsubst rng-c-current-token-prefixed-name-p ()
152 (string-match rng-c-anchored-datatype-name-re rng-c-current-token))
153
154 (defsubst rng-c-current-token-literal-p ()
155 (string-match "\\`['\"]" rng-c-current-token))
156
157 (defsubst rng-c-current-token-quoted-identifier-p ()
158 (string-match "\\`\\\\" rng-c-current-token))
159
160 (defsubst rng-c-current-token-ncname-p ()
161 (string-match rng-c-anchored-ncname-re rng-c-current-token))
162
163 (defsubst rng-c-current-token-ns-name-p ()
164 (let ((len (length rng-c-current-token)))
165 (and (> len 0)
166 (= (aref rng-c-current-token (- len 1)) ?*))))
167
168 ;;; Namespaces
169
170 (defvar rng-c-inherit-namespace nil)
171
172 (defvar rng-c-default-namespace nil)
173
174 (defvar rng-c-default-namespace-declared nil)
175
176 (defvar rng-c-namespace-decls nil
177 "Alist of namespace declarations.")
178
179 (defconst rng-c-no-namespace nil)
180
181 (defun rng-c-declare-standard-namespaces ()
182 (setq rng-c-namespace-decls
183 (cons (cons "xml" nxml-xml-namespace-uri)
184 rng-c-namespace-decls))
185 (when (and (not rng-c-default-namespace-declared)
186 rng-c-inherit-namespace)
187 (setq rng-c-default-namespace rng-c-inherit-namespace)))
188
189 (defun rng-c-expand-name (prefixed-name)
190 (let ((i (string-match ":" prefixed-name)))
191 (rng-make-name (rng-c-lookup-prefix (substring prefixed-name
192 0
193 i))
194 (substring prefixed-name (+ i 1)))))
195
196 (defun rng-c-lookup-prefix (prefix)
197 (let ((binding (assoc prefix rng-c-namespace-decls)))
198 (or binding (rng-c-error "Undefined prefix %s" prefix))
199 (cdr binding)))
200
201 (defun rng-c-unqualified-namespace (attribute)
202 (if attribute
203 rng-c-no-namespace
204 rng-c-default-namespace))
205
206 (defun rng-c-make-context ()
207 (cons rng-c-default-namespace rng-c-namespace-decls))
208
209 ;;; Datatypes
210
211 (defconst rng-string-datatype
212 (rng-make-datatype rng-builtin-datatypes-uri "string"))
213
214 (defconst rng-token-datatype
215 (rng-make-datatype rng-builtin-datatypes-uri "token"))
216
217 (defvar rng-c-datatype-decls nil
218 "Alist of datatype declarations.
219 Contains a list of pairs (PREFIX . URI) where PREFIX is a string
220 and URI is a symbol.")
221
222 (defun rng-c-declare-standard-datatypes ()
223 (setq rng-c-datatype-decls
224 (cons (cons "xsd" rng-xsd-datatypes-uri)
225 rng-c-datatype-decls)))
226
227 (defun rng-c-lookup-datatype-prefix (prefix)
228 (let ((binding (assoc prefix rng-c-datatype-decls)))
229 (or binding (rng-c-error "Undefined prefix %s" prefix))
230 (cdr binding)))
231
232 (defun rng-c-expand-datatype (prefixed-name)
233 (let ((i (string-match ":" prefixed-name)))
234 (rng-make-datatype
235 (rng-c-lookup-datatype-prefix (substring prefixed-name 0 i))
236 (substring prefixed-name (+ i 1)))))
237
238 ;;; Grammars
239
240 (defvar rng-c-current-grammar nil)
241 (defvar rng-c-parent-grammar nil)
242
243 (defun rng-c-make-grammar ()
244 (make-hash-table :test 'equal))
245
246 (defconst rng-c-about-override-slot 0)
247 (defconst rng-c-about-combine-slot 1)
248
249 (defun rng-c-lookup-create (name grammar)
250 "Return a def object for NAME. A def object is a pair
251 \(ABOUT . REF) where REF is returned by `rng-make-ref'. ABOUT is a
252 two-element vector [OVERRIDE COMBINE]. COMBINE is either nil, choice
253 or interleave. OVERRIDE is either nil, require or t."
254 (let ((def (gethash name grammar)))
255 (if def
256 def
257 (progn
258 (setq def (cons (vector nil nil) (rng-make-ref name)))
259 (puthash name def grammar)
260 def))))
261
262 (defun rng-c-make-ref (name)
263 (or rng-c-current-grammar
264 (rng-c-error "Reference not in a grammar"))
265 (cdr (rng-c-lookup-create name rng-c-current-grammar)))
266
267 (defun rng-c-make-parent-ref (name)
268 (or rng-c-parent-grammar
269 (rng-c-error "Reference to non-existent parent grammar"))
270 (cdr (rng-c-lookup-create name rng-c-parent-grammar)))
271
272 (defvar rng-c-overrides nil
273 "Contains a list of (NAME . DEF) pairs.")
274
275 (defun rng-c-merge-combine (def combine name)
276 (let* ((about (car def))
277 (current-combine (aref about rng-c-about-combine-slot)))
278 (if combine
279 (if current-combine
280 (or (eq combine current-combine)
281 (rng-c-error "Inconsistent combine for %s" name))
282 (aset about rng-c-about-combine-slot combine))
283 current-combine)))
284
285 (defun rng-c-prepare-define (name combine in-include)
286 (let* ((def (rng-c-lookup-create name rng-c-current-grammar))
287 (about (car def))
288 (overridden (aref about rng-c-about-override-slot)))
289 (and in-include
290 (setq rng-c-overrides (cons (cons name def) rng-c-overrides)))
291 (cond (overridden (and (eq overridden 'require)
292 (aset about rng-c-about-override-slot t))
293 nil)
294 (t (setq combine (rng-c-merge-combine def combine name))
295 (and (rng-ref-get (cdr def))
296 (not combine)
297 (rng-c-error "Duplicate definition of %s" name))
298 def))))
299
300 (defun rng-c-start-include (overrides)
301 (mapcar (lambda (name-def)
302 (let* ((def (cdr name-def))
303 (about (car def))
304 (save (aref about rng-c-about-override-slot)))
305 (aset about rng-c-about-override-slot 'require)
306 (cons save name-def)))
307 overrides))
308
309 (defun rng-c-end-include (overrides)
310 (mapcar (lambda (o)
311 (let* ((saved (car o))
312 (name-def (cdr o))
313 (name (car name-def))
314 (def (cdr name-def))
315 (about (car def)))
316 (and (eq (aref about rng-c-about-override-slot) 'require)
317 (rng-c-error "Definition of %s in include did not override definition in included file" name))
318 (aset about rng-c-about-override-slot saved)))
319 overrides))
320
321 (defun rng-c-define (def value)
322 (and def
323 (let ((current-value (rng-ref-get (cdr def))))
324 (rng-ref-set (cdr def)
325 (if current-value
326 (if (eq (aref (car def) rng-c-about-combine-slot)
327 'choice)
328 (rng-make-choice (list current-value value))
329 (rng-make-interleave (list current-value value)))
330 value)))))
331
332 (defun rng-c-finish-grammar ()
333 (maphash (lambda (key def)
334 (or (rng-ref-get (cdr def))
335 (rng-c-error "Reference to undefined pattern %s" key)))
336 rng-c-current-grammar)
337 (rng-ref-get (cdr (or (gethash 'start rng-c-current-grammar)
338 (rng-c-error "No definition of start")))))
339
340 ;;; Parsing
341
342 (defvar rng-c-escape-positions nil)
343 (make-variable-buffer-local 'rng-c-escape-positions)
344
345 (defvar rng-c-file-name nil)
346 (make-variable-buffer-local 'rng-c-file-name)
347
348 (defvar rng-c-file-index nil)
349
350 (defun rng-c-parse-file (filename &optional context)
351 (save-excursion
352 (set-buffer (get-buffer-create (rng-c-buffer-name context)))
353 (erase-buffer)
354 (rng-c-init-buffer)
355 (setq rng-c-file-name
356 (car (insert-file-contents filename)))
357 (setq rng-c-escape-positions nil)
358 (rng-c-process-escapes)
359 (rng-c-parse-top-level context)))
360
361 (defun rng-c-buffer-name (context)
362 (concat " *RNC Input"
363 (if context
364 (concat "<"
365 (number-to-string (setq rng-c-file-index
366 (1+ rng-c-file-index)))
367 ">*")
368 (setq rng-c-file-index 1)
369 "*")))
370
371 (defun rng-c-process-escapes ()
372 ;; Check for any nuls, since we will use nul chars
373 ;; for internal purposes.
374 (let ((pos (search-forward "\C-@" nil t)))
375 (and pos
376 (rng-c-error "Nul character found (binary file?)")))
377 (let ((offset 0))
378 (while (re-search-forward "\\\\x+{\\([0-9a-fA-F]+\\)}"
379 (point-max)
380 t)
381 (let* ((ch (decode-char 'ucs (string-to-number (match-string 1) 16))))
382 (if (and ch (> ch 0))
383 (let ((begin (match-beginning 0))
384 (end (match-end 0)))
385 (delete-region begin end)
386 ;; Represent an escaped newline by nul, so
387 ;; that we can distinguish it from a literal newline.
388 ;; We will translate it back into a real newline later.
389 (insert (if (eq ch ?\n) 0 ch))
390 (setq offset (+ offset (- end begin 1)))
391 (setq rng-c-escape-positions
392 (cons (cons (point) offset)
393 rng-c-escape-positions)))
394 (rng-c-error "Invalid character escape")))))
395 (goto-char 1))
396
397 (defun rng-c-translate-position (pos)
398 (let ((tem rng-c-escape-positions))
399 (while (and tem
400 (> (caar tem) pos))
401 (setq tem (cdr tem)))
402 (if tem
403 (+ pos (cdar tem))
404 pos)))
405
406 (defun rng-c-error (&rest args)
407 (rng-c-signal-incorrect-schema rng-c-file-name
408 (rng-c-translate-position (point))
409 (apply 'format args)))
410
411 (defun rng-c-parse-top-level (context)
412 (let ((rng-c-namespace-decls nil)
413 (rng-c-default-namespace nil)
414 (rng-c-datatype-decls nil))
415 (goto-char (point-min))
416 (forward-comment (point-max))
417 (rng-c-advance)
418 (rng-c-parse-decls)
419 (let ((p (if (eq context 'include)
420 (if (rng-c-implicit-grammar-p)
421 (rng-c-parse-grammar-body "")
422 (rng-c-parse-included-grammar))
423 (if (rng-c-implicit-grammar-p)
424 (rng-c-parse-implicit-grammar)
425 (rng-c-parse-pattern)))))
426 (or (string-equal rng-c-current-token "")
427 (rng-c-error "Unexpected characters after pattern"))
428 p)))
429
430 (defun rng-c-parse-included-grammar ()
431 (or (string-equal rng-c-current-token "grammar")
432 (rng-c-error "Included schema is not a grammar"))
433 (rng-c-advance)
434 (rng-c-expect "{")
435 (rng-c-parse-grammar-body "}"))
436
437 (defun rng-c-implicit-grammar-p ()
438 (or (and (or (rng-c-current-token-prefixed-name-p)
439 (rng-c-current-token-quoted-identifier-p)
440 (and (rng-c-current-token-ncname-p)
441 (not (rng-c-current-token-keyword-p))))
442 (looking-at "\\["))
443 (and (string-equal rng-c-current-token "[")
444 (rng-c-parse-lead-annotation)
445 nil)
446 (member rng-c-current-token '("div" "include" ""))
447 (looking-at "[|&]?=")))
448
449 (defun rng-c-parse-decls ()
450 (setq rng-c-default-namespace-declared nil)
451 (while (progn
452 (let ((binding
453 (assoc rng-c-current-token
454 '(("namespace" . rng-c-parse-namespace)
455 ("datatypes" . rng-c-parse-datatypes)
456 ("default" . rng-c-parse-default)))))
457 (if binding
458 (progn
459 (rng-c-advance)
460 (funcall (cdr binding))
461 t)
462 nil))))
463 (rng-c-declare-standard-datatypes)
464 (rng-c-declare-standard-namespaces))
465
466 (defun rng-c-parse-datatypes ()
467 (let ((prefix (rng-c-parse-identifier-or-keyword)))
468 (or (not (assoc prefix rng-c-datatype-decls))
469 (rng-c-error "Duplicate datatypes declaration for prefix %s" prefix))
470 (rng-c-expect "=")
471 (setq rng-c-datatype-decls
472 (cons (cons prefix
473 (rng-make-datatypes-uri (rng-c-parse-literal)))
474 rng-c-datatype-decls))))
475
476 (defun rng-c-parse-namespace ()
477 (rng-c-declare-namespace nil
478 (rng-c-parse-identifier-or-keyword)))
479
480 (defun rng-c-parse-default ()
481 (rng-c-expect "namespace")
482 (rng-c-declare-namespace t
483 (if (string-equal rng-c-current-token "=")
484 nil
485 (rng-c-parse-identifier-or-keyword))))
486
487 (defun rng-c-declare-namespace (declare-default prefix)
488 (rng-c-expect "=")
489 (let ((ns (cond ((string-equal rng-c-current-token "inherit")
490 (rng-c-advance)
491 rng-c-inherit-namespace)
492 (t
493 (nxml-make-namespace (rng-c-parse-literal))))))
494 (and prefix
495 (or (not (assoc prefix rng-c-namespace-decls))
496 (rng-c-error "Duplicate namespace declaration for prefix %s"
497 prefix))
498 (setq rng-c-namespace-decls
499 (cons (cons prefix ns) rng-c-namespace-decls)))
500 (and declare-default
501 (or (not rng-c-default-namespace-declared)
502 (rng-c-error "Duplicate default namespace declaration"))
503 (setq rng-c-default-namespace-declared t)
504 (setq rng-c-default-namespace ns))))
505
506 (defun rng-c-parse-implicit-grammar ()
507 (let* ((rng-c-parent-grammar rng-c-current-grammar)
508 (rng-c-current-grammar (rng-c-make-grammar)))
509 (rng-c-parse-grammar-body "")
510 (rng-c-finish-grammar)))
511
512 (defun rng-c-parse-grammar-body (close-token &optional in-include)
513 (while (not (string-equal rng-c-current-token close-token))
514 (cond ((rng-c-current-token-keyword-p)
515 (let ((kw (intern rng-c-current-token)))
516 (cond ((eq kw 'start)
517 (rng-c-parse-define 'start in-include))
518 ((eq kw 'div)
519 (rng-c-advance)
520 (rng-c-parse-div in-include))
521 ((eq kw 'include)
522 (and in-include
523 (rng-c-error "Nested include"))
524 (rng-c-advance)
525 (rng-c-parse-include))
526 (t (rng-c-error "Invalid grammar keyword")))))
527 ((rng-c-current-token-ncname-p)
528 (if (looking-at "\\[")
529 (rng-c-parse-annotation-element)
530 (rng-c-parse-define rng-c-current-token
531 in-include)))
532 ((rng-c-current-token-quoted-identifier-p)
533 (if (looking-at "\\[")
534 (rng-c-parse-annotation-element)
535 (rng-c-parse-define (substring rng-c-current-token 1)
536 in-include)))
537 ((rng-c-current-token-prefixed-name-p)
538 (rng-c-parse-annotation-element))
539 ((string-equal rng-c-current-token "[")
540 (rng-c-parse-lead-annotation)
541 (and (string-equal rng-c-current-token close-token)
542 (rng-c-error "Missing annotation subject"))
543 (and (looking-at "\\[")
544 (rng-c-error "Leading annotation applied to annotation")))
545 (t (rng-c-error "Invalid grammar content"))))
546 (or (string-equal rng-c-current-token "")
547 (rng-c-advance)))
548
549 (defun rng-c-parse-div (in-include)
550 (rng-c-expect "{")
551 (rng-c-parse-grammar-body "}" in-include))
552
553 (defun rng-c-parse-include ()
554 (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
555 (rng-c-inherit-namespace (rng-c-parse-opt-inherit))
556 overrides)
557 (cond ((string-equal rng-c-current-token "{")
558 (rng-c-advance)
559 (let ((rng-c-overrides nil))
560 (rng-c-parse-grammar-body "}" t)
561 (setq overrides rng-c-overrides))
562 (setq overrides (rng-c-start-include overrides))
563 (rng-c-parse-file filename 'include)
564 (rng-c-end-include overrides))
565 (t (rng-c-parse-file filename 'include)))))
566
567 (defun rng-c-parse-define (name in-include)
568 (rng-c-advance)
569 (let ((assign (assoc rng-c-current-token
570 '(("=" . nil)
571 ("|=" . choice)
572 ("&=" . interleave)))))
573 (or assign
574 (rng-c-error "Expected assignment operator"))
575 (rng-c-advance)
576 (let ((ref (rng-c-prepare-define name (cdr assign) in-include)))
577 (rng-c-define ref (rng-c-parse-pattern)))))
578
579 (defvar rng-c-had-except nil)
580
581 (defun rng-c-parse-pattern ()
582 (let* ((rng-c-had-except nil)
583 (p (rng-c-parse-repeated))
584 (op (assoc rng-c-current-token
585 '(("|" . rng-make-choice)
586 ("," . rng-make-group)
587 ("&" . rng-make-interleave)))))
588 (if op
589 (if rng-c-had-except
590 (rng-c-error "Parentheses required around pattern using -")
591 (let* ((patterns (cons p nil))
592 (tail patterns)
593 (connector rng-c-current-token))
594 (while (progn
595 (rng-c-advance)
596 (let ((newcdr (cons (rng-c-parse-repeated) nil)))
597 (setcdr tail newcdr)
598 (setq tail newcdr))
599 (string-equal rng-c-current-token connector)))
600 (funcall (cdr op) patterns)))
601 p)))
602
603 (defun rng-c-parse-repeated ()
604 (let ((p (rng-c-parse-follow-annotations
605 (rng-c-parse-primary)))
606 (op (assoc rng-c-current-token
607 '(("*" . rng-make-zero-or-more)
608 ("+" . rng-make-one-or-more)
609 ("?" . rng-make-optional)))))
610 (if op
611 (if rng-c-had-except
612 (rng-c-error "Parentheses required around pattern using -")
613 (rng-c-parse-follow-annotations
614 (progn
615 (rng-c-advance)
616 (funcall (cdr op) p))))
617 p)))
618
619 (defun rng-c-parse-primary ()
620 "Parse a primary expression. The current token must be the first
621 token of the expression. After parsing the current token should be
622 token following the primary expression."
623 (cond ((rng-c-current-token-keyword-p)
624 (let ((parse-function (get (intern rng-c-current-token)
625 'rng-c-pattern)))
626 (or parse-function
627 (rng-c-error "Keyword %s does not introduce a pattern"
628 rng-c-current-token))
629 (rng-c-advance)
630 (funcall parse-function)))
631 ((rng-c-current-token-ncname-p)
632 (rng-c-advance-with (rng-c-make-ref rng-c-current-token)))
633 ((string-equal rng-c-current-token "(")
634 (rng-c-advance)
635 (let ((p (rng-c-parse-pattern)))
636 (rng-c-expect ")")
637 p))
638 ((rng-c-current-token-prefixed-name-p)
639 (let ((name (rng-c-expand-datatype rng-c-current-token)))
640 (rng-c-advance)
641 (rng-c-parse-data name)))
642 ((rng-c-current-token-literal-p)
643 (rng-make-value rng-token-datatype (rng-c-parse-literal) nil))
644 ((rng-c-current-token-quoted-identifier-p)
645 (rng-c-advance-with
646 (rng-c-make-ref (substring rng-c-current-token 1))))
647 ((string-equal rng-c-current-token "[")
648 (rng-c-parse-lead-annotation)
649 (rng-c-parse-primary))
650 (t (rng-c-error "Invalid pattern"))))
651
652 (defun rng-c-parse-parent ()
653 (and (rng-c-current-token-keyword-p)
654 (rng-c-error "Keyword following parent was not quoted"
655 rng-c-current-token))
656 (rng-c-make-parent-ref (rng-c-parse-identifier-or-keyword)))
657
658 (defun rng-c-parse-literal ()
659 (rng-c-fix-escaped-newlines
660 (apply 'concat (rng-c-parse-literal-segments))))
661
662 (defun rng-c-parse-literal-segments ()
663 (let ((str (rng-c-parse-literal-segment)))
664 (cons str
665 (cond ((string-equal rng-c-current-token "~")
666 (rng-c-advance)
667 (rng-c-parse-literal-segments))
668 (t nil)))))
669
670 (defun rng-c-parse-literal-segment ()
671 (or (rng-c-current-token-literal-p)
672 (rng-c-error "Expected a literal"))
673 (rng-c-advance-with
674 (let ((n (if (and (>= (length rng-c-current-token) 6)
675 (eq (aref rng-c-current-token 0)
676 (aref rng-c-current-token 1)))
677 3
678 1)))
679 (substring rng-c-current-token n (- n)))))
680
681 (defun rng-c-fix-escaped-newlines (str)
682 (let ((pos 0))
683 (while (progn
684 (let ((n (string-match "\C-@" str pos)))
685 (and n
686 (aset str n ?\n)
687 (setq pos (1+ n)))))))
688 str)
689
690 (defun rng-c-parse-identifier-or-keyword ()
691 (cond ((rng-c-current-token-ncname-p)
692 (rng-c-advance-with rng-c-current-token))
693 ((rng-c-current-token-quoted-identifier-p)
694 (rng-c-advance-with (substring rng-c-current-token 1)))
695 (t (rng-c-error "Expected identifier or keyword"))))
696
697 (put 'string 'rng-c-pattern 'rng-c-parse-string)
698 (put 'token 'rng-c-pattern 'rng-c-parse-token)
699 (put 'element 'rng-c-pattern 'rng-c-parse-element)
700 (put 'attribute 'rng-c-pattern 'rng-c-parse-attribute)
701 (put 'list 'rng-c-pattern 'rng-c-parse-list)
702 (put 'mixed 'rng-c-pattern 'rng-c-parse-mixed)
703 (put 'text 'rng-c-pattern 'rng-c-parse-text)
704 (put 'empty 'rng-c-pattern 'rng-c-parse-empty)
705 (put 'notAllowed 'rng-c-pattern 'rng-c-parse-not-allowed)
706 (put 'grammar 'rng-c-pattern 'rng-c-parse-grammar)
707 (put 'parent 'rng-c-pattern 'rng-c-parse-parent)
708 (put 'external 'rng-c-pattern 'rng-c-parse-external)
709
710 (defun rng-c-parse-element ()
711 (let ((name-class (rng-c-parse-name-class nil)))
712 (rng-c-expect "{")
713 (let ((pattern (rng-c-parse-pattern)))
714 (rng-c-expect "}")
715 (rng-make-element name-class pattern))))
716
717 (defun rng-c-parse-attribute ()
718 (let ((name-class (rng-c-parse-name-class 'attribute)))
719 (rng-c-expect "{")
720 (let ((pattern (rng-c-parse-pattern)))
721 (rng-c-expect "}")
722 (rng-make-attribute name-class pattern))))
723
724 (defun rng-c-parse-name-class (attribute)
725 (let* ((rng-c-had-except nil)
726 (name-class
727 (rng-c-parse-follow-annotations
728 (rng-c-parse-primary-name-class attribute))))
729 (if (string-equal rng-c-current-token "|")
730 (let* ((name-classes (cons name-class nil))
731 (tail name-classes))
732 (or (not rng-c-had-except)
733 (rng-c-error "Parentheses required around name-class using - operator"))
734 (while (progn
735 (rng-c-advance)
736 (let ((newcdr
737 (cons (rng-c-parse-follow-annotations
738 (rng-c-parse-primary-name-class attribute))
739 nil)))
740 (setcdr tail newcdr)
741 (setq tail newcdr))
742 (string-equal rng-c-current-token "|")))
743 (rng-make-choice-name-class name-classes))
744 name-class)))
745
746 (defun rng-c-parse-primary-name-class (attribute)
747 (cond ((rng-c-current-token-ncname-p)
748 (rng-c-advance-with
749 (rng-make-name-name-class
750 (rng-make-name (rng-c-unqualified-namespace attribute)
751 rng-c-current-token))))
752 ((rng-c-current-token-prefixed-name-p)
753 (rng-c-advance-with
754 (rng-make-name-name-class
755 (rng-c-expand-name rng-c-current-token))))
756 ((string-equal rng-c-current-token "*")
757 (let ((except (rng-c-parse-opt-except-name-class attribute)))
758 (if except
759 (rng-make-any-name-except-name-class except)
760 (rng-make-any-name-name-class))))
761 ((rng-c-current-token-ns-name-p)
762 (let* ((ns
763 (rng-c-lookup-prefix (substring rng-c-current-token
764 0
765 -2)))
766 (except (rng-c-parse-opt-except-name-class attribute)))
767 (if except
768 (rng-make-ns-name-except-name-class ns except)
769 (rng-make-ns-name-name-class ns))))
770 ((string-equal rng-c-current-token "(")
771 (rng-c-advance)
772 (let ((name-class (rng-c-parse-name-class attribute)))
773 (rng-c-expect ")")
774 name-class))
775 ((rng-c-current-token-quoted-identifier-p)
776 (rng-c-advance-with
777 (rng-make-name-name-class
778 (rng-make-name (rng-c-unqualified-namespace attribute)
779 (substring rng-c-current-token 1)))))
780 ((string-equal rng-c-current-token "[")
781 (rng-c-parse-lead-annotation)
782 (rng-c-parse-primary-name-class attribute))
783 (t (rng-c-error "Bad name class"))))
784
785 (defun rng-c-parse-opt-except-name-class (attribute)
786 (rng-c-advance)
787 (and (string-equal rng-c-current-token "-")
788 (or (not rng-c-had-except)
789 (rng-c-error "Parentheses required around name-class using - operator"))
790 (setq rng-c-had-except t)
791 (progn
792 (rng-c-advance)
793 (rng-c-parse-primary-name-class attribute))))
794
795 (defun rng-c-parse-mixed ()
796 (rng-c-expect "{")
797 (let ((pattern (rng-make-mixed (rng-c-parse-pattern))))
798 (rng-c-expect "}")
799 pattern))
800
801 (defun rng-c-parse-list ()
802 (rng-c-expect "{")
803 (let ((pattern (rng-make-list (rng-c-parse-pattern))))
804 (rng-c-expect "}")
805 pattern))
806
807 (defun rng-c-parse-text ()
808 (rng-make-text))
809
810 (defun rng-c-parse-empty ()
811 (rng-make-empty))
812
813 (defun rng-c-parse-not-allowed ()
814 (rng-make-not-allowed))
815
816 (defun rng-c-parse-string ()
817 (rng-c-parse-data rng-string-datatype))
818
819 (defun rng-c-parse-token ()
820 (rng-c-parse-data rng-token-datatype))
821
822 (defun rng-c-parse-data (name)
823 (if (rng-c-current-token-literal-p)
824 (rng-make-value name
825 (rng-c-parse-literal)
826 (and (car name)
827 (rng-c-make-context)))
828 (let ((params (rng-c-parse-optional-params)))
829 (if (string-equal rng-c-current-token "-")
830 (progn
831 (if rng-c-had-except
832 (rng-c-error "Parentheses required around pattern using -")
833 (setq rng-c-had-except t))
834 (rng-c-advance)
835 (rng-make-data-except name
836 params
837 (rng-c-parse-primary)))
838 (rng-make-data name params)))))
839
840 (defun rng-c-parse-optional-params ()
841 (and (string-equal rng-c-current-token "{")
842 (let* ((head (cons nil nil))
843 (tail head))
844 (rng-c-advance)
845 (while (not (string-equal rng-c-current-token "}"))
846 (and (string-equal rng-c-current-token "[")
847 (rng-c-parse-lead-annotation))
848 (let ((name (rng-c-parse-identifier-or-keyword)))
849 (rng-c-expect "=")
850 (let ((newcdr (cons (cons (intern name)
851 (rng-c-parse-literal))
852 nil)))
853 (setcdr tail newcdr)
854 (setq tail newcdr))))
855 (rng-c-advance)
856 (cdr head))))
857
858 (defun rng-c-parse-external ()
859 (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
860 (rng-c-inherit-namespace (rng-c-parse-opt-inherit)))
861 (rng-c-parse-file filename 'external)))
862
863 (defun rng-c-expand-file (uri)
864 (condition-case err
865 (rng-uri-file-name (rng-uri-resolve uri
866 (rng-file-name-uri rng-c-file-name)))
867 (rng-uri-error
868 (rng-c-error (cadr err)))))
869
870 (defun rng-c-parse-opt-inherit ()
871 (cond ((string-equal rng-c-current-token "inherit")
872 (rng-c-advance)
873 (rng-c-expect "=")
874 (rng-c-lookup-prefix (rng-c-parse-identifier-or-keyword)))
875 (t rng-c-default-namespace)))
876
877 (defun rng-c-parse-grammar ()
878 (rng-c-expect "{")
879 (let* ((rng-c-parent-grammar rng-c-current-grammar)
880 (rng-c-current-grammar (rng-c-make-grammar)))
881 (rng-c-parse-grammar-body "}")
882 (rng-c-finish-grammar)))
883
884 (defun rng-c-parse-lead-annotation ()
885 (rng-c-parse-annotation-body)
886 (and (string-equal rng-c-current-token "[")
887 (rng-c-error "Multiple leading annotations")))
888
889 (defun rng-c-parse-follow-annotations (obj)
890 (while (string-equal rng-c-current-token ">>")
891 (rng-c-advance)
892 (if (rng-c-current-token-prefixed-name-p)
893 (rng-c-advance)
894 (rng-c-parse-identifier-or-keyword))
895 (rng-c-parse-annotation-body t))
896 obj)
897
898 (defun rng-c-parse-annotation-element ()
899 (rng-c-advance)
900 (rng-c-parse-annotation-body t))
901
902 ;; XXX need stricter checking of attribute names
903 ;; XXX don't allow attributes after text
904
905 (defun rng-c-parse-annotation-body (&optional allow-text)
906 "Current token is [. Parse up to matching ]. Current token after
907 parse is token following ]."
908 (or (string-equal rng-c-current-token "[")
909 (rng-c-error "Expected ["))
910 (rng-c-advance)
911 (while (not (string-equal rng-c-current-token "]"))
912 (cond ((rng-c-current-token-literal-p)
913 (or allow-text
914 (rng-c-error "Out of place text within annotation"))
915 (rng-c-parse-literal))
916 (t
917 (if (rng-c-current-token-prefixed-name-p)
918 (rng-c-advance)
919 (rng-c-parse-identifier-or-keyword))
920 (cond ((string-equal rng-c-current-token "[")
921 (rng-c-parse-annotation-body t))
922 ((string-equal rng-c-current-token "=")
923 (rng-c-advance)
924 (rng-c-parse-literal))
925 (t (rng-c-error "Expected = or ["))))))
926 (rng-c-advance))
927
928 (defun rng-c-advance-with (pattern)
929 (rng-c-advance)
930 pattern)
931
932 (defun rng-c-expect (str)
933 (or (string-equal rng-c-current-token str)
934 (rng-c-error "Expected `%s' but got `%s'" str rng-c-current-token))
935 (rng-c-advance))
936
937 (provide 'rng-cmpct)
938
939 ;;; rng-cmpct.el
940
941 ;; arch-tag: 90395eb1-283b-4146-bbc1-6d6ef1704e57