(nxml-mode): Call rng-nxml-mode-init directly.
[bpt/emacs.git] / lisp / nxml / xmltok.el
1 ;;; xmltok.el --- XML tokenization
2
3 ;; Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc.
4
5 ;; Author: James Clark
6 ;; Keywords: XML
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 3, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02110-1301, USA.
24
25 ;;; Commentary:
26
27 ;; This implements an XML 1.0 parser. It also implements the XML
28 ;; Namespaces Recommendation. It is designed to be conforming, but it
29 ;; works a bit differently from a normal XML parser. An XML document
30 ;; consists of the prolog and an instance. The prolog is parsed as a
31 ;; single unit using `xmltok-forward-prolog'. The instance is
32 ;; considered as a sequence of tokens, where a token is something like
33 ;; a start-tag, a comment, a chunk of data or a CDATA section. The
34 ;; tokenization of the instance is stateless: the tokenization of one
35 ;; part of the instance does not depend on tokenization of the
36 ;; preceding part of the instance. This allows the instance to be
37 ;; parsed incrementally. The main entry point is `xmltok-forward':
38 ;; this can be called at any point in the instance provided it is
39 ;; between tokens. The other entry point is `xmltok-forward-special'
40 ;; which skips over tokens other comments, processing instructions or
41 ;; CDATA sections (i.e. the constructs in an instance that can contain
42 ;; less than signs that don't start a token).
43 ;;
44 ;; This is a non-validating XML 1.0 processor. It does not resolve
45 ;; parameter entities (including the external DTD subset) and it does
46 ;; not resolve external general entities.
47 ;;
48 ;; It is non-conformant by design in the following respects.
49 ;;
50 ;; 1. It expects the client to detect aspects of well-formedness that
51 ;; are not internal to a single token, specifically checking that
52 ;; end-tags match start-tags and that the instance contains exactly
53 ;; one element.
54 ;;
55 ;; 2. It expects the client to detect duplicate attributes. Detection
56 ;; of duplicate attributes after expansion of namespace prefixes
57 ;; requires the namespace processing state. Detection of duplicate
58 ;; attributes before expansion of namespace prefixes does not, but is
59 ;; redundant given that the client will do detection of duplicate
60 ;; attributes after expansion of namespace prefixes.
61 ;;
62 ;; 3. It allows the client to recover from well-formedness errors.
63 ;; This is essential for use in applications where the document is
64 ;; being parsed during the editing process.
65 ;;
66 ;; 4. It does not support documents that do not conform to the lexical
67 ;; requirements of the XML Namespaces Recommendation (e.g. a document
68 ;; with a colon in an entity name).
69 ;;
70 ;; There are also a number of things that have not yet been
71 ;; implemented that make it non-conformant.
72 ;;
73 ;; 1. It does not implement default attributes. ATTLIST declarations
74 ;; are parsed, but no checking is done on the content of attribute
75 ;; value literals specifying default attribute values, and default
76 ;; attribute values are not reported to the client.
77 ;;
78 ;; 2. It does not implement internal entities containing elements. If
79 ;; an internal entity is referenced and parsing its replacement text
80 ;; yields one or more tags, then it will skip the reference and
81 ;; report this to the client.
82 ;;
83 ;; 3. It does not check the syntax of public identifiers in the DTD.
84 ;;
85 ;; 4. It allows some non-ASCII characters in certain situations where
86 ;; it should not. For example, it only enforces XML 1.0's
87 ;; restrictions on name characters strictly for ASCII characters. The
88 ;; problem here is XML's character model is based squarely on Unicode,
89 ;; whereas Emacs's is not (as of version 21). It is not clear what
90 ;; the right thing to do is.
91
92 ;;; Code:
93
94 (defvar xmltok-type nil)
95 (defvar xmltok-start nil)
96 (defvar xmltok-name-colon nil)
97 (defvar xmltok-name-end nil)
98 (defvar xmltok-replacement nil
99 "String containing replacement for a character or entity reference.")
100
101 (defvar xmltok-attributes nil
102 "List containing attributes of last scanned element.
103 Each member of the list is a vector representing an attribute, which
104 can be accessed using the functions `xmltok-attribute-name-start',
105 `xmltok-attribute-name-colon', `xmltok-attribute-name-end',
106 `xmltok-attribute-value-start', `xmltok-attribute-value-end',
107 `xmltok-attribute-raw-normalized-value', `xmltok-attribute-refs'.")
108
109 (defvar xmltok-namespace-attributes nil
110 "List containing namespace declarations of last scanned element.
111 List has same format as `xmltok-attributes'.")
112
113 (defvar xmltok-dtd nil
114 "Information about the DTD used by `xmltok-forward'.
115 `xmltok-forward-prolog' sets this up.
116
117 It consists of an alist of general entity names vs definitions. The
118 first member of the alist is t if references to entities not in the
119 alist are well-formed \(e.g. because there's an external subset that
120 wasn't parsed).
121
122 Each general entity name is a string. The definition is either nil, a
123 symbol, a string, a cons cell. If the definition is nil, then it
124 means that it's an internal entity but the result of parsing it is
125 unknown. If it is a symbol, then the symbol is either `unparsed',
126 meaning the entity is an unparsed entity, `external', meaning the
127 entity is or references an external entity, `element', meaning the
128 entity includes one or more elements, or `not-well-formed', meaning
129 the replacement text is not well-formed. If the definition is a
130 string, then the replacement text of the entity is that string; this
131 happens only during the parsing of the prolog. If the definition is a
132 cons cell \(ER . AR), then ER specifies the string that results from
133 referencing the entity in element content and AR is either nil,
134 meaning the replacement text included a <, or a string which is the
135 normalized attribute value.")
136
137 (defvar xmltok-dependent-regions nil
138 "List of descriptors of regions that a parsed token depends on.
139
140 A token depends on a region if the region occurs after the token and a
141 change in the region may require the token to be reparsed. This only
142 happens with markup that is not well-formed. For example, if a <?
143 occurs without a matching ?>, then the <? is returned as a
144 not-well-formed token. However, this token is dependent on region
145 from the end of the token to the end of the buffer: if this ever
146 contains ?> then the buffer must be reparsed from the <?.
147
148 A region descriptor is a list (FUN START END ARG ...), where FUN is a
149 function to be called when the region changes, START and END are
150 integers giving the start and end of the region, and ARG... are
151 additional arguments to be passed to FUN. FUN will be called with 5
152 arguments followed by the additional arguments if any: the position of
153 the start of the changed area in the region, the position of the end
154 of the changed area in the region, the length of the changed area
155 before the change, the position of the start of the region, the
156 position of the end of the region. FUN must return non-nil if the
157 region needs reparsing. FUN will be called in a save-excursion with
158 match-data saved.
159
160 `xmltok-forward', `xmltok-forward-special' and `xmltok-forward-prolog'
161 may add entries to the beginning of this list, but will not clear it.
162 `xmltok-forward' and `xmltok-forward-special' will only add entries
163 when returning tokens of type not-well-formed.")
164
165 (defvar xmltok-errors nil
166 "List of errors detected by `xmltok-forward' and `xmltok-forward-prolog'.
167 When `xmltok-forward' and `xmltok-forward-prolog' detect a
168 well-formedness error, they will add an entry to the beginning of this
169 list. Each entry is a vector [MESSAGE START END], where MESSAGE is a
170 string giving the error message and START and END are integers
171 indicating the position of the error.")
172
173 (defmacro xmltok-save (&rest body)
174 `(let (xmltok-type
175 xmltok-start
176 xmltok-name-colon
177 xmltok-name-end
178 xmltok-replacement
179 xmltok-attributes
180 xmltok-namespace-attributes
181 xmltok-dependent-regions
182 xmltok-errors)
183 ,@body))
184
185 (put 'xmltok-save 'lisp-indent-function 0)
186 (def-edebug-spec xmltok-save t)
187
188 (defsubst xmltok-attribute-name-start (att)
189 (aref att 0))
190
191 (defsubst xmltok-attribute-name-colon (att)
192 (aref att 1))
193
194 (defsubst xmltok-attribute-name-end (att)
195 (aref att 2))
196
197 (defsubst xmltok-attribute-value-start (att)
198 (aref att 3))
199
200 (defsubst xmltok-attribute-value-end (att)
201 (aref att 4))
202
203 (defsubst xmltok-attribute-raw-normalized-value (att)
204 "Return an object representing the normalized value of ATT.
205 This can t indicating that the normalized value is the same as the
206 buffer substring from the start to the end of the value or nil
207 indicating that the value is not well-formed or a string."
208 (aref att 5))
209
210 (defsubst xmltok-attribute-refs (att)
211 "Return a list of the entity and character references in ATT.
212 Each member is a vector [TYPE START END] where TYPE is either char-ref
213 or entity-ref and START and END are integers giving the start and end
214 of the reference. Nested entity references are not included in the list."
215 (aref att 6))
216
217 (defun xmltok-attribute-prefix (att)
218 (let ((colon (xmltok-attribute-name-colon att)))
219 (and colon
220 (buffer-substring-no-properties (xmltok-attribute-name-start att)
221 colon))))
222
223 (defun xmltok-attribute-local-name (att)
224 (let ((colon (xmltok-attribute-name-colon att)))
225 (buffer-substring-no-properties (if colon
226 (1+ colon)
227 (xmltok-attribute-name-start att))
228 (xmltok-attribute-name-end att))))
229
230 (defun xmltok-attribute-value (att)
231 (let ((rnv (xmltok-attribute-raw-normalized-value att)))
232 (and rnv
233 (if (stringp rnv)
234 rnv
235 (buffer-substring-no-properties (xmltok-attribute-value-start att)
236 (xmltok-attribute-value-end att))))))
237
238 (defun xmltok-start-tag-prefix ()
239 (and xmltok-name-colon
240 (buffer-substring-no-properties (1+ xmltok-start)
241 xmltok-name-colon)))
242
243 (defun xmltok-start-tag-local-name ()
244 (buffer-substring-no-properties (1+ (or xmltok-name-colon
245 xmltok-start))
246 xmltok-name-end))
247
248 (defun xmltok-end-tag-prefix ()
249 (and xmltok-name-colon
250 (buffer-substring-no-properties (+ 2 xmltok-start)
251 xmltok-name-colon)))
252
253 (defun xmltok-end-tag-local-name ()
254 (buffer-substring-no-properties (if xmltok-name-colon
255 (1+ xmltok-name-colon)
256 (+ 2 xmltok-start))
257 xmltok-name-end))
258
259 (defun xmltok-start-tag-qname ()
260 (buffer-substring-no-properties (+ xmltok-start 1) xmltok-name-end))
261
262 (defun xmltok-end-tag-qname ()
263 (buffer-substring-no-properties (+ xmltok-start 2) xmltok-name-end))
264
265 (defsubst xmltok-make-attribute (name-begin
266 name-colon
267 name-end
268 &optional
269 value-begin
270 value-end
271 raw-normalized-value)
272 "Make an attribute. RAW-NORMALIZED-VALUE is nil if the value is
273 not well-formed, t if the normalized value is the string between
274 VALUE-BEGIN and VALUE-END, otherwise a STRING giving the value."
275 (vector name-begin
276 name-colon
277 name-end
278 value-begin
279 value-end
280 raw-normalized-value
281 nil))
282
283 (defsubst xmltok-error-message (err)
284 (aref err 0))
285
286 (defsubst xmltok-error-start (err)
287 (aref err 1))
288
289 (defsubst xmltok-error-end (err)
290 (aref err 2))
291
292 (defsubst xmltok-make-error (message start end)
293 (vector message start end))
294
295 (defun xmltok-add-error (message &optional start end)
296 (setq xmltok-errors
297 (cons (xmltok-make-error message
298 (or start xmltok-start)
299 (or end (point)))
300 xmltok-errors)))
301
302 (defun xmltok-add-dependent (fun &optional start end &rest args)
303 (setq xmltok-dependent-regions
304 (cons (cons fun
305 (cons (or start xmltok-start)
306 (cons (or end (point-max))
307 args)))
308 xmltok-dependent-regions)))
309
310 (defun xmltok-forward ()
311 (setq xmltok-start (point))
312 (let* ((case-fold-search nil)
313 (space-count (skip-chars-forward " \t\r\n"))
314 (ch (char-after)))
315 (cond ((eq ch ?\<)
316 (cond ((> space-count 0)
317 (setq xmltok-type 'space))
318 (t
319 (goto-char (1+ (point)))
320 (xmltok-scan-after-lt))))
321 ((eq ch ?\&)
322 (cond ((> space-count 0)
323 (setq xmltok-type 'space))
324 (t
325 (goto-char (1+ (point)))
326 (xmltok-scan-after-amp
327 (lambda (start end)
328 (xmltok-handle-entity start end))))))
329 ((re-search-forward "[<&]\\|\\(]]>\\)" nil t)
330 (cond ((not (match-beginning 1))
331 (goto-char (match-beginning 0))
332 ;; must have got a non-space char
333 (setq xmltok-type 'data))
334 ((= (match-beginning 1) xmltok-start)
335 (xmltok-add-error "Found `]]>' not closing a CDATA section")
336 (setq xmltok-type 'not-well-formed))
337 (t
338 (goto-char (match-beginning 0))
339 (setq xmltok-type
340 (if (= (point) (+ xmltok-start space-count))
341 'space
342 'data)))))
343 ((eq ch nil)
344 (setq xmltok-type
345 (if (> space-count 0)
346 'space
347 nil)))
348 (t
349 (goto-char (point-max))
350 (setq xmltok-type 'data)))))
351
352 (defun xmltok-forward-special (bound)
353 "Scan forward past the first special token starting at or after point.
354 Return nil if there is no special token that starts before BOUND.
355 CDATA sections, processing instructions and comments (and indeed
356 anything starting with < following by ? or !) count
357 as special. Return the type of the token."
358 (when (re-search-forward "<[?!]" (1+ bound) t)
359 (setq xmltok-start (match-beginning 0))
360 (goto-char (1+ xmltok-start))
361 (let ((case-fold-search nil))
362 (xmltok-scan-after-lt))))
363
364 (eval-when-compile
365
366 ;; A symbolic regexp is represented by a list whose CAR is the string
367 ;; containing the regexp and whose cdr is a list of symbolic names
368 ;; for the groups in the string.
369
370 ;; Construct a symbolic regexp from a regexp.
371 (defun xmltok-r (str)
372 (cons str nil))
373
374 ;; Concatenate zero of more regexps and symbolic regexps.
375 (defun xmltok+ (&rest args)
376 (let (strs names)
377 (while args
378 (let ((arg (car args)))
379 (if (stringp arg)
380 (setq strs (cons arg strs))
381 (setq strs (cons (car arg) strs))
382 (setq names (cons (cdr arg) names)))
383 (setq args (cdr args))))
384 (cons (apply 'concat (nreverse strs))
385 (apply 'append (nreverse names))))))
386
387 (eval-when-compile
388 ;; Make a symbolic group named NAME from the regexp R.
389 ;; R may be a symbolic regexp or an ordinary regexp.
390 (defmacro xmltok-g (name &rest r)
391 (let ((sym (make-symbol "r")))
392 `(let ((,sym (xmltok+ ,@r)))
393 (if (stringp ,sym)
394 (cons (concat "\\(" ,sym "\\)") (cons ',name nil))
395 (cons (concat "\\(" (car ,sym) "\\)") (cons ',name (cdr ,sym)))))))
396
397 (defun xmltok-p (&rest r) (xmltok+ "\\(?:"
398 (apply 'xmltok+ r)
399 "\\)"))
400
401 ;; Get the group index of ELEM in a LIST of symbols.
402 (defun xmltok-get-index (elem list)
403 (or elem
404 (error "Missing group name"))
405 (let ((found nil)
406 (i 1))
407 (while list
408 (cond ((eq elem (car list))
409 (setq found i)
410 (setq list nil))
411 (t
412 (setq i (1+ i))
413 (setq list (cdr list)))))
414 (or found
415 (error "Bad group name %s" elem))))
416
417 ;; Define a macro SYM using a symbolic regexp R.
418 ;; SYM can be called in three ways:
419 ;; (SYM regexp)
420 ;; expands to the regexp in R
421 ;; (SYM start G)
422 ;; expands to
423 ;; (match-beginning N)
424 ;; where N is the group index of G in R.
425 ;; (SYM end G)
426 ;; expands to
427 ;; (match-end N)
428 ;; where N is the group index of G in R.
429 (defmacro xmltok-defregexp (sym r)
430 `(defalias ',sym
431 (let ((r ,r))
432 `(macro lambda (action &optional group-name)
433 (cond ((eq action 'regexp)
434 ,(car r))
435 ((or (eq action 'start) (eq action 'beginning))
436 (list 'match-beginning (xmltok-get-index group-name
437 ',(cdr r))))
438 ((eq action 'end)
439 (list 'match-end (xmltok-get-index group-name
440 ',(cdr r))))
441 ((eq action 'string)
442 (list 'match-string
443 (xmltok-get-index group-name ',(cdr r))))
444 ((eq action 'string-no-properties)
445 (list 'match-string-no-properties
446 (xmltok-get-index group-name ',(cdr r))))
447 (t (error "Invalid action: %s" action))))))))
448
449
450 (eval-when-compile
451 (let* ((or "\\|")
452 (open "\\(?:")
453 (gopen "\\(")
454 (close "\\)")
455 (name-start-char "[_[:alpha:]]")
456 (name-continue-not-start-char "[-.[:digit:]]")
457 (name-continue-char "[-._[:alnum:]]")
458 (* "*")
459 (+ "+")
460 (opt "?")
461 (question "\\?")
462 (s "[ \r\t\n]")
463 (s+ (concat s +))
464 (s* (concat s *))
465 (ncname (concat name-start-char name-continue-char *))
466 (entity-ref
467 (xmltok+ (xmltok-g entity-name ncname)
468 (xmltok-g entity-ref-close ";") opt))
469 (decimal-ref
470 (xmltok+ (xmltok-g decimal "[0-9]" +)
471 (xmltok-g decimal-ref-close ";") opt))
472 (hex-ref
473 (xmltok+ "x" open
474 (xmltok-g hex "[0-9a-fA-F]" +)
475 (xmltok-g hex-ref-close ";") opt
476 close opt))
477 (char-ref
478 (xmltok+ (xmltok-g number-sign "#")
479 open decimal-ref or hex-ref close opt))
480 (start-tag-close
481 (xmltok+ open (xmltok-g start-tag-close s* ">")
482 or open (xmltok-g empty-tag-slash s* "/")
483 (xmltok-g empty-tag-close ">") opt close
484 or (xmltok-g start-tag-s s+)
485 close))
486 (start-tag
487 (xmltok+ (xmltok-g start-tag-name
488 ncname (xmltok-g start-tag-colon ":" ncname) opt)
489 start-tag-close opt))
490 (end-tag
491 (xmltok+ (xmltok-g end-tag-slash "/")
492 open (xmltok-g end-tag-name
493 ncname
494 (xmltok-g end-tag-colon ":" ncname) opt)
495 (xmltok-g end-tag-close s* ">") opt
496 close opt))
497 (comment
498 (xmltok+ (xmltok-g markup-declaration "!")
499 (xmltok-g comment-first-dash "-"
500 (xmltok-g comment-open "-") opt) opt))
501 (cdata-section
502 (xmltok+ "!"
503 (xmltok-g marked-section-open "\\[")
504 open "C"
505 open "D"
506 open "A"
507 open "T"
508 open "A"
509 (xmltok-g cdata-section-open "\\[" ) opt
510 close opt ; A
511 close opt ; T
512 close opt ; A
513 close opt ; D
514 close opt)) ; C
515 (processing-instruction
516 (xmltok-g processing-instruction-question question)))
517
518 (xmltok-defregexp xmltok-ncname (xmltok+ open ncname close))
519
520 (xmltok-defregexp xmltok-after-amp
521 (xmltok+ entity-ref or char-ref))
522 (xmltok-defregexp xmltok-after-lt
523 (xmltok+ start-tag
524 or end-tag
525 ;; cdata-section must come before comment
526 ;; because we treat <! as a comment
527 ;; and Emacs doesn't do fully greedy matching
528 ;; by default
529 or cdata-section
530 or comment
531 or processing-instruction))
532 (xmltok-defregexp
533 xmltok-attribute
534 (let* ((lit1
535 (xmltok+ "'"
536 "[^<'&\r\n\t]*"
537 (xmltok-g complex1 "[&\r\n\t][^<']*") opt
538 "'"))
539 (lit2 (cons (replace-regexp-in-string "'" "\"" (car lit1))
540 '(complex2)))
541 (literal (xmltok-g literal lit1 or lit2))
542 (name (xmltok+ open (xmltok-g xmlns "xmlns") or ncname close
543 (xmltok-g colon ":" ncname) opt)))
544 (xmltok+ (xmltok-g name name)
545 s* "="
546 ;; If the literal isn't followed by what it should be,
547 ;; then the closing delimiter is probably really the
548 ;; opening delimiter of another literal, so don't
549 ;; absorb the literal in this case.
550 open s* literal start-tag-close close opt)))
551 (xmltok-defregexp
552 xmltok-xml-declaration
553 (let* ((literal-content "[-._:a-zA-Z0-9]+")
554 (literal
555 (concat open "\"" literal-content "\""
556 or "'" literal-content "'" close))
557 (version-att
558 (xmltok+ open
559 s+ (xmltok-g version-name "version")
560 s* "="
561 s* (xmltok-g version-value literal)
562 close opt))
563 (encoding-att
564 (xmltok+ open
565 s+ (xmltok-g encoding-name "encoding")
566 s* "="
567 s* (xmltok-g encoding-value literal)
568 close opt))
569 (yes-no
570 (concat open "yes" or "no" close))
571 (standalone-att
572 (xmltok+ open
573 s+ (xmltok-g standalone-name "standalone")
574 s* "="
575 s* (xmltok-g standalone-value
576 "\"" yes-no "\"" or "'" yes-no "'")
577 close opt)))
578 (xmltok+ "<" question "xml"
579 version-att
580 encoding-att
581 standalone-att
582 s* question ">")))
583 (xmltok-defregexp
584 xmltok-prolog
585 (let* ((single-char (xmltok-g single-char "[[|,(\"'>]"))
586 (internal-subset-close (xmltok-g internal-subset-close
587 "][ \t\r\n]*>"))
588 (starts-with-close-paren
589 (xmltok-g close-paren
590 ")"
591 (xmltok-p
592 (xmltok-g close-paren-occur "[+?]")
593 or
594 (xmltok-g close-paren-star "\\*"))
595 opt))
596 (starts-with-percent
597 (xmltok-g percent
598 "%" (xmltok-g param-entity-ref
599 ncname
600 (xmltok-g param-entity-ref-close
601 ";") opt) opt))
602 (starts-with-nmtoken-not-name
603 (xmltok-g nmtoken
604 (xmltok-p name-continue-not-start-char or ":")
605 (xmltok-p name-continue-char or ":") *))
606 (nmtoken-after-colon
607 (xmltok+
608 (xmltok-p name-continue-not-start-char or ":")
609 (xmltok-p name-continue-char or ":") *
610 or
611 name-start-char
612 name-continue-char *
613 ":"
614 (xmltok-p name-continue-char or ":") *))
615 (after-ncname
616 (xmltok+ (xmltok-g ncname-nmtoken
617 ":" (xmltok-p nmtoken-after-colon))
618 or (xmltok-p (xmltok-g colon ":" ncname)
619 (xmltok-g colon-name-occur "[?+*]") opt)
620 or (xmltok-g ncname-occur "[?+*]")
621 or (xmltok-g ncname-colon ":")))
622 (starts-with-name
623 (xmltok-g name ncname (xmltok-p after-ncname) opt))
624 (starts-with-hash
625 (xmltok-g pound
626 "#" (xmltok-g hash-name ncname)))
627 (markup-declaration
628 (xmltok-g markup-declaration
629 "!" (xmltok-p (xmltok-g comment-first-dash "-"
630 (xmltok-g comment-open "-") opt)
631 or (xmltok-g named-markup-declaration
632 ncname)) opt))
633 (after-lt
634 (xmltok+ markup-declaration
635 or (xmltok-g processing-instruction-question
636 question)
637 or (xmltok-g instance-start
638 ncname)))
639 (starts-with-lt (xmltok-g less-than "<" (xmltok-p after-lt) opt)))
640 (xmltok+ starts-with-lt
641 or single-char
642 or starts-with-close-paren
643 or starts-with-percent
644 or starts-with-name
645 or starts-with-nmtoken-not-name
646 or starts-with-hash
647 or internal-subset-close)))))
648
649 (defconst xmltok-ncname-regexp (xmltok-ncname regexp))
650
651 (defun xmltok-scan-after-lt ()
652 (cond ((not (looking-at (xmltok-after-lt regexp)))
653 (xmltok-add-error "`<' that is not markup must be entered as `&lt;'")
654 (setq xmltok-type 'not-well-formed))
655 (t
656 (goto-char (match-end 0))
657 (cond ((xmltok-after-lt start start-tag-close)
658 (setq xmltok-name-end
659 (xmltok-after-lt end start-tag-name))
660 (setq xmltok-name-colon
661 (xmltok-after-lt start start-tag-colon))
662 (setq xmltok-attributes nil)
663 (setq xmltok-namespace-attributes nil)
664 (setq xmltok-type 'start-tag))
665 ((xmltok-after-lt start end-tag-close)
666 (setq xmltok-name-end
667 (xmltok-after-lt end end-tag-name))
668 (setq xmltok-name-colon
669 (xmltok-after-lt start end-tag-colon))
670 (setq xmltok-type 'end-tag))
671 ((xmltok-after-lt start start-tag-s)
672 (setq xmltok-name-end
673 (xmltok-after-lt end start-tag-name))
674 (setq xmltok-name-colon
675 (xmltok-after-lt start start-tag-colon))
676 (setq xmltok-namespace-attributes nil)
677 (setq xmltok-attributes nil)
678 (xmltok-scan-attributes)
679 xmltok-type)
680 ((xmltok-after-lt start empty-tag-close)
681 (setq xmltok-name-end
682 (xmltok-after-lt end start-tag-name))
683 (setq xmltok-name-colon
684 (xmltok-after-lt start start-tag-colon))
685 (setq xmltok-attributes nil)
686 (setq xmltok-namespace-attributes nil)
687 (setq xmltok-type 'empty-element))
688 ((xmltok-after-lt start cdata-section-open)
689 (setq xmltok-type
690 (if (search-forward "]]>" nil t)
691 'cdata-section
692 (xmltok-add-error "No closing ]]>")
693 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
694 nil
695 nil
696 "]]>")
697 'not-well-formed)))
698 ((xmltok-after-lt start processing-instruction-question)
699 (xmltok-scan-after-processing-instruction-open))
700 ((xmltok-after-lt start comment-open)
701 (xmltok-scan-after-comment-open))
702 ((xmltok-after-lt start empty-tag-slash)
703 (setq xmltok-name-end
704 (xmltok-after-lt end start-tag-name))
705 (setq xmltok-name-colon
706 (xmltok-after-lt start start-tag-colon))
707 (setq xmltok-attributes nil)
708 (setq xmltok-namespace-attributes nil)
709 (xmltok-add-error "Expected `/>'" (1- (point)))
710 (setq xmltok-type 'partial-empty-element))
711 ((xmltok-after-lt start start-tag-name)
712 (xmltok-add-error "Missing `>'"
713 nil
714 (1+ xmltok-start))
715 (setq xmltok-name-end
716 (xmltok-after-lt end start-tag-name))
717 (setq xmltok-name-colon
718 (xmltok-after-lt start start-tag-colon))
719 (setq xmltok-namespace-attributes nil)
720 (setq xmltok-attributes nil)
721 (setq xmltok-type 'partial-start-tag))
722 ((xmltok-after-lt start end-tag-name)
723 (setq xmltok-name-end (xmltok-after-lt end end-tag-name))
724 (setq xmltok-name-colon
725 (xmltok-after-lt start end-tag-colon))
726 (cond ((and (not xmltok-name-colon)
727 (eq (char-after) ?:))
728 (goto-char (1+ (point)))
729 (xmltok-add-error "Expected name following `:'"
730 (1- (point))))
731 (t
732 (xmltok-add-error "Missing `>'"
733 nil
734 (1+ xmltok-start))))
735 (setq xmltok-type 'partial-end-tag))
736 ((xmltok-after-lt start end-tag-slash)
737 (xmltok-add-error "Expected name following `</'")
738 (setq xmltok-name-end nil)
739 (setq xmltok-name-colon nil)
740 (setq xmltok-type 'partial-end-tag))
741 ((xmltok-after-lt start marked-section-open)
742 (xmltok-add-error "Expected `CDATA[' after `<!['"
743 xmltok-start
744 (+ 3 xmltok-start))
745 (setq xmltok-type 'not-well-formed))
746 ((xmltok-after-lt start comment-first-dash)
747 (xmltok-add-error "Expected `-' after `<!-'"
748 xmltok-start
749 (+ 3 xmltok-start))
750 (setq xmltok-type 'not-well-formed))
751 ((xmltok-after-lt start markup-declaration)
752 (xmltok-add-error "Expected `[CDATA[' or `--' after `<!'"
753 xmltok-start
754 (+ 2 xmltok-start))
755 (setq xmltok-type 'not-well-formed))
756 (t
757 (xmltok-add-error "Not well-formed")
758 (setq xmltok-type 'not-well-formed))))))
759
760 ;; XXX This should be unified with
761 ;; xmltok-scan-prolog-after-processing-instruction-open
762 ;; XXX maybe should include rest of line (up to any <,>) in unclosed PI
763 (defun xmltok-scan-after-processing-instruction-open ()
764 (cond ((not (search-forward "?>" nil t))
765 (xmltok-add-error "No closing ?>"
766 xmltok-start
767 (+ xmltok-start 2))
768 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
769 nil
770 nil
771 "?>")
772 (setq xmltok-type 'not-well-formed))
773 (t
774 (cond ((not (save-excursion
775 (goto-char (+ 2 xmltok-start))
776 (and (looking-at (xmltok-ncname regexp))
777 (setq xmltok-name-end (match-end 0)))))
778 (setq xmltok-name-end (+ xmltok-start 2))
779 (xmltok-add-error "<? not followed by name"
780 (+ xmltok-start 2)
781 (+ xmltok-start 3)))
782 ((not (or (memq (char-after xmltok-name-end)
783 '(?\n ?\t ?\r ? ))
784 (= xmltok-name-end (- (point) 2))))
785 (xmltok-add-error "Target not followed by whitespace"
786 xmltok-name-end
787 (1+ xmltok-name-end)))
788 ((and (= xmltok-name-end (+ xmltok-start 5))
789 (save-excursion
790 (goto-char (+ xmltok-start 2))
791 (let ((case-fold-search t))
792 (looking-at "xml"))))
793 (xmltok-add-error "Processing instruction target is xml"
794 (+ xmltok-start 2)
795 (+ xmltok-start 5))))
796 (setq xmltok-type 'processing-instruction))))
797
798 (defun xmltok-scan-after-comment-open ()
799 (setq xmltok-type
800 (cond ((not (search-forward "--" nil t))
801 (xmltok-add-error "No closing -->")
802 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
803 nil
804 nil
805 ;; not --> because
806 ;; -- is not allowed
807 ;; in comments in XML
808 "--")
809 'not-well-formed)
810 ((eq (char-after) ?>)
811 (goto-char (1+ (point)))
812 'comment)
813 (t
814 (xmltok-add-dependent
815 'xmltok-semi-closed-reparse-p
816 nil
817 (point)
818 "--"
819 2)
820 ;; just include the <!-- in the token
821 (goto-char (+ xmltok-start 4))
822 ;; Need do this after the goto-char because
823 ;; marked error should just apply to <!--
824 (xmltok-add-error "First following `--' not followed by `>'")
825 'not-well-formed))))
826
827 (defun xmltok-scan-attributes ()
828 (let ((recovering nil)
829 (atts-needing-normalization nil))
830 (while (cond ((or (looking-at (xmltok-attribute regexp))
831 ;; use non-greedy group
832 (when (looking-at (concat "[^<>\n]+?"
833 (xmltok-attribute regexp)))
834 (unless recovering
835 (xmltok-add-error "Malformed attribute"
836 (point)
837 (save-excursion
838 (goto-char (xmltok-attribute start
839 name))
840 (skip-chars-backward "\r\n\t ")
841 (point))))
842 t))
843 (setq recovering nil)
844 (goto-char (match-end 0))
845 (let ((att (xmltok-add-attribute)))
846 (when att
847 (setq atts-needing-normalization
848 (cons att atts-needing-normalization))))
849 (cond ((xmltok-attribute start start-tag-s) t)
850 ((xmltok-attribute start start-tag-close)
851 (setq xmltok-type 'start-tag)
852 nil)
853 ((xmltok-attribute start empty-tag-close)
854 (setq xmltok-type 'empty-element)
855 nil)
856 ((xmltok-attribute start empty-tag-slash)
857 (setq xmltok-type 'partial-empty-element)
858 (xmltok-add-error "Expected `/>'"
859 (1- (point)))
860 nil)
861 ((looking-at "[ \t\r\n]*[\"']")
862 (goto-char (match-end 0))
863 (xmltok-add-error "Missing closing delimiter"
864 (1- (point)))
865 (setq recovering t)
866 t)
867 ((looking-at "[ \t]*\\([^ \t\r\n\"'=<>/]+\\)[ \t\r\n/>]")
868 (goto-char (match-end 1))
869 (xmltok-add-error "Attribute value not quoted"
870 (match-beginning 1))
871 (setq recovering t)
872 t)
873 (t
874 (xmltok-add-error "Missing attribute value"
875 (1- (point)))
876 (setq recovering t)
877 t)))
878 ((looking-at "[^<>\n]*/>")
879 (let ((start (point)))
880 (goto-char (match-end 0))
881 (unless recovering
882 (xmltok-add-error "Malformed empty-element"
883 start
884 (- (point) 2))))
885 (setq xmltok-type 'empty-element)
886 nil)
887 ((looking-at "[^<>\n]*>")
888 (let ((start (point)))
889 (goto-char (match-end 0))
890 (unless recovering
891 (xmltok-add-error "Malformed start-tag"
892 start
893 (1- (point)))))
894 (setq xmltok-type 'start-tag)
895 nil)
896 (t
897 (when recovering
898 (skip-chars-forward "^<>\n"))
899 (xmltok-add-error "Missing `>'"
900 xmltok-start
901 (1+ xmltok-start))
902 (setq xmltok-type 'partial-start-tag)
903 nil)))
904 (while atts-needing-normalization
905 (xmltok-normalize-attribute (car atts-needing-normalization))
906 (setq atts-needing-normalization (cdr atts-needing-normalization))))
907 (setq xmltok-attributes
908 (nreverse xmltok-attributes))
909 (setq xmltok-namespace-attributes
910 (nreverse xmltok-namespace-attributes)))
911
912 (defun xmltok-add-attribute ()
913 "Return the attribute if it needs normalizing, otherwise nil."
914 (let* ((needs-normalizing nil)
915 (att
916 (if (xmltok-attribute start literal)
917 (progn
918 (setq needs-normalizing
919 (or (xmltok-attribute start complex1)
920 (xmltok-attribute start complex2)))
921 (xmltok-make-attribute (xmltok-attribute start name)
922 (xmltok-attribute start colon)
923 (xmltok-attribute end name)
924 (1+ (xmltok-attribute start literal))
925 (1- (xmltok-attribute end literal))
926 (not needs-normalizing)))
927 (xmltok-make-attribute (xmltok-attribute start name)
928 (xmltok-attribute start colon)
929 (xmltok-attribute end name)))))
930 (if (xmltok-attribute start xmlns)
931 (setq xmltok-namespace-attributes
932 (cons att xmltok-namespace-attributes))
933 (setq xmltok-attributes
934 (cons att xmltok-attributes)))
935 (and needs-normalizing
936 att)))
937
938 (defun xmltok-normalize-attribute (att)
939 (let ((end (xmltok-attribute-value-end att))
940 (well-formed t)
941 (value-parts nil)
942 (refs nil))
943 (save-excursion
944 (goto-char (xmltok-attribute-value-start att))
945 (while (progn
946 (let ((n (skip-chars-forward "^\r\t\n&" end)))
947 (when (> n 0)
948 (setq value-parts
949 (cons (buffer-substring-no-properties (- (point) n)
950 (point))
951 value-parts))))
952 (when (< (point) end)
953 (goto-char (1+ (point)))
954 (cond ((eq (char-before) ?\&)
955 (let ((xmltok-start (1- (point)))
956 xmltok-type xmltok-replacement)
957 (xmltok-scan-after-amp
958 (lambda (start end)
959 (xmltok-handle-entity start end t)))
960 (cond ((or (eq xmltok-type 'char-ref)
961 (eq xmltok-type 'entity-ref))
962 (setq refs
963 (cons (vector xmltok-type
964 xmltok-start
965 (point))
966 refs))
967 (if xmltok-replacement
968 (setq value-parts
969 (cons xmltok-replacement
970 value-parts))
971 (setq well-formed nil)))
972 (t (setq well-formed nil)))))
973 (t (setq value-parts
974 (cons " " value-parts)))))
975 (< (point) end))))
976 (when well-formed
977 (aset att 5 (apply 'concat (nreverse value-parts))))
978 (aset att 6 (nreverse refs))))
979
980 (defun xmltok-scan-after-amp (entity-handler)
981 (cond ((not (looking-at (xmltok-after-amp regexp)))
982 (xmltok-add-error "`&' that is not markup must be entered as `&amp;'")
983 (setq xmltok-type 'not-well-formed))
984 (t
985 (goto-char (match-end 0))
986 (cond ((xmltok-after-amp start entity-ref-close)
987 (funcall entity-handler
988 (xmltok-after-amp start entity-name)
989 (xmltok-after-amp end entity-name))
990 (setq xmltok-type 'entity-ref))
991 ((xmltok-after-amp start decimal-ref-close)
992 (xmltok-scan-char-ref (xmltok-after-amp start decimal)
993 (xmltok-after-amp end decimal)
994 10))
995 ((xmltok-after-amp start hex-ref-close)
996 (xmltok-scan-char-ref (xmltok-after-amp start hex)
997 (xmltok-after-amp end hex)
998 16))
999 ((xmltok-after-amp start number-sign)
1000 (xmltok-add-error "Missing character number")
1001 (setq xmltok-type 'not-well-formed))
1002 (t
1003 (xmltok-add-error "Missing closing `;'")
1004 (setq xmltok-type 'not-well-formed))))))
1005
1006 (defconst xmltok-entity-error-messages
1007 '((unparsed . "Referenced entity is unparsed")
1008 (not-well-formed . "Referenced entity is not well-formed")
1009 (external nil . "Referenced entity is external")
1010 (element nil . "Referenced entity contains <")))
1011
1012 (defun xmltok-handle-entity (start end &optional attributep)
1013 (let* ((name (buffer-substring-no-properties start end))
1014 (name-def (assoc name xmltok-dtd))
1015 (def (cdr name-def)))
1016 (cond ((setq xmltok-replacement (and (consp def)
1017 (if attributep
1018 (cdr def)
1019 (car def)))))
1020 ((null name-def)
1021 (unless (eq (car xmltok-dtd) t)
1022 (xmltok-add-error "Referenced entity has not been defined"
1023 start
1024 end)))
1025 ((and attributep (consp def))
1026 (xmltok-add-error "Referenced entity contains <"
1027 start
1028 end))
1029 (t
1030 (let ((err (cdr (assq def xmltok-entity-error-messages))))
1031 (when (consp err)
1032 (setq err (if attributep (cdr err) (car err))))
1033 (when err
1034 (xmltok-add-error err start end)))))))
1035
1036 (defun xmltok-scan-char-ref (start end base)
1037 (setq xmltok-replacement
1038 (let ((n (string-to-number (buffer-substring-no-properties start end)
1039 base)))
1040 (cond ((and (integerp n) (xmltok-valid-char-p n))
1041 (setq n (xmltok-unicode-to-char n))
1042 (and n (string n)))
1043 (t
1044 (xmltok-add-error "Invalid character code" start end)
1045 nil))))
1046 (setq xmltok-type 'char-ref))
1047
1048 (defun xmltok-char-number (start end)
1049 (let* ((base (if (eq (char-after (+ start 2)) ?x)
1050 16
1051 10))
1052 (n (string-to-number
1053 (buffer-substring-no-properties (+ start (if (= base 16) 3 2))
1054 (1- end))
1055 base)))
1056 (and (integerp n)
1057 (xmltok-valid-char-p n)
1058 n)))
1059
1060 (defun xmltok-unclosed-reparse-p (change-start
1061 change-end
1062 pre-change-length
1063 start
1064 end
1065 delimiter)
1066 (let ((len-1 (1- (length delimiter))))
1067 (goto-char (max start (- change-start len-1)))
1068 (search-forward delimiter (min end (+ change-end len-1)) t)))
1069
1070 ;; Handles a <!-- with the next -- not followed by >
1071
1072 (defun xmltok-semi-closed-reparse-p (change-start
1073 change-end
1074 pre-change-length
1075 start
1076 end
1077 delimiter
1078 delimiter-length)
1079 (or (<= (- end delimiter-length) change-end)
1080 (xmltok-unclosed-reparse-p change-start
1081 change-end
1082 pre-change-length
1083 start
1084 end
1085 delimiter)))
1086
1087 (defun xmltok-valid-char-p (n)
1088 "Return non-nil if n is the Unicode code of a valid XML character."
1089 (cond ((< n #x20) (memq n '(#xA #xD #x9)))
1090 ((< n #xD800) t)
1091 ((< n #xE000) nil)
1092 ((< n #xFFFE) t)
1093 (t (and (> n #xFFFF)
1094 (< n #x110000)))))
1095
1096 (defun xmltok-unicode-to-char (n)
1097 "Return the character corresponding to Unicode scalar value N.
1098 Return nil if unsupported in Emacs."
1099 (decode-char 'ucs n))
1100
1101 ;;; Prolog parsing
1102
1103 (defvar xmltok-contains-doctype nil)
1104 (defvar xmltok-doctype-external-subset-flag nil)
1105 (defvar xmltok-internal-subset-start nil)
1106 (defvar xmltok-had-param-entity-ref nil)
1107 (defvar xmltok-prolog-regions nil)
1108 (defvar xmltok-standalone nil
1109 "Non-nil if there was an XML declaration specifying standalone=\"yes\",")
1110 (defvar xmltok-markup-declaration-doctype-flag nil)
1111
1112 (defconst xmltok-predefined-entity-alist
1113 '(("lt" "<" . "<")
1114 ("gt" ">" . ">")
1115 ("amp" "&" . "&")
1116 ("apos" "'" . "'")
1117 ("quot" "\"" . "\"")))
1118
1119 (defun xmltok-forward-prolog ()
1120 "Move forward to the end of the XML prolog.
1121
1122 Returns a list of vectors [TYPE START END] where TYPE is a symbol and
1123 START and END are integers giving the start and end of the region of
1124 that type. TYPE can be one of xml-declaration,
1125 xml-declaration-attribute-name, xml-declaration-attribute-value,
1126 comment, processing-instruction-left, processing-instruction-right,
1127 markup-declaration-open. markup-declaration-close,
1128 internal-subset-open, internal-subset-close, hash-name, keyword,
1129 literal, encoding-name.
1130 Adds to `xmltok-errors' and `xmltok-dependent-regions' as appropriate."
1131 (let ((case-fold-search nil)
1132 xmltok-start
1133 xmltok-type
1134 xmltok-prolog-regions
1135 xmltok-contains-doctype
1136 xmltok-internal-subset-start
1137 xmltok-had-param-entity-ref
1138 xmltok-standalone
1139 xmltok-doctype-external-subset-flag
1140 xmltok-markup-declaration-doctype-flag)
1141 (setq xmltok-dtd xmltok-predefined-entity-alist)
1142 (xmltok-scan-xml-declaration)
1143 (xmltok-next-prolog-token)
1144 (while (condition-case err
1145 (when (xmltok-parse-prolog-item)
1146 (xmltok-next-prolog-token))
1147 (xmltok-markup-declaration-parse-error
1148 (xmltok-skip-markup-declaration))))
1149 (when xmltok-internal-subset-start
1150 (xmltok-add-error "No closing ]"
1151 (1- xmltok-internal-subset-start)
1152 xmltok-internal-subset-start))
1153 (xmltok-parse-entities)
1154 ;; XXX prune dependent-regions for those entirely in prolog
1155 (nreverse xmltok-prolog-regions)))
1156
1157 (defconst xmltok-bad-xml-decl-regexp
1158 "[ \t\r\n]*<\\?xml\\(?:[ \t\r\n]\\|\\?>\\)")
1159
1160 ;;;###autoload
1161 (defun xmltok-get-declared-encoding-position (&optional limit)
1162 "Return the position of the encoding in the XML declaration at point.
1163 If there is a well-formed XML declaration starting at point and it
1164 contains an encoding declaration, then return (START . END)
1165 where START and END are the positions of the start and the end
1166 of the encoding name; if there is no encoding declaration return
1167 the position where and encoding declaration could be inserted.
1168 If there is XML that is not well-formed that looks like an XML declaration,
1169 return nil. Otherwise, return t.
1170 If LIMIT is non-nil, then do not consider characters beyond LIMIT."
1171 (cond ((let ((case-fold-search nil))
1172 (and (looking-at (xmltok-xml-declaration regexp))
1173 (or (not limit) (<= (match-end 0) limit))))
1174 (let ((end (xmltok-xml-declaration end encoding-value)))
1175 (if end
1176 (cons (1+ (xmltok-xml-declaration start encoding-value))
1177 (1- end))
1178 (or (xmltok-xml-declaration end version-value)
1179 (+ (point) 5)))))
1180 ((not (let ((case-fold-search t))
1181 (looking-at xmltok-bad-xml-decl-regexp))))))
1182
1183 (defun xmltok-scan-xml-declaration ()
1184 (when (looking-at (xmltok-xml-declaration regexp))
1185 (xmltok-add-prolog-region 'xml-declaration (point) (match-end 0))
1186 (goto-char (match-end 0))
1187 (when (xmltok-xml-declaration start version-name)
1188 (xmltok-add-prolog-region 'xml-declaration-attribute-name
1189 (xmltok-xml-declaration start version-name)
1190 (xmltok-xml-declaration end version-name))
1191 (let ((start (xmltok-xml-declaration start version-value))
1192 (end (xmltok-xml-declaration end version-value)))
1193 (xmltok-add-prolog-region 'xml-declaration-attribute-value
1194 start
1195 end)))
1196 ;; XXX need to check encoding name
1197 ;; Should start with letter, not contain colon
1198 (when (xmltok-xml-declaration start encoding-name)
1199 (xmltok-add-prolog-region 'xml-declaration-attribute-name
1200 (xmltok-xml-declaration start encoding-name)
1201 (xmltok-xml-declaration end encoding-name))
1202 (let ((start (xmltok-xml-declaration start encoding-value))
1203 (end (xmltok-xml-declaration end encoding-value)))
1204 (xmltok-add-prolog-region 'encoding-name
1205 (1+ start)
1206 (1- end))
1207 (xmltok-add-prolog-region 'xml-declaration-attribute-value
1208 start
1209 end)))
1210 (when (xmltok-xml-declaration start standalone-name)
1211 (xmltok-add-prolog-region 'xml-declaration-attribute-name
1212 (xmltok-xml-declaration start standalone-name)
1213 (xmltok-xml-declaration end standalone-name))
1214 (let ((start (xmltok-xml-declaration start standalone-value))
1215 (end (xmltok-xml-declaration end standalone-value)))
1216 (xmltok-add-prolog-region 'xml-declaration-attribute-value
1217 start
1218 end)
1219 (setq xmltok-standalone
1220 (string= (buffer-substring-no-properties (1+ start) (1- end))
1221 "yes"))))
1222 t))
1223
1224 (defconst xmltok-markup-declaration-alist
1225 '(("ELEMENT" . xmltok-parse-element-declaration)
1226 ("ATTLIST" . xmltok-parse-attlist-declaration)
1227 ("ENTITY" . xmltok-parse-entity-declaration)
1228 ("NOTATION" . xmltok-parse-notation-declaration)))
1229
1230 (defun xmltok-parse-prolog-item ()
1231 (cond ((eq xmltok-type 'comment)
1232 (xmltok-add-prolog-region 'comment
1233 xmltok-start
1234 (point))
1235 t)
1236 ((eq xmltok-type 'processing-instruction))
1237 ((eq xmltok-type 'named-markup-declaration)
1238 (setq xmltok-markup-declaration-doctype-flag nil)
1239 (xmltok-add-prolog-region 'markup-declaration-open
1240 xmltok-start
1241 (point))
1242 (let* ((name (buffer-substring-no-properties
1243 (+ xmltok-start 2)
1244 (point)))
1245 (fun (cdr (assoc name xmltok-markup-declaration-alist))))
1246 (cond (fun
1247 (unless xmltok-internal-subset-start
1248 (xmltok-add-error
1249 "Declaration allowed only in internal subset"))
1250 (funcall fun))
1251 ((string= name "DOCTYPE")
1252 (xmltok-parse-doctype))
1253 (t
1254 (xmltok-add-error "Unknown markup declaration"
1255 (+ xmltok-start 2))
1256 (xmltok-next-prolog-token)
1257 (xmltok-markup-declaration-parse-error))))
1258 t)
1259 ((or (eq xmltok-type 'end-prolog)
1260 (not xmltok-type))
1261 nil)
1262 ((eq xmltok-type 'internal-subset-close)
1263 (xmltok-add-prolog-region 'internal-subset-close
1264 xmltok-start
1265 (1+ xmltok-start))
1266 (xmltok-add-prolog-region 'markup-declaration-close
1267 (1- (point))
1268 (point))
1269 (if xmltok-internal-subset-start
1270 (setq xmltok-internal-subset-start nil)
1271 (xmltok-add-error "]> outside internal subset"))
1272 t)
1273 ((eq xmltok-type 'param-entity-ref)
1274 (if xmltok-internal-subset-start
1275 (setq xmltok-had-param-entity-ref t)
1276 (xmltok-add-error "Parameter entity reference outside document type declaration"))
1277 t)
1278 ;; If we don't do this, we can get thousands of errors when
1279 ;; a plain text file is parsed.
1280 ((not xmltok-internal-subset-start)
1281 (when (let ((err (car xmltok-errors)))
1282 (or (not err)
1283 (<= (xmltok-error-end err) xmltok-start)))
1284 (goto-char xmltok-start))
1285 nil)
1286 ((eq xmltok-type 'not-well-formed) t)
1287 (t
1288 (xmltok-add-error "Token allowed only inside markup declaration")
1289 t)))
1290
1291 (defun xmltok-parse-doctype ()
1292 (setq xmltok-markup-declaration-doctype-flag t)
1293 (xmltok-next-prolog-token)
1294 (when xmltok-internal-subset-start
1295 (xmltok-add-error "DOCTYPE declaration not allowed in internal subset")
1296 (xmltok-markup-declaration-parse-error))
1297 (when xmltok-contains-doctype
1298 (xmltok-add-error "Duplicate DOCTYPE declaration")
1299 (xmltok-markup-declaration-parse-error))
1300 (setq xmltok-contains-doctype t)
1301 (xmltok-require-token 'name 'prefixed-name)
1302 (xmltok-require-next-token "SYSTEM" "PUBLIC" ?\[ ?>)
1303 (cond ((eq xmltok-type ?\[)
1304 (setq xmltok-internal-subset-start (point)))
1305 ((eq xmltok-type ?>))
1306 (t
1307 (setq xmltok-doctype-external-subset-flag t)
1308 (xmltok-parse-external-id)
1309 (xmltok-require-token ?\[ ?>)
1310 (when (eq xmltok-type ?\[)
1311 (setq xmltok-internal-subset-start (point))))))
1312
1313 (defun xmltok-parse-attlist-declaration ()
1314 (xmltok-require-next-token 'prefixed-name 'name)
1315 (while (progn
1316 (xmltok-require-next-token ?> 'name 'prefixed-name)
1317 (if (eq xmltok-type ?>)
1318 nil
1319 (xmltok-require-next-token ?\(
1320 "CDATA"
1321 "ID"
1322 "IDREF"
1323 "IDREFS"
1324 "ENTITY"
1325 "ENTITIES"
1326 "NMTOKEN"
1327 "NMTOKENS"
1328 "NOTATION")
1329 (cond ((eq xmltok-type ?\()
1330 (xmltok-parse-nmtoken-group))
1331 ((string= (xmltok-current-token-string)
1332 "NOTATION")
1333 (xmltok-require-next-token ?\()
1334 (xmltok-parse-nmtoken-group)))
1335 (xmltok-require-next-token "#IMPLIED"
1336 "#REQUIRED"
1337 "#FIXED"
1338 'literal)
1339 (when (string= (xmltok-current-token-string) "#FIXED")
1340 (xmltok-require-next-token 'literal))
1341 t))))
1342
1343 (defun xmltok-parse-nmtoken-group ()
1344 (while (progn
1345 (xmltok-require-next-token 'nmtoken 'prefixed-name 'name)
1346 (xmltok-require-next-token ?| ?\))
1347 (eq xmltok-type ?|))))
1348
1349 (defun xmltok-parse-element-declaration ()
1350 (xmltok-require-next-token 'name 'prefixed-name)
1351 (xmltok-require-next-token "EMPTY" "ANY" ?\()
1352 (when (eq xmltok-type ?\()
1353 (xmltok-require-next-token "#PCDATA"
1354 'name
1355 'prefixed-name
1356 'name-occur
1357 ?\()
1358 (cond ((eq xmltok-type 'hash-name)
1359 (xmltok-require-next-token ?| ?\) 'close-paren-star)
1360 (while (eq xmltok-type ?|)
1361 (xmltok-require-next-token 'name 'prefixed-name)
1362 (xmltok-require-next-token 'close-paren-star ?|)))
1363 (t (xmltok-parse-model-group))))
1364 (xmltok-require-next-token ?>))
1365
1366 (defun xmltok-parse-model-group ()
1367 (xmltok-parse-model-group-member)
1368 (xmltok-require-next-token ?|
1369 ?,
1370 ?\)
1371 'close-paren-star
1372 'close-paren-occur)
1373 (when (memq xmltok-type '(?, ?|))
1374 (let ((connector xmltok-type))
1375 (while (progn
1376 (xmltok-next-prolog-token)
1377 (xmltok-parse-model-group-member)
1378 (xmltok-require-next-token connector
1379 ?\)
1380 'close-paren-star
1381 'close-paren-occur)
1382 (eq xmltok-type connector))))))
1383
1384 (defun xmltok-parse-model-group-member ()
1385 (xmltok-require-token 'name
1386 'prefixed-name
1387 'name-occur
1388 ?\()
1389 (when (eq xmltok-type ?\()
1390 (xmltok-next-prolog-token)
1391 (xmltok-parse-model-group)))
1392
1393 (defun xmltok-parse-entity-declaration ()
1394 (let (paramp name)
1395 (xmltok-require-next-token 'name ?%)
1396 (when (eq xmltok-type ?%)
1397 (setq paramp t)
1398 (xmltok-require-next-token 'name))
1399 (setq name (xmltok-current-token-string))
1400 (xmltok-require-next-token 'literal "SYSTEM" "PUBLIC")
1401 (cond ((eq xmltok-type 'literal)
1402 (let ((replacement (xmltok-parse-entity-value)))
1403 (unless paramp
1404 (xmltok-define-entity name replacement)))
1405 (xmltok-require-next-token ?>))
1406 (t
1407 (xmltok-parse-external-id)
1408 (if paramp
1409 (xmltok-require-token ?>)
1410 (xmltok-require-token ?> "NDATA")
1411 (if (eq xmltok-type ?>)
1412 (xmltok-define-entity name 'external)
1413 (xmltok-require-next-token 'name)
1414 (xmltok-require-next-token ?>)
1415 (xmltok-define-entity name 'unparsed)))))))
1416
1417 (defun xmltok-define-entity (name value)
1418 (when (and (or (not xmltok-had-param-entity-ref)
1419 xmltok-standalone)
1420 (not (assoc name xmltok-dtd)))
1421 (setq xmltok-dtd
1422 (cons (cons name value) xmltok-dtd))))
1423
1424 (defun xmltok-parse-entity-value ()
1425 (let ((lim (1- (point)))
1426 (well-formed t)
1427 value-parts
1428 start)
1429 (save-excursion
1430 (goto-char (1+ xmltok-start))
1431 (setq start (point))
1432 (while (progn
1433 (skip-chars-forward "^%&" lim)
1434 (when (< (point) lim)
1435 (goto-char (1+ (point)))
1436 (cond ((eq (char-before) ?%)
1437 (xmltok-add-error "Parameter entity references are not allowed in the internal subset"
1438 (1- (point))
1439 (point))
1440 (setq well-formed nil))
1441 (t
1442 (let ((xmltok-start (1- (point)))
1443 xmltok-type xmltok-replacement)
1444 (xmltok-scan-after-amp (lambda (start end)))
1445 (cond ((eq xmltok-type 'char-ref)
1446 (setq value-parts
1447 (cons (buffer-substring-no-properties
1448 start
1449 xmltok-start)
1450 value-parts))
1451 (setq value-parts
1452 (cons xmltok-replacement
1453 value-parts))
1454 (setq start (point)))
1455 ((eq xmltok-type 'not-well-formed)
1456 (setq well-formed nil))))))
1457 t))))
1458 (if (not well-formed)
1459 nil
1460 (apply 'concat
1461 (nreverse (cons (buffer-substring-no-properties start lim)
1462 value-parts))))))
1463
1464 (defun xmltok-parse-notation-declaration ()
1465 (xmltok-require-next-token 'name)
1466 (xmltok-require-next-token "SYSTEM" "PUBLIC")
1467 (let ((publicp (string= (xmltok-current-token-string) "PUBLIC")))
1468 (xmltok-require-next-token 'literal)
1469 (cond (publicp
1470 (xmltok-require-next-token 'literal ?>)
1471 (unless (eq xmltok-type ?>)
1472 (xmltok-require-next-token ?>)))
1473 (t (xmltok-require-next-token ?>)))))
1474
1475 (defun xmltok-parse-external-id ()
1476 (xmltok-require-token "SYSTEM" "PUBLIC")
1477 (let ((publicp (string= (xmltok-current-token-string) "PUBLIC")))
1478 (xmltok-require-next-token 'literal)
1479 (when publicp
1480 (xmltok-require-next-token 'literal)))
1481 (xmltok-next-prolog-token))
1482
1483 (defun xmltok-require-next-token (&rest types)
1484 (xmltok-next-prolog-token)
1485 (apply 'xmltok-require-token types))
1486
1487 (defun xmltok-require-token (&rest types)
1488 ;; XXX Generate a more helpful error message
1489 (while (and (not (let ((type (car types)))
1490 (if (stringp (car types))
1491 (string= (xmltok-current-token-string) type)
1492 (eq type xmltok-type))))
1493 (setq types (cdr types))))
1494 (unless types
1495 (when (and xmltok-type
1496 (not (eq xmltok-type 'not-well-formed)))
1497 (xmltok-add-error "Unexpected token"))
1498 (xmltok-markup-declaration-parse-error))
1499 (let ((region-type (xmltok-prolog-region-type (car types))))
1500 (when region-type
1501 (xmltok-add-prolog-region region-type
1502 xmltok-start
1503 (point)))))
1504
1505 (defun xmltok-current-token-string ()
1506 (buffer-substring-no-properties xmltok-start (point)))
1507
1508 (put 'xmltok-markup-declaration-parse-error
1509 'error-conditions
1510 '(error xmltok-markup-declaration-parse-error))
1511
1512 (put 'xmltok-markup-declaration-parse-error
1513 'error-message
1514 "Syntax error in markup declaration")
1515
1516 (defun xmltok-markup-declaration-parse-error ()
1517 (signal 'xmltok-markup-declaration-parse-error nil))
1518
1519 (defun xmltok-skip-markup-declaration ()
1520 (while (cond ((eq xmltok-type ?>)
1521 (xmltok-next-prolog-token)
1522 nil)
1523 ((and xmltok-markup-declaration-doctype-flag
1524 (eq xmltok-type ?\[))
1525 (setq xmltok-internal-subset-start (point))
1526 (xmltok-next-prolog-token)
1527 nil)
1528 ((memq xmltok-type '(nil
1529 end-prolog
1530 named-markup-declaration
1531 comment
1532 processing-instruction))
1533 nil)
1534 ((and xmltok-internal-subset-start
1535 (eq xmltok-type 'internal-subset-close))
1536 nil)
1537 (t (xmltok-next-prolog-token) t)))
1538 xmltok-type)
1539
1540 (defun xmltok-prolog-region-type (required)
1541 (cond ((cdr (assq xmltok-type
1542 '((literal . literal)
1543 (?> . markup-declaration-close)
1544 (?\[ . internal-subset-open)
1545 (hash-name . hash-name)))))
1546 ((and (stringp required) (eq xmltok-type 'name))
1547 'keyword)))
1548
1549 ;; Return new token type.
1550
1551 (defun xmltok-next-prolog-token ()
1552 (skip-chars-forward " \t\r\n")
1553 (setq xmltok-start (point))
1554 (cond ((not (and (looking-at (xmltok-prolog regexp))
1555 (goto-char (match-end 0))))
1556 (let ((ch (char-after)))
1557 (cond (ch
1558 (goto-char (1+ (point)))
1559 (xmltok-add-error "Illegal char in prolog")
1560 (setq xmltok-type 'not-well-formed))
1561 (t (setq xmltok-type nil)))))
1562 ((or (xmltok-prolog start ncname-occur)
1563 (xmltok-prolog start colon-name-occur))
1564 (setq xmltok-name-end (1- (point)))
1565 (setq xmltok-name-colon (xmltok-prolog start colon))
1566 (setq xmltok-type 'name-occur))
1567 ((xmltok-prolog start colon)
1568 (setq xmltok-name-end (point))
1569 (setq xmltok-name-colon (xmltok-prolog start colon))
1570 (unless (looking-at "[ \t\r\n>),|[%]")
1571 (xmltok-add-error "Missing space after name"))
1572 (setq xmltok-type 'prefixed-name))
1573 ((or (xmltok-prolog start ncname-nmtoken)
1574 (xmltok-prolog start ncname-colon))
1575 (unless (looking-at "[ \t\r\n>),|[%]")
1576 (xmltok-add-error "Missing space after name token"))
1577 (setq xmltok-type 'nmtoken))
1578 ((xmltok-prolog start name)
1579 (setq xmltok-name-end (point))
1580 (setq xmltok-name-colon nil)
1581 (unless (looking-at "[ \t\r\n>),|[%]")
1582 (xmltok-add-error "Missing space after name"))
1583 (setq xmltok-type 'name))
1584 ((xmltok-prolog start hash-name)
1585 (setq xmltok-name-end (point))
1586 (unless (looking-at "[ \t\r\n>)|%]")
1587 (xmltok-add-error "Missing space after name"))
1588 (setq xmltok-type 'hash-name))
1589 ((xmltok-prolog start processing-instruction-question)
1590 (xmltok-scan-prolog-after-processing-instruction-open))
1591 ((xmltok-prolog start comment-open)
1592 ;; XXX if not-well-formed, ignore some stuff
1593 (xmltok-scan-after-comment-open))
1594 ((xmltok-prolog start named-markup-declaration)
1595 (setq xmltok-type 'named-markup-declaration))
1596 ((xmltok-prolog start instance-start)
1597 (goto-char xmltok-start)
1598 (setq xmltok-type 'end-prolog))
1599 ((xmltok-prolog start close-paren-star)
1600 (setq xmltok-type 'close-paren-star))
1601 ((xmltok-prolog start close-paren-occur)
1602 (setq xmltok-type 'close-paren-occur))
1603 ((xmltok-prolog start close-paren)
1604 (unless (looking-at "[ \t\r\n>,|)]")
1605 (xmltok-add-error "Missing space after )"))
1606 (setq xmltok-type ?\)))
1607 ((xmltok-prolog start single-char)
1608 (let ((ch (char-before)))
1609 (cond ((memq ch '(?\" ?\'))
1610 (xmltok-scan-prolog-literal))
1611 (t (setq xmltok-type ch)))))
1612 ((xmltok-prolog start percent)
1613 (cond ((xmltok-prolog start param-entity-ref-close)
1614 (setq xmltok-name-end (1- (point)))
1615 (setq xmltok-type 'param-entity-ref))
1616 ((xmltok-prolog start param-entity-ref)
1617 (xmltok-add-error "Missing ;")
1618 (setq xmltok-name-end (point))
1619 (setq xmltok-type 'param-entity-ref))
1620 ((looking-at "[ \t\r\n%]")
1621 (setq xmltok-type ?%))
1622 (t
1623 (xmltok-add-error "Expected name after %")
1624 (setq xmltok-type 'not-well-formed))))
1625 ((xmltok-prolog start nmtoken)
1626 (unless (looking-at "[ \t\r\n>),|[%]")
1627 (xmltok-add-error "Missing space after name token"))
1628 (setq xmltok-type 'nmtoken))
1629 ((xmltok-prolog start internal-subset-close)
1630 (setq xmltok-type 'internal-subset-close))
1631 ((xmltok-prolog start pound)
1632 (xmltok-add-error "Expected name after #")
1633 (setq xmltok-type 'not-well-formed))
1634 ((xmltok-prolog start markup-declaration)
1635 (xmltok-add-error "Expected name or -- after <!")
1636 (setq xmltok-type 'not-well-formed))
1637 ((xmltok-prolog start comment-first-dash)
1638 (xmltok-add-error "Expected <!--")
1639 (setq xmltok-type 'not-well-formed))
1640 ((xmltok-prolog start less-than)
1641 (xmltok-add-error "Incomplete markup")
1642 (setq xmltok-type 'not-well-formed))
1643 (t (error "Unhandled token in prolog %s"
1644 (match-string-no-properties 0)))))
1645
1646 (defun xmltok-scan-prolog-literal ()
1647 (let* ((delim (string (char-before)))
1648 (safe-end (save-excursion
1649 (skip-chars-forward (concat "^<>[]" delim))
1650 (point)))
1651 (end (save-excursion
1652 (goto-char safe-end)
1653 (search-forward delim nil t))))
1654 (or (cond ((not end)
1655 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
1656 nil
1657 nil
1658 delim)
1659 nil)
1660 ((save-excursion
1661 (goto-char end)
1662 (looking-at "[ \t\r\n>%[]"))
1663 (goto-char end)
1664 (setq xmltok-type 'literal))
1665 ((eq (1+ safe-end) end)
1666 (goto-char end)
1667 (xmltok-add-error (format "Missing space after %s" delim)
1668 safe-end)
1669 (setq xmltok-type 'literal))
1670 (t
1671 (xmltok-add-dependent 'xmltok-semi-closed-reparse-p
1672 xmltok-start
1673 (1+ end)
1674 delim
1675 1)
1676 nil))
1677 (progn
1678 (xmltok-add-error (format "Missing closing %s" delim))
1679 (goto-char safe-end)
1680 (skip-chars-backward " \t\r\n")
1681 (setq xmltok-type 'not-well-formed)))))
1682
1683 (defun xmltok-scan-prolog-after-processing-instruction-open ()
1684 (cond ((not (search-forward "?>" nil t))
1685 (xmltok-add-error "No closing ?>"
1686 xmltok-start
1687 (+ xmltok-start 2))
1688 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
1689 nil
1690 nil
1691 "?>")
1692 (setq xmltok-type 'not-well-formed))
1693 (t
1694 (let* ((end (point))
1695 (target
1696 (save-excursion
1697 (goto-char (+ xmltok-start 2))
1698 (and (looking-at (xmltok-ncname regexp))
1699 (or (memq (char-after (match-end 0))
1700 '(?\n ?\t ?\r ? ))
1701 (= (match-end 0) (- end 2)))
1702 (match-string-no-properties 0)))))
1703 (cond ((not target)
1704 (xmltok-add-error "\
1705 Processing instruction does not start with a name"
1706 (+ xmltok-start 2)
1707 (+ xmltok-start 3)))
1708 ((not (and (= (length target) 3)
1709 (let ((case-fold-search t))
1710 (string-match "xml" target)))))
1711 ((= xmltok-start 1)
1712 (xmltok-add-error "Invalid XML declaration"
1713 xmltok-start
1714 (point)))
1715 ((save-excursion
1716 (goto-char xmltok-start)
1717 (looking-at (xmltok-xml-declaration regexp)))
1718 (xmltok-add-error "XML declaration not at beginning of file"
1719 xmltok-start
1720 (point)))
1721 (t
1722 (xmltok-add-error "Processing instruction has target of xml"
1723 (+ xmltok-start 2)
1724 (+ xmltok-start 5))))
1725 (xmltok-add-prolog-region 'processing-instruction-left
1726 xmltok-start
1727 (+ xmltok-start
1728 2
1729 (if target
1730 (length target)
1731 0)))
1732 (xmltok-add-prolog-region 'processing-instruction-right
1733 (if target
1734 (save-excursion
1735 (goto-char (+ xmltok-start
1736 (length target)
1737 2))
1738 (skip-chars-forward " \t\r\n")
1739 (point))
1740 (+ xmltok-start 2))
1741 (point)))
1742 (setq xmltok-type 'processing-instruction))))
1743
1744 (defun xmltok-parse-entities ()
1745 (let ((todo xmltok-dtd))
1746 (when (and (or xmltok-had-param-entity-ref
1747 xmltok-doctype-external-subset-flag)
1748 (not xmltok-standalone))
1749 (setq xmltok-dtd (cons t xmltok-dtd)))
1750 (while todo
1751 (xmltok-parse-entity (car todo))
1752 (setq todo (cdr todo)))))
1753
1754 (defun xmltok-parse-entity (name-def)
1755 (let ((def (cdr name-def))
1756 ;; in case its value is buffer local
1757 (xmltok-dtd xmltok-dtd)
1758 buf)
1759 (when (stringp def)
1760 (if (string-match "\\`[^&<\t\r\n]*\\'" def)
1761 (setcdr name-def (cons def def))
1762 (setcdr name-def 'not-well-formed) ; avoid infinite expansion loops
1763 (setq buf (get-buffer-create
1764 (format " *Entity %s*" (car name-def))))
1765 (save-excursion
1766 (set-buffer buf)
1767 (erase-buffer)
1768 (insert def)
1769 (goto-char (point-min))
1770 (setcdr name-def
1771 (xmltok-parse-entity-replacement)))
1772 (kill-buffer buf)))))
1773
1774 (defun xmltok-parse-entity-replacement ()
1775 (let ((def (cons "" "")))
1776 (while (let* ((start (point))
1777 (found (re-search-forward "[<&\t\r\n]\\|]]>" nil t))
1778 (ch (and found (char-before)))
1779 (str (buffer-substring-no-properties
1780 start
1781 (if found
1782 (match-beginning 0)
1783 (point-max)))))
1784 (setq def
1785 (xmltok-append-entity-def def
1786 (cons str str)))
1787 (cond ((not found) nil)
1788 ((eq ch ?>)
1789 (setq def 'not-well-formed)
1790 nil)
1791 ((eq ch ?<)
1792 (xmltok-save
1793 (setq xmltok-start (1- (point)))
1794 (xmltok-scan-after-lt)
1795 (setq def
1796 (xmltok-append-entity-def
1797 def
1798 (cond ((memq xmltok-type
1799 '(start-tag
1800 end-tag
1801 empty-element))
1802 'element)
1803 ((memq xmltok-type
1804 '(comment
1805 processing-instruction))
1806 (cons "" nil))
1807 ((eq xmltok-type
1808 'cdata-section)
1809 (cons (buffer-substring-no-properties
1810 (+ xmltok-start 9)
1811 (- (point) 3))
1812 nil))
1813 (t 'not-well-formed)))))
1814 t)
1815 ((eq ch ?&)
1816 (let ((xmltok-start (1- (point)))
1817 xmltok-type
1818 xmltok-replacement
1819 xmltok-errors)
1820 (xmltok-scan-after-amp 'xmltok-handle-nested-entity)
1821 (cond ((eq xmltok-type 'entity-ref)
1822 (setq def
1823 (xmltok-append-entity-def
1824 def
1825 xmltok-replacement)))
1826 ((eq xmltok-type 'char-ref)
1827 (setq def
1828 (xmltok-append-entity-def
1829 def
1830 (if xmltok-replacement
1831 (cons xmltok-replacement
1832 xmltok-replacement)
1833 (and xmltok-errors 'not-well-formed)))))
1834 (t
1835 (setq def 'not-well-formed))))
1836 t)
1837 (t
1838 (setq def
1839 (xmltok-append-entity-def
1840 def
1841 (cons (match-string-no-properties 0)
1842 " ")))
1843 t))))
1844 def))
1845
1846 (defun xmltok-handle-nested-entity (start end)
1847 (let* ((name-def (assoc (buffer-substring-no-properties start end)
1848 xmltok-dtd))
1849 (def (cdr name-def)))
1850 (when (stringp def)
1851 (xmltok-parse-entity name-def)
1852 (setq def (cdr name-def)))
1853 (setq xmltok-replacement
1854 (cond ((null name-def)
1855 (if (eq (car xmltok-dtd) t)
1856 nil
1857 'not-well-formed))
1858 ((eq def 'unparsed) 'not-well-formed)
1859 (t def)))))
1860
1861 (defun xmltok-append-entity-def (d1 d2)
1862 (cond ((consp d1)
1863 (if (consp d2)
1864 (cons (concat (car d1) (car d2))
1865 (and (cdr d1)
1866 (cdr d2)
1867 (concat (cdr d1) (cdr d2))))
1868 d2))
1869 ((consp d2) d1)
1870 (t
1871 (let ((defs '(not-well-formed external element)))
1872 (while (not (or (eq (car defs) d1)
1873 (eq (car defs) d2)))
1874 (setq defs (cdr defs)))
1875 (car defs)))))
1876
1877 (defun xmltok-add-prolog-region (type start end)
1878 (setq xmltok-prolog-regions
1879 (cons (vector type start end)
1880 xmltok-prolog-regions)))
1881
1882 (defun xmltok-merge-attributes ()
1883 "Return a list merging `xmltok-attributes' and 'xmltok-namespace-attributes'.
1884 The members of the merged list are in order of occurrence in the
1885 document. The list may share list structure with `xmltok-attributes'
1886 and `xmltok-namespace-attributes'."
1887 (cond ((not xmltok-namespace-attributes)
1888 xmltok-attributes)
1889 ((not xmltok-attributes)
1890 xmltok-namespace-attributes)
1891 (t
1892 (let ((atts1 xmltok-attributes)
1893 (atts2 xmltok-namespace-attributes)
1894 merged)
1895 (while (and atts1 atts2)
1896 (cond ((< (xmltok-attribute-name-start (car atts1))
1897 (xmltok-attribute-name-start (car atts2)))
1898 (setq merged (cons (car atts1) merged))
1899 (setq atts1 (cdr atts1)))
1900 (t
1901 (setq merged (cons (car atts2) merged))
1902 (setq atts2 (cdr atts2)))))
1903 (setq merged (nreverse merged))
1904 (cond (atts1 (setq merged (nconc merged atts1)))
1905 (atts2 (setq merged (nconc merged atts2))))
1906 merged))))
1907
1908 ;;; Testing
1909
1910 (defun xmltok-forward-test ()
1911 (interactive)
1912 (if (xmltok-forward)
1913 (message "Scanned %s" xmltok-type)
1914 (message "Scanned nothing")))
1915
1916 (defun xmltok-next-prolog-token-test ()
1917 (interactive)
1918 (if (xmltok-next-prolog-token)
1919 (message "Scanned %s"
1920 (if (integerp xmltok-type)
1921 (string xmltok-type)
1922 xmltok-type))
1923 (message "Scanned end of file")))
1924
1925 (provide 'xmltok)
1926
1927 ;; arch-tag: 747e5f3a-6fc3-4f8d-bd96-89f05aa99f5e
1928 ;;; xmltok.el ends here