Add 2007 to copyright years.
[bpt/emacs.git] / lisp / nxml / xmltok.el
CommitLineData
8cd39fb3
MH
1;;; xmltok.el --- XML tokenization
2
3;; Copyright (C) 2003 Free Software Foundation, Inc.
4
5;; Author: James Clark
6;; Keywords: XML
7
8;; This program is free software; you can redistribute it and/or
9;; modify it under the terms of the GNU General Public License as
10;; published by the Free Software Foundation; either version 2 of
11;; the License, or (at your option) any later version.
12
13;; This program is distributed in the hope that it will be
14;; useful, but WITHOUT ANY WARRANTY; without even the implied
15;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
16;; PURPOSE. See the GNU General Public License for more details.
17
18;; You should have received a copy of the GNU General Public
19;; License along with this program; if not, write to the Free
20;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
21;; MA 02111-1307 USA
22
23;;; Commentary:
24
25;; This implements an XML 1.0 parser. It also implements the XML
26;; Namespaces Recommendation. It is designed to be conforming, but it
27;; works a bit differently from a normal XML parser. An XML document
28;; consists of the prolog and an instance. The prolog is parsed as a
29;; single unit using `xmltok-forward-prolog'. The instance is
30;; considered as a sequence of tokens, where a token is something like
31;; a start-tag, a comment, a chunk of data or a CDATA section. The
32;; tokenization of the instance is stateless: the tokenization of one
33;; part of the instance does not depend on tokenization of the
34;; preceding part of the instance. This allows the instance to be
35;; parsed incrementally. The main entry point is `xmltok-forward':
36;; this can be called at any point in the instance provided it is
37;; between tokens. The other entry point is `xmltok-forward-special'
38;; which skips over tokens other comments, processing instructions or
39;; CDATA sections (i.e. the constructs in an instance that can contain
40;; less than signs that don't start a token).
41;;
42;; This is a non-validating XML 1.0 processor. It does not resolve
43;; parameter entities (including the external DTD subset) and it does
44;; not resolve external general entities.
45;;
46;; It is non-conformant by design in the following respects.
47;;
48;; 1. It expects the client to detect aspects of well-formedness that
49;; are not internal to a single token, specifically checking that
50;; end-tags match start-tags and that the instance contains exactly
51;; one element.
52;;
53;; 2. It expects the client to detect duplicate attributes. Detection
54;; of duplicate attributes after expansion of namespace prefixes
55;; requires the namespace processing state. Detection of duplicate
56;; attributes before expansion of namespace prefixes does not, but is
57;; redundant given that the client will do detection of duplicate
58;; attributes after expansion of namespace prefixes.
59;;
60;; 3. It allows the client to recover from well-formedness errors.
61;; This is essential for use in applications where the document is
62;; being parsed during the editing process.
63;;
64;; 4. It does not support documents that do not conform to the lexical
65;; requirements of the XML Namespaces Recommendation (e.g. a document
66;; with a colon in an entity name).
67;;
68;; There are also a number of things that have not yet been
69;; implemented that make it non-conformant.
70;;
71;; 1. It does not implement default attributes. ATTLIST declarations
72;; are parsed, but no checking is done on the content of attribute
73;; value literals specifying default attribute values, and default
74;; attribute values are not reported to the client.
75;;
76;; 2. It does not implement internal entities containing elements. If
77;; an internal entity is referenced and parsing its replacement text
78;; yields one or more tags, then it will skip the reference and
79;; report this to the client.
80;;
81;; 3. It does not check the syntax of public identifiers in the DTD.
82;;
83;; 4. It allows some non-ASCII characters in certain situations where
84;; it should not. For example, it only enforces XML 1.0's
85;; restrictions on name characters strictly for ASCII characters. The
86;; problem here is XML's character model is based squarely on Unicode,
87;; whereas Emacs's is not (as of version 21). It is not clear what
88;; the right thing to do is.
89
90;;; Code:
91
92(defvar xmltok-type nil)
93(defvar xmltok-start nil)
94(defvar xmltok-name-colon nil)
95(defvar xmltok-name-end nil)
96(defvar xmltok-replacement nil
97 "String containing replacement for a character or entity reference.")
98
99(defvar xmltok-attributes nil
100 "List containing attributes of last scanned element.
101Each member of the list is a vector representing an attribute, which
102can be accessed using the functions `xmltok-attribute-name-start',
103`xmltok-attribute-name-colon', `xmltok-attribute-name-end',
104`xmltok-attribute-value-start', `xmltok-attribute-value-end',
105`xmltok-attribute-raw-normalized-value', `xmltok-attribute-refs'.")
106
107(defvar xmltok-namespace-attributes nil
108 "List containing namespace declarations of last scanned element.
109List has same format as `xmltok-attributes'.")
110
111(defvar xmltok-dtd nil
112 "Information about the DTD used by `xmltok-forward'.
113`xmltok-forward-prolog' sets this up.
114
115It consists of an alist of general entity names vs definitions. The
116first member of the alist is t if references to entities not in the
117alist are well-formed \(e.g. because there's an external subset that
118wasn't parsed).
119
120Each general entity name is a string. The definition is either nil, a
121symbol, a string, a cons cell. If the definition is nil, then it
122means that it's an internal entity but the result of parsing it is
123unknown. If it is a symbol, then the symbol is either `unparsed',
124meaning the entity is an unparsed entity, `external', meaning the
125entity is or references an external entity, `element', meaning the
126entity includes one or more elements, or `not-well-formed', meaning
127the replacement text is not well-formed. If the definition is a
128string, then the replacement text of the entity is that string; this
129happens only during the parsing of the prolog. If the definition is a
130cons cell \(ER . AR), then ER specifies the string that results from
131referencing the entity in element content and AR is either nil,
132meaning the replacement text included a <, or a string which is the
133normalized attribute value.")
134
135(defvar xmltok-dependent-regions nil
136 "List of descriptors of regions that a parsed token depends on.
137
138A token depends on a region if the region occurs after the token and a
139change in the region may require the token to be reparsed. This only
140happens with markup that is not well-formed. For example, if a <?
141occurs without a matching ?>, then the <? is returned as a
142not-well-formed token. However, this token is dependent on region
143from the end of the token to the end of the buffer: if this ever
144contains ?> then the buffer must be reparsed from the <?.
145
146A region descriptor is a list (FUN START END ARG ...), where FUN is a
147function to be called when the region changes, START and END are
148integers giving the start and end of the region, and ARG... are
149additional arguments to be passed to FUN. FUN will be called with 5
150arguments followed by the additional arguments if any: the position of
151the start of the changed area in the region, the position of the end
152of the changed area in the region, the length of the changed area
153before the change, the position of the start of the region, the
154position of the end of the region. FUN must return non-nil if the
155region needs reparsing. FUN will be called in a save-excursion with
156match-data saved.
157
158`xmltok-forward', `xmltok-forward-special' and `xmltok-forward-prolog'
159may add entries to the beginning of this list, but will not clear it.
160`xmltok-forward' and `xmltok-forward-special' will only add entries
161when returning tokens of type not-well-formed.")
162
163(defvar xmltok-errors nil
164 "List of errors detected by `xmltok-forward' and `xmltok-forward-prolog'.
165When `xmltok-forward' and `xmltok-forward-prolog' detect a
166well-formedness error, they will add an entry to the beginning of this
167list. Each entry is a vector [MESSAGE START END], where MESSAGE is a
168string giving the error message and START and END are integers
169indicating the position of the error.")
170
171(defmacro xmltok-save (&rest body)
172 `(let (xmltok-type
173 xmltok-start
174 xmltok-name-colon
175 xmltok-name-end
176 xmltok-replacement
177 xmltok-attributes
178 xmltok-namespace-attributes
179 xmltok-dependent-regions
180 xmltok-errors)
181 ,@body))
182
183(put 'xmltok-save 'lisp-indent-function 0)
184(def-edebug-spec xmltok-save t)
185
186(defsubst xmltok-attribute-name-start (att)
187 (aref att 0))
188
189(defsubst xmltok-attribute-name-colon (att)
190 (aref att 1))
191
192(defsubst xmltok-attribute-name-end (att)
193 (aref att 2))
194
195(defsubst xmltok-attribute-value-start (att)
196 (aref att 3))
197
198(defsubst xmltok-attribute-value-end (att)
199 (aref att 4))
200
201(defsubst xmltok-attribute-raw-normalized-value (att)
202 "Return an object representing the normalized value of ATT.
203This can t indicating that the normalized value is the same as the
204buffer substring from the start to the end of the value or nil
205indicating that the value is not well-formed or a string."
206 (aref att 5))
207
208(defsubst xmltok-attribute-refs (att)
209 "Return a list of the entity and character references in ATT.
210Each member is a vector [TYPE START END] where TYPE is either char-ref
211or entity-ref and START and END are integers giving the start and end
212of the reference. Nested entity references are not included in the list."
213 (aref att 6))
214
215(defun xmltok-attribute-prefix (att)
216 (let ((colon (xmltok-attribute-name-colon att)))
217 (and colon
218 (buffer-substring-no-properties (xmltok-attribute-name-start att)
219 colon))))
220
221(defun xmltok-attribute-local-name (att)
222 (let ((colon (xmltok-attribute-name-colon att)))
223 (buffer-substring-no-properties (if colon
224 (1+ colon)
225 (xmltok-attribute-name-start att))
226 (xmltok-attribute-name-end att))))
227
228(defun xmltok-attribute-value (att)
229 (let ((rnv (xmltok-attribute-raw-normalized-value att)))
230 (and rnv
231 (if (stringp rnv)
232 rnv
233 (buffer-substring-no-properties (xmltok-attribute-value-start att)
234 (xmltok-attribute-value-end att))))))
235
236(defun xmltok-start-tag-prefix ()
237 (and xmltok-name-colon
238 (buffer-substring-no-properties (1+ xmltok-start)
239 xmltok-name-colon)))
240
241(defun xmltok-start-tag-local-name ()
242 (buffer-substring-no-properties (1+ (or xmltok-name-colon
243 xmltok-start))
244 xmltok-name-end))
245
246(defun xmltok-end-tag-prefix ()
247 (and xmltok-name-colon
248 (buffer-substring-no-properties (+ 2 xmltok-start)
249 xmltok-name-colon)))
250
251(defun xmltok-end-tag-local-name ()
252 (buffer-substring-no-properties (if xmltok-name-colon
253 (1+ xmltok-name-colon)
254 (+ 2 xmltok-start))
255 xmltok-name-end))
256
257(defun xmltok-start-tag-qname ()
258 (buffer-substring-no-properties (+ xmltok-start 1) xmltok-name-end))
259
260(defun xmltok-end-tag-qname ()
261 (buffer-substring-no-properties (+ xmltok-start 2) xmltok-name-end))
262
263(defsubst xmltok-make-attribute (name-begin
264 name-colon
265 name-end
266 &optional
267 value-begin
268 value-end
269 raw-normalized-value)
270 "Make an attribute. RAW-NORMALIZED-VALUE is nil if the value is
271not well-formed, t if the normalized value is the string between
272VALUE-BEGIN and VALUE-END, otherwise a STRING giving the value."
273 (vector name-begin
274 name-colon
275 name-end
276 value-begin
277 value-end
278 raw-normalized-value
279 nil))
280
281(defsubst xmltok-error-message (err)
282 (aref err 0))
283
284(defsubst xmltok-error-start (err)
285 (aref err 1))
286
287(defsubst xmltok-error-end (err)
288 (aref err 2))
289
290(defsubst xmltok-make-error (message start end)
291 (vector message start end))
292
293(defun xmltok-add-error (message &optional start end)
294 (setq xmltok-errors
295 (cons (xmltok-make-error message
296 (or start xmltok-start)
297 (or end (point)))
298 xmltok-errors)))
299
300(defun xmltok-add-dependent (fun &optional start end &rest args)
301 (setq xmltok-dependent-regions
302 (cons (cons fun
303 (cons (or start xmltok-start)
304 (cons (or end (point-max))
305 args)))
306 xmltok-dependent-regions)))
307
308(defun xmltok-forward ()
309 (setq xmltok-start (point))
310 (let* ((case-fold-search nil)
311 (space-count (skip-chars-forward " \t\r\n"))
312 (ch (char-after)))
313 (cond ((eq ch ?\<)
314 (cond ((> space-count 0)
315 (setq xmltok-type 'space))
316 (t
317 (goto-char (1+ (point)))
318 (xmltok-scan-after-lt))))
319 ((eq ch ?\&)
320 (cond ((> space-count 0)
321 (setq xmltok-type 'space))
322 (t
323 (goto-char (1+ (point)))
324 (xmltok-scan-after-amp
325 (lambda (start end)
326 (xmltok-handle-entity start end))))))
327 ((re-search-forward "[<&]\\|\\(]]>\\)" nil t)
328 (cond ((not (match-beginning 1))
329 (goto-char (match-beginning 0))
330 ;; must have got a non-space char
331 (setq xmltok-type 'data))
332 ((= (match-beginning 1) xmltok-start)
333 (xmltok-add-error "Found `]]>' not closing a CDATA section")
334 (setq xmltok-type 'not-well-formed))
335 (t
336 (goto-char (match-beginning 0))
337 (setq xmltok-type
338 (if (= (point) (+ xmltok-start space-count))
339 'space
340 'data)))))
341 ((eq ch nil)
342 (setq xmltok-type
343 (if (> space-count 0)
344 'space
345 nil)))
346 (t
347 (goto-char (point-max))
348 (setq xmltok-type 'data)))))
349
350(defun xmltok-forward-special (bound)
351 "Scan forward past the first special token starting at or after point.
352Return nil if there is no special token that starts before BOUND.
353CDATA sections, processing instructions and comments (and indeed
354anything starting with < following by ? or !) count
355as special. Return the type of the token."
356 (when (re-search-forward "<[?!]" (1+ bound) t)
357 (setq xmltok-start (match-beginning 0))
358 (goto-char (1+ xmltok-start))
359 (let ((case-fold-search nil))
360 (xmltok-scan-after-lt))))
361
362(eval-when-compile
363
364 ;; A symbolic regexp is represented by a list whose CAR is the string
365 ;; containing the regexp and whose cdr is a list of symbolic names
366 ;; for the groups in the string.
367
368 ;; Construct a symbolic regexp from a regexp.
369 (defun xmltok-r (str)
370 (cons str nil))
371
372 ;; Concatenate zero of more regexps and symbolic regexps.
373 (defun xmltok+ (&rest args)
374 (let (strs names)
375 (while args
376 (let ((arg (car args)))
377 (if (stringp arg)
378 (setq strs (cons arg strs))
379 (setq strs (cons (car arg) strs))
380 (setq names (cons (cdr arg) names)))
381 (setq args (cdr args))))
382 (cons (apply 'concat (nreverse strs))
383 (apply 'append (nreverse names))))))
384
385(eval-when-compile
386 ;; Make a symbolic group named NAME from the regexp R.
387 ;; R may be a symbolic regexp or an ordinary regexp.
388 (defmacro xmltok-g (name &rest r)
389 (let ((sym (make-symbol "r")))
390 `(let ((,sym (xmltok+ ,@r)))
391 (if (stringp ,sym)
392 (cons (concat "\\(" ,sym "\\)") (cons ',name nil))
393 (cons (concat "\\(" (car ,sym) "\\)") (cons ',name (cdr ,sym)))))))
394
395 (defun xmltok-p (&rest r) (xmltok+ "\\(?:"
396 (apply 'xmltok+ r)
397 "\\)"))
398
399 ;; Get the group index of ELEM in a LIST of symbols.
400 (defun xmltok-get-index (elem list)
401 (or elem
402 (error "Missing group name"))
403 (let ((found nil)
404 (i 1))
405 (while list
406 (cond ((eq elem (car list))
407 (setq found i)
408 (setq list nil))
409 (t
410 (setq i (1+ i))
411 (setq list (cdr list)))))
412 (or found
413 (error "Bad group name %s" elem))))
414
415 ;; Define a macro SYM using a symbolic regexp R.
416 ;; SYM can be called in three ways:
417 ;; (SYM regexp)
418 ;; expands to the regexp in R
419 ;; (SYM start G)
420 ;; expands to
421 ;; (match-beginning N)
422 ;; where N is the group index of G in R.
423 ;; (SYM end G)
424 ;; expands to
425 ;; (match-end N)
426 ;; where N is the group index of G in R.
427 (defmacro xmltok-defregexp (sym r)
428 `(defalias ',sym
429 (let ((r ,r))
430 `(macro lambda (action &optional group-name)
431 (cond ((eq action 'regexp)
432 ,(car r))
433 ((or (eq action 'start) (eq action 'beginning))
434 (list 'match-beginning (xmltok-get-index group-name
435 ',(cdr r))))
436 ((eq action 'end)
437 (list 'match-end (xmltok-get-index group-name
438 ',(cdr r))))
439 ((eq action 'string)
440 (list 'match-string
441 (xmltok-get-index group-name ',(cdr r))))
442 ((eq action 'string-no-properties)
443 (list 'match-string-no-properties
444 (xmltok-get-index group-name ',(cdr r))))
445 (t (error "Invalid action: %s" action))))))))
446
447
448(eval-when-compile
449 (let* ((or "\\|")
450 (open "\\(?:")
451 (gopen "\\(")
452 (close "\\)")
453 (name-start-char "[_[:alpha:]]")
454 (name-continue-not-start-char "[-.[:digit:]]")
455 (name-continue-char "[-._[:alnum:]]")
456 (* "*")
457 (+ "+")
458 (opt "?")
459 (question "\\?")
460 (s "[ \r\t\n]")
461 (s+ (concat s +))
462 (s* (concat s *))
463 (ncname (concat name-start-char name-continue-char *))
464 (entity-ref
465 (xmltok+ (xmltok-g entity-name ncname)
466 (xmltok-g entity-ref-close ";") opt))
467 (decimal-ref
468 (xmltok+ (xmltok-g decimal "[0-9]" +)
469 (xmltok-g decimal-ref-close ";") opt))
470 (hex-ref
471 (xmltok+ "x" open
472 (xmltok-g hex "[0-9a-fA-F]" +)
473 (xmltok-g hex-ref-close ";") opt
474 close opt))
475 (char-ref
476 (xmltok+ (xmltok-g number-sign "#")
477 open decimal-ref or hex-ref close opt))
478 (start-tag-close
479 (xmltok+ open (xmltok-g start-tag-close s* ">")
480 or open (xmltok-g empty-tag-slash s* "/")
481 (xmltok-g empty-tag-close ">") opt close
482 or (xmltok-g start-tag-s s+)
483 close))
484 (start-tag
485 (xmltok+ (xmltok-g start-tag-name
486 ncname (xmltok-g start-tag-colon ":" ncname) opt)
487 start-tag-close opt))
488 (end-tag
489 (xmltok+ (xmltok-g end-tag-slash "/")
490 open (xmltok-g end-tag-name
491 ncname
492 (xmltok-g end-tag-colon ":" ncname) opt)
493 (xmltok-g end-tag-close s* ">") opt
494 close opt))
495 (comment
496 (xmltok+ (xmltok-g markup-declaration "!")
497 (xmltok-g comment-first-dash "-"
498 (xmltok-g comment-open "-") opt) opt))
499 (cdata-section
500 (xmltok+ "!"
501 (xmltok-g marked-section-open "\\[")
502 open "C"
503 open "D"
504 open "A"
505 open "T"
506 open "A"
507 (xmltok-g cdata-section-open "\\[" ) opt
508 close opt ; A
509 close opt ; T
510 close opt ; A
511 close opt ; D
512 close opt)) ; C
513 (processing-instruction
514 (xmltok-g processing-instruction-question question)))
515
516 (xmltok-defregexp xmltok-ncname (xmltok+ open ncname close))
517
518 (xmltok-defregexp xmltok-after-amp
519 (xmltok+ entity-ref or char-ref))
520 (xmltok-defregexp xmltok-after-lt
521 (xmltok+ start-tag
522 or end-tag
523 ;; cdata-section must come before comment
524 ;; because we treat <! as a comment
525 ;; and Emacs doesn't do fully greedy matching
526 ;; by default
527 or cdata-section
528 or comment
529 or processing-instruction))
530 (xmltok-defregexp
531 xmltok-attribute
532 (let* ((lit1
533 (xmltok+ "'"
534 "[^<'&\r\n\t]*"
535 (xmltok-g complex1 "[&\r\n\t][^<']*") opt
536 "'"))
537 (lit2 (cons (replace-regexp-in-string "'" "\"" (car lit1))
538 '(complex2)))
539 (literal (xmltok-g literal lit1 or lit2))
540 (name (xmltok+ open (xmltok-g xmlns "xmlns") or ncname close
541 (xmltok-g colon ":" ncname) opt)))
542 (xmltok+ (xmltok-g name name)
543 s* "="
544 ;; If the literal isn't followed by what it should be,
545 ;; then the closing delimiter is probably really the
546 ;; opening delimiter of another literal, so don't
547 ;; absorb the literal in this case.
548 open s* literal start-tag-close close opt)))
549 (xmltok-defregexp
550 xmltok-xml-declaration
551 (let* ((literal-content "[-._:a-zA-Z0-9]+")
552 (literal
553 (concat open "\"" literal-content "\""
554 or "'" literal-content "'" close))
555 (version-att
556 (xmltok+ open
557 s+ (xmltok-g version-name "version")
558 s* "="
559 s* (xmltok-g version-value literal)
560 close opt))
561 (encoding-att
562 (xmltok+ open
563 s+ (xmltok-g encoding-name "encoding")
564 s* "="
565 s* (xmltok-g encoding-value literal)
566 close opt))
567 (yes-no
568 (concat open "yes" or "no" close))
569 (standalone-att
570 (xmltok+ open
571 s+ (xmltok-g standalone-name "standalone")
572 s* "="
573 s* (xmltok-g standalone-value
574 "\"" yes-no "\"" or "'" yes-no "'")
575 close opt)))
576 (xmltok+ "<" question "xml"
577 version-att
578 encoding-att
579 standalone-att
580 s* question ">")))
581 (xmltok-defregexp
582 xmltok-prolog
583 (let* ((single-char (xmltok-g single-char "[[|,(\"'>]"))
584 (internal-subset-close (xmltok-g internal-subset-close
585 "][ \t\r\n]*>"))
586 (starts-with-close-paren
587 (xmltok-g close-paren
588 ")"
589 (xmltok-p
590 (xmltok-g close-paren-occur "[+?]")
591 or
592 (xmltok-g close-paren-star "\\*"))
593 opt))
594 (starts-with-percent
595 (xmltok-g percent
596 "%" (xmltok-g param-entity-ref
597 ncname
598 (xmltok-g param-entity-ref-close
599 ";") opt) opt))
600 (starts-with-nmtoken-not-name
601 (xmltok-g nmtoken
602 (xmltok-p name-continue-not-start-char or ":")
603 (xmltok-p name-continue-char or ":") *))
604 (nmtoken-after-colon
605 (xmltok+
606 (xmltok-p name-continue-not-start-char or ":")
607 (xmltok-p name-continue-char or ":") *
608 or
609 name-start-char
610 name-continue-char *
611 ":"
612 (xmltok-p name-continue-char or ":") *))
613 (after-ncname
614 (xmltok+ (xmltok-g ncname-nmtoken
615 ":" (xmltok-p nmtoken-after-colon))
616 or (xmltok-p (xmltok-g colon ":" ncname)
617 (xmltok-g colon-name-occur "[?+*]") opt)
618 or (xmltok-g ncname-occur "[?+*]")
619 or (xmltok-g ncname-colon ":")))
620 (starts-with-name
621 (xmltok-g name ncname (xmltok-p after-ncname) opt))
622 (starts-with-hash
623 (xmltok-g pound
624 "#" (xmltok-g hash-name ncname)))
625 (markup-declaration
626 (xmltok-g markup-declaration
627 "!" (xmltok-p (xmltok-g comment-first-dash "-"
628 (xmltok-g comment-open "-") opt)
629 or (xmltok-g named-markup-declaration
630 ncname)) opt))
631 (after-lt
632 (xmltok+ markup-declaration
633 or (xmltok-g processing-instruction-question
634 question)
635 or (xmltok-g instance-start
636 ncname)))
637 (starts-with-lt (xmltok-g less-than "<" (xmltok-p after-lt) opt)))
638 (xmltok+ starts-with-lt
639 or single-char
640 or starts-with-close-paren
641 or starts-with-percent
642 or starts-with-name
643 or starts-with-nmtoken-not-name
644 or starts-with-hash
645 or internal-subset-close)))))
646
647(defconst xmltok-ncname-regexp (xmltok-ncname regexp))
648
649(defun xmltok-scan-after-lt ()
650 (cond ((not (looking-at (xmltok-after-lt regexp)))
651 (xmltok-add-error "`<' that is not markup must be entered as `&lt;'")
652 (setq xmltok-type 'not-well-formed))
653 (t
654 (goto-char (match-end 0))
655 (cond ((xmltok-after-lt start start-tag-close)
656 (setq xmltok-name-end
657 (xmltok-after-lt end start-tag-name))
658 (setq xmltok-name-colon
659 (xmltok-after-lt start start-tag-colon))
660 (setq xmltok-attributes nil)
661 (setq xmltok-namespace-attributes nil)
662 (setq xmltok-type 'start-tag))
663 ((xmltok-after-lt start end-tag-close)
664 (setq xmltok-name-end
665 (xmltok-after-lt end end-tag-name))
666 (setq xmltok-name-colon
667 (xmltok-after-lt start end-tag-colon))
668 (setq xmltok-type 'end-tag))
669 ((xmltok-after-lt start start-tag-s)
670 (setq xmltok-name-end
671 (xmltok-after-lt end start-tag-name))
672 (setq xmltok-name-colon
673 (xmltok-after-lt start start-tag-colon))
674 (setq xmltok-namespace-attributes nil)
675 (setq xmltok-attributes nil)
676 (xmltok-scan-attributes)
677 xmltok-type)
678 ((xmltok-after-lt start empty-tag-close)
679 (setq xmltok-name-end
680 (xmltok-after-lt end start-tag-name))
681 (setq xmltok-name-colon
682 (xmltok-after-lt start start-tag-colon))
683 (setq xmltok-attributes nil)
684 (setq xmltok-namespace-attributes nil)
685 (setq xmltok-type 'empty-element))
686 ((xmltok-after-lt start cdata-section-open)
687 (setq xmltok-type
688 (if (search-forward "]]>" nil t)
689 'cdata-section
690 (xmltok-add-error "No closing ]]>")
691 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
692 nil
693 nil
694 "]]>")
695 'not-well-formed)))
696 ((xmltok-after-lt start processing-instruction-question)
697 (xmltok-scan-after-processing-instruction-open))
698 ((xmltok-after-lt start comment-open)
699 (xmltok-scan-after-comment-open))
700 ((xmltok-after-lt start empty-tag-slash)
701 (setq xmltok-name-end
702 (xmltok-after-lt end start-tag-name))
703 (setq xmltok-name-colon
704 (xmltok-after-lt start start-tag-colon))
705 (setq xmltok-attributes nil)
706 (setq xmltok-namespace-attributes nil)
707 (xmltok-add-error "Expected `/>'" (1- (point)))
708 (setq xmltok-type 'partial-empty-element))
709 ((xmltok-after-lt start start-tag-name)
710 (xmltok-add-error "Missing `>'"
711 nil
712 (1+ xmltok-start))
713 (setq xmltok-name-end
714 (xmltok-after-lt end start-tag-name))
715 (setq xmltok-name-colon
716 (xmltok-after-lt start start-tag-colon))
717 (setq xmltok-namespace-attributes nil)
718 (setq xmltok-attributes nil)
719 (setq xmltok-type 'partial-start-tag))
720 ((xmltok-after-lt start end-tag-name)
721 (setq xmltok-name-end (xmltok-after-lt end end-tag-name))
722 (setq xmltok-name-colon
723 (xmltok-after-lt start end-tag-colon))
724 (cond ((and (not xmltok-name-colon)
725 (eq (char-after) ?:))
726 (goto-char (1+ (point)))
727 (xmltok-add-error "Expected name following `:'"
728 (1- (point))))
729 (t
730 (xmltok-add-error "Missing `>'"
731 nil
732 (1+ xmltok-start))))
733 (setq xmltok-type 'partial-end-tag))
734 ((xmltok-after-lt start end-tag-slash)
735 (xmltok-add-error "Expected name following `</'")
736 (setq xmltok-name-end nil)
737 (setq xmltok-name-colon nil)
738 (setq xmltok-type 'partial-end-tag))
739 ((xmltok-after-lt start marked-section-open)
740 (xmltok-add-error "Expected `CDATA[' after `<!['"
741 xmltok-start
742 (+ 3 xmltok-start))
743 (setq xmltok-type 'not-well-formed))
744 ((xmltok-after-lt start comment-first-dash)
745 (xmltok-add-error "Expected `-' after `<!-'"
746 xmltok-start
747 (+ 3 xmltok-start))
748 (setq xmltok-type 'not-well-formed))
749 ((xmltok-after-lt start markup-declaration)
750 (xmltok-add-error "Expected `[CDATA[' or `--' after `<!'"
751 xmltok-start
752 (+ 2 xmltok-start))
753 (setq xmltok-type 'not-well-formed))
754 (t
755 (xmltok-add-error "Not well-formed")
756 (setq xmltok-type 'not-well-formed))))))
757
758;; XXX This should be unified with
759;; xmltok-scan-prolog-after-processing-instruction-open
760;; XXX maybe should include rest of line (up to any <,>) in unclosed PI
761(defun xmltok-scan-after-processing-instruction-open ()
762 (cond ((not (search-forward "?>" nil t))
763 (xmltok-add-error "No closing ?>"
764 xmltok-start
765 (+ xmltok-start 2))
766 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
767 nil
768 nil
769 "?>")
770 (setq xmltok-type 'not-well-formed))
771 (t
772 (cond ((not (save-excursion
773 (goto-char (+ 2 xmltok-start))
774 (and (looking-at (xmltok-ncname regexp))
775 (setq xmltok-name-end (match-end 0)))))
776 (setq xmltok-name-end (+ xmltok-start 2))
777 (xmltok-add-error "<? not followed by name"
778 (+ xmltok-start 2)
779 (+ xmltok-start 3)))
780 ((not (or (memq (char-after xmltok-name-end)
781 '(?\n ?\t ?\r ? ))
782 (= xmltok-name-end (- (point) 2))))
783 (xmltok-add-error "Target not followed by whitespace"
784 xmltok-name-end
785 (1+ xmltok-name-end)))
786 ((and (= xmltok-name-end (+ xmltok-start 5))
787 (save-excursion
788 (goto-char (+ xmltok-start 2))
789 (let ((case-fold-search t))
790 (looking-at "xml"))))
791 (xmltok-add-error "Processing instruction target is xml"
792 (+ xmltok-start 2)
793 (+ xmltok-start 5))))
794 (setq xmltok-type 'processing-instruction))))
795
796(defun xmltok-scan-after-comment-open ()
797 (setq xmltok-type
798 (cond ((not (search-forward "--" nil t))
799 (xmltok-add-error "No closing -->")
800 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
801 nil
802 nil
803 ;; not --> because
804 ;; -- is not allowed
805 ;; in comments in XML
806 "--")
807 'not-well-formed)
808 ((eq (char-after) ?>)
809 (goto-char (1+ (point)))
810 'comment)
811 (t
812 (xmltok-add-dependent
813 'xmltok-semi-closed-reparse-p
814 nil
815 (point)
816 "--"
817 2)
818 ;; just include the <!-- in the token
819 (goto-char (+ xmltok-start 4))
820 ;; Need do this after the goto-char because
821 ;; marked error should just apply to <!--
822 (xmltok-add-error "First following `--' not followed by `>'")
823 'not-well-formed))))
824
825(defun xmltok-scan-attributes ()
826 (let ((recovering nil)
827 (atts-needing-normalization nil))
828 (while (cond ((or (looking-at (xmltok-attribute regexp))
829 ;; use non-greedy group
830 (when (looking-at (concat "[^<>\n]+?"
831 (xmltok-attribute regexp)))
832 (unless recovering
833 (xmltok-add-error "Malformed attribute"
834 (point)
835 (save-excursion
836 (goto-char (xmltok-attribute start
837 name))
838 (skip-chars-backward "\r\n\t ")
839 (point))))
840 t))
841 (setq recovering nil)
842 (goto-char (match-end 0))
843 (let ((att (xmltok-add-attribute)))
844 (when att
845 (setq atts-needing-normalization
846 (cons att atts-needing-normalization))))
847 (cond ((xmltok-attribute start start-tag-s) t)
848 ((xmltok-attribute start start-tag-close)
849 (setq xmltok-type 'start-tag)
850 nil)
851 ((xmltok-attribute start empty-tag-close)
852 (setq xmltok-type 'empty-element)
853 nil)
854 ((xmltok-attribute start empty-tag-slash)
855 (setq xmltok-type 'partial-empty-element)
856 (xmltok-add-error "Expected `/>'"
857 (1- (point)))
858 nil)
859 ((looking-at "[ \t\r\n]*[\"']")
860 (goto-char (match-end 0))
861 (xmltok-add-error "Missing closing delimiter"
862 (1- (point)))
863 (setq recovering t)
864 t)
865 ((looking-at "[ \t]*\\([^ \t\r\n\"'=<>/]+\\)[ \t\r\n/>]")
866 (goto-char (match-end 1))
867 (xmltok-add-error "Attribute value not quoted"
868 (match-beginning 1))
869 (setq recovering t)
870 t)
871 (t
872 (xmltok-add-error "Missing attribute value"
873 (1- (point)))
874 (setq recovering t)
875 t)))
876 ((looking-at "[^<>\n]*/>")
877 (let ((start (point)))
878 (goto-char (match-end 0))
879 (unless recovering
880 (xmltok-add-error "Malformed empty-element"
881 start
882 (- (point) 2))))
883 (setq xmltok-type 'empty-element)
884 nil)
885 ((looking-at "[^<>\n]*>")
886 (let ((start (point)))
887 (goto-char (match-end 0))
888 (unless recovering
889 (xmltok-add-error "Malformed start-tag"
890 start
891 (1- (point)))))
892 (setq xmltok-type 'start-tag)
893 nil)
894 (t
895 (when recovering
896 (skip-chars-forward "^<>\n"))
897 (xmltok-add-error "Missing `>'"
898 xmltok-start
899 (1+ xmltok-start))
900 (setq xmltok-type 'partial-start-tag)
901 nil)))
902 (while atts-needing-normalization
903 (xmltok-normalize-attribute (car atts-needing-normalization))
904 (setq atts-needing-normalization (cdr atts-needing-normalization))))
905 (setq xmltok-attributes
906 (nreverse xmltok-attributes))
907 (setq xmltok-namespace-attributes
908 (nreverse xmltok-namespace-attributes)))
909
910(defun xmltok-add-attribute ()
911 "Return the attribute if it needs normalizing, otherwise nil."
912 (let* ((needs-normalizing nil)
913 (att
914 (if (xmltok-attribute start literal)
915 (progn
916 (setq needs-normalizing
917 (or (xmltok-attribute start complex1)
918 (xmltok-attribute start complex2)))
919 (xmltok-make-attribute (xmltok-attribute start name)
920 (xmltok-attribute start colon)
921 (xmltok-attribute end name)
922 (1+ (xmltok-attribute start literal))
923 (1- (xmltok-attribute end literal))
924 (not needs-normalizing)))
925 (xmltok-make-attribute (xmltok-attribute start name)
926 (xmltok-attribute start colon)
927 (xmltok-attribute end name)))))
928 (if (xmltok-attribute start xmlns)
929 (setq xmltok-namespace-attributes
930 (cons att xmltok-namespace-attributes))
931 (setq xmltok-attributes
932 (cons att xmltok-attributes)))
933 (and needs-normalizing
934 att)))
935
936(defun xmltok-normalize-attribute (att)
937 (let ((end (xmltok-attribute-value-end att))
938 (well-formed t)
939 (value-parts nil)
940 (refs nil))
941 (save-excursion
942 (goto-char (xmltok-attribute-value-start att))
943 (while (progn
944 (let ((n (skip-chars-forward "^\r\t\n&" end)))
945 (when (> n 0)
946 (setq value-parts
947 (cons (buffer-substring-no-properties (- (point) n)
948 (point))
949 value-parts))))
950 (when (< (point) end)
951 (goto-char (1+ (point)))
952 (cond ((eq (char-before) ?\&)
953 (let ((xmltok-start (1- (point)))
954 xmltok-type xmltok-replacement)
955 (xmltok-scan-after-amp
956 (lambda (start end)
957 (xmltok-handle-entity start end t)))
958 (cond ((or (eq xmltok-type 'char-ref)
959 (eq xmltok-type 'entity-ref))
960 (setq refs
961 (cons (vector xmltok-type
962 xmltok-start
963 (point))
964 refs))
965 (if xmltok-replacement
966 (setq value-parts
967 (cons xmltok-replacement
968 value-parts))
969 (setq well-formed nil)))
970 (t (setq well-formed nil)))))
971 (t (setq value-parts
972 (cons " " value-parts)))))
973 (< (point) end))))
974 (when well-formed
975 (aset att 5 (apply 'concat (nreverse value-parts))))
976 (aset att 6 (nreverse refs))))
977
978(defun xmltok-scan-after-amp (entity-handler)
979 (cond ((not (looking-at (xmltok-after-amp regexp)))
980 (xmltok-add-error "`&' that is not markup must be entered as `&amp;'")
981 (setq xmltok-type 'not-well-formed))
982 (t
983 (goto-char (match-end 0))
984 (cond ((xmltok-after-amp start entity-ref-close)
985 (funcall entity-handler
986 (xmltok-after-amp start entity-name)
987 (xmltok-after-amp end entity-name))
988 (setq xmltok-type 'entity-ref))
989 ((xmltok-after-amp start decimal-ref-close)
990 (xmltok-scan-char-ref (xmltok-after-amp start decimal)
991 (xmltok-after-amp end decimal)
992 10))
993 ((xmltok-after-amp start hex-ref-close)
994 (xmltok-scan-char-ref (xmltok-after-amp start hex)
995 (xmltok-after-amp end hex)
996 16))
997 ((xmltok-after-amp start number-sign)
998 (xmltok-add-error "Missing character number")
999 (setq xmltok-type 'not-well-formed))
1000 (t
1001 (xmltok-add-error "Missing closing `;'")
1002 (setq xmltok-type 'not-well-formed))))))
1003
1004(defconst xmltok-entity-error-messages
1005 '((unparsed . "Referenced entity is unparsed")
1006 (not-well-formed . "Referenced entity is not well-formed")
1007 (external nil . "Referenced entity is external")
1008 (element nil . "Referenced entity contains <")))
1009
1010(defun xmltok-handle-entity (start end &optional attributep)
1011 (let* ((name (buffer-substring-no-properties start end))
1012 (name-def (assoc name xmltok-dtd))
1013 (def (cdr name-def)))
1014 (cond ((setq xmltok-replacement (and (consp def)
1015 (if attributep
1016 (cdr def)
1017 (car def)))))
1018 ((null name-def)
1019 (unless (eq (car xmltok-dtd) t)
1020 (xmltok-add-error "Referenced entity has not been defined"
1021 start
1022 end)))
1023 ((and attributep (consp def))
1024 (xmltok-add-error "Referenced entity contains <"
1025 start
1026 end))
1027 (t
1028 (let ((err (cdr (assq def xmltok-entity-error-messages))))
1029 (when (consp err)
1030 (setq err (if attributep (cdr err) (car err))))
1031 (when err
1032 (xmltok-add-error err start end)))))))
1033
1034(defun xmltok-scan-char-ref (start end base)
1035 (setq xmltok-replacement
1036 (let ((n (string-to-int (buffer-substring-no-properties start end)
1037 base)))
1038 (cond ((and (integerp n) (xmltok-valid-char-p n))
1039 (setq n (xmltok-unicode-to-char n))
1040 (and n (string n)))
1041 (t
1042 (xmltok-add-error "Invalid character code" start end)
1043 nil))))
1044 (setq xmltok-type 'char-ref))
1045
1046(defun xmltok-char-number (start end)
1047 (let* ((base (if (eq (char-after (+ start 2)) ?x)
1048 16
1049 10))
1050 (n (string-to-int
1051 (buffer-substring-no-properties (+ start (if (= base 16) 3 2))
1052 (1- end))
1053 base)))
1054 (and (integerp n)
1055 (xmltok-valid-char-p n)
1056 n)))
1057
1058(defun xmltok-unclosed-reparse-p (change-start
1059 change-end
1060 pre-change-length
1061 start
1062 end
1063 delimiter)
1064 (let ((len-1 (1- (length delimiter))))
1065 (goto-char (max start (- change-start len-1)))
1066 (search-forward delimiter (min end (+ change-end len-1)) t)))
1067
1068;; Handles a <!-- with the next -- not followed by >
1069
1070(defun xmltok-semi-closed-reparse-p (change-start
1071 change-end
1072 pre-change-length
1073 start
1074 end
1075 delimiter
1076 delimiter-length)
1077 (or (<= (- end delimiter-length) change-end)
1078 (xmltok-unclosed-reparse-p change-start
1079 change-end
1080 pre-change-length
1081 start
1082 end
1083 delimiter)))
1084
1085(defun xmltok-valid-char-p (n)
1086 "Return non-nil if n is the Unicode code of a valid XML character."
1087 (cond ((< n #x20) (memq n '(#xA #xD #x9)))
1088 ((< n #xD800) t)
1089 ((< n #xE000) nil)
1090 ((< n #xFFFE) t)
1091 (t (and (> n #xFFFF)
1092 (< n #x110000)))))
1093
1094(defun xmltok-unicode-to-char (n)
1095 "Return the character corresponding to Unicode scalar value N.
1096Return nil if unsupported in Emacs."
1097 (decode-char 'ucs n))
1098
1099;;; Prolog parsing
1100
1101(defvar xmltok-contains-doctype nil)
1102(defvar xmltok-doctype-external-subset-flag nil)
1103(defvar xmltok-internal-subset-start nil)
1104(defvar xmltok-had-param-entity-ref nil)
1105(defvar xmltok-prolog-regions nil)
1106(defvar xmltok-standalone nil
1107 "Non-nil if there was an XML declaration specifying standalone=\"yes\",")
1108(defvar xmltok-markup-declaration-doctype-flag nil)
1109
1110(defconst xmltok-predefined-entity-alist
1111 '(("lt" "<" . "<")
1112 ("gt" ">" . ">")
1113 ("amp" "&" . "&")
1114 ("apos" "'" . "'")
1115 ("quot" "\"" . "\"")))
1116
1117(defun xmltok-forward-prolog ()
1118 "Move forward to the end of the XML prolog.
1119
1120Returns a list of vectors [TYPE START END] where TYPE is a symbol and
1121START and END are integers giving the start and end of the region of
1122that type. TYPE can be one of xml-declaration,
1123xml-declaration-attribute-name, xml-declaration-attribute-value,
1124comment, processing-instruction-left, processing-instruction-right,
1125markup-declaration-open. markup-declaration-close,
1126internal-subset-open, internal-subset-close, hash-name, keyword,
1127literal, encoding-name.
1128Adds to `xmltok-errors' and `xmltok-dependent-regions' as appropriate."
1129 (let ((case-fold-search nil)
1130 xmltok-start
1131 xmltok-type
1132 xmltok-prolog-regions
1133 xmltok-contains-doctype
1134 xmltok-internal-subset-start
1135 xmltok-had-param-entity-ref
1136 xmltok-standalone
1137 xmltok-doctype-external-subset-flag
1138 xmltok-markup-declaration-doctype-flag)
1139 (setq xmltok-dtd xmltok-predefined-entity-alist)
1140 (xmltok-scan-xml-declaration)
1141 (xmltok-next-prolog-token)
1142 (while (condition-case err
1143 (when (xmltok-parse-prolog-item)
1144 (xmltok-next-prolog-token))
1145 (xmltok-markup-declaration-parse-error
1146 (xmltok-skip-markup-declaration))))
1147 (when xmltok-internal-subset-start
1148 (xmltok-add-error "No closing ]"
1149 (1- xmltok-internal-subset-start)
1150 xmltok-internal-subset-start))
1151 (xmltok-parse-entities)
1152 ;; XXX prune dependent-regions for those entirely in prolog
1153 (nreverse xmltok-prolog-regions)))
1154
1155(defconst xmltok-bad-xml-decl-regexp
1156 "[ \t\r\n]*<\\?xml\\(?:[ \t\r\n]\\|\\?>\\)")
1157
1158;;;###autoload
1159(defun xmltok-get-declared-encoding-position (&optional limit)
1160 "Return the position of the encoding in the XML declaration at point.
1161If there is a well-formed XML declaration starting at point and it
1162contains an encoding declaration, then return (START . END)
1163where START and END are the positions of the start and the end
1164of the encoding name; if there is no encoding declaration return
1165the position where and encoding declaration could be inserted.
1166If there is XML that is not well-formed that looks like an XML declaration,
1167return nil. Otherwise, return t.
1168If LIMIT is non-nil, then do not consider characters beyond LIMIT."
1169 (cond ((let ((case-fold-search nil))
1170 (and (looking-at (xmltok-xml-declaration regexp))
1171 (or (not limit) (<= (match-end 0) limit))))
1172 (let ((end (xmltok-xml-declaration end encoding-value)))
1173 (if end
1174 (cons (1+ (xmltok-xml-declaration start encoding-value))
1175 (1- end))
1176 (or (xmltok-xml-declaration end version-value)
1177 (+ (point) 5)))))
1178 ((not (let ((case-fold-search t))
1179 (looking-at xmltok-bad-xml-decl-regexp))))))
1180
1181(defun xmltok-scan-xml-declaration ()
1182 (when (looking-at (xmltok-xml-declaration regexp))
1183 (xmltok-add-prolog-region 'xml-declaration (point) (match-end 0))
1184 (goto-char (match-end 0))
1185 (when (xmltok-xml-declaration start version-name)
1186 (xmltok-add-prolog-region 'xml-declaration-attribute-name
1187 (xmltok-xml-declaration start version-name)
1188 (xmltok-xml-declaration end version-name))
1189 (let ((start (xmltok-xml-declaration start version-value))
1190 (end (xmltok-xml-declaration end version-value)))
1191 (xmltok-add-prolog-region 'xml-declaration-attribute-value
1192 start
1193 end)))
1194 ;; XXX need to check encoding name
1195 ;; Should start with letter, not contain colon
1196 (when (xmltok-xml-declaration start encoding-name)
1197 (xmltok-add-prolog-region 'xml-declaration-attribute-name
1198 (xmltok-xml-declaration start encoding-name)
1199 (xmltok-xml-declaration end encoding-name))
1200 (let ((start (xmltok-xml-declaration start encoding-value))
1201 (end (xmltok-xml-declaration end encoding-value)))
1202 (xmltok-add-prolog-region 'encoding-name
1203 (1+ start)
1204 (1- end))
1205 (xmltok-add-prolog-region 'xml-declaration-attribute-value
1206 start
1207 end)))
1208 (when (xmltok-xml-declaration start standalone-name)
1209 (xmltok-add-prolog-region 'xml-declaration-attribute-name
1210 (xmltok-xml-declaration start standalone-name)
1211 (xmltok-xml-declaration end standalone-name))
1212 (let ((start (xmltok-xml-declaration start standalone-value))
1213 (end (xmltok-xml-declaration end standalone-value)))
1214 (xmltok-add-prolog-region 'xml-declaration-attribute-value
1215 start
1216 end)
1217 (setq xmltok-standalone
1218 (string= (buffer-substring-no-properties (1+ start) (1- end))
1219 "yes"))))
1220 t))
1221
1222(defconst xmltok-markup-declaration-alist
1223 '(("ELEMENT" . xmltok-parse-element-declaration)
1224 ("ATTLIST" . xmltok-parse-attlist-declaration)
1225 ("ENTITY" . xmltok-parse-entity-declaration)
1226 ("NOTATION" . xmltok-parse-notation-declaration)))
1227
1228(defun xmltok-parse-prolog-item ()
1229 (cond ((eq xmltok-type 'comment)
1230 (xmltok-add-prolog-region 'comment
1231 xmltok-start
1232 (point))
1233 t)
1234 ((eq xmltok-type 'processing-instruction))
1235 ((eq xmltok-type 'named-markup-declaration)
1236 (setq xmltok-markup-declaration-doctype-flag nil)
1237 (xmltok-add-prolog-region 'markup-declaration-open
1238 xmltok-start
1239 (point))
1240 (let* ((name (buffer-substring-no-properties
1241 (+ xmltok-start 2)
1242 (point)))
1243 (fun (cdr (assoc name xmltok-markup-declaration-alist))))
1244 (cond (fun
1245 (unless xmltok-internal-subset-start
1246 (xmltok-add-error
1247 "Declaration allowed only in internal subset"))
1248 (funcall fun))
1249 ((string= name "DOCTYPE")
1250 (xmltok-parse-doctype))
1251 (t
1252 (xmltok-add-error "Unknown markup declaration"
1253 (+ xmltok-start 2))
1254 (xmltok-next-prolog-token)
1255 (xmltok-markup-declaration-parse-error))))
1256 t)
1257 ((or (eq xmltok-type 'end-prolog)
1258 (not xmltok-type))
1259 nil)
1260 ((eq xmltok-type 'internal-subset-close)
1261 (xmltok-add-prolog-region 'internal-subset-close
1262 xmltok-start
1263 (1+ xmltok-start))
1264 (xmltok-add-prolog-region 'markup-declaration-close
1265 (1- (point))
1266 (point))
1267 (if xmltok-internal-subset-start
1268 (setq xmltok-internal-subset-start nil)
1269 (xmltok-add-error "]> outside internal subset"))
1270 t)
1271 ((eq xmltok-type 'param-entity-ref)
1272 (if xmltok-internal-subset-start
1273 (setq xmltok-had-param-entity-ref t)
1274 (xmltok-add-error "Parameter entity reference outside document type declaration"))
1275 t)
1276 ;; If we don't do this, we can get thousands of errors when
1277 ;; a plain text file is parsed.
1278 ((not xmltok-internal-subset-start)
1279 (when (let ((err (car xmltok-errors)))
1280 (or (not err)
1281 (<= (xmltok-error-end err) xmltok-start)))
1282 (goto-char xmltok-start))
1283 nil)
1284 ((eq xmltok-type 'not-well-formed) t)
1285 (t
1286 (xmltok-add-error "Token allowed only inside markup declaration")
1287 t)))
1288
1289(defun xmltok-parse-doctype ()
1290 (setq xmltok-markup-declaration-doctype-flag t)
1291 (xmltok-next-prolog-token)
1292 (when xmltok-internal-subset-start
1293 (xmltok-add-error "DOCTYPE declaration not allowed in internal subset")
1294 (xmltok-markup-declaration-parse-error))
1295 (when xmltok-contains-doctype
1296 (xmltok-add-error "Duplicate DOCTYPE declaration")
1297 (xmltok-markup-declaration-parse-error))
1298 (setq xmltok-contains-doctype t)
1299 (xmltok-require-token 'name 'prefixed-name)
1300 (xmltok-require-next-token "SYSTEM" "PUBLIC" ?\[ ?>)
1301 (cond ((eq xmltok-type ?\[)
1302 (setq xmltok-internal-subset-start (point)))
1303 ((eq xmltok-type ?>))
1304 (t
1305 (setq xmltok-doctype-external-subset-flag t)
1306 (xmltok-parse-external-id)
1307 (xmltok-require-token ?\[ ?>)
1308 (when (eq xmltok-type ?\[)
1309 (setq xmltok-internal-subset-start (point))))))
1310
1311(defun xmltok-parse-attlist-declaration ()
1312 (xmltok-require-next-token 'prefixed-name 'name)
1313 (while (progn
1314 (xmltok-require-next-token ?> 'name 'prefixed-name)
1315 (if (eq xmltok-type ?>)
1316 nil
1317 (xmltok-require-next-token ?\(
1318 "CDATA"
1319 "ID"
1320 "IDREF"
1321 "IDREFS"
1322 "ENTITY"
1323 "ENTITIES"
1324 "NMTOKEN"
1325 "NMTOKENS"
1326 "NOTATION")
1327 (cond ((eq xmltok-type ?\()
1328 (xmltok-parse-nmtoken-group))
1329 ((string= (xmltok-current-token-string)
1330 "NOTATION")
1331 (xmltok-require-next-token ?\()
1332 (xmltok-parse-nmtoken-group)))
1333 (xmltok-require-next-token "#IMPLIED"
1334 "#REQUIRED"
1335 "#FIXED"
1336 'literal)
1337 (when (string= (xmltok-current-token-string) "#FIXED")
1338 (xmltok-require-next-token 'literal))
1339 t))))
1340
1341(defun xmltok-parse-nmtoken-group ()
1342 (while (progn
1343 (xmltok-require-next-token 'nmtoken 'prefixed-name 'name)
1344 (xmltok-require-next-token ?| ?\))
1345 (eq xmltok-type ?|))))
1346
1347(defun xmltok-parse-element-declaration ()
1348 (xmltok-require-next-token 'name 'prefixed-name)
1349 (xmltok-require-next-token "EMPTY" "ANY" ?\()
1350 (when (eq xmltok-type ?\()
1351 (xmltok-require-next-token "#PCDATA"
1352 'name
1353 'prefixed-name
1354 'name-occur
1355 ?\()
1356 (cond ((eq xmltok-type 'hash-name)
1357 (xmltok-require-next-token ?| ?\) 'close-paren-star)
1358 (while (eq xmltok-type ?|)
1359 (xmltok-require-next-token 'name 'prefixed-name)
1360 (xmltok-require-next-token 'close-paren-star ?|)))
1361 (t (xmltok-parse-model-group))))
1362 (xmltok-require-next-token ?>))
1363
1364(defun xmltok-parse-model-group ()
1365 (xmltok-parse-model-group-member)
1366 (xmltok-require-next-token ?|
1367 ?,
1368 ?\)
1369 'close-paren-star
1370 'close-paren-occur)
1371 (when (memq xmltok-type '(?, ?|))
1372 (let ((connector xmltok-type))
1373 (while (progn
1374 (xmltok-next-prolog-token)
1375 (xmltok-parse-model-group-member)
1376 (xmltok-require-next-token connector
1377 ?\)
1378 'close-paren-star
1379 'close-paren-occur)
1380 (eq xmltok-type connector))))))
1381
1382(defun xmltok-parse-model-group-member ()
1383 (xmltok-require-token 'name
1384 'prefixed-name
1385 'name-occur
1386 ?\()
1387 (when (eq xmltok-type ?\()
1388 (xmltok-next-prolog-token)
1389 (xmltok-parse-model-group)))
1390
1391(defun xmltok-parse-entity-declaration ()
1392 (let (paramp name)
1393 (xmltok-require-next-token 'name ?%)
1394 (when (eq xmltok-type ?%)
1395 (setq paramp t)
1396 (xmltok-require-next-token 'name))
1397 (setq name (xmltok-current-token-string))
1398 (xmltok-require-next-token 'literal "SYSTEM" "PUBLIC")
1399 (cond ((eq xmltok-type 'literal)
1400 (let ((replacement (xmltok-parse-entity-value)))
1401 (unless paramp
1402 (xmltok-define-entity name replacement)))
1403 (xmltok-require-next-token ?>))
1404 (t
1405 (xmltok-parse-external-id)
1406 (if paramp
1407 (xmltok-require-token ?>)
1408 (xmltok-require-token ?> "NDATA")
1409 (if (eq xmltok-type ?>)
1410 (xmltok-define-entity name 'external)
1411 (xmltok-require-next-token 'name)
1412 (xmltok-require-next-token ?>)
1413 (xmltok-define-entity name 'unparsed)))))))
1414
1415(defun xmltok-define-entity (name value)
1416 (when (and (or (not xmltok-had-param-entity-ref)
1417 xmltok-standalone)
1418 (not (assoc name xmltok-dtd)))
1419 (setq xmltok-dtd
1420 (cons (cons name value) xmltok-dtd))))
1421
1422(defun xmltok-parse-entity-value ()
1423 (let ((lim (1- (point)))
1424 (well-formed t)
1425 value-parts
1426 start)
1427 (save-excursion
1428 (goto-char (1+ xmltok-start))
1429 (setq start (point))
1430 (while (progn
1431 (skip-chars-forward "^%&" lim)
1432 (when (< (point) lim)
1433 (goto-char (1+ (point)))
1434 (cond ((eq (char-before) ?%)
1435 (xmltok-add-error "Parameter entity references are not allowed in the internal subset"
1436 (1- (point))
1437 (point))
1438 (setq well-formed nil))
1439 (t
1440 (let ((xmltok-start (1- (point)))
1441 xmltok-type xmltok-replacement)
1442 (xmltok-scan-after-amp (lambda (start end)))
1443 (cond ((eq xmltok-type 'char-ref)
1444 (setq value-parts
1445 (cons (buffer-substring-no-properties
1446 start
1447 xmltok-start)
1448 value-parts))
1449 (setq value-parts
1450 (cons xmltok-replacement
1451 value-parts))
1452 (setq start (point)))
1453 ((eq xmltok-type 'not-well-formed)
1454 (setq well-formed nil))))))
1455 t))))
1456 (if (not well-formed)
1457 nil
1458 (apply 'concat
1459 (nreverse (cons (buffer-substring-no-properties start lim)
1460 value-parts))))))
1461
1462(defun xmltok-parse-notation-declaration ()
1463 (xmltok-require-next-token 'name)
1464 (xmltok-require-next-token "SYSTEM" "PUBLIC")
1465 (let ((publicp (string= (xmltok-current-token-string) "PUBLIC")))
1466 (xmltok-require-next-token 'literal)
1467 (cond (publicp
1468 (xmltok-require-next-token 'literal ?>)
1469 (unless (eq xmltok-type ?>)
1470 (xmltok-require-next-token ?>)))
1471 (t (xmltok-require-next-token ?>)))))
1472
1473(defun xmltok-parse-external-id ()
1474 (xmltok-require-token "SYSTEM" "PUBLIC")
1475 (let ((publicp (string= (xmltok-current-token-string) "PUBLIC")))
1476 (xmltok-require-next-token 'literal)
1477 (when publicp
1478 (xmltok-require-next-token 'literal)))
1479 (xmltok-next-prolog-token))
1480
1481(defun xmltok-require-next-token (&rest types)
1482 (xmltok-next-prolog-token)
1483 (apply 'xmltok-require-token types))
1484
1485(defun xmltok-require-token (&rest types)
1486 ;; XXX Generate a more helpful error message
1487 (while (and (not (let ((type (car types)))
1488 (if (stringp (car types))
1489 (string= (xmltok-current-token-string) type)
1490 (eq type xmltok-type))))
1491 (setq types (cdr types))))
1492 (unless types
1493 (when (and xmltok-type
1494 (not (eq xmltok-type 'not-well-formed)))
1495 (xmltok-add-error "Unexpected token"))
1496 (xmltok-markup-declaration-parse-error))
1497 (let ((region-type (xmltok-prolog-region-type (car types))))
1498 (when region-type
1499 (xmltok-add-prolog-region region-type
1500 xmltok-start
1501 (point)))))
1502
1503(defun xmltok-current-token-string ()
1504 (buffer-substring-no-properties xmltok-start (point)))
1505
1506(put 'xmltok-markup-declaration-parse-error
1507 'error-conditions
1508 '(error xmltok-markup-declaration-parse-error))
1509
1510(put 'xmltok-markup-declaration-parse-error
1511 'error-message
1512 "Syntax error in markup declaration")
1513
1514(defun xmltok-markup-declaration-parse-error ()
1515 (signal 'xmltok-markup-declaration-parse-error nil))
1516
1517(defun xmltok-skip-markup-declaration ()
1518 (while (cond ((eq xmltok-type ?>)
1519 (xmltok-next-prolog-token)
1520 nil)
1521 ((and xmltok-markup-declaration-doctype-flag
1522 (eq xmltok-type ?\[))
1523 (setq xmltok-internal-subset-start (point))
1524 (xmltok-next-prolog-token)
1525 nil)
1526 ((memq xmltok-type '(nil
1527 end-prolog
1528 named-markup-declaration
1529 comment
1530 processing-instruction))
1531 nil)
1532 ((and xmltok-internal-subset-start
1533 (eq xmltok-type 'internal-subset-close))
1534 nil)
1535 (t (xmltok-next-prolog-token) t)))
1536 xmltok-type)
1537
1538(defun xmltok-prolog-region-type (required)
1539 (cond ((cdr (assq xmltok-type
1540 '((literal . literal)
1541 (?> . markup-declaration-close)
1542 (?\[ . internal-subset-open)
1543 (hash-name . hash-name)))))
1544 ((and (stringp required) (eq xmltok-type 'name))
1545 'keyword)))
1546
1547;; Return new token type.
1548
1549(defun xmltok-next-prolog-token ()
1550 (skip-chars-forward " \t\r\n")
1551 (setq xmltok-start (point))
1552 (cond ((not (and (looking-at (xmltok-prolog regexp))
1553 (goto-char (match-end 0))))
1554 (let ((ch (char-after)))
1555 (cond (ch
1556 (goto-char (1+ (point)))
1557 (xmltok-add-error "Illegal char in prolog")
1558 (setq xmltok-type 'not-well-formed))
1559 (t (setq xmltok-type nil)))))
1560 ((or (xmltok-prolog start ncname-occur)
1561 (xmltok-prolog start colon-name-occur))
1562 (setq xmltok-name-end (1- (point)))
1563 (setq xmltok-name-colon (xmltok-prolog start colon))
1564 (setq xmltok-type 'name-occur))
1565 ((xmltok-prolog start colon)
1566 (setq xmltok-name-end (point))
1567 (setq xmltok-name-colon (xmltok-prolog start colon))
1568 (unless (looking-at "[ \t\r\n>),|[%]")
1569 (xmltok-add-error "Missing space after name"))
1570 (setq xmltok-type 'prefixed-name))
1571 ((or (xmltok-prolog start ncname-nmtoken)
1572 (xmltok-prolog start ncname-colon))
1573 (unless (looking-at "[ \t\r\n>),|[%]")
1574 (xmltok-add-error "Missing space after name token"))
1575 (setq xmltok-type 'nmtoken))
1576 ((xmltok-prolog start name)
1577 (setq xmltok-name-end (point))
1578 (setq xmltok-name-colon nil)
1579 (unless (looking-at "[ \t\r\n>),|[%]")
1580 (xmltok-add-error "Missing space after name"))
1581 (setq xmltok-type 'name))
1582 ((xmltok-prolog start hash-name)
1583 (setq xmltok-name-end (point))
1584 (unless (looking-at "[ \t\r\n>)|%]")
1585 (xmltok-add-error "Missing space after name"))
1586 (setq xmltok-type 'hash-name))
1587 ((xmltok-prolog start processing-instruction-question)
1588 (xmltok-scan-prolog-after-processing-instruction-open))
1589 ((xmltok-prolog start comment-open)
1590 ;; XXX if not-well-formed, ignore some stuff
1591 (xmltok-scan-after-comment-open))
1592 ((xmltok-prolog start named-markup-declaration)
1593 (setq xmltok-type 'named-markup-declaration))
1594 ((xmltok-prolog start instance-start)
1595 (goto-char xmltok-start)
1596 (setq xmltok-type 'end-prolog))
1597 ((xmltok-prolog start close-paren-star)
1598 (setq xmltok-type 'close-paren-star))
1599 ((xmltok-prolog start close-paren-occur)
1600 (setq xmltok-type 'close-paren-occur))
1601 ((xmltok-prolog start close-paren)
1602 (unless (looking-at "[ \t\r\n>,|)]")
1603 (xmltok-add-error "Missing space after )"))
1604 (setq xmltok-type ?\)))
1605 ((xmltok-prolog start single-char)
1606 (let ((ch (char-before)))
1607 (cond ((memq ch '(?\" ?\'))
1608 (xmltok-scan-prolog-literal))
1609 (t (setq xmltok-type ch)))))
1610 ((xmltok-prolog start percent)
1611 (cond ((xmltok-prolog start param-entity-ref-close)
1612 (setq xmltok-name-end (1- (point)))
1613 (setq xmltok-type 'param-entity-ref))
1614 ((xmltok-prolog start param-entity-ref)
1615 (xmltok-add-error "Missing ;")
1616 (setq xmltok-name-end (point))
1617 (setq xmltok-type 'param-entity-ref))
1618 ((looking-at "[ \t\r\n%]")
1619 (setq xmltok-type ?%))
1620 (t
1621 (xmltok-add-error "Expected name after %")
1622 (setq xmltok-type 'not-well-formed))))
1623 ((xmltok-prolog start nmtoken)
1624 (unless (looking-at "[ \t\r\n>),|[%]")
1625 (xmltok-add-error "Missing space after name token"))
1626 (setq xmltok-type 'nmtoken))
1627 ((xmltok-prolog start internal-subset-close)
1628 (setq xmltok-type 'internal-subset-close))
1629 ((xmltok-prolog start pound)
1630 (xmltok-add-error "Expected name after #")
1631 (setq xmltok-type 'not-well-formed))
1632 ((xmltok-prolog start markup-declaration)
1633 (xmltok-add-error "Expected name or -- after <!")
1634 (setq xmltok-type 'not-well-formed))
1635 ((xmltok-prolog start comment-first-dash)
1636 (xmltok-add-error "Expected <!--")
1637 (setq xmltok-type 'not-well-formed))
1638 ((xmltok-prolog start less-than)
1639 (xmltok-add-error "Incomplete markup")
1640 (setq xmltok-type 'not-well-formed))
1641 (t (error "Unhandled token in prolog %s"
1642 (match-string-no-properties 0)))))
1643
1644(defun xmltok-scan-prolog-literal ()
1645 (let* ((delim (string (char-before)))
1646 (safe-end (save-excursion
1647 (skip-chars-forward (concat "^<>[]" delim))
1648 (point)))
1649 (end (save-excursion
1650 (goto-char safe-end)
1651 (search-forward delim nil t))))
1652 (or (cond ((not end)
1653 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
1654 nil
1655 nil
1656 delim)
1657 nil)
1658 ((save-excursion
1659 (goto-char end)
1660 (looking-at "[ \t\r\n>%[]"))
1661 (goto-char end)
1662 (setq xmltok-type 'literal))
1663 ((eq (1+ safe-end) end)
1664 (goto-char end)
1665 (xmltok-add-error (format "Missing space after %s" delim)
1666 safe-end)
1667 (setq xmltok-type 'literal))
1668 (t
1669 (xmltok-add-dependent 'xmltok-semi-closed-reparse-p
1670 xmltok-start
1671 (1+ end)
1672 delim
1673 1)
1674 nil))
1675 (progn
1676 (xmltok-add-error (format "Missing closing %s" delim))
1677 (goto-char safe-end)
1678 (skip-chars-backward " \t\r\n")
1679 (setq xmltok-type 'not-well-formed)))))
1680
1681(defun xmltok-scan-prolog-after-processing-instruction-open ()
1682 (cond ((not (search-forward "?>" nil t))
1683 (xmltok-add-error "No closing ?>"
1684 xmltok-start
1685 (+ xmltok-start 2))
1686 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
1687 nil
1688 nil
1689 "?>")
1690 (setq xmltok-type 'not-well-formed))
1691 (t
1692 (let* ((end (point))
1693 (target
1694 (save-excursion
1695 (goto-char (+ xmltok-start 2))
1696 (and (looking-at (xmltok-ncname regexp))
1697 (or (memq (char-after (match-end 0))
1698 '(?\n ?\t ?\r ? ))
1699 (= (match-end 0) (- end 2)))
1700 (match-string-no-properties 0)))))
1701 (cond ((not target)
1702 (xmltok-add-error "\
1703Processing instruction does not start with a name"
1704 (+ xmltok-start 2)
1705 (+ xmltok-start 3)))
1706 ((not (and (= (length target) 3)
1707 (let ((case-fold-search t))
1708 (string-match "xml" target)))))
1709 ((= xmltok-start 1)
1710 (xmltok-add-error "Invalid XML declaration"
1711 xmltok-start
1712 (point)))
1713 ((save-excursion
1714 (goto-char xmltok-start)
1715 (looking-at (xmltok-xml-declaration regexp)))
1716 (xmltok-add-error "XML declaration not at beginning of file"
1717 xmltok-start
1718 (point)))
1719 (t
1720 (xmltok-add-error "Processing instruction has target of xml"
1721 (+ xmltok-start 2)
1722 (+ xmltok-start 5))))
1723 (xmltok-add-prolog-region 'processing-instruction-left
1724 xmltok-start
1725 (+ xmltok-start
1726 2
1727 (if target
1728 (length target)
1729 0)))
1730 (xmltok-add-prolog-region 'processing-instruction-right
1731 (if target
1732 (save-excursion
1733 (goto-char (+ xmltok-start
1734 (length target)
1735 2))
1736 (skip-chars-forward " \t\r\n")
1737 (point))
1738 (+ xmltok-start 2))
1739 (point)))
1740 (setq xmltok-type 'processing-instruction))))
1741
1742(defun xmltok-parse-entities ()
1743 (let ((todo xmltok-dtd))
1744 (when (and (or xmltok-had-param-entity-ref
1745 xmltok-doctype-external-subset-flag)
1746 (not xmltok-standalone))
1747 (setq xmltok-dtd (cons t xmltok-dtd)))
1748 (while todo
1749 (xmltok-parse-entity (car todo))
1750 (setq todo (cdr todo)))))
1751
1752(defun xmltok-parse-entity (name-def)
1753 (let ((def (cdr name-def))
1754 ;; in case its value is buffer local
1755 (xmltok-dtd xmltok-dtd)
1756 buf)
1757 (when (stringp def)
1758 (if (string-match "\\`[^&<\t\r\n]*\\'" def)
1759 (setcdr name-def (cons def def))
1760 (setcdr name-def 'not-well-formed) ; avoid infinite expansion loops
1761 (setq buf (get-buffer-create
1762 (format " *Entity %s*" (car name-def))))
1763 (save-excursion
1764 (set-buffer buf)
1765 (erase-buffer)
1766 (insert def)
1767 (goto-char (point-min))
1768 (setcdr name-def
1769 (xmltok-parse-entity-replacement)))
1770 (kill-buffer buf)))))
1771
1772(defun xmltok-parse-entity-replacement ()
1773 (let ((def (cons "" "")))
1774 (while (let* ((start (point))
1775 (found (re-search-forward "[<&\t\r\n]\\|]]>" nil t))
1776 (ch (and found (char-before)))
1777 (str (buffer-substring-no-properties
1778 start
1779 (if found
1780 (match-beginning 0)
1781 (point-max)))))
1782 (setq def
1783 (xmltok-append-entity-def def
1784 (cons str str)))
1785 (cond ((not found) nil)
1786 ((eq ch ?>)
1787 (setq def 'not-well-formed)
1788 nil)
1789 ((eq ch ?<)
1790 (xmltok-save
1791 (setq xmltok-start (1- (point)))
1792 (xmltok-scan-after-lt)
1793 (setq def
1794 (xmltok-append-entity-def
1795 def
1796 (cond ((memq xmltok-type
1797 '(start-tag
1798 end-tag
1799 empty-element))
1800 'element)
1801 ((memq xmltok-type
1802 '(comment
1803 processing-instruction))
1804 (cons "" nil))
1805 ((eq xmltok-type
1806 'cdata-section)
1807 (cons (buffer-substring-no-properties
1808 (+ xmltok-start 9)
1809 (- (point) 3))
1810 nil))
1811 (t 'not-well-formed)))))
1812 t)
1813 ((eq ch ?&)
1814 (let ((xmltok-start (1- (point)))
1815 xmltok-type
1816 xmltok-replacement
1817 xmltok-errors)
1818 (xmltok-scan-after-amp 'xmltok-handle-nested-entity)
1819 (cond ((eq xmltok-type 'entity-ref)
1820 (setq def
1821 (xmltok-append-entity-def
1822 def
1823 xmltok-replacement)))
1824 ((eq xmltok-type 'char-ref)
1825 (setq def
1826 (xmltok-append-entity-def
1827 def
1828 (if xmltok-replacement
1829 (cons xmltok-replacement
1830 xmltok-replacement)
1831 (and xmltok-errors 'not-well-formed)))))
1832 (t
1833 (setq def 'not-well-formed))))
1834 t)
1835 (t
1836 (setq def
1837 (xmltok-append-entity-def
1838 def
1839 (cons (match-string-no-properties 0)
1840 " ")))
1841 t))))
1842 def))
1843
1844(defun xmltok-handle-nested-entity (start end)
1845 (let* ((name-def (assoc (buffer-substring-no-properties start end)
1846 xmltok-dtd))
1847 (def (cdr name-def)))
1848 (when (stringp def)
1849 (xmltok-parse-entity name-def)
1850 (setq def (cdr name-def)))
1851 (setq xmltok-replacement
1852 (cond ((null name-def)
1853 (if (eq (car xmltok-dtd) t)
1854 nil
1855 'not-well-formed))
1856 ((eq def 'unparsed) 'not-well-formed)
1857 (t def)))))
1858
1859(defun xmltok-append-entity-def (d1 d2)
1860 (cond ((consp d1)
1861 (if (consp d2)
1862 (cons (concat (car d1) (car d2))
1863 (and (cdr d1)
1864 (cdr d2)
1865 (concat (cdr d1) (cdr d2))))
1866 d2))
1867 ((consp d2) d1)
1868 (t
1869 (let ((defs '(not-well-formed external element)))
1870 (while (not (or (eq (car defs) d1)
1871 (eq (car defs) d2)))
1872 (setq defs (cdr defs)))
1873 (car defs)))))
1874
1875(defun xmltok-add-prolog-region (type start end)
1876 (setq xmltok-prolog-regions
1877 (cons (vector type start end)
1878 xmltok-prolog-regions)))
1879
1880(defun xmltok-merge-attributes ()
1881 "Return a list merging `xmltok-attributes' and 'xmltok-namespace-attributes'.
1882The members of the merged list are in order of occurrence in the
1883document. The list may share list structure with `xmltok-attributes'
1884and `xmltok-namespace-attributes'."
1885 (cond ((not xmltok-namespace-attributes)
1886 xmltok-attributes)
1887 ((not xmltok-attributes)
1888 xmltok-namespace-attributes)
1889 (t
1890 (let ((atts1 xmltok-attributes)
1891 (atts2 xmltok-namespace-attributes)
1892 merged)
1893 (while (and atts1 atts2)
1894 (cond ((< (xmltok-attribute-name-start (car atts1))
1895 (xmltok-attribute-name-start (car atts2)))
1896 (setq merged (cons (car atts1) merged))
1897 (setq atts1 (cdr atts1)))
1898 (t
1899 (setq merged (cons (car atts2) merged))
1900 (setq atts2 (cdr atts2)))))
1901 (setq merged (nreverse merged))
1902 (cond (atts1 (setq merged (nconc merged atts1)))
1903 (atts2 (setq merged (nconc merged atts2))))
1904 merged))))
1905
1906;;; Testing
1907
1908(defun xmltok-forward-test ()
1909 (interactive)
1910 (if (xmltok-forward)
1911 (message "Scanned %s" xmltok-type)
1912 (message "Scanned nothing")))
1913
1914(defun xmltok-next-prolog-token-test ()
1915 (interactive)
1916 (if (xmltok-next-prolog-token)
1917 (message "Scanned %s"
1918 (if (integerp xmltok-type)
1919 (string xmltok-type)
1920 xmltok-type))
1921 (message "Scanned end of file")))
1922
1923(provide 'xmltok)
1924
ab4c34c6 1925;; arch-tag: 747e5f3a-6fc3-4f8d-bd96-89f05aa99f5e
8cd39fb3 1926;;; xmltok.el ends here