(Lisp changes in 21.4): document (match-data t) change.
[bpt/emacs.git] / lisp / xml.el
CommitLineData
1cd7adc6 1;;; xml.el --- XML parser
47db06aa 2
2e9bdf15 3;; Copyright (C) 2000, 01, 03, 2004 Free Software Foundation, Inc.
47db06aa
GM
4
5;; Author: Emmanuel Briot <briot@gnat.com>
720058f2 6;; Maintainer: Mark A. Hershberger <mah@everybody.org>
a98e819b 7;; Keywords: xml, data
47db06aa
GM
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs; see the file COPYING. If not, write to the
23;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24;; Boston, MA 02111-1307, USA.
25
26;;; Commentary:
27
a98e819b
DL
28;; This file contains a somewhat incomplete non-validating XML parser. It
29;; parses a file, and returns a list that can be used internally by
a1dfa9a3 30;; any other Lisp libraries.
47db06aa
GM
31
32;;; FILE FORMAT
33
a98e819b
DL
34;; The document type declaration may either be ignored or (optionally)
35;; parsed, but currently the parsing will only accept element
a1dfa9a3 36;; declarations. The XML file is assumed to be well-formed. In case
a98e819b
DL
37;; of error, the parsing stops and the XML file is shown where the
38;; parsing stopped.
47db06aa 39;;
a98e819b 40;; It also knows how to ignore comments and processing instructions.
47db06aa
GM
41;;
42;; The XML file should have the following format:
653558a1
GM
43;; <node1 attr1="name1" attr2="name2" ...>value
44;; <node2 attr3="name3" attr4="name4">value2</node2>
45;; <node3 attr5="name5" attr6="name6">value3</node3>
47db06aa 46;; </node1>
a1dfa9a3 47;; Of course, the name of the nodes and attributes can be anything. There can
47db06aa
GM
48;; be any number of attributes (or none), as well as any number of children
49;; below the nodes.
50;;
51;; There can be only one top-level node, but with any number of children below.
52
53;;; LIST FORMAT
54
c7f8d055
SM
55;; The functions `xml-parse-file', `xml-parse-region' and
56;; `xml-parse-tag' return a list with the following format:
47db06aa
GM
57;;
58;; xml-list ::= (node node ...)
c7f8d055 59;; node ::= (qname attribute-list . child_node_list)
47db06aa
GM
60;; child_node_list ::= child_node child_node ...
61;; child_node ::= node | string
c7f8d055
SM
62;; qname ::= (:namespace-uri . "name") | "name"
63;; attribute_list ::= ((qname . "value") (qname . "value") ...)
47db06aa
GM
64;; | nil
65;; string ::= "..."
66;;
a98e819b
DL
67;; Some macros are provided to ease the parsing of this list.
68;; Whitespace is preserved. Fixme: There should be a tree-walker that
69;; can remove it.
47db06aa 70
c7f8d055
SM
71;; TODO:
72;; * xml:base, xml:space support
73;; * more complete DOCTYPE parsing
74;; * pi support
75
47db06aa
GM
76;;; Code:
77
a98e819b
DL
78;; Note that {buffer-substring,match-string}-no-properties were
79;; formerly used in several places, but that removes composition info.
80
47db06aa
GM
81;;*******************************************************************
82;;**
83;;** Macros to parse the list
84;;**
85;;*******************************************************************
86
(defsubst xml-node-name (node)
  "Return the tag of NODE, i.e. the first element of the node list.
Without namespace-aware parsing, the tag is a symbol.

With namespace-aware parsing, the tag is a cons cell of the form
\(NAMESPACE . LOCAL-NAME) as built by `xml-maybe-do-ns', where
LOCAL-NAME is a string."
  (nth 0 node))
47db06aa 102
(defsubst xml-node-attributes (node)
  "Return the attribute alist of NODE (its second element).
The result is nil when NODE carries no attributes."
  (car (cdr node)))
47db06aa 107
(defsubst xml-node-children (node)
  "Return the children of NODE: everything after its name and attributes.
Each child is either a node or a string; the result can be nil."
  (nthcdr 2 node))
47db06aa
GM
112
(defun xml-get-children (node child-name)
  "Return the children of NODE whose tag is CHILD-NAME.
CHILD-NAME is compared with `equal' against the value returned by
`xml-node-name' for each child.  String children are never returned.
The matching children are returned in document order."
  (let (found)
    (dolist (kid (xml-node-children node) (nreverse found))
      (when (and (listp kid)
                 (equal child-name (xml-node-name kid)))
        (setq found (cons kid found))))))
47db06aa 122
(defun xml-get-attribute-or-nil (node attribute)
  "Look up ATTRIBUTE in the attribute list of NODE and return its value.
The lookup uses `assoc'.  Return nil if NODE has no such attribute.

See also `xml-get-attribute'."
  (let ((entry (assoc attribute (xml-node-attributes node))))
    (cdr entry)))
9bcd6a7e
EZ
129
(defsubst xml-get-attribute (node attribute)
  "Look up ATTRIBUTE in NODE and return its value.
When the attribute is absent, return the empty string instead of nil.

See also `xml-get-attribute-or-nil'."
  (let ((value (xml-get-attribute-or-nil node attribute)))
    (if value value "")))
47db06aa
GM
136
137;;*******************************************************************
138;;**
139;;** Creating the list
140;;**
141;;*******************************************************************
142
a98e819b 143;;;###autoload
(defun xml-parse-file (file &optional parse-dtd parse-ns)
  "Parse the well-formed XML file FILE.
If FILE is already visited, parse its buffer in place; the buffer is
not killed and its point is left untouched.  Otherwise the contents of
FILE are read into a temporary buffer, which is discarded afterwards
even if parsing signals an error.
Returns the top node with all its children.
If PARSE-DTD is non-nil, the DTD is parsed rather than skipped.
If PARSE-NS is non-nil, then QNAMES are expanded."
  (let ((visiting (get-file-buffer file)))
    (if visiting
        ;; `with-current-buffer' restores the caller's current buffer and
        ;; `save-excursion' restores point in the visited buffer.
        (with-current-buffer visiting
          (save-excursion
            (xml-parse-region (point-min)
                              (point-max)
                              (current-buffer)
                              parse-dtd parse-ns)))
      ;; No need to visit the file (and thereby select a window, run
      ;; mode hooks, etc.) just to read its text.
      (with-temp-buffer
        (insert-file-contents file)
        (xml-parse-region (point-min)
                          (point-max)
                          (current-buffer)
                          parse-dtd parse-ns)))))
47db06aa 166
a98e819b
DL
167;; Note that this is setup so that we can do whitespace-skipping with
168;; `(skip-syntax-forward " ")', inter alia. Previously this was slow
169;; compared with `re-search-forward', but that has been fixed. Also
170;; note that the standard syntax table contains other characters with
171;; whitespace syntax, like NBSP, but they are invalid in contexts in
172;; which we might skip whitespace -- specifically, they're not
173;; NameChars [XML 4].
174
(defvar xml-syntax-table
  (let ((table (make-syntax-table)))
    ;; Get space syntax correct per XML [3]: first give every control
    ;; character U+0000..U+001F (32 characters) punctuation syntax ...
    (dotimes (c 32)
      (modify-syntax-entry c "." table))
    ;; ... then restore whitespace syntax for the ones XML treats as space.
    (dolist (c '(?\t ?\n ?\r))
      (modify-syntax-entry c " " table))
    ;; For skipping attributes.
    (modify-syntax-entry ?\" "\"" table)
    (modify-syntax-entry ?' "\"" table)
    ;; Non-alnum name chars should be symbol constituents (`-' and `_'
    ;; are OK by default).
    (modify-syntax-entry ?. "_" table)
    (modify-syntax-entry ?: "_" table)
    ;; XML [89]
    (dolist (c '(#x00B7 #x02D0 #x02D1 #x0387 #x0640 #x0E46 #x0EC6 #x3005
                 #x3031 #x3032 #x3033 #x3034 #x3035 #x309D #x309E #x30FC
                 #x30FD #x30FE))
      (modify-syntax-entry (decode-char 'ucs c) "w" table))
    ;; Fixme: rest of [4]
    table)
  "Syntax table describing XML character classes.
Tab, newline and carriage return have whitespace syntax, the other
control characters have punctuation syntax, both quote characters are
string delimiters, and `.' and `:' are symbol constituents.
NOTE(review): `xml-parse-region' as written in this file binds the
standard syntax table rather than this one -- confirm intended use.")
197
198;; XML [5]
199;; Note that [:alpha:] matches all multibyte chars with word syntax.
ab161457
JPW
(eval-and-compile
  ;; Needed at compile time too, because `xml-parse-attlist' builds a
  ;; regexp from it inside `eval-when-compile'.
  (defconst xml-name-regexp
    (concat "[[:alpha:]_:]"             ; first character of a name
            "[[:alnum:]._:-]*")         ; remaining name characters
    "Regexp matching an XML name (XML [5])."))
a98e819b
DL
202
203;; Fixme: This needs re-writing to deal with the XML grammar properly, i.e.
204;; document ::= prolog element Misc*
205;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
206
207;;;###autoload
(defun xml-parse-region (beg end &optional buffer parse-dtd parse-ns)
  "Parse the region from BEG to END in BUFFER.
If BUFFER is nil, it defaults to the current buffer.
Returns the XML list for the region, or raises an error if the region
is not well-formed XML.
If PARSE-DTD is non-nil, the DTD is parsed rather than skipped,
and returned as the first element of the list.
If PARSE-NS is non-nil, then QNAMES are expanded.

Point, narrowing, the syntax table and the current buffer are all
restored on exit."
  ;; Select the target buffer FIRST: narrowing, the syntax table and
  ;; `case-fold-search' are all per-buffer, so setting them up before
  ;; switching would affect the wrong buffer.
  (with-current-buffer (or buffer (current-buffer))
    (save-excursion
      (save-restriction
        (narrow-to-region beg end)
        ;; Use fixed syntax table to ensure regexp char classes and
        ;; syntax specs DTRT.
        (with-syntax-table (standard-syntax-table)
          (let ((case-fold-search nil)  ; XML is case-sensitive.
                xml result dtd start)
            (goto-char (point-min))
            (while (not (eobp))
              (if (not (search-forward "<" nil t))
                  ;; No more markup: we are done.
                  (goto-char (point-max))
                (forward-char -1)
                (setq start (point)
                      result (xml-parse-tag parse-dtd parse-ns))
                (cond
                 ((null result)
                  ;; Nothing to collect (e.g. a comment).  If the
                  ;; parser did not consume anything (a stray end tag),
                  ;; step over the `<' so that we cannot loop forever.
                  (if (= (point) start)
                      (forward-char 1)))
                 (xml
                  ;; translation of rule [1] of XML specifications
                  (error "XML files can have only one toplevel tag"))
                 ;; A parsed DTD is a list starting with the symbol `dtd'
                 ;; (see `xml-parse-dtd').  Merely testing for a list
                 ;; would also match a namespace-expanded node name.
                 ((and parse-dtd
                       (eq (car-safe (car-safe result)) 'dtd))
                  (setq dtd (car result))
                  (if (cdr result)      ; possible leading comment
                      (push (cdr result) xml)))
                 (t
                  (push result xml)))))
            (if parse-dtd
                (cons dtd (nreverse xml))
              (nreverse xml))))))))
47db06aa 248
(defun xml-maybe-do-ns (name default xml-ns)
  "Perform any namespace expansion on NAME.
XML-NS is an alist mapping namespace prefixes (strings) to namespace
identifiers.  When XML-NS is not a cons (namespace-aware parsing is
off), NAME is simply interned and returned as a symbol.

Otherwise the result is a cons (NAMESPACE . LOCAL-NAME).  A NAME of the
form \"prefix:local\" is looked up by its prefix; a NAME without a
prefix is looked up under DEFAULT.  A nil DEFAULT means that
unprefixed names get no namespace, except for the bare name \"xmlns\",
which is looked up under the \"xmlns\" prefix and given an empty local
name.  Names whose prefix is not found in XML-NS get the namespace `:'."
  (if (not (consp xml-ns))
      (intern name)
    (let* ((colon (string-match ":" name))
           (local-part (if colon (substring name (1+ colon)) name))
           (ns-prefix (if colon (substring name 0 colon) default))
           ;; A bare \"xmlns\" attribute (default namespace declaration).
           (xmlns-decl (and (not ns-prefix)
                            (string-equal local-part "xmlns")))
           (entry (assoc (if xmlns-decl "xmlns" ns-prefix) xml-ns)))
      (cons (or (cdr entry) :)
            (if xmlns-decl "" local-part)))))
47db06aa 271
(defun xml-parse-tag (&optional parse-dtd parse-ns)
  "Parse the tag at point.
If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
returned as the first element in the list.
If PARSE-NS is non-nil, then QNAMES are expanded.  PARSE-NS may also be
an alist of already known namespace prefixes, as passed down when
parsing child elements.
Returns one of:
 - a list : the matching node
 - a string : the contents of a CDATA section
 - nil    : the point is not looking at a tag.
 - a pair : the first element is the DTD, the second is the node.
Signals an error if the markup at point is malformed."
  (let ((xml-ns (if (consp parse-ns)
                    parse-ns
                  (if parse-ns
                      (list
                       ;; Default for empty prefix is no namespace
                       (cons "" :)
                       ;; "xml" namespace
                       (cons "xml" :http://www.w3.org/XML/1998/namespace)
                       ;; We need to seed the xmlns namespace
                       (cons "xmlns" :http://www.w3.org/2000/xmlns/))))))
    (cond
     ;; Processing instructions (like the <?xml version="1.0"?> tag at the
     ;; beginning of a document).
     ((looking-at "<\\?")
      (search-forward "?>")
      (skip-syntax-forward " ")
      (xml-parse-tag parse-dtd xml-ns))
     ;; Character data (CDATA) sections, in which no tag should be interpreted
     ((looking-at "<!\\[CDATA\\[")
      (let ((pos (match-end 0)))
        (unless (search-forward "]]>" nil t)
          (error "CDATA section does not end anywhere in the document"))
        (buffer-substring pos (match-beginning 0))))
     ;; DTD for the document
     ((looking-at "<!DOCTYPE")
      (let (dtd)
        (if parse-dtd
            (setq dtd (xml-parse-dtd))
          (xml-skip-dtd))
        (skip-syntax-forward " ")
        (if dtd
            (cons dtd (xml-parse-tag nil xml-ns))
          (xml-parse-tag nil xml-ns))))
     ;; skip comments
     ((looking-at "<!--")
      (search-forward "-->")
      nil)
     ;; end tag
     ((looking-at "</")
      '())
     ;; opening tag
     ((looking-at "<\\([^/>[:space:]]+\\)")
      (goto-char (match-end 1))

      ;; Parse this node
      (let* ((node-name (match-string 1))
             ;; Parse the attribute list.
             (attrs (xml-parse-attlist xml-ns))
             children pos)

        ;; add the xmlns:* attrs to our cache
        (when (consp xml-ns)
          (dolist (attr attrs)
            (when (and (consp (car attr))
                       (eq :http://www.w3.org/2000/xmlns/
                           (caar attr)))
              (push (cons (cdar attr) (intern (concat ":" (cdr attr))))
                    xml-ns))))

        ;; CHILDREN is accumulated in reverse order and `nreverse'd at
        ;; the end, hence the attributes come before the name here.
        (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))

        ;; is this an empty element ?
        (if (looking-at "/>")
            (progn
              (forward-char 2)
              (nreverse children))

          ;; is this a valid start tag ?
          (if (eq (char-after) ?>)
              (progn
                (forward-char 1)
                ;; Now check that we have the right end-tag.  Note that this
                ;; one might contain spaces after the tag name.  The name
                ;; is quoted because XML names may contain `.', which is
                ;; special in regexps.
                (let ((end (concat "</" (regexp-quote node-name) "\\s-*>")))
                  (while (not (looking-at end))
                    (cond
                     ((eobp)
                      ;; Without this check, `char-after' returns nil
                      ;; below and `=' signals an obscure type error.
                      (error "XML: End of buffer while looking for end tag </%s>"
                             node-name))
                     ((looking-at "</")
                      (error "XML: Invalid end tag (expecting %s) at pos %d"
                             node-name (point)))
                     ((= (char-after) ?<)
                      (let ((tag (xml-parse-tag nil xml-ns)))
                        (when tag
                          (push tag children))))
                     (t
                      (setq pos (point))
                      (search-forward "<")
                      (forward-char -1)
                      (let ((string (buffer-substring pos (point)))
                            (pos 0))

                        ;; Clean up the string.  As per XML
                        ;; specifications, the XML processor should
                        ;; always pass the whole string to the
                        ;; application.  But \r's should be replaced:
                        ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
                        (while (string-match "\r\n?" string pos)
                          (setq string (replace-match "\n" t t string))
                          (setq pos (1+ (match-beginning 0))))

                        (setq string (xml-substitute-special string))
                        (setq children
                              (if (stringp (car children))
                                  ;; The two strings were separated by a comment.
                                  (cons (concat (car children) string)
                                        (cdr children))
                                (cons string children))))))))

                ;; The last successful match is the end tag; move past it.
                (goto-char (match-end 0))
                (nreverse children))
            ;; This was an invalid start tag
            (error "XML: Invalid attribute list")))))
     (t ;; This is not a tag.
      (error "XML: Invalid character")))))
47db06aa 394
(defun xml-parse-attlist (&optional xml-ns)
  "Return the attribute-list after point.
Leave point at the first non-blank character after the tag.
XML-NS is the namespace alist handed to `xml-maybe-do-ns' to expand
each attribute name.  The result is an alist of (NAME . VALUE) pairs
in document order, with entity references in VALUE substituted.
Signals an error for unquoted values and for duplicated attributes."
  (let ((attlist ())
        end-pos name value)
    (skip-syntax-forward " ")
    (while (looking-at (eval-when-compile
                         (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*")))
      (setq end-pos (match-end 0))
      (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns))
      (goto-char end-pos)

      ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize

      ;; The value must be a string between quotes or double-quotes.
      ;; Grab it right away, while the match data is known to be ours.
      (if (or (looking-at "\"\\([^\"]*\\)\"")
              (looking-at "'\\([^']*\\)'"))
          (setq end-pos (match-end 0)
                value (match-string 1))
        (error "XML: Attribute values must be given between quotes"))

      ;; Each attribute must be unique within a given element
      (if (assoc name attlist)
          (error "XML: each attribute must be unique within an element"))

      ;; Whitespace inside the value is passed through unchanged.  (This
      ;; function used to call `replace-regexp-in-string' here to collapse
      ;; runs of whitespace, but discarded the result, so values were
      ;; never actually altered; the dead call has been removed.)
      (push (cons name (xml-substitute-special value)) attlist)

      (goto-char end-pos)
      (skip-syntax-forward " "))
    (nreverse attlist)))
47db06aa
GM
431
432;;*******************************************************************
433;;**
434;;** The DTD (document type declaration)
435;;** The following functions know how to skip or parse the DTD of
436;;** a document
437;;**
438;;*******************************************************************
439
a98e819b
DL
440;; Fixme: This fails at least if the DTD contains conditional sections.
441
(defun xml-skip-dtd ()
  "Skip the DTD at point.
Point must be at the beginning of \"<!DOCTYPE\"; it is left just after
the `>' that closes the declaration.
This follows the rule [28] in the XML specifications.
Signals an error if the document name is missing or if the end of the
declaration cannot be found."
  (forward-char (eval-when-compile (length "<!DOCTYPE")))
  (if (looking-at "\\s-*>")
      (error "XML: invalid DTD (expecting name of the document)"))
  (condition-case nil
      (progn
        ;; The document name.
        (forward-sexp)
        (skip-syntax-forward " ")
        ;; An optional external ID: SYSTEM \"uri\" or PUBLIC \"id\" \"uri\".
        ;; Skip it token by token so that neither a `>' inside a quoted
        ;; literal nor a following internal subset confuses the search
        ;; for the end of the declaration.
        (while (looking-at
                "\\(PUBLIC\\|SYSTEM\\|\"[^\"]*\"\\|'[^']*'\\)\\s-*")
          (goto-char (match-end 0)))
        (if (looking-at "\\[")
            ;; Internal subset: the declaration ends at `]' `>'.
            (re-search-forward "]\\s-*>")
          (search-forward ">")))
    (error (error "XML: No end to the DTD"))))
456
a98e819b
DL
(defun xml-parse-dtd ()
  "Parse the DTD at point.
Point must be at the beginning of \"<!DOCTYPE\"; it is left after the
`>' that closes the declaration.
Return a list whose first element is the symbol `dtd' and whose second
element is the document name, followed by an optional external ID entry
\(PUBID SYSID public) or (SYSID system) and one (ELEMENT TYPE) entry per
element declaration of the internal subset, in document order.
Only element declarations and comments are accepted in the internal
subset; anything else signals an error."
  (forward-char (eval-when-compile (length "<!DOCTYPE")))
  (skip-syntax-forward " ")
  ;; Get the name of the document.  Check the match explicitly:
  ;; otherwise stale match data would be used as the name.
  (if (or (looking-at ">")
          (not (looking-at xml-name-regexp)))
      (error "XML: invalid DTD (expecting name of the document)"))

  ;; DTD is built in reverse and `nreverse'd on return.
  (let ((dtd (list (match-string 0) 'dtd))
        type element end-pos)
    (goto-char (match-end 0))

    (skip-syntax-forward " ")
    ;; XML [75]
    (cond ((looking-at "PUBLIC\\s-+")
           (goto-char (match-end 0))
           (unless (or (re-search-forward
                        "\\=\"\\([[:space:][:alnum:]-'()+,./:=?;!*#@$_%]*\\)\""
                        nil t)
                       (re-search-forward
                        "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'"
                        nil t))
             (error "XML: missing public id"))
           (let ((pubid (match-string 1)))
             ;; The system literal is separated from the public id by
             ;; whitespace, which the `\\=' anchored searches below
             ;; would not skip by themselves.
             (skip-syntax-forward " ")
             (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
                         (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
               (error "XML: missing system id"))
             (push (list pubid (match-string 1) 'public) dtd)))
          ((looking-at "SYSTEM\\s-+")
           (goto-char (match-end 0))
           (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
                       (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
             (error "XML: missing system id"))
           (push (list (match-string 1) 'system) dtd)))
    (skip-syntax-forward " ")
    (if (eq ?> (char-after))
        (forward-char)
      (skip-syntax-forward " ")
      (if (not (eq (char-after) ?\[))
          (error "XML: bad DTD")
        (forward-char)
        ;; Parse the rest of the DTD
        ;; Fixme: Deal with ENTITY, ATTLIST, NOTATION, PIs.
        (while (not (looking-at "\\s-*\\]"))
          (skip-syntax-forward " ")
          (cond

           ;; Translation of rule [45] of XML specifications
           ((looking-at
             "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")

            (setq element (match-string 1)
                  type    (match-string-no-properties 2))
            (setq end-pos (match-end 0))

            ;; Translation of rule [46] of XML specifications
            (cond
             ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration
              (setq type 'empty))
             ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
              (setq type 'any))
             ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
              (setq type (xml-parse-elem-type (match-string 1 type))))
             ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
              nil)
             (t
              (error "XML: Invalid element type in the DTD")))

            ;; rule [45]: the element declaration must be unique
            (if (assoc element dtd)
                (error "XML: element declarations must be unique in a DTD (<%s>)"
                       element))

            ;; Store the element in the DTD
            (push (list element type) dtd)
            (goto-char end-pos))
           ((looking-at "<!--")
            (search-forward "-->"))

           (t
            (error "XML: Invalid DTD item"))))

        ;; Skip the end of the DTD.  This must happen once, AFTER the
        ;; loop: doing it per item would skip over the declaration that
        ;; follows, and would never consume the closing \"]>\" of an
        ;; empty internal subset.
        (search-forward ">")))
    (nreverse dtd)))
47db06aa
GM
543
(defun xml-parse-elem-type (string)
  "Convert element type STRING into a Lisp structure.
A parenthesized group containing `|' becomes (choice ITEM...), one
containing `,' becomes (seq ITEM...), each ITEM being converted
recursively.  The name \"#PCDATA\" becomes the symbol `pcdata'; any
other name stays a string.  When the item carries an occurrence
marker (`+', `*' or `?'), the result is wrapped as (MARKER ITEM)."
  (let (content suffix)
    (cond
     ;; A parenthesized group, optionally followed by a marker.
     ((string-match "(\\([^)]+\\))\\([+*?]?\\)" string)
      (setq content (match-string 1 string)
            suffix (match-string 2 string))
      (cond
       ((string-match "|" content)
        (setq content (cons 'choice
                            (mapcar 'xml-parse-elem-type
                                    (split-string content "|")))))
       ((string-match "," content)
        (setq content (cons 'seq
                            (mapcar 'xml-parse-elem-type
                                    (split-string content ",")))))))
     ;; A plain name, optionally followed by a marker.
     ((string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string)
      (setq content (match-string 1 string)
            suffix (match-string 2 string))))

    (when (and (stringp content) (string= content "#PCDATA"))
      (setq content 'pcdata))

    ;; The marker symbol has the same name as the marker character.
    (if (member suffix '("+" "*" "?"))
        (list (intern suffix) content)
      content)))
47db06aa 576
47db06aa
GM
577;;*******************************************************************
578;;**
579;;** Substituting special XML sequences
580;;**
581;;*******************************************************************
582
a98e819b
DL
;; Fixme: Take declared entities from the DTD when they're available.
(defun xml-substitute-entity (match)
  "Subroutine of `xml-substitute-special'.
MATCH is the text of one entity or character reference, including the
leading `&' and the trailing `;'.  Return the string it stands for:
the five predefined entities and decimal/hexadecimal character
references are expanded; anything else, including character
references that cannot be represented, is returned unchanged."
  ;; The entity name is taken from MATCH itself.  This used to peek at
  ;; `str', a local variable of `replace-regexp-in-string', which only
  ;; works while that function happens to bind it dynamically.
  (save-match-data
    (let ((name (if (string-match "\\`&\\([^;]+\\);\\'" match)
                    (match-string 1 match)
                  match)))
      (cond ((string= name "lt") "<")
            ((string= name "gt") ">")
            ((string= name "apos") "'")
            ((string= name "quot") "\"")
            ((string= name "amp") "&")
            ;; Decimal character reference.  A nil result from
            ;; `decode-char' makes the clause fail, so that we fall
            ;; through to the default below.
            ((and (string-match "\\`#\\([0-9]+\\)\\'" name)
                  (let ((c (decode-char
                            'ucs
                            (string-to-number (match-string 1 name)))))
                    (if c (string c)))))    ; else unrepresentable
            ;; Hexadecimal character reference.
            ((and (string-match "\\`#x\\([[:xdigit:]]+\\)\\'" name)
                  (let ((c (decode-char
                            'ucs
                            (string-to-number (match-string 1 name) 16))))
                    (if c (string c)))))
            ;; Default to asis.  Arguably, unrepresentable code points
            ;; might be best replaced with U+FFFD.
            (t match)))))
609
(defun xml-substitute-special (string)
  "Return STRING, after substituting entity references.
Every \"&...;\" sequence is handed to `xml-substitute-entity' and
replaced by what that function returns."
  ;; A single left-to-right pass is required: repeated passes from the
  ;; beginning would expand the output of an earlier substitution again,
  ;; so that "&amp;amp;" or "&#38;amp;" would come out wrong.
  (let ((entity-ref "&\\([^;]+\\);"))
    (replace-regexp-in-string entity-ref 'xml-substitute-entity
                              string t t)))
47db06aa
GM
617
618;;*******************************************************************
619;;**
620;;** Printing a tree.
621;;** This function is intended mainly for debugging purposes.
622;;**
623;;*******************************************************************
624
27240aa4
AS
625(defun xml-debug-print (xml &optional indent-string)
626 "Outputs the XML in the current buffer.
627XML can be a tree or a list of nodes.
628The first line is indented with the optional INDENT-STRING."
629 (setq indent-string (or indent-string ""))
971489ea 630 (dolist (node xml)
27240aa4
AS
631 (xml-debug-print-internal node indent-string)))
632
633(defalias 'xml-print 'xml-debug-print)
47db06aa 634
(defun xml-debug-print-internal (xml indent-string)
  "Insert the node XML, with all its children, into the current buffer.
The first line is indented with INDENT-STRING; child nodes are printed
on their own lines with a deeper indentation.  A node without children
is printed as an empty-element tag.
Node and attribute names must be symbols; a child that is neither a
list nor a string signals an error."
  (let ((tag (symbol-name (xml-node-name xml)))
        (kids (xml-node-children xml)))
    (insert indent-string "<" tag)

    ;; output the attribute list
    (dolist (attr (xml-node-attributes xml))
      (insert " " (symbol-name (car attr)) "=\"" (cdr attr) "\""))

    (cond
     ((null kids)
      (insert "/>"))
     (t
      (insert ">")

      ;; output the children
      (dolist (kid kids)
        (cond
         ((listp kid)
          (insert "\n")
          (xml-debug-print-internal kid (concat indent-string " ")))
         ((stringp kid)
          (insert kid))
         (t
          (error "Invalid XML tree"))))

      ;; A lone text child stays on the same line as its tags.
      (unless (and (null (cdr kids))
                   (stringp (car kids)))
        (insert "\n" indent-string))
      (insert "</" tag ">")))))
47db06aa
GM
668
669(provide 'xml)
670
8a02e193 671;; arch-tag: 5864b283-5a68-4b59-a20d-36a72b353b9b
47db06aa 672;;; xml.el ends here