(xml-parse-tag): Avoid overwriting node-name.
[bpt/emacs.git] / lisp / xml.el
CommitLineData
1cd7adc6 1;;; xml.el --- XML parser
47db06aa 2
2e9bdf15 3;; Copyright (C) 2000, 01, 03, 2004 Free Software Foundation, Inc.
47db06aa
GM
4
5;; Author: Emmanuel Briot <briot@gnat.com>
720058f2 6;; Maintainer: Mark A. Hershberger <mah@everybody.org>
a98e819b 7;; Keywords: xml, data
47db06aa
GM
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs; see the file COPYING. If not, write to the
23;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24;; Boston, MA 02111-1307, USA.
25
26;;; Commentary:
27
a98e819b
DL
28;; This file contains a somewhat incomplete non-validating XML parser. It
29;; parses a file, and returns a list that can be used internally by
30;; any other lisp libraries.
47db06aa
GM
31
32;;; FILE FORMAT
33
a98e819b
DL
34;; The document type declaration may either be ignored or (optionally)
35;; parsed, but currently the parsing will only accept element
36;; declarations. The XML file is assumed to be well-formed. In case
37;; of error, the parsing stops and the XML file is shown where the
38;; parsing stopped.
47db06aa 39;;
a98e819b 40;; It also knows how to ignore comments and processing instructions.
47db06aa
GM
41;;
42;; The XML file should have the following format:
653558a1
GM
43;; <node1 attr1="name1" attr2="name2" ...>value
44;; <node2 attr3="name3" attr4="name4">value2</node2>
45;; <node3 attr5="name5" attr6="name6">value3</node3>
47db06aa
GM
46;; </node1>
47;; Of course, the name of the nodes and attributes can be anything. There can
48;; be any number of attributes (or none), as well as any number of children
49;; below the nodes.
50;;
;; There can be only one top-level node, but it may have any number of children below it.
52
53;;; LIST FORMAT
54
c7f8d055
SM
55;; The functions `xml-parse-file', `xml-parse-region' and
56;; `xml-parse-tag' return a list with the following format:
47db06aa
GM
57;;
58;; xml-list ::= (node node ...)
c7f8d055 59;; node ::= (qname attribute-list . child_node_list)
47db06aa
GM
60;; child_node_list ::= child_node child_node ...
61;; child_node ::= node | string
c7f8d055
SM
62;; qname ::= (:namespace-uri . "name") | "name"
63;; attribute_list ::= ((qname . "value") (qname . "value") ...)
47db06aa
GM
64;; | nil
65;; string ::= "..."
66;;
a98e819b
DL
67;; Some macros are provided to ease the parsing of this list.
68;; Whitespace is preserved. Fixme: There should be a tree-walker that
69;; can remove it.
47db06aa 70
c7f8d055
SM
71;; TODO:
72;; * xml:base, xml:space support
73;; * more complete DOCTYPE parsing
74;; * pi support
75
47db06aa
GM
76;;; Code:
77
a98e819b
DL
78;; Note that {buffer-substring,match-string}-no-properties were
79;; formerly used in several places, but that removes composition info.
80
47db06aa
GM
81;;*******************************************************************
82;;**
83;;** Macros to parse the list
84;;**
85;;*******************************************************************
86
(defsubst xml-node-name (node)
  "Return the tag stored at the head of NODE.
This is the first element of the node list.  The parser in this file
stores there either a symbol or, when namespace expansion was
requested, a cons (NAMESPACE . \"local-name\")."
  (nth 0 node))
47db06aa 91
(defsubst xml-node-attributes (node)
  "Return the attribute list of NODE, i.e. its second element.
The value is an alist of (NAME . VALUE) pairs and may be nil."
  (car (cdr node)))
47db06aa 96
(defsubst xml-node-children (node)
  "Return the children of NODE: everything after the tag and attributes.
The value is a list whose elements are nodes or strings; it can be nil."
  (nthcdr 2 node))
47db06aa
GM
101
(defun xml-get-children (node child-name)
  "Return the children of NODE whose tag is CHILD-NAME.
CHILD-NAME is compared with `equal' against each child's tag, so it
must have the same form the parser produced (normally a symbol).
Children that are not nodes -- the text strings stored between
tags -- are skipped.  The result preserves document order."
  (let ((match ()))
    (dolist (child (xml-node-children node))
      ;; Text children are plain strings; taking the tag (`car') of a
      ;; string signals an error, so only cons cells are inspected.
      (if (and (consp child)
               (equal (xml-node-name child) child-name))
          (push child match)))
    (nreverse match)))
47db06aa 111
(defun xml-get-attribute-or-nil (node attribute)
  "Look up ATTRIBUTE in the attribute list of NODE and return its value.
Return nil if NODE has no such attribute.

See also `xml-get-attribute'."
  (let ((entry (assoc attribute (xml-node-attributes node))))
    (cdr entry)))
9bcd6a7e
EZ
118
(defsubst xml-get-attribute (node attribute)
  "Return the value of ATTRIBUTE in NODE.
When the attribute is absent the empty string is returned instead
of nil.

See also `xml-get-attribute-or-nil'."
  (let ((value (xml-get-attribute-or-nil node attribute)))
    (if value
        value
      "")))
47db06aa
GM
125
126;;*******************************************************************
127;;**
128;;** Creating the list
129;;**
130;;*******************************************************************
131
a98e819b 132;;;###autoload
(defun xml-parse-file (file &optional parse-dtd parse-ns)
  "Parse the well-formed XML file FILE.
If FILE is already visited, use its buffer and don't kill it;
point in that buffer is put back where it was.  Otherwise FILE is
visited with `find-file' (with `auto-mode-alist' bound to nil) and
the buffer is killed once parsing has finished.
Returns the top node with all its children.
If PARSE-DTD is non-nil, the DTD is parsed rather than skipped.
If PARSE-NS is non-nil, then QNAMES are expanded."
  (let ((visiting (get-file-buffer file))
        saved-point
        parsed)
    (cond
     (visiting
      ;; Reuse the existing buffer and remember where point was.
      (set-buffer visiting)
      (setq saved-point (point)))
     (t
      ;; No need for xml-mode just to parse the text.
      (let ((auto-mode-alist nil))
        (find-file file))))
    (setq parsed (xml-parse-region (point-min)
                                   (point-max)
                                   (current-buffer)
                                   parse-dtd parse-ns))
    ;; A saved point means the buffer existed before the call: keep it.
    (if saved-point
        (goto-char saved-point)
      (kill-buffer (current-buffer)))
    parsed))
47db06aa 155
a98e819b
DL
156;; Note that this is setup so that we can do whitespace-skipping with
157;; `(skip-syntax-forward " ")', inter alia. Previously this was slow
158;; compared with `re-search-forward', but that has been fixed. Also
159;; note that the standard syntax table contains other characters with
160;; whitespace syntax, like NBSP, but they are invalid in contexts in
161;; which we might skip whitespace -- specifically, they're not
162;; NameChars [XML 4].
163
(defvar xml-syntax-table
  (let ((table (make-syntax-table))
        (code 0))
    ;; Get space syntax correct per XML [3]: character codes 0
    ;; through 30 are first given punctuation syntax ...
    (while (< code 31)
      (modify-syntax-entry code "." table)
      (setq code (1+ code)))
    ;; ... and then tab, newline and carriage return are made
    ;; whitespace again.
    (mapc (lambda (ws) (modify-syntax-entry ws " " table))
          '(?\t ?\n ?\r))
    ;; Both quote characters delimit strings; this is for skipping
    ;; attributes.
    (modify-syntax-entry ?\" "\"" table)
    (modify-syntax-entry ?' "\"" table)
    ;; Non-alnum name chars should be symbol constituents (`-' and `_'
    ;; are OK by default).
    (modify-syntax-entry ?. "_" table)
    (modify-syntax-entry ?: "_" table)
    ;; XML [89]: these code points get word syntax.
    (mapc (lambda (ucs)
            (modify-syntax-entry (decode-char 'ucs ucs) "w" table))
          '(#x00B7 #x02D0 #x02D1 #x0387 #x0640 #x0E46 #x0EC6 #x3005
            #x3031 #x3032 #x3033 #x3034 #x3035 #x309D #x309E #x30FC
            #x30FD #x30FE))
    ;; Fixme: rest of [4]
    table)
  "Syntax table used by `xml-parse-region'.")
186
187;; XML [5]
188;; Note that [:alpha:] matches all multibyte chars with word syntax.
ab161457
JPW
;; Defined inside `eval-and-compile' so the value is also available
;; while this file is being byte-compiled.
(eval-and-compile
  (defconst xml-name-regexp
    (concat "[[:alpha:]_:]"       ; first character of a name
            "[[:alnum:]._:-]*")   ; any further name characters
    "Regexp matching an XML name (XML [5])."))
a98e819b
DL
191
192;; Fixme: This needs re-writing to deal with the XML grammar properly, i.e.
193;; document ::= prolog element Misc*
194;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
195
196;;;###autoload
(defun xml-parse-region (beg end &optional buffer parse-dtd parse-ns)
  "Parse the region from BEG to END in BUFFER.
If BUFFER is nil, it defaults to the current buffer.
Returns the XML list for the region, or raises an error if the region
is not well-formed XML.
If PARSE-DTD is non-nil, the DTD is parsed rather than skipped,
and returned as the first element of the list.
If PARSE-NS is non-nil, then QNAMES are expanded.

Point, narrowing, syntax table and the current buffer are all
restored when the function returns."
  ;; Switch to BUFFER *before* narrowing and installing the syntax
  ;; table, so that BEG/END, the restriction, the syntax table and the
  ;; (buffer-local) `case-fold-search' binding all apply to the buffer
  ;; that is actually parsed.
  (with-current-buffer (or buffer (current-buffer))
    (save-excursion
      (save-restriction
        (narrow-to-region beg end)
        ;; Use fixed syntax table to ensure regexp char classes and
        ;; syntax specs DTRT.
        (with-syntax-table (standard-syntax-table)
          (let ((case-fold-search nil)  ; XML is case-sensitive.
                xml result dtd start)
            (goto-char (point-min))
            (while (not (eobp))
              (if (search-forward "<" nil t)
                  (progn
                    (forward-char -1)
                    (setq start (point))
                    (setq result (xml-parse-tag parse-dtd parse-ns))
                    (if (and xml result)
                        ;; translation of rule [1] of XML specifications
                        (error "XML files can have only one toplevel tag")
                      (cond
                       ((null result)
                        ;; Nothing was parsed.  If point is still on
                        ;; the same `<' (e.g. a stray end tag), step
                        ;; over it; otherwise the search above would
                        ;; find it again and loop forever.
                        (if (= (point) start)
                            (forward-char 1)))
                       ;; `xml-parse-tag' returns (DTD . NODE) when it
                       ;; parsed a DTD, and the DTD is a list headed
                       ;; by the symbol `dtd'.  Checking for that
                       ;; symbol keeps a namespace-qualified node,
                       ;; whose name is also a cons, from being taken
                       ;; for a DTD.
                       ((and (eq (car-safe (car result)) 'dtd)
                             parse-dtd)
                        (setq dtd (car result))
                        (if (cdr result)  ; possible leading comment
                            (push (cdr result) xml)))
                       (t
                        (push result xml)))))
                ;; No further tag: skip whatever text remains.
                (goto-char (point-max))))
            (if parse-dtd
                (cons dtd (nreverse xml))
              (nreverse xml))))))))
47db06aa 237
c7f8d055
SM
(defun xml-maybe-do-ns (name default xml-ns)
  "Perform any namespace expansion on the string NAME.
DEFAULT is the prefix assumed when NAME carries none.  XML-NS is an
alist mapping prefixes to namespace URIs; when namespace-aware
parsing is off, XML-NS is nil and NAME is simply interned.

During namespace-aware parsing the result is a cons (URI . LOCAL-NAME).
A name without a prefix is looked up under DEFAULT; a nil DEFAULT
means the name shouldn't be given a namespace.  A bare \"xmlns\"
with a nil DEFAULT is looked up under the \"xmlns\" prefix and gets
the empty string as its local name."
  (if (not (consp xml-ns))
      (intern name)
    (let* ((colon (string-match ":" name))
           (local (if colon (substring name (1+ colon)) name))
           (prefix (if colon (substring name 0 colon) default))
           (xmlns-decl (and (string-equal local "xmlns") (not prefix)))
           (key (if xmlns-decl "xmlns" prefix))
           ;; A prefix with no entry in XML-NS (including a nil one)
           ;; falls back to the empty keyword `:'.
           (uri (or (cdr (assoc key xml-ns))
                    :)))
      (cons uri (if xmlns-decl "" local)))))
47db06aa 259
(defun xml-parse-tag (&optional parse-dtd parse-ns)
  "Parse the tag at point.
If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
returned as the first element in the list.
If PARSE-NS is non-nil, then QNAMES are expanded.  PARSE-NS may also
be an alist of (PREFIX . URI) pairs, which is then used as the set of
namespaces in scope (this is how recursive calls pass it down).
Returns one of:
 - a list : the matching node
 - a string : the contents of a CDATA section at point
 - nil    : the point is not looking at a tag (a comment or an end tag).
 - a pair : the first element is the DTD, the second is the node.
Signals an error if the text at point is not well-formed."
  (let ((xml-ns (if (consp parse-ns)
                    parse-ns
                  (if parse-ns
                      (list
                       ;; Default for empty prefix is no namespace
                       (cons "" :)
                       ;; "xml" namespace
                       (cons "xml" :http://www.w3.org/XML/1998/namespace)
                       ;; We need to seed the xmlns namespace
                       (cons "xmlns" :http://www.w3.org/2000/xmlns/))))))
    (cond
     ;; Processing instructions (like the <?xml version="1.0"?> tag at the
     ;; beginning of a document).
     ((looking-at "<\\?")
      (search-forward "?>")
      (skip-syntax-forward " ")
      (xml-parse-tag parse-dtd xml-ns))
     ;; Character data (CDATA) sections, in which no tag should be interpreted
     ((looking-at "<!\\[CDATA\\[")
      (let ((pos (match-end 0)))
        (unless (search-forward "]]>" nil t)
          (error "CDATA section does not end anywhere in the document"))
        (buffer-substring pos (match-beginning 0))))
     ;; DTD for the document
     ((looking-at "<!DOCTYPE")
      (let (dtd)
        (if parse-dtd
            (setq dtd (xml-parse-dtd))
          (xml-skip-dtd))
        (skip-syntax-forward " ")
        (if dtd
            (cons dtd (xml-parse-tag nil xml-ns))
          (xml-parse-tag nil xml-ns))))
     ;; skip comments
     ((looking-at "<!--")
      (search-forward "-->")
      nil)
     ;; end tag: left in place for the caller to match
     ((looking-at "</")
      '())
     ;; opening tag
     ((looking-at "<\\([^/>[:space:]]+\\)")
      (goto-char (match-end 1))

      ;; Parse this node
      (let* ((node-name (match-string 1))
             ;; Parse the attribute list.
             (attrs (xml-parse-attlist xml-ns))
             children pos)

        ;; add the xmlns:* attrs to our cache
        (when (consp xml-ns)
          (dolist (attr attrs)
            (when (and (consp (car attr))
                       (eq :http://www.w3.org/2000/xmlns/
                           (caar attr)))
              (push (cons (cdar attr) (intern (concat ":" (cdr attr))))
                    xml-ns))))

        ;; CHILDREN is built in reverse: attributes first, then the
        ;; (possibly namespace-expanded) name.  NODE-NAME itself is
        ;; left untouched because the raw string is needed below to
        ;; recognize the end tag.
        (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))

        ;; is this an empty element ?
        (if (looking-at "/>")
            (progn
              (forward-char 2)
              (nreverse children))

          ;; is this a valid start tag ?
          (if (eq (char-after) ?>)
              (progn
                (forward-char 1)
                ;; Now check that we have the right end-tag.  Note that this
                ;; one might contain spaces after the tag name.  The name
                ;; is quoted because characters that are legal in names,
                ;; such as `.', are special in a regexp.
                (let ((end (concat "</" (regexp-quote node-name) "\\s-*>")))
                  (while (not (looking-at end))
                    (cond
                     ((looking-at "</")
                      (error "XML: Invalid end tag (expecting %s) at pos %d"
                             node-name (point)))
                     ;; `char-after' is nil at the end of the buffer;
                     ;; report the unterminated element explicitly
                     ;; instead of handing nil to `='.
                     ((eobp)
                      (error "XML: End of buffer while reading element %s"
                             node-name))
                     ((= (char-after) ?<)
                      (let ((tag (xml-parse-tag nil xml-ns)))
                        (when tag
                          (push tag children))))
                     (t
                      (setq pos (point))
                      (search-forward "<")
                      (forward-char -1)
                      (let ((string (buffer-substring pos (point)))
                            (pos 0))

                        ;; Clean up the string.  As per XML
                        ;; specifications, the XML processor should
                        ;; always pass the whole string to the
                        ;; application.  But \r's should be replaced:
                        ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
                        (while (string-match "\r\n?" string pos)
                          (setq string (replace-match "\n" t t string))
                          (setq pos (1+ (match-beginning 0))))

                        (setq string (xml-substitute-special string))
                        (setq children
                              (if (stringp (car children))
                                  ;; The two strings were separated by a comment.
                                  (cons (concat (car children) string)
                                        (cdr children))
                                (cons string children))))))))

                ;; The match data is that of the end tag just recognized.
                (goto-char (match-end 0))
                (nreverse children))
            ;; This was an invalid start tag
            (error "XML: Invalid attribute list")))))
     (t ;; This is not a tag.
      (error "XML: Invalid character")))))
47db06aa 382
(defun xml-parse-attlist (&optional xml-ns)
  "Return the attribute-list after point.
Leave point at the first non-blank character after the attributes.
The value is an alist of (NAME . VALUE) pairs in document order.
Each NAME is produced by `xml-maybe-do-ns' using XML-NS, and each
VALUE has its entity references substituted.
Signals an error if a value is not quoted or if an attribute name
occurs twice."
  (let ((attlist ())
        end-pos name)
    (skip-syntax-forward " ")
    (while (looking-at (eval-when-compile
                         (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*")))
      (setq end-pos (match-end 0))
      (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns))
      (goto-char end-pos)

      ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize

      ;; The value must be a string between quotes or double-quotes.
      (if (looking-at "\"\\([^\"]*\\)\"")
          (setq end-pos (match-end 0))
        (if (looking-at "'\\([^']*\\)'")
            (setq end-pos (match-end 0))
          (error "XML: Attribute values must be given between quotes")))

      ;; Each attribute must be unique within a given element
      (if (assoc name attlist)
          (error "XML: each attribute must be unique within an element"))

      ;; The value is stored with its whitespace untouched.  An
      ;; earlier `replace-regexp-in-string' call here threw its result
      ;; away and so never changed anything; it is dropped rather than
      ;; made effective, because collapsing runs of whitespace is only
      ;; specified for attributes declared with a non-CDATA type, and
      ;; no declarations are consulted here.
      (push (cons name (xml-substitute-special (match-string 1))) attlist)

      (goto-char end-pos)
      (skip-syntax-forward " "))
    (nreverse attlist)))
47db06aa
GM
419
420;;*******************************************************************
421;;**
422;;** The DTD (document type declaration)
423;;** The following functions know how to skip or parse the DTD of
424;;** a document
425;;**
426;;*******************************************************************
427
a98e819b
DL
428;; Fixme: This fails at least if the DTD contains conditional sections.
429
(defun xml-skip-dtd ()
  "Skip the DTD at point.
Point must be at the start of the \"<!DOCTYPE\" keyword; it is left
just after the `>' that ends the declaration.
This follows the rule [28] in the XML specifications."
  (let ((keyword "<!DOCTYPE"))
    (forward-char (length keyword)))
  (when (looking-at "\\s-*>")
    (error "XML: invalid DTD (excepting name of the document)"))
  (condition-case nil
      (progn
        ;; Move over the document name.
        (forward-sexp)
        (skip-syntax-forward " ")
        (cond
         ;; An internal subset: go to the `]' ... `>' that closes it.
         ((looking-at "\\[")
          (re-search-forward "]\\s-*>"))
         ;; Otherwise the declaration ends at the next `>'.
         (t
          (search-forward ">"))))
    ;; Any failure above is reported as an unterminated DTD.
    (error (error "XML: No end to the DTD"))))
444
a98e819b
DL
(defun xml-parse-dtd ()
  "Parse the DTD at point.
Point must be at the start of the \"<!DOCTYPE\" keyword; it is left
just after the `>' that ends the declaration.
The value is a list (dtd NAME ITEM...) where NAME is the document
name and each ITEM is one of
  (PUBID SYSID public)  for a PUBLIC external identifier,
  (SYSID system)        for a SYSTEM external identifier,
  (ELEMENT TYPE)        for an <!ELEMENT ...> declaration.
Only element declarations and comments are accepted inside the
internal subset; anything else signals an error."
  (forward-char (eval-when-compile (length "<!DOCTYPE")))
  (skip-syntax-forward " ")
  (if (looking-at ">")
      (error "XML: invalid DTD (excepting name of the document)"))

  ;; Get the name of the document.  Without a match there is no name,
  ;; and the match data used below would be stale.
  (unless (looking-at xml-name-regexp)
    (error "XML: invalid DTD (excepting name of the document)"))
  (let ((dtd (list (match-string 0) 'dtd))
        type element end-pos)
    (goto-char (match-end 0))

    (skip-syntax-forward " ")
    ;; XML [75]
    (cond ((looking-at "PUBLIC\\s-+")
           (goto-char (match-end 0))
           (unless (or (re-search-forward
                        "\\=\"\\([[:space:][:alnum:]-'()+,./:=?;!*#@$_%]*\\)\""
                        nil t)
                       (re-search-forward
                        "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'"
                        nil t))
             (error "XML: missing public id"))
           (let ((pubid (match-string 1)))
             ;; The system id is separated from the public id by
             ;; whitespace, and the searches below are anchored at
             ;; point, so that whitespace has to be skipped first.
             (skip-syntax-forward " ")
             (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
                         (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
               (error "XML: missing system id"))
             (push (list pubid (match-string 1) 'public) dtd)))
          ((looking-at "SYSTEM\\s-+")
           (goto-char (match-end 0))
           (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
                       (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
             (error "XML: missing system id"))
           (push (list (match-string 1) 'system) dtd)))
    (skip-syntax-forward " ")
    (if (eq ?> (char-after))
        (forward-char)
      (skip-syntax-forward " ")
      (if (not (eq (char-after) ?\[))
          (error "XML: bad DTD")
        (forward-char)
        ;; Parse the rest of the DTD
        ;; Fixme: Deal with ENTITY, ATTLIST, NOTATION, PIs.
        (while (not (looking-at "\\s-*\\]"))
          (skip-syntax-forward " ")
          (cond

           ;; Translation of rule [45] of XML specifications
           ((looking-at
             "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")

            (setq element (match-string 1)
                  type (match-string-no-properties 2))
            (setq end-pos (match-end 0))

            ;; Translation of rule [46] of XML specifications
            (cond
             ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration
              (setq type 'empty))
             ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
              (setq type 'any))
             ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
              (setq type (xml-parse-elem-type (match-string 1 type))))
             ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
              nil)
             (t
              (error "XML: Invalid element type in the DTD")))

            ;; rule [45]: the element declaration must be unique
            (if (assoc element dtd)
                (error "XML: element declarations must be unique in a DTD (<%s>)"
                       element))

            ;; Store the element in the DTD
            (push (list element type) dtd)
            (goto-char end-pos))
           ((looking-at "<!--")
            (search-forward "-->"))

           (t
            (error "XML: Invalid DTD item"))))

        ;; Skip the end of the DTD.  This has to happen once, after
        ;; the loop has reached the closing `]': doing it after every
        ;; item would jump over the following declaration or past the
        ;; end of the DTD.
        (search-forward ">")))
    (nreverse dtd)))
47db06aa
GM
531
(defun xml-parse-elem-type (string)
  "Convert element type STRING into a Lisp structure.
A parenthesized group containing `|' becomes (choice ITEM...), one
containing `,' becomes (seq ITEM...), with each ITEM converted
recursively.  The name \"#PCDATA\" becomes the symbol `pcdata'; any
other name stays a string.  A trailing `+', `*' or `?' wraps the
result in a list headed by the corresponding symbol."
  (let (elem modifier)
    (cond
     ;; A parenthesized group, possibly followed by a modifier.
     ((string-match "(\\([^)]+\\))\\([+*?]?\\)" string)
      (setq elem (match-string 1 string)
            modifier (match-string 2 string))
      (cond
       ((string-match "|" elem)
        (setq elem (cons 'choice
                         (mapcar 'xml-parse-elem-type
                                 (split-string elem "|")))))
       ((string-match "," elem)
        (setq elem (cons 'seq
                         (mapcar 'xml-parse-elem-type
                                 (split-string elem ",")))))))
     ;; A plain name, possibly followed by a modifier.
     ((string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string)
      (setq elem (match-string 1 string)
            modifier (match-string 2 string))))

    (when (and (stringp elem) (string= elem "#PCDATA"))
      (setq elem 'pcdata))

    ;; Map the modifier text to the symbol that heads the result; an
    ;; empty or missing modifier leaves the element as it is.
    (let ((wrapper (cdr (assoc modifier
                               '(("+" . +) ("*" . *) ("?" . \?))))))
      (if wrapper
          (list wrapper elem)
        elem))))
47db06aa 564
47db06aa
GM
565;;*******************************************************************
566;;**
567;;** Substituting special XML sequences
568;;**
569;;*******************************************************************
570
a98e819b
DL
;; Fixme: Take declared entities from the DTD when they're available.
(defun xml-substitute-entity (match)
  "Return the replacement text for the entity reference MATCH.
Subroutine of `xml-substitute-special'.  MATCH is the complete
reference, from the `&' up to and including the `;', for example
\"&amp;\" or \"&#38;\".  The five predefined entities and decimal or
hexadecimal character references are replaced; anything else,
including a character reference that cannot be represented, is
returned unchanged."
  ;; The entity name is taken from MATCH itself, so no variable that
  ;; happens to be bound inside `replace-regexp-in-string' is needed.
  (save-match-data
    (let ((name (if (string-match "\\`&\\([^;]+\\);\\'" match)
                    (match-string 1 match)
                  "")))
      (cond ((string= name "lt") "<")
            ((string= name "gt") ">")
            ((string= name "apos") "'")
            ((string= name "quot") "\"")
            ((string= name "amp") "&")
            ;; Character references.  The patterns are anchored at both
            ;; ends so that only a name consisting entirely of `#' and
            ;; digits counts as one.
            ((and (string-match "\\`#\\([0-9]+\\)\\'" name)
                  (let ((c (decode-char
                            'ucs
                            (string-to-number (match-string 1 name)))))
                    (if c (string c)))))    ; else unrepresentable
            ((and (string-match "\\`#x\\([[:xdigit:]]+\\)\\'" name)
                  (let ((c (decode-char
                            'ucs
                            (string-to-number (match-string 1 name) 16))))
                    (if c (string c)))))
            ;; Default to asis.  Arguably, unrepresentable code points
            ;; might be best replaced with U+FFFD.
            (t match)))))
597
(defun xml-substitute-special (string)
  "Return STRING, after substituting entity references.
Each piece of text of the form `&NAME;' is handed to
`xml-substitute-entity' and replaced by what that returns."
  ;; This originally made repeated passes through the string from the
  ;; beginning, which isn't correct, since then either "&amp;amp;" or
  ;; "&#38;amp;" won't DTRT.  A single pass is made instead.
  (let ((entity-re "&\\([^;]+\\);"))
    (replace-regexp-in-string entity-re
                              #'xml-substitute-entity
                              string t t)))
47db06aa
GM
605
606;;*******************************************************************
607;;**
608;;** Printing a tree.
609;;** This function is intended mainly for debugging purposes.
610;;**
611;;*******************************************************************
612
(defun xml-debug-print (xml)
  "Print every node of the list XML with `xml-debug-print-internal'.
Each node is printed with an empty indentation string.  Returns nil."
  (let ((remaining xml))
    (while remaining
      (xml-debug-print-internal (car remaining) "")
      (setq remaining (cdr remaining)))))
47db06aa 616
(defun xml-debug-print-internal (xml indent-string)
  "Outputs the XML tree in the current buffer.
The first line is indented with INDENT-STRING.
XML is a single node.  Its tag and attribute names are passed to
`symbol-name'.  String children are inserted as they are; list
children are printed recursively on a new line with a longer
indentation.  Any other kind of child signals an error."
  (let ((tag (symbol-name (xml-node-name xml))))
    ;; opening tag
    (insert indent-string "<" tag)

    ;; output the attribute list
    (dolist (attr (xml-node-attributes xml))
      (insert " " (symbol-name (car attr)) "=\"" (cdr attr) "\""))

    (insert ">")

    ;; output the children
    (dolist (child (xml-node-children xml))
      (cond
       ((listp child)
        (insert "\n")
        (xml-debug-print-internal child (concat indent-string " ")))
       ((stringp child)
        (insert child))
       (t
        (error "Invalid XML tree"))))

    ;; closing tag, on a line of its own
    (insert "\n" indent-string "</" tag ">")))
47db06aa
GM
646
647(provide 'xml)
648
ab5796a9 649;;; arch-tag: 5864b283-5a68-4b59-a20d-36a72b353b9b
47db06aa 650;;; xml.el ends here