(xml-get-attribute-or-nil): Doc fix.
[bpt/emacs.git] / lisp / xml.el
CommitLineData
1cd7adc6 1;;; xml.el --- XML parser
47db06aa 2
a98e819b 3;; Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
47db06aa
GM
4
5;; Author: Emmanuel Briot <briot@gnat.com>
720058f2 6;; Maintainer: Mark A. Hershberger <mah@everybody.org>
a98e819b 7;; Keywords: xml, data
47db06aa
GM
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
22;; along with GNU Emacs; see the file COPYING. If not, write to the
23;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24;; Boston, MA 02111-1307, USA.
25
26;;; Commentary:
27
a98e819b
DL
28;; This file contains a somewhat incomplete non-validating XML parser. It
29;; parses a file, and returns a list that can be used internally by
30;; any other lisp libraries.
47db06aa
GM
31
32;;; FILE FORMAT
33
a98e819b
DL
34;; The document type declaration may either be ignored or (optionally)
35;; parsed, but currently the parsing will only accept element
36;; declarations. The XML file is assumed to be well-formed. In case
37;; of error, the parsing stops and the XML file is shown where the
38;; parsing stopped.
47db06aa 39;;
a98e819b 40;; It also knows how to ignore comments and processing instructions.
47db06aa
GM
41;;
42;; The XML file should have the following format:
653558a1
GM
43;; <node1 attr1="name1" attr2="name2" ...>value
44;; <node2 attr3="name3" attr4="name4">value2</node2>
45;; <node3 attr5="name5" attr6="name6">value3</node3>
47db06aa
GM
46;; </node1>
47;; Of course, the name of the nodes and attributes can be anything. There can
48;; be any number of attributes (or none), as well as any number of children
49;; below the nodes.
50;;
51;; There can be only one top level node, but with any number of children below.
52
53;;; LIST FORMAT
54
55;; The functions `xml-parse-file' and `xml-parse-tag' return a list with
56;; the following format:
57;;
58;; xml-list ::= (node node ...)
59;; node ::= (tag_name attribute_list . child_node_list)
60;; child_node_list ::= child_node child_node ...
61;; child_node ::= node | string
62;; tag_name ::= string
63;; attribute_list ::= (("attribute" . "value") ("attribute" . "value") ...)
64;; | nil
65;; string ::= "..."
66;;
a98e819b
DL
67;; Some macros are provided to ease the parsing of this list.
68;; Whitespace is preserved. Fixme: There should be a tree-walker that
69;; can remove it.
47db06aa
GM
70
71;;; Code:
72
a98e819b
DL
73;; Note that {buffer-substring,match-string}-no-properties were
74;; formerly used in several places, but that removes composition info.
75
47db06aa
GM
76;;*******************************************************************
77;;**
78;;** Macros to parse the list
79;;**
80;;*******************************************************************
81
(defsubst xml-node-name (node)
  "Return the tag of NODE.
The tag is the first element of the list that represents NODE."
  (nth 0 node))
47db06aa 86
(defsubst xml-node-attributes (node)
  "Return the attribute list of NODE.
This is the second element of the list that represents NODE, and it
can be nil."
  (car (cdr node)))
47db06aa 91
(defsubst xml-node-children (node)
  "Return the children of NODE.
The children are everything after the tag and the attribute list in
the list that represents NODE; the result can be nil."
  (nthcdr 2 node))
47db06aa
GM
96
(defun xml-get-children (node child-name)
  "Return the children of NODE whose tag is CHILD-NAME.
CHILD-NAME is compared with `equal' against the tag of each child, so
it must have the same form as the tags stored in the tree.
Children that are not nodes, such as text strings, are ignored.
The matching children are returned in their original order; the
result is nil when there is no match."
  (let ((match ()))
    (dolist (child (xml-node-children node))
      ;; Text children are plain strings.  Taking the `car' of a string
      ;; signals an error, so only conses are examined as nodes.
      (if (and (consp child)
               (equal (xml-node-name child) child-name))
          (push child match)))
    (nreverse match)))
47db06aa 106
(defun xml-get-attribute-or-nil (node attribute)
  "Look up ATTRIBUTE in the attribute list of NODE and return its value.
ATTRIBUTE is searched for with `assoc'.
Return nil if NODE has no such attribute.

See also `xml-get-attribute'."
  (let ((attributes (xml-node-attributes node)))
    (and attributes
         (cdr (assoc attribute attributes)))))
9bcd6a7e
EZ
116
(defsubst xml-get-attribute (node attribute)
  "Return the value of ATTRIBUTE in NODE.
When `xml-get-attribute-or-nil' finds no value, return an empty
string instead of nil.

See also `xml-get-attribute-or-nil'."
  (let ((value (xml-get-attribute-or-nil node attribute)))
    (if value
        value
      "")))
47db06aa
GM
123
124;;*******************************************************************
125;;**
126;;** Creating the list
127;;**
128;;*******************************************************************
129
a98e819b 130;;;###autoload
(defun xml-parse-file (file &optional parse-dtd parse-ns)
  "Parse the well-formed XML file FILE.
If a buffer is already visiting FILE, that buffer is parsed, its point
is put back where it was, and the buffer is not killed.  Otherwise
FILE is visited with `find-file' (with `auto-mode-alist' bound to nil)
and the buffer is killed once parsing is done.
Return the value of `xml-parse-region' for the whole buffer.
If PARSE-DTD is non-nil, the DTD is parsed rather than skipped.
If PARSE-NS is non-nil, then QNAMES are expanded."
  (let ((visiting (get-file-buffer file))
        saved-point)
    (if visiting
        (progn
          (set-buffer visiting)
          (setq saved-point (point)))
      ;; No major mode is needed just to parse the text.
      (let (auto-mode-alist)
        (find-file file)))
    (let ((tree (xml-parse-region (point-min)
                                  (point-max)
                                  (current-buffer)
                                  parse-dtd parse-ns)))
      (if saved-point
          (goto-char saved-point)
        (kill-buffer (current-buffer)))
      tree)))
47db06aa 153
a98e819b
DL
154;; Note that this is setup so that we can do whitespace-skipping with
155;; `(skip-syntax-forward " ")', inter alia. Previously this was slow
156;; compared with `re-search-forward', but that has been fixed. Also
157;; note that the standard syntax table contains other characters with
158;; whitespace syntax, like NBSP, but they are invalid in contexts in
159;; which we might skip whitespace -- specifically, they're not
160;; NameChars [XML 4].
161
(defvar xml-syntax-table
  (let ((table (make-syntax-table))
        (code 0))
    ;; Character codes 0 through 30 get punctuation syntax first...
    (while (< code 31)
      (modify-syntax-entry code "." table)
      (setq code (1+ code)))
    ;; ...then, in this order: tab, newline and carriage return become
    ;; whitespace (XML [3]); both quote characters become string
    ;; delimiters, for skipping attributes; `.' and `:' become symbol
    ;; constituents, as they are non-alphanumeric name characters.
    (dolist (entry '((?\t . " ") (?\n . " ") (?\r . " ")
                     (?\" . "\"") (?' . "\"")
                     (?. . "_") (?: . "_")))
      (modify-syntax-entry (car entry) (cdr entry) table))
    ;; XML [89]: these code points get word syntax.
    (dolist (ucs '(#x00B7 #x02D0 #x02D1 #x0387 #x0640 #x0E46 #x0EC6 #x3005
                   #x3031 #x3032 #x3033 #x3034 #x3035 #x309D #x309E #x30FC
                   #x30FD #x30FE))
      (modify-syntax-entry (decode-char 'ucs ucs) "w" table))
    ;; Fixme: rest of [4]
    table)
  "Syntax table describing XML whitespace, quotes and name characters.
NOTE(review): `xml-parse-region' installs the standard syntax table
while parsing; confirm where this table is meant to be used.")
184
185;; XML [5]
186;; Note that [:alpha:] matches all multibyte chars with word syntax.
ab161457
JPW
(eval-and-compile
  (defconst xml-name-regexp
    "[[:alpha:]_:][[:alnum:]._:-]*"
    "Regexp matching a name.
The first character is alphabetic, `_' or `:'; the following
characters are alphanumeric or one of `.', `_', `:' and `-'.
The value is also needed at compile time."))
a98e819b
DL
189
190;; Fixme: This needs re-writing to deal with the XML grammar properly, i.e.
191;; document ::= prolog element Misc*
192;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
193
194;;;###autoload
(defun xml-parse-region (beg end &optional buffer parse-dtd parse-ns)
  "Parse the region from BEG to END in BUFFER.
If BUFFER is nil, it defaults to the current buffer.
Returns the XML list for the region, or raises an error if the region
is not well-formed XML.
If PARSE-DTD is non-nil, the DTD is parsed rather than skipped,
and returned as the first element of the list.
If PARSE-NS is non-nil, then QNAMES are expanded.
Point, narrowing and the syntax table of BUFFER are restored when
parsing is finished."
  ;; Select BUFFER before narrowing and before installing the syntax
  ;; table: both operations act on the current buffer, so doing them
  ;; first would affect the wrong buffer whenever BUFFER is not current.
  (with-current-buffer (or buffer (current-buffer))
    (save-excursion
      (save-restriction
        (narrow-to-region beg end)
        ;; Use fixed syntax table to ensure regexp char classes and
        ;; syntax specs DTRT.
        (with-syntax-table (standard-syntax-table)
          (let ((case-fold-search nil)  ; XML is case-sensitive.
                xml result dtd)
            (goto-char (point-min))
            (while (not (eobp))
              (if (search-forward "<" nil t)
                  (progn
                    (forward-char -1)
                    (setq result (xml-parse-tag parse-dtd parse-ns))
                    (if (and xml result)
                        ;; translation of rule [1] of XML specifications
                        (error "XML files can have only one toplevel tag")
                      (cond
                       ((null result))
                       ;; `xml-parse-tag' returns (DTD . NODE) when it
                       ;; parsed a DTD, and a parsed DTD is a list
                       ;; starting with the symbol `dtd'.  Testing for
                       ;; that symbol keeps namespace-expanded tags,
                       ;; which are conses too, from being taken for
                       ;; a DTD.
                       ((and parse-dtd
                             (eq (car-safe (car-safe result)) 'dtd))
                        (setq dtd (car result))
                        (if (cdr result) ; possible leading comment
                            (push (cdr result) xml)))
                       (t
                        (push result xml)))))
                ;; No further tag: stop the loop.
                (goto-char (point-max))))
            (if parse-dtd
                (cons dtd (nreverse xml))
              (nreverse xml))))))))
47db06aa 235
34638996
EZ
(defun xml-ns-parse-ns-attrs (attr-list &optional xml-ns)
  "Add the namespace declarations found in ATTR-LIST to XML-NS.
ATTR-LIST is a list of conses whose car is the attribute name, a
string, and whose cdr is the attribute value.  Each name is split at
colons; when the first part is \"xmlns\", a cons of the second part
\(or the empty string if there is none) and the attribute value is
pushed onto XML-NS.
Return the resulting list."
  (dolist (attr attr-list)
    (let* ((parts (split-string (car attr) ":"))
           (local (nth 1 parts)))
      (if (string= "xmlns" (nth 0 parts))
          (setq xml-ns (cons (cons (or local "") (cdr attr))
                             xml-ns)))))
  xml-ns)
252
253;; expand element names
(defun xml-ns-expand-el (el xml-ns)
  "Expand the element name EL using the namespace alist XML-NS.
EL is a string, either \"prefix:local-name\" or just \"local-name\".
The prefix (the empty string when EL has none, or \"xmlns\" when EL
is exactly \"xmlns\") is looked up in XML-NS with `assoc-string'.
If the associated namespace is the empty string, return the local
name; otherwise return a cons of the symbol named \":NAMESPACE\" and
the local name."
  (let* ((parts (split-string el ":"))
         (head (nth 0 parts))
         (tail (nth 1 parts))
         (local (or tail head))
         (prefix (cond (tail head)
                       ((string= local "xmlns") "xmlns")
                       (t "")))
         (uri (cdr (assoc-string prefix xml-ns))))
    (if (string= "" uri)
        local
      (cons (intern (concat ":" uri))
            local))))
271
272;; expand attribute names
(defun xml-ns-expand-attr (attr-list xml-ns)
  "Expand the attribute names in ATTR-LIST using the alist XML-NS.
Each element of ATTR-LIST is a cons whose car is an attribute name
of the form \"prefix:local-name\" or \"local-name\".  The prefix (the
empty string when there is none, or \"xmlns\" when the name is exactly
\"xmlns\") is looked up in XML-NS with `assoc-string'.  The car is
then replaced, destructively, by the local name if the namespace is
the empty string, or else by a cons of the symbol named \":NAMESPACE\"
and the local name.
Return ATTR-LIST."
  (dolist (attr attr-list)
    (let* ((parts (split-string (car attr) ":"))
           (head (nth 0 parts))
           (tail (nth 1 parts))
           (local (or tail head))
           (prefix (cond (tail head)
                         ((string= (car attr) "xmlns") "xmlns")
                         (t "")))
           (uri (cdr (assoc-string prefix xml-ns))))
      (setcar attr
              (if (string= "" uri)
                  local
                (cons (intern (concat ":" uri))
                      local)))))
  attr-list)
295
34638996
EZ
(defun xml-intern-attrlist (attr-list)
  "Convert attribute names to symbols for backward compatibility.
The car of every element of ATTR-LIST, a string, is replaced
destructively by the symbol with that name.  Return ATTR-LIST."
  (let ((rest attr-list))
    (while rest
      (setcar (car rest) (intern (car (car rest))))
      (setq rest (cdr rest))))
  attr-list)
47db06aa 302
(defun xml-parse-tag (&optional parse-dtd parse-ns)
  "Parse the tag at point.
If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
returned as the first element in the list.
If PARSE-NS is non-nil, then QNAMES are expanded.  PARSE-NS can also be
an alist of namespace declarations already in effect, which is how
recursive calls pass the declarations on to child elements.
Returns one of:
 - a list : the matching node
 - a string : the text of the CDATA section at point
 - nil : point is at a comment, at an end tag, or at a processing
   instruction that is not followed by another tag
 - a pair : the first element is the DTD, the second is the node.
Signals an error if the text at point is not a tag or if the element
is not closed by the matching end tag."
  (let ((xml-ns (if (consp parse-ns)
                    parse-ns
                  (if parse-ns
                      (list
                       ;; Default no namespace
                       (cons "" "")
                       ;; We need to seed the xmlns namespace
                       (cons "xmlns" "http://www.w3.org/2000/xmlns/"))))))
    (cond
     ;; Processing instructions (like the <?xml version="1.0"?> tag at the
     ;; beginning of a document).
     ((looking-at "<\\?")
      (search-forward "?>")
      (skip-syntax-forward " ")
      ;; Continue with the construct that follows only if there is one.
      ;; A processing instruction followed by text or by the end of the
      ;; buffer is simply skipped instead of being reported as an
      ;; invalid character.
      (if (eq (char-after) ?<)
          (xml-parse-tag parse-dtd xml-ns)))
     ;; Character data (CDATA) sections, in which no tag should be interpreted
     ((looking-at "<!\\[CDATA\\[")
      (let ((pos (match-end 0)))
        (unless (search-forward "]]>" nil t)
          (error "CDATA section does not end anywhere in the document"))
        (buffer-substring pos (match-beginning 0))))
     ;; DTD for the document
     ((looking-at "<!DOCTYPE")
      (let (dtd)
        (if parse-dtd
            (setq dtd (xml-parse-dtd))
          (xml-skip-dtd))
        (skip-syntax-forward " ")
        (if dtd
            (cons dtd (xml-parse-tag nil xml-ns))
          (xml-parse-tag nil xml-ns))))
     ;; skip comments
     ((looking-at "<!--")
      (search-forward "-->")
      nil)
     ;; end tag
     ((looking-at "</")
      '())
     ;; opening tag
     ((looking-at "<\\([^/>[:space:]]+\\)")
      (goto-char (match-end 1))

      ;; Parse this node.  CHILDREN is built in reverse order: it starts
      ;; as (ATTRIBUTES NAME), children are pushed in front, and the
      ;; list is reversed before being returned.
      (let* ((node-name (match-string 1))
             (attr-list (xml-parse-attlist))
             (children (if (consp xml-ns) ;; take care of namespace parsing
                           (progn
                             (setq xml-ns (xml-ns-parse-ns-attrs
                                           attr-list xml-ns))
                             (list (xml-ns-expand-attr
                                    attr-list xml-ns)
                                   (xml-ns-expand-el
                                    node-name xml-ns)))
                         (list (xml-intern-attrlist attr-list)
                               (intern node-name))))
             pos)

        ;; is this an empty element ?
        (if (looking-at "/>")
            (progn
              (forward-char 2)
              (nreverse children))

          ;; is this a valid start tag ?
          (if (eq (char-after) ?>)
              (progn
                (forward-char 1)
                ;; Now check that we have the right end-tag.  Note that this
                ;; one might contain spaces after the tag name.  The name
                ;; is quoted because names may contain `.', which would
                ;; otherwise match any character.
                (let ((end (concat "</" (regexp-quote node-name) "\\s-*>")))
                  (while (not (looking-at end))
                    (cond
                     ((looking-at "</")
                      (error "XML: Invalid end tag (expecting %s) at pos %d"
                             node-name (point)))
                     ;; `eq' rather than `=': `char-after' returns nil at
                     ;; the end of the buffer.
                     ((eq (char-after) ?<)
                      (let ((tag (xml-parse-tag nil xml-ns)))
                        (when tag
                          (push tag children))))
                     (t
                      (setq pos (point))
                      (search-forward "<")
                      (forward-char -1)
                      (let ((string (buffer-substring pos (point)))
                            (pos 0))

                        ;; Clean up the string.  As per XML
                        ;; specifications, the XML processor should
                        ;; always pass the whole string to the
                        ;; application.  But \r's should be replaced:
                        ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
                        (while (string-match "\r\n?" string pos)
                          (setq string (replace-match "\n" t t string))
                          (setq pos (1+ (match-beginning 0))))

                        (setq string (xml-substitute-special string))
                        (setq children
                              (if (stringp (car children))
                                  ;; The two strings were separated by a comment.
                                  (cons (concat (car children) string)
                                        (cdr children))
                                (cons string children))))))))

                ;; Move past the end tag matched by the last `looking-at'.
                (goto-char (match-end 0))
                (nreverse children))
            ;; This was an invalid start tag
            (error "XML: Invalid attribute list")))))
     (t ;; This is not a tag.
      (error "XML: Invalid character")))))
47db06aa 421
(defun xml-parse-attlist ()
  "Return the attribute-list after point.
The result is a list of conses whose car is the attribute name and
whose cdr is the attribute value, both strings, in document order.
Entity references in the values are substituted with
`xml-substitute-special'.
Leave point at the first non-blank character after the attributes.
Signal an error if a value is not quoted or if an attribute name
appears twice."
  (let ((attlist ())
        end-pos name)
    (skip-syntax-forward " ")
    (while (looking-at (eval-when-compile
                         (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*")))
      (setq name (match-string 1))
      (goto-char (match-end 0))

      ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize

      ;; The value must be a string between quotes or double-quotes.
      (if (looking-at "\"\\([^\"]*\\)\"")
          (setq end-pos (match-end 0))
        (if (looking-at "'\\([^']*\\)'")
            (setq end-pos (match-end 0))
          (error "XML: Attribute values must be given between quotes")))

      ;; Each attribute must be unique within a given element
      (if (assoc name attlist)
          (error "XML: each attribute must be unique within an element"))

      ;; Runs of whitespace in the value are kept as they are.  This
      ;; function used to call `replace-regexp-in-string' here to
      ;; collapse them, but discarded the result, so the call never had
      ;; any effect.  It has been removed rather than wired up: the XML
      ;; specification only collapses whitespace in attributes declared
      ;; with a type other than CDATA, and no declarations are used
      ;; here.
      (push (cons name (xml-substitute-special (match-string 1))) attlist)

      (goto-char end-pos)
      (skip-syntax-forward " "))
    (nreverse attlist)))
47db06aa
GM
457
458;;*******************************************************************
459;;**
460;;** The DTD (document type declaration)
461;;** The following functions know how to skip or parse the DTD of
462;;** a document
463;;**
464;;*******************************************************************
465
a98e819b
DL
466;; Fixme: This fails at least if the DTD contains conditional sections.
467
(defun xml-skip-dtd ()
  "Skip the DTD at point.
Point must be at the \"<!DOCTYPE\" that starts the declaration and is
left just after the \">\" that ends it.  An external identifier (the
SYSTEM or PUBLIC keyword with its quoted literals) and an internal
subset in square brackets are skipped too.
Signal an error if the name of the document is missing or if the end
of the declaration is not found.
This follows the rule [28] in the XML specifications."
  (forward-char (length "<!DOCTYPE"))
  (if (looking-at "\\s-*>")
      (error "XML: invalid DTD (expecting name of the document)"))
  (condition-case nil
      (progn
        (forward-sexp)                  ; the name of the document
        (skip-syntax-forward " ")
        ;; Skip whatever precedes the internal subset or the end of the
        ;; declaration.  Quoted literals are skipped as a whole so that
        ;; a `>' or `[' inside them is not taken for markup.
        (while (not (memq (char-after) '(?\[ ?> nil)))
          (if (looking-at "\"[^\"]*\"\\|'[^']*'")
              (goto-char (match-end 0))
            (forward-char 1)))
        (if (eq (char-after) ?\[)
            (re-search-forward "]\\s-*>")
          ;; At the end of the buffer this search fails, which is
          ;; reported below.
          (search-forward ">")))
    (error (error "XML: No end to the DTD"))))
482
a98e819b
DL
(defun xml-parse-dtd ()
  "Parse the DTD at point.
Point must be at the \"<!DOCTYPE\" that starts the declaration and is
left after the \">\" that ends it.
Return a list whose first element is the symbol `dtd' and whose second
element is the name of the document.  It is followed, when there is
an external identifier, by (SYSTEM-ID system) or by
\(PUBLIC-ID SYSTEM-ID public), and then by one (ELEMENT TYPE) entry
for each element declaration of the internal subset, in order.
Signal an error if the declaration is malformed."
  (forward-char (eval-when-compile (length "<!DOCTYPE")))
  (skip-syntax-forward " ")

  ;; Get the name of the document.  The match is checked, so that a
  ;; missing or invalid name is reported instead of letting the code
  ;; below read stale match data.
  (unless (looking-at xml-name-regexp)
    (error "XML: invalid DTD (expecting name of the document)"))
  ;; DTD is built in reverse order and reversed before being returned.
  (let ((dtd (list (match-string 0) 'dtd))
        type element end-pos)
    (goto-char (match-end 0))

    (skip-syntax-forward " ")
    ;; XML [75]
    (cond ((looking-at "PUBLIC\\s-+")
           (goto-char (match-end 0))
           (unless (or (re-search-forward
                        "\\=\"\\([[:space:][:alnum:]-'()+,./:=?;!*#@$_%]*\\)\""
                        nil t)
                       (re-search-forward
                        "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'"
                        nil t))
             (error "XML: missing public id"))
           (let ((pubid (match-string 1)))
             ;; Whitespace separates the public id from the system id,
             ;; and the searches below only match at point.
             (skip-syntax-forward " ")
             (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
                         (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
               (error "XML: missing system id"))
             (push (list pubid (match-string 1) 'public) dtd)))
          ((looking-at "SYSTEM\\s-+")
           (goto-char (match-end 0))
           (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
                       (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
             (error "XML: missing system id"))
           (push (list (match-string 1) 'system) dtd)))
    (skip-syntax-forward " ")
    (if (eq ?> (char-after))
        (forward-char)
      (skip-syntax-forward " ")
      (if (not (eq (char-after) ?\[))
          (error "XML: bad DTD")
        (forward-char)
        ;; Parse the rest of the DTD
        ;; Fixme: Deal with ENTITY, ATTLIST, NOTATION, PIs.
        (while (not (looking-at "\\s-*\\]"))
          (skip-syntax-forward " ")
          (cond

           ;; Translation of rule [45] of XML specifications
           ((looking-at
             "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")

            (setq element (match-string 1)
                  type (match-string-no-properties 2))
            (setq end-pos (match-end 0))

            ;; Translation of rule [46] of XML specifications
            (cond
             ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration
              (setq type 'empty))
             ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
              (setq type 'any))
             ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
              (setq type (xml-parse-elem-type (match-string 1 type))))
             ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
              nil)
             (t
              (error "XML: Invalid element type in the DTD")))

            ;; rule [45]: the element declaration must be unique
            (if (assoc element dtd)
                (error "XML: element declarations must be unique in a DTD (<%s>)"
                       element))

            ;; Store the element in the DTD
            (push (list element type) dtd)
            (goto-char end-pos))
           ((looking-at "<!--")
            (search-forward "-->"))

           (t
            (error "XML: Invalid DTD item"))))

        ;; Skip the end of the DTD.  This is done once, after all the
        ;; declarations have been read; doing it inside the loop would
        ;; skip the declaration that follows, or the end of the subset.
        (search-forward ">")))
    (nreverse dtd)))
47db06aa
GM
569
(defun xml-parse-elem-type (string)
  "Convert element type STRING into a Lisp structure.
A parenthesized group containing `|' becomes (choice ITEM...), one
containing `,' becomes (seq ITEM...), where each ITEM is converted
recursively.  The name \"#PCDATA\" becomes the symbol `pcdata'; other
names stay strings.  When a `+', `*' or `?' modifier follows, the
result is wrapped in a list whose first element is the symbol with
the name of the modifier."
  (let (elem modifier)
    (cond
     ;; A parenthesized group, possibly followed by a modifier.
     ((string-match "(\\([^)]+\\))\\([+*?]?\\)" string)
      ;; Both parts are saved before the recursive calls, which change
      ;; the match data.
      (setq elem (match-string 1 string)
            modifier (match-string 2 string))
      (cond
       ((string-match "|" elem)
        (setq elem (cons 'choice
                         (mapcar 'xml-parse-elem-type
                                 (split-string elem "|")))))
       ((string-match "," elem)
        (setq elem (cons 'seq
                         (mapcar 'xml-parse-elem-type
                                 (split-string elem ",")))))))
     ;; A single name, possibly followed by a modifier.
     ((string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string)
      (setq elem (match-string 1 string)
            modifier (match-string 2 string))))

    (when (and (stringp elem) (string= elem "#PCDATA"))
      (setq elem 'pcdata))

    (if (member modifier '("+" "*" "?"))
        (list (intern modifier) elem)
      elem)))
47db06aa 602
47db06aa
GM
603;;*******************************************************************
604;;**
605;;** Substituting special XML sequences
606;;**
607;;*******************************************************************
608
a98e819b
DL
609(eval-when-compile
610 (defvar str)) ; dynamic from replace-regexp-in-string
611
612;; Fixme: Take declared entities from the DTD when they're available.
(defun xml-substitute-entity (match)
  "Subroutine of `xml-substitute-special'.
MATCH is the text of one reference, from the \"&\" to the \";\".
Return the string it stands for: the predefined entities lt, gt,
apos, quot and amp, and decimal (\"&#38;\") or hexadecimal
\(\"&#x26;\") character references are replaced.  Any other text is
returned unchanged.  The match data is preserved."
  (save-match-data
    ;; The name is taken from MATCH itself.  It used to be read from
    ;; `str', a variable internal to `replace-regexp-in-string', which
    ;; only works as long as that variable is dynamically bound.
    (let ((name (and (string-match "\\`&\\([^;]+\\);\\'" match)
                     (match-string 1 match))))
      (cond ((null name) match)
            ((string= name "lt") "<")
            ((string= name "gt") ">")
            ((string= name "apos") "'")
            ((string= name "quot") "\"")
            ((string= name "amp") "&")
            ;; The patterns for character references are anchored, so
            ;; that a name which merely contains \"#\" followed by
            ;; digits is not taken for a character reference.
            ((and (string-match "\\`#\\([0-9]+\\)\\'" name)
                  (let ((c (decode-char
                            'ucs
                            (string-to-number (match-string 1 name)))))
                    (if c (string c))))) ; else unrepresentable
            ((and (string-match "\\`#x\\([[:xdigit:]]+\\)\\'" name)
                  (let ((c (decode-char
                            'ucs
                            (string-to-number (match-string 1 name) 16))))
                    (if c (string c)))))
            ;; Default to asis.  Arguably, unrepresentable code points
            ;; might be best replaced with U+FFFD.
            (t match)))))
635
(defun xml-substitute-special (string)
  "Return STRING, after substituting entity references.
Every piece of text going from a \"&\" to the next \";\" is replaced
by the value `xml-substitute-entity' returns for it."
  ;; The replacement is done in a single pass.  Making repeated passes
  ;; through the string from the beginning isn't correct, since then
  ;; either "&amp;amp;" or "&#38;amp;" won't DTRT.
  (let ((reference-regexp "&\\([^;]+\\);"))
    (replace-regexp-in-string reference-regexp
                              (function xml-substitute-entity)
                              string t t)))
47db06aa
GM
643
644;;*******************************************************************
645;;**
646;;** Printing a tree.
647;;** This function is intended mainly for debugging purposes.
648;;**
649;;*******************************************************************
650
(defun xml-debug-print (xml)
  "Insert a rendering of each node of the list XML in the current buffer.
Every node is printed by `xml-debug-print-internal', with an empty
indentation string."
  (let ((rest xml))
    (while rest
      (xml-debug-print-internal (car rest) "")
      (setq rest (cdr rest)))))
47db06aa 654
(defun xml-debug-print-internal (xml indent-string)
  "Insert a rendering of the node XML in the current buffer.
The start tag and the end tag are preceded by INDENT-STRING.  Child
nodes are printed recursively on a new line, with one more space of
indentation; text children are inserted as they are.
The tag of XML and the names of its attributes must be symbols."
  ;; The start tag with its attributes.
  (insert indent-string "<" (symbol-name (xml-node-name xml)))
  (dolist (attr (xml-node-attributes xml))
    (insert " " (symbol-name (car attr)) "=\"" (cdr attr) "\""))
  (insert ">")

  ;; The children.
  (dolist (child (xml-node-children xml))
    (cond
     ((listp child)
      (insert "\n")
      (xml-debug-print-internal child (concat indent-string " ")))
     ((stringp child)
      (insert child))
     (t
      (error "Invalid XML tree"))))

  ;; The end tag, on a line of its own.
  (insert "\n" indent-string
          "</" (symbol-name (xml-node-name xml)) ">"))
47db06aa
GM
684
685(provide 'xml)
686
ab5796a9 687;;; arch-tag: 5864b283-5a68-4b59-a20d-36a72b353b9b
47db06aa 688;;; xml.el ends here