;;; xml.el --- XML parser
-;; Copyright (C) 2000, 01, 03, 2004 Free Software Foundation, Inc.
+;; Copyright (C) 2000, 2001, 2002, 2003, 2004,
+;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
;; Author: Emmanuel Briot <briot@gnat.com>
;; Maintainer: Mark A. Hershberger <mah@everybody.org>
;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
+;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
;;; Commentary:
;;; Code:
-;; Note that {buffer-substring,match-string}-no-properties were
-;; formerly used in several places, but that removes composition info.
+;; Note that buffer-substring and match-string were formerly used in
+;; several places, because the -no-properties variants remove
+;; composition info. However, after some discussion on emacs-devel,
+;; the consensus was that the speed of the -no-properties variants was
+;; a worthwhile tradeoff especially since we're usually parsing files
+;; instead of hand-crafted XML.
;;*******************************************************************
;;**
;;**
;;*******************************************************************
+(defconst xml-undefined-entity "?"
+ "What to substitute for undefined entities")
+
(defvar xml-entity-alist
'(("lt" . "<")
("gt" . ">")
Returns the top node with all its children.
If PARSE-DTD is non-nil, the DTD is parsed rather than skipped.
If PARSE-NS is non-nil, then QNAMES are expanded."
- (let ((keep))
- (if (get-file-buffer file)
- (progn
- (set-buffer (get-file-buffer file))
- (setq keep (point)))
- (let (auto-mode-alist) ; no need for xml-mode
- (find-file file)))
-
- (let ((xml (xml-parse-region (point-min)
- (point-max)
- (current-buffer)
- parse-dtd parse-ns)))
- (if keep
- (goto-char keep)
- (kill-buffer (current-buffer)))
- xml)))
-
-
-(let* ((start-chars (concat ":[:alpha:]_"))
+ (if (get-file-buffer file)
+ (with-current-buffer (get-file-buffer file)
+ (save-excursion
+ (xml-parse-region (point-min)
+ (point-max)
+ (current-buffer)
+ parse-dtd parse-ns)))
+ (with-temp-buffer
+ (insert-file-contents file)
+ (xml-parse-region (point-min)
+ (point-max)
+ (current-buffer)
+ parse-dtd parse-ns))))
+
+
+(defvar xml-name-re)
+(defvar xml-entity-value-re)
+(defvar xml-att-def-re)
+(let* ((start-chars (concat "[:alpha:]:_"))
(name-chars (concat "-[:digit:]." start-chars))
-;;[3] S ::= (#x20 | #x9 | #xD | #xA)+
+ ;;[3] S ::= (#x20 | #x9 | #xD | #xA)+
(whitespace "[ \t\n\r]"))
-;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6]
-;; | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
-;; | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
-;; | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+ ;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6]
+ ;; | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
+ ;; | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
+ ;; | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
(defvar xml-name-start-char-re (concat "[" start-chars "]"))
-;;[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
+ ;;[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
(defvar xml-name-char-re (concat "[" name-chars "]"))
-;;[5] Name ::= NameStartChar (NameChar)*
+ ;;[5] Name ::= NameStartChar (NameChar)*
(defvar xml-name-re (concat xml-name-start-char-re xml-name-char-re "*"))
-;;[6] Names ::= Name (#x20 Name)*
+ ;;[6] Names ::= Name (#x20 Name)*
(defvar xml-names-re (concat xml-name-re "\\(?: " xml-name-re "\\)*"))
-;;[7] Nmtoken ::= (NameChar)+
+ ;;[7] Nmtoken ::= (NameChar)+
(defvar xml-nmtoken-re (concat xml-name-char-re "+"))
-;;[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
+ ;;[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
(defvar xml-nmtokens-re (concat xml-nmtoken-re "\\(?: " xml-name-re "\\)*"))
-;;[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
+ ;;[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
(defvar xml-char-ref-re "\\(?:&#[0-9]+;\\|&#x[0-9a-fA-F]+;\\)")
-;;[68] EntityRef ::= '&' Name ';'
+ ;;[68] EntityRef ::= '&' Name ';'
(defvar xml-entity-ref (concat "&" xml-name-re ";"))
-;;[69] PEReference ::= '%' Name ';'
+ ;;[69] PEReference ::= '%' Name ';'
(defvar xml-pe-reference-re (concat "%" xml-name-re ";"))
-;;[67] Reference ::= EntityRef | CharRef
+ ;;[67] Reference ::= EntityRef | CharRef
(defvar xml-reference-re (concat "\\(?:" xml-entity-ref "\\|" xml-char-ref-re "\\)"))
-;;[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
-;; | "'" ([^%&'] | PEReference | Reference)* "'"
+ ;;[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
+ (defvar xml-att-value-re (concat "\\(?:\"\\(?:[^&\"]\\|" xml-reference-re "\\)*\"\\|"
+ "'\\(?:[^&']\\|" xml-reference-re "\\)*'\\)"))
+ ;;[56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default]
+ ;; | 'IDREF' [VC: IDREF]
+ ;; | 'IDREFS' [VC: IDREF]
+ ;; | 'ENTITY' [VC: Entity Name]
+ ;; | 'ENTITIES' [VC: Entity Name]
+ ;; | 'NMTOKEN' [VC: Name Token]
+ ;; | 'NMTOKENS' [VC: Name Token]
+ (defvar xml-tokenized-type-re "\\(?:ID\\|IDREF\\|IDREFS\\|ENTITY\\|ENTITIES\\|NMTOKEN\\|NMTOKENS\\)")
+ ;;[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
+ (defvar xml-notation-type-re (concat "\\(?:NOTATION" whitespace "(" whitespace "*" xml-name-re
+ "\\(?:" whitespace "*|" whitespace "*" xml-name-re "\\)*" whitespace "*)\\)"))
+ ;;[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration] [VC: No Duplicate Tokens]
+ (defvar xml-enumeration-re (concat "\\(?:(" whitespace "*" xml-nmtoken-re
+ "\\(?:" whitespace "*|" whitespace "*" xml-nmtoken-re "\\)*"
+ whitespace ")\\)"))
+ ;;[57] EnumeratedType ::= NotationType | Enumeration
+ (defvar xml-enumerated-type-re (concat "\\(?:" xml-notation-type-re "\\|" xml-enumeration-re "\\)"))
+ ;;[54] AttType ::= StringType | TokenizedType | EnumeratedType
+ ;;[55] StringType ::= 'CDATA'
+ (defvar xml-att-type-re (concat "\\(?:CDATA\\|" xml-tokenized-type-re "\\|" xml-notation-type-re"\\|" xml-enumerated-type-re "\\)"))
+ ;;[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
+ (defvar xml-default-decl-re (concat "\\(?:#REQUIRED\\|#IMPLIED\\|\\(?:#FIXED" whitespace "\\)*" xml-att-value-re "\\)"))
+ ;;[53] AttDef ::= S Name S AttType S DefaultDecl
+ (defvar xml-att-def-re (concat "\\(?:" whitespace "*" xml-name-re
+ whitespace "*" xml-att-type-re
+ whitespace "*" xml-default-decl-re "\\)"))
+ ;;[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
+ ;; | "'" ([^%&'] | PEReference | Reference)* "'"
(defvar xml-entity-value-re (concat "\\(?:\"\\(?:[^%&\"]\\|" xml-pe-reference-re
"\\|" xml-reference-re "\\)*\"\\|'\\(?:[^%&']\\|"
xml-pe-reference-re "\\|" xml-reference-re "\\)*'\\)")))
;; Get space syntax correct per XML [3].
(dotimes (c 31)
(modify-syntax-entry c "." table)) ; all are space in standard table
- (dolist (c '(?\t ?\n ?\r)) ; these should be space
+ (dolist (c '(?\t ?\n ?\r)) ; these should be space
(modify-syntax-entry c " " table))
;; For skipping attributes.
(modify-syntax-entry ?\" "\"" table)
(modify-syntax-entry ?. "_" table)
(modify-syntax-entry ?: "_" table)
;; XML [89]
- (dolist (c '(#x00B7 #x02D0 #x02D1 #x0387 #x0640 #x0E46 #x0EC6 #x3005
- #x3031 #x3032 #x3033 #x3034 #x3035 #x309D #x309E #x30FC
- #x30FD #x30FE))
- (modify-syntax-entry (decode-char 'ucs c) "w" table))
+ (unless (featurep 'xemacs)
+ (dolist (c '(#x00B7 #x02D0 #x02D1 #x0387 #x0640 #x0E46 #x0EC6 #x3005
+ #x3031 #x3032 #x3033 #x3034 #x3035 #x309D #x309E #x30FC
+ #x30FD #x30FE))
+ (modify-syntax-entry (decode-char 'ucs c) "w" table)))
;; Fixme: rest of [4]
table)
"Syntax table used by `xml-parse-region'.")
If PARSE-DTD is non-nil, the DTD is parsed rather than skipped,
and returned as the first element of the list.
If PARSE-NS is non-nil, then QNAMES are expanded."
- (save-restriction
- (narrow-to-region beg end)
- ;; Use fixed syntax table to ensure regexp char classes and syntax
- ;; specs DTRT.
- (with-syntax-table (standard-syntax-table)
- (let ((case-fold-search nil) ; XML is case-sensitive.
- xml result dtd)
- (save-excursion
- (if buffer
- (set-buffer buffer))
+ ;; Use fixed syntax table to ensure regexp char classes and syntax
+ ;; specs DTRT.
+ (with-syntax-table (standard-syntax-table)
+ (let ((case-fold-search nil) ; XML is case-sensitive.
+ xml result dtd)
+ (save-excursion
+ (if buffer
+ (set-buffer buffer))
+ (save-restriction
+ (narrow-to-region beg end)
(goto-char (point-min))
(while (not (eobp))
(if (search-forward "<" nil t)
parse-ns
(if parse-ns
(list
- ;; Default for empty prefix is no namespace
+ ;; Default for empty prefix is no namespace
(cons "" "")
;; "xml" namespace
(cons "xml" "http://www.w3.org/XML/1998/namespace")
(let ((pos (match-end 0)))
(unless (search-forward "]]>" nil t)
(error "XML: (Not Well Formed) CDATA section does not end anywhere in the document"))
- (buffer-substring pos (match-beginning 0))))
+ (concat
+ (buffer-substring-no-properties pos (match-beginning 0))
+ (xml-parse-string))))
;; DTD for the document
((looking-at "<!DOCTYPE")
(let ((dtd (xml-parse-dtd parse-ns)))
(goto-char (match-end 1))
;; Parse this node
- (let* ((node-name (match-string 1))
- ;; Parse the attribute list.
- (attrs (xml-parse-attlist xml-ns))
- children pos)
+ (let* ((node-name (match-string-no-properties 1))
+ ;; Parse the attribute list.
+ (attrs (xml-parse-attlist xml-ns))
+ children pos)
- ;; add the xmlns:* attrs to our cache
- (when (consp xml-ns)
+ ;; add the xmlns:* attrs to our cache
+ (when (consp xml-ns)
(dolist (attr attrs)
(when (and (consp (car attr))
(equal "http://www.w3.org/2000/xmlns/"
(push (cons (cdar attr) (cdr attr))
xml-ns))))
- (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))
+ (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))
;; is this an empty element ?
(if (looking-at "/>")
(if (stringp expansion)
(if (stringp (car children))
;; The two strings were separated by a comment.
- (setq children (append (concat (car children) expansion)
+ (setq children (append (list (concat (car children) expansion))
(cdr children)))
(setq children (append (list expansion) children)))
(setq children (append expansion children))))))))
(nreverse children)))
;; This was an invalid start tag (Expected ">", but didn't see it.)
(error "XML: (Well-Formed) Couldn't parse tag: %s"
- (buffer-substring (- (point) 10) (+ (point) 1)))))))
+ (buffer-substring-no-properties (- (point) 10) (+ (point) 1)))))))
(t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag)
(unless xml-sub-parser ; Usually, we error out.
(error "XML: (Well-Formed) Invalid character"))
(defun xml-parse-string ()
"Parse the next whatever. Could be a string, or an element."
- (let* ((pos (point))
- (string (progn (if (search-forward "<" nil t)
- (forward-char -1)
- (goto-char (point-max)))
- (buffer-substring pos (point)))))
- ;; Clean up the string. As per XML specifications, the XML
- ;; processor should always pass the whole string to the
- ;; application. But \r's should be replaced:
- ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
- (setq pos 0)
- (while (string-match "\r\n?" string pos)
- (setq string (replace-match "\n" t t string))
- (setq pos (1+ (match-beginning 0))))
-
- (xml-substitute-special string)))
+ (let* ((pos (point))
+ (string (progn (if (search-forward "<" nil t)
+ (forward-char -1)
+ (goto-char (point-max)))
+ (buffer-substring-no-properties pos (point)))))
+ ;; Clean up the string. As per XML specifications, the XML
+ ;; processor should always pass the whole string to the
+ ;; application. But \r's should be replaced:
+ ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
+ (setq pos 0)
+ (while (string-match "\r\n?" string pos)
+ (setq string (replace-match "\n" t t string))
+ (setq pos (1+ (match-beginning 0))))
+
+ (xml-substitute-special string)))
(defun xml-parse-attlist (&optional xml-ns)
"Return the attribute-list after point.
(while (looking-at (eval-when-compile
(concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*")))
(setq end-pos (match-end 0))
- (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns))
+ (setq name (xml-maybe-do-ns (match-string-no-properties 1) nil xml-ns))
(goto-char end-pos)
;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
;; Multiple whitespace characters should be replaced with a single one
;; in the attributes
- (let ((string (match-string 1))
+ (let ((string (match-string-no-properties 1))
(pos 0))
(replace-regexp-in-string "\\s-\\{2,\\}" " " string)
(let ((expansion (xml-substitute-special string)))
(unless (stringp expansion)
- ; We say this is the constraint. It is acctually that
- ; external entities nor "<" can be in an attribute value.
+ ; We say this is the constraint. It is acctually that
+ ; external entities nor "<" can be in an attribute value.
(error "XML: (Not Well-Formed) Entities in attributes cannot expand into elements"))
(push (cons name expansion) attlist)))
;; Get the name of the document
(looking-at xml-name-regexp)
- (let ((dtd (list (match-string 0) 'dtd))
+ (let ((dtd (list (match-string-no-properties 0) 'dtd))
type element end-pos)
(goto-char (match-end 0))
"\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'"
nil t))
(error "XML: Missing Public ID"))
- (let ((pubid (match-string 1)))
+ (let ((pubid (match-string-no-properties 1)))
(skip-syntax-forward " ")
(unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
(re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
(error "XML: Missing System ID"))
- (push (list pubid (match-string 1) 'public) dtd)))
+ (push (list pubid (match-string-no-properties 1) 'public) dtd)))
((looking-at "SYSTEM\\s-+")
(goto-char (match-end 0))
(unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
(re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
(error "XML: Missing System ID"))
- (push (list (match-string 1) 'system) dtd)))
+ (push (list (match-string-no-properties 1) 'system) dtd)))
(skip-syntax-forward " ")
(if (eq ?> (char-after))
(forward-char)
(error "XML: Bad DTD")
(forward-char)
;; Parse the rest of the DTD
- ;; Fixme: Deal with ATTLIST, NOTATION, PIs.
+ ;; Fixme: Deal with NOTATION, PIs.
(while (not (looking-at "\\s-*\\]"))
(skip-syntax-forward " ")
(cond
((looking-at
"<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
- (setq element (match-string 1)
+ (setq element (match-string-no-properties 1)
type (match-string-no-properties 2))
(setq end-pos (match-end 0))
((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
(setq type 'any))
((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
- (setq type (xml-parse-elem-type (match-string 1 type))))
+ (setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
nil)
(t
(if xml-validating-parser
(error "XML: (Validity) Invalid element type in the DTD"))))
-
+
;; rule [45]: the element declaration must be unique
(if (and (assoc element dtd)
xml-validating-parser)
;; Store the element in the DTD
(push (list element type) dtd)
(goto-char end-pos))
+
+ ;; Translation of rule [52] of XML specifications
+ ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
+ "\\)[ \t\n\r]*\\(" xml-att-def-re
+ "\\)*[ \t\n\r]*>"))
+
+ ;; We don't do anything with ATTLIST currently
+ (goto-char (match-end 0)))
+
((looking-at "<!--")
(search-forward "-->"))
((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re
"\\)[ \t\n\r]*\\(" xml-entity-value-re
"\\)[ \t\n\r]*>"))
- (let ((name (buffer-substring (nth 2 (match-data))
- (nth 3 (match-data))))
- (value (buffer-substring (+ (nth 4 (match-data)) 1)
- (- (nth 5 (match-data)) 1))))
- (goto-char (nth 1 (match-data)))
+ (let ((name (match-string-no-properties 1))
+ (value (substring (match-string-no-properties 2) 1
+ (- (length (match-string-no-properties 2)) 1))))
+ (goto-char (match-end 0))
(setq xml-entity-alist
(append xml-entity-alist
(list (cons name
"\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
"[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
"[ \t\n\r]*>")))
- (let ((name (buffer-substring (nth 2 (match-data))
- (nth 3 (match-data))))
- (file (buffer-substring (+ (nth 4 (match-data)) 1)
- (- (nth 5 (match-data)) 1))))
- (goto-char (nth 1 (match-data)))
+ (let ((name (match-string-no-properties 1))
+ (file (substring (match-string-no-properties 2) 1
+ (- (length (match-string-no-properties 2)) 1))))
+ (goto-char (match-end 0))
(setq xml-entity-alist
(append xml-entity-alist
(list (cons name (with-temp-buffer
(xml-parse-fragment
xml-validating-parser
parse-ns))))))))
+ ;; skip parameter entity declarations
+ ((or (looking-at (concat "<!ENTITY[ \t\n\r]+%[ \t\n\r]+\\(" xml-name-re
+ "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
+ "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
+ (looking-at (concat "<!ENTITY[ \t\n\r]+"
+ "%[ \t\n\r]+"
+ "\\(" xml-name-re "\\)[ \t\n\r]+"
+ "PUBLIC[ \t\n\r]+"
+ "\\(\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
+ "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'\\)[ \t\n\r]+"
+ "\\(\"[^\"]+\"\\|'[^']+'\\)"
+ "[ \t\n\r]*>")))
+ (goto-char (match-end 0)))
+ ;; skip parameter entities
+ ((looking-at (concat "%" xml-name-re ";"))
+ (goto-char (match-end 0)))
(t
- (error "XML: (Validity) Invalid DTD item")))))
+ (when xml-validating-parser
+ (error "XML: (Validity) Invalid DTD item"))))))
(if (looking-at "\\s-*]>")
- (goto-char (nth 1 (match-data)))))
+ (goto-char (match-end 0))))
(nreverse dtd)))
(defun xml-parse-elem-type (string)
(let (elem modifier)
(if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string)
(progn
- (setq elem (match-string 1 string)
- modifier (match-string 2 string))
+ (setq elem (match-string-no-properties 1 string)
+ modifier (match-string-no-properties 2 string))
(if (string-match "|" elem)
(setq elem (cons 'choice
(mapcar 'xml-parse-elem-type
(mapcar 'xml-parse-elem-type
(split-string elem ",")))))))
(if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string)
- (setq elem (match-string 1 string)
- modifier (match-string 2 string))))
+ (setq elem (match-string-no-properties 1 string)
+ modifier (match-string-no-properties 2 string))))
(if (and (stringp elem) (string= elem "#PCDATA"))
(setq elem 'pcdata))
(let ((point 0)
children end-point)
- (while (string-match "&\\([^;]+\\);" string point)
+ (while (string-match "&\\([^;]*\\);" string point)
(setq end-point (match-end 0))
- (let* ((this-part (match-string 1 string))
+ (let* ((this-part (match-string-no-properties 1 string))
(prev-part (substring string point (match-beginning 0)))
(entity (assoc this-part xml-entity-alist))
(expansion
(cond ((string-match "#\\([0-9]+\\)" this-part)
(let ((c (decode-char
'ucs
- (string-to-number (match-string 1 this-part)))))
+ (string-to-number (match-string-no-properties 1 this-part)))))
(if c (string c))))
((string-match "#x\\([[:xdigit:]]+\\)" this-part)
(let ((c (decode-char
'ucs
- (string-to-number (match-string 1 this-part) 16))))
+ (string-to-number (match-string-no-properties 1 this-part) 16))))
(if c (string c))))
(entity
(cdr entity))
+ ((eq (length this-part) 0)
+ (error "XML: (Not Well-Formed) No entity given"))
(t
(if xml-validating-parser
(error "XML: (Validity) Undefined entity `%s'"
- (match-string 1 this-part)))))))
+ this-part)
+ xml-undefined-entity)))))
(cond ((null children)
- (if (and (eq (length expansion) 1)
- (stringp (cadr expansion)))
- (setq children (concat prev-part expansion))
- (if (stringp (car expansion))
- (setq children
- (list (concat prev-part (car expansion))
- (append (cdr expansion))))
- (setq children (append expansion prev-part)))))
+ ;; FIXME: If we have an entity that expands into XML, this won't work.
+ (setq children
+ (concat prev-part expansion)))
((stringp children)
(if (stringp expansion)
(setq children (concat children prev-part expansion))
(defalias 'xml-print 'xml-debug-print)
+(defun xml-escape-string (string)
+ "Return the string with entity substitutions made from
+xml-entity-alist."
+ (mapconcat (lambda (byte)
+ (let ((char (char-to-string byte)))
+ (if (rassoc char xml-entity-alist)
+ (concat "&" (car (rassoc char xml-entity-alist)) ";")
+ char)))
+ ;; This differs from the non-unicode branch. Just
+ ;; grabbing the string works here.
+ string ""))
+
(defun xml-debug-print-internal (xml indent-string)
"Outputs the XML tree in the current buffer.
The first line is indented with INDENT-STRING."
;; output the attribute list
(setq attlist (xml-node-attributes tree))
(while attlist
- (insert ?\ (symbol-name (caar attlist)) "=\"" (cdar attlist) ?\")
+ (insert ?\ (symbol-name (caar attlist)) "=\""
+ (xml-escape-string (cdar attlist)) ?\")
(setq attlist (cdr attlist)))
(setq tree (xml-node-children tree))
((listp node)
(insert ?\n)
(xml-debug-print-internal node (concat indent-string " ")))
- ((stringp node) (insert node))
+ ((stringp node)
+ (insert (xml-escape-string node)))
(t
(error "Invalid XML tree"))))