;;; xml.el --- XML parser
-;; Copyright (C) 2000, 01, 03, 2004 Free Software Foundation, Inc.
+;; Copyright (C) 2000, 2001, 2002, 2003, 2004,
+;; 2005, 2006 Free Software Foundation, Inc.
;; Author: Emmanuel Briot <briot@gnat.com>
;; Maintainer: Mark A. Hershberger <mah@everybody.org>
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
;;; Commentary:
;;**
;;*******************************************************************
+(defconst xml-undefined-entity "?"
+ "What to substitute for undefined entities")
+
(defvar xml-entity-alist
'(("lt" . "<")
("gt" . ">")
xml)))
-(let* ((start-chars (concat ":[:alpha:]_"))
+(defvar xml-name-re)
+(defvar xml-entity-value-re)
+(defvar xml-att-def-re)
+(let* ((start-chars (concat "[:alpha:]:_"))
(name-chars (concat "-[:digit:]." start-chars))
-;;[3] S ::= (#x20 | #x9 | #xD | #xA)+
+ ;;[3] S ::= (#x20 | #x9 | #xD | #xA)+
(whitespace "[ \t\n\r]"))
-;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6]
-;; | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
-;; | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
-;; | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+ ;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6]
+ ;; | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
+ ;; | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
+ ;; | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
(defvar xml-name-start-char-re (concat "[" start-chars "]"))
-;;[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
+ ;;[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
(defvar xml-name-char-re (concat "[" name-chars "]"))
-;;[5] Name ::= NameStartChar (NameChar)*
+ ;;[5] Name ::= NameStartChar (NameChar)*
(defvar xml-name-re (concat xml-name-start-char-re xml-name-char-re "*"))
-;;[6] Names ::= Name (#x20 Name)*
+ ;;[6] Names ::= Name (#x20 Name)*
(defvar xml-names-re (concat xml-name-re "\\(?: " xml-name-re "\\)*"))
-;;[7] Nmtoken ::= (NameChar)+
+ ;;[7] Nmtoken ::= (NameChar)+
(defvar xml-nmtoken-re (concat xml-name-char-re "+"))
-;;[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
+ ;;[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
(defvar xml-nmtokens-re (concat xml-nmtoken-re "\\(?: " xml-name-re "\\)*"))
-;;[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
+ ;;[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
(defvar xml-char-ref-re "\\(?:&#[0-9]+;\\|&#x[0-9a-fA-F]+;\\)")
-;;[68] EntityRef ::= '&' Name ';'
+ ;;[68] EntityRef ::= '&' Name ';'
(defvar xml-entity-ref (concat "&" xml-name-re ";"))
-;;[69] PEReference ::= '%' Name ';'
+ ;;[69] PEReference ::= '%' Name ';'
(defvar xml-pe-reference-re (concat "%" xml-name-re ";"))
-;;[67] Reference ::= EntityRef | CharRef
+ ;;[67] Reference ::= EntityRef | CharRef
(defvar xml-reference-re (concat "\\(?:" xml-entity-ref "\\|" xml-char-ref-re "\\)"))
-;;[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
-;; | "'" ([^%&'] | PEReference | Reference)* "'"
+ ;;[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
+ (defvar xml-att-value-re (concat "\\(?:\"\\(?:[^&\"]\\|" xml-reference-re "\\)*\"\\|"
+ "'\\(?:[^&']\\|" xml-reference-re "\\)*'\\)"))
+ ;;[56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default]
+ ;; | 'IDREF' [VC: IDREF]
+ ;; | 'IDREFS' [VC: IDREF]
+ ;; | 'ENTITY' [VC: Entity Name]
+ ;; | 'ENTITIES' [VC: Entity Name]
+ ;; | 'NMTOKEN' [VC: Name Token]
+ ;; | 'NMTOKENS' [VC: Name Token]
+ (defvar xml-tokenized-type-re "\\(?:ID\\|IDREF\\|IDREFS\\|ENTITY\\|ENTITIES\\|NMTOKEN\\|NMTOKENS\\)")
+ ;;[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
+ (defvar xml-notation-type-re (concat "\\(?:NOTATION" whitespace "(" whitespace "*" xml-name-re
+ "\\(?:" whitespace "*|" whitespace "*" xml-name-re "\\)*" whitespace "*)\\)"))
+ ;;[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration] [VC: No Duplicate Tokens]
+ (defvar xml-enumeration-re (concat "\\(?:(" whitespace "*" xml-nmtoken-re
+ "\\(?:" whitespace "*|" whitespace "*" xml-nmtoken-re "\\)*"
+ whitespace ")\\)"))
+ ;;[57] EnumeratedType ::= NotationType | Enumeration
+ (defvar xml-enumerated-type-re (concat "\\(?:" xml-notation-type-re "\\|" xml-enumeration-re "\\)"))
+ ;;[54] AttType ::= StringType | TokenizedType | EnumeratedType
+ ;;[55] StringType ::= 'CDATA'
+ (defvar xml-att-type-re (concat "\\(?:CDATA\\|" xml-tokenized-type-re "\\|" xml-notation-type-re"\\|" xml-enumerated-type-re "\\)"))
+ ;;[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
+ (defvar xml-default-decl-re (concat "\\(?:#REQUIRED\\|#IMPLIED\\|\\(?:#FIXED" whitespace "\\)*" xml-att-value-re "\\)"))
+ ;;[53] AttDef ::= S Name S AttType S DefaultDecl
+ (defvar xml-att-def-re (concat "\\(?:" whitespace "*" xml-name-re
+ whitespace "*" xml-att-type-re
+ whitespace "*" xml-default-decl-re "\\)"))
+ ;;[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
+ ;; | "'" ([^%&'] | PEReference | Reference)* "'"
(defvar xml-entity-value-re (concat "\\(?:\"\\(?:[^%&\"]\\|" xml-pe-reference-re
"\\|" xml-reference-re "\\)*\"\\|'\\(?:[^%&']\\|"
xml-pe-reference-re "\\|" xml-reference-re "\\)*'\\)")))
;; Get space syntax correct per XML [3].
(dotimes (c 31)
(modify-syntax-entry c "." table)) ; all are space in standard table
- (dolist (c '(?\t ?\n ?\r)) ; these should be space
+ (dolist (c '(?\t ?\n ?\r)) ; these should be space
(modify-syntax-entry c " " table))
;; For skipping attributes.
(modify-syntax-entry ?\" "\"" table)
(modify-syntax-entry ?. "_" table)
(modify-syntax-entry ?: "_" table)
;; XML [89]
- (dolist (c '(#x00B7 #x02D0 #x02D1 #x0387 #x0640 #x0E46 #x0EC6 #x3005
- #x3031 #x3032 #x3033 #x3034 #x3035 #x309D #x309E #x30FC
- #x30FD #x30FE))
- (modify-syntax-entry (decode-char 'ucs c) "w" table))
+ (unless (featurep 'xemacs)
+ (dolist (c '(#x00B7 #x02D0 #x02D1 #x0387 #x0640 #x0E46 #x0EC6 #x3005
+ #x3031 #x3032 #x3033 #x3034 #x3035 #x309D #x309E #x30FC
+ #x30FD #x30FE))
+ (modify-syntax-entry (decode-char 'ucs c) "w" table)))
;; Fixme: rest of [4]
table)
"Syntax table used by `xml-parse-region'.")
If PARSE-DTD is non-nil, the DTD is parsed rather than skipped,
and returned as the first element of the list.
If PARSE-NS is non-nil, then QNAMES are expanded."
- (save-restriction
- (narrow-to-region beg end)
- ;; Use fixed syntax table to ensure regexp char classes and syntax
- ;; specs DTRT.
- (with-syntax-table (standard-syntax-table)
- (let ((case-fold-search nil) ; XML is case-sensitive.
- xml result dtd)
- (save-excursion
- (if buffer
- (set-buffer buffer))
+ ;; Use fixed syntax table to ensure regexp char classes and syntax
+ ;; specs DTRT.
+ (with-syntax-table (standard-syntax-table)
+ (let ((case-fold-search nil) ; XML is case-sensitive.
+ xml result dtd)
+ (save-excursion
+ (if buffer
+ (set-buffer buffer))
+ (save-restriction
+ (narrow-to-region beg end)
(goto-char (point-min))
(while (not (eobp))
(if (search-forward "<" nil t)
parse-ns
(if parse-ns
(list
- ;; Default for empty prefix is no namespace
+ ;; Default for empty prefix is no namespace
(cons "" "")
;; "xml" namespace
(cons "xml" "http://www.w3.org/XML/1998/namespace")
(let ((pos (match-end 0)))
(unless (search-forward "]]>" nil t)
(error "XML: (Not Well Formed) CDATA section does not end anywhere in the document"))
- (buffer-substring pos (match-beginning 0))))
+ (concat
+ (buffer-substring pos (match-beginning 0))
+ (xml-parse-string))))
;; DTD for the document
((looking-at "<!DOCTYPE")
(let ((dtd (xml-parse-dtd parse-ns)))
;; Parse this node
(let* ((node-name (match-string 1))
- ;; Parse the attribute list.
- (attrs (xml-parse-attlist xml-ns))
- children pos)
+ ;; Parse the attribute list.
+ (attrs (xml-parse-attlist xml-ns))
+ children pos)
- ;; add the xmlns:* attrs to our cache
- (when (consp xml-ns)
+ ;; add the xmlns:* attrs to our cache
+ (when (consp xml-ns)
(dolist (attr attrs)
(when (and (consp (car attr))
(equal "http://www.w3.org/2000/xmlns/"
(push (cons (cdar attr) (cdr attr))
xml-ns))))
- (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))
+ (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))
;; is this an empty element ?
(if (looking-at "/>")
(if (stringp expansion)
(if (stringp (car children))
;; The two strings were separated by a comment.
- (setq children (append (concat (car children) expansion)
+ (setq children (append (list (concat (car children) expansion))
(cdr children)))
(setq children (append (list expansion) children)))
(setq children (append expansion children))))))))
(defun xml-parse-string ()
"Parse the next whatever. Could be a string, or an element."
- (let* ((pos (point))
- (string (progn (if (search-forward "<" nil t)
- (forward-char -1)
- (goto-char (point-max)))
- (buffer-substring pos (point)))))
- ;; Clean up the string. As per XML specifications, the XML
- ;; processor should always pass the whole string to the
- ;; application. But \r's should be replaced:
- ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
- (setq pos 0)
- (while (string-match "\r\n?" string pos)
- (setq string (replace-match "\n" t t string))
- (setq pos (1+ (match-beginning 0))))
-
- (xml-substitute-special string)))
+ (let* ((pos (point))
+ (string (progn (if (search-forward "<" nil t)
+ (forward-char -1)
+ (goto-char (point-max)))
+ (buffer-substring pos (point)))))
+ ;; Clean up the string. As per XML specifications, the XML
+ ;; processor should always pass the whole string to the
+ ;; application. But \r's should be replaced:
+ ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
+ (setq pos 0)
+ (while (string-match "\r\n?" string pos)
+ (setq string (replace-match "\n" t t string))
+ (setq pos (1+ (match-beginning 0))))
+
+ (xml-substitute-special string)))
(defun xml-parse-attlist (&optional xml-ns)
"Return the attribute-list after point.
(replace-regexp-in-string "\\s-\\{2,\\}" " " string)
(let ((expansion (xml-substitute-special string)))
(unless (stringp expansion)
- ; We say this is the constraint. It is acctually that
- ; external entities nor "<" can be in an attribute value.
+ ; We say this is the constraint. It is acctually that
+ ; external entities nor "<" can be in an attribute value.
(error "XML: (Not Well-Formed) Entities in attributes cannot expand into elements"))
(push (cons name expansion) attlist)))
(error "XML: Bad DTD")
(forward-char)
;; Parse the rest of the DTD
- ;; Fixme: Deal with ATTLIST, NOTATION, PIs.
+ ;; Fixme: Deal with NOTATION, PIs.
(while (not (looking-at "\\s-*\\]"))
(skip-syntax-forward " ")
(cond
(t
(if xml-validating-parser
(error "XML: (Validity) Invalid element type in the DTD"))))
-
+
;; rule [45]: the element declaration must be unique
(if (and (assoc element dtd)
xml-validating-parser)
;; Store the element in the DTD
(push (list element type) dtd)
(goto-char end-pos))
+
+ ;; Translation of rule [52] of XML specifications
+ ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
+ "\\)[ \t\n\r]*\\(" xml-att-def-re
+ "\\)*[ \t\n\r]*>"))
+
+ ;; We don't do anything with ATTLIST currently
+ (goto-char (match-end 0)))
+
((looking-at "<!--")
(search-forward "-->"))
((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re
"\\)[ \t\n\r]*\\(" xml-entity-value-re
"\\)[ \t\n\r]*>"))
- (let ((name (buffer-substring (nth 2 (match-data))
- (nth 3 (match-data))))
- (value (buffer-substring (+ (nth 4 (match-data)) 1)
- (- (nth 5 (match-data)) 1))))
- (goto-char (nth 1 (match-data)))
+ (let ((name (match-string 1))
+ (value (substring (match-string 2) 1
+ (- (length (match-string 2)) 1))))
+ (goto-char (match-end 0))
(setq xml-entity-alist
(append xml-entity-alist
(list (cons name
"\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
"[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
"[ \t\n\r]*>")))
- (let ((name (buffer-substring (nth 2 (match-data))
- (nth 3 (match-data))))
- (file (buffer-substring (+ (nth 4 (match-data)) 1)
- (- (nth 5 (match-data)) 1))))
- (goto-char (nth 1 (match-data)))
+ (let ((name (match-string 1))
+ (file (substring (match-string 2) 1
+ (- (length (match-string 2)) 1))))
+ (goto-char (match-end 0))
(setq xml-entity-alist
(append xml-entity-alist
(list (cons name (with-temp-buffer
(xml-parse-fragment
xml-validating-parser
parse-ns))))))))
+ ;; skip parameter entity declarations
+ ((or (looking-at (concat "<!ENTITY[ \t\n\r]+%[ \t\n\r]+\\(" xml-name-re
+ "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
+ "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
+ (looking-at (concat "<!ENTITY[ \t\n\r]+"
+ "%[ \t\n\r]+"
+ "\\(" xml-name-re "\\)[ \t\n\r]+"
+ "PUBLIC[ \t\n\r]+"
+ "\\(\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
+ "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'\\)[ \t\n\r]+"
+ "\\(\"[^\"]+\"\\|'[^']+'\\)"
+ "[ \t\n\r]*>")))
+ (goto-char (match-end 0)))
+ ;; skip parameter entities
+ ((looking-at (concat "%" xml-name-re ";"))
+ (goto-char (match-end 0)))
(t
- (error "XML: (Validity) Invalid DTD item")))))
+ (when xml-validating-parser
+ (error "XML: (Validity) Invalid DTD item"))))))
(if (looking-at "\\s-*]>")
- (goto-char (nth 1 (match-data)))))
+ (goto-char (match-end 0))))
(nreverse dtd)))
(defun xml-parse-elem-type (string)
(let ((point 0)
children end-point)
- (while (string-match "&\\([^;]+\\);" string point)
+ (while (string-match "&\\([^;]*\\);" string point)
(setq end-point (match-end 0))
(let* ((this-part (match-string 1 string))
(prev-part (substring string point (match-beginning 0)))
(if c (string c))))
(entity
(cdr entity))
+ ((eq (length this-part) 0)
+ (error "XML: (Not Well-Formed) No entity given"))
(t
(if xml-validating-parser
(error "XML: (Validity) Undefined entity `%s'"
- (match-string 1 this-part)))))))
+ this-part)
+ xml-undefined-entity)))))
(cond ((null children)
- (if (and (eq (length expansion) 1)
- (stringp (cadr expansion)))
- (setq children (concat prev-part expansion))
- (if (stringp (car expansion))
- (setq children
- (list (concat prev-part (car expansion))
- (append (cdr expansion))))
- (setq children (append expansion prev-part)))))
+ ;; FIXME: If we have an entity that expands into XML, this won't work.
+ (setq children
+ (concat prev-part expansion)))
((stringp children)
(if (stringp expansion)
(setq children (concat children prev-part expansion))