X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/9850eff524bd0747a9561f3b4c90dfc3749f4ecb..66e2e71d556785cd10270931c6fc0424b9dea6a6:/lisp/xml.el diff --git a/lisp/xml.el b/lisp/xml.el index 74a9ae8334..52bb0de7ea 100644 --- a/lisp/xml.el +++ b/lisp/xml.el @@ -1,7 +1,6 @@ ;;; xml.el --- XML parser -;; Copyright (C) 2000, 2001, 2002, 2003, 2004, -;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +;; Copyright (C) 2000-2011 Free Software Foundation, Inc. ;; Author: Emmanuel Briot ;; Maintainer: Mark A. Hershberger @@ -189,7 +188,7 @@ If PARSE-NS is non-nil, then QNAMES are expanded." (name-chars (concat "-[:digit:]." start-chars)) ;;[3] S ::= (#x20 | #x9 | #xD | #xA)+ (whitespace "[ \t\n\r]")) - ;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] + ;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] ;; | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] ;; | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] ;; | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] @@ -227,7 +226,7 @@ If PARSE-NS is non-nil, then QNAMES are expanded." (defvar xml-notation-type-re (concat "\\(?:NOTATION" whitespace "(" whitespace "*" xml-name-re "\\(?:" whitespace "*|" whitespace "*" xml-name-re "\\)*" whitespace "*)\\)")) ;;[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration] [VC: No Duplicate Tokens] - (defvar xml-enumeration-re (concat "\\(?:(" whitespace "*" xml-nmtoken-re + (defvar xml-enumeration-re (concat "\\(?:(" whitespace "*" xml-nmtoken-re "\\(?:" whitespace "*|" whitespace "*" xml-nmtoken-re "\\)*" whitespace ")\\)")) ;;[57] EnumeratedType ::= NotationType | Enumeration @@ -248,7 +247,7 @@ If PARSE-NS is non-nil, then QNAMES are expanded." xml-pe-reference-re "\\|" xml-reference-re "\\)*'\\)"))) ;;[75] ExternalID ::= 'SYSTEM' S SystemLiteral ;; | 'PUBLIC' S PubidLiteral S SystemLiteral -;;[76] NDataDecl ::= S 'NDATA' S +;;[76] NDataDecl ::= S 'NDATA' S ;;[73] EntityDef ::= EntityValue| (ExternalID NDataDecl?) ;;[71] GEDecl ::= '' ;;[74] PEDef ::= EntityValue | ExternalID @@ -321,18 +320,20 @@ If PARSE-NS is non-nil, then QNAMES are expanded." (progn (forward-char -1) (setq result (xml-parse-tag parse-dtd parse-ns)) - (if (and xml result (not xml-sub-parser)) - ;; translation of rule [1] of XML specifications - (error "XML: (Not Well-Formed) Only one root tag allowed") - (cond - ((null result)) - ((and (listp (car result)) - parse-dtd) - (setq dtd (car result)) - (if (cdr result) ; possible leading comment - (add-to-list 'xml (cdr result)))) - (t - (add-to-list 'xml result))))) + (cond + ((null result) + ;; Not looking at an xml start tag. + (forward-char 1)) + ((and xml (not xml-sub-parser)) + ;; Translation of rule [1] of XML specifications + (error "XML: (Not Well-Formed) Only one root tag allowed")) + ((and (listp (car result)) + parse-dtd) + (setq dtd (car result)) + (if (cdr result) ; possible leading comment + (add-to-list 'xml (cdr result)))) + (t + (add-to-list 'xml result)))) (goto-char (point-max)))) (if parse-dtd (cons dtd (nreverse xml)) @@ -432,7 +433,7 @@ Returns one of: (let* ((node-name (match-string-no-properties 1)) ;; Parse the attribute list. (attrs (xml-parse-attlist xml-ns)) - children pos) + children) ;; add the xmlns:* attrs to our cache (when (consp xml-ns) @@ -494,9 +495,7 @@ Returns one of: (defun xml-parse-string () "Parse the next whatever. Could be a string, or an element." (let* ((pos (point)) - (string (progn (if (search-forward "<" nil t) - (forward-char -1) - (goto-char (point-max))) + (string (progn (skip-chars-forward "^<") (buffer-substring-no-properties pos (point))))) ;; Clean up the string. As per XML specifications, the XML ;; processor should always pass the whole string to the @@ -537,8 +536,7 @@ Leave point at the first non-blank character after the tag." ;; Multiple whitespace characters should be replaced with a single one ;; in the attributes - (let ((string (match-string-no-properties 1)) - (pos 0)) + (let ((string (match-string-no-properties 1))) (replace-regexp-in-string "\\s-\\{2,\\}" " " string) (let ((expansion (xml-substitute-special string))) (unless (stringp expansion) @@ -635,7 +633,7 @@ This follows the rule [28] in the XML specifications." ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution nil) (t - (if xml-validating-parser + (if xml-validating-parser (error "XML: (Validity) Invalid element type in the DTD")))) ;; rule [45]: the element declaration must be unique @@ -667,7 +665,7 @@ This follows the rule [28] in the XML specifications." (goto-char (match-end 0)) (setq xml-entity-alist (append xml-entity-alist - (list (cons name + (list (cons name (with-temp-buffer (insert value) (goto-char (point-min)) @@ -770,7 +768,7 @@ This follows the rule [28] in the XML specifications." (let* ((this-part (match-string-no-properties 1 string)) (prev-part (substring string point (match-beginning 0))) (entity (assoc this-part xml-entity-alist)) - (expansion + (expansion (cond ((string-match "#\\([0-9]+\\)" this-part) (let ((c (decode-char 'ucs @@ -825,6 +823,25 @@ This follows the rule [28] in the XML specifications." "") (substring string point)))))) +(defun xml-substitute-numeric-entities (string) + "Substitute SGML numeric entities by their respective utf characters. +This function replaces numeric entities in the input STRING and +returns the modified string. For example \"*\" gets replaced +by \"*\"." + (if (and string (stringp string)) + (let ((start 0)) + (while (string-match "&#\\([0-9]+\\);" string start) + (condition-case nil + (setq string (replace-match + (string (read (substring string + (match-beginning 1) + (match-end 1)))) + nil nil string)) + (error nil)) + (setq start (1+ (match-beginning 0)))) + string) + nil)) + ;;******************************************************************* ;;** ;;** Printing a tree. @@ -892,5 +909,4 @@ The first line is indented with INDENT-STRING." (provide 'xml) -;; arch-tag: 5864b283-5a68-4b59-a20d-36a72b353b9b ;;; xml.el ends here