X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/5178753d3175190585364a4da85b4e5c8e0671af..893d44a1693a196f3022492f66c0205b7ccbeb47:/lisp/xml.el diff --git a/lisp/xml.el b/lisp/xml.el index 444b8c62a7..1251c7a6a7 100644 --- a/lisp/xml.el +++ b/lisp/xml.el @@ -1,7 +1,7 @@ ;;; xml.el --- XML parser ;; Copyright (C) 2000, 2001, 2002, 2003, 2004, -;; 2005 Free Software Foundation, Inc. +;; 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. ;; Author: Emmanuel Briot ;; Maintainer: Mark A. Hershberger @@ -9,10 +9,10 @@ ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify +;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -20,9 +20,7 @@ ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -;; Boston, MA 02110-1301, USA. +;; along with GNU Emacs. If not, see . ;;; Commentary: @@ -76,8 +74,12 @@ ;;; Code: -;; Note that {buffer-substring,match-string}-no-properties were -;; formerly used in several places, but that removes composition info. +;; Note that buffer-substring and match-string were formerly used in +;; several places, because the -no-properties variants remove +;; composition info. However, after some discussion on emacs-devel, +;; the consensus was that the speed of the -no-properties variants was +;; a worthwhile tradeoff especially since we're usually parsing files +;; instead of hand-crafted XML. ;;******************************************************************* ;;** @@ -165,22 +167,19 @@ If FILE is already visited, use its buffer and don't kill it. Returns the top node with all its children. If PARSE-DTD is non-nil, the DTD is parsed rather than skipped. If PARSE-NS is non-nil, then QNAMES are expanded." - (let ((keep)) - (if (get-file-buffer file) - (progn - (set-buffer (get-file-buffer file)) - (setq keep (point))) - (let (auto-mode-alist) ; no need for xml-mode - (find-file file))) - - (let ((xml (xml-parse-region (point-min) - (point-max) - (current-buffer) - parse-dtd parse-ns))) - (if keep - (goto-char keep) - (kill-buffer (current-buffer))) - xml))) + (if (get-file-buffer file) + (with-current-buffer (get-file-buffer file) + (save-excursion + (xml-parse-region (point-min) + (point-max) + (current-buffer) + parse-dtd parse-ns))) + (with-temp-buffer + (insert-file-contents file) + (xml-parse-region (point-min) + (point-max) + (current-buffer) + parse-dtd parse-ns)))) (defvar xml-name-re) @@ -306,34 +305,36 @@ is not well-formed XML. If PARSE-DTD is non-nil, the DTD is parsed rather than skipped, and returned as the first element of the list. If PARSE-NS is non-nil, then QNAMES are expanded." - (save-excursion - (if buffer - (set-buffer buffer)) - (save-restriction - (narrow-to-region beg end) - ;; Use fixed syntax table to ensure regexp char classes and syntax - ;; specs DTRT. - (with-syntax-table (standard-syntax-table) - (let ((case-fold-search nil) ; XML is case-sensitive. - xml result dtd) + ;; Use fixed syntax table to ensure regexp char classes and syntax + ;; specs DTRT. + (with-syntax-table (standard-syntax-table) + (let ((case-fold-search nil) ; XML is case-sensitive. + xml result dtd) + (save-excursion + (if buffer + (set-buffer buffer)) + (save-restriction + (narrow-to-region beg end) (goto-char (point-min)) (while (not (eobp)) (if (search-forward "<" nil t) (progn (forward-char -1) (setq result (xml-parse-tag parse-dtd parse-ns)) - (if (and xml result (not xml-sub-parser)) - ;; translation of rule [1] of XML specifications - (error "XML: (Not Well-Formed) Only one root tag allowed") - (cond - ((null result)) - ((and (listp (car result)) - parse-dtd) - (setq dtd (car result)) - (if (cdr result) ; possible leading comment - (add-to-list 'xml (cdr result)))) - (t - (add-to-list 'xml result))))) + (cond + ((null result) + ;; Not looking at an xml start tag. + (forward-char 1)) + ((and xml (not xml-sub-parser)) + ;; Translation of rule [1] of XML specifications + (error "XML: (Not Well-Formed) Only one root tag allowed")) + ((and (listp (car result)) + parse-dtd) + (setq dtd (car result)) + (if (cdr result) ; possible leading comment + (add-to-list 'xml (cdr result)))) + (t + (add-to-list 'xml result)))) (goto-char (point-max)))) (if parse-dtd (cons dtd (nreverse xml)) @@ -409,7 +410,7 @@ Returns one of: (unless (search-forward "]]>" nil t) (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document")) (concat - (buffer-substring pos (match-beginning 0)) + (buffer-substring-no-properties pos (match-beginning 0)) (xml-parse-string)))) ;; DTD for the document ((looking-at "", but didn't see it.) (error "XML: (Well-Formed) Couldn't parse tag: %s" - (buffer-substring (- (point) 10) (+ (point) 1))))))) + (buffer-substring-no-properties (- (point) 10) (+ (point) 1))))))) (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag) (unless xml-sub-parser ; Usually, we error out. (error "XML: (Well-Formed) Invalid character")) @@ -495,10 +496,8 @@ Returns one of: (defun xml-parse-string () "Parse the next whatever. Could be a string, or an element." (let* ((pos (point)) - (string (progn (if (search-forward "<" nil t) - (forward-char -1) - (goto-char (point-max))) - (buffer-substring pos (point))))) + (string (progn (skip-chars-forward "^<") + (buffer-substring-no-properties pos (point))))) ;; Clean up the string. As per XML specifications, the XML ;; processor should always pass the whole string to the ;; application. But \r's should be replaced: @@ -519,7 +518,7 @@ Leave point at the first non-blank character after the tag." (while (looking-at (eval-when-compile (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) (setq end-pos (match-end 0)) - (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns)) + (setq name (xml-maybe-do-ns (match-string-no-properties 1) nil xml-ns)) (goto-char end-pos) ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize @@ -538,7 +537,7 @@ Leave point at the first non-blank character after the tag." ;; Multiple whitespace characters should be replaced with a single one ;; in the attributes - (let ((string (match-string 1)) + (let ((string (match-string-no-properties 1)) (pos 0)) (replace-regexp-in-string "\\s-\\{2,\\}" " " string) (let ((expansion (xml-substitute-special string))) @@ -578,7 +577,7 @@ This follows the rule [28] in the XML specifications." ;; Get the name of the document (looking-at xml-name-regexp) - (let ((dtd (list (match-string 0) 'dtd)) + (let ((dtd (list (match-string-no-properties 0) 'dtd)) type element end-pos) (goto-char (match-end 0)) @@ -593,18 +592,18 @@ This follows the rule [28] in the XML specifications." "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" nil t)) (error "XML: Missing Public ID")) - (let ((pubid (match-string 1))) + (let ((pubid (match-string-no-properties 1))) (skip-syntax-forward " ") (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) (error "XML: Missing System ID")) - (push (list pubid (match-string 1) 'public) dtd))) + (push (list pubid (match-string-no-properties 1) 'public) dtd))) ((looking-at "SYSTEM\\s-+") (goto-char (match-end 0)) (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) (error "XML: Missing System ID")) - (push (list (match-string 1) 'system) dtd))) + (push (list (match-string-no-properties 1) 'system) dtd))) (skip-syntax-forward " ") (if (eq ?> (char-after)) (forward-char) @@ -621,7 +620,7 @@ This follows the rule [28] in the XML specifications." ((looking-at "]+\\)>") - (setq element (match-string 1) + (setq element (match-string-no-properties 1) type (match-string-no-properties 2)) (setq end-pos (match-end 0)) @@ -632,7 +631,7 @@ This follows the rule [28] in the XML specifications." ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents (setq type 'any)) ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) - (setq type (xml-parse-elem-type (match-string 1 type)))) + (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution nil) (t @@ -662,9 +661,9 @@ This follows the rule [28] in the XML specifications." ((looking-at (concat "")) - (let ((name (match-string 1)) - (value (substring (match-string 2) 1 - (- (length (match-string 2)) 1)))) + (let ((name (match-string-no-properties 1)) + (value (substring (match-string-no-properties 2) 1 + (- (length (match-string-no-properties 2)) 1)))) (goto-char (match-end 0)) (setq xml-entity-alist (append xml-entity-alist @@ -684,9 +683,9 @@ This follows the rule [28] in the XML specifications." "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" "[ \t\n\r]*>"))) - (let ((name (match-string 1)) - (file (substring (match-string 2) 1 - (- (length (match-string 2)) 1)))) + (let ((name (match-string-no-properties 1)) + (file (substring (match-string-no-properties 2) 1 + (- (length (match-string-no-properties 2)) 1)))) (goto-char (match-end 0)) (setq xml-entity-alist (append xml-entity-alist @@ -725,8 +724,8 @@ This follows the rule [28] in the XML specifications." (let (elem modifier) (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string) (progn - (setq elem (match-string 1 string) - modifier (match-string 2 string)) + (setq elem (match-string-no-properties 1 string) + modifier (match-string-no-properties 2 string)) (if (string-match "|" elem) (setq elem (cons 'choice (mapcar 'xml-parse-elem-type @@ -736,8 +735,8 @@ This follows the rule [28] in the XML specifications." (mapcar 'xml-parse-elem-type (split-string elem ","))))))) (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string) - (setq elem (match-string 1 string) - modifier (match-string 2 string)))) + (setq elem (match-string-no-properties 1 string) + modifier (match-string-no-properties 2 string)))) (if (and (stringp elem) (string= elem "#PCDATA")) (setq elem 'pcdata)) @@ -768,19 +767,19 @@ This follows the rule [28] in the XML specifications." children end-point) (while (string-match "&\\([^;]*\\);" string point) (setq end-point (match-end 0)) - (let* ((this-part (match-string 1 string)) + (let* ((this-part (match-string-no-properties 1 string)) (prev-part (substring string point (match-beginning 0))) (entity (assoc this-part xml-entity-alist)) (expansion (cond ((string-match "#\\([0-9]+\\)" this-part) (let ((c (decode-char 'ucs - (string-to-number (match-string 1 this-part))))) + (string-to-number (match-string-no-properties 1 this-part))))) (if c (string c)))) ((string-match "#x\\([[:xdigit:]]+\\)" this-part) (let ((c (decode-char 'ucs - (string-to-number (match-string 1 this-part) 16)))) + (string-to-number (match-string-no-properties 1 this-part) 16)))) (if c (string c)))) (entity (cdr entity)) @@ -826,6 +825,25 @@ This follows the rule [28] in the XML specifications." "") (substring string point)))))) +(defun xml-substitute-numeric-entities (string) + "Substitute SGML numeric entities by their respective utf characters. +This function replaces numeric entities in the input STRING and +returns the modified string. For example \"*\" gets replaced +by \"*\"." + (if (and string (stringp string)) + (let ((start 0)) + (while (string-match "&#\\([0-9]+\\);" string start) + (condition-case nil + (setq string (replace-match + (string (read (substring string + (match-beginning 1) + (match-end 1)))) + nil nil string)) + (error nil)) + (setq start (1+ (match-beginning 0)))) + string) + nil)) + ;;******************************************************************* ;;** ;;** Printing a tree. @@ -843,6 +861,18 @@ The first line is indented with the optional INDENT-STRING." (defalias 'xml-print 'xml-debug-print) +(defun xml-escape-string (string) + "Return the string with entity substitutions made from +xml-entity-alist." + (mapconcat (lambda (byte) + (let ((char (char-to-string byte))) + (if (rassoc char xml-entity-alist) + (concat "&" (car (rassoc char xml-entity-alist)) ";") + char))) + ;; This differs from the non-unicode branch. Just + ;; grabbing the string works here. + string "")) + (defun xml-debug-print-internal (xml indent-string) "Outputs the XML tree in the current buffer. The first line is indented with INDENT-STRING." @@ -853,7 +883,8 @@ The first line is indented with INDENT-STRING." ;; output the attribute list (setq attlist (xml-node-attributes tree)) (while attlist - (insert ?\ (symbol-name (caar attlist)) "=\"" (cdar attlist) ?\") + (insert ?\ (symbol-name (caar attlist)) "=\"" + (xml-escape-string (cdar attlist)) ?\") (setq attlist (cdr attlist))) (setq tree (xml-node-children tree)) @@ -868,7 +899,8 @@ The first line is indented with INDENT-STRING." ((listp node) (insert ?\n) (xml-debug-print-internal node (concat indent-string " "))) - ((stringp node) (insert node)) + ((stringp node) + (insert (xml-escape-string node))) (t (error "Invalid XML tree"))))