X-Git-Url: https://git.hcoop.net/bpt/emacs.git/blobdiff_plain/0814ca04aea186499b5cc6a1454b029b71f5a5a3..e3fe4da0475674efad25a8a62e90879360ed2601:/lisp/international/mule.el?ds=sidebyside diff --git a/lisp/international/mule.el b/lisp/international/mule.el index b0320ce98d..4a4ad89daa 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el @@ -1,8 +1,9 @@ ;;; mule.el --- basic commands for mulitilingual environment -;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 +;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 ;; Free Software Foundation, Inc. -;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 +;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, +;; 2005, 2006, 2007, 2008 ;; National Institute of Advanced Industrial Science and Technology (AIST) ;; Registration Number H14PRO021 @@ -12,7 +13,7 @@ ;; GNU Emacs is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) +;; the Free Software Foundation; either version 3, or (at your option) ;; any later version. ;; GNU Emacs is distributed in the hope that it will be useful, @@ -38,10 +39,10 @@ Distribution date of this version of MULE (multilingual environment).") (defun load-with-code-conversion (fullname file &optional noerror nomessage) "Execute a file of Lisp code named FILE whose absolute name is FULLNAME. The file contents are decoded before evaluation if necessary. -If optional second arg NOERROR is non-nil, +If optional third arg NOERROR is non-nil, report no error if FILE doesn't exist. Print messages at start and end of loading unless - optional third arg NOMESSAGE is non-nil. + optional fourth arg NOMESSAGE is non-nil. Return t if file exists." (if (null (file-readable-p fullname)) (and (null noerror) @@ -71,8 +72,7 @@ Return t if file exists." (let ((load-file-name fullname) (set-auto-coding-for-load t) (inhibit-file-name-operation nil)) - (save-excursion - (set-buffer buffer) + (with-current-buffer buffer ;; Don't let deactivate-mark remain set. (let (deactivate-mark) (insert-file-contents fullname)) @@ -98,9 +98,9 @@ Return t if file exists." )) (let (kill-buffer-hook kill-buffer-query-functions) (kill-buffer buffer))) - (let ((hook (assoc file after-load-alist))) - (when hook - (mapcar (function eval) (cdr hook)))) + (unless purify-flag + (do-after-load-evaluation fullname)) + (unless (or nomessage noninteractive) (if source (message "Loading %s (source)...done" file) @@ -118,7 +118,7 @@ Return t if file exists." (< (aref vector 0) 160))))) (defsubst charsetp (object) - "T if OBJECT is a charset." + "Return t if OBJECT is a charset." (and (symbolp object) (vectorp (get object 'charset)))) (defsubst charset-info (charset) @@ -268,7 +268,7 @@ See the function `charset-info' for more detail." CODE1 and CODE2 are optional, but if you don't supply sufficient position codes, return a generic character which stands for all characters or group of characters in the character set. -A generic character can be used to index a char table (e.g. syntax-table). +A generic character can be used to index a char table (e.g. `syntax-table'). Such character sets as ascii, eight-bit-control, and eight-bit-graphic don't have corresponding generic characters. If CHARSET is one of @@ -283,16 +283,14 @@ would need to index the corresponding Emacs charset." (make-char-internal (charset-id charset) code1 code2)) (put 'make-char 'byte-compile - (function - (lambda (form) - (let ((charset (nth 1 form))) - (if (charset-quoted-standard-p charset) - (byte-compile-normal-call - (cons 'make-char-internal - (cons (charset-id (nth 1 charset)) (nthcdr 2 form)))) - (byte-compile-normal-call - (cons 'make-char-internal - (cons (list 'charset-id charset) (nthcdr 2 form))))))))) + (lambda (form) + (let ((charset (nth 1 form))) + (byte-compile-normal-call + (cons 'make-char-internal + (cons (if (charset-quoted-standard-p charset) + (charset-id (nth 1 charset)) + (list 'charset-id charset)) + (nthcdr 2 form))))))) (defun charset-list () "Return list of charsets ever defined. @@ -315,14 +313,16 @@ Return nil if such a character is not supported. Currently the only supported coded character set is `ucs' (ISO/IEC 10646: Universal Multi-Octet Coded Character Set), and the result is translated through the translation-table named -`utf-translation-table-for-decode' or the translation-hash-table named -`utf-subst-table-for-decode'. +`utf-translation-table-for-decode', or through the +translation-hash-table named `utf-subst-table-for-decode' +\(if `utf-translate-cjk-mode' is non-nil). Optional argument RESTRICTION specifies a way to map the pair of CCS and CODE-POINT to a character. Currently not supported and just ignored." (cond ((eq ccs 'ucs) - (or (utf-lookup-subst-table-for-decode code-point) + (or (and utf-translate-cjk-mode + (utf-lookup-subst-table-for-decode code-point)) (let ((c (cond ((< code-point 160) code-point) @@ -352,8 +352,9 @@ Return nil if CHAR is not included in CCS. Currently the only supported coded character set is `ucs' (ISO/IEC 10646: Universal Multi-Octet Coded Character Set), and CHAR is first translated through the translation-table named -`utf-translation-table-for-encode' or the translation-hash-table named -`utf-subst-table-for-encode'. +`utf-translation-table-for-encode', or through the +translation-hash-table named `utf-subst-table-for-encode' \(if +`utf-translate-cjk-mode' is non-nil). CHAR should be in one of these charsets: ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff, @@ -366,7 +367,8 @@ code-point in CCS. Currently not supported and just ignored." (charset (car split)) trans) (cond ((eq ccs 'ucs) - (or (utf-lookup-subst-table-for-encode char) + (or (and utf-translate-cjk-mode + (utf-lookup-subst-table-for-encode char)) (let ((table (get 'utf-translation-table-for-encode 'translation-table))) (setq trans (aref table char)) @@ -390,6 +392,20 @@ code-point in CCS. Currently not supported and just ignored." char)))))))) +;; Save the ASCII case table in case we need it later. Some locales +;; (such as Turkish) modify the case behavior of ASCII characters, +;; which can interfere with networking code that uses ASCII strings. + +(defvar ascii-case-table + ;; Code copied from copy-case-table to avoid requiring case-table.el + (let ((tbl (copy-sequence (standard-case-table))) + (up (char-table-extra-slot (standard-case-table) 0))) + (if up (set-char-table-extra-slot tbl 0 (copy-sequence up))) + (set-char-table-extra-slot tbl 1 nil) + (set-char-table-extra-slot tbl 2 nil) + tbl) + "Case table for the ASCII character set.") + ;; Coding system stuff ;; Coding system is a symbol that has the property `coding-system'. @@ -620,16 +636,16 @@ It exists just for backward compatibility, and the value is always nil.") (subsidiaries (vector (intern (format "%s-unix" coding-system)) (intern (format "%s-dos" coding-system)) (intern (format "%s-mac" coding-system)))) - (i 0) - temp) - (while (< i 3) - (put (aref subsidiaries i) 'coding-system coding-spec) - (put (aref subsidiaries i) 'eol-type i) - (add-to-coding-system-list (aref subsidiaries i)) - (setq coding-system-alist - (cons (list (symbol-name (aref subsidiaries i))) - coding-system-alist)) - (setq i (1+ i))) + elt) + (dotimes (i 3) + (setq elt (aref subsidiaries i)) + (put elt 'coding-system coding-spec) + (put elt 'eol-type i) + (put elt 'coding-system-define-form nil) + (add-to-coding-system-list elt) + (or (assoc (symbol-name elt) coding-system-alist) + (setq coding-system-alist + (cons (list (symbol-name elt)) coding-system-alist)))) subsidiaries)) (defun transform-make-coding-system-args (name type &optional doc-string props) @@ -1082,8 +1098,9 @@ a value of `safe-charsets' in PLIST." ;; At last, register CODING-SYSTEM in `coding-system-list' and ;; `coding-system-alist'. (add-to-coding-system-list coding-system) - (setq coding-system-alist (cons (list (symbol-name coding-system)) - coding-system-alist)) + (or (assoc (symbol-name coding-system) coding-system-alist) + (setq coding-system-alist (cons (list (symbol-name coding-system)) + coding-system-alist))) ;; For a coding system of cateogory iso-8-1 and iso-8-2, create ;; XXX-with-esc variants. @@ -1114,8 +1131,9 @@ a value of `safe-charsets' in PLIST." (put alias 'coding-system (coding-system-spec coding-system)) (put alias 'coding-system-define-form nil) (add-to-coding-system-list alias) - (setq coding-system-alist (cons (list (symbol-name alias)) - coding-system-alist)) + (or (assoc (symbol-name alias) coding-system-alist) + (setq coding-system-alist (cons (list (symbol-name alias)) + coding-system-alist))) (let ((eol-type (coding-system-eol-type coding-system))) (if (vectorp eol-type) (progn @@ -1275,15 +1293,15 @@ or by the previous use of this command." "Specify coding system for keyboard input. If you set this on a terminal which can't distinguish Meta keys from 8-bit characters, you will have to use ESC to type Meta characters. -See Info node `Specify Coding' and Info node `Single-Byte Character Support'. +See Info node `Terminal Coding' and Info node `Unibyte Mode'. On non-windowing terminals, this is set from the locale by default. Setting this variable directly does not take effect; use either \\[customize] or \\[set-keyboard-coding-system]." :type '(coding-system :tag "Coding system") - :link '(info-link "(emacs)Specify Coding") - :link '(info-link "(emacs)Single-Byte Character Support") + :link '(info-link "(emacs)Terminal Coding") + :link '(info-link "(emacs)Unibyte Mode") :set (lambda (symbol value) ;; Don't load encoded-kbd-mode unnecessarily. (if (or value (boundp 'encoded-kbd-mode)) @@ -1561,11 +1579,14 @@ text, and convert it in the temporary buffer. Otherwise, convert in-place." ;;; FILE I/O (defcustom auto-coding-alist - '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|[jew]ar\\|xpi\\)\\'" . no-conversion) - ("\\.\\(ARC\\|ZIP\\|LZH\\|ZOO\\|[JEW]AR\\|XPI\\)\\'" . no-conversion) - ("\\.\\(sx[dmicw]\\|tar\\|tgz\\)\\'" . no-conversion) + ;; .exe and .EXE are added to support archive-mode looking at DOS + ;; self-extracting exe archives. + '(("\\.\\(arc\\|zip\\|lzh\\|lha\\|zoo\\|[jew]ar\\|xpi\\|exe\\)\\'" . no-conversion) + ("\\.\\(ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|EXE\\)\\'" . no-conversion) + ("\\.\\(sx[dmicw]\\|odt\\|tar\\|tgz\\)\\'" . no-conversion) ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion) ("\\.\\(jpe?g\\|png\\|gif\\|tiff?\\|p[bpgn]m\\)\\'" . no-conversion) + ("\\.pdf\\'" . no-conversion) ("/#[^/]+#\\'" . emacs-mule)) "Alist of filename patterns vs corresponding coding systems. Each element looks like (REGEXP . CODING-SYSTEM). @@ -1580,7 +1601,10 @@ and the contents of `file-coding-system-alist'." (symbol :tag "Coding system")))) (defcustom auto-coding-regexp-alist - '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)) + '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion) + ("\\`\xFE\xFF" . utf-16be-with-signature) + ("\\`\xFF\xFE" . utf-16le-with-signature) + ("\\`\xEF\xBB\xBF" . utf-8)) "Alist of patterns vs corresponding coding systems. Each element looks like (REGEXP . CODING-SYSTEM). A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading. @@ -1646,6 +1670,9 @@ This is used for loading and byte-compiling Emacs Lisp files.") (setq alist (cdr alist)))) coding-system)) +(put 'enable-character-translation 'permanent-local t) +(put 'enable-character-translation 'safe-local-variable 'booleanp) + (defun find-auto-coding (filename size) "Find a coding system for a file FILENAME of which SIZE bytes follow point. These bytes should include at least the first 1k of the file @@ -1666,8 +1693,7 @@ cons (CODING . SOURCE), where CODING is the specified coding system and SOURCE is a symbol `auto-coding-alist', `auto-coding-regexp-alist', `coding:', or `auto-coding-functions' indicating by what CODING is specified. Note that the validity -of CODING is not checked; it's callers responsibility to check -it. +of CODING is not checked; it's the caller's responsibility to check it. If nothing is specified, the return value is nil." (or (let ((coding-system (auto-coding-alist-lookup filename))) @@ -1683,17 +1709,21 @@ If nothing is specified, the return value is nil." (head-end (+ head-start (min size 1024))) (tail-start (+ head-start (max (- size 3072) 0))) (tail-end (+ head-start size)) - coding-system head-found tail-found pos) + coding-system head-found tail-found pos char-trans) ;; Try a short cut by searching for the string "coding:" ;; and for "unibyte:" at the head and tail of SIZE bytes. (setq head-found (or (search-forward "coding:" head-end t) - (search-forward "unibyte:" head-end t))) + (search-forward "unibyte:" head-end t) + (search-forward "enable-character-translation:" + head-end t))) (if (and head-found (> head-found tail-start)) ;; Head and tail are overlapped. (setq tail-found head-found) (goto-char tail-start) (setq tail-found (or (search-forward "coding:" tail-end t) - (search-forward "unibyte:" tail-end t)))) + (search-forward "unibyte:" tail-end t) + (search-forward "enable-character-translation:" + tail-end t)))) ;; At first check the head. (when head-found @@ -1711,14 +1741,18 @@ If nothing is specified, the return value is nil." (re-search-forward "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)" head-end t)) - (setq coding-system (intern (match-string 2)))))) + (setq coding-system (intern (match-string 2)))) + (when (re-search-forward + "\\(.*;\\)?[ \t]*enable-character-translation:[ \t]*\\([^ ;]+\\)" + head-end t) + (setq char-trans (match-string 2))))) ;; If no coding: tag in the head, check the tail. ;; Here we must pay attention to the case that the end-of-line ;; is just "\r" and we can't use "^" nor "$" in regexp. - (when (and tail-found (not coding-system)) + (when (and tail-found (or (not coding-system) (not char-trans))) (goto-char tail-start) - (re-search-forward "[\r\n]\^L" nil t) + (re-search-forward "[\r\n]\^L" tail-end t) (if (re-search-forward "[\r\n]\\([^[\r\n]*\\)[ \t]*Local Variables:[ \t]*\\([^\r\n]*\\)[\r\n]" tail-end t) @@ -1739,6 +1773,11 @@ If nothing is specified, the return value is nil." "[\r\n]" prefix "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*" suffix "[\r\n]")) + (re-char-trans + (concat + "[\r\n]" prefix + "[ \t]*enable-character-translation[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*" + suffix "[\r\n]")) (re-end (concat "[\r\n]" prefix "[ \t]*End *:[ \t]*" suffix "[\r\n]?")) @@ -1752,7 +1791,21 @@ If nothing is specified, the return value is nil." (setq coding-system 'raw-text)) (when (and (not coding-system) (re-search-forward re-coding tail-end t)) - (setq coding-system (intern (match-string 1))))))) + (setq coding-system (intern (match-string 1)))) + (when (and (not char-trans) + (re-search-forward re-char-trans tail-end t)) + (setq char-trans (match-string 1)))))) + (if coding-system + ;; If the coding-system name ends with "!", remove it and + ;; set char-trans to "nil". + (let ((name (symbol-name coding-system))) + (if (= (aref name (1- (length name))) ?!) + (setq coding-system (intern (substring name 0 -1)) + char-trans "nil")))) + (when (and char-trans + (not (setq char-trans (intern char-trans)))) + (make-local-variable 'enable-character-translation) + (setq enable-character-translation nil)) (if coding-system (cons coding-system :coding))) ;; Finally, try all the `auto-coding-functions'. @@ -1827,9 +1880,15 @@ The optional second arg VISIT non-nil means that we are visiting a file." (let ((pos-marker (copy-marker (+ (point) inserted))) ;; Prevent locking. (buffer-file-name nil)) - (set-buffer-multibyte nil) + (if visit + ;; If we're doing this for find-file, + ;; don't record undo info; this counts as + ;; part of producing the buffer's initial contents. + (let ((buffer-undo-list t)) + (set-buffer-multibyte nil)) + (set-buffer-multibyte nil)) (setq inserted (- pos-marker (point))))) - (set-buffer-modified-p modified-p)))) + (restore-buffer-modified-p modified-p)))) inserted) ;; The coding-spec and eol-type of coding-system returned is decided @@ -1977,7 +2036,8 @@ Part of the job of this function is setting `buffer-undo-list' appropriately." (or coding (setq coding (car (find-operation-coding-system 'insert-file-contents - filename visit beg end replace)))) + (cons filename (current-buffer)) + visit beg end replace)))) (if (coding-system-p coding) (or enable-multibyte-characters (setq coding @@ -2180,8 +2240,7 @@ Value is what BODY returns." (progn (set-category-table ,table) ,@body) - (save-current-buffer - (set-buffer ,old-buffer) + (with-current-buffer ,old-buffer (set-category-table ,old-table)))))) (defun define-translation-hash-table (symbol table) @@ -2243,22 +2302,29 @@ This function is intended to be added to `auto-coding-functions'." (defun sgml-html-meta-auto-coding-function (size) "If the buffer has an HTML meta tag, use it to determine encoding. This function is intended to be added to `auto-coding-functions'." - (setq size (min (+ (point) size) - ;; Only search forward 10 lines - (save-excursion - (forward-line 10) - (point)))) - (when (and (search-forward "" size t) + ;; In case of no header, search only 10 lines. + (forward-line 10)) + (point)))) + ;; Make sure that the buffer really contains an HTML document, by + ;; checking that it starts with a doctype or a start tag + ;; (allowing for whitespace at bob). Note: 'DOCTYPE NETSCAPE' is + ;; useful for Mozilla bookmark files. + (when (and (re-search-forward "\\`[[:space:]\n]*\\(