;;; mule.el --- basic commands for mulitilingual environment
-;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
;; Free Software Foundation, Inc.
-;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+;; 2005, 2006, 2007, 2008
;; National Institute of Advanced Industrial Science and Technology (AIST)
;; Registration Number H14PRO021
;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
+;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
(defun load-with-code-conversion (fullname file &optional noerror nomessage)
"Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
The file contents are decoded before evaluation if necessary.
-If optional second arg NOERROR is non-nil,
+If optional third arg NOERROR is non-nil,
report no error if FILE doesn't exist.
Print messages at start and end of loading unless
- optional third arg NOMESSAGE is non-nil.
+ optional fourth arg NOMESSAGE is non-nil.
Return t if file exists."
(if (null (file-readable-p fullname))
(and (null noerror)
(let ((load-file-name fullname)
(set-auto-coding-for-load t)
(inhibit-file-name-operation nil))
- (save-excursion
- (set-buffer buffer)
+ (with-current-buffer buffer
;; Don't let deactivate-mark remain set.
(let (deactivate-mark)
(insert-file-contents fullname))
))
(let (kill-buffer-hook kill-buffer-query-functions)
(kill-buffer buffer)))
- (let ((hook (assoc file after-load-alist)))
- (when hook
- (mapcar (function eval) (cdr hook))))
+ (unless purify-flag
+ (do-after-load-evaluation fullname))
+
(unless (or nomessage noninteractive)
(if source
(message "Loading %s (source)...done" file)
(< (aref vector 0) 160)))))
(defsubst charsetp (object)
- "T if OBJECT is a charset."
+ "Return t if OBJECT is a charset."
(and (symbolp object) (vectorp (get object 'charset))))
(defsubst charset-info (charset)
CODE1 and CODE2 are optional, but if you don't supply
sufficient position codes, return a generic character which stands for
all characters or group of characters in the character set.
-A generic character can be used to index a char table (e.g. syntax-table).
+A generic character can be used to index a char table (e.g. `syntax-table').
Such character sets as ascii, eight-bit-control, and eight-bit-graphic
don't have corresponding generic characters. If CHARSET is one of
(make-char-internal (charset-id charset) code1 code2))
(put 'make-char 'byte-compile
- (function
- (lambda (form)
- (let ((charset (nth 1 form)))
- (if (charset-quoted-standard-p charset)
- (byte-compile-normal-call
- (cons 'make-char-internal
- (cons (charset-id (nth 1 charset)) (nthcdr 2 form))))
- (byte-compile-normal-call
- (cons 'make-char-internal
- (cons (list 'charset-id charset) (nthcdr 2 form)))))))))
+ (lambda (form)
+ (let ((charset (nth 1 form)))
+ (byte-compile-normal-call
+ (cons 'make-char-internal
+ (cons (if (charset-quoted-standard-p charset)
+ (charset-id (nth 1 charset))
+ (list 'charset-id charset))
+ (nthcdr 2 form)))))))
(defun charset-list ()
"Return list of charsets ever defined.
Currently the only supported coded character set is `ucs' (ISO/IEC
10646: Universal Multi-Octet Coded Character Set), and the result is
translated through the translation-table named
-`utf-translation-table-for-decode' or the translation-hash-table named
-`utf-subst-table-for-decode'.
+`utf-translation-table-for-decode', or through the
+translation-hash-table named `utf-subst-table-for-decode'
+\(if `utf-translate-cjk-mode' is non-nil).
Optional argument RESTRICTION specifies a way to map the pair of CCS
and CODE-POINT to a character. Currently not supported and just ignored."
(cond
((eq ccs 'ucs)
- (or (utf-lookup-subst-table-for-decode code-point)
+ (or (and utf-translate-cjk-mode
+ (utf-lookup-subst-table-for-decode code-point))
(let ((c (cond
((< code-point 160)
code-point)
Currently the only supported coded character set is `ucs' (ISO/IEC
10646: Universal Multi-Octet Coded Character Set), and CHAR is first
translated through the translation-table named
-`utf-translation-table-for-encode' or the translation-hash-table named
-`utf-subst-table-for-encode'.
+`utf-translation-table-for-encode', or through the
+translation-hash-table named `utf-subst-table-for-encode' \(if
+`utf-translate-cjk-mode' is non-nil).
CHAR should be in one of these charsets:
ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff,
(charset (car split))
trans)
(cond ((eq ccs 'ucs)
- (or (utf-lookup-subst-table-for-encode char)
+ (or (and utf-translate-cjk-mode
+ (utf-lookup-subst-table-for-encode char))
(let ((table (get 'utf-translation-table-for-encode
'translation-table)))
(setq trans (aref table char))
char))))))))
\f
+;; Save the ASCII case table in case we need it later. Some locales
+;; (such as Turkish) modify the case behavior of ASCII characters,
+;; which can interfere with networking code that uses ASCII strings.
+
+(defvar ascii-case-table
+ ;; Code copied from copy-case-table to avoid requiring case-table.el
+ (let ((tbl (copy-sequence (standard-case-table)))
+ (up (char-table-extra-slot (standard-case-table) 0)))
+ (if up (set-char-table-extra-slot tbl 0 (copy-sequence up)))
+ (set-char-table-extra-slot tbl 1 nil)
+ (set-char-table-extra-slot tbl 2 nil)
+ tbl)
+ "Case table for the ASCII character set.")
+\f
;; Coding system stuff
;; Coding system is a symbol that has the property `coding-system'.
(subsidiaries (vector (intern (format "%s-unix" coding-system))
(intern (format "%s-dos" coding-system))
(intern (format "%s-mac" coding-system))))
- (i 0)
- temp)
- (while (< i 3)
- (put (aref subsidiaries i) 'coding-system coding-spec)
- (put (aref subsidiaries i) 'eol-type i)
- (add-to-coding-system-list (aref subsidiaries i))
- (setq coding-system-alist
- (cons (list (symbol-name (aref subsidiaries i)))
- coding-system-alist))
- (setq i (1+ i)))
+ elt)
+ (dotimes (i 3)
+ (setq elt (aref subsidiaries i))
+ (put elt 'coding-system coding-spec)
+ (put elt 'eol-type i)
+ (put elt 'coding-system-define-form nil)
+ (add-to-coding-system-list elt)
+ (or (assoc (symbol-name elt) coding-system-alist)
+ (setq coding-system-alist
+ (cons (list (symbol-name elt)) coding-system-alist))))
subsidiaries))
(defun transform-make-coding-system-args (name type &optional doc-string props)
;; At last, register CODING-SYSTEM in `coding-system-list' and
;; `coding-system-alist'.
(add-to-coding-system-list coding-system)
- (setq coding-system-alist (cons (list (symbol-name coding-system))
- coding-system-alist))
+ (or (assoc (symbol-name coding-system) coding-system-alist)
+ (setq coding-system-alist (cons (list (symbol-name coding-system))
+ coding-system-alist)))
;; For a coding system of cateogory iso-8-1 and iso-8-2, create
;; XXX-with-esc variants.
(put alias 'coding-system (coding-system-spec coding-system))
(put alias 'coding-system-define-form nil)
(add-to-coding-system-list alias)
- (setq coding-system-alist (cons (list (symbol-name alias))
- coding-system-alist))
+ (or (assoc (symbol-name alias) coding-system-alist)
+ (setq coding-system-alist (cons (list (symbol-name alias))
+ coding-system-alist)))
(let ((eol-type (coding-system-eol-type coding-system)))
(if (vectorp eol-type)
(progn
"Specify coding system for keyboard input.
If you set this on a terminal which can't distinguish Meta keys from
8-bit characters, you will have to use ESC to type Meta characters.
-See Info node `Specify Coding' and Info node `Single-Byte Character Support'.
+See Info node `Terminal Coding' and Info node `Unibyte Mode'.
On non-windowing terminals, this is set from the locale by default.
Setting this variable directly does not take effect;
use either \\[customize] or \\[set-keyboard-coding-system]."
:type '(coding-system :tag "Coding system")
- :link '(info-link "(emacs)Specify Coding")
- :link '(info-link "(emacs)Single-Byte Character Support")
+ :link '(info-link "(emacs)Terminal Coding")
+ :link '(info-link "(emacs)Unibyte Mode")
:set (lambda (symbol value)
;; Don't load encoded-kbd-mode unnecessarily.
(if (or value (boundp 'encoded-kbd-mode))
;;; FILE I/O
(defcustom auto-coding-alist
- '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|[jew]ar\\|xpi\\)\\'" . no-conversion)
- ("\\.\\(ARC\\|ZIP\\|LZH\\|ZOO\\|[JEW]AR\\|XPI\\)\\'" . no-conversion)
- ("\\.\\(sx[dmicw]\\|tar\\|tgz\\)\\'" . no-conversion)
+ ;; .exe and .EXE are added to support archive-mode looking at DOS
+ ;; self-extracting exe archives.
+ '(("\\.\\(arc\\|zip\\|lzh\\|lha\\|zoo\\|[jew]ar\\|xpi\\|exe\\)\\'" . no-conversion)
+ ("\\.\\(ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|EXE\\)\\'" . no-conversion)
+ ("\\.\\(sx[dmicw]\\|odt\\|tar\\|tgz\\)\\'" . no-conversion)
("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion)
("\\.\\(jpe?g\\|png\\|gif\\|tiff?\\|p[bpgn]m\\)\\'" . no-conversion)
+ ("\\.pdf\\'" . no-conversion)
("/#[^/]+#\\'" . emacs-mule))
"Alist of filename patterns vs corresponding coding systems.
Each element looks like (REGEXP . CODING-SYSTEM).
(symbol :tag "Coding system"))))
(defcustom auto-coding-regexp-alist
- '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion))
+ '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)
+ ("\\`\xFE\xFF" . utf-16be-with-signature)
+ ("\\`\xFF\xFE" . utf-16le-with-signature)
+ ("\\`\xEF\xBB\xBF" . utf-8))
"Alist of patterns vs corresponding coding systems.
Each element looks like (REGEXP . CODING-SYSTEM).
A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.
(setq alist (cdr alist))))
coding-system))
+(put 'enable-character-translation 'permanent-local t)
+(put 'enable-character-translation 'safe-local-variable 'booleanp)
+
(defun find-auto-coding (filename size)
"Find a coding system for a file FILENAME of which SIZE bytes follow point.
These bytes should include at least the first 1k of the file
system and SOURCE is a symbol `auto-coding-alist',
`auto-coding-regexp-alist', `coding:', or `auto-coding-functions'
indicating by what CODING is specified. Note that the validity
-of CODING is not checked; it's callers responsibility to check
-it.
+of CODING is not checked; it's the caller's responsibility to check it.
If nothing is specified, the return value is nil."
(or (let ((coding-system (auto-coding-alist-lookup filename)))
(head-end (+ head-start (min size 1024)))
(tail-start (+ head-start (max (- size 3072) 0)))
(tail-end (+ head-start size))
- coding-system head-found tail-found pos)
+ coding-system head-found tail-found pos char-trans)
;; Try a short cut by searching for the string "coding:"
;; and for "unibyte:" at the head and tail of SIZE bytes.
(setq head-found (or (search-forward "coding:" head-end t)
- (search-forward "unibyte:" head-end t)))
+ (search-forward "unibyte:" head-end t)
+ (search-forward "enable-character-translation:"
+ head-end t)))
(if (and head-found (> head-found tail-start))
;; Head and tail are overlapped.
(setq tail-found head-found)
(goto-char tail-start)
(setq tail-found (or (search-forward "coding:" tail-end t)
- (search-forward "unibyte:" tail-end t))))
+ (search-forward "unibyte:" tail-end t)
+ (search-forward "enable-character-translation:"
+ tail-end t))))
;; At first check the head.
(when head-found
(re-search-forward
"\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
head-end t))
- (setq coding-system (intern (match-string 2))))))
+ (setq coding-system (intern (match-string 2))))
+ (when (re-search-forward
+ "\\(.*;\\)?[ \t]*enable-character-translation:[ \t]*\\([^ ;]+\\)"
+ head-end t)
+ (setq char-trans (match-string 2)))))
;; If no coding: tag in the head, check the tail.
;; Here we must pay attention to the case that the end-of-line
;; is just "\r" and we can't use "^" nor "$" in regexp.
- (when (and tail-found (not coding-system))
+ (when (and tail-found (or (not coding-system) (not char-trans)))
(goto-char tail-start)
- (re-search-forward "[\r\n]\^L" nil t)
+ (re-search-forward "[\r\n]\^L" tail-end t)
(if (re-search-forward
"[\r\n]\\([^[\r\n]*\\)[ \t]*Local Variables:[ \t]*\\([^\r\n]*\\)[\r\n]"
tail-end t)
"[\r\n]" prefix
"[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
suffix "[\r\n]"))
+ (re-char-trans
+ (concat
+ "[\r\n]" prefix
+ "[ \t]*enable-character-translation[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
+ suffix "[\r\n]"))
(re-end
(concat "[\r\n]" prefix "[ \t]*End *:[ \t]*" suffix
"[\r\n]?"))
(setq coding-system 'raw-text))
(when (and (not coding-system)
(re-search-forward re-coding tail-end t))
- (setq coding-system (intern (match-string 1)))))))
+ (setq coding-system (intern (match-string 1))))
+ (when (and (not char-trans)
+ (re-search-forward re-char-trans tail-end t))
+ (setq char-trans (match-string 1))))))
+ (if coding-system
+ ;; If the coding-system name ends with "!", remove it and
+ ;; set char-trans to "nil".
+ (let ((name (symbol-name coding-system)))
+ (if (= (aref name (1- (length name))) ?!)
+ (setq coding-system (intern (substring name 0 -1))
+ char-trans "nil"))))
+ (when (and char-trans
+ (not (setq char-trans (intern char-trans))))
+ (make-local-variable 'enable-character-translation)
+ (setq enable-character-translation nil))
(if coding-system
(cons coding-system :coding)))
;; Finally, try all the `auto-coding-functions'.
(let ((pos-marker (copy-marker (+ (point) inserted)))
;; Prevent locking.
(buffer-file-name nil))
- (set-buffer-multibyte nil)
+ (if visit
+ ;; If we're doing this for find-file,
+ ;; don't record undo info; this counts as
+ ;; part of producing the buffer's initial contents.
+ (let ((buffer-undo-list t))
+ (set-buffer-multibyte nil))
+ (set-buffer-multibyte nil))
(setq inserted (- pos-marker (point)))))
- (set-buffer-modified-p modified-p))))
+ (restore-buffer-modified-p modified-p))))
inserted)
;; The coding-spec and eol-type of coding-system returned is decided
(or coding
(setq coding (car (find-operation-coding-system
'insert-file-contents
- filename visit beg end replace))))
+ (cons filename (current-buffer))
+ visit beg end replace))))
(if (coding-system-p coding)
(or enable-multibyte-characters
(setq coding
(progn
(set-category-table ,table)
,@body)
- (save-current-buffer
- (set-buffer ,old-buffer)
+ (with-current-buffer ,old-buffer
(set-category-table ,old-table))))))
(defun define-translation-hash-table (symbol table)
(defun sgml-html-meta-auto-coding-function (size)
"If the buffer has an HTML meta tag, use it to determine encoding.
This function is intended to be added to `auto-coding-functions'."
- (setq size (min (+ (point) size)
- ;; Only search forward 10 lines
- (save-excursion
- (forward-line 10)
- (point))))
- (when (and (search-forward "<html" size t)
- (re-search-forward "<meta\\s-+http-equiv=\"content-type\"\\s-+content=\"text/\\sw+;\\s-*charset=\\(.+?\\)\"" size t))
+ (let ((case-fold-search t))
+ (setq size (min (+ (point) size)
+ (save-excursion
+ ;; Limit the search by the end of the HTML header.
+ (or (search-forward "</head>" size t)
+ ;; In case of no header, search only 10 lines.
+ (forward-line 10))
+ (point))))
+ ;; Make sure that the buffer really contains an HTML document, by
+ ;; checking that it starts with a doctype or a <HTML> start tag
+ ;; (allowing for whitespace at bob). Note: 'DOCTYPE NETSCAPE' is
+ ;; useful for Mozilla bookmark files.
+ (when (and (re-search-forward "\\`[[:space:]\n]*\\(<!doctype[[:space:]\n]+\\(html\\|netscape\\)\\|<html\\)" size t)
+ (re-search-forward "<meta\\s-+http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*charset=\\(.+?\\)[\"']" size t))
(let* ((match (match-string 1))
(sym (intern (downcase match))))
(if (coding-system-p sym)
sym
(message "Warning: unknown coding system \"%s\"" match)
- nil))))
+ nil)))))
;;;
(provide 'mule)
-;;; arch-tag: 9aebaa6e-0e8a-40a9-b857-cb5d04a39e7c
+;; arch-tag: 9aebaa6e-0e8a-40a9-b857-cb5d04a39e7c
;;; mule.el ends here