Add 2010 to copyright years.
[bpt/emacs.git] / lisp / international / mule-cmds.el
CommitLineData
caff3c0a 1;;; mule-cmds.el --- commands for multilingual environment -*-coding: iso-2022-7bit -*-
9ee5b744 2
26b3dce6 3;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
114f9c96 4;; 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
7976eda0 5;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
114f9c96 6;; 2005, 2006, 2007, 2008, 2009, 2010
2fd125a3
KH
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H14PRO021
8f924df7 9;; Copyright (C) 2003
97941b05
KH
10;; National Institute of Advanced Industrial Science and Technology (AIST)
11;; Registration Number H13PRO009
4ed46869 12
1f547b92 13;; Keywords: mule, i18n
4ed46869
KH
14
15;; This file is part of GNU Emacs.
16
4936186e 17;; GNU Emacs is free software: you can redistribute it and/or modify
4ed46869 18;; it under the terms of the GNU General Public License as published by
4936186e
GM
19;; the Free Software Foundation, either version 3 of the License, or
20;; (at your option) any later version.
4ed46869
KH
21
22;; GNU Emacs is distributed in the hope that it will be useful,
23;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25;; GNU General Public License for more details.
26
27;; You should have received a copy of the GNU General Public License
4936186e 28;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
4ed46869 29
60370d40
PJ
30;;; Commentary:
31
4ed46869
KH
32;;; Code:
33
a4723629
GM
34(eval-when-compile (require 'cl)) ; letf
35
26b3dce6
GM
36(defvar dos-codepage)
37(autoload 'widget-value "wid-edit")
cda74479 38
4a91d930 39(defvar mac-system-coding-system)
4a91d930 40
4ed46869
KH
41;;; MULE related key bindings and menus.
42
64657387
SM
43(defvar mule-keymap
44 (let ((map (make-sparse-keymap)))
45 (define-key map "f" 'set-buffer-file-coding-system)
46 (define-key map "r" 'revert-buffer-with-coding-system)
47 (define-key map "F" 'set-file-name-coding-system)
48 (define-key map "t" 'set-terminal-coding-system)
49 (define-key map "k" 'set-keyboard-coding-system)
50 (define-key map "p" 'set-buffer-process-coding-system)
51 (define-key map "x" 'set-selection-coding-system)
52 (define-key map "X" 'set-next-selection-coding-system)
53 (define-key map "\C-\\" 'set-input-method)
54 (define-key map "c" 'universal-coding-system-argument)
55 (define-key map "l" 'set-language-environment)
56 map)
33d17698 57 "Keymap for Mule (Multilingual environment) specific commands.")
4ed46869 58
8f81f784 59;; Keep "C-x C-m ..." for mule specific commands.
0709d285 60(define-key ctl-x-map "\C-m" mule-keymap)
ef8a8c8c 61
dcad02bc 62(defvar describe-language-environment-map
64657387
SM
63 (let ((map (make-sparse-keymap "Describe Language Environment")))
64 (define-key map
8f43cbf3 65 [Default] `(menu-item ,(purecopy "Default") describe-specified-language-support))
64657387 66 map))
15b3e511 67
dcad02bc 68(defvar setup-language-environment-map
64657387
SM
69 (let ((map (make-sparse-keymap "Set Language Environment")))
70 (define-key map
8f43cbf3 71 [Default] `(menu-item ,(purecopy "Default") setup-specified-language-environment))
64657387 72 map))
15b3e511 73
dcad02bc 74(defvar set-coding-system-map
64657387
SM
75 (let ((map (make-sparse-keymap "Set Coding System")))
76 (define-key-after map [universal-coding-system-argument]
8f43cbf3
DN
77 `(menu-item ,(purecopy "For Next Command") universal-coding-system-argument
78 :help ,(purecopy "Coding system to be used by next command")))
04991a1c 79 (define-key-after map [separator-1] menu-bar-separator)
64657387 80 (define-key-after map [set-buffer-file-coding-system]
8f43cbf3
DN
81 `(menu-item ,(purecopy "For Saving This Buffer") set-buffer-file-coding-system
82 :help ,(purecopy "How to encode this buffer when saved")))
64657387 83 (define-key-after map [revert-buffer-with-coding-system]
8f43cbf3 84 `(menu-item ,(purecopy "For Reverting This File Now")
64657387
SM
85 revert-buffer-with-coding-system
86 :enable buffer-file-name
8f43cbf3 87 :help ,(purecopy "Revisit this file immediately using specified coding system")))
64657387 88 (define-key-after map [set-file-name-coding-system]
8f43cbf3
DN
89 `(menu-item ,(purecopy "For File Name") set-file-name-coding-system
90 :help ,(purecopy "How to decode/encode file names")))
04991a1c 91 (define-key-after map [separator-2] menu-bar-separator)
64657387
SM
92
93 (define-key-after map [set-keyboard-coding-system]
8f43cbf3
DN
94 `(menu-item ,(purecopy "For Keyboard") set-keyboard-coding-system
95 :help ,(purecopy "How to decode keyboard input")))
64657387 96 (define-key-after map [set-terminal-coding-system]
8f43cbf3 97 `(menu-item ,(purecopy "For Terminal") set-terminal-coding-system
64657387 98 :enable (null (memq initial-window-system '(x w32 ns)))
8f43cbf3 99 :help ,(purecopy "How to encode terminal output")))
04991a1c 100 (define-key-after map [separator-3] menu-bar-separator)
64657387
SM
101
102 (define-key-after map [set-selection-coding-system]
8f43cbf3 103 `(menu-item ,(purecopy "For X Selections/Clipboard") set-selection-coding-system
64657387 104 :visible (display-selections-p)
8f43cbf3 105 :help ,(purecopy "How to en/decode data to/from selection/clipboard")))
64657387 106 (define-key-after map [set-next-selection-coding-system]
8f43cbf3 107 `(menu-item ,(purecopy "For Next X Selection") set-next-selection-coding-system
64657387 108 :visible (display-selections-p)
8f43cbf3 109 :help ,(purecopy "How to en/decode next selection/clipboard operation")))
64657387 110 (define-key-after map [set-buffer-process-coding-system]
8f43cbf3 111 `(menu-item ,(purecopy "For I/O with Subprocess") set-buffer-process-coding-system
64657387
SM
112 :visible (fboundp 'start-process)
113 :enable (get-buffer-process (current-buffer))
8f43cbf3 114 :help ,(purecopy "How to en/decode I/O from/to subprocess connected to this buffer")))
64657387
SM
115 map))
116
117(defvar mule-menu-keymap
118 (let ((map (make-sparse-keymap "Mule (Multilingual Environment)")))
119 (define-key-after map [set-language-environment]
8f43cbf3 120 `(menu-item ,(purecopy "Set Language Environment") ,setup-language-environment-map))
04991a1c 121 (define-key-after map [separator-mule] menu-bar-separator)
64657387
SM
122
123 (define-key-after map [toggle-input-method]
8f43cbf3 124 `(menu-item ,(purecopy "Toggle Input Method") toggle-input-method))
64657387 125 (define-key-after map [set-input-method]
8f43cbf3 126 `(menu-item ,(purecopy "Select Input Method...") set-input-method))
64657387 127 (define-key-after map [describe-input-method]
8f43cbf3 128 `(menu-item ,(purecopy "Describe Input Method") describe-input-method))
04991a1c 129 (define-key-after map [separator-input-method] menu-bar-separator)
64657387
SM
130
131 (define-key-after map [set-various-coding-system]
8f43cbf3 132 `(menu-item ,(purecopy "Set Coding Systems") ,set-coding-system-map
597e2240 133 :enable (default-value 'enable-multibyte-characters)))
64657387 134 (define-key-after map [view-hello-file]
8f43cbf3 135 `(menu-item ,(purecopy "Show Multi-lingual Text") view-hello-file
64657387
SM
136 :enable (file-readable-p
137 (expand-file-name "HELLO" data-directory))
8f43cbf3 138 :help ,(purecopy "Display file which says HELLO in many languages")))
04991a1c 139 (define-key-after map [separator-coding-system] menu-bar-separator)
64657387
SM
140
141 (define-key-after map [describe-language-environment]
8f43cbf3 142 `(menu-item ,(purecopy "Describe Language Environment")
64657387 143 describe-language-environment-map
8f43cbf3 144 :help ,(purecopy "Show multilingual settings for a specific language")))
64657387 145 (define-key-after map [describe-input-method]
8f43cbf3
DN
146 `(menu-item ,(purecopy "Describe Input Method...") describe-input-method
147 :help ,(purecopy "Keyboard layout for a specific input method")))
64657387 148 (define-key-after map [describe-coding-system]
8f43cbf3 149 `(menu-item ,(purecopy "Describe Coding System...") describe-coding-system))
64657387 150 (define-key-after map [list-character-sets]
8f43cbf3
DN
151 `(menu-item ,(purecopy "List Character Sets") list-character-sets
152 :help ,(purecopy "Show table of available character sets")))
64657387 153 (define-key-after map [mule-diag]
8f43cbf3
DN
154 `(menu-item ,(purecopy "Show All of Mule Status") mule-diag
155 :help ,(purecopy "Display multilingual environment settings")))
64657387
SM
156 map)
157 "Keymap for Mule (Multilingual environment) menu specific commands.")
cda74479 158
4ed46869
KH
159;; This should be a single character key binding because users use it
160;; very frequently while editing multilingual text. Now we can use
161;; only two such keys: "\C-\\" and "\C-^", but the latter is not
162;; convenient because it requires shifting on most keyboards. An
163;; alternative is "\C-\]" which is now bound to `abort-recursive-edit'
164;; but it won't be used that frequently.
165(define-key global-map "\C-\\" 'toggle-input-method)
166
5f395df3
SM
167;; This is no good because people often type Shift-SPC
168;; meaning to type SPC. -- rms.
169;; ;; Here's an alternative key binding for X users (Shift-SPACE).
170;; (define-key global-map [?\S- ] 'toggle-input-method)
b4fba33f 171
464cc130
KH
172;;; Mule related hyperlinks.
173(defconst help-xref-mule-regexp-template
174 (purecopy (concat "\\(\\<\\("
175 "\\(coding system\\)\\|"
d0c40faa
KH
176 "\\(input method\\)\\|"
177 "\\(character set\\)\\|"
178 "\\(charset\\)"
464cc130
KH
179 "\\)\\s-+\\)?"
180 ;; Note starting with word-syntax character:
181 "`\\(\\sw\\(\\sw\\|\\s_\\)+\\)'")))
182
26d87040 183(defun coding-system-change-eol-conversion (coding-system eol-type)
caff3c0a 184 "Return a coding system which differs from CODING-SYSTEM in EOL conversion.
26d87040
EZ
185The returned coding system converts end-of-line by EOL-TYPE
186but text as the same way as CODING-SYSTEM.
187EOL-TYPE should be `unix', `dos', `mac', or nil.
188If EOL-TYPE is nil, the returned coding system detects
189how end-of-line is formatted automatically while decoding.
190
191EOL-TYPE can be specified by an integer 0, 1, or 2.
192They means `unix', `dos', and `mac' respectively."
193 (if (symbolp eol-type)
194 (setq eol-type (cond ((eq eol-type 'unix) 0)
195 ((eq eol-type 'dos) 1)
196 ((eq eol-type 'mac) 2)
197 (t eol-type))))
1d5b0c66
CY
198 ;; We call `coding-system-base' before `coding-system-eol-type',
199 ;; because the coding-system may not be initialized until then.
200 (let* ((base (coding-system-base coding-system))
201 (orig-eol-type (coding-system-eol-type coding-system)))
202 (cond ((vectorp orig-eol-type)
203 (if (not eol-type)
204 coding-system
205 (aref orig-eol-type eol-type)))
206 ((not eol-type)
207 base)
208 ((= eol-type orig-eol-type)
209 coding-system)
210 ((progn (setq orig-eol-type (coding-system-eol-type base))
211 (vectorp orig-eol-type))
212 (aref orig-eol-type eol-type)))))
26d87040
EZ
213
214(defun coding-system-change-text-conversion (coding-system coding)
215 "Return a coding system which differs from CODING-SYSTEM in text conversion.
216The returned coding system converts text by CODING
217but end-of-line as the same way as CODING-SYSTEM.
218If CODING is nil, the returned coding system detects
219how text is formatted automatically while decoding."
1d77e15a
JR
220 (let ((eol-type (coding-system-eol-type coding-system)))
221 (coding-system-change-eol-conversion
222 (if coding coding 'undecided)
223 (if (numberp eol-type) (aref [unix dos mac] eol-type)))))
26d87040 224
be7ca044
KH
225;; Canonicalize the coding system name NAME by removing some prefixes
226;; and delimiter characters. Support function of
227;; coding-system-from-name.
228(defun canonicalize-coding-system-name (name)
229 (if (string-match "^iso[-_ ]?[0-9]" name)
230 ;; "iso-8859-1" -> "8859-1", "iso-2022-jp" ->"2022-jp"
231 (setq name (substring name (1- (match-end 0)))))
232 (let ((idx (string-match "[-_ /]" name)))
233 ;; Delete "-", "_", " ", "/" but do distinguish "16-be" and "16be".
234 (while idx
235 (if (and (>= idx 2)
236 (eq (string-match "16-[lb]e$" name (- idx 2))
237 (- idx 2)))
238 (setq idx (string-match "[-_ /]" name (match-end 0)))
239 (setq name (concat (substring name 0 idx) (substring name (1+ idx)))
240 idx (string-match "[-_ /]" name idx))))
241 name))
242
243(defun coding-system-from-name (name)
244 "Return a coding system whose name matches with NAME (string or symbol)."
245 (let (sym)
246 (if (stringp name) (setq sym (intern name))
247 (setq sym name name (symbol-name name)))
248 (if (coding-system-p sym)
249 sym
250 (let ((eol-type
251 (if (string-match "-\\(unix\\|dos\\|mac\\)$" name)
252 (prog1 (intern (match-string 1 name))
253 (setq name (substring name 0 (match-beginning 0)))))))
254 (setq name (canonicalize-coding-system-name (downcase name)))
255 (catch 'tag
256 (dolist (elt (coding-system-list))
257 (if (string= (canonicalize-coding-system-name (symbol-name elt))
258 name)
259 (throw 'tag (if eol-type (coding-system-change-eol-conversion
260 elt eol-type)
261 elt)))))))))
262
4ed46869 263(defun toggle-enable-multibyte-characters (&optional arg)
6998e1a1 264 "Change whether this buffer uses multibyte characters.
49275d55 265With ARG, use multibyte characters if the ARG is positive.
6998e1a1
RS
266
267Note that this command does not convert the byte contents of
268the buffer; it only changes the way those bytes are interpreted.
269In general, therefore, this command *changes* the sequence of
270characters that the current buffer contains.
271
caff3c0a
JB
272We suggest you avoid using this command unless you know what you are
273doing. If you use it by mistake, and the buffer is now displayed
6998e1a1 274wrong, use this command again to toggle back to the right mode."
4ed46869 275 (interactive "P")
b7079457
RS
276 (let ((new-flag
277 (if (null arg) (null enable-multibyte-characters)
278 (> (prefix-numeric-value arg) 0))))
279 (set-buffer-multibyte new-flag))
4ed46869
KH
280 (force-mode-line-update))
281
282(defun view-hello-file ()
cf2fc7e9 283 "Display the HELLO file, which lists many languages and characters."
4ed46869 284 (interactive)
8f81f784 285 ;; We have to decode the file in any environment.
a4723629
GM
286 (letf (((default-value 'enable-multibyte-characters) t)
287 (coding-system-for-read 'iso-2022-7bit))
288 (view-file (expand-file-name "HELLO" data-directory))))
4ed46869 289
9e9a77a6 290(defun universal-coding-system-argument (coding-system)
15b3e511 291 "Execute an I/O command using the specified coding system."
9e9a77a6
RS
292 (interactive
293 (let ((default (and buffer-file-coding-system
34104362 294 (not (eq (coding-system-type buffer-file-coding-system)
a0d96cad 295 'undecided))
9e9a77a6
RS
296 buffer-file-coding-system)))
297 (list (read-coding-system
298 (if default
5b76833f 299 (format "Coding system for following command (default %s): " default)
9e9a77a6
RS
300 "Coding system for following command: ")
301 default))))
302 (let* ((keyseq (read-key-sequence
e14a8f4c 303 (format "Command to execute with %s:" coding-system)))
04363179
GM
304 (cmd (key-binding keyseq))
305 prefix)
735c5b17
GM
306 ;; read-key-sequence ignores quit, so make an explicit check.
307 ;; Like many places, this assumes quit == C-g, but it need not be.
1e4bd40d 308 (if (equal last-input-event ?\C-g)
735c5b17 309 (keyboard-quit))
4a304bb2 310 (when (memq cmd '(universal-argument digit-argument))
04363179 311 (call-interactively cmd)
a1506d29 312
04363179
GM
313 ;; Process keys bound in `universal-argument-map'.
314 (while (progn
315 (setq keyseq (read-key-sequence nil t)
316 cmd (key-binding keyseq t))
317 (not (eq cmd 'universal-argument-other-key)))
318 (let ((current-prefix-arg prefix-arg)
61a846fb 319 ;; Have to bind `last-command-event' here so that
6b61353c 320 ;; `digit-argument', for instance, can compute the
04363179 321 ;; prefix arg.
61a846fb 322 (last-command-event (aref keyseq 0)))
04363179
GM
323 (call-interactively cmd)))
324
1f547b92 325 ;; This is the final call to `universal-argument-other-key', which
04363179
GM
326 ;; set's the final `prefix-arg.
327 (let ((current-prefix-arg prefix-arg))
328 (call-interactively cmd))
a1506d29 329
04363179
GM
330 ;; Read the command to execute with the given prefix arg.
331 (setq prefix prefix-arg
332 keyseq (read-key-sequence nil t)
333 cmd (key-binding keyseq)))
334
15b3e511 335 (let ((coding-system-for-read coding-system)
04363179 336 (coding-system-for-write coding-system)
0e9ec609 337 (coding-system-require-warning t)
04363179 338 (current-prefix-arg prefix))
15b3e511
KH
339 (message "")
340 (call-interactively cmd))))
341
de94d711 342(defun set-default-coding-systems (coding-system)
0c3154d2 343 "Set default value of various coding systems to CODING-SYSTEM.
387136f6 344This sets the following coding systems:
0c3154d2 345 o coding system of a newly created buffer
8efc03e1
KH
346 o default coding system for subprocess I/O
347This also sets the following values:
d3e4babd 348 o default value used as `file-name-coding-system' for converting file names
caff3c0a 349 if CODING-SYSTEM is ASCII-compatible
d8cb7e3d 350 o default value for the command `set-terminal-coding-system'
d3e4babd 351 o default value for the command `set-keyboard-coding-system'
caff3c0a 352 if CODING-SYSTEM is ASCII-compatible"
de94d711
KH
353 (check-coding-system coding-system)
354 (setq-default buffer-file-coding-system coding-system)
6b61353c
KH
355 (if (fboundp 'ucs-set-table-for-input)
356 (dolist (buffer (buffer-list))
357 (or (local-variable-p 'buffer-file-coding-system buffer)
358 (ucs-set-table-for-input buffer))))
359
db9aec47 360 (if (eq system-type 'darwin)
a41118cc 361 ;; The file-name coding system on Darwin systems is always utf-8.
db9aec47 362 (setq default-file-name-coding-system 'utf-8)
597e2240 363 (if (and (default-value 'enable-multibyte-characters)
db9aec47
KH
364 (or (not coding-system)
365 (coding-system-get coding-system 'ascii-compatible-p)))
366 (setq default-file-name-coding-system coding-system)))
970c9391 367 (setq default-terminal-coding-system coding-system)
de94d711 368 (setq default-keyboard-coding-system coding-system)
1d77e15a
JR
369 ;; Preserve eol-type from existing default-process-coding-systems.
370 ;; On non-unix-like systems in particular, these may have been set
371 ;; carefully by the user, or by the startup code, to deal with the
372 ;; users shell appropriately, so should not be altered by changing
373 ;; language environment.
374 (let ((output-coding
375 (coding-system-change-text-conversion
376 (car default-process-coding-system) coding-system))
377 (input-coding
378 (coding-system-change-text-conversion
379 (cdr default-process-coding-system) coding-system)))
380 (setq default-process-coding-system
381 (cons output-coding input-coding))))
de94d711 382
0c3154d2
KH
383(defun prefer-coding-system (coding-system)
384 "Add CODING-SYSTEM at the front of the priority list for automatic detection.
387136f6 385This also sets the following coding systems:
0c3154d2 386 o coding system of a newly created buffer
8efc03e1
KH
387 o default coding system for subprocess I/O
388This also sets the following values:
caff3c0a 389 o default value used as `file-name-coding-system' for converting file names
d8cb7e3d 390 o default value for the command `set-terminal-coding-system'
03c35c83
EZ
391 o default value for the command `set-keyboard-coding-system'
392
bd3ac67e
EZ
393If CODING-SYSTEM specifies a certain type of EOL conversion, the coding
394systems set by this function will use that type of EOL conversion.
395
d0d8885d
JB
396A coding system that requires automatic detection of text+encoding
397\(e.g. undecided, unix) can't be preferred."
0c3154d2
KH
398 (interactive "zPrefer coding system: ")
399 (if (not (and coding-system (coding-system-p coding-system)))
400 (error "Invalid coding system `%s'" coding-system))
97941b05
KH
401 (if (memq (coding-system-type coding-system) '(raw-text undecided))
402 (error "Can't prefer the coding system `%s'" coding-system))
403 (let ((base (coding-system-base coding-system))
bd3ac67e 404 (eol-type (coding-system-eol-type coding-system)))
97941b05 405 (set-coding-system-priority base)
32226619 406 (and (called-interactively-p 'interactive)
97941b05
KH
407 (or (eq base coding-system)
408 (message "Highest priority is set to %s (base of %s)"
409 base coding-system)))
bd3ac67e 410 ;; If they asked for specific EOL conversion, honor that.
6f9dc4fd 411 (if (memq eol-type '(0 1 2))
97941b05
KH
412 (setq base
413 (coding-system-change-eol-conversion base eol-type)))
414 (set-default-coding-systems base)))
0c3154d2 415
b5edd1d1
KH
416(defvar sort-coding-systems-predicate nil
417 "If non-nil, a predicate function to sort coding systems.
418
419It is called with two coding systems, and should return t if the first
420one is \"less\" than the second.
421
422The function `sort-coding-systems' use it.")
423
424(defun sort-coding-systems (codings)
425 "Sort coding system list CODINGS by a priority of each coding system.
caff3c0a 426Return the sorted list. CODINGS is modified by side effects.
b5edd1d1
KH
427
428If a coding system is most preferred, it has the highest priority.
caff3c0a
JB
429Otherwise, coding systems that correspond to MIME charsets have
430higher priorities. Among them, a coding system included in the
431`coding-system' key of the current language environment has higher
432priority. See also the documentation of `language-info-alist'.
b5edd1d1
KH
433
434If the variable `sort-coding-systems-predicate' (which see) is
caff3c0a 435non-nil, it is used to sort CODINGS instead."
b5edd1d1
KH
436 (if sort-coding-systems-predicate
437 (sort codings sort-coding-systems-predicate)
8f924df7
KH
438 (let* ((from-priority (coding-system-priority-list))
439 (most-preferred (car from-priority))
b5edd1d1
KH
440 (lang-preferred (get-language-info current-language-environment
441 'coding-system))
442 (func (function
443 (lambda (x)
444 (let ((base (coding-system-base x)))
da1ebad1
KH
445 ;; We calculate the priority number 0..255 by
446 ;; using the 8 bits PMMLCEII as this:
3ecd3a56
GM
447 ;; P: 1 if most preferred.
448 ;; MM: greater than 0 if mime-charset.
449 ;; L: 1 if one of the current lang. env.'s codings.
450 ;; C: 1 if one of codings listed in the category list.
451 ;; E: 1 if not XXX-with-esc
da1ebad1
KH
452 ;; II: if iso-2022 based, 0..3, else 1.
453 (logior
454 (lsh (if (eq base most-preferred) 1 0) 7)
455 (lsh
8f924df7 456 (let ((mime (coding-system-get base :mime-charset)))
b439e72a
DL
457 ;; Prefer coding systems corresponding to a
458 ;; MIME charset.
b5edd1d1 459 (if mime
b439e72a
DL
460 ;; Lower utf-16 priority so that we
461 ;; normally prefer utf-8 to it, and put
462 ;; x-ctext below that.
d0d8885d
JB
463 (cond ((string-match-p "utf-16"
464 (symbol-name mime))
da1ebad1 465 2)
d0d8885d 466 ((string-match-p "^x-" (symbol-name mime))
da1ebad1
KH
467 1)
468 (t 3))
b5edd1d1 469 0))
da1ebad1
KH
470 5)
471 (lsh (if (memq base lang-preferred) 1 0) 4)
8f924df7 472 (lsh (if (memq base from-priority) 1 0) 3)
d0d8885d
JB
473 (lsh (if (string-match-p "-with-esc\\'"
474 (symbol-name base))
da1ebad1 475 0 1) 2)
8f924df7
KH
476 (if (eq (coding-system-type base) 'iso-2022)
477 (let ((category (coding-system-category base)))
478 ;; For ISO based coding systems, prefer
479 ;; one that doesn't use designation nor
480 ;; locking/single shifting.
a0d96cad
KH
481 (cond
482 ((or (eq category 'coding-category-iso-8-1)
483 (eq category 'coding-category-iso-8-2))
484 2)
485 ((or (eq category 'coding-category-iso-7-tight)
486 (eq category 'coding-category-iso-7))
487 1)
488 (t
489 0)))
490 1)
a6dfc99b 491 ))))))
b5edd1d1
KH
492 (sort codings (function (lambda (x y)
493 (> (funcall func x) (funcall func y))))))))
54b226f7 494
3fc7dfe5 495(defun find-coding-systems-region (from to)
54b226f7 496 "Return a list of proper coding systems to encode a text between FROM and TO.
8f924df7 497
d37ef0f6 498If FROM is a string, find coding systems in that instead of the buffer.
54b226f7
KH
499All coding systems in the list can safely encode any multibyte characters
500in the text.
501
e8dd0160 502If the text contains no multibyte characters, return a list of a single
3fc7dfe5 503element `undecided'."
b5edd1d1
KH
504 (let ((codings (find-coding-systems-region-internal from to)))
505 (if (eq codings t)
506 ;; The text contains only ASCII characters. Any coding
507 ;; systems are safe.
508 '(undecided)
509 ;; We need copy-sequence because sorting will alter the argument.
510 (sort-coding-systems (copy-sequence codings)))))
54b226f7 511
3fc7dfe5
KH
512(defun find-coding-systems-string (string)
513 "Return a list of proper coding systems to encode STRING.
514All coding systems in the list can safely encode any multibyte characters
515in STRING.
516
e8dd0160 517If STRING contains no multibyte characters, return a list of a single
3fc7dfe5 518element `undecided'."
b5edd1d1 519 (find-coding-systems-region string nil))
3fc7dfe5
KH
520
521(defun find-coding-systems-for-charsets (charsets)
522 "Return a list of proper coding systems to encode characters of CHARSETS.
6053d86a 523CHARSETS is a list of character sets.
1f547b92
DL
524
525This only finds coding systems of type `charset', whose
526`:charset-list' property includes all of CHARSETS (plus `ascii' for
d0d8885d 527ASCII-compatible coding systems). It was used in older versions of
1f547b92
DL
528Emacs, but is unlikely to be what you really want now."
529 ;; Deal with aliases.
530 (setq charsets (mapcar (lambda (c)
531 (get-charset-property c :name))
532 charsets))
b5edd1d1
KH
533 (cond ((or (null charsets)
534 (and (= (length charsets) 1)
535 (eq 'ascii (car charsets))))
536 '(undecided))
537 ((or (memq 'eight-bit-control charsets)
538 (memq 'eight-bit-graphic charsets))
1f547b92 539 '(raw-text utf-8-emacs))
b5edd1d1 540 (t
1f547b92
DL
541 (let (codings)
542 (dolist (cs (coding-system-list t))
c7b4b466
DL
543 (let ((cs-charsets (and (eq (coding-system-type cs) 'charset)
544 (coding-system-charset-list cs)))
1f547b92
DL
545 (charsets charsets))
546 (if (coding-system-get cs :ascii-compatible-p)
547 (add-to-list 'cs-charsets 'ascii))
548 (if (catch 'ok
549 (when cs-charsets
550 (while charsets
551 (unless (memq (pop charsets) cs-charsets)
552 (throw 'ok nil)))
553 t))
554 (push cs codings))))
555 (nreverse codings)))))
54b226f7 556
51ed58ea
KH
557(defun find-multibyte-characters (from to &optional maxcount excludes)
558 "Find multibyte characters in the region specified by FROM and TO.
559If FROM is a string, find multibyte characters in the string.
560The return value is an alist of the following format:
561 ((CHARSET COUNT CHAR ...) ...)
562where
563 CHARSET is a character set,
564 COUNT is a number of characters,
1f547b92 565 CHARs are the characters found from the character set.
51ed58ea 566Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
caff3c0a 567Optional 4th arg EXCLUDES is a list of character sets to be ignored."
51ed58ea
KH
568 (let ((chars nil)
569 charset char)
570 (if (stringp from)
a0d96cad
KH
571 (if (multibyte-string-p from)
572 (let ((idx 0))
d0d8885d 573 (while (setq idx (string-match-p "[^\000-\177]" from idx))
a0d96cad
KH
574 (setq char (aref from idx)
575 charset (char-charset char))
576 (unless (memq charset excludes)
577 (let ((slot (assq charset chars)))
578 (if slot
579 (if (not (memq char (nthcdr 2 slot)))
580 (let ((count (nth 1 slot)))
581 (setcar (cdr slot) (1+ count))
582 (if (or (not maxcount) (< count maxcount))
583 (nconc slot (list char)))))
584 (setq chars (cons (list charset 1 char) chars)))))
585 (setq idx (1+ idx)))))
586 (if enable-multibyte-characters
587 (save-excursion
588 (goto-char from)
589 (while (re-search-forward "[^\000-\177]" to t)
590 (setq char (preceding-char)
591 charset (char-charset char))
592 (unless (memq charset excludes)
51ed58ea
KH
593 (let ((slot (assq charset chars)))
594 (if slot
a0d96cad 595 (if (not (member char (nthcdr 2 slot)))
51ed58ea
KH
596 (let ((count (nth 1 slot)))
597 (setcar (cdr slot) (1+ count))
598 (if (or (not maxcount) (< count maxcount))
599 (nconc slot (list char)))))
a0d96cad 600 (setq chars (cons (list charset 1 char) chars)))))))))
51ed58ea
KH
601 (nreverse chars)))
602
738746ba
KH
603(defun search-unencodable-char (coding-system)
604 "Search forward from point for a character that is not encodable.
605It asks which coding system to check.
606If such a character is found, set point after that character.
607Otherwise, don't move point.
608
caff3c0a
JB
609When called from a program, the value is the position of the unencodable
610character found, or nil if all characters are encodable."
738746ba
KH
611 (interactive
612 (list (let ((default (or buffer-file-coding-system 'us-ascii)))
613 (read-coding-system
5b76833f 614 (format "Coding-system (default %s): " default)
738746ba
KH
615 default))))
616 (let ((pos (unencodable-char-position (point) (point-max) coding-system)))
617 (if pos
618 (goto-char (1+ pos))
619 (message "All following characters are encodable by %s" coding-system))
620 pos))
621
c83c4f60
RS
622(defvar last-coding-system-specified nil
623 "Most recent coding system explicitly specified by the user when asked.
624This variable is set whenever Emacs asks the user which coding system
625to use in order to write a file. If you set it to nil explicitly,
626then call `write-region', then afterward this variable will be non-nil
627only if the user was explicitly asked and specified a coding system.")
628
b5edd1d1 629(defvar select-safe-coding-system-accept-default-p nil
fea6b736 630 "If non-nil, a function to control the behavior of coding system selection.
b5edd1d1
KH
631The meaning is the same as the argument ACCEPT-DEFAULT-P of the
632function `select-safe-coding-system' (which see). This variable
633overrides that argument.")
634
9ee5b744
SM
635(defun select-safe-coding-system-interactively (from to codings unsafe
636 &optional rejected default)
637 "Select interactively a coding system for the region FROM ... TO.
638FROM can be a string, as in `write-region'.
639CODINGS is the list of base coding systems known to be safe for this region,
640 typically obtained with `find-coding-systems-region'.
641UNSAFE is a list of coding systems known to be unsafe for this region.
642REJECTED is a list of coding systems which were safe but for some reason
643 were not recommended in the particular context.
644DEFAULT is the coding system to use by default in the query."
645 ;; At first, if some defaults are unsafe, record at most 11
646 ;; problematic characters and their positions for them by turning
647 ;; (CODING ...)
648 ;; into
649 ;; ((CODING (POS . CHAR) (POS . CHAR) ...) ...)
650 (if unsafe
651 (setq unsafe
652 (mapcar #'(lambda (coding)
653 (cons coding
654 (if (stringp from)
655 (mapcar #'(lambda (pos)
656 (cons pos (aref from pos)))
657 (unencodable-char-position
658 0 (length from) coding
659 11 from))
660 (mapcar #'(lambda (pos)
661 (cons pos (char-after pos)))
662 (unencodable-char-position
663 from to coding 11)))))
664 unsafe)))
665
666 ;; Change each safe coding system to the corresponding
667 ;; mime-charset name if it is also a coding system. Such a name
668 ;; is more friendly to users.
669 (let ((l codings)
670 mime-charset)
671 (while l
a77c22c2
KH
672 (setq mime-charset (coding-system-get (car l) :mime-charset))
673 (if (and mime-charset (coding-system-p mime-charset)
674 (coding-system-equal (car l) mime-charset))
9ee5b744
SM
675 (setcar l mime-charset))
676 (setq l (cdr l))))
677
678 ;; Don't offer variations with locking shift, which you
679 ;; basically never want.
680 (let (l)
681 (dolist (elt codings (setq codings (nreverse l)))
682 (unless (or (eq 'coding-category-iso-7-else
683 (coding-system-category elt))
684 (eq 'coding-category-iso-8-else
685 (coding-system-category elt)))
686 (push elt l))))
687
688 ;; Remove raw-text, emacs-mule and no-conversion unless nothing
689 ;; else is available.
690 (setq codings
691 (or (delq 'raw-text
692 (delq 'emacs-mule
693 (delq 'no-conversion codings)))
694 '(raw-text emacs-mule no-conversion)))
695
696 (let ((window-configuration (current-window-configuration))
697 (bufname (buffer-name))
698 coding-system)
699 (save-excursion
700 ;; If some defaults are unsafe, make sure the offending
701 ;; buffer is displayed.
702 (when (and unsafe (not (stringp from)))
703 (pop-to-buffer bufname)
704 (goto-char (apply 'min (mapcar #'(lambda (x) (car (cadr x)))
705 unsafe))))
706 ;; Then ask users to select one from CODINGS while showing
707 ;; the reason why none of the defaults are not used.
708 (with-output-to-temp-buffer "*Warning*"
709 (with-current-buffer standard-output
710 (if (and (null rejected) (null unsafe))
711 (insert "No default coding systems to try for "
712 (if (stringp from)
713 (format "string \"%s\"." from)
714 (format "buffer `%s'." bufname)))
715 (insert
716 "These default coding systems were tried to encode"
717 (if (stringp from)
718 (concat " \"" (if (> (length from) 10)
719 (concat (substring from 0 10) "...\"")
720 (concat from "\"")))
721 (format " text\nin the buffer `%s'" bufname))
722 ":\n")
723 (let ((pos (point))
724 (fill-prefix " "))
725 (dolist (x (append rejected unsafe))
46c04718 726 (princ " ") (princ x))
9ee5b744
SM
727 (insert "\n")
728 (fill-region-as-paragraph pos (point)))
729 (when rejected
3fc0b26e
EZ
730 (insert "These safely encode the text in the buffer,
731but are not recommended for encoding text in this context,
9ee5b744
SM
732e.g., for sending an email message.\n ")
733 (dolist (x rejected)
734 (princ " ") (princ x))
735 (insert "\n"))
736 (when unsafe
3fc0b26e 737 (insert (if rejected "The other coding systems"
9ee5b744 738 "However, each of them")
3fc0b26e 739 " encountered characters it couldn't encode:\n")
9ee5b744 740 (dolist (coding unsafe)
3fc0b26e 741 (insert (format " %s cannot encode these:" (car coding)))
9ee5b744
SM
742 (let ((i 0)
743 (func1
744 #'(lambda (bufname pos)
745 (when (buffer-live-p (get-buffer bufname))
746 (pop-to-buffer bufname)
747 (goto-char pos))))
748 (func2
749 #'(lambda (bufname pos coding)
750 (when (buffer-live-p (get-buffer bufname))
751 (pop-to-buffer bufname)
752 (if (< (point) pos)
753 (goto-char pos)
754 (forward-char 1)
755 (search-unencodable-char coding)
756 (forward-char -1))))))
757 (dolist (elt (cdr coding))
758 (insert " ")
759 (if (stringp from)
760 (insert (if (< i 10) (cdr elt) "..."))
761 (if (< i 10)
762 (insert-text-button
763 (cdr elt)
764 :type 'help-xref
4e21e5aa 765 'face 'link
9ee5b744
SM
766 'help-echo
767 "mouse-2, RET: jump to this character"
768 'help-function func1
769 'help-args (list bufname (car elt)))
770 (insert-text-button
771 "..."
772 :type 'help-xref
4e21e5aa 773 'face 'link
9ee5b744
SM
774 'help-echo
775 "mouse-2, RET: next unencodable character"
776 'help-function func2
777 'help-args (list bufname (car elt)
778 (car coding)))))
779 (setq i (1+ i))))
780 (insert "\n"))
333f3572 781 (insert (substitute-command-keys "\
7d03c5b1 782
333f3572
KH
783Click on a character (or switch to this window by `\\[other-window]'\n\
784and select the characters by RET) to jump to the place it appears,\n\
3fc0b26e 785where `\\[universal-argument] \\[what-cursor-position]' will give information about it.\n"))))
7d03c5b1 786 (insert (substitute-command-keys "\nSelect \
3fc0b26e
EZ
787one of the safe coding systems listed below,\n\
788or cancel the writing with \\[keyboard-quit] and edit the buffer\n\
789 to remove or modify the problematic characters,\n\
790or specify any other coding system (and risk losing\n\
791 the problematic characters).\n\n"))
9ee5b744
SM
792 (let ((pos (point))
793 (fill-prefix " "))
794 (dolist (x codings)
795 (princ " ") (princ x))
796 (insert "\n")
7d03c5b1 797 (fill-region-as-paragraph pos (point)))))
9ee5b744
SM
798
799 ;; Read a coding system.
800 (setq coding-system
801 (read-coding-system
802 (format "Select coding system (default %s): " default)
803 default))
804 (setq last-coding-system-specified coding-system))
805
806 (kill-buffer "*Warning*")
807 (set-window-configuration window-configuration)
808 coding-system))
809
b5edd1d1 810(defun select-safe-coding-system (from to &optional default-coding-system
efa2c6d7 811 accept-default-p file)
d5266ddf
KH
812 "Ask a user to select a safe coding system from candidates.
813The candidates of coding systems which can safely encode a text
b5edd1d1
KH
814between FROM and TO are shown in a popup window. Among them, the most
815proper one is suggested as the default.
816
d0d8885d 817The list of `buffer-file-coding-system' of the current buffer, the
b56a5ae0 818default `buffer-file-coding-system', and the most preferred coding
d0d8885d
JB
819system (if it corresponds to a MIME charset) is treated as the
820default coding system list. Among them, the first one that safely
821encodes the text is normally selected silently and returned without
822any user interaction. See also the command `prefer-coding-system'.
d37ef0f6
DL
823
824However, the user is queried if the chosen coding system is
29d04c4f 825inconsistent with what would be selected by `find-auto-coding' from
d37ef0f6
DL
826coding cookies &c. if the contents of the region were read from a
827file. (That could lead to data corruption in a file subsequently
828re-visited and edited.)
54b226f7 829
b5edd1d1
KH
830Optional 3rd arg DEFAULT-CODING-SYSTEM specifies a coding system or a
831list of coding systems to be prepended to the default coding system
0e9ec609 832list. However, if DEFAULT-CODING-SYSTEM is a list and the first
ccb06340 833element is t, the cdr part is used as the default coding system list,
b56a5ae0 834i.e. current `buffer-file-coding-system', default `buffer-file-coding-system',
a985cd2f 835and the most preferred coding system are not used.
54b226f7 836
b5edd1d1
KH
837Optional 4th arg ACCEPT-DEFAULT-P, if non-nil, is a function to
838determine the acceptability of the silently selected coding system.
839It is called with that coding system, and should return nil if it
840should not be silently selected and thus user interaction is required.
841
efa2c6d7
RS
842Optional 5th arg FILE is the file name to use for this purpose.
843That is different from `buffer-file-name' when handling `write-region'
844\(for example).
845
d0d8885d
JB
846The variable `select-safe-coding-system-accept-default-p', if non-nil,
847overrides ACCEPT-DEFAULT-P.
54b226f7
KH
848
849Kludgy feature: if FROM is a string, the string is the target text,
850and TO is ignored."
64657387 851 (if (not (listp default-coding-system))
b5edd1d1
KH
852 (setq default-coding-system (list default-coding-system)))
853
29d04c4f
KH
854 (let ((no-other-defaults nil)
855 auto-cs)
d35deeec 856 (unless (or (stringp from) find-file-literally)
b4dc7d98 857 ;; Find an auto-coding that is specified for the current
29d04c4f
KH
858 ;; buffer and file from the region FROM and TO.
859 (save-excursion
860 (save-restriction
861 (widen)
862 (goto-char from)
863 (setq auto-cs (find-auto-coding (or file buffer-file-name "")
864 (- to from)))
865 (if auto-cs
866 (if (coding-system-p (car auto-cs))
867 (setq auto-cs (car auto-cs))
868 (display-warning
2364df5c 869 'mule
29d04c4f
KH
870 (format "\
871Invalid coding system `%s' is specified
872for the current buffer/file by the %s.
873It is highly recommended to fix it before writing to a file."
874 (car auto-cs)
875 (if (eq (cdr auto-cs) :coding) ":coding tag"
2364df5c
JB
876 (format "variable `%s'" (cdr auto-cs))))
877 :warning)
29d04c4f
KH
878 (or (yes-or-no-p "Really proceed with writing? ")
879 (error "Save aborted"))
880 (setq auto-cs nil))))))
881
0e9ec609
KH
882 (if (eq (car default-coding-system) t)
883 (setq no-other-defaults t
884 default-coding-system (cdr default-coding-system)))
885
886 ;; Change elements of the list to (coding . base-coding).
887 (setq default-coding-system
888 (mapcar (function (lambda (x) (cons x (coding-system-base x))))
889 default-coding-system))
890
29d04c4f 891 (if (and auto-cs (not no-other-defaults))
2df48a87
SM
892 ;; If the file has a coding cookie, use it regardless of any
893 ;; other setting.
29d04c4f 894 (let ((base (coding-system-base auto-cs)))
2df48a87
SM
895 (unless (memq base '(nil undecided))
896 (setq default-coding-system (list (cons auto-cs base)))
897 (setq no-other-defaults t))))
29d04c4f 898
0e9ec609
KH
899 (unless no-other-defaults
900 ;; If buffer-file-coding-system is not nil nor undecided, append it
901 ;; to the defaults.
902 (if buffer-file-coding-system
903 (let ((base (coding-system-base buffer-file-coding-system)))
904 (or (eq base 'undecided)
0e9ec609 905 (rassq base default-coding-system)
ccb06340
KH
906 (setq default-coding-system
907 (append default-coding-system
908 (list (cons buffer-file-coding-system base)))))))
a985cd2f 909
659be258
KH
910 (unless (and buffer-file-coding-system-explicit
911 (cdr buffer-file-coding-system-explicit))
b56a5ae0 912 ;; If default buffer-file-coding-system is not nil nor undecided,
659be258 913 ;; append it to the defaults.
b56a5ae0
SM
914 (when (default-value 'buffer-file-coding-system)
915 (let ((base (coding-system-base
916 (default-value 'buffer-file-coding-system))))
917 (or (eq base 'undecided)
918 (rassq base default-coding-system)
919 (setq default-coding-system
920 (append default-coding-system
921 (list (cons (default-value
922 'buffer-file-coding-system)
923 base)))))))
659be258
KH
924
925 ;; If the most preferred coding system has the property mime-charset,
926 ;; append it to the defaults.
927 (let ((preferred (coding-system-priority-list t))
928 base)
929 (and (coding-system-p preferred)
930 (setq base (coding-system-base preferred))
931 (coding-system-get preferred :mime-charset)
932 (not (rassq base default-coding-system))
933 (setq default-coding-system
934 (append default-coding-system
935 (list (cons preferred base))))))))
29d04c4f
KH
936
937 (if select-safe-coding-system-accept-default-p
938 (setq accept-default-p select-safe-coding-system-accept-default-p))
939
bae7cd08 940 ;; Decide the eol-type from the top of the default codings,
b56a5ae0 941 ;; current buffer-file-coding-system, or default buffer-file-coding-system.
bae7cd08
KH
942 (if default-coding-system
943 (let ((default-eol-type (coding-system-eol-type
944 (caar default-coding-system))))
945 (if (and (vectorp default-eol-type) buffer-file-coding-system)
d35deeec 946 (setq default-eol-type (coding-system-eol-type
bae7cd08 947 buffer-file-coding-system)))
b56a5ae0
SM
948 (if (and (vectorp default-eol-type)
949 (default-value 'buffer-file-coding-system))
950 (setq default-eol-type
951 (coding-system-eol-type
952 (default-value 'buffer-file-coding-system))))
bae7cd08
KH
953 (if (and default-eol-type (not (vectorp default-eol-type)))
954 (dolist (elt default-coding-system)
955 (setcar elt (coding-system-change-eol-conversion
956 (car elt) default-eol-type))))))
957
29d04c4f
KH
958 (let ((codings (find-coding-systems-region from to))
959 (coding-system nil)
c381cb8d 960 (tick (if (not (stringp from)) (buffer-chars-modified-tick)))
29d04c4f 961 safe rejected unsafe)
bae7cd08
KH
962 (if (eq (car codings) 'undecided)
963 ;; Any coding system is ok.
964 (setq coding-system (caar default-coding-system))
965 ;; Reverse the list so that elements are accumulated in safe,
966 ;; rejected, and unsafe in the correct order.
967 (setq default-coding-system (nreverse default-coding-system))
968
969 ;; Classify the defaults into safe, rejected, and unsafe.
970 (dolist (elt default-coding-system)
971 (if (or (eq (car codings) 'undecided)
972 (memq (cdr elt) codings))
973 (if (and (functionp accept-default-p)
974 (not (funcall accept-default-p (cdr elt))))
975 (push (car elt) rejected)
976 (push (car elt) safe))
977 (push (car elt) unsafe)))
978 (if safe
979 (setq coding-system (car safe))))
29d04c4f
KH
980
981 ;; If all the defaults failed, ask a user.
bae7cd08 982 (when (not coding-system)
29d04c4f
KH
983 (setq coding-system (select-safe-coding-system-interactively
984 from to codings unsafe rejected (car codings))))
985
29d04c4f
KH
986 ;; Check we're not inconsistent with what `coding:' spec &c would
987 ;; give when file is re-read.
988 ;; But don't do this if we explicitly ignored the cookie
989 ;; by using `find-file-literally'.
990 (when (and auto-cs
991 (not (and
992 coding-system
993 (memq (coding-system-type coding-system) '(0 5)))))
6b61353c
KH
994 ;; Merge coding-system and auto-cs as far as possible.
995 (if (not coding-system)
996 (setq coding-system auto-cs)
997 (if (not auto-cs)
998 (setq auto-cs coding-system)
999 (let ((eol-type-1 (coding-system-eol-type coding-system))
1000 (eol-type-2 (coding-system-eol-type auto-cs)))
1001 (if (eq (coding-system-base coding-system) 'undecided)
1002 (setq coding-system (coding-system-change-text-conversion
1003 coding-system auto-cs))
1004 (if (eq (coding-system-base auto-cs) 'undecided)
1005 (setq auto-cs (coding-system-change-text-conversion
1006 auto-cs coding-system))))
1007 (if (vectorp eol-type-1)
1008 (or (vectorp eol-type-2)
1009 (setq coding-system (coding-system-change-eol-conversion
1010 coding-system eol-type-2)))
1011 (if (vectorp eol-type-2)
1012 (setq auto-cs (coding-system-change-eol-conversion
1013 auto-cs eol-type-1)))))))
1014
1015 (if (and auto-cs
d37ef0f6
DL
1016 ;; Don't barf if writing a compressed file, say.
1017 ;; This check perhaps isn't ideal, but is probably
1018 ;; the best thing to do.
1019 (not (auto-coding-alist-lookup (or file buffer-file-name "")))
6b61353c 1020 (not (coding-system-equal coding-system auto-cs)))
43afed8c
RS
1021 (unless (yes-or-no-p
1022 (format "Selected encoding %s disagrees with \
1023%s specified by file contents. Really save (else edit coding cookies \
1024and try again)? " coding-system auto-cs))
29d04c4f 1025 (error "Save aborted"))))
c381cb8d 1026 (when (and tick (/= tick (buffer-chars-modified-tick)))
86c3a9fb 1027 (error "Cancelled because the buffer was modified"))
29d04c4f 1028 coding-system)))
54b226f7
KH
1029
1030(setq select-safe-coding-system-function 'select-safe-coding-system)
1031
46babb23
KH
1032(defun select-message-coding-system ()
1033 "Return a coding system to encode the outgoing message of the current buffer.
1034It at first tries the first coding system found in these variables
1035in this order:
1036 (1) local value of `buffer-file-coding-system'
1037 (2) value of `sendmail-coding-system'
b5edd1d1 1038 (3) value of `default-sendmail-coding-system'
b56a5ae0 1039 (4) default value of `buffer-file-coding-system'
46babb23
KH
1040If the found coding system can't encode the current buffer,
1041or none of them are bound to a coding system,
48e41165 1042it asks the user to select a proper coding system."
46babb23 1043 (let ((coding (or (and (local-variable-p 'buffer-file-coding-system)
b5edd1d1
KH
1044 buffer-file-coding-system)
1045 sendmail-coding-system
1046 default-sendmail-coding-system
b56a5ae0 1047 (default-value 'buffer-file-coding-system))))
46babb23 1048 (if (eq coding 'no-conversion)
d37ef0f6 1049 ;; We should never use no-conversion for outgoing mail.
46babb23
KH
1050 (setq coding nil))
1051 (if (fboundp select-safe-coding-system-function)
1052 (funcall select-safe-coding-system-function
b5edd1d1 1053 (point-min) (point-max) coding
209c73b0 1054 (function (lambda (x) (coding-system-get x :mime-charset))))
46babb23 1055 coding)))
4ed46869 1056\f
03c35c83 1057;;; Language support stuff.
4ed46869 1058
4ed46869 1059(defvar language-info-alist nil
2c395d56 1060 "Alist of language environment definitions.
4ed46869
KH
1061Each element looks like:
1062 (LANGUAGE-NAME . ((KEY . INFO) ...))
2c395d56
RS
1063where LANGUAGE-NAME is a string, the name of the language environment,
1064KEY is a symbol denoting the kind of information, and
1065INFO is the data associated with KEY.
1066Meaningful values for KEY include
1067
1068 documentation value is documentation of what this language environment
1069 is meant for, and how to use it.
7dd42fb1
KH
1070 charset value is a list of the character sets mainly used
1071 by this language environment.
d37ef0f6
DL
1072 sample-text value is an expression which is evalled to generate
1073 a line of text written using characters appropriate
1074 for this language environment.
2c395d56
RS
1075 setup-function value is a function to call to switch to this
1076 language environment.
1077 exit-function value is a function to call to leave this
1078 language environment.
d0d8885d
JB
1079 coding-system value is a list of coding systems that are good for
1080 saving text written in this language environment.
2c395d56
RS
1081 This list serves as suggestions to the user;
1082 in effect, as a kind of documentation.
1083 coding-priority value is a list of coding systems for this language
1084 environment, in order of decreasing priority.
1085 This is used to set up the coding system priority
45d08cb2 1086 list when you switch to this language environment.
ddb5c041 1087 nonascii-translation
7dd42fb1
KH
1088 value is a charset of dimension one to use for
1089 converting a unibyte character to multibyte
1090 and vice versa.
ddb5c041
KH
1091 input-method value is a default input method for this language
1092 environment.
7624ebb9
KH
1093 features value is a list of features requested in this
1094 language environment.
6b61353c 1095 ctext-non-standard-encodings
d0d8885d
JB
1096 value is a list of non-standard encoding names used
1097 in extended segments of CTEXT. See the variable
1098 `ctext-non-standard-encodings' for more detail.
ddb5c041 1099
b9631bb2 1100The following key takes effect only when multibyte characters are
597e2240 1101globally disabled, i.e. the default value of `enable-multibyte-characters'
b9631bb2 1102is nil (which is an obsolete and deprecated use):
ddb5c041 1103
d0d8885d
JB
1104 unibyte-display value is a coding system to encode characters for
1105 the terminal. Characters in the range of 160 to
1106 255 display not as octal escapes, but as non-ASCII
1107 characters in this language environment.")
2c395d56
RS
1108
1109(defun get-language-info (lang-env key)
1110 "Return information listed under KEY for language environment LANG-ENV.
1111KEY is a symbol denoting the kind of information.
1112For a list of useful values for KEY and their meanings,
1113see `language-info-alist'."
1114 (if (symbolp lang-env)
1115 (setq lang-env (symbol-name lang-env)))
f15466c5 1116 (let ((lang-slot (assoc-string lang-env language-info-alist t)))
4ed46869
KH
1117 (if lang-slot
1118 (cdr (assq key (cdr lang-slot))))))
1119
f08adf27 1120(defun set-language-info (lang-env key info)
2c395d56
RS
1121 "Modify part of the definition of language environment LANG-ENV.
1122Specifically, this stores the information INFO under KEY
1123in the definition of this language environment.
4ed46869 1124KEY is a symbol denoting the kind of information.
2c395d56 1125INFO is the value for that information.
281d03ec 1126
2c395d56 1127For a list of useful values for KEY and their meanings,
f08adf27 1128see `language-info-alist'."
2c395d56
RS
1129 (if (symbolp lang-env)
1130 (setq lang-env (symbol-name lang-env)))
eec5c8f9
KH
1131 (set-language-info-internal lang-env key info)
1132 (if (equal lang-env current-language-environment)
d042f8b4 1133 (cond ((eq key 'coding-priority)
e9a8ed3c
KH
1134 (set-language-environment-coding-systems lang-env)
1135 (set-language-environment-charset lang-env))
d042f8b4
KH
1136 ((eq key 'input-method)
1137 (set-language-environment-input-method lang-env))
1138 ((eq key 'nonascii-translation)
1139 (set-language-environment-nonascii-translation lang-env))
1140 ((eq key 'charset)
1141 (set-language-environment-charset lang-env))
597e2240 1142 ((and (not (default-value 'enable-multibyte-characters))
d042f8b4
KH
1143 (or (eq key 'unibyte-syntax) (eq key 'unibyte-display)))
1144 (set-language-environment-unibyte lang-env)))))
eec5c8f9
KH
1145
1146(defun set-language-info-internal (lang-env key info)
1147 "Internal use only.
1148Arguments are the same as `set-language-info'."
4ed46869 1149 (let (lang-slot key-slot)
2c395d56 1150 (setq lang-slot (assoc lang-env language-info-alist))
4ed46869 1151 (if (null lang-slot) ; If no slot for the language, add it.
2c395d56 1152 (setq lang-slot (list lang-env)
4ed46869
KH
1153 language-info-alist (cons lang-slot language-info-alist)))
1154 (setq key-slot (assq key lang-slot))
1155 (if (null key-slot) ; If no slot for the key, add it.
1156 (progn
1157 (setq key-slot (list key))
1158 (setcdr lang-slot (cons key-slot (cdr lang-slot)))))
590dbcba 1159 (setcdr key-slot (purecopy info))
1169bd86 1160 ;; Update the custom-type of `current-language-environment'.
590dbcba 1161 (put 'current-language-environment 'custom-type
1169bd86
MR
1162 (cons 'choice (mapcar
1163 (lambda (lang)
756e055f
MR
1164 (list 'const lang))
1165 (sort (mapcar 'car language-info-alist) 'string<))))))
4ed46869 1166
2c395d56
RS
1167(defun set-language-info-alist (lang-env alist &optional parents)
1168 "Store ALIST as the definition of language environment LANG-ENV.
1169ALIST is an alist of KEY and INFO values. See the documentation of
98c6d6ed 1170`language-info-alist' for the meanings of KEY and INFO.
54b226f7 1171
2c395d56 1172Optional arg PARENTS is a list of parent menu names; it specifies
d37ef0f6 1173where to put this language environment in the
2c395d56
RS
1174Describe Language Environment and Set Language Environment menus.
1175For example, (\"European\") means to put this language environment
1176in the European submenu in each of those two menus."
905a9ed3
DN
1177 (cond ((symbolp lang-env)
1178 (setq lang-env (symbol-name lang-env)))
1179 ((stringp lang-env)
1180 (setq lang-env (purecopy lang-env))))
54b226f7
KH
1181 (let ((describe-map describe-language-environment-map)
1182 (setup-map setup-language-environment-map))
1183 (if parents
1184 (let ((l parents)
9deed82f 1185 map parent-symbol parent prompt)
54b226f7
KH
1186 (while l
1187 (if (symbolp (setq parent-symbol (car l)))
1188 (setq parent (symbol-name parent))
1189 (setq parent parent-symbol parent-symbol (intern parent)))
1190 (setq map (lookup-key describe-map (vector parent-symbol)))
9deed82f
EZ
1191 ;; This prompt string is for define-prefix-command, so
1192 ;; that the map it creates will be suitable for a menu.
1193 (or map (setq prompt (format "%s Environment" parent)))
54b226f7
KH
1194 (if (not map)
1195 (progn
1196 (setq map (intern (format "describe-%s-environment-map"
1197 (downcase parent))))
9deed82f 1198 (define-prefix-command map nil prompt)
54b226f7 1199 (define-key-after describe-map (vector parent-symbol)
64657387 1200 (cons parent map))))
54b226f7
KH
1201 (setq describe-map (symbol-value map))
1202 (setq map (lookup-key setup-map (vector parent-symbol)))
1203 (if (not map)
1204 (progn
1205 (setq map (intern (format "setup-%s-environment-map"
1206 (downcase parent))))
9deed82f 1207 (define-prefix-command map nil prompt)
54b226f7 1208 (define-key-after setup-map (vector parent-symbol)
64657387 1209 (cons parent map))))
54b226f7
KH
1210 (setq setup-map (symbol-value map))
1211 (setq l (cdr l)))))
f08adf27
RS
1212
1213 ;; Set up menu items for this language env.
7624ebb9 1214 (let ((doc (assq 'documentation alist)))
f08adf27
RS
1215 (when doc
1216 (define-key-after describe-map (vector (intern lang-env))
64657387 1217 (cons lang-env 'describe-specified-language-support))))
7624ebb9 1218 (define-key-after setup-map (vector (intern lang-env))
64657387 1219 (cons lang-env 'setup-specified-language-environment))
f08adf27 1220
eec5c8f9
KH
1221 (dolist (elt alist)
1222 (set-language-info-internal lang-env (car elt) (cdr elt)))
d35deeec 1223
eec5c8f9
KH
1224 (if (equal lang-env current-language-environment)
1225 (set-language-environment lang-env))))
4ed46869 1226
ae302641 1227(defun read-language-name (key prompt &optional default)
2c395d56 1228 "Read a language environment name which has information for KEY.
ddb5c041 1229If KEY is nil, read any language environment.
2c395d56
RS
1230Prompt with PROMPT. DEFAULT is the default choice of language environment.
1231This returns a language environment name as a string."
4ed46869
KH
1232 (let* ((completion-ignore-case t)
1233 (name (completing-read prompt
1234 language-info-alist
ddb5c041 1235 (and key
ca429a25 1236 (function (lambda (elm) (and (listp elm) (assq key elm)))))
ae302641 1237 t nil nil default)))
13e82c04 1238 (if (and (> (length name) 0)
ddb5c041
KH
1239 (or (not key)
1240 (get-language-info name key)))
13e82c04 1241 name)))
4ed46869
KH
1242\f
1243;;; Multilingual input methods.
d37ef0f6 1244(defgroup leim nil
d0c40faa
KH
1245 "LEIM: Libraries of Emacs Input Methods."
1246 :group 'mule)
4ed46869 1247
d0b9c3ab
KH
1248(defconst leim-list-file-name "leim-list.el"
1249 "Name of LEIM list file.
1250This file contains a list of libraries of Emacs input methods (LEIM)
1251in the format of Lisp expression for registering each input method.
1252Emacs loads this file at startup time.")
1253
905a9ed3 1254(defconst leim-list-header (format
316732d2 1255";;; %s -- list of LEIM (Library of Emacs Input Method) -*-coding: utf-8;-*-
d0b9c3ab 1256;;
bcd76f45
SM
1257;; This file is automatically generated.
1258;;
d0b9c3ab 1259;; This file contains a list of LEIM (Library of Emacs Input Method)
c654de1d
DL
1260;; methods in the same directory as this file. Loading this file
1261;; registers all the input methods in Emacs.
d0b9c3ab 1262;;
d33d5fbe 1263;; Each entry has the form:
d0b9c3ab
KH
1264;; (register-input-method
1265;; INPUT-METHOD LANGUAGE-NAME ACTIVATE-FUNC
1266;; TITLE DESCRIPTION
1267;; ARG ...)
c654de1d 1268;; See the function `register-input-method' for the meanings of the arguments.
d0b9c3ab 1269;;
bcd76f45 1270;; If this directory is included in `load-path', Emacs automatically
d0b9c3ab
KH
1271;; loads this file at startup time.
1272
1273"
1274 leim-list-file-name)
1275 "Header to be inserted in LEIM list file.")
1276
905a9ed3 1277(defconst leim-list-entry-regexp "^(register-input-method"
d0b9c3ab 1278 "Regexp matching head of each entry in LEIM list file.
caff3c0a 1279See also the variable `leim-list-header'.")
d0b9c3ab
KH
1280
1281(defvar update-leim-list-functions
1282 '(quail-update-leim-list-file)
1283 "List of functions to call to update LEIM list file.
1284Each function is called with one arg, LEIM directory name.")
1285
a337fe7f
RS
1286(defun update-leim-list-file (&rest dirs)
1287 "Update LEIM list file in directories DIRS."
d0d8885d
JB
1288 (dolist (function update-leim-list-functions)
1289 (apply function dirs)))
d0b9c3ab 1290
4ed46869
KH
1291(defvar current-input-method nil
1292 "The current input method for multilingual text.
96db204a 1293If nil, that means no input method is activated now.")
4ed46869
KH
1294(make-variable-buffer-local 'current-input-method)
1295(put 'current-input-method 'permanent-local t)
1296
1297(defvar current-input-method-title nil
d0b9c3ab 1298 "Title string of the current input method shown in mode line.")
4ed46869
KH
1299(make-variable-buffer-local 'current-input-method-title)
1300(put 'current-input-method-title 'permanent-local t)
1301
b4fba33f 1302(defcustom default-input-method nil
caff3c0a 1303 "Default input method for multilingual text (a string).
b4fba33f 1304This is the input method activated automatically by the command
9b10b5a3 1305`toggle-input-method' (\\[toggle-input-method])."
1f547b92 1306 :link '(custom-manual "(emacs)Input Methods")
8861c593 1307 :group 'mule
d398dba6
DL
1308 :type '(choice (const nil) (string
1309 :completion-ignore-case t
1310 :complete-function widget-string-complete
1311 :completion-alist input-method-alist
1312 :prompt-history input-method-history))
5806e8a6 1313 :set-after '(current-language-environment))
b4fba33f 1314
0f835e87
KH
1315(put 'input-method-function 'permanent-local t)
1316
723a427a 1317(defvar input-method-history nil
6f5d2452
EZ
1318 "History list of input methods read from the minibuffer.
1319
1320Maximum length of the history list is determined by the value
1321of `history-length', which see.")
723a427a
KH
1322(make-variable-buffer-local 'input-method-history)
1323(put 'input-method-history 'permanent-local t)
4ed46869
KH
1324
1325(defvar inactivate-current-input-method-function nil
1326 "Function to call for inactivating the current input method.
1327Every input method should set this to an appropriate value when activated.
f17ccaee
KH
1328This function is called with no argument.
1329
1330This function should never change the value of `current-input-method'.
1331It is set to nil by the function `inactivate-input-method'.")
4ed46869
KH
1332(make-variable-buffer-local 'inactivate-current-input-method-function)
1333(put 'inactivate-current-input-method-function 'permanent-local t)
1334
1335(defvar describe-current-input-method-function nil
1336 "Function to call for describing the current input method.
1337This function is called with no argument.")
1338(make-variable-buffer-local 'describe-current-input-method-function)
1339(put 'describe-current-input-method-function 'permanent-local t)
1340
d0b9c3ab 1341(defvar input-method-alist nil
2c395d56 1342 "Alist of input method names vs how to use them.
d0b9c3ab 1343Each element has the form:
2c395d56
RS
1344 (INPUT-METHOD LANGUAGE-ENV ACTIVATE-FUNC TITLE DESCRIPTION ARGS...)
1345See the function `register-input-method' for the meanings of the elements.")
ac549fa5 1346;;;###autoload
6dc3311d 1347(put 'input-method-alist 'risky-local-variable t)
2c395d56 1348
f08adf27 1349(defun register-input-method (input-method lang-env &rest args)
bf42aa15 1350 "Register INPUT-METHOD as an input method for language environment LANG-ENV.
d0b9c3ab 1351
d35deeec 1352INPUT-METHOD and LANG-ENV are symbols or strings.
2c395d56
RS
1353ACTIVATE-FUNC is a function to call to activate this method.
1354TITLE is a string to show in the mode line when this method is active.
1355DESCRIPTION is a string describing this method and what it is good for.
1356The ARGS, if any, are passed as arguments to ACTIVATE-FUNC.
205814ee
KH
1357All told, the arguments to ACTIVATE-FUNC are INPUT-METHOD and the ARGS.
1358
c654de1d
DL
1359This function is mainly used in the file \"leim-list.el\" which is
1360created at Emacs build time, registering all Quail input methods
0b6cadff 1361contained in the Emacs distribution.
205814ee 1362
0b6cadff 1363In case you want to register a new Quail input method by yourself, be
205814ee 1364careful to use the same input method title as given in the third
0b6cadff
DL
1365parameter of `quail-define-package'. (If the values are different, the
1366string specified in this function takes precedence.)
205814ee
KH
1367
1368The commands `describe-input-method' and `list-input-methods' need
0b6cadff 1369these duplicated values to show some information about input methods
d35deeec
JB
1370without loading the relevant Quail packages.
1371\n(fn INPUT-METHOD LANG-ENV ACTIVATE-FUNC TITLE DESCRIPTION &rest ARGS)"
f08adf27 1372 (if (symbolp lang-env)
ebfa10d3
DN
1373 (setq lang-env (symbol-name lang-env))
1374 (setq lang-env (purecopy lang-env)))
4ef06f75 1375 (if (symbolp input-method)
ebfa10d3
DN
1376 (setq input-method (symbol-name input-method))
1377 (setq input-method (purecopy input-method)))
1378 (setq args (mapcar 'purecopy args))
f08adf27 1379 (let ((info (cons lang-env args))
d0b9c3ab
KH
1380 (slot (assoc input-method input-method-alist)))
1381 (if slot
1382 (setcdr slot info)
1383 (setq slot (cons input-method info))
1384 (setq input-method-alist (cons slot input-method-alist)))))
1385
4d5ac029 1386(defun read-input-method-name (prompt &optional default inhibit-null)
d0b9c3ab 1387 "Read a name of input method from a minibuffer prompting with PROMPT.
4d5ac029 1388If DEFAULT is non-nil, use that as the default,
0b6cadff 1389and substitute it into PROMPT at the first `%s'.
4ef06f75
KH
1390If INHIBIT-NULL is non-nil, null input signals an error.
1391
1392The return value is a string."
4d5ac029
RS
1393 (if default
1394 (setq prompt (format prompt default)))
d0b9c3ab 1395 (let* ((completion-ignore-case t)
c54044ff
KH
1396 ;; As it is quite normal to change input method in the
1397 ;; minibuffer, we must enable it even if
1398 ;; enable-recursive-minibuffers is currently nil.
1399 (enable-recursive-minibuffers t)
723a427a
KH
1400 ;; This binding is necessary because input-method-history is
1401 ;; buffer local.
d0b9c3ab 1402 (input-method (completing-read prompt input-method-alist
87505a98
RS
1403 nil t nil 'input-method-history
1404 default)))
bf294e6e
KH
1405 (if (and input-method (symbolp input-method))
1406 (setq input-method (symbol-name input-method)))
d0b9c3ab
KH
1407 (if (> (length input-method) 0)
1408 input-method
1409 (if inhibit-null
43807b77 1410 (error "No valid input method is specified")))))
d0b9c3ab 1411
d0b9c3ab 1412(defun activate-input-method (input-method)
2c395d56
RS
1413 "Switch to input method INPUT-METHOD for the current buffer.
1414If some other input method is already active, turn it off first.
1415If INPUT-METHOD is nil, deactivate any current input method."
305a3cb6 1416 (if (and input-method (symbolp input-method))
4ef06f75 1417 (setq input-method (symbol-name input-method)))
723a427a
KH
1418 (if (and current-input-method
1419 (not (string= current-input-method input-method)))
305a3cb6 1420 (inactivate-input-method))
2c395d56 1421 (unless (or current-input-method (null input-method))
d0b9c3ab
KH
1422 (let ((slot (assoc input-method input-method-alist)))
1423 (if (null slot)
723a427a 1424 (error "Can't activate input method `%s'" input-method))
278dd6ac 1425 (setq current-input-method-title nil)
8efc03e1
KH
1426 (let ((func (nth 2 slot)))
1427 (if (functionp func)
1428 (apply (nth 2 slot) input-method (nthcdr 5 slot))
1429 (if (and (consp func) (symbolp (car func)) (symbolp (cdr func)))
1430 (progn
1431 (require (cdr func))
1432 (apply (car func) input-method (nthcdr 5 slot)))
1433 (error "Can't activate input method `%s'" input-method))))
d0b9c3ab 1434 (setq current-input-method input-method)
278dd6ac
KH
1435 (or (stringp current-input-method-title)
1436 (setq current-input-method-title (nth 3 slot)))
28885c0e
KH
1437 (unwind-protect
1438 (run-hooks 'input-method-activate-hook)
1439 (force-mode-line-update)))))
15b3e511 1440
15b3e511 1441(defun inactivate-input-method ()
f17ccaee 1442 "Turn off the current input method."
723a427a
KH
1443 (when current-input-method
1444 (if input-method-history
1445 (unless (string= current-input-method (car input-method-history))
1446 (setq input-method-history
1447 (cons current-input-method
1448 (delete current-input-method input-method-history))))
1449 (setq input-method-history (list current-input-method)))
1450 (unwind-protect
36f8618c
KH
1451 (progn
1452 (setq input-method-function nil
1453 current-input-method-title nil)
1454 (funcall inactivate-current-input-method-function))
15b3e511 1455 (unwind-protect
723a427a 1456 (run-hooks 'input-method-inactivate-hook)
36f8618c 1457 (setq current-input-method nil)
28885c0e 1458 (force-mode-line-update)))))
4ed46869 1459
e893eae2 1460(defun set-input-method (input-method &optional interactive)
2c395d56 1461 "Select and activate input method INPUT-METHOD for the current buffer.
bc406911 1462This also sets the default input method to the one you specify.
402dbbd1
EZ
1463If INPUT-METHOD is nil, this function turns off the input method, and
1464also causes you to be prompted for a name of an input method the next
1465time you invoke \\[toggle-input-method].
e893eae2
RS
1466When called interactively, the optional arg INTERACTIVE is non-nil,
1467which marks the variable `default-input-method' as set for Custom buffers.
402dbbd1 1468
bc406911 1469To deactivate the input method interactively, use \\[toggle-input-method].
caff3c0a 1470To deactivate it programmatically, use `inactivate-input-method'."
d0b9c3ab 1471 (interactive
723a427a 1472 (let* ((default (or (car input-method-history) default-input-method)))
42395763 1473 (list (read-input-method-name
87505a98 1474 (if default "Select input method (default %s): " "Select input method: ")
e893eae2
RS
1475 default t)
1476 t)))
d0b9c3ab 1477 (activate-input-method input-method)
f4990970 1478 (setq default-input-method input-method)
e893eae2 1479 (when interactive
f4990970
PA
1480 (customize-mark-as-set 'default-input-method))
1481 default-input-method)
4ed46869 1482
023df4cf
RS
1483(defvar toggle-input-method-active nil
1484 "Non-nil inside `toggle-input-method'.")
1485
e893eae2 1486(defun toggle-input-method (&optional arg interactive)
f8ec20be
RS
1487 "Enable or disable multilingual text input method for the current buffer.
1488Only one input method can be enabled at any time in a given buffer.
1489
d0d8885d
JB
1490The normal action is to enable an input method if none was enabled,
1491and disable the current one otherwise. Which input method to enable
1492can be determined in various ways--either the one most recently used,
1493or the one specified by `default-input-method', or as a last resort
1494by reading the name of an input method in the minibuffer.
f8ec20be 1495
d0d8885d 1496With a prefix argument ARG, read an input method name with the minibuffer
f8ec20be 1497and enable that one. The default is the most recent input method specified
e893eae2 1498\(not including the currently active input method, if any).
f8ec20be 1499
d0d8885d 1500When called interactively, the optional argument INTERACTIVE is non-nil,
e893eae2
RS
1501which marks the variable `default-input-method' as set for Custom buffers."
1502
1503 (interactive "P\np")
023df4cf
RS
1504 (if toggle-input-method-active
1505 (error "Recursive use of `toggle-input-method'"))
7ddbb5bc
RS
1506 (if (and current-input-method (not arg))
1507 (inactivate-input-method)
023df4cf
RS
1508 (let ((toggle-input-method-active t)
1509 (default (or (car input-method-history) default-input-method)))
7ddbb5bc
RS
1510 (if (and arg default (equal current-input-method default)
1511 (> (length input-method-history) 1))
1512 (setq default (nth 1 input-method-history)))
723a427a
KH
1513 (activate-input-method
1514 (if (or arg (not default))
7ddbb5bc
RS
1515 (progn
1516 (read-input-method-name
1517 (if default "Input method (default %s): " "Input method: " )
1518 default t))
723a427a 1519 default))
f4990970 1520 (unless default-input-method
d37ef0f6 1521 (prog1
f4990970 1522 (setq default-input-method current-input-method)
e893eae2 1523 (when interactive
f4990970 1524 (customize-mark-as-set 'default-input-method)))))))
d0b9c3ab 1525
26b3dce6 1526(autoload 'help-buffer "help-mode")
0855c6cd 1527
d0b9c3ab 1528(defun describe-input-method (input-method)
2c395d56 1529 "Describe input method INPUT-METHOD."
d0b9c3ab
KH
1530 (interactive
1531 (list (read-input-method-name
5b76833f 1532 "Describe input method (default current choice): ")))
78754934 1533 (if (and input-method (symbolp input-method))
4ef06f75 1534 (setq input-method (symbol-name input-method)))
43125c28
RS
1535 (help-setup-xref (list #'describe-input-method
1536 (or input-method current-input-method))
32226619 1537 (called-interactively-p 'interactive))
f80e2142 1538
d0b9c3ab
KH
1539 (if (null input-method)
1540 (describe-current-input-method)
464cc130
KH
1541 (let ((current current-input-method))
1542 (condition-case nil
1543 (progn
1544 (save-excursion
1545 (activate-input-method input-method)
1546 (describe-current-input-method))
1547 (activate-input-method current))
d37ef0f6 1548 (error
464cc130 1549 (activate-input-method current)
5f395df3 1550 (help-setup-xref (list #'describe-input-method input-method)
32226619 1551 (called-interactively-p 'interactive))
5f395df3 1552 (with-output-to-temp-buffer (help-buffer)
464cc130
KH
1553 (let ((elt (assoc input-method input-method-alist)))
1554 (princ (format
1555 "Input method: %s (`%s' in mode line) for %s\n %s\n"
1556 input-method (nth 3 elt) (nth 1 elt) (nth 4 elt))))))))))
d0b9c3ab
KH
1557
1558(defun describe-current-input-method ()
f80e2142
RS
1559 "Describe the input method currently in use.
1560This is a subroutine for `describe-input-method'."
4ed46869
KH
1561 (if current-input-method
1562 (if (and (symbolp describe-current-input-method-function)
1563 (fboundp describe-current-input-method-function))
1564 (funcall describe-current-input-method-function)
1565 (message "No way to describe the current input method `%s'"
f2979bdb 1566 current-input-method)
4ed46869 1567 (ding))
d0b9c3ab 1568 (error "No input method is activated now")))
4ed46869 1569
d3459641 1570(defun read-multilingual-string (prompt &optional initial-input input-method)
4ed46869
KH
1571 "Read a multilingual string from minibuffer, prompting with string PROMPT.
1572The input method selected last time is activated in minibuffer.
d0d8885d
JB
1573If optional second argument INITIAL-INPUT is non-nil, insert it in the
1574minibuffer initially.
1575Optional 3rd argument INPUT-METHOD specifies the input method to be activated
1576instead of the one selected last time. It is a symbol or a string."
88d559ec
KH
1577 (setq input-method
1578 (or input-method
d3459641 1579 current-input-method
88d559ec
KH
1580 default-input-method
1581 (read-input-method-name "Input method: " nil t)))
3df60841 1582 (if (and input-method (symbolp input-method))
4ef06f75 1583 (setq input-method (symbol-name input-method)))
305a3cb6
KH
1584 (let ((prev-input-method current-input-method))
1585 (unwind-protect
1586 (progn
1587 (activate-input-method input-method)
1588 (read-string prompt initial-input nil nil t))
1589 (activate-input-method prev-input-method))))
4ed46869
KH
1590
1591;; Variables to control behavior of input methods. All input methods
1592;; should react to these variables.
1593
8efc03e1 1594(defcustom input-method-verbose-flag 'default
caff3c0a 1595 "A flag to control extra guidance given by input methods.
8efc03e1 1596The value should be nil, t, `complex-only', or `default'.
4ed46869 1597
cb29dfb6 1598The extra guidance is done by showing list of available keys in echo
8efc03e1
KH
1599area. When you use the input method in the minibuffer, the guidance
1600is shown at the bottom short window (split from the existing window).
c27c4ed8 1601
8efc03e1
KH
1602If the value is t, extra guidance is always given, if the value is
1603nil, extra guidance is always suppressed.
1604
1605If the value is `complex-only', only complex input methods such as
1606`chinese-py' and `japanese' give extra guidance.
1607
1608If the value is `default', complex input methods always give extra
1609guidance, but simple input methods give it only when you are not in
1610the minibuffer.
1611
1612See also the variable `input-method-highlight-flag'."
d37ef0f6
DL
1613 :type '(choice (const :tag "Always" t) (const :tag "Never" nil)
1614 (const complex-only) (const default))
42395763
RS
1615 :group 'mule)
1616
1617(defcustom input-method-highlight-flag t
caff3c0a 1618 "If this flag is non-nil, input methods highlight partially-entered text.
42395763
RS
1619For instance, while you are in the middle of a Quail input method sequence,
1620the text inserted so far is temporarily underlined.
8efc03e1
KH
1621The underlining goes away when you finish or abort the input method sequence.
1622See also the variable `input-method-verbose-flag'."
42395763
RS
1623 :type 'boolean
1624 :group 'mule)
4ed46869 1625
1f547b92 1626(defcustom input-method-activate-hook nil
f17ccaee
KH
1627 "Normal hook run just after an input method is activated.
1628
1629The variable `current-input-method' keeps the input method name
1f547b92
DL
1630just activated."
1631 :type 'hook
1632 :group 'mule)
4ed46869 1633
1f547b92 1634(defcustom input-method-inactivate-hook nil
f17ccaee
KH
1635 "Normal hook run just after an input method is inactivated.
1636
1637The variable `current-input-method' still keeps the input method name
1f547b92
DL
1638just inactivated."
1639 :type 'hook
1640 :group 'mule)
4ed46869 1641
1f547b92 1642(defcustom input-method-after-insert-chunk-hook nil
8f924df7 1643 "Normal hook run just after an input method insert some chunk of text."
1f547b92
DL
1644 :type 'hook
1645 :group 'mule)
4ed46869 1646
dccca980 1647(defvar input-method-exit-on-first-char nil
0b6cadff 1648 "This flag controls when an input method returns.
dccca980
KH
1649Usually, the input method does not return while there's a possibility
1650that it may find a different translation if a user types another key.
d0d8885d
JB
1651But, if this flag is non-nil, the input method returns as soon as the
1652current key sequence gets long enough to have some valid translation.")
dccca980 1653
1f547b92 1654(defcustom input-method-use-echo-area nil
dccca980 1655 "This flag controls how an input method shows an intermediate key sequence.
39e643e2
RS
1656Usually, the input method inserts the intermediate key sequence,
1657or candidate translations corresponding to the sequence,
1658at point in the current buffer.
1f547b92
DL
1659But, if this flag is non-nil, it displays them in echo area instead."
1660 :type 'hook
1661 :group 'mule)
dccca980 1662
723a427a 1663(defvar input-method-exit-on-invalid-key nil
fea6b736 1664 "This flag controls the behavior of an input method on invalid key input.
723a427a
KH
1665Usually, when a user types a key which doesn't start any character
1666handled by the input method, the key is handled by turning off the
e8dd0160 1667input method temporarily. After that key, the input method is re-enabled.
723a427a
KH
1668But, if this flag is non-nil, the input method is never back on.")
1669
4ed46869 1670\f
1f547b92 1671(defcustom set-language-environment-hook nil
8efc03e1
KH
1672 "Normal hook run after some language environment is set.
1673
1674When you set some hook function here, that effect usually should not
1675be inherited to another language environment. So, you had better set
1676another function in `exit-language-environment-hook' (which see) to
1f547b92
DL
1677cancel the effect."
1678 :type 'hook
1679 :group 'mule)
8efc03e1 1680
1f547b92 1681(defcustom exit-language-environment-hook nil
8efc03e1
KH
1682 "Normal hook run after exiting from some language environment.
1683When this hook is run, the variable `current-language-environment'
1684is still bound to the language environment being exited.
1685
e8dd0160 1686This hook is mainly used for canceling the effect of
caff3c0a 1687`set-language-environment-hook' (which see)."
1f547b92
DL
1688 :type 'hook
1689 :group 'mule)
8efc03e1 1690
b0648a00
RS
1691(put 'setup-specified-language-environment 'apropos-inhibit t)
1692
15b3e511 1693(defun setup-specified-language-environment ()
f08adf27 1694 "Switch to a specified language environment."
15b3e511 1695 (interactive)
f850d782 1696 (let (language-name)
15b3e511
KH
1697 (if (and (symbolp last-command-event)
1698 (or (not (eq last-command-event 'Default))
1699 (setq last-command-event 'English))
f850d782 1700 (setq language-name (symbol-name last-command-event)))
f4990970
PA
1701 (prog1
1702 (set-language-environment language-name)
1703 (customize-mark-as-set 'current-language-environment))
15b3e511 1704 (error "Bogus calling sequence"))))
4ed46869 1705
8861c593 1706(defcustom current-language-environment "English"
94d04df6 1707 "The last language environment specified with `set-language-environment'.
ebef6d93
KH
1708This variable should be set only with \\[customize], which is equivalent
1709to using the function `set-language-environment'."
94d04df6 1710 :link '(custom-manual "(emacs)Language Environments")
dff1aa24 1711 :set (lambda (symbol value) (set-language-environment value))
94d04df6 1712 :get (lambda (x)
f15466c5 1713 (or (car-safe (assoc-string
94d04df6
DL
1714 (if (symbolp current-language-environment)
1715 (symbol-name current-language-environment)
1716 current-language-environment)
f15466c5 1717 language-info-alist t))
94d04df6 1718 "English"))
990a4108
MR
1719 ;; custom type will be updated with `set-language-info'.
1720 :type (if language-info-alist
1721 (cons 'choice (mapcar
1722 (lambda (lang)
756e055f
MR
1723 (list 'const lang))
1724 (sort (mapcar 'car language-info-alist) 'string<)))
990a4108 1725 'string)
8861c593 1726 :initialize 'custom-initialize-default
eb9fc9e6 1727 :group 'mule)
f850d782 1728
ddb5c041
KH
1729(defun reset-language-environment ()
1730 "Reset multilingual environment of Emacs to the default status.
1731
1732The default status is as follows:
1733
d37ef0f6 1734 The default value of `buffer-file-coding-system' is nil.
ddb5c041
KH
1735 The default coding system for process I/O is nil.
1736 The default value for the command `set-terminal-coding-system' is nil.
1737 The default value for the command `set-keyboard-coding-system' is nil.
1738
97941b05
KH
1739 The order of priorities of coding systems are as follows:
1740 utf-8
1741 iso-2022-7bit
1742 iso-latin-1
1743 iso-2022-7bit-lock
1744 iso-2022-8bit-ss2
1745 emacs-mule
1746 raw-text"
ddb5c041
KH
1747 (interactive)
1748 ;; This function formerly set default-enable-multibyte-characters to t,
1749 ;; but that is incorrect. It should not alter the unibyte/multibyte choice.
1750
97941b05
KH
1751 (set-coding-system-priority
1752 'utf-8
1753 'iso-2022-7bit
1754 'iso-latin-1
1755 'iso-2022-7bit-lock
1756 'iso-2022-8bit-ss2
1757 'emacs-mule
1758 'raw-text)
91693d18 1759
ddb5c041 1760 (set-default-coding-systems nil)
b5edd1d1 1761 (setq default-sendmail-coding-system 'iso-latin-1)
a41118cc
SM
1762 ;; On Darwin systems, this should be utf-8, but when this file is loaded
1763 ;; utf-8 is not yet defined, so we set it in set-locale-environment instead.
787caf99 1764 (setq default-file-name-coding-system 'iso-latin-1)
1d77e15a
JR
1765 ;; Preserve eol-type from existing default-process-coding-systems.
1766 ;; On non-unix-like systems in particular, these may have been set
1767 ;; carefully by the user, or by the startup code, to deal with the
1768 ;; users shell appropriately, so should not be altered by changing
1769 ;; language environment.
1770 (let ((output-coding
a099a2ff
JR
1771 ;; When bootstrapping, coding-systems are not defined yet, so
1772 ;; we need to catch the error from check-coding-system.
d37ef0f6 1773 (condition-case nil
a099a2ff
JR
1774 (coding-system-change-text-conversion
1775 (car default-process-coding-system) 'undecided)
1776 (coding-system-error 'undecided)))
1d77e15a 1777 (input-coding
a099a2ff
JR
1778 (condition-case nil
1779 (coding-system-change-text-conversion
1780 (cdr default-process-coding-system) 'iso-latin-1)
1781 (coding-system-error 'iso-latin-1))))
1d77e15a
JR
1782 (setq default-process-coding-system
1783 (cons output-coding input-coding)))
b5edd1d1 1784
a43977db
KH
1785 ;; Put the highest priority to the charset iso-8859-1 to prefer the
1786 ;; registry iso8859-1 over iso8859-2 in font selection. It also
1787 ;; makes unibyte-display-via-language-environment to use iso-8859-1
1788 ;; as the unibyte charset.
1789 (set-charset-priority 'iso-8859-1)
1790
ddb5c041
KH
1791 ;; Don't alter the terminal and keyboard coding systems here.
1792 ;; The terminal still supports the same coding system
1793 ;; that it supported a minute ago.
1b8dc791
SM
1794 ;; (set-terminal-coding-system-internal nil)
1795 ;; (set-keyboard-coding-system-internal nil)
ddb5c041 1796
33de15f4
SM
1797 ;; Back in Emacs-20, it was necessary to provide some fallback implicit
1798 ;; conversion, because almost no packages handled coding-system issues.
1799 ;; Nowadays it'd just paper over bugs.
1800 ;; (set-unibyte-charset 'iso-8859-1)
1801 )
ddb5c041 1802
0c47a7c8
KH
1803(reset-language-environment)
1804
97c57fb2 1805(defun set-display-table-and-terminal-coding-system (language-name &optional coding-system display)
40c81f74
PE
1806 "Set up the display table and terminal coding system for LANGUAGE-NAME."
1807 (let ((coding (get-language-info language-name 'unibyte-display)))
735b7c87
KH
1808 (if (and coding
1809 (or (not coding-system)
1810 (coding-system-equal coding coding-system)))
40c81f74 1811 (standard-display-european-internal)
a67ae60e
EZ
1812 ;; The following 2 lines undo the 8-bit display that we set up
1813 ;; in standard-display-european-internal, which see. This is in
1814 ;; case the user has used standard-display-european earlier in
970c9391 1815 ;; this session.
d73a7bb8
JPW
1816 (when standard-display-table
1817 (dotimes (i 128)
b221615b 1818 (aset standard-display-table (+ i 128) nil))))
970c9391 1819 (set-terminal-coding-system (or coding-system coding) display)))
40c81f74 1820
166246f7 1821(defun set-language-environment (language-name)
6c05d680
RS
1822 "Set up multi-lingual environment for using LANGUAGE-NAME.
1823This sets the coding system priority and the default input method
8861c593
RS
1824and sometimes other things. LANGUAGE-NAME should be a string
1825which is the name of a language environment. For example, \"Latin-1\"
1826specifies the character set for the major languages of Western Europe."
8efc03e1 1827 (interactive (list (read-language-name
ddb5c041 1828 nil
5b76833f 1829 "Set language environment (default English): ")))
4ef06f75
KH
1830 (if language-name
1831 (if (symbolp language-name)
1832 (setq language-name (symbol-name language-name)))
1833 (setq language-name "English"))
f15466c5 1834 (let ((slot (assoc-string language-name language-info-alist t)))
95498fd0 1835 (unless slot
f850d782 1836 (error "Language environment not defined: %S" language-name))
95498fd0 1837 (setq language-name (car slot)))
8efc03e1
KH
1838 (if current-language-environment
1839 (let ((func (get-language-info current-language-environment
1840 'exit-function)))
e63645c2 1841 (run-hooks 'exit-language-environment-hook)
5f395df3 1842 (if (functionp func) (funcall func))))
ddb5c041 1843
e9a8ed3c
KH
1844 (reset-language-environment)
1845 ;; The features might set up coding systems.
ddb5c041
KH
1846 (let ((required-features (get-language-info language-name 'features)))
1847 (while required-features
1848 (require (car required-features))
1849 (setq required-features (cdr required-features))))
6b61353c 1850
e9a8ed3c
KH
1851 (setq current-language-environment language-name)
1852
1853 (set-language-environment-coding-systems language-name)
1854 (set-language-environment-input-method language-name)
1855 (set-language-environment-nonascii-translation language-name)
1856 (set-language-environment-charset language-name)
1857 ;; Unibyte setups if necessary.
597e2240 1858 (unless (default-value 'enable-multibyte-characters)
e9a8ed3c
KH
1859 (set-language-environment-unibyte language-name))
1860
ddb5c041 1861 (let ((func (get-language-info language-name 'setup-function)))
5f395df3 1862 (if (functionp func)
ddb5c041 1863 (funcall func)))
e9a8ed3c 1864
8aeebac2 1865 (setq current-iso639-language
e9aaa1db
KH
1866 (or (get-language-info language-name 'iso639-language)
1867 current-iso639-language))
8aeebac2 1868
8efc03e1 1869 (run-hooks 'set-language-environment-hook)
f850d782 1870 (force-mode-line-update t))
4ed46869 1871
c3869589 1872(define-widget 'charset 'symbol
aaa448c9 1873 "An Emacs charset."
1644d5b9 1874 :tag "Charset"
c3869589
DL
1875 :complete-function (lambda ()
1876 (interactive)
1877 (lisp-complete-symbol 'charsetp))
1878 :completion-ignore-case t
1879 :value 'ascii
1880 :validate (lambda (widget)
1881 (unless (charsetp (widget-value widget))
1882 (widget-put widget :error (format "Invalid charset: %S"
1883 (widget-value widget)))
1884 widget))
1885 :prompt-history 'charset-history)
1886
1887(defcustom language-info-custom-alist nil
1888 "Customizations of language environment parameters.
1889Value is an alist with elements like those of `language-info-alist'.
1890These are used to set values in `language-info-alist' which replace
1891the defaults. A typical use is replacing the default input method for
1892the environment. Use \\[describe-language-environment] to find the environment's settings.
1893
1894This option is intended for use at startup. Removing items doesn't
1895remove them from the language info until you next restart Emacs.
1896
d0d8885d
JB
1897Setting this variable directly does not take effect.
1898See `set-language-info-alist' for use in programs."
c3869589 1899 :group 'mule
8589dc17 1900 :version "23.1"
c3869589
DL
1901 :set (lambda (s v)
1902 (custom-set-default s v)
1903 ;; Can't do this before language environments are set up.
1904 (when v
1905 ;; modify language-info-alist
1906 (dolist (elt v)
1907 (set-language-info-alist (car elt) (cdr elt)))
1908 ;; re-set the environment in case its parameters changed
1909 (set-language-environment current-language-environment)))
1910 :type `(alist
1911 :key-type (string :tag "Language environment"
1912 :completion-ignore-case t
1913 :complete-function widget-string-complete
1914 :completion-alist language-info-alist)
1915 :value-type
1916 (alist :key-type symbol
1917 :options ((documentation string)
1918 (charset (repeat charset))
1919 (sample-text string)
1920 (setup-function function)
1921 (exit-function function)
1922 (coding-system (repeat coding-system))
1923 (coding-priority (repeat coding-system))
1924 (nonascii-translation charset)
1925 (input-method
1926 (string
1927 :completion-ignore-case t
1928 :complete-function widget-string-complete
1929 :completion-alist input-method-alist
1930 :prompt-history input-method-history))
1931 (features (repeat symbol))
1932 (unibyte-display coding-system)))))
1933
aa360da1
GM
1934(declare-function x-server-vendor "xfns.c" (&optional terminal))
1935(declare-function x-server-version "xfns.c" (&optional terminal))
1936
51a8fc1d
RS
1937(defun standard-display-european-internal ()
1938 ;; Actually set up direct output of non-ASCII characters.
03c35c83
EZ
1939 (standard-display-8bit (if (eq window-system 'pc) 128 160) 255)
1940 ;; Unibyte Emacs on MS-DOS wants to display all 8-bit characters with
1941 ;; the native font, and codes 160 and 146 stand for something very
1942 ;; different there.
597e2240
GM
1943 (or (and (eq window-system 'pc) (not (default-value
1944 'enable-multibyte-characters)))
03c35c83 1945 (progn
6b626913
SM
1946 ;; Most X fonts used to do the wrong thing for latin-1 code 160.
1947 (unless (and (eq window-system 'x)
1948 ;; XFree86 4 has fixed the fonts.
1949 (string= "The XFree86 Project, Inc" (x-server-vendor))
1950 (> (aref (number-to-string (nth 2 (x-server-version))) 0)
1951 ?3))
1952 ;; Make non-line-break space display as a plain space.
1953 (aset standard-display-table 160 [32]))
1492f7ac 1954 ;; Most Windows programs send out apostrophes as \222. Most X fonts
03c35c83 1955 ;; don't contain a character at that position. Map it to the ASCII
5f395df3
SM
1956 ;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
1957 ;; U+2019, normally from the windows-1252 character set. XFree 4
1958 ;; fonts probably have the appropriate glyph at this position,
1959 ;; so they could use standard-display-8bit. It's better to use a
1960 ;; proper windows-1252 coding system. --fx]
6b626913 1961 (aset standard-display-table 146 [39]))))
03c35c83 1962
e9a8ed3c
KH
1963(defun set-language-environment-coding-systems (language-name)
1964 "Do various coding system setups for language environment LANGUAGE-NAME."
54b226f7 1965 (let* ((priority (get-language-info language-name 'coding-priority))
e9a8ed3c 1966 (default-coding (car priority))
b56a5ae0 1967 ;; If the default buffer-file-coding-system is nil, don't use
207422da 1968 ;; coding-system-eol-type, because it treats nil as
b56a5ae0 1969 ;; `no-conversion'. The default buffer-file-coding-system is set
207422da
EZ
1970 ;; to nil by reset-language-environment, and in that case we
1971 ;; want to have here the native EOL type for each platform.
1972 ;; FIXME: there should be a common code that runs both on
1973 ;; startup and here to set the default EOL type correctly.
1974 ;; Right now, DOS/Windows platforms set this on dos-w32.el,
1975 ;; which works only as long as the order of loading files at
1976 ;; dump time and calling functions at startup is not modified
1977 ;; significantly, i.e. as long as this function is called
b56a5ae0 1978 ;; _after_ the default buffer-file-coding-system was set by
207422da
EZ
1979 ;; dos-w32.el.
1980 (eol-type
b56a5ae0
SM
1981 (coding-system-eol-type
1982 (or (default-value 'buffer-file-coding-system)
1983 (if (memq system-type '(windows-nt ms-dos)) 'dos 'unix)))))
97941b05
KH
1984 (when priority
1985 (set-default-coding-systems
1986 (if (memq eol-type '(0 1 2 unix dos mac))
1987 (coding-system-change-eol-conversion default-coding eol-type)
1988 default-coding))
1989 (setq default-sendmail-coding-system default-coding)
1990 (apply 'set-coding-system-priority priority))))
54b226f7 1991
d042f8b4
KH
1992(defun set-language-environment-input-method (language-name)
1993 "Do various input method setups for language environment LANGUAGE-NAME."
1994 (let ((input-method (get-language-info language-name 'input-method)))
1995 (when input-method
1996 (setq default-input-method input-method)
1997 (if input-method-history
1998 (setq input-method-history
1999 (cons input-method
2000 (delete input-method input-method-history)))))))
2001
2002(defun set-language-environment-nonascii-translation (language-name)
2003 "Do unibyte/multibyte translation setup for language environment LANGUAGE-NAME."
e9a8ed3c
KH
2004 ;; Note: For DOS, we assumed that the charset cpXXX is already
2005 ;; defined.
2006 (let ((nonascii (get-language-info language-name 'nonascii-translation)))
2007 (if (eq window-system 'pc)
970c9391 2008 (setq nonascii (intern (format "cp%d" dos-codepage))))
e9a8ed3c
KH
2009 (or (and (charsetp nonascii)
2010 (get-charset-property nonascii :ascii-compatible-p))
2011 (setq nonascii 'iso-8859-1))
33de15f4
SM
2012 ;; Back in Emacs-20, it was necessary to provide some fallback implicit
2013 ;; conversion, because almost no packages handled coding-system issues.
2014 ;; Nowadays it'd just paper over bugs.
2015 ;; (set-unibyte-charset nonascii)
2016 ))
d042f8b4
KH
2017
2018(defun set-language-environment-charset (language-name)
2019 "Do various charset setups for language environment LANGUAGE-NAME."
e9a8ed3c
KH
2020 ;; Put higher priorities to such charsets that are supported by the
2021 ;; coding systems of higher priorities in this environment.
2022 (let ((charsets (get-language-info language-name 'charset)))
2023 (dolist (coding (get-language-info language-name 'coding-priority))
bf974dc9
KH
2024 (let ((list (coding-system-charset-list coding)))
2025 (if (consp list)
2026 (setq charsets (append charsets list)))))
e9a8ed3c
KH
2027 (if charsets
2028 (apply 'set-charset-priority charsets))))
d042f8b4
KH
2029
2030(defun set-language-environment-unibyte (language-name)
2031 "Do various unibyte-mode setups for language environment LANGUAGE-NAME."
d042f8b4
KH
2032 (set-display-table-and-terminal-coding-system language-name))
2033
4ed46869 2034(defsubst princ-list (&rest args)
caff3c0a 2035 "Print all arguments with `princ', then print \"\\n\"."
4ed46869
KH
2036 (while args (princ (car args)) (setq args (cdr args)))
2037 (princ "\n"))
2038
b0648a00
RS
2039(put 'describe-specified-language-support 'apropos-inhibit t)
2040
6b61353c 2041;; Print language-specific information such as input methods,
13e82c04 2042;; charsets, and coding systems. This function is intended to be
48082651 2043;; called from the menu:
281d03ec 2044;; [menu-bar mule describe-language-environment LANGUAGE]
48082651
KH
2045;; and should not run it by `M-x describe-current-input-method-function'.
2046(defun describe-specified-language-support ()
96db204a 2047 "Describe how Emacs supports the specified language environment."
48082651 2048 (interactive)
281d03ec 2049 (let (language-name)
48082651 2050 (if (not (and (symbolp last-command-event)
cda74479
DL
2051 (or (not (eq last-command-event 'Default))
2052 (setq last-command-event 'English))
281d03ec 2053 (setq language-name (symbol-name last-command-event))))
48082651 2054 (error "Bogus calling sequence"))
281d03ec
RS
2055 (describe-language-environment language-name)))
2056
2057(defun describe-language-environment (language-name)
2058 "Describe how Emacs supports language environment LANGUAGE-NAME."
78754934
KH
2059 (interactive
2060 (list (read-language-name
2061 'documentation
5b76833f 2062 "Describe language environment (default current choice): ")))
f850d782
RS
2063 (if (null language-name)
2064 (setq language-name current-language-environment))
281d03ec
RS
2065 (if (or (null language-name)
2066 (null (get-language-info language-name 'documentation)))
2067 (error "No documentation for the specified language"))
4ef06f75
KH
2068 (if (symbolp language-name)
2069 (setq language-name (symbol-name language-name)))
ef5a4730
KH
2070 (dolist (feature (get-language-info language-name 'features))
2071 (require feature))
6b626913 2072 (let ((doc (get-language-info language-name 'documentation)))
c3034e84 2073 (help-setup-xref (list #'describe-language-environment language-name)
32226619 2074 (called-interactively-p 'interactive))
c3034e84 2075 (with-output-to-temp-buffer (help-buffer)
9a529312 2076 (with-current-buffer standard-output
464cc130
KH
2077 (insert language-name " language environment\n\n")
2078 (if (stringp doc)
2079 (insert doc "\n\n"))
e036b0a6
KH
2080 (condition-case nil
2081 (let ((str (eval (get-language-info language-name 'sample-text))))
2082 (if (stringp str)
fd0dd4c3
KH
2083 (insert "Sample text:\n "
2084 (replace-regexp-in-string "\n" "\n " str)
2085 "\n\n")))
e036b0a6 2086 (error nil))
464cc130 2087 (let ((input-method (get-language-info language-name 'input-method))
acd1b9bd
KH
2088 (l (copy-sequence input-method-alist))
2089 (first t))
2090 (when (and input-method
2091 (setq input-method (assoc input-method l)))
2092 (insert "Input methods (default " (car input-method) ")\n")
2093 (setq l (cons input-method (delete input-method l))
2094 first nil))
2095 (dolist (elt l)
2096 (when (or (eq input-method elt)
2097 (eq t (compare-strings language-name nil nil
2098 (nth 1 elt) nil nil t)))
2099 (when first
2100 (insert "Input methods:\n")
2101 (setq first nil))
2102 (insert " " (car elt))
2103 (search-backward (car elt))
2104 (help-xref-button 0 'help-input-method (car elt))
464cc130 2105 (goto-char (point-max))
2fa7e202 2106 (insert " (\""
acd1b9bd
KH
2107 (if (stringp (nth 3 elt)) (nth 3 elt) (car (nth 3 elt)))
2108 "\" in mode line)\n")))
2109 (or first
2110 (insert "\n")))
464cc130
KH
2111 (insert "Character sets:\n")
2112 (let ((l (get-language-info language-name 'charset)))
2113 (if (null l)
2114 (insert " nothing specific to " language-name "\n")
2115 (while l
2116 (insert " " (symbol-name (car l)))
2117 (search-backward (symbol-name (car l)))
467412aa 2118 (help-xref-button 0 'help-character-set (car l))
464cc130
KH
2119 (goto-char (point-max))
2120 (insert ": " (charset-description (car l)) "\n")
2121 (setq l (cdr l)))))
2122 (insert "\n")
2123 (insert "Coding systems:\n")
2124 (let ((l (get-language-info language-name 'coding-system)))
2125 (if (null l)
2126 (insert " nothing specific to " language-name "\n")
2127 (while l
2128 (insert " " (symbol-name (car l)))
2129 (search-backward (symbol-name (car l)))
467412aa 2130 (help-xref-button 0 'help-coding-system (car l))
464cc130
KH
2131 (goto-char (point-max))
2132 (insert " (`"
2133 (coding-system-mnemonic (car l))
2134 "' in mode line):\n\t"
2135 (coding-system-doc-string (car l))
2136 "\n")
0855c6cd 2137 (let ((aliases (coding-system-aliases (car l))))
464cc130
KH
2138 (when aliases
2139 (insert "\t(alias:")
2140 (while aliases
2141 (insert " " (symbol-name (car aliases)))
2142 (setq aliases (cdr aliases)))
2143 (insert ")\n")))
c3034e84 2144 (setq l (cdr l)))))))))
4ed46869 2145\f
40c81f74
PE
2146;;; Locales.
2147
0d7c5bb9
DL
2148(defvar locale-translation-file-name nil
2149 "File name for the system's file of locale-name aliases, or nil if none.")
40c81f74 2150
5f395df3
SM
2151;; The following definitions might as well be marked as constants and
2152;; purecopied, since they're normally used on startup, and probably
2153;; should reflect the facilities of the base Emacs.
2154(defconst locale-language-names
2155 (purecopy
2156 '(
40c81f74
PE
2157 ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
2158 ;; as specified in the Single Unix Spec, Version 2.
2159 ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
2160 ;; with additions from ISO 639/RA Newsletter No.1/1989;
5f395df3
SM
2161 ;; see Internet RFC 2165 (1997-06) and
2162 ;; http://www.evertype.com/standards/iso639/iso639-en.html
2163 ;; TERRITORY is a country code taken from ISO 3166
2164 ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
40c81f74 2165 ;; CODESET and MODIFIER are implementation-dependent.
5f395df3 2166
6b61353c
KH
2167 ;; jasonr comments: MS Windows uses three letter codes for
2168 ;; languages instead of the two letter ISO codes that POSIX
2169 ;; uses. In most cases the first two letters are the same, so
2170 ;; most of the regexps in locale-language-names work. Japanese
2171 ;; and Chinese are exceptions, which are listed in the
2172 ;; non-standard section at the bottom of locale-language-names.
2173
8dedddd5
KH
2174 ("aa_DJ" . "Latin-1") ; Afar
2175 ("aa" . "UTF-8")
2176 ;; ab Abkhazian
6ececc4d 2177 ("af" . "Latin-1") ; Afrikaans
8dedddd5
KH
2178 ("am" "Ethiopic" utf-8) ; Amharic
2179 ("an" . "Latin-9") ; Aragonese
5f395df3 2180 ; ar Arabic glibc uses 8859-6
40c81f74
PE
2181 ; as Assamese
2182 ; ay Aymara
8dedddd5 2183 ("az" . "UTF-8") ; Azerbaijani
40c81f74 2184 ; ba Bashkir
8dedddd5
KH
2185 ("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
2186 ("bg" "Bulgarian" cp1251) ; Bulgarian
40c81f74
PE
2187 ; bh Bihari
2188 ; bi Bislama
8dedddd5 2189 ("bn" . "UTF-8") ; Bengali, Bangla
40c81f74
PE
2190 ("bo" . "Tibetan")
2191 ("br" . "Latin-1") ; Breton
d37ef0f6 2192 ("bs" . "Latin-2") ; Bosnian
8dedddd5 2193 ("byn" . "UTF-8") ; Bilin; Blin
40c81f74
PE
2194 ("ca" . "Latin-1") ; Catalan
2195 ; co Corsican
8dedddd5
KH
2196 ("cs" "Czech" iso-8859-2)
2197 ("cy" "Welsh" iso-8859-14)
40c81f74 2198 ("da" . "Latin-1") ; Danish
8dedddd5 2199 ("de" "German" iso-8859-1)
569a6374 2200 ; dv Divehi
40c81f74 2201 ; dz Bhutani
8dedddd5 2202 ("el" "Greek" iso-8859-7)
6ececc4d 2203 ;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
e11cf111
DL
2204 ;; That's actually what the GNU locales define, modulo things like
2205 ;; en_IN -- fx.
8dedddd5 2206 ("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India
eec5c8f9 2207 ("en" "English" iso-8859-1) ; English
58802cd7 2208 ("eo" . "Esperanto") ; Esperanto
8dedddd5
KH
2209 ("es" "Spanish" iso-8859-1)
2210 ("et" . "Latin-1") ; Estonian
40c81f74 2211 ("eu" . "Latin-1") ; Basque
8dedddd5 2212 ("fa" . "UTF-8") ; Persian
40c81f74 2213 ("fi" . "Latin-1") ; Finnish
8dedddd5 2214 ("fj" . "Latin-1") ; Fiji
40c81f74 2215 ("fo" . "Latin-1") ; Faroese
8dedddd5 2216 ("fr" "French" iso-8859-1) ; French
40c81f74 2217 ("fy" . "Latin-1") ; Frisian
6ececc4d 2218 ("ga" . "Latin-1") ; Irish Gaelic (new orthography)
8dedddd5
KH
2219 ("gd" . "Latin-9") ; Scots Gaelic
2220 ("gez" "Ethiopic" utf-8) ; Geez
2221 ("gl" . "Latin-1") ; Gallegan; Galician
40c81f74 2222 ; gn Guarani
8dedddd5
KH
2223 ("gu" . "UTF-8") ; Gujarati
2224 ("gv" . "Latin-1") ; Manx Gaelic
40c81f74 2225 ; ha Hausa
8dedddd5
KH
2226 ("he" "Hebrew" iso-8859-8)
2227 ("hi" "Devanagari" utf-8) ; Hindi
2228 ("hr" "Croatian" iso-8859-2) ; Croatian
40c81f74
PE
2229 ("hu" . "Latin-2") ; Hungarian
2230 ; hy Armenian
2231 ; ia Interlingua
2232 ("id" . "Latin-1") ; Indonesian
2233 ; ie Interlingue
2234 ; ik Inupiak
2235 ("is" . "Latin-1") ; Icelandic
8dedddd5 2236 ("it" "Italian" iso-8859-1) ; Italian
40c81f74 2237 ; iu Inuktitut
8dedddd5
KH
2238 ("iw" "Hebrew" iso-8859-8)
2239 ("ja" "Japanese" euc-jp)
40c81f74 2240 ; jw Javanese
8dedddd5 2241 ("ka" "Georgian" georgian-ps) ; Georgian
40c81f74 2242 ; kk Kazakh
6ececc4d 2243 ("kl" . "Latin-1") ; Greenlandic
40c81f74 2244 ; km Cambodian
8dedddd5
KH
2245 ("kn" "Kannada" utf-8)
2246 ("ko" "Korean" euc-kr)
40c81f74
PE
2247 ; ks Kashmiri
2248 ; ku Kurdish
5f395df3 2249 ("kw" . "Latin-1") ; Cornish
40c81f74
PE
2250 ; ky Kirghiz
2251 ("la" . "Latin-1") ; Latin
5f395df3 2252 ("lb" . "Latin-1") ; Luxemburgish
8dedddd5 2253 ("lg" . "Laint-6") ; Ganda
40c81f74 2254 ; ln Lingala
8dedddd5
KH
2255 ("lo" "Lao" utf-8) ; Laothian
2256 ("lt" "Lithuanian" iso-8859-13)
9c20a8d5 2257 ("lv" . "Latvian") ; Latvian, Lettish
40c81f74 2258 ; mg Malagasy
5f395df3 2259 ("mi" . "Latin-7") ; Maori
8dedddd5
KH
2260 ("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
2261 ("ml" "Malayalam" utf-8)
2262 ("mn" . "UTF-8") ; Mongolian
40c81f74 2263 ; mo Moldavian
8dedddd5 2264 ("mr" "Devanagari" utf-8) ; Marathi
5f395df3 2265 ("ms" . "Latin-1") ; Malay
40c81f74
PE
2266 ("mt" . "Latin-3") ; Maltese
2267 ; my Burmese
2268 ; na Nauru
8dedddd5
KH
2269 ("nb" . "Latin-1") ; Norwegian
2270 ("ne" "Devanagari" utf-8) ; Nepali
2271 ("nl" "Dutch" iso-8859-1)
40c81f74 2272 ("no" . "Latin-1") ; Norwegian
5f395df3 2273 ("oc" . "Latin-1") ; Occitan
8dedddd5
KH
2274 ("om_ET" . "UTF-8") ; (Afan) Oromo
2275 ("om" . "Latin-1") ; (Afan) Oromo
40c81f74 2276 ; or Oriya
8dedddd5 2277 ("pa" . "UTF-8") ; Punjabi
40c81f74
PE
2278 ("pl" . "Latin-2") ; Polish
2279 ; ps Pashto, Pushto
2280 ("pt" . "Latin-1") ; Portuguese
2281 ; qu Quechua
6ececc4d 2282 ("rm" . "Latin-1") ; Rhaeto-Romanic
40c81f74 2283 ; rn Kirundi
8dedddd5
KH
2284 ("ro" "Romanian" iso-8859-2)
2285 ("ru_RU" "Russian" iso-8859-5)
2286 ("ru_UA" "Russian" koi8-u)
40c81f74
PE
2287 ; rw Kinyarwanda
2288 ("sa" . "Devanagari") ; Sanskrit
2289 ; sd Sindhi
8dedddd5 2290 ("se" . "UTF-8") ; Northern Sami
40c81f74
PE
2291 ; sg Sangho
2292 ("sh" . "Latin-2") ; Serbo-Croatian
2293 ; si Sinhalese
8dedddd5
KH
2294 ("sid" . "UTF-8") ; Sidamo
2295 ("sk" "Slovak" iso-8859-2)
2296 ("sl" "Slovenian" iso-8859-2)
40c81f74
PE
2297 ; sm Samoan
2298 ; sn Shona
8dedddd5
KH
2299 ("so_ET" "UTF-8") ; Somali
2300 ("so" "Latin-1") ; Somali
6ececc4d 2301 ("sq" . "Latin-1") ; Albanian
40c81f74
PE
2302 ("sr" . "Latin-2") ; Serbian (Latin alphabet)
2303 ; ss Siswati
8dedddd5 2304 ("st" . "Latin-1") ; Sesotho
40c81f74 2305 ; su Sundanese
8dedddd5 2306 ("sv" "Swedish" iso-8859-1) ; Swedish
40c81f74 2307 ("sw" . "Latin-1") ; Swahili
8dedddd5
KH
2308 ("ta" "Tamil" utf-8)
2309 ("te" . "UTF-8") ; Telugu
2310 ("tg" "Tajik" koi8-t)
2311 ("th" "Thai" tis-620)
2312 ("ti" "Ethiopic" utf-8) ; Tigrinya
2313 ("tig_ER" . "UTF-8") ; Tigre
40c81f74 2314 ; tk Turkmen
6ececc4d 2315 ("tl" . "Latin-1") ; Tagalog
40c81f74
PE
2316 ; tn Setswana
2317 ; to Tonga
8dedddd5 2318 ("tr" "Turkish" iso-8859-9)
40c81f74 2319 ; ts Tsonga
8dedddd5 2320 ("tt" . "UTF-8") ; Tatar
40c81f74
PE
2321 ; tw Twi
2322 ; ug Uighur
8dedddd5
KH
2323 ("uk" "Ukrainian" koi8-u)
2324 ("ur" . "UTF-8") ; Urdu
2325 ("uz_UZ@cyrillic" . "UTF-8"); Uzbek
5f395df3 2326 ("uz" . "Latin-1") ; Uzbek
8dedddd5 2327 ("vi" "Vietnamese" utf-8)
40c81f74 2328 ; vo Volapuk
d37ef0f6 2329 ("wa" . "Latin-1") ; Walloon
40c81f74 2330 ; wo Wolof
8dedddd5 2331 ("xh" . "Latin-1") ; Xhosa
9c20a8d5 2332 ("yi" . "Windows-1255") ; Yiddish
40c81f74
PE
2333 ; yo Yoruba
2334 ; za Zhuang
8dedddd5 2335 ("zh_HK" . "Chinese-Big5")
05b81b42 2336 ; zh_HK/BIG5-HKSCS \
8dedddd5 2337 ("zh_TW" . "Chinese-Big5")
05b81b42
KH
2338 ("zh_CN.GB2312" "Chinese-GB")
2339 ("zh_CN.GBK" "Chinese-GBK")
2340 ("zh_CN.GB18030" "Chinese-GB18030")
f21605fa 2341 ("zh_CN.UTF-8" . "Chinese-GBK")
8dedddd5 2342 ("zh_CN" . "Chinese-GB")
40c81f74 2343 ("zh" . "Chinese-GB")
8dedddd5 2344 ("zu" . "Latin-1") ; Zulu
40c81f74
PE
2345
2346 ;; ISO standard locales
2347 ("c$" . "ASCII")
2348 ("posix$" . "ASCII")
2349
40c81f74
PE
2350 ;; The "IPA" Emacs language environment does not correspond
2351 ;; to any ISO 639 code, so let it stand for itself.
2352 ("ipa$" . "IPA")
2353
2354 ;; Nonstandard or obsolete language codes
2355 ("cz" . "Czech") ; e.g. Solaris 2.6
2356 ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4
2357 ("iw" . "Hebrew") ; e.g. X11R6.4
f1282c7f 2358 ("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4
40c81f74 2359 ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6
2e86ceaa 2360 ("jp" . "Japanese") ; e.g. MS Windows
f21605fa 2361 ("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified
86d9e628 2362 ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
569a6374
EZ
2363 ("gbz" . "UTF-8") ; MS Windows Dari Persian
2364 ("div" . "UTF-8") ; MS Windows Divehi (Maldives)
2365 ("wee" . "Latin-2") ; MS Windows Lower Sorbian
2366 ("wen" . "Latin-2") ; MS Windows Upper Sorbian
367ca50f 2367 ))
8dedddd5 2368 "Alist of locale regexps vs the corresponding languages and coding systems.
caff3c0a 2369Each element has this form:
8dedddd5
KH
2370 \(LOCALE-REGEXP LANG-ENV CODING-SYSTEM)
2371The first element whose LOCALE-REGEXP matches the start of a
caff3c0a 2372downcased locale specifies the LANG-ENV \(language environment)
8dedddd5
KH
2373and CODING-SYSTEM corresponding to that locale. If there is no
2374appropriate language environment, the element may have this form:
2375 \(LOCALE-REGEXP . LANG-ENV)
2376In this case, LANG-ENV is one of generic language environments for an
2377specific encoding such as \"Latin-1\" and \"UTF-8\".")
40c81f74 2378
5f395df3
SM
2379(defconst locale-charset-language-names
2380 (purecopy
2381 '((".*8859[-_]?1\\>" . "Latin-1")
2382 (".*8859[-_]?2\\>" . "Latin-2")
2383 (".*8859[-_]?3\\>" . "Latin-3")
2384 (".*8859[-_]?4\\>" . "Latin-4")
2385 (".*8859[-_]?9\\>" . "Latin-5")
2386 (".*8859[-_]?14\\>" . "Latin-8")
2387 (".*8859[-_]?15\\>" . "Latin-9")
6b61353c
KH
2388 (".*utf\\(?:-?8\\)?\\>" . "UTF-8")
2389 ;; utf-8@euro exists, so put this last. (@euro really specifies
2390 ;; the currency, rather than the charset.)
e522f07b 2391 (".*@euro\\>" . "Latin-9")))
6ececc4d
PE
2392 "List of pairs of locale regexps and charset language names.
2393The first element whose locale regexp matches the start of a downcased locale
6b61353c 2394specifies the language name whose charset corresponds to that locale.
8dedddd5 2395This language name is used if the locale is not listed in
caff3c0a 2396`locale-language-names'.")
6ececc4d 2397
5f395df3
SM
2398(defconst locale-preferred-coding-systems
2399 (purecopy
8dedddd5
KH
2400 '((".*8859[-_]?1\\>" . iso-8859-1)
2401 (".*8859[-_]?2\\>" . iso-8859-2)
2402 (".*8859[-_]?3\\>" . iso-8859-3)
2403 (".*8859[-_]?4\\>" . iso-8859-4)
2404 (".*8859[-_]?9\\>" . iso-8859-9)
2405 (".*8859[-_]?14\\>" . iso-8859-14)
2406 (".*8859[-_]?15\\>" . iso-8859-15)
2407 (".*utf\\(?:-?8\\)?" . utf-8)
2408 ;; utf-8@euro exists, so put this after utf-8. (@euro really
2409 ;; specifies the currency, rather than the charset.)
2410 (".*@euro" . iso-8859-15)
2411 ("koi8-?r" . koi8-r)
2412 ("koi8-?u" . koi8-u)
2413 ("tcvn" . tcvn)
38c05d07 2414 ("big5[-_]?hkscs" . big5-hkscs)
8dedddd5
KH
2415 ("big5" . big5)
2416 ("euc-?tw" . euc-tw)
d6be7497 2417 ("euc-?cn" . euc-cn)
38c05d07
KH
2418 ("gb2312" . gb2312)
2419 ("gbk" . gbk)
2420 ("gb18030" . gb18030)
8dedddd5 2421 ("ja.*[._]euc" . japanese-iso-8bit)
5f395df3
SM
2422 ("ja.*[._]jis7" . iso-2022-jp)
2423 ("ja.*[._]pck" . japanese-shift-jis)
2424 ("ja.*[._]sjis" . japanese-shift-jis)
69210880 2425 ("jpn" . japanese-shift-jis) ; MS-Windows uses this.
8dedddd5 2426 ))
6ececc4d
PE
2427 "List of pairs of locale regexps and preferred coding systems.
2428The first element whose locale regexp matches the start of a downcased locale
8dedddd5
KH
2429specifies the coding system to prefer when using that locale.
2430This coding system is used if the locale specifies a specific charset.")
40c81f74
PE
2431
2432(defun locale-name-match (key alist)
2433 "Search for KEY in ALIST, which should be a list of regexp-value pairs.
2434Return the value corresponding to the first regexp that matches the
2435start of KEY, or nil if there is no match."
2436 (let (element)
2437 (while (and alist (not element))
d0d8885d 2438 (if (string-match-p (concat "\\`\\(?:" (car (car alist)) "\\)") key)
40c81f74
PE
2439 (setq element (car alist)))
2440 (setq alist (cdr alist)))
2441 (cdr element)))
2442
58b78d5b 2443(defun locale-charset-match-p (charset1 charset2)
91ed0599 2444 "Whether charset names (strings) CHARSET1 and CHARSET2 are equivalent.
58b78d5b
DL
2445Matching is done ignoring case and any hyphens and underscores in the
2446names. E.g. `ISO_8859-1' and `iso88591' both match `iso-8859-1'."
2447 (setq charset1 (replace-regexp-in-string "[-_]" "" charset1))
2448 (setq charset2 (replace-regexp-in-string "[-_]" "" charset2))
2449 (eq t (compare-strings charset1 nil nil charset2 nil nil t)))
2450
6b61353c
KH
2451(defvar locale-charset-alist nil
2452 "Coding system alist keyed on locale-style charset name.
2453Used by `locale-charset-to-coding-system'.")
2454
2455(defun locale-charset-to-coding-system (charset)
2456 "Find coding system corresponding to CHARSET.
2457CHARSET is any sort of non-Emacs charset name, such as might be used
2458in a locale codeset, or elsewhere. It is matched to a coding system
2459first by case-insensitive lookup in `locale-charset-alist'. Then
2460matches are looked for in the coding system list, treating case and
2461the characters `-' and `_' as insignificant. The coding system base
2462is returned. Thus, for instance, if charset \"ISO8859-2\",
2463`iso-latin-2' is returned."
f15466c5 2464 (or (car (assoc-string charset locale-charset-alist t))
6b61353c
KH
2465 (let ((cs coding-system-alist)
2466 c)
2467 (while (and (not c) cs)
2468 (if (locale-charset-match-p charset (caar cs))
2469 (setq c (intern (caar cs)))
2470 (pop cs)))
2471 (if c (coding-system-base c)))))
2472
2473;; Fixme: This ought to deal with the territory part of the locale
2474;; too, for setting things such as calendar holidays, ps-print paper
2475;; size, spelling dictionary.
2476
44ee1bdf
GM
2477(defun locale-translate (locale)
2478 "Expand LOCALE according to `locale-translation-file-name', if possible.
2479For example, translate \"swedish\" into \"sv_SE.ISO8859-1\"."
2480 (if locale-translation-file-name
2481 (with-temp-buffer
2482 (set-buffer-multibyte nil)
2483 (insert-file-contents locale-translation-file-name)
2484 (if (re-search-forward
2485 (concat "^" (regexp-quote locale) ":?[ \t]+") nil t)
2486 (buffer-substring (point) (line-end-position))
2487 locale))
2488 locale))
2489
36ab8612 2490(defun set-locale-environment (&optional locale-name frame)
40c81f74 2491 "Set up multi-lingual environment for using LOCALE-NAME.
758f07de
RS
2492This sets the language environment, the coding system priority,
2493the default input method and sometimes other things.
2494
58b78d5b 2495LOCALE-NAME should be a string which is the name of a locale supported
6b61353c 2496by the system. Often it is of the form xx_XX.CODE, where xx is a
58b78d5b
DL
2497language, XX is a country, and CODE specifies a character set and
2498coding system. For example, the locale name \"ja_JP.EUC\" might name
2499a locale for Japanese in Japan using the `japanese-iso-8bit'
2500coding-system. The name may also have a modifier suffix, e.g. `@euro'
2501or `@cyrillic'.
40c81f74 2502
758f07de 2503If LOCALE-NAME is nil, its value is taken from the environment
d37ef0f6 2504variables LC_ALL, LC_CTYPE and LANG (the first one that is set).
40c81f74
PE
2505
2506The locale names supported by your system can typically be found in a
0812c1e8 2507directory named `/usr/share/locale' or `/usr/lib/locale'. LOCALE-NAME
758f07de 2508will be translated according to the table specified by
0812c1e8
DL
2509`locale-translation-file-name'.
2510
36ab8612
MB
2511If FRAME is non-nil, only set the keyboard coding system and the
2512terminal coding system for the terminal of that frame, and don't
2513touch session-global parameters like the language environment.
97c57fb2 2514
0812c1e8
DL
2515See also `locale-charset-language-names', `locale-language-names',
2516`locale-preferred-coding-systems' and `locale-coding-system'."
758f07de 2517 (interactive "sSet environment for locale: ")
a1506d29 2518
0d7c5bb9
DL
2519 ;; Do this at runtime for the sake of binaries possibly transported
2520 ;; to a system without X.
2521 (setq locale-translation-file-name
2522 (let ((files
6bba8c70
KH
2523 '("/usr/share/X11/locale/locale.alias" ; e.g. X11R7
2524 "/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4
6b61353c 2525 "/usr/X11R6/lib/X11/locale/locale.alias" ; XFree86, e.g. RedHat 4.2
0d7c5bb9
DL
2526 "/usr/openwin/lib/locale/locale.alias" ; e.g. Solaris 2.6
2527 ;;
2528 ;; The following name appears after the X-related names above,
2529 ;; since the X-related names are what X actually uses.
2530 "/usr/share/locale/locale.alias" ; GNU/Linux sans X
2531 )))
2532 (while (and files (not (file-exists-p (car files))))
2533 (setq files (cdr files)))
2534 (car files)))
2535
7008ccac
GM
2536 (let ((locale locale-name))
2537
2538 (unless locale
2539 ;; Use the first of these three environment variables
2540 ;; that has a nonempty value.
2541 (let ((vars '("LC_ALL" "LC_CTYPE" "LANG")))
db6bd804
DL
2542 (while (and vars
2543 (= 0 (length locale))) ; nil or empty string
36ab8612 2544 (setq locale (getenv (pop vars) frame)))))
7008ccac
GM
2545
2546 (when locale
44ee1bdf 2547 (setq locale (locale-translate locale))
7008ccac
GM
2548
2549 ;; Leave the system locales alone if the caller did not specify
2550 ;; an explicit locale name, as their defaults are set from
2551 ;; LC_MESSAGES and LC_TIME, not LC_CTYPE, and the user might not
2552 ;; want to set them to the same value as LC_CTYPE.
2553 (when locale-name
2554 (setq system-messages-locale locale)
e9aaa1db
KH
2555 (setq system-time-locale locale))
2556
2557 (if (string-match "^[a-z][a-z]" locale)
2558 (setq current-iso639-language (intern (match-string 0 locale)))))
7008ccac 2559
44ee1bdf
GM
2560 (setq woman-locale
2561 (or system-messages-locale
739b7764 2562 (let ((msglocale (getenv "LC_MESSAGES" frame)))
44ee1bdf
GM
2563 (if (zerop (length msglocale))
2564 locale
2565 (locale-translate msglocale)))))
2566
2567 (when locale
7008ccac
GM
2568 (setq locale (downcase locale))
2569
2570 (let ((language-name
2571 (locale-name-match locale locale-language-names))
2572 (charset-language-name
2573 (locale-name-match locale locale-charset-language-names))
cdb1af30 2574 (default-eol-type (coding-system-eol-type
b56a5ae0 2575 (default-value 'buffer-file-coding-system)))
7008ccac 2576 (coding-system
6b61353c
KH
2577 (or (locale-name-match locale locale-preferred-coding-systems)
2578 (when locale
2579 (if (string-match "\\.\\([^@]+\\)" locale)
2580 (locale-charset-to-coding-system
9e2a2647 2581 (match-string 1 locale)))))))
7008ccac 2582
8dedddd5
KH
2583 (if (consp language-name)
2584 ;; locale-language-names specify both lang-env and coding.
2585 ;; But, what specified in locale-preferred-coding-systems
2586 ;; has higher priority.
2587 (setq coding-system (or coding-system
2588 (nth 1 language-name))
2589 language-name (car language-name))
2590 ;; Otherwise, if locale is not listed in locale-language-names,
2591 ;; use what listed in locale-charset-language-names.
2592 (if (not language-name)
2593 (setq language-name charset-language-name)))
7008ccac 2594
cdb1af30
EZ
2595 ;; If a specific EOL conversion was specified in the default
2596 ;; buffer-file-coding-system, preserve it in the coding system
2597 ;; we will be using from now on.
8bca692e
EZ
2598 (if (and (memq default-eol-type '(0 1 2 unix dos mac))
2599 coding-system
2600 (coding-system-p coding-system))
cdb1af30
EZ
2601 (setq coding-system (coding-system-change-eol-conversion
2602 coding-system default-eol-type)))
2603
7008ccac
GM
2604 (when language-name
2605
2606 ;; Set up for this character set. This is now the right way
2607 ;; to do it for both unibyte and multibyte modes.
36ab8612 2608 (unless frame
97c57fb2 2609 (set-language-environment language-name))
7008ccac 2610
597e2240 2611 ;; If the default enable-multibyte-characters is nil,
7008ccac
GM
2612 ;; we are using single-byte characters,
2613 ;; so the display table and terminal coding system are irrelevant.
597e2240 2614 (when (default-value 'enable-multibyte-characters)
ff76e074 2615 (set-display-table-and-terminal-coding-system
36ab8612 2616 language-name coding-system frame))
7008ccac 2617
166ce29f
DL
2618 ;; Set the `keyboard-coding-system' if appropriate (tty
2619 ;; only). At least X and MS Windows can generate
2620 ;; multilingual input.
68bba4e4 2621 ;; XXX This was disabled unless `window-system', but that
fffa137c 2622 ;; leads to buggy behavior when a tty frame is opened
68bba4e4
KL
2623 ;; later. Setting the keyboard coding system has no adverse
2624 ;; effect on X, so let's do it anyway. -- Lorentey
2625 (let ((kcs (or coding-system
2626 (car (get-language-info language-name
2627 'coding-system)))))
36ab8612 2628 (if kcs (set-keyboard-coding-system kcs frame)))
dc2be2fa 2629
36ab8612 2630 (unless frame
97c57fb2
KL
2631 (setq locale-coding-system
2632 (car (get-language-info language-name 'coding-priority)))))
7008ccac 2633
36ab8612 2634 (when (and (not frame)
97c57fb2 2635 coding-system
8dedddd5
KH
2636 (not (coding-system-equal coding-system
2637 locale-coding-system)))
7008ccac 2638 (prefer-coding-system coding-system)
218e7ce3
KH
2639 ;; Fixme: perhaps prefer-coding-system should set this too.
2640 ;; But it's not the time to do such a fundamental change.
2641 (setq default-sendmail-coding-system coding-system)
80e3310b 2642 (setq locale-coding-system coding-system))))
e76ef161 2643
52c7f9ee 2644 ;; On Windows, override locale-coding-system,
26245233
JR
2645 ;; default-file-name-coding-system, keyboard-coding-system,
2646 ;; terminal-coding-system with system codepage.
5eb94383 2647 (when (boundp 'w32-ansi-code-page)
893b49bb
JR
2648 (let ((code-page-coding (intern (format "cp%d" w32-ansi-code-page))))
2649 (when (coding-system-p code-page-coding)
36ab8612
MB
2650 (unless frame (setq locale-coding-system code-page-coding))
2651 (set-keyboard-coding-system code-page-coding frame)
d4aa48db 2652 (set-terminal-coding-system code-page-coding frame)
59938af3
EZ
2653 ;; Set default-file-name-coding-system last, so that Emacs
2654 ;; doesn't try to use cpNNNN when it defines keyboard and
2655 ;; terminal encoding. That's because the above two lines
2656 ;; will want to load code-pages.el, where cpNNNN are
2657 ;; defined; if default-file-name-coding-system were set to
2658 ;; cpNNNN while these two lines run, Emacs will want to use
2659 ;; it for encoding the file name it wants to load. And that
2660 ;; will fail, since cpNNNN is not yet usable until
2661 ;; code-pages.el finishes loading.
2662 (setq default-file-name-coding-system code-page-coding))))
893b49bb 2663
a41118cc 2664 (when (eq system-type 'darwin)
ff76e074
SM
2665 ;; On Darwin, file names are always encoded in utf-8, no matter
2666 ;; the locale.
2667 (setq default-file-name-coding-system 'utf-8)
2668 ;; Mac OS X's Terminal.app by default uses utf-8 regardless of
2669 ;; the locale.
2670 (when (and (null window-system)
36ab8612 2671 (equal (getenv "TERM_PROGRAM" frame) "Apple_Terminal"))
ff76e074
SM
2672 (set-terminal-coding-system 'utf-8)
2673 (set-keyboard-coding-system 'utf-8)))
a41118cc 2674
e76ef161 2675 ;; Default to A4 paper if we're not in a C, POSIX or US locale.
3479c806 2676 ;; (See comments in Flocale_info.)
36ab8612 2677 (unless frame
97c57fb2
KL
2678 (let ((locale locale)
2679 (paper (locale-info 'paper)))
2680 (if paper
2681 ;; This will always be null at the time of writing.
2682 (cond
2683 ((equal paper '(216 279))
2684 (setq ps-paper-type 'letter))
2685 ((equal paper '(210 297))
2686 (setq ps-paper-type 'a4)))
2687 (let ((vars '("LC_ALL" "LC_PAPER" "LANG")))
2688 (while (and vars (= 0 (length locale)))
36ab8612 2689 (setq locale (getenv (pop vars) frame))))
97c57fb2
KL
2690 (when locale
2691 ;; As of glibc 2.2.5, these are the only US Letter locales,
2692 ;; and the rest are A4.
2693 (setq ps-paper-type
2694 (or (locale-name-match locale '(("c$" . letter)
2695 ("posix$" . letter)
2696 (".._us" . letter)
2697 (".._pr" . letter)
2698 (".._ca" . letter)
2699 ("enu$" . letter) ; Windows
2700 ("esu$" . letter)
2701 ("enc$" . letter)
2702 ("frc$" . letter)))
2703 'a4)))))))
e76ef161 2704 nil)
40c81f74 2705\f
16400c32
KH
2706;;; Character property
2707
2708;; Each element has the form (PROP . TABLE).
2709;; PROP is a symbol representing a character property.
2710;; TABLE is a char-table containing the property value for each character.
2711;; TABLE may be a name of file to load to build a char-table.
2712;; Don't modify this variable directly but use `define-char-code-property'.
2713
2714(defvar char-code-property-alist nil
2715 "Alist of character property name vs char-table containing property values.
2716Internal use only.")
2717
2718(put 'char-code-property-table 'char-table-extra-slots 5)
2719
2720(defun define-char-code-property (name table &optional docstring)
2721 "Define NAME as a character code property given by TABLE.
2722TABLE is a char-table of purpose `char-code-property-table' with
2723these extra slots:
2724 1st: NAME.
2725 2nd: Function to call to get a property value of a character.
caff3c0a 2726 It is called with three arguments CHAR, VAL, and TABLE, where
16400c32
KH
2727 CHAR is a character, VAL is the value of (aref TABLE CHAR).
2728 3rd: Function to call to put a property value of a character.
2729 It is called with the same arguments as above.
2730 4th: Function to call to get a description string of a property value.
2731 It is called with one argument VALUE, a property value.
2732 5th: Data used by the above functions.
2733
2734TABLE may be a name of file to load to build a char-table. The
2735file should contain a call of `define-char-code-property' with a
2736char-table of the above format as the argument TABLE.
2737
2738TABLE may also be nil, in which case no property value is pre-assigned.
2739
49275d55 2740Optional 3rd argument DOCSTRING is a documentation string of the property.
16400c32
KH
2741
2742See also the documentation of `get-char-code-property' and
2743`put-char-code-property'."
2744 (or (symbolp name)
2745 (error "Not a symbol: %s" name))
2746 (if (char-table-p table)
2747 (or (and (eq (char-table-subtype table) 'char-code-property-table)
2748 (eq (char-table-extra-slot table 0) name))
2749 (error "Invalid char-table: %s" table))
2750 (or (stringp table)
2751 (error "Not a char-table nor a file name: %s" table)))
ebfa10d3 2752 (if (stringp table) (setq table (purecopy table)))
16400c32
KH
2753 (let ((slot (assq name char-code-property-alist)))
2754 (if slot
2755 (setcdr slot table)
2756 (setq char-code-property-alist
2757 (cons (cons name table) char-code-property-alist))))
905a9ed3 2758 (put name 'char-code-property-documentation (purecopy docstring)))
4ed46869
KH
2759
2760(defvar char-code-property-table
2761 (make-char-table 'char-code-property-table)
2762 "Char-table containing a property list of each character code.
16400c32 2763This table is used for properties not listed in `char-code-property-alist'.
4ed46869 2764See also the documentation of `get-char-code-property' and
96db204a 2765`put-char-code-property'.")
4ed46869
KH
2766
2767(defun get-char-code-property (char propname)
16400c32
KH
2768 "Return the value of CHAR's PROPNAME property."
2769 (let ((slot (assq propname char-code-property-alist)))
2770 (if slot
2771 (let (table value func)
2772 (if (stringp (cdr slot))
d0d8885d 2773 (load (cdr slot) nil t))
16400c32
KH
2774 (setq table (cdr slot)
2775 value (aref table char)
2776 func (char-table-extra-slot table 1))
2777 (if (functionp func)
2778 (setq value (funcall func char value table)))
2779 value)
2780 (plist-get (aref char-code-property-table char) propname))))
4ed46869
KH
2781
2782(defun put-char-code-property (char propname value)
16400c32 2783 "Store CHAR's PROPNAME property with VALUE.
4ed46869 2784It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
16400c32
KH
2785 (let ((slot (assq propname char-code-property-alist)))
2786 (if slot
2787 (let (table func)
2788 (if (stringp (cdr slot))
d0d8885d 2789 (load (cdr slot) nil t))
16400c32
KH
2790 (setq table (cdr slot)
2791 func (char-table-extra-slot table 2))
2792 (if (functionp func)
2793 (funcall func char value table)
2794 (aset table char value)))
2795 (let* ((plist (aref char-code-property-table char))
2796 (x (plist-put plist propname value)))
2797 (or (eq x plist)
2798 (aset char-code-property-table char x))))
2799 value))
2800
2801(defun char-code-property-description (prop value)
2802 "Return a description string of character property PROP's value VALUE.
2803If there's no description string for VALUE, return nil."
2804 (let ((slot (assq prop char-code-property-alist)))
2805 (if slot
2806 (let (table func)
2807 (if (stringp (cdr slot))
d0d8885d 2808 (load (cdr slot) nil t))
16400c32
KH
2809 (setq table (cdr slot)
2810 func (char-table-extra-slot table 3))
2811 (if (functionp func)
2812 (funcall func value))))))
4ed46869 2813
a127b764
KH
2814\f
2815;; Pretty description of encoded string
2816
2817;; Alist of ISO 2022 control code vs the corresponding mnemonic string.
2f7f4bee 2818(defconst iso-2022-control-alist
a127b764
KH
2819 '((?\x1b . "ESC")
2820 (?\x0e . "SO")
2821 (?\x0f . "SI")
2822 (?\x8e . "SS2")
2823 (?\x8f . "SS3")
2824 (?\x9b . "CSI")))
2825
2826(defun encoded-string-description (str coding-system)
2827 "Return a pretty description of STR that is encoded by CODING-SYSTEM."
2828 (setq str (string-as-unibyte str))
993b2a7d 2829 (mapconcat
a6dfc99b 2830 (if (and coding-system (eq (coding-system-type coding-system) 'iso-2022))
993b2a7d
KH
2831 ;; Try to get a pretty description for ISO 2022 escape sequences.
2832 (function (lambda (x) (or (cdr (assq x iso-2022-control-alist))
695effcc
JL
2833 (format "#x%02X" x))))
2834 (function (lambda (x) (format "#x%02X" x))))
993b2a7d 2835 str " "))
a127b764 2836
430c6ced 2837(defun encode-coding-char (char coding-system &optional charset)
a127b764 2838 "Encode CHAR by CODING-SYSTEM and return the resulting string.
430c6ced
KH
2839If CODING-SYSTEM can't safely encode CHAR, return nil.
2840The 3rd optional argument CHARSET, if non-nil, is a charset preferred
2841on encoding."
b529ed1e
KH
2842 (let* ((str1 (string-as-multibyte (string char)))
2843 (str2 (string-as-multibyte (string char char)))
2844 (found (find-coding-systems-string str1))
a127b764 2845 enc1 enc2 i1 i2)
b529ed1e
KH
2846 (if (and (consp found)
2847 (eq (car found) 'undecided))
2848 str1
2849 (when (memq (coding-system-base coding-system) found)
2850 ;; We must find the encoded string of CHAR. But, just encoding
2851 ;; CHAR will put extra control sequences (usually to designate
2852 ;; ASCII charset) at the tail if type of CODING is ISO 2022.
2853 ;; To exclude such tailing bytes, we at first encode one-char
2854 ;; string and two-char string, then check how many bytes at the
2855 ;; tail of both encoded strings are the same.
2856
430c6ced
KH
2857 (when charset
2858 (put-text-property 0 1 'charset charset str1)
2859 (put-text-property 0 2 'charset charset str2))
b529ed1e
KH
2860 (setq enc1 (encode-coding-string str1 coding-system)
2861 i1 (length enc1)
2862 enc2 (encode-coding-string str2 coding-system)
2863 i2 (length enc2))
2864 (while (and (> i1 0) (= (aref enc1 (1- i1)) (aref enc2 (1- i2))))
2865 (setq i1 (1- i1) i2 (1- i2)))
2866
2867 ;; Now (substring enc1 i1) and (substring enc2 i2) are the same,
2868 ;; and they are the extra control sequences at the tail to
2869 ;; exclude.
2870 (substring enc2 0 i2)))))
a127b764 2871
d2f613a0
DL
2872;; Backwards compatibility. These might be better with :init-value t,
2873;; but that breaks loadup.
2874(define-minor-mode unify-8859-on-encoding-mode
2875 "Obsolete."
2876 :group 'mule
2877 :global t)
2878(define-minor-mode unify-8859-on-decoding-mode
2879 "Obsolete."
2880 :group 'mule
2881 :global t)
a127b764 2882
7dd42fb1
KH
2883(defvar nonascii-insert-offset 0 "This variable is obsolete.")
2884(defvar nonascii-translation-table nil "This variable is obsolete.")
2885
838d78d4
JL
2886(defvar ucs-names nil
2887 "Alist of cached (CHAR-NAME . CHAR-CODE) pairs.")
2888
2889(defun ucs-names ()
2890 "Return alist of (CHAR-NAME . CHAR-CODE) pairs cached in `ucs-names'."
2891 (or ucs-names
ae63e572
KH
2892 (let ((bmp-ranges
2893 '((#x0000 . #x33FF)
2894 ;; (#x3400 . #x4DBF) CJK Ideograph Extension A
2895 (#x4DC0 . #x4DFF)
2896 ;; (#x4E00 . #x9FFF) CJK Ideograph
2897 (#xA000 . #x0D7FF)
2898 ;; (#xD800 . #xFAFF) Surrogate/Private
2899 (#xFB00 . #xFFFD)))
2900 (upper-ranges
2901 '((#x10000 . #x134FF)
2902 ;; (#x13500 . #x1CFFF) unsed
2903 (#x1D000 . #x1FFFF)
2904 ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unsed
2905 (#xE0000 . #xE01FF)))
2906 (gc-cons-threshold 10000000)
2907 c end name names)
2908 (dolist (range bmp-ranges)
2909 (setq c (car range)
2910 end (cdr range))
2911 (while (<= c end)
2912 (if (setq name (get-char-code-property c 'name))
2913 (push (cons name c) names))
2914 (if (setq name (get-char-code-property c 'old-name))
2915 (push (cons name c) names))
2916 (setq c (1+ c))))
2917 (dolist (range upper-ranges)
2918 (setq c (car range)
2919 end (cdr range))
2920 (while (<= c end)
2921 (if (setq name (get-char-code-property c 'name))
2922 (push (cons name c) names))
2923 (setq c (1+ c))))
e0727873 2924 (setq ucs-names names))))
838d78d4
JL
2925
2926(defvar ucs-completions (lazy-completion-table ucs-completions ucs-names)
2927 "Lazy completion table for completing on Unicode character names.")
bdf66e8d 2928(put 'ucs-completions 'risky-local-variable t)
838d78d4
JL
2929
2930(defun read-char-by-name (prompt)
2931 "Read a character by its Unicode name or hex number string.
c0a67379
JL
2932Display PROMPT and read a string that represents a character by its
2933Unicode property `name' or `old-name'. You can type a few of first
2934letters of the Unicode name and use completion. This function also
2935accepts a hexadecimal number of Unicode code point or a number in
2936hash notation, e.g. #o21430 for octal, #x2318 for hex, or #10r8984
2937for decimal. Returns a character as a number."
838d78d4
JL
2938 (let* ((completion-ignore-case t)
2939 (input (completing-read prompt ucs-completions)))
c0a67379 2940 (cond
d0d8885d 2941 ((string-match-p "^[0-9a-fA-F]+$" input)
c0a67379 2942 (string-to-number input 16))
d0d8885d 2943 ((string-match-p "^#" input)
c0a67379
JL
2944 (read input))
2945 (t
d6ff002e 2946 (cdr (assoc-string input (ucs-names) t))))))
838d78d4 2947
dcbac02a
JL
2948(defun ucs-insert (character &optional count inherit)
2949 "Insert COUNT copies of CHARACTER of the given Unicode code point.
c0a67379 2950Interactively, prompts for a Unicode character name or a hex number
dcbac02a
JL
2951using `read-char-by-name'.
2952The optional third arg INHERIT (non-nil when called interactively),
2953says to inherit text properties from adjoining text, if those
2954properties are sticky."
2955 (interactive
2956 (list (read-char-by-name "Unicode (name or hex): ")
2957 (prefix-numeric-value current-prefix-arg)
2958 t))
2959 (unless count (setq count 1))
2960 (if (stringp character)
2961 (setq character (string-to-number character 16)))
c0a67379 2962 (cond
dcbac02a
JL
2963 ((not (integerp character))
2964 (error "Not a Unicode character code: %S" character))
2965 ((or (< character 0) (> character #x10FFFF))
2966 (error "Not a Unicode character code: 0x%X" character)))
2967 (if inherit
2968 (dotimes (i count) (insert-and-inherit character))
2969 (dotimes (i count) (insert character))))
cb9e47dd 2970
c0a67379 2971(define-key ctl-x-map "8\r" 'ucs-insert)
a127b764 2972
9ee5b744 2973;; arch-tag: b382c432-4b36-460e-bf4c-05efd0bb18dc
4ed46869 2974;;; mule-cmds.el ends here