Add 2012 to FSF copyright years for Emacs files (do not merge to trunk)
[bpt/emacs.git] / lisp / international / mule-cmds.el
CommitLineData
caff3c0a 1;;; mule-cmds.el --- commands for multilingual environment -*-coding: iso-2022-7bit -*-
9ee5b744 2
26b3dce6 3;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
49f70d46 4;; 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
7976eda0 5;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5df4f04c 6;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
2fd125a3
KH
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H14PRO021
8f924df7 9;; Copyright (C) 2003
97941b05
KH
10;; National Institute of Advanced Industrial Science and Technology (AIST)
11;; Registration Number H13PRO009
4ed46869 12
1f547b92 13;; Keywords: mule, i18n
4ed46869
KH
14
15;; This file is part of GNU Emacs.
16
4936186e 17;; GNU Emacs is free software: you can redistribute it and/or modify
4ed46869 18;; it under the terms of the GNU General Public License as published by
4936186e
GM
19;; the Free Software Foundation, either version 3 of the License, or
20;; (at your option) any later version.
4ed46869
KH
21
22;; GNU Emacs is distributed in the hope that it will be useful,
23;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25;; GNU General Public License for more details.
26
27;; You should have received a copy of the GNU General Public License
4936186e 28;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
4ed46869 29
60370d40
PJ
30;;; Commentary:
31
4ed46869
KH
32;;; Code:
33
a4723629
GM
34(eval-when-compile (require 'cl)) ; letf
35
26b3dce6
GM
36(defvar dos-codepage)
37(autoload 'widget-value "wid-edit")
cda74479 38
4a91d930 39(defvar mac-system-coding-system)
4a91d930 40
4ed46869
KH
41;;; MULE related key bindings and menus.
42
64657387
SM
43(defvar mule-keymap
44 (let ((map (make-sparse-keymap)))
45 (define-key map "f" 'set-buffer-file-coding-system)
46 (define-key map "r" 'revert-buffer-with-coding-system)
47 (define-key map "F" 'set-file-name-coding-system)
48 (define-key map "t" 'set-terminal-coding-system)
49 (define-key map "k" 'set-keyboard-coding-system)
50 (define-key map "p" 'set-buffer-process-coding-system)
51 (define-key map "x" 'set-selection-coding-system)
52 (define-key map "X" 'set-next-selection-coding-system)
53 (define-key map "\C-\\" 'set-input-method)
54 (define-key map "c" 'universal-coding-system-argument)
55 (define-key map "l" 'set-language-environment)
56 map)
33d17698 57 "Keymap for Mule (Multilingual environment) specific commands.")
4ed46869 58
8f81f784 59;; Keep "C-x C-m ..." for mule specific commands.
0709d285 60(define-key ctl-x-map "\C-m" mule-keymap)
ef8a8c8c 61
dcad02bc 62(defvar describe-language-environment-map
64657387
SM
63 (let ((map (make-sparse-keymap "Describe Language Environment")))
64 (define-key map
8f43cbf3 65 [Default] `(menu-item ,(purecopy "Default") describe-specified-language-support))
64657387 66 map))
15b3e511 67
dcad02bc 68(defvar setup-language-environment-map
64657387
SM
69 (let ((map (make-sparse-keymap "Set Language Environment")))
70 (define-key map
8f43cbf3 71 [Default] `(menu-item ,(purecopy "Default") setup-specified-language-environment))
64657387 72 map))
15b3e511 73
dcad02bc 74(defvar set-coding-system-map
64657387
SM
75 (let ((map (make-sparse-keymap "Set Coding System")))
76 (define-key-after map [universal-coding-system-argument]
8f43cbf3
DN
77 `(menu-item ,(purecopy "For Next Command") universal-coding-system-argument
78 :help ,(purecopy "Coding system to be used by next command")))
04991a1c 79 (define-key-after map [separator-1] menu-bar-separator)
64657387 80 (define-key-after map [set-buffer-file-coding-system]
8f43cbf3
DN
81 `(menu-item ,(purecopy "For Saving This Buffer") set-buffer-file-coding-system
82 :help ,(purecopy "How to encode this buffer when saved")))
64657387 83 (define-key-after map [revert-buffer-with-coding-system]
8f43cbf3 84 `(menu-item ,(purecopy "For Reverting This File Now")
64657387
SM
85 revert-buffer-with-coding-system
86 :enable buffer-file-name
8f43cbf3 87 :help ,(purecopy "Revisit this file immediately using specified coding system")))
64657387 88 (define-key-after map [set-file-name-coding-system]
8f43cbf3
DN
89 `(menu-item ,(purecopy "For File Name") set-file-name-coding-system
90 :help ,(purecopy "How to decode/encode file names")))
04991a1c 91 (define-key-after map [separator-2] menu-bar-separator)
64657387
SM
92
93 (define-key-after map [set-keyboard-coding-system]
8f43cbf3
DN
94 `(menu-item ,(purecopy "For Keyboard") set-keyboard-coding-system
95 :help ,(purecopy "How to decode keyboard input")))
64657387 96 (define-key-after map [set-terminal-coding-system]
8f43cbf3 97 `(menu-item ,(purecopy "For Terminal") set-terminal-coding-system
64657387 98 :enable (null (memq initial-window-system '(x w32 ns)))
8f43cbf3 99 :help ,(purecopy "How to encode terminal output")))
04991a1c 100 (define-key-after map [separator-3] menu-bar-separator)
64657387
SM
101
102 (define-key-after map [set-selection-coding-system]
8f43cbf3 103 `(menu-item ,(purecopy "For X Selections/Clipboard") set-selection-coding-system
64657387 104 :visible (display-selections-p)
8f43cbf3 105 :help ,(purecopy "How to en/decode data to/from selection/clipboard")))
64657387 106 (define-key-after map [set-next-selection-coding-system]
8f43cbf3 107 `(menu-item ,(purecopy "For Next X Selection") set-next-selection-coding-system
64657387 108 :visible (display-selections-p)
8f43cbf3 109 :help ,(purecopy "How to en/decode next selection/clipboard operation")))
64657387 110 (define-key-after map [set-buffer-process-coding-system]
8f43cbf3 111 `(menu-item ,(purecopy "For I/O with Subprocess") set-buffer-process-coding-system
64657387
SM
112 :visible (fboundp 'start-process)
113 :enable (get-buffer-process (current-buffer))
8f43cbf3 114 :help ,(purecopy "How to en/decode I/O from/to subprocess connected to this buffer")))
64657387
SM
115 map))
116
117(defvar mule-menu-keymap
118 (let ((map (make-sparse-keymap "Mule (Multilingual Environment)")))
119 (define-key-after map [set-language-environment]
8f43cbf3 120 `(menu-item ,(purecopy "Set Language Environment") ,setup-language-environment-map))
04991a1c 121 (define-key-after map [separator-mule] menu-bar-separator)
64657387
SM
122
123 (define-key-after map [toggle-input-method]
8f43cbf3 124 `(menu-item ,(purecopy "Toggle Input Method") toggle-input-method))
64657387 125 (define-key-after map [set-input-method]
8f43cbf3 126 `(menu-item ,(purecopy "Select Input Method...") set-input-method))
64657387 127 (define-key-after map [describe-input-method]
8f43cbf3 128 `(menu-item ,(purecopy "Describe Input Method") describe-input-method))
04991a1c 129 (define-key-after map [separator-input-method] menu-bar-separator)
64657387
SM
130
131 (define-key-after map [set-various-coding-system]
8f43cbf3 132 `(menu-item ,(purecopy "Set Coding Systems") ,set-coding-system-map
597e2240 133 :enable (default-value 'enable-multibyte-characters)))
64657387 134 (define-key-after map [view-hello-file]
8f43cbf3 135 `(menu-item ,(purecopy "Show Multi-lingual Text") view-hello-file
64657387
SM
136 :enable (file-readable-p
137 (expand-file-name "HELLO" data-directory))
8f43cbf3 138 :help ,(purecopy "Display file which says HELLO in many languages")))
04991a1c 139 (define-key-after map [separator-coding-system] menu-bar-separator)
64657387
SM
140
141 (define-key-after map [describe-language-environment]
8f43cbf3 142 `(menu-item ,(purecopy "Describe Language Environment")
64657387 143 describe-language-environment-map
8f43cbf3 144 :help ,(purecopy "Show multilingual settings for a specific language")))
64657387 145 (define-key-after map [describe-input-method]
8f43cbf3
DN
146 `(menu-item ,(purecopy "Describe Input Method...") describe-input-method
147 :help ,(purecopy "Keyboard layout for a specific input method")))
64657387 148 (define-key-after map [describe-coding-system]
8f43cbf3 149 `(menu-item ,(purecopy "Describe Coding System...") describe-coding-system))
64657387 150 (define-key-after map [list-character-sets]
8f43cbf3
DN
151 `(menu-item ,(purecopy "List Character Sets") list-character-sets
152 :help ,(purecopy "Show table of available character sets")))
64657387 153 (define-key-after map [mule-diag]
8f43cbf3
DN
154 `(menu-item ,(purecopy "Show All of Mule Status") mule-diag
155 :help ,(purecopy "Display multilingual environment settings")))
64657387
SM
156 map)
157 "Keymap for Mule (Multilingual environment) menu specific commands.")
cda74479 158
4ed46869
KH
159;; This should be a single character key binding because users use it
160;; very frequently while editing multilingual text. Now we can use
161;; only two such keys: "\C-\\" and "\C-^", but the latter is not
162;; convenient because it requires shifting on most keyboards. An
163;; alternative is "\C-\]" which is now bound to `abort-recursive-edit'
164;; but it won't be used that frequently.
165(define-key global-map "\C-\\" 'toggle-input-method)
166
5f395df3
SM
167;; This is no good because people often type Shift-SPC
168;; meaning to type SPC. -- rms.
169;; ;; Here's an alternative key binding for X users (Shift-SPACE).
170;; (define-key global-map [?\S- ] 'toggle-input-method)
b4fba33f 171
464cc130
KH
172;;; Mule related hyperlinks.
173(defconst help-xref-mule-regexp-template
174 (purecopy (concat "\\(\\<\\("
175 "\\(coding system\\)\\|"
d0c40faa
KH
176 "\\(input method\\)\\|"
177 "\\(character set\\)\\|"
178 "\\(charset\\)"
464cc130
KH
179 "\\)\\s-+\\)?"
180 ;; Note starting with word-syntax character:
181 "`\\(\\sw\\(\\sw\\|\\s_\\)+\\)'")))
182
26d87040 183(defun coding-system-change-eol-conversion (coding-system eol-type)
caff3c0a 184 "Return a coding system which differs from CODING-SYSTEM in EOL conversion.
26d87040
EZ
185The returned coding system converts end-of-line by EOL-TYPE
186but text as the same way as CODING-SYSTEM.
187EOL-TYPE should be `unix', `dos', `mac', or nil.
188If EOL-TYPE is nil, the returned coding system detects
189how end-of-line is formatted automatically while decoding.
190
191EOL-TYPE can be specified by an integer 0, 1, or 2.
192They means `unix', `dos', and `mac' respectively."
193 (if (symbolp eol-type)
194 (setq eol-type (cond ((eq eol-type 'unix) 0)
195 ((eq eol-type 'dos) 1)
196 ((eq eol-type 'mac) 2)
197 (t eol-type))))
1d5b0c66
CY
198 ;; We call `coding-system-base' before `coding-system-eol-type',
199 ;; because the coding-system may not be initialized until then.
200 (let* ((base (coding-system-base coding-system))
201 (orig-eol-type (coding-system-eol-type coding-system)))
202 (cond ((vectorp orig-eol-type)
203 (if (not eol-type)
204 coding-system
205 (aref orig-eol-type eol-type)))
206 ((not eol-type)
207 base)
208 ((= eol-type orig-eol-type)
209 coding-system)
210 ((progn (setq orig-eol-type (coding-system-eol-type base))
211 (vectorp orig-eol-type))
212 (aref orig-eol-type eol-type)))))
26d87040
EZ
213
214(defun coding-system-change-text-conversion (coding-system coding)
215 "Return a coding system which differs from CODING-SYSTEM in text conversion.
216The returned coding system converts text by CODING
217but end-of-line as the same way as CODING-SYSTEM.
218If CODING is nil, the returned coding system detects
219how text is formatted automatically while decoding."
1d77e15a
JR
220 (let ((eol-type (coding-system-eol-type coding-system)))
221 (coding-system-change-eol-conversion
222 (if coding coding 'undecided)
223 (if (numberp eol-type) (aref [unix dos mac] eol-type)))))
26d87040 224
be7ca044
KH
225;; Canonicalize the coding system name NAME by removing some prefixes
226;; and delimiter characters. Support function of
227;; coding-system-from-name.
228(defun canonicalize-coding-system-name (name)
ca4f0e9a
KH
229 (if (string-match "^\\(ms\\|ibm\\|windows-\\)\\([0-9]+\\)$" name)
230 ;; "ms950", "ibm950", "windows-950" -> "cp950"
231 (concat "cp" (match-string 2 name))
232 (if (string-match "^iso[-_ ]?[0-9]" name)
233 ;; "iso-8859-1" -> "8859-1", "iso-2022-jp" ->"2022-jp"
234 (setq name (substring name (1- (match-end 0)))))
235 (let ((idx (string-match "[-_ /]" name)))
236 ;; Delete "-", "_", " ", "/" but do distinguish "16-be" and "16be".
237 (while idx
238 (if (and (>= idx 2)
239 (eq (string-match "16-[lb]e$" name (- idx 2))
240 (- idx 2)))
241 (setq idx (string-match "[-_ /]" name (match-end 0)))
242 (setq name (concat (substring name 0 idx) (substring name (1+ idx)))
243 idx (string-match "[-_ /]" name idx))))
244 name)))
be7ca044
KH
245
246(defun coding-system-from-name (name)
247 "Return a coding system whose name matches with NAME (string or symbol)."
248 (let (sym)
249 (if (stringp name) (setq sym (intern name))
250 (setq sym name name (symbol-name name)))
251 (if (coding-system-p sym)
252 sym
253 (let ((eol-type
254 (if (string-match "-\\(unix\\|dos\\|mac\\)$" name)
255 (prog1 (intern (match-string 1 name))
256 (setq name (substring name 0 (match-beginning 0)))))))
257 (setq name (canonicalize-coding-system-name (downcase name)))
258 (catch 'tag
259 (dolist (elt (coding-system-list))
260 (if (string= (canonicalize-coding-system-name (symbol-name elt))
261 name)
262 (throw 'tag (if eol-type (coding-system-change-eol-conversion
263 elt eol-type)
264 elt)))))))))
265
4ed46869 266(defun toggle-enable-multibyte-characters (&optional arg)
6998e1a1 267 "Change whether this buffer uses multibyte characters.
49275d55 268With ARG, use multibyte characters if the ARG is positive.
6998e1a1
RS
269
270Note that this command does not convert the byte contents of
271the buffer; it only changes the way those bytes are interpreted.
272In general, therefore, this command *changes* the sequence of
273characters that the current buffer contains.
274
caff3c0a
JB
275We suggest you avoid using this command unless you know what you are
276doing. If you use it by mistake, and the buffer is now displayed
6998e1a1 277wrong, use this command again to toggle back to the right mode."
4ed46869 278 (interactive "P")
b7079457
RS
279 (let ((new-flag
280 (if (null arg) (null enable-multibyte-characters)
281 (> (prefix-numeric-value arg) 0))))
282 (set-buffer-multibyte new-flag))
4ed46869
KH
283 (force-mode-line-update))
284
285(defun view-hello-file ()
cf2fc7e9 286 "Display the HELLO file, which lists many languages and characters."
4ed46869 287 (interactive)
8f81f784 288 ;; We have to decode the file in any environment.
a4723629
GM
289 (letf (((default-value 'enable-multibyte-characters) t)
290 (coding-system-for-read 'iso-2022-7bit))
291 (view-file (expand-file-name "HELLO" data-directory))))
4ed46869 292
9e9a77a6 293(defun universal-coding-system-argument (coding-system)
15b3e511 294 "Execute an I/O command using the specified coding system."
9e9a77a6
RS
295 (interactive
296 (let ((default (and buffer-file-coding-system
34104362 297 (not (eq (coding-system-type buffer-file-coding-system)
a0d96cad 298 'undecided))
9e9a77a6
RS
299 buffer-file-coding-system)))
300 (list (read-coding-system
301 (if default
5b76833f 302 (format "Coding system for following command (default %s): " default)
9e9a77a6
RS
303 "Coding system for following command: ")
304 default))))
305 (let* ((keyseq (read-key-sequence
e14a8f4c 306 (format "Command to execute with %s:" coding-system)))
04363179
GM
307 (cmd (key-binding keyseq))
308 prefix)
735c5b17
GM
309 ;; read-key-sequence ignores quit, so make an explicit check.
310 ;; Like many places, this assumes quit == C-g, but it need not be.
1e4bd40d 311 (if (equal last-input-event ?\C-g)
735c5b17 312 (keyboard-quit))
4a304bb2 313 (when (memq cmd '(universal-argument digit-argument))
04363179 314 (call-interactively cmd)
a1506d29 315
04363179
GM
316 ;; Process keys bound in `universal-argument-map'.
317 (while (progn
318 (setq keyseq (read-key-sequence nil t)
319 cmd (key-binding keyseq t))
320 (not (eq cmd 'universal-argument-other-key)))
321 (let ((current-prefix-arg prefix-arg)
61a846fb 322 ;; Have to bind `last-command-event' here so that
6b61353c 323 ;; `digit-argument', for instance, can compute the
04363179 324 ;; prefix arg.
61a846fb 325 (last-command-event (aref keyseq 0)))
04363179
GM
326 (call-interactively cmd)))
327
1f547b92 328 ;; This is the final call to `universal-argument-other-key', which
04363179
GM
329 ;; set's the final `prefix-arg.
330 (let ((current-prefix-arg prefix-arg))
331 (call-interactively cmd))
a1506d29 332
04363179
GM
333 ;; Read the command to execute with the given prefix arg.
334 (setq prefix prefix-arg
335 keyseq (read-key-sequence nil t)
336 cmd (key-binding keyseq)))
337
15b3e511 338 (let ((coding-system-for-read coding-system)
04363179 339 (coding-system-for-write coding-system)
0e9ec609 340 (coding-system-require-warning t)
04363179 341 (current-prefix-arg prefix))
15b3e511
KH
342 (message "")
343 (call-interactively cmd))))
344
de94d711 345(defun set-default-coding-systems (coding-system)
0c3154d2 346 "Set default value of various coding systems to CODING-SYSTEM.
387136f6 347This sets the following coding systems:
0c3154d2 348 o coding system of a newly created buffer
8efc03e1
KH
349 o default coding system for subprocess I/O
350This also sets the following values:
d3e4babd 351 o default value used as `file-name-coding-system' for converting file names
caff3c0a 352 if CODING-SYSTEM is ASCII-compatible
d8cb7e3d 353 o default value for the command `set-terminal-coding-system'
d3e4babd 354 o default value for the command `set-keyboard-coding-system'
caff3c0a 355 if CODING-SYSTEM is ASCII-compatible"
de94d711
KH
356 (check-coding-system coding-system)
357 (setq-default buffer-file-coding-system coding-system)
6b61353c
KH
358 (if (fboundp 'ucs-set-table-for-input)
359 (dolist (buffer (buffer-list))
360 (or (local-variable-p 'buffer-file-coding-system buffer)
361 (ucs-set-table-for-input buffer))))
362
db9aec47 363 (if (eq system-type 'darwin)
a41118cc 364 ;; The file-name coding system on Darwin systems is always utf-8.
db9aec47 365 (setq default-file-name-coding-system 'utf-8)
597e2240 366 (if (and (default-value 'enable-multibyte-characters)
db9aec47
KH
367 (or (not coding-system)
368 (coding-system-get coding-system 'ascii-compatible-p)))
369 (setq default-file-name-coding-system coding-system)))
970c9391 370 (setq default-terminal-coding-system coding-system)
d1f61aaf
EZ
371 ;; Prevent default-terminal-coding-system from converting ^M to ^J.
372 (setq default-keyboard-coding-system
373 (coding-system-change-eol-conversion coding-system 'unix))
1d77e15a
JR
374 ;; Preserve eol-type from existing default-process-coding-systems.
375 ;; On non-unix-like systems in particular, these may have been set
376 ;; carefully by the user, or by the startup code, to deal with the
377 ;; users shell appropriately, so should not be altered by changing
378 ;; language environment.
379 (let ((output-coding
380 (coding-system-change-text-conversion
381 (car default-process-coding-system) coding-system))
382 (input-coding
383 (coding-system-change-text-conversion
384 (cdr default-process-coding-system) coding-system)))
385 (setq default-process-coding-system
386 (cons output-coding input-coding))))
de94d711 387
0c3154d2
KH
388(defun prefer-coding-system (coding-system)
389 "Add CODING-SYSTEM at the front of the priority list for automatic detection.
387136f6 390This also sets the following coding systems:
0c3154d2 391 o coding system of a newly created buffer
8efc03e1
KH
392 o default coding system for subprocess I/O
393This also sets the following values:
caff3c0a 394 o default value used as `file-name-coding-system' for converting file names
d8cb7e3d 395 o default value for the command `set-terminal-coding-system'
03c35c83
EZ
396 o default value for the command `set-keyboard-coding-system'
397
bd3ac67e
EZ
398If CODING-SYSTEM specifies a certain type of EOL conversion, the coding
399systems set by this function will use that type of EOL conversion.
400
d0d8885d
JB
401A coding system that requires automatic detection of text+encoding
402\(e.g. undecided, unix) can't be preferred."
0c3154d2
KH
403 (interactive "zPrefer coding system: ")
404 (if (not (and coding-system (coding-system-p coding-system)))
405 (error "Invalid coding system `%s'" coding-system))
97941b05
KH
406 (if (memq (coding-system-type coding-system) '(raw-text undecided))
407 (error "Can't prefer the coding system `%s'" coding-system))
408 (let ((base (coding-system-base coding-system))
bd3ac67e 409 (eol-type (coding-system-eol-type coding-system)))
97941b05 410 (set-coding-system-priority base)
32226619 411 (and (called-interactively-p 'interactive)
97941b05
KH
412 (or (eq base coding-system)
413 (message "Highest priority is set to %s (base of %s)"
414 base coding-system)))
bd3ac67e 415 ;; If they asked for specific EOL conversion, honor that.
6f9dc4fd 416 (if (memq eol-type '(0 1 2))
97941b05
KH
417 (setq base
418 (coding-system-change-eol-conversion base eol-type)))
419 (set-default-coding-systems base)))
0c3154d2 420
b5edd1d1
KH
421(defvar sort-coding-systems-predicate nil
422 "If non-nil, a predicate function to sort coding systems.
423
424It is called with two coding systems, and should return t if the first
425one is \"less\" than the second.
426
427The function `sort-coding-systems' use it.")
428
429(defun sort-coding-systems (codings)
430 "Sort coding system list CODINGS by a priority of each coding system.
caff3c0a 431Return the sorted list. CODINGS is modified by side effects.
b5edd1d1
KH
432
433If a coding system is most preferred, it has the highest priority.
caff3c0a
JB
434Otherwise, coding systems that correspond to MIME charsets have
435higher priorities. Among them, a coding system included in the
436`coding-system' key of the current language environment has higher
437priority. See also the documentation of `language-info-alist'.
b5edd1d1
KH
438
439If the variable `sort-coding-systems-predicate' (which see) is
caff3c0a 440non-nil, it is used to sort CODINGS instead."
b5edd1d1
KH
441 (if sort-coding-systems-predicate
442 (sort codings sort-coding-systems-predicate)
8f924df7
KH
443 (let* ((from-priority (coding-system-priority-list))
444 (most-preferred (car from-priority))
b5edd1d1
KH
445 (lang-preferred (get-language-info current-language-environment
446 'coding-system))
447 (func (function
448 (lambda (x)
449 (let ((base (coding-system-base x)))
da1ebad1
KH
450 ;; We calculate the priority number 0..255 by
451 ;; using the 8 bits PMMLCEII as this:
3ecd3a56
GM
452 ;; P: 1 if most preferred.
453 ;; MM: greater than 0 if mime-charset.
454 ;; L: 1 if one of the current lang. env.'s codings.
455 ;; C: 1 if one of codings listed in the category list.
456 ;; E: 1 if not XXX-with-esc
da1ebad1
KH
457 ;; II: if iso-2022 based, 0..3, else 1.
458 (logior
459 (lsh (if (eq base most-preferred) 1 0) 7)
460 (lsh
8f924df7 461 (let ((mime (coding-system-get base :mime-charset)))
b439e72a
DL
462 ;; Prefer coding systems corresponding to a
463 ;; MIME charset.
b5edd1d1 464 (if mime
b439e72a
DL
465 ;; Lower utf-16 priority so that we
466 ;; normally prefer utf-8 to it, and put
467 ;; x-ctext below that.
d0d8885d
JB
468 (cond ((string-match-p "utf-16"
469 (symbol-name mime))
da1ebad1 470 2)
d0d8885d 471 ((string-match-p "^x-" (symbol-name mime))
da1ebad1
KH
472 1)
473 (t 3))
b5edd1d1 474 0))
da1ebad1
KH
475 5)
476 (lsh (if (memq base lang-preferred) 1 0) 4)
8f924df7 477 (lsh (if (memq base from-priority) 1 0) 3)
d0d8885d
JB
478 (lsh (if (string-match-p "-with-esc\\'"
479 (symbol-name base))
da1ebad1 480 0 1) 2)
8f924df7
KH
481 (if (eq (coding-system-type base) 'iso-2022)
482 (let ((category (coding-system-category base)))
483 ;; For ISO based coding systems, prefer
484 ;; one that doesn't use designation nor
485 ;; locking/single shifting.
a0d96cad
KH
486 (cond
487 ((or (eq category 'coding-category-iso-8-1)
488 (eq category 'coding-category-iso-8-2))
489 2)
490 ((or (eq category 'coding-category-iso-7-tight)
491 (eq category 'coding-category-iso-7))
492 1)
493 (t
494 0)))
495 1)
a6dfc99b 496 ))))))
b5edd1d1
KH
497 (sort codings (function (lambda (x y)
498 (> (funcall func x) (funcall func y))))))))
54b226f7 499
3fc7dfe5 500(defun find-coding-systems-region (from to)
54b226f7 501 "Return a list of proper coding systems to encode a text between FROM and TO.
8f924df7 502
d37ef0f6 503If FROM is a string, find coding systems in that instead of the buffer.
54b226f7
KH
504All coding systems in the list can safely encode any multibyte characters
505in the text.
506
e8dd0160 507If the text contains no multibyte characters, return a list of a single
3fc7dfe5 508element `undecided'."
b5edd1d1
KH
509 (let ((codings (find-coding-systems-region-internal from to)))
510 (if (eq codings t)
511 ;; The text contains only ASCII characters. Any coding
512 ;; systems are safe.
513 '(undecided)
514 ;; We need copy-sequence because sorting will alter the argument.
515 (sort-coding-systems (copy-sequence codings)))))
54b226f7 516
3fc7dfe5
KH
517(defun find-coding-systems-string (string)
518 "Return a list of proper coding systems to encode STRING.
519All coding systems in the list can safely encode any multibyte characters
520in STRING.
521
e8dd0160 522If STRING contains no multibyte characters, return a list of a single
3fc7dfe5 523element `undecided'."
b5edd1d1 524 (find-coding-systems-region string nil))
3fc7dfe5
KH
525
526(defun find-coding-systems-for-charsets (charsets)
527 "Return a list of proper coding systems to encode characters of CHARSETS.
6053d86a 528CHARSETS is a list of character sets.
1f547b92
DL
529
530This only finds coding systems of type `charset', whose
531`:charset-list' property includes all of CHARSETS (plus `ascii' for
d0d8885d 532ASCII-compatible coding systems). It was used in older versions of
1f547b92
DL
533Emacs, but is unlikely to be what you really want now."
534 ;; Deal with aliases.
535 (setq charsets (mapcar (lambda (c)
536 (get-charset-property c :name))
537 charsets))
b5edd1d1
KH
538 (cond ((or (null charsets)
539 (and (= (length charsets) 1)
540 (eq 'ascii (car charsets))))
541 '(undecided))
542 ((or (memq 'eight-bit-control charsets)
543 (memq 'eight-bit-graphic charsets))
1f547b92 544 '(raw-text utf-8-emacs))
b5edd1d1 545 (t
1f547b92
DL
546 (let (codings)
547 (dolist (cs (coding-system-list t))
c7b4b466
DL
548 (let ((cs-charsets (and (eq (coding-system-type cs) 'charset)
549 (coding-system-charset-list cs)))
1f547b92
DL
550 (charsets charsets))
551 (if (coding-system-get cs :ascii-compatible-p)
552 (add-to-list 'cs-charsets 'ascii))
553 (if (catch 'ok
554 (when cs-charsets
555 (while charsets
556 (unless (memq (pop charsets) cs-charsets)
557 (throw 'ok nil)))
558 t))
559 (push cs codings))))
560 (nreverse codings)))))
54b226f7 561
51ed58ea
KH
562(defun find-multibyte-characters (from to &optional maxcount excludes)
563 "Find multibyte characters in the region specified by FROM and TO.
564If FROM is a string, find multibyte characters in the string.
565The return value is an alist of the following format:
566 ((CHARSET COUNT CHAR ...) ...)
567where
568 CHARSET is a character set,
569 COUNT is a number of characters,
1f547b92 570 CHARs are the characters found from the character set.
51ed58ea 571Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
caff3c0a 572Optional 4th arg EXCLUDES is a list of character sets to be ignored."
51ed58ea
KH
573 (let ((chars nil)
574 charset char)
575 (if (stringp from)
a0d96cad
KH
576 (if (multibyte-string-p from)
577 (let ((idx 0))
d0d8885d 578 (while (setq idx (string-match-p "[^\000-\177]" from idx))
a0d96cad
KH
579 (setq char (aref from idx)
580 charset (char-charset char))
581 (unless (memq charset excludes)
582 (let ((slot (assq charset chars)))
583 (if slot
584 (if (not (memq char (nthcdr 2 slot)))
585 (let ((count (nth 1 slot)))
586 (setcar (cdr slot) (1+ count))
587 (if (or (not maxcount) (< count maxcount))
588 (nconc slot (list char)))))
589 (setq chars (cons (list charset 1 char) chars)))))
590 (setq idx (1+ idx)))))
591 (if enable-multibyte-characters
592 (save-excursion
593 (goto-char from)
594 (while (re-search-forward "[^\000-\177]" to t)
595 (setq char (preceding-char)
596 charset (char-charset char))
597 (unless (memq charset excludes)
51ed58ea
KH
598 (let ((slot (assq charset chars)))
599 (if slot
a0d96cad 600 (if (not (member char (nthcdr 2 slot)))
51ed58ea
KH
601 (let ((count (nth 1 slot)))
602 (setcar (cdr slot) (1+ count))
603 (if (or (not maxcount) (< count maxcount))
604 (nconc slot (list char)))))
a0d96cad 605 (setq chars (cons (list charset 1 char) chars)))))))))
51ed58ea
KH
606 (nreverse chars)))
607
738746ba
KH
608(defun search-unencodable-char (coding-system)
609 "Search forward from point for a character that is not encodable.
610It asks which coding system to check.
611If such a character is found, set point after that character.
612Otherwise, don't move point.
613
caff3c0a
JB
614When called from a program, the value is the position of the unencodable
615character found, or nil if all characters are encodable."
738746ba
KH
616 (interactive
617 (list (let ((default (or buffer-file-coding-system 'us-ascii)))
618 (read-coding-system
5b76833f 619 (format "Coding-system (default %s): " default)
738746ba
KH
620 default))))
621 (let ((pos (unencodable-char-position (point) (point-max) coding-system)))
622 (if pos
623 (goto-char (1+ pos))
624 (message "All following characters are encodable by %s" coding-system))
625 pos))
626
c83c4f60
RS
627(defvar last-coding-system-specified nil
628 "Most recent coding system explicitly specified by the user when asked.
629This variable is set whenever Emacs asks the user which coding system
630to use in order to write a file. If you set it to nil explicitly,
631then call `write-region', then afterward this variable will be non-nil
632only if the user was explicitly asked and specified a coding system.")
633
b5edd1d1 634(defvar select-safe-coding-system-accept-default-p nil
fea6b736 635 "If non-nil, a function to control the behavior of coding system selection.
b5edd1d1
KH
636The meaning is the same as the argument ACCEPT-DEFAULT-P of the
637function `select-safe-coding-system' (which see). This variable
638overrides that argument.")
639
9ee5b744
SM
640(defun select-safe-coding-system-interactively (from to codings unsafe
641 &optional rejected default)
642 "Select interactively a coding system for the region FROM ... TO.
643FROM can be a string, as in `write-region'.
644CODINGS is the list of base coding systems known to be safe for this region,
645 typically obtained with `find-coding-systems-region'.
646UNSAFE is a list of coding systems known to be unsafe for this region.
647REJECTED is a list of coding systems which were safe but for some reason
648 were not recommended in the particular context.
649DEFAULT is the coding system to use by default in the query."
650 ;; At first, if some defaults are unsafe, record at most 11
651 ;; problematic characters and their positions for them by turning
652 ;; (CODING ...)
653 ;; into
654 ;; ((CODING (POS . CHAR) (POS . CHAR) ...) ...)
655 (if unsafe
656 (setq unsafe
657 (mapcar #'(lambda (coding)
658 (cons coding
659 (if (stringp from)
660 (mapcar #'(lambda (pos)
661 (cons pos (aref from pos)))
662 (unencodable-char-position
663 0 (length from) coding
664 11 from))
665 (mapcar #'(lambda (pos)
666 (cons pos (char-after pos)))
667 (unencodable-char-position
668 from to coding 11)))))
669 unsafe)))
670
671 ;; Change each safe coding system to the corresponding
672 ;; mime-charset name if it is also a coding system. Such a name
673 ;; is more friendly to users.
674 (let ((l codings)
675 mime-charset)
676 (while l
a77c22c2
KH
677 (setq mime-charset (coding-system-get (car l) :mime-charset))
678 (if (and mime-charset (coding-system-p mime-charset)
679 (coding-system-equal (car l) mime-charset))
9ee5b744
SM
680 (setcar l mime-charset))
681 (setq l (cdr l))))
682
683 ;; Don't offer variations with locking shift, which you
684 ;; basically never want.
685 (let (l)
686 (dolist (elt codings (setq codings (nreverse l)))
687 (unless (or (eq 'coding-category-iso-7-else
688 (coding-system-category elt))
689 (eq 'coding-category-iso-8-else
690 (coding-system-category elt)))
691 (push elt l))))
692
693 ;; Remove raw-text, emacs-mule and no-conversion unless nothing
694 ;; else is available.
695 (setq codings
696 (or (delq 'raw-text
697 (delq 'emacs-mule
698 (delq 'no-conversion codings)))
699 '(raw-text emacs-mule no-conversion)))
700
701 (let ((window-configuration (current-window-configuration))
702 (bufname (buffer-name))
703 coding-system)
704 (save-excursion
705 ;; If some defaults are unsafe, make sure the offending
706 ;; buffer is displayed.
707 (when (and unsafe (not (stringp from)))
708 (pop-to-buffer bufname)
709 (goto-char (apply 'min (mapcar #'(lambda (x) (car (cadr x)))
710 unsafe))))
711 ;; Then ask users to select one from CODINGS while showing
712 ;; the reason why none of the defaults are not used.
713 (with-output-to-temp-buffer "*Warning*"
714 (with-current-buffer standard-output
715 (if (and (null rejected) (null unsafe))
716 (insert "No default coding systems to try for "
717 (if (stringp from)
718 (format "string \"%s\"." from)
719 (format "buffer `%s'." bufname)))
720 (insert
721 "These default coding systems were tried to encode"
722 (if (stringp from)
723 (concat " \"" (if (> (length from) 10)
724 (concat (substring from 0 10) "...\"")
725 (concat from "\"")))
726 (format " text\nin the buffer `%s'" bufname))
727 ":\n")
728 (let ((pos (point))
729 (fill-prefix " "))
730 (dolist (x (append rejected unsafe))
46c04718 731 (princ " ") (princ x))
9ee5b744
SM
732 (insert "\n")
733 (fill-region-as-paragraph pos (point)))
734 (when rejected
3fc0b26e
EZ
735 (insert "These safely encode the text in the buffer,
736but are not recommended for encoding text in this context,
9ee5b744
SM
737e.g., for sending an email message.\n ")
738 (dolist (x rejected)
739 (princ " ") (princ x))
740 (insert "\n"))
741 (when unsafe
3fc0b26e 742 (insert (if rejected "The other coding systems"
9ee5b744 743 "However, each of them")
3fc0b26e 744 " encountered characters it couldn't encode:\n")
9ee5b744 745 (dolist (coding unsafe)
3fc0b26e 746 (insert (format " %s cannot encode these:" (car coding)))
9ee5b744
SM
747 (let ((i 0)
748 (func1
749 #'(lambda (bufname pos)
750 (when (buffer-live-p (get-buffer bufname))
751 (pop-to-buffer bufname)
752 (goto-char pos))))
753 (func2
754 #'(lambda (bufname pos coding)
755 (when (buffer-live-p (get-buffer bufname))
756 (pop-to-buffer bufname)
757 (if (< (point) pos)
758 (goto-char pos)
759 (forward-char 1)
760 (search-unencodable-char coding)
761 (forward-char -1))))))
762 (dolist (elt (cdr coding))
763 (insert " ")
764 (if (stringp from)
765 (insert (if (< i 10) (cdr elt) "..."))
766 (if (< i 10)
767 (insert-text-button
768 (cdr elt)
769 :type 'help-xref
4e21e5aa 770 'face 'link
9ee5b744
SM
771 'help-echo
772 "mouse-2, RET: jump to this character"
773 'help-function func1
774 'help-args (list bufname (car elt)))
775 (insert-text-button
776 "..."
777 :type 'help-xref
4e21e5aa 778 'face 'link
9ee5b744
SM
779 'help-echo
780 "mouse-2, RET: next unencodable character"
781 'help-function func2
782 'help-args (list bufname (car elt)
783 (car coding)))))
784 (setq i (1+ i))))
785 (insert "\n"))
333f3572 786 (insert (substitute-command-keys "\
7d03c5b1 787
333f3572
KH
788Click on a character (or switch to this window by `\\[other-window]'\n\
789and select the characters by RET) to jump to the place it appears,\n\
3fc0b26e 790where `\\[universal-argument] \\[what-cursor-position]' will give information about it.\n"))))
7d03c5b1 791 (insert (substitute-command-keys "\nSelect \
3fc0b26e
EZ
792one of the safe coding systems listed below,\n\
793or cancel the writing with \\[keyboard-quit] and edit the buffer\n\
794 to remove or modify the problematic characters,\n\
795or specify any other coding system (and risk losing\n\
796 the problematic characters).\n\n"))
9ee5b744
SM
797 (let ((pos (point))
798 (fill-prefix " "))
799 (dolist (x codings)
800 (princ " ") (princ x))
801 (insert "\n")
7d03c5b1 802 (fill-region-as-paragraph pos (point)))))
9ee5b744
SM
803
804 ;; Read a coding system.
805 (setq coding-system
806 (read-coding-system
807 (format "Select coding system (default %s): " default)
808 default))
809 (setq last-coding-system-specified coding-system))
810
811 (kill-buffer "*Warning*")
812 (set-window-configuration window-configuration)
813 coding-system))
814
b5edd1d1 815(defun select-safe-coding-system (from to &optional default-coding-system
efa2c6d7 816 accept-default-p file)
d5266ddf
KH
817 "Ask a user to select a safe coding system from candidates.
818The candidates of coding systems which can safely encode a text
b5edd1d1
KH
819between FROM and TO are shown in a popup window. Among them, the most
820proper one is suggested as the default.
821
d0d8885d 822The list of `buffer-file-coding-system' of the current buffer, the
b56a5ae0 823default `buffer-file-coding-system', and the most preferred coding
d0d8885d
JB
824system (if it corresponds to a MIME charset) is treated as the
825default coding system list. Among them, the first one that safely
826encodes the text is normally selected silently and returned without
827any user interaction. See also the command `prefer-coding-system'.
d37ef0f6
DL
828
829However, the user is queried if the chosen coding system is
29d04c4f 830inconsistent with what would be selected by `find-auto-coding' from
d37ef0f6
DL
831coding cookies &c. if the contents of the region were read from a
832file. (That could lead to data corruption in a file subsequently
833re-visited and edited.)
54b226f7 834
b5edd1d1
KH
835Optional 3rd arg DEFAULT-CODING-SYSTEM specifies a coding system or a
836list of coding systems to be prepended to the default coding system
0e9ec609 837list. However, if DEFAULT-CODING-SYSTEM is a list and the first
ccb06340 838element is t, the cdr part is used as the default coding system list,
b56a5ae0 839i.e. current `buffer-file-coding-system', default `buffer-file-coding-system',
a985cd2f 840and the most preferred coding system are not used.
54b226f7 841
b5edd1d1
KH
842Optional 4th arg ACCEPT-DEFAULT-P, if non-nil, is a function to
843determine the acceptability of the silently selected coding system.
844It is called with that coding system, and should return nil if it
845should not be silently selected and thus user interaction is required.
846
efa2c6d7
RS
847Optional 5th arg FILE is the file name to use for this purpose.
848That is different from `buffer-file-name' when handling `write-region'
849\(for example).
850
d0d8885d
JB
851The variable `select-safe-coding-system-accept-default-p', if non-nil,
852overrides ACCEPT-DEFAULT-P.
54b226f7
KH
853
854Kludgy feature: if FROM is a string, the string is the target text,
855and TO is ignored."
64657387 856 (if (not (listp default-coding-system))
b5edd1d1
KH
857 (setq default-coding-system (list default-coding-system)))
858
29d04c4f
KH
859 (let ((no-other-defaults nil)
860 auto-cs)
d35deeec 861 (unless (or (stringp from) find-file-literally)
b4dc7d98 862 ;; Find an auto-coding that is specified for the current
29d04c4f
KH
863 ;; buffer and file from the region FROM and TO.
864 (save-excursion
865 (save-restriction
866 (widen)
867 (goto-char from)
868 (setq auto-cs (find-auto-coding (or file buffer-file-name "")
869 (- to from)))
870 (if auto-cs
871 (if (coding-system-p (car auto-cs))
872 (setq auto-cs (car auto-cs))
873 (display-warning
2364df5c 874 'mule
29d04c4f
KH
875 (format "\
876Invalid coding system `%s' is specified
877for the current buffer/file by the %s.
878It is highly recommended to fix it before writing to a file."
879 (car auto-cs)
880 (if (eq (cdr auto-cs) :coding) ":coding tag"
2364df5c
JB
881 (format "variable `%s'" (cdr auto-cs))))
882 :warning)
29d04c4f
KH
883 (or (yes-or-no-p "Really proceed with writing? ")
884 (error "Save aborted"))
885 (setq auto-cs nil))))))
886
0e9ec609
KH
887 (if (eq (car default-coding-system) t)
888 (setq no-other-defaults t
889 default-coding-system (cdr default-coding-system)))
890
891 ;; Change elements of the list to (coding . base-coding).
892 (setq default-coding-system
893 (mapcar (function (lambda (x) (cons x (coding-system-base x))))
894 default-coding-system))
895
29d04c4f 896 (if (and auto-cs (not no-other-defaults))
2df48a87
SM
897 ;; If the file has a coding cookie, use it regardless of any
898 ;; other setting.
29d04c4f 899 (let ((base (coding-system-base auto-cs)))
2df48a87
SM
900 (unless (memq base '(nil undecided))
901 (setq default-coding-system (list (cons auto-cs base)))
902 (setq no-other-defaults t))))
29d04c4f 903
0e9ec609
KH
904 (unless no-other-defaults
905 ;; If buffer-file-coding-system is not nil nor undecided, append it
906 ;; to the defaults.
907 (if buffer-file-coding-system
908 (let ((base (coding-system-base buffer-file-coding-system)))
909 (or (eq base 'undecided)
0e9ec609 910 (rassq base default-coding-system)
ccb06340
KH
911 (setq default-coding-system
912 (append default-coding-system
913 (list (cons buffer-file-coding-system base)))))))
a985cd2f 914
659be258
KH
915 (unless (and buffer-file-coding-system-explicit
916 (cdr buffer-file-coding-system-explicit))
b56a5ae0 917 ;; If default buffer-file-coding-system is not nil nor undecided,
659be258 918 ;; append it to the defaults.
b56a5ae0
SM
919 (when (default-value 'buffer-file-coding-system)
920 (let ((base (coding-system-base
921 (default-value 'buffer-file-coding-system))))
922 (or (eq base 'undecided)
923 (rassq base default-coding-system)
924 (setq default-coding-system
925 (append default-coding-system
926 (list (cons (default-value
927 'buffer-file-coding-system)
928 base)))))))
659be258
KH
929
930 ;; If the most preferred coding system has the property mime-charset,
931 ;; append it to the defaults.
932 (let ((preferred (coding-system-priority-list t))
933 base)
934 (and (coding-system-p preferred)
935 (setq base (coding-system-base preferred))
936 (coding-system-get preferred :mime-charset)
937 (not (rassq base default-coding-system))
938 (setq default-coding-system
939 (append default-coding-system
940 (list (cons preferred base))))))))
29d04c4f
KH
941
942 (if select-safe-coding-system-accept-default-p
943 (setq accept-default-p select-safe-coding-system-accept-default-p))
944
bae7cd08 945 ;; Decide the eol-type from the top of the default codings,
b56a5ae0 946 ;; current buffer-file-coding-system, or default buffer-file-coding-system.
bae7cd08
KH
947 (if default-coding-system
948 (let ((default-eol-type (coding-system-eol-type
949 (caar default-coding-system))))
950 (if (and (vectorp default-eol-type) buffer-file-coding-system)
d35deeec 951 (setq default-eol-type (coding-system-eol-type
bae7cd08 952 buffer-file-coding-system)))
b56a5ae0
SM
953 (if (and (vectorp default-eol-type)
954 (default-value 'buffer-file-coding-system))
955 (setq default-eol-type
956 (coding-system-eol-type
957 (default-value 'buffer-file-coding-system))))
bae7cd08
KH
958 (if (and default-eol-type (not (vectorp default-eol-type)))
959 (dolist (elt default-coding-system)
960 (setcar elt (coding-system-change-eol-conversion
961 (car elt) default-eol-type))))))
962
29d04c4f
KH
963 (let ((codings (find-coding-systems-region from to))
964 (coding-system nil)
c381cb8d 965 (tick (if (not (stringp from)) (buffer-chars-modified-tick)))
29d04c4f 966 safe rejected unsafe)
bae7cd08
KH
967 (if (eq (car codings) 'undecided)
968 ;; Any coding system is ok.
969 (setq coding-system (caar default-coding-system))
970 ;; Reverse the list so that elements are accumulated in safe,
971 ;; rejected, and unsafe in the correct order.
972 (setq default-coding-system (nreverse default-coding-system))
973
974 ;; Classify the defaults into safe, rejected, and unsafe.
975 (dolist (elt default-coding-system)
976 (if (or (eq (car codings) 'undecided)
977 (memq (cdr elt) codings))
978 (if (and (functionp accept-default-p)
979 (not (funcall accept-default-p (cdr elt))))
980 (push (car elt) rejected)
981 (push (car elt) safe))
982 (push (car elt) unsafe)))
983 (if safe
984 (setq coding-system (car safe))))
29d04c4f
KH
985
986 ;; If all the defaults failed, ask a user.
bae7cd08 987 (when (not coding-system)
29d04c4f
KH
988 (setq coding-system (select-safe-coding-system-interactively
989 from to codings unsafe rejected (car codings))))
990
29d04c4f
KH
991 ;; Check we're not inconsistent with what `coding:' spec &c would
992 ;; give when file is re-read.
993 ;; But don't do this if we explicitly ignored the cookie
994 ;; by using `find-file-literally'.
995 (when (and auto-cs
996 (not (and
997 coding-system
998 (memq (coding-system-type coding-system) '(0 5)))))
6b61353c
KH
999 ;; Merge coding-system and auto-cs as far as possible.
1000 (if (not coding-system)
1001 (setq coding-system auto-cs)
1002 (if (not auto-cs)
1003 (setq auto-cs coding-system)
1004 (let ((eol-type-1 (coding-system-eol-type coding-system))
1005 (eol-type-2 (coding-system-eol-type auto-cs)))
1006 (if (eq (coding-system-base coding-system) 'undecided)
1007 (setq coding-system (coding-system-change-text-conversion
1008 coding-system auto-cs))
1009 (if (eq (coding-system-base auto-cs) 'undecided)
1010 (setq auto-cs (coding-system-change-text-conversion
1011 auto-cs coding-system))))
1012 (if (vectorp eol-type-1)
1013 (or (vectorp eol-type-2)
1014 (setq coding-system (coding-system-change-eol-conversion
1015 coding-system eol-type-2)))
1016 (if (vectorp eol-type-2)
1017 (setq auto-cs (coding-system-change-eol-conversion
1018 auto-cs eol-type-1)))))))
1019
1020 (if (and auto-cs
d37ef0f6
DL
1021 ;; Don't barf if writing a compressed file, say.
1022 ;; This check perhaps isn't ideal, but is probably
1023 ;; the best thing to do.
1024 (not (auto-coding-alist-lookup (or file buffer-file-name "")))
6b61353c 1025 (not (coding-system-equal coding-system auto-cs)))
43afed8c
RS
1026 (unless (yes-or-no-p
1027 (format "Selected encoding %s disagrees with \
1028%s specified by file contents. Really save (else edit coding cookies \
1029and try again)? " coding-system auto-cs))
29d04c4f 1030 (error "Save aborted"))))
c381cb8d 1031 (when (and tick (/= tick (buffer-chars-modified-tick)))
86c3a9fb 1032 (error "Cancelled because the buffer was modified"))
29d04c4f 1033 coding-system)))
54b226f7
KH
1034
1035(setq select-safe-coding-system-function 'select-safe-coding-system)
1036
46babb23
KH
1037(defun select-message-coding-system ()
1038 "Return a coding system to encode the outgoing message of the current buffer.
1039It at first tries the first coding system found in these variables
1040in this order:
1041 (1) local value of `buffer-file-coding-system'
1042 (2) value of `sendmail-coding-system'
b5edd1d1 1043 (3) value of `default-sendmail-coding-system'
b56a5ae0 1044 (4) default value of `buffer-file-coding-system'
46babb23
KH
1045If the found coding system can't encode the current buffer,
1046or none of them are bound to a coding system,
48e41165 1047it asks the user to select a proper coding system."
46babb23 1048 (let ((coding (or (and (local-variable-p 'buffer-file-coding-system)
b5edd1d1
KH
1049 buffer-file-coding-system)
1050 sendmail-coding-system
1051 default-sendmail-coding-system
b56a5ae0 1052 (default-value 'buffer-file-coding-system))))
46babb23 1053 (if (eq coding 'no-conversion)
d37ef0f6 1054 ;; We should never use no-conversion for outgoing mail.
46babb23
KH
1055 (setq coding nil))
1056 (if (fboundp select-safe-coding-system-function)
1057 (funcall select-safe-coding-system-function
b5edd1d1 1058 (point-min) (point-max) coding
209c73b0 1059 (function (lambda (x) (coding-system-get x :mime-charset))))
46babb23 1060 coding)))
4ed46869 1061\f
03c35c83 1062;;; Language support stuff.
4ed46869 1063
4ed46869 1064(defvar language-info-alist nil
2c395d56 1065 "Alist of language environment definitions.
4ed46869
KH
1066Each element looks like:
1067 (LANGUAGE-NAME . ((KEY . INFO) ...))
2c395d56
RS
1068where LANGUAGE-NAME is a string, the name of the language environment,
1069KEY is a symbol denoting the kind of information, and
1070INFO is the data associated with KEY.
1071Meaningful values for KEY include
1072
1073 documentation value is documentation of what this language environment
1074 is meant for, and how to use it.
7dd42fb1
KH
1075 charset value is a list of the character sets mainly used
1076 by this language environment.
d37ef0f6
DL
1077 sample-text value is an expression which is evalled to generate
1078 a line of text written using characters appropriate
1079 for this language environment.
2c395d56
RS
1080 setup-function value is a function to call to switch to this
1081 language environment.
1082 exit-function value is a function to call to leave this
1083 language environment.
d0d8885d
JB
1084 coding-system value is a list of coding systems that are good for
1085 saving text written in this language environment.
2c395d56
RS
1086 This list serves as suggestions to the user;
1087 in effect, as a kind of documentation.
1088 coding-priority value is a list of coding systems for this language
1089 environment, in order of decreasing priority.
1090 This is used to set up the coding system priority
45d08cb2 1091 list when you switch to this language environment.
ddb5c041 1092 nonascii-translation
7dd42fb1
KH
1093 value is a charset of dimension one to use for
1094 converting a unibyte character to multibyte
1095 and vice versa.
ddb5c041
KH
1096 input-method value is a default input method for this language
1097 environment.
7624ebb9
KH
1098 features value is a list of features requested in this
1099 language environment.
6b61353c 1100 ctext-non-standard-encodings
d0d8885d
JB
1101 value is a list of non-standard encoding names used
1102 in extended segments of CTEXT. See the variable
1103 `ctext-non-standard-encodings' for more detail.
ddb5c041 1104
b9631bb2 1105The following key takes effect only when multibyte characters are
597e2240 1106globally disabled, i.e. the default value of `enable-multibyte-characters'
b9631bb2 1107is nil (which is an obsolete and deprecated use):
ddb5c041 1108
d0d8885d
JB
1109 unibyte-display value is a coding system to encode characters for
1110 the terminal. Characters in the range of 160 to
1111 255 display not as octal escapes, but as non-ASCII
1112 characters in this language environment.")
2c395d56
RS
1113
1114(defun get-language-info (lang-env key)
1115 "Return information listed under KEY for language environment LANG-ENV.
1116KEY is a symbol denoting the kind of information.
1117For a list of useful values for KEY and their meanings,
1118see `language-info-alist'."
1119 (if (symbolp lang-env)
1120 (setq lang-env (symbol-name lang-env)))
f15466c5 1121 (let ((lang-slot (assoc-string lang-env language-info-alist t)))
4ed46869
KH
1122 (if lang-slot
1123 (cdr (assq key (cdr lang-slot))))))
1124
f08adf27 1125(defun set-language-info (lang-env key info)
2c395d56
RS
1126 "Modify part of the definition of language environment LANG-ENV.
1127Specifically, this stores the information INFO under KEY
1128in the definition of this language environment.
4ed46869 1129KEY is a symbol denoting the kind of information.
2c395d56 1130INFO is the value for that information.
281d03ec 1131
2c395d56 1132For a list of useful values for KEY and their meanings,
f08adf27 1133see `language-info-alist'."
2c395d56
RS
1134 (if (symbolp lang-env)
1135 (setq lang-env (symbol-name lang-env)))
eec5c8f9
KH
1136 (set-language-info-internal lang-env key info)
1137 (if (equal lang-env current-language-environment)
d042f8b4 1138 (cond ((eq key 'coding-priority)
e9a8ed3c
KH
1139 (set-language-environment-coding-systems lang-env)
1140 (set-language-environment-charset lang-env))
d042f8b4
KH
1141 ((eq key 'input-method)
1142 (set-language-environment-input-method lang-env))
1143 ((eq key 'nonascii-translation)
1144 (set-language-environment-nonascii-translation lang-env))
1145 ((eq key 'charset)
1146 (set-language-environment-charset lang-env))
597e2240 1147 ((and (not (default-value 'enable-multibyte-characters))
d042f8b4
KH
1148 (or (eq key 'unibyte-syntax) (eq key 'unibyte-display)))
1149 (set-language-environment-unibyte lang-env)))))
eec5c8f9
KH
1150
1151(defun set-language-info-internal (lang-env key info)
1152 "Internal use only.
1153Arguments are the same as `set-language-info'."
4ed46869 1154 (let (lang-slot key-slot)
2c395d56 1155 (setq lang-slot (assoc lang-env language-info-alist))
4ed46869 1156 (if (null lang-slot) ; If no slot for the language, add it.
2c395d56 1157 (setq lang-slot (list lang-env)
4ed46869
KH
1158 language-info-alist (cons lang-slot language-info-alist)))
1159 (setq key-slot (assq key lang-slot))
1160 (if (null key-slot) ; If no slot for the key, add it.
1161 (progn
1162 (setq key-slot (list key))
1163 (setcdr lang-slot (cons key-slot (cdr lang-slot)))))
590dbcba 1164 (setcdr key-slot (purecopy info))
1169bd86 1165 ;; Update the custom-type of `current-language-environment'.
590dbcba 1166 (put 'current-language-environment 'custom-type
1169bd86
MR
1167 (cons 'choice (mapcar
1168 (lambda (lang)
756e055f
MR
1169 (list 'const lang))
1170 (sort (mapcar 'car language-info-alist) 'string<))))))
4ed46869 1171
2c395d56
RS
1172(defun set-language-info-alist (lang-env alist &optional parents)
1173 "Store ALIST as the definition of language environment LANG-ENV.
1174ALIST is an alist of KEY and INFO values. See the documentation of
98c6d6ed 1175`language-info-alist' for the meanings of KEY and INFO.
54b226f7 1176
2c395d56 1177Optional arg PARENTS is a list of parent menu names; it specifies
d37ef0f6 1178where to put this language environment in the
2c395d56
RS
1179Describe Language Environment and Set Language Environment menus.
1180For example, (\"European\") means to put this language environment
1181in the European submenu in each of those two menus."
905a9ed3
DN
1182 (cond ((symbolp lang-env)
1183 (setq lang-env (symbol-name lang-env)))
1184 ((stringp lang-env)
1185 (setq lang-env (purecopy lang-env))))
54b226f7
KH
1186 (let ((describe-map describe-language-environment-map)
1187 (setup-map setup-language-environment-map))
1188 (if parents
1189 (let ((l parents)
9deed82f 1190 map parent-symbol parent prompt)
54b226f7
KH
1191 (while l
1192 (if (symbolp (setq parent-symbol (car l)))
1193 (setq parent (symbol-name parent))
1194 (setq parent parent-symbol parent-symbol (intern parent)))
1195 (setq map (lookup-key describe-map (vector parent-symbol)))
9deed82f
EZ
1196 ;; This prompt string is for define-prefix-command, so
1197 ;; that the map it creates will be suitable for a menu.
1198 (or map (setq prompt (format "%s Environment" parent)))
54b226f7
KH
1199 (if (not map)
1200 (progn
1201 (setq map (intern (format "describe-%s-environment-map"
1202 (downcase parent))))
9deed82f 1203 (define-prefix-command map nil prompt)
54b226f7 1204 (define-key-after describe-map (vector parent-symbol)
64657387 1205 (cons parent map))))
54b226f7
KH
1206 (setq describe-map (symbol-value map))
1207 (setq map (lookup-key setup-map (vector parent-symbol)))
1208 (if (not map)
1209 (progn
1210 (setq map (intern (format "setup-%s-environment-map"
1211 (downcase parent))))
9deed82f 1212 (define-prefix-command map nil prompt)
54b226f7 1213 (define-key-after setup-map (vector parent-symbol)
64657387 1214 (cons parent map))))
54b226f7
KH
1215 (setq setup-map (symbol-value map))
1216 (setq l (cdr l)))))
f08adf27
RS
1217
1218 ;; Set up menu items for this language env.
7624ebb9 1219 (let ((doc (assq 'documentation alist)))
f08adf27
RS
1220 (when doc
1221 (define-key-after describe-map (vector (intern lang-env))
64657387 1222 (cons lang-env 'describe-specified-language-support))))
7624ebb9 1223 (define-key-after setup-map (vector (intern lang-env))
64657387 1224 (cons lang-env 'setup-specified-language-environment))
f08adf27 1225
eec5c8f9
KH
1226 (dolist (elt alist)
1227 (set-language-info-internal lang-env (car elt) (cdr elt)))
d35deeec 1228
eec5c8f9
KH
1229 (if (equal lang-env current-language-environment)
1230 (set-language-environment lang-env))))
4ed46869 1231
ae302641 1232(defun read-language-name (key prompt &optional default)
2c395d56 1233 "Read a language environment name which has information for KEY.
ddb5c041 1234If KEY is nil, read any language environment.
2c395d56
RS
1235Prompt with PROMPT. DEFAULT is the default choice of language environment.
1236This returns a language environment name as a string."
4ed46869
KH
1237 (let* ((completion-ignore-case t)
1238 (name (completing-read prompt
1239 language-info-alist
ddb5c041 1240 (and key
ca429a25 1241 (function (lambda (elm) (and (listp elm) (assq key elm)))))
ae302641 1242 t nil nil default)))
13e82c04 1243 (if (and (> (length name) 0)
ddb5c041
KH
1244 (or (not key)
1245 (get-language-info name key)))
13e82c04 1246 name)))
4ed46869
KH
1247\f
1248;;; Multilingual input methods.
d37ef0f6 1249(defgroup leim nil
d0c40faa
KH
1250 "LEIM: Libraries of Emacs Input Methods."
1251 :group 'mule)
4ed46869 1252
d0b9c3ab
KH
1253(defconst leim-list-file-name "leim-list.el"
1254 "Name of LEIM list file.
1255This file contains a list of libraries of Emacs input methods (LEIM)
1256in the format of Lisp expression for registering each input method.
1257Emacs loads this file at startup time.")
1258
905a9ed3 1259(defconst leim-list-header (format
316732d2 1260";;; %s -- list of LEIM (Library of Emacs Input Method) -*-coding: utf-8;-*-
d0b9c3ab 1261;;
bcd76f45
SM
1262;; This file is automatically generated.
1263;;
d0b9c3ab 1264;; This file contains a list of LEIM (Library of Emacs Input Method)
c654de1d
DL
1265;; methods in the same directory as this file. Loading this file
1266;; registers all the input methods in Emacs.
d0b9c3ab 1267;;
d33d5fbe 1268;; Each entry has the form:
d0b9c3ab
KH
1269;; (register-input-method
1270;; INPUT-METHOD LANGUAGE-NAME ACTIVATE-FUNC
1271;; TITLE DESCRIPTION
1272;; ARG ...)
c654de1d 1273;; See the function `register-input-method' for the meanings of the arguments.
d0b9c3ab 1274;;
bcd76f45 1275;; If this directory is included in `load-path', Emacs automatically
d0b9c3ab
KH
1276;; loads this file at startup time.
1277
1278"
1279 leim-list-file-name)
1280 "Header to be inserted in LEIM list file.")
1281
905a9ed3 1282(defconst leim-list-entry-regexp "^(register-input-method"
d0b9c3ab 1283 "Regexp matching head of each entry in LEIM list file.
caff3c0a 1284See also the variable `leim-list-header'.")
d0b9c3ab
KH
1285
1286(defvar update-leim-list-functions
1287 '(quail-update-leim-list-file)
1288 "List of functions to call to update LEIM list file.
1289Each function is called with one arg, LEIM directory name.")
1290
a337fe7f
RS
1291(defun update-leim-list-file (&rest dirs)
1292 "Update LEIM list file in directories DIRS."
d0d8885d
JB
1293 (dolist (function update-leim-list-functions)
1294 (apply function dirs)))
d0b9c3ab 1295
4ed46869
KH
1296(defvar current-input-method nil
1297 "The current input method for multilingual text.
96db204a 1298If nil, that means no input method is activated now.")
4ed46869
KH
1299(make-variable-buffer-local 'current-input-method)
1300(put 'current-input-method 'permanent-local t)
1301
1302(defvar current-input-method-title nil
d0b9c3ab 1303 "Title string of the current input method shown in mode line.")
4ed46869
KH
1304(make-variable-buffer-local 'current-input-method-title)
1305(put 'current-input-method-title 'permanent-local t)
1306
b4fba33f 1307(defcustom default-input-method nil
caff3c0a 1308 "Default input method for multilingual text (a string).
b4fba33f 1309This is the input method activated automatically by the command
9b10b5a3 1310`toggle-input-method' (\\[toggle-input-method])."
1f547b92 1311 :link '(custom-manual "(emacs)Input Methods")
8861c593 1312 :group 'mule
d398dba6
DL
1313 :type '(choice (const nil) (string
1314 :completion-ignore-case t
1315 :complete-function widget-string-complete
1316 :completion-alist input-method-alist
1317 :prompt-history input-method-history))
5806e8a6 1318 :set-after '(current-language-environment))
b4fba33f 1319
0f835e87
KH
1320(put 'input-method-function 'permanent-local t)
1321
723a427a 1322(defvar input-method-history nil
6f5d2452
EZ
1323 "History list of input methods read from the minibuffer.
1324
1325Maximum length of the history list is determined by the value
1326of `history-length', which see.")
723a427a
KH
1327(make-variable-buffer-local 'input-method-history)
1328(put 'input-method-history 'permanent-local t)
4ed46869
KH
1329
1330(defvar inactivate-current-input-method-function nil
1331 "Function to call for inactivating the current input method.
1332Every input method should set this to an appropriate value when activated.
f17ccaee
KH
1333This function is called with no argument.
1334
1335This function should never change the value of `current-input-method'.
1336It is set to nil by the function `inactivate-input-method'.")
4ed46869
KH
1337(make-variable-buffer-local 'inactivate-current-input-method-function)
1338(put 'inactivate-current-input-method-function 'permanent-local t)
1339
1340(defvar describe-current-input-method-function nil
1341 "Function to call for describing the current input method.
1342This function is called with no argument.")
1343(make-variable-buffer-local 'describe-current-input-method-function)
1344(put 'describe-current-input-method-function 'permanent-local t)
1345
d0b9c3ab 1346(defvar input-method-alist nil
2c395d56 1347 "Alist of input method names vs how to use them.
d0b9c3ab 1348Each element has the form:
2c395d56
RS
1349 (INPUT-METHOD LANGUAGE-ENV ACTIVATE-FUNC TITLE DESCRIPTION ARGS...)
1350See the function `register-input-method' for the meanings of the elements.")
ac549fa5 1351;;;###autoload
6dc3311d 1352(put 'input-method-alist 'risky-local-variable t)
2c395d56 1353
f08adf27 1354(defun register-input-method (input-method lang-env &rest args)
bf42aa15 1355 "Register INPUT-METHOD as an input method for language environment LANG-ENV.
d0b9c3ab 1356
d35deeec 1357INPUT-METHOD and LANG-ENV are symbols or strings.
2c395d56
RS
1358ACTIVATE-FUNC is a function to call to activate this method.
1359TITLE is a string to show in the mode line when this method is active.
1360DESCRIPTION is a string describing this method and what it is good for.
1361The ARGS, if any, are passed as arguments to ACTIVATE-FUNC.
205814ee
KH
1362All told, the arguments to ACTIVATE-FUNC are INPUT-METHOD and the ARGS.
1363
c654de1d
DL
1364This function is mainly used in the file \"leim-list.el\" which is
1365created at Emacs build time, registering all Quail input methods
0b6cadff 1366contained in the Emacs distribution.
205814ee 1367
0b6cadff 1368In case you want to register a new Quail input method by yourself, be
205814ee 1369careful to use the same input method title as given in the third
0b6cadff
DL
1370parameter of `quail-define-package'. (If the values are different, the
1371string specified in this function takes precedence.)
205814ee
KH
1372
1373The commands `describe-input-method' and `list-input-methods' need
0b6cadff 1374these duplicated values to show some information about input methods
d35deeec
JB
1375without loading the relevant Quail packages.
1376\n(fn INPUT-METHOD LANG-ENV ACTIVATE-FUNC TITLE DESCRIPTION &rest ARGS)"
f08adf27 1377 (if (symbolp lang-env)
ebfa10d3
DN
1378 (setq lang-env (symbol-name lang-env))
1379 (setq lang-env (purecopy lang-env)))
4ef06f75 1380 (if (symbolp input-method)
ebfa10d3
DN
1381 (setq input-method (symbol-name input-method))
1382 (setq input-method (purecopy input-method)))
1383 (setq args (mapcar 'purecopy args))
f08adf27 1384 (let ((info (cons lang-env args))
d0b9c3ab
KH
1385 (slot (assoc input-method input-method-alist)))
1386 (if slot
1387 (setcdr slot info)
1388 (setq slot (cons input-method info))
1389 (setq input-method-alist (cons slot input-method-alist)))))
1390
4d5ac029 1391(defun read-input-method-name (prompt &optional default inhibit-null)
d0b9c3ab 1392 "Read a name of input method from a minibuffer prompting with PROMPT.
4d5ac029 1393If DEFAULT is non-nil, use that as the default,
0b6cadff 1394and substitute it into PROMPT at the first `%s'.
4ef06f75
KH
1395If INHIBIT-NULL is non-nil, null input signals an error.
1396
1397The return value is a string."
4d5ac029
RS
1398 (if default
1399 (setq prompt (format prompt default)))
d0b9c3ab 1400 (let* ((completion-ignore-case t)
c54044ff
KH
1401 ;; As it is quite normal to change input method in the
1402 ;; minibuffer, we must enable it even if
1403 ;; enable-recursive-minibuffers is currently nil.
1404 (enable-recursive-minibuffers t)
723a427a
KH
1405 ;; This binding is necessary because input-method-history is
1406 ;; buffer local.
d0b9c3ab 1407 (input-method (completing-read prompt input-method-alist
87505a98
RS
1408 nil t nil 'input-method-history
1409 default)))
bf294e6e
KH
1410 (if (and input-method (symbolp input-method))
1411 (setq input-method (symbol-name input-method)))
d0b9c3ab
KH
1412 (if (> (length input-method) 0)
1413 input-method
1414 (if inhibit-null
43807b77 1415 (error "No valid input method is specified")))))
d0b9c3ab 1416
d0b9c3ab 1417(defun activate-input-method (input-method)
2c395d56
RS
1418 "Switch to input method INPUT-METHOD for the current buffer.
1419If some other input method is already active, turn it off first.
1420If INPUT-METHOD is nil, deactivate any current input method."
305a3cb6 1421 (if (and input-method (symbolp input-method))
4ef06f75 1422 (setq input-method (symbol-name input-method)))
723a427a
KH
1423 (if (and current-input-method
1424 (not (string= current-input-method input-method)))
305a3cb6 1425 (inactivate-input-method))
2c395d56 1426 (unless (or current-input-method (null input-method))
d0b9c3ab
KH
1427 (let ((slot (assoc input-method input-method-alist)))
1428 (if (null slot)
723a427a 1429 (error "Can't activate input method `%s'" input-method))
278dd6ac 1430 (setq current-input-method-title nil)
8efc03e1
KH
1431 (let ((func (nth 2 slot)))
1432 (if (functionp func)
1433 (apply (nth 2 slot) input-method (nthcdr 5 slot))
1434 (if (and (consp func) (symbolp (car func)) (symbolp (cdr func)))
1435 (progn
1436 (require (cdr func))
1437 (apply (car func) input-method (nthcdr 5 slot)))
1438 (error "Can't activate input method `%s'" input-method))))
d0b9c3ab 1439 (setq current-input-method input-method)
278dd6ac
KH
1440 (or (stringp current-input-method-title)
1441 (setq current-input-method-title (nth 3 slot)))
28885c0e
KH
1442 (unwind-protect
1443 (run-hooks 'input-method-activate-hook)
1444 (force-mode-line-update)))))
15b3e511 1445
15b3e511 1446(defun inactivate-input-method ()
f17ccaee 1447 "Turn off the current input method."
723a427a
KH
1448 (when current-input-method
1449 (if input-method-history
1450 (unless (string= current-input-method (car input-method-history))
1451 (setq input-method-history
1452 (cons current-input-method
1453 (delete current-input-method input-method-history))))
1454 (setq input-method-history (list current-input-method)))
1455 (unwind-protect
36f8618c
KH
1456 (progn
1457 (setq input-method-function nil
1458 current-input-method-title nil)
1459 (funcall inactivate-current-input-method-function))
15b3e511 1460 (unwind-protect
723a427a 1461 (run-hooks 'input-method-inactivate-hook)
36f8618c 1462 (setq current-input-method nil)
28885c0e 1463 (force-mode-line-update)))))
4ed46869 1464
e893eae2 1465(defun set-input-method (input-method &optional interactive)
2c395d56 1466 "Select and activate input method INPUT-METHOD for the current buffer.
bc406911 1467This also sets the default input method to the one you specify.
402dbbd1
EZ
1468If INPUT-METHOD is nil, this function turns off the input method, and
1469also causes you to be prompted for a name of an input method the next
1470time you invoke \\[toggle-input-method].
e893eae2
RS
1471When called interactively, the optional arg INTERACTIVE is non-nil,
1472which marks the variable `default-input-method' as set for Custom buffers.
402dbbd1 1473
bc406911 1474To deactivate the input method interactively, use \\[toggle-input-method].
caff3c0a 1475To deactivate it programmatically, use `inactivate-input-method'."
d0b9c3ab 1476 (interactive
723a427a 1477 (let* ((default (or (car input-method-history) default-input-method)))
42395763 1478 (list (read-input-method-name
87505a98 1479 (if default "Select input method (default %s): " "Select input method: ")
e893eae2
RS
1480 default t)
1481 t)))
d0b9c3ab 1482 (activate-input-method input-method)
f4990970 1483 (setq default-input-method input-method)
e893eae2 1484 (when interactive
f4990970
PA
1485 (customize-mark-as-set 'default-input-method))
1486 default-input-method)
4ed46869 1487
023df4cf
RS
1488(defvar toggle-input-method-active nil
1489 "Non-nil inside `toggle-input-method'.")
1490
e893eae2 1491(defun toggle-input-method (&optional arg interactive)
f8ec20be
RS
1492 "Enable or disable multilingual text input method for the current buffer.
1493Only one input method can be enabled at any time in a given buffer.
1494
d0d8885d
JB
1495The normal action is to enable an input method if none was enabled,
1496and disable the current one otherwise. Which input method to enable
1497can be determined in various ways--either the one most recently used,
1498or the one specified by `default-input-method', or as a last resort
1499by reading the name of an input method in the minibuffer.
f8ec20be 1500
d0d8885d 1501With a prefix argument ARG, read an input method name with the minibuffer
f8ec20be 1502and enable that one. The default is the most recent input method specified
e893eae2 1503\(not including the currently active input method, if any).
f8ec20be 1504
d0d8885d 1505When called interactively, the optional argument INTERACTIVE is non-nil,
e893eae2
RS
1506which marks the variable `default-input-method' as set for Custom buffers."
1507
1508 (interactive "P\np")
023df4cf
RS
1509 (if toggle-input-method-active
1510 (error "Recursive use of `toggle-input-method'"))
7ddbb5bc
RS
1511 (if (and current-input-method (not arg))
1512 (inactivate-input-method)
023df4cf
RS
1513 (let ((toggle-input-method-active t)
1514 (default (or (car input-method-history) default-input-method)))
7ddbb5bc
RS
1515 (if (and arg default (equal current-input-method default)
1516 (> (length input-method-history) 1))
1517 (setq default (nth 1 input-method-history)))
723a427a
KH
1518 (activate-input-method
1519 (if (or arg (not default))
7ddbb5bc
RS
1520 (progn
1521 (read-input-method-name
1522 (if default "Input method (default %s): " "Input method: " )
1523 default t))
723a427a 1524 default))
f4990970 1525 (unless default-input-method
d37ef0f6 1526 (prog1
f4990970 1527 (setq default-input-method current-input-method)
e893eae2 1528 (when interactive
f4990970 1529 (customize-mark-as-set 'default-input-method)))))))
d0b9c3ab 1530
26b3dce6 1531(autoload 'help-buffer "help-mode")
0855c6cd 1532
d0b9c3ab 1533(defun describe-input-method (input-method)
2c395d56 1534 "Describe input method INPUT-METHOD."
d0b9c3ab
KH
1535 (interactive
1536 (list (read-input-method-name
5b76833f 1537 "Describe input method (default current choice): ")))
78754934 1538 (if (and input-method (symbolp input-method))
4ef06f75 1539 (setq input-method (symbol-name input-method)))
43125c28
RS
1540 (help-setup-xref (list #'describe-input-method
1541 (or input-method current-input-method))
32226619 1542 (called-interactively-p 'interactive))
f80e2142 1543
d0b9c3ab
KH
1544 (if (null input-method)
1545 (describe-current-input-method)
464cc130
KH
1546 (let ((current current-input-method))
1547 (condition-case nil
1548 (progn
1549 (save-excursion
1550 (activate-input-method input-method)
1551 (describe-current-input-method))
1552 (activate-input-method current))
d37ef0f6 1553 (error
464cc130 1554 (activate-input-method current)
5f395df3 1555 (help-setup-xref (list #'describe-input-method input-method)
32226619 1556 (called-interactively-p 'interactive))
5f395df3 1557 (with-output-to-temp-buffer (help-buffer)
464cc130
KH
1558 (let ((elt (assoc input-method input-method-alist)))
1559 (princ (format
1560 "Input method: %s (`%s' in mode line) for %s\n %s\n"
1561 input-method (nth 3 elt) (nth 1 elt) (nth 4 elt))))))))))
d0b9c3ab
KH
1562
1563(defun describe-current-input-method ()
f80e2142
RS
1564 "Describe the input method currently in use.
1565This is a subroutine for `describe-input-method'."
4ed46869
KH
1566 (if current-input-method
1567 (if (and (symbolp describe-current-input-method-function)
1568 (fboundp describe-current-input-method-function))
1569 (funcall describe-current-input-method-function)
1570 (message "No way to describe the current input method `%s'"
f2979bdb 1571 current-input-method)
4ed46869 1572 (ding))
d0b9c3ab 1573 (error "No input method is activated now")))
4ed46869 1574
d3459641 1575(defun read-multilingual-string (prompt &optional initial-input input-method)
4ed46869
KH
1576 "Read a multilingual string from minibuffer, prompting with string PROMPT.
1577The input method selected last time is activated in minibuffer.
d0d8885d
JB
1578If optional second argument INITIAL-INPUT is non-nil, insert it in the
1579minibuffer initially.
1580Optional 3rd argument INPUT-METHOD specifies the input method to be activated
1581instead of the one selected last time. It is a symbol or a string."
88d559ec
KH
1582 (setq input-method
1583 (or input-method
d3459641 1584 current-input-method
88d559ec
KH
1585 default-input-method
1586 (read-input-method-name "Input method: " nil t)))
3df60841 1587 (if (and input-method (symbolp input-method))
4ef06f75 1588 (setq input-method (symbol-name input-method)))
305a3cb6
KH
1589 (let ((prev-input-method current-input-method))
1590 (unwind-protect
1591 (progn
1592 (activate-input-method input-method)
1593 (read-string prompt initial-input nil nil t))
1594 (activate-input-method prev-input-method))))
4ed46869
KH
1595
1596;; Variables to control behavior of input methods. All input methods
1597;; should react to these variables.
1598
8efc03e1 1599(defcustom input-method-verbose-flag 'default
caff3c0a 1600 "A flag to control extra guidance given by input methods.
8efc03e1 1601The value should be nil, t, `complex-only', or `default'.
4ed46869 1602
cb29dfb6 1603The extra guidance is done by showing list of available keys in echo
8efc03e1
KH
1604area. When you use the input method in the minibuffer, the guidance
1605is shown at the bottom short window (split from the existing window).
c27c4ed8 1606
8efc03e1
KH
1607If the value is t, extra guidance is always given, if the value is
1608nil, extra guidance is always suppressed.
1609
1610If the value is `complex-only', only complex input methods such as
1611`chinese-py' and `japanese' give extra guidance.
1612
1613If the value is `default', complex input methods always give extra
1614guidance, but simple input methods give it only when you are not in
1615the minibuffer.
1616
1617See also the variable `input-method-highlight-flag'."
d37ef0f6
DL
1618 :type '(choice (const :tag "Always" t) (const :tag "Never" nil)
1619 (const complex-only) (const default))
42395763
RS
1620 :group 'mule)
1621
1622(defcustom input-method-highlight-flag t
caff3c0a 1623 "If this flag is non-nil, input methods highlight partially-entered text.
42395763
RS
1624For instance, while you are in the middle of a Quail input method sequence,
1625the text inserted so far is temporarily underlined.
8efc03e1
KH
1626The underlining goes away when you finish or abort the input method sequence.
1627See also the variable `input-method-verbose-flag'."
42395763
RS
1628 :type 'boolean
1629 :group 'mule)
4ed46869 1630
1f547b92 1631(defcustom input-method-activate-hook nil
f17ccaee
KH
1632 "Normal hook run just after an input method is activated.
1633
1634The variable `current-input-method' keeps the input method name
1f547b92
DL
1635just activated."
1636 :type 'hook
1637 :group 'mule)
4ed46869 1638
1f547b92 1639(defcustom input-method-inactivate-hook nil
f17ccaee
KH
1640 "Normal hook run just after an input method is inactivated.
1641
1642The variable `current-input-method' still keeps the input method name
1f547b92
DL
1643just inactivated."
1644 :type 'hook
1645 :group 'mule)
4ed46869 1646
1f547b92 1647(defcustom input-method-after-insert-chunk-hook nil
8f924df7 1648 "Normal hook run just after an input method insert some chunk of text."
1f547b92
DL
1649 :type 'hook
1650 :group 'mule)
4ed46869 1651
dccca980 1652(defvar input-method-exit-on-first-char nil
0b6cadff 1653 "This flag controls when an input method returns.
dccca980
KH
1654Usually, the input method does not return while there's a possibility
1655that it may find a different translation if a user types another key.
d0d8885d
JB
1656But, if this flag is non-nil, the input method returns as soon as the
1657current key sequence gets long enough to have some valid translation.")
dccca980 1658
1f547b92 1659(defcustom input-method-use-echo-area nil
dccca980 1660 "This flag controls how an input method shows an intermediate key sequence.
39e643e2
RS
1661Usually, the input method inserts the intermediate key sequence,
1662or candidate translations corresponding to the sequence,
1663at point in the current buffer.
1f547b92
DL
1664But, if this flag is non-nil, it displays them in echo area instead."
1665 :type 'hook
1666 :group 'mule)
dccca980 1667
723a427a 1668(defvar input-method-exit-on-invalid-key nil
fea6b736 1669 "This flag controls the behavior of an input method on invalid key input.
723a427a
KH
1670Usually, when a user types a key which doesn't start any character
1671handled by the input method, the key is handled by turning off the
e8dd0160 1672input method temporarily. After that key, the input method is re-enabled.
723a427a
KH
1673But, if this flag is non-nil, the input method is never back on.")
1674
4ed46869 1675\f
1f547b92 1676(defcustom set-language-environment-hook nil
8efc03e1
KH
1677 "Normal hook run after some language environment is set.
1678
1679When you set some hook function here, that effect usually should not
1680be inherited to another language environment. So, you had better set
1681another function in `exit-language-environment-hook' (which see) to
1f547b92
DL
1682cancel the effect."
1683 :type 'hook
1684 :group 'mule)
8efc03e1 1685
1f547b92 1686(defcustom exit-language-environment-hook nil
8efc03e1
KH
1687 "Normal hook run after exiting from some language environment.
1688When this hook is run, the variable `current-language-environment'
1689is still bound to the language environment being exited.
1690
e8dd0160 1691This hook is mainly used for canceling the effect of
caff3c0a 1692`set-language-environment-hook' (which see)."
1f547b92
DL
1693 :type 'hook
1694 :group 'mule)
8efc03e1 1695
b0648a00
RS
1696(put 'setup-specified-language-environment 'apropos-inhibit t)
1697
15b3e511 1698(defun setup-specified-language-environment ()
f08adf27 1699 "Switch to a specified language environment."
15b3e511 1700 (interactive)
f850d782 1701 (let (language-name)
15b3e511
KH
1702 (if (and (symbolp last-command-event)
1703 (or (not (eq last-command-event 'Default))
1704 (setq last-command-event 'English))
f850d782 1705 (setq language-name (symbol-name last-command-event)))
f4990970
PA
1706 (prog1
1707 (set-language-environment language-name)
1708 (customize-mark-as-set 'current-language-environment))
15b3e511 1709 (error "Bogus calling sequence"))))
4ed46869 1710
8861c593 1711(defcustom current-language-environment "English"
94d04df6 1712 "The last language environment specified with `set-language-environment'.
ebef6d93
KH
1713This variable should be set only with \\[customize], which is equivalent
1714to using the function `set-language-environment'."
94d04df6 1715 :link '(custom-manual "(emacs)Language Environments")
dff1aa24 1716 :set (lambda (symbol value) (set-language-environment value))
94d04df6 1717 :get (lambda (x)
f15466c5 1718 (or (car-safe (assoc-string
94d04df6
DL
1719 (if (symbolp current-language-environment)
1720 (symbol-name current-language-environment)
1721 current-language-environment)
f15466c5 1722 language-info-alist t))
94d04df6 1723 "English"))
990a4108
MR
1724 ;; custom type will be updated with `set-language-info'.
1725 :type (if language-info-alist
1726 (cons 'choice (mapcar
1727 (lambda (lang)
756e055f
MR
1728 (list 'const lang))
1729 (sort (mapcar 'car language-info-alist) 'string<)))
990a4108 1730 'string)
8861c593 1731 :initialize 'custom-initialize-default
eb9fc9e6 1732 :group 'mule)
f850d782 1733
ddb5c041
KH
1734(defun reset-language-environment ()
1735 "Reset multilingual environment of Emacs to the default status.
1736
1737The default status is as follows:
1738
d37ef0f6 1739 The default value of `buffer-file-coding-system' is nil.
ddb5c041
KH
1740 The default coding system for process I/O is nil.
1741 The default value for the command `set-terminal-coding-system' is nil.
1742 The default value for the command `set-keyboard-coding-system' is nil.
1743
97941b05
KH
1744 The order of priorities of coding systems are as follows:
1745 utf-8
1746 iso-2022-7bit
1747 iso-latin-1
1748 iso-2022-7bit-lock
1749 iso-2022-8bit-ss2
1750 emacs-mule
1751 raw-text"
ddb5c041
KH
1752 (interactive)
1753 ;; This function formerly set default-enable-multibyte-characters to t,
1754 ;; but that is incorrect. It should not alter the unibyte/multibyte choice.
1755
97941b05
KH
1756 (set-coding-system-priority
1757 'utf-8
1758 'iso-2022-7bit
1759 'iso-latin-1
1760 'iso-2022-7bit-lock
1761 'iso-2022-8bit-ss2
1762 'emacs-mule
1763 'raw-text)
91693d18 1764
ddb5c041 1765 (set-default-coding-systems nil)
b5edd1d1 1766 (setq default-sendmail-coding-system 'iso-latin-1)
a41118cc
SM
1767 ;; On Darwin systems, this should be utf-8, but when this file is loaded
1768 ;; utf-8 is not yet defined, so we set it in set-locale-environment instead.
787caf99 1769 (setq default-file-name-coding-system 'iso-latin-1)
1d77e15a
JR
1770 ;; Preserve eol-type from existing default-process-coding-systems.
1771 ;; On non-unix-like systems in particular, these may have been set
1772 ;; carefully by the user, or by the startup code, to deal with the
1773 ;; users shell appropriately, so should not be altered by changing
1774 ;; language environment.
1775 (let ((output-coding
a099a2ff
JR
1776 ;; When bootstrapping, coding-systems are not defined yet, so
1777 ;; we need to catch the error from check-coding-system.
d37ef0f6 1778 (condition-case nil
a099a2ff
JR
1779 (coding-system-change-text-conversion
1780 (car default-process-coding-system) 'undecided)
1781 (coding-system-error 'undecided)))
1d77e15a 1782 (input-coding
a099a2ff
JR
1783 (condition-case nil
1784 (coding-system-change-text-conversion
1785 (cdr default-process-coding-system) 'iso-latin-1)
1786 (coding-system-error 'iso-latin-1))))
1d77e15a
JR
1787 (setq default-process-coding-system
1788 (cons output-coding input-coding)))
b5edd1d1 1789
a43977db
KH
1790 ;; Put the highest priority to the charset iso-8859-1 to prefer the
1791 ;; registry iso8859-1 over iso8859-2 in font selection. It also
1792 ;; makes unibyte-display-via-language-environment to use iso-8859-1
1793 ;; as the unibyte charset.
1794 (set-charset-priority 'iso-8859-1)
1795
ddb5c041
KH
1796 ;; Don't alter the terminal and keyboard coding systems here.
1797 ;; The terminal still supports the same coding system
1798 ;; that it supported a minute ago.
1b8dc791
SM
1799 ;; (set-terminal-coding-system-internal nil)
1800 ;; (set-keyboard-coding-system-internal nil)
ddb5c041 1801
33de15f4
SM
1802 ;; Back in Emacs-20, it was necessary to provide some fallback implicit
1803 ;; conversion, because almost no packages handled coding-system issues.
1804 ;; Nowadays it'd just paper over bugs.
1805 ;; (set-unibyte-charset 'iso-8859-1)
1806 )
ddb5c041 1807
0c47a7c8
KH
1808(reset-language-environment)
1809
97c57fb2 1810(defun set-display-table-and-terminal-coding-system (language-name &optional coding-system display)
40c81f74
PE
1811 "Set up the display table and terminal coding system for LANGUAGE-NAME."
1812 (let ((coding (get-language-info language-name 'unibyte-display)))
735b7c87
KH
1813 (if (and coding
1814 (or (not coding-system)
1815 (coding-system-equal coding coding-system)))
40c81f74 1816 (standard-display-european-internal)
a67ae60e
EZ
1817 ;; The following 2 lines undo the 8-bit display that we set up
1818 ;; in standard-display-european-internal, which see. This is in
1819 ;; case the user has used standard-display-european earlier in
970c9391 1820 ;; this session.
d73a7bb8
JPW
1821 (when standard-display-table
1822 (dotimes (i 128)
b221615b 1823 (aset standard-display-table (+ i 128) nil))))
970c9391 1824 (set-terminal-coding-system (or coding-system coding) display)))
40c81f74 1825
166246f7 1826(defun set-language-environment (language-name)
6c05d680
RS
1827 "Set up multi-lingual environment for using LANGUAGE-NAME.
1828This sets the coding system priority and the default input method
8861c593
RS
1829and sometimes other things. LANGUAGE-NAME should be a string
1830which is the name of a language environment. For example, \"Latin-1\"
1831specifies the character set for the major languages of Western Europe."
8efc03e1 1832 (interactive (list (read-language-name
ddb5c041 1833 nil
5b76833f 1834 "Set language environment (default English): ")))
4ef06f75
KH
1835 (if language-name
1836 (if (symbolp language-name)
1837 (setq language-name (symbol-name language-name)))
1838 (setq language-name "English"))
f15466c5 1839 (let ((slot (assoc-string language-name language-info-alist t)))
95498fd0 1840 (unless slot
f850d782 1841 (error "Language environment not defined: %S" language-name))
95498fd0 1842 (setq language-name (car slot)))
8efc03e1
KH
1843 (if current-language-environment
1844 (let ((func (get-language-info current-language-environment
1845 'exit-function)))
e63645c2 1846 (run-hooks 'exit-language-environment-hook)
5f395df3 1847 (if (functionp func) (funcall func))))
ddb5c041 1848
e9a8ed3c
KH
1849 (reset-language-environment)
1850 ;; The features might set up coding systems.
ddb5c041
KH
1851 (let ((required-features (get-language-info language-name 'features)))
1852 (while required-features
1853 (require (car required-features))
1854 (setq required-features (cdr required-features))))
6b61353c 1855
e9a8ed3c
KH
1856 (setq current-language-environment language-name)
1857
1858 (set-language-environment-coding-systems language-name)
1859 (set-language-environment-input-method language-name)
1860 (set-language-environment-nonascii-translation language-name)
1861 (set-language-environment-charset language-name)
1862 ;; Unibyte setups if necessary.
597e2240 1863 (unless (default-value 'enable-multibyte-characters)
e9a8ed3c
KH
1864 (set-language-environment-unibyte language-name))
1865
ddb5c041 1866 (let ((func (get-language-info language-name 'setup-function)))
5f395df3 1867 (if (functionp func)
ddb5c041 1868 (funcall func)))
e9a8ed3c 1869
8aeebac2 1870 (setq current-iso639-language
e9aaa1db
KH
1871 (or (get-language-info language-name 'iso639-language)
1872 current-iso639-language))
8aeebac2 1873
8efc03e1 1874 (run-hooks 'set-language-environment-hook)
f850d782 1875 (force-mode-line-update t))
4ed46869 1876
c3869589 1877(define-widget 'charset 'symbol
aaa448c9 1878 "An Emacs charset."
1644d5b9 1879 :tag "Charset"
c3869589
DL
1880 :complete-function (lambda ()
1881 (interactive)
1882 (lisp-complete-symbol 'charsetp))
1883 :completion-ignore-case t
1884 :value 'ascii
1885 :validate (lambda (widget)
1886 (unless (charsetp (widget-value widget))
1887 (widget-put widget :error (format "Invalid charset: %S"
1888 (widget-value widget)))
1889 widget))
1890 :prompt-history 'charset-history)
1891
1892(defcustom language-info-custom-alist nil
1893 "Customizations of language environment parameters.
1894Value is an alist with elements like those of `language-info-alist'.
1895These are used to set values in `language-info-alist' which replace
1896the defaults. A typical use is replacing the default input method for
1897the environment. Use \\[describe-language-environment] to find the environment's settings.
1898
1899This option is intended for use at startup. Removing items doesn't
1900remove them from the language info until you next restart Emacs.
1901
d0d8885d
JB
1902Setting this variable directly does not take effect.
1903See `set-language-info-alist' for use in programs."
c3869589 1904 :group 'mule
8589dc17 1905 :version "23.1"
c3869589
DL
1906 :set (lambda (s v)
1907 (custom-set-default s v)
1908 ;; Can't do this before language environments are set up.
1909 (when v
1910 ;; modify language-info-alist
1911 (dolist (elt v)
1912 (set-language-info-alist (car elt) (cdr elt)))
1913 ;; re-set the environment in case its parameters changed
1914 (set-language-environment current-language-environment)))
1915 :type `(alist
1916 :key-type (string :tag "Language environment"
1917 :completion-ignore-case t
1918 :complete-function widget-string-complete
1919 :completion-alist language-info-alist)
1920 :value-type
1921 (alist :key-type symbol
1922 :options ((documentation string)
1923 (charset (repeat charset))
1924 (sample-text string)
1925 (setup-function function)
1926 (exit-function function)
1927 (coding-system (repeat coding-system))
1928 (coding-priority (repeat coding-system))
1929 (nonascii-translation charset)
1930 (input-method
1931 (string
1932 :completion-ignore-case t
1933 :complete-function widget-string-complete
1934 :completion-alist input-method-alist
1935 :prompt-history input-method-history))
1936 (features (repeat symbol))
1937 (unibyte-display coding-system)))))
1938
aa360da1
GM
1939(declare-function x-server-vendor "xfns.c" (&optional terminal))
1940(declare-function x-server-version "xfns.c" (&optional terminal))
1941
51a8fc1d
RS
1942(defun standard-display-european-internal ()
1943 ;; Actually set up direct output of non-ASCII characters.
03c35c83
EZ
1944 (standard-display-8bit (if (eq window-system 'pc) 128 160) 255)
1945 ;; Unibyte Emacs on MS-DOS wants to display all 8-bit characters with
1946 ;; the native font, and codes 160 and 146 stand for something very
1947 ;; different there.
597e2240
GM
1948 (or (and (eq window-system 'pc) (not (default-value
1949 'enable-multibyte-characters)))
03c35c83 1950 (progn
6b626913
SM
1951 ;; Most X fonts used to do the wrong thing for latin-1 code 160.
1952 (unless (and (eq window-system 'x)
1953 ;; XFree86 4 has fixed the fonts.
1954 (string= "The XFree86 Project, Inc" (x-server-vendor))
1955 (> (aref (number-to-string (nth 2 (x-server-version))) 0)
1956 ?3))
1957 ;; Make non-line-break space display as a plain space.
d419e1d9 1958 (aset standard-display-table (unibyte-char-to-multibyte 160) [32]))
1492f7ac 1959 ;; Most Windows programs send out apostrophes as \222. Most X fonts
03c35c83 1960 ;; don't contain a character at that position. Map it to the ASCII
5f395df3
SM
1961 ;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
1962 ;; U+2019, normally from the windows-1252 character set. XFree 4
1963 ;; fonts probably have the appropriate glyph at this position,
1964 ;; so they could use standard-display-8bit. It's better to use a
1965 ;; proper windows-1252 coding system. --fx]
d419e1d9 1966 (aset standard-display-table (unibyte-char-to-multibyte 146) [39]))))
03c35c83 1967
e9a8ed3c
KH
1968(defun set-language-environment-coding-systems (language-name)
1969 "Do various coding system setups for language environment LANGUAGE-NAME."
54b226f7 1970 (let* ((priority (get-language-info language-name 'coding-priority))
e9a8ed3c 1971 (default-coding (car priority))
b56a5ae0 1972 ;; If the default buffer-file-coding-system is nil, don't use
207422da 1973 ;; coding-system-eol-type, because it treats nil as
b56a5ae0 1974 ;; `no-conversion'. The default buffer-file-coding-system is set
207422da
EZ
1975 ;; to nil by reset-language-environment, and in that case we
1976 ;; want to have here the native EOL type for each platform.
1977 ;; FIXME: there should be a common code that runs both on
1978 ;; startup and here to set the default EOL type correctly.
1979 ;; Right now, DOS/Windows platforms set this on dos-w32.el,
1980 ;; which works only as long as the order of loading files at
1981 ;; dump time and calling functions at startup is not modified
1982 ;; significantly, i.e. as long as this function is called
b56a5ae0 1983 ;; _after_ the default buffer-file-coding-system was set by
207422da
EZ
1984 ;; dos-w32.el.
1985 (eol-type
b56a5ae0
SM
1986 (coding-system-eol-type
1987 (or (default-value 'buffer-file-coding-system)
1988 (if (memq system-type '(windows-nt ms-dos)) 'dos 'unix)))))
97941b05
KH
1989 (when priority
1990 (set-default-coding-systems
1991 (if (memq eol-type '(0 1 2 unix dos mac))
1992 (coding-system-change-eol-conversion default-coding eol-type)
1993 default-coding))
1994 (setq default-sendmail-coding-system default-coding)
1995 (apply 'set-coding-system-priority priority))))
54b226f7 1996
d042f8b4
KH
1997(defun set-language-environment-input-method (language-name)
1998 "Do various input method setups for language environment LANGUAGE-NAME."
1999 (let ((input-method (get-language-info language-name 'input-method)))
2000 (when input-method
2001 (setq default-input-method input-method)
2002 (if input-method-history
2003 (setq input-method-history
2004 (cons input-method
2005 (delete input-method input-method-history)))))))
2006
2007(defun set-language-environment-nonascii-translation (language-name)
2008 "Do unibyte/multibyte translation setup for language environment LANGUAGE-NAME."
e9a8ed3c
KH
2009 ;; Note: For DOS, we assumed that the charset cpXXX is already
2010 ;; defined.
2011 (let ((nonascii (get-language-info language-name 'nonascii-translation)))
2012 (if (eq window-system 'pc)
970c9391 2013 (setq nonascii (intern (format "cp%d" dos-codepage))))
e9a8ed3c
KH
2014 (or (and (charsetp nonascii)
2015 (get-charset-property nonascii :ascii-compatible-p))
2016 (setq nonascii 'iso-8859-1))
33de15f4
SM
2017 ;; Back in Emacs-20, it was necessary to provide some fallback implicit
2018 ;; conversion, because almost no packages handled coding-system issues.
2019 ;; Nowadays it'd just paper over bugs.
2020 ;; (set-unibyte-charset nonascii)
2021 ))
d042f8b4
KH
2022
2023(defun set-language-environment-charset (language-name)
2024 "Do various charset setups for language environment LANGUAGE-NAME."
e9a8ed3c
KH
2025 ;; Put higher priorities to such charsets that are supported by the
2026 ;; coding systems of higher priorities in this environment.
2027 (let ((charsets (get-language-info language-name 'charset)))
2028 (dolist (coding (get-language-info language-name 'coding-priority))
bf974dc9
KH
2029 (let ((list (coding-system-charset-list coding)))
2030 (if (consp list)
2031 (setq charsets (append charsets list)))))
e9a8ed3c
KH
2032 (if charsets
2033 (apply 'set-charset-priority charsets))))
d042f8b4
KH
2034
2035(defun set-language-environment-unibyte (language-name)
2036 "Do various unibyte-mode setups for language environment LANGUAGE-NAME."
d042f8b4
KH
2037 (set-display-table-and-terminal-coding-system language-name))
2038
cc4d2366 2039(defun princ-list (&rest args)
caff3c0a 2040 "Print all arguments with `princ', then print \"\\n\"."
4ed46869
KH
2041 (while args (princ (car args)) (setq args (cdr args)))
2042 (princ "\n"))
cc4d2366 2043(make-obsolete 'princ-list "use mapc and princ instead" "23.3")
4ed46869 2044
b0648a00
RS
2045(put 'describe-specified-language-support 'apropos-inhibit t)
2046
6b61353c 2047;; Print language-specific information such as input methods,
13e82c04 2048;; charsets, and coding systems. This function is intended to be
48082651 2049;; called from the menu:
281d03ec 2050;; [menu-bar mule describe-language-environment LANGUAGE]
48082651
KH
2051;; and should not run it by `M-x describe-current-input-method-function'.
2052(defun describe-specified-language-support ()
96db204a 2053 "Describe how Emacs supports the specified language environment."
48082651 2054 (interactive)
281d03ec 2055 (let (language-name)
48082651 2056 (if (not (and (symbolp last-command-event)
cda74479
DL
2057 (or (not (eq last-command-event 'Default))
2058 (setq last-command-event 'English))
281d03ec 2059 (setq language-name (symbol-name last-command-event))))
48082651 2060 (error "Bogus calling sequence"))
281d03ec
RS
2061 (describe-language-environment language-name)))
2062
2063(defun describe-language-environment (language-name)
2064 "Describe how Emacs supports language environment LANGUAGE-NAME."
78754934
KH
2065 (interactive
2066 (list (read-language-name
2067 'documentation
5b76833f 2068 "Describe language environment (default current choice): ")))
f850d782
RS
2069 (if (null language-name)
2070 (setq language-name current-language-environment))
281d03ec
RS
2071 (if (or (null language-name)
2072 (null (get-language-info language-name 'documentation)))
2073 (error "No documentation for the specified language"))
4ef06f75
KH
2074 (if (symbolp language-name)
2075 (setq language-name (symbol-name language-name)))
ef5a4730
KH
2076 (dolist (feature (get-language-info language-name 'features))
2077 (require feature))
6b626913 2078 (let ((doc (get-language-info language-name 'documentation)))
c3034e84 2079 (help-setup-xref (list #'describe-language-environment language-name)
32226619 2080 (called-interactively-p 'interactive))
c3034e84 2081 (with-output-to-temp-buffer (help-buffer)
9a529312 2082 (with-current-buffer standard-output
464cc130
KH
2083 (insert language-name " language environment\n\n")
2084 (if (stringp doc)
2085 (insert doc "\n\n"))
e036b0a6
KH
2086 (condition-case nil
2087 (let ((str (eval (get-language-info language-name 'sample-text))))
2088 (if (stringp str)
fd0dd4c3
KH
2089 (insert "Sample text:\n "
2090 (replace-regexp-in-string "\n" "\n " str)
2091 "\n\n")))
e036b0a6 2092 (error nil))
464cc130 2093 (let ((input-method (get-language-info language-name 'input-method))
acd1b9bd
KH
2094 (l (copy-sequence input-method-alist))
2095 (first t))
2096 (when (and input-method
2097 (setq input-method (assoc input-method l)))
2098 (insert "Input methods (default " (car input-method) ")\n")
2099 (setq l (cons input-method (delete input-method l))
2100 first nil))
2101 (dolist (elt l)
2102 (when (or (eq input-method elt)
2103 (eq t (compare-strings language-name nil nil
2104 (nth 1 elt) nil nil t)))
2105 (when first
2106 (insert "Input methods:\n")
2107 (setq first nil))
2108 (insert " " (car elt))
2109 (search-backward (car elt))
2110 (help-xref-button 0 'help-input-method (car elt))
464cc130 2111 (goto-char (point-max))
2fa7e202 2112 (insert " (\""
acd1b9bd
KH
2113 (if (stringp (nth 3 elt)) (nth 3 elt) (car (nth 3 elt)))
2114 "\" in mode line)\n")))
2115 (or first
2116 (insert "\n")))
464cc130
KH
2117 (insert "Character sets:\n")
2118 (let ((l (get-language-info language-name 'charset)))
2119 (if (null l)
2120 (insert " nothing specific to " language-name "\n")
2121 (while l
2122 (insert " " (symbol-name (car l)))
2123 (search-backward (symbol-name (car l)))
467412aa 2124 (help-xref-button 0 'help-character-set (car l))
464cc130
KH
2125 (goto-char (point-max))
2126 (insert ": " (charset-description (car l)) "\n")
2127 (setq l (cdr l)))))
2128 (insert "\n")
2129 (insert "Coding systems:\n")
2130 (let ((l (get-language-info language-name 'coding-system)))
2131 (if (null l)
2132 (insert " nothing specific to " language-name "\n")
2133 (while l
2134 (insert " " (symbol-name (car l)))
2135 (search-backward (symbol-name (car l)))
467412aa 2136 (help-xref-button 0 'help-coding-system (car l))
464cc130
KH
2137 (goto-char (point-max))
2138 (insert " (`"
2139 (coding-system-mnemonic (car l))
2140 "' in mode line):\n\t"
2141 (coding-system-doc-string (car l))
2142 "\n")
0855c6cd 2143 (let ((aliases (coding-system-aliases (car l))))
464cc130
KH
2144 (when aliases
2145 (insert "\t(alias:")
2146 (while aliases
2147 (insert " " (symbol-name (car aliases)))
2148 (setq aliases (cdr aliases)))
2149 (insert ")\n")))
c3034e84 2150 (setq l (cdr l)))))))))
4ed46869 2151\f
40c81f74
PE
2152;;; Locales.
2153
0d7c5bb9
DL
2154(defvar locale-translation-file-name nil
2155 "File name for the system's file of locale-name aliases, or nil if none.")
40c81f74 2156
5f395df3
SM
2157;; The following definitions might as well be marked as constants and
2158;; purecopied, since they're normally used on startup, and probably
2159;; should reflect the facilities of the base Emacs.
2160(defconst locale-language-names
2161 (purecopy
2162 '(
40c81f74
PE
2163 ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
2164 ;; as specified in the Single Unix Spec, Version 2.
2165 ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
2166 ;; with additions from ISO 639/RA Newsletter No.1/1989;
5f395df3
SM
2167 ;; see Internet RFC 2165 (1997-06) and
2168 ;; http://www.evertype.com/standards/iso639/iso639-en.html
2169 ;; TERRITORY is a country code taken from ISO 3166
2170 ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
40c81f74 2171 ;; CODESET and MODIFIER are implementation-dependent.
5f395df3 2172
6b61353c
KH
2173 ;; jasonr comments: MS Windows uses three letter codes for
2174 ;; languages instead of the two letter ISO codes that POSIX
2175 ;; uses. In most cases the first two letters are the same, so
2176 ;; most of the regexps in locale-language-names work. Japanese
2177 ;; and Chinese are exceptions, which are listed in the
2178 ;; non-standard section at the bottom of locale-language-names.
2179
8dedddd5
KH
2180 ("aa_DJ" . "Latin-1") ; Afar
2181 ("aa" . "UTF-8")
2182 ;; ab Abkhazian
6ececc4d 2183 ("af" . "Latin-1") ; Afrikaans
8dedddd5
KH
2184 ("am" "Ethiopic" utf-8) ; Amharic
2185 ("an" . "Latin-9") ; Aragonese
5f395df3 2186 ; ar Arabic glibc uses 8859-6
40c81f74
PE
2187 ; as Assamese
2188 ; ay Aymara
8dedddd5 2189 ("az" . "UTF-8") ; Azerbaijani
40c81f74 2190 ; ba Bashkir
8dedddd5
KH
2191 ("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
2192 ("bg" "Bulgarian" cp1251) ; Bulgarian
40c81f74
PE
2193 ; bh Bihari
2194 ; bi Bislama
8dedddd5 2195 ("bn" . "UTF-8") ; Bengali, Bangla
40c81f74
PE
2196 ("bo" . "Tibetan")
2197 ("br" . "Latin-1") ; Breton
d37ef0f6 2198 ("bs" . "Latin-2") ; Bosnian
8dedddd5 2199 ("byn" . "UTF-8") ; Bilin; Blin
40c81f74
PE
2200 ("ca" . "Latin-1") ; Catalan
2201 ; co Corsican
8dedddd5
KH
2202 ("cs" "Czech" iso-8859-2)
2203 ("cy" "Welsh" iso-8859-14)
40c81f74 2204 ("da" . "Latin-1") ; Danish
8dedddd5 2205 ("de" "German" iso-8859-1)
569a6374 2206 ; dv Divehi
40c81f74 2207 ; dz Bhutani
8dedddd5 2208 ("el" "Greek" iso-8859-7)
6ececc4d 2209 ;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
e11cf111
DL
2210 ;; That's actually what the GNU locales define, modulo things like
2211 ;; en_IN -- fx.
8dedddd5 2212 ("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India
eec5c8f9 2213 ("en" "English" iso-8859-1) ; English
58802cd7 2214 ("eo" . "Esperanto") ; Esperanto
8dedddd5
KH
2215 ("es" "Spanish" iso-8859-1)
2216 ("et" . "Latin-1") ; Estonian
40c81f74 2217 ("eu" . "Latin-1") ; Basque
8dedddd5 2218 ("fa" . "UTF-8") ; Persian
40c81f74 2219 ("fi" . "Latin-1") ; Finnish
8dedddd5 2220 ("fj" . "Latin-1") ; Fiji
40c81f74 2221 ("fo" . "Latin-1") ; Faroese
8dedddd5 2222 ("fr" "French" iso-8859-1) ; French
40c81f74 2223 ("fy" . "Latin-1") ; Frisian
6ececc4d 2224 ("ga" . "Latin-1") ; Irish Gaelic (new orthography)
8dedddd5
KH
2225 ("gd" . "Latin-9") ; Scots Gaelic
2226 ("gez" "Ethiopic" utf-8) ; Geez
2227 ("gl" . "Latin-1") ; Gallegan; Galician
40c81f74 2228 ; gn Guarani
8dedddd5
KH
2229 ("gu" . "UTF-8") ; Gujarati
2230 ("gv" . "Latin-1") ; Manx Gaelic
40c81f74 2231 ; ha Hausa
8dedddd5
KH
2232 ("he" "Hebrew" iso-8859-8)
2233 ("hi" "Devanagari" utf-8) ; Hindi
2234 ("hr" "Croatian" iso-8859-2) ; Croatian
40c81f74
PE
2235 ("hu" . "Latin-2") ; Hungarian
2236 ; hy Armenian
2237 ; ia Interlingua
2238 ("id" . "Latin-1") ; Indonesian
2239 ; ie Interlingue
2240 ; ik Inupiak
2241 ("is" . "Latin-1") ; Icelandic
8dedddd5 2242 ("it" "Italian" iso-8859-1) ; Italian
40c81f74 2243 ; iu Inuktitut
8dedddd5
KH
2244 ("iw" "Hebrew" iso-8859-8)
2245 ("ja" "Japanese" euc-jp)
40c81f74 2246 ; jw Javanese
8dedddd5 2247 ("ka" "Georgian" georgian-ps) ; Georgian
40c81f74 2248 ; kk Kazakh
6ececc4d 2249 ("kl" . "Latin-1") ; Greenlandic
40c81f74 2250 ; km Cambodian
8dedddd5
KH
2251 ("kn" "Kannada" utf-8)
2252 ("ko" "Korean" euc-kr)
40c81f74
PE
2253 ; ks Kashmiri
2254 ; ku Kurdish
5f395df3 2255 ("kw" . "Latin-1") ; Cornish
40c81f74
PE
2256 ; ky Kirghiz
2257 ("la" . "Latin-1") ; Latin
5f395df3 2258 ("lb" . "Latin-1") ; Luxemburgish
8dedddd5 2259 ("lg" . "Laint-6") ; Ganda
40c81f74 2260 ; ln Lingala
8dedddd5
KH
2261 ("lo" "Lao" utf-8) ; Laothian
2262 ("lt" "Lithuanian" iso-8859-13)
9c20a8d5 2263 ("lv" . "Latvian") ; Latvian, Lettish
40c81f74 2264 ; mg Malagasy
5f395df3 2265 ("mi" . "Latin-7") ; Maori
8dedddd5
KH
2266 ("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
2267 ("ml" "Malayalam" utf-8)
2268 ("mn" . "UTF-8") ; Mongolian
40c81f74 2269 ; mo Moldavian
8dedddd5 2270 ("mr" "Devanagari" utf-8) ; Marathi
5f395df3 2271 ("ms" . "Latin-1") ; Malay
40c81f74
PE
2272 ("mt" . "Latin-3") ; Maltese
2273 ; my Burmese
2274 ; na Nauru
8dedddd5
KH
2275 ("nb" . "Latin-1") ; Norwegian
2276 ("ne" "Devanagari" utf-8) ; Nepali
2277 ("nl" "Dutch" iso-8859-1)
40c81f74 2278 ("no" . "Latin-1") ; Norwegian
5f395df3 2279 ("oc" . "Latin-1") ; Occitan
8dedddd5
KH
2280 ("om_ET" . "UTF-8") ; (Afan) Oromo
2281 ("om" . "Latin-1") ; (Afan) Oromo
40c81f74 2282 ; or Oriya
8dedddd5 2283 ("pa" . "UTF-8") ; Punjabi
40c81f74
PE
2284 ("pl" . "Latin-2") ; Polish
2285 ; ps Pashto, Pushto
2286 ("pt" . "Latin-1") ; Portuguese
2287 ; qu Quechua
6ececc4d 2288 ("rm" . "Latin-1") ; Rhaeto-Romanic
40c81f74 2289 ; rn Kirundi
8dedddd5
KH
2290 ("ro" "Romanian" iso-8859-2)
2291 ("ru_RU" "Russian" iso-8859-5)
2292 ("ru_UA" "Russian" koi8-u)
40c81f74
PE
2293 ; rw Kinyarwanda
2294 ("sa" . "Devanagari") ; Sanskrit
2295 ; sd Sindhi
8dedddd5 2296 ("se" . "UTF-8") ; Northern Sami
40c81f74
PE
2297 ; sg Sangho
2298 ("sh" . "Latin-2") ; Serbo-Croatian
2299 ; si Sinhalese
8dedddd5
KH
2300 ("sid" . "UTF-8") ; Sidamo
2301 ("sk" "Slovak" iso-8859-2)
2302 ("sl" "Slovenian" iso-8859-2)
40c81f74
PE
2303 ; sm Samoan
2304 ; sn Shona
8dedddd5
KH
2305 ("so_ET" "UTF-8") ; Somali
2306 ("so" "Latin-1") ; Somali
6ececc4d 2307 ("sq" . "Latin-1") ; Albanian
40c81f74
PE
2308 ("sr" . "Latin-2") ; Serbian (Latin alphabet)
2309 ; ss Siswati
8dedddd5 2310 ("st" . "Latin-1") ; Sesotho
40c81f74 2311 ; su Sundanese
8dedddd5 2312 ("sv" "Swedish" iso-8859-1) ; Swedish
40c81f74 2313 ("sw" . "Latin-1") ; Swahili
8dedddd5
KH
2314 ("ta" "Tamil" utf-8)
2315 ("te" . "UTF-8") ; Telugu
2316 ("tg" "Tajik" koi8-t)
2317 ("th" "Thai" tis-620)
2318 ("ti" "Ethiopic" utf-8) ; Tigrinya
2319 ("tig_ER" . "UTF-8") ; Tigre
40c81f74 2320 ; tk Turkmen
6ececc4d 2321 ("tl" . "Latin-1") ; Tagalog
40c81f74
PE
2322 ; tn Setswana
2323 ; to Tonga
8dedddd5 2324 ("tr" "Turkish" iso-8859-9)
40c81f74 2325 ; ts Tsonga
8dedddd5 2326 ("tt" . "UTF-8") ; Tatar
40c81f74
PE
2327 ; tw Twi
2328 ; ug Uighur
8dedddd5
KH
2329 ("uk" "Ukrainian" koi8-u)
2330 ("ur" . "UTF-8") ; Urdu
2331 ("uz_UZ@cyrillic" . "UTF-8"); Uzbek
5f395df3 2332 ("uz" . "Latin-1") ; Uzbek
8dedddd5 2333 ("vi" "Vietnamese" utf-8)
40c81f74 2334 ; vo Volapuk
d37ef0f6 2335 ("wa" . "Latin-1") ; Walloon
40c81f74 2336 ; wo Wolof
8dedddd5 2337 ("xh" . "Latin-1") ; Xhosa
9c20a8d5 2338 ("yi" . "Windows-1255") ; Yiddish
40c81f74
PE
2339 ; yo Yoruba
2340 ; za Zhuang
8dedddd5 2341 ("zh_HK" . "Chinese-Big5")
05b81b42 2342 ; zh_HK/BIG5-HKSCS \
8dedddd5 2343 ("zh_TW" . "Chinese-Big5")
05b81b42
KH
2344 ("zh_CN.GB2312" "Chinese-GB")
2345 ("zh_CN.GBK" "Chinese-GBK")
2346 ("zh_CN.GB18030" "Chinese-GB18030")
f21605fa 2347 ("zh_CN.UTF-8" . "Chinese-GBK")
8dedddd5 2348 ("zh_CN" . "Chinese-GB")
40c81f74 2349 ("zh" . "Chinese-GB")
8dedddd5 2350 ("zu" . "Latin-1") ; Zulu
40c81f74
PE
2351
2352 ;; ISO standard locales
2353 ("c$" . "ASCII")
2354 ("posix$" . "ASCII")
2355
40c81f74
PE
2356 ;; The "IPA" Emacs language environment does not correspond
2357 ;; to any ISO 639 code, so let it stand for itself.
2358 ("ipa$" . "IPA")
2359
2360 ;; Nonstandard or obsolete language codes
2361 ("cz" . "Czech") ; e.g. Solaris 2.6
2362 ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4
2363 ("iw" . "Hebrew") ; e.g. X11R6.4
f1282c7f 2364 ("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4
40c81f74 2365 ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6
2e86ceaa 2366 ("jp" . "Japanese") ; e.g. MS Windows
f21605fa 2367 ("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified
86d9e628 2368 ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
569a6374
EZ
2369 ("gbz" . "UTF-8") ; MS Windows Dari Persian
2370 ("div" . "UTF-8") ; MS Windows Divehi (Maldives)
2371 ("wee" . "Latin-2") ; MS Windows Lower Sorbian
2372 ("wen" . "Latin-2") ; MS Windows Upper Sorbian
367ca50f 2373 ))
8dedddd5 2374 "Alist of locale regexps vs the corresponding languages and coding systems.
caff3c0a 2375Each element has this form:
8dedddd5
KH
2376 \(LOCALE-REGEXP LANG-ENV CODING-SYSTEM)
2377The first element whose LOCALE-REGEXP matches the start of a
caff3c0a 2378downcased locale specifies the LANG-ENV \(language environment)
8dedddd5
KH
2379and CODING-SYSTEM corresponding to that locale. If there is no
2380appropriate language environment, the element may have this form:
2381 \(LOCALE-REGEXP . LANG-ENV)
2382In this case, LANG-ENV is one of generic language environments for an
2383specific encoding such as \"Latin-1\" and \"UTF-8\".")
40c81f74 2384
5f395df3
SM
2385(defconst locale-charset-language-names
2386 (purecopy
2387 '((".*8859[-_]?1\\>" . "Latin-1")
2388 (".*8859[-_]?2\\>" . "Latin-2")
2389 (".*8859[-_]?3\\>" . "Latin-3")
2390 (".*8859[-_]?4\\>" . "Latin-4")
2391 (".*8859[-_]?9\\>" . "Latin-5")
2392 (".*8859[-_]?14\\>" . "Latin-8")
2393 (".*8859[-_]?15\\>" . "Latin-9")
6b61353c
KH
2394 (".*utf\\(?:-?8\\)?\\>" . "UTF-8")
2395 ;; utf-8@euro exists, so put this last. (@euro really specifies
2396 ;; the currency, rather than the charset.)
e522f07b 2397 (".*@euro\\>" . "Latin-9")))
6ececc4d
PE
2398 "List of pairs of locale regexps and charset language names.
2399The first element whose locale regexp matches the start of a downcased locale
6b61353c 2400specifies the language name whose charset corresponds to that locale.
8dedddd5 2401This language name is used if the locale is not listed in
caff3c0a 2402`locale-language-names'.")
6ececc4d 2403
5f395df3
SM
2404(defconst locale-preferred-coding-systems
2405 (purecopy
8dedddd5
KH
2406 '((".*8859[-_]?1\\>" . iso-8859-1)
2407 (".*8859[-_]?2\\>" . iso-8859-2)
2408 (".*8859[-_]?3\\>" . iso-8859-3)
2409 (".*8859[-_]?4\\>" . iso-8859-4)
2410 (".*8859[-_]?9\\>" . iso-8859-9)
2411 (".*8859[-_]?14\\>" . iso-8859-14)
2412 (".*8859[-_]?15\\>" . iso-8859-15)
2413 (".*utf\\(?:-?8\\)?" . utf-8)
2414 ;; utf-8@euro exists, so put this after utf-8. (@euro really
2415 ;; specifies the currency, rather than the charset.)
2416 (".*@euro" . iso-8859-15)
2417 ("koi8-?r" . koi8-r)
2418 ("koi8-?u" . koi8-u)
2419 ("tcvn" . tcvn)
38c05d07 2420 ("big5[-_]?hkscs" . big5-hkscs)
8dedddd5
KH
2421 ("big5" . big5)
2422 ("euc-?tw" . euc-tw)
d6be7497 2423 ("euc-?cn" . euc-cn)
38c05d07
KH
2424 ("gb2312" . gb2312)
2425 ("gbk" . gbk)
2426 ("gb18030" . gb18030)
8dedddd5 2427 ("ja.*[._]euc" . japanese-iso-8bit)
5f395df3
SM
2428 ("ja.*[._]jis7" . iso-2022-jp)
2429 ("ja.*[._]pck" . japanese-shift-jis)
2430 ("ja.*[._]sjis" . japanese-shift-jis)
69210880 2431 ("jpn" . japanese-shift-jis) ; MS-Windows uses this.
8dedddd5 2432 ))
6ececc4d
PE
2433 "List of pairs of locale regexps and preferred coding systems.
2434The first element whose locale regexp matches the start of a downcased locale
8dedddd5
KH
2435specifies the coding system to prefer when using that locale.
2436This coding system is used if the locale specifies a specific charset.")
40c81f74
PE
2437
2438(defun locale-name-match (key alist)
2439 "Search for KEY in ALIST, which should be a list of regexp-value pairs.
2440Return the value corresponding to the first regexp that matches the
2441start of KEY, or nil if there is no match."
2442 (let (element)
2443 (while (and alist (not element))
d0d8885d 2444 (if (string-match-p (concat "\\`\\(?:" (car (car alist)) "\\)") key)
40c81f74
PE
2445 (setq element (car alist)))
2446 (setq alist (cdr alist)))
2447 (cdr element)))
2448
58b78d5b 2449(defun locale-charset-match-p (charset1 charset2)
91ed0599 2450 "Whether charset names (strings) CHARSET1 and CHARSET2 are equivalent.
58b78d5b
DL
2451Matching is done ignoring case and any hyphens and underscores in the
2452names. E.g. `ISO_8859-1' and `iso88591' both match `iso-8859-1'."
2453 (setq charset1 (replace-regexp-in-string "[-_]" "" charset1))
2454 (setq charset2 (replace-regexp-in-string "[-_]" "" charset2))
2455 (eq t (compare-strings charset1 nil nil charset2 nil nil t)))
2456
6b61353c
KH
2457(defvar locale-charset-alist nil
2458 "Coding system alist keyed on locale-style charset name.
2459Used by `locale-charset-to-coding-system'.")
2460
2461(defun locale-charset-to-coding-system (charset)
2462 "Find coding system corresponding to CHARSET.
2463CHARSET is any sort of non-Emacs charset name, such as might be used
2464in a locale codeset, or elsewhere. It is matched to a coding system
2465first by case-insensitive lookup in `locale-charset-alist'. Then
2466matches are looked for in the coding system list, treating case and
2467the characters `-' and `_' as insignificant. The coding system base
2468is returned. Thus, for instance, if charset \"ISO8859-2\",
2469`iso-latin-2' is returned."
f15466c5 2470 (or (car (assoc-string charset locale-charset-alist t))
6b61353c
KH
2471 (let ((cs coding-system-alist)
2472 c)
2473 (while (and (not c) cs)
2474 (if (locale-charset-match-p charset (caar cs))
2475 (setq c (intern (caar cs)))
2476 (pop cs)))
2477 (if c (coding-system-base c)))))
2478
2479;; Fixme: This ought to deal with the territory part of the locale
2480;; too, for setting things such as calendar holidays, ps-print paper
2481;; size, spelling dictionary.
2482
44ee1bdf
GM
2483(defun locale-translate (locale)
2484 "Expand LOCALE according to `locale-translation-file-name', if possible.
2485For example, translate \"swedish\" into \"sv_SE.ISO8859-1\"."
2486 (if locale-translation-file-name
2487 (with-temp-buffer
2488 (set-buffer-multibyte nil)
2489 (insert-file-contents locale-translation-file-name)
2490 (if (re-search-forward
2491 (concat "^" (regexp-quote locale) ":?[ \t]+") nil t)
2492 (buffer-substring (point) (line-end-position))
2493 locale))
2494 locale))
2495
36ab8612 2496(defun set-locale-environment (&optional locale-name frame)
40c81f74 2497 "Set up multi-lingual environment for using LOCALE-NAME.
758f07de
RS
2498This sets the language environment, the coding system priority,
2499the default input method and sometimes other things.
2500
58b78d5b 2501LOCALE-NAME should be a string which is the name of a locale supported
6b61353c 2502by the system. Often it is of the form xx_XX.CODE, where xx is a
58b78d5b
DL
2503language, XX is a country, and CODE specifies a character set and
2504coding system. For example, the locale name \"ja_JP.EUC\" might name
2505a locale for Japanese in Japan using the `japanese-iso-8bit'
2506coding-system. The name may also have a modifier suffix, e.g. `@euro'
2507or `@cyrillic'.
40c81f74 2508
758f07de 2509If LOCALE-NAME is nil, its value is taken from the environment
d37ef0f6 2510variables LC_ALL, LC_CTYPE and LANG (the first one that is set).
40c81f74
PE
2511
2512The locale names supported by your system can typically be found in a
0812c1e8 2513directory named `/usr/share/locale' or `/usr/lib/locale'. LOCALE-NAME
758f07de 2514will be translated according to the table specified by
0812c1e8
DL
2515`locale-translation-file-name'.
2516
36ab8612
MB
2517If FRAME is non-nil, only set the keyboard coding system and the
2518terminal coding system for the terminal of that frame, and don't
2519touch session-global parameters like the language environment.
97c57fb2 2520
0812c1e8
DL
2521See also `locale-charset-language-names', `locale-language-names',
2522`locale-preferred-coding-systems' and `locale-coding-system'."
758f07de 2523 (interactive "sSet environment for locale: ")
a1506d29 2524
0d7c5bb9
DL
2525 ;; Do this at runtime for the sake of binaries possibly transported
2526 ;; to a system without X.
2527 (setq locale-translation-file-name
2528 (let ((files
6bba8c70
KH
2529 '("/usr/share/X11/locale/locale.alias" ; e.g. X11R7
2530 "/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4
6b61353c 2531 "/usr/X11R6/lib/X11/locale/locale.alias" ; XFree86, e.g. RedHat 4.2
0d7c5bb9
DL
2532 "/usr/openwin/lib/locale/locale.alias" ; e.g. Solaris 2.6
2533 ;;
2534 ;; The following name appears after the X-related names above,
2535 ;; since the X-related names are what X actually uses.
2536 "/usr/share/locale/locale.alias" ; GNU/Linux sans X
2537 )))
2538 (while (and files (not (file-exists-p (car files))))
2539 (setq files (cdr files)))
2540 (car files)))
2541
7008ccac
GM
2542 (let ((locale locale-name))
2543
2544 (unless locale
2545 ;; Use the first of these three environment variables
2546 ;; that has a nonempty value.
2547 (let ((vars '("LC_ALL" "LC_CTYPE" "LANG")))
db6bd804
DL
2548 (while (and vars
2549 (= 0 (length locale))) ; nil or empty string
36ab8612 2550 (setq locale (getenv (pop vars) frame)))))
7008ccac
GM
2551
2552 (when locale
44ee1bdf 2553 (setq locale (locale-translate locale))
7008ccac
GM
2554
2555 ;; Leave the system locales alone if the caller did not specify
2556 ;; an explicit locale name, as their defaults are set from
2557 ;; LC_MESSAGES and LC_TIME, not LC_CTYPE, and the user might not
2558 ;; want to set them to the same value as LC_CTYPE.
2559 (when locale-name
2560 (setq system-messages-locale locale)
e9aaa1db
KH
2561 (setq system-time-locale locale))
2562
2563 (if (string-match "^[a-z][a-z]" locale)
2564 (setq current-iso639-language (intern (match-string 0 locale)))))
7008ccac 2565
44ee1bdf
GM
2566 (setq woman-locale
2567 (or system-messages-locale
739b7764 2568 (let ((msglocale (getenv "LC_MESSAGES" frame)))
44ee1bdf
GM
2569 (if (zerop (length msglocale))
2570 locale
2571 (locale-translate msglocale)))))
2572
2573 (when locale
7008ccac
GM
2574 (setq locale (downcase locale))
2575
2576 (let ((language-name
2577 (locale-name-match locale locale-language-names))
2578 (charset-language-name
2579 (locale-name-match locale locale-charset-language-names))
cdb1af30 2580 (default-eol-type (coding-system-eol-type
b56a5ae0 2581 (default-value 'buffer-file-coding-system)))
7008ccac 2582 (coding-system
6b61353c
KH
2583 (or (locale-name-match locale locale-preferred-coding-systems)
2584 (when locale
2585 (if (string-match "\\.\\([^@]+\\)" locale)
2586 (locale-charset-to-coding-system
9e2a2647 2587 (match-string 1 locale)))))))
7008ccac 2588
8dedddd5
KH
2589 (if (consp language-name)
2590 ;; locale-language-names specify both lang-env and coding.
2591 ;; But, what specified in locale-preferred-coding-systems
2592 ;; has higher priority.
2593 (setq coding-system (or coding-system
2594 (nth 1 language-name))
2595 language-name (car language-name))
2596 ;; Otherwise, if locale is not listed in locale-language-names,
2597 ;; use what listed in locale-charset-language-names.
2598 (if (not language-name)
2599 (setq language-name charset-language-name)))
7008ccac 2600
cdb1af30
EZ
2601 ;; If a specific EOL conversion was specified in the default
2602 ;; buffer-file-coding-system, preserve it in the coding system
2603 ;; we will be using from now on.
8bca692e
EZ
2604 (if (and (memq default-eol-type '(0 1 2 unix dos mac))
2605 coding-system
2606 (coding-system-p coding-system))
cdb1af30
EZ
2607 (setq coding-system (coding-system-change-eol-conversion
2608 coding-system default-eol-type)))
2609
7008ccac
GM
2610 (when language-name
2611
2612 ;; Set up for this character set. This is now the right way
2613 ;; to do it for both unibyte and multibyte modes.
36ab8612 2614 (unless frame
97c57fb2 2615 (set-language-environment language-name))
7008ccac 2616
597e2240 2617 ;; If the default enable-multibyte-characters is nil,
7008ccac
GM
2618 ;; we are using single-byte characters,
2619 ;; so the display table and terminal coding system are irrelevant.
597e2240 2620 (when (default-value 'enable-multibyte-characters)
ff76e074 2621 (set-display-table-and-terminal-coding-system
36ab8612 2622 language-name coding-system frame))
7008ccac 2623
166ce29f
DL
2624 ;; Set the `keyboard-coding-system' if appropriate (tty
2625 ;; only). At least X and MS Windows can generate
2626 ;; multilingual input.
68bba4e4 2627 ;; XXX This was disabled unless `window-system', but that
fffa137c 2628 ;; leads to buggy behavior when a tty frame is opened
68bba4e4
KL
2629 ;; later. Setting the keyboard coding system has no adverse
2630 ;; effect on X, so let's do it anyway. -- Lorentey
2631 (let ((kcs (or coding-system
2632 (car (get-language-info language-name
2633 'coding-system)))))
36ab8612 2634 (if kcs (set-keyboard-coding-system kcs frame)))
dc2be2fa 2635
36ab8612 2636 (unless frame
97c57fb2
KL
2637 (setq locale-coding-system
2638 (car (get-language-info language-name 'coding-priority)))))
7008ccac 2639
36ab8612 2640 (when (and (not frame)
97c57fb2 2641 coding-system
8dedddd5
KH
2642 (not (coding-system-equal coding-system
2643 locale-coding-system)))
7008ccac 2644 (prefer-coding-system coding-system)
218e7ce3
KH
2645 ;; Fixme: perhaps prefer-coding-system should set this too.
2646 ;; But it's not the time to do such a fundamental change.
2647 (setq default-sendmail-coding-system coding-system)
80e3310b 2648 (setq locale-coding-system coding-system))))
e76ef161 2649
52c7f9ee 2650 ;; On Windows, override locale-coding-system,
26245233
JR
2651 ;; default-file-name-coding-system, keyboard-coding-system,
2652 ;; terminal-coding-system with system codepage.
5eb94383 2653 (when (boundp 'w32-ansi-code-page)
893b49bb
JR
2654 (let ((code-page-coding (intern (format "cp%d" w32-ansi-code-page))))
2655 (when (coding-system-p code-page-coding)
36ab8612
MB
2656 (unless frame (setq locale-coding-system code-page-coding))
2657 (set-keyboard-coding-system code-page-coding frame)
d4aa48db 2658 (set-terminal-coding-system code-page-coding frame)
59938af3
EZ
2659 ;; Set default-file-name-coding-system last, so that Emacs
2660 ;; doesn't try to use cpNNNN when it defines keyboard and
2661 ;; terminal encoding. That's because the above two lines
2662 ;; will want to load code-pages.el, where cpNNNN are
2663 ;; defined; if default-file-name-coding-system were set to
2664 ;; cpNNNN while these two lines run, Emacs will want to use
2665 ;; it for encoding the file name it wants to load. And that
2666 ;; will fail, since cpNNNN is not yet usable until
2667 ;; code-pages.el finishes loading.
2668 (setq default-file-name-coding-system code-page-coding))))
893b49bb 2669
a41118cc 2670 (when (eq system-type 'darwin)
ff76e074
SM
2671 ;; On Darwin, file names are always encoded in utf-8, no matter
2672 ;; the locale.
2673 (setq default-file-name-coding-system 'utf-8)
2674 ;; Mac OS X's Terminal.app by default uses utf-8 regardless of
2675 ;; the locale.
2676 (when (and (null window-system)
36ab8612 2677 (equal (getenv "TERM_PROGRAM" frame) "Apple_Terminal"))
ff76e074
SM
2678 (set-terminal-coding-system 'utf-8)
2679 (set-keyboard-coding-system 'utf-8)))
a41118cc 2680
e76ef161 2681 ;; Default to A4 paper if we're not in a C, POSIX or US locale.
3479c806 2682 ;; (See comments in Flocale_info.)
36ab8612 2683 (unless frame
97c57fb2
KL
2684 (let ((locale locale)
2685 (paper (locale-info 'paper)))
2686 (if paper
2687 ;; This will always be null at the time of writing.
2688 (cond
2689 ((equal paper '(216 279))
2690 (setq ps-paper-type 'letter))
2691 ((equal paper '(210 297))
2692 (setq ps-paper-type 'a4)))
2693 (let ((vars '("LC_ALL" "LC_PAPER" "LANG")))
2694 (while (and vars (= 0 (length locale)))
36ab8612 2695 (setq locale (getenv (pop vars) frame))))
97c57fb2
KL
2696 (when locale
2697 ;; As of glibc 2.2.5, these are the only US Letter locales,
2698 ;; and the rest are A4.
2699 (setq ps-paper-type
2700 (or (locale-name-match locale '(("c$" . letter)
2701 ("posix$" . letter)
2702 (".._us" . letter)
2703 (".._pr" . letter)
2704 (".._ca" . letter)
2705 ("enu$" . letter) ; Windows
2706 ("esu$" . letter)
2707 ("enc$" . letter)
2708 ("frc$" . letter)))
2709 'a4)))))))
e76ef161 2710 nil)
40c81f74 2711\f
16400c32
KH
2712;;; Character property
2713
2714;; Each element has the form (PROP . TABLE).
2715;; PROP is a symbol representing a character property.
2716;; TABLE is a char-table containing the property value for each character.
2717;; TABLE may be a name of file to load to build a char-table.
2718;; Don't modify this variable directly but use `define-char-code-property'.
2719
2720(defvar char-code-property-alist nil
2721 "Alist of character property name vs char-table containing property values.
2722Internal use only.")
2723
2724(put 'char-code-property-table 'char-table-extra-slots 5)
2725
2726(defun define-char-code-property (name table &optional docstring)
2727 "Define NAME as a character code property given by TABLE.
2728TABLE is a char-table of purpose `char-code-property-table' with
2729these extra slots:
2730 1st: NAME.
2731 2nd: Function to call to get a property value of a character.
caff3c0a 2732 It is called with three arguments CHAR, VAL, and TABLE, where
16400c32
KH
2733 CHAR is a character, VAL is the value of (aref TABLE CHAR).
2734 3rd: Function to call to put a property value of a character.
2735 It is called with the same arguments as above.
2736 4th: Function to call to get a description string of a property value.
2737 It is called with one argument VALUE, a property value.
2738 5th: Data used by the above functions.
2739
2740TABLE may be a name of file to load to build a char-table. The
2741file should contain a call of `define-char-code-property' with a
2742char-table of the above format as the argument TABLE.
2743
2744TABLE may also be nil, in which case no property value is pre-assigned.
2745
49275d55 2746Optional 3rd argument DOCSTRING is a documentation string of the property.
16400c32
KH
2747
2748See also the documentation of `get-char-code-property' and
2749`put-char-code-property'."
2750 (or (symbolp name)
2751 (error "Not a symbol: %s" name))
2752 (if (char-table-p table)
2753 (or (and (eq (char-table-subtype table) 'char-code-property-table)
2754 (eq (char-table-extra-slot table 0) name))
2755 (error "Invalid char-table: %s" table))
2756 (or (stringp table)
2757 (error "Not a char-table nor a file name: %s" table)))
ebfa10d3 2758 (if (stringp table) (setq table (purecopy table)))
16400c32
KH
2759 (let ((slot (assq name char-code-property-alist)))
2760 (if slot
2761 (setcdr slot table)
2762 (setq char-code-property-alist
2763 (cons (cons name table) char-code-property-alist))))
905a9ed3 2764 (put name 'char-code-property-documentation (purecopy docstring)))
4ed46869
KH
2765
2766(defvar char-code-property-table
2767 (make-char-table 'char-code-property-table)
2768 "Char-table containing a property list of each character code.
16400c32 2769This table is used for properties not listed in `char-code-property-alist'.
4ed46869 2770See also the documentation of `get-char-code-property' and
96db204a 2771`put-char-code-property'.")
4ed46869
KH
2772
2773(defun get-char-code-property (char propname)
16400c32
KH
2774 "Return the value of CHAR's PROPNAME property."
2775 (let ((slot (assq propname char-code-property-alist)))
2776 (if slot
2777 (let (table value func)
2778 (if (stringp (cdr slot))
d0d8885d 2779 (load (cdr slot) nil t))
16400c32
KH
2780 (setq table (cdr slot)
2781 value (aref table char)
2782 func (char-table-extra-slot table 1))
2783 (if (functionp func)
2784 (setq value (funcall func char value table)))
2785 value)
2786 (plist-get (aref char-code-property-table char) propname))))
4ed46869
KH
2787
2788(defun put-char-code-property (char propname value)
16400c32 2789 "Store CHAR's PROPNAME property with VALUE.
4ed46869 2790It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
16400c32
KH
2791 (let ((slot (assq propname char-code-property-alist)))
2792 (if slot
2793 (let (table func)
2794 (if (stringp (cdr slot))
d0d8885d 2795 (load (cdr slot) nil t))
16400c32
KH
2796 (setq table (cdr slot)
2797 func (char-table-extra-slot table 2))
2798 (if (functionp func)
2799 (funcall func char value table)
2800 (aset table char value)))
2801 (let* ((plist (aref char-code-property-table char))
2802 (x (plist-put plist propname value)))
2803 (or (eq x plist)
2804 (aset char-code-property-table char x))))
2805 value))
2806
2807(defun char-code-property-description (prop value)
2808 "Return a description string of character property PROP's value VALUE.
2809If there's no description string for VALUE, return nil."
2810 (let ((slot (assq prop char-code-property-alist)))
2811 (if slot
2812 (let (table func)
2813 (if (stringp (cdr slot))
d0d8885d 2814 (load (cdr slot) nil t))
16400c32
KH
2815 (setq table (cdr slot)
2816 func (char-table-extra-slot table 3))
2817 (if (functionp func)
2818 (funcall func value))))))
4ed46869 2819
a127b764
KH
2820\f
2821;; Pretty description of encoded string
2822
2823;; Alist of ISO 2022 control code vs the corresponding mnemonic string.
2f7f4bee 2824(defconst iso-2022-control-alist
a127b764
KH
2825 '((?\x1b . "ESC")
2826 (?\x0e . "SO")
2827 (?\x0f . "SI")
2828 (?\x8e . "SS2")
2829 (?\x8f . "SS3")
2830 (?\x9b . "CSI")))
2831
2832(defun encoded-string-description (str coding-system)
2833 "Return a pretty description of STR that is encoded by CODING-SYSTEM."
2834 (setq str (string-as-unibyte str))
993b2a7d 2835 (mapconcat
a6dfc99b 2836 (if (and coding-system (eq (coding-system-type coding-system) 'iso-2022))
993b2a7d
KH
2837 ;; Try to get a pretty description for ISO 2022 escape sequences.
2838 (function (lambda (x) (or (cdr (assq x iso-2022-control-alist))
695effcc
JL
2839 (format "#x%02X" x))))
2840 (function (lambda (x) (format "#x%02X" x))))
993b2a7d 2841 str " "))
a127b764 2842
430c6ced 2843(defun encode-coding-char (char coding-system &optional charset)
a127b764 2844 "Encode CHAR by CODING-SYSTEM and return the resulting string.
430c6ced
KH
2845If CODING-SYSTEM can't safely encode CHAR, return nil.
2846The 3rd optional argument CHARSET, if non-nil, is a charset preferred
2847on encoding."
b529ed1e
KH
2848 (let* ((str1 (string-as-multibyte (string char)))
2849 (str2 (string-as-multibyte (string char char)))
2850 (found (find-coding-systems-string str1))
a127b764 2851 enc1 enc2 i1 i2)
b529ed1e
KH
2852 (if (and (consp found)
2853 (eq (car found) 'undecided))
2854 str1
2855 (when (memq (coding-system-base coding-system) found)
2856 ;; We must find the encoded string of CHAR. But, just encoding
2857 ;; CHAR will put extra control sequences (usually to designate
2858 ;; ASCII charset) at the tail if type of CODING is ISO 2022.
2859 ;; To exclude such tailing bytes, we at first encode one-char
2860 ;; string and two-char string, then check how many bytes at the
2861 ;; tail of both encoded strings are the same.
2862
430c6ced
KH
2863 (when charset
2864 (put-text-property 0 1 'charset charset str1)
2865 (put-text-property 0 2 'charset charset str2))
b529ed1e
KH
2866 (setq enc1 (encode-coding-string str1 coding-system)
2867 i1 (length enc1)
2868 enc2 (encode-coding-string str2 coding-system)
2869 i2 (length enc2))
2870 (while (and (> i1 0) (= (aref enc1 (1- i1)) (aref enc2 (1- i2))))
2871 (setq i1 (1- i1) i2 (1- i2)))
2872
2873 ;; Now (substring enc1 i1) and (substring enc2 i2) are the same,
2874 ;; and they are the extra control sequences at the tail to
2875 ;; exclude.
2876 (substring enc2 0 i2)))))
a127b764 2877
d2f613a0
DL
2878;; Backwards compatibility. These might be better with :init-value t,
2879;; but that breaks loadup.
2880(define-minor-mode unify-8859-on-encoding-mode
2881 "Obsolete."
2882 :group 'mule
2883 :global t)
2884(define-minor-mode unify-8859-on-decoding-mode
2885 "Obsolete."
2886 :group 'mule
2887 :global t)
a127b764 2888
7dd42fb1
KH
2889(defvar nonascii-insert-offset 0 "This variable is obsolete.")
2890(defvar nonascii-translation-table nil "This variable is obsolete.")
2891
838d78d4
JL
2892(defvar ucs-names nil
2893 "Alist of cached (CHAR-NAME . CHAR-CODE) pairs.")
2894
2895(defun ucs-names ()
2896 "Return alist of (CHAR-NAME . CHAR-CODE) pairs cached in `ucs-names'."
2897 (or ucs-names
ae63e572
KH
2898 (let ((bmp-ranges
2899 '((#x0000 . #x33FF)
2900 ;; (#x3400 . #x4DBF) CJK Ideograph Extension A
2901 (#x4DC0 . #x4DFF)
2902 ;; (#x4E00 . #x9FFF) CJK Ideograph
2903 (#xA000 . #x0D7FF)
2904 ;; (#xD800 . #xFAFF) Surrogate/Private
2905 (#xFB00 . #xFFFD)))
2906 (upper-ranges
2907 '((#x10000 . #x134FF)
78edd3b7 2908 ;; (#x13500 . #x1CFFF) unused
ae63e572 2909 (#x1D000 . #x1FFFF)
78edd3b7 2910 ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused
ae63e572
KH
2911 (#xE0000 . #xE01FF)))
2912 (gc-cons-threshold 10000000)
2913 c end name names)
2914 (dolist (range bmp-ranges)
2915 (setq c (car range)
2916 end (cdr range))
2917 (while (<= c end)
2918 (if (setq name (get-char-code-property c 'name))
2919 (push (cons name c) names))
2920 (if (setq name (get-char-code-property c 'old-name))
2921 (push (cons name c) names))
2922 (setq c (1+ c))))
2923 (dolist (range upper-ranges)
2924 (setq c (car range)
2925 end (cdr range))
2926 (while (<= c end)
2927 (if (setq name (get-char-code-property c 'name))
2928 (push (cons name c) names))
2929 (setq c (1+ c))))
e0727873 2930 (setq ucs-names names))))
838d78d4
JL
2931
2932(defvar ucs-completions (lazy-completion-table ucs-completions ucs-names)
2933 "Lazy completion table for completing on Unicode character names.")
bdf66e8d 2934(put 'ucs-completions 'risky-local-variable t)
838d78d4
JL
2935
2936(defun read-char-by-name (prompt)
2937 "Read a character by its Unicode name or hex number string.
c0a67379 2938Display PROMPT and read a string that represents a character by its
a167e505
EZ
2939Unicode property `name' or `old-name'.
2940
2941This function returns the character as a number.
2942
2943You can type a few of the first letters of the Unicode name and
2944use completion. If you type a substring of the Unicode name
2945preceded by an asterisk `*' and use completion, it will show all
2946the characters whose names include that substring, not necessarily
2947at the beginning of the name.
2948
2949This function also accepts a hexadecimal number of Unicode code
2950point or a number in hash notation, e.g. #o21430 for octal,
2951#x2318 for hex, or #10r8984 for decimal."
838d78d4
JL
2952 (let* ((completion-ignore-case t)
2953 (input (completing-read prompt ucs-completions)))
c0a67379 2954 (cond
d0d8885d 2955 ((string-match-p "^[0-9a-fA-F]+$" input)
c0a67379 2956 (string-to-number input 16))
d0d8885d 2957 ((string-match-p "^#" input)
c0a67379
JL
2958 (read input))
2959 (t
d6ff002e 2960 (cdr (assoc-string input (ucs-names) t))))))
838d78d4 2961
dcbac02a
JL
2962(defun ucs-insert (character &optional count inherit)
2963 "Insert COUNT copies of CHARACTER of the given Unicode code point.
c0a67379 2964Interactively, prompts for a Unicode character name or a hex number
dcbac02a 2965using `read-char-by-name'.
a167e505
EZ
2966
2967You can type a few of the first letters of the Unicode name and
2968use completion. If you type a substring of the Unicode name
2969preceded by an asterisk `*' and use completion, it will show all
2970the characters whose names include that substring, not necessarily
2971at the beginning of the name.
2972
dcbac02a
JL
2973The optional third arg INHERIT (non-nil when called interactively),
2974says to inherit text properties from adjoining text, if those
2975properties are sticky."
2976 (interactive
2977 (list (read-char-by-name "Unicode (name or hex): ")
2978 (prefix-numeric-value current-prefix-arg)
2979 t))
2980 (unless count (setq count 1))
2981 (if (stringp character)
2982 (setq character (string-to-number character 16)))
c0a67379 2983 (cond
dcbac02a
JL
2984 ((not (integerp character))
2985 (error "Not a Unicode character code: %S" character))
2986 ((or (< character 0) (> character #x10FFFF))
2987 (error "Not a Unicode character code: 0x%X" character)))
2988 (if inherit
2989 (dotimes (i count) (insert-and-inherit character))
2990 (dotimes (i count) (insert character))))
cb9e47dd 2991
c0a67379 2992(define-key ctl-x-map "8\r" 'ucs-insert)
a127b764 2993
9ee5b744 2994;; arch-tag: b382c432-4b36-460e-bf4c-05efd0bb18dc
4ed46869 2995;;; mule-cmds.el ends here