(glyph-char): Fix for a new character code space (22-bit).
[bpt/emacs.git] / lisp / international / mule-cmds.el
CommitLineData
6b61353c 1;;; mule-cmds.el --- commands for mulitilingual environment -*-coding: iso-2022-7bit -*-
9ee5b744 2
d4877ac1 3;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
d7a0267c 4;; 2006, 2007 Free Software Foundation, Inc.
7976eda0 5;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
d7a0267c 6;; 2005, 2006, 2007
2fd125a3
KH
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H14PRO021
8f924df7 9;; Copyright (C) 2003
97941b05
KH
10;; National Institute of Advanced Industrial Science and Technology (AIST)
11;; Registration Number H13PRO009
4ed46869 12
1f547b92 13;; Keywords: mule, i18n
4ed46869
KH
14
15;; This file is part of GNU Emacs.
16
17;; GNU Emacs is free software; you can redistribute it and/or modify
18;; it under the terms of the GNU General Public License as published by
19;; the Free Software Foundation; either version 2, or (at your option)
20;; any later version.
21
22;; GNU Emacs is distributed in the hope that it will be useful,
23;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25;; GNU General Public License for more details.
26
27;; You should have received a copy of the GNU General Public License
369314dc 28;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
29;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
30;; Boston, MA 02110-1301, USA.
4ed46869 31
60370d40
PJ
32;;; Commentary:
33
4ed46869
KH
34;;; Code:
35
2375e71a
KH
36(eval-when-compile
37 (defvar dos-codepage)
38 (autoload 'widget-value "wid-edit"))
cda74479 39
4a91d930
JB
40(defvar mac-system-coding-system)
41(defvar mac-system-locale)
42
4ed46869
KH
43;;; MULE related key bindings and menus.
44
0709d285 45(defvar mule-keymap (make-sparse-keymap)
33d17698 46 "Keymap for Mule (Multilingual environment) specific commands.")
4ed46869 47
8f81f784 48;; Keep "C-x C-m ..." for mule specific commands.
0709d285 49(define-key ctl-x-map "\C-m" mule-keymap)
ef8a8c8c 50
4ed46869 51(define-key mule-keymap "f" 'set-buffer-file-coding-system)
d80dee05 52(define-key mule-keymap "r" 'revert-buffer-with-coding-system)
2308d0d7 53(define-key mule-keymap "F" 'set-file-name-coding-system)
4ed46869 54(define-key mule-keymap "t" 'set-terminal-coding-system)
15b3e511
KH
55(define-key mule-keymap "k" 'set-keyboard-coding-system)
56(define-key mule-keymap "p" 'set-buffer-process-coding-system)
7624ebb9
KH
57(define-key mule-keymap "x" 'set-selection-coding-system)
58(define-key mule-keymap "X" 'set-next-selection-coding-system)
8b784951 59(define-key mule-keymap "\C-\\" 'set-input-method)
15b3e511 60(define-key mule-keymap "c" 'universal-coding-system-argument)
b4fba33f 61(define-key mule-keymap "l" 'set-language-environment)
4ed46869 62
538d88fb
EZ
63(defvar mule-menu-keymap
64 (make-sparse-keymap "Mule (Multilingual Environment)")
33d17698 65 "Keymap for Mule (Multilingual environment) menu specific commands.")
15b3e511 66
dcad02bc
EZ
67(defvar describe-language-environment-map
68 (make-sparse-keymap "Describe Language Environment"))
15b3e511 69
dcad02bc
EZ
70(defvar setup-language-environment-map
71 (make-sparse-keymap "Set Language Environment"))
15b3e511 72
dcad02bc
EZ
73(defvar set-coding-system-map
74 (make-sparse-keymap "Set Coding System"))
15b3e511 75
15b3e511 76(define-key-after mule-menu-keymap [set-language-environment]
ad9f6125 77 (list 'menu-item "Set Language Environment" setup-language-environment-map))
15b3e511
KH
78(define-key-after mule-menu-keymap [separator-mule]
79 '("--")
80 t)
81(define-key-after mule-menu-keymap [toggle-input-method]
538d88fb 82 '(menu-item "Toggle Input Method" toggle-input-method)
15b3e511 83 t)
8b784951 84(define-key-after mule-menu-keymap [set-input-method]
538d88fb 85 '(menu-item "Select Input Method..." set-input-method)
15b3e511 86 t)
cda74479
DL
87(define-key-after mule-menu-keymap [describe-input-method]
88 '(menu-item "Describe Input Method" describe-input-method))
15b3e511
KH
89(define-key-after mule-menu-keymap [separator-input-method]
90 '("--")
91 t)
15b3e511 92(define-key-after mule-menu-keymap [set-various-coding-system]
cda74479 93 (list 'menu-item "Set Coding Systems" set-coding-system-map
be9650bc 94 :enable 'default-enable-multibyte-characters))
538d88fb
EZ
95(define-key-after mule-menu-keymap [view-hello-file]
96 '(menu-item "Show Multi-lingual Text" view-hello-file
97 :enable (file-readable-p
98 (expand-file-name "HELLO" data-directory))
99 :help "Display file which says HELLO in many languages")
15b3e511
KH
100 t)
101(define-key-after mule-menu-keymap [separator-coding-system]
102 '("--")
103 t)
538d88fb
EZ
104(define-key-after mule-menu-keymap [describe-language-environment]
105 (list 'menu-item "Describe Language Environment"
106 describe-language-environment-map
cda74479 107 :help "Show multilingual settings for a specific language")
15b3e511 108 t)
538d88fb
EZ
109(define-key-after mule-menu-keymap [describe-input-method]
110 '(menu-item "Describe Input Method..." describe-input-method
cda74479 111 :help "Keyboard layout for a specific input method")
538d88fb
EZ
112 t)
113(define-key-after mule-menu-keymap [describe-coding-system]
114 '(menu-item "Describe Coding System..." describe-coding-system)
115 t)
cda74479
DL
116(define-key-after mule-menu-keymap [list-character-sets]
117 '(menu-item "List Character Sets" list-character-sets
118 :help "Show table of available character sets"))
538d88fb
EZ
119(define-key-after mule-menu-keymap [mule-diag]
120 '(menu-item "Show All of Mule Status" mule-diag
121 :help "Display multilingual environment settings")
15b3e511
KH
122 t)
123
3a151e98 124(define-key-after set-coding-system-map [universal-coding-system-argument]
538d88fb
EZ
125 '(menu-item "For Next Command" universal-coding-system-argument
126 :help "Coding system to be used by next command")
3a151e98 127 t)
d80dee05
RS
128(define-key-after set-coding-system-map [separator-1]
129 '("--")
130 t)
131(define-key-after set-coding-system-map [set-buffer-file-coding-system]
132 '(menu-item "For Saving This Buffer" set-buffer-file-coding-system
133 :help "How to encode this buffer when saved")
134 t)
135(define-key-after set-coding-system-map [revert-buffer-with-coding-system]
136 '(menu-item "For Reverting This File Now" revert-buffer-with-coding-system
137 :enable buffer-file-name
138 :help "Revisit this file immediately using specified coding system")
139 t)
2308d0d7
KH
140(define-key-after set-coding-system-map [set-file-name-coding-system]
141 '(menu-item "For File Name" set-file-name-coding-system
142 :help "How to decode/encode file names")
143 t)
d80dee05
RS
144(define-key-after set-coding-system-map [separator-2]
145 '("--")
15b3e511 146 t)
d80dee05 147
15b3e511 148(define-key-after set-coding-system-map [set-keyboard-coding-system]
538d88fb
EZ
149 '(menu-item "For Keyboard" set-keyboard-coding-system
150 :help "How to decode keyboard input")
15b3e511 151 t)
d80dee05
RS
152(define-key-after set-coding-system-map [set-terminal-coding-system]
153 '(menu-item "For Terminal" set-terminal-coding-system
154 :enable (null (memq window-system '(x w32 mac)))
155 :help "How to encode terminal output")
156 t)
157(define-key-after set-coding-system-map [separator-3]
158 '("--")
15b3e511 159 t)
7624ebb9 160(define-key-after set-coding-system-map [set-selection-coding-system]
538d88fb
EZ
161 '(menu-item "For X Selections/Clipboard" set-selection-coding-system
162 :visible (display-selections-p)
163 :help "How to en/decode data to/from selection/clipboard")
7624ebb9
KH
164 t)
165(define-key-after set-coding-system-map [set-next-selection-coding-system]
538d88fb
EZ
166 '(menu-item "For Next X Selection" set-next-selection-coding-system
167 :visible (display-selections-p)
168 :help "How to en/decode next selection/clipboard operation")
7624ebb9 169 t)
d80dee05
RS
170(define-key-after set-coding-system-map [set-buffer-process-coding-system]
171 '(menu-item "For I/O with Subprocess" set-buffer-process-coding-system
172 :visible (fboundp 'start-process)
173 :enable (get-buffer-process (current-buffer))
174 :help "How to en/decode I/O from/to subprocess connected to this buffer")
175 t)
176
177
15b3e511 178(define-key setup-language-environment-map
538d88fb 179 [Default] '(menu-item "Default" setup-specified-language-environment))
4ed46869 180
cda74479
DL
181(define-key describe-language-environment-map
182 [Default] '(menu-item "Default" describe-specified-language-support))
183
4ed46869
KH
184;; This should be a single character key binding because users use it
185;; very frequently while editing multilingual text. Now we can use
186;; only two such keys: "\C-\\" and "\C-^", but the latter is not
187;; convenient because it requires shifting on most keyboards. An
188;; alternative is "\C-\]" which is now bound to `abort-recursive-edit'
189;; but it won't be used that frequently.
190(define-key global-map "\C-\\" 'toggle-input-method)
191
5f395df3
SM
192;; This is no good because people often type Shift-SPC
193;; meaning to type SPC. -- rms.
194;; ;; Here's an alternative key binding for X users (Shift-SPACE).
195;; (define-key global-map [?\S- ] 'toggle-input-method)
b4fba33f 196
464cc130
KH
197;;; Mule related hyperlinks.
198(defconst help-xref-mule-regexp-template
199 (purecopy (concat "\\(\\<\\("
200 "\\(coding system\\)\\|"
d0c40faa
KH
201 "\\(input method\\)\\|"
202 "\\(character set\\)\\|"
203 "\\(charset\\)"
464cc130
KH
204 "\\)\\s-+\\)?"
205 ;; Note starting with word-syntax character:
206 "`\\(\\sw\\(\\sw\\|\\s_\\)+\\)'")))
207
26d87040
EZ
208(defun coding-system-change-eol-conversion (coding-system eol-type)
209 "Return a coding system which differs from CODING-SYSTEM in eol conversion.
210The returned coding system converts end-of-line by EOL-TYPE
211but text as the same way as CODING-SYSTEM.
212EOL-TYPE should be `unix', `dos', `mac', or nil.
213If EOL-TYPE is nil, the returned coding system detects
214how end-of-line is formatted automatically while decoding.
215
216EOL-TYPE can be specified by an integer 0, 1, or 2.
217They means `unix', `dos', and `mac' respectively."
218 (if (symbolp eol-type)
219 (setq eol-type (cond ((eq eol-type 'unix) 0)
220 ((eq eol-type 'dos) 1)
221 ((eq eol-type 'mac) 2)
222 (t eol-type))))
1d5b0c66
CY
223 ;; We call `coding-system-base' before `coding-system-eol-type',
224 ;; because the coding-system may not be initialized until then.
225 (let* ((base (coding-system-base coding-system))
226 (orig-eol-type (coding-system-eol-type coding-system)))
227 (cond ((vectorp orig-eol-type)
228 (if (not eol-type)
229 coding-system
230 (aref orig-eol-type eol-type)))
231 ((not eol-type)
232 base)
233 ((= eol-type orig-eol-type)
234 coding-system)
235 ((progn (setq orig-eol-type (coding-system-eol-type base))
236 (vectorp orig-eol-type))
237 (aref orig-eol-type eol-type)))))
26d87040
EZ
238
239(defun coding-system-change-text-conversion (coding-system coding)
240 "Return a coding system which differs from CODING-SYSTEM in text conversion.
241The returned coding system converts text by CODING
242but end-of-line as the same way as CODING-SYSTEM.
243If CODING is nil, the returned coding system detects
244how text is formatted automatically while decoding."
1d77e15a
JR
245 (let ((eol-type (coding-system-eol-type coding-system)))
246 (coding-system-change-eol-conversion
247 (if coding coding 'undecided)
248 (if (numberp eol-type) (aref [unix dos mac] eol-type)))))
26d87040 249
4ed46869 250(defun toggle-enable-multibyte-characters (&optional arg)
6998e1a1
RS
251 "Change whether this buffer uses multibyte characters.
252With arg, use multibyte characters if the arg is positive.
253
254Note that this command does not convert the byte contents of
255the buffer; it only changes the way those bytes are interpreted.
256In general, therefore, this command *changes* the sequence of
257characters that the current buffer contains.
258
259We suggest you avoid using use this command unless you know what you
260are doing. If you use it by mistake, and the buffer is now displayed
261wrong, use this command again to toggle back to the right mode."
4ed46869 262 (interactive "P")
b7079457
RS
263 (let ((new-flag
264 (if (null arg) (null enable-multibyte-characters)
265 (> (prefix-numeric-value arg) 0))))
266 (set-buffer-multibyte new-flag))
4ed46869
KH
267 (force-mode-line-update))
268
269(defun view-hello-file ()
270 "Display the HELLO file which list up many languages and characters."
271 (interactive)
8f81f784
KH
272 ;; We have to decode the file in any environment.
273 (let ((default-enable-multibyte-characters t)
95fa03b4 274 (coding-system-for-read 'iso-2022-7bit))
544dd975 275 (view-file (expand-file-name "HELLO" data-directory))))
4ed46869 276
9e9a77a6 277(defun universal-coding-system-argument (coding-system)
15b3e511 278 "Execute an I/O command using the specified coding system."
9e9a77a6
RS
279 (interactive
280 (let ((default (and buffer-file-coding-system
34104362 281 (not (eq (coding-system-type buffer-file-coding-system)
a0d96cad 282 'undecided))
9e9a77a6
RS
283 buffer-file-coding-system)))
284 (list (read-coding-system
285 (if default
5b76833f 286 (format "Coding system for following command (default %s): " default)
9e9a77a6
RS
287 "Coding system for following command: ")
288 default))))
289 (let* ((keyseq (read-key-sequence
e14a8f4c 290 (format "Command to execute with %s:" coding-system)))
04363179
GM
291 (cmd (key-binding keyseq))
292 prefix)
293
294 (when (eq cmd 'universal-argument)
295 (call-interactively cmd)
a1506d29 296
04363179
GM
297 ;; Process keys bound in `universal-argument-map'.
298 (while (progn
299 (setq keyseq (read-key-sequence nil t)
300 cmd (key-binding keyseq t))
301 (not (eq cmd 'universal-argument-other-key)))
302 (let ((current-prefix-arg prefix-arg)
d37ef0f6 303 ;; Have to bind `last-command-char' here so that
6b61353c 304 ;; `digit-argument', for instance, can compute the
04363179
GM
305 ;; prefix arg.
306 (last-command-char (aref keyseq 0)))
307 (call-interactively cmd)))
308
1f547b92 309 ;; This is the final call to `universal-argument-other-key', which
04363179
GM
310 ;; set's the final `prefix-arg.
311 (let ((current-prefix-arg prefix-arg))
312 (call-interactively cmd))
a1506d29 313
04363179
GM
314 ;; Read the command to execute with the given prefix arg.
315 (setq prefix prefix-arg
316 keyseq (read-key-sequence nil t)
317 cmd (key-binding keyseq)))
318
15b3e511 319 (let ((coding-system-for-read coding-system)
04363179 320 (coding-system-for-write coding-system)
0e9ec609 321 (coding-system-require-warning t)
04363179 322 (current-prefix-arg prefix))
15b3e511
KH
323 (message "")
324 (call-interactively cmd))))
325
de94d711 326(defun set-default-coding-systems (coding-system)
0c3154d2 327 "Set default value of various coding systems to CODING-SYSTEM.
387136f6 328This sets the following coding systems:
0c3154d2 329 o coding system of a newly created buffer
8efc03e1
KH
330 o default coding system for subprocess I/O
331This also sets the following values:
d3e4babd
MB
332 o default value used as `file-name-coding-system' for converting file names
333 if CODING-SYSTEM is ASCII-compatible.
03c35c83 334 o default value for the command `set-terminal-coding-system' (not on MSDOS)
d3e4babd
MB
335 o default value for the command `set-keyboard-coding-system'
336 if CODING-SYSTEM is ASCII-compatible.."
de94d711
KH
337 (check-coding-system coding-system)
338 (setq-default buffer-file-coding-system coding-system)
6b61353c
KH
339 (if (fboundp 'ucs-set-table-for-input)
340 (dolist (buffer (buffer-list))
341 (or (local-variable-p 'buffer-file-coding-system buffer)
342 (ucs-set-table-for-input buffer))))
343
db9aec47 344 (if (eq system-type 'darwin)
a41118cc 345 ;; The file-name coding system on Darwin systems is always utf-8.
db9aec47
KH
346 (setq default-file-name-coding-system 'utf-8)
347 (if (and default-enable-multibyte-characters
348 (or (not coding-system)
349 (coding-system-get coding-system 'ascii-compatible-p)))
350 (setq default-file-name-coding-system coding-system)))
03c35c83
EZ
351 ;; If coding-system is nil, honor that on MS-DOS as well, so
352 ;; that they could reset the terminal coding system.
353 (unless (and (eq window-system 'pc) coding-system)
354 (setq default-terminal-coding-system coding-system))
de94d711 355 (setq default-keyboard-coding-system coding-system)
1d77e15a
JR
356 ;; Preserve eol-type from existing default-process-coding-systems.
357 ;; On non-unix-like systems in particular, these may have been set
358 ;; carefully by the user, or by the startup code, to deal with the
359 ;; users shell appropriately, so should not be altered by changing
360 ;; language environment.
361 (let ((output-coding
362 (coding-system-change-text-conversion
363 (car default-process-coding-system) coding-system))
364 (input-coding
365 (coding-system-change-text-conversion
366 (cdr default-process-coding-system) coding-system)))
367 (setq default-process-coding-system
368 (cons output-coding input-coding))))
de94d711 369
0c3154d2
KH
370(defun prefer-coding-system (coding-system)
371 "Add CODING-SYSTEM at the front of the priority list for automatic detection.
387136f6 372This also sets the following coding systems:
0c3154d2 373 o coding system of a newly created buffer
8efc03e1
KH
374 o default coding system for subprocess I/O
375This also sets the following values:
c654de1d 376 o default value used as `file-name-coding-system' for converting file names.
03c35c83
EZ
377 o default value for the command `set-terminal-coding-system' (not on MSDOS)
378 o default value for the command `set-keyboard-coding-system'
379
bd3ac67e
EZ
380If CODING-SYSTEM specifies a certain type of EOL conversion, the coding
381systems set by this function will use that type of EOL conversion.
382
03c35c83
EZ
383This command does not change the default value of terminal coding system
384for MS-DOS terminal, because DOS terminals only support a single coding
385system, and Emacs automatically sets the default to that coding system at
5beaa0d1
KH
386startup.
387
c654de1d 388A coding system that requires automatic detection of text
8f924df7 389+encoding (e.g. undecided, unix) can't be preferred.."
0c3154d2
KH
390 (interactive "zPrefer coding system: ")
391 (if (not (and coding-system (coding-system-p coding-system)))
392 (error "Invalid coding system `%s'" coding-system))
97941b05
KH
393 (if (memq (coding-system-type coding-system) '(raw-text undecided))
394 (error "Can't prefer the coding system `%s'" coding-system))
395 (let ((base (coding-system-base coding-system))
bd3ac67e 396 (eol-type (coding-system-eol-type coding-system)))
97941b05
KH
397 (set-coding-system-priority base)
398 (and (interactive-p)
399 (or (eq base coding-system)
400 (message "Highest priority is set to %s (base of %s)"
401 base coding-system)))
bd3ac67e 402 ;; If they asked for specific EOL conversion, honor that.
6f9dc4fd 403 (if (memq eol-type '(0 1 2))
97941b05
KH
404 (setq base
405 (coding-system-change-eol-conversion base eol-type)))
406 (set-default-coding-systems base)))
0c3154d2 407
b5edd1d1
KH
408(defvar sort-coding-systems-predicate nil
409 "If non-nil, a predicate function to sort coding systems.
410
411It is called with two coding systems, and should return t if the first
412one is \"less\" than the second.
413
414The function `sort-coding-systems' use it.")
415
416(defun sort-coding-systems (codings)
417 "Sort coding system list CODINGS by a priority of each coding system.
9b99ec0e 418Returns the sorted list. CODINGS is modified by side effects.
b5edd1d1
KH
419
420If a coding system is most preferred, it has the highest priority.
421Otherwise, a coding system corresponds to some MIME charset has higher
422priorities. Among them, a coding system included in `coding-system'
423key of the current language environment has higher priorities. See
424also the documentation of `language-info-alist'.
425
426If the variable `sort-coding-systems-predicate' (which see) is
427non-nil, it is used to sort CODINGS in the different way than above."
428 (if sort-coding-systems-predicate
429 (sort codings sort-coding-systems-predicate)
8f924df7
KH
430 (let* ((from-priority (coding-system-priority-list))
431 (most-preferred (car from-priority))
b5edd1d1
KH
432 (lang-preferred (get-language-info current-language-environment
433 'coding-system))
434 (func (function
435 (lambda (x)
436 (let ((base (coding-system-base x)))
da1ebad1
KH
437 ;; We calculate the priority number 0..255 by
438 ;; using the 8 bits PMMLCEII as this:
439 ;; P: 1 iff most preferred.
440 ;; MM: greater than 0 iff mime-charset.
441 ;; L: 1 iff one of the current lang. env.'s codings.
442 ;; C: 1 iff one of codings listed in the category list.
443 ;; E: 1 iff not XXX-with-esc
444 ;; II: if iso-2022 based, 0..3, else 1.
445 (logior
446 (lsh (if (eq base most-preferred) 1 0) 7)
447 (lsh
8f924df7 448 (let ((mime (coding-system-get base :mime-charset)))
b439e72a
DL
449 ;; Prefer coding systems corresponding to a
450 ;; MIME charset.
b5edd1d1 451 (if mime
b439e72a
DL
452 ;; Lower utf-16 priority so that we
453 ;; normally prefer utf-8 to it, and put
454 ;; x-ctext below that.
de814dd9
KH
455 (cond ((string-match "utf-16"
456 (symbol-name mime))
da1ebad1 457 2)
b439e72a 458 ((string-match "^x-" (symbol-name mime))
da1ebad1
KH
459 1)
460 (t 3))
b5edd1d1 461 0))
da1ebad1
KH
462 5)
463 (lsh (if (memq base lang-preferred) 1 0) 4)
8f924df7 464 (lsh (if (memq base from-priority) 1 0) 3)
da1ebad1
KH
465 (lsh (if (string-match "-with-esc\\'"
466 (symbol-name base))
467 0 1) 2)
8f924df7
KH
468 (if (eq (coding-system-type base) 'iso-2022)
469 (let ((category (coding-system-category base)))
470 ;; For ISO based coding systems, prefer
471 ;; one that doesn't use designation nor
472 ;; locking/single shifting.
a0d96cad
KH
473 (cond
474 ((or (eq category 'coding-category-iso-8-1)
475 (eq category 'coding-category-iso-8-2))
476 2)
477 ((or (eq category 'coding-category-iso-7-tight)
478 (eq category 'coding-category-iso-7))
479 1)
480 (t
481 0)))
482 1)
a6dfc99b 483 ))))))
b5edd1d1
KH
484 (sort codings (function (lambda (x y)
485 (> (funcall func x) (funcall func y))))))))
54b226f7 486
3fc7dfe5 487(defun find-coding-systems-region (from to)
54b226f7 488 "Return a list of proper coding systems to encode a text between FROM and TO.
8f924df7 489
d37ef0f6 490If FROM is a string, find coding systems in that instead of the buffer.
54b226f7
KH
491All coding systems in the list can safely encode any multibyte characters
492in the text.
493
e8dd0160 494If the text contains no multibyte characters, return a list of a single
3fc7dfe5 495element `undecided'."
b5edd1d1
KH
496 (let ((codings (find-coding-systems-region-internal from to)))
497 (if (eq codings t)
498 ;; The text contains only ASCII characters. Any coding
499 ;; systems are safe.
500 '(undecided)
501 ;; We need copy-sequence because sorting will alter the argument.
502 (sort-coding-systems (copy-sequence codings)))))
54b226f7 503
3fc7dfe5
KH
504(defun find-coding-systems-string (string)
505 "Return a list of proper coding systems to encode STRING.
506All coding systems in the list can safely encode any multibyte characters
507in STRING.
508
e8dd0160 509If STRING contains no multibyte characters, return a list of a single
3fc7dfe5 510element `undecided'."
b5edd1d1 511 (find-coding-systems-region string nil))
3fc7dfe5
KH
512
513(defun find-coding-systems-for-charsets (charsets)
514 "Return a list of proper coding systems to encode characters of CHARSETS.
6053d86a 515CHARSETS is a list of character sets.
1f547b92
DL
516
517This only finds coding systems of type `charset', whose
518`:charset-list' property includes all of CHARSETS (plus `ascii' for
519ascii-compatible coding systems). It was used in older versions of
520Emacs, but is unlikely to be what you really want now."
521 ;; Deal with aliases.
522 (setq charsets (mapcar (lambda (c)
523 (get-charset-property c :name))
524 charsets))
b5edd1d1
KH
525 (cond ((or (null charsets)
526 (and (= (length charsets) 1)
527 (eq 'ascii (car charsets))))
528 '(undecided))
529 ((or (memq 'eight-bit-control charsets)
530 (memq 'eight-bit-graphic charsets))
1f547b92 531 '(raw-text utf-8-emacs))
b5edd1d1 532 (t
1f547b92
DL
533 (let (codings)
534 (dolist (cs (coding-system-list t))
c7b4b466
DL
535 (let ((cs-charsets (and (eq (coding-system-type cs) 'charset)
536 (coding-system-charset-list cs)))
1f547b92
DL
537 (charsets charsets))
538 (if (coding-system-get cs :ascii-compatible-p)
539 (add-to-list 'cs-charsets 'ascii))
540 (if (catch 'ok
541 (when cs-charsets
542 (while charsets
543 (unless (memq (pop charsets) cs-charsets)
544 (throw 'ok nil)))
545 t))
546 (push cs codings))))
547 (nreverse codings)))))
54b226f7 548
51ed58ea
KH
549(defun find-multibyte-characters (from to &optional maxcount excludes)
550 "Find multibyte characters in the region specified by FROM and TO.
551If FROM is a string, find multibyte characters in the string.
552The return value is an alist of the following format:
553 ((CHARSET COUNT CHAR ...) ...)
554where
555 CHARSET is a character set,
556 COUNT is a number of characters,
1f547b92 557 CHARs are the characters found from the character set.
51ed58ea 558Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
15f2c48a 559Optional 4th arg EXCLUDE is a list of character sets to be ignored."
51ed58ea
KH
560 (let ((chars nil)
561 charset char)
562 (if (stringp from)
a0d96cad
KH
563 (if (multibyte-string-p from)
564 (let ((idx 0))
565 (while (setq idx (string-match "[^\000-\177]" from idx))
566 (setq char (aref from idx)
567 charset (char-charset char))
568 (unless (memq charset excludes)
569 (let ((slot (assq charset chars)))
570 (if slot
571 (if (not (memq char (nthcdr 2 slot)))
572 (let ((count (nth 1 slot)))
573 (setcar (cdr slot) (1+ count))
574 (if (or (not maxcount) (< count maxcount))
575 (nconc slot (list char)))))
576 (setq chars (cons (list charset 1 char) chars)))))
577 (setq idx (1+ idx)))))
578 (if enable-multibyte-characters
579 (save-excursion
580 (goto-char from)
581 (while (re-search-forward "[^\000-\177]" to t)
582 (setq char (preceding-char)
583 charset (char-charset char))
584 (unless (memq charset excludes)
51ed58ea
KH
585 (let ((slot (assq charset chars)))
586 (if slot
a0d96cad 587 (if (not (member char (nthcdr 2 slot)))
51ed58ea
KH
588 (let ((count (nth 1 slot)))
589 (setcar (cdr slot) (1+ count))
590 (if (or (not maxcount) (< count maxcount))
591 (nconc slot (list char)))))
a0d96cad 592 (setq chars (cons (list charset 1 char) chars)))))))))
51ed58ea
KH
593 (nreverse chars)))
594
738746ba
KH
595(defun search-unencodable-char (coding-system)
596 "Search forward from point for a character that is not encodable.
597It asks which coding system to check.
598If such a character is found, set point after that character.
599Otherwise, don't move point.
600
601When called from a program, the value is a position of the found character,
602or nil if all characters are encodable."
603 (interactive
604 (list (let ((default (or buffer-file-coding-system 'us-ascii)))
605 (read-coding-system
5b76833f 606 (format "Coding-system (default %s): " default)
738746ba
KH
607 default))))
608 (let ((pos (unencodable-char-position (point) (point-max) coding-system)))
609 (if pos
610 (goto-char (1+ pos))
611 (message "All following characters are encodable by %s" coding-system))
612 pos))
613
c83c4f60
RS
614(defvar last-coding-system-specified nil
615 "Most recent coding system explicitly specified by the user when asked.
616This variable is set whenever Emacs asks the user which coding system
617to use in order to write a file. If you set it to nil explicitly,
618then call `write-region', then afterward this variable will be non-nil
619only if the user was explicitly asked and specified a coding system.")
620
b5edd1d1 621(defvar select-safe-coding-system-accept-default-p nil
fea6b736 622 "If non-nil, a function to control the behavior of coding system selection.
b5edd1d1
KH
623The meaning is the same as the argument ACCEPT-DEFAULT-P of the
624function `select-safe-coding-system' (which see). This variable
625overrides that argument.")
626
9ee5b744
SM
627(defun select-safe-coding-system-interactively (from to codings unsafe
628 &optional rejected default)
629 "Select interactively a coding system for the region FROM ... TO.
630FROM can be a string, as in `write-region'.
631CODINGS is the list of base coding systems known to be safe for this region,
632 typically obtained with `find-coding-systems-region'.
633UNSAFE is a list of coding systems known to be unsafe for this region.
634REJECTED is a list of coding systems which were safe but for some reason
635 were not recommended in the particular context.
636DEFAULT is the coding system to use by default in the query."
637 ;; At first, if some defaults are unsafe, record at most 11
638 ;; problematic characters and their positions for them by turning
639 ;; (CODING ...)
640 ;; into
641 ;; ((CODING (POS . CHAR) (POS . CHAR) ...) ...)
642 (if unsafe
643 (setq unsafe
644 (mapcar #'(lambda (coding)
645 (cons coding
646 (if (stringp from)
647 (mapcar #'(lambda (pos)
648 (cons pos (aref from pos)))
649 (unencodable-char-position
650 0 (length from) coding
651 11 from))
652 (mapcar #'(lambda (pos)
653 (cons pos (char-after pos)))
654 (unencodable-char-position
655 from to coding 11)))))
656 unsafe)))
657
658 ;; Change each safe coding system to the corresponding
659 ;; mime-charset name if it is also a coding system. Such a name
660 ;; is more friendly to users.
661 (let ((l codings)
662 mime-charset)
663 (while l
a77c22c2
KH
664 (setq mime-charset (coding-system-get (car l) :mime-charset))
665 (if (and mime-charset (coding-system-p mime-charset)
666 (coding-system-equal (car l) mime-charset))
9ee5b744
SM
667 (setcar l mime-charset))
668 (setq l (cdr l))))
669
670 ;; Don't offer variations with locking shift, which you
671 ;; basically never want.
672 (let (l)
673 (dolist (elt codings (setq codings (nreverse l)))
674 (unless (or (eq 'coding-category-iso-7-else
675 (coding-system-category elt))
676 (eq 'coding-category-iso-8-else
677 (coding-system-category elt)))
678 (push elt l))))
679
680 ;; Remove raw-text, emacs-mule and no-conversion unless nothing
681 ;; else is available.
682 (setq codings
683 (or (delq 'raw-text
684 (delq 'emacs-mule
685 (delq 'no-conversion codings)))
686 '(raw-text emacs-mule no-conversion)))
687
688 (let ((window-configuration (current-window-configuration))
689 (bufname (buffer-name))
690 coding-system)
691 (save-excursion
692 ;; If some defaults are unsafe, make sure the offending
693 ;; buffer is displayed.
694 (when (and unsafe (not (stringp from)))
695 (pop-to-buffer bufname)
696 (goto-char (apply 'min (mapcar #'(lambda (x) (car (cadr x)))
697 unsafe))))
698 ;; Then ask users to select one from CODINGS while showing
699 ;; the reason why none of the defaults are not used.
700 (with-output-to-temp-buffer "*Warning*"
701 (with-current-buffer standard-output
702 (if (and (null rejected) (null unsafe))
703 (insert "No default coding systems to try for "
704 (if (stringp from)
705 (format "string \"%s\"." from)
706 (format "buffer `%s'." bufname)))
707 (insert
708 "These default coding systems were tried to encode"
709 (if (stringp from)
710 (concat " \"" (if (> (length from) 10)
711 (concat (substring from 0 10) "...\"")
712 (concat from "\"")))
713 (format " text\nin the buffer `%s'" bufname))
714 ":\n")
715 (let ((pos (point))
716 (fill-prefix " "))
717 (dolist (x (append rejected unsafe))
718 (princ " ") (princ (car x)))
719 (insert "\n")
720 (fill-region-as-paragraph pos (point)))
721 (when rejected
3fc0b26e
EZ
722 (insert "These safely encode the text in the buffer,
723but are not recommended for encoding text in this context,
9ee5b744
SM
724e.g., for sending an email message.\n ")
725 (dolist (x rejected)
726 (princ " ") (princ x))
727 (insert "\n"))
728 (when unsafe
3fc0b26e 729 (insert (if rejected "The other coding systems"
9ee5b744 730 "However, each of them")
3fc0b26e 731 " encountered characters it couldn't encode:\n")
9ee5b744 732 (dolist (coding unsafe)
3fc0b26e 733 (insert (format " %s cannot encode these:" (car coding)))
9ee5b744
SM
734 (let ((i 0)
735 (func1
736 #'(lambda (bufname pos)
737 (when (buffer-live-p (get-buffer bufname))
738 (pop-to-buffer bufname)
739 (goto-char pos))))
740 (func2
741 #'(lambda (bufname pos coding)
742 (when (buffer-live-p (get-buffer bufname))
743 (pop-to-buffer bufname)
744 (if (< (point) pos)
745 (goto-char pos)
746 (forward-char 1)
747 (search-unencodable-char coding)
748 (forward-char -1))))))
749 (dolist (elt (cdr coding))
750 (insert " ")
751 (if (stringp from)
752 (insert (if (< i 10) (cdr elt) "..."))
753 (if (< i 10)
754 (insert-text-button
755 (cdr elt)
756 :type 'help-xref
4e21e5aa 757 'face 'link
9ee5b744
SM
758 'help-echo
759 "mouse-2, RET: jump to this character"
760 'help-function func1
761 'help-args (list bufname (car elt)))
762 (insert-text-button
763 "..."
764 :type 'help-xref
4e21e5aa 765 'face 'link
9ee5b744
SM
766 'help-echo
767 "mouse-2, RET: next unencodable character"
768 'help-function func2
769 'help-args (list bufname (car elt)
770 (car coding)))))
771 (setq i (1+ i))))
772 (insert "\n"))
333f3572 773 (insert (substitute-command-keys "\
7d03c5b1 774
333f3572
KH
775Click on a character (or switch to this window by `\\[other-window]'\n\
776and select the characters by RET) to jump to the place it appears,\n\
3fc0b26e 777where `\\[universal-argument] \\[what-cursor-position]' will give information about it.\n"))))
7d03c5b1 778 (insert (substitute-command-keys "\nSelect \
3fc0b26e
EZ
779one of the safe coding systems listed below,\n\
780or cancel the writing with \\[keyboard-quit] and edit the buffer\n\
781 to remove or modify the problematic characters,\n\
782or specify any other coding system (and risk losing\n\
783 the problematic characters).\n\n"))
9ee5b744
SM
784 (let ((pos (point))
785 (fill-prefix " "))
786 (dolist (x codings)
787 (princ " ") (princ x))
788 (insert "\n")
7d03c5b1 789 (fill-region-as-paragraph pos (point)))))
9ee5b744
SM
790
791 ;; Read a coding system.
792 (setq coding-system
793 (read-coding-system
794 (format "Select coding system (default %s): " default)
795 default))
796 (setq last-coding-system-specified coding-system))
797
798 (kill-buffer "*Warning*")
799 (set-window-configuration window-configuration)
800 coding-system))
801
b5edd1d1 802(defun select-safe-coding-system (from to &optional default-coding-system
efa2c6d7 803 accept-default-p file)
d5266ddf
KH
804 "Ask a user to select a safe coding system from candidates.
805The candidates of coding systems which can safely encode a text
b5edd1d1
KH
806between FROM and TO are shown in a popup window. Among them, the most
807proper one is suggested as the default.
808
a985cd2f
KH
809The list of `buffer-file-coding-system' of the current buffer,
810the `default-buffer-file-coding-system', and the
b5edd1d1
KH
811most preferred coding system (if it corresponds to a MIME charset) is
812treated as the default coding system list. Among them, the first one
d37ef0f6
DL
813that safely encodes the text is normally selected silently and
814returned without any user interaction. See also the command
815`prefer-coding-system'.
816
817However, the user is queried if the chosen coding system is
29d04c4f 818inconsistent with what would be selected by `find-auto-coding' from
d37ef0f6
DL
819coding cookies &c. if the contents of the region were read from a
820file. (That could lead to data corruption in a file subsequently
821re-visited and edited.)
54b226f7 822
b5edd1d1
KH
823Optional 3rd arg DEFAULT-CODING-SYSTEM specifies a coding system or a
824list of coding systems to be prepended to the default coding system
0e9ec609 825list. However, if DEFAULT-CODING-SYSTEM is a list and the first
ccb06340 826element is t, the cdr part is used as the default coding system list,
a985cd2f
KH
827i.e. `buffer-file-coding-system', `default-buffer-file-coding-system',
828and the most preferred coding system are not used.
54b226f7 829
b5edd1d1
KH
830Optional 4th arg ACCEPT-DEFAULT-P, if non-nil, is a function to
831determine the acceptability of the silently selected coding system.
832It is called with that coding system, and should return nil if it
833should not be silently selected and thus user interaction is required.
834
efa2c6d7
RS
835Optional 5th arg FILE is the file name to use for this purpose.
836That is different from `buffer-file-name' when handling `write-region'
837\(for example).
838
b5edd1d1
KH
839The variable `select-safe-coding-system-accept-default-p', if
840non-nil, overrides ACCEPT-DEFAULT-P.
54b226f7
KH
841
842Kludgy feature: if FROM is a string, the string is the target text,
843and TO is ignored."
b5edd1d1
KH
844 (if (and default-coding-system
845 (not (listp default-coding-system)))
846 (setq default-coding-system (list default-coding-system)))
847
29d04c4f
KH
848 (let ((no-other-defaults nil)
849 auto-cs)
d35deeec 850 (unless (or (stringp from) find-file-literally)
29d04c4f
KH
851 ;; Find an auto-coding that is specified for the the current
852 ;; buffer and file from the region FROM and TO.
853 (save-excursion
854 (save-restriction
855 (widen)
856 (goto-char from)
857 (setq auto-cs (find-auto-coding (or file buffer-file-name "")
858 (- to from)))
859 (if auto-cs
860 (if (coding-system-p (car auto-cs))
861 (setq auto-cs (car auto-cs))
862 (display-warning
863 :warning
864 (format "\
865Invalid coding system `%s' is specified
866for the current buffer/file by the %s.
867It is highly recommended to fix it before writing to a file."
868 (car auto-cs)
869 (if (eq (cdr auto-cs) :coding) ":coding tag"
870 (format "variable `%s'" (cdr auto-cs)))))
871 (or (yes-or-no-p "Really proceed with writing? ")
872 (error "Save aborted"))
873 (setq auto-cs nil))))))
874
0e9ec609
KH
875 (if (eq (car default-coding-system) t)
876 (setq no-other-defaults t
877 default-coding-system (cdr default-coding-system)))
878
879 ;; Change elements of the list to (coding . base-coding).
880 (setq default-coding-system
881 (mapcar (function (lambda (x) (cons x (coding-system-base x))))
882 default-coding-system))
883
29d04c4f
KH
884 (if (and auto-cs (not no-other-defaults))
885 ;; If the file has a coding cookie, try to use it before anything
886 ;; else (i.e. before default-coding-system which will typically come
887 ;; from file-coding-system-alist).
888 (let ((base (coding-system-base auto-cs)))
889 (or (memq base '(nil undecided))
890 (rassq base default-coding-system)
891 (push (cons auto-cs base) default-coding-system))))
892
0e9ec609
KH
893 (unless no-other-defaults
894 ;; If buffer-file-coding-system is not nil nor undecided, append it
895 ;; to the defaults.
896 (if buffer-file-coding-system
897 (let ((base (coding-system-base buffer-file-coding-system)))
898 (or (eq base 'undecided)
0e9ec609 899 (rassq base default-coding-system)
ccb06340
KH
900 (setq default-coding-system
901 (append default-coding-system
902 (list (cons buffer-file-coding-system base)))))))
a985cd2f
KH
903
904 ;; If default-buffer-file-coding-system is not nil nor undecided,
905 ;; append it to the defaults.
906 (if default-buffer-file-coding-system
907 (let ((base (coding-system-base default-buffer-file-coding-system)))
908 (or (eq base 'undecided)
909 (rassq base default-coding-system)
ccb06340
KH
910 (setq default-coding-system
911 (append default-coding-system
d35deeec 912 (list (cons default-buffer-file-coding-system
ccb06340 913 base)))))))
0e9ec609
KH
914
915 ;; If the most preferred coding system has the property mime-charset,
916 ;; append it to the defaults.
8f924df7
KH
917 (let ((preferred (coding-system-priority-list t))
918 base)
0e9ec609
KH
919 (and (coding-system-p preferred)
920 (setq base (coding-system-base preferred))
8f924df7 921 (coding-system-get preferred :mime-charset)
0e9ec609 922 (not (rassq base default-coding-system))
ccb06340
KH
923 (setq default-coding-system
924 (append default-coding-system
925 (list (cons preferred base)))))))
29d04c4f
KH
926
927 (if select-safe-coding-system-accept-default-p
928 (setq accept-default-p select-safe-coding-system-accept-default-p))
929
bae7cd08
KH
930 ;; Decide the eol-type from the top of the default codings,
931 ;; buffer-file-coding-system, or
932 ;; default-buffer-file-coding-system.
933 (if default-coding-system
934 (let ((default-eol-type (coding-system-eol-type
935 (caar default-coding-system))))
936 (if (and (vectorp default-eol-type) buffer-file-coding-system)
d35deeec 937 (setq default-eol-type (coding-system-eol-type
bae7cd08
KH
938 buffer-file-coding-system)))
939 (if (and (vectorp default-eol-type) default-buffer-file-coding-system)
d35deeec 940 (setq default-eol-type (coding-system-eol-type
bae7cd08
KH
941 default-buffer-file-coding-system)))
942 (if (and default-eol-type (not (vectorp default-eol-type)))
943 (dolist (elt default-coding-system)
944 (setcar elt (coding-system-change-eol-conversion
945 (car elt) default-eol-type))))))
946
29d04c4f
KH
947 (let ((codings (find-coding-systems-region from to))
948 (coding-system nil)
949 safe rejected unsafe)
bae7cd08
KH
950 (if (eq (car codings) 'undecided)
951 ;; Any coding system is ok.
952 (setq coding-system (caar default-coding-system))
953 ;; Reverse the list so that elements are accumulated in safe,
954 ;; rejected, and unsafe in the correct order.
955 (setq default-coding-system (nreverse default-coding-system))
956
957 ;; Classify the defaults into safe, rejected, and unsafe.
958 (dolist (elt default-coding-system)
959 (if (or (eq (car codings) 'undecided)
960 (memq (cdr elt) codings))
961 (if (and (functionp accept-default-p)
962 (not (funcall accept-default-p (cdr elt))))
963 (push (car elt) rejected)
964 (push (car elt) safe))
965 (push (car elt) unsafe)))
966 (if safe
967 (setq coding-system (car safe))))
29d04c4f
KH
968
969 ;; If all the defaults failed, ask a user.
bae7cd08 970 (when (not coding-system)
29d04c4f
KH
971 (setq coding-system (select-safe-coding-system-interactively
972 from to codings unsafe rejected (car codings))))
973
29d04c4f
KH
974 ;; Check we're not inconsistent with what `coding:' spec &c would
975 ;; give when file is re-read.
976 ;; But don't do this if we explicitly ignored the cookie
977 ;; by using `find-file-literally'.
978 (when (and auto-cs
979 (not (and
980 coding-system
981 (memq (coding-system-type coding-system) '(0 5)))))
6b61353c
KH
982 ;; Merge coding-system and auto-cs as far as possible.
983 (if (not coding-system)
984 (setq coding-system auto-cs)
985 (if (not auto-cs)
986 (setq auto-cs coding-system)
987 (let ((eol-type-1 (coding-system-eol-type coding-system))
988 (eol-type-2 (coding-system-eol-type auto-cs)))
989 (if (eq (coding-system-base coding-system) 'undecided)
990 (setq coding-system (coding-system-change-text-conversion
991 coding-system auto-cs))
992 (if (eq (coding-system-base auto-cs) 'undecided)
993 (setq auto-cs (coding-system-change-text-conversion
994 auto-cs coding-system))))
995 (if (vectorp eol-type-1)
996 (or (vectorp eol-type-2)
997 (setq coding-system (coding-system-change-eol-conversion
998 coding-system eol-type-2)))
999 (if (vectorp eol-type-2)
1000 (setq auto-cs (coding-system-change-eol-conversion
1001 auto-cs eol-type-1)))))))
1002
1003 (if (and auto-cs
d37ef0f6
DL
1004 ;; Don't barf if writing a compressed file, say.
1005 ;; This check perhaps isn't ideal, but is probably
1006 ;; the best thing to do.
1007 (not (auto-coding-alist-lookup (or file buffer-file-name "")))
6b61353c 1008 (not (coding-system-equal coding-system auto-cs)))
43afed8c
RS
1009 (unless (yes-or-no-p
1010 (format "Selected encoding %s disagrees with \
1011%s specified by file contents. Really save (else edit coding cookies \
1012and try again)? " coding-system auto-cs))
29d04c4f
KH
1013 (error "Save aborted"))))
1014 coding-system)))
54b226f7
KH
1015
1016(setq select-safe-coding-system-function 'select-safe-coding-system)
1017
46babb23
KH
1018(defun select-message-coding-system ()
1019 "Return a coding system to encode the outgoing message of the current buffer.
1020It at first tries the first coding system found in these variables
1021in this order:
1022 (1) local value of `buffer-file-coding-system'
1023 (2) value of `sendmail-coding-system'
b5edd1d1
KH
1024 (3) value of `default-sendmail-coding-system'
1025 (4) value of `default-buffer-file-coding-system'
46babb23
KH
1026If the found coding system can't encode the current buffer,
1027or none of them are bound to a coding system,
48e41165 1028it asks the user to select a proper coding system."
46babb23 1029 (let ((coding (or (and (local-variable-p 'buffer-file-coding-system)
b5edd1d1
KH
1030 buffer-file-coding-system)
1031 sendmail-coding-system
1032 default-sendmail-coding-system
1033 default-buffer-file-coding-system)))
46babb23 1034 (if (eq coding 'no-conversion)
d37ef0f6 1035 ;; We should never use no-conversion for outgoing mail.
46babb23
KH
1036 (setq coding nil))
1037 (if (fboundp select-safe-coding-system-function)
1038 (funcall select-safe-coding-system-function
b5edd1d1 1039 (point-min) (point-max) coding
209c73b0 1040 (function (lambda (x) (coding-system-get x :mime-charset))))
46babb23 1041 coding)))
4ed46869 1042\f
03c35c83 1043;;; Language support stuff.
4ed46869 1044
4ed46869 1045(defvar language-info-alist nil
2c395d56 1046 "Alist of language environment definitions.
4ed46869
KH
1047Each element looks like:
1048 (LANGUAGE-NAME . ((KEY . INFO) ...))
2c395d56
RS
1049where LANGUAGE-NAME is a string, the name of the language environment,
1050KEY is a symbol denoting the kind of information, and
1051INFO is the data associated with KEY.
1052Meaningful values for KEY include
1053
1054 documentation value is documentation of what this language environment
1055 is meant for, and how to use it.
7dd42fb1
KH
1056 charset value is a list of the character sets mainly used
1057 by this language environment.
d37ef0f6
DL
1058 sample-text value is an expression which is evalled to generate
1059 a line of text written using characters appropriate
1060 for this language environment.
2c395d56
RS
1061 setup-function value is a function to call to switch to this
1062 language environment.
1063 exit-function value is a function to call to leave this
1064 language environment.
1065 coding-system value is a list of coding systems that are good
1066 for saving text written in this language environment.
1067 This list serves as suggestions to the user;
1068 in effect, as a kind of documentation.
1069 coding-priority value is a list of coding systems for this language
1070 environment, in order of decreasing priority.
1071 This is used to set up the coding system priority
45d08cb2 1072 list when you switch to this language environment.
ddb5c041 1073 nonascii-translation
7dd42fb1
KH
1074 value is a charset of dimension one to use for
1075 converting a unibyte character to multibyte
1076 and vice versa.
ddb5c041
KH
1077 input-method value is a default input method for this language
1078 environment.
7624ebb9
KH
1079 features value is a list of features requested in this
1080 language environment.
6b61353c
KH
1081 ctext-non-standard-encodings
1082 value is a list of non-standard encoding
1083 names used in extended segments of CTEXT.
1084 See the variable
1085 `ctext-non-standard-encodings' for more
1086 detail.
ddb5c041
KH
1087
1088The following keys take effect only when multibyte characters are
1089globally disabled, i.e. the value of `default-enable-multibyte-characters'
1090is nil.
1091
ddb5c041
KH
1092 unibyte-display value is a coding system to encode characters
1093 for the terminal. Characters in the range
1094 of 160 to 255 display not as octal escapes,
1095 but as non-ASCII characters in this language
1096 environment.")
2c395d56
RS
1097
1098(defun get-language-info (lang-env key)
1099 "Return information listed under KEY for language environment LANG-ENV.
1100KEY is a symbol denoting the kind of information.
1101For a list of useful values for KEY and their meanings,
1102see `language-info-alist'."
1103 (if (symbolp lang-env)
1104 (setq lang-env (symbol-name lang-env)))
f15466c5 1105 (let ((lang-slot (assoc-string lang-env language-info-alist t)))
4ed46869
KH
1106 (if lang-slot
1107 (cdr (assq key (cdr lang-slot))))))
1108
f08adf27 1109(defun set-language-info (lang-env key info)
2c395d56
RS
1110 "Modify part of the definition of language environment LANG-ENV.
1111Specifically, this stores the information INFO under KEY
1112in the definition of this language environment.
4ed46869 1113KEY is a symbol denoting the kind of information.
2c395d56 1114INFO is the value for that information.
281d03ec 1115
2c395d56 1116For a list of useful values for KEY and their meanings,
f08adf27 1117see `language-info-alist'."
2c395d56
RS
1118 (if (symbolp lang-env)
1119 (setq lang-env (symbol-name lang-env)))
eec5c8f9
KH
1120 (set-language-info-internal lang-env key info)
1121 (if (equal lang-env current-language-environment)
d042f8b4 1122 (cond ((eq key 'coding-priority)
e9a8ed3c
KH
1123 (set-language-environment-coding-systems lang-env)
1124 (set-language-environment-charset lang-env))
d042f8b4
KH
1125 ((eq key 'input-method)
1126 (set-language-environment-input-method lang-env))
1127 ((eq key 'nonascii-translation)
1128 (set-language-environment-nonascii-translation lang-env))
1129 ((eq key 'charset)
1130 (set-language-environment-charset lang-env))
d042f8b4
KH
1131 ((and (not default-enable-multibyte-characters)
1132 (or (eq key 'unibyte-syntax) (eq key 'unibyte-display)))
1133 (set-language-environment-unibyte lang-env)))))
eec5c8f9
KH
1134
1135(defun set-language-info-internal (lang-env key info)
1136 "Internal use only.
1137Arguments are the same as `set-language-info'."
4ed46869 1138 (let (lang-slot key-slot)
2c395d56 1139 (setq lang-slot (assoc lang-env language-info-alist))
4ed46869 1140 (if (null lang-slot) ; If no slot for the language, add it.
2c395d56 1141 (setq lang-slot (list lang-env)
4ed46869
KH
1142 language-info-alist (cons lang-slot language-info-alist)))
1143 (setq key-slot (assq key lang-slot))
1144 (if (null key-slot) ; If no slot for the key, add it.
1145 (progn
1146 (setq key-slot (list key))
1147 (setcdr lang-slot (cons key-slot (cdr lang-slot)))))
590dbcba 1148 (setcdr key-slot (purecopy info))
1169bd86 1149 ;; Update the custom-type of `current-language-environment'.
590dbcba 1150 (put 'current-language-environment 'custom-type
1169bd86
MR
1151 (cons 'choice (mapcar
1152 (lambda (lang)
756e055f
MR
1153 (list 'const lang))
1154 (sort (mapcar 'car language-info-alist) 'string<))))))
4ed46869 1155
2c395d56
RS
1156(defun set-language-info-alist (lang-env alist &optional parents)
1157 "Store ALIST as the definition of language environment LANG-ENV.
1158ALIST is an alist of KEY and INFO values. See the documentation of
98c6d6ed 1159`language-info-alist' for the meanings of KEY and INFO.
54b226f7 1160
2c395d56 1161Optional arg PARENTS is a list of parent menu names; it specifies
d37ef0f6 1162where to put this language environment in the
2c395d56
RS
1163Describe Language Environment and Set Language Environment menus.
1164For example, (\"European\") means to put this language environment
1165in the European submenu in each of those two menus."
1166 (if (symbolp lang-env)
1167 (setq lang-env (symbol-name lang-env)))
54b226f7
KH
1168 (let ((describe-map describe-language-environment-map)
1169 (setup-map setup-language-environment-map))
1170 (if parents
1171 (let ((l parents)
9deed82f 1172 map parent-symbol parent prompt)
54b226f7
KH
1173 (while l
1174 (if (symbolp (setq parent-symbol (car l)))
1175 (setq parent (symbol-name parent))
1176 (setq parent parent-symbol parent-symbol (intern parent)))
1177 (setq map (lookup-key describe-map (vector parent-symbol)))
9deed82f
EZ
1178 ;; This prompt string is for define-prefix-command, so
1179 ;; that the map it creates will be suitable for a menu.
1180 (or map (setq prompt (format "%s Environment" parent)))
54b226f7
KH
1181 (if (not map)
1182 (progn
1183 (setq map (intern (format "describe-%s-environment-map"
1184 (downcase parent))))
9deed82f 1185 (define-prefix-command map nil prompt)
54b226f7
KH
1186 (define-key-after describe-map (vector parent-symbol)
1187 (cons parent map) t)))
1188 (setq describe-map (symbol-value map))
1189 (setq map (lookup-key setup-map (vector parent-symbol)))
1190 (if (not map)
1191 (progn
1192 (setq map (intern (format "setup-%s-environment-map"
1193 (downcase parent))))
9deed82f 1194 (define-prefix-command map nil prompt)
54b226f7
KH
1195 (define-key-after setup-map (vector parent-symbol)
1196 (cons parent map) t)))
1197 (setq setup-map (symbol-value map))
1198 (setq l (cdr l)))))
f08adf27
RS
1199
1200 ;; Set up menu items for this language env.
7624ebb9 1201 (let ((doc (assq 'documentation alist)))
f08adf27
RS
1202 (when doc
1203 (define-key-after describe-map (vector (intern lang-env))
7624ebb9
KH
1204 (cons lang-env 'describe-specified-language-support) t)))
1205 (define-key-after setup-map (vector (intern lang-env))
1206 (cons lang-env 'setup-specified-language-environment) t)
f08adf27 1207
eec5c8f9
KH
1208 (dolist (elt alist)
1209 (set-language-info-internal lang-env (car elt) (cdr elt)))
d35deeec 1210
eec5c8f9
KH
1211 (if (equal lang-env current-language-environment)
1212 (set-language-environment lang-env))))
4ed46869 1213
ae302641 1214(defun read-language-name (key prompt &optional default)
2c395d56 1215 "Read a language environment name which has information for KEY.
ddb5c041 1216If KEY is nil, read any language environment.
2c395d56
RS
1217Prompt with PROMPT. DEFAULT is the default choice of language environment.
1218This returns a language environment name as a string."
4ed46869
KH
1219 (let* ((completion-ignore-case t)
1220 (name (completing-read prompt
1221 language-info-alist
ddb5c041 1222 (and key
ca429a25 1223 (function (lambda (elm) (and (listp elm) (assq key elm)))))
ae302641 1224 t nil nil default)))
13e82c04 1225 (if (and (> (length name) 0)
ddb5c041
KH
1226 (or (not key)
1227 (get-language-info name key)))
13e82c04 1228 name)))
4ed46869
KH
1229\f
1230;;; Multilingual input methods.
d37ef0f6 1231(defgroup leim nil
d0c40faa
KH
1232 "LEIM: Libraries of Emacs Input Methods."
1233 :group 'mule)
4ed46869 1234
d0b9c3ab
KH
1235(defconst leim-list-file-name "leim-list.el"
1236 "Name of LEIM list file.
1237This file contains a list of libraries of Emacs input methods (LEIM)
1238in the format of Lisp expression for registering each input method.
1239Emacs loads this file at startup time.")
1240
2e224638 1241(defvar leim-list-header (format
d37ef0f6 1242";;; %s -- list of LEIM (Library of Emacs Input Method) -*-coding: iso-2022-7bit;-*-
d0b9c3ab
KH
1243;;
1244;; This file contains a list of LEIM (Library of Emacs Input Method)
c654de1d
DL
1245;; methods in the same directory as this file. Loading this file
1246;; registers all the input methods in Emacs.
d0b9c3ab 1247;;
d33d5fbe 1248;; Each entry has the form:
d0b9c3ab
KH
1249;; (register-input-method
1250;; INPUT-METHOD LANGUAGE-NAME ACTIVATE-FUNC
1251;; TITLE DESCRIPTION
1252;; ARG ...)
c654de1d 1253;; See the function `register-input-method' for the meanings of the arguments.
d0b9c3ab
KH
1254;;
1255;; If this directory is included in load-path, Emacs automatically
1256;; loads this file at startup time.
1257
1258"
1259 leim-list-file-name)
1260 "Header to be inserted in LEIM list file.")
1261
e55e92ee 1262(defvar leim-list-entry-regexp "^(register-input-method"
d0b9c3ab
KH
1263 "Regexp matching head of each entry in LEIM list file.
1264See also the variable `leim-list-header'")
1265
1266(defvar update-leim-list-functions
1267 '(quail-update-leim-list-file)
1268 "List of functions to call to update LEIM list file.
1269Each function is called with one arg, LEIM directory name.")
1270
a337fe7f
RS
1271(defun update-leim-list-file (&rest dirs)
1272 "Update LEIM list file in directories DIRS."
d0b9c3ab
KH
1273 (let ((functions update-leim-list-functions))
1274 (while functions
a337fe7f 1275 (apply (car functions) dirs)
d0b9c3ab
KH
1276 (setq functions (cdr functions)))))
1277
4ed46869
KH
1278(defvar current-input-method nil
1279 "The current input method for multilingual text.
96db204a 1280If nil, that means no input method is activated now.")
4ed46869
KH
1281(make-variable-buffer-local 'current-input-method)
1282(put 'current-input-method 'permanent-local t)
1283
1284(defvar current-input-method-title nil
d0b9c3ab 1285 "Title string of the current input method shown in mode line.")
4ed46869
KH
1286(make-variable-buffer-local 'current-input-method-title)
1287(put 'current-input-method-title 'permanent-local t)
1288
b4fba33f 1289(defcustom default-input-method nil
8861c593 1290 "*Default input method for multilingual text (a string).
b4fba33f 1291This is the input method activated automatically by the command
9b10b5a3 1292`toggle-input-method' (\\[toggle-input-method])."
1f547b92 1293 :link '(custom-manual "(emacs)Input Methods")
8861c593 1294 :group 'mule
d398dba6
DL
1295 :type '(choice (const nil) (string
1296 :completion-ignore-case t
1297 :complete-function widget-string-complete
1298 :completion-alist input-method-alist
1299 :prompt-history input-method-history))
5806e8a6 1300 :set-after '(current-language-environment))
b4fba33f 1301
0f835e87
KH
1302(put 'input-method-function 'permanent-local t)
1303
723a427a
KH
1304(defvar input-method-history nil
1305 "History list for some commands that read input methods.")
1306(make-variable-buffer-local 'input-method-history)
1307(put 'input-method-history 'permanent-local t)
4ed46869
KH
1308
1309(defvar inactivate-current-input-method-function nil
1310 "Function to call for inactivating the current input method.
1311Every input method should set this to an appropriate value when activated.
f17ccaee
KH
1312This function is called with no argument.
1313
1314This function should never change the value of `current-input-method'.
1315It is set to nil by the function `inactivate-input-method'.")
4ed46869
KH
1316(make-variable-buffer-local 'inactivate-current-input-method-function)
1317(put 'inactivate-current-input-method-function 'permanent-local t)
1318
1319(defvar describe-current-input-method-function nil
1320 "Function to call for describing the current input method.
1321This function is called with no argument.")
1322(make-variable-buffer-local 'describe-current-input-method-function)
1323(put 'describe-current-input-method-function 'permanent-local t)
1324
d0b9c3ab 1325(defvar input-method-alist nil
2c395d56 1326 "Alist of input method names vs how to use them.
d0b9c3ab 1327Each element has the form:
2c395d56
RS
1328 (INPUT-METHOD LANGUAGE-ENV ACTIVATE-FUNC TITLE DESCRIPTION ARGS...)
1329See the function `register-input-method' for the meanings of the elements.")
1330
f08adf27 1331(defun register-input-method (input-method lang-env &rest args)
bf42aa15 1332 "Register INPUT-METHOD as an input method for language environment LANG-ENV.
d0b9c3ab 1333
d35deeec 1334INPUT-METHOD and LANG-ENV are symbols or strings.
2c395d56
RS
1335ACTIVATE-FUNC is a function to call to activate this method.
1336TITLE is a string to show in the mode line when this method is active.
1337DESCRIPTION is a string describing this method and what it is good for.
1338The ARGS, if any, are passed as arguments to ACTIVATE-FUNC.
205814ee
KH
1339All told, the arguments to ACTIVATE-FUNC are INPUT-METHOD and the ARGS.
1340
c654de1d
DL
1341This function is mainly used in the file \"leim-list.el\" which is
1342created at Emacs build time, registering all Quail input methods
0b6cadff 1343contained in the Emacs distribution.
205814ee 1344
0b6cadff 1345In case you want to register a new Quail input method by yourself, be
205814ee 1346careful to use the same input method title as given in the third
0b6cadff
DL
1347parameter of `quail-define-package'. (If the values are different, the
1348string specified in this function takes precedence.)
205814ee
KH
1349
1350The commands `describe-input-method' and `list-input-methods' need
0b6cadff 1351these duplicated values to show some information about input methods
d35deeec
JB
1352without loading the relevant Quail packages.
1353\n(fn INPUT-METHOD LANG-ENV ACTIVATE-FUNC TITLE DESCRIPTION &rest ARGS)"
f08adf27
RS
1354 (if (symbolp lang-env)
1355 (setq lang-env (symbol-name lang-env)))
4ef06f75
KH
1356 (if (symbolp input-method)
1357 (setq input-method (symbol-name input-method)))
f08adf27 1358 (let ((info (cons lang-env args))
d0b9c3ab
KH
1359 (slot (assoc input-method input-method-alist)))
1360 (if slot
1361 (setcdr slot info)
1362 (setq slot (cons input-method info))
1363 (setq input-method-alist (cons slot input-method-alist)))))
1364
4d5ac029 1365(defun read-input-method-name (prompt &optional default inhibit-null)
d0b9c3ab 1366 "Read a name of input method from a minibuffer prompting with PROMPT.
4d5ac029 1367If DEFAULT is non-nil, use that as the default,
0b6cadff 1368and substitute it into PROMPT at the first `%s'.
4ef06f75
KH
1369If INHIBIT-NULL is non-nil, null input signals an error.
1370
1371The return value is a string."
4d5ac029
RS
1372 (if default
1373 (setq prompt (format prompt default)))
d0b9c3ab 1374 (let* ((completion-ignore-case t)
c54044ff
KH
1375 ;; As it is quite normal to change input method in the
1376 ;; minibuffer, we must enable it even if
1377 ;; enable-recursive-minibuffers is currently nil.
1378 (enable-recursive-minibuffers t)
723a427a
KH
1379 ;; This binding is necessary because input-method-history is
1380 ;; buffer local.
d0b9c3ab 1381 (input-method (completing-read prompt input-method-alist
87505a98
RS
1382 nil t nil 'input-method-history
1383 default)))
bf294e6e
KH
1384 (if (and input-method (symbolp input-method))
1385 (setq input-method (symbol-name input-method)))
d0b9c3ab
KH
1386 (if (> (length input-method) 0)
1387 input-method
1388 (if inhibit-null
43807b77 1389 (error "No valid input method is specified")))))
d0b9c3ab 1390
d0b9c3ab 1391(defun activate-input-method (input-method)
2c395d56
RS
1392 "Switch to input method INPUT-METHOD for the current buffer.
1393If some other input method is already active, turn it off first.
1394If INPUT-METHOD is nil, deactivate any current input method."
305a3cb6 1395 (if (and input-method (symbolp input-method))
4ef06f75 1396 (setq input-method (symbol-name input-method)))
723a427a
KH
1397 (if (and current-input-method
1398 (not (string= current-input-method input-method)))
305a3cb6 1399 (inactivate-input-method))
2c395d56 1400 (unless (or current-input-method (null input-method))
d0b9c3ab
KH
1401 (let ((slot (assoc input-method input-method-alist)))
1402 (if (null slot)
723a427a 1403 (error "Can't activate input method `%s'" input-method))
278dd6ac 1404 (setq current-input-method-title nil)
8efc03e1
KH
1405 (let ((func (nth 2 slot)))
1406 (if (functionp func)
1407 (apply (nth 2 slot) input-method (nthcdr 5 slot))
1408 (if (and (consp func) (symbolp (car func)) (symbolp (cdr func)))
1409 (progn
1410 (require (cdr func))
1411 (apply (car func) input-method (nthcdr 5 slot)))
1412 (error "Can't activate input method `%s'" input-method))))
d0b9c3ab 1413 (setq current-input-method input-method)
278dd6ac
KH
1414 (or (stringp current-input-method-title)
1415 (setq current-input-method-title (nth 3 slot)))
28885c0e
KH
1416 (unwind-protect
1417 (run-hooks 'input-method-activate-hook)
1418 (force-mode-line-update)))))
15b3e511 1419
15b3e511 1420(defun inactivate-input-method ()
f17ccaee 1421 "Turn off the current input method."
723a427a
KH
1422 (when current-input-method
1423 (if input-method-history
1424 (unless (string= current-input-method (car input-method-history))
1425 (setq input-method-history
1426 (cons current-input-method
1427 (delete current-input-method input-method-history))))
1428 (setq input-method-history (list current-input-method)))
1429 (unwind-protect
1430 (funcall inactivate-current-input-method-function)
15b3e511 1431 (unwind-protect
723a427a
KH
1432 (run-hooks 'input-method-inactivate-hook)
1433 (setq current-input-method nil
2b1ec973 1434 input-method-function nil
28885c0e
KH
1435 current-input-method-title nil)
1436 (force-mode-line-update)))))
4ed46869 1437
e893eae2 1438(defun set-input-method (input-method &optional interactive)
2c395d56 1439 "Select and activate input method INPUT-METHOD for the current buffer.
bc406911 1440This also sets the default input method to the one you specify.
402dbbd1
EZ
1441If INPUT-METHOD is nil, this function turns off the input method, and
1442also causes you to be prompted for a name of an input method the next
1443time you invoke \\[toggle-input-method].
e893eae2
RS
1444When called interactively, the optional arg INTERACTIVE is non-nil,
1445which marks the variable `default-input-method' as set for Custom buffers.
402dbbd1 1446
bc406911
EZ
1447To deactivate the input method interactively, use \\[toggle-input-method].
1448To deactivate it programmatically, use \\[inactivate-input-method]."
d0b9c3ab 1449 (interactive
723a427a 1450 (let* ((default (or (car input-method-history) default-input-method)))
42395763 1451 (list (read-input-method-name
87505a98 1452 (if default "Select input method (default %s): " "Select input method: ")
e893eae2
RS
1453 default t)
1454 t)))
d0b9c3ab 1455 (activate-input-method input-method)
f4990970 1456 (setq default-input-method input-method)
e893eae2 1457 (when interactive
f4990970
PA
1458 (customize-mark-as-set 'default-input-method))
1459 default-input-method)
4ed46869 1460
e893eae2 1461(defun toggle-input-method (&optional arg interactive)
f8ec20be
RS
1462 "Enable or disable multilingual text input method for the current buffer.
1463Only one input method can be enabled at any time in a given buffer.
1464
1465The normal action is to enable an input method if none was
1466enabled, and disable the current one otherwise. Which input method
1467to enable can be determined in various ways--either the one most
1468recently used, or the one specified by `default-input-method', or
1469as a last resort by reading the name of an input method in the
1470minibuffer.
1471
1472With a prefix argument, read an input method name with the minibuffer
1473and enable that one. The default is the most recent input method specified
e893eae2 1474\(not including the currently active input method, if any).
f8ec20be 1475
e893eae2
RS
1476When called interactively, the optional arg INTERACTIVE is non-nil,
1477which marks the variable `default-input-method' as set for Custom buffers."
1478
1479 (interactive "P\np")
7ddbb5bc
RS
1480 (if (and current-input-method (not arg))
1481 (inactivate-input-method)
1482 (let ((default (or (car input-method-history) default-input-method)))
1483 (if (and arg default (equal current-input-method default)
1484 (> (length input-method-history) 1))
1485 (setq default (nth 1 input-method-history)))
723a427a
KH
1486 (activate-input-method
1487 (if (or arg (not default))
7ddbb5bc
RS
1488 (progn
1489 (read-input-method-name
1490 (if default "Input method (default %s): " "Input method: " )
1491 default t))
723a427a 1492 default))
f4990970 1493 (unless default-input-method
d37ef0f6 1494 (prog1
f4990970 1495 (setq default-input-method current-input-method)
e893eae2 1496 (when interactive
f4990970 1497 (customize-mark-as-set 'default-input-method)))))))
d0b9c3ab 1498
0855c6cd
DL
1499(eval-when-compile (autoload 'help-buffer "help-mode"))
1500
d0b9c3ab 1501(defun describe-input-method (input-method)
2c395d56 1502 "Describe input method INPUT-METHOD."
d0b9c3ab
KH
1503 (interactive
1504 (list (read-input-method-name
5b76833f 1505 "Describe input method (default current choice): ")))
78754934 1506 (if (and input-method (symbolp input-method))
4ef06f75 1507 (setq input-method (symbol-name input-method)))
43125c28
RS
1508 (help-setup-xref (list #'describe-input-method
1509 (or input-method current-input-method))
f80e2142
RS
1510 (interactive-p))
1511
d0b9c3ab
KH
1512 (if (null input-method)
1513 (describe-current-input-method)
464cc130
KH
1514 (let ((current current-input-method))
1515 (condition-case nil
1516 (progn
1517 (save-excursion
1518 (activate-input-method input-method)
1519 (describe-current-input-method))
1520 (activate-input-method current))
d37ef0f6 1521 (error
464cc130 1522 (activate-input-method current)
5f395df3
SM
1523 (help-setup-xref (list #'describe-input-method input-method)
1524 (interactive-p))
1525 (with-output-to-temp-buffer (help-buffer)
464cc130
KH
1526 (let ((elt (assoc input-method input-method-alist)))
1527 (princ (format
1528 "Input method: %s (`%s' in mode line) for %s\n %s\n"
1529 input-method (nth 3 elt) (nth 1 elt) (nth 4 elt))))))))))
d0b9c3ab
KH
1530
1531(defun describe-current-input-method ()
f80e2142
RS
1532 "Describe the input method currently in use.
1533This is a subroutine for `describe-input-method'."
4ed46869
KH
1534 (if current-input-method
1535 (if (and (symbolp describe-current-input-method-function)
1536 (fboundp describe-current-input-method-function))
1537 (funcall describe-current-input-method-function)
1538 (message "No way to describe the current input method `%s'"
f2979bdb 1539 current-input-method)
4ed46869 1540 (ding))
d0b9c3ab 1541 (error "No input method is activated now")))
4ed46869 1542
d3459641 1543(defun read-multilingual-string (prompt &optional initial-input input-method)
4ed46869
KH
1544 "Read a multilingual string from minibuffer, prompting with string PROMPT.
1545The input method selected last time is activated in minibuffer.
15b3e511 1546If optional second arg INITIAL-INPUT is non-nil, insert it in the minibuffer
d0b9c3ab
KH
1547initially.
1548Optional 3rd argument INPUT-METHOD specifies the input method
4ef06f75
KH
1549to be activated instead of the one selected last time. It is a symbol
1550or a string."
88d559ec
KH
1551 (setq input-method
1552 (or input-method
d3459641 1553 current-input-method
88d559ec
KH
1554 default-input-method
1555 (read-input-method-name "Input method: " nil t)))
3df60841 1556 (if (and input-method (symbolp input-method))
4ef06f75 1557 (setq input-method (symbol-name input-method)))
305a3cb6
KH
1558 (let ((prev-input-method current-input-method))
1559 (unwind-protect
1560 (progn
1561 (activate-input-method input-method)
1562 (read-string prompt initial-input nil nil t))
1563 (activate-input-method prev-input-method))))
4ed46869
KH
1564
1565;; Variables to control behavior of input methods. All input methods
1566;; should react to these variables.
1567
8efc03e1
KH
1568(defcustom input-method-verbose-flag 'default
1569 "*A flag to control extra guidance given by input methods.
1570The value should be nil, t, `complex-only', or `default'.
4ed46869 1571
cb29dfb6 1572The extra guidance is done by showing list of available keys in echo
8efc03e1
KH
1573area. When you use the input method in the minibuffer, the guidance
1574is shown at the bottom short window (split from the existing window).
c27c4ed8 1575
8efc03e1
KH
1576If the value is t, extra guidance is always given, if the value is
1577nil, extra guidance is always suppressed.
1578
1579If the value is `complex-only', only complex input methods such as
1580`chinese-py' and `japanese' give extra guidance.
1581
1582If the value is `default', complex input methods always give extra
1583guidance, but simple input methods give it only when you are not in
1584the minibuffer.
1585
1586See also the variable `input-method-highlight-flag'."
d37ef0f6
DL
1587 :type '(choice (const :tag "Always" t) (const :tag "Never" nil)
1588 (const complex-only) (const default))
42395763
RS
1589 :group 'mule)
1590
1591(defcustom input-method-highlight-flag t
1592 "*If this flag is non-nil, input methods highlight partially-entered text.
1593For instance, while you are in the middle of a Quail input method sequence,
1594the text inserted so far is temporarily underlined.
8efc03e1
KH
1595The underlining goes away when you finish or abort the input method sequence.
1596See also the variable `input-method-verbose-flag'."
42395763
RS
1597 :type 'boolean
1598 :group 'mule)
4ed46869 1599
1f547b92 1600(defcustom input-method-activate-hook nil
f17ccaee
KH
1601 "Normal hook run just after an input method is activated.
1602
1603The variable `current-input-method' keeps the input method name
1f547b92
DL
1604just activated."
1605 :type 'hook
1606 :group 'mule)
4ed46869 1607
1f547b92 1608(defcustom input-method-inactivate-hook nil
f17ccaee
KH
1609 "Normal hook run just after an input method is inactivated.
1610
1611The variable `current-input-method' still keeps the input method name
1f547b92
DL
1612just inactivated."
1613 :type 'hook
1614 :group 'mule)
4ed46869 1615
1f547b92 1616(defcustom input-method-after-insert-chunk-hook nil
8f924df7 1617 "Normal hook run just after an input method insert some chunk of text."
1f547b92
DL
1618 :type 'hook
1619 :group 'mule)
4ed46869 1620
dccca980 1621(defvar input-method-exit-on-first-char nil
0b6cadff 1622 "This flag controls when an input method returns.
dccca980
KH
1623Usually, the input method does not return while there's a possibility
1624that it may find a different translation if a user types another key.
39e643e2
RS
1625But, it this flag is non-nil, the input method returns as soon as
1626the current key sequence gets long enough to have some valid translation.")
dccca980 1627
1f547b92 1628(defcustom input-method-use-echo-area nil
dccca980 1629 "This flag controls how an input method shows an intermediate key sequence.
39e643e2
RS
1630Usually, the input method inserts the intermediate key sequence,
1631or candidate translations corresponding to the sequence,
1632at point in the current buffer.
1f547b92
DL
1633But, if this flag is non-nil, it displays them in echo area instead."
1634 :type 'hook
1635 :group 'mule)
dccca980 1636
723a427a 1637(defvar input-method-exit-on-invalid-key nil
fea6b736 1638 "This flag controls the behavior of an input method on invalid key input.
723a427a
KH
1639Usually, when a user types a key which doesn't start any character
1640handled by the input method, the key is handled by turning off the
e8dd0160 1641input method temporarily. After that key, the input method is re-enabled.
723a427a
KH
1642But, if this flag is non-nil, the input method is never back on.")
1643
4ed46869 1644\f
1f547b92 1645(defcustom set-language-environment-hook nil
8efc03e1
KH
1646 "Normal hook run after some language environment is set.
1647
1648When you set some hook function here, that effect usually should not
1649be inherited to another language environment. So, you had better set
1650another function in `exit-language-environment-hook' (which see) to
1f547b92
DL
1651cancel the effect."
1652 :type 'hook
1653 :group 'mule)
8efc03e1 1654
1f547b92 1655(defcustom exit-language-environment-hook nil
8efc03e1
KH
1656 "Normal hook run after exiting from some language environment.
1657When this hook is run, the variable `current-language-environment'
1658is still bound to the language environment being exited.
1659
e8dd0160 1660This hook is mainly used for canceling the effect of
1f547b92
DL
1661`set-language-environment-hook' (which-see)."
1662 :type 'hook
1663 :group 'mule)
8efc03e1 1664
b0648a00
RS
1665(put 'setup-specified-language-environment 'apropos-inhibit t)
1666
15b3e511 1667(defun setup-specified-language-environment ()
f08adf27 1668 "Switch to a specified language environment."
15b3e511 1669 (interactive)
f850d782 1670 (let (language-name)
15b3e511
KH
1671 (if (and (symbolp last-command-event)
1672 (or (not (eq last-command-event 'Default))
1673 (setq last-command-event 'English))
f850d782 1674 (setq language-name (symbol-name last-command-event)))
f4990970
PA
1675 (prog1
1676 (set-language-environment language-name)
1677 (customize-mark-as-set 'current-language-environment))
15b3e511 1678 (error "Bogus calling sequence"))))
4ed46869 1679
8861c593 1680(defcustom current-language-environment "English"
94d04df6 1681 "The last language environment specified with `set-language-environment'.
ebef6d93
KH
1682This variable should be set only with \\[customize], which is equivalent
1683to using the function `set-language-environment'."
94d04df6 1684 :link '(custom-manual "(emacs)Language Environments")
dff1aa24 1685 :set (lambda (symbol value) (set-language-environment value))
94d04df6 1686 :get (lambda (x)
f15466c5 1687 (or (car-safe (assoc-string
94d04df6
DL
1688 (if (symbolp current-language-environment)
1689 (symbol-name current-language-environment)
1690 current-language-environment)
f15466c5 1691 language-info-alist t))
94d04df6 1692 "English"))
990a4108
MR
1693 ;; custom type will be updated with `set-language-info'.
1694 :type (if language-info-alist
1695 (cons 'choice (mapcar
1696 (lambda (lang)
756e055f
MR
1697 (list 'const lang))
1698 (sort (mapcar 'car language-info-alist) 'string<)))
990a4108 1699 'string)
8861c593 1700 :initialize 'custom-initialize-default
eb9fc9e6 1701 :group 'mule)
f850d782 1702
ddb5c041
KH
1703(defun reset-language-environment ()
1704 "Reset multilingual environment of Emacs to the default status.
1705
1706The default status is as follows:
1707
d37ef0f6 1708 The default value of `buffer-file-coding-system' is nil.
ddb5c041
KH
1709 The default coding system for process I/O is nil.
1710 The default value for the command `set-terminal-coding-system' is nil.
1711 The default value for the command `set-keyboard-coding-system' is nil.
1712
97941b05
KH
1713 The order of priorities of coding systems are as follows:
1714 utf-8
1715 iso-2022-7bit
1716 iso-latin-1
1717 iso-2022-7bit-lock
1718 iso-2022-8bit-ss2
1719 emacs-mule
1720 raw-text"
ddb5c041
KH
1721 (interactive)
1722 ;; This function formerly set default-enable-multibyte-characters to t,
1723 ;; but that is incorrect. It should not alter the unibyte/multibyte choice.
1724
97941b05
KH
1725 (set-coding-system-priority
1726 'utf-8
1727 'iso-2022-7bit
1728 'iso-latin-1
1729 'iso-2022-7bit-lock
1730 'iso-2022-8bit-ss2
1731 'emacs-mule
1732 'raw-text)
91693d18 1733
ddb5c041 1734 (set-default-coding-systems nil)
b5edd1d1 1735 (setq default-sendmail-coding-system 'iso-latin-1)
a41118cc
SM
1736 ;; On Darwin systems, this should be utf-8, but when this file is loaded
1737 ;; utf-8 is not yet defined, so we set it in set-locale-environment instead.
787caf99 1738 (setq default-file-name-coding-system 'iso-latin-1)
1d77e15a
JR
1739 ;; Preserve eol-type from existing default-process-coding-systems.
1740 ;; On non-unix-like systems in particular, these may have been set
1741 ;; carefully by the user, or by the startup code, to deal with the
1742 ;; users shell appropriately, so should not be altered by changing
1743 ;; language environment.
1744 (let ((output-coding
a099a2ff
JR
1745 ;; When bootstrapping, coding-systems are not defined yet, so
1746 ;; we need to catch the error from check-coding-system.
d37ef0f6 1747 (condition-case nil
a099a2ff
JR
1748 (coding-system-change-text-conversion
1749 (car default-process-coding-system) 'undecided)
1750 (coding-system-error 'undecided)))
1d77e15a 1751 (input-coding
a099a2ff
JR
1752 (condition-case nil
1753 (coding-system-change-text-conversion
1754 (cdr default-process-coding-system) 'iso-latin-1)
1755 (coding-system-error 'iso-latin-1))))
1d77e15a
JR
1756 (setq default-process-coding-system
1757 (cons output-coding input-coding)))
b5edd1d1 1758
ddb5c041
KH
1759 ;; Don't alter the terminal and keyboard coding systems here.
1760 ;; The terminal still supports the same coding system
1761 ;; that it supported a minute ago.
1b8dc791
SM
1762 ;; (set-terminal-coding-system-internal nil)
1763 ;; (set-keyboard-coding-system-internal nil)
ddb5c041 1764
7dd42fb1 1765 (set-unibyte-charset 'iso-8859-1))
ddb5c041 1766
0c47a7c8
KH
1767(reset-language-environment)
1768
ff76e074 1769(defun set-display-table-and-terminal-coding-system (language-name &optional coding-system)
40c81f74
PE
1770 "Set up the display table and terminal coding system for LANGUAGE-NAME."
1771 (let ((coding (get-language-info language-name 'unibyte-display)))
735b7c87
KH
1772 (if (and coding
1773 (or (not coding-system)
1774 (coding-system-equal coding coding-system)))
40c81f74 1775 (standard-display-european-internal)
a67ae60e
EZ
1776 ;; The following 2 lines undo the 8-bit display that we set up
1777 ;; in standard-display-european-internal, which see. This is in
1778 ;; case the user has used standard-display-european earlier in
1779 ;; this session. (The MS-DOS port doesn't use that setup, so it
1780 ;; doesn't need to undo it.)
d73a7bb8
JPW
1781 (when standard-display-table
1782 (dotimes (i 128)
b221615b 1783 (aset standard-display-table (+ i 128) nil))))
40c81f74 1784 (or (eq window-system 'pc)
ff76e074 1785 (set-terminal-coding-system (or coding-system coding)))))
40c81f74 1786
166246f7 1787(defun set-language-environment (language-name)
6c05d680
RS
1788 "Set up multi-lingual environment for using LANGUAGE-NAME.
1789This sets the coding system priority and the default input method
8861c593
RS
1790and sometimes other things. LANGUAGE-NAME should be a string
1791which is the name of a language environment. For example, \"Latin-1\"
1792specifies the character set for the major languages of Western Europe."
8efc03e1 1793 (interactive (list (read-language-name
ddb5c041 1794 nil
5b76833f 1795 "Set language environment (default English): ")))
4ef06f75
KH
1796 (if language-name
1797 (if (symbolp language-name)
1798 (setq language-name (symbol-name language-name)))
1799 (setq language-name "English"))
f15466c5 1800 (let ((slot (assoc-string language-name language-info-alist t)))
95498fd0 1801 (unless slot
f850d782 1802 (error "Language environment not defined: %S" language-name))
95498fd0 1803 (setq language-name (car slot)))
8efc03e1
KH
1804 (if current-language-environment
1805 (let ((func (get-language-info current-language-environment
1806 'exit-function)))
e63645c2 1807 (run-hooks 'exit-language-environment-hook)
5f395df3 1808 (if (functionp func) (funcall func))))
ddb5c041 1809
e9a8ed3c
KH
1810 (reset-language-environment)
1811 ;; The features might set up coding systems.
ddb5c041
KH
1812 (let ((required-features (get-language-info language-name 'features)))
1813 (while required-features
1814 (require (car required-features))
1815 (setq required-features (cdr required-features))))
6b61353c 1816
e9a8ed3c
KH
1817 (setq current-language-environment language-name)
1818
1819 (set-language-environment-coding-systems language-name)
1820 (set-language-environment-input-method language-name)
1821 (set-language-environment-nonascii-translation language-name)
1822 (set-language-environment-charset language-name)
1823 ;; Unibyte setups if necessary.
1824 (unless default-enable-multibyte-characters
1825 (set-language-environment-unibyte language-name))
1826
ddb5c041 1827 (let ((func (get-language-info language-name 'setup-function)))
5f395df3 1828 (if (functionp func)
ddb5c041 1829 (funcall func)))
e9a8ed3c 1830
8efc03e1 1831 (run-hooks 'set-language-environment-hook)
f850d782 1832 (force-mode-line-update t))
4ed46869 1833
c3869589 1834(define-widget 'charset 'symbol
1644d5b9
DL
1835 "An Emacs charset."
1836 :tag "Charset"
c3869589
DL
1837 :complete-function (lambda ()
1838 (interactive)
1839 (lisp-complete-symbol 'charsetp))
1840 :completion-ignore-case t
1841 :value 'ascii
1842 :validate (lambda (widget)
1843 (unless (charsetp (widget-value widget))
1844 (widget-put widget :error (format "Invalid charset: %S"
1845 (widget-value widget)))
1846 widget))
1847 :prompt-history 'charset-history)
1848
1849(defcustom language-info-custom-alist nil
1850 "Customizations of language environment parameters.
1851Value is an alist with elements like those of `language-info-alist'.
1852These are used to set values in `language-info-alist' which replace
1853the defaults. A typical use is replacing the default input method for
1854the environment. Use \\[describe-language-environment] to find the environment's settings.
1855
1856This option is intended for use at startup. Removing items doesn't
1857remove them from the language info until you next restart Emacs.
1858
1859Setting this variable directly does not take effect. See
1860`set-language-info-alist' for use in programs."
1861 :group 'mule
8589dc17 1862 :version "23.1"
c3869589
DL
1863 :set (lambda (s v)
1864 (custom-set-default s v)
1865 ;; Can't do this before language environments are set up.
1866 (when v
1867 ;; modify language-info-alist
1868 (dolist (elt v)
1869 (set-language-info-alist (car elt) (cdr elt)))
1870 ;; re-set the environment in case its parameters changed
1871 (set-language-environment current-language-environment)))
1872 :type `(alist
1873 :key-type (string :tag "Language environment"
1874 :completion-ignore-case t
1875 :complete-function widget-string-complete
1876 :completion-alist language-info-alist)
1877 :value-type
1878 (alist :key-type symbol
1879 :options ((documentation string)
1880 (charset (repeat charset))
1881 (sample-text string)
1882 (setup-function function)
1883 (exit-function function)
1884 (coding-system (repeat coding-system))
1885 (coding-priority (repeat coding-system))
1886 (nonascii-translation charset)
1887 (input-method
1888 (string
1889 :completion-ignore-case t
1890 :complete-function widget-string-complete
1891 :completion-alist input-method-alist
1892 :prompt-history input-method-history))
1893 (features (repeat symbol))
1894 (unibyte-display coding-system)))))
1895
51a8fc1d
RS
1896(defun standard-display-european-internal ()
1897 ;; Actually set up direct output of non-ASCII characters.
03c35c83
EZ
1898 (standard-display-8bit (if (eq window-system 'pc) 128 160) 255)
1899 ;; Unibyte Emacs on MS-DOS wants to display all 8-bit characters with
1900 ;; the native font, and codes 160 and 146 stand for something very
1901 ;; different there.
1902 (or (and (eq window-system 'pc) (not default-enable-multibyte-characters))
1903 (progn
6b626913
SM
1904 ;; Most X fonts used to do the wrong thing for latin-1 code 160.
1905 (unless (and (eq window-system 'x)
1906 ;; XFree86 4 has fixed the fonts.
1907 (string= "The XFree86 Project, Inc" (x-server-vendor))
1908 (> (aref (number-to-string (nth 2 (x-server-version))) 0)
1909 ?3))
1910 ;; Make non-line-break space display as a plain space.
1911 (aset standard-display-table 160 [32]))
1492f7ac 1912 ;; Most Windows programs send out apostrophes as \222. Most X fonts
03c35c83 1913 ;; don't contain a character at that position. Map it to the ASCII
5f395df3
SM
1914 ;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
1915 ;; U+2019, normally from the windows-1252 character set. XFree 4
1916 ;; fonts probably have the appropriate glyph at this position,
1917 ;; so they could use standard-display-8bit. It's better to use a
1918 ;; proper windows-1252 coding system. --fx]
6b626913 1919 (aset standard-display-table 146 [39]))))
03c35c83 1920
e9a8ed3c
KH
1921(defun set-language-environment-coding-systems (language-name)
1922 "Do various coding system setups for language environment LANGUAGE-NAME."
54b226f7 1923 (let* ((priority (get-language-info language-name 'coding-priority))
e9a8ed3c
KH
1924 (default-coding (car priority))
1925 (eol-type (coding-system-eol-type default-buffer-file-coding-system)))
97941b05
KH
1926 (when priority
1927 (set-default-coding-systems
1928 (if (memq eol-type '(0 1 2 unix dos mac))
1929 (coding-system-change-eol-conversion default-coding eol-type)
1930 default-coding))
1931 (setq default-sendmail-coding-system default-coding)
1932 (apply 'set-coding-system-priority priority))))
54b226f7 1933
d042f8b4
KH
1934(defun set-language-environment-input-method (language-name)
1935 "Do various input method setups for language environment LANGUAGE-NAME."
1936 (let ((input-method (get-language-info language-name 'input-method)))
1937 (when input-method
1938 (setq default-input-method input-method)
1939 (if input-method-history
1940 (setq input-method-history
1941 (cons input-method
1942 (delete input-method input-method-history)))))))
1943
1944(defun set-language-environment-nonascii-translation (language-name)
1945 "Do unibyte/multibyte translation setup for language environment LANGUAGE-NAME."
e9a8ed3c
KH
1946 ;; Note: For DOS, we assumed that the charset cpXXX is already
1947 ;; defined.
1948 (let ((nonascii (get-language-info language-name 'nonascii-translation)))
1949 (if (eq window-system 'pc)
1950 (setq nonascii (intern "cp%d" dos-codepage)))
1951 (or (and (charsetp nonascii)
1952 (get-charset-property nonascii :ascii-compatible-p))
1953 (setq nonascii 'iso-8859-1))
1954 (set-unibyte-charset nonascii)))
d042f8b4
KH
1955
1956(defun set-language-environment-charset (language-name)
1957 "Do various charset setups for language environment LANGUAGE-NAME."
e9a8ed3c
KH
1958 ;; Put higher priorities to such charsets that are supported by the
1959 ;; coding systems of higher priorities in this environment.
1960 (let ((charsets (get-language-info language-name 'charset)))
1961 (dolist (coding (get-language-info language-name 'coding-priority))
1962 (setq charsets (append charsets (coding-system-charset-list coding))))
1963 (if charsets
1964 (apply 'set-charset-priority charsets))))
d042f8b4
KH
1965
1966(defun set-language-environment-unibyte (language-name)
1967 "Do various unibyte-mode setups for language environment LANGUAGE-NAME."
d042f8b4
KH
1968 (set-display-table-and-terminal-coding-system language-name))
1969
4ed46869 1970(defsubst princ-list (&rest args)
d37ef0f6 1971 "Print all arguments with `princ', then print \"\n\"."
4ed46869
KH
1972 (while args (princ (car args)) (setq args (cdr args)))
1973 (princ "\n"))
1974
b0648a00
RS
1975(put 'describe-specified-language-support 'apropos-inhibit t)
1976
6b61353c 1977;; Print language-specific information such as input methods,
13e82c04 1978;; charsets, and coding systems. This function is intended to be
48082651 1979;; called from the menu:
281d03ec 1980;; [menu-bar mule describe-language-environment LANGUAGE]
48082651
KH
1981;; and should not run it by `M-x describe-current-input-method-function'.
1982(defun describe-specified-language-support ()
96db204a 1983 "Describe how Emacs supports the specified language environment."
48082651 1984 (interactive)
281d03ec 1985 (let (language-name)
48082651 1986 (if (not (and (symbolp last-command-event)
cda74479
DL
1987 (or (not (eq last-command-event 'Default))
1988 (setq last-command-event 'English))
281d03ec 1989 (setq language-name (symbol-name last-command-event))))
48082651 1990 (error "Bogus calling sequence"))
281d03ec
RS
1991 (describe-language-environment language-name)))
1992
1993(defun describe-language-environment (language-name)
1994 "Describe how Emacs supports language environment LANGUAGE-NAME."
78754934
KH
1995 (interactive
1996 (list (read-language-name
1997 'documentation
5b76833f 1998 "Describe language environment (default current choice): ")))
f850d782
RS
1999 (if (null language-name)
2000 (setq language-name current-language-environment))
281d03ec
RS
2001 (if (or (null language-name)
2002 (null (get-language-info language-name 'documentation)))
2003 (error "No documentation for the specified language"))
4ef06f75
KH
2004 (if (symbolp language-name)
2005 (setq language-name (symbol-name language-name)))
ef5a4730
KH
2006 (dolist (feature (get-language-info language-name 'features))
2007 (require feature))
6b626913 2008 (let ((doc (get-language-info language-name 'documentation)))
c3034e84
SM
2009 (help-setup-xref (list #'describe-language-environment language-name)
2010 (interactive-p))
2011 (with-output-to-temp-buffer (help-buffer)
464cc130
KH
2012 (save-excursion
2013 (set-buffer standard-output)
2014 (insert language-name " language environment\n\n")
2015 (if (stringp doc)
2016 (insert doc "\n\n"))
e036b0a6
KH
2017 (condition-case nil
2018 (let ((str (eval (get-language-info language-name 'sample-text))))
2019 (if (stringp str)
2020 (insert "Sample text:\n " str "\n\n")))
2021 (error nil))
464cc130
KH
2022 (let ((input-method (get-language-info language-name 'input-method))
2023 (l (copy-sequence input-method-alist)))
2024 (insert "Input methods")
2025 (when input-method
5b76833f 2026 (insert " (default " input-method ")")
464cc130
KH
2027 (setq input-method (assoc input-method input-method-alist))
2028 (setq l (cons input-method (delete input-method l))))
2029 (insert ":\n")
48082651 2030 (while l
58b78d5b
DL
2031 (when (eq t (compare-strings language-name nil nil
2032 (nth 1 (car l)) nil nil t))
464cc130
KH
2033 (insert " " (car (car l)))
2034 (search-backward (car (car l)))
467412aa 2035 (help-xref-button 0 'help-input-method (car (car l)))
464cc130 2036 (goto-char (point-max))
2fa7e202
KH
2037 (insert " (\""
2038 (if (stringp (nth 3 (car l)))
2039 (nth 3 (car l))
2040 (car (nth 3 (car l))))
2041 "\" in mode line)\n"))
464cc130
KH
2042 (setq l (cdr l)))
2043 (insert "\n"))
2044 (insert "Character sets:\n")
2045 (let ((l (get-language-info language-name 'charset)))
2046 (if (null l)
2047 (insert " nothing specific to " language-name "\n")
2048 (while l
2049 (insert " " (symbol-name (car l)))
2050 (search-backward (symbol-name (car l)))
467412aa 2051 (help-xref-button 0 'help-character-set (car l))
464cc130
KH
2052 (goto-char (point-max))
2053 (insert ": " (charset-description (car l)) "\n")
2054 (setq l (cdr l)))))
2055 (insert "\n")
2056 (insert "Coding systems:\n")
2057 (let ((l (get-language-info language-name 'coding-system)))
2058 (if (null l)
2059 (insert " nothing specific to " language-name "\n")
2060 (while l
2061 (insert " " (symbol-name (car l)))
2062 (search-backward (symbol-name (car l)))
467412aa 2063 (help-xref-button 0 'help-coding-system (car l))
464cc130
KH
2064 (goto-char (point-max))
2065 (insert " (`"
2066 (coding-system-mnemonic (car l))
2067 "' in mode line):\n\t"
2068 (coding-system-doc-string (car l))
2069 "\n")
0855c6cd 2070 (let ((aliases (coding-system-aliases (car l))))
464cc130
KH
2071 (when aliases
2072 (insert "\t(alias:")
2073 (while aliases
2074 (insert " " (symbol-name (car aliases)))
2075 (setq aliases (cdr aliases)))
2076 (insert ")\n")))
c3034e84 2077 (setq l (cdr l)))))))))
4ed46869 2078\f
40c81f74
PE
2079;;; Locales.
2080
0d7c5bb9
DL
2081(defvar locale-translation-file-name nil
2082 "File name for the system's file of locale-name aliases, or nil if none.")
40c81f74 2083
5f395df3
SM
2084;; The following definitions might as well be marked as constants and
2085;; purecopied, since they're normally used on startup, and probably
2086;; should reflect the facilities of the base Emacs.
2087(defconst locale-language-names
2088 (purecopy
2089 '(
40c81f74
PE
2090 ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
2091 ;; as specified in the Single Unix Spec, Version 2.
2092 ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
2093 ;; with additions from ISO 639/RA Newsletter No.1/1989;
5f395df3
SM
2094 ;; see Internet RFC 2165 (1997-06) and
2095 ;; http://www.evertype.com/standards/iso639/iso639-en.html
2096 ;; TERRITORY is a country code taken from ISO 3166
2097 ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
40c81f74 2098 ;; CODESET and MODIFIER are implementation-dependent.
5f395df3 2099
6b61353c
KH
2100 ;; jasonr comments: MS Windows uses three letter codes for
2101 ;; languages instead of the two letter ISO codes that POSIX
2102 ;; uses. In most cases the first two letters are the same, so
2103 ;; most of the regexps in locale-language-names work. Japanese
2104 ;; and Chinese are exceptions, which are listed in the
2105 ;; non-standard section at the bottom of locale-language-names.
2106
8dedddd5
KH
2107 ("aa_DJ" . "Latin-1") ; Afar
2108 ("aa" . "UTF-8")
2109 ;; ab Abkhazian
6ececc4d 2110 ("af" . "Latin-1") ; Afrikaans
8dedddd5
KH
2111 ("am" "Ethiopic" utf-8) ; Amharic
2112 ("an" . "Latin-9") ; Aragonese
5f395df3 2113 ; ar Arabic glibc uses 8859-6
40c81f74
PE
2114 ; as Assamese
2115 ; ay Aymara
8dedddd5 2116 ("az" . "UTF-8") ; Azerbaijani
40c81f74 2117 ; ba Bashkir
8dedddd5
KH
2118 ("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
2119 ("bg" "Bulgarian" cp1251) ; Bulgarian
40c81f74
PE
2120 ; bh Bihari
2121 ; bi Bislama
8dedddd5 2122 ("bn" . "UTF-8") ; Bengali, Bangla
40c81f74
PE
2123 ("bo" . "Tibetan")
2124 ("br" . "Latin-1") ; Breton
d37ef0f6 2125 ("bs" . "Latin-2") ; Bosnian
8dedddd5 2126 ("byn" . "UTF-8") ; Bilin; Blin
40c81f74
PE
2127 ("ca" . "Latin-1") ; Catalan
2128 ; co Corsican
8dedddd5
KH
2129 ("cs" "Czech" iso-8859-2)
2130 ("cy" "Welsh" iso-8859-14)
40c81f74 2131 ("da" . "Latin-1") ; Danish
8dedddd5 2132 ("de" "German" iso-8859-1)
569a6374 2133 ; dv Divehi
40c81f74 2134 ; dz Bhutani
8dedddd5 2135 ("el" "Greek" iso-8859-7)
6ececc4d 2136 ;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
e11cf111
DL
2137 ;; That's actually what the GNU locales define, modulo things like
2138 ;; en_IN -- fx.
8dedddd5 2139 ("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India
eec5c8f9 2140 ("en" "English" iso-8859-1) ; English
58802cd7 2141 ("eo" . "Esperanto") ; Esperanto
8dedddd5
KH
2142 ("es" "Spanish" iso-8859-1)
2143 ("et" . "Latin-1") ; Estonian
40c81f74 2144 ("eu" . "Latin-1") ; Basque
8dedddd5 2145 ("fa" . "UTF-8") ; Persian
40c81f74 2146 ("fi" . "Latin-1") ; Finnish
8dedddd5 2147 ("fj" . "Latin-1") ; Fiji
40c81f74 2148 ("fo" . "Latin-1") ; Faroese
8dedddd5 2149 ("fr" "French" iso-8859-1) ; French
40c81f74 2150 ("fy" . "Latin-1") ; Frisian
6ececc4d 2151 ("ga" . "Latin-1") ; Irish Gaelic (new orthography)
8dedddd5
KH
2152 ("gd" . "Latin-9") ; Scots Gaelic
2153 ("gez" "Ethiopic" utf-8) ; Geez
2154 ("gl" . "Latin-1") ; Gallegan; Galician
40c81f74 2155 ; gn Guarani
8dedddd5
KH
2156 ("gu" . "UTF-8") ; Gujarati
2157 ("gv" . "Latin-1") ; Manx Gaelic
40c81f74 2158 ; ha Hausa
8dedddd5
KH
2159 ("he" "Hebrew" iso-8859-8)
2160 ("hi" "Devanagari" utf-8) ; Hindi
2161 ("hr" "Croatian" iso-8859-2) ; Croatian
40c81f74
PE
2162 ("hu" . "Latin-2") ; Hungarian
2163 ; hy Armenian
2164 ; ia Interlingua
2165 ("id" . "Latin-1") ; Indonesian
2166 ; ie Interlingue
2167 ; ik Inupiak
2168 ("is" . "Latin-1") ; Icelandic
8dedddd5 2169 ("it" "Italian" iso-8859-1) ; Italian
40c81f74 2170 ; iu Inuktitut
8dedddd5
KH
2171 ("iw" "Hebrew" iso-8859-8)
2172 ("ja" "Japanese" euc-jp)
40c81f74 2173 ; jw Javanese
8dedddd5 2174 ("ka" "Georgian" georgian-ps) ; Georgian
40c81f74 2175 ; kk Kazakh
6ececc4d 2176 ("kl" . "Latin-1") ; Greenlandic
40c81f74 2177 ; km Cambodian
8dedddd5
KH
2178 ("kn" "Kannada" utf-8)
2179 ("ko" "Korean" euc-kr)
40c81f74
PE
2180 ; ks Kashmiri
2181 ; ku Kurdish
5f395df3 2182 ("kw" . "Latin-1") ; Cornish
40c81f74
PE
2183 ; ky Kirghiz
2184 ("la" . "Latin-1") ; Latin
5f395df3 2185 ("lb" . "Latin-1") ; Luxemburgish
8dedddd5 2186 ("lg" . "Laint-6") ; Ganda
40c81f74 2187 ; ln Lingala
8dedddd5
KH
2188 ("lo" "Lao" utf-8) ; Laothian
2189 ("lt" "Lithuanian" iso-8859-13)
9c20a8d5 2190 ("lv" . "Latvian") ; Latvian, Lettish
40c81f74 2191 ; mg Malagasy
5f395df3 2192 ("mi" . "Latin-7") ; Maori
8dedddd5
KH
2193 ("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
2194 ("ml" "Malayalam" utf-8)
2195 ("mn" . "UTF-8") ; Mongolian
40c81f74 2196 ; mo Moldavian
8dedddd5 2197 ("mr" "Devanagari" utf-8) ; Marathi
5f395df3 2198 ("ms" . "Latin-1") ; Malay
40c81f74
PE
2199 ("mt" . "Latin-3") ; Maltese
2200 ; my Burmese
2201 ; na Nauru
8dedddd5
KH
2202 ("nb" . "Latin-1") ; Norwegian
2203 ("ne" "Devanagari" utf-8) ; Nepali
2204 ("nl" "Dutch" iso-8859-1)
40c81f74 2205 ("no" . "Latin-1") ; Norwegian
5f395df3 2206 ("oc" . "Latin-1") ; Occitan
8dedddd5
KH
2207 ("om_ET" . "UTF-8") ; (Afan) Oromo
2208 ("om" . "Latin-1") ; (Afan) Oromo
40c81f74 2209 ; or Oriya
8dedddd5 2210 ("pa" . "UTF-8") ; Punjabi
40c81f74
PE
2211 ("pl" . "Latin-2") ; Polish
2212 ; ps Pashto, Pushto
2213 ("pt" . "Latin-1") ; Portuguese
2214 ; qu Quechua
6ececc4d 2215 ("rm" . "Latin-1") ; Rhaeto-Romanic
40c81f74 2216 ; rn Kirundi
8dedddd5
KH
2217 ("ro" "Romanian" iso-8859-2)
2218 ("ru_RU" "Russian" iso-8859-5)
2219 ("ru_UA" "Russian" koi8-u)
40c81f74
PE
2220 ; rw Kinyarwanda
2221 ("sa" . "Devanagari") ; Sanskrit
2222 ; sd Sindhi
8dedddd5 2223 ("se" . "UTF-8") ; Northern Sami
40c81f74
PE
2224 ; sg Sangho
2225 ("sh" . "Latin-2") ; Serbo-Croatian
2226 ; si Sinhalese
8dedddd5
KH
2227 ("sid" . "UTF-8") ; Sidamo
2228 ("sk" "Slovak" iso-8859-2)
2229 ("sl" "Slovenian" iso-8859-2)
40c81f74
PE
2230 ; sm Samoan
2231 ; sn Shona
8dedddd5
KH
2232 ("so_ET" "UTF-8") ; Somali
2233 ("so" "Latin-1") ; Somali
6ececc4d 2234 ("sq" . "Latin-1") ; Albanian
40c81f74
PE
2235 ("sr" . "Latin-2") ; Serbian (Latin alphabet)
2236 ; ss Siswati
8dedddd5 2237 ("st" . "Latin-1") ; Sesotho
40c81f74 2238 ; su Sundanese
8dedddd5 2239 ("sv" "Swedish" iso-8859-1) ; Swedish
40c81f74 2240 ("sw" . "Latin-1") ; Swahili
8dedddd5
KH
2241 ("ta" "Tamil" utf-8)
2242 ("te" . "UTF-8") ; Telugu
2243 ("tg" "Tajik" koi8-t)
2244 ("th" "Thai" tis-620)
2245 ("ti" "Ethiopic" utf-8) ; Tigrinya
2246 ("tig_ER" . "UTF-8") ; Tigre
40c81f74 2247 ; tk Turkmen
6ececc4d 2248 ("tl" . "Latin-1") ; Tagalog
40c81f74
PE
2249 ; tn Setswana
2250 ; to Tonga
8dedddd5 2251 ("tr" "Turkish" iso-8859-9)
40c81f74 2252 ; ts Tsonga
8dedddd5 2253 ("tt" . "UTF-8") ; Tatar
40c81f74
PE
2254 ; tw Twi
2255 ; ug Uighur
8dedddd5
KH
2256 ("uk" "Ukrainian" koi8-u)
2257 ("ur" . "UTF-8") ; Urdu
2258 ("uz_UZ@cyrillic" . "UTF-8"); Uzbek
5f395df3 2259 ("uz" . "Latin-1") ; Uzbek
8dedddd5 2260 ("vi" "Vietnamese" utf-8)
40c81f74 2261 ; vo Volapuk
d37ef0f6 2262 ("wa" . "Latin-1") ; Walloon
40c81f74 2263 ; wo Wolof
8dedddd5 2264 ("xh" . "Latin-1") ; Xhosa
9c20a8d5 2265 ("yi" . "Windows-1255") ; Yiddish
40c81f74
PE
2266 ; yo Yoruba
2267 ; za Zhuang
8dedddd5 2268 ("zh_HK" . "Chinese-Big5")
05b81b42 2269 ; zh_HK/BIG5-HKSCS \
8dedddd5 2270 ("zh_TW" . "Chinese-Big5")
05b81b42
KH
2271 ("zh_CN.GB2312" "Chinese-GB")
2272 ("zh_CN.GBK" "Chinese-GBK")
2273 ("zh_CN.GB18030" "Chinese-GB18030")
f21605fa 2274 ("zh_CN.UTF-8" . "Chinese-GBK")
8dedddd5 2275 ("zh_CN" . "Chinese-GB")
40c81f74 2276 ("zh" . "Chinese-GB")
8dedddd5 2277 ("zu" . "Latin-1") ; Zulu
40c81f74
PE
2278
2279 ;; ISO standard locales
2280 ("c$" . "ASCII")
2281 ("posix$" . "ASCII")
2282
40c81f74
PE
2283 ;; The "IPA" Emacs language environment does not correspond
2284 ;; to any ISO 639 code, so let it stand for itself.
2285 ("ipa$" . "IPA")
2286
2287 ;; Nonstandard or obsolete language codes
2288 ("cz" . "Czech") ; e.g. Solaris 2.6
2289 ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4
2290 ("iw" . "Hebrew") ; e.g. X11R6.4
f1282c7f 2291 ("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4
40c81f74 2292 ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6
2e86ceaa 2293 ("jp" . "Japanese") ; e.g. MS Windows
f21605fa 2294 ("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified
86d9e628 2295 ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
569a6374
EZ
2296 ("gbz" . "UTF-8") ; MS Windows Dari Persian
2297 ("div" . "UTF-8") ; MS Windows Divehi (Maldives)
2298 ("wee" . "Latin-2") ; MS Windows Lower Sorbian
2299 ("wen" . "Latin-2") ; MS Windows Upper Sorbian
367ca50f 2300 ))
8dedddd5
KH
2301 "Alist of locale regexps vs the corresponding languages and coding systems.
2302Each element has these form:
2303 \(LOCALE-REGEXP LANG-ENV CODING-SYSTEM)
2304The first element whose LOCALE-REGEXP matches the start of a
2305downcased locale specifies the LANG-ENV \(language environtment)
2306and CODING-SYSTEM corresponding to that locale. If there is no
2307appropriate language environment, the element may have this form:
2308 \(LOCALE-REGEXP . LANG-ENV)
2309In this case, LANG-ENV is one of generic language environments for an
2310specific encoding such as \"Latin-1\" and \"UTF-8\".")
40c81f74 2311
5f395df3
SM
2312(defconst locale-charset-language-names
2313 (purecopy
2314 '((".*8859[-_]?1\\>" . "Latin-1")
2315 (".*8859[-_]?2\\>" . "Latin-2")
2316 (".*8859[-_]?3\\>" . "Latin-3")
2317 (".*8859[-_]?4\\>" . "Latin-4")
2318 (".*8859[-_]?9\\>" . "Latin-5")
2319 (".*8859[-_]?14\\>" . "Latin-8")
2320 (".*8859[-_]?15\\>" . "Latin-9")
6b61353c
KH
2321 (".*utf\\(?:-?8\\)?\\>" . "UTF-8")
2322 ;; utf-8@euro exists, so put this last. (@euro really specifies
2323 ;; the currency, rather than the charset.)
e522f07b 2324 (".*@euro\\>" . "Latin-9")))
6ececc4d
PE
2325 "List of pairs of locale regexps and charset language names.
2326The first element whose locale regexp matches the start of a downcased locale
6b61353c 2327specifies the language name whose charset corresponds to that locale.
8dedddd5
KH
2328This language name is used if the locale is not listed in
2329`locale-language-names'")
6ececc4d 2330
5f395df3
SM
2331(defconst locale-preferred-coding-systems
2332 (purecopy
8dedddd5
KH
2333 '((".*8859[-_]?1\\>" . iso-8859-1)
2334 (".*8859[-_]?2\\>" . iso-8859-2)
2335 (".*8859[-_]?3\\>" . iso-8859-3)
2336 (".*8859[-_]?4\\>" . iso-8859-4)
2337 (".*8859[-_]?9\\>" . iso-8859-9)
2338 (".*8859[-_]?14\\>" . iso-8859-14)
2339 (".*8859[-_]?15\\>" . iso-8859-15)
2340 (".*utf\\(?:-?8\\)?" . utf-8)
2341 ;; utf-8@euro exists, so put this after utf-8. (@euro really
2342 ;; specifies the currency, rather than the charset.)
2343 (".*@euro" . iso-8859-15)
2344 ("koi8-?r" . koi8-r)
2345 ("koi8-?u" . koi8-u)
2346 ("tcvn" . tcvn)
38c05d07 2347 ("big5[-_]?hkscs" . big5-hkscs)
8dedddd5
KH
2348 ("big5" . big5)
2349 ("euc-?tw" . euc-tw)
38c05d07
KH
2350 ("euc-?cn" .euc-cn)
2351 ("gb2312" . gb2312)
2352 ("gbk" . gbk)
2353 ("gb18030" . gb18030)
8dedddd5 2354 ("ja.*[._]euc" . japanese-iso-8bit)
5f395df3
SM
2355 ("ja.*[._]jis7" . iso-2022-jp)
2356 ("ja.*[._]pck" . japanese-shift-jis)
2357 ("ja.*[._]sjis" . japanese-shift-jis)
69210880 2358 ("jpn" . japanese-shift-jis) ; MS-Windows uses this.
8dedddd5 2359 ))
6ececc4d
PE
2360 "List of pairs of locale regexps and preferred coding systems.
2361The first element whose locale regexp matches the start of a downcased locale
8dedddd5
KH
2362specifies the coding system to prefer when using that locale.
2363This coding system is used if the locale specifies a specific charset.")
40c81f74
PE
2364
2365(defun locale-name-match (key alist)
2366 "Search for KEY in ALIST, which should be a list of regexp-value pairs.
2367Return the value corresponding to the first regexp that matches the
2368start of KEY, or nil if there is no match."
2369 (let (element)
2370 (while (and alist (not element))
ddf6b836 2371 (if (string-match (concat "\\`\\(?:" (car (car alist)) "\\)") key)
40c81f74
PE
2372 (setq element (car alist)))
2373 (setq alist (cdr alist)))
2374 (cdr element)))
2375
58b78d5b 2376(defun locale-charset-match-p (charset1 charset2)
91ed0599 2377 "Whether charset names (strings) CHARSET1 and CHARSET2 are equivalent.
58b78d5b
DL
2378Matching is done ignoring case and any hyphens and underscores in the
2379names. E.g. `ISO_8859-1' and `iso88591' both match `iso-8859-1'."
2380 (setq charset1 (replace-regexp-in-string "[-_]" "" charset1))
2381 (setq charset2 (replace-regexp-in-string "[-_]" "" charset2))
2382 (eq t (compare-strings charset1 nil nil charset2 nil nil t)))
2383
6b61353c
KH
2384(defvar locale-charset-alist nil
2385 "Coding system alist keyed on locale-style charset name.
2386Used by `locale-charset-to-coding-system'.")
2387
2388(defun locale-charset-to-coding-system (charset)
2389 "Find coding system corresponding to CHARSET.
2390CHARSET is any sort of non-Emacs charset name, such as might be used
2391in a locale codeset, or elsewhere. It is matched to a coding system
2392first by case-insensitive lookup in `locale-charset-alist'. Then
2393matches are looked for in the coding system list, treating case and
2394the characters `-' and `_' as insignificant. The coding system base
2395is returned. Thus, for instance, if charset \"ISO8859-2\",
2396`iso-latin-2' is returned."
f15466c5 2397 (or (car (assoc-string charset locale-charset-alist t))
6b61353c
KH
2398 (let ((cs coding-system-alist)
2399 c)
2400 (while (and (not c) cs)
2401 (if (locale-charset-match-p charset (caar cs))
2402 (setq c (intern (caar cs)))
2403 (pop cs)))
2404 (if c (coding-system-base c)))))
2405
2406;; Fixme: This ought to deal with the territory part of the locale
2407;; too, for setting things such as calendar holidays, ps-print paper
2408;; size, spelling dictionary.
2409
758f07de 2410(defun set-locale-environment (&optional locale-name)
40c81f74 2411 "Set up multi-lingual environment for using LOCALE-NAME.
758f07de
RS
2412This sets the language environment, the coding system priority,
2413the default input method and sometimes other things.
2414
58b78d5b 2415LOCALE-NAME should be a string which is the name of a locale supported
6b61353c 2416by the system. Often it is of the form xx_XX.CODE, where xx is a
58b78d5b
DL
2417language, XX is a country, and CODE specifies a character set and
2418coding system. For example, the locale name \"ja_JP.EUC\" might name
2419a locale for Japanese in Japan using the `japanese-iso-8bit'
2420coding-system. The name may also have a modifier suffix, e.g. `@euro'
2421or `@cyrillic'.
40c81f74 2422
758f07de 2423If LOCALE-NAME is nil, its value is taken from the environment
d37ef0f6 2424variables LC_ALL, LC_CTYPE and LANG (the first one that is set).
40c81f74
PE
2425
2426The locale names supported by your system can typically be found in a
0812c1e8 2427directory named `/usr/share/locale' or `/usr/lib/locale'. LOCALE-NAME
758f07de 2428will be translated according to the table specified by
0812c1e8
DL
2429`locale-translation-file-name'.
2430
2431See also `locale-charset-language-names', `locale-language-names',
2432`locale-preferred-coding-systems' and `locale-coding-system'."
758f07de 2433 (interactive "sSet environment for locale: ")
a1506d29 2434
0d7c5bb9
DL
2435 ;; Do this at runtime for the sake of binaries possibly transported
2436 ;; to a system without X.
2437 (setq locale-translation-file-name
2438 (let ((files
6bba8c70
KH
2439 '("/usr/share/X11/locale/locale.alias" ; e.g. X11R7
2440 "/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4
6b61353c 2441 "/usr/X11R6/lib/X11/locale/locale.alias" ; XFree86, e.g. RedHat 4.2
0d7c5bb9
DL
2442 "/usr/openwin/lib/locale/locale.alias" ; e.g. Solaris 2.6
2443 ;;
2444 ;; The following name appears after the X-related names above,
2445 ;; since the X-related names are what X actually uses.
2446 "/usr/share/locale/locale.alias" ; GNU/Linux sans X
2447 )))
2448 (while (and files (not (file-exists-p (car files))))
2449 (setq files (cdr files)))
2450 (car files)))
2451
7008ccac
GM
2452 (let ((locale locale-name))
2453
2454 (unless locale
2455 ;; Use the first of these three environment variables
2456 ;; that has a nonempty value.
2457 (let ((vars '("LC_ALL" "LC_CTYPE" "LANG")))
db6bd804
DL
2458 (while (and vars
2459 (= 0 (length locale))) ; nil or empty string
2460 (setq locale (getenv (pop vars))))))
7008ccac 2461
02de72e9
SM
2462 (unless locale
2463 ;; The two tests are kept separate so the byte-compiler sees
2464 ;; that mac-get-preference is only called after checking its existence.
2465 (when (fboundp 'mac-get-preference)
2466 (setq locale (mac-get-preference "AppleLocale"))
2467 (unless locale
2468 (let ((languages (mac-get-preference "AppleLanguages")))
2469 (unless (= (length languages) 0) ; nil or empty vector
2470 (setq locale (aref languages 0)))))))
cfd67504
YM
2471 (unless (or locale (not (boundp 'mac-system-locale)))
2472 (setq locale mac-system-locale))
2473
7008ccac
GM
2474 (when locale
2475
2476 ;; Translate "swedish" into "sv_SE.ISO8859-1", and so on,
2477 ;; using the translation file that many systems have.
2478 (when locale-translation-file-name
2479 (with-temp-buffer
8e9e2c18 2480 (set-buffer-multibyte nil)
7008ccac
GM
2481 (insert-file-contents locale-translation-file-name)
2482 (when (re-search-forward
2483 (concat "^" (regexp-quote locale) ":?[ \t]+") nil t)
2484 (setq locale (buffer-substring (point) (line-end-position))))))
2485
2486 ;; Leave the system locales alone if the caller did not specify
2487 ;; an explicit locale name, as their defaults are set from
2488 ;; LC_MESSAGES and LC_TIME, not LC_CTYPE, and the user might not
2489 ;; want to set them to the same value as LC_CTYPE.
2490 (when locale-name
2491 (setq system-messages-locale locale)
2492 (setq system-time-locale locale))
2493
2494 (setq locale (downcase locale))
2495
2496 (let ((language-name
2497 (locale-name-match locale locale-language-names))
2498 (charset-language-name
2499 (locale-name-match locale locale-charset-language-names))
cdb1af30
EZ
2500 (default-eol-type (coding-system-eol-type
2501 default-buffer-file-coding-system))
7008ccac 2502 (coding-system
6b61353c
KH
2503 (or (locale-name-match locale locale-preferred-coding-systems)
2504 (when locale
2505 (if (string-match "\\.\\([^@]+\\)" locale)
2506 (locale-charset-to-coding-system
cfd67504
YM
2507 (match-string 1 locale))))
2508 (and (eq system-type 'macos) mac-system-coding-system))))
7008ccac 2509
8dedddd5
KH
2510 (if (consp language-name)
2511 ;; locale-language-names specify both lang-env and coding.
2512 ;; But, what specified in locale-preferred-coding-systems
2513 ;; has higher priority.
2514 (setq coding-system (or coding-system
2515 (nth 1 language-name))
2516 language-name (car language-name))
2517 ;; Otherwise, if locale is not listed in locale-language-names,
2518 ;; use what listed in locale-charset-language-names.
2519 (if (not language-name)
2520 (setq language-name charset-language-name)))
7008ccac 2521
cdb1af30
EZ
2522 ;; If a specific EOL conversion was specified in the default
2523 ;; buffer-file-coding-system, preserve it in the coding system
2524 ;; we will be using from now on.
8bca692e
EZ
2525 (if (and (memq default-eol-type '(0 1 2 unix dos mac))
2526 coding-system
2527 (coding-system-p coding-system))
cdb1af30
EZ
2528 (setq coding-system (coding-system-change-eol-conversion
2529 coding-system default-eol-type)))
2530
7008ccac
GM
2531 (when language-name
2532
2533 ;; Set up for this character set. This is now the right way
2534 ;; to do it for both unibyte and multibyte modes.
2535 (set-language-environment language-name)
2536
2537 ;; If default-enable-multibyte-characters is nil,
2538 ;; we are using single-byte characters,
2539 ;; so the display table and terminal coding system are irrelevant.
2540 (when default-enable-multibyte-characters
ff76e074
SM
2541 (set-display-table-and-terminal-coding-system
2542 language-name coding-system))
7008ccac 2543
166ce29f
DL
2544 ;; Set the `keyboard-coding-system' if appropriate (tty
2545 ;; only). At least X and MS Windows can generate
2546 ;; multilingual input.
2547 (unless window-system
2548 (let ((kcs (or coding-system
2549 (car (get-language-info language-name
2550 'coding-system)))))
2551 (if kcs (set-keyboard-coding-system kcs))))
ddf6b836 2552
7008ccac
GM
2553 (setq locale-coding-system
2554 (car (get-language-info language-name 'coding-priority))))
2555
8dedddd5
KH
2556 (when (and coding-system
2557 (not (coding-system-equal coding-system
2558 locale-coding-system)))
7008ccac 2559 (prefer-coding-system coding-system)
218e7ce3
KH
2560 ;; Fixme: perhaps prefer-coding-system should set this too.
2561 ;; But it's not the time to do such a fundamental change.
2562 (setq default-sendmail-coding-system coding-system)
58b78d5b 2563 (setq locale-coding-system coding-system))
4bc5d766 2564
c7b4b466 2565 (when (get-language-info current-language-environment 'coding-priority)
8f924df7
KH
2566 (let ((codeset (locale-info 'codeset))
2567 (coding-system (car (coding-system-priority-list))))
2568 (when codeset
2569 (let ((cs (coding-system-aliases coding-system))
2570 result)
2571 (while (and cs (not result))
2572 (setq result
2573 (locale-charset-match-p (symbol-name (pop cs))
2574 (locale-info 'codeset))))
2575 (unless result
c7b4b466 2576 (message "Warning: Default coding system `%s' disagrees with
8f924df7 2577system codeset `%s' for this locale." coding-system codeset))))))))
e76ef161 2578
52c7f9ee
JR
2579 ;; On Windows, override locale-coding-system,
2580 ;; keyboard-coding-system with system codepage. Note:
2581 ;; selection-coding-system is already set in w32select.c.
5eb94383 2582 (when (boundp 'w32-ansi-code-page)
893b49bb
JR
2583 (let ((code-page-coding (intern (format "cp%d" w32-ansi-code-page))))
2584 (when (coding-system-p code-page-coding)
2585 (setq locale-coding-system code-page-coding)
893b49bb
JR
2586 (set-keyboard-coding-system code-page-coding)
2587 (set-terminal-coding-system code-page-coding))))
2588
a41118cc 2589 (when (eq system-type 'darwin)
ff76e074
SM
2590 ;; On Darwin, file names are always encoded in utf-8, no matter
2591 ;; the locale.
2592 (setq default-file-name-coding-system 'utf-8)
2593 ;; Mac OS X's Terminal.app by default uses utf-8 regardless of
2594 ;; the locale.
2595 (when (and (null window-system)
2596 (equal (getenv "TERM_PROGRAM") "Apple_Terminal"))
2597 (set-terminal-coding-system 'utf-8)
2598 (set-keyboard-coding-system 'utf-8)))
a41118cc 2599
e76ef161 2600 ;; Default to A4 paper if we're not in a C, POSIX or US locale.
3479c806 2601 ;; (See comments in Flocale_info.)
e76ef161 2602 (let ((locale locale)
3479c806 2603 (paper (locale-info 'paper)))
e76ef161
DL
2604 (if paper
2605 ;; This will always be null at the time of writing.
2606 (cond
2607 ((equal paper '(216 279))
2608 (setq ps-paper-type 'letter))
2609 ((equal paper '(210 297))
2610 (setq ps-paper-type 'a4)))
2611 (let ((vars '("LC_ALL" "LC_PAPER" "LANG")))
2612 (while (and vars (= 0 (length locale)))
2613 (setq locale (getenv (pop vars)))))
2614 (when locale
2615 ;; As of glibc 2.2.5, these are the only US Letter locales,
2616 ;; and the rest are A4.
2617 (setq ps-paper-type
2618 (or (locale-name-match locale '(("c$" . letter)
2619 ("posix$" . letter)
2620 (".._us" . letter)
2621 (".._pr" . letter)
893b49bb
JR
2622 (".._ca" . letter)
2623 ("enu$" . letter) ; Windows
2624 ("esu$" . letter)
2625 ("enc$" . letter)
2626 ("frc$" . letter)))
e76ef161
DL
2627 'a4))))))
2628 nil)
40c81f74 2629\f
16400c32
KH
2630;;; Character property
2631
2632;; Each element has the form (PROP . TABLE).
2633;; PROP is a symbol representing a character property.
2634;; TABLE is a char-table containing the property value for each character.
2635;; TABLE may be a name of file to load to build a char-table.
2636;; Don't modify this variable directly but use `define-char-code-property'.
2637
2638(defvar char-code-property-alist nil
2639 "Alist of character property name vs char-table containing property values.
2640Internal use only.")
2641
2642(put 'char-code-property-table 'char-table-extra-slots 5)
2643
2644(defun define-char-code-property (name table &optional docstring)
2645 "Define NAME as a character code property given by TABLE.
2646TABLE is a char-table of purpose `char-code-property-table' with
2647these extra slots:
2648 1st: NAME.
2649 2nd: Function to call to get a property value of a character.
2650 It is called with three arugments CHAR, VAL, and TABLE, where
2651 CHAR is a character, VAL is the value of (aref TABLE CHAR).
2652 3rd: Function to call to put a property value of a character.
2653 It is called with the same arguments as above.
2654 4th: Function to call to get a description string of a property value.
2655 It is called with one argument VALUE, a property value.
2656 5th: Data used by the above functions.
2657
2658TABLE may be a name of file to load to build a char-table. The
2659file should contain a call of `define-char-code-property' with a
2660char-table of the above format as the argument TABLE.
2661
2662TABLE may also be nil, in which case no property value is pre-assigned.
2663
2664Optional 3rd argment DOCSTRING is a documentation string of the property.
2665
2666See also the documentation of `get-char-code-property' and
2667`put-char-code-property'."
2668 (or (symbolp name)
2669 (error "Not a symbol: %s" name))
2670 (if (char-table-p table)
2671 (or (and (eq (char-table-subtype table) 'char-code-property-table)
2672 (eq (char-table-extra-slot table 0) name))
2673 (error "Invalid char-table: %s" table))
2674 (or (stringp table)
2675 (error "Not a char-table nor a file name: %s" table)))
2676 (let ((slot (assq name char-code-property-alist)))
2677 (if slot
2678 (setcdr slot table)
2679 (setq char-code-property-alist
2680 (cons (cons name table) char-code-property-alist))))
2681 (put name 'char-code-property-documentation docstring))
4ed46869
KH
2682
2683(defvar char-code-property-table
2684 (make-char-table 'char-code-property-table)
2685 "Char-table containing a property list of each character code.
16400c32 2686This table is used for properties not listed in `char-code-property-alist'.
4ed46869 2687See also the documentation of `get-char-code-property' and
96db204a 2688`put-char-code-property'.")
4ed46869
KH
2689
2690(defun get-char-code-property (char propname)
16400c32
KH
2691 "Return the value of CHAR's PROPNAME property."
2692 (let ((slot (assq propname char-code-property-alist)))
2693 (if slot
2694 (let (table value func)
2695 (if (stringp (cdr slot))
2696 (load (cdr slot)))
2697 (setq table (cdr slot)
2698 value (aref table char)
2699 func (char-table-extra-slot table 1))
2700 (if (functionp func)
2701 (setq value (funcall func char value table)))
2702 value)
2703 (plist-get (aref char-code-property-table char) propname))))
4ed46869
KH
2704
2705(defun put-char-code-property (char propname value)
16400c32 2706 "Store CHAR's PROPNAME property with VALUE.
4ed46869 2707It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
16400c32
KH
2708 (let ((slot (assq propname char-code-property-alist)))
2709 (if slot
2710 (let (table func)
2711 (if (stringp (cdr slot))
2712 (load (cdr slot)))
2713 (setq table (cdr slot)
2714 func (char-table-extra-slot table 2))
2715 (if (functionp func)
2716 (funcall func char value table)
2717 (aset table char value)))
2718 (let* ((plist (aref char-code-property-table char))
2719 (x (plist-put plist propname value)))
2720 (or (eq x plist)
2721 (aset char-code-property-table char x))))
2722 value))
2723
2724(defun char-code-property-description (prop value)
2725 "Return a description string of character property PROP's value VALUE.
2726If there's no description string for VALUE, return nil."
2727 (let ((slot (assq prop char-code-property-alist)))
2728 (if slot
2729 (let (table func)
2730 (if (stringp (cdr slot))
2731 (load (cdr slot)))
2732 (setq table (cdr slot)
2733 func (char-table-extra-slot table 3))
2734 (if (functionp func)
2735 (funcall func value))))))
4ed46869 2736
a127b764
KH
2737\f
2738;; Pretty description of encoded string
2739
2740;; Alist of ISO 2022 control code vs the corresponding mnemonic string.
2741(defvar iso-2022-control-alist
2742 '((?\x1b . "ESC")
2743 (?\x0e . "SO")
2744 (?\x0f . "SI")
2745 (?\x8e . "SS2")
2746 (?\x8f . "SS3")
2747 (?\x9b . "CSI")))
2748
2749(defun encoded-string-description (str coding-system)
2750 "Return a pretty description of STR that is encoded by CODING-SYSTEM."
2751 (setq str (string-as-unibyte str))
993b2a7d 2752 (mapconcat
a6dfc99b 2753 (if (and coding-system (eq (coding-system-type coding-system) 'iso-2022))
993b2a7d
KH
2754 ;; Try to get a pretty description for ISO 2022 escape sequences.
2755 (function (lambda (x) (or (cdr (assq x iso-2022-control-alist))
695effcc
JL
2756 (format "#x%02X" x))))
2757 (function (lambda (x) (format "#x%02X" x))))
993b2a7d 2758 str " "))
a127b764
KH
2759
2760(defun encode-coding-char (char coding-system)
2761 "Encode CHAR by CODING-SYSTEM and return the resulting string.
2762If CODING-SYSTEM can't safely encode CHAR, return nil."
bc304a20
DL
2763 (let ((str1 (string-as-multibyte (string char)))
2764 (str2 (string-as-multibyte (string char char)))
a127b764 2765 enc1 enc2 i1 i2)
bc304a20
DL
2766 (when (memq (coding-system-base coding-system)
2767 (find-coding-systems-string str1))
a127b764
KH
2768 ;; We must find the encoded string of CHAR. But, just encoding
2769 ;; CHAR will put extra control sequences (usually to designate
bc304a20 2770 ;; ASCII charset) at the tail if type of CODING is ISO 2022.
a127b764
KH
2771 ;; To exclude such tailing bytes, we at first encode one-char
2772 ;; string and two-char string, then check how many bytes at the
2773 ;; tail of both encoded strings are the same.
2774
bd953173 2775 (setq enc1 (encode-coding-string str1 coding-system)
a127b764 2776 i1 (length enc1)
bd953173 2777 enc2 (encode-coding-string str2 coding-system)
a127b764
KH
2778 i2 (length enc2))
2779 (while (and (> i1 0) (= (aref enc1 (1- i1)) (aref enc2 (1- i2))))
2780 (setq i1 (1- i1) i2 (1- i2)))
2781
2782 ;; Now (substring enc1 i1) and (substring enc2 i2) are the same,
2783 ;; and they are the extra control sequences at the tail to
2784 ;; exclude.
2785 (substring enc2 0 i2))))
2786
d2f613a0
DL
2787;; Backwards compatibility. These might be better with :init-value t,
2788;; but that breaks loadup.
2789(define-minor-mode unify-8859-on-encoding-mode
2790 "Obsolete."
2791 :group 'mule
2792 :global t)
2793(define-minor-mode unify-8859-on-decoding-mode
2794 "Obsolete."
2795 :group 'mule
2796 :global t)
a127b764 2797
7dd42fb1
KH
2798(defvar nonascii-insert-offset 0 "This variable is obsolete.")
2799(defvar nonascii-translation-table nil "This variable is obsolete.")
2800
cb9e47dd
KH
2801(defun ucs-insert (arg)
2802 "Insert a character of the given Unicode code point.
2803Interactively, prompts for a hex string giving the code."
2804 (interactive "sUnicode (hex): ")
2805 (or (integerp arg)
2806 (setq arg (string-to-number arg 16)))
2807 (if (or (< arg 0) (> arg #x10FFFF))
2808 (error "Not a Unicode character code: 0x%X" arg))
2809 (insert arg))
2810
a127b764 2811
9ee5b744 2812;; arch-tag: b382c432-4b36-460e-bf4c-05efd0bb18dc
4ed46869 2813;;; mule-cmds.el ends here