*** empty log message ***
[bpt/emacs.git] / lisp / international / mule-cmds.el
CommitLineData
60370d40 1;;; mule-cmds.el --- commands for mulitilingual environment
4ed46869 2
4ed46869 3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
fa526c4a 4;; Licensed to the Free Software Foundation.
32db08f1 5;; Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
4ed46869
KH
6
7;; Keywords: mule, multilingual
8
9;; This file is part of GNU Emacs.
10
11;; GNU Emacs is free software; you can redistribute it and/or modify
12;; it under the terms of the GNU General Public License as published by
13;; the Free Software Foundation; either version 2, or (at your option)
14;; any later version.
15
16;; GNU Emacs is distributed in the hope that it will be useful,
17;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;; GNU General Public License for more details.
20
21;; You should have received a copy of the GNU General Public License
369314dc
KH
22;; along with GNU Emacs; see the file COPYING. If not, write to the
23;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24;; Boston, MA 02111-1307, USA.
4ed46869 25
60370d40
PJ
26;;; Commentary:
27
4ed46869
KH
28;;; Code:
29
cda74479
DL
30(eval-when-compile (defvar dos-codepage))
31
4ed46869
KH
32;;; MULE related key bindings and menus.
33
0709d285 34(defvar mule-keymap (make-sparse-keymap)
33d17698 35 "Keymap for Mule (Multilingual environment) specific commands.")
4ed46869 36
8f81f784 37;; Keep "C-x C-m ..." for mule specific commands.
0709d285 38(define-key ctl-x-map "\C-m" mule-keymap)
ef8a8c8c 39
4ed46869 40(define-key mule-keymap "f" 'set-buffer-file-coding-system)
d80dee05 41(define-key mule-keymap "r" 'revert-buffer-with-coding-system)
4ed46869 42(define-key mule-keymap "t" 'set-terminal-coding-system)
15b3e511
KH
43(define-key mule-keymap "k" 'set-keyboard-coding-system)
44(define-key mule-keymap "p" 'set-buffer-process-coding-system)
7624ebb9
KH
45(define-key mule-keymap "x" 'set-selection-coding-system)
46(define-key mule-keymap "X" 'set-next-selection-coding-system)
8b784951 47(define-key mule-keymap "\C-\\" 'set-input-method)
15b3e511 48(define-key mule-keymap "c" 'universal-coding-system-argument)
b4fba33f 49(define-key mule-keymap "l" 'set-language-environment)
4ed46869 50
538d88fb
EZ
51(defvar mule-menu-keymap
52 (make-sparse-keymap "Mule (Multilingual Environment)")
33d17698 53 "Keymap for Mule (Multilingual environment) menu specific commands.")
15b3e511 54
dcad02bc
EZ
55(defvar describe-language-environment-map
56 (make-sparse-keymap "Describe Language Environment"))
15b3e511 57
dcad02bc
EZ
58(defvar setup-language-environment-map
59 (make-sparse-keymap "Set Language Environment"))
15b3e511 60
dcad02bc
EZ
61(defvar set-coding-system-map
62 (make-sparse-keymap "Set Coding System"))
15b3e511 63
15b3e511 64(define-key-after mule-menu-keymap [set-language-environment]
cda74479
DL
65 (list 'menu-item "Set Language Environment" setup-language-environment-map
66 :help "Multilingual environment suitable for a specific language"))
a61f401d 67(define-key-after mule-menu-keymap [mouse-set-font]
538d88fb 68 '(menu-item "Set Font/Fontset" mouse-set-font
cda74479
DL
69 :visible (fboundp 'generate-fontset-menu)
70 :help "Select a font from list of known fonts/fontsets"))
15b3e511
KH
71(define-key-after mule-menu-keymap [separator-mule]
72 '("--")
73 t)
74(define-key-after mule-menu-keymap [toggle-input-method]
538d88fb 75 '(menu-item "Toggle Input Method" toggle-input-method)
15b3e511 76 t)
8b784951 77(define-key-after mule-menu-keymap [set-input-method]
538d88fb 78 '(menu-item "Select Input Method..." set-input-method)
15b3e511 79 t)
cda74479
DL
80(define-key-after mule-menu-keymap [describe-input-method]
81 '(menu-item "Describe Input Method" describe-input-method))
15b3e511
KH
82(define-key-after mule-menu-keymap [separator-input-method]
83 '("--")
84 t)
15b3e511 85(define-key-after mule-menu-keymap [set-various-coding-system]
cda74479
DL
86 (list 'menu-item "Set Coding Systems" set-coding-system-map
87 :enable 'enable-multibyte-characters))
538d88fb
EZ
88(define-key-after mule-menu-keymap [view-hello-file]
89 '(menu-item "Show Multi-lingual Text" view-hello-file
90 :enable (file-readable-p
91 (expand-file-name "HELLO" data-directory))
92 :help "Display file which says HELLO in many languages")
15b3e511
KH
93 t)
94(define-key-after mule-menu-keymap [separator-coding-system]
95 '("--")
96 t)
538d88fb
EZ
97(define-key-after mule-menu-keymap [describe-language-environment]
98 (list 'menu-item "Describe Language Environment"
99 describe-language-environment-map
cda74479 100 :help "Show multilingual settings for a specific language")
15b3e511 101 t)
538d88fb
EZ
102(define-key-after mule-menu-keymap [describe-input-method]
103 '(menu-item "Describe Input Method..." describe-input-method
cda74479 104 :help "Keyboard layout for a specific input method")
538d88fb
EZ
105 t)
106(define-key-after mule-menu-keymap [describe-coding-system]
107 '(menu-item "Describe Coding System..." describe-coding-system)
108 t)
cda74479
DL
109(define-key-after mule-menu-keymap [list-character-sets]
110 '(menu-item "List Character Sets" list-character-sets
111 :help "Show table of available character sets"))
538d88fb
EZ
112(define-key-after mule-menu-keymap [mule-diag]
113 '(menu-item "Show All of Mule Status" mule-diag
114 :help "Display multilingual environment settings")
15b3e511
KH
115 t)
116
3a151e98 117(define-key-after set-coding-system-map [universal-coding-system-argument]
538d88fb
EZ
118 '(menu-item "For Next Command" universal-coding-system-argument
119 :help "Coding system to be used by next command")
3a151e98 120 t)
d80dee05
RS
121(define-key-after set-coding-system-map [separator-1]
122 '("--")
123 t)
124(define-key-after set-coding-system-map [set-buffer-file-coding-system]
125 '(menu-item "For Saving This Buffer" set-buffer-file-coding-system
126 :help "How to encode this buffer when saved")
127 t)
128(define-key-after set-coding-system-map [revert-buffer-with-coding-system]
129 '(menu-item "For Reverting This File Now" revert-buffer-with-coding-system
130 :enable buffer-file-name
131 :help "Revisit this file immediately using specified coding system")
132 t)
133(define-key-after set-coding-system-map [separator-2]
134 '("--")
15b3e511 135 t)
d80dee05 136
15b3e511 137(define-key-after set-coding-system-map [set-keyboard-coding-system]
538d88fb
EZ
138 '(menu-item "For Keyboard" set-keyboard-coding-system
139 :help "How to decode keyboard input")
15b3e511 140 t)
d80dee05
RS
141(define-key-after set-coding-system-map [set-terminal-coding-system]
142 '(menu-item "For Terminal" set-terminal-coding-system
143 :enable (null (memq window-system '(x w32 mac)))
144 :help "How to encode terminal output")
145 t)
146(define-key-after set-coding-system-map [separator-3]
147 '("--")
15b3e511 148 t)
7624ebb9 149(define-key-after set-coding-system-map [set-selection-coding-system]
538d88fb
EZ
150 '(menu-item "For X Selections/Clipboard" set-selection-coding-system
151 :visible (display-selections-p)
152 :help "How to en/decode data to/from selection/clipboard")
7624ebb9
KH
153 t)
154(define-key-after set-coding-system-map [set-next-selection-coding-system]
538d88fb
EZ
155 '(menu-item "For Next X Selection" set-next-selection-coding-system
156 :visible (display-selections-p)
157 :help "How to en/decode next selection/clipboard operation")
7624ebb9 158 t)
d80dee05
RS
159(define-key-after set-coding-system-map [set-buffer-process-coding-system]
160 '(menu-item "For I/O with Subprocess" set-buffer-process-coding-system
161 :visible (fboundp 'start-process)
162 :enable (get-buffer-process (current-buffer))
163 :help "How to en/decode I/O from/to subprocess connected to this buffer")
164 t)
165
166
15b3e511 167(define-key setup-language-environment-map
538d88fb 168 [Default] '(menu-item "Default" setup-specified-language-environment))
4ed46869 169
cda74479
DL
170(define-key describe-language-environment-map
171 [Default] '(menu-item "Default" describe-specified-language-support))
172
4ed46869
KH
173;; This should be a single character key binding because users use it
174;; very frequently while editing multilingual text. Now we can use
175;; only two such keys: "\C-\\" and "\C-^", but the latter is not
176;; convenient because it requires shifting on most keyboards. An
177;; alternative is "\C-\]" which is now bound to `abort-recursive-edit'
178;; but it won't be used that frequently.
179(define-key global-map "\C-\\" 'toggle-input-method)
180
5f395df3
SM
181;; This is no good because people often type Shift-SPC
182;; meaning to type SPC. -- rms.
183;; ;; Here's an alternative key binding for X users (Shift-SPACE).
184;; (define-key global-map [?\S- ] 'toggle-input-method)
b4fba33f 185
464cc130
KH
186;;; Mule related hyperlinks.
187(defconst help-xref-mule-regexp-template
188 (purecopy (concat "\\(\\<\\("
189 "\\(coding system\\)\\|"
d0c40faa
KH
190 "\\(input method\\)\\|"
191 "\\(character set\\)\\|"
192 "\\(charset\\)"
464cc130
KH
193 "\\)\\s-+\\)?"
194 ;; Note starting with word-syntax character:
195 "`\\(\\sw\\(\\sw\\|\\s_\\)+\\)'")))
196
26d87040
EZ
197(defun coding-system-change-eol-conversion (coding-system eol-type)
198 "Return a coding system which differs from CODING-SYSTEM in eol conversion.
199The returned coding system converts end-of-line by EOL-TYPE
200but text as the same way as CODING-SYSTEM.
201EOL-TYPE should be `unix', `dos', `mac', or nil.
202If EOL-TYPE is nil, the returned coding system detects
203how end-of-line is formatted automatically while decoding.
204
205EOL-TYPE can be specified by an integer 0, 1, or 2.
206They means `unix', `dos', and `mac' respectively."
207 (if (symbolp eol-type)
208 (setq eol-type (cond ((eq eol-type 'unix) 0)
209 ((eq eol-type 'dos) 1)
210 ((eq eol-type 'mac) 2)
211 (t eol-type))))
212 (let ((orig-eol-type (coding-system-eol-type coding-system)))
213 (if (vectorp orig-eol-type)
214 (if (not eol-type)
215 coding-system
216 (aref orig-eol-type eol-type))
217 (let ((base (coding-system-base coding-system)))
218 (if (not eol-type)
219 base
220 (if (= eol-type orig-eol-type)
221 coding-system
222 (setq orig-eol-type (coding-system-eol-type base))
223 (if (vectorp orig-eol-type)
224 (aref orig-eol-type eol-type))))))))
225
226(defun coding-system-change-text-conversion (coding-system coding)
227 "Return a coding system which differs from CODING-SYSTEM in text conversion.
228The returned coding system converts text by CODING
229but end-of-line as the same way as CODING-SYSTEM.
230If CODING is nil, the returned coding system detects
231how text is formatted automatically while decoding."
1d77e15a
JR
232 (let ((eol-type (coding-system-eol-type coding-system)))
233 (coding-system-change-eol-conversion
234 (if coding coding 'undecided)
235 (if (numberp eol-type) (aref [unix dos mac] eol-type)))))
26d87040 236
4ed46869 237(defun toggle-enable-multibyte-characters (&optional arg)
6998e1a1
RS
238 "Change whether this buffer uses multibyte characters.
239With arg, use multibyte characters if the arg is positive.
240
241Note that this command does not convert the byte contents of
242the buffer; it only changes the way those bytes are interpreted.
243In general, therefore, this command *changes* the sequence of
244characters that the current buffer contains.
245
246We suggest you avoid using use this command unless you know what you
247are doing. If you use it by mistake, and the buffer is now displayed
248wrong, use this command again to toggle back to the right mode."
4ed46869 249 (interactive "P")
b7079457
RS
250 (let ((new-flag
251 (if (null arg) (null enable-multibyte-characters)
252 (> (prefix-numeric-value arg) 0))))
253 (set-buffer-multibyte new-flag))
4ed46869
KH
254 (force-mode-line-update))
255
256(defun view-hello-file ()
257 "Display the HELLO file which list up many languages and characters."
258 (interactive)
8f81f784
KH
259 ;; We have to decode the file in any environment.
260 (let ((default-enable-multibyte-characters t)
95fa03b4 261 (coding-system-for-read 'iso-2022-7bit))
8f81f784 262 (find-file-read-only (expand-file-name "HELLO" data-directory))))
4ed46869 263
15b3e511
KH
264(defun universal-coding-system-argument ()
265 "Execute an I/O command using the specified coding system."
266 (interactive)
34104362
KH
267 (let* ((default (and buffer-file-coding-system
268 (not (eq (coding-system-type buffer-file-coding-system)
269 t))
270 buffer-file-coding-system))
271 (coding-system (read-coding-system
272 (if default
273 (format "Coding system for following command (default, %s): " default)
274 "Coding system for following command: ")
275 default))
15b3e511 276 (keyseq (read-key-sequence
e14a8f4c 277 (format "Command to execute with %s:" coding-system)))
04363179
GM
278 (cmd (key-binding keyseq))
279 prefix)
280
281 (when (eq cmd 'universal-argument)
282 (call-interactively cmd)
283
284 ;; Process keys bound in `universal-argument-map'.
285 (while (progn
286 (setq keyseq (read-key-sequence nil t)
287 cmd (key-binding keyseq t))
288 (not (eq cmd 'universal-argument-other-key)))
289 (let ((current-prefix-arg prefix-arg)
d37ef0f6 290 ;; Have to bind `last-command-char' here so that
04363179
GM
291 ;; `digit-argument', for isntance, can compute the
292 ;; prefix arg.
293 (last-command-char (aref keyseq 0)))
294 (call-interactively cmd)))
295
296 ;; This is the final call to `univeral-argument-other-key', which
297 ;; set's the final `prefix-arg.
298 (let ((current-prefix-arg prefix-arg))
299 (call-interactively cmd))
300
301 ;; Read the command to execute with the given prefix arg.
302 (setq prefix prefix-arg
303 keyseq (read-key-sequence nil t)
304 cmd (key-binding keyseq)))
305
15b3e511 306 (let ((coding-system-for-read coding-system)
04363179
GM
307 (coding-system-for-write coding-system)
308 (current-prefix-arg prefix))
15b3e511
KH
309 (message "")
310 (call-interactively cmd))))
311
de94d711 312(defun set-default-coding-systems (coding-system)
0c3154d2 313 "Set default value of various coding systems to CODING-SYSTEM.
387136f6 314This sets the following coding systems:
0c3154d2 315 o coding system of a newly created buffer
8efc03e1
KH
316 o default coding system for subprocess I/O
317This also sets the following values:
d37ef0f6 318 o default value used as `file-name-coding-system' for converting file names.
03c35c83
EZ
319 o default value for the command `set-terminal-coding-system' (not on MSDOS)
320 o default value for the command `set-keyboard-coding-system'."
de94d711
KH
321 (check-coding-system coding-system)
322 (setq-default buffer-file-coding-system coding-system)
716184d4
RS
323 (if default-enable-multibyte-characters
324 (setq default-file-name-coding-system coding-system))
03c35c83
EZ
325 ;; If coding-system is nil, honor that on MS-DOS as well, so
326 ;; that they could reset the terminal coding system.
327 (unless (and (eq window-system 'pc) coding-system)
328 (setq default-terminal-coding-system coding-system))
de94d711 329 (setq default-keyboard-coding-system coding-system)
1d77e15a
JR
330 ;; Preserve eol-type from existing default-process-coding-systems.
331 ;; On non-unix-like systems in particular, these may have been set
332 ;; carefully by the user, or by the startup code, to deal with the
333 ;; users shell appropriately, so should not be altered by changing
334 ;; language environment.
335 (let ((output-coding
336 (coding-system-change-text-conversion
337 (car default-process-coding-system) coding-system))
338 (input-coding
339 (coding-system-change-text-conversion
340 (cdr default-process-coding-system) coding-system)))
341 (setq default-process-coding-system
342 (cons output-coding input-coding))))
de94d711 343
45d08cb2 344(defalias 'update-iso-coding-systems 'update-coding-systems-internal)
2598a293 345(make-obsolete 'update-iso-coding-systems 'update-coding-systems-internal "20.3")
45d08cb2 346
0c3154d2
KH
347(defun prefer-coding-system (coding-system)
348 "Add CODING-SYSTEM at the front of the priority list for automatic detection.
387136f6 349This also sets the following coding systems:
0c3154d2 350 o coding system of a newly created buffer
8efc03e1
KH
351 o default coding system for subprocess I/O
352This also sets the following values:
c654de1d 353 o default value used as `file-name-coding-system' for converting file names.
03c35c83
EZ
354 o default value for the command `set-terminal-coding-system' (not on MSDOS)
355 o default value for the command `set-keyboard-coding-system'
356
bd3ac67e
EZ
357If CODING-SYSTEM specifies a certain type of EOL conversion, the coding
358systems set by this function will use that type of EOL conversion.
359
03c35c83
EZ
360This command does not change the default value of terminal coding system
361for MS-DOS terminal, because DOS terminals only support a single coding
362system, and Emacs automatically sets the default to that coding system at
5beaa0d1
KH
363startup.
364
c654de1d
DL
365A coding system that requires automatic detection of text
366encoding (e.g. undecided, unix) can't be preferred.
367
368See also `coding-category-list' and `coding-system-category'."
0c3154d2
KH
369 (interactive "zPrefer coding system: ")
370 (if (not (and coding-system (coding-system-p coding-system)))
371 (error "Invalid coding system `%s'" coding-system))
372 (let ((coding-category (coding-system-category coding-system))
bd3ac67e
EZ
373 (base (coding-system-base coding-system))
374 (eol-type (coding-system-eol-type coding-system)))
0c3154d2
KH
375 (if (not coding-category)
376 ;; CODING-SYSTEM is no-conversion or undecided.
377 (error "Can't prefer the coding system `%s'" coding-system))
8efc03e1 378 (set coding-category (or base coding-system))
45d08cb2 379 (update-coding-systems-internal)
812cad80 380 (or (eq coding-category (car coding-category-list))
0c3154d2 381 ;; We must change the order.
812cad80 382 (set-coding-priority (list coding-category)))
8efc03e1
KH
383 (if (and base (interactive-p))
384 (message "Highest priority is set to %s (base of %s)"
385 base coding-system))
bd3ac67e 386 ;; If they asked for specific EOL conversion, honor that.
6f9dc4fd 387 (if (memq eol-type '(0 1 2))
bd3ac67e
EZ
388 (setq coding-system
389 (coding-system-change-eol-conversion base eol-type))
390 (setq coding-system base))
391 (set-default-coding-systems coding-system)))
0c3154d2 392
b5edd1d1
KH
393(defvar sort-coding-systems-predicate nil
394 "If non-nil, a predicate function to sort coding systems.
395
396It is called with two coding systems, and should return t if the first
397one is \"less\" than the second.
398
399The function `sort-coding-systems' use it.")
400
401(defun sort-coding-systems (codings)
402 "Sort coding system list CODINGS by a priority of each coding system.
403
404If a coding system is most preferred, it has the highest priority.
405Otherwise, a coding system corresponds to some MIME charset has higher
406priorities. Among them, a coding system included in `coding-system'
407key of the current language environment has higher priorities. See
408also the documentation of `language-info-alist'.
409
410If the variable `sort-coding-systems-predicate' (which see) is
411non-nil, it is used to sort CODINGS in the different way than above."
412 (if sort-coding-systems-predicate
413 (sort codings sort-coding-systems-predicate)
414 (let* ((most-preferred (symbol-value (car coding-category-list)))
415 (lang-preferred (get-language-info current-language-environment
416 'coding-system))
417 (func (function
418 (lambda (x)
419 (let ((base (coding-system-base x)))
420 (+ (if (eq base most-preferred) 64 0)
421 (let ((mime (coding-system-get base 'mime-charset)))
422 (if mime
423 (if (string-match "^x-" (symbol-name mime))
424 16 32)
425 0))
426 (if (memq base lang-preferred) 8 0)
427 (if (string-match "-with-esc$" (symbol-name base))
428 0 4)
429 (if (eq (coding-system-type base) 2)
430 ;; For ISO based coding systems, prefer
431 ;; one that doesn't use escape sequences.
432 (let ((flags (coding-system-flags base)))
433 (if (or (consp (aref flags 0))
434 (consp (aref flags 1))
435 (consp (aref flags 2))
436 (consp (aref flags 3)))
437 (if (or (aref flags 8) (aref flags 9))
438 0
439 1)
440 2))
441 1)))))))
442 (sort codings (function (lambda (x y)
443 (> (funcall func x) (funcall func y))))))))
54b226f7 444
3fc7dfe5 445(defun find-coding-systems-region (from to)
54b226f7 446 "Return a list of proper coding systems to encode a text between FROM and TO.
d37ef0f6 447If FROM is a string, find coding systems in that instead of the buffer.
54b226f7
KH
448All coding systems in the list can safely encode any multibyte characters
449in the text.
450
e8dd0160 451If the text contains no multibyte characters, return a list of a single
3fc7dfe5 452element `undecided'."
b5edd1d1
KH
453 (let ((codings (find-coding-systems-region-internal from to)))
454 (if (eq codings t)
455 ;; The text contains only ASCII characters. Any coding
456 ;; systems are safe.
457 '(undecided)
458 ;; We need copy-sequence because sorting will alter the argument.
459 (sort-coding-systems (copy-sequence codings)))))
54b226f7 460
3fc7dfe5
KH
461(defun find-coding-systems-string (string)
462 "Return a list of proper coding systems to encode STRING.
463All coding systems in the list can safely encode any multibyte characters
464in STRING.
465
e8dd0160 466If STRING contains no multibyte characters, return a list of a single
3fc7dfe5 467element `undecided'."
b5edd1d1 468 (find-coding-systems-region string nil))
3fc7dfe5
KH
469
470(defun find-coding-systems-for-charsets (charsets)
471 "Return a list of proper coding systems to encode characters of CHARSETS.
472CHARSETS is a list of character sets."
b5edd1d1
KH
473 (cond ((or (null charsets)
474 (and (= (length charsets) 1)
475 (eq 'ascii (car charsets))))
476 '(undecided))
477 ((or (memq 'eight-bit-control charsets)
478 (memq 'eight-bit-graphic charsets))
479 '(raw-text emacs-mule))
480 (t
481 (let ((codings t)
482 charset l ll)
483 (while (and codings charsets)
484 (setq charset (car charsets) charsets (cdr charsets))
485 (unless (eq charset 'ascii)
486 (setq l (aref char-coding-system-table (make-char charset)))
487 (if (eq codings t)
488 (setq codings l)
489 (let ((ll nil))
490 (while codings
491 (if (memq (car codings) l)
492 (setq ll (cons (car codings) ll)))
493 (setq codings (cdr codings)))
494 (setq codings ll)))))
495 (append codings
496 (char-table-extra-slot char-coding-system-table 0))))))
54b226f7 497
51ed58ea
KH
498(defun find-multibyte-characters (from to &optional maxcount excludes)
499 "Find multibyte characters in the region specified by FROM and TO.
500If FROM is a string, find multibyte characters in the string.
501The return value is an alist of the following format:
502 ((CHARSET COUNT CHAR ...) ...)
503where
504 CHARSET is a character set,
505 COUNT is a number of characters,
506 CHARs are found characters of the character set.
507Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
251d4f4b
KH
508Optional 4th arg EXCLUDE is a list of character sets to be ignored.
509
510For invalid characters, CHARs are actually strings."
51ed58ea
KH
511 (let ((chars nil)
512 charset char)
513 (if (stringp from)
514 (let ((idx 0))
515 (while (setq idx (string-match "[^\000-\177]" from idx))
516 (setq char (aref from idx)
517 charset (char-charset char))
251d4f4b
KH
518 (if (eq charset 'unknown)
519 (setq char (match-string 0)))
14333e31
KH
520 (if (or (memq charset '(unknown
521 eight-bit-control eight-bit-graphic))
251d4f4b 522 (not (or (eq excludes t) (memq charset excludes))))
51ed58ea
KH
523 (let ((slot (assq charset chars)))
524 (if slot
525 (if (not (memq char (nthcdr 2 slot)))
526 (let ((count (nth 1 slot)))
527 (setcar (cdr slot) (1+ count))
528 (if (or (not maxcount) (< count maxcount))
529 (nconc slot (list char)))))
530 (setq chars (cons (list charset 1 char) chars)))))
531 (setq idx (1+ idx))))
532 (save-excursion
533 (goto-char from)
534 (while (re-search-forward "[^\000-\177]" to t)
535 (setq char (preceding-char)
536 charset (char-charset char))
251d4f4b
KH
537 (if (eq charset 'unknown)
538 (setq char (match-string 0)))
14333e31 539 (if (or (memq charset '(unknown eight-bit-control eight-bit-graphic))
251d4f4b 540 (not (or (eq excludes t) (memq charset excludes))))
51ed58ea
KH
541 (let ((slot (assq charset chars)))
542 (if slot
251d4f4b 543 (if (not (member char (nthcdr 2 slot)))
51ed58ea
KH
544 (let ((count (nth 1 slot)))
545 (setcar (cdr slot) (1+ count))
546 (if (or (not maxcount) (< count maxcount))
547 (nconc slot (list char)))))
548 (setq chars (cons (list charset 1 char) chars))))))))
549 (nreverse chars)))
550
738746ba
KH
551
552(defun search-unencodable-char (coding-system)
553 "Search forward from point for a character that is not encodable.
554It asks which coding system to check.
555If such a character is found, set point after that character.
556Otherwise, don't move point.
557
558When called from a program, the value is a position of the found character,
559or nil if all characters are encodable."
560 (interactive
561 (list (let ((default (or buffer-file-coding-system 'us-ascii)))
562 (read-coding-system
563 (format "Coding-system (default, %s): " default)
564 default))))
565 (let ((pos (unencodable-char-position (point) (point-max) coding-system)))
566 (if pos
567 (goto-char (1+ pos))
568 (message "All following characters are encodable by %s" coding-system))
569 pos))
570
571
c83c4f60
RS
572(defvar last-coding-system-specified nil
573 "Most recent coding system explicitly specified by the user when asked.
574This variable is set whenever Emacs asks the user which coding system
575to use in order to write a file. If you set it to nil explicitly,
576then call `write-region', then afterward this variable will be non-nil
577only if the user was explicitly asked and specified a coding system.")
578
b5edd1d1
KH
579(defvar select-safe-coding-system-accept-default-p nil
580 "If non-nil, a function to control the behaviour of coding system selection.
581The meaning is the same as the argument ACCEPT-DEFAULT-P of the
582function `select-safe-coding-system' (which see). This variable
583overrides that argument.")
584
585(defun select-safe-coding-system (from to &optional default-coding-system
efa2c6d7 586 accept-default-p file)
d5266ddf
KH
587 "Ask a user to select a safe coding system from candidates.
588The candidates of coding systems which can safely encode a text
b5edd1d1
KH
589between FROM and TO are shown in a popup window. Among them, the most
590proper one is suggested as the default.
591
592The list of `buffer-file-coding-system' of the current buffer and the
593most preferred coding system (if it corresponds to a MIME charset) is
594treated as the default coding system list. Among them, the first one
d37ef0f6
DL
595that safely encodes the text is normally selected silently and
596returned without any user interaction. See also the command
597`prefer-coding-system'.
598
599However, the user is queried if the chosen coding system is
600inconsistent with what would be selected by `set-auto-coding' from
601coding cookies &c. if the contents of the region were read from a
602file. (That could lead to data corruption in a file subsequently
603re-visited and edited.)
54b226f7 604
b5edd1d1
KH
605Optional 3rd arg DEFAULT-CODING-SYSTEM specifies a coding system or a
606list of coding systems to be prepended to the default coding system
607list.
54b226f7 608
b5edd1d1
KH
609Optional 4th arg ACCEPT-DEFAULT-P, if non-nil, is a function to
610determine the acceptability of the silently selected coding system.
611It is called with that coding system, and should return nil if it
612should not be silently selected and thus user interaction is required.
613
efa2c6d7
RS
614Optional 5th arg FILE is the file name to use for this purpose.
615That is different from `buffer-file-name' when handling `write-region'
616\(for example).
617
b5edd1d1
KH
618The variable `select-safe-coding-system-accept-default-p', if
619non-nil, overrides ACCEPT-DEFAULT-P.
54b226f7
KH
620
621Kludgy feature: if FROM is a string, the string is the target text,
622and TO is ignored."
b5edd1d1
KH
623 (if (and default-coding-system
624 (not (listp default-coding-system)))
625 (setq default-coding-system (list default-coding-system)))
626
627 ;; Change elements of the list to (coding . base-coding).
628 (setq default-coding-system
629 (mapcar (function (lambda (x) (cons x (coding-system-base x))))
630 default-coding-system))
631
632 ;; If buffer-file-coding-system is not nil nor undecided, append it
633 ;; to the defaults.
634 (if buffer-file-coding-system
635 (let ((base (coding-system-base buffer-file-coding-system)))
636 (or (eq base 'undecided)
637 (assq buffer-file-coding-system default-coding-system)
638 (rassq base default-coding-system)
e56d7900 639 (setq default-coding-system
b5edd1d1
KH
640 (append default-coding-system
641 (list (cons buffer-file-coding-system base)))))))
642
643 ;; If the most preferred coding system has the property mime-charset,
644 ;; append it to the defaults.
c24e49a8
KH
645 (let ((tail coding-category-list)
646 preferred base)
647 (while (and tail
87ad2c90 648 (not (setq preferred (symbol-value (car tail)))))
c24e49a8
KH
649 (setq tail (cdr tail)))
650 (and (coding-system-p preferred)
651 (setq base (coding-system-base preferred))
652 (coding-system-get preferred 'mime-charset)
b5edd1d1
KH
653 (not (assq preferred default-coding-system))
654 (not (rassq base default-coding-system))
655 (setq default-coding-system
656 (append default-coding-system (list (cons preferred base))))))
657
658 (if select-safe-coding-system-accept-default-p
659 (setq accept-default-p select-safe-coding-system-accept-default-p))
660
661 (let ((codings (find-coding-systems-region from to))
662 (coding-system nil)
45610c98 663 (bufname (buffer-name))
b5edd1d1
KH
664 (l default-coding-system))
665 (if (eq (car codings) 'undecided)
666 ;; Any coding system is ok.
667 (setq coding-system t)
668 ;; Try the defaults.
669 (while (and l (not coding-system))
670 (if (memq (cdr (car l)) codings)
671 (setq coding-system (car (car l)))
672 (setq l (cdr l))))
673 (if (and coding-system accept-default-p)
674 (or (funcall accept-default-p coding-system)
675 (setq coding-system (list coding-system)))))
676
677 ;; If all the defaults failed, ask a user.
678 (when (or (not coding-system) (consp coding-system))
738746ba
KH
679 ;; At first, record at most 11 problematic characters and their
680 ;; positions for each default.
681 (if (stringp from)
682 (mapc #'(lambda (coding)
683 (setcdr coding
684 (mapcar #'(lambda (pos)
685 (cons pos (aref from pos)))
686 (unencodable-char-position
687 0 (length from) (car coding) 11 from))))
688 default-coding-system)
689 (mapc #'(lambda (coding)
690 (setcdr coding
691 (mapcar #'(lambda (pos)
692 (cons pos (char-after pos)))
693 (unencodable-char-position
694 from to (car coding) 11))))
695 default-coding-system))
696 ;; If 11 unencodable characters were found, mark the last one as nil.
697 (mapc #'(lambda (coding)
698 (if (> (length coding) 11)
699 (setcdr (car (last coding)) nil)))
700 default-coding-system)
701
702 ;; Change each safe coding system to the corresponding
b5edd1d1
KH
703 ;; mime-charset name if it is also a coding system. Such a name
704 ;; is more friendly to users.
705 (let ((l codings)
34104362
KH
706 mime-charset)
707 (while l
708 (setq mime-charset (coding-system-get (car l) 'mime-charset))
709 (if (and mime-charset (coding-system-p mime-charset))
710 (setcar l mime-charset))
711 (setq l (cdr l))))
712
43afed8c
RS
713 ;; Don't offer variations with locking shift, which you
714 ;; basically never want.
715 (let (l)
716 (dolist (elt codings (setq codings (nreverse l)))
717 (unless (or (eq 'coding-category-iso-7-else
718 (coding-system-category elt))
719 (eq 'coding-category-iso-8-else
720 (coding-system-category elt)))
721 (push elt l))))
722
738746ba
KH
723 (let ((window-configuration (current-window-configuration)))
724 (save-excursion
725 ;; Make sure the offending buffer is displayed.
726 (when (and default-coding-system (not (stringp from)))
727 (pop-to-buffer bufname)
728 (goto-char (apply 'min (mapcar #'(lambda (x) (car (cadr x)))
729 default-coding-system))))
730 ;; Then ask users to select one from CODINGS.
731 (with-output-to-temp-buffer "*Warning*"
873a4be1 732 (save-excursion
738746ba
KH
733 (set-buffer standard-output)
734 (if (not default-coding-system)
735 (insert "No default coding systems to try for "
736 (if (stringp from)
737 (format "string \"%s\"." from)
738 (format "buffer `%s'." bufname)))
739 (insert
740 "These default coding systems were tried to encode"
741 (if (stringp from)
742 (concat " \"" (if (> (length from) 10)
743 (concat (substring from 0 10) "...\"")
744 (concat from "\"")))
745 (format " text\nin the buffer `%s'" bufname))
746 ":\n")
747 (let ((pos (point))
748 (fill-prefix " "))
749 (mapcar (function (lambda (x)
750 (princ " ") (princ (car x))))
751 default-coding-system)
752 (insert "\n")
753 (fill-region-as-paragraph pos (point)))
754 (if (consp coding-system)
755 (insert (format "%s safely encodes the target text,\n"
756 (car coding-system))
757 "\
d37ef0f6
DL
758but it is not recommended for encoding text in this context,
759e.g., for sending an email message.\n")
738746ba
KH
760 (insert "\
761However, each of them encountered these problematic characters:\n")
762 (mapc
763 #'(lambda (coding)
764 (insert (format " %s:" (car coding)))
765 (dolist (elt (cdr coding))
766 (insert " ")
767 (if (stringp from)
768 (insert (or (cdr elt) "..."))
769 (if (cdr elt)
770 (insert-text-button
771 (cdr elt)
772 :type 'help-xref
773 'help-echo
774 "mouse-2, RET: jump to this character"
775 'help-function
776 #'(lambda (bufname pos)
777 (when (buffer-live-p (get-buffer bufname))
778 (pop-to-buffer bufname)
779 (goto-char pos)))
780 'help-args (list bufname (car elt)))
781 (insert-text-button
782 "..."
783 :type 'help-xref
784 'help-echo
785 "mouse-2, RET: next unencodable character"
786 'help-function
787 #'(lambda (bufname pos coding)
788 (when (buffer-live-p (get-buffer bufname))
789 (pop-to-buffer bufname)
790 (if (< (point) pos)
791 (goto-char pos)
792 (forward-char 1)
793 (search-unencodable-char coding)
794 (forward-char -1))))
795 'help-args (list bufname (car elt)
796 (car coding))))))
797 (insert "\n"))
798 default-coding-system)
799 (insert "\
d37ef0f6 800The first problematic character is at point in the displayed buffer,\n"
738746ba 801 (substitute-command-keys "\
d37ef0f6 802and \\[universal-argument] \\[what-cursor-position] will give information about it.\n"))))
738746ba
KH
803 (insert (if (consp coding-system)
804 "\nSelect the above, or "
805 "\nSelect ")
806 "\
d37ef0f6 807one of the following safe coding systems, or edit the buffer:\n")
738746ba
KH
808 (let ((pos (point))
809 (fill-prefix " "))
810 (mapcar (function (lambda (x) (princ " ") (princ x)))
811 codings)
812 (insert "\n")
813 (fill-region-as-paragraph pos (point)))))
814
815 ;; Read a coding system.
816 (if (consp coding-system)
817 (setq codings (cons (car coding-system) codings)))
818 (let* ((safe-names (mapcar (lambda (x) (list (symbol-name x)))
819 codings))
820 (name (completing-read
821 (format "Select coding system (default %s): "
822 (car codings))
823 safe-names nil t nil nil
824 (car (car safe-names)))))
825 (setq last-coding-system-specified (intern name)
826 coding-system last-coding-system-specified)))
827 (kill-buffer "*Warning*")
828 (set-window-configuration window-configuration)))
b5edd1d1
KH
829
830 (if (vectorp (coding-system-eol-type coding-system))
831 (let ((eol (coding-system-eol-type buffer-file-coding-system)))
832 (if (numberp eol)
833 (setq coding-system
834 (coding-system-change-eol-conversion coding-system eol)))))
835
836 (if (eq coding-system t)
837 (setq coding-system buffer-file-coding-system))
43afed8c
RS
838 ;; Check we're not inconsistent with what `coding:' spec &c would
839 ;; give when file is re-read.
9e7638c3
RS
840 ;; But don't do this if we explicitly ignored the cookie
841 ;; by using `find-file-literally'.
842 (unless (or (stringp from) find-file-literally)
efa2c6d7
RS
843 (let ((auto-cs (save-excursion
844 (save-restriction
845 (widen)
846 (narrow-to-region from to)
43afed8c 847 (goto-char (point-min))
efa2c6d7 848 (set-auto-coding (or file buffer-file-name "")
43afed8c 849 (buffer-size))))))
872c0ba8 850 (if (and auto-cs coding-system
d37ef0f6
DL
851 ;; Don't barf if writing a compressed file, say.
852 ;; This check perhaps isn't ideal, but is probably
853 ;; the best thing to do.
854 (not (auto-coding-alist-lookup (or file buffer-file-name "")))
43afed8c
RS
855 (not (coding-system-equal (coding-system-base coding-system)
856 (coding-system-base auto-cs))))
857 (unless (yes-or-no-p
858 (format "Selected encoding %s disagrees with \
859%s specified by file contents. Really save (else edit coding cookies \
860and try again)? " coding-system auto-cs))
41bfe57a
RS
861 (error "Save aborted")))))
862 coding-system))
54b226f7
KH
863
864(setq select-safe-coding-system-function 'select-safe-coding-system)
865
46babb23
KH
866(defun select-message-coding-system ()
867 "Return a coding system to encode the outgoing message of the current buffer.
868It at first tries the first coding system found in these variables
869in this order:
870 (1) local value of `buffer-file-coding-system'
871 (2) value of `sendmail-coding-system'
b5edd1d1
KH
872 (3) value of `default-sendmail-coding-system'
873 (4) value of `default-buffer-file-coding-system'
46babb23
KH
874If the found coding system can't encode the current buffer,
875or none of them are bound to a coding system,
48e41165 876it asks the user to select a proper coding system."
46babb23 877 (let ((coding (or (and (local-variable-p 'buffer-file-coding-system)
b5edd1d1
KH
878 buffer-file-coding-system)
879 sendmail-coding-system
880 default-sendmail-coding-system
881 default-buffer-file-coding-system)))
46babb23 882 (if (eq coding 'no-conversion)
d37ef0f6 883 ;; We should never use no-conversion for outgoing mail.
46babb23
KH
884 (setq coding nil))
885 (if (fboundp select-safe-coding-system-function)
886 (funcall select-safe-coding-system-function
b5edd1d1
KH
887 (point-min) (point-max) coding
888 (function (lambda (x) (coding-system-get x 'mime-charset))))
46babb23 889 coding)))
4ed46869 890\f
03c35c83 891;;; Language support stuff.
4ed46869 892
4ed46869 893(defvar language-info-alist nil
2c395d56 894 "Alist of language environment definitions.
4ed46869
KH
895Each element looks like:
896 (LANGUAGE-NAME . ((KEY . INFO) ...))
2c395d56
RS
897where LANGUAGE-NAME is a string, the name of the language environment,
898KEY is a symbol denoting the kind of information, and
899INFO is the data associated with KEY.
900Meaningful values for KEY include
901
902 documentation value is documentation of what this language environment
903 is meant for, and how to use it.
904 charset value is a list of the character sets used by this
905 language environment.
d37ef0f6
DL
906 sample-text value is an expression which is evalled to generate
907 a line of text written using characters appropriate
908 for this language environment.
2c395d56
RS
909 setup-function value is a function to call to switch to this
910 language environment.
911 exit-function value is a function to call to leave this
912 language environment.
913 coding-system value is a list of coding systems that are good
914 for saving text written in this language environment.
915 This list serves as suggestions to the user;
916 in effect, as a kind of documentation.
917 coding-priority value is a list of coding systems for this language
918 environment, in order of decreasing priority.
919 This is used to set up the coding system priority
45d08cb2 920 list when you switch to this language environment.
ddb5c041 921 nonascii-translation
7624ebb9 922 value is a translation table to be set in the
45d08cb2 923 variable `nonascii-translation-table' in this
7624ebb9
KH
924 language environment, or a character set from
925 which `nonascii-insert-offset' is calculated.
ddb5c041
KH
926 input-method value is a default input method for this language
927 environment.
7624ebb9
KH
928 features value is a list of features requested in this
929 language environment.
ddb5c041
KH
930
931The following keys take effect only when multibyte characters are
932globally disabled, i.e. the value of `default-enable-multibyte-characters'
933is nil.
934
935 unibyte-syntax value is a library name to load to set
e8dd0160 936 unibyte 8-bit character syntaxes for this
ddb5c041
KH
937 language environment.
938
939 unibyte-display value is a coding system to encode characters
940 for the terminal. Characters in the range
941 of 160 to 255 display not as octal escapes,
942 but as non-ASCII characters in this language
943 environment.")
2c395d56
RS
944
945(defun get-language-info (lang-env key)
946 "Return information listed under KEY for language environment LANG-ENV.
947KEY is a symbol denoting the kind of information.
948For a list of useful values for KEY and their meanings,
949see `language-info-alist'."
950 (if (symbolp lang-env)
951 (setq lang-env (symbol-name lang-env)))
952 (let ((lang-slot (assoc-ignore-case lang-env language-info-alist)))
4ed46869
KH
953 (if lang-slot
954 (cdr (assq key (cdr lang-slot))))))
955
f08adf27 956(defun set-language-info (lang-env key info)
2c395d56
RS
957 "Modify part of the definition of language environment LANG-ENV.
958Specifically, this stores the information INFO under KEY
959in the definition of this language environment.
4ed46869 960KEY is a symbol denoting the kind of information.
2c395d56 961INFO is the value for that information.
281d03ec 962
2c395d56 963For a list of useful values for KEY and their meanings,
f08adf27 964see `language-info-alist'."
2c395d56
RS
965 (if (symbolp lang-env)
966 (setq lang-env (symbol-name lang-env)))
4ed46869 967 (let (lang-slot key-slot)
2c395d56 968 (setq lang-slot (assoc lang-env language-info-alist))
4ed46869 969 (if (null lang-slot) ; If no slot for the language, add it.
2c395d56 970 (setq lang-slot (list lang-env)
4ed46869
KH
971 language-info-alist (cons lang-slot language-info-alist)))
972 (setq key-slot (assq key lang-slot))
973 (if (null key-slot) ; If no slot for the key, add it.
974 (progn
975 (setq key-slot (list key))
976 (setcdr lang-slot (cons key-slot (cdr lang-slot)))))
cda74479 977 (setcdr key-slot (purecopy info))))
4ed46869 978
2c395d56
RS
979(defun set-language-info-alist (lang-env alist &optional parents)
980 "Store ALIST as the definition of language environment LANG-ENV.
981ALIST is an alist of KEY and INFO values. See the documentation of
98c6d6ed 982`language-info-alist' for the meanings of KEY and INFO.
54b226f7 983
2c395d56 984Optional arg PARENTS is a list of parent menu names; it specifies
d37ef0f6 985where to put this language environment in the
2c395d56
RS
986Describe Language Environment and Set Language Environment menus.
987For example, (\"European\") means to put this language environment
988in the European submenu in each of those two menus."
989 (if (symbolp lang-env)
990 (setq lang-env (symbol-name lang-env)))
54b226f7
KH
991 (let ((describe-map describe-language-environment-map)
992 (setup-map setup-language-environment-map))
993 (if parents
994 (let ((l parents)
9deed82f 995 map parent-symbol parent prompt)
54b226f7
KH
996 (while l
997 (if (symbolp (setq parent-symbol (car l)))
998 (setq parent (symbol-name parent))
999 (setq parent parent-symbol parent-symbol (intern parent)))
1000 (setq map (lookup-key describe-map (vector parent-symbol)))
9deed82f
EZ
1001 ;; This prompt string is for define-prefix-command, so
1002 ;; that the map it creates will be suitable for a menu.
1003 (or map (setq prompt (format "%s Environment" parent)))
54b226f7
KH
1004 (if (not map)
1005 (progn
1006 (setq map (intern (format "describe-%s-environment-map"
1007 (downcase parent))))
9deed82f 1008 (define-prefix-command map nil prompt)
54b226f7
KH
1009 (define-key-after describe-map (vector parent-symbol)
1010 (cons parent map) t)))
1011 (setq describe-map (symbol-value map))
1012 (setq map (lookup-key setup-map (vector parent-symbol)))
1013 (if (not map)
1014 (progn
1015 (setq map (intern (format "setup-%s-environment-map"
1016 (downcase parent))))
9deed82f 1017 (define-prefix-command map nil prompt)
54b226f7
KH
1018 (define-key-after setup-map (vector parent-symbol)
1019 (cons parent map) t)))
1020 (setq setup-map (symbol-value map))
1021 (setq l (cdr l)))))
f08adf27
RS
1022
1023 ;; Set up menu items for this language env.
7624ebb9 1024 (let ((doc (assq 'documentation alist)))
f08adf27
RS
1025 (when doc
1026 (define-key-after describe-map (vector (intern lang-env))
7624ebb9
KH
1027 (cons lang-env 'describe-specified-language-support) t)))
1028 (define-key-after setup-map (vector (intern lang-env))
1029 (cons lang-env 'setup-specified-language-environment) t)
f08adf27 1030
54b226f7 1031 (while alist
f08adf27 1032 (set-language-info lang-env (car (car alist)) (cdr (car alist)))
54b226f7 1033 (setq alist (cdr alist)))))
4ed46869 1034
ae302641 1035(defun read-language-name (key prompt &optional default)
2c395d56 1036 "Read a language environment name which has information for KEY.
ddb5c041 1037If KEY is nil, read any language environment.
2c395d56
RS
1038Prompt with PROMPT. DEFAULT is the default choice of language environment.
1039This returns a language environment name as a string."
4ed46869
KH
1040 (let* ((completion-ignore-case t)
1041 (name (completing-read prompt
1042 language-info-alist
ddb5c041 1043 (and key
ca429a25 1044 (function (lambda (elm) (and (listp elm) (assq key elm)))))
ae302641 1045 t nil nil default)))
13e82c04 1046 (if (and (> (length name) 0)
ddb5c041
KH
1047 (or (not key)
1048 (get-language-info name key)))
13e82c04 1049 name)))
4ed46869
KH
1050\f
1051;;; Multilingual input methods.
d37ef0f6 1052(defgroup leim nil
d0c40faa
KH
1053 "LEIM: Libraries of Emacs Input Methods."
1054 :group 'mule)
4ed46869 1055
d0b9c3ab
KH
1056(defconst leim-list-file-name "leim-list.el"
1057 "Name of LEIM list file.
1058This file contains a list of libraries of Emacs input methods (LEIM)
1059in the format of Lisp expression for registering each input method.
1060Emacs loads this file at startup time.")
1061
2e224638 1062(defvar leim-list-header (format
d37ef0f6 1063";;; %s -- list of LEIM (Library of Emacs Input Method) -*-coding: iso-2022-7bit;-*-
d0b9c3ab
KH
1064;;
1065;; This file contains a list of LEIM (Library of Emacs Input Method)
c654de1d
DL
1066;; methods in the same directory as this file. Loading this file
1067;; registers all the input methods in Emacs.
d0b9c3ab 1068;;
d33d5fbe 1069;; Each entry has the form:
d0b9c3ab
KH
1070;; (register-input-method
1071;; INPUT-METHOD LANGUAGE-NAME ACTIVATE-FUNC
1072;; TITLE DESCRIPTION
1073;; ARG ...)
c654de1d 1074;; See the function `register-input-method' for the meanings of the arguments.
d0b9c3ab
KH
1075;;
1076;; If this directory is included in load-path, Emacs automatically
1077;; loads this file at startup time.
1078
1079"
1080 leim-list-file-name)
1081 "Header to be inserted in LEIM list file.")
1082
e55e92ee 1083(defvar leim-list-entry-regexp "^(register-input-method"
d0b9c3ab
KH
1084 "Regexp matching head of each entry in LEIM list file.
1085See also the variable `leim-list-header'")
1086
1087(defvar update-leim-list-functions
1088 '(quail-update-leim-list-file)
1089 "List of functions to call to update LEIM list file.
1090Each function is called with one arg, LEIM directory name.")
1091
a337fe7f
RS
1092(defun update-leim-list-file (&rest dirs)
1093 "Update LEIM list file in directories DIRS."
d0b9c3ab
KH
1094 (let ((functions update-leim-list-functions))
1095 (while functions
a337fe7f 1096 (apply (car functions) dirs)
d0b9c3ab
KH
1097 (setq functions (cdr functions)))))
1098
4ed46869
KH
1099(defvar current-input-method nil
1100 "The current input method for multilingual text.
96db204a 1101If nil, that means no input method is activated now.")
4ed46869
KH
1102(make-variable-buffer-local 'current-input-method)
1103(put 'current-input-method 'permanent-local t)
1104
1105(defvar current-input-method-title nil
d0b9c3ab 1106 "Title string of the current input method shown in mode line.")
4ed46869
KH
1107(make-variable-buffer-local 'current-input-method-title)
1108(put 'current-input-method-title 'permanent-local t)
1109
b4fba33f 1110(defcustom default-input-method nil
8861c593 1111 "*Default input method for multilingual text (a string).
b4fba33f 1112This is the input method activated automatically by the command
9b10b5a3 1113`toggle-input-method' (\\[toggle-input-method])."
8861c593 1114 :group 'mule
5806e8a6
GM
1115 :type '(choice (const nil) string)
1116 :set-after '(current-language-environment))
b4fba33f 1117
0f835e87
KH
1118(put 'input-method-function 'permanent-local t)
1119
723a427a
KH
1120(defvar input-method-history nil
1121 "History list for some commands that read input methods.")
1122(make-variable-buffer-local 'input-method-history)
1123(put 'input-method-history 'permanent-local t)
4ed46869
KH
1124
1125(defvar inactivate-current-input-method-function nil
1126 "Function to call for inactivating the current input method.
1127Every input method should set this to an appropriate value when activated.
f17ccaee
KH
1128This function is called with no argument.
1129
1130This function should never change the value of `current-input-method'.
1131It is set to nil by the function `inactivate-input-method'.")
4ed46869
KH
1132(make-variable-buffer-local 'inactivate-current-input-method-function)
1133(put 'inactivate-current-input-method-function 'permanent-local t)
1134
1135(defvar describe-current-input-method-function nil
1136 "Function to call for describing the current input method.
1137This function is called with no argument.")
1138(make-variable-buffer-local 'describe-current-input-method-function)
1139(put 'describe-current-input-method-function 'permanent-local t)
1140
d0b9c3ab 1141(defvar input-method-alist nil
2c395d56 1142 "Alist of input method names vs how to use them.
d0b9c3ab 1143Each element has the form:
2c395d56
RS
1144 (INPUT-METHOD LANGUAGE-ENV ACTIVATE-FUNC TITLE DESCRIPTION ARGS...)
1145See the function `register-input-method' for the meanings of the elements.")
1146
f08adf27 1147(defun register-input-method (input-method lang-env &rest args)
bf42aa15 1148 "Register INPUT-METHOD as an input method for language environment LANG-ENV.
f08adf27 1149INPUT-METHOD and LANG-ENV are symbols or strings.
d0b9c3ab 1150
d0b9c3ab 1151The remaining arguments are:
2c395d56
RS
1152 ACTIVATE-FUNC, TITLE, DESCRIPTION, and ARGS...
1153ACTIVATE-FUNC is a function to call to activate this method.
1154TITLE is a string to show in the mode line when this method is active.
1155DESCRIPTION is a string describing this method and what it is good for.
1156The ARGS, if any, are passed as arguments to ACTIVATE-FUNC.
205814ee
KH
1157All told, the arguments to ACTIVATE-FUNC are INPUT-METHOD and the ARGS.
1158
c654de1d
DL
1159This function is mainly used in the file \"leim-list.el\" which is
1160created at Emacs build time, registering all Quail input methods
0b6cadff 1161contained in the Emacs distribution.
205814ee 1162
0b6cadff 1163In case you want to register a new Quail input method by yourself, be
205814ee 1164careful to use the same input method title as given in the third
0b6cadff
DL
1165parameter of `quail-define-package'. (If the values are different, the
1166string specified in this function takes precedence.)
205814ee
KH
1167
1168The commands `describe-input-method' and `list-input-methods' need
0b6cadff
DL
1169these duplicated values to show some information about input methods
1170without loading the relevant Quail packages."
f08adf27
RS
1171 (if (symbolp lang-env)
1172 (setq lang-env (symbol-name lang-env)))
4ef06f75
KH
1173 (if (symbolp input-method)
1174 (setq input-method (symbol-name input-method)))
f08adf27 1175 (let ((info (cons lang-env args))
d0b9c3ab
KH
1176 (slot (assoc input-method input-method-alist)))
1177 (if slot
1178 (setcdr slot info)
1179 (setq slot (cons input-method info))
1180 (setq input-method-alist (cons slot input-method-alist)))))
1181
4d5ac029 1182(defun read-input-method-name (prompt &optional default inhibit-null)
d0b9c3ab 1183 "Read a name of input method from a minibuffer prompting with PROMPT.
4d5ac029 1184If DEFAULT is non-nil, use that as the default,
0b6cadff 1185and substitute it into PROMPT at the first `%s'.
4ef06f75
KH
1186If INHIBIT-NULL is non-nil, null input signals an error.
1187
1188The return value is a string."
4d5ac029
RS
1189 (if default
1190 (setq prompt (format prompt default)))
d0b9c3ab 1191 (let* ((completion-ignore-case t)
c54044ff
KH
1192 ;; As it is quite normal to change input method in the
1193 ;; minibuffer, we must enable it even if
1194 ;; enable-recursive-minibuffers is currently nil.
1195 (enable-recursive-minibuffers t)
723a427a
KH
1196 ;; This binding is necessary because input-method-history is
1197 ;; buffer local.
d0b9c3ab 1198 (input-method (completing-read prompt input-method-alist
87505a98
RS
1199 nil t nil 'input-method-history
1200 default)))
bf294e6e
KH
1201 (if (and input-method (symbolp input-method))
1202 (setq input-method (symbol-name input-method)))
d0b9c3ab
KH
1203 (if (> (length input-method) 0)
1204 input-method
1205 (if inhibit-null
43807b77 1206 (error "No valid input method is specified")))))
d0b9c3ab 1207
d0b9c3ab 1208(defun activate-input-method (input-method)
2c395d56
RS
1209 "Switch to input method INPUT-METHOD for the current buffer.
1210If some other input method is already active, turn it off first.
1211If INPUT-METHOD is nil, deactivate any current input method."
305a3cb6 1212 (if (and input-method (symbolp input-method))
4ef06f75 1213 (setq input-method (symbol-name input-method)))
723a427a
KH
1214 (if (and current-input-method
1215 (not (string= current-input-method input-method)))
305a3cb6 1216 (inactivate-input-method))
2c395d56 1217 (unless (or current-input-method (null input-method))
d0b9c3ab
KH
1218 (let ((slot (assoc input-method input-method-alist)))
1219 (if (null slot)
723a427a 1220 (error "Can't activate input method `%s'" input-method))
278dd6ac 1221 (setq current-input-method-title nil)
8efc03e1
KH
1222 (let ((func (nth 2 slot)))
1223 (if (functionp func)
1224 (apply (nth 2 slot) input-method (nthcdr 5 slot))
1225 (if (and (consp func) (symbolp (car func)) (symbolp (cdr func)))
1226 (progn
1227 (require (cdr func))
1228 (apply (car func) input-method (nthcdr 5 slot)))
1229 (error "Can't activate input method `%s'" input-method))))
d0b9c3ab 1230 (setq current-input-method input-method)
278dd6ac
KH
1231 (or (stringp current-input-method-title)
1232 (setq current-input-method-title (nth 3 slot)))
28885c0e
KH
1233 (unwind-protect
1234 (run-hooks 'input-method-activate-hook)
1235 (force-mode-line-update)))))
15b3e511 1236
15b3e511 1237(defun inactivate-input-method ()
f17ccaee 1238 "Turn off the current input method."
723a427a
KH
1239 (when current-input-method
1240 (if input-method-history
1241 (unless (string= current-input-method (car input-method-history))
1242 (setq input-method-history
1243 (cons current-input-method
1244 (delete current-input-method input-method-history))))
1245 (setq input-method-history (list current-input-method)))
1246 (unwind-protect
1247 (funcall inactivate-current-input-method-function)
15b3e511 1248 (unwind-protect
723a427a
KH
1249 (run-hooks 'input-method-inactivate-hook)
1250 (setq current-input-method nil
2b1ec973 1251 input-method-function nil
28885c0e
KH
1252 current-input-method-title nil)
1253 (force-mode-line-update)))))
4ed46869 1254
8b784951 1255(defun set-input-method (input-method)
2c395d56 1256 "Select and activate input method INPUT-METHOD for the current buffer.
bc406911 1257This also sets the default input method to the one you specify.
402dbbd1
EZ
1258If INPUT-METHOD is nil, this function turns off the input method, and
1259also causes you to be prompted for a name of an input method the next
1260time you invoke \\[toggle-input-method].
1261
bc406911
EZ
1262To deactivate the input method interactively, use \\[toggle-input-method].
1263To deactivate it programmatically, use \\[inactivate-input-method]."
d0b9c3ab 1264 (interactive
723a427a 1265 (let* ((default (or (car input-method-history) default-input-method)))
42395763 1266 (list (read-input-method-name
87505a98 1267 (if default "Select input method (default %s): " "Select input method: ")
42395763 1268 default t))))
d0b9c3ab 1269 (activate-input-method input-method)
f4990970
PA
1270 (setq default-input-method input-method)
1271 (when (interactive-p)
1272 (customize-mark-as-set 'default-input-method))
1273 default-input-method)
4ed46869
KH
1274
1275(defun toggle-input-method (&optional arg)
f8ec20be
RS
1276 "Enable or disable multilingual text input method for the current buffer.
1277Only one input method can be enabled at any time in a given buffer.
1278
1279The normal action is to enable an input method if none was
1280enabled, and disable the current one otherwise. Which input method
1281to enable can be determined in various ways--either the one most
1282recently used, or the one specified by `default-input-method', or
1283as a last resort by reading the name of an input method in the
1284minibuffer.
1285
1286With a prefix argument, read an input method name with the minibuffer
1287and enable that one. The default is the most recent input method specified
f2979bdb 1288\(not including the currently active input method, if any)."
f8ec20be 1289
4ed46869 1290 (interactive "P")
7ddbb5bc
RS
1291 (if (and current-input-method (not arg))
1292 (inactivate-input-method)
1293 (let ((default (or (car input-method-history) default-input-method)))
1294 (if (and arg default (equal current-input-method default)
1295 (> (length input-method-history) 1))
1296 (setq default (nth 1 input-method-history)))
723a427a
KH
1297 (activate-input-method
1298 (if (or arg (not default))
7ddbb5bc
RS
1299 (progn
1300 (read-input-method-name
1301 (if default "Input method (default %s): " "Input method: " )
1302 default t))
723a427a 1303 default))
f4990970 1304 (unless default-input-method
d37ef0f6 1305 (prog1
f4990970
PA
1306 (setq default-input-method current-input-method)
1307 (when (interactive-p)
1308 (customize-mark-as-set 'default-input-method)))))))
d0b9c3ab
KH
1309
1310(defun describe-input-method (input-method)
2c395d56 1311 "Describe input method INPUT-METHOD."
d0b9c3ab
KH
1312 (interactive
1313 (list (read-input-method-name
1314 "Describe input method (default, current choice): ")))
78754934 1315 (if (and input-method (symbolp input-method))
4ef06f75 1316 (setq input-method (symbol-name input-method)))
43125c28
RS
1317 (help-setup-xref (list #'describe-input-method
1318 (or input-method current-input-method))
f80e2142
RS
1319 (interactive-p))
1320
d0b9c3ab
KH
1321 (if (null input-method)
1322 (describe-current-input-method)
464cc130
KH
1323 (let ((current current-input-method))
1324 (condition-case nil
1325 (progn
1326 (save-excursion
1327 (activate-input-method input-method)
1328 (describe-current-input-method))
1329 (activate-input-method current))
d37ef0f6 1330 (error
464cc130 1331 (activate-input-method current)
5f395df3
SM
1332 (help-setup-xref (list #'describe-input-method input-method)
1333 (interactive-p))
1334 (with-output-to-temp-buffer (help-buffer)
464cc130
KH
1335 (let ((elt (assoc input-method input-method-alist)))
1336 (princ (format
1337 "Input method: %s (`%s' in mode line) for %s\n %s\n"
1338 input-method (nth 3 elt) (nth 1 elt) (nth 4 elt))))))))))
d0b9c3ab
KH
1339
1340(defun describe-current-input-method ()
f80e2142
RS
1341 "Describe the input method currently in use.
1342This is a subroutine for `describe-input-method'."
4ed46869
KH
1343 (if current-input-method
1344 (if (and (symbolp describe-current-input-method-function)
1345 (fboundp describe-current-input-method-function))
1346 (funcall describe-current-input-method-function)
1347 (message "No way to describe the current input method `%s'"
f2979bdb 1348 current-input-method)
4ed46869 1349 (ding))
d0b9c3ab 1350 (error "No input method is activated now")))
4ed46869 1351
d3459641 1352(defun read-multilingual-string (prompt &optional initial-input input-method)
4ed46869
KH
1353 "Read a multilingual string from minibuffer, prompting with string PROMPT.
1354The input method selected last time is activated in minibuffer.
15b3e511 1355If optional second arg INITIAL-INPUT is non-nil, insert it in the minibuffer
d0b9c3ab
KH
1356initially.
1357Optional 3rd argument INPUT-METHOD specifies the input method
4ef06f75
KH
1358to be activated instead of the one selected last time. It is a symbol
1359or a string."
88d559ec
KH
1360 (setq input-method
1361 (or input-method
d3459641 1362 current-input-method
88d559ec
KH
1363 default-input-method
1364 (read-input-method-name "Input method: " nil t)))
3df60841 1365 (if (and input-method (symbolp input-method))
4ef06f75 1366 (setq input-method (symbol-name input-method)))
305a3cb6
KH
1367 (let ((prev-input-method current-input-method))
1368 (unwind-protect
1369 (progn
1370 (activate-input-method input-method)
1371 (read-string prompt initial-input nil nil t))
1372 (activate-input-method prev-input-method))))
4ed46869
KH
1373
1374;; Variables to control behavior of input methods. All input methods
1375;; should react to these variables.
1376
8efc03e1
KH
1377(defcustom input-method-verbose-flag 'default
1378 "*A flag to control extra guidance given by input methods.
1379The value should be nil, t, `complex-only', or `default'.
4ed46869 1380
cb29dfb6 1381The extra guidance is done by showing list of available keys in echo
8efc03e1
KH
1382area. When you use the input method in the minibuffer, the guidance
1383is shown at the bottom short window (split from the existing window).
c27c4ed8 1384
8efc03e1
KH
1385If the value is t, extra guidance is always given, if the value is
1386nil, extra guidance is always suppressed.
1387
1388If the value is `complex-only', only complex input methods such as
1389`chinese-py' and `japanese' give extra guidance.
1390
1391If the value is `default', complex input methods always give extra
1392guidance, but simple input methods give it only when you are not in
1393the minibuffer.
1394
1395See also the variable `input-method-highlight-flag'."
d37ef0f6
DL
1396 :type '(choice (const :tag "Always" t) (const :tag "Never" nil)
1397 (const complex-only) (const default))
42395763
RS
1398 :group 'mule)
1399
1400(defcustom input-method-highlight-flag t
1401 "*If this flag is non-nil, input methods highlight partially-entered text.
1402For instance, while you are in the middle of a Quail input method sequence,
1403the text inserted so far is temporarily underlined.
8efc03e1
KH
1404The underlining goes away when you finish or abort the input method sequence.
1405See also the variable `input-method-verbose-flag'."
42395763
RS
1406 :type 'boolean
1407 :group 'mule)
4ed46869
KH
1408
1409(defvar input-method-activate-hook nil
f17ccaee
KH
1410 "Normal hook run just after an input method is activated.
1411
1412The variable `current-input-method' keeps the input method name
1413just activated.")
4ed46869
KH
1414
1415(defvar input-method-inactivate-hook nil
f17ccaee
KH
1416 "Normal hook run just after an input method is inactivated.
1417
1418The variable `current-input-method' still keeps the input method name
4d0e6a11 1419just inactivated.")
4ed46869
KH
1420
1421(defvar input-method-after-insert-chunk-hook nil
1422 "Normal hook run just after an input method insert some chunk of text.")
1423
dccca980 1424(defvar input-method-exit-on-first-char nil
0b6cadff 1425 "This flag controls when an input method returns.
dccca980
KH
1426Usually, the input method does not return while there's a possibility
1427that it may find a different translation if a user types another key.
39e643e2
RS
1428But, it this flag is non-nil, the input method returns as soon as
1429the current key sequence gets long enough to have some valid translation.")
dccca980
KH
1430
1431(defvar input-method-use-echo-area nil
1432 "This flag controls how an input method shows an intermediate key sequence.
39e643e2
RS
1433Usually, the input method inserts the intermediate key sequence,
1434or candidate translations corresponding to the sequence,
1435at point in the current buffer.
1436But, if this flag is non-nil, it displays them in echo area instead.")
dccca980 1437
723a427a
KH
1438(defvar input-method-exit-on-invalid-key nil
1439 "This flag controls the behaviour of an input method on invalid key input.
1440Usually, when a user types a key which doesn't start any character
1441handled by the input method, the key is handled by turning off the
e8dd0160 1442input method temporarily. After that key, the input method is re-enabled.
723a427a
KH
1443But, if this flag is non-nil, the input method is never back on.")
1444
4ed46869 1445\f
8efc03e1
KH
1446(defvar set-language-environment-hook nil
1447 "Normal hook run after some language environment is set.
1448
1449When you set some hook function here, that effect usually should not
1450be inherited to another language environment. So, you had better set
1451another function in `exit-language-environment-hook' (which see) to
1452cancel the effect.")
1453
1454(defvar exit-language-environment-hook nil
1455 "Normal hook run after exiting from some language environment.
1456When this hook is run, the variable `current-language-environment'
1457is still bound to the language environment being exited.
1458
e8dd0160 1459This hook is mainly used for canceling the effect of
8efc03e1
KH
1460`set-language-environment-hook' (which-see).")
1461
b0648a00
RS
1462(put 'setup-specified-language-environment 'apropos-inhibit t)
1463
15b3e511 1464(defun setup-specified-language-environment ()
f08adf27 1465 "Switch to a specified language environment."
15b3e511 1466 (interactive)
f850d782 1467 (let (language-name)
15b3e511
KH
1468 (if (and (symbolp last-command-event)
1469 (or (not (eq last-command-event 'Default))
1470 (setq last-command-event 'English))
f850d782 1471 (setq language-name (symbol-name last-command-event)))
f4990970
PA
1472 (prog1
1473 (set-language-environment language-name)
1474 (customize-mark-as-set 'current-language-environment))
15b3e511 1475 (error "Bogus calling sequence"))))
4ed46869 1476
8861c593 1477(defcustom current-language-environment "English"
94d04df6 1478 "The last language environment specified with `set-language-environment'.
ebef6d93
KH
1479This variable should be set only with \\[customize], which is equivalent
1480to using the function `set-language-environment'."
94d04df6 1481 :link '(custom-manual "(emacs)Language Environments")
dff1aa24 1482 :set (lambda (symbol value) (set-language-environment value))
94d04df6
DL
1483 :get (lambda (x)
1484 (or (car-safe (assoc-ignore-case
1485 (if (symbolp current-language-environment)
1486 (symbol-name current-language-environment)
1487 current-language-environment)
1488 language-info-alist))
1489 "English"))
1490 :type (cons 'choice (mapcar (lambda (lang)
1491 (list 'const (car lang)))
1492 language-info-alist))
8861c593
RS
1493 :initialize 'custom-initialize-default
1494 :group 'mule
1495 :type 'string)
f850d782 1496
ddb5c041
KH
1497(defun reset-language-environment ()
1498 "Reset multilingual environment of Emacs to the default status.
1499
1500The default status is as follows:
1501
d37ef0f6 1502 The default value of `buffer-file-coding-system' is nil.
ddb5c041
KH
1503 The default coding system for process I/O is nil.
1504 The default value for the command `set-terminal-coding-system' is nil.
1505 The default value for the command `set-keyboard-coding-system' is nil.
1506
1507 The order of priorities of coding categories and the coding system
1508 bound to each category are as follows
1509 coding category coding system
1510 --------------------------------------------------
1511 coding-category-iso-8-2 iso-latin-1
1512 coding-category-iso-8-1 iso-latin-1
1513 coding-category-iso-7-tight iso-2022-jp
1514 coding-category-iso-7 iso-2022-7bit
1515 coding-category-iso-7-else iso-2022-7bit-lock
1516 coding-category-iso-8-else iso-2022-8bit-ss2
1517 coding-category-emacs-mule emacs-mule
1518 coding-category-raw-text raw-text
1519 coding-category-sjis japanese-shift-jis
1520 coding-category-big5 chinese-big5
1521 coding-category-ccl nil
e8dd0160 1522 coding-category-binary no-conversion
631f62fd
KH
1523 coding-category-utf-16-be nil
1524 coding-category-utf-16-le nil
c654de1d 1525 coding-category-utf-8 mule-utf-8"
ddb5c041
KH
1526 (interactive)
1527 ;; This function formerly set default-enable-multibyte-characters to t,
1528 ;; but that is incorrect. It should not alter the unibyte/multibyte choice.
1529
1530 (setq coding-category-iso-7-tight 'iso-2022-jp
1531 coding-category-iso-7 'iso-2022-7bit
1532 coding-category-iso-8-1 'iso-latin-1
1533 coding-category-iso-8-2 'iso-latin-1
1534 coding-category-iso-7-else 'iso-2022-7bit-lock
1535 coding-category-iso-8-else 'iso-2022-8bit-ss2
1536 coding-category-emacs-mule 'emacs-mule
1537 coding-category-raw-text 'raw-text
1538 coding-category-sjis 'japanese-shift-jis
1539 coding-category-big5 'chinese-big5
631f62fd 1540 coding-category-utf-16-be nil
b909cfc0
KH
1541 coding-category-utf-16-le nil
1542 coding-category-utf-8 'mule-utf-8
1543 coding-category-ccl nil
1544 coding-category-binary 'no-conversion)
ddb5c041
KH
1545
1546 (set-coding-priority
1547 '(coding-category-iso-8-1
1548 coding-category-iso-8-2
1549 coding-category-iso-7-tight
1550 coding-category-iso-7
1551 coding-category-iso-7-else
1552 coding-category-iso-8-else
d37ef0f6 1553 coding-category-emacs-mule
ddb5c041
KH
1554 coding-category-raw-text
1555 coding-category-sjis
1556 coding-category-big5
1557 coding-category-ccl
9bfcd269
KH
1558 coding-category-binary
1559 coding-category-utf-16-be
b909cfc0
KH
1560 coding-category-utf-16-le
1561 coding-category-utf-8))
ddb5c041 1562
91693d18
KH
1563 (update-coding-systems-internal)
1564
ddb5c041 1565 (set-default-coding-systems nil)
b5edd1d1 1566 (setq default-sendmail-coding-system 'iso-latin-1)
1d77e15a
JR
1567 ;; Preserve eol-type from existing default-process-coding-systems.
1568 ;; On non-unix-like systems in particular, these may have been set
1569 ;; carefully by the user, or by the startup code, to deal with the
1570 ;; users shell appropriately, so should not be altered by changing
1571 ;; language environment.
1572 (let ((output-coding
a099a2ff
JR
1573 ;; When bootstrapping, coding-systems are not defined yet, so
1574 ;; we need to catch the error from check-coding-system.
d37ef0f6 1575 (condition-case nil
a099a2ff
JR
1576 (coding-system-change-text-conversion
1577 (car default-process-coding-system) 'undecided)
1578 (coding-system-error 'undecided)))
1d77e15a 1579 (input-coding
a099a2ff
JR
1580 (condition-case nil
1581 (coding-system-change-text-conversion
1582 (cdr default-process-coding-system) 'iso-latin-1)
1583 (coding-system-error 'iso-latin-1))))
1d77e15a
JR
1584 (setq default-process-coding-system
1585 (cons output-coding input-coding)))
b5edd1d1 1586
ddb5c041
KH
1587 ;; Don't alter the terminal and keyboard coding systems here.
1588 ;; The terminal still supports the same coding system
1589 ;; that it supported a minute ago.
1b8dc791
SM
1590 ;; (set-terminal-coding-system-internal nil)
1591 ;; (set-keyboard-coding-system-internal nil)
ddb5c041
KH
1592
1593 (setq nonascii-translation-table nil
1594 nonascii-insert-offset 0))
1595
0c47a7c8
KH
1596(reset-language-environment)
1597
40c81f74
PE
1598(defun set-display-table-and-terminal-coding-system (language-name)
1599 "Set up the display table and terminal coding system for LANGUAGE-NAME."
1600 (let ((coding (get-language-info language-name 'unibyte-display)))
1601 (if coding
1602 (standard-display-european-internal)
1603 (standard-display-default (if (eq window-system 'pc) 128 160) 255)
1604 (aset standard-display-table 146 nil))
1605 (or (eq window-system 'pc)
1606 (set-terminal-coding-system coding))))
1607
166246f7 1608(defun set-language-environment (language-name)
6c05d680
RS
1609 "Set up multi-lingual environment for using LANGUAGE-NAME.
1610This sets the coding system priority and the default input method
8861c593
RS
1611and sometimes other things. LANGUAGE-NAME should be a string
1612which is the name of a language environment. For example, \"Latin-1\"
1613specifies the character set for the major languages of Western Europe."
8efc03e1 1614 (interactive (list (read-language-name
ddb5c041 1615 nil
8efc03e1 1616 "Set language environment (default, English): ")))
4ef06f75
KH
1617 (if language-name
1618 (if (symbolp language-name)
1619 (setq language-name (symbol-name language-name)))
1620 (setq language-name "English"))
ddb5c041 1621 (or (assoc-ignore-case language-name language-info-alist)
f850d782 1622 (error "Language environment not defined: %S" language-name))
8efc03e1
KH
1623 (if current-language-environment
1624 (let ((func (get-language-info current-language-environment
1625 'exit-function)))
e63645c2 1626 (run-hooks 'exit-language-environment-hook)
5f395df3 1627 (if (functionp func) (funcall func))))
03c35c83
EZ
1628 (let ((default-eol-type (coding-system-eol-type
1629 default-buffer-file-coding-system)))
1630 (reset-language-environment)
ddb5c041 1631
e522f07b 1632 ;; The features might set up coding systems.
9c20a8d5
DL
1633 (let ((required-features (get-language-info language-name 'features)))
1634 (while required-features
1635 (require (car required-features))
1636 (setq required-features (cdr required-features))))
1637
03c35c83
EZ
1638 (setq current-language-environment language-name)
1639 (set-language-environment-coding-systems language-name default-eol-type))
ddb5c041
KH
1640 (let ((input-method (get-language-info language-name 'input-method)))
1641 (when input-method
1642 (setq default-input-method input-method)
1643 (if input-method-history
1644 (setq input-method-history
1645 (cons input-method
1646 (delete input-method input-method-history))))))
ec241f58
EZ
1647 (let ((nonascii (get-language-info language-name 'nonascii-translation))
1648 (dos-table
4e2ac2d9
EZ
1649 (if (eq window-system 'pc)
1650 (intern
d9c0a50e 1651 (format "cp%d-nonascii-translation-table" dos-codepage)))))
03c35c83
EZ
1652 (cond
1653 ((char-table-p nonascii)
1654 (setq nonascii-translation-table nonascii))
ec241f58 1655 ((and (eq window-system 'pc) (boundp dos-table))
03c35c83
EZ
1656 ;; DOS terminals' default is to use a special non-ASCII translation
1657 ;; table as appropriate for the installed codepage.
ec241f58 1658 (setq nonascii-translation-table (symbol-value dos-table)))
03c35c83
EZ
1659 ((charsetp nonascii)
1660 (setq nonascii-insert-offset (- (make-char nonascii) 128)))))
ddb5c041 1661
63283a8f 1662 ;; Unibyte setups if necessary.
ddb5c041 1663 (unless default-enable-multibyte-characters
63283a8f 1664 ;; Syntax and case table.
ddb5c041
KH
1665 (let ((syntax (get-language-info language-name 'unibyte-syntax)))
1666 (if syntax
1667 (let ((set-case-syntax-set-multibyte nil))
63283a8f
KH
1668 (load syntax nil t))
1669 ;; No information for syntax and case. Reset to the defaults.
1670 (let ((syntax-table (standard-syntax-table))
1671 (case-table (standard-case-table))
03c35c83 1672 (ch (if (eq window-system 'pc) 128 160)))
63283a8f
KH
1673 (while (< ch 256)
1674 (modify-syntax-entry ch " " syntax-table)
1675 (aset case-table ch ch)
1676 (setq ch (1+ ch)))
1677 (set-char-table-extra-slot case-table 0 nil)
1678 (set-char-table-extra-slot case-table 1 nil)
1679 (set-char-table-extra-slot case-table 2 nil))
1680 (set-standard-case-table (standard-case-table))
1681 (let ((list (buffer-list)))
1682 (while list
1683 (with-current-buffer (car list)
1684 (set-case-table (standard-case-table)))
1685 (setq list (cdr list))))))
40c81f74 1686 (set-display-table-and-terminal-coding-system language-name))
ddb5c041
KH
1687
1688 (let ((required-features (get-language-info language-name 'features)))
1689 (while required-features
1690 (require (car required-features))
1691 (setq required-features (cdr required-features))))
1692 (let ((func (get-language-info language-name 'setup-function)))
5f395df3 1693 (if (functionp func)
ddb5c041 1694 (funcall func)))
8efc03e1 1695 (run-hooks 'set-language-environment-hook)
f850d782 1696 (force-mode-line-update t))
4ed46869 1697
51a8fc1d
RS
1698(defun standard-display-european-internal ()
1699 ;; Actually set up direct output of non-ASCII characters.
03c35c83
EZ
1700 (standard-display-8bit (if (eq window-system 'pc) 128 160) 255)
1701 ;; Unibyte Emacs on MS-DOS wants to display all 8-bit characters with
1702 ;; the native font, and codes 160 and 146 stand for something very
1703 ;; different there.
1704 (or (and (eq window-system 'pc) (not default-enable-multibyte-characters))
1705 (progn
1706 ;; Make non-line-break space display as a plain space.
1707 ;; Most X fonts do the wrong thing for code 160.
1708 (aset standard-display-table 160 [32])
1492f7ac
DL
1709 ;; With luck, non-Latin-1 fonts are more recent and so don't
1710 ;; have this bug.
1711 (aset standard-display-table 2208 [32]) ; Latin-1 NBSP
1712 ;; Most Windows programs send out apostrophes as \222. Most X fonts
03c35c83 1713 ;; don't contain a character at that position. Map it to the ASCII
5f395df3
SM
1714 ;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
1715 ;; U+2019, normally from the windows-1252 character set. XFree 4
1716 ;; fonts probably have the appropriate glyph at this position,
1717 ;; so they could use standard-display-8bit. It's better to use a
1718 ;; proper windows-1252 coding system. --fx]
03c35c83
EZ
1719 (aset standard-display-table 146 [39]))))
1720
1721(defun set-language-environment-coding-systems (language-name
1722 &optional eol-type)
1723 "Do various coding system setups for language environment LANGUAGE-NAME.
1724
1725The optional arg EOL-TYPE specifies the eol-type of the default value
d37ef0f6 1726of `buffer-file-coding-system' set by this function."
54b226f7
KH
1727 (let* ((priority (get-language-info language-name 'coding-priority))
1728 (default-coding (car priority)))
1729 (if priority
1730 (let ((categories (mapcar 'coding-system-category priority)))
03c35c83
EZ
1731 (set-default-coding-systems
1732 (if (memq eol-type '(0 1 2 unix dos mac))
1733 (coding-system-change-eol-conversion default-coding eol-type)
1734 default-coding))
46babb23 1735 (setq default-sendmail-coding-system default-coding)
54b226f7
KH
1736 (set-coding-priority categories)
1737 (while priority
1738 (set (car categories) (car priority))
1739 (setq priority (cdr priority) categories (cdr categories)))
45d08cb2 1740 (update-coding-systems-internal)))))
54b226f7 1741
4ed46869 1742(defsubst princ-list (&rest args)
d37ef0f6 1743 "Print all arguments with `princ', then print \"\n\"."
4ed46869
KH
1744 (while args (princ (car args)) (setq args (cdr args)))
1745 (princ "\n"))
1746
b0648a00
RS
1747(put 'describe-specified-language-support 'apropos-inhibit t)
1748
48082651 1749;; Print a language specific information such as input methods,
13e82c04 1750;; charsets, and coding systems. This function is intended to be
48082651 1751;; called from the menu:
281d03ec 1752;; [menu-bar mule describe-language-environment LANGUAGE]
48082651
KH
1753;; and should not run it by `M-x describe-current-input-method-function'.
1754(defun describe-specified-language-support ()
96db204a 1755 "Describe how Emacs supports the specified language environment."
48082651 1756 (interactive)
281d03ec 1757 (let (language-name)
48082651 1758 (if (not (and (symbolp last-command-event)
cda74479
DL
1759 (or (not (eq last-command-event 'Default))
1760 (setq last-command-event 'English))
281d03ec 1761 (setq language-name (symbol-name last-command-event))))
48082651 1762 (error "Bogus calling sequence"))
281d03ec
RS
1763 (describe-language-environment language-name)))
1764
1765(defun describe-language-environment (language-name)
1766 "Describe how Emacs supports language environment LANGUAGE-NAME."
78754934
KH
1767 (interactive
1768 (list (read-language-name
1769 'documentation
8adfa8be 1770 "Describe language environment (default, current choice): ")))
f850d782
RS
1771 (if (null language-name)
1772 (setq language-name current-language-environment))
281d03ec
RS
1773 (if (or (null language-name)
1774 (null (get-language-info language-name 'documentation)))
1775 (error "No documentation for the specified language"))
4ef06f75
KH
1776 (if (symbolp language-name)
1777 (setq language-name (symbol-name language-name)))
464cc130
KH
1778 (let ((doc (get-language-info language-name 'documentation))
1779 pos)
c3034e84
SM
1780 (help-setup-xref (list #'describe-language-environment language-name)
1781 (interactive-p))
1782 (with-output-to-temp-buffer (help-buffer)
464cc130
KH
1783 (save-excursion
1784 (set-buffer standard-output)
1785 (insert language-name " language environment\n\n")
1786 (if (stringp doc)
1787 (insert doc "\n\n"))
e036b0a6
KH
1788 (condition-case nil
1789 (let ((str (eval (get-language-info language-name 'sample-text))))
1790 (if (stringp str)
1791 (insert "Sample text:\n " str "\n\n")))
1792 (error nil))
464cc130
KH
1793 (let ((input-method (get-language-info language-name 'input-method))
1794 (l (copy-sequence input-method-alist)))
1795 (insert "Input methods")
1796 (when input-method
1797 (insert " (default, " input-method ")")
1798 (setq input-method (assoc input-method input-method-alist))
1799 (setq l (cons input-method (delete input-method l))))
1800 (insert ":\n")
48082651 1801 (while l
464cc130
KH
1802 (when (string= language-name (nth 1 (car l)))
1803 (insert " " (car (car l)))
1804 (search-backward (car (car l)))
467412aa 1805 (help-xref-button 0 'help-input-method (car (car l)))
464cc130 1806 (goto-char (point-max))
2fa7e202
KH
1807 (insert " (\""
1808 (if (stringp (nth 3 (car l)))
1809 (nth 3 (car l))
1810 (car (nth 3 (car l))))
1811 "\" in mode line)\n"))
464cc130
KH
1812 (setq l (cdr l)))
1813 (insert "\n"))
1814 (insert "Character sets:\n")
1815 (let ((l (get-language-info language-name 'charset)))
1816 (if (null l)
1817 (insert " nothing specific to " language-name "\n")
1818 (while l
1819 (insert " " (symbol-name (car l)))
1820 (search-backward (symbol-name (car l)))
467412aa 1821 (help-xref-button 0 'help-character-set (car l))
464cc130
KH
1822 (goto-char (point-max))
1823 (insert ": " (charset-description (car l)) "\n")
1824 (setq l (cdr l)))))
1825 (insert "\n")
1826 (insert "Coding systems:\n")
1827 (let ((l (get-language-info language-name 'coding-system)))
1828 (if (null l)
1829 (insert " nothing specific to " language-name "\n")
1830 (while l
1831 (insert " " (symbol-name (car l)))
1832 (search-backward (symbol-name (car l)))
467412aa 1833 (help-xref-button 0 'help-coding-system (car l))
464cc130
KH
1834 (goto-char (point-max))
1835 (insert " (`"
1836 (coding-system-mnemonic (car l))
1837 "' in mode line):\n\t"
1838 (coding-system-doc-string (car l))
1839 "\n")
1840 (let ((aliases (coding-system-get (car l)
1841 'alias-coding-systems)))
1842 (when aliases
1843 (insert "\t(alias:")
1844 (while aliases
1845 (insert " " (symbol-name (car aliases)))
1846 (setq aliases (cdr aliases)))
1847 (insert ")\n")))
c3034e84 1848 (setq l (cdr l)))))))))
4ed46869 1849\f
40c81f74
PE
1850;;; Locales.
1851
0d7c5bb9
DL
1852(defvar locale-translation-file-name nil
1853 "File name for the system's file of locale-name aliases, or nil if none.")
40c81f74 1854
5f395df3
SM
1855;; The following definitions might as well be marked as constants and
1856;; purecopied, since they're normally used on startup, and probably
1857;; should reflect the facilities of the base Emacs.
1858(defconst locale-language-names
1859 (purecopy
1860 '(
40c81f74
PE
1861 ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
1862 ;; as specified in the Single Unix Spec, Version 2.
1863 ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
1864 ;; with additions from ISO 639/RA Newsletter No.1/1989;
5f395df3
SM
1865 ;; see Internet RFC 2165 (1997-06) and
1866 ;; http://www.evertype.com/standards/iso639/iso639-en.html
1867 ;; TERRITORY is a country code taken from ISO 3166
1868 ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
40c81f74 1869 ;; CODESET and MODIFIER are implementation-dependent.
5f395df3 1870
40c81f74
PE
1871 ; aa Afar
1872 ; ab Abkhazian
6ececc4d 1873 ("af" . "Latin-1") ; Afrikaans
40c81f74 1874 ("am" . "Ethiopic") ; Amharic
5f395df3 1875 ; ar Arabic glibc uses 8859-6
40c81f74
PE
1876 ; as Assamese
1877 ; ay Aymara
1878 ; az Azerbaijani
1879 ; ba Bashkir
e522f07b 1880 ("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
9c20a8d5 1881 ("bg" . "Bulgarian") ; Bulgarian
40c81f74
PE
1882 ; bh Bihari
1883 ; bi Bislama
1884 ; bn Bengali, Bangla
1885 ("bo" . "Tibetan")
1886 ("br" . "Latin-1") ; Breton
d37ef0f6 1887 ("bs" . "Latin-2") ; Bosnian
40c81f74
PE
1888 ("ca" . "Latin-1") ; Catalan
1889 ; co Corsican
1890 ("cs" . "Czech")
d37ef0f6 1891 ("cy" . "Welsh") ; Welsh
40c81f74
PE
1892 ("da" . "Latin-1") ; Danish
1893 ("de" . "German")
1894 ; dz Bhutani
1895 ("el" . "Greek")
6ececc4d
PE
1896 ;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
1897 ("en" . "Latin-1") ; English
40c81f74 1898 ("eo" . "Latin-3") ; Esperanto
8c4b6822 1899 ("es" . "Spanish")
40c81f74
PE
1900 ("et" . "Latin-4") ; Estonian
1901 ("eu" . "Latin-1") ; Basque
d37ef0f6 1902 ; fa Persian glibc uses utf-8
40c81f74
PE
1903 ("fi" . "Latin-1") ; Finnish
1904 ; fj Fiji
1905 ("fo" . "Latin-1") ; Faroese
8d738cfd 1906 ("fr" . "French") ; French
40c81f74 1907 ("fy" . "Latin-1") ; Frisian
6ececc4d
PE
1908 ("ga" . "Latin-1") ; Irish Gaelic (new orthography)
1909 ("gd" . "Latin-1") ; Scots Gaelic
1910 ("gl" . "Latin-1") ; Galician
40c81f74
PE
1911 ; gn Guarani
1912 ; gu Gujarati
d37ef0f6 1913 ("gv" . "Latin-8") ; Manx Gaelic glibc uses 8859-1
40c81f74
PE
1914 ; ha Hausa
1915 ("he" . "Hebrew")
5f395df3 1916 ("hi" . "Devanagari") ; Hindi glibc uses utf-8
40c81f74
PE
1917 ("hr" . "Latin-2") ; Croatian
1918 ("hu" . "Latin-2") ; Hungarian
1919 ; hy Armenian
1920 ; ia Interlingua
1921 ("id" . "Latin-1") ; Indonesian
1922 ; ie Interlingue
1923 ; ik Inupiak
1924 ("is" . "Latin-1") ; Icelandic
1925 ("it" . "Latin-1") ; Italian
1926 ; iu Inuktitut
1927 ("ja" . "Japanese")
1928 ; jw Javanese
9c20a8d5 1929 ("ka" . "Georgian") ; Georgian
40c81f74 1930 ; kk Kazakh
6ececc4d 1931 ("kl" . "Latin-1") ; Greenlandic
40c81f74
PE
1932 ; km Cambodian
1933 ; kn Kannada
1934 ("ko" . "Korean")
1935 ; ks Kashmiri
1936 ; ku Kurdish
5f395df3 1937 ("kw" . "Latin-1") ; Cornish
40c81f74
PE
1938 ; ky Kirghiz
1939 ("la" . "Latin-1") ; Latin
5f395df3 1940 ("lb" . "Latin-1") ; Luxemburgish
40c81f74
PE
1941 ; ln Lingala
1942 ("lo" . "Lao") ; Laothian
9c20a8d5
DL
1943 ("lt" . "Lithuanian")
1944 ("lv" . "Latvian") ; Latvian, Lettish
40c81f74 1945 ; mg Malagasy
5f395df3 1946 ("mi" . "Latin-7") ; Maori
f1282c7f 1947 ("mk" . "Cyrillic-ISO") ; Macedonian
40c81f74
PE
1948 ; ml Malayalam
1949 ; mn Mongolian
1950 ; mo Moldavian
5f395df3
SM
1951 ("mr" . "Devanagari") ; Marathi glibc uses utf-8
1952 ("ms" . "Latin-1") ; Malay
40c81f74
PE
1953 ("mt" . "Latin-3") ; Maltese
1954 ; my Burmese
1955 ; na Nauru
1956 ("ne" . "Devanagari") ; Nepali
8c4b6822 1957 ("nl" . "Dutch")
40c81f74 1958 ("no" . "Latin-1") ; Norwegian
5f395df3 1959 ("oc" . "Latin-1") ; Occitan
40c81f74
PE
1960 ; om (Afan) Oromo
1961 ; or Oriya
1962 ; pa Punjabi
1963 ("pl" . "Latin-2") ; Polish
1964 ; ps Pashto, Pushto
1965 ("pt" . "Latin-1") ; Portuguese
1966 ; qu Quechua
6ececc4d 1967 ("rm" . "Latin-1") ; Rhaeto-Romanic
40c81f74
PE
1968 ; rn Kirundi
1969 ("ro" . "Romanian")
1970 ("ru.*[_.]koi8" . "Cyrillic-KOI8") ; Russian
f1282c7f 1971 ("ru" . "Cyrillic-ISO") ; Russian
40c81f74
PE
1972 ; rw Kinyarwanda
1973 ("sa" . "Devanagari") ; Sanskrit
1974 ; sd Sindhi
5f395df3 1975 ; se Northern Sami
40c81f74
PE
1976 ; sg Sangho
1977 ("sh" . "Latin-2") ; Serbo-Croatian
1978 ; si Sinhalese
1979 ("sk" . "Slovak")
1980 ("sl" . "Slovenian")
1981 ; sm Samoan
1982 ; sn Shona
1983 ; so Somali
6ececc4d 1984 ("sq" . "Latin-1") ; Albanian
40c81f74 1985 ("sr" . "Latin-2") ; Serbian (Latin alphabet)
e522f07b 1986 ("sr_YU@cyrillic" . "Cyrillic-ISO") ; per glibc
40c81f74
PE
1987 ; ss Siswati
1988 ; st Sesotho
1989 ; su Sundanese
1990 ("sv" . "Latin-1") ; Swedish
1991 ("sw" . "Latin-1") ; Swahili
5f395df3
SM
1992 ; ta Tamil glibc uses utf-8
1993 ; te Telugu glibc uses utf-8
e522f07b 1994 ("tg" . "Tajik")
40c81f74
PE
1995 ("th" . "Thai")
1996 ; ti Tigrinya
1997 ; tk Turkmen
6ececc4d 1998 ("tl" . "Latin-1") ; Tagalog
40c81f74
PE
1999 ; tn Setswana
2000 ; to Tonga
2001 ("tr" . "Latin-5") ; Turkish
2002 ; ts Tsonga
2003 ; tt Tatar
2004 ; tw Twi
2005 ; ug Uighur
9c20a8d5 2006 ("uk" . "Ukrainian") ; Ukrainian
5f395df3
SM
2007 ; ur Urdu glibc uses utf-8
2008 ("uz" . "Latin-1") ; Uzbek
2009 ("vi" . "Vietnamese") ; glibc uses utf-8
40c81f74 2010 ; vo Volapuk
d37ef0f6 2011 ("wa" . "Latin-1") ; Walloon
40c81f74
PE
2012 ; wo Wolof
2013 ; xh Xhosa
9c20a8d5 2014 ("yi" . "Windows-1255") ; Yiddish
40c81f74
PE
2015 ; yo Yoruba
2016 ; za Zhuang
5f395df3
SM
2017
2018 ; glibc:
2019 ; zh_CN.GB18030/GB18030 \
2020 ; zh_CN.GBK/GBK \
2021 ; zh_HK/BIG5-HKSCS \
2022 ; zh_TW/BIG5 \
2023 ; zh_TW.EUC-TW/EUC-TW \
2024
40c81f74
PE
2025 ("zh.*[._]big5" . "Chinese-BIG5")
2026 ("zh.*[._]gbk" . nil) ; Solaris 2.7; has gbk-0 as well as GB 2312.1980-0
2027 ("zh_tw" . "Chinese-CNS")
2028 ("zh" . "Chinese-GB")
2029 ; zu Zulu
2030
2031 ;; ISO standard locales
2032 ("c$" . "ASCII")
2033 ("posix$" . "ASCII")
2034
40c81f74
PE
2035 ;; The "IPA" Emacs language environment does not correspond
2036 ;; to any ISO 639 code, so let it stand for itself.
2037 ("ipa$" . "IPA")
2038
2039 ;; Nonstandard or obsolete language codes
2040 ("cz" . "Czech") ; e.g. Solaris 2.6
2041 ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4
2042 ("iw" . "Hebrew") ; e.g. X11R6.4
f1282c7f 2043 ("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4
40c81f74 2044 ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6
2e86ceaa 2045 ("jp" . "Japanese") ; e.g. MS Windows
86d9e628
JR
2046 ("chs" . "Chinese-GB") ; MS Windows Chinese Simplified
2047 ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
367ca50f 2048 ))
40c81f74 2049 "List of pairs of locale regexps and language names.
6ececc4d
PE
2050The first element whose locale regexp matches the start of a downcased locale
2051specifies the language name corresponding to that locale.
40c81f74
PE
2052If the language name is nil, there is no corresponding language environment.")
2053
5f395df3
SM
2054(defconst locale-charset-language-names
2055 (purecopy
2056 '((".*8859[-_]?1\\>" . "Latin-1")
2057 (".*8859[-_]?2\\>" . "Latin-2")
2058 (".*8859[-_]?3\\>" . "Latin-3")
2059 (".*8859[-_]?4\\>" . "Latin-4")
2060 (".*8859[-_]?9\\>" . "Latin-5")
2061 (".*8859[-_]?14\\>" . "Latin-8")
2062 (".*8859[-_]?15\\>" . "Latin-9")
e522f07b
DL
2063 (".*utf\\(-?8\\)\\>" . "UTF-8")
2064 ;; utf-8@euro exists, so put this last. (@euro really specifies
2065 ;; the currency, rather than the charset.)
2066 (".*@euro\\>" . "Latin-9")))
6ececc4d
PE
2067 "List of pairs of locale regexps and charset language names.
2068The first element whose locale regexp matches the start of a downcased locale
2069specifies the language name whose charsets corresponds to that locale.
2070This language name is used if its charsets disagree with the charsets of
2071the language name that would otherwise be used for this locale.")
2072
5f395df3
SM
2073(defconst locale-preferred-coding-systems
2074 (purecopy
2075 '(("ja.*[._]euc" . japanese-iso-8bit)
2076 ("ja.*[._]jis7" . iso-2022-jp)
2077 ("ja.*[._]pck" . japanese-shift-jis)
2078 ("ja.*[._]sjis" . japanese-shift-jis)
69210880 2079 ("jpn" . japanese-shift-jis) ; MS-Windows uses this.
9c20a8d5 2080 (".*[._]utf" . utf-8)))
6ececc4d
PE
2081 "List of pairs of locale regexps and preferred coding systems.
2082The first element whose locale regexp matches the start of a downcased locale
2083specifies the coding system to prefer when using that locale.")
40c81f74 2084
ddf6b836
SM
2085(defconst standard-keyboard-coding-systems
2086 (purecopy
2087 '(iso-latin-1 iso-latin-2 iso-latin-3 iso-latin-4 iso-latin-5
1b8dc791 2088 iso-latin-6 iso-latin-7 iso-latin-8 iso-latin-9 koi8-u koi8-r))
ddf6b836
SM
2089 "Coding systems that are commonly used for keyboards.
2090`set-locale-environment' will set the `keyboard-coding-system' if the
2091coding-system specified by the locale setting is a member of this list.")
2092
40c81f74
PE
2093(defun locale-name-match (key alist)
2094 "Search for KEY in ALIST, which should be a list of regexp-value pairs.
2095Return the value corresponding to the first regexp that matches the
2096start of KEY, or nil if there is no match."
2097 (let (element)
2098 (while (and alist (not element))
ddf6b836 2099 (if (string-match (concat "\\`\\(?:" (car (car alist)) "\\)") key)
40c81f74
PE
2100 (setq element (car alist)))
2101 (setq alist (cdr alist)))
2102 (cdr element)))
2103
758f07de 2104(defun set-locale-environment (&optional locale-name)
40c81f74 2105 "Set up multi-lingual environment for using LOCALE-NAME.
758f07de
RS
2106This sets the language environment, the coding system priority,
2107the default input method and sometimes other things.
2108
2109LOCALE-NAME should be a string
40c81f74
PE
2110which is the name of a locale supported by the system;
2111often it is of the form xx_XX.CODE, where xx is a language,
2112XX is a country, and CODE specifies a character set and coding system.
2113For example, the locale name \"ja_JP.EUC\" might name a locale
2114for Japanese in Japan using the `japanese-iso-8bit' coding-system.
2115
758f07de 2116If LOCALE-NAME is nil, its value is taken from the environment
d37ef0f6 2117variables LC_ALL, LC_CTYPE and LANG (the first one that is set).
40c81f74
PE
2118
2119The locale names supported by your system can typically be found in a
0812c1e8 2120directory named `/usr/share/locale' or `/usr/lib/locale'. LOCALE-NAME
758f07de 2121will be translated according to the table specified by
0812c1e8
DL
2122`locale-translation-file-name'.
2123
2124See also `locale-charset-language-names', `locale-language-names',
2125`locale-preferred-coding-systems' and `locale-coding-system'."
758f07de
RS
2126 (interactive "sSet environment for locale: ")
2127
0d7c5bb9
DL
2128 ;; Do this at runtime for the sake of binaries possibly transported
2129 ;; to a system without X.
2130 (setq locale-translation-file-name
2131 (let ((files
2132 '("/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4
2133 "/usr/X11R6/lib/X11/locale/locale.alias" ; e.g. RedHat 4.2
2134 "/usr/openwin/lib/locale/locale.alias" ; e.g. Solaris 2.6
2135 ;;
2136 ;; The following name appears after the X-related names above,
2137 ;; since the X-related names are what X actually uses.
2138 "/usr/share/locale/locale.alias" ; GNU/Linux sans X
2139 )))
2140 (while (and files (not (file-exists-p (car files))))
2141 (setq files (cdr files)))
2142 (car files)))
2143
7008ccac
GM
2144 (let ((locale locale-name))
2145
2146 (unless locale
2147 ;; Use the first of these three environment variables
2148 ;; that has a nonempty value.
2149 (let ((vars '("LC_ALL" "LC_CTYPE" "LANG")))
2150 (while (and vars (not (setq locale (getenv (car vars)))))
2151 (setq vars (cdr vars)))))
2152
2153 (when locale
2154
2155 ;; Translate "swedish" into "sv_SE.ISO8859-1", and so on,
2156 ;; using the translation file that many systems have.
2157 (when locale-translation-file-name
2158 (with-temp-buffer
2159 (insert-file-contents locale-translation-file-name)
2160 (when (re-search-forward
2161 (concat "^" (regexp-quote locale) ":?[ \t]+") nil t)
2162 (setq locale (buffer-substring (point) (line-end-position))))))
2163
2164 ;; Leave the system locales alone if the caller did not specify
2165 ;; an explicit locale name, as their defaults are set from
2166 ;; LC_MESSAGES and LC_TIME, not LC_CTYPE, and the user might not
2167 ;; want to set them to the same value as LC_CTYPE.
2168 (when locale-name
2169 (setq system-messages-locale locale)
2170 (setq system-time-locale locale))
2171
2172 (setq locale (downcase locale))
2173
2174 (let ((language-name
2175 (locale-name-match locale locale-language-names))
2176 (charset-language-name
2177 (locale-name-match locale locale-charset-language-names))
2178 (coding-system
2179 (locale-name-match locale locale-preferred-coding-systems)))
2180
5f395df3 2181 ;; Give preference to charset-language-name over language-name.
7008ccac
GM
2182 (if (and charset-language-name
2183 (not
2184 (equal (get-language-info language-name 'charset)
2185 (get-language-info charset-language-name 'charset))))
2186 (setq language-name charset-language-name))
2187
2188 (when language-name
2189
2190 ;; Set up for this character set. This is now the right way
2191 ;; to do it for both unibyte and multibyte modes.
2192 (set-language-environment language-name)
2193
2194 ;; If default-enable-multibyte-characters is nil,
2195 ;; we are using single-byte characters,
2196 ;; so the display table and terminal coding system are irrelevant.
2197 (when default-enable-multibyte-characters
2198 (set-display-table-and-terminal-coding-system language-name))
2199
ddf6b836
SM
2200 ;; Set the `keyboard-coding-system' if appropriate.
2201 (let ((kcs (or coding-system
2202 (car (get-language-info language-name
2203 'coding-system)))))
2204 (if (memq kcs standard-keyboard-coding-systems)
2205 (set-keyboard-coding-system kcs)))
2206
7008ccac
GM
2207 (setq locale-coding-system
2208 (car (get-language-info language-name 'coding-priority))))
2209
2210 (when coding-system
2211 (prefer-coding-system coding-system)
2212 (setq locale-coding-system coding-system))))))
40c81f74 2213\f
4ed46869
KH
2214;;; Charset property
2215
251d4f4b 2216(defun get-charset-property (charset propname)
4ed46869
KH
2217 "Return the value of CHARSET's PROPNAME property.
2218This is the last value stored with
96db204a 2219 (put-charset-property CHARSET PROPNAME VALUE)."
251d4f4b
KH
2220 (and (not (eq charset 'composition))
2221 (plist-get (charset-plist charset) propname)))
4ed46869 2222
251d4f4b 2223(defun put-charset-property (charset propname value)
4ed46869
KH
2224 "Store CHARSETS's PROPNAME property with value VALUE.
2225It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
1300d43f
KH
2226 (or (eq charset 'composition)
2227 (set-charset-plist charset
2228 (plist-put (charset-plist charset) propname value))))
4ed46869
KH
2229
2230;;; Character code property
2231(put 'char-code-property-table 'char-table-extra-slots 0)
2232
2233(defvar char-code-property-table
2234 (make-char-table 'char-code-property-table)
2235 "Char-table containing a property list of each character code.
2236
2237See also the documentation of `get-char-code-property' and
96db204a 2238`put-char-code-property'.")
4ed46869
KH
2239
2240(defun get-char-code-property (char propname)
2241 "Return the value of CHAR's PROPNAME property in `char-code-property-table'."
2242 (let ((plist (aref char-code-property-table char)))
2243 (if (listp plist)
2244 (car (cdr (memq propname plist))))))
2245
2246(defun put-char-code-property (char propname value)
2247 "Store CHAR's PROPNAME property with VALUE in `char-code-property-table'.
2248It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
2249 (let ((plist (aref char-code-property-table char)))
2250 (if plist
2251 (let ((slot (memq propname plist)))
2252 (if slot
2253 (setcar (cdr slot) value)
2254 (nconc plist (list propname value))))
2255 (aset char-code-property-table char (list propname value)))))
2256
a127b764
KH
2257\f
2258;; Pretty description of encoded string
2259
2260;; Alist of ISO 2022 control code vs the corresponding mnemonic string.
2261(defvar iso-2022-control-alist
2262 '((?\x1b . "ESC")
2263 (?\x0e . "SO")
2264 (?\x0f . "SI")
2265 (?\x8e . "SS2")
2266 (?\x8f . "SS3")
2267 (?\x9b . "CSI")))
2268
2269(defun encoded-string-description (str coding-system)
2270 "Return a pretty description of STR that is encoded by CODING-SYSTEM."
2271 (setq str (string-as-unibyte str))
993b2a7d 2272 (mapconcat
8c9d55a9 2273 (if (and coding-system (eq (coding-system-type coding-system) 2))
993b2a7d
KH
2274 ;; Try to get a pretty description for ISO 2022 escape sequences.
2275 (function (lambda (x) (or (cdr (assq x iso-2022-control-alist))
2276 (format "%02X" x))))
5de75f53 2277 (function (lambda (x) (format "0x%02X" x))))
993b2a7d 2278 str " "))
a127b764
KH
2279
2280(defun encode-coding-char (char coding-system)
2281 "Encode CHAR by CODING-SYSTEM and return the resulting string.
2282If CODING-SYSTEM can't safely encode CHAR, return nil."
bd953173
KH
2283 (let ((str1 (string-as-multibyte (char-to-string char)))
2284 (str2 (string-as-multibyte (make-string 2 char)))
a89f541b
KH
2285 (safe-chars (and coding-system
2286 (coding-system-get coding-system 'safe-chars)))
8c9d55a9 2287 (charset (char-charset char))
a127b764 2288 enc1 enc2 i1 i2)
a89f541b 2289 (when (or (eq safe-chars t)
8c9d55a9 2290 (eq charset 'ascii)
a89f541b 2291 (and safe-chars (aref safe-chars char)))
a127b764
KH
2292 ;; We must find the encoded string of CHAR. But, just encoding
2293 ;; CHAR will put extra control sequences (usually to designate
2294 ;; ASCII charaset) at the tail if type of CODING is ISO 2022.
2295 ;; To exclude such tailing bytes, we at first encode one-char
2296 ;; string and two-char string, then check how many bytes at the
2297 ;; tail of both encoded strings are the same.
2298
bd953173 2299 (setq enc1 (encode-coding-string str1 coding-system)
a127b764 2300 i1 (length enc1)
bd953173 2301 enc2 (encode-coding-string str2 coding-system)
a127b764
KH
2302 i2 (length enc2))
2303 (while (and (> i1 0) (= (aref enc1 (1- i1)) (aref enc2 (1- i2))))
2304 (setq i1 (1- i1) i2 (1- i2)))
2305
2306 ;; Now (substring enc1 i1) and (substring enc2 i2) are the same,
2307 ;; and they are the extra control sequences at the tail to
2308 ;; exclude.
2309 (substring enc2 0 i2))))
2310
2311
4ed46869 2312;;; mule-cmds.el ends here