Print line number correctly in a region.
[bpt/emacs.git] / lisp / international / mule-cmds.el
CommitLineData
60370d40 1;;; mule-cmds.el --- commands for mulitilingual environment
a985cd2f 2;; Copyright (C) 1995, 2003 Electrotechnical Laboratory, JAPAN.
fa526c4a 3;; Licensed to the Free Software Foundation.
e76ef161 4;; Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
4ed46869
KH
5
6;; Keywords: mule, multilingual
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
369314dc
KH
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
4ed46869 24
60370d40
PJ
25;;; Commentary:
26
4ed46869
KH
27;;; Code:
28
cda74479
DL
29(eval-when-compile (defvar dos-codepage))
30
4ed46869
KH
31;;; MULE related key bindings and menus.
32
0709d285 33(defvar mule-keymap (make-sparse-keymap)
33d17698 34 "Keymap for Mule (Multilingual environment) specific commands.")
4ed46869 35
8f81f784 36;; Keep "C-x C-m ..." for mule specific commands.
0709d285 37(define-key ctl-x-map "\C-m" mule-keymap)
ef8a8c8c 38
4ed46869 39(define-key mule-keymap "f" 'set-buffer-file-coding-system)
d80dee05 40(define-key mule-keymap "r" 'revert-buffer-with-coding-system)
2308d0d7 41(define-key mule-keymap "F" 'set-file-name-coding-system)
4ed46869 42(define-key mule-keymap "t" 'set-terminal-coding-system)
15b3e511
KH
43(define-key mule-keymap "k" 'set-keyboard-coding-system)
44(define-key mule-keymap "p" 'set-buffer-process-coding-system)
7624ebb9
KH
45(define-key mule-keymap "x" 'set-selection-coding-system)
46(define-key mule-keymap "X" 'set-next-selection-coding-system)
8b784951 47(define-key mule-keymap "\C-\\" 'set-input-method)
15b3e511 48(define-key mule-keymap "c" 'universal-coding-system-argument)
b4fba33f 49(define-key mule-keymap "l" 'set-language-environment)
4ed46869 50
538d88fb
EZ
51(defvar mule-menu-keymap
52 (make-sparse-keymap "Mule (Multilingual Environment)")
33d17698 53 "Keymap for Mule (Multilingual environment) menu specific commands.")
15b3e511 54
dcad02bc
EZ
55(defvar describe-language-environment-map
56 (make-sparse-keymap "Describe Language Environment"))
15b3e511 57
dcad02bc
EZ
58(defvar setup-language-environment-map
59 (make-sparse-keymap "Set Language Environment"))
15b3e511 60
dcad02bc
EZ
61(defvar set-coding-system-map
62 (make-sparse-keymap "Set Coding System"))
15b3e511 63
15b3e511 64(define-key-after mule-menu-keymap [set-language-environment]
cda74479
DL
65 (list 'menu-item "Set Language Environment" setup-language-environment-map
66 :help "Multilingual environment suitable for a specific language"))
15b3e511
KH
67(define-key-after mule-menu-keymap [separator-mule]
68 '("--")
69 t)
70(define-key-after mule-menu-keymap [toggle-input-method]
538d88fb 71 '(menu-item "Toggle Input Method" toggle-input-method)
15b3e511 72 t)
8b784951 73(define-key-after mule-menu-keymap [set-input-method]
538d88fb 74 '(menu-item "Select Input Method..." set-input-method)
15b3e511 75 t)
cda74479
DL
76(define-key-after mule-menu-keymap [describe-input-method]
77 '(menu-item "Describe Input Method" describe-input-method))
15b3e511
KH
78(define-key-after mule-menu-keymap [separator-input-method]
79 '("--")
80 t)
15b3e511 81(define-key-after mule-menu-keymap [set-various-coding-system]
cda74479 82 (list 'menu-item "Set Coding Systems" set-coding-system-map
be9650bc 83 :enable 'default-enable-multibyte-characters))
538d88fb
EZ
84(define-key-after mule-menu-keymap [view-hello-file]
85 '(menu-item "Show Multi-lingual Text" view-hello-file
86 :enable (file-readable-p
87 (expand-file-name "HELLO" data-directory))
88 :help "Display file which says HELLO in many languages")
15b3e511
KH
89 t)
90(define-key-after mule-menu-keymap [separator-coding-system]
91 '("--")
92 t)
538d88fb
EZ
93(define-key-after mule-menu-keymap [describe-language-environment]
94 (list 'menu-item "Describe Language Environment"
95 describe-language-environment-map
cda74479 96 :help "Show multilingual settings for a specific language")
15b3e511 97 t)
538d88fb
EZ
98(define-key-after mule-menu-keymap [describe-input-method]
99 '(menu-item "Describe Input Method..." describe-input-method
cda74479 100 :help "Keyboard layout for a specific input method")
538d88fb
EZ
101 t)
102(define-key-after mule-menu-keymap [describe-coding-system]
103 '(menu-item "Describe Coding System..." describe-coding-system)
104 t)
cda74479
DL
105(define-key-after mule-menu-keymap [list-character-sets]
106 '(menu-item "List Character Sets" list-character-sets
107 :help "Show table of available character sets"))
538d88fb
EZ
108(define-key-after mule-menu-keymap [mule-diag]
109 '(menu-item "Show All of Mule Status" mule-diag
110 :help "Display multilingual environment settings")
15b3e511
KH
111 t)
112
3a151e98 113(define-key-after set-coding-system-map [universal-coding-system-argument]
538d88fb
EZ
114 '(menu-item "For Next Command" universal-coding-system-argument
115 :help "Coding system to be used by next command")
3a151e98 116 t)
d80dee05
RS
117(define-key-after set-coding-system-map [separator-1]
118 '("--")
119 t)
120(define-key-after set-coding-system-map [set-buffer-file-coding-system]
121 '(menu-item "For Saving This Buffer" set-buffer-file-coding-system
122 :help "How to encode this buffer when saved")
123 t)
124(define-key-after set-coding-system-map [revert-buffer-with-coding-system]
125 '(menu-item "For Reverting This File Now" revert-buffer-with-coding-system
126 :enable buffer-file-name
127 :help "Revisit this file immediately using specified coding system")
128 t)
2308d0d7
KH
129(define-key-after set-coding-system-map [set-file-name-coding-system]
130 '(menu-item "For File Name" set-file-name-coding-system
131 :help "How to decode/encode file names")
132 t)
d80dee05
RS
133(define-key-after set-coding-system-map [separator-2]
134 '("--")
15b3e511 135 t)
d80dee05 136
15b3e511 137(define-key-after set-coding-system-map [set-keyboard-coding-system]
538d88fb
EZ
138 '(menu-item "For Keyboard" set-keyboard-coding-system
139 :help "How to decode keyboard input")
15b3e511 140 t)
d80dee05
RS
141(define-key-after set-coding-system-map [set-terminal-coding-system]
142 '(menu-item "For Terminal" set-terminal-coding-system
143 :enable (null (memq window-system '(x w32 mac)))
144 :help "How to encode terminal output")
145 t)
146(define-key-after set-coding-system-map [separator-3]
147 '("--")
15b3e511 148 t)
7624ebb9 149(define-key-after set-coding-system-map [set-selection-coding-system]
538d88fb
EZ
150 '(menu-item "For X Selections/Clipboard" set-selection-coding-system
151 :visible (display-selections-p)
152 :help "How to en/decode data to/from selection/clipboard")
7624ebb9
KH
153 t)
154(define-key-after set-coding-system-map [set-next-selection-coding-system]
538d88fb
EZ
155 '(menu-item "For Next X Selection" set-next-selection-coding-system
156 :visible (display-selections-p)
157 :help "How to en/decode next selection/clipboard operation")
7624ebb9 158 t)
d80dee05
RS
159(define-key-after set-coding-system-map [set-buffer-process-coding-system]
160 '(menu-item "For I/O with Subprocess" set-buffer-process-coding-system
161 :visible (fboundp 'start-process)
162 :enable (get-buffer-process (current-buffer))
163 :help "How to en/decode I/O from/to subprocess connected to this buffer")
164 t)
165
166
15b3e511 167(define-key setup-language-environment-map
538d88fb 168 [Default] '(menu-item "Default" setup-specified-language-environment))
4ed46869 169
cda74479
DL
170(define-key describe-language-environment-map
171 [Default] '(menu-item "Default" describe-specified-language-support))
172
4ed46869
KH
173;; This should be a single character key binding because users use it
174;; very frequently while editing multilingual text. Now we can use
175;; only two such keys: "\C-\\" and "\C-^", but the latter is not
176;; convenient because it requires shifting on most keyboards. An
177;; alternative is "\C-\]" which is now bound to `abort-recursive-edit'
178;; but it won't be used that frequently.
179(define-key global-map "\C-\\" 'toggle-input-method)
180
5f395df3
SM
181;; This is no good because people often type Shift-SPC
182;; meaning to type SPC. -- rms.
183;; ;; Here's an alternative key binding for X users (Shift-SPACE).
184;; (define-key global-map [?\S- ] 'toggle-input-method)
b4fba33f 185
464cc130
KH
186;;; Mule related hyperlinks.
187(defconst help-xref-mule-regexp-template
188 (purecopy (concat "\\(\\<\\("
189 "\\(coding system\\)\\|"
d0c40faa
KH
190 "\\(input method\\)\\|"
191 "\\(character set\\)\\|"
192 "\\(charset\\)"
464cc130
KH
193 "\\)\\s-+\\)?"
194 ;; Note starting with word-syntax character:
195 "`\\(\\sw\\(\\sw\\|\\s_\\)+\\)'")))
196
26d87040
EZ
197(defun coding-system-change-eol-conversion (coding-system eol-type)
198 "Return a coding system which differs from CODING-SYSTEM in eol conversion.
199The returned coding system converts end-of-line by EOL-TYPE
200but text as the same way as CODING-SYSTEM.
201EOL-TYPE should be `unix', `dos', `mac', or nil.
202If EOL-TYPE is nil, the returned coding system detects
203how end-of-line is formatted automatically while decoding.
204
205EOL-TYPE can be specified by an integer 0, 1, or 2.
206They means `unix', `dos', and `mac' respectively."
207 (if (symbolp eol-type)
208 (setq eol-type (cond ((eq eol-type 'unix) 0)
209 ((eq eol-type 'dos) 1)
210 ((eq eol-type 'mac) 2)
211 (t eol-type))))
212 (let ((orig-eol-type (coding-system-eol-type coding-system)))
213 (if (vectorp orig-eol-type)
214 (if (not eol-type)
215 coding-system
216 (aref orig-eol-type eol-type))
217 (let ((base (coding-system-base coding-system)))
218 (if (not eol-type)
219 base
220 (if (= eol-type orig-eol-type)
221 coding-system
222 (setq orig-eol-type (coding-system-eol-type base))
223 (if (vectorp orig-eol-type)
224 (aref orig-eol-type eol-type))))))))
225
226(defun coding-system-change-text-conversion (coding-system coding)
227 "Return a coding system which differs from CODING-SYSTEM in text conversion.
228The returned coding system converts text by CODING
229but end-of-line as the same way as CODING-SYSTEM.
230If CODING is nil, the returned coding system detects
231how text is formatted automatically while decoding."
1d77e15a
JR
232 (let ((eol-type (coding-system-eol-type coding-system)))
233 (coding-system-change-eol-conversion
234 (if coding coding 'undecided)
235 (if (numberp eol-type) (aref [unix dos mac] eol-type)))))
26d87040 236
4ed46869 237(defun toggle-enable-multibyte-characters (&optional arg)
6998e1a1
RS
238 "Change whether this buffer uses multibyte characters.
239With arg, use multibyte characters if the arg is positive.
240
241Note that this command does not convert the byte contents of
242the buffer; it only changes the way those bytes are interpreted.
243In general, therefore, this command *changes* the sequence of
244characters that the current buffer contains.
245
246We suggest you avoid using use this command unless you know what you
247are doing. If you use it by mistake, and the buffer is now displayed
248wrong, use this command again to toggle back to the right mode."
4ed46869 249 (interactive "P")
b7079457
RS
250 (let ((new-flag
251 (if (null arg) (null enable-multibyte-characters)
252 (> (prefix-numeric-value arg) 0))))
253 (set-buffer-multibyte new-flag))
4ed46869
KH
254 (force-mode-line-update))
255
256(defun view-hello-file ()
257 "Display the HELLO file which list up many languages and characters."
258 (interactive)
8f81f784
KH
259 ;; We have to decode the file in any environment.
260 (let ((default-enable-multibyte-characters t)
95fa03b4 261 (coding-system-for-read 'iso-2022-7bit))
544dd975 262 (view-file (expand-file-name "HELLO" data-directory))))
4ed46869 263
9e9a77a6 264(defun universal-coding-system-argument (coding-system)
15b3e511 265 "Execute an I/O command using the specified coding system."
9e9a77a6
RS
266 (interactive
267 (let ((default (and buffer-file-coding-system
34104362
KH
268 (not (eq (coding-system-type buffer-file-coding-system)
269 t))
9e9a77a6
RS
270 buffer-file-coding-system)))
271 (list (read-coding-system
272 (if default
273 (format "Coding system for following command (default, %s): " default)
274 "Coding system for following command: ")
275 default))))
276 (let* ((keyseq (read-key-sequence
e14a8f4c 277 (format "Command to execute with %s:" coding-system)))
04363179
GM
278 (cmd (key-binding keyseq))
279 prefix)
280
281 (when (eq cmd 'universal-argument)
282 (call-interactively cmd)
a1506d29 283
04363179
GM
284 ;; Process keys bound in `universal-argument-map'.
285 (while (progn
286 (setq keyseq (read-key-sequence nil t)
287 cmd (key-binding keyseq t))
288 (not (eq cmd 'universal-argument-other-key)))
289 (let ((current-prefix-arg prefix-arg)
d37ef0f6 290 ;; Have to bind `last-command-char' here so that
04363179
GM
291 ;; `digit-argument', for isntance, can compute the
292 ;; prefix arg.
293 (last-command-char (aref keyseq 0)))
294 (call-interactively cmd)))
295
296 ;; This is the final call to `univeral-argument-other-key', which
297 ;; set's the final `prefix-arg.
298 (let ((current-prefix-arg prefix-arg))
299 (call-interactively cmd))
a1506d29 300
04363179
GM
301 ;; Read the command to execute with the given prefix arg.
302 (setq prefix prefix-arg
303 keyseq (read-key-sequence nil t)
304 cmd (key-binding keyseq)))
305
15b3e511 306 (let ((coding-system-for-read coding-system)
04363179 307 (coding-system-for-write coding-system)
0e9ec609 308 (coding-system-require-warning t)
04363179 309 (current-prefix-arg prefix))
15b3e511
KH
310 (message "")
311 (call-interactively cmd))))
312
de94d711 313(defun set-default-coding-systems (coding-system)
0c3154d2 314 "Set default value of various coding systems to CODING-SYSTEM.
387136f6 315This sets the following coding systems:
0c3154d2 316 o coding system of a newly created buffer
8efc03e1
KH
317 o default coding system for subprocess I/O
318This also sets the following values:
d37ef0f6 319 o default value used as `file-name-coding-system' for converting file names.
03c35c83
EZ
320 o default value for the command `set-terminal-coding-system' (not on MSDOS)
321 o default value for the command `set-keyboard-coding-system'."
de94d711
KH
322 (check-coding-system coding-system)
323 (setq-default buffer-file-coding-system coding-system)
716184d4
RS
324 (if default-enable-multibyte-characters
325 (setq default-file-name-coding-system coding-system))
03c35c83
EZ
326 ;; If coding-system is nil, honor that on MS-DOS as well, so
327 ;; that they could reset the terminal coding system.
328 (unless (and (eq window-system 'pc) coding-system)
329 (setq default-terminal-coding-system coding-system))
de94d711 330 (setq default-keyboard-coding-system coding-system)
1d77e15a
JR
331 ;; Preserve eol-type from existing default-process-coding-systems.
332 ;; On non-unix-like systems in particular, these may have been set
333 ;; carefully by the user, or by the startup code, to deal with the
334 ;; users shell appropriately, so should not be altered by changing
335 ;; language environment.
336 (let ((output-coding
337 (coding-system-change-text-conversion
338 (car default-process-coding-system) coding-system))
339 (input-coding
340 (coding-system-change-text-conversion
341 (cdr default-process-coding-system) coding-system)))
342 (setq default-process-coding-system
343 (cons output-coding input-coding))))
de94d711 344
45d08cb2 345(defalias 'update-iso-coding-systems 'update-coding-systems-internal)
2598a293 346(make-obsolete 'update-iso-coding-systems 'update-coding-systems-internal "20.3")
45d08cb2 347
0c3154d2
KH
348(defun prefer-coding-system (coding-system)
349 "Add CODING-SYSTEM at the front of the priority list for automatic detection.
387136f6 350This also sets the following coding systems:
0c3154d2 351 o coding system of a newly created buffer
8efc03e1
KH
352 o default coding system for subprocess I/O
353This also sets the following values:
c654de1d 354 o default value used as `file-name-coding-system' for converting file names.
03c35c83
EZ
355 o default value for the command `set-terminal-coding-system' (not on MSDOS)
356 o default value for the command `set-keyboard-coding-system'
357
bd3ac67e
EZ
358If CODING-SYSTEM specifies a certain type of EOL conversion, the coding
359systems set by this function will use that type of EOL conversion.
360
03c35c83
EZ
361This command does not change the default value of terminal coding system
362for MS-DOS terminal, because DOS terminals only support a single coding
363system, and Emacs automatically sets the default to that coding system at
5beaa0d1
KH
364startup.
365
c654de1d
DL
366A coding system that requires automatic detection of text
367encoding (e.g. undecided, unix) can't be preferred.
368
369See also `coding-category-list' and `coding-system-category'."
0c3154d2
KH
370 (interactive "zPrefer coding system: ")
371 (if (not (and coding-system (coding-system-p coding-system)))
372 (error "Invalid coding system `%s'" coding-system))
373 (let ((coding-category (coding-system-category coding-system))
bd3ac67e
EZ
374 (base (coding-system-base coding-system))
375 (eol-type (coding-system-eol-type coding-system)))
0c3154d2
KH
376 (if (not coding-category)
377 ;; CODING-SYSTEM is no-conversion or undecided.
378 (error "Can't prefer the coding system `%s'" coding-system))
8efc03e1 379 (set coding-category (or base coding-system))
45d08cb2 380 (update-coding-systems-internal)
812cad80 381 (or (eq coding-category (car coding-category-list))
0c3154d2 382 ;; We must change the order.
812cad80 383 (set-coding-priority (list coding-category)))
8efc03e1
KH
384 (if (and base (interactive-p))
385 (message "Highest priority is set to %s (base of %s)"
386 base coding-system))
bd3ac67e 387 ;; If they asked for specific EOL conversion, honor that.
6f9dc4fd 388 (if (memq eol-type '(0 1 2))
bd3ac67e
EZ
389 (setq coding-system
390 (coding-system-change-eol-conversion base eol-type))
391 (setq coding-system base))
392 (set-default-coding-systems coding-system)))
0c3154d2 393
b5edd1d1
KH
394(defvar sort-coding-systems-predicate nil
395 "If non-nil, a predicate function to sort coding systems.
396
397It is called with two coding systems, and should return t if the first
398one is \"less\" than the second.
399
400The function `sort-coding-systems' use it.")
401
402(defun sort-coding-systems (codings)
403 "Sort coding system list CODINGS by a priority of each coding system.
404
405If a coding system is most preferred, it has the highest priority.
406Otherwise, a coding system corresponds to some MIME charset has higher
407priorities. Among them, a coding system included in `coding-system'
408key of the current language environment has higher priorities. See
409also the documentation of `language-info-alist'.
410
411If the variable `sort-coding-systems-predicate' (which see) is
412non-nil, it is used to sort CODINGS in the different way than above."
413 (if sort-coding-systems-predicate
414 (sort codings sort-coding-systems-predicate)
da1ebad1
KH
415 (let* ((from-categories (mapcar #'(lambda (x) (symbol-value x))
416 coding-category-list))
417 (most-preferred (car from-categories))
b5edd1d1
KH
418 (lang-preferred (get-language-info current-language-environment
419 'coding-system))
420 (func (function
421 (lambda (x)
422 (let ((base (coding-system-base x)))
da1ebad1
KH
423 ;; We calculate the priority number 0..255 by
424 ;; using the 8 bits PMMLCEII as this:
425 ;; P: 1 iff most preferred.
426 ;; MM: greater than 0 iff mime-charset.
427 ;; L: 1 iff one of the current lang. env.'s codings.
428 ;; C: 1 iff one of codings listed in the category list.
429 ;; E: 1 iff not XXX-with-esc
430 ;; II: if iso-2022 based, 0..3, else 1.
431 (logior
432 (lsh (if (eq base most-preferred) 1 0) 7)
433 (lsh
434 (let ((mime (coding-system-get base 'mime-charset)))
b439e72a
DL
435 ;; Prefer coding systems corresponding to a
436 ;; MIME charset.
b5edd1d1 437 (if mime
b439e72a
DL
438 ;; Lower utf-16 priority so that we
439 ;; normally prefer utf-8 to it, and put
440 ;; x-ctext below that.
de814dd9
KH
441 (cond ((string-match "utf-16"
442 (symbol-name mime))
da1ebad1 443 2)
b439e72a 444 ((string-match "^x-" (symbol-name mime))
da1ebad1
KH
445 1)
446 (t 3))
b5edd1d1 447 0))
da1ebad1
KH
448 5)
449 (lsh (if (memq base lang-preferred) 1 0) 4)
450 (lsh (if (memq base from-categories) 1 0) 3)
451 (lsh (if (string-match "-with-esc\\'"
452 (symbol-name base))
453 0 1) 2)
454 (if (eq (coding-system-type base) 2)
455 ;; For ISO based coding systems, prefer
456 ;; one that doesn't use escape sequences.
457 (let ((flags (coding-system-flags base)))
458 (if (or (consp (aref flags 0))
459 (consp (aref flags 1))
460 (consp (aref flags 2))
461 (consp (aref flags 3)))
462 (if (or (aref flags 8) (aref flags 9))
463 0
464 1)
465 2))
466 1)))))))
b5edd1d1
KH
467 (sort codings (function (lambda (x y)
468 (> (funcall func x) (funcall func y))))))))
54b226f7 469
3fc7dfe5 470(defun find-coding-systems-region (from to)
54b226f7 471 "Return a list of proper coding systems to encode a text between FROM and TO.
d37ef0f6 472If FROM is a string, find coding systems in that instead of the buffer.
54b226f7
KH
473All coding systems in the list can safely encode any multibyte characters
474in the text.
475
e8dd0160 476If the text contains no multibyte characters, return a list of a single
3fc7dfe5 477element `undecided'."
b5edd1d1
KH
478 (let ((codings (find-coding-systems-region-internal from to)))
479 (if (eq codings t)
480 ;; The text contains only ASCII characters. Any coding
481 ;; systems are safe.
482 '(undecided)
483 ;; We need copy-sequence because sorting will alter the argument.
484 (sort-coding-systems (copy-sequence codings)))))
54b226f7 485
3fc7dfe5
KH
486(defun find-coding-systems-string (string)
487 "Return a list of proper coding systems to encode STRING.
488All coding systems in the list can safely encode any multibyte characters
489in STRING.
490
e8dd0160 491If STRING contains no multibyte characters, return a list of a single
3fc7dfe5 492element `undecided'."
b5edd1d1 493 (find-coding-systems-region string nil))
3fc7dfe5
KH
494
495(defun find-coding-systems-for-charsets (charsets)
496 "Return a list of proper coding systems to encode characters of CHARSETS.
6053d86a
KH
497CHARSETS is a list of character sets.
498It actually checks at most the first 96 characters of each charset.
499So, if a charset of dimension two is included in CHARSETS, the value may
500contain a coding system that can't encode all characters of the charset."
b5edd1d1
KH
501 (cond ((or (null charsets)
502 (and (= (length charsets) 1)
503 (eq 'ascii (car charsets))))
504 '(undecided))
505 ((or (memq 'eight-bit-control charsets)
506 (memq 'eight-bit-graphic charsets))
507 '(raw-text emacs-mule))
508 (t
509 (let ((codings t)
6053d86a 510 charset l str)
b5edd1d1
KH
511 (while (and codings charsets)
512 (setq charset (car charsets) charsets (cdr charsets))
513 (unless (eq charset 'ascii)
6053d86a
KH
514 (setq str (make-string 96 32))
515 (if (= (charset-dimension charset) 1)
516 (if (= (charset-chars charset) 96)
517 (dotimes (i 96)
518 (aset str i (make-char charset (+ i 32))))
519 (dotimes (i 94)
520 (aset str i (make-char charset (+ i 33)))))
521 (if (= (charset-chars charset) 96)
522 (dotimes (i 96)
523 (aset str i (make-char charset 32 (+ i 32))))
524 (dotimes (i 94)
525 (aset str i (make-char charset 33 (+ i 33))))))
526 (setq l (find-coding-systems-string str))
b5edd1d1
KH
527 (if (eq codings t)
528 (setq codings l)
529 (let ((ll nil))
6053d86a
KH
530 (dolist (elt codings)
531 (if (memq elt l)
532 (setq ll (cons elt ll))))
b5edd1d1 533 (setq codings ll)))))
6053d86a 534 codings))))
54b226f7 535
51ed58ea
KH
536(defun find-multibyte-characters (from to &optional maxcount excludes)
537 "Find multibyte characters in the region specified by FROM and TO.
538If FROM is a string, find multibyte characters in the string.
539The return value is an alist of the following format:
540 ((CHARSET COUNT CHAR ...) ...)
541where
542 CHARSET is a character set,
543 COUNT is a number of characters,
544 CHARs are found characters of the character set.
545Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
251d4f4b
KH
546Optional 4th arg EXCLUDE is a list of character sets to be ignored.
547
548For invalid characters, CHARs are actually strings."
51ed58ea
KH
549 (let ((chars nil)
550 charset char)
551 (if (stringp from)
552 (let ((idx 0))
553 (while (setq idx (string-match "[^\000-\177]" from idx))
554 (setq char (aref from idx)
555 charset (char-charset char))
251d4f4b
KH
556 (if (eq charset 'unknown)
557 (setq char (match-string 0)))
14333e31
KH
558 (if (or (memq charset '(unknown
559 eight-bit-control eight-bit-graphic))
251d4f4b 560 (not (or (eq excludes t) (memq charset excludes))))
51ed58ea
KH
561 (let ((slot (assq charset chars)))
562 (if slot
563 (if (not (memq char (nthcdr 2 slot)))
564 (let ((count (nth 1 slot)))
565 (setcar (cdr slot) (1+ count))
566 (if (or (not maxcount) (< count maxcount))
567 (nconc slot (list char)))))
568 (setq chars (cons (list charset 1 char) chars)))))
569 (setq idx (1+ idx))))
570 (save-excursion
571 (goto-char from)
572 (while (re-search-forward "[^\000-\177]" to t)
573 (setq char (preceding-char)
574 charset (char-charset char))
251d4f4b
KH
575 (if (eq charset 'unknown)
576 (setq char (match-string 0)))
14333e31 577 (if (or (memq charset '(unknown eight-bit-control eight-bit-graphic))
251d4f4b 578 (not (or (eq excludes t) (memq charset excludes))))
51ed58ea
KH
579 (let ((slot (assq charset chars)))
580 (if slot
251d4f4b 581 (if (not (member char (nthcdr 2 slot)))
51ed58ea
KH
582 (let ((count (nth 1 slot)))
583 (setcar (cdr slot) (1+ count))
584 (if (or (not maxcount) (< count maxcount))
585 (nconc slot (list char)))))
586 (setq chars (cons (list charset 1 char) chars))))))))
587 (nreverse chars)))
588
738746ba
KH
589
590(defun search-unencodable-char (coding-system)
591 "Search forward from point for a character that is not encodable.
592It asks which coding system to check.
593If such a character is found, set point after that character.
594Otherwise, don't move point.
595
596When called from a program, the value is a position of the found character,
597or nil if all characters are encodable."
598 (interactive
599 (list (let ((default (or buffer-file-coding-system 'us-ascii)))
600 (read-coding-system
601 (format "Coding-system (default, %s): " default)
602 default))))
603 (let ((pos (unencodable-char-position (point) (point-max) coding-system)))
604 (if pos
605 (goto-char (1+ pos))
606 (message "All following characters are encodable by %s" coding-system))
607 pos))
608
609
c83c4f60
RS
610(defvar last-coding-system-specified nil
611 "Most recent coding system explicitly specified by the user when asked.
612This variable is set whenever Emacs asks the user which coding system
613to use in order to write a file. If you set it to nil explicitly,
614then call `write-region', then afterward this variable will be non-nil
615only if the user was explicitly asked and specified a coding system.")
616
b5edd1d1
KH
617(defvar select-safe-coding-system-accept-default-p nil
618 "If non-nil, a function to control the behaviour of coding system selection.
619The meaning is the same as the argument ACCEPT-DEFAULT-P of the
620function `select-safe-coding-system' (which see). This variable
621overrides that argument.")
622
623(defun select-safe-coding-system (from to &optional default-coding-system
efa2c6d7 624 accept-default-p file)
d5266ddf
KH
625 "Ask a user to select a safe coding system from candidates.
626The candidates of coding systems which can safely encode a text
b5edd1d1
KH
627between FROM and TO are shown in a popup window. Among them, the most
628proper one is suggested as the default.
629
a985cd2f
KH
630The list of `buffer-file-coding-system' of the current buffer,
631the `default-buffer-file-coding-system', and the
b5edd1d1
KH
632most preferred coding system (if it corresponds to a MIME charset) is
633treated as the default coding system list. Among them, the first one
d37ef0f6
DL
634that safely encodes the text is normally selected silently and
635returned without any user interaction. See also the command
636`prefer-coding-system'.
637
638However, the user is queried if the chosen coding system is
639inconsistent with what would be selected by `set-auto-coding' from
640coding cookies &c. if the contents of the region were read from a
641file. (That could lead to data corruption in a file subsequently
642re-visited and edited.)
54b226f7 643
b5edd1d1
KH
644Optional 3rd arg DEFAULT-CODING-SYSTEM specifies a coding system or a
645list of coding systems to be prepended to the default coding system
0e9ec609
KH
646list. However, if DEFAULT-CODING-SYSTEM is a list and the first
647element is t, the cdr part is used as the defualt coding system list,
a985cd2f
KH
648i.e. `buffer-file-coding-system', `default-buffer-file-coding-system',
649and the most preferred coding system are not used.
54b226f7 650
b5edd1d1
KH
651Optional 4th arg ACCEPT-DEFAULT-P, if non-nil, is a function to
652determine the acceptability of the silently selected coding system.
653It is called with that coding system, and should return nil if it
654should not be silently selected and thus user interaction is required.
655
efa2c6d7
RS
656Optional 5th arg FILE is the file name to use for this purpose.
657That is different from `buffer-file-name' when handling `write-region'
658\(for example).
659
b5edd1d1
KH
660The variable `select-safe-coding-system-accept-default-p', if
661non-nil, overrides ACCEPT-DEFAULT-P.
54b226f7
KH
662
663Kludgy feature: if FROM is a string, the string is the target text,
664and TO is ignored."
b5edd1d1
KH
665 (if (and default-coding-system
666 (not (listp default-coding-system)))
667 (setq default-coding-system (list default-coding-system)))
668
0e9ec609
KH
669 (let ((no-other-defaults nil))
670 (if (eq (car default-coding-system) t)
671 (setq no-other-defaults t
672 default-coding-system (cdr default-coding-system)))
673
674 ;; Change elements of the list to (coding . base-coding).
675 (setq default-coding-system
676 (mapcar (function (lambda (x) (cons x (coding-system-base x))))
677 default-coding-system))
678
a985cd2f
KH
679 ;; From now on, the list of defaults is reversed.
680 (setq default-coding-system (nreverse default-coding-system))
681
0e9ec609
KH
682 (unless no-other-defaults
683 ;; If buffer-file-coding-system is not nil nor undecided, append it
684 ;; to the defaults.
685 (if buffer-file-coding-system
686 (let ((base (coding-system-base buffer-file-coding-system)))
687 (or (eq base 'undecided)
0e9ec609 688 (rassq base default-coding-system)
a985cd2f
KH
689 (push (cons buffer-file-coding-system base)
690 default-coding-system))))
691
692 ;; If default-buffer-file-coding-system is not nil nor undecided,
693 ;; append it to the defaults.
694 (if default-buffer-file-coding-system
695 (let ((base (coding-system-base default-buffer-file-coding-system)))
696 (or (eq base 'undecided)
697 (rassq base default-coding-system)
698 (push (cons default-buffer-file-coding-system base)
699 default-coding-system))))
0e9ec609
KH
700
701 ;; If the most preferred coding system has the property mime-charset,
702 ;; append it to the defaults.
703 (let ((tail coding-category-list)
704 preferred base)
a985cd2f 705 (while (and tail (not (setq preferred (symbol-value (car tail)))))
0e9ec609
KH
706 (setq tail (cdr tail)))
707 (and (coding-system-p preferred)
708 (setq base (coding-system-base preferred))
709 (coding-system-get preferred 'mime-charset)
0e9ec609 710 (not (rassq base default-coding-system))
a985cd2f
KH
711 (push (cons preferred base)
712 default-coding-system)))))
b5edd1d1
KH
713
714 (if select-safe-coding-system-accept-default-p
715 (setq accept-default-p select-safe-coding-system-accept-default-p))
716
717 (let ((codings (find-coding-systems-region from to))
718 (coding-system nil)
45610c98 719 (bufname (buffer-name))
c0d3ed97 720 safe rejected unsafe)
b5edd1d1
KH
721 (if (eq (car codings) 'undecided)
722 ;; Any coding system is ok.
723 (setq coding-system t)
c0d3ed97
KH
724 ;; Classify the defaults into safe, rejected, and unsafe.
725 (dolist (elt default-coding-system)
726 (if (memq (cdr elt) codings)
727 (if (and (functionp accept-default-p)
728 (not (funcall accept-default-p (cdr elt))))
729 (push (car elt) rejected)
730 (push (car elt) safe))
731 (push (car elt) unsafe)))
732 (if safe
a985cd2f 733 (setq coding-system (car safe))))
c0d3ed97 734
b5edd1d1 735 ;; If all the defaults failed, ask a user.
c0d3ed97
KH
736 (when (not coding-system)
737 ;; At first, if some defaults are unsafe, record at most 11
738 ;; problematic characters and their positions for them by turning
739 ;; (CODING ...)
740 ;; into
741 ;; ((CODING (POS . CHAR) (POS . CHAR) ...) ...)
742 (if unsafe
743 (if (stringp from)
744 (setq unsafe
745 (mapcar #'(lambda (coding)
746 (cons coding
747 (mapcar #'(lambda (pos)
748 (cons pos (aref from pos)))
749 (unencodable-char-position
750 0 (length from) coding
751 11 from))))
752 unsafe))
a1506d29 753 (setq unsafe
c0d3ed97
KH
754 (mapcar #'(lambda (coding)
755 (cons coding
756 (mapcar #'(lambda (pos)
757 (cons pos (char-after pos)))
758 (unencodable-char-position
759 from to coding 11))))
760 unsafe))))
738746ba
KH
761
762 ;; Change each safe coding system to the corresponding
b5edd1d1
KH
763 ;; mime-charset name if it is also a coding system. Such a name
764 ;; is more friendly to users.
765 (let ((l codings)
34104362
KH
766 mime-charset)
767 (while l
768 (setq mime-charset (coding-system-get (car l) 'mime-charset))
769 (if (and mime-charset (coding-system-p mime-charset))
770 (setcar l mime-charset))
771 (setq l (cdr l))))
772
43afed8c
RS
773 ;; Don't offer variations with locking shift, which you
774 ;; basically never want.
775 (let (l)
776 (dolist (elt codings (setq codings (nreverse l)))
777 (unless (or (eq 'coding-category-iso-7-else
778 (coding-system-category elt))
779 (eq 'coding-category-iso-8-else
780 (coding-system-category elt)))
781 (push elt l))))
782
050afd64
SM
783 ;; Remove raw-text, emacs-mule and no-conversion unless nothing
784 ;; else is available.
785 (setq codings
786 (or (delq 'raw-text
787 (delq 'emacs-mule
788 (delq 'no-conversion codings)))
789 '(raw-text emacs-mule no-conversion)))
790
738746ba
KH
791 (let ((window-configuration (current-window-configuration)))
792 (save-excursion
c0d3ed97
KH
793 ;; If some defaults are unsafe, make sure the offending
794 ;; buffer is displayed.
795 (when (and unsafe (not (stringp from)))
738746ba 796 (pop-to-buffer bufname)
c0d3ed97
KH
797 (goto-char (apply 'min (mapcar #'(lambda (x) (car (cadr x)))
798 unsafe))))
799 ;; Then ask users to select one from CODINGS while showing
800 ;; the reason why none of the defaults are not used.
738746ba 801 (with-output-to-temp-buffer "*Warning*"
873a4be1 802 (save-excursion
738746ba
KH
803 (set-buffer standard-output)
804 (if (not default-coding-system)
805 (insert "No default coding systems to try for "
806 (if (stringp from)
807 (format "string \"%s\"." from)
808 (format "buffer `%s'." bufname)))
809 (insert
810 "These default coding systems were tried to encode"
811 (if (stringp from)
812 (concat " \"" (if (> (length from) 10)
813 (concat (substring from 0 10) "...\"")
814 (concat from "\"")))
815 (format " text\nin the buffer `%s'" bufname))
816 ":\n")
817 (let ((pos (point))
818 (fill-prefix " "))
c0d3ed97
KH
819 (mapc #'(lambda (x) (princ " ") (princ (car x)))
820 default-coding-system)
738746ba
KH
821 (insert "\n")
822 (fill-region-as-paragraph pos (point)))
c0d3ed97
KH
823 (when rejected
824 (insert "These safely encodes the target text,
d37ef0f6 825but it is not recommended for encoding text in this context,
c0d3ed97
KH
826e.g., for sending an email message.\n ")
827 (mapc #'(lambda (x) (princ " ") (princ x)) rejected)
828 (insert "\n"))
829 (when unsafe
a1506d29 830 (insert (if rejected "And the others"
c0d3ed97
KH
831 "However, each of them")
832 " encountered these problematic characters:\n")
738746ba
KH
833 (mapc
834 #'(lambda (coding)
835 (insert (format " %s:" (car coding)))
c0d3ed97
KH
836 (let ((i 0)
837 (func1
838 #'(lambda (bufname pos)
839 (when (buffer-live-p (get-buffer bufname))
840 (pop-to-buffer bufname)
841 (goto-char pos))))
842 (func2
738746ba
KH
843 #'(lambda (bufname pos coding)
844 (when (buffer-live-p (get-buffer bufname))
845 (pop-to-buffer bufname)
846 (if (< (point) pos)
847 (goto-char pos)
848 (forward-char 1)
849 (search-unencodable-char coding)
c0d3ed97
KH
850 (forward-char -1))))))
851 (dolist (elt (cdr coding))
852 (insert " ")
853 (if (stringp from)
854 (insert (if (< i 10) (cdr elt) "..."))
855 (if (< i 10)
856 (insert-text-button
857 (cdr elt)
858 :type 'help-xref
859 'help-echo
860 "mouse-2, RET: jump to this character"
861 'help-function func1
862 'help-args (list bufname (car elt)))
863 (insert-text-button
864 "..."
865 :type 'help-xref
866 'help-echo
867 "mouse-2, RET: next unencodable character"
868 'help-function func2
869 'help-args (list bufname (car elt)
870 (car coding)))))
871 (setq i (1+ i))))
738746ba 872 (insert "\n"))
c0d3ed97 873 unsafe)
738746ba 874 (insert "\
d37ef0f6 875The first problematic character is at point in the displayed buffer,\n"
738746ba 876 (substitute-command-keys "\
d37ef0f6 877and \\[universal-argument] \\[what-cursor-position] will give information about it.\n"))))
c0d3ed97 878 (insert (if safe
738746ba
KH
879 "\nSelect the above, or "
880 "\nSelect ")
881 "\
d37ef0f6 882one of the following safe coding systems, or edit the buffer:\n")
738746ba
KH
883 (let ((pos (point))
884 (fill-prefix " "))
885 (mapcar (function (lambda (x) (princ " ") (princ x)))
886 codings)
887 (insert "\n")
0e9ec609
KH
888 (fill-region-as-paragraph pos (point)))
889 (insert "Or specify any other coding system
9ddf362e 890on your risk of losing the problematic characters.\n")))
738746ba
KH
891
892 ;; Read a coding system.
0e9ec609
KH
893 (setq default-coding-system (or (car safe) (car codings)))
894 (setq coding-system
a1506d29 895 (read-coding-system
0e9ec609
KH
896 (format "Select coding system (default %s): "
897 default-coding-system)
898 default-coding-system))
899 (setq last-coding-system-specified coding-system))
900
738746ba
KH
901 (kill-buffer "*Warning*")
902 (set-window-configuration window-configuration)))
b5edd1d1
KH
903
904 (if (vectorp (coding-system-eol-type coding-system))
905 (let ((eol (coding-system-eol-type buffer-file-coding-system)))
906 (if (numberp eol)
907 (setq coding-system
908 (coding-system-change-eol-conversion coding-system eol)))))
909
910 (if (eq coding-system t)
911 (setq coding-system buffer-file-coding-system))
43afed8c
RS
912 ;; Check we're not inconsistent with what `coding:' spec &c would
913 ;; give when file is re-read.
9e7638c3
RS
914 ;; But don't do this if we explicitly ignored the cookie
915 ;; by using `find-file-literally'.
916 (unless (or (stringp from) find-file-literally)
efa2c6d7
RS
917 (let ((auto-cs (save-excursion
918 (save-restriction
919 (widen)
920 (narrow-to-region from to)
43afed8c 921 (goto-char (point-min))
efa2c6d7 922 (set-auto-coding (or file buffer-file-name "")
43afed8c 923 (buffer-size))))))
872c0ba8 924 (if (and auto-cs coding-system
d37ef0f6
DL
925 ;; Don't barf if writing a compressed file, say.
926 ;; This check perhaps isn't ideal, but is probably
927 ;; the best thing to do.
928 (not (auto-coding-alist-lookup (or file buffer-file-name "")))
43afed8c
RS
929 (not (coding-system-equal (coding-system-base coding-system)
930 (coding-system-base auto-cs))))
931 (unless (yes-or-no-p
932 (format "Selected encoding %s disagrees with \
933%s specified by file contents. Really save (else edit coding cookies \
934and try again)? " coding-system auto-cs))
41bfe57a
RS
935 (error "Save aborted")))))
936 coding-system))
54b226f7
KH
937
938(setq select-safe-coding-system-function 'select-safe-coding-system)
939
46babb23
KH
940(defun select-message-coding-system ()
941 "Return a coding system to encode the outgoing message of the current buffer.
942It at first tries the first coding system found in these variables
943in this order:
944 (1) local value of `buffer-file-coding-system'
945 (2) value of `sendmail-coding-system'
b5edd1d1
KH
946 (3) value of `default-sendmail-coding-system'
947 (4) value of `default-buffer-file-coding-system'
46babb23
KH
948If the found coding system can't encode the current buffer,
949or none of them are bound to a coding system,
48e41165 950it asks the user to select a proper coding system."
46babb23 951 (let ((coding (or (and (local-variable-p 'buffer-file-coding-system)
b5edd1d1
KH
952 buffer-file-coding-system)
953 sendmail-coding-system
954 default-sendmail-coding-system
955 default-buffer-file-coding-system)))
46babb23 956 (if (eq coding 'no-conversion)
d37ef0f6 957 ;; We should never use no-conversion for outgoing mail.
46babb23
KH
958 (setq coding nil))
959 (if (fboundp select-safe-coding-system-function)
960 (funcall select-safe-coding-system-function
b5edd1d1
KH
961 (point-min) (point-max) coding
962 (function (lambda (x) (coding-system-get x 'mime-charset))))
46babb23 963 coding)))
4ed46869 964\f
03c35c83 965;;; Language support stuff.
4ed46869 966
4ed46869 967(defvar language-info-alist nil
2c395d56 968 "Alist of language environment definitions.
4ed46869
KH
969Each element looks like:
970 (LANGUAGE-NAME . ((KEY . INFO) ...))
2c395d56
RS
971where LANGUAGE-NAME is a string, the name of the language environment,
972KEY is a symbol denoting the kind of information, and
973INFO is the data associated with KEY.
974Meaningful values for KEY include
975
976 documentation value is documentation of what this language environment
977 is meant for, and how to use it.
978 charset value is a list of the character sets used by this
979 language environment.
d37ef0f6
DL
980 sample-text value is an expression which is evalled to generate
981 a line of text written using characters appropriate
982 for this language environment.
2c395d56
RS
983 setup-function value is a function to call to switch to this
984 language environment.
985 exit-function value is a function to call to leave this
986 language environment.
987 coding-system value is a list of coding systems that are good
988 for saving text written in this language environment.
989 This list serves as suggestions to the user;
990 in effect, as a kind of documentation.
991 coding-priority value is a list of coding systems for this language
992 environment, in order of decreasing priority.
993 This is used to set up the coding system priority
45d08cb2 994 list when you switch to this language environment.
ddb5c041 995 nonascii-translation
7624ebb9 996 value is a translation table to be set in the
45d08cb2 997 variable `nonascii-translation-table' in this
7624ebb9
KH
998 language environment, or a character set from
999 which `nonascii-insert-offset' is calculated.
ddb5c041
KH
1000 input-method value is a default input method for this language
1001 environment.
7624ebb9
KH
1002 features value is a list of features requested in this
1003 language environment.
ddb5c041
KH
1004
1005The following keys take effect only when multibyte characters are
1006globally disabled, i.e. the value of `default-enable-multibyte-characters'
1007is nil.
1008
1009 unibyte-syntax value is a library name to load to set
e8dd0160 1010 unibyte 8-bit character syntaxes for this
ddb5c041
KH
1011 language environment.
1012
1013 unibyte-display value is a coding system to encode characters
1014 for the terminal. Characters in the range
1015 of 160 to 255 display not as octal escapes,
1016 but as non-ASCII characters in this language
1017 environment.")
2c395d56
RS
1018
1019(defun get-language-info (lang-env key)
1020 "Return information listed under KEY for language environment LANG-ENV.
1021KEY is a symbol denoting the kind of information.
1022For a list of useful values for KEY and their meanings,
1023see `language-info-alist'."
1024 (if (symbolp lang-env)
1025 (setq lang-env (symbol-name lang-env)))
1026 (let ((lang-slot (assoc-ignore-case lang-env language-info-alist)))
4ed46869
KH
1027 (if lang-slot
1028 (cdr (assq key (cdr lang-slot))))))
1029
f08adf27 1030(defun set-language-info (lang-env key info)
2c395d56
RS
1031 "Modify part of the definition of language environment LANG-ENV.
1032Specifically, this stores the information INFO under KEY
1033in the definition of this language environment.
4ed46869 1034KEY is a symbol denoting the kind of information.
2c395d56 1035INFO is the value for that information.
281d03ec 1036
2c395d56 1037For a list of useful values for KEY and their meanings,
f08adf27 1038see `language-info-alist'."
2c395d56
RS
1039 (if (symbolp lang-env)
1040 (setq lang-env (symbol-name lang-env)))
4ed46869 1041 (let (lang-slot key-slot)
2c395d56 1042 (setq lang-slot (assoc lang-env language-info-alist))
4ed46869 1043 (if (null lang-slot) ; If no slot for the language, add it.
2c395d56 1044 (setq lang-slot (list lang-env)
4ed46869
KH
1045 language-info-alist (cons lang-slot language-info-alist)))
1046 (setq key-slot (assq key lang-slot))
1047 (if (null key-slot) ; If no slot for the key, add it.
1048 (progn
1049 (setq key-slot (list key))
1050 (setcdr lang-slot (cons key-slot (cdr lang-slot)))))
590dbcba 1051 (setcdr key-slot (purecopy info))
1169bd86 1052 ;; Update the custom-type of `current-language-environment'.
590dbcba 1053 (put 'current-language-environment 'custom-type
1169bd86
MR
1054 (cons 'choice (mapcar
1055 (lambda (lang)
756e055f
MR
1056 (list 'const lang))
1057 (sort (mapcar 'car language-info-alist) 'string<))))))
4ed46869 1058
2c395d56
RS
1059(defun set-language-info-alist (lang-env alist &optional parents)
1060 "Store ALIST as the definition of language environment LANG-ENV.
1061ALIST is an alist of KEY and INFO values. See the documentation of
98c6d6ed 1062`language-info-alist' for the meanings of KEY and INFO.
54b226f7 1063
2c395d56 1064Optional arg PARENTS is a list of parent menu names; it specifies
d37ef0f6 1065where to put this language environment in the
2c395d56
RS
1066Describe Language Environment and Set Language Environment menus.
1067For example, (\"European\") means to put this language environment
1068in the European submenu in each of those two menus."
1069 (if (symbolp lang-env)
1070 (setq lang-env (symbol-name lang-env)))
54b226f7
KH
1071 (let ((describe-map describe-language-environment-map)
1072 (setup-map setup-language-environment-map))
1073 (if parents
1074 (let ((l parents)
9deed82f 1075 map parent-symbol parent prompt)
54b226f7
KH
1076 (while l
1077 (if (symbolp (setq parent-symbol (car l)))
1078 (setq parent (symbol-name parent))
1079 (setq parent parent-symbol parent-symbol (intern parent)))
1080 (setq map (lookup-key describe-map (vector parent-symbol)))
9deed82f
EZ
1081 ;; This prompt string is for define-prefix-command, so
1082 ;; that the map it creates will be suitable for a menu.
1083 (or map (setq prompt (format "%s Environment" parent)))
54b226f7
KH
1084 (if (not map)
1085 (progn
1086 (setq map (intern (format "describe-%s-environment-map"
1087 (downcase parent))))
9deed82f 1088 (define-prefix-command map nil prompt)
54b226f7
KH
1089 (define-key-after describe-map (vector parent-symbol)
1090 (cons parent map) t)))
1091 (setq describe-map (symbol-value map))
1092 (setq map (lookup-key setup-map (vector parent-symbol)))
1093 (if (not map)
1094 (progn
1095 (setq map (intern (format "setup-%s-environment-map"
1096 (downcase parent))))
9deed82f 1097 (define-prefix-command map nil prompt)
54b226f7
KH
1098 (define-key-after setup-map (vector parent-symbol)
1099 (cons parent map) t)))
1100 (setq setup-map (symbol-value map))
1101 (setq l (cdr l)))))
f08adf27
RS
1102
1103 ;; Set up menu items for this language env.
7624ebb9 1104 (let ((doc (assq 'documentation alist)))
f08adf27
RS
1105 (when doc
1106 (define-key-after describe-map (vector (intern lang-env))
7624ebb9
KH
1107 (cons lang-env 'describe-specified-language-support) t)))
1108 (define-key-after setup-map (vector (intern lang-env))
1109 (cons lang-env 'setup-specified-language-environment) t)
f08adf27 1110
54b226f7 1111 (while alist
f08adf27 1112 (set-language-info lang-env (car (car alist)) (cdr (car alist)))
54b226f7 1113 (setq alist (cdr alist)))))
4ed46869 1114
ae302641 1115(defun read-language-name (key prompt &optional default)
2c395d56 1116 "Read a language environment name which has information for KEY.
ddb5c041 1117If KEY is nil, read any language environment.
2c395d56
RS
1118Prompt with PROMPT. DEFAULT is the default choice of language environment.
1119This returns a language environment name as a string."
4ed46869
KH
1120 (let* ((completion-ignore-case t)
1121 (name (completing-read prompt
1122 language-info-alist
ddb5c041 1123 (and key
ca429a25 1124 (function (lambda (elm) (and (listp elm) (assq key elm)))))
ae302641 1125 t nil nil default)))
13e82c04 1126 (if (and (> (length name) 0)
ddb5c041
KH
1127 (or (not key)
1128 (get-language-info name key)))
13e82c04 1129 name)))
4ed46869
KH
1130\f
1131;;; Multilingual input methods.
d37ef0f6 1132(defgroup leim nil
d0c40faa
KH
1133 "LEIM: Libraries of Emacs Input Methods."
1134 :group 'mule)
4ed46869 1135
d0b9c3ab
KH
1136(defconst leim-list-file-name "leim-list.el"
1137 "Name of LEIM list file.
1138This file contains a list of libraries of Emacs input methods (LEIM)
1139in the format of Lisp expression for registering each input method.
1140Emacs loads this file at startup time.")
1141
2e224638 1142(defvar leim-list-header (format
d37ef0f6 1143";;; %s -- list of LEIM (Library of Emacs Input Method) -*-coding: iso-2022-7bit;-*-
d0b9c3ab
KH
1144;;
1145;; This file contains a list of LEIM (Library of Emacs Input Method)
c654de1d
DL
1146;; methods in the same directory as this file. Loading this file
1147;; registers all the input methods in Emacs.
d0b9c3ab 1148;;
d33d5fbe 1149;; Each entry has the form:
d0b9c3ab
KH
1150;; (register-input-method
1151;; INPUT-METHOD LANGUAGE-NAME ACTIVATE-FUNC
1152;; TITLE DESCRIPTION
1153;; ARG ...)
c654de1d 1154;; See the function `register-input-method' for the meanings of the arguments.
d0b9c3ab
KH
1155;;
1156;; If this directory is included in load-path, Emacs automatically
1157;; loads this file at startup time.
1158
1159"
1160 leim-list-file-name)
1161 "Header to be inserted in LEIM list file.")
1162
e55e92ee 1163(defvar leim-list-entry-regexp "^(register-input-method"
d0b9c3ab
KH
1164 "Regexp matching head of each entry in LEIM list file.
1165See also the variable `leim-list-header'")
1166
1167(defvar update-leim-list-functions
1168 '(quail-update-leim-list-file)
1169 "List of functions to call to update LEIM list file.
1170Each function is called with one arg, LEIM directory name.")
1171
a337fe7f
RS
1172(defun update-leim-list-file (&rest dirs)
1173 "Update LEIM list file in directories DIRS."
d0b9c3ab
KH
1174 (let ((functions update-leim-list-functions))
1175 (while functions
a337fe7f 1176 (apply (car functions) dirs)
d0b9c3ab
KH
1177 (setq functions (cdr functions)))))
1178
4ed46869
KH
1179(defvar current-input-method nil
1180 "The current input method for multilingual text.
96db204a 1181If nil, that means no input method is activated now.")
4ed46869
KH
1182(make-variable-buffer-local 'current-input-method)
1183(put 'current-input-method 'permanent-local t)
1184
1185(defvar current-input-method-title nil
d0b9c3ab 1186 "Title string of the current input method shown in mode line.")
4ed46869
KH
1187(make-variable-buffer-local 'current-input-method-title)
1188(put 'current-input-method-title 'permanent-local t)
1189
b4fba33f 1190(defcustom default-input-method nil
8861c593 1191 "*Default input method for multilingual text (a string).
b4fba33f 1192This is the input method activated automatically by the command
9b10b5a3 1193`toggle-input-method' (\\[toggle-input-method])."
8861c593 1194 :group 'mule
5806e8a6
GM
1195 :type '(choice (const nil) string)
1196 :set-after '(current-language-environment))
b4fba33f 1197
0f835e87
KH
1198(put 'input-method-function 'permanent-local t)
1199
723a427a
KH
1200(defvar input-method-history nil
1201 "History list for some commands that read input methods.")
1202(make-variable-buffer-local 'input-method-history)
1203(put 'input-method-history 'permanent-local t)
4ed46869
KH
1204
1205(defvar inactivate-current-input-method-function nil
1206 "Function to call for inactivating the current input method.
1207Every input method should set this to an appropriate value when activated.
f17ccaee
KH
1208This function is called with no argument.
1209
1210This function should never change the value of `current-input-method'.
1211It is set to nil by the function `inactivate-input-method'.")
4ed46869
KH
1212(make-variable-buffer-local 'inactivate-current-input-method-function)
1213(put 'inactivate-current-input-method-function 'permanent-local t)
1214
1215(defvar describe-current-input-method-function nil
1216 "Function to call for describing the current input method.
1217This function is called with no argument.")
1218(make-variable-buffer-local 'describe-current-input-method-function)
1219(put 'describe-current-input-method-function 'permanent-local t)
1220
d0b9c3ab 1221(defvar input-method-alist nil
2c395d56 1222 "Alist of input method names vs how to use them.
d0b9c3ab 1223Each element has the form:
2c395d56
RS
1224 (INPUT-METHOD LANGUAGE-ENV ACTIVATE-FUNC TITLE DESCRIPTION ARGS...)
1225See the function `register-input-method' for the meanings of the elements.")
1226
f08adf27 1227(defun register-input-method (input-method lang-env &rest args)
bf42aa15 1228 "Register INPUT-METHOD as an input method for language environment LANG-ENV.
f08adf27 1229INPUT-METHOD and LANG-ENV are symbols or strings.
d0b9c3ab 1230
d0b9c3ab 1231The remaining arguments are:
2c395d56
RS
1232 ACTIVATE-FUNC, TITLE, DESCRIPTION, and ARGS...
1233ACTIVATE-FUNC is a function to call to activate this method.
1234TITLE is a string to show in the mode line when this method is active.
1235DESCRIPTION is a string describing this method and what it is good for.
1236The ARGS, if any, are passed as arguments to ACTIVATE-FUNC.
205814ee
KH
1237All told, the arguments to ACTIVATE-FUNC are INPUT-METHOD and the ARGS.
1238
c654de1d
DL
1239This function is mainly used in the file \"leim-list.el\" which is
1240created at Emacs build time, registering all Quail input methods
0b6cadff 1241contained in the Emacs distribution.
205814ee 1242
0b6cadff 1243In case you want to register a new Quail input method by yourself, be
205814ee 1244careful to use the same input method title as given in the third
0b6cadff
DL
1245parameter of `quail-define-package'. (If the values are different, the
1246string specified in this function takes precedence.)
205814ee
KH
1247
1248The commands `describe-input-method' and `list-input-methods' need
0b6cadff
DL
1249these duplicated values to show some information about input methods
1250without loading the relevant Quail packages."
f08adf27
RS
1251 (if (symbolp lang-env)
1252 (setq lang-env (symbol-name lang-env)))
4ef06f75
KH
1253 (if (symbolp input-method)
1254 (setq input-method (symbol-name input-method)))
f08adf27 1255 (let ((info (cons lang-env args))
d0b9c3ab
KH
1256 (slot (assoc input-method input-method-alist)))
1257 (if slot
1258 (setcdr slot info)
1259 (setq slot (cons input-method info))
1260 (setq input-method-alist (cons slot input-method-alist)))))
1261
4d5ac029 1262(defun read-input-method-name (prompt &optional default inhibit-null)
d0b9c3ab 1263 "Read a name of input method from a minibuffer prompting with PROMPT.
4d5ac029 1264If DEFAULT is non-nil, use that as the default,
0b6cadff 1265and substitute it into PROMPT at the first `%s'.
4ef06f75
KH
1266If INHIBIT-NULL is non-nil, null input signals an error.
1267
1268The return value is a string."
4d5ac029
RS
1269 (if default
1270 (setq prompt (format prompt default)))
d0b9c3ab 1271 (let* ((completion-ignore-case t)
c54044ff
KH
1272 ;; As it is quite normal to change input method in the
1273 ;; minibuffer, we must enable it even if
1274 ;; enable-recursive-minibuffers is currently nil.
1275 (enable-recursive-minibuffers t)
723a427a
KH
1276 ;; This binding is necessary because input-method-history is
1277 ;; buffer local.
d0b9c3ab 1278 (input-method (completing-read prompt input-method-alist
87505a98
RS
1279 nil t nil 'input-method-history
1280 default)))
bf294e6e
KH
1281 (if (and input-method (symbolp input-method))
1282 (setq input-method (symbol-name input-method)))
d0b9c3ab
KH
1283 (if (> (length input-method) 0)
1284 input-method
1285 (if inhibit-null
43807b77 1286 (error "No valid input method is specified")))))
d0b9c3ab 1287
d0b9c3ab 1288(defun activate-input-method (input-method)
2c395d56
RS
1289 "Switch to input method INPUT-METHOD for the current buffer.
1290If some other input method is already active, turn it off first.
1291If INPUT-METHOD is nil, deactivate any current input method."
305a3cb6 1292 (if (and input-method (symbolp input-method))
4ef06f75 1293 (setq input-method (symbol-name input-method)))
723a427a
KH
1294 (if (and current-input-method
1295 (not (string= current-input-method input-method)))
305a3cb6 1296 (inactivate-input-method))
2c395d56 1297 (unless (or current-input-method (null input-method))
d0b9c3ab
KH
1298 (let ((slot (assoc input-method input-method-alist)))
1299 (if (null slot)
723a427a 1300 (error "Can't activate input method `%s'" input-method))
278dd6ac 1301 (setq current-input-method-title nil)
8efc03e1
KH
1302 (let ((func (nth 2 slot)))
1303 (if (functionp func)
1304 (apply (nth 2 slot) input-method (nthcdr 5 slot))
1305 (if (and (consp func) (symbolp (car func)) (symbolp (cdr func)))
1306 (progn
1307 (require (cdr func))
1308 (apply (car func) input-method (nthcdr 5 slot)))
1309 (error "Can't activate input method `%s'" input-method))))
d0b9c3ab 1310 (setq current-input-method input-method)
278dd6ac
KH
1311 (or (stringp current-input-method-title)
1312 (setq current-input-method-title (nth 3 slot)))
28885c0e
KH
1313 (unwind-protect
1314 (run-hooks 'input-method-activate-hook)
1315 (force-mode-line-update)))))
15b3e511 1316
15b3e511 1317(defun inactivate-input-method ()
f17ccaee 1318 "Turn off the current input method."
723a427a
KH
1319 (when current-input-method
1320 (if input-method-history
1321 (unless (string= current-input-method (car input-method-history))
1322 (setq input-method-history
1323 (cons current-input-method
1324 (delete current-input-method input-method-history))))
1325 (setq input-method-history (list current-input-method)))
1326 (unwind-protect
1327 (funcall inactivate-current-input-method-function)
15b3e511 1328 (unwind-protect
723a427a
KH
1329 (run-hooks 'input-method-inactivate-hook)
1330 (setq current-input-method nil
2b1ec973 1331 input-method-function nil
28885c0e
KH
1332 current-input-method-title nil)
1333 (force-mode-line-update)))))
4ed46869 1334
8b784951 1335(defun set-input-method (input-method)
2c395d56 1336 "Select and activate input method INPUT-METHOD for the current buffer.
bc406911 1337This also sets the default input method to the one you specify.
402dbbd1
EZ
1338If INPUT-METHOD is nil, this function turns off the input method, and
1339also causes you to be prompted for a name of an input method the next
1340time you invoke \\[toggle-input-method].
1341
bc406911
EZ
1342To deactivate the input method interactively, use \\[toggle-input-method].
1343To deactivate it programmatically, use \\[inactivate-input-method]."
d0b9c3ab 1344 (interactive
723a427a 1345 (let* ((default (or (car input-method-history) default-input-method)))
42395763 1346 (list (read-input-method-name
87505a98 1347 (if default "Select input method (default %s): " "Select input method: ")
42395763 1348 default t))))
d0b9c3ab 1349 (activate-input-method input-method)
f4990970
PA
1350 (setq default-input-method input-method)
1351 (when (interactive-p)
1352 (customize-mark-as-set 'default-input-method))
1353 default-input-method)
4ed46869
KH
1354
1355(defun toggle-input-method (&optional arg)
f8ec20be
RS
1356 "Enable or disable multilingual text input method for the current buffer.
1357Only one input method can be enabled at any time in a given buffer.
1358
1359The normal action is to enable an input method if none was
1360enabled, and disable the current one otherwise. Which input method
1361to enable can be determined in various ways--either the one most
1362recently used, or the one specified by `default-input-method', or
1363as a last resort by reading the name of an input method in the
1364minibuffer.
1365
1366With a prefix argument, read an input method name with the minibuffer
1367and enable that one. The default is the most recent input method specified
f2979bdb 1368\(not including the currently active input method, if any)."
f8ec20be 1369
4ed46869 1370 (interactive "P")
7ddbb5bc
RS
1371 (if (and current-input-method (not arg))
1372 (inactivate-input-method)
1373 (let ((default (or (car input-method-history) default-input-method)))
1374 (if (and arg default (equal current-input-method default)
1375 (> (length input-method-history) 1))
1376 (setq default (nth 1 input-method-history)))
723a427a
KH
1377 (activate-input-method
1378 (if (or arg (not default))
7ddbb5bc
RS
1379 (progn
1380 (read-input-method-name
1381 (if default "Input method (default %s): " "Input method: " )
1382 default t))
723a427a 1383 default))
f4990970 1384 (unless default-input-method
d37ef0f6 1385 (prog1
f4990970
PA
1386 (setq default-input-method current-input-method)
1387 (when (interactive-p)
1388 (customize-mark-as-set 'default-input-method)))))))
d0b9c3ab
KH
1389
1390(defun describe-input-method (input-method)
2c395d56 1391 "Describe input method INPUT-METHOD."
d0b9c3ab
KH
1392 (interactive
1393 (list (read-input-method-name
1394 "Describe input method (default, current choice): ")))
78754934 1395 (if (and input-method (symbolp input-method))
4ef06f75 1396 (setq input-method (symbol-name input-method)))
43125c28
RS
1397 (help-setup-xref (list #'describe-input-method
1398 (or input-method current-input-method))
f80e2142
RS
1399 (interactive-p))
1400
d0b9c3ab
KH
1401 (if (null input-method)
1402 (describe-current-input-method)
464cc130
KH
1403 (let ((current current-input-method))
1404 (condition-case nil
1405 (progn
1406 (save-excursion
1407 (activate-input-method input-method)
1408 (describe-current-input-method))
1409 (activate-input-method current))
d37ef0f6 1410 (error
464cc130 1411 (activate-input-method current)
5f395df3
SM
1412 (help-setup-xref (list #'describe-input-method input-method)
1413 (interactive-p))
1414 (with-output-to-temp-buffer (help-buffer)
464cc130
KH
1415 (let ((elt (assoc input-method input-method-alist)))
1416 (princ (format
1417 "Input method: %s (`%s' in mode line) for %s\n %s\n"
1418 input-method (nth 3 elt) (nth 1 elt) (nth 4 elt))))))))))
d0b9c3ab
KH
1419
1420(defun describe-current-input-method ()
f80e2142
RS
1421 "Describe the input method currently in use.
1422This is a subroutine for `describe-input-method'."
4ed46869
KH
1423 (if current-input-method
1424 (if (and (symbolp describe-current-input-method-function)
1425 (fboundp describe-current-input-method-function))
1426 (funcall describe-current-input-method-function)
1427 (message "No way to describe the current input method `%s'"
f2979bdb 1428 current-input-method)
4ed46869 1429 (ding))
d0b9c3ab 1430 (error "No input method is activated now")))
4ed46869 1431
d3459641 1432(defun read-multilingual-string (prompt &optional initial-input input-method)
4ed46869
KH
1433 "Read a multilingual string from minibuffer, prompting with string PROMPT.
1434The input method selected last time is activated in minibuffer.
15b3e511 1435If optional second arg INITIAL-INPUT is non-nil, insert it in the minibuffer
d0b9c3ab
KH
1436initially.
1437Optional 3rd argument INPUT-METHOD specifies the input method
4ef06f75
KH
1438to be activated instead of the one selected last time. It is a symbol
1439or a string."
88d559ec
KH
1440 (setq input-method
1441 (or input-method
d3459641 1442 current-input-method
88d559ec
KH
1443 default-input-method
1444 (read-input-method-name "Input method: " nil t)))
3df60841 1445 (if (and input-method (symbolp input-method))
4ef06f75 1446 (setq input-method (symbol-name input-method)))
305a3cb6
KH
1447 (let ((prev-input-method current-input-method))
1448 (unwind-protect
1449 (progn
1450 (activate-input-method input-method)
1451 (read-string prompt initial-input nil nil t))
1452 (activate-input-method prev-input-method))))
4ed46869
KH
1453
1454;; Variables to control behavior of input methods. All input methods
1455;; should react to these variables.
1456
8efc03e1
KH
1457(defcustom input-method-verbose-flag 'default
1458 "*A flag to control extra guidance given by input methods.
1459The value should be nil, t, `complex-only', or `default'.
4ed46869 1460
cb29dfb6 1461The extra guidance is done by showing list of available keys in echo
8efc03e1
KH
1462area. When you use the input method in the minibuffer, the guidance
1463is shown at the bottom short window (split from the existing window).
c27c4ed8 1464
8efc03e1
KH
1465If the value is t, extra guidance is always given, if the value is
1466nil, extra guidance is always suppressed.
1467
1468If the value is `complex-only', only complex input methods such as
1469`chinese-py' and `japanese' give extra guidance.
1470
1471If the value is `default', complex input methods always give extra
1472guidance, but simple input methods give it only when you are not in
1473the minibuffer.
1474
1475See also the variable `input-method-highlight-flag'."
d37ef0f6
DL
1476 :type '(choice (const :tag "Always" t) (const :tag "Never" nil)
1477 (const complex-only) (const default))
42395763
RS
1478 :group 'mule)
1479
1480(defcustom input-method-highlight-flag t
1481 "*If this flag is non-nil, input methods highlight partially-entered text.
1482For instance, while you are in the middle of a Quail input method sequence,
1483the text inserted so far is temporarily underlined.
8efc03e1
KH
1484The underlining goes away when you finish or abort the input method sequence.
1485See also the variable `input-method-verbose-flag'."
42395763
RS
1486 :type 'boolean
1487 :group 'mule)
4ed46869
KH
1488
1489(defvar input-method-activate-hook nil
f17ccaee
KH
1490 "Normal hook run just after an input method is activated.
1491
1492The variable `current-input-method' keeps the input method name
1493just activated.")
4ed46869
KH
1494
1495(defvar input-method-inactivate-hook nil
f17ccaee
KH
1496 "Normal hook run just after an input method is inactivated.
1497
1498The variable `current-input-method' still keeps the input method name
4d0e6a11 1499just inactivated.")
4ed46869
KH
1500
1501(defvar input-method-after-insert-chunk-hook nil
1502 "Normal hook run just after an input method insert some chunk of text.")
1503
dccca980 1504(defvar input-method-exit-on-first-char nil
0b6cadff 1505 "This flag controls when an input method returns.
dccca980
KH
1506Usually, the input method does not return while there's a possibility
1507that it may find a different translation if a user types another key.
39e643e2
RS
1508But, it this flag is non-nil, the input method returns as soon as
1509the current key sequence gets long enough to have some valid translation.")
dccca980
KH
1510
1511(defvar input-method-use-echo-area nil
1512 "This flag controls how an input method shows an intermediate key sequence.
39e643e2
RS
1513Usually, the input method inserts the intermediate key sequence,
1514or candidate translations corresponding to the sequence,
1515at point in the current buffer.
1516But, if this flag is non-nil, it displays them in echo area instead.")
dccca980 1517
723a427a
KH
1518(defvar input-method-exit-on-invalid-key nil
1519 "This flag controls the behaviour of an input method on invalid key input.
1520Usually, when a user types a key which doesn't start any character
1521handled by the input method, the key is handled by turning off the
e8dd0160 1522input method temporarily. After that key, the input method is re-enabled.
723a427a
KH
1523But, if this flag is non-nil, the input method is never back on.")
1524
4ed46869 1525\f
8efc03e1
KH
1526(defvar set-language-environment-hook nil
1527 "Normal hook run after some language environment is set.
1528
1529When you set some hook function here, that effect usually should not
1530be inherited to another language environment. So, you had better set
1531another function in `exit-language-environment-hook' (which see) to
1532cancel the effect.")
1533
1534(defvar exit-language-environment-hook nil
1535 "Normal hook run after exiting from some language environment.
1536When this hook is run, the variable `current-language-environment'
1537is still bound to the language environment being exited.
1538
e8dd0160 1539This hook is mainly used for canceling the effect of
8efc03e1
KH
1540`set-language-environment-hook' (which-see).")
1541
b0648a00
RS
1542(put 'setup-specified-language-environment 'apropos-inhibit t)
1543
15b3e511 1544(defun setup-specified-language-environment ()
f08adf27 1545 "Switch to a specified language environment."
15b3e511 1546 (interactive)
f850d782 1547 (let (language-name)
15b3e511
KH
1548 (if (and (symbolp last-command-event)
1549 (or (not (eq last-command-event 'Default))
1550 (setq last-command-event 'English))
f850d782 1551 (setq language-name (symbol-name last-command-event)))
f4990970
PA
1552 (prog1
1553 (set-language-environment language-name)
1554 (customize-mark-as-set 'current-language-environment))
15b3e511 1555 (error "Bogus calling sequence"))))
4ed46869 1556
8861c593 1557(defcustom current-language-environment "English"
94d04df6 1558 "The last language environment specified with `set-language-environment'.
ebef6d93
KH
1559This variable should be set only with \\[customize], which is equivalent
1560to using the function `set-language-environment'."
94d04df6 1561 :link '(custom-manual "(emacs)Language Environments")
dff1aa24 1562 :set (lambda (symbol value) (set-language-environment value))
94d04df6
DL
1563 :get (lambda (x)
1564 (or (car-safe (assoc-ignore-case
1565 (if (symbolp current-language-environment)
1566 (symbol-name current-language-environment)
1567 current-language-environment)
1568 language-info-alist))
1569 "English"))
990a4108
MR
1570 ;; custom type will be updated with `set-language-info'.
1571 :type (if language-info-alist
1572 (cons 'choice (mapcar
1573 (lambda (lang)
756e055f
MR
1574 (list 'const lang))
1575 (sort (mapcar 'car language-info-alist) 'string<)))
990a4108 1576 'string)
8861c593 1577 :initialize 'custom-initialize-default
eb9fc9e6 1578 :group 'mule)
f850d782 1579
ddb5c041
KH
1580(defun reset-language-environment ()
1581 "Reset multilingual environment of Emacs to the default status.
1582
1583The default status is as follows:
1584
d37ef0f6 1585 The default value of `buffer-file-coding-system' is nil.
ddb5c041
KH
1586 The default coding system for process I/O is nil.
1587 The default value for the command `set-terminal-coding-system' is nil.
1588 The default value for the command `set-keyboard-coding-system' is nil.
1589
1590 The order of priorities of coding categories and the coding system
1591 bound to each category are as follows
1592 coding category coding system
1593 --------------------------------------------------
ddb5c041 1594 coding-category-iso-8-1 iso-latin-1
370bd9cf
KH
1595 coding-category-iso-8-2 iso-latin-1
1596 coding-category-utf-8 mule-utf-8
b510f863
KH
1597 coding-category-utf-16-be mule-utf-16be-with-signature
1598 coding-category-utf-16-le mule-utf-16le-with-signature
ddb5c041
KH
1599 coding-category-iso-7-tight iso-2022-jp
1600 coding-category-iso-7 iso-2022-7bit
1601 coding-category-iso-7-else iso-2022-7bit-lock
1602 coding-category-iso-8-else iso-2022-8bit-ss2
1603 coding-category-emacs-mule emacs-mule
1604 coding-category-raw-text raw-text
1605 coding-category-sjis japanese-shift-jis
1606 coding-category-big5 chinese-big5
1607 coding-category-ccl nil
370bd9cf 1608 coding-category-binary no-conversion"
ddb5c041
KH
1609 (interactive)
1610 ;; This function formerly set default-enable-multibyte-characters to t,
1611 ;; but that is incorrect. It should not alter the unibyte/multibyte choice.
1612
1613 (setq coding-category-iso-7-tight 'iso-2022-jp
1614 coding-category-iso-7 'iso-2022-7bit
1615 coding-category-iso-8-1 'iso-latin-1
1616 coding-category-iso-8-2 'iso-latin-1
1617 coding-category-iso-7-else 'iso-2022-7bit-lock
1618 coding-category-iso-8-else 'iso-2022-8bit-ss2
1619 coding-category-emacs-mule 'emacs-mule
1620 coding-category-raw-text 'raw-text
1621 coding-category-sjis 'japanese-shift-jis
1622 coding-category-big5 'chinese-big5
b510f863
KH
1623 coding-category-utf-16-be 'mule-utf-16be-with-signature
1624 coding-category-utf-16-le 'mule-utf-16le-with-signature
b909cfc0
KH
1625 coding-category-utf-8 'mule-utf-8
1626 coding-category-ccl nil
1627 coding-category-binary 'no-conversion)
ddb5c041
KH
1628
1629 (set-coding-priority
1630 '(coding-category-iso-8-1
1631 coding-category-iso-8-2
370bd9cf
KH
1632 coding-category-utf-8
1633 coding-category-utf-16-be
1634 coding-category-utf-16-le
ddb5c041
KH
1635 coding-category-iso-7-tight
1636 coding-category-iso-7
1637 coding-category-iso-7-else
1638 coding-category-iso-8-else
d37ef0f6 1639 coding-category-emacs-mule
ddb5c041
KH
1640 coding-category-raw-text
1641 coding-category-sjis
1642 coding-category-big5
1643 coding-category-ccl
370bd9cf 1644 coding-category-binary))
ddb5c041 1645
91693d18
KH
1646 (update-coding-systems-internal)
1647
ddb5c041 1648 (set-default-coding-systems nil)
b5edd1d1 1649 (setq default-sendmail-coding-system 'iso-latin-1)
787caf99 1650 (setq default-file-name-coding-system 'iso-latin-1)
1d77e15a
JR
1651 ;; Preserve eol-type from existing default-process-coding-systems.
1652 ;; On non-unix-like systems in particular, these may have been set
1653 ;; carefully by the user, or by the startup code, to deal with the
1654 ;; users shell appropriately, so should not be altered by changing
1655 ;; language environment.
1656 (let ((output-coding
a099a2ff
JR
1657 ;; When bootstrapping, coding-systems are not defined yet, so
1658 ;; we need to catch the error from check-coding-system.
d37ef0f6 1659 (condition-case nil
a099a2ff
JR
1660 (coding-system-change-text-conversion
1661 (car default-process-coding-system) 'undecided)
1662 (coding-system-error 'undecided)))
1d77e15a 1663 (input-coding
a099a2ff
JR
1664 (condition-case nil
1665 (coding-system-change-text-conversion
1666 (cdr default-process-coding-system) 'iso-latin-1)
1667 (coding-system-error 'iso-latin-1))))
1d77e15a
JR
1668 (setq default-process-coding-system
1669 (cons output-coding input-coding)))
b5edd1d1 1670
ddb5c041
KH
1671 ;; Don't alter the terminal and keyboard coding systems here.
1672 ;; The terminal still supports the same coding system
1673 ;; that it supported a minute ago.
1b8dc791
SM
1674 ;; (set-terminal-coding-system-internal nil)
1675 ;; (set-keyboard-coding-system-internal nil)
ddb5c041
KH
1676
1677 (setq nonascii-translation-table nil
1678 nonascii-insert-offset 0))
1679
0c47a7c8
KH
1680(reset-language-environment)
1681
40c81f74
PE
1682(defun set-display-table-and-terminal-coding-system (language-name)
1683 "Set up the display table and terminal coding system for LANGUAGE-NAME."
1684 (let ((coding (get-language-info language-name 'unibyte-display)))
1685 (if coding
1686 (standard-display-european-internal)
a67ae60e
EZ
1687 ;; The following 2 lines undo the 8-bit display that we set up
1688 ;; in standard-display-european-internal, which see. This is in
1689 ;; case the user has used standard-display-european earlier in
1690 ;; this session. (The MS-DOS port doesn't use that setup, so it
1691 ;; doesn't need to undo it.)
d73a7bb8
JPW
1692 (when standard-display-table
1693 (dotimes (i 128)
b221615b 1694 (aset standard-display-table (+ i 128) nil))))
40c81f74 1695 (or (eq window-system 'pc)
d73a7bb8 1696 (set-terminal-coding-system coding))))
40c81f74 1697
166246f7 1698(defun set-language-environment (language-name)
6c05d680
RS
1699 "Set up multi-lingual environment for using LANGUAGE-NAME.
1700This sets the coding system priority and the default input method
8861c593
RS
1701and sometimes other things. LANGUAGE-NAME should be a string
1702which is the name of a language environment. For example, \"Latin-1\"
1703specifies the character set for the major languages of Western Europe."
8efc03e1 1704 (interactive (list (read-language-name
ddb5c041 1705 nil
8efc03e1 1706 "Set language environment (default, English): ")))
4ef06f75
KH
1707 (if language-name
1708 (if (symbolp language-name)
1709 (setq language-name (symbol-name language-name)))
1710 (setq language-name "English"))
ddb5c041 1711 (or (assoc-ignore-case language-name language-info-alist)
f850d782 1712 (error "Language environment not defined: %S" language-name))
8efc03e1
KH
1713 (if current-language-environment
1714 (let ((func (get-language-info current-language-environment
1715 'exit-function)))
e63645c2 1716 (run-hooks 'exit-language-environment-hook)
5f395df3 1717 (if (functionp func) (funcall func))))
03c35c83
EZ
1718 (let ((default-eol-type (coding-system-eol-type
1719 default-buffer-file-coding-system)))
1720 (reset-language-environment)
ddb5c041 1721
e522f07b 1722 ;; The features might set up coding systems.
9c20a8d5
DL
1723 (let ((required-features (get-language-info language-name 'features)))
1724 (while required-features
1725 (require (car required-features))
1726 (setq required-features (cdr required-features))))
1727
03c35c83
EZ
1728 (setq current-language-environment language-name)
1729 (set-language-environment-coding-systems language-name default-eol-type))
ddb5c041
KH
1730 (let ((input-method (get-language-info language-name 'input-method)))
1731 (when input-method
1732 (setq default-input-method input-method)
1733 (if input-method-history
1734 (setq input-method-history
1735 (cons input-method
1736 (delete input-method input-method-history))))))
ec241f58
EZ
1737 (let ((nonascii (get-language-info language-name 'nonascii-translation))
1738 (dos-table
4e2ac2d9
EZ
1739 (if (eq window-system 'pc)
1740 (intern
d9c0a50e 1741 (format "cp%d-nonascii-translation-table" dos-codepage)))))
03c35c83
EZ
1742 (cond
1743 ((char-table-p nonascii)
1744 (setq nonascii-translation-table nonascii))
ec241f58 1745 ((and (eq window-system 'pc) (boundp dos-table))
03c35c83
EZ
1746 ;; DOS terminals' default is to use a special non-ASCII translation
1747 ;; table as appropriate for the installed codepage.
ec241f58 1748 (setq nonascii-translation-table (symbol-value dos-table)))
03c35c83
EZ
1749 ((charsetp nonascii)
1750 (setq nonascii-insert-offset (- (make-char nonascii) 128)))))
ddb5c041 1751
63283a8f 1752 ;; Unibyte setups if necessary.
ddb5c041 1753 (unless default-enable-multibyte-characters
63283a8f 1754 ;; Syntax and case table.
ddb5c041
KH
1755 (let ((syntax (get-language-info language-name 'unibyte-syntax)))
1756 (if syntax
1757 (let ((set-case-syntax-set-multibyte nil))
63283a8f
KH
1758 (load syntax nil t))
1759 ;; No information for syntax and case. Reset to the defaults.
1760 (let ((syntax-table (standard-syntax-table))
1761 (case-table (standard-case-table))
03c35c83 1762 (ch (if (eq window-system 'pc) 128 160)))
63283a8f
KH
1763 (while (< ch 256)
1764 (modify-syntax-entry ch " " syntax-table)
1765 (aset case-table ch ch)
1766 (setq ch (1+ ch)))
1767 (set-char-table-extra-slot case-table 0 nil)
1768 (set-char-table-extra-slot case-table 1 nil)
1769 (set-char-table-extra-slot case-table 2 nil))
1770 (set-standard-case-table (standard-case-table))
1771 (let ((list (buffer-list)))
1772 (while list
1773 (with-current-buffer (car list)
1774 (set-case-table (standard-case-table)))
1775 (setq list (cdr list))))))
40c81f74 1776 (set-display-table-and-terminal-coding-system language-name))
ddb5c041
KH
1777
1778 (let ((required-features (get-language-info language-name 'features)))
1779 (while required-features
1780 (require (car required-features))
1781 (setq required-features (cdr required-features))))
1782 (let ((func (get-language-info language-name 'setup-function)))
5f395df3 1783 (if (functionp func)
ddb5c041 1784 (funcall func)))
8efc03e1 1785 (run-hooks 'set-language-environment-hook)
f850d782 1786 (force-mode-line-update t))
4ed46869 1787
51a8fc1d
RS
1788(defun standard-display-european-internal ()
1789 ;; Actually set up direct output of non-ASCII characters.
03c35c83
EZ
1790 (standard-display-8bit (if (eq window-system 'pc) 128 160) 255)
1791 ;; Unibyte Emacs on MS-DOS wants to display all 8-bit characters with
1792 ;; the native font, and codes 160 and 146 stand for something very
1793 ;; different there.
1794 (or (and (eq window-system 'pc) (not default-enable-multibyte-characters))
1795 (progn
1796 ;; Make non-line-break space display as a plain space.
1797 ;; Most X fonts do the wrong thing for code 160.
1798 (aset standard-display-table 160 [32])
1492f7ac
DL
1799 ;; With luck, non-Latin-1 fonts are more recent and so don't
1800 ;; have this bug.
1801 (aset standard-display-table 2208 [32]) ; Latin-1 NBSP
1802 ;; Most Windows programs send out apostrophes as \222. Most X fonts
03c35c83 1803 ;; don't contain a character at that position. Map it to the ASCII
5f395df3
SM
1804 ;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
1805 ;; U+2019, normally from the windows-1252 character set. XFree 4
1806 ;; fonts probably have the appropriate glyph at this position,
1807 ;; so they could use standard-display-8bit. It's better to use a
1808 ;; proper windows-1252 coding system. --fx]
03c35c83
EZ
1809 (aset standard-display-table 146 [39]))))
1810
1811(defun set-language-environment-coding-systems (language-name
1812 &optional eol-type)
1813 "Do various coding system setups for language environment LANGUAGE-NAME.
1814
1815The optional arg EOL-TYPE specifies the eol-type of the default value
d37ef0f6 1816of `buffer-file-coding-system' set by this function."
54b226f7
KH
1817 (let* ((priority (get-language-info language-name 'coding-priority))
1818 (default-coding (car priority)))
1819 (if priority
1820 (let ((categories (mapcar 'coding-system-category priority)))
03c35c83
EZ
1821 (set-default-coding-systems
1822 (if (memq eol-type '(0 1 2 unix dos mac))
1823 (coding-system-change-eol-conversion default-coding eol-type)
1824 default-coding))
46babb23 1825 (setq default-sendmail-coding-system default-coding)
54b226f7
KH
1826 (set-coding-priority categories)
1827 (while priority
1828 (set (car categories) (car priority))
1829 (setq priority (cdr priority) categories (cdr categories)))
45d08cb2 1830 (update-coding-systems-internal)))))
54b226f7 1831
4ed46869 1832(defsubst princ-list (&rest args)
d37ef0f6 1833 "Print all arguments with `princ', then print \"\n\"."
4ed46869
KH
1834 (while args (princ (car args)) (setq args (cdr args)))
1835 (princ "\n"))
1836
b0648a00
RS
1837(put 'describe-specified-language-support 'apropos-inhibit t)
1838
48082651 1839;; Print a language specific information such as input methods,
13e82c04 1840;; charsets, and coding systems. This function is intended to be
48082651 1841;; called from the menu:
281d03ec 1842;; [menu-bar mule describe-language-environment LANGUAGE]
48082651
KH
1843;; and should not run it by `M-x describe-current-input-method-function'.
1844(defun describe-specified-language-support ()
96db204a 1845 "Describe how Emacs supports the specified language environment."
48082651 1846 (interactive)
281d03ec 1847 (let (language-name)
48082651 1848 (if (not (and (symbolp last-command-event)
cda74479
DL
1849 (or (not (eq last-command-event 'Default))
1850 (setq last-command-event 'English))
281d03ec 1851 (setq language-name (symbol-name last-command-event))))
48082651 1852 (error "Bogus calling sequence"))
281d03ec
RS
1853 (describe-language-environment language-name)))
1854
1855(defun describe-language-environment (language-name)
1856 "Describe how Emacs supports language environment LANGUAGE-NAME."
78754934
KH
1857 (interactive
1858 (list (read-language-name
1859 'documentation
8adfa8be 1860 "Describe language environment (default, current choice): ")))
f850d782
RS
1861 (if (null language-name)
1862 (setq language-name current-language-environment))
281d03ec
RS
1863 (if (or (null language-name)
1864 (null (get-language-info language-name 'documentation)))
1865 (error "No documentation for the specified language"))
4ef06f75
KH
1866 (if (symbolp language-name)
1867 (setq language-name (symbol-name language-name)))
ef5a4730
KH
1868 (dolist (feature (get-language-info language-name 'features))
1869 (require feature))
464cc130
KH
1870 (let ((doc (get-language-info language-name 'documentation))
1871 pos)
c3034e84
SM
1872 (help-setup-xref (list #'describe-language-environment language-name)
1873 (interactive-p))
1874 (with-output-to-temp-buffer (help-buffer)
464cc130
KH
1875 (save-excursion
1876 (set-buffer standard-output)
1877 (insert language-name " language environment\n\n")
1878 (if (stringp doc)
1879 (insert doc "\n\n"))
e036b0a6
KH
1880 (condition-case nil
1881 (let ((str (eval (get-language-info language-name 'sample-text))))
1882 (if (stringp str)
1883 (insert "Sample text:\n " str "\n\n")))
1884 (error nil))
464cc130
KH
1885 (let ((input-method (get-language-info language-name 'input-method))
1886 (l (copy-sequence input-method-alist)))
1887 (insert "Input methods")
1888 (when input-method
1889 (insert " (default, " input-method ")")
1890 (setq input-method (assoc input-method input-method-alist))
1891 (setq l (cons input-method (delete input-method l))))
1892 (insert ":\n")
48082651 1893 (while l
464cc130
KH
1894 (when (string= language-name (nth 1 (car l)))
1895 (insert " " (car (car l)))
1896 (search-backward (car (car l)))
467412aa 1897 (help-xref-button 0 'help-input-method (car (car l)))
464cc130 1898 (goto-char (point-max))
2fa7e202
KH
1899 (insert " (\""
1900 (if (stringp (nth 3 (car l)))
1901 (nth 3 (car l))
1902 (car (nth 3 (car l))))
1903 "\" in mode line)\n"))
464cc130
KH
1904 (setq l (cdr l)))
1905 (insert "\n"))
1906 (insert "Character sets:\n")
1907 (let ((l (get-language-info language-name 'charset)))
1908 (if (null l)
1909 (insert " nothing specific to " language-name "\n")
1910 (while l
1911 (insert " " (symbol-name (car l)))
1912 (search-backward (symbol-name (car l)))
467412aa 1913 (help-xref-button 0 'help-character-set (car l))
464cc130
KH
1914 (goto-char (point-max))
1915 (insert ": " (charset-description (car l)) "\n")
1916 (setq l (cdr l)))))
1917 (insert "\n")
1918 (insert "Coding systems:\n")
1919 (let ((l (get-language-info language-name 'coding-system)))
1920 (if (null l)
1921 (insert " nothing specific to " language-name "\n")
1922 (while l
1923 (insert " " (symbol-name (car l)))
1924 (search-backward (symbol-name (car l)))
467412aa 1925 (help-xref-button 0 'help-coding-system (car l))
464cc130
KH
1926 (goto-char (point-max))
1927 (insert " (`"
1928 (coding-system-mnemonic (car l))
1929 "' in mode line):\n\t"
1930 (coding-system-doc-string (car l))
1931 "\n")
1932 (let ((aliases (coding-system-get (car l)
1933 'alias-coding-systems)))
1934 (when aliases
1935 (insert "\t(alias:")
1936 (while aliases
1937 (insert " " (symbol-name (car aliases)))
1938 (setq aliases (cdr aliases)))
1939 (insert ")\n")))
c3034e84 1940 (setq l (cdr l)))))))))
4ed46869 1941\f
40c81f74
PE
1942;;; Locales.
1943
0d7c5bb9
DL
1944(defvar locale-translation-file-name nil
1945 "File name for the system's file of locale-name aliases, or nil if none.")
40c81f74 1946
5f395df3
SM
1947;; The following definitions might as well be marked as constants and
1948;; purecopied, since they're normally used on startup, and probably
1949;; should reflect the facilities of the base Emacs.
1950(defconst locale-language-names
1951 (purecopy
1952 '(
40c81f74
PE
1953 ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
1954 ;; as specified in the Single Unix Spec, Version 2.
1955 ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
1956 ;; with additions from ISO 639/RA Newsletter No.1/1989;
5f395df3
SM
1957 ;; see Internet RFC 2165 (1997-06) and
1958 ;; http://www.evertype.com/standards/iso639/iso639-en.html
1959 ;; TERRITORY is a country code taken from ISO 3166
1960 ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
40c81f74 1961 ;; CODESET and MODIFIER are implementation-dependent.
5f395df3 1962
40c81f74
PE
1963 ; aa Afar
1964 ; ab Abkhazian
6ececc4d 1965 ("af" . "Latin-1") ; Afrikaans
40c81f74 1966 ("am" . "Ethiopic") ; Amharic
5f395df3 1967 ; ar Arabic glibc uses 8859-6
40c81f74
PE
1968 ; as Assamese
1969 ; ay Aymara
1970 ; az Azerbaijani
1971 ; ba Bashkir
e522f07b 1972 ("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
9c20a8d5 1973 ("bg" . "Bulgarian") ; Bulgarian
40c81f74
PE
1974 ; bh Bihari
1975 ; bi Bislama
1976 ; bn Bengali, Bangla
1977 ("bo" . "Tibetan")
1978 ("br" . "Latin-1") ; Breton
d37ef0f6 1979 ("bs" . "Latin-2") ; Bosnian
40c81f74
PE
1980 ("ca" . "Latin-1") ; Catalan
1981 ; co Corsican
1982 ("cs" . "Czech")
e76ef161 1983 ("cy" . "Welsh") ; Welsh [glibc uses Latin-8. Did this change?]
40c81f74
PE
1984 ("da" . "Latin-1") ; Danish
1985 ("de" . "German")
1986 ; dz Bhutani
1987 ("el" . "Greek")
6ececc4d 1988 ;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
e11cf111
DL
1989 ;; That's actually what the GNU locales define, modulo things like
1990 ;; en_IN -- fx.
6ececc4d 1991 ("en" . "Latin-1") ; English
40c81f74 1992 ("eo" . "Latin-3") ; Esperanto
8c4b6822 1993 ("es" . "Spanish")
40c81f74
PE
1994 ("et" . "Latin-4") ; Estonian
1995 ("eu" . "Latin-1") ; Basque
d37ef0f6 1996 ; fa Persian glibc uses utf-8
40c81f74
PE
1997 ("fi" . "Latin-1") ; Finnish
1998 ; fj Fiji
1999 ("fo" . "Latin-1") ; Faroese
8d738cfd 2000 ("fr" . "French") ; French
40c81f74 2001 ("fy" . "Latin-1") ; Frisian
6ececc4d
PE
2002 ("ga" . "Latin-1") ; Irish Gaelic (new orthography)
2003 ("gd" . "Latin-1") ; Scots Gaelic
2004 ("gl" . "Latin-1") ; Galician
40c81f74
PE
2005 ; gn Guarani
2006 ; gu Gujarati
d37ef0f6 2007 ("gv" . "Latin-8") ; Manx Gaelic glibc uses 8859-1
40c81f74
PE
2008 ; ha Hausa
2009 ("he" . "Hebrew")
5f395df3 2010 ("hi" . "Devanagari") ; Hindi glibc uses utf-8
03cb0324 2011 ("hr" . "Croatian") ; Croatian
40c81f74
PE
2012 ("hu" . "Latin-2") ; Hungarian
2013 ; hy Armenian
2014 ; ia Interlingua
2015 ("id" . "Latin-1") ; Indonesian
2016 ; ie Interlingue
2017 ; ik Inupiak
2018 ("is" . "Latin-1") ; Icelandic
e76ef161 2019 ("it" . "Italian") ; Italian
40c81f74
PE
2020 ; iu Inuktitut
2021 ("ja" . "Japanese")
2022 ; jw Javanese
9c20a8d5 2023 ("ka" . "Georgian") ; Georgian
40c81f74 2024 ; kk Kazakh
6ececc4d 2025 ("kl" . "Latin-1") ; Greenlandic
40c81f74
PE
2026 ; km Cambodian
2027 ; kn Kannada
2028 ("ko" . "Korean")
2029 ; ks Kashmiri
2030 ; ku Kurdish
5f395df3 2031 ("kw" . "Latin-1") ; Cornish
40c81f74
PE
2032 ; ky Kirghiz
2033 ("la" . "Latin-1") ; Latin
5f395df3 2034 ("lb" . "Latin-1") ; Luxemburgish
40c81f74
PE
2035 ; ln Lingala
2036 ("lo" . "Lao") ; Laothian
9c20a8d5
DL
2037 ("lt" . "Lithuanian")
2038 ("lv" . "Latvian") ; Latvian, Lettish
40c81f74 2039 ; mg Malagasy
5f395df3 2040 ("mi" . "Latin-7") ; Maori
f1282c7f 2041 ("mk" . "Cyrillic-ISO") ; Macedonian
40c81f74
PE
2042 ; ml Malayalam
2043 ; mn Mongolian
2044 ; mo Moldavian
5f395df3
SM
2045 ("mr" . "Devanagari") ; Marathi glibc uses utf-8
2046 ("ms" . "Latin-1") ; Malay
40c81f74
PE
2047 ("mt" . "Latin-3") ; Maltese
2048 ; my Burmese
2049 ; na Nauru
2050 ("ne" . "Devanagari") ; Nepali
8c4b6822 2051 ("nl" . "Dutch")
40c81f74 2052 ("no" . "Latin-1") ; Norwegian
5f395df3 2053 ("oc" . "Latin-1") ; Occitan
40c81f74
PE
2054 ; om (Afan) Oromo
2055 ; or Oriya
2056 ; pa Punjabi
2057 ("pl" . "Latin-2") ; Polish
2058 ; ps Pashto, Pushto
2059 ("pt" . "Latin-1") ; Portuguese
2060 ; qu Quechua
6ececc4d 2061 ("rm" . "Latin-1") ; Rhaeto-Romanic
40c81f74
PE
2062 ; rn Kirundi
2063 ("ro" . "Romanian")
e76ef161 2064 ("ru.*[_.]koi8" . "Russian")
f1282c7f 2065 ("ru" . "Cyrillic-ISO") ; Russian
40c81f74
PE
2066 ; rw Kinyarwanda
2067 ("sa" . "Devanagari") ; Sanskrit
2068 ; sd Sindhi
5f395df3 2069 ; se Northern Sami
40c81f74
PE
2070 ; sg Sangho
2071 ("sh" . "Latin-2") ; Serbo-Croatian
2072 ; si Sinhalese
2073 ("sk" . "Slovak")
2074 ("sl" . "Slovenian")
2075 ; sm Samoan
2076 ; sn Shona
2077 ; so Somali
6ececc4d 2078 ("sq" . "Latin-1") ; Albanian
40c81f74 2079 ("sr" . "Latin-2") ; Serbian (Latin alphabet)
e522f07b 2080 ("sr_YU@cyrillic" . "Cyrillic-ISO") ; per glibc
40c81f74
PE
2081 ; ss Siswati
2082 ; st Sesotho
2083 ; su Sundanese
03cb0324 2084 ("sv" . "Swedish") ; Swedish
40c81f74 2085 ("sw" . "Latin-1") ; Swahili
5f395df3
SM
2086 ; ta Tamil glibc uses utf-8
2087 ; te Telugu glibc uses utf-8
e522f07b 2088 ("tg" . "Tajik")
40c81f74
PE
2089 ("th" . "Thai")
2090 ; ti Tigrinya
2091 ; tk Turkmen
6ececc4d 2092 ("tl" . "Latin-1") ; Tagalog
40c81f74
PE
2093 ; tn Setswana
2094 ; to Tonga
e76ef161 2095 ("tr" . "Turkish")
40c81f74
PE
2096 ; ts Tsonga
2097 ; tt Tatar
2098 ; tw Twi
2099 ; ug Uighur
9c20a8d5 2100 ("uk" . "Ukrainian") ; Ukrainian
5f395df3
SM
2101 ; ur Urdu glibc uses utf-8
2102 ("uz" . "Latin-1") ; Uzbek
2103 ("vi" . "Vietnamese") ; glibc uses utf-8
40c81f74 2104 ; vo Volapuk
d37ef0f6 2105 ("wa" . "Latin-1") ; Walloon
40c81f74
PE
2106 ; wo Wolof
2107 ; xh Xhosa
9c20a8d5 2108 ("yi" . "Windows-1255") ; Yiddish
40c81f74
PE
2109 ; yo Yoruba
2110 ; za Zhuang
5f395df3
SM
2111
2112 ; glibc:
2113 ; zh_CN.GB18030/GB18030 \
2114 ; zh_CN.GBK/GBK \
2115 ; zh_HK/BIG5-HKSCS \
5f395df3 2116
40c81f74
PE
2117 ("zh.*[._]big5" . "Chinese-BIG5")
2118 ("zh.*[._]gbk" . nil) ; Solaris 2.7; has gbk-0 as well as GB 2312.1980-0
e76ef161
DL
2119 ("zh_tw" . "Chinese-CNS") ; glibc uses big5
2120 ("zh_tw[._]euc-tw" . "Chinese-EUC-TW")
40c81f74
PE
2121 ("zh" . "Chinese-GB")
2122 ; zu Zulu
2123
2124 ;; ISO standard locales
2125 ("c$" . "ASCII")
2126 ("posix$" . "ASCII")
2127
40c81f74
PE
2128 ;; The "IPA" Emacs language environment does not correspond
2129 ;; to any ISO 639 code, so let it stand for itself.
2130 ("ipa$" . "IPA")
2131
2132 ;; Nonstandard or obsolete language codes
2133 ("cz" . "Czech") ; e.g. Solaris 2.6
2134 ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4
2135 ("iw" . "Hebrew") ; e.g. X11R6.4
f1282c7f 2136 ("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4
40c81f74 2137 ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6
2e86ceaa 2138 ("jp" . "Japanese") ; e.g. MS Windows
86d9e628
JR
2139 ("chs" . "Chinese-GB") ; MS Windows Chinese Simplified
2140 ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
367ca50f 2141 ))
40c81f74 2142 "List of pairs of locale regexps and language names.
6ececc4d
PE
2143The first element whose locale regexp matches the start of a downcased locale
2144specifies the language name corresponding to that locale.
40c81f74
PE
2145If the language name is nil, there is no corresponding language environment.")
2146
5f395df3
SM
2147(defconst locale-charset-language-names
2148 (purecopy
2149 '((".*8859[-_]?1\\>" . "Latin-1")
2150 (".*8859[-_]?2\\>" . "Latin-2")
2151 (".*8859[-_]?3\\>" . "Latin-3")
2152 (".*8859[-_]?4\\>" . "Latin-4")
2153 (".*8859[-_]?9\\>" . "Latin-5")
2154 (".*8859[-_]?14\\>" . "Latin-8")
2155 (".*8859[-_]?15\\>" . "Latin-9")
e522f07b
DL
2156 (".*utf\\(-?8\\)\\>" . "UTF-8")
2157 ;; utf-8@euro exists, so put this last. (@euro really specifies
2158 ;; the currency, rather than the charset.)
2159 (".*@euro\\>" . "Latin-9")))
6ececc4d
PE
2160 "List of pairs of locale regexps and charset language names.
2161The first element whose locale regexp matches the start of a downcased locale
2162specifies the language name whose charsets corresponds to that locale.
2163This language name is used if its charsets disagree with the charsets of
2164the language name that would otherwise be used for this locale.")
2165
5f395df3
SM
2166(defconst locale-preferred-coding-systems
2167 (purecopy
2168 '(("ja.*[._]euc" . japanese-iso-8bit)
2169 ("ja.*[._]jis7" . iso-2022-jp)
2170 ("ja.*[._]pck" . japanese-shift-jis)
2171 ("ja.*[._]sjis" . japanese-shift-jis)
69210880 2172 ("jpn" . japanese-shift-jis) ; MS-Windows uses this.
9c20a8d5 2173 (".*[._]utf" . utf-8)))
6ececc4d
PE
2174 "List of pairs of locale regexps and preferred coding systems.
2175The first element whose locale regexp matches the start of a downcased locale
2176specifies the coding system to prefer when using that locale.")
40c81f74
PE
2177
2178(defun locale-name-match (key alist)
2179 "Search for KEY in ALIST, which should be a list of regexp-value pairs.
2180Return the value corresponding to the first regexp that matches the
2181start of KEY, or nil if there is no match."
2182 (let (element)
2183 (while (and alist (not element))
ddf6b836 2184 (if (string-match (concat "\\`\\(?:" (car (car alist)) "\\)") key)
40c81f74
PE
2185 (setq element (car alist)))
2186 (setq alist (cdr alist)))
2187 (cdr element)))
2188
758f07de 2189(defun set-locale-environment (&optional locale-name)
40c81f74 2190 "Set up multi-lingual environment for using LOCALE-NAME.
758f07de
RS
2191This sets the language environment, the coding system priority,
2192the default input method and sometimes other things.
2193
2194LOCALE-NAME should be a string
40c81f74
PE
2195which is the name of a locale supported by the system;
2196often it is of the form xx_XX.CODE, where xx is a language,
2197XX is a country, and CODE specifies a character set and coding system.
2198For example, the locale name \"ja_JP.EUC\" might name a locale
2199for Japanese in Japan using the `japanese-iso-8bit' coding-system.
2200
758f07de 2201If LOCALE-NAME is nil, its value is taken from the environment
d37ef0f6 2202variables LC_ALL, LC_CTYPE and LANG (the first one that is set).
40c81f74
PE
2203
2204The locale names supported by your system can typically be found in a
0812c1e8 2205directory named `/usr/share/locale' or `/usr/lib/locale'. LOCALE-NAME
758f07de 2206will be translated according to the table specified by
0812c1e8
DL
2207`locale-translation-file-name'.
2208
2209See also `locale-charset-language-names', `locale-language-names',
2210`locale-preferred-coding-systems' and `locale-coding-system'."
758f07de 2211 (interactive "sSet environment for locale: ")
a1506d29 2212
0d7c5bb9
DL
2213 ;; Do this at runtime for the sake of binaries possibly transported
2214 ;; to a system without X.
2215 (setq locale-translation-file-name
2216 (let ((files
2217 '("/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4
2218 "/usr/X11R6/lib/X11/locale/locale.alias" ; e.g. RedHat 4.2
2219 "/usr/openwin/lib/locale/locale.alias" ; e.g. Solaris 2.6
2220 ;;
2221 ;; The following name appears after the X-related names above,
2222 ;; since the X-related names are what X actually uses.
2223 "/usr/share/locale/locale.alias" ; GNU/Linux sans X
2224 )))
2225 (while (and files (not (file-exists-p (car files))))
2226 (setq files (cdr files)))
2227 (car files)))
2228
7008ccac
GM
2229 (let ((locale locale-name))
2230
2231 (unless locale
2232 ;; Use the first of these three environment variables
2233 ;; that has a nonempty value.
2234 (let ((vars '("LC_ALL" "LC_CTYPE" "LANG")))
db6bd804
DL
2235 (while (and vars
2236 (= 0 (length locale))) ; nil or empty string
2237 (setq locale (getenv (pop vars))))))
7008ccac
GM
2238
2239 (when locale
2240
2241 ;; Translate "swedish" into "sv_SE.ISO8859-1", and so on,
2242 ;; using the translation file that many systems have.
2243 (when locale-translation-file-name
2244 (with-temp-buffer
2245 (insert-file-contents locale-translation-file-name)
2246 (when (re-search-forward
2247 (concat "^" (regexp-quote locale) ":?[ \t]+") nil t)
2248 (setq locale (buffer-substring (point) (line-end-position))))))
2249
2250 ;; Leave the system locales alone if the caller did not specify
2251 ;; an explicit locale name, as their defaults are set from
2252 ;; LC_MESSAGES and LC_TIME, not LC_CTYPE, and the user might not
2253 ;; want to set them to the same value as LC_CTYPE.
2254 (when locale-name
2255 (setq system-messages-locale locale)
2256 (setq system-time-locale locale))
2257
2258 (setq locale (downcase locale))
2259
2260 (let ((language-name
2261 (locale-name-match locale locale-language-names))
2262 (charset-language-name
2263 (locale-name-match locale locale-charset-language-names))
2264 (coding-system
2265 (locale-name-match locale locale-preferred-coding-systems)))
2266
5f395df3 2267 ;; Give preference to charset-language-name over language-name.
7008ccac
GM
2268 (if (and charset-language-name
2269 (not
2270 (equal (get-language-info language-name 'charset)
2271 (get-language-info charset-language-name 'charset))))
2272 (setq language-name charset-language-name))
2273
2274 (when language-name
2275
2276 ;; Set up for this character set. This is now the right way
2277 ;; to do it for both unibyte and multibyte modes.
2278 (set-language-environment language-name)
2279
2280 ;; If default-enable-multibyte-characters is nil,
2281 ;; we are using single-byte characters,
2282 ;; so the display table and terminal coding system are irrelevant.
2283 (when default-enable-multibyte-characters
2284 (set-display-table-and-terminal-coding-system language-name))
2285
166ce29f
DL
2286 ;; Set the `keyboard-coding-system' if appropriate (tty
2287 ;; only). At least X and MS Windows can generate
2288 ;; multilingual input.
2289 (unless window-system
2290 (let ((kcs (or coding-system
2291 (car (get-language-info language-name
2292 'coding-system)))))
2293 (if kcs (set-keyboard-coding-system kcs))))
ddf6b836 2294
7008ccac
GM
2295 (setq locale-coding-system
2296 (car (get-language-info language-name 'coding-priority))))
2297
2298 (when coding-system
2299 (prefer-coding-system coding-system)
e76ef161
DL
2300 (setq locale-coding-system coding-system))))
2301
2302 ;; Default to A4 paper if we're not in a C, POSIX or US locale.
2303 ;; (See comments in Flanginfo.)
2304 (let ((locale locale)
2305 (paper (langinfo 'paper)))
2306 (if paper
2307 ;; This will always be null at the time of writing.
2308 (cond
2309 ((equal paper '(216 279))
2310 (setq ps-paper-type 'letter))
2311 ((equal paper '(210 297))
2312 (setq ps-paper-type 'a4)))
2313 (let ((vars '("LC_ALL" "LC_PAPER" "LANG")))
2314 (while (and vars (= 0 (length locale)))
2315 (setq locale (getenv (pop vars)))))
2316 (when locale
2317 ;; As of glibc 2.2.5, these are the only US Letter locales,
2318 ;; and the rest are A4.
2319 (setq ps-paper-type
2320 (or (locale-name-match locale '(("c$" . letter)
2321 ("posix$" . letter)
2322 (".._us" . letter)
2323 (".._pr" . letter)
2324 (".._ca" . letter)))
2325 'a4))))))
2326 nil)
40c81f74 2327\f
4ed46869
KH
2328;;; Charset property
2329
251d4f4b 2330(defun get-charset-property (charset propname)
4ed46869
KH
2331 "Return the value of CHARSET's PROPNAME property.
2332This is the last value stored with
96db204a 2333 (put-charset-property CHARSET PROPNAME VALUE)."
251d4f4b
KH
2334 (and (not (eq charset 'composition))
2335 (plist-get (charset-plist charset) propname)))
4ed46869 2336
251d4f4b 2337(defun put-charset-property (charset propname value)
4ed46869
KH
2338 "Store CHARSETS's PROPNAME property with value VALUE.
2339It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
1300d43f
KH
2340 (or (eq charset 'composition)
2341 (set-charset-plist charset
2342 (plist-put (charset-plist charset) propname value))))
4ed46869
KH
2343
2344;;; Character code property
2345(put 'char-code-property-table 'char-table-extra-slots 0)
2346
2347(defvar char-code-property-table
2348 (make-char-table 'char-code-property-table)
2349 "Char-table containing a property list of each character code.
2350
2351See also the documentation of `get-char-code-property' and
96db204a 2352`put-char-code-property'.")
4ed46869
KH
2353
2354(defun get-char-code-property (char propname)
2355 "Return the value of CHAR's PROPNAME property in `char-code-property-table'."
2356 (let ((plist (aref char-code-property-table char)))
2357 (if (listp plist)
2358 (car (cdr (memq propname plist))))))
2359
2360(defun put-char-code-property (char propname value)
2361 "Store CHAR's PROPNAME property with VALUE in `char-code-property-table'.
2362It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
2363 (let ((plist (aref char-code-property-table char)))
2364 (if plist
2365 (let ((slot (memq propname plist)))
2366 (if slot
2367 (setcar (cdr slot) value)
2368 (nconc plist (list propname value))))
2369 (aset char-code-property-table char (list propname value)))))
2370
a127b764
KH
2371\f
2372;; Pretty description of encoded string
2373
2374;; Alist of ISO 2022 control code vs the corresponding mnemonic string.
2375(defvar iso-2022-control-alist
2376 '((?\x1b . "ESC")
2377 (?\x0e . "SO")
2378 (?\x0f . "SI")
2379 (?\x8e . "SS2")
2380 (?\x8f . "SS3")
2381 (?\x9b . "CSI")))
2382
2383(defun encoded-string-description (str coding-system)
2384 "Return a pretty description of STR that is encoded by CODING-SYSTEM."
2385 (setq str (string-as-unibyte str))
993b2a7d 2386 (mapconcat
8c9d55a9 2387 (if (and coding-system (eq (coding-system-type coding-system) 2))
993b2a7d
KH
2388 ;; Try to get a pretty description for ISO 2022 escape sequences.
2389 (function (lambda (x) (or (cdr (assq x iso-2022-control-alist))
2390 (format "%02X" x))))
5de75f53 2391 (function (lambda (x) (format "0x%02X" x))))
993b2a7d 2392 str " "))
a127b764
KH
2393
2394(defun encode-coding-char (char coding-system)
2395 "Encode CHAR by CODING-SYSTEM and return the resulting string.
2396If CODING-SYSTEM can't safely encode CHAR, return nil."
bd953173
KH
2397 (let ((str1 (string-as-multibyte (char-to-string char)))
2398 (str2 (string-as-multibyte (make-string 2 char)))
a89f541b
KH
2399 (safe-chars (and coding-system
2400 (coding-system-get coding-system 'safe-chars)))
8c9d55a9 2401 (charset (char-charset char))
a127b764 2402 enc1 enc2 i1 i2)
a89f541b 2403 (when (or (eq safe-chars t)
8c9d55a9 2404 (eq charset 'ascii)
a89f541b 2405 (and safe-chars (aref safe-chars char)))
a127b764
KH
2406 ;; We must find the encoded string of CHAR. But, just encoding
2407 ;; CHAR will put extra control sequences (usually to designate
2408 ;; ASCII charaset) at the tail if type of CODING is ISO 2022.
2409 ;; To exclude such tailing bytes, we at first encode one-char
2410 ;; string and two-char string, then check how many bytes at the
2411 ;; tail of both encoded strings are the same.
2412
bd953173 2413 (setq enc1 (encode-coding-string str1 coding-system)
a127b764 2414 i1 (length enc1)
bd953173 2415 enc2 (encode-coding-string str2 coding-system)
a127b764
KH
2416 i2 (length enc2))
2417 (while (and (> i1 0) (= (aref enc1 (1- i1)) (aref enc2 (1- i2))))
2418 (setq i1 (1- i1) i2 (1- i2)))
2419
2420 ;; Now (substring enc1 i1) and (substring enc2 i2) are the same,
2421 ;; and they are the extra control sequences at the tail to
2422 ;; exclude.
2423 (substring enc2 0 i2))))
2424
2425
4ed46869 2426;;; mule-cmds.el ends here