(viper-find-char-forward): Fix spellings in docstrings.
[bpt/emacs.git] / lisp / international / mule-cmds.el
CommitLineData
8045ca80 1;;; mule-cmds.el --- commands for mulitilingual environment -*-coding: iso-2022-7bit -*-
9ee5b744 2
2fd125a3
KH
3;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
4;; Free Software Foundation, Inc.
5;; Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
6;; National Institute of Advanced Industrial Science and Technology (AIST)
7;; Registration Number H14PRO021
4ed46869
KH
8
9;; Keywords: mule, multilingual
10
11;; This file is part of GNU Emacs.
12
13;; GNU Emacs is free software; you can redistribute it and/or modify
14;; it under the terms of the GNU General Public License as published by
15;; the Free Software Foundation; either version 2, or (at your option)
16;; any later version.
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
369314dc
KH
24;; along with GNU Emacs; see the file COPYING. If not, write to the
25;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26;; Boston, MA 02111-1307, USA.
4ed46869 27
60370d40
PJ
28;;; Commentary:
29
4ed46869
KH
30;;; Code:
31
2375e71a
KH
32(eval-when-compile
33 (defvar dos-codepage)
34 (autoload 'widget-value "wid-edit"))
cda74479 35
4ed46869
KH
36;;; MULE related key bindings and menus.
37
0709d285 38(defvar mule-keymap (make-sparse-keymap)
33d17698 39 "Keymap for Mule (Multilingual environment) specific commands.")
4ed46869 40
8f81f784 41;; Keep "C-x C-m ..." for mule specific commands.
0709d285 42(define-key ctl-x-map "\C-m" mule-keymap)
ef8a8c8c 43
4ed46869 44(define-key mule-keymap "f" 'set-buffer-file-coding-system)
d80dee05 45(define-key mule-keymap "r" 'revert-buffer-with-coding-system)
2308d0d7 46(define-key mule-keymap "F" 'set-file-name-coding-system)
4ed46869 47(define-key mule-keymap "t" 'set-terminal-coding-system)
15b3e511
KH
48(define-key mule-keymap "k" 'set-keyboard-coding-system)
49(define-key mule-keymap "p" 'set-buffer-process-coding-system)
7624ebb9
KH
50(define-key mule-keymap "x" 'set-selection-coding-system)
51(define-key mule-keymap "X" 'set-next-selection-coding-system)
8b784951 52(define-key mule-keymap "\C-\\" 'set-input-method)
15b3e511 53(define-key mule-keymap "c" 'universal-coding-system-argument)
b4fba33f 54(define-key mule-keymap "l" 'set-language-environment)
4ed46869 55
538d88fb
EZ
56(defvar mule-menu-keymap
57 (make-sparse-keymap "Mule (Multilingual Environment)")
33d17698 58 "Keymap for Mule (Multilingual environment) menu specific commands.")
15b3e511 59
dcad02bc
EZ
60(defvar describe-language-environment-map
61 (make-sparse-keymap "Describe Language Environment"))
15b3e511 62
dcad02bc
EZ
63(defvar setup-language-environment-map
64 (make-sparse-keymap "Set Language Environment"))
15b3e511 65
dcad02bc
EZ
66(defvar set-coding-system-map
67 (make-sparse-keymap "Set Coding System"))
15b3e511 68
15b3e511 69(define-key-after mule-menu-keymap [set-language-environment]
cda74479
DL
70 (list 'menu-item "Set Language Environment" setup-language-environment-map
71 :help "Multilingual environment suitable for a specific language"))
15b3e511
KH
72(define-key-after mule-menu-keymap [separator-mule]
73 '("--")
74 t)
75(define-key-after mule-menu-keymap [toggle-input-method]
538d88fb 76 '(menu-item "Toggle Input Method" toggle-input-method)
15b3e511 77 t)
8b784951 78(define-key-after mule-menu-keymap [set-input-method]
538d88fb 79 '(menu-item "Select Input Method..." set-input-method)
15b3e511 80 t)
cda74479
DL
81(define-key-after mule-menu-keymap [describe-input-method]
82 '(menu-item "Describe Input Method" describe-input-method))
15b3e511
KH
83(define-key-after mule-menu-keymap [separator-input-method]
84 '("--")
85 t)
15b3e511 86(define-key-after mule-menu-keymap [set-various-coding-system]
cda74479 87 (list 'menu-item "Set Coding Systems" set-coding-system-map
be9650bc 88 :enable 'default-enable-multibyte-characters))
538d88fb
EZ
89(define-key-after mule-menu-keymap [view-hello-file]
90 '(menu-item "Show Multi-lingual Text" view-hello-file
91 :enable (file-readable-p
92 (expand-file-name "HELLO" data-directory))
93 :help "Display file which says HELLO in many languages")
15b3e511
KH
94 t)
95(define-key-after mule-menu-keymap [separator-coding-system]
96 '("--")
97 t)
538d88fb
EZ
98(define-key-after mule-menu-keymap [describe-language-environment]
99 (list 'menu-item "Describe Language Environment"
100 describe-language-environment-map
cda74479 101 :help "Show multilingual settings for a specific language")
15b3e511 102 t)
538d88fb
EZ
103(define-key-after mule-menu-keymap [describe-input-method]
104 '(menu-item "Describe Input Method..." describe-input-method
cda74479 105 :help "Keyboard layout for a specific input method")
538d88fb
EZ
106 t)
107(define-key-after mule-menu-keymap [describe-coding-system]
108 '(menu-item "Describe Coding System..." describe-coding-system)
109 t)
cda74479
DL
110(define-key-after mule-menu-keymap [list-character-sets]
111 '(menu-item "List Character Sets" list-character-sets
112 :help "Show table of available character sets"))
538d88fb
EZ
113(define-key-after mule-menu-keymap [mule-diag]
114 '(menu-item "Show All of Mule Status" mule-diag
115 :help "Display multilingual environment settings")
15b3e511
KH
116 t)
117
3a151e98 118(define-key-after set-coding-system-map [universal-coding-system-argument]
538d88fb
EZ
119 '(menu-item "For Next Command" universal-coding-system-argument
120 :help "Coding system to be used by next command")
3a151e98 121 t)
d80dee05
RS
122(define-key-after set-coding-system-map [separator-1]
123 '("--")
124 t)
125(define-key-after set-coding-system-map [set-buffer-file-coding-system]
126 '(menu-item "For Saving This Buffer" set-buffer-file-coding-system
127 :help "How to encode this buffer when saved")
128 t)
129(define-key-after set-coding-system-map [revert-buffer-with-coding-system]
130 '(menu-item "For Reverting This File Now" revert-buffer-with-coding-system
131 :enable buffer-file-name
132 :help "Revisit this file immediately using specified coding system")
133 t)
2308d0d7
KH
134(define-key-after set-coding-system-map [set-file-name-coding-system]
135 '(menu-item "For File Name" set-file-name-coding-system
136 :help "How to decode/encode file names")
137 t)
d80dee05
RS
138(define-key-after set-coding-system-map [separator-2]
139 '("--")
15b3e511 140 t)
d80dee05 141
15b3e511 142(define-key-after set-coding-system-map [set-keyboard-coding-system]
538d88fb
EZ
143 '(menu-item "For Keyboard" set-keyboard-coding-system
144 :help "How to decode keyboard input")
15b3e511 145 t)
d80dee05
RS
146(define-key-after set-coding-system-map [set-terminal-coding-system]
147 '(menu-item "For Terminal" set-terminal-coding-system
148 :enable (null (memq window-system '(x w32 mac)))
149 :help "How to encode terminal output")
150 t)
151(define-key-after set-coding-system-map [separator-3]
152 '("--")
15b3e511 153 t)
7624ebb9 154(define-key-after set-coding-system-map [set-selection-coding-system]
538d88fb
EZ
155 '(menu-item "For X Selections/Clipboard" set-selection-coding-system
156 :visible (display-selections-p)
157 :help "How to en/decode data to/from selection/clipboard")
7624ebb9
KH
158 t)
159(define-key-after set-coding-system-map [set-next-selection-coding-system]
538d88fb
EZ
160 '(menu-item "For Next X Selection" set-next-selection-coding-system
161 :visible (display-selections-p)
162 :help "How to en/decode next selection/clipboard operation")
7624ebb9 163 t)
d80dee05
RS
164(define-key-after set-coding-system-map [set-buffer-process-coding-system]
165 '(menu-item "For I/O with Subprocess" set-buffer-process-coding-system
166 :visible (fboundp 'start-process)
167 :enable (get-buffer-process (current-buffer))
168 :help "How to en/decode I/O from/to subprocess connected to this buffer")
169 t)
170
171
15b3e511 172(define-key setup-language-environment-map
538d88fb 173 [Default] '(menu-item "Default" setup-specified-language-environment))
4ed46869 174
cda74479
DL
175(define-key describe-language-environment-map
176 [Default] '(menu-item "Default" describe-specified-language-support))
177
4ed46869
KH
178;; This should be a single character key binding because users use it
179;; very frequently while editing multilingual text. Now we can use
180;; only two such keys: "\C-\\" and "\C-^", but the latter is not
181;; convenient because it requires shifting on most keyboards. An
182;; alternative is "\C-\]" which is now bound to `abort-recursive-edit'
183;; but it won't be used that frequently.
184(define-key global-map "\C-\\" 'toggle-input-method)
185
5f395df3
SM
186;; This is no good because people often type Shift-SPC
187;; meaning to type SPC. -- rms.
188;; ;; Here's an alternative key binding for X users (Shift-SPACE).
189;; (define-key global-map [?\S- ] 'toggle-input-method)
b4fba33f 190
464cc130
KH
191;;; Mule related hyperlinks.
192(defconst help-xref-mule-regexp-template
193 (purecopy (concat "\\(\\<\\("
194 "\\(coding system\\)\\|"
d0c40faa
KH
195 "\\(input method\\)\\|"
196 "\\(character set\\)\\|"
197 "\\(charset\\)"
464cc130
KH
198 "\\)\\s-+\\)?"
199 ;; Note starting with word-syntax character:
200 "`\\(\\sw\\(\\sw\\|\\s_\\)+\\)'")))
201
26d87040
EZ
202(defun coding-system-change-eol-conversion (coding-system eol-type)
203 "Return a coding system which differs from CODING-SYSTEM in eol conversion.
204The returned coding system converts end-of-line by EOL-TYPE
205but text as the same way as CODING-SYSTEM.
206EOL-TYPE should be `unix', `dos', `mac', or nil.
207If EOL-TYPE is nil, the returned coding system detects
208how end-of-line is formatted automatically while decoding.
209
210EOL-TYPE can be specified by an integer 0, 1, or 2.
211They means `unix', `dos', and `mac' respectively."
212 (if (symbolp eol-type)
213 (setq eol-type (cond ((eq eol-type 'unix) 0)
214 ((eq eol-type 'dos) 1)
215 ((eq eol-type 'mac) 2)
216 (t eol-type))))
217 (let ((orig-eol-type (coding-system-eol-type coding-system)))
218 (if (vectorp orig-eol-type)
219 (if (not eol-type)
220 coding-system
221 (aref orig-eol-type eol-type))
222 (let ((base (coding-system-base coding-system)))
223 (if (not eol-type)
224 base
225 (if (= eol-type orig-eol-type)
226 coding-system
227 (setq orig-eol-type (coding-system-eol-type base))
228 (if (vectorp orig-eol-type)
229 (aref orig-eol-type eol-type))))))))
230
231(defun coding-system-change-text-conversion (coding-system coding)
232 "Return a coding system which differs from CODING-SYSTEM in text conversion.
233The returned coding system converts text by CODING
234but end-of-line as the same way as CODING-SYSTEM.
235If CODING is nil, the returned coding system detects
236how text is formatted automatically while decoding."
1d77e15a
JR
237 (let ((eol-type (coding-system-eol-type coding-system)))
238 (coding-system-change-eol-conversion
239 (if coding coding 'undecided)
240 (if (numberp eol-type) (aref [unix dos mac] eol-type)))))
26d87040 241
4ed46869 242(defun toggle-enable-multibyte-characters (&optional arg)
6998e1a1
RS
243 "Change whether this buffer uses multibyte characters.
244With arg, use multibyte characters if the arg is positive.
245
246Note that this command does not convert the byte contents of
247the buffer; it only changes the way those bytes are interpreted.
248In general, therefore, this command *changes* the sequence of
249characters that the current buffer contains.
250
251We suggest you avoid using use this command unless you know what you
252are doing. If you use it by mistake, and the buffer is now displayed
253wrong, use this command again to toggle back to the right mode."
4ed46869 254 (interactive "P")
b7079457
RS
255 (let ((new-flag
256 (if (null arg) (null enable-multibyte-characters)
257 (> (prefix-numeric-value arg) 0))))
258 (set-buffer-multibyte new-flag))
4ed46869
KH
259 (force-mode-line-update))
260
261(defun view-hello-file ()
262 "Display the HELLO file which list up many languages and characters."
263 (interactive)
8f81f784
KH
264 ;; We have to decode the file in any environment.
265 (let ((default-enable-multibyte-characters t)
95fa03b4 266 (coding-system-for-read 'iso-2022-7bit))
544dd975 267 (view-file (expand-file-name "HELLO" data-directory))))
4ed46869 268
9e9a77a6 269(defun universal-coding-system-argument (coding-system)
15b3e511 270 "Execute an I/O command using the specified coding system."
9e9a77a6
RS
271 (interactive
272 (let ((default (and buffer-file-coding-system
34104362
KH
273 (not (eq (coding-system-type buffer-file-coding-system)
274 t))
9e9a77a6
RS
275 buffer-file-coding-system)))
276 (list (read-coding-system
277 (if default
278 (format "Coding system for following command (default, %s): " default)
279 "Coding system for following command: ")
280 default))))
281 (let* ((keyseq (read-key-sequence
e14a8f4c 282 (format "Command to execute with %s:" coding-system)))
04363179
GM
283 (cmd (key-binding keyseq))
284 prefix)
285
286 (when (eq cmd 'universal-argument)
287 (call-interactively cmd)
a1506d29 288
04363179
GM
289 ;; Process keys bound in `universal-argument-map'.
290 (while (progn
291 (setq keyseq (read-key-sequence nil t)
292 cmd (key-binding keyseq t))
293 (not (eq cmd 'universal-argument-other-key)))
294 (let ((current-prefix-arg prefix-arg)
d37ef0f6 295 ;; Have to bind `last-command-char' here so that
34703cb1 296 ;; `digit-argument', for instance, can compute the
04363179
GM
297 ;; prefix arg.
298 (last-command-char (aref keyseq 0)))
299 (call-interactively cmd)))
300
34703cb1 301 ;; This is the final call to `universal-argument-other-key', which
04363179
GM
302 ;; set's the final `prefix-arg.
303 (let ((current-prefix-arg prefix-arg))
304 (call-interactively cmd))
a1506d29 305
04363179
GM
306 ;; Read the command to execute with the given prefix arg.
307 (setq prefix prefix-arg
308 keyseq (read-key-sequence nil t)
309 cmd (key-binding keyseq)))
310
15b3e511 311 (let ((coding-system-for-read coding-system)
04363179 312 (coding-system-for-write coding-system)
0e9ec609 313 (coding-system-require-warning t)
04363179 314 (current-prefix-arg prefix))
15b3e511
KH
315 (message "")
316 (call-interactively cmd))))
317
de94d711 318(defun set-default-coding-systems (coding-system)
0c3154d2 319 "Set default value of various coding systems to CODING-SYSTEM.
387136f6 320This sets the following coding systems:
0c3154d2 321 o coding system of a newly created buffer
8efc03e1
KH
322 o default coding system for subprocess I/O
323This also sets the following values:
d37ef0f6 324 o default value used as `file-name-coding-system' for converting file names.
03c35c83
EZ
325 o default value for the command `set-terminal-coding-system' (not on MSDOS)
326 o default value for the command `set-keyboard-coding-system'."
de94d711
KH
327 (check-coding-system coding-system)
328 (setq-default buffer-file-coding-system coding-system)
9a0ed120
KH
329 (if (fboundp 'ucs-set-table-for-input)
330 (dolist (buffer (buffer-list))
331 (or (local-variable-p 'buffer-file-coding-system buffer)
332 (ucs-set-table-for-input buffer))))
333
a41118cc
SM
334 (if (and default-enable-multibyte-characters (not (eq system-type 'darwin)))
335 ;; The file-name coding system on Darwin systems is always utf-8.
716184d4 336 (setq default-file-name-coding-system coding-system))
03c35c83
EZ
337 ;; If coding-system is nil, honor that on MS-DOS as well, so
338 ;; that they could reset the terminal coding system.
339 (unless (and (eq window-system 'pc) coding-system)
340 (setq default-terminal-coding-system coding-system))
de94d711 341 (setq default-keyboard-coding-system coding-system)
1d77e15a
JR
342 ;; Preserve eol-type from existing default-process-coding-systems.
343 ;; On non-unix-like systems in particular, these may have been set
344 ;; carefully by the user, or by the startup code, to deal with the
345 ;; users shell appropriately, so should not be altered by changing
346 ;; language environment.
347 (let ((output-coding
348 (coding-system-change-text-conversion
349 (car default-process-coding-system) coding-system))
350 (input-coding
351 (coding-system-change-text-conversion
352 (cdr default-process-coding-system) coding-system)))
353 (setq default-process-coding-system
354 (cons output-coding input-coding))))
de94d711 355
0c3154d2
KH
356(defun prefer-coding-system (coding-system)
357 "Add CODING-SYSTEM at the front of the priority list for automatic detection.
387136f6 358This also sets the following coding systems:
0c3154d2 359 o coding system of a newly created buffer
8efc03e1
KH
360 o default coding system for subprocess I/O
361This also sets the following values:
c654de1d 362 o default value used as `file-name-coding-system' for converting file names.
03c35c83
EZ
363 o default value for the command `set-terminal-coding-system' (not on MSDOS)
364 o default value for the command `set-keyboard-coding-system'
365
bd3ac67e
EZ
366If CODING-SYSTEM specifies a certain type of EOL conversion, the coding
367systems set by this function will use that type of EOL conversion.
368
03c35c83
EZ
369This command does not change the default value of terminal coding system
370for MS-DOS terminal, because DOS terminals only support a single coding
371system, and Emacs automatically sets the default to that coding system at
5beaa0d1
KH
372startup.
373
c654de1d
DL
374A coding system that requires automatic detection of text
375encoding (e.g. undecided, unix) can't be preferred.
376
377See also `coding-category-list' and `coding-system-category'."
0c3154d2
KH
378 (interactive "zPrefer coding system: ")
379 (if (not (and coding-system (coding-system-p coding-system)))
380 (error "Invalid coding system `%s'" coding-system))
381 (let ((coding-category (coding-system-category coding-system))
bd3ac67e
EZ
382 (base (coding-system-base coding-system))
383 (eol-type (coding-system-eol-type coding-system)))
0c3154d2
KH
384 (if (not coding-category)
385 ;; CODING-SYSTEM is no-conversion or undecided.
386 (error "Can't prefer the coding system `%s'" coding-system))
8efc03e1 387 (set coding-category (or base coding-system))
6a17cee0 388 ;; Changing the binding of a coding category requires this call.
45d08cb2 389 (update-coding-systems-internal)
812cad80 390 (or (eq coding-category (car coding-category-list))
0c3154d2 391 ;; We must change the order.
812cad80 392 (set-coding-priority (list coding-category)))
8efc03e1
KH
393 (if (and base (interactive-p))
394 (message "Highest priority is set to %s (base of %s)"
395 base coding-system))
bd3ac67e 396 ;; If they asked for specific EOL conversion, honor that.
6f9dc4fd 397 (if (memq eol-type '(0 1 2))
bd3ac67e
EZ
398 (setq coding-system
399 (coding-system-change-eol-conversion base eol-type))
400 (setq coding-system base))
401 (set-default-coding-systems coding-system)))
0c3154d2 402
b5edd1d1
KH
403(defvar sort-coding-systems-predicate nil
404 "If non-nil, a predicate function to sort coding systems.
405
406It is called with two coding systems, and should return t if the first
407one is \"less\" than the second.
408
409The function `sort-coding-systems' use it.")
410
411(defun sort-coding-systems (codings)
412 "Sort coding system list CODINGS by a priority of each coding system.
413
414If a coding system is most preferred, it has the highest priority.
415Otherwise, a coding system corresponds to some MIME charset has higher
416priorities. Among them, a coding system included in `coding-system'
417key of the current language environment has higher priorities. See
418also the documentation of `language-info-alist'.
419
420If the variable `sort-coding-systems-predicate' (which see) is
421non-nil, it is used to sort CODINGS in the different way than above."
422 (if sort-coding-systems-predicate
423 (sort codings sort-coding-systems-predicate)
da1ebad1
KH
424 (let* ((from-categories (mapcar #'(lambda (x) (symbol-value x))
425 coding-category-list))
426 (most-preferred (car from-categories))
b5edd1d1
KH
427 (lang-preferred (get-language-info current-language-environment
428 'coding-system))
429 (func (function
430 (lambda (x)
431 (let ((base (coding-system-base x)))
da1ebad1
KH
432 ;; We calculate the priority number 0..255 by
433 ;; using the 8 bits PMMLCEII as this:
434 ;; P: 1 iff most preferred.
435 ;; MM: greater than 0 iff mime-charset.
436 ;; L: 1 iff one of the current lang. env.'s codings.
437 ;; C: 1 iff one of codings listed in the category list.
438 ;; E: 1 iff not XXX-with-esc
439 ;; II: if iso-2022 based, 0..3, else 1.
440 (logior
441 (lsh (if (eq base most-preferred) 1 0) 7)
442 (lsh
443 (let ((mime (coding-system-get base 'mime-charset)))
b439e72a
DL
444 ;; Prefer coding systems corresponding to a
445 ;; MIME charset.
b5edd1d1 446 (if mime
b439e72a
DL
447 ;; Lower utf-16 priority so that we
448 ;; normally prefer utf-8 to it, and put
449 ;; x-ctext below that.
de814dd9
KH
450 (cond ((string-match "utf-16"
451 (symbol-name mime))
da1ebad1 452 2)
b439e72a 453 ((string-match "^x-" (symbol-name mime))
da1ebad1
KH
454 1)
455 (t 3))
b5edd1d1 456 0))
da1ebad1
KH
457 5)
458 (lsh (if (memq base lang-preferred) 1 0) 4)
459 (lsh (if (memq base from-categories) 1 0) 3)
460 (lsh (if (string-match "-with-esc\\'"
461 (symbol-name base))
462 0 1) 2)
463 (if (eq (coding-system-type base) 2)
464 ;; For ISO based coding systems, prefer
465 ;; one that doesn't use escape sequences.
466 (let ((flags (coding-system-flags base)))
467 (if (or (consp (aref flags 0))
468 (consp (aref flags 1))
469 (consp (aref flags 2))
470 (consp (aref flags 3)))
471 (if (or (aref flags 8) (aref flags 9))
472 0
473 1)
474 2))
475 1)))))))
b5edd1d1
KH
476 (sort codings (function (lambda (x y)
477 (> (funcall func x) (funcall func y))))))))
54b226f7 478
3fc7dfe5 479(defun find-coding-systems-region (from to)
54b226f7 480 "Return a list of proper coding systems to encode a text between FROM and TO.
d37ef0f6 481If FROM is a string, find coding systems in that instead of the buffer.
54b226f7
KH
482All coding systems in the list can safely encode any multibyte characters
483in the text.
484
e8dd0160 485If the text contains no multibyte characters, return a list of a single
3fc7dfe5 486element `undecided'."
b5edd1d1
KH
487 (let ((codings (find-coding-systems-region-internal from to)))
488 (if (eq codings t)
489 ;; The text contains only ASCII characters. Any coding
490 ;; systems are safe.
491 '(undecided)
492 ;; We need copy-sequence because sorting will alter the argument.
493 (sort-coding-systems (copy-sequence codings)))))
54b226f7 494
3fc7dfe5
KH
495(defun find-coding-systems-string (string)
496 "Return a list of proper coding systems to encode STRING.
497All coding systems in the list can safely encode any multibyte characters
498in STRING.
499
e8dd0160 500If STRING contains no multibyte characters, return a list of a single
3fc7dfe5 501element `undecided'."
b5edd1d1 502 (find-coding-systems-region string nil))
3fc7dfe5
KH
503
504(defun find-coding-systems-for-charsets (charsets)
505 "Return a list of proper coding systems to encode characters of CHARSETS.
6053d86a
KH
506CHARSETS is a list of character sets.
507It actually checks at most the first 96 characters of each charset.
508So, if a charset of dimension two is included in CHARSETS, the value may
509contain a coding system that can't encode all characters of the charset."
b5edd1d1
KH
510 (cond ((or (null charsets)
511 (and (= (length charsets) 1)
512 (eq 'ascii (car charsets))))
513 '(undecided))
514 ((or (memq 'eight-bit-control charsets)
515 (memq 'eight-bit-graphic charsets))
516 '(raw-text emacs-mule))
517 (t
518 (let ((codings t)
6053d86a 519 charset l str)
b5edd1d1
KH
520 (while (and codings charsets)
521 (setq charset (car charsets) charsets (cdr charsets))
522 (unless (eq charset 'ascii)
6053d86a
KH
523 (setq str (make-string 96 32))
524 (if (= (charset-dimension charset) 1)
525 (if (= (charset-chars charset) 96)
526 (dotimes (i 96)
527 (aset str i (make-char charset (+ i 32))))
528 (dotimes (i 94)
529 (aset str i (make-char charset (+ i 33)))))
530 (if (= (charset-chars charset) 96)
531 (dotimes (i 96)
532 (aset str i (make-char charset 32 (+ i 32))))
533 (dotimes (i 94)
534 (aset str i (make-char charset 33 (+ i 33))))))
535 (setq l (find-coding-systems-string str))
b5edd1d1
KH
536 (if (eq codings t)
537 (setq codings l)
538 (let ((ll nil))
6053d86a
KH
539 (dolist (elt codings)
540 (if (memq elt l)
541 (setq ll (cons elt ll))))
b5edd1d1 542 (setq codings ll)))))
6053d86a 543 codings))))
54b226f7 544
51ed58ea
KH
545(defun find-multibyte-characters (from to &optional maxcount excludes)
546 "Find multibyte characters in the region specified by FROM and TO.
547If FROM is a string, find multibyte characters in the string.
548The return value is an alist of the following format:
549 ((CHARSET COUNT CHAR ...) ...)
550where
551 CHARSET is a character set,
552 COUNT is a number of characters,
34703cb1 553 CHARs are the characters found from the character set.
51ed58ea 554Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
251d4f4b
KH
555Optional 4th arg EXCLUDE is a list of character sets to be ignored.
556
557For invalid characters, CHARs are actually strings."
51ed58ea
KH
558 (let ((chars nil)
559 charset char)
560 (if (stringp from)
561 (let ((idx 0))
562 (while (setq idx (string-match "[^\000-\177]" from idx))
563 (setq char (aref from idx)
564 charset (char-charset char))
251d4f4b
KH
565 (if (eq charset 'unknown)
566 (setq char (match-string 0)))
14333e31
KH
567 (if (or (memq charset '(unknown
568 eight-bit-control eight-bit-graphic))
251d4f4b 569 (not (or (eq excludes t) (memq charset excludes))))
51ed58ea
KH
570 (let ((slot (assq charset chars)))
571 (if slot
572 (if (not (memq char (nthcdr 2 slot)))
573 (let ((count (nth 1 slot)))
574 (setcar (cdr slot) (1+ count))
575 (if (or (not maxcount) (< count maxcount))
576 (nconc slot (list char)))))
577 (setq chars (cons (list charset 1 char) chars)))))
578 (setq idx (1+ idx))))
579 (save-excursion
580 (goto-char from)
581 (while (re-search-forward "[^\000-\177]" to t)
582 (setq char (preceding-char)
583 charset (char-charset char))
251d4f4b
KH
584 (if (eq charset 'unknown)
585 (setq char (match-string 0)))
14333e31 586 (if (or (memq charset '(unknown eight-bit-control eight-bit-graphic))
251d4f4b 587 (not (or (eq excludes t) (memq charset excludes))))
51ed58ea
KH
588 (let ((slot (assq charset chars)))
589 (if slot
251d4f4b 590 (if (not (member char (nthcdr 2 slot)))
51ed58ea
KH
591 (let ((count (nth 1 slot)))
592 (setcar (cdr slot) (1+ count))
593 (if (or (not maxcount) (< count maxcount))
594 (nconc slot (list char)))))
595 (setq chars (cons (list charset 1 char) chars))))))))
596 (nreverse chars)))
597
738746ba
KH
598
599(defun search-unencodable-char (coding-system)
600 "Search forward from point for a character that is not encodable.
601It asks which coding system to check.
602If such a character is found, set point after that character.
603Otherwise, don't move point.
604
605When called from a program, the value is a position of the found character,
606or nil if all characters are encodable."
607 (interactive
608 (list (let ((default (or buffer-file-coding-system 'us-ascii)))
609 (read-coding-system
610 (format "Coding-system (default, %s): " default)
611 default))))
612 (let ((pos (unencodable-char-position (point) (point-max) coding-system)))
613 (if pos
614 (goto-char (1+ pos))
615 (message "All following characters are encodable by %s" coding-system))
616 pos))
617
618
c83c4f60
RS
619(defvar last-coding-system-specified nil
620 "Most recent coding system explicitly specified by the user when asked.
621This variable is set whenever Emacs asks the user which coding system
622to use in order to write a file. If you set it to nil explicitly,
623then call `write-region', then afterward this variable will be non-nil
624only if the user was explicitly asked and specified a coding system.")
625
b5edd1d1
KH
626(defvar select-safe-coding-system-accept-default-p nil
627 "If non-nil, a function to control the behaviour of coding system selection.
628The meaning is the same as the argument ACCEPT-DEFAULT-P of the
629function `select-safe-coding-system' (which see). This variable
630overrides that argument.")
631
9ee5b744
SM
632(defun select-safe-coding-system-interactively (from to codings unsafe
633 &optional rejected default)
634 "Select interactively a coding system for the region FROM ... TO.
635FROM can be a string, as in `write-region'.
636CODINGS is the list of base coding systems known to be safe for this region,
637 typically obtained with `find-coding-systems-region'.
638UNSAFE is a list of coding systems known to be unsafe for this region.
639REJECTED is a list of coding systems which were safe but for some reason
640 were not recommended in the particular context.
641DEFAULT is the coding system to use by default in the query."
642 ;; At first, if some defaults are unsafe, record at most 11
643 ;; problematic characters and their positions for them by turning
644 ;; (CODING ...)
645 ;; into
646 ;; ((CODING (POS . CHAR) (POS . CHAR) ...) ...)
647 (if unsafe
648 (setq unsafe
649 (mapcar #'(lambda (coding)
650 (cons coding
651 (if (stringp from)
652 (mapcar #'(lambda (pos)
653 (cons pos (aref from pos)))
654 (unencodable-char-position
655 0 (length from) coding
656 11 from))
657 (mapcar #'(lambda (pos)
658 (cons pos (char-after pos)))
659 (unencodable-char-position
660 from to coding 11)))))
661 unsafe)))
662
663 ;; Change each safe coding system to the corresponding
664 ;; mime-charset name if it is also a coding system. Such a name
665 ;; is more friendly to users.
666 (let ((l codings)
667 mime-charset)
668 (while l
669 (setq mime-charset (coding-system-get (car l) 'mime-charset))
670 (if (and mime-charset (coding-system-p mime-charset))
671 (setcar l mime-charset))
672 (setq l (cdr l))))
673
674 ;; Don't offer variations with locking shift, which you
675 ;; basically never want.
676 (let (l)
677 (dolist (elt codings (setq codings (nreverse l)))
678 (unless (or (eq 'coding-category-iso-7-else
679 (coding-system-category elt))
680 (eq 'coding-category-iso-8-else
681 (coding-system-category elt)))
682 (push elt l))))
683
684 ;; Remove raw-text, emacs-mule and no-conversion unless nothing
685 ;; else is available.
686 (setq codings
687 (or (delq 'raw-text
688 (delq 'emacs-mule
689 (delq 'no-conversion codings)))
690 '(raw-text emacs-mule no-conversion)))
691
692 (let ((window-configuration (current-window-configuration))
693 (bufname (buffer-name))
694 coding-system)
695 (save-excursion
696 ;; If some defaults are unsafe, make sure the offending
697 ;; buffer is displayed.
698 (when (and unsafe (not (stringp from)))
699 (pop-to-buffer bufname)
700 (goto-char (apply 'min (mapcar #'(lambda (x) (car (cadr x)))
701 unsafe))))
702 ;; Then ask users to select one from CODINGS while showing
703 ;; the reason why none of the defaults are not used.
704 (with-output-to-temp-buffer "*Warning*"
705 (with-current-buffer standard-output
706 (if (and (null rejected) (null unsafe))
707 (insert "No default coding systems to try for "
708 (if (stringp from)
709 (format "string \"%s\"." from)
710 (format "buffer `%s'." bufname)))
711 (insert
712 "These default coding systems were tried to encode"
713 (if (stringp from)
714 (concat " \"" (if (> (length from) 10)
715 (concat (substring from 0 10) "...\"")
716 (concat from "\"")))
717 (format " text\nin the buffer `%s'" bufname))
718 ":\n")
719 (let ((pos (point))
720 (fill-prefix " "))
721 (dolist (x (append rejected unsafe))
722 (princ " ") (princ (car x)))
723 (insert "\n")
724 (fill-region-as-paragraph pos (point)))
725 (when rejected
726 (insert "These safely encodes the target text,
727but it is not recommended for encoding text in this context,
728e.g., for sending an email message.\n ")
729 (dolist (x rejected)
730 (princ " ") (princ x))
731 (insert "\n"))
732 (when unsafe
733 (insert (if rejected "And the others"
734 "However, each of them")
735 " encountered these problematic characters:\n")
736 (dolist (coding unsafe)
737 (insert (format " %s:" (car coding)))
738 (let ((i 0)
739 (func1
740 #'(lambda (bufname pos)
741 (when (buffer-live-p (get-buffer bufname))
742 (pop-to-buffer bufname)
743 (goto-char pos))))
744 (func2
745 #'(lambda (bufname pos coding)
746 (when (buffer-live-p (get-buffer bufname))
747 (pop-to-buffer bufname)
748 (if (< (point) pos)
749 (goto-char pos)
750 (forward-char 1)
751 (search-unencodable-char coding)
752 (forward-char -1))))))
753 (dolist (elt (cdr coding))
754 (insert " ")
755 (if (stringp from)
756 (insert (if (< i 10) (cdr elt) "..."))
757 (if (< i 10)
758 (insert-text-button
759 (cdr elt)
760 :type 'help-xref
761 'help-echo
762 "mouse-2, RET: jump to this character"
763 'help-function func1
764 'help-args (list bufname (car elt)))
765 (insert-text-button
766 "..."
767 :type 'help-xref
768 'help-echo
769 "mouse-2, RET: next unencodable character"
770 'help-function func2
771 'help-args (list bufname (car elt)
772 (car coding)))))
773 (setq i (1+ i))))
774 (insert "\n"))
775 (insert "\
776The first problematic character is at point in the displayed buffer,\n"
777 (substitute-command-keys "\
778and \\[universal-argument] \\[what-cursor-position] will give information about it.\n"))))
779 (insert "\nSelect \
780one of the following safe coding systems, or edit the buffer:\n")
781 (let ((pos (point))
782 (fill-prefix " "))
783 (dolist (x codings)
784 (princ " ") (princ x))
785 (insert "\n")
786 (fill-region-as-paragraph pos (point)))
787 (insert "Or specify any other coding system
788at the risk of losing the problematic characters.\n")))
789
790 ;; Read a coding system.
791 (setq coding-system
792 (read-coding-system
793 (format "Select coding system (default %s): " default)
794 default))
795 (setq last-coding-system-specified coding-system))
796
797 (kill-buffer "*Warning*")
798 (set-window-configuration window-configuration)
799 coding-system))
800
b5edd1d1 801(defun select-safe-coding-system (from to &optional default-coding-system
efa2c6d7 802 accept-default-p file)
d5266ddf
KH
803 "Ask a user to select a safe coding system from candidates.
804The candidates of coding systems which can safely encode a text
b5edd1d1
KH
805between FROM and TO are shown in a popup window. Among them, the most
806proper one is suggested as the default.
807
a985cd2f
KH
808The list of `buffer-file-coding-system' of the current buffer,
809the `default-buffer-file-coding-system', and the
b5edd1d1
KH
810most preferred coding system (if it corresponds to a MIME charset) is
811treated as the default coding system list. Among them, the first one
d37ef0f6
DL
812that safely encodes the text is normally selected silently and
813returned without any user interaction. See also the command
814`prefer-coding-system'.
815
816However, the user is queried if the chosen coding system is
817inconsistent with what would be selected by `set-auto-coding' from
818coding cookies &c. if the contents of the region were read from a
819file. (That could lead to data corruption in a file subsequently
820re-visited and edited.)
54b226f7 821
b5edd1d1
KH
822Optional 3rd arg DEFAULT-CODING-SYSTEM specifies a coding system or a
823list of coding systems to be prepended to the default coding system
0e9ec609
KH
824list. However, if DEFAULT-CODING-SYSTEM is a list and the first
825element is t, the cdr part is used as the defualt coding system list,
a985cd2f
KH
826i.e. `buffer-file-coding-system', `default-buffer-file-coding-system',
827and the most preferred coding system are not used.
54b226f7 828
b5edd1d1
KH
829Optional 4th arg ACCEPT-DEFAULT-P, if non-nil, is a function to
830determine the acceptability of the silently selected coding system.
831It is called with that coding system, and should return nil if it
832should not be silently selected and thus user interaction is required.
833
efa2c6d7
RS
834Optional 5th arg FILE is the file name to use for this purpose.
835That is different from `buffer-file-name' when handling `write-region'
836\(for example).
837
b5edd1d1
KH
838The variable `select-safe-coding-system-accept-default-p', if
839non-nil, overrides ACCEPT-DEFAULT-P.
54b226f7
KH
840
841Kludgy feature: if FROM is a string, the string is the target text,
842and TO is ignored."
b5edd1d1
KH
843 (if (and default-coding-system
844 (not (listp default-coding-system)))
845 (setq default-coding-system (list default-coding-system)))
846
0e9ec609
KH
847 (let ((no-other-defaults nil))
848 (if (eq (car default-coding-system) t)
849 (setq no-other-defaults t
850 default-coding-system (cdr default-coding-system)))
851
852 ;; Change elements of the list to (coding . base-coding).
853 (setq default-coding-system
854 (mapcar (function (lambda (x) (cons x (coding-system-base x))))
855 default-coding-system))
856
a985cd2f
KH
857 ;; From now on, the list of defaults is reversed.
858 (setq default-coding-system (nreverse default-coding-system))
859
0e9ec609
KH
860 (unless no-other-defaults
861 ;; If buffer-file-coding-system is not nil nor undecided, append it
862 ;; to the defaults.
863 (if buffer-file-coding-system
864 (let ((base (coding-system-base buffer-file-coding-system)))
865 (or (eq base 'undecided)
0e9ec609 866 (rassq base default-coding-system)
a985cd2f
KH
867 (push (cons buffer-file-coding-system base)
868 default-coding-system))))
869
870 ;; If default-buffer-file-coding-system is not nil nor undecided,
871 ;; append it to the defaults.
872 (if default-buffer-file-coding-system
873 (let ((base (coding-system-base default-buffer-file-coding-system)))
874 (or (eq base 'undecided)
875 (rassq base default-coding-system)
876 (push (cons default-buffer-file-coding-system base)
877 default-coding-system))))
0e9ec609
KH
878
879 ;; If the most preferred coding system has the property mime-charset,
880 ;; append it to the defaults.
881 (let ((tail coding-category-list)
882 preferred base)
a985cd2f 883 (while (and tail (not (setq preferred (symbol-value (car tail)))))
0e9ec609
KH
884 (setq tail (cdr tail)))
885 (and (coding-system-p preferred)
886 (setq base (coding-system-base preferred))
887 (coding-system-get preferred 'mime-charset)
0e9ec609 888 (not (rassq base default-coding-system))
a985cd2f
KH
889 (push (cons preferred base)
890 default-coding-system)))))
b5edd1d1
KH
891
892 (if select-safe-coding-system-accept-default-p
893 (setq accept-default-p select-safe-coding-system-accept-default-p))
894
895 (let ((codings (find-coding-systems-region from to))
896 (coding-system nil)
c0d3ed97 897 safe rejected unsafe)
b5edd1d1
KH
898 (if (eq (car codings) 'undecided)
899 ;; Any coding system is ok.
900 (setq coding-system t)
c0d3ed97
KH
901 ;; Classify the defaults into safe, rejected, and unsafe.
902 (dolist (elt default-coding-system)
903 (if (memq (cdr elt) codings)
904 (if (and (functionp accept-default-p)
905 (not (funcall accept-default-p (cdr elt))))
906 (push (car elt) rejected)
907 (push (car elt) safe))
908 (push (car elt) unsafe)))
909 (if safe
a985cd2f 910 (setq coding-system (car safe))))
c0d3ed97 911
b5edd1d1 912 ;; If all the defaults failed, ask a user.
c0d3ed97 913 (when (not coding-system)
9ee5b744
SM
914 (setq coding-system (select-safe-coding-system-interactively
915 from to codings unsafe rejected (car codings))))
b5edd1d1
KH
916
917 (if (vectorp (coding-system-eol-type coding-system))
918 (let ((eol (coding-system-eol-type buffer-file-coding-system)))
919 (if (numberp eol)
920 (setq coding-system
921 (coding-system-change-eol-conversion coding-system eol)))))
922
923 (if (eq coding-system t)
924 (setq coding-system buffer-file-coding-system))
43afed8c
RS
925 ;; Check we're not inconsistent with what `coding:' spec &c would
926 ;; give when file is re-read.
9e7638c3
RS
927 ;; But don't do this if we explicitly ignored the cookie
928 ;; by using `find-file-literally'.
901ae597
KH
929 (unless (or (stringp from)
930 find-file-literally
bc10bcbd
KH
931 (and coding-system
932 (memq (coding-system-type coding-system) '(0 5))))
efa2c6d7
RS
933 (let ((auto-cs (save-excursion
934 (save-restriction
935 (widen)
936 (narrow-to-region from to)
43afed8c 937 (goto-char (point-min))
efa2c6d7 938 (set-auto-coding (or file buffer-file-name "")
43afed8c 939 (buffer-size))))))
6aa69bd7
KH
940 ;; Merge coding-system and auto-cs as far as possible.
941 (if (not coding-system)
942 (setq coding-system auto-cs)
943 (if (not auto-cs)
944 (setq auto-cs coding-system)
945 (let ((eol-type-1 (coding-system-eol-type coding-system))
946 (eol-type-2 (coding-system-eol-type auto-cs)))
947 (if (eq (coding-system-base coding-system) 'undecided)
948 (setq coding-system (coding-system-change-text-conversion
949 coding-system auto-cs))
950 (if (eq (coding-system-base auto-cs) 'undecided)
951 (setq auto-cs (coding-system-change-text-conversion
952 auto-cs coding-system))))
953 (if (vectorp eol-type-1)
954 (or (vectorp eol-type-2)
955 (setq coding-system (coding-system-change-eol-conversion
956 coding-system eol-type-2)))
957 (if (vectorp eol-type-2)
958 (setq auto-cs (coding-system-change-eol-conversion
959 auto-cs eol-type-1)))))))
960
961 (if (and auto-cs
d37ef0f6
DL
962 ;; Don't barf if writing a compressed file, say.
963 ;; This check perhaps isn't ideal, but is probably
964 ;; the best thing to do.
965 (not (auto-coding-alist-lookup (or file buffer-file-name "")))
6aa69bd7 966 (not (coding-system-equal coding-system auto-cs)))
43afed8c
RS
967 (unless (yes-or-no-p
968 (format "Selected encoding %s disagrees with \
969%s specified by file contents. Really save (else edit coding cookies \
970and try again)? " coding-system auto-cs))
41bfe57a
RS
971 (error "Save aborted")))))
972 coding-system))
54b226f7
KH
973
974(setq select-safe-coding-system-function 'select-safe-coding-system)
975
46babb23
KH
976(defun select-message-coding-system ()
977 "Return a coding system to encode the outgoing message of the current buffer.
978It at first tries the first coding system found in these variables
979in this order:
980 (1) local value of `buffer-file-coding-system'
981 (2) value of `sendmail-coding-system'
b5edd1d1
KH
982 (3) value of `default-sendmail-coding-system'
983 (4) value of `default-buffer-file-coding-system'
46babb23
KH
984If the found coding system can't encode the current buffer,
985or none of them are bound to a coding system,
48e41165 986it asks the user to select a proper coding system."
46babb23 987 (let ((coding (or (and (local-variable-p 'buffer-file-coding-system)
b5edd1d1
KH
988 buffer-file-coding-system)
989 sendmail-coding-system
990 default-sendmail-coding-system
991 default-buffer-file-coding-system)))
46babb23 992 (if (eq coding 'no-conversion)
d37ef0f6 993 ;; We should never use no-conversion for outgoing mail.
46babb23
KH
994 (setq coding nil))
995 (if (fboundp select-safe-coding-system-function)
996 (funcall select-safe-coding-system-function
b5edd1d1
KH
997 (point-min) (point-max) coding
998 (function (lambda (x) (coding-system-get x 'mime-charset))))
46babb23 999 coding)))
4ed46869 1000\f
03c35c83 1001;;; Language support stuff.
4ed46869 1002
4ed46869 1003(defvar language-info-alist nil
2c395d56 1004 "Alist of language environment definitions.
4ed46869
KH
1005Each element looks like:
1006 (LANGUAGE-NAME . ((KEY . INFO) ...))
2c395d56
RS
1007where LANGUAGE-NAME is a string, the name of the language environment,
1008KEY is a symbol denoting the kind of information, and
1009INFO is the data associated with KEY.
1010Meaningful values for KEY include
1011
1012 documentation value is documentation of what this language environment
1013 is meant for, and how to use it.
1014 charset value is a list of the character sets used by this
1015 language environment.
d37ef0f6
DL
1016 sample-text value is an expression which is evalled to generate
1017 a line of text written using characters appropriate
1018 for this language environment.
2c395d56
RS
1019 setup-function value is a function to call to switch to this
1020 language environment.
1021 exit-function value is a function to call to leave this
1022 language environment.
1023 coding-system value is a list of coding systems that are good
1024 for saving text written in this language environment.
1025 This list serves as suggestions to the user;
1026 in effect, as a kind of documentation.
1027 coding-priority value is a list of coding systems for this language
1028 environment, in order of decreasing priority.
1029 This is used to set up the coding system priority
45d08cb2 1030 list when you switch to this language environment.
ddb5c041 1031 nonascii-translation
7624ebb9 1032 value is a translation table to be set in the
45d08cb2 1033 variable `nonascii-translation-table' in this
7624ebb9
KH
1034 language environment, or a character set from
1035 which `nonascii-insert-offset' is calculated.
ddb5c041
KH
1036 input-method value is a default input method for this language
1037 environment.
7624ebb9
KH
1038 features value is a list of features requested in this
1039 language environment.
0077e394
KH
1040 ctext-non-standard-encodings
1041 value is a list of non-standard encoding
1042 names used in extended segments of CTEXT.
1043 See the variable
1044 `ctext-non-standard-encodings' for more
1045 detail.
ddb5c041
KH
1046
1047The following keys take effect only when multibyte characters are
1048globally disabled, i.e. the value of `default-enable-multibyte-characters'
1049is nil.
1050
1051 unibyte-syntax value is a library name to load to set
e8dd0160 1052 unibyte 8-bit character syntaxes for this
ddb5c041
KH
1053 language environment.
1054
1055 unibyte-display value is a coding system to encode characters
1056 for the terminal. Characters in the range
1057 of 160 to 255 display not as octal escapes,
1058 but as non-ASCII characters in this language
1059 environment.")
2c395d56
RS
1060
1061(defun get-language-info (lang-env key)
1062 "Return information listed under KEY for language environment LANG-ENV.
1063KEY is a symbol denoting the kind of information.
1064For a list of useful values for KEY and their meanings,
1065see `language-info-alist'."
1066 (if (symbolp lang-env)
1067 (setq lang-env (symbol-name lang-env)))
f15466c5 1068 (let ((lang-slot (assoc-string lang-env language-info-alist t)))
4ed46869
KH
1069 (if lang-slot
1070 (cdr (assq key (cdr lang-slot))))))
1071
f08adf27 1072(defun set-language-info (lang-env key info)
2c395d56
RS
1073 "Modify part of the definition of language environment LANG-ENV.
1074Specifically, this stores the information INFO under KEY
1075in the definition of this language environment.
4ed46869 1076KEY is a symbol denoting the kind of information.
2c395d56 1077INFO is the value for that information.
281d03ec 1078
2c395d56 1079For a list of useful values for KEY and their meanings,
f08adf27 1080see `language-info-alist'."
2c395d56
RS
1081 (if (symbolp lang-env)
1082 (setq lang-env (symbol-name lang-env)))
4ed46869 1083 (let (lang-slot key-slot)
2c395d56 1084 (setq lang-slot (assoc lang-env language-info-alist))
4ed46869 1085 (if (null lang-slot) ; If no slot for the language, add it.
2c395d56 1086 (setq lang-slot (list lang-env)
4ed46869
KH
1087 language-info-alist (cons lang-slot language-info-alist)))
1088 (setq key-slot (assq key lang-slot))
1089 (if (null key-slot) ; If no slot for the key, add it.
1090 (progn
1091 (setq key-slot (list key))
1092 (setcdr lang-slot (cons key-slot (cdr lang-slot)))))
590dbcba 1093 (setcdr key-slot (purecopy info))
1169bd86 1094 ;; Update the custom-type of `current-language-environment'.
590dbcba 1095 (put 'current-language-environment 'custom-type
1169bd86
MR
1096 (cons 'choice (mapcar
1097 (lambda (lang)
756e055f
MR
1098 (list 'const lang))
1099 (sort (mapcar 'car language-info-alist) 'string<))))))
4ed46869 1100
2c395d56
RS
1101(defun set-language-info-alist (lang-env alist &optional parents)
1102 "Store ALIST as the definition of language environment LANG-ENV.
1103ALIST is an alist of KEY and INFO values. See the documentation of
98c6d6ed 1104`language-info-alist' for the meanings of KEY and INFO.
54b226f7 1105
2c395d56 1106Optional arg PARENTS is a list of parent menu names; it specifies
d37ef0f6 1107where to put this language environment in the
2c395d56
RS
1108Describe Language Environment and Set Language Environment menus.
1109For example, (\"European\") means to put this language environment
1110in the European submenu in each of those two menus."
1111 (if (symbolp lang-env)
1112 (setq lang-env (symbol-name lang-env)))
54b226f7
KH
1113 (let ((describe-map describe-language-environment-map)
1114 (setup-map setup-language-environment-map))
1115 (if parents
1116 (let ((l parents)
9deed82f 1117 map parent-symbol parent prompt)
54b226f7
KH
1118 (while l
1119 (if (symbolp (setq parent-symbol (car l)))
1120 (setq parent (symbol-name parent))
1121 (setq parent parent-symbol parent-symbol (intern parent)))
1122 (setq map (lookup-key describe-map (vector parent-symbol)))
9deed82f
EZ
1123 ;; This prompt string is for define-prefix-command, so
1124 ;; that the map it creates will be suitable for a menu.
1125 (or map (setq prompt (format "%s Environment" parent)))
54b226f7
KH
1126 (if (not map)
1127 (progn
1128 (setq map (intern (format "describe-%s-environment-map"
1129 (downcase parent))))
9deed82f 1130 (define-prefix-command map nil prompt)
54b226f7
KH
1131 (define-key-after describe-map (vector parent-symbol)
1132 (cons parent map) t)))
1133 (setq describe-map (symbol-value map))
1134 (setq map (lookup-key setup-map (vector parent-symbol)))
1135 (if (not map)
1136 (progn
1137 (setq map (intern (format "setup-%s-environment-map"
1138 (downcase parent))))
9deed82f 1139 (define-prefix-command map nil prompt)
54b226f7
KH
1140 (define-key-after setup-map (vector parent-symbol)
1141 (cons parent map) t)))
1142 (setq setup-map (symbol-value map))
1143 (setq l (cdr l)))))
f08adf27
RS
1144
1145 ;; Set up menu items for this language env.
7624ebb9 1146 (let ((doc (assq 'documentation alist)))
f08adf27
RS
1147 (when doc
1148 (define-key-after describe-map (vector (intern lang-env))
7624ebb9
KH
1149 (cons lang-env 'describe-specified-language-support) t)))
1150 (define-key-after setup-map (vector (intern lang-env))
1151 (cons lang-env 'setup-specified-language-environment) t)
f08adf27 1152
54b226f7 1153 (while alist
f08adf27 1154 (set-language-info lang-env (car (car alist)) (cdr (car alist)))
54b226f7 1155 (setq alist (cdr alist)))))
4ed46869 1156
ae302641 1157(defun read-language-name (key prompt &optional default)
2c395d56 1158 "Read a language environment name which has information for KEY.
ddb5c041 1159If KEY is nil, read any language environment.
2c395d56
RS
1160Prompt with PROMPT. DEFAULT is the default choice of language environment.
1161This returns a language environment name as a string."
4ed46869
KH
1162 (let* ((completion-ignore-case t)
1163 (name (completing-read prompt
1164 language-info-alist
ddb5c041 1165 (and key
ca429a25 1166 (function (lambda (elm) (and (listp elm) (assq key elm)))))
ae302641 1167 t nil nil default)))
13e82c04 1168 (if (and (> (length name) 0)
ddb5c041
KH
1169 (or (not key)
1170 (get-language-info name key)))
13e82c04 1171 name)))
4ed46869
KH
1172\f
1173;;; Multilingual input methods.
d37ef0f6 1174(defgroup leim nil
d0c40faa
KH
1175 "LEIM: Libraries of Emacs Input Methods."
1176 :group 'mule)
4ed46869 1177
d0b9c3ab
KH
1178(defconst leim-list-file-name "leim-list.el"
1179 "Name of LEIM list file.
1180This file contains a list of libraries of Emacs input methods (LEIM)
1181in the format of Lisp expression for registering each input method.
1182Emacs loads this file at startup time.")
1183
2e224638 1184(defvar leim-list-header (format
d37ef0f6 1185";;; %s -- list of LEIM (Library of Emacs Input Method) -*-coding: iso-2022-7bit;-*-
d0b9c3ab
KH
1186;;
1187;; This file contains a list of LEIM (Library of Emacs Input Method)
c654de1d
DL
1188;; methods in the same directory as this file. Loading this file
1189;; registers all the input methods in Emacs.
d0b9c3ab 1190;;
d33d5fbe 1191;; Each entry has the form:
d0b9c3ab
KH
1192;; (register-input-method
1193;; INPUT-METHOD LANGUAGE-NAME ACTIVATE-FUNC
1194;; TITLE DESCRIPTION
1195;; ARG ...)
c654de1d 1196;; See the function `register-input-method' for the meanings of the arguments.
d0b9c3ab
KH
1197;;
1198;; If this directory is included in load-path, Emacs automatically
1199;; loads this file at startup time.
1200
1201"
1202 leim-list-file-name)
1203 "Header to be inserted in LEIM list file.")
1204
e55e92ee 1205(defvar leim-list-entry-regexp "^(register-input-method"
d0b9c3ab
KH
1206 "Regexp matching head of each entry in LEIM list file.
1207See also the variable `leim-list-header'")
1208
1209(defvar update-leim-list-functions
1210 '(quail-update-leim-list-file)
1211 "List of functions to call to update LEIM list file.
1212Each function is called with one arg, LEIM directory name.")
1213
a337fe7f
RS
1214(defun update-leim-list-file (&rest dirs)
1215 "Update LEIM list file in directories DIRS."
d0b9c3ab
KH
1216 (let ((functions update-leim-list-functions))
1217 (while functions
a337fe7f 1218 (apply (car functions) dirs)
d0b9c3ab
KH
1219 (setq functions (cdr functions)))))
1220
4ed46869
KH
1221(defvar current-input-method nil
1222 "The current input method for multilingual text.
96db204a 1223If nil, that means no input method is activated now.")
4ed46869
KH
1224(make-variable-buffer-local 'current-input-method)
1225(put 'current-input-method 'permanent-local t)
1226
1227(defvar current-input-method-title nil
d0b9c3ab 1228 "Title string of the current input method shown in mode line.")
4ed46869
KH
1229(make-variable-buffer-local 'current-input-method-title)
1230(put 'current-input-method-title 'permanent-local t)
1231
b4fba33f 1232(defcustom default-input-method nil
8861c593 1233 "*Default input method for multilingual text (a string).
b4fba33f 1234This is the input method activated automatically by the command
9b10b5a3 1235`toggle-input-method' (\\[toggle-input-method])."
34703cb1 1236 :link '(custom-manual "(emacs)Input Methods")
8861c593 1237 :group 'mule
34703cb1
DL
1238 :type '(choice (const nil) (string
1239 :completion-ignore-case t
1240 :complete-function widget-string-complete
1241 :completion-alist input-method-alist
1242 :prompt-history input-method-history))
5806e8a6 1243 :set-after '(current-language-environment))
b4fba33f 1244
0f835e87
KH
1245(put 'input-method-function 'permanent-local t)
1246
723a427a
KH
1247(defvar input-method-history nil
1248 "History list for some commands that read input methods.")
1249(make-variable-buffer-local 'input-method-history)
1250(put 'input-method-history 'permanent-local t)
4ed46869
KH
1251
1252(defvar inactivate-current-input-method-function nil
1253 "Function to call for inactivating the current input method.
1254Every input method should set this to an appropriate value when activated.
f17ccaee
KH
1255This function is called with no argument.
1256
1257This function should never change the value of `current-input-method'.
1258It is set to nil by the function `inactivate-input-method'.")
4ed46869
KH
1259(make-variable-buffer-local 'inactivate-current-input-method-function)
1260(put 'inactivate-current-input-method-function 'permanent-local t)
1261
1262(defvar describe-current-input-method-function nil
1263 "Function to call for describing the current input method.
1264This function is called with no argument.")
1265(make-variable-buffer-local 'describe-current-input-method-function)
1266(put 'describe-current-input-method-function 'permanent-local t)
1267
d0b9c3ab 1268(defvar input-method-alist nil
2c395d56 1269 "Alist of input method names vs how to use them.
d0b9c3ab 1270Each element has the form:
2c395d56
RS
1271 (INPUT-METHOD LANGUAGE-ENV ACTIVATE-FUNC TITLE DESCRIPTION ARGS...)
1272See the function `register-input-method' for the meanings of the elements.")
1273
f08adf27 1274(defun register-input-method (input-method lang-env &rest args)
bf42aa15 1275 "Register INPUT-METHOD as an input method for language environment LANG-ENV.
f08adf27 1276INPUT-METHOD and LANG-ENV are symbols or strings.
d0b9c3ab 1277
d0b9c3ab 1278The remaining arguments are:
2c395d56
RS
1279 ACTIVATE-FUNC, TITLE, DESCRIPTION, and ARGS...
1280ACTIVATE-FUNC is a function to call to activate this method.
1281TITLE is a string to show in the mode line when this method is active.
1282DESCRIPTION is a string describing this method and what it is good for.
1283The ARGS, if any, are passed as arguments to ACTIVATE-FUNC.
205814ee
KH
1284All told, the arguments to ACTIVATE-FUNC are INPUT-METHOD and the ARGS.
1285
c654de1d
DL
1286This function is mainly used in the file \"leim-list.el\" which is
1287created at Emacs build time, registering all Quail input methods
0b6cadff 1288contained in the Emacs distribution.
205814ee 1289
0b6cadff 1290In case you want to register a new Quail input method by yourself, be
205814ee 1291careful to use the same input method title as given in the third
0b6cadff
DL
1292parameter of `quail-define-package'. (If the values are different, the
1293string specified in this function takes precedence.)
205814ee
KH
1294
1295The commands `describe-input-method' and `list-input-methods' need
0b6cadff
DL
1296these duplicated values to show some information about input methods
1297without loading the relevant Quail packages."
f08adf27
RS
1298 (if (symbolp lang-env)
1299 (setq lang-env (symbol-name lang-env)))
4ef06f75
KH
1300 (if (symbolp input-method)
1301 (setq input-method (symbol-name input-method)))
f08adf27 1302 (let ((info (cons lang-env args))
d0b9c3ab
KH
1303 (slot (assoc input-method input-method-alist)))
1304 (if slot
1305 (setcdr slot info)
1306 (setq slot (cons input-method info))
1307 (setq input-method-alist (cons slot input-method-alist)))))
1308
4d5ac029 1309(defun read-input-method-name (prompt &optional default inhibit-null)
d0b9c3ab 1310 "Read a name of input method from a minibuffer prompting with PROMPT.
4d5ac029 1311If DEFAULT is non-nil, use that as the default,
0b6cadff 1312and substitute it into PROMPT at the first `%s'.
4ef06f75
KH
1313If INHIBIT-NULL is non-nil, null input signals an error.
1314
1315The return value is a string."
4d5ac029
RS
1316 (if default
1317 (setq prompt (format prompt default)))
d0b9c3ab 1318 (let* ((completion-ignore-case t)
c54044ff
KH
1319 ;; As it is quite normal to change input method in the
1320 ;; minibuffer, we must enable it even if
1321 ;; enable-recursive-minibuffers is currently nil.
1322 (enable-recursive-minibuffers t)
723a427a
KH
1323 ;; This binding is necessary because input-method-history is
1324 ;; buffer local.
d0b9c3ab 1325 (input-method (completing-read prompt input-method-alist
87505a98
RS
1326 nil t nil 'input-method-history
1327 default)))
bf294e6e
KH
1328 (if (and input-method (symbolp input-method))
1329 (setq input-method (symbol-name input-method)))
d0b9c3ab
KH
1330 (if (> (length input-method) 0)
1331 input-method
1332 (if inhibit-null
43807b77 1333 (error "No valid input method is specified")))))
d0b9c3ab 1334
d0b9c3ab 1335(defun activate-input-method (input-method)
2c395d56
RS
1336 "Switch to input method INPUT-METHOD for the current buffer.
1337If some other input method is already active, turn it off first.
1338If INPUT-METHOD is nil, deactivate any current input method."
305a3cb6 1339 (if (and input-method (symbolp input-method))
4ef06f75 1340 (setq input-method (symbol-name input-method)))
723a427a
KH
1341 (if (and current-input-method
1342 (not (string= current-input-method input-method)))
305a3cb6 1343 (inactivate-input-method))
2c395d56 1344 (unless (or current-input-method (null input-method))
d0b9c3ab
KH
1345 (let ((slot (assoc input-method input-method-alist)))
1346 (if (null slot)
723a427a 1347 (error "Can't activate input method `%s'" input-method))
278dd6ac 1348 (setq current-input-method-title nil)
8efc03e1
KH
1349 (let ((func (nth 2 slot)))
1350 (if (functionp func)
1351 (apply (nth 2 slot) input-method (nthcdr 5 slot))
1352 (if (and (consp func) (symbolp (car func)) (symbolp (cdr func)))
1353 (progn
1354 (require (cdr func))
1355 (apply (car func) input-method (nthcdr 5 slot)))
1356 (error "Can't activate input method `%s'" input-method))))
d0b9c3ab 1357 (setq current-input-method input-method)
278dd6ac
KH
1358 (or (stringp current-input-method-title)
1359 (setq current-input-method-title (nth 3 slot)))
28885c0e
KH
1360 (unwind-protect
1361 (run-hooks 'input-method-activate-hook)
1362 (force-mode-line-update)))))
15b3e511 1363
15b3e511 1364(defun inactivate-input-method ()
f17ccaee 1365 "Turn off the current input method."
723a427a
KH
1366 (when current-input-method
1367 (if input-method-history
1368 (unless (string= current-input-method (car input-method-history))
1369 (setq input-method-history
1370 (cons current-input-method
1371 (delete current-input-method input-method-history))))
1372 (setq input-method-history (list current-input-method)))
1373 (unwind-protect
1374 (funcall inactivate-current-input-method-function)
15b3e511 1375 (unwind-protect
723a427a
KH
1376 (run-hooks 'input-method-inactivate-hook)
1377 (setq current-input-method nil
2b1ec973 1378 input-method-function nil
28885c0e
KH
1379 current-input-method-title nil)
1380 (force-mode-line-update)))))
4ed46869 1381
e893eae2 1382(defun set-input-method (input-method &optional interactive)
2c395d56 1383 "Select and activate input method INPUT-METHOD for the current buffer.
bc406911 1384This also sets the default input method to the one you specify.
402dbbd1
EZ
1385If INPUT-METHOD is nil, this function turns off the input method, and
1386also causes you to be prompted for a name of an input method the next
1387time you invoke \\[toggle-input-method].
e893eae2
RS
1388When called interactively, the optional arg INTERACTIVE is non-nil,
1389which marks the variable `default-input-method' as set for Custom buffers.
402dbbd1 1390
bc406911
EZ
1391To deactivate the input method interactively, use \\[toggle-input-method].
1392To deactivate it programmatically, use \\[inactivate-input-method]."
d0b9c3ab 1393 (interactive
723a427a 1394 (let* ((default (or (car input-method-history) default-input-method)))
42395763 1395 (list (read-input-method-name
87505a98 1396 (if default "Select input method (default %s): " "Select input method: ")
e893eae2
RS
1397 default t)
1398 t)))
d0b9c3ab 1399 (activate-input-method input-method)
f4990970 1400 (setq default-input-method input-method)
e893eae2 1401 (when interactive
f4990970
PA
1402 (customize-mark-as-set 'default-input-method))
1403 default-input-method)
4ed46869 1404
e893eae2 1405(defun toggle-input-method (&optional arg interactive)
f8ec20be
RS
1406 "Enable or disable multilingual text input method for the current buffer.
1407Only one input method can be enabled at any time in a given buffer.
1408
1409The normal action is to enable an input method if none was
1410enabled, and disable the current one otherwise. Which input method
1411to enable can be determined in various ways--either the one most
1412recently used, or the one specified by `default-input-method', or
1413as a last resort by reading the name of an input method in the
1414minibuffer.
1415
1416With a prefix argument, read an input method name with the minibuffer
1417and enable that one. The default is the most recent input method specified
e893eae2 1418\(not including the currently active input method, if any).
f8ec20be 1419
e893eae2
RS
1420When called interactively, the optional arg INTERACTIVE is non-nil,
1421which marks the variable `default-input-method' as set for Custom buffers."
1422
1423 (interactive "P\np")
7ddbb5bc
RS
1424 (if (and current-input-method (not arg))
1425 (inactivate-input-method)
1426 (let ((default (or (car input-method-history) default-input-method)))
1427 (if (and arg default (equal current-input-method default)
1428 (> (length input-method-history) 1))
1429 (setq default (nth 1 input-method-history)))
723a427a
KH
1430 (activate-input-method
1431 (if (or arg (not default))
7ddbb5bc
RS
1432 (progn
1433 (read-input-method-name
1434 (if default "Input method (default %s): " "Input method: " )
1435 default t))
723a427a 1436 default))
f4990970 1437 (unless default-input-method
d37ef0f6 1438 (prog1
f4990970 1439 (setq default-input-method current-input-method)
e893eae2 1440 (when interactive
f4990970 1441 (customize-mark-as-set 'default-input-method)))))))
d0b9c3ab
KH
1442
1443(defun describe-input-method (input-method)
2c395d56 1444 "Describe input method INPUT-METHOD."
d0b9c3ab
KH
1445 (interactive
1446 (list (read-input-method-name
1447 "Describe input method (default, current choice): ")))
78754934 1448 (if (and input-method (symbolp input-method))
4ef06f75 1449 (setq input-method (symbol-name input-method)))
43125c28
RS
1450 (help-setup-xref (list #'describe-input-method
1451 (or input-method current-input-method))
f80e2142
RS
1452 (interactive-p))
1453
d0b9c3ab
KH
1454 (if (null input-method)
1455 (describe-current-input-method)
464cc130
KH
1456 (let ((current current-input-method))
1457 (condition-case nil
1458 (progn
1459 (save-excursion
1460 (activate-input-method input-method)
1461 (describe-current-input-method))
1462 (activate-input-method current))
d37ef0f6 1463 (error
464cc130 1464 (activate-input-method current)
5f395df3
SM
1465 (help-setup-xref (list #'describe-input-method input-method)
1466 (interactive-p))
1467 (with-output-to-temp-buffer (help-buffer)
464cc130
KH
1468 (let ((elt (assoc input-method input-method-alist)))
1469 (princ (format
1470 "Input method: %s (`%s' in mode line) for %s\n %s\n"
1471 input-method (nth 3 elt) (nth 1 elt) (nth 4 elt))))))))))
d0b9c3ab
KH
1472
1473(defun describe-current-input-method ()
f80e2142
RS
1474 "Describe the input method currently in use.
1475This is a subroutine for `describe-input-method'."
4ed46869
KH
1476 (if current-input-method
1477 (if (and (symbolp describe-current-input-method-function)
1478 (fboundp describe-current-input-method-function))
1479 (funcall describe-current-input-method-function)
1480 (message "No way to describe the current input method `%s'"
f2979bdb 1481 current-input-method)
4ed46869 1482 (ding))
d0b9c3ab 1483 (error "No input method is activated now")))
4ed46869 1484
d3459641 1485(defun read-multilingual-string (prompt &optional initial-input input-method)
4ed46869
KH
1486 "Read a multilingual string from minibuffer, prompting with string PROMPT.
1487The input method selected last time is activated in minibuffer.
15b3e511 1488If optional second arg INITIAL-INPUT is non-nil, insert it in the minibuffer
d0b9c3ab
KH
1489initially.
1490Optional 3rd argument INPUT-METHOD specifies the input method
4ef06f75
KH
1491to be activated instead of the one selected last time. It is a symbol
1492or a string."
88d559ec
KH
1493 (setq input-method
1494 (or input-method
d3459641 1495 current-input-method
88d559ec
KH
1496 default-input-method
1497 (read-input-method-name "Input method: " nil t)))
3df60841 1498 (if (and input-method (symbolp input-method))
4ef06f75 1499 (setq input-method (symbol-name input-method)))
305a3cb6
KH
1500 (let ((prev-input-method current-input-method))
1501 (unwind-protect
1502 (progn
1503 (activate-input-method input-method)
1504 (read-string prompt initial-input nil nil t))
1505 (activate-input-method prev-input-method))))
4ed46869
KH
1506
1507;; Variables to control behavior of input methods. All input methods
1508;; should react to these variables.
1509
8efc03e1
KH
1510(defcustom input-method-verbose-flag 'default
1511 "*A flag to control extra guidance given by input methods.
1512The value should be nil, t, `complex-only', or `default'.
4ed46869 1513
cb29dfb6 1514The extra guidance is done by showing list of available keys in echo
8efc03e1
KH
1515area. When you use the input method in the minibuffer, the guidance
1516is shown at the bottom short window (split from the existing window).
c27c4ed8 1517
8efc03e1
KH
1518If the value is t, extra guidance is always given, if the value is
1519nil, extra guidance is always suppressed.
1520
1521If the value is `complex-only', only complex input methods such as
1522`chinese-py' and `japanese' give extra guidance.
1523
1524If the value is `default', complex input methods always give extra
1525guidance, but simple input methods give it only when you are not in
1526the minibuffer.
1527
1528See also the variable `input-method-highlight-flag'."
d37ef0f6
DL
1529 :type '(choice (const :tag "Always" t) (const :tag "Never" nil)
1530 (const complex-only) (const default))
42395763
RS
1531 :group 'mule)
1532
1533(defcustom input-method-highlight-flag t
1534 "*If this flag is non-nil, input methods highlight partially-entered text.
1535For instance, while you are in the middle of a Quail input method sequence,
1536the text inserted so far is temporarily underlined.
8efc03e1
KH
1537The underlining goes away when you finish or abort the input method sequence.
1538See also the variable `input-method-verbose-flag'."
42395763
RS
1539 :type 'boolean
1540 :group 'mule)
4ed46869
KH
1541
1542(defvar input-method-activate-hook nil
f17ccaee
KH
1543 "Normal hook run just after an input method is activated.
1544
1545The variable `current-input-method' keeps the input method name
1546just activated.")
4ed46869
KH
1547
1548(defvar input-method-inactivate-hook nil
f17ccaee
KH
1549 "Normal hook run just after an input method is inactivated.
1550
1551The variable `current-input-method' still keeps the input method name
4d0e6a11 1552just inactivated.")
4ed46869
KH
1553
1554(defvar input-method-after-insert-chunk-hook nil
1555 "Normal hook run just after an input method insert some chunk of text.")
1556
dccca980 1557(defvar input-method-exit-on-first-char nil
0b6cadff 1558 "This flag controls when an input method returns.
dccca980
KH
1559Usually, the input method does not return while there's a possibility
1560that it may find a different translation if a user types another key.
39e643e2
RS
1561But, it this flag is non-nil, the input method returns as soon as
1562the current key sequence gets long enough to have some valid translation.")
dccca980
KH
1563
1564(defvar input-method-use-echo-area nil
1565 "This flag controls how an input method shows an intermediate key sequence.
39e643e2
RS
1566Usually, the input method inserts the intermediate key sequence,
1567or candidate translations corresponding to the sequence,
1568at point in the current buffer.
1569But, if this flag is non-nil, it displays them in echo area instead.")
dccca980 1570
723a427a
KH
1571(defvar input-method-exit-on-invalid-key nil
1572 "This flag controls the behaviour of an input method on invalid key input.
1573Usually, when a user types a key which doesn't start any character
1574handled by the input method, the key is handled by turning off the
e8dd0160 1575input method temporarily. After that key, the input method is re-enabled.
723a427a
KH
1576But, if this flag is non-nil, the input method is never back on.")
1577
4ed46869 1578\f
8efc03e1
KH
1579(defvar set-language-environment-hook nil
1580 "Normal hook run after some language environment is set.
1581
1582When you set some hook function here, that effect usually should not
1583be inherited to another language environment. So, you had better set
1584another function in `exit-language-environment-hook' (which see) to
1585cancel the effect.")
1586
1587(defvar exit-language-environment-hook nil
1588 "Normal hook run after exiting from some language environment.
1589When this hook is run, the variable `current-language-environment'
1590is still bound to the language environment being exited.
1591
e8dd0160 1592This hook is mainly used for canceling the effect of
8efc03e1
KH
1593`set-language-environment-hook' (which-see).")
1594
b0648a00
RS
1595(put 'setup-specified-language-environment 'apropos-inhibit t)
1596
15b3e511 1597(defun setup-specified-language-environment ()
f08adf27 1598 "Switch to a specified language environment."
15b3e511 1599 (interactive)
f850d782 1600 (let (language-name)
15b3e511
KH
1601 (if (and (symbolp last-command-event)
1602 (or (not (eq last-command-event 'Default))
1603 (setq last-command-event 'English))
f850d782 1604 (setq language-name (symbol-name last-command-event)))
f4990970
PA
1605 (prog1
1606 (set-language-environment language-name)
1607 (customize-mark-as-set 'current-language-environment))
15b3e511 1608 (error "Bogus calling sequence"))))
4ed46869 1609
8861c593 1610(defcustom current-language-environment "English"
94d04df6 1611 "The last language environment specified with `set-language-environment'.
ebef6d93
KH
1612This variable should be set only with \\[customize], which is equivalent
1613to using the function `set-language-environment'."
94d04df6 1614 :link '(custom-manual "(emacs)Language Environments")
dff1aa24 1615 :set (lambda (symbol value) (set-language-environment value))
94d04df6 1616 :get (lambda (x)
f15466c5 1617 (or (car-safe (assoc-string
94d04df6
DL
1618 (if (symbolp current-language-environment)
1619 (symbol-name current-language-environment)
1620 current-language-environment)
f15466c5 1621 language-info-alist t))
94d04df6 1622 "English"))
990a4108
MR
1623 ;; custom type will be updated with `set-language-info'.
1624 :type (if language-info-alist
1625 (cons 'choice (mapcar
1626 (lambda (lang)
756e055f
MR
1627 (list 'const lang))
1628 (sort (mapcar 'car language-info-alist) 'string<)))
990a4108 1629 'string)
8861c593 1630 :initialize 'custom-initialize-default
eb9fc9e6 1631 :group 'mule)
f850d782 1632
ddb5c041
KH
1633(defun reset-language-environment ()
1634 "Reset multilingual environment of Emacs to the default status.
1635
1636The default status is as follows:
1637
d37ef0f6 1638 The default value of `buffer-file-coding-system' is nil.
ddb5c041
KH
1639 The default coding system for process I/O is nil.
1640 The default value for the command `set-terminal-coding-system' is nil.
1641 The default value for the command `set-keyboard-coding-system' is nil.
1642
1643 The order of priorities of coding categories and the coding system
1644 bound to each category are as follows
1645 coding category coding system
1646 --------------------------------------------------
ddb5c041 1647 coding-category-iso-8-1 iso-latin-1
370bd9cf
KH
1648 coding-category-iso-8-2 iso-latin-1
1649 coding-category-utf-8 mule-utf-8
b510f863
KH
1650 coding-category-utf-16-be mule-utf-16be-with-signature
1651 coding-category-utf-16-le mule-utf-16le-with-signature
ddb5c041
KH
1652 coding-category-iso-7-tight iso-2022-jp
1653 coding-category-iso-7 iso-2022-7bit
1654 coding-category-iso-7-else iso-2022-7bit-lock
1655 coding-category-iso-8-else iso-2022-8bit-ss2
1656 coding-category-emacs-mule emacs-mule
1657 coding-category-raw-text raw-text
1658 coding-category-sjis japanese-shift-jis
1659 coding-category-big5 chinese-big5
1660 coding-category-ccl nil
370bd9cf 1661 coding-category-binary no-conversion"
ddb5c041
KH
1662 (interactive)
1663 ;; This function formerly set default-enable-multibyte-characters to t,
1664 ;; but that is incorrect. It should not alter the unibyte/multibyte choice.
1665
1666 (setq coding-category-iso-7-tight 'iso-2022-jp
1667 coding-category-iso-7 'iso-2022-7bit
1668 coding-category-iso-8-1 'iso-latin-1
1669 coding-category-iso-8-2 'iso-latin-1
1670 coding-category-iso-7-else 'iso-2022-7bit-lock
1671 coding-category-iso-8-else 'iso-2022-8bit-ss2
1672 coding-category-emacs-mule 'emacs-mule
1673 coding-category-raw-text 'raw-text
1674 coding-category-sjis 'japanese-shift-jis
1675 coding-category-big5 'chinese-big5
b510f863
KH
1676 coding-category-utf-16-be 'mule-utf-16be-with-signature
1677 coding-category-utf-16-le 'mule-utf-16le-with-signature
b909cfc0
KH
1678 coding-category-utf-8 'mule-utf-8
1679 coding-category-ccl nil
1680 coding-category-binary 'no-conversion)
ddb5c041
KH
1681
1682 (set-coding-priority
1683 '(coding-category-iso-8-1
1684 coding-category-iso-8-2
370bd9cf
KH
1685 coding-category-utf-8
1686 coding-category-utf-16-be
1687 coding-category-utf-16-le
ddb5c041
KH
1688 coding-category-iso-7-tight
1689 coding-category-iso-7
1690 coding-category-iso-7-else
1691 coding-category-iso-8-else
d37ef0f6 1692 coding-category-emacs-mule
ddb5c041
KH
1693 coding-category-raw-text
1694 coding-category-sjis
1695 coding-category-big5
1696 coding-category-ccl
370bd9cf 1697 coding-category-binary))
ddb5c041 1698
6a17cee0 1699 ;; Changing the binding of a coding category requires this call.
91693d18
KH
1700 (update-coding-systems-internal)
1701
ddb5c041 1702 (set-default-coding-systems nil)
b5edd1d1 1703 (setq default-sendmail-coding-system 'iso-latin-1)
a41118cc
SM
1704 ;; On Darwin systems, this should be utf-8, but when this file is loaded
1705 ;; utf-8 is not yet defined, so we set it in set-locale-environment instead.
787caf99 1706 (setq default-file-name-coding-system 'iso-latin-1)
1d77e15a
JR
1707 ;; Preserve eol-type from existing default-process-coding-systems.
1708 ;; On non-unix-like systems in particular, these may have been set
1709 ;; carefully by the user, or by the startup code, to deal with the
1710 ;; users shell appropriately, so should not be altered by changing
1711 ;; language environment.
1712 (let ((output-coding
a099a2ff
JR
1713 ;; When bootstrapping, coding-systems are not defined yet, so
1714 ;; we need to catch the error from check-coding-system.
d37ef0f6 1715 (condition-case nil
a099a2ff
JR
1716 (coding-system-change-text-conversion
1717 (car default-process-coding-system) 'undecided)
1718 (coding-system-error 'undecided)))
1d77e15a 1719 (input-coding
a099a2ff
JR
1720 (condition-case nil
1721 (coding-system-change-text-conversion
1722 (cdr default-process-coding-system) 'iso-latin-1)
1723 (coding-system-error 'iso-latin-1))))
1d77e15a
JR
1724 (setq default-process-coding-system
1725 (cons output-coding input-coding)))
b5edd1d1 1726
ddb5c041
KH
1727 ;; Don't alter the terminal and keyboard coding systems here.
1728 ;; The terminal still supports the same coding system
1729 ;; that it supported a minute ago.
1b8dc791
SM
1730 ;; (set-terminal-coding-system-internal nil)
1731 ;; (set-keyboard-coding-system-internal nil)
ddb5c041
KH
1732
1733 (setq nonascii-translation-table nil
0077e394
KH
1734 nonascii-insert-offset 0)
1735
5d26489f
EZ
1736 ;; Don't invoke fontset-related functions if fontsets aren't
1737 ;; supported in this build of Emacs.
1738 (and (fboundp 'fontset-list)
1739 (set-overriding-fontspec-internal nil)))
ddb5c041 1740
0c47a7c8
KH
1741(reset-language-environment)
1742
ff76e074 1743(defun set-display-table-and-terminal-coding-system (language-name &optional coding-system)
40c81f74
PE
1744 "Set up the display table and terminal coding system for LANGUAGE-NAME."
1745 (let ((coding (get-language-info language-name 'unibyte-display)))
1746 (if coding
1747 (standard-display-european-internal)
a67ae60e
EZ
1748 ;; The following 2 lines undo the 8-bit display that we set up
1749 ;; in standard-display-european-internal, which see. This is in
1750 ;; case the user has used standard-display-european earlier in
1751 ;; this session. (The MS-DOS port doesn't use that setup, so it
1752 ;; doesn't need to undo it.)
d73a7bb8
JPW
1753 (when standard-display-table
1754 (dotimes (i 128)
b221615b 1755 (aset standard-display-table (+ i 128) nil))))
40c81f74 1756 (or (eq window-system 'pc)
ff76e074 1757 (set-terminal-coding-system (or coding-system coding)))))
40c81f74 1758
166246f7 1759(defun set-language-environment (language-name)
6c05d680
RS
1760 "Set up multi-lingual environment for using LANGUAGE-NAME.
1761This sets the coding system priority and the default input method
8861c593
RS
1762and sometimes other things. LANGUAGE-NAME should be a string
1763which is the name of a language environment. For example, \"Latin-1\"
1764specifies the character set for the major languages of Western Europe."
8efc03e1 1765 (interactive (list (read-language-name
ddb5c041 1766 nil
8efc03e1 1767 "Set language environment (default, English): ")))
4ef06f75
KH
1768 (if language-name
1769 (if (symbolp language-name)
1770 (setq language-name (symbol-name language-name)))
1771 (setq language-name "English"))
f15466c5 1772 (let ((slot (assoc-string language-name language-info-alist t)))
95498fd0 1773 (unless slot
f850d782 1774 (error "Language environment not defined: %S" language-name))
95498fd0 1775 (setq language-name (car slot)))
8efc03e1
KH
1776 (if current-language-environment
1777 (let ((func (get-language-info current-language-environment
1778 'exit-function)))
e63645c2 1779 (run-hooks 'exit-language-environment-hook)
5f395df3 1780 (if (functionp func) (funcall func))))
03c35c83
EZ
1781 (let ((default-eol-type (coding-system-eol-type
1782 default-buffer-file-coding-system)))
1783 (reset-language-environment)
ddb5c041 1784
e522f07b 1785 ;; The features might set up coding systems.
9c20a8d5
DL
1786 (let ((required-features (get-language-info language-name 'features)))
1787 (while required-features
1788 (require (car required-features))
1789 (setq required-features (cdr required-features))))
1790
03c35c83
EZ
1791 (setq current-language-environment language-name)
1792 (set-language-environment-coding-systems language-name default-eol-type))
ddb5c041
KH
1793 (let ((input-method (get-language-info language-name 'input-method)))
1794 (when input-method
1795 (setq default-input-method input-method)
1796 (if input-method-history
1797 (setq input-method-history
1798 (cons input-method
1799 (delete input-method input-method-history))))))
ec241f58
EZ
1800 (let ((nonascii (get-language-info language-name 'nonascii-translation))
1801 (dos-table
4e2ac2d9
EZ
1802 (if (eq window-system 'pc)
1803 (intern
d9c0a50e 1804 (format "cp%d-nonascii-translation-table" dos-codepage)))))
03c35c83
EZ
1805 (cond
1806 ((char-table-p nonascii)
1807 (setq nonascii-translation-table nonascii))
ec241f58 1808 ((and (eq window-system 'pc) (boundp dos-table))
03c35c83
EZ
1809 ;; DOS terminals' default is to use a special non-ASCII translation
1810 ;; table as appropriate for the installed codepage.
ec241f58 1811 (setq nonascii-translation-table (symbol-value dos-table)))
03c35c83
EZ
1812 ((charsetp nonascii)
1813 (setq nonascii-insert-offset (- (make-char nonascii) 128)))))
ddb5c041 1814
63283a8f 1815 ;; Unibyte setups if necessary.
ddb5c041 1816 (unless default-enable-multibyte-characters
63283a8f 1817 ;; Syntax and case table.
ddb5c041
KH
1818 (let ((syntax (get-language-info language-name 'unibyte-syntax)))
1819 (if syntax
1820 (let ((set-case-syntax-set-multibyte nil))
63283a8f
KH
1821 (load syntax nil t))
1822 ;; No information for syntax and case. Reset to the defaults.
1823 (let ((syntax-table (standard-syntax-table))
1824 (case-table (standard-case-table))
03c35c83 1825 (ch (if (eq window-system 'pc) 128 160)))
63283a8f
KH
1826 (while (< ch 256)
1827 (modify-syntax-entry ch " " syntax-table)
1828 (aset case-table ch ch)
1829 (setq ch (1+ ch)))
1830 (set-char-table-extra-slot case-table 0 nil)
1831 (set-char-table-extra-slot case-table 1 nil)
1832 (set-char-table-extra-slot case-table 2 nil))
1833 (set-standard-case-table (standard-case-table))
1834 (let ((list (buffer-list)))
1835 (while list
1836 (with-current-buffer (car list)
1837 (set-case-table (standard-case-table)))
1838 (setq list (cdr list))))))
40c81f74 1839 (set-display-table-and-terminal-coding-system language-name))
ddb5c041
KH
1840
1841 (let ((required-features (get-language-info language-name 'features)))
1842 (while required-features
1843 (require (car required-features))
1844 (setq required-features (cdr required-features))))
0077e394 1845
5d26489f
EZ
1846 ;; Don't invoke fontset-related functions if fontsets aren't
1847 ;; supported in this build of Emacs.
1848 (when (fboundp 'fontset-list)
1849 (let ((overriding-fontspec (get-language-info language-name
1850 'overriding-fontspec)))
1851 (if overriding-fontspec
1852 (set-overriding-fontspec-internal overriding-fontspec))))
0077e394 1853
ddb5c041 1854 (let ((func (get-language-info language-name 'setup-function)))
5f395df3 1855 (if (functionp func)
ddb5c041 1856 (funcall func)))
60d334c3
KH
1857 (if (and utf-translate-cjk-mode
1858 utf-translate-cjk-lang-env
1859 (not (eq utf-translate-cjk-lang-env language-name))
1860 (catch 'tag
1861 (dolist (charset (get-language-info language-name 'charset))
1862 (if (memq charset utf-translate-cjk-charsets)
1863 (throw 'tag t)))
1864 nil))
1865 (utf-translate-cjk-load-tables))
8efc03e1 1866 (run-hooks 'set-language-environment-hook)
f850d782 1867 (force-mode-line-update t))
4ed46869 1868
51a8fc1d
RS
1869(defun standard-display-european-internal ()
1870 ;; Actually set up direct output of non-ASCII characters.
03c35c83
EZ
1871 (standard-display-8bit (if (eq window-system 'pc) 128 160) 255)
1872 ;; Unibyte Emacs on MS-DOS wants to display all 8-bit characters with
1873 ;; the native font, and codes 160 and 146 stand for something very
1874 ;; different there.
1875 (or (and (eq window-system 'pc) (not default-enable-multibyte-characters))
1876 (progn
6b626913
SM
1877 ;; Most X fonts used to do the wrong thing for latin-1 code 160.
1878 (unless (and (eq window-system 'x)
1879 ;; XFree86 4 has fixed the fonts.
1880 (string= "The XFree86 Project, Inc" (x-server-vendor))
1881 (> (aref (number-to-string (nth 2 (x-server-version))) 0)
1882 ?3))
1883 ;; Make non-line-break space display as a plain space.
1884 (aset standard-display-table 160 [32]))
1492f7ac 1885 ;; Most Windows programs send out apostrophes as \222. Most X fonts
03c35c83 1886 ;; don't contain a character at that position. Map it to the ASCII
5f395df3
SM
1887 ;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
1888 ;; U+2019, normally from the windows-1252 character set. XFree 4
1889 ;; fonts probably have the appropriate glyph at this position,
1890 ;; so they could use standard-display-8bit. It's better to use a
1891 ;; proper windows-1252 coding system. --fx]
6b626913 1892 (aset standard-display-table 146 [39]))))
03c35c83
EZ
1893
1894(defun set-language-environment-coding-systems (language-name
1895 &optional eol-type)
1896 "Do various coding system setups for language environment LANGUAGE-NAME.
1897
1898The optional arg EOL-TYPE specifies the eol-type of the default value
d37ef0f6 1899of `buffer-file-coding-system' set by this function."
54b226f7
KH
1900 (let* ((priority (get-language-info language-name 'coding-priority))
1901 (default-coding (car priority)))
1902 (if priority
1903 (let ((categories (mapcar 'coding-system-category priority)))
03c35c83
EZ
1904 (set-default-coding-systems
1905 (if (memq eol-type '(0 1 2 unix dos mac))
1906 (coding-system-change-eol-conversion default-coding eol-type)
1907 default-coding))
46babb23 1908 (setq default-sendmail-coding-system default-coding)
54b226f7
KH
1909 (set-coding-priority categories)
1910 (while priority
1911 (set (car categories) (car priority))
1912 (setq priority (cdr priority) categories (cdr categories)))
6a17cee0 1913 ;; Changing the binding of a coding category requires this call.
45d08cb2 1914 (update-coding-systems-internal)))))
54b226f7 1915
4ed46869 1916(defsubst princ-list (&rest args)
d37ef0f6 1917 "Print all arguments with `princ', then print \"\n\"."
4ed46869
KH
1918 (while args (princ (car args)) (setq args (cdr args)))
1919 (princ "\n"))
1920
b0648a00
RS
1921(put 'describe-specified-language-support 'apropos-inhibit t)
1922
34703cb1 1923;; Print language-specific information such as input methods,
13e82c04 1924;; charsets, and coding systems. This function is intended to be
48082651 1925;; called from the menu:
281d03ec 1926;; [menu-bar mule describe-language-environment LANGUAGE]
48082651
KH
1927;; and should not run it by `M-x describe-current-input-method-function'.
1928(defun describe-specified-language-support ()
96db204a 1929 "Describe how Emacs supports the specified language environment."
48082651 1930 (interactive)
281d03ec 1931 (let (language-name)
48082651 1932 (if (not (and (symbolp last-command-event)
cda74479
DL
1933 (or (not (eq last-command-event 'Default))
1934 (setq last-command-event 'English))
281d03ec 1935 (setq language-name (symbol-name last-command-event))))
48082651 1936 (error "Bogus calling sequence"))
281d03ec
RS
1937 (describe-language-environment language-name)))
1938
1939(defun describe-language-environment (language-name)
1940 "Describe how Emacs supports language environment LANGUAGE-NAME."
78754934
KH
1941 (interactive
1942 (list (read-language-name
1943 'documentation
8adfa8be 1944 "Describe language environment (default, current choice): ")))
f850d782
RS
1945 (if (null language-name)
1946 (setq language-name current-language-environment))
281d03ec
RS
1947 (if (or (null language-name)
1948 (null (get-language-info language-name 'documentation)))
1949 (error "No documentation for the specified language"))
4ef06f75
KH
1950 (if (symbolp language-name)
1951 (setq language-name (symbol-name language-name)))
ef5a4730
KH
1952 (dolist (feature (get-language-info language-name 'features))
1953 (require feature))
6b626913 1954 (let ((doc (get-language-info language-name 'documentation)))
c3034e84
SM
1955 (help-setup-xref (list #'describe-language-environment language-name)
1956 (interactive-p))
1957 (with-output-to-temp-buffer (help-buffer)
464cc130
KH
1958 (save-excursion
1959 (set-buffer standard-output)
1960 (insert language-name " language environment\n\n")
1961 (if (stringp doc)
1962 (insert doc "\n\n"))
e036b0a6
KH
1963 (condition-case nil
1964 (let ((str (eval (get-language-info language-name 'sample-text))))
1965 (if (stringp str)
1966 (insert "Sample text:\n " str "\n\n")))
1967 (error nil))
464cc130
KH
1968 (let ((input-method (get-language-info language-name 'input-method))
1969 (l (copy-sequence input-method-alist)))
1970 (insert "Input methods")
1971 (when input-method
1972 (insert " (default, " input-method ")")
1973 (setq input-method (assoc input-method input-method-alist))
1974 (setq l (cons input-method (delete input-method l))))
1975 (insert ":\n")
48082651 1976 (while l
464cc130
KH
1977 (when (string= language-name (nth 1 (car l)))
1978 (insert " " (car (car l)))
1979 (search-backward (car (car l)))
467412aa 1980 (help-xref-button 0 'help-input-method (car (car l)))
464cc130 1981 (goto-char (point-max))
2fa7e202
KH
1982 (insert " (\""
1983 (if (stringp (nth 3 (car l)))
1984 (nth 3 (car l))
1985 (car (nth 3 (car l))))
1986 "\" in mode line)\n"))
464cc130
KH
1987 (setq l (cdr l)))
1988 (insert "\n"))
1989 (insert "Character sets:\n")
1990 (let ((l (get-language-info language-name 'charset)))
1991 (if (null l)
1992 (insert " nothing specific to " language-name "\n")
1993 (while l
1994 (insert " " (symbol-name (car l)))
1995 (search-backward (symbol-name (car l)))
467412aa 1996 (help-xref-button 0 'help-character-set (car l))
464cc130
KH
1997 (goto-char (point-max))
1998 (insert ": " (charset-description (car l)) "\n")
1999 (setq l (cdr l)))))
2000 (insert "\n")
2001 (insert "Coding systems:\n")
2002 (let ((l (get-language-info language-name 'coding-system)))
2003 (if (null l)
2004 (insert " nothing specific to " language-name "\n")
2005 (while l
2006 (insert " " (symbol-name (car l)))
2007 (search-backward (symbol-name (car l)))
467412aa 2008 (help-xref-button 0 'help-coding-system (car l))
464cc130
KH
2009 (goto-char (point-max))
2010 (insert " (`"
2011 (coding-system-mnemonic (car l))
2012 "' in mode line):\n\t"
2013 (coding-system-doc-string (car l))
2014 "\n")
2015 (let ((aliases (coding-system-get (car l)
2016 'alias-coding-systems)))
2017 (when aliases
2018 (insert "\t(alias:")
2019 (while aliases
2020 (insert " " (symbol-name (car aliases)))
2021 (setq aliases (cdr aliases)))
2022 (insert ")\n")))
c3034e84 2023 (setq l (cdr l)))))))))
4ed46869 2024\f
40c81f74
PE
2025;;; Locales.
2026
0d7c5bb9
DL
2027(defvar locale-translation-file-name nil
2028 "File name for the system's file of locale-name aliases, or nil if none.")
40c81f74 2029
5f395df3
SM
2030;; The following definitions might as well be marked as constants and
2031;; purecopied, since they're normally used on startup, and probably
2032;; should reflect the facilities of the base Emacs.
2033(defconst locale-language-names
2034 (purecopy
2035 '(
40c81f74
PE
2036 ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
2037 ;; as specified in the Single Unix Spec, Version 2.
2038 ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
2039 ;; with additions from ISO 639/RA Newsletter No.1/1989;
5f395df3
SM
2040 ;; see Internet RFC 2165 (1997-06) and
2041 ;; http://www.evertype.com/standards/iso639/iso639-en.html
2042 ;; TERRITORY is a country code taken from ISO 3166
2043 ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
40c81f74 2044 ;; CODESET and MODIFIER are implementation-dependent.
5f395df3 2045
0363e09a
DL
2046 ;; jasonr comments: MS Windows uses three letter codes for
2047 ;; languages instead of the two letter ISO codes that POSIX
2048 ;; uses. In most cases the first two letters are the same, so
2049 ;; most of the regexps in locale-language-names work. Japanese
2050 ;; and Chinese are exceptions, which are listed in the
2051 ;; non-standard section at the bottom of locale-language-names.
2052
8dedddd5
KH
2053 ("aa_DJ" . "Latin-1") ; Afar
2054 ("aa" . "UTF-8")
2055 ;; ab Abkhazian
6ececc4d 2056 ("af" . "Latin-1") ; Afrikaans
8dedddd5
KH
2057 ("am" "Ethiopic" utf-8) ; Amharic
2058 ("an" . "Latin-9") ; Aragonese
5f395df3 2059 ; ar Arabic glibc uses 8859-6
40c81f74
PE
2060 ; as Assamese
2061 ; ay Aymara
8dedddd5 2062 ("az" . "UTF-8") ; Azerbaijani
40c81f74 2063 ; ba Bashkir
8dedddd5
KH
2064 ("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
2065 ("bg" "Bulgarian" cp1251) ; Bulgarian
40c81f74
PE
2066 ; bh Bihari
2067 ; bi Bislama
8dedddd5 2068 ("bn" . "UTF-8") ; Bengali, Bangla
40c81f74
PE
2069 ("bo" . "Tibetan")
2070 ("br" . "Latin-1") ; Breton
d37ef0f6 2071 ("bs" . "Latin-2") ; Bosnian
8dedddd5 2072 ("byn" . "UTF-8") ; Bilin; Blin
40c81f74
PE
2073 ("ca" . "Latin-1") ; Catalan
2074 ; co Corsican
8dedddd5
KH
2075 ("cs" "Czech" iso-8859-2)
2076 ("cy" "Welsh" iso-8859-14)
40c81f74 2077 ("da" . "Latin-1") ; Danish
8dedddd5 2078 ("de" "German" iso-8859-1)
40c81f74 2079 ; dz Bhutani
8dedddd5 2080 ("el" "Greek" iso-8859-7)
6ececc4d 2081 ;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
e11cf111
DL
2082 ;; That's actually what the GNU locales define, modulo things like
2083 ;; en_IN -- fx.
8dedddd5 2084 ("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India
6ececc4d 2085 ("en" . "Latin-1") ; English
40c81f74 2086 ("eo" . "Latin-3") ; Esperanto
8dedddd5
KH
2087 ("es" "Spanish" iso-8859-1)
2088 ("et" . "Latin-1") ; Estonian
40c81f74 2089 ("eu" . "Latin-1") ; Basque
8dedddd5 2090 ("fa" . "UTF-8") ; Persian
40c81f74 2091 ("fi" . "Latin-1") ; Finnish
8dedddd5 2092 ("fj" . "Latin-1") ; Fiji
40c81f74 2093 ("fo" . "Latin-1") ; Faroese
8dedddd5 2094 ("fr" "French" iso-8859-1) ; French
40c81f74 2095 ("fy" . "Latin-1") ; Frisian
6ececc4d 2096 ("ga" . "Latin-1") ; Irish Gaelic (new orthography)
8dedddd5
KH
2097 ("gd" . "Latin-9") ; Scots Gaelic
2098 ("gez" "Ethiopic" utf-8) ; Geez
2099 ("gl" . "Latin-1") ; Gallegan; Galician
40c81f74 2100 ; gn Guarani
8dedddd5
KH
2101 ("gu" . "UTF-8") ; Gujarati
2102 ("gv" . "Latin-1") ; Manx Gaelic
40c81f74 2103 ; ha Hausa
8dedddd5
KH
2104 ("he" "Hebrew" iso-8859-8)
2105 ("hi" "Devanagari" utf-8) ; Hindi
2106 ("hr" "Croatian" iso-8859-2) ; Croatian
40c81f74
PE
2107 ("hu" . "Latin-2") ; Hungarian
2108 ; hy Armenian
2109 ; ia Interlingua
2110 ("id" . "Latin-1") ; Indonesian
2111 ; ie Interlingue
2112 ; ik Inupiak
2113 ("is" . "Latin-1") ; Icelandic
8dedddd5 2114 ("it" "Italian" iso-8859-1) ; Italian
40c81f74 2115 ; iu Inuktitut
8dedddd5
KH
2116 ("iw" "Hebrew" iso-8859-8)
2117 ("ja" "Japanese" euc-jp)
40c81f74 2118 ; jw Javanese
8dedddd5 2119 ("ka" "Georgian" georgian-ps) ; Georgian
40c81f74 2120 ; kk Kazakh
6ececc4d 2121 ("kl" . "Latin-1") ; Greenlandic
40c81f74 2122 ; km Cambodian
8dedddd5
KH
2123 ("kn" "Kannada" utf-8)
2124 ("ko" "Korean" euc-kr)
40c81f74
PE
2125 ; ks Kashmiri
2126 ; ku Kurdish
5f395df3 2127 ("kw" . "Latin-1") ; Cornish
40c81f74
PE
2128 ; ky Kirghiz
2129 ("la" . "Latin-1") ; Latin
5f395df3 2130 ("lb" . "Latin-1") ; Luxemburgish
8dedddd5 2131 ("lg" . "Laint-6") ; Ganda
40c81f74 2132 ; ln Lingala
8dedddd5
KH
2133 ("lo" "Lao" utf-8) ; Laothian
2134 ("lt" "Lithuanian" iso-8859-13)
9c20a8d5 2135 ("lv" . "Latvian") ; Latvian, Lettish
40c81f74 2136 ; mg Malagasy
5f395df3 2137 ("mi" . "Latin-7") ; Maori
8dedddd5
KH
2138 ("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
2139 ("ml" "Malayalam" utf-8)
2140 ("mn" . "UTF-8") ; Mongolian
40c81f74 2141 ; mo Moldavian
8dedddd5 2142 ("mr" "Devanagari" utf-8) ; Marathi
5f395df3 2143 ("ms" . "Latin-1") ; Malay
40c81f74
PE
2144 ("mt" . "Latin-3") ; Maltese
2145 ; my Burmese
2146 ; na Nauru
8dedddd5
KH
2147 ("nb" . "Latin-1") ; Norwegian
2148 ("ne" "Devanagari" utf-8) ; Nepali
2149 ("nl" "Dutch" iso-8859-1)
40c81f74 2150 ("no" . "Latin-1") ; Norwegian
5f395df3 2151 ("oc" . "Latin-1") ; Occitan
8dedddd5
KH
2152 ("om_ET" . "UTF-8") ; (Afan) Oromo
2153 ("om" . "Latin-1") ; (Afan) Oromo
40c81f74 2154 ; or Oriya
8dedddd5 2155 ("pa" . "UTF-8") ; Punjabi
40c81f74
PE
2156 ("pl" . "Latin-2") ; Polish
2157 ; ps Pashto, Pushto
2158 ("pt" . "Latin-1") ; Portuguese
2159 ; qu Quechua
6ececc4d 2160 ("rm" . "Latin-1") ; Rhaeto-Romanic
40c81f74 2161 ; rn Kirundi
8dedddd5
KH
2162 ("ro" "Romanian" iso-8859-2)
2163 ("ru_RU" "Russian" iso-8859-5)
2164 ("ru_UA" "Russian" koi8-u)
40c81f74
PE
2165 ; rw Kinyarwanda
2166 ("sa" . "Devanagari") ; Sanskrit
2167 ; sd Sindhi
8dedddd5 2168 ("se" . "UTF-8") ; Northern Sami
40c81f74
PE
2169 ; sg Sangho
2170 ("sh" . "Latin-2") ; Serbo-Croatian
2171 ; si Sinhalese
8dedddd5
KH
2172 ("sid" . "UTF-8") ; Sidamo
2173 ("sk" "Slovak" iso-8859-2)
2174 ("sl" "Slovenian" iso-8859-2)
40c81f74
PE
2175 ; sm Samoan
2176 ; sn Shona
8dedddd5
KH
2177 ("so_ET" "UTF-8") ; Somali
2178 ("so" "Latin-1") ; Somali
6ececc4d 2179 ("sq" . "Latin-1") ; Albanian
8dedddd5 2180 ("sr_YU@cyrillic" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet)
40c81f74
PE
2181 ("sr" . "Latin-2") ; Serbian (Latin alphabet)
2182 ; ss Siswati
8dedddd5 2183 ("st" . "Latin-1") ; Sesotho
40c81f74 2184 ; su Sundanese
8dedddd5 2185 ("sv" "Swedish" iso-8859-1) ; Swedish
40c81f74 2186 ("sw" . "Latin-1") ; Swahili
8dedddd5
KH
2187 ("ta" "Tamil" utf-8)
2188 ("te" . "UTF-8") ; Telugu
2189 ("tg" "Tajik" koi8-t)
2190 ("th" "Thai" tis-620)
2191 ("ti" "Ethiopic" utf-8) ; Tigrinya
2192 ("tig_ER" . "UTF-8") ; Tigre
40c81f74 2193 ; tk Turkmen
6ececc4d 2194 ("tl" . "Latin-1") ; Tagalog
40c81f74
PE
2195 ; tn Setswana
2196 ; to Tonga
8dedddd5 2197 ("tr" "Turkish" iso-8859-9)
40c81f74 2198 ; ts Tsonga
8dedddd5 2199 ("tt" . "UTF-8") ; Tatar
40c81f74
PE
2200 ; tw Twi
2201 ; ug Uighur
8dedddd5
KH
2202 ("uk" "Ukrainian" koi8-u)
2203 ("ur" . "UTF-8") ; Urdu
2204 ("uz_UZ@cyrillic" . "UTF-8"); Uzbek
5f395df3 2205 ("uz" . "Latin-1") ; Uzbek
8dedddd5 2206 ("vi" "Vietnamese" utf-8)
40c81f74 2207 ; vo Volapuk
d37ef0f6 2208 ("wa" . "Latin-1") ; Walloon
40c81f74 2209 ; wo Wolof
8dedddd5 2210 ("xh" . "Latin-1") ; Xhosa
9c20a8d5 2211 ("yi" . "Windows-1255") ; Yiddish
40c81f74
PE
2212 ; yo Yoruba
2213 ; za Zhuang
8dedddd5
KH
2214 ("zh_HK" . "Chinese-Big5")
2215 ("zh_TW" . "Chinese-Big5")
2216 ("zh_CN" . "Chinese-GB")
2217 ("zh" . "Chinese-GB")
5f395df3
SM
2218 ; zh_CN.GB18030/GB18030 \
2219 ; zh_CN.GBK/GBK \
2220 ; zh_HK/BIG5-HKSCS \
8dedddd5 2221 ("zu" . "Latin-1") ; Zulu
40c81f74
PE
2222
2223 ;; ISO standard locales
2224 ("c$" . "ASCII")
2225 ("posix$" . "ASCII")
2226
40c81f74
PE
2227 ;; The "IPA" Emacs language environment does not correspond
2228 ;; to any ISO 639 code, so let it stand for itself.
2229 ("ipa$" . "IPA")
2230
2231 ;; Nonstandard or obsolete language codes
2232 ("cz" . "Czech") ; e.g. Solaris 2.6
2233 ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4
2234 ("iw" . "Hebrew") ; e.g. X11R6.4
f1282c7f 2235 ("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4
40c81f74 2236 ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6
2e86ceaa 2237 ("jp" . "Japanese") ; e.g. MS Windows
86d9e628
JR
2238 ("chs" . "Chinese-GB") ; MS Windows Chinese Simplified
2239 ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
367ca50f 2240 ))
8dedddd5
KH
2241 "Alist of locale regexps vs the corresponding languages and coding systems.
2242Each element has these form:
2243 \(LOCALE-REGEXP LANG-ENV CODING-SYSTEM)
2244The first element whose LOCALE-REGEXP matches the start of a
2245downcased locale specifies the LANG-ENV \(language environtment)
2246and CODING-SYSTEM corresponding to that locale. If there is no
2247appropriate language environment, the element may have this form:
2248 \(LOCALE-REGEXP . LANG-ENV)
2249In this case, LANG-ENV is one of generic language environments for an
2250specific encoding such as \"Latin-1\" and \"UTF-8\".")
40c81f74 2251
5f395df3
SM
2252(defconst locale-charset-language-names
2253 (purecopy
2254 '((".*8859[-_]?1\\>" . "Latin-1")
2255 (".*8859[-_]?2\\>" . "Latin-2")
2256 (".*8859[-_]?3\\>" . "Latin-3")
2257 (".*8859[-_]?4\\>" . "Latin-4")
2258 (".*8859[-_]?9\\>" . "Latin-5")
2259 (".*8859[-_]?14\\>" . "Latin-8")
2260 (".*8859[-_]?15\\>" . "Latin-9")
34703cb1 2261 (".*utf\\(?:-?8\\)?\\>" . "UTF-8")
e522f07b
DL
2262 ;; utf-8@euro exists, so put this last. (@euro really specifies
2263 ;; the currency, rather than the charset.)
2264 (".*@euro\\>" . "Latin-9")))
6ececc4d
PE
2265 "List of pairs of locale regexps and charset language names.
2266The first element whose locale regexp matches the start of a downcased locale
34703cb1 2267specifies the language name whose charset corresponds to that locale.
8dedddd5
KH
2268This language name is used if the locale is not listed in
2269`locale-language-names'")
6ececc4d 2270
5f395df3
SM
2271(defconst locale-preferred-coding-systems
2272 (purecopy
8dedddd5
KH
2273 '((".*8859[-_]?1\\>" . iso-8859-1)
2274 (".*8859[-_]?2\\>" . iso-8859-2)
2275 (".*8859[-_]?3\\>" . iso-8859-3)
2276 (".*8859[-_]?4\\>" . iso-8859-4)
2277 (".*8859[-_]?9\\>" . iso-8859-9)
2278 (".*8859[-_]?14\\>" . iso-8859-14)
2279 (".*8859[-_]?15\\>" . iso-8859-15)
2280 (".*utf\\(?:-?8\\)?" . utf-8)
2281 ;; utf-8@euro exists, so put this after utf-8. (@euro really
2282 ;; specifies the currency, rather than the charset.)
2283 (".*@euro" . iso-8859-15)
2284 ("koi8-?r" . koi8-r)
2285 ("koi8-?u" . koi8-u)
2286 ("tcvn" . tcvn)
2287 ("big5" . big5)
2288 ("euc-?tw" . euc-tw)
2289 ;; We don't support GBK, but as it is upper compatible with
2290 ;; GB-2312, we setup the default coding system to gb2312.
2291 ("gbk" . gb2312)
2292 ;; We don't support BIG5-HKSCS, but as it is upper compatible with
2293 ;; BIG5, we setup the default coding system to big5.
2294 ("big5hkscs" . big5)
2295 ("ja.*[._]euc" . japanese-iso-8bit)
5f395df3
SM
2296 ("ja.*[._]jis7" . iso-2022-jp)
2297 ("ja.*[._]pck" . japanese-shift-jis)
2298 ("ja.*[._]sjis" . japanese-shift-jis)
69210880 2299 ("jpn" . japanese-shift-jis) ; MS-Windows uses this.
8dedddd5 2300 ))
6ececc4d
PE
2301 "List of pairs of locale regexps and preferred coding systems.
2302The first element whose locale regexp matches the start of a downcased locale
8dedddd5
KH
2303specifies the coding system to prefer when using that locale.
2304This coding system is used if the locale specifies a specific charset.")
40c81f74
PE
2305
2306(defun locale-name-match (key alist)
2307 "Search for KEY in ALIST, which should be a list of regexp-value pairs.
2308Return the value corresponding to the first regexp that matches the
2309start of KEY, or nil if there is no match."
2310 (let (element)
2311 (while (and alist (not element))
ddf6b836 2312 (if (string-match (concat "\\`\\(?:" (car (car alist)) "\\)") key)
40c81f74
PE
2313 (setq element (car alist)))
2314 (setq alist (cdr alist)))
2315 (cdr element)))
2316
34703cb1
DL
2317(defun locale-charset-match-p (charset1 charset2)
2318 "Whether charset names (strings) CHARSET1 and CHARSET2 are equivalent.
2319Matching is done ignoring case and any hyphens and underscores in the
2320names. E.g. `ISO_8859-1' and `iso88591' both match `iso-8859-1'."
2321 (setq charset1 (replace-regexp-in-string "[-_]" "" charset1))
2322 (setq charset2 (replace-regexp-in-string "[-_]" "" charset2))
2323 (eq t (compare-strings charset1 nil nil charset2 nil nil t)))
2324
2325(defvar locale-charset-alist nil
2326 "Coding system alist keyed on locale-style charset name.
2327Used by `locale-charset-to-coding-system'.")
2328
2329(defun locale-charset-to-coding-system (charset)
2330 "Find coding system corresponding to CHARSET.
2331CHARSET is any sort of non-Emacs charset name, such as might be used
2332in a locale codeset, or elsewhere. It is matched to a coding system
2333first by case-insensitive lookup in `locale-charset-alist'. Then
2334matches are looked for in the coding system list, treating case and
2335the characters `-' and `_' as insignificant. The coding system base
2336is returned. Thus, for instance, if charset \"ISO8859-2\",
2337`iso-latin-2' is returned."
f15466c5 2338 (or (car (assoc-string charset locale-charset-alist t))
34703cb1
DL
2339 (let ((cs coding-system-alist)
2340 c)
2341 (while (and (not c) cs)
2342 (if (locale-charset-match-p charset (caar cs))
2343 (setq c (intern (caar cs)))
2344 (pop cs)))
0363e09a 2345 (if c (coding-system-base c)))))
34703cb1
DL
2346
2347;; Fixme: This ought to deal with the territory part of the locale
2348;; too, for setting things such as calendar holidays, ps-print paper
2349;; size, spelling dictionary.
2350
758f07de 2351(defun set-locale-environment (&optional locale-name)
40c81f74 2352 "Set up multi-lingual environment for using LOCALE-NAME.
758f07de
RS
2353This sets the language environment, the coding system priority,
2354the default input method and sometimes other things.
2355
34703cb1
DL
2356LOCALE-NAME should be a string which is the name of a locale supported
2357by the system. Often it is of the form xx_XX.CODE, where xx is a
2358language, XX is a country, and CODE specifies a character set and
2359coding system. For example, the locale name \"ja_JP.EUC\" might name
2360a locale for Japanese in Japan using the `japanese-iso-8bit'
2361coding-system. The name may also have a modifier suffix, e.g. `@euro'
2362or `@cyrillic'.
40c81f74 2363
758f07de 2364If LOCALE-NAME is nil, its value is taken from the environment
d37ef0f6 2365variables LC_ALL, LC_CTYPE and LANG (the first one that is set).
40c81f74
PE
2366
2367The locale names supported by your system can typically be found in a
0812c1e8 2368directory named `/usr/share/locale' or `/usr/lib/locale'. LOCALE-NAME
758f07de 2369will be translated according to the table specified by
0812c1e8
DL
2370`locale-translation-file-name'.
2371
2372See also `locale-charset-language-names', `locale-language-names',
2373`locale-preferred-coding-systems' and `locale-coding-system'."
758f07de 2374 (interactive "sSet environment for locale: ")
a1506d29 2375
0d7c5bb9
DL
2376 ;; Do this at runtime for the sake of binaries possibly transported
2377 ;; to a system without X.
2378 (setq locale-translation-file-name
2379 (let ((files
2380 '("/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4
34703cb1 2381 "/usr/X11R6/lib/X11/locale/locale.alias" ; XFree86, e.g. RedHat 4.2
0d7c5bb9
DL
2382 "/usr/openwin/lib/locale/locale.alias" ; e.g. Solaris 2.6
2383 ;;
2384 ;; The following name appears after the X-related names above,
2385 ;; since the X-related names are what X actually uses.
2386 "/usr/share/locale/locale.alias" ; GNU/Linux sans X
2387 )))
2388 (while (and files (not (file-exists-p (car files))))
2389 (setq files (cdr files)))
2390 (car files)))
2391
7008ccac
GM
2392 (let ((locale locale-name))
2393
2394 (unless locale
2395 ;; Use the first of these three environment variables
2396 ;; that has a nonempty value.
2397 (let ((vars '("LC_ALL" "LC_CTYPE" "LANG")))
db6bd804
DL
2398 (while (and vars
2399 (= 0 (length locale))) ; nil or empty string
2400 (setq locale (getenv (pop vars))))))
7008ccac 2401
cfd67504
YM
2402 (unless (or locale (not (fboundp 'mac-get-preference)))
2403 (setq locale (mac-get-preference "AppleLocale"))
2404 (unless locale
2405 (let ((languages (mac-get-preference "AppleLanguages")))
2406 (unless (= (length languages) 0) ; nil or empty vector
2407 (setq locale (aref languages 0))))))
2408 (unless (or locale (not (boundp 'mac-system-locale)))
2409 (setq locale mac-system-locale))
2410
7008ccac
GM
2411 (when locale
2412
2413 ;; Translate "swedish" into "sv_SE.ISO8859-1", and so on,
2414 ;; using the translation file that many systems have.
2415 (when locale-translation-file-name
2416 (with-temp-buffer
2417 (insert-file-contents locale-translation-file-name)
2418 (when (re-search-forward
2419 (concat "^" (regexp-quote locale) ":?[ \t]+") nil t)
2420 (setq locale (buffer-substring (point) (line-end-position))))))
2421
2422 ;; Leave the system locales alone if the caller did not specify
2423 ;; an explicit locale name, as their defaults are set from
2424 ;; LC_MESSAGES and LC_TIME, not LC_CTYPE, and the user might not
2425 ;; want to set them to the same value as LC_CTYPE.
2426 (when locale-name
2427 (setq system-messages-locale locale)
2428 (setq system-time-locale locale))
2429
2430 (setq locale (downcase locale))
2431
2432 (let ((language-name
2433 (locale-name-match locale locale-language-names))
2434 (charset-language-name
2435 (locale-name-match locale locale-charset-language-names))
2436 (coding-system
34703cb1
DL
2437 (or (locale-name-match locale locale-preferred-coding-systems)
2438 (when locale
2439 (if (string-match "\\.\\([^@]+\\)" locale)
2440 (locale-charset-to-coding-system
cfd67504
YM
2441 (match-string 1 locale))))
2442 (and (eq system-type 'macos) mac-system-coding-system))))
7008ccac 2443
8dedddd5
KH
2444 (if (consp language-name)
2445 ;; locale-language-names specify both lang-env and coding.
2446 ;; But, what specified in locale-preferred-coding-systems
2447 ;; has higher priority.
2448 (setq coding-system (or coding-system
2449 (nth 1 language-name))
2450 language-name (car language-name))
2451 ;; Otherwise, if locale is not listed in locale-language-names,
2452 ;; use what listed in locale-charset-language-names.
2453 (if (not language-name)
2454 (setq language-name charset-language-name)))
7008ccac
GM
2455
2456 (when language-name
2457
2458 ;; Set up for this character set. This is now the right way
2459 ;; to do it for both unibyte and multibyte modes.
2460 (set-language-environment language-name)
2461
2462 ;; If default-enable-multibyte-characters is nil,
2463 ;; we are using single-byte characters,
2464 ;; so the display table and terminal coding system are irrelevant.
2465 (when default-enable-multibyte-characters
ff76e074
SM
2466 (set-display-table-and-terminal-coding-system
2467 language-name coding-system))
7008ccac 2468
166ce29f
DL
2469 ;; Set the `keyboard-coding-system' if appropriate (tty
2470 ;; only). At least X and MS Windows can generate
2471 ;; multilingual input.
2472 (unless window-system
2473 (let ((kcs (or coding-system
2474 (car (get-language-info language-name
2475 'coding-system)))))
2476 (if kcs (set-keyboard-coding-system kcs))))
ddf6b836 2477
7008ccac
GM
2478 (setq locale-coding-system
2479 (car (get-language-info language-name 'coding-priority))))
2480
8dedddd5
KH
2481 (when (and coding-system
2482 (not (coding-system-equal coding-system
2483 locale-coding-system)))
7008ccac 2484 (prefer-coding-system coding-system)
e76ef161
DL
2485 (setq locale-coding-system coding-system))))
2486
52c7f9ee
JR
2487 ;; On Windows, override locale-coding-system,
2488 ;; keyboard-coding-system with system codepage. Note:
2489 ;; selection-coding-system is already set in w32select.c.
5eb94383 2490 (when (boundp 'w32-ansi-code-page)
893b49bb
JR
2491 (let ((code-page-coding (intern (format "cp%d" w32-ansi-code-page))))
2492 (when (coding-system-p code-page-coding)
2493 (setq locale-coding-system code-page-coding)
893b49bb
JR
2494 (set-keyboard-coding-system code-page-coding)
2495 (set-terminal-coding-system code-page-coding))))
2496
a41118cc 2497 (when (eq system-type 'darwin)
ff76e074
SM
2498 ;; On Darwin, file names are always encoded in utf-8, no matter
2499 ;; the locale.
2500 (setq default-file-name-coding-system 'utf-8)
2501 ;; Mac OS X's Terminal.app by default uses utf-8 regardless of
2502 ;; the locale.
2503 (when (and (null window-system)
2504 (equal (getenv "TERM_PROGRAM") "Apple_Terminal"))
2505 (set-terminal-coding-system 'utf-8)
2506 (set-keyboard-coding-system 'utf-8)))
a41118cc 2507
e76ef161 2508 ;; Default to A4 paper if we're not in a C, POSIX or US locale.
3479c806 2509 ;; (See comments in Flocale_info.)
e76ef161 2510 (let ((locale locale)
3479c806 2511 (paper (locale-info 'paper)))
e76ef161
DL
2512 (if paper
2513 ;; This will always be null at the time of writing.
2514 (cond
2515 ((equal paper '(216 279))
2516 (setq ps-paper-type 'letter))
2517 ((equal paper '(210 297))
2518 (setq ps-paper-type 'a4)))
2519 (let ((vars '("LC_ALL" "LC_PAPER" "LANG")))
2520 (while (and vars (= 0 (length locale)))
2521 (setq locale (getenv (pop vars)))))
2522 (when locale
2523 ;; As of glibc 2.2.5, these are the only US Letter locales,
2524 ;; and the rest are A4.
2525 (setq ps-paper-type
2526 (or (locale-name-match locale '(("c$" . letter)
2527 ("posix$" . letter)
2528 (".._us" . letter)
2529 (".._pr" . letter)
893b49bb
JR
2530 (".._ca" . letter)
2531 ("enu$" . letter) ; Windows
2532 ("esu$" . letter)
2533 ("enc$" . letter)
2534 ("frc$" . letter)))
e76ef161
DL
2535 'a4))))))
2536 nil)
40c81f74 2537\f
4ed46869
KH
2538;;; Charset property
2539
251d4f4b 2540(defun get-charset-property (charset propname)
4ed46869
KH
2541 "Return the value of CHARSET's PROPNAME property.
2542This is the last value stored with
96db204a 2543 (put-charset-property CHARSET PROPNAME VALUE)."
251d4f4b
KH
2544 (and (not (eq charset 'composition))
2545 (plist-get (charset-plist charset) propname)))
4ed46869 2546
251d4f4b 2547(defun put-charset-property (charset propname value)
4ed46869
KH
2548 "Store CHARSETS's PROPNAME property with value VALUE.
2549It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
1300d43f
KH
2550 (or (eq charset 'composition)
2551 (set-charset-plist charset
2552 (plist-put (charset-plist charset) propname value))))
4ed46869
KH
2553
2554;;; Character code property
2555(put 'char-code-property-table 'char-table-extra-slots 0)
2556
2557(defvar char-code-property-table
2558 (make-char-table 'char-code-property-table)
2559 "Char-table containing a property list of each character code.
2560
2561See also the documentation of `get-char-code-property' and
96db204a 2562`put-char-code-property'.")
4ed46869
KH
2563
2564(defun get-char-code-property (char propname)
2565 "Return the value of CHAR's PROPNAME property in `char-code-property-table'."
2566 (let ((plist (aref char-code-property-table char)))
2567 (if (listp plist)
2568 (car (cdr (memq propname plist))))))
2569
2570(defun put-char-code-property (char propname value)
2571 "Store CHAR's PROPNAME property with VALUE in `char-code-property-table'.
2572It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
2573 (let ((plist (aref char-code-property-table char)))
2574 (if plist
2575 (let ((slot (memq propname plist)))
2576 (if slot
2577 (setcar (cdr slot) value)
2578 (nconc plist (list propname value))))
2579 (aset char-code-property-table char (list propname value)))))
2580
a127b764
KH
2581\f
2582;; Pretty description of encoded string
2583
2584;; Alist of ISO 2022 control code vs the corresponding mnemonic string.
2585(defvar iso-2022-control-alist
2586 '((?\x1b . "ESC")
2587 (?\x0e . "SO")
2588 (?\x0f . "SI")
2589 (?\x8e . "SS2")
2590 (?\x8f . "SS3")
2591 (?\x9b . "CSI")))
2592
2593(defun encoded-string-description (str coding-system)
2594 "Return a pretty description of STR that is encoded by CODING-SYSTEM."
2595 (setq str (string-as-unibyte str))
993b2a7d 2596 (mapconcat
8c9d55a9 2597 (if (and coding-system (eq (coding-system-type coding-system) 2))
993b2a7d
KH
2598 ;; Try to get a pretty description for ISO 2022 escape sequences.
2599 (function (lambda (x) (or (cdr (assq x iso-2022-control-alist))
a9f60091 2600 (format "0x%02X" x))))
5de75f53 2601 (function (lambda (x) (format "0x%02X" x))))
993b2a7d 2602 str " "))
a127b764
KH
2603
2604(defun encode-coding-char (char coding-system)
2605 "Encode CHAR by CODING-SYSTEM and return the resulting string.
2606If CODING-SYSTEM can't safely encode CHAR, return nil."
bd953173
KH
2607 (let ((str1 (string-as-multibyte (char-to-string char)))
2608 (str2 (string-as-multibyte (make-string 2 char)))
a89f541b
KH
2609 (safe-chars (and coding-system
2610 (coding-system-get coding-system 'safe-chars)))
8c9d55a9 2611 (charset (char-charset char))
a127b764 2612 enc1 enc2 i1 i2)
a89f541b 2613 (when (or (eq safe-chars t)
8c9d55a9 2614 (eq charset 'ascii)
a89f541b 2615 (and safe-chars (aref safe-chars char)))
a127b764
KH
2616 ;; We must find the encoded string of CHAR. But, just encoding
2617 ;; CHAR will put extra control sequences (usually to designate
34703cb1 2618 ;; ASCII charset) at the tail if type of CODING is ISO 2022.
a127b764
KH
2619 ;; To exclude such tailing bytes, we at first encode one-char
2620 ;; string and two-char string, then check how many bytes at the
2621 ;; tail of both encoded strings are the same.
2622
bd953173 2623 (setq enc1 (encode-coding-string str1 coding-system)
a127b764 2624 i1 (length enc1)
bd953173 2625 enc2 (encode-coding-string str2 coding-system)
a127b764
KH
2626 i2 (length enc2))
2627 (while (and (> i1 0) (= (aref enc1 (1- i1)) (aref enc2 (1- i2))))
2628 (setq i1 (1- i1) i2 (1- i2)))
2629
2630 ;; Now (substring enc1 i1) and (substring enc2 i2) are the same,
2631 ;; and they are the extra control sequences at the tail to
2632 ;; exclude.
2633 (substring enc2 0 i2))))
2634
2635
9ee5b744 2636;; arch-tag: b382c432-4b36-460e-bf4c-05efd0bb18dc
4ed46869 2637;;; mule-cmds.el ends here