Commit | Line | Data |
---|---|---|
caff3c0a | 1 | ;;; mule-cmds.el --- commands for multilingual environment -*-coding: iso-2022-7bit -*- |
9ee5b744 | 2 | |
26b3dce6 | 3 | ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, |
49f70d46 | 4 | ;; 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. |
7976eda0 | 5 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
5df4f04c | 6 | ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
2fd125a3 KH |
7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
8 | ;; Registration Number H14PRO021 | |
8f924df7 | 9 | ;; Copyright (C) 2003 |
97941b05 KH |
10 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
11 | ;; Registration Number H13PRO009 | |
4ed46869 | 12 | |
1f547b92 | 13 | ;; Keywords: mule, i18n |
4ed46869 KH |
14 | |
15 | ;; This file is part of GNU Emacs. | |
16 | ||
4936186e | 17 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
4ed46869 | 18 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
19 | ;; the Free Software Foundation, either version 3 of the License, or |
20 | ;; (at your option) any later version. | |
4ed46869 KH |
21 | |
22 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
23 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 | ;; GNU General Public License for more details. | |
26 | ||
27 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 28 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
4ed46869 | 29 | |
60370d40 PJ |
30 | ;;; Commentary: |
31 | ||
4ed46869 KH |
32 | ;;; Code: |
33 | ||
a4723629 GM |
34 | (eval-when-compile (require 'cl)) ; letf |
35 | ||
26b3dce6 GM |
36 | (defvar dos-codepage) |
37 | (autoload 'widget-value "wid-edit") | |
cda74479 | 38 | |
4a91d930 | 39 | (defvar mac-system-coding-system) |
4a91d930 | 40 | |
4ed46869 KH |
41 | ;;; MULE related key bindings and menus. |
42 | ||
64657387 SM |
43 | (defvar mule-keymap |
44 | (let ((map (make-sparse-keymap))) | |
45 | (define-key map "f" 'set-buffer-file-coding-system) | |
46 | (define-key map "r" 'revert-buffer-with-coding-system) | |
47 | (define-key map "F" 'set-file-name-coding-system) | |
48 | (define-key map "t" 'set-terminal-coding-system) | |
49 | (define-key map "k" 'set-keyboard-coding-system) | |
50 | (define-key map "p" 'set-buffer-process-coding-system) | |
51 | (define-key map "x" 'set-selection-coding-system) | |
52 | (define-key map "X" 'set-next-selection-coding-system) | |
53 | (define-key map "\C-\\" 'set-input-method) | |
54 | (define-key map "c" 'universal-coding-system-argument) | |
55 | (define-key map "l" 'set-language-environment) | |
56 | map) | |
33d17698 | 57 | "Keymap for Mule (Multilingual environment) specific commands.") |
4ed46869 | 58 | |
8f81f784 | 59 | ;; Keep "C-x C-m ..." for mule specific commands. |
0709d285 | 60 | (define-key ctl-x-map "\C-m" mule-keymap) |
ef8a8c8c | 61 | |
dcad02bc | 62 | (defvar describe-language-environment-map |
64657387 SM |
63 | (let ((map (make-sparse-keymap "Describe Language Environment"))) |
64 | (define-key map | |
8f43cbf3 | 65 | [Default] `(menu-item ,(purecopy "Default") describe-specified-language-support)) |
64657387 | 66 | map)) |
15b3e511 | 67 | |
dcad02bc | 68 | (defvar setup-language-environment-map |
64657387 SM |
69 | (let ((map (make-sparse-keymap "Set Language Environment"))) |
70 | (define-key map | |
8f43cbf3 | 71 | [Default] `(menu-item ,(purecopy "Default") setup-specified-language-environment)) |
64657387 | 72 | map)) |
15b3e511 | 73 | |
dcad02bc | 74 | (defvar set-coding-system-map |
64657387 SM |
75 | (let ((map (make-sparse-keymap "Set Coding System"))) |
76 | (define-key-after map [universal-coding-system-argument] | |
8f43cbf3 DN |
77 | `(menu-item ,(purecopy "For Next Command") universal-coding-system-argument |
78 | :help ,(purecopy "Coding system to be used by next command"))) | |
04991a1c | 79 | (define-key-after map [separator-1] menu-bar-separator) |
64657387 | 80 | (define-key-after map [set-buffer-file-coding-system] |
8f43cbf3 DN |
81 | `(menu-item ,(purecopy "For Saving This Buffer") set-buffer-file-coding-system |
82 | :help ,(purecopy "How to encode this buffer when saved"))) | |
64657387 | 83 | (define-key-after map [revert-buffer-with-coding-system] |
8f43cbf3 | 84 | `(menu-item ,(purecopy "For Reverting This File Now") |
64657387 SM |
85 | revert-buffer-with-coding-system |
86 | :enable buffer-file-name | |
8f43cbf3 | 87 | :help ,(purecopy "Revisit this file immediately using specified coding system"))) |
64657387 | 88 | (define-key-after map [set-file-name-coding-system] |
8f43cbf3 DN |
89 | `(menu-item ,(purecopy "For File Name") set-file-name-coding-system |
90 | :help ,(purecopy "How to decode/encode file names"))) | |
04991a1c | 91 | (define-key-after map [separator-2] menu-bar-separator) |
64657387 SM |
92 | |
93 | (define-key-after map [set-keyboard-coding-system] | |
8f43cbf3 DN |
94 | `(menu-item ,(purecopy "For Keyboard") set-keyboard-coding-system |
95 | :help ,(purecopy "How to decode keyboard input"))) | |
64657387 | 96 | (define-key-after map [set-terminal-coding-system] |
8f43cbf3 | 97 | `(menu-item ,(purecopy "For Terminal") set-terminal-coding-system |
64657387 | 98 | :enable (null (memq initial-window-system '(x w32 ns))) |
8f43cbf3 | 99 | :help ,(purecopy "How to encode terminal output"))) |
04991a1c | 100 | (define-key-after map [separator-3] menu-bar-separator) |
64657387 SM |
101 | |
102 | (define-key-after map [set-selection-coding-system] | |
8f43cbf3 | 103 | `(menu-item ,(purecopy "For X Selections/Clipboard") set-selection-coding-system |
64657387 | 104 | :visible (display-selections-p) |
8f43cbf3 | 105 | :help ,(purecopy "How to en/decode data to/from selection/clipboard"))) |
64657387 | 106 | (define-key-after map [set-next-selection-coding-system] |
8f43cbf3 | 107 | `(menu-item ,(purecopy "For Next X Selection") set-next-selection-coding-system |
64657387 | 108 | :visible (display-selections-p) |
8f43cbf3 | 109 | :help ,(purecopy "How to en/decode next selection/clipboard operation"))) |
64657387 | 110 | (define-key-after map [set-buffer-process-coding-system] |
8f43cbf3 | 111 | `(menu-item ,(purecopy "For I/O with Subprocess") set-buffer-process-coding-system |
64657387 SM |
112 | :visible (fboundp 'start-process) |
113 | :enable (get-buffer-process (current-buffer)) | |
8f43cbf3 | 114 | :help ,(purecopy "How to en/decode I/O from/to subprocess connected to this buffer"))) |
64657387 SM |
115 | map)) |
116 | ||
117 | (defvar mule-menu-keymap | |
118 | (let ((map (make-sparse-keymap "Mule (Multilingual Environment)"))) | |
119 | (define-key-after map [set-language-environment] | |
8f43cbf3 | 120 | `(menu-item ,(purecopy "Set Language Environment") ,setup-language-environment-map)) |
04991a1c | 121 | (define-key-after map [separator-mule] menu-bar-separator) |
64657387 SM |
122 | |
123 | (define-key-after map [toggle-input-method] | |
8f43cbf3 | 124 | `(menu-item ,(purecopy "Toggle Input Method") toggle-input-method)) |
64657387 | 125 | (define-key-after map [set-input-method] |
8f43cbf3 | 126 | `(menu-item ,(purecopy "Select Input Method...") set-input-method)) |
64657387 | 127 | (define-key-after map [describe-input-method] |
8f43cbf3 | 128 | `(menu-item ,(purecopy "Describe Input Method") describe-input-method)) |
04991a1c | 129 | (define-key-after map [separator-input-method] menu-bar-separator) |
64657387 SM |
130 | |
131 | (define-key-after map [set-various-coding-system] | |
8f43cbf3 | 132 | `(menu-item ,(purecopy "Set Coding Systems") ,set-coding-system-map |
597e2240 | 133 | :enable (default-value 'enable-multibyte-characters))) |
64657387 | 134 | (define-key-after map [view-hello-file] |
8f43cbf3 | 135 | `(menu-item ,(purecopy "Show Multi-lingual Text") view-hello-file |
64657387 SM |
136 | :enable (file-readable-p |
137 | (expand-file-name "HELLO" data-directory)) | |
8f43cbf3 | 138 | :help ,(purecopy "Display file which says HELLO in many languages"))) |
04991a1c | 139 | (define-key-after map [separator-coding-system] menu-bar-separator) |
64657387 SM |
140 | |
141 | (define-key-after map [describe-language-environment] | |
8f43cbf3 | 142 | `(menu-item ,(purecopy "Describe Language Environment") |
64657387 | 143 | describe-language-environment-map |
8f43cbf3 | 144 | :help ,(purecopy "Show multilingual settings for a specific language"))) |
64657387 | 145 | (define-key-after map [describe-input-method] |
8f43cbf3 DN |
146 | `(menu-item ,(purecopy "Describe Input Method...") describe-input-method |
147 | :help ,(purecopy "Keyboard layout for a specific input method"))) | |
64657387 | 148 | (define-key-after map [describe-coding-system] |
8f43cbf3 | 149 | `(menu-item ,(purecopy "Describe Coding System...") describe-coding-system)) |
64657387 | 150 | (define-key-after map [list-character-sets] |
8f43cbf3 DN |
151 | `(menu-item ,(purecopy "List Character Sets") list-character-sets |
152 | :help ,(purecopy "Show table of available character sets"))) | |
64657387 | 153 | (define-key-after map [mule-diag] |
8f43cbf3 DN |
154 | `(menu-item ,(purecopy "Show All of Mule Status") mule-diag |
155 | :help ,(purecopy "Display multilingual environment settings"))) | |
64657387 SM |
156 | map) |
157 | "Keymap for Mule (Multilingual environment) menu specific commands.") | |
cda74479 | 158 | |
4ed46869 KH |
159 | ;; This should be a single character key binding because users use it |
160 | ;; very frequently while editing multilingual text. Now we can use | |
161 | ;; only two such keys: "\C-\\" and "\C-^", but the latter is not | |
162 | ;; convenient because it requires shifting on most keyboards. An | |
163 | ;; alternative is "\C-\]" which is now bound to `abort-recursive-edit' | |
164 | ;; but it won't be used that frequently. | |
165 | (define-key global-map "\C-\\" 'toggle-input-method) | |
166 | ||
5f395df3 SM |
167 | ;; This is no good because people often type Shift-SPC |
168 | ;; meaning to type SPC. -- rms. | |
169 | ;; ;; Here's an alternative key binding for X users (Shift-SPACE). | |
170 | ;; (define-key global-map [?\S- ] 'toggle-input-method) | |
b4fba33f | 171 | |
464cc130 KH |
172 | ;;; Mule related hyperlinks. |
173 | (defconst help-xref-mule-regexp-template | |
174 | (purecopy (concat "\\(\\<\\(" | |
175 | "\\(coding system\\)\\|" | |
d0c40faa KH |
176 | "\\(input method\\)\\|" |
177 | "\\(character set\\)\\|" | |
178 | "\\(charset\\)" | |
464cc130 KH |
179 | "\\)\\s-+\\)?" |
180 | ;; Note starting with word-syntax character: | |
181 | "`\\(\\sw\\(\\sw\\|\\s_\\)+\\)'"))) | |
182 | ||
26d87040 | 183 | (defun coding-system-change-eol-conversion (coding-system eol-type) |
caff3c0a | 184 | "Return a coding system which differs from CODING-SYSTEM in EOL conversion. |
26d87040 EZ |
185 | The returned coding system converts end-of-line by EOL-TYPE |
186 | but text as the same way as CODING-SYSTEM. | |
187 | EOL-TYPE should be `unix', `dos', `mac', or nil. | |
188 | If EOL-TYPE is nil, the returned coding system detects | |
189 | how end-of-line is formatted automatically while decoding. | |
190 | ||
191 | EOL-TYPE can be specified by an integer 0, 1, or 2. | |
192 | They means `unix', `dos', and `mac' respectively." | |
193 | (if (symbolp eol-type) | |
194 | (setq eol-type (cond ((eq eol-type 'unix) 0) | |
195 | ((eq eol-type 'dos) 1) | |
196 | ((eq eol-type 'mac) 2) | |
197 | (t eol-type)))) | |
1d5b0c66 CY |
198 | ;; We call `coding-system-base' before `coding-system-eol-type', |
199 | ;; because the coding-system may not be initialized until then. | |
200 | (let* ((base (coding-system-base coding-system)) | |
201 | (orig-eol-type (coding-system-eol-type coding-system))) | |
202 | (cond ((vectorp orig-eol-type) | |
203 | (if (not eol-type) | |
204 | coding-system | |
205 | (aref orig-eol-type eol-type))) | |
206 | ((not eol-type) | |
207 | base) | |
208 | ((= eol-type orig-eol-type) | |
209 | coding-system) | |
210 | ((progn (setq orig-eol-type (coding-system-eol-type base)) | |
211 | (vectorp orig-eol-type)) | |
212 | (aref orig-eol-type eol-type))))) | |
26d87040 EZ |
213 | |
214 | (defun coding-system-change-text-conversion (coding-system coding) | |
215 | "Return a coding system which differs from CODING-SYSTEM in text conversion. | |
216 | The returned coding system converts text by CODING | |
217 | but end-of-line as the same way as CODING-SYSTEM. | |
218 | If CODING is nil, the returned coding system detects | |
219 | how text is formatted automatically while decoding." | |
1d77e15a JR |
220 | (let ((eol-type (coding-system-eol-type coding-system))) |
221 | (coding-system-change-eol-conversion | |
222 | (if coding coding 'undecided) | |
223 | (if (numberp eol-type) (aref [unix dos mac] eol-type))))) | |
26d87040 | 224 | |
be7ca044 KH |
225 | ;; Canonicalize the coding system name NAME by removing some prefixes |
226 | ;; and delimiter characters. Support function of | |
227 | ;; coding-system-from-name. | |
228 | (defun canonicalize-coding-system-name (name) | |
ca4f0e9a KH |
229 | (if (string-match "^\\(ms\\|ibm\\|windows-\\)\\([0-9]+\\)$" name) |
230 | ;; "ms950", "ibm950", "windows-950" -> "cp950" | |
231 | (concat "cp" (match-string 2 name)) | |
232 | (if (string-match "^iso[-_ ]?[0-9]" name) | |
233 | ;; "iso-8859-1" -> "8859-1", "iso-2022-jp" ->"2022-jp" | |
234 | (setq name (substring name (1- (match-end 0))))) | |
235 | (let ((idx (string-match "[-_ /]" name))) | |
236 | ;; Delete "-", "_", " ", "/" but do distinguish "16-be" and "16be". | |
237 | (while idx | |
238 | (if (and (>= idx 2) | |
239 | (eq (string-match "16-[lb]e$" name (- idx 2)) | |
240 | (- idx 2))) | |
241 | (setq idx (string-match "[-_ /]" name (match-end 0))) | |
242 | (setq name (concat (substring name 0 idx) (substring name (1+ idx))) | |
243 | idx (string-match "[-_ /]" name idx)))) | |
244 | name))) | |
be7ca044 KH |
245 | |
246 | (defun coding-system-from-name (name) | |
247 | "Return a coding system whose name matches with NAME (string or symbol)." | |
248 | (let (sym) | |
249 | (if (stringp name) (setq sym (intern name)) | |
250 | (setq sym name name (symbol-name name))) | |
251 | (if (coding-system-p sym) | |
252 | sym | |
253 | (let ((eol-type | |
254 | (if (string-match "-\\(unix\\|dos\\|mac\\)$" name) | |
255 | (prog1 (intern (match-string 1 name)) | |
256 | (setq name (substring name 0 (match-beginning 0))))))) | |
257 | (setq name (canonicalize-coding-system-name (downcase name))) | |
258 | (catch 'tag | |
259 | (dolist (elt (coding-system-list)) | |
260 | (if (string= (canonicalize-coding-system-name (symbol-name elt)) | |
261 | name) | |
262 | (throw 'tag (if eol-type (coding-system-change-eol-conversion | |
263 | elt eol-type) | |
264 | elt))))))))) | |
265 | ||
4ed46869 | 266 | (defun toggle-enable-multibyte-characters (&optional arg) |
6998e1a1 | 267 | "Change whether this buffer uses multibyte characters. |
49275d55 | 268 | With ARG, use multibyte characters if the ARG is positive. |
6998e1a1 RS |
269 | |
270 | Note that this command does not convert the byte contents of | |
271 | the buffer; it only changes the way those bytes are interpreted. | |
272 | In general, therefore, this command *changes* the sequence of | |
273 | characters that the current buffer contains. | |
274 | ||
caff3c0a JB |
275 | We suggest you avoid using this command unless you know what you are |
276 | doing. If you use it by mistake, and the buffer is now displayed | |
6998e1a1 | 277 | wrong, use this command again to toggle back to the right mode." |
4ed46869 | 278 | (interactive "P") |
b7079457 RS |
279 | (let ((new-flag |
280 | (if (null arg) (null enable-multibyte-characters) | |
281 | (> (prefix-numeric-value arg) 0)))) | |
282 | (set-buffer-multibyte new-flag)) | |
4ed46869 KH |
283 | (force-mode-line-update)) |
284 | ||
285 | (defun view-hello-file () | |
cf2fc7e9 | 286 | "Display the HELLO file, which lists many languages and characters." |
4ed46869 | 287 | (interactive) |
8f81f784 | 288 | ;; We have to decode the file in any environment. |
a4723629 GM |
289 | (letf (((default-value 'enable-multibyte-characters) t) |
290 | (coding-system-for-read 'iso-2022-7bit)) | |
291 | (view-file (expand-file-name "HELLO" data-directory)))) | |
4ed46869 | 292 | |
9e9a77a6 | 293 | (defun universal-coding-system-argument (coding-system) |
15b3e511 | 294 | "Execute an I/O command using the specified coding system." |
9e9a77a6 RS |
295 | (interactive |
296 | (let ((default (and buffer-file-coding-system | |
34104362 | 297 | (not (eq (coding-system-type buffer-file-coding-system) |
a0d96cad | 298 | 'undecided)) |
9e9a77a6 RS |
299 | buffer-file-coding-system))) |
300 | (list (read-coding-system | |
301 | (if default | |
5b76833f | 302 | (format "Coding system for following command (default %s): " default) |
9e9a77a6 RS |
303 | "Coding system for following command: ") |
304 | default)))) | |
305 | (let* ((keyseq (read-key-sequence | |
e14a8f4c | 306 | (format "Command to execute with %s:" coding-system))) |
04363179 GM |
307 | (cmd (key-binding keyseq)) |
308 | prefix) | |
735c5b17 GM |
309 | ;; read-key-sequence ignores quit, so make an explicit check. |
310 | ;; Like many places, this assumes quit == C-g, but it need not be. | |
1e4bd40d | 311 | (if (equal last-input-event ?\C-g) |
735c5b17 | 312 | (keyboard-quit)) |
4a304bb2 | 313 | (when (memq cmd '(universal-argument digit-argument)) |
04363179 | 314 | (call-interactively cmd) |
a1506d29 | 315 | |
04363179 GM |
316 | ;; Process keys bound in `universal-argument-map'. |
317 | (while (progn | |
318 | (setq keyseq (read-key-sequence nil t) | |
319 | cmd (key-binding keyseq t)) | |
320 | (not (eq cmd 'universal-argument-other-key))) | |
321 | (let ((current-prefix-arg prefix-arg) | |
61a846fb | 322 | ;; Have to bind `last-command-event' here so that |
6b61353c | 323 | ;; `digit-argument', for instance, can compute the |
04363179 | 324 | ;; prefix arg. |
61a846fb | 325 | (last-command-event (aref keyseq 0))) |
04363179 GM |
326 | (call-interactively cmd))) |
327 | ||
1f547b92 | 328 | ;; This is the final call to `universal-argument-other-key', which |
04363179 GM |
329 | ;; set's the final `prefix-arg. |
330 | (let ((current-prefix-arg prefix-arg)) | |
331 | (call-interactively cmd)) | |
a1506d29 | 332 | |
04363179 GM |
333 | ;; Read the command to execute with the given prefix arg. |
334 | (setq prefix prefix-arg | |
335 | keyseq (read-key-sequence nil t) | |
336 | cmd (key-binding keyseq))) | |
337 | ||
15b3e511 | 338 | (let ((coding-system-for-read coding-system) |
04363179 | 339 | (coding-system-for-write coding-system) |
0e9ec609 | 340 | (coding-system-require-warning t) |
04363179 | 341 | (current-prefix-arg prefix)) |
15b3e511 KH |
342 | (message "") |
343 | (call-interactively cmd)))) | |
344 | ||
de94d711 | 345 | (defun set-default-coding-systems (coding-system) |
0c3154d2 | 346 | "Set default value of various coding systems to CODING-SYSTEM. |
387136f6 | 347 | This sets the following coding systems: |
0c3154d2 | 348 | o coding system of a newly created buffer |
8efc03e1 KH |
349 | o default coding system for subprocess I/O |
350 | This also sets the following values: | |
d3e4babd | 351 | o default value used as `file-name-coding-system' for converting file names |
caff3c0a | 352 | if CODING-SYSTEM is ASCII-compatible |
d8cb7e3d | 353 | o default value for the command `set-terminal-coding-system' |
d3e4babd | 354 | o default value for the command `set-keyboard-coding-system' |
caff3c0a | 355 | if CODING-SYSTEM is ASCII-compatible" |
de94d711 KH |
356 | (check-coding-system coding-system) |
357 | (setq-default buffer-file-coding-system coding-system) | |
6b61353c KH |
358 | (if (fboundp 'ucs-set-table-for-input) |
359 | (dolist (buffer (buffer-list)) | |
360 | (or (local-variable-p 'buffer-file-coding-system buffer) | |
361 | (ucs-set-table-for-input buffer)))) | |
362 | ||
db9aec47 | 363 | (if (eq system-type 'darwin) |
a41118cc | 364 | ;; The file-name coding system on Darwin systems is always utf-8. |
db9aec47 | 365 | (setq default-file-name-coding-system 'utf-8) |
597e2240 | 366 | (if (and (default-value 'enable-multibyte-characters) |
db9aec47 KH |
367 | (or (not coding-system) |
368 | (coding-system-get coding-system 'ascii-compatible-p))) | |
369 | (setq default-file-name-coding-system coding-system))) | |
970c9391 | 370 | (setq default-terminal-coding-system coding-system) |
d1f61aaf EZ |
371 | ;; Prevent default-terminal-coding-system from converting ^M to ^J. |
372 | (setq default-keyboard-coding-system | |
373 | (coding-system-change-eol-conversion coding-system 'unix)) | |
1d77e15a JR |
374 | ;; Preserve eol-type from existing default-process-coding-systems. |
375 | ;; On non-unix-like systems in particular, these may have been set | |
376 | ;; carefully by the user, or by the startup code, to deal with the | |
377 | ;; users shell appropriately, so should not be altered by changing | |
378 | ;; language environment. | |
379 | (let ((output-coding | |
380 | (coding-system-change-text-conversion | |
381 | (car default-process-coding-system) coding-system)) | |
382 | (input-coding | |
383 | (coding-system-change-text-conversion | |
384 | (cdr default-process-coding-system) coding-system))) | |
385 | (setq default-process-coding-system | |
386 | (cons output-coding input-coding)))) | |
de94d711 | 387 | |
0c3154d2 KH |
388 | (defun prefer-coding-system (coding-system) |
389 | "Add CODING-SYSTEM at the front of the priority list for automatic detection. | |
387136f6 | 390 | This also sets the following coding systems: |
0c3154d2 | 391 | o coding system of a newly created buffer |
8efc03e1 KH |
392 | o default coding system for subprocess I/O |
393 | This also sets the following values: | |
caff3c0a | 394 | o default value used as `file-name-coding-system' for converting file names |
d8cb7e3d | 395 | o default value for the command `set-terminal-coding-system' |
03c35c83 EZ |
396 | o default value for the command `set-keyboard-coding-system' |
397 | ||
bd3ac67e EZ |
398 | If CODING-SYSTEM specifies a certain type of EOL conversion, the coding |
399 | systems set by this function will use that type of EOL conversion. | |
400 | ||
d0d8885d JB |
401 | A coding system that requires automatic detection of text+encoding |
402 | \(e.g. undecided, unix) can't be preferred." | |
0c3154d2 KH |
403 | (interactive "zPrefer coding system: ") |
404 | (if (not (and coding-system (coding-system-p coding-system))) | |
405 | (error "Invalid coding system `%s'" coding-system)) | |
97941b05 KH |
406 | (if (memq (coding-system-type coding-system) '(raw-text undecided)) |
407 | (error "Can't prefer the coding system `%s'" coding-system)) | |
408 | (let ((base (coding-system-base coding-system)) | |
bd3ac67e | 409 | (eol-type (coding-system-eol-type coding-system))) |
97941b05 | 410 | (set-coding-system-priority base) |
32226619 | 411 | (and (called-interactively-p 'interactive) |
97941b05 KH |
412 | (or (eq base coding-system) |
413 | (message "Highest priority is set to %s (base of %s)" | |
414 | base coding-system))) | |
bd3ac67e | 415 | ;; If they asked for specific EOL conversion, honor that. |
6f9dc4fd | 416 | (if (memq eol-type '(0 1 2)) |
97941b05 KH |
417 | (setq base |
418 | (coding-system-change-eol-conversion base eol-type))) | |
419 | (set-default-coding-systems base))) | |
0c3154d2 | 420 | |
b5edd1d1 KH |
421 | (defvar sort-coding-systems-predicate nil |
422 | "If non-nil, a predicate function to sort coding systems. | |
423 | ||
424 | It is called with two coding systems, and should return t if the first | |
425 | one is \"less\" than the second. | |
426 | ||
427 | The function `sort-coding-systems' use it.") | |
428 | ||
429 | (defun sort-coding-systems (codings) | |
430 | "Sort coding system list CODINGS by a priority of each coding system. | |
caff3c0a | 431 | Return the sorted list. CODINGS is modified by side effects. |
b5edd1d1 KH |
432 | |
433 | If a coding system is most preferred, it has the highest priority. | |
caff3c0a JB |
434 | Otherwise, coding systems that correspond to MIME charsets have |
435 | higher priorities. Among them, a coding system included in the | |
436 | `coding-system' key of the current language environment has higher | |
437 | priority. See also the documentation of `language-info-alist'. | |
b5edd1d1 KH |
438 | |
439 | If the variable `sort-coding-systems-predicate' (which see) is | |
caff3c0a | 440 | non-nil, it is used to sort CODINGS instead." |
b5edd1d1 KH |
441 | (if sort-coding-systems-predicate |
442 | (sort codings sort-coding-systems-predicate) | |
8f924df7 KH |
443 | (let* ((from-priority (coding-system-priority-list)) |
444 | (most-preferred (car from-priority)) | |
b5edd1d1 KH |
445 | (lang-preferred (get-language-info current-language-environment |
446 | 'coding-system)) | |
447 | (func (function | |
448 | (lambda (x) | |
449 | (let ((base (coding-system-base x))) | |
da1ebad1 KH |
450 | ;; We calculate the priority number 0..255 by |
451 | ;; using the 8 bits PMMLCEII as this: | |
3ecd3a56 GM |
452 | ;; P: 1 if most preferred. |
453 | ;; MM: greater than 0 if mime-charset. | |
454 | ;; L: 1 if one of the current lang. env.'s codings. | |
455 | ;; C: 1 if one of codings listed in the category list. | |
456 | ;; E: 1 if not XXX-with-esc | |
da1ebad1 KH |
457 | ;; II: if iso-2022 based, 0..3, else 1. |
458 | (logior | |
459 | (lsh (if (eq base most-preferred) 1 0) 7) | |
460 | (lsh | |
8f924df7 | 461 | (let ((mime (coding-system-get base :mime-charset))) |
b439e72a DL |
462 | ;; Prefer coding systems corresponding to a |
463 | ;; MIME charset. | |
b5edd1d1 | 464 | (if mime |
b439e72a DL |
465 | ;; Lower utf-16 priority so that we |
466 | ;; normally prefer utf-8 to it, and put | |
467 | ;; x-ctext below that. | |
d0d8885d JB |
468 | (cond ((string-match-p "utf-16" |
469 | (symbol-name mime)) | |
da1ebad1 | 470 | 2) |
d0d8885d | 471 | ((string-match-p "^x-" (symbol-name mime)) |
da1ebad1 KH |
472 | 1) |
473 | (t 3)) | |
b5edd1d1 | 474 | 0)) |
da1ebad1 KH |
475 | 5) |
476 | (lsh (if (memq base lang-preferred) 1 0) 4) | |
8f924df7 | 477 | (lsh (if (memq base from-priority) 1 0) 3) |
d0d8885d JB |
478 | (lsh (if (string-match-p "-with-esc\\'" |
479 | (symbol-name base)) | |
da1ebad1 | 480 | 0 1) 2) |
8f924df7 KH |
481 | (if (eq (coding-system-type base) 'iso-2022) |
482 | (let ((category (coding-system-category base))) | |
483 | ;; For ISO based coding systems, prefer | |
484 | ;; one that doesn't use designation nor | |
485 | ;; locking/single shifting. | |
a0d96cad KH |
486 | (cond |
487 | ((or (eq category 'coding-category-iso-8-1) | |
488 | (eq category 'coding-category-iso-8-2)) | |
489 | 2) | |
490 | ((or (eq category 'coding-category-iso-7-tight) | |
491 | (eq category 'coding-category-iso-7)) | |
492 | 1) | |
493 | (t | |
494 | 0))) | |
495 | 1) | |
a6dfc99b | 496 | )))))) |
b5edd1d1 KH |
497 | (sort codings (function (lambda (x y) |
498 | (> (funcall func x) (funcall func y)))))))) | |
54b226f7 | 499 | |
3fc7dfe5 | 500 | (defun find-coding-systems-region (from to) |
54b226f7 | 501 | "Return a list of proper coding systems to encode a text between FROM and TO. |
8f924df7 | 502 | |
d37ef0f6 | 503 | If FROM is a string, find coding systems in that instead of the buffer. |
54b226f7 KH |
504 | All coding systems in the list can safely encode any multibyte characters |
505 | in the text. | |
506 | ||
e8dd0160 | 507 | If the text contains no multibyte characters, return a list of a single |
3fc7dfe5 | 508 | element `undecided'." |
b5edd1d1 KH |
509 | (let ((codings (find-coding-systems-region-internal from to))) |
510 | (if (eq codings t) | |
511 | ;; The text contains only ASCII characters. Any coding | |
512 | ;; systems are safe. | |
513 | '(undecided) | |
514 | ;; We need copy-sequence because sorting will alter the argument. | |
515 | (sort-coding-systems (copy-sequence codings))))) | |
54b226f7 | 516 | |
3fc7dfe5 KH |
517 | (defun find-coding-systems-string (string) |
518 | "Return a list of proper coding systems to encode STRING. | |
519 | All coding systems in the list can safely encode any multibyte characters | |
520 | in STRING. | |
521 | ||
e8dd0160 | 522 | If STRING contains no multibyte characters, return a list of a single |
3fc7dfe5 | 523 | element `undecided'." |
b5edd1d1 | 524 | (find-coding-systems-region string nil)) |
3fc7dfe5 KH |
525 | |
526 | (defun find-coding-systems-for-charsets (charsets) | |
527 | "Return a list of proper coding systems to encode characters of CHARSETS. | |
6053d86a | 528 | CHARSETS is a list of character sets. |
1f547b92 DL |
529 | |
530 | This only finds coding systems of type `charset', whose | |
531 | `:charset-list' property includes all of CHARSETS (plus `ascii' for | |
d0d8885d | 532 | ASCII-compatible coding systems). It was used in older versions of |
1f547b92 DL |
533 | Emacs, but is unlikely to be what you really want now." |
534 | ;; Deal with aliases. | |
535 | (setq charsets (mapcar (lambda (c) | |
536 | (get-charset-property c :name)) | |
537 | charsets)) | |
b5edd1d1 KH |
538 | (cond ((or (null charsets) |
539 | (and (= (length charsets) 1) | |
540 | (eq 'ascii (car charsets)))) | |
541 | '(undecided)) | |
542 | ((or (memq 'eight-bit-control charsets) | |
543 | (memq 'eight-bit-graphic charsets)) | |
1f547b92 | 544 | '(raw-text utf-8-emacs)) |
b5edd1d1 | 545 | (t |
1f547b92 DL |
546 | (let (codings) |
547 | (dolist (cs (coding-system-list t)) | |
c7b4b466 DL |
548 | (let ((cs-charsets (and (eq (coding-system-type cs) 'charset) |
549 | (coding-system-charset-list cs))) | |
1f547b92 DL |
550 | (charsets charsets)) |
551 | (if (coding-system-get cs :ascii-compatible-p) | |
552 | (add-to-list 'cs-charsets 'ascii)) | |
553 | (if (catch 'ok | |
554 | (when cs-charsets | |
555 | (while charsets | |
556 | (unless (memq (pop charsets) cs-charsets) | |
557 | (throw 'ok nil))) | |
558 | t)) | |
559 | (push cs codings)))) | |
560 | (nreverse codings))))) | |
54b226f7 | 561 | |
51ed58ea KH |
562 | (defun find-multibyte-characters (from to &optional maxcount excludes) |
563 | "Find multibyte characters in the region specified by FROM and TO. | |
564 | If FROM is a string, find multibyte characters in the string. | |
565 | The return value is an alist of the following format: | |
566 | ((CHARSET COUNT CHAR ...) ...) | |
567 | where | |
568 | CHARSET is a character set, | |
569 | COUNT is a number of characters, | |
1f547b92 | 570 | CHARs are the characters found from the character set. |
51ed58ea | 571 | Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list. |
caff3c0a | 572 | Optional 4th arg EXCLUDES is a list of character sets to be ignored." |
51ed58ea KH |
573 | (let ((chars nil) |
574 | charset char) | |
575 | (if (stringp from) | |
a0d96cad KH |
576 | (if (multibyte-string-p from) |
577 | (let ((idx 0)) | |
d0d8885d | 578 | (while (setq idx (string-match-p "[^\000-\177]" from idx)) |
a0d96cad KH |
579 | (setq char (aref from idx) |
580 | charset (char-charset char)) | |
581 | (unless (memq charset excludes) | |
582 | (let ((slot (assq charset chars))) | |
583 | (if slot | |
584 | (if (not (memq char (nthcdr 2 slot))) | |
585 | (let ((count (nth 1 slot))) | |
586 | (setcar (cdr slot) (1+ count)) | |
587 | (if (or (not maxcount) (< count maxcount)) | |
588 | (nconc slot (list char))))) | |
589 | (setq chars (cons (list charset 1 char) chars))))) | |
590 | (setq idx (1+ idx))))) | |
591 | (if enable-multibyte-characters | |
592 | (save-excursion | |
593 | (goto-char from) | |
594 | (while (re-search-forward "[^\000-\177]" to t) | |
595 | (setq char (preceding-char) | |
596 | charset (char-charset char)) | |
597 | (unless (memq charset excludes) | |
51ed58ea KH |
598 | (let ((slot (assq charset chars))) |
599 | (if slot | |
a0d96cad | 600 | (if (not (member char (nthcdr 2 slot))) |
51ed58ea KH |
601 | (let ((count (nth 1 slot))) |
602 | (setcar (cdr slot) (1+ count)) | |
603 | (if (or (not maxcount) (< count maxcount)) | |
604 | (nconc slot (list char))))) | |
a0d96cad | 605 | (setq chars (cons (list charset 1 char) chars))))))))) |
51ed58ea KH |
606 | (nreverse chars))) |
607 | ||
738746ba KH |
608 | (defun search-unencodable-char (coding-system) |
609 | "Search forward from point for a character that is not encodable. | |
610 | It asks which coding system to check. | |
611 | If such a character is found, set point after that character. | |
612 | Otherwise, don't move point. | |
613 | ||
caff3c0a JB |
614 | When called from a program, the value is the position of the unencodable |
615 | character found, or nil if all characters are encodable." | |
738746ba KH |
616 | (interactive |
617 | (list (let ((default (or buffer-file-coding-system 'us-ascii))) | |
618 | (read-coding-system | |
5b76833f | 619 | (format "Coding-system (default %s): " default) |
738746ba KH |
620 | default)))) |
621 | (let ((pos (unencodable-char-position (point) (point-max) coding-system))) | |
622 | (if pos | |
623 | (goto-char (1+ pos)) | |
624 | (message "All following characters are encodable by %s" coding-system)) | |
625 | pos)) | |
626 | ||
c83c4f60 RS |
627 | (defvar last-coding-system-specified nil |
628 | "Most recent coding system explicitly specified by the user when asked. | |
629 | This variable is set whenever Emacs asks the user which coding system | |
630 | to use in order to write a file. If you set it to nil explicitly, | |
631 | then call `write-region', then afterward this variable will be non-nil | |
632 | only if the user was explicitly asked and specified a coding system.") | |
633 | ||
b5edd1d1 | 634 | (defvar select-safe-coding-system-accept-default-p nil |
fea6b736 | 635 | "If non-nil, a function to control the behavior of coding system selection. |
b5edd1d1 KH |
636 | The meaning is the same as the argument ACCEPT-DEFAULT-P of the |
637 | function `select-safe-coding-system' (which see). This variable | |
638 | overrides that argument.") | |
639 | ||
9ee5b744 SM |
640 | (defun select-safe-coding-system-interactively (from to codings unsafe |
641 | &optional rejected default) | |
642 | "Select interactively a coding system for the region FROM ... TO. | |
643 | FROM can be a string, as in `write-region'. | |
644 | CODINGS is the list of base coding systems known to be safe for this region, | |
645 | typically obtained with `find-coding-systems-region'. | |
646 | UNSAFE is a list of coding systems known to be unsafe for this region. | |
647 | REJECTED is a list of coding systems which were safe but for some reason | |
648 | were not recommended in the particular context. | |
649 | DEFAULT is the coding system to use by default in the query." | |
650 | ;; At first, if some defaults are unsafe, record at most 11 | |
651 | ;; problematic characters and their positions for them by turning | |
652 | ;; (CODING ...) | |
653 | ;; into | |
654 | ;; ((CODING (POS . CHAR) (POS . CHAR) ...) ...) | |
655 | (if unsafe | |
656 | (setq unsafe | |
657 | (mapcar #'(lambda (coding) | |
658 | (cons coding | |
659 | (if (stringp from) | |
660 | (mapcar #'(lambda (pos) | |
661 | (cons pos (aref from pos))) | |
662 | (unencodable-char-position | |
663 | 0 (length from) coding | |
664 | 11 from)) | |
665 | (mapcar #'(lambda (pos) | |
666 | (cons pos (char-after pos))) | |
667 | (unencodable-char-position | |
668 | from to coding 11))))) | |
669 | unsafe))) | |
670 | ||
671 | ;; Change each safe coding system to the corresponding | |
672 | ;; mime-charset name if it is also a coding system. Such a name | |
673 | ;; is more friendly to users. | |
674 | (let ((l codings) | |
675 | mime-charset) | |
676 | (while l | |
a77c22c2 KH |
677 | (setq mime-charset (coding-system-get (car l) :mime-charset)) |
678 | (if (and mime-charset (coding-system-p mime-charset) | |
679 | (coding-system-equal (car l) mime-charset)) | |
9ee5b744 SM |
680 | (setcar l mime-charset)) |
681 | (setq l (cdr l)))) | |
682 | ||
683 | ;; Don't offer variations with locking shift, which you | |
684 | ;; basically never want. | |
685 | (let (l) | |
686 | (dolist (elt codings (setq codings (nreverse l))) | |
687 | (unless (or (eq 'coding-category-iso-7-else | |
688 | (coding-system-category elt)) | |
689 | (eq 'coding-category-iso-8-else | |
690 | (coding-system-category elt))) | |
691 | (push elt l)))) | |
692 | ||
693 | ;; Remove raw-text, emacs-mule and no-conversion unless nothing | |
694 | ;; else is available. | |
695 | (setq codings | |
696 | (or (delq 'raw-text | |
697 | (delq 'emacs-mule | |
698 | (delq 'no-conversion codings))) | |
699 | '(raw-text emacs-mule no-conversion))) | |
700 | ||
701 | (let ((window-configuration (current-window-configuration)) | |
702 | (bufname (buffer-name)) | |
703 | coding-system) | |
704 | (save-excursion | |
705 | ;; If some defaults are unsafe, make sure the offending | |
706 | ;; buffer is displayed. | |
707 | (when (and unsafe (not (stringp from))) | |
708 | (pop-to-buffer bufname) | |
709 | (goto-char (apply 'min (mapcar #'(lambda (x) (car (cadr x))) | |
710 | unsafe)))) | |
711 | ;; Then ask users to select one from CODINGS while showing | |
712 | ;; the reason why none of the defaults are not used. | |
713 | (with-output-to-temp-buffer "*Warning*" | |
714 | (with-current-buffer standard-output | |
715 | (if (and (null rejected) (null unsafe)) | |
716 | (insert "No default coding systems to try for " | |
717 | (if (stringp from) | |
718 | (format "string \"%s\"." from) | |
719 | (format "buffer `%s'." bufname))) | |
720 | (insert | |
721 | "These default coding systems were tried to encode" | |
722 | (if (stringp from) | |
723 | (concat " \"" (if (> (length from) 10) | |
724 | (concat (substring from 0 10) "...\"") | |
725 | (concat from "\""))) | |
726 | (format " text\nin the buffer `%s'" bufname)) | |
727 | ":\n") | |
728 | (let ((pos (point)) | |
729 | (fill-prefix " ")) | |
730 | (dolist (x (append rejected unsafe)) | |
46c04718 | 731 | (princ " ") (princ x)) |
9ee5b744 SM |
732 | (insert "\n") |
733 | (fill-region-as-paragraph pos (point))) | |
734 | (when rejected | |
3fc0b26e EZ |
735 | (insert "These safely encode the text in the buffer, |
736 | but are not recommended for encoding text in this context, | |
9ee5b744 SM |
737 | e.g., for sending an email message.\n ") |
738 | (dolist (x rejected) | |
739 | (princ " ") (princ x)) | |
740 | (insert "\n")) | |
741 | (when unsafe | |
3fc0b26e | 742 | (insert (if rejected "The other coding systems" |
9ee5b744 | 743 | "However, each of them") |
3fc0b26e | 744 | " encountered characters it couldn't encode:\n") |
9ee5b744 | 745 | (dolist (coding unsafe) |
3fc0b26e | 746 | (insert (format " %s cannot encode these:" (car coding))) |
9ee5b744 SM |
747 | (let ((i 0) |
748 | (func1 | |
749 | #'(lambda (bufname pos) | |
750 | (when (buffer-live-p (get-buffer bufname)) | |
751 | (pop-to-buffer bufname) | |
752 | (goto-char pos)))) | |
753 | (func2 | |
754 | #'(lambda (bufname pos coding) | |
755 | (when (buffer-live-p (get-buffer bufname)) | |
756 | (pop-to-buffer bufname) | |
757 | (if (< (point) pos) | |
758 | (goto-char pos) | |
759 | (forward-char 1) | |
760 | (search-unencodable-char coding) | |
761 | (forward-char -1)))))) | |
762 | (dolist (elt (cdr coding)) | |
763 | (insert " ") | |
764 | (if (stringp from) | |
765 | (insert (if (< i 10) (cdr elt) "...")) | |
766 | (if (< i 10) | |
767 | (insert-text-button | |
768 | (cdr elt) | |
769 | :type 'help-xref | |
4e21e5aa | 770 | 'face 'link |
9ee5b744 SM |
771 | 'help-echo |
772 | "mouse-2, RET: jump to this character" | |
773 | 'help-function func1 | |
774 | 'help-args (list bufname (car elt))) | |
775 | (insert-text-button | |
776 | "..." | |
777 | :type 'help-xref | |
4e21e5aa | 778 | 'face 'link |
9ee5b744 SM |
779 | 'help-echo |
780 | "mouse-2, RET: next unencodable character" | |
781 | 'help-function func2 | |
782 | 'help-args (list bufname (car elt) | |
783 | (car coding))))) | |
784 | (setq i (1+ i)))) | |
785 | (insert "\n")) | |
333f3572 | 786 | (insert (substitute-command-keys "\ |
7d03c5b1 | 787 | |
333f3572 KH |
788 | Click on a character (or switch to this window by `\\[other-window]'\n\ |
789 | and select the characters by RET) to jump to the place it appears,\n\ | |
3fc0b26e | 790 | where `\\[universal-argument] \\[what-cursor-position]' will give information about it.\n")))) |
7d03c5b1 | 791 | (insert (substitute-command-keys "\nSelect \ |
3fc0b26e EZ |
792 | one of the safe coding systems listed below,\n\ |
793 | or cancel the writing with \\[keyboard-quit] and edit the buffer\n\ | |
794 | to remove or modify the problematic characters,\n\ | |
795 | or specify any other coding system (and risk losing\n\ | |
796 | the problematic characters).\n\n")) | |
9ee5b744 SM |
797 | (let ((pos (point)) |
798 | (fill-prefix " ")) | |
799 | (dolist (x codings) | |
800 | (princ " ") (princ x)) | |
801 | (insert "\n") | |
7d03c5b1 | 802 | (fill-region-as-paragraph pos (point))))) |
9ee5b744 SM |
803 | |
804 | ;; Read a coding system. | |
805 | (setq coding-system | |
806 | (read-coding-system | |
807 | (format "Select coding system (default %s): " default) | |
808 | default)) | |
809 | (setq last-coding-system-specified coding-system)) | |
810 | ||
811 | (kill-buffer "*Warning*") | |
812 | (set-window-configuration window-configuration) | |
813 | coding-system)) | |
814 | ||
b5edd1d1 | 815 | (defun select-safe-coding-system (from to &optional default-coding-system |
efa2c6d7 | 816 | accept-default-p file) |
d5266ddf KH |
817 | "Ask a user to select a safe coding system from candidates. |
818 | The candidates of coding systems which can safely encode a text | |
b5edd1d1 KH |
819 | between FROM and TO are shown in a popup window. Among them, the most |
820 | proper one is suggested as the default. | |
821 | ||
d0d8885d | 822 | The list of `buffer-file-coding-system' of the current buffer, the |
b56a5ae0 | 823 | default `buffer-file-coding-system', and the most preferred coding |
d0d8885d JB |
824 | system (if it corresponds to a MIME charset) is treated as the |
825 | default coding system list. Among them, the first one that safely | |
826 | encodes the text is normally selected silently and returned without | |
827 | any user interaction. See also the command `prefer-coding-system'. | |
d37ef0f6 DL |
828 | |
829 | However, the user is queried if the chosen coding system is | |
29d04c4f | 830 | inconsistent with what would be selected by `find-auto-coding' from |
d37ef0f6 DL |
831 | coding cookies &c. if the contents of the region were read from a |
832 | file. (That could lead to data corruption in a file subsequently | |
833 | re-visited and edited.) | |
54b226f7 | 834 | |
b5edd1d1 KH |
835 | Optional 3rd arg DEFAULT-CODING-SYSTEM specifies a coding system or a |
836 | list of coding systems to be prepended to the default coding system | |
0e9ec609 | 837 | list. However, if DEFAULT-CODING-SYSTEM is a list and the first |
ccb06340 | 838 | element is t, the cdr part is used as the default coding system list, |
b56a5ae0 | 839 | i.e. current `buffer-file-coding-system', default `buffer-file-coding-system', |
a985cd2f | 840 | and the most preferred coding system are not used. |
54b226f7 | 841 | |
b5edd1d1 KH |
842 | Optional 4th arg ACCEPT-DEFAULT-P, if non-nil, is a function to |
843 | determine the acceptability of the silently selected coding system. | |
844 | It is called with that coding system, and should return nil if it | |
845 | should not be silently selected and thus user interaction is required. | |
846 | ||
efa2c6d7 RS |
847 | Optional 5th arg FILE is the file name to use for this purpose. |
848 | That is different from `buffer-file-name' when handling `write-region' | |
849 | \(for example). | |
850 | ||
d0d8885d JB |
851 | The variable `select-safe-coding-system-accept-default-p', if non-nil, |
852 | overrides ACCEPT-DEFAULT-P. | |
54b226f7 KH |
853 | |
854 | Kludgy feature: if FROM is a string, the string is the target text, | |
855 | and TO is ignored." | |
64657387 | 856 | (if (not (listp default-coding-system)) |
b5edd1d1 KH |
857 | (setq default-coding-system (list default-coding-system))) |
858 | ||
29d04c4f KH |
859 | (let ((no-other-defaults nil) |
860 | auto-cs) | |
d35deeec | 861 | (unless (or (stringp from) find-file-literally) |
b4dc7d98 | 862 | ;; Find an auto-coding that is specified for the current |
29d04c4f KH |
863 | ;; buffer and file from the region FROM and TO. |
864 | (save-excursion | |
865 | (save-restriction | |
866 | (widen) | |
867 | (goto-char from) | |
868 | (setq auto-cs (find-auto-coding (or file buffer-file-name "") | |
869 | (- to from))) | |
870 | (if auto-cs | |
871 | (if (coding-system-p (car auto-cs)) | |
872 | (setq auto-cs (car auto-cs)) | |
873 | (display-warning | |
2364df5c | 874 | 'mule |
29d04c4f KH |
875 | (format "\ |
876 | Invalid coding system `%s' is specified | |
877 | for the current buffer/file by the %s. | |
878 | It is highly recommended to fix it before writing to a file." | |
879 | (car auto-cs) | |
880 | (if (eq (cdr auto-cs) :coding) ":coding tag" | |
2364df5c JB |
881 | (format "variable `%s'" (cdr auto-cs)))) |
882 | :warning) | |
29d04c4f KH |
883 | (or (yes-or-no-p "Really proceed with writing? ") |
884 | (error "Save aborted")) | |
885 | (setq auto-cs nil)))))) | |
886 | ||
0e9ec609 KH |
887 | (if (eq (car default-coding-system) t) |
888 | (setq no-other-defaults t | |
889 | default-coding-system (cdr default-coding-system))) | |
890 | ||
891 | ;; Change elements of the list to (coding . base-coding). | |
892 | (setq default-coding-system | |
893 | (mapcar (function (lambda (x) (cons x (coding-system-base x)))) | |
894 | default-coding-system)) | |
895 | ||
29d04c4f | 896 | (if (and auto-cs (not no-other-defaults)) |
2df48a87 SM |
897 | ;; If the file has a coding cookie, use it regardless of any |
898 | ;; other setting. | |
29d04c4f | 899 | (let ((base (coding-system-base auto-cs))) |
2df48a87 SM |
900 | (unless (memq base '(nil undecided)) |
901 | (setq default-coding-system (list (cons auto-cs base))) | |
902 | (setq no-other-defaults t)))) | |
29d04c4f | 903 | |
0e9ec609 KH |
904 | (unless no-other-defaults |
905 | ;; If buffer-file-coding-system is not nil nor undecided, append it | |
906 | ;; to the defaults. | |
907 | (if buffer-file-coding-system | |
908 | (let ((base (coding-system-base buffer-file-coding-system))) | |
909 | (or (eq base 'undecided) | |
0e9ec609 | 910 | (rassq base default-coding-system) |
ccb06340 KH |
911 | (setq default-coding-system |
912 | (append default-coding-system | |
913 | (list (cons buffer-file-coding-system base))))))) | |
a985cd2f | 914 | |
659be258 KH |
915 | (unless (and buffer-file-coding-system-explicit |
916 | (cdr buffer-file-coding-system-explicit)) | |
b56a5ae0 | 917 | ;; If default buffer-file-coding-system is not nil nor undecided, |
659be258 | 918 | ;; append it to the defaults. |
b56a5ae0 SM |
919 | (when (default-value 'buffer-file-coding-system) |
920 | (let ((base (coding-system-base | |
921 | (default-value 'buffer-file-coding-system)))) | |
922 | (or (eq base 'undecided) | |
923 | (rassq base default-coding-system) | |
924 | (setq default-coding-system | |
925 | (append default-coding-system | |
926 | (list (cons (default-value | |
927 | 'buffer-file-coding-system) | |
928 | base))))))) | |
659be258 KH |
929 | |
930 | ;; If the most preferred coding system has the property mime-charset, | |
931 | ;; append it to the defaults. | |
932 | (let ((preferred (coding-system-priority-list t)) | |
933 | base) | |
934 | (and (coding-system-p preferred) | |
935 | (setq base (coding-system-base preferred)) | |
936 | (coding-system-get preferred :mime-charset) | |
937 | (not (rassq base default-coding-system)) | |
938 | (setq default-coding-system | |
939 | (append default-coding-system | |
940 | (list (cons preferred base)))))))) | |
29d04c4f KH |
941 | |
942 | (if select-safe-coding-system-accept-default-p | |
943 | (setq accept-default-p select-safe-coding-system-accept-default-p)) | |
944 | ||
bae7cd08 | 945 | ;; Decide the eol-type from the top of the default codings, |
b56a5ae0 | 946 | ;; current buffer-file-coding-system, or default buffer-file-coding-system. |
bae7cd08 KH |
947 | (if default-coding-system |
948 | (let ((default-eol-type (coding-system-eol-type | |
949 | (caar default-coding-system)))) | |
950 | (if (and (vectorp default-eol-type) buffer-file-coding-system) | |
d35deeec | 951 | (setq default-eol-type (coding-system-eol-type |
bae7cd08 | 952 | buffer-file-coding-system))) |
b56a5ae0 SM |
953 | (if (and (vectorp default-eol-type) |
954 | (default-value 'buffer-file-coding-system)) | |
955 | (setq default-eol-type | |
956 | (coding-system-eol-type | |
957 | (default-value 'buffer-file-coding-system)))) | |
bae7cd08 KH |
958 | (if (and default-eol-type (not (vectorp default-eol-type))) |
959 | (dolist (elt default-coding-system) | |
960 | (setcar elt (coding-system-change-eol-conversion | |
961 | (car elt) default-eol-type)))))) | |
962 | ||
29d04c4f KH |
963 | (let ((codings (find-coding-systems-region from to)) |
964 | (coding-system nil) | |
c381cb8d | 965 | (tick (if (not (stringp from)) (buffer-chars-modified-tick))) |
29d04c4f | 966 | safe rejected unsafe) |
bae7cd08 KH |
967 | (if (eq (car codings) 'undecided) |
968 | ;; Any coding system is ok. | |
969 | (setq coding-system (caar default-coding-system)) | |
970 | ;; Reverse the list so that elements are accumulated in safe, | |
971 | ;; rejected, and unsafe in the correct order. | |
972 | (setq default-coding-system (nreverse default-coding-system)) | |
973 | ||
974 | ;; Classify the defaults into safe, rejected, and unsafe. | |
975 | (dolist (elt default-coding-system) | |
976 | (if (or (eq (car codings) 'undecided) | |
977 | (memq (cdr elt) codings)) | |
978 | (if (and (functionp accept-default-p) | |
979 | (not (funcall accept-default-p (cdr elt)))) | |
980 | (push (car elt) rejected) | |
981 | (push (car elt) safe)) | |
982 | (push (car elt) unsafe))) | |
983 | (if safe | |
984 | (setq coding-system (car safe)))) | |
29d04c4f KH |
985 | |
986 | ;; If all the defaults failed, ask a user. | |
bae7cd08 | 987 | (when (not coding-system) |
29d04c4f KH |
988 | (setq coding-system (select-safe-coding-system-interactively |
989 | from to codings unsafe rejected (car codings)))) | |
990 | ||
29d04c4f KH |
991 | ;; Check we're not inconsistent with what `coding:' spec &c would |
992 | ;; give when file is re-read. | |
993 | ;; But don't do this if we explicitly ignored the cookie | |
994 | ;; by using `find-file-literally'. | |
995 | (when (and auto-cs | |
996 | (not (and | |
997 | coding-system | |
998 | (memq (coding-system-type coding-system) '(0 5))))) | |
6b61353c KH |
999 | ;; Merge coding-system and auto-cs as far as possible. |
1000 | (if (not coding-system) | |
1001 | (setq coding-system auto-cs) | |
1002 | (if (not auto-cs) | |
1003 | (setq auto-cs coding-system) | |
1004 | (let ((eol-type-1 (coding-system-eol-type coding-system)) | |
1005 | (eol-type-2 (coding-system-eol-type auto-cs))) | |
1006 | (if (eq (coding-system-base coding-system) 'undecided) | |
1007 | (setq coding-system (coding-system-change-text-conversion | |
1008 | coding-system auto-cs)) | |
1009 | (if (eq (coding-system-base auto-cs) 'undecided) | |
1010 | (setq auto-cs (coding-system-change-text-conversion | |
1011 | auto-cs coding-system)))) | |
1012 | (if (vectorp eol-type-1) | |
1013 | (or (vectorp eol-type-2) | |
1014 | (setq coding-system (coding-system-change-eol-conversion | |
1015 | coding-system eol-type-2))) | |
1016 | (if (vectorp eol-type-2) | |
1017 | (setq auto-cs (coding-system-change-eol-conversion | |
1018 | auto-cs eol-type-1))))))) | |
1019 | ||
1020 | (if (and auto-cs | |
d37ef0f6 DL |
1021 | ;; Don't barf if writing a compressed file, say. |
1022 | ;; This check perhaps isn't ideal, but is probably | |
1023 | ;; the best thing to do. | |
1024 | (not (auto-coding-alist-lookup (or file buffer-file-name ""))) | |
6b61353c | 1025 | (not (coding-system-equal coding-system auto-cs))) |
43afed8c RS |
1026 | (unless (yes-or-no-p |
1027 | (format "Selected encoding %s disagrees with \ | |
1028 | %s specified by file contents. Really save (else edit coding cookies \ | |
1029 | and try again)? " coding-system auto-cs)) | |
29d04c4f | 1030 | (error "Save aborted")))) |
c381cb8d | 1031 | (when (and tick (/= tick (buffer-chars-modified-tick))) |
86c3a9fb | 1032 | (error "Cancelled because the buffer was modified")) |
29d04c4f | 1033 | coding-system))) |
54b226f7 KH |
1034 | |
1035 | (setq select-safe-coding-system-function 'select-safe-coding-system) | |
1036 | ||
46babb23 KH |
1037 | (defun select-message-coding-system () |
1038 | "Return a coding system to encode the outgoing message of the current buffer. | |
1039 | It at first tries the first coding system found in these variables | |
1040 | in this order: | |
1041 | (1) local value of `buffer-file-coding-system' | |
1042 | (2) value of `sendmail-coding-system' | |
b5edd1d1 | 1043 | (3) value of `default-sendmail-coding-system' |
b56a5ae0 | 1044 | (4) default value of `buffer-file-coding-system' |
46babb23 KH |
1045 | If the found coding system can't encode the current buffer, |
1046 | or none of them are bound to a coding system, | |
48e41165 | 1047 | it asks the user to select a proper coding system." |
46babb23 | 1048 | (let ((coding (or (and (local-variable-p 'buffer-file-coding-system) |
b5edd1d1 KH |
1049 | buffer-file-coding-system) |
1050 | sendmail-coding-system | |
1051 | default-sendmail-coding-system | |
b56a5ae0 | 1052 | (default-value 'buffer-file-coding-system)))) |
46babb23 | 1053 | (if (eq coding 'no-conversion) |
d37ef0f6 | 1054 | ;; We should never use no-conversion for outgoing mail. |
46babb23 KH |
1055 | (setq coding nil)) |
1056 | (if (fboundp select-safe-coding-system-function) | |
1057 | (funcall select-safe-coding-system-function | |
b5edd1d1 | 1058 | (point-min) (point-max) coding |
209c73b0 | 1059 | (function (lambda (x) (coding-system-get x :mime-charset)))) |
46babb23 | 1060 | coding))) |
4ed46869 | 1061 | \f |
03c35c83 | 1062 | ;;; Language support stuff. |
4ed46869 | 1063 | |
4ed46869 | 1064 | (defvar language-info-alist nil |
2c395d56 | 1065 | "Alist of language environment definitions. |
4ed46869 KH |
1066 | Each element looks like: |
1067 | (LANGUAGE-NAME . ((KEY . INFO) ...)) | |
2c395d56 RS |
1068 | where LANGUAGE-NAME is a string, the name of the language environment, |
1069 | KEY is a symbol denoting the kind of information, and | |
1070 | INFO is the data associated with KEY. | |
1071 | Meaningful values for KEY include | |
1072 | ||
1073 | documentation value is documentation of what this language environment | |
1074 | is meant for, and how to use it. | |
7dd42fb1 KH |
1075 | charset value is a list of the character sets mainly used |
1076 | by this language environment. | |
d37ef0f6 DL |
1077 | sample-text value is an expression which is evalled to generate |
1078 | a line of text written using characters appropriate | |
1079 | for this language environment. | |
2c395d56 RS |
1080 | setup-function value is a function to call to switch to this |
1081 | language environment. | |
1082 | exit-function value is a function to call to leave this | |
1083 | language environment. | |
d0d8885d JB |
1084 | coding-system value is a list of coding systems that are good for |
1085 | saving text written in this language environment. | |
2c395d56 RS |
1086 | This list serves as suggestions to the user; |
1087 | in effect, as a kind of documentation. | |
1088 | coding-priority value is a list of coding systems for this language | |
1089 | environment, in order of decreasing priority. | |
1090 | This is used to set up the coding system priority | |
45d08cb2 | 1091 | list when you switch to this language environment. |
ddb5c041 | 1092 | nonascii-translation |
7dd42fb1 KH |
1093 | value is a charset of dimension one to use for |
1094 | converting a unibyte character to multibyte | |
1095 | and vice versa. | |
ddb5c041 KH |
1096 | input-method value is a default input method for this language |
1097 | environment. | |
7624ebb9 KH |
1098 | features value is a list of features requested in this |
1099 | language environment. | |
6b61353c | 1100 | ctext-non-standard-encodings |
d0d8885d JB |
1101 | value is a list of non-standard encoding names used |
1102 | in extended segments of CTEXT. See the variable | |
1103 | `ctext-non-standard-encodings' for more detail. | |
ddb5c041 | 1104 | |
b9631bb2 | 1105 | The following key takes effect only when multibyte characters are |
597e2240 | 1106 | globally disabled, i.e. the default value of `enable-multibyte-characters' |
b9631bb2 | 1107 | is nil (which is an obsolete and deprecated use): |
ddb5c041 | 1108 | |
d0d8885d JB |
1109 | unibyte-display value is a coding system to encode characters for |
1110 | the terminal. Characters in the range of 160 to | |
1111 | 255 display not as octal escapes, but as non-ASCII | |
1112 | characters in this language environment.") | |
2c395d56 RS |
1113 | |
1114 | (defun get-language-info (lang-env key) | |
1115 | "Return information listed under KEY for language environment LANG-ENV. | |
1116 | KEY is a symbol denoting the kind of information. | |
1117 | For a list of useful values for KEY and their meanings, | |
1118 | see `language-info-alist'." | |
1119 | (if (symbolp lang-env) | |
1120 | (setq lang-env (symbol-name lang-env))) | |
f15466c5 | 1121 | (let ((lang-slot (assoc-string lang-env language-info-alist t))) |
4ed46869 KH |
1122 | (if lang-slot |
1123 | (cdr (assq key (cdr lang-slot)))))) | |
1124 | ||
f08adf27 | 1125 | (defun set-language-info (lang-env key info) |
2c395d56 RS |
1126 | "Modify part of the definition of language environment LANG-ENV. |
1127 | Specifically, this stores the information INFO under KEY | |
1128 | in the definition of this language environment. | |
4ed46869 | 1129 | KEY is a symbol denoting the kind of information. |
2c395d56 | 1130 | INFO is the value for that information. |
281d03ec | 1131 | |
2c395d56 | 1132 | For a list of useful values for KEY and their meanings, |
f08adf27 | 1133 | see `language-info-alist'." |
2c395d56 RS |
1134 | (if (symbolp lang-env) |
1135 | (setq lang-env (symbol-name lang-env))) | |
eec5c8f9 KH |
1136 | (set-language-info-internal lang-env key info) |
1137 | (if (equal lang-env current-language-environment) | |
d042f8b4 | 1138 | (cond ((eq key 'coding-priority) |
e9a8ed3c KH |
1139 | (set-language-environment-coding-systems lang-env) |
1140 | (set-language-environment-charset lang-env)) | |
d042f8b4 KH |
1141 | ((eq key 'input-method) |
1142 | (set-language-environment-input-method lang-env)) | |
1143 | ((eq key 'nonascii-translation) | |
1144 | (set-language-environment-nonascii-translation lang-env)) | |
1145 | ((eq key 'charset) | |
1146 | (set-language-environment-charset lang-env)) | |
597e2240 | 1147 | ((and (not (default-value 'enable-multibyte-characters)) |
d042f8b4 KH |
1148 | (or (eq key 'unibyte-syntax) (eq key 'unibyte-display))) |
1149 | (set-language-environment-unibyte lang-env))))) | |
eec5c8f9 KH |
1150 | |
1151 | (defun set-language-info-internal (lang-env key info) | |
1152 | "Internal use only. | |
1153 | Arguments are the same as `set-language-info'." | |
4ed46869 | 1154 | (let (lang-slot key-slot) |
2c395d56 | 1155 | (setq lang-slot (assoc lang-env language-info-alist)) |
4ed46869 | 1156 | (if (null lang-slot) ; If no slot for the language, add it. |
2c395d56 | 1157 | (setq lang-slot (list lang-env) |
4ed46869 KH |
1158 | language-info-alist (cons lang-slot language-info-alist))) |
1159 | (setq key-slot (assq key lang-slot)) | |
1160 | (if (null key-slot) ; If no slot for the key, add it. | |
1161 | (progn | |
1162 | (setq key-slot (list key)) | |
1163 | (setcdr lang-slot (cons key-slot (cdr lang-slot))))) | |
590dbcba | 1164 | (setcdr key-slot (purecopy info)) |
1169bd86 | 1165 | ;; Update the custom-type of `current-language-environment'. |
590dbcba | 1166 | (put 'current-language-environment 'custom-type |
1169bd86 MR |
1167 | (cons 'choice (mapcar |
1168 | (lambda (lang) | |
756e055f MR |
1169 | (list 'const lang)) |
1170 | (sort (mapcar 'car language-info-alist) 'string<)))))) | |
4ed46869 | 1171 | |
2c395d56 RS |
1172 | (defun set-language-info-alist (lang-env alist &optional parents) |
1173 | "Store ALIST as the definition of language environment LANG-ENV. | |
1174 | ALIST is an alist of KEY and INFO values. See the documentation of | |
98c6d6ed | 1175 | `language-info-alist' for the meanings of KEY and INFO. |
54b226f7 | 1176 | |
2c395d56 | 1177 | Optional arg PARENTS is a list of parent menu names; it specifies |
d37ef0f6 | 1178 | where to put this language environment in the |
2c395d56 RS |
1179 | Describe Language Environment and Set Language Environment menus. |
1180 | For example, (\"European\") means to put this language environment | |
1181 | in the European submenu in each of those two menus." | |
905a9ed3 DN |
1182 | (cond ((symbolp lang-env) |
1183 | (setq lang-env (symbol-name lang-env))) | |
1184 | ((stringp lang-env) | |
1185 | (setq lang-env (purecopy lang-env)))) | |
54b226f7 KH |
1186 | (let ((describe-map describe-language-environment-map) |
1187 | (setup-map setup-language-environment-map)) | |
1188 | (if parents | |
1189 | (let ((l parents) | |
9deed82f | 1190 | map parent-symbol parent prompt) |
54b226f7 KH |
1191 | (while l |
1192 | (if (symbolp (setq parent-symbol (car l))) | |
1193 | (setq parent (symbol-name parent)) | |
1194 | (setq parent parent-symbol parent-symbol (intern parent))) | |
1195 | (setq map (lookup-key describe-map (vector parent-symbol))) | |
9deed82f EZ |
1196 | ;; This prompt string is for define-prefix-command, so |
1197 | ;; that the map it creates will be suitable for a menu. | |
1198 | (or map (setq prompt (format "%s Environment" parent))) | |
54b226f7 KH |
1199 | (if (not map) |
1200 | (progn | |
1201 | (setq map (intern (format "describe-%s-environment-map" | |
1202 | (downcase parent)))) | |
9deed82f | 1203 | (define-prefix-command map nil prompt) |
54b226f7 | 1204 | (define-key-after describe-map (vector parent-symbol) |
64657387 | 1205 | (cons parent map)))) |
54b226f7 KH |
1206 | (setq describe-map (symbol-value map)) |
1207 | (setq map (lookup-key setup-map (vector parent-symbol))) | |
1208 | (if (not map) | |
1209 | (progn | |
1210 | (setq map (intern (format "setup-%s-environment-map" | |
1211 | (downcase parent)))) | |
9deed82f | 1212 | (define-prefix-command map nil prompt) |
54b226f7 | 1213 | (define-key-after setup-map (vector parent-symbol) |
64657387 | 1214 | (cons parent map)))) |
54b226f7 KH |
1215 | (setq setup-map (symbol-value map)) |
1216 | (setq l (cdr l))))) | |
f08adf27 RS |
1217 | |
1218 | ;; Set up menu items for this language env. | |
7624ebb9 | 1219 | (let ((doc (assq 'documentation alist))) |
f08adf27 RS |
1220 | (when doc |
1221 | (define-key-after describe-map (vector (intern lang-env)) | |
64657387 | 1222 | (cons lang-env 'describe-specified-language-support)))) |
7624ebb9 | 1223 | (define-key-after setup-map (vector (intern lang-env)) |
64657387 | 1224 | (cons lang-env 'setup-specified-language-environment)) |
f08adf27 | 1225 | |
eec5c8f9 KH |
1226 | (dolist (elt alist) |
1227 | (set-language-info-internal lang-env (car elt) (cdr elt))) | |
d35deeec | 1228 | |
eec5c8f9 KH |
1229 | (if (equal lang-env current-language-environment) |
1230 | (set-language-environment lang-env)))) | |
4ed46869 | 1231 | |
ae302641 | 1232 | (defun read-language-name (key prompt &optional default) |
2c395d56 | 1233 | "Read a language environment name which has information for KEY. |
ddb5c041 | 1234 | If KEY is nil, read any language environment. |
2c395d56 RS |
1235 | Prompt with PROMPT. DEFAULT is the default choice of language environment. |
1236 | This returns a language environment name as a string." | |
4ed46869 KH |
1237 | (let* ((completion-ignore-case t) |
1238 | (name (completing-read prompt | |
1239 | language-info-alist | |
ddb5c041 | 1240 | (and key |
ca429a25 | 1241 | (function (lambda (elm) (and (listp elm) (assq key elm))))) |
ae302641 | 1242 | t nil nil default))) |
13e82c04 | 1243 | (if (and (> (length name) 0) |
ddb5c041 KH |
1244 | (or (not key) |
1245 | (get-language-info name key))) | |
13e82c04 | 1246 | name))) |
4ed46869 KH |
1247 | \f |
1248 | ;;; Multilingual input methods. | |
d37ef0f6 | 1249 | (defgroup leim nil |
d0c40faa KH |
1250 | "LEIM: Libraries of Emacs Input Methods." |
1251 | :group 'mule) | |
4ed46869 | 1252 | |
d0b9c3ab KH |
1253 | (defconst leim-list-file-name "leim-list.el" |
1254 | "Name of LEIM list file. | |
1255 | This file contains a list of libraries of Emacs input methods (LEIM) | |
1256 | in the format of Lisp expression for registering each input method. | |
1257 | Emacs loads this file at startup time.") | |
1258 | ||
905a9ed3 | 1259 | (defconst leim-list-header (format |
316732d2 | 1260 | ";;; %s -- list of LEIM (Library of Emacs Input Method) -*-coding: utf-8;-*- |
d0b9c3ab | 1261 | ;; |
bcd76f45 SM |
1262 | ;; This file is automatically generated. |
1263 | ;; | |
d0b9c3ab | 1264 | ;; This file contains a list of LEIM (Library of Emacs Input Method) |
c654de1d DL |
1265 | ;; methods in the same directory as this file. Loading this file |
1266 | ;; registers all the input methods in Emacs. | |
d0b9c3ab | 1267 | ;; |
d33d5fbe | 1268 | ;; Each entry has the form: |
d0b9c3ab KH |
1269 | ;; (register-input-method |
1270 | ;; INPUT-METHOD LANGUAGE-NAME ACTIVATE-FUNC | |
1271 | ;; TITLE DESCRIPTION | |
1272 | ;; ARG ...) | |
c654de1d | 1273 | ;; See the function `register-input-method' for the meanings of the arguments. |
d0b9c3ab | 1274 | ;; |
bcd76f45 | 1275 | ;; If this directory is included in `load-path', Emacs automatically |
d0b9c3ab KH |
1276 | ;; loads this file at startup time. |
1277 | ||
1278 | " | |
1279 | leim-list-file-name) | |
1280 | "Header to be inserted in LEIM list file.") | |
1281 | ||
905a9ed3 | 1282 | (defconst leim-list-entry-regexp "^(register-input-method" |
d0b9c3ab | 1283 | "Regexp matching head of each entry in LEIM list file. |
caff3c0a | 1284 | See also the variable `leim-list-header'.") |
d0b9c3ab KH |
1285 | |
1286 | (defvar update-leim-list-functions | |
1287 | '(quail-update-leim-list-file) | |
1288 | "List of functions to call to update LEIM list file. | |
1289 | Each function is called with one arg, LEIM directory name.") | |
1290 | ||
a337fe7f RS |
1291 | (defun update-leim-list-file (&rest dirs) |
1292 | "Update LEIM list file in directories DIRS." | |
d0d8885d JB |
1293 | (dolist (function update-leim-list-functions) |
1294 | (apply function dirs))) | |
d0b9c3ab | 1295 | |
4ed46869 KH |
1296 | (defvar current-input-method nil |
1297 | "The current input method for multilingual text. | |
96db204a | 1298 | If nil, that means no input method is activated now.") |
4ed46869 KH |
1299 | (make-variable-buffer-local 'current-input-method) |
1300 | (put 'current-input-method 'permanent-local t) | |
1301 | ||
1302 | (defvar current-input-method-title nil | |
d0b9c3ab | 1303 | "Title string of the current input method shown in mode line.") |
4ed46869 KH |
1304 | (make-variable-buffer-local 'current-input-method-title) |
1305 | (put 'current-input-method-title 'permanent-local t) | |
1306 | ||
b4fba33f | 1307 | (defcustom default-input-method nil |
caff3c0a | 1308 | "Default input method for multilingual text (a string). |
b4fba33f | 1309 | This is the input method activated automatically by the command |
9b10b5a3 | 1310 | `toggle-input-method' (\\[toggle-input-method])." |
1f547b92 | 1311 | :link '(custom-manual "(emacs)Input Methods") |
8861c593 | 1312 | :group 'mule |
d398dba6 DL |
1313 | :type '(choice (const nil) (string |
1314 | :completion-ignore-case t | |
1315 | :complete-function widget-string-complete | |
1316 | :completion-alist input-method-alist | |
1317 | :prompt-history input-method-history)) | |
5806e8a6 | 1318 | :set-after '(current-language-environment)) |
b4fba33f | 1319 | |
0f835e87 KH |
1320 | (put 'input-method-function 'permanent-local t) |
1321 | ||
723a427a | 1322 | (defvar input-method-history nil |
6f5d2452 EZ |
1323 | "History list of input methods read from the minibuffer. |
1324 | ||
1325 | Maximum length of the history list is determined by the value | |
1326 | of `history-length', which see.") | |
723a427a KH |
1327 | (make-variable-buffer-local 'input-method-history) |
1328 | (put 'input-method-history 'permanent-local t) | |
4ed46869 KH |
1329 | |
1330 | (defvar inactivate-current-input-method-function nil | |
1331 | "Function to call for inactivating the current input method. | |
1332 | Every input method should set this to an appropriate value when activated. | |
f17ccaee KH |
1333 | This function is called with no argument. |
1334 | ||
1335 | This function should never change the value of `current-input-method'. | |
1336 | It is set to nil by the function `inactivate-input-method'.") | |
4ed46869 KH |
1337 | (make-variable-buffer-local 'inactivate-current-input-method-function) |
1338 | (put 'inactivate-current-input-method-function 'permanent-local t) | |
1339 | ||
1340 | (defvar describe-current-input-method-function nil | |
1341 | "Function to call for describing the current input method. | |
1342 | This function is called with no argument.") | |
1343 | (make-variable-buffer-local 'describe-current-input-method-function) | |
1344 | (put 'describe-current-input-method-function 'permanent-local t) | |
1345 | ||
d0b9c3ab | 1346 | (defvar input-method-alist nil |
2c395d56 | 1347 | "Alist of input method names vs how to use them. |
d0b9c3ab | 1348 | Each element has the form: |
2c395d56 RS |
1349 | (INPUT-METHOD LANGUAGE-ENV ACTIVATE-FUNC TITLE DESCRIPTION ARGS...) |
1350 | See the function `register-input-method' for the meanings of the elements.") | |
ac549fa5 | 1351 | ;;;###autoload |
6dc3311d | 1352 | (put 'input-method-alist 'risky-local-variable t) |
2c395d56 | 1353 | |
f08adf27 | 1354 | (defun register-input-method (input-method lang-env &rest args) |
bf42aa15 | 1355 | "Register INPUT-METHOD as an input method for language environment LANG-ENV. |
d0b9c3ab | 1356 | |
d35deeec | 1357 | INPUT-METHOD and LANG-ENV are symbols or strings. |
2c395d56 RS |
1358 | ACTIVATE-FUNC is a function to call to activate this method. |
1359 | TITLE is a string to show in the mode line when this method is active. | |
1360 | DESCRIPTION is a string describing this method and what it is good for. | |
1361 | The ARGS, if any, are passed as arguments to ACTIVATE-FUNC. | |
205814ee KH |
1362 | All told, the arguments to ACTIVATE-FUNC are INPUT-METHOD and the ARGS. |
1363 | ||
c654de1d DL |
1364 | This function is mainly used in the file \"leim-list.el\" which is |
1365 | created at Emacs build time, registering all Quail input methods | |
0b6cadff | 1366 | contained in the Emacs distribution. |
205814ee | 1367 | |
0b6cadff | 1368 | In case you want to register a new Quail input method by yourself, be |
205814ee | 1369 | careful to use the same input method title as given in the third |
0b6cadff DL |
1370 | parameter of `quail-define-package'. (If the values are different, the |
1371 | string specified in this function takes precedence.) | |
205814ee KH |
1372 | |
1373 | The commands `describe-input-method' and `list-input-methods' need | |
0b6cadff | 1374 | these duplicated values to show some information about input methods |
d35deeec JB |
1375 | without loading the relevant Quail packages. |
1376 | \n(fn INPUT-METHOD LANG-ENV ACTIVATE-FUNC TITLE DESCRIPTION &rest ARGS)" | |
f08adf27 | 1377 | (if (symbolp lang-env) |
ebfa10d3 DN |
1378 | (setq lang-env (symbol-name lang-env)) |
1379 | (setq lang-env (purecopy lang-env))) | |
4ef06f75 | 1380 | (if (symbolp input-method) |
ebfa10d3 DN |
1381 | (setq input-method (symbol-name input-method)) |
1382 | (setq input-method (purecopy input-method))) | |
1383 | (setq args (mapcar 'purecopy args)) | |
f08adf27 | 1384 | (let ((info (cons lang-env args)) |
d0b9c3ab KH |
1385 | (slot (assoc input-method input-method-alist))) |
1386 | (if slot | |
1387 | (setcdr slot info) | |
1388 | (setq slot (cons input-method info)) | |
1389 | (setq input-method-alist (cons slot input-method-alist))))) | |
1390 | ||
4d5ac029 | 1391 | (defun read-input-method-name (prompt &optional default inhibit-null) |
d0b9c3ab | 1392 | "Read a name of input method from a minibuffer prompting with PROMPT. |
4d5ac029 | 1393 | If DEFAULT is non-nil, use that as the default, |
0b6cadff | 1394 | and substitute it into PROMPT at the first `%s'. |
4ef06f75 KH |
1395 | If INHIBIT-NULL is non-nil, null input signals an error. |
1396 | ||
1397 | The return value is a string." | |
4d5ac029 RS |
1398 | (if default |
1399 | (setq prompt (format prompt default))) | |
d0b9c3ab | 1400 | (let* ((completion-ignore-case t) |
c54044ff KH |
1401 | ;; As it is quite normal to change input method in the |
1402 | ;; minibuffer, we must enable it even if | |
1403 | ;; enable-recursive-minibuffers is currently nil. | |
1404 | (enable-recursive-minibuffers t) | |
723a427a KH |
1405 | ;; This binding is necessary because input-method-history is |
1406 | ;; buffer local. | |
d0b9c3ab | 1407 | (input-method (completing-read prompt input-method-alist |
87505a98 RS |
1408 | nil t nil 'input-method-history |
1409 | default))) | |
bf294e6e KH |
1410 | (if (and input-method (symbolp input-method)) |
1411 | (setq input-method (symbol-name input-method))) | |
d0b9c3ab KH |
1412 | (if (> (length input-method) 0) |
1413 | input-method | |
1414 | (if inhibit-null | |
43807b77 | 1415 | (error "No valid input method is specified"))))) |
d0b9c3ab | 1416 | |
d0b9c3ab | 1417 | (defun activate-input-method (input-method) |
2c395d56 RS |
1418 | "Switch to input method INPUT-METHOD for the current buffer. |
1419 | If some other input method is already active, turn it off first. | |
1420 | If INPUT-METHOD is nil, deactivate any current input method." | |
305a3cb6 | 1421 | (if (and input-method (symbolp input-method)) |
4ef06f75 | 1422 | (setq input-method (symbol-name input-method))) |
723a427a KH |
1423 | (if (and current-input-method |
1424 | (not (string= current-input-method input-method))) | |
305a3cb6 | 1425 | (inactivate-input-method)) |
2c395d56 | 1426 | (unless (or current-input-method (null input-method)) |
d0b9c3ab KH |
1427 | (let ((slot (assoc input-method input-method-alist))) |
1428 | (if (null slot) | |
723a427a | 1429 | (error "Can't activate input method `%s'" input-method)) |
278dd6ac | 1430 | (setq current-input-method-title nil) |
8efc03e1 KH |
1431 | (let ((func (nth 2 slot))) |
1432 | (if (functionp func) | |
1433 | (apply (nth 2 slot) input-method (nthcdr 5 slot)) | |
1434 | (if (and (consp func) (symbolp (car func)) (symbolp (cdr func))) | |
1435 | (progn | |
1436 | (require (cdr func)) | |
1437 | (apply (car func) input-method (nthcdr 5 slot))) | |
1438 | (error "Can't activate input method `%s'" input-method)))) | |
d0b9c3ab | 1439 | (setq current-input-method input-method) |
278dd6ac KH |
1440 | (or (stringp current-input-method-title) |
1441 | (setq current-input-method-title (nth 3 slot))) | |
28885c0e KH |
1442 | (unwind-protect |
1443 | (run-hooks 'input-method-activate-hook) | |
1444 | (force-mode-line-update))))) | |
15b3e511 | 1445 | |
15b3e511 | 1446 | (defun inactivate-input-method () |
f17ccaee | 1447 | "Turn off the current input method." |
723a427a KH |
1448 | (when current-input-method |
1449 | (if input-method-history | |
1450 | (unless (string= current-input-method (car input-method-history)) | |
1451 | (setq input-method-history | |
1452 | (cons current-input-method | |
1453 | (delete current-input-method input-method-history)))) | |
1454 | (setq input-method-history (list current-input-method))) | |
1455 | (unwind-protect | |
36f8618c KH |
1456 | (progn |
1457 | (setq input-method-function nil | |
1458 | current-input-method-title nil) | |
1459 | (funcall inactivate-current-input-method-function)) | |
15b3e511 | 1460 | (unwind-protect |
723a427a | 1461 | (run-hooks 'input-method-inactivate-hook) |
36f8618c | 1462 | (setq current-input-method nil) |
28885c0e | 1463 | (force-mode-line-update))))) |
4ed46869 | 1464 | |
e893eae2 | 1465 | (defun set-input-method (input-method &optional interactive) |
2c395d56 | 1466 | "Select and activate input method INPUT-METHOD for the current buffer. |
bc406911 | 1467 | This also sets the default input method to the one you specify. |
402dbbd1 EZ |
1468 | If INPUT-METHOD is nil, this function turns off the input method, and |
1469 | also causes you to be prompted for a name of an input method the next | |
1470 | time you invoke \\[toggle-input-method]. | |
e893eae2 RS |
1471 | When called interactively, the optional arg INTERACTIVE is non-nil, |
1472 | which marks the variable `default-input-method' as set for Custom buffers. | |
402dbbd1 | 1473 | |
bc406911 | 1474 | To deactivate the input method interactively, use \\[toggle-input-method]. |
caff3c0a | 1475 | To deactivate it programmatically, use `inactivate-input-method'." |
d0b9c3ab | 1476 | (interactive |
723a427a | 1477 | (let* ((default (or (car input-method-history) default-input-method))) |
42395763 | 1478 | (list (read-input-method-name |
87505a98 | 1479 | (if default "Select input method (default %s): " "Select input method: ") |
e893eae2 RS |
1480 | default t) |
1481 | t))) | |
d0b9c3ab | 1482 | (activate-input-method input-method) |
f4990970 | 1483 | (setq default-input-method input-method) |
e893eae2 | 1484 | (when interactive |
f4990970 PA |
1485 | (customize-mark-as-set 'default-input-method)) |
1486 | default-input-method) | |
4ed46869 | 1487 | |
023df4cf RS |
1488 | (defvar toggle-input-method-active nil |
1489 | "Non-nil inside `toggle-input-method'.") | |
1490 | ||
e893eae2 | 1491 | (defun toggle-input-method (&optional arg interactive) |
f8ec20be RS |
1492 | "Enable or disable multilingual text input method for the current buffer. |
1493 | Only one input method can be enabled at any time in a given buffer. | |
1494 | ||
d0d8885d JB |
1495 | The normal action is to enable an input method if none was enabled, |
1496 | and disable the current one otherwise. Which input method to enable | |
1497 | can be determined in various ways--either the one most recently used, | |
1498 | or the one specified by `default-input-method', or as a last resort | |
1499 | by reading the name of an input method in the minibuffer. | |
f8ec20be | 1500 | |
d0d8885d | 1501 | With a prefix argument ARG, read an input method name with the minibuffer |
f8ec20be | 1502 | and enable that one. The default is the most recent input method specified |
e893eae2 | 1503 | \(not including the currently active input method, if any). |
f8ec20be | 1504 | |
d0d8885d | 1505 | When called interactively, the optional argument INTERACTIVE is non-nil, |
e893eae2 RS |
1506 | which marks the variable `default-input-method' as set for Custom buffers." |
1507 | ||
1508 | (interactive "P\np") | |
023df4cf RS |
1509 | (if toggle-input-method-active |
1510 | (error "Recursive use of `toggle-input-method'")) | |
7ddbb5bc RS |
1511 | (if (and current-input-method (not arg)) |
1512 | (inactivate-input-method) | |
023df4cf RS |
1513 | (let ((toggle-input-method-active t) |
1514 | (default (or (car input-method-history) default-input-method))) | |
7ddbb5bc RS |
1515 | (if (and arg default (equal current-input-method default) |
1516 | (> (length input-method-history) 1)) | |
1517 | (setq default (nth 1 input-method-history))) | |
723a427a KH |
1518 | (activate-input-method |
1519 | (if (or arg (not default)) | |
7ddbb5bc RS |
1520 | (progn |
1521 | (read-input-method-name | |
1522 | (if default "Input method (default %s): " "Input method: " ) | |
1523 | default t)) | |
723a427a | 1524 | default)) |
f4990970 | 1525 | (unless default-input-method |
d37ef0f6 | 1526 | (prog1 |
f4990970 | 1527 | (setq default-input-method current-input-method) |
e893eae2 | 1528 | (when interactive |
f4990970 | 1529 | (customize-mark-as-set 'default-input-method))))))) |
d0b9c3ab | 1530 | |
26b3dce6 | 1531 | (autoload 'help-buffer "help-mode") |
0855c6cd | 1532 | |
d0b9c3ab | 1533 | (defun describe-input-method (input-method) |
2c395d56 | 1534 | "Describe input method INPUT-METHOD." |
d0b9c3ab KH |
1535 | (interactive |
1536 | (list (read-input-method-name | |
5b76833f | 1537 | "Describe input method (default current choice): "))) |
78754934 | 1538 | (if (and input-method (symbolp input-method)) |
4ef06f75 | 1539 | (setq input-method (symbol-name input-method))) |
43125c28 RS |
1540 | (help-setup-xref (list #'describe-input-method |
1541 | (or input-method current-input-method)) | |
32226619 | 1542 | (called-interactively-p 'interactive)) |
f80e2142 | 1543 | |
d0b9c3ab KH |
1544 | (if (null input-method) |
1545 | (describe-current-input-method) | |
464cc130 KH |
1546 | (let ((current current-input-method)) |
1547 | (condition-case nil | |
1548 | (progn | |
1549 | (save-excursion | |
1550 | (activate-input-method input-method) | |
1551 | (describe-current-input-method)) | |
1552 | (activate-input-method current)) | |
d37ef0f6 | 1553 | (error |
464cc130 | 1554 | (activate-input-method current) |
5f395df3 | 1555 | (help-setup-xref (list #'describe-input-method input-method) |
32226619 | 1556 | (called-interactively-p 'interactive)) |
5f395df3 | 1557 | (with-output-to-temp-buffer (help-buffer) |
464cc130 KH |
1558 | (let ((elt (assoc input-method input-method-alist))) |
1559 | (princ (format | |
1560 | "Input method: %s (`%s' in mode line) for %s\n %s\n" | |
1561 | input-method (nth 3 elt) (nth 1 elt) (nth 4 elt)))))))))) | |
d0b9c3ab KH |
1562 | |
1563 | (defun describe-current-input-method () | |
f80e2142 RS |
1564 | "Describe the input method currently in use. |
1565 | This is a subroutine for `describe-input-method'." | |
4ed46869 KH |
1566 | (if current-input-method |
1567 | (if (and (symbolp describe-current-input-method-function) | |
1568 | (fboundp describe-current-input-method-function)) | |
1569 | (funcall describe-current-input-method-function) | |
1570 | (message "No way to describe the current input method `%s'" | |
f2979bdb | 1571 | current-input-method) |
4ed46869 | 1572 | (ding)) |
d0b9c3ab | 1573 | (error "No input method is activated now"))) |
4ed46869 | 1574 | |
d3459641 | 1575 | (defun read-multilingual-string (prompt &optional initial-input input-method) |
4ed46869 KH |
1576 | "Read a multilingual string from minibuffer, prompting with string PROMPT. |
1577 | The input method selected last time is activated in minibuffer. | |
d0d8885d JB |
1578 | If optional second argument INITIAL-INPUT is non-nil, insert it in the |
1579 | minibuffer initially. | |
1580 | Optional 3rd argument INPUT-METHOD specifies the input method to be activated | |
1581 | instead of the one selected last time. It is a symbol or a string." | |
88d559ec KH |
1582 | (setq input-method |
1583 | (or input-method | |
d3459641 | 1584 | current-input-method |
88d559ec KH |
1585 | default-input-method |
1586 | (read-input-method-name "Input method: " nil t))) | |
3df60841 | 1587 | (if (and input-method (symbolp input-method)) |
4ef06f75 | 1588 | (setq input-method (symbol-name input-method))) |
305a3cb6 KH |
1589 | (let ((prev-input-method current-input-method)) |
1590 | (unwind-protect | |
1591 | (progn | |
1592 | (activate-input-method input-method) | |
1593 | (read-string prompt initial-input nil nil t)) | |
1594 | (activate-input-method prev-input-method)))) | |
4ed46869 KH |
1595 | |
1596 | ;; Variables to control behavior of input methods. All input methods | |
1597 | ;; should react to these variables. | |
1598 | ||
8efc03e1 | 1599 | (defcustom input-method-verbose-flag 'default |
caff3c0a | 1600 | "A flag to control extra guidance given by input methods. |
8efc03e1 | 1601 | The value should be nil, t, `complex-only', or `default'. |
4ed46869 | 1602 | |
cb29dfb6 | 1603 | The extra guidance is done by showing list of available keys in echo |
8efc03e1 KH |
1604 | area. When you use the input method in the minibuffer, the guidance |
1605 | is shown at the bottom short window (split from the existing window). | |
c27c4ed8 | 1606 | |
8efc03e1 KH |
1607 | If the value is t, extra guidance is always given, if the value is |
1608 | nil, extra guidance is always suppressed. | |
1609 | ||
1610 | If the value is `complex-only', only complex input methods such as | |
1611 | `chinese-py' and `japanese' give extra guidance. | |
1612 | ||
1613 | If the value is `default', complex input methods always give extra | |
1614 | guidance, but simple input methods give it only when you are not in | |
1615 | the minibuffer. | |
1616 | ||
1617 | See also the variable `input-method-highlight-flag'." | |
d37ef0f6 DL |
1618 | :type '(choice (const :tag "Always" t) (const :tag "Never" nil) |
1619 | (const complex-only) (const default)) | |
42395763 RS |
1620 | :group 'mule) |
1621 | ||
1622 | (defcustom input-method-highlight-flag t | |
caff3c0a | 1623 | "If this flag is non-nil, input methods highlight partially-entered text. |
42395763 RS |
1624 | For instance, while you are in the middle of a Quail input method sequence, |
1625 | the text inserted so far is temporarily underlined. | |
8efc03e1 KH |
1626 | The underlining goes away when you finish or abort the input method sequence. |
1627 | See also the variable `input-method-verbose-flag'." | |
42395763 RS |
1628 | :type 'boolean |
1629 | :group 'mule) | |
4ed46869 | 1630 | |
1f547b92 | 1631 | (defcustom input-method-activate-hook nil |
f17ccaee KH |
1632 | "Normal hook run just after an input method is activated. |
1633 | ||
1634 | The variable `current-input-method' keeps the input method name | |
1f547b92 DL |
1635 | just activated." |
1636 | :type 'hook | |
1637 | :group 'mule) | |
4ed46869 | 1638 | |
1f547b92 | 1639 | (defcustom input-method-inactivate-hook nil |
f17ccaee KH |
1640 | "Normal hook run just after an input method is inactivated. |
1641 | ||
1642 | The variable `current-input-method' still keeps the input method name | |
1f547b92 DL |
1643 | just inactivated." |
1644 | :type 'hook | |
1645 | :group 'mule) | |
4ed46869 | 1646 | |
1f547b92 | 1647 | (defcustom input-method-after-insert-chunk-hook nil |
8f924df7 | 1648 | "Normal hook run just after an input method insert some chunk of text." |
1f547b92 DL |
1649 | :type 'hook |
1650 | :group 'mule) | |
4ed46869 | 1651 | |
dccca980 | 1652 | (defvar input-method-exit-on-first-char nil |
0b6cadff | 1653 | "This flag controls when an input method returns. |
dccca980 KH |
1654 | Usually, the input method does not return while there's a possibility |
1655 | that it may find a different translation if a user types another key. | |
d0d8885d JB |
1656 | But, if this flag is non-nil, the input method returns as soon as the |
1657 | current key sequence gets long enough to have some valid translation.") | |
dccca980 | 1658 | |
1f547b92 | 1659 | (defcustom input-method-use-echo-area nil |
dccca980 | 1660 | "This flag controls how an input method shows an intermediate key sequence. |
39e643e2 RS |
1661 | Usually, the input method inserts the intermediate key sequence, |
1662 | or candidate translations corresponding to the sequence, | |
1663 | at point in the current buffer. | |
1f547b92 DL |
1664 | But, if this flag is non-nil, it displays them in echo area instead." |
1665 | :type 'hook | |
1666 | :group 'mule) | |
dccca980 | 1667 | |
723a427a | 1668 | (defvar input-method-exit-on-invalid-key nil |
fea6b736 | 1669 | "This flag controls the behavior of an input method on invalid key input. |
723a427a KH |
1670 | Usually, when a user types a key which doesn't start any character |
1671 | handled by the input method, the key is handled by turning off the | |
e8dd0160 | 1672 | input method temporarily. After that key, the input method is re-enabled. |
723a427a KH |
1673 | But, if this flag is non-nil, the input method is never back on.") |
1674 | ||
4ed46869 | 1675 | \f |
1f547b92 | 1676 | (defcustom set-language-environment-hook nil |
8efc03e1 KH |
1677 | "Normal hook run after some language environment is set. |
1678 | ||
1679 | When you set some hook function here, that effect usually should not | |
1680 | be inherited to another language environment. So, you had better set | |
1681 | another function in `exit-language-environment-hook' (which see) to | |
1f547b92 DL |
1682 | cancel the effect." |
1683 | :type 'hook | |
1684 | :group 'mule) | |
8efc03e1 | 1685 | |
1f547b92 | 1686 | (defcustom exit-language-environment-hook nil |
8efc03e1 KH |
1687 | "Normal hook run after exiting from some language environment. |
1688 | When this hook is run, the variable `current-language-environment' | |
1689 | is still bound to the language environment being exited. | |
1690 | ||
e8dd0160 | 1691 | This hook is mainly used for canceling the effect of |
caff3c0a | 1692 | `set-language-environment-hook' (which see)." |
1f547b92 DL |
1693 | :type 'hook |
1694 | :group 'mule) | |
8efc03e1 | 1695 | |
b0648a00 RS |
1696 | (put 'setup-specified-language-environment 'apropos-inhibit t) |
1697 | ||
15b3e511 | 1698 | (defun setup-specified-language-environment () |
f08adf27 | 1699 | "Switch to a specified language environment." |
15b3e511 | 1700 | (interactive) |
f850d782 | 1701 | (let (language-name) |
15b3e511 KH |
1702 | (if (and (symbolp last-command-event) |
1703 | (or (not (eq last-command-event 'Default)) | |
1704 | (setq last-command-event 'English)) | |
f850d782 | 1705 | (setq language-name (symbol-name last-command-event))) |
f4990970 PA |
1706 | (prog1 |
1707 | (set-language-environment language-name) | |
1708 | (customize-mark-as-set 'current-language-environment)) | |
15b3e511 | 1709 | (error "Bogus calling sequence")))) |
4ed46869 | 1710 | |
8861c593 | 1711 | (defcustom current-language-environment "English" |
94d04df6 | 1712 | "The last language environment specified with `set-language-environment'. |
ebef6d93 KH |
1713 | This variable should be set only with \\[customize], which is equivalent |
1714 | to using the function `set-language-environment'." | |
94d04df6 | 1715 | :link '(custom-manual "(emacs)Language Environments") |
dff1aa24 | 1716 | :set (lambda (symbol value) (set-language-environment value)) |
94d04df6 | 1717 | :get (lambda (x) |
f15466c5 | 1718 | (or (car-safe (assoc-string |
94d04df6 DL |
1719 | (if (symbolp current-language-environment) |
1720 | (symbol-name current-language-environment) | |
1721 | current-language-environment) | |
f15466c5 | 1722 | language-info-alist t)) |
94d04df6 | 1723 | "English")) |
990a4108 MR |
1724 | ;; custom type will be updated with `set-language-info'. |
1725 | :type (if language-info-alist | |
1726 | (cons 'choice (mapcar | |
1727 | (lambda (lang) | |
756e055f MR |
1728 | (list 'const lang)) |
1729 | (sort (mapcar 'car language-info-alist) 'string<))) | |
990a4108 | 1730 | 'string) |
8861c593 | 1731 | :initialize 'custom-initialize-default |
eb9fc9e6 | 1732 | :group 'mule) |
f850d782 | 1733 | |
ddb5c041 KH |
1734 | (defun reset-language-environment () |
1735 | "Reset multilingual environment of Emacs to the default status. | |
1736 | ||
1737 | The default status is as follows: | |
1738 | ||
d37ef0f6 | 1739 | The default value of `buffer-file-coding-system' is nil. |
ddb5c041 KH |
1740 | The default coding system for process I/O is nil. |
1741 | The default value for the command `set-terminal-coding-system' is nil. | |
1742 | The default value for the command `set-keyboard-coding-system' is nil. | |
1743 | ||
97941b05 KH |
1744 | The order of priorities of coding systems are as follows: |
1745 | utf-8 | |
1746 | iso-2022-7bit | |
1747 | iso-latin-1 | |
1748 | iso-2022-7bit-lock | |
1749 | iso-2022-8bit-ss2 | |
1750 | emacs-mule | |
1751 | raw-text" | |
ddb5c041 KH |
1752 | (interactive) |
1753 | ;; This function formerly set default-enable-multibyte-characters to t, | |
1754 | ;; but that is incorrect. It should not alter the unibyte/multibyte choice. | |
1755 | ||
97941b05 KH |
1756 | (set-coding-system-priority |
1757 | 'utf-8 | |
1758 | 'iso-2022-7bit | |
1759 | 'iso-latin-1 | |
1760 | 'iso-2022-7bit-lock | |
1761 | 'iso-2022-8bit-ss2 | |
1762 | 'emacs-mule | |
1763 | 'raw-text) | |
91693d18 | 1764 | |
ddb5c041 | 1765 | (set-default-coding-systems nil) |
b5edd1d1 | 1766 | (setq default-sendmail-coding-system 'iso-latin-1) |
a41118cc SM |
1767 | ;; On Darwin systems, this should be utf-8, but when this file is loaded |
1768 | ;; utf-8 is not yet defined, so we set it in set-locale-environment instead. | |
787caf99 | 1769 | (setq default-file-name-coding-system 'iso-latin-1) |
1d77e15a JR |
1770 | ;; Preserve eol-type from existing default-process-coding-systems. |
1771 | ;; On non-unix-like systems in particular, these may have been set | |
1772 | ;; carefully by the user, or by the startup code, to deal with the | |
1773 | ;; users shell appropriately, so should not be altered by changing | |
1774 | ;; language environment. | |
1775 | (let ((output-coding | |
a099a2ff JR |
1776 | ;; When bootstrapping, coding-systems are not defined yet, so |
1777 | ;; we need to catch the error from check-coding-system. | |
d37ef0f6 | 1778 | (condition-case nil |
a099a2ff JR |
1779 | (coding-system-change-text-conversion |
1780 | (car default-process-coding-system) 'undecided) | |
1781 | (coding-system-error 'undecided))) | |
1d77e15a | 1782 | (input-coding |
a099a2ff JR |
1783 | (condition-case nil |
1784 | (coding-system-change-text-conversion | |
1785 | (cdr default-process-coding-system) 'iso-latin-1) | |
1786 | (coding-system-error 'iso-latin-1)))) | |
1d77e15a JR |
1787 | (setq default-process-coding-system |
1788 | (cons output-coding input-coding))) | |
b5edd1d1 | 1789 | |
a43977db KH |
1790 | ;; Put the highest priority to the charset iso-8859-1 to prefer the |
1791 | ;; registry iso8859-1 over iso8859-2 in font selection. It also | |
1792 | ;; makes unibyte-display-via-language-environment to use iso-8859-1 | |
1793 | ;; as the unibyte charset. | |
1794 | (set-charset-priority 'iso-8859-1) | |
1795 | ||
ddb5c041 KH |
1796 | ;; Don't alter the terminal and keyboard coding systems here. |
1797 | ;; The terminal still supports the same coding system | |
1798 | ;; that it supported a minute ago. | |
1b8dc791 SM |
1799 | ;; (set-terminal-coding-system-internal nil) |
1800 | ;; (set-keyboard-coding-system-internal nil) | |
ddb5c041 | 1801 | |
33de15f4 SM |
1802 | ;; Back in Emacs-20, it was necessary to provide some fallback implicit |
1803 | ;; conversion, because almost no packages handled coding-system issues. | |
1804 | ;; Nowadays it'd just paper over bugs. | |
1805 | ;; (set-unibyte-charset 'iso-8859-1) | |
1806 | ) | |
ddb5c041 | 1807 | |
0c47a7c8 KH |
1808 | (reset-language-environment) |
1809 | ||
97c57fb2 | 1810 | (defun set-display-table-and-terminal-coding-system (language-name &optional coding-system display) |
40c81f74 PE |
1811 | "Set up the display table and terminal coding system for LANGUAGE-NAME." |
1812 | (let ((coding (get-language-info language-name 'unibyte-display))) | |
735b7c87 KH |
1813 | (if (and coding |
1814 | (or (not coding-system) | |
1815 | (coding-system-equal coding coding-system))) | |
40c81f74 | 1816 | (standard-display-european-internal) |
a67ae60e EZ |
1817 | ;; The following 2 lines undo the 8-bit display that we set up |
1818 | ;; in standard-display-european-internal, which see. This is in | |
1819 | ;; case the user has used standard-display-european earlier in | |
970c9391 | 1820 | ;; this session. |
d73a7bb8 JPW |
1821 | (when standard-display-table |
1822 | (dotimes (i 128) | |
b221615b | 1823 | (aset standard-display-table (+ i 128) nil)))) |
970c9391 | 1824 | (set-terminal-coding-system (or coding-system coding) display))) |
40c81f74 | 1825 | |
166246f7 | 1826 | (defun set-language-environment (language-name) |
6c05d680 RS |
1827 | "Set up multi-lingual environment for using LANGUAGE-NAME. |
1828 | This sets the coding system priority and the default input method | |
8861c593 RS |
1829 | and sometimes other things. LANGUAGE-NAME should be a string |
1830 | which is the name of a language environment. For example, \"Latin-1\" | |
1831 | specifies the character set for the major languages of Western Europe." | |
8efc03e1 | 1832 | (interactive (list (read-language-name |
ddb5c041 | 1833 | nil |
5b76833f | 1834 | "Set language environment (default English): "))) |
4ef06f75 KH |
1835 | (if language-name |
1836 | (if (symbolp language-name) | |
1837 | (setq language-name (symbol-name language-name))) | |
1838 | (setq language-name "English")) | |
f15466c5 | 1839 | (let ((slot (assoc-string language-name language-info-alist t))) |
95498fd0 | 1840 | (unless slot |
f850d782 | 1841 | (error "Language environment not defined: %S" language-name)) |
95498fd0 | 1842 | (setq language-name (car slot))) |
8efc03e1 KH |
1843 | (if current-language-environment |
1844 | (let ((func (get-language-info current-language-environment | |
1845 | 'exit-function))) | |
e63645c2 | 1846 | (run-hooks 'exit-language-environment-hook) |
5f395df3 | 1847 | (if (functionp func) (funcall func)))) |
ddb5c041 | 1848 | |
e9a8ed3c KH |
1849 | (reset-language-environment) |
1850 | ;; The features might set up coding systems. | |
ddb5c041 KH |
1851 | (let ((required-features (get-language-info language-name 'features))) |
1852 | (while required-features | |
1853 | (require (car required-features)) | |
1854 | (setq required-features (cdr required-features)))) | |
6b61353c | 1855 | |
e9a8ed3c KH |
1856 | (setq current-language-environment language-name) |
1857 | ||
1858 | (set-language-environment-coding-systems language-name) | |
1859 | (set-language-environment-input-method language-name) | |
1860 | (set-language-environment-nonascii-translation language-name) | |
1861 | (set-language-environment-charset language-name) | |
1862 | ;; Unibyte setups if necessary. | |
597e2240 | 1863 | (unless (default-value 'enable-multibyte-characters) |
e9a8ed3c KH |
1864 | (set-language-environment-unibyte language-name)) |
1865 | ||
ddb5c041 | 1866 | (let ((func (get-language-info language-name 'setup-function))) |
5f395df3 | 1867 | (if (functionp func) |
ddb5c041 | 1868 | (funcall func))) |
e9a8ed3c | 1869 | |
8aeebac2 | 1870 | (setq current-iso639-language |
e9aaa1db KH |
1871 | (or (get-language-info language-name 'iso639-language) |
1872 | current-iso639-language)) | |
8aeebac2 | 1873 | |
8efc03e1 | 1874 | (run-hooks 'set-language-environment-hook) |
f850d782 | 1875 | (force-mode-line-update t)) |
4ed46869 | 1876 | |
c3869589 | 1877 | (define-widget 'charset 'symbol |
aaa448c9 | 1878 | "An Emacs charset." |
1644d5b9 | 1879 | :tag "Charset" |
c3869589 DL |
1880 | :complete-function (lambda () |
1881 | (interactive) | |
1882 | (lisp-complete-symbol 'charsetp)) | |
1883 | :completion-ignore-case t | |
1884 | :value 'ascii | |
1885 | :validate (lambda (widget) | |
1886 | (unless (charsetp (widget-value widget)) | |
1887 | (widget-put widget :error (format "Invalid charset: %S" | |
1888 | (widget-value widget))) | |
1889 | widget)) | |
1890 | :prompt-history 'charset-history) | |
1891 | ||
1892 | (defcustom language-info-custom-alist nil | |
1893 | "Customizations of language environment parameters. | |
1894 | Value is an alist with elements like those of `language-info-alist'. | |
1895 | These are used to set values in `language-info-alist' which replace | |
1896 | the defaults. A typical use is replacing the default input method for | |
1897 | the environment. Use \\[describe-language-environment] to find the environment's settings. | |
1898 | ||
1899 | This option is intended for use at startup. Removing items doesn't | |
1900 | remove them from the language info until you next restart Emacs. | |
1901 | ||
d0d8885d JB |
1902 | Setting this variable directly does not take effect. |
1903 | See `set-language-info-alist' for use in programs." | |
c3869589 | 1904 | :group 'mule |
8589dc17 | 1905 | :version "23.1" |
c3869589 DL |
1906 | :set (lambda (s v) |
1907 | (custom-set-default s v) | |
1908 | ;; Can't do this before language environments are set up. | |
1909 | (when v | |
1910 | ;; modify language-info-alist | |
1911 | (dolist (elt v) | |
1912 | (set-language-info-alist (car elt) (cdr elt))) | |
1913 | ;; re-set the environment in case its parameters changed | |
1914 | (set-language-environment current-language-environment))) | |
1915 | :type `(alist | |
1916 | :key-type (string :tag "Language environment" | |
1917 | :completion-ignore-case t | |
1918 | :complete-function widget-string-complete | |
1919 | :completion-alist language-info-alist) | |
1920 | :value-type | |
1921 | (alist :key-type symbol | |
1922 | :options ((documentation string) | |
1923 | (charset (repeat charset)) | |
1924 | (sample-text string) | |
1925 | (setup-function function) | |
1926 | (exit-function function) | |
1927 | (coding-system (repeat coding-system)) | |
1928 | (coding-priority (repeat coding-system)) | |
1929 | (nonascii-translation charset) | |
1930 | (input-method | |
1931 | (string | |
1932 | :completion-ignore-case t | |
1933 | :complete-function widget-string-complete | |
1934 | :completion-alist input-method-alist | |
1935 | :prompt-history input-method-history)) | |
1936 | (features (repeat symbol)) | |
1937 | (unibyte-display coding-system))))) | |
1938 | ||
aa360da1 GM |
1939 | (declare-function x-server-vendor "xfns.c" (&optional terminal)) |
1940 | (declare-function x-server-version "xfns.c" (&optional terminal)) | |
1941 | ||
51a8fc1d RS |
1942 | (defun standard-display-european-internal () |
1943 | ;; Actually set up direct output of non-ASCII characters. | |
03c35c83 EZ |
1944 | (standard-display-8bit (if (eq window-system 'pc) 128 160) 255) |
1945 | ;; Unibyte Emacs on MS-DOS wants to display all 8-bit characters with | |
1946 | ;; the native font, and codes 160 and 146 stand for something very | |
1947 | ;; different there. | |
597e2240 GM |
1948 | (or (and (eq window-system 'pc) (not (default-value |
1949 | 'enable-multibyte-characters))) | |
03c35c83 | 1950 | (progn |
6b626913 SM |
1951 | ;; Most X fonts used to do the wrong thing for latin-1 code 160. |
1952 | (unless (and (eq window-system 'x) | |
1953 | ;; XFree86 4 has fixed the fonts. | |
1954 | (string= "The XFree86 Project, Inc" (x-server-vendor)) | |
1955 | (> (aref (number-to-string (nth 2 (x-server-version))) 0) | |
1956 | ?3)) | |
1957 | ;; Make non-line-break space display as a plain space. | |
d419e1d9 | 1958 | (aset standard-display-table (unibyte-char-to-multibyte 160) [32])) |
1492f7ac | 1959 | ;; Most Windows programs send out apostrophes as \222. Most X fonts |
03c35c83 | 1960 | ;; don't contain a character at that position. Map it to the ASCII |
5f395df3 SM |
1961 | ;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK, |
1962 | ;; U+2019, normally from the windows-1252 character set. XFree 4 | |
1963 | ;; fonts probably have the appropriate glyph at this position, | |
1964 | ;; so they could use standard-display-8bit. It's better to use a | |
1965 | ;; proper windows-1252 coding system. --fx] | |
d419e1d9 | 1966 | (aset standard-display-table (unibyte-char-to-multibyte 146) [39])))) |
03c35c83 | 1967 | |
e9a8ed3c KH |
1968 | (defun set-language-environment-coding-systems (language-name) |
1969 | "Do various coding system setups for language environment LANGUAGE-NAME." | |
54b226f7 | 1970 | (let* ((priority (get-language-info language-name 'coding-priority)) |
e9a8ed3c | 1971 | (default-coding (car priority)) |
b56a5ae0 | 1972 | ;; If the default buffer-file-coding-system is nil, don't use |
207422da | 1973 | ;; coding-system-eol-type, because it treats nil as |
b56a5ae0 | 1974 | ;; `no-conversion'. The default buffer-file-coding-system is set |
207422da EZ |
1975 | ;; to nil by reset-language-environment, and in that case we |
1976 | ;; want to have here the native EOL type for each platform. | |
1977 | ;; FIXME: there should be a common code that runs both on | |
1978 | ;; startup and here to set the default EOL type correctly. | |
1979 | ;; Right now, DOS/Windows platforms set this on dos-w32.el, | |
1980 | ;; which works only as long as the order of loading files at | |
1981 | ;; dump time and calling functions at startup is not modified | |
1982 | ;; significantly, i.e. as long as this function is called | |
b56a5ae0 | 1983 | ;; _after_ the default buffer-file-coding-system was set by |
207422da EZ |
1984 | ;; dos-w32.el. |
1985 | (eol-type | |
b56a5ae0 SM |
1986 | (coding-system-eol-type |
1987 | (or (default-value 'buffer-file-coding-system) | |
1988 | (if (memq system-type '(windows-nt ms-dos)) 'dos 'unix))))) | |
97941b05 KH |
1989 | (when priority |
1990 | (set-default-coding-systems | |
1991 | (if (memq eol-type '(0 1 2 unix dos mac)) | |
1992 | (coding-system-change-eol-conversion default-coding eol-type) | |
1993 | default-coding)) | |
1994 | (setq default-sendmail-coding-system default-coding) | |
1995 | (apply 'set-coding-system-priority priority)))) | |
54b226f7 | 1996 | |
d042f8b4 KH |
1997 | (defun set-language-environment-input-method (language-name) |
1998 | "Do various input method setups for language environment LANGUAGE-NAME." | |
1999 | (let ((input-method (get-language-info language-name 'input-method))) | |
2000 | (when input-method | |
2001 | (setq default-input-method input-method) | |
2002 | (if input-method-history | |
2003 | (setq input-method-history | |
2004 | (cons input-method | |
2005 | (delete input-method input-method-history))))))) | |
2006 | ||
2007 | (defun set-language-environment-nonascii-translation (language-name) | |
2008 | "Do unibyte/multibyte translation setup for language environment LANGUAGE-NAME." | |
e9a8ed3c KH |
2009 | ;; Note: For DOS, we assumed that the charset cpXXX is already |
2010 | ;; defined. | |
2011 | (let ((nonascii (get-language-info language-name 'nonascii-translation))) | |
2012 | (if (eq window-system 'pc) | |
970c9391 | 2013 | (setq nonascii (intern (format "cp%d" dos-codepage)))) |
e9a8ed3c KH |
2014 | (or (and (charsetp nonascii) |
2015 | (get-charset-property nonascii :ascii-compatible-p)) | |
2016 | (setq nonascii 'iso-8859-1)) | |
33de15f4 SM |
2017 | ;; Back in Emacs-20, it was necessary to provide some fallback implicit |
2018 | ;; conversion, because almost no packages handled coding-system issues. | |
2019 | ;; Nowadays it'd just paper over bugs. | |
2020 | ;; (set-unibyte-charset nonascii) | |
2021 | )) | |
d042f8b4 KH |
2022 | |
2023 | (defun set-language-environment-charset (language-name) | |
2024 | "Do various charset setups for language environment LANGUAGE-NAME." | |
e9a8ed3c KH |
2025 | ;; Put higher priorities to such charsets that are supported by the |
2026 | ;; coding systems of higher priorities in this environment. | |
2027 | (let ((charsets (get-language-info language-name 'charset))) | |
2028 | (dolist (coding (get-language-info language-name 'coding-priority)) | |
bf974dc9 KH |
2029 | (let ((list (coding-system-charset-list coding))) |
2030 | (if (consp list) | |
2031 | (setq charsets (append charsets list))))) | |
e9a8ed3c KH |
2032 | (if charsets |
2033 | (apply 'set-charset-priority charsets)))) | |
d042f8b4 KH |
2034 | |
2035 | (defun set-language-environment-unibyte (language-name) | |
2036 | "Do various unibyte-mode setups for language environment LANGUAGE-NAME." | |
d042f8b4 KH |
2037 | (set-display-table-and-terminal-coding-system language-name)) |
2038 | ||
cc4d2366 | 2039 | (defun princ-list (&rest args) |
caff3c0a | 2040 | "Print all arguments with `princ', then print \"\\n\"." |
4ed46869 KH |
2041 | (while args (princ (car args)) (setq args (cdr args))) |
2042 | (princ "\n")) | |
cc4d2366 | 2043 | (make-obsolete 'princ-list "use mapc and princ instead" "23.3") |
4ed46869 | 2044 | |
b0648a00 RS |
2045 | (put 'describe-specified-language-support 'apropos-inhibit t) |
2046 | ||
6b61353c | 2047 | ;; Print language-specific information such as input methods, |
13e82c04 | 2048 | ;; charsets, and coding systems. This function is intended to be |
48082651 | 2049 | ;; called from the menu: |
281d03ec | 2050 | ;; [menu-bar mule describe-language-environment LANGUAGE] |
48082651 KH |
2051 | ;; and should not run it by `M-x describe-current-input-method-function'. |
2052 | (defun describe-specified-language-support () | |
96db204a | 2053 | "Describe how Emacs supports the specified language environment." |
48082651 | 2054 | (interactive) |
281d03ec | 2055 | (let (language-name) |
48082651 | 2056 | (if (not (and (symbolp last-command-event) |
cda74479 DL |
2057 | (or (not (eq last-command-event 'Default)) |
2058 | (setq last-command-event 'English)) | |
281d03ec | 2059 | (setq language-name (symbol-name last-command-event)))) |
48082651 | 2060 | (error "Bogus calling sequence")) |
281d03ec RS |
2061 | (describe-language-environment language-name))) |
2062 | ||
2063 | (defun describe-language-environment (language-name) | |
2064 | "Describe how Emacs supports language environment LANGUAGE-NAME." | |
78754934 KH |
2065 | (interactive |
2066 | (list (read-language-name | |
2067 | 'documentation | |
5b76833f | 2068 | "Describe language environment (default current choice): "))) |
f850d782 RS |
2069 | (if (null language-name) |
2070 | (setq language-name current-language-environment)) | |
281d03ec RS |
2071 | (if (or (null language-name) |
2072 | (null (get-language-info language-name 'documentation))) | |
2073 | (error "No documentation for the specified language")) | |
4ef06f75 KH |
2074 | (if (symbolp language-name) |
2075 | (setq language-name (symbol-name language-name))) | |
ef5a4730 KH |
2076 | (dolist (feature (get-language-info language-name 'features)) |
2077 | (require feature)) | |
6b626913 | 2078 | (let ((doc (get-language-info language-name 'documentation))) |
c3034e84 | 2079 | (help-setup-xref (list #'describe-language-environment language-name) |
32226619 | 2080 | (called-interactively-p 'interactive)) |
c3034e84 | 2081 | (with-output-to-temp-buffer (help-buffer) |
9a529312 | 2082 | (with-current-buffer standard-output |
464cc130 KH |
2083 | (insert language-name " language environment\n\n") |
2084 | (if (stringp doc) | |
2085 | (insert doc "\n\n")) | |
e036b0a6 KH |
2086 | (condition-case nil |
2087 | (let ((str (eval (get-language-info language-name 'sample-text)))) | |
2088 | (if (stringp str) | |
fd0dd4c3 KH |
2089 | (insert "Sample text:\n " |
2090 | (replace-regexp-in-string "\n" "\n " str) | |
2091 | "\n\n"))) | |
e036b0a6 | 2092 | (error nil)) |
464cc130 | 2093 | (let ((input-method (get-language-info language-name 'input-method)) |
acd1b9bd KH |
2094 | (l (copy-sequence input-method-alist)) |
2095 | (first t)) | |
2096 | (when (and input-method | |
2097 | (setq input-method (assoc input-method l))) | |
2098 | (insert "Input methods (default " (car input-method) ")\n") | |
2099 | (setq l (cons input-method (delete input-method l)) | |
2100 | first nil)) | |
2101 | (dolist (elt l) | |
2102 | (when (or (eq input-method elt) | |
2103 | (eq t (compare-strings language-name nil nil | |
2104 | (nth 1 elt) nil nil t))) | |
2105 | (when first | |
2106 | (insert "Input methods:\n") | |
2107 | (setq first nil)) | |
2108 | (insert " " (car elt)) | |
2109 | (search-backward (car elt)) | |
2110 | (help-xref-button 0 'help-input-method (car elt)) | |
464cc130 | 2111 | (goto-char (point-max)) |
2fa7e202 | 2112 | (insert " (\"" |
acd1b9bd KH |
2113 | (if (stringp (nth 3 elt)) (nth 3 elt) (car (nth 3 elt))) |
2114 | "\" in mode line)\n"))) | |
2115 | (or first | |
2116 | (insert "\n"))) | |
464cc130 KH |
2117 | (insert "Character sets:\n") |
2118 | (let ((l (get-language-info language-name 'charset))) | |
2119 | (if (null l) | |
2120 | (insert " nothing specific to " language-name "\n") | |
2121 | (while l | |
2122 | (insert " " (symbol-name (car l))) | |
2123 | (search-backward (symbol-name (car l))) | |
467412aa | 2124 | (help-xref-button 0 'help-character-set (car l)) |
464cc130 KH |
2125 | (goto-char (point-max)) |
2126 | (insert ": " (charset-description (car l)) "\n") | |
2127 | (setq l (cdr l))))) | |
2128 | (insert "\n") | |
2129 | (insert "Coding systems:\n") | |
2130 | (let ((l (get-language-info language-name 'coding-system))) | |
2131 | (if (null l) | |
2132 | (insert " nothing specific to " language-name "\n") | |
2133 | (while l | |
2134 | (insert " " (symbol-name (car l))) | |
2135 | (search-backward (symbol-name (car l))) | |
467412aa | 2136 | (help-xref-button 0 'help-coding-system (car l)) |
464cc130 KH |
2137 | (goto-char (point-max)) |
2138 | (insert " (`" | |
2139 | (coding-system-mnemonic (car l)) | |
2140 | "' in mode line):\n\t" | |
2141 | (coding-system-doc-string (car l)) | |
2142 | "\n") | |
0855c6cd | 2143 | (let ((aliases (coding-system-aliases (car l)))) |
464cc130 KH |
2144 | (when aliases |
2145 | (insert "\t(alias:") | |
2146 | (while aliases | |
2147 | (insert " " (symbol-name (car aliases))) | |
2148 | (setq aliases (cdr aliases))) | |
2149 | (insert ")\n"))) | |
c3034e84 | 2150 | (setq l (cdr l))))))))) |
4ed46869 | 2151 | \f |
40c81f74 PE |
2152 | ;;; Locales. |
2153 | ||
0d7c5bb9 DL |
2154 | (defvar locale-translation-file-name nil |
2155 | "File name for the system's file of locale-name aliases, or nil if none.") | |
40c81f74 | 2156 | |
5f395df3 SM |
2157 | ;; The following definitions might as well be marked as constants and |
2158 | ;; purecopied, since they're normally used on startup, and probably | |
2159 | ;; should reflect the facilities of the base Emacs. | |
2160 | (defconst locale-language-names | |
2161 | (purecopy | |
2162 | '( | |
40c81f74 PE |
2163 | ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER] |
2164 | ;; as specified in the Single Unix Spec, Version 2. | |
2165 | ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F) | |
2166 | ;; with additions from ISO 639/RA Newsletter No.1/1989; | |
5f395df3 SM |
2167 | ;; see Internet RFC 2165 (1997-06) and |
2168 | ;; http://www.evertype.com/standards/iso639/iso639-en.html | |
2169 | ;; TERRITORY is a country code taken from ISO 3166 | |
2170 | ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html. | |
40c81f74 | 2171 | ;; CODESET and MODIFIER are implementation-dependent. |
5f395df3 | 2172 | |
6b61353c KH |
2173 | ;; jasonr comments: MS Windows uses three letter codes for |
2174 | ;; languages instead of the two letter ISO codes that POSIX | |
2175 | ;; uses. In most cases the first two letters are the same, so | |
2176 | ;; most of the regexps in locale-language-names work. Japanese | |
2177 | ;; and Chinese are exceptions, which are listed in the | |
2178 | ;; non-standard section at the bottom of locale-language-names. | |
2179 | ||
8dedddd5 KH |
2180 | ("aa_DJ" . "Latin-1") ; Afar |
2181 | ("aa" . "UTF-8") | |
2182 | ;; ab Abkhazian | |
6ececc4d | 2183 | ("af" . "Latin-1") ; Afrikaans |
8dedddd5 KH |
2184 | ("am" "Ethiopic" utf-8) ; Amharic |
2185 | ("an" . "Latin-9") ; Aragonese | |
5f395df3 | 2186 | ; ar Arabic glibc uses 8859-6 |
40c81f74 PE |
2187 | ; as Assamese |
2188 | ; ay Aymara | |
8dedddd5 | 2189 | ("az" . "UTF-8") ; Azerbaijani |
40c81f74 | 2190 | ; ba Bashkir |
8dedddd5 KH |
2191 | ("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s] |
2192 | ("bg" "Bulgarian" cp1251) ; Bulgarian | |
40c81f74 PE |
2193 | ; bh Bihari |
2194 | ; bi Bislama | |
8dedddd5 | 2195 | ("bn" . "UTF-8") ; Bengali, Bangla |
40c81f74 PE |
2196 | ("bo" . "Tibetan") |
2197 | ("br" . "Latin-1") ; Breton | |
d37ef0f6 | 2198 | ("bs" . "Latin-2") ; Bosnian |
8dedddd5 | 2199 | ("byn" . "UTF-8") ; Bilin; Blin |
40c81f74 PE |
2200 | ("ca" . "Latin-1") ; Catalan |
2201 | ; co Corsican | |
8dedddd5 KH |
2202 | ("cs" "Czech" iso-8859-2) |
2203 | ("cy" "Welsh" iso-8859-14) | |
40c81f74 | 2204 | ("da" . "Latin-1") ; Danish |
8dedddd5 | 2205 | ("de" "German" iso-8859-1) |
569a6374 | 2206 | ; dv Divehi |
40c81f74 | 2207 | ; dz Bhutani |
8dedddd5 | 2208 | ("el" "Greek" iso-8859-7) |
6ececc4d | 2209 | ;; Users who specify "en" explicitly typically want Latin-1, not ASCII. |
e11cf111 DL |
2210 | ;; That's actually what the GNU locales define, modulo things like |
2211 | ;; en_IN -- fx. | |
8dedddd5 | 2212 | ("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India |
eec5c8f9 | 2213 | ("en" "English" iso-8859-1) ; English |
58802cd7 | 2214 | ("eo" . "Esperanto") ; Esperanto |
8dedddd5 KH |
2215 | ("es" "Spanish" iso-8859-1) |
2216 | ("et" . "Latin-1") ; Estonian | |
40c81f74 | 2217 | ("eu" . "Latin-1") ; Basque |
8dedddd5 | 2218 | ("fa" . "UTF-8") ; Persian |
40c81f74 | 2219 | ("fi" . "Latin-1") ; Finnish |
8dedddd5 | 2220 | ("fj" . "Latin-1") ; Fiji |
40c81f74 | 2221 | ("fo" . "Latin-1") ; Faroese |
8dedddd5 | 2222 | ("fr" "French" iso-8859-1) ; French |
40c81f74 | 2223 | ("fy" . "Latin-1") ; Frisian |
6ececc4d | 2224 | ("ga" . "Latin-1") ; Irish Gaelic (new orthography) |
8dedddd5 KH |
2225 | ("gd" . "Latin-9") ; Scots Gaelic |
2226 | ("gez" "Ethiopic" utf-8) ; Geez | |
2227 | ("gl" . "Latin-1") ; Gallegan; Galician | |
40c81f74 | 2228 | ; gn Guarani |
8dedddd5 KH |
2229 | ("gu" . "UTF-8") ; Gujarati |
2230 | ("gv" . "Latin-1") ; Manx Gaelic | |
40c81f74 | 2231 | ; ha Hausa |
8dedddd5 KH |
2232 | ("he" "Hebrew" iso-8859-8) |
2233 | ("hi" "Devanagari" utf-8) ; Hindi | |
2234 | ("hr" "Croatian" iso-8859-2) ; Croatian | |
40c81f74 PE |
2235 | ("hu" . "Latin-2") ; Hungarian |
2236 | ; hy Armenian | |
2237 | ; ia Interlingua | |
2238 | ("id" . "Latin-1") ; Indonesian | |
2239 | ; ie Interlingue | |
2240 | ; ik Inupiak | |
2241 | ("is" . "Latin-1") ; Icelandic | |
8dedddd5 | 2242 | ("it" "Italian" iso-8859-1) ; Italian |
40c81f74 | 2243 | ; iu Inuktitut |
8dedddd5 KH |
2244 | ("iw" "Hebrew" iso-8859-8) |
2245 | ("ja" "Japanese" euc-jp) | |
40c81f74 | 2246 | ; jw Javanese |
8dedddd5 | 2247 | ("ka" "Georgian" georgian-ps) ; Georgian |
40c81f74 | 2248 | ; kk Kazakh |
6ececc4d | 2249 | ("kl" . "Latin-1") ; Greenlandic |
40c81f74 | 2250 | ; km Cambodian |
8dedddd5 KH |
2251 | ("kn" "Kannada" utf-8) |
2252 | ("ko" "Korean" euc-kr) | |
40c81f74 PE |
2253 | ; ks Kashmiri |
2254 | ; ku Kurdish | |
5f395df3 | 2255 | ("kw" . "Latin-1") ; Cornish |
40c81f74 PE |
2256 | ; ky Kirghiz |
2257 | ("la" . "Latin-1") ; Latin | |
5f395df3 | 2258 | ("lb" . "Latin-1") ; Luxemburgish |
8dedddd5 | 2259 | ("lg" . "Laint-6") ; Ganda |
40c81f74 | 2260 | ; ln Lingala |
8dedddd5 KH |
2261 | ("lo" "Lao" utf-8) ; Laothian |
2262 | ("lt" "Lithuanian" iso-8859-13) | |
9c20a8d5 | 2263 | ("lv" . "Latvian") ; Latvian, Lettish |
40c81f74 | 2264 | ; mg Malagasy |
5f395df3 | 2265 | ("mi" . "Latin-7") ; Maori |
8dedddd5 KH |
2266 | ("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian |
2267 | ("ml" "Malayalam" utf-8) | |
2268 | ("mn" . "UTF-8") ; Mongolian | |
40c81f74 | 2269 | ; mo Moldavian |
8dedddd5 | 2270 | ("mr" "Devanagari" utf-8) ; Marathi |
5f395df3 | 2271 | ("ms" . "Latin-1") ; Malay |
40c81f74 PE |
2272 | ("mt" . "Latin-3") ; Maltese |
2273 | ; my Burmese | |
2274 | ; na Nauru | |
8dedddd5 KH |
2275 | ("nb" . "Latin-1") ; Norwegian |
2276 | ("ne" "Devanagari" utf-8) ; Nepali | |
2277 | ("nl" "Dutch" iso-8859-1) | |
40c81f74 | 2278 | ("no" . "Latin-1") ; Norwegian |
5f395df3 | 2279 | ("oc" . "Latin-1") ; Occitan |
8dedddd5 KH |
2280 | ("om_ET" . "UTF-8") ; (Afan) Oromo |
2281 | ("om" . "Latin-1") ; (Afan) Oromo | |
40c81f74 | 2282 | ; or Oriya |
8dedddd5 | 2283 | ("pa" . "UTF-8") ; Punjabi |
40c81f74 PE |
2284 | ("pl" . "Latin-2") ; Polish |
2285 | ; ps Pashto, Pushto | |
2286 | ("pt" . "Latin-1") ; Portuguese | |
2287 | ; qu Quechua | |
6ececc4d | 2288 | ("rm" . "Latin-1") ; Rhaeto-Romanic |
40c81f74 | 2289 | ; rn Kirundi |
8dedddd5 KH |
2290 | ("ro" "Romanian" iso-8859-2) |
2291 | ("ru_RU" "Russian" iso-8859-5) | |
2292 | ("ru_UA" "Russian" koi8-u) | |
40c81f74 PE |
2293 | ; rw Kinyarwanda |
2294 | ("sa" . "Devanagari") ; Sanskrit | |
2295 | ; sd Sindhi | |
8dedddd5 | 2296 | ("se" . "UTF-8") ; Northern Sami |
40c81f74 PE |
2297 | ; sg Sangho |
2298 | ("sh" . "Latin-2") ; Serbo-Croatian | |
2299 | ; si Sinhalese | |
8dedddd5 KH |
2300 | ("sid" . "UTF-8") ; Sidamo |
2301 | ("sk" "Slovak" iso-8859-2) | |
2302 | ("sl" "Slovenian" iso-8859-2) | |
40c81f74 PE |
2303 | ; sm Samoan |
2304 | ; sn Shona | |
8dedddd5 KH |
2305 | ("so_ET" "UTF-8") ; Somali |
2306 | ("so" "Latin-1") ; Somali | |
6ececc4d | 2307 | ("sq" . "Latin-1") ; Albanian |
40c81f74 PE |
2308 | ("sr" . "Latin-2") ; Serbian (Latin alphabet) |
2309 | ; ss Siswati | |
8dedddd5 | 2310 | ("st" . "Latin-1") ; Sesotho |
40c81f74 | 2311 | ; su Sundanese |
8dedddd5 | 2312 | ("sv" "Swedish" iso-8859-1) ; Swedish |
40c81f74 | 2313 | ("sw" . "Latin-1") ; Swahili |
8dedddd5 KH |
2314 | ("ta" "Tamil" utf-8) |
2315 | ("te" . "UTF-8") ; Telugu | |
2316 | ("tg" "Tajik" koi8-t) | |
2317 | ("th" "Thai" tis-620) | |
2318 | ("ti" "Ethiopic" utf-8) ; Tigrinya | |
2319 | ("tig_ER" . "UTF-8") ; Tigre | |
40c81f74 | 2320 | ; tk Turkmen |
6ececc4d | 2321 | ("tl" . "Latin-1") ; Tagalog |
40c81f74 PE |
2322 | ; tn Setswana |
2323 | ; to Tonga | |
8dedddd5 | 2324 | ("tr" "Turkish" iso-8859-9) |
40c81f74 | 2325 | ; ts Tsonga |
8dedddd5 | 2326 | ("tt" . "UTF-8") ; Tatar |
40c81f74 PE |
2327 | ; tw Twi |
2328 | ; ug Uighur | |
8dedddd5 KH |
2329 | ("uk" "Ukrainian" koi8-u) |
2330 | ("ur" . "UTF-8") ; Urdu | |
2331 | ("uz_UZ@cyrillic" . "UTF-8"); Uzbek | |
5f395df3 | 2332 | ("uz" . "Latin-1") ; Uzbek |
8dedddd5 | 2333 | ("vi" "Vietnamese" utf-8) |
40c81f74 | 2334 | ; vo Volapuk |
d37ef0f6 | 2335 | ("wa" . "Latin-1") ; Walloon |
40c81f74 | 2336 | ; wo Wolof |
8dedddd5 | 2337 | ("xh" . "Latin-1") ; Xhosa |
9c20a8d5 | 2338 | ("yi" . "Windows-1255") ; Yiddish |
40c81f74 PE |
2339 | ; yo Yoruba |
2340 | ; za Zhuang | |
8dedddd5 | 2341 | ("zh_HK" . "Chinese-Big5") |
05b81b42 | 2342 | ; zh_HK/BIG5-HKSCS \ |
8dedddd5 | 2343 | ("zh_TW" . "Chinese-Big5") |
05b81b42 KH |
2344 | ("zh_CN.GB2312" "Chinese-GB") |
2345 | ("zh_CN.GBK" "Chinese-GBK") | |
2346 | ("zh_CN.GB18030" "Chinese-GB18030") | |
f21605fa | 2347 | ("zh_CN.UTF-8" . "Chinese-GBK") |
8dedddd5 | 2348 | ("zh_CN" . "Chinese-GB") |
40c81f74 | 2349 | ("zh" . "Chinese-GB") |
8dedddd5 | 2350 | ("zu" . "Latin-1") ; Zulu |
40c81f74 PE |
2351 | |
2352 | ;; ISO standard locales | |
2353 | ("c$" . "ASCII") | |
2354 | ("posix$" . "ASCII") | |
2355 | ||
40c81f74 PE |
2356 | ;; The "IPA" Emacs language environment does not correspond |
2357 | ;; to any ISO 639 code, so let it stand for itself. | |
2358 | ("ipa$" . "IPA") | |
2359 | ||
2360 | ;; Nonstandard or obsolete language codes | |
2361 | ("cz" . "Czech") ; e.g. Solaris 2.6 | |
2362 | ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4 | |
2363 | ("iw" . "Hebrew") ; e.g. X11R6.4 | |
f1282c7f | 2364 | ("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4 |
40c81f74 | 2365 | ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6 |
2e86ceaa | 2366 | ("jp" . "Japanese") ; e.g. MS Windows |
f21605fa | 2367 | ("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified |
86d9e628 | 2368 | ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional |
569a6374 EZ |
2369 | ("gbz" . "UTF-8") ; MS Windows Dari Persian |
2370 | ("div" . "UTF-8") ; MS Windows Divehi (Maldives) | |
2371 | ("wee" . "Latin-2") ; MS Windows Lower Sorbian | |
2372 | ("wen" . "Latin-2") ; MS Windows Upper Sorbian | |
367ca50f | 2373 | )) |
8dedddd5 | 2374 | "Alist of locale regexps vs the corresponding languages and coding systems. |
caff3c0a | 2375 | Each element has this form: |
8dedddd5 KH |
2376 | \(LOCALE-REGEXP LANG-ENV CODING-SYSTEM) |
2377 | The first element whose LOCALE-REGEXP matches the start of a | |
caff3c0a | 2378 | downcased locale specifies the LANG-ENV \(language environment) |
8dedddd5 KH |
2379 | and CODING-SYSTEM corresponding to that locale. If there is no |
2380 | appropriate language environment, the element may have this form: | |
2381 | \(LOCALE-REGEXP . LANG-ENV) | |
2382 | In this case, LANG-ENV is one of generic language environments for an | |
2383 | specific encoding such as \"Latin-1\" and \"UTF-8\".") | |
40c81f74 | 2384 | |
5f395df3 SM |
2385 | (defconst locale-charset-language-names |
2386 | (purecopy | |
2387 | '((".*8859[-_]?1\\>" . "Latin-1") | |
2388 | (".*8859[-_]?2\\>" . "Latin-2") | |
2389 | (".*8859[-_]?3\\>" . "Latin-3") | |
2390 | (".*8859[-_]?4\\>" . "Latin-4") | |
2391 | (".*8859[-_]?9\\>" . "Latin-5") | |
2392 | (".*8859[-_]?14\\>" . "Latin-8") | |
2393 | (".*8859[-_]?15\\>" . "Latin-9") | |
6b61353c KH |
2394 | (".*utf\\(?:-?8\\)?\\>" . "UTF-8") |
2395 | ;; utf-8@euro exists, so put this last. (@euro really specifies | |
2396 | ;; the currency, rather than the charset.) | |
e522f07b | 2397 | (".*@euro\\>" . "Latin-9"))) |
6ececc4d PE |
2398 | "List of pairs of locale regexps and charset language names. |
2399 | The first element whose locale regexp matches the start of a downcased locale | |
6b61353c | 2400 | specifies the language name whose charset corresponds to that locale. |
8dedddd5 | 2401 | This language name is used if the locale is not listed in |
caff3c0a | 2402 | `locale-language-names'.") |
6ececc4d | 2403 | |
5f395df3 SM |
2404 | (defconst locale-preferred-coding-systems |
2405 | (purecopy | |
8dedddd5 KH |
2406 | '((".*8859[-_]?1\\>" . iso-8859-1) |
2407 | (".*8859[-_]?2\\>" . iso-8859-2) | |
2408 | (".*8859[-_]?3\\>" . iso-8859-3) | |
2409 | (".*8859[-_]?4\\>" . iso-8859-4) | |
2410 | (".*8859[-_]?9\\>" . iso-8859-9) | |
2411 | (".*8859[-_]?14\\>" . iso-8859-14) | |
2412 | (".*8859[-_]?15\\>" . iso-8859-15) | |
2413 | (".*utf\\(?:-?8\\)?" . utf-8) | |
2414 | ;; utf-8@euro exists, so put this after utf-8. (@euro really | |
2415 | ;; specifies the currency, rather than the charset.) | |
2416 | (".*@euro" . iso-8859-15) | |
2417 | ("koi8-?r" . koi8-r) | |
2418 | ("koi8-?u" . koi8-u) | |
2419 | ("tcvn" . tcvn) | |
38c05d07 | 2420 | ("big5[-_]?hkscs" . big5-hkscs) |
8dedddd5 KH |
2421 | ("big5" . big5) |
2422 | ("euc-?tw" . euc-tw) | |
d6be7497 | 2423 | ("euc-?cn" . euc-cn) |
38c05d07 KH |
2424 | ("gb2312" . gb2312) |
2425 | ("gbk" . gbk) | |
2426 | ("gb18030" . gb18030) | |
8dedddd5 | 2427 | ("ja.*[._]euc" . japanese-iso-8bit) |
5f395df3 SM |
2428 | ("ja.*[._]jis7" . iso-2022-jp) |
2429 | ("ja.*[._]pck" . japanese-shift-jis) | |
2430 | ("ja.*[._]sjis" . japanese-shift-jis) | |
69210880 | 2431 | ("jpn" . japanese-shift-jis) ; MS-Windows uses this. |
8dedddd5 | 2432 | )) |
6ececc4d PE |
2433 | "List of pairs of locale regexps and preferred coding systems. |
2434 | The first element whose locale regexp matches the start of a downcased locale | |
8dedddd5 KH |
2435 | specifies the coding system to prefer when using that locale. |
2436 | This coding system is used if the locale specifies a specific charset.") | |
40c81f74 PE |
2437 | |
2438 | (defun locale-name-match (key alist) | |
2439 | "Search for KEY in ALIST, which should be a list of regexp-value pairs. | |
2440 | Return the value corresponding to the first regexp that matches the | |
2441 | start of KEY, or nil if there is no match." | |
2442 | (let (element) | |
2443 | (while (and alist (not element)) | |
d0d8885d | 2444 | (if (string-match-p (concat "\\`\\(?:" (car (car alist)) "\\)") key) |
40c81f74 PE |
2445 | (setq element (car alist))) |
2446 | (setq alist (cdr alist))) | |
2447 | (cdr element))) | |
2448 | ||
58b78d5b | 2449 | (defun locale-charset-match-p (charset1 charset2) |
91ed0599 | 2450 | "Whether charset names (strings) CHARSET1 and CHARSET2 are equivalent. |
58b78d5b DL |
2451 | Matching is done ignoring case and any hyphens and underscores in the |
2452 | names. E.g. `ISO_8859-1' and `iso88591' both match `iso-8859-1'." | |
2453 | (setq charset1 (replace-regexp-in-string "[-_]" "" charset1)) | |
2454 | (setq charset2 (replace-regexp-in-string "[-_]" "" charset2)) | |
2455 | (eq t (compare-strings charset1 nil nil charset2 nil nil t))) | |
2456 | ||
6b61353c KH |
2457 | (defvar locale-charset-alist nil |
2458 | "Coding system alist keyed on locale-style charset name. | |
2459 | Used by `locale-charset-to-coding-system'.") | |
2460 | ||
2461 | (defun locale-charset-to-coding-system (charset) | |
2462 | "Find coding system corresponding to CHARSET. | |
2463 | CHARSET is any sort of non-Emacs charset name, such as might be used | |
2464 | in a locale codeset, or elsewhere. It is matched to a coding system | |
2465 | first by case-insensitive lookup in `locale-charset-alist'. Then | |
2466 | matches are looked for in the coding system list, treating case and | |
2467 | the characters `-' and `_' as insignificant. The coding system base | |
2468 | is returned. Thus, for instance, if charset \"ISO8859-2\", | |
2469 | `iso-latin-2' is returned." | |
f15466c5 | 2470 | (or (car (assoc-string charset locale-charset-alist t)) |
6b61353c KH |
2471 | (let ((cs coding-system-alist) |
2472 | c) | |
2473 | (while (and (not c) cs) | |
2474 | (if (locale-charset-match-p charset (caar cs)) | |
2475 | (setq c (intern (caar cs))) | |
2476 | (pop cs))) | |
2477 | (if c (coding-system-base c))))) | |
2478 | ||
2479 | ;; Fixme: This ought to deal with the territory part of the locale | |
2480 | ;; too, for setting things such as calendar holidays, ps-print paper | |
2481 | ;; size, spelling dictionary. | |
2482 | ||
44ee1bdf GM |
2483 | (defun locale-translate (locale) |
2484 | "Expand LOCALE according to `locale-translation-file-name', if possible. | |
2485 | For example, translate \"swedish\" into \"sv_SE.ISO8859-1\"." | |
2486 | (if locale-translation-file-name | |
2487 | (with-temp-buffer | |
2488 | (set-buffer-multibyte nil) | |
2489 | (insert-file-contents locale-translation-file-name) | |
2490 | (if (re-search-forward | |
2491 | (concat "^" (regexp-quote locale) ":?[ \t]+") nil t) | |
2492 | (buffer-substring (point) (line-end-position)) | |
2493 | locale)) | |
2494 | locale)) | |
2495 | ||
36ab8612 | 2496 | (defun set-locale-environment (&optional locale-name frame) |
40c81f74 | 2497 | "Set up multi-lingual environment for using LOCALE-NAME. |
758f07de RS |
2498 | This sets the language environment, the coding system priority, |
2499 | the default input method and sometimes other things. | |
2500 | ||
58b78d5b | 2501 | LOCALE-NAME should be a string which is the name of a locale supported |
6b61353c | 2502 | by the system. Often it is of the form xx_XX.CODE, where xx is a |
58b78d5b DL |
2503 | language, XX is a country, and CODE specifies a character set and |
2504 | coding system. For example, the locale name \"ja_JP.EUC\" might name | |
2505 | a locale for Japanese in Japan using the `japanese-iso-8bit' | |
2506 | coding-system. The name may also have a modifier suffix, e.g. `@euro' | |
2507 | or `@cyrillic'. | |
40c81f74 | 2508 | |
758f07de | 2509 | If LOCALE-NAME is nil, its value is taken from the environment |
d37ef0f6 | 2510 | variables LC_ALL, LC_CTYPE and LANG (the first one that is set). |
40c81f74 PE |
2511 | |
2512 | The locale names supported by your system can typically be found in a | |
0812c1e8 | 2513 | directory named `/usr/share/locale' or `/usr/lib/locale'. LOCALE-NAME |
758f07de | 2514 | will be translated according to the table specified by |
0812c1e8 DL |
2515 | `locale-translation-file-name'. |
2516 | ||
36ab8612 MB |
2517 | If FRAME is non-nil, only set the keyboard coding system and the |
2518 | terminal coding system for the terminal of that frame, and don't | |
2519 | touch session-global parameters like the language environment. | |
97c57fb2 | 2520 | |
0812c1e8 DL |
2521 | See also `locale-charset-language-names', `locale-language-names', |
2522 | `locale-preferred-coding-systems' and `locale-coding-system'." | |
758f07de | 2523 | (interactive "sSet environment for locale: ") |
a1506d29 | 2524 | |
0d7c5bb9 DL |
2525 | ;; Do this at runtime for the sake of binaries possibly transported |
2526 | ;; to a system without X. | |
2527 | (setq locale-translation-file-name | |
2528 | (let ((files | |
6bba8c70 KH |
2529 | '("/usr/share/X11/locale/locale.alias" ; e.g. X11R7 |
2530 | "/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4 | |
6b61353c | 2531 | "/usr/X11R6/lib/X11/locale/locale.alias" ; XFree86, e.g. RedHat 4.2 |
0d7c5bb9 DL |
2532 | "/usr/openwin/lib/locale/locale.alias" ; e.g. Solaris 2.6 |
2533 | ;; | |
2534 | ;; The following name appears after the X-related names above, | |
2535 | ;; since the X-related names are what X actually uses. | |
2536 | "/usr/share/locale/locale.alias" ; GNU/Linux sans X | |
2537 | ))) | |
2538 | (while (and files (not (file-exists-p (car files)))) | |
2539 | (setq files (cdr files))) | |
2540 | (car files))) | |
2541 | ||
7008ccac GM |
2542 | (let ((locale locale-name)) |
2543 | ||
2544 | (unless locale | |
2545 | ;; Use the first of these three environment variables | |
2546 | ;; that has a nonempty value. | |
2547 | (let ((vars '("LC_ALL" "LC_CTYPE" "LANG"))) | |
db6bd804 DL |
2548 | (while (and vars |
2549 | (= 0 (length locale))) ; nil or empty string | |
36ab8612 | 2550 | (setq locale (getenv (pop vars) frame))))) |
7008ccac GM |
2551 | |
2552 | (when locale | |
44ee1bdf | 2553 | (setq locale (locale-translate locale)) |
7008ccac GM |
2554 | |
2555 | ;; Leave the system locales alone if the caller did not specify | |
2556 | ;; an explicit locale name, as their defaults are set from | |
2557 | ;; LC_MESSAGES and LC_TIME, not LC_CTYPE, and the user might not | |
2558 | ;; want to set them to the same value as LC_CTYPE. | |
2559 | (when locale-name | |
2560 | (setq system-messages-locale locale) | |
e9aaa1db KH |
2561 | (setq system-time-locale locale)) |
2562 | ||
2563 | (if (string-match "^[a-z][a-z]" locale) | |
2564 | (setq current-iso639-language (intern (match-string 0 locale))))) | |
7008ccac | 2565 | |
44ee1bdf GM |
2566 | (setq woman-locale |
2567 | (or system-messages-locale | |
739b7764 | 2568 | (let ((msglocale (getenv "LC_MESSAGES" frame))) |
44ee1bdf GM |
2569 | (if (zerop (length msglocale)) |
2570 | locale | |
2571 | (locale-translate msglocale))))) | |
2572 | ||
2573 | (when locale | |
7008ccac GM |
2574 | (setq locale (downcase locale)) |
2575 | ||
2576 | (let ((language-name | |
2577 | (locale-name-match locale locale-language-names)) | |
2578 | (charset-language-name | |
2579 | (locale-name-match locale locale-charset-language-names)) | |
cdb1af30 | 2580 | (default-eol-type (coding-system-eol-type |
b56a5ae0 | 2581 | (default-value 'buffer-file-coding-system))) |
7008ccac | 2582 | (coding-system |
6b61353c KH |
2583 | (or (locale-name-match locale locale-preferred-coding-systems) |
2584 | (when locale | |
2585 | (if (string-match "\\.\\([^@]+\\)" locale) | |
2586 | (locale-charset-to-coding-system | |
9e2a2647 | 2587 | (match-string 1 locale))))))) |
7008ccac | 2588 | |
8dedddd5 KH |
2589 | (if (consp language-name) |
2590 | ;; locale-language-names specify both lang-env and coding. | |
2591 | ;; But, what specified in locale-preferred-coding-systems | |
2592 | ;; has higher priority. | |
2593 | (setq coding-system (or coding-system | |
2594 | (nth 1 language-name)) | |
2595 | language-name (car language-name)) | |
2596 | ;; Otherwise, if locale is not listed in locale-language-names, | |
2597 | ;; use what listed in locale-charset-language-names. | |
2598 | (if (not language-name) | |
2599 | (setq language-name charset-language-name))) | |
7008ccac | 2600 | |
cdb1af30 EZ |
2601 | ;; If a specific EOL conversion was specified in the default |
2602 | ;; buffer-file-coding-system, preserve it in the coding system | |
2603 | ;; we will be using from now on. | |
8bca692e EZ |
2604 | (if (and (memq default-eol-type '(0 1 2 unix dos mac)) |
2605 | coding-system | |
2606 | (coding-system-p coding-system)) | |
cdb1af30 EZ |
2607 | (setq coding-system (coding-system-change-eol-conversion |
2608 | coding-system default-eol-type))) | |
2609 | ||
7008ccac GM |
2610 | (when language-name |
2611 | ||
2612 | ;; Set up for this character set. This is now the right way | |
2613 | ;; to do it for both unibyte and multibyte modes. | |
36ab8612 | 2614 | (unless frame |
97c57fb2 | 2615 | (set-language-environment language-name)) |
7008ccac | 2616 | |
597e2240 | 2617 | ;; If the default enable-multibyte-characters is nil, |
7008ccac GM |
2618 | ;; we are using single-byte characters, |
2619 | ;; so the display table and terminal coding system are irrelevant. | |
597e2240 | 2620 | (when (default-value 'enable-multibyte-characters) |
ff76e074 | 2621 | (set-display-table-and-terminal-coding-system |
36ab8612 | 2622 | language-name coding-system frame)) |
7008ccac | 2623 | |
166ce29f DL |
2624 | ;; Set the `keyboard-coding-system' if appropriate (tty |
2625 | ;; only). At least X and MS Windows can generate | |
2626 | ;; multilingual input. | |
68bba4e4 | 2627 | ;; XXX This was disabled unless `window-system', but that |
fffa137c | 2628 | ;; leads to buggy behavior when a tty frame is opened |
68bba4e4 KL |
2629 | ;; later. Setting the keyboard coding system has no adverse |
2630 | ;; effect on X, so let's do it anyway. -- Lorentey | |
2631 | (let ((kcs (or coding-system | |
2632 | (car (get-language-info language-name | |
2633 | 'coding-system))))) | |
36ab8612 | 2634 | (if kcs (set-keyboard-coding-system kcs frame))) |
dc2be2fa | 2635 | |
36ab8612 | 2636 | (unless frame |
97c57fb2 KL |
2637 | (setq locale-coding-system |
2638 | (car (get-language-info language-name 'coding-priority))))) | |
7008ccac | 2639 | |
36ab8612 | 2640 | (when (and (not frame) |
97c57fb2 | 2641 | coding-system |
8dedddd5 KH |
2642 | (not (coding-system-equal coding-system |
2643 | locale-coding-system))) | |
7008ccac | 2644 | (prefer-coding-system coding-system) |
218e7ce3 KH |
2645 | ;; Fixme: perhaps prefer-coding-system should set this too. |
2646 | ;; But it's not the time to do such a fundamental change. | |
2647 | (setq default-sendmail-coding-system coding-system) | |
80e3310b | 2648 | (setq locale-coding-system coding-system)))) |
e76ef161 | 2649 | |
52c7f9ee | 2650 | ;; On Windows, override locale-coding-system, |
26245233 JR |
2651 | ;; default-file-name-coding-system, keyboard-coding-system, |
2652 | ;; terminal-coding-system with system codepage. | |
5eb94383 | 2653 | (when (boundp 'w32-ansi-code-page) |
893b49bb JR |
2654 | (let ((code-page-coding (intern (format "cp%d" w32-ansi-code-page)))) |
2655 | (when (coding-system-p code-page-coding) | |
36ab8612 MB |
2656 | (unless frame (setq locale-coding-system code-page-coding)) |
2657 | (set-keyboard-coding-system code-page-coding frame) | |
d4aa48db | 2658 | (set-terminal-coding-system code-page-coding frame) |
59938af3 EZ |
2659 | ;; Set default-file-name-coding-system last, so that Emacs |
2660 | ;; doesn't try to use cpNNNN when it defines keyboard and | |
2661 | ;; terminal encoding. That's because the above two lines | |
2662 | ;; will want to load code-pages.el, where cpNNNN are | |
2663 | ;; defined; if default-file-name-coding-system were set to | |
2664 | ;; cpNNNN while these two lines run, Emacs will want to use | |
2665 | ;; it for encoding the file name it wants to load. And that | |
2666 | ;; will fail, since cpNNNN is not yet usable until | |
2667 | ;; code-pages.el finishes loading. | |
2668 | (setq default-file-name-coding-system code-page-coding)))) | |
893b49bb | 2669 | |
a41118cc | 2670 | (when (eq system-type 'darwin) |
ff76e074 SM |
2671 | ;; On Darwin, file names are always encoded in utf-8, no matter |
2672 | ;; the locale. | |
2673 | (setq default-file-name-coding-system 'utf-8) | |
2674 | ;; Mac OS X's Terminal.app by default uses utf-8 regardless of | |
2675 | ;; the locale. | |
2676 | (when (and (null window-system) | |
36ab8612 | 2677 | (equal (getenv "TERM_PROGRAM" frame) "Apple_Terminal")) |
ff76e074 SM |
2678 | (set-terminal-coding-system 'utf-8) |
2679 | (set-keyboard-coding-system 'utf-8))) | |
a41118cc | 2680 | |
e76ef161 | 2681 | ;; Default to A4 paper if we're not in a C, POSIX or US locale. |
3479c806 | 2682 | ;; (See comments in Flocale_info.) |
36ab8612 | 2683 | (unless frame |
97c57fb2 KL |
2684 | (let ((locale locale) |
2685 | (paper (locale-info 'paper))) | |
2686 | (if paper | |
2687 | ;; This will always be null at the time of writing. | |
2688 | (cond | |
2689 | ((equal paper '(216 279)) | |
2690 | (setq ps-paper-type 'letter)) | |
2691 | ((equal paper '(210 297)) | |
2692 | (setq ps-paper-type 'a4))) | |
2693 | (let ((vars '("LC_ALL" "LC_PAPER" "LANG"))) | |
2694 | (while (and vars (= 0 (length locale))) | |
36ab8612 | 2695 | (setq locale (getenv (pop vars) frame)))) |
97c57fb2 KL |
2696 | (when locale |
2697 | ;; As of glibc 2.2.5, these are the only US Letter locales, | |
2698 | ;; and the rest are A4. | |
2699 | (setq ps-paper-type | |
2700 | (or (locale-name-match locale '(("c$" . letter) | |
2701 | ("posix$" . letter) | |
2702 | (".._us" . letter) | |
2703 | (".._pr" . letter) | |
2704 | (".._ca" . letter) | |
2705 | ("enu$" . letter) ; Windows | |
2706 | ("esu$" . letter) | |
2707 | ("enc$" . letter) | |
2708 | ("frc$" . letter))) | |
2709 | 'a4))))))) | |
e76ef161 | 2710 | nil) |
40c81f74 | 2711 | \f |
16400c32 KH |
2712 | ;;; Character property |
2713 | ||
2714 | ;; Each element has the form (PROP . TABLE). | |
2715 | ;; PROP is a symbol representing a character property. | |
2716 | ;; TABLE is a char-table containing the property value for each character. | |
2717 | ;; TABLE may be a name of file to load to build a char-table. | |
2718 | ;; Don't modify this variable directly but use `define-char-code-property'. | |
2719 | ||
2720 | (defvar char-code-property-alist nil | |
2721 | "Alist of character property name vs char-table containing property values. | |
2722 | Internal use only.") | |
2723 | ||
2724 | (put 'char-code-property-table 'char-table-extra-slots 5) | |
2725 | ||
2726 | (defun define-char-code-property (name table &optional docstring) | |
2727 | "Define NAME as a character code property given by TABLE. | |
2728 | TABLE is a char-table of purpose `char-code-property-table' with | |
2729 | these extra slots: | |
2730 | 1st: NAME. | |
2731 | 2nd: Function to call to get a property value of a character. | |
caff3c0a | 2732 | It is called with three arguments CHAR, VAL, and TABLE, where |
16400c32 KH |
2733 | CHAR is a character, VAL is the value of (aref TABLE CHAR). |
2734 | 3rd: Function to call to put a property value of a character. | |
2735 | It is called with the same arguments as above. | |
2736 | 4th: Function to call to get a description string of a property value. | |
2737 | It is called with one argument VALUE, a property value. | |
2738 | 5th: Data used by the above functions. | |
2739 | ||
2740 | TABLE may be a name of file to load to build a char-table. The | |
2741 | file should contain a call of `define-char-code-property' with a | |
2742 | char-table of the above format as the argument TABLE. | |
2743 | ||
2744 | TABLE may also be nil, in which case no property value is pre-assigned. | |
2745 | ||
49275d55 | 2746 | Optional 3rd argument DOCSTRING is a documentation string of the property. |
16400c32 KH |
2747 | |
2748 | See also the documentation of `get-char-code-property' and | |
2749 | `put-char-code-property'." | |
2750 | (or (symbolp name) | |
2751 | (error "Not a symbol: %s" name)) | |
2752 | (if (char-table-p table) | |
2753 | (or (and (eq (char-table-subtype table) 'char-code-property-table) | |
2754 | (eq (char-table-extra-slot table 0) name)) | |
2755 | (error "Invalid char-table: %s" table)) | |
2756 | (or (stringp table) | |
2757 | (error "Not a char-table nor a file name: %s" table))) | |
ebfa10d3 | 2758 | (if (stringp table) (setq table (purecopy table))) |
16400c32 KH |
2759 | (let ((slot (assq name char-code-property-alist))) |
2760 | (if slot | |
2761 | (setcdr slot table) | |
2762 | (setq char-code-property-alist | |
2763 | (cons (cons name table) char-code-property-alist)))) | |
905a9ed3 | 2764 | (put name 'char-code-property-documentation (purecopy docstring))) |
4ed46869 KH |
2765 | |
2766 | (defvar char-code-property-table | |
2767 | (make-char-table 'char-code-property-table) | |
2768 | "Char-table containing a property list of each character code. | |
16400c32 | 2769 | This table is used for properties not listed in `char-code-property-alist'. |
4ed46869 | 2770 | See also the documentation of `get-char-code-property' and |
96db204a | 2771 | `put-char-code-property'.") |
4ed46869 KH |
2772 | |
2773 | (defun get-char-code-property (char propname) | |
16400c32 KH |
2774 | "Return the value of CHAR's PROPNAME property." |
2775 | (let ((slot (assq propname char-code-property-alist))) | |
2776 | (if slot | |
2777 | (let (table value func) | |
2778 | (if (stringp (cdr slot)) | |
d0d8885d | 2779 | (load (cdr slot) nil t)) |
16400c32 KH |
2780 | (setq table (cdr slot) |
2781 | value (aref table char) | |
2782 | func (char-table-extra-slot table 1)) | |
2783 | (if (functionp func) | |
2784 | (setq value (funcall func char value table))) | |
2785 | value) | |
2786 | (plist-get (aref char-code-property-table char) propname)))) | |
4ed46869 KH |
2787 | |
2788 | (defun put-char-code-property (char propname value) | |
16400c32 | 2789 | "Store CHAR's PROPNAME property with VALUE. |
4ed46869 | 2790 | It can be retrieved with `(get-char-code-property CHAR PROPNAME)'." |
16400c32 KH |
2791 | (let ((slot (assq propname char-code-property-alist))) |
2792 | (if slot | |
2793 | (let (table func) | |
2794 | (if (stringp (cdr slot)) | |
d0d8885d | 2795 | (load (cdr slot) nil t)) |
16400c32 KH |
2796 | (setq table (cdr slot) |
2797 | func (char-table-extra-slot table 2)) | |
2798 | (if (functionp func) | |
2799 | (funcall func char value table) | |
2800 | (aset table char value))) | |
2801 | (let* ((plist (aref char-code-property-table char)) | |
2802 | (x (plist-put plist propname value))) | |
2803 | (or (eq x plist) | |
2804 | (aset char-code-property-table char x)))) | |
2805 | value)) | |
2806 | ||
2807 | (defun char-code-property-description (prop value) | |
2808 | "Return a description string of character property PROP's value VALUE. | |
2809 | If there's no description string for VALUE, return nil." | |
2810 | (let ((slot (assq prop char-code-property-alist))) | |
2811 | (if slot | |
2812 | (let (table func) | |
2813 | (if (stringp (cdr slot)) | |
d0d8885d | 2814 | (load (cdr slot) nil t)) |
16400c32 KH |
2815 | (setq table (cdr slot) |
2816 | func (char-table-extra-slot table 3)) | |
2817 | (if (functionp func) | |
2818 | (funcall func value)))))) | |
4ed46869 | 2819 | |
a127b764 KH |
2820 | \f |
2821 | ;; Pretty description of encoded string | |
2822 | ||
2823 | ;; Alist of ISO 2022 control code vs the corresponding mnemonic string. | |
2f7f4bee | 2824 | (defconst iso-2022-control-alist |
a127b764 KH |
2825 | '((?\x1b . "ESC") |
2826 | (?\x0e . "SO") | |
2827 | (?\x0f . "SI") | |
2828 | (?\x8e . "SS2") | |
2829 | (?\x8f . "SS3") | |
2830 | (?\x9b . "CSI"))) | |
2831 | ||
2832 | (defun encoded-string-description (str coding-system) | |
2833 | "Return a pretty description of STR that is encoded by CODING-SYSTEM." | |
2834 | (setq str (string-as-unibyte str)) | |
993b2a7d | 2835 | (mapconcat |
a6dfc99b | 2836 | (if (and coding-system (eq (coding-system-type coding-system) 'iso-2022)) |
993b2a7d KH |
2837 | ;; Try to get a pretty description for ISO 2022 escape sequences. |
2838 | (function (lambda (x) (or (cdr (assq x iso-2022-control-alist)) | |
695effcc JL |
2839 | (format "#x%02X" x)))) |
2840 | (function (lambda (x) (format "#x%02X" x)))) | |
993b2a7d | 2841 | str " ")) |
a127b764 | 2842 | |
430c6ced | 2843 | (defun encode-coding-char (char coding-system &optional charset) |
a127b764 | 2844 | "Encode CHAR by CODING-SYSTEM and return the resulting string. |
430c6ced KH |
2845 | If CODING-SYSTEM can't safely encode CHAR, return nil. |
2846 | The 3rd optional argument CHARSET, if non-nil, is a charset preferred | |
2847 | on encoding." | |
b529ed1e KH |
2848 | (let* ((str1 (string-as-multibyte (string char))) |
2849 | (str2 (string-as-multibyte (string char char))) | |
2850 | (found (find-coding-systems-string str1)) | |
a127b764 | 2851 | enc1 enc2 i1 i2) |
b529ed1e KH |
2852 | (if (and (consp found) |
2853 | (eq (car found) 'undecided)) | |
2854 | str1 | |
2855 | (when (memq (coding-system-base coding-system) found) | |
2856 | ;; We must find the encoded string of CHAR. But, just encoding | |
2857 | ;; CHAR will put extra control sequences (usually to designate | |
2858 | ;; ASCII charset) at the tail if type of CODING is ISO 2022. | |
2859 | ;; To exclude such tailing bytes, we at first encode one-char | |
2860 | ;; string and two-char string, then check how many bytes at the | |
2861 | ;; tail of both encoded strings are the same. | |
2862 | ||
430c6ced KH |
2863 | (when charset |
2864 | (put-text-property 0 1 'charset charset str1) | |
2865 | (put-text-property 0 2 'charset charset str2)) | |
b529ed1e KH |
2866 | (setq enc1 (encode-coding-string str1 coding-system) |
2867 | i1 (length enc1) | |
2868 | enc2 (encode-coding-string str2 coding-system) | |
2869 | i2 (length enc2)) | |
2870 | (while (and (> i1 0) (= (aref enc1 (1- i1)) (aref enc2 (1- i2)))) | |
2871 | (setq i1 (1- i1) i2 (1- i2))) | |
2872 | ||
2873 | ;; Now (substring enc1 i1) and (substring enc2 i2) are the same, | |
2874 | ;; and they are the extra control sequences at the tail to | |
2875 | ;; exclude. | |
2876 | (substring enc2 0 i2))))) | |
a127b764 | 2877 | |
d2f613a0 DL |
2878 | ;; Backwards compatibility. These might be better with :init-value t, |
2879 | ;; but that breaks loadup. | |
2880 | (define-minor-mode unify-8859-on-encoding-mode | |
2881 | "Obsolete." | |
2882 | :group 'mule | |
2883 | :global t) | |
2884 | (define-minor-mode unify-8859-on-decoding-mode | |
2885 | "Obsolete." | |
2886 | :group 'mule | |
2887 | :global t) | |
a127b764 | 2888 | |
7dd42fb1 KH |
2889 | (defvar nonascii-insert-offset 0 "This variable is obsolete.") |
2890 | (defvar nonascii-translation-table nil "This variable is obsolete.") | |
2891 | ||
838d78d4 JL |
2892 | (defvar ucs-names nil |
2893 | "Alist of cached (CHAR-NAME . CHAR-CODE) pairs.") | |
2894 | ||
2895 | (defun ucs-names () | |
2896 | "Return alist of (CHAR-NAME . CHAR-CODE) pairs cached in `ucs-names'." | |
2897 | (or ucs-names | |
ae63e572 KH |
2898 | (let ((bmp-ranges |
2899 | '((#x0000 . #x33FF) | |
2900 | ;; (#x3400 . #x4DBF) CJK Ideograph Extension A | |
2901 | (#x4DC0 . #x4DFF) | |
2902 | ;; (#x4E00 . #x9FFF) CJK Ideograph | |
2903 | (#xA000 . #x0D7FF) | |
2904 | ;; (#xD800 . #xFAFF) Surrogate/Private | |
2905 | (#xFB00 . #xFFFD))) | |
2906 | (upper-ranges | |
2907 | '((#x10000 . #x134FF) | |
78edd3b7 | 2908 | ;; (#x13500 . #x1CFFF) unused |
ae63e572 | 2909 | (#x1D000 . #x1FFFF) |
78edd3b7 | 2910 | ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused |
ae63e572 KH |
2911 | (#xE0000 . #xE01FF))) |
2912 | (gc-cons-threshold 10000000) | |
2913 | c end name names) | |
2914 | (dolist (range bmp-ranges) | |
2915 | (setq c (car range) | |
2916 | end (cdr range)) | |
2917 | (while (<= c end) | |
2918 | (if (setq name (get-char-code-property c 'name)) | |
2919 | (push (cons name c) names)) | |
2920 | (if (setq name (get-char-code-property c 'old-name)) | |
2921 | (push (cons name c) names)) | |
2922 | (setq c (1+ c)))) | |
2923 | (dolist (range upper-ranges) | |
2924 | (setq c (car range) | |
2925 | end (cdr range)) | |
2926 | (while (<= c end) | |
2927 | (if (setq name (get-char-code-property c 'name)) | |
2928 | (push (cons name c) names)) | |
2929 | (setq c (1+ c)))) | |
e0727873 | 2930 | (setq ucs-names names)))) |
838d78d4 JL |
2931 | |
2932 | (defvar ucs-completions (lazy-completion-table ucs-completions ucs-names) | |
2933 | "Lazy completion table for completing on Unicode character names.") | |
bdf66e8d | 2934 | (put 'ucs-completions 'risky-local-variable t) |
838d78d4 JL |
2935 | |
2936 | (defun read-char-by-name (prompt) | |
2937 | "Read a character by its Unicode name or hex number string. | |
c0a67379 | 2938 | Display PROMPT and read a string that represents a character by its |
a167e505 EZ |
2939 | Unicode property `name' or `old-name'. |
2940 | ||
2941 | This function returns the character as a number. | |
2942 | ||
2943 | You can type a few of the first letters of the Unicode name and | |
2944 | use completion. If you type a substring of the Unicode name | |
2945 | preceded by an asterisk `*' and use completion, it will show all | |
2946 | the characters whose names include that substring, not necessarily | |
2947 | at the beginning of the name. | |
2948 | ||
2949 | This function also accepts a hexadecimal number of Unicode code | |
2950 | point or a number in hash notation, e.g. #o21430 for octal, | |
2951 | #x2318 for hex, or #10r8984 for decimal." | |
838d78d4 JL |
2952 | (let* ((completion-ignore-case t) |
2953 | (input (completing-read prompt ucs-completions))) | |
c0a67379 | 2954 | (cond |
d0d8885d | 2955 | ((string-match-p "^[0-9a-fA-F]+$" input) |
c0a67379 | 2956 | (string-to-number input 16)) |
d0d8885d | 2957 | ((string-match-p "^#" input) |
c0a67379 JL |
2958 | (read input)) |
2959 | (t | |
d6ff002e | 2960 | (cdr (assoc-string input (ucs-names) t)))))) |
838d78d4 | 2961 | |
dcbac02a JL |
2962 | (defun ucs-insert (character &optional count inherit) |
2963 | "Insert COUNT copies of CHARACTER of the given Unicode code point. | |
c0a67379 | 2964 | Interactively, prompts for a Unicode character name or a hex number |
dcbac02a | 2965 | using `read-char-by-name'. |
a167e505 EZ |
2966 | |
2967 | You can type a few of the first letters of the Unicode name and | |
2968 | use completion. If you type a substring of the Unicode name | |
2969 | preceded by an asterisk `*' and use completion, it will show all | |
2970 | the characters whose names include that substring, not necessarily | |
2971 | at the beginning of the name. | |
2972 | ||
dcbac02a JL |
2973 | The optional third arg INHERIT (non-nil when called interactively), |
2974 | says to inherit text properties from adjoining text, if those | |
2975 | properties are sticky." | |
2976 | (interactive | |
2977 | (list (read-char-by-name "Unicode (name or hex): ") | |
2978 | (prefix-numeric-value current-prefix-arg) | |
2979 | t)) | |
2980 | (unless count (setq count 1)) | |
2981 | (if (stringp character) | |
2982 | (setq character (string-to-number character 16))) | |
c0a67379 | 2983 | (cond |
dcbac02a JL |
2984 | ((not (integerp character)) |
2985 | (error "Not a Unicode character code: %S" character)) | |
2986 | ((or (< character 0) (> character #x10FFFF)) | |
2987 | (error "Not a Unicode character code: 0x%X" character))) | |
2988 | (if inherit | |
2989 | (dotimes (i count) (insert-and-inherit character)) | |
2990 | (dotimes (i count) (insert character)))) | |
cb9e47dd | 2991 | |
c0a67379 | 2992 | (define-key ctl-x-map "8\r" 'ucs-insert) |
a127b764 | 2993 | |
9ee5b744 | 2994 | ;; arch-tag: b382c432-4b36-460e-bf4c-05efd0bb18dc |
4ed46869 | 2995 | ;;; mule-cmds.el ends here |