Commit | Line | Data |
---|---|---|
07513d64 | 1 | ;;; mule.el --- basic commands for multilingual environment |
4ed46869 | 2 | |
4ed46869 | 3 | ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. |
58cfed09 | 4 | ;; Licensed to the Free Software Foundation. |
9e3b6057 | 5 | ;; Copyright (C) 2001 Free Software Foundation, Inc. |
c1841772 KH |
6 | ;; Copyright (C) 2001, 2002 |
7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) | |
8 | ;; Registration Number H13PRO009 | |
4ed46869 KH |
9 | |
10 | ;; Keywords: mule, multilingual, character set, coding system | |
11 | ||
12 | ;; This file is part of GNU Emacs. | |
13 | ||
14 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
15 | ;; it under the terms of the GNU General Public License as published by | |
16 | ;; the Free Software Foundation; either version 2, or (at your option) | |
17 | ;; any later version. | |
18 | ||
19 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
20 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 | ;; GNU General Public License for more details. | |
23 | ||
24 | ;; You should have received a copy of the GNU General Public License | |
369314dc KH |
25 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
26 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
27 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 | 28 | |
60370d40 PJ |
29 | ;;; Commentary: |
30 | ||
4ed46869 KH |
31 | ;;; Code: |
32 | ||
c1841772 | 33 | (defconst mule-version "7.0 (SAKAKI)" "\ |
4ed46869 KH |
34 | Version number and name of this version of MULE (multilingual environment).") |
35 | ||
c1841772 | 36 | (defconst mule-version-date "2002.2.28" "\ |
4ed46869 KH |
37 | Distribution date of this version of MULE (multilingual environment).") |
38 | ||
c1841772 KH |
39 | |
40 | \f | |
41 | ;;; CHARACTER | |
42 | (defalias 'char-valid-p 'characterp) | |
43 | (make-obsolete 'char-valid-p 'characterp "22.1") | |
44 | ||
45 | \f | |
46 | ;;; CHARSET | |
47 | ||
48 | (defun define-charset (name docstring &rest props) | |
49 | "Define NAME (symbol) as a charset with DOCSTRING. | |
50 | The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE | |
07513d64 | 51 | may be any symbol. The following have special meanings, and one of |
c1841772 KH |
52 | `:code-offset', `:map', `:parents' must be specified. |
53 | ||
54 | `:short-name' | |
55 | ||
56 | VALUE must be a short string to identify the charset. If omitted, | |
57 | NAME is used. | |
58 | ||
59 | `:long-name' | |
60 | ||
61 | VALUE must be a string longer than `:short-name' to identify the | |
07513d64 | 62 | charset. If omitted, the value of the `:short-name' attribute is used. |
c1841772 KH |
63 | |
64 | `:dimension' | |
65 | ||
66 | VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of | |
07513d64 DL |
67 | code-points of the charsets. If omitted, it is calculated from the |
68 | value of the `:code-space' attribute. | |
c1841772 KH |
69 | |
70 | `:code-space' | |
71 | ||
72 | VALUE must be a vector of length at most 8 specifying the byte code | |
73 | range of each dimension in this format: | |
74 | [ MIN-1 MAX-1 MIN-2 MAX-2 ... ] | |
07513d64 | 75 | where MIN-N is the minimum byte value of Nth dimension of code-point, |
c1841772 KH |
76 | MAX-N is the maximum byte value of that. |
77 | ||
78 | `:iso-final-char' | |
79 | ||
80 | VALUE must be a character in the range 32 to 127 (inclusive) | |
81 | specifying the final char of the charset for ISO-2022 encoding. If | |
82 | omitted, the charset can't be encoded by ISO-2022 based | |
83 | coding-systems. | |
84 | ||
85 | `:iso-revision-number' | |
86 | ||
87 | VALUE must be an integer in the range 0..63, specifying the revision | |
88 | number of the charset for ISO-2022 encoding. | |
89 | ||
90 | `:emacs-mule-id' | |
91 | ||
92 | VALUE must be an integer of 0, 128..255. If omitted, the charset | |
93 | can't be encoded by coding-systems of type `emacs-mule'. | |
94 | ||
95 | `:ascii-compatible-p' | |
96 | ||
07513d64 DL |
97 | VALUE must be nil or t (default nil). If VALUE is t, the charset is |
98 | compatible with ASCII, i.e. the first 128 code points map to ASCII. | |
c1841772 KH |
99 | |
100 | `:supplementary-p' | |
101 | ||
102 | VALUE must be nil or t. If the VALUE is t, the charset is | |
07513d64 DL |
103 | supplementary, which means it is used only as a parent of some other |
104 | charset. | |
c1841772 KH |
105 | |
106 | `:invalid-code' | |
107 | ||
108 | VALUE must be a nonnegative integer that can be used as an invalid | |
109 | code point of the charset. If the minimum code is 0 and the maximum | |
110 | code is greater than Emacs' maximum integer value, `:invalid-code' | |
111 | should not be omitted. | |
112 | ||
113 | `:code-offset' | |
114 | ||
07513d64 DL |
115 | VALUE must be an integer added to the index number of a character to |
116 | get the corresponding character code. | |
c1841772 KH |
117 | |
118 | `:map' | |
119 | ||
120 | VALUE must be vector or string. | |
121 | ||
122 | If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ], | |
123 | where CODE-n is a code-point of the charset, and CHAR-n is the | |
07513d64 | 124 | corresponding character code. |
c1841772 KH |
125 | |
126 | If it is a string, it is a name of file that contains the above | |
3e4abc9e KH |
127 | information. Each line of the file must be this format: |
128 | 0xXXX 0xYYY | |
129 | where XXX is a hexadecimal representation of CODE-n and YYY is a | |
130 | hexadecimal representation of CHAR-n. A line starting with `#' is a | |
131 | comment line. | |
c1841772 KH |
132 | |
133 | `:parents' | |
134 | ||
135 | VALUE must be a list of parent charsets. The charset inherits | |
136 | characters from them. Each element of the list may be a cons (PARENT | |
137 | . OFFSET), where PARENT is a parent charset, and OFFSET is an offset | |
138 | value to add to a code point of this charset to get the corresponding | |
139 | code point of PARENT. | |
140 | ||
141 | `:unify-map' | |
142 | ||
143 | VALUE must be vector or string. | |
144 | ||
145 | If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ], | |
146 | where CODE-n is a code-point of the charset, and CHAR-n is the | |
07513d64 | 147 | corresponding Unicode character code. |
c1841772 KH |
148 | |
149 | If it is a string, it is a name of file that contains the above | |
3e4abc9e KH |
150 | information. The file format is the same as what described for `:map' |
151 | attribute." | |
c1841772 KH |
152 | (let ((attrs (mapcar 'list '(:dimension |
153 | :code-space | |
154 | :iso-final-char | |
155 | :iso-revision-number | |
156 | :emacs-mule-id | |
157 | :ascii-compatible-p | |
158 | :supplementary-p | |
159 | :invalid-code | |
160 | :code-offset | |
161 | :map | |
162 | :parents | |
163 | :unify-map | |
164 | :plist)))) | |
165 | ||
166 | ;; If :dimension is omitted, get the dimension from :code-space. | |
167 | (let ((dimension (plist-get props :dimension))) | |
168 | (or dimension | |
169 | (progn | |
170 | (setq dimension (/ (length (plist-get props :code-space)) 2)) | |
171 | (setq props (plist-put props :dimension dimension))))) | |
172 | ||
173 | (dolist (slot attrs) | |
174 | (setcdr slot (plist-get props (car slot)))) | |
175 | ||
176 | ;; Make sure that the value of :code-space is a vector of 8 | |
177 | ;; elements. | |
178 | (let* ((slot (assq :code-space attrs)) | |
179 | (val (cdr slot)) | |
180 | (len (length val))) | |
181 | (if (< len 8) | |
182 | (setcdr slot | |
183 | (vconcat val (make-vector (- 8 len) 0))))) | |
184 | ||
185 | ;; Add :name and :docstring properties to PROPS. | |
186 | (setq props | |
187 | (cons :name (cons name (cons :docstring (cons docstring props))))) | |
188 | (or (plist-get props :short-name) | |
189 | (plist-put props :short-name (symbol-name name))) | |
190 | (or (plist-get props :long-name) | |
191 | (plist-put props :long-name (plist-get props :short-name))) | |
e1e529fa DL |
192 | ;; We can probably get a worthwhile amount in purespace. |
193 | (setq props | |
194 | (mapcar (lambda (elt) | |
195 | (if (stringp elt) | |
196 | (purecopy elt) | |
197 | elt)) | |
198 | props)) | |
c1841772 KH |
199 | (setcdr (assq :plist attrs) props) |
200 | ||
201 | (apply 'define-charset-internal name (mapcar 'cdr attrs)))) | |
202 | ||
203 | ||
4ed46869 | 204 | (defun load-with-code-conversion (fullname file &optional noerror nomessage) |
0f69cb38 KH |
205 | "Execute a file of Lisp code named FILE whose absolute name is FULLNAME. |
206 | The file contents are decoded before evaluation if necessary. | |
4ed46869 KH |
207 | If optional second arg NOERROR is non-nil, |
208 | report no error if FILE doesn't exist. | |
209 | Print messages at start and end of loading unless | |
210 | optional third arg NOMESSAGE is non-nil. | |
211 | Return t if file exists." | |
212 | (if (null (file-readable-p fullname)) | |
213 | (and (null noerror) | |
214 | (signal 'file-error (list "Cannot open load file" file))) | |
215 | ;; Read file with code conversion, and then eval. | |
216 | (let* ((buffer | |
217 | ;; To avoid any autoloading, set default-major-mode to | |
218 | ;; fundamental-mode. | |
88162676 RS |
219 | ;; So that we don't get completely screwed if the |
220 | ;; file is encoded in some complicated character set, | |
221 | ;; read it with real decoding, as a multibyte buffer, | |
222 | ;; even if this is a --unibyte Emacs session. | |
223 | (let ((default-major-mode 'fundamental-mode) | |
224 | (default-enable-multibyte-characters t)) | |
4ed46869 KH |
225 | ;; We can't use `generate-new-buffer' because files.el |
226 | ;; is not yet loaded. | |
227 | (get-buffer-create (generate-new-buffer-name " *load*")))) | |
db5cae4b SM |
228 | (load-in-progress t) |
229 | (source (save-match-data (string-match "\\.el\\'" fullname)))) | |
230 | (unless nomessage | |
231 | (if source | |
232 | (message "Loading %s (source)..." file) | |
233 | (message "Loading %s..." file))) | |
234 | (when purify-flag | |
235 | (setq preloaded-file-list (cons file preloaded-file-list))) | |
4ed46869 | 236 | (unwind-protect |
a6acd8a2 | 237 | (let ((load-file-name fullname) |
1c4cc63a | 238 | (set-auto-coding-for-load t) |
a6acd8a2 | 239 | (inhibit-file-name-operation nil)) |
4ed46869 KH |
240 | (save-excursion |
241 | (set-buffer buffer) | |
242 | (insert-file-contents fullname) | |
7d276780 EZ |
243 | ;; If the loaded file was inserted with no-conversion or |
244 | ;; raw-text coding system, make the buffer unibyte. | |
245 | ;; Otherwise, eval-buffer might try to interpret random | |
246 | ;; binary junk as multibyte characters. | |
247 | (if (and enable-multibyte-characters | |
248 | (or (eq (coding-system-type last-coding-system-used) 5) | |
249 | (eq last-coding-system-used 'no-conversion))) | |
250 | (set-buffer-multibyte nil)) | |
4ed46869 KH |
251 | ;; Make `kill-buffer' quiet. |
252 | (set-buffer-modified-p nil)) | |
0f69cb38 | 253 | ;; Have the original buffer current while we eval. |
88162676 RS |
254 | (eval-buffer buffer nil file |
255 | ;; If this Emacs is running with --unibyte, | |
256 | ;; convert multibyte strings to unibyte | |
257 | ;; after reading them. | |
ba74e833 | 258 | ;; (not default-enable-multibyte-characters) |
8dd08b5b | 259 | nil t |
ba74e833 | 260 | )) |
cfc70cdf RS |
261 | (let (kill-buffer-hook kill-buffer-query-functions) |
262 | (kill-buffer buffer))) | |
4ed46869 | 263 | (let ((hook (assoc file after-load-alist))) |
db5cae4b SM |
264 | (when hook |
265 | (mapcar (function eval) (cdr hook)))) | |
266 | (unless (or nomessage noninteractive) | |
267 | (if source | |
268 | (message "Loading %s (source)...done" file) | |
269 | (message "Loading %s...done" file))) | |
4ed46869 KH |
270 | t))) |
271 | ||
272 | ;; API (Application Program Interface) for charsets. | |
273 | ||
c1841772 KH |
274 | ;;; Charset property |
275 | ||
276 | (defun get-charset-property (charset propname) | |
277 | "Return the value of CHARSET's PROPNAME property. | |
278 | This is the last value stored with | |
279 | (put-charset-property CHARSET PROPNAME VALUE)." | |
280 | (plist-get (charset-plist charset) propname)) | |
281 | ||
282 | (defun put-charset-property (charset propname value) | |
283 | "Store CHARSETS's PROPNAME property with value VALUE. | |
284 | It can be retrieved with `(get-charset-property CHARSET PROPNAME)'." | |
285 | (set-charset-plist charset | |
286 | (plist-put (charset-plist charset) propname value))) | |
287 | ||
288 | ||
289 | (defun charset-description (charset) | |
290 | "Return description string of CHARSET." | |
291 | (plist-get (charset-plist charset) :docstring)) | |
292 | ||
293 | (defun charset-dimension (charset) | |
294 | "Return dimension string of CHARSET." | |
295 | (plist-get (charset-plist charset) :dimension)) | |
296 | ||
297 | (defun charset-chars (charset) | |
298 | "Return character numbers contained in a dimension of CHARSET." | |
103cc921 | 299 | (let ((code-space (plist-get (charset-plist charset) :code-space))) |
c1841772 KH |
300 | (1+ (- (aref code-space 1) (aref code-space 0))))) |
301 | ||
302 | (defun charset-iso-final-char (charset) | |
303 | "Return final char of CHARSET." | |
304 | (or (plist-get (charset-plist charset) :iso-final-char) | |
305 | -1)) | |
4ed46869 KH |
306 | |
307 | (defmacro charset-short-name (charset) | |
c1841772 KH |
308 | "Return short name of CHARSET." |
309 | (plist-get (charset-plist charset) :short-name)) | |
4ed46869 KH |
310 | |
311 | (defmacro charset-long-name (charset) | |
c1841772 KH |
312 | "Return long name of CHARSET." |
313 | (plist-get (charset-plist charset) :long-name)) | |
4ed46869 | 314 | |
d3675a42 | 315 | (defun charset-list () |
900dc6e3 KH |
316 | "Return list of charsets ever defined. |
317 | ||
d3675a42 | 318 | This function is provided for backward compatibility. |
900dc6e3 | 319 | Now we have the variable `charset-list'." |
d3675a42 | 320 | charset-list) |
07513d64 | 321 | (make-obsolete 'charset-list "Use variable `charset-list'" "22.1") |
d3675a42 | 322 | |
c1841772 KH |
323 | (defun generic-char-p (char) |
324 | "Always return nil. This exists only for backward compatibility." | |
325 | nil) | |
07513d64 | 326 | (make-obsolete 'generic-char-p "Generic characters no longer exist" "22.1") |
0269ddfb | 327 | \f |
e76938e7 | 328 | ;; Coding system stuff |
4ed46869 | 329 | |
c1841772 KH |
330 | ;; Coding system is a symbol that has been defined by the function |
331 | ;; `define-coding-system'. | |
c76b5c99 | 332 | |
c1841772 KH |
333 | (defconst coding-system-iso-2022-flags |
334 | '(long-form | |
335 | ascii-at-eol | |
336 | ascii-at-cntl | |
337 | 7-bit | |
338 | locking-shift | |
339 | single-shift | |
340 | designation | |
341 | revision | |
342 | direction | |
343 | init-at-bol | |
344 | designate-at-bol | |
345 | safe | |
346 | latin-extra | |
347 | composition | |
caa7db3a | 348 | euc-tw-shift) |
c1841772 | 349 | "List of symbols that control ISO-2022 encoder/decoder. |
0269ddfb | 350 | |
c1841772 | 351 | The value of `:flags' attribute in the argument of the function |
caa7db3a | 352 | `define-coding-system' must be one of them. |
4ed46869 | 353 | |
c1841772 KH |
354 | If `long-form' is specified, use a long designation sequence on |
355 | encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312', | |
356 | and `japanese-jisx0208'. The long designation sequence doesn't | |
357 | conform to ISO 2022, but used by such a coding system as | |
358 | `compound-text'. | |
359 | ||
360 | If `ascii-at-eol' is specified, designate ASCII to g0 at end of line | |
361 | on encoding. | |
362 | ||
363 | If `ascii-at-cntl' is specified, designate ASCII to g0 before control | |
364 | codes and SPC on encoding. | |
365 | ||
366 | If `7-bit' is specified, use 7-bit code only on encoding. | |
367 | ||
368 | If `locking-shift' is specified, decode locking-shift code correctly | |
369 | on decoding, and use locking-shift to invoke a graphic element on | |
370 | encoding. | |
371 | ||
372 | If `single-shift' is specified, decode single-shift code correctly on | |
373 | decoding, and use single-shift to invoke a graphic element on encoding. | |
374 | ||
375 | If `designation' is specified, decode designation code correctly on | |
376 | decoding, and use designation to designate a charset to a graphic | |
377 | element on encoding. | |
378 | ||
379 | If `revision' is specified, produce an escape sequence to specify | |
380 | revision number of a charset on encoding. Such an escape sequence is | |
381 | always correctly decoded on decoding. | |
382 | ||
383 | If `direction' is specified, decode ISO6429's code for specifying | |
384 | direction correctly, and produced the code on encoding. | |
385 | ||
386 | If `init-at-bol' is specified, on encoding, it is assumed that | |
387 | invocation and designation statuses are reset at each beginning of | |
388 | line even if `ascii-at-eol' is not specified thus no code for | |
389 | resetting them are produced. | |
390 | ||
391 | If `safe' is specified, on encoding, characters not supported by a | |
392 | coding are replaced with `?'. | |
393 | ||
394 | If `latin-extra' is specified, code-detection routine assumes that a | |
395 | code specified in `latin-extra-code-table' (which see) is valid. | |
396 | ||
397 | If `composition' is specified, an escape sequence to specify | |
398 | composition sequence is correctly decode on decoding, and is produced | |
399 | on encoding. | |
400 | ||
401 | If `euc-tw-shift' is specified, the EUC-TW specific shifting code is | |
caa7db3a | 402 | correctly decoded on decoding, and is produced on encoding.") |
c1841772 KH |
403 | |
404 | (defun define-coding-system (name docstring &rest props) | |
405 | "Define NAME (symbol) as a coding system with DOCSTRING and attributes. | |
406 | The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE | |
407 | may be any symbol. | |
408 | ||
409 | The following attributes have special meanings. If labeled as | |
410 | \"(required)\", it should not be omitted. | |
411 | ||
412 | `:mnemonic' (required) | |
413 | ||
414 | VALUE is a character to display on mode line for the coding system. | |
415 | ||
416 | `:coding-type' (required) | |
417 | ||
418 | VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022', | |
5170b329 | 419 | `emacs-mule', `shift-jis', `big5', `ccl', `raw-text', `undecided'. |
c1841772 KH |
420 | |
421 | `:eol-type' (optional) | |
422 | ||
423 | VALUE is an EOL (end-of-line) format of the coding system. It must be | |
424 | one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL | |
425 | \(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF), | |
426 | and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on | |
427 | decoding by the coding system, Emacs automatically detects an EOL | |
428 | format of the source text. | |
429 | ||
430 | `:charset-list' (required) | |
431 | ||
432 | VALUE must be a list of charsets supported by the coding system. On | |
433 | encoding by the coding system, if a character belongs to multiple | |
434 | charsets in the list, a charset that comes earlier in the list is | |
435 | selected. | |
436 | ||
437 | `:ascii-compatible-p' (optional) | |
438 | ||
439 | If VALUE is non-nil, the coding system decodes all 7-bit bytes into | |
07513d64 DL |
440 | the corresponding ASCII characters, and encodes all ASCII characters |
441 | back to the corresponding 7-bit bytes. If omitted, the VALUE defaults | |
c1841772 KH |
442 | to nil. |
443 | ||
444 | `:decode-translation-table' (optional) | |
445 | ||
446 | VALUE must be a translation table to use on decoding. | |
447 | ||
448 | `:encode-translation-table' (optional) | |
449 | ||
450 | VALUE must be a translation table to use on encoding. | |
451 | ||
452 | `:post-read-conversion' (optional) | |
453 | ||
454 | VALUE must be a function to call after some text is inserted and | |
455 | decoded by the coding system itself and before any functions in | |
456 | `after-insert-functions' are called. The arguments to this function | |
457 | is the same as those of a function in `after-insert-functions', | |
458 | i.e. LENGTH of a text while putting point at the head of the text to | |
459 | be decoded | |
460 | ||
461 | `:pre-write-conversion' | |
462 | ||
463 | VALUE must be a function to call after all functions in | |
464 | `write-region-annotate-functions' and `buffer-file-format' are called, | |
465 | and before the text is encoded by the coding system itself. The | |
466 | arguments to this function is the same as those of a function in | |
467 | `write-region-annotate-functions', i.e. FROM and TO specifying region | |
468 | of a text. | |
469 | ||
470 | `:default-char' | |
471 | ||
472 | VALUE must be a character. On encoding, a character not supported by | |
473 | the coding system is replaced with VALUE. | |
474 | ||
475 | `:eol-type' | |
476 | ||
477 | VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like | |
478 | EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like | |
479 | EOL (CR). If omitted, on decoding, the coding system detect EOL | |
480 | format automatically, and on encoding, used Unix-like EOL. | |
481 | ||
482 | `:mime-charset' | |
483 | ||
484 | VALUE must be a symbol who has MIME-charset name. | |
485 | ||
486 | `:flags' | |
487 | ||
488 | VALUE must be a list of symbols that control ISO-2022 converter. Each | |
489 | symbol must be a member of the variable `coding-system-iso-2022-flags' | |
490 | \(which see). This attribute has a meaning only when `:coding-type' | |
491 | is `iso-2022'. | |
492 | ||
493 | `:designation' | |
494 | ||
495 | VALUE must be a vector [ G0-USAGE G1-USAGE G2-USAGE G3-USAGE]. | |
496 | GN-USAGE specifies the usage of graphic register GN as follows. | |
497 | ||
498 | If it is nil, no charset can be designated to GN. | |
499 | ||
07513d64 | 500 | If it is a charset, the charset is initially designated to GN, and |
c1841772 KH |
501 | never used by the other charsets. |
502 | ||
503 | If it is a list, the elements must be charsets, nil, 94, or 96. GN | |
504 | can be used by all listed charsets. If the list contains 94, any | |
07513d64 DL |
505 | charsets whose iso-chars is 94 can be designated to GN. If the list |
506 | contains 96, any charsets whose iso-chars is 96 can be designated to | |
c1841772 | 507 | GN. If the first element is a charset, the charset is initially |
07513d64 | 508 | designated to GN. |
c1841772 KH |
509 | |
510 | This attribute has a meaning only when `:coding-type' is `iso-2022'. | |
511 | ||
512 | `:bom' | |
513 | ||
514 | VALUE must nil, t, or cons of coding systems whose `:coding-type' is | |
515 | `utf-16'. | |
516 | ||
517 | This attribute has a meaning only when `:coding-type' is `utf-16'. | |
518 | ||
519 | `:endian' | |
520 | ||
521 | VALUE must be t or nil. See the above description for the detail. | |
522 | ||
523 | This attribute has a meaning only when `:coding-type' is `utf-16'. | |
524 | ||
525 | `:ccl-decoder' | |
526 | ||
527 | This attribute has a meaning only when `:coding-type' is `ccl'. | |
528 | ||
529 | `:ccl-encoder' | |
530 | ||
531 | This attribute has a meaning only when `:coding-type' is `ccl'." | |
532 | (let* ((common-attrs (mapcar 'list | |
533 | '(:mnemonic | |
534 | :coding-type | |
535 | :charset-list | |
536 | :ascii-compatible-p | |
537 | :docode-translation-table | |
538 | :encode-translation-table | |
539 | :post-read-conversion | |
540 | :pre-write-conversion | |
541 | :default-char | |
542 | :plist | |
543 | :eol-type))) | |
544 | (coding-type (plist-get props :coding-type)) | |
545 | (spec-attrs (mapcar 'list | |
546 | (cond ((eq coding-type 'iso-2022) | |
547 | '(:initial | |
548 | :reg-usage | |
549 | :request | |
550 | :flags)) | |
551 | ((eq coding-type 'utf-16) | |
552 | '(:bom | |
553 | :endian)) | |
554 | ((eq coding-type 'ccl) | |
555 | '(:ccl-decoder | |
556 | :ccl-encoder | |
557 | :valids)))))) | |
558 | ||
559 | (dolist (slot common-attrs) | |
560 | (setcdr slot (plist-get props (car slot)))) | |
561 | ||
562 | (dolist (slot spec-attrs) | |
563 | (setcdr slot (plist-get props (car slot)))) | |
564 | ||
565 | (if (eq coding-type 'iso-2022) | |
566 | (let ((designation (plist-get props :designation)) | |
567 | (flags (plist-get props :flags)) | |
568 | (initial (make-vector 4 nil)) | |
569 | (reg-usage (cons 4 4)) | |
570 | request elt) | |
571 | (dotimes (i 4) | |
572 | (setq elt (aref designation i)) | |
573 | (cond ((charsetp elt) | |
574 | (aset initial i elt) | |
575 | (setq request (cons (cons elt i) request))) | |
576 | ((consp elt) | |
577 | (aset initial i (car elt)) | |
578 | (if (charsetp (car elt)) | |
579 | (setq request (cons (cons (car elt) i) request))) | |
580 | (dolist (e (cdr elt)) | |
581 | (cond ((charsetp e) | |
582 | (setq request (cons (cons e i) request))) | |
583 | ((eq e 94) | |
584 | (setcar reg-usage i)) | |
585 | ((eq e 96) | |
586 | (setcdr reg-usage i)) | |
587 | ((eq e t) | |
588 | (setcar reg-usage i) | |
589 | (setcdr reg-usage i))))))) | |
590 | (setcdr (assq :initial spec-attrs) initial) | |
591 | (setcdr (assq :reg-usage spec-attrs) reg-usage) | |
592 | (setcdr (assq :request spec-attrs) request) | |
593 | ||
594 | ;; Change :flags value from a list to a bit-mask. | |
595 | (let ((bits 0) | |
596 | (i 0)) | |
597 | (dolist (elt coding-system-iso-2022-flags) | |
598 | (if (memq elt flags) | |
599 | (setq bits (logior bits (lsh 1 i)))) | |
600 | (setq i (1+ i))) | |
601 | (setcdr (assq :flags spec-attrs) bits)))) | |
602 | ||
603 | ;; Add :name and :docstring properties to PROPS. | |
604 | (setq props | |
e1e529fa DL |
605 | (cons :name (cons name (cons :docstring (cons (purecopy docstring) |
606 | props))))) | |
c1841772 KH |
607 | (setcdr (assq :plist common-attrs) props) |
608 | ||
609 | (apply 'define-coding-system-internal | |
610 | name (mapcar 'cdr (append common-attrs spec-attrs))))) | |
611 | ||
612 | (defun coding-system-doc-string (coding-system) | |
613 | "Return the documentation string for CODING-SYSTEM." | |
614 | (plist-get (coding-system-plist coding-system) :docstring)) | |
4ed46869 | 615 | |
4ed46869 | 616 | (defun coding-system-mnemonic (coding-system) |
0269ddfb | 617 | "Return the mnemonic character of CODING-SYSTEM. |
6e2c8840 KH |
618 | The mnemonic character of a coding system is used in mode line |
619 | to indicate the coding system. If the arg is nil, return ?-." | |
c1841772 | 620 | (plist-get (coding-system-plist coding-system) :mnemonic)) |
4ed46869 | 621 | |
c1841772 KH |
622 | (defun coding-system-type (coding-system) |
623 | "Return the coding type of CODING-SYSTEM. | |
624 | A coding type is a symbol indicating the encoding method of CODING-SYSTEM. | |
625 | See the function `define-coding-system' for more detail." | |
626 | (plist-get (coding-system-plist coding-system) :coding-type)) | |
d3675a42 | 627 | |
c1841772 | 628 | (defun coding-system-charset-list (coding-system) |
07513d64 | 629 | "Return list of charsets supported by CODING-SYSTEM. |
c1841772 KH |
630 | If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'. |
631 | If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'." | |
632 | (plist-get (coding-system-plist coding-system) :charset-list)) | |
0269ddfb KH |
633 | |
634 | (defun coding-system-get (coding-system prop) | |
07513d64 DL |
635 | "Extract a value from CODING-SYSTEM's property list for property PROP. |
636 | For compatibility with Emacs 20/21, this accepts old-style symbols | |
637 | like `mime-charset' as well as the current style like `:mime-charset'." | |
638 | (or (plist-get (coding-system-plist coding-system) prop) | |
639 | (if (not (keywordp prop)) | |
640 | (plist-get (coding-system-plist coding-system) | |
641 | (intern (concat ":" (symbol-name prop))))))) | |
0269ddfb KH |
642 | |
643 | (defun coding-system-put (coding-system prop val) | |
644 | "Change value in CODING-SYSTEM's property list PROP to VAL." | |
c1841772 | 645 | (plist-put (coding-system-plist coding-system) prop val)) |
0269ddfb KH |
646 | |
647 | (defalias 'coding-system-parent 'coding-system-base) | |
2598a293 | 648 | (make-obsolete 'coding-system-parent 'coding-system-base "20.3") |
0269ddfb KH |
649 | |
650 | ;; Coding system also has a property `eol-type'. | |
651 | ;; | |
652 | ;; This property indicates how the coding system handles end-of-line | |
653 | ;; format. The value is integer 0, 1, 2, or a vector of three coding | |
654 | ;; systems. Each integer value 0, 1, and 2 indicates the format of | |
655 | ;; end-of-line LF, CRLF, and CR respectively. A vector value | |
656 | ;; indicates that the format of end-of-line should be detected | |
657 | ;; automatically. Nth element of the vector is the subsidiary coding | |
658 | ;; system whose `eol-type' property is N. | |
4ed46869 | 659 | |
857ea15c AS |
660 | (defun coding-system-lessp (x y) |
661 | (cond ((eq x 'no-conversion) t) | |
662 | ((eq y 'no-conversion) nil) | |
663 | ((eq x 'emacs-mule) t) | |
664 | ((eq y 'emacs-mule) nil) | |
665 | ((eq x 'undecided) t) | |
666 | ((eq y 'undecided) nil) | |
667 | (t (let ((c1 (coding-system-mnemonic x)) | |
668 | (c2 (coding-system-mnemonic y))) | |
669 | (or (< (downcase c1) (downcase c2)) | |
670 | (and (not (> (downcase c1) (downcase c2))) | |
671 | (< c1 c2))))))) | |
672 | ||
857ea15c | 673 | (defun add-to-coding-system-list (coding-system) |
521d4010 | 674 | "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted." |
857ea15c AS |
675 | (if (or (null coding-system-list) |
676 | (coding-system-lessp coding-system (car coding-system-list))) | |
677 | (setq coding-system-list (cons coding-system coding-system-list)) | |
678 | (let ((len (length coding-system-list)) | |
679 | mid (tem coding-system-list)) | |
680 | (while (> len 1) | |
681 | (setq mid (nthcdr (/ len 2) tem)) | |
682 | (if (coding-system-lessp (car mid) coding-system) | |
683 | (setq tem mid | |
684 | len (- len (/ len 2))) | |
685 | (setq len (/ len 2)))) | |
686 | (setcdr tem (cons coding-system (cdr tem)))))) | |
687 | ||
80a7463d | 688 | (defun coding-system-list (&optional base-only) |
c11a8f77 KH |
689 | "Return a list of all existing non-subsidiary coding systems. |
690 | If optional arg BASE-ONLY is non-nil, only base coding systems are listed. | |
691 | The value doesn't include subsidiary coding systems which are what | |
692 | made from bases and aliases automatically for various end-of-line | |
693 | formats (e.g. iso-latin-1-unix, koi8-r-dos)." | |
80a7463d KH |
694 | (let* ((codings (copy-sequence coding-system-list)) |
695 | (tail (cons nil codings))) | |
696 | ;; Remove subsidiary coding systems (eol variants) and alias | |
697 | ;; coding systems (if necessary). | |
698 | (while (cdr tail) | |
699 | (let* ((coding (car (cdr tail))) | |
700 | (aliases (coding-system-get coding 'alias-coding-systems))) | |
701 | (if (or | |
702 | ;; CODING is an eol variant if not in ALIASES. | |
703 | (not (memq coding aliases)) | |
704 | ;; CODING is an alias if it is not car of ALIASES. | |
705 | (and base-only (not (eq coding (car aliases))))) | |
706 | (setcdr tail (cdr (cdr tail))) | |
707 | (setq tail (cdr tail))))) | |
708 | codings)) | |
709 | ||
4ed46869 | 710 | (defun set-buffer-file-coding-system (coding-system &optional force) |
358d28fb RS |
711 | "Set the file coding-system of the current buffer to CODING-SYSTEM. |
712 | This means that when you save the buffer, it will be converted | |
713 | according to CODING-SYSTEM. For a list of possible values of CODING-SYSTEM, | |
714 | use \\[list-coding-systems]. | |
715 | ||
716 | If the buffer's previous file coding-system value specifies end-of-line | |
717 | conversion, and CODING-SYSTEM does not specify one, CODING-SYSTEM is | |
718 | merged with the already-specified end-of-line conversion. | |
b839fdcc KH |
719 | |
720 | If the buffer's previous file coding-system value specifies text | |
721 | conversion, and CODING-SYSTEM does not specify one, CODING-SYSTEM is | |
722 | merged with the already-specified text conversion. | |
723 | ||
724 | However, if the optional prefix argument FORCE is non-nil, then | |
725 | CODING-SYSTEM is used exactly as specified. | |
aeef8f07 KH |
726 | |
727 | This marks the buffer modified so that the succeeding \\[save-buffer] | |
728 | surely saves the buffer with CODING-SYSTEM. From a program, if you | |
729 | don't want to mark the buffer modified, just set the variable | |
730 | `buffer-file-coding-system' directly." | |
d9e3229d | 731 | (interactive "zCoding system for visited file (default, nil): \nP") |
4ed46869 | 732 | (check-coding-system coding-system) |
36d455c4 | 733 | (if (and coding-system buffer-file-coding-system (null force)) |
8dd735c1 KH |
734 | (let ((base (coding-system-base buffer-file-coding-system)) |
735 | (eol (coding-system-eol-type buffer-file-coding-system))) | |
736 | ;; If CODING-SYSTEM doesn't specify text conversion, merge | |
737 | ;; with that of buffer-file-coding-system. | |
738 | (if (eq (coding-system-base coding-system) 'undecided) | |
739 | (setq coding-system (coding-system-change-text-conversion | |
740 | coding-system base))) | |
741 | ;; If CODING-SYSTEM doesn't specify eol conversion, merge with | |
742 | ;; that of buffer-file-coding-system. | |
743 | (if (and (vectorp (coding-system-eol-type coding-system)) | |
744 | (numberp eol) (>= eol 0) (<= eol 2)) | |
745 | (setq coding-system (coding-system-change-eol-conversion | |
746 | coding-system eol))))) | |
4ed46869 KH |
747 | (setq buffer-file-coding-system coding-system) |
748 | (set-buffer-modified-p t) | |
749 | (force-mode-line-update)) | |
750 | ||
358d28fb RS |
751 | (defvar default-terminal-coding-system nil |
752 | "Default value for the terminal coding system. | |
753 | This is normally set according to the selected language environment. | |
754 | See also the command `set-terminal-coding-system'.") | |
755 | ||
df100398 KH |
756 | (defun set-terminal-coding-system (coding-system) |
757 | "Set coding system of your terminal to CODING-SYSTEM. | |
358d28fb RS |
758 | All text output to the terminal will be encoded |
759 | with the specified coding system. | |
760 | For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]. | |
761 | The default is determined by the selected language environment | |
762 | or by the previous use of this command." | |
763 | (interactive | |
2e02a76f RS |
764 | (list (let ((default (if (and (not (terminal-coding-system)) |
765 | default-terminal-coding-system) | |
766 | default-terminal-coding-system))) | |
767 | (read-coding-system | |
768 | (format "Coding system for terminal display (default, %s): " | |
769 | default) | |
770 | default)))) | |
358d28fb RS |
771 | (if (and (not coding-system) |
772 | (not (terminal-coding-system))) | |
773 | (setq coding-system default-terminal-coding-system)) | |
774 | (if coding-system | |
521d4010 | 775 | (setq default-terminal-coding-system coding-system)) |
df100398 KH |
776 | (set-terminal-coding-system-internal coding-system) |
777 | (redraw-frame (selected-frame))) | |
778 | ||
358d28fb RS |
779 | (defvar default-keyboard-coding-system nil |
780 | "Default value of the keyboard coding system. | |
781 | This is normally set according to the selected language environment. | |
782 | See also the command `set-keyboard-coding-system'.") | |
783 | ||
df100398 | 784 | (defun set-keyboard-coding-system (coding-system) |
358d28fb RS |
785 | "Set coding system for keyboard input to CODING-SYSTEM. |
786 | In addition, this command enables Encoded-kbd minor mode. | |
6d34f495 DL |
787 | \(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see |
788 | `encoded-kbd-mode'.) | |
358d28fb RS |
789 | For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]. |
790 | The default is determined by the selected language environment | |
791 | or by the previous use of this command." | |
792 | (interactive | |
2e02a76f RS |
793 | (list (let ((default (if (and (not (keyboard-coding-system)) |
794 | default-keyboard-coding-system) | |
795 | default-keyboard-coding-system))) | |
796 | (read-coding-system | |
797 | (format "Coding system for keyboard input (default, %s): " | |
798 | default) | |
799 | default)))) | |
358d28fb RS |
800 | (if (and (not coding-system) |
801 | (not (keyboard-coding-system))) | |
802 | (setq coding-system default-keyboard-coding-system)) | |
803 | (if coding-system | |
804 | (setq default-keyboard-coding-system coding-system)) | |
df100398 | 805 | (set-keyboard-coding-system-internal coding-system) |
b23bad0b | 806 | (setq keyboard-coding-system coding-system) |
df100398 KH |
807 | (encoded-kbd-mode (if coding-system 1 0))) |
808 | ||
6d34f495 DL |
809 | (defcustom keyboard-coding-system nil |
810 | "Specify coding system for keyboard input. | |
811 | If you set this on a terminal which can't distinguish Meta keys from | |
812 | 8-bit characters, you will have to use ESC to type Meta characters. | |
813 | See Info node `Specify Coding' and Info node `Single-Byte Character Support'. | |
814 | ||
815 | Setting this variable directly does not take effect; | |
816 | use either M-x customize or \\[set-keyboard-coding-system]." | |
817 | :type '(coding-system :tag "Coding system") | |
818 | :link '(info-link "(emacs)Specify Coding") | |
819 | :link '(info-link "(emacs)Single-Byte Character Support") | |
820 | :set (lambda (symbol value) | |
821 | ;; Don't load encoded-kbd-mode unnecessarily. | |
822 | (if (or value (boundp 'encoded-kbd-mode)) | |
823 | (set-keyboard-coding-system value) | |
824 | (set-default 'keyboard-coding-system nil))) ; must initialize | |
825 | :version "21.1" | |
826 | :group 'keyboard | |
827 | :group 'mule) | |
828 | ||
df100398 | 829 | (defun set-buffer-process-coding-system (decoding encoding) |
358d28fb | 830 | "Set coding systems for the process associated with the current buffer. |
df100398 | 831 | DECODING is the coding system to be used to decode input from the process, |
358d28fb RS |
832 | ENCODING is the coding system to be used to encode output to the process. |
833 | ||
834 | For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]." | |
4ed46869 | 835 | (interactive |
83911021 | 836 | "zCoding-system for output from the process: \nzCoding-system for input to the process: ") |
4ed46869 KH |
837 | (let ((proc (get-buffer-process (current-buffer)))) |
838 | (if (null proc) | |
521d4010 | 839 | (error "No process") |
df100398 KH |
840 | (check-coding-system decoding) |
841 | (check-coding-system encoding) | |
842 | (set-process-coding-system proc decoding encoding))) | |
4ed46869 KH |
843 | (force-mode-line-update)) |
844 | ||
d0b99881 RS |
845 | (defalias 'set-clipboard-coding-system 'set-selection-coding-system) |
846 | ||
14915c37 | 847 | (defun set-selection-coding-system (coding-system) |
b25eef20 KH |
848 | "Make CODING-SYSTEM used for communicating with other X clients . |
849 | When sending or receiving text via cut_buffer, selection, and clipboard, | |
850 | the text is encoded or decoded by CODING-SYSTEM." | |
a03b3ce1 | 851 | (interactive "zCoding system for X selection: ") |
b25eef20 | 852 | (check-coding-system coding-system) |
14915c37 | 853 | (setq selection-coding-system coding-system)) |
b25eef20 | 854 | |
e8dd0160 | 855 | ;; Coding system lastly specified by the command |
a03b3ce1 KH |
856 | ;; set-next-selection-coding-system. |
857 | (defvar last-next-selection-coding-system nil) | |
858 | ||
859 | (defun set-next-selection-coding-system (coding-system) | |
860 | "Make CODING-SYSTEM used for the next communication with other X clients. | |
861 | This setting is effective for the next communication only." | |
862 | (interactive | |
863 | (list (read-coding-system | |
864 | (if last-next-selection-coding-system | |
865 | (format "Coding system for the next X selection (default, %S): " | |
866 | last-next-selection-coding-system) | |
867 | "Coding system for the next X selection: ") | |
868 | last-next-selection-coding-system))) | |
869 | (if coding-system | |
870 | (setq last-next-selection-coding-system coding-system) | |
871 | (setq coding-system last-next-selection-coding-system)) | |
872 | (check-coding-system coding-system) | |
873 | ||
874 | (setq next-selection-coding-system coding-system)) | |
875 | ||
4ed46869 | 876 | (defun set-coding-priority (arg) |
521d4010 | 877 | "Set priority of coding categories according to ARG. |
c1841772 KH |
878 | ARG is a list of coding categories ordered by priority. |
879 | ||
880 | This function is provided for backward compatibility. | |
881 | Now we have more convenient function `set-coding-system-priority'." | |
d9e3229d KH |
882 | (let ((l arg) |
883 | (current-list (copy-sequence coding-category-list))) | |
e8dd0160 | 884 | ;; Check the validity of ARG while deleting coding categories in |
d9e3229d KH |
885 | ;; ARG from CURRENT-LIST. We assume that CODING-CATEGORY-LIST |
886 | ;; contains all coding categories. | |
887 | (while l | |
888 | (if (or (null (get (car l) 'coding-category-index)) | |
889 | (null (memq (car l) current-list))) | |
890 | (error "Invalid or duplicated element in argument: %s" arg)) | |
891 | (setq current-list (delq (car l) current-list)) | |
892 | (setq l (cdr l))) | |
4ed46869 | 893 | ;; Update `coding-category-list' and return it. |
2feaf204 | 894 | (setq coding-category-list (append arg current-list)) |
07513d64 | 895 | ;; Fixme: not defined. |
2feaf204 | 896 | (set-coding-priority-internal))) |
07513d64 | 897 | (make-obsolete 'set-coding-priority 'set-coding-system-priority "22.1") |
4ed46869 | 898 | |
835cbadb EZ |
899 | ;;; X selections |
900 | ||
901 | (defvar non-standard-icccm-encodings-alist | |
902 | '(("ISO8859-15" . latin-iso8859-15) | |
903 | ("ISO8859-14" . latin-iso8859-14) | |
904 | ("KOI8-R" . koi8-r) | |
905 | ("BIG5-0" . big5)) | |
906 | "Alist of font charset names defined by XLFD, and the corresponding Emacs | |
907 | charsets or coding systems.") | |
908 | ||
909 | ;; Functions to support "Non-Standard Character Set Encodings" defined | |
910 | ;; by the ICCCM spec. We support that by converting the leading | |
911 | ;; sequence of the ``extended segment'' to the corresponding ISO-2022 | |
912 | ;; sequences (if the leading sequence names an Emacs charset), or decode | |
913 | ;; the segment (if it names a coding system). Encoding does the reverse. | |
914 | (defun ctext-post-read-conversion (len) | |
915 | "Decode LEN characters encoded as Compound Text with Extended Segments." | |
916 | (buffer-disable-undo) ; minimize consing due to insertions and deletions | |
917 | (narrow-to-region (point) (+ (point) len)) | |
918 | (save-match-data | |
919 | (let ((pt (point-marker)) | |
920 | (oldpt (point-marker)) | |
921 | (newpt (make-marker)) | |
922 | (modified-p (buffer-modified-p)) | |
923 | (case-fold-search nil) | |
924 | last-coding-system-used | |
925 | encoding textlen chset) | |
926 | (while (re-search-forward | |
927 | "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002" | |
928 | nil 'move) | |
929 | (set-marker newpt (point)) | |
930 | (set-marker pt (match-beginning 0)) | |
931 | (setq encoding (match-string 3)) | |
932 | (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128) | |
933 | (- (aref (match-string 2) 1) 128)) | |
934 | (1+ (length encoding)))) | |
935 | (setq | |
936 | chset (cdr (assoc-ignore-case encoding | |
937 | non-standard-icccm-encodings-alist))) | |
938 | (cond ((null chset) | |
939 | ;; This charset is not supported--leave this extended | |
940 | ;; segment unaltered and skip over it. | |
941 | (goto-char (+ (point) textlen))) | |
942 | ((charsetp chset) | |
943 | ;; If it's a charset, replace the leading escape sequence | |
944 | ;; with a standard ISO-2022 sequence. We will decode all | |
945 | ;; such segments later, in one go, when we exit the loop | |
946 | ;; or find an extended segment that names a coding | |
947 | ;; system, not a charset. | |
948 | (replace-match | |
949 | (concat "\\1" | |
950 | (if (= 0 (charset-iso-graphic-plane chset)) | |
951 | ;; GL charsets | |
952 | (if (= 1 (charset-dimension chset)) "(" "$(") | |
953 | ;; GR charsets | |
954 | (if (= 96 (charset-chars chset)) | |
955 | "-" | |
956 | (if (= 1 (charset-dimension chset)) ")" "$)"))) | |
957 | (string (charset-iso-final-char chset))) | |
958 | t) | |
959 | (goto-char (+ (point) textlen))) | |
960 | ((coding-system-p chset) | |
961 | ;; If it's a coding system, we need to decode the segment | |
962 | ;; right away. But first, decode what we've skipped | |
963 | ;; across until now. | |
964 | (when (> pt oldpt) | |
965 | (decode-coding-region oldpt pt 'ctext-no-compositions)) | |
966 | (delete-region pt newpt) | |
967 | (set-marker newpt (+ newpt textlen)) | |
968 | (decode-coding-region pt newpt chset) | |
969 | (goto-char newpt) | |
970 | (set-marker oldpt newpt)))) | |
971 | ;; Decode what's left. | |
972 | (when (> (point) oldpt) | |
973 | (decode-coding-region oldpt (point) 'ctext-no-compositions)) | |
974 | ;; This buffer started as unibyte, because the string we get from | |
975 | ;; the X selection is a unibyte string. We must now make it | |
976 | ;; multibyte, so that the decoded text is inserted as multibyte | |
977 | ;; into its buffer. | |
978 | (set-buffer-multibyte t) | |
979 | (set-buffer-modified-p modified-p) | |
980 | (- (point-max) (point-min))))) | |
981 | ||
982 | (defvar non-standard-designations-alist | |
983 | '(("$(0" . (big5 "big5-0" 2)) | |
984 | ("$(1" . (big5 "big5-0" 2)) | |
985 | ("-V" . (t "iso8859-10" 1)) | |
986 | ("-Y" . (t "iso8859-13" 1)) | |
987 | ("-_" . (t "iso8859-14" 1)) | |
988 | ("-b" . (t "iso8859-15" 1)) | |
989 | ("-f" . (t "iso8859-16" 1))) | |
990 | "Alist of ctext control sequences that introduce character sets which | |
991 | are not in the list of approved ICCCM encodings, and the corresponding | |
992 | coding system, identifier string, and number of octets per encoded | |
993 | character. | |
994 | ||
995 | Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ | |
996 | is the control sequence (sans the leading ESC) that introduces the character | |
997 | set in the text encoded by compound-text. ENCODING is a coding system | |
998 | symbol; if it is t, it means that the ctext coding system already encodes | |
999 | the text correctly, and only the leading control sequence needs to be altered. | |
1000 | If ENCODING is a coding system, we need to re-encode the text with that | |
1001 | coding system. CHARSET is the ICCCM name of the charset we need to put into | |
1002 | the leading control sequence. NOCTETS is the number of octets (bytes) that | |
1003 | encode each character in this charset. NOCTETS can be 0 (meaning the number | |
1004 | of octets per character is variable), 1, 2, 3, or 4.") | |
1005 | ||
1006 | (defun ctext-pre-write-conversion (from to) | |
5dde3c71 EZ |
1007 | "Encode characters between FROM and TO as Compound Text w/Extended Segments. |
1008 | ||
1009 | If FROM is a string, or if the current buffer is not the one set up for us | |
1010 | by run_pre_post_conversion_on_str, generate a new temp buffer, insert the | |
1011 | text, and convert it in the temporary buffer. Otherwise, convert in-place." | |
1012 | (cond ((and (string= (buffer-name) " *code-converting-work*") | |
1013 | (not (stringp from))) | |
1014 | ; Minimize consing due to subsequent insertions and deletions. | |
1015 | (buffer-disable-undo) | |
1016 | (narrow-to-region from to)) | |
1017 | (t | |
1018 | (let ((buf (current-buffer))) | |
1019 | (set-buffer (generate-new-buffer " *temp")) | |
1020 | (buffer-disable-undo) | |
1021 | (if (stringp from) | |
1022 | (insert from) | |
1023 | (insert-buffer-substring buf from to))))) | |
835cbadb EZ |
1024 | (encode-coding-region from to 'ctext-no-compositions) |
1025 | ;; Replace ISO-2022 charset designations with extended segments, for | |
1026 | ;; those charsets that are not part of the official X registry. | |
1027 | (save-match-data | |
1028 | (goto-char (point-min)) | |
1029 | (let ((newpt (make-marker)) | |
1030 | (case-fold-search nil) | |
1031 | pt desig encode-info encoding chset noctets textlen) | |
1032 | (set-buffer-multibyte nil) | |
5dde3c71 EZ |
1033 | ;; The regexp below finds the leading sequences for big5 and |
1034 | ;; iso8859-1[03-6] charsets. | |
835cbadb EZ |
1035 | (while (re-search-forward "\e\\(\$([01]\\|-[VY_bf]\\)" nil 'move) |
1036 | (setq desig (match-string 1) | |
1037 | pt (point-marker) | |
1038 | encode-info (cdr (assoc desig non-standard-designations-alist)) | |
1039 | encoding (car encode-info) | |
1040 | chset (cadr encode-info) | |
1041 | noctets (car (cddr encode-info))) | |
1042 | (skip-chars-forward "^\e") | |
1043 | (set-marker newpt (point)) | |
1044 | (cond | |
1045 | ((eq encoding t) ; only the leading sequence needs to be changed | |
1046 | (setq textlen (+ (- newpt pt) (length chset) 1)) | |
5dde3c71 | 1047 | ;; Generate the ICCCM control sequence for an extended segment. |
835cbadb EZ |
1048 | (replace-match (format "\e%%/%d%c%c%s\ 2" |
1049 | noctets | |
1050 | (+ (/ textlen 128) 128) | |
1051 | (+ (% textlen 128) 128) | |
1052 | chset) | |
1053 | t t)) | |
1054 | ((coding-system-p encoding) ; need to recode the entire segment... | |
1055 | (set-marker pt (match-beginning 0)) | |
1056 | (decode-coding-region pt newpt 'ctext-no-compositions) | |
1057 | (set-buffer-multibyte t) | |
1058 | (encode-coding-region pt newpt encoding) | |
1059 | (set-buffer-multibyte nil) | |
1060 | (setq textlen (+ (- newpt pt) (length chset) 1)) | |
1061 | (goto-char pt) | |
1062 | (insert (format "\e%%/%d%c%c%s\ 2" | |
1063 | noctets | |
1064 | (+ (/ textlen 128) 128) | |
1065 | (+ (% textlen 128) 128) | |
1066 | chset)))) | |
1067 | (goto-char newpt)))) | |
1068 | (set-buffer-multibyte t) | |
5dde3c71 | 1069 | ;; Must return nil, as build_annotations_2 expects that. |
835cbadb EZ |
1070 | nil) |
1071 | ||
c1841772 KH |
1072 | (make-obsolete 'set-coding-priority 'set-coding-system-priority "22.0") |
1073 | ||
4ed46869 KH |
1074 | ;;; FILE I/O |
1075 | ||
e76938e7 | 1076 | (defcustom auto-coding-alist |
0735296c | 1077 | '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|tar\\|tgz\\)\\'" . no-conversion) |
4f16d1d1 | 1078 | ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion)) |
835f49b8 KH |
1079 | "Alist of filename patterns vs corresponding coding systems. |
1080 | Each element looks like (REGEXP . CODING-SYSTEM). | |
558b0c86 | 1081 | A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading. |
835f49b8 | 1082 | |
7fed493a RS |
1083 | The settings in this alist take priority over `coding:' tags |
1084 | in the file (see the function `set-auto-coding') | |
e76938e7 DL |
1085 | and the contents of `file-coding-system-alist'." |
1086 | :group 'files | |
1087 | :group 'mule | |
1088 | :type '(repeat (cons (regexp :tag "File name regexp") | |
1089 | (symbol :tag "Coding system")))) | |
835f49b8 | 1090 | |
502522b2 GM |
1091 | (defcustom auto-coding-regexp-alist |
1092 | '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)) | |
1093 | "Alist of patterns vs corresponding coding systems. | |
1094 | Each element looks like (REGEXP . CODING-SYSTEM). | |
1095 | A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading. | |
1096 | ||
1097 | The settings in this alist take priority over `coding:' tags | |
1098 | in the file (see the function `set-auto-coding') | |
1099 | and the contents of `file-coding-system-alist'." | |
1100 | :group 'files | |
1101 | :group 'mule | |
1102 | :type '(repeat (cons (regexp :tag "Regexp") | |
1103 | (symbol :tag "Coding system")))) | |
1104 | ||
1c4cc63a KH |
1105 | (defvar set-auto-coding-for-load nil |
1106 | "Non-nil means look for `load-coding' property instead of `coding'. | |
1107 | This is used for loading and byte-compiling Emacs Lisp files.") | |
1108 | ||
8a592131 RS |
1109 | (defun auto-coding-alist-lookup (filename) |
1110 | "Return the coding system specified by `auto-coding-alist' for FILENAME." | |
1111 | (let ((alist auto-coding-alist) | |
ca128d75 | 1112 | (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos))) |
8a592131 RS |
1113 | coding-system) |
1114 | (while (and alist (not coding-system)) | |
1115 | (if (string-match (car (car alist)) filename) | |
1116 | (setq coding-system (cdr (car alist))) | |
1117 | (setq alist (cdr alist)))) | |
1118 | coding-system)) | |
1119 | ||
502522b2 GM |
1120 | |
1121 | (defun auto-coding-from-file-contents (size) | |
1122 | "Determine a coding system from the contents of the current buffer. | |
1123 | The current buffer contains SIZE bytes starting at point. | |
1124 | Value is either a coding system or nil." | |
1125 | (save-excursion | |
1126 | (let ((alist auto-coding-regexp-alist) | |
1127 | coding-system) | |
1128 | (while (and alist (not coding-system)) | |
1129 | (let ((regexp (car (car alist)))) | |
1130 | (when (re-search-forward regexp (+ (point) size) t) | |
1131 | (setq coding-system (cdr (car alist))))) | |
1132 | (setq alist (cdr alist))) | |
1133 | coding-system))) | |
1134 | ||
1135 | ||
835f49b8 KH |
1136 | (defun set-auto-coding (filename size) |
1137 | "Return coding system for a file FILENAME of which SIZE bytes follow point. | |
1c4cc63a KH |
1138 | These bytes should include at least the first 1k of the file |
1139 | and the last 3k of the file, but the middle may be omitted. | |
63561304 | 1140 | |
502522b2 GM |
1141 | It checks FILENAME against the variable `auto-coding-alist'. If |
1142 | FILENAME doesn't match any entries in the variable, it checks the | |
1143 | contents of the current buffer following point against | |
1144 | `auto-coding-regexp-alist'. If no match is found, it checks for a | |
1145 | `coding:' tag in the first one or two lines following point. If no | |
1146 | `coding:' tag is found, it checks for local variables list in the last | |
1147 | 3K bytes out of the SIZE bytes. | |
63561304 KH |
1148 | |
1149 | The return value is the specified coding system, | |
1150 | or nil if nothing specified. | |
87aba788 | 1151 | |
ba74e833 | 1152 | The variable `set-auto-coding-function' (which see) is set to this |
87aba788 | 1153 | function by default." |
502522b2 GM |
1154 | (or (auto-coding-alist-lookup filename) |
1155 | (auto-coding-from-file-contents size) | |
1156 | (let* ((case-fold-search t) | |
1157 | (head-start (point)) | |
1158 | (head-end (+ head-start (min size 1024))) | |
1159 | (tail-start (+ head-start (max (- size 3072) 0))) | |
1160 | (tail-end (+ head-start size)) | |
1161 | coding-system head-found tail-found pos) | |
1162 | ;; Try a short cut by searching for the string "coding:" | |
1163 | ;; and for "unibyte:" at the head and tail of SIZE bytes. | |
1164 | (setq head-found (or (search-forward "coding:" head-end t) | |
1165 | (search-forward "unibyte:" head-end t))) | |
1166 | (if (and head-found (> head-found tail-start)) | |
1167 | ;; Head and tail are overlapped. | |
1168 | (setq tail-found head-found) | |
1169 | (goto-char tail-start) | |
1170 | (setq tail-found (or (search-forward "coding:" tail-end t) | |
1171 | (search-forward "unibyte:" tail-end t)))) | |
1172 | ||
1173 | ;; At first check the head. | |
1174 | (when head-found | |
1175 | (goto-char head-start) | |
6b66d028 RS |
1176 | (setq head-end (set-auto-mode-1)) |
1177 | (setq head-start (point)) | |
1d8e9a7c | 1178 | (when (and head-end (< head-found head-end)) |
835f49b8 | 1179 | (goto-char head-start) |
502522b2 GM |
1180 | (when (and set-auto-coding-for-load |
1181 | (re-search-forward | |
6b66d028 | 1182 | "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)" |
502522b2 GM |
1183 | head-end t)) |
1184 | (setq coding-system 'raw-text)) | |
1185 | (when (and (not coding-system) | |
1186 | (re-search-forward | |
6b66d028 | 1187 | "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)" |
502522b2 GM |
1188 | head-end t)) |
1189 | (setq coding-system (intern (match-string 2))) | |
1190 | (or (coding-system-p coding-system) | |
1191 | (setq coding-system nil))))) | |
1192 | ||
1193 | ;; If no coding: tag in the head, check the tail. | |
1194 | (when (and tail-found (not coding-system)) | |
1195 | (goto-char tail-start) | |
1196 | (search-forward "\n\^L" nil t) | |
1197 | (if (re-search-forward | |
1198 | "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t) | |
1199 | ;; The prefix is what comes before "local variables:" in its | |
1200 | ;; line. The suffix is what comes after "local variables:" | |
1201 | ;; in its line. | |
1202 | (let* ((prefix (regexp-quote (match-string 1))) | |
1203 | (suffix (regexp-quote (match-string 2))) | |
1204 | (re-coding | |
1205 | (concat | |
1206 | "^" prefix | |
cfe98f50 GM |
1207 | ;; N.B. without the \n below, the regexp can |
1208 | ;; eat newlines. | |
1209 | "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*" | |
502522b2 GM |
1210 | suffix "$")) |
1211 | (re-unibyte | |
1212 | (concat | |
1213 | "^" prefix | |
cfe98f50 | 1214 | "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*" |
502522b2 GM |
1215 | suffix "$")) |
1216 | (re-end | |
cfe98f50 | 1217 | (concat "^" prefix "[ \t]*End *:[ \t]*" suffix "$")) |
502522b2 GM |
1218 | (pos (point))) |
1219 | (re-search-forward re-end tail-end 'move) | |
1220 | (setq tail-end (point)) | |
1221 | (goto-char pos) | |
1222 | (when (and set-auto-coding-for-load | |
1223 | (re-search-forward re-unibyte tail-end t)) | |
1224 | (setq coding-system 'raw-text)) | |
1225 | (when (and (not coding-system) | |
1226 | (re-search-forward re-coding tail-end t)) | |
1227 | (setq coding-system (intern (match-string 1))) | |
1228 | (or (coding-system-p coding-system) | |
1229 | (setq coding-system nil)))))) | |
1230 | coding-system))) | |
63561304 KH |
1231 | |
1232 | (setq set-auto-coding-function 'set-auto-coding) | |
87aba788 | 1233 | |
4ed46869 | 1234 | (defun after-insert-file-set-buffer-file-coding-system (inserted) |
521d4010 | 1235 | "Set `buffer-file-coding-system' of current buffer after text is inserted." |
4ed46869 KH |
1236 | (if last-coding-system-used |
1237 | (let ((coding-system | |
1238 | (find-new-buffer-file-coding-system last-coding-system-used)) | |
1239 | (modified-p (buffer-modified-p))) | |
0269ddfb | 1240 | (when coding-system |
71983219 | 1241 | (set-buffer-file-coding-system coding-system t) |
d0c26c63 | 1242 | (if (and enable-multibyte-characters |
c1841772 | 1243 | (or (eq (coding-system-type coding-system) 'raw-text)) |
136e48e4 KH |
1244 | ;; If buffer was unmodified and the size is the |
1245 | ;; same as INSERTED, we must be visiting it. | |
1246 | (not modified-p) | |
1247 | (= (buffer-size) inserted)) | |
ccb77b4e RS |
1248 | ;; For coding systems no-conversion and raw-text..., |
1249 | ;; edit the buffer as unibyte. | |
d0c26c63 KH |
1250 | (let ((pos-byte (position-bytes (+ (point) inserted)))) |
1251 | (set-buffer-multibyte nil) | |
1252 | (setq inserted (- pos-byte (position-bytes (point)))))) | |
0269ddfb | 1253 | (set-buffer-modified-p modified-p)))) |
d0c26c63 | 1254 | inserted) |
4ed46869 | 1255 | |
84c9d215 KH |
1256 | (add-hook 'after-insert-file-functions |
1257 | 'after-insert-file-set-buffer-file-coding-system) | |
4ed46869 | 1258 | |
8057896b | 1259 | ;; The coding-spec and eol-type of coding-system returned is decided |
4ed46869 KH |
1260 | ;; independently in the following order. |
1261 | ;; 1. That of buffer-file-coding-system locally bound. | |
1262 | ;; 2. That of CODING. | |
1263 | ||
1264 | (defun find-new-buffer-file-coding-system (coding) | |
1265 | "Return a coding system for a buffer when a file of CODING is inserted. | |
a73a8c89 KH |
1266 | The local variable `buffer-file-coding-system' of the current buffer |
1267 | is set to the returned value. | |
509064c5 | 1268 | Return nil if there's no need to set `buffer-file-coding-system'." |
4ed46869 | 1269 | (let (local-coding local-eol |
b685f8d6 | 1270 | found-coding found-eol |
4ed46869 KH |
1271 | new-coding new-eol) |
1272 | (if (null coding) | |
1273 | ;; Nothing found about coding. | |
1274 | nil | |
1275 | ||
b685f8d6 RS |
1276 | ;; Get information of `buffer-file-coding-system' in LOCAL-EOL |
1277 | ;; and LOCAL-CODING. | |
1278 | (setq local-eol (coding-system-eol-type buffer-file-coding-system)) | |
1279 | (if (null (numberp local-eol)) | |
1280 | ;; But eol-type is not yet set. | |
1281 | (setq local-eol nil)) | |
0269ddfb | 1282 | (if (and buffer-file-coding-system |
c1841772 KH |
1283 | (not (eq (coding-system-type buffer-file-coding-system) |
1284 | 'undecided))) | |
0269ddfb | 1285 | (setq local-coding (coding-system-base buffer-file-coding-system))) |
b685f8d6 RS |
1286 | |
1287 | (if (and (local-variable-p 'buffer-file-coding-system) | |
1288 | local-eol local-coding) | |
4ed46869 KH |
1289 | ;; The current buffer has already set full coding-system, we |
1290 | ;; had better not change it. | |
1291 | nil | |
1292 | ||
8057896b | 1293 | (setq found-eol (coding-system-eol-type coding)) |
4ed46869 | 1294 | (if (null (numberp found-eol)) |
be02cd54 EZ |
1295 | ;; But eol-type is not found. |
1296 | ;; If EOL conversions are inhibited, force unix eol-type. | |
1297 | (setq found-eol (if inhibit-eol-conversion 0))) | |
c1841772 | 1298 | (setq found-coding (coding-system-base coding)) |
c76b5c99 KH |
1299 | |
1300 | (if (and (not found-eol) (eq found-coding 'undecided)) | |
1301 | ;; No valid coding information found. | |
1302 | nil | |
1303 | ||
1304 | ;; Some coding information (eol or text) found. | |
1305 | ||
1306 | ;; The local setting takes precedence over the found one. | |
1307 | (setq new-coding (if (local-variable-p 'buffer-file-coding-system) | |
1308 | (or local-coding found-coding) | |
1309 | (or found-coding local-coding))) | |
1310 | (setq new-eol (if (local-variable-p 'buffer-file-coding-system) | |
1311 | (or local-eol found-eol) | |
1312 | (or found-eol local-eol))) | |
1313 | ||
1314 | (let ((eol-type (coding-system-eol-type new-coding))) | |
1315 | (if (and (numberp new-eol) (vectorp eol-type)) | |
1316 | (aref eol-type new-eol) | |
1317 | new-coding))))))) | |
4ed46869 | 1318 | |
fe831d33 GV |
1319 | (defun modify-coding-system-alist (target-type regexp coding-system) |
1320 | "Modify one of look up tables for finding a coding system on I/O operation. | |
8c453b46 RS |
1321 | There are three of such tables, `file-coding-system-alist', |
1322 | `process-coding-system-alist', and `network-coding-system-alist'. | |
fe831d33 GV |
1323 | |
1324 | TARGET-TYPE specifies which of them to modify. | |
8c453b46 RS |
1325 | If it is `file', it affects `file-coding-system-alist' (which see). |
1326 | If it is `process', it affects `process-coding-system-alist' (which see). | |
e8dd0160 | 1327 | If it is `network', it affects `network-coding-system-alist' (which see). |
fe831d33 GV |
1328 | |
1329 | REGEXP is a regular expression matching a target of I/O operation. | |
1330 | The target is a file name if TARGET-TYPE is `file', a program name if | |
1331 | TARGET-TYPE is `process', or a network service name or a port number | |
1332 | to connect to if TARGET-TYPE is `network'. | |
1333 | ||
1334 | CODING-SYSTEM is a coding system to perform code conversion on the I/O | |
8c453b46 RS |
1335 | operation, or a cons cell (DECODING . ENCODING) specifying the coding systems |
1336 | for decoding and encoding respectively, | |
1337 | or a function symbol which, when called, returns such a cons cell." | |
fe831d33 GV |
1338 | (or (memq target-type '(file process network)) |
1339 | (error "Invalid target type: %s" target-type)) | |
1340 | (or (stringp regexp) | |
1341 | (and (eq target-type 'network) (integerp regexp)) | |
1342 | (error "Invalid regular expression: %s" regexp)) | |
1343 | (if (symbolp coding-system) | |
1344 | (if (not (fboundp coding-system)) | |
1345 | (progn | |
1346 | (check-coding-system coding-system) | |
1347 | (setq coding-system (cons coding-system coding-system)))) | |
1348 | (check-coding-system (car coding-system)) | |
1349 | (check-coding-system (cdr coding-system))) | |
1350 | (cond ((eq target-type 'file) | |
1351 | (let ((slot (assoc regexp file-coding-system-alist))) | |
1352 | (if slot | |
1353 | (setcdr slot coding-system) | |
1354 | (setq file-coding-system-alist | |
1355 | (cons (cons regexp coding-system) | |
1356 | file-coding-system-alist))))) | |
1357 | ((eq target-type 'process) | |
1358 | (let ((slot (assoc regexp process-coding-system-alist))) | |
1359 | (if slot | |
1360 | (setcdr slot coding-system) | |
1361 | (setq process-coding-system-alist | |
1362 | (cons (cons regexp coding-system) | |
1363 | process-coding-system-alist))))) | |
1364 | (t | |
1365 | (let ((slot (assoc regexp network-coding-system-alist))) | |
1366 | (if slot | |
1367 | (setcdr slot coding-system) | |
1368 | (setq network-coding-system-alist | |
1369 | (cons (cons regexp coding-system) | |
1370 | network-coding-system-alist))))))) | |
1371 | ||
b25eef20 | 1372 | (defun make-translation-table (&rest args) |
a284eea3 | 1373 | "Make a translation table from arguments. |
d38b07f9 | 1374 | A translation table is a char table intended for character |
a284eea3 DL |
1375 | translation in CCL programs. |
1376 | ||
d38b07f9 | 1377 | Each argument is a list of elements of the form (FROM . TO), where FROM |
a284eea3 | 1378 | is a character to be translated to TO. |
13d5617d | 1379 | |
452fdb31 | 1380 | FROM can be a generic character (see `make-char'). In this case, TO is |
d38b07f9 | 1381 | a generic character containing the same number of characters, or an |
452fdb31 | 1382 | ordinary character. If FROM and TO are both generic characters, all |
b25eef20 | 1383 | characters belonging to FROM are translated to characters belonging to TO |
4e003d37 KH |
1384 | without changing their position code(s). |
1385 | ||
1386 | The arguments and forms in each argument are processed in the given | |
1387 | order, and if a previous form already translates TO to some other | |
1388 | character, say TO-ALT, FROM is also translated to TO-ALT." | |
f967223b | 1389 | (let ((table (make-char-table 'translation-table)) |
a73a8c89 KH |
1390 | revlist) |
1391 | (while args | |
1392 | (let ((elts (car args))) | |
1393 | (while elts | |
13d5617d KH |
1394 | (let* ((from (car (car elts))) |
1395 | (from-i 0) ; degree of freedom of FROM | |
1396 | (from-rev (nreverse (split-char from))) | |
1397 | (to (cdr (car elts))) | |
1398 | (to-i 0) ; degree of freedom of TO | |
1399 | (to-rev (nreverse (split-char to)))) | |
1400 | ;; Check numbers of heading 0s in FROM-REV and TO-REV. | |
1401 | (while (eq (car from-rev) 0) | |
1402 | (setq from-i (1+ from-i) from-rev (cdr from-rev))) | |
1403 | (while (eq (car to-rev) 0) | |
1404 | (setq to-i (1+ to-i) to-rev (cdr to-rev))) | |
1405 | (if (and (/= from-i to-i) (/= to-i 0)) | |
1406 | (error "Invalid character pair (%d . %d)" from to)) | |
b25eef20 KH |
1407 | ;; If we have already translated TO to TO-ALT, FROM should |
1408 | ;; also be translated to TO-ALT. But, this is only if TO | |
1409 | ;; is a generic character or TO-ALT is not a generic | |
13d5617d KH |
1410 | ;; character. |
1411 | (let ((to-alt (aref table to))) | |
1412 | (if (and to-alt | |
1413 | (or (> to-i 0) (not (generic-char-p to-alt)))) | |
1414 | (setq to to-alt))) | |
1415 | (if (> from-i 0) | |
1416 | (set-char-table-default table from to) | |
1417 | (aset table from to)) | |
b25eef20 KH |
1418 | ;; If we have already translated some chars to FROM, they |
1419 | ;; should also be translated to TO. | |
a73a8c89 KH |
1420 | (let ((l (assq from revlist))) |
1421 | (if l | |
1422 | (let ((ch (car l))) | |
1423 | (setcar l to) | |
1424 | (setq l (cdr l)) | |
1425 | (while l | |
1426 | (aset table ch to) | |
1427 | (setq l (cdr l)) )))) | |
1428 | ;; Now update REVLIST. | |
1429 | (let ((l (assq to revlist))) | |
1430 | (if l | |
1431 | (setcdr l (cons from (cdr l))) | |
1432 | (setq revlist (cons (list to from) revlist))))) | |
1433 | (setq elts (cdr elts)))) | |
1434 | (setq args (cdr args))) | |
1435 | ;; Return TABLE just created. | |
1436 | table)) | |
1437 | ||
c76b5c99 KH |
1438 | (defun make-translation-table-from-vector (vec) |
1439 | "Make translation table from decoding vector VEC. | |
9e3b6057 DL |
1440 | VEC is an array of 256 elements to map unibyte codes to multibyte |
1441 | characters. Elements may be nil for undefined code points. | |
c76b5c99 KH |
1442 | See also the variable `nonascii-translation-table'." |
1443 | (let ((table (make-char-table 'translation-table)) | |
1444 | (rev-table (make-char-table 'translation-table)) | |
c76b5c99 | 1445 | ch) |
9e3b6057 | 1446 | (dotimes (i 256) |
c76b5c99 | 1447 | (setq ch (aref vec i)) |
9e3b6057 DL |
1448 | (when ch |
1449 | (aset table i ch) | |
1450 | (if (>= ch 256) | |
1451 | (aset rev-table ch i)))) | |
c76b5c99 KH |
1452 | (set-char-table-extra-slot table 0 rev-table) |
1453 | table)) | |
1454 | ||
f967223b | 1455 | (defun define-translation-table (symbol &rest args) |
a284eea3 DL |
1456 | "Define SYMBOL as the name of translation table made by ARGS. |
1457 | This sets up information so that the table can be used for | |
1458 | translations in a CCL program. | |
b25eef20 | 1459 | |
a284eea3 DL |
1460 | If the first element of ARGS is a char-table whose purpose is |
1461 | `translation-table', just define SYMBOL to name it. (Note that this | |
1462 | function does not bind SYMBOL.) | |
007c79c8 | 1463 | |
a284eea3 | 1464 | Any other ARGS should be suitable as arguments of the function |
007c79c8 | 1465 | `make-translation-table' (which see). |
b25eef20 | 1466 | |
452fdb31 | 1467 | This function sets properties `translation-table' and |
521d4010 DL |
1468 | `translation-table-id' of SYMBOL to the created table itself and the |
1469 | identification number of the table respectively. It also registers | |
1470 | the table in `translation-table-vector'." | |
007c79c8 KH |
1471 | (let ((table (if (and (char-table-p (car args)) |
1472 | (eq (char-table-subtype (car args)) | |
1473 | 'translation-table)) | |
1474 | (car args) | |
1475 | (apply 'make-translation-table args))) | |
f967223b | 1476 | (len (length translation-table-vector)) |
d9e3229d | 1477 | (id 0) |
b25eef20 | 1478 | (done nil)) |
f967223b | 1479 | (put symbol 'translation-table table) |
b25eef20 KH |
1480 | (while (not done) |
1481 | (if (>= id len) | |
f967223b KH |
1482 | (setq translation-table-vector |
1483 | (vconcat translation-table-vector (make-vector len nil)))) | |
1484 | (let ((slot (aref translation-table-vector id))) | |
b25eef20 KH |
1485 | (if (or (not slot) |
1486 | (eq (car slot) symbol)) | |
1487 | (progn | |
f967223b | 1488 | (aset translation-table-vector id (cons symbol table)) |
007c79c8 KH |
1489 | (setq done t)) |
1490 | (setq id (1+ id))))) | |
f967223b | 1491 | (put symbol 'translation-table-id id) |
d9e3229d KH |
1492 | id)) |
1493 | ||
35554641 KH |
1494 | (put 'with-category-table 'lisp-indent-function 1) |
1495 | ||
1496 | (defmacro with-category-table (category-table &rest body) | |
07513d64 | 1497 | "Execute BODY like `progn' with CATEGORY-TABLE the current category table." |
35554641 KH |
1498 | `(let ((current-category-table (category-table))) |
1499 | (set-category-table ,category-table) | |
1500 | (unwind-protect | |
1501 | (progn ,@body) | |
1502 | (set-category-table current-category-table)))) | |
1503 | ||
69eba008 KH |
1504 | ;;; Initialize some variables. |
1505 | ||
1506 | (put 'use-default-ascent 'char-table-extra-slots 0) | |
1507 | (setq use-default-ascent (make-char-table 'use-default-ascent)) | |
d6d6d592 KH |
1508 | (put 'ignore-relative-composition 'char-table-extra-slots 0) |
1509 | (setq ignore-relative-composition | |
1510 | (make-char-table 'ignore-relative-composition)) | |
69eba008 KH |
1511 | |
1512 | ;;; | |
4ed46869 KH |
1513 | (provide 'mule) |
1514 | ||
1515 | ;;; mule.el ends here |