Commit | Line | Data |
---|---|---|
07513d64 | 1 | ;;; mule.el --- basic commands for multilingual environment |
4ed46869 | 2 | |
4ed46869 | 3 | ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. |
8f924df7 | 4 | ;; Licensed to the Free Software Foundation. |
32db08f1 | 5 | ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. |
8f924df7 | 6 | ;; Copyright (C) 2003 |
c1841772 KH |
7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
8 | ;; Registration Number H13PRO009 | |
4ed46869 KH |
9 | |
10 | ;; Keywords: mule, multilingual, character set, coding system | |
11 | ||
12 | ;; This file is part of GNU Emacs. | |
13 | ||
14 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
15 | ;; it under the terms of the GNU General Public License as published by | |
16 | ;; the Free Software Foundation; either version 2, or (at your option) | |
17 | ;; any later version. | |
18 | ||
19 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
20 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 | ;; GNU General Public License for more details. | |
23 | ||
24 | ;; You should have received a copy of the GNU General Public License | |
369314dc KH |
25 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
26 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
27 | ;; Boston, MA 02111-1307, USA. | |
4ed46869 | 28 | |
60370d40 PJ |
29 | ;;; Commentary: |
30 | ||
4ed46869 KH |
31 | ;;; Code: |
32 | ||
;; Release identification for this version of MULE.
(defconst mule-version "6.0 (HANACHIRUSATO)"
  "Version number and name of this version of MULE (multilingual environment).")

(defconst mule-version-date "2003.9.1"
  "Distribution date of this version of MULE (multilingual environment).")
38 | ||
c1841772 KH |
39 | |
40 | \f | |
41 | ;;; CHARACTER | |
;; `char-valid-p' is kept only as an obsolete alias of `characterp'.
(defalias 'char-valid-p (function characterp))
(make-obsolete 'char-valid-p 'characterp "22.1")
44 | ||
45 | \f | |
46 | ;;; CHARSET | |
47 | ||
(defun define-charset (name docstring &rest props)
  "Define NAME (symbol) as a charset with DOCSTRING.
The remaining arguments must come in pairs ATTRIBUTE VALUE.  ATTRIBUTE
may be any symbol.  The following have special meanings, and one of
`:code-offset', `:map', `:subset', `:superset' must be specified.

`:short-name'

VALUE must be a short string to identify the charset.  If omitted,
NAME is used.

`:long-name'

VALUE must be a string longer than `:short-name' to identify the
charset.  If omitted, the value of the `:short-name' attribute is used.

`:dimension'

VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of
code-points of the charsets.  If omitted, it is calculated from the
value of the `:code-space' attribute.

`:code-space'

VALUE must be a vector of length at most 8 specifying the byte code
range of each dimension in this format:
	[ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
where MIN-N is the minimum byte value of Nth dimension of code-point,
MAX-N is the maximum byte value of that.

`:min-code'

VALUE must be an integer specifying the minimum code point of the
charset.  If omitted, it is calculated from `:code-space'.  VALUE may
be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
the code point and LOW is the least significant 16 bits.

`:max-code'

VALUE must be an integer specifying the maximum code point of the
charset.  If omitted, it is calculated from `:code-space'.  VALUE may
be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
the code point and LOW is the least significant 16 bits.

`:iso-final-char'

VALUE must be a character in the range 32 to 127 (inclusive)
specifying the final char of the charset for ISO-2022 encoding.  If
omitted, the charset can't be encoded by ISO-2022 based
coding-systems.

`:iso-revision-number'

VALUE must be an integer in the range 0..63, specifying the revision
number of the charset for ISO-2022 encoding.

`:emacs-mule-id'

VALUE must be an integer of 0, 128..255.  If omitted, the charset
can't be encoded by coding-systems of type `emacs-mule'.

`:ascii-compatible-p'

VALUE must be nil or t (default nil).  If VALUE is t, the charset is
compatible with ASCII, i.e. the first 128 code points map to ASCII.

`:supplementary-p'

VALUE must be nil or t.  If the VALUE is t, the charset is
supplementary, which means it is used only as a parent of some other
charset.

`:invalid-code'

VALUE must be a nonnegative integer that can be used as an invalid
code point of the charset.  If the minimum code is 0 and the maximum
code is greater than Emacs' maximum integer value, `:invalid-code'
should not be omitted.

`:code-offset'

VALUE must be an integer added to the index number of a character to
get the corresponding character code.

`:map'

VALUE must be vector or string.

If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
where CODE-n is a code-point of the charset, and CHAR-n is the
corresponding character code.

If it is a string, it is a name of file that contains the above
information.  Each line of the file must be this format:
	0xXXX 0xYYY
where XXX is a hexadecimal representation of CODE-n and YYY is a
hexadecimal representation of CHAR-n.  A line starting with `#' is a
comment line.

`:subset'

VALUE must be a list:
	( PARENT MIN-CODE MAX-CODE OFFSET )
PARENT is a parent charset.  MIN-CODE and MAX-CODE specify the range
of characters inherited from the parent.  OFFSET is an integer value
to add to a code point of the parent charset to get the corresponding
code point of this charset.

`:superset'

VALUE must be a list of parent charsets.  The charset inherits
characters from them.  Each element of the list may be a cons (PARENT
. OFFSET), where PARENT is a parent charset, and OFFSET is an offset
value to add to a code point of PARENT to get the corresponding code
point of this charset.

`:unify-map'

VALUE must be vector or string.

If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
where CODE-n is a code-point of the charset, and CHAR-n is the
corresponding Unicode character code.

If it is a string, it is a name of file that contains the above
information.  The file format is the same as that described for the
`:map' attribute."
  ;; ATTRS is an alist whose keys, in this exact order, are the
  ;; positional arguments handed to `define-charset-internal' after
  ;; NAME.  Do not reorder the keys.
  (let ((attrs (mapcar 'list '(:dimension
                               :code-space
                               :min-code
                               :max-code
                               :iso-final-char
                               :iso-revision-number
                               :emacs-mule-id
                               :ascii-compatible-p
                               :supplementary-p
                               :invalid-code
                               :code-offset
                               :map
                               :subset
                               :superset
                               :unify-map
                               :plist))))

    ;; If :dimension is omitted, get the dimension from :code-space,
    ;; which holds one MIN/MAX pair per dimension.
    (or (plist-get props :dimension)
        (setq props
              (plist-put props :dimension
                         (/ (length (plist-get props :code-space)) 2))))

    ;; Copy each recognized attribute from PROPS into its slot.
    (dolist (slot attrs)
      (setcdr slot (plist-get props (car slot))))

    ;; Make sure that the value of :code-space is a vector of 8
    ;; elements, padding with zeros.  Only the slot is padded; the
    ;; value kept in PROPS is left as the caller gave it.
    (let* ((slot (assq :code-space attrs))
           (val (cdr slot))
           (len (length val)))
      (if (< len 8)
          (setcdr slot
                  (vconcat val (make-vector (- 8 len) 0)))))

    ;; Add :name and :docstring properties to PROPS.
    (setq props
          (cons :name (cons name (cons :docstring (cons docstring props)))))
    ;; Default the names.  Always keep the value returned by
    ;; `plist-put' instead of relying on its in-place modification.
    (or (plist-get props :short-name)
        (setq props (plist-put props :short-name (symbol-name name))))
    (or (plist-get props :long-name)
        (setq props (plist-put props :long-name
                               (plist-get props :short-name))))
    ;; We can probably get a worthwhile amount in purespace.
    (setq props
          (mapcar (lambda (elt)
                    (if (stringp elt)
                        (purecopy elt)
                      elt))
                  props))
    (setcdr (assq :plist attrs) props)

    (apply 'define-charset-internal name (mapcar 'cdr attrs))))
228 | ||
229 | ||
(defun load-with-code-conversion (fullname file &optional noerror nomessage)
  "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
The file contents are decoded before evaluation if necessary.
If optional third arg NOERROR is non-nil,
report no error if FILE doesn't exist.
Print messages at start and end of loading unless
optional fourth arg NOMESSAGE is non-nil (the end message is also
omitted when Emacs runs noninteractively).
Return t if the file was loaded.  Return nil if FULLNAME is not
readable and NOERROR is non-nil; otherwise signal a `file-error'."
  (if (null (file-readable-p fullname))
      (and (null noerror)
           (signal 'file-error (list "Cannot open load file" file)))
    ;; Read file with code conversion, and then eval.
    (let* ((buffer
            ;; To avoid any autoloading, set default-major-mode to
            ;; fundamental-mode.
            ;; So that we don't get completely screwed if the
            ;; file is encoded in some complicated character set,
            ;; read it with real decoding, as a multibyte buffer,
            ;; even if this is a --unibyte Emacs session.
            (let ((default-major-mode 'fundamental-mode)
                  (default-enable-multibyte-characters t))
              ;; We can't use `generate-new-buffer' because files.el
              ;; is not yet loaded.
              (get-buffer-create (generate-new-buffer-name " *load*"))))
           (load-in-progress t)
           ;; Non-nil when FULLNAME ends in ".el", i.e. a source file.
           (source (save-match-data (string-match "\\.el\\'" fullname))))
      (unless nomessage
        (if source
            (message "Loading %s (source)..." file)
          (message "Loading %s..." file)))
      (when purify-flag
        (push file preloaded-file-list))
      ;; The temporary buffer is killed even if evaluation fails.
      (unwind-protect
          (let ((load-file-name fullname)
                (set-auto-coding-for-load t)
                (inhibit-file-name-operation nil))
            (save-excursion
              (set-buffer buffer)
              (insert-file-contents fullname)
              ;; If the loaded file was inserted with a coding system
              ;; whose type is `raw-text', make the buffer unibyte.
              ;; Otherwise, eval-buffer might try to interpret random
              ;; binary junk as multibyte characters.
              (if (and enable-multibyte-characters
                       (eq (coding-system-type last-coding-system-used)
                           'raw-text))
                  (set-buffer-multibyte nil))
              ;; Make `kill-buffer' quiet.
              (set-buffer-modified-p nil))
            ;; Have the original buffer current while we eval.
            (eval-buffer buffer nil file
                         ;; If this Emacs is running with --unibyte,
                         ;; convert multibyte strings to unibyte
                         ;; after reading them.
                         ;; (not default-enable-multibyte-characters)
                         nil t))
        (let (kill-buffer-hook kill-buffer-query-functions)
          (kill-buffer buffer)))
      ;; Run the forms registered for FILE in `after-load-alist'.
      ;; They are evaluated for effect only, so no result list is built.
      (let ((hook (assoc file after-load-alist)))
        (when hook
          (dolist (form (cdr hook))
            (eval form))))
      (unless (or nomessage noninteractive)
        (if source
            (message "Loading %s (source)...done" file)
          (message "Loading %s...done" file)))
      t)))
297 | ||
(defun charset-info (charset)
  "Return a vector of information of CHARSET.
This function is provided for backward compatibility.

The elements of the vector are:
	CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
	LEADING-CODE-BASE, LEADING-CODE-EXT,
	ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
	REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
	PLIST.
where
CHARSET-ID is always 0.
BYTES is always 0.
DIMENSION is the number of bytes of a code-point of the charset:
  1, 2, 3, or 4.
CHARS is the number of characters in a dimension:
  94, 96, 128, or 256.
WIDTH is always 0.
DIRECTION is always 0.
LEADING-CODE-BASE is always 0.
LEADING-CODE-EXT is always 0.
ISO-FINAL-CHAR (character) is the final character of the
  corresponding ISO 2022 charset.  If the charset is not assigned
  any final character, the value is -1.
ISO-GRAPHIC-PLANE is always 0.
REVERSE-CHARSET is always -1.
SHORT-NAME (string) is the short name to refer to the charset.
LONG-NAME (string) is the long name to refer to the charset.
DESCRIPTION (string) is the description string of the charset.
PLIST (property list) may contain any type of information a user
  wants to put and get by functions `put-charset-property' and
  `get-charset-property' respectively."
  (vector 0                                          ; CHARSET-ID
          0                                          ; BYTES
          (charset-dimension charset)                ; DIMENSION
          (charset-chars charset)                    ; CHARS
          0                                          ; WIDTH
          0                                          ; DIRECTION
          0                                          ; LEADING-CODE-BASE
          0                                          ; LEADING-CODE-EXT
          (charset-iso-final-char charset)           ; ISO-FINAL-CHAR
          0                                          ; ISO-GRAPHIC-PLANE
          -1                                         ; REVERSE-CHARSET
          (get-charset-property charset :short-name) ; SHORT-NAME
          ;; LONG-NAME must come from `:long-name', not `:short-name'.
          (get-charset-property charset :long-name)  ; LONG-NAME
          (charset-description charset)              ; DESCRIPTION
          (charset-plist charset)))                  ; PLIST
4ed46869 | 345 | |
40c81f74 PE |
346 | ;; It is better not to use backquote in this file, |
347 | ;; because that makes a bootstrapping problem | |
348 | ;; if you need to recompile all the Lisp files using interpreted code. | |
349 | ||
8f924df7 KH |
(defun charset-id (charset)
  "Return 0 whatever CHARSET is.
This function is kept only for backward compatibility."
  0)
4ed46869 KH |
353 | |
(defmacro charset-bytes (charset)
  "Expand to the constant 0; CHARSET is not evaluated.
This macro is kept only for backward compatibility."
  0)
c1841772 KH |
357 | |
(defun get-charset-property (charset propname)
  "Return the value of CHARSET's PROPNAME property.
The value is looked up in the property list returned by
`charset-plist'; it is the last value stored with
 (put-charset-property CHARSET PROPNAME VALUE)."
  (let ((plist (charset-plist charset)))
    (plist-get plist propname)))
363 | ||
(defun put-charset-property (charset propname value)
  "Set CHARSET's PROPNAME property to value VALUE.
It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
  (let* ((old-plist (charset-plist charset))
         (new-plist (plist-put old-plist propname value)))
    ;; Store the list returned by `plist-put' back into the charset.
    (set-charset-plist charset new-plist)))
369 | ||
c1841772 KH |
(defun charset-description (charset)
  "Return the description string of CHARSET.
This is the `:docstring' entry of CHARSET's property list."
  (let ((plist (charset-plist charset)))
    (plist-get plist :docstring)))
373 | ||
(defun charset-dimension (charset)
  "Return the dimension of CHARSET.
This is the `:dimension' entry of CHARSET's property list."
  (let ((plist (charset-plist charset)))
    (plist-get plist :dimension)))
377 | ||
(defun charset-chars (charset &optional dimension)
  "Return number of characters contained in DIMENSION of CHARSET.
DIMENSION defaults to the first dimension.
The count is MAX - MIN + 1, where MIN and MAX are the entries for
DIMENSION in the `:code-space' vector of CHARSET's property list."
  (let* ((dim (or dimension 1))
         (space (plist-get (charset-plist charset) :code-space))
         ;; For the Nth dimension (counting from 1), MIN is at index
         ;; 2N-2 and MAX at index 2N-1.
         (max-byte (aref space (1- (* 2 dim))))
         (min-byte (aref space (- (* 2 dim) 2))))
    (1+ (- max-byte min-byte))))
c1841772 KH |
385 | |
(defun charset-iso-final-char (charset)
  "Return ISO-2022 final character of CHARSET.
Return -1 if CHARSET's property list has no non-nil
`:iso-final-char' entry, i.e. it isn't an ISO 2022 charset."
  (let ((final (plist-get (charset-plist charset) :iso-final-char)))
    (if final
        final
      -1)))
4ed46869 KH |
391 | |
(defmacro charset-short-name (charset)
  "Return short name of CHARSET.
CHARSET is an expression that is evaluated at run time; the result
is the `:short-name' entry of that charset's property list."
  ;; A macro body must return the form to evaluate.  Doing the lookup
  ;; here would apply `charset-plist' to the unevaluated argument form
  ;; at expansion time.  The form is built with `list' because this
  ;; file avoids backquote for bootstrapping reasons.
  (list 'plist-get (list 'charset-plist charset) :short-name))
4ed46869 KH |
395 | |
(defmacro charset-long-name (charset)
  "Return long name of CHARSET.
CHARSET is an expression that is evaluated at run time; the result
is the `:long-name' entry of that charset's property list."
  ;; A macro body must return the form to evaluate.  Doing the lookup
  ;; here would apply `charset-plist' to the unevaluated argument form
  ;; at expansion time.  The form is built with `list' because this
  ;; file avoids backquote for bootstrapping reasons.
  (list 'plist-get (list 'charset-plist charset) :long-name))
4ed46869 | 399 | |
(defun charset-list ()
  "Return list of all charsets ever defined.

This function is provided for backward compatibility.
Now we have the variable `charset-list'."
  ;; The function and the variable share a name; return the
  ;; variable's current value.
  (symbol-value 'charset-list))
(make-obsolete 'charset-list "Use variable `charset-list'" "22.1")
d3675a42 | 407 | |
(defun generic-char-p (char)
  "Return nil whatever CHAR is.
This function is kept only for backward compatibility."
  nil)
(make-obsolete 'generic-char-p "Generic characters no longer exist" "22.1")
0269ddfb | 412 | \f |
e76938e7 | 413 | ;; Coding system stuff |
4ed46869 | 414 | |
c1841772 KH |
415 | ;; Coding system is a symbol that has been defined by the function |
416 | ;; `define-coding-system'. | |
4ed46869 | 417 | |
c1841772 KH |
;; The order of this list is significant: `define-coding-system' turns
;; a `:flags' list into a bit mask in which the Nth symbol here
;; (counting from 0) corresponds to bit N.
;; NOTE(review): `designate-at-bol' is listed but has no description
;; in the docstring below -- its meaning needs to be confirmed and
;; documented.
(defconst coding-system-iso-2022-flags
  '(long-form
    ascii-at-eol
    ascii-at-cntl
    7-bit
    locking-shift
    single-shift
    designation
    revision
    direction
    init-at-bol
    designate-at-bol
    safe
    latin-extra
    composition
    euc-tw-shift
    use-roman
    use-oldjis)
  "List of symbols that control ISO-2022 encoder/decoder.

The value of the `:flags' attribute in the argument of the function
`define-coding-system' must be a list whose elements are among them.

If `long-form' is specified, use a long designation sequence on
encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
and `japanese-jisx0208'.  The long designation sequence doesn't
conform to ISO 2022, but is used by such coding systems as
`compound-text'.

If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
on encoding.

If `ascii-at-cntl' is specified, designate ASCII to g0 before control
codes and SPC on encoding.

If `7-bit' is specified, use 7-bit code only on encoding.

If `locking-shift' is specified, decode locking-shift code correctly
on decoding, and use locking-shift to invoke a graphic element on
encoding.

If `single-shift' is specified, decode single-shift code correctly on
decoding, and use single-shift to invoke a graphic element on encoding.

If `designation' is specified, decode designation code correctly on
decoding, and use designation to designate a charset to a graphic
element on encoding.

If `revision' is specified, produce an escape sequence to specify
revision number of a charset on encoding.  Such an escape sequence is
always correctly decoded on decoding.

If `direction' is specified, decode ISO6429's code for specifying
direction correctly, and produce the code on encoding.

If `init-at-bol' is specified, on encoding, it is assumed that
invocation and designation statuses are reset at each beginning of
line even if `ascii-at-eol' is not specified; thus no codes for
resetting them are produced.

If `safe' is specified, on encoding, characters not supported by a
coding are replaced with `?'.

If `latin-extra' is specified, the code-detection routine assumes that a
code specified in `latin-extra-code-table' (which see) is valid.

If `composition' is specified, an escape sequence to specify
composition sequence is correctly decoded on decoding, and is produced
on encoding.

If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
correctly decoded on decoding, and is produced on encoding.

If `use-roman' is specified, JIS0201-1976-Roman is designated instead
of ASCII.

If `use-oldjis' is specified, JIS0208-1976 is designated instead of
JIS0208-1983.")
496 | ||
(defun define-coding-system (name docstring &rest props)
  "Define NAME (a symbol) as a coding system with DOCSTRING and attributes.
The remaining arguments must come in pairs ATTRIBUTE VALUE.  ATTRIBUTE
may be any symbol.

The following attributes have special meanings.  Those labeled as
\"(required)\" should not be omitted.

`:mnemonic' (required)

VALUE is a character to display on mode line for the coding system.

`:coding-type' (required)

VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022',
`emacs-mule', `shift-jis', `ccl', `raw-text', `undecided'.

`:eol-type'

VALUE is the EOL (end-of-line) format of the coding system.  It must be
one of `unix', `dos', `mac'.  The symbol `unix' means Unix-like EOL
\(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF),
and `mac' means MAC-like EOL \(i.e. single CR).  If omitted, on
decoding by the coding system, Emacs automatically detects the EOL
format of the source text, and on encoding, uses Unix-like EOL.

`:charset-list'

VALUE must be a list of charsets supported by the coding system.  On
encoding by the coding system, if a character belongs to multiple
charsets in the list, a charset that comes earlier in the list is
selected.  If `:coding-type' is `iso-2022', VALUE may be `iso-2022',
which indicates that the coding system supports all ISO-2022 based
charsets.  If `:coding-type' is `emacs-mule', VALUE may be
`emacs-mule', which indicates that the coding system supports all
charsets that have the `:emacs-mule-id' property.

`:ascii-compatible-p'

If VALUE is non-nil, the coding system decodes all 7-bit bytes into
the corresponding ASCII characters, and encodes all ASCII characters
back to the corresponding 7-bit bytes.  VALUE defaults to nil.

`:decode-translation-table'

VALUE must be a translation table to use on decoding.

`:encode-translation-table'

VALUE must be a translation table to use on encoding.

`:post-read-conversion'

VALUE must be a function to call after some text is inserted and
decoded by the coding system itself and before any functions in
`after-insert-file-functions' are called.  The arguments to this
function are the same as those of a function in
`after-insert-file-functions', i.e. LENGTH of the text to be decoded
with point at the head of it, and the function should leave point
unchanged.

`:pre-write-conversion'

VALUE must be a function to call after all functions in
`write-region-annotate-functions' and `buffer-file-format' are called,
and before the text is encoded by the coding system itself.  The
arguments to this function are the same as those of a function in
`write-region-annotate-functions'.

`:default-char'

VALUE must be a character.  On encoding, a character not supported by
the coding system is replaced with VALUE.

`:for-unibyte'

VALUE non-nil means that visiting a file with the coding system
results in a unibyte buffer.  The attribute name `:prefer-unibyte' is
accepted as a synonym.

`:mime-charset'

VALUE must be a symbol whose name is that of a MIME charset converted
to lower case.

`:mime-text-unsuitable'

VALUE non-nil means the `:mime-charset' property names a charset which
is unsuitable for the top-level media type \"text\".

`:flags'

VALUE must be a list of symbols that control the ISO-2022 converter.
Each must be a member of the list `coding-system-iso-2022-flags'
\(which see).  This attribute has a meaning only when `:coding-type'
is `iso-2022'.

`:designation'

VALUE must be a vector [G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
GN-USAGE specifies the usage of graphic register GN as follows.

If it is nil, no charset can be designated to GN.

If it is a charset, the charset is initially designated to GN, and
never used by the other charsets.

If it is a list, the elements must be charsets, nil, 94, or 96.  GN
can be used by all the listed charsets.  If the list contains 94, any
iso-2022 charset whose code-space ranges are 94 long can be designated
to GN.  If the list contains 96, any charset whose code-space ranges
are 96 long can be designated to GN.  If the list contains t, both of
the above apply.  If the first element is a charset, that charset is
initially designated to GN.

This attribute has a meaning only when `:coding-type' is `iso-2022'.

`:bom'

This attribute specifies whether the coding system uses a `byte order
mark'.  VALUE must be nil, t, or a cons of coding systems whose
`:coding-type' is `utf-16'.

If the value is nil, on decoding, don't treat the first two bytes as
BOM, and on encoding, don't produce BOM bytes.

If the value is t, on decoding, skip the first two bytes as BOM, and on
encoding, produce BOM bytes according to the value of `:endian'.

If the value is a cons, on decoding, check the first two bytes.  If
they are 0xFE 0xFF, use the car part coding system of the value.  If
they are 0xFF 0xFE, use the cdr part coding system of the value.
Otherwise, treat them as bytes for a normal character.  On encoding,
produce BOM bytes according to the value of `:endian'.

This attribute has a meaning only when `:coding-type' is `utf-16'.

`:endian'

VALUE must be `big' or `little' specifying big-endian and
little-endian respectively.  The default value is `big'.

This attribute has a meaning only when `:coding-type' is `utf-16'.

`:ccl-decoder'

VALUE is a symbol representing the registered CCL program used for
decoding.  This attribute has a meaning only when `:coding-type' is
`ccl'.

`:ccl-encoder'

VALUE is a symbol representing the registered CCL program used for
encoding.  This attribute has a meaning only when `:coding-type' is
`ccl'."
  ;; COMMON-ATTRS and SPEC-ATTRS are alists whose keys, in this exact
  ;; order, become the positional arguments passed to
  ;; `define-coding-system-internal' after NAME.  Do not reorder them.
  (let* ((common-attrs (mapcar 'list
                               '(:mnemonic
                                 :coding-type
                                 :charset-list
                                 :ascii-compatible-p
                                 :decode-translation-table
                                 :encode-translation-table
                                 :post-read-conversion
                                 :pre-write-conversion
                                 :default-char
                                 :prefer-unibyte
                                 :plist
                                 :eol-type)))
         (coding-type (plist-get props :coding-type))
         ;; Attributes specific to the coding type; empty for the
         ;; types not listed here.
         (spec-attrs (mapcar 'list
                             (cond ((eq coding-type 'iso-2022)
                                    '(:initial
                                      :reg-usage
                                      :request
                                      :flags))
                                   ((eq coding-type 'utf-16)
                                    '(:bom
                                      :endian))
                                   ((eq coding-type 'ccl)
                                    '(:ccl-decoder
                                      :ccl-encoder
                                      :valids))))))

    (dolist (slot common-attrs)
      (setcdr slot (plist-get props (car slot))))

    ;; The docstring advertises `:for-unibyte' while the slot is keyed
    ;; `:prefer-unibyte'.  Honor the documented name as well, so that
    ;; callers following the documentation are not silently ignored.
    (let ((slot (assq :prefer-unibyte common-attrs)))
      (or (cdr slot)
          (setcdr slot (plist-get props :for-unibyte))))

    (dolist (slot spec-attrs)
      (setcdr slot (plist-get props (car slot))))

    (when (eq coding-type 'iso-2022)
      ;; Derive :initial, :reg-usage and :request from :designation.
      ;; INITIAL holds, per graphic register, the element found first
      ;; in its usage; REG-USAGE is a cons of register numbers (4
      ;; when unset) for the generic 94 and 96 entries; REQUEST is
      ;; an alist of (CHARSET . REGISTER).
      (let ((designation (plist-get props :designation))
            (flags (plist-get props :flags))
            (initial (make-vector 4 nil))
            (reg-usage (cons 4 4))
            request elt)
        (dotimes (i 4)
          (setq elt (aref designation i))
          (cond ((charsetp elt)
                 (aset initial i elt)
                 (setq request (cons (cons elt i) request)))
                ((consp elt)
                 (aset initial i (car elt))
                 (if (charsetp (car elt))
                     (setq request (cons (cons (car elt) i) request)))
                 (dolist (e (cdr elt))
                   (cond ((charsetp e)
                          (setq request (cons (cons e i) request)))
                         ((eq e 94)
                          (setcar reg-usage i))
                         ((eq e 96)
                          (setcdr reg-usage i))
                         ((eq e t)
                          (setcar reg-usage i)
                          (setcdr reg-usage i)))))))
        (setcdr (assq :initial spec-attrs) initial)
        (setcdr (assq :reg-usage spec-attrs) reg-usage)
        (setcdr (assq :request spec-attrs) request)

        ;; Change :flags value from a list to a bit-mask.  Bit N is
        ;; set when the Nth element of `coding-system-iso-2022-flags'
        ;; is present in FLAGS.
        (let ((bits 0)
              (i 0))
          (dolist (elt coding-system-iso-2022-flags)
            (if (memq elt flags)
                (setq bits (logior bits (lsh 1 i))))
            (setq i (1+ i)))
          (setcdr (assq :flags spec-attrs) bits))))

    ;; Add :name and :docstring properties to PROPS.
    (setq props
          (cons :name (cons name (cons :docstring (cons (purecopy docstring)
                                                        props)))))
    (setcdr (assq :plist common-attrs) props)
    (apply 'define-coding-system-internal
           name (mapcar 'cdr (append common-attrs spec-attrs)))))
4ed46869 | 735 | |
(defun coding-system-doc-string (coding-system)
  "Return the documentation string for CODING-SYSTEM.
The value is the `:docstring' entry of CODING-SYSTEM's property list."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :docstring)))
4ed46869 | 739 | |
(defun coding-system-mnemonic (coding-system)
  "Return the mnemonic character of CODING-SYSTEM.
The mnemonic character of a coding system is used in the mode line to
indicate the coding system.  If CODING-SYSTEM is nil, return ?=."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :mnemonic)))
4ed46869 | 745 | |
c1841772 KH |
(defun coding-system-type (coding-system)
  "Return the coding type of CODING-SYSTEM.
A coding type is a symbol naming the encoding method of CODING-SYSTEM;
it is the `:coding-type' entry of the coding system's property list.
See the function `define-coding-system' for more detail."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :coding-type)))
d3675a42 | 751 | |
(defun coding-system-charset-list (coding-system)
  "Return the list of charsets supported by CODING-SYSTEM.
If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'.
The value is the `:charset-list' entry of the coding system's
property list."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :charset-list)))
0269ddfb | 757 | |
2f1e746b KH |
(defun coding-system-category (coding-system)
  "Return the category symbol of CODING-SYSTEM.
The value is the `:category' entry of the coding system's property list."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :category)))
0269ddfb KH |
761 | |
(defun coding-system-get (coding-system prop)
  "Return the value of property PROP in CODING-SYSTEM's property list.
For compatibility with Emacs 20/21, PROP may be an old-style symbol
such as `mime-charset' as well as a current-style keyword such as
`:mime-charset'.  A non-keyword PROP is first looked up as given; if
that yields nil, it is retried with a colon prepended to its name."
  (let ((props (coding-system-plist coding-system)))
    (cond ((plist-get props prop))
          ((keywordp prop) nil)
          (t (plist-get props (intern (concat ":" (symbol-name prop))))))))
0269ddfb KH |
770 | |
(defun coding-system-put (coding-system prop val)
  "Set property PROP in CODING-SYSTEM's property list to VAL.
The value is whatever `plist-put' returns for that property list."
  (let ((props (coding-system-plist coding-system)))
    (plist-put props prop val)))
0269ddfb KH |
774 | |
;; `coding-system-parent' is kept only as an obsolete alias for
;; `coding-system-base'.
(defalias 'coding-system-parent 'coding-system-base)
(make-obsolete 'coding-system-parent 'coding-system-base "20.3")
0269ddfb | 777 | |
2e729bfa JB |
(defun coding-system-eol-type-mnemonic (coding-system)
  "Return the string indicating end-of-line format of CODING-SYSTEM.
The eol types 0, 1 and 2 select `eol-mnemonic-unix', `eol-mnemonic-dos'
and `eol-mnemonic-mac' respectively; anything else selects
`eol-mnemonic-undecided'.  A mnemonic that is a character is converted
to a one-character string."
  (let* ((kind (coding-system-eol-type coding-system))
         (mnemonic (if (eq kind 0)
                       eol-mnemonic-unix
                     (if (eq kind 1)
                         eol-mnemonic-dos
                       (if (eq kind 2)
                           eol-mnemonic-mac
                         eol-mnemonic-undecided)))))
    (cond ((stringp mnemonic) mnemonic)
          (t (char-to-string mnemonic)))))
788 | ||
857ea15c AS |
(defun coding-system-lessp (x y)
  "Return non-nil if coding system X should be listed before Y.
The coding systems `no-conversion', `emacs-mule' and `undecided' sort
first, in that order.  All others are ordered by mnemonic character,
compared case-insensitively first and by raw character code on a tie."
  (catch 'decided
    ;; The special names are checked in priority order; X wins ties.
    (dolist (special '(no-conversion emacs-mule undecided))
      (cond ((eq x special) (throw 'decided t))
            ((eq y special) (throw 'decided nil))))
    (let* ((mx (coding-system-mnemonic x))
           (my (coding-system-mnemonic y))
           (lx (downcase mx))
           (ly (downcase my)))
      (cond ((< lx ly) t)
            ((> lx ly) nil)
            (t (< mx my))))))
801 | ||
(defun add-to-coding-system-list (coding-system)
  "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted.
The ordering is the one defined by `coding-system-lessp'."
  (if (and coding-system-list
           (not (coding-system-lessp coding-system
                                     (car coding-system-list))))
      ;; Binary search for the cell after which CODING-SYSTEM belongs.
      (let ((span (length coding-system-list))
            (cell coding-system-list)
            half probe)
        (while (> span 1)
          (setq half (/ span 2)
                probe (nthcdr half cell))
          (if (coding-system-lessp (car probe) coding-system)
              (setq cell probe
                    span (- span half))
            (setq span half)))
        (setcdr cell (cons coding-system (cdr cell))))
    ;; Empty list, or CODING-SYSTEM sorts before the current head.
    (setq coding-system-list (cons coding-system coding-system-list))))
816 | ||
(defun coding-system-list (&optional base-only)
  "Return a list of all existing non-subsidiary coding systems.
If optional arg BASE-ONLY is non-nil, only base coding systems are
listed.  The value doesn't include subsidiary coding systems which are
made from bases and aliases automatically for various end-of-line
formats (e.g. iso-latin-1-unix, koi8-r-dos).

The value is a fresh list; the variable `coding-system-list' is not
modified."
  ;; HEAD is a dummy cell in front of the copied list, so the first real
  ;; element can be unlinked like any other.  The result must be taken
  ;; from (cdr HEAD): a pointer to the copy's original first cons would
  ;; still include that element after it is unlinked.
  (let* ((head (cons nil (copy-sequence coding-system-list)))
         (tail head))
    ;; Remove subsidiary coding systems (eol variants) and alias
    ;; coding systems (if necessary).
    (while (cdr tail)
      (let* ((coding (car (cdr tail)))
             (aliases (coding-system-aliases coding)))
        (if (or
             ;; CODING is an eol variant if not in ALIASES.
             (not (memq coding aliases))
             ;; CODING is an alias if it is not car of ALIASES.
             (and base-only (not (eq coding (car aliases)))))
            (setcdr tail (cdr (cdr tail)))
          (setq tail (cdr tail)))))
    (cdr head)))
838 | ||
620956ca KH |
;; Retained only so that old code referring to this variable still loads.
(defconst char-coding-system-table nil
  "This is an obsolete variable.
It exists just for backward compatibility, and the value is always nil.")
c11a8f77 | 842 | |
50c29104 KH |
(defun transform-make-coding-system-args (name type &optional doc-string props)
  "For internal use only.
Transform XEmacs style args for `make-coding-system' to Emacs style.
NAME is the coding system symbol, TYPE is an XEmacs type symbol
\(`shift-jis', `iso2022', `big5' or `ccl'), DOC-STRING is the
documentation string and PROPS is an XEmacs style property list.

Value is a list of transformed arguments:
  (NAME TYPE-NUMBER MNEMONIC DOC-STRING FLAGS PROPERTIES EOL-TYPE)
where PROPERTIES is an alist, which is the form `make-coding-system'
iterates over.  Signal an error if TYPE is not supported."
  (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
        (eol-type (plist-get props 'eol-type))
        properties tmp)
    ;; Map XEmacs eol-type names to the Emacs ones.
    (cond
     ((eq eol-type 'lf) (setq eol-type 'unix))
     ((eq eol-type 'crlf) (setq eol-type 'dos))
     ((eq eol-type 'cr) (setq eol-type 'mac)))
    ;; Collect the conversion functions as (KEY . VALUE) pairs.
    ;; `make-coding-system' takes the car and cdr of each element of
    ;; PROPERTIES, so a flat property list would signal an error there.
    (dolist (key '(post-read-conversion pre-write-conversion))
      (if (setq tmp (plist-get props key))
          (setq properties (cons (cons key tmp) properties))))
    (setq properties (nreverse properties))
    (cond
     ((eq type 'shift-jis)
      `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type))
     ((eq type 'iso2022)                        ; This is not perfect.
      (if (plist-get props 'escape-quoted)
          (error "escape-quoted is not supported: %S"
                 `(,name ,type ,doc-string ,props)))
      (let ((g0 (plist-get props 'charset-g0))
            (g1 (plist-get props 'charset-g1))
            (g2 (plist-get props 'charset-g2))
            (g3 (plist-get props 'charset-g3))
            (use-roman
             (and
              (eq (cadr (assoc 'latin-jisx0201
                               (plist-get props 'input-charset-conversion)))
                  'ascii)
              (eq (cadr (assoc 'ascii
                               (plist-get props 'output-charset-conversion)))
                  'latin-jisx0201)))
            (use-oldjis
             (and
              (eq (cadr (assoc 'japanese-jisx0208-1978
                               (plist-get props 'input-charset-conversion)))
                  'japanese-jisx0208)
              (eq (cadr (assoc 'japanese-jisx0208
                               (plist-get props 'output-charset-conversion)))
                  'japanese-jisx0208-1978))))
        ;; Turn each charset Gn into a designation spec whose shape
        ;; depends on the matching `force-gN-on-output' property.
        (if (charsetp g0)
            (if (plist-get props 'force-g0-on-output)
                (setq g0 `(nil ,g0))
              (setq g0 `(,g0 t))))
        (if (charsetp g1)
            (if (plist-get props 'force-g1-on-output)
                (setq g1 `(nil ,g1))
              (setq g1 `(,g1 t))))
        (if (charsetp g2)
            (if (plist-get props 'force-g2-on-output)
                (setq g2 `(nil ,g2))
              (setq g2 `(,g2 t))))
        (if (charsetp g3)
            (if (plist-get props 'force-g3-on-output)
                (setq g3 `(nil ,g3))
              (setq g3 `(,g3 t))))
        `(,name 2 ,mnemonic ,doc-string
          (,g0 ,g1 ,g2 ,g3
           ,(plist-get props 'short)
           ,(not (plist-get props 'no-ascii-eol))
           ,(not (plist-get props 'no-ascii-cntl))
           ,(plist-get props 'seven)
           t
           ,(not (plist-get props 'lock-shift))
           ,use-roman
           ,use-oldjis
           ,(plist-get props 'no-iso6429)
           nil nil nil nil)
          ,properties ,eol-type)))
     ((eq type 'big5)
      `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type))
     ((eq type 'ccl)
      `(,name 4 ,mnemonic ,doc-string
        (,(plist-get props 'decode) . ,(plist-get props 'encode))
        ,properties ,eol-type))
     (t
      (error "unsupported XEmacs style make-coding-system arguments: %S"
             `(,name ,type ,doc-string ,props))))))
923 | ||
(defun make-coding-system (coding-system type mnemonic doc-string
                                         &optional
                                         flags
                                         properties
                                         eol-type)
  "Define a new coding system CODING-SYSTEM (symbol).
This function is provided for backward compatibility.
Use `define-coding-system' instead.

TYPE is an integer from 0 to 5 selecting the coding type, MNEMONIC is
the mnemonic character and DOC-STRING the documentation string.
FLAGS depends on TYPE: for type 2 it is a list whose first four
elements are designation specs and whose elements 4 to 16 are flags;
for type 4 it is a cons (DECODER . ENCODER).
PROPERTIES is an alist of old-style property names and values.
EOL-TYPE is 0, 1 or 2 (unix, dos, mac); a vector is treated as nil.

If TYPE is a symbol, the arguments are assumed to be XEmacs style
\(NAME TYPE DOC-STRING PROPS) and are transformed first."
  ;; For compatibility with XEmacs, we check the type of TYPE.  If it
  ;; is a symbol, perhaps, this function is called with XEmacs-style
  ;; arguments.  Here, try to transform that kind of arguments to
  ;; Emacs style.
  (if (symbolp type)
      (let ((args (transform-make-coding-system-args coding-system type
                                                     mnemonic doc-string)))
        (setq coding-system (car args)
              type (nth 1 args)
              mnemonic (nth 2 args)
              doc-string (nth 3 args)
              flags (nth 4 args)
              properties (nth 5 args)
              eol-type (nth 6 args))))

  ;; NOTE(review): type 2 maps to `iso2022' here, while
  ;; `define-coding-system' in this file tests for `iso-2022'.
  ;; Confirm which spelling is intended before relying on type 2.
  (setq type
        (cond ((eq type 0) 'emacs-mule)
              ((eq type 1) 'shift-jis)
              ((eq type 2) 'iso2022)
              ((eq type 3) 'big5)
              ((eq type 4) 'ccl)
              ((eq type 5) 'raw-text)
              (t
               (error "Invalid coding system type: %s" type))))

  ;; Convert the old-style alist into a keyword property list.
  (setq properties
        (let ((plist nil) key)
          (dolist (elt properties)
            (setq key (car elt))
            (cond ((eq key 'post-read-conversion)
                   (setq key :post-read-conversion))
                  ((eq key 'pre-write-conversion)
                   (setq key :pre-write-conversion))
                  ((eq key 'translation-table-for-decode)
                   (setq key :decode-translation-table))
                  ((eq key 'translation-table-for-encode)
                   (setq key :encode-translation-table))
                  ((eq key 'safe-charsets)
                   (setq key :charset-list))
                  ((eq key 'mime-charset)
                   (setq key :mime-charset))
                  ((eq key 'valid-codes)
                   (setq key :valids)))
            (setq plist (plist-put plist key (cdr elt))))
          plist))
  ;; `plist-put' only modifies its argument in place when that list is
  ;; non-empty, so its value must be stored back; otherwise these
  ;; entries are lost whenever PROPERTIES starts out as nil.
  (setq properties (plist-put properties :mnemonic mnemonic))
  (setq properties (plist-put properties :coding-type type))
  (cond ((eq eol-type 0) (setq eol-type 'unix))
        ((eq eol-type 1) (setq eol-type 'dos))
        ((eq eol-type 2) (setq eol-type 'mac))
        ((vectorp eol-type) (setq eol-type nil)))
  (setq properties (plist-put properties :eol-type eol-type))

  (cond
   ((eq type 'iso2022)
    ;; Each position yields a flag symbol, nil, or (for elements 4 and
    ;; 12) the element's own non-nil value in place of the default.
    (setq properties
          (plist-put properties :flags
                     (list (and (or (consp (nth 0 flags))
                                    (consp (nth 1 flags))
                                    (consp (nth 2 flags))
                                    (consp (nth 3 flags))) 'designation)
                           (or (nth 4 flags) 'long-form)
                           (and (nth 5 flags) 'ascii-at-eol)
                           (and (nth 6 flags) 'ascii-at-cntl)
                           (and (nth 7 flags) '7-bit)
                           (and (nth 8 flags) 'locking-shift)
                           (and (nth 9 flags) 'single-shift)
                           (and (nth 10 flags) 'use-roman)
                           (and (nth 11 flags) 'use-oldjis)
                           (or (nth 12 flags) 'direction)
                           (and (nth 13 flags) 'init-at-bol)
                           (and (nth 14 flags) 'designate-at-bol)
                           (and (nth 15 flags) 'safe)
                           (and (nth 16 flags) 'latin-extra))))
    (setq properties
          (plist-put properties :designation
                     (let ((vec (make-vector 4 nil)))
                       (dotimes (i 4)
                         (let ((spec (nth i flags)))
                           (if (eq spec t)
                               (aset vec i '(94 96))
                             (if (consp spec)
                                 (progn
                                   (if (memq t spec)
                                       (setq spec (append (delq t spec)
                                                          '(94 96))))
                                   (aset vec i spec))))))
                       vec))))

   ((eq type 'ccl)
    (setq properties (plist-put properties :ccl-decoder (car flags)))
    (setq properties (plist-put properties :ccl-encoder (cdr flags)))))

  (apply 'define-coding-system coding-system doc-string properties))
4ed46869 | 1023 | |
bbdea948 RS |
(defun merge-coding-systems (first second)
  "Fill in any unspecified aspects of coding system FIRST from SECOND.
If the base of FIRST is `undecided', take the text conversion of SECOND.
If the eol type of FIRST is a vector and the eol type of SECOND is a
number from 0 to 2, take the eol conversion of SECOND.
Return the resulting coding system."
  (let ((second-base (coding-system-base second))
        (second-eol (coding-system-eol-type second))
        (merged first))
    ;; Text conversion.
    (when (eq (coding-system-base merged) 'undecided)
      (setq merged
            (coding-system-change-text-conversion merged second-base)))
    ;; End-of-line conversion.
    (when (and (vectorp (coding-system-eol-type merged))
               (numberp second-eol)
               (<= 0 second-eol)
               (<= second-eol 2))
      (setq merged
            (coding-system-change-eol-conversion merged second-eol)))
    merged))
1038 | ||
(defun set-buffer-file-coding-system (coding-system &optional force)
  "Set the file coding-system of the current buffer to CODING-SYSTEM.
This means that when you save the buffer, it will be converted
according to CODING-SYSTEM.  For a list of possible values of CODING-SYSTEM,
use \\[list-coding-systems].

If CODING-SYSTEM leaves the text conversion unspecified, or if it
leaves the end-of-line conversion unspecified, FORCE controls what to
do.  If FORCE is nil, get the unspecified aspect (or aspects) from the
buffer's previous `buffer-file-coding-system' value (if it is
specified there).  Otherwise, leave it unspecified.

This marks the buffer modified so that the succeeding \\[save-buffer]
surely saves the buffer with CODING-SYSTEM.  From a program, if you
don't want to mark the buffer modified, just set the variable
`buffer-file-coding-system' directly."
  (interactive "zCoding system for saving file (default, nil): \nP")
  (check-coding-system coding-system)
  (setq buffer-file-coding-system
        (if (and coding-system buffer-file-coding-system (null force))
            ;; Inherit unspecified aspects from the previous value.
            (merge-coding-systems coding-system buffer-file-coding-system)
          coding-system))
  ;; This is in case of an explicit call.  Normally, `normal-mode' and
  ;; `set-buffer-major-mode-hook' take care of setting the table.
  (when (fboundp 'ucs-set-table-for-input) ; don't lose when building
    (ucs-set-table-for-input))
  (set-buffer-modified-p t)
  (force-mode-line-update))
1067 | ||
bbdea948 RS |
(defun revert-buffer-with-coding-system (coding-system &optional force)
  "Visit the current buffer's file again using coding system CODING-SYSTEM.
For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].

If CODING-SYSTEM leaves the text conversion unspecified, or if it
leaves the end-of-line conversion unspecified, FORCE controls what to
do.  If FORCE is nil, get the unspecified aspect (or aspects) from the
buffer's previous `buffer-file-coding-system' value (if it is
specified there).  Otherwise, determine it from the file contents as
usual for visiting a file."
  (interactive "zCoding system for visited file (default, nil): \nP")
  (check-coding-system coding-system)
  ;; `coding-system-for-read' is bound only around the revert itself.
  (let ((coding-system-for-read
         (if (and coding-system buffer-file-coding-system (null force))
             (merge-coding-systems coding-system buffer-file-coding-system)
           coding-system)))
    (revert-buffer)))
1085 | ||
701414e3 KH |
(defun set-file-name-coding-system (coding-system)
  "Set coding system for decoding and encoding file names to CODING-SYSTEM.
It actually just sets the variable `file-name-coding-system' (which
see) to CODING-SYSTEM."
  (interactive "zCoding system for file names (default, nil): ")
  (check-coding-system coding-system)
  (setq file-name-coding-system coding-system))
1093 | ||
358d28fb RS |
;; Updated by `set-terminal-coding-system' each time that command is
;; given a non-nil coding system.
(defvar default-terminal-coding-system nil
  "Default value for the terminal coding system.
This is normally set according to the selected language environment.
See also the command `set-terminal-coding-system'.")
1098 | ||
df100398 KH |
(defun set-terminal-coding-system (coding-system)
  "Set coding system of your terminal to CODING-SYSTEM.
All text output to the terminal will be encoded
with the specified coding system.
For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
The default is determined by the selected language environment
or by the previous use of this command."
  (interactive
   (let ((default (and (not (terminal-coding-system))
                       default-terminal-coding-system)))
     (list (read-coding-system
            (format "Coding system for terminal display (default, %s): "
                    default)
            default))))
  ;; With no explicit choice and no current terminal coding system,
  ;; fall back to the remembered default.
  (unless (or coding-system (terminal-coding-system))
    (setq coding-system default-terminal-coding-system))
  ;; Remember an explicit choice for next time.
  (when coding-system
    (setq default-terminal-coding-system coding-system))
  (set-terminal-coding-system-internal coding-system)
  (redraw-frame (selected-frame)))
1121 | ||
358d28fb RS |
;; Updated by `set-keyboard-coding-system' each time that command is
;; given a non-nil coding system.
(defvar default-keyboard-coding-system nil
  "Default value of the keyboard coding system.
This is normally set according to the selected language environment.
See also the command `set-keyboard-coding-system'.")
1126 | ||
(defun set-keyboard-coding-system (coding-system)
  "Set coding system for keyboard input to CODING-SYSTEM.
In addition, this command enables Encoded-kbd minor mode.
\(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see
`encoded-kbd-mode'.)
For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
The default is determined by the selected language environment
or by the previous use of this command."
  (interactive
   (let ((default (and (not (keyboard-coding-system))
                       default-keyboard-coding-system)))
     (list (read-coding-system
            (format "Coding system for keyboard input (default, %s): "
                    default)
            default))))
  ;; With no explicit choice and no current keyboard coding system,
  ;; fall back to the remembered default.
  (unless (or coding-system (keyboard-coding-system))
    (setq coding-system default-keyboard-coding-system))
  ;; Remember an explicit choice for next time.
  (when coding-system
    (setq default-keyboard-coding-system coding-system))
  (set-keyboard-coding-system-internal coding-system)
  (setq keyboard-coding-system coding-system)
  (encoded-kbd-mode (if coding-system 1 0)))
1151 | ||
6d34f495 DL |
;; The :set function goes through `set-keyboard-coding-system' unless
;; the value is nil and `encoded-kbd-mode' is not bound yet; in that
;; case only the default value is initialized.
(defcustom keyboard-coding-system nil
  "Specify coding system for keyboard input.
If you set this on a terminal which can't distinguish Meta keys from
8-bit characters, you will have to use ESC to type Meta characters.
See Info node `Specify Coding' and Info node `Single-Byte Character Support'.

On non-windowing terminals, this is set from the locale by default.

Setting this variable directly does not take effect;
use either M-x customize or \\[set-keyboard-coding-system]."
  :type '(coding-system :tag "Coding system")
  :link '(info-link "(emacs)Specify Coding")
  :link '(info-link "(emacs)Single-Byte Character Support")
  :set (lambda (symbol value)
         ;; Don't load encoded-kbd-mode unnecessarily.
         (if (or value (boundp 'encoded-kbd-mode))
             (set-keyboard-coding-system value)
           (set-default 'keyboard-coding-system nil))) ; must initialize
  :version "21.4"
  :group 'keyboard
  :group 'mule)
1173 | ||
(defun set-buffer-process-coding-system (decoding encoding)
  "Set coding systems for the process associated with the current buffer.
DECODING is the coding system to be used to decode input from the process,
ENCODING is the coding system to be used to encode output to the process.
Signal an error if the current buffer has no process.

For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]."
  (interactive
   "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
  (let ((process (get-buffer-process (current-buffer))))
    (unless process
      (error "No process"))
    (check-coding-system decoding)
    (check-coding-system encoding)
    (set-process-coding-system process decoding encoding))
  (force-mode-line-update))
1189 | ||
d0b99881 RS |
;; `set-clipboard-coding-system' is another name for
;; `set-selection-coding-system'.
(defalias 'set-clipboard-coding-system 'set-selection-coding-system)
1191 | ||
(defun set-selection-coding-system (coding-system)
  "Make CODING-SYSTEM used for communicating with other X clients.
When sending or receiving text via cut_buffer, selection, and clipboard,
the text is encoded or decoded by CODING-SYSTEM.
This sets the variable `selection-coding-system'."
  (interactive "zCoding system for X selection: ")
  (check-coding-system coding-system)
  (setq selection-coding-system coding-system))
b25eef20 | 1199 | |
;; Coding system most recently specified by the command
;; `set-next-selection-coding-system'; that command offers it as the
;; default on its next use.
(defvar last-next-selection-coding-system nil)
1203 | ||
(defun set-next-selection-coding-system (coding-system)
  "Use CODING-SYSTEM for next communication with other window system clients.
This setting is effective for the next communication only.
If CODING-SYSTEM is nil, reuse the coding system given to the previous
call of this command."
  (interactive
   (let ((prompt
          (if last-next-selection-coding-system
              (format "Coding system for the next selection (default, %S): "
                      last-next-selection-coding-system)
            "Coding system for the next selection: ")))
     (list (read-coding-system prompt last-next-selection-coding-system))))
  (cond (coding-system
         ;; Remember an explicit choice.
         (setq last-next-selection-coding-system coding-system))
        (t
         ;; No choice given: fall back to the remembered one.
         (setq coding-system last-next-selection-coding-system)))
  (check-coding-system coding-system)

  (setq next-selection-coding-system coding-system))
1220 | ||
(defun set-coding-priority (arg)
  "Set priority of coding categories according to ARG.
ARG is a list of coding categories ordered by priority.  Each category
symbol is replaced by its value as a variable, and the resulting list
is passed to `set-coding-system-priority'.

This function is provided for backward compatibility.
Now we have more convenient function `set-coding-system-priority'."
  (let ((coding-systems (mapcar 'symbol-value arg)))
    (apply 'set-coding-system-priority coding-systems)))
(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.1")
4ed46869 | 1230 | |
835cbadb EZ |
1231 | ;;; X selections |
1232 | ||
(defvar ctext-non-standard-encodings-alist
  '(("ISO8859-15" . latin-iso8859-15)
    ("ISO8859-14" . latin-iso8859-14)
    ("KOI8-R" . koi8-r)
    ("BIG5-0" . big5))
  "Alist of non-standard encoding names vs Emacs coding systems.
This alist is used to decode an extended segment of a compound text.")
1240 | ||
;; Regexp used by `ctext-post-read-conversion' to find the parts of
;; ctext-decoded text that still need decoding.  Group 1 matches the
;; header of an extended segment (group 2 is its encoding name);
;; group 3 matches a whole UTF-8 section.
(defvar ctext-non-standard-encodings-regexp
  (string-to-multibyte
   (concat
    ;; For non-standard encodings.
    "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
    "\\|"
    ;; For UTF-8 encoding.
    "\\(\e%G[^\e]*\e%@\\)")))
835cbadb EZ |
1249 | |
1250 | ;; Functions to support "Non-Standard Character Set Encodings" defined | |
5c88a01e | 1251 | ;; by the COMPOUND-TEXT spec. |
cc926903 KH |
1252 | ;; We support that by decoding the whole data by `ctext' which just |
1253 | ;; retains byte sequences belonging to ``extended segment'', then |
1254 | ;; decoding those byte sequences one by one in Lisp. | |
5c88a01e KH |
1255 | ;; This function also supports "The UTF-8 encoding" described in the |
1256 | ;; section 7 of the documentation of COMPOUND-TEXT distributed with |
1257 | ;; XFree86. | |
1258 | ||
835cbadb EZ |
(defun ctext-post-read-conversion (len)
  "Decode LEN characters encoded as Compound Text with Extended Segments.
The text starting at point is first decoded with `ctext'.  Then each
extended segment whose encoding name is found in
`ctext-non-standard-encodings-alist', or names an existing coding
system, is decoded with that coding system, and each UTF-8 section is
decoded with `utf-8'.  Point is left at the start of the decoded text.
Return the length of the decoded text."
  ;; We don't need the following because it is expected that this
  ;; function is mainly used for decoding X selection which is not
  ;; that big data.
  ;;(buffer-disable-undo) ; minimize consing due to insertions and deletions
  (save-match-data
    (save-restriction
      (narrow-to-region (point) (+ (point) len))
      (let ((case-fold-search nil)
            last-coding-system-used
            pos bytes)
        (decode-coding-region (point-min) (point-max) 'ctext)
        (while (re-search-forward ctext-non-standard-encodings-regexp
                                  nil 'move)
          (setq pos (match-beginning 0))
          (if (match-beginning 1)
              ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
              (let* ((M (char-after (+ pos 4)))
                     (L (char-after (+ pos 5)))
                     (encoding (match-string 2))
                     (coding (or (cdr (assoc-ignore-case
                                       encoding
                                       ctext-non-standard-encodings-alist))
                                 ;; `coding-system-p' only answers yes
                                 ;; or no, so use the symbol itself
                                 ;; once it is known to be valid.
                                 (let ((sym (intern (downcase encoding))))
                                   (and (coding-system-p sym) sym)))))
                (if enable-multibyte-characters
                    (setq M (multibyte-char-to-unibyte M)
                          L (multibyte-char-to-unibyte L)))
                ;; M and L hold the segment length, 7 bits each with an
                ;; offset of 128.  Subtract what was matched after
                ;; them (name and \002) to get the remaining bytes.
                (setq bytes (- (+ (* (- M 128) 128) (- L 128))
                               (- (point) (+ pos 6))))
                (when coding
                  (delete-region pos (point))
                  (forward-char bytes)
                  (decode-coding-region (- (point) bytes) (point) coding)))
            ;; ESC % G --UTF-8-BYTES-- ESC % @
            (setq bytes (- (point) pos))
            (decode-coding-region (- (point) bytes) (point) 'utf-8))))
      (goto-char (point-min))
      (- (point-max) (point)))))
835cbadb | 1299 | |
c74fe809 EZ |
1300 | ;; If you add charsets here, be sure to modify the regexp used by |
1301 | ;; ctext-pre-write-conversion to look up non-standard charsets. | |
(defvar ctext-non-standard-designations-alist
  ;; Both big5 designations map to the same "big5-0" extended segment.
  '(("$(0" . (big5 "big5-0" 2))
    ("$(1" . (big5 "big5-0" 2))
    ;; The following are actually standard; generating extended
    ;; segments for them is wrong and screws e.g. Latin-9 users.
    ;; 8859-{10,13,16} aren't Emacs charsets anyhow.  -- fx
    ;; ("-V" . (t "iso8859-10" 1))
    ;; ("-Y" . (t "iso8859-13" 1))
    ;; ("-_" . (t "iso8859-14" 1))
    ;; ("-b" . (t "iso8859-15" 1))
    ;; ("-f" . (t "iso8859-16" 1))
    )
  "Alist of ctext control sequences that introduce character sets which
are not in the list of approved encodings, and the corresponding
coding system, identifier string, and number of octets per encoded
character.

Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)).  CTLSEQ
is the control sequence (sans the leading ESC) that introduces the character
set in the text encoded by compound-text.  ENCODING is a coding system
symbol; if it is t, it means that the ctext coding system already encodes
the text correctly, and only the leading control sequence needs to be altered.
If ENCODING is a coding system, we need to re-encode the text with that
coding system.  CHARSET is the name of the charset we need to put into
the leading control sequence.  NOCTETS is the number of octets (bytes) that
encode each character in this charset.  NOCTETS can be 0 (meaning the number
of octets per character is variable), 1, 2, 3, or 4.")
1329 | ||
(defun ctext-pre-write-conversion (from to)
  "Encode characters between FROM and TO as Compound Text w/Extended Segments.

If FROM is a string, make a new temporary buffer current, insert the
string there and convert it in that buffer.  Otherwise convert the
current buffer in place.  In both cases the whole accessible portion
of the buffer is encoded; TO is not consulted.

Charset designations listed in `ctext-non-standard-designations-alist'
are replaced by extended-segment headers of the form
  ESC % / NOCTETS M L CHARSET STX
where M and L are the high and low 7 bits of the segment length, each
with the top bit set, and the length counts CHARSET, the STX byte and
the encoded text.

Always return nil, as build_annotations_2 expects that."
  (save-match-data
    ;; Set up a working buffer if necessary.
    (when (stringp from)
      (set-buffer (generate-new-buffer " *temp"))
      (set-buffer-multibyte (multibyte-string-p from))
      (insert from))

    ;; Now we can encode the whole buffer.
    (let ((case-fold-search nil)
          ;; Bound here, presumably so that the conversions below do
          ;; not clobber the caller's value.
          last-coding-system-used
          pos posend desig encode-info encoding chset noctets textlen)
      (goto-char (point-min))
      ;; At first encode the whole buffer.
      (encode-coding-region (point-min) (point-max) 'ctext-no-compositions)
      ;; Then replace ISO-2022 charset designations with extended
      ;; segments, for those charsets that are not part of the
      ;; official X registry.  The regexp below finds the leading
      ;; sequences for big5.
      (while (re-search-forward "\e\\(\$([01]\\)" nil 'move)
        (setq pos (match-beginning 0)
              posend (point)
              desig (match-string 1)
              encode-info (cdr (assoc desig
                                      ctext-non-standard-designations-alist))
              encoding (car encode-info)
              chset (cadr encode-info)
              noctets (car (cddr encode-info)))
        ;; The segment extends up to the next escape sequence.
        (skip-chars-forward "^\e")
        (cond
         ((eq encoding t) ; only the leading sequence needs to be changed
          ;; Length = encoded text + charset name + the STX terminator.
          (setq textlen (+ (- (point) posend) (length chset) 1))
          ;; Generate the control sequence for an extended segment.
          ;; The charset name must be terminated by STX (octal 002).
          (replace-match (string-to-multibyte (format "\e%%/%d%c%c%s\002"
                                                      noctets
                                                      (+ (/ textlen 128) 128)
                                                      (+ (% textlen 128) 128)
                                                      chset))
                         t t))
         ((coding-system-p encoding) ; need to recode the entire segment...
          (decode-coding-region pos (point) 'ctext-no-compositions)
          (encode-coding-region pos (point) encoding)
          (setq textlen (+ (- (point) pos) (length chset) 1))
          (save-excursion
            (goto-char pos)
            (insert (string-to-multibyte (format "\e%%/%d%c%c%s\002"
                                                 noctets
                                                 (+ (/ textlen 128) 128)
                                                 (+ (% textlen 128) 128)
                                                 chset)))))))
      (goto-char (point-min))))
  ;; Must return nil, as build_annotations_2 expects that.
  nil)
1388 | ||
4ed46869 KH |
1389 | ;;; FILE I/O |
1390 | ||
(defcustom auto-coding-alist
  ;; Entries are tried in order by `auto-coding-alist-lookup'.
  '(;; Archive-like files (presumably including zip-based sx? documents).
    ("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|sx[dmicw]\\|tar\\)\\'" . no-conversion-multibyte)
    ("\\.tgz\\'" . no-conversion)
    ;; Compressed or encrypted files.
    ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion)
    ;; File names of the form #NAME# (presumably auto-save files).
    ("/#[^/]+#\\'" . emacs-mule))
  "Alist of filename patterns vs corresponding coding systems.
Each element looks like (REGEXP . CODING-SYSTEM).
A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.
The alist is searched by `auto-coding-alist-lookup', which ignores
case on some system types.

The settings in this alist take priority over `coding:' tags
in the file (see the function `set-auto-coding')
and the contents of `file-coding-system-alist'."
  :group 'files
  :group 'mule
  :type '(repeat (cons (regexp :tag "File name regexp")
                       (symbol :tag "Coding system"))))
835f49b8 | 1407 | |
(defcustom auto-coding-regexp-alist
  '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)
    ;; A byte-compiled file starts with ";ELC", one byte holding the
    ;; major version of the compiling Emacs, and three NUL bytes.
    ;; Version 20 is octal 024 (octal 14 would be 12, a form feed).
    ("\\`;ELC\024\0\0\0" . emacs-mule)) ; Emacs 20-compiled
  "Alist of patterns vs corresponding coding systems.
Each element looks like (REGEXP . CODING-SYSTEM).
A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.

The settings in this alist take priority over `coding:' tags
in the file (see the function `set-auto-coding')
and the contents of `file-coding-system-alist'."
  :group 'files
  :group 'mule
  :type '(repeat (cons (regexp :tag "Regexp")
                       (symbol :tag "Coding system"))))
1422 | ||
d9f6dfe6 | 1423 | ;; See the bottom of this file for built-in auto coding functions. |
447404a3 CW |
(defcustom auto-coding-functions '(sgml-xml-auto-coding-function
                                   sgml-html-meta-auto-coding-function)
  "A list of functions which attempt to determine a coding system.

Each function in this list should be written to operate on the
current buffer, but should not modify it in any way.  The buffer
will contain undecoded text of parts of the file.  Each function
should take one argument, SIZE, which says how many
characters (starting from point) it should look at.

If one of these functions succeeds in determining a coding
system, it should return that coding system.  Otherwise, it
should return nil.

The functions are called in order by `set-auto-coding', which
moves point to the beginning of the accessible portion of the
buffer before each call, restores point afterwards, and treats a
function that signals an error as if it had returned nil.

If a file has a `coding:' tag, that takes precedence over these
functions, so they won't be called at all."
  :group 'files
  :group 'mule
  :type '(repeat function))
1443 | ||
1c4cc63a KH |
(defvar set-auto-coding-for-load nil
  "Non-nil means `set-auto-coding' also honors a `unibyte:' tag.
When such a tag is found, either on the first lines or in the local
variables list, `set-auto-coding' returns `raw-text' without looking
for a `coding:' tag.
This is used for loading and byte-compiling Emacs Lisp files.")
1447 | ||
8a592131 RS |
(defun auto-coding-alist-lookup (filename)
  "Return the coding system specified by `auto-coding-alist' for FILENAME.
The entries are tried in order and the first one whose regexp matches
FILENAME and whose coding system is non-nil decides the result.
Return nil if there is no such entry.
Matching ignores case on system types whose file names are
conventionally case-insensitive."
  (let ((alist auto-coding-alist)
        (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos cygwin)))
        coding-system)
    (while (and alist (not coding-system))
      (if (string-match (car (car alist)) filename)
          (setq coding-system (cdr (car alist))))
      ;; Always advance: an entry that matches but names no coding
      ;; system (nil) must not be retried forever.
      (setq alist (cdr alist)))
    coding-system))
1458 | ||
835f49b8 KH |
(defun set-auto-coding (filename size)
  "Return coding system for a file FILENAME of which SIZE bytes follow point.
These bytes should include at least the first 1k of the file
and the last 3k of the file, but the middle may be omitted.

The function checks FILENAME against the variable `auto-coding-alist'.
If FILENAME doesn't match any entries in the variable, it checks the
contents of the current buffer following point against
`auto-coding-regexp-alist'.  If no match is found, it checks for a
`coding:' tag in the first one or two lines following point.  If no
`coding:' tag is found, it checks any local variables list in the last
3K bytes out of the SIZE bytes.  Finally, if none of these methods
succeed, it checks to see if any function in `auto-coding-functions'
gives a match.

If `set-auto-coding-for-load' is non-nil, a `unibyte:' tag found in
either place yields `raw-text'.

The return value is the specified coding system, or nil if nothing is
specified.

The variable `set-auto-coding-function' (which see) is set to this
function by default."
  (or (auto-coding-alist-lookup filename)
      ;; Try using `auto-coding-regexp-alist'.
      (save-excursion
        (let ((alist auto-coding-regexp-alist)
              coding-system)
          (while (and alist (not coding-system))
            (let ((regexp (car (car alist))))
              (when (re-search-forward regexp (+ (point) size) t)
                (setq coding-system (cdr (car alist)))))
            (setq alist (cdr alist)))
          coding-system))
      ;; Look for `coding:' / `unibyte:' tags.  Note that this form
      ;; moves point; only the forms before and after it use
      ;; `save-excursion'.
      (let* ((case-fold-search t)
             (head-start (point))
             (head-end (+ head-start (min size 1024)))
             (tail-start (+ head-start (max (- size 3072) 0)))
             (tail-end (+ head-start size))
             coding-system head-found tail-found pos)
        ;; Try a short cut by searching for the string "coding:"
        ;; and for "unibyte:" at the head and tail of SIZE bytes.
        (setq head-found (or (search-forward "coding:" head-end t)
                             (search-forward "unibyte:" head-end t)))
        (if (and head-found (> head-found tail-start))
            ;; Head and tail are overlapped.
            (setq tail-found head-found)
          (goto-char tail-start)
          (setq tail-found (or (search-forward "coding:" tail-end t)
                               (search-forward "unibyte:" tail-end t))))

        ;; At first check the head.
        (when head-found
          (goto-char head-start)
          ;; `set-auto-mode-1' is used for its return value (taken as
          ;; the end of the header area) and for where it leaves point
          ;; (taken as the start of that area).
          (setq head-end (set-auto-mode-1))
          (setq head-start (point))
          ;; Only trust the tag if it lies inside that area.
          (when (and head-end (< head-found head-end))
            (goto-char head-start)
            (when (and set-auto-coding-for-load
                       (re-search-forward
                        "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
                        head-end t))
              (setq coding-system 'raw-text))
            (when (and (not coding-system)
                       (re-search-forward
                        "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
                        head-end t))
              (setq coding-system (intern (match-string 2)))
              ;; Ignore a tag that does not name a known coding system.
              (or (coding-system-p coding-system)
                  (setq coding-system nil)))))

        ;; If no coding: tag in the head, check the tail.
        (when (and tail-found (not coding-system))
          (goto-char tail-start)
          ;; Skip past a form feed that starts a line, if there is one.
          (search-forward "\n\^L" nil t)
          (if (re-search-forward
               "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
              ;; The prefix is what comes before "local variables:" in its
              ;; line.  The suffix is what comes after "local variables:"
              ;; in its line.
              (let* ((prefix (regexp-quote (match-string 1)))
                     (suffix (regexp-quote (match-string 2)))
                     (re-coding
                      (concat
                       "^" prefix
                       ;; N.B. without the \n below, the regexp can
                       ;; eat newlines.
                       "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
                       suffix "$"))
                     (re-unibyte
                      (concat
                       "^" prefix
                       "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
                       suffix "$"))
                     (re-end
                      (concat "^" prefix "[ \t]*End *:[ \t]*" suffix "$"))
                     (pos (point)))
                ;; Restrict the search to the text before the "End:"
                ;; line, or to the old TAIL-END if there is none.
                (re-search-forward re-end tail-end 'move)
                (setq tail-end (point))
                (goto-char pos)
                (when (and set-auto-coding-for-load
                           (re-search-forward re-unibyte tail-end t))
                  (setq coding-system 'raw-text))
                (when (and (not coding-system)
                           (re-search-forward re-coding tail-end t))
                  (setq coding-system (intern (match-string 1)))
                  ;; Ignore a tag that does not name a known coding system.
                  (or (coding-system-p coding-system)
                      (setq coding-system nil))))))
        coding-system)
      ;; Finally, try all the `auto-coding-functions'.
      (let ((funcs auto-coding-functions)
            (coding-system nil))
        (while (and funcs (not coding-system))
          ;; Each function is called with point at `point-min'; a
          ;; function that signals an error counts as returning nil.
          (setq coding-system (condition-case e
                                  (save-excursion
                                    (goto-char (point-min))
                                    (funcall (pop funcs) size))
                                (error nil))))
        coding-system)))
63561304 KH |
1575 | |
;; Install `set-auto-coding' as the value of `set-auto-coding-function'.
(setq set-auto-coding-function 'set-auto-coding)
87aba788 | 1577 | |
(defun after-insert-file-set-coding (inserted)
  "Set `buffer-file-coding-system' of current buffer after text is inserted.
The new value is derived from `last-coding-system-used' by
`find-new-buffer-file-coding-system'; nothing is done when either of
them is nil.  The buffer's modified flag is left as it was.

INSERTED is the number of characters that were inserted, as figured
in the situation before this function.  The return value is INSERTED."
  (when last-coding-system-used
    (let ((new-coding
           (find-new-buffer-file-coding-system last-coding-system-used))
          (was-modified (buffer-modified-p)))
      (if new-coding
          (progn
            (set-buffer-file-coding-system new-coding t)
            ;; Changing the coding system must not mark the buffer
            ;; as modified.
            (set-buffer-modified-p was-modified)))))
  inserted)
4ed46869 | 1592 | |
;; The text conversion and the eol-type of the coding system returned
;; are decided independently of each other, each in this order:
;;  1. That of a buffer-local `buffer-file-coding-system'.
;;  2. That of CODING.
1597 | ||
(defun find-new-buffer-file-coding-system (coding)
  "Return a coding system for a buffer when a file of CODING is inserted.
Callers such as `after-insert-file-set-coding' store the returned
value in `buffer-file-coding-system' of the current buffer.
Return nil if there's no need to set `buffer-file-coding-system':
that is, if CODING is nil, if the buffer already has a buffer-local
value that fixes both text conversion and eol-type, or if CODING
carries no usable information."
  (when coding
    (let* ((buffer-coding buffer-file-coding-system)
           (pinned (local-variable-p 'buffer-file-coding-system))
           ;; Eol-type of the buffer's coding system, or nil if it is
           ;; not yet decided.
           (buffer-eol (let ((eol (coding-system-eol-type buffer-coding)))
                         (and (numberp eol) eol)))
           ;; Base of the buffer's coding system, unless it is undecided.
           (buffer-base (and buffer-coding
                             (not (eq (coding-system-type buffer-coding)
                                      'undecided))
                             (coding-system-base buffer-coding))))
      ;; A buffer-local value that is already complete is left alone.
      (unless (and pinned buffer-eol buffer-base)
        (let* ((file-eol (let ((eol (coding-system-eol-type coding)))
                           (cond ((numberp eol) eol)
                                 ;; If EOL conversions are inhibited,
                                 ;; force unix eol-type.
                                 (inhibit-eol-conversion 0))))
               (file-base (coding-system-base coding)))
          ;; Give up if CODING tells us neither eol-type nor text conversion.
          (unless (and (not file-eol) (eq file-base 'undecided))
            ;; A buffer-local setting takes precedence over what was
            ;; found; otherwise what was found wins.
            (let* ((chosen-base (if pinned
                                    (or buffer-base file-base)
                                  (or file-base buffer-base)))
                   (chosen-eol (if pinned
                                   (or buffer-eol file-eol)
                                 (or file-eol buffer-eol)))
                   (variants (coding-system-eol-type chosen-base)))
              (if (and (numberp chosen-eol) (vectorp variants))
                  (aref variants chosen-eol)
                chosen-base))))))))
4ed46869 | 1652 | |
fe831d33 GV |
(defun modify-coding-system-alist (target-type regexp coding-system)
  "Modify one of look up tables for finding a coding system on I/O operation.
There are three of such tables, `file-coding-system-alist',
`process-coding-system-alist', and `network-coding-system-alist'.

TARGET-TYPE specifies which of them to modify.
If it is `file', it affects `file-coding-system-alist' (which see).
If it is `process', it affects `process-coding-system-alist' (which see).
If it is `network', it affects `network-coding-system-alist' (which see).

REGEXP is a regular expression matching a target of I/O operation.
The target is a file name if TARGET-TYPE is `file', a program name if
TARGET-TYPE is `process', or a network service name or a port number
to connect to if TARGET-TYPE is `network'.

CODING-SYSTEM is a coding system to perform code conversion on the I/O
operation, or a cons cell (DECODING . ENCODING) specifying the coding systems
for decoding and encoding respectively,
or a function symbol which, when called, returns such a cons cell.

If the table already has an element for REGEXP, that element is
updated in place; otherwise a new element is added at the front."
  (unless (memq target-type '(file process network))
    (error "Invalid target type: %s" target-type))
  (unless (or (stringp regexp)
              (and (eq target-type 'network) (integerp regexp)))
    (error "Invalid regular expression: %s" regexp))
  (cond ((not (symbolp coding-system))
         ;; A (DECODING . ENCODING) pair: validate both halves.
         (check-coding-system (car coding-system))
         (check-coding-system (cdr coding-system)))
        ((not (fboundp coding-system))
         ;; A plain coding system is used for both directions.
         (check-coding-system coding-system)
         (setq coding-system (cons coding-system coding-system))))
  ;; A function symbol is stored as is.
  (let* ((table-symbol (cond ((eq target-type 'file)
                              'file-coding-system-alist)
                             ((eq target-type 'process)
                              'process-coding-system-alist)
                             (t 'network-coding-system-alist)))
         (existing (assoc regexp (symbol-value table-symbol))))
    (if existing
        (setcdr existing coding-system)
      (set table-symbol
           (cons (cons regexp coding-system)
                 (symbol-value table-symbol))))))
1705 | ||
db046b7d KH |
(defun decode-coding-inserted-region (from to filename
                                           &optional visit beg end replace)
  "Decode the region between FROM and TO as if it is read from file FILENAME.
Optional arguments VISIT, BEG, END, and REPLACE are the same as those
of the function `insert-file-contents'.

The coding system is taken from the first of these that yields one:
`coding-system-for-read', the function in `set-auto-coding-function',
and the decoding part of what `find-operation-coding-system' returns
for `insert-file-contents'.  In a unibyte buffer the text conversion
of that coding system is replaced by `raw-text'.

The coding system used (or nil if none was found) is stored in
`last-coding-system-used' and returned."
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let ((coding coding-system-for-read))
        (or coding
            (setq coding (funcall set-auto-coding-function
                                  filename (- (point-max) (point-min)))))
        (or coding
            ;; `find-operation-coding-system' returns a cons of the form
            ;; (DECODING . ENCODING); only the decoding part is wanted.
            (setq coding (car (find-operation-coding-system
                               'insert-file-contents
                               filename visit beg end replace))))
        (if (coding-system-p coding)
            (or enable-multibyte-characters
                (setq coding
                      (coding-system-change-text-conversion coding 'raw-text)))
          (setq coding nil))
        (if coding
            (decode-coding-region (point-min) (point-max) coding))
        (setq last-coding-system-used coding)))))
1731 | ||
(defun make-translation-table (&rest args)
  "Make a translation table from arguments.
A translation table is a char table intended for character
translation in CCL programs.

Each argument is a list of elements of the form (FROM . TO), where FROM
is a character to be translated to TO.

The arguments and forms in each argument are processed in the given
order, and if a previous form already translates TO to some other
character, say TO-ALT, FROM is also translated to TO-ALT.  Likewise,
characters that were already translated to FROM are redirected to
the final target of FROM."
  (let ((result (make-char-table 'translation-table))
        ;; Alist of (TARGET SOURCE...): for each final target, the
        ;; characters currently translated to it.
        (sources-by-target nil))
    (dolist (pairs args)
      (dolist (pair pairs)
        (let* ((src (car pair))
               ;; Follow an existing translation of the target, if any.
               (dst (or (aref result (cdr pair)) (cdr pair)))
               ;; Characters that used to end at SRC.
               (chained (assq src sources-by-target))
               (inherited (cdr chained))
               entry)
          (aset result src dst)
          ;; Whatever was translated to SRC now goes on to DST.
          (when chained
            (dolist (ch inherited)
              (aset result ch dst))
            (setq sources-by-target (delq chained sources-by-target)))
          ;; Record SRC, and everything inherited from it, under DST.
          (setq entry (assq dst sources-by-target))
          (if entry
              (setcdr entry (cons src (cdr entry)))
            (setq entry (list dst src))
            (setq sources-by-target (cons entry sources-by-target)))
          (when inherited
            (setcdr entry (append inherited (cdr entry)))))))
    result))
1772 | ||
c76b5c99 KH |
(defun make-translation-table-from-vector (vec)
  "Make translation table from decoding vector VEC.
VEC is an array of 256 elements to map unibyte codes to multibyte
characters.  Elements may be nil for undefined code points.
The reverse mapping, for characters not less than 256 only, is kept
in a second translation table stored in extra slot 0 of the result.
See also the variable `nonascii-translation-table'."
  (let ((forward (make-char-table 'translation-table))
        (backward (make-char-table 'translation-table))
        (code 0))
    (while (< code 256)
      (let ((char (aref vec code)))
        (when char
          (aset forward code char)
          ;; Only characters outside the unibyte range get a
          ;; reverse entry.
          (unless (< char 256)
            (aset backward char code))))
      (setq code (1+ code)))
    (set-char-table-extra-slot forward 0 backward)
    forward))
1789 | ||
(defun define-translation-table (symbol &rest args)
  "Define SYMBOL as the name of translation table made by ARGS.
This sets up information so that the table can be used for
translations in a CCL program.

If the first element of ARGS is a char-table whose purpose is
`translation-table', just define SYMBOL to name it.  (Note that this
function does not bind SYMBOL.)

Any other ARGS should be suitable as arguments of the function
`make-translation-table' (which see).

This function sets properties `translation-table' and
`translation-table-id' of SYMBOL to the created table itself and the
identification number of the table respectively.  It also registers
the table in `translation-table-vector', reusing the slot already
held by SYMBOL if there is one and growing the vector when it is full.

The return value is the identification number."
  (let ((table (if (and (char-table-p (car args))
                        (eq (char-table-subtype (car args))
                            'translation-table))
                   (car args)
                 (apply 'make-translation-table args)))
        (id 0)
        (done nil))
    (put symbol 'translation-table table)
    (while (not done)
      ;; The length is read afresh on every pass because the vector
      ;; may have been replaced by a longer one.
      (let ((len (length translation-table-vector)))
        (if (>= id len)
            ;; Double the vector.  `max' makes sure that an empty
            ;; vector grows too, instead of leaving ID out of range.
            (setq translation-table-vector
                  (vconcat translation-table-vector
                           (make-vector (max len 1) nil)))))
      (let ((slot (aref translation-table-vector id)))
        (if (or (not slot)
                (eq (car slot) symbol))
            ;; A free slot, or the one SYMBOL already owns.
            (progn
              (aset translation-table-vector id (cons symbol table))
              (setq done t))
          (setq id (1+ id)))))
    (put symbol 'translation-table-id id)
    id))
1828 | ||
35554641 KH |
(put 'with-category-table 'lisp-indent-function 1)

(defmacro with-category-table (table &rest body)
  "Execute BODY like `progn' with TABLE the current category table.
The category table of the current buffer is saved, BODY is evaluated,
then the saved table is restored in that same buffer, even in case of
an abnormal exit.
Value is what BODY returns."
  ;; Uninterned symbols keep the temporaries from shadowing
  ;; variables used in BODY.
  (let ((saved-table (make-symbol "old-table"))
        (saved-buffer (make-symbol "old-buffer")))
    `(let ((,saved-table (category-table))
           (,saved-buffer (current-buffer)))
       (unwind-protect
           (progn
             (set-category-table ,table)
             ,@body)
         ;; BODY may have switched buffers; restore the table in the
         ;; buffer it was taken from.
         (with-current-buffer ,saved-buffer
           (set-category-table ,saved-table))))))
35554641 | 1847 | |
394e4eb0 DL |
(defun define-translation-hash-table (symbol table)
  "Define SYMBOL as the name of the hash translation TABLE for use in CCL.

Analogous to `define-translation-table', but updates
`translation-hash-table-vector' and the table is for use in the CCL
`lookup-integer' and `lookup-character' functions.

TABLE is stored in the `translation-hash-table' property of SYMBOL and
its index in the vector in the `translation-hash-table-id' property.
The index is also the return value."
  (if (not (and (symbolp symbol)
                (hash-table-p table)))
      (error "Bad args to define-translation-hash-table"))
  (let ((count (length translation-hash-table-vector))
        (index 0))
    (put symbol 'translation-hash-table table)
    ;; Skip the slots that belong to other symbols.
    (while (and (< index count)
                (let ((entry (aref translation-hash-table-vector index)))
                  (and entry (not (eq (car entry) symbol)))))
      (setq index (1+ index)))
    ;; No free or matching slot: append one.
    (unless (< index count)
      (setq translation-hash-table-vector
            (vconcat translation-hash-table-vector [nil])))
    (aset translation-hash-table-vector index (cons symbol table))
    (put symbol 'translation-hash-table-id index)
    index))
1874 | ||
69eba008 KH |
1875 | ;;; Initialize some variables. |
1876 | ||
;; `use-default-ascent' and `ignore-relative-composition' are each set
;; to a fresh char-table whose subtype is the variable's own name.
;; The subtype is first declared to have zero extra slots.
(put 'use-default-ascent 'char-table-extra-slots 0)
(setq use-default-ascent (make-char-table 'use-default-ascent))
(put 'ignore-relative-composition 'char-table-extra-slots 0)
(setq ignore-relative-composition
      (make-char-table 'ignore-relative-composition))

;; Mark `set-char-table-default' as obsolete since 22.1.
(make-obsolete 'set-char-table-default
               "Generic characters no longer exist" "22.1")
d9f6dfe6 CW |
1885 | |
1886 | ;;; Built-in auto-coding-functions: | |
1887 | ||
(defun sgml-xml-auto-coding-function (size)
  "Determine whether the buffer is XML, and if so, its encoding.
This function is intended to be added to `auto-coding-functions'.

SIZE is the number of characters following point to examine.
Return nil if the buffer does not begin with an XML declaration, or
if the declaration names a coding system Emacs does not know (a
warning is shown in that case).  Return `utf-8' if the declaration
has no encoding attribute.  Otherwise return the declared coding
system.  Attribute values may be in double or single quotes."
  (let ((limit (+ (point) size)))
    (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" limit t)
      (let ((decl-end (save-excursion
                        ;; This is a hack: the end of the declaration
                        ;; is only recognized when a closing quote
                        ;; comes just before the "?>".
                        (re-search-forward "[\"']\\s-*\\?>" limit t))))
        (when decl-end
          (if (re-search-forward
               "encoding\\s-*=\\s-*[\"']\\(.+?\\)[\"']" decl-end t)
              (let* ((match (match-string 1))
                     (sym (intern (downcase match))))
                (if (coding-system-p sym)
                    sym
                  (message "Warning: unknown coding system \"%s\"" match)
                  nil))
            ;; No encoding attribute in the declaration.
            'utf-8))))))
1905 | ||
447404a3 CW |
(defun sgml-html-meta-auto-coding-function (size)
  "If the buffer has an HTML meta tag, use it to determine encoding.
This function is intended to be added to `auto-coding-functions'.

SIZE is the number of characters following point to examine; in any
case no more than the next 10 lines are searched.  Tag and attribute
names are matched without regard to case.  Return the coding system
named by the charset of a content-type meta tag that follows an
opening html tag, or nil if there is none or if Emacs does not know
that coding system (a warning is shown in the latter case)."
  (let ((case-fold-search t)            ; HTML markup is case-insensitive.
        (limit (min (+ (point) size)
                    ;; Only search forward 10 lines
                    (save-excursion
                      (forward-line 10)
                      (point)))))
    (when (and (search-forward "<html>" limit t)
               (re-search-forward "<meta\\s-+http-equiv=\"content-type\"\\s-+content=\"text/\\sw+;\\s-*charset=\\(.+?\\)\"" limit t))
      (let* ((match (match-string 1))
             (sym (intern (downcase match))))
        (if (coding-system-p sym)
            sym
          (message "Warning: unknown coding system \"%s\"" match)
          nil)))))
0bca779a | 1922 | |
69eba008 | 1923 | ;;; |
4ed46869 KH |
1924 | (provide 'mule) |
1925 | ||
1926 | ;;; mule.el ends here |