(make-coding-system): Add description
[bpt/emacs.git] / lisp / international / mule.el
CommitLineData
4ed46869
KH
1;;; mule.el --- basic commands for multilingual environment
2
4ed46869 3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
58cfed09 4;; Licensed to the Free Software Foundation.
4ed46869
KH
5
6;; Keywords: mule, multilingual, character set, coding system
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
369314dc
KH
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
4ed46869
KH
24
25;;; Code:
26
(defconst mule-version "5.0 (SAKAKI)"
  "Version number and name of this version of MULE (multilingual environment).")
29
(defconst mule-version-date "1999.12.7"
  "Distribution date of this version of MULE (multilingual environment).")
32
(defun load-with-code-conversion (fullname file &optional noerror nomessage)
  "Load and evaluate the Lisp file FILE, whose absolute name is FULLNAME.
The file contents are decoded, if necessary, before they are evaluated.
If the optional argument NOERROR is non-nil, do not signal an error
 when FULLNAME is not readable; return nil instead.
Messages are printed at the start and end of loading unless the
 optional argument NOMESSAGE is non-nil.
Return t if the file was read and evaluated."
  (cond
   ((not (file-readable-p fullname))
    (if noerror
        nil
      (signal 'file-error (list "Cannot open load file" file))))
   (t
    ;; Read file with code conversion, and then eval.
    ;; The names `buffer', `source' and `hook' are kept as they are:
    ;; they are dynamically bound while the file's forms are evaluated.
    (let* ((buffer
            ;; To avoid any autoloading, set default-major-mode to
            ;; fundamental-mode.
            ;; So that we don't get completely screwed if the
            ;; file is encoded in some complicated character set,
            ;; read it with real decoding, as a multibyte buffer,
            ;; even if this is a --unibyte Emacs session.
            (let ((default-major-mode 'fundamental-mode)
                  (default-enable-multibyte-characters t))
              ;; We can't use `generate-new-buffer' because files.el
              ;; is not yet loaded.
              (get-buffer-create (generate-new-buffer-name " *load*"))))
           (load-in-progress t)
           (source (save-match-data (string-match "\\.el\\'" fullname))))
      (or nomessage
          (message (if source "Loading %s (source)..." "Loading %s...")
                   file))
      (if purify-flag
          (setq preloaded-file-list (cons file preloaded-file-list)))
      (unwind-protect
          (let ((load-file-name fullname)
                (set-auto-coding-for-load t)
                (inhibit-file-name-operation nil))
            (save-excursion
              (set-buffer buffer)
              (insert-file-contents fullname)
              ;; If the loaded file was inserted with no-conversion or
              ;; raw-text coding system, make the buffer unibyte.
              ;; Otherwise, eval-buffer might try to interpret random
              ;; binary junk as multibyte characters.
              (when (and enable-multibyte-characters
                         (or (eq (coding-system-type last-coding-system-used) 5)
                             (eq last-coding-system-used 'no-conversion)))
                (set-buffer-multibyte nil))
              ;; Make `kill-buffer' quiet.
              (set-buffer-modified-p nil))
            ;; Have the original buffer current while we eval.
            (eval-buffer buffer nil file
                         ;; If this Emacs is running with --unibyte,
                         ;; convert multibyte strings to unibyte
                         ;; after reading them.
                         ;; (not default-enable-multibyte-characters)
                         nil t))
        ;; Cleanup: always discard the temporary buffer, without
        ;; running any kill hooks or query functions.
        (let (kill-buffer-hook kill-buffer-query-functions)
          (kill-buffer buffer)))
      ;; Evaluate the forms registered for FILE in `after-load-alist'.
      (let ((hook (assoc file after-load-alist)))
        (if hook
            (mapcar 'eval (cdr hook))))
      (or nomessage
          noninteractive
          (message (if source
                       "Loading %s (source)...done"
                     "Loading %s...done")
                   file))
      t))))
100
101;; API (Application Program Interface) for charsets.
102
2e02a76f
RS
(defsubst charset-quoted-standard-p (obj)
  "Return t if OBJ is a quoted symbol naming a standard charset.
OBJ qualifies when it is a form (quote SYMBOL) and the `charset'
property of SYMBOL is a vector whose first element is less than 160."
  (if (and (listp obj)
           (eq (car obj) 'quote))
      (let ((sym (car-safe (cdr obj))))
        (and (symbolp sym)
             (let ((info (get sym 'charset)))
               (and (vectorp info)
                    (< (aref info 0) 160)))))))
4ed46869
KH
111
(defsubst charsetp (object)
  "Return t if OBJECT is a charset.
A charset is a symbol whose `charset' property is a vector."
  (if (symbolp object)
      (vectorp (get object 'charset))))
115
(defsubst charset-info (charset)
  "Return the information vector of CHARSET.
The vector is the value of the `charset' property of the symbol
CHARSET.  Its elements are, in order:
  CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
  LEADING-CODE-BASE, LEADING-CODE-EXT,
  ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
  REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
  PLIST,
where
CHARSET-ID (integer) is the identification number of the charset.
BYTES (integer) is the length of the multi-byte form of a character
  in the charset: one of 1, 2, 3, and 4.
DIMENSION (integer) is the number of bytes needed to represent a
  character of the charset: 1 or 2.
CHARS (integer) is the number of characters in a dimension: 94 or 96.
WIDTH (integer) is the number of columns a character in the charset
  occupies on the screen: one of 0, 1, and 2.
DIRECTION (integer) is the rendering direction of characters in the
  charset.  If 0, render from left to right, else render from right
  to left.
LEADING-CODE-BASE (integer) is the base leading-code for the
  charset.
LEADING-CODE-EXT (integer) is the extended leading-code for the
  charset.  All charsets whose ID is less than 0xA0 have the value 0.
ISO-FINAL-CHAR (character) is the final character of the
  corresponding ISO 2022 charset.  If the charset is not assigned
  any final character, the value is -1.
ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
  while encoding to variants of ISO 2022 coding system, one of the
  following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
  If the charset is not assigned any final character, the value is -1.
REVERSE-CHARSET (integer) is the charset which differs only in
  LEFT-TO-RIGHT value from the charset.  If there is no such
  charset, the value is -1.
SHORT-NAME (string) is the short name to refer to the charset.
LONG-NAME (string) is the long name to refer to the charset.
DESCRIPTION (string) is the description string of the charset.
PLIST (property list) may contain any type of information a user
  wants to put and get by the functions `put-charset-property' and
  `get-charset-property' respectively."
  (get charset 'charset))
157
40c81f74
PE
158;; It is better not to use backquote in this file,
159;; because that makes a bootstrapping problem
160;; if you need to recompile all the Lisp files using interpreted code.
161
4ed46869
KH
(defmacro charset-id (charset)
  "Return charset identification number of CHARSET.
This is element 0 of the vector returned by `charset-info'."
  (cond ((charset-quoted-standard-p charset)
         ;; Quoted standard charset: look the value up right now,
         ;; while the macro is being expanded.
         (aref (charset-info (car (cdr charset))) 0))
        (t
         (list 'aref (list 'charset-info charset) 0))))
4ed46869
KH
167
(defmacro charset-bytes (charset)
  "Return bytes of CHARSET.
This is element 1 of the vector returned by `charset-info', which
see for more detail."
  (cond ((charset-quoted-standard-p charset)
         ;; Quoted standard charset: resolved at macro-expansion time.
         (aref (charset-info (car (cdr charset))) 1))
        (t
         (list 'aref (list 'charset-info charset) 1))))
4ed46869
KH
174
(defmacro charset-dimension (charset)
  "Return dimension of CHARSET.
This is element 2 of the vector returned by `charset-info', which
see for more detail."
  (cond ((charset-quoted-standard-p charset)
         ;; Quoted standard charset: resolved at macro-expansion time.
         (aref (charset-info (car (cdr charset))) 2))
        (t
         (list 'aref (list 'charset-info charset) 2))))
4ed46869
KH
181
(defmacro charset-chars (charset)
  "Return character numbers contained in a dimension of CHARSET.
This is element 3 of the vector returned by `charset-info', which
see for more detail."
  (cond ((charset-quoted-standard-p charset)
         ;; Quoted standard charset: resolved at macro-expansion time.
         (aref (charset-info (car (cdr charset))) 3))
        (t
         (list 'aref (list 'charset-info charset) 3))))
4ed46869
KH
188
(defmacro charset-width (charset)
  "Return width (how many columns are occupied on a screen) of CHARSET.
This is element 4 of the vector returned by `charset-info', which
see for more detail."
  (cond ((charset-quoted-standard-p charset)
         ;; Quoted standard charset: resolved at macro-expansion time.
         (aref (charset-info (car (cdr charset))) 4))
        (t
         (list 'aref (list 'charset-info charset) 4))))
4ed46869
KH
195
(defmacro charset-direction (charset)
  "Return direction of CHARSET.
This is element 5 of the vector returned by `charset-info', which
see for more detail."
  (cond ((charset-quoted-standard-p charset)
         ;; Quoted standard charset: resolved at macro-expansion time.
         (aref (charset-info (car (cdr charset))) 5))
        (t
         (list 'aref (list 'charset-info charset) 5))))
4ed46869
KH
202
(defmacro charset-iso-final-char (charset)
  "Return final char of CHARSET.
This is element 8 of the vector returned by `charset-info', which
see for more detail."
  (cond ((charset-quoted-standard-p charset)
         ;; Quoted standard charset: resolved at macro-expansion time.
         (aref (charset-info (car (cdr charset))) 8))
        (t
         (list 'aref (list 'charset-info charset) 8))))
4ed46869
KH
209
(defmacro charset-iso-graphic-plane (charset)
  "Return graphic plane of CHARSET.
This is element 9 of the vector returned by `charset-info', which
see for more detail."
  (cond ((charset-quoted-standard-p charset)
         ;; Quoted standard charset: resolved at macro-expansion time.
         (aref (charset-info (car (cdr charset))) 9))
        (t
         (list 'aref (list 'charset-info charset) 9))))
4ed46869
KH
216
(defmacro charset-reverse-charset (charset)
  "Return reverse charset of CHARSET.
This is element 10 of the vector returned by `charset-info', which
see for more detail."
  (cond ((charset-quoted-standard-p charset)
         ;; Quoted standard charset: resolved at macro-expansion time.
         (aref (charset-info (car (cdr charset))) 10))
        (t
         (list 'aref (list 'charset-info charset) 10))))
4ed46869
KH
223
(defmacro charset-short-name (charset)
  "Return short name of CHARSET.
This is element 11 of the vector returned by `charset-info', which
see for more detail."
  (cond ((charset-quoted-standard-p charset)
         ;; Quoted standard charset: resolved at macro-expansion time.
         (aref (charset-info (car (cdr charset))) 11))
        (t
         (list 'aref (list 'charset-info charset) 11))))
4ed46869
KH
230
(defmacro charset-long-name (charset)
  "Return long name of CHARSET.
This is element 12 of the vector returned by `charset-info', which
see for more detail."
  (cond ((charset-quoted-standard-p charset)
         ;; Quoted standard charset: resolved at macro-expansion time.
         (aref (charset-info (car (cdr charset))) 12))
        (t
         (list 'aref (list 'charset-info charset) 12))))
4ed46869
KH
237
(defmacro charset-description (charset)
  "Return description of CHARSET.
This is element 13 of the vector returned by `charset-info', which
see for more detail."
  (cond ((charset-quoted-standard-p charset)
         ;; Quoted standard charset: resolved at macro-expansion time.
         (aref (charset-info (car (cdr charset))) 13))
        (t
         (list 'aref (list 'charset-info charset) 13))))
4ed46869
KH
244
(defmacro charset-plist (charset)
  "Return the property list of CHARSET.
This is element 14 of the vector returned by `charset-info', which
see for more detail."
  ;; Unlike the other accessors, the expansion is always an `aref'
  ;; form.  For a quoted standard charset only the info vector is
  ;; looked up at macro-expansion time; element 14 is read when the
  ;; expansion is evaluated.
  (let ((info-form (if (charset-quoted-standard-p charset)
                       (charset-info (car (cdr charset)))
                     (list 'charset-info charset))))
    (list 'aref info-form 14)))
4ed46869
KH
253
(defun set-charset-plist (charset plist)
  "Set CHARSET's property list to PLIST, and return PLIST.
PLIST is stored in element 14 of the vector returned by `charset-info'."
  (let ((info (charset-info charset)))
    (aset info 14 plist)))
257
d3e7e7cf
EZ
(defun make-char (charset &optional code1 code2)
  "Return a character of CHARSET whose position codes are CODE1 and CODE2.
CODE1 and CODE2 are optional, but if you don't supply
sufficient position codes, return a generic character which stands for
all characters or group of characters in the character set.
A generic character can be used to index a char table (e.g. syntax-table).

Such character sets as ascii, eight-bit-control, and eight-bit-graphic
don't have corresponding generic characters.  If CHARSET is one of
them and you don't supply CODE1, return the character of the smallest
code in CHARSET.

If CODE1 or CODE2 are invalid (out of range), this function signals an error."
  ;; Resolve CHARSET to its numeric ID and let the primitive do the work.
  (let ((id (charset-id charset)))
    (make-char-internal id code1 code2)))
a6acd8a2
KH
272
;; Byte-compile handler for `make-char': compile the call as a call to
;; `make-char-internal'.  When the charset argument is a quoted
;; standard charset, its ID is looked up at compile time; otherwise a
;; `charset-id' form is left in place of the charset argument.
(put 'make-char 'byte-compile
     (function
      (lambda (form)
        (let* ((charset (nth 1 form))
               (id-form (if (charset-quoted-standard-p charset)
                            (charset-id (nth 1 charset))
                          (list 'charset-id charset))))
          (byte-compile-normal-call
           (cons 'make-char-internal
                 (cons id-form (nthcdr 2 form))))))))
4ed46869 284
(defun charset-list ()
  "Return list of charsets ever defined.

This function exists for backward compatibility only; it simply
returns the value of the variable `charset-list'."
  charset-list)
291
13d5617d
KH
(defsubst generic-char-p (char)
  "Return t if and only if CHAR is a generic character.
See also the documentation of `make-char'."
  ;; NOTE(review): the Lisp reader takes 0400 as decimal 400, not octal
  ;; 0400 (= 256).  Confirm which threshold was intended before changing.
  (if (>= char 0400)
      (let* ((parts (split-char char))
             (code1 (nth 1 parts))
             (code2 (nth 2 parts)))
        ;; Generic means one of the position codes is 0, and the
        ;; charset is not `composition'.
        (and (or (= code1 0) (eq code2 0))
             (not (eq (car parts) 'composition))))))
69eba008 299
(defun decode-char (ccs code-point &optional restriction)
  "Return character specified by coded character set CCS and CODE-POINT in it.
Return nil if such a character is not supported.
Currently the only supported coded character set is `ucs' (ISO/IEC
10646: Universal Multi-Octet Coded Character Set).

CODE-POINT must be an integer.  A negative CODE-POINT, or one outside
the ranges 0..#x33ff and #xe000..#xffff, yields nil.

Optional argument RESTRICTION specifies a way to map the pair of CCS
and CODE-POINT to a character.  Currently not supported and just ignored."
  (cond ((eq ccs 'ucs)
         (cond ((< code-point 0)
                ;; A negative value is not a code point; report it as
                ;; unsupported instead of returning it as a character.
                nil)
               ((< code-point 160)
                code-point)
               ((< code-point 256)
                (make-char 'latin-iso8859-1 code-point))
               ;; The remaining ranges map onto 96x96 charsets: the
               ;; offset within the range is split into two position
               ;; codes, each biased by 32.
               ((< code-point #x2500)
                (setq code-point (- code-point #x0100))
                (make-char 'mule-unicode-0100-24ff
                           (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
               ((< code-point #x3400)
                (setq code-point (- code-point #x2500))
                (make-char 'mule-unicode-2500-33ff
                           (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
               ((and (>= code-point #xe000) (< code-point #x10000))
                (setq code-point (- code-point #xe000))
                (make-char 'mule-unicode-e000-ffff
                           (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
               ))))
326
(defun encode-char (char ccs &optional restriction)
  "Return code-point in coded character set CCS that corresponds to CHAR.
Return nil if CHAR is not included in CCS.
Currently the only supported coded character set is `ucs' (ISO/IEC
10646: Universal Multi-Octet Coded Character Set).

CHAR should be in one of these charsets:
  ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff,
  mule-unicode-e000-ffff, eight-bit-control
Otherwise, return nil.

Optional argument RESTRICTION specifies a way to map CHAR to a
code-point in CCS.  Currently not supported and just ignored."
  (let* ((split (split-char char))
         (charset (car split))
         base)
    (if (eq ccs 'ucs)
        (cond ((memq charset '(ascii eight-bit-control))
               ;; These characters are their own code points.
               char)
              ((eq charset 'latin-iso8859-1)
               (+ (nth 1 split) 128))
              ;; 96x96 charsets: BASE is the first code point of the
              ;; range; the two position codes are biased by 32.
              ((setq base
                     (cdr (assq charset
                                '((mule-unicode-0100-24ff . #x0100)
                                  (mule-unicode-2500-33ff . #x2500)
                                  (mule-unicode-e000-ffff . #xe000)))))
               (+ base
                  (* (- (nth 1 split) 32) 96)
                  (- (nth 2 split) 32)))))))
cc57cc54 358
0269ddfb 359\f
e76938e7 360;; Coding system stuff
4ed46869 361
8057896b 362;; Coding system is a symbol that has the property `coding-system'.
4ed46869 363;;
8057896b
KH
364;; The value of the property `coding-system' is a vector of the
365;; following format:
d3675a42 366;; [TYPE MNEMONIC DOC-STRING PLIST FLAGS]
8057896b 367;; We call this vector as coding-spec. See comments in src/coding.c
d3675a42 368;; for more detail.
8057896b
KH
369
;; Indices into a coding-spec vector [TYPE MNEMONIC DOC-STRING PLIST FLAGS].
(defconst coding-spec-type-idx 0
  "Index of the TYPE element in a coding-spec vector.")
(defconst coding-spec-mnemonic-idx 1
  "Index of the MNEMONIC element in a coding-spec vector.")
(defconst coding-spec-doc-string-idx 2
  "Index of the DOC-STRING element in a coding-spec vector.")
(defconst coding-spec-plist-idx 3
  "Index of the PLIST element in a coding-spec vector.")
(defconst coding-spec-flags-idx 4
  "Index of the FLAGS element in a coding-spec vector.")
375
a6acd8a2
KH
376;; PLIST is a property list of a coding system. To share PLIST among
377;; alias coding systems, a coding system has PLIST in coding-spec
378;; instead of having it in normal property list of Lisp symbol.
379;; Here's a list of coding system properties currently being used.
0269ddfb
KH
380;;
381;; o coding-category
382;;
383;; The value is a coding category the coding system belongs to. The
1b46a680
KH
384;; function `make-coding-system' sets this value automatically
385;; unless its argument PROPERTIES specifies this property.
4ed46869 386;;
0269ddfb 387;; o alias-coding-systems
4ed46869 388;;
0269ddfb
KH
389;; The value is a list of coding systems of the same alias group. The
390;; first element is the coding system made at first, which we call as
1b46a680
KH
391;; `base coding system'. The function `make-coding-system' sets this
392;; value automatically and `define-coding-system-alias' updates it.
0269ddfb
KH
393;;
394;; o post-read-conversion
395;;
396;; The value is a function to call after some text is inserted and
397;; decoded by the coding system itself and before any functions in
398;; `after-insert-functions' are called. The arguments to this
399;; function is the same as those of a function in
4ed46869
KH
400;; `after-insert-functions', i.e. LENGTH of a text while putting point
401;; at the head of the text to be decoded
402;;
0269ddfb
KH
403;; o pre-write-conversion
404;;
405;; The value is a function to call after all functions in
406;; `write-region-annotate-functions' and `buffer-file-format' are
407;; called, and before the text is encoded by the coding system itself.
408;; The arguments to this function is the same as those of a function
409;; in `write-region-annotate-functions', i.e. FROM and TO specifying
410;; region of a text.
411;;
f967223b 412;; o translation-table-for-decode
0269ddfb 413;;
f967223b
KH
414;; The value is a translation table to be applied on decoding. See
415;; the function `make-translation-table' for the format of translation
416;; table.
0269ddfb 417;;
f967223b 418;; o translation-table-for-encode
0269ddfb 419;;
f967223b 420;; The value is a translation table to be applied on encoding.
a6acd8a2 421;;
c11a8f77
KH
422;; o safe-chars
423;;
424;; The value is a char table. If a character has non-nil value in it,
425;; the character is safely supported by the coding system. This
426;; overrides the specification of safe-charsets.
427
a6acd8a2
KH
428;; o safe-charsets
429;;
430;; The value is a list of charsets safely supported by the coding
431;; system. The value t means that all charsets Emacs handles are
432;; supported. Even if some charset is not in this list, it doesn't
433;; mean that the charset can't be encoded in the coding system,
434;; instead, it just means that some other receiver of a text encoded
435;; in the coding system won't be able to handle that charset.
b25eef20
KH
436;;
437;; o mime-charset
438;;
439;; The value is a symbol of which name is `MIME-charset' parameter of
440;; the coding system.
c76b5c99 441;;
1322c8ca
KH
442;; o charset-origin-alist
443;;
444;; The value is a list of this form:
445;; (CHARSET EXTERNAL-CHARSET-NAME ENCODING-FUNCTION).
446;; ENCODING-FUNCTION is a function to encode a character in CHARSET
447;; to the code in EXTERNAL-CHARSET-NAME. The command what-cursor-position
448;; uses this information of the buffer-file-coding-system.
caa85ad9
KH
449;; ENCODING-FUNCTION may be a translation table or a symbol whose
450;; property `translation-table' is a translation table. In these cases,
451;; the translation table is used to encode the character.
1322c8ca 452;;
c76b5c99
KH
453;; o valid-codes (meaningful only for a coding system based on CCL)
454;;
455;; The value is a list to indicate valid byte ranges of the encoded
456;; file. Each element of the list is an integer or a cons of integer.
457;; In the former case, the integer value is a valid byte code. In the
e8dd0160 458;; latter case, the integers specify the range of valid byte codes.
c76b5c99 459
0269ddfb
KH
460
(defsubst coding-system-spec (coding-system)
  "Return the coding-spec of CODING-SYSTEM.
CODING-SYSTEM is first passed through `check-coding-system'; the
coding-spec is the `coding-system' property of the result."
  (let ((checked (check-coding-system coding-system)))
    (get checked 'coding-system)))
4ed46869 464
(defun coding-system-type (coding-system)
  "Return the coding type of CODING-SYSTEM.
A coding type is an integer value indicating the encoding method
of CODING-SYSTEM.  See the function `make-coding-system' for more detail."
  (let ((spec (coding-system-spec coding-system)))
    (aref spec coding-spec-type-idx)))
4ed46869 470
(defun coding-system-mnemonic (coding-system)
  "Return the mnemonic character of CODING-SYSTEM.
The mnemonic character of a coding system is used in mode line
to indicate the coding system.  If the arg is nil, return ?-."
  (let ((spec-vector (coding-system-spec coding-system)))
    (cond ((null spec-vector) ?-)
          (t (aref spec-vector coding-spec-mnemonic-idx)))))
4ed46869 477
(defun coding-system-doc-string (coding-system)
  "Return the documentation string for CODING-SYSTEM.
It is the DOC-STRING element of the coding-spec of CODING-SYSTEM."
  (let ((spec (coding-system-spec coding-system)))
    (aref spec coding-spec-doc-string-idx)))
4ed46869 481
(defun coding-system-plist (coding-system)
  "Return the property list of CODING-SYSTEM.
It is the PLIST element of the coding-spec of CODING-SYSTEM."
  (let ((spec (coding-system-spec coding-system)))
    (aref spec coding-spec-plist-idx)))
d3675a42 485
(defun coding-system-flags (coding-system)
  "Return `flags' of CODING-SYSTEM.
A `flags' of a coding system is a vector of length 32 indicating detailed
information of a coding system.  See the function `make-coding-system'
for more detail."
  (let ((spec (coding-system-spec coding-system)))
    (aref spec coding-spec-flags-idx)))
492
(defun coding-system-get (coding-system prop)
  "Extract a value from CODING-SYSTEM's property list for property PROP.
Return nil if the property list has no entry for PROP."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props prop)))
496
(defun coding-system-put (coding-system prop val)
  "Change value in CODING-SYSTEM's property list PROP to VAL.
An existing property list is modified in place; when there is none
yet, a fresh one holding just PROP and VAL is stored in the coding-spec."
  (let ((props (coding-system-plist coding-system)))
    (cond (props
           (plist-put props prop val))
          (t
           (aset (coding-system-spec coding-system)
                 coding-spec-plist-idx
                 (list prop val))))))
504
(defun coding-system-category (coding-system)
  "Return the coding category of CODING-SYSTEM.
This is the value of the `coding-category' property of CODING-SYSTEM."
  (let ((property 'coding-category))
    (coding-system-get coding-system property)))
508
(defun coding-system-base (coding-system)
  "Return the base coding system of CODING-SYSTEM.
A base coding system is one made by `make-coding-system'.
Neither aliases nor subsidiary coding systems are base coding systems.
The value is the first element of the `alias-coding-systems' property."
  (let ((group (coding-system-get coding-system 'alias-coding-systems)))
    (car group)))
514
;; `coding-system-parent' is the old name of `coding-system-base'; it is
;; kept as an alias and marked obsolete.
(defalias 'coding-system-parent 'coding-system-base)
(make-obsolete 'coding-system-parent 'coding-system-base "20.3")
0269ddfb
KH
517
518;; Coding system also has a property `eol-type'.
519;;
520;; This property indicates how the coding system handles end-of-line
521;; format. The value is integer 0, 1, 2, or a vector of three coding
522;; systems. Each integer value 0, 1, and 2 indicates the format of
523;; end-of-line LF, CRLF, and CR respectively. A vector value
524;; indicates that the format of end-of-line should be detected
525;; automatically. Nth element of the vector is the subsidiary coding
526;; system whose `eol-type' property is N.
4ed46869 527
(defun coding-system-eol-type (coding-system)
  "Return eol-type of CODING-SYSTEM.
The value is the `eol-type' property of the symbol CODING-SYSTEM:
an integer 0, 1, 2, or a vector of coding systems.

Integer values 0, 1, and 2 indicate a format of end-of-line; LF,
CRLF, and CR respectively.

A vector value indicates that a format of end-of-line should be
detected automatically.  Nth element of the vector is the subsidiary
coding system whose eol-type is N."
  (get coding-system 'eol-type))
bd882697 539
857ea15c
AS
(defun coding-system-lessp (x y)
  "Return non-nil if coding system X should be sorted before Y.
`no-conversion', `emacs-mule' and `undecided' come first, in that
order.  Any other pair is ordered by mnemonic character, first
ignoring case and then by the character codes themselves."
  (let ((special '(no-conversion emacs-mule undecided))
        (verdict 'unset))
    ;; Walk the privileged names in priority order; X is tested before
    ;; Y for each name.
    (while (and special (eq verdict 'unset))
      (cond ((eq x (car special)) (setq verdict t))
            ((eq y (car special)) (setq verdict nil)))
      (setq special (cdr special)))
    (if (not (eq verdict 'unset))
        verdict
      (let* ((c1 (coding-system-mnemonic x))
             (c2 (coding-system-mnemonic y))
             (d1 (downcase c1))
             (d2 (downcase c2)))
        (cond ((< d1 d2) t)
              ((> d1 d2) nil)
              (t (< c1 c2)))))))
552
(defun add-to-coding-system-list (coding-system)
  "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted.
The order is the one defined by `coding-system-lessp'."
  (cond
   ((or (null coding-system-list)
        (coding-system-lessp coding-system (car coding-system-list)))
    ;; Empty list, or CODING-SYSTEM sorts before the current head.
    (setq coding-system-list (cons coding-system coding-system-list)))
   (t
    ;; Binary search for the cell after which CODING-SYSTEM is spliced in.
    (let ((remaining (length coding-system-list))
          (cursor coding-system-list)
          half pivot)
      (while (> remaining 1)
        (setq half (/ remaining 2)
              pivot (nthcdr half cursor))
        (if (coding-system-lessp (car pivot) coding-system)
            (setq cursor pivot
                  remaining (- remaining half))
          (setq remaining half)))
      (setcdr cursor (cons coding-system (cdr cursor)))))))
567
(defun coding-system-list (&optional base-only)
  "Return a list of all existing non-subsidiary coding systems.
If optional arg BASE-ONLY is non-nil, only base coding systems are listed.
The value doesn't include subsidiary coding systems which are what
made from bases and aliases automatically for various end-of-line
formats (e.g. iso-latin-1-unix, koi8-r-dos).

The value is a fresh list; the variable `coding-system-list' itself
is not modified."
  ;; HEAD is a dummy cell in front of a copy of the list, so that the
  ;; first real element can be unlinked like any other.  The result is
  ;; taken from HEAD's cdr rather than from the copy's original first
  ;; cons, which would still be returned after being unlinked.
  (let* ((head (cons nil (copy-sequence coding-system-list)))
         (tail head))
    ;; Remove subsidiary coding systems (eol variants) and alias
    ;; coding systems (if necessary).
    (while (cdr tail)
      (let* ((coding (car (cdr tail)))
             (aliases (coding-system-get coding 'alias-coding-systems)))
        (if (or
             ;; CODING is an eol variant if not in ALIASES.
             (not (memq coding aliases))
             ;; CODING is an alias if it is not car of ALIASES.
             (and base-only (not (eq coding (car aliases)))))
            (setcdr tail (cdr (cdr tail)))
          (setq tail (cdr tail)))))
    (cdr head)))
589
c11a8f77
KH
(defun register-char-codings (coding-system safe-chars)
  "Record CODING-SYSTEM in `char-coding-system-table' as given by SAFE-CHARS.
If SAFE-CHARS is t, add CODING-SYSTEM to the list kept in extra slot 0
of the table, unless it is already there.
Otherwise SAFE-CHARS is traversed with `map-char-table': for each key
that is 128 or greater and has a non-nil value, CODING-SYSTEM is added
to the list stored for that key in the table, unless already present."
  (let ((general (char-table-extra-slot char-coding-system-table 0)))
    (cond
     ((eq safe-chars t)
      (or (memq coding-system general)
          (set-char-table-extra-slot char-coding-system-table 0
                                     (cons coding-system general))))
     (t
      (map-char-table
       (function
        (lambda (key val)
          (when (and (>= key 128) val)
            (let ((known (aref char-coding-system-table key)))
              (or (memq coding-system known)
                  (aset char-coding-system-table key
                        (cons coding-system known)))))))
       safe-chars)))))
605
606
6e9722b0
KH
(defun make-subsidiary-coding-system (coding-system)
  "Make subsidiary coding systems (eol-type variants) of CODING-SYSTEM.
Three symbols named CODING-SYSTEM-unix, CODING-SYSTEM-dos and
CODING-SYSTEM-mac are interned.  Each one gets the coding-spec of
CODING-SYSTEM as its `coding-system' property and 0, 1 or 2
respectively as its `eol-type' property, and is added to both
`coding-system-list' and `coding-system-alist'.
Return a vector of the three symbols, in that order."
  (let ((coding-spec (coding-system-spec coding-system))
        (subsidiaries (vector (intern (format "%s-unix" coding-system))
                              (intern (format "%s-dos" coding-system))
                              (intern (format "%s-mac" coding-system))))
        (i 0))
    (while (< i 3)
      (let ((sub (aref subsidiaries i)))
        ;; The spec vector is shared, not copied, between CODING-SYSTEM
        ;; and its subsidiaries.
        (put sub 'coding-system coding-spec)
        (put sub 'eol-type i)
        (add-to-coding-system-list sub)
        (setq coding-system-alist
              (cons (list (symbol-name sub))
                    coding-system-alist)))
      (setq i (1+ i)))
    subsidiaries))
4ed46869 624
50c29104
KH
(defun transform-make-coding-system-args (name type &optional doc-string props)
  "For internal use only.
Transform XEmacs style args for `make-coding-system' to Emacs style.
NAME, TYPE, DOC-STRING and PROPS are the XEmacs style arguments; PROPS
is a property list.  Only TYPE `ccl' is supported; any other TYPE
signals an error.
Value is a list of transformed arguments:
  (NAME 4 MNEMONIC DOC-STRING (DECODE . ENCODE) PROPERTIES EOL-TYPE)."
  ;; Backquote is deliberately not used here: this file avoids it
  ;; because of bootstrapping problems.
  (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
        (eol-type (plist-get props 'eol-type))
        properties tmp)
    ;; Map XEmacs eol-type names to the Emacs ones; other values are
    ;; passed through unchanged.
    (cond
     ((eq eol-type 'lf) (setq eol-type 'unix))
     ((eq eol-type 'crlf) (setq eol-type 'dos))
     ((eq eol-type 'cr) (setq eol-type 'mac)))
    ;; Only these two properties are carried over.
    (if (setq tmp (plist-get props 'post-read-conversion))
        (setq properties (plist-put properties 'post-read-conversion tmp)))
    (if (setq tmp (plist-get props 'pre-write-conversion))
        (setq properties (plist-put properties 'pre-write-conversion tmp)))
    (cond
     ((eq type 'ccl)
      (list name 4
            mnemonic
            doc-string
            (cons (plist-get props 'decode) (plist-get props 'encode))
            properties
            eol-type))
     (t
      (error "Unsupported XEmacs style arguments for make-coding-system: %S"
             (list name type doc-string props))))))
651
8057896b 652(defun make-coding-system (coding-system type mnemonic doc-string
1b46a680
KH
653 &optional
654 flags
655 properties
656 eol-type)
3bb1accb 657 "Define a new coding system CODING-SYSTEM (symbol).
a6acd8a2 658Remaining arguments are TYPE, MNEMONIC, DOC-STRING, FLAGS (optional),
d9e3229d 659and PROPERTIES (optional) which construct a coding-spec of CODING-SYSTEM
a6acd8a2 660in the following format:
0269ddfb 661 [TYPE MNEMONIC DOC-STRING PLIST FLAGS]
3bb1accb
KH
662
663TYPE is an integer value indicating the type of the coding system as follows:
4ed46869
KH
664 0: Emacs internal format,
665 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PC,
666 2: ISO-2022 including many variants,
667 3: Big5 used mainly on Chinese PC,
cd9c3177
KH
668 4: private, CCL programs provide encoding/decoding algorithm,
669 5: Raw-text, which means that text contains random 8-bit codes.
0269ddfb 670
3bb1accb 671MNEMONIC is a character to be displayed on mode line for the coding system.
0269ddfb 672
3bb1accb 673DOC-STRING is a documentation string for the coding system.
0269ddfb 674
3bb1accb 675FLAGS specifies more detailed information of the coding system as follows:
d6d6d592 676
3bb1accb 677 If TYPE is 2 (ISO-2022), FLAGS is a list of these elements:
4ed46869
KH
678 CHARSET0, CHARSET1, CHARSET2, CHARSET3, SHORT-FORM,
679 ASCII-EOL, ASCII-CNTL, SEVEN, LOCKING-SHIFT, SINGLE-SHIFT,
d6d6d592 680 USE-ROMAN, USE-OLDJIS, NO-ISO6429, INIT-BOL, DESIGNATION-BOL,
850101ed 681 SAFE, ACCEPT-LATIN-EXTRA-CODE.
4ed46869
KH
682 CHARSETn are character sets initially designated to Gn graphic registers.
683 If CHARSETn is nil, Gn is never used.
684 If CHARSETn is t, Gn can be used but nothing designated initially.
685 If CHARSETn is a list of character sets, those character sets are
686 designated to Gn on output, but nothing designated to Gn initially.
421e3b4e 687 But, character set `ascii' can be designated only to G0.
4ed46869
KH
688 SHORT-FORM non-nil means use short designation sequence on output.
689 ASCII-EOL non-nil means designate ASCII to g0 at end of line on output.
690 ASCII-CNTL non-nil means designate ASCII to g0 before control codes and
691 SPACE on output.
692 SEVEN non-nil means use 7-bit code only on output.
693 LOCKING-SHIFT non-nil means use locking-shift.
694 SINGLE-SHIFT non-nil means use single-shift.
695 USE-ROMAN non-nil means designate JIS0201-1976-Roman instead of ASCII.
696 USE-OLDJIS non-nil means designate JIS0208-1976 instead of JIS0208-1983.
697 NO-ISO6429 non-nil means not use ISO6429's direction specification.
69eba008
KH
698 INIT-BOL non-nil means any designation state is assumed to be reset
699 to initial at each beginning of line on output.
700 DESIGNATION-BOL non-nil means designation sequences should be placed
701 at beginning of line on output.
a6acd8a2 702 SAFE non-nil means convert unsafe characters to `?' on output.
c11a8f77
KH
703 Characters not specified in the property `safe-charsets' nor
704 `safe-chars' are unsafe.
850101ed
RS
705 ACCEPT-LATIN-EXTRA-CODE non-nil means code-detection routine accepts
706 a code specified in `latin-extra-code-table' (which see) as a valid
707 code of the coding system.
d6d6d592 708
3bb1accb
KH
709 If TYPE is 4 (private), FLAGS should be a cons of CCL programs, for
710 decoding and encoding. CCL programs should be specified by their
711 symbols.
d9e3229d 712
a2852e45
KH
713PROPERTIES is an alist of properties vs the corresponding values. The
714following properties are recognized:
715
716 o post-read-conversion
717
718 The value is a function to call after some text is inserted and
719 decoded by the coding system itself and before any functions in
720 `after-insert-functions' are called. The arguments to this
721 function is the same as those of a function in
722 `after-insert-functions', i.e. LENGTH of a text while putting point
723 at the head of the text to be decoded
724
725 o pre-write-conversion
726
727 The value is a function to call after all functions in
728 `write-region-annotate-functions' and `buffer-file-format' are
729 called, and before the text is encoded by the coding system itself.
730 The arguments to this function is the same as those of a function
731 in `write-region-annotate-functions', i.e. FROM and TO specifying
732 region of a text.
733
734 o translation-table-for-decode
735
736 The value is a translation table to be applied on decoding. See
737 the function `make-translation-table' for the format of translation
738 table.
739
740 o translation-table-for-encode
741
742 The value is a translation table to be applied on encoding.
743
744 o safe-chars
745
746 The value is a char table. If a character has non-nil value in it,
747 the character is safely supported by the coding system. This
748 overrides the specification of safe-charsets.
749
750 o safe-charsets
751
752 The value is a list of charsets safely supported by the coding
753 system. The value t means that all charsets Emacs handles are
754 supported. Even if some charset is not in this list, it doesn't
755 mean that the charset can't be encoded in the coding system,
756 instead, it just means that some other receiver of a text encoded
757 in the coding system won't be able to handle that charset.
758
759 o mime-charset
760
761 The value is a symbol of which name is `MIME-charset' parameter of
762 the coding system.
763
764 o valid-codes (meaningful only for a coding system based on CCL)
765
766 The value is a list to indicate valid byte ranges of the encoded
767 file. Each element of the list is an integer or a cons of integer.
768 In the former case, the integer value is a valid byte code. In the
769 latter case, the integers specifies the range of valid byte codes.
770
d9e3229d
KH
771These properties are set in PLIST, a property list. This function
772also sets properties `coding-category' and `alias-coding-systems'
773automatically.
4ed46869 774
1b46a680
KH
775EOL-TYPE specifies the EOL type of the coding-system in one of the
776following formats:
777
778 o symbol (unix, dos, or mac)
779
780 The symbol `unix' means Unix-like EOL (LF), `dos' means
781 DOS-like EOL (CRLF), and `mac' means MAC-like EOL (CR).
782
783 o number (0, 1, or 2)
784
785 The number 0, 1, and 2 mean UNIX, DOS, and MAC-like EOL
786 respectively.
787
788 o vector of coding-systems of length 3
789
790 The EOL type is detected automatically for the coding system.
791 And, according to the detected EOL type, one of the coding
792 systems in the vector is selected. Elements of the vector
4d3a9228 793 corresponds to Unix-like EOL, DOS-like EOL, and Mac-like EOL
1b46a680
KH
794 in this order.
795
3bb1accb
KH
796Kludgy features for backward compatibility:
797
7981. If TYPE is 4 and car or cdr of FLAGS is a vector, the vector is
799treated as a compiled CCL code.
800
8012. If PROPERTIES is just a list of character sets, the list is set as
802a value of `safe-charsets' in PLIST."
50c29104
KH
803
804 ;; For compatiblity with XEmacs, we check the type of TYPE. If it
c3d0ee51
EZ
805 ;; is a symbol, perhaps, this function is called with XEmacs-style
806 ;; arguments. Here, try to transform that kind of arguments to
50c29104
KH
807 ;; Emacs style.
808 (if (symbolp type)
809 (let ((args (transform-make-coding-system-args coding-system type
810 mnemonic doc-string)))
811 (setq coding-system (car args)
1053cc93 812 type (nth 1 args)
50c29104
KH
813 mnemonic (nth 2 args)
814 doc-string (nth 3 args)
815 flags (nth 4 args)
816 properties (nth 5 args)
817 eol-type (nth 6 args))))
818
0269ddfb 819 ;; Set a value of `coding-system' property.
6e9722b0 820 (let ((coding-spec (make-vector 5 nil))
d9e3229d
KH
821 (no-initial-designation t)
822 (no-alternative-designation t)
e6cddb1a 823 (accept-latin-extra-code nil)
6e9722b0 824 coding-category)
cd9c3177 825 (if (or (not (integerp type)) (< type 0) (> type 5))
0269ddfb 826 (error "TYPE argument must be 0..5"))
8057896b 827 (if (or (not (integerp mnemonic)) (<= mnemonic ? ) (> mnemonic 127))
e8dd0160 828 (error "MNEMONIC argument must be an ASCII printable character."))
0269ddfb
KH
829 (aset coding-spec coding-spec-type-idx type)
830 (aset coding-spec coding-spec-mnemonic-idx mnemonic)
831 (aset coding-spec coding-spec-doc-string-idx
e127a722 832 (purecopy (if (stringp doc-string) doc-string "")))
6e9722b0
KH
833 (cond ((= type 0)
834 (setq coding-category 'coding-category-emacs-mule))
835 ((= type 1)
836 (setq coding-category 'coding-category-sjis))
837 ((= type 2) ; ISO2022
4ed46869 838 (let ((i 0)
6e9722b0 839 (vec (make-vector 32 nil))
05134257
KH
840 (g1-designation nil)
841 (fl flags))
4ed46869 842 (while (< i 4)
05134257 843 (let ((charset (car fl)))
6e9722b0
KH
844 (if (and no-initial-designation
845 (> i 0)
846 (or (charsetp charset)
847 (and (consp charset)
848 (charsetp (car charset)))))
849 (setq no-initial-designation nil))
850 (if (charsetp charset)
851 (if (= i 1) (setq g1-designation charset))
852 (if (consp charset)
853 (let ((tail charset)
854 elt)
855 (while tail
856 (setq elt (car tail))
d9e3229d
KH
857 (if (eq elt t)
858 (setq no-alternative-designation nil)
859 (if (and elt (not (charsetp elt)))
860 (error "Invalid charset: %s" elt)))
6e9722b0
KH
861 (setq tail (cdr tail)))
862 (setq g1-designation (car charset)))
d9e3229d
KH
863 (if charset
864 (if (eq charset t)
865 (setq no-alternative-designation nil)
866 (error "Invalid charset: %s" charset)))))
4ed46869 867 (aset vec i charset))
05134257
KH
868 (setq fl (cdr fl) i (1+ i)))
869 (while (and (< i 32) fl)
870 (aset vec i (car fl))
e6cddb1a
KH
871 (if (and (= i 16) ; ACCEPT-LATIN-EXTRA-CODE
872 (car fl))
873 (setq accept-latin-extra-code t))
05134257 874 (setq fl (cdr fl) i (1+ i)))
6e9722b0 875 (aset coding-spec 4 vec)
6e9722b0
KH
876 (setq coding-category
877 (if (aref vec 8) ; Use locking-shift.
dc64cd19
KH
878 (or (and (aref vec 7) 'coding-category-iso-7-else)
879 'coding-category-iso-8-else)
6e9722b0
KH
880 (if (aref vec 7) ; 7-bit only.
881 (if (aref vec 9) ; Use single-shift.
dc64cd19 882 'coding-category-iso-7-else
d9e3229d
KH
883 (if no-alternative-designation
884 'coding-category-iso-7-tight
885 'coding-category-iso-7))
3ad911d8
KH
886 (if (or no-initial-designation
887 (not no-alternative-designation))
dc64cd19 888 'coding-category-iso-8-else
6e9722b0
KH
889 (if (and (charsetp g1-designation)
890 (= (charset-dimension g1-designation) 2))
891 'coding-category-iso-8-2
892 'coding-category-iso-8-1)))))))
893 ((= type 3)
894 (setq coding-category 'coding-category-big5))
895 ((= type 4) ; private
c76b5c99 896 (setq coding-category 'coding-category-ccl)
3bb1accb
KH
897 (if (not (consp flags))
898 (error "Invalid FLAGS argument for TYPE 4 (CCL)")
899 (let ((decoder (check-ccl-program
900 (car flags)
901 (intern (format "%s-decoder" coding-system))))
902 (encoder (check-ccl-program
903 (cdr flags)
904 (intern (format "%s-encoder" coding-system)))))
905 (if (and decoder encoder)
906 (aset coding-spec 4 (cons decoder encoder))
907 (error "Invalid FLAGS argument for TYPE 4 (CCL)")))))
cd9c3177
KH
908 (t ; i.e. (= type 5)
909 (setq coding-category 'coding-category-raw-text)))
0269ddfb
KH
910
911 (let ((plist (list 'coding-category coding-category
d9e3229d 912 'alias-coding-systems (list coding-system))))
0269ddfb 913 (if no-initial-designation
d9e3229d
KH
914 (plist-put plist 'no-initial-designation t))
915 (if (and properties
916 (or (eq properties t)
917 (not (consp (car properties)))))
918 ;; In the old version, the arg PROPERTIES is a list to be
919 ;; set in PLIST as a value of property `safe-charsets'.
c11a8f77
KH
920 (setq properties (list (cons 'safe-charsets properties))))
921 ;; In the current version PROPERTIES is a property list.
922 ;; Reflect it into PLIST one by one while handling safe-chars
923 ;; specially.
924 (let ((safe-charsets (cdr (assq 'safe-charsets properties)))
925 (safe-chars (cdr (assq 'safe-chars properties)))
926 (l properties)
927 prop val)
928 ;; If only safe-charsets is specified, make a char-table from
929 ;; it, and store that char-table as the value of `safe-chars'.
930 (if (and (not safe-chars) safe-charsets)
931 (let (charset)
932 (if (eq safe-charsets t)
933 (setq safe-chars t)
934 (setq safe-chars (make-char-table 'safe-chars))
935 (while safe-charsets
936 (setq charset (car safe-charsets)
937 safe-charsets (cdr safe-charsets))
938 (cond ((eq charset 'ascii)) ; just ignore
939 ((eq charset 'eight-bit-control)
940 (let ((i 128))
941 (while (< i 160)
942 (aset safe-chars i t)
943 (setq i (1+ i)))))
944 ((eq charset 'eight-bit-graphic)
945 (let ((i 160))
946 (while (< i 256)
947 (aset safe-chars i t)
948 (setq i (1+ i)))))
949 (t
e6cddb1a
KH
950 (aset safe-chars (make-char charset) t))))
951 (if accept-latin-extra-code
952 (let ((i 128))
953 (while (< i 160)
954 (if (aref latin-extra-code-table i)
955 (aset safe-chars i t))
956 (setq i (1+ i))))))
c11a8f77
KH
957 (setq l (cons (cons 'safe-chars safe-chars) l))))
958 (while l
959 (setq prop (car (car l)) val (cdr (car l)) l (cdr l))
960 (if (eq prop 'safe-chars)
961 (progn
cc57cc54
KH
962 (if (and (symbolp val)
963 (get val 'translation-table))
964 (setq safe-chars (get val 'translation-table)))
965 (register-char-codings coding-system safe-chars)
966 (setq val safe-chars)))
c11a8f77 967 (plist-put plist prop val)))
1b46a680
KH
968 ;; The property `coding-category' may have been set differently
969 ;; through PROPERTIES.
970 (setq coding-category (plist-get plist 'coding-category))
0269ddfb 971 (aset coding-spec coding-spec-plist-idx plist))
6e9722b0 972 (put coding-system 'coding-system coding-spec)
6e9722b0
KH
973 (put coding-category 'coding-systems
974 (cons coding-system (get coding-category 'coding-systems))))
4ed46869 975
1b46a680 976 ;; Next, set a value of `eol-type' property.
af873cd1 977 (if (not eol-type)
1b46a680
KH
978 ;; If EOL-TYPE is nil, set a vector of subsidiary coding
979 ;; systems, each corresponds to a coding system for the detected
980 ;; EOL format.
981 (setq eol-type (make-subsidiary-coding-system coding-system)))
982 (setq eol-type
983 (cond ((or (eq eol-type 'unix) (null eol-type))
984 0)
985 ((eq eol-type 'dos)
986 1)
987 ((eq eol-type 'mac)
988 2)
989 ((or (and (vectorp eol-type)
990 (= (length eol-type) 3))
991 (and (numberp eol-type)
992 (and (>= eol-type 0)
993 (<= eol-type 2))))
994 eol-type)
995 (t
996 (error "Invalid EOL-TYPE spec:%S" eol-type))))
997 (put coding-system 'eol-type eol-type)
0269ddfb
KH
998
999 ;; At last, register CODING-SYSTEM in `coding-system-list' and
1000 ;; `coding-system-alist'.
857ea15c 1001 (add-to-coding-system-list coding-system)
0269ddfb 1002 (setq coding-system-alist (cons (list (symbol-name coding-system))
d9e3229d 1003 coding-system-alist))
05134257
KH
1004
1005 ;; For a coding system of cateogory iso-8-1 and iso-8-2, create
1006 ;; XXX-with-esc variants.
1007 (let ((coding-category (coding-system-category coding-system)))
1008 (if (or (eq coding-category 'coding-category-iso-8-1)
1009 (eq coding-category 'coding-category-iso-8-2))
1010 (let ((esc (intern (concat (symbol-name coding-system) "-with-esc")))
c11a8f77
KH
1011 (doc (format "Same as %s but can handle any charsets by ISO's escape sequences." coding-system))
1012 (safe-charsets (assq 'safe-charsets properties))
1013 (mime-charset (assq 'mime-charset properties)))
1014 (if safe-charsets
1015 (setcdr safe-charsets t)
1016 (setq properties (cons (cons 'safe-charsets t) properties)))
1017 (if mime-charset
1018 (setcdr mime-charset nil))
05134257
KH
1019 (make-coding-system esc type mnemonic doc
1020 (if (listp (car flags))
1021 (cons (append (car flags) '(t)) (cdr flags))
1022 (cons (list (car flags) t) (cdr flags)))
c11a8f77 1023 properties))))
05134257 1024
d9e3229d 1025 coding-system)
8057896b 1026
(defun define-coding-system-alias (alias coding-system)
  "Make the symbol ALIAS another name for the coding system CODING-SYSTEM.
ALIAS receives the value of (coding-system-spec CODING-SYSTEM) as its
`coding-system' property, is appended to the `alias-coding-systems'
list, and is registered in the coding system list and in
`coding-system-alist'.  Its `eol-type' property mirrors that of
CODING-SYSTEM, except that a vector of subsidiary coding systems is
created afresh for ALIAS."
  (put alias 'coding-system (coding-system-spec coding-system))
  ;; Destructively extend the alias list stored in the property list.
  (nconc (coding-system-get alias 'alias-coding-systems) (list alias))
  (add-to-coding-system-list alias)
  (setq coding-system-alist
        (cons (list (symbol-name alias)) coding-system-alist))
  (let ((base-eol (coding-system-eol-type coding-system)))
    (put alias 'eol-type
         (if (vectorp base-eol)
             ;; EOL format is auto-detected: ALIAS needs its own
             ;; subsidiary coding systems.
             (make-subsidiary-coding-system alias)
           base-eol))))
4ed46869
KH
1038
(defun set-buffer-file-coding-system (coding-system &optional force)
  "Make CODING-SYSTEM the file coding-system of the current buffer.
Saving the buffer afterwards converts its text according to
CODING-SYSTEM.  Use \\[list-coding-systems] to see the possible values
of CODING-SYSTEM.

When CODING-SYSTEM leaves the end-of-line conversion unspecified and
the buffer's previous file coding-system specifies one, the previous
end-of-line conversion is merged into CODING-SYSTEM.

Likewise, when CODING-SYSTEM leaves the text conversion unspecified
and the buffer's previous file coding-system specifies one, the
previous text conversion is merged into CODING-SYSTEM.

If the optional prefix argument FORCE is non-nil, no merging is done
and CODING-SYSTEM is used exactly as given.

The buffer is marked modified so that a following \\[save-buffer]
really writes it out with CODING-SYSTEM.  Lisp programs that do not
want the buffer marked modified should set the variable
`buffer-file-coding-system' directly instead."
  (interactive "zCoding system for visited file (default, nil): \nP")
  (check-coding-system coding-system)
  (when (and coding-system buffer-file-coding-system (not force))
    (let ((old-base (coding-system-base buffer-file-coding-system))
          (old-eol (coding-system-eol-type buffer-file-coding-system)))
      ;; No text conversion in CODING-SYSTEM: inherit the old one.
      (when (eq (coding-system-base coding-system) 'undecided)
        (setq coding-system
              (coding-system-change-text-conversion coding-system old-base)))
      ;; No eol conversion in CODING-SYSTEM (its eol-type is still a
      ;; vector): inherit the old one if that one is decided.
      (when (and (vectorp (coding-system-eol-type coding-system))
                 (numberp old-eol)
                 (>= old-eol 0)
                 (<= old-eol 2))
        (setq coding-system
              (coding-system-change-eol-conversion coding-system old-eol)))))
  (setq buffer-file-coding-system coding-system)
  (set-buffer-modified-p t)
  (force-mode-line-update))
1079
358d28fb
RS
(defvar default-terminal-coding-system nil
  "Fallback coding system for terminal output.
Normally the selected language environment sets this.
The command `set-terminal-coding-system' (which see) both consults
and updates this variable.")
1084
df100398
KH
(defun set-terminal-coding-system (coding-system)
  "Use CODING-SYSTEM to encode all text sent to your terminal.
Use \\[list-coding-systems] to see the possible values of
CODING-SYSTEM.
When CODING-SYSTEM is nil and no terminal coding system is in effect,
the value of `default-terminal-coding-system' is used; that default
comes from the selected language environment or from the previous use
of this command."
  (interactive
   (let* ((default (and (not (terminal-coding-system))
                        default-terminal-coding-system))
          (prompt (format "Coding system for terminal display (default, %s): "
                          default)))
     (list (read-coding-system prompt default))))
  ;; Fall back on the remembered default only when nothing is given
  ;; and nothing is currently in effect.
  (or coding-system
      (terminal-coding-system)
      (setq coding-system default-terminal-coding-system))
  ;; Remember an explicit choice for the next invocation.
  (when coding-system
    (setq default-terminal-coding-system coding-system))
  (set-terminal-coding-system-internal coding-system)
  (redraw-frame (selected-frame)))
1107
358d28fb
RS
(defvar default-keyboard-coding-system nil
  "Fallback coding system for keyboard input.
Normally the selected language environment sets this.
The command `set-keyboard-coding-system' (which see) both consults
and updates this variable.")
1112
(defun set-keyboard-coding-system (coding-system)
  "Use CODING-SYSTEM to decode keyboard input.
This command also turns on Encoded-kbd minor mode.
\(With a nil CODING-SYSTEM, Encoded-kbd mode is turned off instead --
see `encoded-kbd-mode'.)
Use \\[list-coding-systems] to see the possible values of
CODING-SYSTEM.
When CODING-SYSTEM is nil and no keyboard coding system is in effect,
the value of `default-keyboard-coding-system' is used; that default
comes from the selected language environment or from the previous use
of this command."
  (interactive
   (let* ((default (and (not (keyboard-coding-system))
                        default-keyboard-coding-system))
          (prompt (format "Coding system for keyboard input (default, %s): "
                          default)))
     (list (read-coding-system prompt default))))
  ;; Fall back on the remembered default only when nothing is given
  ;; and nothing is currently in effect.
  (or coding-system
      (keyboard-coding-system)
      (setq coding-system default-keyboard-coding-system))
  ;; Remember an explicit choice for the next invocation.
  (when coding-system
    (setq default-keyboard-coding-system coding-system))
  (set-keyboard-coding-system-internal coding-system)
  (encoded-kbd-mode (if coding-system 1 0)))
1136
6d34f495
DL
(defcustom keyboard-coding-system nil
  "Coding system used for keyboard input.
On a terminal that cannot tell Meta keys from 8-bit characters,
setting this means you must type Meta characters with ESC.
See Info node `Specify Coding' and Info node `Single-Byte Character Support'.

Assigning to this variable directly has no effect;
use either M-x customize or \\[set-keyboard-coding-system]."
  :type '(coding-system :tag "Coding system")
  :link '(info-link "(emacs)Specify Coding")
  :link '(info-link "(emacs)Single-Byte Character Support")
  :set (lambda (_symbol new-value)
         (cond ((or new-value (boundp 'encoded-kbd-mode))
                (set-keyboard-coding-system new-value))
               (t
                ;; Nothing to switch on and `encoded-kbd-mode' is not
                ;; loaded yet: avoid loading it, but the variable
                ;; must still be initialized.
                (set-default 'keyboard-coding-system nil))))
  :version "21.1"
  :group 'keyboard
  :group 'mule)
1156
(defun set-buffer-process-coding-system (decoding encoding)
  "Set the coding systems of the process attached to the current buffer.
DECODING is the coding system for decoding what the process outputs;
ENCODING is the coding system for encoding what is sent to the process.
An error is signaled if the current buffer has no process.

Use \\[list-coding-systems] to see the possible coding systems."
  (interactive
   "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
  (let ((process (get-buffer-process (current-buffer))))
    (or process (error "no process"))
    ;; Validate both arguments before touching the process.
    (check-coding-system decoding)
    (check-coding-system encoding)
    (set-process-coding-system process decoding encoding))
  (force-mode-line-update))
1172
d0b99881
RS
;; `set-clipboard-coding-system' is just another name for the command
;; defined below.
(defalias 'set-clipboard-coding-system 'set-selection-coding-system)

(defun set-selection-coding-system (coding-system)
  "Use CODING-SYSTEM when exchanging text with other X clients.
Text sent or received via cut_buffer, selection, and clipboard
is encoded or decoded by CODING-SYSTEM."
  (interactive "zCoding system for X selection: ")
  ;; Validate first so that an invalid value is never installed.
  (check-coding-system coding-system)
  (setq selection-coding-system coding-system))
b25eef20 1182
;; The coding system most recently given to the command
;; set-next-selection-coding-system; offered as its default next time.
(defvar last-next-selection-coding-system nil)

(defun set-next-selection-coding-system (coding-system)
  "Use CODING-SYSTEM for the next exchange of text with other X clients.
The setting applies to the next communication only.
If CODING-SYSTEM is nil, the coding system given to the previous
invocation of this command is used again."
  (interactive
   (let ((prompt
          (if last-next-selection-coding-system
              (format "Coding system for the next X selection (default, %S): "
                      last-next-selection-coding-system)
            "Coding system for the next X selection: ")))
     (list (read-coding-system prompt last-next-selection-coding-system))))
  (cond (coding-system
         ;; Remember an explicit choice for next time.
         (setq last-next-selection-coding-system coding-system))
        (t
         ;; Nothing given: reuse the remembered one.
         (setq coding-system last-next-selection-coding-system)))
  (check-coding-system coding-system)
  (setq next-selection-coding-system coding-system))
1203
(defun set-coding-priority (arg)
  "Set priority of coding categories according to ARG.
ARG is a list of coding categories ordered by priority.
Coding categories of `coding-category-list' that are not mentioned in
ARG are kept, in their current relative order, after those in ARG.
An error is signaled if an element of ARG is not a coding category or
appears in ARG more than once."
  (let ((tail arg)
        (remaining (copy-sequence coding-category-list))
        category)
    ;; Validate ARG while removing its elements from REMAINING.  This
    ;; assumes that `coding-category-list' contains all coding
    ;; categories, so an element missing from REMAINING is a duplicate.
    (while tail
      (setq category (car tail))
      (or (and (get category 'coding-category-index)
               (memq category remaining))
          (error "Invalid or duplicated element in argument: %s" arg))
      (setq remaining (delq category remaining)
            tail (cdr tail)))
    ;; Install the new order, then let the internals pick it up.
    (setq coding-category-list (append arg remaining))
    (set-coding-priority-internal)))
4ed46869
KH
1221
1222;;; FILE I/O
1223
(defcustom auto-coding-alist
  '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|tar\\|tgz\\)\\'" . no-conversion)
    ("\\.\\(ARC\\|ZIP\\|LZH\\|ZOO\\|JAR\\|TAR\\|TGZ\\)\\'" . no-conversion))
  "Alist mapping file name patterns to coding systems.
Each element has the form (REGEXP . CODING-SYSTEM).
A file whose name matches REGEXP is decoded by CODING-SYSTEM when it
is read.

Entries in this alist override both `coding:' tags in the file (see
the function `set-auto-coding') and the contents of
`file-coding-system-alist'."
  :group 'files
  :group 'mule
  :type '(repeat (cons (regexp :tag "File name regexp")
                       (symbol :tag "Coding system"))))
835f49b8 1238
1c4cc63a
KH
(defvar set-auto-coding-for-load nil
  "Non-nil means `set-auto-coding' also honors a `unibyte:' tag.
When such a tag is found, `set-auto-coding' returns `raw-text'.
This is used for loading and byte-compiling Emacs Lisp files.")
1242
8a592131
RS
(defun auto-coding-alist-lookup (filename)
  "Return the coding system specified by `auto-coding-alist' for FILENAME.
The value is the coding system of the first element of
`auto-coding-alist' whose regexp matches FILENAME and whose coding
system is non-nil, or nil if there is no such element.
Matching ignores case on systems listed in the `case-fold-search'
binding below (VMS, Windows NT and MS-DOS)."
  (let ((alist auto-coding-alist)
        (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos)))
        coding-system)
    (while (and alist (not coding-system))
      (if (string-match (car (car alist)) filename)
          (setq coding-system (cdr (car alist))))
      ;; Always advance.  Advancing only on a failed match would loop
      ;; forever on a matching element whose coding system is nil.
      (setq alist (cdr alist)))
    coding-system))
1253
835f49b8
KH
(defun set-auto-coding (filename size)
  "Return coding system for a file FILENAME of which SIZE bytes follow point.
These bytes should include at least the first 1k of the file
and the last 3k of the file, but the middle may be omitted.

It checks FILENAME against the variable `auto-coding-alist'.
If FILENAME doesn't match any entries in the variable,
it checks for a `coding:' tag in the first one or two lines following
point.  If no `coding:' tag is found, it checks for local variables
list in the last 3K bytes out of the SIZE bytes.  Text beyond the
SIZE bytes is never examined.

If `set-auto-coding-for-load' is non-nil, a `unibyte:' tag is looked
for before the `coding:' tag, and finding one makes the return value
`raw-text'.

The return value is the specified coding system,
or nil if nothing specified.  A `coding:' tag naming something that
is not a coding system is ignored.

The variable `set-auto-coding-function' (which see) is set to this
function by default."
  (let ((coding-system (auto-coding-alist-lookup filename)))

    (or coding-system
        (let* ((case-fold-search t)
               (head-start (point))
               (head-end (+ head-start (min size 1024)))
               (tail-start (+ head-start (max (- size 3072) 0)))
               (tail-end (+ head-start size))
               coding-system head-found tail-found pos)
          ;; Try a short cut by searching for the string "coding:"
          ;; and for "unibyte:" at the head and tail of SIZE bytes.
          (setq head-found (or (search-forward "coding:" head-end t)
                               (search-forward "unibyte:" head-end t)))
          (if (and head-found (> head-found tail-start))
              ;; Head and tail are overlapped.
              (setq tail-found head-found)
            (goto-char tail-start)
            (setq tail-found (or (search-forward "coding:" tail-end t)
                                 (search-forward "unibyte:" tail-end t))))

          ;; At first check the head.
          (when head-found
            (goto-char head-start)
            (setq pos (re-search-forward "[\n\r]" head-end t))
            (if (and pos
                     (= (char-after head-start) ?#)
                     (= (char-after (1+ head-start)) ?!))
                ;; If the file begins with "#!" (exec interpreter magic),
                ;; look for coding frobs in the first two lines.  You cannot
                ;; necessarily put them in the first line of such a file
                ;; without screwing up the interpreter invocation.
                (setq pos (search-forward "\n" head-end t)))
            ;; Restrict the head to the line(s) that may hold a -*- tag.
            (if pos (setq head-end pos))
            (when (< head-found head-end)
              (goto-char head-start)
              (when (and set-auto-coding-for-load
                         (re-search-forward
                          "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
                          head-end t))
                (setq coding-system 'raw-text))
              (when (and (not coding-system)
                         (re-search-forward
                          "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
                          head-end t))
                (setq coding-system (intern (match-string 2)))
                (or (coding-system-p coding-system)
                    (setq coding-system nil)))))

          ;; If no coding: tag in the head, check the tail.
          (when (and tail-found (not coding-system))
            (goto-char tail-start)
            ;; Skip to just after a form feed, if there is one in the
            ;; tail.  The search must be bounded by TAIL-END: text
            ;; beyond the SIZE bytes is not ours to inspect, and a
            ;; match there would leave point past TAIL-END and make
            ;; the bounded search below signal an error.
            (search-forward "\n\^L" tail-end t)
            (if (re-search-forward
                 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
                ;; The prefix is what comes before "local variables:" in its
                ;; line.  The suffix is what comes after "local variables:"
                ;; in its line.
                (let* ((prefix (regexp-quote (match-string 1)))
                       (suffix (regexp-quote (match-string 2)))
                       (re-coding
                        (concat
                         "^" prefix
                         "[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
                         suffix "$"))
                       (re-unibyte
                        (concat
                         "^" prefix
                         "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
                         suffix "$"))
                       (re-end
                        (concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$"))
                       (pos (point)))
                  ;; Shrink the tail to the local variables list proper:
                  ;; it ends at the "end:" line, or at TAIL-END if there
                  ;; is no such line.
                  (re-search-forward re-end tail-end 'move)
                  (setq tail-end (point))
                  (goto-char pos)
                  (when (and set-auto-coding-for-load
                             (re-search-forward re-unibyte tail-end t))
                    (setq coding-system 'raw-text))
                  (when (and (not coding-system)
                             (re-search-forward re-coding tail-end t))
                    (setq coding-system (intern (match-string 1)))
                    (or (coding-system-p coding-system)
                        (setq coding-system nil))))))
          coding-system))))

(setq set-auto-coding-function 'set-auto-coding)
87aba788 1356
4ed46869
KH
;; Hook function for `after-insert-file-functions': update
;; buffer-file-coding-system of the current buffer after some text is
;; inserted.  INSERTED is the number of characters inserted; the
;; return value is that count, recomputed if the buffer was switched
;; to unibyte.
(defun after-insert-file-set-buffer-file-coding-system (inserted)
  (when last-coding-system-used
    (let ((new-coding
           (find-new-buffer-file-coding-system last-coding-system-used))
          (was-modified (buffer-modified-p)))
      (when new-coding
        (set-buffer-file-coding-system new-coding t)
        (when (and enable-multibyte-characters
                   (or (eq new-coding 'no-conversion)
                       (eq (coding-system-type new-coding) 5))
                   ;; An unmodified buffer whose size equals INSERTED
                   ;; means that we are visiting the file.
                   (not was-modified)
                   (= (buffer-size) inserted))
          ;; For coding systems no-conversion and raw-text...,
          ;; edit the buffer as unibyte.  The end byte position has
          ;; to be taken before the buffer representation changes.
          (let ((end-byte (position-bytes (+ (point) inserted))))
            (set-buffer-multibyte nil)
            (setq inserted (- end-byte (position-bytes (point))))))
        ;; `set-buffer-file-coding-system' marked the buffer modified;
        ;; put the previous state back.
        (set-buffer-modified-p was-modified))))
  inserted)

(add-hook 'after-insert-file-functions
          'after-insert-file-set-buffer-file-coding-system)
4ed46869 1383
;; The text conversion and the eol-type of the coding system returned
;; by the function below are decided independently of each other.
;; For each of them the candidates are:
;;   1. That of buffer-file-coding-system locally bound.
;;   2. That of CODING.

(defun find-new-buffer-file-coding-system (coding)
  "Return a coding system for a buffer when a file of CODING is inserted.
The returned value is the one that should become the
`buffer-file-coding-system' of the current buffer.
Return nil if there's no need to set `buffer-file-coding-system'."
  ;; With a nil CODING nothing is known, so there is nothing to do.
  (when coding
    (let* ((current-eol (coding-system-eol-type buffer-file-coding-system))
           ;; A non-number here means the eol-type is not yet decided.
           (local-eol (and (numberp current-eol) current-eol))
           ;; Stays nil when `buffer-file-coding-system' is `undecided'.
           (local-coding
            (and buffer-file-coding-system
                 (not (eq (coding-system-type buffer-file-coding-system) t))
                 (coding-system-base buffer-file-coding-system)))
           (locally-bound (local-variable-p 'buffer-file-coding-system)))
      ;; A buffer that already has a complete local coding system is
      ;; better left alone.
      (unless (and locally-bound local-eol local-coding)
        (let* ((raw-eol (coding-system-eol-type coding))
               (found-eol (if (numberp raw-eol)
                              raw-eol
                            ;; The eol-type is not found.  If EOL
                            ;; conversions are inhibited, force the
                            ;; unix eol-type.
                            (if inhibit-eol-conversion 0)))
               (found-coding (if (eq (coding-system-type coding) t)
                                 'undecided
                               (coding-system-base coding))))
          ;; Go on only if CODING tells us something, eol or text.
          (unless (and (not found-eol) (eq found-coding 'undecided))
            (let* ((new-coding
                    ;; The local setting takes precedence over the
                    ;; found one.
                    (if locally-bound
                        (or local-coding found-coding)
                      (or found-coding local-coding)))
                   (new-eol
                    (if locally-bound
                        (or local-eol found-eol)
                      (or found-eol local-eol)))
                   (subsidiaries (coding-system-eol-type new-coding)))
              ;; Pick the subsidiary coding system for the decided
              ;; eol-type when there is one to pick from.
              (if (and (numberp new-eol) (vectorp subsidiaries))
                  (aref subsidiaries new-eol)
                new-coding))))))))
4ed46869 1445
fe831d33
GV
(defun modify-coding-system-alist (target-type regexp coding-system)
  "Register CODING-SYSTEM for REGEXP in one of the I/O coding system tables.
The three tables are `file-coding-system-alist',
`process-coding-system-alist', and `network-coding-system-alist'.

TARGET-TYPE selects the table to modify:
  `file'     means `file-coding-system-alist' (which see),
  `process'  means `process-coding-system-alist' (which see),
  `network'  means `network-coding-system-alist' (which see).

REGEXP is a regular expression matching the target of the I/O
operation: a file name for `file', a program name for `process', or a
network service name for `network'.  For `network' only, REGEXP may
also be an integer port number.

CODING-SYSTEM is either a coding system used for both directions, a
cons cell (DECODING . ENCODING) giving the coding systems for decoding
and encoding respectively, or a function symbol which, when called,
returns such a cons cell.

If the table already has an entry whose key is `equal' to REGEXP, that
entry is updated in place; otherwise a new entry is pushed on the
front of the table."
  ;; Argument validation.
  (if (not (memq target-type '(file process network)))
      (error "Invalid target type: %s" target-type))
  (if (not (or (stringp regexp)
               (and (eq target-type 'network) (integerp regexp))))
      (error "Invalid regular expression: %s" regexp))
  ;; Normalize CODING-SYSTEM.  A function symbol is stored untouched; a
  ;; plain coding system symbol becomes a (DECODING . ENCODING) pair; an
  ;; explicit pair has both halves checked.
  (cond ((not (symbolp coding-system))
         (check-coding-system (car coding-system))
         (check-coding-system (cdr coding-system)))
        ((not (fboundp coding-system))
         (check-coding-system coding-system)
         (setq coding-system (cons coding-system coding-system))))
  ;; Pick the table by name so that one code path updates all three.
  (let* ((alist-var (cond ((eq target-type 'file)
                           'file-coding-system-alist)
                          ((eq target-type 'process)
                           'process-coding-system-alist)
                          (t
                           'network-coding-system-alist)))
         (entry (assoc regexp (symbol-value alist-var))))
    (if entry
        (setcdr entry coding-system)
      (set alist-var
           (cons (cons regexp coding-system)
                 (symbol-value alist-var))))))
1498
(defun make-translation-table (&rest args)
  "Make a translation table (char table) from arguments.
Each argument is a list of elements of the form (FROM . TO),
where FROM is a character to be translated to TO.

FROM can be a generic character (see `make-char').  In this case, TO is
a generic character containing the same number of characters, or an
ordinary character.  If FROM and TO are both generic characters, all
characters belonging to FROM are translated to characters belonging to TO
without changing their position code(s).

The arguments and forms in each argument are processed in the given
order, and if a previous form already translates TO to some other
character, say TO-ALT, FROM is also translated to TO-ALT.  Likewise,
characters that earlier forms translated to FROM are retargeted to
the translation finally chosen for FROM.

Signal an error if TO is a generic character whose degree of freedom
differs from that of FROM.

Return the newly created char table."
  (let ((table (make-char-table 'translation-table))
        ;; REVLIST maps a target character to the characters translated
        ;; to it so far; each element has the form (TO FROM1 FROM2 ...).
        revlist)
    (while args
      (let ((elts (car args)))
        (while elts
          (let* ((from (car (car elts)))
                 (from-i 0)             ; degree of freedom of FROM
                 (from-rev (nreverse (split-char from)))
                 (to (cdr (car elts)))
                 (to-i 0)               ; degree of freedom of TO
                 (to-rev (nreverse (split-char to))))
            ;; Check numbers of heading 0s in FROM-REV and TO-REV.
            (while (eq (car from-rev) 0)
              (setq from-i (1+ from-i) from-rev (cdr from-rev)))
            (while (eq (car to-rev) 0)
              (setq to-i (1+ to-i) to-rev (cdr to-rev)))
            (if (and (/= from-i to-i) (/= to-i 0))
                (error "Invalid character pair (%d . %d)" from to))
            ;; If we have already translated TO to TO-ALT, FROM should
            ;; also be translated to TO-ALT.  But, this is only if TO
            ;; is a generic character or TO-ALT is not a generic
            ;; character.
            (let ((to-alt (aref table to)))
              (if (and to-alt
                       (or (> to-i 0) (not (generic-char-p to-alt))))
                  (setq to to-alt)))
            (if (> from-i 0)
                (set-char-table-default table from to)
              (aset table from to))
            ;; If we have already translated some chars to FROM, they
            ;; should also be translated to TO.  Walk every character
            ;; recorded under FROM; the old code re-stored FROM itself
            ;; on each iteration and so never updated any of them.
            (let ((entry (assq from revlist)))
              (if entry
                  (let ((chars (cdr entry)))
                    ;; Those characters are now sources of TO.
                    (setcar entry to)
                    (while chars
                      (aset table (car chars) to)
                      (setq chars (cdr chars))))))
            ;; Now update REVLIST.
            (let ((entry (assq to revlist)))
              (if entry
                  (setcdr entry (cons from (cdr entry)))
                (setq revlist (cons (list to from) revlist)))))
          (setq elts (cdr elts))))
      (setq args (cdr args)))
    ;; Return TABLE just created.
    table))
1561
c76b5c99
KH
(defun make-translation-table-from-vector (vec)
  "Build a translation table from the decoding vector VEC.
VEC is an array of 256 elements; element N is the multibyte character
that unibyte code N maps to.

The returned char table maps each code N to (aref VEC N).  Its extra
slot 0 holds a second translation table giving the reverse mapping for
every element of VEC that is 256 or greater.

See also the variable `nonascii-translation-table'."
  (let ((decode-table (make-char-table 'translation-table))
        (encode-table (make-char-table 'translation-table))
        (code 0))
    (while (< code 256)
      (let ((char (aref vec code)))
        (aset decode-table code char)
        ;; Only characters outside the unibyte range get a reverse entry.
        (or (< char 256)
            (aset encode-table char code)))
      (setq code (1+ code)))
    (set-char-table-extra-slot decode-table 0 encode-table)
    decode-table))
1578
(defun define-translation-table (symbol &rest args)
  "Define SYMBOL as the name of a translation table built from ARGS.

If the first element of ARGS is a char table whose subtype is
`translation-table', that table is used as is and SYMBOL simply
becomes a name for it.

Otherwise ARGS are passed unchanged to the function
`make-translation-table' (which see) to create the table.

The table is stored in the `translation-table' property of SYMBOL and
registered as (SYMBOL . TABLE) in `translation-table-vector', reusing
the slot SYMBOL already occupies if there is one.  The index of that
slot is stored in the `translation-table-id' property of SYMBOL and is
also the return value."
  (let* ((first-arg (car args))
         (table (if (and (char-table-p first-arg)
                         (eq (char-table-subtype first-arg)
                             'translation-table))
                    first-arg
                  (apply 'make-translation-table args)))
         (len (length translation-table-vector))
         (id 0))
    (put symbol 'translation-table table)
    ;; Skip over slots that belong to other symbols.  Stop at the first
    ;; empty slot, at a slot already owned by SYMBOL, or at the end.
    (while (and (< id len)
                (let ((slot (aref translation-table-vector id)))
                  (and slot
                       (not (eq (car slot) symbol)))))
      (setq id (1+ id)))
    ;; Every slot is taken: double the vector so that slot ID exists.
    (if (>= id len)
        (setq translation-table-vector
              (vconcat translation-table-vector (make-vector len nil))))
    (aset translation-table-vector id (cons symbol table))
    (put symbol 'translation-table-id id)
    id))
1613
35554641
KH
(put 'with-category-table 'lisp-indent-function 1)

(defmacro with-category-table (category-table &rest body)
  "Evaluate BODY with CATEGORY-TABLE as the current category table.
The category table of the current buffer is saved, CATEGORY-TABLE is
installed, BODY is evaluated like `progn', and then the saved table is
restored in the original buffer, even if BODY exits abnormally or
leaves a different buffer current.
The value is the value of the last form in BODY."
  ;; Uninterned symbols keep the saved state invisible to BODY, so a
  ;; variable of the same name used in BODY is not shadowed.
  (let ((old-table (make-symbol "old-table"))
        (old-buffer (make-symbol "old-buffer")))
    `(let ((,old-table (category-table))
           (,old-buffer (current-buffer)))
       (set-category-table ,category-table)
       (unwind-protect
           (progn ,@body)
         ;; Restore in the buffer whose table was replaced, not in
         ;; whatever buffer BODY left current.  Skip the restore if
         ;; BODY killed that buffer.
         (if (buffer-live-p ,old-buffer)
             (with-current-buffer ,old-buffer
               (set-category-table ,old-table)))))))
1622
69eba008
KH
1623;;; Initialize some variables.
1624
;; `use-default-ascent' and `ignore-relative-composition' each hold a
;; char table whose subtype is the variable's own name and which is
;; declared to have no extra slots.
(let ((subtypes '(use-default-ascent ignore-relative-composition)))
  (while subtypes
    ;; The extra-slot count is declared before the table is created.
    (put (car subtypes) 'char-table-extra-slots 0)
    (set (car subtypes) (make-char-table (car subtypes)))
    (setq subtypes (cdr subtypes))))
69eba008
KH
1630
1631;;;
4ed46869
KH
1632(provide 'mule)
1633
1634;;; mule.el ends here