(concat): Pay attention to multibyte characters when
[bpt/emacs.git] / lisp / international / mule.el
CommitLineData
4ed46869
KH
1;;; mule.el --- basic commands for mulitilingual environment
2
3;; Copyright (C) 1995 Free Software Foundation, Inc.
4;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5
6;; Keywords: mule, multilingual, character set, coding system
7
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
369314dc
KH
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
4ed46869
KH
24
25;;; Code:
26
27(defconst mule-version "3.0 (MOMIJINOGA)" "\
28Version number and name of this version of MULE (multilingual environment).")
29
30(defconst mule-version-date "1998.1.1" "\
31Distribution date of this version of MULE (multilingual environment).")
32
33(defun load-with-code-conversion (fullname file &optional noerror nomessage)
34 "Execute a file of Lisp code named FILE whose absolute path is FULLNAME.
35The FILE is decoded before evaluation if necessary.
36If optional second arg NOERROR is non-nil,
37 report no error if FILE doesn't exist.
38Print messages at start and end of loading unless
39 optional third arg NOMESSAGE is non-nil.
40Return t if file exists."
41 (if (null (file-readable-p fullname))
42 (and (null noerror)
43 (signal 'file-error (list "Cannot open load file" file)))
44 ;; Read file with code conversion, and then eval.
45 (let* ((buffer
46 ;; To avoid any autoloading, set default-major-mode to
47 ;; fundamental-mode.
48 (let ((default-major-mode 'fundamental-mode))
49 ;; We can't use `generate-new-buffer' because files.el
50 ;; is not yet loaded.
51 (get-buffer-create (generate-new-buffer-name " *load*"))))
52 (load-in-progress t))
53 (or nomessage (message "Loading %s..." file))
54 (unwind-protect
55 (progn
56 (save-excursion
57 (set-buffer buffer)
58 (insert-file-contents fullname)
59 ;; We must set `buffer-file-name' for `eval-buffer' and
60 ;; `load-history'.
61 (setq buffer-file-name file)
62 ;; Make `kill-buffer' quiet.
63 (set-buffer-modified-p nil))
64 ;; Eval in the original buffer.
65 (eval-buffer buffer))
66 (kill-buffer buffer))
67 (let ((hook (assoc file after-load-alist)))
68 (if hook
69 (mapcar (function eval) (cdr hook))))
70 (or nomessage noninteractive
71 (message "Loading %s...done" file))
72 t)))
73
74;; API (Application Program Interface) for charsets.
75
76;; Return t if OBJ is a quoted symbol.
77(defsubst quoted-symbol-p (obj)
78 (and (listp obj) (eq (car obj) 'quote)))
79
80(defsubst charsetp (object)
81 "T is OBJECT is a charset."
82 (and (symbolp object) (vectorp (get object 'charset))))
83
84(defsubst charset-info (charset)
85 "Return a vector of information of CHARSET.
86The elements of the vector are:
87 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
88 LEADING-CODE-BASE, LEADING-CODE-EXT,
89 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
90 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
91 PLIST,
92where
93CHARSET-ID (integer) is the identification number of the charset.
94DIMENSION (integer) is the number of bytes to represent a character of
95the charset: 1 or 2.
96CHARS (integer) is the number of characters in a dimension: 94 or 96.
97BYTE (integer) is the length of multi-byte form of a character in
98 the charset: one of 1, 2, 3, and 4.
99WIDTH (integer) is the number of columns a character in the charset
100 occupies on the screen: one of 0, 1, and 2.
101DIRECTION (integer) is the rendering direction of characters in the
102 charset when rendering. If 0, render from right to left, else
103 render from left to right.
104LEADING-CODE-BASE (integer) is the base leading-code for the
105 charset.
106LEADING-CODE-EXT (integer) is the extended leading-code for the
107 charset. All charsets of less than 0xA0 has the value 0.
108ISO-FINAL-CHAR (character) is the final character of the
109 corresponding ISO 2022 charset.
110ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
111 while encoding to variants of ISO 2022 coding system, one of the
112 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
113REVERSE-CHARSET (integer) is the charset which differs only in
114 LEFT-TO-RIGHT value from the charset. If there's no such a
115 charset, the value is -1.
116SHORT-NAME (string) is the short name to refer to the charset.
117LONG-NAME (string) is the long name to refer to the charset
118DESCRIPTION (string) is the description string of the charset.
119PLIST (property list) may contain any type of information a user
120 want to put and get by functions `put-charset-property' and
121 `get-charset-property' respectively."
122 (get charset 'charset))
123
124(defmacro charset-id (charset)
125 "Return charset identification number of CHARSET."
126 (if (and (listp charset) (eq (car charset) 'quote))
127 (aref (charset-info (nth 1 charset)) 0)
128 `(aref (charset-info ,charset) 0)))
129
130(defmacro charset-bytes (charset)
131 (if (quoted-symbol-p charset)
132 (aref (charset-info (nth 1 charset)) 1)
133 `(aref (charset-info ,charset) 1)))
134
135(defmacro charset-dimension (charset)
136 (if (quoted-symbol-p charset)
137 (aref (charset-info (nth 1 charset)) 2)
138 `(aref (charset-info ,charset) 2)))
139
140(defmacro charset-chars (charset)
141 (if (quoted-symbol-p charset)
142 (aref (charset-info (nth 1 charset)) 3)
143 `(aref (charset-info ,charset) 3)))
144
145(defmacro charset-width (charset)
146 (if (quoted-symbol-p charset)
147 (aref (charset-info (nth 1 charset)) 4)
148 `(aref (charset-info ,charset) 4)))
149
150(defmacro charset-direction (charset)
151 (if (quoted-symbol-p charset)
152 (aref (charset-info (nth 1 charset)) 5)
153 `(aref (charset-info ,charset) 5)))
154
155(defmacro charset-iso-final-char (charset)
156 (if (quoted-symbol-p charset)
157 (aref (charset-info (nth 1 charset)) 8)
158 `(aref (charset-info ,charset) 8)))
159
160(defmacro charset-iso-graphic-plane (charset)
161 (if (quoted-symbol-p charset)
162 (aref (charset-info (nth 1 charset)) 9)
163 `(aref (charset-info ,charset) 9)))
164
165(defmacro charset-reverse-charset (charset)
166 (if (quoted-symbol-p charset)
167 (aref (charset-info (nth 1 charset)) 10)
168 `(aref (charset-info ,charset) 10)))
169
170(defmacro charset-short-name (charset)
171 (if (quoted-symbol-p charset)
172 (aref (charset-info (nth 1 charset)) 11)
173 `(aref (charset-info ,charset) 11)))
174
175(defmacro charset-long-name (charset)
176 (if (quoted-symbol-p charset)
177 (aref (charset-info (nth 1 charset)) 12)
178 `(aref (charset-info ,charset) 12)))
179
180(defmacro charset-description (charset)
181 (if (quoted-symbol-p charset)
182 (aref (charset-info (nth 1 charset)) 13)
183 `(aref (charset-info ,charset) 13)))
184
185(defmacro charset-plist (charset)
186 (if (quoted-symbol-p charset)
f98e2797 187 `(aref ,(charset-info (nth 1 charset)) 14)
4ed46869
KH
188 `(aref (charset-info ,charset) 14)))
189
190(defun set-charset-plist (charset plist)
191 (aset (charset-info charset) 14 plist))
192
193(defmacro make-char (charset &optional c1 c2)
f98e2797
KH
194 "Return a character of CHARSET and position-codes CODE1 and CODE2.
195CODE1 and CODE2 are optional, but if you don't supply
196 sufficient position-codes, return a generic character which stands for
197all characters or group of characters in the character sets.
a73a8c89 198A generic character can be used to index a char table (e.g. syntax-table)."
4ed46869
KH
199 (if (quoted-symbol-p charset)
200 `(make-char-internal ,(charset-id (nth 1 charset)) ,c1 ,c2)
201 `(make-char-internal (charset-id ,charset) ,c1 ,c2)))
202
69eba008
KH
203(defmacro charset-list ()
204 "Return list of charsets ever defined."
13d5617d
KH
205 'charset-list)
206
207(defsubst generic-char-p (char)
208 "Return t if and only if CHAR is a generic character.
209See also the documentation of make-char."
210 (let ((l (split-char char)))
211 (and (or (= (nth 1 l) 0) (eq (nth 2 l) 0))
212 (not (eq (car l) 'composition)))))
69eba008 213
8057896b 214;; Coding system staffs
4ed46869 215
8057896b 216;; Coding system is a symbol that has the property `coding-system'.
4ed46869 217;;
8057896b
KH
218;; The value of the property `coding-system' is a vector of the
219;; following format:
220;; [TYPE MNEMONIC DOC-STRING NOT-USED-NOW FLAGS]
221;; We call this vector as coding-spec. See comments in src/coding.c
222;; for more detail. The property value may be another coding system,
223;; in which case, the coding-spec should be taken from that
224;; coding-system. The 4th element NOT-USED-NOW is kept just for
225;; backward compatibility with old version of Mule.
226
227(defconst coding-spec-type-idx 0)
228(defconst coding-spec-mnemonic-idx 1)
229(defconst coding-spec-doc-string-idx 2)
230(defconst coding-spec-flags-idx 4)
231
232;; Coding system may have proerpty `eol-type'. The value of the
233;; property `eol-type' is integer 0..2 or a vector of three coding
234;; systems. The integer value 0, 1, and 2 indicate the format of
4ed46869
KH
235;; end-of-line LF, CRLF, and CR respectively. The vector value
236;; indicates that the format of end-of-line should be detected
8057896b
KH
237;; automatically. Nth element of the vector is the subsidiary coding
238;; system whose `eol-type' property is N.
4ed46869 239;;
8057896b
KH
240;; Coding system may also have properties `post-read-conversion' and
241;; `pre-write-conversion. Values of these properties are functions.
4ed46869
KH
242;;
243;; The function in `post-read-conversion' is called after some text is
8057896b 244;; inserted and decoded along the coding system and before any
4ed46869
KH
245;; functions in `after-insert-functions' are called. The arguments to
246;; this function is the same as those of a function in
247;; `after-insert-functions', i.e. LENGTH of a text while putting point
248;; at the head of the text to be decoded
249;;
250;; The function in `pre-write-conversion' is called after all
251;; functions in `write-region-annotate-functions' and
252;; `buffer-file-format' are called, and before the text is encoded by
8057896b 253;; the coding system. The arguments to this function is the same as
4ed46869
KH
254;; those of a function in `write-region-annotate-functions', i.e. FROM
255;; and TO specifying region of a text.
256
8057896b
KH
257;; Return Nth element of coding-spec of CODING-SYSTEM.
258(defun coding-system-spec-ref (coding-system n)
259 (check-coding-system coding-system)
260 (let ((vec (coding-system-spec coding-system)))
261 (and vec (aref vec n))))
4ed46869 262
4ed46869 263(defun coding-system-type (coding-system)
6e9722b0 264 "Return TYPE element in coding-spec of CODING-SYSTEM."
8057896b 265 (coding-system-spec-ref coding-system coding-spec-type-idx))
4ed46869 266
4ed46869 267(defun coding-system-mnemonic (coding-system)
8057896b
KH
268 "Return MNEMONIC element in coding-spec of CODING-SYSTEM."
269 (or (coding-system-spec-ref coding-system coding-spec-mnemonic-idx)
270 ?-))
4ed46869 271
8057896b
KH
272(defun coding-system-doc-string (coding-system)
273 "Return DOC-STRING element in coding-spec of CODING-SYSTEM."
274 (coding-system-spec-ref coding-system coding-spec-doc-string-idx))
4ed46869 275
4ed46869 276(defun coding-system-flags (coding-system)
8057896b
KH
277 "Return FLAGS element in coding-spec of CODING-SYSTEM."
278 (coding-system-spec-ref coding-system coding-spec-flags-idx))
4ed46869 279
8057896b
KH
280(defun coding-system-eol-type (coding-system)
281 "Return eol-type property of CODING-SYSTEM."
69eba008 282 (check-coding-system coding-system)
4ed46869
KH
283 (and coding-system
284 (or (get coding-system 'eol-type)
8057896b 285 (coding-system-eol-type (get coding-system 'coding-system)))))
4ed46869 286
6e9722b0
KH
287(defun coding-system-category (coding-system)
288 "Return coding category of CODING-SYSTEM."
289 (and coding-system
290 (symbolp coding-system)
291 (or (get coding-system 'coding-category)
292 (coding-system-category (get coding-system 'coding-system)))))
293
294;; Make subsidiary coding systems (eol-type variants) of CODING-SYSTEM.
295(defun make-subsidiary-coding-system (coding-system)
8057896b
KH
296 (let ((subsidiaries (vector (intern (format "%s-unix" coding-system))
297 (intern (format "%s-dos" coding-system))
298 (intern (format "%s-mac" coding-system))))
299 (i 0))
300 (while (< i 3)
6e9722b0 301 (put (aref subsidiaries i) 'coding-system coding-system)
8057896b
KH
302 (put (aref subsidiaries i) 'eol-type i)
303 (put (aref subsidiaries i) 'eol-variant t)
304 (setq i (1+ i)))
305 subsidiaries))
4ed46869 306
8057896b
KH
307(defun make-coding-system (coding-system type mnemonic doc-string
308 &optional flags)
4ed46869 309 "Define a new CODING-SYSTEM (symbol).
8057896b
KH
310Remaining arguments are TYPE, MNEMONIC, DOC-STRING, and FLAGS (optional) which
311construct a coding-spec of CODING-SYSTEM in the following format:
312 [TYPE MNEMONIC DOC-STRING nil FLAGS]
4ed46869
KH
313TYPE is an integer value indicating the type of coding-system as follows:
314 0: Emacs internal format,
315 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PC,
316 2: ISO-2022 including many variants,
317 3: Big5 used mainly on Chinese PC,
318 4: private, CCL programs provide encoding/decoding algorithm.
319MNEMONIC is a character to be displayed on mode line for the coding-system.
8057896b 320DOC-STRING is a documentation string for the coding-system.
4ed46869
KH
321FLAGS specifies more precise information of each TYPE.
322 If TYPE is 2 (ISO-2022), FLAGS should be a list of:
323 CHARSET0, CHARSET1, CHARSET2, CHARSET3, SHORT-FORM,
324 ASCII-EOL, ASCII-CNTL, SEVEN, LOCKING-SHIFT, SINGLE-SHIFT,
a73a8c89 325 USE-ROMAN, USE-OLDJIS, NO-ISO6429, INIT-BOL, DESIGNATION-BOL.
4ed46869
KH
326 CHARSETn are character sets initially designated to Gn graphic registers.
327 If CHARSETn is nil, Gn is never used.
328 If CHARSETn is t, Gn can be used but nothing designated initially.
329 If CHARSETn is a list of character sets, those character sets are
330 designated to Gn on output, but nothing designated to Gn initially.
331 SHORT-FORM non-nil means use short designation sequence on output.
332 ASCII-EOL non-nil means designate ASCII to g0 at end of line on output.
333 ASCII-CNTL non-nil means designate ASCII to g0 before control codes and
334 SPACE on output.
335 SEVEN non-nil means use 7-bit code only on output.
336 LOCKING-SHIFT non-nil means use locking-shift.
337 SINGLE-SHIFT non-nil means use single-shift.
338 USE-ROMAN non-nil means designate JIS0201-1976-Roman instead of ASCII.
339 USE-OLDJIS non-nil means designate JIS0208-1976 instead of JIS0208-1983.
340 NO-ISO6429 non-nil means not use ISO6429's direction specification.
69eba008
KH
341 INIT-BOL non-nil means any designation state is assumed to be reset
342 to initial at each beginning of line on output.
343 DESIGNATION-BOL non-nil means designation sequences should be placed
344 at beginning of line on output.
4ed46869
KH
345 If TYPE is 4 (private), FLAGS should be a cons of CCL programs,
346 for encoding and decoding. See the documentation of CCL for more detail."
347
348 ;; At first, set a value of `coding-system' property.
6e9722b0
KH
349 (let ((coding-spec (make-vector 5 nil))
350 coding-category)
8057896b
KH
351 (if (or (not (integerp type)) (< type 0) (> type 4))
352 (error "TYPE argument must be 0..4"))
353 (if (or (not (integerp mnemonic)) (<= mnemonic ? ) (> mnemonic 127))
354 (error "MNEMONIC arguemnt must be a printable character."))
355 (aset coding-spec 0 type)
356 (aset coding-spec 1 mnemonic)
357 (aset coding-spec 2 (if (stringp doc-string) doc-string ""))
358 (aset coding-spec 3 nil) ; obsolete element
6e9722b0
KH
359 (cond ((= type 0)
360 (setq coding-category 'coding-category-emacs-mule))
361 ((= type 1)
362 (setq coding-category 'coding-category-sjis))
363 ((= type 2) ; ISO2022
4ed46869 364 (let ((i 0)
6e9722b0
KH
365 (vec (make-vector 32 nil))
366 (no-initial-designation t)
367 (g1-designation nil))
4ed46869
KH
368 (while (< i 4)
369 (let ((charset (car flags)))
6e9722b0
KH
370 (if (and no-initial-designation
371 (> i 0)
372 (or (charsetp charset)
373 (and (consp charset)
374 (charsetp (car charset)))))
375 (setq no-initial-designation nil))
376 (if (charsetp charset)
377 (if (= i 1) (setq g1-designation charset))
378 (if (consp charset)
379 (let ((tail charset)
380 elt)
381 (while tail
382 (setq elt (car tail))
69eba008
KH
383 (or (not elt) (eq elt t) (charsetp elt)
384 (error "Invalid charset: %s" elt))
6e9722b0
KH
385 (setq tail (cdr tail)))
386 (setq g1-designation (car charset)))
387 (if (and charset (not (eq charset t)))
388 (error "Invalid charset: %s" charset))))
4ed46869
KH
389 (aset vec i charset))
390 (setq flags (cdr flags) i (1+ i)))
391 (while (and (< i 32) flags)
392 (aset vec i (car flags))
393 (setq flags (cdr flags) i (1+ i)))
6e9722b0
KH
394 (aset coding-spec 4 vec)
395 (if no-initial-designation
396 (put coding-system 'no-initial-designation t))
397 (setq coding-category
398 (if (aref vec 8) ; Use locking-shift.
399 'coding-category-iso-else
400 (if (aref vec 7) ; 7-bit only.
401 (if (aref vec 9) ; Use single-shift.
402 'coding-category-iso-else
403 'coding-category-iso-7)
404 (if no-initial-designation
405 'coding-category-iso-else
406 (if (and (charsetp g1-designation)
407 (= (charset-dimension g1-designation) 2))
408 'coding-category-iso-8-2
409 'coding-category-iso-8-1)))))))
410 ((= type 3)
411 (setq coding-category 'coding-category-big5))
412 ((= type 4) ; private
413 (setq coding-category 'coding-category-binary)
4ed46869
KH
414 (if (and (consp flags)
415 (vectorp (car flags))
416 (vectorp (cdr flags)))
8057896b 417 (aset coding-spec 4 flags)
6e9722b0
KH
418 (error "Invalid FLAGS argument for TYPE 4 (CCL)"))))
419 (put coding-system 'coding-system coding-spec)
420 (put coding-system 'coding-category coding-category)
421 (put coding-category 'coding-systems
422 (cons coding-system (get coding-category 'coding-systems))))
4ed46869
KH
423
424 ;; Next, set a value of `eol-type' property. The value is a vector
6e9722b0 425 ;; of subsidiary coding systems, each corresponds to a coding system
4ed46869 426 ;; for the detected end-of-line format.
8057896b
KH
427 (put coding-system 'eol-type
428 (if (<= type 3)
6e9722b0 429 (make-subsidiary-coding-system coding-system)
8057896b
KH
430 0)))
431
432(defun define-coding-system-alias (coding-system alias)
433 "Define ALIAS as an alias coding system of CODING-SYSTEM."
434 (check-coding-system coding-system)
6e9722b0
KH
435 (let ((parent (coding-system-parent coding-system)))
436 (if parent
437 (setq coding-system parent)))
8057896b 438 (put alias 'coding-system coding-system)
6e9722b0
KH
439 (put alias 'parent-coding-system coding-system)
440 (put coding-system 'alias-coding-systems
441 (cons alias (get coding-system 'alias-coding-systems)))
442 (let ((eol-variants (coding-system-eol-type coding-system))
443 subsidiaries)
444 (if (vectorp eol-variants)
445 (let ((i 0))
446 (setq subsidiaries (make-subsidiary-coding-system alias))
447 (while (< i 3)
448 (put (aref subsidiaries i) 'parent-coding-system
449 (aref eol-variants i))
450 (put (aref eol-variants i) 'alias-coding-systems
451 (cons (aref subsidiaries i) (get (aref eol-variants i)
452 'alias-coding-systems)))
453 (setq i (1+ i)))))))
4ed46869
KH
454
455(defun set-buffer-file-coding-system (coding-system &optional force)
456 "Set buffer-file-coding-system of the current buffer to CODING-SYSTEM.
457If eol-type of the current buffer-file-coding-system is an integer value N, and
a73a8c89 458 eol-type of CODING-SYSTEM is a vector, the Nth element of the vector is used
4ed46869
KH
459 instead of CODING-SYSTEM itself.
460Optional prefix argument FORCE non-nil means CODING-SYSTEM is set
461 regardless of eol-type of the current buffer-file-coding-system."
462 (interactive "zBuffer-file-coding-system: \nP")
463 (check-coding-system coding-system)
464 (if (null force)
8057896b
KH
465 (let ((x (coding-system-eol-type buffer-file-coding-system))
466 (y (coding-system-eol-type coding-system)))
4ed46869
KH
467 (if (and (numberp x) (>= x 0) (<= x 2) (vectorp y))
468 (setq coding-system (aref y x)))))
469 (setq buffer-file-coding-system coding-system)
470 (set-buffer-modified-p t)
471 (force-mode-line-update))
472
df100398
KH
473(defun set-terminal-coding-system (coding-system)
474 "Set coding system of your terminal to CODING-SYSTEM.
475All outputs to terminal are encoded by the specified coding system."
476 (interactive "zCoding-system for terminal display: ")
477 (set-terminal-coding-system-internal coding-system)
478 (redraw-frame (selected-frame)))
479
480(defun set-keyboard-coding-system (coding-system)
481 "Set coding system of codes sent from terminal keyboard to CODING-SYSTEM.
482In addition, this command toggles Encoded-kbd minor mode.
483If the specified coding system is nil, Encoded-bkd mode is turned off,
484else it is turned on so that user inputs are decoded by the
485specified coding system."
486 (interactive "zCoding-system for keyboard input: ")
487 (set-keyboard-coding-system-internal coding-system)
488 (encoded-kbd-mode (if coding-system 1 0)))
489
490(defun set-buffer-process-coding-system (decoding encoding)
491 "Set coding systems to the process associated with the current buffer.
492DECODING is the coding system to be used to decode input from the process,
493ENCODING is to be used to encode output to the process."
4ed46869
KH
494 (interactive
495 "zCoding-system for process input: \nzCoding-system for process output: ")
496 (let ((proc (get-buffer-process (current-buffer))))
497 (if (null proc)
498 (error "no process")
df100398
KH
499 (check-coding-system decoding)
500 (check-coding-system encoding)
501 (set-process-coding-system proc decoding encoding)))
4ed46869
KH
502 (force-mode-line-update))
503
4ed46869
KH
504(defun set-coding-priority (arg)
505 "Set priority of coding-category according to LIST.
506LIST is a list of coding-categories ordered by priority."
507 (let (l)
508 ;; Put coding-categories listed in ARG to L while checking the
509 ;; validity. We assume that `coding-category-list' contains whole
510 ;; coding-categories.
511 (while arg
512 (if (null (memq (car arg) coding-category-list))
513 (error "Invalid element in argument: %s" (car arg)))
514 (setq l (cons (car arg) l))
515 (setq arg (cdr arg)))
516 ;; Put coding-category not listed in ARG to L.
517 (while coding-category-list
518 (if (null (memq (car coding-category-list) l))
519 (setq l (cons (car coding-category-list) l)))
520 (setq coding-category-list (cdr coding-category-list)))
521 ;; Update `coding-category-list' and return it.
522 (setq coding-category-list (nreverse l))))
523
524;;; FILE I/O
525
526;; Set buffer-file-coding-system of the current buffer after some text
527;; is inserted.
528(defun after-insert-file-set-buffer-file-coding-system (inserted)
529 (if last-coding-system-used
530 (let ((coding-system
531 (find-new-buffer-file-coding-system last-coding-system-used))
532 (modified-p (buffer-modified-p)))
533 (if coding-system
534 (set-buffer-file-coding-system coding-system))
535 (set-buffer-modified-p modified-p)))
536 nil)
537
538(setq after-insert-file-functions
539 (cons 'after-insert-file-set-buffer-file-coding-system
540 after-insert-file-functions))
541
8057896b 542;; The coding-spec and eol-type of coding-system returned is decided
4ed46869
KH
543;; independently in the following order.
544;; 1. That of buffer-file-coding-system locally bound.
545;; 2. That of CODING.
546
547(defun find-new-buffer-file-coding-system (coding)
548 "Return a coding system for a buffer when a file of CODING is inserted.
a73a8c89
KH
549The local variable `buffer-file-coding-system' of the current buffer
550is set to the returned value.
df100398 551Return nil if there's no need of setting new buffer-file-coding-system."
4ed46869
KH
552 (let (local-coding local-eol
553 found-eol
554 new-coding new-eol)
555 (if (null coding)
556 ;; Nothing found about coding.
557 nil
558
559 ;; Get information of the current local value of
560 ;; `buffer-file-coding-system' in LOCAL-EOL and LOCAL-CODING.
561 (if (local-variable-p 'buffer-file-coding-system)
562 ;; Something already set locally.
563 (progn
8057896b 564 (setq local-eol (coding-system-eol-type buffer-file-coding-system))
4ed46869
KH
565 (if (null (numberp local-eol))
566 ;; But eol-type is not yet set.
567 (setq local-eol nil))
568 (if (null (eq (coding-system-type buffer-file-coding-system) t))
13d5617d 569 ;; This is not `undecided'.
4ed46869
KH
570 (progn
571 (setq local-coding buffer-file-coding-system)
572 (while (symbolp (get local-coding 'coding-system))
573 (setq local-coding (get local-coding 'coding-system))))
574 )))
575
576 (if (and local-eol local-coding)
577 ;; The current buffer has already set full coding-system, we
578 ;; had better not change it.
579 nil
580
8057896b 581 (setq found-eol (coding-system-eol-type coding))
4ed46869
KH
582 (if (null (numberp found-eol))
583 ;; But eol-type is not found.
584 (setq found-eol nil))
585 (if (eq (coding-system-type coding) t)
13d5617d
KH
586 ;; This is `undecided', which means nothing found except
587 ;; for eol-type.
4ed46869
KH
588 (setq coding nil))
589
590 ;; The local setting takes precedence over the found one.
591 (setq new-coding (or local-coding coding))
592 (setq new-eol (or local-eol found-eol))
593 (if (and (numberp new-eol)
8057896b 594 (vectorp (coding-system-eol-type new-coding)))
4ed46869 595 (setq new-coding
8057896b 596 (aref (coding-system-eol-type new-coding) new-eol)))
4ed46869
KH
597 new-coding))))
598
a73a8c89
KH
599(defun make-unification-table (&rest args)
600 "Make a unification table (char table) from arguments.
13d5617d
KH
601Each argument is a list of the form (FROM . TO),
602where FROM is a character to be unified to TO.
603
604FROM can be a generic character (see make-char). In this case, TO is
605a generic character containing the same number of charcters or a
606oridinal character. If FROM and TO are both generic characters, all
607characters belonging to FROM are unified to characters belonging to TO
608without changing their position code(s)."
a73a8c89
KH
609 (let ((table (make-char-table 'character-unification-table))
610 revlist)
611 (while args
612 (let ((elts (car args)))
613 (while elts
13d5617d
KH
614 (let* ((from (car (car elts)))
615 (from-i 0) ; degree of freedom of FROM
616 (from-rev (nreverse (split-char from)))
617 (to (cdr (car elts)))
618 (to-i 0) ; degree of freedom of TO
619 (to-rev (nreverse (split-char to))))
620 ;; Check numbers of heading 0s in FROM-REV and TO-REV.
621 (while (eq (car from-rev) 0)
622 (setq from-i (1+ from-i) from-rev (cdr from-rev)))
623 (while (eq (car to-rev) 0)
624 (setq to-i (1+ to-i) to-rev (cdr to-rev)))
625 (if (and (/= from-i to-i) (/= to-i 0))
626 (error "Invalid character pair (%d . %d)" from to))
627 ;; If we have already unified TO to TO-ALT, FROM should
628 ;; also be unified to TO-ALT. But, this is only if TO is
629 ;; a generic character or TO-ALT is not a generic
630 ;; character.
631 (let ((to-alt (aref table to)))
632 (if (and to-alt
633 (or (> to-i 0) (not (generic-char-p to-alt))))
634 (setq to to-alt)))
635 (if (> from-i 0)
636 (set-char-table-default table from to)
637 (aset table from to))
a73a8c89
KH
638 ;; If we have already unified some chars to FROM, they
639 ;; should also be unified to TO.
640 (let ((l (assq from revlist)))
641 (if l
642 (let ((ch (car l)))
643 (setcar l to)
644 (setq l (cdr l))
645 (while l
646 (aset table ch to)
647 (setq l (cdr l)) ))))
648 ;; Now update REVLIST.
649 (let ((l (assq to revlist)))
650 (if l
651 (setcdr l (cons from (cdr l)))
652 (setq revlist (cons (list to from) revlist)))))
653 (setq elts (cdr elts))))
654 (setq args (cdr args)))
655 ;; Return TABLE just created.
656 table))
657
69eba008
KH
658;;; Initialize some variables.
659
660(put 'use-default-ascent 'char-table-extra-slots 0)
661(setq use-default-ascent (make-char-table 'use-default-ascent))
662
663;;;
4ed46869
KH
664(provide 'mule)
665
666;;; mule.el ends here