*** empty log message ***
[bpt/emacs.git] / lisp / international / mule.el
CommitLineData
07513d64 1;;; mule.el --- basic commands for multilingual environment
4ed46869 2
4ed46869 3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
8f924df7 4;; Licensed to the Free Software Foundation.
32db08f1 5;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
8f924df7 6;; Copyright (C) 2003
c1841772
KH
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H13PRO009
4ed46869
KH
9
10;; Keywords: mule, multilingual, character set, coding system
11
12;; This file is part of GNU Emacs.
13
14;; GNU Emacs is free software; you can redistribute it and/or modify
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
369314dc
KH
25;; along with GNU Emacs; see the file COPYING. If not, write to the
26;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27;; Boston, MA 02111-1307, USA.
4ed46869 28
60370d40
PJ
29;;; Commentary:
30
4ed46869
KH
31;;; Code:
32
8f924df7 33(defconst mule-version "6.0 (HANACHIRUSATO)" "\
4ed46869
KH
34Version number and name of this version of MULE (multilingual environment).")
35
8f924df7 36(defconst mule-version-date "2003.9.1" "\
4ed46869
KH
37Distribution date of this version of MULE (multilingual environment).")
38
c1841772
KH
39\f
40;;; CHARSET
41
6d2b6635
KH
42;; Backward compatibility code for handling emacs-mule charsets.
43(defvar private-char-area-1-min #xF0000)
44(defvar private-char-area-1-max #xFFFFE)
45(defvar private-char-area-2-min #x100000)
46(defvar private-char-area-2-max #x10FFFE)
47
48;; Table of emacs-mule charsets indexed by their emacs-mule ID.
49(defvar emacs-mule-charset-table (make-vector 256 nil))
50(aset emacs-mule-charset-table 0 'ascii)
51
52;; Convert the argument of old-style calll of define-charset to a
53;; property list used by the new-style.
54;; INFO-VECTOR is a vector of the format:
55;; [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
56;; SHORT-NAME LONG-NAME DESCRIPTION]
57
58(defun convert-define-charset-argument (emacs-mule-id info-vector)
59 (let* ((dim (aref info-vector 0))
60 (chars (aref info-vector 1))
61 (total (if (= dim 1) chars (* chars chars)))
62 (code-space (if (= dim 1) (if (= chars 96) [32 127] [33 126])
63 (if (= chars 96) [32 127 32 127] [33 126 33 126])))
64 code-offset)
65 (if (integerp emacs-mule-id)
66 (or (= emacs-mule-id 0)
67 (and (>= emacs-mule-id 129) (< emacs-mule-id 256))
68 (error "Invalid CHARSET-ID: %d" emacs-mule-id))
69 (let (from-id to-id)
70 (if (= dim 1) (setq from-id 160 to-id 224)
71 (setq from-id 224 to-id 255))
72 (while (and (< from-id to-id)
73 (not (aref emacs-mule-charset-table from-id)))
74 (setq from-id (1+ from-id)))
75 (if (= from-id to-id)
76 (error "No more room for the new Emacs-mule charset"))
77 (setq emacs-mule-id from-id)))
78 (if (> (- private-char-area-1-max private-char-area-1-min) total)
79 (setq code-offset private-char-area-1-min
80 private-char-area-1-min (+ private-char-area-1-min total))
81 (if (> (- private-char-area-2-max private-char-area-2-min) total)
82 (setq code-offset private-char-area-2-min
83 private-char-area-2-min (+ private-char-area-2-min total))
84 (error "No more space for a new charset.")))
85 (list :dimension dim
86 :code-space code-space
87 :iso-final-char (aref info-vector 4)
88 :code-offset code-offset
89 :emacs-mule-id emacs-mule-id)))
90
c1841772
KH
91(defun define-charset (name docstring &rest props)
92 "Define NAME (symbol) as a charset with DOCSTRING.
93The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
07513d64 94may be any symbol. The following have special meanings, and one of
bec25acc 95`:code-offset', `:map', `:subset', `:superset' must be specified.
c1841772
KH
96
97`:short-name'
98
99VALUE must be a short string to identify the charset. If omitted,
100NAME is used.
101
102`:long-name'
103
104VALUE must be a string longer than `:short-name' to identify the
07513d64 105charset. If omitted, the value of the `:short-name' attribute is used.
c1841772
KH
106
107`:dimension'
108
109VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of
07513d64
DL
110code-points of the charsets. If omitted, it is calculated from the
111value of the `:code-space' attribute.
c1841772
KH
112
113`:code-space'
114
115VALUE must be a vector of length at most 8 specifying the byte code
116range of each dimension in this format:
117 [ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
07513d64 118where MIN-N is the minimum byte value of Nth dimension of code-point,
c1841772
KH
119MAX-N is the maximum byte value of that.
120
b1a79461
KH
121`:min-code'
122
123VALUE must be an integer specifying the mininum code point of the
124charset. If omitted, it is calculated from `:code-space'. VALUE may
125be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
126the code point and LOW is the least significant 16 bits.
127
1f32125f 128`:max-code'
b1a79461
KH
129
130VALUE must be an integer specifying the maxinum code point of the
131charset. If omitted, it is calculated from `:code-space'. VALUE may
132be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
133the code point and LOW is the least significant 16 bits.
134
c1841772
KH
135`:iso-final-char'
136
137VALUE must be a character in the range 32 to 127 (inclusive)
138specifying the final char of the charset for ISO-2022 encoding. If
139omitted, the charset can't be encoded by ISO-2022 based
140coding-systems.
141
142`:iso-revision-number'
143
144VALUE must be an integer in the range 0..63, specifying the revision
145number of the charset for ISO-2022 encoding.
146
147`:emacs-mule-id'
148
6d2b6635 149VALUE must be an integer of 0, 129..255. If omitted, the charset
c1841772
KH
150can't be encoded by coding-systems of type `emacs-mule'.
151
152`:ascii-compatible-p'
153
07513d64
DL
154VALUE must be nil or t (default nil). If VALUE is t, the charset is
155compatible with ASCII, i.e. the first 128 code points map to ASCII.
c1841772
KH
156
157`:supplementary-p'
158
159VALUE must be nil or t. If the VALUE is t, the charset is
07513d64
DL
160supplementary, which means it is used only as a parent of some other
161charset.
c1841772
KH
162
163`:invalid-code'
164
165VALUE must be a nonnegative integer that can be used as an invalid
166code point of the charset. If the minimum code is 0 and the maximum
167code is greater than Emacs' maximum integer value, `:invalid-code'
168should not be omitted.
169
170`:code-offset'
171
07513d64
DL
172VALUE must be an integer added to the index number of a character to
173get the corresponding character code.
c1841772
KH
174
175`:map'
176
177VALUE must be vector or string.
178
179If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
180where CODE-n is a code-point of the charset, and CHAR-n is the
07513d64 181corresponding character code.
c1841772
KH
182
183If it is a string, it is a name of file that contains the above
3e4abc9e
KH
184information. Each line of the file must be this format:
185 0xXXX 0xYYY
186where XXX is a hexadecimal representation of CODE-n and YYY is a
187hexadecimal representation of CHAR-n. A line starting with `#' is a
188comment line.
c1841772 189
2c2a254f
KH
190`:subset'
191
192VALUE must be a list:
193 ( PARENT MIN-CODE MAX-CODE OFFSET )
194PARENT is a parent charset. MIN-CODE and MAX-CODE specify the range
195of characters inherited from the parent. OFFSET is an integer value
196to add to a code point of the parent charset to get the corresponding
197code point of this charset.
198
199`:superset'
c1841772
KH
200
201VALUE must be a list of parent charsets. The charset inherits
202characters from them. Each element of the list may be a cons (PARENT
203. OFFSET), where PARENT is a parent charset, and OFFSET is an offset
2c2a254f
KH
204value to add to a code point of PARENT to get the corresponding code
205point of this charset.
c1841772
KH
206
207`:unify-map'
208
209VALUE must be vector or string.
210
211If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
212where CODE-n is a code-point of the charset, and CHAR-n is the
07513d64 213corresponding Unicode character code.
c1841772
KH
214
215If it is a string, it is a name of file that contains the above
3e4abc9e
KH
216information. The file format is the same as what described for `:map'
217attribute."
6d2b6635
KH
218 (when (vectorp (car props))
219 ;; Old style code:
220 ;; (define-charset CHARSET-ID CHARSET-SYMBOL INFO-VECTOR)
221 ;; Convert the argument to make it fit with the current style.
222 (let ((vec (car props)))
223 (setq props (convert-define-charset-argument name vec)
224 name docstring
225 docstring (aref vec 8))))
c1841772
KH
226 (let ((attrs (mapcar 'list '(:dimension
227 :code-space
b1a79461
KH
228 :min-code
229 :max-code
c1841772
KH
230 :iso-final-char
231 :iso-revision-number
232 :emacs-mule-id
233 :ascii-compatible-p
234 :supplementary-p
235 :invalid-code
236 :code-offset
237 :map
2c2a254f
KH
238 :subset
239 :superset
c1841772
KH
240 :unify-map
241 :plist))))
242
243 ;; If :dimension is omitted, get the dimension from :code-space.
244 (let ((dimension (plist-get props :dimension)))
245 (or dimension
c04e918c
KH
246 (let ((code-space (plist-get props :code-space)))
247 (setq dimension (if code-space (/ (length code-space) 2) 4))
c1841772
KH
248 (setq props (plist-put props :dimension dimension)))))
249
c04e918c
KH
250 (let ((code-space (plist-get props :code-space)))
251 (or code-space
252 (let ((dimension (plist-get props :dimension)))
253 (setq code-space (make-vector 8 0))
254 (dotimes (i dimension)
255 (aset code-space (1+ (* i 2)) #xFF))
256 (setq props (plist-put props :code-space code-space)))))
257
6d2b6635
KH
258 ;; If :emacs-mule-id is specified, update emacs-mule-charset-table.
259 (let ((emacs-mule-id (plist-get props :emacs-mule-id)))
260 (if (integerp emacs-mule-id)
261 (aset emacs-mule-charset-table emacs-mule-id name)))
262
c1841772
KH
263 (dolist (slot attrs)
264 (setcdr slot (plist-get props (car slot))))
265
266 ;; Make sure that the value of :code-space is a vector of 8
267 ;; elements.
268 (let* ((slot (assq :code-space attrs))
269 (val (cdr slot))
270 (len (length val)))
271 (if (< len 8)
272 (setcdr slot
273 (vconcat val (make-vector (- 8 len) 0)))))
274
275 ;; Add :name and :docstring properties to PROPS.
276 (setq props
277 (cons :name (cons name (cons :docstring (cons docstring props)))))
278 (or (plist-get props :short-name)
279 (plist-put props :short-name (symbol-name name)))
280 (or (plist-get props :long-name)
281 (plist-put props :long-name (plist-get props :short-name)))
e1e529fa
DL
282 ;; We can probably get a worthwhile amount in purespace.
283 (setq props
284 (mapcar (lambda (elt)
285 (if (stringp elt)
286 (purecopy elt)
287 elt))
288 props))
c1841772
KH
289 (setcdr (assq :plist attrs) props)
290
291 (apply 'define-charset-internal name (mapcar 'cdr attrs))))
292
293
4ed46869 294(defun load-with-code-conversion (fullname file &optional noerror nomessage)
0f69cb38
KH
295 "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
296The file contents are decoded before evaluation if necessary.
4ed46869
KH
297If optional second arg NOERROR is non-nil,
298 report no error if FILE doesn't exist.
299Print messages at start and end of loading unless
300 optional third arg NOMESSAGE is non-nil.
301Return t if file exists."
302 (if (null (file-readable-p fullname))
303 (and (null noerror)
304 (signal 'file-error (list "Cannot open load file" file)))
305 ;; Read file with code conversion, and then eval.
306 (let* ((buffer
307 ;; To avoid any autoloading, set default-major-mode to
308 ;; fundamental-mode.
88162676
RS
309 ;; So that we don't get completely screwed if the
310 ;; file is encoded in some complicated character set,
311 ;; read it with real decoding, as a multibyte buffer,
312 ;; even if this is a --unibyte Emacs session.
313 (let ((default-major-mode 'fundamental-mode)
314 (default-enable-multibyte-characters t))
4ed46869
KH
315 ;; We can't use `generate-new-buffer' because files.el
316 ;; is not yet loaded.
317 (get-buffer-create (generate-new-buffer-name " *load*"))))
db5cae4b
SM
318 (load-in-progress t)
319 (source (save-match-data (string-match "\\.el\\'" fullname))))
320 (unless nomessage
321 (if source
322 (message "Loading %s (source)..." file)
323 (message "Loading %s..." file)))
324 (when purify-flag
4c86cca0 325 (push file preloaded-file-list))
4ed46869 326 (unwind-protect
a6acd8a2 327 (let ((load-file-name fullname)
1c4cc63a 328 (set-auto-coding-for-load t)
a6acd8a2 329 (inhibit-file-name-operation nil))
4ed46869
KH
330 (save-excursion
331 (set-buffer buffer)
332 (insert-file-contents fullname)
7d276780
EZ
333 ;; If the loaded file was inserted with no-conversion or
334 ;; raw-text coding system, make the buffer unibyte.
335 ;; Otherwise, eval-buffer might try to interpret random
336 ;; binary junk as multibyte characters.
337 (if (and enable-multibyte-characters
8f924df7
KH
338 (or (eq (coding-system-type last-coding-system-used)
339 'raw-text)))
7d276780 340 (set-buffer-multibyte nil))
4ed46869
KH
341 ;; Make `kill-buffer' quiet.
342 (set-buffer-modified-p nil))
0f69cb38 343 ;; Have the original buffer current while we eval.
88162676
RS
344 (eval-buffer buffer nil file
345 ;; If this Emacs is running with --unibyte,
346 ;; convert multibyte strings to unibyte
347 ;; after reading them.
ba74e833 348;; (not default-enable-multibyte-characters)
8dd08b5b 349 nil t
ba74e833 350 ))
cfc70cdf
RS
351 (let (kill-buffer-hook kill-buffer-query-functions)
352 (kill-buffer buffer)))
4ed46869 353 (let ((hook (assoc file after-load-alist)))
db5cae4b
SM
354 (when hook
355 (mapcar (function eval) (cdr hook))))
356 (unless (or nomessage noninteractive)
357 (if source
358 (message "Loading %s (source)...done" file)
359 (message "Loading %s...done" file)))
4ed46869
KH
360 t)))
361
8f924df7 362(defun charset-info (charset)
4ed46869 363 "Return a vector of information of CHARSET.
8f924df7 364This function is provided for backward compatibility.
4ed46869 365
4ed46869
KH
366The elements of the vector are:
367 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
368 LEADING-CODE-BASE, LEADING-CODE-EXT,
369 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
370 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
8f924df7 371 PLIST.
4ed46869 372where
8f924df7
KH
373CHARSET-ID is always 0.
374BYTES is always 0.
375DIMENSION is the number of bytes of a code-point of the charset:
376 1, 2, 3, or 4.
377CHARS is the number of characters in a dimension:
378 94, 96, 128, or 256.
379WIDTH is always 0.
380DIRECTION is always 0.
381LEADING-CODE-BASE is always 0.
382LEADING-CODE-EXT is always 0.
4ed46869 383ISO-FINAL-CHAR (character) is the final character of the
7dd4c92d
KH
384 corresponding ISO 2022 charset. If the charset is not assigned
385 any final character, the value is -1.
8f924df7
KH
386ISO-GRAPHIC-PLANE is always 0.
387REVERSE-CHARSET is always -1.
4ed46869
KH
388SHORT-NAME (string) is the short name to refer to the charset.
389LONG-NAME (string) is the long name to refer to the charset
390DESCRIPTION (string) is the description string of the charset.
391PLIST (property list) may contain any type of information a user
392 want to put and get by functions `put-charset-property' and
393 `get-charset-property' respectively."
8f924df7
KH
394 (vector 0
395 0
396 (charset-dimension charset)
397 (charset-chars charset)
398 0
399 0
400 0
401 0
402 (charset-iso-final-char charset)
403 0
404 -1
405 (get-charset-property charset :short-name)
406 (get-charset-property charset :short-name)
407 (charset-description charset)
408 (charset-plist charset)))
4ed46869 409
40c81f74
PE
410;; It is better not to use backquote in this file,
411;; because that makes a bootstrapping problem
412;; if you need to recompile all the Lisp files using interpreted code.
413
8f924df7
KH
414(defun charset-id (charset)
415 "Always return 0. This is provided for backward compatibility."
416 0)
4ed46869
KH
417
418(defmacro charset-bytes (charset)
8f924df7
KH
419 "Always return 0. This is provided for backward compatibility."
420 0)
c1841772
KH
421
422(defun get-charset-property (charset propname)
423 "Return the value of CHARSET's PROPNAME property.
424This is the last value stored with
425 (put-charset-property CHARSET PROPNAME VALUE)."
426 (plist-get (charset-plist charset) propname))
427
428(defun put-charset-property (charset propname value)
1f32125f 429 "Set CHARSETS's PROPNAME property to value VALUE.
c1841772
KH
430It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
431 (set-charset-plist charset
432 (plist-put (charset-plist charset) propname value)))
433
c1841772
KH
434(defun charset-description (charset)
435 "Return description string of CHARSET."
436 (plist-get (charset-plist charset) :docstring))
437
438(defun charset-dimension (charset)
12504f57 439 "Return dimension of CHARSET."
c1841772
KH
440 (plist-get (charset-plist charset) :dimension))
441
346a8d64 442(defun charset-chars (charset &optional dimension)
12504f57 443 "Return number of characters contained in DIMENSION of CHARSET.
346a8d64
DL
444DIMENSION defaults to the first dimension."
445 (unless dimension (setq dimension 1))
103cc921 446 (let ((code-space (plist-get (charset-plist charset) :code-space)))
346a8d64
DL
447 (1+ (- (aref code-space (1- (* 2 dimension)))
448 (aref code-space (- (* 2 dimension) 2))))))
c1841772
KH
449
450(defun charset-iso-final-char (charset)
1d839a14
DL
451 "Return ISO-2022 final character of CHARSET.
452Return -1 if charset isn't an ISO 2022 one."
c1841772
KH
453 (or (plist-get (charset-plist charset) :iso-final-char)
454 -1))
4ed46869
KH
455
456(defmacro charset-short-name (charset)
c1841772
KH
457 "Return short name of CHARSET."
458 (plist-get (charset-plist charset) :short-name))
4ed46869
KH
459
460(defmacro charset-long-name (charset)
c1841772
KH
461 "Return long name of CHARSET."
462 (plist-get (charset-plist charset) :long-name))
4ed46869 463
d3675a42 464(defun charset-list ()
12504f57 465 "Return list of all charsets ever defined.
900dc6e3 466
d3675a42 467This function is provided for backward compatibility.
900dc6e3 468Now we have the variable `charset-list'."
d3675a42 469 charset-list)
07513d64 470(make-obsolete 'charset-list "Use variable `charset-list'" "22.1")
d3675a42 471
6d2b6635
KH
472\f
473;;; CHARACTER
474(defalias 'char-valid-p 'characterp)
475(make-obsolete 'char-valid-p 'characterp "22.1")
476
c1841772 477(defun generic-char-p (char)
8f924df7 478 "Always return nil. This is provided for backward compatibility."
c1841772 479 nil)
07513d64 480(make-obsolete 'generic-char-p "Generic characters no longer exist" "22.1")
6d2b6635
KH
481
482(defun make-char-internal (charset-id &optional code1 code2)
483 (let ((charset (aref emacs-mule-charset-table charset-id)))
484 (or charset
485 (error "Invalid Emacs-mule charset ID: %d" charset-id))
486 (make-char charset code1 code2)))
0269ddfb 487\f
e76938e7 488;; Coding system stuff
4ed46869 489
c1841772
KH
490;; Coding system is a symbol that has been defined by the function
491;; `define-coding-system'.
4ed46869 492
c1841772
KH
493(defconst coding-system-iso-2022-flags
494 '(long-form
495 ascii-at-eol
496 ascii-at-cntl
497 7-bit
498 locking-shift
499 single-shift
500 designation
501 revision
502 direction
503 init-at-bol
504 designate-at-bol
505 safe
506 latin-extra
507 composition
3ed58a15
KH
508 euc-tw-shift
509 use-roman
510 use-oldjis)
c1841772 511 "List of symbols that control ISO-2022 encoder/decoder.
4ed46869 512
12504f57 513The value of the `:flags' attribute in the argument of the function
caa7db3a 514`define-coding-system' must be one of them.
4ed46869 515
c1841772
KH
516If `long-form' is specified, use a long designation sequence on
517encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
518and `japanese-jisx0208'. The long designation sequence doesn't
12504f57 519conform to ISO 2022, but is used by such coding systems as
c1841772
KH
520`compound-text'.
521
522If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
523on encoding.
524
525If `ascii-at-cntl' is specified, designate ASCII to g0 before control
526codes and SPC on encoding.
527
528If `7-bit' is specified, use 7-bit code only on encoding.
529
530If `locking-shift' is specified, decode locking-shift code correctly
531on decoding, and use locking-shift to invoke a graphic element on
532encoding.
533
534If `single-shift' is specified, decode single-shift code correctly on
535decoding, and use single-shift to invoke a graphic element on encoding.
536
537If `designation' is specified, decode designation code correctly on
538decoding, and use designation to designate a charset to a graphic
539element on encoding.
540
541If `revision' is specified, produce an escape sequence to specify
542revision number of a charset on encoding. Such an escape sequence is
543always correctly decoded on decoding.
544
545If `direction' is specified, decode ISO6429's code for specifying
12504f57 546direction correctly, and produce the code on encoding.
c1841772
KH
547
548If `init-at-bol' is specified, on encoding, it is assumed that
549invocation and designation statuses are reset at each beginning of
12504f57 550line even if `ascii-at-eol' is not specified; thus no codes for
c1841772
KH
551resetting them are produced.
552
553If `safe' is specified, on encoding, characters not supported by a
554coding are replaced with `?'.
555
12504f57 556If `latin-extra' is specified, the code-detection routine assumes that a
c1841772
KH
557code specified in `latin-extra-code-table' (which see) is valid.
558
559If `composition' is specified, an escape sequence to specify
12504f57 560composition sequence is correctly decoded on decoding, and is produced
c1841772
KH
561on encoding.
562
563If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
12504f57 564correctly decoded on decoding, and is produced on encoding.
c1841772 565
12504f57
DL
566If `use-roman' is specified, JIS0201-1976-Roman is designated instead
567of ASCII.
568
569If `use-oldjis' is specified, JIS0208-1976 is designated instead of
570JIS0208-1983.")
571
c1841772 572(defun define-coding-system (name docstring &rest props)
12504f57 573 "Define NAME (a symbol) as a coding system with DOCSTRING and attributes.
c1841772
KH
574The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
575may be any symbol.
576
12504f57
DL
577The following attributes have special meanings. Those labeled as
578\"(required)\", should not be omitted.
c1841772
KH
579
580`:mnemonic' (required)
581
582VALUE is a character to display on mode line for the coding system.
583
584`:coding-type' (required)
585
586VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022',
1bfd603c 587`emacs-mule', `shift-jis', `ccl', `raw-text', `undecided'.
c1841772 588
12504f57 589`:eol-type'
c1841772 590
12504f57 591VALUE is the EOL (end-of-line) format of the coding system. It must be
c1841772
KH
592one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL
593\(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF),
594and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on
12504f57 595decoding by the coding system, Emacs automatically detects the EOL
c1841772
KH
596format of the source text.
597
736345cb 598`:charset-list'
c1841772
KH
599
600VALUE must be a list of charsets supported by the coding system. On
601encoding by the coding system, if a character belongs to multiple
602charsets in the list, a charset that comes earlier in the list is
736345cb
KH
603selected. If `:coding-type' is `iso-2022', VALUE may be `iso-2022',
604which indicates that the coding system supports all ISO-2022 based
605charsets. If `:coding-type' is `emacs-mule', VALUE may be
606`emacs-mule', which indicates that the coding system supports all
1d839a14 607charsets that have the `:emacs-mule-id' property.
c1841772 608
12504f57 609`:ascii-compatible-p'
c1841772
KH
610
611If VALUE is non-nil, the coding system decodes all 7-bit bytes into
07513d64 612the corresponding ASCII characters, and encodes all ASCII characters
12504f57 613back to the corresponding 7-bit bytes. VALUE defaults to nil.
c1841772 614
12504f57 615`:decode-translation-table'
c1841772
KH
616
617VALUE must be a translation table to use on decoding.
618
12504f57 619`:encode-translation-table'
c1841772
KH
620
621VALUE must be a translation table to use on encoding.
622
12504f57 623`:post-read-conversion'
c1841772
KH
624
625VALUE must be a function to call after some text is inserted and
626decoded by the coding system itself and before any functions in
627`after-insert-functions' are called. The arguments to this function
12504f57
DL
628are the same as those of a function in `after-insert-file-functions',
629i.e. LENGTH of the text to be decoded with point at the head of it,
630and the function should leave point unchanged.
c1841772
KH
631
632`:pre-write-conversion'
633
634VALUE must be a function to call after all functions in
635`write-region-annotate-functions' and `buffer-file-format' are called,
636and before the text is encoded by the coding system itself. The
12504f57
DL
637arguments to this function are the same as those of a function in
638`write-region-annotate-functions'.
c1841772
KH
639
640`:default-char'
641
642VALUE must be a character. On encoding, a character not supported by
643the coding system is replaced with VALUE.
644
8f924df7
KH
645`:for-unibyte'
646
647VALUE non-nil means that visiting a file with the coding system
648results in a unibyte buffer.
649
c1841772
KH
650`:eol-type'
651
652VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like
653EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like
12504f57
DL
654EOL (CR). If omitted, on decoding, the coding system detects EOL
655format automatically, and on encoding, uses Unix-like EOL.
c1841772
KH
656
657`:mime-charset'
658
12504f57
DL
659VALUE must be a symbol whose name is that of a MIME charset converted
660to lower case.
c1841772 661
1bfd603c
DL
662`:mime-text-unsuitable'
663
664VALUE non-nil means the `:mime-charset' property names a charset which
1894d108 665is unsuitable for the top-level media type \"text\".
1bfd603c 666
c1841772
KH
667`:flags'
668
12504f57
DL
669VALUE must be a list of symbols that control the ISO-2022 converter.
670Each must be a member of the list `coding-system-iso-2022-flags'
c1841772
KH
671\(which see). This attribute has a meaning only when `:coding-type'
672is `iso-2022'.
673
674`:designation'
675
12504f57 676VALUE must be a vector [G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
c1841772
KH
677GN-USAGE specifies the usage of graphic register GN as follows.
678
679If it is nil, no charset can be designated to GN.
680
07513d64 681If it is a charset, the charset is initially designated to GN, and
c1841772
KH
682never used by the other charsets.
683
684If it is a list, the elements must be charsets, nil, 94, or 96. GN
12504f57
DL
685can be used by all the listed charsets. If the list contains 94, any
686iso-2022 charset whose code-space ranges are 94 long can be designated
687to GN. If the list contains 96, any charsets whose whose ranges are
68896 long can be designated to GN. If the first element is a charset,
689that charset is initially designated to GN.
c1841772
KH
690
691This attribute has a meaning only when `:coding-type' is `iso-2022'.
692
693`:bom'
694
12504f57
DL
695This attributes specifies whether the coding system uses a `byte order
696mark'. VALUE must nil, t, or cons of coding systems whose
697`:coding-type' is `utf-16'.
c1841772 698
0ea1a6ca
KH
699If the value is nil, on decoding, don't treat the first two-byte as
700BOM, and on encoding, don't produce BOM bytes.
701
702If the value is t, on decoding, skip the first two-byte as BOM, and on
703encoding, produce BOM bytes accoding to the value of `:endian'.
704
705If the value is cons, on decoding, check the first two-byte. If theyq
706are 0xFE 0xFF, use the car part coding system of the value. If they
707are 0xFF 0xFE, use the car part coding system of the value.
708Otherwise, treat them as bytes for a normal character. On encoding,
709produce BOM bytes accoding to the value of `:endian'.
710
c1841772
KH
711This attribute has a meaning only when `:coding-type' is `utf-16'.
712
713`:endian'
714
0ea1a6ca
KH
715VALUE must be `big' or `little' specifying big-endian and
716little-endian respectively. The default value is `big'.
c1841772
KH
717
718This attribute has a meaning only when `:coding-type' is `utf-16'.
719
720`:ccl-decoder'
721
12504f57
DL
722VALUE is a symbol representing the registered CCL program used for
723decoding. This attribute has a meaning only when `:coding-type' is
724`ccl'.
c1841772
KH
725
726`:ccl-encoder'
727
12504f57
DL
728VALUE is a symbol representing the registered CCL program used for
729encoding. This attribute has a meaning only when `:coding-type' is
730`ccl'."
c1841772
KH
731 (let* ((common-attrs (mapcar 'list
732 '(:mnemonic
733 :coding-type
734 :charset-list
735 :ascii-compatible-p
1a9db556 736 :decode-translation-table
c1841772
KH
737 :encode-translation-table
738 :post-read-conversion
739 :pre-write-conversion
740 :default-char
7e742024 741 :for-unibyte
c1841772
KH
742 :plist
743 :eol-type)))
744 (coding-type (plist-get props :coding-type))
745 (spec-attrs (mapcar 'list
746 (cond ((eq coding-type 'iso-2022)
747 '(:initial
748 :reg-usage
749 :request
750 :flags))
751 ((eq coding-type 'utf-16)
752 '(:bom
753 :endian))
754 ((eq coding-type 'ccl)
755 '(:ccl-decoder
756 :ccl-encoder
757 :valids))))))
758
759 (dolist (slot common-attrs)
760 (setcdr slot (plist-get props (car slot))))
761
762 (dolist (slot spec-attrs)
763 (setcdr slot (plist-get props (car slot))))
764
765 (if (eq coding-type 'iso-2022)
766 (let ((designation (plist-get props :designation))
767 (flags (plist-get props :flags))
768 (initial (make-vector 4 nil))
769 (reg-usage (cons 4 4))
770 request elt)
771 (dotimes (i 4)
772 (setq elt (aref designation i))
773 (cond ((charsetp elt)
774 (aset initial i elt)
775 (setq request (cons (cons elt i) request)))
776 ((consp elt)
777 (aset initial i (car elt))
778 (if (charsetp (car elt))
779 (setq request (cons (cons (car elt) i) request)))
780 (dolist (e (cdr elt))
781 (cond ((charsetp e)
782 (setq request (cons (cons e i) request)))
783 ((eq e 94)
784 (setcar reg-usage i))
785 ((eq e 96)
786 (setcdr reg-usage i))
787 ((eq e t)
788 (setcar reg-usage i)
789 (setcdr reg-usage i)))))))
790 (setcdr (assq :initial spec-attrs) initial)
791 (setcdr (assq :reg-usage spec-attrs) reg-usage)
792 (setcdr (assq :request spec-attrs) request)
793
794 ;; Change :flags value from a list to a bit-mask.
795 (let ((bits 0)
796 (i 0))
797 (dolist (elt coding-system-iso-2022-flags)
798 (if (memq elt flags)
799 (setq bits (logior bits (lsh 1 i))))
800 (setq i (1+ i)))
801 (setcdr (assq :flags spec-attrs) bits))))
802
803 ;; Add :name and :docstring properties to PROPS.
804 (setq props
e1e529fa
DL
805 (cons :name (cons name (cons :docstring (cons (purecopy docstring)
806 props)))))
c1841772 807 (setcdr (assq :plist common-attrs) props)
c1841772
KH
808 (apply 'define-coding-system-internal
809 name (mapcar 'cdr (append common-attrs spec-attrs)))))
4ed46869 810
8057896b 811(defun coding-system-doc-string (coding-system)
0269ddfb 812 "Return the documentation string for CODING-SYSTEM."
c1841772 813 (plist-get (coding-system-plist coding-system) :docstring))
4ed46869 814
4ed46869 815(defun coding-system-mnemonic (coding-system)
0269ddfb 816 "Return the mnemonic character of CODING-SYSTEM.
12504f57
DL
817The mnemonic character of a coding system is used in mode line to
818indicate the coding system. If CODING-SYSTEM. is nil, return ?=."
c1841772 819 (plist-get (coding-system-plist coding-system) :mnemonic))
4ed46869 820
c1841772
KH
821(defun coding-system-type (coding-system)
822 "Return the coding type of CODING-SYSTEM.
823A coding type is a symbol indicating the encoding method of CODING-SYSTEM.
824See the function `define-coding-system' for more detail."
825 (plist-get (coding-system-plist coding-system) :coding-type))
d3675a42 826
c1841772 827(defun coding-system-charset-list (coding-system)
07513d64 828 "Return list of charsets supported by CODING-SYSTEM.
c1841772
KH
829If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
830If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'."
831 (plist-get (coding-system-plist coding-system) :charset-list))
0269ddfb 832
2f1e746b
KH
833(defun coding-system-category (coding-system)
834 "Return a category symbol of CODING-SYSTEM."
835 (plist-get (coding-system-plist coding-system) :category))
0269ddfb
KH
836
837(defun coding-system-get (coding-system prop)
07513d64
DL
838 "Extract a value from CODING-SYSTEM's property list for property PROP.
839For compatibility with Emacs 20/21, this accepts old-style symbols
840like `mime-charset' as well as the current style like `:mime-charset'."
841 (or (plist-get (coding-system-plist coding-system) prop)
842 (if (not (keywordp prop))
843 (plist-get (coding-system-plist coding-system)
844 (intern (concat ":" (symbol-name prop)))))))
0269ddfb 845
0269ddfb 846(defalias 'coding-system-parent 'coding-system-base)
2598a293 847(make-obsolete 'coding-system-parent 'coding-system-base "20.3")
0269ddfb 848
2e729bfa
JB
849(defun coding-system-eol-type-mnemonic (coding-system)
850 "Return the string indicating end-of-line format of CODING-SYSTEM."
851 (let* ((eol-type (coding-system-eol-type coding-system))
f4f00827 852 (val (cond ((eq eol-type 0) eol-mnemonic-unix)
2e729bfa
JB
853 ((eq eol-type 1) eol-mnemonic-dos)
854 ((eq eol-type 2) eol-mnemonic-mac)
f4f00827 855 (t eol-mnemonic-undecided))))
2e729bfa
JB
856 (if (stringp val)
857 val
858 (char-to-string val))))
859
857ea15c
AS
860(defun coding-system-lessp (x y)
861 (cond ((eq x 'no-conversion) t)
862 ((eq y 'no-conversion) nil)
863 ((eq x 'emacs-mule) t)
864 ((eq y 'emacs-mule) nil)
865 ((eq x 'undecided) t)
866 ((eq y 'undecided) nil)
867 (t (let ((c1 (coding-system-mnemonic x))
868 (c2 (coding-system-mnemonic y)))
869 (or (< (downcase c1) (downcase c2))
870 (and (not (> (downcase c1) (downcase c2)))
871 (< c1 c2)))))))
872
857ea15c 873(defun add-to-coding-system-list (coding-system)
521d4010 874 "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted."
857ea15c
AS
875 (if (or (null coding-system-list)
876 (coding-system-lessp coding-system (car coding-system-list)))
877 (setq coding-system-list (cons coding-system coding-system-list))
878 (let ((len (length coding-system-list))
879 mid (tem coding-system-list))
880 (while (> len 1)
881 (setq mid (nthcdr (/ len 2) tem))
882 (if (coding-system-lessp (car mid) coding-system)
883 (setq tem mid
884 len (- len (/ len 2)))
885 (setq len (/ len 2))))
886 (setcdr tem (cons coding-system (cdr tem))))))
887
80a7463d 888(defun coding-system-list (&optional base-only)
c11a8f77 889 "Return a list of all existing non-subsidiary coding systems.
12504f57
DL
890If optional arg BASE-ONLY is non-nil, only base coding systems are
891listed. The value doesn't include subsidiary coding systems which are
c11a8f77
KH
892made from bases and aliases automatically for various end-of-line
893formats (e.g. iso-latin-1-unix, koi8-r-dos)."
80a7463d
KH
894 (let* ((codings (copy-sequence coding-system-list))
895 (tail (cons nil codings)))
896 ;; Remove subsidiary coding systems (eol variants) and alias
897 ;; coding systems (if necessary).
898 (while (cdr tail)
899 (let* ((coding (car (cdr tail)))
687441de 900 (aliases (coding-system-aliases coding)))
80a7463d
KH
901 (if (or
902 ;; CODING is an eol variant if not in ALIASES.
903 (not (memq coding aliases))
904 ;; CODING is an alias if it is not car of ALIASES.
905 (and base-only (not (eq coding (car aliases)))))
906 (setcdr tail (cdr (cdr tail)))
907 (setq tail (cdr tail)))))
908 codings))
909
620956ca
KH
910(defconst char-coding-system-table nil
911 "This is an obsolete variable.
912It exists just for backward compatibility, and the value is always nil.")
c11a8f77 913
50c29104
KH
914(defun transform-make-coding-system-args (name type &optional doc-string props)
915 "For internal use only.
916Transform XEmacs style args for `make-coding-system' to Emacs style.
917Value is a list of transformed arguments."
918 (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
919 (eol-type (plist-get props 'eol-type))
920 properties tmp)
921 (cond
922 ((eq eol-type 'lf) (setq eol-type 'unix))
923 ((eq eol-type 'crlf) (setq eol-type 'dos))
924 ((eq eol-type 'cr) (setq eol-type 'mac)))
925 (if (setq tmp (plist-get props 'post-read-conversion))
926 (setq properties (plist-put properties 'post-read-conversion tmp)))
927 (if (setq tmp (plist-get props 'pre-write-conversion))
928 (setq properties (plist-put properties 'pre-write-conversion tmp)))
929 (cond
f4a012a6
KH
930 ((eq type 'shift-jis)
931 `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type))
932 ((eq type 'iso2022) ; This is not perfect.
933 (if (plist-get props 'escape-quoted)
934 (error "escape-quoted is not supported: %S"
935 `(,name ,type ,doc-string ,props)))
936 (let ((g0 (plist-get props 'charset-g0))
937 (g1 (plist-get props 'charset-g1))
938 (g2 (plist-get props 'charset-g2))
939 (g3 (plist-get props 'charset-g3))
940 (use-roman
941 (and
942 (eq (cadr (assoc 'latin-jisx0201
943 (plist-get props 'input-charset-conversion)))
944 'ascii)
945 (eq (cadr (assoc 'ascii
946 (plist-get props 'output-charset-conversion)))
947 'latin-jisx0201)))
948 (use-oldjis
949 (and
950 (eq (cadr (assoc 'japanese-jisx0208-1978
951 (plist-get props 'input-charset-conversion)))
952 'japanese-jisx0208)
953 (eq (cadr (assoc 'japanese-jisx0208
954 (plist-get props 'output-charset-conversion)))
955 'japanese-jisx0208-1978))))
956 (if (charsetp g0)
957 (if (plist-get props 'force-g0-on-output)
958 (setq g0 `(nil ,g0))
959 (setq g0 `(,g0 t))))
960 (if (charsetp g1)
961 (if (plist-get props 'force-g1-on-output)
962 (setq g1 `(nil ,g1))
963 (setq g1 `(,g1 t))))
964 (if (charsetp g2)
965 (if (plist-get props 'force-g2-on-output)
966 (setq g2 `(nil ,g2))
967 (setq g2 `(,g2 t))))
968 (if (charsetp g3)
969 (if (plist-get props 'force-g3-on-output)
970 (setq g3 `(nil ,g3))
971 (setq g3 `(,g3 t))))
972 `(,name 2 ,mnemonic ,doc-string
973 (,g0 ,g1 ,g2 ,g3
974 ,(plist-get props 'short)
975 ,(not (plist-get props 'no-ascii-eol))
976 ,(not (plist-get props 'no-ascii-cntl))
977 ,(plist-get props 'seven)
978 t
979 ,(not (plist-get props 'lock-shift))
980 ,use-roman
981 ,use-oldjis
982 ,(plist-get props 'no-iso6429)
983 nil nil nil nil)
984 ,properties ,eol-type)))
985 ((eq type 'big5)
986 `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type))
50c29104 987 ((eq type 'ccl)
f4a012a6 988 `(,name 4 ,mnemonic ,doc-string
50c29104 989 (,(plist-get props 'decode) . ,(plist-get props 'encode))
f4a012a6 990 ,properties ,eol-type))
50c29104 991 (t
f4a012a6 992 (error "unsupported XEmacs style make-coding-style arguments: %S"
50c29104
KH
993 `(,name ,type ,doc-string ,props))))))
994
8057896b 995(defun make-coding-system (coding-system type mnemonic doc-string
1b46a680
KH
996 &optional
997 flags
998 properties
999 eol-type)
3bb1accb 1000 "Define a new coding system CODING-SYSTEM (symbol).
8f924df7
KH
1001This function is provided for backward compatibility.
1002Use `define-coding-system' instead."
50c29104 1003 ;; For compatiblity with XEmacs, we check the type of TYPE. If it
c3d0ee51
EZ
1004 ;; is a symbol, perhaps, this function is called with XEmacs-style
1005 ;; arguments. Here, try to transform that kind of arguments to
50c29104
KH
1006 ;; Emacs style.
1007 (if (symbolp type)
1008 (let ((args (transform-make-coding-system-args coding-system type
1009 mnemonic doc-string)))
1010 (setq coding-system (car args)
1053cc93 1011 type (nth 1 args)
50c29104
KH
1012 mnemonic (nth 2 args)
1013 doc-string (nth 3 args)
1014 flags (nth 4 args)
1015 properties (nth 5 args)
1016 eol-type (nth 6 args))))
1017
8f924df7
KH
1018 (setq type
1019 (cond ((eq type 0) 'emacs-mule)
1020 ((eq type 1) 'shift-jis)
1021 ((eq type 2) 'iso2022)
1022 ((eq type 3) 'big5)
1023 ((eq type 4) 'ccl)
1024 ((eq type 5) 'raw-text)
1b46a680 1025 (t
8f924df7
KH
1026 (error "Invalid coding system type: %s" type))))
1027
1028 (setq properties
1029 (let ((plist nil) key)
1030 (dolist (elt properties)
1031 (setq key (car elt))
1032 (cond ((eq key 'post-read-conversion)
1033 (setq key :post-read-conversion))
1034 ((eq key 'pre-write-conversion)
1035 (setq key :pre-write-conversion))
1036 ((eq key 'translation-table-for-decode)
1037 (setq key :decode-translation-table))
1038 ((eq key 'translation-table-for-encode)
1039 (setq key :encode-translation-table))
1040 ((eq key 'safe-charsets)
1041 (setq key :charset-list))
1042 ((eq key 'mime-charset)
1043 (setq key :mime-charset))
1044 ((eq key 'valid-codes)
1045 (setq key :valids)))
1046 (setq plist (plist-put plist key (cdr elt))))
1047 plist))
28380f17 1048 (setq properties (plist-put properties :mnemonic mnemonic))
8f924df7
KH
1049 (plist-put properties :coding-type type)
1050 (cond ((eq eol-type 0) (setq eol-type 'unix))
1051 ((eq eol-type 1) (setq eol-type 'dos))
1052 ((eq eol-type 2) (setq eol-type 'mac))
1053 ((vectorp eol-type) (setq eol-type nil)))
1054 (plist-put properties :eol-type eol-type)
1055
1056 (cond
1057 ((eq type 'iso2022)
1058 (plist-put properties :flags
1059 (list (and (or (consp (nth 0 flags))
1060 (consp (nth 1 flags))
1061 (consp (nth 2 flags))
1062 (consp (nth 3 flags))) 'designation)
1063 (or (nth 4 flags) 'long-form)
1064 (and (nth 5 flags) 'ascii-at-eol)
1065 (and (nth 6 flags) 'ascii-at-cntl)
1066 (and (nth 7 flags) '7-bit)
1067 (and (nth 8 flags) 'locking-shift)
1068 (and (nth 9 flags) 'single-shift)
1069 (and (nth 10 flags) 'use-roman)
1070 (and (nth 11 flags) 'use-oldjis)
1071 (or (nth 12 flags) 'direction)
1072 (and (nth 13 flags) 'init-at-bol)
1073 (and (nth 14 flags) 'designate-at-bol)
1074 (and (nth 15 flags) 'safe)
1075 (and (nth 16 flags) 'latin-extra)))
1076 (plist-put properties :designation
1077 (let ((vec (make-vector 4 nil)))
1078 (dotimes (i 4)
1079 (let ((spec (nth i flags)))
1080 (if (eq spec t)
1081 (aset vec i '(94 96))
1082 (if (consp spec)
1083 (progn
1084 (if (memq t spec)
1085 (setq spec (append (delq t spec) '(94 96))))
1086 (aset vec i spec))))))
1087 vec)))
1088
1089 ((eq type 'ccl)
1090 (plist-put properties :ccl-decoder (car flags))
1091 (plist-put properties :ccl-encoder (cdr flags))))
1092
1093 (apply 'define-coding-system coding-system doc-string properties))
4ed46869 1094
bbdea948
RS
1095(defun merge-coding-systems (first second)
1096 "Fill in any unspecified aspects of coding system FIRST from SECOND.
1097Return the resulting coding system."
1098 (let ((base (coding-system-base second))
1099 (eol (coding-system-eol-type second)))
1100 ;; If FIRST doesn't specify text conversion, merge with that of SECOND.
1101 (if (eq (coding-system-base first) 'undecided)
1102 (setq first (coding-system-change-text-conversion first base)))
1103 ;; If FIRST doesn't specify eol conversion, merge with that of SECOND.
1104 (if (and (vectorp (coding-system-eol-type first))
1105 (numberp eol) (>= eol 0) (<= eol 2))
1106 (setq first (coding-system-change-eol-conversion
1107 first eol)))
1108 first))
1109
4ed46869 1110(defun set-buffer-file-coding-system (coding-system &optional force)
358d28fb
RS
1111 "Set the file coding-system of the current buffer to CODING-SYSTEM.
1112This means that when you save the buffer, it will be converted
1113according to CODING-SYSTEM. For a list of possible values of CODING-SYSTEM,
1114use \\[list-coding-systems].
1115
bbdea948
RS
1116If CODING-SYSTEM leaves the text conversion unspecified, or if it
1117leaves the end-of-line conversion unspecified, FORCE controls what to
1118do. If FORCE is nil, get the unspecified aspect (or aspects) from the
1119buffer's previous `buffer-file-coding-system' value (if it is
463f5630 1120specified there). Otherwise, levae it unspecified.
aeef8f07
KH
1121
1122This marks the buffer modified so that the succeeding \\[save-buffer]
1123surely saves the buffer with CODING-SYSTEM. From a program, if you
1124don't want to mark the buffer modified, just set the variable
1125`buffer-file-coding-system' directly."
bbdea948 1126 (interactive "zCoding system for saving file (default, nil): \nP")
4ed46869 1127 (check-coding-system coding-system)
36d455c4 1128 (if (and coding-system buffer-file-coding-system (null force))
bbdea948
RS
1129 (setq coding-system
1130 (merge-coding-systems coding-system buffer-file-coding-system)))
4ed46869 1131 (setq buffer-file-coding-system coding-system)
38a1356d
RS
1132 ;; This is in case of an explicit call. Normally, `normal-mode' and
1133 ;; `set-buffer-major-mode-hook' take care of setting the table.
1134 (if (fboundp 'ucs-set-table-for-input) ; don't lose when building
1135 (ucs-set-table-for-input))
4ed46869
KH
1136 (set-buffer-modified-p t)
1137 (force-mode-line-update))
1138
bbdea948
RS
1139(defun revert-buffer-with-coding-system (coding-system &optional force)
1140 "Visit the current buffer's file again using coding system CODING-SYSTEM.
1141For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1142
1143If CODING-SYSTEM leaves the text conversion unspecified, or if it
1144leaves the end-of-line conversion unspecified, FORCE controls what to
1145do. If FORCE is nil, get the unspecified aspect (or aspects) from the
1146buffer's previous `buffer-file-coding-system' value (if it is
1147specified there). Otherwise, determine it from the file contents as
1148usual for visiting a file."
1149 (interactive "zCoding system for visited file (default, nil): \nP")
1150 (check-coding-system coding-system)
1151 (if (and coding-system buffer-file-coding-system (null force))
1152 (setq coding-system
1153 (merge-coding-systems coding-system buffer-file-coding-system)))
1154 (let ((coding-system-for-read coding-system))
1155 (revert-buffer)))
1156
701414e3
KH
1157(defun set-file-name-coding-system (coding-system)
1158 "Set coding system for decoding and encoding file names to CODING-SYSTEM.
1159It actually just set the variable `file-name-coding-system' (which
1160see) to CODING-SYSTEM."
1161 (interactive "zCoding system for file names (default, nil): ")
1162 (check-coding-system coding-system)
1163 (setq file-name-coding-system coding-system))
1164
358d28fb
RS
1165(defvar default-terminal-coding-system nil
1166 "Default value for the terminal coding system.
1167This is normally set according to the selected language environment.
1168See also the command `set-terminal-coding-system'.")
1169
df100398
KH
1170(defun set-terminal-coding-system (coding-system)
1171 "Set coding system of your terminal to CODING-SYSTEM.
358d28fb
RS
1172All text output to the terminal will be encoded
1173with the specified coding system.
1174For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1175The default is determined by the selected language environment
1176or by the previous use of this command."
1177 (interactive
2e02a76f
RS
1178 (list (let ((default (if (and (not (terminal-coding-system))
1179 default-terminal-coding-system)
1180 default-terminal-coding-system)))
1181 (read-coding-system
1182 (format "Coding system for terminal display (default, %s): "
1183 default)
1184 default))))
358d28fb
RS
1185 (if (and (not coding-system)
1186 (not (terminal-coding-system)))
1187 (setq coding-system default-terminal-coding-system))
1188 (if coding-system
521d4010 1189 (setq default-terminal-coding-system coding-system))
df100398
KH
1190 (set-terminal-coding-system-internal coding-system)
1191 (redraw-frame (selected-frame)))
1192
358d28fb
RS
1193(defvar default-keyboard-coding-system nil
1194 "Default value of the keyboard coding system.
1195This is normally set according to the selected language environment.
1196See also the command `set-keyboard-coding-system'.")
1197
df100398 1198(defun set-keyboard-coding-system (coding-system)
358d28fb
RS
1199 "Set coding system for keyboard input to CODING-SYSTEM.
1200In addition, this command enables Encoded-kbd minor mode.
6d34f495
DL
1201\(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see
1202`encoded-kbd-mode'.)
358d28fb
RS
1203For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1204The default is determined by the selected language environment
1205or by the previous use of this command."
1206 (interactive
2e02a76f
RS
1207 (list (let ((default (if (and (not (keyboard-coding-system))
1208 default-keyboard-coding-system)
1209 default-keyboard-coding-system)))
1210 (read-coding-system
1211 (format "Coding system for keyboard input (default, %s): "
1212 default)
1213 default))))
358d28fb
RS
1214 (if (and (not coding-system)
1215 (not (keyboard-coding-system)))
1216 (setq coding-system default-keyboard-coding-system))
1217 (if coding-system
1218 (setq default-keyboard-coding-system coding-system))
df100398 1219 (set-keyboard-coding-system-internal coding-system)
b23bad0b 1220 (setq keyboard-coding-system coding-system)
df100398
KH
1221 (encoded-kbd-mode (if coding-system 1 0)))
1222
6d34f495
DL
1223(defcustom keyboard-coding-system nil
1224 "Specify coding system for keyboard input.
1225If you set this on a terminal which can't distinguish Meta keys from
12268-bit characters, you will have to use ESC to type Meta characters.
1227See Info node `Specify Coding' and Info node `Single-Byte Character Support'.
1228
237e5993
DL
1229On non-windowing terminals, this is set from the locale by default.
1230
6d34f495
DL
1231Setting this variable directly does not take effect;
1232use either M-x customize or \\[set-keyboard-coding-system]."
1233 :type '(coding-system :tag "Coding system")
1234 :link '(info-link "(emacs)Specify Coding")
1235 :link '(info-link "(emacs)Single-Byte Character Support")
1236 :set (lambda (symbol value)
1237 ;; Don't load encoded-kbd-mode unnecessarily.
1238 (if (or value (boundp 'encoded-kbd-mode))
1239 (set-keyboard-coding-system value)
1240 (set-default 'keyboard-coding-system nil))) ; must initialize
237e5993 1241 :version "21.4"
6d34f495
DL
1242 :group 'keyboard
1243 :group 'mule)
1244
df100398 1245(defun set-buffer-process-coding-system (decoding encoding)
358d28fb 1246 "Set coding systems for the process associated with the current buffer.
df100398 1247DECODING is the coding system to be used to decode input from the process,
358d28fb
RS
1248ENCODING is the coding system to be used to encode output to the process.
1249
1250For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]."
4ed46869 1251 (interactive
83911021 1252 "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
4ed46869
KH
1253 (let ((proc (get-buffer-process (current-buffer))))
1254 (if (null proc)
521d4010 1255 (error "No process")
df100398
KH
1256 (check-coding-system decoding)
1257 (check-coding-system encoding)
1258 (set-process-coding-system proc decoding encoding)))
4ed46869
KH
1259 (force-mode-line-update))
1260
d0b99881
RS
1261(defalias 'set-clipboard-coding-system 'set-selection-coding-system)
1262
14915c37 1263(defun set-selection-coding-system (coding-system)
8c52d564 1264 "Make CODING-SYSTEM used for communicating with other X clients.
b25eef20
KH
1265When sending or receiving text via cut_buffer, selection, and clipboard,
1266the text is encoded or decoded by CODING-SYSTEM."
a03b3ce1 1267 (interactive "zCoding system for X selection: ")
b25eef20 1268 (check-coding-system coding-system)
14915c37 1269 (setq selection-coding-system coding-system))
b25eef20 1270
e8dd0160 1271;; Coding system lastly specified by the command
a03b3ce1
KH
1272;; set-next-selection-coding-system.
1273(defvar last-next-selection-coding-system nil)
1274
1275(defun set-next-selection-coding-system (coding-system)
12504f57 1276 "Use CODING-SYSTEM for next communication with other window system clients.
a03b3ce1
KH
1277This setting is effective for the next communication only."
1278 (interactive
1279 (list (read-coding-system
1280 (if last-next-selection-coding-system
12504f57 1281 (format "Coding system for the next selection (default, %S): "
a03b3ce1 1282 last-next-selection-coding-system)
12504f57 1283 "Coding system for the next selection: ")
a03b3ce1
KH
1284 last-next-selection-coding-system)))
1285 (if coding-system
1286 (setq last-next-selection-coding-system coding-system)
1287 (setq coding-system last-next-selection-coding-system))
1288 (check-coding-system coding-system)
1289
1290 (setq next-selection-coding-system coding-system))
1291
4ed46869 1292(defun set-coding-priority (arg)
521d4010 1293 "Set priority of coding categories according to ARG.
c1841772
KH
1294ARG is a list of coding categories ordered by priority.
1295
1296This function is provided for backward compatibility.
1297Now we have more convenient function `set-coding-system-priority'."
5d75f46f
KH
1298 (apply 'set-coding-system-priority
1299 (mapcar #'(lambda (x) (symbol-value x)) arg)))
07513d64 1300(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.1")
4ed46869 1301
835cbadb
EZ
1302;;; X selections
1303
cc926903 1304(defvar ctext-non-standard-encodings-alist
835cbadb
EZ
1305 '(("ISO8859-15" . latin-iso8859-15)
1306 ("ISO8859-14" . latin-iso8859-14)
1307 ("KOI8-R" . koi8-r)
1308 ("BIG5-0" . big5))
cc926903
KH
1309 "Alist of non-standard encoding names vs Emacs coding systems.
1310This alist is used to decode an extened segment of a compound text.")
1311
1312(defvar ctext-non-standard-encodings-regexp
1313 (string-to-multibyte
1314 (concat
1315 ;; For non-standard encodings.
1316 "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
1317 "\\|"
1318 ;; For UTF-8 encoding.
1319 "\\(\e%G[^\e]*\e%@\\)")))
835cbadb
EZ
1320
1321;; Functions to support "Non-Standard Character Set Encodings" defined
5c88a01e 1322;; by the COMPOUND-TEXT spec.
cc926903
KH
1323;; We support that by decoding the whole data by `ctext' which just
1324;; pertains byte sequences belonging to ``extended segment'', then
1325;; decoding those byte sequences one by one in Lisp.
5c88a01e
KH
1326;; This function also supports "The UTF-8 encoding" described in the
1327;; section 7 of the documentation fo COMPOUND-TEXT distributed with
1328;; XFree86.
1329
835cbadb
EZ
1330(defun ctext-post-read-conversion (len)
1331 "Decode LEN characters encoded as Compound Text with Extended Segments."
1894d108
KH
1332 ;; We don't need the following because it is expected that this
1333 ;; function is mainly used for decoding X selection which is not
1334 ;; that big data.
1335 ;;(buffer-disable-undo) ; minimize consing due to insertions and deletions
835cbadb 1336 (save-match-data
cc926903 1337 (save-restriction
1894d108 1338 (narrow-to-region (point) (+ (point) len))
cc926903 1339 (let ((case-fold-search nil)
cc926903
KH
1340 last-coding-system-used
1341 pos bytes)
cc926903 1342 (decode-coding-region (point-min) (point-max) 'ctext)
cc926903
KH
1343 (while (re-search-forward ctext-non-standard-encodings-regexp
1344 nil 'move)
1345 (setq pos (match-beginning 0))
1346 (if (match-beginning 1)
1347 ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
1348 (let* ((M (char-after (+ pos 4)))
1349 (L (char-after (+ pos 5)))
1350 (encoding (match-string 2))
1351 (coding (or (cdr (assoc-ignore-case
1352 encoding
1353 ctext-non-standard-encodings-alist))
1354 (coding-system-p
1355 (intern (downcase encoding))))))
1894d108
KH
1356 (if enable-multibyte-characters
1357 (setq M (multibyte-char-to-unibyte M)
1358 L (multibyte-char-to-unibyte L)))
cc926903
KH
1359 (setq bytes (- (+ (* (- M 128) 128) (- L 128))
1360 (- (point) (+ pos 6))))
1361 (when coding
1362 (delete-region pos (point))
1363 (forward-char bytes)
1364 (decode-coding-region (- (point) bytes) (point) coding)))
1365 ;; ESC % G --UTF-8-BYTES-- ESC % @
1366 (setq bytes (- (point) pos))
1367 (decode-coding-region (- (point) bytes) (point) 'utf-8))))
1368 (goto-char (point-min))
1369 (- (point-max) (point)))))
835cbadb 1370
c74fe809
EZ
1371;; If you add charsets here, be sure to modify the regexp used by
1372;; ctext-pre-write-conversion to look up non-standard charsets.
cc926903 1373(defvar ctext-non-standard-designations-alist
835cbadb
EZ
1374 '(("$(0" . (big5 "big5-0" 2))
1375 ("$(1" . (big5 "big5-0" 2))
eb747e41
DL
1376 ;; The following are actually standard; generating extended
1377 ;; segments for them is wrong and screws e.g. Latin-9 users.
1378 ;; 8859-{10,13,16} aren't Emacs charsets anyhow. -- fx
1379;; ("-V" . (t "iso8859-10" 1))
1380;; ("-Y" . (t "iso8859-13" 1))
1381;; ("-_" . (t "iso8859-14" 1))
1382;; ("-b" . (t "iso8859-15" 1))
1383;; ("-f" . (t "iso8859-16" 1))
1384 )
835cbadb 1385 "Alist of ctext control sequences that introduce character sets which
eb747e41 1386are not in the list of approved encodings, and the corresponding
835cbadb
EZ
1387coding system, identifier string, and number of octets per encoded
1388character.
1389
1390Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
1391is the control sequence (sans the leading ESC) that introduces the character
1392set in the text encoded by compound-text. ENCODING is a coding system
1393symbol; if it is t, it means that the ctext coding system already encodes
1394the text correctly, and only the leading control sequence needs to be altered.
1395If ENCODING is a coding system, we need to re-encode the text with that
eb747e41 1396coding system. CHARSET is the name of the charset we need to put into
835cbadb
EZ
1397the leading control sequence. NOCTETS is the number of octets (bytes) that
1398encode each character in this charset. NOCTETS can be 0 (meaning the number
1399of octets per character is variable), 1, 2, 3, or 4.")
1400
1401(defun ctext-pre-write-conversion (from to)
5dde3c71
EZ
1402 "Encode characters between FROM and TO as Compound Text w/Extended Segments.
1403
1404If FROM is a string, or if the current buffer is not the one set up for us
cc926903 1405by encode-coding-string, generate a new temp buffer, insert the
5dde3c71 1406text, and convert it in the temporary buffer. Otherwise, convert in-place."
835cbadb 1407 (save-match-data
cc926903 1408 ;; Setup a working buffer if necessary.
f1beb0e0
KH
1409 (when (stringp from)
1410 (set-buffer (generate-new-buffer " *temp"))
1411 (set-buffer-multibyte (multibyte-string-p from))
1412 (insert from))
cc926903
KH
1413
1414 ;; Now we can encode the whole buffer.
1415 (let ((case-fold-search nil)
1416 last-coding-system-used
1417 pos posend desig encode-info encoding chset noctets textlen)
1418 (goto-char (point-min))
1419 ;; At first encode the whole buffer.
1420 (encode-coding-region (point-min) (point-max) 'ctext-no-compositions)
1421 ;; Then replace ISO-2022 charset designations with extended
1422 ;; segments, for those charsets that are not part of the
1423 ;; official X registry. The regexp below finds the leading
1424 ;; sequences for big5.
eb747e41 1425 (while (re-search-forward "\e\\(\$([01]\\)" nil 'move)
cc926903
KH
1426 (setq pos (match-beginning 0)
1427 posend (point)
1428 desig (match-string 1)
1429 encode-info (cdr (assoc desig
1430 ctext-non-standard-designations-alist))
835cbadb
EZ
1431 encoding (car encode-info)
1432 chset (cadr encode-info)
1433 noctets (car (cddr encode-info)))
1434 (skip-chars-forward "^\e")
835cbadb 1435 (cond
f1beb0e0 1436 ((eq encoding t) ; only the leading sequence needs to be changed
cc926903
KH
1437 (setq textlen (+ (- (point) posend) (length chset) 1))
1438 ;; Generate the control sequence for an extended segment.
f1beb0e0
KH
1439 (replace-match (string-to-multibyte (format "\e%%/%d%c%c%s\ 2"
1440 noctets
1441 (+ (/ textlen 128) 128)
1442 (+ (% textlen 128) 128)
1443 chset))
835cbadb
EZ
1444 t t))
1445 ((coding-system-p encoding) ; need to recode the entire segment...
cc926903
KH
1446 (decode-coding-region pos (point) 'ctext-no-compositions)
1447 (encode-coding-region pos (point) encoding)
cc926903
KH
1448 (setq textlen (+ (- (point) pos) (length chset) 1))
1449 (save-excursion
1450 (goto-char pos)
f1beb0e0
KH
1451 (insert (string-to-multibyte (format "\e%%/%d%c%c%s\ 2"
1452 noctets
1453 (+ (/ textlen 128) 128)
1454 (+ (% textlen 128) 128)
1455 chset)))))))
cc926903 1456 (goto-char (point-min))))
5dde3c71 1457 ;; Must return nil, as build_annotations_2 expects that.
835cbadb
EZ
1458 nil)
1459
4ed46869
KH
1460;;; FILE I/O
1461
e76938e7 1462(defcustom auto-coding-alist
8f924df7
KH
1463 '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|sx[dmicw]\\|tar\\)\\'" . no-conversion-multibyte)
1464 ("\\.tgz\\'" . no-conversion)
6caef2da 1465 ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion)
45885400 1466 ("/#[^/]+#\\'" . emacs-mule))
835f49b8
KH
1467 "Alist of filename patterns vs corresponding coding systems.
1468Each element looks like (REGEXP . CODING-SYSTEM).
558b0c86 1469A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.
835f49b8 1470
7fed493a
RS
1471The settings in this alist take priority over `coding:' tags
1472in the file (see the function `set-auto-coding')
e76938e7
DL
1473and the contents of `file-coding-system-alist'."
1474 :group 'files
1475 :group 'mule
1476 :type '(repeat (cons (regexp :tag "File name regexp")
1477 (symbol :tag "Coding system"))))
835f49b8 1478
502522b2 1479(defcustom auto-coding-regexp-alist
4e4a5bca
DL
1480 '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)
1481 ("\\`;ELC\14\0\0\0" . emacs-mule)) ; Emacs 20-compiled
502522b2
GM
1482 "Alist of patterns vs corresponding coding systems.
1483Each element looks like (REGEXP . CODING-SYSTEM).
1484A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.
1485
1486The settings in this alist take priority over `coding:' tags
1487in the file (see the function `set-auto-coding')
1488and the contents of `file-coding-system-alist'."
1489 :group 'files
1490 :group 'mule
1491 :type '(repeat (cons (regexp :tag "Regexp")
1492 (symbol :tag "Coding system"))))
1493
d9f6dfe6 1494;; See the bottom of this file for built-in auto coding functions.
447404a3
CW
1495(defcustom auto-coding-functions '(sgml-xml-auto-coding-function
1496 sgml-html-meta-auto-coding-function)
d9f6dfe6
CW
1497 "A list of functions which attempt to determine a coding system.
1498
66643502
RS
1499Each function in this list should be written to operate on the
1500current buffer, but should not modify it in any way. The buffer
1501will contain undecoded text of parts of the file. Each function
1502should take one argument, SIZE, which says how many
1503characters (starting from point) it should look at.
1504
1505If one of these functions succeeds in determining a coding
1506system, it should return that coding system. Otherwise, it
1507should return nil.
1508
1509If a file has a `coding:' tag, that takes precedence over these
1510functions, so they won't be called at all."
d9f6dfe6
CW
1511 :group 'files
1512 :group 'mule
1513 :type '(repeat function))
1514
1c4cc63a
KH
1515(defvar set-auto-coding-for-load nil
1516 "Non-nil means look for `load-coding' property instead of `coding'.
1517This is used for loading and byte-compiling Emacs Lisp files.")
1518
8a592131
RS
1519(defun auto-coding-alist-lookup (filename)
1520 "Return the coding system specified by `auto-coding-alist' for FILENAME."
1521 (let ((alist auto-coding-alist)
c60ee5e7 1522 (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos cygwin)))
8a592131
RS
1523 coding-system)
1524 (while (and alist (not coding-system))
1525 (if (string-match (car (car alist)) filename)
1526 (setq coding-system (cdr (car alist)))
1527 (setq alist (cdr alist))))
1528 coding-system))
1529
835f49b8
KH
1530(defun set-auto-coding (filename size)
1531 "Return coding system for a file FILENAME of which SIZE bytes follow point.
1c4cc63a
KH
1532These bytes should include at least the first 1k of the file
1533and the last 3k of the file, but the middle may be omitted.
63561304 1534
d21ba5e0
DL
1535The function checks FILENAME against the variable `auto-coding-alist'.
1536If FILENAME doesn't match any entries in the variable, it checks the
502522b2 1537contents of the current buffer following point against
447404a3 1538`auto-coding-regexp-alist'. If no match is found, it checks for a
502522b2 1539`coding:' tag in the first one or two lines following point. If no
d21ba5e0 1540`coding:' tag is found, it checks any local variables list in the last
447404a3 15413K bytes out of the SIZE bytes. Finally, if none of these methods
d21ba5e0
DL
1542succeed, it checks to see if any function in `auto-coding-functions'
1543gives a match.
63561304 1544
d21ba5e0
DL
1545The return value is the specified coding system, or nil if nothing is
1546specified.
87aba788 1547
ba74e833 1548The variable `set-auto-coding-function' (which see) is set to this
87aba788 1549function by default."
502522b2 1550 (or (auto-coding-alist-lookup filename)
447404a3
CW
1551 ;; Try using `auto-coding-regexp-alist'.
1552 (save-excursion
1553 (let ((alist auto-coding-regexp-alist)
1554 coding-system)
1555 (while (and alist (not coding-system))
1556 (let ((regexp (car (car alist))))
1557 (when (re-search-forward regexp (+ (point) size) t)
1558 (setq coding-system (cdr (car alist)))))
0bca779a 1559 (setq alist (cdr alist)))
447404a3 1560 coding-system))
502522b2
GM
1561 (let* ((case-fold-search t)
1562 (head-start (point))
1563 (head-end (+ head-start (min size 1024)))
1564 (tail-start (+ head-start (max (- size 3072) 0)))
1565 (tail-end (+ head-start size))
1566 coding-system head-found tail-found pos)
1567 ;; Try a short cut by searching for the string "coding:"
1568 ;; and for "unibyte:" at the head and tail of SIZE bytes.
1569 (setq head-found (or (search-forward "coding:" head-end t)
1570 (search-forward "unibyte:" head-end t)))
1571 (if (and head-found (> head-found tail-start))
1572 ;; Head and tail are overlapped.
1573 (setq tail-found head-found)
1574 (goto-char tail-start)
1575 (setq tail-found (or (search-forward "coding:" tail-end t)
1576 (search-forward "unibyte:" tail-end t))))
1577
1578 ;; At first check the head.
1579 (when head-found
1580 (goto-char head-start)
6b66d028
RS
1581 (setq head-end (set-auto-mode-1))
1582 (setq head-start (point))
1d8e9a7c 1583 (when (and head-end (< head-found head-end))
835f49b8 1584 (goto-char head-start)
502522b2
GM
1585 (when (and set-auto-coding-for-load
1586 (re-search-forward
6b66d028 1587 "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
502522b2
GM
1588 head-end t))
1589 (setq coding-system 'raw-text))
1590 (when (and (not coding-system)
1591 (re-search-forward
6b66d028 1592 "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
502522b2
GM
1593 head-end t))
1594 (setq coding-system (intern (match-string 2)))
1595 (or (coding-system-p coding-system)
1596 (setq coding-system nil)))))
1597
1598 ;; If no coding: tag in the head, check the tail.
1599 (when (and tail-found (not coding-system))
1600 (goto-char tail-start)
1601 (search-forward "\n\^L" nil t)
1602 (if (re-search-forward
1603 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
1604 ;; The prefix is what comes before "local variables:" in its
1605 ;; line. The suffix is what comes after "local variables:"
1606 ;; in its line.
1607 (let* ((prefix (regexp-quote (match-string 1)))
1608 (suffix (regexp-quote (match-string 2)))
1609 (re-coding
1610 (concat
1611 "^" prefix
cfe98f50
GM
1612 ;; N.B. without the \n below, the regexp can
1613 ;; eat newlines.
1614 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
502522b2
GM
1615 suffix "$"))
1616 (re-unibyte
1617 (concat
1618 "^" prefix
cfe98f50 1619 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
502522b2
GM
1620 suffix "$"))
1621 (re-end
cfe98f50 1622 (concat "^" prefix "[ \t]*End *:[ \t]*" suffix "$"))
502522b2
GM
1623 (pos (point)))
1624 (re-search-forward re-end tail-end 'move)
1625 (setq tail-end (point))
1626 (goto-char pos)
1627 (when (and set-auto-coding-for-load
1628 (re-search-forward re-unibyte tail-end t))
1629 (setq coding-system 'raw-text))
1630 (when (and (not coding-system)
1631 (re-search-forward re-coding tail-end t))
1632 (setq coding-system (intern (match-string 1)))
1633 (or (coding-system-p coding-system)
1634 (setq coding-system nil))))))
447404a3
CW
1635 coding-system)
1636 ;; Finally, try all the `auto-coding-functions'.
1637 (let ((funcs auto-coding-functions)
1638 (coding-system nil))
1639 (while (and funcs (not coding-system))
1640 (setq coding-system (condition-case e
1641 (save-excursion
1642 (goto-char (point-min))
1643 (funcall (pop funcs) size))
1644 (error nil))))
502522b2 1645 coding-system)))
63561304
KH
1646
1647(setq set-auto-coding-function 'set-auto-coding)
87aba788 1648
c36b37ed 1649(defun after-insert-file-set-coding (inserted)
872a0a6f
RS
1650 "Set `buffer-file-coding-system' of current buffer after text is inserted.
1651INSERTED is the number of characters that were inserted, as figured
1652in the situation before this function. Return the number of characters
1653inserted, as figured in the situation after. The two numbers can be
1654different if the buffer has become unibyte."
4ed46869
KH
1655 (if last-coding-system-used
1656 (let ((coding-system
1657 (find-new-buffer-file-coding-system last-coding-system-used))
1658 (modified-p (buffer-modified-p)))
0269ddfb 1659 (when coding-system
71983219 1660 (set-buffer-file-coding-system coding-system t)
0269ddfb 1661 (set-buffer-modified-p modified-p))))
d0c26c63 1662 inserted)
4ed46869 1663
8057896b 1664;; The coding-spec and eol-type of coding-system returned is decided
4ed46869
KH
1665;; independently in the following order.
1666;; 1. That of buffer-file-coding-system locally bound.
1667;; 2. That of CODING.
1668
1669(defun find-new-buffer-file-coding-system (coding)
1670 "Return a coding system for a buffer when a file of CODING is inserted.
a73a8c89
KH
1671The local variable `buffer-file-coding-system' of the current buffer
1672is set to the returned value.
509064c5 1673Return nil if there's no need to set `buffer-file-coding-system'."
4ed46869 1674 (let (local-coding local-eol
b685f8d6 1675 found-coding found-eol
4ed46869
KH
1676 new-coding new-eol)
1677 (if (null coding)
1678 ;; Nothing found about coding.
1679 nil
1680
b685f8d6
RS
1681 ;; Get information of `buffer-file-coding-system' in LOCAL-EOL
1682 ;; and LOCAL-CODING.
1683 (setq local-eol (coding-system-eol-type buffer-file-coding-system))
1684 (if (null (numberp local-eol))
1685 ;; But eol-type is not yet set.
1686 (setq local-eol nil))
0269ddfb 1687 (if (and buffer-file-coding-system
c1841772
KH
1688 (not (eq (coding-system-type buffer-file-coding-system)
1689 'undecided)))
0269ddfb 1690 (setq local-coding (coding-system-base buffer-file-coding-system)))
b685f8d6
RS
1691
1692 (if (and (local-variable-p 'buffer-file-coding-system)
1693 local-eol local-coding)
4ed46869
KH
1694 ;; The current buffer has already set full coding-system, we
1695 ;; had better not change it.
1696 nil
1697
8057896b 1698 (setq found-eol (coding-system-eol-type coding))
4ed46869 1699 (if (null (numberp found-eol))
be02cd54
EZ
1700 ;; But eol-type is not found.
1701 ;; If EOL conversions are inhibited, force unix eol-type.
1702 (setq found-eol (if inhibit-eol-conversion 0)))
c1841772 1703 (setq found-coding (coding-system-base coding))
c76b5c99
KH
1704
1705 (if (and (not found-eol) (eq found-coding 'undecided))
1706 ;; No valid coding information found.
1707 nil
1708
1709 ;; Some coding information (eol or text) found.
1710
1711 ;; The local setting takes precedence over the found one.
1712 (setq new-coding (if (local-variable-p 'buffer-file-coding-system)
1713 (or local-coding found-coding)
1714 (or found-coding local-coding)))
1715 (setq new-eol (if (local-variable-p 'buffer-file-coding-system)
1716 (or local-eol found-eol)
1717 (or found-eol local-eol)))
1718
1719 (let ((eol-type (coding-system-eol-type new-coding)))
1720 (if (and (numberp new-eol) (vectorp eol-type))
1721 (aref eol-type new-eol)
1722 new-coding)))))))
4ed46869 1723
fe831d33
GV
1724(defun modify-coding-system-alist (target-type regexp coding-system)
1725 "Modify one of look up tables for finding a coding system on I/O operation.
8c453b46
RS
1726There are three of such tables, `file-coding-system-alist',
1727`process-coding-system-alist', and `network-coding-system-alist'.
fe831d33
GV
1728
1729TARGET-TYPE specifies which of them to modify.
8c453b46
RS
1730If it is `file', it affects `file-coding-system-alist' (which see).
1731If it is `process', it affects `process-coding-system-alist' (which see).
e8dd0160 1732If it is `network', it affects `network-coding-system-alist' (which see).
fe831d33
GV
1733
1734REGEXP is a regular expression matching a target of I/O operation.
1735The target is a file name if TARGET-TYPE is `file', a program name if
1736TARGET-TYPE is `process', or a network service name or a port number
1737to connect to if TARGET-TYPE is `network'.
1738
1739CODING-SYSTEM is a coding system to perform code conversion on the I/O
8c453b46
RS
1740operation, or a cons cell (DECODING . ENCODING) specifying the coding systems
1741for decoding and encoding respectively,
1742or a function symbol which, when called, returns such a cons cell."
fe831d33
GV
1743 (or (memq target-type '(file process network))
1744 (error "Invalid target type: %s" target-type))
1745 (or (stringp regexp)
1746 (and (eq target-type 'network) (integerp regexp))
1747 (error "Invalid regular expression: %s" regexp))
1748 (if (symbolp coding-system)
1749 (if (not (fboundp coding-system))
1750 (progn
1751 (check-coding-system coding-system)
1752 (setq coding-system (cons coding-system coding-system))))
1753 (check-coding-system (car coding-system))
1754 (check-coding-system (cdr coding-system)))
1755 (cond ((eq target-type 'file)
1756 (let ((slot (assoc regexp file-coding-system-alist)))
1757 (if slot
1758 (setcdr slot coding-system)
1759 (setq file-coding-system-alist
1760 (cons (cons regexp coding-system)
1761 file-coding-system-alist)))))
1762 ((eq target-type 'process)
1763 (let ((slot (assoc regexp process-coding-system-alist)))
1764 (if slot
1765 (setcdr slot coding-system)
1766 (setq process-coding-system-alist
1767 (cons (cons regexp coding-system)
1768 process-coding-system-alist)))))
1769 (t
1770 (let ((slot (assoc regexp network-coding-system-alist)))
1771 (if slot
1772 (setcdr slot coding-system)
1773 (setq network-coding-system-alist
1774 (cons (cons regexp coding-system)
1775 network-coding-system-alist)))))))
1776
db046b7d
KH
1777(defun decode-coding-inserted-region (from to filename
1778 &optional visit beg end replace)
f29387e8
KH
1779 "Decode the region between FROM and TO as if it is read from file FILENAME.
1780Optional arguments VISIT, BEG, END, and REPLACE are the same as those
1781of the function `insert-file-contents'."
1782 (save-excursion
1783 (save-restriction
1784 (narrow-to-region from to)
1785 (goto-char (point-min))
1786 (let ((coding coding-system-for-read))
1787 (or coding
1788 (setq coding (funcall set-auto-coding-function
1789 filename (- (point-max) (point-min)))))
1790 (or coding
1791 (setq coding (find-operation-coding-system
1792 'insert-file-contents
1793 filename visit beg end replace)))
1794 (if (coding-system-p coding)
1795 (or enable-multibyte-characters
1796 (setq coding
1797 (coding-system-change-text-conversion coding 'raw-text)))
1798 (setq coding nil))
1799 (if coding
1800 (decode-coding-region (point-min) (point-max) coding))
1801 (setq last-coding-system-used coding)))))
1802
b25eef20 1803(defun make-translation-table (&rest args)
a284eea3 1804 "Make a translation table from arguments.
d38b07f9 1805A translation table is a char table intended for character
a284eea3
DL
1806translation in CCL programs.
1807
d38b07f9 1808Each argument is a list of elements of the form (FROM . TO), where FROM
a284eea3 1809is a character to be translated to TO.
13d5617d 1810
4e003d37
KH
1811The arguments and forms in each argument are processed in the given
1812order, and if a previous form already translates TO to some other
1813character, say TO-ALT, FROM is also translated to TO-ALT."
f967223b 1814 (let ((table (make-char-table 'translation-table))
a73a8c89 1815 revlist)
5d75f46f
KH
1816 (dolist (elts args)
1817 (dolist (elt elts)
1818 (let ((from (car elt))
1819 (to (cdr elt))
1820 to-alt rev-from rev-to)
1821 ;; If we have already translated TO to TO-ALT, FROM should
1822 ;; also be translated to TO-ALT.
1823 (if (setq to-alt (aref table to))
1824 (setq to to-alt))
1825 (aset table from to)
1826 ;; If we have already translated some chars to FROM, they
1827 ;; should also be translated to TO.
1828 (when (setq rev-from (assq from revlist))
1829 (dolist (elt (cdr rev-from))
1830 (aset table elt to))
1831 (setq revlist (delq rev-from revlist)
1832 rev-from (cdr rev-from)))
1833 ;; Now update REVLIST.
1834 (setq rev-to (assq to revlist))
1835 (if rev-to
1836 (setcdr rev-to (cons from (cdr rev-to)))
1837 (setq rev-to (list to from)
1838 revlist (cons rev-to revlist)))
1839 (if rev-from
1840 (setcdr rev-to (append rev-from (cdr rev-to)))))))
a73a8c89
KH
1841 ;; Return TABLE just created.
1842 table))
1843
c76b5c99
KH
1844(defun make-translation-table-from-vector (vec)
1845 "Make translation table from decoding vector VEC.
9e3b6057
DL
1846VEC is an array of 256 elements to map unibyte codes to multibyte
1847characters. Elements may be nil for undefined code points.
c76b5c99
KH
1848See also the variable `nonascii-translation-table'."
1849 (let ((table (make-char-table 'translation-table))
1850 (rev-table (make-char-table 'translation-table))
c76b5c99 1851 ch)
9e3b6057 1852 (dotimes (i 256)
c76b5c99 1853 (setq ch (aref vec i))
9e3b6057
DL
1854 (when ch
1855 (aset table i ch)
1856 (if (>= ch 256)
1857 (aset rev-table ch i))))
c76b5c99
KH
1858 (set-char-table-extra-slot table 0 rev-table)
1859 table))
1860
a6d1872e
KH
1861(defun make-translation-table-from-alist (alist)
1862 "Make translation table from N<->M mapping in ALIST.
1863ALIST is an alist, each element has the form (FROM . TO).
1864FROM and TO are a character or a vector of characters.
1865If FROM is a character, that character is translated to TO.
1866If FROM is a vector of characters, that sequence is translated to TO.
1867The second extra-slot of the value is a translation table for reverse mapping."
1868 (let ((table (make-char-table 'translation-table))
1869 (rev-table (make-char-table 'translation-table))
1870 max-lookup from to)
1871 (setq max-lookup 1)
1872 (dolist (elt alist)
1873 (setq from (car elt) to (cdr elt))
1874 (if (characterp from)
1875 (aset table from to)
1876 (let* ((ch (aref from 0))
1877 (val (aref table ch)))
1878 (aset table ch (cons (cons from to) val)))
1879 (setq max-lookup (max max-lookup (length from)))))
1880 (set-char-table-extra-slot table 1 max-lookup)
1881 (setq max-lookup 1)
1882 (dolist (elt alist)
1883 (setq from (cdr elt) to (car elt))
1884 (if (characterp from)
1885 (aset rev-table from to)
1886 (let* ((ch (aref from 0))
1887 (val (aref rev-table ch)))
1888 (aset rev-table ch (cons (cons from to) val)))
1889 (setq max-lookup (max max-lookup (length from)))))
1890 (set-char-table-extra-slot rev-table 1 max-lookup)
1891 (set-char-table-extra-slot table 0 rev-table)
1892 table))
1893
f967223b 1894(defun define-translation-table (symbol &rest args)
a284eea3
DL
1895 "Define SYMBOL as the name of translation table made by ARGS.
1896This sets up information so that the table can be used for
1897translations in a CCL program.
b25eef20 1898
a284eea3
DL
1899If the first element of ARGS is a char-table whose purpose is
1900`translation-table', just define SYMBOL to name it. (Note that this
1901function does not bind SYMBOL.)
007c79c8 1902
a284eea3 1903Any other ARGS should be suitable as arguments of the function
007c79c8 1904`make-translation-table' (which see).
b25eef20 1905
452fdb31 1906This function sets properties `translation-table' and
521d4010
DL
1907`translation-table-id' of SYMBOL to the created table itself and the
1908identification number of the table respectively. It also registers
1909the table in `translation-table-vector'."
007c79c8
KH
1910 (let ((table (if (and (char-table-p (car args))
1911 (eq (char-table-subtype (car args))
1912 'translation-table))
1913 (car args)
1914 (apply 'make-translation-table args)))
f967223b 1915 (len (length translation-table-vector))
d9e3229d 1916 (id 0)
b25eef20 1917 (done nil))
f967223b 1918 (put symbol 'translation-table table)
b25eef20
KH
1919 (while (not done)
1920 (if (>= id len)
f967223b
KH
1921 (setq translation-table-vector
1922 (vconcat translation-table-vector (make-vector len nil))))
1923 (let ((slot (aref translation-table-vector id)))
b25eef20
KH
1924 (if (or (not slot)
1925 (eq (car slot) symbol))
1926 (progn
f967223b 1927 (aset translation-table-vector id (cons symbol table))
007c79c8
KH
1928 (setq done t))
1929 (setq id (1+ id)))))
f967223b 1930 (put symbol 'translation-table-id id)
d9e3229d
KH
1931 id))
1932
16431d3c
KH
1933(defun translate-region (start end table)
1934 "From START to END, translate characters according to TABLE.
1935TABLE is a string or a char-table.
1936If TABLE is a string, the Nth character in it is the mapping
1937for the character with code N.
1938If TABLE is a char-table, the element for character N is the mapping
1939for the character with code N.
1940It returns the number of characters changed."
1941 (interactive
1942 (list (region-beginning)
1943 (region-end)
1944 (let (table l)
1945 (dotimes (i (length translation-table-vector))
1946 (if (consp (aref translation-table-vector i))
1947 (push (list (symbol-name
1948 (car (aref translation-table-vector i)))) l)))
1949 (if (not l)
1950 (error "No translation table defined"))
1951 (while (not table)
1952 (setq table (completing-read "Translation table: " l nil t)))
1953 (intern table))))
1954 (if (symbolp table)
1955 (let ((val (get table 'translation-table)))
1956 (or (char-table-p val)
1957 (error "Invalid translation table name: %s" table))
1958 (setq table val)))
1959 (translate-region-internal start end table))
1960
35554641
KH
1961(put 'with-category-table 'lisp-indent-function 1)
1962
ef6e365d 1963(defmacro with-category-table (table &rest body)
8f924df7 1964 "Execute BODY like `progn' with CATEGORY-TABLE the current category table.
ef6e365d
JPW
1965The category table of the current buffer is saved, BODY is evaluated,
1966then the saved table is restored, even in case of an abnormal exit.
1967Value is what BODY returns."
1968 (let ((old-table (make-symbol "old-table"))
1969 (old-buffer (make-symbol "old-buffer")))
1970 `(let ((,old-table (category-table))
1971 (,old-buffer (current-buffer)))
1972 (unwind-protect
1973 (progn
1974 (set-category-table ,table)
1975 ,@body)
1976 (save-current-buffer
1977 (set-buffer ,old-buffer)
1978 (set-category-table ,old-table))))))
35554641 1979
394e4eb0
DL
1980(defun define-translation-hash-table (symbol table)
1981 "Define SYMBOL as the name of the hash translation TABLE for use in CCL.
1982
1983Analogous to `define-translation-table', but updates
1984`translation-hash-table-vector' and the table is for use in the CCL
1985`lookup-integer' and `lookup-character' functions."
1986 (unless (and (symbolp symbol)
1987 (hash-table-p table))
1988 (error "Bad args to define-translation-hash-table"))
1989 (let ((len (length translation-hash-table-vector))
1990 (id 0)
1991 done)
1992 (put symbol 'translation-hash-table table)
1993 (while (not done)
1994 (if (>= id len)
1995 (setq translation-hash-table-vector
1996 (vconcat translation-hash-table-vector [nil])))
1997 (let ((slot (aref translation-hash-table-vector id)))
1998 (if (or (not slot)
1999 (eq (car slot) symbol))
2000 (progn
2001 (aset translation-hash-table-vector id (cons symbol table))
2002 (setq done t))
2003 (setq id (1+ id)))))
2004 (put symbol 'translation-hash-table-id id)
2005 id))
2006
69eba008
KH
2007;;; Initialize some variables.
2008
2009(put 'use-default-ascent 'char-table-extra-slots 0)
2010(setq use-default-ascent (make-char-table 'use-default-ascent))
d6d6d592
KH
2011(put 'ignore-relative-composition 'char-table-extra-slots 0)
2012(setq ignore-relative-composition
2013 (make-char-table 'ignore-relative-composition))
69eba008 2014
256d0fef
DL
2015(make-obsolete 'set-char-table-default
2016 "Generic characters no longer exist" "22.1")
d9f6dfe6
CW
2017
2018;;; Built-in auto-coding-functions:
2019
2020(defun sgml-xml-auto-coding-function (size)
2021 "Determine whether the buffer is XML, and if so, its encoding.
2022This function is intended to be added to `auto-coding-functions'."
c069d3ac
SM
2023 (setq size (+ (point) size))
2024 (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" size t)
d9f6dfe6
CW
2025 (let ((end (save-excursion
2026 ;; This is a hack.
2f4e9c47 2027 (re-search-forward "\"\\s-*\\?>" size t))))
d9f6dfe6
CW
2028 (when end
2029 (if (re-search-forward "encoding=\"\\(.+?\\)\"" end t)
447404a3
CW
2030 (let* ((match (match-string 1))
2031 (sym (intern (downcase match))))
2032 (if (coding-system-p sym)
2033 sym
2034 (message "Warning: unknown coding system \"%s\"" match)
2035 nil))
d9f6dfe6
CW
2036 'utf-8)))))
2037
447404a3
CW
2038(defun sgml-html-meta-auto-coding-function (size)
2039 "If the buffer has an HTML meta tag, use it to determine encoding.
2040This function is intended to be added to `auto-coding-functions'."
c069d3ac 2041 (setq size (min (+ (point) size)
447404a3
CW
2042 ;; Only search forward 10 lines
2043 (save-excursion
2044 (forward-line 10)
2045 (point))))
2046 (when (and (search-forward "<html>" size t)
2047 (re-search-forward "<meta\\s-+http-equiv=\"content-type\"\\s-+content=\"text/\\sw+;\\s-*charset=\\(.+?\\)\"" size t))
2048 (let* ((match (match-string 1))
2049 (sym (intern (downcase match))))
2050 (if (coding-system-p sym)
2051 sym
2052 (message "Warning: unknown coding system \"%s\"" match)
2053 nil))))
0bca779a 2054
69eba008 2055;;;
4ed46869
KH
2056(provide 'mule)
2057
2058;;; mule.el ends here