(katakana-sjis, cp932-2-byte, cp932): New charsets.
[bpt/emacs.git] / lisp / international / mule.el
CommitLineData
07513d64 1;;; mule.el --- basic commands for multilingual environment
4ed46869 2
4ed46869 3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
8f924df7 4;; Licensed to the Free Software Foundation.
32db08f1 5;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
8f924df7 6;; Copyright (C) 2003
c1841772
KH
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H13PRO009
4ed46869
KH
9
10;; Keywords: mule, multilingual, character set, coding system
11
12;; This file is part of GNU Emacs.
13
14;; GNU Emacs is free software; you can redistribute it and/or modify
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
369314dc
KH
25;; along with GNU Emacs; see the file COPYING. If not, write to the
26;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27;; Boston, MA 02111-1307, USA.
4ed46869 28
60370d40
PJ
29;;; Commentary:
30
4ed46869
KH
31;;; Code:
32
8f924df7 33(defconst mule-version "6.0 (HANACHIRUSATO)" "\
4ed46869
KH
34Version number and name of this version of MULE (multilingual environment).")
35
8f924df7 36(defconst mule-version-date "2003.9.1" "\
4ed46869
KH
37Distribution date of this version of MULE (multilingual environment).")
38
c1841772
KH
39\f
40;;; CHARSET
41
6d2b6635
KH
42;; Backward compatibility code for handling emacs-mule charsets.
43(defvar private-char-area-1-min #xF0000)
44(defvar private-char-area-1-max #xFFFFE)
45(defvar private-char-area-2-min #x100000)
46(defvar private-char-area-2-max #x10FFFE)
47
48;; Table of emacs-mule charsets indexed by their emacs-mule ID.
49(defvar emacs-mule-charset-table (make-vector 256 nil))
50(aset emacs-mule-charset-table 0 'ascii)
51
52;; Convert the argument of old-style calll of define-charset to a
53;; property list used by the new-style.
54;; INFO-VECTOR is a vector of the format:
55;; [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
56;; SHORT-NAME LONG-NAME DESCRIPTION]
57
58(defun convert-define-charset-argument (emacs-mule-id info-vector)
59 (let* ((dim (aref info-vector 0))
60 (chars (aref info-vector 1))
61 (total (if (= dim 1) chars (* chars chars)))
62 (code-space (if (= dim 1) (if (= chars 96) [32 127] [33 126])
63 (if (= chars 96) [32 127 32 127] [33 126 33 126])))
64 code-offset)
65 (if (integerp emacs-mule-id)
66 (or (= emacs-mule-id 0)
67 (and (>= emacs-mule-id 129) (< emacs-mule-id 256))
68 (error "Invalid CHARSET-ID: %d" emacs-mule-id))
69 (let (from-id to-id)
70 (if (= dim 1) (setq from-id 160 to-id 224)
71 (setq from-id 224 to-id 255))
72 (while (and (< from-id to-id)
73 (not (aref emacs-mule-charset-table from-id)))
74 (setq from-id (1+ from-id)))
75 (if (= from-id to-id)
76 (error "No more room for the new Emacs-mule charset"))
77 (setq emacs-mule-id from-id)))
78 (if (> (- private-char-area-1-max private-char-area-1-min) total)
79 (setq code-offset private-char-area-1-min
80 private-char-area-1-min (+ private-char-area-1-min total))
81 (if (> (- private-char-area-2-max private-char-area-2-min) total)
82 (setq code-offset private-char-area-2-min
83 private-char-area-2-min (+ private-char-area-2-min total))
84 (error "No more space for a new charset.")))
85 (list :dimension dim
86 :code-space code-space
87 :iso-final-char (aref info-vector 4)
88 :code-offset code-offset
89 :emacs-mule-id emacs-mule-id)))
90
c1841772
KH
91(defun define-charset (name docstring &rest props)
92 "Define NAME (symbol) as a charset with DOCSTRING.
93The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
07513d64 94may be any symbol. The following have special meanings, and one of
bec25acc 95`:code-offset', `:map', `:subset', `:superset' must be specified.
c1841772
KH
96
97`:short-name'
98
99VALUE must be a short string to identify the charset. If omitted,
100NAME is used.
101
102`:long-name'
103
104VALUE must be a string longer than `:short-name' to identify the
07513d64 105charset. If omitted, the value of the `:short-name' attribute is used.
c1841772
KH
106
107`:dimension'
108
109VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of
07513d64
DL
110code-points of the charsets. If omitted, it is calculated from the
111value of the `:code-space' attribute.
c1841772
KH
112
113`:code-space'
114
115VALUE must be a vector of length at most 8 specifying the byte code
116range of each dimension in this format:
117 [ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
07513d64 118where MIN-N is the minimum byte value of Nth dimension of code-point,
c1841772
KH
119MAX-N is the maximum byte value of that.
120
b1a79461
KH
121`:min-code'
122
123VALUE must be an integer specifying the mininum code point of the
124charset. If omitted, it is calculated from `:code-space'. VALUE may
125be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
126the code point and LOW is the least significant 16 bits.
127
1f32125f 128`:max-code'
b1a79461
KH
129
130VALUE must be an integer specifying the maxinum code point of the
131charset. If omitted, it is calculated from `:code-space'. VALUE may
132be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
133the code point and LOW is the least significant 16 bits.
134
c1841772
KH
135`:iso-final-char'
136
137VALUE must be a character in the range 32 to 127 (inclusive)
138specifying the final char of the charset for ISO-2022 encoding. If
139omitted, the charset can't be encoded by ISO-2022 based
140coding-systems.
141
142`:iso-revision-number'
143
144VALUE must be an integer in the range 0..63, specifying the revision
145number of the charset for ISO-2022 encoding.
146
147`:emacs-mule-id'
148
6d2b6635 149VALUE must be an integer of 0, 129..255. If omitted, the charset
c1841772
KH
150can't be encoded by coding-systems of type `emacs-mule'.
151
152`:ascii-compatible-p'
153
07513d64
DL
154VALUE must be nil or t (default nil). If VALUE is t, the charset is
155compatible with ASCII, i.e. the first 128 code points map to ASCII.
c1841772
KH
156
157`:supplementary-p'
158
159VALUE must be nil or t. If the VALUE is t, the charset is
07513d64
DL
160supplementary, which means it is used only as a parent of some other
161charset.
c1841772
KH
162
163`:invalid-code'
164
165VALUE must be a nonnegative integer that can be used as an invalid
166code point of the charset. If the minimum code is 0 and the maximum
167code is greater than Emacs' maximum integer value, `:invalid-code'
168should not be omitted.
169
170`:code-offset'
171
07513d64
DL
172VALUE must be an integer added to the index number of a character to
173get the corresponding character code.
c1841772
KH
174
175`:map'
176
177VALUE must be vector or string.
178
179If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
180where CODE-n is a code-point of the charset, and CHAR-n is the
07513d64 181corresponding character code.
c1841772
KH
182
183If it is a string, it is a name of file that contains the above
3e4abc9e
KH
184information. Each line of the file must be this format:
185 0xXXX 0xYYY
186where XXX is a hexadecimal representation of CODE-n and YYY is a
187hexadecimal representation of CHAR-n. A line starting with `#' is a
188comment line.
c1841772 189
2c2a254f
KH
190`:subset'
191
192VALUE must be a list:
193 ( PARENT MIN-CODE MAX-CODE OFFSET )
194PARENT is a parent charset. MIN-CODE and MAX-CODE specify the range
195of characters inherited from the parent. OFFSET is an integer value
196to add to a code point of the parent charset to get the corresponding
197code point of this charset.
198
199`:superset'
c1841772
KH
200
201VALUE must be a list of parent charsets. The charset inherits
202characters from them. Each element of the list may be a cons (PARENT
203. OFFSET), where PARENT is a parent charset, and OFFSET is an offset
2c2a254f
KH
204value to add to a code point of PARENT to get the corresponding code
205point of this charset.
c1841772
KH
206
207`:unify-map'
208
209VALUE must be vector or string.
210
211If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
212where CODE-n is a code-point of the charset, and CHAR-n is the
07513d64 213corresponding Unicode character code.
c1841772
KH
214
215If it is a string, it is a name of file that contains the above
3e4abc9e
KH
216information. The file format is the same as what described for `:map'
217attribute."
6d2b6635
KH
218 (when (vectorp (car props))
219 ;; Old style code:
220 ;; (define-charset CHARSET-ID CHARSET-SYMBOL INFO-VECTOR)
221 ;; Convert the argument to make it fit with the current style.
222 (let ((vec (car props)))
223 (setq props (convert-define-charset-argument name vec)
224 name docstring
225 docstring (aref vec 8))))
c1841772
KH
226 (let ((attrs (mapcar 'list '(:dimension
227 :code-space
b1a79461
KH
228 :min-code
229 :max-code
c1841772
KH
230 :iso-final-char
231 :iso-revision-number
232 :emacs-mule-id
233 :ascii-compatible-p
234 :supplementary-p
235 :invalid-code
236 :code-offset
237 :map
2c2a254f
KH
238 :subset
239 :superset
c1841772
KH
240 :unify-map
241 :plist))))
242
243 ;; If :dimension is omitted, get the dimension from :code-space.
244 (let ((dimension (plist-get props :dimension)))
245 (or dimension
246 (progn
247 (setq dimension (/ (length (plist-get props :code-space)) 2))
248 (setq props (plist-put props :dimension dimension)))))
249
6d2b6635
KH
250 ;; If :emacs-mule-id is specified, update emacs-mule-charset-table.
251 (let ((emacs-mule-id (plist-get props :emacs-mule-id)))
252 (if (integerp emacs-mule-id)
253 (aset emacs-mule-charset-table emacs-mule-id name)))
254
c1841772
KH
255 (dolist (slot attrs)
256 (setcdr slot (plist-get props (car slot))))
257
258 ;; Make sure that the value of :code-space is a vector of 8
259 ;; elements.
260 (let* ((slot (assq :code-space attrs))
261 (val (cdr slot))
262 (len (length val)))
263 (if (< len 8)
264 (setcdr slot
265 (vconcat val (make-vector (- 8 len) 0)))))
266
267 ;; Add :name and :docstring properties to PROPS.
268 (setq props
269 (cons :name (cons name (cons :docstring (cons docstring props)))))
270 (or (plist-get props :short-name)
271 (plist-put props :short-name (symbol-name name)))
272 (or (plist-get props :long-name)
273 (plist-put props :long-name (plist-get props :short-name)))
e1e529fa
DL
274 ;; We can probably get a worthwhile amount in purespace.
275 (setq props
276 (mapcar (lambda (elt)
277 (if (stringp elt)
278 (purecopy elt)
279 elt))
280 props))
c1841772
KH
281 (setcdr (assq :plist attrs) props)
282
283 (apply 'define-charset-internal name (mapcar 'cdr attrs))))
284
285
4ed46869 286(defun load-with-code-conversion (fullname file &optional noerror nomessage)
0f69cb38
KH
287 "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
288The file contents are decoded before evaluation if necessary.
4ed46869
KH
289If optional second arg NOERROR is non-nil,
290 report no error if FILE doesn't exist.
291Print messages at start and end of loading unless
292 optional third arg NOMESSAGE is non-nil.
293Return t if file exists."
294 (if (null (file-readable-p fullname))
295 (and (null noerror)
296 (signal 'file-error (list "Cannot open load file" file)))
297 ;; Read file with code conversion, and then eval.
298 (let* ((buffer
299 ;; To avoid any autoloading, set default-major-mode to
300 ;; fundamental-mode.
88162676
RS
301 ;; So that we don't get completely screwed if the
302 ;; file is encoded in some complicated character set,
303 ;; read it with real decoding, as a multibyte buffer,
304 ;; even if this is a --unibyte Emacs session.
305 (let ((default-major-mode 'fundamental-mode)
306 (default-enable-multibyte-characters t))
4ed46869
KH
307 ;; We can't use `generate-new-buffer' because files.el
308 ;; is not yet loaded.
309 (get-buffer-create (generate-new-buffer-name " *load*"))))
db5cae4b
SM
310 (load-in-progress t)
311 (source (save-match-data (string-match "\\.el\\'" fullname))))
312 (unless nomessage
313 (if source
314 (message "Loading %s (source)..." file)
315 (message "Loading %s..." file)))
316 (when purify-flag
4c86cca0 317 (push file preloaded-file-list))
4ed46869 318 (unwind-protect
a6acd8a2 319 (let ((load-file-name fullname)
1c4cc63a 320 (set-auto-coding-for-load t)
a6acd8a2 321 (inhibit-file-name-operation nil))
4ed46869
KH
322 (save-excursion
323 (set-buffer buffer)
324 (insert-file-contents fullname)
7d276780
EZ
325 ;; If the loaded file was inserted with no-conversion or
326 ;; raw-text coding system, make the buffer unibyte.
327 ;; Otherwise, eval-buffer might try to interpret random
328 ;; binary junk as multibyte characters.
329 (if (and enable-multibyte-characters
8f924df7
KH
330 (or (eq (coding-system-type last-coding-system-used)
331 'raw-text)))
7d276780 332 (set-buffer-multibyte nil))
4ed46869
KH
333 ;; Make `kill-buffer' quiet.
334 (set-buffer-modified-p nil))
0f69cb38 335 ;; Have the original buffer current while we eval.
88162676
RS
336 (eval-buffer buffer nil file
337 ;; If this Emacs is running with --unibyte,
338 ;; convert multibyte strings to unibyte
339 ;; after reading them.
ba74e833 340;; (not default-enable-multibyte-characters)
8dd08b5b 341 nil t
ba74e833 342 ))
cfc70cdf
RS
343 (let (kill-buffer-hook kill-buffer-query-functions)
344 (kill-buffer buffer)))
4ed46869 345 (let ((hook (assoc file after-load-alist)))
db5cae4b
SM
346 (when hook
347 (mapcar (function eval) (cdr hook))))
348 (unless (or nomessage noninteractive)
349 (if source
350 (message "Loading %s (source)...done" file)
351 (message "Loading %s...done" file)))
4ed46869
KH
352 t)))
353
8f924df7 354(defun charset-info (charset)
4ed46869 355 "Return a vector of information of CHARSET.
8f924df7 356This function is provided for backward compatibility.
4ed46869 357
4ed46869
KH
358The elements of the vector are:
359 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
360 LEADING-CODE-BASE, LEADING-CODE-EXT,
361 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
362 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
8f924df7 363 PLIST.
4ed46869 364where
8f924df7
KH
365CHARSET-ID is always 0.
366BYTES is always 0.
367DIMENSION is the number of bytes of a code-point of the charset:
368 1, 2, 3, or 4.
369CHARS is the number of characters in a dimension:
370 94, 96, 128, or 256.
371WIDTH is always 0.
372DIRECTION is always 0.
373LEADING-CODE-BASE is always 0.
374LEADING-CODE-EXT is always 0.
4ed46869 375ISO-FINAL-CHAR (character) is the final character of the
7dd4c92d
KH
376 corresponding ISO 2022 charset. If the charset is not assigned
377 any final character, the value is -1.
8f924df7
KH
378ISO-GRAPHIC-PLANE is always 0.
379REVERSE-CHARSET is always -1.
4ed46869
KH
380SHORT-NAME (string) is the short name to refer to the charset.
381LONG-NAME (string) is the long name to refer to the charset
382DESCRIPTION (string) is the description string of the charset.
383PLIST (property list) may contain any type of information a user
384 want to put and get by functions `put-charset-property' and
385 `get-charset-property' respectively."
8f924df7
KH
386 (vector 0
387 0
388 (charset-dimension charset)
389 (charset-chars charset)
390 0
391 0
392 0
393 0
394 (charset-iso-final-char charset)
395 0
396 -1
397 (get-charset-property charset :short-name)
398 (get-charset-property charset :short-name)
399 (charset-description charset)
400 (charset-plist charset)))
4ed46869 401
40c81f74
PE
402;; It is better not to use backquote in this file,
403;; because that makes a bootstrapping problem
404;; if you need to recompile all the Lisp files using interpreted code.
405
8f924df7
KH
406(defun charset-id (charset)
407 "Always return 0. This is provided for backward compatibility."
408 0)
4ed46869
KH
409
410(defmacro charset-bytes (charset)
8f924df7
KH
411 "Always return 0. This is provided for backward compatibility."
412 0)
c1841772
KH
413
414(defun get-charset-property (charset propname)
415 "Return the value of CHARSET's PROPNAME property.
416This is the last value stored with
417 (put-charset-property CHARSET PROPNAME VALUE)."
418 (plist-get (charset-plist charset) propname))
419
420(defun put-charset-property (charset propname value)
1f32125f 421 "Set CHARSETS's PROPNAME property to value VALUE.
c1841772
KH
422It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
423 (set-charset-plist charset
424 (plist-put (charset-plist charset) propname value)))
425
c1841772
KH
426(defun charset-description (charset)
427 "Return description string of CHARSET."
428 (plist-get (charset-plist charset) :docstring))
429
430(defun charset-dimension (charset)
12504f57 431 "Return dimension of CHARSET."
c1841772
KH
432 (plist-get (charset-plist charset) :dimension))
433
346a8d64 434(defun charset-chars (charset &optional dimension)
12504f57 435 "Return number of characters contained in DIMENSION of CHARSET.
346a8d64
DL
436DIMENSION defaults to the first dimension."
437 (unless dimension (setq dimension 1))
103cc921 438 (let ((code-space (plist-get (charset-plist charset) :code-space)))
346a8d64
DL
439 (1+ (- (aref code-space (1- (* 2 dimension)))
440 (aref code-space (- (* 2 dimension) 2))))))
c1841772
KH
441
442(defun charset-iso-final-char (charset)
1d839a14
DL
443 "Return ISO-2022 final character of CHARSET.
444Return -1 if charset isn't an ISO 2022 one."
c1841772
KH
445 (or (plist-get (charset-plist charset) :iso-final-char)
446 -1))
4ed46869
KH
447
448(defmacro charset-short-name (charset)
c1841772
KH
449 "Return short name of CHARSET."
450 (plist-get (charset-plist charset) :short-name))
4ed46869
KH
451
452(defmacro charset-long-name (charset)
c1841772
KH
453 "Return long name of CHARSET."
454 (plist-get (charset-plist charset) :long-name))
4ed46869 455
d3675a42 456(defun charset-list ()
12504f57 457 "Return list of all charsets ever defined.
900dc6e3 458
d3675a42 459This function is provided for backward compatibility.
900dc6e3 460Now we have the variable `charset-list'."
d3675a42 461 charset-list)
07513d64 462(make-obsolete 'charset-list "Use variable `charset-list'" "22.1")
d3675a42 463
6d2b6635
KH
464\f
465;;; CHARACTER
466(defalias 'char-valid-p 'characterp)
467(make-obsolete 'char-valid-p 'characterp "22.1")
468
c1841772 469(defun generic-char-p (char)
8f924df7 470 "Always return nil. This is provided for backward compatibility."
c1841772 471 nil)
07513d64 472(make-obsolete 'generic-char-p "Generic characters no longer exist" "22.1")
6d2b6635
KH
473
474(defun make-char-internal (charset-id &optional code1 code2)
475 (let ((charset (aref emacs-mule-charset-table charset-id)))
476 (or charset
477 (error "Invalid Emacs-mule charset ID: %d" charset-id))
478 (make-char charset code1 code2)))
0269ddfb 479\f
e76938e7 480;; Coding system stuff
4ed46869 481
c1841772
KH
482;; Coding system is a symbol that has been defined by the function
483;; `define-coding-system'.
4ed46869 484
c1841772
KH
485(defconst coding-system-iso-2022-flags
486 '(long-form
487 ascii-at-eol
488 ascii-at-cntl
489 7-bit
490 locking-shift
491 single-shift
492 designation
493 revision
494 direction
495 init-at-bol
496 designate-at-bol
497 safe
498 latin-extra
499 composition
3ed58a15
KH
500 euc-tw-shift
501 use-roman
502 use-oldjis)
c1841772 503 "List of symbols that control ISO-2022 encoder/decoder.
4ed46869 504
12504f57 505The value of the `:flags' attribute in the argument of the function
caa7db3a 506`define-coding-system' must be one of them.
4ed46869 507
c1841772
KH
508If `long-form' is specified, use a long designation sequence on
509encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
510and `japanese-jisx0208'. The long designation sequence doesn't
12504f57 511conform to ISO 2022, but is used by such coding systems as
c1841772
KH
512`compound-text'.
513
514If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
515on encoding.
516
517If `ascii-at-cntl' is specified, designate ASCII to g0 before control
518codes and SPC on encoding.
519
520If `7-bit' is specified, use 7-bit code only on encoding.
521
522If `locking-shift' is specified, decode locking-shift code correctly
523on decoding, and use locking-shift to invoke a graphic element on
524encoding.
525
526If `single-shift' is specified, decode single-shift code correctly on
527decoding, and use single-shift to invoke a graphic element on encoding.
528
529If `designation' is specified, decode designation code correctly on
530decoding, and use designation to designate a charset to a graphic
531element on encoding.
532
533If `revision' is specified, produce an escape sequence to specify
534revision number of a charset on encoding. Such an escape sequence is
535always correctly decoded on decoding.
536
537If `direction' is specified, decode ISO6429's code for specifying
12504f57 538direction correctly, and produce the code on encoding.
c1841772
KH
539
540If `init-at-bol' is specified, on encoding, it is assumed that
541invocation and designation statuses are reset at each beginning of
12504f57 542line even if `ascii-at-eol' is not specified; thus no codes for
c1841772
KH
543resetting them are produced.
544
545If `safe' is specified, on encoding, characters not supported by a
546coding are replaced with `?'.
547
12504f57 548If `latin-extra' is specified, the code-detection routine assumes that a
c1841772
KH
549code specified in `latin-extra-code-table' (which see) is valid.
550
551If `composition' is specified, an escape sequence to specify
12504f57 552composition sequence is correctly decoded on decoding, and is produced
c1841772
KH
553on encoding.
554
555If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
12504f57 556correctly decoded on decoding, and is produced on encoding.
c1841772 557
12504f57
DL
558If `use-roman' is specified, JIS0201-1976-Roman is designated instead
559of ASCII.
560
561If `use-oldjis' is specified, JIS0208-1976 is designated instead of
562JIS0208-1983.")
563
c1841772 564(defun define-coding-system (name docstring &rest props)
12504f57 565 "Define NAME (a symbol) as a coding system with DOCSTRING and attributes.
c1841772
KH
566The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
567may be any symbol.
568
12504f57
DL
569The following attributes have special meanings. Those labeled as
570\"(required)\", should not be omitted.
c1841772
KH
571
572`:mnemonic' (required)
573
574VALUE is a character to display on mode line for the coding system.
575
576`:coding-type' (required)
577
578VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022',
1bfd603c 579`emacs-mule', `shift-jis', `ccl', `raw-text', `undecided'.
c1841772 580
12504f57 581`:eol-type'
c1841772 582
12504f57 583VALUE is the EOL (end-of-line) format of the coding system. It must be
c1841772
KH
584one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL
585\(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF),
586and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on
12504f57 587decoding by the coding system, Emacs automatically detects the EOL
c1841772
KH
588format of the source text.
589
736345cb 590`:charset-list'
c1841772
KH
591
592VALUE must be a list of charsets supported by the coding system. On
593encoding by the coding system, if a character belongs to multiple
594charsets in the list, a charset that comes earlier in the list is
736345cb
KH
595selected. If `:coding-type' is `iso-2022', VALUE may be `iso-2022',
596which indicates that the coding system supports all ISO-2022 based
597charsets. If `:coding-type' is `emacs-mule', VALUE may be
598`emacs-mule', which indicates that the coding system supports all
1d839a14 599charsets that have the `:emacs-mule-id' property.
c1841772 600
12504f57 601`:ascii-compatible-p'
c1841772
KH
602
603If VALUE is non-nil, the coding system decodes all 7-bit bytes into
07513d64 604the corresponding ASCII characters, and encodes all ASCII characters
12504f57 605back to the corresponding 7-bit bytes. VALUE defaults to nil.
c1841772 606
12504f57 607`:decode-translation-table'
c1841772
KH
608
609VALUE must be a translation table to use on decoding.
610
12504f57 611`:encode-translation-table'
c1841772
KH
612
613VALUE must be a translation table to use on encoding.
614
12504f57 615`:post-read-conversion'
c1841772
KH
616
617VALUE must be a function to call after some text is inserted and
618decoded by the coding system itself and before any functions in
619`after-insert-functions' are called. The arguments to this function
12504f57
DL
620are the same as those of a function in `after-insert-file-functions',
621i.e. LENGTH of the text to be decoded with point at the head of it,
622and the function should leave point unchanged.
c1841772
KH
623
624`:pre-write-conversion'
625
626VALUE must be a function to call after all functions in
627`write-region-annotate-functions' and `buffer-file-format' are called,
628and before the text is encoded by the coding system itself. The
12504f57
DL
629arguments to this function are the same as those of a function in
630`write-region-annotate-functions'.
c1841772
KH
631
632`:default-char'
633
634VALUE must be a character. On encoding, a character not supported by
635the coding system is replaced with VALUE.
636
8f924df7
KH
637`:for-unibyte'
638
639VALUE non-nil means that visiting a file with the coding system
640results in a unibyte buffer.
641
c1841772
KH
642`:eol-type'
643
644VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like
645EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like
12504f57
DL
646EOL (CR). If omitted, on decoding, the coding system detects EOL
647format automatically, and on encoding, uses Unix-like EOL.
c1841772
KH
648
649`:mime-charset'
650
12504f57
DL
651VALUE must be a symbol whose name is that of a MIME charset converted
652to lower case.
c1841772 653
1bfd603c
DL
654`:mime-text-unsuitable'
655
656VALUE non-nil means the `:mime-charset' property names a charset which
1894d108 657is unsuitable for the top-level media type \"text\".
1bfd603c 658
c1841772
KH
659`:flags'
660
12504f57
DL
661VALUE must be a list of symbols that control the ISO-2022 converter.
662Each must be a member of the list `coding-system-iso-2022-flags'
c1841772
KH
663\(which see). This attribute has a meaning only when `:coding-type'
664is `iso-2022'.
665
666`:designation'
667
12504f57 668VALUE must be a vector [G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
c1841772
KH
669GN-USAGE specifies the usage of graphic register GN as follows.
670
671If it is nil, no charset can be designated to GN.
672
07513d64 673If it is a charset, the charset is initially designated to GN, and
c1841772
KH
674never used by the other charsets.
675
676If it is a list, the elements must be charsets, nil, 94, or 96. GN
12504f57
DL
677can be used by all the listed charsets. If the list contains 94, any
678iso-2022 charset whose code-space ranges are 94 long can be designated
679to GN. If the list contains 96, any charsets whose whose ranges are
68096 long can be designated to GN. If the first element is a charset,
681that charset is initially designated to GN.
c1841772
KH
682
683This attribute has a meaning only when `:coding-type' is `iso-2022'.
684
685`:bom'
686
12504f57
DL
687This attributes specifies whether the coding system uses a `byte order
688mark'. VALUE must nil, t, or cons of coding systems whose
689`:coding-type' is `utf-16'.
c1841772 690
0ea1a6ca
KH
691If the value is nil, on decoding, don't treat the first two-byte as
692BOM, and on encoding, don't produce BOM bytes.
693
694If the value is t, on decoding, skip the first two-byte as BOM, and on
695encoding, produce BOM bytes accoding to the value of `:endian'.
696
697If the value is cons, on decoding, check the first two-byte. If theyq
698are 0xFE 0xFF, use the car part coding system of the value. If they
699are 0xFF 0xFE, use the car part coding system of the value.
700Otherwise, treat them as bytes for a normal character. On encoding,
701produce BOM bytes accoding to the value of `:endian'.
702
c1841772
KH
703This attribute has a meaning only when `:coding-type' is `utf-16'.
704
705`:endian'
706
0ea1a6ca
KH
707VALUE must be `big' or `little' specifying big-endian and
708little-endian respectively. The default value is `big'.
c1841772
KH
709
710This attribute has a meaning only when `:coding-type' is `utf-16'.
711
712`:ccl-decoder'
713
12504f57
DL
714VALUE is a symbol representing the registered CCL program used for
715decoding. This attribute has a meaning only when `:coding-type' is
716`ccl'.
c1841772
KH
717
718`:ccl-encoder'
719
12504f57
DL
720VALUE is a symbol representing the registered CCL program used for
721encoding. This attribute has a meaning only when `:coding-type' is
722`ccl'."
c1841772
KH
723 (let* ((common-attrs (mapcar 'list
724 '(:mnemonic
725 :coding-type
726 :charset-list
727 :ascii-compatible-p
1a9db556 728 :decode-translation-table
c1841772
KH
729 :encode-translation-table
730 :post-read-conversion
731 :pre-write-conversion
732 :default-char
7e742024 733 :for-unibyte
c1841772
KH
734 :plist
735 :eol-type)))
736 (coding-type (plist-get props :coding-type))
737 (spec-attrs (mapcar 'list
738 (cond ((eq coding-type 'iso-2022)
739 '(:initial
740 :reg-usage
741 :request
742 :flags))
743 ((eq coding-type 'utf-16)
744 '(:bom
745 :endian))
746 ((eq coding-type 'ccl)
747 '(:ccl-decoder
748 :ccl-encoder
749 :valids))))))
750
751 (dolist (slot common-attrs)
752 (setcdr slot (plist-get props (car slot))))
753
754 (dolist (slot spec-attrs)
755 (setcdr slot (plist-get props (car slot))))
756
757 (if (eq coding-type 'iso-2022)
758 (let ((designation (plist-get props :designation))
759 (flags (plist-get props :flags))
760 (initial (make-vector 4 nil))
761 (reg-usage (cons 4 4))
762 request elt)
763 (dotimes (i 4)
764 (setq elt (aref designation i))
765 (cond ((charsetp elt)
766 (aset initial i elt)
767 (setq request (cons (cons elt i) request)))
768 ((consp elt)
769 (aset initial i (car elt))
770 (if (charsetp (car elt))
771 (setq request (cons (cons (car elt) i) request)))
772 (dolist (e (cdr elt))
773 (cond ((charsetp e)
774 (setq request (cons (cons e i) request)))
775 ((eq e 94)
776 (setcar reg-usage i))
777 ((eq e 96)
778 (setcdr reg-usage i))
779 ((eq e t)
780 (setcar reg-usage i)
781 (setcdr reg-usage i)))))))
782 (setcdr (assq :initial spec-attrs) initial)
783 (setcdr (assq :reg-usage spec-attrs) reg-usage)
784 (setcdr (assq :request spec-attrs) request)
785
786 ;; Change :flags value from a list to a bit-mask.
787 (let ((bits 0)
788 (i 0))
789 (dolist (elt coding-system-iso-2022-flags)
790 (if (memq elt flags)
791 (setq bits (logior bits (lsh 1 i))))
792 (setq i (1+ i)))
793 (setcdr (assq :flags spec-attrs) bits))))
794
795 ;; Add :name and :docstring properties to PROPS.
796 (setq props
e1e529fa
DL
797 (cons :name (cons name (cons :docstring (cons (purecopy docstring)
798 props)))))
c1841772 799 (setcdr (assq :plist common-attrs) props)
c1841772
KH
800 (apply 'define-coding-system-internal
801 name (mapcar 'cdr (append common-attrs spec-attrs)))))
4ed46869 802
8057896b 803(defun coding-system-doc-string (coding-system)
0269ddfb 804 "Return the documentation string for CODING-SYSTEM."
c1841772 805 (plist-get (coding-system-plist coding-system) :docstring))
4ed46869 806
4ed46869 807(defun coding-system-mnemonic (coding-system)
0269ddfb 808 "Return the mnemonic character of CODING-SYSTEM.
12504f57
DL
809The mnemonic character of a coding system is used in mode line to
810indicate the coding system. If CODING-SYSTEM. is nil, return ?=."
c1841772 811 (plist-get (coding-system-plist coding-system) :mnemonic))
4ed46869 812
c1841772
KH
813(defun coding-system-type (coding-system)
814 "Return the coding type of CODING-SYSTEM.
815A coding type is a symbol indicating the encoding method of CODING-SYSTEM.
816See the function `define-coding-system' for more detail."
817 (plist-get (coding-system-plist coding-system) :coding-type))
d3675a42 818
c1841772 819(defun coding-system-charset-list (coding-system)
07513d64 820 "Return list of charsets supported by CODING-SYSTEM.
c1841772
KH
821If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
822If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'."
823 (plist-get (coding-system-plist coding-system) :charset-list))
0269ddfb 824
2f1e746b
KH
825(defun coding-system-category (coding-system)
826 "Return a category symbol of CODING-SYSTEM."
827 (plist-get (coding-system-plist coding-system) :category))
0269ddfb
KH
828
829(defun coding-system-get (coding-system prop)
07513d64
DL
830 "Extract a value from CODING-SYSTEM's property list for property PROP.
831For compatibility with Emacs 20/21, this accepts old-style symbols
832like `mime-charset' as well as the current style like `:mime-charset'."
833 (or (plist-get (coding-system-plist coding-system) prop)
834 (if (not (keywordp prop))
835 (plist-get (coding-system-plist coding-system)
836 (intern (concat ":" (symbol-name prop)))))))
0269ddfb
KH
837
838(defun coding-system-put (coding-system prop val)
839 "Change value in CODING-SYSTEM's property list PROP to VAL."
c1841772 840 (plist-put (coding-system-plist coding-system) prop val))
0269ddfb
KH
841
842(defalias 'coding-system-parent 'coding-system-base)
2598a293 843(make-obsolete 'coding-system-parent 'coding-system-base "20.3")
0269ddfb 844
2e729bfa
JB
845(defun coding-system-eol-type-mnemonic (coding-system)
846 "Return the string indicating end-of-line format of CODING-SYSTEM."
847 (let* ((eol-type (coding-system-eol-type coding-system))
f4f00827 848 (val (cond ((eq eol-type 0) eol-mnemonic-unix)
2e729bfa
JB
849 ((eq eol-type 1) eol-mnemonic-dos)
850 ((eq eol-type 2) eol-mnemonic-mac)
f4f00827 851 (t eol-mnemonic-undecided))))
2e729bfa
JB
852 (if (stringp val)
853 val
854 (char-to-string val))))
855
857ea15c
AS
856(defun coding-system-lessp (x y)
857 (cond ((eq x 'no-conversion) t)
858 ((eq y 'no-conversion) nil)
859 ((eq x 'emacs-mule) t)
860 ((eq y 'emacs-mule) nil)
861 ((eq x 'undecided) t)
862 ((eq y 'undecided) nil)
863 (t (let ((c1 (coding-system-mnemonic x))
864 (c2 (coding-system-mnemonic y)))
865 (or (< (downcase c1) (downcase c2))
866 (and (not (> (downcase c1) (downcase c2)))
867 (< c1 c2)))))))
868
857ea15c 869(defun add-to-coding-system-list (coding-system)
521d4010 870 "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted."
857ea15c
AS
871 (if (or (null coding-system-list)
872 (coding-system-lessp coding-system (car coding-system-list)))
873 (setq coding-system-list (cons coding-system coding-system-list))
874 (let ((len (length coding-system-list))
875 mid (tem coding-system-list))
876 (while (> len 1)
877 (setq mid (nthcdr (/ len 2) tem))
878 (if (coding-system-lessp (car mid) coding-system)
879 (setq tem mid
880 len (- len (/ len 2)))
881 (setq len (/ len 2))))
882 (setcdr tem (cons coding-system (cdr tem))))))
883
80a7463d 884(defun coding-system-list (&optional base-only)
c11a8f77 885 "Return a list of all existing non-subsidiary coding systems.
12504f57
DL
886If optional arg BASE-ONLY is non-nil, only base coding systems are
887listed. The value doesn't include subsidiary coding systems which are
c11a8f77
KH
888made from bases and aliases automatically for various end-of-line
889formats (e.g. iso-latin-1-unix, koi8-r-dos)."
80a7463d
KH
890 (let* ((codings (copy-sequence coding-system-list))
891 (tail (cons nil codings)))
892 ;; Remove subsidiary coding systems (eol variants) and alias
893 ;; coding systems (if necessary).
894 (while (cdr tail)
895 (let* ((coding (car (cdr tail)))
687441de 896 (aliases (coding-system-aliases coding)))
80a7463d
KH
897 (if (or
898 ;; CODING is an eol variant if not in ALIASES.
899 (not (memq coding aliases))
900 ;; CODING is an alias if it is not car of ALIASES.
901 (and base-only (not (eq coding (car aliases)))))
902 (setcdr tail (cdr (cdr tail)))
903 (setq tail (cdr tail)))))
904 codings))
905
620956ca
KH
906(defconst char-coding-system-table nil
907 "This is an obsolete variable.
908It exists just for backward compatibility, and the value is always nil.")
c11a8f77 909
50c29104
KH
910(defun transform-make-coding-system-args (name type &optional doc-string props)
911 "For internal use only.
912Transform XEmacs style args for `make-coding-system' to Emacs style.
913Value is a list of transformed arguments."
914 (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
915 (eol-type (plist-get props 'eol-type))
916 properties tmp)
917 (cond
918 ((eq eol-type 'lf) (setq eol-type 'unix))
919 ((eq eol-type 'crlf) (setq eol-type 'dos))
920 ((eq eol-type 'cr) (setq eol-type 'mac)))
921 (if (setq tmp (plist-get props 'post-read-conversion))
922 (setq properties (plist-put properties 'post-read-conversion tmp)))
923 (if (setq tmp (plist-get props 'pre-write-conversion))
924 (setq properties (plist-put properties 'pre-write-conversion tmp)))
925 (cond
f4a012a6
KH
926 ((eq type 'shift-jis)
927 `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type))
928 ((eq type 'iso2022) ; This is not perfect.
929 (if (plist-get props 'escape-quoted)
930 (error "escape-quoted is not supported: %S"
931 `(,name ,type ,doc-string ,props)))
932 (let ((g0 (plist-get props 'charset-g0))
933 (g1 (plist-get props 'charset-g1))
934 (g2 (plist-get props 'charset-g2))
935 (g3 (plist-get props 'charset-g3))
936 (use-roman
937 (and
938 (eq (cadr (assoc 'latin-jisx0201
939 (plist-get props 'input-charset-conversion)))
940 'ascii)
941 (eq (cadr (assoc 'ascii
942 (plist-get props 'output-charset-conversion)))
943 'latin-jisx0201)))
944 (use-oldjis
945 (and
946 (eq (cadr (assoc 'japanese-jisx0208-1978
947 (plist-get props 'input-charset-conversion)))
948 'japanese-jisx0208)
949 (eq (cadr (assoc 'japanese-jisx0208
950 (plist-get props 'output-charset-conversion)))
951 'japanese-jisx0208-1978))))
952 (if (charsetp g0)
953 (if (plist-get props 'force-g0-on-output)
954 (setq g0 `(nil ,g0))
955 (setq g0 `(,g0 t))))
956 (if (charsetp g1)
957 (if (plist-get props 'force-g1-on-output)
958 (setq g1 `(nil ,g1))
959 (setq g1 `(,g1 t))))
960 (if (charsetp g2)
961 (if (plist-get props 'force-g2-on-output)
962 (setq g2 `(nil ,g2))
963 (setq g2 `(,g2 t))))
964 (if (charsetp g3)
965 (if (plist-get props 'force-g3-on-output)
966 (setq g3 `(nil ,g3))
967 (setq g3 `(,g3 t))))
968 `(,name 2 ,mnemonic ,doc-string
969 (,g0 ,g1 ,g2 ,g3
970 ,(plist-get props 'short)
971 ,(not (plist-get props 'no-ascii-eol))
972 ,(not (plist-get props 'no-ascii-cntl))
973 ,(plist-get props 'seven)
974 t
975 ,(not (plist-get props 'lock-shift))
976 ,use-roman
977 ,use-oldjis
978 ,(plist-get props 'no-iso6429)
979 nil nil nil nil)
980 ,properties ,eol-type)))
981 ((eq type 'big5)
982 `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type))
50c29104 983 ((eq type 'ccl)
f4a012a6 984 `(,name 4 ,mnemonic ,doc-string
50c29104 985 (,(plist-get props 'decode) . ,(plist-get props 'encode))
f4a012a6 986 ,properties ,eol-type))
50c29104 987 (t
f4a012a6 988 (error "unsupported XEmacs style make-coding-style arguments: %S"
50c29104
KH
989 `(,name ,type ,doc-string ,props))))))
990
8057896b 991(defun make-coding-system (coding-system type mnemonic doc-string
1b46a680
KH
992 &optional
993 flags
994 properties
995 eol-type)
3bb1accb 996 "Define a new coding system CODING-SYSTEM (symbol).
8f924df7
KH
997This function is provided for backward compatibility.
998Use `define-coding-system' instead."
50c29104 999 ;; For compatiblity with XEmacs, we check the type of TYPE. If it
c3d0ee51
EZ
1000 ;; is a symbol, perhaps, this function is called with XEmacs-style
1001 ;; arguments. Here, try to transform that kind of arguments to
50c29104
KH
1002 ;; Emacs style.
1003 (if (symbolp type)
1004 (let ((args (transform-make-coding-system-args coding-system type
1005 mnemonic doc-string)))
1006 (setq coding-system (car args)
1053cc93 1007 type (nth 1 args)
50c29104
KH
1008 mnemonic (nth 2 args)
1009 doc-string (nth 3 args)
1010 flags (nth 4 args)
1011 properties (nth 5 args)
1012 eol-type (nth 6 args))))
1013
8f924df7
KH
1014 (setq type
1015 (cond ((eq type 0) 'emacs-mule)
1016 ((eq type 1) 'shift-jis)
1017 ((eq type 2) 'iso2022)
1018 ((eq type 3) 'big5)
1019 ((eq type 4) 'ccl)
1020 ((eq type 5) 'raw-text)
1b46a680 1021 (t
8f924df7
KH
1022 (error "Invalid coding system type: %s" type))))
1023
1024 (setq properties
1025 (let ((plist nil) key)
1026 (dolist (elt properties)
1027 (setq key (car elt))
1028 (cond ((eq key 'post-read-conversion)
1029 (setq key :post-read-conversion))
1030 ((eq key 'pre-write-conversion)
1031 (setq key :pre-write-conversion))
1032 ((eq key 'translation-table-for-decode)
1033 (setq key :decode-translation-table))
1034 ((eq key 'translation-table-for-encode)
1035 (setq key :encode-translation-table))
1036 ((eq key 'safe-charsets)
1037 (setq key :charset-list))
1038 ((eq key 'mime-charset)
1039 (setq key :mime-charset))
1040 ((eq key 'valid-codes)
1041 (setq key :valids)))
1042 (setq plist (plist-put plist key (cdr elt))))
1043 plist))
28380f17 1044 (setq properties (plist-put properties :mnemonic mnemonic))
8f924df7
KH
1045 (plist-put properties :coding-type type)
1046 (cond ((eq eol-type 0) (setq eol-type 'unix))
1047 ((eq eol-type 1) (setq eol-type 'dos))
1048 ((eq eol-type 2) (setq eol-type 'mac))
1049 ((vectorp eol-type) (setq eol-type nil)))
1050 (plist-put properties :eol-type eol-type)
1051
1052 (cond
1053 ((eq type 'iso2022)
1054 (plist-put properties :flags
1055 (list (and (or (consp (nth 0 flags))
1056 (consp (nth 1 flags))
1057 (consp (nth 2 flags))
1058 (consp (nth 3 flags))) 'designation)
1059 (or (nth 4 flags) 'long-form)
1060 (and (nth 5 flags) 'ascii-at-eol)
1061 (and (nth 6 flags) 'ascii-at-cntl)
1062 (and (nth 7 flags) '7-bit)
1063 (and (nth 8 flags) 'locking-shift)
1064 (and (nth 9 flags) 'single-shift)
1065 (and (nth 10 flags) 'use-roman)
1066 (and (nth 11 flags) 'use-oldjis)
1067 (or (nth 12 flags) 'direction)
1068 (and (nth 13 flags) 'init-at-bol)
1069 (and (nth 14 flags) 'designate-at-bol)
1070 (and (nth 15 flags) 'safe)
1071 (and (nth 16 flags) 'latin-extra)))
1072 (plist-put properties :designation
1073 (let ((vec (make-vector 4 nil)))
1074 (dotimes (i 4)
1075 (let ((spec (nth i flags)))
1076 (if (eq spec t)
1077 (aset vec i '(94 96))
1078 (if (consp spec)
1079 (progn
1080 (if (memq t spec)
1081 (setq spec (append (delq t spec) '(94 96))))
1082 (aset vec i spec))))))
1083 vec)))
1084
1085 ((eq type 'ccl)
1086 (plist-put properties :ccl-decoder (car flags))
1087 (plist-put properties :ccl-encoder (cdr flags))))
1088
1089 (apply 'define-coding-system coding-system doc-string properties))
4ed46869 1090
bbdea948
RS
1091(defun merge-coding-systems (first second)
1092 "Fill in any unspecified aspects of coding system FIRST from SECOND.
1093Return the resulting coding system."
1094 (let ((base (coding-system-base second))
1095 (eol (coding-system-eol-type second)))
1096 ;; If FIRST doesn't specify text conversion, merge with that of SECOND.
1097 (if (eq (coding-system-base first) 'undecided)
1098 (setq first (coding-system-change-text-conversion first base)))
1099 ;; If FIRST doesn't specify eol conversion, merge with that of SECOND.
1100 (if (and (vectorp (coding-system-eol-type first))
1101 (numberp eol) (>= eol 0) (<= eol 2))
1102 (setq first (coding-system-change-eol-conversion
1103 first eol)))
1104 first))
1105
4ed46869 1106(defun set-buffer-file-coding-system (coding-system &optional force)
358d28fb
RS
1107 "Set the file coding-system of the current buffer to CODING-SYSTEM.
1108This means that when you save the buffer, it will be converted
1109according to CODING-SYSTEM. For a list of possible values of CODING-SYSTEM,
1110use \\[list-coding-systems].
1111
bbdea948
RS
1112If CODING-SYSTEM leaves the text conversion unspecified, or if it
1113leaves the end-of-line conversion unspecified, FORCE controls what to
1114do. If FORCE is nil, get the unspecified aspect (or aspects) from the
1115buffer's previous `buffer-file-coding-system' value (if it is
463f5630 1116specified there). Otherwise, levae it unspecified.
aeef8f07
KH
1117
1118This marks the buffer modified so that the succeeding \\[save-buffer]
1119surely saves the buffer with CODING-SYSTEM. From a program, if you
1120don't want to mark the buffer modified, just set the variable
1121`buffer-file-coding-system' directly."
bbdea948 1122 (interactive "zCoding system for saving file (default, nil): \nP")
4ed46869 1123 (check-coding-system coding-system)
36d455c4 1124 (if (and coding-system buffer-file-coding-system (null force))
bbdea948
RS
1125 (setq coding-system
1126 (merge-coding-systems coding-system buffer-file-coding-system)))
4ed46869 1127 (setq buffer-file-coding-system coding-system)
38a1356d
RS
1128 ;; This is in case of an explicit call. Normally, `normal-mode' and
1129 ;; `set-buffer-major-mode-hook' take care of setting the table.
1130 (if (fboundp 'ucs-set-table-for-input) ; don't lose when building
1131 (ucs-set-table-for-input))
4ed46869
KH
1132 (set-buffer-modified-p t)
1133 (force-mode-line-update))
1134
bbdea948
RS
1135(defun revert-buffer-with-coding-system (coding-system &optional force)
1136 "Visit the current buffer's file again using coding system CODING-SYSTEM.
1137For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1138
1139If CODING-SYSTEM leaves the text conversion unspecified, or if it
1140leaves the end-of-line conversion unspecified, FORCE controls what to
1141do. If FORCE is nil, get the unspecified aspect (or aspects) from the
1142buffer's previous `buffer-file-coding-system' value (if it is
1143specified there). Otherwise, determine it from the file contents as
1144usual for visiting a file."
1145 (interactive "zCoding system for visited file (default, nil): \nP")
1146 (check-coding-system coding-system)
1147 (if (and coding-system buffer-file-coding-system (null force))
1148 (setq coding-system
1149 (merge-coding-systems coding-system buffer-file-coding-system)))
1150 (let ((coding-system-for-read coding-system))
1151 (revert-buffer)))
1152
701414e3
KH
1153(defun set-file-name-coding-system (coding-system)
1154 "Set coding system for decoding and encoding file names to CODING-SYSTEM.
1155It actually just set the variable `file-name-coding-system' (which
1156see) to CODING-SYSTEM."
1157 (interactive "zCoding system for file names (default, nil): ")
1158 (check-coding-system coding-system)
1159 (setq file-name-coding-system coding-system))
1160
358d28fb
RS
1161(defvar default-terminal-coding-system nil
1162 "Default value for the terminal coding system.
1163This is normally set according to the selected language environment.
1164See also the command `set-terminal-coding-system'.")
1165
df100398
KH
1166(defun set-terminal-coding-system (coding-system)
1167 "Set coding system of your terminal to CODING-SYSTEM.
358d28fb
RS
1168All text output to the terminal will be encoded
1169with the specified coding system.
1170For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1171The default is determined by the selected language environment
1172or by the previous use of this command."
1173 (interactive
2e02a76f
RS
1174 (list (let ((default (if (and (not (terminal-coding-system))
1175 default-terminal-coding-system)
1176 default-terminal-coding-system)))
1177 (read-coding-system
1178 (format "Coding system for terminal display (default, %s): "
1179 default)
1180 default))))
358d28fb
RS
1181 (if (and (not coding-system)
1182 (not (terminal-coding-system)))
1183 (setq coding-system default-terminal-coding-system))
1184 (if coding-system
521d4010 1185 (setq default-terminal-coding-system coding-system))
df100398
KH
1186 (set-terminal-coding-system-internal coding-system)
1187 (redraw-frame (selected-frame)))
1188
358d28fb
RS
1189(defvar default-keyboard-coding-system nil
1190 "Default value of the keyboard coding system.
1191This is normally set according to the selected language environment.
1192See also the command `set-keyboard-coding-system'.")
1193
df100398 1194(defun set-keyboard-coding-system (coding-system)
358d28fb
RS
1195 "Set coding system for keyboard input to CODING-SYSTEM.
1196In addition, this command enables Encoded-kbd minor mode.
6d34f495
DL
1197\(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see
1198`encoded-kbd-mode'.)
358d28fb
RS
1199For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1200The default is determined by the selected language environment
1201or by the previous use of this command."
1202 (interactive
2e02a76f
RS
1203 (list (let ((default (if (and (not (keyboard-coding-system))
1204 default-keyboard-coding-system)
1205 default-keyboard-coding-system)))
1206 (read-coding-system
1207 (format "Coding system for keyboard input (default, %s): "
1208 default)
1209 default))))
358d28fb
RS
1210 (if (and (not coding-system)
1211 (not (keyboard-coding-system)))
1212 (setq coding-system default-keyboard-coding-system))
1213 (if coding-system
1214 (setq default-keyboard-coding-system coding-system))
df100398 1215 (set-keyboard-coding-system-internal coding-system)
b23bad0b 1216 (setq keyboard-coding-system coding-system)
df100398
KH
1217 (encoded-kbd-mode (if coding-system 1 0)))
1218
6d34f495
DL
1219(defcustom keyboard-coding-system nil
1220 "Specify coding system for keyboard input.
1221If you set this on a terminal which can't distinguish Meta keys from
12228-bit characters, you will have to use ESC to type Meta characters.
1223See Info node `Specify Coding' and Info node `Single-Byte Character Support'.
1224
237e5993
DL
1225On non-windowing terminals, this is set from the locale by default.
1226
6d34f495
DL
1227Setting this variable directly does not take effect;
1228use either M-x customize or \\[set-keyboard-coding-system]."
1229 :type '(coding-system :tag "Coding system")
1230 :link '(info-link "(emacs)Specify Coding")
1231 :link '(info-link "(emacs)Single-Byte Character Support")
1232 :set (lambda (symbol value)
1233 ;; Don't load encoded-kbd-mode unnecessarily.
1234 (if (or value (boundp 'encoded-kbd-mode))
1235 (set-keyboard-coding-system value)
1236 (set-default 'keyboard-coding-system nil))) ; must initialize
237e5993 1237 :version "21.4"
6d34f495
DL
1238 :group 'keyboard
1239 :group 'mule)
1240
df100398 1241(defun set-buffer-process-coding-system (decoding encoding)
358d28fb 1242 "Set coding systems for the process associated with the current buffer.
df100398 1243DECODING is the coding system to be used to decode input from the process,
358d28fb
RS
1244ENCODING is the coding system to be used to encode output to the process.
1245
1246For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]."
4ed46869 1247 (interactive
83911021 1248 "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
4ed46869
KH
1249 (let ((proc (get-buffer-process (current-buffer))))
1250 (if (null proc)
521d4010 1251 (error "No process")
df100398
KH
1252 (check-coding-system decoding)
1253 (check-coding-system encoding)
1254 (set-process-coding-system proc decoding encoding)))
4ed46869
KH
1255 (force-mode-line-update))
1256
d0b99881
RS
1257(defalias 'set-clipboard-coding-system 'set-selection-coding-system)
1258
14915c37 1259(defun set-selection-coding-system (coding-system)
8c52d564 1260 "Make CODING-SYSTEM used for communicating with other X clients.
b25eef20
KH
1261When sending or receiving text via cut_buffer, selection, and clipboard,
1262the text is encoded or decoded by CODING-SYSTEM."
a03b3ce1 1263 (interactive "zCoding system for X selection: ")
b25eef20 1264 (check-coding-system coding-system)
14915c37 1265 (setq selection-coding-system coding-system))
b25eef20 1266
e8dd0160 1267;; Coding system lastly specified by the command
a03b3ce1
KH
1268;; set-next-selection-coding-system.
1269(defvar last-next-selection-coding-system nil)
1270
1271(defun set-next-selection-coding-system (coding-system)
12504f57 1272 "Use CODING-SYSTEM for next communication with other window system clients.
a03b3ce1
KH
1273This setting is effective for the next communication only."
1274 (interactive
1275 (list (read-coding-system
1276 (if last-next-selection-coding-system
12504f57 1277 (format "Coding system for the next selection (default, %S): "
a03b3ce1 1278 last-next-selection-coding-system)
12504f57 1279 "Coding system for the next selection: ")
a03b3ce1
KH
1280 last-next-selection-coding-system)))
1281 (if coding-system
1282 (setq last-next-selection-coding-system coding-system)
1283 (setq coding-system last-next-selection-coding-system))
1284 (check-coding-system coding-system)
1285
1286 (setq next-selection-coding-system coding-system))
1287
4ed46869 1288(defun set-coding-priority (arg)
521d4010 1289 "Set priority of coding categories according to ARG.
c1841772
KH
1290ARG is a list of coding categories ordered by priority.
1291
1292This function is provided for backward compatibility.
1293Now we have more convenient function `set-coding-system-priority'."
5d75f46f
KH
1294 (apply 'set-coding-system-priority
1295 (mapcar #'(lambda (x) (symbol-value x)) arg)))
07513d64 1296(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.1")
4ed46869 1297
835cbadb
EZ
1298;;; X selections
1299
cc926903 1300(defvar ctext-non-standard-encodings-alist
835cbadb
EZ
1301 '(("ISO8859-15" . latin-iso8859-15)
1302 ("ISO8859-14" . latin-iso8859-14)
1303 ("KOI8-R" . koi8-r)
1304 ("BIG5-0" . big5))
cc926903
KH
1305 "Alist of non-standard encoding names vs Emacs coding systems.
1306This alist is used to decode an extened segment of a compound text.")
1307
1308(defvar ctext-non-standard-encodings-regexp
1309 (string-to-multibyte
1310 (concat
1311 ;; For non-standard encodings.
1312 "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
1313 "\\|"
1314 ;; For UTF-8 encoding.
1315 "\\(\e%G[^\e]*\e%@\\)")))
835cbadb
EZ
1316
1317;; Functions to support "Non-Standard Character Set Encodings" defined
5c88a01e 1318;; by the COMPOUND-TEXT spec.
cc926903
KH
1319;; We support that by decoding the whole data by `ctext' which just
1320;; pertains byte sequences belonging to ``extended segment'', then
1321;; decoding those byte sequences one by one in Lisp.
5c88a01e
KH
1322;; This function also supports "The UTF-8 encoding" described in the
1323;; section 7 of the documentation fo COMPOUND-TEXT distributed with
1324;; XFree86.
1325
835cbadb
EZ
1326(defun ctext-post-read-conversion (len)
1327 "Decode LEN characters encoded as Compound Text with Extended Segments."
1894d108
KH
1328 ;; We don't need the following because it is expected that this
1329 ;; function is mainly used for decoding X selection which is not
1330 ;; that big data.
1331 ;;(buffer-disable-undo) ; minimize consing due to insertions and deletions
835cbadb 1332 (save-match-data
cc926903 1333 (save-restriction
1894d108 1334 (narrow-to-region (point) (+ (point) len))
cc926903 1335 (let ((case-fold-search nil)
cc926903
KH
1336 last-coding-system-used
1337 pos bytes)
cc926903 1338 (decode-coding-region (point-min) (point-max) 'ctext)
cc926903
KH
1339 (while (re-search-forward ctext-non-standard-encodings-regexp
1340 nil 'move)
1341 (setq pos (match-beginning 0))
1342 (if (match-beginning 1)
1343 ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
1344 (let* ((M (char-after (+ pos 4)))
1345 (L (char-after (+ pos 5)))
1346 (encoding (match-string 2))
1347 (coding (or (cdr (assoc-ignore-case
1348 encoding
1349 ctext-non-standard-encodings-alist))
1350 (coding-system-p
1351 (intern (downcase encoding))))))
1894d108
KH
1352 (if enable-multibyte-characters
1353 (setq M (multibyte-char-to-unibyte M)
1354 L (multibyte-char-to-unibyte L)))
cc926903
KH
1355 (setq bytes (- (+ (* (- M 128) 128) (- L 128))
1356 (- (point) (+ pos 6))))
1357 (when coding
1358 (delete-region pos (point))
1359 (forward-char bytes)
1360 (decode-coding-region (- (point) bytes) (point) coding)))
1361 ;; ESC % G --UTF-8-BYTES-- ESC % @
1362 (setq bytes (- (point) pos))
1363 (decode-coding-region (- (point) bytes) (point) 'utf-8))))
1364 (goto-char (point-min))
1365 (- (point-max) (point)))))
835cbadb 1366
c74fe809
EZ
1367;; If you add charsets here, be sure to modify the regexp used by
1368;; ctext-pre-write-conversion to look up non-standard charsets.
cc926903 1369(defvar ctext-non-standard-designations-alist
835cbadb
EZ
1370 '(("$(0" . (big5 "big5-0" 2))
1371 ("$(1" . (big5 "big5-0" 2))
eb747e41
DL
1372 ;; The following are actually standard; generating extended
1373 ;; segments for them is wrong and screws e.g. Latin-9 users.
1374 ;; 8859-{10,13,16} aren't Emacs charsets anyhow. -- fx
1375;; ("-V" . (t "iso8859-10" 1))
1376;; ("-Y" . (t "iso8859-13" 1))
1377;; ("-_" . (t "iso8859-14" 1))
1378;; ("-b" . (t "iso8859-15" 1))
1379;; ("-f" . (t "iso8859-16" 1))
1380 )
835cbadb 1381 "Alist of ctext control sequences that introduce character sets which
eb747e41 1382are not in the list of approved encodings, and the corresponding
835cbadb
EZ
1383coding system, identifier string, and number of octets per encoded
1384character.
1385
1386Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
1387is the control sequence (sans the leading ESC) that introduces the character
1388set in the text encoded by compound-text. ENCODING is a coding system
1389symbol; if it is t, it means that the ctext coding system already encodes
1390the text correctly, and only the leading control sequence needs to be altered.
1391If ENCODING is a coding system, we need to re-encode the text with that
eb747e41 1392coding system. CHARSET is the name of the charset we need to put into
835cbadb
EZ
1393the leading control sequence. NOCTETS is the number of octets (bytes) that
1394encode each character in this charset. NOCTETS can be 0 (meaning the number
1395of octets per character is variable), 1, 2, 3, or 4.")
1396
1397(defun ctext-pre-write-conversion (from to)
5dde3c71
EZ
1398 "Encode characters between FROM and TO as Compound Text w/Extended Segments.
1399
1400If FROM is a string, or if the current buffer is not the one set up for us
cc926903 1401by encode-coding-string, generate a new temp buffer, insert the
5dde3c71 1402text, and convert it in the temporary buffer. Otherwise, convert in-place."
835cbadb 1403 (save-match-data
cc926903 1404 ;; Setup a working buffer if necessary.
f1beb0e0
KH
1405 (when (stringp from)
1406 (set-buffer (generate-new-buffer " *temp"))
1407 (set-buffer-multibyte (multibyte-string-p from))
1408 (insert from))
cc926903
KH
1409
1410 ;; Now we can encode the whole buffer.
1411 (let ((case-fold-search nil)
1412 last-coding-system-used
1413 pos posend desig encode-info encoding chset noctets textlen)
1414 (goto-char (point-min))
1415 ;; At first encode the whole buffer.
1416 (encode-coding-region (point-min) (point-max) 'ctext-no-compositions)
1417 ;; Then replace ISO-2022 charset designations with extended
1418 ;; segments, for those charsets that are not part of the
1419 ;; official X registry. The regexp below finds the leading
1420 ;; sequences for big5.
eb747e41 1421 (while (re-search-forward "\e\\(\$([01]\\)" nil 'move)
cc926903
KH
1422 (setq pos (match-beginning 0)
1423 posend (point)
1424 desig (match-string 1)
1425 encode-info (cdr (assoc desig
1426 ctext-non-standard-designations-alist))
835cbadb
EZ
1427 encoding (car encode-info)
1428 chset (cadr encode-info)
1429 noctets (car (cddr encode-info)))
1430 (skip-chars-forward "^\e")
835cbadb 1431 (cond
f1beb0e0 1432 ((eq encoding t) ; only the leading sequence needs to be changed
cc926903
KH
1433 (setq textlen (+ (- (point) posend) (length chset) 1))
1434 ;; Generate the control sequence for an extended segment.
f1beb0e0
KH
1435 (replace-match (string-to-multibyte (format "\e%%/%d%c%c%s\ 2"
1436 noctets
1437 (+ (/ textlen 128) 128)
1438 (+ (% textlen 128) 128)
1439 chset))
835cbadb
EZ
1440 t t))
1441 ((coding-system-p encoding) ; need to recode the entire segment...
cc926903
KH
1442 (decode-coding-region pos (point) 'ctext-no-compositions)
1443 (encode-coding-region pos (point) encoding)
cc926903
KH
1444 (setq textlen (+ (- (point) pos) (length chset) 1))
1445 (save-excursion
1446 (goto-char pos)
f1beb0e0
KH
1447 (insert (string-to-multibyte (format "\e%%/%d%c%c%s\ 2"
1448 noctets
1449 (+ (/ textlen 128) 128)
1450 (+ (% textlen 128) 128)
1451 chset)))))))
cc926903 1452 (goto-char (point-min))))
5dde3c71 1453 ;; Must return nil, as build_annotations_2 expects that.
835cbadb
EZ
1454 nil)
1455
4ed46869
KH
1456;;; FILE I/O
1457
e76938e7 1458(defcustom auto-coding-alist
8f924df7
KH
1459 '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|sx[dmicw]\\|tar\\)\\'" . no-conversion-multibyte)
1460 ("\\.tgz\\'" . no-conversion)
6caef2da 1461 ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion)
45885400 1462 ("/#[^/]+#\\'" . emacs-mule))
835f49b8
KH
1463 "Alist of filename patterns vs corresponding coding systems.
1464Each element looks like (REGEXP . CODING-SYSTEM).
558b0c86 1465A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.
835f49b8 1466
7fed493a
RS
1467The settings in this alist take priority over `coding:' tags
1468in the file (see the function `set-auto-coding')
e76938e7
DL
1469and the contents of `file-coding-system-alist'."
1470 :group 'files
1471 :group 'mule
1472 :type '(repeat (cons (regexp :tag "File name regexp")
1473 (symbol :tag "Coding system"))))
835f49b8 1474
502522b2 1475(defcustom auto-coding-regexp-alist
4e4a5bca
DL
1476 '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)
1477 ("\\`;ELC\14\0\0\0" . emacs-mule)) ; Emacs 20-compiled
502522b2
GM
1478 "Alist of patterns vs corresponding coding systems.
1479Each element looks like (REGEXP . CODING-SYSTEM).
1480A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.
1481
1482The settings in this alist take priority over `coding:' tags
1483in the file (see the function `set-auto-coding')
1484and the contents of `file-coding-system-alist'."
1485 :group 'files
1486 :group 'mule
1487 :type '(repeat (cons (regexp :tag "Regexp")
1488 (symbol :tag "Coding system"))))
1489
d9f6dfe6 1490;; See the bottom of this file for built-in auto coding functions.
447404a3
CW
1491(defcustom auto-coding-functions '(sgml-xml-auto-coding-function
1492 sgml-html-meta-auto-coding-function)
d9f6dfe6
CW
1493 "A list of functions which attempt to determine a coding system.
1494
66643502
RS
1495Each function in this list should be written to operate on the
1496current buffer, but should not modify it in any way. The buffer
1497will contain undecoded text of parts of the file. Each function
1498should take one argument, SIZE, which says how many
1499characters (starting from point) it should look at.
1500
1501If one of these functions succeeds in determining a coding
1502system, it should return that coding system. Otherwise, it
1503should return nil.
1504
1505If a file has a `coding:' tag, that takes precedence over these
1506functions, so they won't be called at all."
d9f6dfe6
CW
1507 :group 'files
1508 :group 'mule
1509 :type '(repeat function))
1510
1c4cc63a
KH
1511(defvar set-auto-coding-for-load nil
1512 "Non-nil means look for `load-coding' property instead of `coding'.
1513This is used for loading and byte-compiling Emacs Lisp files.")
1514
8a592131
RS
1515(defun auto-coding-alist-lookup (filename)
1516 "Return the coding system specified by `auto-coding-alist' for FILENAME."
1517 (let ((alist auto-coding-alist)
c60ee5e7 1518 (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos cygwin)))
8a592131
RS
1519 coding-system)
1520 (while (and alist (not coding-system))
1521 (if (string-match (car (car alist)) filename)
1522 (setq coding-system (cdr (car alist)))
1523 (setq alist (cdr alist))))
1524 coding-system))
1525
835f49b8
KH
1526(defun set-auto-coding (filename size)
1527 "Return coding system for a file FILENAME of which SIZE bytes follow point.
1c4cc63a
KH
1528These bytes should include at least the first 1k of the file
1529and the last 3k of the file, but the middle may be omitted.
63561304 1530
d21ba5e0
DL
1531The function checks FILENAME against the variable `auto-coding-alist'.
1532If FILENAME doesn't match any entries in the variable, it checks the
502522b2 1533contents of the current buffer following point against
447404a3 1534`auto-coding-regexp-alist'. If no match is found, it checks for a
502522b2 1535`coding:' tag in the first one or two lines following point. If no
d21ba5e0 1536`coding:' tag is found, it checks any local variables list in the last
447404a3 15373K bytes out of the SIZE bytes. Finally, if none of these methods
d21ba5e0
DL
1538succeed, it checks to see if any function in `auto-coding-functions'
1539gives a match.
63561304 1540
d21ba5e0
DL
1541The return value is the specified coding system, or nil if nothing is
1542specified.
87aba788 1543
ba74e833 1544The variable `set-auto-coding-function' (which see) is set to this
87aba788 1545function by default."
502522b2 1546 (or (auto-coding-alist-lookup filename)
447404a3
CW
1547 ;; Try using `auto-coding-regexp-alist'.
1548 (save-excursion
1549 (let ((alist auto-coding-regexp-alist)
1550 coding-system)
1551 (while (and alist (not coding-system))
1552 (let ((regexp (car (car alist))))
1553 (when (re-search-forward regexp (+ (point) size) t)
1554 (setq coding-system (cdr (car alist)))))
0bca779a 1555 (setq alist (cdr alist)))
447404a3 1556 coding-system))
502522b2
GM
1557 (let* ((case-fold-search t)
1558 (head-start (point))
1559 (head-end (+ head-start (min size 1024)))
1560 (tail-start (+ head-start (max (- size 3072) 0)))
1561 (tail-end (+ head-start size))
1562 coding-system head-found tail-found pos)
1563 ;; Try a short cut by searching for the string "coding:"
1564 ;; and for "unibyte:" at the head and tail of SIZE bytes.
1565 (setq head-found (or (search-forward "coding:" head-end t)
1566 (search-forward "unibyte:" head-end t)))
1567 (if (and head-found (> head-found tail-start))
1568 ;; Head and tail are overlapped.
1569 (setq tail-found head-found)
1570 (goto-char tail-start)
1571 (setq tail-found (or (search-forward "coding:" tail-end t)
1572 (search-forward "unibyte:" tail-end t))))
1573
1574 ;; At first check the head.
1575 (when head-found
1576 (goto-char head-start)
6b66d028
RS
1577 (setq head-end (set-auto-mode-1))
1578 (setq head-start (point))
1d8e9a7c 1579 (when (and head-end (< head-found head-end))
835f49b8 1580 (goto-char head-start)
502522b2
GM
1581 (when (and set-auto-coding-for-load
1582 (re-search-forward
6b66d028 1583 "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
502522b2
GM
1584 head-end t))
1585 (setq coding-system 'raw-text))
1586 (when (and (not coding-system)
1587 (re-search-forward
6b66d028 1588 "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
502522b2
GM
1589 head-end t))
1590 (setq coding-system (intern (match-string 2)))
1591 (or (coding-system-p coding-system)
1592 (setq coding-system nil)))))
1593
1594 ;; If no coding: tag in the head, check the tail.
1595 (when (and tail-found (not coding-system))
1596 (goto-char tail-start)
1597 (search-forward "\n\^L" nil t)
1598 (if (re-search-forward
1599 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
1600 ;; The prefix is what comes before "local variables:" in its
1601 ;; line. The suffix is what comes after "local variables:"
1602 ;; in its line.
1603 (let* ((prefix (regexp-quote (match-string 1)))
1604 (suffix (regexp-quote (match-string 2)))
1605 (re-coding
1606 (concat
1607 "^" prefix
cfe98f50
GM
1608 ;; N.B. without the \n below, the regexp can
1609 ;; eat newlines.
1610 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
502522b2
GM
1611 suffix "$"))
1612 (re-unibyte
1613 (concat
1614 "^" prefix
cfe98f50 1615 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
502522b2
GM
1616 suffix "$"))
1617 (re-end
cfe98f50 1618 (concat "^" prefix "[ \t]*End *:[ \t]*" suffix "$"))
502522b2
GM
1619 (pos (point)))
1620 (re-search-forward re-end tail-end 'move)
1621 (setq tail-end (point))
1622 (goto-char pos)
1623 (when (and set-auto-coding-for-load
1624 (re-search-forward re-unibyte tail-end t))
1625 (setq coding-system 'raw-text))
1626 (when (and (not coding-system)
1627 (re-search-forward re-coding tail-end t))
1628 (setq coding-system (intern (match-string 1)))
1629 (or (coding-system-p coding-system)
1630 (setq coding-system nil))))))
447404a3
CW
1631 coding-system)
1632 ;; Finally, try all the `auto-coding-functions'.
1633 (let ((funcs auto-coding-functions)
1634 (coding-system nil))
1635 (while (and funcs (not coding-system))
1636 (setq coding-system (condition-case e
1637 (save-excursion
1638 (goto-char (point-min))
1639 (funcall (pop funcs) size))
1640 (error nil))))
502522b2 1641 coding-system)))
63561304
KH
1642
1643(setq set-auto-coding-function 'set-auto-coding)
87aba788 1644
c36b37ed 1645(defun after-insert-file-set-coding (inserted)
872a0a6f
RS
1646 "Set `buffer-file-coding-system' of current buffer after text is inserted.
1647INSERTED is the number of characters that were inserted, as figured
1648in the situation before this function. Return the number of characters
1649inserted, as figured in the situation after. The two numbers can be
1650different if the buffer has become unibyte."
4ed46869
KH
1651 (if last-coding-system-used
1652 (let ((coding-system
1653 (find-new-buffer-file-coding-system last-coding-system-used))
1654 (modified-p (buffer-modified-p)))
0269ddfb 1655 (when coding-system
71983219 1656 (set-buffer-file-coding-system coding-system t)
0269ddfb 1657 (set-buffer-modified-p modified-p))))
d0c26c63 1658 inserted)
4ed46869 1659
8057896b 1660;; The coding-spec and eol-type of coding-system returned is decided
4ed46869
KH
1661;; independently in the following order.
1662;; 1. That of buffer-file-coding-system locally bound.
1663;; 2. That of CODING.
1664
1665(defun find-new-buffer-file-coding-system (coding)
1666 "Return a coding system for a buffer when a file of CODING is inserted.
a73a8c89
KH
1667The local variable `buffer-file-coding-system' of the current buffer
1668is set to the returned value.
509064c5 1669Return nil if there's no need to set `buffer-file-coding-system'."
4ed46869 1670 (let (local-coding local-eol
b685f8d6 1671 found-coding found-eol
4ed46869
KH
1672 new-coding new-eol)
1673 (if (null coding)
1674 ;; Nothing found about coding.
1675 nil
1676
b685f8d6
RS
1677 ;; Get information of `buffer-file-coding-system' in LOCAL-EOL
1678 ;; and LOCAL-CODING.
1679 (setq local-eol (coding-system-eol-type buffer-file-coding-system))
1680 (if (null (numberp local-eol))
1681 ;; But eol-type is not yet set.
1682 (setq local-eol nil))
0269ddfb 1683 (if (and buffer-file-coding-system
c1841772
KH
1684 (not (eq (coding-system-type buffer-file-coding-system)
1685 'undecided)))
0269ddfb 1686 (setq local-coding (coding-system-base buffer-file-coding-system)))
b685f8d6
RS
1687
1688 (if (and (local-variable-p 'buffer-file-coding-system)
1689 local-eol local-coding)
4ed46869
KH
1690 ;; The current buffer has already set full coding-system, we
1691 ;; had better not change it.
1692 nil
1693
8057896b 1694 (setq found-eol (coding-system-eol-type coding))
4ed46869 1695 (if (null (numberp found-eol))
be02cd54
EZ
1696 ;; But eol-type is not found.
1697 ;; If EOL conversions are inhibited, force unix eol-type.
1698 (setq found-eol (if inhibit-eol-conversion 0)))
c1841772 1699 (setq found-coding (coding-system-base coding))
c76b5c99
KH
1700
1701 (if (and (not found-eol) (eq found-coding 'undecided))
1702 ;; No valid coding information found.
1703 nil
1704
1705 ;; Some coding information (eol or text) found.
1706
1707 ;; The local setting takes precedence over the found one.
1708 (setq new-coding (if (local-variable-p 'buffer-file-coding-system)
1709 (or local-coding found-coding)
1710 (or found-coding local-coding)))
1711 (setq new-eol (if (local-variable-p 'buffer-file-coding-system)
1712 (or local-eol found-eol)
1713 (or found-eol local-eol)))
1714
1715 (let ((eol-type (coding-system-eol-type new-coding)))
1716 (if (and (numberp new-eol) (vectorp eol-type))
1717 (aref eol-type new-eol)
1718 new-coding)))))))
4ed46869 1719
fe831d33
GV
1720(defun modify-coding-system-alist (target-type regexp coding-system)
1721 "Modify one of look up tables for finding a coding system on I/O operation.
8c453b46
RS
1722There are three of such tables, `file-coding-system-alist',
1723`process-coding-system-alist', and `network-coding-system-alist'.
fe831d33
GV
1724
1725TARGET-TYPE specifies which of them to modify.
8c453b46
RS
1726If it is `file', it affects `file-coding-system-alist' (which see).
1727If it is `process', it affects `process-coding-system-alist' (which see).
e8dd0160 1728If it is `network', it affects `network-coding-system-alist' (which see).
fe831d33
GV
1729
1730REGEXP is a regular expression matching a target of I/O operation.
1731The target is a file name if TARGET-TYPE is `file', a program name if
1732TARGET-TYPE is `process', or a network service name or a port number
1733to connect to if TARGET-TYPE is `network'.
1734
1735CODING-SYSTEM is a coding system to perform code conversion on the I/O
8c453b46
RS
1736operation, or a cons cell (DECODING . ENCODING) specifying the coding systems
1737for decoding and encoding respectively,
1738or a function symbol which, when called, returns such a cons cell."
fe831d33
GV
1739 (or (memq target-type '(file process network))
1740 (error "Invalid target type: %s" target-type))
1741 (or (stringp regexp)
1742 (and (eq target-type 'network) (integerp regexp))
1743 (error "Invalid regular expression: %s" regexp))
1744 (if (symbolp coding-system)
1745 (if (not (fboundp coding-system))
1746 (progn
1747 (check-coding-system coding-system)
1748 (setq coding-system (cons coding-system coding-system))))
1749 (check-coding-system (car coding-system))
1750 (check-coding-system (cdr coding-system)))
1751 (cond ((eq target-type 'file)
1752 (let ((slot (assoc regexp file-coding-system-alist)))
1753 (if slot
1754 (setcdr slot coding-system)
1755 (setq file-coding-system-alist
1756 (cons (cons regexp coding-system)
1757 file-coding-system-alist)))))
1758 ((eq target-type 'process)
1759 (let ((slot (assoc regexp process-coding-system-alist)))
1760 (if slot
1761 (setcdr slot coding-system)
1762 (setq process-coding-system-alist
1763 (cons (cons regexp coding-system)
1764 process-coding-system-alist)))))
1765 (t
1766 (let ((slot (assoc regexp network-coding-system-alist)))
1767 (if slot
1768 (setcdr slot coding-system)
1769 (setq network-coding-system-alist
1770 (cons (cons regexp coding-system)
1771 network-coding-system-alist)))))))
1772
db046b7d
KH
1773(defun decode-coding-inserted-region (from to filename
1774 &optional visit beg end replace)
f29387e8
KH
1775 "Decode the region between FROM and TO as if it is read from file FILENAME.
1776Optional arguments VISIT, BEG, END, and REPLACE are the same as those
1777of the function `insert-file-contents'."
1778 (save-excursion
1779 (save-restriction
1780 (narrow-to-region from to)
1781 (goto-char (point-min))
1782 (let ((coding coding-system-for-read))
1783 (or coding
1784 (setq coding (funcall set-auto-coding-function
1785 filename (- (point-max) (point-min)))))
1786 (or coding
1787 (setq coding (find-operation-coding-system
1788 'insert-file-contents
1789 filename visit beg end replace)))
1790 (if (coding-system-p coding)
1791 (or enable-multibyte-characters
1792 (setq coding
1793 (coding-system-change-text-conversion coding 'raw-text)))
1794 (setq coding nil))
1795 (if coding
1796 (decode-coding-region (point-min) (point-max) coding))
1797 (setq last-coding-system-used coding)))))
1798
b25eef20 1799(defun make-translation-table (&rest args)
a284eea3 1800 "Make a translation table from arguments.
d38b07f9 1801A translation table is a char table intended for character
a284eea3
DL
1802translation in CCL programs.
1803
d38b07f9 1804Each argument is a list of elements of the form (FROM . TO), where FROM
a284eea3 1805is a character to be translated to TO.
13d5617d 1806
4e003d37
KH
1807The arguments and forms in each argument are processed in the given
1808order, and if a previous form already translates TO to some other
1809character, say TO-ALT, FROM is also translated to TO-ALT."
f967223b 1810 (let ((table (make-char-table 'translation-table))
a73a8c89 1811 revlist)
5d75f46f
KH
1812 (dolist (elts args)
1813 (dolist (elt elts)
1814 (let ((from (car elt))
1815 (to (cdr elt))
1816 to-alt rev-from rev-to)
1817 ;; If we have already translated TO to TO-ALT, FROM should
1818 ;; also be translated to TO-ALT.
1819 (if (setq to-alt (aref table to))
1820 (setq to to-alt))
1821 (aset table from to)
1822 ;; If we have already translated some chars to FROM, they
1823 ;; should also be translated to TO.
1824 (when (setq rev-from (assq from revlist))
1825 (dolist (elt (cdr rev-from))
1826 (aset table elt to))
1827 (setq revlist (delq rev-from revlist)
1828 rev-from (cdr rev-from)))
1829 ;; Now update REVLIST.
1830 (setq rev-to (assq to revlist))
1831 (if rev-to
1832 (setcdr rev-to (cons from (cdr rev-to)))
1833 (setq rev-to (list to from)
1834 revlist (cons rev-to revlist)))
1835 (if rev-from
1836 (setcdr rev-to (append rev-from (cdr rev-to)))))))
a73a8c89
KH
1837 ;; Return TABLE just created.
1838 table))
1839
c76b5c99
KH
1840(defun make-translation-table-from-vector (vec)
1841 "Make translation table from decoding vector VEC.
9e3b6057
DL
1842VEC is an array of 256 elements to map unibyte codes to multibyte
1843characters. Elements may be nil for undefined code points.
c76b5c99
KH
1844See also the variable `nonascii-translation-table'."
1845 (let ((table (make-char-table 'translation-table))
1846 (rev-table (make-char-table 'translation-table))
c76b5c99 1847 ch)
9e3b6057 1848 (dotimes (i 256)
c76b5c99 1849 (setq ch (aref vec i))
9e3b6057
DL
1850 (when ch
1851 (aset table i ch)
1852 (if (>= ch 256)
1853 (aset rev-table ch i))))
c76b5c99
KH
1854 (set-char-table-extra-slot table 0 rev-table)
1855 table))
1856
f967223b 1857(defun define-translation-table (symbol &rest args)
a284eea3
DL
1858 "Define SYMBOL as the name of translation table made by ARGS.
1859This sets up information so that the table can be used for
1860translations in a CCL program.
b25eef20 1861
a284eea3
DL
1862If the first element of ARGS is a char-table whose purpose is
1863`translation-table', just define SYMBOL to name it. (Note that this
1864function does not bind SYMBOL.)
007c79c8 1865
a284eea3 1866Any other ARGS should be suitable as arguments of the function
007c79c8 1867`make-translation-table' (which see).
b25eef20 1868
452fdb31 1869This function sets properties `translation-table' and
521d4010
DL
1870`translation-table-id' of SYMBOL to the created table itself and the
1871identification number of the table respectively. It also registers
1872the table in `translation-table-vector'."
007c79c8
KH
1873 (let ((table (if (and (char-table-p (car args))
1874 (eq (char-table-subtype (car args))
1875 'translation-table))
1876 (car args)
1877 (apply 'make-translation-table args)))
f967223b 1878 (len (length translation-table-vector))
d9e3229d 1879 (id 0)
b25eef20 1880 (done nil))
f967223b 1881 (put symbol 'translation-table table)
b25eef20
KH
1882 (while (not done)
1883 (if (>= id len)
f967223b
KH
1884 (setq translation-table-vector
1885 (vconcat translation-table-vector (make-vector len nil))))
1886 (let ((slot (aref translation-table-vector id)))
b25eef20
KH
1887 (if (or (not slot)
1888 (eq (car slot) symbol))
1889 (progn
f967223b 1890 (aset translation-table-vector id (cons symbol table))
007c79c8
KH
1891 (setq done t))
1892 (setq id (1+ id)))))
f967223b 1893 (put symbol 'translation-table-id id)
d9e3229d
KH
1894 id))
1895
35554641
KH
1896(put 'with-category-table 'lisp-indent-function 1)
1897
ef6e365d 1898(defmacro with-category-table (table &rest body)
8f924df7 1899 "Execute BODY like `progn' with CATEGORY-TABLE the current category table.
ef6e365d
JPW
1900The category table of the current buffer is saved, BODY is evaluated,
1901then the saved table is restored, even in case of an abnormal exit.
1902Value is what BODY returns."
1903 (let ((old-table (make-symbol "old-table"))
1904 (old-buffer (make-symbol "old-buffer")))
1905 `(let ((,old-table (category-table))
1906 (,old-buffer (current-buffer)))
1907 (unwind-protect
1908 (progn
1909 (set-category-table ,table)
1910 ,@body)
1911 (save-current-buffer
1912 (set-buffer ,old-buffer)
1913 (set-category-table ,old-table))))))
35554641 1914
394e4eb0
DL
1915(defun define-translation-hash-table (symbol table)
1916 "Define SYMBOL as the name of the hash translation TABLE for use in CCL.
1917
1918Analogous to `define-translation-table', but updates
1919`translation-hash-table-vector' and the table is for use in the CCL
1920`lookup-integer' and `lookup-character' functions."
1921 (unless (and (symbolp symbol)
1922 (hash-table-p table))
1923 (error "Bad args to define-translation-hash-table"))
1924 (let ((len (length translation-hash-table-vector))
1925 (id 0)
1926 done)
1927 (put symbol 'translation-hash-table table)
1928 (while (not done)
1929 (if (>= id len)
1930 (setq translation-hash-table-vector
1931 (vconcat translation-hash-table-vector [nil])))
1932 (let ((slot (aref translation-hash-table-vector id)))
1933 (if (or (not slot)
1934 (eq (car slot) symbol))
1935 (progn
1936 (aset translation-hash-table-vector id (cons symbol table))
1937 (setq done t))
1938 (setq id (1+ id)))))
1939 (put symbol 'translation-hash-table-id id)
1940 id))
1941
69eba008
KH
1942;;; Initialize some variables.
1943
1944(put 'use-default-ascent 'char-table-extra-slots 0)
1945(setq use-default-ascent (make-char-table 'use-default-ascent))
d6d6d592
KH
1946(put 'ignore-relative-composition 'char-table-extra-slots 0)
1947(setq ignore-relative-composition
1948 (make-char-table 'ignore-relative-composition))
69eba008 1949
256d0fef
DL
1950(make-obsolete 'set-char-table-default
1951 "Generic characters no longer exist" "22.1")
d9f6dfe6
CW
1952
1953;;; Built-in auto-coding-functions:
1954
1955(defun sgml-xml-auto-coding-function (size)
1956 "Determine whether the buffer is XML, and if so, its encoding.
1957This function is intended to be added to `auto-coding-functions'."
c069d3ac
SM
1958 (setq size (+ (point) size))
1959 (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" size t)
d9f6dfe6
CW
1960 (let ((end (save-excursion
1961 ;; This is a hack.
2f4e9c47 1962 (re-search-forward "\"\\s-*\\?>" size t))))
d9f6dfe6
CW
1963 (when end
1964 (if (re-search-forward "encoding=\"\\(.+?\\)\"" end t)
447404a3
CW
1965 (let* ((match (match-string 1))
1966 (sym (intern (downcase match))))
1967 (if (coding-system-p sym)
1968 sym
1969 (message "Warning: unknown coding system \"%s\"" match)
1970 nil))
d9f6dfe6
CW
1971 'utf-8)))))
1972
447404a3
CW
1973(defun sgml-html-meta-auto-coding-function (size)
1974 "If the buffer has an HTML meta tag, use it to determine encoding.
1975This function is intended to be added to `auto-coding-functions'."
c069d3ac 1976 (setq size (min (+ (point) size)
447404a3
CW
1977 ;; Only search forward 10 lines
1978 (save-excursion
1979 (forward-line 10)
1980 (point))))
1981 (when (and (search-forward "<html>" size t)
1982 (re-search-forward "<meta\\s-+http-equiv=\"content-type\"\\s-+content=\"text/\\sw+;\\s-*charset=\\(.+?\\)\"" size t))
1983 (let* ((match (match-string 1))
1984 (sym (intern (downcase match))))
1985 (if (coding-system-p sym)
1986 sym
1987 (message "Warning: unknown coding system \"%s\"" match)
1988 nil))))
0bca779a 1989
69eba008 1990;;;
4ed46869
KH
1991(provide 'mule)
1992
1993;;; mule.el ends here