Revision: miles@gnu.org--gnu-2004/emacs--unicode--0--patch-71
[bpt/emacs.git] / lisp / international / mule.el
CommitLineData
07513d64 1;;; mule.el --- basic commands for multilingual environment
4ed46869 2
4ed46869 3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
8f924df7 4;; Licensed to the Free Software Foundation.
32db08f1 5;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
8f924df7 6;; Copyright (C) 2003
c1841772
KH
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H13PRO009
4ed46869
KH
9
10;; Keywords: mule, multilingual, character set, coding system
11
12;; This file is part of GNU Emacs.
13
14;; GNU Emacs is free software; you can redistribute it and/or modify
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
369314dc
KH
25;; along with GNU Emacs; see the file COPYING. If not, write to the
26;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27;; Boston, MA 02111-1307, USA.
4ed46869 28
60370d40
PJ
29;;; Commentary:
30
4ed46869
KH
31;;; Code:
32
8f924df7 33(defconst mule-version "6.0 (HANACHIRUSATO)" "\
4ed46869
KH
34Version number and name of this version of MULE (multilingual environment).")
35
8f924df7 36(defconst mule-version-date "2003.9.1" "\
4ed46869
KH
37Distribution date of this version of MULE (multilingual environment).")
38
c1841772
KH
39\f
40;;; CHARSET
41
6d2b6635
KH
42;; Backward compatibility code for handling emacs-mule charsets.
43(defvar private-char-area-1-min #xF0000)
44(defvar private-char-area-1-max #xFFFFE)
45(defvar private-char-area-2-min #x100000)
46(defvar private-char-area-2-max #x10FFFE)
47
48;; Table of emacs-mule charsets indexed by their emacs-mule ID.
49(defvar emacs-mule-charset-table (make-vector 256 nil))
50(aset emacs-mule-charset-table 0 'ascii)
51
52;; Convert the argument of old-style calll of define-charset to a
53;; property list used by the new-style.
54;; INFO-VECTOR is a vector of the format:
55;; [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
56;; SHORT-NAME LONG-NAME DESCRIPTION]
57
58(defun convert-define-charset-argument (emacs-mule-id info-vector)
59 (let* ((dim (aref info-vector 0))
60 (chars (aref info-vector 1))
61 (total (if (= dim 1) chars (* chars chars)))
62 (code-space (if (= dim 1) (if (= chars 96) [32 127] [33 126])
63 (if (= chars 96) [32 127 32 127] [33 126 33 126])))
64 code-offset)
65 (if (integerp emacs-mule-id)
66 (or (= emacs-mule-id 0)
67 (and (>= emacs-mule-id 129) (< emacs-mule-id 256))
68 (error "Invalid CHARSET-ID: %d" emacs-mule-id))
69 (let (from-id to-id)
70 (if (= dim 1) (setq from-id 160 to-id 224)
71 (setq from-id 224 to-id 255))
72 (while (and (< from-id to-id)
73 (not (aref emacs-mule-charset-table from-id)))
74 (setq from-id (1+ from-id)))
75 (if (= from-id to-id)
76 (error "No more room for the new Emacs-mule charset"))
77 (setq emacs-mule-id from-id)))
78 (if (> (- private-char-area-1-max private-char-area-1-min) total)
79 (setq code-offset private-char-area-1-min
80 private-char-area-1-min (+ private-char-area-1-min total))
81 (if (> (- private-char-area-2-max private-char-area-2-min) total)
82 (setq code-offset private-char-area-2-min
83 private-char-area-2-min (+ private-char-area-2-min total))
84 (error "No more space for a new charset.")))
85 (list :dimension dim
86 :code-space code-space
87 :iso-final-char (aref info-vector 4)
88 :code-offset code-offset
89 :emacs-mule-id emacs-mule-id)))
90
c1841772
KH
91(defun define-charset (name docstring &rest props)
92 "Define NAME (symbol) as a charset with DOCSTRING.
93The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
07513d64 94may be any symbol. The following have special meanings, and one of
bec25acc 95`:code-offset', `:map', `:subset', `:superset' must be specified.
c1841772
KH
96
97`:short-name'
98
99VALUE must be a short string to identify the charset. If omitted,
100NAME is used.
101
102`:long-name'
103
104VALUE must be a string longer than `:short-name' to identify the
07513d64 105charset. If omitted, the value of the `:short-name' attribute is used.
c1841772
KH
106
107`:dimension'
108
109VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of
07513d64
DL
110code-points of the charsets. If omitted, it is calculated from the
111value of the `:code-space' attribute.
c1841772
KH
112
113`:code-space'
114
115VALUE must be a vector of length at most 8 specifying the byte code
116range of each dimension in this format:
117 [ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
07513d64 118where MIN-N is the minimum byte value of Nth dimension of code-point,
c1841772
KH
119MAX-N is the maximum byte value of that.
120
b1a79461
KH
121`:min-code'
122
123VALUE must be an integer specifying the mininum code point of the
124charset. If omitted, it is calculated from `:code-space'. VALUE may
125be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
126the code point and LOW is the least significant 16 bits.
127
1f32125f 128`:max-code'
b1a79461
KH
129
130VALUE must be an integer specifying the maxinum code point of the
131charset. If omitted, it is calculated from `:code-space'. VALUE may
132be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
133the code point and LOW is the least significant 16 bits.
134
c1841772
KH
135`:iso-final-char'
136
137VALUE must be a character in the range 32 to 127 (inclusive)
138specifying the final char of the charset for ISO-2022 encoding. If
139omitted, the charset can't be encoded by ISO-2022 based
140coding-systems.
141
142`:iso-revision-number'
143
144VALUE must be an integer in the range 0..63, specifying the revision
145number of the charset for ISO-2022 encoding.
146
147`:emacs-mule-id'
148
6d2b6635 149VALUE must be an integer of 0, 129..255. If omitted, the charset
c1841772
KH
150can't be encoded by coding-systems of type `emacs-mule'.
151
152`:ascii-compatible-p'
153
07513d64
DL
154VALUE must be nil or t (default nil). If VALUE is t, the charset is
155compatible with ASCII, i.e. the first 128 code points map to ASCII.
c1841772
KH
156
157`:supplementary-p'
158
159VALUE must be nil or t. If the VALUE is t, the charset is
07513d64
DL
160supplementary, which means it is used only as a parent of some other
161charset.
c1841772
KH
162
163`:invalid-code'
164
165VALUE must be a nonnegative integer that can be used as an invalid
166code point of the charset. If the minimum code is 0 and the maximum
167code is greater than Emacs' maximum integer value, `:invalid-code'
168should not be omitted.
169
170`:code-offset'
171
07513d64
DL
172VALUE must be an integer added to the index number of a character to
173get the corresponding character code.
c1841772
KH
174
175`:map'
176
177VALUE must be vector or string.
178
179If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
180where CODE-n is a code-point of the charset, and CHAR-n is the
07513d64 181corresponding character code.
c1841772
KH
182
183If it is a string, it is a name of file that contains the above
3e4abc9e
KH
184information. Each line of the file must be this format:
185 0xXXX 0xYYY
186where XXX is a hexadecimal representation of CODE-n and YYY is a
187hexadecimal representation of CHAR-n. A line starting with `#' is a
188comment line.
c1841772 189
2c2a254f
KH
190`:subset'
191
192VALUE must be a list:
193 ( PARENT MIN-CODE MAX-CODE OFFSET )
194PARENT is a parent charset. MIN-CODE and MAX-CODE specify the range
195of characters inherited from the parent. OFFSET is an integer value
196to add to a code point of the parent charset to get the corresponding
197code point of this charset.
198
199`:superset'
c1841772
KH
200
201VALUE must be a list of parent charsets. The charset inherits
202characters from them. Each element of the list may be a cons (PARENT
203. OFFSET), where PARENT is a parent charset, and OFFSET is an offset
2c2a254f
KH
204value to add to a code point of PARENT to get the corresponding code
205point of this charset.
c1841772
KH
206
207`:unify-map'
208
209VALUE must be vector or string.
210
211If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
212where CODE-n is a code-point of the charset, and CHAR-n is the
07513d64 213corresponding Unicode character code.
c1841772
KH
214
215If it is a string, it is a name of file that contains the above
3e4abc9e
KH
216information. The file format is the same as what described for `:map'
217attribute."
6d2b6635
KH
218 (when (vectorp (car props))
219 ;; Old style code:
220 ;; (define-charset CHARSET-ID CHARSET-SYMBOL INFO-VECTOR)
221 ;; Convert the argument to make it fit with the current style.
222 (let ((vec (car props)))
223 (setq props (convert-define-charset-argument name vec)
224 name docstring
225 docstring (aref vec 8))))
c1841772
KH
226 (let ((attrs (mapcar 'list '(:dimension
227 :code-space
b1a79461
KH
228 :min-code
229 :max-code
c1841772
KH
230 :iso-final-char
231 :iso-revision-number
232 :emacs-mule-id
233 :ascii-compatible-p
234 :supplementary-p
235 :invalid-code
236 :code-offset
237 :map
2c2a254f
KH
238 :subset
239 :superset
c1841772
KH
240 :unify-map
241 :plist))))
242
243 ;; If :dimension is omitted, get the dimension from :code-space.
244 (let ((dimension (plist-get props :dimension)))
245 (or dimension
c04e918c
KH
246 (let ((code-space (plist-get props :code-space)))
247 (setq dimension (if code-space (/ (length code-space) 2) 4))
c1841772
KH
248 (setq props (plist-put props :dimension dimension)))))
249
c04e918c
KH
250 (let ((code-space (plist-get props :code-space)))
251 (or code-space
252 (let ((dimension (plist-get props :dimension)))
253 (setq code-space (make-vector 8 0))
254 (dotimes (i dimension)
255 (aset code-space (1+ (* i 2)) #xFF))
256 (setq props (plist-put props :code-space code-space)))))
257
6d2b6635
KH
258 ;; If :emacs-mule-id is specified, update emacs-mule-charset-table.
259 (let ((emacs-mule-id (plist-get props :emacs-mule-id)))
260 (if (integerp emacs-mule-id)
261 (aset emacs-mule-charset-table emacs-mule-id name)))
262
c1841772
KH
263 (dolist (slot attrs)
264 (setcdr slot (plist-get props (car slot))))
265
266 ;; Make sure that the value of :code-space is a vector of 8
267 ;; elements.
268 (let* ((slot (assq :code-space attrs))
269 (val (cdr slot))
270 (len (length val)))
271 (if (< len 8)
272 (setcdr slot
273 (vconcat val (make-vector (- 8 len) 0)))))
274
275 ;; Add :name and :docstring properties to PROPS.
276 (setq props
277 (cons :name (cons name (cons :docstring (cons docstring props)))))
278 (or (plist-get props :short-name)
279 (plist-put props :short-name (symbol-name name)))
280 (or (plist-get props :long-name)
281 (plist-put props :long-name (plist-get props :short-name)))
e1e529fa
DL
282 ;; We can probably get a worthwhile amount in purespace.
283 (setq props
284 (mapcar (lambda (elt)
285 (if (stringp elt)
286 (purecopy elt)
287 elt))
288 props))
c1841772
KH
289 (setcdr (assq :plist attrs) props)
290
291 (apply 'define-charset-internal name (mapcar 'cdr attrs))))
292
293
4ed46869 294(defun load-with-code-conversion (fullname file &optional noerror nomessage)
0f69cb38
KH
295 "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
296The file contents are decoded before evaluation if necessary.
4ed46869
KH
297If optional second arg NOERROR is non-nil,
298 report no error if FILE doesn't exist.
299Print messages at start and end of loading unless
300 optional third arg NOMESSAGE is non-nil.
301Return t if file exists."
302 (if (null (file-readable-p fullname))
303 (and (null noerror)
304 (signal 'file-error (list "Cannot open load file" file)))
305 ;; Read file with code conversion, and then eval.
306 (let* ((buffer
307 ;; To avoid any autoloading, set default-major-mode to
308 ;; fundamental-mode.
88162676
RS
309 ;; So that we don't get completely screwed if the
310 ;; file is encoded in some complicated character set,
311 ;; read it with real decoding, as a multibyte buffer,
312 ;; even if this is a --unibyte Emacs session.
313 (let ((default-major-mode 'fundamental-mode)
314 (default-enable-multibyte-characters t))
4ed46869
KH
315 ;; We can't use `generate-new-buffer' because files.el
316 ;; is not yet loaded.
317 (get-buffer-create (generate-new-buffer-name " *load*"))))
db5cae4b
SM
318 (load-in-progress t)
319 (source (save-match-data (string-match "\\.el\\'" fullname))))
320 (unless nomessage
321 (if source
322 (message "Loading %s (source)..." file)
323 (message "Loading %s..." file)))
324 (when purify-flag
4c86cca0 325 (push file preloaded-file-list))
4ed46869 326 (unwind-protect
a6acd8a2 327 (let ((load-file-name fullname)
1c4cc63a 328 (set-auto-coding-for-load t)
a6acd8a2 329 (inhibit-file-name-operation nil))
4ed46869
KH
330 (save-excursion
331 (set-buffer buffer)
332 (insert-file-contents fullname)
7d276780
EZ
333 ;; If the loaded file was inserted with no-conversion or
334 ;; raw-text coding system, make the buffer unibyte.
335 ;; Otherwise, eval-buffer might try to interpret random
336 ;; binary junk as multibyte characters.
337 (if (and enable-multibyte-characters
8f924df7
KH
338 (or (eq (coding-system-type last-coding-system-used)
339 'raw-text)))
7d276780 340 (set-buffer-multibyte nil))
4ed46869
KH
341 ;; Make `kill-buffer' quiet.
342 (set-buffer-modified-p nil))
0f69cb38 343 ;; Have the original buffer current while we eval.
88162676
RS
344 (eval-buffer buffer nil file
345 ;; If this Emacs is running with --unibyte,
346 ;; convert multibyte strings to unibyte
347 ;; after reading them.
ba74e833 348;; (not default-enable-multibyte-characters)
8dd08b5b 349 nil t
ba74e833 350 ))
cfc70cdf
RS
351 (let (kill-buffer-hook kill-buffer-query-functions)
352 (kill-buffer buffer)))
4ed46869 353 (let ((hook (assoc file after-load-alist)))
db5cae4b
SM
354 (when hook
355 (mapcar (function eval) (cdr hook))))
356 (unless (or nomessage noninteractive)
357 (if source
358 (message "Loading %s (source)...done" file)
359 (message "Loading %s...done" file)))
4ed46869
KH
360 t)))
361
8f924df7 362(defun charset-info (charset)
4ed46869 363 "Return a vector of information of CHARSET.
8f924df7 364This function is provided for backward compatibility.
4ed46869 365
4ed46869
KH
366The elements of the vector are:
367 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
368 LEADING-CODE-BASE, LEADING-CODE-EXT,
369 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
370 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
8f924df7 371 PLIST.
4ed46869 372where
8f924df7
KH
373CHARSET-ID is always 0.
374BYTES is always 0.
375DIMENSION is the number of bytes of a code-point of the charset:
376 1, 2, 3, or 4.
377CHARS is the number of characters in a dimension:
378 94, 96, 128, or 256.
379WIDTH is always 0.
380DIRECTION is always 0.
381LEADING-CODE-BASE is always 0.
382LEADING-CODE-EXT is always 0.
4ed46869 383ISO-FINAL-CHAR (character) is the final character of the
7dd4c92d
KH
384 corresponding ISO 2022 charset. If the charset is not assigned
385 any final character, the value is -1.
8f924df7
KH
386ISO-GRAPHIC-PLANE is always 0.
387REVERSE-CHARSET is always -1.
4ed46869
KH
388SHORT-NAME (string) is the short name to refer to the charset.
389LONG-NAME (string) is the long name to refer to the charset
390DESCRIPTION (string) is the description string of the charset.
391PLIST (property list) may contain any type of information a user
392 want to put and get by functions `put-charset-property' and
393 `get-charset-property' respectively."
8f924df7
KH
394 (vector 0
395 0
396 (charset-dimension charset)
397 (charset-chars charset)
398 0
399 0
400 0
401 0
402 (charset-iso-final-char charset)
403 0
404 -1
405 (get-charset-property charset :short-name)
406 (get-charset-property charset :short-name)
407 (charset-description charset)
408 (charset-plist charset)))
4ed46869 409
40c81f74
PE
410;; It is better not to use backquote in this file,
411;; because that makes a bootstrapping problem
412;; if you need to recompile all the Lisp files using interpreted code.
413
8f924df7
KH
414(defun charset-id (charset)
415 "Always return 0. This is provided for backward compatibility."
416 0)
4ed46869
KH
417
418(defmacro charset-bytes (charset)
8f924df7
KH
419 "Always return 0. This is provided for backward compatibility."
420 0)
c1841772
KH
421
422(defun get-charset-property (charset propname)
423 "Return the value of CHARSET's PROPNAME property.
424This is the last value stored with
425 (put-charset-property CHARSET PROPNAME VALUE)."
426 (plist-get (charset-plist charset) propname))
427
428(defun put-charset-property (charset propname value)
1f32125f 429 "Set CHARSETS's PROPNAME property to value VALUE.
c1841772
KH
430It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
431 (set-charset-plist charset
432 (plist-put (charset-plist charset) propname value)))
433
c1841772
KH
434(defun charset-description (charset)
435 "Return description string of CHARSET."
436 (plist-get (charset-plist charset) :docstring))
437
438(defun charset-dimension (charset)
12504f57 439 "Return dimension of CHARSET."
c1841772
KH
440 (plist-get (charset-plist charset) :dimension))
441
346a8d64 442(defun charset-chars (charset &optional dimension)
12504f57 443 "Return number of characters contained in DIMENSION of CHARSET.
346a8d64
DL
444DIMENSION defaults to the first dimension."
445 (unless dimension (setq dimension 1))
103cc921 446 (let ((code-space (plist-get (charset-plist charset) :code-space)))
346a8d64
DL
447 (1+ (- (aref code-space (1- (* 2 dimension)))
448 (aref code-space (- (* 2 dimension) 2))))))
c1841772
KH
449
450(defun charset-iso-final-char (charset)
1d839a14
DL
451 "Return ISO-2022 final character of CHARSET.
452Return -1 if charset isn't an ISO 2022 one."
c1841772
KH
453 (or (plist-get (charset-plist charset) :iso-final-char)
454 -1))
4ed46869
KH
455
456(defmacro charset-short-name (charset)
c1841772
KH
457 "Return short name of CHARSET."
458 (plist-get (charset-plist charset) :short-name))
4ed46869
KH
459
460(defmacro charset-long-name (charset)
c1841772
KH
461 "Return long name of CHARSET."
462 (plist-get (charset-plist charset) :long-name))
4ed46869 463
d3675a42 464(defun charset-list ()
12504f57 465 "Return list of all charsets ever defined.
900dc6e3 466
d3675a42 467This function is provided for backward compatibility.
900dc6e3 468Now we have the variable `charset-list'."
d3675a42 469 charset-list)
07513d64 470(make-obsolete 'charset-list "Use variable `charset-list'" "22.1")
d3675a42 471
6d2b6635
KH
472\f
473;;; CHARACTER
474(defalias 'char-valid-p 'characterp)
475(make-obsolete 'char-valid-p 'characterp "22.1")
476
c1841772 477(defun generic-char-p (char)
8f924df7 478 "Always return nil. This is provided for backward compatibility."
c1841772 479 nil)
07513d64 480(make-obsolete 'generic-char-p "Generic characters no longer exist" "22.1")
6d2b6635
KH
481
482(defun make-char-internal (charset-id &optional code1 code2)
483 (let ((charset (aref emacs-mule-charset-table charset-id)))
484 (or charset
485 (error "Invalid Emacs-mule charset ID: %d" charset-id))
486 (make-char charset code1 code2)))
0269ddfb 487\f
e76938e7 488;; Coding system stuff
4ed46869 489
c1841772
KH
490;; Coding system is a symbol that has been defined by the function
491;; `define-coding-system'.
4ed46869 492
c1841772
KH
493(defconst coding-system-iso-2022-flags
494 '(long-form
495 ascii-at-eol
496 ascii-at-cntl
497 7-bit
498 locking-shift
499 single-shift
500 designation
501 revision
502 direction
503 init-at-bol
504 designate-at-bol
505 safe
506 latin-extra
507 composition
3ed58a15
KH
508 euc-tw-shift
509 use-roman
510 use-oldjis)
c1841772 511 "List of symbols that control ISO-2022 encoder/decoder.
4ed46869 512
12504f57 513The value of the `:flags' attribute in the argument of the function
caa7db3a 514`define-coding-system' must be one of them.
4ed46869 515
c1841772
KH
516If `long-form' is specified, use a long designation sequence on
517encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
518and `japanese-jisx0208'. The long designation sequence doesn't
12504f57 519conform to ISO 2022, but is used by such coding systems as
c1841772
KH
520`compound-text'.
521
522If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
523on encoding.
524
525If `ascii-at-cntl' is specified, designate ASCII to g0 before control
526codes and SPC on encoding.
527
528If `7-bit' is specified, use 7-bit code only on encoding.
529
530If `locking-shift' is specified, decode locking-shift code correctly
531on decoding, and use locking-shift to invoke a graphic element on
532encoding.
533
534If `single-shift' is specified, decode single-shift code correctly on
535decoding, and use single-shift to invoke a graphic element on encoding.
536
537If `designation' is specified, decode designation code correctly on
538decoding, and use designation to designate a charset to a graphic
539element on encoding.
540
541If `revision' is specified, produce an escape sequence to specify
542revision number of a charset on encoding. Such an escape sequence is
543always correctly decoded on decoding.
544
545If `direction' is specified, decode ISO6429's code for specifying
12504f57 546direction correctly, and produce the code on encoding.
c1841772
KH
547
548If `init-at-bol' is specified, on encoding, it is assumed that
549invocation and designation statuses are reset at each beginning of
12504f57 550line even if `ascii-at-eol' is not specified; thus no codes for
c1841772
KH
551resetting them are produced.
552
553If `safe' is specified, on encoding, characters not supported by a
554coding are replaced with `?'.
555
12504f57 556If `latin-extra' is specified, the code-detection routine assumes that a
c1841772
KH
557code specified in `latin-extra-code-table' (which see) is valid.
558
559If `composition' is specified, an escape sequence to specify
12504f57 560composition sequence is correctly decoded on decoding, and is produced
c1841772
KH
561on encoding.
562
563If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
12504f57 564correctly decoded on decoding, and is produced on encoding.
c1841772 565
12504f57
DL
566If `use-roman' is specified, JIS0201-1976-Roman is designated instead
567of ASCII.
568
569If `use-oldjis' is specified, JIS0208-1976 is designated instead of
570JIS0208-1983.")
571
c1841772 572(defun define-coding-system (name docstring &rest props)
12504f57 573 "Define NAME (a symbol) as a coding system with DOCSTRING and attributes.
c1841772
KH
574The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
575may be any symbol.
576
12504f57
DL
577The following attributes have special meanings. Those labeled as
578\"(required)\", should not be omitted.
c1841772
KH
579
580`:mnemonic' (required)
581
582VALUE is a character to display on mode line for the coding system.
583
584`:coding-type' (required)
585
586VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022',
1bfd603c 587`emacs-mule', `shift-jis', `ccl', `raw-text', `undecided'.
c1841772 588
12504f57 589`:eol-type'
c1841772 590
12504f57 591VALUE is the EOL (end-of-line) format of the coding system. It must be
c1841772
KH
592one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL
593\(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF),
594and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on
12504f57 595decoding by the coding system, Emacs automatically detects the EOL
c1841772
KH
596format of the source text.
597
736345cb 598`:charset-list'
c1841772
KH
599
600VALUE must be a list of charsets supported by the coding system. On
601encoding by the coding system, if a character belongs to multiple
602charsets in the list, a charset that comes earlier in the list is
736345cb
KH
603selected. If `:coding-type' is `iso-2022', VALUE may be `iso-2022',
604which indicates that the coding system supports all ISO-2022 based
605charsets. If `:coding-type' is `emacs-mule', VALUE may be
606`emacs-mule', which indicates that the coding system supports all
1d839a14 607charsets that have the `:emacs-mule-id' property.
c1841772 608
12504f57 609`:ascii-compatible-p'
c1841772
KH
610
611If VALUE is non-nil, the coding system decodes all 7-bit bytes into
07513d64 612the corresponding ASCII characters, and encodes all ASCII characters
12504f57 613back to the corresponding 7-bit bytes. VALUE defaults to nil.
c1841772 614
12504f57 615`:decode-translation-table'
c1841772
KH
616
617VALUE must be a translation table to use on decoding.
618
12504f57 619`:encode-translation-table'
c1841772
KH
620
621VALUE must be a translation table to use on encoding.
622
12504f57 623`:post-read-conversion'
c1841772
KH
624
625VALUE must be a function to call after some text is inserted and
626decoded by the coding system itself and before any functions in
627`after-insert-functions' are called. The arguments to this function
12504f57
DL
628are the same as those of a function in `after-insert-file-functions',
629i.e. LENGTH of the text to be decoded with point at the head of it,
630and the function should leave point unchanged.
c1841772
KH
631
632`:pre-write-conversion'
633
634VALUE must be a function to call after all functions in
635`write-region-annotate-functions' and `buffer-file-format' are called,
636and before the text is encoded by the coding system itself. The
12504f57
DL
637arguments to this function are the same as those of a function in
638`write-region-annotate-functions'.
c1841772
KH
639
640`:default-char'
641
642VALUE must be a character. On encoding, a character not supported by
643the coding system is replaced with VALUE.
644
8f924df7
KH
645`:for-unibyte'
646
647VALUE non-nil means that visiting a file with the coding system
648results in a unibyte buffer.
649
c1841772
KH
650`:eol-type'
651
652VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like
653EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like
12504f57
DL
654EOL (CR). If omitted, on decoding, the coding system detects EOL
655format automatically, and on encoding, uses Unix-like EOL.
c1841772
KH
656
657`:mime-charset'
658
12504f57
DL
659VALUE must be a symbol whose name is that of a MIME charset converted
660to lower case.
c1841772 661
1bfd603c
DL
662`:mime-text-unsuitable'
663
664VALUE non-nil means the `:mime-charset' property names a charset which
1894d108 665is unsuitable for the top-level media type \"text\".
1bfd603c 666
c1841772
KH
667`:flags'
668
12504f57
DL
669VALUE must be a list of symbols that control the ISO-2022 converter.
670Each must be a member of the list `coding-system-iso-2022-flags'
c1841772
KH
671\(which see). This attribute has a meaning only when `:coding-type'
672is `iso-2022'.
673
674`:designation'
675
12504f57 676VALUE must be a vector [G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
c1841772
KH
677GN-USAGE specifies the usage of graphic register GN as follows.
678
679If it is nil, no charset can be designated to GN.
680
07513d64 681If it is a charset, the charset is initially designated to GN, and
c1841772
KH
682never used by the other charsets.
683
684If it is a list, the elements must be charsets, nil, 94, or 96. GN
12504f57
DL
685can be used by all the listed charsets. If the list contains 94, any
686iso-2022 charset whose code-space ranges are 94 long can be designated
687to GN. If the list contains 96, any charsets whose whose ranges are
68896 long can be designated to GN. If the first element is a charset,
689that charset is initially designated to GN.
c1841772
KH
690
691This attribute has a meaning only when `:coding-type' is `iso-2022'.
692
693`:bom'
694
12504f57
DL
695This attributes specifies whether the coding system uses a `byte order
696mark'. VALUE must nil, t, or cons of coding systems whose
697`:coding-type' is `utf-16'.
c1841772 698
0ea1a6ca
KH
699If the value is nil, on decoding, don't treat the first two-byte as
700BOM, and on encoding, don't produce BOM bytes.
701
702If the value is t, on decoding, skip the first two-byte as BOM, and on
703encoding, produce BOM bytes accoding to the value of `:endian'.
704
705If the value is cons, on decoding, check the first two-byte. If theyq
706are 0xFE 0xFF, use the car part coding system of the value. If they
707are 0xFF 0xFE, use the car part coding system of the value.
708Otherwise, treat them as bytes for a normal character. On encoding,
709produce BOM bytes accoding to the value of `:endian'.
710
c1841772
KH
711This attribute has a meaning only when `:coding-type' is `utf-16'.
712
713`:endian'
714
0ea1a6ca
KH
715VALUE must be `big' or `little' specifying big-endian and
716little-endian respectively. The default value is `big'.
c1841772
KH
717
718This attribute has a meaning only when `:coding-type' is `utf-16'.
719
720`:ccl-decoder'
721
12504f57
DL
722VALUE is a symbol representing the registered CCL program used for
723decoding. This attribute has a meaning only when `:coding-type' is
724`ccl'.
c1841772
KH
725
726`:ccl-encoder'
727
12504f57
DL
728VALUE is a symbol representing the registered CCL program used for
729encoding. This attribute has a meaning only when `:coding-type' is
730`ccl'."
c1841772
KH
731 (let* ((common-attrs (mapcar 'list
732 '(:mnemonic
733 :coding-type
734 :charset-list
735 :ascii-compatible-p
1a9db556 736 :decode-translation-table
c1841772
KH
737 :encode-translation-table
738 :post-read-conversion
739 :pre-write-conversion
740 :default-char
7e742024 741 :for-unibyte
c1841772
KH
742 :plist
743 :eol-type)))
744 (coding-type (plist-get props :coding-type))
745 (spec-attrs (mapcar 'list
746 (cond ((eq coding-type 'iso-2022)
747 '(:initial
748 :reg-usage
749 :request
750 :flags))
751 ((eq coding-type 'utf-16)
752 '(:bom
753 :endian))
754 ((eq coding-type 'ccl)
755 '(:ccl-decoder
756 :ccl-encoder
757 :valids))))))
758
759 (dolist (slot common-attrs)
760 (setcdr slot (plist-get props (car slot))))
761
762 (dolist (slot spec-attrs)
763 (setcdr slot (plist-get props (car slot))))
764
765 (if (eq coding-type 'iso-2022)
766 (let ((designation (plist-get props :designation))
767 (flags (plist-get props :flags))
768 (initial (make-vector 4 nil))
769 (reg-usage (cons 4 4))
770 request elt)
771 (dotimes (i 4)
772 (setq elt (aref designation i))
773 (cond ((charsetp elt)
774 (aset initial i elt)
775 (setq request (cons (cons elt i) request)))
776 ((consp elt)
777 (aset initial i (car elt))
778 (if (charsetp (car elt))
779 (setq request (cons (cons (car elt) i) request)))
780 (dolist (e (cdr elt))
781 (cond ((charsetp e)
782 (setq request (cons (cons e i) request)))
783 ((eq e 94)
784 (setcar reg-usage i))
785 ((eq e 96)
786 (setcdr reg-usage i))
787 ((eq e t)
788 (setcar reg-usage i)
789 (setcdr reg-usage i)))))))
790 (setcdr (assq :initial spec-attrs) initial)
791 (setcdr (assq :reg-usage spec-attrs) reg-usage)
792 (setcdr (assq :request spec-attrs) request)
793
794 ;; Change :flags value from a list to a bit-mask.
795 (let ((bits 0)
796 (i 0))
797 (dolist (elt coding-system-iso-2022-flags)
798 (if (memq elt flags)
799 (setq bits (logior bits (lsh 1 i))))
800 (setq i (1+ i)))
801 (setcdr (assq :flags spec-attrs) bits))))
802
803 ;; Add :name and :docstring properties to PROPS.
804 (setq props
e1e529fa
DL
805 (cons :name (cons name (cons :docstring (cons (purecopy docstring)
806 props)))))
c1841772 807 (setcdr (assq :plist common-attrs) props)
c1841772
KH
808 (apply 'define-coding-system-internal
809 name (mapcar 'cdr (append common-attrs spec-attrs)))))
4ed46869 810
8057896b 811(defun coding-system-doc-string (coding-system)
0269ddfb 812 "Return the documentation string for CODING-SYSTEM."
c1841772 813 (plist-get (coding-system-plist coding-system) :docstring))
4ed46869 814
4ed46869 815(defun coding-system-mnemonic (coding-system)
0269ddfb 816 "Return the mnemonic character of CODING-SYSTEM.
12504f57
DL
817The mnemonic character of a coding system is used in mode line to
818indicate the coding system. If CODING-SYSTEM. is nil, return ?=."
c1841772 819 (plist-get (coding-system-plist coding-system) :mnemonic))
4ed46869 820
c1841772
KH
821(defun coding-system-type (coding-system)
822 "Return the coding type of CODING-SYSTEM.
823A coding type is a symbol indicating the encoding method of CODING-SYSTEM.
824See the function `define-coding-system' for more detail."
825 (plist-get (coding-system-plist coding-system) :coding-type))
d3675a42 826
c1841772 827(defun coding-system-charset-list (coding-system)
07513d64 828 "Return list of charsets supported by CODING-SYSTEM.
c1841772
KH
829If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
830If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'."
831 (plist-get (coding-system-plist coding-system) :charset-list))
0269ddfb 832
2f1e746b
KH
833(defun coding-system-category (coding-system)
834 "Return a category symbol of CODING-SYSTEM."
835 (plist-get (coding-system-plist coding-system) :category))
0269ddfb
KH
836
837(defun coding-system-get (coding-system prop)
07513d64
DL
838 "Extract a value from CODING-SYSTEM's property list for property PROP.
839For compatibility with Emacs 20/21, this accepts old-style symbols
840like `mime-charset' as well as the current style like `:mime-charset'."
841 (or (plist-get (coding-system-plist coding-system) prop)
842 (if (not (keywordp prop))
843 (plist-get (coding-system-plist coding-system)
844 (intern (concat ":" (symbol-name prop)))))))
0269ddfb 845
0269ddfb 846(defalias 'coding-system-parent 'coding-system-base)
2598a293 847(make-obsolete 'coding-system-parent 'coding-system-base "20.3")
0269ddfb 848
2e729bfa
JB
849(defun coding-system-eol-type-mnemonic (coding-system)
850 "Return the string indicating end-of-line format of CODING-SYSTEM."
851 (let* ((eol-type (coding-system-eol-type coding-system))
f4f00827 852 (val (cond ((eq eol-type 0) eol-mnemonic-unix)
2e729bfa
JB
853 ((eq eol-type 1) eol-mnemonic-dos)
854 ((eq eol-type 2) eol-mnemonic-mac)
f4f00827 855 (t eol-mnemonic-undecided))))
2e729bfa
JB
856 (if (stringp val)
857 val
858 (char-to-string val))))
859
857ea15c
AS
860(defun coding-system-lessp (x y)
861 (cond ((eq x 'no-conversion) t)
862 ((eq y 'no-conversion) nil)
863 ((eq x 'emacs-mule) t)
864 ((eq y 'emacs-mule) nil)
865 ((eq x 'undecided) t)
866 ((eq y 'undecided) nil)
867 (t (let ((c1 (coding-system-mnemonic x))
868 (c2 (coding-system-mnemonic y)))
869 (or (< (downcase c1) (downcase c2))
870 (and (not (> (downcase c1) (downcase c2)))
871 (< c1 c2)))))))
872
5e2e859a
KH
873(defun coding-system-equal (coding-system-1 coding-system-2)
874 "Return t if and only if CODING-SYSTEM-1 and CODING-SYSTEM-2 are identical.
875Two coding systems are identical if two symbols are equal
876or one is an alias of the other."
877 (or (eq coding-system-1 coding-system-2)
daff7d74
KH
878 (and (equal (coding-system-plist coding-system-1)
879 (coding-system-plist coding-system-2))
5e2e859a
KH
880 (let ((eol-type-1 (coding-system-eol-type coding-system-1))
881 (eol-type-2 (coding-system-eol-type coding-system-2)))
882 (or (eq eol-type-1 eol-type-2)
883 (and (vectorp eol-type-1) (vectorp eol-type-2)))))))
884
857ea15c 885(defun add-to-coding-system-list (coding-system)
521d4010 886 "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted."
857ea15c
AS
887 (if (or (null coding-system-list)
888 (coding-system-lessp coding-system (car coding-system-list)))
889 (setq coding-system-list (cons coding-system coding-system-list))
890 (let ((len (length coding-system-list))
891 mid (tem coding-system-list))
892 (while (> len 1)
893 (setq mid (nthcdr (/ len 2) tem))
894 (if (coding-system-lessp (car mid) coding-system)
895 (setq tem mid
896 len (- len (/ len 2)))
897 (setq len (/ len 2))))
898 (setcdr tem (cons coding-system (cdr tem))))))
899
80a7463d 900(defun coding-system-list (&optional base-only)
c11a8f77 901 "Return a list of all existing non-subsidiary coding systems.
12504f57
DL
902If optional arg BASE-ONLY is non-nil, only base coding systems are
903listed. The value doesn't include subsidiary coding systems which are
c11a8f77
KH
904made from bases and aliases automatically for various end-of-line
905formats (e.g. iso-latin-1-unix, koi8-r-dos)."
80a7463d
KH
906 (let* ((codings (copy-sequence coding-system-list))
907 (tail (cons nil codings)))
908 ;; Remove subsidiary coding systems (eol variants) and alias
909 ;; coding systems (if necessary).
910 (while (cdr tail)
911 (let* ((coding (car (cdr tail)))
687441de 912 (aliases (coding-system-aliases coding)))
80a7463d
KH
913 (if (or
914 ;; CODING is an eol variant if not in ALIASES.
915 (not (memq coding aliases))
916 ;; CODING is an alias if it is not car of ALIASES.
917 (and base-only (not (eq coding (car aliases)))))
918 (setcdr tail (cdr (cdr tail)))
919 (setq tail (cdr tail)))))
920 codings))
921
620956ca
KH
922(defconst char-coding-system-table nil
923 "This is an obsolete variable.
924It exists just for backward compatibility, and the value is always nil.")
c11a8f77 925
50c29104
KH
926(defun transform-make-coding-system-args (name type &optional doc-string props)
927 "For internal use only.
928Transform XEmacs style args for `make-coding-system' to Emacs style.
929Value is a list of transformed arguments."
930 (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
931 (eol-type (plist-get props 'eol-type))
932 properties tmp)
933 (cond
934 ((eq eol-type 'lf) (setq eol-type 'unix))
935 ((eq eol-type 'crlf) (setq eol-type 'dos))
936 ((eq eol-type 'cr) (setq eol-type 'mac)))
937 (if (setq tmp (plist-get props 'post-read-conversion))
938 (setq properties (plist-put properties 'post-read-conversion tmp)))
939 (if (setq tmp (plist-get props 'pre-write-conversion))
940 (setq properties (plist-put properties 'pre-write-conversion tmp)))
941 (cond
f4a012a6
KH
942 ((eq type 'shift-jis)
943 `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type))
944 ((eq type 'iso2022) ; This is not perfect.
945 (if (plist-get props 'escape-quoted)
946 (error "escape-quoted is not supported: %S"
947 `(,name ,type ,doc-string ,props)))
948 (let ((g0 (plist-get props 'charset-g0))
949 (g1 (plist-get props 'charset-g1))
950 (g2 (plist-get props 'charset-g2))
951 (g3 (plist-get props 'charset-g3))
952 (use-roman
953 (and
954 (eq (cadr (assoc 'latin-jisx0201
955 (plist-get props 'input-charset-conversion)))
956 'ascii)
957 (eq (cadr (assoc 'ascii
958 (plist-get props 'output-charset-conversion)))
959 'latin-jisx0201)))
960 (use-oldjis
961 (and
962 (eq (cadr (assoc 'japanese-jisx0208-1978
963 (plist-get props 'input-charset-conversion)))
964 'japanese-jisx0208)
965 (eq (cadr (assoc 'japanese-jisx0208
966 (plist-get props 'output-charset-conversion)))
967 'japanese-jisx0208-1978))))
968 (if (charsetp g0)
969 (if (plist-get props 'force-g0-on-output)
970 (setq g0 `(nil ,g0))
971 (setq g0 `(,g0 t))))
972 (if (charsetp g1)
973 (if (plist-get props 'force-g1-on-output)
974 (setq g1 `(nil ,g1))
975 (setq g1 `(,g1 t))))
976 (if (charsetp g2)
977 (if (plist-get props 'force-g2-on-output)
978 (setq g2 `(nil ,g2))
979 (setq g2 `(,g2 t))))
980 (if (charsetp g3)
981 (if (plist-get props 'force-g3-on-output)
982 (setq g3 `(nil ,g3))
983 (setq g3 `(,g3 t))))
984 `(,name 2 ,mnemonic ,doc-string
985 (,g0 ,g1 ,g2 ,g3
986 ,(plist-get props 'short)
987 ,(not (plist-get props 'no-ascii-eol))
988 ,(not (plist-get props 'no-ascii-cntl))
989 ,(plist-get props 'seven)
990 t
991 ,(not (plist-get props 'lock-shift))
992 ,use-roman
993 ,use-oldjis
994 ,(plist-get props 'no-iso6429)
995 nil nil nil nil)
996 ,properties ,eol-type)))
997 ((eq type 'big5)
998 `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type))
50c29104 999 ((eq type 'ccl)
f4a012a6 1000 `(,name 4 ,mnemonic ,doc-string
50c29104 1001 (,(plist-get props 'decode) . ,(plist-get props 'encode))
f4a012a6 1002 ,properties ,eol-type))
50c29104 1003 (t
f4a012a6 1004 (error "unsupported XEmacs style make-coding-style arguments: %S"
50c29104
KH
1005 `(,name ,type ,doc-string ,props))))))
1006
8057896b 1007(defun make-coding-system (coding-system type mnemonic doc-string
1b46a680
KH
1008 &optional
1009 flags
1010 properties
1011 eol-type)
3bb1accb 1012 "Define a new coding system CODING-SYSTEM (symbol).
8f924df7
KH
1013This function is provided for backward compatibility.
1014Use `define-coding-system' instead."
50c29104 1015 ;; For compatiblity with XEmacs, we check the type of TYPE. If it
c3d0ee51
EZ
1016 ;; is a symbol, perhaps, this function is called with XEmacs-style
1017 ;; arguments. Here, try to transform that kind of arguments to
50c29104
KH
1018 ;; Emacs style.
1019 (if (symbolp type)
1020 (let ((args (transform-make-coding-system-args coding-system type
1021 mnemonic doc-string)))
1022 (setq coding-system (car args)
1053cc93 1023 type (nth 1 args)
50c29104
KH
1024 mnemonic (nth 2 args)
1025 doc-string (nth 3 args)
1026 flags (nth 4 args)
1027 properties (nth 5 args)
1028 eol-type (nth 6 args))))
1029
8f924df7
KH
1030 (setq type
1031 (cond ((eq type 0) 'emacs-mule)
1032 ((eq type 1) 'shift-jis)
1033 ((eq type 2) 'iso2022)
1034 ((eq type 3) 'big5)
1035 ((eq type 4) 'ccl)
1036 ((eq type 5) 'raw-text)
1b46a680 1037 (t
8f924df7
KH
1038 (error "Invalid coding system type: %s" type))))
1039
1040 (setq properties
1041 (let ((plist nil) key)
1042 (dolist (elt properties)
1043 (setq key (car elt))
1044 (cond ((eq key 'post-read-conversion)
1045 (setq key :post-read-conversion))
1046 ((eq key 'pre-write-conversion)
1047 (setq key :pre-write-conversion))
1048 ((eq key 'translation-table-for-decode)
1049 (setq key :decode-translation-table))
1050 ((eq key 'translation-table-for-encode)
1051 (setq key :encode-translation-table))
1052 ((eq key 'safe-charsets)
1053 (setq key :charset-list))
1054 ((eq key 'mime-charset)
1055 (setq key :mime-charset))
1056 ((eq key 'valid-codes)
1057 (setq key :valids)))
1058 (setq plist (plist-put plist key (cdr elt))))
1059 plist))
28380f17 1060 (setq properties (plist-put properties :mnemonic mnemonic))
8f924df7
KH
1061 (plist-put properties :coding-type type)
1062 (cond ((eq eol-type 0) (setq eol-type 'unix))
1063 ((eq eol-type 1) (setq eol-type 'dos))
1064 ((eq eol-type 2) (setq eol-type 'mac))
1065 ((vectorp eol-type) (setq eol-type nil)))
1066 (plist-put properties :eol-type eol-type)
1067
1068 (cond
1069 ((eq type 'iso2022)
1070 (plist-put properties :flags
1071 (list (and (or (consp (nth 0 flags))
1072 (consp (nth 1 flags))
1073 (consp (nth 2 flags))
1074 (consp (nth 3 flags))) 'designation)
1075 (or (nth 4 flags) 'long-form)
1076 (and (nth 5 flags) 'ascii-at-eol)
1077 (and (nth 6 flags) 'ascii-at-cntl)
1078 (and (nth 7 flags) '7-bit)
1079 (and (nth 8 flags) 'locking-shift)
1080 (and (nth 9 flags) 'single-shift)
1081 (and (nth 10 flags) 'use-roman)
1082 (and (nth 11 flags) 'use-oldjis)
1083 (or (nth 12 flags) 'direction)
1084 (and (nth 13 flags) 'init-at-bol)
1085 (and (nth 14 flags) 'designate-at-bol)
1086 (and (nth 15 flags) 'safe)
1087 (and (nth 16 flags) 'latin-extra)))
1088 (plist-put properties :designation
1089 (let ((vec (make-vector 4 nil)))
1090 (dotimes (i 4)
1091 (let ((spec (nth i flags)))
1092 (if (eq spec t)
1093 (aset vec i '(94 96))
1094 (if (consp spec)
1095 (progn
1096 (if (memq t spec)
1097 (setq spec (append (delq t spec) '(94 96))))
1098 (aset vec i spec))))))
1099 vec)))
1100
1101 ((eq type 'ccl)
1102 (plist-put properties :ccl-decoder (car flags))
1103 (plist-put properties :ccl-encoder (cdr flags))))
1104
1105 (apply 'define-coding-system coding-system doc-string properties))
4ed46869 1106
bbdea948
RS
1107(defun merge-coding-systems (first second)
1108 "Fill in any unspecified aspects of coding system FIRST from SECOND.
1109Return the resulting coding system."
1110 (let ((base (coding-system-base second))
1111 (eol (coding-system-eol-type second)))
1112 ;; If FIRST doesn't specify text conversion, merge with that of SECOND.
1113 (if (eq (coding-system-base first) 'undecided)
1114 (setq first (coding-system-change-text-conversion first base)))
1115 ;; If FIRST doesn't specify eol conversion, merge with that of SECOND.
1116 (if (and (vectorp (coding-system-eol-type first))
1117 (numberp eol) (>= eol 0) (<= eol 2))
1118 (setq first (coding-system-change-eol-conversion
1119 first eol)))
1120 first))
1121
4ed46869 1122(defun set-buffer-file-coding-system (coding-system &optional force)
358d28fb
RS
1123 "Set the file coding-system of the current buffer to CODING-SYSTEM.
1124This means that when you save the buffer, it will be converted
1125according to CODING-SYSTEM. For a list of possible values of CODING-SYSTEM,
1126use \\[list-coding-systems].
1127
bbdea948
RS
1128If CODING-SYSTEM leaves the text conversion unspecified, or if it
1129leaves the end-of-line conversion unspecified, FORCE controls what to
1130do. If FORCE is nil, get the unspecified aspect (or aspects) from the
1131buffer's previous `buffer-file-coding-system' value (if it is
6b61353c 1132specified there). Otherwise, leave it unspecified.
aeef8f07
KH
1133
1134This marks the buffer modified so that the succeeding \\[save-buffer]
1135surely saves the buffer with CODING-SYSTEM. From a program, if you
1136don't want to mark the buffer modified, just set the variable
1137`buffer-file-coding-system' directly."
bbdea948 1138 (interactive "zCoding system for saving file (default, nil): \nP")
4ed46869 1139 (check-coding-system coding-system)
36d455c4 1140 (if (and coding-system buffer-file-coding-system (null force))
bbdea948
RS
1141 (setq coding-system
1142 (merge-coding-systems coding-system buffer-file-coding-system)))
4ed46869 1143 (setq buffer-file-coding-system coding-system)
38a1356d
RS
1144 ;; This is in case of an explicit call. Normally, `normal-mode' and
1145 ;; `set-buffer-major-mode-hook' take care of setting the table.
1146 (if (fboundp 'ucs-set-table-for-input) ; don't lose when building
1147 (ucs-set-table-for-input))
4ed46869
KH
1148 (set-buffer-modified-p t)
1149 (force-mode-line-update))
1150
bbdea948
RS
1151(defun revert-buffer-with-coding-system (coding-system &optional force)
1152 "Visit the current buffer's file again using coding system CODING-SYSTEM.
1153For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1154
1155If CODING-SYSTEM leaves the text conversion unspecified, or if it
1156leaves the end-of-line conversion unspecified, FORCE controls what to
1157do. If FORCE is nil, get the unspecified aspect (or aspects) from the
1158buffer's previous `buffer-file-coding-system' value (if it is
1159specified there). Otherwise, determine it from the file contents as
1160usual for visiting a file."
1161 (interactive "zCoding system for visited file (default, nil): \nP")
1162 (check-coding-system coding-system)
1163 (if (and coding-system buffer-file-coding-system (null force))
1164 (setq coding-system
1165 (merge-coding-systems coding-system buffer-file-coding-system)))
1166 (let ((coding-system-for-read coding-system))
1167 (revert-buffer)))
1168
701414e3
KH
1169(defun set-file-name-coding-system (coding-system)
1170 "Set coding system for decoding and encoding file names to CODING-SYSTEM.
1171It actually just set the variable `file-name-coding-system' (which
1172see) to CODING-SYSTEM."
1173 (interactive "zCoding system for file names (default, nil): ")
1174 (check-coding-system coding-system)
1175 (setq file-name-coding-system coding-system))
1176
358d28fb
RS
1177(defvar default-terminal-coding-system nil
1178 "Default value for the terminal coding system.
1179This is normally set according to the selected language environment.
1180See also the command `set-terminal-coding-system'.")
1181
df100398
KH
1182(defun set-terminal-coding-system (coding-system)
1183 "Set coding system of your terminal to CODING-SYSTEM.
358d28fb
RS
1184All text output to the terminal will be encoded
1185with the specified coding system.
1186For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1187The default is determined by the selected language environment
1188or by the previous use of this command."
1189 (interactive
2e02a76f
RS
1190 (list (let ((default (if (and (not (terminal-coding-system))
1191 default-terminal-coding-system)
1192 default-terminal-coding-system)))
1193 (read-coding-system
1194 (format "Coding system for terminal display (default, %s): "
1195 default)
1196 default))))
358d28fb
RS
1197 (if (and (not coding-system)
1198 (not (terminal-coding-system)))
1199 (setq coding-system default-terminal-coding-system))
1200 (if coding-system
521d4010 1201 (setq default-terminal-coding-system coding-system))
df100398
KH
1202 (set-terminal-coding-system-internal coding-system)
1203 (redraw-frame (selected-frame)))
1204
358d28fb
RS
1205(defvar default-keyboard-coding-system nil
1206 "Default value of the keyboard coding system.
1207This is normally set according to the selected language environment.
1208See also the command `set-keyboard-coding-system'.")
1209
df100398 1210(defun set-keyboard-coding-system (coding-system)
358d28fb
RS
1211 "Set coding system for keyboard input to CODING-SYSTEM.
1212In addition, this command enables Encoded-kbd minor mode.
6d34f495
DL
1213\(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see
1214`encoded-kbd-mode'.)
358d28fb
RS
1215For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1216The default is determined by the selected language environment
1217or by the previous use of this command."
1218 (interactive
2e02a76f
RS
1219 (list (let ((default (if (and (not (keyboard-coding-system))
1220 default-keyboard-coding-system)
1221 default-keyboard-coding-system)))
1222 (read-coding-system
1223 (format "Coding system for keyboard input (default, %s): "
1224 default)
1225 default))))
358d28fb
RS
1226 (if (and (not coding-system)
1227 (not (keyboard-coding-system)))
1228 (setq coding-system default-keyboard-coding-system))
1229 (if coding-system
1230 (setq default-keyboard-coding-system coding-system))
df100398 1231 (set-keyboard-coding-system-internal coding-system)
b23bad0b 1232 (setq keyboard-coding-system coding-system)
df100398
KH
1233 (encoded-kbd-mode (if coding-system 1 0)))
1234
6d34f495
DL
1235(defcustom keyboard-coding-system nil
1236 "Specify coding system for keyboard input.
1237If you set this on a terminal which can't distinguish Meta keys from
12388-bit characters, you will have to use ESC to type Meta characters.
1239See Info node `Specify Coding' and Info node `Single-Byte Character Support'.
1240
237e5993
DL
1241On non-windowing terminals, this is set from the locale by default.
1242
6d34f495 1243Setting this variable directly does not take effect;
6b61353c 1244use either \\[customize] or \\[set-keyboard-coding-system]."
6d34f495
DL
1245 :type '(coding-system :tag "Coding system")
1246 :link '(info-link "(emacs)Specify Coding")
1247 :link '(info-link "(emacs)Single-Byte Character Support")
1248 :set (lambda (symbol value)
1249 ;; Don't load encoded-kbd-mode unnecessarily.
1250 (if (or value (boundp 'encoded-kbd-mode))
1251 (set-keyboard-coding-system value)
1252 (set-default 'keyboard-coding-system nil))) ; must initialize
237e5993 1253 :version "21.4"
6d34f495
DL
1254 :group 'keyboard
1255 :group 'mule)
1256
df100398 1257(defun set-buffer-process-coding-system (decoding encoding)
358d28fb 1258 "Set coding systems for the process associated with the current buffer.
df100398 1259DECODING is the coding system to be used to decode input from the process,
358d28fb
RS
1260ENCODING is the coding system to be used to encode output to the process.
1261
1262For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]."
4ed46869 1263 (interactive
83911021 1264 "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
4ed46869
KH
1265 (let ((proc (get-buffer-process (current-buffer))))
1266 (if (null proc)
521d4010 1267 (error "No process")
df100398
KH
1268 (check-coding-system decoding)
1269 (check-coding-system encoding)
1270 (set-process-coding-system proc decoding encoding)))
4ed46869
KH
1271 (force-mode-line-update))
1272
d0b99881
RS
1273(defalias 'set-clipboard-coding-system 'set-selection-coding-system)
1274
14915c37 1275(defun set-selection-coding-system (coding-system)
8c52d564 1276 "Make CODING-SYSTEM used for communicating with other X clients.
b25eef20
KH
1277When sending or receiving text via cut_buffer, selection, and clipboard,
1278the text is encoded or decoded by CODING-SYSTEM."
a03b3ce1 1279 (interactive "zCoding system for X selection: ")
b25eef20 1280 (check-coding-system coding-system)
14915c37 1281 (setq selection-coding-system coding-system))
b25eef20 1282
e8dd0160 1283;; Coding system lastly specified by the command
a03b3ce1
KH
1284;; set-next-selection-coding-system.
1285(defvar last-next-selection-coding-system nil)
1286
1287(defun set-next-selection-coding-system (coding-system)
12504f57 1288 "Use CODING-SYSTEM for next communication with other window system clients.
a03b3ce1
KH
1289This setting is effective for the next communication only."
1290 (interactive
1291 (list (read-coding-system
1292 (if last-next-selection-coding-system
12504f57 1293 (format "Coding system for the next selection (default, %S): "
a03b3ce1 1294 last-next-selection-coding-system)
12504f57 1295 "Coding system for the next selection: ")
a03b3ce1
KH
1296 last-next-selection-coding-system)))
1297 (if coding-system
1298 (setq last-next-selection-coding-system coding-system)
1299 (setq coding-system last-next-selection-coding-system))
1300 (check-coding-system coding-system)
1301
1302 (setq next-selection-coding-system coding-system))
1303
4ed46869 1304(defun set-coding-priority (arg)
521d4010 1305 "Set priority of coding categories according to ARG.
c1841772
KH
1306ARG is a list of coding categories ordered by priority.
1307
1308This function is provided for backward compatibility.
1309Now we have more convenient function `set-coding-system-priority'."
5d75f46f
KH
1310 (apply 'set-coding-system-priority
1311 (mapcar #'(lambda (x) (symbol-value x)) arg)))
07513d64 1312(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.1")
4ed46869 1313
835cbadb
EZ
1314;;; X selections
1315
cc926903 1316(defvar ctext-non-standard-encodings-alist
6b61353c
KH
1317 '(("big5-0" big5 2 (chinese-big5-1 chinese-big5-2))
1318 ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
1319 ("ISO8859-15" iso-8859-15 1 latin-iso8859-15))
1320 "Alist of non-standard encoding names vs the corresponding usages in CTEXT.
1321
1322It controls how extended segments of a compound text are handled
1323by the coding system `compound-text-with-extensions'.
1324
1325Each element has the form (ENCODING-NAME CODING-SYSTEM N-OCTET CHARSET).
1326
1327ENCODING-NAME is an encoding name of an \"extended segments\".
1328
1329CODING-SYSTEM is the coding-system to encode (or decode) the
1330characters into (or from) the extended segment.
1331
1332N-OCTET is the number of octets (bytes) that encodes a character
1333in the segment. It can be 0 (meaning the number of octets per
1334character is variable), 1, 2, 3, or 4.
1335
1336CHARSET is a charater set containing characters that are encoded
1337in the segment. It can be a list of character sets. It can also
1338be a char-table, in which case characters that have non-nil value
1339in the char-table are the target.
1340
1341On decoding CTEXT, all encoding names listed here are recognized.
1342
1343On encoding CTEXT, encoding names in the variable
1344`ctext-non-standard-encodings' (which see) and in the information
1345listed for the current language environment under the key
1346`ctext-non-standard-encodings' are used.")
1347
1348(defvar ctext-non-standard-encodings
1349 '("big5-0")
1350 "List of non-standard encoding names used in extended segments of CTEXT.
1351Each element must be one of the names listed in the variable
1352`ctext-non-standard-encodings-alist' (which see).")
cc926903
KH
1353
1354(defvar ctext-non-standard-encodings-regexp
1355 (string-to-multibyte
1356 (concat
1357 ;; For non-standard encodings.
1358 "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
1359 "\\|"
1360 ;; For UTF-8 encoding.
1361 "\\(\e%G[^\e]*\e%@\\)")))
835cbadb
EZ
1362
1363;; Functions to support "Non-Standard Character Set Encodings" defined
6b61353c
KH
1364;; by the COMPOUND-TEXT spec. They also support "The UTF-8 encoding"
1365;; described in the section 7 of the documentation of COMPOUND-TEXT
1366;; distributed with XFree86.
5c88a01e 1367
835cbadb
EZ
1368(defun ctext-post-read-conversion (len)
1369 "Decode LEN characters encoded as Compound Text with Extended Segments."
1894d108
KH
1370 ;; We don't need the following because it is expected that this
1371 ;; function is mainly used for decoding X selection which is not
1372 ;; that big data.
1373 ;;(buffer-disable-undo) ; minimize consing due to insertions and deletions
835cbadb 1374 (save-match-data
cc926903 1375 (save-restriction
1894d108 1376 (narrow-to-region (point) (+ (point) len))
cc926903 1377 (let ((case-fold-search nil)
cc926903
KH
1378 last-coding-system-used
1379 pos bytes)
cc926903 1380 (decode-coding-region (point-min) (point-max) 'ctext)
cc926903
KH
1381 (while (re-search-forward ctext-non-standard-encodings-regexp
1382 nil 'move)
1383 (setq pos (match-beginning 0))
1384 (if (match-beginning 1)
1385 ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
1386 (let* ((M (char-after (+ pos 4)))
1387 (L (char-after (+ pos 5)))
1388 (encoding (match-string 2))
9857367f 1389 (encoding-info (assoc-string
6b61353c 1390 encoding
c12bc1fb 1391 ctext-non-standard-encodings-alist t))
6b61353c
KH
1392 (coding (if encoding-info
1393 (nth 1 encoding-info)
1394 (setq encoding (intern (downcase encoding)))
1395 (and (coding-system-p encoding)
1396 encoding))))
cc926903
KH
1397 (setq bytes (- (+ (* (- M 128) 128) (- L 128))
1398 (- (point) (+ pos 6))))
1399 (when coding
1400 (delete-region pos (point))
1401 (forward-char bytes)
1402 (decode-coding-region (- (point) bytes) (point) coding)))
1403 ;; ESC % G --UTF-8-BYTES-- ESC % @
6b61353c
KH
1404 (delete-char -3)
1405 (delete-region pos (+ pos 3))
1406 (decode-coding-region pos (point) 'utf-8))))
cc926903
KH
1407 (goto-char (point-min))
1408 (- (point-max) (point)))))
835cbadb 1409
6b61353c
KH
1410;; Return a char table of extended segment usage for each character.
1411;; Each value of the char table is nil, one of the elements of
1412;; `ctext-non-standard-encodings-alist', or the symbol `utf-8'.
1413
1414(defun ctext-non-standard-encodings-table ()
1415 (let ((table (make-char-table 'translation-table)))
1416 (aset table (make-char 'mule-unicode-0100-24ff) 'utf-8)
1417 (aset table (make-char 'mule-unicode-2500-33ff) 'utf-8)
1418 (aset table (make-char 'mule-unicode-e000-ffff) 'utf-8)
1419 (dolist (encoding (reverse
1420 (append
1421 (get-language-info current-language-environment
1422 'ctext-non-standard-encodings)
1423 ctext-non-standard-encodings)))
1424 (let* ((slot (assoc encoding ctext-non-standard-encodings-alist))
1425 (charset (nth 3 slot)))
1426 (if charset
1427 (cond ((charsetp charset)
1428 (aset table (make-char charset) slot))
1429 ((listp charset)
1430 (dolist (elt charset)
1431 (aset table (make-char elt) slot)))
1432 ((char-table-p charset)
9857367f 1433 (map-char-table #'(lambda (k v)
6b61353c
KH
1434 (if (and v (> k 128)) (aset table k slot)))
1435 charset))))))
1436 table))
835cbadb
EZ
1437
1438(defun ctext-pre-write-conversion (from to)
5dde3c71
EZ
1439 "Encode characters between FROM and TO as Compound Text w/Extended Segments.
1440
1441If FROM is a string, or if the current buffer is not the one set up for us
cc926903 1442by encode-coding-string, generate a new temp buffer, insert the
5dde3c71 1443text, and convert it in the temporary buffer. Otherwise, convert in-place."
835cbadb 1444 (save-match-data
cc926903 1445 ;; Setup a working buffer if necessary.
f1beb0e0
KH
1446 (when (stringp from)
1447 (set-buffer (generate-new-buffer " *temp"))
1448 (set-buffer-multibyte (multibyte-string-p from))
1449 (insert from))
cc926903
KH
1450
1451 ;; Now we can encode the whole buffer.
6b61353c 1452 (let ((encoding-table (ctext-non-standard-encodings-table))
cc926903 1453 last-coding-system-used
6b61353c
KH
1454 last-pos last-encoding-info
1455 encoding-info end-pos)
1456 (goto-char (setq last-pos (point-min)))
1457 (setq end-pos (point-marker))
1458 (while (re-search-forward "[^\000-\177]+" nil t)
1459 ;; Found a sequence of non-ASCII characters.
1460 (setq last-pos (match-beginning 0)
1461 last-encoding-info (aref encoding-table (char-after last-pos)))
1462 (set-marker end-pos (match-end 0))
1463 (goto-char (1+ last-pos))
1464 (catch 'tag
1465 (while t
1466 (setq encoding-info
1467 (if (< (point) end-pos)
1468 (aref encoding-table (following-char))))
1469 (unless (eq last-encoding-info encoding-info)
1470 (cond ((consp last-encoding-info)
1471 ;; Encode the previous range using an extended
1472 ;; segment.
1473 (let ((encoding-name (car last-encoding-info))
1474 (coding-system (nth 1 last-encoding-info))
1475 (noctets (nth 2 last-encoding-info))
1476 len)
1477 (encode-coding-region last-pos (point) coding-system)
1478 (setq len (+ (length encoding-name) 1
1479 (- (point) last-pos)))
1480 (save-excursion
1481 (goto-char last-pos)
9857367f 1482 (insert (string-to-multibyte
6b61353c
KH
1483 (format "\e%%/%d%c%c%s\ 2"
1484 noctets
1485 (+ (/ len 128) 128)
1486 (+ (% len 128) 128)
1487 encoding-name))))))
1488 ((eq last-encoding-info 'utf-8)
1489 ;; Encode the previous range using UTF-8 encoding
1490 ;; extention.
1491 (encode-coding-region last-pos (point) 'mule-utf-8)
1492 (save-excursion
1493 (goto-char last-pos)
1494 (insert "\e%G"))
1495 (insert "\e%@")))
1496 (setq last-pos (point)
1497 last-encoding-info encoding-info))
1498 (if (< (point) end-pos)
1499 (forward-char 1)
1500 (throw 'tag nil)))))
1501 (set-marker end-pos nil)
cc926903 1502 (goto-char (point-min))))
5dde3c71 1503 ;; Must return nil, as build_annotations_2 expects that.
835cbadb
EZ
1504 nil)
1505
4ed46869
KH
1506;;; FILE I/O
1507
e76938e7 1508(defcustom auto-coding-alist
8f924df7
KH
1509 '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|sx[dmicw]\\|tar\\)\\'" . no-conversion-multibyte)
1510 ("\\.tgz\\'" . no-conversion)
6caef2da 1511 ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion)
45885400 1512 ("/#[^/]+#\\'" . emacs-mule))
835f49b8
KH
1513 "Alist of filename patterns vs corresponding coding systems.
1514Each element looks like (REGEXP . CODING-SYSTEM).
558b0c86 1515A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.
835f49b8 1516
7fed493a
RS
1517The settings in this alist take priority over `coding:' tags
1518in the file (see the function `set-auto-coding')
e76938e7
DL
1519and the contents of `file-coding-system-alist'."
1520 :group 'files
1521 :group 'mule
1522 :type '(repeat (cons (regexp :tag "File name regexp")
1523 (symbol :tag "Coding system"))))
835f49b8 1524
502522b2 1525(defcustom auto-coding-regexp-alist
4e4a5bca
DL
1526 '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)
1527 ("\\`;ELC\14\0\0\0" . emacs-mule)) ; Emacs 20-compiled
502522b2
GM
1528 "Alist of patterns vs corresponding coding systems.
1529Each element looks like (REGEXP . CODING-SYSTEM).
1530A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.
1531
1532The settings in this alist take priority over `coding:' tags
1533in the file (see the function `set-auto-coding')
1534and the contents of `file-coding-system-alist'."
1535 :group 'files
1536 :group 'mule
1537 :type '(repeat (cons (regexp :tag "Regexp")
1538 (symbol :tag "Coding system"))))
1539
d9f6dfe6 1540;; See the bottom of this file for built-in auto coding functions.
447404a3
CW
1541(defcustom auto-coding-functions '(sgml-xml-auto-coding-function
1542 sgml-html-meta-auto-coding-function)
d9f6dfe6
CW
1543 "A list of functions which attempt to determine a coding system.
1544
66643502
RS
1545Each function in this list should be written to operate on the
1546current buffer, but should not modify it in any way. The buffer
1547will contain undecoded text of parts of the file. Each function
1548should take one argument, SIZE, which says how many
1549characters (starting from point) it should look at.
1550
1551If one of these functions succeeds in determining a coding
1552system, it should return that coding system. Otherwise, it
1553should return nil.
1554
1555If a file has a `coding:' tag, that takes precedence over these
1556functions, so they won't be called at all."
d9f6dfe6
CW
1557 :group 'files
1558 :group 'mule
1559 :type '(repeat function))
1560
1c4cc63a
KH
1561(defvar set-auto-coding-for-load nil
1562 "Non-nil means look for `load-coding' property instead of `coding'.
1563This is used for loading and byte-compiling Emacs Lisp files.")
1564
8a592131
RS
1565(defun auto-coding-alist-lookup (filename)
1566 "Return the coding system specified by `auto-coding-alist' for FILENAME."
1567 (let ((alist auto-coding-alist)
c60ee5e7 1568 (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos cygwin)))
8a592131
RS
1569 coding-system)
1570 (while (and alist (not coding-system))
1571 (if (string-match (car (car alist)) filename)
1572 (setq coding-system (cdr (car alist)))
1573 (setq alist (cdr alist))))
1574 coding-system))
1575
835f49b8
KH
1576(defun set-auto-coding (filename size)
1577 "Return coding system for a file FILENAME of which SIZE bytes follow point.
1c4cc63a
KH
1578These bytes should include at least the first 1k of the file
1579and the last 3k of the file, but the middle may be omitted.
63561304 1580
d21ba5e0
DL
1581The function checks FILENAME against the variable `auto-coding-alist'.
1582If FILENAME doesn't match any entries in the variable, it checks the
502522b2 1583contents of the current buffer following point against
447404a3 1584`auto-coding-regexp-alist'. If no match is found, it checks for a
502522b2 1585`coding:' tag in the first one or two lines following point. If no
d21ba5e0 1586`coding:' tag is found, it checks any local variables list in the last
447404a3 15873K bytes out of the SIZE bytes. Finally, if none of these methods
d21ba5e0
DL
1588succeed, it checks to see if any function in `auto-coding-functions'
1589gives a match.
63561304 1590
d21ba5e0
DL
1591The return value is the specified coding system, or nil if nothing is
1592specified.
87aba788 1593
ba74e833 1594The variable `set-auto-coding-function' (which see) is set to this
87aba788 1595function by default."
502522b2 1596 (or (auto-coding-alist-lookup filename)
447404a3
CW
1597 ;; Try using `auto-coding-regexp-alist'.
1598 (save-excursion
1599 (let ((alist auto-coding-regexp-alist)
1600 coding-system)
1601 (while (and alist (not coding-system))
1602 (let ((regexp (car (car alist))))
1603 (when (re-search-forward regexp (+ (point) size) t)
1604 (setq coding-system (cdr (car alist)))))
0bca779a 1605 (setq alist (cdr alist)))
447404a3 1606 coding-system))
502522b2
GM
1607 (let* ((case-fold-search t)
1608 (head-start (point))
1609 (head-end (+ head-start (min size 1024)))
1610 (tail-start (+ head-start (max (- size 3072) 0)))
1611 (tail-end (+ head-start size))
1612 coding-system head-found tail-found pos)
1613 ;; Try a short cut by searching for the string "coding:"
1614 ;; and for "unibyte:" at the head and tail of SIZE bytes.
1615 (setq head-found (or (search-forward "coding:" head-end t)
1616 (search-forward "unibyte:" head-end t)))
1617 (if (and head-found (> head-found tail-start))
1618 ;; Head and tail are overlapped.
1619 (setq tail-found head-found)
1620 (goto-char tail-start)
1621 (setq tail-found (or (search-forward "coding:" tail-end t)
1622 (search-forward "unibyte:" tail-end t))))
1623
1624 ;; At first check the head.
1625 (when head-found
1626 (goto-char head-start)
6b66d028
RS
1627 (setq head-end (set-auto-mode-1))
1628 (setq head-start (point))
1d8e9a7c 1629 (when (and head-end (< head-found head-end))
835f49b8 1630 (goto-char head-start)
502522b2
GM
1631 (when (and set-auto-coding-for-load
1632 (re-search-forward
6b66d028 1633 "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
502522b2
GM
1634 head-end t))
1635 (setq coding-system 'raw-text))
1636 (when (and (not coding-system)
1637 (re-search-forward
6b66d028 1638 "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
502522b2
GM
1639 head-end t))
1640 (setq coding-system (intern (match-string 2)))
1641 (or (coding-system-p coding-system)
1642 (setq coding-system nil)))))
1643
1644 ;; If no coding: tag in the head, check the tail.
6b61353c
KH
1645 ;; Here we must pay attention to the case that the end-of-line
1646 ;; is just "\r" and we can't use "^" nor "$" in regexp.
502522b2
GM
1647 (when (and tail-found (not coding-system))
1648 (goto-char tail-start)
6b61353c 1649 (re-search-forward "[\r\n]\^L" nil t)
502522b2 1650 (if (re-search-forward
9857367f 1651 "[\r\n]\\([^[\r\n]*\\)[ \t]*Local Variables:[ \t]*\\([^\r\n]*\\)[\r\n]"
6b61353c
KH
1652 tail-end t)
1653 ;; The prefix is what comes before "local variables:" in its
1654 ;; line. The suffix is what comes after "local variables:"
502522b2
GM
1655 ;; in its line.
1656 (let* ((prefix (regexp-quote (match-string 1)))
1657 (suffix (regexp-quote (match-string 2)))
1658 (re-coding
1659 (concat
6b61353c 1660 "[\r\n]" prefix
cfe98f50
GM
1661 ;; N.B. without the \n below, the regexp can
1662 ;; eat newlines.
6b61353c
KH
1663 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
1664 suffix "[\r\n]"))
502522b2
GM
1665 (re-unibyte
1666 (concat
6b61353c
KH
1667 "[\r\n]" prefix
1668 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
1669 suffix "[\r\n]"))
502522b2 1670 (re-end
9857367f 1671 (concat "[\r\n]" prefix "[ \t]*End *:[ \t]*" suffix
6b61353c
KH
1672 "[\r\n]?"))
1673 (pos (1- (point))))
1674 (forward-char -1) ; skip back \r or \n.
502522b2
GM
1675 (re-search-forward re-end tail-end 'move)
1676 (setq tail-end (point))
1677 (goto-char pos)
1678 (when (and set-auto-coding-for-load
1679 (re-search-forward re-unibyte tail-end t))
1680 (setq coding-system 'raw-text))
1681 (when (and (not coding-system)
1682 (re-search-forward re-coding tail-end t))
1683 (setq coding-system (intern (match-string 1)))
1684 (or (coding-system-p coding-system)
1685 (setq coding-system nil))))))
447404a3
CW
1686 coding-system)
1687 ;; Finally, try all the `auto-coding-functions'.
1688 (let ((funcs auto-coding-functions)
1689 (coding-system nil))
1690 (while (and funcs (not coding-system))
1691 (setq coding-system (condition-case e
1692 (save-excursion
1693 (goto-char (point-min))
1694 (funcall (pop funcs) size))
1695 (error nil))))
502522b2 1696 coding-system)))
63561304
KH
1697
1698(setq set-auto-coding-function 'set-auto-coding)
87aba788 1699
c36b37ed 1700(defun after-insert-file-set-coding (inserted)
872a0a6f
RS
1701 "Set `buffer-file-coding-system' of current buffer after text is inserted.
1702INSERTED is the number of characters that were inserted, as figured
1703in the situation before this function. Return the number of characters
1704inserted, as figured in the situation after. The two numbers can be
1705different if the buffer has become unibyte."
4ed46869
KH
1706 (if last-coding-system-used
1707 (let ((coding-system
1708 (find-new-buffer-file-coding-system last-coding-system-used))
1709 (modified-p (buffer-modified-p)))
0269ddfb 1710 (when coding-system
71983219 1711 (set-buffer-file-coding-system coding-system t)
0269ddfb 1712 (set-buffer-modified-p modified-p))))
d0c26c63 1713 inserted)
4ed46869 1714
8057896b 1715;; The coding-spec and eol-type of coding-system returned is decided
4ed46869
KH
1716;; independently in the following order.
1717;; 1. That of buffer-file-coding-system locally bound.
1718;; 2. That of CODING.
1719
1720(defun find-new-buffer-file-coding-system (coding)
1721 "Return a coding system for a buffer when a file of CODING is inserted.
a73a8c89
KH
1722The local variable `buffer-file-coding-system' of the current buffer
1723is set to the returned value.
509064c5 1724Return nil if there's no need to set `buffer-file-coding-system'."
4ed46869 1725 (let (local-coding local-eol
b685f8d6 1726 found-coding found-eol
4ed46869
KH
1727 new-coding new-eol)
1728 (if (null coding)
1729 ;; Nothing found about coding.
1730 nil
1731
b685f8d6
RS
1732 ;; Get information of `buffer-file-coding-system' in LOCAL-EOL
1733 ;; and LOCAL-CODING.
1734 (setq local-eol (coding-system-eol-type buffer-file-coding-system))
1735 (if (null (numberp local-eol))
1736 ;; But eol-type is not yet set.
1737 (setq local-eol nil))
0269ddfb 1738 (if (and buffer-file-coding-system
c1841772
KH
1739 (not (eq (coding-system-type buffer-file-coding-system)
1740 'undecided)))
0269ddfb 1741 (setq local-coding (coding-system-base buffer-file-coding-system)))
b685f8d6
RS
1742
1743 (if (and (local-variable-p 'buffer-file-coding-system)
1744 local-eol local-coding)
4ed46869
KH
1745 ;; The current buffer has already set full coding-system, we
1746 ;; had better not change it.
1747 nil
1748
8057896b 1749 (setq found-eol (coding-system-eol-type coding))
4ed46869 1750 (if (null (numberp found-eol))
be02cd54
EZ
1751 ;; But eol-type is not found.
1752 ;; If EOL conversions are inhibited, force unix eol-type.
1753 (setq found-eol (if inhibit-eol-conversion 0)))
c1841772 1754 (setq found-coding (coding-system-base coding))
c76b5c99
KH
1755
1756 (if (and (not found-eol) (eq found-coding 'undecided))
1757 ;; No valid coding information found.
1758 nil
1759
1760 ;; Some coding information (eol or text) found.
1761
1762 ;; The local setting takes precedence over the found one.
1763 (setq new-coding (if (local-variable-p 'buffer-file-coding-system)
1764 (or local-coding found-coding)
1765 (or found-coding local-coding)))
1766 (setq new-eol (if (local-variable-p 'buffer-file-coding-system)
1767 (or local-eol found-eol)
1768 (or found-eol local-eol)))
1769
1770 (let ((eol-type (coding-system-eol-type new-coding)))
1771 (if (and (numberp new-eol) (vectorp eol-type))
1772 (aref eol-type new-eol)
1773 new-coding)))))))
4ed46869 1774
fe831d33
GV
1775(defun modify-coding-system-alist (target-type regexp coding-system)
1776 "Modify one of look up tables for finding a coding system on I/O operation.
8c453b46
RS
1777There are three of such tables, `file-coding-system-alist',
1778`process-coding-system-alist', and `network-coding-system-alist'.
fe831d33
GV
1779
1780TARGET-TYPE specifies which of them to modify.
8c453b46
RS
1781If it is `file', it affects `file-coding-system-alist' (which see).
1782If it is `process', it affects `process-coding-system-alist' (which see).
e8dd0160 1783If it is `network', it affects `network-coding-system-alist' (which see).
fe831d33
GV
1784
1785REGEXP is a regular expression matching a target of I/O operation.
1786The target is a file name if TARGET-TYPE is `file', a program name if
1787TARGET-TYPE is `process', or a network service name or a port number
1788to connect to if TARGET-TYPE is `network'.
1789
1790CODING-SYSTEM is a coding system to perform code conversion on the I/O
8c453b46
RS
1791operation, or a cons cell (DECODING . ENCODING) specifying the coding systems
1792for decoding and encoding respectively,
1793or a function symbol which, when called, returns such a cons cell."
fe831d33
GV
1794 (or (memq target-type '(file process network))
1795 (error "Invalid target type: %s" target-type))
1796 (or (stringp regexp)
1797 (and (eq target-type 'network) (integerp regexp))
1798 (error "Invalid regular expression: %s" regexp))
1799 (if (symbolp coding-system)
1800 (if (not (fboundp coding-system))
1801 (progn
1802 (check-coding-system coding-system)
1803 (setq coding-system (cons coding-system coding-system))))
1804 (check-coding-system (car coding-system))
1805 (check-coding-system (cdr coding-system)))
1806 (cond ((eq target-type 'file)
1807 (let ((slot (assoc regexp file-coding-system-alist)))
1808 (if slot
1809 (setcdr slot coding-system)
1810 (setq file-coding-system-alist
1811 (cons (cons regexp coding-system)
1812 file-coding-system-alist)))))
1813 ((eq target-type 'process)
1814 (let ((slot (assoc regexp process-coding-system-alist)))
1815 (if slot
1816 (setcdr slot coding-system)
1817 (setq process-coding-system-alist
1818 (cons (cons regexp coding-system)
1819 process-coding-system-alist)))))
1820 (t
1821 (let ((slot (assoc regexp network-coding-system-alist)))
1822 (if slot
1823 (setcdr slot coding-system)
1824 (setq network-coding-system-alist
1825 (cons (cons regexp coding-system)
1826 network-coding-system-alist)))))))
1827
db046b7d
KH
1828(defun decode-coding-inserted-region (from to filename
1829 &optional visit beg end replace)
f29387e8
KH
1830 "Decode the region between FROM and TO as if it is read from file FILENAME.
1831Optional arguments VISIT, BEG, END, and REPLACE are the same as those
1832of the function `insert-file-contents'."
1833 (save-excursion
1834 (save-restriction
1835 (narrow-to-region from to)
1836 (goto-char (point-min))
1837 (let ((coding coding-system-for-read))
1838 (or coding
1839 (setq coding (funcall set-auto-coding-function
1840 filename (- (point-max) (point-min)))))
1841 (or coding
6b61353c
KH
1842 (setq coding (car (find-operation-coding-system
1843 'insert-file-contents
1844 filename visit beg end replace))))
f29387e8
KH
1845 (if (coding-system-p coding)
1846 (or enable-multibyte-characters
1847 (setq coding
1848 (coding-system-change-text-conversion coding 'raw-text)))
1849 (setq coding nil))
1850 (if coding
b12e19b2
KH
1851 (decode-coding-region (point-min) (point-max) coding)
1852 (setq last-coding-system-used coding))))))
f29387e8 1853
b25eef20 1854(defun make-translation-table (&rest args)
a284eea3 1855 "Make a translation table from arguments.
d38b07f9 1856A translation table is a char table intended for character
a284eea3
DL
1857translation in CCL programs.
1858
d38b07f9 1859Each argument is a list of elements of the form (FROM . TO), where FROM
a284eea3 1860is a character to be translated to TO.
13d5617d 1861
4e003d37
KH
1862The arguments and forms in each argument are processed in the given
1863order, and if a previous form already translates TO to some other
1864character, say TO-ALT, FROM is also translated to TO-ALT."
f967223b 1865 (let ((table (make-char-table 'translation-table))
a73a8c89 1866 revlist)
5d75f46f
KH
1867 (dolist (elts args)
1868 (dolist (elt elts)
1869 (let ((from (car elt))
1870 (to (cdr elt))
1871 to-alt rev-from rev-to)
1872 ;; If we have already translated TO to TO-ALT, FROM should
1873 ;; also be translated to TO-ALT.
1874 (if (setq to-alt (aref table to))
1875 (setq to to-alt))
1876 (aset table from to)
1877 ;; If we have already translated some chars to FROM, they
1878 ;; should also be translated to TO.
1879 (when (setq rev-from (assq from revlist))
1880 (dolist (elt (cdr rev-from))
1881 (aset table elt to))
1882 (setq revlist (delq rev-from revlist)
1883 rev-from (cdr rev-from)))
1884 ;; Now update REVLIST.
1885 (setq rev-to (assq to revlist))
1886 (if rev-to
1887 (setcdr rev-to (cons from (cdr rev-to)))
1888 (setq rev-to (list to from)
1889 revlist (cons rev-to revlist)))
1890 (if rev-from
1891 (setcdr rev-to (append rev-from (cdr rev-to)))))))
a73a8c89 1892 ;; Return TABLE just created.
350cd166 1893 (set-char-table-extra-slot table 1 1)
a73a8c89
KH
1894 table))
1895
c76b5c99
KH
1896(defun make-translation-table-from-vector (vec)
1897 "Make translation table from decoding vector VEC.
9e3b6057
DL
1898VEC is an array of 256 elements to map unibyte codes to multibyte
1899characters. Elements may be nil for undefined code points.
c76b5c99
KH
1900See also the variable `nonascii-translation-table'."
1901 (let ((table (make-char-table 'translation-table))
1902 (rev-table (make-char-table 'translation-table))
c76b5c99 1903 ch)
9e3b6057 1904 (dotimes (i 256)
c76b5c99 1905 (setq ch (aref vec i))
9e3b6057
DL
1906 (when ch
1907 (aset table i ch)
1908 (if (>= ch 256)
1909 (aset rev-table ch i))))
c76b5c99 1910 (set-char-table-extra-slot table 0 rev-table)
350cd166
KH
1911 (set-char-table-extra-slot table 1 1)
1912 (set-char-table-extra-slot rev-table 1 1)
c76b5c99
KH
1913 table))
1914
a6d1872e
KH
1915(defun make-translation-table-from-alist (alist)
1916 "Make translation table from N<->M mapping in ALIST.
1917ALIST is an alist, each element has the form (FROM . TO).
1918FROM and TO are a character or a vector of characters.
1919If FROM is a character, that character is translated to TO.
1920If FROM is a vector of characters, that sequence is translated to TO.
350cd166
KH
1921The first extra-slot of the value is a translation table for reverse mapping."
1922 (let ((tables (vector (make-char-table 'translation-table)
1923 (make-char-table 'translation-table)))
1924 table max-lookup from to idx val)
1925 (dotimes (i 2)
1926 (setq table (aref tables i))
1927 (setq max-lookup 1)
1928 (dolist (elt alist)
1929 (if (= i 0)
1930 (setq from (car elt) to (cdr elt))
1931 (setq from (cdr elt) to (car elt)))
1932 (if (characterp from)
1933 (setq idx from)
1934 (setq idx (aref from 0)
1935 max-lookup (max max-lookup (length from))))
1936 (setq val (aref table idx))
1937 (if val
1938 (progn
1939 (or (consp val)
1940 (setq val (list (cons (vector idx) val))))
1941 (if (characterp from)
1942 (setq from (vector from)))
1943 (setq val (nconc val (list (cons from to)))))
1944 (if (characterp from)
1945 (setq val to)
1946 (setq val (list (cons from to)))))
1947 (aset table idx val))
1948 (set-char-table-extra-slot table 1 max-lookup))
1949 (set-char-table-extra-slot (aref tables 0) 0 (aref tables 1))
1950 (aref tables 0)))
a6d1872e 1951
f967223b 1952(defun define-translation-table (symbol &rest args)
a284eea3
DL
1953 "Define SYMBOL as the name of translation table made by ARGS.
1954This sets up information so that the table can be used for
1955translations in a CCL program.
b25eef20 1956
a284eea3
DL
1957If the first element of ARGS is a char-table whose purpose is
1958`translation-table', just define SYMBOL to name it. (Note that this
1959function does not bind SYMBOL.)
007c79c8 1960
a284eea3 1961Any other ARGS should be suitable as arguments of the function
007c79c8 1962`make-translation-table' (which see).
b25eef20 1963
452fdb31 1964This function sets properties `translation-table' and
521d4010
DL
1965`translation-table-id' of SYMBOL to the created table itself and the
1966identification number of the table respectively. It also registers
1967the table in `translation-table-vector'."
007c79c8
KH
1968 (let ((table (if (and (char-table-p (car args))
1969 (eq (char-table-subtype (car args))
1970 'translation-table))
1971 (car args)
1972 (apply 'make-translation-table args)))
f967223b 1973 (len (length translation-table-vector))
d9e3229d 1974 (id 0)
b25eef20 1975 (done nil))
f967223b 1976 (put symbol 'translation-table table)
b25eef20
KH
1977 (while (not done)
1978 (if (>= id len)
f967223b
KH
1979 (setq translation-table-vector
1980 (vconcat translation-table-vector (make-vector len nil))))
1981 (let ((slot (aref translation-table-vector id)))
b25eef20
KH
1982 (if (or (not slot)
1983 (eq (car slot) symbol))
1984 (progn
f967223b 1985 (aset translation-table-vector id (cons symbol table))
007c79c8
KH
1986 (setq done t))
1987 (setq id (1+ id)))))
f967223b 1988 (put symbol 'translation-table-id id)
d9e3229d
KH
1989 id))
1990
16431d3c
KH
1991(defun translate-region (start end table)
1992 "From START to END, translate characters according to TABLE.
1993TABLE is a string or a char-table.
1994If TABLE is a string, the Nth character in it is the mapping
1995for the character with code N.
1996If TABLE is a char-table, the element for character N is the mapping
1997for the character with code N.
1998It returns the number of characters changed."
1999 (interactive
2000 (list (region-beginning)
2001 (region-end)
2002 (let (table l)
2003 (dotimes (i (length translation-table-vector))
2004 (if (consp (aref translation-table-vector i))
2005 (push (list (symbol-name
2006 (car (aref translation-table-vector i)))) l)))
2007 (if (not l)
2008 (error "No translation table defined"))
2009 (while (not table)
2010 (setq table (completing-read "Translation table: " l nil t)))
2011 (intern table))))
2012 (if (symbolp table)
2013 (let ((val (get table 'translation-table)))
2014 (or (char-table-p val)
2015 (error "Invalid translation table name: %s" table))
2016 (setq table val)))
2017 (translate-region-internal start end table))
2018
35554641
KH
2019(put 'with-category-table 'lisp-indent-function 1)
2020
ef6e365d 2021(defmacro with-category-table (table &rest body)
8f924df7 2022 "Execute BODY like `progn' with CATEGORY-TABLE the current category table.
ef6e365d
JPW
2023The category table of the current buffer is saved, BODY is evaluated,
2024then the saved table is restored, even in case of an abnormal exit.
2025Value is what BODY returns."
2026 (let ((old-table (make-symbol "old-table"))
2027 (old-buffer (make-symbol "old-buffer")))
2028 `(let ((,old-table (category-table))
2029 (,old-buffer (current-buffer)))
2030 (unwind-protect
2031 (progn
2032 (set-category-table ,table)
2033 ,@body)
2034 (save-current-buffer
2035 (set-buffer ,old-buffer)
2036 (set-category-table ,old-table))))))
35554641 2037
394e4eb0
DL
2038(defun define-translation-hash-table (symbol table)
2039 "Define SYMBOL as the name of the hash translation TABLE for use in CCL.
2040
2041Analogous to `define-translation-table', but updates
2042`translation-hash-table-vector' and the table is for use in the CCL
2043`lookup-integer' and `lookup-character' functions."
2044 (unless (and (symbolp symbol)
2045 (hash-table-p table))
2046 (error "Bad args to define-translation-hash-table"))
2047 (let ((len (length translation-hash-table-vector))
2048 (id 0)
2049 done)
2050 (put symbol 'translation-hash-table table)
2051 (while (not done)
2052 (if (>= id len)
2053 (setq translation-hash-table-vector
2054 (vconcat translation-hash-table-vector [nil])))
2055 (let ((slot (aref translation-hash-table-vector id)))
2056 (if (or (not slot)
2057 (eq (car slot) symbol))
2058 (progn
2059 (aset translation-hash-table-vector id (cons symbol table))
2060 (setq done t))
2061 (setq id (1+ id)))))
2062 (put symbol 'translation-hash-table-id id)
2063 id))
2064
69eba008
KH
2065;;; Initialize some variables.
2066
2067(put 'use-default-ascent 'char-table-extra-slots 0)
2068(setq use-default-ascent (make-char-table 'use-default-ascent))
d6d6d592
KH
2069(put 'ignore-relative-composition 'char-table-extra-slots 0)
2070(setq ignore-relative-composition
2071 (make-char-table 'ignore-relative-composition))
69eba008 2072
256d0fef
DL
2073(make-obsolete 'set-char-table-default
2074 "Generic characters no longer exist" "22.1")
d9f6dfe6
CW
2075
2076;;; Built-in auto-coding-functions:
2077
2078(defun sgml-xml-auto-coding-function (size)
2079 "Determine whether the buffer is XML, and if so, its encoding.
2080This function is intended to be added to `auto-coding-functions'."
c069d3ac
SM
2081 (setq size (+ (point) size))
2082 (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" size t)
d9f6dfe6
CW
2083 (let ((end (save-excursion
2084 ;; This is a hack.
2f4e9c47 2085 (re-search-forward "\"\\s-*\\?>" size t))))
d9f6dfe6
CW
2086 (when end
2087 (if (re-search-forward "encoding=\"\\(.+?\\)\"" end t)
447404a3
CW
2088 (let* ((match (match-string 1))
2089 (sym (intern (downcase match))))
2090 (if (coding-system-p sym)
2091 sym
2092 (message "Warning: unknown coding system \"%s\"" match)
2093 nil))
d9f6dfe6
CW
2094 'utf-8)))))
2095
447404a3
CW
2096(defun sgml-html-meta-auto-coding-function (size)
2097 "If the buffer has an HTML meta tag, use it to determine encoding.
2098This function is intended to be added to `auto-coding-functions'."
c069d3ac 2099 (setq size (min (+ (point) size)
447404a3
CW
2100 ;; Only search forward 10 lines
2101 (save-excursion
2102 (forward-line 10)
2103 (point))))
81668eb2 2104 (when (and (search-forward "<html" size t)
447404a3
CW
2105 (re-search-forward "<meta\\s-+http-equiv=\"content-type\"\\s-+content=\"text/\\sw+;\\s-*charset=\\(.+?\\)\"" size t))
2106 (let* ((match (match-string 1))
2107 (sym (intern (downcase match))))
2108 (if (coding-system-p sym)
2109 sym
2110 (message "Warning: unknown coding system \"%s\"" match)
2111 nil))))
0bca779a 2112
69eba008 2113;;;
4ed46869
KH
2114(provide 'mule)
2115
6b61353c 2116;;; arch-tag: 9aebaa6e-0e8a-40a9-b857-cb5d04a39e7c
4ed46869 2117;;; mule.el ends here