(coding-system-iso-2022-flags): Revert
[bpt/emacs.git] / lisp / international / mule.el
CommitLineData
07513d64 1;;; mule.el --- basic commands for multilingual environment
4ed46869 2
4ed46869 3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
58cfed09 4;; Licensed to the Free Software Foundation.
9e3b6057 5;; Copyright (C) 2001 Free Software Foundation, Inc.
c1841772
KH
6;; Copyright (C) 2001, 2002
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H13PRO009
4ed46869
KH
9
10;; Keywords: mule, multilingual, character set, coding system
11
12;; This file is part of GNU Emacs.
13
14;; GNU Emacs is free software; you can redistribute it and/or modify
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
369314dc
KH
25;; along with GNU Emacs; see the file COPYING. If not, write to the
26;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27;; Boston, MA 02111-1307, USA.
4ed46869 28
60370d40
PJ
29;;; Commentary:
30
4ed46869
KH
31;;; Code:
32
c1841772 33(defconst mule-version "7.0 (SAKAKI)" "\
4ed46869
KH
34Version number and name of this version of MULE (multilingual environment).")
35
c1841772 36(defconst mule-version-date "2002.2.28" "\
4ed46869
KH
37Distribution date of this version of MULE (multilingual environment).")
38
c1841772
KH
39
40\f
41;;; CHARACTER
42(defalias 'char-valid-p 'characterp)
43(make-obsolete 'char-valid-p 'characterp "22.1")
44
45\f
46;;; CHARSET
47
48(defun define-charset (name docstring &rest props)
49 "Define NAME (symbol) as a charset with DOCSTRING.
50The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
07513d64 51may be any symbol. The following have special meanings, and one of
c1841772
KH
52`:code-offset', `:map', `:parents' must be specified.
53
54`:short-name'
55
56VALUE must be a short string to identify the charset. If omitted,
57NAME is used.
58
59`:long-name'
60
61VALUE must be a string longer than `:short-name' to identify the
07513d64 62charset. If omitted, the value of the `:short-name' attribute is used.
c1841772
KH
63
64`:dimension'
65
66VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of
07513d64
DL
67code-points of the charsets. If omitted, it is calculated from the
68value of the `:code-space' attribute.
c1841772
KH
69
70`:code-space'
71
72VALUE must be a vector of length at most 8 specifying the byte code
73range of each dimension in this format:
74 [ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
07513d64 75where MIN-N is the minimum byte value of Nth dimension of code-point,
c1841772
KH
76MAX-N is the maximum byte value of that.
77
78`:iso-final-char'
79
80VALUE must be a character in the range 32 to 127 (inclusive)
81specifying the final char of the charset for ISO-2022 encoding. If
82omitted, the charset can't be encoded by ISO-2022 based
83coding-systems.
84
85`:iso-revision-number'
86
87VALUE must be an integer in the range 0..63, specifying the revision
88number of the charset for ISO-2022 encoding.
89
90`:emacs-mule-id'
91
92VALUE must be an integer of 0, 128..255. If omitted, the charset
93can't be encoded by coding-systems of type `emacs-mule'.
94
95`:ascii-compatible-p'
96
07513d64
DL
97VALUE must be nil or t (default nil). If VALUE is t, the charset is
98compatible with ASCII, i.e. the first 128 code points map to ASCII.
c1841772
KH
99
100`:supplementary-p'
101
102VALUE must be nil or t. If the VALUE is t, the charset is
07513d64
DL
103supplementary, which means it is used only as a parent of some other
104charset.
c1841772
KH
105
106`:invalid-code'
107
108VALUE must be a nonnegative integer that can be used as an invalid
109code point of the charset. If the minimum code is 0 and the maximum
110code is greater than Emacs' maximum integer value, `:invalid-code'
111should not be omitted.
112
113`:code-offset'
114
07513d64
DL
115VALUE must be an integer added to the index number of a character to
116get the corresponding character code.
c1841772
KH
117
118`:map'
119
120VALUE must be vector or string.
121
122If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
123where CODE-n is a code-point of the charset, and CHAR-n is the
07513d64 124corresponding character code.
c1841772
KH
125
126If it is a string, it is a name of file that contains the above
3e4abc9e
KH
127information. Each line of the file must be this format:
128 0xXXX 0xYYY
129where XXX is a hexadecimal representation of CODE-n and YYY is a
130hexadecimal representation of CHAR-n. A line starting with `#' is a
131comment line.
c1841772
KH
132
133`:parents'
134
135VALUE must be a list of parent charsets. The charset inherits
136characters from them. Each element of the list may be a cons (PARENT
137. OFFSET), where PARENT is a parent charset, and OFFSET is an offset
138value to add to a code point of this charset to get the corresponding
139code point of PARENT.
140
141`:unify-map'
142
143VALUE must be vector or string.
144
145If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
146where CODE-n is a code-point of the charset, and CHAR-n is the
07513d64 147corresponding Unicode character code.
c1841772
KH
148
149If it is a string, it is a name of file that contains the above
3e4abc9e
KH
150information. The file format is the same as what described for `:map'
151attribute."
c1841772
KH
152 (let ((attrs (mapcar 'list '(:dimension
153 :code-space
154 :iso-final-char
155 :iso-revision-number
156 :emacs-mule-id
157 :ascii-compatible-p
158 :supplementary-p
159 :invalid-code
160 :code-offset
161 :map
162 :parents
163 :unify-map
164 :plist))))
165
166 ;; If :dimension is omitted, get the dimension from :code-space.
167 (let ((dimension (plist-get props :dimension)))
168 (or dimension
169 (progn
170 (setq dimension (/ (length (plist-get props :code-space)) 2))
171 (setq props (plist-put props :dimension dimension)))))
172
173 (dolist (slot attrs)
174 (setcdr slot (plist-get props (car slot))))
175
176 ;; Make sure that the value of :code-space is a vector of 8
177 ;; elements.
178 (let* ((slot (assq :code-space attrs))
179 (val (cdr slot))
180 (len (length val)))
181 (if (< len 8)
182 (setcdr slot
183 (vconcat val (make-vector (- 8 len) 0)))))
184
185 ;; Add :name and :docstring properties to PROPS.
186 (setq props
187 (cons :name (cons name (cons :docstring (cons docstring props)))))
188 (or (plist-get props :short-name)
189 (plist-put props :short-name (symbol-name name)))
190 (or (plist-get props :long-name)
191 (plist-put props :long-name (plist-get props :short-name)))
e1e529fa
DL
192 ;; We can probably get a worthwhile amount in purespace.
193 (setq props
194 (mapcar (lambda (elt)
195 (if (stringp elt)
196 (purecopy elt)
197 elt))
198 props))
c1841772
KH
199 (setcdr (assq :plist attrs) props)
200
201 (apply 'define-charset-internal name (mapcar 'cdr attrs))))
202
203
4ed46869 204(defun load-with-code-conversion (fullname file &optional noerror nomessage)
0f69cb38
KH
205 "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
206The file contents are decoded before evaluation if necessary.
4ed46869
KH
207If optional second arg NOERROR is non-nil,
208 report no error if FILE doesn't exist.
209Print messages at start and end of loading unless
210 optional third arg NOMESSAGE is non-nil.
211Return t if file exists."
212 (if (null (file-readable-p fullname))
213 (and (null noerror)
214 (signal 'file-error (list "Cannot open load file" file)))
215 ;; Read file with code conversion, and then eval.
216 (let* ((buffer
217 ;; To avoid any autoloading, set default-major-mode to
218 ;; fundamental-mode.
88162676
RS
219 ;; So that we don't get completely screwed if the
220 ;; file is encoded in some complicated character set,
221 ;; read it with real decoding, as a multibyte buffer,
222 ;; even if this is a --unibyte Emacs session.
223 (let ((default-major-mode 'fundamental-mode)
224 (default-enable-multibyte-characters t))
4ed46869
KH
225 ;; We can't use `generate-new-buffer' because files.el
226 ;; is not yet loaded.
227 (get-buffer-create (generate-new-buffer-name " *load*"))))
db5cae4b
SM
228 (load-in-progress t)
229 (source (save-match-data (string-match "\\.el\\'" fullname))))
230 (unless nomessage
231 (if source
232 (message "Loading %s (source)..." file)
233 (message "Loading %s..." file)))
234 (when purify-flag
235 (setq preloaded-file-list (cons file preloaded-file-list)))
4ed46869 236 (unwind-protect
a6acd8a2 237 (let ((load-file-name fullname)
1c4cc63a 238 (set-auto-coding-for-load t)
a6acd8a2 239 (inhibit-file-name-operation nil))
4ed46869
KH
240 (save-excursion
241 (set-buffer buffer)
242 (insert-file-contents fullname)
7d276780
EZ
243 ;; If the loaded file was inserted with no-conversion or
244 ;; raw-text coding system, make the buffer unibyte.
245 ;; Otherwise, eval-buffer might try to interpret random
246 ;; binary junk as multibyte characters.
247 (if (and enable-multibyte-characters
248 (or (eq (coding-system-type last-coding-system-used) 5)
249 (eq last-coding-system-used 'no-conversion)))
250 (set-buffer-multibyte nil))
4ed46869
KH
251 ;; Make `kill-buffer' quiet.
252 (set-buffer-modified-p nil))
0f69cb38 253 ;; Have the original buffer current while we eval.
88162676
RS
254 (eval-buffer buffer nil file
255 ;; If this Emacs is running with --unibyte,
256 ;; convert multibyte strings to unibyte
257 ;; after reading them.
ba74e833 258;; (not default-enable-multibyte-characters)
8dd08b5b 259 nil t
ba74e833 260 ))
cfc70cdf
RS
261 (let (kill-buffer-hook kill-buffer-query-functions)
262 (kill-buffer buffer)))
4ed46869 263 (let ((hook (assoc file after-load-alist)))
db5cae4b
SM
264 (when hook
265 (mapcar (function eval) (cdr hook))))
266 (unless (or nomessage noninteractive)
267 (if source
268 (message "Loading %s (source)...done" file)
269 (message "Loading %s...done" file)))
4ed46869
KH
270 t)))
271
272;; API (Application Program Interface) for charsets.
273
c1841772
KH
274;;; Charset property
275
276(defun get-charset-property (charset propname)
277 "Return the value of CHARSET's PROPNAME property.
278This is the last value stored with
279 (put-charset-property CHARSET PROPNAME VALUE)."
280 (plist-get (charset-plist charset) propname))
281
282(defun put-charset-property (charset propname value)
283 "Store CHARSETS's PROPNAME property with value VALUE.
284It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
285 (set-charset-plist charset
286 (plist-put (charset-plist charset) propname value)))
287
288
289(defun charset-description (charset)
290 "Return description string of CHARSET."
291 (plist-get (charset-plist charset) :docstring))
292
293(defun charset-dimension (charset)
294 "Return dimension string of CHARSET."
295 (plist-get (charset-plist charset) :dimension))
296
297(defun charset-chars (charset)
298 "Return character numbers contained in a dimension of CHARSET."
103cc921 299 (let ((code-space (plist-get (charset-plist charset) :code-space)))
c1841772
KH
300 (1+ (- (aref code-space 1) (aref code-space 0)))))
301
302(defun charset-iso-final-char (charset)
303 "Return final char of CHARSET."
304 (or (plist-get (charset-plist charset) :iso-final-char)
305 -1))
4ed46869
KH
306
307(defmacro charset-short-name (charset)
c1841772
KH
308 "Return short name of CHARSET."
309 (plist-get (charset-plist charset) :short-name))
4ed46869
KH
310
311(defmacro charset-long-name (charset)
c1841772
KH
312 "Return long name of CHARSET."
313 (plist-get (charset-plist charset) :long-name))
4ed46869 314
d3675a42 315(defun charset-list ()
900dc6e3
KH
316 "Return list of charsets ever defined.
317
d3675a42 318This function is provided for backward compatibility.
900dc6e3 319Now we have the variable `charset-list'."
d3675a42 320 charset-list)
07513d64 321(make-obsolete 'charset-list "Use variable `charset-list'" "22.1")
d3675a42 322
c1841772
KH
323(defun generic-char-p (char)
324 "Always return nil. This exists only for backward compatibility."
325 nil)
07513d64 326(make-obsolete 'generic-char-p "Generic characters no longer exist" "22.1")
0269ddfb 327\f
e76938e7 328;; Coding system stuff
4ed46869 329
c1841772
KH
330;; Coding system is a symbol that has been defined by the function
331;; `define-coding-system'.
c76b5c99 332
c1841772
KH
333(defconst coding-system-iso-2022-flags
334 '(long-form
335 ascii-at-eol
336 ascii-at-cntl
337 7-bit
338 locking-shift
339 single-shift
340 designation
341 revision
342 direction
343 init-at-bol
344 designate-at-bol
345 safe
346 latin-extra
347 composition
caa7db3a 348 euc-tw-shift)
c1841772 349 "List of symbols that control ISO-2022 encoder/decoder.
0269ddfb 350
c1841772 351The value of `:flags' attribute in the argument of the function
caa7db3a 352`define-coding-system' must be one of them.
4ed46869 353
c1841772
KH
354If `long-form' is specified, use a long designation sequence on
355encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
356and `japanese-jisx0208'. The long designation sequence doesn't
357conform to ISO 2022, but used by such a coding system as
358`compound-text'.
359
360If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
361on encoding.
362
363If `ascii-at-cntl' is specified, designate ASCII to g0 before control
364codes and SPC on encoding.
365
366If `7-bit' is specified, use 7-bit code only on encoding.
367
368If `locking-shift' is specified, decode locking-shift code correctly
369on decoding, and use locking-shift to invoke a graphic element on
370encoding.
371
372If `single-shift' is specified, decode single-shift code correctly on
373decoding, and use single-shift to invoke a graphic element on encoding.
374
375If `designation' is specified, decode designation code correctly on
376decoding, and use designation to designate a charset to a graphic
377element on encoding.
378
379If `revision' is specified, produce an escape sequence to specify
380revision number of a charset on encoding. Such an escape sequence is
381always correctly decoded on decoding.
382
383If `direction' is specified, decode ISO6429's code for specifying
384direction correctly, and produced the code on encoding.
385
386If `init-at-bol' is specified, on encoding, it is assumed that
387invocation and designation statuses are reset at each beginning of
388line even if `ascii-at-eol' is not specified thus no code for
389resetting them are produced.
390
391If `safe' is specified, on encoding, characters not supported by a
392coding are replaced with `?'.
393
394If `latin-extra' is specified, code-detection routine assumes that a
395code specified in `latin-extra-code-table' (which see) is valid.
396
397If `composition' is specified, an escape sequence to specify
398composition sequence is correctly decode on decoding, and is produced
399on encoding.
400
401If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
caa7db3a 402correctly decoded on decoding, and is produced on encoding.")
c1841772
KH
403
404(defun define-coding-system (name docstring &rest props)
405 "Define NAME (symbol) as a coding system with DOCSTRING and attributes.
406The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
407may be any symbol.
408
409The following attributes have special meanings. If labeled as
410\"(required)\", it should not be omitted.
411
412`:mnemonic' (required)
413
414VALUE is a character to display on mode line for the coding system.
415
416`:coding-type' (required)
417
418VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022',
5170b329 419`emacs-mule', `shift-jis', `big5', `ccl', `raw-text', `undecided'.
c1841772
KH
420
421`:eol-type' (optional)
422
423VALUE is an EOL (end-of-line) format of the coding system. It must be
424one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL
425\(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF),
426and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on
427decoding by the coding system, Emacs automatically detects an EOL
428format of the source text.
429
430`:charset-list' (required)
431
432VALUE must be a list of charsets supported by the coding system. On
433encoding by the coding system, if a character belongs to multiple
434charsets in the list, a charset that comes earlier in the list is
435selected.
436
437`:ascii-compatible-p' (optional)
438
439If VALUE is non-nil, the coding system decodes all 7-bit bytes into
07513d64
DL
440the corresponding ASCII characters, and encodes all ASCII characters
441back to the corresponding 7-bit bytes. If omitted, the VALUE defaults
c1841772
KH
442to nil.
443
444`:decode-translation-table' (optional)
445
446VALUE must be a translation table to use on decoding.
447
448`:encode-translation-table' (optional)
449
450VALUE must be a translation table to use on encoding.
451
452`:post-read-conversion' (optional)
453
454VALUE must be a function to call after some text is inserted and
455decoded by the coding system itself and before any functions in
456`after-insert-functions' are called. The arguments to this function
457is the same as those of a function in `after-insert-functions',
458i.e. LENGTH of a text while putting point at the head of the text to
459be decoded
460
461`:pre-write-conversion'
462
463VALUE must be a function to call after all functions in
464`write-region-annotate-functions' and `buffer-file-format' are called,
465and before the text is encoded by the coding system itself. The
466arguments to this function is the same as those of a function in
467`write-region-annotate-functions', i.e. FROM and TO specifying region
468of a text.
469
470`:default-char'
471
472VALUE must be a character. On encoding, a character not supported by
473the coding system is replaced with VALUE.
474
475`:eol-type'
476
477VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like
478EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like
479EOL (CR). If omitted, on decoding, the coding system detect EOL
480format automatically, and on encoding, used Unix-like EOL.
481
482`:mime-charset'
483
484VALUE must be a symbol who has MIME-charset name.
485
486`:flags'
487
488VALUE must be a list of symbols that control ISO-2022 converter. Each
489symbol must be a member of the variable `coding-system-iso-2022-flags'
490\(which see). This attribute has a meaning only when `:coding-type'
491is `iso-2022'.
492
493`:designation'
494
495VALUE must be a vector [ G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
496GN-USAGE specifies the usage of graphic register GN as follows.
497
498If it is nil, no charset can be designated to GN.
499
07513d64 500If it is a charset, the charset is initially designated to GN, and
c1841772
KH
501never used by the other charsets.
502
503If it is a list, the elements must be charsets, nil, 94, or 96. GN
504can be used by all listed charsets. If the list contains 94, any
07513d64
DL
505charsets whose iso-chars is 94 can be designated to GN. If the list
506contains 96, any charsets whose iso-chars is 96 can be designated to
c1841772 507GN. If the first element is a charset, the charset is initially
07513d64 508designated to GN.
c1841772
KH
509
510This attribute has a meaning only when `:coding-type' is `iso-2022'.
511
512`:bom'
513
514VALUE must nil, t, or cons of coding systems whose `:coding-type' is
515`utf-16'.
516
517This attribute has a meaning only when `:coding-type' is `utf-16'.
518
519`:endian'
520
521VALUE must be t or nil. See the above description for the detail.
522
523This attribute has a meaning only when `:coding-type' is `utf-16'.
524
525`:ccl-decoder'
526
527This attribute has a meaning only when `:coding-type' is `ccl'.
528
529`:ccl-encoder'
530
531This attribute has a meaning only when `:coding-type' is `ccl'."
532 (let* ((common-attrs (mapcar 'list
533 '(:mnemonic
534 :coding-type
535 :charset-list
536 :ascii-compatible-p
537 :docode-translation-table
538 :encode-translation-table
539 :post-read-conversion
540 :pre-write-conversion
541 :default-char
542 :plist
543 :eol-type)))
544 (coding-type (plist-get props :coding-type))
545 (spec-attrs (mapcar 'list
546 (cond ((eq coding-type 'iso-2022)
547 '(:initial
548 :reg-usage
549 :request
550 :flags))
551 ((eq coding-type 'utf-16)
552 '(:bom
553 :endian))
554 ((eq coding-type 'ccl)
555 '(:ccl-decoder
556 :ccl-encoder
557 :valids))))))
558
559 (dolist (slot common-attrs)
560 (setcdr slot (plist-get props (car slot))))
561
562 (dolist (slot spec-attrs)
563 (setcdr slot (plist-get props (car slot))))
564
565 (if (eq coding-type 'iso-2022)
566 (let ((designation (plist-get props :designation))
567 (flags (plist-get props :flags))
568 (initial (make-vector 4 nil))
569 (reg-usage (cons 4 4))
570 request elt)
571 (dotimes (i 4)
572 (setq elt (aref designation i))
573 (cond ((charsetp elt)
574 (aset initial i elt)
575 (setq request (cons (cons elt i) request)))
576 ((consp elt)
577 (aset initial i (car elt))
578 (if (charsetp (car elt))
579 (setq request (cons (cons (car elt) i) request)))
580 (dolist (e (cdr elt))
581 (cond ((charsetp e)
582 (setq request (cons (cons e i) request)))
583 ((eq e 94)
584 (setcar reg-usage i))
585 ((eq e 96)
586 (setcdr reg-usage i))
587 ((eq e t)
588 (setcar reg-usage i)
589 (setcdr reg-usage i)))))))
590 (setcdr (assq :initial spec-attrs) initial)
591 (setcdr (assq :reg-usage spec-attrs) reg-usage)
592 (setcdr (assq :request spec-attrs) request)
593
594 ;; Change :flags value from a list to a bit-mask.
595 (let ((bits 0)
596 (i 0))
597 (dolist (elt coding-system-iso-2022-flags)
598 (if (memq elt flags)
599 (setq bits (logior bits (lsh 1 i))))
600 (setq i (1+ i)))
601 (setcdr (assq :flags spec-attrs) bits))))
602
603 ;; Add :name and :docstring properties to PROPS.
604 (setq props
e1e529fa
DL
605 (cons :name (cons name (cons :docstring (cons (purecopy docstring)
606 props)))))
c1841772
KH
607 (setcdr (assq :plist common-attrs) props)
608
609 (apply 'define-coding-system-internal
610 name (mapcar 'cdr (append common-attrs spec-attrs)))))
611
612(defun coding-system-doc-string (coding-system)
613 "Return the documentation string for CODING-SYSTEM."
614 (plist-get (coding-system-plist coding-system) :docstring))
4ed46869 615
4ed46869 616(defun coding-system-mnemonic (coding-system)
0269ddfb 617 "Return the mnemonic character of CODING-SYSTEM.
6e2c8840
KH
618The mnemonic character of a coding system is used in mode line
619to indicate the coding system. If the arg is nil, return ?-."
c1841772 620 (plist-get (coding-system-plist coding-system) :mnemonic))
4ed46869 621
c1841772
KH
622(defun coding-system-type (coding-system)
623 "Return the coding type of CODING-SYSTEM.
624A coding type is a symbol indicating the encoding method of CODING-SYSTEM.
625See the function `define-coding-system' for more detail."
626 (plist-get (coding-system-plist coding-system) :coding-type))
d3675a42 627
c1841772 628(defun coding-system-charset-list (coding-system)
07513d64 629 "Return list of charsets supported by CODING-SYSTEM.
c1841772
KH
630If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
631If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'."
632 (plist-get (coding-system-plist coding-system) :charset-list))
0269ddfb
KH
633
634(defun coding-system-get (coding-system prop)
07513d64
DL
635 "Extract a value from CODING-SYSTEM's property list for property PROP.
636For compatibility with Emacs 20/21, this accepts old-style symbols
637like `mime-charset' as well as the current style like `:mime-charset'."
638 (or (plist-get (coding-system-plist coding-system) prop)
639 (if (not (keywordp prop))
640 (plist-get (coding-system-plist coding-system)
641 (intern (concat ":" (symbol-name prop)))))))
0269ddfb
KH
642
643(defun coding-system-put (coding-system prop val)
644 "Change value in CODING-SYSTEM's property list PROP to VAL."
c1841772 645 (plist-put (coding-system-plist coding-system) prop val))
0269ddfb
KH
646
647(defalias 'coding-system-parent 'coding-system-base)
2598a293 648(make-obsolete 'coding-system-parent 'coding-system-base "20.3")
0269ddfb
KH
649
650;; Coding system also has a property `eol-type'.
651;;
652;; This property indicates how the coding system handles end-of-line
653;; format. The value is integer 0, 1, 2, or a vector of three coding
654;; systems. Each integer value 0, 1, and 2 indicates the format of
655;; end-of-line LF, CRLF, and CR respectively. A vector value
656;; indicates that the format of end-of-line should be detected
657;; automatically. Nth element of the vector is the subsidiary coding
658;; system whose `eol-type' property is N.
4ed46869 659
857ea15c
AS
660(defun coding-system-lessp (x y)
661 (cond ((eq x 'no-conversion) t)
662 ((eq y 'no-conversion) nil)
663 ((eq x 'emacs-mule) t)
664 ((eq y 'emacs-mule) nil)
665 ((eq x 'undecided) t)
666 ((eq y 'undecided) nil)
667 (t (let ((c1 (coding-system-mnemonic x))
668 (c2 (coding-system-mnemonic y)))
669 (or (< (downcase c1) (downcase c2))
670 (and (not (> (downcase c1) (downcase c2)))
671 (< c1 c2)))))))
672
857ea15c 673(defun add-to-coding-system-list (coding-system)
521d4010 674 "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted."
857ea15c
AS
675 (if (or (null coding-system-list)
676 (coding-system-lessp coding-system (car coding-system-list)))
677 (setq coding-system-list (cons coding-system coding-system-list))
678 (let ((len (length coding-system-list))
679 mid (tem coding-system-list))
680 (while (> len 1)
681 (setq mid (nthcdr (/ len 2) tem))
682 (if (coding-system-lessp (car mid) coding-system)
683 (setq tem mid
684 len (- len (/ len 2)))
685 (setq len (/ len 2))))
686 (setcdr tem (cons coding-system (cdr tem))))))
687
80a7463d 688(defun coding-system-list (&optional base-only)
c11a8f77
KH
689 "Return a list of all existing non-subsidiary coding systems.
690If optional arg BASE-ONLY is non-nil, only base coding systems are listed.
691The value doesn't include subsidiary coding systems which are what
692made from bases and aliases automatically for various end-of-line
693formats (e.g. iso-latin-1-unix, koi8-r-dos)."
80a7463d
KH
694 (let* ((codings (copy-sequence coding-system-list))
695 (tail (cons nil codings)))
696 ;; Remove subsidiary coding systems (eol variants) and alias
697 ;; coding systems (if necessary).
698 (while (cdr tail)
699 (let* ((coding (car (cdr tail)))
700 (aliases (coding-system-get coding 'alias-coding-systems)))
701 (if (or
702 ;; CODING is an eol variant if not in ALIASES.
703 (not (memq coding aliases))
704 ;; CODING is an alias if it is not car of ALIASES.
705 (and base-only (not (eq coding (car aliases)))))
706 (setcdr tail (cdr (cdr tail)))
707 (setq tail (cdr tail)))))
708 codings))
709
4ed46869 710(defun set-buffer-file-coding-system (coding-system &optional force)
358d28fb
RS
711 "Set the file coding-system of the current buffer to CODING-SYSTEM.
712This means that when you save the buffer, it will be converted
713according to CODING-SYSTEM. For a list of possible values of CODING-SYSTEM,
714use \\[list-coding-systems].
715
716If the buffer's previous file coding-system value specifies end-of-line
717conversion, and CODING-SYSTEM does not specify one, CODING-SYSTEM is
718merged with the already-specified end-of-line conversion.
b839fdcc
KH
719
720If the buffer's previous file coding-system value specifies text
721conversion, and CODING-SYSTEM does not specify one, CODING-SYSTEM is
722merged with the already-specified text conversion.
723
724However, if the optional prefix argument FORCE is non-nil, then
725CODING-SYSTEM is used exactly as specified.
aeef8f07
KH
726
727This marks the buffer modified so that the succeeding \\[save-buffer]
728surely saves the buffer with CODING-SYSTEM. From a program, if you
729don't want to mark the buffer modified, just set the variable
730`buffer-file-coding-system' directly."
d9e3229d 731 (interactive "zCoding system for visited file (default, nil): \nP")
4ed46869 732 (check-coding-system coding-system)
36d455c4 733 (if (and coding-system buffer-file-coding-system (null force))
8dd735c1
KH
734 (let ((base (coding-system-base buffer-file-coding-system))
735 (eol (coding-system-eol-type buffer-file-coding-system)))
736 ;; If CODING-SYSTEM doesn't specify text conversion, merge
737 ;; with that of buffer-file-coding-system.
738 (if (eq (coding-system-base coding-system) 'undecided)
739 (setq coding-system (coding-system-change-text-conversion
740 coding-system base)))
741 ;; If CODING-SYSTEM doesn't specify eol conversion, merge with
742 ;; that of buffer-file-coding-system.
743 (if (and (vectorp (coding-system-eol-type coding-system))
744 (numberp eol) (>= eol 0) (<= eol 2))
745 (setq coding-system (coding-system-change-eol-conversion
746 coding-system eol)))))
4ed46869
KH
747 (setq buffer-file-coding-system coding-system)
748 (set-buffer-modified-p t)
749 (force-mode-line-update))
750
358d28fb
RS
751(defvar default-terminal-coding-system nil
752 "Default value for the terminal coding system.
753This is normally set according to the selected language environment.
754See also the command `set-terminal-coding-system'.")
755
df100398
KH
756(defun set-terminal-coding-system (coding-system)
757 "Set coding system of your terminal to CODING-SYSTEM.
358d28fb
RS
758All text output to the terminal will be encoded
759with the specified coding system.
760For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
761The default is determined by the selected language environment
762or by the previous use of this command."
763 (interactive
2e02a76f
RS
764 (list (let ((default (if (and (not (terminal-coding-system))
765 default-terminal-coding-system)
766 default-terminal-coding-system)))
767 (read-coding-system
768 (format "Coding system for terminal display (default, %s): "
769 default)
770 default))))
358d28fb
RS
771 (if (and (not coding-system)
772 (not (terminal-coding-system)))
773 (setq coding-system default-terminal-coding-system))
774 (if coding-system
521d4010 775 (setq default-terminal-coding-system coding-system))
df100398
KH
776 (set-terminal-coding-system-internal coding-system)
777 (redraw-frame (selected-frame)))
778
358d28fb
RS
779(defvar default-keyboard-coding-system nil
780 "Default value of the keyboard coding system.
781This is normally set according to the selected language environment.
782See also the command `set-keyboard-coding-system'.")
783
df100398 784(defun set-keyboard-coding-system (coding-system)
358d28fb
RS
785 "Set coding system for keyboard input to CODING-SYSTEM.
786In addition, this command enables Encoded-kbd minor mode.
6d34f495
DL
787\(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see
788`encoded-kbd-mode'.)
358d28fb
RS
789For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
790The default is determined by the selected language environment
791or by the previous use of this command."
792 (interactive
2e02a76f
RS
793 (list (let ((default (if (and (not (keyboard-coding-system))
794 default-keyboard-coding-system)
795 default-keyboard-coding-system)))
796 (read-coding-system
797 (format "Coding system for keyboard input (default, %s): "
798 default)
799 default))))
358d28fb
RS
800 (if (and (not coding-system)
801 (not (keyboard-coding-system)))
802 (setq coding-system default-keyboard-coding-system))
803 (if coding-system
804 (setq default-keyboard-coding-system coding-system))
df100398 805 (set-keyboard-coding-system-internal coding-system)
b23bad0b 806 (setq keyboard-coding-system coding-system)
df100398
KH
807 (encoded-kbd-mode (if coding-system 1 0)))
808
6d34f495
DL
809(defcustom keyboard-coding-system nil
810 "Specify coding system for keyboard input.
811If you set this on a terminal which can't distinguish Meta keys from
8128-bit characters, you will have to use ESC to type Meta characters.
813See Info node `Specify Coding' and Info node `Single-Byte Character Support'.
814
815Setting this variable directly does not take effect;
816use either M-x customize or \\[set-keyboard-coding-system]."
817 :type '(coding-system :tag "Coding system")
818 :link '(info-link "(emacs)Specify Coding")
819 :link '(info-link "(emacs)Single-Byte Character Support")
820 :set (lambda (symbol value)
821 ;; Don't load encoded-kbd-mode unnecessarily.
822 (if (or value (boundp 'encoded-kbd-mode))
823 (set-keyboard-coding-system value)
824 (set-default 'keyboard-coding-system nil))) ; must initialize
825 :version "21.1"
826 :group 'keyboard
827 :group 'mule)
828
df100398 829(defun set-buffer-process-coding-system (decoding encoding)
358d28fb 830 "Set coding systems for the process associated with the current buffer.
df100398 831DECODING is the coding system to be used to decode input from the process,
358d28fb
RS
832ENCODING is the coding system to be used to encode output to the process.
833
834For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]."
4ed46869 835 (interactive
83911021 836 "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
4ed46869
KH
837 (let ((proc (get-buffer-process (current-buffer))))
838 (if (null proc)
521d4010 839 (error "No process")
df100398
KH
840 (check-coding-system decoding)
841 (check-coding-system encoding)
842 (set-process-coding-system proc decoding encoding)))
4ed46869
KH
843 (force-mode-line-update))
844
d0b99881
RS
845(defalias 'set-clipboard-coding-system 'set-selection-coding-system)
846
14915c37 847(defun set-selection-coding-system (coding-system)
b25eef20
KH
848 "Make CODING-SYSTEM used for communicating with other X clients .
849When sending or receiving text via cut_buffer, selection, and clipboard,
850the text is encoded or decoded by CODING-SYSTEM."
a03b3ce1 851 (interactive "zCoding system for X selection: ")
b25eef20 852 (check-coding-system coding-system)
14915c37 853 (setq selection-coding-system coding-system))
b25eef20 854
e8dd0160 855;; Coding system lastly specified by the command
a03b3ce1
KH
856;; set-next-selection-coding-system.
857(defvar last-next-selection-coding-system nil)
858
859(defun set-next-selection-coding-system (coding-system)
860 "Make CODING-SYSTEM used for the next communication with other X clients.
861This setting is effective for the next communication only."
862 (interactive
863 (list (read-coding-system
864 (if last-next-selection-coding-system
865 (format "Coding system for the next X selection (default, %S): "
866 last-next-selection-coding-system)
867 "Coding system for the next X selection: ")
868 last-next-selection-coding-system)))
869 (if coding-system
870 (setq last-next-selection-coding-system coding-system)
871 (setq coding-system last-next-selection-coding-system))
872 (check-coding-system coding-system)
873
874 (setq next-selection-coding-system coding-system))
875
4ed46869 876(defun set-coding-priority (arg)
521d4010 877 "Set priority of coding categories according to ARG.
c1841772
KH
878ARG is a list of coding categories ordered by priority.
879
880This function is provided for backward compatibility.
881Now we have more convenient function `set-coding-system-priority'."
d9e3229d
KH
882 (let ((l arg)
883 (current-list (copy-sequence coding-category-list)))
e8dd0160 884 ;; Check the validity of ARG while deleting coding categories in
d9e3229d
KH
885 ;; ARG from CURRENT-LIST. We assume that CODING-CATEGORY-LIST
886 ;; contains all coding categories.
887 (while l
888 (if (or (null (get (car l) 'coding-category-index))
889 (null (memq (car l) current-list)))
890 (error "Invalid or duplicated element in argument: %s" arg))
891 (setq current-list (delq (car l) current-list))
892 (setq l (cdr l)))
4ed46869 893 ;; Update `coding-category-list' and return it.
2feaf204 894 (setq coding-category-list (append arg current-list))
07513d64 895 ;; Fixme: not defined.
2feaf204 896 (set-coding-priority-internal)))
07513d64 897(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.1")
4ed46869 898
835cbadb
EZ
899;;; X selections
900
901(defvar non-standard-icccm-encodings-alist
902 '(("ISO8859-15" . latin-iso8859-15)
903 ("ISO8859-14" . latin-iso8859-14)
904 ("KOI8-R" . koi8-r)
905 ("BIG5-0" . big5))
906 "Alist of font charset names defined by XLFD, and the corresponding Emacs
907charsets or coding systems.")
908
909;; Functions to support "Non-Standard Character Set Encodings" defined
910;; by the ICCCM spec. We support that by converting the leading
911;; sequence of the ``extended segment'' to the corresponding ISO-2022
912;; sequences (if the leading sequence names an Emacs charset), or decode
913;; the segment (if it names a coding system). Encoding does the reverse.
914(defun ctext-post-read-conversion (len)
915 "Decode LEN characters encoded as Compound Text with Extended Segments."
916 (buffer-disable-undo) ; minimize consing due to insertions and deletions
917 (narrow-to-region (point) (+ (point) len))
918 (save-match-data
919 (let ((pt (point-marker))
920 (oldpt (point-marker))
921 (newpt (make-marker))
922 (modified-p (buffer-modified-p))
923 (case-fold-search nil)
924 last-coding-system-used
925 encoding textlen chset)
926 (while (re-search-forward
927 "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002"
928 nil 'move)
929 (set-marker newpt (point))
930 (set-marker pt (match-beginning 0))
931 (setq encoding (match-string 3))
932 (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
933 (- (aref (match-string 2) 1) 128))
934 (1+ (length encoding))))
935 (setq
936 chset (cdr (assoc-ignore-case encoding
937 non-standard-icccm-encodings-alist)))
938 (cond ((null chset)
939 ;; This charset is not supported--leave this extended
940 ;; segment unaltered and skip over it.
941 (goto-char (+ (point) textlen)))
942 ((charsetp chset)
943 ;; If it's a charset, replace the leading escape sequence
944 ;; with a standard ISO-2022 sequence. We will decode all
945 ;; such segments later, in one go, when we exit the loop
946 ;; or find an extended segment that names a coding
947 ;; system, not a charset.
948 (replace-match
949 (concat "\\1"
950 (if (= 0 (charset-iso-graphic-plane chset))
951 ;; GL charsets
952 (if (= 1 (charset-dimension chset)) "(" "$(")
953 ;; GR charsets
954 (if (= 96 (charset-chars chset))
955 "-"
956 (if (= 1 (charset-dimension chset)) ")" "$)")))
957 (string (charset-iso-final-char chset)))
958 t)
959 (goto-char (+ (point) textlen)))
960 ((coding-system-p chset)
961 ;; If it's a coding system, we need to decode the segment
962 ;; right away. But first, decode what we've skipped
963 ;; across until now.
964 (when (> pt oldpt)
965 (decode-coding-region oldpt pt 'ctext-no-compositions))
966 (delete-region pt newpt)
967 (set-marker newpt (+ newpt textlen))
968 (decode-coding-region pt newpt chset)
969 (goto-char newpt)
970 (set-marker oldpt newpt))))
971 ;; Decode what's left.
972 (when (> (point) oldpt)
973 (decode-coding-region oldpt (point) 'ctext-no-compositions))
974 ;; This buffer started as unibyte, because the string we get from
975 ;; the X selection is a unibyte string. We must now make it
976 ;; multibyte, so that the decoded text is inserted as multibyte
977 ;; into its buffer.
978 (set-buffer-multibyte t)
979 (set-buffer-modified-p modified-p)
980 (- (point-max) (point-min)))))
981
982(defvar non-standard-designations-alist
983 '(("$(0" . (big5 "big5-0" 2))
984 ("$(1" . (big5 "big5-0" 2))
985 ("-V" . (t "iso8859-10" 1))
986 ("-Y" . (t "iso8859-13" 1))
987 ("-_" . (t "iso8859-14" 1))
988 ("-b" . (t "iso8859-15" 1))
989 ("-f" . (t "iso8859-16" 1)))
990 "Alist of ctext control sequences that introduce character sets which
991are not in the list of approved ICCCM encodings, and the corresponding
992coding system, identifier string, and number of octets per encoded
993character.
994
995Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
996is the control sequence (sans the leading ESC) that introduces the character
997set in the text encoded by compound-text. ENCODING is a coding system
998symbol; if it is t, it means that the ctext coding system already encodes
999the text correctly, and only the leading control sequence needs to be altered.
1000If ENCODING is a coding system, we need to re-encode the text with that
1001coding system. CHARSET is the ICCCM name of the charset we need to put into
1002the leading control sequence. NOCTETS is the number of octets (bytes) that
1003encode each character in this charset. NOCTETS can be 0 (meaning the number
1004of octets per character is variable), 1, 2, 3, or 4.")
1005
1006(defun ctext-pre-write-conversion (from to)
5dde3c71
EZ
1007 "Encode characters between FROM and TO as Compound Text w/Extended Segments.
1008
1009If FROM is a string, or if the current buffer is not the one set up for us
1010by run_pre_post_conversion_on_str, generate a new temp buffer, insert the
1011text, and convert it in the temporary buffer. Otherwise, convert in-place."
1012 (cond ((and (string= (buffer-name) " *code-converting-work*")
1013 (not (stringp from)))
1014 ; Minimize consing due to subsequent insertions and deletions.
1015 (buffer-disable-undo)
1016 (narrow-to-region from to))
1017 (t
1018 (let ((buf (current-buffer)))
1019 (set-buffer (generate-new-buffer " *temp"))
1020 (buffer-disable-undo)
1021 (if (stringp from)
1022 (insert from)
1023 (insert-buffer-substring buf from to)))))
835cbadb
EZ
1024 (encode-coding-region from to 'ctext-no-compositions)
1025 ;; Replace ISO-2022 charset designations with extended segments, for
1026 ;; those charsets that are not part of the official X registry.
1027 (save-match-data
1028 (goto-char (point-min))
1029 (let ((newpt (make-marker))
1030 (case-fold-search nil)
1031 pt desig encode-info encoding chset noctets textlen)
1032 (set-buffer-multibyte nil)
5dde3c71
EZ
1033 ;; The regexp below finds the leading sequences for big5 and
1034 ;; iso8859-1[03-6] charsets.
835cbadb
EZ
1035 (while (re-search-forward "\e\\(\$([01]\\|-[VY_bf]\\)" nil 'move)
1036 (setq desig (match-string 1)
1037 pt (point-marker)
1038 encode-info (cdr (assoc desig non-standard-designations-alist))
1039 encoding (car encode-info)
1040 chset (cadr encode-info)
1041 noctets (car (cddr encode-info)))
1042 (skip-chars-forward "^\e")
1043 (set-marker newpt (point))
1044 (cond
1045 ((eq encoding t) ; only the leading sequence needs to be changed
1046 (setq textlen (+ (- newpt pt) (length chset) 1))
5dde3c71 1047 ;; Generate the ICCCM control sequence for an extended segment.
835cbadb
EZ
1048 (replace-match (format "\e%%/%d%c%c%s\ 2"
1049 noctets
1050 (+ (/ textlen 128) 128)
1051 (+ (% textlen 128) 128)
1052 chset)
1053 t t))
1054 ((coding-system-p encoding) ; need to recode the entire segment...
1055 (set-marker pt (match-beginning 0))
1056 (decode-coding-region pt newpt 'ctext-no-compositions)
1057 (set-buffer-multibyte t)
1058 (encode-coding-region pt newpt encoding)
1059 (set-buffer-multibyte nil)
1060 (setq textlen (+ (- newpt pt) (length chset) 1))
1061 (goto-char pt)
1062 (insert (format "\e%%/%d%c%c%s\ 2"
1063 noctets
1064 (+ (/ textlen 128) 128)
1065 (+ (% textlen 128) 128)
1066 chset))))
1067 (goto-char newpt))))
1068 (set-buffer-multibyte t)
5dde3c71 1069 ;; Must return nil, as build_annotations_2 expects that.
835cbadb
EZ
1070 nil)
1071
c1841772
KH
1072(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.0")
1073
4ed46869
KH
1074;;; FILE I/O
1075
e76938e7 1076(defcustom auto-coding-alist
0735296c 1077 '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|tar\\|tgz\\)\\'" . no-conversion)
4f16d1d1 1078 ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion))
835f49b8
KH
1079 "Alist of filename patterns vs corresponding coding systems.
1080Each element looks like (REGEXP . CODING-SYSTEM).
558b0c86 1081A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.
835f49b8 1082
7fed493a
RS
1083The settings in this alist take priority over `coding:' tags
1084in the file (see the function `set-auto-coding')
e76938e7
DL
1085and the contents of `file-coding-system-alist'."
1086 :group 'files
1087 :group 'mule
1088 :type '(repeat (cons (regexp :tag "File name regexp")
1089 (symbol :tag "Coding system"))))
835f49b8 1090
502522b2
GM
1091(defcustom auto-coding-regexp-alist
1092 '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion))
1093 "Alist of patterns vs corresponding coding systems.
1094Each element looks like (REGEXP . CODING-SYSTEM).
1095A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.
1096
1097The settings in this alist take priority over `coding:' tags
1098in the file (see the function `set-auto-coding')
1099and the contents of `file-coding-system-alist'."
1100 :group 'files
1101 :group 'mule
1102 :type '(repeat (cons (regexp :tag "Regexp")
1103 (symbol :tag "Coding system"))))
1104
1c4cc63a
KH
1105(defvar set-auto-coding-for-load nil
1106 "Non-nil means look for `load-coding' property instead of `coding'.
1107This is used for loading and byte-compiling Emacs Lisp files.")
1108
8a592131
RS
1109(defun auto-coding-alist-lookup (filename)
1110 "Return the coding system specified by `auto-coding-alist' for FILENAME."
1111 (let ((alist auto-coding-alist)
ca128d75 1112 (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos)))
8a592131
RS
1113 coding-system)
1114 (while (and alist (not coding-system))
1115 (if (string-match (car (car alist)) filename)
1116 (setq coding-system (cdr (car alist)))
1117 (setq alist (cdr alist))))
1118 coding-system))
1119
502522b2
GM
1120
1121(defun auto-coding-from-file-contents (size)
1122 "Determine a coding system from the contents of the current buffer.
1123The current buffer contains SIZE bytes starting at point.
1124Value is either a coding system or nil."
1125 (save-excursion
1126 (let ((alist auto-coding-regexp-alist)
1127 coding-system)
1128 (while (and alist (not coding-system))
1129 (let ((regexp (car (car alist))))
1130 (when (re-search-forward regexp (+ (point) size) t)
1131 (setq coding-system (cdr (car alist)))))
1132 (setq alist (cdr alist)))
1133 coding-system)))
1134
1135
835f49b8
KH
1136(defun set-auto-coding (filename size)
1137 "Return coding system for a file FILENAME of which SIZE bytes follow point.
1c4cc63a
KH
1138These bytes should include at least the first 1k of the file
1139and the last 3k of the file, but the middle may be omitted.
63561304 1140
502522b2
GM
1141It checks FILENAME against the variable `auto-coding-alist'. If
1142FILENAME doesn't match any entries in the variable, it checks the
1143contents of the current buffer following point against
1144`auto-coding-regexp-alist'. If no match is found, it checks for a
1145`coding:' tag in the first one or two lines following point. If no
1146`coding:' tag is found, it checks for local variables list in the last
11473K bytes out of the SIZE bytes.
63561304
KH
1148
1149The return value is the specified coding system,
1150or nil if nothing specified.
87aba788 1151
ba74e833 1152The variable `set-auto-coding-function' (which see) is set to this
87aba788 1153function by default."
502522b2
GM
1154 (or (auto-coding-alist-lookup filename)
1155 (auto-coding-from-file-contents size)
1156 (let* ((case-fold-search t)
1157 (head-start (point))
1158 (head-end (+ head-start (min size 1024)))
1159 (tail-start (+ head-start (max (- size 3072) 0)))
1160 (tail-end (+ head-start size))
1161 coding-system head-found tail-found pos)
1162 ;; Try a short cut by searching for the string "coding:"
1163 ;; and for "unibyte:" at the head and tail of SIZE bytes.
1164 (setq head-found (or (search-forward "coding:" head-end t)
1165 (search-forward "unibyte:" head-end t)))
1166 (if (and head-found (> head-found tail-start))
1167 ;; Head and tail are overlapped.
1168 (setq tail-found head-found)
1169 (goto-char tail-start)
1170 (setq tail-found (or (search-forward "coding:" tail-end t)
1171 (search-forward "unibyte:" tail-end t))))
1172
1173 ;; At first check the head.
1174 (when head-found
1175 (goto-char head-start)
6b66d028
RS
1176 (setq head-end (set-auto-mode-1))
1177 (setq head-start (point))
1d8e9a7c 1178 (when (and head-end (< head-found head-end))
835f49b8 1179 (goto-char head-start)
502522b2
GM
1180 (when (and set-auto-coding-for-load
1181 (re-search-forward
6b66d028 1182 "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
502522b2
GM
1183 head-end t))
1184 (setq coding-system 'raw-text))
1185 (when (and (not coding-system)
1186 (re-search-forward
6b66d028 1187 "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
502522b2
GM
1188 head-end t))
1189 (setq coding-system (intern (match-string 2)))
1190 (or (coding-system-p coding-system)
1191 (setq coding-system nil)))))
1192
1193 ;; If no coding: tag in the head, check the tail.
1194 (when (and tail-found (not coding-system))
1195 (goto-char tail-start)
1196 (search-forward "\n\^L" nil t)
1197 (if (re-search-forward
1198 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
1199 ;; The prefix is what comes before "local variables:" in its
1200 ;; line. The suffix is what comes after "local variables:"
1201 ;; in its line.
1202 (let* ((prefix (regexp-quote (match-string 1)))
1203 (suffix (regexp-quote (match-string 2)))
1204 (re-coding
1205 (concat
1206 "^" prefix
cfe98f50
GM
1207 ;; N.B. without the \n below, the regexp can
1208 ;; eat newlines.
1209 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
502522b2
GM
1210 suffix "$"))
1211 (re-unibyte
1212 (concat
1213 "^" prefix
cfe98f50 1214 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
502522b2
GM
1215 suffix "$"))
1216 (re-end
cfe98f50 1217 (concat "^" prefix "[ \t]*End *:[ \t]*" suffix "$"))
502522b2
GM
1218 (pos (point)))
1219 (re-search-forward re-end tail-end 'move)
1220 (setq tail-end (point))
1221 (goto-char pos)
1222 (when (and set-auto-coding-for-load
1223 (re-search-forward re-unibyte tail-end t))
1224 (setq coding-system 'raw-text))
1225 (when (and (not coding-system)
1226 (re-search-forward re-coding tail-end t))
1227 (setq coding-system (intern (match-string 1)))
1228 (or (coding-system-p coding-system)
1229 (setq coding-system nil))))))
1230 coding-system)))
63561304
KH
1231
1232(setq set-auto-coding-function 'set-auto-coding)
87aba788 1233
4ed46869 1234(defun after-insert-file-set-buffer-file-coding-system (inserted)
521d4010 1235 "Set `buffer-file-coding-system' of current buffer after text is inserted."
4ed46869
KH
1236 (if last-coding-system-used
1237 (let ((coding-system
1238 (find-new-buffer-file-coding-system last-coding-system-used))
1239 (modified-p (buffer-modified-p)))
0269ddfb 1240 (when coding-system
71983219 1241 (set-buffer-file-coding-system coding-system t)
d0c26c63 1242 (if (and enable-multibyte-characters
c1841772 1243 (or (eq (coding-system-type coding-system) 'raw-text))
136e48e4
KH
1244 ;; If buffer was unmodified and the size is the
1245 ;; same as INSERTED, we must be visiting it.
1246 (not modified-p)
1247 (= (buffer-size) inserted))
ccb77b4e
RS
1248 ;; For coding systems no-conversion and raw-text...,
1249 ;; edit the buffer as unibyte.
d0c26c63
KH
1250 (let ((pos-byte (position-bytes (+ (point) inserted))))
1251 (set-buffer-multibyte nil)
1252 (setq inserted (- pos-byte (position-bytes (point))))))
0269ddfb 1253 (set-buffer-modified-p modified-p))))
d0c26c63 1254 inserted)
4ed46869 1255
84c9d215
KH
1256(add-hook 'after-insert-file-functions
1257 'after-insert-file-set-buffer-file-coding-system)
4ed46869 1258
8057896b 1259;; The coding-spec and eol-type of coding-system returned is decided
4ed46869
KH
1260;; independently in the following order.
1261;; 1. That of buffer-file-coding-system locally bound.
1262;; 2. That of CODING.
1263
1264(defun find-new-buffer-file-coding-system (coding)
1265 "Return a coding system for a buffer when a file of CODING is inserted.
a73a8c89
KH
1266The local variable `buffer-file-coding-system' of the current buffer
1267is set to the returned value.
509064c5 1268Return nil if there's no need to set `buffer-file-coding-system'."
4ed46869 1269 (let (local-coding local-eol
b685f8d6 1270 found-coding found-eol
4ed46869
KH
1271 new-coding new-eol)
1272 (if (null coding)
1273 ;; Nothing found about coding.
1274 nil
1275
b685f8d6
RS
1276 ;; Get information of `buffer-file-coding-system' in LOCAL-EOL
1277 ;; and LOCAL-CODING.
1278 (setq local-eol (coding-system-eol-type buffer-file-coding-system))
1279 (if (null (numberp local-eol))
1280 ;; But eol-type is not yet set.
1281 (setq local-eol nil))
0269ddfb 1282 (if (and buffer-file-coding-system
c1841772
KH
1283 (not (eq (coding-system-type buffer-file-coding-system)
1284 'undecided)))
0269ddfb 1285 (setq local-coding (coding-system-base buffer-file-coding-system)))
b685f8d6
RS
1286
1287 (if (and (local-variable-p 'buffer-file-coding-system)
1288 local-eol local-coding)
4ed46869
KH
1289 ;; The current buffer has already set full coding-system, we
1290 ;; had better not change it.
1291 nil
1292
8057896b 1293 (setq found-eol (coding-system-eol-type coding))
4ed46869 1294 (if (null (numberp found-eol))
be02cd54
EZ
1295 ;; But eol-type is not found.
1296 ;; If EOL conversions are inhibited, force unix eol-type.
1297 (setq found-eol (if inhibit-eol-conversion 0)))
c1841772 1298 (setq found-coding (coding-system-base coding))
c76b5c99
KH
1299
1300 (if (and (not found-eol) (eq found-coding 'undecided))
1301 ;; No valid coding information found.
1302 nil
1303
1304 ;; Some coding information (eol or text) found.
1305
1306 ;; The local setting takes precedence over the found one.
1307 (setq new-coding (if (local-variable-p 'buffer-file-coding-system)
1308 (or local-coding found-coding)
1309 (or found-coding local-coding)))
1310 (setq new-eol (if (local-variable-p 'buffer-file-coding-system)
1311 (or local-eol found-eol)
1312 (or found-eol local-eol)))
1313
1314 (let ((eol-type (coding-system-eol-type new-coding)))
1315 (if (and (numberp new-eol) (vectorp eol-type))
1316 (aref eol-type new-eol)
1317 new-coding)))))))
4ed46869 1318
fe831d33
GV
1319(defun modify-coding-system-alist (target-type regexp coding-system)
1320 "Modify one of look up tables for finding a coding system on I/O operation.
8c453b46
RS
1321There are three of such tables, `file-coding-system-alist',
1322`process-coding-system-alist', and `network-coding-system-alist'.
fe831d33
GV
1323
1324TARGET-TYPE specifies which of them to modify.
8c453b46
RS
1325If it is `file', it affects `file-coding-system-alist' (which see).
1326If it is `process', it affects `process-coding-system-alist' (which see).
e8dd0160 1327If it is `network', it affects `network-coding-system-alist' (which see).
fe831d33
GV
1328
1329REGEXP is a regular expression matching a target of I/O operation.
1330The target is a file name if TARGET-TYPE is `file', a program name if
1331TARGET-TYPE is `process', or a network service name or a port number
1332to connect to if TARGET-TYPE is `network'.
1333
1334CODING-SYSTEM is a coding system to perform code conversion on the I/O
8c453b46
RS
1335operation, or a cons cell (DECODING . ENCODING) specifying the coding systems
1336for decoding and encoding respectively,
1337or a function symbol which, when called, returns such a cons cell."
fe831d33
GV
1338 (or (memq target-type '(file process network))
1339 (error "Invalid target type: %s" target-type))
1340 (or (stringp regexp)
1341 (and (eq target-type 'network) (integerp regexp))
1342 (error "Invalid regular expression: %s" regexp))
1343 (if (symbolp coding-system)
1344 (if (not (fboundp coding-system))
1345 (progn
1346 (check-coding-system coding-system)
1347 (setq coding-system (cons coding-system coding-system))))
1348 (check-coding-system (car coding-system))
1349 (check-coding-system (cdr coding-system)))
1350 (cond ((eq target-type 'file)
1351 (let ((slot (assoc regexp file-coding-system-alist)))
1352 (if slot
1353 (setcdr slot coding-system)
1354 (setq file-coding-system-alist
1355 (cons (cons regexp coding-system)
1356 file-coding-system-alist)))))
1357 ((eq target-type 'process)
1358 (let ((slot (assoc regexp process-coding-system-alist)))
1359 (if slot
1360 (setcdr slot coding-system)
1361 (setq process-coding-system-alist
1362 (cons (cons regexp coding-system)
1363 process-coding-system-alist)))))
1364 (t
1365 (let ((slot (assoc regexp network-coding-system-alist)))
1366 (if slot
1367 (setcdr slot coding-system)
1368 (setq network-coding-system-alist
1369 (cons (cons regexp coding-system)
1370 network-coding-system-alist)))))))
1371
b25eef20 1372(defun make-translation-table (&rest args)
a284eea3 1373 "Make a translation table from arguments.
d38b07f9 1374A translation table is a char table intended for character
a284eea3
DL
1375translation in CCL programs.
1376
d38b07f9 1377Each argument is a list of elements of the form (FROM . TO), where FROM
a284eea3 1378is a character to be translated to TO.
13d5617d 1379
452fdb31 1380FROM can be a generic character (see `make-char'). In this case, TO is
d38b07f9 1381a generic character containing the same number of characters, or an
452fdb31 1382ordinary character. If FROM and TO are both generic characters, all
b25eef20 1383characters belonging to FROM are translated to characters belonging to TO
4e003d37
KH
1384without changing their position code(s).
1385
1386The arguments and forms in each argument are processed in the given
1387order, and if a previous form already translates TO to some other
1388character, say TO-ALT, FROM is also translated to TO-ALT."
f967223b 1389 (let ((table (make-char-table 'translation-table))
a73a8c89
KH
1390 revlist)
1391 (while args
1392 (let ((elts (car args)))
1393 (while elts
13d5617d
KH
1394 (let* ((from (car (car elts)))
1395 (from-i 0) ; degree of freedom of FROM
1396 (from-rev (nreverse (split-char from)))
1397 (to (cdr (car elts)))
1398 (to-i 0) ; degree of freedom of TO
1399 (to-rev (nreverse (split-char to))))
1400 ;; Check numbers of heading 0s in FROM-REV and TO-REV.
1401 (while (eq (car from-rev) 0)
1402 (setq from-i (1+ from-i) from-rev (cdr from-rev)))
1403 (while (eq (car to-rev) 0)
1404 (setq to-i (1+ to-i) to-rev (cdr to-rev)))
1405 (if (and (/= from-i to-i) (/= to-i 0))
1406 (error "Invalid character pair (%d . %d)" from to))
b25eef20
KH
1407 ;; If we have already translated TO to TO-ALT, FROM should
1408 ;; also be translated to TO-ALT. But, this is only if TO
1409 ;; is a generic character or TO-ALT is not a generic
13d5617d
KH
1410 ;; character.
1411 (let ((to-alt (aref table to)))
1412 (if (and to-alt
1413 (or (> to-i 0) (not (generic-char-p to-alt))))
1414 (setq to to-alt)))
1415 (if (> from-i 0)
1416 (set-char-table-default table from to)
1417 (aset table from to))
b25eef20
KH
1418 ;; If we have already translated some chars to FROM, they
1419 ;; should also be translated to TO.
a73a8c89
KH
1420 (let ((l (assq from revlist)))
1421 (if l
1422 (let ((ch (car l)))
1423 (setcar l to)
1424 (setq l (cdr l))
1425 (while l
1426 (aset table ch to)
1427 (setq l (cdr l)) ))))
1428 ;; Now update REVLIST.
1429 (let ((l (assq to revlist)))
1430 (if l
1431 (setcdr l (cons from (cdr l)))
1432 (setq revlist (cons (list to from) revlist)))))
1433 (setq elts (cdr elts))))
1434 (setq args (cdr args)))
1435 ;; Return TABLE just created.
1436 table))
1437
c76b5c99
KH
1438(defun make-translation-table-from-vector (vec)
1439 "Make translation table from decoding vector VEC.
9e3b6057
DL
1440VEC is an array of 256 elements to map unibyte codes to multibyte
1441characters. Elements may be nil for undefined code points.
c76b5c99
KH
1442See also the variable `nonascii-translation-table'."
1443 (let ((table (make-char-table 'translation-table))
1444 (rev-table (make-char-table 'translation-table))
c76b5c99 1445 ch)
9e3b6057 1446 (dotimes (i 256)
c76b5c99 1447 (setq ch (aref vec i))
9e3b6057
DL
1448 (when ch
1449 (aset table i ch)
1450 (if (>= ch 256)
1451 (aset rev-table ch i))))
c76b5c99
KH
1452 (set-char-table-extra-slot table 0 rev-table)
1453 table))
1454
f967223b 1455(defun define-translation-table (symbol &rest args)
a284eea3
DL
1456 "Define SYMBOL as the name of translation table made by ARGS.
1457This sets up information so that the table can be used for
1458translations in a CCL program.
b25eef20 1459
a284eea3
DL
1460If the first element of ARGS is a char-table whose purpose is
1461`translation-table', just define SYMBOL to name it. (Note that this
1462function does not bind SYMBOL.)
007c79c8 1463
a284eea3 1464Any other ARGS should be suitable as arguments of the function
007c79c8 1465`make-translation-table' (which see).
b25eef20 1466
452fdb31 1467This function sets properties `translation-table' and
521d4010
DL
1468`translation-table-id' of SYMBOL to the created table itself and the
1469identification number of the table respectively. It also registers
1470the table in `translation-table-vector'."
007c79c8
KH
1471 (let ((table (if (and (char-table-p (car args))
1472 (eq (char-table-subtype (car args))
1473 'translation-table))
1474 (car args)
1475 (apply 'make-translation-table args)))
f967223b 1476 (len (length translation-table-vector))
d9e3229d 1477 (id 0)
b25eef20 1478 (done nil))
f967223b 1479 (put symbol 'translation-table table)
b25eef20
KH
1480 (while (not done)
1481 (if (>= id len)
f967223b
KH
1482 (setq translation-table-vector
1483 (vconcat translation-table-vector (make-vector len nil))))
1484 (let ((slot (aref translation-table-vector id)))
b25eef20
KH
1485 (if (or (not slot)
1486 (eq (car slot) symbol))
1487 (progn
f967223b 1488 (aset translation-table-vector id (cons symbol table))
007c79c8
KH
1489 (setq done t))
1490 (setq id (1+ id)))))
f967223b 1491 (put symbol 'translation-table-id id)
d9e3229d
KH
1492 id))
1493
35554641
KH
1494(put 'with-category-table 'lisp-indent-function 1)
1495
1496(defmacro with-category-table (category-table &rest body)
07513d64 1497 "Execute BODY like `progn' with CATEGORY-TABLE the current category table."
35554641
KH
1498 `(let ((current-category-table (category-table)))
1499 (set-category-table ,category-table)
1500 (unwind-protect
1501 (progn ,@body)
1502 (set-category-table current-category-table))))
1503
69eba008
KH
1504;;; Initialize some variables.
1505
1506(put 'use-default-ascent 'char-table-extra-slots 0)
1507(setq use-default-ascent (make-char-table 'use-default-ascent))
d6d6d592
KH
1508(put 'ignore-relative-composition 'char-table-extra-slots 0)
1509(setq ignore-relative-composition
1510 (make-char-table 'ignore-relative-composition))
69eba008
KH
1511
1512;;;
4ed46869
KH
1513(provide 'mule)
1514
1515;;; mule.el ends here