(windows-1254, cp1254, windows-1257)
[bpt/emacs.git] / lisp / international / mule.el
CommitLineData
07513d64 1;;; mule.el --- basic commands for multilingual environment
4ed46869 2
4ed46869 3;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
58cfed09 4;; Licensed to the Free Software Foundation.
9e3b6057 5;; Copyright (C) 2001 Free Software Foundation, Inc.
c1841772
KH
6;; Copyright (C) 2001, 2002
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H13PRO009
4ed46869
KH
9
10;; Keywords: mule, multilingual, character set, coding system
11
12;; This file is part of GNU Emacs.
13
14;; GNU Emacs is free software; you can redistribute it and/or modify
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation; either version 2, or (at your option)
17;; any later version.
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
369314dc
KH
25;; along with GNU Emacs; see the file COPYING. If not, write to the
26;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27;; Boston, MA 02111-1307, USA.
4ed46869 28
60370d40
PJ
29;;; Commentary:
30
4ed46869
KH
31;;; Code:
32
c1841772 33(defconst mule-version "7.0 (SAKAKI)" "\
4ed46869
KH
34Version number and name of this version of MULE (multilingual environment).")
35
c1841772 36(defconst mule-version-date "2002.2.28" "\
4ed46869
KH
37Distribution date of this version of MULE (multilingual environment).")
38
c1841772
KH
39
40\f
41;;; CHARACTER
42(defalias 'char-valid-p 'characterp)
43(make-obsolete 'char-valid-p 'characterp "22.1")
44
45\f
46;;; CHARSET
47
48(defun define-charset (name docstring &rest props)
49 "Define NAME (symbol) as a charset with DOCSTRING.
50The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
07513d64 51may be any symbol. The following have special meanings, and one of
c1841772
KH
52`:code-offset', `:map', `:parents' must be specified.
53
54`:short-name'
55
56VALUE must be a short string to identify the charset. If omitted,
57NAME is used.
58
59`:long-name'
60
61VALUE must be a string longer than `:short-name' to identify the
07513d64 62charset. If omitted, the value of the `:short-name' attribute is used.
c1841772
KH
63
64`:dimension'
65
66VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of
07513d64
DL
67code-points of the charsets. If omitted, it is calculated from the
68value of the `:code-space' attribute.
c1841772
KH
69
70`:code-space'
71
72VALUE must be a vector of length at most 8 specifying the byte code
73range of each dimension in this format:
74 [ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
07513d64 75where MIN-N is the minimum byte value of Nth dimension of code-point,
c1841772
KH
76MAX-N is the maximum byte value of that.
77
78`:iso-final-char'
79
80VALUE must be a character in the range 32 to 127 (inclusive)
81specifying the final char of the charset for ISO-2022 encoding. If
82omitted, the charset can't be encoded by ISO-2022 based
83coding-systems.
84
85`:iso-revision-number'
86
87VALUE must be an integer in the range 0..63, specifying the revision
88number of the charset for ISO-2022 encoding.
89
90`:emacs-mule-id'
91
92VALUE must be an integer of 0, 128..255. If omitted, the charset
93can't be encoded by coding-systems of type `emacs-mule'.
94
95`:ascii-compatible-p'
96
07513d64
DL
97VALUE must be nil or t (default nil). If VALUE is t, the charset is
98compatible with ASCII, i.e. the first 128 code points map to ASCII.
c1841772
KH
99
100`:supplementary-p'
101
102VALUE must be nil or t. If the VALUE is t, the charset is
07513d64
DL
103supplementary, which means it is used only as a parent of some other
104charset.
c1841772
KH
105
106`:invalid-code'
107
108VALUE must be a nonnegative integer that can be used as an invalid
109code point of the charset. If the minimum code is 0 and the maximum
110code is greater than Emacs' maximum integer value, `:invalid-code'
111should not be omitted.
112
113`:code-offset'
114
07513d64
DL
115VALUE must be an integer added to the index number of a character to
116get the corresponding character code.
c1841772
KH
117
118`:map'
119
120VALUE must be vector or string.
121
122If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
123where CODE-n is a code-point of the charset, and CHAR-n is the
07513d64 124corresponding character code.
c1841772
KH
125
126If it is a string, it is a name of file that contains the above
3e4abc9e
KH
127information. Each line of the file must be this format:
128 0xXXX 0xYYY
129where XXX is a hexadecimal representation of CODE-n and YYY is a
130hexadecimal representation of CHAR-n. A line starting with `#' is a
131comment line.
c1841772
KH
132
133`:parents'
134
135VALUE must be a list of parent charsets. The charset inherits
136characters from them. Each element of the list may be a cons (PARENT
137. OFFSET), where PARENT is a parent charset, and OFFSET is an offset
138value to add to a code point of this charset to get the corresponding
139code point of PARENT.
140
141`:unify-map'
142
143VALUE must be vector or string.
144
145If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
146where CODE-n is a code-point of the charset, and CHAR-n is the
07513d64 147corresponding Unicode character code.
c1841772
KH
148
149If it is a string, it is a name of file that contains the above
3e4abc9e
KH
150information. The file format is the same as what described for `:map'
151attribute."
c1841772
KH
152 (let ((attrs (mapcar 'list '(:dimension
153 :code-space
154 :iso-final-char
155 :iso-revision-number
156 :emacs-mule-id
157 :ascii-compatible-p
158 :supplementary-p
159 :invalid-code
160 :code-offset
161 :map
162 :parents
163 :unify-map
164 :plist))))
165
166 ;; If :dimension is omitted, get the dimension from :code-space.
167 (let ((dimension (plist-get props :dimension)))
168 (or dimension
169 (progn
170 (setq dimension (/ (length (plist-get props :code-space)) 2))
171 (setq props (plist-put props :dimension dimension)))))
172
173 (dolist (slot attrs)
174 (setcdr slot (plist-get props (car slot))))
175
176 ;; Make sure that the value of :code-space is a vector of 8
177 ;; elements.
178 (let* ((slot (assq :code-space attrs))
179 (val (cdr slot))
180 (len (length val)))
181 (if (< len 8)
182 (setcdr slot
183 (vconcat val (make-vector (- 8 len) 0)))))
184
185 ;; Add :name and :docstring properties to PROPS.
186 (setq props
187 (cons :name (cons name (cons :docstring (cons docstring props)))))
188 (or (plist-get props :short-name)
189 (plist-put props :short-name (symbol-name name)))
190 (or (plist-get props :long-name)
191 (plist-put props :long-name (plist-get props :short-name)))
192 (setcdr (assq :plist attrs) props)
193
194 (apply 'define-charset-internal name (mapcar 'cdr attrs))))
195
196
4ed46869 197(defun load-with-code-conversion (fullname file &optional noerror nomessage)
0f69cb38
KH
198 "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
199The file contents are decoded before evaluation if necessary.
4ed46869
KH
200If optional second arg NOERROR is non-nil,
201 report no error if FILE doesn't exist.
202Print messages at start and end of loading unless
203 optional third arg NOMESSAGE is non-nil.
204Return t if file exists."
205 (if (null (file-readable-p fullname))
206 (and (null noerror)
207 (signal 'file-error (list "Cannot open load file" file)))
208 ;; Read file with code conversion, and then eval.
209 (let* ((buffer
210 ;; To avoid any autoloading, set default-major-mode to
211 ;; fundamental-mode.
88162676
RS
212 ;; So that we don't get completely screwed if the
213 ;; file is encoded in some complicated character set,
214 ;; read it with real decoding, as a multibyte buffer,
215 ;; even if this is a --unibyte Emacs session.
216 (let ((default-major-mode 'fundamental-mode)
217 (default-enable-multibyte-characters t))
4ed46869
KH
218 ;; We can't use `generate-new-buffer' because files.el
219 ;; is not yet loaded.
220 (get-buffer-create (generate-new-buffer-name " *load*"))))
db5cae4b
SM
221 (load-in-progress t)
222 (source (save-match-data (string-match "\\.el\\'" fullname))))
223 (unless nomessage
224 (if source
225 (message "Loading %s (source)..." file)
226 (message "Loading %s..." file)))
227 (when purify-flag
228 (setq preloaded-file-list (cons file preloaded-file-list)))
4ed46869 229 (unwind-protect
a6acd8a2 230 (let ((load-file-name fullname)
1c4cc63a 231 (set-auto-coding-for-load t)
a6acd8a2 232 (inhibit-file-name-operation nil))
4ed46869
KH
233 (save-excursion
234 (set-buffer buffer)
235 (insert-file-contents fullname)
7d276780
EZ
236 ;; If the loaded file was inserted with no-conversion or
237 ;; raw-text coding system, make the buffer unibyte.
238 ;; Otherwise, eval-buffer might try to interpret random
239 ;; binary junk as multibyte characters.
240 (if (and enable-multibyte-characters
241 (or (eq (coding-system-type last-coding-system-used) 5)
242 (eq last-coding-system-used 'no-conversion)))
243 (set-buffer-multibyte nil))
4ed46869
KH
244 ;; Make `kill-buffer' quiet.
245 (set-buffer-modified-p nil))
0f69cb38 246 ;; Have the original buffer current while we eval.
88162676
RS
247 (eval-buffer buffer nil file
248 ;; If this Emacs is running with --unibyte,
249 ;; convert multibyte strings to unibyte
250 ;; after reading them.
ba74e833 251;; (not default-enable-multibyte-characters)
8dd08b5b 252 nil t
ba74e833 253 ))
cfc70cdf
RS
254 (let (kill-buffer-hook kill-buffer-query-functions)
255 (kill-buffer buffer)))
4ed46869 256 (let ((hook (assoc file after-load-alist)))
db5cae4b
SM
257 (when hook
258 (mapcar (function eval) (cdr hook))))
259 (unless (or nomessage noninteractive)
260 (if source
261 (message "Loading %s (source)...done" file)
262 (message "Loading %s...done" file)))
4ed46869
KH
263 t)))
264
265;; API (Application Program Interface) for charsets.
266
c1841772
KH
267;;; Charset property
268
269(defun get-charset-property (charset propname)
270 "Return the value of CHARSET's PROPNAME property.
271This is the last value stored with
272 (put-charset-property CHARSET PROPNAME VALUE)."
273 (plist-get (charset-plist charset) propname))
274
275(defun put-charset-property (charset propname value)
276 "Store CHARSETS's PROPNAME property with value VALUE.
277It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
278 (set-charset-plist charset
279 (plist-put (charset-plist charset) propname value)))
280
281
282(defun charset-description (charset)
283 "Return description string of CHARSET."
284 (plist-get (charset-plist charset) :docstring))
285
286(defun charset-dimension (charset)
287 "Return dimension string of CHARSET."
288 (plist-get (charset-plist charset) :dimension))
289
290(defun charset-chars (charset)
291 "Return character numbers contained in a dimension of CHARSET."
103cc921 292 (let ((code-space (plist-get (charset-plist charset) :code-space)))
c1841772
KH
293 (1+ (- (aref code-space 1) (aref code-space 0)))))
294
295(defun charset-iso-final-char (charset)
296 "Return final char of CHARSET."
297 (or (plist-get (charset-plist charset) :iso-final-char)
298 -1))
4ed46869
KH
299
300(defmacro charset-short-name (charset)
c1841772
KH
301 "Return short name of CHARSET."
302 (plist-get (charset-plist charset) :short-name))
4ed46869
KH
303
304(defmacro charset-long-name (charset)
c1841772
KH
305 "Return long name of CHARSET."
306 (plist-get (charset-plist charset) :long-name))
4ed46869 307
d3675a42 308(defun charset-list ()
900dc6e3
KH
309 "Return list of charsets ever defined.
310
d3675a42 311This function is provided for backward compatibility.
900dc6e3 312Now we have the variable `charset-list'."
d3675a42 313 charset-list)
07513d64 314(make-obsolete 'charset-list "Use variable `charset-list'" "22.1")
d3675a42 315
c1841772
KH
316(defun generic-char-p (char)
317 "Always return nil. This exists only for backward compatibility."
318 nil)
07513d64 319(make-obsolete 'generic-char-p "Generic characters no longer exist" "22.1")
0269ddfb 320\f
e76938e7 321;; Coding system stuff
4ed46869 322
c1841772
KH
323;; Coding system is a symbol that has been defined by the function
324;; `define-coding-system'.
c76b5c99 325
c1841772
KH
326(defconst coding-system-iso-2022-flags
327 '(long-form
328 ascii-at-eol
329 ascii-at-cntl
330 7-bit
331 locking-shift
332 single-shift
333 designation
334 revision
335 direction
336 init-at-bol
337 designate-at-bol
338 safe
339 latin-extra
340 composition
341 euc-tw-shift)
342 "List of symbols that control ISO-2022 encoder/decoder.
0269ddfb 343
c1841772
KH
344The value of `:flags' attribute in the argument of the function
345`define-coding-system' must be one of them.
4ed46869 346
c1841772
KH
347If `long-form' is specified, use a long designation sequence on
348encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
349and `japanese-jisx0208'. The long designation sequence doesn't
350conform to ISO 2022, but used by such a coding system as
351`compound-text'.
352
353If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
354on encoding.
355
356If `ascii-at-cntl' is specified, designate ASCII to g0 before control
357codes and SPC on encoding.
358
359If `7-bit' is specified, use 7-bit code only on encoding.
360
361If `locking-shift' is specified, decode locking-shift code correctly
362on decoding, and use locking-shift to invoke a graphic element on
363encoding.
364
365If `single-shift' is specified, decode single-shift code correctly on
366decoding, and use single-shift to invoke a graphic element on encoding.
367
368If `designation' is specified, decode designation code correctly on
369decoding, and use designation to designate a charset to a graphic
370element on encoding.
371
372If `revision' is specified, produce an escape sequence to specify
373revision number of a charset on encoding. Such an escape sequence is
374always correctly decoded on decoding.
375
376If `direction' is specified, decode ISO6429's code for specifying
377direction correctly, and produced the code on encoding.
378
379If `init-at-bol' is specified, on encoding, it is assumed that
380invocation and designation statuses are reset at each beginning of
381line even if `ascii-at-eol' is not specified thus no code for
382resetting them are produced.
383
384If `safe' is specified, on encoding, characters not supported by a
385coding are replaced with `?'.
386
387If `latin-extra' is specified, code-detection routine assumes that a
388code specified in `latin-extra-code-table' (which see) is valid.
389
390If `composition' is specified, an escape sequence to specify
391composition sequence is correctly decode on decoding, and is produced
392on encoding.
393
394If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
395correctly decoded on decoding, and is produced on encoding.")
396
397(defun define-coding-system (name docstring &rest props)
398 "Define NAME (symbol) as a coding system with DOCSTRING and attributes.
399The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
400may be any symbol.
401
402The following attributes have special meanings. If labeled as
403\"(required)\", it should not be omitted.
404
405`:mnemonic' (required)
406
407VALUE is a character to display on mode line for the coding system.
408
409`:coding-type' (required)
410
411VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022',
5170b329 412`emacs-mule', `shift-jis', `big5', `ccl', `raw-text', `undecided'.
c1841772
KH
413
414`:eol-type' (optional)
415
416VALUE is an EOL (end-of-line) format of the coding system. It must be
417one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL
418\(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF),
419and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on
420decoding by the coding system, Emacs automatically detects an EOL
421format of the source text.
422
423`:charset-list' (required)
424
425VALUE must be a list of charsets supported by the coding system. On
426encoding by the coding system, if a character belongs to multiple
427charsets in the list, a charset that comes earlier in the list is
428selected.
429
430`:ascii-compatible-p' (optional)
431
432If VALUE is non-nil, the coding system decodes all 7-bit bytes into
07513d64
DL
433the corresponding ASCII characters, and encodes all ASCII characters
434back to the corresponding 7-bit bytes. If omitted, the VALUE defaults
c1841772
KH
435to nil.
436
437`:decode-translation-table' (optional)
438
439VALUE must be a translation table to use on decoding.
440
441`:encode-translation-table' (optional)
442
443VALUE must be a translation table to use on encoding.
444
445`:post-read-conversion' (optional)
446
447VALUE must be a function to call after some text is inserted and
448decoded by the coding system itself and before any functions in
449`after-insert-functions' are called. The arguments to this function
450is the same as those of a function in `after-insert-functions',
451i.e. LENGTH of a text while putting point at the head of the text to
452be decoded
453
454`:pre-write-conversion'
455
456VALUE must be a function to call after all functions in
457`write-region-annotate-functions' and `buffer-file-format' are called,
458and before the text is encoded by the coding system itself. The
459arguments to this function is the same as those of a function in
460`write-region-annotate-functions', i.e. FROM and TO specifying region
461of a text.
462
463`:default-char'
464
465VALUE must be a character. On encoding, a character not supported by
466the coding system is replaced with VALUE.
467
468`:eol-type'
469
470VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like
471EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like
472EOL (CR). If omitted, on decoding, the coding system detect EOL
473format automatically, and on encoding, used Unix-like EOL.
474
475`:mime-charset'
476
477VALUE must be a symbol who has MIME-charset name.
478
479`:flags'
480
481VALUE must be a list of symbols that control ISO-2022 converter. Each
482symbol must be a member of the variable `coding-system-iso-2022-flags'
483\(which see). This attribute has a meaning only when `:coding-type'
484is `iso-2022'.
485
486`:designation'
487
488VALUE must be a vector [ G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
489GN-USAGE specifies the usage of graphic register GN as follows.
490
491If it is nil, no charset can be designated to GN.
492
07513d64 493If it is a charset, the charset is initially designated to GN, and
c1841772
KH
494never used by the other charsets.
495
496If it is a list, the elements must be charsets, nil, 94, or 96. GN
497can be used by all listed charsets. If the list contains 94, any
07513d64
DL
498charsets whose iso-chars is 94 can be designated to GN. If the list
499contains 96, any charsets whose iso-chars is 96 can be designated to
c1841772 500GN. If the first element is a charset, the charset is initially
07513d64 501designated to GN.
c1841772
KH
502
503This attribute has a meaning only when `:coding-type' is `iso-2022'.
504
505`:bom'
506
507VALUE must nil, t, or cons of coding systems whose `:coding-type' is
508`utf-16'.
509
510This attribute has a meaning only when `:coding-type' is `utf-16'.
511
512`:endian'
513
514VALUE must be t or nil. See the above description for the detail.
515
516This attribute has a meaning only when `:coding-type' is `utf-16'.
517
518`:ccl-decoder'
519
520This attribute has a meaning only when `:coding-type' is `ccl'.
521
522`:ccl-encoder'
523
524This attribute has a meaning only when `:coding-type' is `ccl'."
525 (let* ((common-attrs (mapcar 'list
526 '(:mnemonic
527 :coding-type
528 :charset-list
529 :ascii-compatible-p
530 :docode-translation-table
531 :encode-translation-table
532 :post-read-conversion
533 :pre-write-conversion
534 :default-char
535 :plist
536 :eol-type)))
537 (coding-type (plist-get props :coding-type))
538 (spec-attrs (mapcar 'list
539 (cond ((eq coding-type 'iso-2022)
540 '(:initial
541 :reg-usage
542 :request
543 :flags))
544 ((eq coding-type 'utf-16)
545 '(:bom
546 :endian))
547 ((eq coding-type 'ccl)
548 '(:ccl-decoder
549 :ccl-encoder
550 :valids))))))
551
552 (dolist (slot common-attrs)
553 (setcdr slot (plist-get props (car slot))))
554
555 (dolist (slot spec-attrs)
556 (setcdr slot (plist-get props (car slot))))
557
558 (if (eq coding-type 'iso-2022)
559 (let ((designation (plist-get props :designation))
560 (flags (plist-get props :flags))
561 (initial (make-vector 4 nil))
562 (reg-usage (cons 4 4))
563 request elt)
564 (dotimes (i 4)
565 (setq elt (aref designation i))
566 (cond ((charsetp elt)
567 (aset initial i elt)
568 (setq request (cons (cons elt i) request)))
569 ((consp elt)
570 (aset initial i (car elt))
571 (if (charsetp (car elt))
572 (setq request (cons (cons (car elt) i) request)))
573 (dolist (e (cdr elt))
574 (cond ((charsetp e)
575 (setq request (cons (cons e i) request)))
576 ((eq e 94)
577 (setcar reg-usage i))
578 ((eq e 96)
579 (setcdr reg-usage i))
580 ((eq e t)
581 (setcar reg-usage i)
582 (setcdr reg-usage i)))))))
583 (setcdr (assq :initial spec-attrs) initial)
584 (setcdr (assq :reg-usage spec-attrs) reg-usage)
585 (setcdr (assq :request spec-attrs) request)
586
587 ;; Change :flags value from a list to a bit-mask.
588 (let ((bits 0)
589 (i 0))
590 (dolist (elt coding-system-iso-2022-flags)
591 (if (memq elt flags)
592 (setq bits (logior bits (lsh 1 i))))
593 (setq i (1+ i)))
594 (setcdr (assq :flags spec-attrs) bits))))
595
596 ;; Add :name and :docstring properties to PROPS.
597 (setq props
598 (cons :name (cons name (cons :docstring (cons docstring props)))))
599 (setcdr (assq :plist common-attrs) props)
600
601 (apply 'define-coding-system-internal
602 name (mapcar 'cdr (append common-attrs spec-attrs)))))
603
604(defun coding-system-doc-string (coding-system)
605 "Return the documentation string for CODING-SYSTEM."
606 (plist-get (coding-system-plist coding-system) :docstring))
4ed46869 607
4ed46869 608(defun coding-system-mnemonic (coding-system)
0269ddfb 609 "Return the mnemonic character of CODING-SYSTEM.
6e2c8840
KH
610The mnemonic character of a coding system is used in mode line
611to indicate the coding system. If the arg is nil, return ?-."
c1841772 612 (plist-get (coding-system-plist coding-system) :mnemonic))
4ed46869 613
c1841772
KH
614(defun coding-system-type (coding-system)
615 "Return the coding type of CODING-SYSTEM.
616A coding type is a symbol indicating the encoding method of CODING-SYSTEM.
617See the function `define-coding-system' for more detail."
618 (plist-get (coding-system-plist coding-system) :coding-type))
d3675a42 619
c1841772 620(defun coding-system-charset-list (coding-system)
07513d64 621 "Return list of charsets supported by CODING-SYSTEM.
c1841772
KH
622If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
623If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'."
624 (plist-get (coding-system-plist coding-system) :charset-list))
0269ddfb
KH
625
626(defun coding-system-get (coding-system prop)
07513d64
DL
627 "Extract a value from CODING-SYSTEM's property list for property PROP.
628For compatibility with Emacs 20/21, this accepts old-style symbols
629like `mime-charset' as well as the current style like `:mime-charset'."
630 (or (plist-get (coding-system-plist coding-system) prop)
631 (if (not (keywordp prop))
632 (plist-get (coding-system-plist coding-system)
633 (intern (concat ":" (symbol-name prop)))))))
0269ddfb
KH
634
635(defun coding-system-put (coding-system prop val)
636 "Change value in CODING-SYSTEM's property list PROP to VAL."
c1841772 637 (plist-put (coding-system-plist coding-system) prop val))
0269ddfb
KH
638
639(defalias 'coding-system-parent 'coding-system-base)
2598a293 640(make-obsolete 'coding-system-parent 'coding-system-base "20.3")
0269ddfb
KH
641
642;; Coding system also has a property `eol-type'.
643;;
644;; This property indicates how the coding system handles end-of-line
645;; format. The value is integer 0, 1, 2, or a vector of three coding
646;; systems. Each integer value 0, 1, and 2 indicates the format of
647;; end-of-line LF, CRLF, and CR respectively. A vector value
648;; indicates that the format of end-of-line should be detected
649;; automatically. Nth element of the vector is the subsidiary coding
650;; system whose `eol-type' property is N.
4ed46869 651
857ea15c
AS
652(defun coding-system-lessp (x y)
653 (cond ((eq x 'no-conversion) t)
654 ((eq y 'no-conversion) nil)
655 ((eq x 'emacs-mule) t)
656 ((eq y 'emacs-mule) nil)
657 ((eq x 'undecided) t)
658 ((eq y 'undecided) nil)
659 (t (let ((c1 (coding-system-mnemonic x))
660 (c2 (coding-system-mnemonic y)))
661 (or (< (downcase c1) (downcase c2))
662 (and (not (> (downcase c1) (downcase c2)))
663 (< c1 c2)))))))
664
857ea15c 665(defun add-to-coding-system-list (coding-system)
521d4010 666 "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted."
857ea15c
AS
667 (if (or (null coding-system-list)
668 (coding-system-lessp coding-system (car coding-system-list)))
669 (setq coding-system-list (cons coding-system coding-system-list))
670 (let ((len (length coding-system-list))
671 mid (tem coding-system-list))
672 (while (> len 1)
673 (setq mid (nthcdr (/ len 2) tem))
674 (if (coding-system-lessp (car mid) coding-system)
675 (setq tem mid
676 len (- len (/ len 2)))
677 (setq len (/ len 2))))
678 (setcdr tem (cons coding-system (cdr tem))))))
679
80a7463d 680(defun coding-system-list (&optional base-only)
c11a8f77
KH
681 "Return a list of all existing non-subsidiary coding systems.
682If optional arg BASE-ONLY is non-nil, only base coding systems are listed.
683The value doesn't include subsidiary coding systems which are what
684made from bases and aliases automatically for various end-of-line
685formats (e.g. iso-latin-1-unix, koi8-r-dos)."
80a7463d
KH
686 (let* ((codings (copy-sequence coding-system-list))
687 (tail (cons nil codings)))
688 ;; Remove subsidiary coding systems (eol variants) and alias
689 ;; coding systems (if necessary).
690 (while (cdr tail)
691 (let* ((coding (car (cdr tail)))
692 (aliases (coding-system-get coding 'alias-coding-systems)))
693 (if (or
694 ;; CODING is an eol variant if not in ALIASES.
695 (not (memq coding aliases))
696 ;; CODING is an alias if it is not car of ALIASES.
697 (and base-only (not (eq coding (car aliases)))))
698 (setcdr tail (cdr (cdr tail)))
699 (setq tail (cdr tail)))))
700 codings))
701
4ed46869 702(defun set-buffer-file-coding-system (coding-system &optional force)
358d28fb
RS
703 "Set the file coding-system of the current buffer to CODING-SYSTEM.
704This means that when you save the buffer, it will be converted
705according to CODING-SYSTEM. For a list of possible values of CODING-SYSTEM,
706use \\[list-coding-systems].
707
708If the buffer's previous file coding-system value specifies end-of-line
709conversion, and CODING-SYSTEM does not specify one, CODING-SYSTEM is
710merged with the already-specified end-of-line conversion.
b839fdcc
KH
711
712If the buffer's previous file coding-system value specifies text
713conversion, and CODING-SYSTEM does not specify one, CODING-SYSTEM is
714merged with the already-specified text conversion.
715
716However, if the optional prefix argument FORCE is non-nil, then
717CODING-SYSTEM is used exactly as specified.
aeef8f07
KH
718
719This marks the buffer modified so that the succeeding \\[save-buffer]
720surely saves the buffer with CODING-SYSTEM. From a program, if you
721don't want to mark the buffer modified, just set the variable
722`buffer-file-coding-system' directly."
d9e3229d 723 (interactive "zCoding system for visited file (default, nil): \nP")
4ed46869 724 (check-coding-system coding-system)
36d455c4 725 (if (and coding-system buffer-file-coding-system (null force))
8dd735c1
KH
726 (let ((base (coding-system-base buffer-file-coding-system))
727 (eol (coding-system-eol-type buffer-file-coding-system)))
728 ;; If CODING-SYSTEM doesn't specify text conversion, merge
729 ;; with that of buffer-file-coding-system.
730 (if (eq (coding-system-base coding-system) 'undecided)
731 (setq coding-system (coding-system-change-text-conversion
732 coding-system base)))
733 ;; If CODING-SYSTEM doesn't specify eol conversion, merge with
734 ;; that of buffer-file-coding-system.
735 (if (and (vectorp (coding-system-eol-type coding-system))
736 (numberp eol) (>= eol 0) (<= eol 2))
737 (setq coding-system (coding-system-change-eol-conversion
738 coding-system eol)))))
4ed46869
KH
739 (setq buffer-file-coding-system coding-system)
740 (set-buffer-modified-p t)
741 (force-mode-line-update))
742
358d28fb
RS
743(defvar default-terminal-coding-system nil
744 "Default value for the terminal coding system.
745This is normally set according to the selected language environment.
746See also the command `set-terminal-coding-system'.")
747
df100398
KH
748(defun set-terminal-coding-system (coding-system)
749 "Set coding system of your terminal to CODING-SYSTEM.
358d28fb
RS
750All text output to the terminal will be encoded
751with the specified coding system.
752For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
753The default is determined by the selected language environment
754or by the previous use of this command."
755 (interactive
2e02a76f
RS
756 (list (let ((default (if (and (not (terminal-coding-system))
757 default-terminal-coding-system)
758 default-terminal-coding-system)))
759 (read-coding-system
760 (format "Coding system for terminal display (default, %s): "
761 default)
762 default))))
358d28fb
RS
763 (if (and (not coding-system)
764 (not (terminal-coding-system)))
765 (setq coding-system default-terminal-coding-system))
766 (if coding-system
521d4010 767 (setq default-terminal-coding-system coding-system))
df100398
KH
768 (set-terminal-coding-system-internal coding-system)
769 (redraw-frame (selected-frame)))
770
358d28fb
RS
771(defvar default-keyboard-coding-system nil
772 "Default value of the keyboard coding system.
773This is normally set according to the selected language environment.
774See also the command `set-keyboard-coding-system'.")
775
df100398 776(defun set-keyboard-coding-system (coding-system)
358d28fb
RS
777 "Set coding system for keyboard input to CODING-SYSTEM.
778In addition, this command enables Encoded-kbd minor mode.
6d34f495
DL
779\(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see
780`encoded-kbd-mode'.)
358d28fb
RS
781For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
782The default is determined by the selected language environment
783or by the previous use of this command."
784 (interactive
2e02a76f
RS
785 (list (let ((default (if (and (not (keyboard-coding-system))
786 default-keyboard-coding-system)
787 default-keyboard-coding-system)))
788 (read-coding-system
789 (format "Coding system for keyboard input (default, %s): "
790 default)
791 default))))
358d28fb
RS
792 (if (and (not coding-system)
793 (not (keyboard-coding-system)))
794 (setq coding-system default-keyboard-coding-system))
795 (if coding-system
796 (setq default-keyboard-coding-system coding-system))
df100398 797 (set-keyboard-coding-system-internal coding-system)
b23bad0b 798 (setq keyboard-coding-system coding-system)
df100398
KH
799 (encoded-kbd-mode (if coding-system 1 0)))
800
6d34f495
DL
801(defcustom keyboard-coding-system nil
802 "Specify coding system for keyboard input.
803If you set this on a terminal which can't distinguish Meta keys from
8048-bit characters, you will have to use ESC to type Meta characters.
805See Info node `Specify Coding' and Info node `Single-Byte Character Support'.
806
807Setting this variable directly does not take effect;
808use either M-x customize or \\[set-keyboard-coding-system]."
809 :type '(coding-system :tag "Coding system")
810 :link '(info-link "(emacs)Specify Coding")
811 :link '(info-link "(emacs)Single-Byte Character Support")
812 :set (lambda (symbol value)
813 ;; Don't load encoded-kbd-mode unnecessarily.
814 (if (or value (boundp 'encoded-kbd-mode))
815 (set-keyboard-coding-system value)
816 (set-default 'keyboard-coding-system nil))) ; must initialize
817 :version "21.1"
818 :group 'keyboard
819 :group 'mule)
820
df100398 821(defun set-buffer-process-coding-system (decoding encoding)
358d28fb 822 "Set coding systems for the process associated with the current buffer.
df100398 823DECODING is the coding system to be used to decode input from the process,
358d28fb
RS
824ENCODING is the coding system to be used to encode output to the process.
825
826For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]."
4ed46869 827 (interactive
83911021 828 "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
4ed46869
KH
829 (let ((proc (get-buffer-process (current-buffer))))
830 (if (null proc)
521d4010 831 (error "No process")
df100398
KH
832 (check-coding-system decoding)
833 (check-coding-system encoding)
834 (set-process-coding-system proc decoding encoding)))
4ed46869
KH
835 (force-mode-line-update))
836
d0b99881
RS
837(defalias 'set-clipboard-coding-system 'set-selection-coding-system)
838
14915c37 839(defun set-selection-coding-system (coding-system)
b25eef20
KH
840 "Make CODING-SYSTEM used for communicating with other X clients .
841When sending or receiving text via cut_buffer, selection, and clipboard,
842the text is encoded or decoded by CODING-SYSTEM."
a03b3ce1 843 (interactive "zCoding system for X selection: ")
b25eef20 844 (check-coding-system coding-system)
14915c37 845 (setq selection-coding-system coding-system))
b25eef20 846
e8dd0160 847;; Coding system lastly specified by the command
a03b3ce1
KH
848;; set-next-selection-coding-system.
849(defvar last-next-selection-coding-system nil)
850
851(defun set-next-selection-coding-system (coding-system)
852 "Make CODING-SYSTEM used for the next communication with other X clients.
853This setting is effective for the next communication only."
854 (interactive
855 (list (read-coding-system
856 (if last-next-selection-coding-system
857 (format "Coding system for the next X selection (default, %S): "
858 last-next-selection-coding-system)
859 "Coding system for the next X selection: ")
860 last-next-selection-coding-system)))
861 (if coding-system
862 (setq last-next-selection-coding-system coding-system)
863 (setq coding-system last-next-selection-coding-system))
864 (check-coding-system coding-system)
865
866 (setq next-selection-coding-system coding-system))
867
4ed46869 868(defun set-coding-priority (arg)
521d4010 869 "Set priority of coding categories according to ARG.
c1841772
KH
870ARG is a list of coding categories ordered by priority.
871
872This function is provided for backward compatibility.
873Now we have more convenient function `set-coding-system-priority'."
d9e3229d
KH
874 (let ((l arg)
875 (current-list (copy-sequence coding-category-list)))
e8dd0160 876 ;; Check the validity of ARG while deleting coding categories in
d9e3229d
KH
877 ;; ARG from CURRENT-LIST. We assume that CODING-CATEGORY-LIST
878 ;; contains all coding categories.
879 (while l
880 (if (or (null (get (car l) 'coding-category-index))
881 (null (memq (car l) current-list)))
882 (error "Invalid or duplicated element in argument: %s" arg))
883 (setq current-list (delq (car l) current-list))
884 (setq l (cdr l)))
4ed46869 885 ;; Update `coding-category-list' and return it.
2feaf204 886 (setq coding-category-list (append arg current-list))
07513d64 887 ;; Fixme: not defined.
2feaf204 888 (set-coding-priority-internal)))
07513d64 889(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.1")
4ed46869 890
835cbadb
EZ
891;;; X selections
892
893(defvar non-standard-icccm-encodings-alist
894 '(("ISO8859-15" . latin-iso8859-15)
895 ("ISO8859-14" . latin-iso8859-14)
896 ("KOI8-R" . koi8-r)
897 ("BIG5-0" . big5))
898 "Alist of font charset names defined by XLFD, and the corresponding Emacs
899charsets or coding systems.")
900
901;; Functions to support "Non-Standard Character Set Encodings" defined
902;; by the ICCCM spec. We support that by converting the leading
903;; sequence of the ``extended segment'' to the corresponding ISO-2022
904;; sequences (if the leading sequence names an Emacs charset), or decode
905;; the segment (if it names a coding system). Encoding does the reverse.
906(defun ctext-post-read-conversion (len)
907 "Decode LEN characters encoded as Compound Text with Extended Segments."
908 (buffer-disable-undo) ; minimize consing due to insertions and deletions
909 (narrow-to-region (point) (+ (point) len))
910 (save-match-data
911 (let ((pt (point-marker))
912 (oldpt (point-marker))
913 (newpt (make-marker))
914 (modified-p (buffer-modified-p))
915 (case-fold-search nil)
916 last-coding-system-used
917 encoding textlen chset)
918 (while (re-search-forward
919 "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002"
920 nil 'move)
921 (set-marker newpt (point))
922 (set-marker pt (match-beginning 0))
923 (setq encoding (match-string 3))
924 (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
925 (- (aref (match-string 2) 1) 128))
926 (1+ (length encoding))))
927 (setq
928 chset (cdr (assoc-ignore-case encoding
929 non-standard-icccm-encodings-alist)))
930 (cond ((null chset)
931 ;; This charset is not supported--leave this extended
932 ;; segment unaltered and skip over it.
933 (goto-char (+ (point) textlen)))
934 ((charsetp chset)
935 ;; If it's a charset, replace the leading escape sequence
936 ;; with a standard ISO-2022 sequence. We will decode all
937 ;; such segments later, in one go, when we exit the loop
938 ;; or find an extended segment that names a coding
939 ;; system, not a charset.
940 (replace-match
941 (concat "\\1"
942 (if (= 0 (charset-iso-graphic-plane chset))
943 ;; GL charsets
944 (if (= 1 (charset-dimension chset)) "(" "$(")
945 ;; GR charsets
946 (if (= 96 (charset-chars chset))
947 "-"
948 (if (= 1 (charset-dimension chset)) ")" "$)")))
949 (string (charset-iso-final-char chset)))
950 t)
951 (goto-char (+ (point) textlen)))
952 ((coding-system-p chset)
953 ;; If it's a coding system, we need to decode the segment
954 ;; right away. But first, decode what we've skipped
955 ;; across until now.
956 (when (> pt oldpt)
957 (decode-coding-region oldpt pt 'ctext-no-compositions))
958 (delete-region pt newpt)
959 (set-marker newpt (+ newpt textlen))
960 (decode-coding-region pt newpt chset)
961 (goto-char newpt)
962 (set-marker oldpt newpt))))
963 ;; Decode what's left.
964 (when (> (point) oldpt)
965 (decode-coding-region oldpt (point) 'ctext-no-compositions))
966 ;; This buffer started as unibyte, because the string we get from
967 ;; the X selection is a unibyte string. We must now make it
968 ;; multibyte, so that the decoded text is inserted as multibyte
969 ;; into its buffer.
970 (set-buffer-multibyte t)
971 (set-buffer-modified-p modified-p)
972 (- (point-max) (point-min)))))
973
974(defvar non-standard-designations-alist
975 '(("$(0" . (big5 "big5-0" 2))
976 ("$(1" . (big5 "big5-0" 2))
977 ("-V" . (t "iso8859-10" 1))
978 ("-Y" . (t "iso8859-13" 1))
979 ("-_" . (t "iso8859-14" 1))
980 ("-b" . (t "iso8859-15" 1))
981 ("-f" . (t "iso8859-16" 1)))
982 "Alist of ctext control sequences that introduce character sets which
983are not in the list of approved ICCCM encodings, and the corresponding
984coding system, identifier string, and number of octets per encoded
985character.
986
987Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
988is the control sequence (sans the leading ESC) that introduces the character
989set in the text encoded by compound-text. ENCODING is a coding system
990symbol; if it is t, it means that the ctext coding system already encodes
991the text correctly, and only the leading control sequence needs to be altered.
992If ENCODING is a coding system, we need to re-encode the text with that
993coding system. CHARSET is the ICCCM name of the charset we need to put into
994the leading control sequence. NOCTETS is the number of octets (bytes) that
995encode each character in this charset. NOCTETS can be 0 (meaning the number
996of octets per character is variable), 1, 2, 3, or 4.")
997
998(defun ctext-pre-write-conversion (from to)
5dde3c71
EZ
999 "Encode characters between FROM and TO as Compound Text w/Extended Segments.
1000
1001If FROM is a string, or if the current buffer is not the one set up for us
1002by run_pre_post_conversion_on_str, generate a new temp buffer, insert the
1003text, and convert it in the temporary buffer. Otherwise, convert in-place."
1004 (cond ((and (string= (buffer-name) " *code-converting-work*")
1005 (not (stringp from)))
1006 ; Minimize consing due to subsequent insertions and deletions.
1007 (buffer-disable-undo)
1008 (narrow-to-region from to))
1009 (t
1010 (let ((buf (current-buffer)))
1011 (set-buffer (generate-new-buffer " *temp"))
1012 (buffer-disable-undo)
1013 (if (stringp from)
1014 (insert from)
1015 (insert-buffer-substring buf from to)))))
835cbadb
EZ
1016 (encode-coding-region from to 'ctext-no-compositions)
1017 ;; Replace ISO-2022 charset designations with extended segments, for
1018 ;; those charsets that are not part of the official X registry.
1019 (save-match-data
1020 (goto-char (point-min))
1021 (let ((newpt (make-marker))
1022 (case-fold-search nil)
1023 pt desig encode-info encoding chset noctets textlen)
1024 (set-buffer-multibyte nil)
5dde3c71
EZ
1025 ;; The regexp below finds the leading sequences for big5 and
1026 ;; iso8859-1[03-6] charsets.
835cbadb
EZ
1027 (while (re-search-forward "\e\\(\$([01]\\|-[VY_bf]\\)" nil 'move)
1028 (setq desig (match-string 1)
1029 pt (point-marker)
1030 encode-info (cdr (assoc desig non-standard-designations-alist))
1031 encoding (car encode-info)
1032 chset (cadr encode-info)
1033 noctets (car (cddr encode-info)))
1034 (skip-chars-forward "^\e")
1035 (set-marker newpt (point))
1036 (cond
1037 ((eq encoding t) ; only the leading sequence needs to be changed
1038 (setq textlen (+ (- newpt pt) (length chset) 1))
5dde3c71 1039 ;; Generate the ICCCM control sequence for an extended segment.
835cbadb
EZ
1040 (replace-match (format "\e%%/%d%c%c%s\ 2"
1041 noctets
1042 (+ (/ textlen 128) 128)
1043 (+ (% textlen 128) 128)
1044 chset)
1045 t t))
1046 ((coding-system-p encoding) ; need to recode the entire segment...
1047 (set-marker pt (match-beginning 0))
1048 (decode-coding-region pt newpt 'ctext-no-compositions)
1049 (set-buffer-multibyte t)
1050 (encode-coding-region pt newpt encoding)
1051 (set-buffer-multibyte nil)
1052 (setq textlen (+ (- newpt pt) (length chset) 1))
1053 (goto-char pt)
1054 (insert (format "\e%%/%d%c%c%s\ 2"
1055 noctets
1056 (+ (/ textlen 128) 128)
1057 (+ (% textlen 128) 128)
1058 chset))))
1059 (goto-char newpt))))
1060 (set-buffer-multibyte t)
5dde3c71 1061 ;; Must return nil, as build_annotations_2 expects that.
835cbadb
EZ
1062 nil)
1063
c1841772
KH
1064(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.0")
1065
4ed46869
KH
1066;;; FILE I/O
1067
e76938e7 1068(defcustom auto-coding-alist
0735296c 1069 '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|tar\\|tgz\\)\\'" . no-conversion)
4f16d1d1 1070 ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion))
835f49b8
KH
1071 "Alist of filename patterns vs corresponding coding systems.
1072Each element looks like (REGEXP . CODING-SYSTEM).
558b0c86 1073A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.
835f49b8 1074
7fed493a
RS
1075The settings in this alist take priority over `coding:' tags
1076in the file (see the function `set-auto-coding')
e76938e7
DL
1077and the contents of `file-coding-system-alist'."
1078 :group 'files
1079 :group 'mule
1080 :type '(repeat (cons (regexp :tag "File name regexp")
1081 (symbol :tag "Coding system"))))
835f49b8 1082
502522b2
GM
1083(defcustom auto-coding-regexp-alist
1084 '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion))
1085 "Alist of patterns vs corresponding coding systems.
1086Each element looks like (REGEXP . CODING-SYSTEM).
1087A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.
1088
1089The settings in this alist take priority over `coding:' tags
1090in the file (see the function `set-auto-coding')
1091and the contents of `file-coding-system-alist'."
1092 :group 'files
1093 :group 'mule
1094 :type '(repeat (cons (regexp :tag "Regexp")
1095 (symbol :tag "Coding system"))))
1096
1c4cc63a
KH
1097(defvar set-auto-coding-for-load nil
1098 "Non-nil means look for `load-coding' property instead of `coding'.
1099This is used for loading and byte-compiling Emacs Lisp files.")
1100
8a592131
RS
1101(defun auto-coding-alist-lookup (filename)
1102 "Return the coding system specified by `auto-coding-alist' for FILENAME."
1103 (let ((alist auto-coding-alist)
ca128d75 1104 (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos)))
8a592131
RS
1105 coding-system)
1106 (while (and alist (not coding-system))
1107 (if (string-match (car (car alist)) filename)
1108 (setq coding-system (cdr (car alist)))
1109 (setq alist (cdr alist))))
1110 coding-system))
1111
502522b2
GM
1112
1113(defun auto-coding-from-file-contents (size)
1114 "Determine a coding system from the contents of the current buffer.
1115The current buffer contains SIZE bytes starting at point.
1116Value is either a coding system or nil."
1117 (save-excursion
1118 (let ((alist auto-coding-regexp-alist)
1119 coding-system)
1120 (while (and alist (not coding-system))
1121 (let ((regexp (car (car alist))))
1122 (when (re-search-forward regexp (+ (point) size) t)
1123 (setq coding-system (cdr (car alist)))))
1124 (setq alist (cdr alist)))
1125 coding-system)))
1126
1127
835f49b8
KH
1128(defun set-auto-coding (filename size)
1129 "Return coding system for a file FILENAME of which SIZE bytes follow point.
1c4cc63a
KH
1130These bytes should include at least the first 1k of the file
1131and the last 3k of the file, but the middle may be omitted.
63561304 1132
502522b2
GM
1133It checks FILENAME against the variable `auto-coding-alist'. If
1134FILENAME doesn't match any entries in the variable, it checks the
1135contents of the current buffer following point against
1136`auto-coding-regexp-alist'. If no match is found, it checks for a
1137`coding:' tag in the first one or two lines following point. If no
1138`coding:' tag is found, it checks for local variables list in the last
11393K bytes out of the SIZE bytes.
63561304
KH
1140
1141The return value is the specified coding system,
1142or nil if nothing specified.
87aba788 1143
ba74e833 1144The variable `set-auto-coding-function' (which see) is set to this
87aba788 1145function by default."
502522b2
GM
1146 (or (auto-coding-alist-lookup filename)
1147 (auto-coding-from-file-contents size)
1148 (let* ((case-fold-search t)
1149 (head-start (point))
1150 (head-end (+ head-start (min size 1024)))
1151 (tail-start (+ head-start (max (- size 3072) 0)))
1152 (tail-end (+ head-start size))
1153 coding-system head-found tail-found pos)
1154 ;; Try a short cut by searching for the string "coding:"
1155 ;; and for "unibyte:" at the head and tail of SIZE bytes.
1156 (setq head-found (or (search-forward "coding:" head-end t)
1157 (search-forward "unibyte:" head-end t)))
1158 (if (and head-found (> head-found tail-start))
1159 ;; Head and tail are overlapped.
1160 (setq tail-found head-found)
1161 (goto-char tail-start)
1162 (setq tail-found (or (search-forward "coding:" tail-end t)
1163 (search-forward "unibyte:" tail-end t))))
1164
1165 ;; At first check the head.
1166 (when head-found
1167 (goto-char head-start)
6b66d028
RS
1168 (setq head-end (set-auto-mode-1))
1169 (setq head-start (point))
1d8e9a7c 1170 (when (and head-end (< head-found head-end))
835f49b8 1171 (goto-char head-start)
502522b2
GM
1172 (when (and set-auto-coding-for-load
1173 (re-search-forward
6b66d028 1174 "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
502522b2
GM
1175 head-end t))
1176 (setq coding-system 'raw-text))
1177 (when (and (not coding-system)
1178 (re-search-forward
6b66d028 1179 "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
502522b2
GM
1180 head-end t))
1181 (setq coding-system (intern (match-string 2)))
1182 (or (coding-system-p coding-system)
1183 (setq coding-system nil)))))
1184
1185 ;; If no coding: tag in the head, check the tail.
1186 (when (and tail-found (not coding-system))
1187 (goto-char tail-start)
1188 (search-forward "\n\^L" nil t)
1189 (if (re-search-forward
1190 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
1191 ;; The prefix is what comes before "local variables:" in its
1192 ;; line. The suffix is what comes after "local variables:"
1193 ;; in its line.
1194 (let* ((prefix (regexp-quote (match-string 1)))
1195 (suffix (regexp-quote (match-string 2)))
1196 (re-coding
1197 (concat
1198 "^" prefix
cfe98f50
GM
1199 ;; N.B. without the \n below, the regexp can
1200 ;; eat newlines.
1201 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
502522b2
GM
1202 suffix "$"))
1203 (re-unibyte
1204 (concat
1205 "^" prefix
cfe98f50 1206 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
502522b2
GM
1207 suffix "$"))
1208 (re-end
cfe98f50 1209 (concat "^" prefix "[ \t]*End *:[ \t]*" suffix "$"))
502522b2
GM
1210 (pos (point)))
1211 (re-search-forward re-end tail-end 'move)
1212 (setq tail-end (point))
1213 (goto-char pos)
1214 (when (and set-auto-coding-for-load
1215 (re-search-forward re-unibyte tail-end t))
1216 (setq coding-system 'raw-text))
1217 (when (and (not coding-system)
1218 (re-search-forward re-coding tail-end t))
1219 (setq coding-system (intern (match-string 1)))
1220 (or (coding-system-p coding-system)
1221 (setq coding-system nil))))))
1222 coding-system)))
63561304
KH
1223
1224(setq set-auto-coding-function 'set-auto-coding)
87aba788 1225
4ed46869 1226(defun after-insert-file-set-buffer-file-coding-system (inserted)
521d4010 1227 "Set `buffer-file-coding-system' of current buffer after text is inserted."
4ed46869
KH
1228 (if last-coding-system-used
1229 (let ((coding-system
1230 (find-new-buffer-file-coding-system last-coding-system-used))
1231 (modified-p (buffer-modified-p)))
0269ddfb 1232 (when coding-system
71983219 1233 (set-buffer-file-coding-system coding-system t)
d0c26c63 1234 (if (and enable-multibyte-characters
c1841772 1235 (or (eq (coding-system-type coding-system) 'raw-text))
136e48e4
KH
1236 ;; If buffer was unmodified and the size is the
1237 ;; same as INSERTED, we must be visiting it.
1238 (not modified-p)
1239 (= (buffer-size) inserted))
ccb77b4e
RS
1240 ;; For coding systems no-conversion and raw-text...,
1241 ;; edit the buffer as unibyte.
d0c26c63
KH
1242 (let ((pos-byte (position-bytes (+ (point) inserted))))
1243 (set-buffer-multibyte nil)
1244 (setq inserted (- pos-byte (position-bytes (point))))))
0269ddfb 1245 (set-buffer-modified-p modified-p))))
d0c26c63 1246 inserted)
4ed46869 1247
84c9d215
KH
1248(add-hook 'after-insert-file-functions
1249 'after-insert-file-set-buffer-file-coding-system)
4ed46869 1250
8057896b 1251;; The coding-spec and eol-type of coding-system returned is decided
4ed46869
KH
1252;; independently in the following order.
1253;; 1. That of buffer-file-coding-system locally bound.
1254;; 2. That of CODING.
1255
1256(defun find-new-buffer-file-coding-system (coding)
1257 "Return a coding system for a buffer when a file of CODING is inserted.
a73a8c89
KH
1258The local variable `buffer-file-coding-system' of the current buffer
1259is set to the returned value.
509064c5 1260Return nil if there's no need to set `buffer-file-coding-system'."
4ed46869 1261 (let (local-coding local-eol
b685f8d6 1262 found-coding found-eol
4ed46869
KH
1263 new-coding new-eol)
1264 (if (null coding)
1265 ;; Nothing found about coding.
1266 nil
1267
b685f8d6
RS
1268 ;; Get information of `buffer-file-coding-system' in LOCAL-EOL
1269 ;; and LOCAL-CODING.
1270 (setq local-eol (coding-system-eol-type buffer-file-coding-system))
1271 (if (null (numberp local-eol))
1272 ;; But eol-type is not yet set.
1273 (setq local-eol nil))
0269ddfb 1274 (if (and buffer-file-coding-system
c1841772
KH
1275 (not (eq (coding-system-type buffer-file-coding-system)
1276 'undecided)))
0269ddfb 1277 (setq local-coding (coding-system-base buffer-file-coding-system)))
b685f8d6
RS
1278
1279 (if (and (local-variable-p 'buffer-file-coding-system)
1280 local-eol local-coding)
4ed46869
KH
1281 ;; The current buffer has already set full coding-system, we
1282 ;; had better not change it.
1283 nil
1284
8057896b 1285 (setq found-eol (coding-system-eol-type coding))
4ed46869 1286 (if (null (numberp found-eol))
be02cd54
EZ
1287 ;; But eol-type is not found.
1288 ;; If EOL conversions are inhibited, force unix eol-type.
1289 (setq found-eol (if inhibit-eol-conversion 0)))
c1841772 1290 (setq found-coding (coding-system-base coding))
c76b5c99
KH
1291
1292 (if (and (not found-eol) (eq found-coding 'undecided))
1293 ;; No valid coding information found.
1294 nil
1295
1296 ;; Some coding information (eol or text) found.
1297
1298 ;; The local setting takes precedence over the found one.
1299 (setq new-coding (if (local-variable-p 'buffer-file-coding-system)
1300 (or local-coding found-coding)
1301 (or found-coding local-coding)))
1302 (setq new-eol (if (local-variable-p 'buffer-file-coding-system)
1303 (or local-eol found-eol)
1304 (or found-eol local-eol)))
1305
1306 (let ((eol-type (coding-system-eol-type new-coding)))
1307 (if (and (numberp new-eol) (vectorp eol-type))
1308 (aref eol-type new-eol)
1309 new-coding)))))))
4ed46869 1310
fe831d33
GV
1311(defun modify-coding-system-alist (target-type regexp coding-system)
1312 "Modify one of look up tables for finding a coding system on I/O operation.
8c453b46
RS
1313There are three of such tables, `file-coding-system-alist',
1314`process-coding-system-alist', and `network-coding-system-alist'.
fe831d33
GV
1315
1316TARGET-TYPE specifies which of them to modify.
8c453b46
RS
1317If it is `file', it affects `file-coding-system-alist' (which see).
1318If it is `process', it affects `process-coding-system-alist' (which see).
e8dd0160 1319If it is `network', it affects `network-coding-system-alist' (which see).
fe831d33
GV
1320
1321REGEXP is a regular expression matching a target of I/O operation.
1322The target is a file name if TARGET-TYPE is `file', a program name if
1323TARGET-TYPE is `process', or a network service name or a port number
1324to connect to if TARGET-TYPE is `network'.
1325
1326CODING-SYSTEM is a coding system to perform code conversion on the I/O
8c453b46
RS
1327operation, or a cons cell (DECODING . ENCODING) specifying the coding systems
1328for decoding and encoding respectively,
1329or a function symbol which, when called, returns such a cons cell."
fe831d33
GV
1330 (or (memq target-type '(file process network))
1331 (error "Invalid target type: %s" target-type))
1332 (or (stringp regexp)
1333 (and (eq target-type 'network) (integerp regexp))
1334 (error "Invalid regular expression: %s" regexp))
1335 (if (symbolp coding-system)
1336 (if (not (fboundp coding-system))
1337 (progn
1338 (check-coding-system coding-system)
1339 (setq coding-system (cons coding-system coding-system))))
1340 (check-coding-system (car coding-system))
1341 (check-coding-system (cdr coding-system)))
1342 (cond ((eq target-type 'file)
1343 (let ((slot (assoc regexp file-coding-system-alist)))
1344 (if slot
1345 (setcdr slot coding-system)
1346 (setq file-coding-system-alist
1347 (cons (cons regexp coding-system)
1348 file-coding-system-alist)))))
1349 ((eq target-type 'process)
1350 (let ((slot (assoc regexp process-coding-system-alist)))
1351 (if slot
1352 (setcdr slot coding-system)
1353 (setq process-coding-system-alist
1354 (cons (cons regexp coding-system)
1355 process-coding-system-alist)))))
1356 (t
1357 (let ((slot (assoc regexp network-coding-system-alist)))
1358 (if slot
1359 (setcdr slot coding-system)
1360 (setq network-coding-system-alist
1361 (cons (cons regexp coding-system)
1362 network-coding-system-alist)))))))
1363
b25eef20 1364(defun make-translation-table (&rest args)
a284eea3 1365 "Make a translation table from arguments.
d38b07f9 1366A translation table is a char table intended for character
a284eea3
DL
1367translation in CCL programs.
1368
d38b07f9 1369Each argument is a list of elements of the form (FROM . TO), where FROM
a284eea3 1370is a character to be translated to TO.
13d5617d 1371
452fdb31 1372FROM can be a generic character (see `make-char'). In this case, TO is
d38b07f9 1373a generic character containing the same number of characters, or an
452fdb31 1374ordinary character. If FROM and TO are both generic characters, all
b25eef20 1375characters belonging to FROM are translated to characters belonging to TO
4e003d37
KH
1376without changing their position code(s).
1377
1378The arguments and forms in each argument are processed in the given
1379order, and if a previous form already translates TO to some other
1380character, say TO-ALT, FROM is also translated to TO-ALT."
f967223b 1381 (let ((table (make-char-table 'translation-table))
a73a8c89
KH
1382 revlist)
1383 (while args
1384 (let ((elts (car args)))
1385 (while elts
13d5617d
KH
1386 (let* ((from (car (car elts)))
1387 (from-i 0) ; degree of freedom of FROM
1388 (from-rev (nreverse (split-char from)))
1389 (to (cdr (car elts)))
1390 (to-i 0) ; degree of freedom of TO
1391 (to-rev (nreverse (split-char to))))
1392 ;; Check numbers of heading 0s in FROM-REV and TO-REV.
1393 (while (eq (car from-rev) 0)
1394 (setq from-i (1+ from-i) from-rev (cdr from-rev)))
1395 (while (eq (car to-rev) 0)
1396 (setq to-i (1+ to-i) to-rev (cdr to-rev)))
1397 (if (and (/= from-i to-i) (/= to-i 0))
1398 (error "Invalid character pair (%d . %d)" from to))
b25eef20
KH
1399 ;; If we have already translated TO to TO-ALT, FROM should
1400 ;; also be translated to TO-ALT. But, this is only if TO
1401 ;; is a generic character or TO-ALT is not a generic
13d5617d
KH
1402 ;; character.
1403 (let ((to-alt (aref table to)))
1404 (if (and to-alt
1405 (or (> to-i 0) (not (generic-char-p to-alt))))
1406 (setq to to-alt)))
1407 (if (> from-i 0)
1408 (set-char-table-default table from to)
1409 (aset table from to))
b25eef20
KH
1410 ;; If we have already translated some chars to FROM, they
1411 ;; should also be translated to TO.
a73a8c89
KH
1412 (let ((l (assq from revlist)))
1413 (if l
1414 (let ((ch (car l)))
1415 (setcar l to)
1416 (setq l (cdr l))
1417 (while l
1418 (aset table ch to)
1419 (setq l (cdr l)) ))))
1420 ;; Now update REVLIST.
1421 (let ((l (assq to revlist)))
1422 (if l
1423 (setcdr l (cons from (cdr l)))
1424 (setq revlist (cons (list to from) revlist)))))
1425 (setq elts (cdr elts))))
1426 (setq args (cdr args)))
1427 ;; Return TABLE just created.
1428 table))
1429
c76b5c99
KH
1430(defun make-translation-table-from-vector (vec)
1431 "Make translation table from decoding vector VEC.
9e3b6057
DL
1432VEC is an array of 256 elements to map unibyte codes to multibyte
1433characters. Elements may be nil for undefined code points.
c76b5c99
KH
1434See also the variable `nonascii-translation-table'."
1435 (let ((table (make-char-table 'translation-table))
1436 (rev-table (make-char-table 'translation-table))
c76b5c99 1437 ch)
9e3b6057 1438 (dotimes (i 256)
c76b5c99 1439 (setq ch (aref vec i))
9e3b6057
DL
1440 (when ch
1441 (aset table i ch)
1442 (if (>= ch 256)
1443 (aset rev-table ch i))))
c76b5c99
KH
1444 (set-char-table-extra-slot table 0 rev-table)
1445 table))
1446
f967223b 1447(defun define-translation-table (symbol &rest args)
a284eea3
DL
1448 "Define SYMBOL as the name of translation table made by ARGS.
1449This sets up information so that the table can be used for
1450translations in a CCL program.
b25eef20 1451
a284eea3
DL
1452If the first element of ARGS is a char-table whose purpose is
1453`translation-table', just define SYMBOL to name it. (Note that this
1454function does not bind SYMBOL.)
007c79c8 1455
a284eea3 1456Any other ARGS should be suitable as arguments of the function
007c79c8 1457`make-translation-table' (which see).
b25eef20 1458
452fdb31 1459This function sets properties `translation-table' and
521d4010
DL
1460`translation-table-id' of SYMBOL to the created table itself and the
1461identification number of the table respectively. It also registers
1462the table in `translation-table-vector'."
007c79c8
KH
1463 (let ((table (if (and (char-table-p (car args))
1464 (eq (char-table-subtype (car args))
1465 'translation-table))
1466 (car args)
1467 (apply 'make-translation-table args)))
f967223b 1468 (len (length translation-table-vector))
d9e3229d 1469 (id 0)
b25eef20 1470 (done nil))
f967223b 1471 (put symbol 'translation-table table)
b25eef20
KH
1472 (while (not done)
1473 (if (>= id len)
f967223b
KH
1474 (setq translation-table-vector
1475 (vconcat translation-table-vector (make-vector len nil))))
1476 (let ((slot (aref translation-table-vector id)))
b25eef20
KH
1477 (if (or (not slot)
1478 (eq (car slot) symbol))
1479 (progn
f967223b 1480 (aset translation-table-vector id (cons symbol table))
007c79c8
KH
1481 (setq done t))
1482 (setq id (1+ id)))))
f967223b 1483 (put symbol 'translation-table-id id)
d9e3229d
KH
1484 id))
1485
35554641
KH
1486(put 'with-category-table 'lisp-indent-function 1)
1487
1488(defmacro with-category-table (category-table &rest body)
07513d64 1489 "Execute BODY like `progn' with CATEGORY-TABLE the current category table."
35554641
KH
1490 `(let ((current-category-table (category-table)))
1491 (set-category-table ,category-table)
1492 (unwind-protect
1493 (progn ,@body)
1494 (set-category-table current-category-table))))
1495
69eba008
KH
1496;;; Initialize some variables.
1497
1498(put 'use-default-ascent 'char-table-extra-slots 0)
1499(setq use-default-ascent (make-char-table 'use-default-ascent))
d6d6d592
KH
1500(put 'ignore-relative-composition 'char-table-extra-slots 0)
1501(setq ignore-relative-composition
1502 (make-char-table 'ignore-relative-composition))
69eba008
KH
1503
1504;;;
4ed46869
KH
1505(provide 'mule)
1506
1507;;; mule.el ends here