(private-char-area-1-min)
[bpt/emacs.git] / lisp / international / mule.el
1 ;;; mule.el --- basic commands for multilingual environment
2
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6 ;; Copyright (C) 2003
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H13PRO009
9
10 ;; Keywords: mule, multilingual, character set, coding system
11
12 ;; This file is part of GNU Emacs.
13
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
17 ;; any later version.
18
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
23
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27 ;; Boston, MA 02111-1307, USA.
28
29 ;;; Commentary:
30
31 ;;; Code:
32
33 (defconst mule-version "6.0 (HANACHIRUSATO)" "\
34 Version number and name of this version of MULE (multilingual environment).")
35
36 (defconst mule-version-date "2003.9.1" "\
37 Distribution date of this version of MULE (multilingual environment).")
38
39 \f
40 ;;; CHARSET
41
42 ;; Backward compatibility code for handling emacs-mule charsets.
43 (defvar private-char-area-1-min #xF0000)
44 (defvar private-char-area-1-max #xFFFFE)
45 (defvar private-char-area-2-min #x100000)
46 (defvar private-char-area-2-max #x10FFFE)
47
48 ;; Table of emacs-mule charsets indexed by their emacs-mule ID.
49 (defvar emacs-mule-charset-table (make-vector 256 nil))
50 (aset emacs-mule-charset-table 0 'ascii)
51
52 ;; Convert the argument of old-style calll of define-charset to a
53 ;; property list used by the new-style.
54 ;; INFO-VECTOR is a vector of the format:
55 ;; [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
56 ;; SHORT-NAME LONG-NAME DESCRIPTION]
57
58 (defun convert-define-charset-argument (emacs-mule-id info-vector)
59 (let* ((dim (aref info-vector 0))
60 (chars (aref info-vector 1))
61 (total (if (= dim 1) chars (* chars chars)))
62 (code-space (if (= dim 1) (if (= chars 96) [32 127] [33 126])
63 (if (= chars 96) [32 127 32 127] [33 126 33 126])))
64 code-offset)
65 (if (integerp emacs-mule-id)
66 (or (= emacs-mule-id 0)
67 (and (>= emacs-mule-id 129) (< emacs-mule-id 256))
68 (error "Invalid CHARSET-ID: %d" emacs-mule-id))
69 (let (from-id to-id)
70 (if (= dim 1) (setq from-id 160 to-id 224)
71 (setq from-id 224 to-id 255))
72 (while (and (< from-id to-id)
73 (not (aref emacs-mule-charset-table from-id)))
74 (setq from-id (1+ from-id)))
75 (if (= from-id to-id)
76 (error "No more room for the new Emacs-mule charset"))
77 (setq emacs-mule-id from-id)))
78 (if (> (- private-char-area-1-max private-char-area-1-min) total)
79 (setq code-offset private-char-area-1-min
80 private-char-area-1-min (+ private-char-area-1-min total))
81 (if (> (- private-char-area-2-max private-char-area-2-min) total)
82 (setq code-offset private-char-area-2-min
83 private-char-area-2-min (+ private-char-area-2-min total))
84 (error "No more space for a new charset.")))
85 (list :dimension dim
86 :code-space code-space
87 :iso-final-char (aref info-vector 4)
88 :code-offset code-offset
89 :emacs-mule-id emacs-mule-id)))
90
91 (defun define-charset (name docstring &rest props)
92 "Define NAME (symbol) as a charset with DOCSTRING.
93 The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
94 may be any symbol. The following have special meanings, and one of
95 `:code-offset', `:map', `:subset', `:superset' must be specified.
96
97 `:short-name'
98
99 VALUE must be a short string to identify the charset. If omitted,
100 NAME is used.
101
102 `:long-name'
103
104 VALUE must be a string longer than `:short-name' to identify the
105 charset. If omitted, the value of the `:short-name' attribute is used.
106
107 `:dimension'
108
109 VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of
110 code-points of the charsets. If omitted, it is calculated from the
111 value of the `:code-space' attribute.
112
113 `:code-space'
114
115 VALUE must be a vector of length at most 8 specifying the byte code
116 range of each dimension in this format:
117 [ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
118 where MIN-N is the minimum byte value of Nth dimension of code-point,
119 MAX-N is the maximum byte value of that.
120
121 `:min-code'
122
123 VALUE must be an integer specifying the mininum code point of the
124 charset. If omitted, it is calculated from `:code-space'. VALUE may
125 be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
126 the code point and LOW is the least significant 16 bits.
127
128 `:max-code'
129
130 VALUE must be an integer specifying the maxinum code point of the
131 charset. If omitted, it is calculated from `:code-space'. VALUE may
132 be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
133 the code point and LOW is the least significant 16 bits.
134
135 `:iso-final-char'
136
137 VALUE must be a character in the range 32 to 127 (inclusive)
138 specifying the final char of the charset for ISO-2022 encoding. If
139 omitted, the charset can't be encoded by ISO-2022 based
140 coding-systems.
141
142 `:iso-revision-number'
143
144 VALUE must be an integer in the range 0..63, specifying the revision
145 number of the charset for ISO-2022 encoding.
146
147 `:emacs-mule-id'
148
149 VALUE must be an integer of 0, 129..255. If omitted, the charset
150 can't be encoded by coding-systems of type `emacs-mule'.
151
152 `:ascii-compatible-p'
153
154 VALUE must be nil or t (default nil). If VALUE is t, the charset is
155 compatible with ASCII, i.e. the first 128 code points map to ASCII.
156
157 `:supplementary-p'
158
159 VALUE must be nil or t. If the VALUE is t, the charset is
160 supplementary, which means it is used only as a parent of some other
161 charset.
162
163 `:invalid-code'
164
165 VALUE must be a nonnegative integer that can be used as an invalid
166 code point of the charset. If the minimum code is 0 and the maximum
167 code is greater than Emacs' maximum integer value, `:invalid-code'
168 should not be omitted.
169
170 `:code-offset'
171
172 VALUE must be an integer added to the index number of a character to
173 get the corresponding character code.
174
175 `:map'
176
177 VALUE must be vector or string.
178
179 If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
180 where CODE-n is a code-point of the charset, and CHAR-n is the
181 corresponding character code.
182
183 If it is a string, it is a name of file that contains the above
184 information. Each line of the file must be this format:
185 0xXXX 0xYYY
186 where XXX is a hexadecimal representation of CODE-n and YYY is a
187 hexadecimal representation of CHAR-n. A line starting with `#' is a
188 comment line.
189
190 `:subset'
191
192 VALUE must be a list:
193 ( PARENT MIN-CODE MAX-CODE OFFSET )
194 PARENT is a parent charset. MIN-CODE and MAX-CODE specify the range
195 of characters inherited from the parent. OFFSET is an integer value
196 to add to a code point of the parent charset to get the corresponding
197 code point of this charset.
198
199 `:superset'
200
201 VALUE must be a list of parent charsets. The charset inherits
202 characters from them. Each element of the list may be a cons (PARENT
203 . OFFSET), where PARENT is a parent charset, and OFFSET is an offset
204 value to add to a code point of PARENT to get the corresponding code
205 point of this charset.
206
207 `:unify-map'
208
209 VALUE must be vector or string.
210
211 If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
212 where CODE-n is a code-point of the charset, and CHAR-n is the
213 corresponding Unicode character code.
214
215 If it is a string, it is a name of file that contains the above
216 information. The file format is the same as what described for `:map'
217 attribute."
218 (when (vectorp (car props))
219 ;; Old style code:
220 ;; (define-charset CHARSET-ID CHARSET-SYMBOL INFO-VECTOR)
221 ;; Convert the argument to make it fit with the current style.
222 (let ((vec (car props)))
223 (setq props (convert-define-charset-argument name vec)
224 name docstring
225 docstring (aref vec 8))))
226 (let ((attrs (mapcar 'list '(:dimension
227 :code-space
228 :min-code
229 :max-code
230 :iso-final-char
231 :iso-revision-number
232 :emacs-mule-id
233 :ascii-compatible-p
234 :supplementary-p
235 :invalid-code
236 :code-offset
237 :map
238 :subset
239 :superset
240 :unify-map
241 :plist))))
242
243 ;; If :dimension is omitted, get the dimension from :code-space.
244 (let ((dimension (plist-get props :dimension)))
245 (or dimension
246 (progn
247 (setq dimension (/ (length (plist-get props :code-space)) 2))
248 (setq props (plist-put props :dimension dimension)))))
249
250 ;; If :emacs-mule-id is specified, update emacs-mule-charset-table.
251 (let ((emacs-mule-id (plist-get props :emacs-mule-id)))
252 (if (integerp emacs-mule-id)
253 (aset emacs-mule-charset-table emacs-mule-id name)))
254
255 (dolist (slot attrs)
256 (setcdr slot (plist-get props (car slot))))
257
258 ;; Make sure that the value of :code-space is a vector of 8
259 ;; elements.
260 (let* ((slot (assq :code-space attrs))
261 (val (cdr slot))
262 (len (length val)))
263 (if (< len 8)
264 (setcdr slot
265 (vconcat val (make-vector (- 8 len) 0)))))
266
267 ;; Add :name and :docstring properties to PROPS.
268 (setq props
269 (cons :name (cons name (cons :docstring (cons docstring props)))))
270 (or (plist-get props :short-name)
271 (plist-put props :short-name (symbol-name name)))
272 (or (plist-get props :long-name)
273 (plist-put props :long-name (plist-get props :short-name)))
274 ;; We can probably get a worthwhile amount in purespace.
275 (setq props
276 (mapcar (lambda (elt)
277 (if (stringp elt)
278 (purecopy elt)
279 elt))
280 props))
281 (setcdr (assq :plist attrs) props)
282
283 (apply 'define-charset-internal name (mapcar 'cdr attrs))))
284
285
286 (defun load-with-code-conversion (fullname file &optional noerror nomessage)
287 "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
288 The file contents are decoded before evaluation if necessary.
289 If optional second arg NOERROR is non-nil,
290 report no error if FILE doesn't exist.
291 Print messages at start and end of loading unless
292 optional third arg NOMESSAGE is non-nil.
293 Return t if file exists."
294 (if (null (file-readable-p fullname))
295 (and (null noerror)
296 (signal 'file-error (list "Cannot open load file" file)))
297 ;; Read file with code conversion, and then eval.
298 (let* ((buffer
299 ;; To avoid any autoloading, set default-major-mode to
300 ;; fundamental-mode.
301 ;; So that we don't get completely screwed if the
302 ;; file is encoded in some complicated character set,
303 ;; read it with real decoding, as a multibyte buffer,
304 ;; even if this is a --unibyte Emacs session.
305 (let ((default-major-mode 'fundamental-mode)
306 (default-enable-multibyte-characters t))
307 ;; We can't use `generate-new-buffer' because files.el
308 ;; is not yet loaded.
309 (get-buffer-create (generate-new-buffer-name " *load*"))))
310 (load-in-progress t)
311 (source (save-match-data (string-match "\\.el\\'" fullname))))
312 (unless nomessage
313 (if source
314 (message "Loading %s (source)..." file)
315 (message "Loading %s..." file)))
316 (when purify-flag
317 (push file preloaded-file-list))
318 (unwind-protect
319 (let ((load-file-name fullname)
320 (set-auto-coding-for-load t)
321 (inhibit-file-name-operation nil))
322 (save-excursion
323 (set-buffer buffer)
324 (insert-file-contents fullname)
325 ;; If the loaded file was inserted with no-conversion or
326 ;; raw-text coding system, make the buffer unibyte.
327 ;; Otherwise, eval-buffer might try to interpret random
328 ;; binary junk as multibyte characters.
329 (if (and enable-multibyte-characters
330 (or (eq (coding-system-type last-coding-system-used)
331 'raw-text)))
332 (set-buffer-multibyte nil))
333 ;; Make `kill-buffer' quiet.
334 (set-buffer-modified-p nil))
335 ;; Have the original buffer current while we eval.
336 (eval-buffer buffer nil file
337 ;; If this Emacs is running with --unibyte,
338 ;; convert multibyte strings to unibyte
339 ;; after reading them.
340 ;; (not default-enable-multibyte-characters)
341 nil t
342 ))
343 (let (kill-buffer-hook kill-buffer-query-functions)
344 (kill-buffer buffer)))
345 (let ((hook (assoc file after-load-alist)))
346 (when hook
347 (mapcar (function eval) (cdr hook))))
348 (unless (or nomessage noninteractive)
349 (if source
350 (message "Loading %s (source)...done" file)
351 (message "Loading %s...done" file)))
352 t)))
353
354 (defun charset-info (charset)
355 "Return a vector of information of CHARSET.
356 This function is provided for backward compatibility.
357
358 The elements of the vector are:
359 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
360 LEADING-CODE-BASE, LEADING-CODE-EXT,
361 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
362 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
363 PLIST.
364 where
365 CHARSET-ID is always 0.
366 BYTES is always 0.
367 DIMENSION is the number of bytes of a code-point of the charset:
368 1, 2, 3, or 4.
369 CHARS is the number of characters in a dimension:
370 94, 96, 128, or 256.
371 WIDTH is always 0.
372 DIRECTION is always 0.
373 LEADING-CODE-BASE is always 0.
374 LEADING-CODE-EXT is always 0.
375 ISO-FINAL-CHAR (character) is the final character of the
376 corresponding ISO 2022 charset. If the charset is not assigned
377 any final character, the value is -1.
378 ISO-GRAPHIC-PLANE is always 0.
379 REVERSE-CHARSET is always -1.
380 SHORT-NAME (string) is the short name to refer to the charset.
381 LONG-NAME (string) is the long name to refer to the charset
382 DESCRIPTION (string) is the description string of the charset.
383 PLIST (property list) may contain any type of information a user
384 want to put and get by functions `put-charset-property' and
385 `get-charset-property' respectively."
386 (vector 0
387 0
388 (charset-dimension charset)
389 (charset-chars charset)
390 0
391 0
392 0
393 0
394 (charset-iso-final-char charset)
395 0
396 -1
397 (get-charset-property charset :short-name)
398 (get-charset-property charset :short-name)
399 (charset-description charset)
400 (charset-plist charset)))
401
402 ;; It is better not to use backquote in this file,
403 ;; because that makes a bootstrapping problem
404 ;; if you need to recompile all the Lisp files using interpreted code.
405
406 (defun charset-id (charset)
407 "Always return 0. This is provided for backward compatibility."
408 0)
409
410 (defmacro charset-bytes (charset)
411 "Always return 0. This is provided for backward compatibility."
412 0)
413
414 (defun get-charset-property (charset propname)
415 "Return the value of CHARSET's PROPNAME property.
416 This is the last value stored with
417 (put-charset-property CHARSET PROPNAME VALUE)."
418 (plist-get (charset-plist charset) propname))
419
420 (defun put-charset-property (charset propname value)
421 "Set CHARSETS's PROPNAME property to value VALUE.
422 It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
423 (set-charset-plist charset
424 (plist-put (charset-plist charset) propname value)))
425
426 (defun charset-description (charset)
427 "Return description string of CHARSET."
428 (plist-get (charset-plist charset) :docstring))
429
430 (defun charset-dimension (charset)
431 "Return dimension of CHARSET."
432 (plist-get (charset-plist charset) :dimension))
433
434 (defun charset-chars (charset &optional dimension)
435 "Return number of characters contained in DIMENSION of CHARSET.
436 DIMENSION defaults to the first dimension."
437 (unless dimension (setq dimension 1))
438 (let ((code-space (plist-get (charset-plist charset) :code-space)))
439 (1+ (- (aref code-space (1- (* 2 dimension)))
440 (aref code-space (- (* 2 dimension) 2))))))
441
442 (defun charset-iso-final-char (charset)
443 "Return ISO-2022 final character of CHARSET.
444 Return -1 if charset isn't an ISO 2022 one."
445 (or (plist-get (charset-plist charset) :iso-final-char)
446 -1))
447
448 (defmacro charset-short-name (charset)
449 "Return short name of CHARSET."
450 (plist-get (charset-plist charset) :short-name))
451
452 (defmacro charset-long-name (charset)
453 "Return long name of CHARSET."
454 (plist-get (charset-plist charset) :long-name))
455
456 (defun charset-list ()
457 "Return list of all charsets ever defined.
458
459 This function is provided for backward compatibility.
460 Now we have the variable `charset-list'."
461 charset-list)
462 (make-obsolete 'charset-list "Use variable `charset-list'" "22.1")
463
464 \f
465 ;;; CHARACTER
466 (defalias 'char-valid-p 'characterp)
467 (make-obsolete 'char-valid-p 'characterp "22.1")
468
469 (defun generic-char-p (char)
470 "Always return nil. This is provided for backward compatibility."
471 nil)
472 (make-obsolete 'generic-char-p "Generic characters no longer exist" "22.1")
473
474 (defun make-char-internal (charset-id &optional code1 code2)
475 (let ((charset (aref emacs-mule-charset-table charset-id)))
476 (or charset
477 (error "Invalid Emacs-mule charset ID: %d" charset-id))
478 (make-char charset code1 code2)))
479 \f
480 ;; Coding system stuff
481
482 ;; Coding system is a symbol that has been defined by the function
483 ;; `define-coding-system'.
484
485 (defconst coding-system-iso-2022-flags
486 '(long-form
487 ascii-at-eol
488 ascii-at-cntl
489 7-bit
490 locking-shift
491 single-shift
492 designation
493 revision
494 direction
495 init-at-bol
496 designate-at-bol
497 safe
498 latin-extra
499 composition
500 euc-tw-shift
501 use-roman
502 use-oldjis)
503 "List of symbols that control ISO-2022 encoder/decoder.
504
505 The value of the `:flags' attribute in the argument of the function
506 `define-coding-system' must be one of them.
507
508 If `long-form' is specified, use a long designation sequence on
509 encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
510 and `japanese-jisx0208'. The long designation sequence doesn't
511 conform to ISO 2022, but is used by such coding systems as
512 `compound-text'.
513
514 If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
515 on encoding.
516
517 If `ascii-at-cntl' is specified, designate ASCII to g0 before control
518 codes and SPC on encoding.
519
520 If `7-bit' is specified, use 7-bit code only on encoding.
521
522 If `locking-shift' is specified, decode locking-shift code correctly
523 on decoding, and use locking-shift to invoke a graphic element on
524 encoding.
525
526 If `single-shift' is specified, decode single-shift code correctly on
527 decoding, and use single-shift to invoke a graphic element on encoding.
528
529 If `designation' is specified, decode designation code correctly on
530 decoding, and use designation to designate a charset to a graphic
531 element on encoding.
532
533 If `revision' is specified, produce an escape sequence to specify
534 revision number of a charset on encoding. Such an escape sequence is
535 always correctly decoded on decoding.
536
537 If `direction' is specified, decode ISO6429's code for specifying
538 direction correctly, and produce the code on encoding.
539
540 If `init-at-bol' is specified, on encoding, it is assumed that
541 invocation and designation statuses are reset at each beginning of
542 line even if `ascii-at-eol' is not specified; thus no codes for
543 resetting them are produced.
544
545 If `safe' is specified, on encoding, characters not supported by a
546 coding are replaced with `?'.
547
548 If `latin-extra' is specified, the code-detection routine assumes that a
549 code specified in `latin-extra-code-table' (which see) is valid.
550
551 If `composition' is specified, an escape sequence to specify
552 composition sequence is correctly decoded on decoding, and is produced
553 on encoding.
554
555 If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
556 correctly decoded on decoding, and is produced on encoding.
557
558 If `use-roman' is specified, JIS0201-1976-Roman is designated instead
559 of ASCII.
560
561 If `use-oldjis' is specified, JIS0208-1976 is designated instead of
562 JIS0208-1983.")
563
564 (defun define-coding-system (name docstring &rest props)
565 "Define NAME (a symbol) as a coding system with DOCSTRING and attributes.
566 The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
567 may be any symbol.
568
569 The following attributes have special meanings. Those labeled as
570 \"(required)\", should not be omitted.
571
572 `:mnemonic' (required)
573
574 VALUE is a character to display on mode line for the coding system.
575
576 `:coding-type' (required)
577
578 VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022',
579 `emacs-mule', `shift-jis', `ccl', `raw-text', `undecided'.
580
581 `:eol-type'
582
583 VALUE is the EOL (end-of-line) format of the coding system. It must be
584 one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL
585 \(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF),
586 and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on
587 decoding by the coding system, Emacs automatically detects the EOL
588 format of the source text.
589
590 `:charset-list'
591
592 VALUE must be a list of charsets supported by the coding system. On
593 encoding by the coding system, if a character belongs to multiple
594 charsets in the list, a charset that comes earlier in the list is
595 selected. If `:coding-type' is `iso-2022', VALUE may be `iso-2022',
596 which indicates that the coding system supports all ISO-2022 based
597 charsets. If `:coding-type' is `emacs-mule', VALUE may be
598 `emacs-mule', which indicates that the coding system supports all
599 charsets that have the `:emacs-mule-id' property.
600
601 `:ascii-compatible-p'
602
603 If VALUE is non-nil, the coding system decodes all 7-bit bytes into
604 the corresponding ASCII characters, and encodes all ASCII characters
605 back to the corresponding 7-bit bytes. VALUE defaults to nil.
606
607 `:decode-translation-table'
608
609 VALUE must be a translation table to use on decoding.
610
611 `:encode-translation-table'
612
613 VALUE must be a translation table to use on encoding.
614
615 `:post-read-conversion'
616
617 VALUE must be a function to call after some text is inserted and
618 decoded by the coding system itself and before any functions in
619 `after-insert-functions' are called. The arguments to this function
620 are the same as those of a function in `after-insert-file-functions',
621 i.e. LENGTH of the text to be decoded with point at the head of it,
622 and the function should leave point unchanged.
623
624 `:pre-write-conversion'
625
626 VALUE must be a function to call after all functions in
627 `write-region-annotate-functions' and `buffer-file-format' are called,
628 and before the text is encoded by the coding system itself. The
629 arguments to this function are the same as those of a function in
630 `write-region-annotate-functions'.
631
632 `:default-char'
633
634 VALUE must be a character. On encoding, a character not supported by
635 the coding system is replaced with VALUE.
636
637 `:for-unibyte'
638
639 VALUE non-nil means that visiting a file with the coding system
640 results in a unibyte buffer.
641
642 `:eol-type'
643
644 VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like
645 EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like
646 EOL (CR). If omitted, on decoding, the coding system detects EOL
647 format automatically, and on encoding, uses Unix-like EOL.
648
649 `:mime-charset'
650
651 VALUE must be a symbol whose name is that of a MIME charset converted
652 to lower case.
653
654 `:mime-text-unsuitable'
655
656 VALUE non-nil means the `:mime-charset' property names a charset which
657 is unsuitable for the top-level media type \"text\".
658
659 `:flags'
660
661 VALUE must be a list of symbols that control the ISO-2022 converter.
662 Each must be a member of the list `coding-system-iso-2022-flags'
663 \(which see). This attribute has a meaning only when `:coding-type'
664 is `iso-2022'.
665
666 `:designation'
667
668 VALUE must be a vector [G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
669 GN-USAGE specifies the usage of graphic register GN as follows.
670
671 If it is nil, no charset can be designated to GN.
672
673 If it is a charset, the charset is initially designated to GN, and
674 never used by the other charsets.
675
676 If it is a list, the elements must be charsets, nil, 94, or 96. GN
677 can be used by all the listed charsets. If the list contains 94, any
678 iso-2022 charset whose code-space ranges are 94 long can be designated
679 to GN. If the list contains 96, any charsets whose whose ranges are
680 96 long can be designated to GN. If the first element is a charset,
681 that charset is initially designated to GN.
682
683 This attribute has a meaning only when `:coding-type' is `iso-2022'.
684
685 `:bom'
686
687 This attributes specifies whether the coding system uses a `byte order
688 mark'. VALUE must nil, t, or cons of coding systems whose
689 `:coding-type' is `utf-16'.
690
691 If the value is nil, on decoding, don't treat the first two-byte as
692 BOM, and on encoding, don't produce BOM bytes.
693
694 If the value is t, on decoding, skip the first two-byte as BOM, and on
695 encoding, produce BOM bytes accoding to the value of `:endian'.
696
697 If the value is cons, on decoding, check the first two-byte. If theyq
698 are 0xFE 0xFF, use the car part coding system of the value. If they
699 are 0xFF 0xFE, use the car part coding system of the value.
700 Otherwise, treat them as bytes for a normal character. On encoding,
701 produce BOM bytes accoding to the value of `:endian'.
702
703 This attribute has a meaning only when `:coding-type' is `utf-16'.
704
705 `:endian'
706
707 VALUE must be `big' or `little' specifying big-endian and
708 little-endian respectively. The default value is `big'.
709
710 This attribute has a meaning only when `:coding-type' is `utf-16'.
711
712 `:ccl-decoder'
713
714 VALUE is a symbol representing the registered CCL program used for
715 decoding. This attribute has a meaning only when `:coding-type' is
716 `ccl'.
717
718 `:ccl-encoder'
719
720 VALUE is a symbol representing the registered CCL program used for
721 encoding. This attribute has a meaning only when `:coding-type' is
722 `ccl'."
723 (let* ((common-attrs (mapcar 'list
724 '(:mnemonic
725 :coding-type
726 :charset-list
727 :ascii-compatible-p
728 :decode-translation-table
729 :encode-translation-table
730 :post-read-conversion
731 :pre-write-conversion
732 :default-char
733 :for-unibyte
734 :plist
735 :eol-type)))
736 (coding-type (plist-get props :coding-type))
737 (spec-attrs (mapcar 'list
738 (cond ((eq coding-type 'iso-2022)
739 '(:initial
740 :reg-usage
741 :request
742 :flags))
743 ((eq coding-type 'utf-16)
744 '(:bom
745 :endian))
746 ((eq coding-type 'ccl)
747 '(:ccl-decoder
748 :ccl-encoder
749 :valids))))))
750
751 (dolist (slot common-attrs)
752 (setcdr slot (plist-get props (car slot))))
753
754 (dolist (slot spec-attrs)
755 (setcdr slot (plist-get props (car slot))))
756
757 (if (eq coding-type 'iso-2022)
758 (let ((designation (plist-get props :designation))
759 (flags (plist-get props :flags))
760 (initial (make-vector 4 nil))
761 (reg-usage (cons 4 4))
762 request elt)
763 (dotimes (i 4)
764 (setq elt (aref designation i))
765 (cond ((charsetp elt)
766 (aset initial i elt)
767 (setq request (cons (cons elt i) request)))
768 ((consp elt)
769 (aset initial i (car elt))
770 (if (charsetp (car elt))
771 (setq request (cons (cons (car elt) i) request)))
772 (dolist (e (cdr elt))
773 (cond ((charsetp e)
774 (setq request (cons (cons e i) request)))
775 ((eq e 94)
776 (setcar reg-usage i))
777 ((eq e 96)
778 (setcdr reg-usage i))
779 ((eq e t)
780 (setcar reg-usage i)
781 (setcdr reg-usage i)))))))
782 (setcdr (assq :initial spec-attrs) initial)
783 (setcdr (assq :reg-usage spec-attrs) reg-usage)
784 (setcdr (assq :request spec-attrs) request)
785
786 ;; Change :flags value from a list to a bit-mask.
787 (let ((bits 0)
788 (i 0))
789 (dolist (elt coding-system-iso-2022-flags)
790 (if (memq elt flags)
791 (setq bits (logior bits (lsh 1 i))))
792 (setq i (1+ i)))
793 (setcdr (assq :flags spec-attrs) bits))))
794
795 ;; Add :name and :docstring properties to PROPS.
796 (setq props
797 (cons :name (cons name (cons :docstring (cons (purecopy docstring)
798 props)))))
799 (setcdr (assq :plist common-attrs) props)
800 (apply 'define-coding-system-internal
801 name (mapcar 'cdr (append common-attrs spec-attrs)))))
802
803 (defun coding-system-doc-string (coding-system)
804 "Return the documentation string for CODING-SYSTEM."
805 (plist-get (coding-system-plist coding-system) :docstring))
806
807 (defun coding-system-mnemonic (coding-system)
808 "Return the mnemonic character of CODING-SYSTEM.
809 The mnemonic character of a coding system is used in mode line to
810 indicate the coding system. If CODING-SYSTEM. is nil, return ?=."
811 (plist-get (coding-system-plist coding-system) :mnemonic))
812
813 (defun coding-system-type (coding-system)
814 "Return the coding type of CODING-SYSTEM.
815 A coding type is a symbol indicating the encoding method of CODING-SYSTEM.
816 See the function `define-coding-system' for more detail."
817 (plist-get (coding-system-plist coding-system) :coding-type))
818
819 (defun coding-system-charset-list (coding-system)
820 "Return list of charsets supported by CODING-SYSTEM.
821 If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
822 If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'."
823 (plist-get (coding-system-plist coding-system) :charset-list))
824
825 (defun coding-system-category (coding-system)
826 "Return a category symbol of CODING-SYSTEM."
827 (plist-get (coding-system-plist coding-system) :category))
828
829 (defun coding-system-get (coding-system prop)
830 "Extract a value from CODING-SYSTEM's property list for property PROP.
831 For compatibility with Emacs 20/21, this accepts old-style symbols
832 like `mime-charset' as well as the current style like `:mime-charset'."
833 (or (plist-get (coding-system-plist coding-system) prop)
834 (if (not (keywordp prop))
835 (plist-get (coding-system-plist coding-system)
836 (intern (concat ":" (symbol-name prop)))))))
837
838 (defun coding-system-put (coding-system prop val)
839 "Change value in CODING-SYSTEM's property list PROP to VAL."
840 (plist-put (coding-system-plist coding-system) prop val))
841
842 (defalias 'coding-system-parent 'coding-system-base)
843 (make-obsolete 'coding-system-parent 'coding-system-base "20.3")
844
845 (defun coding-system-eol-type-mnemonic (coding-system)
846 "Return the string indicating end-of-line format of CODING-SYSTEM."
847 (let* ((eol-type (coding-system-eol-type coding-system))
848 (val (cond ((eq eol-type 0) eol-mnemonic-unix)
849 ((eq eol-type 1) eol-mnemonic-dos)
850 ((eq eol-type 2) eol-mnemonic-mac)
851 (t eol-mnemonic-undecided))))
852 (if (stringp val)
853 val
854 (char-to-string val))))
855
856 (defun coding-system-lessp (x y)
857 (cond ((eq x 'no-conversion) t)
858 ((eq y 'no-conversion) nil)
859 ((eq x 'emacs-mule) t)
860 ((eq y 'emacs-mule) nil)
861 ((eq x 'undecided) t)
862 ((eq y 'undecided) nil)
863 (t (let ((c1 (coding-system-mnemonic x))
864 (c2 (coding-system-mnemonic y)))
865 (or (< (downcase c1) (downcase c2))
866 (and (not (> (downcase c1) (downcase c2)))
867 (< c1 c2)))))))
868
869 (defun add-to-coding-system-list (coding-system)
870 "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted."
871 (if (or (null coding-system-list)
872 (coding-system-lessp coding-system (car coding-system-list)))
873 (setq coding-system-list (cons coding-system coding-system-list))
874 (let ((len (length coding-system-list))
875 mid (tem coding-system-list))
876 (while (> len 1)
877 (setq mid (nthcdr (/ len 2) tem))
878 (if (coding-system-lessp (car mid) coding-system)
879 (setq tem mid
880 len (- len (/ len 2)))
881 (setq len (/ len 2))))
882 (setcdr tem (cons coding-system (cdr tem))))))
883
884 (defun coding-system-list (&optional base-only)
885 "Return a list of all existing non-subsidiary coding systems.
886 If optional arg BASE-ONLY is non-nil, only base coding systems are
887 listed. The value doesn't include subsidiary coding systems which are
888 made from bases and aliases automatically for various end-of-line
889 formats (e.g. iso-latin-1-unix, koi8-r-dos)."
890 (let* ((codings (copy-sequence coding-system-list))
891 (tail (cons nil codings)))
892 ;; Remove subsidiary coding systems (eol variants) and alias
893 ;; coding systems (if necessary).
894 (while (cdr tail)
895 (let* ((coding (car (cdr tail)))
896 (aliases (coding-system-aliases coding)))
897 (if (or
898 ;; CODING is an eol variant if not in ALIASES.
899 (not (memq coding aliases))
900 ;; CODING is an alias if it is not car of ALIASES.
901 (and base-only (not (eq coding (car aliases)))))
902 (setcdr tail (cdr (cdr tail)))
903 (setq tail (cdr tail)))))
904 codings))
905
906 (defconst char-coding-system-table nil
907 "This is an obsolete variable.
908 It exists just for backward compatibility, and the value is always nil.")
909
910 (defun transform-make-coding-system-args (name type &optional doc-string props)
911 "For internal use only.
912 Transform XEmacs style args for `make-coding-system' to Emacs style.
913 Value is a list of transformed arguments."
914 (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
915 (eol-type (plist-get props 'eol-type))
916 properties tmp)
917 (cond
918 ((eq eol-type 'lf) (setq eol-type 'unix))
919 ((eq eol-type 'crlf) (setq eol-type 'dos))
920 ((eq eol-type 'cr) (setq eol-type 'mac)))
921 (if (setq tmp (plist-get props 'post-read-conversion))
922 (setq properties (plist-put properties 'post-read-conversion tmp)))
923 (if (setq tmp (plist-get props 'pre-write-conversion))
924 (setq properties (plist-put properties 'pre-write-conversion tmp)))
925 (cond
926 ((eq type 'shift-jis)
927 `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type))
928 ((eq type 'iso2022) ; This is not perfect.
929 (if (plist-get props 'escape-quoted)
930 (error "escape-quoted is not supported: %S"
931 `(,name ,type ,doc-string ,props)))
932 (let ((g0 (plist-get props 'charset-g0))
933 (g1 (plist-get props 'charset-g1))
934 (g2 (plist-get props 'charset-g2))
935 (g3 (plist-get props 'charset-g3))
936 (use-roman
937 (and
938 (eq (cadr (assoc 'latin-jisx0201
939 (plist-get props 'input-charset-conversion)))
940 'ascii)
941 (eq (cadr (assoc 'ascii
942 (plist-get props 'output-charset-conversion)))
943 'latin-jisx0201)))
944 (use-oldjis
945 (and
946 (eq (cadr (assoc 'japanese-jisx0208-1978
947 (plist-get props 'input-charset-conversion)))
948 'japanese-jisx0208)
949 (eq (cadr (assoc 'japanese-jisx0208
950 (plist-get props 'output-charset-conversion)))
951 'japanese-jisx0208-1978))))
952 (if (charsetp g0)
953 (if (plist-get props 'force-g0-on-output)
954 (setq g0 `(nil ,g0))
955 (setq g0 `(,g0 t))))
956 (if (charsetp g1)
957 (if (plist-get props 'force-g1-on-output)
958 (setq g1 `(nil ,g1))
959 (setq g1 `(,g1 t))))
960 (if (charsetp g2)
961 (if (plist-get props 'force-g2-on-output)
962 (setq g2 `(nil ,g2))
963 (setq g2 `(,g2 t))))
964 (if (charsetp g3)
965 (if (plist-get props 'force-g3-on-output)
966 (setq g3 `(nil ,g3))
967 (setq g3 `(,g3 t))))
968 `(,name 2 ,mnemonic ,doc-string
969 (,g0 ,g1 ,g2 ,g3
970 ,(plist-get props 'short)
971 ,(not (plist-get props 'no-ascii-eol))
972 ,(not (plist-get props 'no-ascii-cntl))
973 ,(plist-get props 'seven)
974 t
975 ,(not (plist-get props 'lock-shift))
976 ,use-roman
977 ,use-oldjis
978 ,(plist-get props 'no-iso6429)
979 nil nil nil nil)
980 ,properties ,eol-type)))
981 ((eq type 'big5)
982 `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type))
983 ((eq type 'ccl)
984 `(,name 4 ,mnemonic ,doc-string
985 (,(plist-get props 'decode) . ,(plist-get props 'encode))
986 ,properties ,eol-type))
987 (t
988 (error "unsupported XEmacs style make-coding-style arguments: %S"
989 `(,name ,type ,doc-string ,props))))))
990
991 (defun make-coding-system (coding-system type mnemonic doc-string
992 &optional
993 flags
994 properties
995 eol-type)
996 "Define a new coding system CODING-SYSTEM (symbol).
997 This function is provided for backward compatibility.
998 Use `define-coding-system' instead."
999 ;; For compatiblity with XEmacs, we check the type of TYPE. If it
1000 ;; is a symbol, perhaps, this function is called with XEmacs-style
1001 ;; arguments. Here, try to transform that kind of arguments to
1002 ;; Emacs style.
1003 (if (symbolp type)
1004 (let ((args (transform-make-coding-system-args coding-system type
1005 mnemonic doc-string)))
1006 (setq coding-system (car args)
1007 type (nth 1 args)
1008 mnemonic (nth 2 args)
1009 doc-string (nth 3 args)
1010 flags (nth 4 args)
1011 properties (nth 5 args)
1012 eol-type (nth 6 args))))
1013
1014 (setq type
1015 (cond ((eq type 0) 'emacs-mule)
1016 ((eq type 1) 'shift-jis)
1017 ((eq type 2) 'iso2022)
1018 ((eq type 3) 'big5)
1019 ((eq type 4) 'ccl)
1020 ((eq type 5) 'raw-text)
1021 (t
1022 (error "Invalid coding system type: %s" type))))
1023
1024 (setq properties
1025 (let ((plist nil) key)
1026 (dolist (elt properties)
1027 (setq key (car elt))
1028 (cond ((eq key 'post-read-conversion)
1029 (setq key :post-read-conversion))
1030 ((eq key 'pre-write-conversion)
1031 (setq key :pre-write-conversion))
1032 ((eq key 'translation-table-for-decode)
1033 (setq key :decode-translation-table))
1034 ((eq key 'translation-table-for-encode)
1035 (setq key :encode-translation-table))
1036 ((eq key 'safe-charsets)
1037 (setq key :charset-list))
1038 ((eq key 'mime-charset)
1039 (setq key :mime-charset))
1040 ((eq key 'valid-codes)
1041 (setq key :valids)))
1042 (setq plist (plist-put plist key (cdr elt))))
1043 plist))
1044 (plist-put properties :mnemonic mnemonic)
1045 (plist-put properties :coding-type type)
1046 (cond ((eq eol-type 0) (setq eol-type 'unix))
1047 ((eq eol-type 1) (setq eol-type 'dos))
1048 ((eq eol-type 2) (setq eol-type 'mac))
1049 ((vectorp eol-type) (setq eol-type nil)))
1050 (plist-put properties :eol-type eol-type)
1051
1052 (cond
1053 ((eq type 'iso2022)
1054 (plist-put properties :flags
1055 (list (and (or (consp (nth 0 flags))
1056 (consp (nth 1 flags))
1057 (consp (nth 2 flags))
1058 (consp (nth 3 flags))) 'designation)
1059 (or (nth 4 flags) 'long-form)
1060 (and (nth 5 flags) 'ascii-at-eol)
1061 (and (nth 6 flags) 'ascii-at-cntl)
1062 (and (nth 7 flags) '7-bit)
1063 (and (nth 8 flags) 'locking-shift)
1064 (and (nth 9 flags) 'single-shift)
1065 (and (nth 10 flags) 'use-roman)
1066 (and (nth 11 flags) 'use-oldjis)
1067 (or (nth 12 flags) 'direction)
1068 (and (nth 13 flags) 'init-at-bol)
1069 (and (nth 14 flags) 'designate-at-bol)
1070 (and (nth 15 flags) 'safe)
1071 (and (nth 16 flags) 'latin-extra)))
1072 (plist-put properties :designation
1073 (let ((vec (make-vector 4 nil)))
1074 (dotimes (i 4)
1075 (let ((spec (nth i flags)))
1076 (if (eq spec t)
1077 (aset vec i '(94 96))
1078 (if (consp spec)
1079 (progn
1080 (if (memq t spec)
1081 (setq spec (append (delq t spec) '(94 96))))
1082 (aset vec i spec))))))
1083 vec)))
1084
1085 ((eq type 'ccl)
1086 (plist-put properties :ccl-decoder (car flags))
1087 (plist-put properties :ccl-encoder (cdr flags))))
1088
1089 (apply 'define-coding-system coding-system doc-string properties))
1090
1091 (defun merge-coding-systems (first second)
1092 "Fill in any unspecified aspects of coding system FIRST from SECOND.
1093 Return the resulting coding system."
1094 (let ((base (coding-system-base second))
1095 (eol (coding-system-eol-type second)))
1096 ;; If FIRST doesn't specify text conversion, merge with that of SECOND.
1097 (if (eq (coding-system-base first) 'undecided)
1098 (setq first (coding-system-change-text-conversion first base)))
1099 ;; If FIRST doesn't specify eol conversion, merge with that of SECOND.
1100 (if (and (vectorp (coding-system-eol-type first))
1101 (numberp eol) (>= eol 0) (<= eol 2))
1102 (setq first (coding-system-change-eol-conversion
1103 first eol)))
1104 first))
1105
1106 (defun set-buffer-file-coding-system (coding-system &optional force)
1107 "Set the file coding-system of the current buffer to CODING-SYSTEM.
1108 This means that when you save the buffer, it will be converted
1109 according to CODING-SYSTEM. For a list of possible values of CODING-SYSTEM,
1110 use \\[list-coding-systems].
1111
1112 If CODING-SYSTEM leaves the text conversion unspecified, or if it
1113 leaves the end-of-line conversion unspecified, FORCE controls what to
1114 do. If FORCE is nil, get the unspecified aspect (or aspects) from the
1115 buffer's previous `buffer-file-coding-system' value (if it is
1116 specified there). Otherwise, levae it unspecified.
1117
1118 This marks the buffer modified so that the succeeding \\[save-buffer]
1119 surely saves the buffer with CODING-SYSTEM. From a program, if you
1120 don't want to mark the buffer modified, just set the variable
1121 `buffer-file-coding-system' directly."
1122 (interactive "zCoding system for saving file (default, nil): \nP")
1123 (check-coding-system coding-system)
1124 (if (and coding-system buffer-file-coding-system (null force))
1125 (setq coding-system
1126 (merge-coding-systems coding-system buffer-file-coding-system)))
1127 (setq buffer-file-coding-system coding-system)
1128 ;; This is in case of an explicit call. Normally, `normal-mode' and
1129 ;; `set-buffer-major-mode-hook' take care of setting the table.
1130 (if (fboundp 'ucs-set-table-for-input) ; don't lose when building
1131 (ucs-set-table-for-input))
1132 (set-buffer-modified-p t)
1133 (force-mode-line-update))
1134
1135 (defun revert-buffer-with-coding-system (coding-system &optional force)
1136 "Visit the current buffer's file again using coding system CODING-SYSTEM.
1137 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1138
1139 If CODING-SYSTEM leaves the text conversion unspecified, or if it
1140 leaves the end-of-line conversion unspecified, FORCE controls what to
1141 do. If FORCE is nil, get the unspecified aspect (or aspects) from the
1142 buffer's previous `buffer-file-coding-system' value (if it is
1143 specified there). Otherwise, determine it from the file contents as
1144 usual for visiting a file."
1145 (interactive "zCoding system for visited file (default, nil): \nP")
1146 (check-coding-system coding-system)
1147 (if (and coding-system buffer-file-coding-system (null force))
1148 (setq coding-system
1149 (merge-coding-systems coding-system buffer-file-coding-system)))
1150 (let ((coding-system-for-read coding-system))
1151 (revert-buffer)))
1152
1153 (defun set-file-name-coding-system (coding-system)
1154 "Set coding system for decoding and encoding file names to CODING-SYSTEM.
1155 It actually just set the variable `file-name-coding-system' (which
1156 see) to CODING-SYSTEM."
1157 (interactive "zCoding system for file names (default, nil): ")
1158 (check-coding-system coding-system)
1159 (setq file-name-coding-system coding-system))
1160
1161 (defvar default-terminal-coding-system nil
1162 "Default value for the terminal coding system.
1163 This is normally set according to the selected language environment.
1164 See also the command `set-terminal-coding-system'.")
1165
1166 (defun set-terminal-coding-system (coding-system)
1167 "Set coding system of your terminal to CODING-SYSTEM.
1168 All text output to the terminal will be encoded
1169 with the specified coding system.
1170 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1171 The default is determined by the selected language environment
1172 or by the previous use of this command."
1173 (interactive
1174 (list (let ((default (if (and (not (terminal-coding-system))
1175 default-terminal-coding-system)
1176 default-terminal-coding-system)))
1177 (read-coding-system
1178 (format "Coding system for terminal display (default, %s): "
1179 default)
1180 default))))
1181 (if (and (not coding-system)
1182 (not (terminal-coding-system)))
1183 (setq coding-system default-terminal-coding-system))
1184 (if coding-system
1185 (setq default-terminal-coding-system coding-system))
1186 (set-terminal-coding-system-internal coding-system)
1187 (redraw-frame (selected-frame)))
1188
1189 (defvar default-keyboard-coding-system nil
1190 "Default value of the keyboard coding system.
1191 This is normally set according to the selected language environment.
1192 See also the command `set-keyboard-coding-system'.")
1193
1194 (defun set-keyboard-coding-system (coding-system)
1195 "Set coding system for keyboard input to CODING-SYSTEM.
1196 In addition, this command enables Encoded-kbd minor mode.
1197 \(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see
1198 `encoded-kbd-mode'.)
1199 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1200 The default is determined by the selected language environment
1201 or by the previous use of this command."
1202 (interactive
1203 (list (let ((default (if (and (not (keyboard-coding-system))
1204 default-keyboard-coding-system)
1205 default-keyboard-coding-system)))
1206 (read-coding-system
1207 (format "Coding system for keyboard input (default, %s): "
1208 default)
1209 default))))
1210 (if (and (not coding-system)
1211 (not (keyboard-coding-system)))
1212 (setq coding-system default-keyboard-coding-system))
1213 (if coding-system
1214 (setq default-keyboard-coding-system coding-system))
1215 (set-keyboard-coding-system-internal coding-system)
1216 (setq keyboard-coding-system coding-system)
1217 (encoded-kbd-mode (if coding-system 1 0)))
1218
1219 (defcustom keyboard-coding-system nil
1220 "Specify coding system for keyboard input.
1221 If you set this on a terminal which can't distinguish Meta keys from
1222 8-bit characters, you will have to use ESC to type Meta characters.
1223 See Info node `Specify Coding' and Info node `Single-Byte Character Support'.
1224
1225 On non-windowing terminals, this is set from the locale by default.
1226
1227 Setting this variable directly does not take effect;
1228 use either M-x customize or \\[set-keyboard-coding-system]."
1229 :type '(coding-system :tag "Coding system")
1230 :link '(info-link "(emacs)Specify Coding")
1231 :link '(info-link "(emacs)Single-Byte Character Support")
1232 :set (lambda (symbol value)
1233 ;; Don't load encoded-kbd-mode unnecessarily.
1234 (if (or value (boundp 'encoded-kbd-mode))
1235 (set-keyboard-coding-system value)
1236 (set-default 'keyboard-coding-system nil))) ; must initialize
1237 :version "21.4"
1238 :group 'keyboard
1239 :group 'mule)
1240
1241 (defun set-buffer-process-coding-system (decoding encoding)
1242 "Set coding systems for the process associated with the current buffer.
1243 DECODING is the coding system to be used to decode input from the process,
1244 ENCODING is the coding system to be used to encode output to the process.
1245
1246 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]."
1247 (interactive
1248 "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
1249 (let ((proc (get-buffer-process (current-buffer))))
1250 (if (null proc)
1251 (error "No process")
1252 (check-coding-system decoding)
1253 (check-coding-system encoding)
1254 (set-process-coding-system proc decoding encoding)))
1255 (force-mode-line-update))
1256
1257 (defalias 'set-clipboard-coding-system 'set-selection-coding-system)
1258
1259 (defun set-selection-coding-system (coding-system)
1260 "Make CODING-SYSTEM used for communicating with other X clients.
1261 When sending or receiving text via cut_buffer, selection, and clipboard,
1262 the text is encoded or decoded by CODING-SYSTEM."
1263 (interactive "zCoding system for X selection: ")
1264 (check-coding-system coding-system)
1265 (setq selection-coding-system coding-system))
1266
1267 ;; Coding system lastly specified by the command
1268 ;; set-next-selection-coding-system.
1269 (defvar last-next-selection-coding-system nil)
1270
1271 (defun set-next-selection-coding-system (coding-system)
1272 "Use CODING-SYSTEM for next communication with other window system clients.
1273 This setting is effective for the next communication only."
1274 (interactive
1275 (list (read-coding-system
1276 (if last-next-selection-coding-system
1277 (format "Coding system for the next selection (default, %S): "
1278 last-next-selection-coding-system)
1279 "Coding system for the next selection: ")
1280 last-next-selection-coding-system)))
1281 (if coding-system
1282 (setq last-next-selection-coding-system coding-system)
1283 (setq coding-system last-next-selection-coding-system))
1284 (check-coding-system coding-system)
1285
1286 (setq next-selection-coding-system coding-system))
1287
1288 (defun set-coding-priority (arg)
1289 "Set priority of coding categories according to ARG.
1290 ARG is a list of coding categories ordered by priority.
1291
1292 This function is provided for backward compatibility.
1293 Now we have more convenient function `set-coding-system-priority'."
1294 (apply 'set-coding-system-priority
1295 (mapcar #'(lambda (x) (symbol-value x)) arg)))
1296 (make-obsolete 'set-coding-priority 'set-coding-system-priority "22.1")
1297
1298 ;;; X selections
1299
1300 (defvar ctext-non-standard-encodings-alist
1301 '(("ISO8859-15" . latin-iso8859-15)
1302 ("ISO8859-14" . latin-iso8859-14)
1303 ("KOI8-R" . koi8-r)
1304 ("BIG5-0" . big5))
1305 "Alist of non-standard encoding names vs Emacs coding systems.
1306 This alist is used to decode an extened segment of a compound text.")
1307
1308 (defvar ctext-non-standard-encodings-regexp
1309 (string-to-multibyte
1310 (concat
1311 ;; For non-standard encodings.
1312 "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
1313 "\\|"
1314 ;; For UTF-8 encoding.
1315 "\\(\e%G[^\e]*\e%@\\)")))
1316
1317 ;; Functions to support "Non-Standard Character Set Encodings" defined
1318 ;; by the COMPOUND-TEXT spec.
1319 ;; We support that by decoding the whole data by `ctext' which just
1320 ;; pertains byte sequences belonging to ``extended segment'', then
1321 ;; decoding those byte sequences one by one in Lisp.
1322 ;; This function also supports "The UTF-8 encoding" described in the
1323 ;; section 7 of the documentation fo COMPOUND-TEXT distributed with
1324 ;; XFree86.
1325
1326 (defun ctext-post-read-conversion (len)
1327 "Decode LEN characters encoded as Compound Text with Extended Segments."
1328 ;; We don't need the following because it is expected that this
1329 ;; function is mainly used for decoding X selection which is not
1330 ;; that big data.
1331 ;;(buffer-disable-undo) ; minimize consing due to insertions and deletions
1332 (save-match-data
1333 (save-restriction
1334 (narrow-to-region (point) (+ (point) len))
1335 (let ((case-fold-search nil)
1336 last-coding-system-used
1337 pos bytes)
1338 (decode-coding-region (point-min) (point-max) 'ctext)
1339 (while (re-search-forward ctext-non-standard-encodings-regexp
1340 nil 'move)
1341 (setq pos (match-beginning 0))
1342 (if (match-beginning 1)
1343 ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
1344 (let* ((M (char-after (+ pos 4)))
1345 (L (char-after (+ pos 5)))
1346 (encoding (match-string 2))
1347 (coding (or (cdr (assoc-ignore-case
1348 encoding
1349 ctext-non-standard-encodings-alist))
1350 (coding-system-p
1351 (intern (downcase encoding))))))
1352 (if enable-multibyte-characters
1353 (setq M (multibyte-char-to-unibyte M)
1354 L (multibyte-char-to-unibyte L)))
1355 (setq bytes (- (+ (* (- M 128) 128) (- L 128))
1356 (- (point) (+ pos 6))))
1357 (when coding
1358 (delete-region pos (point))
1359 (forward-char bytes)
1360 (decode-coding-region (- (point) bytes) (point) coding)))
1361 ;; ESC % G --UTF-8-BYTES-- ESC % @
1362 (setq bytes (- (point) pos))
1363 (decode-coding-region (- (point) bytes) (point) 'utf-8))))
1364 (goto-char (point-min))
1365 (- (point-max) (point)))))
1366
1367 ;; If you add charsets here, be sure to modify the regexp used by
1368 ;; ctext-pre-write-conversion to look up non-standard charsets.
1369 (defvar ctext-non-standard-designations-alist
1370 '(("$(0" . (big5 "big5-0" 2))
1371 ("$(1" . (big5 "big5-0" 2))
1372 ;; The following are actually standard; generating extended
1373 ;; segments for them is wrong and screws e.g. Latin-9 users.
1374 ;; 8859-{10,13,16} aren't Emacs charsets anyhow. -- fx
1375 ;; ("-V" . (t "iso8859-10" 1))
1376 ;; ("-Y" . (t "iso8859-13" 1))
1377 ;; ("-_" . (t "iso8859-14" 1))
1378 ;; ("-b" . (t "iso8859-15" 1))
1379 ;; ("-f" . (t "iso8859-16" 1))
1380 )
1381 "Alist of ctext control sequences that introduce character sets which
1382 are not in the list of approved encodings, and the corresponding
1383 coding system, identifier string, and number of octets per encoded
1384 character.
1385
1386 Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
1387 is the control sequence (sans the leading ESC) that introduces the character
1388 set in the text encoded by compound-text. ENCODING is a coding system
1389 symbol; if it is t, it means that the ctext coding system already encodes
1390 the text correctly, and only the leading control sequence needs to be altered.
1391 If ENCODING is a coding system, we need to re-encode the text with that
1392 coding system. CHARSET is the name of the charset we need to put into
1393 the leading control sequence. NOCTETS is the number of octets (bytes) that
1394 encode each character in this charset. NOCTETS can be 0 (meaning the number
1395 of octets per character is variable), 1, 2, 3, or 4.")
1396
1397 (defun ctext-pre-write-conversion (from to)
1398 "Encode characters between FROM and TO as Compound Text w/Extended Segments.
1399
1400 If FROM is a string, or if the current buffer is not the one set up for us
1401 by encode-coding-string, generate a new temp buffer, insert the
1402 text, and convert it in the temporary buffer. Otherwise, convert in-place."
1403 (save-match-data
1404 ;; Setup a working buffer if necessary.
1405 (when (stringp from)
1406 (set-buffer (generate-new-buffer " *temp"))
1407 (set-buffer-multibyte (multibyte-string-p from))
1408 (insert from))
1409
1410 ;; Now we can encode the whole buffer.
1411 (let ((case-fold-search nil)
1412 last-coding-system-used
1413 pos posend desig encode-info encoding chset noctets textlen)
1414 (goto-char (point-min))
1415 ;; At first encode the whole buffer.
1416 (encode-coding-region (point-min) (point-max) 'ctext-no-compositions)
1417 ;; Then replace ISO-2022 charset designations with extended
1418 ;; segments, for those charsets that are not part of the
1419 ;; official X registry. The regexp below finds the leading
1420 ;; sequences for big5.
1421 (while (re-search-forward "\e\\(\$([01]\\)" nil 'move)
1422 (setq pos (match-beginning 0)
1423 posend (point)
1424 desig (match-string 1)
1425 encode-info (cdr (assoc desig
1426 ctext-non-standard-designations-alist))
1427 encoding (car encode-info)
1428 chset (cadr encode-info)
1429 noctets (car (cddr encode-info)))
1430 (skip-chars-forward "^\e")
1431 (cond
1432 ((eq encoding t) ; only the leading sequence needs to be changed
1433 (setq textlen (+ (- (point) posend) (length chset) 1))
1434 ;; Generate the control sequence for an extended segment.
1435 (replace-match (string-to-multibyte (format "\e%%/%d%c%c%s\ 2"
1436 noctets
1437 (+ (/ textlen 128) 128)
1438 (+ (% textlen 128) 128)
1439 chset))
1440 t t))
1441 ((coding-system-p encoding) ; need to recode the entire segment...
1442 (decode-coding-region pos (point) 'ctext-no-compositions)
1443 (encode-coding-region pos (point) encoding)
1444 (setq textlen (+ (- (point) pos) (length chset) 1))
1445 (save-excursion
1446 (goto-char pos)
1447 (insert (string-to-multibyte (format "\e%%/%d%c%c%s\ 2"
1448 noctets
1449 (+ (/ textlen 128) 128)
1450 (+ (% textlen 128) 128)
1451 chset)))))))
1452 (goto-char (point-min))))
1453 ;; Must return nil, as build_annotations_2 expects that.
1454 nil)
1455
1456 ;;; FILE I/O
1457
1458 (defcustom auto-coding-alist
1459 '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|sx[dmicw]\\|tar\\)\\'" . no-conversion-multibyte)
1460 ("\\.tgz\\'" . no-conversion)
1461 ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion)
1462 ("/#[^/]+#\\'" . emacs-mule))
1463 "Alist of filename patterns vs corresponding coding systems.
1464 Each element looks like (REGEXP . CODING-SYSTEM).
1465 A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.
1466
1467 The settings in this alist take priority over `coding:' tags
1468 in the file (see the function `set-auto-coding')
1469 and the contents of `file-coding-system-alist'."
1470 :group 'files
1471 :group 'mule
1472 :type '(repeat (cons (regexp :tag "File name regexp")
1473 (symbol :tag "Coding system"))))
1474
1475 (defcustom auto-coding-regexp-alist
1476 '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)
1477 ("\\`;ELC\14