1 ;;; mule.el --- basic commands for multilingual environment
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H13PRO009
10 ;; Keywords: mule, multilingual, character set, coding system
12 ;; This file is part of GNU Emacs.
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation; either version 2, or (at your option)
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27 ;; Boston, MA 02111-1307, USA.
33 (defconst mule-version
"6.0 (HANACHIRUSATO)" "\
34 Version number and name of this version of MULE (multilingual environment).")
36 (defconst mule-version-date
"2003.9.1" "\
37 Distribution date of this version of MULE (multilingual environment).")
42 ;; Backward compatibility code for handling emacs-mule charsets.
43 (defvar private-char-area-1-min
#xF0000
)
44 (defvar private-char-area-1-max
#xFFFFE
)
45 (defvar private-char-area-2-min
#x100000
)
46 (defvar private-char-area-2-max
#x10FFFE
)
48 ;; Table of emacs-mule charsets indexed by their emacs-mule ID.
49 (defvar emacs-mule-charset-table
(make-vector 256 nil
))
50 (aset emacs-mule-charset-table
0 'ascii
)
52 ;; Convert the argument of old-style calll of define-charset to a
53 ;; property list used by the new-style.
54 ;; INFO-VECTOR is a vector of the format:
55 ;; [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
56 ;; SHORT-NAME LONG-NAME DESCRIPTION]
58 (defun convert-define-charset-argument (emacs-mule-id info-vector
)
59 (let* ((dim (aref info-vector
0))
60 (chars (aref info-vector
1))
61 (total (if (= dim
1) chars
(* chars chars
)))
62 (code-space (if (= dim
1) (if (= chars
96) [32 127] [33 126])
63 (if (= chars
96) [32 127 32 127] [33 126 33 126])))
65 (if (integerp emacs-mule-id
)
66 (or (= emacs-mule-id
0)
67 (and (>= emacs-mule-id
129) (< emacs-mule-id
256))
68 (error "Invalid CHARSET-ID: %d" emacs-mule-id
))
70 (if (= dim
1) (setq from-id
160 to-id
224)
71 (setq from-id
224 to-id
255))
72 (while (and (< from-id to-id
)
73 (not (aref emacs-mule-charset-table from-id
)))
74 (setq from-id
(1+ from-id
)))
76 (error "No more room for the new Emacs-mule charset"))
77 (setq emacs-mule-id from-id
)))
78 (if (> (- private-char-area-1-max private-char-area-1-min
) total
)
79 (setq code-offset private-char-area-1-min
80 private-char-area-1-min
(+ private-char-area-1-min total
))
81 (if (> (- private-char-area-2-max private-char-area-2-min
) total
)
82 (setq code-offset private-char-area-2-min
83 private-char-area-2-min
(+ private-char-area-2-min total
))
84 (error "No more space for a new charset.")))
86 :code-space code-space
87 :iso-final-char
(aref info-vector
4)
88 :code-offset code-offset
89 :emacs-mule-id emacs-mule-id
)))
91 (defun define-charset (name docstring
&rest props
)
92 "Define NAME (symbol) as a charset with DOCSTRING.
93 The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
94 may be any symbol. The following have special meanings, and one of
95 `:code-offset', `:map', `:subset', `:superset' must be specified.
99 VALUE must be a short string to identify the charset. If omitted,
104 VALUE must be a string longer than `:short-name' to identify the
105 charset. If omitted, the value of the `:short-name' attribute is used.
109 VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of
110 code-points of the charsets. If omitted, it is calculated from the
111 value of the `:code-space' attribute.
115 VALUE must be a vector of length at most 8 specifying the byte code
116 range of each dimension in this format:
117 [ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
118 where MIN-N is the minimum byte value of Nth dimension of code-point,
119 MAX-N is the maximum byte value of that.
123 VALUE must be an integer specifying the mininum code point of the
124 charset. If omitted, it is calculated from `:code-space'. VALUE may
125 be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
126 the code point and LOW is the least significant 16 bits.
130 VALUE must be an integer specifying the maxinum code point of the
131 charset. If omitted, it is calculated from `:code-space'. VALUE may
132 be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
133 the code point and LOW is the least significant 16 bits.
137 VALUE must be a character in the range 32 to 127 (inclusive)
138 specifying the final char of the charset for ISO-2022 encoding. If
139 omitted, the charset can't be encoded by ISO-2022 based
142 `:iso-revision-number'
144 VALUE must be an integer in the range 0..63, specifying the revision
145 number of the charset for ISO-2022 encoding.
149 VALUE must be an integer of 0, 129..255. If omitted, the charset
150 can't be encoded by coding-systems of type `emacs-mule'.
152 `:ascii-compatible-p'
154 VALUE must be nil or t (default nil). If VALUE is t, the charset is
155 compatible with ASCII, i.e. the first 128 code points map to ASCII.
159 VALUE must be nil or t. If the VALUE is t, the charset is
160 supplementary, which means it is used only as a parent of some other
165 VALUE must be a nonnegative integer that can be used as an invalid
166 code point of the charset. If the minimum code is 0 and the maximum
167 code is greater than Emacs' maximum integer value, `:invalid-code'
168 should not be omitted.
172 VALUE must be an integer added to the index number of a character to
173 get the corresponding character code.
177 VALUE must be vector or string.
179 If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
180 where CODE-n is a code-point of the charset, and CHAR-n is the
181 corresponding character code.
183 If it is a string, it is a name of file that contains the above
184 information. Each line of the file must be this format:
186 where XXX is a hexadecimal representation of CODE-n and YYY is a
187 hexadecimal representation of CHAR-n. A line starting with `#' is a
192 VALUE must be a list:
193 ( PARENT MIN-CODE MAX-CODE OFFSET )
194 PARENT is a parent charset. MIN-CODE and MAX-CODE specify the range
195 of characters inherited from the parent. OFFSET is an integer value
196 to add to a code point of the parent charset to get the corresponding
197 code point of this charset.
201 VALUE must be a list of parent charsets. The charset inherits
202 characters from them. Each element of the list may be a cons (PARENT
203 . OFFSET), where PARENT is a parent charset, and OFFSET is an offset
204 value to add to a code point of PARENT to get the corresponding code
205 point of this charset.
209 VALUE must be vector or string.
211 If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
212 where CODE-n is a code-point of the charset, and CHAR-n is the
213 corresponding Unicode character code.
215 If it is a string, it is a name of file that contains the above
216 information. The file format is the same as what described for `:map'
218 (when (vectorp (car props
))
220 ;; (define-charset CHARSET-ID CHARSET-SYMBOL INFO-VECTOR)
221 ;; Convert the argument to make it fit with the current style.
222 (let ((vec (car props
)))
223 (setq props
(convert-define-charset-argument name vec
)
225 docstring
(aref vec
8))))
226 (let ((attrs (mapcar 'list
'(:dimension
243 ;; If :dimension is omitted, get the dimension from :code-space.
244 (let ((dimension (plist-get props
:dimension
)))
247 (setq dimension
(/ (length (plist-get props
:code-space
)) 2))
248 (setq props
(plist-put props
:dimension dimension
)))))
250 ;; If :emacs-mule-id is specified, update emacs-mule-charset-table.
251 (let ((emacs-mule-id (plist-get props
:emacs-mule-id
)))
252 (if (integerp emacs-mule-id
)
253 (aset emacs-mule-charset-table emacs-mule-id name
)))
256 (setcdr slot
(plist-get props
(car slot
))))
258 ;; Make sure that the value of :code-space is a vector of 8
260 (let* ((slot (assq :code-space attrs
))
265 (vconcat val
(make-vector (- 8 len
) 0)))))
267 ;; Add :name and :docstring properties to PROPS.
269 (cons :name
(cons name
(cons :docstring
(cons docstring props
)))))
270 (or (plist-get props
:short-name
)
271 (plist-put props
:short-name
(symbol-name name
)))
272 (or (plist-get props
:long-name
)
273 (plist-put props
:long-name
(plist-get props
:short-name
)))
274 ;; We can probably get a worthwhile amount in purespace.
276 (mapcar (lambda (elt)
281 (setcdr (assq :plist attrs
) props
)
283 (apply 'define-charset-internal name
(mapcar 'cdr attrs
))))
286 (defun load-with-code-conversion (fullname file
&optional noerror nomessage
)
287 "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
288 The file contents are decoded before evaluation if necessary.
289 If optional second arg NOERROR is non-nil,
290 report no error if FILE doesn't exist.
291 Print messages at start and end of loading unless
292 optional third arg NOMESSAGE is non-nil.
293 Return t if file exists."
294 (if (null (file-readable-p fullname
))
296 (signal 'file-error
(list "Cannot open load file" file
)))
297 ;; Read file with code conversion, and then eval.
299 ;; To avoid any autoloading, set default-major-mode to
301 ;; So that we don't get completely screwed if the
302 ;; file is encoded in some complicated character set,
303 ;; read it with real decoding, as a multibyte buffer,
304 ;; even if this is a --unibyte Emacs session.
305 (let ((default-major-mode 'fundamental-mode
)
306 (default-enable-multibyte-characters t
))
307 ;; We can't use `generate-new-buffer' because files.el
308 ;; is not yet loaded.
309 (get-buffer-create (generate-new-buffer-name " *load*"))))
311 (source (save-match-data (string-match "\\.el\\'" fullname
))))
314 (message "Loading %s (source)..." file
)
315 (message "Loading %s..." file
)))
317 (push file preloaded-file-list
))
319 (let ((load-file-name fullname
)
320 (set-auto-coding-for-load t
)
321 (inhibit-file-name-operation nil
))
324 (insert-file-contents fullname
)
325 ;; If the loaded file was inserted with no-conversion or
326 ;; raw-text coding system, make the buffer unibyte.
327 ;; Otherwise, eval-buffer might try to interpret random
328 ;; binary junk as multibyte characters.
329 (if (and enable-multibyte-characters
330 (or (eq (coding-system-type last-coding-system-used
)
332 (set-buffer-multibyte nil
))
333 ;; Make `kill-buffer' quiet.
334 (set-buffer-modified-p nil
))
335 ;; Have the original buffer current while we eval.
336 (eval-buffer buffer nil file
337 ;; If this Emacs is running with --unibyte,
338 ;; convert multibyte strings to unibyte
339 ;; after reading them.
340 ;; (not default-enable-multibyte-characters)
343 (let (kill-buffer-hook kill-buffer-query-functions
)
344 (kill-buffer buffer
)))
345 (let ((hook (assoc file after-load-alist
)))
347 (mapcar (function eval
) (cdr hook
))))
348 (unless (or nomessage noninteractive
)
350 (message "Loading %s (source)...done" file
)
351 (message "Loading %s...done" file
)))
354 (defun charset-info (charset)
355 "Return a vector of information of CHARSET.
356 This function is provided for backward compatibility.
358 The elements of the vector are:
359 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
360 LEADING-CODE-BASE, LEADING-CODE-EXT,
361 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
362 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
365 CHARSET-ID is always 0.
367 DIMENSION is the number of bytes of a code-point of the charset:
369 CHARS is the number of characters in a dimension:
372 DIRECTION is always 0.
373 LEADING-CODE-BASE is always 0.
374 LEADING-CODE-EXT is always 0.
375 ISO-FINAL-CHAR (character) is the final character of the
376 corresponding ISO 2022 charset. If the charset is not assigned
377 any final character, the value is -1.
378 ISO-GRAPHIC-PLANE is always 0.
379 REVERSE-CHARSET is always -1.
380 SHORT-NAME (string) is the short name to refer to the charset.
381 LONG-NAME (string) is the long name to refer to the charset
382 DESCRIPTION (string) is the description string of the charset.
383 PLIST (property list) may contain any type of information a user
384 want to put and get by functions `put-charset-property' and
385 `get-charset-property' respectively."
388 (charset-dimension charset
)
389 (charset-chars charset
)
394 (charset-iso-final-char charset
)
397 (get-charset-property charset
:short-name
)
398 (get-charset-property charset
:short-name
)
399 (charset-description charset
)
400 (charset-plist charset
)))
402 ;; It is better not to use backquote in this file,
403 ;; because that makes a bootstrapping problem
404 ;; if you need to recompile all the Lisp files using interpreted code.
406 (defun charset-id (charset)
407 "Always return 0. This is provided for backward compatibility."
410 (defmacro charset-bytes
(charset)
411 "Always return 0. This is provided for backward compatibility."
414 (defun get-charset-property (charset propname
)
415 "Return the value of CHARSET's PROPNAME property.
416 This is the last value stored with
417 (put-charset-property CHARSET PROPNAME VALUE)."
418 (plist-get (charset-plist charset
) propname
))
420 (defun put-charset-property (charset propname value
)
421 "Set CHARSETS's PROPNAME property to value VALUE.
422 It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
423 (set-charset-plist charset
424 (plist-put (charset-plist charset
) propname value
)))
426 (defun charset-description (charset)
427 "Return description string of CHARSET."
428 (plist-get (charset-plist charset
) :docstring
))
430 (defun charset-dimension (charset)
431 "Return dimension of CHARSET."
432 (plist-get (charset-plist charset
) :dimension
))
434 (defun charset-chars (charset &optional dimension
)
435 "Return number of characters contained in DIMENSION of CHARSET.
436 DIMENSION defaults to the first dimension."
437 (unless dimension
(setq dimension
1))
438 (let ((code-space (plist-get (charset-plist charset
) :code-space
)))
439 (1+ (- (aref code-space
(1- (* 2 dimension
)))
440 (aref code-space
(- (* 2 dimension
) 2))))))
442 (defun charset-iso-final-char (charset)
443 "Return ISO-2022 final character of CHARSET.
444 Return -1 if charset isn't an ISO 2022 one."
445 (or (plist-get (charset-plist charset
) :iso-final-char
)
448 (defmacro charset-short-name
(charset)
449 "Return short name of CHARSET."
450 (plist-get (charset-plist charset
) :short-name
))
452 (defmacro charset-long-name
(charset)
453 "Return long name of CHARSET."
454 (plist-get (charset-plist charset
) :long-name
))
456 (defun charset-list ()
457 "Return list of all charsets ever defined.
459 This function is provided for backward compatibility.
460 Now we have the variable `charset-list'."
462 (make-obsolete 'charset-list
"Use variable `charset-list'" "22.1")
466 (defalias 'char-valid-p
'characterp
)
467 (make-obsolete 'char-valid-p
'characterp
"22.1")
469 (defun generic-char-p (char)
470 "Always return nil. This is provided for backward compatibility."
472 (make-obsolete 'generic-char-p
"Generic characters no longer exist" "22.1")
474 (defun make-char-internal (charset-id &optional code1 code2
)
475 (let ((charset (aref emacs-mule-charset-table charset-id
)))
477 (error "Invalid Emacs-mule charset ID: %d" charset-id
))
478 (make-char charset code1 code2
)))
480 ;; Coding system stuff
482 ;; Coding system is a symbol that has been defined by the function
483 ;; `define-coding-system'.
485 (defconst coding-system-iso-2022-flags
503 "List of symbols that control ISO-2022 encoder/decoder.
505 The value of the `:flags' attribute in the argument of the function
506 `define-coding-system' must be one of them.
508 If `long-form' is specified, use a long designation sequence on
509 encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
510 and `japanese-jisx0208'. The long designation sequence doesn't
511 conform to ISO 2022, but is used by such coding systems as
514 If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
517 If `ascii-at-cntl' is specified, designate ASCII to g0 before control
518 codes and SPC on encoding.
520 If `7-bit' is specified, use 7-bit code only on encoding.
522 If `locking-shift' is specified, decode locking-shift code correctly
523 on decoding, and use locking-shift to invoke a graphic element on
526 If `single-shift' is specified, decode single-shift code correctly on
527 decoding, and use single-shift to invoke a graphic element on encoding.
529 If `designation' is specified, decode designation code correctly on
530 decoding, and use designation to designate a charset to a graphic
533 If `revision' is specified, produce an escape sequence to specify
534 revision number of a charset on encoding. Such an escape sequence is
535 always correctly decoded on decoding.
537 If `direction' is specified, decode ISO6429's code for specifying
538 direction correctly, and produce the code on encoding.
540 If `init-at-bol' is specified, on encoding, it is assumed that
541 invocation and designation statuses are reset at each beginning of
542 line even if `ascii-at-eol' is not specified; thus no codes for
543 resetting them are produced.
545 If `safe' is specified, on encoding, characters not supported by a
546 coding are replaced with `?'.
548 If `latin-extra' is specified, the code-detection routine assumes that a
549 code specified in `latin-extra-code-table' (which see) is valid.
551 If `composition' is specified, an escape sequence to specify
552 composition sequence is correctly decoded on decoding, and is produced
555 If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
556 correctly decoded on decoding, and is produced on encoding.
558 If `use-roman' is specified, JIS0201-1976-Roman is designated instead
561 If `use-oldjis' is specified, JIS0208-1976 is designated instead of
564 (defun define-coding-system (name docstring
&rest props
)
565 "Define NAME (a symbol) as a coding system with DOCSTRING and attributes.
566 The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
569 The following attributes have special meanings. Those labeled as
570 \"(required)\", should not be omitted.
572 `:mnemonic' (required)
574 VALUE is a character to display on mode line for the coding system.
576 `:coding-type' (required)
578 VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022',
579 `emacs-mule', `shift-jis', `ccl', `raw-text', `undecided'.
583 VALUE is the EOL (end-of-line) format of the coding system. It must be
584 one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL
585 \(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF),
586 and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on
587 decoding by the coding system, Emacs automatically detects the EOL
588 format of the source text.
592 VALUE must be a list of charsets supported by the coding system. On
593 encoding by the coding system, if a character belongs to multiple
594 charsets in the list, a charset that comes earlier in the list is
595 selected. If `:coding-type' is `iso-2022', VALUE may be `iso-2022',
596 which indicates that the coding system supports all ISO-2022 based
597 charsets. If `:coding-type' is `emacs-mule', VALUE may be
598 `emacs-mule', which indicates that the coding system supports all
599 charsets that have the `:emacs-mule-id' property.
601 `:ascii-compatible-p'
603 If VALUE is non-nil, the coding system decodes all 7-bit bytes into
604 the corresponding ASCII characters, and encodes all ASCII characters
605 back to the corresponding 7-bit bytes. VALUE defaults to nil.
607 `:decode-translation-table'
609 VALUE must be a translation table to use on decoding.
611 `:encode-translation-table'
613 VALUE must be a translation table to use on encoding.
615 `:post-read-conversion'
617 VALUE must be a function to call after some text is inserted and
618 decoded by the coding system itself and before any functions in
619 `after-insert-functions' are called. The arguments to this function
620 are the same as those of a function in `after-insert-file-functions',
621 i.e. LENGTH of the text to be decoded with point at the head of it,
622 and the function should leave point unchanged.
624 `:pre-write-conversion'
626 VALUE must be a function to call after all functions in
627 `write-region-annotate-functions' and `buffer-file-format' are called,
628 and before the text is encoded by the coding system itself. The
629 arguments to this function are the same as those of a function in
630 `write-region-annotate-functions'.
634 VALUE must be a character. On encoding, a character not supported by
635 the coding system is replaced with VALUE.
639 VALUE non-nil means that visiting a file with the coding system
640 results in a unibyte buffer.
644 VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like
645 EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like
646 EOL (CR). If omitted, on decoding, the coding system detects EOL
647 format automatically, and on encoding, uses Unix-like EOL.
651 VALUE must be a symbol whose name is that of a MIME charset converted
654 `:mime-text-unsuitable'
656 VALUE non-nil means the `:mime-charset' property names a charset which
657 is unsuitable for the top-level media type \"text\".
661 VALUE must be a list of symbols that control the ISO-2022 converter.
662 Each must be a member of the list `coding-system-iso-2022-flags'
663 \(which see). This attribute has a meaning only when `:coding-type'
668 VALUE must be a vector [G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
669 GN-USAGE specifies the usage of graphic register GN as follows.
671 If it is nil, no charset can be designated to GN.
673 If it is a charset, the charset is initially designated to GN, and
674 never used by the other charsets.
676 If it is a list, the elements must be charsets, nil, 94, or 96. GN
677 can be used by all the listed charsets. If the list contains 94, any
678 iso-2022 charset whose code-space ranges are 94 long can be designated
679 to GN. If the list contains 96, any charsets whose whose ranges are
680 96 long can be designated to GN. If the first element is a charset,
681 that charset is initially designated to GN.
683 This attribute has a meaning only when `:coding-type' is `iso-2022'.
687 This attributes specifies whether the coding system uses a `byte order
688 mark'. VALUE must nil, t, or cons of coding systems whose
689 `:coding-type' is `utf-16'.
691 If the value is nil, on decoding, don't treat the first two-byte as
692 BOM, and on encoding, don't produce BOM bytes.
694 If the value is t, on decoding, skip the first two-byte as BOM, and on
695 encoding, produce BOM bytes accoding to the value of `:endian'.
697 If the value is cons, on decoding, check the first two-byte. If theyq
698 are 0xFE 0xFF, use the car part coding system of the value. If they
699 are 0xFF 0xFE, use the car part coding system of the value.
700 Otherwise, treat them as bytes for a normal character. On encoding,
701 produce BOM bytes accoding to the value of `:endian'.
703 This attribute has a meaning only when `:coding-type' is `utf-16'.
707 VALUE must be `big' or `little' specifying big-endian and
708 little-endian respectively. The default value is `big'.
710 This attribute has a meaning only when `:coding-type' is `utf-16'.
714 VALUE is a symbol representing the registered CCL program used for
715 decoding. This attribute has a meaning only when `:coding-type' is
720 VALUE is a symbol representing the registered CCL program used for
721 encoding. This attribute has a meaning only when `:coding-type' is
723 (let* ((common-attrs (mapcar 'list
728 :decode-translation-table
729 :encode-translation-table
730 :post-read-conversion
731 :pre-write-conversion
736 (coding-type (plist-get props
:coding-type
))
737 (spec-attrs (mapcar 'list
738 (cond ((eq coding-type
'iso-2022
)
743 ((eq coding-type
'utf-16
)
746 ((eq coding-type
'ccl
)
751 (dolist (slot common-attrs
)
752 (setcdr slot
(plist-get props
(car slot
))))
754 (dolist (slot spec-attrs
)
755 (setcdr slot
(plist-get props
(car slot
))))
757 (if (eq coding-type
'iso-2022
)
758 (let ((designation (plist-get props
:designation
))
759 (flags (plist-get props
:flags
))
760 (initial (make-vector 4 nil
))
761 (reg-usage (cons 4 4))
764 (setq elt
(aref designation i
))
765 (cond ((charsetp elt
)
767 (setq request
(cons (cons elt i
) request
)))
769 (aset initial i
(car elt
))
770 (if (charsetp (car elt
))
771 (setq request
(cons (cons (car elt
) i
) request
)))
772 (dolist (e (cdr elt
))
774 (setq request
(cons (cons e i
) request
)))
776 (setcar reg-usage i
))
778 (setcdr reg-usage i
))
781 (setcdr reg-usage i
)))))))
782 (setcdr (assq :initial spec-attrs
) initial
)
783 (setcdr (assq :reg-usage spec-attrs
) reg-usage
)
784 (setcdr (assq :request spec-attrs
) request
)
786 ;; Change :flags value from a list to a bit-mask.
789 (dolist (elt coding-system-iso-2022-flags
)
791 (setq bits
(logior bits
(lsh 1 i
))))
793 (setcdr (assq :flags spec-attrs
) bits
))))
795 ;; Add :name and :docstring properties to PROPS.
797 (cons :name
(cons name
(cons :docstring
(cons (purecopy docstring
)
799 (setcdr (assq :plist common-attrs
) props
)
800 (apply 'define-coding-system-internal
801 name
(mapcar 'cdr
(append common-attrs spec-attrs
)))))
803 (defun coding-system-doc-string (coding-system)
804 "Return the documentation string for CODING-SYSTEM."
805 (plist-get (coding-system-plist coding-system
) :docstring
))
807 (defun coding-system-mnemonic (coding-system)
808 "Return the mnemonic character of CODING-SYSTEM.
809 The mnemonic character of a coding system is used in mode line to
810 indicate the coding system. If CODING-SYSTEM. is nil, return ?=."
811 (plist-get (coding-system-plist coding-system
) :mnemonic
))
813 (defun coding-system-type (coding-system)
814 "Return the coding type of CODING-SYSTEM.
815 A coding type is a symbol indicating the encoding method of CODING-SYSTEM.
816 See the function `define-coding-system' for more detail."
817 (plist-get (coding-system-plist coding-system
) :coding-type
))
819 (defun coding-system-charset-list (coding-system)
820 "Return list of charsets supported by CODING-SYSTEM.
821 If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
822 If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'."
823 (plist-get (coding-system-plist coding-system
) :charset-list
))
825 (defun coding-system-category (coding-system)
826 "Return a category symbol of CODING-SYSTEM."
827 (plist-get (coding-system-plist coding-system
) :category
))
829 (defun coding-system-get (coding-system prop
)
830 "Extract a value from CODING-SYSTEM's property list for property PROP.
831 For compatibility with Emacs 20/21, this accepts old-style symbols
832 like `mime-charset' as well as the current style like `:mime-charset'."
833 (or (plist-get (coding-system-plist coding-system
) prop
)
834 (if (not (keywordp prop
))
835 (plist-get (coding-system-plist coding-system
)
836 (intern (concat ":" (symbol-name prop
)))))))
838 (defun coding-system-put (coding-system prop val
)
839 "Change value in CODING-SYSTEM's property list PROP to VAL."
840 (plist-put (coding-system-plist coding-system
) prop val
))
842 (defalias 'coding-system-parent
'coding-system-base
)
843 (make-obsolete 'coding-system-parent
'coding-system-base
"20.3")
845 (defun coding-system-eol-type-mnemonic (coding-system)
846 "Return the string indicating end-of-line format of CODING-SYSTEM."
847 (let* ((eol-type (coding-system-eol-type coding-system
))
848 (val (cond ((eq eol-type
0) eol-mnemonic-unix
)
849 ((eq eol-type
1) eol-mnemonic-dos
)
850 ((eq eol-type
2) eol-mnemonic-mac
)
851 (t eol-mnemonic-undecided
))))
854 (char-to-string val
))))
856 (defun coding-system-lessp (x y
)
857 (cond ((eq x
'no-conversion
) t
)
858 ((eq y
'no-conversion
) nil
)
859 ((eq x
'emacs-mule
) t
)
860 ((eq y
'emacs-mule
) nil
)
861 ((eq x
'undecided
) t
)
862 ((eq y
'undecided
) nil
)
863 (t (let ((c1 (coding-system-mnemonic x
))
864 (c2 (coding-system-mnemonic y
)))
865 (or (< (downcase c1
) (downcase c2
))
866 (and (not (> (downcase c1
) (downcase c2
)))
869 (defun add-to-coding-system-list (coding-system)
870 "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted."
871 (if (or (null coding-system-list
)
872 (coding-system-lessp coding-system
(car coding-system-list
)))
873 (setq coding-system-list
(cons coding-system coding-system-list
))
874 (let ((len (length coding-system-list
))
875 mid
(tem coding-system-list
))
877 (setq mid
(nthcdr (/ len
2) tem
))
878 (if (coding-system-lessp (car mid
) coding-system
)
880 len
(- len
(/ len
2)))
881 (setq len
(/ len
2))))
882 (setcdr tem
(cons coding-system
(cdr tem
))))))
884 (defun coding-system-list (&optional base-only
)
885 "Return a list of all existing non-subsidiary coding systems.
886 If optional arg BASE-ONLY is non-nil, only base coding systems are
887 listed. The value doesn't include subsidiary coding systems which are
888 made from bases and aliases automatically for various end-of-line
889 formats (e.g. iso-latin-1-unix, koi8-r-dos)."
890 (let* ((codings (copy-sequence coding-system-list
))
891 (tail (cons nil codings
)))
892 ;; Remove subsidiary coding systems (eol variants) and alias
893 ;; coding systems (if necessary).
895 (let* ((coding (car (cdr tail
)))
896 (aliases (coding-system-aliases coding
)))
898 ;; CODING is an eol variant if not in ALIASES.
899 (not (memq coding aliases
))
900 ;; CODING is an alias if it is not car of ALIASES.
901 (and base-only
(not (eq coding
(car aliases
)))))
902 (setcdr tail
(cdr (cdr tail
)))
903 (setq tail
(cdr tail
)))))
906 (defconst char-coding-system-table nil
907 "This is an obsolete variable.
908 It exists just for backward compatibility, and the value is always nil.")
910 (defun transform-make-coding-system-args (name type
&optional doc-string props
)
911 "For internal use only.
912 Transform XEmacs style args for `make-coding-system' to Emacs style.
913 Value is a list of transformed arguments."
914 (let ((mnemonic (string-to-char (or (plist-get props
'mnemonic
) "?")))
915 (eol-type (plist-get props
'eol-type
))
918 ((eq eol-type
'lf
) (setq eol-type
'unix
))
919 ((eq eol-type
'crlf
) (setq eol-type
'dos
))
920 ((eq eol-type
'cr
) (setq eol-type
'mac
)))
921 (if (setq tmp
(plist-get props
'post-read-conversion
))
922 (setq properties
(plist-put properties
'post-read-conversion tmp
)))
923 (if (setq tmp
(plist-get props
'pre-write-conversion
))
924 (setq properties
(plist-put properties
'pre-write-conversion tmp
)))
926 ((eq type
'shift-jis
)
927 `(,name
1 ,mnemonic
,doc-string
() ,properties
,eol-type
))
928 ((eq type
'iso2022
) ; This is not perfect.
929 (if (plist-get props
'escape-quoted
)
930 (error "escape-quoted is not supported: %S"
931 `(,name
,type
,doc-string
,props
)))
932 (let ((g0 (plist-get props
'charset-g0
))
933 (g1 (plist-get props
'charset-g1
))
934 (g2 (plist-get props
'charset-g2
))
935 (g3 (plist-get props
'charset-g3
))
938 (eq (cadr (assoc 'latin-jisx0201
939 (plist-get props
'input-charset-conversion
)))
941 (eq (cadr (assoc 'ascii
942 (plist-get props
'output-charset-conversion
)))
946 (eq (cadr (assoc 'japanese-jisx0208-1978
947 (plist-get props
'input-charset-conversion
)))
949 (eq (cadr (assoc 'japanese-jisx0208
950 (plist-get props
'output-charset-conversion
)))
951 'japanese-jisx0208-1978
))))
953 (if (plist-get props
'force-g0-on-output
)
957 (if (plist-get props
'force-g1-on-output
)
961 (if (plist-get props
'force-g2-on-output
)
965 (if (plist-get props
'force-g3-on-output
)
968 `(,name
2 ,mnemonic
,doc-string
970 ,(plist-get props
'short
)
971 ,(not (plist-get props
'no-ascii-eol
))
972 ,(not (plist-get props
'no-ascii-cntl
))
973 ,(plist-get props
'seven
)
975 ,(not (plist-get props
'lock-shift
))
978 ,(plist-get props
'no-iso6429
)
980 ,properties
,eol-type
)))
982 `(,name
3 ,mnemonic
,doc-string
() ,properties
,eol-type
))
984 `(,name
4 ,mnemonic
,doc-string
985 (,(plist-get props
'decode
) .
,(plist-get props
'encode
))
986 ,properties
,eol-type
))
988 (error "unsupported XEmacs style make-coding-style arguments: %S"
989 `(,name
,type
,doc-string
,props
))))))
991 (defun make-coding-system (coding-system type mnemonic doc-string
996 "Define a new coding system CODING-SYSTEM (symbol).
997 This function is provided for backward compatibility.
998 Use `define-coding-system' instead."
999 ;; For compatiblity with XEmacs, we check the type of TYPE. If it
1000 ;; is a symbol, perhaps, this function is called with XEmacs-style
1001 ;; arguments. Here, try to transform that kind of arguments to
1004 (let ((args (transform-make-coding-system-args coding-system type
1005 mnemonic doc-string
)))
1006 (setq coding-system
(car args
)
1008 mnemonic
(nth 2 args
)
1009 doc-string
(nth 3 args
)
1011 properties
(nth 5 args
)
1012 eol-type
(nth 6 args
))))
1015 (cond ((eq type
0) 'emacs-mule
)
1016 ((eq type
1) 'shift-jis
)
1017 ((eq type
2) 'iso2022
)
1020 ((eq type
5) 'raw-text
)
1022 (error "Invalid coding system type: %s" type
))))
1025 (let ((plist nil
) key
)
1026 (dolist (elt properties
)
1027 (setq key
(car elt
))
1028 (cond ((eq key
'post-read-conversion
)
1029 (setq key
:post-read-conversion
))
1030 ((eq key
'pre-write-conversion
)
1031 (setq key
:pre-write-conversion
))
1032 ((eq key
'translation-table-for-decode
)
1033 (setq key
:decode-translation-table
))
1034 ((eq key
'translation-table-for-encode
)
1035 (setq key
:encode-translation-table
))
1036 ((eq key
'safe-charsets
)
1037 (setq key
:charset-list
))
1038 ((eq key
'mime-charset
)
1039 (setq key
:mime-charset
))
1040 ((eq key
'valid-codes
)
1041 (setq key
:valids
)))
1042 (setq plist
(plist-put plist key
(cdr elt
))))
1044 (plist-put properties
:mnemonic mnemonic
)
1045 (plist-put properties
:coding-type type
)
1046 (cond ((eq eol-type
0) (setq eol-type
'unix
))
1047 ((eq eol-type
1) (setq eol-type
'dos
))
1048 ((eq eol-type
2) (setq eol-type
'mac
))
1049 ((vectorp eol-type
) (setq eol-type nil
)))
1050 (plist-put properties
:eol-type eol-type
)
1054 (plist-put properties
:flags
1055 (list (and (or (consp (nth 0 flags
))
1056 (consp (nth 1 flags
))
1057 (consp (nth 2 flags
))
1058 (consp (nth 3 flags
))) 'designation
)
1059 (or (nth 4 flags
) 'long-form
)
1060 (and (nth 5 flags
) 'ascii-at-eol
)
1061 (and (nth 6 flags
) 'ascii-at-cntl
)
1062 (and (nth 7 flags
) '7-bit
)
1063 (and (nth 8 flags
) 'locking-shift
)
1064 (and (nth 9 flags
) 'single-shift
)
1065 (and (nth 10 flags
) 'use-roman
)
1066 (and (nth 11 flags
) 'use-oldjis
)
1067 (or (nth 12 flags
) 'direction
)
1068 (and (nth 13 flags
) 'init-at-bol
)
1069 (and (nth 14 flags
) 'designate-at-bol
)
1070 (and (nth 15 flags
) 'safe
)
1071 (and (nth 16 flags
) 'latin-extra
)))
1072 (plist-put properties
:designation
1073 (let ((vec (make-vector 4 nil
)))
1075 (let ((spec (nth i flags
)))
1077 (aset vec i
'(94 96))
1081 (setq spec
(append (delq t spec
) '(94 96))))
1082 (aset vec i spec
))))))
1086 (plist-put properties
:ccl-decoder
(car flags
))
1087 (plist-put properties
:ccl-encoder
(cdr flags
))))
1089 (apply 'define-coding-system coding-system doc-string properties
))
1091 (defun merge-coding-systems (first second
)
1092 "Fill in any unspecified aspects of coding system FIRST from SECOND.
1093 Return the resulting coding system."
1094 (let ((base (coding-system-base second
))
1095 (eol (coding-system-eol-type second
)))
1096 ;; If FIRST doesn't specify text conversion, merge with that of SECOND.
1097 (if (eq (coding-system-base first
) 'undecided
)
1098 (setq first
(coding-system-change-text-conversion first base
)))
1099 ;; If FIRST doesn't specify eol conversion, merge with that of SECOND.
1100 (if (and (vectorp (coding-system-eol-type first
))
1101 (numberp eol
) (>= eol
0) (<= eol
2))
1102 (setq first
(coding-system-change-eol-conversion
1106 (defun set-buffer-file-coding-system (coding-system &optional force
)
1107 "Set the file coding-system of the current buffer to CODING-SYSTEM.
1108 This means that when you save the buffer, it will be converted
1109 according to CODING-SYSTEM. For a list of possible values of CODING-SYSTEM,
1110 use \\[list-coding-systems].
1112 If CODING-SYSTEM leaves the text conversion unspecified, or if it
1113 leaves the end-of-line conversion unspecified, FORCE controls what to
1114 do. If FORCE is nil, get the unspecified aspect (or aspects) from the
1115 buffer's previous `buffer-file-coding-system' value (if it is
1116 specified there). Otherwise, levae it unspecified.
1118 This marks the buffer modified so that the succeeding \\[save-buffer]
1119 surely saves the buffer with CODING-SYSTEM. From a program, if you
1120 don't want to mark the buffer modified, just set the variable
1121 `buffer-file-coding-system' directly."
1122 (interactive "zCoding system for saving file (default, nil): \nP")
1123 (check-coding-system coding-system
)
1124 (if (and coding-system buffer-file-coding-system
(null force
))
1126 (merge-coding-systems coding-system buffer-file-coding-system
)))
1127 (setq buffer-file-coding-system coding-system
)
1128 ;; This is in case of an explicit call. Normally, `normal-mode' and
1129 ;; `set-buffer-major-mode-hook' take care of setting the table.
1130 (if (fboundp 'ucs-set-table-for-input
) ; don't lose when building
1131 (ucs-set-table-for-input))
1132 (set-buffer-modified-p t
)
1133 (force-mode-line-update))
1135 (defun revert-buffer-with-coding-system (coding-system &optional force
)
1136 "Visit the current buffer's file again using coding system CODING-SYSTEM.
1137 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1139 If CODING-SYSTEM leaves the text conversion unspecified, or if it
1140 leaves the end-of-line conversion unspecified, FORCE controls what to
1141 do. If FORCE is nil, get the unspecified aspect (or aspects) from the
1142 buffer's previous `buffer-file-coding-system' value (if it is
1143 specified there). Otherwise, determine it from the file contents as
1144 usual for visiting a file."
1145 (interactive "zCoding system for visited file (default, nil): \nP")
1146 (check-coding-system coding-system
)
1147 (if (and coding-system buffer-file-coding-system
(null force
))
1149 (merge-coding-systems coding-system buffer-file-coding-system
)))
1150 (let ((coding-system-for-read coding-system
))
1153 (defun set-file-name-coding-system (coding-system)
1154 "Set coding system for decoding and encoding file names to CODING-SYSTEM.
1155 It actually just set the variable `file-name-coding-system' (which
1156 see) to CODING-SYSTEM."
1157 (interactive "zCoding system for file names (default, nil): ")
1158 (check-coding-system coding-system
)
1159 (setq file-name-coding-system coding-system
))
1161 (defvar default-terminal-coding-system nil
1162 "Default value for the terminal coding system.
1163 This is normally set according to the selected language environment.
1164 See also the command `set-terminal-coding-system'.")
1166 (defun set-terminal-coding-system (coding-system)
1167 "Set coding system of your terminal to CODING-SYSTEM.
1168 All text output to the terminal will be encoded
1169 with the specified coding system.
1170 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1171 The default is determined by the selected language environment
1172 or by the previous use of this command."
1174 (list (let ((default (if (and (not (terminal-coding-system))
1175 default-terminal-coding-system
)
1176 default-terminal-coding-system
)))
1178 (format "Coding system for terminal display (default, %s): "
1181 (if (and (not coding-system
)
1182 (not (terminal-coding-system)))
1183 (setq coding-system default-terminal-coding-system
))
1185 (setq default-terminal-coding-system coding-system
))
1186 (set-terminal-coding-system-internal coding-system
)
1187 (redraw-frame (selected-frame)))
1189 (defvar default-keyboard-coding-system nil
1190 "Default value of the keyboard coding system.
1191 This is normally set according to the selected language environment.
1192 See also the command `set-keyboard-coding-system'.")
1194 (defun set-keyboard-coding-system (coding-system)
1195 "Set coding system for keyboard input to CODING-SYSTEM.
1196 In addition, this command enables Encoded-kbd minor mode.
1197 \(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see
1198 `encoded-kbd-mode'.)
1199 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1200 The default is determined by the selected language environment
1201 or by the previous use of this command."
1203 (list (let ((default (if (and (not (keyboard-coding-system))
1204 default-keyboard-coding-system
)
1205 default-keyboard-coding-system
)))
1207 (format "Coding system for keyboard input (default, %s): "
1210 (if (and (not coding-system
)
1211 (not (keyboard-coding-system)))
1212 (setq coding-system default-keyboard-coding-system
))
1214 (setq default-keyboard-coding-system coding-system
))
1215 (set-keyboard-coding-system-internal coding-system
)
1216 (setq keyboard-coding-system coding-system
)
1217 (encoded-kbd-mode (if coding-system
1 0)))
1219 (defcustom keyboard-coding-system nil
1220 "Specify coding system for keyboard input.
1221 If you set this on a terminal which can't distinguish Meta keys from
1222 8-bit characters, you will have to use ESC to type Meta characters.
1223 See Info node `Specify Coding' and Info node `Single-Byte Character Support'.
1225 On non-windowing terminals, this is set from the locale by default.
1227 Setting this variable directly does not take effect;
1228 use either M-x customize or \\[set-keyboard-coding-system]."
1229 :type
'(coding-system :tag
"Coding system")
1230 :link
'(info-link "(emacs)Specify Coding")
1231 :link
'(info-link "(emacs)Single-Byte Character Support")
1232 :set
(lambda (symbol value
)
1233 ;; Don't load encoded-kbd-mode unnecessarily.
1234 (if (or value
(boundp 'encoded-kbd-mode
))
1235 (set-keyboard-coding-system value
)
1236 (set-default 'keyboard-coding-system nil
))) ; must initialize
1241 (defun set-buffer-process-coding-system (decoding encoding
)
1242 "Set coding systems for the process associated with the current buffer.
1243 DECODING is the coding system to be used to decode input from the process,
1244 ENCODING is the coding system to be used to encode output to the process.
1246 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]."
1248 "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
1249 (let ((proc (get-buffer-process (current-buffer))))
1251 (error "No process")
1252 (check-coding-system decoding
)
1253 (check-coding-system encoding
)
1254 (set-process-coding-system proc decoding encoding
)))
1255 (force-mode-line-update))
1257 (defalias 'set-clipboard-coding-system
'set-selection-coding-system
)
1259 (defun set-selection-coding-system (coding-system)
1260 "Make CODING-SYSTEM used for communicating with other X clients.
1261 When sending or receiving text via cut_buffer, selection, and clipboard,
1262 the text is encoded or decoded by CODING-SYSTEM."
1263 (interactive "zCoding system for X selection: ")
1264 (check-coding-system coding-system
)
1265 (setq selection-coding-system coding-system
))
1267 ;; Coding system lastly specified by the command
1268 ;; set-next-selection-coding-system.
1269 (defvar last-next-selection-coding-system nil
)
1271 (defun set-next-selection-coding-system (coding-system)
1272 "Use CODING-SYSTEM for next communication with other window system clients.
1273 This setting is effective for the next communication only."
1275 (list (read-coding-system
1276 (if last-next-selection-coding-system
1277 (format "Coding system for the next selection (default, %S): "
1278 last-next-selection-coding-system
)
1279 "Coding system for the next selection: ")
1280 last-next-selection-coding-system
)))
1282 (setq last-next-selection-coding-system coding-system
)
1283 (setq coding-system last-next-selection-coding-system
))
1284 (check-coding-system coding-system
)
1286 (setq next-selection-coding-system coding-system
))
1288 (defun set-coding-priority (arg)
1289 "Set priority of coding categories according to ARG.
1290 ARG is a list of coding categories ordered by priority.
1292 This function is provided for backward compatibility.
1293 Now we have more convenient function `set-coding-system-priority'."
1294 (apply 'set-coding-system-priority
1295 (mapcar #'(lambda (x) (symbol-value x
)) arg
)))
1296 (make-obsolete 'set-coding-priority
'set-coding-system-priority
"22.1")
1300 (defvar ctext-non-standard-encodings-alist
1301 '(("ISO8859-15" . latin-iso8859-15
)
1302 ("ISO8859-14" . latin-iso8859-14
)
1305 "Alist of non-standard encoding names vs Emacs coding systems.
1306 This alist is used to decode an extened segment of a compound text.")
1308 (defvar ctext-non-standard-encodings-regexp
1309 (string-to-multibyte
1311 ;; For non-standard encodings.
1312 "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
1314 ;; For UTF-8 encoding.
1315 "\\(\e%G[^\e]*\e%@\\)")))
1317 ;; Functions to support "Non-Standard Character Set Encodings" defined
1318 ;; by the COMPOUND-TEXT spec.
1319 ;; We support that by decoding the whole data by `ctext' which just
1320 ;; pertains byte sequences belonging to ``extended segment'', then
1321 ;; decoding those byte sequences one by one in Lisp.
1322 ;; This function also supports "The UTF-8 encoding" described in the
1323 ;; section 7 of the documentation fo COMPOUND-TEXT distributed with
1326 (defun ctext-post-read-conversion (len)
1327 "Decode LEN characters encoded as Compound Text with Extended Segments."
1328 ;; We don't need the following because it is expected that this
1329 ;; function is mainly used for decoding X selection which is not
1331 ;;(buffer-disable-undo) ; minimize consing due to insertions and deletions
1334 (narrow-to-region (point) (+ (point) len
))
1335 (let ((case-fold-search nil
)
1336 last-coding-system-used
1338 (decode-coding-region (point-min) (point-max) 'ctext
)
1339 (while (re-search-forward ctext-non-standard-encodings-regexp
1341 (setq pos
(match-beginning 0))
1342 (if (match-beginning 1)
1343 ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
1344 (let* ((M (char-after (+ pos
4)))
1345 (L (char-after (+ pos
5)))
1346 (encoding (match-string 2))
1347 (coding (or (cdr (assoc-ignore-case
1349 ctext-non-standard-encodings-alist
))
1351 (intern (downcase encoding
))))))
1352 (if enable-multibyte-characters
1353 (setq M
(multibyte-char-to-unibyte M
)
1354 L
(multibyte-char-to-unibyte L
)))
1355 (setq bytes
(- (+ (* (- M
128) 128) (- L
128))
1356 (- (point) (+ pos
6))))
1358 (delete-region pos
(point))
1359 (forward-char bytes
)
1360 (decode-coding-region (- (point) bytes
) (point) coding
)))
1361 ;; ESC % G --UTF-8-BYTES-- ESC % @
1362 (setq bytes
(- (point) pos
))
1363 (decode-coding-region (- (point) bytes
) (point) 'utf-8
))))
1364 (goto-char (point-min))
1365 (- (point-max) (point)))))
1367 ;; If you add charsets here, be sure to modify the regexp used by
1368 ;; ctext-pre-write-conversion to look up non-standard charsets.
1369 (defvar ctext-non-standard-designations-alist
1370 '(("$(0" .
(big5 "big5-0" 2))
1371 ("$(1" .
(big5 "big5-0" 2))
1372 ;; The following are actually standard; generating extended
1373 ;; segments for them is wrong and screws e.g. Latin-9 users.
1374 ;; 8859-{10,13,16} aren't Emacs charsets anyhow. -- fx
1375 ;; ("-V" . (t "iso8859-10" 1))
1376 ;; ("-Y" . (t "iso8859-13" 1))
1377 ;; ("-_" . (t "iso8859-14" 1))
1378 ;; ("-b" . (t "iso8859-15" 1))
1379 ;; ("-f" . (t "iso8859-16" 1))
1381 "Alist of ctext control sequences that introduce character sets which
1382 are not in the list of approved encodings, and the corresponding
1383 coding system, identifier string, and number of octets per encoded
1386 Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
1387 is the control sequence (sans the leading ESC) that introduces the character
1388 set in the text encoded by compound-text. ENCODING is a coding system
1389 symbol; if it is t, it means that the ctext coding system already encodes
1390 the text correctly, and only the leading control sequence needs to be altered.
1391 If ENCODING is a coding system, we need to re-encode the text with that
1392 coding system. CHARSET is the name of the charset we need to put into
1393 the leading control sequence. NOCTETS is the number of octets (bytes) that
1394 encode each character in this charset. NOCTETS can be 0 (meaning the number
1395 of octets per character is variable), 1, 2, 3, or 4.")
1397 (defun ctext-pre-write-conversion (from to
)
1398 "Encode characters between FROM and TO as Compound Text w/Extended Segments.
1400 If FROM is a string, or if the current buffer is not the one set up for us
1401 by encode-coding-string, generate a new temp buffer, insert the
1402 text, and convert it in the temporary buffer. Otherwise, convert in-place."
1404 ;; Setup a working buffer if necessary.
1405 (when (stringp from
)
1406 (set-buffer (generate-new-buffer " *temp"))
1407 (set-buffer-multibyte (multibyte-string-p from
))
1410 ;; Now we can encode the whole buffer.
1411 (let ((case-fold-search nil
)
1412 last-coding-system-used
1413 pos posend desig encode-info encoding chset noctets textlen
)
1414 (goto-char (point-min))
1415 ;; At first encode the whole buffer.
1416 (encode-coding-region (point-min) (point-max) 'ctext-no-compositions
)
1417 ;; Then replace ISO-2022 charset designations with extended
1418 ;; segments, for those charsets that are not part of the
1419 ;; official X registry. The regexp below finds the leading
1420 ;; sequences for big5.
1421 (while (re-search-forward "\e\\(\$([01]\\)" nil
'move
)
1422 (setq pos
(match-beginning 0)
1424 desig
(match-string 1)
1425 encode-info
(cdr (assoc desig
1426 ctext-non-standard-designations-alist
))
1427 encoding
(car encode-info
)
1428 chset
(cadr encode-info
)
1429 noctets
(car (cddr encode-info
)))
1430 (skip-chars-forward "^\e")
1432 ((eq encoding t
) ; only the leading sequence needs to be changed
1433 (setq textlen
(+ (- (point) posend
) (length chset
) 1))
1434 ;; Generate the control sequence for an extended segment.
1435 (replace-match (string-to-multibyte (format "\e%%/%d%c%c%s\ 2"
1437 (+ (/ textlen
128) 128)
1438 (+ (% textlen
128) 128)
1441 ((coding-system-p encoding
) ; need to recode the entire segment...
1442 (decode-coding-region pos
(point) 'ctext-no-compositions
)
1443 (encode-coding-region pos
(point) encoding
)
1444 (setq textlen
(+ (- (point) pos
) (length chset
) 1))
1447 (insert (string-to-multibyte (format "\e%%/%d%c%c%s\ 2"
1449 (+ (/ textlen
128) 128)
1450 (+ (% textlen
128) 128)
1452 (goto-char (point-min))))
1453 ;; Must return nil, as build_annotations_2 expects that.
1458 (defcustom auto-coding-alist
1459 '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|sx[dmicw]\\|tar\\)\\'" . no-conversion-multibyte
)
1460 ("\\.tgz\\'" . no-conversion
)
1461 ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion
)
1462 ("/#[^/]+#\\'" . emacs-mule
))
1463 "Alist of filename patterns vs corresponding coding systems.
1464 Each element looks like (REGEXP . CODING-SYSTEM).
1465 A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.
1467 The settings in this alist take priority over `coding:' tags
1468 in the file (see the function `set-auto-coding')
1469 and the contents of `file-coding-system-alist'."
1472 :type
'(repeat (cons (regexp :tag
"File name regexp")
1473 (symbol :tag
"Coding system"))))
1475 (defcustom auto-coding-regexp-alist
1476 '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion
)