1 /* Coding system handler (conversion, detection, and etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /*** TABLE OF CONTENTS ***
26 2. Emacs' internal format (emacs-mule) handlers
28 4. Shift-JIS and BIG5 handlers
30 6. End-of-line handlers
31 7. C library functions
32 8. Emacs Lisp library functions
37 /*** 0. General comments ***/
40 /*** GENERAL NOTE on CODING SYSTEM ***
42 Coding system is an encoding mechanism of one or more character
43 sets. Here's a list of coding systems which Emacs can handle. When
44 we say "decode", it means converting some other coding system to
45 Emacs' internal format (emacs-mule), and when we say "encode",
46 it means converting the coding system emacs-mule to some other
49 0. Emacs' internal format (emacs-mule)
51 Emacs itself holds a multi-lingual character in a buffer and a string
52 in a special format. Details are described in section 2.
56 The most famous coding system for multiple character sets. X's
57 Compound Text, various EUCs (Extended Unix Code), and coding
58 systems used in Internet communication such as ISO-2022-JP are
59 all variants of ISO2022. Details are described in section 3.
61 2. SJIS (or Shift-JIS or MS-Kanji-Code)
63 A coding system to encode character sets: ASCII, JISX0201, and
64 JISX0208. Widely used for PC's in Japan. Details are described in
69 A coding system to encode character sets: ASCII and Big5. Widely
70 used by Chinese (mainly in Taiwan and Hong Kong). Details are
71 described in section 4. In this file, when we write "BIG5"
72 (all uppercase), we mean the coding system, and when we write
73 "Big5" (capitalized), we mean the character set.
77 A coding system for a text containing random 8-bit code. Emacs does
78 no code conversion on such a text except for end-of-line format.
82 If a user wants to read/write a text encoded in a coding system not
83 listed above, he can supply a decoder and an encoder for it in CCL
84 (Code Conversion Language) programs. Emacs executes the CCL program
85 while reading/writing.
87 Emacs represents a coding system by a Lisp symbol that has a property
88 `coding-system'. But, before actually using the coding system, the
89 information about it is set in a structure of type `struct
90 coding_system' for rapid processing. See section 6 for more details.
94 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
96 How end-of-line of a text is encoded depends on a system. For
97 instance, Unix's format is just one byte of `line-feed' code,
98 whereas DOS's format is two-byte sequence of `carriage-return' and
99 `line-feed' codes. MacOS's format is usually one byte of
102 Since text characters encoding and end-of-line encoding are
103 independent, any coding system described above can take
104 any format of end-of-line. So, Emacs has information of format of
105 end-of-line in each coding-system. See section 6 for more details.
109 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
111 These functions check if a text between SRC and SRC_END is encoded
112 in the coding system category XXX. Each returns an integer value in
113 which appropriate flag bits for the category XXX are set. The flag
114 bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the
115 template of these functions. */
118 detect_coding_emacs_mule (src
, src_end
)
119 unsigned char *src
, *src_end
;
125 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
127 These functions decode SRC_BYTES length of unibyte text at SOURCE
128 encoded in CODING to Emacs' internal format. The resulting
129 multibyte text goes to a place pointed to by DESTINATION, the length
130 of which should not exceed DST_BYTES.
132 These functions set the information of original and decoded texts in
133 the members produced, produced_char, consumed, and consumed_char of
134 the structure *CODING. They also set the member result to one of
135 CODING_FINISH_XXX indicating how the decoding finished.
137 DST_BYTES zero means that source area and destination area are
138 overlapped, which means that we can produce a decoded text until it
139 reaches the head of not-yet-decoded source text.
141 Below is a template of these functions. */
144 decode_coding_XXX (coding
, source
, destination
, src_bytes
, dst_bytes
)
145 struct coding_system
*coding
;
146 unsigned char *source
, *destination
;
147 int src_bytes
, dst_bytes
;
153 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
155 These functions encode SRC_BYTES length text at SOURCE of Emacs'
156 internal multibyte format to CODING. The resulting unibyte text
157 goes to a place pointed to by DESTINATION, the length of which
158 should not exceed DST_BYTES.
160 These functions set the information of original and encoded texts in
161 the members produced, produced_char, consumed, and consumed_char of
162 the structure *CODING. They also set the member result to one of
163 CODING_FINISH_XXX indicating how the encoding finished.
165 DST_BYTES zero means that source area and destination area are
166 overlapped, which means that we can produce an encoded text until it
167 reaches the head of not-yet-encoded source text.
169 Below is a template of these functions. */
172 encode_coding_XXX (coding
, source
, destination
, src_bytes
, dst_bytes
)
173 struct coding_system
*coding
;
174 unsigned char *source
, *destination
;
175 int src_bytes
, dst_bytes
;
181 /*** COMMONLY USED MACROS ***/
183 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
184 get one and two bytes from the source text respectively.
185 If there are not enough bytes in the source, they jump to
186 `label_end_of_loop'. The caller should set variables `coding',
187 `src' and `src_end' to appropriate pointer in advance. These
188 macros are called from decoding routines `decode_coding_XXX', thus
189 it is assumed that the source text is unibyte. */
191 #define ONE_MORE_BYTE(c1) \
193 if (src >= src_end) \
195 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
196 goto label_end_of_loop; \
201 #define TWO_MORE_BYTES(c1, c2) \
203 if (src + 1 >= src_end) \
205 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
206 goto label_end_of_loop; \
213 /* Set C to the next character at the source text pointed by `src'.
214 If there are not enough characters in the source, jump to
215 `label_end_of_loop'. The caller should set variables `coding'
216 `src', `src_end', and `translation_table' to appropriate pointers
217 in advance. This macro is used in encoding routines
218 `encode_coding_XXX', thus it assumes that the source text is in
219 multibyte form except for 8-bit characters. 8-bit characters are
220 in multibyte form if coding->src_multibyte is nonzero, else they
221 are represented by a single byte. */
223 #define ONE_MORE_CHAR(c) \
225 int len = src_end - src; \
229 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
230 goto label_end_of_loop; \
232 if (coding->src_multibyte \
233 || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes)) \
234 c = STRING_CHAR_AND_LENGTH (src, len, bytes); \
236 c = *src, bytes = 1; \
237 if (!NILP (translation_table)) \
238 c = translate_char (translation_table, c, 0, 0, 0); \
243 /* Produce a multibyte form of character C to `dst'. Jump to
244 `label_end_of_loop' if there's not enough space at `dst'.
246 If we are now in the middle of composition sequence, the decoded
247 character may be ALTCHAR (for the current composition). In that
248 case, the character goes to coding->cmp_data->data instead of
251 This macro is used in decoding routines. */
253 #define EMIT_CHAR(c) \
255 if (! COMPOSING_P (coding) \
256 || coding->composing == COMPOSITION_RELATIVE \
257 || coding->composing == COMPOSITION_WITH_RULE) \
259 int bytes = CHAR_BYTES (c); \
260 if ((dst + bytes) > (dst_bytes ? dst_end : src)) \
262 coding->result = CODING_FINISH_INSUFFICIENT_DST; \
263 goto label_end_of_loop; \
265 dst += CHAR_STRING (c, dst); \
266 coding->produced_char++; \
269 if (COMPOSING_P (coding) \
270 && coding->composing != COMPOSITION_RELATIVE) \
272 CODING_ADD_COMPOSITION_COMPONENT (coding, c); \
273 coding->composition_rule_follows \
274 = coding->composing != COMPOSITION_WITH_ALTCHARS; \
279 #define EMIT_ONE_BYTE(c) \
281 if (dst >= (dst_bytes ? dst_end : src)) \
283 coding->result = CODING_FINISH_INSUFFICIENT_DST; \
284 goto label_end_of_loop; \
289 #define EMIT_TWO_BYTES(c1, c2) \
291 if (dst + 2 > (dst_bytes ? dst_end : src)) \
293 coding->result = CODING_FINISH_INSUFFICIENT_DST; \
294 goto label_end_of_loop; \
296 *dst++ = c1, *dst++ = c2; \
299 #define EMIT_BYTES(from, to) \
301 if (dst + (to - from) > (dst_bytes ? dst_end : src)) \
303 coding->result = CODING_FINISH_INSUFFICIENT_DST; \
304 goto label_end_of_loop; \
311 /*** 1. Preamble ***/
324 #include "composite.h"
329 #else /* not emacs */
333 #endif /* not emacs */
335 Lisp_Object Qcoding_system
, Qeol_type
;
336 Lisp_Object Qbuffer_file_coding_system
;
337 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
338 Lisp_Object Qno_conversion
, Qundecided
;
339 Lisp_Object Qcoding_system_history
;
340 Lisp_Object Qsafe_charsets
;
341 Lisp_Object Qvalid_codes
;
343 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
344 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
345 Lisp_Object Qstart_process
, Qopen_network_stream
;
346 Lisp_Object Qtarget_idx
;
348 Lisp_Object Vselect_safe_coding_system_function
;
350 /* Mnemonic string for each format of end-of-line. */
351 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
352 /* Mnemonic string to indicate format of end-of-line is not yet
354 Lisp_Object eol_mnemonic_undecided
;
356 /* Format of end-of-line decided by system. This is CODING_EOL_LF on
357 Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac. */
362 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
364 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
366 /* Coding system emacs-mule and raw-text are for converting only
367 end-of-line format. */
368 Lisp_Object Qemacs_mule
, Qraw_text
;
370 /* Coding-systems are handed between Emacs Lisp programs and C internal
371 routines by the following three variables. */
372 /* Coding-system for reading files and receiving data from process. */
373 Lisp_Object Vcoding_system_for_read
;
374 /* Coding-system for writing files and sending data to process. */
375 Lisp_Object Vcoding_system_for_write
;
376 /* Coding-system actually used in the latest I/O. */
377 Lisp_Object Vlast_coding_system_used
;
379 /* A vector of length 256 which contains information about special
380 Latin codes (especially for dealing with Microsoft codes). */
381 Lisp_Object Vlatin_extra_code_table
;
383 /* Flag to inhibit code conversion of end-of-line format. */
384 int inhibit_eol_conversion
;
386 /* Flag to make buffer-file-coding-system inherit from process-coding. */
387 int inherit_process_coding_system
;
389 /* Coding system to be used to encode text for terminal display. */
390 struct coding_system terminal_coding
;
392 /* Coding system to be used to encode text for terminal display when
393 terminal coding system is nil. */
394 struct coding_system safe_terminal_coding
;
396 /* Coding system of what is sent from terminal keyboard. */
397 struct coding_system keyboard_coding
;
399 /* Default coding system to be used to write a file. */
400 struct coding_system default_buffer_file_coding
;
402 Lisp_Object Vfile_coding_system_alist
;
403 Lisp_Object Vprocess_coding_system_alist
;
404 Lisp_Object Vnetwork_coding_system_alist
;
406 Lisp_Object Vlocale_coding_system
;
410 Lisp_Object Qcoding_category
, Qcoding_category_index
;
412 /* List of symbols `coding-category-xxx' ordered by priority. */
413 Lisp_Object Vcoding_category_list
;
415 /* Table of coding categories (Lisp symbols). */
416 Lisp_Object Vcoding_category_table
;
418 /* Table of names of symbol for each coding-category. */
419 char *coding_category_name
[CODING_CATEGORY_IDX_MAX
] = {
420 "coding-category-emacs-mule",
421 "coding-category-sjis",
422 "coding-category-iso-7",
423 "coding-category-iso-7-tight",
424 "coding-category-iso-8-1",
425 "coding-category-iso-8-2",
426 "coding-category-iso-7-else",
427 "coding-category-iso-8-else",
428 "coding-category-ccl",
429 "coding-category-big5",
430 "coding-category-utf-8",
431 "coding-category-utf-16-be",
432 "coding-category-utf-16-le",
433 "coding-category-raw-text",
434 "coding-category-binary"
437 /* Table of pointers to coding systems corresponding to each coding
439 struct coding_system
*coding_system_table
[CODING_CATEGORY_IDX_MAX
];
441 /* Table of coding category masks. Nth element is a mask for a coding
442 category of which priority is Nth. */
444 int coding_priorities
[CODING_CATEGORY_IDX_MAX
];
446 /* Flag to tell if we look up translation table on character code
448 Lisp_Object Venable_character_translation
;
449 /* Standard translation table to look up on decoding (reading). */
450 Lisp_Object Vstandard_translation_table_for_decode
;
451 /* Standard translation table to look up on encoding (writing). */
452 Lisp_Object Vstandard_translation_table_for_encode
;
454 Lisp_Object Qtranslation_table
;
455 Lisp_Object Qtranslation_table_id
;
456 Lisp_Object Qtranslation_table_for_decode
;
457 Lisp_Object Qtranslation_table_for_encode
;
459 /* Alist of charsets vs revision number. */
460 Lisp_Object Vcharset_revision_alist
;
462 /* Default coding systems used for process I/O. */
463 Lisp_Object Vdefault_process_coding_system
;
465 /* Global flag to tell that we can't call post-read-conversion and
466 pre-write-conversion functions. Usually the value is zero, but it
467 is set to 1 temporarily while such functions are running. This is
468 to avoid infinite recursive call. */
469 static int inhibit_pre_post_conversion
;
472 /*** 2. Emacs internal format (emacs-mule) handlers ***/
474 /* Emacs' internal format for encoding multiple character sets is a
475 kind of multi-byte encoding, i.e. characters are encoded by
476 variable-length sequences of one-byte codes.
478 ASCII characters and control characters (e.g. `tab', `newline') are
479 represented by one-byte sequences which are their ASCII codes, in
480 the range 0x00 through 0x7F.
482 8-bit characters of the range 0x80..0x9F are represented by
483 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
486 8-bit characters of the range 0xA0..0xFF are represented by
487 one-byte sequences which are their 8-bit code.
489 The other characters are represented by a sequence of `base
490 leading-code', optional `extended leading-code', and one or two
491 `position-code's. The length of the sequence is determined by the
492 base leading-code. Leading-code takes the range 0x80 through 0x9F,
493 whereas extended leading-code and position-code take the range 0xA0
494 through 0xFF. See `charset.h' for more details about leading-code
497 --- CODE RANGE of Emacs' internal format ---
501 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
502 eight-bit-graphic 0xA0..0xFF
503 ELSE 0x81..0x9F + [0xA0..0xFF]+
504 ---------------------------------------------
508 enum emacs_code_class_type emacs_code_class
[256];
510 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
511 Check if a text is encoded in Emacs' internal format. If it is,
512 return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */
515 detect_coding_emacs_mule (src
, src_end
)
516 unsigned char *src
, *src_end
;
520 /* Dummy for ONE_MORE_BYTE. */
521 struct coding_system dummy_coding
;
522 struct coding_system
*coding
= &dummy_coding
;
543 if (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
)
546 else if (c
>= 0x80 && c
< 0xA0)
549 /* Old leading code for a composite character. */
553 unsigned char *src_base
= src
- 1;
556 if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base
, src_end
- src_base
,
559 src
= src_base
+ bytes
;
564 return CODING_CATEGORY_MASK_EMACS_MULE
;
568 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
571 decode_coding_emacs_mule (coding
, source
, destination
, src_bytes
, dst_bytes
)
572 struct coding_system
*coding
;
573 unsigned char *source
, *destination
;
574 int src_bytes
, dst_bytes
;
576 unsigned char *src
= source
;
577 unsigned char *src_end
= source
+ src_bytes
;
578 unsigned char *dst
= destination
;
579 unsigned char *dst_end
= destination
+ dst_bytes
;
580 /* SRC_BASE remembers the start position in source in each loop.
581 The loop will be exited when there's not enough source code, or
582 when there's not enough destination area to produce a
584 unsigned char *src_base
;
586 coding
->produced_char
= 0;
587 while (src
< src_end
)
589 unsigned char tmp
[MAX_MULTIBYTE_LENGTH
], *p
;
593 if (UNIBYTE_STR_AS_MULTIBYTE_P (src
, src_end
- src
, bytes
))
600 bytes
= CHAR_STRING (*src
, tmp
);
604 if (dst
+ bytes
>= (dst_bytes
? dst_end
: src
))
606 coding
->result
= CODING_FINISH_INSUFFICIENT_DST
;
609 while (bytes
--) *dst
++ = *p
++;
610 coding
->produced_char
++;
612 coding
->consumed
= coding
->consumed_char
= src_base
- source
;
613 coding
->produced
= dst
- destination
;
/* Encoding to emacs-mule is delegated entirely to encode_eol, which is
   called with the same five arguments in the same order.  */
616 #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \
617 encode_eol (coding, source, destination, src_bytes, dst_bytes)
621 /*** 3. ISO2022 handlers ***/
623 /* The following note describes the coding system ISO2022 briefly.
624 Since the intention of this note is to help understand the
625 functions in this file, some parts are NOT ACCURATE or OVERLY
626 SIMPLIFIED. For thorough understanding, please refer to the
627 original document of ISO2022.
629 ISO2022 provides many mechanisms to encode several character sets
630 in 7-bit and 8-bit environments. For 7-bit environments, all text
631 is encoded using bytes less than 128. This may make the encoded
632 text a little bit longer, but the text passes more easily through
633 several gateways, some of which strip off MSB (Most Significant Bit).
635 There are two kinds of character sets: control character set and
636 graphic character set. The former contains control characters such
637 as `newline' and `escape' to provide control functions (control
638 functions are also provided by escape sequences). The latter
639 contains graphic characters such as 'A' and '-'. Emacs recognizes
640 two control character sets and many graphic character sets.
642 Graphic character sets are classified into one of the following
643 four classes, according to the number of bytes (DIMENSION) and
644 number of characters in one dimension (CHARS) of the set:
650 In addition, each character set is assigned an identification tag,
651 unique for each set, called "final character" (denoted as <F>
652 hereafter). The <F> of each character set is decided by ECMA(*)
653 when it is registered in ISO. The code range of <F> is 0x30..0x7F
654 (0x30..0x3F are for private use only).
656 Note (*): ECMA = European Computer Manufacturers Association
658 Here are examples of graphic character set [NAME(<F>)]:
659 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
660 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
661 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
662 o DIMENSION2_CHARS96 -- none for the moment
664 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
665 C0 [0x00..0x1F] -- control character plane 0
666 GL [0x20..0x7F] -- graphic character plane 0
667 C1 [0x80..0x9F] -- control character plane 1
668 GR [0xA0..0xFF] -- graphic character plane 1
670 A control character set is directly designated and invoked to C0 or
671 C1 by an escape sequence. The most common case is that:
672 - ISO646's control character set is designated/invoked to C0, and
673 - ISO6429's control character set is designated/invoked to C1,
674 and usually these designations/invocations are omitted in encoded
675 text. In a 7-bit environment, only C0 can be used, and a control
676 character for C1 is encoded by an appropriate escape sequence to
677 fit into the environment. All control characters for C1 are
678 defined to have corresponding escape sequences.
680 A graphic character set is at first designated to one of four
681 graphic registers (G0 through G3), then these graphic registers are
682 invoked to GL or GR. These designations and invocations can be
683 done independently. The most common case is that G0 is invoked to
684 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
685 these invocations and designations are omitted in encoded text.
686 In a 7-bit environment, only GL can be used.
688 When a graphic character set of CHARS94 is invoked to GL, codes
689 0x20 and 0x7F of the GL area work as control characters SPACE and
690 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
693 There are two ways of invocation: locking-shift and single-shift.
694 With locking-shift, the invocation lasts until the next different
695 invocation, whereas with single-shift, the invocation affects the
696 following character only and doesn't affect the locking-shift
697 state. Invocations are done by the following control characters or
700 ----------------------------------------------------------------------
701 abbrev function cntrl escape seq description
702 ----------------------------------------------------------------------
703 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
704 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
705 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
706 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
707 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
708 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
709 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
710 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
711 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
712 ----------------------------------------------------------------------
713 (*) These are not used by any known coding system.
715 Control characters for these functions are defined by macros
716 ISO_CODE_XXX in `coding.h'.
718 Designations are done by the following escape sequences:
719 ----------------------------------------------------------------------
720 escape sequence description
721 ----------------------------------------------------------------------
722 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
723 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
724 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
725 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
726 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
727 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
728 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
729 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
730 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
731 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
732 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
733 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
734 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
735 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
736 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
737 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
738 ----------------------------------------------------------------------
740 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
741 of dimension 1, chars 94, and final character <F>, etc...
743 Note (*): Although these designations are not allowed in ISO2022,
744 Emacs accepts them on decoding, and produces them on encoding
745 CHARS96 character sets in a coding system which is characterized as
746 7-bit environment, non-locking-shift, and non-single-shift.
748 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
749 '(' can be omitted. We refer to this as "short-form" hereafter.
751 Now you may notice that there are a lot of ways for encoding the
752 same multilingual text in ISO2022. Actually, there exist many
753 coding systems such as Compound Text (used in X11's inter client
754 communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
755 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
756 localized platforms), and all of these are variants of ISO2022.
758 In addition to the above, Emacs handles two more kinds of escape
759 sequences: ISO6429's direction specification and Emacs' private
760 sequence for specifying character composition.
762 ISO6429's direction specification takes the following form:
763 o CSI ']' -- end of the current direction
764 o CSI '0' ']' -- end of the current direction
765 o CSI '1' ']' -- start of left-to-right text
766 o CSI '2' ']' -- start of right-to-left text
767 The control character CSI (0x9B: control sequence introducer) is
768 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
770 Character composition specification takes the following form:
771 o ESC '0' -- start relative composition
772 o ESC '1' -- end composition
773 o ESC '2' -- start rule-base composition (*)
774 o ESC '3' -- start relative composition with alternate chars (**)
775 o ESC '4' -- start rule-base composition with alternate chars (**)
776 Since these are not standard escape sequences of any ISO standard,
777 the use of them for these meanings is restricted to Emacs only.
779 (*) This form is used only in Emacs 20.5 and the older versions,
780 but the newer versions can safely decode it.
781 (**) This form is used only in Emacs 21.1 and the newer versions,
782 and the older versions can't decode it.
784 Here's a list of example usages of these composition escape
785 sequences (categorized by `enum composition_method').
787 COMPOSITION_RELATIVE:
788 ESC 0 CHAR [ CHAR ] ESC 1
789 COMPOSITION_WITH_RULE:
790 ESC 2 CHAR [ RULE CHAR ] ESC 1
791 COMPOSITION_WITH_ALTCHARS:
792 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
793 COMPOSITION_WITH_RULE_ALTCHARS:
794 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
796 enum iso_code_class_type iso_code_class
[256];
/* Nonzero if coding_system_table[IDX] is non-null and that coding
   system either has a nonzero safe_charsets entry for CHARSET or has a
   requested designation for CHARSET (i.e. its requested designation
   is not CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION).  Both IDX and
   CHARSET are evaluated more than once, so pass side-effect-free
   expressions.  */
798 #define CHARSET_OK(idx, charset) \
799 (coding_system_table[idx] \
800 && (coding_system_table[idx]->safe_charsets[charset] \
801 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION \
802 (coding_system_table[idx], charset) \
803 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
/* Nonzero if the initial designation recorded for register 1 of the
   coding system coding_system_table[IDX] is non-negative.
   NOTE(review): unlike CHARSET_OK, this macro does not itself check
   that coding_system_table[IDX] is non-null -- confirm that callers
   guarantee it.  */
805 #define SHIFT_OUT_OK(idx) \
806 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
808 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
809 Check if a text is encoded in ISO2022. If it is, return an
810 integer in which the appropriate flag bits, any of:
811 CODING_CATEGORY_MASK_ISO_7
812 CODING_CATEGORY_MASK_ISO_7_TIGHT
813 CODING_CATEGORY_MASK_ISO_8_1
814 CODING_CATEGORY_MASK_ISO_8_2
815 CODING_CATEGORY_MASK_ISO_7_ELSE
816 CODING_CATEGORY_MASK_ISO_8_ELSE
817 are set. If a code which should never appear in ISO2022 is found,
821 detect_coding_iso2022 (src
, src_end
)
822 unsigned char *src
, *src_end
;
824 int mask
= CODING_CATEGORY_MASK_ISO
;
826 int reg
[4], shift_out
= 0, single_shifting
= 0;
827 int c
, c1
, i
, charset
;
828 /* Dummy for ONE_MORE_BYTE. */
829 struct coding_system dummy_coding
;
830 struct coding_system
*coding
= &dummy_coding
;
832 reg
[0] = CHARSET_ASCII
, reg
[1] = reg
[2] = reg
[3] = -1;
833 while (mask
&& src
< src_end
)
841 if (c
>= '(' && c
<= '/')
843 /* Designation sequence for a charset of dimension 1. */
845 if (c1
< ' ' || c1
>= 0x80
846 || (charset
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
847 /* Invalid designation sequence. Just ignore. */
849 reg
[(c
- '(') % 4] = charset
;
853 /* Designation sequence for a charset of dimension 2. */
855 if (c
>= '@' && c
<= 'B')
856 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
857 reg
[0] = charset
= iso_charset_table
[1][0][c
];
858 else if (c
>= '(' && c
<= '/')
861 if (c1
< ' ' || c1
>= 0x80
862 || (charset
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
863 /* Invalid designation sequence. Just ignore. */
865 reg
[(c
- '(') % 4] = charset
;
868 /* Invalid designation sequence. Just ignore. */
871 else if (c
== 'N' || c
== 'O')
873 /* ESC <Fe> for SS2 or SS3. */
874 mask
&= CODING_CATEGORY_MASK_ISO_7_ELSE
;
877 else if (c
>= '0' && c
<= '4')
879 /* ESC <Fp> for start/end composition. */
880 mask_found
|= CODING_CATEGORY_MASK_ISO
;
884 /* Invalid escape sequence. Just ignore. */
887 /* We found a valid designation sequence for CHARSET. */
888 mask
&= ~CODING_CATEGORY_MASK_ISO_8BIT
;
889 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7
, charset
))
890 mask_found
|= CODING_CATEGORY_MASK_ISO_7
;
892 mask
&= ~CODING_CATEGORY_MASK_ISO_7
;
893 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT
, charset
))
894 mask_found
|= CODING_CATEGORY_MASK_ISO_7_TIGHT
;
896 mask
&= ~CODING_CATEGORY_MASK_ISO_7_TIGHT
;
897 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE
, charset
))
898 mask_found
|= CODING_CATEGORY_MASK_ISO_7_ELSE
;
900 mask
&= ~CODING_CATEGORY_MASK_ISO_7_ELSE
;
901 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE
, charset
))
902 mask_found
|= CODING_CATEGORY_MASK_ISO_8_ELSE
;
904 mask
&= ~CODING_CATEGORY_MASK_ISO_8_ELSE
;
911 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE
)
912 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE
)))
914 /* Locking shift out. */
915 mask
&= ~CODING_CATEGORY_MASK_ISO_7BIT
;
916 mask_found
|= CODING_CATEGORY_MASK_ISO_SHIFT
;
924 /* Locking shift in. */
925 mask
&= ~CODING_CATEGORY_MASK_ISO_7BIT
;
926 mask_found
|= CODING_CATEGORY_MASK_ISO_SHIFT
;
935 int newmask
= CODING_CATEGORY_MASK_ISO_8_ELSE
;
937 if (c
!= ISO_CODE_CSI
)
939 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_1
]->flags
940 & CODING_FLAG_ISO_SINGLE_SHIFT
)
941 newmask
|= CODING_CATEGORY_MASK_ISO_8_1
;
942 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_2
]->flags
943 & CODING_FLAG_ISO_SINGLE_SHIFT
)
944 newmask
|= CODING_CATEGORY_MASK_ISO_8_2
;
947 if (VECTORP (Vlatin_extra_code_table
)
948 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
950 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_1
]->flags
951 & CODING_FLAG_ISO_LATIN_EXTRA
)
952 newmask
|= CODING_CATEGORY_MASK_ISO_8_1
;
953 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_2
]->flags
954 & CODING_FLAG_ISO_LATIN_EXTRA
)
955 newmask
|= CODING_CATEGORY_MASK_ISO_8_2
;
958 mask_found
|= newmask
;
971 if (VECTORP (Vlatin_extra_code_table
)
972 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
976 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_1
]->flags
977 & CODING_FLAG_ISO_LATIN_EXTRA
)
978 newmask
|= CODING_CATEGORY_MASK_ISO_8_1
;
979 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_2
]->flags
980 & CODING_FLAG_ISO_LATIN_EXTRA
)
981 newmask
|= CODING_CATEGORY_MASK_ISO_8_2
;
983 mask_found
|= newmask
;
990 unsigned char *src_begin
= src
;
992 mask
&= ~(CODING_CATEGORY_MASK_ISO_7BIT
993 | CODING_CATEGORY_MASK_ISO_7_ELSE
);
994 mask_found
|= CODING_CATEGORY_MASK_ISO_8_1
;
995 /* Check the length of succeeding codes of the range
996 0xA0..0xFF. If the byte length is odd, we exclude
997 CODING_CATEGORY_MASK_ISO_8_2. We can check this only
998 when we are not single shifting. */
1000 && mask
& CODING_CATEGORY_MASK_ISO_8_2
)
1003 while (src
< src_end
)
1011 if (i
& 1 && src
< src_end
)
1012 mask
&= ~CODING_CATEGORY_MASK_ISO_8_2
;
1014 mask_found
|= CODING_CATEGORY_MASK_ISO_8_2
;
1021 return (mask
& mask_found
);
/* Build the character code for a character of CHARSET whose 1st
   position code is C1 and whose 2nd position code is C2.  When the
   variable `translation_table' is nil, the code comes straight from
   MAKE_CHAR; otherwise it is whatever translate_char yields for that
   table.  `translation_table' must be in scope where the macro is
   used.  */

#define DECODE_ISO_CHARACTER(charset, c1, c2)			\
  (!NILP (translation_table)					\
   ? translate_char (translation_table, -1, charset, c1, c2)	\
   : MAKE_CHAR (charset, c1, c2))
1034 /* Set designation state into CODING. */
1035 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
1039 if (final_char < '0' || final_char >= 128) \
1040 goto label_invalid_code; \
1041 charset = ISO_CHARSET_TABLE (make_number (dimension), \
1042 make_number (chars), \
1043 make_number (final_char)); \
1045 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1046 || coding->safe_charsets[charset])) \
1048 if (coding->spec.iso2022.last_invalid_designation_register == 0 \
1050 && charset == CHARSET_ASCII) \
1052 /* We should insert this designation sequence as is so \
1053 that it is surely written back to a file. */ \
1054 coding->spec.iso2022.last_invalid_designation_register = -1; \
1055 goto label_invalid_code; \
1057 coding->spec.iso2022.last_invalid_designation_register = -1; \
1058 if ((coding->mode & CODING_MODE_DIRECTION) \
1059 && CHARSET_REVERSE_CHARSET (charset) >= 0) \
1060 charset = CHARSET_REVERSE_CHARSET (charset); \
1061 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
1065 coding->spec.iso2022.last_invalid_designation_register = reg; \
1066 goto label_invalid_code; \
1070 /* Allocate a memory block for storing information about compositions.
1071 The block is chained to the already allocated blocks. */
1074 coding_allocate_composition_data (coding
, char_offset
)
1075 struct coding_system
*coding
;
1078 struct composition_data
*cmp_data
1079 = (struct composition_data
*) xmalloc (sizeof *cmp_data
);
1081 cmp_data
->char_offset
= char_offset
;
1083 cmp_data
->prev
= coding
->cmp_data
;
1084 cmp_data
->next
= NULL
;
1085 if (coding
->cmp_data
)
1086 coding
->cmp_data
->next
= cmp_data
;
1087 coding
->cmp_data
= cmp_data
;
1088 coding
->cmp_data_start
= 0;
1091 /* Record the starting position START and METHOD of one composition. */
1093 #define CODING_ADD_COMPOSITION_START(coding, start, method) \
1095 struct composition_data *cmp_data = coding->cmp_data; \
1096 int *data = cmp_data->data + cmp_data->used; \
1097 coding->cmp_data_start = cmp_data->used; \
1099 data[1] = cmp_data->char_offset + start; \
1100 data[3] = (int) method; \
1101 cmp_data->used += 4; \
1104 /* Record the ending position END of the current composition. */
1106 #define CODING_ADD_COMPOSITION_END(coding, end) \
1108 struct composition_data *cmp_data = coding->cmp_data; \
1109 int *data = cmp_data->data + coding->cmp_data_start; \
1110 data[0] = cmp_data->used - coding->cmp_data_start; \
1111 data[2] = cmp_data->char_offset + end; \
/* Record one COMPONENT (alternate character or composition rule):
   store it at index `used' of the composition data block that CODING
   currently points to, then increment `used'.  The value of the
   expression is the stored COMPONENT.

   Both arguments are parenthesized so that an arbitrary expression
   can be passed safely.  Note that CODING is still evaluated twice,
   so it must not have side effects.  */

#define CODING_ADD_COMPOSITION_COMPONENT(coding, component)		\
  ((coding)->cmp_data->data[(coding)->cmp_data->used++] = (component))
1119 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. */
1121 #define DECODE_COMPOSITION_START(c1) \
1123 if (coding->composing == COMPOSITION_DISABLED) \
1125 *dst++ = ISO_CODE_ESC; \
1126 *dst++ = c1 & 0x7f; \
1127 coding->produced_char += 2; \
1129 else if (!COMPOSING_P (coding)) \
1131 /* This is surely the start of a composition. We must be sure \
1132 that coding->cmp_data has enough space to store the \
1133 information about the composition. If not, terminate the \
1134 current decoding loop, allocate one more memory block for \
1135 coding->cmp_data in the caller, then start the decoding \
1136 loop again. We can't allocate memory here directly because \
1137 it may cause buffer/string relocation. */ \
1138 if (!coding->cmp_data \
1139 || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1140 >= COMPOSITION_DATA_SIZE)) \
1142 coding->result = CODING_FINISH_INSUFFICIENT_CMP; \
1143 goto label_end_of_loop; \
1145 coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \
1146 : c1 == '2' ? COMPOSITION_WITH_RULE \
1147 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
1148 : COMPOSITION_WITH_RULE_ALTCHARS); \
1149 CODING_ADD_COMPOSITION_START (coding, coding->produced_char, \
1150 coding->composing); \
1151 coding->composition_rule_follows = 0; \
1155 /* We are already handling a composition. If the method is \
1156 the following two, the codes following the current escape \
1157 sequence are actual characters stored in a buffer. */ \
1158 if (coding->composing == COMPOSITION_WITH_ALTCHARS \
1159 || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) \
1161 coding->composing = COMPOSITION_RELATIVE; \
1162 coding->composition_rule_follows = 0; \
1167 /* Handle composition end sequence ESC 1. */
1169 #define DECODE_COMPOSITION_END(c1) \
1171 if (coding->composing == COMPOSITION_DISABLED) \
1173 *dst++ = ISO_CODE_ESC; \
1175 coding->produced_char += 2; \
1179 CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
1180 coding->composing = COMPOSITION_NO; \
1184 /* Decode a composition rule from the byte C1 (and maybe one more byte
1185 from SRC) and store one encoded composition rule in
1186 coding->cmp_data. */
1188 #define DECODE_COMPOSITION_RULE(c1) \
1192 if (c1 < 81) /* old format (before ver.21) */ \
1194 int gref = (c1) / 9; \
1195 int nref = (c1) % 9; \
1196 if (gref == 4) gref = 10; \
1197 if (nref == 4) nref = 10; \
1198 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
1200 else if (c1 < 93) /* new format (after ver.21) */ \
1202 ONE_MORE_BYTE (c2); \
1203 rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
1205 CODING_ADD_COMPOSITION_COMPONENT (coding, rule); \
1206 coding->composition_rule_follows = 0; \
1210 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1213 decode_coding_iso2022 (coding
, source
, destination
, src_bytes
, dst_bytes
)
1214 struct coding_system
*coding
;
1215 unsigned char *source
, *destination
;
1216 int src_bytes
, dst_bytes
;
1218 unsigned char *src
= source
;
1219 unsigned char *src_end
= source
+ src_bytes
;
1220 unsigned char *dst
= destination
;
1221 unsigned char *dst_end
= destination
+ dst_bytes
;
1222 /* Charsets invoked to graphic plane 0 and 1 respectively. */
1223 int charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1224 int charset1
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 1);
1225 /* SRC_BASE remembers the start position in source in each loop.
1226 The loop will be exited when there's not enough source code
1227 (within macro ONE_MORE_BYTE), or when there's not enough
1228 destination area to produce a character (within macro
1230 unsigned char *src_base
;
1232 Lisp_Object translation_table
;
1234 if (NILP (Venable_character_translation
))
1235 translation_table
= Qnil
;
1238 translation_table
= coding
->translation_table_for_decode
;
1239 if (NILP (translation_table
))
1240 translation_table
= Vstandard_translation_table_for_decode
;
1243 coding
->result
= CODING_FINISH_NORMAL
;
1252 /* We produce no character or one character. */
1253 switch (iso_code_class
[c1
])
1255 case ISO_0x20_or_0x7F
:
1256 if (COMPOSING_P (coding
) && coding
->composition_rule_follows
)
1258 DECODE_COMPOSITION_RULE (c1
);
1261 if (charset0
< 0 || CHARSET_CHARS (charset0
) == 94)
1263 /* This is SPACE or DEL. */
1264 charset
= CHARSET_ASCII
;
1267 /* This is a graphic character, we fall down ... */
1269 case ISO_graphic_plane_0
:
1270 if (COMPOSING_P (coding
) && coding
->composition_rule_follows
)
1272 DECODE_COMPOSITION_RULE (c1
);
1278 case ISO_0xA0_or_0xFF
:
1279 if (charset1
< 0 || CHARSET_CHARS (charset1
) == 94
1280 || coding
->flags
& CODING_FLAG_ISO_SEVEN_BITS
)
1281 goto label_invalid_code
;
1282 /* This is a graphic character, we fall down ... */
1284 case ISO_graphic_plane_1
:
1286 goto label_invalid_code
;
1291 if (COMPOSING_P (coding
))
1292 DECODE_COMPOSITION_END ('1');
1294 /* All ISO2022 control characters in this class have the
1295 same representation in Emacs internal format. */
1297 && (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
1298 && (coding
->eol_type
== CODING_EOL_CR
1299 || coding
->eol_type
== CODING_EOL_CRLF
))
1301 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
1302 goto label_end_of_loop
;
1304 charset
= CHARSET_ASCII
;
1308 if (COMPOSING_P (coding
))
1309 DECODE_COMPOSITION_END ('1');
1310 goto label_invalid_code
;
1312 case ISO_carriage_return
:
1313 if (COMPOSING_P (coding
))
1314 DECODE_COMPOSITION_END ('1');
1316 if (coding
->eol_type
== CODING_EOL_CR
)
1318 else if (coding
->eol_type
== CODING_EOL_CRLF
)
1321 if (c1
!= ISO_CODE_LF
)
1323 if (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
1325 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
1326 goto label_end_of_loop
;
1332 charset
= CHARSET_ASCII
;
1336 if (! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
)
1337 || CODING_SPEC_ISO_DESIGNATION (coding
, 1) < 0)
1338 goto label_invalid_code
;
1339 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 1;
1340 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1344 if (! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
))
1345 goto label_invalid_code
;
1346 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 0;
1347 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1350 case ISO_single_shift_2_7
:
1351 case ISO_single_shift_2
:
1352 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
))
1353 goto label_invalid_code
;
1354 /* SS2 is handled as an escape sequence of ESC 'N' */
1356 goto label_escape_sequence
;
1358 case ISO_single_shift_3
:
1359 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
))
1360 goto label_invalid_code
;
1361 /* SS3 is handled as an escape sequence of ESC 'O' */
1363 goto label_escape_sequence
;
1365 case ISO_control_sequence_introducer
:
1366 /* CSI is handled as an escape sequence of ESC '[' ... */
1368 goto label_escape_sequence
;
1372 label_escape_sequence
:
1373 /* Escape sequences handled by Emacs are invocation,
1374 designation, direction specification, and character
1375 composition specification. */
1378 case '&': /* revision of following character set */
1380 if (!(c1
>= '@' && c1
<= '~'))
1381 goto label_invalid_code
;
1383 if (c1
!= ISO_CODE_ESC
)
1384 goto label_invalid_code
;
1386 goto label_escape_sequence
;
1388 case '$': /* designation of 2-byte character set */
1389 if (! (coding
->flags
& CODING_FLAG_ISO_DESIGNATION
))
1390 goto label_invalid_code
;
1392 if (c1
>= '@' && c1
<= 'B')
1393 { /* designation of JISX0208.1978, GB2312.1980,
1395 DECODE_DESIGNATION (0, 2, 94, c1
);
1397 else if (c1
>= 0x28 && c1
<= 0x2B)
1398 { /* designation of DIMENSION2_CHARS94 character set */
1400 DECODE_DESIGNATION (c1
- 0x28, 2, 94, c2
);
1402 else if (c1
>= 0x2C && c1
<= 0x2F)
1403 { /* designation of DIMENSION2_CHARS96 character set */
1405 DECODE_DESIGNATION (c1
- 0x2C, 2, 96, c2
);
1408 goto label_invalid_code
;
1409 /* We must update these variables now. */
1410 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1411 charset1
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 1);
1414 case 'n': /* invocation of locking-shift-2 */
1415 if (! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
)
1416 || CODING_SPEC_ISO_DESIGNATION (coding
, 2) < 0)
1417 goto label_invalid_code
;
1418 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 2;
1419 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1422 case 'o': /* invocation of locking-shift-3 */
1423 if (! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
)
1424 || CODING_SPEC_ISO_DESIGNATION (coding
, 3) < 0)
1425 goto label_invalid_code
;
1426 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 3;
1427 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1430 case 'N': /* invocation of single-shift-2 */
1431 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
)
1432 || CODING_SPEC_ISO_DESIGNATION (coding
, 2) < 0)
1433 goto label_invalid_code
;
1434 charset
= CODING_SPEC_ISO_DESIGNATION (coding
, 2);
1438 case 'O': /* invocation of single-shift-3 */
1439 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
)
1440 || CODING_SPEC_ISO_DESIGNATION (coding
, 3) < 0)
1441 goto label_invalid_code
;
1442 charset
= CODING_SPEC_ISO_DESIGNATION (coding
, 3);
1446 case '0': case '2': case '3': case '4': /* start composition */
1447 DECODE_COMPOSITION_START (c1
);
1450 case '1': /* end composition */
1451 DECODE_COMPOSITION_END (c1
);
1454 case '[': /* specification of direction */
1455 if (coding
->flags
& CODING_FLAG_ISO_NO_DIRECTION
)
1456 goto label_invalid_code
;
1457 /* For the moment, nested direction is not supported.
1458 So, `coding->mode & CODING_MODE_DIRECTION' zero means
1459 left-to-right, and nonzero means right-to-left. */
1463 case ']': /* end of the current direction */
1464 coding
->mode
&= ~CODING_MODE_DIRECTION
;
1466 case '0': /* end of the current direction */
1467 case '1': /* start of left-to-right direction */
1470 coding
->mode
&= ~CODING_MODE_DIRECTION
;
1472 goto label_invalid_code
;
1475 case '2': /* start of right-to-left direction */
1478 coding
->mode
|= CODING_MODE_DIRECTION
;
1480 goto label_invalid_code
;
1484 goto label_invalid_code
;
1489 if (! (coding
->flags
& CODING_FLAG_ISO_DESIGNATION
))
1490 goto label_invalid_code
;
1491 if (c1
>= 0x28 && c1
<= 0x2B)
1492 { /* designation of DIMENSION1_CHARS94 character set */
1494 DECODE_DESIGNATION (c1
- 0x28, 1, 94, c2
);
1496 else if (c1
>= 0x2C && c1
<= 0x2F)
1497 { /* designation of DIMENSION1_CHARS96 character set */
1499 DECODE_DESIGNATION (c1
- 0x2C, 1, 96, c2
);
1502 goto label_invalid_code
;
1503 /* We must update these variables now. */
1504 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1505 charset1
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 1);
1510 /* Now we know CHARSET and 1st position code C1 of a character.
1511 Produce a multibyte sequence for that character while getting
1512 2nd position code C2 if necessary. */
1513 if (CHARSET_DIMENSION (charset
) == 2)
1516 if (c1
< 0x80 ? c2
< 0x20 || c2
>= 0x80 : c2
< 0xA0)
1517 /* C2 is not in a valid range. */
1518 goto label_invalid_code
;
1520 c
= DECODE_ISO_CHARACTER (charset
, c1
, c2
);
1526 if (COMPOSING_P (coding
))
1527 DECODE_COMPOSITION_END ('1');
1534 coding
->consumed
= coding
->consumed_char
= src_base
- source
;
1535 coding
->produced
= dst
- destination
;
1540 /* ISO2022 encoding stuff. */
1543 It is not enough to say just "ISO2022" on encoding, we have to
1544 specify more details. In Emacs, each coding system of ISO2022
1545 variant has the following specifications:
1546 1. Initial designation to G0 thru G3.
1547 2. Allows short-form designation?
1548 3. ASCII should be designated to G0 before control characters?
1549 4. ASCII should be designated to G0 at end of line?
1550 5. 7-bit environment or 8-bit environment?
1551 6. Use locking-shift?
1552 7. Use Single-shift?
1553 And the following two are only for Japanese:
1554 8. Use ASCII in place of JIS0201-1976-Roman?
1555 9. Use JISX0208-1983 in place of JISX0208-1978?
1556 These specifications are encoded in `coding->flags' as flag bits
1557 defined by macros CODING_FLAG_ISO_XXX. See `coding.h' for more
1561 /* Produce codes (escape sequence) for designating CHARSET to graphic
1562 register REG at DST, and increment DST. If <final-char> of CHARSET is
1563 '@', 'A', or 'B' and the coding system CODING allows, produce
1564 designation sequence of short-form. */
1566 #define ENCODE_DESIGNATION(charset, reg, coding) \
1568 unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \
1569 char *intermediate_char_94 = "()*+"; \
1570 char *intermediate_char_96 = ",-./"; \
1571 int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \
1573 if (revision < 255) \
1575 *dst++ = ISO_CODE_ESC; \
1577 *dst++ = '@' + revision; \
1579 *dst++ = ISO_CODE_ESC; \
1580 if (CHARSET_DIMENSION (charset) == 1) \
1582 if (CHARSET_CHARS (charset) == 94) \
1583 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1585 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1590 if (CHARSET_CHARS (charset) == 94) \
1592 if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \
1594 || final_char < '@' || final_char > 'B') \
1595 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1598 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1600 *dst++ = final_char; \
1601 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
1604 /* The following two macros produce codes (control character or escape
1605 sequence) for ISO2022 single-shift functions (single-shift-2 and
1608 #define ENCODE_SINGLE_SHIFT_2 \
1610 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1611 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
1613 *dst++ = ISO_CODE_SS2; \
1614 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1617 #define ENCODE_SINGLE_SHIFT_3 \
1619 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1620 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
1622 *dst++ = ISO_CODE_SS3; \
1623 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1626 /* The following four macros produce codes (control character or
1627 escape sequence) for ISO2022 locking-shift functions (shift-in,
1628 shift-out, locking-shift-2, and locking-shift-3). */
1630 #define ENCODE_SHIFT_IN \
1632 *dst++ = ISO_CODE_SI; \
1633 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1636 #define ENCODE_SHIFT_OUT \
1638 *dst++ = ISO_CODE_SO; \
1639 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1642 #define ENCODE_LOCKING_SHIFT_2 \
1644 *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
1645 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1648 #define ENCODE_LOCKING_SHIFT_3 \
1650 *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
1651 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1654 /* Produce codes for a DIMENSION1 character whose character set is
1655 CHARSET and whose position-code is C1. Designation and invocation
1656 sequences are also produced in advance if necessary. */
1658 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
1660 if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
1662 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1663 *dst++ = c1 & 0x7F; \
1665 *dst++ = c1 | 0x80; \
1666 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
1669 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
1671 *dst++ = c1 & 0x7F; \
1674 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
1676 *dst++ = c1 | 0x80; \
1679 else if (coding->flags & CODING_FLAG_ISO_SAFE \
1680 && !coding->safe_charsets[charset]) \
1682 /* We should not encode this character, instead produce one or \
1684 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1685 if (CHARSET_WIDTH (charset) == 2) \
1686 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1690 /* Since CHARSET is not yet invoked to any graphic planes, we \
1691 must invoke it, or, at first, designate it to some graphic \
1692 register. Then repeat the loop to actually produce the \
1694 dst = encode_invocation_designation (charset, coding, dst); \
1697 /* Produce codes for a DIMENSION2 character whose character set is
1698 CHARSET and whose position-codes are C1 and C2. Designation and
1699 invocation codes are also produced in advance if necessary. */
1701 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
1703 if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
1705 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1706 *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F; \
1708 *dst++ = c1 | 0x80, *dst++ = c2 | 0x80; \
1709 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
1712 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
1714 *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F; \
1717 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
1719 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
1722 else if (coding->flags & CODING_FLAG_ISO_SAFE \
1723 && !coding->safe_charsets[charset]) \
1725 /* We should not encode this character, instead produce one or \
1727 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1728 if (CHARSET_WIDTH (charset) == 2) \
1729 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1733 /* Since CHARSET is not yet invoked to any graphic planes, we \
1734 must invoke it, or, at first, designate it to some graphic \
1735 register. Then repeat the loop to actually produce the \
1737 dst = encode_invocation_designation (charset, coding, dst); \
1740 #define ENCODE_ISO_CHARACTER(charset, c1, c2) \
1742 int alt_charset = charset; \
1744 if (CHARSET_DEFINED_P (charset)) \
1746 if (CHARSET_DIMENSION (charset) == 1) \
1748 if (charset == CHARSET_ASCII \
1749 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
1750 alt_charset = charset_latin_jisx0201; \
1751 ENCODE_ISO_CHARACTER_DIMENSION1 (alt_charset, c1); \
1755 if (charset == charset_jisx0208 \
1756 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
1757 alt_charset = charset_jisx0208_1978; \
1758 ENCODE_ISO_CHARACTER_DIMENSION2 (alt_charset, c1, c2); \
1769 /* Produce designation and invocation codes at a place pointed by DST
1770 to use CHARSET. The element `spec.iso2022' of *CODING is updated.
1774 encode_invocation_designation (charset
, coding
, dst
)
1776 struct coding_system
*coding
;
1779 int reg
; /* graphic register number */
1781 /* At first, check designations. */
1782 for (reg
= 0; reg
< 4; reg
++)
1783 if (charset
== CODING_SPEC_ISO_DESIGNATION (coding
, reg
))
1788 /* CHARSET is not yet designated to any graphic registers. */
1789 /* At first check the requested designation. */
1790 reg
= CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
);
1791 if (reg
== CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION
)
1792 /* Since CHARSET requests no special designation, designate it
1793 to graphic register 0. */
1796 ENCODE_DESIGNATION (charset
, reg
, coding
);
1799 if (CODING_SPEC_ISO_INVOCATION (coding
, 0) != reg
1800 && CODING_SPEC_ISO_INVOCATION (coding
, 1) != reg
)
1802 /* Since the graphic register REG is not invoked to any graphic
1803 planes, invoke it to graphic plane 0. */
1806 case 0: /* graphic register 0 */
1810 case 1: /* graphic register 1 */
1814 case 2: /* graphic register 2 */
1815 if (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
)
1816 ENCODE_SINGLE_SHIFT_2
;
1818 ENCODE_LOCKING_SHIFT_2
;
1821 case 3: /* graphic register 3 */
1822 if (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
)
1823 ENCODE_SINGLE_SHIFT_3
;
1825 ENCODE_LOCKING_SHIFT_3
;
1833 /* Produce 2-byte codes for encoded composition rule RULE. */
1835 #define ENCODE_COMPOSITION_RULE(rule) \
1838 COMPOSITION_DECODE_RULE (rule, gref, nref); \
1839 *dst++ = 32 + 81 + gref; \
1840 *dst++ = 32 + nref; \
1843 /* Produce codes for indicating the start of a composition sequence
1844 (ESC 0, ESC 3, or ESC 4). DATA points to an array of integers
1845 which specify information about the composition. See the comment
1846 in coding.h for the format of DATA. */
1848 #define ENCODE_COMPOSITION_START(coding, data) \
1850 coding->composing = data[3]; \
1851 *dst++ = ISO_CODE_ESC; \
1852 if (coding->composing == COMPOSITION_RELATIVE) \
1856 *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS \
1858 coding->cmp_data_index = coding->cmp_data_start + 4; \
1859 coding->composition_rule_follows = 0; \
1863 /* Produce codes for indicating the end of the current composition. */
1865 #define ENCODE_COMPOSITION_END(coding, data) \
1867 *dst++ = ISO_CODE_ESC; \
1869 coding->cmp_data_start += data[0]; \
1870 coding->composing = COMPOSITION_NO; \
1871 if (coding->cmp_data_start == coding->cmp_data->used \
1872 && coding->cmp_data->next) \
1874 coding->cmp_data = coding->cmp_data->next; \
1875 coding->cmp_data_start = 0; \
1879 /* Produce composition start sequence ESC 0. Here, this sequence
1880 doesn't mean the start of a new composition but means that we have
1881 just produced components (alternate chars and composition rules) of
1882 the composition and the actual text follows in SRC. */
1884 #define ENCODE_COMPOSITION_FAKE_START(coding) \
1886 *dst++ = ISO_CODE_ESC; \
1888 coding->composing = COMPOSITION_RELATIVE; \
/* The following three macros produce codes for indicating the
   direction of text.  Each one expects `coding' and `dst' to be in
   scope at the point of use, stores its bytes at DST, and leaves DST
   pointing just past them.  Each expands to a single statement.  */

/* Store a control sequence introducer: the two-byte form ESC '[' when
   CODING has CODING_FLAG_ISO_SEVEN_BITS set, otherwise the single byte
   ISO_CODE_CSI.  The flag is tested with `&', as the other ENCODE_XXX
   macros do, because `coding->flags' holds several flag bits at once;
   comparing with `==' would select the 7-bit form only when no other
   flag happened to be set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER		\
  do {							\
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)	\
      *dst++ = ISO_CODE_ESC, *dst++ = '[';		\
    else						\
      *dst++ = ISO_CODE_CSI;				\
  } while (0)

/* Store CSI '2' ']', the start of right-to-left direction.  */
#define ENCODE_DIRECTION_R2L			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    *dst++ = '2', *dst++ = ']';			\
  } while (0)

/* Store CSI '0' ']', the end of the current direction (back to
   left-to-right).  */
#define ENCODE_DIRECTION_L2R			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    *dst++ = '0', *dst++ = ']';			\
  } while (0)
1907 /* Produce codes for designation and invocation to reset the graphic
1908 planes and registers to initial state. */
1909 #define ENCODE_RESET_PLANE_AND_REGISTER \
1912 if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \
1914 for (reg = 0; reg < 4; reg++) \
1915 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0 \
1916 && (CODING_SPEC_ISO_DESIGNATION (coding, reg) \
1917 != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))) \
1918 ENCODE_DESIGNATION \
1919 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1922 /* Produce designation sequences of charsets in the line started from
1923 SRC to a place pointed by DST, and return updated DST.
1925 If the current block ends before any end-of-line, we may fail to
1926 find all the necessary designations. */
1928 static unsigned char *
1929 encode_designation_at_bol (coding
, translation_table
, src
, src_end
, dst
)
1930 struct coding_system
*coding
;
1931 Lisp_Object translation_table
;
1932 unsigned char *src
, *src_end
, *dst
;
1934 int charset
, c
, found
= 0, reg
;
1935 /* Table of charsets to be designated to each graphic register. */
1938 for (reg
= 0; reg
< 4; reg
++)
1947 charset
= CHAR_CHARSET (c
);
1948 reg
= CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
);
1949 if (reg
!= CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION
&& r
[reg
] < 0)
1959 for (reg
= 0; reg
< 4; reg
++)
1961 && CODING_SPEC_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
1962 ENCODE_DESIGNATION (r
[reg
], reg
, coding
);
1968 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
1971 encode_coding_iso2022 (coding
, source
, destination
, src_bytes
, dst_bytes
)
1972 struct coding_system
*coding
;
1973 unsigned char *source
, *destination
;
1974 int src_bytes
, dst_bytes
;
1976 unsigned char *src
= source
;
1977 unsigned char *src_end
= source
+ src_bytes
;
1978 unsigned char *dst
= destination
;
1979 unsigned char *dst_end
= destination
+ dst_bytes
;
1980 /* Since the maximum bytes produced by each loop is 20, we subtract 19
1981 from DST_END to assure overflow checking is necessary only at the
1983 unsigned char *adjusted_dst_end
= dst_end
- 19;
1984 /* SRC_BASE remembers the start position in source in each loop.
1985 The loop will be exited when there's not enough source text to
1986 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
1987 there's not enough destination area to produce encoded codes
1988 (within macro EMIT_BYTES). */
1989 unsigned char *src_base
;
1991 Lisp_Object translation_table
;
1993 if (NILP (Venable_character_translation
))
1994 translation_table
= Qnil
;
1997 translation_table
= coding
->translation_table_for_encode
;
1998 if (NILP (translation_table
))
1999 translation_table
= Vstandard_translation_table_for_encode
;
2002 coding
->consumed_char
= 0;
2006 int charset
, c1
, c2
;
2010 if (dst
>= (dst_bytes
? adjusted_dst_end
: (src
- 19)))
2012 coding
->result
= CODING_FINISH_INSUFFICIENT_DST
;
2016 if (coding
->flags
& CODING_FLAG_ISO_DESIGNATE_AT_BOL
2017 && CODING_SPEC_ISO_BOL (coding
))
2019 /* We have to produce designation sequences if any now. */
2020 dst
= encode_designation_at_bol (coding
, translation_table
,
2022 CODING_SPEC_ISO_BOL (coding
) = 0;
2025 /* Check composition start and end. */
2026 if (coding
->composing
!= COMPOSITION_DISABLED
2027 && coding
->cmp_data_start
< coding
->cmp_data
->used
)
2029 struct composition_data
*cmp_data
= coding
->cmp_data
;
2030 int *data
= cmp_data
->data
+ coding
->cmp_data_start
;
2031 int this_pos
= cmp_data
->char_offset
+ coding
->consumed_char
;
2033 if (coding
->composing
== COMPOSITION_RELATIVE
)
2035 if (this_pos
== data
[2])
2037 ENCODE_COMPOSITION_END (coding
, data
);
2038 cmp_data
= coding
->cmp_data
;
2039 data
= cmp_data
->data
+ coding
->cmp_data_start
;
2042 else if (COMPOSING_P (coding
))
2044 /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHARS */
2045 if (coding
->cmp_data_index
== coding
->cmp_data_start
+ data
[0])
2046 /* We have consumed components of the composition.
2047 What follows in SRC is the composition's base
2049 ENCODE_COMPOSITION_FAKE_START (coding
);
2052 int c
= cmp_data
->data
[coding
->cmp_data_index
++];
2053 if (coding
->composition_rule_follows
)
2055 ENCODE_COMPOSITION_RULE (c
);
2056 coding
->composition_rule_follows
= 0;
2060 SPLIT_CHAR (c
, charset
, c1
, c2
);
2061 ENCODE_ISO_CHARACTER (charset
, c1
, c2
);
2062 if (coding
->composing
== COMPOSITION_WITH_RULE_ALTCHARS
)
2063 coding
->composition_rule_follows
= 1;
2068 if (!COMPOSING_P (coding
))
2070 if (this_pos
== data
[1])
2072 ENCODE_COMPOSITION_START (coding
, data
);
2080 /* Now encode the character C. */
2081 if (c
< 0x20 || c
== 0x7F)
2085 if (! (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
2087 if (coding
->flags
& CODING_FLAG_ISO_RESET_AT_CNTL
)
2088 ENCODE_RESET_PLANE_AND_REGISTER
;
2092 /* fall down to treat '\r' as '\n' ... */
2097 if (coding
->flags
& CODING_FLAG_ISO_RESET_AT_EOL
)
2098 ENCODE_RESET_PLANE_AND_REGISTER
;
2099 if (coding
->flags
& CODING_FLAG_ISO_INIT_AT_BOL
)
2100 bcopy (coding
->spec
.iso2022
.initial_designation
,
2101 coding
->spec
.iso2022
.current_designation
,
2102 sizeof coding
->spec
.iso2022
.initial_designation
);
2103 if (coding
->eol_type
== CODING_EOL_LF
2104 || coding
->eol_type
== CODING_EOL_UNDECIDED
)
2105 *dst
++ = ISO_CODE_LF
;
2106 else if (coding
->eol_type
== CODING_EOL_CRLF
)
2107 *dst
++ = ISO_CODE_CR
, *dst
++ = ISO_CODE_LF
;
2109 *dst
++ = ISO_CODE_CR
;
2110 CODING_SPEC_ISO_BOL (coding
) = 1;
2114 if (coding
->flags
& CODING_FLAG_ISO_RESET_AT_CNTL
)
2115 ENCODE_RESET_PLANE_AND_REGISTER
;
2119 else if (ASCII_BYTE_P (c
))
2120 ENCODE_ISO_CHARACTER (CHARSET_ASCII
, c
, /* dummy */ c1
);
2121 else if (SINGLE_BYTE_CHAR_P (c
))
2128 SPLIT_CHAR (c
, charset
, c1
, c2
);
2129 ENCODE_ISO_CHARACTER (charset
, c1
, c2
);
2132 coding
->consumed_char
++;
2136 coding
->consumed
= src_base
- source
;
2137 coding
->produced
= coding
->produced_char
= dst
- destination
;
2141 /*** 4. SJIS and BIG5 handlers ***/
2143 /* Although SJIS and BIG5 are not ISO's coding system, they are used
2144 quite widely. So, for the moment, Emacs supports them in the bare
2145 C code. But, in the future, they may be supported only by CCL. */
2147 /* SJIS is a coding system encoding three character sets: ASCII, right
2148 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
2149 as is. A character of charset katakana-jisx0201 is encoded by
2150 "position-code + 0x80". A character of charset japanese-jisx0208
2151 is encoded in 2-byte but two position-codes are divided and shifted
2152 so that they fit in the range below.
2154 --- CODE RANGE of SJIS ---
2155 (character set) (range)
2157 KATAKANA-JISX0201 0xA0 .. 0xDF
2158 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
2159 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
2160 -------------------------------
2164 /* BIG5 is a coding system encoding two character sets: ASCII and
2165 Big5. An ASCII character is encoded as is. Big5 is a two-byte
2166 character set and is encoded in two-byte.
2168 --- CODE RANGE of BIG5 ---
2169 (character set) (range)
2171 Big5 (1st byte) 0xA1 .. 0xFE
2172 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
2173 --------------------------
2175 Since the number of characters in Big5 is larger than maximum
2176 characters in Emacs' charset (96x96), it can't be handled as one
2177 charset. So, in Emacs, Big5 is divided into two: `charset-big5-1'
2178 and `charset-big5-2'. Both are DIMENSION2 and CHARS94. The former
2179 contains frequently used characters and the latter contains less
2180 frequently used characters. */
2182 /* Macros to decode or encode a character of Big5 in BIG5. B1 and B2
2183 are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2184 C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2185 format. CHARSET is `charset_big5_1' or `charset_big5_2'. */
2187 /* Number of Big5 characters which have the same code in 1st byte. */
2188 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2190 #define DECODE_BIG5(b1, b2, charset, c1, c2) \
2193 = (b1 - 0xA1) * BIG5_SAME_ROW + b2 - (b2 < 0x7F ? 0x40 : 0x62); \
2195 charset = charset_big5_1; \
2198 charset = charset_big5_2; \
2199 temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW; \
2201 c1 = temp / (0xFF - 0xA1) + 0x21; \
2202 c2 = temp % (0xFF - 0xA1) + 0x21; \
2205 #define ENCODE_BIG5(charset, c1, c2, b1, b2) \
2207 unsigned int temp = (c1 - 0x21) * (0xFF - 0xA1) + (c2 - 0x21); \
2208 if (charset == charset_big5_2) \
2209 temp += BIG5_SAME_ROW * (0xC9 - 0xA1); \
2210 b1 = temp / BIG5_SAME_ROW + 0xA1; \
2211 b2 = temp % BIG5_SAME_ROW; \
2212 b2 += b2 < 0x3F ? 0x40 : 0x62; \
2215 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2216 Check if a text is encoded in SJIS. If it is, return
2217 CODING_CATEGORY_MASK_SJIS, else return 0. */
2220 detect_coding_sjis (src
, src_end
)
2221 unsigned char *src
, *src_end
;
2224 /* Dummy for ONE_MORE_BYTE. */
2225 struct coding_system dummy_coding
;
2226 struct coding_system
*coding
= &dummy_coding
;
2231 if ((c
>= 0x80 && c
< 0xA0) || c
>= 0xE0)
2239 return CODING_CATEGORY_MASK_SJIS
;
2242 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2243 Check if a text is encoded in BIG5. If it is, return
2244 CODING_CATEGORY_MASK_BIG5, else return 0. */
2247 detect_coding_big5 (src
, src_end
)
2248 unsigned char *src
, *src_end
;
2251 /* Dummy for ONE_MORE_BYTE. */
2252 struct coding_system dummy_coding
;
2253 struct coding_system
*coding
= &dummy_coding
;
2261 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
2266 return CODING_CATEGORY_MASK_BIG5
;
2269 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2270 Check if a text is encoded in UTF-8. If it is, return
2271 CODING_CATEGORY_MASK_UTF_8, else return 0. */
/* Predicates classifying a single octet C of a UTF-8 byte sequence by
   its high-order bits.  The bit pattern each one matches is shown at
   the right.  In detect_coding_utf_8, an N-octet leading byte is
   followed by a check of N-1 extra octets.  */
2273 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)	/* below 0x80 */
2274 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)	/* 10xxxxxx */
2275 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)	/* 110xxxxx */
2276 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)	/* 1110xxxx */
2277 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)	/* 11110xxx */
2278 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)	/* 111110xx */
2279 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)	/* 1111110x */
2282 detect_coding_utf_8 (src
, src_end
)
2283 unsigned char *src
, *src_end
;
2286 int seq_maybe_bytes
;
2287 /* Dummy for ONE_MORE_BYTE. */
2288 struct coding_system dummy_coding
;
2289 struct coding_system
*coding
= &dummy_coding
;
2294 if (UTF_8_1_OCTET_P (c
))
2296 else if (UTF_8_2_OCTET_LEADING_P (c
))
2297 seq_maybe_bytes
= 1;
2298 else if (UTF_8_3_OCTET_LEADING_P (c
))
2299 seq_maybe_bytes
= 2;
2300 else if (UTF_8_4_OCTET_LEADING_P (c
))
2301 seq_maybe_bytes
= 3;
2302 else if (UTF_8_5_OCTET_LEADING_P (c
))
2303 seq_maybe_bytes
= 4;
2304 else if (UTF_8_6_OCTET_LEADING_P (c
))
2305 seq_maybe_bytes
= 5;
2312 if (!UTF_8_EXTRA_OCTET_P (c
))
2316 while (seq_maybe_bytes
> 0);
2320 return CODING_CATEGORY_MASK_UTF_8
;
2323 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2324 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
2325 Little Endian (otherwise). If it is, return
2326 CODING_CATEGORY_MASK_UTF_16_BE or CODING_CATEGORY_MASK_UTF_16_LE,
/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF, the two values
   this file treats as invalid in UTF-16 text.  */
#define UTF_16_INVALID_P(val)	\
  (((val) == 0xFFFE)		\
   || ((val) == 0xFFFF))

/* Nonzero if the 16-bit code unit VAL is a high (leading) surrogate,
   i.e. lies in 0xD800..0xDBFF.  The six high-order bits select the
   surrogate block, so they must be masked with 0xFC00.  Masking with
   0xD800 would also accept low surrogates and values such as 0xF800.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit code unit VAL is a low (trailing) surrogate,
   i.e. lies in 0xDC00..0xDFFF.  Masking with 0xDC00 would also accept
   values such as 0xFC00.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2340 detect_coding_utf_16 (src
, src_end
)
2341 unsigned char *src
, *src_end
;
2343 unsigned char c1
, c2
;
2344 /* Dummy for TWO_MORE_BYTES. */
2345 struct coding_system dummy_coding
;
2346 struct coding_system
*coding
= &dummy_coding
;
2348 TWO_MORE_BYTES (c1
, c2
);
2350 if ((c1
== 0xFF) && (c2
== 0xFE))
2351 return CODING_CATEGORY_MASK_UTF_16_LE
;
2352 else if ((c1
== 0xFE) && (c2
== 0xFF))
2353 return CODING_CATEGORY_MASK_UTF_16_BE
;
2359 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2360 If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */
2363 decode_coding_sjis_big5 (coding
, source
, destination
,
2364 src_bytes
, dst_bytes
, sjis_p
)
2365 struct coding_system
*coding
;
2366 unsigned char *source
, *destination
;
2367 int src_bytes
, dst_bytes
;
2370 unsigned char *src
= source
;
2371 unsigned char *src_end
= source
+ src_bytes
;
2372 unsigned char *dst
= destination
;
2373 unsigned char *dst_end
= destination
+ dst_bytes
;
2374 /* SRC_BASE remembers the start position in source in each loop.
2375 The loop will be exited when there's not enough source code
2376 (within macro ONE_MORE_BYTE), or when there's not enough
2377 destination area to produce a character (within macro
2379 unsigned char *src_base
;
2380 Lisp_Object translation_table
;
2382 if (NILP (Venable_character_translation
))
2383 translation_table
= Qnil
;
2386 translation_table
= coding
->translation_table_for_decode
;
2387 if (NILP (translation_table
))
2388 translation_table
= Vstandard_translation_table_for_decode
;
2391 coding
->produced_char
= 0;
2394 int c
, charset
, c1
, c2
;
2401 charset
= CHARSET_ASCII
;
2406 if (coding
->eol_type
== CODING_EOL_CRLF
)
2411 else if (coding
->mode
2412 & CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
2414 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
2415 goto label_end_of_loop
;
2418 /* To process C2 again, SRC is subtracted by 1. */
2421 else if (coding
->eol_type
== CODING_EOL_CR
)
2425 && (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
2426 && (coding
->eol_type
== CODING_EOL_CR
2427 || coding
->eol_type
== CODING_EOL_CRLF
))
2429 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
2430 goto label_end_of_loop
;
2439 goto label_invalid_code
;
2440 if (c1
< 0xA0 || c1
>= 0xE0)
2442 /* SJIS -> JISX0208 */
2444 if (c2
< 0x40 || c2
== 0x7F || c2
> 0xFC)
2445 goto label_invalid_code
;
2446 DECODE_SJIS (c1
, c2
, c1
, c2
);
2447 charset
= charset_jisx0208
;
2450 /* SJIS -> JISX0201-Kana */
2451 charset
= charset_katakana_jisx0201
;
2456 if (c1
< 0xA1 || c1
> 0xFE)
2457 goto label_invalid_code
;
2459 if (c2
< 0x40 || (c2
> 0x7E && c2
< 0xA1) || c2
> 0xFE)
2460 goto label_invalid_code
;
2461 DECODE_BIG5 (c1
, c2
, charset
, c1
, c2
);
2465 c
= DECODE_ISO_CHARACTER (charset
, c1
, c2
);
2477 coding
->consumed
= coding
->consumed_char
= src_base
- source
;
2478 coding
->produced
= dst
- destination
;
2482 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2483 This function can encode charsets `ascii', `katakana-jisx0201',
2484 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
2485 are sure that all these charsets are registered as official charset
2486 (i.e. do not have extended leading-codes). Characters of other
2487 charsets are produced without any encoding. If SJIS_P is 1, encode
2488 SJIS text, else encode BIG5 text. */
2491 encode_coding_sjis_big5 (coding
, source
, destination
,
2492 src_bytes
, dst_bytes
, sjis_p
)
2493 struct coding_system
*coding
;
2494 unsigned char *source
, *destination
;
2495 int src_bytes
, dst_bytes
;
2498 unsigned char *src
= source
;
2499 unsigned char *src_end
= source
+ src_bytes
;
2500 unsigned char *dst
= destination
;
2501 unsigned char *dst_end
= destination
+ dst_bytes
;
2502 /* SRC_BASE remembers the start position in source in each loop.
2503 The loop will be exited when there's not enough source text to
2504 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2505 there's not enough destination area to produce encoded codes
2506 (within macro EMIT_BYTES). */
2507 unsigned char *src_base
;
2508 Lisp_Object translation_table
;
2510 if (NILP (Venable_character_translation
))
2511 translation_table
= Qnil
;
2514 translation_table
= coding
->translation_table_for_decode
;
2515 if (NILP (translation_table
))
2516 translation_table
= Vstandard_translation_table_for_decode
;
2521 int c
, charset
, c1
, c2
;
2526 /* Now encode the character C. */
2527 if (SINGLE_BYTE_CHAR_P (c
))
2532 if (!coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
)
2539 if (coding
->eol_type
== CODING_EOL_CRLF
)
2541 EMIT_TWO_BYTES ('\r', c
);
2544 else if (coding
->eol_type
== CODING_EOL_CR
)
2552 SPLIT_CHAR (c
, charset
, c1
, c2
);
2555 if (charset
== charset_jisx0208
2556 || charset
== charset_jisx0208_1978
)
2558 ENCODE_SJIS (c1
, c2
, c1
, c2
);
2559 EMIT_TWO_BYTES (c1
, c2
);
2561 else if (charset
== charset_latin_jisx0201
)
2564 /* There's no way other than producing the internal
2566 EMIT_BYTES (src_base
, src
);
2570 if (charset
== charset_big5_1
|| charset
== charset_big5_2
)
2572 ENCODE_BIG5 (charset
, c1
, c2
, c1
, c2
);
2573 EMIT_TWO_BYTES (c1
, c2
);
2576 /* There's no way other than producing the internal
2578 EMIT_BYTES (src_base
, src
);
2581 coding
->consumed_char
++;
2585 coding
->consumed
= src_base
- source
;
2586 coding
->produced
= coding
->produced_char
= dst
- destination
;
2590 /*** 5. CCL handlers ***/
2592 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2593 Check if a text is encoded in a coding system of which
2594 encoder/decoder are written in CCL program. If it is, return
2595 CODING_CATEGORY_MASK_CCL, else return 0. */
2598 detect_coding_ccl (src
, src_end
)
2599 unsigned char *src
, *src_end
;
2601 unsigned char *valid
;
2603 /* Dummy for ONE_MORE_BYTE. */
2604 struct coding_system dummy_coding
;
2605 struct coding_system
*coding
= &dummy_coding
;
2607 /* No coding system is assigned to coding-category-ccl. */
2608 if (!coding_system_table
[CODING_CATEGORY_IDX_CCL
])
2611 valid
= coding_system_table
[CODING_CATEGORY_IDX_CCL
]->spec
.ccl
.valid_codes
;
2619 return CODING_CATEGORY_MASK_CCL
;
2623 /*** 6. End-of-line handlers ***/
2625 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2628 decode_eol (coding
, source
, destination
, src_bytes
, dst_bytes
)
2629 struct coding_system
*coding
;
2630 unsigned char *source
, *destination
;
2631 int src_bytes
, dst_bytes
;
2633 unsigned char *src
= source
;
2634 unsigned char *dst
= destination
;
2635 unsigned char *src_end
= src
+ src_bytes
;
2636 unsigned char *dst_end
= dst
+ dst_bytes
;
2637 Lisp_Object translation_table
;
2638 /* SRC_BASE remembers the start position in source in each loop.
2639 The loop will be exited when there's not enough source code
2640 (within macro ONE_MORE_BYTE), or when there's not enough
2641 destination area to produce a character (within macro
2643 unsigned char *src_base
;
2646 translation_table
= Qnil
;
2647 switch (coding
->eol_type
)
2649 case CODING_EOL_CRLF
:
2659 if (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
2661 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
2662 goto label_end_of_loop
;
2669 && (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
))
2671 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
2672 goto label_end_of_loop
;
2685 if (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
2687 coding
->result
= CODING_FINISH_INCONSISTENT_EOL
;
2688 goto label_end_of_loop
;
2697 default: /* no need for EOL handling */
2707 coding
->consumed
= coding
->consumed_char
= src_base
- source
;
2708 coding
->produced
= dst
- destination
;
2712 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode
2713 format of end-of-line according to `coding->eol_type'. It also
2714 converts multibyte form 8-bit characters to unibyte if
2715 CODING->src_multibyte is nonzero. If `coding->mode &
2716 CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
2717 also means end-of-line. */
2720 encode_eol (coding
, source
, destination
, src_bytes
, dst_bytes
)
2721 struct coding_system
*coding
;
2722 unsigned char *source
, *destination
;
2723 int src_bytes
, dst_bytes
;
2725 unsigned char *src
= source
;
2726 unsigned char *dst
= destination
;
2727 unsigned char *src_end
= src
+ src_bytes
;
2728 unsigned char *dst_end
= dst
+ dst_bytes
;
2729 Lisp_Object translation_table
;
2730 /* SRC_BASE remembers the start position in source in each loop.
2731 The loop will be exited when there's not enough source text to
2732 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2733 there's not enough destination area to produce encoded codes
2734 (within macro EMIT_BYTES). */
2735 unsigned char *src_base
;
2737 int selective_display
= coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
;
2739 translation_table
= Qnil
;
2740 if (coding
->src_multibyte
2741 && *(src_end
- 1) == LEADING_CODE_8_BIT_CONTROL
)
2745 coding
->result
= CODING_FINISH_INSUFFICIENT_SRC
;
2748 if (coding
->eol_type
== CODING_EOL_CRLF
)
2750 while (src
< src_end
)
2756 else if (c
== '\n' || (c
== '\r' && selective_display
))
2757 EMIT_TWO_BYTES ('\r', '\n');
2767 if (src_bytes
<= dst_bytes
)
2769 safe_bcopy (src
, dst
, src_bytes
);
2775 if (coding
->src_multibyte
2776 && *(src
+ dst_bytes
- 1) == LEADING_CODE_8_BIT_CONTROL
)
2778 safe_bcopy (src
, dst
, dst_bytes
);
2779 src_base
= src
+ dst_bytes
;
2780 dst
= destination
+ dst_bytes
;
2781 coding
->result
= CODING_FINISH_INSUFFICIENT_DST
;
2783 if (coding
->eol_type
== CODING_EOL_CR
)
2785 for (src
= destination
; src
< dst
; src
++)
2786 if (*src
== '\n') *src
= '\r';
2788 else if (selective_display
)
2790 for (src
= destination
; src
< dst
; src
++)
2791 if (*src
== '\r') *src
= '\n';
2794 if (coding
->src_multibyte
)
2795 dst
= destination
+ str_as_unibyte (destination
, dst
- destination
);
2797 coding
->consumed
= src_base
- source
;
2798 coding
->produced
= dst
- destination
;
2802 /*** 7. C library functions ***/
2804 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2805 has a property `coding-system'. The value of this property is a
2806 vector of length 5 (called as coding-vector). Among elements of
2807 this vector, the first (element[0]) and the fifth (element[4])
2808 carry important information for decoding/encoding. Before
2809 decoding/encoding, this information should be set in fields of a
2810 structure of type `coding_system'.
2812 A value of property `coding-system' can be a symbol of another
2813 subsidiary coding-system. In that case, Emacs gets coding-vector
2816 `element[0]' contains information to be set in `coding->type'. The
2817 value and its meaning is as follows:
2819 0 -- coding_type_emacs_mule
2820 1 -- coding_type_sjis
2821 2 -- coding_type_iso2022
2822 3 -- coding_type_big5
2823 4 -- coding_type_ccl encoder/decoder written in CCL
2824 nil -- coding_type_no_conversion
2825 t -- coding_type_undecided (automatic conversion on decoding,
2826 no-conversion on encoding)
2828 `element[4]' contains information to be set in `coding->flags' and
2829 `coding->spec'. The meaning varies by `coding->type'.
2831 If `coding->type' is `coding_type_iso2022', element[4] is a vector
2832 of length 32 (of which the first 17 sub-elements are used now).
2833 Meanings of these sub-elements are:
2835 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2836 If the value is an integer of valid charset, the charset is
2837 assumed to be designated to graphic register N initially.
2839 If the value is minus, it is a minus value of charset which
2840 reserves graphic register N, which means that the charset is
2841 not designated initially but should be designated to graphic
2842 register N just before encoding a character in that charset.
2844 If the value is nil, graphic register N is never used on
2847 sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2848 Each value takes t or nil. See the section ISO2022 of
2849 `coding.h' for more information.
2851 If `coding->type' is `coding_type_big5', element[4] is t to denote
2852 BIG5-ETen or nil to denote BIG5-HKU.
2854 If `coding->type' takes the other value, element[4] is ignored.
2856 Emacs Lisp's coding system also carries information about format of
2857 end-of-line in a value of property `eol-type'. If the value is
2858 integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2859 means CODING_EOL_CR. If it is not integer, it should be a vector
2860 of subsidiary coding systems of which property `eol-type' has one
2865 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2866 and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, CODING
2867 is setup so that no conversion is necessary and return -1, else
2871 setup_coding_system (coding_system
, coding
)
2872 Lisp_Object coding_system
;
2873 struct coding_system
*coding
;
2875 Lisp_Object coding_spec
, coding_type
, eol_type
, plist
;
2879 /* Initialize some fields required for all kinds of coding systems. */
2880 coding
->symbol
= coding_system
;
2881 coding
->common_flags
= 0;
2883 coding
->heading_ascii
= -1;
2884 coding
->post_read_conversion
= coding
->pre_write_conversion
= Qnil
;
2885 coding
->composing
= COMPOSITION_DISABLED
;
2886 coding
->cmp_data
= NULL
;
2888 if (NILP (coding_system
))
2889 goto label_invalid_coding_system
;
2891 coding_spec
= Fget (coding_system
, Qcoding_system
);
2893 if (!VECTORP (coding_spec
)
2894 || XVECTOR (coding_spec
)->size
!= 5
2895 || !CONSP (XVECTOR (coding_spec
)->contents
[3]))
2896 goto label_invalid_coding_system
;
2898 eol_type
= inhibit_eol_conversion
? Qnil
: Fget (coding_system
, Qeol_type
);
2899 if (VECTORP (eol_type
))
2901 coding
->eol_type
= CODING_EOL_UNDECIDED
;
2902 coding
->common_flags
= CODING_REQUIRE_DETECTION_MASK
;
2904 else if (XFASTINT (eol_type
) == 1)
2906 coding
->eol_type
= CODING_EOL_CRLF
;
2907 coding
->common_flags
2908 = CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
2910 else if (XFASTINT (eol_type
) == 2)
2912 coding
->eol_type
= CODING_EOL_CR
;
2913 coding
->common_flags
2914 = CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
2917 coding
->eol_type
= CODING_EOL_LF
;
2919 coding_type
= XVECTOR (coding_spec
)->contents
[0];
2920 /* Try short cut. */
2921 if (SYMBOLP (coding_type
))
2923 if (EQ (coding_type
, Qt
))
2925 coding
->type
= coding_type_undecided
;
2926 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
2929 coding
->type
= coding_type_no_conversion
;
2933 /* Get values of coding system properties:
2934 `post-read-conversion', `pre-write-conversion',
2935 `translation-table-for-decode', `translation-table-for-encode'. */
2936 plist
= XVECTOR (coding_spec
)->contents
[3];
2937 /* Pre & post conversion functions should be disabled if
2938 inhibit_pre_post_conversion is nonzero. This is the case when a code
2939 conversion function is called while those functions are running. */
2940 if (! inhibit_pre_post_conversion
)
2942 coding
->post_read_conversion
= Fplist_get (plist
, Qpost_read_conversion
);
2943 coding
->pre_write_conversion
= Fplist_get (plist
, Qpre_write_conversion
);
2945 val
= Fplist_get (plist
, Qtranslation_table_for_decode
);
2947 val
= Fget (val
, Qtranslation_table_for_decode
);
2948 coding
->translation_table_for_decode
= CHAR_TABLE_P (val
) ? val
: Qnil
;
2949 val
= Fplist_get (plist
, Qtranslation_table_for_encode
);
2951 val
= Fget (val
, Qtranslation_table_for_encode
);
2952 coding
->translation_table_for_encode
= CHAR_TABLE_P (val
) ? val
: Qnil
;
2953 val
= Fplist_get (plist
, Qcoding_category
);
2956 val
= Fget (val
, Qcoding_category_index
);
2958 coding
->category_idx
= XINT (val
);
2960 goto label_invalid_coding_system
;
2963 goto label_invalid_coding_system
;
2965 val
= Fplist_get (plist
, Qsafe_charsets
);
2968 for (i
= 0; i
<= MAX_CHARSET
; i
++)
2969 coding
->safe_charsets
[i
] = 1;
2973 bzero (coding
->safe_charsets
, MAX_CHARSET
+ 1);
2976 if ((i
= get_charset_id (XCAR (val
))) >= 0)
2977 coding
->safe_charsets
[i
] = 1;
2982 /* If the coding system has non-nil `composition' property, enable
2983 composition handling. */
2984 val
= Fplist_get (plist
, Qcomposition
);
2986 coding
->composing
= COMPOSITION_NO
;
2988 switch (XFASTINT (coding_type
))
2991 coding
->type
= coding_type_emacs_mule
;
2992 if (!NILP (coding
->post_read_conversion
))
2993 coding
->common_flags
|= CODING_REQUIRE_DECODING_MASK
;
2994 if (!NILP (coding
->pre_write_conversion
))
2995 coding
->common_flags
|= CODING_REQUIRE_ENCODING_MASK
;
2999 coding
->type
= coding_type_sjis
;
3000 coding
->common_flags
3001 |= CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3005 coding
->type
= coding_type_iso2022
;
3006 coding
->common_flags
3007 |= CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3009 Lisp_Object val
, temp
;
3011 int i
, charset
, reg_bits
= 0;
3013 val
= XVECTOR (coding_spec
)->contents
[4];
3015 if (!VECTORP (val
) || XVECTOR (val
)->size
!= 32)
3016 goto label_invalid_coding_system
;
3018 flags
= XVECTOR (val
)->contents
;
3020 = ((NILP (flags
[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM
)
3021 | (NILP (flags
[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL
)
3022 | (NILP (flags
[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL
)
3023 | (NILP (flags
[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS
)
3024 | (NILP (flags
[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT
)
3025 | (NILP (flags
[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT
)
3026 | (NILP (flags
[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN
)
3027 | (NILP (flags
[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS
)
3028 | (NILP (flags
[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION
)
3029 | (NILP (flags
[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL
)
3030 | (NILP (flags
[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL
)
3031 | (NILP (flags
[15]) ? 0 : CODING_FLAG_ISO_SAFE
)
3032 | (NILP (flags
[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA
)
3035 /* Invoke graphic register 0 to plane 0. */
3036 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 0;
3037 /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */
3038 CODING_SPEC_ISO_INVOCATION (coding
, 1)
3039 = (coding
->flags
& CODING_FLAG_ISO_SEVEN_BITS
? -1 : 1);
3040 /* Not single shifting at first. */
3041 CODING_SPEC_ISO_SINGLE_SHIFTING (coding
) = 0;
3042 /* Beginning of buffer should also be regarded as bol. */
3043 CODING_SPEC_ISO_BOL (coding
) = 1;
3045 for (charset
= 0; charset
<= MAX_CHARSET
; charset
++)
3046 CODING_SPEC_ISO_REVISION_NUMBER (coding
, charset
) = 255;
3047 val
= Vcharset_revision_alist
;
3050 charset
= get_charset_id (Fcar_safe (XCAR (val
)));
3052 && (temp
= Fcdr_safe (XCAR (val
)), INTEGERP (temp
))
3053 && (i
= XINT (temp
), (i
>= 0 && (i
+ '@') < 128)))
3054 CODING_SPEC_ISO_REVISION_NUMBER (coding
, charset
) = i
;
3058 /* Check FLAGS[REG] (REG = 0, 1, 2, 3) and decide designations.
3059 FLAGS[REG] can be one of the following:
3060 integer CHARSET: CHARSET occupies register REG,
3061 t: designate nothing to REG initially, but can be used
3063 list of integer, nil, or t: designate the first
3064 element (if integer) to REG initially, the remaining
3065 elements (if integer) is designated to REG on request,
3066 if an element is t, REG can be used by any charsets,
3067 nil: REG is never used. */
3068 for (charset
= 0; charset
<= MAX_CHARSET
; charset
++)
3069 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
)
3070 = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION
;
3071 for (i
= 0; i
< 4; i
++)
3073 if (INTEGERP (flags
[i
])
3074 && (charset
= XINT (flags
[i
]), CHARSET_VALID_P (charset
))
3075 || (charset
= get_charset_id (flags
[i
])) >= 0)
3077 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = charset
;
3078 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
) = i
;
3080 else if (EQ (flags
[i
], Qt
))
3082 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = -1;
3084 coding
->flags
|= CODING_FLAG_ISO_DESIGNATION
;
3086 else if (CONSP (flags
[i
]))
3091 coding
->flags
|= CODING_FLAG_ISO_DESIGNATION
;
3092 if (INTEGERP (XCAR (tail
))
3093 && (charset
= XINT (XCAR (tail
)),
3094 CHARSET_VALID_P (charset
))
3095 || (charset
= get_charset_id (XCAR (tail
))) >= 0)
3097 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = charset
;
3098 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
) =i
;
3101 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = -1;
3103 while (CONSP (tail
))
3105 if (INTEGERP (XCAR (tail
))
3106 && (charset
= XINT (XCAR (tail
)),
3107 CHARSET_VALID_P (charset
))
3108 || (charset
= get_charset_id (XCAR (tail
))) >= 0)
3109 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
)
3111 else if (EQ (XCAR (tail
), Qt
))
3117 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = -1;
3119 CODING_SPEC_ISO_DESIGNATION (coding
, i
)
3120 = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
);
3123 if (reg_bits
&& ! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
))
3125 /* REG 1 can be used only by locking shift in 7-bit env. */
3126 if (coding
->flags
& CODING_FLAG_ISO_SEVEN_BITS
)
3128 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
))
3129 /* Without any shifting, only REG 0 and 1 can be used. */
3134 for (charset
= 0; charset
<= MAX_CHARSET
; charset
++)
3136 if (CHARSET_VALID_P (charset
))
3138 /* There exist some default graphic registers to be
3141 /* We had better avoid designating a charset of
3142 CHARS96 to REG 0 as far as possible. */
3143 if (CHARSET_CHARS (charset
) == 96)
3144 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
)
3146 ? 1 : (reg_bits
& 4 ? 2 : (reg_bits
& 8 ? 3 : 0)));
3148 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
)
3150 ? 0 : (reg_bits
& 2 ? 1 : (reg_bits
& 4 ? 2 : 3)));
3154 coding
->common_flags
|= CODING_REQUIRE_FLUSHING_MASK
;
3155 coding
->spec
.iso2022
.last_invalid_designation_register
= -1;
3159 coding
->type
= coding_type_big5
;
3160 coding
->common_flags
3161 |= CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3163 = (NILP (XVECTOR (coding_spec
)->contents
[4])
3164 ? CODING_FLAG_BIG5_HKU
3165 : CODING_FLAG_BIG5_ETEN
);
3169 coding
->type
= coding_type_ccl
;
3170 coding
->common_flags
3171 |= CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3173 val
= XVECTOR (coding_spec
)->contents
[4];
3175 || setup_ccl_program (&(coding
->spec
.ccl
.decoder
),
3177 || setup_ccl_program (&(coding
->spec
.ccl
.encoder
),
3179 goto label_invalid_coding_system
;
3181 bzero (coding
->spec
.ccl
.valid_codes
, 256);
3182 val
= Fplist_get (plist
, Qvalid_codes
);
3187 for (; CONSP (val
); val
= XCDR (val
))
3191 && XINT (this) >= 0 && XINT (this) < 256)
3192 coding
->spec
.ccl
.valid_codes
[XINT (this)] = 1;
3193 else if (CONSP (this)
3194 && INTEGERP (XCAR (this))
3195 && INTEGERP (XCDR (this)))
3197 int start
= XINT (XCAR (this));
3198 int end
= XINT (XCDR (this));
3200 if (start
>= 0 && start
<= end
&& end
< 256)
3201 while (start
<= end
)
3202 coding
->spec
.ccl
.valid_codes
[start
++] = 1;
3207 coding
->common_flags
|= CODING_REQUIRE_FLUSHING_MASK
;
3211 coding
->type
= coding_type_raw_text
;
3215 goto label_invalid_coding_system
;
3219 label_invalid_coding_system
:
3220 coding
->type
= coding_type_no_conversion
;
3221 coding
->category_idx
= CODING_CATEGORY_IDX_BINARY
;
3222 coding
->common_flags
= 0;
3223 coding
->eol_type
= CODING_EOL_LF
;
3224 coding
->pre_write_conversion
= coding
->post_read_conversion
= Qnil
;
3228 /* Free memory blocks allocated for storing composition information. */
3231 coding_free_composition_data (coding
)
3232 struct coding_system
*coding
;
3234 struct composition_data
*cmp_data
= coding
->cmp_data
, *next
;
3238 /* Memory blocks are chained. At first, rewind to the first, then,
3239 free blocks one by one. */
3240 while (cmp_data
->prev
)
3241 cmp_data
= cmp_data
->prev
;
3244 next
= cmp_data
->next
;
3248 coding
->cmp_data
= NULL
;
3251 /* Set `char_offset' member of all memory blocks pointed by
3252 coding->cmp_data to POS. */
3255 coding_adjust_composition_offset (coding
, pos
)
3256 struct coding_system
*coding
;
3259 struct composition_data
*cmp_data
;
3261 for (cmp_data
= coding
->cmp_data
; cmp_data
; cmp_data
= cmp_data
->next
)
3262 cmp_data
->char_offset
= pos
;
3265 /* Setup raw-text or one of its subsidiaries in the structure
3266 coding_system CODING according to the already setup value eol_type
3267 in CODING. CODING should be setup for some coding system in
3271 setup_raw_text_coding_system (coding
)
3272 struct coding_system
*coding
;
3274 if (coding
->type
!= coding_type_raw_text
)
3276 coding
->symbol
= Qraw_text
;
3277 coding
->type
= coding_type_raw_text
;
3278 if (coding
->eol_type
!= CODING_EOL_UNDECIDED
)
3280 Lisp_Object subsidiaries
;
3281 subsidiaries
= Fget (Qraw_text
, Qeol_type
);
3283 if (VECTORP (subsidiaries
)
3284 && XVECTOR (subsidiaries
)->size
== 3)
3286 = XVECTOR (subsidiaries
)->contents
[coding
->eol_type
];
3288 setup_coding_system (coding
->symbol
, coding
);
3293 /* Emacs has a mechanism to automatically detect a coding system if it
3294 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
3295 it's impossible to distinguish some coding systems accurately
3296 because they use the same range of codes. So, at first, coding
3297 systems are categorized into the following 14 categories:
3299 o coding-category-emacs-mule
3301 The category for a coding system which has the same code range
3302 as Emacs' internal format. Assigned the coding-system (Lisp
3303 symbol) `emacs-mule' by default.
3305 o coding-category-sjis
3307 The category for a coding system which has the same code range
3308 as SJIS. Assigned the coding-system (Lisp
3309 symbol) `japanese-shift-jis' by default.
3311 o coding-category-iso-7
3313 The category for a coding system which has the same code range
3314 as ISO2022 of 7-bit environment. This doesn't use any locking
3315 shift and single shift functions. This can encode/decode all
3316 charsets. Assigned the coding-system (Lisp symbol)
3317 `iso-2022-7bit' by default.
3319 o coding-category-iso-7-tight
3321 Same as coding-category-iso-7 except that this can
3322 encode/decode only the specified charsets.
3324 o coding-category-iso-8-1
3326 The category for a coding system which has the same code range
3327 as ISO2022 of 8-bit environment and graphic plane 1 used only
3328 for DIMENSION1 charset. This doesn't use any locking shift
3329 and single shift functions. Assigned the coding-system (Lisp
3330 symbol) `iso-latin-1' by default.
3332 o coding-category-iso-8-2
3334 The category for a coding system which has the same code range
3335 as ISO2022 of 8-bit environment and graphic plane 1 used only
3336 for DIMENSION2 charset. This doesn't use any locking shift
3337 and single shift functions. Assigned the coding-system (Lisp
3338 symbol) `japanese-iso-8bit' by default.
3340 o coding-category-iso-7-else
3342 The category for a coding system which has the same code range
3343 as ISO2022 of 7-bit environment but uses locking shift or
3344 single shift functions. Assigned the coding-system (Lisp
3345 symbol) `iso-2022-7bit-lock' by default.
3347 o coding-category-iso-8-else
3349 The category for a coding system which has the same code range
3350 as ISO2022 of 8-bit environment but uses locking shift or
3351 single shift functions. Assigned the coding-system (Lisp
3352 symbol) `iso-2022-8bit-ss2' by default.
3354 o coding-category-big5
3356 The category for a coding system which has the same code range
3357 as BIG5. Assigned the coding-system (Lisp symbol)
3358 `cn-big5' by default.
3360 o coding-category-utf-8
3362 The category for a coding system which has the same code range
3363 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
3364 symbol) `utf-8' by default.
3366 o coding-category-utf-16-be
3368 The category for a coding system in which a text has an
3369 Unicode signature (cf. Unicode Standard) in the order of BIG
3370 endian at the head. Assigned the coding-system (Lisp symbol)
3371 `utf-16-be' by default.
3373 o coding-category-utf-16-le
3375 The category for a coding system in which a text has an
3376 Unicode signature (cf. Unicode Standard) in the order of
3377 LITTLE endian at the head. Assigned the coding-system (Lisp
3378 symbol) `utf-16-le' by default.
3380 o coding-category-ccl
3382 The category for a coding system of which encoder/decoder is
3383 written in CCL programs. The default value is nil, i.e., no
3384 coding system is assigned.
3386 o coding-category-binary
3388 The category for a coding system not categorized in any of the
3389 above. Assigned the coding-system (Lisp symbol)
3390 `no-conversion' by default.
3392 Each of them is a Lisp symbol whose value is the actual
3393 `coding-system' (this is also a Lisp symbol) assigned by a user.
3394 What Emacs does actually is to detect a category of coding system.
3395 Then, it uses a `coding-system' assigned to it. If Emacs can't
3396 decide only one possible category, it selects a category of the
3397 highest priority. Priorities of categories are also specified by a
3398 user in a Lisp variable `coding-category-list'.
3403 int ascii_skip_code
[256];
3405 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3406 If it detects possible coding systems, return an integer in which
3407 appropriate flag bits are set. Flag bits are defined by macros
3408 CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL,
3409 it should point to the table `coding_priorities'. In that case, only
3410 the flag bit for a coding system of the highest priority is set in
3413 How many ASCII characters are at the head is returned as *SKIP. */
3416 detect_coding_mask (source
, src_bytes
, priorities
, skip
)
3417 unsigned char *source
;
3418 int src_bytes
, *priorities
, *skip
;
3420 register unsigned char c
;
3421 unsigned char *src
= source
, *src_end
= source
+ src_bytes
;
3422 unsigned int mask
, utf16_examined_p
, iso2022_examined_p
;
3425 /* At first, skip all ASCII characters and control characters except
3426 for three ISO2022 specific control characters. */
3427 ascii_skip_code
[ISO_CODE_SO
] = 0;
3428 ascii_skip_code
[ISO_CODE_SI
] = 0;
3429 ascii_skip_code
[ISO_CODE_ESC
] = 0;
3431 label_loop_detect_coding
:
3432 while (src
< src_end
&& ascii_skip_code
[*src
]) src
++;
3433 *skip
= src
- source
;
3436 /* We found nothing other than ASCII. There's nothing to do. */
3440 /* The text seems to be encoded in some multilingual coding system.
3441 Now, try to find in which coding system the text is encoded. */
3444 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3445 /* C is an ISO2022 specific control code of C0. */
3446 mask
= detect_coding_iso2022 (src
, src_end
);
3449 /* No valid ISO2022 code follows C. Try again. */
3451 if (c
== ISO_CODE_ESC
)
3452 ascii_skip_code
[ISO_CODE_ESC
] = 1;
3454 ascii_skip_code
[ISO_CODE_SO
] = ascii_skip_code
[ISO_CODE_SI
] = 1;
3455 goto label_loop_detect_coding
;
3459 for (i
= 0; i
< CODING_CATEGORY_IDX_MAX
; i
++)
3461 if (mask
& priorities
[i
])
3462 return priorities
[i
];
3464 return CODING_CATEGORY_MASK_RAW_TEXT
;
3473 /* C is the first byte of SJIS character code,
3474 or a leading-code of Emacs' internal format (emacs-mule),
3475 or the first byte of UTF-16. */
3476 try = (CODING_CATEGORY_MASK_SJIS
3477 | CODING_CATEGORY_MASK_EMACS_MULE
3478 | CODING_CATEGORY_MASK_UTF_16_BE
3479 | CODING_CATEGORY_MASK_UTF_16_LE
);
3481 /* Or, if C is a special latin extra code,
3482 or is an ISO2022 specific control code of C1 (SS2 or SS3),
3483 or is an ISO2022 control-sequence-introducer (CSI),
3484 we should also consider the possibility of ISO2022 codings. */
3485 if ((VECTORP (Vlatin_extra_code_table
)
3486 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
3487 || (c
== ISO_CODE_SS2
|| c
== ISO_CODE_SS3
)
3488 || (c
== ISO_CODE_CSI
3491 || ((*src
== '0' || *src
== '1' || *src
== '2')
3492 && src
+ 1 < src_end
3493 && src
[1] == ']')))))
3494 try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3495 | CODING_CATEGORY_MASK_ISO_8BIT
);
3498 /* C is a character of ISO2022 in graphic plane right,
3499 or a SJIS's 1-byte character code (i.e. JISX0201),
3500 or the first byte of BIG5's 2-byte code,
3501 or the first byte of UTF-8/16. */
3502 try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3503 | CODING_CATEGORY_MASK_ISO_8BIT
3504 | CODING_CATEGORY_MASK_SJIS
3505 | CODING_CATEGORY_MASK_BIG5
3506 | CODING_CATEGORY_MASK_UTF_8
3507 | CODING_CATEGORY_MASK_UTF_16_BE
3508 | CODING_CATEGORY_MASK_UTF_16_LE
);
3510 /* Or, we may have to consider the possibility of CCL. */
3511 if (coding_system_table
[CODING_CATEGORY_IDX_CCL
]
3512 && (coding_system_table
[CODING_CATEGORY_IDX_CCL
]
3513 ->spec
.ccl
.valid_codes
)[c
])
3514 try |= CODING_CATEGORY_MASK_CCL
;
3517 utf16_examined_p
= iso2022_examined_p
= 0;
3520 for (i
= 0; i
< CODING_CATEGORY_IDX_MAX
; i
++)
3522 if (!iso2022_examined_p
3523 && (priorities
[i
] & try & CODING_CATEGORY_MASK_ISO
))
3525 mask
|= detect_coding_iso2022 (src
, src_end
);
3526 iso2022_examined_p
= 1;
3528 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_SJIS
)
3529 mask
|= detect_coding_sjis (src
, src_end
);
3530 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_UTF_8
)
3531 mask
|= detect_coding_utf_8 (src
, src_end
);
3532 else if (!utf16_examined_p
3533 && (priorities
[i
] & try &
3534 CODING_CATEGORY_MASK_UTF_16_BE_LE
))
3536 mask
|= detect_coding_utf_16 (src
, src_end
);
3537 utf16_examined_p
= 1;
3539 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_BIG5
)
3540 mask
|= detect_coding_big5 (src
, src_end
);
3541 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_EMACS_MULE
)
3542 mask
|= detect_coding_emacs_mule (src
, src_end
);
3543 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_CCL
)
3544 mask
|= detect_coding_ccl (src
, src_end
);
3545 else if (priorities
[i
] & CODING_CATEGORY_MASK_RAW_TEXT
)
3546 mask
|= CODING_CATEGORY_MASK_RAW_TEXT
;
3547 else if (priorities
[i
] & CODING_CATEGORY_MASK_BINARY
)
3548 mask
|= CODING_CATEGORY_MASK_BINARY
;
3549 if (mask
& priorities
[i
])
3550 return priorities
[i
];
3552 return CODING_CATEGORY_MASK_RAW_TEXT
;
3554 if (try & CODING_CATEGORY_MASK_ISO
)
3555 mask
|= detect_coding_iso2022 (src
, src_end
);
3556 if (try & CODING_CATEGORY_MASK_SJIS
)
3557 mask
|= detect_coding_sjis (src
, src_end
);
3558 if (try & CODING_CATEGORY_MASK_BIG5
)
3559 mask
|= detect_coding_big5 (src
, src_end
);
3560 if (try & CODING_CATEGORY_MASK_UTF_8
)
3561 mask
|= detect_coding_utf_8 (src
, src_end
);
3562 if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE
)
3563 mask
|= detect_coding_utf_16 (src
, src_end
);
3564 if (try & CODING_CATEGORY_MASK_EMACS_MULE
)
3565 mask
|= detect_coding_emacs_mule (src
, src_end
);
3566 if (try & CODING_CATEGORY_MASK_CCL
)
3567 mask
|= detect_coding_ccl (src
, src_end
);
3569 return (mask
| CODING_CATEGORY_MASK_RAW_TEXT
| CODING_CATEGORY_MASK_BINARY
);
3572 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3573 The information of the detected coding system is set in CODING. */
3576 detect_coding (coding
, src
, src_bytes
)
3577 struct coding_system
*coding
;
3585 val
= Vcoding_category_list
;
3586 mask
= detect_coding_mask (src
, src_bytes
, coding_priorities
, &skip
);
3587 coding
->heading_ascii
= skip
;
3591 /* We found a single coding system of the highest priority in MASK. */
3593 while (mask
&& ! (mask
& 1)) mask
>>= 1, idx
++;
3595 idx
= CODING_CATEGORY_IDX_RAW_TEXT
;
3597 val
= XSYMBOL (XVECTOR (Vcoding_category_table
)->contents
[idx
])->value
;
3599 if (coding
->eol_type
!= CODING_EOL_UNDECIDED
)
3603 tmp
= Fget (val
, Qeol_type
);
3605 val
= XVECTOR (tmp
)->contents
[coding
->eol_type
];
3608 /* Setup this new coding system while preserving some slots. */
3610 int src_multibyte
= coding
->src_multibyte
;
3611 int dst_multibyte
= coding
->dst_multibyte
;
3613 setup_coding_system (val
, coding
);
3614 coding
->src_multibyte
= src_multibyte
;
3615 coding
->dst_multibyte
= dst_multibyte
;
3616 coding
->heading_ascii
= skip
;
3620 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3621 SOURCE is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3622 CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3624 How many non-eol characters are at the head is returned as *SKIP. */
3626 #define MAX_EOL_CHECK_COUNT 3
3629 detect_eol_type (source
, src_bytes
, skip
)
3630 unsigned char *source
;
3631 int src_bytes
, *skip
;
3633 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
3635 int total
= 0; /* How many end-of-lines are found so far. */
3636 int eol_type
= CODING_EOL_UNDECIDED
;
3641 while (src
< src_end
&& total
< MAX_EOL_CHECK_COUNT
)
3644 if (c
== '\n' || c
== '\r')
3647 *skip
= src
- 1 - source
;
3650 this_eol_type
= CODING_EOL_LF
;
3651 else if (src
>= src_end
|| *src
!= '\n')
3652 this_eol_type
= CODING_EOL_CR
;
3654 this_eol_type
= CODING_EOL_CRLF
, src
++;
3656 if (eol_type
== CODING_EOL_UNDECIDED
)
3657 /* This is the first end-of-line. */
3658 eol_type
= this_eol_type
;
3659 else if (eol_type
!= this_eol_type
)
3661 /* The found type is different from what was found before. */
3662 eol_type
= CODING_EOL_INCONSISTENT
;
3669 *skip
= src_end
- source
;
3673 /* Like detect_eol_type, but detect EOL type in 2-octet
3674 big-endian/little-endian format for coding systems utf-16-be and
3678 detect_eol_type_in_2_octet_form (source
, src_bytes
, skip
, big_endian_p
)
3679 unsigned char *source
;
3680 int src_bytes
, *skip
;
3682 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
3683 unsigned int c1
, c2
;
3684 int total
= 0; /* How many end-of-lines are found so far. */
3685 int eol_type
= CODING_EOL_UNDECIDED
;
3696 while ((src
+ 1) < src_end
&& total
< MAX_EOL_CHECK_COUNT
)
3698 c1
= (src
[msb
] << 8) | (src
[lsb
]);
3701 if (c1
== '\n' || c1
== '\r')
3704 *skip
= src
- 2 - source
;
3708 this_eol_type
= CODING_EOL_LF
;
3712 if ((src
+ 1) >= src_end
)
3714 this_eol_type
= CODING_EOL_CR
;
3718 c2
= (src
[msb
] << 8) | (src
[lsb
]);
3720 this_eol_type
= CODING_EOL_CRLF
, src
+= 2;
3722 this_eol_type
= CODING_EOL_CR
;
3726 if (eol_type
== CODING_EOL_UNDECIDED
)
3727 /* This is the first end-of-line. */
3728 eol_type
= this_eol_type
;
3729 else if (eol_type
!= this_eol_type
)
3731 /* The found type is different from what was found before. */
3732 eol_type
= CODING_EOL_INCONSISTENT
;
3739 *skip
= src_end
- source
;
3743 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3744 is encoded. If it detects an appropriate format of end-of-line, it
3745 sets the information in *CODING. */
3748 detect_eol (coding
, src
, src_bytes
)
3749 struct coding_system
*coding
;
3757 switch (coding
->category_idx
)
3759 case CODING_CATEGORY_IDX_UTF_16_BE
:
3760 eol_type
= detect_eol_type_in_2_octet_form (src
, src_bytes
, &skip
, 1);
3762 case CODING_CATEGORY_IDX_UTF_16_LE
:
3763 eol_type
= detect_eol_type_in_2_octet_form (src
, src_bytes
, &skip
, 0);
3766 eol_type
= detect_eol_type (src
, src_bytes
, &skip
);
3770 if (coding
->heading_ascii
> skip
)
3771 coding
->heading_ascii
= skip
;
3773 skip
= coding
->heading_ascii
;
3775 if (eol_type
== CODING_EOL_UNDECIDED
)
3777 if (eol_type
== CODING_EOL_INCONSISTENT
)
3780 /* This code is suppressed until we find a better way to
3781 distinguish raw text file and binary file. */
3783 /* If we have already detected that the coding is raw-text, the
3784 coding should actually be no-conversion. */
3785 if (coding
->type
== coding_type_raw_text
)
3787 setup_coding_system (Qno_conversion
, coding
);
3790 /* Else, let's decode only text code anyway. */
3792 eol_type
= CODING_EOL_LF
;
3795 val
= Fget (coding
->symbol
, Qeol_type
);
3796 if (VECTORP (val
) && XVECTOR (val
)->size
== 3)
3798 int src_multibyte
= coding
->src_multibyte
;
3799 int dst_multibyte
= coding
->dst_multibyte
;
3801 setup_coding_system (XVECTOR (val
)->contents
[eol_type
], coding
);
3802 coding
->src_multibyte
= src_multibyte
;
3803 coding
->dst_multibyte
= dst_multibyte
;
3804 coding
->heading_ascii
= skip
;
3808 #define CONVERSION_BUFFER_EXTRA_ROOM 256
3810 #define DECODING_BUFFER_MAG(coding) \
3811 (coding->type == coding_type_iso2022 \
3813 : (coding->type == coding_type_ccl \
3814 ? coding->spec.ccl.decoder.buf_magnification \
3817 /* Return maximum size (bytes) of a buffer enough for decoding
3818 SRC_BYTES of text encoded in CODING. */
3821 decoding_buffer_size (coding
, src_bytes
)
3822 struct coding_system
*coding
;
3825 return (src_bytes
* DECODING_BUFFER_MAG (coding
)
3826 + CONVERSION_BUFFER_EXTRA_ROOM
);
3829 /* Return maximum size (bytes) of a buffer enough for encoding
3830 SRC_BYTES of text to CODING. */
3833 encoding_buffer_size (coding
, src_bytes
)
3834 struct coding_system
*coding
;
3839 if (coding
->type
== coding_type_ccl
)
3840 magnification
= coding
->spec
.ccl
.encoder
.buf_magnification
;
3841 else if (CODING_REQUIRE_ENCODING (coding
))
3846 return (src_bytes
* magnification
+ CONVERSION_BUFFER_EXTRA_ROOM
);
3849 #ifndef MINIMUM_CONVERSION_BUFFER_SIZE
3850 #define MINIMUM_CONVERSION_BUFFER_SIZE 1024
3853 char *conversion_buffer
;
3854 int conversion_buffer_size
;
3856 /* Return a pointer to a buffer of SIZE bytes to be used for encoding
3857 or decoding. Sufficient memory is allocated automatically. If we
3858 run out of memory, return NULL. */
3861 get_conversion_buffer (size
)
3864 if (size
> conversion_buffer_size
)
3867 int real_size
= conversion_buffer_size
* 2;
3869 while (real_size
< size
) real_size
*= 2;
3870 buf
= (char *) xmalloc (real_size
);
3871 xfree (conversion_buffer
);
3872 conversion_buffer
= buf
;
3873 conversion_buffer_size
= real_size
;
3875 return conversion_buffer
;
3879 ccl_coding_driver (coding
, source
, destination
, src_bytes
, dst_bytes
, encodep
)
3880 struct coding_system
*coding
;
3881 unsigned char *source
, *destination
;
3882 int src_bytes
, dst_bytes
, encodep
;
3884 struct ccl_program
*ccl
3885 = encodep
? &coding
->spec
.ccl
.encoder
: &coding
->spec
.ccl
.decoder
;
3888 ccl
->last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
3890 coding
->produced
= ccl_driver (ccl
, source
, destination
,
3891 src_bytes
, dst_bytes
, &(coding
->consumed
));
3893 coding
->produced_char
= coding
->produced
;
3897 = dst_bytes
? dst_bytes
: source
+ coding
->consumed
- destination
;
3898 coding
->produced
= str_as_multibyte (destination
, bytes
,
3900 &(coding
->produced_char
));
3903 switch (ccl
->status
)
3905 case CCL_STAT_SUSPEND_BY_SRC
:
3906 result
= CODING_FINISH_INSUFFICIENT_SRC
;
3908 case CCL_STAT_SUSPEND_BY_DST
:
3909 result
= CODING_FINISH_INSUFFICIENT_DST
;
3912 case CCL_STAT_INVALID_CMD
:
3913 result
= CODING_FINISH_INTERRUPT
;
3916 result
= CODING_FINISH_NORMAL
;
3922 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
3923 decoding, it may detect coding system and format of end-of-line if
3924 those are not yet decided. The source should be unibyte, the
3925 result is multibyte if CODING->dst_multibyte is nonzero, else
3929 decode_coding (coding
, source
, destination
, src_bytes
, dst_bytes
)
3930 struct coding_system
*coding
;
3931 unsigned char *source
, *destination
;
3932 int src_bytes
, dst_bytes
;
3934 if (coding
->type
== coding_type_undecided
)
3935 detect_coding (coding
, source
, src_bytes
);
3937 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
3938 detect_eol (coding
, source
, src_bytes
);
3940 coding
->produced
= coding
->produced_char
= 0;
3941 coding
->consumed
= coding
->consumed_char
= 0;
3943 coding
->result
= CODING_FINISH_NORMAL
;
3945 switch (coding
->type
)
3947 case coding_type_sjis
:
3948 decode_coding_sjis_big5 (coding
, source
, destination
,
3949 src_bytes
, dst_bytes
, 1);
3952 case coding_type_iso2022
:
3953 decode_coding_iso2022 (coding
, source
, destination
,
3954 src_bytes
, dst_bytes
);
3957 case coding_type_big5
:
3958 decode_coding_sjis_big5 (coding
, source
, destination
,
3959 src_bytes
, dst_bytes
, 0);
3962 case coding_type_emacs_mule
:
3963 decode_coding_emacs_mule (coding
, source
, destination
,
3964 src_bytes
, dst_bytes
);
3967 case coding_type_ccl
:
3968 ccl_coding_driver (coding
, source
, destination
,
3969 src_bytes
, dst_bytes
, 0);
3973 decode_eol (coding
, source
, destination
, src_bytes
, dst_bytes
);
3976 if (coding
->result
== CODING_FINISH_INSUFFICIENT_SRC
3977 && coding
->consumed
== src_bytes
)
3978 coding
->result
= CODING_FINISH_NORMAL
;
3980 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3981 && coding
->result
== CODING_FINISH_INSUFFICIENT_SRC
)
3983 unsigned char *src
= source
+ coding
->consumed
;
3984 unsigned char *dst
= destination
+ coding
->produced
;
3986 src_bytes
-= coding
->consumed
;
3988 if (COMPOSING_P (coding
))
3989 DECODE_COMPOSITION_END ('1');
3993 dst
+= CHAR_STRING (c
, dst
);
3994 coding
->produced_char
++;
3996 coding
->consumed
= coding
->consumed_char
= src
- source
;
3997 coding
->produced
= dst
- destination
;
4000 if (!coding
->dst_multibyte
)
4002 coding
->produced
= str_as_unibyte (destination
, coding
->produced
);
4003 coding
->produced_char
= coding
->produced
;
4006 return coding
->result
;
4009 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". The
4010 multibyteness of the source is CODING->src_multibyte, the
4011 multibyteness of the result is always unibyte. */
4014 encode_coding (coding
, source
, destination
, src_bytes
, dst_bytes
)
4015 struct coding_system
*coding
;
4016 unsigned char *source
, *destination
;
4017 int src_bytes
, dst_bytes
;
4019 coding
->produced
= coding
->produced_char
= 0;
4020 coding
->consumed
= coding
->consumed_char
= 0;
4022 coding
->result
= CODING_FINISH_NORMAL
;
4024 switch (coding
->type
)
4026 case coding_type_sjis
:
4027 encode_coding_sjis_big5 (coding
, source
, destination
,
4028 src_bytes
, dst_bytes
, 1);
4031 case coding_type_iso2022
:
4032 encode_coding_iso2022 (coding
, source
, destination
,
4033 src_bytes
, dst_bytes
);
4036 case coding_type_big5
:
4037 encode_coding_sjis_big5 (coding
, source
, destination
,
4038 src_bytes
, dst_bytes
, 0);
4041 case coding_type_emacs_mule
:
4042 encode_coding_emacs_mule (coding
, source
, destination
,
4043 src_bytes
, dst_bytes
);
4046 case coding_type_ccl
:
4047 ccl_coding_driver (coding
, source
, destination
,
4048 src_bytes
, dst_bytes
, 1);
4052 encode_eol (coding
, source
, destination
, src_bytes
, dst_bytes
);
4055 if (coding
->result
== CODING_FINISH_INSUFFICIENT_SRC
4056 && coding
->consumed
== src_bytes
)
4057 coding
->result
= CODING_FINISH_NORMAL
;
4059 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
4061 unsigned char *src
= source
+ coding
->consumed
;
4062 unsigned char *src_end
= src
+ src_bytes
;
4063 unsigned char *dst
= destination
+ coding
->produced
;
4065 if (coding
->type
== coding_type_iso2022
)
4066 ENCODE_RESET_PLANE_AND_REGISTER
;
4067 if (COMPOSING_P (coding
))
4068 *dst
++ = ISO_CODE_ESC
, *dst
++ = '1';
4069 if (coding
->consumed
< src_bytes
)
4071 int len
= src_bytes
- coding
->consumed
;
4073 BCOPY_SHORT (source
+ coding
->consumed
, dst
, len
);
4074 if (coding
->src_multibyte
)
4075 len
= str_as_unibyte (dst
, len
);
4077 coding
->consumed
= src_bytes
;
4079 coding
->produced
= coding
->produced_char
= dst
- destination
;
4082 return coding
->result
;
4085 /* Scan text in the region between *BEG and *END (byte positions),
4086 skip characters which we don't have to decode by coding system
4087 CODING at the head and tail, then set *BEG and *END to the region
4088 of the text we actually have to convert. The caller should move
4089 the gap out of the region in advance if the region is from a
4092 If STR is not NULL, *BEG and *END are indices into STR. */
4095 shrink_decoding_region (beg
, end
, coding
, str
)
4097 struct coding_system
*coding
;
4100 unsigned char *begp_orig
, *begp
, *endp_orig
, *endp
, c
;
4102 Lisp_Object translation_table
;
4104 if (coding
->type
== coding_type_ccl
4105 || coding
->type
== coding_type_undecided
4106 || coding
->eol_type
!= CODING_EOL_LF
4107 || !NILP (coding
->post_read_conversion
)
4108 || coding
->composing
!= COMPOSITION_DISABLED
)
4110 /* We can't skip any data. */
4113 if (coding
->type
== coding_type_no_conversion
4114 || coding
->type
== coding_type_raw_text
4115 || coding
->type
== coding_type_emacs_mule
)
4117 /* We need no conversion, but don't have to skip any data here.
4118 Decoding routine handles them effectively anyway. */
4122 translation_table
= coding
->translation_table_for_decode
;
4123 if (NILP (translation_table
) && !NILP (Venable_character_translation
))
4124 translation_table
= Vstandard_translation_table_for_decode
;
4125 if (CHAR_TABLE_P (translation_table
))
4128 for (i
= 0; i
< 128; i
++)
4129 if (!NILP (CHAR_TABLE_REF (translation_table
, i
)))
4132 /* Some ASCII character should be translated. We give up
4137 if (coding
->heading_ascii
>= 0)
4138 /* Detection routine has already found how much we can skip at the
4140 *beg
+= coding
->heading_ascii
;
4144 begp_orig
= begp
= str
+ *beg
;
4145 endp_orig
= endp
= str
+ *end
;
4149 begp_orig
= begp
= BYTE_POS_ADDR (*beg
);
4150 endp_orig
= endp
= begp
+ *end
- *beg
;
4153 eol_conversion
= (coding
->eol_type
== CODING_EOL_CR
4154 || coding
->eol_type
== CODING_EOL_CRLF
);
4156 switch (coding
->type
)
4158 case coding_type_sjis
:
4159 case coding_type_big5
:
4160 /* We can skip all ASCII characters at the head. */
4161 if (coding
->heading_ascii
< 0)
4164 while (begp
< endp
&& *begp
< 0x80 && *begp
!= '\r') begp
++;
4166 while (begp
< endp
&& *begp
< 0x80) begp
++;
4168 /* We can skip all ASCII characters at the tail except for the
4169 second byte of SJIS or BIG5 code. */
4171 while (begp
< endp
&& endp
[-1] < 0x80 && endp
[-1] != '\r') endp
--;
4173 while (begp
< endp
&& endp
[-1] < 0x80) endp
--;
4174 /* Do not consider LF as ascii if preceded by CR, since that
4175 confuses eol decoding. */
4176 if (begp
< endp
&& endp
< endp_orig
&& endp
[-1] == '\r' && endp
[0] == '\n')
4178 if (begp
< endp
&& endp
< endp_orig
&& endp
[-1] >= 0x80)
4182 case coding_type_iso2022
:
4183 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, 0) != CHARSET_ASCII
)
4184 /* We can't skip any data. */
4186 if (coding
->heading_ascii
< 0)
4188 /* We can skip all ASCII characters at the head except for a
4189 few control codes. */
4190 while (begp
< endp
&& (c
= *begp
) < 0x80
4191 && c
!= ISO_CODE_CR
&& c
!= ISO_CODE_SO
4192 && c
!= ISO_CODE_SI
&& c
!= ISO_CODE_ESC
4193 && (!eol_conversion
|| c
!= ISO_CODE_LF
))
4196 switch (coding
->category_idx
)
4198 case CODING_CATEGORY_IDX_ISO_8_1
:
4199 case CODING_CATEGORY_IDX_ISO_8_2
:
4200 /* We can skip all ASCII characters at the tail. */
4202 while (begp
< endp
&& (c
= endp
[-1]) < 0x80 && c
!= '\r') endp
--;
4204 while (begp
< endp
&& endp
[-1] < 0x80) endp
--;
4205 /* Do not consider LF as ascii if preceded by CR, since that
4206 confuses eol decoding. */
4207 if (begp
< endp
&& endp
< endp_orig
&& endp
[-1] == '\r' && endp
[0] == '\n')
4211 case CODING_CATEGORY_IDX_ISO_7
:
4212 case CODING_CATEGORY_IDX_ISO_7_TIGHT
:
4214 /* We can skip all characters at the tail except for 8-bit
4215 codes and ESC and the following 2 bytes at the tail. */
4216 unsigned char *eight_bit
= NULL
;
4220 && (c
= endp
[-1]) != ISO_CODE_ESC
&& c
!= '\r')
4222 if (!eight_bit
&& c
& 0x80) eight_bit
= endp
;
4227 && (c
= endp
[-1]) != ISO_CODE_ESC
)
4229 if (!eight_bit
&& c
& 0x80) eight_bit
= endp
;
4232 /* Do not consider LF as ascii if preceded by CR, since that
4233 confuses eol decoding. */
4234 if (begp
< endp
&& endp
< endp_orig
4235 && endp
[-1] == '\r' && endp
[0] == '\n')
4237 if (begp
< endp
&& endp
[-1] == ISO_CODE_ESC
)
4239 if (endp
+ 1 < endp_orig
&& end
[0] == '(' && end
[1] == 'B')
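4240 /* NOTE(review): the condition just above reads end[0] and end[1], but
END is the position pointer that is dereferenced as *end elsewhere in
this function; the bytes following the ESC are endp[0] and endp[1], so
this looks like a typo for ENDP -- confirm before relying on it.
This is an ASCII designation sequence. We can
4241 surely skip the tail. But, if we have
4242 encountered an 8-bit code, skip only the codes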
4244 endp
= eight_bit
? eight_bit
: endp
+ 2;
4246 /* Hmmm, we can't skip the tail. */
4258 *beg
+= begp
- begp_orig
;
4259 *end
+= endp
- endp_orig
;
4263 /* Like shrink_decoding_region but for encoding. */
4266 shrink_encoding_region (beg
, end
, coding
, str
)
4268 struct coding_system
*coding
;
4271 unsigned char *begp_orig
, *begp
, *endp_orig
, *endp
;
4273 Lisp_Object translation_table
;
4275 if (coding
->type
== coding_type_ccl
4276 || coding
->eol_type
== CODING_EOL_CRLF
4277 || coding
->eol_type
== CODING_EOL_CR
4278 || coding
->cmp_data
&& coding
->cmp_data
->used
> 0)
4280 /* We can't skip any data. */
4283 if (coding
->type
== coding_type_no_conversion
4284 || coding
->type
== coding_type_raw_text
4285 || coding
->type
== coding_type_emacs_mule
4286 || coding
->type
== coding_type_undecided
)
4288 /* We need no conversion, but don't have to skip any data here.
4289 Encoding routine handles them effectively anyway. */
4293 translation_table
= coding
->translation_table_for_encode
;
4294 if (NILP (translation_table
) && !NILP (Venable_character_translation
))
4295 translation_table
= Vstandard_translation_table_for_encode
;
4296 if (CHAR_TABLE_P (translation_table
))
4299 for (i
= 0; i
< 128; i
++)
4300 if (!NILP (CHAR_TABLE_REF (translation_table
, i
)))
4303 /* Some ASCII character should be translated. We give up
4310 begp_orig
= begp
= str
+ *beg
;
4311 endp_orig
= endp
= str
+ *end
;
4315 begp_orig
= begp
= BYTE_POS_ADDR (*beg
);
4316 endp_orig
= endp
= begp
+ *end
- *beg
;
4319 eol_conversion
= (coding
->eol_type
== CODING_EOL_CR
4320 || coding
->eol_type
== CODING_EOL_CRLF
);
4322 /* Here, we don't have to check coding->pre_write_conversion because
4323 the caller is expected to have handled it already. */
4324 switch (coding
->type
)
4326 case coding_type_iso2022
:
4327 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, 0) != CHARSET_ASCII
)
4328 /* We can't skip any data. */
4330 if (coding
->flags
& CODING_FLAG_ISO_DESIGNATE_AT_BOL
)
4332 unsigned char *bol
= begp
;
4333 while (begp
< endp
&& *begp
< 0x80)
4336 if (begp
[-1] == '\n')
4340 goto label_skip_tail
;
4344 case coding_type_sjis
:
4345 case coding_type_big5
:
4346 /* We can skip all ASCII characters at the head and tail. */
4348 while (begp
< endp
&& *begp
< 0x80 && *begp
!= '\n') begp
++;
4350 while (begp
< endp
&& *begp
< 0x80) begp
++;
4353 while (begp
< endp
&& endp
[-1] < 0x80 && endp
[-1] != '\n') endp
--;
4355 while (begp
< endp
&& *(endp
- 1) < 0x80) endp
--;
4362 *beg
+= begp
- begp_orig
;
4363 *end
+= endp
- endp_orig
;
4367 /* As shrinking conversion region requires some overhead, we don't try
4368 shrinking if the length of conversion region is less than this
4370 static int shrink_conversion_region_threshhold
= 1024;
4372 #define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep) \
4374 if (*(end) - *(beg) > shrink_conversion_region_threshhold) \
4376 if (encodep) shrink_encoding_region (beg, end, coding, str); \
4377 else shrink_decoding_region (beg, end, coding, str); \
4382 code_convert_region_unwind (dummy
)
4385 inhibit_pre_post_conversion
= 0;
4389 /* Store information about all compositions in the range FROM and TO
4390 of OBJ in memory blocks pointed by CODING->cmp_data. OBJ is a
4391 buffer or a string, defaults to the current buffer. */
4394 coding_save_composition (coding
, from
, to
, obj
)
4395 struct coding_system
*coding
;
4402 if (coding
->composing
== COMPOSITION_DISABLED
)
4404 if (!coding
->cmp_data
)
4405 coding_allocate_composition_data (coding
, from
);
4406 if (!find_composition (from
, to
, &start
, &end
, &prop
, obj
)
4410 && (!find_composition (end
, to
, &start
, &end
, &prop
, obj
)
4413 coding
->composing
= COMPOSITION_NO
;
4416 if (COMPOSITION_VALID_P (start
, end
, prop
))
4418 enum composition_method method
= COMPOSITION_METHOD (prop
);
4419 if (coding
->cmp_data
->used
+ COMPOSITION_DATA_MAX_BUNCH_LENGTH
4420 >= COMPOSITION_DATA_SIZE
)
4421 coding_allocate_composition_data (coding
, from
);
4422 /* For relative composition, we remember start and end
4423 positions, for the other compositions, we also remember
4425 CODING_ADD_COMPOSITION_START (coding
, start
- from
, method
);
4426 if (method
!= COMPOSITION_RELATIVE
)
4428 /* We must store the components of the composition. */
4429 Lisp_Object val
, ch
;
4431 val
= COMPOSITION_COMPONENTS (prop
);
4435 ch
= XCAR (val
), val
= XCDR (val
);
4436 CODING_ADD_COMPOSITION_COMPONENT (coding
, XINT (ch
));
4438 else if (VECTORP (val
) || STRINGP (val
))
4440 int len
= (VECTORP (val
)
4441 ? XVECTOR (val
)->size
: XSTRING (val
)->size
);
4443 for (i
= 0; i
< len
; i
++)
4446 ? Faref (val
, make_number (i
))
4447 : XVECTOR (val
)->contents
[i
]);
4448 CODING_ADD_COMPOSITION_COMPONENT (coding
, XINT (ch
));
4451 else /* INTEGERP (val) */
4452 CODING_ADD_COMPOSITION_COMPONENT (coding
, XINT (val
));
4454 CODING_ADD_COMPOSITION_END (coding
, end
- from
);
4459 && find_composition (start
, to
, &start
, &end
, &prop
, obj
)
4462 /* Make coding->cmp_data point to the first memory block. */
4463 while (coding
->cmp_data
->prev
)
4464 coding
->cmp_data
= coding
->cmp_data
->prev
;
4465 coding
->cmp_data_start
= 0;
4468 /* Reflect the saved information about compositions to OBJ.
4469 CODING->cmp_data points to a memory block for the information. OBJ
4470 is a buffer or a string, defaults to the current buffer. */
4473 coding_restore_composition (coding
, obj
)
4474 struct coding_system
*coding
;
4477 struct composition_data
*cmp_data
= coding
->cmp_data
;
4482 while (cmp_data
->prev
)
4483 cmp_data
= cmp_data
->prev
;
4489 for (i
= 0; i
< cmp_data
->used
; i
+= cmp_data
->data
[i
])
4491 int *data
= cmp_data
->data
+ i
;
4492 enum composition_method method
= (enum composition_method
) data
[3];
4493 Lisp_Object components
;
4495 if (method
== COMPOSITION_RELATIVE
)
4499 int len
= data
[0] - 4, j
;
4500 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
4502 for (j
= 0; j
< len
; j
++)
4503 args
[j
] = make_number (data
[4 + j
]);
4504 components
= (method
== COMPOSITION_WITH_ALTCHARS
4505 ? Fstring (len
, args
) : Fvector (len
, args
));
4507 compose_text (data
[1], data
[2], components
, Qnil
, obj
);
4509 cmp_data
= cmp_data
->next
;
4513 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4514 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4515 coding system CODING, and return the status code of code conversion
4516 (currently, this value has no meaning).
4518 How many characters (and bytes) are converted to how many
4519 characters (and bytes) are recorded in members of the structure
4522 If REPLACE is nonzero, we do various things as if the original text
4523 is deleted and a new text is inserted. See the comments in
4524 replace_range (insdel.c) to know what we are doing.
4526 If REPLACE is zero, it is assumed that the source text is unibyte.
4527 Otherwise, it is assumed that the source text is multibyte. */
4530 code_convert_region (from
, from_byte
, to
, to_byte
, coding
, encodep
, replace
)
4531 int from
, from_byte
, to
, to_byte
, encodep
, replace
;
4532 struct coding_system
*coding
;
4534 int len
= to
- from
, len_byte
= to_byte
- from_byte
;
4535 int require
, inserted
, inserted_byte
;
4536 int head_skip
, tail_skip
, total_skip
= 0;
4537 Lisp_Object saved_coding_symbol
;
4539 unsigned char *src
, *dst
;
4540 Lisp_Object deletion
;
4541 int orig_point
= PT
, orig_len
= len
;
4543 int multibyte_p
= !NILP (current_buffer
->enable_multibyte_characters
);
4545 coding
->src_multibyte
= replace
&& multibyte_p
;
4546 coding
->dst_multibyte
= multibyte_p
;
4549 saved_coding_symbol
= Qnil
;
4551 if (from
< PT
&& PT
< to
)
4553 TEMP_SET_PT_BOTH (from
, from_byte
);
4559 int saved_from
= from
;
4561 prepare_to_modify_buffer (from
, to
, &from
);
4562 if (saved_from
!= from
)
4565 from_byte
= CHAR_TO_BYTE (from
), to_byte
= CHAR_TO_BYTE (to
);
4566 len_byte
= to_byte
- from_byte
;
4570 if (! encodep
&& CODING_REQUIRE_DETECTION (coding
))
4572 /* We must detect encoding of text and eol format. */
4574 if (from
< GPT
&& to
> GPT
)
4575 move_gap_both (from
, from_byte
);
4576 if (coding
->type
== coding_type_undecided
)
4578 detect_coding (coding
, BYTE_POS_ADDR (from_byte
), len_byte
);
4579 if (coding
->type
== coding_type_undecided
)
4580 /* It seems that the text contains only ASCII, but we
4581 should not leave it undecided because the deeper
4582 decoding routine (decode_coding) tries to detect the
4583 encodings again in vain. */
4584 coding
->type
= coding_type_emacs_mule
;
4586 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
4588 saved_coding_symbol
= coding
->symbol
;
4589 detect_eol (coding
, BYTE_POS_ADDR (from_byte
), len_byte
);
4590 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
4591 coding
->eol_type
= CODING_EOL_LF
;
4592 /* We had better recover the original eol format if we
4593 encounter an inconsistent eol format while decoding. */
4594 coding
->mode
|= CODING_MODE_INHIBIT_INCONSISTENT_EOL
;
4598 /* Now we convert the text. */
4600 /* For encoding, we must process pre-write-conversion in advance. */
4601 if (! inhibit_pre_post_conversion
4603 && SYMBOLP (coding
->pre_write_conversion
)
4604 && ! NILP (Ffboundp (coding
->pre_write_conversion
)))
4606 /* The function in pre-write-conversion may put a new text in a
4608 struct buffer
*prev
= current_buffer
;
4610 int count
= specpdl_ptr
- specpdl
;
4612 record_unwind_protect (code_convert_region_unwind
, Qnil
);
4613 /* We should not call any more pre-write/post-read-conversion
4614 functions while this pre-write-conversion is running. */
4615 inhibit_pre_post_conversion
= 1;
4616 call2 (coding
->pre_write_conversion
,
4617 make_number (from
), make_number (to
));
4618 inhibit_pre_post_conversion
= 0;
4619 /* Discard the unwind protect. */
4622 if (current_buffer
!= prev
)
4625 new = Fcurrent_buffer ();
4626 set_buffer_internal_1 (prev
);
4627 del_range_2 (from
, from_byte
, to
, to_byte
, 0);
4628 TEMP_SET_PT_BOTH (from
, from_byte
);
4629 insert_from_buffer (XBUFFER (new), 1, len
, 0);
4631 if (orig_point
>= to
)
4632 orig_point
+= len
- orig_len
;
4633 else if (orig_point
> from
)
4637 from_byte
= CHAR_TO_BYTE (from
);
4638 to_byte
= CHAR_TO_BYTE (to
);
4639 len_byte
= to_byte
- from_byte
;
4640 TEMP_SET_PT_BOTH (from
, from_byte
);
4645 deletion
= make_buffer_string_both (from
, from_byte
, to
, to_byte
, 1);
4647 if (coding
->composing
!= COMPOSITION_DISABLED
)
4650 coding_save_composition (coding
, from
, to
, Fcurrent_buffer ());
4652 coding_allocate_composition_data (coding
, from
);
4655 /* Try to skip the leading and trailing ASCII characters. */
4657 int from_byte_orig
= from_byte
, to_byte_orig
= to_byte
;
4659 if (from
< GPT
&& GPT
< to
)
4660 move_gap_both (from
, from_byte
);
4661 SHRINK_CONVERSION_REGION (&from_byte
, &to_byte
, coding
, NULL
, encodep
);
4662 if (from_byte
== to_byte
4663 && (encodep
|| NILP (coding
->post_read_conversion
))
4664 && ! CODING_REQUIRE_FLUSHING (coding
))
4666 coding
->produced
= len_byte
;
4667 coding
->produced_char
= len
;
4669 /* We must record and adjust for this new text now. */
4670 adjust_after_insert (from
, from_byte_orig
, to
, to_byte_orig
, len
);
4674 head_skip
= from_byte
- from_byte_orig
;
4675 tail_skip
= to_byte_orig
- to_byte
;
4676 total_skip
= head_skip
+ tail_skip
;
4679 len
-= total_skip
; len_byte
-= total_skip
;
4682 /* The code conversion routine can not preserve text properties for
4683 now. So, we must remove all text properties in the region.
4684 Here, we must suppress all modification hooks. */
4687 int saved_inhibit_modification_hooks
= inhibit_modification_hooks
;
4688 inhibit_modification_hooks
= 1;
4689 Fset_text_properties (make_number (from
), make_number (to
), Qnil
, Qnil
);
4690 inhibit_modification_hooks
= saved_inhibit_modification_hooks
;
4693 /* For conversion, we must put the gap before the text in addition to
4694 making the gap larger for efficient decoding. The required gap
4695 size starts from 2000 which is the magic number used in make_gap.
4696 But, after one batch of conversion, it will be incremented if we
4697 find that it is not enough. */
4700 if (GAP_SIZE
< require
)
4701 make_gap (require
- GAP_SIZE
);
4702 move_gap_both (from
, from_byte
);
4704 inserted
= inserted_byte
= 0;
4706 GAP_SIZE
+= len_byte
;
4709 ZV_BYTE
-= len_byte
;
4712 if (GPT
- BEG
< BEG_UNCHANGED
)
4713 BEG_UNCHANGED
= GPT
- BEG
;
4714 if (Z
- GPT
< END_UNCHANGED
)
4715 END_UNCHANGED
= Z
- GPT
;
4717 if (!encodep
&& coding
->src_multibyte
)
4719 /* Decoding routines expect that the source text is unibyte.
4720 We must convert 8-bit characters of multibyte form to
4722 int len_byte_orig
= len_byte
;
4723 len_byte
= str_as_unibyte (GAP_END_ADDR
- len_byte
, len_byte
);
4724 if (len_byte
< len_byte_orig
)
4725 safe_bcopy (GAP_END_ADDR
- len_byte_orig
, GAP_END_ADDR
- len_byte
,
4727 coding
->src_multibyte
= 0;
4734 /* The buffer memory is now:
4735 +--------+converted-text+---------+-------original-text-------+---+
4736 |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
4737 |<---------------------- GAP ----------------------->| */
4738 src
= GAP_END_ADDR
- len_byte
;
4739 dst
= GPT_ADDR
+ inserted_byte
;
4742 result
= encode_coding (coding
, src
, dst
, len_byte
, 0);
4744 result
= decode_coding (coding
, src
, dst
, len_byte
, 0);
4746 /* The buffer memory is now:
4747 +--------+-------converted-text----+--+------original-text----+---+
4748 |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
4749 |<---------------------- GAP ----------------------->| */
4751 inserted
+= coding
->produced_char
;
4752 inserted_byte
+= coding
->produced
;
4753 len_byte
-= coding
->consumed
;
4755 if (result
== CODING_FINISH_INSUFFICIENT_CMP
)
4757 coding_allocate_composition_data (coding
, from
+ inserted
);
4761 src
+= coding
->consumed
;
4762 dst
+= coding
->produced
;
4764 if (result
== CODING_FINISH_NORMAL
)
4769 if (! encodep
&& result
== CODING_FINISH_INCONSISTENT_EOL
)
4771 unsigned char *pend
= dst
, *p
= pend
- inserted_byte
;
4772 Lisp_Object eol_type
;
4774 /* Encode LFs back to the original eol format (CR or CRLF). */
4775 if (coding
->eol_type
== CODING_EOL_CR
)
4777 while (p
< pend
) if (*p
++ == '\n') p
[-1] = '\r';
4783 while (p
< pend
) if (*p
++ == '\n') count
++;
4784 if (src
- dst
< count
)
4786 /* We don't have sufficient room for encoding LFs
4787 back to CRLF. We must record converted and
4788 not-yet-converted text back to the buffer
4789 content, enlarge the gap, then record them out of
4790 the buffer contents again. */
4791 int add
= len_byte
+ inserted_byte
;
4794 ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
4795 GPT
+= inserted_byte
; GPT_BYTE
+= inserted_byte
;
4796 make_gap (count
- GAP_SIZE
);
4798 ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
4799 GPT
-= inserted_byte
; GPT_BYTE
-= inserted_byte
;
4800 /* Don't forget to update SRC, DST, and PEND. */
4801 src
= GAP_END_ADDR
- len_byte
;
4802 dst
= GPT_ADDR
+ inserted_byte
;
4806 inserted_byte
+= count
;
4807 coding
->produced
+= count
;
4808 p
= dst
= pend
+ count
;
4812 if (*p
== '\n') count
--, *--p
= '\r';
4816 /* Suppress eol-format conversion in the further conversion. */
4817 coding
->eol_type
= CODING_EOL_LF
;
4819 /* Set the coding system symbol to that for Unix-like EOL. */
4820 eol_type
= Fget (saved_coding_symbol
, Qeol_type
);
4821 if (VECTORP (eol_type
)
4822 && XVECTOR (eol_type
)->size
== 3
4823 && SYMBOLP (XVECTOR (eol_type
)->contents
[CODING_EOL_LF
]))
4824 coding
->symbol
= XVECTOR (eol_type
)->contents
[CODING_EOL_LF
];
4826 coding
->symbol
= saved_coding_symbol
;
4832 if (coding
->type
!= coding_type_ccl
4833 || coding
->mode
& CODING_MODE_LAST_BLOCK
)
4835 coding
->mode
|= CODING_MODE_LAST_BLOCK
;
4838 if (result
== CODING_FINISH_INSUFFICIENT_SRC
)
4840 /* The source text ends in invalid codes. Let's just
4841 make them valid buffer contents, and finish conversion. */
4842 inserted
+= len_byte
;
4843 inserted_byte
+= len_byte
;
4848 if (result
== CODING_FINISH_INTERRUPT
)
4850 /* The conversion procedure was interrupted by a user. */
4853 /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */
4854 if (coding
->consumed
< 1)
4856 /* It's quite strange to require more memory without
4857 consuming any bytes. Perhaps CCL program bug. */
4862 /* We have just done the first batch of conversion which was
4863 stopped because of insufficient gap. Let's reconsider the
4864 required gap size (i.e. SRC - DST) now.
4866 We have converted ORIG bytes (== coding->consumed) into
4867 NEW bytes (coding->produced). To convert the remaining
4868 LEN bytes, we may need REQUIRE bytes of gap, where:
4869 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4870 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4871 Here, we are sure that NEW >= ORIG. */
4872 float ratio
= coding
->produced
- coding
->consumed
;
4873 ratio
/= coding
->consumed
;
4874 require
= len_byte
* ratio
;
4877 if ((src
- dst
) < (require
+ 2000))
4879 /* See the comment above the previous call of make_gap. */
4880 int add
= len_byte
+ inserted_byte
;
4883 ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
4884 GPT
+= inserted_byte
; GPT_BYTE
+= inserted_byte
;
4885 make_gap (require
+ 2000);
4887 ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
4888 GPT
-= inserted_byte
; GPT_BYTE
-= inserted_byte
;
4891 if (src
- dst
> 0) *dst
= 0; /* Put an anchor. */
4893 if (encodep
&& coding
->dst_multibyte
)
4895 /* The output is unibyte. We must convert 8-bit characters to
4897 if (inserted_byte
* 2 > GAP_SIZE
)
4899 GAP_SIZE
-= inserted_byte
;
4900 ZV
+= inserted_byte
; Z
+= inserted_byte
;
4901 ZV_BYTE
+= inserted_byte
; Z_BYTE
+= inserted_byte
;
4902 GPT
+= inserted_byte
; GPT_BYTE
+= inserted_byte
;
4903 make_gap (inserted_byte
- GAP_SIZE
);
4904 GAP_SIZE
+= inserted_byte
;
4905 ZV
-= inserted_byte
; Z
-= inserted_byte
;
4906 ZV_BYTE
-= inserted_byte
; Z_BYTE
-= inserted_byte
;
4907 GPT
-= inserted_byte
; GPT_BYTE
-= inserted_byte
;
4909 inserted_byte
= str_to_multibyte (GPT_ADDR
, GAP_SIZE
, inserted_byte
);
4912 /* If we have shrunk the conversion area, adjust it now. */
4916 safe_bcopy (GAP_END_ADDR
, GPT_ADDR
+ inserted_byte
, tail_skip
);
4917 inserted
+= total_skip
; inserted_byte
+= total_skip
;
4918 GAP_SIZE
+= total_skip
;
4919 GPT
-= head_skip
; GPT_BYTE
-= head_skip
;
4920 ZV
-= total_skip
; ZV_BYTE
-= total_skip
;
4921 Z
-= total_skip
; Z_BYTE
-= total_skip
;
4922 from
-= head_skip
; from_byte
-= head_skip
;
4923 to
+= tail_skip
; to_byte
+= tail_skip
;
4927 adjust_after_replace (from
, from_byte
, deletion
, inserted
, inserted_byte
);
4928 inserted
= Z
- prev_Z
;
4930 if (!encodep
&& coding
->cmp_data
&& coding
->cmp_data
->used
)
4931 coding_restore_composition (coding
, Fcurrent_buffer ());
4932 coding_free_composition_data (coding
);
4934 if (! inhibit_pre_post_conversion
4935 && ! encodep
&& ! NILP (coding
->post_read_conversion
))
4938 int count
= specpdl_ptr
- specpdl
;
4941 TEMP_SET_PT_BOTH (from
, from_byte
);
4943 record_unwind_protect (code_convert_region_unwind
, Qnil
);
4944 /* We should not call any more pre-write/post-read-conversion
4945 functions while this post-read-conversion is running. */
4946 inhibit_pre_post_conversion
= 1;
4947 val
= call1 (coding
->post_read_conversion
, make_number (inserted
));
4948 inhibit_pre_post_conversion
= 0;
4949 /* Discard the unwind protect. */
4951 CHECK_NUMBER (val
, 0);
4952 inserted
+= Z
- prev_Z
;
4955 if (orig_point
>= from
)
4957 if (orig_point
>= from
+ orig_len
)
4958 orig_point
+= inserted
- orig_len
;
4961 TEMP_SET_PT (orig_point
);
4966 signal_after_change (from
, to
- from
, inserted
);
4967 update_compositions (from
, from
+ inserted
, CHECK_BORDER
);
4971 coding
->consumed
= to_byte
- from_byte
;
4972 coding
->consumed_char
= to
- from
;
4973 coding
->produced
= inserted_byte
;
4974 coding
->produced_char
= inserted
;
4981 run_pre_post_conversion_on_str (str
, coding
, encodep
)
4983 struct coding_system
*coding
;
4986 int count
= specpdl_ptr
- specpdl
;
4987 struct gcpro gcpro1
;
4988 struct buffer
*prev
= current_buffer
;
4989 int multibyte
= STRING_MULTIBYTE (str
);
4991 record_unwind_protect (Fset_buffer
, Fcurrent_buffer ());
4992 record_unwind_protect (code_convert_region_unwind
, Qnil
);
4994 temp_output_buffer_setup (" *code-converting-work*");
4995 set_buffer_internal (XBUFFER (Vstandard_output
));
4996 /* We must insert the contents of STR as is without
4997 unibyte<->multibyte conversion. For that, we adjust the
4998 multibyteness of the working buffer to that of STR. */
5000 current_buffer
->enable_multibyte_characters
= multibyte
? Qt
: Qnil
;
5001 insert_from_string (str
, 0, 0,
5002 XSTRING (str
)->size
, STRING_BYTES (XSTRING (str
)), 0);
5004 inhibit_pre_post_conversion
= 1;
5006 call2 (coding
->pre_write_conversion
, make_number (BEG
), make_number (Z
));
5009 TEMP_SET_PT_BOTH (BEG
, BEG_BYTE
);
5010 call1 (coding
->post_read_conversion
, make_number (Z
- BEG
));
5012 inhibit_pre_post_conversion
= 0;
5013 str
= make_buffer_string (BEG
, Z
, 0);
5014 return unbind_to (count
, str
);
5018 decode_coding_string (str
, coding
, nocopy
)
5020 struct coding_system
*coding
;
5025 int from
, to
, to_byte
;
5026 struct gcpro gcpro1
;
5027 Lisp_Object saved_coding_symbol
;
5031 to
= XSTRING (str
)->size
;
5032 to_byte
= STRING_BYTES (XSTRING (str
));
5034 saved_coding_symbol
= Qnil
;
5035 if (CODING_REQUIRE_DETECTION (coding
))
5037 /* See the comments in code_convert_region. */
5038 if (coding
->type
== coding_type_undecided
)
5040 detect_coding (coding
, XSTRING (str
)->data
, to_byte
);
5041 if (coding
->type
== coding_type_undecided
)
5042 coding
->type
= coding_type_emacs_mule
;
5044 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
5046 saved_coding_symbol
= coding
->symbol
;
5047 detect_eol (coding
, XSTRING (str
)->data
, to_byte
);
5048 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
5049 coding
->eol_type
= CODING_EOL_LF
;
5050 /* We had better recover the original eol format if we
5051 encounter an inconsistent eol format while decoding. */
5052 coding
->mode
|= CODING_MODE_INHIBIT_INCONSISTENT_EOL
;
5056 if (! CODING_REQUIRE_DECODING (coding
))
5058 if (!STRING_MULTIBYTE (str
))
5060 str
= Fstring_as_multibyte (str
);
5063 return (nocopy
? str
: Fcopy_sequence (str
));
5066 if (STRING_MULTIBYTE (str
))
5068 /* Decoding routines expect the source text to be unibyte. */
5069 str
= Fstring_as_unibyte (str
);
5071 coding
->src_multibyte
= 0;
5073 coding
->dst_multibyte
= 1;
5075 if (coding
->composing
!= COMPOSITION_DISABLED
)
5076 coding_allocate_composition_data (coding
, from
);
5078 /* Try to skip the leading and trailing ASCII characters. */
5080 int from_orig
= from
;
5082 SHRINK_CONVERSION_REGION (&from
, &to_byte
, coding
, XSTRING (str
)->data
,
5084 if (from
== to_byte
)
5085 return (nocopy
? str
: Fcopy_sequence (str
));
5088 len
= decoding_buffer_size (coding
, to_byte
- from
);
5089 len
+= from
+ STRING_BYTES (XSTRING (str
)) - to_byte
;
5091 buf
= get_conversion_buffer (len
);
5095 bcopy (XSTRING (str
)->data
, buf
, from
);
5096 result
= decode_coding (coding
, XSTRING (str
)->data
+ from
,
5097 buf
+ from
, to_byte
- from
, len
);
5098 if (result
== CODING_FINISH_INCONSISTENT_EOL
)
5100 /* We simply try to decode the whole string again but without
5101 eol-conversion this time. */
5102 coding
->eol_type
= CODING_EOL_LF
;
5103 coding
->symbol
= saved_coding_symbol
;
5104 coding_free_composition_data (coding
);
5105 return decode_coding_string (str
, coding
, nocopy
);
5108 bcopy (XSTRING (str
)->data
+ to_byte
, buf
+ from
+ coding
->produced
,
5109 STRING_BYTES (XSTRING (str
)) - to_byte
);
5111 len
= from
+ STRING_BYTES (XSTRING (str
)) - to_byte
;
5112 str
= make_multibyte_string (buf
, len
+ coding
->produced_char
,
5113 len
+ coding
->produced
);
5115 if (coding
->cmp_data
&& coding
->cmp_data
->used
)
5116 coding_restore_composition (coding
, str
);
5117 coding_free_composition_data (coding
);
5119 if (SYMBOLP (coding
->post_read_conversion
)
5120 && !NILP (Ffboundp (coding
->post_read_conversion
)))
5121 str
= run_pre_post_conversion_on_str (str
, coding
, 0);
5127 encode_coding_string (str
, coding
, nocopy
)
5129 struct coding_system
*coding
;
5134 int from
, to
, to_byte
;
5135 struct gcpro gcpro1
;
5136 Lisp_Object saved_coding_symbol
;
5139 if (SYMBOLP (coding
->pre_write_conversion
)
5140 && !NILP (Ffboundp (coding
->pre_write_conversion
)))
5141 str
= run_pre_post_conversion_on_str (str
, coding
, 1);
5144 to
= XSTRING (str
)->size
;
5145 to_byte
= STRING_BYTES (XSTRING (str
));
5147 saved_coding_symbol
= Qnil
;
5148 if (! CODING_REQUIRE_ENCODING (coding
))
5150 if (STRING_MULTIBYTE (str
))
5152 str
= Fstring_as_unibyte (str
);
5155 return (nocopy
? str
: Fcopy_sequence (str
));
5158 /* Encoding routines determine the multibyteness of the source text
5159 by coding->src_multibyte. */
5160 coding
->src_multibyte
= STRING_MULTIBYTE (str
);
5161 coding
->dst_multibyte
= 0;
5163 if (coding
->composing
!= COMPOSITION_DISABLED
)
5164 coding_save_composition (coding
, from
, to
, str
);
5166 /* Try to skip the leading and trailing ASCII characters. */
5168 int from_orig
= from
;
5170 SHRINK_CONVERSION_REGION (&from
, &to_byte
, coding
, XSTRING (str
)->data
,
5172 if (from
== to_byte
)
5173 return (nocopy
? str
: Fcopy_sequence (str
));
5176 len
= encoding_buffer_size (coding
, to_byte
- from
);
5177 len
+= from
+ STRING_BYTES (XSTRING (str
)) - to_byte
;
5179 buf
= get_conversion_buffer (len
);
5183 bcopy (XSTRING (str
)->data
, buf
, from
);
5184 result
= encode_coding (coding
, XSTRING (str
)->data
+ from
,
5185 buf
+ from
, to_byte
- from
, len
);
5186 bcopy (XSTRING (str
)->data
+ to_byte
, buf
+ from
+ coding
->produced
,
5187 STRING_BYTES (XSTRING (str
)) - to_byte
);
5189 len
= from
+ STRING_BYTES (XSTRING (str
)) - to_byte
;
5190 str
= make_unibyte_string (buf
, len
+ coding
->produced
);
5191 coding_free_composition_data (coding
);
5198 /*** 8. Emacs Lisp library functions ***/
5200 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
5201 "Return t if OBJECT is nil or a coding-system.\n\
5202 See the documentation of `make-coding-system' for information\n\
5203 about coding-system objects.")
5211 /* Get coding-spec vector for OBJ. */
5212 obj
= Fget (obj
, Qcoding_system
);
5213 return ((VECTORP (obj
) && XVECTOR (obj
)->size
== 5)
5217 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
5218 Sread_non_nil_coding_system
, 1, 1, 0,
5219 "Read a coding system from the minibuffer, prompting with string PROMPT.")
5226 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
5227 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
5229 while (XSTRING (val
)->size
== 0);
5230 return (Fintern (val
, Qnil
));
5233 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
5234 "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
5235 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
5236 (prompt
, default_coding_system
)
5237 Lisp_Object prompt
, default_coding_system
;
5240 if (SYMBOLP (default_coding_system
))
5241 XSETSTRING (default_coding_system
, XSYMBOL (default_coding_system
)->name
);
5242 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
5243 Qt
, Qnil
, Qcoding_system_history
,
5244 default_coding_system
, Qnil
);
5245 return (XSTRING (val
)->size
== 0 ? Qnil
: Fintern (val
, Qnil
));
5248 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
5250 "Check validity of CODING-SYSTEM.\n\
5251 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
5252 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
5253 The value of property should be a vector of length 5.")
5255 Lisp_Object coding_system
;
5257 CHECK_SYMBOL (coding_system
, 0);
5258 if (!NILP (Fcoding_system_p (coding_system
)))
5259 return coding_system
;
5261 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
5265 detect_coding_system (src
, src_bytes
, highest
)
5267 int src_bytes
, highest
;
5269 int coding_mask
, eol_type
;
5270 Lisp_Object val
, tmp
;
5273 coding_mask
= detect_coding_mask (src
, src_bytes
, NULL
, &dummy
);
5274 eol_type
= detect_eol_type (src
, src_bytes
, &dummy
);
5275 if (eol_type
== CODING_EOL_INCONSISTENT
)
5276 eol_type
= CODING_EOL_UNDECIDED
;
5281 if (eol_type
!= CODING_EOL_UNDECIDED
)
5284 val2
= Fget (Qundecided
, Qeol_type
);
5286 val
= XVECTOR (val2
)->contents
[eol_type
];
5288 return (highest
? val
: Fcons (val
, Qnil
));
5291 /* At first, gather possible coding systems in VAL. */
5293 for (tmp
= Vcoding_category_list
; CONSP (tmp
); tmp
= XCDR (tmp
))
5295 Lisp_Object category_val
, category_index
;
5297 category_index
= Fget (XCAR (tmp
), Qcoding_category_index
);
5298 category_val
= Fsymbol_value (XCAR (tmp
));
5299 if (!NILP (category_val
)
5300 && NATNUMP (category_index
)
5301 && (coding_mask
& (1 << XFASTINT (category_index
))))
5303 val
= Fcons (category_val
, val
);
5309 val
= Fnreverse (val
);
5311 /* Then, replace the elements with subsidiary coding systems. */
5312 for (tmp
= val
; CONSP (tmp
); tmp
= XCDR (tmp
))
5314 if (eol_type
!= CODING_EOL_UNDECIDED
5315 && eol_type
!= CODING_EOL_INCONSISTENT
)
5318 eol
= Fget (XCAR (tmp
), Qeol_type
);
5320 XCAR (tmp
) = XVECTOR (eol
)->contents
[eol_type
];
5323 return (highest
? XCAR (val
) : val
);
5326 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
5328 "Detect coding system of the text in the region between START and END.\n\
5329 Return a list of possible coding systems ordered by priority.\n\
5331 If only ASCII characters are found, it returns a list of single element\n\
5332 `undecided' or its subsidiary coding system according to a detected\n\
5333 end-of-line format.\n\
5335 If optional argument HIGHEST is non-nil, return the coding system of\n\
5337 (start
, end
, highest
)
5338 Lisp_Object start
, end
, highest
;
5341 int from_byte
, to_byte
;
5343 CHECK_NUMBER_COERCE_MARKER (start
, 0);
5344 CHECK_NUMBER_COERCE_MARKER (end
, 1);
5346 validate_region (&start
, &end
);
5347 from
= XINT (start
), to
= XINT (end
);
5348 from_byte
= CHAR_TO_BYTE (from
);
5349 to_byte
= CHAR_TO_BYTE (to
);
5351 if (from
< GPT
&& to
>= GPT
)
5352 move_gap_both (to
, to_byte
);
5354 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
5355 to_byte
- from_byte
,
5359 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
5361 "Detect coding system of the text in STRING.\n\
5362 Return a list of possible coding systems ordered by priority.\n\
5364 If only ASCII characters are found, it returns a list of single element\n\
5365 `undecided' or its subsidiary coding system according to a detected\n\
5366 end-of-line format.\n\
5368 If optional argument HIGHEST is non-nil, return the coding system of\n\
5371 Lisp_Object string
, highest
;
5373 CHECK_STRING (string
, 0);
5375 return detect_coding_system (XSTRING (string
)->data
,
5376 STRING_BYTES (XSTRING (string
)),
5381 code_convert_region1 (start
, end
, coding_system
, encodep
)
5382 Lisp_Object start
, end
, coding_system
;
5385 struct coding_system coding
;
5388 CHECK_NUMBER_COERCE_MARKER (start
, 0);
5389 CHECK_NUMBER_COERCE_MARKER (end
, 1);
5390 CHECK_SYMBOL (coding_system
, 2);
5392 validate_region (&start
, &end
);
5393 from
= XFASTINT (start
);
5394 to
= XFASTINT (end
);
5396 if (NILP (coding_system
))
5397 return make_number (to
- from
);
5399 if (setup_coding_system (Fcheck_coding_system (coding_system
), &coding
) < 0)
5400 error ("Invalid coding system: %s", XSYMBOL (coding_system
)->name
->data
);
5402 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
5403 coding
.src_multibyte
= coding
.dst_multibyte
5404 = !NILP (current_buffer
->enable_multibyte_characters
);
5405 code_convert_region (from
, CHAR_TO_BYTE (from
), to
, CHAR_TO_BYTE (to
),
5406 &coding
, encodep
, 1);
5407 Vlast_coding_system_used
= coding
.symbol
;
5408 return make_number (coding
.produced_char
);
5411 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
5412 3, 3, "r\nzCoding system: ",
5413 "Decode the current region by specified coding system.\n\
5414 When called from a program, takes three arguments:\n\
5415 START, END, and CODING-SYSTEM. START and END are buffer positions.\n\
5416 This function sets `last-coding-system-used' to the precise coding system\n\
5417 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5418 not fully specified.)\n\
5419 It returns the length of the decoded text.")
5420 (start
, end
, coding_system
)
5421 Lisp_Object start
, end
, coding_system
;
5423 return code_convert_region1 (start
, end
, coding_system
, 0);
5426 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
5427 3, 3, "r\nzCoding system: ",
5428 "Encode the current region by specified coding system.\n\
5429 When called from a program, takes three arguments:\n\
5430 START, END, and CODING-SYSTEM. START and END are buffer positions.\n\
5431 This function sets `last-coding-system-used' to the precise coding system\n\
5432 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5433 not fully specified.)\n\
5434 It returns the length of the encoded text.")
5435 (start
, end
, coding_system
)
5436 Lisp_Object start
, end
, coding_system
;
5438 return code_convert_region1 (start
, end
, coding_system
, 1);
5442 code_convert_string1 (string
, coding_system
, nocopy
, encodep
)
5443 Lisp_Object string
, coding_system
, nocopy
;
5446 struct coding_system coding
;
5448 CHECK_STRING (string
, 0);
5449 CHECK_SYMBOL (coding_system
, 1);
5451 if (NILP (coding_system
))
5452 return (NILP (nocopy
) ? Fcopy_sequence (string
) : string
);
5454 if (setup_coding_system (Fcheck_coding_system (coding_system
), &coding
) < 0)
5455 error ("Invalid coding system: %s", XSYMBOL (coding_system
)->name
->data
);
5457 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
5459 ? encode_coding_string (string
, &coding
, !NILP (nocopy
))
5460 : decode_coding_string (string
, &coding
, !NILP (nocopy
)));
5461 Vlast_coding_system_used
= coding
.symbol
;
5466 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
5468 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
5469 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5470 if the decoding operation is trivial.\n\
5471 This function sets `last-coding-system-used' to the precise coding system\n\
5472 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5473 not fully specified.)")
5474 (string
, coding_system
, nocopy
)
5475 Lisp_Object string
, coding_system
, nocopy
;
5477 return code_convert_string1 (string
, coding_system
, nocopy
, 0);
5480 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
5482 "Encode STRING to CODING-SYSTEM, and return the result.\n\
5483 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5484 if the encoding operation is trivial.\n\
5485 This function sets `last-coding-system-used' to the precise coding system\n\
5486 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5487 not fully specified.)")
5488 (string
, coding_system
, nocopy
)
5489 Lisp_Object string
, coding_system
, nocopy
;
5491 return code_convert_string1 (string
, coding_system
, nocopy
, 1);
5494 /* Encode or decode STRING according to CODING_SYSTEM.
5495 Do not set Vlast_coding_system_used.
5497 This function is called only from macros DECODE_FILE and
5498 ENCODE_FILE, thus we ignore character composition. */
5501 code_convert_string_norecord (string
, coding_system
, encodep
)
5502 Lisp_Object string
, coding_system
;
5505 struct coding_system coding
;
5507 CHECK_STRING (string
, 0);
5508 CHECK_SYMBOL (coding_system
, 1);
5510 if (NILP (coding_system
))
5513 if (setup_coding_system (Fcheck_coding_system (coding_system
), &coding
) < 0)
5514 error ("Invalid coding system: %s", XSYMBOL (coding_system
)->name
->data
);
5516 coding
.composing
= COMPOSITION_DISABLED
;
5517 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
5519 ? encode_coding_string (string
, &coding
, 1)
5520 : decode_coding_string (string
, &coding
, 1));
5523 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
5524 "Decode a Japanese character which has CODE in shift_jis encoding.\n\
5525 Return the corresponding character.")
5529 unsigned char c1
, c2
, s1
, s2
;
5532 CHECK_NUMBER (code
, 0);
5533 s1
= (XFASTINT (code
)) >> 8, s2
= (XFASTINT (code
)) & 0xFF;
5537 XSETFASTINT (val
, s2
);
5538 else if (s2
>= 0xA0 || s2
<= 0xDF)
5539 XSETFASTINT (val
, MAKE_CHAR (charset_katakana_jisx0201
, s2
, 0));
5541 error ("Invalid Shift JIS code: %x", XFASTINT (code
));
5545 if ((s1
< 0x80 || s1
> 0x9F && s1
< 0xE0 || s1
> 0xEF)
5546 || (s2
< 0x40 || s2
== 0x7F || s2
> 0xFC))
5547 error ("Invalid Shift JIS code: %x", XFASTINT (code
));
5548 DECODE_SJIS (s1
, s2
, c1
, c2
);
5549 XSETFASTINT (val
, MAKE_CHAR (charset_jisx0208
, c1
, c2
));
5554 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
5555 "Encode a Japanese character CHAR to shift_jis encoding.\n\
5556 Return the corresponding code in SJIS.")
5560 int charset
, c1
, c2
, s1
, s2
;
5563 CHECK_NUMBER (ch
, 0);
5564 SPLIT_CHAR (XFASTINT (ch
), charset
, c1
, c2
);
5565 if (charset
== CHARSET_ASCII
)
5569 else if (charset
== charset_jisx0208
5570 && c1
> 0x20 && c1
< 0x7F && c2
> 0x20 && c2
< 0x7F)
5572 ENCODE_SJIS (c1
, c2
, s1
, s2
);
5573 XSETFASTINT (val
, (s1
<< 8) | s2
);
5575 else if (charset
== charset_katakana_jisx0201
5576 && c1
> 0x20 && c2
< 0xE0)
5578 XSETFASTINT (val
, c1
| 0x80);
5581 error ("Can't encode to shift_jis: %d", XFASTINT (ch
));
5585 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
5586 "Decode a Big5 character which has CODE in BIG5 coding system.\n\
5587 Return the corresponding character.")
5592 unsigned char b1
, b2
, c1
, c2
;
5595 CHECK_NUMBER (code
, 0);
5596 b1
= (XFASTINT (code
)) >> 8, b2
= (XFASTINT (code
)) & 0xFF;
5600 error ("Invalid BIG5 code: %x", XFASTINT (code
));
5605 if ((b1
< 0xA1 || b1
> 0xFE)
5606 || (b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE))
5607 error ("Invalid BIG5 code: %x", XFASTINT (code
));
5608 DECODE_BIG5 (b1
, b2
, charset
, c1
, c2
);
5609 XSETFASTINT (val
, MAKE_CHAR (charset
, c1
, c2
));
5614 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
5615 "Encode the Big5 character CHAR to BIG5 coding system.\n\
5616 Return the corresponding character code in Big5.")
5620 int charset
, c1
, c2
, b1
, b2
;
5623 CHECK_NUMBER (ch
, 0);
5624 SPLIT_CHAR (XFASTINT (ch
), charset
, c1
, c2
);
5625 if (charset
== CHARSET_ASCII
)
5629 else if ((charset
== charset_big5_1
5630 && (XFASTINT (ch
) >= 0x250a1 && XFASTINT (ch
) <= 0x271ec))
5631 || (charset
== charset_big5_2
5632 && XFASTINT (ch
) >= 0x290a1 && XFASTINT (ch
) <= 0x2bdb2))
5634 ENCODE_BIG5 (charset
, c1
, c2
, b1
, b2
);
5635 XSETFASTINT (val
, (b1
<< 8) | b2
);
5638 error ("Can't encode to Big5: %d", XFASTINT (ch
));
5642 DEFUN ("set-terminal-coding-system-internal",
5643 Fset_terminal_coding_system_internal
,
5644 Sset_terminal_coding_system_internal
, 1, 1, 0, "")
5646 Lisp_Object coding_system
;
5648 CHECK_SYMBOL (coding_system
, 0);
5649 setup_coding_system (Fcheck_coding_system (coding_system
), &terminal_coding
);
5650 /* We had better not send unsafe characters to terminal. */
5651 terminal_coding
.flags
|= CODING_FLAG_ISO_SAFE
;
5652 /* Character composition should be disabled. */
5653 terminal_coding
.composing
= COMPOSITION_DISABLED
;
5654 terminal_coding
.src_multibyte
= 1;
5655 terminal_coding
.dst_multibyte
= 0;
5659 DEFUN ("set-safe-terminal-coding-system-internal",
5660 Fset_safe_terminal_coding_system_internal
,
5661 Sset_safe_terminal_coding_system_internal
, 1, 1, 0, "")
5663 Lisp_Object coding_system
;
5665 CHECK_SYMBOL (coding_system
, 0);
5666 setup_coding_system (Fcheck_coding_system (coding_system
),
5667 &safe_terminal_coding
);
5668 /* Character composition should be disabled. */
5669 safe_terminal_coding
.composing
= COMPOSITION_DISABLED
;
5670 safe_terminal_coding
.src_multibyte
= 1;
5671 safe_terminal_coding
.dst_multibyte
= 0;
5675 DEFUN ("terminal-coding-system",
5676 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
5677 "Return coding system specified for terminal output.")
5680 return terminal_coding
.symbol
;
/* Lisp primitive `set-keyboard-coding-system-internal' (min 1 / max 1
   argument, empty doc string).  Checks that CODING_SYSTEM is a symbol,
   fills in `keyboard_coding' via setup_coding_system, and disables
   composition on it.
   NOTE(review): parameter list, braces and return statement are absent
   from this listing.  */
5683 DEFUN ("set-keyboard-coding-system-internal",
5684 Fset_keyboard_coding_system_internal
,
5685 Sset_keyboard_coding_system_internal
, 1, 1, 0, "")
5687 Lisp_Object coding_system
;
5689 CHECK_SYMBOL (coding_system
, 0);
5690 setup_coding_system (Fcheck_coding_system (coding_system
), &keyboard_coding
);
5691 /* Character composition should be disabled. */
5692 keyboard_coding
.composing
= COMPOSITION_DISABLED
;
/* Lisp primitive `keyboard-coding-system' (no arguments): returns the
   `symbol' member of the `keyboard_coding' structure.  */
5696 DEFUN ("keyboard-coding-system",
5697 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
5698 "Return coding system specified for decoding keyboard input.")
5701 return keyboard_coding
.symbol
;
/* Lisp primitive `find-operation-coding-system' (1..MANY arguments).
   args[0] is OPERATION; its `target-idx' property gives the position,
   among the remaining arguments, of the TARGET to look up.  TARGET is
   matched against the alist chosen by OPERATION (file, network or
   process alist); a matching entry's value is returned as a cons of
   coding systems, either directly or after calling it as a function.
   NOTE(review): the parameter declarations, braces and several short
   statements (e.g. the guard in front of "Too few arguments", the
   bindings of `elt' and `val', the final return) are absent from this
   listing -- confirm against the complete file.  */
5705 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
5706 Sfind_operation_coding_system
, 1, MANY
, 0,
5707 "Choose a coding system for an operation based on the target name.\n\
5708 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
5709 DECODING-SYSTEM is the coding system to use for decoding\n\
5710 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
5711 for encoding (in case OPERATION does encoding).\n\
5713 The first argument OPERATION specifies an I/O primitive:\n\
5714 For file I/O, `insert-file-contents' or `write-region'.\n\
5715 For process I/O, `call-process', `call-process-region', or `start-process'.\n\
5716 For network I/O, `open-network-stream'.\n\
5718 The remaining arguments should be the same arguments that were passed\n\
5719 to the primitive. Depending on which primitive, one of those arguments\n\
5720 is selected as the TARGET. For example, if OPERATION does file I/O,\n\
5721 whichever argument specifies the file name is TARGET.\n\
5723 TARGET has a meaning which depends on OPERATION:\n\
5724 For file I/O, TARGET is a file name.\n\
5725 For process I/O, TARGET is a process name.\n\
5726 For network I/O, TARGET is a service name or a port number\n\
5728 This function looks up what specified for TARGET in,\n\
5729 `file-coding-system-alist', `process-coding-system-alist',\n\
5730 or `network-coding-system-alist' depending on OPERATION.\n\
5731 They may specify a coding system, a cons of coding systems,\n\
5732 or a function symbol to call.\n\
5733 In the last case, we call the function with one argument,\n\
5734 which is a list of all the arguments given to this function.")
5739 Lisp_Object operation
, target_idx
, target
, val
;
5740 register Lisp_Object chain
;
5743 error ("Too few arguments");
5744 operation
= args
[0];
/* OPERATION must be a symbol carrying an integer `target-idx' property.
   NOTE(review): a negative property value is not rejected here.  */
5745 if (!SYMBOLP (operation
)
5746 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
5747 error ("Invalid first argument");
/* TARGET is fetched from args[XINT (target_idx) + 1] just below, so that
   index has to be strictly less than NARGS.  The comparison is `<='
   (it used to be `<', which let nargs == 1 + target_idx through and
   read one element past the end of ARGS).  */
5748 if (nargs
<= 1 + XINT (target_idx
))
5749 error ("Too few arguments for operation: %s",
5750 XSYMBOL (operation
)->name
->data
);
5751 target
= args
[XINT (target_idx
) + 1];
/* TARGET must be a string; only for `open-network-stream' may it also
   be an integer.  */
5752 if (!(STRINGP (target
)
5753 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
5754 error ("Invalid %dth argument", XINT (target_idx
) + 1);
/* Pick the alist to search: the file alist for `insert-file-contents'
   and `write-region', the network alist for `open-network-stream',
   and the process alist for everything else.  */
5756 chain
= ((EQ (operation
, Qinsert_file_contents
)
5757 || EQ (operation
, Qwrite_region
))
5758 ? Vfile_coding_system_alist
5759 : (EQ (operation
, Qopen_network_stream
)
5760 ? Vnetwork_coding_system_alist
5761 : Vprocess_coding_system_alist
));
/* Walk the alist.  An entry matches when its car is a string pattern
   that fast_string_match finds in a string TARGET, or when an integer
   TARGET is EQ to its car.  */
5765 for (; CONSP (chain
); chain
= XCDR (chain
))
5771 && ((STRINGP (target
)
5772 && STRINGP (XCAR (elt
))
5773 && fast_string_match (XCAR (elt
), target
) >= 0)
5774 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
5777 /* Here, if VAL is both a valid coding system and a valid
5778 function symbol, we return VAL as a coding system. */
5781 if (! SYMBOLP (val
))
5783 if (! NILP (Fcoding_system_p (val
)))
5784 return Fcons (val
, val
);
/* Otherwise, if VAL names a function, call it with a single argument:
   the list of all arguments this primitive received.  */
5785 if (! NILP (Ffboundp (val
)))
5787 val
= call1 (val
, Flist (nargs
, args
));
5790 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
5791 return Fcons (val
, val
);
/* Lisp primitive `update-coding-systems-internal' (no arguments).
   For every category index from CODING_CATEGORY_IDX_EMACS_MULE up to,
   but not including, CODING_CATEGORY_IDX_MAX it reads the value cell of
   the i-th symbol in Vcoding_category_table and refreshes
   coding_system_table[i] accordingly.
   NOTE(review): local declarations, braces, the return statement and
   the `if' that selects between the two branches below are absent from
   this listing -- confirm the exact condition against the full file.  */
5799 DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal
,
5800 Supdate_coding_systems_internal
, 0, 0, 0,
5801 "Update internal database for ISO2022 and CCL based coding systems.\n\
5802 When values of any coding categories are changed, you must\n\
5803 call this function")
5808 for (i
= CODING_CATEGORY_IDX_EMACS_MULE
; i
< CODING_CATEGORY_IDX_MAX
; i
++)
5812 val
= XSYMBOL (XVECTOR (Vcoding_category_table
)->contents
[i
])->value
;
/* First branch: allocate the table slot on first use, then (re)build
   the coding system description from VAL.  */
5815 if (! coding_system_table
[i
])
5816 coding_system_table
[i
] = ((struct coding_system
*)
5817 xmalloc (sizeof (struct coding_system
)));
5818 setup_coding_system (val
, coding_system_table
[i
]);
/* Other branch: release an existing slot and reset it to NULL.  */
5820 else if (coding_system_table
[i
])
5822 xfree (coding_system_table
[i
]);
5823 coding_system_table
[i
] = NULL
;
/* Lisp primitive `set-coding-priority-internal' (no arguments).
   Rebuilds the coding_priorities[] array from Vcoding_category_list:
   each list element contributes the bit mask (1 << idx), where idx is
   its `coding-category-index' property; any slots left over are filled
   with CODING_CATEGORY_MASK_RAW_TEXT.
   NOTE(review): local declarations, the initialisation of `i', the
   statements executed when an element is not a symbol or when idx is
   out of range, the list advance and the return are absent from this
   listing -- confirm against the complete file.  */
5830 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal
,
5831 Sset_coding_priority_internal
, 0, 0, 0,
5832 "Update internal database for the current value of `coding-category-list'.\n\
5833 This function is internal use only.")
5839 val
= Vcoding_category_list
;
/* Consume list elements while slots remain in coding_priorities[].  */
5841 while (CONSP (val
) && i
< CODING_CATEGORY_IDX_MAX
)
5843 if (! SYMBOLP (XCAR (val
)))
5845 idx
= XFASTINT (Fget (XCAR (val
), Qcoding_category_index
));
5846 if (idx
>= CODING_CATEGORY_IDX_MAX
)
5848 coding_priorities
[i
++] = (1 << idx
);
5851 /* If coding-category-list is valid and contains all coding
5852 categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
5853 the following code saves Emacs from crashing. */
5854 while (i
< CODING_CATEGORY_IDX_MAX
)
5855 coding_priorities
[i
++] = CODING_CATEGORY_MASK_RAW_TEXT
;
5863 /*** 9. Post-amble ***/
/* Allocate `conversion_buffer' with MINIMUM_CONVERSION_BUFFER_SIZE
   bytes via xmalloc.
   NOTE(review): the header of the enclosing function is not present in
   this listing.  */
5868 conversion_buffer
= (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE
);
5876 /* Initialise the emacs_code_class[] table used for Emacs' internal
   format.  Later assignments deliberately override earlier ones:
     0x00..0x20  control code (0x0A and 0x0D are then reclassified as
                 linefeed and carriage return)
     0x21..0x7E  ASCII
     0x7F        control code
     0x80..0xFE  invalid, except the four private leading codes that
                 are reclassified at the end
   Index 0xFF is not written by the statements shown here.
   NOTE(review): the enclosing function header and the declaration of
   `i' are not present in this listing.  */
5877 for (i
= 0; i
<= 0x20; i
++)
5878 emacs_code_class
[i
] = EMACS_control_code
;
5879 emacs_code_class
[0x0A] = EMACS_linefeed_code
;
5880 emacs_code_class
[0x0D] = EMACS_carriage_return_code
;
5881 for (i
= 0x21 ; i
< 0x7F; i
++)
5882 emacs_code_class
[i
] = EMACS_ascii_code
;
5883 emacs_code_class
[0x7F] = EMACS_control_code
;
5884 for (i
= 0x80; i
< 0xFF; i
++)
5885 emacs_code_class
[i
] = EMACS_invalid_code
;
/* Private leading codes: the *_1x pair maps to leading_code_3, the
   *_2x pair to leading_code_4.  */
5886 emacs_code_class
[LEADING_CODE_PRIVATE_11
] = EMACS_leading_code_3
;
5887 emacs_code_class
[LEADING_CODE_PRIVATE_12
] = EMACS_leading_code_3
;
5888 emacs_code_class
[LEADING_CODE_PRIVATE_21
] = EMACS_leading_code_4
;
5889 emacs_code_class
[LEADING_CODE_PRIVATE_22
] = EMACS_leading_code_4
;
5891 /* Initialise the iso_code_class[] table used by the ISO2022 handlers.
   The four loops lay down the broad ranges:
     0x00..0x1F  control_0          0x21..0x7E  graphic_plane_0
     0x80..0x9F  control_1          0xA1..0xFE  graphic_plane_1
   The boundary bytes 0x20/0x7F and 0xA0/0xFF get their own classes,
   and finally the individual ISO_CODE_* bytes (CR, SO, SI, SS2_7, ESC,
   SS2, SS3, CSI) override whatever class their range assigned.  */
5892 for (i
= 0; i
< 0x20; i
++)
5893 iso_code_class
[i
] = ISO_control_0
;
5894 for (i
= 0x21; i
< 0x7F; i
++)
5895 iso_code_class
[i
] = ISO_graphic_plane_0
;
5896 for (i
= 0x80; i
< 0xA0; i
++)
5897 iso_code_class
[i
] = ISO_control_1
;
5898 for (i
= 0xA1; i
< 0xFF; i
++)
5899 iso_code_class
[i
] = ISO_graphic_plane_1
;
/* Boundary bytes skipped by the loops above.  */
5900 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
5901 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
/* Special-purpose control bytes; these must come after the range
   loops so that they are not overwritten.  */
5902 iso_code_class
[ISO_CODE_CR
] = ISO_carriage_return
;
5903 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
5904 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
5905 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
5906 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
5907 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
5908 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
5909 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
/* Remaining one-time defaults: record the conversion buffer size, set
   up the four global coding structures from Qnil, zero the
   coding_system_table and ascii_skip_code tables, mark the first 128
   ascii_skip_code entries, choose the system end-of-line type, and
   clear inhibit_pre_post_conversion.  */
5911 conversion_buffer_size
= MINIMUM_CONVERSION_BUFFER_SIZE
;
5913 setup_coding_system (Qnil
, &keyboard_coding
);
5914 setup_coding_system (Qnil
, &terminal_coding
);
5915 setup_coding_system (Qnil
, &safe_terminal_coding
);
5916 setup_coding_system (Qnil
, &default_buffer_file_coding
);
5918 bzero (coding_system_table
, sizeof coding_system_table
);
/* After zeroing, entries 0..127 of ascii_skip_code are set to 1; the
   rest stay 0.  */
5920 bzero (ascii_skip_code
, sizeof ascii_skip_code
);
5921 for (i
= 0; i
< 128; i
++)
5922 ascii_skip_code
[i
] = 1;
/* CRLF when MSDOS or WINDOWSNT is defined; the LF assignment below is
   presumably the alternative branch.
   NOTE(review): the `#else' and `#endif' lines are not present in this
   listing -- confirm against the complete file.  */
5924 #if defined (MSDOS) || defined (WINDOWSNT)
5925 system_eol_type
= CODING_EOL_CRLF
;
5927 system_eol_type
= CODING_EOL_LF
;
5930 inhibit_pre_post_conversion
= 0;
/* Intern the Lisp symbols this file refers to and protect each one
   with staticpro.  Along the way:
     - the `target-idx' property of each I/O primitive symbol is set to
       the zero-based position of its TARGET argument;
     - `coding-system-history' is given the value nil;
     - `coding-system-error' gets its error conditions and message;
     - Vcoding_category_table is created and filled with one symbol per
       coding category, each tagged with its index;
     - `translation-table' gets a `char-table-extra-slots' value of 1.
   The order matters: a symbol is interned before any Fput/Fset on it.
   NOTE(review): Qinsert_file_contents and Qwrite_region are used here
   but not interned in the lines shown; presumably that happens
   elsewhere -- confirm.  The enclosing function header is not present
   in this listing.  */
5938 Qtarget_idx
= intern ("target-idx");
5939 staticpro (&Qtarget_idx
);
5941 Qcoding_system_history
= intern ("coding-system-history");
5942 staticpro (&Qcoding_system_history
);
5943 Fset (Qcoding_system_history
, Qnil
);
5945 /* Target FILENAME is the first argument. */
5946 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
5947 /* Target FILENAME is the third argument. */
5948 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
5950 Qcall_process
= intern ("call-process");
5951 staticpro (&Qcall_process
);
5952 /* Target PROGRAM is the first argument. */
5953 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
5955 Qcall_process_region
= intern ("call-process-region");
5956 staticpro (&Qcall_process_region
);
5957 /* Target PROGRAM is the third argument. */
5958 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
5960 Qstart_process
= intern ("start-process");
5961 staticpro (&Qstart_process
);
5962 /* Target PROGRAM is the third argument. */
5963 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
5965 Qopen_network_stream
= intern ("open-network-stream");
5966 staticpro (&Qopen_network_stream
);
5967 /* Target SERVICE is the fourth argument. */
5968 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
5970 Qcoding_system
= intern ("coding-system");
5971 staticpro (&Qcoding_system
);
5973 Qeol_type
= intern ("eol-type");
5974 staticpro (&Qeol_type
);
5976 Qbuffer_file_coding_system
= intern ("buffer-file-coding-system");
5977 staticpro (&Qbuffer_file_coding_system
);
5979 Qpost_read_conversion
= intern ("post-read-conversion");
5980 staticpro (&Qpost_read_conversion
);
5982 Qpre_write_conversion
= intern ("pre-write-conversion");
5983 staticpro (&Qpre_write_conversion
);
5985 Qno_conversion
= intern ("no-conversion");
5986 staticpro (&Qno_conversion
);
5988 Qundecided
= intern ("undecided");
5989 staticpro (&Qundecided
);
5991 Qcoding_system_p
= intern ("coding-system-p");
5992 staticpro (&Qcoding_system_p
);
5994 Qcoding_system_error
= intern ("coding-system-error");
5995 staticpro (&Qcoding_system_error
);
/* Register `coding-system-error' as an error symbol: its conditions
   are (coding-system-error error) and its message is the string
   below.  */
5997 Fput (Qcoding_system_error
, Qerror_conditions
,
5998 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
5999 Fput (Qcoding_system_error
, Qerror_message
,
6000 build_string ("Invalid coding system"));
6002 Qcoding_category
= intern ("coding-category");
6003 staticpro (&Qcoding_category
);
6004 Qcoding_category_index
= intern ("coding-category-index");
6005 staticpro (&Qcoding_category_index
);
/* Build the category table: slot i holds the symbol named
   coding_category_name[i], and that symbol's `coding-category-index'
   property is set to i.  */
6007 Vcoding_category_table
6008 = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX
), Qnil
);
6009 staticpro (&Vcoding_category_table
);
6012 for (i
= 0; i
< CODING_CATEGORY_IDX_MAX
; i
++)
6014 XVECTOR (Vcoding_category_table
)->contents
[i
]
6015 = intern (coding_category_name
[i
]);
6016 Fput (XVECTOR (Vcoding_category_table
)->contents
[i
],
6017 Qcoding_category_index
, make_number (i
));
6021 Qtranslation_table
= intern ("translation-table");
6022 staticpro (&Qtranslation_table
);
6023 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (1));
6025 Qtranslation_table_id
= intern ("translation-table-id");
6026 staticpro (&Qtranslation_table_id
);
6028 Qtranslation_table_for_decode
= intern ("translation-table-for-decode");
6029 staticpro (&Qtranslation_table_for_decode
);
6031 Qtranslation_table_for_encode
= intern ("translation-table-for-encode");
6032 staticpro (&Qtranslation_table_for_encode
);
6034 Qsafe_charsets
= intern ("safe-charsets");
6035 staticpro (&Qsafe_charsets
);
6037 Qvalid_codes
= intern ("valid-codes");
6038 staticpro (&Qvalid_codes
);
6040 Qemacs_mule
= intern ("emacs-mule");
6041 staticpro (&Qemacs_mule
);
6043 Qraw_text
= intern ("raw-text");
6044 staticpro (&Qraw_text
);
/* Register the subr objects (the S... structures created by DEFUN)
   with defsubr, one call per primitive.  */
6046 defsubr (&Scoding_system_p
);
6047 defsubr (&Sread_coding_system
);
6048 defsubr (&Sread_non_nil_coding_system
);
6049 defsubr (&Scheck_coding_system
);
6050 defsubr (&Sdetect_coding_region
);
6051 defsubr (&Sdetect_coding_string
);
6052 defsubr (&Sdecode_coding_region
);
6053 defsubr (&Sencode_coding_region
);
6054 defsubr (&Sdecode_coding_string
);
6055 defsubr (&Sencode_coding_string
);
6056 defsubr (&Sdecode_sjis_char
);
6057 defsubr (&Sencode_sjis_char
);
6058 defsubr (&Sdecode_big5_char
);
6059 defsubr (&Sencode_big5_char
);
6060 defsubr (&Sset_terminal_coding_system_internal
);
6061 defsubr (&Sset_safe_terminal_coding_system_internal
);
6062 defsubr (&Sterminal_coding_system
);
6063 defsubr (&Sset_keyboard_coding_system_internal
);
6064 defsubr (&Skeyboard_coding_system
);
6065 defsubr (&Sfind_operation_coding_system
);
6066 defsubr (&Supdate_coding_systems_internal
);
6067 defsubr (&Sset_coding_priority_internal
);
/* Lisp variables `coding-system-list' and `coding-system-alist', both
   starting out as nil.  */
6069 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
6070 "List of coding systems.\n\
6072 Do not alter the value of this variable manually. This variable should be\n\
6073 updated by the functions `make-coding-system' and\n\
6074 `define-coding-system-alias'.");
6075 Vcoding_system_list
= Qnil
;
6077 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
6078 "Alist of coding system names.\n\
6079 Each element is one element list of coding system name.\n\
6080 This variable is given to `completing-read' as TABLE argument.\n\
6082 Do not alter the value of this variable manually. This variable should be\n\
6083 updated by the functions `make-coding-system' and\n\
6084 `define-coding-system-alias'.");
6085 Vcoding_system_alist
= Qnil
;
/* `coding-category-list' is built from Vcoding_category_table.  The
   loop runs from the last index down to 0 and conses each symbol onto
   the front, so the resulting list is in ascending index order.  */
6087 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
6088 "List of coding-categories (symbols) ordered by priority.");
6092 Vcoding_category_list
= Qnil
;
6093 for (i
= CODING_CATEGORY_IDX_MAX
- 1; i
>= 0; i
--)
6094 Vcoding_category_list
6095 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
6096 Vcoding_category_list
);
/* Lisp variables that override or report the coding system used for
   I/O: `coding-system-for-read', `coding-system-for-write' and
   `last-coding-system-used' start as nil; the boolean variables
   `inhibit-eol-conversion' and `inherit-process-coding-system' start
   as 0.
   NOTE(review): the doc string of `inhibit-eol-conversion' is cut off
   in this listing (its closing line is absent), so the literal appears
   unterminated -- confirm against the complete file.  */
6099 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
6100 "Specify the coding system for read operations.\n\
6101 It is useful to bind this variable with `let', but do not set it globally.\n\
6102 If the value is a coding system, it is used for decoding on read operation.\n\
6103 If not, an appropriate element is used from one of the coding system alists:\n\
6104 There are three such tables, `file-coding-system-alist',\n\
6105 `process-coding-system-alist', and `network-coding-system-alist'.");
6106 Vcoding_system_for_read
= Qnil
;
6108 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
6109 "Specify the coding system for write operations.\n\
6110 Programs bind this variable with `let', but you should not set it globally.\n\
6111 If the value is a coding system, it is used for encoding of output,\n\
6112 when writing it to a file and when sending it to a file or subprocess.\n\
6114 If this does not specify a coding system, an appropriate element\n\
6115 is used from one of the coding system alists:\n\
6116 There are three such tables, `file-coding-system-alist',\n\
6117 `process-coding-system-alist', and `network-coding-system-alist'.\n\
6118 For output to files, if the above procedure does not specify a coding system,\n\
6119 the value of `buffer-file-coding-system' is used.");
6120 Vcoding_system_for_write
= Qnil
;
6122 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
6123 "Coding system used in the latest file or process I/O.");
6124 Vlast_coding_system_used
= Qnil
;
6126 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
6127 "*Non-nil means always inhibit code conversion of end-of-line format.\n\
6128 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
6130 inhibit_eol_conversion
= 0;
6132 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
6133 "Non-nil means process buffer inherits coding system of process output.\n\
6134 Bind it to t if the process output is to be treated as if it were a file\n\
6135 read from some filesystem.");
6136 inherit_process_coding_system
= 0;
/* The three alists that map a target pattern to a coding system for
   file, process and network I/O respectively.  Each is declared as a
   Lisp variable and starts out as nil.  */
6138 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
6139 "Alist to decide a coding system to use for a file I/O operation.\n\
6140 The format is ((PATTERN . VAL) ...),\n\
6141 where PATTERN is a regular expression matching a file name,\n\
6142 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6143 If VAL is a coding system, it is used for both decoding and encoding\n\
6144 the file contents.\n\
6145 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6146 and the cdr part is used for encoding.\n\
6147 If VAL is a function symbol, the function must return a coding system\n\
6148 or a cons of coding systems which are used as above.\n\
6150 See also the function `find-operation-coding-system'\n\
6151 and the variable `auto-coding-alist'.");
6152 Vfile_coding_system_alist
= Qnil
;
6154 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
6155 "Alist to decide a coding system to use for a process I/O operation.\n\
6156 The format is ((PATTERN . VAL) ...),\n\
6157 where PATTERN is a regular expression matching a program name,\n\
6158 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6159 If VAL is a coding system, it is used for both decoding what received\n\
6160 from the program and encoding what sent to the program.\n\
6161 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6162 and the cdr part is used for encoding.\n\
6163 If VAL is a function symbol, the function must return a coding system\n\
6164 or a cons of coding systems which are used as above.\n\
6166 See also the function `find-operation-coding-system'.");
6167 Vprocess_coding_system_alist
= Qnil
;
6169 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
6170 "Alist to decide a coding system to use for a network I/O operation.\n\
6171 The format is ((PATTERN . VAL) ...),\n\
6172 where PATTERN is a regular expression matching a network service name\n\
6173 or is a port number to connect to,\n\
6174 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6175 If VAL is a coding system, it is used for both decoding what received\n\
6176 from the network stream and encoding what sent to the network stream.\n\
6177 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6178 and the cdr part is used for encoding.\n\
6179 If VAL is a function symbol, the function must return a coding system\n\
6180 or a cons of coding systems which are used as above.\n\
6182 See also the function `find-operation-coding-system'.");
6183 Vnetwork_coding_system_alist
= Qnil
;
/* `locale-coding-system' (initially nil), the four mode-line
   end-of-line mnemonic strings, and `enable-character-translation'
   (initially t).  The mnemonic defaults set here are ":" for unix and
   undecided, "\\" for dos and "/" for mac.  */
6185 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
6186 "Coding system to use with system messages.");
6187 Vlocale_coding_system
= Qnil
;
6189 /* The eol mnemonics are reset in startup.el system-dependently. */
6190 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
6191 "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
6192 eol_mnemonic_unix
= build_string (":");
6194 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
6195 "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
6196 eol_mnemonic_dos
= build_string ("\\");
6198 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
6199 "*String displayed in mode line for MAC-like (CR) end-of-line format.");
6200 eol_mnemonic_mac
= build_string ("/");
6202 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
6203 "*String displayed in mode line when end-of-line format is not yet determined.");
6204 eol_mnemonic_undecided
= build_string (":");
6206 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
6207 "*Non-nil enables character translation while encoding and decoding.");
6208 Venable_character_translation
= Qt
;
/* Standard translation tables for decoding and encoding, and the
   charset revision alist.  All three Lisp variables start out as nil.
   Note that the Lisp name `charset-revision-table' is bound to the C
   variable Vcharset_revision_alist.  */
6210 DEFVAR_LISP ("standard-translation-table-for-decode",
6211 &Vstandard_translation_table_for_decode
,
6212 "Table for translating characters while decoding.");
6213 Vstandard_translation_table_for_decode
= Qnil
;
6215 DEFVAR_LISP ("standard-translation-table-for-encode",
6216 &Vstandard_translation_table_for_encode
,
6217 "Table for translating characters while encoding.");
6218 Vstandard_translation_table_for_encode
= Qnil
;
6220 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist
,
6221 "Alist of charsets vs revision numbers.\n\
6222 While encoding, if a charset (car part of an element) is found,\n\
6223 designate it with the escape sequence identifying revision (cdr part of the element).");
6224 Vcharset_revision_alist
= Qnil
;
/* `default-process-coding-system' (initially nil),
   `latin-extra-code-table' (a fresh 256-element vector of nil) and
   `select-safe-coding-system-function' (initially nil).
   NOTE(review): the last doc string names `select-safe-coding-system'
   as the default, while the value assigned here is nil; presumably the
   function is installed elsewhere -- confirm.  */
6226 DEFVAR_LISP ("default-process-coding-system",
6227 &Vdefault_process_coding_system
,
6228 "Cons of coding systems used for process I/O by default.\n\
6229 The car part is used for decoding a process output,\n\
6230 the cdr part is used for encoding a text to be sent to a process.");
6231 Vdefault_process_coding_system
= Qnil
;
6233 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
6234 "Table of extra Latin codes in the range 128..159 (inclusive).\n\
6235 This is a vector of length 256.\n\
6236 If Nth element is non-nil, the existence of code N in a file\n\
6237 \(or output of subprocess) doesn't prevent it to be detected as\n\
6238 a coding system of ISO 2022 variant which has a flag\n\
6239 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
6240 or reading output of a subprocess.\n\
6241 Only 128th through 159th elements has a meaning.");
6242 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
6244 DEFVAR_LISP ("select-safe-coding-system-function",
6245 &Vselect_safe_coding_system_function
,
6246 "Function to call to select safe coding system for encoding a text.\n\
6248 If set, this function is called to force a user to select a proper\n\
6249 coding system which can encode the text in the case that a default\n\
6250 coding system used in each operation can't encode the text.\n\
6252 The default value is `select-safe-coding-system' (which see).");
6253 Vselect_safe_coding_system_function
= Qnil
;
6258 emacs_strerror (error_number
)
6263 synchronize_system_messages_locale ();
6264 str
= strerror (error_number
);
6266 if (! NILP (Vlocale_coding_system
))
6268 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
6269 Vlocale_coding_system
,
6271 str
= (char *) XSTRING (dec
)->data
;