Convert (most) functions in src to standard C.
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
aaef169d 2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
114f9c96 3 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
114f9c96 5 2005, 2006, 2007, 2008, 2009, 2010
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8f924df7 8 Copyright (C) 2003
df7492f9
KH
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
4ed46869 11
369314dc
KH
12This file is part of GNU Emacs.
13
9ec0b715 14GNU Emacs is free software: you can redistribute it and/or modify
369314dc 15it under the terms of the GNU General Public License as published by
9ec0b715
GM
16the Free Software Foundation, either version 3 of the License, or
17(at your option) any later version.
4ed46869 18
369314dc
KH
19GNU Emacs is distributed in the hope that it will be useful,
20but WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22GNU General Public License for more details.
4ed46869 23
369314dc 24You should have received a copy of the GNU General Public License
9ec0b715 25along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869
KH
26
27/*** TABLE OF CONTENTS ***
28
b73bfc1c 29 0. General comments
4ed46869 30 1. Preamble
df7492f9
KH
31 2. Emacs' internal format (emacs-utf-8) handlers
32 3. UTF-8 handlers
33 4. UTF-16 handlers
34 5. Charset-base coding systems handlers
35 6. emacs-mule (old Emacs' internal format) handlers
36 7. ISO2022 handlers
37 8. Shift-JIS and BIG5 handlers
38 9. CCL handlers
39 10. C library functions
40 11. Emacs Lisp library functions
41 12. Postamble
4ed46869
KH
42
43*/
44
df7492f9 45/*** 0. General comments ***
b73bfc1c
KH
46
47
df7492f9 48CODING SYSTEM
4ed46869 49
5bad0796
DL
50 A coding system is an object for an encoding mechanism that contains
51 information about how to convert byte sequences to character
e19c3639
KH
52 sequences and vice versa. When we say "decode", it means converting
53 a byte sequence of a specific coding system into a character
54 sequence that is represented by Emacs' internal coding system
55 `emacs-utf-8', and when we say "encode", it means converting a
56 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 57 coding system.
4ed46869 58
e19c3639
KH
59 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
60 C level, a coding system is represented by a vector of attributes
5bad0796 61 stored in the hash table Vcoding_system_hash_table. The conversion
e19c3639
KH
62 from a coding system symbol to its attributes vector is done by
63 looking up the symbol in Vcoding_system_hash_table.
4ed46869 64
e19c3639 65 Coding systems are classified into the following types depending on
5bad0796 66 the encoding mechanism. Here's a brief description of the types.
4ed46869 67
df7492f9
KH
68 o UTF-8
69
70 o UTF-16
71
72 o Charset-base coding system
73
74 A coding system defined by one or more (coded) character sets.
5bad0796 75 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
76 character set.
77
5bad0796 78 o Old Emacs internal format (emacs-mule)
df7492f9 79
5bad0796 80 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 81
df7492f9 82 o ISO2022-base coding system
4ed46869
KH
83
84 The most famous coding system for multiple character sets. X's
df7492f9
KH
85 Compound Text, various EUCs (Extended Unix Code), and coding systems
86 used in the Internet communication such as ISO-2022-JP are all
87 variants of ISO2022.
4ed46869 88
df7492f9 89 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 90
4ed46869
KH
91 A coding system to encode character sets: ASCII, JISX0201, and
92 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 93 section 8.
4ed46869 94
df7492f9 95 o BIG5
4ed46869 96
df7492f9 97 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 98 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
99 described in section 8. In this file, when we write "big5" (all
100 lowercase), we mean the coding system, and when we write "Big5"
101 (capitalized), we mean the character set.
4ed46869 102
df7492f9 103 o CCL
27901516 104
5bad0796 105 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
106 not listed above, he can supply a decoder and an encoder for it in
107 CCL (Code Conversion Language) programs. Emacs executes the CCL
108 program while decoding/encoding.
27901516 109
df7492f9 110 o Raw-text
4ed46869 111
5a936b46 112 A coding system for text containing raw eight-bit data. Emacs
5bad0796 113 treats each byte of source text as a character (except for
df7492f9 114 end-of-line conversion).
4ed46869 115
df7492f9
KH
116 o No-conversion
117
118 Like raw text, but don't do end-of-line conversion.
4ed46869 119
4ed46869 120
df7492f9 121END-OF-LINE FORMAT
4ed46869 122
5bad0796 123 How text end-of-line is encoded depends on the operating system. For
df7492f9 124 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 125 whereas DOS's format is a two-byte sequence of `carriage-return' and
d46c5b12
KH
126 `line-feed' codes. MacOS's format is usually one byte of
127 `carriage-return'.
4ed46869 128
cfb43547 129 Since text character encoding and end-of-line encoding are
df7492f9
KH
130 independent, any coding system described above can take any format
131 of end-of-line (except for no-conversion).
4ed46869 132
e19c3639
KH
133STRUCT CODING_SYSTEM
134
135 Before using a coding system for code conversion (i.e. decoding and
136 encoding), we setup a structure of type `struct coding_system'.
137 This structure keeps various information about a specific code
5bad0796 138 conversion (e.g. the location of source and destination data).
4ed46869
KH
139
140*/
141
df7492f9
KH
142/* COMMON MACROS */
143
144
4ed46869
KH
145/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146
df7492f9 147 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
148 CODING conforms to the format of XXX, and update the members of
149 DETECT_INFO.
df7492f9 150
ff0dacd7 151 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
152
153 Below is the template of these functions. */
154
4ed46869 155#if 0
df7492f9 156static int
ff0dacd7 157detect_coding_XXX (coding, detect_info)
df7492f9 158 struct coding_system *coding;
ff0dacd7 159 struct coding_detection_info *detect_info;
4ed46869 160{
f1d34bca
MB
161 const unsigned char *src = coding->source;
162 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 163 int multibytep = coding->src_multibyte;
ff0dacd7 164 int consumed_chars = 0;
df7492f9
KH
165 int found = 0;
166 ...;
167
168 while (1)
169 {
170 /* Get one byte from the source. If the source is exhausted, jump
171 to no_more_source:. */
172 ONE_MORE_BYTE (c);
ff0dacd7
KH
173
174 if (! __C_conforms_to_XXX___ (c))
175 break;
/* NOTE(review): the negation in the next test looks inverted --
 presumably FOUND should be set when C strongly suggests XXX.
 Confirm before copying this template. */
176 if (! __C_strongly_suggests_XXX__ (c))
177 found = CATEGORY_MASK_XXX;
df7492f9 178 }
ff0dacd7
KH
179 /* The byte sequence is invalid for XXX. */
180 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 181 return 0;
ff0dacd7 182
df7492f9 183 no_more_source:
ff0dacd7
KH
184 /* The source was exhausted successfully. */
185 detect_info->found |= found;
df7492f9 186 return 1;
4ed46869
KH
187}
188#endif
189
190/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191
df7492f9
KH
192 These functions decode a byte sequence specified as a source by
193 CODING. The resulting multibyte text goes to a place pointed to by
194 CODING->charbuf, the length of which should not exceed
195 CODING->charbuf_size;
d46c5b12 196
df7492f9
KH
197 These functions set the information of original and decoded texts in
198 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
199 They also set CODING->result to one of CODING_RESULT_XXX indicating
200 how the decoding is finished.
d46c5b12 201
df7492f9 202 Below is the template of these functions. */
d46c5b12 203
4ed46869 204#if 0
b73bfc1c 205static void
df7492f9 206decode_coding_XXXX (coding)
4ed46869 207 struct coding_system *coding;
4ed46869 208{
f1d34bca
MB
209 const unsigned char *src = coding->source + coding->consumed;
210 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
211 /* SRC_BASE remembers the start position in source in each loop.
212 The loop will be exited when there's not enough source text, or
213 when there's no room in CHARBUF for a decoded character. */
f1d34bca 214 const unsigned char *src_base;
df7492f9 215 /* A buffer to produce decoded characters. */
69a80ea3
KH
216 int *charbuf = coding->charbuf + coding->charbuf_used;
217 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
218 int multibytep = coding->src_multibyte;
219
220 while (1)
221 {
222 src_base = src;
223 if (charbuf >= charbuf_end)
224 /* No more room to produce a decoded character. */
225 break;
226 ONE_MORE_BYTE (c);
227 /* Decode it. */
228 }
229
230 no_more_source:
231 if (src_base < src_end
232 && coding->mode & CODING_MODE_LAST_BLOCK)
233 /* If the source ends by partial bytes to construct a character,
234 treat them as eight-bit raw data. */
235 while (src_base < src_end && charbuf < charbuf_end)
236 *charbuf++ = *src_base++;
237 /* Remember how many bytes and characters we consumed. If the
238 source is multibyte, the bytes and chars are not identical. */
239 coding->consumed = coding->consumed_char = src_base - coding->source;
240 /* Remember how many characters we produced. */
241 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
242}
243#endif
244
245/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246
df7492f9
KH
247 These functions encode SRC_BYTES length text at SOURCE of Emacs'
248 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
249 goes to a place pointed to by DESTINATION, the length of which
250 should not exceed DST_BYTES.
d46c5b12 251
df7492f9
KH
252 These functions set the information of original and encoded texts in
253 the members produced, produced_char, consumed, and consumed_char of
254 the structure *CODING. They also set the member result to one of
255 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 256
df7492f9
KH
257 DST_BYTES zero means that source area and destination area are
258 overlapped, which means that we can produce an encoded text until it
259 reaches the head of the not-yet-encoded source text.
d46c5b12 260
df7492f9 261 Below is a template of these functions. */
4ed46869 262#if 0
b73bfc1c 263static void
df7492f9 264encode_coding_XXX (coding)
4ed46869 265 struct coding_system *coding;
4ed46869 266{
df7492f9
KH
267 int multibytep = coding->dst_multibyte;
268 int *charbuf = coding->charbuf;
/* The end of the characters to encode: CHARBUF_USED elements past
 the start of CODING's character buffer. */
269 int *charbuf_end = coding->charbuf + coding->charbuf_used;
270 unsigned char *dst = coding->destination + coding->produced;
271 unsigned char *dst_end = coding->destination + coding->dst_bytes;
272 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
273 int produced_chars = 0;
274
275 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
276 {
277 int c = *charbuf;
278 /* Encode C into DST, and increment DST. */
279 }
280 label_no_more_destination:
281 /* How many chars and bytes we produced. */
282 coding->produced_char += produced_chars;
283 coding->produced = dst - coding->destination;
4ed46869
KH
284}
285#endif
286
4ed46869
KH
287\f
288/*** 1. Preamble ***/
289
68c45bf0 290#include <config.h>
4ed46869 291#include <stdio.h>
d7306fe6 292#include <setjmp.h>
4ed46869 293
4ed46869
KH
294#include "lisp.h"
295#include "buffer.h"
df7492f9 296#include "character.h"
4ed46869
KH
297#include "charset.h"
298#include "ccl.h"
df7492f9 299#include "composite.h"
4ed46869
KH
300#include "coding.h"
301#include "window.h"
b8299c66
KL
302#include "frame.h"
303#include "termhooks.h"
4ed46869 304
df7492f9 305Lisp_Object Vcoding_system_hash_table;
4ed46869 306
df7492f9 307Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
308Lisp_Object Qunix, Qdos;
309extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
310Lisp_Object Qbuffer_file_coding_system;
311Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 312Lisp_Object Qdefault_char;
27901516 313Lisp_Object Qno_conversion, Qundecided;
df7492f9 314Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 315Lisp_Object Qbig, Qlittle;
bb0115a2 316Lisp_Object Qcoding_system_history;
1397dc18 317Lisp_Object Qvalid_codes;
2133e2d1 318Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
a6f87d34
KH
319Lisp_Object QCdecode_translation_table, QCencode_translation_table;
320Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
35befdaa 321Lisp_Object QCascii_compatible_p;
4ed46869
KH
322
323extern Lisp_Object Qinsert_file_contents, Qwrite_region;
387f6ba5 324Lisp_Object Qcall_process, Qcall_process_region;
4ed46869
KH
325Lisp_Object Qstart_process, Qopen_network_stream;
326Lisp_Object Qtarget_idx;
327
065e3595
KH
328Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
329Lisp_Object Qinterrupted, Qinsufficient_memory;
330
c7183fb8
GM
331extern Lisp_Object Qcompletion_ignore_case;
332
44e8490d
KH
333/* If a symbol has this property, evaluate the value to define the
334 symbol as a coding system. */
335static Lisp_Object Qcoding_system_define_form;
336
5d5bf4d8
KH
337int coding_system_require_warning;
338
d46c5b12
KH
339Lisp_Object Vselect_safe_coding_system_function;
340
7722baf9
EZ
341/* Mnemonic string for each format of end-of-line. */
342Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
343/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 344 decided. */
7722baf9 345Lisp_Object eol_mnemonic_undecided;
4ed46869 346
fcbcfb64
KH
347/* Format of end-of-line decided by system. This is Qunix on
348 Unix and Mac, Qdos on DOS/Windows.
349 This has an effect only for external encoding (i.e. for output to
350 file and process), not for in-buffer or Lisp string encoding. */
351static Lisp_Object system_eol_type;
352
4ed46869
KH
353#ifdef emacs
354
4608c386
KH
355Lisp_Object Vcoding_system_list, Vcoding_system_alist;
356
357Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 358
d46c5b12
KH
359/* Coding system emacs-mule and raw-text are for converting only
360 end-of-line format. */
361Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 362Lisp_Object Qutf_8_emacs;
ecf488bc 363
4ed46869
KH
364/* Coding-systems are handed between Emacs Lisp programs and C internal
365 routines by the following three variables. */
366/* Coding-system for reading files and receiving data from process. */
367Lisp_Object Vcoding_system_for_read;
368/* Coding-system for writing files and sending data to process. */
369Lisp_Object Vcoding_system_for_write;
370/* Coding-system actually used in the latest I/O. */
371Lisp_Object Vlast_coding_system_used;
065e3595
KH
372/* Set to non-nil when an error is detected while code conversion. */
373Lisp_Object Vlast_code_conversion_error;
c4825358 374/* A vector of length 256 which contains information about special
94487c4e 375 Latin codes (especially for dealing with Microsoft codes). */
3f003981 376Lisp_Object Vlatin_extra_code_table;
c4825358 377
9ce27fde
KH
378/* Flag to inhibit code conversion of end-of-line format. */
379int inhibit_eol_conversion;
380
74383408
KH
381/* Flag to inhibit ISO2022 escape sequence detection. */
382int inhibit_iso_escape_detection;
383
97b1b294
EZ
384/* Flag to inhibit detection of binary files through null bytes. */
385int inhibit_null_byte_detection;
386
ed29121d
EZ
387/* Flag to make buffer-file-coding-system inherit from process-coding. */
388int inherit_process_coding_system;
389
c4825358
KH
390/* Coding system to be used to encode text for terminal display when
391 terminal coding system is nil. */
392struct coding_system safe_terminal_coding;
393
02ba4723
KH
394Lisp_Object Vfile_coding_system_alist;
395Lisp_Object Vprocess_coding_system_alist;
396Lisp_Object Vnetwork_coding_system_alist;
4ed46869 397
68c45bf0
PE
398Lisp_Object Vlocale_coding_system;
399
4ed46869
KH
400#endif /* emacs */
401
f967223b
KH
402/* Flag to tell if we look up translation table on character code
403 conversion. */
84fbb8a0 404Lisp_Object Venable_character_translation;
f967223b
KH
405/* Standard translation table to look up on decoding (reading). */
406Lisp_Object Vstandard_translation_table_for_decode;
407/* Standard translation table to look up on encoding (writing). */
408Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 409
f967223b
KH
410Lisp_Object Qtranslation_table;
411Lisp_Object Qtranslation_table_id;
412Lisp_Object Qtranslation_table_for_decode;
413Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
414
415/* Alist of charsets vs revision number. */
df7492f9 416static Lisp_Object Vcharset_revision_table;
4ed46869 417
02ba4723
KH
418/* Default coding systems used for process I/O. */
419Lisp_Object Vdefault_process_coding_system;
420
002fdb44
DL
421/* Char table for translating Quail and self-inserting input. */
422Lisp_Object Vtranslation_table_for_input;
423
df7492f9
KH
424/* Two special coding systems. */
425Lisp_Object Vsjis_coding_system;
426Lisp_Object Vbig5_coding_system;
427
df7492f9
KH
428/* ISO2022 section */
429
430#define CODING_ISO_INITIAL(coding, reg) \
431 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
432 coding_attr_iso_initial), \
433 reg)))
434
435
1b3b981b
AS
436#define CODING_ISO_REQUEST(coding, charset_id) \
437 (((charset_id) <= (coding)->max_charset_id \
438 ? ((coding)->safe_charsets[charset_id] != 255 \
439 ? (coding)->safe_charsets[charset_id] \
440 : -1) \
df7492f9
KH
441 : -1))
442
443
444#define CODING_ISO_FLAGS(coding) \
445 ((coding)->spec.iso_2022.flags)
446#define CODING_ISO_DESIGNATION(coding, reg) \
447 ((coding)->spec.iso_2022.current_designation[reg])
448#define CODING_ISO_INVOCATION(coding, plane) \
449 ((coding)->spec.iso_2022.current_invocation[plane])
450#define CODING_ISO_SINGLE_SHIFTING(coding) \
451 ((coding)->spec.iso_2022.single_shifting)
452#define CODING_ISO_BOL(coding) \
453 ((coding)->spec.iso_2022.bol)
454#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
455 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
e951386e
KH
456#define CODING_ISO_CMP_STATUS(coding) \
457 (&(coding)->spec.iso_2022.cmp_status)
458#define CODING_ISO_EXTSEGMENT_LEN(coding) \
459 ((coding)->spec.iso_2022.ctext_extended_segment_len)
460#define CODING_ISO_EMBEDDED_UTF_8(coding) \
461 ((coding)->spec.iso_2022.embedded_utf_8)
df7492f9
KH
462
463/* Control characters of ISO2022. */
464 /* code */ /* function */
465#define ISO_CODE_LF 0x0A /* line-feed */
466#define ISO_CODE_CR 0x0D /* carriage-return */
467#define ISO_CODE_SO 0x0E /* shift-out */
468#define ISO_CODE_SI 0x0F /* shift-in */
469#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
470#define ISO_CODE_ESC 0x1B /* escape */
471#define ISO_CODE_SS2 0x8E /* single-shift-2 */
472#define ISO_CODE_SS3 0x8F /* single-shift-3 */
473#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
474
475/* Every (1-byte) code of ISO2022 is classified into one of the
476 following classes. */
477enum iso_code_class_type
478 {
479 ISO_control_0, /* Control codes in the range
480 0x00..0x1F and 0x7F, except for the
481 following 4 codes. */
df7492f9
KH
482 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
483 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
484 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
485 ISO_escape, /* ISO_CODE_ESC (0x1B) */
486 ISO_control_1, /* Control codes in the range
487 0x80..0x9F, except for the
488 following 3 codes. */
489 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
490 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
491 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
492 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
493 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
494 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
495 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
496 };
05e6f5dc 497
df7492f9
KH
498/** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
499 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 500
df7492f9
KH
501/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
502 instead of the correct short-form sequence (e.g. ESC $ A). */
503#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 504
df7492f9
KH
505/* If set, reset graphic planes and registers at end-of-line to the
506 initial state. */
507#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 508
df7492f9
KH
509/* If set, reset graphic planes and registers before any control
510 characters to the initial state. */
511#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 512
df7492f9
KH
513/* If set, encode by 7-bit environment. */
514#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 515
df7492f9
KH
516/* If set, use locking-shift function. */
517#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 518
df7492f9
KH
519/* If set, use single-shift function. Overwrite
520 CODING_ISO_FLAG_LOCKING_SHIFT. */
521#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 522
df7492f9
KH
523/* If set, use designation escape sequence. */
524#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 525
df7492f9
KH
526/* If set, produce revision number sequence. */
527#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 528
df7492f9
KH
529/* If set, produce ISO6429's direction specifying sequence. */
530#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 531
df7492f9
KH
532/* If set, assume designation states are reset at beginning of line on
533 output. */
534#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 535
df7492f9
KH
536/* If set, designation sequence should be placed at beginning of line
537 on output. */
538#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 539
df7492f9
KH
540/* If set, do not encode unsafe characters on output. */
541#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 542
df7492f9
KH
543/* If set, extra latin codes (128..159) are accepted as a valid code
544 on input. */
545#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 546
df7492f9 547#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 548
df7492f9 549#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 550
bf16eb23 551#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 552
bf16eb23 553#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 554
bf16eb23 555#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 556
df7492f9
KH
557/* A character to be produced on output if encoding of the original
558 character is prohibited by CODING_ISO_FLAG_SAFE. */
559#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 560
a470d443
KH
561/* UTF-8 section */
562#define CODING_UTF_8_BOM(coding) \
563 ((coding)->spec.utf_8_bom)
4ed46869 564
df7492f9
KH
565/* UTF-16 section */
566#define CODING_UTF_16_BOM(coding) \
567 ((coding)->spec.utf_16.bom)
4ed46869 568
df7492f9
KH
569#define CODING_UTF_16_ENDIAN(coding) \
570 ((coding)->spec.utf_16.endian)
4ed46869 571
df7492f9
KH
572#define CODING_UTF_16_SURROGATE(coding) \
573 ((coding)->spec.utf_16.surrogate)
4ed46869 574
4ed46869 575
df7492f9
KH
576/* CCL section */
577#define CODING_CCL_DECODER(coding) \
578 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
579#define CODING_CCL_ENCODER(coding) \
580 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
581#define CODING_CCL_VALIDS(coding) \
8f924df7 582 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 583
5a936b46 584/* Index for each coding category in `coding_categories' */
4ed46869 585
df7492f9
KH
586enum coding_category
587 {
588 coding_category_iso_7,
589 coding_category_iso_7_tight,
590 coding_category_iso_8_1,
591 coding_category_iso_8_2,
592 coding_category_iso_7_else,
593 coding_category_iso_8_else,
a470d443
KH
594 coding_category_utf_8_auto,
595 coding_category_utf_8_nosig,
596 coding_category_utf_8_sig,
df7492f9
KH
597 coding_category_utf_16_auto,
598 coding_category_utf_16_be,
599 coding_category_utf_16_le,
600 coding_category_utf_16_be_nosig,
601 coding_category_utf_16_le_nosig,
602 coding_category_charset,
603 coding_category_sjis,
604 coding_category_big5,
605 coding_category_ccl,
606 coding_category_emacs_mule,
607 /* All above are targets of code detection. */
608 coding_category_raw_text,
609 coding_category_undecided,
610 coding_category_max
611 };
612
613/* Definitions of flag bits used in detect_coding_XXXX. */
614#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
615#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
616#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
617#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
618#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
619#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
a470d443
KH
620#define CATEGORY_MASK_UTF_8_AUTO (1 << coding_category_utf_8_auto)
621#define CATEGORY_MASK_UTF_8_NOSIG (1 << coding_category_utf_8_nosig)
622#define CATEGORY_MASK_UTF_8_SIG (1 << coding_category_utf_8_sig)
b49a1807 623#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
624#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
625#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
626#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
627#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
628#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
629#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
630#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
631#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
632#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 633#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
634
635/* This value is returned if detect_coding_mask () finds nothing other
636 than ASCII characters. */
637#define CATEGORY_MASK_ANY \
638 (CATEGORY_MASK_ISO_7 \
639 | CATEGORY_MASK_ISO_7_TIGHT \
640 | CATEGORY_MASK_ISO_8_1 \
641 | CATEGORY_MASK_ISO_8_2 \
642 | CATEGORY_MASK_ISO_7_ELSE \
643 | CATEGORY_MASK_ISO_8_ELSE \
a470d443
KH
644 | CATEGORY_MASK_UTF_8_AUTO \
645 | CATEGORY_MASK_UTF_8_NOSIG \
646 | CATEGORY_MASK_UTF_8_SIG \
2f3cbb32 647 | CATEGORY_MASK_UTF_16_AUTO \
df7492f9
KH
648 | CATEGORY_MASK_UTF_16_BE \
649 | CATEGORY_MASK_UTF_16_LE \
650 | CATEGORY_MASK_UTF_16_BE_NOSIG \
651 | CATEGORY_MASK_UTF_16_LE_NOSIG \
652 | CATEGORY_MASK_CHARSET \
653 | CATEGORY_MASK_SJIS \
654 | CATEGORY_MASK_BIG5 \
655 | CATEGORY_MASK_CCL \
656 | CATEGORY_MASK_EMACS_MULE)
657
658
659#define CATEGORY_MASK_ISO_7BIT \
660 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
661
662#define CATEGORY_MASK_ISO_8BIT \
663 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
664
665#define CATEGORY_MASK_ISO_ELSE \
666 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
667
668#define CATEGORY_MASK_ISO_ESCAPE \
669 (CATEGORY_MASK_ISO_7 \
670 | CATEGORY_MASK_ISO_7_TIGHT \
671 | CATEGORY_MASK_ISO_7_ELSE \
672 | CATEGORY_MASK_ISO_8_ELSE)
673
674#define CATEGORY_MASK_ISO \
675 ( CATEGORY_MASK_ISO_7BIT \
676 | CATEGORY_MASK_ISO_8BIT \
677 | CATEGORY_MASK_ISO_ELSE)
678
679#define CATEGORY_MASK_UTF_16 \
2f3cbb32
KH
680 (CATEGORY_MASK_UTF_16_AUTO \
681 | CATEGORY_MASK_UTF_16_BE \
df7492f9
KH
682 | CATEGORY_MASK_UTF_16_LE \
683 | CATEGORY_MASK_UTF_16_BE_NOSIG \
684 | CATEGORY_MASK_UTF_16_LE_NOSIG)
685
a470d443
KH
686#define CATEGORY_MASK_UTF_8 \
687 (CATEGORY_MASK_UTF_8_AUTO \
688 | CATEGORY_MASK_UTF_8_NOSIG \
689 | CATEGORY_MASK_UTF_8_SIG)
df7492f9
KH
690
691/* List of symbols `coding-category-xxx' ordered by priority. This
692 variable is exposed to Emacs Lisp. */
693static Lisp_Object Vcoding_category_list;
694
695/* Table of coding categories (Lisp symbols). This variable is for
696 internal use only. */
697static Lisp_Object Vcoding_category_table;
698
699/* Table of coding-categories ordered by priority. */
700static enum coding_category coding_priorities[coding_category_max];
701
702/* Nth element is a coding context for the coding system bound to the
703 Nth coding category. */
704static struct coding_system coding_categories[coding_category_max];
705
df7492f9
KH
706/*** Commonly used macros and functions ***/
707
708#ifndef min
709#define min(a, b) ((a) < (b) ? (a) : (b))
710#endif
711#ifndef max
712#define max(a, b) ((a) > (b) ? (a) : (b))
713#endif
4ed46869 714
24a73b0a
KH
715#define CODING_GET_INFO(coding, attrs, charset_list) \
716 do { \
717 (attrs) = CODING_ID_ATTRS ((coding)->id); \
718 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
df7492f9 719 } while (0)
4ed46869 720
4ed46869 721
df7492f9
KH
722/* Safely get one byte from the source text pointed by SRC which ends
723 at SRC_END, set C to that byte, and increment CONSUMED_CHARS. If
065e3595
KH
724 SRC has reached SRC_END, jump to `no_more_source' (after recording
725 CODING_RESULT_INSUFFICIENT_SRC when SRC_BASE < SRC). If multibytep
726 is nonzero and the byte has its high bit set: when the byte is 0xC0
727 or 0xC1, consume the next byte too and set C to
728 ((byte & 1) << 6) | next-byte; otherwise re-read the character at
 SRC with string_char, set C to the negative value of the character
 code, and record CODING_RESULT_INVALID_SRC. The caller should
 declare and set these variables appropriately in advance:
 src, src_end, src_base, multibytep, consumed_chars, coding
 and must provide the label `no_more_source'. */
aa72b389 729
065e3595
KH
730#define ONE_MORE_BYTE(c) \
731 do { \
732 if (src == src_end) \
733 { \
734 if (src_base < src) \
735 record_conversion_result \
736 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
737 goto no_more_source; \
738 } \
739 c = *src++; \
740 if (multibytep && (c & 0x80)) \
741 { \
742 if ((c & 0xFE) == 0xC0) \
743 c = ((c & 1) << 6) | *src++; \
744 else \
745 { \
35befdaa
KH
746 src--; \
747 c = - string_char (src, &src, NULL); \
065e3595
KH
748 record_conversion_result \
749 (coding, CODING_RESULT_INVALID_SRC); \
750 } \
751 } \
752 consumed_chars++; \
aa72b389
KH
753 } while (0)
754
f56a4450 755/* Safely get two bytes from the source text pointed by SRC which ends
220eeac9
KH
756 at SRC_END, and set C1 and C2 to those bytes while skipping the
757 heading multibyte characters. If there are not enough bytes in the
758 source, it jumps to `no_more_source'. If multibytep is nonzero and
759 a multibyte character is found for C2, set C2 to the negative value
760 of the character code. The caller should declare and set these
761 variables appropriately in advance:
f56a4450
KH
762 src, src_end, multibytep
763 It is intended that this macro is used in detect_coding_utf_16. */
764
220eeac9
KH
765#define TWO_MORE_BYTES(c1, c2) \
766 do { \
767 do { \
768 if (src == src_end) \
769 goto no_more_source; \
770 c1 = *src++; \
771 if (multibytep && (c1 & 0x80)) \
772 { \
773 if ((c1 & 0xFE) == 0xC0) \
774 c1 = ((c1 & 1) << 6) | *src++; \
775 else \
776 { \
777 src += BYTES_BY_CHAR_HEAD (c1) - 1; \
778 c1 = -1; \
779 } \
780 } \
781 } while (c1 < 0); \
782 if (src == src_end) \
783 goto no_more_source; \
784 c2 = *src++; \
785 if (multibytep && (c2 & 0x80)) \
786 { \
787 if ((c2 & 0xFE) == 0xC0) \
788 c2 = ((c2 & 1) << 6) | *src++; \
789 else \
790 c2 = -1; \
791 } \
f56a4450
KH
792 } while (0)
793
aa72b389 794
065e3595
KH
/* Like ONE_MORE_BYTE, but do not compare SRC with SRC_END first, so
 this never jumps to `no_more_source'. The caller should declare
 and set these variables appropriately in advance:
 src, multibytep, consumed_chars, coding */
795#define ONE_MORE_BYTE_NO_CHECK(c) \
796 do { \
797 c = *src++; \
798 if (multibytep && (c & 0x80)) \
799 { \
800 if ((c & 0xFE) == 0xC0) \
801 c = ((c & 1) << 6) | *src++; \
802 else \
803 { \
35befdaa
KH
804 src--; \
805 c = - string_char (src, &src, NULL); \
065e3595
KH
806 record_conversion_result \
807 (coding, CODING_RESULT_INVALID_SRC); \
808 } \
809 } \
810 consumed_chars++; \
aa72b389
KH
811 } while (0)
812
aa72b389 813
df7492f9
KH
814/* Store the byte C at the place pointed to by DST, advance DST to the
   next free position, and increment PRODUCED_CHARS.  The byte is
   stored as is, with no multibyte conversion, so the caller must
   ensure that C is in the range 0..127.  The caller must also declare
   and set the variables `dst' and `produced_chars' appropriately in
   advance.  */
aa72b389
KH
819
820
df7492f9
KH
821#define EMIT_ONE_ASCII_BYTE(c) \
822 do { \
823 produced_chars++; \
824 *dst++ = (c); \
b6871cc7 825 } while (0)
aa72b389
KH
826
827
df7492f9 828/* Like EMIT_ONE_ASCII_BYTE, but store two bytes, C1 then C2, and
   increase PRODUCED_CHARS by two.  */
aa72b389 829
df7492f9
KH
830#define EMIT_TWO_ASCII_BYTES(c1, c2) \
831 do { \
832 produced_chars += 2; \
833 *dst++ = (c1), *dst++ = (c2); \
834 } while (0)
aa72b389
KH
835
836
df7492f9
KH
837/* Store the byte C at the place pointed to by DST, advance DST to the
   next free position, and increment PRODUCED_CHARS.  If MULTIBYTEP is
   nonzero, store it in the appropriate multibyte form: a value of
   0x80 or above is first converted with BYTE8_TO_CHAR, and the
   result is written with CHAR_STRING_ADVANCE.  Otherwise C is stored
   as a single raw byte.  The caller should declare and set the
   variables `dst', `multibytep' and `produced_chars' appropriately in
   advance.  */
842
843#define EMIT_ONE_BYTE(c) \
844 do { \
845 produced_chars++; \
846 if (multibytep) \
847 { \
848 int ch = (c); \
849 if (ch >= 0x80) \
850 ch = BYTE8_TO_CHAR (ch); \
851 CHAR_STRING_ADVANCE (ch, dst); \
852 } \
853 else \
854 *dst++ = (c); \
aa72b389 855 } while (0)
aa72b389 856
aa72b389 857
df7492f9 858/* Like EMIT_ONE_BYTE, but emit two bytes, C1 then C2, and increase
   PRODUCED_CHARS by two.  In the multibyte case each byte is converted
   and written separately, exactly as EMIT_ONE_BYTE would do.  */
aa72b389 859
e19c3639
KH
860#define EMIT_TWO_BYTES(c1, c2) \
861 do { \
862 produced_chars += 2; \
863 if (multibytep) \
864 { \
865 int ch; \
866 \
867 ch = (c1); \
868 if (ch >= 0x80) \
869 ch = BYTE8_TO_CHAR (ch); \
870 CHAR_STRING_ADVANCE (ch, dst); \
871 ch = (c2); \
872 if (ch >= 0x80) \
873 ch = BYTE8_TO_CHAR (ch); \
874 CHAR_STRING_ADVANCE (ch, dst); \
875 } \
876 else \
877 { \
878 *dst++ = (c1); \
879 *dst++ = (c2); \
880 } \
aa72b389
KH
881 } while (0)
882
883
df7492f9
KH
/* Like EMIT_ONE_BYTE, but emit three bytes; C1, C2, and C3, in that
   order.  Implemented as EMIT_ONE_BYTE followed by EMIT_TWO_BYTES.  */
884#define EMIT_THREE_BYTES(c1, c2, c3) \
885 do { \
886 EMIT_ONE_BYTE (c1); \
887 EMIT_TWO_BYTES (c2, c3); \
888 } while (0)
aa72b389 889
aa72b389 890
df7492f9
KH
/* Like EMIT_ONE_BYTE, but emit four bytes; C1, C2, C3, and C4, in
   that order.  Implemented as two consecutive EMIT_TWO_BYTES.  */
891#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
892 do { \
893 EMIT_TWO_BYTES (c1, c2); \
894 EMIT_TWO_BYTES (c3, c4); \
895 } while (0)
aa72b389 896
aa72b389 897
f6cbaf43 898/* Prototypes for static functions. */
f57e2426
J
899static void record_conversion_result (struct coding_system *coding,
900 enum coding_result_code result);
901static int detect_coding_utf_8 (struct coding_system *,
902 struct coding_detection_info *info);
903static void decode_coding_utf_8 (struct coding_system *);
904static int encode_coding_utf_8 (struct coding_system *);
905
906static int detect_coding_utf_16 (struct coding_system *,
907 struct coding_detection_info *info);
908static void decode_coding_utf_16 (struct coding_system *);
909static int encode_coding_utf_16 (struct coding_system *);
910
911static int detect_coding_iso_2022 (struct coding_system *,
912 struct coding_detection_info *info);
913static void decode_coding_iso_2022 (struct coding_system *);
914static int encode_coding_iso_2022 (struct coding_system *);
915
916static int detect_coding_emacs_mule (struct coding_system *,
917 struct coding_detection_info *info);
918static void decode_coding_emacs_mule (struct coding_system *);
919static int encode_coding_emacs_mule (struct coding_system *);
920
921static int detect_coding_sjis (struct coding_system *,
922 struct coding_detection_info *info);
923static void decode_coding_sjis (struct coding_system *);
924static int encode_coding_sjis (struct coding_system *);
925
926static int detect_coding_big5 (struct coding_system *,
927 struct coding_detection_info *info);
928static void decode_coding_big5 (struct coding_system *);
929static int encode_coding_big5 (struct coding_system *);
930
931static int detect_coding_ccl (struct coding_system *,
932 struct coding_detection_info *info);
933static void decode_coding_ccl (struct coding_system *);
934static int encode_coding_ccl (struct coding_system *);
935
936static void decode_coding_raw_text (struct coding_system *);
937static int encode_coding_raw_text (struct coding_system *);
938
939static void coding_set_source (struct coding_system *);
940static void coding_set_destination (struct coding_system *);
941static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT);
942static void coding_alloc_by_making_gap (struct coding_system *,
943 EMACS_INT, EMACS_INT);
944static unsigned char *alloc_destination (struct coding_system *,
945 EMACS_INT, unsigned char *);
946static void setup_iso_safe_charsets (Lisp_Object);
947static unsigned char *encode_designation_at_bol (struct coding_system *,
948 int *, int *,
949 unsigned char *);
950static int detect_eol (const unsigned char *,
951 EMACS_INT, enum coding_category);
952static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
953static void decode_eol (struct coding_system *);
954static Lisp_Object get_translation_table (Lisp_Object, int, int *);
955static Lisp_Object get_translation (Lisp_Object, int *, int *);
956static int produce_chars (struct coding_system *, Lisp_Object, int);
957static INLINE void produce_charset (struct coding_system *, int *,
958 EMACS_INT);
959static void produce_annotation (struct coding_system *, EMACS_INT);
960static int decode_coding (struct coding_system *);
961static INLINE int *handle_composition_annotation (EMACS_INT, EMACS_INT,
962 struct coding_system *,
963 int *, EMACS_INT *);
964static INLINE int *handle_charset_annotation (EMACS_INT, EMACS_INT,
965 struct coding_system *,
966 int *, EMACS_INT *);
967static void consume_chars (struct coding_system *, Lisp_Object, int);
968static int encode_coding (struct coding_system *);
969static Lisp_Object make_conversion_work_buffer (int);
970static Lisp_Object code_conversion_restore (Lisp_Object);
971static INLINE int char_encodable_p (int, Lisp_Object);
972static Lisp_Object make_subsidiaries (Lisp_Object);
f6cbaf43 973
065e3595
KH
974static void
975record_conversion_result (struct coding_system *coding,
976 enum coding_result_code result)
977{
978 coding->result = result;
979 switch (result)
980 {
981 case CODING_RESULT_INSUFFICIENT_SRC:
982 Vlast_code_conversion_error = Qinsufficient_source;
983 break;
984 case CODING_RESULT_INCONSISTENT_EOL:
985 Vlast_code_conversion_error = Qinconsistent_eol;
986 break;
987 case CODING_RESULT_INVALID_SRC:
988 Vlast_code_conversion_error = Qinvalid_source;
989 break;
990 case CODING_RESULT_INTERRUPT:
991 Vlast_code_conversion_error = Qinterrupted;
992 break;
993 case CODING_RESULT_INSUFFICIENT_MEM:
994 Vlast_code_conversion_error = Qinsufficient_memory;
995 break;
ebaf11b6
KH
996 case CODING_RESULT_INSUFFICIENT_DST:
997 /* Don't record this error in Vlast_code_conversion_error
998 because it happens just temporarily and is resolved when the
999 whole conversion is finished. */
1000 break;
409ea3a1
AS
1001 case CODING_RESULT_SUCCESS:
1002 break;
35befdaa
KH
1003 default:
1004 Vlast_code_conversion_error = intern ("Unknown error");
065e3595
KH
1005 }
1006}
1007
75f80e63
EZ
1008/* Decode CODE of CHARSET with DECODE_CHAR and store the result in C.

   This wrapper macro is used to preserve validity of pointers into
   buffer text across calls to decode_char, which could cause
   relocation of buffers if it loads a charset map, because loading a
   charset map allocates large structures.

   charset_map_loaded is cleared before the call; if it is nonzero
   afterwards, CODING->source is recomputed with coding_set_source,
   and SRC, SRC_BASE and SRC_END are all shifted by the distance that
   CODING->source moved.  */
df7492f9
KH
1012#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
1013 do { \
1014 charset_map_loaded = 0; \
1015 c = DECODE_CHAR (charset, code); \
1016 if (charset_map_loaded) \
1017 { \
8f924df7 1018 const unsigned char *orig = coding->source; \
df7492f9
KH
1019 EMACS_INT offset; \
1020 \
1021 coding_set_source (coding); \
1022 offset = coding->source - orig; \
1023 src += offset; \
1024 src_base += offset; \
1025 src_end += offset; \
1026 } \
aa72b389
KH
1027 } while (0)
1028
1029
119852e7
KH
/* If there is NOT at least BYTES length of room left at dst (i.e. if
   dst + BYTES would reach dst_end), allocate more memory for
   coding->destination and update dst and dst_end.  The amount
   requested is BYTES plus one byte for each element still waiting
   between charbuf and charbuf_end.

   We don't have to take care of coding->source which will be
   relocated.  It is handled by calling coding_set_source in
   encode_coding.

   The caller should declare and set these variables appropriately in
   advance:
        coding, dst, dst_end, charbuf, charbuf_end  */

#define ASSURE_DESTINATION(bytes)                                 \
  do {                                                            \
    if (dst + (bytes) >= dst_end)                                 \
      {                                                           \
        /* EMACS_INT, not int: this is a pointer difference and   \
           alloc_destination takes an EMACS_INT byte count.  */   \
        EMACS_INT more_bytes = charbuf_end - charbuf + (bytes);   \
                                                                  \
        dst = alloc_destination (coding, more_bytes, dst);        \
        dst_end = coding->destination + coding->dst_bytes;        \
      }                                                           \
  } while (0)
aa72b389 1045
aa72b389 1046
db274c7a
KH
/* Store multibyte form of the character C in P, and advance P to the
   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
   never calls MAYBE_UNIFY_CHAR.

   The length of the form is chosen by comparing C against
   MAX_1_BYTE_CHAR ... MAX_5_BYTE_CHAR; anything larger is stored with
   BYTE8_STRING after subtracting 0x3FFF80.

   C and P are evaluated several times, so neither may have side
   effects.  Every use of C is parenthesized so that an argument such
   as `a | b' is shifted and masked as a whole.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
  } while (0)
1076
1077
1078/* Return the character code of character whose multibyte form is at
   P, and advance P to the end of the multibyte form.  This is like
   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   The length of the form (1 to 5 bytes) is decided from the high bits
   of the leading byte alone; the trailing bytes are not validated.
   A two-byte form whose leading byte is below 0xC2 additionally has
   0x3FFF80 OR-ed into the result.  In the five-byte case the leading
   byte contributes no bits to the result.

   P is evaluated many times, so it must be a side-effect-free lvalue.  */
1081
1082#define STRING_CHAR_ADVANCE_NO_UNIFY(p) \
1083 (!((p)[0] & 0x80) \
1084 ? *(p)++ \
1085 : ! ((p)[0] & 0x20) \
1086 ? ((p) += 2, \
1087 ((((p)[-2] & 0x1F) << 6) \
1088 | ((p)[-1] & 0x3F) \
1089 | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
1090 : ! ((p)[0] & 0x10) \
1091 ? ((p) += 3, \
1092 ((((p)[-3] & 0x0F) << 12) \
1093 | (((p)[-2] & 0x3F) << 6) \
1094 | ((p)[-1] & 0x3F))) \
1095 : ! ((p)[0] & 0x08) \
1096 ? ((p) += 4, \
1097 ((((p)[-4] & 0xF) << 18) \
1098 | (((p)[-3] & 0x3F) << 12) \
1099 | (((p)[-2] & 0x3F) << 6) \
1100 | ((p)[-1] & 0x3F))) \
1101 : ((p) += 5, \
1102 ((((p)[-4] & 0x3F) << 18) \
1103 | (((p)[-3] & 0x3F) << 12) \
1104 | (((p)[-2] & 0x3F) << 6) \
1105 | ((p)[-1] & 0x3F))))
1106
aa72b389 1107
df7492f9 1108static void
971de7fb 1109coding_set_source (struct coding_system *coding)
aa72b389 1110{
df7492f9
KH
1111 if (BUFFERP (coding->src_object))
1112 {
2cb26057 1113 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 1114
df7492f9 1115 if (coding->src_pos < 0)
2cb26057 1116 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 1117 else
2cb26057 1118 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 1119 }
df7492f9 1120 else if (STRINGP (coding->src_object))
aa72b389 1121 {
8f924df7 1122 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 1123 }
df7492f9
KH
1124 else
1125 /* Otherwise, the source is C string and is never relocated
1126 automatically. Thus we don't have to update anything. */
1127 ;
1128}
aa72b389 1129
df7492f9 1130static void
971de7fb 1131coding_set_destination (struct coding_system *coding)
df7492f9
KH
1132{
1133 if (BUFFERP (coding->dst_object))
aa72b389 1134 {
df7492f9 1135 if (coding->src_pos < 0)
aa72b389 1136 {
13818c30 1137 coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
28f67a95
KH
1138 coding->dst_bytes = (GAP_END_ADDR
1139 - (coding->src_bytes - coding->consumed)
1140 - coding->destination);
aa72b389 1141 }
df7492f9 1142 else
28f67a95
KH
1143 {
1144 /* We are sure that coding->dst_pos_byte is before the gap
1145 of the buffer. */
1146 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
13818c30 1147 + coding->dst_pos_byte - BEG_BYTE);
28f67a95
KH
1148 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1149 - coding->destination);
1150 }
df7492f9
KH
1151 }
1152 else
1153 /* Otherwise, the destination is C string and is never relocated
1154 automatically. Thus we don't have to update anything. */
1155 ;
1156}
1157
1158
1159static void
971de7fb 1160coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes)
df7492f9
KH
1161{
1162 coding->destination = (unsigned char *) xrealloc (coding->destination,
1163 coding->dst_bytes + bytes);
1164 coding->dst_bytes += bytes;
1165}
1166
1167static void
971de7fb 1168coding_alloc_by_making_gap (struct coding_system *coding, EMACS_INT gap_head_used, EMACS_INT bytes)
df7492f9 1169{
db274c7a 1170 if (EQ (coding->src_object, coding->dst_object))
df7492f9 1171 {
db274c7a
KH
1172 /* The gap may contain the produced data at the head and not-yet
1173 consumed data at the tail. To preserve those data, we at
1174 first make the gap size to zero, then increase the gap
1175 size. */
1176 EMACS_INT add = GAP_SIZE;
1177
1178 GPT += gap_head_used, GPT_BYTE += gap_head_used;
1179 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
df7492f9
KH
1180 make_gap (bytes);
1181 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
db274c7a 1182 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
df7492f9 1183 }
730fff51 1184 else
df7492f9 1185 {
2c78b7e1
KH
1186 Lisp_Object this_buffer;
1187
1188 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1189 set_buffer_internal (XBUFFER (coding->dst_object));
1190 make_gap (bytes);
1191 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1192 }
df7492f9 1193}
8f924df7 1194
df7492f9
KH
1195
1196static unsigned char *
971de7fb 1197alloc_destination (struct coding_system *coding, EMACS_INT nbytes, unsigned char *dst)
df7492f9
KH
1198{
1199 EMACS_INT offset = dst - coding->destination;
1200
1201 if (BUFFERP (coding->dst_object))
db274c7a
KH
1202 {
1203 struct buffer *buf = XBUFFER (coding->dst_object);
1204
1205 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1206 }
aa72b389 1207 else
df7492f9 1208 coding_alloc_by_realloc (coding, nbytes);
df7492f9
KH
1209 coding_set_destination (coding);
1210 dst = coding->destination + offset;
1211 return dst;
1212}
aa72b389 1213
ff0dacd7
KH
1214/** Macros for annotations. */
1215
ff0dacd7
KH
1216/* An annotation data is stored in the array coding->charbuf in this
1217 format:
69a80ea3 1218 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
ff0dacd7
KH
1219 LENGTH is the number of elements in the annotation.
1220 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
69a80ea3 1221 NCHARS is the number of characters in the text annotated.
ff0dacd7
KH
1222
1223 The format of the following elements depends on ANNOTATION_MASK.
1224
1225 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1226 follow:
e951386e
KH
1227 ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1228
1229 NBYTES is the number of bytes specified in the header part of
1230 old-style emacs-mule encoding, or 0 for the other kind of
1231 composition.
1232
ff0dacd7 1233 METHOD is one of enum composition_method.
e951386e 1234
ff0dacd7
KH
1235 Optional COMPOSITION-COMPONENTS are characters and composition
1236 rules.
1237
1238 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
e951386e
KH
1239 follows.
1240
1241 If ANNOTATION_MASK is 0, this annotation is just a space holder to
1242 recover from an invalid annotation, and should be skipped by
1243 produce_annotation. */
1244
1245/* Maximum length of the header of annotation data. */
1246#define MAX_ANNOTATION_LENGTH 5
ff0dacd7 1247
/* Store the three-element annotation header [ -LEN MASK NCHARS ] at
   BUF, advance BUF past it, and mark CODING as annotated.  The caller
   must have the variable `coding' in scope.

   The expansion deliberately does not end in a semicolon, so that
   `ADD_ANNOTATION_DATA (...);' is exactly one statement and can be
   used as the body of an `if' that has an `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1255
/* Store a composition annotation at BUF and advance BUF past it: the
   common header written by ADD_ANNOTATION_DATA with length 5 and
   CODING_ANNOTATE_COMPOSITION_MASK, followed by NBYTES and METHOD.
   BUF and the stored values are parenthesized, matching
   ADD_ANNOTATION_DATA, so that BUF may be any lvalue expression
   (e.g. `*pp').  */
#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (nbytes);                                                    \
    *(buf)++ = (method);                                                    \
  } while (0)
1262
1263
69a80ea3
KH
/* Store a charset annotation at BUF and advance BUF past it: the
   common header written by ADD_ANNOTATION_DATA with length 4 and
   CODING_ANNOTATE_CHARSET_MASK, followed by the charset ID.  BUF and
   ID are parenthesized, matching ADD_ANNOTATION_DATA, so that BUF may
   be any lvalue expression (e.g. `*pp').  */
#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1269
df7492f9
KH
1270\f
1271/*** 2. Emacs' internal format (emacs-utf-8) ***/
1272
1273
1274
1275\f
1276/*** 3. UTF-8 ***/
1277
1278/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1279 Check if a text is encoded in UTF-8. If it is, return 1, else
1280 return 0. */
df7492f9
KH
1281
/* Predicates classifying one octet C of a UTF-8 byte sequence by its
   high bits: a one-octet (7-bit) character, a trailing octet
   (10xxxxxx), or the leading octet of a 2-, 3-, 4- or 5-octet
   sequence (110xxxxx, 1110xxxx, 11110xxx, 111110xx).  */
1282#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1283#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1284#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1285#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1286#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1287#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1288
a470d443
KH
/* The byte order mark character, and the three octets EF BB BF that
   the UTF-8 code in this file tests for and emits as a BOM.  */
1289#define UTF_BOM 0xFEFF
1290#define UTF_8_BOM_1 0xEF
1291#define UTF_8_BOM_2 0xBB
1292#define UTF_8_BOM_3 0xBF
1293
/* Check whether the source text of CODING can be UTF-8, recording the
   verdict in DETECT_INFO.

   CATEGORY_MASK_UTF_8 is always added to DETECT_INFO->checked.  The
   first CODING->head_ascii bytes are skipped, then the rest is
   scanned one sequence at a time.  As soon as a leading octet is not
   followed by the required trailing octets (or is not a valid leading
   octet at all), CATEGORY_MASK_UTF_8 is added to
   DETECT_INFO->rejected and 0 is returned.  The same happens if the
   source ends in the middle of a sequence and this is the last block
   (CODING_MODE_LAST_BLOCK).

   Otherwise 1 is returned.  If the octets EF BB BF were seen at the
   very beginning of the source, both the "with signature" and
   "without signature" categories are marked as found; if not, the
   "with signature" category is rejected, and the "without signature"
   category is marked as found only when at least one multi-octet
   sequence was seen.  */
df7492f9 1294static int
971de7fb 1295detect_coding_utf_8 (struct coding_system *coding, struct coding_detection_info *detect_info)
df7492f9 1296{
065e3595 1297 const unsigned char *src = coding->source, *src_base;
8f924df7 1298 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1299 int multibytep = coding->src_multibyte;
1300 int consumed_chars = 0;
a470d443 1301 int bom_found = 0;
df7492f9
KH
1302 int found = 0;
1303
ff0dacd7 1304 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1305 /* A coding system of this category is always ASCII compatible. */
1306 src += coding->head_ascii;
1307
1308 while (1)
aa72b389 1309 {
df7492f9 1310 int c, c1, c2, c3, c4;
aa72b389 1311
/* Remember where this sequence starts; used for the BOM test below
   and for the truncated-sequence test at no_more_source.  */
065e3595 1312 src_base = src;
df7492f9 1313 ONE_MORE_BYTE (c);
065e3595 1314 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1315 continue;
1316 ONE_MORE_BYTE (c1);
065e3595 1317 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1318 break;
1319 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1320 {
a470d443 1321 found = 1;
df7492f9 1322 continue;
aa72b389 1323 }
df7492f9 1324 ONE_MORE_BYTE (c2);
065e3595 1325 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1326 break;
1327 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1328 {
a470d443
KH
1329 found = 1;
1330 if (src_base == coding->source
1331 && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1332 bom_found = 1;
df7492f9 1333 continue;
aa72b389 1334 }
df7492f9 1335 ONE_MORE_BYTE (c3);
065e3595 1336 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1337 break;
1338 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1339 {
a470d443 1340 found = 1;
df7492f9
KH
1341 continue;
1342 }
1343 ONE_MORE_BYTE (c4);
065e3595 1344 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1345 break;
1346 if (UTF_8_5_OCTET_LEADING_P (c))
1347 {
a470d443 1348 found = 1;
df7492f9
KH
1349 continue;
1350 }
1351 break;
aa72b389 1352 }
/* Reached only by `break' out of the loop: an invalid sequence.  */
ff0dacd7 1353 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1354 return 0;
aa72b389 1355
df7492f9 1356 no_more_source:
/* src_base < src means the source ended inside a sequence.  */
065e3595 1357 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1358 {
ff0dacd7 1359 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1360 return 0;
aa72b389 1361 }
a470d443
KH
1362 if (bom_found)
1363 {
1364 /* A leading U+FEFF (octets EF BB BF) doesn't necessarily mean a BOM, so both the with-signature and the without-signature categories are reported as found. */
1365 detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1366 }
1367 else
1368 {
1369 detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
0e17387a
KH
1370 if (found)
1371 detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
a470d443 1372 }
ff0dacd7 1373 return 1;
aa72b389
KH
1374}
1375
4ed46869 1376
/* Decode UTF-8 source bytes of CODING, starting CODING->consumed
   bytes into CODING->source, into character codes appended to
   CODING->charbuf.

   If the BOM setting of CODING is anything but utf_without_bom, a
   leading EF BB BF is skipped; any other beginning is left in place.
   The BOM setting is then switched to utf_without_bom in every case.

   In the main loop each well-formed sequence of 1 to 5 octets
   produces one element of charbuf.  Overlong forms, surrogates
   (0xD800..0xDFFF) and values above MAX_CHAR are treated as invalid.
   For an invalid sequence only its first byte is consumed: it is
   stored as is if it is ASCII, else converted with BYTE8_TO_CHAR, and
   CODING->errors is incremented.  A negative value from ONE_MORE_BYTE
   as the first byte is stored negated.

   When EOL conversion to DOS style is in effect, the byte following a
   '\r' is fetched ahead into byte_after_cr before the '\r' itself is
   stored, and is used as the first byte of the next iteration.

   The loop stops when charbuf is full or the source is exhausted.
   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated to the start of the last sequence
   that was not completely processed.  */
b73bfc1c 1377static void
971de7fb 1378decode_coding_utf_8 (struct coding_system *coding)
b73bfc1c 1379{
8f924df7
KH
1380 const unsigned char *src = coding->source + coding->consumed;
1381 const unsigned char *src_end = coding->source + coding->src_bytes;
1382 const unsigned char *src_base;
69a80ea3
KH
1383 int *charbuf = coding->charbuf + coding->charbuf_used;
1384 int *charbuf_end = coding->charbuf + coding->charbuf_size;
453b38f0 1385 int consumed_chars = 0, consumed_chars_base = 0;
df7492f9 1386 int multibytep = coding->src_multibyte;
a470d443 1387 enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
24a73b0a 1388 Lisp_Object attr, charset_list;
0a9564cb
EZ
1389 int eol_crlf =
1390 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 1391 int byte_after_cr = -1;
4ed46869 1392
24a73b0a 1393 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1394
a470d443
KH
/* Look for a BOM.  Every failed test rewinds SRC to SRC_BASE so that
   the bytes are decoded as ordinary text by the loop below.
   NOTE(review): consumed_chars is not rewound together with SRC on
   those paths -- confirm that this is intended.  */
1395 if (bom != utf_without_bom)
1396 {
1397 int c1, c2, c3;
1398
1399 src_base = src;
1400 ONE_MORE_BYTE (c1);
1401 if (! UTF_8_3_OCTET_LEADING_P (c1))
1402 src = src_base;
1403 else
1404 {
159bd5a2 1405 ONE_MORE_BYTE (c2);
a470d443
KH
1406 if (! UTF_8_EXTRA_OCTET_P (c2))
1407 src = src_base;
1408 else
1409 {
159bd5a2 1410 ONE_MORE_BYTE (c3);
a470d443
KH
1411 if (! UTF_8_EXTRA_OCTET_P (c3))
1412 src = src_base;
1413 else
1414 {
1415 if ((c1 != UTF_8_BOM_1)
1416 || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1417 src = src_base;
1418 else
1419 CODING_UTF_8_BOM (coding) = utf_without_bom;
1420 }
1421 }
1422 }
1423 }
1424 CODING_UTF_8_BOM (coding) = utf_without_bom;
1425
1426
1427
df7492f9 1428 while (1)
b73bfc1c 1429 {
df7492f9 1430 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1431
df7492f9
KH
1432 src_base = src;
1433 consumed_chars_base = consumed_chars;
4af310db 1434
df7492f9 1435 if (charbuf >= charbuf_end)
b71f6f73
KH
1436 {
/* The byte fetched ahead after a CR has not been decoded yet; step
   SRC_BASE back so that it is counted as unconsumed.  */
1437 if (byte_after_cr >= 0)
1438 src_base--;
1439 break;
1440 }
df7492f9 1441
119852e7
KH
1442 if (byte_after_cr >= 0)
1443 c1 = byte_after_cr, byte_after_cr = -1;
1444 else
1445 ONE_MORE_BYTE (c1);
065e3595
KH
1446 if (c1 < 0)
1447 {
1448 c = - c1;
1449 }
1450 else if (UTF_8_1_OCTET_P(c1))
df7492f9 1451 {
119852e7
KH
1452 if (eol_crlf && c1 == '\r')
1453 ONE_MORE_BYTE (byte_after_cr);
df7492f9 1454 c = c1;
4af310db 1455 }
df7492f9 1456 else
4af310db 1457 {
df7492f9 1458 ONE_MORE_BYTE (c2);
065e3595 1459 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1460 goto invalid_code;
1461 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1462 {
b0edb2c5
DL
1463 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1464 /* Reject overlong sequences here and below. Encoders
1465 producing them are incorrect, they can be misleading,
1466 and they mess up read/write invariance. */
1467 if (c < 128)
1468 goto invalid_code;
4af310db 1469 }
df7492f9 1470 else
aa72b389 1471 {
df7492f9 1472 ONE_MORE_BYTE (c3);
065e3595 1473 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1474 goto invalid_code;
1475 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1476 {
1477 c = (((c1 & 0xF) << 12)
1478 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1479 if (c < 0x800
1480 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1481 goto invalid_code;
1482 }
df7492f9
KH
1483 else
1484 {
1485 ONE_MORE_BYTE (c4);
065e3595 1486 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1487 goto invalid_code;
1488 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1489 {
df7492f9
KH
1490 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1491 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1492 if (c < 0x10000)
1493 goto invalid_code;
1494 }
df7492f9
KH
1495 else
1496 {
1497 ONE_MORE_BYTE (c5);
065e3595 1498 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1499 goto invalid_code;
1500 if (UTF_8_5_OCTET_LEADING_P (c1))
1501 {
1502 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1503 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1504 | (c5 & 0x3F));
b0edb2c5 1505 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1506 goto invalid_code;
1507 }
1508 else
1509 goto invalid_code;
1510 }
1511 }
aa72b389 1512 }
b73bfc1c 1513 }
df7492f9
KH
1514
1515 *charbuf++ = c;
1516 continue;
1517
1518 invalid_code:
/* Rewind to the start of the sequence and consume a single byte.
   NOTE(review): when C1 was taken from byte_after_cr, SRC_BASE already
   points past that byte, so this rewind (and the one implied at
   no_more_source) does not cover it -- confirm that this is intended.  */
1519 src = src_base;
1520 consumed_chars = consumed_chars_base;
1521 ONE_MORE_BYTE (c);
1522 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1523 coding->errors++;
aa72b389
KH
1524 }
1525
df7492f9
KH
1526 no_more_source:
1527 coding->consumed_char += consumed_chars_base;
1528 coding->consumed = src_base - coding->source;
1529 coding->charbuf_used = charbuf - coding->charbuf;
1530}
1531
1532
1533static int
971de7fb 1534encode_coding_utf_8 (struct coding_system *coding)
df7492f9
KH
1535{
1536 int multibytep = coding->dst_multibyte;
1537 int *charbuf = coding->charbuf;
1538 int *charbuf_end = charbuf + coding->charbuf_used;
1539 unsigned char *dst = coding->destination + coding->produced;
1540 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1541 int produced_chars = 0;
df7492f9
KH
1542 int c;
1543
a470d443
KH
1544 if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1545 {
1546 ASSURE_DESTINATION (3);
1547 EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1548 CODING_UTF_8_BOM (coding) = utf_without_bom;
1549 }
1550
df7492f9 1551 if (multibytep)
aa72b389 1552 {
df7492f9
KH
1553 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1554
1555 while (charbuf < charbuf_end)
b73bfc1c 1556 {
df7492f9 1557 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1558
df7492f9
KH
1559 ASSURE_DESTINATION (safe_room);
1560 c = *charbuf++;
28f67a95
KH
1561 if (CHAR_BYTE8_P (c))
1562 {
1563 c = CHAR_TO_BYTE8 (c);
1564 EMIT_ONE_BYTE (c);
1565 }
1566 else
1567 {
db274c7a 1568 CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
28f67a95
KH
1569 for (p = str; p < pend; p++)
1570 EMIT_ONE_BYTE (*p);
1571 }
b73bfc1c 1572 }
aa72b389 1573 }
df7492f9
KH
1574 else
1575 {
1576 int safe_room = MAX_MULTIBYTE_LENGTH;
1577
1578 while (charbuf < charbuf_end)
b73bfc1c 1579 {
df7492f9
KH
1580 ASSURE_DESTINATION (safe_room);
1581 c = *charbuf++;
f03caae0
KH
1582 if (CHAR_BYTE8_P (c))
1583 *dst++ = CHAR_TO_BYTE8 (c);
1584 else
db274c7a 1585 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
df7492f9 1586 produced_chars++;
4ed46869
KH
1587 }
1588 }
065e3595 1589 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1590 coding->produced_char += produced_chars;
1591 coding->produced = dst - coding->destination;
1592 return 0;
4ed46869
KH
1593}
1594
b73bfc1c 1595
df7492f9 1596/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1597 Check if a text is encoded in one of UTF-16 based coding systems.
1598 If it is, return 1, else return 0. */
aa72b389 1599
df7492f9
KH
/* Nonzero if the 16-bit code unit VAL is a high (leading) surrogate,
   i.e. lies in 0xD800..0xDBFF.  */
1600#define UTF_16_HIGH_SURROGATE_P(val) \
1601 (((val) & 0xFC00) == 0xD800)
1602
/* Nonzero if the 16-bit code unit VAL is a low (trailing) surrogate,
   i.e. lies in 0xDC00..0xDFFF.  */
1603#define UTF_16_LOW_SURROGATE_P(val) \
1604 (((val) & 0xFC00) == 0xDC00)
93dec019 1605
df7492f9
KH
/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
1606#define UTF_16_INVALID_P(val) \
1607 (((val) == 0xFFFE) \
1608 || ((val) == 0xFFFF) \
1609 || UTF_16_LOW_SURROGATE_P (val))
aa72b389 1610
aa72b389 1611
/* Check whether the source text of CODING can be one of the UTF-16
   based coding systems, recording the verdict in DETECT_INFO.

   CATEGORY_MASK_UTF_16 is always added to DETECT_INFO->checked.  If
   this is the last block and CODING->src_chars is odd, every UTF-16
   category is rejected and 0 is returned.

   Otherwise the first two bytes are examined:
     - FF FE marks the little-endian and auto categories as found and
       rejects the big-endian and both no-signature categories;
     - FE FF does the same with the two byte orders swapped;
     - a multibyte character in second position (C2 < 0) rejects every
       UTF-16 category and returns 0;
     - anything else rejects the auto, big-endian and little-endian
       categories and then scans the following byte pairs, counting
       how many distinct values occur as the first and as the second
       byte of a pair.  Reaching 128 distinct first bytes rejects the
       big-endian no-signature category; reaching 128 distinct second
       bytes rejects the little-endian no-signature category.  If
       every UTF-16 category ends up rejected, or a multibyte
       character is met, 0 is returned.

   Returns 1 after a BOM was recognized, or whenever TWO_MORE_BYTES
   runs out of source.  */
df7492f9 1612static int
971de7fb 1613detect_coding_utf_16 (struct coding_system *coding, struct coding_detection_info *detect_info)
aa72b389 1614{
8f924df7
KH
1615 const unsigned char *src = coding->source, *src_base = src;
1616 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1617 int multibytep = coding->src_multibyte;
1618 int consumed_chars = 0;
1619 int c1, c2;
aa72b389 1620
ff0dacd7 1621 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1622 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1623 && (coding->src_chars & 1))
ff0dacd7
KH
1624 {
1625 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1626 return 0;
1627 }
24a73b0a 1628
f56a4450 1629 TWO_MORE_BYTES (c1, c2);
df7492f9 1630 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1631 {
b49a1807
KH
1632 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1633 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1634 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1635 | CATEGORY_MASK_UTF_16_BE_NOSIG
1636 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1637 }
df7492f9 1638 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1639 {
b49a1807
KH
1640 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1641 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1642 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1643 | CATEGORY_MASK_UTF_16_BE_NOSIG
1644 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1645 }
220eeac9 1646 else if (c2 < 0)
f56a4450
KH
1647 {
1648 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1649 return 0;
1650 }
2f3cbb32 1651 else
24a73b0a 1652 {
2f3cbb32
KH
1653 /* We check the dispersion of Eth and Oth bytes where E is even and
1654 O is odd. If both are high, we assume binary data.*/
/* e[] and o[] flag the byte values already seen in even and odd
   positions; e_num and o_num count the distinct values.  */
1655 unsigned char e[256], o[256];
1656 unsigned e_num = 1, o_num = 1;
1657
1658 memset (e, 0, 256);
1659 memset (o, 0, 256);
1660 e[c1] = 1;
1661 o[c2] = 1;
1662
cc13543e
KH
1663 detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1664 |CATEGORY_MASK_UTF_16_BE
1665 | CATEGORY_MASK_UTF_16_LE);
2f3cbb32 1666
7f1faf1c
KH
/* Keep scanning until every UTF-16 category has been rejected.  */
1667 while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1668 != CATEGORY_MASK_UTF_16)
2f3cbb32 1669 {
f56a4450 1670 TWO_MORE_BYTES (c1, c2);
220eeac9 1671 if (c2 < 0)
f56a4450 1672 break;
2f3cbb32
KH
1673 if (! e[c1])
1674 {
1675 e[c1] = 1;
1676 e_num++;
cc13543e
KH
1677 if (e_num >= 128)
1678 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
2f3cbb32
KH
1679 }
1680 if (! o[c2])
1681 {
977b85f4 1682 o[c2] = 1;
2f3cbb32 1683 o_num++;
cc13543e
KH
1684 if (o_num >= 128)
1685 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
2f3cbb32
KH
1686 }
1687 }
2f3cbb32 1688 return 0;
ff0dacd7 1689 }
2f3cbb32 1690
df7492f9 1691 no_more_source:
ff0dacd7 1692 return 1;
df7492f9 1693}
aa72b389 1694
/* Decode UTF-16 source bytes of CODING, starting CODING->consumed
   bytes into CODING->source, into character codes appended to
   CODING->charbuf.

   If the BOM setting of CODING is utf_with_bom, the first two bytes
   are read as a big-endian 16-bit value and compared with 0xFEFF
   (big-endian coding) or 0xFFFE (otherwise); on a mismatch the bytes
   are put back to be decoded as a normal character and
   CODING->errors is incremented.  For utf_with_bom and
   utf_detect_bom the setting is then switched to utf_without_bom.

   In the main loop two bytes are combined into one code unit
   according to the endianness of CODING.  A high surrogate is kept in
   the surrogate state of CODING until the next code unit arrives: a
   following low surrogate yields one character at 0x10000 or above;
   anything else makes the two bytes of the pending surrogate be
   stored as two separate elements and counts as an error.  A negative
   value from ONE_MORE_BYTE is stored negated.

   When EOL conversion to DOS style is in effect, the two bytes
   following a '\r' are fetched ahead into byte_after_cr1 and
   byte_after_cr2 before the '\r' itself is stored.

   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.  */
df7492f9 1695static void
971de7fb 1696decode_coding_utf_16 (struct coding_system *coding)
df7492f9 1697{
8f924df7
KH
1698 const unsigned char *src = coding->source + coding->consumed;
1699 const unsigned char *src_end = coding->source + coding->src_bytes;
1700 const unsigned char *src_base;
69a80ea3 1701 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
1702 /* We may produce at most 3 chars in one loop. */
1703 int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
3a8406e1 1704 int consumed_chars = 0, consumed_chars_base = 0;
df7492f9 1705 int multibytep = coding->src_multibyte;
a470d443 1706 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
df7492f9
KH
1707 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1708 int surrogate = CODING_UTF_16_SURROGATE (coding);
24a73b0a 1709 Lisp_Object attr, charset_list;
0a9564cb
EZ
1710 int eol_crlf =
1711 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 1712 int byte_after_cr1 = -1, byte_after_cr2 = -1;
df7492f9 1713
24a73b0a 1714 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1715
a470d443 1716 if (bom == utf_with_bom)
aa72b389 1717 {
df7492f9 1718 int c, c1, c2;
4af310db 1719
aa72b389 1720 src_base = src;
df7492f9
KH
1721 ONE_MORE_BYTE (c1);
1722 ONE_MORE_BYTE (c2);
e19c3639 1723 c = (c1 << 8) | c2;
aa72b389 1724
b49a1807
KH
1725 if (endian == utf_16_big_endian
1726 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1727 {
b49a1807
KH
1728 /* The first two bytes are not BOM. Treat them as bytes
1729 for a normal character. */
1730 src = src_base;
1731 coding->errors++;
aa72b389 1732 }
a470d443 1733 CODING_UTF_16_BOM (coding) = utf_without_bom;
b49a1807 1734 }
a470d443 1735 else if (bom == utf_detect_bom)
b49a1807
KH
1736 {
1737 /* We have already tried to detect BOM and failed in
1738 detect_coding. */
a470d443 1739 CODING_UTF_16_BOM (coding) = utf_without_bom;
df7492f9 1740 }
aa72b389 1741
df7492f9
KH
1742 while (1)
1743 {
1744 int c, c1, c2;
1745
1746 src_base = src;
1747 consumed_chars_base = consumed_chars;
1748
df80c7f0 1749 if (charbuf >= charbuf_end)
b71f6f73
KH
1750 {
/* The two bytes fetched ahead after a CR have not been decoded yet;
   step SRC_BASE back so that they are counted as unconsumed.  */
1751 if (byte_after_cr1 >= 0)
1752 src_base -= 2;
1753 break;
1754 }
df7492f9 1755
119852e7
KH
1756 if (byte_after_cr1 >= 0)
1757 c1 = byte_after_cr1, byte_after_cr1 = -1;
1758 else
1759 ONE_MORE_BYTE (c1);
065e3595
KH
1760 if (c1 < 0)
1761 {
1762 *charbuf++ = -c1;
1763 continue;
1764 }
119852e7
KH
1765 if (byte_after_cr2 >= 0)
1766 c2 = byte_after_cr2, byte_after_cr2 = -1;
1767 else
1768 ONE_MORE_BYTE (c2);
065e3595
KH
1769 if (c2 < 0)
1770 {
1771 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1772 *charbuf++ = -c2;
1773 continue;
1774 }
df7492f9 1775 c = (endian == utf_16_big_endian
e19c3639 1776 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
119852e7 1777
df7492f9 1778 if (surrogate)
fd3ae0b9 1779 {
df7492f9 1780 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1781 {
df7492f9
KH
/* The pending high surrogate is not followed by a low surrogate:
   store its two bytes in source order and count an error.  */
1782 if (endian == utf_16_big_endian)
1783 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1784 else
1785 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1786 *charbuf++ = c1;
1787 *charbuf++ = c2;
1788 coding->errors++;
1789 if (UTF_16_HIGH_SURROGATE_P (c))
1790 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1791 else
df7492f9 1792 *charbuf++ = c;
fd3ae0b9
KH
1793 }
1794 else
df7492f9
KH
1795 {
1796 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1797 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1798 *charbuf++ = 0x10000 + c;
df7492f9 1799 }
fd3ae0b9 1800 }
aa72b389 1801 else
df7492f9
KH
1802 {
1803 if (UTF_16_HIGH_SURROGATE_P (c))
1804 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1805 else
119852e7
KH
1806 {
1807 if (eol_crlf && c == '\r')
1808 {
1809 ONE_MORE_BYTE (byte_after_cr1);
1810 ONE_MORE_BYTE (byte_after_cr2);
1811 }
1812 *charbuf++ = c;
1813 }
8f924df7 1814 }
aa72b389 1815 }
df7492f9
KH
1816
1817 no_more_source:
1818 coding->consumed_char += consumed_chars_base;
1819 coding->consumed = src_base - coding->source;
1820 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1821}
b73bfc1c 1822
df7492f9 1823static int
971de7fb 1824encode_coding_utf_16 (struct coding_system *coding)
df7492f9
KH
1825{
1826 int multibytep = coding->dst_multibyte;
1827 int *charbuf = coding->charbuf;
1828 int *charbuf_end = charbuf + coding->charbuf_used;
1829 unsigned char *dst = coding->destination + coding->produced;
1830 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1831 int safe_room = 8;
a470d443 1832 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
df7492f9
KH
1833 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1834 int produced_chars = 0;
24a73b0a 1835 Lisp_Object attrs, charset_list;
df7492f9 1836 int c;
4ed46869 1837
24a73b0a 1838 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 1839
a470d443 1840 if (bom != utf_without_bom)
df7492f9
KH
1841 {
1842 ASSURE_DESTINATION (safe_room);
1843 if (big_endian)
df7492f9 1844 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1845 else
1846 EMIT_TWO_BYTES (0xFF, 0xFE);
a470d443 1847 CODING_UTF_16_BOM (coding) = utf_without_bom;
df7492f9
KH
1848 }
1849
1850 while (charbuf < charbuf_end)
1851 {
1852 ASSURE_DESTINATION (safe_room);
1853 c = *charbuf++;
60afa08d 1854 if (c > MAX_UNICODE_CHAR)
e19c3639 1855 c = coding->default_char;
df7492f9
KH
1856
1857 if (c < 0x10000)
1858 {
1859 if (big_endian)
1860 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1861 else
1862 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1863 }
1864 else
1865 {
1866 int c1, c2;
1867
1868 c -= 0x10000;
1869 c1 = (c >> 10) + 0xD800;
1870 c2 = (c & 0x3FF) + 0xDC00;
1871 if (big_endian)
1872 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1873 else
1874 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1875 }
1876 }
065e3595 1877 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1878 coding->produced = dst - coding->destination;
1879 coding->produced_char += produced_chars;
1880 return 0;
1881}
1882
1883\f
1884/*** 6. Old Emacs' internal format (emacs-mule) ***/
1885
1886/* Emacs' internal format for representation of multiple character
1887 sets is a kind of multi-byte encoding, i.e. characters are
1888 represented by variable-length sequences of one-byte codes.
1889
1890 ASCII characters and control characters (e.g. `tab', `newline') are
1891 represented by one-byte sequences which are their ASCII codes, in
1892 the range 0x00 through 0x7F.
1893
1894 8-bit characters of the range 0x80..0x9F are represented by
1895 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1896 code + 0x20).
1897
1898 8-bit characters of the range 0xA0..0xFF are represented by
1899 one-byte sequences which are their 8-bit code.
1900
1901 The other characters are represented by a sequence of `base
1902 leading-code', optional `extended leading-code', and one or two
1903 `position-code's. The length of the sequence is determined by the
1904 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1905 whereas extended leading-code and position-code take the range 0xA0
1906 through 0xFF. See `charset.h' for more details about leading-code
1907 and position-code.
1908
1909 --- CODE RANGE of Emacs' internal format ---
1910 character set range
1911 ------------- -----
1912 ascii 0x00..0x7F
1913 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1914 eight-bit-graphic 0xA0..0xFF
1915 ELSE 0x81..0x9D + [0xA0..0xFF]+
1916 ---------------------------------------------
1917
1918 As this is the internal character representation, the format is
1919 usually not used externally (i.e. in a file or in a data sent to a
1920 process). But, it is possible to have a text externally in this
1921 format (i.e. by encoding by the coding system `emacs-mule').
1922
1923 In that case, a sequence of one-byte codes has a slightly different
1924 form.
1925
1926 At first, all characters in eight-bit-control are represented by
1927 one-byte sequences which are their 8-bit code.
1928
1929 Next, character composition data are represented by the byte
1930 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1931 where,
e951386e 1932 METHOD is 0xF2 plus one of composition method (enum
df7492f9
KH
1933 composition_method),
1934
1935 BYTES is 0xA0 plus a byte length of this composition data,
1936
e951386e 1937 CHARS is 0xA0 plus a number of characters composed by this
df7492f9
KH
1938 data,
1939
1940 COMPONENTs are characters of multibyte form or composition
1941 rules encoded by two bytes of ASCII codes.
1942
1943 In addition, for backward compatibility, the following formats are
1944 also recognized as composition data on decoding.
1945
1946 0x80 MSEQ ...
1947 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1948
1949 Here,
1950 MSEQ is a multibyte form but in this special format:
1951 ASCII: 0xA0 ASCII_CODE+0x80,
1952 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1953 RULE is a one byte code of the range 0xA0..0xF0 that
1954 represents a composition rule.
1955 */
1956
/* Table indexed by a byte value.  The code in this file uses the
   entry for a leading byte as the total number of bytes (1 to 4) in
   the emacs-mule sequence that starts with that byte.  The table is
   not initialized here -- presumably it is filled in elsewhere in
   this file; verify before relying on any particular entry.  */
1957char emacs_mule_bytes[256];
1958
e951386e
KH
1959
1960/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1961 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1962 else return 0. */
1963
1964static int
971de7fb 1965detect_coding_emacs_mule (struct coding_system *coding, struct coding_detection_info *detect_info)
e951386e
KH
1966{
1967 const unsigned char *src = coding->source, *src_base;
1968 const unsigned char *src_end = coding->source + coding->src_bytes;
1969 int multibytep = coding->src_multibyte;
1970 int consumed_chars = 0;
1971 int c;
1972 int found = 0;
1973
1974 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1975 /* A coding system of this category is always ASCII compatible. */
1976 src += coding->head_ascii;
1977
1978 while (1)
1979 {
1980 src_base = src;
1981 ONE_MORE_BYTE (c);
1982 if (c < 0)
1983 continue;
1984 if (c == 0x80)
1985 {
1986 /* Perhaps the start of composite character. We simply skip
1987 it because analyzing it is too heavy for detecting. But,
1988 at least, we check that the composite character
1989 constitutes of more than 4 bytes. */
1990 const unsigned char *src_base;
1991
1992 repeat:
1993 src_base = src;
1994 do
1995 {
1996 ONE_MORE_BYTE (c);
1997 }
1998 while (c >= 0xA0);
1999
2000 if (src - src_base <= 4)
2001 break;
2002 found = CATEGORY_MASK_EMACS_MULE;
2003 if (c == 0x80)
2004 goto repeat;
2005 }
2006
2007 if (c < 0x80)
2008 {
2009 if (c < 0x20
2010 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2011 break;
2012 }
2013 else
2014 {
2015 int more_bytes = emacs_mule_bytes[*src_base] - 1;
2016
2017 while (more_bytes > 0)
2018 {
2019 ONE_MORE_BYTE (c);
2020 if (c < 0xA0)
2021 {
2022 src--; /* Unread the last byte. */
2023 break;
2024 }
2025 more_bytes--;
2026 }
2027 if (more_bytes != 0)
2028 break;
2029 found = CATEGORY_MASK_EMACS_MULE;
2030 }
2031 }
2032 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2033 return 0;
2034
2035 no_more_source:
2036 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2037 {
2038 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2039 return 0;
2040 }
2041 detect_info->found |= found;
2042 return 1;
2043}
2044
2045
2046/* Parse emacs-mule multibyte sequence at SRC and return the decoded
2047 character. If CMP_STATUS indicates that we must expect MSEQ or
2048 RULE described above, decode it and return the negative value of
685ebdc8 2049 the decoded character or rule. If an invalid byte is found, return
e951386e
KH
2050 -1. If SRC is too short, return -2. */
2051
/* Summary of the contract implemented below:

   Bytes are read from SRC with ONE_MORE_BYTE, which uses the locals
   src, src_end, multibytep and consumed_chars and jumps to
   `no_more_source' when the input runs out.

   On success, *NBYTES receives the number of bytes consumed from SRC,
   *NCHARS receives the final value of consumed_chars and, if ID is
   non-null, *ID receives the id of the charset of the decoded
   character.  The return value is the decoded character, negated when
   the sequence was read as an old-form MSEQ.

   When CMP_STATUS describes an old-form composition whose state is
   not COMPOSING_CHAR and the first byte is >= 0xA0, that byte is
   returned negated as a rule; *NBYTES and *NCHARS are set but *ID is
   left untouched.

   Return -1 for an invalid sequence and -2 when SRC is too short; in
   both cases the output parameters are not written.  */
df7492f9 2052int
971de7fb 2053emacs_mule_char (struct coding_system *coding, const unsigned char *src, int *nbytes, int *nchars, int *id, struct composition_status *cmp_status)
df7492f9 2054{
8f924df7
KH
2055 const unsigned char *src_end = coding->source + coding->src_bytes;
2056 const unsigned char *src_base = src;
df7492f9 2057 int multibytep = coding->src_multibyte;
df7492f9
KH
2058 struct charset *charset;
2059 unsigned code;
2060 int c;
2061 int consumed_chars = 0;
e951386e 2062 int mseq_found = 0;
df7492f9
KH
2063
2064 ONE_MORE_BYTE (c);
/* A negative value from ONE_MORE_BYTE is taken as an already decoded
   character: its negation is returned with the charset stored in
   emacs_mule_charset[0].  */
065e3595 2065 if (c < 0)
df7492f9 2066 {
065e3595
KH
2067 c = -c;
2068 charset = emacs_mule_charset[0];
2069 }
2070 else
2071 {
/* A first byte >= 0xA0 is acceptable only inside an old-form
   composition; otherwise it is invalid.  */
4d41e8b7
KH
2072 if (c >= 0xA0)
2073 {
e951386e
KH
2074 if (cmp_status->state != COMPOSING_NO
2075 && cmp_status->old_form)
4d41e8b7 2076 {
e951386e
KH
2077 if (cmp_status->state == COMPOSING_CHAR)
2078 {
/* MSEQ: 0xA0 introduces an ASCII character stored as code + 0x80;
   any other byte is a leading code stored as code + 0x20.  */
2079 if (c == 0xA0)
2080 {
2081 ONE_MORE_BYTE (c);
2082 c -= 0x80;
2083 if (c < 0)
2084 goto invalid_code;
2085 }
2086 else
2087 c -= 0x20;
2088 mseq_found = 1;
2089 }
2090 else
2091 {
/* Not expecting a character: hand the byte back, negated, as a
   rule.  */
2092 *nbytes = src - src_base;
2093 *nchars = consumed_chars;
2094 return -c;
2095 }
4d41e8b7
KH
2096 }
2097 else
e951386e 2098 goto invalid_code;
4d41e8b7
KH
2099 }
2100
/* Dispatch on the total sequence length recorded for leading byte C.  */
065e3595 2101 switch (emacs_mule_bytes[c])
b73bfc1c 2102 {
/* Leading code selects the charset; one position code follows.  */
065e3595 2103 case 2:
df7492f9
KH
2104 if (! (charset = emacs_mule_charset[c]))
2105 goto invalid_code;
2106 ONE_MORE_BYTE (c);
9ffd559c 2107 if (c < 0xA0)
065e3595 2108 goto invalid_code;
df7492f9 2109 code = c & 0x7F;
065e3595
KH
2110 break;
2111
/* Either a private leading code followed by a byte that selects the
   charset and one position code, or a leading code followed by two
   position codes.  */
2112 case 3:
2113 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2114 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2115 {
2116 ONE_MORE_BYTE (c);
9ffd559c 2117 if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
065e3595
KH
2118 goto invalid_code;
2119 ONE_MORE_BYTE (c);
9ffd559c 2120 if (c < 0xA0)
065e3595
KH
2121 goto invalid_code;
2122 code = c & 0x7F;
2123 }
2124 else
2125 {
2126 if (! (charset = emacs_mule_charset[c]))
2127 goto invalid_code;
2128 ONE_MORE_BYTE (c);
9ffd559c 2129 if (c < 0xA0)
065e3595
KH
2130 goto invalid_code;
2131 code = (c & 0x7F) << 8;
2132 ONE_MORE_BYTE (c);
9ffd559c 2133 if (c < 0xA0)
065e3595
KH
2134 goto invalid_code;
2135 code |= c & 0x7F;
2136 }
2137 break;
2138
/* The byte after the leading code selects the charset; two position
   codes follow.  */
2139 case 4:
2140 ONE_MORE_BYTE (c);
2141 if (c < 0 || ! (charset = emacs_mule_charset[c]))
df7492f9
KH
2142 goto invalid_code;
2143 ONE_MORE_BYTE (c);
9ffd559c 2144 if (c < 0xA0)
065e3595 2145 goto invalid_code;
781d7a48 2146 code = (c & 0x7F) << 8;
df7492f9 2147 ONE_MORE_BYTE (c);
9ffd559c 2148 if (c < 0xA0)
065e3595 2149 goto invalid_code;
df7492f9 2150 code |= c & 0x7F;
065e3595 2151 break;
df7492f9 2152
/* Single byte: the byte itself is the code, in ASCII or eight-bit.  */
065e3595
KH
2153 case 1:
2154 code = c;
2155 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2156 ? charset_ascii : charset_eight_bit);
2157 break;
df7492f9 2158
065e3595
KH
2159 default:
2160 abort ();
2161 }
75f80e63 2162 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, code, c);
065e3595
KH
2163 if (c < 0)
2164 goto invalid_code;
df7492f9 2165 }
df7492f9
KH
2166 *nbytes = src - src_base;
2167 *nchars = consumed_chars;
ff0dacd7
KH
2168 if (id)
2169 *id = charset->id;
e951386e 2170 return (mseq_found ? -c : c);
df7492f9
KH
2171
2172 no_more_source:
2173 return -2;
2174
2175 invalid_code:
2176 return -1;
2178
2179
e951386e 2180/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
df7492f9 2181
e951386e
KH
2182/* Handle these composition sequence ('|': the end of header elements,
2183 BYTES and CHARS >= 0xA0):
df7492f9 2184
e951386e
KH
2185 (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2186 (2) altchar composition: 0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2187 (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
df7492f9 2188
e951386e
KH
2189 and these old forms:
2190
2191 (4) relative composition: 0x80 | MSEQ ... MSEQ
2192 (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
df7492f9 2193
e951386e
KH
2194 When the starter 0x80 and the following header elements are found,
2195 this annotation header is produced.
df7492f9 2196
e951386e 2197 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
df7492f9 2198
e951386e
KH
2199 NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2200 NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
df7492f9 2201
e951386e
KH
2202 Then, upon reading the following elements, these codes are produced
2203 until the composition end is found:
df7492f9 2204
e951386e
KH
2205 (1) CHAR ... CHAR
2206 (2) ALT ... ALT CHAR ... CHAR
2207 (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2208 (4) CHAR ... CHAR
2209 (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
4ed46869 2210
e951386e
KH
2211 When the composition end is found, LENGTH and NCHARS in the
2212 annotation header are updated as below:
b73bfc1c 2213
e951386e
KH
2214 (1) LENGTH: unchanged, NCHARS: unchanged
2215 (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2216 (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2217 (4) LENGTH: unchanged, NCHARS: number of CHARs
2218 (5) LENGTH: unchanged, NCHARS: number of CHARs
df7492f9 2219
e951386e
KH
2220 If an error is found while composing, the annotation header is
2221 changed to the original composition header (plus filler -1s) as
2222 below:
2223
2224 (1),(2),(3) [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2225 (5) [ 0x80 0xFF -1 -1 -1 ]
2226
2227 and the sequence [ -2 DECODED-RULE ] is changed to the original
2228 byte sequence as below:
2229 o the original byte sequence is B: [ B -1 ]
2230 o the original byte sequence is B1 B2: [ B1 B2 ]
2231
2232 Most of the routines are implemented by macros because many
2233 variables and labels in the caller decode_coding_emacs_mule must be
2234 accessible, and they are usually called just once (thus doesn't
2235 increase the size of compiled object). */
2236
/* Decode C, one byte of an Emacs 20 style composition sequence, as a
   composition rule and store the result in RULE.

   C is modified in place: 0xA0 is subtracted from it.  The reduced
   value must lie in 0..80; it is split into a quotient and a
   remainder by 9, and a value of 4 in either is mapped to 10 before
   the pair is passed to COMPOSITION_ENCODE_RULE.  Jump to the label
   `invalid_code' of the enclosing function when C is out of range.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)	\
  do {							\
    int gref, nref;					\
							\
    c -= 0xA0;						\
    if (! (c >= 0 && c < 81))				\
      goto invalid_code;				\
    gref = (c / 9 == 4 ? 10 : c / 9);			\
    nref = (c % 9 == 4 ? 10 : c % 9);			\
    rule = COMPOSITION_ENCODE_RULE (gref, nref);	\
  } while (0)
2253
2254
e951386e
KH
/* Decode an Emacs 21 style composition rule and store it in RULE.

   The rule occupies two bytes: the one already held in C and the
   next one, which is fetched with ONE_MORE_BYTE (so C is overwritten).
   Each byte minus 0x20 must lie in 0..80.  Jump to the label
   `invalid_code' of the enclosing function otherwise.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)	\
  do {							\
    int gref = c - 0x20;				\
    int nref;						\
							\
    if (! (gref >= 0 && gref < 81))			\
      goto invalid_code;				\
    ONE_MORE_BYTE (c);					\
    nref = c - 0x20;					\
    if (! (nref >= 0 && nref < 81))			\
      goto invalid_code;				\
    rule = COMPOSITION_ENCODE_RULE (gref, nref);	\
  } while (0)
2272
2273
e951386e
KH
/* Start of Emacs 21 style format.  On entry C holds the METHOD byte
   (0xF2 plus the composition method).  The next two bytes at SRC are
   BYTES and CHARS, each offset by 0xA0, where BYTES is the byte
   length of this composition information and CHARS is the number of
   characters composed by this composition.

   BYTES - 0xA0 must be at least 3, and exactly 4 for a relative
   composition; CHARS - 0xA0 must be positive and less than
   MAX_COMPOSITION_COMPONENTS.  Otherwise jump to the label
   `invalid_code' of the enclosing function.

   On success, initialize *CMP_STATUS for the new composition and
   append the composition annotation header to CHARBUF.  */

#define DECODE_EMACS_MULE_21_COMPOSITION()				\
  do {									\
    enum composition_method method = c - 0xF2;				\
    int nbytes, nchars;							\
									\
    ONE_MORE_BYTE (c);							\
    if (c < 0)								\
      goto invalid_code;						\
    nbytes = c - 0xA0;							\
    if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4))	\
      goto invalid_code;						\
    ONE_MORE_BYTE (c);							\
    nchars = c - 0xA0;							\
    if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)		\
      goto invalid_code;						\
    cmp_status->old_form = 0;						\
    cmp_status->method = method;					\
    if (method == COMPOSITION_RELATIVE)					\
      cmp_status->state = COMPOSING_CHAR;				\
    else								\
      cmp_status->state = COMPOSING_COMPONENT_CHAR;			\
    cmp_status->length = MAX_ANNOTATION_LENGTH;				\
    cmp_status->nchars = nchars;					\
    cmp_status->ncomps = nbytes - 4;					\
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);		\
  } while (0)
93dec019 2306
aa72b389 2307
e951386e
KH
/* Begin an Emacs 20 style (old form) relative composition: reset
   *CMP_STATUS accordingly and append a composition annotation header
   with zero NCHARS and NBYTES to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()		\
  do {								\
    cmp_status->ncomps = 0;					\
    cmp_status->nchars = 0;					\
    cmp_status->length = MAX_ANNOTATION_LENGTH;			\
    cmp_status->state = COMPOSING_CHAR;				\
    cmp_status->method = COMPOSITION_RELATIVE;			\
    cmp_status->old_form = 1;					\
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);	\
  } while (0)
2319
2320
/* Begin an Emacs 20 style (old form) rule-base composition: reset
   *CMP_STATUS accordingly and append a composition annotation header
   with zero NCHARS and NBYTES to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()		\
  do {								\
    cmp_status->ncomps = 0;					\
    cmp_status->nchars = 0;					\
    cmp_status->length = MAX_ANNOTATION_LENGTH;			\
    cmp_status->state = COMPOSING_CHAR;				\
    cmp_status->method = COMPOSITION_WITH_RULE;			\
    cmp_status->old_form = 1;					\
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);	\
  } while (0)
2332
2333
e951386e
KH
/* Called right after the composition starter 0x80 has been read.
   Fetch the next byte into C and select the composition format:

     0xF2 + method      Emacs 21 style header
     0xFF               Emacs 20 style rule-base composition
     0xA0..0xBF         Emacs 20 style relative composition; the byte
                        is the first component, so SRC is rewound to
                        read it again

   Anything else jumps to the label `invalid_code' of the enclosing
   function.  */

#define DECODE_EMACS_MULE_COMPOSITION_START()			\
  do {								\
    const unsigned char *current_src = src;			\
								\
    ONE_MORE_BYTE (c);						\
    if (c < 0)							\
      goto invalid_code;					\
    if (COMPOSITION_RELATIVE <= c - 0xF2			\
	&& c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)		\
      DECODE_EMACS_MULE_21_COMPOSITION ();			\
    else if (c == 0xFF)						\
      DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();		\
    else if (c >= 0xA0 && c < 0xC0)				\
      {								\
	DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();		\
	/* Re-read C as a composition component.  */		\
	src = current_src;					\
      }								\
    else							\
      goto invalid_code;					\
  } while (0)
2357
/* Close the composition in progress.  The annotation header starts
   CMP_STATUS->length elements before CHARBUF.  For an old-form
   composition, store the number of characters in the third header
   element.  For a new-form composition whose method is beyond
   COMPOSITION_RELATIVE, store in the first header element the third
   element minus CMP_STATUS->length.  Then leave the composing
   state.  */

#define EMACS_MULE_COMPOSITION_END()					\
  do {									\
    int idx = - cmp_status->length;					\
									\
    if (! cmp_status->old_form)						\
      {									\
	if (cmp_status->method > COMPOSITION_RELATIVE)			\
	  charbuf[idx] = charbuf[idx + 2] - cmp_status->length;		\
      }									\
    else								\
      charbuf[idx + 2] = cmp_status->nchars;				\
    cmp_status->state = COMPOSING_NO;					\
  } while (0)
2368
2369
2370static int
971de7fb 2371emacs_mule_finish_composition (int *charbuf, struct composition_status *cmp_status)
e951386e
KH
2372{
2373 int idx = - cmp_status->length;
2374 int new_chars;
2375
2376 if (cmp_status->old_form && cmp_status->nchars > 0)
2377 {
2378 charbuf[idx + 2] = cmp_status->nchars;
2379 new_chars = 0;
2380 if (cmp_status->method == COMPOSITION_WITH_RULE
2381 && cmp_status->state == COMPOSING_CHAR)
2382 {
2383 /* The last rule was invalid. */
2384 int rule = charbuf[-1] + 0xA0;
2385
2386 charbuf[-2] = BYTE8_TO_CHAR (rule);
2387 charbuf[-1] = -1;
2388 new_chars = 1;
2389 }
2390 }
2391 else
2392 {
2393 charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2394
2395 if (cmp_status->method == COMPOSITION_WITH_RULE)
2396 {
2397 charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2398 charbuf[idx++] = -3;
2399 charbuf[idx++] = 0;
2400 new_chars = 1;
2401 }
2402 else
2403 {
2404 int nchars = charbuf[idx + 1] + 0xA0;
2405 int nbytes = charbuf[idx + 2] + 0xA0;
2406
2407 charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2408 charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2409 charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2410 charbuf[idx++] = -1;
2411 new_chars = 4;
2412 }
2413 }
2414 cmp_status->state = COMPOSING_NO;
2415 return new_chars;
2416}
2417
/* If a composition is in progress, finish it with
   emacs_mule_finish_composition and add the number of characters it
   reports to CHAR_OFFSET.  Do nothing otherwise.  */

#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()				\
  do {									\
    char_offset += (cmp_status->state != COMPOSING_NO			\
		    ? emacs_mule_finish_composition (charbuf, cmp_status) \
		    : 0);						\
  } while (0)
2423
aa72b389
KH
2424
/* Decode emacs-mule bytes from CODING->source, starting at offset
   CODING->consumed, and append the decoded elements to
   CODING->charbuf after the CODING->charbuf_used elements already
   there.  Besides characters, charset and composition annotations are
   written into the buffer.

   The state of an unfinished composition is kept in
   CODING->spec.emacs_mule.cmp_status so that decoding can resume in a
   later call.  On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.

   The locals src, src_end, multibytep, consumed_chars, charbuf,
   char_offset and cmp_status are referenced by the macros used
   below.  */
2425static void
971de7fb 2426decode_coding_emacs_mule (struct coding_system *coding)
aa72b389 2427{
8f924df7
KH
2428 const unsigned char *src = coding->source + coding->consumed;
2429 const unsigned char *src_end = coding->source + coding->src_bytes;
2430 const unsigned char *src_base;
69a80ea3 2431 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
2432 /* We may produce two annotations (charset and composition) in one
2433 loop and one more charset annotation at the end. */
69a80ea3 2434 int *charbuf_end
df80c7f0 2435 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
df7492f9 2436 int consumed_chars = 0, consumed_chars_base;
df7492f9 2437 int multibytep = coding->src_multibyte;
24a73b0a 2438 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2439 int char_offset = coding->produced_char;
2440 int last_offset = char_offset;
2441 int last_id = charset_ascii;
0a9564cb
EZ
2442 int eol_crlf =
2443 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 2444 int byte_after_cr = -1;
e951386e 2445 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
aa72b389 2446
24a73b0a 2447 CODING_GET_INFO (coding, attrs, charset_list);
aa72b389 2448
/* A composition was left open by the previous call: put the
   carried-over elements back at the head of the output.  */
e951386e
KH
2449 if (cmp_status->state != COMPOSING_NO)
2450 {
2451 int i;
2452
2453 for (i = 0; i < cmp_status->length; i++)
2454 *charbuf++ = cmp_status->carryover[i];
2455 coding->annotated = 1;
2456 }
2457
aa72b389
KH
2458 while (1)
2459 {
e951386e 2460 int c, id;
df7492f9 2461
aa72b389 2462 src_base = src;
df7492f9
KH
2463 consumed_chars_base = consumed_chars;
2464
/* Output buffer full.  If a byte was read ahead after a CR, step
   SRC_BASE back so that the byte is reported as not yet consumed.  */
2465 if (charbuf >= charbuf_end)
b71f6f73
KH
2466 {
2467 if (byte_after_cr >= 0)
2468 src_base--;
2469 break;
2470 }
aa72b389 2471
119852e7
KH
2472 if (byte_after_cr >= 0)
2473 c = byte_after_cr, byte_after_cr = -1;
2474 else
2475 ONE_MORE_BYTE (c);
e951386e
KH
2476
/* A negative C is stored negated as one character; 0x80 starts a new
   composition.  Either way, a composition in progress ends here.  */
2477 if (c < 0 || c == 0x80)
065e3595 2478 {
e951386e
KH
2479 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2480 if (c < 0)
2481 {
2482 *charbuf++ = -c;
2483 char_offset++;
2484 }
2485 else
2486 DECODE_EMACS_MULE_COMPOSITION_START ();
2487 continue;
065e3595 2488 }
e951386e
KH
2489
2490 if (c < 0x80)
aa72b389 2491 {
/* With DOS end-of-line conversion, read the byte that follows a CR
   ahead of time; it is consumed by the next iteration.  */
119852e7
KH
2492 if (eol_crlf && c == '\r')
2493 ONE_MORE_BYTE (byte_after_cr);
e951386e
KH
2494 id = charset_ascii;
2495 if (cmp_status->state != COMPOSING_NO)
2496 {
2497 if (cmp_status->old_form)
2498 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2499 else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2500 cmp_status->ncomps--;
2501 }
2502 }
2503 else
2504 {
2505 int nchars, nbytes;
75f80e63
EZ
2506 /* emacs_mule_char can load a charset map from a file, which
2507 allocates a large structure and might cause buffer text
2508 to be relocated as result. Thus, we need to remember the
2509 original pointer to buffer text, and fixup all related
2510 pointers after the call. */
2511 const unsigned char *orig = coding->source;
2512 EMACS_INT offset;
e951386e
KH
2513
2514 c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2515 cmp_status);
75f80e63
EZ
2516 offset = coding->source - orig;
2517 if (offset)
2518 {
2519 src += offset;
2520 src_base += offset;
2521 src_end += offset;
2522 }
/* -1 means an invalid sequence, -2 means the source ended inside a
   sequence.  Any other negative value is an old-form component or
   rule and is handled below.  */
e951386e
KH
2523 if (c < 0)
2524 {
2525 if (c == -1)
2526 goto invalid_code;
2527 if (c == -2)
2528 break;
2529 }
2530 src = src_base + nbytes;
2531 consumed_chars = consumed_chars_base + nchars;
2532 if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2533 cmp_status->ncomps -= nchars;
2534 }
2535
2536 /* Now if C >= 0, we found a normally encoded character, if C <
2537 0, we found an old-style composition component character or
2538 rule. */
2539
2540 if (cmp_status->state == COMPOSING_NO)
2541 {
/* The charset changed: emit a charset annotation covering the run of
   characters decoded with the previous, non-ASCII charset.  */
2542 if (last_id != id)
2543 {
2544 if (last_id != charset_ascii)
2545 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2546 last_id);
2547 last_id = id;
2548 last_offset = char_offset;
2549 }
df7492f9
KH
2550 *charbuf++ = c;
2551 char_offset++;
aa72b389 2552 }
e951386e 2553 else if (cmp_status->state == COMPOSING_CHAR)
df7492f9 2554 {
e951386e
KH
2555 if (cmp_status->old_form)
2556 {
/* In an old-form composition a normally encoded character (C >= 0)
   ends the composition.  */
2557 if (c >= 0)
2558 {
2559 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2560 *charbuf++ = c;
2561 char_offset++;
2562 }
2563 else
2564 {
2565 *charbuf++ = -c;
2566 cmp_status->nchars++;
2567 cmp_status->length++;
2568 if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2569 EMACS_MULE_COMPOSITION_END ();
2570 else if (cmp_status->method == COMPOSITION_WITH_RULE)
2571 cmp_status->state = COMPOSING_RULE;
2572 }
2573 }
df7492f9 2574 else
e951386e
KH
2575 {
/* New form: NCHARS counts down the characters still expected.  */
2576 *charbuf++ = c;
2577 cmp_status->length++;
2578 cmp_status->nchars--;
2579 if (cmp_status->nchars == 0)
2580 EMACS_MULE_COMPOSITION_END ();
2581 }
df7492f9 2582 }
e951386e 2583 else if (cmp_status->state == COMPOSING_RULE)
df7492f9 2584 {
e951386e 2585 int rule;
ff0dacd7 2586
e951386e 2587 if (c >= 0)
df7492f9 2588 {
e951386e
KH
2589 EMACS_MULE_COMPOSITION_END ();
2590 *charbuf++ = c;
2591 char_offset++;
df7492f9 2592 }
e951386e 2593 else
ff0dacd7 2594 {
e951386e
KH
2595 c = -c;
2596 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2597 if (rule < 0)
2598 goto invalid_code;
2599 *charbuf++ = -2;
2600 *charbuf++ = rule;
2601 cmp_status->length += 2;
2602 cmp_status->state = COMPOSING_CHAR;
ff0dacd7 2603 }
e951386e
KH
2604 }
2605 else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2606 {
df7492f9 2607 *charbuf++ = c;
e951386e
KH
2608 cmp_status->length++;
2609 if (cmp_status->ncomps == 0)
2610 cmp_status->state = COMPOSING_CHAR;
2611 else if (cmp_status->ncomps > 0)
2612 {
2613 if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2614 cmp_status->state = COMPOSING_COMPONENT_RULE;
2615 }
2616 else
2617 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
df7492f9 2618 }
e951386e
KH
2619 else /* COMPOSING_COMPONENT_RULE */
2620 {
2621 int rule;
2622
2623 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2624 if (rule < 0)
2625 goto invalid_code;
2626 *charbuf++ = -2;
2627 *charbuf++ = rule;
2628 cmp_status->length += 2;
2629 cmp_status->ncomps--;
2630 if (cmp_status->ncomps > 0)
2631 cmp_status->state = COMPOSING_COMPONENT_CHAR;
2632 else
2633 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2634 }
2635 continue;
2636
/* NOTE(review): no `goto retry' is visible in this function or in the
   macros defined in this part of the file; the label looks unused --
   confirm before removing it.  */
2637 retry:
2638 src = src_base;
2639 consumed_chars = consumed_chars_base;
df7492f9
KH
2640 continue;
2641
/* Error recovery: end any composition, rewind to the start of the
   offending sequence and emit its first byte as a single character
   (as an eight-bit character when it is not ASCII).  */
2642 invalid_code:
e951386e 2643 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
df7492f9
KH
2644 src = src_base;
2645 consumed_chars = consumed_chars_base;
2646 ONE_MORE_BYTE (c);
2647 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2648 char_offset++;
df7492f9
KH
2649 coding->errors++;
2650 }
2651
2652 no_more_source:
/* A composition is still open.  On the last block, finish it now.
   Otherwise move its elements out of CHARBUF into the carryover area
   so that the next call can resume it.  */
e951386e
KH
2653 if (cmp_status->state != COMPOSING_NO)
2654 {
2655 if (coding->mode & CODING_MODE_LAST_BLOCK)
2656 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2657 else
2658 {
2659 int i;
2660
2661 charbuf -= cmp_status->length;
2662 for (i = 0; i < cmp_status->length; i++)
2663 cmp_status->carryover[i] = charbuf[i];
2664 }
2665 }
ff0dacd7 2666 if (last_id != charset_ascii)
69a80ea3 2667 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
/* Only input up to SRC_BASE, the start of the last unfinished
   sequence, is reported as consumed.  */
df7492f9
KH
2668 coding->consumed_char += consumed_chars_base;
2669 coding->consumed = src_base - coding->source;
2670 coding->charbuf_used = charbuf - coding->charbuf;
2671}
2672
2673
/* Store in CODES[0] and CODES[1] the leading codes for the emacs-mule
   charset id ID.  An ID below 0xA0 is itself the single leading code
   and CODES[1] is set to 0.  Otherwise CODES[0] is one of 0x9A, 0x9B,
   0x9C or 0x9D, chosen by the range ID falls in, and CODES[1] is ID.

   ID and CODES are evaluated more than once, so they must be free of
   side effects.  The expansion deliberately has no trailing semicolon
   so that the macro can be used as a single statement, e.g. as the
   unbraced body of an `if' that has an `else'.  */

#define EMACS_MULE_LEADING_CODES(id, codes)	\
  do {						\
    if ((id) < 0xA0)				\
      (codes)[0] = (id), (codes)[1] = 0;	\
    else if ((id) < 0xE0)			\
      (codes)[0] = 0x9A, (codes)[1] = (id);	\
    else if ((id) < 0xF0)			\
      (codes)[0] = 0x9B, (codes)[1] = (id);	\
    else if ((id) < 0xF5)			\
      (codes)[0] = 0x9C, (codes)[1] = (id);	\
    else					\
      (codes)[0] = 0x9D, (codes)[1] = (id);	\
  } while (0)
2687
aa72b389 2688
df7492f9 2689static int
971de7fb 2690encode_coding_emacs_mule (struct coding_system *coding)
df7492f9
KH
2691{
2692 int multibytep = coding->dst_multibyte;
2693 int *charbuf = coding->charbuf;
2694 int *charbuf_end = charbuf + coding->charbuf_used;
2695 unsigned char *dst = coding->destination + coding->produced;
2696 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2697 int safe_room = 8;
df7492f9 2698 int produced_chars = 0;
24a73b0a 2699 Lisp_Object attrs, charset_list;
df7492f9 2700 int c;
ff0dacd7 2701 int preferred_charset_id = -1;
df7492f9 2702
24a73b0a 2703 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2704 if (! EQ (charset_list, Vemacs_mule_charset_list))
2705 {
2706 CODING_ATTR_CHARSET_LIST (attrs)
2707 = charset_list = Vemacs_mule_charset_list;
2708 }
df7492f9
KH
2709
2710 while (charbuf < charbuf_end)
2711 {
2712 ASSURE_DESTINATION (safe_room);
2713 c = *charbuf++;
ff0dacd7
KH
2714
2715 if (c < 0)
2716 {
2717 /* Handle an annotation. */
2718 switch (*charbuf)
2719 {
2720 case CODING_ANNOTATE_COMPOSITION_MASK:
2721 /* Not yet implemented. */
2722 break;
2723 case CODING_ANNOTATE_CHARSET_MASK:
2724 preferred_charset_id = charbuf[3];
2725 if (preferred_charset_id >= 0
2726 && NILP (Fmemq (make_number (preferred_charset_id),
2727 charset_list)))
2728 preferred_charset_id = -1;
2729 break;
2730 default:
2731 abort ();
2732 }
2733 charbuf += -c - 1;
2734 continue;
2735 }
2736
df7492f9
KH
2737 if (ASCII_CHAR_P (c))
2738 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2739 else if (CHAR_BYTE8_P (c))
2740 {
2741 c = CHAR_TO_BYTE8 (c);
2742 EMIT_ONE_BYTE (c);
2743 }
df7492f9 2744 else
aa72b389 2745 {
df7492f9
KH
2746 struct charset *charset;
2747 unsigned code;
2748 int dimension;
2749 int emacs_mule_id;
2750 unsigned char leading_codes[2];
2751
ff0dacd7
KH
2752 if (preferred_charset_id >= 0)
2753 {
2754 charset = CHARSET_FROM_ID (preferred_charset_id);
905ca9d2
KH
2755 if (CHAR_CHARSET_P (c, charset))
2756 code = ENCODE_CHAR (charset, c);
2757 else
2758 charset = char_charset (c, charset_list, &code);
ff0dacd7
KH
2759 }
2760 else
2761 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2762 if (! charset)
2763 {
2764 c = coding->default_char;
2765 if (ASCII_CHAR_P (c))
2766 {
2767 EMIT_ONE_ASCII_BYTE (c);
2768 continue;
2769 }
2770 charset = char_charset (c, charset_list, &code);
2771 }
2772 dimension = CHARSET_DIMENSION (charset);
2773 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2774 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2775 EMIT_ONE_BYTE (leading_codes[0]);
2776 if (leading_codes[1])
2777 EMIT_ONE_BYTE (leading_codes[1]);
2778 if (dimension == 1)
1fa663f9 2779 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2780 else
df7492f9 2781 {
1fa663f9 2782 code |= 0x8080;
df7492f9
KH
2783 EMIT_ONE_BYTE (code >> 8);
2784 EMIT_ONE_BYTE (code & 0xFF);
2785 }
aa72b389 2786 }
aa72b389 2787 }
065e3595 2788 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2789 coding->produced_char += produced_chars;
2790 coding->produced = dst - coding->destination;
2791 return 0;
aa72b389 2792}
b73bfc1c 2793
4ed46869 2794\f
df7492f9 2795/*** 7. ISO2022 handlers ***/
4ed46869
KH
2796
2797/* The following note describes the coding system ISO2022 briefly.
39787efd 2798 Since the intention of this note is to help understand the
5a936b46 2799 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2800 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2801 original document of ISO2022. This is equivalent to the standard
cfb43547 2802 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2803
2804 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2805 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2806 is encoded using bytes less than 128. This may make the encoded
2807 text a little bit longer, but the text passes more easily through
cfb43547 2808 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2809 Significant Bit).
b73bfc1c 2810
cfb43547
DL
2811 There are two kinds of character sets: control character sets and
2812 graphic character sets. The former contain control characters such
4ed46869 2813 as `newline' and `escape' to provide control functions (control
39787efd 2814 functions are also provided by escape sequences). The latter
cfb43547 2815 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2816 two control character sets and many graphic character sets.
2817
2818 Graphic character sets are classified into one of the following
39787efd
KH
2819 four classes, according to the number of bytes (DIMENSION) and
2820 number of characters in one dimension (CHARS) of the set:
2821 - DIMENSION1_CHARS94
2822 - DIMENSION1_CHARS96
2823 - DIMENSION2_CHARS94
2824 - DIMENSION2_CHARS96
2825
2826 In addition, each character set is assigned an identification tag,
cfb43547 2827 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2828 hereafter). The <F> of each character set is decided by ECMA(*)
2829 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2830 (0x30..0x3F are for private use only).
4ed46869
KH
2831
2832 Note (*): ECMA = European Computer Manufacturers Association
2833
cfb43547 2834 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2835 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2836 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2837 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2838 o DIMENSION2_CHARS96 -- none for the moment
2839
39787efd 2840 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2841 C0 [0x00..0x1F] -- control character plane 0
2842 GL [0x20..0x7F] -- graphic character plane 0
2843 C1 [0x80..0x9F] -- control character plane 1
2844 GR [0xA0..0xFF] -- graphic character plane 1
2845
2846 A control character set is directly designated and invoked to C0 or
39787efd
KH
2847 C1 by an escape sequence. The most common case is that:
2848 - ISO646's control character set is designated/invoked to C0, and
2849 - ISO6429's control character set is designated/invoked to C1,
2850 and usually these designations/invocations are omitted in encoded
2851 text. In a 7-bit environment, only C0 can be used, and a control
2852 character for C1 is encoded by an appropriate escape sequence to
2853 fit into the environment. All control characters for C1 are
2854 defined to have corresponding escape sequences.
4ed46869
KH
2855
2856 A graphic character set is at first designated to one of four
2857 graphic registers (G0 through G3), then these graphic registers are
2858 invoked to GL or GR. These designations and invocations can be
2859 done independently. The most common case is that G0 is invoked to
39787efd
KH
2860 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2861 these invocations and designations are omitted in encoded text.
2862 In a 7-bit environment, only GL can be used.
4ed46869 2863
39787efd
KH
2864 When a graphic character set of CHARS94 is invoked to GL, codes
2865 0x20 and 0x7F of the GL area work as control characters SPACE and
2866 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2867 be used.
4ed46869
KH
2868
2869 There are two ways of invocation: locking-shift and single-shift.
2870 With locking-shift, the invocation lasts until the next different
39787efd
KH
2871 invocation, whereas with single-shift, the invocation affects the
2872 following character only and doesn't affect the locking-shift
2873 state. Invocations are done by the following control characters or
2874 escape sequences:
4ed46869
KH
2875
2876 ----------------------------------------------------------------------
39787efd 2877 abbrev function cntrl escape seq description
4ed46869 2878 ----------------------------------------------------------------------
39787efd
KH
2879 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2880 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2881 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2882 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2883 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2884 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2885 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2886 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2887 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2888 ----------------------------------------------------------------------
39787efd
KH
2889 (*) These are not used by any known coding system.
2890
2891 Control characters for these functions are defined by macros
2892 ISO_CODE_XXX in `coding.h'.
4ed46869 2893
39787efd 2894 Designations are done by the following escape sequences:
4ed46869
KH
2895 ----------------------------------------------------------------------
2896 escape sequence description
2897 ----------------------------------------------------------------------
2898 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2899 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2900 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2901 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2902 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2903 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2904 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2905 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2906 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2907 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2908 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2909 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2910 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2911 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2912 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2913 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2914 ----------------------------------------------------------------------
2915
2916 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2917 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2918
2919 Note (*): Although these designations are not allowed in ISO2022,
2920 Emacs accepts them on decoding, and produces them on encoding
39787efd 2921 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2922 7-bit environment, non-locking-shift, and non-single-shift.
2923
2924 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2925 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2926
cfb43547 2927 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2928 same multilingual text in ISO2022. Actually, there exist many
2929 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2930 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2931 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2932 localized platforms), and all of these are variants of ISO2022.
2933
2934 In addition to the above, Emacs handles two more kinds of escape
2935 sequences: ISO6429's direction specification and Emacs' private
2936 sequence for specifying character composition.
2937
39787efd 2938 ISO6429's direction specification takes the following form:
4ed46869
KH
2939 o CSI ']' -- end of the current direction
2940 o CSI '0' ']' -- end of the current direction
2941 o CSI '1' ']' -- start of left-to-right text
2942 o CSI '2' ']' -- start of right-to-left text
2943 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2944 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2945
2946 Character composition specification takes the following form:
ec6d2bb8
KH
2947 o ESC '0' -- start relative composition
2948 o ESC '1' -- end composition
2949 o ESC '2' -- start rule-base composition (*)
2950 o ESC '3' -- start relative composition with alternate chars (**)
2951 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2952 Since these are not standard escape sequences of any ISO standard,
cfb43547 2953 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2954
5a936b46
DL
2955 (*) This form is used only in Emacs 20.7 and older versions,
2956 but newer versions can safely decode it.
cfb43547 2957 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2958 and older versions can't decode it.
ec6d2bb8 2959
cfb43547 2960 Here's a list of example usages of these composition escape
b73bfc1c 2961 sequences (categorized by `enum composition_method').
ec6d2bb8 2962
b73bfc1c 2963 COMPOSITION_RELATIVE:
ec6d2bb8 2964 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2965 COMPOSITION_WITH_RULE:
ec6d2bb8 2966 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2967 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2968 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2969 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2970 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2971
/* Code class of each possible byte value, indexed by the byte (256
   entries).  decode_coding_iso_2022 dispatches on iso_code_class[c1]
   for each input byte C1.  No initializer is given here.  */
2972enum iso_code_class_type iso_code_class[256];
2973
df7492f9
KH
/* Return nonzero if the charset whose ID is ID may be used with CODING,
   i.e. ID is a valid index into CODING's safe_charsets table and the
   entry there is not 255 (the filler value setup_iso_safe_charsets
   stores for charsets that have no graphic register assigned).

   A negative ID is never safe; it is rejected before the table is
   indexed, so callers may pass a "no charset" value without reading
   out of bounds.  ID is evaluated more than once.  */
#define SAFE_CHARSET_P(coding, id)		\
  ((id) >= 0					\
   && (id) <= (coding)->max_charset_id		\
   && (coding)->safe_charsets[(id)] != 255)
df7492f9
KH
2977
2978
/* Nonzero if the coding system registered for CATEGORY has a
   non-negative initial designation for graphic register 1.
   NOTE(review): presumably this means G1 holds a charset, so SO can
   shift to it -- confirm against the users of this macro.  */
#define SHIFT_OUT_OK(category)	\
  (0 <= CODING_ISO_INITIAL (&coding_categories[category], 1))
2981
2982static void
971de7fb 2983setup_iso_safe_charsets (Lisp_Object attrs)
df7492f9
KH
2984{
2985 Lisp_Object charset_list, safe_charsets;
2986 Lisp_Object request;
2987 Lisp_Object reg_usage;
2988 Lisp_Object tail;
2989 int reg94, reg96;
2990 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2991 int max_charset_id;
2992
2993 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2994 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2995 && ! EQ (charset_list, Viso_2022_charset_list))
2996 {
2997 CODING_ATTR_CHARSET_LIST (attrs)
2998 = charset_list = Viso_2022_charset_list;
2999 ASET (attrs, coding_attr_safe_charsets, Qnil);
3000 }
3001
3002 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3003 return;
3004
3005 max_charset_id = 0;
3006 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3007 {
3008 int id = XINT (XCAR (tail));
3009 if (max_charset_id < id)
3010 max_charset_id = id;
3011 }
d46c5b12 3012
1b3b981b
AS
3013 safe_charsets = make_uninit_string (max_charset_id + 1);
3014 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9
KH
3015 request = AREF (attrs, coding_attr_iso_request);
3016 reg_usage = AREF (attrs, coding_attr_iso_usage);
3017 reg94 = XINT (XCAR (reg_usage));
3018 reg96 = XINT (XCDR (reg_usage));
3019
3020 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3021 {
3022 Lisp_Object id;
3023 Lisp_Object reg;
3024 struct charset *charset;
3025
3026 id = XCAR (tail);
3027 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 3028 reg = Fcdr (Fassq (id, request));
df7492f9 3029 if (! NILP (reg))
8f924df7 3030 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
3031 else if (charset->iso_chars_96)
3032 {
3033 if (reg96 < 4)
8f924df7 3034 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
3035 }
3036 else
3037 {
3038 if (reg94 < 4)
8f924df7 3039 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
3040 }
3041 }
3042 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3043}
d46c5b12 3044
b6871cc7 3045
4ed46869 3046/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
3047 Check if a text is encoded in one of ISO-2022 based coding systems.
3048 If it is, return 1, else return 0. */
4ed46869 3049
0a28aafb 3050static int
971de7fb 3051detect_coding_iso_2022 (struct coding_system *coding, struct coding_detection_info *detect_info)
4ed46869 3052{
8f924df7
KH
3053 const unsigned char *src = coding->source, *src_base = src;
3054 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 3055 int multibytep = coding->src_multibyte;
ff0dacd7 3056 int single_shifting = 0;
df7492f9
KH
3057 int id;
3058 int c, c1;
3059 int consumed_chars = 0;
3060 int i;
ff0dacd7
KH
3061 int rejected = 0;
3062 int found = 0;
cee53ed4 3063 int composition_count = -1;
ff0dacd7
KH
3064
3065 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
3066
3067 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3068 {
3069 struct coding_system *this = &(coding_categories[i]);
3070 Lisp_Object attrs, val;
3071
c6b278e7
KH
3072 if (this->id < 0)
3073 continue;
df7492f9
KH
3074 attrs = CODING_ID_ATTRS (this->id);
3075 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
1b3b981b 3076 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
df7492f9
KH
3077 setup_iso_safe_charsets (attrs);
3078 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 3079 this->max_charset_id = SCHARS (val) - 1;
1b3b981b 3080 this->safe_charsets = SDATA (val);
df7492f9
KH
3081 }
3082
3083 /* A coding system of this category is always ASCII compatible. */
3084 src += coding->head_ascii;
3f003981 3085
ff0dacd7 3086 while (rejected != CATEGORY_MASK_ISO)
4ed46869 3087 {
065e3595 3088 src_base = src;
df7492f9 3089 ONE_MORE_BYTE (c);
4ed46869
KH
3090 switch (c)
3091 {
3092 case ISO_CODE_ESC:
74383408
KH
3093 if (inhibit_iso_escape_detection)
3094 break;
f46869e4 3095 single_shifting = 0;
df7492f9 3096 ONE_MORE_BYTE (c);
d46c5b12 3097 if (c >= '(' && c <= '/')
4ed46869 3098 {
bf9cdd4e 3099 /* Designation sequence for a charset of dimension 1. */
df7492f9 3100 ONE_MORE_BYTE (c1);
d46c5b12 3101 if (c1 < ' ' || c1 >= 0x80
df7492f9 3102 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
3103 /* Invalid designation sequence. Just ignore. */
3104 break;
bf9cdd4e
KH
3105 }
3106 else if (c == '$')
3107 {
3108 /* Designation sequence for a charset of dimension 2. */
df7492f9 3109 ONE_MORE_BYTE (c);
bf9cdd4e
KH
3110 if (c >= '@' && c <= 'B')
3111 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 3112 id = iso_charset_table[1][0][c];
bf9cdd4e 3113 else if (c >= '(' && c <= '/')
bcf26d6a 3114 {
df7492f9 3115 ONE_MORE_BYTE (c1);
d46c5b12 3116 if (c1 < ' ' || c1 >= 0x80
df7492f9 3117 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
3118 /* Invalid designation sequence. Just ignore. */
3119 break;
bcf26d6a 3120 }
bf9cdd4e 3121 else
ff0dacd7 3122 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
3123 break;
3124 }
ae9ff118 3125 else if (c == 'N' || c == 'O')
d46c5b12 3126 {
ae9ff118 3127 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
3128 single_shifting = 1;
3129 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 3130 break;
4ed46869 3131 }
cee53ed4
KH
3132 else if (c == '1')
3133 {
3134 /* End of composition. */
3135 if (composition_count < 0
3136 || composition_count > MAX_COMPOSITION_COMPONENTS)
3137 /* Invalid */
3138 break;
3139 composition_count = -1;
3140 found |= CATEGORY_MASK_ISO;
3141 }
ec6d2bb8
KH
3142 else if (c >= '0' && c <= '4')
3143 {
3144 /* ESC <Fp> for start/end composition. */
cee53ed4 3145 composition_count = 0;
ec6d2bb8
KH
3146 break;
3147 }
bf9cdd4e 3148 else
df7492f9 3149 {
ff0dacd7 3150 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
3151 break;
3152 }
d46c5b12
KH
3153
3154 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 3155 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
3156 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3157 id))
ff0dacd7 3158 found |= CATEGORY_MASK_ISO_7;
d46c5b12 3159 else
ff0dacd7 3160 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
3161 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3162 id))
ff0dacd7 3163 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 3164 else
ff0dacd7 3165 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
3166 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3167 id))
ff0dacd7 3168 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 3169 else
ff0dacd7 3170 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
3171 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3172 id))
ff0dacd7 3173 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 3174 else
ff0dacd7 3175 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
3176 break;
3177
4ed46869 3178 case ISO_CODE_SO:
d46c5b12 3179 case ISO_CODE_SI:
ff0dacd7 3180 /* Locking shift out/in. */
74383408
KH
3181 if (inhibit_iso_escape_detection)
3182 break;
f46869e4 3183 single_shifting = 0;
ff0dacd7 3184 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12
KH
3185 break;
3186
4ed46869 3187 case ISO_CODE_CSI:
ff0dacd7 3188 /* Control sequence introducer. */
f46869e4 3189 single_shifting = 0;
ff0dacd7
KH
3190 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3191 found |= CATEGORY_MASK_ISO_8_ELSE;
3192 goto check_extra_latin;
3193
4ed46869
KH
3194 case ISO_CODE_SS2:
3195 case ISO_CODE_SS3:
ff0dacd7
KH
3196 /* Single shift. */
3197 if (inhibit_iso_escape_detection)
3198 break;
75e2a253 3199 single_shifting = 0;
ff0dacd7
KH
3200 rejected |= CATEGORY_MASK_ISO_7BIT;
3201 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3202 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253 3203 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
ff0dacd7
KH
3204 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3205 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253
KH
3206 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3207 if (single_shifting)
3208 break;
ff0dacd7 3209 goto check_extra_latin;
4ed46869
KH
3210
3211 default:
065e3595
KH
3212 if (c < 0)
3213 continue;
4ed46869 3214 if (c < 0x80)
f46869e4 3215 {
cee53ed4
KH
3216 if (composition_count >= 0)
3217 composition_count++;
f46869e4
KH
3218 single_shifting = 0;
3219 break;
3220 }
ff0dacd7 3221 if (c >= 0xA0)
c4825358 3222 {
ff0dacd7
KH
3223 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3224 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 3225 /* Check the length of succeeding codes of the range
ff0dacd7
KH
3226 0xA0..0FF. If the byte length is even, we include
3227 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
3228 only when we are not single shifting. */
3229 if (! single_shifting
3230 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 3231 {
e17de821 3232 int i = 1;
b73bfc1c
KH
3233 while (src < src_end)
3234 {
d12bd917 3235 src_base = src;
df7492f9 3236 ONE_MORE_BYTE (c);
b73bfc1c 3237 if (c < 0xA0)
d12bd917
KH
3238 {
3239 src = src_base;
3240 break;
3241 }
b73bfc1c
KH
3242 i++;
3243 }
3244
3245 if (i & 1 && src < src_end)
cee53ed4
KH
3246 {
3247 rejected |= CATEGORY_MASK_ISO_8_2;
3248 if (composition_count >= 0)
3249 composition_count += i;
3250 }
f46869e4 3251 else
cee53ed4
KH
3252 {
3253 found |= CATEGORY_MASK_ISO_8_2;
3254 if (composition_count >= 0)
3255 composition_count += i / 2;
3256 }
f46869e4 3257 }
ff0dacd7 3258 break;
4ed46869 3259 }
ff0dacd7
KH
3260 check_extra_latin:
3261 single_shifting = 0;
3262 if (! VECTORP (Vlatin_extra_code_table)
3263 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3264 {
3265 rejected = CATEGORY_MASK_ISO;
3266 break;
3267 }
3268 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3269 & CODING_ISO_FLAG_LATIN_EXTRA)
3270 found |= CATEGORY_MASK_ISO_8_1;
3271 else
3272 rejected |= CATEGORY_MASK_ISO_8_1;
75e2a253 3273 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
3274 }
3275 }
ff0dacd7
KH
3276 detect_info->rejected |= CATEGORY_MASK_ISO;
3277 return 0;
4ed46869 3278
df7492f9 3279 no_more_source:
ff0dacd7
KH
3280 detect_info->rejected |= rejected;
3281 detect_info->found |= (found & ~rejected);
df7492f9 3282 return 1;
4ed46869 3283}
ec6d2bb8 3284
4ed46869 3285
134b9549
KH
3286/* Set designation state into CODING. Set CHARS_96 to -1 if the
3287 escape sequence should be kept. */
df7492f9
KH
/* Designate to graphic register REG of `coding' the charset selected
   by dimension DIM, 94/96 flag CHARS_96 and final byte FINAL.

   If FINAL is outside '0'..127, the charset table has no charset for
   it, or the charset is not safe for `coding', the designation of REG
   is set to -2 and CHARS_96 is set to -1.  Otherwise the charset ID is
   stored as the designation of REG, after replacing JISX0201-Roman by
   ASCII when CODING_ISO_FLAG_USE_ROMAN is set and JISX0208.1978 by
   JISX0208 when CODING_ISO_FLAG_USE_OLDJIS is set.

   CHARS_96 must be an lvalue; it is set to -1 when the escape
   sequence should be kept.  Expects `coding' in the caller's scope.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)			\
  do {									\
    int id, prev;							\
									\
    if ((final) >= '0' && (final) < 128					\
	&& (id = ISO_CHARSET_TABLE (dim, chars_96, final)) >= 0		\
	&& SAFE_CHARSET_P (coding, id))					\
      {									\
	prev = CODING_ISO_DESIGNATION (coding, reg);			\
	if (id == charset_jisx0201_roman)				\
	  {								\
	    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)	\
	      id = charset_ascii;					\
	  }								\
	else if (id == charset_jisx0208_1978)				\
	  {								\
	    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)	\
	      id = charset_jisx0208;					\
	  }								\
	CODING_ISO_DESIGNATION (coding, reg) = id;			\
	/* An ASCII designation that follows an invalid designation	\
	   to the same register must be kept in the output.  */	\
	if (prev == -2 && id == charset_ascii)				\
	  chars_96 = -1;						\
      }									\
    else								\
      {									\
	/* Invalid or unsafe designation: remember that in REG.  */	\
	CODING_ISO_DESIGNATION (coding, reg) = -2;			\
	chars_96 = -1;							\
      }									\
  } while (0)
3318
d46c5b12 3319
e951386e
KH
3320/* Handle these composition sequences (ALT: alternate char):
3321
3322 (1) relative composition: ESC 0 CHAR ... ESC 1
3323 (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3324 (3) altchar composition: ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3325 (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3326
3327 When the start sequence (ESC 0/2/3/4) is found, this annotation
3328 header is produced.
3329
3330 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3331
3332 Then, upon reading CHAR or RULE (one or two bytes), these codes are
3333 produced until the end sequence (ESC 1) is found:
3334
3335 (1) CHAR ... CHAR
3336 (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3337 (3) ALT ... ALT -1 -1 CHAR ... CHAR
3338 (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3339
3340 When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3341 annotation header are updated as below:
3342
3343 (1) LENGTH: unchanged, NCHARS: number of CHARs
3344 (2) LENGTH: unchanged, NCHARS: number of CHARs
3345 (3) LENGTH: += number of ALTs + 2, NCHARS: number of CHARs
3346 (4) LENGTH: += number of ALTs * 3, NCHARS: number of CHARs
3347
3348 If an error is found while composing, the annotation header is
3349 changed to:
3350
3351 [ ESC '0'/'2'/'3'/'4' -2 0 ]
3352
3353 and the sequence [ -2 DECODED-RULE ] is changed to the original
3354 byte sequence as below:
3355 o the original byte sequence is B: [ B -1 ]
3356 o the original byte sequence is B1 B2: [ B1 B2 ]
3357 and the sequence [ -1 -1 ] is changed to the original byte
3358 sequence:
3359 [ ESC '0' ]
3360*/
3361
3362/* Decode a composition rule C1 and maybe one more byte from the
3363 source, and set RULE to the encoded composition rule, NBYTES to the
3364 length of the composition rule. If the rule is invalid, set RULE
3365 to some negative value. */
3366
/* Decode the composition rule that starts with the byte C1 (taken
   from the caller's scope) into RULE, and set NBYTES to the number of
   source bytes the rule occupies (1 or 2).

   C1 - 32 below 81 is the one-byte old format: it is split into two
   base-9 digits, a digit of 4 is mapped to 10, and the pair is passed
   to COMPOSITION_ENCODE_RULE.  Otherwise one more byte is read with
   ONE_MORE_BYTE and the rule is encoded from (C1 - 32 - 81) and
   (that byte - 32); a non-negative result gets 0x100 added to
   distinguish it from the old format.

   If C1 - 32 is negative, RULE is left at that negative value and
   NBYTES is not touched.  */
#define DECODE_COMPOSITION_RULE(rule, nbytes)				\
  do {									\
    rule = c1 - 32;							\
    if (rule >= 81)		/* new format (after ver.21) */		\
      {									\
	int second_byte;						\
									\
	ONE_MORE_BYTE (second_byte);					\
	rule = COMPOSITION_ENCODE_RULE (rule - 81, second_byte - 32);	\
	if (rule >= 0)							\
	  rule += 0x100;   /* to distinguish it from the old format */	\
	nbytes = 2;							\
      }									\
    else if (rule >= 0)		/* old format (before ver.21) */	\
      {									\
	int gref = (rule) / 9;						\
	int nref = (rule) % 9;						\
									\
	gref = (gref == 4) ? 10 : gref;					\
	nref = (nref == 4) ? 10 : nref;					\
	rule = COMPOSITION_ENCODE_RULE (gref, nref);			\
	nbytes = 1;							\
      }									\
  } while (0)
3392
/* Turn the decoded composition rule RULE back into its original byte
   form, overwriting charbuf[idx] and charbuf[idx + 1] and adding the
   number of restored bytes to new_chars (charbuf, idx and new_chars
   come from the caller's scope).

   A rule below 0x100 is in the old one-byte format: the byte goes to
   charbuf[idx] and charbuf[idx + 1] becomes -1.  Otherwise the two
   bytes of the new format are stored.  */
#define ENCODE_COMPOSITION_RULE(rule)				\
  do {								\
    int is_old_format = (rule) < 0x100;				\
    int gref = ((rule) % 0x100) / 12;				\
    int nref = ((rule) % 0x100) % 12;				\
								\
    if (! is_old_format)					\
      {								\
	charbuf[idx] = 32 + 81 + gref;				\
	charbuf[idx + 1] = 32 + nref;				\
	new_chars += 2;						\
      }								\
    else							\
      {								\
	/* The old format writes reference point 10 as 4.  */	\
	if (gref == 10) gref = 4;				\
	if (nref == 10) nref = 4;				\
	charbuf[idx] = 32 + gref * 9 + nref;			\
	charbuf[idx + 1] = -1;					\
	new_chars++;						\
      }								\
  } while (0)
3412
e951386e
KH
3413/* Finish the current composition as invalid. */
3414
f57e2426 3415static int finish_composition (int *, struct composition_status *);
e951386e
KH
3416
3417static int
971de7fb 3418finish_composition (int *charbuf, struct composition_status *cmp_status)
e951386e
KH
3419{
3420 int idx = - cmp_status->length;
3421 int new_chars;
3422
3423 /* Recover the original ESC sequence */
3424 charbuf[idx++] = ISO_CODE_ESC;
3425 charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3426 : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3427 : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3428 /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3429 : '4');
3430 charbuf[idx++] = -2;
3431 charbuf[idx++] = 0;
3432 charbuf[idx++] = -1;
3433 new_chars = cmp_status->nchars;
3434 if (cmp_status->method >= COMPOSITION_WITH_RULE)
3435 for (; idx < 0; idx++)
3436 {
3437 int elt = charbuf[idx];
3438
3439 if (elt == -2)
3440 {
3441 ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3442 idx++;
3443 }
3444 else if (elt == -1)
3445 {
3446 charbuf[idx++] = ISO_CODE_ESC;
3447 charbuf[idx] = '0';
3448 new_chars += 2;
3449 }
3450 }
3451 cmp_status->state = COMPOSING_NO;
3452 return new_chars;
3453}
3454
3455/* If characters are under composition, finish the composition. */
/* Unless cmp_status->state is COMPOSING_NO, call finish_composition
   on the composition in progress and add its return value to
   char_offset.  Expects cmp_status, charbuf and char_offset in the
   caller's scope.  */
#define MAYBE_FINISH_COMPOSITION()				\
  do {								\
    if (cmp_status->state == COMPOSING_NO)			\
      break;							\
    char_offset += finish_composition (charbuf, cmp_status);	\
  } while (0)
d46c5b12 3461
aa72b389 3462/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
e951386e 3463
aa72b389
KH
3464 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3465 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
df7492f9
KH
3466 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3467 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
ec6d2bb8 3468
e951386e
KH
3469 Produce this annotation sequence now:
3470
3471 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3472*/
3473
/* Process the composition start sequence ESC C1, C1 being '0', '2',
   '3' or '4'.

   ESC 0 seen while the alternate chars of an altchar or alt&rule
   composition are being collected only ends the alternate-char part:
   two -1 elements are stored as a separator and the state becomes
   COMPOSING_CHAR.

   In every other case any composition in progress is finished as
   invalid first, then a new one is started: the method and state are
   set from C1, an annotation header is emitted with
   ADD_COMPOSITION_DATA, the counters are reset and coding->annotated
   is set.  Expects cmp_status, charbuf, char_offset and coding in the
   caller's scope.  */
#define DECODE_COMPOSITION_START(c1)					    \
  do {									    \
    if (c1 == '0'							    \
	&& ((cmp_status->method == COMPOSITION_WITH_ALTCHARS		    \
	     && cmp_status->state == COMPOSING_COMPONENT_CHAR)		    \
	    || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS	    \
		&& cmp_status->state == COMPOSING_COMPONENT_RULE)))	    \
      {									    \
	charbuf[0] = -1;						    \
	charbuf[1] = -1;						    \
	charbuf += 2;							    \
	cmp_status->length += 2;					    \
	cmp_status->state = COMPOSING_CHAR;				    \
      }									    \
    else								    \
      {									    \
	/* Must run before the method is overwritten below.  */	    \
	MAYBE_FINISH_COMPOSITION ();					    \
	if (c1 == '0')							    \
	  cmp_status->method = COMPOSITION_RELATIVE;			    \
	else if (c1 == '2')						    \
	  cmp_status->method = COMPOSITION_WITH_RULE;			    \
	else if (c1 == '3')						    \
	  cmp_status->method = COMPOSITION_WITH_ALTCHARS;		    \
	else								    \
	  cmp_status->method = COMPOSITION_WITH_RULE_ALTCHARS;		    \
	if (c1 <= '2')							    \
	  cmp_status->state = COMPOSING_CHAR;				    \
	else								    \
	  cmp_status->state = COMPOSING_COMPONENT_CHAR;			    \
	ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);	    \
	cmp_status->length = MAX_ANNOTATION_LENGTH;			    \
	cmp_status->ncomps = 0;						    \
	cmp_status->nchars = 0;						    \
	coding->annotated = 1;						    \
      }									    \
  } while (0)
3502
ec6d2bb8 3503
e951386e 3504/* Handle composition end sequence ESC 1. */
df7492f9
KH
3505
/* Process the composition end sequence ESC 1.

   The sequence is invalid if no character has been composed yet, or
   if the state does not fit the method: a rule-base composition must
   not end in state COMPOSING_CHAR, every other method must.  In that
   case the composition is finished as invalid and control jumps to
   the caller's label `invalid_code'.

   Otherwise the annotation header, found cmp_status->length elements
   before charbuf, is completed: its first element (the negated
   length) is decreased by the number of alternate chars plus 2 for an
   altchar composition, or by three times that number for an alt&rule
   composition, and its third element is set to the number of composed
   characters.  char_offset advances by that number and the state
   returns to COMPOSING_NO.  */
#define DECODE_COMPOSITION_END()					\
  do {									\
    int *cmp_header;							\
									\
    if (cmp_status->nchars == 0						\
	|| ! ((cmp_status->state == COMPOSING_CHAR)			\
	      ^ (cmp_status->method == COMPOSITION_WITH_RULE)))		\
      {									\
	MAYBE_FINISH_COMPOSITION ();					\
	goto invalid_code;						\
      }									\
    cmp_header = charbuf - cmp_status->length;				\
    if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)		\
      cmp_header[0] -= cmp_status->ncomps + 2;				\
    else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)	\
      cmp_header[0] -= cmp_status->ncomps * 3;				\
    cmp_header[2] = cmp_status->nchars;					\
    char_offset += cmp_status->nchars;					\
    cmp_status->state = COMPOSING_NO;					\
  } while (0)
3523
e951386e 3524/* Store a composition rule RULE in charbuf, and update cmp_status. */
df7492f9 3525
e951386e
KH
/* Append the marker -2 and the decoded rule RULE to charbuf, add 2 to
   cmp_status->length, and step cmp_status->state back by one.
   Expects charbuf and cmp_status in the caller's scope.  */
#define STORE_COMPOSITION_RULE(rule)	\
  do {					\
    charbuf[0] = -2;			\
    charbuf[1] = (rule);		\
    charbuf += 2;			\
    cmp_status->length += 2;		\
    --cmp_status->state;		\
  } while (0)
ec6d2bb8 3533
e951386e
KH
3534/* Store a composed char or a component char C in charbuf, and update
3535 cmp_status. */
3536
/* Append C to charbuf and add 1 to cmp_status->length.  C counts as a
   composed character (nchars) when the state is COMPOSING_CHAR, and
   as a component (ncomps) otherwise.  The state is then advanced by
   one when a rule may follow: always for a rule-base composition, and
   for an alt&rule composition while its components are being read.
   Expects charbuf and cmp_status in the caller's scope.  */
#define STORE_COMPOSITION_CHAR(c)					\
  do {									\
    *charbuf = (c);							\
    charbuf++;								\
    cmp_status->length += 1;						\
    if (cmp_status->state != COMPOSING_CHAR)				\
      cmp_status->ncomps += 1;						\
    else								\
      cmp_status->nchars += 1;						\
    if (cmp_status->method == COMPOSITION_WITH_RULE			\
	|| (cmp_status->state == COMPOSING_COMPONENT_CHAR		\
	    && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))	\
      cmp_status->state++;						\
  } while (0)
88993dfd 3550
d46c5b12 3551
4ed46869
KH
3552/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
3553
b73bfc1c 3554static void
971de7fb 3555decode_coding_iso_2022 (struct coding_system *coding)
4ed46869 3556{
8f924df7
KH
3557 const unsigned char *src = coding->source + coding->consumed;
3558 const unsigned char *src_end = coding->source + coding->src_bytes;
3559 const unsigned char *src_base;
69a80ea3 3560 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
3561 /* We may produce two annotations (charset and composition) in one
3562 loop and one more charset annotation at the end. */
ff0dacd7 3563 int *charbuf_end
df80c7f0 3564 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
df7492f9 3565 int consumed_chars = 0, consumed_chars_base;
df7492f9 3566 int multibytep = coding->src_multibyte;
4ed46869 3567 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
3568 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3569 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 3570 int charset_id_2, charset_id_3;
df7492f9
KH
3571 struct charset *charset;
3572 int c;
e951386e 3573 struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
24a73b0a 3574 Lisp_Object attrs, charset_list;
ff0dacd7
KH
3575 int char_offset = coding->produced_char;
3576 int last_offset = char_offset;
3577 int last_id = charset_ascii;
0a9564cb
EZ
3578 int eol_crlf =
3579 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 3580 int byte_after_cr = -1;
e951386e 3581 int i;
df7492f9 3582
24a73b0a 3583 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 3584 setup_iso_safe_charsets (attrs);
287c57d7
KH
3585 /* Charset list may have been changed. */
3586 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
1b3b981b 3587 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
b73bfc1c 3588
e951386e
KH
3589 if (cmp_status->state != COMPOSING_NO)
3590 {
3591 for (i = 0; i < cmp_status->length; i++)
3592 *charbuf++ = cmp_status->carryover[i];
3593 coding->annotated = 1;
3594 }
3595
b73bfc1c 3596 while (1)
4ed46869 3597 {
cf299835 3598 int c1, c2, c3;
b73bfc1c
KH
3599
3600 src_base = src;
df7492f9
KH
3601 consumed_chars_base = consumed_chars;
3602
3603 if (charbuf >= charbuf_end)
b71f6f73
KH
3604 {
3605 if (byte_after_cr >= 0)
3606 src_base--;
3607 break;
3608 }
df7492f9 3609
119852e7
KH
3610 if (byte_after_cr >= 0)
3611 c1 = byte_after_cr, byte_after_cr = -1;
3612 else
3613 ONE_MORE_BYTE (c1);
065e3595
KH
3614 if (c1 < 0)
3615 goto invalid_code;
4ed46869 3616
e951386e 3617 if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
4ed46869 3618 {
e951386e
KH
3619 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3620 char_offset++;
3621 CODING_ISO_EXTSEGMENT_LEN (coding)--;
3622 continue;
3623 }
3624
3625 if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3626 {
3627 if (c1 == ISO_CODE_ESC)
ec6d2bb8 3628 {
e951386e
KH
3629 if (src + 1 >= src_end)
3630 goto no_more_source;
3631 *charbuf++ = ISO_CODE_ESC;
3632 char_offset++;
3633 if (src[0] == '%' && src[1] == '@')
df7492f9 3634 {
e951386e
KH
3635 src += 2;
3636 consumed_chars += 2;
3637 char_offset += 2;
3638 /* We are sure charbuf can contain two more chars. */
3639 *charbuf++ = '%';
3640 *charbuf++ = '@';
3641 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
df7492f9 3642 }
4ed46869 3643 }
e951386e
KH
3644 else
3645 {
3646 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3647 char_offset++;
3648 }
3649 continue;
3650 }
3651
3652 if ((cmp_status->state == COMPOSING_RULE
3653 || cmp_status->state == COMPOSING_COMPONENT_RULE)
3654 && c1 != ISO_CODE_ESC)
3655 {
3656 int rule, nbytes;
3657
3658 DECODE_COMPOSITION_RULE (rule, nbytes);
3659 if (rule < 0)
3660 goto invalid_code;
3661 STORE_COMPOSITION_RULE (rule);
3662 continue;
3663 }
3664
3665 /* We produce at most one character. */
3666 switch (iso_code_class [c1])
3667 {
3668 case ISO_0x20_or_0x7F:
df7492f9
KH
3669 if (charset_id_0 < 0
3670 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
3671 /* This is SPACE or DEL. */
3672 charset = CHARSET_FROM_ID (charset_ascii);
3673 else
3674 charset = CHARSET_FROM_ID (charset_id_0);
3675 break;
4ed46869
KH
3676
3677 case ISO_graphic_plane_0:
134b9549
KH
3678 if (charset_id_0 < 0)
3679 charset = CHARSET_FROM_ID (charset_ascii);
3680 else
3681 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
3682 break;
3683
3684 case ISO_0xA0_or_0xFF:
df7492f9
KH
3685 if (charset_id_1 < 0
3686 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3687 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3688 goto invalid_code;
4ed46869
KH
3689 /* This is a graphic character, we fall down ... */
3690
3691 case ISO_graphic_plane_1:
df7492f9
KH
3692 if (charset_id_1 < 0)
3693 goto invalid_code;
3694 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
3695 break;
3696
df7492f9 3697 case ISO_control_0:
119852e7
KH
3698 if (eol_crlf && c1 == '\r')
3699 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
3700 MAYBE_FINISH_COMPOSITION ();
3701 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
3702 break;
3703
df7492f9 3704 case ISO_control_1:
df7492f9
KH
3705 goto invalid_code;
3706
4ed46869 3707 case ISO_shift_out:
df7492f9
KH
3708 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3709 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3710 goto invalid_code;
3711 CODING_ISO_INVOCATION (coding, 0) = 1;
3712 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3713 continue;
4ed46869
KH
3714
3715 case ISO_shift_in:
df7492f9
KH
3716 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3717 goto invalid_code;
3718 CODING_ISO_INVOCATION (coding, 0) = 0;
3719 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3720 continue;
4ed46869
KH
3721
3722 case ISO_single_shift_2_7:
a63dba42
KH
3723 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3724 goto invalid_code;
4ed46869 3725 case ISO_single_shift_2:
df7492f9
KH
3726 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3727 goto invalid_code;
4ed46869
KH
3728 /* SS2 is handled as an escape sequence of ESC 'N' */
3729 c1 = 'N';
3730 goto label_escape_sequence;
3731
3732 case ISO_single_shift_3:
df7492f9
KH
3733 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3734 goto invalid_code;
4ed46869
KH
3735 /* SS2 is handled as an escape sequence of ESC 'O' */
3736 c1 = 'O';
3737 goto label_escape_sequence;
3738
3739 case ISO_control_sequence_introducer:
3740 /* CSI is handled as an escape sequence of ESC '[' ... */
3741 c1 = '[';
3742 goto label_escape_sequence;
3743
3744 case ISO_escape:
3745 ONE_MORE_BYTE (c1);
3746 label_escape_sequence:
df7492f9 3747 /* Escape sequences handled here are invocation,
4ed46869
KH
3748 designation, direction specification, and character
3749 composition specification. */
3750 switch (c1)
3751 {
3752 case '&': /* revision of following character set */
3753 ONE_MORE_BYTE (c1);
3754 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3755 goto invalid_code;
4ed46869
KH
3756 ONE_MORE_BYTE (c1);
3757 if (c1 != ISO_CODE_ESC)
df7492f9 3758 goto invalid_code;
4ed46869
KH
3759 ONE_MORE_BYTE (c1);
3760 goto label_escape_sequence;
3761
3762 case '$': /* designation of 2-byte character set */
df7492f9
KH
3763 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3764 goto invalid_code;
134b9549
KH
3765 {
3766 int reg, chars96;
3767
3768 ONE_MORE_BYTE (c1);
3769 if (c1 >= '@' && c1 <= 'B')
3770 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3771 or JISX0208.1980 */
134b9549
KH
3772 reg = 0, chars96 = 0;
3773 }
3774 else if (c1 >= 0x28 && c1 <= 0x2B)
3775 { /* designation of DIMENSION2_CHARS94 character set */
3776 reg = c1 - 0x28, chars96 = 0;
3777 ONE_MORE_BYTE (c1);
3778 }
3779 else if (c1 >= 0x2C && c1 <= 0x2F)
3780 { /* designation of DIMENSION2_CHARS96 character set */
3781 reg = c1 - 0x2C, chars96 = 1;
3782 ONE_MORE_BYTE (c1);
3783 }
3784 else
3785 goto invalid_code;
3786 DECODE_DESIGNATION (reg, 2, chars96, c1);
3787 /* We must update these variables now. */
3788 if (reg == 0)
3789 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3790 else if (reg == 1)
3791 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3792 if (chars96 < 0)
3793 goto invalid_code;
3794 }
b73bfc1c 3795 continue;
4ed46869
KH
3796
3797 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3798 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3799 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3800 goto invalid_code;
3801 CODING_ISO_INVOCATION (coding, 0) = 2;
3802 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3803 continue;
4ed46869
KH
3804
3805 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3806 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3807 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3808 goto invalid_code;
3809 CODING_ISO_INVOCATION (coding, 0) = 3;
3810 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3811 continue;
4ed46869
KH
3812
3813 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3814 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3815 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3816 goto invalid_code;
134b9549
KH
3817 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3818 if (charset_id_2 < 0)
3819 charset = CHARSET_FROM_ID (charset_ascii);
3820 else
3821 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3822 ONE_MORE_BYTE (c1);
e7046a18 3823 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3824 goto invalid_code;
4ed46869
KH
3825 break;
3826
3827 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3828 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3829 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3830 goto invalid_code;
134b9549
KH
3831 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3832 if (charset_id_3 < 0)
3833 charset = CHARSET_FROM_ID (charset_ascii);
3834 else
3835 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3836 ONE_MORE_BYTE (c1);
e7046a18 3837 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3838 goto invalid_code;
4ed46869
KH
3839 break;
3840
ec6d2bb8 3841 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3842 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3843 goto invalid_code;
e951386e
KH
3844 if (last_id != charset_ascii)
3845 {
3846 ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3847 last_id = charset_ascii;
3848 last_offset = char_offset;
3849 }
ec6d2bb8 3850 DECODE_COMPOSITION_START (c1);
b73bfc1c 3851 continue;
4ed46869 3852
ec6d2bb8 3853 case '1': /* end composition */
e951386e 3854 if (cmp_status->state == COMPOSING_NO)
df7492f9
KH
3855 goto invalid_code;
3856 DECODE_COMPOSITION_END ();
b73bfc1c 3857 continue;
4ed46869
KH
3858
3859 case '[': /* specification of direction */
de59072a 3860 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
df7492f9 3861 goto invalid_code;
4ed46869 3862 /* For the moment, nested direction is not supported.
d46c5b12 3863 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3864 left-to-right, and nonzero means right-to-left. */
4ed46869
KH
3865 ONE_MORE_BYTE (c1);
3866 switch (c1)
3867 {
3868 case ']': /* end of the current direction */
d46c5b12 3869 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3870
3871 case '0': /* end of the current direction */
3872 case '1': /* start of left-to-right direction */
3873 ONE_MORE_BYTE (c1);
3874 if (c1 == ']')
d46c5b12 3875 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3876 else
df7492f9 3877 goto invalid_code;
4ed46869
KH
3878 break;
3879
3880 case '2': /* start of right-to-left direction */
3881 ONE_MORE_BYTE (c1);
3882 if (c1 == ']')
d46c5b12 3883 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3884 else
df7492f9 3885 goto invalid_code;
4ed46869
KH
3886 break;
3887
3888 default:
df7492f9 3889 goto invalid_code;
4ed46869 3890 }
b73bfc1c 3891 continue;
4ed46869 3892
103e0180 3893 case '%':
103e0180
KH
3894 ONE_MORE_BYTE (c1);
3895 if (c1 == '/')
3896 {
3897 /* CTEXT extended segment:
3898 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3899 We keep these bytes as is for the moment.
3900 They may be decoded by post-read-conversion. */
3901 int dim, M, L;
4776e638 3902 int size;
8f924df7 3903
103e0180 3904 ONE_MORE_BYTE (dim);
e951386e
KH
3905 if (dim < 0 || dim > 4)
3906 goto invalid_code;
103e0180 3907 ONE_MORE_BYTE (M);
e951386e
KH
3908 if (M < 128)
3909 goto invalid_code;
103e0180 3910 ONE_MORE_BYTE (L);
e951386e
KH
3911 if (L < 128)
3912 goto invalid_code;
103e0180 3913 size = ((M - 128) * 128) + (L - 128);
e951386e 3914 if (charbuf + 6 > charbuf_end)
4776e638
KH
3915 goto break_loop;
3916 *charbuf++ = ISO_CODE_ESC;
3917 *charbuf++ = '%';
3918 *charbuf++ = '/';
3919 *charbuf++ = dim;
3920 *charbuf++ = BYTE8_TO_CHAR (M);
3921 *charbuf++ = BYTE8_TO_CHAR (L);
e951386e 3922 CODING_ISO_EXTSEGMENT_LEN (coding) = size;
103e0180
KH
3923 }
3924 else if (c1 == 'G')
3925 {
103e0180
KH
3926 /* XFree86 extension for embedding UTF-8 in CTEXT:
3927 ESC % G --UTF-8-BYTES-- ESC % @
3928 We keep these bytes as is for the moment.
3929 They may be decoded by post-read-conversion. */
e951386e 3930 if (charbuf + 3 > charbuf_end)
4776e638 3931 goto break_loop;
e951386e
KH
3932 *charbuf++ = ISO_CODE_ESC;
3933 *charbuf++ = '%';
3934 *charbuf++ = 'G';
3935 CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
103e0180
KH
3936 }
3937 else
4776e638 3938 goto invalid_code;
103e0180 3939 continue;
4776e638 3940 break;
103e0180 3941
4ed46869 3942 default:
df7492f9
KH
3943 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3944 goto invalid_code;
134b9549
KH
3945 {
3946 int reg, chars96;
3947
3948 if (c1 >= 0x28 && c1 <= 0x2B)
3949 { /* designation of DIMENSION1_CHARS94 character set */
3950 reg = c1 - 0x28, chars96 = 0;
3951 ONE_MORE_BYTE (c1);
3952 }
3953 else if (c1 >= 0x2C && c1 <= 0x2F)
3954 { /* designation of DIMENSION1_CHARS96 character set */
3955 reg = c1 - 0x2C, chars96 = 1;
3956 ONE_MORE_BYTE (c1);
3957 }
3958 else
3959 goto invalid_code;
3960 DECODE_DESIGNATION (reg, 1, chars96, c1);
3961 /* We must update these variables now. */
3962 if (reg == 0)
3963 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3964 else if (reg == 1)
3965 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3966 if (chars96 < 0)
3967 goto invalid_code;
3968 }
b73bfc1c 3969 continue;
4ed46869 3970 }
b73bfc1c 3971 }
4ed46869 3972
e951386e
KH
3973 if (cmp_status->state == COMPOSING_NO
3974 && charset->id != charset_ascii
ff0dacd7
KH
3975 && last_id != charset->id)
3976 {
3977 if (last_id != charset_ascii)
69a80ea3 3978 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
3979 last_id = charset->id;
3980 last_offset = char_offset;
3981 }
3982
b73bfc1c 3983 /* Now we know CHARSET and 1st position code C1 of a character.
cf299835
KH
3984 Produce a decoded character while getting 2nd and 3rd
3985 position codes C2, C3 if necessary. */
df7492f9 3986 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3987 {
3988 ONE_MORE_BYTE (c2);
cf299835
KH
3989 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3990 || ((c1 & 0x80) != (c2 & 0x80)))
b73bfc1c 3991 /* C2 is not in a valid range. */
df7492f9 3992 goto invalid_code;
cf299835
KH
3993 if (CHARSET_DIMENSION (charset) == 2)
3994 c1 = (c1 << 8) | c2;
3995 else
df7492f9 3996 {
cf299835
KH
3997 ONE_MORE_BYTE (c3);
3998 if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
3999 || ((c1 & 0x80) != (c3 & 0x80)))
4000 /* C3 is not in a valid range. */
df7492f9 4001 goto invalid_code;
cf299835 4002 c1 = (c1 << 16) | (c2 << 8) | c2;
df7492f9
KH
4003 }
4004 }
cf299835 4005 c1 &= 0x7F7F7F;
df7492f9
KH
4006 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4007 if (c < 0)
4008 {
4009 MAYBE_FINISH_COMPOSITION ();
4010 for (; src_base < src; src_base++, char_offset++)
4011 {
4012 if (ASCII_BYTE_P (*src_base))
4013 *charbuf++ = *src_base;
4014 else
4015 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4016 }
4017 }
e951386e 4018 else if (cmp_status->state == COMPOSING_NO)
df7492f9
KH
4019 {
4020 *charbuf++ = c;
4021 char_offset++;
4ed46869 4022 }
e951386e
KH
4023 else if ((cmp_status->state == COMPOSING_CHAR
4024 ? cmp_status->nchars
4025 : cmp_status->ncomps)
4026 >= MAX_COMPOSITION_COMPONENTS)
781d7a48 4027 {
e951386e
KH
4028 /* Too long composition. */
4029 MAYBE_FINISH_COMPOSITION ();
4030 *charbuf++ = c;
4031 char_offset++;
4ed46869 4032 }
e951386e
KH
4033 else
4034 STORE_COMPOSITION_CHAR (c);
4ed46869
KH
4035 continue;
4036
df7492f9
KH
4037 invalid_code:
4038 MAYBE_FINISH_COMPOSITION ();
4ed46869 4039 src = src_base;
df7492f9
KH
4040 consumed_chars = consumed_chars_base;
4041 ONE_MORE_BYTE (c);
065e3595 4042 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4043 char_offset++;
df7492f9 4044 coding->errors++;
4776e638
KH
4045 continue;
4046
4047 break_loop:
4048 break;
4ed46869 4049 }
fb88bf2d 4050
df7492f9 4051 no_more_source:
e951386e
KH
4052 if (cmp_status->state != COMPOSING_NO)
4053 {
4054 if (coding->mode & CODING_MODE_LAST_BLOCK)
4055 MAYBE_FINISH_COMPOSITION ();
4056 else
4057 {
4058 charbuf -= cmp_status->length;
4059 for (i = 0; i < cmp_status->length; i++)
4060 cmp_status->carryover[i] = charbuf[i];
4061 }
4062 }
4063 else if (last_id != charset_ascii)
69a80ea3 4064 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4065 coding->consumed_char += consumed_chars_base;
4066 coding->consumed = src_base - coding->source;
4067 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4068}
4069
b73bfc1c 4070
f4dee582 4071/* ISO2022 encoding stuff. */
4ed46869
KH
4072
4073/*
f4dee582 4074 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 4075 specify more details. In Emacs, each coding system of ISO2022
4ed46869 4076 variant has the following specifications:
df7492f9 4077 1. Initial designation to G0 thru G3.
4ed46869
KH
4078 2. Allows short-form designation?
4079 3. ASCII should be designated to G0 before control characters?
4080 4. ASCII should be designated to G0 at end of line?
4081 5. 7-bit environment or 8-bit environment?
4082 6. Use locking-shift?
4083 7. Use Single-shift?
4084 And the following two are only for Japanese:
4085 8. Use ASCII in place of JIS0201-1976-Roman?
4086 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
4087 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4088 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 4089 details.
4ed46869
KH
4090*/
4091
/* Emit, at DST, the escape sequence that designates CHARSET to graphic
   register REG, advancing DST, and record the new designation in
   CODING.

   The sequence is: an optional revision prefix (ESC '&' '@'+REVISION,
   only when CODING has CODING_ISO_FLAG_REVISION and CHARSET has a
   non-negative revision), then ESC, then '$' for a multi-dimensional
   charset, then an intermediate byte selecting REG, then the final
   byte of CHARSET.  For a multi-dimensional 94-charset the
   intermediate byte is omitted (short form) when REG is 0, the final
   byte is '@', 'A', or 'B', and CODING does not request the long
   form.  */

#define ENCODE_DESIGNATION(charset, reg, coding)			\
  do {									\
    unsigned char final_char = CHARSET_ISO_FINAL (charset);		\
    char *interm_94 = "()*+";						\
    char *interm_96 = ",-./";						\
    int revision							\
      = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)	\
	 ? CHARSET_ISO_REVISION (charset)				\
	 : -1);								\
									\
    if (revision >= 0)							\
      {									\
	EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');			\
	EMIT_ONE_BYTE ('@' + revision);					\
      }									\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);					\
    if (CHARSET_DIMENSION (charset) == 1)				\
      {									\
	/* One-byte charsets always carry an intermediate byte.  */	\
	if (CHARSET_ISO_CHARS_96 (charset))				\
	  EMIT_ONE_ASCII_BYTE (interm_96[reg]);				\
	else								\
	  EMIT_ONE_ASCII_BYTE (interm_94[reg]);				\
      }									\
    else								\
      {									\
	EMIT_ONE_ASCII_BYTE ('$');					\
	if (CHARSET_ISO_CHARS_96 (charset))				\
	  EMIT_ONE_ASCII_BYTE (interm_96[reg]);				\
	else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM	\
		 || reg != 0						\
		 || final_char < '@' || final_char > 'B')		\
	  EMIT_ONE_ASCII_BYTE (interm_94[reg]);				\
      }									\
    EMIT_ONE_ASCII_BYTE (final_char);					\
									\
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);	\
  } while (0)
4139
df7492f9 4140
4ed46869
KH
4141/* The following two macros produce codes (control character or escape
4142 sequence) for ISO2022 single-shift functions (single-shift-2 and
4143 single-shift-3). */
4144
df7492f9
KH
/* Emit single-shift-2: the SS2 control byte in an 8-bit environment,
   or ESC 'N' when CODING is restricted to seven bits.  Then mark
   CODING as being in the middle of a single shift.  */
#define ENCODE_SINGLE_SHIFT_2						\
  do {									\
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))	\
      EMIT_ONE_BYTE (ISO_CODE_SS2);					\
    else								\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');				\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)
4153
df7492f9
KH
4154
/* Emit single-shift-3: the SS3 control byte in an 8-bit environment,
   or ESC 'O' when CODING is restricted to seven bits.  Then mark
   CODING as being in the middle of a single shift.  */
#define ENCODE_SINGLE_SHIFT_3						\
  do {									\
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))	\
      EMIT_ONE_BYTE (ISO_CODE_SS3);					\
    else								\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');				\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)
4163
df7492f9 4164
4ed46869
KH
4165/* The following four macros produce codes (control character or
4166 escape sequence) for ISO2022 locking-shift functions (shift-in,
4167 shift-out, locking-shift-2, and locking-shift-3). */
4168
df7492f9
KH
/* Shift-in: emit SI and record that graphic register 0 is now invoked
   to graphic plane 0.  */
#define ENCODE_SHIFT_IN					\
  do {							\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);			\
    CODING_ISO_INVOCATION (coding, 0) = 0;		\
  } while (0)
4174
df7492f9
KH
4175
/* Shift-out: emit SO and record that graphic register 1 is now invoked
   to graphic plane 0.  */
#define ENCODE_SHIFT_OUT				\
  do {							\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);			\
    CODING_ISO_INVOCATION (coding, 0) = 1;		\
  } while (0)
4181
df7492f9
KH
4182
/* Locking-shift-2: emit ESC 'n' and record that graphic register 2 is
   now invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');		\
    CODING_ISO_INVOCATION (coding, 0) = 2;		\
  } while (0)
4188
df7492f9
KH
4189
/* Locking-shift-3: emit ESC 'o' and record that graphic register 3 is
   now invoked to graphic plane 0.

   The final byte must be 'o'; ESC 'n' is locking-shift-2, which would
   make a reader invoke G2 while CODING believes G3 is invoked.  The
   decoder in this file treats ESC 'o' as locking-shift-3.  */
#define ENCODE_LOCKING_SHIFT_3				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');		\
    CODING_ISO_INVOCATION (coding, 0) = 3;		\
  } while (0)
4195
df7492f9 4196
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has CODING_ISO_FLAG_USE_ROMAN
   and CHARSET is ASCII, it is replaced by JISX0201-Roman.

   The body loops until the byte has been emitted: if CHARSET is
   neither single-shifted nor invoked to plane 0 or 1,
   encode_invocation_designation is called to make it so and the
   checks are repeated.  C1 is parenthesized at every use so that any
   expression may be passed safely.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)			\
  do {									\
    int id = CHARSET_ID (charset);					\
									\
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)	\
	&& id == charset_ascii)						\
      {									\
	id = charset_jisx0201_roman;					\
	charset = CHARSET_FROM_ID (id);					\
      }									\
									\
    if (CODING_ISO_SINGLE_SHIFTING (coding))				\
      {									\
	/* A single shift was just emitted; this byte consumes it.  */	\
	if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
	  EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);				\
	else								\
	  EMIT_ONE_BYTE ((c1) | 0x80);					\
	CODING_ISO_SINGLE_SHIFTING (coding) = 0;			\
	break;								\
      }									\
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))		\
      {									\
	EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);				\
	break;								\
      }									\
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))		\
      {									\
	EMIT_ONE_BYTE ((c1) | 0x80);					\
	break;								\
      }									\
    else								\
      /* Since CHARSET is not yet invoked to any graphic planes, we	\
	 must invoke it, or, at first, designate it to some graphic	\
	 register.  Then repeat the loop to actually produce the	\
	 character.  */							\
      dst = encode_invocation_designation (charset, coding, dst,	\
					   &produced_chars);		\
  } while (1)
4239
df7492f9 4240
f4dee582
RS
/* Emit a two-byte character of CHARSET whose position codes are C1 and
   C2, first producing whatever designation and invocation sequences
   are needed to make CHARSET usable.

   CHARSET must be an lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS and CHARSET is JISX0208, it is replaced
   by JISX0208-1978.

   The body repeats until the two bytes have been emitted; each pass
   that finds CHARSET neither single-shifted nor invoked to plane 0 or
   1 calls encode_invocation_designation and tries again.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)		\
  do {									\
    int id = CHARSET_ID (charset);					\
									\
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)	\
	&& id == charset_jisx0208)					\
      {									\
	id = charset_jisx0208_1978;					\
	charset = CHARSET_FROM_ID (id);					\
      }									\
									\
    if (CODING_ISO_SINGLE_SHIFTING (coding))				\
      {									\
	/* A single shift was just emitted; these bytes consume it.  */	\
	if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
	  EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);		\
	else								\
	  EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);			\
	CODING_ISO_SINGLE_SHIFTING (coding) = 0;			\
	break;								\
      }									\
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))			\
      {									\
	/* CHARSET occupies graphic plane 0: 7-bit form.  */		\
	EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);		\
	break;								\
      }									\
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))			\
      {									\
	/* CHARSET occupies graphic plane 1: 8-bit form.  */		\
	EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);			\
	break;								\
      }									\
    /* CHARSET is not invoked to any graphic plane yet.  Invoke it,	\
       designating it to a graphic register first if needed, then	\
       go round again to actually produce the character.  */		\
    dst = encode_invocation_designation (charset, coding, dst,		\
					 &produced_chars);		\
  } while (1)
4283
05e6f5dc 4284
df7492f9
KH
/* Encode character C of CHARSET: compute its code point with
   ENCODE_CHAR, then hand it to the one-byte or two-byte emitter
   according to the dimension of CHARSET.  For the two-byte case the
   code point is split into its high and low bytes.  */
#define ENCODE_ISO_CHARACTER(charset, c)				   \
  do {									   \
    int code = ENCODE_CHAR ((charset),(c));				   \
									   \
    if (CHARSET_DIMENSION (charset) != 1)				   \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else								   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);		   \
  } while (0)
bdd9fb48 4294
05e6f5dc 4295
4ed46869 4296/* Produce designation and invocation codes at a place pointed by DST
df7492f9 4297 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
4298 Return new DST. */
4299
4300unsigned char *
971de7fb 4301encode_invocation_designation (struct charset *charset, struct coding_system *coding, unsigned char *dst, int *p_nchars)
4ed46869 4302{
df7492f9
KH
4303 int multibytep = coding->dst_multibyte;
4304 int produced_chars = *p_nchars;
4ed46869 4305 int reg; /* graphic register number */
df7492f9 4306 int id = CHARSET_ID (charset);
4ed46869
KH
4307
4308 /* At first, check designations. */
4309 for (reg = 0; reg < 4; reg++)
df7492f9 4310 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
4311 break;
4312
4313 if (reg >= 4)
4314 {
4315 /* CHARSET is not yet designated to any graphic registers. */
4316 /* At first check the requested designation. */
df7492f9
KH
4317 reg = CODING_ISO_REQUEST (coding, id);
4318 if (reg < 0)
1ba9e4ab
KH
4319 /* Since CHARSET requests no special designation, designate it
4320 to graphic register 0. */
4ed46869
KH
4321 reg = 0;
4322
4323 ENCODE_DESIGNATION (charset, reg, coding);
4324 }
4325
df7492f9
KH
4326 if (CODING_ISO_INVOCATION (coding, 0) != reg
4327 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
4328 {
4329 /* Since the graphic register REG is not invoked to any graphic
4330 planes, invoke it to graphic plane 0. */
4331 switch (reg)
4332 {
4333 case 0: /* graphic register 0 */
4334 ENCODE_SHIFT_IN;
4335 break;
4336
4337 case 1: /* graphic register 1 */
4338 ENCODE_SHIFT_OUT;
4339 break;
4340
4341 case 2: /* graphic register 2 */
df7492f9 4342 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
4343 ENCODE_SINGLE_SHIFT_2;
4344 else
4345 ENCODE_LOCKING_SHIFT_2;
4346 break;
4347
4348 case 3: /* graphic register 3 */
df7492f9 4349 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
4350 ENCODE_SINGLE_SHIFT_3;
4351 else
4352 ENCODE_LOCKING_SHIFT_3;
4353 break;
4354 }
4355 }
b73bfc1c 4356
df7492f9 4357 *p_nchars = produced_chars;
4ed46869
KH
4358 return dst;
4359}
4360
df7492f9
KH
4361/* The following three macros produce codes for indicating direction
4362 of text. */
/* Emit a control sequence introducer: ESC '[' when CODING is
   restricted to seven bits, the single CSI control byte otherwise.

   The seven-bit flag is tested with `&', like every other flag test
   in this file; comparing the whole flag word with `==' would select
   the 7-bit form only when no other flag happens to be set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER				\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');				\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_CSI);					\
  } while (0)
4370
ec6d2bb8 4371
df7492f9
KH
/* Emit the sequence that starts right-to-left text: CSI '2' ']'.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it is
   used without an argument list; a trailing `(dst)' would be left
   dangling after its `while (0)' and fail to compile.  */
#define ENCODE_DIRECTION_R2L()				\
  do {							\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;			\
    EMIT_TWO_ASCII_BYTES ('2', ']');			\
  } while (0)
4377
ec6d2bb8 4378
/* Emit the sequence that returns to left-to-right text: CSI '0' ']'.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it is
   used without an argument list; a trailing `(dst)' would be left
   dangling after its `while (0)' and fail to compile.  */
#define ENCODE_DIRECTION_L2R()				\
  do {							\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;			\
    EMIT_TWO_ASCII_BYTES ('0', ']');			\
  } while (0)
4ed46869 4384
4ed46869
KH
4385
/* Emit whatever is needed to put the graphic planes and registers
   back into their initial state: shift in if graphic plane 0 does not
   hold register 0, then re-designate every register whose current
   charset differs from its initial one.  Registers with no initial
   charset (negative value) are left alone.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()				\
  do {									\
    int reg;								\
    struct charset *charset;						\
									\
    if (CODING_ISO_INVOCATION (coding, 0) != 0)				\
      ENCODE_SHIFT_IN;							\
    for (reg = 0; reg < 4; reg++)					\
      {									\
	if (CODING_ISO_INITIAL (coding, reg) < 0)			\
	  continue;							\
	if (CODING_ISO_DESIGNATION (coding, reg)			\
	    == CODING_ISO_INITIAL (coding, reg))			\
	  continue;							\
	charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));	\
	ENCODE_DESIGNATION (charset, reg, coding);			\
      }									\
  } while (0)
4404
df7492f9 4405
bdd9fb48 4406/* Produce designation sequences of charsets in the line started from
b73bfc1c 4407 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
4408
4409 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
4410 find all the necessary designations. */
4411
b73bfc1c 4412static unsigned char *
971de7fb 4413encode_designation_at_bol (struct coding_system *coding, int *charbuf, int *charbuf_end, unsigned char *dst)
e0e989f6 4414{
df7492f9 4415 struct charset *charset;
bdd9fb48
KH
4416 /* Table of charsets to be designated to each graphic register. */
4417 int r[4];
df7492f9
KH
4418 int c, found = 0, reg;
4419 int produced_chars = 0;
4420 int multibytep = coding->dst_multibyte;
4421 Lisp_Object attrs;
4422 Lisp_Object charset_list;
4423
4424 attrs = CODING_ID_ATTRS (coding->id);
4425 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4426 if (EQ (charset_list, Qiso_2022))
4427 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
4428
4429 for (reg = 0; reg < 4; reg++)
4430 r[reg] = -1;
4431
b73bfc1c 4432 while (found < 4)
e0e989f6 4433 {
df7492f9
KH
4434 int id;
4435
4436 c = *charbuf++;
b73bfc1c
KH
4437 if (c == '\n')
4438 break;
df7492f9
KH
4439 charset = char_charset (c, charset_list, NULL);
4440 id = CHARSET_ID (charset);
4441 reg = CODING_ISO_REQUEST (coding, id);
4442 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
4443 {
4444 found++;
df7492f9 4445 r[reg] = id;
bdd9fb48 4446 }
bdd9fb48
KH
4447 }
4448
4449 if (found)
4450 {
4451 for (reg = 0; reg < 4; reg++)
4452 if (r[reg] >= 0
df7492f9
KH
4453 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4454 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 4455 }
b73bfc1c
KH
4456
4457 return dst;
e0e989f6
KH
4458}
4459
4ed46869
KH
4460/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
4461
df7492f9 4462static int
971de7fb 4463encode_coding_iso_2022 (struct coding_system *coding)
4ed46869 4464{
df7492f9
KH
4465 int multibytep = coding->dst_multibyte;
4466 int *charbuf = coding->charbuf;
4467 int *charbuf_end = charbuf + coding->charbuf_used;
4468 unsigned char *dst = coding->destination + coding->produced;
4469 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4470 int safe_room = 16;
4471 int bol_designation
4472 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4473 && CODING_ISO_BOL (coding));
4474 int produced_chars = 0;
4475 Lisp_Object attrs, eol_type, charset_list;
4476 int ascii_compatible;
b73bfc1c 4477 int c;
ff0dacd7 4478 int preferred_charset_id = -1;
05e6f5dc 4479
24a73b0a 4480 CODING_GET_INFO (coding, attrs, charset_list);
0a9564cb 4481 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
24a73b0a
KH
4482 if (VECTORP (eol_type))
4483 eol_type = Qunix;
4484
004068e4 4485 setup_iso_safe_charsets (attrs);
ff0dacd7 4486 /* Charset list may have been changed. */
287c57d7 4487 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
1b3b981b 4488 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
0eecad43 4489
df7492f9 4490 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
bdd9fb48 4491
df7492f9 4492 while (charbuf < charbuf_end)
4ed46869 4493 {
df7492f9 4494 ASSURE_DESTINATION (safe_room);
b73bfc1c 4495
df7492f9 4496 if (bol_designation)
b73bfc1c 4497 {
df7492f9 4498 unsigned char *dst_prev = dst;
4ed46869 4499
bdd9fb48 4500 /* We have to produce designation sequences if any now. */
df7492f9
KH
4501 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4502 bol_designation = 0;
4503 /* We are sure that designation sequences are all ASCII bytes. */
4504 produced_chars += dst - dst_prev;
e0e989f6
KH
4505 }
4506
df7492f9 4507 c = *charbuf++;
ec6d2bb8 4508
ff0dacd7
KH
4509 if (c < 0)
4510 {
4511 /* Handle an annotation. */
4512 switch (*charbuf)
ec6d2bb8 4513 {
ff0dacd7
KH
4514 case CODING_ANNOTATE_COMPOSITION_MASK:
4515 /* Not yet implemented. */
4516 break;
4517 case CODING_ANNOTATE_CHARSET_MASK:
cf7dfdf5 4518 preferred_charset_id = charbuf[2];
ff0dacd7
KH
4519 if (preferred_charset_id >= 0
4520 && NILP (Fmemq (make_number (preferred_charset_id),
4521 charset_list)))
4522 preferred_charset_id = -1;
4523 break;
4524 default:
4525 abort ();
4ed46869 4526 }
ff0dacd7
KH
4527 charbuf += -c - 1;
4528 continue;
4ed46869 4529 }
ec6d2bb8 4530
b73bfc1c
KH
4531 /* Now encode the character C. */
4532 if (c < 0x20 || c == 0x7F)
4533 {
df7492f9
KH
4534 if (c == '\n'
4535 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 4536 {
df7492f9
KH
4537 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4538 ENCODE_RESET_PLANE_AND_REGISTER ();
4539 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 4540 {
df7492f9
KH
4541 int i;
4542
4543 for (i = 0; i < 4; i++)
4544 CODING_ISO_DESIGNATION (coding, i)
4545 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 4546 }
df7492f9
KH
4547 bol_designation
4548 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 4549 }
df7492f9
KH
4550 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4551 ENCODE_RESET_PLANE_AND_REGISTER ();
4552 EMIT_ONE_ASCII_BYTE (c);
4ed46869 4553 }
df7492f9 4554 else if (ASCII_CHAR_P (c))
88993dfd 4555 {
df7492f9
KH
4556 if (ascii_compatible)
4557 EMIT_ONE_ASCII_BYTE (c);
93dec019 4558 else
19a8d9e0 4559 {
bf16eb23
KH
4560 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4561 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 4562 }
4ed46869 4563 }
16eafb5d 4564 else if (CHAR_BYTE8_P (c))
88993dfd 4565 {
16eafb5d
KH
4566 c = CHAR_TO_BYTE8 (c);
4567 EMIT_ONE_BYTE (c);
88993dfd 4568 }
b73bfc1c 4569 else
df7492f9 4570 {
ff0dacd7 4571 struct charset *charset;
b73bfc1c 4572
ff0dacd7
KH
4573 if (preferred_charset_id >= 0)
4574 {
4575 charset = CHARSET_FROM_ID (preferred_charset_id);
4576 if (! CHAR_CHARSET_P (c, charset))
4577 charset = char_charset (c, charset_list, NULL);
4578 }
4579 else
4580 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
4581 if (!charset)
4582 {
41cbe562
KH
4583 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4584 {
4585 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4586 charset = CHARSET_FROM_ID (charset_ascii);
4587 }
4588 else
4589 {
4590 c = coding->default_char;
4591 charset = char_charset (c, charset_list, NULL);
4592 }
df7492f9
KH
4593 }
4594 ENCODE_ISO_CHARACTER (charset, c);
4595 }
84fbb8a0 4596 }
b73bfc1c 4597
df7492f9
KH
4598 if (coding->mode & CODING_MODE_LAST_BLOCK
4599 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4600 {
4601 ASSURE_DESTINATION (safe_room);
4602 ENCODE_RESET_PLANE_AND_REGISTER ();
4603 }
065e3595 4604 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4605 CODING_ISO_BOL (coding) = bol_designation;
4606 coding->produced_char += produced_chars;
4607 coding->produced = dst - coding->destination;
4608 return 0;
4ed46869
KH
4609}
4610
4611\f
df7492f9 4612/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 4613
df7492f9 4614/* Although SJIS and BIG5 are not ISO coding systems, they are used
4ed46869
KH
4615 quite widely. So, for the moment, Emacs supports them in the bare
4616 C code. But, in the future, they may be supported only by CCL. */
4617
4618/* SJIS is a coding system encoding three character sets: ASCII, right
4619 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
4620 as is. A character of charset katakana-jisx0201 is encoded by
4621 "position-code + 0x80". A character of charset japanese-jisx0208
4622 is encoded in 2 bytes, but the two position-codes are divided and shifted
df7492f9 4623 so that they fit in the ranges below.
4ed46869
KH
4624
4625 --- CODE RANGE of SJIS ---
4626 (character set) (range)
4627 ASCII 0x00 .. 0x7F
df7492f9 4628 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 4629 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 4630 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
4631 -------------------------------
4632
4633*/
4634
4635/* BIG5 is a coding system encoding two character sets: ASCII and
4636 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 4637 character set and is encoded in two bytes.
4ed46869
KH
4638
4639 --- CODE RANGE of BIG5 ---
4640 (character set) (range)
4641 ASCII 0x00 .. 0x7F
4642 Big5 (1st byte) 0xA1 .. 0xFE
4643 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
4644 --------------------------
4645
df7492f9 4646 */
4ed46869
KH
4647
4648/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4649 Check if a text is encoded in SJIS. If it is, return
df7492f9 4650 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 4651
0a28aafb 4652static int
971de7fb 4653detect_coding_sjis (struct coding_system *coding, struct coding_detection_info *detect_info)
4ed46869 4654{
065e3595 4655 const unsigned char *src = coding->source, *src_base;
8f924df7 4656 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4657 int multibytep = coding->src_multibyte;
4658 int consumed_chars = 0;
4659 int found = 0;
b73bfc1c 4660 int c;
f07190ca
KH
4661 Lisp_Object attrs, charset_list;
4662 int max_first_byte_of_2_byte_code;
4663
4664 CODING_GET_INFO (coding, attrs, charset_list);
4665 max_first_byte_of_2_byte_code
4666 = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
df7492f9 4667
ff0dacd7 4668 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
4669 /* A coding system of this category is always ASCII compatible. */
4670 src += coding->head_ascii;
4ed46869 4671
b73bfc1c 4672 while (1)
4ed46869 4673 {
065e3595 4674 src_base = src;
df7492f9 4675 ONE_MORE_BYTE (c);
682169fe
KH
4676 if (c < 0x80)
4677 continue;
f07190ca
KH
4678 if ((c >= 0x81 && c <= 0x9F)
4679 || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4ed46869 4680 {
df7492f9 4681 ONE_MORE_BYTE (c);
682169fe 4682 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 4683 break;
ff0dacd7 4684 found = CATEGORY_MASK_SJIS;
4ed46869 4685 }
df7492f9 4686 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 4687 found = CATEGORY_MASK_SJIS;
df7492f9
KH
4688 else
4689 break;
4ed46869 4690 }
ff0dacd7 4691 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
4692 return 0;
4693
4694 no_more_source:
065e3595 4695 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4696 {
ff0dacd7 4697 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 4698 return 0;
4ed46869 4699 }
ff0dacd7
KH
4700 detect_info->found |= found;
4701 return 1;
4ed46869
KH
4702}
4703
4704/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4705 Check if a text is encoded in BIG5. If it is, return
df7492f9 4706 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 4707
0a28aafb 4708static int
971de7fb 4709detect_coding_big5 (struct coding_system *coding, struct coding_detection_info *detect_info)
4ed46869 4710{
065e3595 4711 const unsigned char *src = coding->source, *src_base;
8f924df7 4712 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4713 int multibytep = coding->src_multibyte;
4714 int consumed_chars = 0;
4715 int found = 0;
b73bfc1c 4716 int c;
fa42c37f 4717
ff0dacd7 4718 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4719 /* A coding system of this category is always ASCII compatible. */
4720 src += coding->head_ascii;
fa42c37f 4721
b73bfc1c 4722 while (1)
fa42c37f 4723 {
065e3595 4724 src_base = src;
df7492f9
KH
4725 ONE_MORE_BYTE (c);
4726 if (c < 0x80)
fa42c37f 4727 continue;
df7492f9 4728 if (c >= 0xA1)
fa42c37f 4729 {
df7492f9
KH
4730 ONE_MORE_BYTE (c);
4731 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4732 return 0;
ff0dacd7 4733 found = CATEGORY_MASK_BIG5;
fa42c37f 4734 }
df7492f9
KH
4735 else
4736 break;
fa42c37f 4737 }
ff0dacd7 4738 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4739 return 0;
fa42c37f 4740
df7492f9 4741 no_more_source:
065e3595 4742 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4743 {
ff0dacd7 4744 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4745 return 0;
4746 }
ff0dacd7
KH
4747 detect_info->found |= found;
4748 return 1;
fa42c37f
KH
4749}
4750
4ed46869
KH
4751/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4752 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
fa42c37f 4753
b73bfc1c 4754static void
971de7fb 4755decode_coding_sjis (struct coding_system *coding)
4ed46869 4756{
8f924df7
KH
4757 const unsigned char *src = coding->source + coding->consumed;
4758 const unsigned char *src_end = coding->source + coding->src_bytes;
4759 const unsigned char *src_base;
69a80ea3 4760 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
4761 /* We may produce one charset annocation in one loop and one more at
4762 the end. */
69a80ea3 4763 int *charbuf_end
df80c7f0 4764 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
df7492f9
KH
4765 int consumed_chars = 0, consumed_chars_base;
4766 int multibytep = coding->src_multibyte;
4767 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4768 struct charset *charset_kanji2;
24a73b0a 4769 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4770 int char_offset = coding->produced_char;
4771 int last_offset = char_offset;
4772 int last_id = charset_ascii;
0a9564cb
EZ
4773 int eol_crlf =
4774 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 4775 int byte_after_cr = -1;
a5d301df 4776
24a73b0a 4777 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4778
4779 val = charset_list;
4780 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4781 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4782 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4783 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4784
b73bfc1c 4785 while (1)
4ed46869 4786 {
df7492f9 4787 int c, c1;
24a73b0a 4788 struct charset *charset;
fa42c37f 4789
b73bfc1c 4790 src_base = src;
df7492f9 4791 consumed_chars_base = consumed_chars;
fa42c37f 4792
df7492f9 4793 if (charbuf >= charbuf_end)
b71f6f73
KH
4794 {
4795 if (byte_after_cr >= 0)
4796 src_base--;
4797 break;
4798 }
df7492f9 4799
119852e7
KH
4800 if (byte_after_cr >= 0)
4801 c = byte_after_cr, byte_after_cr = -1;
4802 else
4803 ONE_MORE_BYTE (c);
065e3595
KH
4804 if (c < 0)
4805 goto invalid_code;
24a73b0a 4806 if (c < 0x80)
119852e7
KH
4807 {
4808 if (eol_crlf && c == '\r')
4809 ONE_MORE_BYTE (byte_after_cr);
4810 charset = charset_roman;
4811 }
57a47f8a 4812 else if (c == 0x80 || c == 0xA0)
8e921c4b 4813 goto invalid_code;
57a47f8a
KH
4814 else if (c >= 0xA1 && c <= 0xDF)
4815 {
4816 /* SJIS -> JISX0201-Kana */
4817 c &= 0x7F;
4818 charset = charset_kana;
4819 }
4820 else if (c <= 0xEF)
df7492f9 4821 {
57a47f8a
KH
4822 /* SJIS -> JISX0208 */
4823 ONE_MORE_BYTE (c1);
4824 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4825 goto invalid_code;
57a47f8a
KH
4826 c = (c << 8) | c1;
4827 SJIS_TO_JIS (c);
4828 charset = charset_kanji;
4829 }
4830 else if (c <= 0xFC && charset_kanji2)
4831 {
c6876370 4832 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4833 ONE_MORE_BYTE (c1);
4834 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4835 goto invalid_code;
57a47f8a
KH
4836 c = (c << 8) | c1;
4837 SJIS_TO_JIS2 (c);
4838 charset = charset_kanji2;
df7492f9 4839 }
57a47f8a
KH
4840 else
4841 goto invalid_code;
24a73b0a
KH
4842 if (charset->id != charset_ascii
4843 && last_id != charset->id)
4844 {
4845 if (last_id != charset_ascii)
69a80ea3 4846 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4847 last_id = charset->id;
4848 last_offset = char_offset;
4849 }
4850 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4851 *charbuf++ = c;
ff0dacd7 4852 char_offset++;
df7492f9 4853 continue;
b73bfc1c 4854
df7492f9
KH
4855 invalid_code:
4856 src = src_base;
4857 consumed_chars = consumed_chars_base;
4858 ONE_MORE_BYTE (c);
065e3595 4859 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4860 char_offset++;
df7492f9
KH
4861 coding->errors++;
4862 }
fa42c37f 4863
df7492f9 4864 no_more_source:
ff0dacd7 4865 if (last_id != charset_ascii)
69a80ea3 4866 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4867 coding->consumed_char += consumed_chars_base;
4868 coding->consumed = src_base - coding->source;
4869 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4870}
4871
b73bfc1c 4872static void
971de7fb 4873decode_coding_big5 (struct coding_system *coding)
4ed46869 4874{
8f924df7
KH
4875 const unsigned char *src = coding->source + coding->consumed;
4876 const unsigned char *src_end = coding->source + coding->src_bytes;
4877 const unsigned char *src_base;
69a80ea3 4878 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
4879 /* We may produce one charset annocation in one loop and one more at
4880 the end. */
69a80ea3 4881 int *charbuf_end
df80c7f0 4882 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
df7492f9
KH
4883 int consumed_chars = 0, consumed_chars_base;
4884 int multibytep = coding->src_multibyte;
4885 struct charset *charset_roman, *charset_big5;
24a73b0a 4886 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4887 int char_offset = coding->produced_char;
4888 int last_offset = char_offset;
4889 int last_id = charset_ascii;
0a9564cb
EZ
4890 int eol_crlf =
4891 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 4892 int byte_after_cr = -1;
df7492f9 4893
24a73b0a 4894 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4895 val = charset_list;
4896 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4897 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4898
b73bfc1c 4899 while (1)
4ed46869 4900 {
df7492f9 4901 int c, c1;
24a73b0a 4902 struct charset *charset;
b73bfc1c
KH
4903
4904 src_base = src;
df7492f9
KH
4905 consumed_chars_base = consumed_chars;
4906
4907 if (charbuf >= charbuf_end)
b71f6f73
KH
4908 {
4909 if (byte_after_cr >= 0)
4910 src_base--;
4911 break;
4912 }
df7492f9 4913
119852e7 4914 if (byte_after_cr >= 0)
14daee73 4915 c = byte_after_cr, byte_after_cr = -1;
119852e7
KH
4916 else
4917 ONE_MORE_BYTE (c);
b73bfc1c 4918
065e3595
KH
4919 if (c < 0)
4920 goto invalid_code;
24a73b0a 4921 if (c < 0x80)
119852e7 4922 {
14daee73 4923 if (eol_crlf && c == '\r')
119852e7
KH
4924 ONE_MORE_BYTE (byte_after_cr);
4925 charset = charset_roman;
4926 }
24a73b0a 4927 else
4ed46869 4928 {
24a73b0a
KH
4929 /* BIG5 -> Big5 */
4930 if (c < 0xA1 || c > 0xFE)
4931 goto invalid_code;
4932 ONE_MORE_BYTE (c1);
4933 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4934 goto invalid_code;
4935 c = c << 8 | c1;
4936 charset = charset_big5;
4ed46869 4937 }
24a73b0a
KH
4938 if (charset->id != charset_ascii
4939 && last_id != charset->id)
df7492f9 4940 {
24a73b0a 4941 if (last_id != charset_ascii)
69a80ea3 4942 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4943 last_id = charset->id;
4944 last_offset = char_offset;
4ed46869 4945 }
24a73b0a 4946 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4947 *charbuf++ = c;
ff0dacd7 4948 char_offset++;
fb88bf2d
KH
4949 continue;
4950
df7492f9 4951 invalid_code:
4ed46869 4952 src = src_base;
df7492f9
KH
4953 consumed_chars = consumed_chars_base;
4954 ONE_MORE_BYTE (c);
065e3595 4955 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4956 char_offset++;
df7492f9 4957 coding->errors++;
fb88bf2d 4958 }
d46c5b12 4959
df7492f9 4960 no_more_source:
ff0dacd7 4961 if (last_id != charset_ascii)
69a80ea3 4962 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4963 coding->consumed_char += consumed_chars_base;
4964 coding->consumed = src_base - coding->source;
4965 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4966}
4967
4968/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4969 This function can encode charsets `ascii', `katakana-jisx0201',
4970 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4971 are sure that all these charsets are registered as official charset
4ed46869
KH
4972 (i.e. do not have extended leading-codes). Characters of other
4973 charsets are produced without any encoding. If SJIS_P is 1, encode
4974 SJIS text, else encode BIG5 text. */
4975
df7492f9 4976static int
971de7fb 4977encode_coding_sjis (struct coding_system *coding)
4ed46869 4978{
df7492f9
KH
4979 int multibytep = coding->dst_multibyte;
4980 int *charbuf = coding->charbuf;
4981 int *charbuf_end = charbuf + coding->charbuf_used;
4982 unsigned char *dst = coding->destination + coding->produced;
4983 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4984 int safe_room = 4;
4985 int produced_chars = 0;
24a73b0a 4986 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4987 int ascii_compatible;
4988 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4989 struct charset *charset_kanji2;
df7492f9 4990 int c;
a5d301df 4991
24a73b0a 4992 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4993 val = charset_list;
4994 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4995 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4996 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4997 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4998
df7492f9 4999 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 5000
df7492f9
KH
5001 while (charbuf < charbuf_end)
5002 {
5003 ASSURE_DESTINATION (safe_room);
5004 c = *charbuf++;
b73bfc1c 5005 /* Now encode the character C. */
df7492f9
KH
5006 if (ASCII_CHAR_P (c) && ascii_compatible)
5007 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
5008 else if (CHAR_BYTE8_P (c))
5009 {
5010 c = CHAR_TO_BYTE8 (c);
5011 EMIT_ONE_BYTE (c);
5012 }
df7492f9 5013 else
b73bfc1c 5014 {
df7492f9
KH
5015 unsigned code;
5016 struct charset *charset = char_charset (c, charset_list, &code);
5017
5018 if (!charset)
4ed46869 5019 {
41cbe562 5020 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 5021 {
41cbe562
KH
5022 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5023 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 5024 }
41cbe562 5025 else
b73bfc1c 5026 {
41cbe562
KH
5027 c = coding->default_char;
5028 charset = char_charset (c, charset_list, &code);
b73bfc1c 5029 }
b73bfc1c 5030 }
df7492f9
KH
5031 if (code == CHARSET_INVALID_CODE (charset))
5032 abort ();
5033 if (charset == charset_kanji)
5034 {
5035 int c1, c2;
5036 JIS_TO_SJIS (code);
5037 c1 = code >> 8, c2 = code & 0xFF;
5038 EMIT_TWO_BYTES (c1, c2);
5039 }
5040 else if (charset == charset_kana)
5041 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
5042 else if (charset_kanji2 && charset == charset_kanji2)
5043 {
5044 int c1, c2;
5045
5046 c1 = code >> 8;
f07190ca
KH
5047 if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5048 || c1 == 0x28
57a47f8a
KH
5049 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5050 {
5051 JIS_TO_SJIS2 (code);
5052 c1 = code >> 8, c2 = code & 0xFF;
5053 EMIT_TWO_BYTES (c1, c2);
5054 }
5055 else
5056 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5057 }
df7492f9
KH
5058 else
5059 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5060 }
5061 }
065e3595 5062 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5063 coding->produced_char += produced_chars;
5064 coding->produced = dst - coding->destination;
5065 return 0;
5066}
5067
5068static int
971de7fb 5069encode_coding_big5 (struct coding_system *coding)
df7492f9
KH
5070{
5071 int multibytep = coding->dst_multibyte;
5072 int *charbuf = coding->charbuf;
5073 int *charbuf_end = charbuf + coding->charbuf_used;
5074 unsigned char *dst = coding->destination + coding->produced;
5075 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5076 int safe_room = 4;
5077 int produced_chars = 0;
24a73b0a 5078 Lisp_Object attrs, charset_list, val;
df7492f9
KH
5079 int ascii_compatible;
5080 struct charset *charset_roman, *charset_big5;
5081 int c;
5082
24a73b0a 5083 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
5084 val = charset_list;
5085 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5086 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5087 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5088
5089 while (charbuf < charbuf_end)
5090 {
5091 ASSURE_DESTINATION (safe_room);
5092 c = *charbuf++;
5093 /* Now encode the character C. */
5094 if (ASCII_CHAR_P (c) && ascii_compatible)
5095 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
5096 else if (CHAR_BYTE8_P (c))
5097 {
5098 c = CHAR_TO_BYTE8 (c);
5099 EMIT_ONE_BYTE (c);
b73bfc1c
KH
5100 }
5101 else
5102 {
df7492f9
KH
5103 unsigned code;
5104 struct charset *charset = char_charset (c, charset_list, &code);
5105
5106 if (! charset)
b73bfc1c 5107 {
41cbe562 5108 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 5109 {
41cbe562
KH
5110 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5111 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 5112 }
41cbe562 5113 else
0eecad43 5114 {
41cbe562
KH
5115 c = coding->default_char;
5116 charset = char_charset (c, charset_list, &code);
0eecad43 5117 }
4ed46869 5118 }
df7492f9
KH
5119 if (code == CHARSET_INVALID_CODE (charset))
5120 abort ();
5121 if (charset == charset_big5)
b73bfc1c 5122 {
df7492f9
KH
5123 int c1, c2;
5124
5125 c1 = code >> 8, c2 = code & 0xFF;
5126 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 5127 }
df7492f9
KH
5128 else
5129 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 5130 }
4ed46869 5131 }
065e3595 5132 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5133 coding->produced_char += produced_chars;
5134 coding->produced = dst - coding->destination;
5135 return 0;
4ed46869
KH
5136}
5137
5138\f
df7492f9 5139/*** 10. CCL handlers ***/
1397dc18
KH
5140
5141/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5142 Check if a text is encoded in a coding system of which
5143 encoder/decoder are written in CCL program. If it is, return
df7492f9 5144 CATEGORY_MASK_CCL, else return 0. */
1397dc18 5145
0a28aafb 5146static int
971de7fb 5147detect_coding_ccl (struct coding_system *coding, struct coding_detection_info *detect_info)
1397dc18 5148{
065e3595 5149 const unsigned char *src = coding->source, *src_base;
8f924df7 5150 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
5151 int multibytep = coding->src_multibyte;
5152 int consumed_chars = 0;
5153 int found = 0;
0e219d54 5154 unsigned char *valids;
df7492f9
KH
5155 int head_ascii = coding->head_ascii;
5156 Lisp_Object attrs;
5157
ff0dacd7
KH
5158 detect_info->checked |= CATEGORY_MASK_CCL;
5159
df7492f9 5160 coding = &coding_categories[coding_category_ccl];
0e219d54 5161 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
5162 attrs = CODING_ID_ATTRS (coding->id);
5163 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5164 src += head_ascii;
1397dc18 5165
b73bfc1c 5166 while (1)
1397dc18 5167 {
df7492f9 5168 int c;
065e3595
KH
5169
5170 src_base = src;
df7492f9 5171 ONE_MORE_BYTE (c);
065e3595 5172 if (c < 0 || ! valids[c])
df7492f9 5173 break;
ff0dacd7
KH
5174 if ((valids[c] > 1))
5175 found = CATEGORY_MASK_CCL;
df7492f9 5176 }
ff0dacd7 5177 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
5178 return 0;
5179
5180 no_more_source:
ff0dacd7
KH
5181 detect_info->found |= found;
5182 return 1;
df7492f9
KH
5183}
5184
5185static void
971de7fb 5186decode_coding_ccl (struct coding_system *coding)
df7492f9 5187{
7c78e542 5188 const unsigned char *src = coding->source + coding->consumed;
8f924df7 5189 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
5190 int *charbuf = coding->charbuf + coding->charbuf_used;
5191 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
5192 int consumed_chars = 0;
5193 int multibytep = coding->src_multibyte;
d0396581 5194 struct ccl_program *ccl = &coding->spec.ccl->ccl;
df7492f9 5195 int source_charbuf[1024];
fbdc1721 5196 int source_byteidx[1025];
24a73b0a 5197 Lisp_Object attrs, charset_list;
df7492f9 5198
24a73b0a 5199 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5200
d0396581 5201 while (1)
df7492f9 5202 {
7c78e542 5203 const unsigned char *p = src;
df7492f9
KH
5204 int i = 0;
5205
5206 if (multibytep)
fbdc1721
KH
5207 {
5208 while (i < 1024 && p < src_end)
5209 {
5210 source_byteidx[i] = p - src;
5211 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5212 }
5213 source_byteidx[i] = p - src;
5214 }
df7492f9
KH
5215 else
5216 while (i < 1024 && p < src_end)
5217 source_charbuf[i++] = *p++;
8f924df7 5218
df7492f9 5219 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
d0396581
KH
5220 ccl->last_block = 1;
5221 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5222 charset_list);
5223 charbuf += ccl->produced;
fbdc1721 5224 if (multibytep)
d0396581 5225 src += source_byteidx[ccl->consumed];
df7492f9 5226 else
d0396581
KH
5227 src += ccl->consumed;
5228 consumed_chars += ccl->consumed;
5229 if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
df7492f9
KH
5230 break;
5231 }
5232
d0396581 5233 switch (ccl->status)
df7492f9
KH
5234 {
5235 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 5236 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
5237 break;
5238 case CCL_STAT_SUSPEND_BY_DST:
d0396581 5239 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
5240 break;
5241 case CCL_STAT_QUIT:
5242 case CCL_STAT_INVALID_CMD:
065e3595 5243 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
5244 break;
5245 default:
065e3595 5246 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5247 break;
5248 }
5249 coding->consumed_char += consumed_chars;
5250 coding->consumed = src - coding->source;
5251 coding->charbuf_used = charbuf - coding->charbuf;
5252}
5253
5254static int
971de7fb 5255encode_coding_ccl (struct coding_system *coding)
df7492f9 5256{
fb608df3 5257 struct ccl_program *ccl = &coding->spec.ccl->ccl;
df7492f9
KH
5258 int multibytep = coding->dst_multibyte;
5259 int *charbuf = coding->charbuf;
5260 int *charbuf_end = charbuf + coding->charbuf_used;
5261 unsigned char *dst = coding->destination + coding->produced;
5262 unsigned char *dst_end = coding->destination + coding->dst_bytes;
df7492f9
KH
5263 int destination_charbuf[1024];
5264 int i, produced_chars = 0;
24a73b0a 5265 Lisp_Object attrs, charset_list;
df7492f9 5266
24a73b0a 5267 CODING_GET_INFO (coding, attrs, charset_list);
fb608df3
KH
5268 if (coding->consumed_char == coding->src_chars
5269 && coding->mode & CODING_MODE_LAST_BLOCK)
5270 ccl->last_block = 1;
df7492f9 5271
8cffd3e7 5272 while (charbuf < charbuf_end)
df7492f9 5273 {
fb608df3 5274 ccl_driver (ccl, charbuf, destination_charbuf,
8cffd3e7 5275 charbuf_end - charbuf, 1024, charset_list);
df7492f9 5276 if (multibytep)
8cffd3e7 5277 {
fb608df3
KH
5278 ASSURE_DESTINATION (ccl->produced * 2);
5279 for (i = 0; i < ccl->produced; i++)
8cffd3e7
KH
5280 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5281 }
df7492f9
KH
5282 else
5283 {
fb608df3
KH
5284 ASSURE_DESTINATION (ccl->produced);
5285 for (i = 0; i < ccl->produced; i++)
df7492f9 5286 *dst++ = destination_charbuf[i] & 0xFF;
fb608df3 5287 produced_chars += ccl->produced;
df7492f9 5288 }
fb608df3
KH
5289 charbuf += ccl->consumed;
5290 if (ccl->status == CCL_STAT_QUIT
5291 || ccl->status == CCL_STAT_INVALID_CMD)
8cffd3e7 5292 break;
df7492f9
KH
5293 }
5294
fb608df3 5295 switch (ccl->status)
df7492f9
KH
5296 {
5297 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 5298 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
5299 break;
5300 case CCL_STAT_SUSPEND_BY_DST:
065e3595 5301 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
5302 break;
5303 case CCL_STAT_QUIT:
5304 case CCL_STAT_INVALID_CMD:
065e3595 5305 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
5306 break;
5307 default:
065e3595 5308 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 5309 break;
1397dc18 5310 }
df7492f9
KH
5311
5312 coding->produced_char += produced_chars;
5313 coding->produced = dst - coding->destination;
5314 return 0;
1397dc18
KH
5315}
5316
df7492f9 5317
1397dc18 5318\f
df7492f9 5319/*** 10, 11. no-conversion handlers ***/
4ed46869 5320
b73bfc1c 5321/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 5322
b73bfc1c 5323static void
971de7fb 5324decode_coding_raw_text (struct coding_system *coding)
4ed46869 5325{
0a9564cb
EZ
5326 int eol_crlf =
5327 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 5328
df7492f9 5329 coding->chars_at_source = 1;
119852e7
KH
5330 coding->consumed_char = coding->src_chars;
5331 coding->consumed = coding->src_bytes;
5332 if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
5333 {
5334 coding->consumed_char--;
5335 coding->consumed--;
5336 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5337 }
5338 else
5339 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 5340}
4ed46869 5341
df7492f9 5342static int
971de7fb 5343encode_coding_raw_text (struct coding_system *coding)
df7492f9
KH
5344{
5345 int multibytep = coding->dst_multibyte;
5346 int *charbuf = coding->charbuf;
5347 int *charbuf_end = coding->charbuf + coding->charbuf_used;
5348 unsigned char *dst = coding->destination + coding->produced;
5349 unsigned char *dst_end = coding->destination + coding->dst_bytes;
a0ed9b27 5350 int produced_chars = 0;
b73bfc1c
KH
5351 int c;
5352
df7492f9 5353 if (multibytep)
b73bfc1c 5354 {
df7492f9 5355 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 5356
df7492f9
KH
5357 if (coding->src_multibyte)
5358 while (charbuf < charbuf_end)
5359 {
5360 ASSURE_DESTINATION (safe_room);
5361 c = *charbuf++;
5362 if (ASCII_CHAR_P (c))
5363 EMIT_ONE_ASCII_BYTE (c);
5364 else if (CHAR_BYTE8_P (c))
5365 {
5366 c = CHAR_TO_BYTE8 (c);
5367 EMIT_ONE_BYTE (c);
5368 }
5369 else
5370 {
5371 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 5372
df7492f9
KH
5373 CHAR_STRING_ADVANCE (c, p1);
5374 while (p0 < p1)
9d123124
KH
5375 {
5376 EMIT_ONE_BYTE (*p0);
5377 p0++;
5378 }
df7492f9
KH
5379 }
5380 }
b73bfc1c 5381 else
df7492f9
KH
5382 while (charbuf < charbuf_end)
5383 {
5384 ASSURE_DESTINATION (safe_room);
5385 c = *charbuf++;
5386 EMIT_ONE_BYTE (c);
5387 }
5388 }
5389 else
4ed46869 5390 {
df7492f9 5391 if (coding->src_multibyte)
d46c5b12 5392 {
df7492f9
KH
5393 int safe_room = MAX_MULTIBYTE_LENGTH;
5394
5395 while (charbuf < charbuf_end)
d46c5b12 5396 {
df7492f9
KH
5397 ASSURE_DESTINATION (safe_room);
5398 c = *charbuf++;
5399 if (ASCII_CHAR_P (c))
5400 *dst++ = c;
5401 else if (CHAR_BYTE8_P (c))
5402 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 5403 else
df7492f9 5404 CHAR_STRING_ADVANCE (c, dst);
d46c5b12
KH
5405 }
5406 }
df7492f9
KH
5407 else
5408 {
5409 ASSURE_DESTINATION (charbuf_end - charbuf);
5410 while (charbuf < charbuf_end && dst < dst_end)
5411 *dst++ = *charbuf++;
8f924df7 5412 }
319a3947 5413 produced_chars = dst - (coding->destination + coding->produced);
4ed46869 5414 }
065e3595 5415 record_conversion_result (coding, CODING_RESULT_SUCCESS);
a0ed9b27 5416 coding->produced_char += produced_chars;
df7492f9
KH
5417 coding->produced = dst - coding->destination;
5418 return 0;
4ed46869
KH
5419}
5420
ff0dacd7
KH
5421/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5422 Check if a text is encoded in a charset-based coding system. If it
5423 is, return 1, else return 0. */
5424
0a28aafb 5425static int
971de7fb 5426detect_coding_charset (struct coding_system *coding, struct coding_detection_info *detect_info)
1397dc18 5427{
065e3595 5428 const unsigned char *src = coding->source, *src_base;
8f924df7 5429 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
5430 int multibytep = coding->src_multibyte;
5431 int consumed_chars = 0;
07295713 5432 Lisp_Object attrs, valids, name;
584948ac 5433 int found = 0;
716b3fa0 5434 int head_ascii = coding->head_ascii;
07295713 5435 int check_latin_extra = 0;
1397dc18 5436
ff0dacd7
KH
5437 detect_info->checked |= CATEGORY_MASK_CHARSET;
5438
df7492f9
KH
5439 coding = &coding_categories[coding_category_charset];
5440 attrs = CODING_ID_ATTRS (coding->id);
5441 valids = AREF (attrs, coding_attr_charset_valids);
07295713 5442 name = CODING_ID_NAME (coding->id);
237aabf4
JR
5443 if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5444 "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5445 || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5446 "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
07295713 5447 check_latin_extra = 1;
237aabf4 5448
df7492f9 5449 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
716b3fa0 5450 src += head_ascii;
1397dc18 5451
b73bfc1c 5452 while (1)
1397dc18 5453 {
df7492f9 5454 int c;
716b3fa0
KH
5455 Lisp_Object val;
5456 struct charset *charset;
5457 int dim, idx;
1397dc18 5458
065e3595 5459 src_base = src;
df7492f9 5460 ONE_MORE_BYTE (c);
065e3595
KH
5461 if (c < 0)
5462 continue;
716b3fa0
KH
5463 val = AREF (valids, c);
5464 if (NILP (val))
df7492f9 5465 break;
584948ac 5466 if (c >= 0x80)
07295713
KH
5467 {
5468 if (c < 0xA0
237aabf4
JR
5469 && check_latin_extra
5470 && (!VECTORP (Vlatin_extra_code_table)
9f0526cb 5471 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
07295713
KH
5472 break;
5473 found = CATEGORY_MASK_CHARSET;
5474 }
716b3fa0
KH
5475 if (INTEGERP (val))
5476 {
5477 charset = CHARSET_FROM_ID (XFASTINT (val));
5478 dim = CHARSET_DIMENSION (charset);
5479 for (idx = 1; idx < dim; idx++)
5480 {
5481 if (src == src_end)
5482 goto too_short;
5483 ONE_MORE_BYTE (c);
3ed051d4 5484 if (c < charset->code_space[(dim - 1 - idx) * 2]
716b3fa0
KH
5485 || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5486 break;
5487 }
5488 if (idx < dim)
5489 break;
5490 }
5491 else
5492 {
5493 idx = 1;
5494 for (; CONSP (val); val = XCDR (val))
5495 {
5496 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5497 dim = CHARSET_DIMENSION (charset);
5498 while (idx < dim)
5499 {
5500 if (src == src_end)
5501 goto too_short;
5502 ONE_MORE_BYTE (c);
5503 if (c < charset->code_space[(dim - 1 - idx) * 4]
5504 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5505 break;
5506 idx++;
5507 }
5508 if (idx == dim)
5509 {
5510 val = Qnil;
5511 break;
5512 }
5513 }
5514 if (CONSP (val))
5515 break;
5516 }
df7492f9 5517 }
716b3fa0 5518 too_short:
ff0dacd7 5519 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 5520 return 0;
4ed46869 5521
df7492f9 5522 no_more_source:
ff0dacd7
KH
5523 detect_info->found |= found;
5524 return 1;
df7492f9 5525}
b73bfc1c 5526
/* Decode the bytes of CODING's source, starting at offset
   coding->consumed, as text of a charset-based coding system and
   store the decoded characters in coding->charbuf.  A byte that
   cannot be decoded is stored individually and counted in
   coding->errors.  On exit coding->consumed, coding->consumed_char
   and coding->charbuf_used are updated.  */
b73bfc1c 5527static void
971de7fb 5528decode_coding_charset (struct coding_system *coding)
4ed46869 5529{
8f924df7
KH
5530 const unsigned char *src = coding->source + coding->consumed;
5531 const unsigned char *src_end = coding->source + coding->src_bytes;
5532 const unsigned char *src_base;
69a80ea3 5533 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
5534 /* We may produce one charset annotation in one loop and one more at
5535 the end. */
69a80ea3 5536 int *charbuf_end
df80c7f0 5537 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
df7492f9
KH
5538 int consumed_chars = 0, consumed_chars_base;
5539 int multibytep = coding->src_multibyte;
24a73b0a 5540 Lisp_Object attrs, charset_list, valids;
ff0dacd7
KH
5541 int char_offset = coding->produced_char;
5542 int last_offset = char_offset;
5543 int last_id = charset_ascii;
/* Nonzero if a CR may be the first half of a DOS-style end-of-line;
   in that case one byte is read ahead after every CR.  */
0a9564cb
EZ
5544 int eol_crlf =
5545 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
/* The byte read ahead after a CR, or -1 if none is pending.  */
119852e7 5546 int byte_after_cr = -1;
df7492f9 5547
24a73b0a 5548 CODING_GET_INFO (coding, attrs, charset_list);
4eb6d3f1 5549 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 5550
df7492f9 5551 while (1)
4ed46869 5552 {
4eb6d3f1 5553 int c;
24a73b0a
KH
5554 Lisp_Object val;
5555 struct charset *charset;
5556 int dim;
5557 int len = 1;
5558 unsigned code;
df7492f9
KH
5559
/* Remember where this character starts; `invalid_code' rewinds to
   this point and the final bookkeeping reports it as the amount
   consumed.  */
5560 src_base = src;
5561 consumed_chars_base = consumed_chars;
b73bfc1c 5562
/* Stop when the output buffer has no more room.  If a byte read
   ahead after a CR is still pending, step SRC_BASE back by one so
   that byte is not reported as consumed.  */
df7492f9 5563 if (charbuf >= charbuf_end)
b71f6f73
KH
5564 {
5565 if (byte_after_cr >= 0)
5566 src_base--;
5567 break;
5568 }
df7492f9 5569
/* Use the byte read ahead on the previous iteration if there is one;
   otherwise read a new byte and, when CRLF handling is active, read
   one byte ahead after a CR.  */
119852e7
KH
5570 if (byte_after_cr >= 0)
5571 {
5572 c = byte_after_cr;
5573 byte_after_cr = -1;
5574 }
5575 else
5576 {
5577 ONE_MORE_BYTE (c);
5578 if (eol_crlf && c == '\r')
5579 ONE_MORE_BYTE (byte_after_cr);
5580 }
065e3595
KH
5581 if (c < 0)
5582 goto invalid_code;
24a73b0a
KH
5583 code = c;
5584
/* VALIDS is indexed by the first byte.  Only an integer (one charset
   ID) or a cons (list of charset IDs) marks a byte that can start a
   character.  */
5585 val = AREF (valids, c);
1b17adfd 5586 if (! INTEGERP (val) && ! CONSP (val))
24a73b0a
KH
5587 goto invalid_code;
5588 if (INTEGERP (val))
d46c5b12 5589 {
24a73b0a
KH
5590 charset = CHARSET_FROM_ID (XFASTINT (val));
5591 dim = CHARSET_DIMENSION (charset);
/* Gather the remaining DIM - 1 bytes into CODE, earlier bytes in the
   higher-order positions.  */
5592 while (len < dim)
b73bfc1c 5593 {
24a73b0a
KH
5594 ONE_MORE_BYTE (c);
5595 code = (code << 8) | c;
5596 len++;
b73bfc1c 5597 }
24a73b0a
KH
5598 CODING_DECODE_CHAR (coding, src, src_base, src_end,
5599 charset, code, c);
d46c5b12 5600 }
df7492f9 5601 else
d46c5b12 5602 {
24a73b0a
KH
5603 /* VAL is a list of charset IDs. It is assured that the
5604 list is sorted by charset dimensions (smaller one
5605 comes first). */
/* LEN and CODE carry over from one candidate to the next, so a later
   (larger) charset only reads the additional bytes it needs.  The
   first candidate that leaves a non-negative C wins.  */
5606 while (CONSP (val))
4eb6d3f1 5607 {
24a73b0a 5608 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 5609 dim = CHARSET_DIMENSION (charset);
f9d71dcd 5610 while (len < dim)
4eb6d3f1 5611 {
acb2a965
KH
5612 ONE_MORE_BYTE (c);
5613 code = (code << 8) | c;
f9d71dcd 5614 len++;
4eb6d3f1 5615 }
24a73b0a
KH
5616 CODING_DECODE_CHAR (coding, src, src_base,
5617 src_end, charset, code, c);
5618 if (c >= 0)
5619 break;
5620 val = XCDR (val);
ff0dacd7 5621 }
d46c5b12 5622 }
24a73b0a
KH
5623 if (c < 0)
5624 goto invalid_code;
/* The charset changed to a different non-ASCII one: flush the pending
   run (if it was non-ASCII) via ADD_CHARSET_DATA and start a new run
   at the current character offset.  */
5625 if (charset->id != charset_ascii
5626 && last_id != charset->id)
5627 {
5628 if (last_id != charset_ascii)
69a80ea3 5629 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
5630 last_id = charset->id;
5631 last_offset = char_offset;
5632 }
5633
df7492f9 5634 *charbuf++ = c;
ff0dacd7 5635 char_offset++;
df7492f9
KH
5636 continue;
5637
/* Undecodable input: rewind to the start of this sequence, re-read
   only its first byte, store that byte (via BYTE8_TO_CHAR when it is
   not ASCII) and count an error.  */
5638 invalid_code:
5639 src = src_base;
5640 consumed_chars = consumed_chars_base;
5641 ONE_MORE_BYTE (c);
065e3595 5642 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 5643 char_offset++;
df7492f9 5644 coding->errors++;
4ed46869
KH
5645 }
5646
/* Reached by falling out of the loop; the label is not referenced by
   name here and is presumably also the target of ONE_MORE_BYTE when
   the source is exhausted.  Flush the last charset run and publish
   the progress made, measured up to the last character start.  */
df7492f9 5647 no_more_source:
ff0dacd7 5648 if (last_id != charset_ascii)
69a80ea3 5649 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
5650 coding->consumed_char += consumed_chars_base;
5651 coding->consumed = src_base - coding->source;
5652 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
5653}
5654
df7492f9 5655static int
971de7fb 5656encode_coding_charset (struct coding_system *coding)
4ed46869 5657{
df7492f9
KH
5658 int multibytep = coding->dst_multibyte;
5659 int *charbuf = coding->charbuf;
5660 int *charbuf_end = charbuf + coding->charbuf_used;
5661 unsigned char *dst = coding->destination + coding->produced;
5662 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5663 int safe_room = MAX_MULTIBYTE_LENGTH;
5664 int produced_chars = 0;
24a73b0a 5665 Lisp_Object attrs, charset_list;
df7492f9 5666 int ascii_compatible;
b73bfc1c 5667 int c;
b73bfc1c 5668
24a73b0a 5669 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5670 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 5671
df7492f9 5672 while (charbuf < charbuf_end)
4ed46869 5673 {
4eb6d3f1 5674 struct charset *charset;
df7492f9 5675 unsigned code;
8f924df7 5676
df7492f9
KH
5677 ASSURE_DESTINATION (safe_room);
5678 c = *charbuf++;
5679 if (ascii_compatible && ASCII_CHAR_P (c))
5680 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 5681 else if (CHAR_BYTE8_P (c))
4ed46869 5682 {
16eafb5d
KH
5683 c = CHAR_TO_BYTE8 (c);
5684 EMIT_ONE_BYTE (c);
d46c5b12 5685 }
d46c5b12 5686 else
b73bfc1c 5687 {
4eb6d3f1
KH
5688 charset = char_charset (c, charset_list, &code);
5689 if (charset)
5690 {
5691 if (CHARSET_DIMENSION (charset) == 1)
5692 EMIT_ONE_BYTE (code);
5693 else if (CHARSET_DIMENSION (charset) == 2)
5694 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5695 else if (CHARSET_DIMENSION (charset) == 3)
5696 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5697 else
5698 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5699 (code >> 8) & 0xFF, code & 0xFF);
5700 }
5701 else
41cbe562
KH
5702 {
5703 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5704 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5705 else
5706 c = coding->default_char;
5707 EMIT_ONE_BYTE (c);
5708 }
4ed46869 5709 }
4ed46869
KH
5710 }
5711
065e3595 5712 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5713 coding->produced_char += produced_chars;
5714 coding->produced = dst - coding->destination;
5715 return 0;
4ed46869
KH
5716}
5717
5718\f
1397dc18 5719/*** 10. C library functions ***/
4ed46869 5720
df7492f9
KH
5721/* Setup coding context CODING from information about CODING_SYSTEM.
5722 If CODING_SYSTEM is nil, `undecided' is assumed. If
5723 CODING_SYSTEM is invalid, signal an error. */
4ed46869 5724
ec6d2bb8 5725void
971de7fb 5726setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
4ed46869 5727{
df7492f9
KH
5728 Lisp_Object attrs;
5729 Lisp_Object eol_type;
5730 Lisp_Object coding_type;
4608c386 5731 Lisp_Object val;
4ed46869 5732
df7492f9 5733 if (NILP (coding_system))
ae6f73fa 5734 coding_system = Qundecided;
c07c8e12 5735
df7492f9 5736 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 5737
df7492f9 5738 attrs = CODING_ID_ATTRS (coding->id);
0a9564cb 5739 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 5740
df7492f9
KH
5741 coding->mode = 0;
5742 coding->head_ascii = -1;
4a015c45
KH
5743 if (VECTORP (eol_type))
5744 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5745 | CODING_REQUIRE_DETECTION_MASK);
5746 else if (! EQ (eol_type, Qunix))
5747 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5748 | CODING_REQUIRE_ENCODING_MASK);
5749 else
5750 coding->common_flags = 0;
5e5c78be
KH
5751 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5752 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5753 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5754 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
5755 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5756 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 5757
df7492f9 5758 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 5759 coding->max_charset_id = SCHARS (val) - 1;
1b3b981b 5760 coding->safe_charsets = SDATA (val);
df7492f9 5761 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
624bda09 5762 coding->carryover_bytes = 0;
4608c386 5763
df7492f9
KH
5764 coding_type = CODING_ATTR_TYPE (attrs);
5765 if (EQ (coding_type, Qundecided))
d46c5b12 5766 {
df7492f9
KH
5767 coding->detector = NULL;
5768 coding->decoder = decode_coding_raw_text;
5769 coding->encoder = encode_coding_raw_text;
5770 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5771 }
df7492f9 5772 else if (EQ (coding_type, Qiso_2022))
d46c5b12 5773 {
df7492f9
KH
5774 int i;
5775 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5776
5777 /* Invoke graphic register 0 to plane 0. */
5778 CODING_ISO_INVOCATION (coding, 0) = 0;
5779 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
5780 CODING_ISO_INVOCATION (coding, 1)
5781 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5782 /* Setup the initial status of designation. */
5783 for (i = 0; i < 4; i++)
5784 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5785 /* Not single shifting initially. */
5786 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5787 /* Beginning of buffer should also be regarded as bol. */
5788 CODING_ISO_BOL (coding) = 1;
5789 coding->detector = detect_coding_iso_2022;
5790 coding->decoder = decode_coding_iso_2022;
5791 coding->encoder = encode_coding_iso_2022;
5792 if (flags & CODING_ISO_FLAG_SAFE)
5793 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 5794 coding->common_flags
df7492f9
KH
5795 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5796 | CODING_REQUIRE_FLUSHING_MASK);
5797 if (flags & CODING_ISO_FLAG_COMPOSITION)
5798 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
5799 if (flags & CODING_ISO_FLAG_DESIGNATION)
5800 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
5801 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5802 {
5803 setup_iso_safe_charsets (attrs);
5804 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 5805 coding->max_charset_id = SCHARS (val) - 1;
1b3b981b 5806 coding->safe_charsets = SDATA (val);
df7492f9
KH
5807 }
5808 CODING_ISO_FLAGS (coding) = flags;
e951386e
KH
5809 CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5810 CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5811 CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5812 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
d46c5b12 5813 }
df7492f9 5814 else if (EQ (coding_type, Qcharset))
d46c5b12 5815 {
df7492f9
KH
5816 coding->detector = detect_coding_charset;
5817 coding->decoder = decode_coding_charset;
5818 coding->encoder = encode_coding_charset;
d46c5b12 5819 coding->common_flags
df7492f9 5820 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5821 }
df7492f9 5822 else if (EQ (coding_type, Qutf_8))
d46c5b12 5823 {
a470d443
KH
5824 val = AREF (attrs, coding_attr_utf_bom);
5825 CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5826 : EQ (val, Qt) ? utf_with_bom
5827 : utf_without_bom);
df7492f9
KH
5828 coding->detector = detect_coding_utf_8;
5829 coding->decoder = decode_coding_utf_8;
5830 coding->encoder = encode_coding_utf_8;
5831 coding->common_flags
5832 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
a470d443
KH
5833 if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5834 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
df7492f9
KH
5835 }
5836 else if (EQ (coding_type, Qutf_16))
5837 {
a470d443
KH
5838 val = AREF (attrs, coding_attr_utf_bom);
5839 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5840 : EQ (val, Qt) ? utf_with_bom
5841 : utf_without_bom);
df7492f9 5842 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5843 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5844 : utf_16_little_endian);
e19c3639 5845 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5846 coding->detector = detect_coding_utf_16;
5847 coding->decoder = decode_coding_utf_16;
5848 coding->encoder = encode_coding_utf_16;
5849 coding->common_flags
5850 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
a470d443 5851 if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
b49a1807 5852 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5853 }
df7492f9 5854 else if (EQ (coding_type, Qccl))
4ed46869 5855 {
df7492f9
KH
5856 coding->detector = detect_coding_ccl;
5857 coding->decoder = decode_coding_ccl;
5858 coding->encoder = encode_coding_ccl;
c952af22 5859 coding->common_flags
df7492f9
KH
5860 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5861 | CODING_REQUIRE_FLUSHING_MASK);
5862 }
5863 else if (EQ (coding_type, Qemacs_mule))
5864 {
5865 coding->detector = detect_coding_emacs_mule;
5866 coding->decoder = decode_coding_emacs_mule;
5867 coding->encoder = encode_coding_emacs_mule;
c952af22 5868 coding->common_flags
df7492f9 5869 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
e951386e 5870 coding->spec.emacs_mule.full_support = 1;
df7492f9
KH
5871 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5872 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5873 {
5874 Lisp_Object tail, safe_charsets;
5875 int max_charset_id = 0;
5876
5877 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5878 tail = XCDR (tail))
5879 if (max_charset_id < XFASTINT (XCAR (tail)))
5880 max_charset_id = XFASTINT (XCAR (tail));
1b3b981b
AS
5881 safe_charsets = make_uninit_string (max_charset_id + 1);
5882 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9
KH
5883 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5884 tail = XCDR (tail))
8f924df7 5885 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5886 coding->max_charset_id = max_charset_id;
1b3b981b 5887 coding->safe_charsets = SDATA (safe_charsets);
e951386e 5888 coding->spec.emacs_mule.full_support = 1;
df7492f9 5889 }
e951386e
KH
5890 coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5891 coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
df7492f9
KH
5892 }
5893 else if (EQ (coding_type, Qshift_jis))
5894 {
5895 coding->detector = detect_coding_sjis;
5896 coding->decoder = decode_coding_sjis;
5897 coding->encoder = encode_coding_sjis;
c952af22 5898 coding->common_flags
df7492f9
KH
5899 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5900 }
5901 else if (EQ (coding_type, Qbig5))
5902 {
5903 coding->detector = detect_coding_big5;
5904 coding->decoder = decode_coding_big5;
5905 coding->encoder = encode_coding_big5;
c952af22 5906 coding->common_flags
df7492f9
KH
5907 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5908 }
5909 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5910 {
df7492f9
KH
5911 coding->detector = NULL;
5912 coding->decoder = decode_coding_raw_text;
5913 coding->encoder = encode_coding_raw_text;
ea29edf2
KH
5914 if (! EQ (eol_type, Qunix))
5915 {
5916 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5917 if (! VECTORP (eol_type))
5918 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5919 }
5920
4ed46869 5921 }
4ed46869 5922
df7492f9 5923 return;
4ed46869
KH
5924}
5925
0ff61e78
KH
5926/* Return a list of charsets supported by CODING. */
5927
5928Lisp_Object
971de7fb 5929coding_charset_list (struct coding_system *coding)
0ff61e78 5930{
35befdaa 5931 Lisp_Object attrs, charset_list;
0ff61e78
KH
5932
5933 CODING_GET_INFO (coding, attrs, charset_list);
5934 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5935 {
5936 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5937
5938 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5939 charset_list = Viso_2022_charset_list;
5940 }
5941 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5942 {
5943 charset_list = Vemacs_mule_charset_list;
5944 }
5945 return charset_list;
5946}
5947
5948
e9f91ece
KH
5949/* Return a list of charsets supported by CODING-SYSTEM. */
5950
5951Lisp_Object
971de7fb 5952coding_system_charset_list (Lisp_Object coding_system)
e9f91ece
KH
5953{
5954 int id;
5955 Lisp_Object attrs, charset_list;
5956
5957 CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5958 attrs = CODING_ID_ATTRS (id);
5959
5960 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5961 {
5962 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5963
5964 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5965 charset_list = Viso_2022_charset_list;
5966 else
5967 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5968 }
5969 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5970 {
5971 charset_list = Vemacs_mule_charset_list;
5972 }
5973 else
5974 {
5975 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5976 }
5977 return charset_list;
5978}
5979
5980
df7492f9
KH
5981/* Return raw-text or one of its subsidiaries that has the same
5982 eol_type as CODING-SYSTEM. */
ec6d2bb8 5983
df7492f9 5984Lisp_Object
971de7fb 5985raw_text_coding_system (Lisp_Object coding_system)
ec6d2bb8 5986{
0be8721c 5987 Lisp_Object spec, attrs;
df7492f9 5988 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 5989
d3e4cb56
KH
5990 if (NILP (coding_system))
5991 return Qraw_text;
df7492f9
KH
5992 spec = CODING_SYSTEM_SPEC (coding_system);
5993 attrs = AREF (spec, 0);
ec6d2bb8 5994
df7492f9
KH
5995 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5996 return coding_system;
ec6d2bb8 5997
df7492f9
KH
5998 eol_type = AREF (spec, 2);
5999 if (VECTORP (eol_type))
6000 return Qraw_text;
6001 spec = CODING_SYSTEM_SPEC (Qraw_text);
6002 raw_text_eol_type = AREF (spec, 2);
6003 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6004 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6005 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
6006}
6007
54f78171 6008
df7492f9
KH
6009/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6010 does, return the subsidiary that has the same eol-spec as
fcbcfb64
KH
6011 PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil,
6012 inherit end-of-line format from the system's setting
6013 (system_eol_type). */
df7492f9
KH
6014
6015Lisp_Object
971de7fb 6016coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
54f78171 6017{
3e139625 6018 Lisp_Object spec, eol_type;
54f78171 6019
d3e4cb56
KH
6020 if (NILP (coding_system))
6021 coding_system = Qraw_text;
df7492f9 6022 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 6023 eol_type = AREF (spec, 2);
fcbcfb64 6024 if (VECTORP (eol_type))
df7492f9 6025 {
df7492f9
KH
6026 Lisp_Object parent_eol_type;
6027
fcbcfb64
KH
6028 if (! NILP (parent))
6029 {
6030 Lisp_Object parent_spec;
6031
4a015c45 6032 parent_spec = CODING_SYSTEM_SPEC (parent);
fcbcfb64
KH
6033 parent_eol_type = AREF (parent_spec, 2);
6034 }
6035 else
6036 parent_eol_type = system_eol_type;
df7492f9
KH
6037 if (EQ (parent_eol_type, Qunix))
6038 coding_system = AREF (eol_type, 0);
6039 else if (EQ (parent_eol_type, Qdos))
6040 coding_system = AREF (eol_type, 1);
6041 else if (EQ (parent_eol_type, Qmac))
6042 coding_system = AREF (eol_type, 2);
54f78171 6043 }
df7492f9 6044 return coding_system;
54f78171
KH
6045}
6046
4ed46869
KH
6047/* Emacs has a mechanism to automatically detect a coding system if it
6048 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
6049 it's impossible to distinguish some coding systems accurately
6050 because they use the same range of codes. So, at first, coding
6051 systems are grouped into the following categories:
6052
0ef69138 6053 o coding-category-emacs-mule
4ed46869
KH
6054
6055 The category for a coding system which has the same code range
6056 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 6057 symbol) `emacs-mule' by default.
4ed46869
KH
6058
6059 o coding-category-sjis
6060
6061 The category for a coding system which has the same code range
6062 as SJIS. Assigned the coding-system (Lisp
7717c392 6063 symbol) `japanese-shift-jis' by default.
4ed46869
KH
6064
6065 o coding-category-iso-7
6066
6067 The category for a coding system which has the same code range
7717c392 6068 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
6069 shift and single shift functions. This can encode/decode all
6070 charsets. Assigned the coding-system (Lisp symbol)
6071 `iso-2022-7bit' by default.
6072
6073 o coding-category-iso-7-tight
6074
6075 Same as coding-category-iso-7 except that this can
6076 encode/decode only the specified charsets.
4ed46869
KH
6077
6078 o coding-category-iso-8-1
6079
6080 The category for a coding system which has the same code range
6081 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
6082 for DIMENSION1 charset. This doesn't use any locking shift
6083 and single shift functions. Assigned the coding-system (Lisp
6084 symbol) `iso-latin-1' by default.
4ed46869
KH
6085
6086 o coding-category-iso-8-2
6087
6088 The category for a coding system which has the same code range
6089 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
6090 for DIMENSION2 charset. This doesn't use any locking shift
6091 and single shift functions. Assigned the coding-system (Lisp
6092 symbol) `japanese-iso-8bit' by default.
4ed46869 6093
7717c392 6094 o coding-category-iso-7-else
4ed46869
KH
6095
6096 The category for a coding system which has the same code range
df7492f9 6097 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
6098 single shift functions. Assigned the coding-system (Lisp
6099 symbol) `iso-2022-7bit-lock' by default.
6100
6101 o coding-category-iso-8-else
6102
6103 The category for a coding system which has the same code range
df7492f9 6104 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
6105 single shift functions. Assigned the coding-system (Lisp
6106 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
6107
6108 o coding-category-big5
6109
6110 The category for a coding system which has the same code range
6111 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 6112 `cn-big5' by default.
4ed46869 6113
fa42c37f
KH
6114 o coding-category-utf-8
6115
6116 The category for a coding system which has the same code range
6e76ae91 6117 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
6118 symbol) `utf-8' by default.
6119
6120 o coding-category-utf-16-be
6121
6122 The category for a coding system in which a text has an
6123 Unicode signature (cf. Unicode Standard) in the order of BIG
6124 endian at the head. Assigned the coding-system (Lisp symbol)
6125 `utf-16-be' by default.
6126
6127 o coding-category-utf-16-le
6128
6129 The category for a coding system in which a text has an
6130 Unicode signature (cf. Unicode Standard) in the order of
6131 LITTLE endian at the head. Assigned the coding-system (Lisp
6132 symbol) `utf-16-le' by default.
6133
1397dc18
KH
6134 o coding-category-ccl
6135
6136 The category for a coding system of which encoder/decoder is
6137 written in CCL programs. The default value is nil, i.e., no
6138 coding system is assigned.
6139
4ed46869
KH
6140 o coding-category-binary
6141
6142 The category for a coding system not categorized in any of the
6143 above. Assigned the coding-system (Lisp symbol)
e0e989f6 6144 `no-conversion' by default.
4ed46869
KH
6145
6146 Each of them is a Lisp symbol and the value is an actual
df7492f9 6147 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
6148 What Emacs does actually is to detect a category of coding system.
6149 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 6150 decide only one possible category, it selects a category of the
4ed46869
KH
6151 highest priority. Priorities of categories are also specified by a
6152 user in a Lisp variable `coding-category-list'.
6153
6154*/
6155
df7492f9
KH
6156#define EOL_SEEN_NONE 0
6157#define EOL_SEEN_LF 1
6158#define EOL_SEEN_CR 2
6159#define EOL_SEEN_CRLF 4
66cfb530 6160
ff0dacd7
KH
6161/* Detect how end-of-line of a text of length SRC_BYTES pointed by
6162 SOURCE is encoded. If CATEGORY is one of
6163 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6164 two-byte, else they are encoded by one-byte.
6165
6166 Return one of EOL_SEEN_XXX. */
4ed46869 6167
bc4bc72a 6168#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
6169
6170static int
971de7fb 6171detect_eol (const unsigned char *source, EMACS_INT src_bytes, enum coding_category category)
4ed46869 6172{
f6cbaf43 6173 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 6174 unsigned char c;
df7492f9
KH
6175 int total = 0;
6176 int eol_seen = EOL_SEEN_NONE;
4ed46869 6177
89528eb3 6178 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 6179 {
df7492f9 6180 int msb, lsb;
fa42c37f 6181
89528eb3
KH
6182 msb = category == (coding_category_utf_16_le
6183 | coding_category_utf_16_le_nosig);
df7492f9 6184 lsb = 1 - msb;
fa42c37f 6185
df7492f9 6186 while (src + 1 < src_end)
fa42c37f 6187 {
df7492f9
KH
6188 c = src[lsb];
6189 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 6190 {
df7492f9
KH
6191 int this_eol;
6192
6193 if (c == '\n')
6194 this_eol = EOL_SEEN_LF;
6195 else if (src + 3 >= src_end
6196 || src[msb + 2] != 0
6197 || src[lsb + 2] != '\n')
6198 this_eol = EOL_SEEN_CR;
fa42c37f 6199 else
75f4f1ac
EZ
6200 {
6201 this_eol = EOL_SEEN_CRLF;
6202 src += 2;
6203 }
df7492f9
KH
6204
6205 if (eol_seen == EOL_SEEN_NONE)
6206 /* This is the first end-of-line. */
6207 eol_seen = this_eol;
6208 else if (eol_seen != this_eol)
fa42c37f 6209 {
75f4f1ac
EZ
6210 /* The found type is different from what found before.
6211 Allow for stray ^M characters in DOS EOL files. */
6212 if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6213 || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6214 eol_seen = EOL_SEEN_CRLF;
6215 else
6216 {
6217 eol_seen = EOL_SEEN_LF;
6218 break;
6219 }
fa42c37f 6220 }
df7492f9
KH
6221 if (++total == MAX_EOL_CHECK_COUNT)
6222 break;
fa42c37f 6223 }
df7492f9 6224 src += 2;
fa42c37f 6225 }
bcf26d6a 6226 }
d46c5b12 6227 else
c4825358 6228 {
df7492f9 6229 while (src < src_end)
27901516 6230 {
df7492f9
KH
6231 c = *src++;
6232 if (c == '\n' || c == '\r')
6233 {
6234 int this_eol;
d46c5b12 6235
df7492f9
KH
6236 if (c == '\n')
6237 this_eol = EOL_SEEN_LF;
6238 else if (src >= src_end || *src != '\n')
6239 this_eol = EOL_SEEN_CR;
6240 else
6241 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 6242
df7492f9
KH
6243 if (eol_seen == EOL_SEEN_NONE)
6244 /* This is the first end-of-line. */
6245 eol_seen = this_eol;
6246 else if (eol_seen != this_eol)
6247 {
75f4f1ac
EZ
6248 /* The found type is different from what found before.
6249 Allow for stray ^M characters in DOS EOL files. */
6250 if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6251 || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6252 eol_seen = EOL_SEEN_CRLF;
6253 else
6254 {
6255 eol_seen = EOL_SEEN_LF;
6256 break;
6257 }
df7492f9
KH
6258 }
6259 if (++total == MAX_EOL_CHECK_COUNT)
6260 break;
6261 }
6262 }
73be902c 6263 }
df7492f9 6264 return eol_seen;
73be902c
KH
6265}
6266
df7492f9 6267
24a73b0a 6268static Lisp_Object
971de7fb 6269adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
73be902c 6270{
0be8721c 6271 Lisp_Object eol_type;
8f924df7 6272
df7492f9
KH
6273 eol_type = CODING_ID_EOL_TYPE (coding->id);
6274 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
6275 {
6276 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6277 eol_type = Qunix;
6278 }
6f197c07 6279 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
6280 {
6281 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6282 eol_type = Qdos;
6283 }
6f197c07 6284 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
6285 {
6286 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6287 eol_type = Qmac;
6288 }
6289 return eol_type;
d46c5b12 6290}
4ed46869 6291
df7492f9
KH
6292/* Detect how a text specified in CODING is encoded. If a coding
6293 system is detected, update fields of CODING by the detected coding
6294 system. */
0a28aafb 6295
df7492f9 6296void
971de7fb 6297detect_coding (struct coding_system *coding)
d46c5b12 6298{
8f924df7 6299 const unsigned char *src, *src_end;
73cce38d 6300 int saved_mode = coding->mode;
d46c5b12 6301
df7492f9
KH
6302 coding->consumed = coding->consumed_char = 0;
6303 coding->produced = coding->produced_char = 0;
6304 coding_set_source (coding);
1c3478b0 6305
df7492f9 6306 src_end = coding->source + coding->src_bytes;
c0e16b14 6307 coding->head_ascii = 0;
1c3478b0 6308
df7492f9
KH
6309 /* If we have not yet decided the text encoding type, detect it
6310 now. */
6311 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 6312 {
df7492f9 6313 int c, i;
6cb21a4f 6314 struct coding_detection_info detect_info;
2f3cbb32 6315 int null_byte_found = 0, eight_bit_found = 0;
df7492f9 6316
6cb21a4f 6317 detect_info.checked = detect_info.found = detect_info.rejected = 0;
2f3cbb32 6318 for (src = coding->source; src < src_end; src++)
d46c5b12 6319 {
df7492f9 6320 c = *src;
6cb21a4f 6321 if (c & 0x80)
6cb21a4f 6322 {
2f3cbb32 6323 eight_bit_found = 1;
2f3cbb32
KH
6324 if (null_byte_found)
6325 break;
6326 }
6327 else if (c < 0x20)
6328 {
6329 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6330 && ! inhibit_iso_escape_detection
6331 && ! detect_info.checked)
6cb21a4f 6332 {
2f3cbb32
KH
6333 if (detect_coding_iso_2022 (coding, &detect_info))
6334 {
6335 /* We have scanned the whole data. */
6336 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
c0e16b14
KH
6337 {
6338 /* We didn't find an 8-bit code. We may
6339 have found a null-byte, but it's very
ce5b453a 6340 rare that a binary file conforms to
c0e16b14
KH
6341 ISO-2022. */
6342 src = src_end;
6343 coding->head_ascii = src - coding->source;
6344 }
6345 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
2f3cbb32
KH
6346 break;
6347 }
6348 }
97b1b294 6349 else if (! c && !inhibit_null_byte_detection)
2f3cbb32
KH
6350 {
6351 null_byte_found = 1;
6352 if (eight_bit_found)
6353 break;
6cb21a4f 6354 }
c006c0c8
KH
6355 if (! eight_bit_found)
6356 coding->head_ascii++;
6cb21a4f 6357 }
c006c0c8 6358 else if (! eight_bit_found)
c0e16b14 6359 coding->head_ascii++;
d46c5b12 6360 }
df7492f9 6361
2f3cbb32
KH
6362 if (null_byte_found || eight_bit_found
6363 || coding->head_ascii < coding->src_bytes
6cb21a4f 6364 || detect_info.found)
d46c5b12 6365 {
ff0dacd7
KH
6366 enum coding_category category;
6367 struct coding_system *this;
df7492f9 6368
6cb21a4f
KH
6369 if (coding->head_ascii == coding->src_bytes)
6370 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
6371 for (i = 0; i < coding_category_raw_text; i++)
6372 {
6373 category = coding_priorities[i];
6374 this = coding_categories + category;
6375 if (detect_info.found & (1 << category))
24a73b0a 6376 break;
6cb21a4f
KH
6377 }
6378 else
2f3cbb32
KH
6379 {
6380 if (null_byte_found)
ff0dacd7 6381 {
2f3cbb32
KH
6382 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6383 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
ff0dacd7 6384 }
2f3cbb32
KH
6385 for (i = 0; i < coding_category_raw_text; i++)
6386 {
6387 category = coding_priorities[i];
6388 this = coding_categories + category;
6389 if (this->id < 0)
6390 {
6391 /* No coding system of this category is defined. */
6392 detect_info.rejected |= (1 << category);
6393 }
6394 else if (category >= coding_category_raw_text)
6395 continue;
6396 else if (detect_info.checked & (1 << category))
6397 {
6398 if (detect_info.found & (1 << category))
6399 break;
6400 }
6401 else if ((*(this->detector)) (coding, &detect_info)
6402 && detect_info.found & (1 << category))
6403 {
6404 if (category == coding_category_utf_16_auto)
6405 {
6406 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6407 category = coding_category_utf_16_le;
6408 else
6409 category = coding_category_utf_16_be;
6410 }
6411 break;
6412 }
6413 }
2f3cbb32 6414 }
c0e16b14
KH
6415
6416 if (i < coding_category_raw_text)
6417 setup_coding_system (CODING_ID_NAME (this->id), coding);
6418 else if (null_byte_found)
6419 setup_coding_system (Qno_conversion, coding);
6420 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6421 == CATEGORY_MASK_ANY)
6422 setup_coding_system (Qraw_text, coding);
6423 else if (detect_info.rejected)
6424 for (i = 0; i < coding_category_raw_text; i++)
6425 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6426 {
6427 this = coding_categories + coding_priorities[i];
6428 setup_coding_system (CODING_ID_NAME (this->id), coding);
6429 break;
6430 }
d46c5b12 6431 }
b73bfc1c 6432 }
a470d443
KH
6433 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6434 == coding_category_utf_8_auto)
6435 {
6436 Lisp_Object coding_systems;
6437 struct coding_detection_info detect_info;
6438
6439 coding_systems
6440 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6441 detect_info.found = detect_info.rejected = 0;
6442 coding->head_ascii = 0;
6443 if (CONSP (coding_systems)
6444 && detect_coding_utf_8 (coding, &detect_info))
6445 {
6446 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6447 setup_coding_system (XCAR (coding_systems), coding);
6448 else
6449 setup_coding_system (XCDR (coding_systems), coding);
6450 }
6451 }
24a73b0a
KH
6452 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6453 == coding_category_utf_16_auto)
b49a1807
KH
6454 {
6455 Lisp_Object coding_systems;
6456 struct coding_detection_info detect_info;
6457
6458 coding_systems
a470d443 6459 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
b49a1807 6460 detect_info.found = detect_info.rejected = 0;
a470d443 6461 coding->head_ascii = 0;
b49a1807 6462 if (CONSP (coding_systems)
24a73b0a 6463 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
6464 {
6465 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6466 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 6467 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
6468 setup_coding_system (XCDR (coding_systems), coding);
6469 }
6470 }
73cce38d 6471 coding->mode = saved_mode;
4ed46869 6472}
4ed46869 6473
d46c5b12 6474
aaaf0b1e 6475static void
971de7fb 6476decode_eol (struct coding_system *coding)
aaaf0b1e 6477{
24a73b0a
KH
6478 Lisp_Object eol_type;
6479 unsigned char *p, *pbeg, *pend;
3ed051d4 6480
24a73b0a 6481 eol_type = CODING_ID_EOL_TYPE (coding->id);
0a9564cb 6482 if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
24a73b0a
KH
6483 return;
6484
6485 if (NILP (coding->dst_object))
6486 pbeg = coding->destination;
6487 else
6488 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6489 pend = pbeg + coding->produced;
6490
6491 if (VECTORP (eol_type))
aaaf0b1e 6492 {
df7492f9 6493 int eol_seen = EOL_SEEN_NONE;
4ed46869 6494
24a73b0a 6495 for (p = pbeg; p < pend; p++)
aaaf0b1e 6496 {
df7492f9
KH
6497 if (*p == '\n')
6498 eol_seen |= EOL_SEEN_LF;
6499 else if (*p == '\r')
aaaf0b1e 6500 {
df7492f9 6501 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 6502 {
df7492f9
KH
6503 eol_seen |= EOL_SEEN_CRLF;
6504 p++;
aaaf0b1e 6505 }
aaaf0b1e 6506 else
df7492f9 6507 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 6508 }
aaaf0b1e 6509 }
75f4f1ac
EZ
6510 /* Handle DOS-style EOLs in a file with stray ^M characters. */
6511 if ((eol_seen & EOL_SEEN_CRLF) != 0
6512 && (eol_seen & EOL_SEEN_CR) != 0
6513 && (eol_seen & EOL_SEEN_LF) == 0)
6514 eol_seen = EOL_SEEN_CRLF;
6515 else if (eol_seen != EOL_SEEN_NONE
24a73b0a
KH
6516 && eol_seen != EOL_SEEN_LF
6517 && eol_seen != EOL_SEEN_CRLF
6518 && eol_seen != EOL_SEEN_CR)
6519 eol_seen = EOL_SEEN_LF;
df7492f9 6520 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 6521 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 6522 }
d46c5b12 6523
24a73b0a 6524 if (EQ (eol_type, Qmac))
27901516 6525 {
24a73b0a 6526 for (p = pbeg; p < pend; p++)
df7492f9
KH
6527 if (*p == '\r')
6528 *p = '\n';
4ed46869 6529 }
24a73b0a 6530 else if (EQ (eol_type, Qdos))
df7492f9 6531 {
24a73b0a 6532 int n = 0;
b73bfc1c 6533
24a73b0a
KH
6534 if (NILP (coding->dst_object))
6535 {
4347441b
KH
6536 /* Start deleting '\r' from the tail to minimize the memory
6537 movement. */
24a73b0a
KH
6538 for (p = pend - 2; p >= pbeg; p--)
6539 if (*p == '\r')
6540 {
6541 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6542 n++;
6543 }
6544 }
6545 else
6546 {
4347441b
KH
6547 int pos_byte = coding->dst_pos_byte;
6548 int pos = coding->dst_pos;
6549 int pos_end = pos + coding->produced_char - 1;
6550
6551 while (pos < pos_end)
6552 {
6553 p = BYTE_POS_ADDR (pos_byte);
6554 if (*p == '\r' && p[1] == '\n')
6555 {
6556 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6557 n++;
6558 pos_end--;
6559 }
6560 pos++;
69b8522d
KH
6561 if (coding->dst_multibyte)
6562 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6563 else
6564 pos_byte++;
4347441b 6565 }
24a73b0a
KH
6566 }
6567 coding->produced -= n;
6568 coding->produced_char -= n;
aaaf0b1e 6569 }
4ed46869
KH
6570}
6571
7d64c6ad 6572
a6f87d34
KH
6573/* Return a translation table (or list of them) from coding system
6574 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6575 decoding (ENCODEP is zero). */
7d64c6ad 6576
e6a54062 6577static Lisp_Object
971de7fb 6578get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
7d64c6ad
KH
6579{
6580 Lisp_Object standard, translation_table;
09ee6fdd 6581 Lisp_Object val;
7d64c6ad 6582
4bed5909
CY
6583 if (NILP (Venable_character_translation))
6584 {
6585 if (max_lookup)
6586 *max_lookup = 0;
6587 return Qnil;
6588 }
7d64c6ad
KH
6589 if (encodep)
6590 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6591 standard = Vstandard_translation_table_for_encode;
6592 else
6593 translation_table = CODING_ATTR_DECODE_TBL (attrs),
6594 standard = Vstandard_translation_table_for_decode;
7d64c6ad 6595 if (NILP (translation_table))
09ee6fdd
KH
6596 translation_table = standard;
6597 else
a6f87d34 6598 {
09ee6fdd
KH
6599 if (SYMBOLP (translation_table))
6600 translation_table = Fget (translation_table, Qtranslation_table);
6601 else if (CONSP (translation_table))
6602 {
6603 translation_table = Fcopy_sequence (translation_table);
6604 for (val = translation_table; CONSP (val); val = XCDR (val))
6605 if (SYMBOLP (XCAR (val)))
6606 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6607 }
6608 if (CHAR_TABLE_P (standard))
6609 {
6610 if (CONSP (translation_table))
6611 translation_table = nconc2 (translation_table,
6612 Fcons (standard, Qnil));
6613 else
6614 translation_table = Fcons (translation_table,
6615 Fcons (standard, Qnil));
6616 }
a6f87d34 6617 }
2170c8f0
KH
6618
6619 if (max_lookup)
09ee6fdd 6620 {
2170c8f0
KH
6621 *max_lookup = 1;
6622 if (CHAR_TABLE_P (translation_table)
6623 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6624 {
6625 val = XCHAR_TABLE (translation_table)->extras[1];
6626 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6627 *max_lookup = XFASTINT (val);
6628 }
6629 else if (CONSP (translation_table))
6630 {
6631 Lisp_Object tail, val;
09ee6fdd 6632
2170c8f0
KH
6633 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6634 if (CHAR_TABLE_P (XCAR (tail))
6635 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6636 {
6637 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6638 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6639 *max_lookup = XFASTINT (val);
6640 }
6641 }
a6f87d34 6642 }
7d64c6ad
KH
6643 return translation_table;
6644}
6645
/* Look up character C in TABLE and leave the outcome in C and TRANS.
   TABLE is a char-table or a list whose char-table elements are
   tried in order; for anything else TRANS is just set to Qnil.

   When an entry is itself a character, it replaces C and TRANS is
   reset to Qnil (with a list, the lookup then goes on to the next
   table using the new C).  Any other non-nil entry is left in TRANS,
   and with a list it also ends the lookup.

   C and TRANS must be lvalues; all arguments are evaluated more than
   once.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    trans = Qnil;                                                       \
    if (CHAR_TABLE_P (table))                                           \
      {                                                                 \
        trans = CHAR_TABLE_REF (table, c);                              \
        if (CHARACTERP (trans))                                         \
          {                                                             \
            c = XFASTINT (trans);                                       \
            trans = Qnil;                                               \
          }                                                             \
      }                                                                 \
    else if (CONSP (table))                                             \
      {                                                                 \
        Lisp_Object lookup_tail_;                                       \
                                                                        \
        for (lookup_tail_ = table; CONSP (lookup_tail_);                \
             lookup_tail_ = XCDR (lookup_tail_))                        \
          {                                                             \
            if (! CHAR_TABLE_P (XCAR (lookup_tail_)))                   \
              continue;                                                 \
            trans = CHAR_TABLE_REF (XCAR (lookup_tail_), c);            \
            if (CHARACTERP (trans))                                     \
              {                                                         \
                c = XFASTINT (trans);                                   \
                trans = Qnil;                                           \
              }                                                         \
            else if (! NILP (trans))                                    \
              break;                                                    \
          }                                                             \
      }                                                                 \
  } while (0)
6670
7d64c6ad 6671
e951386e
KH
6672/* Return a translation of character(s) at BUF according to TRANS.
6673 TRANS is TO-CHAR or ((FROM . TO) ...) where
6674 FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6675 The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6676 translation is found, and Qnil if not found..
6677 If BUF is too short to lookup characters in FROM, return Qt. */
6678
69a80ea3 6679static Lisp_Object
971de7fb 6680get_translation (Lisp_Object trans, int *buf, int *buf_end)
69a80ea3 6681{
e951386e
KH
6682
6683 if (INTEGERP (trans))
6684 return trans;
6685 for (; CONSP (trans); trans = XCDR (trans))
69a80ea3 6686 {
e951386e
KH
6687 Lisp_Object val = XCAR (trans);
6688 Lisp_Object from = XCAR (val);
6689 int len = ASIZE (from);
6690 int i;
69a80ea3 6691
e951386e 6692 for (i = 0; i < len; i++)
69a80ea3 6693 {
e951386e
KH
6694 if (buf + i == buf_end)
6695 return Qt;
6696 if (XINT (AREF (from, i)) != buf[i])
6697 break;
69a80ea3 6698 }
e951386e
KH
6699 if (i == len)
6700 return val;
69a80ea3 6701 }
e951386e 6702 return Qnil;
69a80ea3
KH
6703}
6704
6705
d46c5b12 6706static int
971de7fb 6707produce_chars (struct coding_system *coding, Lisp_Object translation_table, int last_block)
4ed46869 6708{
df7492f9
KH
6709 unsigned char *dst = coding->destination + coding->produced;
6710 unsigned char *dst_end = coding->destination + coding->dst_bytes;
119852e7
KH
6711 EMACS_INT produced;
6712 EMACS_INT produced_chars = 0;
69a80ea3 6713 int carryover = 0;
4ed46869 6714
df7492f9 6715 if (! coding->chars_at_source)
4ed46869 6716 {
119852e7 6717 /* Source characters are in coding->charbuf. */
fba4576f
AS
6718 int *buf = coding->charbuf;
6719 int *buf_end = buf + coding->charbuf_used;
4ed46869 6720
db274c7a
KH
6721 if (EQ (coding->src_object, coding->dst_object))
6722 {
6723 coding_set_source (coding);
6724 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6725 }
4ed46869 6726
df7492f9 6727 while (buf < buf_end)
4ed46869 6728 {
69a80ea3 6729 int c = *buf, i;
bc4bc72a 6730
df7492f9
KH
6731 if (c >= 0)
6732 {
69a80ea3
KH
6733 int from_nchars = 1, to_nchars = 1;
6734 Lisp_Object trans = Qnil;
6735
09ee6fdd 6736 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6737 if (! NILP (trans))
69a80ea3 6738 {
e951386e
KH
6739 trans = get_translation (trans, buf, buf_end);
6740 if (INTEGERP (trans))
6741 c = XINT (trans);
6742 else if (CONSP (trans))
6743 {
6744 from_nchars = ASIZE (XCAR (trans));
6745 trans = XCDR (trans);
6746 if (INTEGERP (trans))
6747 c = XINT (trans);
6748 else
6749 {
6750 to_nchars = ASIZE (trans);
6751 c = XINT (AREF (trans, 0));
6752 }
6753 }
6754 else if (EQ (trans, Qt) && ! last_block)
69a80ea3 6755 break;
69a80ea3
KH
6756 }
6757
6758 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6759 {
6760 dst = alloc_destination (coding,
6761 buf_end - buf
6762 + MAX_MULTIBYTE_LENGTH * to_nchars,
6763 dst);
db274c7a
KH
6764 if (EQ (coding->src_object, coding->dst_object))
6765 {
6766 coding_set_source (coding);
e951386e
KH
6767 dst_end = (((unsigned char *) coding->source)
6768 + coding->consumed);
db274c7a
KH
6769 }
6770 else
6771 dst_end = coding->destination + coding->dst_bytes;
69a80ea3
KH
6772 }
6773
433f7f87 6774 for (i = 0; i < to_nchars; i++)
69a80ea3 6775 {
433f7f87
KH
6776 if (i > 0)
6777 c = XINT (AREF (trans, i));
69a80ea3
KH
6778 if (coding->dst_multibyte
6779 || ! CHAR_BYTE8_P (c))
db274c7a 6780 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
69a80ea3
KH
6781 else
6782 *dst++ = CHAR_TO_BYTE8 (c);
6783 }
6784 produced_chars += to_nchars;
e951386e 6785 buf += from_nchars;
d46c5b12 6786 }
df7492f9 6787 else
69a80ea3
KH
6788 /* This is an annotation datum. (-C) is the length. */
6789 buf += -c;
4ed46869 6790 }
69a80ea3 6791 carryover = buf_end - buf;
4ed46869 6792 }
fa42c37f 6793 else
fa42c37f 6794 {
119852e7 6795 /* Source characters are at coding->source. */
8f924df7 6796 const unsigned char *src = coding->source;
119852e7 6797 const unsigned char *src_end = src + coding->consumed;
4ed46869 6798
db274c7a
KH
6799 if (EQ (coding->dst_object, coding->src_object))
6800 dst_end = (unsigned char *) src;
df7492f9 6801 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 6802 {
df7492f9 6803 if (coding->src_multibyte)
fa42c37f 6804 {
71c81426 6805 int multibytep = 1;
4533845d 6806 EMACS_INT consumed_chars = 0;
d46c5b12 6807
df7492f9
KH
6808 while (1)
6809 {
8f924df7 6810 const unsigned char *src_base = src;
df7492f9 6811 int c;
b73bfc1c 6812
df7492f9 6813 ONE_MORE_BYTE (c);
119852e7 6814 if (dst == dst_end)
df7492f9 6815 {
119852e7
KH
6816 if (EQ (coding->src_object, coding->dst_object))
6817 dst_end = (unsigned char *) src;
6818 if (dst == dst_end)
df7492f9 6819 {
119852e7
KH
6820 EMACS_INT offset = src - coding->source;
6821
6822 dst = alloc_destination (coding, src_end - src + 1,
6823 dst);
6824 dst_end = coding->destination + coding->dst_bytes;
6825 coding_set_source (coding);
6826 src = coding->source + offset;
6827 src_end = coding->source + coding->src_bytes;
db274c7a
KH
6828 if (EQ (coding->src_object, coding->dst_object))
6829 dst_end = (unsigned char *) src;
df7492f9 6830 }
df7492f9
KH
6831 }
6832 *dst++ = c;
6833 produced_chars++;
6834 }
6835 no_more_source:
6836 ;
fa42c37f
KH
6837 }
6838 else
df7492f9
KH
6839 while (src < src_end)
6840 {
71c81426 6841 int multibytep = 1;
df7492f9 6842 int c = *src++;
b73bfc1c 6843
df7492f9
KH
6844 if (dst >= dst_end - 1)
6845 {
2c78b7e1 6846 if (EQ (coding->src_object, coding->dst_object))
8f924df7 6847 dst_end = (unsigned char *) src;
2c78b7e1
KH
6848 if (dst >= dst_end - 1)
6849 {
119852e7 6850 EMACS_INT offset = src - coding->source;
db274c7a 6851 EMACS_INT more_bytes;
119852e7 6852
db274c7a
KH
6853 if (EQ (coding->src_object, coding->dst_object))
6854 more_bytes = ((src_end - src) / 2) + 2;
6855 else
6856 more_bytes = src_end - src + 2;
6857 dst = alloc_destination (coding, more_bytes, dst);
2c78b7e1
KH
6858 dst_end = coding->destination + coding->dst_bytes;
6859 coding_set_source (coding);
119852e7 6860 src = coding->source + offset;
2c78b7e1 6861 src_end = coding->source + coding->src_bytes;
db274c7a
KH
6862 if (EQ (coding->src_object, coding->dst_object))
6863 dst_end = (unsigned char *) src;
2c78b7e1 6864 }
df7492f9
KH
6865 }
6866 EMIT_ONE_BYTE (c);
6867 }
d46c5b12 6868 }
df7492f9
KH
6869 else
6870 {
6871 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 6872 {
119852e7 6873 EMACS_INT require = coding->src_bytes - coding->dst_bytes;
4ed46869 6874
df7492f9 6875 if (require > 0)
fa42c37f 6876 {
df7492f9
KH
6877 EMACS_INT offset = src - coding->source;
6878
6879 dst = alloc_destination (coding, require, dst);
6880 coding_set_source (coding);
6881 src = coding->source + offset;
6882 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
6883 }
6884 }
119852e7 6885 produced_chars = coding->consumed_char;
df7492f9 6886 while (src < src_end)
14daee73 6887 *dst++ = *src++;
fa42c37f
KH
6888 }
6889 }
6890
df7492f9 6891 produced = dst - (coding->destination + coding->produced);
284201e4 6892 if (BUFFERP (coding->dst_object) && produced_chars > 0)
df7492f9
KH
6893 insert_from_gap (produced_chars, produced);
6894 coding->produced += produced;
6895 coding->produced_char += produced_chars;
69a80ea3 6896 return carryover;
fa42c37f
KH
6897}
6898
ff0dacd7
KH
6899/* Compose text in CODING->object according to the annotation data at
6900 CHARBUF. CHARBUF is an array:
e951386e 6901 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
df7492f9 6902 */
4ed46869 6903
df7492f9 6904static INLINE void
971de7fb 6905produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
4ed46869 6906{
df7492f9 6907 int len;
69a80ea3 6908 EMACS_INT to;
df7492f9 6909 enum composition_method method;
df7492f9 6910 Lisp_Object components;
fa42c37f 6911
e951386e 6912 len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
69a80ea3 6913 to = pos + charbuf[2];
e951386e 6914 method = (enum composition_method) (charbuf[4]);
d46c5b12 6915
df7492f9
KH
6916 if (method == COMPOSITION_RELATIVE)
6917 components = Qnil;
e951386e 6918 else
d46c5b12 6919 {
df7492f9 6920 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
e951386e 6921 int i, j;
b73bfc1c 6922
e951386e
KH
6923 if (method == COMPOSITION_WITH_RULE)
6924 len = charbuf[2] * 3 - 2;
6925 charbuf += MAX_ANNOTATION_LENGTH;
6926 /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6927 for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
9ffd559c 6928 {
e951386e
KH
6929 if (charbuf[i] >= 0)
6930 args[j] = make_number (charbuf[i]);
6931 else
6932 {
6933 i++;
6934 args[j] = make_number (charbuf[i] % 0x100);
6935 }
9ffd559c 6936 }
e951386e 6937 components = (i == j ? Fstring (j, args) : Fvector (j, args));
d46c5b12 6938 }
69a80ea3 6939 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
6940}
6941
d46c5b12 6942
ff0dacd7
KH
6943/* Put `charset' property on text in CODING->object according to
6944 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 6945 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 6946 */
d46c5b12 6947
ff0dacd7 6948static INLINE void
971de7fb 6949produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos)
d46c5b12 6950{
69a80ea3
KH
6951 EMACS_INT from = pos - charbuf[2];
6952 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 6953
69a80ea3 6954 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
6955 Qcharset, CHARSET_NAME (charset),
6956 coding->dst_object);
d46c5b12
KH
6957}
6958
d46c5b12 6959
/* Number of `int' elements requested first for CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca and record its length (in
   elements) in CODING->charbuf_size.  The request starts at
   CHARBUF_SIZE elements and is halved for as long as alloca yields a
   null pointer and the size is still above 1024.  If no buffer was
   obtained, record CODING_RESULT_INSUFFICIENT_MEM and return
   CODING->result from the function that uses this macro.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size;                                                           \
                                                                        \
    coding->charbuf = NULL;                                             \
    for (size = CHARBUF_SIZE; size > 1024; size >>= 1)                  \
      {                                                                 \
        coding->charbuf = (int *) alloca (sizeof (int) * size);         \
        if (coding->charbuf)                                            \
          break;                                                        \
      }                                                                 \
    if (! coding->charbuf)                                              \
      {                                                                 \
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
        return coding->result;                                          \
      }                                                                 \
    coding->charbuf_size = size;                                        \
  } while (0)
4ed46869 6981
d46c5b12
KH
6982
6983static void
971de7fb 6984produce_annotation (struct coding_system *coding, EMACS_INT pos)
d46c5b12 6985{
df7492f9
KH
6986 int *charbuf = coding->charbuf;
6987 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 6988
ff0dacd7
KH
6989 if (NILP (coding->dst_object))
6990 return;
d46c5b12 6991
df7492f9 6992 while (charbuf < charbuf_end)
a84f1519 6993 {
df7492f9 6994 if (*charbuf >= 0)
e951386e 6995 pos++, charbuf++;
d46c5b12 6996 else
d46c5b12 6997 {
df7492f9 6998 int len = -*charbuf;
e951386e
KH
6999
7000 if (len > 2)
7001 switch (charbuf[1])
7002 {
7003 case CODING_ANNOTATE_COMPOSITION_MASK:
7004 produce_composition (coding, charbuf, pos);
7005 break;
7006 case CODING_ANNOTATE_CHARSET_MASK:
7007 produce_charset (coding, charbuf, pos);
7008 break;
7009 }
df7492f9 7010 charbuf += len;
d46c5b12 7011 }
a84f1519 7012 }
d46c5b12
KH
7013}
7014
df7492f9
KH
7015/* Decode the data at CODING->src_object into CODING->dst_object.
7016 CODING->src_object is a buffer, a string, or nil.
7017 CODING->dst_object is a buffer.
d46c5b12 7018
df7492f9
KH
7019 If CODING->src_object is a buffer, it must be the current buffer.
7020 In this case, if CODING->src_pos is positive, it is a position of
7021 the source text in the buffer, otherwise, the source text is in the
7022 gap area of the buffer, and CODING->src_pos specifies the offset of
7023 the text from GPT (which must be the same as PT). If this is the
7024 same buffer as CODING->dst_object, CODING->src_pos must be
7025 negative.
d46c5b12 7026
b6828792 7027 If CODING->src_object is a string, CODING->src_pos is an index to
df7492f9 7028 that string.
d46c5b12 7029
df7492f9
KH
7030 If CODING->src_object is nil, CODING->source must already point to
7031 the non-relocatable memory area. In this case, CODING->src_pos is
7032 an offset from CODING->source.
73be902c 7033
df7492f9
KH
7034 The decoded data is inserted at the current point of the buffer
7035 CODING->dst_object.
7036*/
d46c5b12 7037
df7492f9 7038static int
971de7fb 7039decode_coding (struct coding_system *coding)
d46c5b12 7040{
df7492f9 7041 Lisp_Object attrs;
24a73b0a 7042 Lisp_Object undo_list;
7d64c6ad 7043 Lisp_Object translation_table;
d0396581 7044 struct ccl_spec cclspec;
69a80ea3
KH
7045 int carryover;
7046 int i;
d46c5b12 7047
df7492f9
KH
7048 if (BUFFERP (coding->src_object)
7049 && coding->src_pos > 0
7050 && coding->src_pos < GPT
7051 && coding->src_pos + coding->src_chars > GPT)
7052 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 7053
24a73b0a 7054 undo_list = Qt;
df7492f9 7055 if (BUFFERP (coding->dst_object))
1c3478b0 7056 {
df7492f9
KH
7057 if (current_buffer != XBUFFER (coding->dst_object))
7058 set_buffer_internal (XBUFFER (coding->dst_object));
7059 if (GPT != PT)
7060 move_gap_both (PT, PT_BYTE);
24a73b0a
KH
7061 undo_list = current_buffer->undo_list;
7062 current_buffer->undo_list = Qt;
1c3478b0
KH
7063 }
7064
df7492f9
KH
7065 coding->consumed = coding->consumed_char = 0;
7066 coding->produced = coding->produced_char = 0;
7067 coding->chars_at_source = 0;
065e3595 7068 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 7069 coding->errors = 0;
1c3478b0 7070
df7492f9
KH
7071 ALLOC_CONVERSION_WORK_AREA (coding);
7072
7073 attrs = CODING_ID_ATTRS (coding->id);
2170c8f0 7074 translation_table = get_translation_table (attrs, 0, NULL);
df7492f9 7075
69a80ea3 7076 carryover = 0;
d0396581
KH
7077 if (coding->decoder == decode_coding_ccl)
7078 {
7079 coding->spec.ccl = &cclspec;
7080 setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7081 }
df7492f9 7082 do
b73bfc1c 7083 {
69a80ea3
KH
7084 EMACS_INT pos = coding->dst_pos + coding->produced_char;
7085
df7492f9
KH
7086 coding_set_source (coding);
7087 coding->annotated = 0;
69a80ea3 7088 coding->charbuf_used = carryover;
df7492f9 7089 (*(coding->decoder)) (coding);
df7492f9 7090 coding_set_destination (coding);
69a80ea3 7091 carryover = produce_chars (coding, translation_table, 0);
df7492f9 7092 if (coding->annotated)
69a80ea3
KH
7093 produce_annotation (coding, pos);
7094 for (i = 0; i < carryover; i++)
7095 coding->charbuf[i]
7096 = coding->charbuf[coding->charbuf_used - carryover + i];
d46c5b12 7097 }
d0396581
KH
7098 while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7099 || (coding->consumed < coding->src_bytes
7100 && (coding->result == CODING_RESULT_SUCCESS
7101 || coding->result == CODING_RESULT_INVALID_SRC)));
d46c5b12 7102
69a80ea3
KH
7103 if (carryover > 0)
7104 {
7105 coding_set_destination (coding);
7106 coding->charbuf_used = carryover;
7107 produce_chars (coding, translation_table, 1);
7108 }
7109
df7492f9
KH
7110 coding->carryover_bytes = 0;
7111 if (coding->consumed < coding->src_bytes)
d46c5b12 7112 {
df7492f9 7113 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 7114 const unsigned char *src;
df7492f9
KH
7115
7116 coding_set_source (coding);
7117 coding_set_destination (coding);
7118 src = coding->source + coding->consumed;
7119
7120 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 7121 {
df7492f9
KH
7122 /* Flush out unprocessed data as binary chars. We are sure
7123 that the number of data is less than the size of
7124 coding->charbuf. */
065e3595 7125 coding->charbuf_used = 0;
b2dab6c8
JR
7126 coding->chars_at_source = 0;
7127
df7492f9 7128 while (nbytes-- > 0)
1c3478b0 7129 {
df7492f9 7130 int c = *src++;
98725083 7131
1c91457d
KH
7132 if (c & 0x80)
7133 c = BYTE8_TO_CHAR (c);
7134 coding->charbuf[coding->charbuf_used++] = c;
1c3478b0 7135 }
f6cbaf43 7136 produce_chars (coding, Qnil, 1);
d46c5b12 7137 }
d46c5b12 7138 else
df7492f9
KH
7139 {
7140 /* Record unprocessed bytes in coding->carryover. We are
7141 sure that the number of data is less than the size of
7142 coding->carryover. */
7143 unsigned char *p = coding->carryover;
7144
f289d375
KH
7145 if (nbytes > sizeof coding->carryover)
7146 nbytes = sizeof coding->carryover;
df7492f9
KH
7147 coding->carryover_bytes = nbytes;
7148 while (nbytes-- > 0)
7149 *p++ = *src++;
1c3478b0 7150 }
df7492f9 7151 coding->consumed = coding->src_bytes;
b73bfc1c 7152 }
69f76525 7153
0a9564cb
EZ
7154 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7155 && !inhibit_eol_conversion)
4347441b 7156 decode_eol (coding);
24a73b0a
KH
7157 if (BUFFERP (coding->dst_object))
7158 {
7159 current_buffer->undo_list = undo_list;
7160 record_insert (coding->dst_pos, coding->produced_char);
7161 }
73be902c 7162 return coding->result;
4ed46869
KH
7163}
7164
aaaf0b1e 7165
e1c23804 7166/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
7167 ending before LIMIT of CODING->src_object (buffer or string), store
7168 the data in BUF, set *STOP to a starting position of the next
7169 composition (if any) or to LIMIT, and return the address of the
7170 next element of BUF.
7171
7172 If such an annotation is not found, set *STOP to a starting
7173 position of a composition after POS (if any) or to LIMIT, and
7174 return BUF. */
7175
7176static INLINE int *
971de7fb 7177handle_composition_annotation (EMACS_INT pos, EMACS_INT limit, struct coding_system *coding, int *buf, EMACS_INT *stop)
aaaf0b1e 7178{
ff0dacd7
KH
7179 EMACS_INT start, end;
7180 Lisp_Object prop;
aaaf0b1e 7181
ff0dacd7
KH
7182 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7183 || end > limit)
7184 *stop = limit;
7185 else if (start > pos)
7186 *stop = start;
7187 else
aaaf0b1e 7188 {
ff0dacd7 7189 if (start == pos)
aaaf0b1e 7190 {
ff0dacd7
KH
7191 /* We found a composition. Store the corresponding
7192 annotation data in BUF. */
7193 int *head = buf;
7194 enum composition_method method = COMPOSITION_METHOD (prop);
7195 int nchars = COMPOSITION_LENGTH (prop);
7196
e951386e 7197 ADD_COMPOSITION_DATA (buf, nchars, 0, method);
ff0dacd7 7198 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 7199 {
ff0dacd7
KH
7200 Lisp_Object components;
7201 int len, i, i_byte;
7202
7203 components = COMPOSITION_COMPONENTS (prop);
7204 if (VECTORP (components))
aaaf0b1e 7205 {
ff0dacd7
KH
7206 len = XVECTOR (components)->size;
7207 for (i = 0; i < len; i++)
7208 *buf++ = XINT (AREF (components, i));
aaaf0b1e 7209 }
ff0dacd7 7210 else if (STRINGP (components))
aaaf0b1e 7211 {
8f924df7 7212 len = SCHARS (components);
ff0dacd7
KH
7213 i = i_byte = 0;
7214 while (i < len)
7215 {
7216 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7217 buf++;
7218 }
7219 }
7220 else if (INTEGERP (components))
7221 {
7222 len = 1;
7223 *buf++ = XINT (components);
7224 }
7225 else if (CONSP (components))
7226 {
7227 for (len = 0; CONSP (components);
7228 len++, components = XCDR (components))
7229 *buf++ = XINT (XCAR (components));
aaaf0b1e 7230 }
aaaf0b1e 7231 else
ff0dacd7
KH
7232 abort ();
7233 *head -= len;
aaaf0b1e 7234 }
aaaf0b1e 7235 }
ff0dacd7
KH
7236
7237 if (find_composition (end, limit, &start, &end, &prop,
7238 coding->src_object)
7239 && end <= limit)
7240 *stop = start;
7241 else
7242 *stop = limit;
aaaf0b1e 7243 }
ff0dacd7
KH
7244 return buf;
7245}
7246
7247
e1c23804 7248/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
7249 CODING->src_object (buffer of string), store the data in BUF, set
7250 *STOP to the position where the value of `charset' property changes
7251 (limiting by LIMIT), and return the address of the next element of
7252 BUF.
7253
7254 If the property value is nil, set *STOP to the position where the
7255 property value is non-nil (limiting by LIMIT), and return BUF. */
7256
7257static INLINE int *
971de7fb 7258handle_charset_annotation (EMACS_INT pos, EMACS_INT limit, struct coding_system *coding, int *buf, EMACS_INT *stop)
ff0dacd7
KH
7259{
7260 Lisp_Object val, next;
7261 int id;
7262
7263 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7264 if (! NILP (val) && CHARSETP (val))
7265 id = XINT (CHARSET_SYMBOL_ID (val));
7266 else
7267 id = -1;
69a80ea3 7268 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
7269 next = Fnext_single_property_change (make_number (pos), Qcharset,
7270 coding->src_object,
7271 make_number (limit));
7272 *stop = XINT (next);
7273 return buf;
7274}
7275
7276
df7492f9 7277static void
971de7fb 7278consume_chars (struct coding_system *coding, Lisp_Object translation_table, int max_lookup)
df7492f9
KH
7279{
7280 int *buf = coding->charbuf;
ff0dacd7 7281 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 7282 const unsigned char *src = coding->source + coding->consumed;
4776e638 7283 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
7284 EMACS_INT pos = coding->src_pos + coding->consumed_char;
7285 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
7286 int multibytep = coding->src_multibyte;
7287 Lisp_Object eol_type;
7288 int c;
ff0dacd7 7289 EMACS_INT stop, stop_composition, stop_charset;
09ee6fdd 7290 int *lookup_buf = NULL;
433f7f87
KH
7291
7292 if (! NILP (translation_table))
09ee6fdd 7293 lookup_buf = alloca (sizeof (int) * max_lookup);
88993dfd 7294
0a9564cb 7295 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
df7492f9
KH
7296 if (VECTORP (eol_type))
7297 eol_type = Qunix;
88993dfd 7298
df7492f9
KH
7299 /* Note: composition handling is not yet implemented. */
7300 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 7301
0b5670c9
KH
7302 if (NILP (coding->src_object))
7303 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 7304 else
0b5670c9
KH
7305 {
7306 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7307 stop = stop_composition = pos;
7308 else
7309 stop = stop_composition = end_pos;
7310 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7311 stop = stop_charset = pos;
7312 else
7313 stop_charset = end_pos;
7314 }
ec6d2bb8 7315
24a73b0a 7316 /* Compensate for CRLF and conversion. */
ff0dacd7 7317 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 7318 while (buf < buf_end)
aaaf0b1e 7319 {
433f7f87
KH
7320 Lisp_Object trans;
7321
df7492f9 7322 if (pos == stop)
ec6d2bb8 7323 {
df7492f9
KH
7324 if (pos == end_pos)
7325 break;
ff0dacd7
KH
7326 if (pos == stop_composition)
7327 buf = handle_composition_annotation (pos, end_pos, coding,
7328 buf, &stop_composition);
7329 if (pos == stop_charset)
7330 buf = handle_charset_annotation (pos, end_pos, coding,
7331 buf, &stop_charset);
7332 stop = (stop_composition < stop_charset
7333 ? stop_composition : stop_charset);
df7492f9
KH
7334 }
7335
7336 if (! multibytep)
4776e638 7337 {
d3e4cb56 7338 EMACS_INT bytes;
aaaf0b1e 7339
4d1e6632
KH
7340 if (coding->encoder == encode_coding_raw_text
7341 || coding->encoder == encode_coding_ccl)
ea29edf2
KH
7342 c = *src++, pos++;
7343 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
db274c7a 7344 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
4776e638 7345 else
f03caae0 7346 c = BYTE8_TO_CHAR (*src), src++, pos++;
4776e638 7347 }
df7492f9 7348 else
db274c7a 7349 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
df7492f9
KH
7350 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7351 c = '\n';
7352 if (! EQ (eol_type, Qunix))
aaaf0b1e 7353 {
df7492f9 7354 if (c == '\n')
aaaf0b1e 7355 {
df7492f9
KH
7356 if (EQ (eol_type, Qdos))
7357 *buf++ = '\r';
7358 else
7359 c = '\r';
aaaf0b1e
KH
7360 }
7361 }
433f7f87 7362
e6a54062 7363 trans = Qnil;
09ee6fdd 7364 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 7365 if (NILP (trans))
433f7f87
KH
7366 *buf++ = c;
7367 else
7368 {
7369 int from_nchars = 1, to_nchars = 1;
7370 int *lookup_buf_end;
7371 const unsigned char *p = src;
7372 int i;
7373
7374 lookup_buf[0] = c;
7375 for (i = 1; i < max_lookup && p < src_end; i++)
7376 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7377 lookup_buf_end = lookup_buf + i;
e951386e
KH
7378 trans = get_translation (trans, lookup_buf, lookup_buf_end);
7379 if (INTEGERP (trans))
7380 c = XINT (trans);
7381 else if (CONSP (trans))
7382 {
7383 from_nchars = ASIZE (XCAR (trans));
7384 trans = XCDR (trans);
7385 if (INTEGERP (trans))
7386 c = XINT (trans);
7387 else
7388 {
7389 to_nchars = ASIZE (trans);
7390 if (buf + to_nchars > buf_end)
7391 break;
7392 c = XINT (AREF (trans, 0));
7393 }
7394 }
7395 else
433f7f87 7396 break;
e951386e 7397 *buf++ = c;
433f7f87
KH
7398 for (i = 1; i < to_nchars; i++)
7399 *buf++ = XINT (AREF (trans, i));
7400 for (i = 1; i < from_nchars; i++, pos++)
7401 src += MULTIBYTE_LENGTH_NO_CHECK (src);
7402 }
aaaf0b1e 7403 }
ec6d2bb8 7404
df7492f9
KH
7405 coding->consumed = src - coding->source;
7406 coding->consumed_char = pos - coding->src_pos;
7407 coding->charbuf_used = buf - coding->charbuf;
7408 coding->chars_at_source = 0;
aaaf0b1e
KH
7409}
7410
4ed46869 7411
df7492f9
KH
7412/* Encode the text at CODING->src_object into CODING->dst_object.
7413 CODING->src_object is a buffer or a string.
7414 CODING->dst_object is a buffer or nil.
7415
7416 If CODING->src_object is a buffer, it must be the current buffer.
7417 In this case, if CODING->src_pos is positive, it is a position of
7418 the source text in the buffer, otherwise. the source text is in the
7419 gap area of the buffer, and coding->src_pos specifies the offset of
7420 the text from GPT (which must be the same as PT). If this is the
7421 same buffer as CODING->dst_object, CODING->src_pos must be
7422 negative and CODING should not have `pre-write-conversion'.
7423
7424 If CODING->src_object is a string, CODING should not have
7425 `pre-write-conversion'.
7426
7427 If CODING->dst_object is a buffer, the encoded data is inserted at
7428 the current point of that buffer.
7429
7430 If CODING->dst_object is nil, the encoded data is placed at the
7431 memory area specified by CODING->destination. */
7432
7433static int
971de7fb 7434encode_coding (struct coding_system *coding)
4ed46869 7435{
df7492f9 7436 Lisp_Object attrs;
7d64c6ad 7437 Lisp_Object translation_table;
09ee6fdd 7438 int max_lookup;
fb608df3 7439 struct ccl_spec cclspec;
9861e777 7440
df7492f9 7441 attrs = CODING_ID_ATTRS (coding->id);
ea29edf2
KH
7442 if (coding->encoder == encode_coding_raw_text)
7443 translation_table = Qnil, max_lookup = 0;
7444 else
7445 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 7446
df7492f9 7447 if (BUFFERP (coding->dst_object))
8844fa83 7448 {
df7492f9
KH
7449 set_buffer_internal (XBUFFER (coding->dst_object));
7450 coding->dst_multibyte
7451 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 7452 }
4ed46869 7453
b73bfc1c 7454 coding->consumed = coding->consumed_char = 0;
df7492f9 7455 coding->produced = coding->produced_char = 0;
065e3595 7456 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 7457 coding->errors = 0;
b73bfc1c 7458
df7492f9 7459 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 7460
fb608df3
KH
7461 if (coding->encoder == encode_coding_ccl)
7462 {
7463 coding->spec.ccl = &cclspec;
7464 setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7465 }
df7492f9
KH
7466 do {
7467 coding_set_source (coding);
09ee6fdd 7468 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
7469 coding_set_destination (coding);
7470 (*(coding->encoder)) (coding);
7471 } while (coding->consumed_char < coding->src_chars);
7472
284201e4 7473 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
df7492f9
KH
7474 insert_from_gap (coding->produced_char, coding->produced);
7475
7476 return (coding->result);
ec6d2bb8
KH
7477}
7478
fb88bf2d 7479
24a73b0a
KH
/* Name (or base name) of the work buffer used for code conversion.
   The reused work buffer is created under exactly this name; the
   additional work buffers get names generated from it.  */
static Lisp_Object Vcode_conversion_workbuf_name;

/* A working buffer used by the top level conversion.  Once it is
   created, it is never destroyed by the conversion code (it is
   re-created on demand if it is found not to be live).  It has the
   name Vcode_conversion_workbuf_name.  The other working buffers are
   destroyed after the use is finished, and their names are modified
   versions of Vcode_conversion_workbuf_name.  */
static Lisp_Object Vcode_conversion_reused_workbuf;

/* Nonzero while Vcode_conversion_reused_workbuf is in use.
   make_conversion_work_buffer increments it on every call, so the
   value may exceed 1 during nested conversions;
   code_conversion_restore resets it to 0 when the reused buffer is
   released.  */
static int reused_workbuf_in_use;
4ed46869 7492
24a73b0a
KH
7493
7494/* Return a working buffer of code convesion. MULTIBYTE specifies the
7495 multibyteness of returning buffer. */
b73bfc1c 7496
f6cbaf43 7497static Lisp_Object
971de7fb 7498make_conversion_work_buffer (int multibyte)
df7492f9 7499{
24a73b0a
KH
7500 Lisp_Object name, workbuf;
7501 struct buffer *current;
4ed46869 7502
24a73b0a 7503 if (reused_workbuf_in_use++)
065e3595
KH
7504 {
7505 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7506 workbuf = Fget_buffer_create (name);
7507 }
df7492f9 7508 else
065e3595 7509 {
159bd5a2 7510 if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
a993c7a1
KH
7511 Vcode_conversion_reused_workbuf
7512 = Fget_buffer_create (Vcode_conversion_workbuf_name);
7513 workbuf = Vcode_conversion_reused_workbuf;
065e3595 7514 }
24a73b0a
KH
7515 current = current_buffer;
7516 set_buffer_internal (XBUFFER (workbuf));
df36ff1f
CY
7517 /* We can't allow modification hooks to run in the work buffer. For
7518 instance, directory_files_internal assumes that file decoding
7519 doesn't compile new regexps. */
7520 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
3ed051d4 7521 Ferase_buffer ();
df7492f9 7522 current_buffer->undo_list = Qt;
24a73b0a 7523 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
df7492f9 7524 set_buffer_internal (current);
24a73b0a 7525 return workbuf;
df7492f9 7526}
d46c5b12 7527
24a73b0a 7528
4776e638 7529static Lisp_Object
971de7fb 7530code_conversion_restore (Lisp_Object arg)
4776e638 7531{
24a73b0a 7532 Lisp_Object current, workbuf;
948bdcf3 7533 struct gcpro gcpro1;
24a73b0a 7534
948bdcf3 7535 GCPRO1 (arg);
24a73b0a
KH
7536 current = XCAR (arg);
7537 workbuf = XCDR (arg);
7538 if (! NILP (workbuf))
7539 {
7540 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7541 reused_workbuf_in_use = 0;
7542 else if (! NILP (Fbuffer_live_p (workbuf)))
7543 Fkill_buffer (workbuf);
7544 }
7545 set_buffer_internal (XBUFFER (current));
948bdcf3 7546 UNGCPRO;
4776e638
KH
7547 return Qnil;
7548}
b73bfc1c 7549
24a73b0a 7550Lisp_Object
971de7fb 7551code_conversion_save (int with_work_buf, int multibyte)
df7492f9 7552{
24a73b0a 7553 Lisp_Object workbuf = Qnil;
b73bfc1c 7554
4776e638 7555 if (with_work_buf)
24a73b0a
KH
7556 workbuf = make_conversion_work_buffer (multibyte);
7557 record_unwind_protect (code_conversion_restore,
7558 Fcons (Fcurrent_buffer (), workbuf));
4776e638 7559 return workbuf;
df7492f9 7560}
d46c5b12 7561
df7492f9 7562int
971de7fb 7563decode_coding_gap (struct coding_system *coding, EMACS_INT chars, EMACS_INT bytes)
df7492f9
KH
7564{
7565 int count = specpdl_ptr - specpdl;
5e5c78be 7566 Lisp_Object attrs;
fb88bf2d 7567
24a73b0a 7568 code_conversion_save (0, 0);
ec6d2bb8 7569
24a73b0a 7570 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7571 coding->src_chars = chars;
7572 coding->src_bytes = bytes;
7573 coding->src_pos = -chars;
7574 coding->src_pos_byte = -bytes;
7575 coding->src_multibyte = chars < bytes;
24a73b0a 7576 coding->dst_object = coding->src_object;
df7492f9
KH
7577 coding->dst_pos = PT;
7578 coding->dst_pos_byte = PT_BYTE;
71c81426 7579 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869 7580
df7492f9
KH
7581 if (CODING_REQUIRE_DETECTION (coding))
7582 detect_coding (coding);
8f924df7 7583
9286b333 7584 coding->mode |= CODING_MODE_LAST_BLOCK;
287c57d7 7585 current_buffer->text->inhibit_shrinking = 1;
df7492f9 7586 decode_coding (coding);
287c57d7 7587 current_buffer->text->inhibit_shrinking = 0;
d46c5b12 7588
5e5c78be
KH
7589 attrs = CODING_ID_ATTRS (coding->id);
7590 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 7591 {
5e5c78be
KH
7592 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7593 Lisp_Object val;
7594
7595 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
7596 val = call1 (CODING_ATTR_POST_READ (attrs),
7597 make_number (coding->produced_char));
5e5c78be
KH
7598 CHECK_NATNUM (val);
7599 coding->produced_char += Z - prev_Z;
7600 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 7601 }
4ed46869 7602
df7492f9 7603 unbind_to (count, Qnil);
b73bfc1c
KH
7604 return coding->result;
7605}
52d41803 7606
4ed46869 7607int
971de7fb 7608encode_coding_gap (struct coding_system *coding, EMACS_INT chars, EMACS_INT bytes)
4ed46869 7609{
df7492f9 7610 int count = specpdl_ptr - specpdl;
4ed46869 7611
24a73b0a 7612 code_conversion_save (0, 0);
4ed46869 7613
24a73b0a 7614 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7615 coding->src_chars = chars;
7616 coding->src_bytes = bytes;
7617 coding->src_pos = -chars;
7618 coding->src_pos_byte = -bytes;
7619 coding->src_multibyte = chars < bytes;
7620 coding->dst_object = coding->src_object;
7621 coding->dst_pos = PT;
7622 coding->dst_pos_byte = PT_BYTE;
4ed46869 7623
df7492f9 7624 encode_coding (coding);
b73bfc1c 7625
df7492f9
KH
7626 unbind_to (count, Qnil);
7627 return coding->result;
7628}
4ed46869 7629
d46c5b12 7630
df7492f9
KH
7631/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7632 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 7633
df7492f9 7634 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 7635
df7492f9
KH
7636 If it is a buffer, the text is at point of the buffer. FROM and TO
7637 are positions in the buffer.
b73bfc1c 7638
df7492f9
KH
7639 If it is a string, the text is at the beginning of the string.
7640 FROM and TO are indices to the string.
4ed46869 7641
df7492f9
KH
7642 If it is nil, the text is at coding->source. FROM and TO are
7643 indices to coding->source.
bb10be8b 7644
df7492f9 7645 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 7646
df7492f9
KH
7647 If it is a buffer, the decoded text is inserted at point of the
7648 buffer. If the buffer is the same as SRC_OBJECT, the source text
7649 is deleted.
4ed46869 7650
df7492f9
KH
7651 If it is Qt, a string is made from the decoded text, and
7652 set in CODING->dst_object.
d46c5b12 7653
df7492f9 7654 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 7655 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
7656 CODING->destination by xmalloc. If the decoded text is longer than
7657 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7658 */
d46c5b12 7659
df7492f9
KH
7660void
7661decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7662 dst_object)
d46c5b12 7663 struct coding_system *coding;
df7492f9
KH
7664 Lisp_Object src_object;
7665 EMACS_INT from, from_byte, to, to_byte;
7666 Lisp_Object dst_object;
d46c5b12 7667{
df7492f9
KH
7668 int count = specpdl_ptr - specpdl;
7669 unsigned char *destination;
7670 EMACS_INT dst_bytes;
7671 EMACS_INT chars = to - from;
7672 EMACS_INT bytes = to_byte - from_byte;
7673 Lisp_Object attrs;
4776e638 7674 int saved_pt = -1, saved_pt_byte;
64cedb0c 7675 int need_marker_adjustment = 0;
b3bfad50 7676 Lisp_Object old_deactivate_mark;
d46c5b12 7677
b3bfad50 7678 old_deactivate_mark = Vdeactivate_mark;
93dec019 7679
df7492f9 7680 if (NILP (dst_object))
d46c5b12 7681 {
df7492f9
KH
7682 destination = coding->destination;
7683 dst_bytes = coding->dst_bytes;
d46c5b12 7684 }
93dec019 7685
df7492f9
KH
7686 coding->src_object = src_object;
7687 coding->src_chars = chars;
7688 coding->src_bytes = bytes;
7689 coding->src_multibyte = chars < bytes;
70ad9fc4 7690
df7492f9 7691 if (STRINGP (src_object))
d46c5b12 7692 {
df7492f9
KH
7693 coding->src_pos = from;
7694 coding->src_pos_byte = from_byte;
d46c5b12 7695 }
df7492f9 7696 else if (BUFFERP (src_object))
88993dfd 7697 {
df7492f9
KH
7698 set_buffer_internal (XBUFFER (src_object));
7699 if (from != GPT)
7700 move_gap_both (from, from_byte);
7701 if (EQ (src_object, dst_object))
fb88bf2d 7702 {
64cedb0c
KH
7703 struct Lisp_Marker *tail;
7704
7705 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7706 {
7707 tail->need_adjustment
7708 = tail->charpos == (tail->insertion_type ? from : to);
7709 need_marker_adjustment |= tail->need_adjustment;
7710 }
4776e638 7711 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9 7712 TEMP_SET_PT_BOTH (from, from_byte);
f4a3cc44 7713 current_buffer->text->inhibit_shrinking = 1;
df7492f9
KH
7714 del_range_both (from, from_byte, to, to_byte, 1);
7715 coding->src_pos = -chars;
7716 coding->src_pos_byte = -bytes;
fb88bf2d 7717 }
df7492f9 7718 else
fb88bf2d 7719 {
df7492f9
KH
7720 coding->src_pos = from;
7721 coding->src_pos_byte = from_byte;
fb88bf2d 7722 }
88993dfd
KH
7723 }
7724
df7492f9
KH
7725 if (CODING_REQUIRE_DETECTION (coding))
7726 detect_coding (coding);
7727 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 7728
2cb26057
KH
7729 if (EQ (dst_object, Qt)
7730 || (! NILP (CODING_ATTR_POST_READ (attrs))
7731 && NILP (dst_object)))
b73bfc1c 7732 {
a1567c45
SM
7733 coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7734 coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
df7492f9
KH
7735 coding->dst_pos = BEG;
7736 coding->dst_pos_byte = BEG_BYTE;
b73bfc1c 7737 }
df7492f9 7738 else if (BUFFERP (dst_object))
d46c5b12 7739 {
24a73b0a 7740 code_conversion_save (0, 0);
df7492f9
KH
7741 coding->dst_object = dst_object;
7742 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7743 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7744 coding->dst_multibyte
7745 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
7746 }
7747 else
7748 {
24a73b0a 7749 code_conversion_save (0, 0);
df7492f9 7750 coding->dst_object = Qnil;
0154725e
SM
7751 /* Most callers presume this will return a multibyte result, and they
7752 won't use `binary' or `raw-text' anyway, so let's not worry about
7753 CODING_FOR_UNIBYTE. */
bb555731 7754 coding->dst_multibyte = 1;
d46c5b12
KH
7755 }
7756
df7492f9 7757 decode_coding (coding);
fa46990e 7758
df7492f9
KH
7759 if (BUFFERP (coding->dst_object))
7760 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 7761
df7492f9 7762 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 7763 {
b3bfad50 7764 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
df7492f9 7765 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 7766 Lisp_Object val;
d46c5b12 7767
c0cc7f7f 7768 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
b3bfad50
KH
7769 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7770 old_deactivate_mark);
d4850d67
KH
7771 val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7772 make_number (coding->produced_char));
df7492f9
KH
7773 UNGCPRO;
7774 CHECK_NATNUM (val);
7775 coding->produced_char += Z - prev_Z;
7776 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 7777 }
de79a6a5 7778
df7492f9 7779 if (EQ (dst_object, Qt))
ec6d2bb8 7780 {
df7492f9
KH
7781 coding->dst_object = Fbuffer_string ();
7782 }
7783 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7784 {
7785 set_buffer_internal (XBUFFER (coding->dst_object));
7786 if (dst_bytes < coding->produced)
7787 {
b3bfad50 7788 destination = xrealloc (destination, coding->produced);
df7492f9
KH
7789 if (! destination)
7790 {
065e3595 7791 record_conversion_result (coding,
ebaf11b6 7792 CODING_RESULT_INSUFFICIENT_MEM);
df7492f9
KH
7793 unbind_to (count, Qnil);
7794 return;
7795 }
7796 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7797 move_gap_both (BEGV, BEGV_BYTE);
7798 bcopy (BEGV_ADDR, destination, coding->produced);
7799 coding->destination = destination;
d46c5b12 7800 }
ec6d2bb8 7801 }
b73bfc1c 7802
4776e638
KH
7803 if (saved_pt >= 0)
7804 {
7805 /* This is the case of:
7806 (BUFFERP (src_object) && EQ (src_object, dst_object))
7807 As we have moved PT while replacing the original buffer
7808 contents, we must recover it now. */
7809 set_buffer_internal (XBUFFER (src_object));
f4a3cc44 7810 current_buffer->text->inhibit_shrinking = 0;
4776e638
KH
7811 if (saved_pt < from)
7812 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7813 else if (saved_pt < from + chars)
7814 TEMP_SET_PT_BOTH (from, from_byte);
7815 else if (! NILP (current_buffer->enable_multibyte_characters))
7816 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7817 saved_pt_byte + (coding->produced - bytes));
7818 else
7819 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7820 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7821
7822 if (need_marker_adjustment)
7823 {
7824 struct Lisp_Marker *tail;
7825
7826 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7827 if (tail->need_adjustment)
7828 {
7829 tail->need_adjustment = 0;
7830 if (tail->insertion_type)
7831 {
7832 tail->bytepos = from_byte;
7833 tail->charpos = from;
7834 }
7835 else
7836 {
7837 tail->bytepos = from_byte + coding->produced;
7838 tail->charpos
7839 = (NILP (current_buffer->enable_multibyte_characters)
7840 ? tail->bytepos : from + coding->produced_char);
7841 }
7842 }
7843 }
d46c5b12 7844 }
4776e638 7845
b3bfad50 7846 Vdeactivate_mark = old_deactivate_mark;
065e3595 7847 unbind_to (count, coding->dst_object);
d46c5b12
KH
7848}
7849
d46c5b12 7850
df7492f9
KH
7851void
7852encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7853 dst_object)
d46c5b12 7854 struct coding_system *coding;
df7492f9
KH
7855 Lisp_Object src_object;
7856 EMACS_INT from, from_byte, to, to_byte;
7857 Lisp_Object dst_object;
d46c5b12 7858{
b73bfc1c 7859 int count = specpdl_ptr - specpdl;
df7492f9
KH
7860 EMACS_INT chars = to - from;
7861 EMACS_INT bytes = to_byte - from_byte;
7862 Lisp_Object attrs;
4776e638 7863 int saved_pt = -1, saved_pt_byte;
64cedb0c 7864 int need_marker_adjustment = 0;
c02d943b 7865 int kill_src_buffer = 0;
b3bfad50 7866 Lisp_Object old_deactivate_mark;
df7492f9 7867
b3bfad50 7868 old_deactivate_mark = Vdeactivate_mark;
df7492f9
KH
7869
7870 coding->src_object = src_object;
7871 coding->src_chars = chars;
7872 coding->src_bytes = bytes;
7873 coding->src_multibyte = chars < bytes;
7874
7875 attrs = CODING_ID_ATTRS (coding->id);
7876
64cedb0c
KH
7877 if (EQ (src_object, dst_object))
7878 {
7879 struct Lisp_Marker *tail;
7880
7881 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7882 {
7883 tail->need_adjustment
7884 = tail->charpos == (tail->insertion_type ? from : to);
7885 need_marker_adjustment |= tail->need_adjustment;
7886 }
7887 }
7888
df7492f9 7889 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 7890 {
24a73b0a 7891 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
7892 set_buffer_internal (XBUFFER (coding->src_object));
7893 if (STRINGP (src_object))
7894 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7895 else if (BUFFERP (src_object))
7896 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7897 else
7898 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 7899
df7492f9
KH
7900 if (EQ (src_object, dst_object))
7901 {
7902 set_buffer_internal (XBUFFER (src_object));
4776e638 7903 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
7904 del_range_both (from, from_byte, to, to_byte, 1);
7905 set_buffer_internal (XBUFFER (coding->src_object));
7906 }
7907
d4850d67
KH
7908 {
7909 Lisp_Object args[3];
b3bfad50 7910 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
d4850d67 7911
b3bfad50
KH
7912 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7913 old_deactivate_mark);
d4850d67
KH
7914 args[0] = CODING_ATTR_PRE_WRITE (attrs);
7915 args[1] = make_number (BEG);
7916 args[2] = make_number (Z);
7917 safe_call (3, args);
b3bfad50 7918 UNGCPRO;
d4850d67 7919 }
c02d943b
KH
7920 if (XBUFFER (coding->src_object) != current_buffer)
7921 kill_src_buffer = 1;
ac87bbef 7922 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7923 if (BEG != GPT)
7924 move_gap_both (BEG, BEG_BYTE);
7925 coding->src_chars = Z - BEG;
7926 coding->src_bytes = Z_BYTE - BEG_BYTE;
7927 coding->src_pos = BEG;
7928 coding->src_pos_byte = BEG_BYTE;
7929 coding->src_multibyte = Z < Z_BYTE;
7930 }
7931 else if (STRINGP (src_object))
d46c5b12 7932 {
24a73b0a 7933 code_conversion_save (0, 0);
df7492f9
KH
7934 coding->src_pos = from;
7935 coding->src_pos_byte = from_byte;
b73bfc1c 7936 }
df7492f9 7937 else if (BUFFERP (src_object))
b73bfc1c 7938 {
24a73b0a 7939 code_conversion_save (0, 0);
df7492f9 7940 set_buffer_internal (XBUFFER (src_object));
df7492f9 7941 if (EQ (src_object, dst_object))
d46c5b12 7942 {
4776e638 7943 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
7944 coding->src_object = del_range_1 (from, to, 1, 1);
7945 coding->src_pos = 0;
7946 coding->src_pos_byte = 0;
d46c5b12 7947 }
df7492f9 7948 else
d46c5b12 7949 {
ff0dacd7
KH
7950 if (from < GPT && to >= GPT)
7951 move_gap_both (from, from_byte);
df7492f9
KH
7952 coding->src_pos = from;
7953 coding->src_pos_byte = from_byte;
d46c5b12 7954 }
d46c5b12 7955 }
4776e638 7956 else
24a73b0a 7957 code_conversion_save (0, 0);
d46c5b12 7958
df7492f9 7959 if (BUFFERP (dst_object))
88993dfd 7960 {
df7492f9 7961 coding->dst_object = dst_object;
28f67a95
KH
7962 if (EQ (src_object, dst_object))
7963 {
7964 coding->dst_pos = from;
7965 coding->dst_pos_byte = from_byte;
7966 }
7967 else
7968 {
319a3947
KH
7969 struct buffer *current = current_buffer;
7970
7971 set_buffer_temp (XBUFFER (dst_object));
7972 coding->dst_pos = PT;
7973 coding->dst_pos_byte = PT_BYTE;
7974 move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7975 set_buffer_temp (current);
28f67a95 7976 }
df7492f9
KH
7977 coding->dst_multibyte
7978 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 7979 }
df7492f9 7980 else if (EQ (dst_object, Qt))
d46c5b12 7981 {
df7492f9 7982 coding->dst_object = Qnil;
df7492f9 7983 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
7984 if (coding->dst_bytes == 0)
7985 coding->dst_bytes = 1;
7986 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 7987 coding->dst_multibyte = 0;
d46c5b12
KH
7988 }
7989 else
7990 {
df7492f9
KH
7991 coding->dst_object = Qnil;
7992 coding->dst_multibyte = 0;
d46c5b12
KH
7993 }
7994
df7492f9 7995 encode_coding (coding);
d46c5b12 7996
df7492f9 7997 if (EQ (dst_object, Qt))
d46c5b12 7998 {
df7492f9
KH
7999 if (BUFFERP (coding->dst_object))
8000 coding->dst_object = Fbuffer_string ();
8001 else
d46c5b12 8002 {
df7492f9
KH
8003 coding->dst_object
8004 = make_unibyte_string ((char *) coding->destination,
8005 coding->produced);
8006 xfree (coding->destination);
d46c5b12 8007 }
4ed46869 8008 }
d46c5b12 8009
4776e638
KH
8010 if (saved_pt >= 0)
8011 {
8012 /* This is the case of:
8013 (BUFFERP (src_object) && EQ (src_object, dst_object))
8014 As we have moved PT while replacing the original buffer
8015 contents, we must recover it now. */
8016 set_buffer_internal (XBUFFER (src_object));
8017 if (saved_pt < from)
8018 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8019 else if (saved_pt < from + chars)
8020 TEMP_SET_PT_BOTH (from, from_byte);
8021 else if (! NILP (current_buffer->enable_multibyte_characters))
8022 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8023 saved_pt_byte + (coding->produced - bytes));
d46c5b12 8024 else
4776e638
KH
8025 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8026 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
8027
8028 if (need_marker_adjustment)
8029 {
8030 struct Lisp_Marker *tail;
8031
8032 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8033 if (tail->need_adjustment)
8034 {
8035 tail->need_adjustment = 0;
8036 if (tail->insertion_type)
8037 {
8038 tail->bytepos = from_byte;
8039 tail->charpos = from;
8040 }
8041 else
8042 {
8043 tail->bytepos = from_byte + coding->produced;
8044 tail->charpos
8045 = (NILP (current_buffer->enable_multibyte_characters)
8046 ? tail->bytepos : from + coding->produced_char);
8047 }
8048 }
8049 }
4776e638
KH
8050 }
8051
c02d943b
KH
8052 if (kill_src_buffer)
8053 Fkill_buffer (coding->src_object);
b3bfad50
KH
8054
8055 Vdeactivate_mark = old_deactivate_mark;
df7492f9 8056 unbind_to (count, Qnil);
b73bfc1c
KH
8057}
8058
df7492f9 8059
b73bfc1c 8060Lisp_Object
971de7fb 8061preferred_coding_system (void)
b73bfc1c 8062{
df7492f9 8063 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 8064
df7492f9 8065 return CODING_ID_NAME (id);
4ed46869
KH
8066}
8067
8068\f
8069#ifdef emacs
/*** 11. Emacs Lisp library functions ***/
4ed46869 8071
4ed46869 8072DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 8073 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 8074See the documentation of `define-coding-system' for information
48b0f3ae 8075about coding-system objects. */)
d4a1d553
JB
8076 (object)
8077 Lisp_Object object;
4ed46869 8078{
d4a1d553
JB
8079 if (NILP (object)
8080 || CODING_SYSTEM_ID (object) >= 0)
44e8490d 8081 return Qt;
d4a1d553
JB
8082 if (! SYMBOLP (object)
8083 || NILP (Fget (object, Qcoding_system_define_form)))
44e8490d
KH
8084 return Qnil;
8085 return Qt;
4ed46869
KH
8086}
8087
9d991de8
RS
8088DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8089 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
8090 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
8091 (prompt)
4ed46869
KH
8092 Lisp_Object prompt;
8093{
e0e989f6 8094 Lisp_Object val;
9d991de8
RS
8095 do
8096 {
4608c386
KH
8097 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8098 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 8099 }
8f924df7 8100 while (SCHARS (val) == 0);
e0e989f6 8101 return (Fintern (val, Qnil));
4ed46869
KH
8102}
8103
9b787f3e 8104DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae 8105 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
c7183fb8
GM
8106If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8107Ignores case when completing coding systems (all Emacs coding systems
8108are lower-case). */)
48b0f3ae 8109 (prompt, default_coding_system)
9b787f3e 8110 Lisp_Object prompt, default_coding_system;
4ed46869 8111{
f44d27ce 8112 Lisp_Object val;
c7183fb8
GM
8113 int count = SPECPDL_INDEX ();
8114
9b787f3e 8115 if (SYMBOLP (default_coding_system))
57d25e6f 8116 default_coding_system = SYMBOL_NAME (default_coding_system);
c7183fb8 8117 specbind (Qcompletion_ignore_case, Qt);
4608c386 8118 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
8119 Qt, Qnil, Qcoding_system_history,
8120 default_coding_system, Qnil);
c7183fb8 8121 unbind_to (count, Qnil);
8f924df7 8122 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
8123}
8124
8125DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8126 1, 1, 0,
48b0f3ae 8127 doc: /* Check validity of CODING-SYSTEM.
9ffd559c
KH
8128If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8129It is valid if it is nil or a symbol defined as a coding system by the
8130function `define-coding-system'. */)
df7492f9 8131 (coding_system)
4ed46869
KH
8132 Lisp_Object coding_system;
8133{
44e8490d
KH
8134 Lisp_Object define_form;
8135
8136 define_form = Fget (coding_system, Qcoding_system_define_form);
8137 if (! NILP (define_form))
8138 {
8139 Fput (coding_system, Qcoding_system_define_form, Qnil);
8140 safe_eval (define_form);
8141 }
4ed46869
KH
8142 if (!NILP (Fcoding_system_p (coding_system)))
8143 return coding_system;
fcad4ec4 8144 xsignal1 (Qcoding_system_error, coding_system);
4ed46869 8145}
df7492f9 8146
3a73fa5d 8147\f
89528eb3
KH
/* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
   HIGHEST is nonzero, return the coding system of the highest
   priority among the detected coding systems.  Otherwise return a
   list of detected coding systems sorted by their priorities.  If
   MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
   multibyte form but contain only ASCII and eight-bit chars.
   Otherwise, the bytes are raw bytes.

   CODING-SYSTEM controls the detection as below:

   If it is nil, detect both text-format and eol-format.  If the
   text-format part of CODING-SYSTEM is already specified
   (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
   part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
   detect only text-format.  */
d46c5b12 8164Lisp_Object
24a73b0a
KH
8165detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
8166 coding_system)
8f924df7 8167 const unsigned char *src;
13818c30
SM
8168 EMACS_INT src_chars, src_bytes;
8169 int highest;
0a28aafb 8170 int multibytep;
df7492f9 8171 Lisp_Object coding_system;
4ed46869 8172{
8f924df7 8173 const unsigned char *src_end = src + src_bytes;
df7492f9 8174 Lisp_Object attrs, eol_type;
4533845d 8175 Lisp_Object val = Qnil;
df7492f9 8176 struct coding_system coding;
89528eb3 8177 int id;
ff0dacd7 8178 struct coding_detection_info detect_info;
24a73b0a 8179 enum coding_category base_category;
2f3cbb32 8180 int null_byte_found = 0, eight_bit_found = 0;
b73bfc1c 8181
df7492f9
KH
8182 if (NILP (coding_system))
8183 coding_system = Qundecided;
8184 setup_coding_system (coding_system, &coding);
8185 attrs = CODING_ID_ATTRS (coding.id);
8186 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 8187 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 8188
df7492f9 8189 coding.source = src;
24a73b0a 8190 coding.src_chars = src_chars;
df7492f9
KH
8191 coding.src_bytes = src_bytes;
8192 coding.src_multibyte = multibytep;
8193 coding.consumed = 0;
89528eb3 8194 coding.mode |= CODING_MODE_LAST_BLOCK;
c0e16b14 8195 coding.head_ascii = 0;
d46c5b12 8196
ff0dacd7 8197 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 8198
89528eb3 8199 /* At first, detect text-format if necessary. */
24a73b0a
KH
8200 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8201 if (base_category == coding_category_undecided)
4ed46869 8202 {
ff0dacd7
KH
8203 enum coding_category category;
8204 struct coding_system *this;
8205 int c, i;
88993dfd 8206
24a73b0a 8207 /* Skip all ASCII bytes except for a few ISO2022 controls. */
2f3cbb32 8208 for (; src < src_end; src++)
4ed46869 8209 {
df7492f9 8210 c = *src;
6cb21a4f 8211 if (c & 0x80)
6cb21a4f 8212 {
2f3cbb32 8213 eight_bit_found = 1;
2f3cbb32
KH
8214 if (null_byte_found)
8215 break;
8216 }
c0e16b14 8217 else if (c < 0x20)
2f3cbb32
KH
8218 {
8219 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8220 && ! inhibit_iso_escape_detection
8221 && ! detect_info.checked)
6cb21a4f 8222 {
2f3cbb32
KH
8223 if (detect_coding_iso_2022 (&coding, &detect_info))
8224 {
8225 /* We have scanned the whole data. */
8226 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
c0e16b14
KH
8227 {
8228 /* We didn't find an 8-bit code. We may
8229 have found a null-byte, but it's very
8230 rare that a binary file confirm to
8231 ISO-2022. */
8232 src = src_end;
8233 coding.head_ascii = src - coding.source;
8234 }
8235 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
2f3cbb32
KH
8236 break;
8237 }
8238 }
97b1b294 8239 else if (! c && !inhibit_null_byte_detection)
2f3cbb32
KH
8240 {
8241 null_byte_found = 1;
8242 if (eight_bit_found)
8243 break;
6cb21a4f 8244 }
c006c0c8
KH
8245 if (! eight_bit_found)
8246 coding.head_ascii++;
6cb21a4f 8247 }
c006c0c8 8248 else if (! eight_bit_found)
c0e16b14 8249 coding.head_ascii++;
4ed46869 8250 }
88993dfd 8251
2f3cbb32
KH
8252 if (null_byte_found || eight_bit_found
8253 || coding.head_ascii < coding.src_bytes
6cb21a4f
KH
8254 || detect_info.found)
8255 {
2f3cbb32 8256 if (coding.head_ascii == coding.src_bytes)
6cb21a4f
KH
8257 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
8258 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7 8259 {
6cb21a4f 8260 category = coding_priorities[i];
c7266f4a 8261 this = coding_categories + category;
6cb21a4f 8262 if (detect_info.found & (1 << category))
ff0dacd7
KH
8263 break;
8264 }
6cb21a4f 8265 else
2f3cbb32
KH
8266 {
8267 if (null_byte_found)
8268 {
8269 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8270 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8271 }
8272 for (i = 0; i < coding_category_raw_text; i++)
8273 {
8274 category = coding_priorities[i];
8275 this = coding_categories + category;
6cb21a4f 8276
2f3cbb32
KH
8277 if (this->id < 0)
8278 {
8279 /* No coding system of this category is defined. */
8280 detect_info.rejected |= (1 << category);
8281 }
8282 else if (category >= coding_category_raw_text)
8283 continue;
8284 else if (detect_info.checked & (1 << category))
8285 {
8286 if (highest
8287 && (detect_info.found & (1 << category)))
6cb21a4f 8288 break;
2f3cbb32
KH
8289 }
8290 else if ((*(this->detector)) (&coding, &detect_info)
8291 && highest
8292 && (detect_info.found & (1 << category)))
8293 {
8294 if (category == coding_category_utf_16_auto)
8295 {
8296 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8297 category = coding_category_utf_16_le;
8298 else
8299 category = coding_category_utf_16_be;
8300 }
8301 break;
8302 }
8303 }
8304 }
6cb21a4f 8305 }
ec6d2bb8 8306
4cddb209
KH
8307 if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8308 || null_byte_found)
ec6d2bb8 8309 {
ff0dacd7 8310 detect_info.found = CATEGORY_MASK_RAW_TEXT;
4cddb209 8311 id = CODING_SYSTEM_ID (Qno_conversion);
89528eb3
KH
8312 val = Fcons (make_number (id), Qnil);
8313 }
ff0dacd7 8314 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 8315 {
ff0dacd7 8316 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
8317 id = coding_categories[coding_category_undecided].id;
8318 val = Fcons (make_number (id), Qnil);
8319 }
8320 else if (highest)
8321 {
ff0dacd7 8322 if (detect_info.found)
ec6d2bb8 8323 {
ff0dacd7
KH
8324 detect_info.found = 1 << category;
8325 val = Fcons (make_number (this->id), Qnil);
8326 }
8327 else
8328 for (i = 0; i < coding_category_raw_text; i++)
8329 if (! (detect_info.rejected & (1 << coding_priorities[i])))
8330 {
8331 detect_info.found = 1 << coding_priorities[i];
8332 id = coding_categories[coding_priorities[i]].id;
8333 val = Fcons (make_number (id), Qnil);
8334 break;
8335 }
8336 }
89528eb3
KH
8337 else
8338 {
ff0dacd7
KH
8339 int mask = detect_info.rejected | detect_info.found;
8340 int found = 0;
ec6d2bb8 8341
89528eb3 8342 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
8343 {
8344 category = coding_priorities[i];
8345 if (! (mask & (1 << category)))
ec6d2bb8 8346 {
ff0dacd7
KH
8347 found |= 1 << category;
8348 id = coding_categories[category].id;
c7266f4a
KH
8349 if (id >= 0)
8350 val = Fcons (make_number (id), val);
ff0dacd7
KH
8351 }
8352 }
8353 for (i = coding_category_raw_text - 1; i >= 0; i--)
8354 {
8355 category = coding_priorities[i];
8356 if (detect_info.found & (1 << category))
8357 {
8358 id = coding_categories[category].id;
8359 val = Fcons (make_number (id), val);
ec6d2bb8 8360 }
ec6d2bb8 8361 }
ff0dacd7 8362 detect_info.found |= found;
ec6d2bb8 8363 }
ec6d2bb8 8364 }
a470d443
KH
8365 else if (base_category == coding_category_utf_8_auto)
8366 {
8367 if (detect_coding_utf_8 (&coding, &detect_info))
8368 {
8369 struct coding_system *this;
8370
8371 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8372 this = coding_categories + coding_category_utf_8_sig;
8373 else
8374 this = coding_categories + coding_category_utf_8_nosig;
8375 val = Fcons (make_number (this->id), Qnil);
8376 }
8377 }
24a73b0a
KH
8378 else if (base_category == coding_category_utf_16_auto)
8379 {
8380 if (detect_coding_utf_16 (&coding, &detect_info))
8381 {
24a73b0a
KH
8382 struct coding_system *this;
8383
8384 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8385 this = coding_categories + coding_category_utf_16_le;
8386 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8387 this = coding_categories + coding_category_utf_16_be;
8388 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8389 this = coding_categories + coding_category_utf_16_be_nosig;
8390 else
8391 this = coding_categories + coding_category_utf_16_le_nosig;
8392 val = Fcons (make_number (this->id), Qnil);
8393 }
8394 }
df7492f9
KH
8395 else
8396 {
ff0dacd7 8397 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 8398 val = Fcons (make_number (coding.id), Qnil);
4ed46869 8399 }
df7492f9 8400
89528eb3 8401 /* Then, detect eol-format if necessary. */
df7492f9 8402 {
4533845d 8403 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
df7492f9
KH
8404 Lisp_Object tail;
8405
89528eb3
KH
8406 if (VECTORP (eol_type))
8407 {
ff0dacd7 8408 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
2f3cbb32
KH
8409 {
8410 if (null_byte_found)
8411 normal_eol = EOL_SEEN_LF;
8412 else
8413 normal_eol = detect_eol (coding.source, src_bytes,
8414 coding_category_raw_text);
8415 }
ff0dacd7
KH
8416 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8417 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
8418 utf_16_be_eol = detect_eol (coding.source, src_bytes,
8419 coding_category_utf_16_be);
ff0dacd7
KH
8420 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8421 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
8422 utf_16_le_eol = detect_eol (coding.source, src_bytes,
8423 coding_category_utf_16_le);
8424 }
8425 else
8426 {
8427 if (EQ (eol_type, Qunix))
8428 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8429 else if (EQ (eol_type, Qdos))
8430 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8431 else
8432 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8433 }
8434
df7492f9
KH
8435 for (tail = val; CONSP (tail); tail = XCDR (tail))
8436 {
89528eb3 8437 enum coding_category category;
df7492f9 8438 int this_eol;
89528eb3
KH
8439
8440 id = XINT (XCAR (tail));
8441 attrs = CODING_ID_ATTRS (id);
8442 category = XINT (CODING_ATTR_CATEGORY (attrs));
8443 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
8444 if (VECTORP (eol_type))
8445 {
89528eb3
KH
8446 if (category == coding_category_utf_16_be
8447 || category == coding_category_utf_16_be_nosig)
8448 this_eol = utf_16_be_eol;
8449 else if (category == coding_category_utf_16_le
8450 || category == coding_category_utf_16_le_nosig)
8451 this_eol = utf_16_le_eol;
df7492f9 8452 else
89528eb3
KH
8453 this_eol = normal_eol;
8454
df7492f9
KH
8455 if (this_eol == EOL_SEEN_LF)
8456 XSETCAR (tail, AREF (eol_type, 0));
8457 else if (this_eol == EOL_SEEN_CRLF)
8458 XSETCAR (tail, AREF (eol_type, 1));
8459 else if (this_eol == EOL_SEEN_CR)
8460 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
8461 else
8462 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 8463 }
89528eb3
KH
8464 else
8465 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
8466 }
8467 }
ec6d2bb8 8468
4533845d 8469 return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
ec6d2bb8
KH
8470}
8471
ec6d2bb8 8472
d46c5b12
KH
8473DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8474 2, 3, 0,
48b0f3ae
PJ
8475 doc: /* Detect coding system of the text in the region between START and END.
8476Return a list of possible coding systems ordered by priority.
b811c52b
KH
8477The coding systems to try and their priorities follows what
8478the function `coding-system-priority-list' (which see) returns.
ec6d2bb8 8479
12e0131a 8480If only ASCII characters are found (except for such ISO-2022 control
d4a1d553
JB
8481characters as ESC), it returns a list of single element `undecided'
8482or its subsidiary coding system according to a detected end-of-line
8483format.
ec6d2bb8 8484
48b0f3ae
PJ
8485If optional argument HIGHEST is non-nil, return the coding system of
8486highest priority. */)
8487 (start, end, highest)
d46c5b12
KH
8488 Lisp_Object start, end, highest;
8489{
8490 int from, to;
8491 int from_byte, to_byte;
ec6d2bb8 8492
b7826503
PJ
8493 CHECK_NUMBER_COERCE_MARKER (start);
8494 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 8495
d46c5b12
KH
8496 validate_region (&start, &end);
8497 from = XINT (start), to = XINT (end);
8498 from_byte = CHAR_TO_BYTE (from);
8499 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 8500
d46c5b12
KH
8501 if (from < GPT && to >= GPT)
8502 move_gap_both (to, to_byte);
c210f766 8503
d46c5b12 8504 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 8505 to - from, to_byte - from_byte,
0a28aafb
KH
8506 !NILP (highest),
8507 !NILP (current_buffer
df7492f9
KH
8508 ->enable_multibyte_characters),
8509 Qnil);
ec6d2bb8
KH
8510}
8511
d46c5b12
KH
8512DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8513 1, 2, 0,
48b0f3ae
PJ
8514 doc: /* Detect coding system of the text in STRING.
8515Return a list of possible coding systems ordered by priority.
67ceab9d
KH
8516The coding systems to try and their priorities follows what
8517the function `coding-system-priority-list' (which see) returns.
fb88bf2d 8518
12e0131a 8519If only ASCII characters are found (except for such ISO-2022 control
d4a1d553
JB
8520characters as ESC), it returns a list of single element `undecided'
8521or its subsidiary coding system according to a detected end-of-line
8522format.
d46c5b12 8523
48b0f3ae
PJ
8524If optional argument HIGHEST is non-nil, return the coding system of
8525highest priority. */)
8526 (string, highest)
d46c5b12
KH
8527 Lisp_Object string, highest;
8528{
b7826503 8529 CHECK_STRING (string);
b73bfc1c 8530
24a73b0a
KH
8531 return detect_coding_system (SDATA (string),
8532 SCHARS (string), SBYTES (string),
8f924df7 8533 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 8534 Qnil);
4ed46869 8535}
4ed46869 8536
b73bfc1c 8537
df7492f9 8538static INLINE int
971de7fb 8539char_encodable_p (int c, Lisp_Object attrs)
05e6f5dc 8540{
df7492f9 8541 Lisp_Object tail;
df7492f9 8542 struct charset *charset;
7d64c6ad 8543 Lisp_Object translation_table;
d46c5b12 8544
7d64c6ad 8545 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 8546 if (! NILP (translation_table))
7d64c6ad 8547 c = translate_char (translation_table, c);
df7492f9
KH
8548 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8549 CONSP (tail); tail = XCDR (tail))
e133c8fa 8550 {
df7492f9
KH
8551 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8552 if (CHAR_CHARSET_P (c, charset))
8553 break;
e133c8fa 8554 }
df7492f9 8555 return (! NILP (tail));
05e6f5dc 8556}
83fa074f 8557
fb88bf2d 8558
df7492f9
KH
8559/* Return a list of coding systems that safely encode the text between
8560 START and END. If EXCLUDE is non-nil, it is a list of coding
8561 systems not to check. The returned list doesn't contain any such
48468dac 8562 coding systems. In any case, if the text contains only ASCII or is
df7492f9 8563 unibyte, return t. */
e077cc80 8564
df7492f9
KH
8565DEFUN ("find-coding-systems-region-internal",
8566 Ffind_coding_systems_region_internal,
8567 Sfind_coding_systems_region_internal, 2, 3, 0,
8568 doc: /* Internal use only. */)
8569 (start, end, exclude)
8570 Lisp_Object start, end, exclude;
8571{
8572 Lisp_Object coding_attrs_list, safe_codings;
8573 EMACS_INT start_byte, end_byte;
7c78e542 8574 const unsigned char *p, *pbeg, *pend;
df7492f9 8575 int c;
0e727afa 8576 Lisp_Object tail, elt, work_table;
d46c5b12 8577
df7492f9
KH
8578 if (STRINGP (start))
8579 {
8580 if (!STRING_MULTIBYTE (start)
8f924df7 8581 || SCHARS (start) == SBYTES (start))
df7492f9
KH
8582 return Qt;
8583 start_byte = 0;
8f924df7 8584 end_byte = SBYTES (start);
df7492f9
KH
8585 }
8586 else
d46c5b12 8587 {
df7492f9
KH
8588 CHECK_NUMBER_COERCE_MARKER (start);
8589 CHECK_NUMBER_COERCE_MARKER (end);
8590 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8591 args_out_of_range (start, end);
8592 if (NILP (current_buffer->enable_multibyte_characters))
8593 return Qt;
8594 start_byte = CHAR_TO_BYTE (XINT (start));
8595 end_byte = CHAR_TO_BYTE (XINT (end));
8596 if (XINT (end) - XINT (start) == end_byte - start_byte)
8597 return Qt;
d46c5b12 8598
e1c23804 8599 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 8600 {
e1c23804
DL
8601 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8602 move_gap_both (XINT (start), start_byte);
df7492f9 8603 else
e1c23804 8604 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
8605 }
8606 }
8607
df7492f9
KH
8608 coding_attrs_list = Qnil;
8609 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8610 if (NILP (exclude)
8611 || NILP (Fmemq (XCAR (tail), exclude)))
8612 {
8613 Lisp_Object attrs;
d46c5b12 8614
df7492f9
KH
8615 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8616 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8617 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
8618 {
8619 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 8620 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
8621 coding_attrs_list = Fcons (attrs, coding_attrs_list);
8622 }
df7492f9 8623 }
d46c5b12 8624
df7492f9 8625 if (STRINGP (start))
8f924df7 8626 p = pbeg = SDATA (start);
df7492f9
KH
8627 else
8628 p = pbeg = BYTE_POS_ADDR (start_byte);
8629 pend = p + (end_byte - start_byte);
b843d1ae 8630
df7492f9
KH
8631 while (p < pend && ASCII_BYTE_P (*p)) p++;
8632 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 8633
0e727afa 8634 work_table = Fmake_char_table (Qnil, Qnil);
05e6f5dc 8635 while (p < pend)
72d1a715 8636 {
df7492f9
KH
8637 if (ASCII_BYTE_P (*p))
8638 p++;
72d1a715
RS
8639 else
8640 {
df7492f9 8641 c = STRING_CHAR_ADVANCE (p);
0e727afa
YM
8642 if (!NILP (char_table_ref (work_table, c)))
8643 /* This character was already checked. Ignore it. */
8644 continue;
12410ef1 8645
df7492f9
KH
8646 charset_map_loaded = 0;
8647 for (tail = coding_attrs_list; CONSP (tail);)
8648 {
8649 elt = XCAR (tail);
8650 if (NILP (elt))
8651 tail = XCDR (tail);
8652 else if (char_encodable_p (c, elt))
8653 tail = XCDR (tail);
8654 else if (CONSP (XCDR (tail)))
8655 {
8656 XSETCAR (tail, XCAR (XCDR (tail)));
8657 XSETCDR (tail, XCDR (XCDR (tail)));
8658 }
8659 else
8660 {
8661 XSETCAR (tail, Qnil);
8662 tail = XCDR (tail);
8663 }
8664 }
8665 if (charset_map_loaded)
8666 {
8667 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 8668
df7492f9 8669 if (STRINGP (start))
8f924df7 8670 pbeg = SDATA (start);
df7492f9
KH
8671 else
8672 pbeg = BYTE_POS_ADDR (start_byte);
8673 p = pbeg + p_offset;
8674 pend = pbeg + pend_offset;
8675 }
0e727afa 8676 char_table_set (work_table, c, Qt);
df7492f9 8677 }
ec6d2bb8 8678 }
fb88bf2d 8679
988b3759 8680 safe_codings = list2 (Qraw_text, Qno_conversion);
df7492f9
KH
8681 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8682 if (! NILP (XCAR (tail)))
8683 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 8684
05e6f5dc
KH
8685 return safe_codings;
8686}
4956c225 8687
d46c5b12 8688
8f924df7
KH
8689DEFUN ("unencodable-char-position", Funencodable_char_position,
8690 Sunencodable_char_position, 3, 5, 0,
8691 doc: /*
8692Return position of first un-encodable character in a region.
d4a1d553 8693START and END specify the region and CODING-SYSTEM specifies the
8f924df7 8694encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 8695
8f924df7
KH
8696If optional 4th argument COUNT is non-nil, it specifies at most how
8697many un-encodable characters to search. In this case, the value is a
8698list of positions.
d46c5b12 8699
8f924df7
KH
8700If optional 5th argument STRING is non-nil, it is a string to search
8701for un-encodable characters. In that case, START and END are indexes
8702to the string. */)
8703 (start, end, coding_system, count, string)
8704 Lisp_Object start, end, coding_system, count, string;
8705{
8706 int n;
8707 struct coding_system coding;
7d64c6ad 8708 Lisp_Object attrs, charset_list, translation_table;
8f924df7
KH
8709 Lisp_Object positions;
8710 int from, to;
8711 const unsigned char *p, *stop, *pend;
8712 int ascii_compatible;
fb88bf2d 8713
8f924df7
KH
8714 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8715 attrs = CODING_ID_ATTRS (coding.id);
8716 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8717 return Qnil;
8718 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8719 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 8720 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 8721
8f924df7
KH
8722 if (NILP (string))
8723 {
8724 validate_region (&start, &end);
8725 from = XINT (start);
8726 to = XINT (end);
8727 if (NILP (current_buffer->enable_multibyte_characters)
8728 || (ascii_compatible
8729 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8730 return Qnil;
8731 p = CHAR_POS_ADDR (from);
8732 pend = CHAR_POS_ADDR (to);
8733 if (from < GPT && to >= GPT)
8734 stop = GPT_ADDR;
8735 else
8736 stop = pend;
8737 }
8738 else
8739 {
8740 CHECK_STRING (string);
8741 CHECK_NATNUM (start);
8742 CHECK_NATNUM (end);
8743 from = XINT (start);
8744 to = XINT (end);
8745 if (from > to
8746 || to > SCHARS (string))
8747 args_out_of_range_3 (string, start, end);
8748 if (! STRING_MULTIBYTE (string))
8749 return Qnil;
8750 p = SDATA (string) + string_char_to_byte (string, from);
8751 stop = pend = SDATA (string) + string_char_to_byte (string, to);
8752 if (ascii_compatible && (to - from) == (pend - p))
8753 return Qnil;
8754 }
f2558efd 8755
8f924df7
KH
8756 if (NILP (count))
8757 n = 1;
8758 else
b73bfc1c 8759 {
8f924df7
KH
8760 CHECK_NATNUM (count);
8761 n = XINT (count);
b73bfc1c
KH
8762 }
8763
8f924df7
KH
8764 positions = Qnil;
8765 while (1)
d46c5b12 8766 {
8f924df7 8767 int c;
ec6d2bb8 8768
8f924df7
KH
8769 if (ascii_compatible)
8770 while (p < stop && ASCII_BYTE_P (*p))
8771 p++, from++;
8772 if (p >= stop)
0e79d667 8773 {
8f924df7
KH
8774 if (p >= pend)
8775 break;
8776 stop = pend;
8777 p = GAP_END_ADDR;
0e79d667 8778 }
ec6d2bb8 8779
8f924df7
KH
8780 c = STRING_CHAR_ADVANCE (p);
8781 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
8782 && ! char_charset (translate_char (translation_table, c),
8783 charset_list, NULL))
ec6d2bb8 8784 {
8f924df7
KH
8785 positions = Fcons (make_number (from), positions);
8786 n--;
8787 if (n == 0)
8788 break;
ec6d2bb8
KH
8789 }
8790
8f924df7
KH
8791 from++;
8792 }
d46c5b12 8793
8f924df7
KH
8794 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8795}
d46c5b12 8796
d46c5b12 8797
df7492f9
KH
8798DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8799 Scheck_coding_systems_region, 3, 3, 0,
8800 doc: /* Check if the region is encodable by coding systems.
d46c5b12 8801
df7492f9
KH
8802START and END are buffer positions specifying the region.
8803CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 8804
df7492f9 8805The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
d4a1d553 8806CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
df7492f9
KH
8807whole region, POS0, POS1, ... are buffer positions where non-encodable
8808characters are found.
93dec019 8809
df7492f9
KH
8810If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8811value is nil.
93dec019 8812
df7492f9
KH
8813START may be a string. In that case, check if the string is
8814encodable, and the value contains indices to the string instead of
5704f39a
KH
8815buffer positions. END is ignored.
8816
4c1958f4 8817If the current buffer (or START if it is a string) is unibyte, the value
5704f39a 8818is nil. */)
df7492f9
KH
8819 (start, end, coding_system_list)
8820 Lisp_Object start, end, coding_system_list;
05e6f5dc 8821{
df7492f9
KH
8822 Lisp_Object list;
8823 EMACS_INT start_byte, end_byte;
8824 int pos;
7c78e542 8825 const unsigned char *p, *pbeg, *pend;
df7492f9 8826 int c;
7d64c6ad 8827 Lisp_Object tail, elt, attrs;
70ad9fc4 8828
05e6f5dc
KH
8829 if (STRINGP (start))
8830 {
df7492f9 8831 if (!STRING_MULTIBYTE (start)
4c1958f4 8832 || SCHARS (start) == SBYTES (start))
df7492f9
KH
8833 return Qnil;
8834 start_byte = 0;
8f924df7 8835 end_byte = SBYTES (start);
df7492f9 8836 pos = 0;
d46c5b12 8837 }
05e6f5dc 8838 else
b73bfc1c 8839 {
b7826503
PJ
8840 CHECK_NUMBER_COERCE_MARKER (start);
8841 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
8842 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8843 args_out_of_range (start, end);
8844 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
8845 return Qnil;
8846 start_byte = CHAR_TO_BYTE (XINT (start));
8847 end_byte = CHAR_TO_BYTE (XINT (end));
8848 if (XINT (end) - XINT (start) == end_byte - start_byte)
5704f39a 8849 return Qnil;
df7492f9 8850
e1c23804 8851 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 8852 {
e1c23804
DL
8853 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8854 move_gap_both (XINT (start), start_byte);
df7492f9 8855 else
e1c23804 8856 move_gap_both (XINT (end), end_byte);
b73bfc1c 8857 }
e1c23804 8858 pos = XINT (start);
b73bfc1c 8859 }
7553d0e1 8860
df7492f9
KH
8861 list = Qnil;
8862 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 8863 {
df7492f9 8864 elt = XCAR (tail);
7d64c6ad 8865 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
8866 ASET (attrs, coding_attr_trans_tbl,
8867 get_translation_table (attrs, 1, NULL));
7d64c6ad 8868 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
8869 }
8870
df7492f9 8871 if (STRINGP (start))
8f924df7 8872 p = pbeg = SDATA (start);
72d1a715 8873 else
df7492f9
KH
8874 p = pbeg = BYTE_POS_ADDR (start_byte);
8875 pend = p + (end_byte - start_byte);
4ed46869 8876
df7492f9
KH
8877 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8878 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 8879
df7492f9 8880 while (p < pend)
d46c5b12 8881 {
df7492f9
KH
8882 if (ASCII_BYTE_P (*p))
8883 p++;
e133c8fa 8884 else
05e6f5dc 8885 {
df7492f9
KH
8886 c = STRING_CHAR_ADVANCE (p);
8887
8888 charset_map_loaded = 0;
8889 for (tail = list; CONSP (tail); tail = XCDR (tail))
8890 {
8891 elt = XCDR (XCAR (tail));
8892 if (! char_encodable_p (c, XCAR (elt)))
8893 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8894 }
8895 if (charset_map_loaded)
8896 {
8897 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8898
8899 if (STRINGP (start))
8f924df7 8900 pbeg = SDATA (start);
df7492f9
KH
8901 else
8902 pbeg = BYTE_POS_ADDR (start_byte);
8903 p = pbeg + p_offset;
8904 pend = pbeg + pend_offset;
8905 }
05e6f5dc 8906 }
df7492f9 8907 pos++;
d46c5b12 8908 }
4ed46869 8909
df7492f9
KH
8910 tail = list;
8911 list = Qnil;
8912 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 8913 {
df7492f9
KH
8914 elt = XCAR (tail);
8915 if (CONSP (XCDR (XCDR (elt))))
8916 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8917 list);
ec6d2bb8 8918 }
2b4f9037 8919
df7492f9 8920 return list;
d46c5b12
KH
8921}
8922
3fd9494b 8923
b73bfc1c 8924Lisp_Object
971de7fb 8925code_convert_region (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object dst_object, int encodep, int norecord)
4ed46869 8926{
3a73fa5d 8927 struct coding_system coding;
df7492f9
KH
8928 EMACS_INT from, from_byte, to, to_byte;
8929 Lisp_Object src_object;
4ed46869 8930
b7826503
PJ
8931 CHECK_NUMBER_COERCE_MARKER (start);
8932 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
8933 if (NILP (coding_system))
8934 coding_system = Qno_conversion;
8935 else
8936 CHECK_CODING_SYSTEM (coding_system);
8937 src_object = Fcurrent_buffer ();
8938 if (NILP (dst_object))
8939 dst_object = src_object;
8940 else if (! EQ (dst_object, Qt))
8941 CHECK_BUFFER (dst_object);
3a73fa5d 8942
d46c5b12
KH
8943 validate_region (&start, &end);
8944 from = XFASTINT (start);
df7492f9 8945 from_byte = CHAR_TO_BYTE (from);
d46c5b12 8946 to = XFASTINT (end);
df7492f9 8947 to_byte = CHAR_TO_BYTE (to);
764ca8da 8948
df7492f9
KH
8949 setup_coding_system (coding_system, &coding);
8950 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 8951
df7492f9
KH
8952 if (encodep)
8953 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8954 dst_object);
8955 else
8956 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8957 dst_object);
8958 if (! norecord)
8959 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 8960
df7492f9
KH
8961 return (BUFFERP (dst_object)
8962 ? make_number (coding.produced_char)
8963 : coding.dst_object);
4031e2bf 8964}
78108bcd 8965
4ed46869 8966
4031e2bf 8967DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 8968 3, 4, "r\nzCoding system: ",
48b0f3ae 8969 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
8970When called from a program, takes four arguments:
8971 START, END, CODING-SYSTEM, and DESTINATION.
8972START and END are buffer positions.
8844fa83 8973
df7492f9 8974Optional 4th arguments DESTINATION specifies where the decoded text goes.
2354b80b 8975If nil, the region between START and END is replaced by the decoded text.
1560f91a
EZ
8976If buffer, the decoded text is inserted in that buffer after point (point
8977does not move).
446dcd75 8978In those cases, the length of the decoded text is returned.
319a3947 8979If DESTINATION is t, the decoded text is returned.
8844fa83 8980
48b0f3ae
PJ
8981This function sets `last-coding-system-used' to the precise coding system
8982used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
319a3947 8983not fully specified.) */)
df7492f9
KH
8984 (start, end, coding_system, destination)
8985 Lisp_Object start, end, coding_system, destination;
4031e2bf 8986{
df7492f9 8987 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 8988}
8844fa83 8989
3a73fa5d 8990DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
8991 3, 4, "r\nzCoding system: ",
8992 doc: /* Encode the current region by specified coding system.
d4a1d553
JB
8993When called from a program, takes four arguments:
8994 START, END, CODING-SYSTEM and DESTINATION.
8995START and END are buffer positions.
d46c5b12 8996
df7492f9
KH
8997Optional 4th arguments DESTINATION specifies where the encoded text goes.
8998If nil, the region between START and END is replace by the encoded text.
1560f91a
EZ
8999If buffer, the encoded text is inserted in that buffer after point (point
9000does not move).
446dcd75 9001In those cases, the length of the encoded text is returned.
319a3947 9002If DESTINATION is t, the encoded text is returned.
2391eaa4 9003
48b0f3ae
PJ
9004This function sets `last-coding-system-used' to the precise coding system
9005used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
319a3947 9006not fully specified.) */)
df7492f9
KH
9007 (start, end, coding_system, destination)
9008 Lisp_Object start, end, coding_system, destination;
3a73fa5d 9009{
df7492f9 9010 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
9011}
9012
9013Lisp_Object
df7492f9
KH
9014code_convert_string (string, coding_system, dst_object,
9015 encodep, nocopy, norecord)
9016 Lisp_Object string, coding_system, dst_object;
9017 int encodep, nocopy, norecord;
b73bfc1c 9018{
4031e2bf 9019 struct coding_system coding;
df7492f9 9020 EMACS_INT chars, bytes;
ec6d2bb8 9021
b7826503 9022 CHECK_STRING (string);
d46c5b12 9023 if (NILP (coding_system))
4956c225 9024 {
df7492f9
KH
9025 if (! norecord)
9026 Vlast_coding_system_used = Qno_conversion;
9027 if (NILP (dst_object))
9028 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 9029 }
b73bfc1c 9030
df7492f9
KH
9031 if (NILP (coding_system))
9032 coding_system = Qno_conversion;
9033 else
9034 CHECK_CODING_SYSTEM (coding_system);
9035 if (NILP (dst_object))
9036 dst_object = Qt;
9037 else if (! EQ (dst_object, Qt))
9038 CHECK_BUFFER (dst_object);
73be902c 9039
df7492f9 9040 setup_coding_system (coding_system, &coding);
d46c5b12 9041 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
9042 chars = SCHARS (string);
9043 bytes = SBYTES (string);
df7492f9
KH
9044 if (encodep)
9045 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9046 else
9047 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9048 if (! norecord)
9049 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 9050
df7492f9
KH
9051 return (BUFFERP (dst_object)
9052 ? make_number (coding.produced_char)
9053 : coding.dst_object);
4ed46869 9054}
73be902c 9055
b73bfc1c 9056
ecec61c1 9057/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 9058 Do not set Vlast_coding_system_used.
4ed46869 9059
ec6d2bb8
KH
9060 This function is called only from macros DECODE_FILE and
9061 ENCODE_FILE, thus we ignore character composition. */
4ed46869 9062
ecec61c1 9063Lisp_Object
971de7fb 9064code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system, int encodep)
4ed46869 9065{
0be8721c 9066 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
9067}
9068
4ed46869 9069
df7492f9
KH
9070DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9071 2, 4, 0,
9072 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9073
9074Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9075if the decoding operation is trivial.
ecec61c1 9076
d4a1d553 9077Optional fourth arg BUFFER non-nil means that the decoded text is
1560f91a
EZ
9078inserted in that buffer after point (point does not move). In this
9079case, the return value is the length of the decoded text.
ecec61c1 9080
df7492f9
KH
9081This function sets `last-coding-system-used' to the precise coding system
9082used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
d4a1d553 9083not fully specified.) */)
df7492f9
KH
9084 (string, coding_system, nocopy, buffer)
9085 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 9086{
df7492f9
KH
9087 return code_convert_string (string, coding_system, buffer,
9088 0, ! NILP (nocopy), 0);
4ed46869
KH
9089}
9090
df7492f9
KH
9091DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9092 2, 4, 0,
9093 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9094
9095Optional third arg NOCOPY non-nil means it is OK to return STRING
9096itself if the encoding operation is trivial.
9097
d4a1d553 9098Optional fourth arg BUFFER non-nil means that the encoded text is
1560f91a
EZ
9099inserted in that buffer after point (point does not move). In this
9100case, the return value is the length of the encoded text.
df7492f9
KH
9101
9102This function sets `last-coding-system-used' to the precise coding system
9103used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9104not fully specified.) */)
9105 (string, coding_system, nocopy, buffer)
9106 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 9107{
df7492f9 9108 return code_convert_string (string, coding_system, buffer,
c197f191 9109 1, ! NILP (nocopy), 1);
4ed46869 9110}
df7492f9 9111
3a73fa5d 9112\f
4ed46869 9113DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
9114 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9115Return the corresponding character. */)
9116 (code)
4ed46869 9117 Lisp_Object code;
4ed46869 9118{
df7492f9
KH
9119 Lisp_Object spec, attrs, val;
9120 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9121 int c;
4ed46869 9122
df7492f9
KH
9123 CHECK_NATNUM (code);
9124 c = XFASTINT (code);
9125 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9126 attrs = AREF (spec, 0);
4ed46869 9127
df7492f9
KH
9128 if (ASCII_BYTE_P (c)
9129 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9130 return code;
4ed46869 9131
df7492f9
KH
9132 val = CODING_ATTR_CHARSET_LIST (attrs);
9133 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
9134 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9135 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 9136
df7492f9
KH
9137 if (c <= 0x7F)
9138 charset = charset_roman;
9139 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 9140 {
df7492f9
KH
9141 charset = charset_kana;
9142 c -= 0x80;
4ed46869 9143 }
55ab7be3 9144 else
4ed46869 9145 {
004068e4 9146 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
9147
9148 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9149 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9150 error ("Invalid code: %d", code);
9151 SJIS_TO_JIS (c);
9152 charset = charset_kanji;
4ed46869 9153 }
df7492f9
KH
9154 c = DECODE_CHAR (charset, c);
9155 if (c < 0)
9156 error ("Invalid code: %d", code);
9157 return make_number (c);
93dec019 9158}
4ed46869 9159
48b0f3ae 9160
4ed46869 9161DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8acf0c0e 9162 doc: /* Encode a Japanese character CH to shift_jis encoding.
48b0f3ae
PJ
9163Return the corresponding code in SJIS. */)
9164 (ch)
df7492f9 9165 Lisp_Object ch;
4ed46869 9166{
df7492f9
KH
9167 Lisp_Object spec, attrs, charset_list;
9168 int c;
9169 struct charset *charset;
9170 unsigned code;
48b0f3ae 9171
df7492f9
KH
9172 CHECK_CHARACTER (ch);
9173 c = XFASTINT (ch);
9174 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9175 attrs = AREF (spec, 0);
9176
9177 if (ASCII_CHAR_P (c)
9178 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9179 return ch;
9180
9181 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9182 charset = char_charset (c, charset_list, &code);
9183 if (code == CHARSET_INVALID_CODE (charset))
9184 error ("Can't encode by shift_jis encoding: %d", c);
9185 JIS_TO_SJIS (code);
9186
9187 return make_number (code);
4ed46869
KH
9188}
9189
9190DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
9191 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9192Return the corresponding character. */)
9193 (code)
4ed46869 9194 Lisp_Object code;
d46c5b12 9195{
df7492f9
KH
9196 Lisp_Object spec, attrs, val;
9197 struct charset *charset_roman, *charset_big5, *charset;
9198 int c;
6289dd10 9199
df7492f9
KH
9200 CHECK_NATNUM (code);
9201 c = XFASTINT (code);
9202 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9203 attrs = AREF (spec, 0);
4ed46869 9204
df7492f9
KH
9205 if (ASCII_BYTE_P (c)
9206 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9207 return code;
6289dd10 9208
df7492f9
KH
9209 val = CODING_ATTR_CHARSET_LIST (attrs);
9210 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9211 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 9212
df7492f9
KH
9213 if (c <= 0x7F)
9214 charset = charset_roman;
c28a9453
KH
9215 else
9216 {
df7492f9
KH
9217 int b1 = c >> 8, b2 = c & 0x7F;
9218 if (b1 < 0xA1 || b1 > 0xFE
9219 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9220 error ("Invalid code: %d", code);
9221 charset = charset_big5;
c28a9453 9222 }
df7492f9
KH
9223 c = DECODE_CHAR (charset, (unsigned )c);
9224 if (c < 0)
9225 error ("Invalid code: %d", code);
9226 return make_number (c);
d46c5b12 9227}
6289dd10 9228
4ed46869 9229DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8acf0c0e 9230 doc: /* Encode the Big5 character CH to BIG5 coding system.
48b0f3ae
PJ
9231Return the corresponding character code in Big5. */)
9232 (ch)
4ed46869
KH
9233 Lisp_Object ch;
9234{
df7492f9
KH
9235 Lisp_Object spec, attrs, charset_list;
9236 struct charset *charset;
9237 int c;
9238 unsigned code;
9239
9240 CHECK_CHARACTER (ch);
9241 c = XFASTINT (ch);
9242 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9243 attrs = AREF (spec, 0);
9244 if (ASCII_CHAR_P (c)
9245 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9246 return ch;
9247
9248 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9249 charset = char_charset (c, charset_list, &code);
9250 if (code == CHARSET_INVALID_CODE (charset))
9251 error ("Can't encode by Big5 encoding: %d", c);
9252
9253 return make_number (code);
4ed46869 9254}
48b0f3ae 9255
3a73fa5d 9256\f
002fdb44 9257DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
68bba4e4 9258 Sset_terminal_coding_system_internal, 1, 2, 0,
48b0f3ae 9259 doc: /* Internal use only. */)
6ed8eeff 9260 (coding_system, terminal)
b74e4686 9261 Lisp_Object coding_system;
6ed8eeff 9262 Lisp_Object terminal;
4ed46869 9263{
6ed8eeff 9264 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
b7826503 9265 CHECK_SYMBOL (coding_system);
b8299c66 9266 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
70c22245 9267 /* We had better not send unsafe characters to terminal. */
c73bd236 9268 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
df7492f9 9269 /* Characer composition should be disabled. */
c73bd236 9270 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b8299c66
KL
9271 terminal_coding->src_multibyte = 1;
9272 terminal_coding->dst_multibyte = 0;
4ed46869
KH
9273 return Qnil;
9274}
9275
c4825358
KH
9276DEFUN ("set-safe-terminal-coding-system-internal",
9277 Fset_safe_terminal_coding_system_internal,
48b0f3ae 9278 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 9279 doc: /* Internal use only. */)
48b0f3ae 9280 (coding_system)
b74e4686 9281 Lisp_Object coding_system;
d46c5b12 9282{
b7826503 9283 CHECK_SYMBOL (coding_system);
c4825358
KH
9284 setup_coding_system (Fcheck_coding_system (coding_system),
9285 &safe_terminal_coding);
df7492f9
KH
9286 /* Characer composition should be disabled. */
9287 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
9288 safe_terminal_coding.src_multibyte = 1;
9289 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
9290 return Qnil;
9291}
4ed46869 9292
002fdb44 9293DEFUN ("terminal-coding-system", Fterminal_coding_system,
68bba4e4 9294 Sterminal_coding_system, 0, 1, 0,
6ed8eeff 9295 doc: /* Return coding system specified for terminal output on the given terminal.
708e05dc 9296TERMINAL may be a terminal object, a frame, or nil for the selected
6ed8eeff
KL
9297frame's terminal device. */)
9298 (terminal)
9299 Lisp_Object terminal;
4ed46869 9300{
985773c9
MB
9301 struct coding_system *terminal_coding
9302 = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9303 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
ae6f73fa 9304
ae6f73fa 9305 /* For backward compatibility, return nil if it is `undecided'. */
f75c90a9 9306 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
4ed46869
KH
9307}
9308
002fdb44 9309DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
68bba4e4 9310 Sset_keyboard_coding_system_internal, 1, 2, 0,
48b0f3ae 9311 doc: /* Internal use only. */)
6ed8eeff 9312 (coding_system, terminal)
4ed46869 9313 Lisp_Object coding_system;
6ed8eeff 9314 Lisp_Object terminal;
4ed46869 9315{
6ed8eeff 9316 struct terminal *t = get_terminal (terminal, 1);
b7826503 9317 CHECK_SYMBOL (coding_system);
624bda09
KH
9318 if (NILP (coding_system))
9319 coding_system = Qno_conversion;
9320 else
9321 Fcheck_coding_system (coding_system);
9322 setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
df7492f9 9323 /* Characer composition should be disabled. */
c73bd236
MB
9324 TERMINAL_KEYBOARD_CODING (t)->common_flags
9325 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
9326 return Qnil;
9327}
9328
9329DEFUN ("keyboard-coding-system",
985773c9 9330 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
48b0f3ae 9331 doc: /* Return coding system specified for decoding keyboard input. */)
985773c9
MB
9332 (terminal)
9333 Lisp_Object terminal;
4ed46869 9334{
985773c9
MB
9335 return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9336 (get_terminal (terminal, 1))->id);
4ed46869
KH
9337}
9338
4ed46869 9339\f
a5d301df
KH
9340DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9341 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
9342 doc: /* Choose a coding system for an operation based on the target name.
9343The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9344DECODING-SYSTEM is the coding system to use for decoding
9345\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9346for encoding (in case OPERATION does encoding).
05e6f5dc 9347
48b0f3ae
PJ
9348The first argument OPERATION specifies an I/O primitive:
9349 For file I/O, `insert-file-contents' or `write-region'.
9350 For process I/O, `call-process', `call-process-region', or `start-process'.
9351 For network I/O, `open-network-stream'.
05e6f5dc 9352
48b0f3ae
PJ
9353The remaining arguments should be the same arguments that were passed
9354to the primitive. Depending on which primitive, one of those arguments
9355is selected as the TARGET. For example, if OPERATION does file I/O,
9356whichever argument specifies the file name is TARGET.
05e6f5dc 9357
48b0f3ae 9358TARGET has a meaning which depends on OPERATION:
b883cdb2 9359 For file I/O, TARGET is a file name (except for the special case below).
48b0f3ae 9360 For process I/O, TARGET is a process name.
d4a1d553 9361 For network I/O, TARGET is a service name or a port number.
05e6f5dc 9362
d4a1d553 9363This function looks up what is specified for TARGET in
48b0f3ae
PJ
9364`file-coding-system-alist', `process-coding-system-alist',
9365or `network-coding-system-alist' depending on OPERATION.
9366They may specify a coding system, a cons of coding systems,
9367or a function symbol to call.
9368In the last case, we call the function with one argument,
9369which is a list of all the arguments given to this function.
1011c487
MB
9370If the function can't decide a coding system, it can return
9371`undecided' so that the normal code-detection is performed.
48b0f3ae 9372
b883cdb2
MB
9373If OPERATION is `insert-file-contents', the argument corresponding to
9374TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
9375file name to look up, and BUFFER is a buffer that contains the file's
9376contents (not yet decoded). If `file-coding-system-alist' specifies a
9377function to call for FILENAME, that function should examine the
9378contents of BUFFER instead of reading the file.
9379
d918f936 9380usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
48b0f3ae 9381 (nargs, args)
4ed46869
KH
9382 int nargs;
9383 Lisp_Object *args;
6b89e3aa 9384{
4ed46869
KH
9385 Lisp_Object operation, target_idx, target, val;
9386 register Lisp_Object chain;
177c0ea7 9387
4ed46869
KH
9388 if (nargs < 2)
9389 error ("Too few arguments");
9390 operation = args[0];
9391 if (!SYMBOLP (operation)
9392 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
3ed051d4 9393 error ("Invalid first argument");
4ed46869
KH
9394 if (nargs < 1 + XINT (target_idx))
9395 error ("Too few arguments for operation: %s",
8f924df7 9396 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
9397 target = args[XINT (target_idx) + 1];
9398 if (!(STRINGP (target)
091a0ff0
KH
9399 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9400 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
4ed46869 9401 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 9402 error ("Invalid %dth argument", XINT (target_idx) + 1);
091a0ff0
KH
9403 if (CONSP (target))
9404 target = XCAR (target);
4ed46869 9405
2e34157c
RS
9406 chain = ((EQ (operation, Qinsert_file_contents)
9407 || EQ (operation, Qwrite_region))
02ba4723 9408 ? Vfile_coding_system_alist
2e34157c 9409 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
9410 ? Vnetwork_coding_system_alist
9411 : Vprocess_coding_system_alist));
4ed46869
KH
9412 if (NILP (chain))
9413 return Qnil;
9414
03699b14 9415 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 9416 {
f44d27ce 9417 Lisp_Object elt;
6b89e3aa 9418
df7492f9 9419 elt = XCAR (chain);
4ed46869
KH
9420 if (CONSP (elt)
9421 && ((STRINGP (target)
03699b14
KR
9422 && STRINGP (XCAR (elt))
9423 && fast_string_match (XCAR (elt), target) >= 0)
9424 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 9425 {
03699b14 9426 val = XCDR (elt);
b19fd4c5
KH
9427 /* Here, if VAL is both a valid coding system and a valid
9428 function symbol, we return VAL as a coding system. */
02ba4723
KH
9429 if (CONSP (val))
9430 return val;
9431 if (! SYMBOLP (val))
9432 return Qnil;
9433 if (! NILP (Fcoding_system_p (val)))
9434 return Fcons (val, val);
b19fd4c5 9435 if (! NILP (Ffboundp (val)))
6b89e3aa 9436 {
e2b97060
MB
9437 /* We use call1 rather than safe_call1
9438 so as to get bug reports about functions called here
9439 which don't handle the current interface. */
9440 val = call1 (val, Flist (nargs, args));
b19fd4c5
KH
9441 if (CONSP (val))
9442 return val;
9443 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9444 return Fcons (val, val);
6b89e3aa 9445 }
02ba4723 9446 return Qnil;
6b89e3aa
KH
9447 }
9448 }
4ed46869 9449 return Qnil;
6b89e3aa
KH
9450}
9451
df7492f9 9452DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 9453 Sset_coding_system_priority, 0, MANY, 0,
da7db224 9454 doc: /* Assign higher priority to the coding systems given as arguments.
d4a1d553 9455If multiple coding systems belong to the same category,
a3181084
DL
9456all but the first one are ignored.
9457
d4a1d553 9458usage: (set-coding-system-priority &rest coding-systems) */)
df7492f9
KH
9459 (nargs, args)
9460 int nargs;
9461 Lisp_Object *args;
9462{
9463 int i, j;
9464 int changed[coding_category_max];
9465 enum coding_category priorities[coding_category_max];
9466
9467 bzero (changed, sizeof changed);
6b89e3aa 9468
df7492f9 9469 for (i = j = 0; i < nargs; i++)
6b89e3aa 9470 {
df7492f9
KH
9471 enum coding_category category;
9472 Lisp_Object spec, attrs;
6b89e3aa 9473
df7492f9
KH
9474 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9475 attrs = AREF (spec, 0);
9476 category = XINT (CODING_ATTR_CATEGORY (attrs));
9477 if (changed[category])
9478 /* Ignore this coding system because a coding system of the
9479 same category already had a higher priority. */
9480 continue;
9481 changed[category] = 1;
9482 priorities[j++] = category;
9483 if (coding_categories[category].id >= 0
9484 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9485 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 9486 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 9487 }
6b89e3aa 9488
df7492f9
KH
9489 /* Now we have decided top J priorities. Reflect the order of the
9490 original priorities to the remaining priorities. */
6b89e3aa 9491
df7492f9 9492 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 9493 {
df7492f9
KH
9494 while (j < coding_category_max
9495 && changed[coding_priorities[j]])
9496 j++;
9497 if (j == coding_category_max)
9498 abort ();
9499 priorities[i] = coding_priorities[j];
9500 }
6b89e3aa 9501
df7492f9 9502 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 9503
ff563fce
KH
9504 /* Update `coding-category-list'. */
9505 Vcoding_category_list = Qnil;
9506 for (i = coding_category_max - 1; i >= 0; i--)
9507 Vcoding_category_list
9508 = Fcons (AREF (Vcoding_category_table, priorities[i]),
9509 Vcoding_category_list);
6b89e3aa 9510
df7492f9 9511 return Qnil;
6b89e3aa
KH
9512}
9513
df7492f9
KH
9514DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9515 Scoding_system_priority_list, 0, 1, 0,
da7db224 9516 doc: /* Return a list of coding systems ordered by their priorities.
b811c52b
KH
9517The list contains a subset of coding systems; i.e. coding systems
9518assigned to each coding category (see `coding-category-list').
9519
da7db224 9520HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
9521 (highestp)
9522 Lisp_Object highestp;
d46c5b12
KH
9523{
9524 int i;
df7492f9 9525 Lisp_Object val;
6b89e3aa 9526
df7492f9 9527 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 9528 {
df7492f9
KH
9529 enum coding_category category = coding_priorities[i];
9530 int id = coding_categories[category].id;
9531 Lisp_Object attrs;
068a9dbd 9532
df7492f9
KH
9533 if (id < 0)
9534 continue;
9535 attrs = CODING_ID_ATTRS (id);
9536 if (! NILP (highestp))
9537 return CODING_ATTR_BASE_NAME (attrs);
9538 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9539 }
9540 return Fnreverse (val);
9541}
068a9dbd 9542
91433552 9543static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
9544
9545static Lisp_Object
971de7fb 9546make_subsidiaries (Lisp_Object base)
068a9dbd 9547{
df7492f9 9548 Lisp_Object subsidiaries;
8f924df7 9549 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
9550 char *buf = (char *) alloca (base_name_len + 6);
9551 int i;
068a9dbd 9552
8f924df7 9553 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
9554 subsidiaries = Fmake_vector (make_number (3), Qnil);
9555 for (i = 0; i < 3; i++)
068a9dbd 9556 {
df7492f9
KH
9557 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9558 ASET (subsidiaries, i, intern (buf));
068a9dbd 9559 }
df7492f9 9560 return subsidiaries;
068a9dbd
KH
9561}
9562
9563
df7492f9
KH
9564DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9565 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
9566 doc: /* For internal use only.
9567usage: (define-coding-system-internal ...) */)
df7492f9
KH
9568 (nargs, args)
9569 int nargs;
9570 Lisp_Object *args;
068a9dbd 9571{
df7492f9
KH
9572 Lisp_Object name;
9573 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
9574 Lisp_Object attrs; /* Vector of attributes. */
9575 Lisp_Object eol_type;
9576 Lisp_Object aliases;
9577 Lisp_Object coding_type, charset_list, safe_charsets;
9578 enum coding_category category;
9579 Lisp_Object tail, val;
9580 int max_charset_id = 0;
9581 int i;
068a9dbd 9582
df7492f9
KH
9583 if (nargs < coding_arg_max)
9584 goto short_args;
068a9dbd 9585
df7492f9 9586 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 9587
df7492f9
KH
9588 name = args[coding_arg_name];
9589 CHECK_SYMBOL (name);
9590 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 9591
df7492f9
KH
9592 val = args[coding_arg_mnemonic];
9593 if (! STRINGP (val))
9594 CHECK_CHARACTER (val);
9595 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 9596
df7492f9
KH
9597 coding_type = args[coding_arg_coding_type];
9598 CHECK_SYMBOL (coding_type);
9599 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 9600
df7492f9
KH
9601 charset_list = args[coding_arg_charset_list];
9602 if (SYMBOLP (charset_list))
9603 {
9604 if (EQ (charset_list, Qiso_2022))
9605 {
9606 if (! EQ (coding_type, Qiso_2022))
9607 error ("Invalid charset-list");
9608 charset_list = Viso_2022_charset_list;
9609 }
9610 else if (EQ (charset_list, Qemacs_mule))
9611 {
9612 if (! EQ (coding_type, Qemacs_mule))
9613 error ("Invalid charset-list");
9614 charset_list = Vemacs_mule_charset_list;
9615 }
9616 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9617 if (max_charset_id < XFASTINT (XCAR (tail)))
9618 max_charset_id = XFASTINT (XCAR (tail));
9619 }
068a9dbd
KH
9620 else
9621 {
df7492f9 9622 charset_list = Fcopy_sequence (charset_list);
985773c9 9623 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
068a9dbd 9624 {
df7492f9
KH
9625 struct charset *charset;
9626
985773c9 9627 val = XCAR (tail);
df7492f9
KH
9628 CHECK_CHARSET_GET_CHARSET (val, charset);
9629 if (EQ (coding_type, Qiso_2022)
9630 ? CHARSET_ISO_FINAL (charset) < 0
9631 : EQ (coding_type, Qemacs_mule)
9632 ? CHARSET_EMACS_MULE_ID (charset) < 0
9633 : 0)
9634 error ("Can't handle charset `%s'",
8f924df7 9635 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 9636
8f924df7 9637 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
9638 if (max_charset_id < charset->id)
9639 max_charset_id = charset->id;
068a9dbd
KH
9640 }
9641 }
df7492f9 9642 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 9643
1b3b981b
AS
9644 safe_charsets = make_uninit_string (max_charset_id + 1);
9645 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9 9646 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 9647 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 9648 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 9649
584948ac 9650 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 9651
df7492f9 9652 val = args[coding_arg_decode_translation_table];
a6f87d34 9653 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 9654 CHECK_SYMBOL (val);
df7492f9 9655 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 9656
df7492f9 9657 val = args[coding_arg_encode_translation_table];
a6f87d34 9658 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 9659 CHECK_SYMBOL (val);
df7492f9 9660 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 9661
df7492f9
KH
9662 val = args[coding_arg_post_read_conversion];
9663 CHECK_SYMBOL (val);
9664 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 9665
df7492f9
KH
9666 val = args[coding_arg_pre_write_conversion];
9667 CHECK_SYMBOL (val);
9668 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 9669
df7492f9
KH
9670 val = args[coding_arg_default_char];
9671 if (NILP (val))
9672 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9673 else
9674 {
8f924df7 9675 CHECK_CHARACTER (val);
df7492f9
KH
9676 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9677 }
4031e2bf 9678
8f924df7
KH
9679 val = args[coding_arg_for_unibyte];
9680 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 9681
df7492f9
KH
9682 val = args[coding_arg_plist];
9683 CHECK_LIST (val);
9684 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 9685
df7492f9
KH
9686 if (EQ (coding_type, Qcharset))
9687 {
c7c66a95
KH
9688 /* Generate a lisp vector of 256 elements. Each element is nil,
9689 integer, or a list of charset IDs.
3a73fa5d 9690
c7c66a95
KH
9691 If Nth element is nil, the byte code N is invalid in this
9692 coding system.
4ed46869 9693
c7c66a95
KH
9694 If Nth element is a number NUM, N is the first byte of a
9695 charset whose ID is NUM.
4ed46869 9696
c7c66a95
KH
9697 If Nth element is a list of charset IDs, N is the first byte
9698 of one of them. The list is sorted by dimensions of the
2bc515e4 9699 charsets. A charset of smaller dimension comes first. */
df7492f9 9700 val = Fmake_vector (make_number (256), Qnil);
4ed46869 9701
5c99c2e6 9702 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 9703 {
c7c66a95
KH
9704 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9705 int dim = CHARSET_DIMENSION (charset);
9706 int idx = (dim - 1) * 4;
4ed46869 9707
5c99c2e6 9708 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 9709 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 9710
15d143f7
KH
9711 for (i = charset->code_space[idx];
9712 i <= charset->code_space[idx + 1]; i++)
9713 {
c7c66a95
KH
9714 Lisp_Object tmp, tmp2;
9715 int dim2;
ec6d2bb8 9716
c7c66a95
KH
9717 tmp = AREF (val, i);
9718 if (NILP (tmp))
9719 tmp = XCAR (tail);
9720 else if (NUMBERP (tmp))
9721 {
9722 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9723 if (dim < dim2)
c7c66a95 9724 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
9725 else
9726 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 9727 }
15d143f7 9728 else
c7c66a95
KH
9729 {
9730 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9731 {
9732 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9733 if (dim < dim2)
9734 break;
9735 }
9736 if (NILP (tmp2))
9737 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9738 else
9739 {
9740 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9741 XSETCAR (tmp2, XCAR (tail));
9742 }
9743 }
9744 ASET (val, i, tmp);
15d143f7 9745 }
df7492f9
KH
9746 }
9747 ASET (attrs, coding_attr_charset_valids, val);
9748 category = coding_category_charset;
9749 }
9750 else if (EQ (coding_type, Qccl))
9751 {
9752 Lisp_Object valids;
ecec61c1 9753
df7492f9
KH
9754 if (nargs < coding_arg_ccl_max)
9755 goto short_args;
ecec61c1 9756
df7492f9
KH
9757 val = args[coding_arg_ccl_decoder];
9758 CHECK_CCL_PROGRAM (val);
9759 if (VECTORP (val))
9760 val = Fcopy_sequence (val);
9761 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 9762
df7492f9
KH
9763 val = args[coding_arg_ccl_encoder];
9764 CHECK_CCL_PROGRAM (val);
9765 if (VECTORP (val))
9766 val = Fcopy_sequence (val);
9767 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 9768
df7492f9
KH
9769 val = args[coding_arg_ccl_valids];
9770 valids = Fmake_string (make_number (256), make_number (0));
9771 for (tail = val; !NILP (tail); tail = Fcdr (tail))
9772 {
8dcbea82 9773 int from, to;
ecec61c1 9774
df7492f9
KH
9775 val = Fcar (tail);
9776 if (INTEGERP (val))
8dcbea82
KH
9777 {
9778 from = to = XINT (val);
9779 if (from < 0 || from > 255)
9780 args_out_of_range_3 (val, make_number (0), make_number (255));
9781 }
df7492f9
KH
9782 else
9783 {
df7492f9 9784 CHECK_CONS (val);
8f924df7
KH
9785 CHECK_NATNUM_CAR (val);
9786 CHECK_NATNUM_CDR (val);
df7492f9 9787 from = XINT (XCAR (val));
8f924df7 9788 if (from > 255)
8dcbea82
KH
9789 args_out_of_range_3 (XCAR (val),
9790 make_number (0), make_number (255));
df7492f9 9791 to = XINT (XCDR (val));
8dcbea82
KH
9792 if (to < from || to > 255)
9793 args_out_of_range_3 (XCDR (val),
9794 XCAR (val), make_number (255));
df7492f9 9795 }
8dcbea82 9796 for (i = from; i <= to; i++)
8f924df7 9797 SSET (valids, i, 1);
df7492f9
KH
9798 }
9799 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 9800
df7492f9 9801 category = coding_category_ccl;
55ab7be3 9802 }
df7492f9 9803 else if (EQ (coding_type, Qutf_16))
55ab7be3 9804 {
df7492f9 9805 Lisp_Object bom, endian;
4ed46869 9806
584948ac 9807 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 9808
df7492f9
KH
9809 if (nargs < coding_arg_utf16_max)
9810 goto short_args;
4ed46869 9811
df7492f9
KH
9812 bom = args[coding_arg_utf16_bom];
9813 if (! NILP (bom) && ! EQ (bom, Qt))
9814 {
9815 CHECK_CONS (bom);
8f924df7
KH
9816 val = XCAR (bom);
9817 CHECK_CODING_SYSTEM (val);
9818 val = XCDR (bom);
9819 CHECK_CODING_SYSTEM (val);
df7492f9 9820 }
a470d443 9821 ASET (attrs, coding_attr_utf_bom, bom);
df7492f9
KH
9822
9823 endian = args[coding_arg_utf16_endian];
b49a1807
KH
9824 CHECK_SYMBOL (endian);
9825 if (NILP (endian))
9826 endian = Qbig;
9827 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 9828 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
9829 ASET (attrs, coding_attr_utf_16_endian, endian);
9830
9831 category = (CONSP (bom)
9832 ? coding_category_utf_16_auto
9833 : NILP (bom)
b49a1807 9834 ? (EQ (endian, Qbig)
df7492f9
KH
9835 ? coding_category_utf_16_be_nosig
9836 : coding_category_utf_16_le_nosig)
b49a1807 9837 : (EQ (endian, Qbig)
df7492f9
KH
9838 ? coding_category_utf_16_be
9839 : coding_category_utf_16_le));
9840 }
9841 else if (EQ (coding_type, Qiso_2022))
9842 {
9843 Lisp_Object initial, reg_usage, request, flags;
4776e638 9844 int i;
1397dc18 9845
df7492f9
KH
9846 if (nargs < coding_arg_iso2022_max)
9847 goto short_args;
9848
9849 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9850 CHECK_VECTOR (initial);
9851 for (i = 0; i < 4; i++)
9852 {
9853 val = Faref (initial, make_number (i));
9854 if (! NILP (val))
9855 {
584948ac
KH
9856 struct charset *charset;
9857
9858 CHECK_CHARSET_GET_CHARSET (val, charset);
9859 ASET (initial, i, make_number (CHARSET_ID (charset)));
9860 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9861 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9862 }
9863 else
9864 ASET (initial, i, make_number (-1));
9865 }
9866
9867 reg_usage = args[coding_arg_iso2022_reg_usage];
9868 CHECK_CONS (reg_usage);
8f924df7
KH
9869 CHECK_NUMBER_CAR (reg_usage);
9870 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
9871
9872 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9873 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 9874 {
df7492f9 9875 int id;
8f924df7 9876 Lisp_Object tmp;
df7492f9
KH
9877
9878 val = Fcar (tail);
9879 CHECK_CONS (val);
8f924df7
KH
9880 tmp = XCAR (val);
9881 CHECK_CHARSET_GET_ID (tmp, id);
9882 CHECK_NATNUM_CDR (val);
df7492f9
KH
9883 if (XINT (XCDR (val)) >= 4)
9884 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 9885 XSETCAR (val, make_number (id));
1397dc18 9886 }
4ed46869 9887
df7492f9
KH
9888 flags = args[coding_arg_iso2022_flags];
9889 CHECK_NATNUM (flags);
9890 i = XINT (flags);
9891 if (EQ (args[coding_arg_charset_list], Qiso_2022))
9892 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9893
9894 ASET (attrs, coding_attr_iso_initial, initial);
9895 ASET (attrs, coding_attr_iso_usage, reg_usage);
9896 ASET (attrs, coding_attr_iso_request, request);
9897 ASET (attrs, coding_attr_iso_flags, flags);
9898 setup_iso_safe_charsets (attrs);
9899
9900 if (i & CODING_ISO_FLAG_SEVEN_BITS)
9901 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9902 | CODING_ISO_FLAG_SINGLE_SHIFT))
9903 ? coding_category_iso_7_else
9904 : EQ (args[coding_arg_charset_list], Qiso_2022)
9905 ? coding_category_iso_7
9906 : coding_category_iso_7_tight);
9907 else
9908 {
9909 int id = XINT (AREF (initial, 1));
9910
c6fb6e98 9911 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
9912 || EQ (args[coding_arg_charset_list], Qiso_2022)
9913 || id < 0)
9914 ? coding_category_iso_8_else
9915 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9916 ? coding_category_iso_8_1
9917 : coding_category_iso_8_2);
9918 }
0ce7886f
KH
9919 if (category != coding_category_iso_8_1
9920 && category != coding_category_iso_8_2)
9921 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
9922 }
9923 else if (EQ (coding_type, Qemacs_mule))
c28a9453 9924 {
df7492f9
KH
9925 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9926 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 9927 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 9928 category = coding_category_emacs_mule;
c28a9453 9929 }
df7492f9 9930 else if (EQ (coding_type, Qshift_jis))
c28a9453 9931 {
df7492f9
KH
9932
9933 struct charset *charset;
9934
7d64c6ad 9935 if (XINT (Flength (charset_list)) != 3
6e07c25f 9936 && XINT (Flength (charset_list)) != 4)
7d64c6ad 9937 error ("There should be three or four charsets");
df7492f9
KH
9938
9939 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9940 if (CHARSET_DIMENSION (charset) != 1)
9941 error ("Dimension of charset %s is not one",
8f924df7 9942 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9943 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9944 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9945
9946 charset_list = XCDR (charset_list);
9947 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9948 if (CHARSET_DIMENSION (charset) != 1)
9949 error ("Dimension of charset %s is not one",
8f924df7 9950 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
9951
9952 charset_list = XCDR (charset_list);
9953 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9954 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
9955 error ("Dimension of charset %s is not two",
9956 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9957
9958 charset_list = XCDR (charset_list);
2b917a06
KH
9959 if (! NILP (charset_list))
9960 {
9961 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9962 if (CHARSET_DIMENSION (charset) != 2)
9963 error ("Dimension of charset %s is not two",
9964 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9965 }
df7492f9
KH
9966
9967 category = coding_category_sjis;
9968 Vsjis_coding_system = name;
c28a9453 9969 }
df7492f9
KH
9970 else if (EQ (coding_type, Qbig5))
9971 {
9972 struct charset *charset;
4ed46869 9973
df7492f9
KH
9974 if (XINT (Flength (charset_list)) != 2)
9975 error ("There should be just two charsets");
9976
9977 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9978 if (CHARSET_DIMENSION (charset) != 1)
9979 error ("Dimension of charset %s is not one",
8f924df7 9980 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9981 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9982 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9983
9984 charset_list = XCDR (charset_list);
9985 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9986 if (CHARSET_DIMENSION (charset) != 2)
9987 error ("Dimension of charset %s is not two",
8f924df7 9988 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 9989
df7492f9
KH
9990 category = coding_category_big5;
9991 Vbig5_coding_system = name;
9992 }
9993 else if (EQ (coding_type, Qraw_text))
c28a9453 9994 {
584948ac
KH
9995 category = coding_category_raw_text;
9996 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 9997 }
df7492f9 9998 else if (EQ (coding_type, Qutf_8))
4ed46869 9999 {
a470d443
KH
10000 Lisp_Object bom;
10001
584948ac 10002 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
a470d443
KH
10003
10004 if (nargs < coding_arg_utf8_max)
10005 goto short_args;
10006
10007 bom = args[coding_arg_utf8_bom];
10008 if (! NILP (bom) && ! EQ (bom, Qt))
10009 {
10010 CHECK_CONS (bom);
10011 val = XCAR (bom);
10012 CHECK_CODING_SYSTEM (val);
10013 val = XCDR (bom);
10014 CHECK_CODING_SYSTEM (val);
10015 }
10016 ASET (attrs, coding_attr_utf_bom, bom);
10017
10018 category = (CONSP (bom) ? coding_category_utf_8_auto
10019 : NILP (bom) ? coding_category_utf_8_nosig
10020 : coding_category_utf_8_sig);
4ed46869 10021 }
df7492f9
KH
10022 else if (EQ (coding_type, Qundecided))
10023 category = coding_category_undecided;
4ed46869 10024 else
df7492f9 10025 error ("Invalid coding system type: %s",
8f924df7 10026 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 10027
df7492f9 10028 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
10029 CODING_ATTR_PLIST (attrs)
10030 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10031 CODING_ATTR_PLIST (attrs)));
35befdaa 10032 CODING_ATTR_PLIST (attrs)
3ed051d4 10033 = Fcons (QCascii_compatible_p,
35befdaa
KH
10034 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10035 CODING_ATTR_PLIST (attrs)));
c4825358 10036
df7492f9
KH
10037 eol_type = args[coding_arg_eol_type];
10038 if (! NILP (eol_type)
10039 && ! EQ (eol_type, Qunix)
10040 && ! EQ (eol_type, Qdos)
10041 && ! EQ (eol_type, Qmac))
10042 error ("Invalid eol-type");
4ed46869 10043
df7492f9 10044 aliases = Fcons (name, Qnil);
4ed46869 10045
df7492f9
KH
10046 if (NILP (eol_type))
10047 {
10048 eol_type = make_subsidiaries (name);
10049 for (i = 0; i < 3; i++)
1397dc18 10050 {
df7492f9
KH
10051 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10052
10053 this_name = AREF (eol_type, i);
10054 this_aliases = Fcons (this_name, Qnil);
10055 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10056 this_spec = Fmake_vector (make_number (3), attrs);
10057 ASET (this_spec, 1, this_aliases);
10058 ASET (this_spec, 2, this_eol_type);
10059 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10060 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
583f71ca
KH
10061 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10062 if (NILP (val))
10063 Vcoding_system_alist
10064 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10065 Vcoding_system_alist);
1397dc18 10066 }
d46c5b12 10067 }
4ed46869 10068
df7492f9
KH
10069 spec_vec = Fmake_vector (make_number (3), attrs);
10070 ASET (spec_vec, 1, aliases);
10071 ASET (spec_vec, 2, eol_type);
48b0f3ae 10072
df7492f9
KH
10073 Fputhash (name, spec_vec, Vcoding_system_hash_table);
10074 Vcoding_system_list = Fcons (name, Vcoding_system_list);
583f71ca
KH
10075 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10076 if (NILP (val))
10077 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10078 Vcoding_system_alist);
48b0f3ae 10079
df7492f9
KH
10080 {
10081 int id = coding_categories[category].id;
48b0f3ae 10082
df7492f9
KH
10083 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10084 setup_coding_system (name, &coding_categories[category]);
10085 }
48b0f3ae 10086
d46c5b12 10087 return Qnil;
48b0f3ae 10088
df7492f9
KH
10089 short_args:
10090 return Fsignal (Qwrong_number_of_arguments,
10091 Fcons (intern ("define-coding-system-internal"),
10092 make_number (nargs)));
d46c5b12 10093}
4ed46869 10094
d6925f38 10095
a6f87d34
KH
10096DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10097 3, 3, 0,
10098 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
10099 (coding_system, prop, val)
10100 Lisp_Object coding_system, prop, val;
10101{
3dbe7859 10102 Lisp_Object spec, attrs;
a6f87d34
KH
10103
10104 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10105 attrs = AREF (spec, 0);
10106 if (EQ (prop, QCmnemonic))
10107 {
10108 if (! STRINGP (val))
10109 CHECK_CHARACTER (val);
10110 CODING_ATTR_MNEMONIC (attrs) = val;
10111 }
2133e2d1 10112 else if (EQ (prop, QCdefault_char))
a6f87d34
KH
10113 {
10114 if (NILP (val))
10115 val = make_number (' ');
10116 else
10117 CHECK_CHARACTER (val);
10118 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10119 }
10120 else if (EQ (prop, QCdecode_translation_table))
10121 {
10122 if (! CHAR_TABLE_P (val) && ! CONSP (val))
10123 CHECK_SYMBOL (val);
10124 CODING_ATTR_DECODE_TBL (attrs) = val;
10125 }
10126 else if (EQ (prop, QCencode_translation_table))
10127 {
10128 if (! CHAR_TABLE_P (val) && ! CONSP (val))
10129 CHECK_SYMBOL (val);
10130 CODING_ATTR_ENCODE_TBL (attrs) = val;
10131 }
10132 else if (EQ (prop, QCpost_read_conversion))
10133 {
10134 CHECK_SYMBOL (val);
10135 CODING_ATTR_POST_READ (attrs) = val;
10136 }
10137 else if (EQ (prop, QCpre_write_conversion))
10138 {
10139 CHECK_SYMBOL (val);
10140 CODING_ATTR_PRE_WRITE (attrs) = val;
10141 }
35befdaa
KH
10142 else if (EQ (prop, QCascii_compatible_p))
10143 {
10144 CODING_ATTR_ASCII_COMPAT (attrs) = val;
10145 }
a6f87d34
KH
10146
10147 CODING_ATTR_PLIST (attrs)
10148 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10149 return val;
10150}
10151
10152
df7492f9
KH
10153DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10154 Sdefine_coding_system_alias, 2, 2, 0,
10155 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
10156 (alias, coding_system)
10157 Lisp_Object alias, coding_system;
66cfb530 10158{
583f71ca 10159 Lisp_Object spec, aliases, eol_type, val;
4ed46869 10160
df7492f9
KH
10161 CHECK_SYMBOL (alias);
10162 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10163 aliases = AREF (spec, 1);
d4a1d553 10164 /* ALIASES should be a list of length more than zero, and the first
d6925f38
KH
10165 element is a base coding system. Append ALIAS at the tail of the
10166 list. */
df7492f9
KH
10167 while (!NILP (XCDR (aliases)))
10168 aliases = XCDR (aliases);
8f924df7 10169 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 10170
df7492f9
KH
10171 eol_type = AREF (spec, 2);
10172 if (VECTORP (eol_type))
4ed46869 10173 {
df7492f9
KH
10174 Lisp_Object subsidiaries;
10175 int i;
4ed46869 10176
df7492f9
KH
10177 subsidiaries = make_subsidiaries (alias);
10178 for (i = 0; i < 3; i++)
10179 Fdefine_coding_system_alias (AREF (subsidiaries, i),
10180 AREF (eol_type, i));
4ed46869 10181 }
df7492f9
KH
10182
10183 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 10184 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
583f71ca
KH
10185 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10186 if (NILP (val))
10187 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10188 Vcoding_system_alist);
66cfb530 10189
4ed46869
KH
10190 return Qnil;
10191}
10192
df7492f9
KH
10193DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10194 1, 1, 0,
10195 doc: /* Return the base of CODING-SYSTEM.
da7db224 10196Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
10197 (coding_system)
10198 Lisp_Object coding_system;
d46c5b12 10199{
df7492f9 10200 Lisp_Object spec, attrs;
d46c5b12 10201
df7492f9
KH
10202 if (NILP (coding_system))
10203 return (Qno_conversion);
10204 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10205 attrs = AREF (spec, 0);
10206 return CODING_ATTR_BASE_NAME (attrs);
10207}
1397dc18 10208
df7492f9
KH
10209DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10210 1, 1, 0,
10211 doc: "Return the property list of CODING-SYSTEM.")
10212 (coding_system)
10213 Lisp_Object coding_system;
10214{
10215 Lisp_Object spec, attrs;
1397dc18 10216
df7492f9
KH
10217 if (NILP (coding_system))
10218 coding_system = Qno_conversion;
10219 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10220 attrs = AREF (spec, 0);
10221 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
10222}
10223
df7492f9
KH
10224
10225DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10226 1, 1, 0,
da7db224 10227 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
10228 (coding_system)
10229 Lisp_Object coding_system;
66cfb530 10230{
df7492f9 10231 Lisp_Object spec;
84d60297 10232
df7492f9
KH
10233 if (NILP (coding_system))
10234 coding_system = Qno_conversion;
10235 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 10236 return AREF (spec, 1);
df7492f9 10237}
66cfb530 10238
df7492f9
KH
10239DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10240 Scoding_system_eol_type, 1, 1, 0,
10241 doc: /* Return eol-type of CODING-SYSTEM.
d4a1d553 10242An eol-type is an integer 0, 1, 2, or a vector of coding systems.
66cfb530 10243
df7492f9
KH
10244Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10245and CR respectively.
66cfb530 10246
df7492f9
KH
10247A vector value indicates that a format of end-of-line should be
10248detected automatically. Nth element of the vector is the subsidiary
10249coding system whose eol-type is N. */)
6b89e3aa
KH
10250 (coding_system)
10251 Lisp_Object coding_system;
10252{
df7492f9
KH
10253 Lisp_Object spec, eol_type;
10254 int n;
6b89e3aa 10255
df7492f9
KH
10256 if (NILP (coding_system))
10257 coding_system = Qno_conversion;
10258 if (! CODING_SYSTEM_P (coding_system))
10259 return Qnil;
10260 spec = CODING_SYSTEM_SPEC (coding_system);
10261 eol_type = AREF (spec, 2);
10262 if (VECTORP (eol_type))
10263 return Fcopy_sequence (eol_type);
10264 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10265 return make_number (n);
6b89e3aa
KH
10266}
10267
4ed46869
KH
10268#endif /* emacs */
10269
10270\f
1397dc18 10271 /*** 12. Post-amble ***/
4ed46869 10272
dfcf069d 10273void
971de7fb 10274init_coding_once (void)
4ed46869
KH
10275{
10276 int i;
10277
df7492f9
KH
10278 for (i = 0; i < coding_category_max; i++)
10279 {
10280 coding_categories[i].id = -1;
10281 coding_priorities[i] = i;
10282 }
4ed46869
KH
10283
10284 /* ISO2022 specific initialize routine. */
10285 for (i = 0; i < 0x20; i++)
b73bfc1c 10286 iso_code_class[i] = ISO_control_0;
4ed46869
KH
10287 for (i = 0x21; i < 0x7F; i++)
10288 iso_code_class[i] = ISO_graphic_plane_0;
10289 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 10290 iso_code_class[i] = ISO_control_1;
4ed46869
KH
10291 for (i = 0xA1; i < 0xFF; i++)
10292 iso_code_class[i] = ISO_graphic_plane_1;
10293 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10294 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
10295 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10296 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10297 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10298 iso_code_class[ISO_CODE_ESC] = ISO_escape;
10299 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10300 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10301 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10302
df7492f9
KH
10303 for (i = 0; i < 256; i++)
10304 {
10305 emacs_mule_bytes[i] = 1;
10306 }
7c78e542
KH
10307 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10308 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10309 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10310 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
10311}
10312
10313#ifdef emacs
10314
dfcf069d 10315void
971de7fb 10316syms_of_coding (void)
e0e989f6 10317{
df7492f9 10318 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
10319 {
10320 Lisp_Object args[2];
10321 args[0] = QCtest;
10322 args[1] = Qeq;
10323 Vcoding_system_hash_table = Fmake_hash_table (2, args);
10324 }
df7492f9
KH
10325
10326 staticpro (&Vsjis_coding_system);
10327 Vsjis_coding_system = Qnil;
e0e989f6 10328
df7492f9
KH
10329 staticpro (&Vbig5_coding_system);
10330 Vbig5_coding_system = Qnil;
10331
24a73b0a
KH
10332 staticpro (&Vcode_conversion_reused_workbuf);
10333 Vcode_conversion_reused_workbuf = Qnil;
10334
10335 staticpro (&Vcode_conversion_workbuf_name);
d67b4f80 10336 Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
e0e989f6 10337
24a73b0a 10338 reused_workbuf_in_use = 0;
df7492f9
KH
10339
10340 DEFSYM (Qcharset, "charset");
10341 DEFSYM (Qtarget_idx, "target-idx");
10342 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
10343 Fset (Qcoding_system_history, Qnil);
10344
9ce27fde 10345 /* Target FILENAME is the first argument. */
e0e989f6 10346 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 10347 /* Target FILENAME is the third argument. */
e0e989f6
KH
10348 Fput (Qwrite_region, Qtarget_idx, make_number (2));
10349
df7492f9 10350 DEFSYM (Qcall_process, "call-process");
9ce27fde 10351 /* Target PROGRAM is the first argument. */
e0e989f6
KH
10352 Fput (Qcall_process, Qtarget_idx, make_number (0));
10353
df7492f9 10354 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 10355 /* Target PROGRAM is the third argument. */
e0e989f6
KH
10356 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10357
df7492f9 10358 DEFSYM (Qstart_process, "start-process");
9ce27fde 10359 /* Target PROGRAM is the third argument. */
e0e989f6
KH
10360 Fput (Qstart_process, Qtarget_idx, make_number (2));
10361
df7492f9 10362 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 10363 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
10364 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10365
df7492f9
KH
10366 DEFSYM (Qcoding_system, "coding-system");
10367 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 10368
df7492f9
KH
10369 DEFSYM (Qeol_type, "eol-type");
10370 DEFSYM (Qunix, "unix");
10371 DEFSYM (Qdos, "dos");
4ed46869 10372
df7492f9
KH
10373 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10374 DEFSYM (Qpost_read_conversion, "post-read-conversion");
10375 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10376 DEFSYM (Qdefault_char, "default-char");
10377 DEFSYM (Qundecided, "undecided");
10378 DEFSYM (Qno_conversion, "no-conversion");
10379 DEFSYM (Qraw_text, "raw-text");
4ed46869 10380
df7492f9 10381 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 10382
df7492f9 10383 DEFSYM (Qutf_8, "utf-8");
8f924df7 10384 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 10385
df7492f9 10386 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
10387 DEFSYM (Qbig, "big");
10388 DEFSYM (Qlittle, "little");
27901516 10389
df7492f9
KH
10390 DEFSYM (Qshift_jis, "shift-jis");
10391 DEFSYM (Qbig5, "big5");
4ed46869 10392
df7492f9 10393 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 10394
df7492f9 10395 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869 10396 Fput (Qcoding_system_error, Qerror_conditions,
d67b4f80 10397 pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
4ed46869 10398 Fput (Qcoding_system_error, Qerror_message,
d67b4f80 10399 make_pure_c_string ("Invalid coding system"));
4ed46869 10400
05e6f5dc
KH
10401 /* Intern this now in case it isn't already done.
10402 Setting this variable twice is harmless.
10403 But don't staticpro it here--that is done in alloc.c. */
d67b4f80 10404 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
70c22245 10405
df7492f9 10406 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 10407 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
10408 DEFSYM (Qtranslation_table_id, "translation-table-id");
10409 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10410 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 10411
df7492f9 10412 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 10413
df7492f9 10414 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 10415
01378f49 10416 DEFSYM (QCcategory, ":category");
a6f87d34 10417 DEFSYM (QCmnemonic, ":mnemonic");
2133e2d1 10418 DEFSYM (QCdefault_char, ":default-char");
a6f87d34
KH
10419 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10420 DEFSYM (QCencode_translation_table, ":encode-translation-table");
10421 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10422 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
35befdaa 10423 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
01378f49 10424
df7492f9
KH
10425 Vcoding_category_table
10426 = Fmake_vector (make_number (coding_category_max), Qnil);
10427 staticpro (&Vcoding_category_table);
10428 /* Followings are target of code detection. */
10429 ASET (Vcoding_category_table, coding_category_iso_7,
d67b4f80 10430 intern_c_string ("coding-category-iso-7"));
df7492f9 10431 ASET (Vcoding_category_table, coding_category_iso_7_tight,
d67b4f80 10432 intern_c_string ("coding-category-iso-7-tight"));
df7492f9 10433 ASET (Vcoding_category_table, coding_category_iso_8_1,
d67b4f80 10434 intern_c_string ("coding-category-iso-8-1"));
df7492f9 10435 ASET (Vcoding_category_table, coding_category_iso_8_2,
d67b4f80 10436 intern_c_string ("coding-category-iso-8-2"));
df7492f9 10437 ASET (Vcoding_category_table, coding_category_iso_7_else,
d67b4f80 10438 intern_c_string ("coding-category-iso-7-else"));
df7492f9 10439 ASET (Vcoding_category_table, coding_category_iso_8_else,
d67b4f80 10440 intern_c_string ("coding-category-iso-8-else"));
a470d443 10441 ASET (Vcoding_category_table, coding_category_utf_8_auto,
d67b4f80 10442 intern_c_string ("coding-category-utf-8-auto"));
a470d443 10443 ASET (Vcoding_category_table, coding_category_utf_8_nosig,
d67b4f80 10444 intern_c_string ("coding-category-utf-8"));
a470d443 10445 ASET (Vcoding_category_table, coding_category_utf_8_sig,
d67b4f80 10446 intern_c_string ("coding-category-utf-8-sig"));
df7492f9 10447 ASET (Vcoding_category_table, coding_category_utf_16_be,
d67b4f80 10448 intern_c_string ("coding-category-utf-16-be"));
ff563fce 10449 ASET (Vcoding_category_table, coding_category_utf_16_auto,
d67b4f80 10450 intern_c_string ("coding-category-utf-16-auto"));
df7492f9 10451 ASET (Vcoding_category_table, coding_category_utf_16_le,
d67b4f80 10452 intern_c_string ("coding-category-utf-16-le"));
df7492f9 10453 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
d67b4f80 10454 intern_c_string ("coding-category-utf-16-be-nosig"));
df7492f9 10455 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
d67b4f80 10456 intern_c_string ("coding-category-utf-16-le-nosig"));
df7492f9 10457 ASET (Vcoding_category_table, coding_category_charset,
d67b4f80 10458 intern_c_string ("coding-category-charset"));
df7492f9 10459 ASET (Vcoding_category_table, coding_category_sjis,
d67b4f80 10460 intern_c_string ("coding-category-sjis"));
df7492f9 10461 ASET (Vcoding_category_table, coding_category_big5,
d67b4f80 10462 intern_c_string ("coding-category-big5"));
df7492f9 10463 ASET (Vcoding_category_table, coding_category_ccl,
d67b4f80 10464 intern_c_string ("coding-category-ccl"));
df7492f9 10465 ASET (Vcoding_category_table, coding_category_emacs_mule,
d67b4f80 10466 intern_c_string ("coding-category-emacs-mule"));
df7492f9
KH
10467 /* Followings are NOT target of code detection. */
10468 ASET (Vcoding_category_table, coding_category_raw_text,
d67b4f80 10469 intern_c_string ("coding-category-raw-text"));
df7492f9 10470 ASET (Vcoding_category_table, coding_category_undecided,
d67b4f80 10471 intern_c_string ("coding-category-undecided"));
ecf488bc 10472
065e3595
KH
10473 DEFSYM (Qinsufficient_source, "insufficient-source");
10474 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10475 DEFSYM (Qinvalid_source, "invalid-source");
10476 DEFSYM (Qinterrupted, "interrupted");
10477 DEFSYM (Qinsufficient_memory, "insufficient-memory");
44e8490d 10478 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
065e3595 10479
4ed46869
KH
10480 defsubr (&Scoding_system_p);
10481 defsubr (&Sread_coding_system);
10482 defsubr (&Sread_non_nil_coding_system);
10483 defsubr (&Scheck_coding_system);
10484 defsubr (&Sdetect_coding_region);
d46c5b12 10485 defsubr (&Sdetect_coding_string);
05e6f5dc 10486 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 10487 defsubr (&Sunencodable_char_position);
df7492f9 10488 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
10489 defsubr (&Sdecode_coding_region);
10490 defsubr (&Sencode_coding_region);
10491 defsubr (&Sdecode_coding_string);
10492 defsubr (&Sencode_coding_string);
10493 defsubr (&Sdecode_sjis_char);
10494 defsubr (&Sencode_sjis_char);
10495 defsubr (&Sdecode_big5_char);
10496 defsubr (&Sencode_big5_char);
1ba9e4ab 10497 defsubr (&Sset_terminal_coding_system_internal);
c4825358 10498 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 10499 defsubr (&Sterminal_coding_system);
1ba9e4ab 10500 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 10501 defsubr (&Skeyboard_coding_system);
a5d301df 10502 defsubr (&Sfind_operation_coding_system);
df7492f9 10503 defsubr (&Sset_coding_system_priority);
6b89e3aa 10504 defsubr (&Sdefine_coding_system_internal);
df7492f9 10505 defsubr (&Sdefine_coding_system_alias);
a6f87d34 10506 defsubr (&Scoding_system_put);
df7492f9
KH
10507 defsubr (&Scoding_system_base);
10508 defsubr (&Scoding_system_plist);
10509 defsubr (&Scoding_system_aliases);
10510 defsubr (&Scoding_system_eol_type);
10511 defsubr (&Scoding_system_priority_list);
4ed46869 10512
4608c386 10513 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
10514 doc: /* List of coding systems.
10515
10516Do not alter the value of this variable manually. This variable should be
df7492f9 10517updated by the functions `define-coding-system' and
48b0f3ae 10518`define-coding-system-alias'. */);
4608c386
KH
10519 Vcoding_system_list = Qnil;
10520
10521 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
10522 doc: /* Alist of coding system names.
10523Each element is one element list of coding system name.
446dcd75 10524This variable is given to `completing-read' as COLLECTION argument.
48b0f3ae
PJ
10525
10526Do not alter the value of this variable manually. This variable should be
10527updated by the functions `make-coding-system' and
10528`define-coding-system-alias'. */);
4608c386
KH
10529 Vcoding_system_alist = Qnil;
10530
4ed46869 10531 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
10532 doc: /* List of coding-categories (symbols) ordered by priority.
10533
10534On detecting a coding system, Emacs tries code detection algorithms
10535associated with each coding-category one by one in this order. When
10536one algorithm agrees with a byte sequence of source text, the coding
0ec31faf
KH
10537system bound to the corresponding coding-category is selected.
10538
42205607 10539Don't modify this variable directly, but use `set-coding-priority'. */);
4ed46869
KH
10540 {
10541 int i;
10542
10543 Vcoding_category_list = Qnil;
df7492f9 10544 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 10545 Vcoding_category_list
d46c5b12
KH
10546 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10547 Vcoding_category_list);
4ed46869
KH
10548 }
10549
10550 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
10551 doc: /* Specify the coding system for read operations.
10552It is useful to bind this variable with `let', but do not set it globally.
10553If the value is a coding system, it is used for decoding on read operation.
446dcd75
JB
10554If not, an appropriate element is used from one of the coding system alists.
10555There are three such tables: `file-coding-system-alist',
48b0f3ae 10556`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
10557 Vcoding_system_for_read = Qnil;
10558
10559 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
10560 doc: /* Specify the coding system for write operations.
10561Programs bind this variable with `let', but you should not set it globally.
10562If the value is a coding system, it is used for encoding of output,
10563when writing it to a file and when sending it to a file or subprocess.
10564
10565If this does not specify a coding system, an appropriate element
446dcd75
JB
10566is used from one of the coding system alists.
10567There are three such tables: `file-coding-system-alist',
48b0f3ae
PJ
10568`process-coding-system-alist', and `network-coding-system-alist'.
10569For output to files, if the above procedure does not specify a coding system,
10570the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
10571 Vcoding_system_for_write = Qnil;
10572
10573 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
10574 doc: /*
10575Coding system used in the latest file or process I/O. */);
4ed46869
KH
10576 Vlast_coding_system_used = Qnil;
10577
065e3595
KH
10578 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10579 doc: /*
10580Error status of the last code conversion.
10581
10582When an error was detected in the last code conversion, this variable
10583is set to one of the following symbols.
10584 `insufficient-source'
10585 `inconsistent-eol'
10586 `invalid-source'
10587 `interrupted'
10588 `insufficient-memory'
10589When no error was detected, the value doesn't change. So, to check
10590the error status of a code conversion by this variable, you must
10591explicitly set this variable to nil before performing code
10592conversion. */);
10593 Vlast_code_conversion_error = Qnil;
10594
9ce27fde 10595 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
10596 doc: /*
10597*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
10598See info node `Coding Systems' and info node `Text and Binary' concerning
10599such conversion. */);
9ce27fde
KH
10600 inhibit_eol_conversion = 0;
10601
ed29121d 10602 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
10603 doc: /*
10604Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
10605Bind it to t if the process output is to be treated as if it were a file
10606read from some filesystem. */);
ed29121d
EZ
10607 inherit_process_coding_system = 0;
10608
02ba4723 10609 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
10610 doc: /*
10611Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
10612The format is ((PATTERN . VAL) ...),
10613where PATTERN is a regular expression matching a file name,
10614VAL is a coding system, a cons of coding systems, or a function symbol.
10615If VAL is a coding system, it is used for both decoding and encoding
10616the file contents.
10617If VAL is a cons of coding systems, the car part is used for decoding,
10618and the cdr part is used for encoding.
10619If VAL is a function symbol, the function must return a coding system
2c53e699
KH
10620or a cons of coding systems which are used as above. The function is
10621called with an argument that is a list of the arguments with which
5a0bbd9a
KH
10622`find-operation-coding-system' was called. If the function can't decide
10623a coding system, it can return `undecided' so that the normal
10624code-detection is performed.
48b0f3ae
PJ
10625
10626See also the function `find-operation-coding-system'
10627and the variable `auto-coding-alist'. */);
02ba4723
KH
10628 Vfile_coding_system_alist = Qnil;
10629
10630 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
10631 doc: /*
10632Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
10633The format is ((PATTERN . VAL) ...),
10634where PATTERN is a regular expression matching a program name,
10635VAL is a coding system, a cons of coding systems, or a function symbol.
10636If VAL is a coding system, it is used for both decoding what received
10637from the program and encoding what sent to the program.
10638If VAL is a cons of coding systems, the car part is used for decoding,
10639and the cdr part is used for encoding.
10640If VAL is a function symbol, the function must return a coding system
10641or a cons of coding systems which are used as above.
10642
10643See also the function `find-operation-coding-system'. */);
02ba4723
KH
10644 Vprocess_coding_system_alist = Qnil;
10645
10646 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
10647 doc: /*
10648Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
10649The format is ((PATTERN . VAL) ...),
10650where PATTERN is a regular expression matching a network service name
10651or is a port number to connect to,
10652VAL is a coding system, a cons of coding systems, or a function symbol.
10653If VAL is a coding system, it is used for both decoding what received
10654from the network stream and encoding what sent to the network stream.
10655If VAL is a cons of coding systems, the car part is used for decoding,
10656and the cdr part is used for encoding.
10657If VAL is a function symbol, the function must return a coding system
10658or a cons of coding systems which are used as above.
10659
10660See also the function `find-operation-coding-system'. */);
02ba4723 10661 Vnetwork_coding_system_alist = Qnil;
4ed46869 10662
68c45bf0 10663 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
10664 doc: /* Coding system to use with system messages.
10665Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
10666 Vlocale_coding_system = Qnil;
10667
005f0d35 10668 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 10669 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
10670 doc: /*
10671*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
d67b4f80 10672 eol_mnemonic_unix = make_pure_c_string (":");
4ed46869 10673
7722baf9 10674 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
10675 doc: /*
10676*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
d67b4f80 10677 eol_mnemonic_dos = make_pure_c_string ("\\");
4ed46869 10678
7722baf9 10679 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
10680 doc: /*
10681*String displayed in mode line for MAC-like (CR) end-of-line format. */);
d67b4f80 10682 eol_mnemonic_mac = make_pure_c_string ("/");
4ed46869 10683
7722baf9 10684 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
10685 doc: /*
10686*String displayed in mode line when end-of-line format is not yet determined. */);
d67b4f80 10687 eol_mnemonic_undecided = make_pure_c_string (":");
4ed46869 10688
84fbb8a0 10689 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
10690 doc: /*
10691*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 10692 Venable_character_translation = Qt;
bdd9fb48 10693
f967223b 10694 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
10695 &Vstandard_translation_table_for_decode,
10696 doc: /* Table for translating characters while decoding. */);
f967223b 10697 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 10698
f967223b 10699 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
10700 &Vstandard_translation_table_for_encode,
10701 doc: /* Table for translating characters while encoding. */);
f967223b 10702 Vstandard_translation_table_for_encode = Qnil;
4ed46869 10703
df7492f9 10704 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
10705 doc: /* Alist of charsets vs revision numbers.
10706While encoding, if a charset (car part of an element) is found,
df7492f9
KH
10707designate it with the escape sequence identifying revision (cdr part
10708of the element). */);
10709 Vcharset_revision_table = Qnil;
02ba4723
KH
10710
10711 DEFVAR_LISP ("default-process-coding-system",
10712 &Vdefault_process_coding_system,
48b0f3ae
PJ
10713 doc: /* Cons of coding systems used for process I/O by default.
10714The car part is used for decoding a process output,
10715the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 10716 Vdefault_process_coding_system = Qnil;
c4825358 10717
3f003981 10718 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
10719 doc: /*
10720Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
10721This is a vector of length 256.
10722If Nth element is non-nil, the existence of code N in a file
10723\(or output of subprocess) doesn't prevent it from being detected as
10724a coding system of ISO 2022 variant which has a flag
10725`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10726or reading output of a subprocess.
446dcd75 10727Only 128th through 159th elements have a meaning. */);
3f003981 10728 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
10729
10730 DEFVAR_LISP ("select-safe-coding-system-function",
10731 &Vselect_safe_coding_system_function,
df7492f9
KH
10732 doc: /*
10733Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
10734
10735If set, this function is called to force a user to select a proper
10736coding system which can encode the text in the case that a default
fdecf907
GM
10737coding system used in each operation can't encode the text. The
10738function should take care that the buffer is not modified while
10739the coding system is being selected.
48b0f3ae
PJ
10740
10741The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
10742 Vselect_safe_coding_system_function = Qnil;
10743
5d5bf4d8
KH
10744 DEFVAR_BOOL ("coding-system-require-warning",
10745 &coding_system_require_warning,
10746 doc: /* Internal use only.
6b89e3aa
KH
10747If non-nil, on writing a file, `select-safe-coding-system-function' is
10748called even if `coding-system-for-write' is non-nil. The command
10749`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
10750 coding_system_require_warning = 0;
10751
10752
22ab2303 10753 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 10754 &inhibit_iso_escape_detection,
df7492f9 10755 doc: /*
97b1b294 10756If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
48b0f3ae 10757
97b1b294
EZ
10758When Emacs reads text, it tries to detect how the text is encoded.
10759This code detection is sensitive to escape sequences. If Emacs sees
10760a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10761of the ISO2022 encodings, and decodes text by the corresponding coding
10762system (e.g. `iso-2022-7bit').
48b0f3ae
PJ
10763
10764However, there may be a case that you want to read escape sequences in
10765a file as is. In such a case, you can set this variable to non-nil.
97b1b294
EZ
10766Then the code detection will ignore any escape sequences, and no text is
10767detected as encoded in some ISO-2022 encoding. The result is that all
48b0f3ae
PJ
10768escape sequences become visible in a buffer.
10769
10770The default value is nil, and it is strongly recommended not to change
10771it. That is because many Emacs Lisp source files that contain
10772non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10773in Emacs's distribution, and they won't be decoded correctly on
10774reading if you suppress escape sequence detection.
10775
10776The other way to read escape sequences in a file without decoding is
97b1b294 10777to explicitly specify some coding system that doesn't use ISO-2022
48b0f3ae 10778escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 10779 inhibit_iso_escape_detection = 0;
002fdb44 10780
97b1b294
EZ
10781 DEFVAR_BOOL ("inhibit-null-byte-detection",
10782 &inhibit_null_byte_detection,
10783 doc: /* If non-nil, Emacs ignores null bytes on code detection.
10784By default, Emacs treats text containing null bytes as binary data,
10785and does not attempt to decode it. The effect is as if you specified
10786`no-conversion' for reading that text.
10787
10788Set this to non-nil when a regular text happens to include null bytes.
10789Examples are Index nodes of Info files and null-byte delimited output
10790from GNU Find and GNU Grep. Emacs will then ignore the null bytes and
10791decode text as usual. */);
10792 inhibit_null_byte_detection = 0;
10793
002fdb44 10794 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1 10795 doc: /* Char table for translating self-inserting characters.
446dcd75 10796This is applied to the result of input methods, not their input.
8434d0b8
EZ
10797See also `keyboard-translate-table'.
10798
10799Use of this variable for character code unification was rendered
10800obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10801internal character representation. */);
002fdb44 10802 Vtranslation_table_for_input = Qnil;
8f924df7 10803
2c78b7e1
KH
10804 {
10805 Lisp_Object args[coding_arg_max];
8f924df7 10806 Lisp_Object plist[16];
2c78b7e1
KH
10807 int i;
10808
10809 for (i = 0; i < coding_arg_max; i++)
10810 args[i] = Qnil;
10811
d67b4f80 10812 plist[0] = intern_c_string (":name");
2c78b7e1 10813 plist[1] = args[coding_arg_name] = Qno_conversion;
d67b4f80 10814 plist[2] = intern_c_string (":mnemonic");
2c78b7e1 10815 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
d67b4f80 10816 plist[4] = intern_c_string (":coding-type");
2c78b7e1 10817 plist[5] = args[coding_arg_coding_type] = Qraw_text;
d67b4f80 10818 plist[6] = intern_c_string (":ascii-compatible-p");
2c78b7e1 10819 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
d67b4f80 10820 plist[8] = intern_c_string (":default-char");
2c78b7e1 10821 plist[9] = args[coding_arg_default_char] = make_number (0);
d67b4f80 10822 plist[10] = intern_c_string (":for-unibyte");
8f924df7 10823 plist[11] = args[coding_arg_for_unibyte] = Qt;
d67b4f80
DN
10824 plist[12] = intern_c_string (":docstring");
10825 plist[13] = make_pure_c_string ("Do no conversion.\n\
2c78b7e1
KH
10826\n\
10827When you visit a file with this coding, the file is read into a\n\
10828unibyte buffer as is, thus each byte of a file is treated as a\n\
10829character.");
d67b4f80 10830 plist[14] = intern_c_string (":eol-type");
8f924df7
KH
10831 plist[15] = args[coding_arg_eol_type] = Qunix;
10832 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1 10833 Fdefine_coding_system_internal (coding_arg_max, args);
ae6f73fa
KH
10834
10835 plist[1] = args[coding_arg_name] = Qundecided;
10836 plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10837 plist[5] = args[coding_arg_coding_type] = Qundecided;
10838 /* This is already set.
35befdaa 10839 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
d67b4f80 10840 plist[8] = intern_c_string (":charset-list");
ae6f73fa
KH
10841 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10842 plist[11] = args[coding_arg_for_unibyte] = Qnil;
d67b4f80 10843 plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
ae6f73fa
KH
10844 plist[15] = args[coding_arg_eol_type] = Qnil;
10845 args[coding_arg_plist] = Flist (16, plist);
10846 Fdefine_coding_system_internal (coding_arg_max, args);
2c78b7e1
KH
10847 }
10848
2c78b7e1 10849 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
10850
10851 {
10852 int i;
10853
10854 for (i = 0; i < coding_category_max; i++)
10855 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10856 }
fcbcfb64
KH
10857#if defined (MSDOS) || defined (WINDOWSNT)
10858 system_eol_type = Qdos;
10859#else
10860 system_eol_type = Qunix;
10861#endif
10862 staticpro (&system_eol_type);
4ed46869
KH
10863}
10864
68c45bf0 10865char *
971de7fb 10866emacs_strerror (int error_number)
68c45bf0
PE
10867{
10868 char *str;
10869
ca9c0567 10870 synchronize_system_messages_locale ();
68c45bf0
PE
10871 str = strerror (error_number);
10872
10873 if (! NILP (Vlocale_coding_system))
10874 {
10875 Lisp_Object dec = code_convert_string_norecord (build_string (str),
10876 Vlocale_coding_system,
10877 0);
d5db4077 10878 str = (char *) SDATA (dec);
68c45bf0
PE
10879 }
10880
10881 return str;
10882}
10883
4ed46869 10884#endif /* emacs */
9ffd559c
KH
10885
10886/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10887 (do not change this comment) */