Add 2012 to FSF copyright years for Emacs files (do not merge to trunk)
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
aaef169d 2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
49f70d46 3 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5df4f04c 5 2005, 2006, 2007, 2008, 2009, 2010, 2011
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8f924df7 8 Copyright (C) 2003
df7492f9
KH
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
4ed46869 11
369314dc
KH
12This file is part of GNU Emacs.
13
9ec0b715 14GNU Emacs is free software: you can redistribute it and/or modify
369314dc 15it under the terms of the GNU General Public License as published by
9ec0b715
GM
16the Free Software Foundation, either version 3 of the License, or
17(at your option) any later version.
4ed46869 18
369314dc
KH
19GNU Emacs is distributed in the hope that it will be useful,
20but WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22GNU General Public License for more details.
4ed46869 23
369314dc 24You should have received a copy of the GNU General Public License
9ec0b715 25along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869
KH
26
27/*** TABLE OF CONTENTS ***
28
b73bfc1c 29 0. General comments
4ed46869 30 1. Preamble
df7492f9
KH
31 2. Emacs' internal format (emacs-utf-8) handlers
32 3. UTF-8 handlers
33 4. UTF-16 handlers
34 5. Charset-base coding systems handlers
35 6. emacs-mule (old Emacs' internal format) handlers
36 7. ISO2022 handlers
37 8. Shift-JIS and BIG5 handlers
38 9. CCL handlers
39 10. C library functions
40 11. Emacs Lisp library functions
41 12. Postamble
4ed46869
KH
42
43*/
44
df7492f9 45/*** 0. General comments ***
b73bfc1c
KH
46
47
df7492f9 48CODING SYSTEM
4ed46869 49
5bad0796
DL
50 A coding system is an object for an encoding mechanism that contains
51 information about how to convert byte sequences to character
e19c3639
KH
52 sequences and vice versa. When we say "decode", it means converting
53 a byte sequence of a specific coding system into a character
54 sequence that is represented by Emacs' internal coding system
55 `emacs-utf-8', and when we say "encode", it means converting a
56 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 57 coding system.
4ed46869 58
  In Emacs Lisp, a coding system is represented by a Lisp symbol.  At
  the C level, a coding system is represented by a vector of
  attributes stored in the hash table Vcoding_system_hash_table.  The
  conversion from coding system symbol to attributes vector is done by
  looking up Vcoding_system_hash_table by the symbol.

e19c3639 65 Coding systems are classified into the following types depending on
5bad0796 66 the encoding mechanism. Here's a brief description of the types.
4ed46869 67
df7492f9
KH
68 o UTF-8
69
70 o UTF-16
71
72 o Charset-base coding system
73
74 A coding system defined by one or more (coded) character sets.
5bad0796 75 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
76 character set.
77
5bad0796 78 o Old Emacs internal format (emacs-mule)
df7492f9 79
5bad0796 80 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 81
df7492f9 82 o ISO2022-base coding system
4ed46869
KH
83
84 The most famous coding system for multiple character sets. X's
df7492f9
KH
85 Compound Text, various EUCs (Extended Unix Code), and coding systems
86 used in the Internet communication such as ISO-2022-JP are all
87 variants of ISO2022.
4ed46869 88
df7492f9 89 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 90
4ed46869
KH
91 A coding system to encode character sets: ASCII, JISX0201, and
92 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 93 section 8.
4ed46869 94
df7492f9 95 o BIG5
4ed46869 96
df7492f9 97 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 98 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
99 described in section 8. In this file, when we write "big5" (all
100 lowercase), we mean the coding system, and when we write "Big5"
101 (capitalized), we mean the character set.
4ed46869 102
df7492f9 103 o CCL
27901516 104
5bad0796 105 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
106 not listed above, he can supply a decoder and an encoder for it in
107 CCL (Code Conversion Language) programs. Emacs executes the CCL
108 program while decoding/encoding.
27901516 109
df7492f9 110 o Raw-text
4ed46869 111
5a936b46 112 A coding system for text containing raw eight-bit data. Emacs
5bad0796 113 treats each byte of source text as a character (except for
df7492f9 114 end-of-line conversion).
4ed46869 115
df7492f9
KH
116 o No-conversion
117
118 Like raw text, but don't do end-of-line conversion.
4ed46869 119
4ed46869 120
df7492f9 121END-OF-LINE FORMAT
4ed46869 122
5bad0796 123 How text end-of-line is encoded depends on operating system. For
df7492f9 124 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 125 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
126 `line-feed' codes. MacOS's format is usually one byte of
127 `carriage-return'.
4ed46869 128
cfb43547 129 Since text character encoding and end-of-line encoding are
df7492f9
KH
130 independent, any coding system described above can take any format
131 of end-of-line (except for no-conversion).
4ed46869 132
e19c3639
KH
133STRUCT CODING_SYSTEM
134
135 Before using a coding system for code conversion (i.e. decoding and
136 encoding), we setup a structure of type `struct coding_system'.
137 This structure keeps various information about a specific code
5bad0796 138 conversion (e.g. the location of source and destination data).
4ed46869
KH
139
140*/
141
df7492f9
KH
142/* COMMON MACROS */
143
144
4ed46869
KH
145/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146
df7492f9 147 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
148 CODING conforms to the format of XXX, and update the members of
149 DETECT_INFO.
df7492f9 150
ff0dacd7 151 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
152
153 Below is the template of these functions. */
154
#if 0
/* Template of a detector.  Scan the source bytes of CODING and record
   in DETECT_INFO whether they can be text in the format XXX.

   Return 1 (and merge the evidence found into DETECT_INFO->found) if
   the whole source conforms to XXX.  Return 0 (and mark XXX in
   DETECT_INFO->rejected) as soon as a non-conforming byte is seen.  */
static int
detect_coding_XXX (coding, detect_info)
     struct coding_system *coding;
     struct coding_detection_info *detect_info;
{
  const unsigned char *src = coding->source;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  int found = 0;
  ...;

  while (1)
    {
      /* Get one byte from the source.  If the source is exhausted, jump
         to no_more_source:.  */
      ONE_MORE_BYTE (c);

      if (! __C_conforms_to_XXX___ (c))
        break;
      /* Only a byte that positively indicates XXX counts as evidence;
         bytes that merely conform leave FOUND untouched.  */
      if (__C_strongly_suggests_XXX__ (c))
        found = CATEGORY_MASK_XXX;
    }
  /* The byte sequence is invalid for XXX.  */
  detect_info->rejected |= CATEGORY_MASK_XXX;
  return 0;

 no_more_source:
  /* The source exhausted successfully.  */
  detect_info->found |= found;
  return 1;
}
#endif
189
190/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191
  These functions decode a byte sequence specified as a source by
  CODING.  The resulting multibyte text goes to a place pointed to by
  CODING->charbuf, the length of which should not exceed
  CODING->charbuf_size.
d46c5b12 196
df7492f9
KH
197 These functions set the information of original and decoded texts in
198 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
199 They also set CODING->result to one of CODING_RESULT_XXX indicating
200 how the decoding is finished.
d46c5b12 201
df7492f9 202 Below is the template of these functions. */
d46c5b12 203
#if 0
/* Template of a decoder.  Decode the not-yet-consumed part of the
   source of CODING into CODING->charbuf, then record how much source
   was consumed and how many characters were produced.  */
static void
decode_coding_XXXX (coding)
     struct coding_system *coding;
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  /* SRC_BASE remembers the start position in source in each loop.
     The loop will be exited when there's not enough source code, or
     when there's no room in CHARBUF for a decoded character.  */
  const unsigned char *src_base;
  /* A buffer to produce decoded characters.  */
  int *charbuf = coding->charbuf + coding->charbuf_used;
  int *charbuf_end = coding->charbuf + coding->charbuf_size;
  int multibytep = coding->src_multibyte;

  while (1)
    {
      src_base = src;
      if (charbuf >= charbuf_end)
        /* No more room to produce a decoded character.  */
        break;
      ONE_MORE_BYTE (c);
      /* Decode it.  */
    }

 no_more_source:
  if (src_base < src_end
      && (coding->mode & CODING_MODE_LAST_BLOCK))
    /* If the source ends by partial bytes to construct a character,
       treat them as eight-bit raw data.  */
    while (src_base < src_end && charbuf < charbuf_end)
      *charbuf++ = *src_base++;
  /* Remember how many bytes and characters we consumed.  If the
     source is multibyte, the bytes and chars are not identical.  */
  coding->consumed = coding->consumed_char = src_base - coding->source;
  /* Remember how many characters we produced.  */
  coding->charbuf_used = charbuf - coding->charbuf;
}
#endif
244
245/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246
df7492f9
KH
247 These functions encode SRC_BYTES length text at SOURCE of Emacs'
248 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
249 goes to a place pointed to by DESTINATION, the length of which
250 should not exceed DST_BYTES.
d46c5b12 251
df7492f9
KH
252 These functions set the information of original and encoded texts in
253 the members produced, produced_char, consumed, and consumed_char of
254 the structure *CODING. They also set the member result to one of
255 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 256
  DST_BYTES zero means that the source area and the destination area
  overlap, which means that we can produce encoded text only until it
  reaches the head of the not-yet-encoded source text.
d46c5b12 260
df7492f9 261 Below is a template of these functions. */
#if 0
/* Template of an encoder.  Encode the characters held in
   CODING->charbuf into CODING->destination, then record how many
   characters and bytes were produced.  */
static void
encode_coding_XXX (coding)
     struct coding_system *coding;
{
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  /* CHARBUF is a plain `int *'; the used length lives in CODING.  */
  int *charbuf_end = coding->charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Stop early enough that one more loop iteration cannot overrun
     the destination.  */
  unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
  int produced_chars = 0;

  for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
    {
      int c = *charbuf;
      /* Encode C into DST, and increment DST.  */
    }
 label_no_more_destination:
  /* How many chars and bytes we produced.  */
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
}
#endif
286
4ed46869
KH
287\f
288/*** 1. Preamble ***/
289
68c45bf0 290#include <config.h>
4ed46869 291#include <stdio.h>
d7306fe6 292#include <setjmp.h>
4ed46869 293
4ed46869
KH
294#include "lisp.h"
295#include "buffer.h"
df7492f9 296#include "character.h"
4ed46869
KH
297#include "charset.h"
298#include "ccl.h"
df7492f9 299#include "composite.h"
4ed46869
KH
300#include "coding.h"
301#include "window.h"
b8299c66
KL
302#include "frame.h"
303#include "termhooks.h"
4ed46869 304
df7492f9 305Lisp_Object Vcoding_system_hash_table;
4ed46869 306
df7492f9 307Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
308Lisp_Object Qunix, Qdos;
309extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
310Lisp_Object Qbuffer_file_coding_system;
311Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 312Lisp_Object Qdefault_char;
27901516 313Lisp_Object Qno_conversion, Qundecided;
df7492f9 314Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 315Lisp_Object Qbig, Qlittle;
bb0115a2 316Lisp_Object Qcoding_system_history;
1397dc18 317Lisp_Object Qvalid_codes;
2133e2d1 318Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
a6f87d34
KH
319Lisp_Object QCdecode_translation_table, QCencode_translation_table;
320Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
35befdaa 321Lisp_Object QCascii_compatible_p;
4ed46869
KH
322
323extern Lisp_Object Qinsert_file_contents, Qwrite_region;
387f6ba5 324Lisp_Object Qcall_process, Qcall_process_region;
4ed46869
KH
325Lisp_Object Qstart_process, Qopen_network_stream;
326Lisp_Object Qtarget_idx;
327
065e3595
KH
328Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
329Lisp_Object Qinterrupted, Qinsufficient_memory;
330
c7183fb8
GM
331extern Lisp_Object Qcompletion_ignore_case;
332
44e8490d
KH
333/* If a symbol has this property, evaluate the value to define the
334 symbol as a coding system. */
335static Lisp_Object Qcoding_system_define_form;
336
5d5bf4d8
KH
337int coding_system_require_warning;
338
d46c5b12
KH
339Lisp_Object Vselect_safe_coding_system_function;
340
7722baf9
EZ
341/* Mnemonic string for each format of end-of-line. */
342Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
343/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 344 decided. */
7722baf9 345Lisp_Object eol_mnemonic_undecided;
4ed46869 346
fcbcfb64
KH
347/* Format of end-of-line decided by system. This is Qunix on
348 Unix and Mac, Qdos on DOS/Windows.
349 This has an effect only for external encoding (i.e. for output to
350 file and process), not for in-buffer or Lisp string encoding. */
351static Lisp_Object system_eol_type;
352
#ifdef emacs

Lisp_Object Vcoding_system_list, Vcoding_system_alist;

Lisp_Object Qcoding_system_p, Qcoding_system_error;

/* Coding system emacs-mule and raw-text are for converting only
   end-of-line format.  */
Lisp_Object Qemacs_mule, Qraw_text;
Lisp_Object Qutf_8_emacs;

/* Coding-systems are handed between Emacs Lisp programs and C internal
   routines by the following three variables.  */
/* Coding-system for reading files and receiving data from process.  */
Lisp_Object Vcoding_system_for_read;
/* Coding-system for writing files and sending data to process.  */
Lisp_Object Vcoding_system_for_write;
/* Coding-system actually used in the latest I/O.  */
Lisp_Object Vlast_coding_system_used;
/* Set to non-nil when an error is detected during code conversion.  */
Lisp_Object Vlast_code_conversion_error;
/* A vector of length 256 which contains information about special
   Latin codes (especially for dealing with Microsoft codes).  */
Lisp_Object Vlatin_extra_code_table;

/* Flag to inhibit code conversion of end-of-line format.  */
int inhibit_eol_conversion;

/* Flag to inhibit ISO2022 escape sequence detection.  */
int inhibit_iso_escape_detection;

/* Flag to inhibit detection of binary files through null bytes.  */
int inhibit_null_byte_detection;

/* Flag to make buffer-file-coding-system inherit from process-coding.  */
int inherit_process_coding_system;

/* Coding system to be used to encode text for terminal display when
   terminal coding system is nil.  */
struct coding_system safe_terminal_coding;

Lisp_Object Vfile_coding_system_alist;
Lisp_Object Vprocess_coding_system_alist;
Lisp_Object Vnetwork_coding_system_alist;

Lisp_Object Vlocale_coding_system;

#endif /* emacs */
401
f967223b
KH
402/* Flag to tell if we look up translation table on character code
403 conversion. */
84fbb8a0 404Lisp_Object Venable_character_translation;
f967223b
KH
405/* Standard translation table to look up on decoding (reading). */
406Lisp_Object Vstandard_translation_table_for_decode;
407/* Standard translation table to look up on encoding (writing). */
408Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 409
f967223b
KH
410Lisp_Object Qtranslation_table;
411Lisp_Object Qtranslation_table_id;
412Lisp_Object Qtranslation_table_for_decode;
413Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
414
415/* Alist of charsets vs revision number. */
df7492f9 416static Lisp_Object Vcharset_revision_table;
4ed46869 417
02ba4723
KH
418/* Default coding systems used for process I/O. */
419Lisp_Object Vdefault_process_coding_system;
420
002fdb44
DL
421/* Char table for translating Quail and self-inserting input. */
422Lisp_Object Vtranslation_table_for_input;
423
df7492f9
KH
424/* Two special coding systems. */
425Lisp_Object Vsjis_coding_system;
426Lisp_Object Vbig5_coding_system;
427
/* ISO2022 section */

/* Accessors for the ISO2022-specific state of CODING (a pointer to
   struct coding_system).  */

/* Integer stored at index REG of the `coding_attr_iso_initial'
   attribute of CODING.  */
#define CODING_ISO_INITIAL(coding, reg)                 \
  (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
                     coding_attr_iso_initial),          \
               reg)))


/* Value recorded for CHARSET_ID in CODING->safe_charsets, or -1 if
   CHARSET_ID is beyond CODING->max_charset_id or its entry is 255.  */
#define CODING_ISO_REQUEST(coding, charset_id)          \
  (((charset_id) <= (coding)->max_charset_id            \
    ? ((coding)->safe_charsets[charset_id] != 255       \
       ? (coding)->safe_charsets[charset_id]            \
       : -1)                                            \
    : -1))


#define CODING_ISO_FLAGS(coding)        \
  ((coding)->spec.iso_2022.flags)
#define CODING_ISO_DESIGNATION(coding, reg)     \
  ((coding)->spec.iso_2022.current_designation[reg])
#define CODING_ISO_INVOCATION(coding, plane)    \
  ((coding)->spec.iso_2022.current_invocation[plane])
#define CODING_ISO_SINGLE_SHIFTING(coding)      \
  ((coding)->spec.iso_2022.single_shifting)
#define CODING_ISO_BOL(coding)  \
  ((coding)->spec.iso_2022.bol)
/* The designation of the register currently invoked to PLANE.  */
#define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
  CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
#define CODING_ISO_CMP_STATUS(coding)   \
  (&(coding)->spec.iso_2022.cmp_status)
#define CODING_ISO_EXTSEGMENT_LEN(coding)       \
  ((coding)->spec.iso_2022.ctext_extended_segment_len)
#define CODING_ISO_EMBEDDED_UTF_8(coding)       \
  ((coding)->spec.iso_2022.embedded_utf_8)
df7492f9
KH
462
/* Control characters of ISO2022.  */
/*                      code */         /* function */
#define ISO_CODE_LF     0x0A            /* line-feed */
#define ISO_CODE_CR     0x0D            /* carriage-return */
#define ISO_CODE_SO     0x0E            /* shift-out */
#define ISO_CODE_SI     0x0F            /* shift-in */
#define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
#define ISO_CODE_ESC    0x1B            /* escape */
#define ISO_CODE_SS2    0x8E            /* single-shift-2 */
#define ISO_CODE_SS3    0x8F            /* single-shift-3 */
#define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
474
/* All code (1-byte) of ISO2022 is classified into one of the
   followings.  */
enum iso_code_class_type
  {
    ISO_control_0,              /* Control codes in the range
                                   0x00..0x1F and 0x7F, except for the
                                   following 4 codes.  */
    ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
    ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
    ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
    ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
    ISO_control_1,              /* Control codes in the range
                                   0x80..0x9F, except for the
                                   following 3 codes.  */
    ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
    ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
    ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
    ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
    ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
    ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
    ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
  };
05e6f5dc 497
/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
    `iso-flags' attribute of an iso2022 coding system.  */

/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
   instead of the correct short-form sequence (e.g. ESC $ A).  */
#define CODING_ISO_FLAG_LONG_FORM       0x0001

/* If set, reset graphic planes and registers at end-of-line to the
   initial state.  */
#define CODING_ISO_FLAG_RESET_AT_EOL    0x0002

/* If set, reset graphic planes and registers before any control
   characters to the initial state.  */
#define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004

/* If set, encode by 7-bit environment.  */
#define CODING_ISO_FLAG_SEVEN_BITS      0x0008

/* If set, use locking-shift function.  */
#define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010

/* If set, use single-shift function.  Overwrite
   CODING_ISO_FLAG_LOCKING_SHIFT.  */
#define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020

/* If set, use designation escape sequence.  */
#define CODING_ISO_FLAG_DESIGNATION     0x0040

/* If set, produce revision number sequence.  */
#define CODING_ISO_FLAG_REVISION        0x0080

/* If set, produce ISO6429's direction specifying sequence.  */
#define CODING_ISO_FLAG_DIRECTION       0x0100

/* If set, assume designation states are reset at beginning of line on
   output.  */
#define CODING_ISO_FLAG_INIT_AT_BOL     0x0200

/* If set, designation sequence should be placed at beginning of line
   on output.  */
#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400

/* If set, do not encode unsafe characters on output.  */
#define CODING_ISO_FLAG_SAFE            0x0800

/* If set, extra latin codes (128..159) are accepted as a valid code
   on input.  */
#define CODING_ISO_FLAG_LATIN_EXTRA     0x1000

#define CODING_ISO_FLAG_COMPOSITION     0x2000

#define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000

#define CODING_ISO_FLAG_USE_ROMAN       0x8000

#define CODING_ISO_FLAG_USE_OLDJIS      0x10000

/* NOTE(review): this value skips 0x20000..0x80000; nothing visible
   here says whether the gap is intentional -- confirm before adding
   new flags.  */
#define CODING_ISO_FLAG_FULL_SUPPORT    0x100000

/* A character to be produced on output if encoding of the original
   character is prohibited by CODING_ISO_FLAG_SAFE.  */
#define CODING_INHIBIT_CHARACTER_SUBSTITUTION   '?'
aa72b389 560
/* UTF-8 section */

/* The `utf_8_bom' member of CODING's coding-specific data.  */
#define CODING_UTF_8_BOM(coding)        \
  ((coding)->spec.utf_8_bom)

/* UTF-16 section */

/* Accessors for the members of CODING->spec.utf_16.  */
#define CODING_UTF_16_BOM(coding)       \
  ((coding)->spec.utf_16.bom)

#define CODING_UTF_16_ENDIAN(coding)    \
  ((coding)->spec.utf_16.endian)

#define CODING_UTF_16_SURROGATE(coding) \
  ((coding)->spec.utf_16.surrogate)
4ed46869 574
4ed46869 575
/* CCL section */

/* Accessors for the CCL-related attributes of CODING, fetched from
   the attribute vector of CODING->id.  */
#define CODING_CCL_DECODER(coding)      \
  AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
#define CODING_CCL_ENCODER(coding)      \
  AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
#define CODING_CCL_VALIDS(coding)       \
  (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 583
/* Index for each coding category in `coding_categories' */

enum coding_category
  {
    coding_category_iso_7,
    coding_category_iso_7_tight,
    coding_category_iso_8_1,
    coding_category_iso_8_2,
    coding_category_iso_7_else,
    coding_category_iso_8_else,
    coding_category_utf_8_auto,
    coding_category_utf_8_nosig,
    coding_category_utf_8_sig,
    coding_category_utf_16_auto,
    coding_category_utf_16_be,
    coding_category_utf_16_le,
    coding_category_utf_16_be_nosig,
    coding_category_utf_16_le_nosig,
    coding_category_charset,
    coding_category_sjis,
    coding_category_big5,
    coding_category_ccl,
    coding_category_emacs_mule,
    /* All above are targets of code detection.  */
    coding_category_raw_text,
    coding_category_undecided,
    /* Number of categories; used as the size of the per-category
       tables.  */
    coding_category_max
  };
612
/* Definitions of flag bits used in detect_coding_XXXX.  Each mask is
   the bit whose position is the corresponding coding_category_XXX
   enumerator.  */
#define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
#define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
#define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
#define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
#define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
#define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
#define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
#define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
#define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
#define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
#define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
#define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
#define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
#define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
#define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
#define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
#define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
#define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
#define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
#define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)

/* This value is returned if detect_coding_mask () finds nothing other
   than ASCII characters.  */
#define CATEGORY_MASK_ANY               \
  (CATEGORY_MASK_ISO_7                  \
   | CATEGORY_MASK_ISO_7_TIGHT          \
   | CATEGORY_MASK_ISO_8_1              \
   | CATEGORY_MASK_ISO_8_2              \
   | CATEGORY_MASK_ISO_7_ELSE           \
   | CATEGORY_MASK_ISO_8_ELSE           \
   | CATEGORY_MASK_UTF_8_AUTO           \
   | CATEGORY_MASK_UTF_8_NOSIG          \
   | CATEGORY_MASK_UTF_8_SIG            \
   | CATEGORY_MASK_UTF_16_AUTO          \
   | CATEGORY_MASK_UTF_16_BE            \
   | CATEGORY_MASK_UTF_16_LE            \
   | CATEGORY_MASK_UTF_16_BE_NOSIG      \
   | CATEGORY_MASK_UTF_16_LE_NOSIG      \
   | CATEGORY_MASK_CHARSET              \
   | CATEGORY_MASK_SJIS                 \
   | CATEGORY_MASK_BIG5                 \
   | CATEGORY_MASK_CCL                  \
   | CATEGORY_MASK_EMACS_MULE)


#define CATEGORY_MASK_ISO_7BIT \
  (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)

#define CATEGORY_MASK_ISO_8BIT \
  (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)

#define CATEGORY_MASK_ISO_ELSE \
  (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)

#define CATEGORY_MASK_ISO_ESCAPE        \
  (CATEGORY_MASK_ISO_7                  \
   | CATEGORY_MASK_ISO_7_TIGHT          \
   | CATEGORY_MASK_ISO_7_ELSE           \
   | CATEGORY_MASK_ISO_8_ELSE)

#define CATEGORY_MASK_ISO       \
  (  CATEGORY_MASK_ISO_7BIT     \
     | CATEGORY_MASK_ISO_8BIT   \
     | CATEGORY_MASK_ISO_ELSE)

#define CATEGORY_MASK_UTF_16            \
  (CATEGORY_MASK_UTF_16_AUTO            \
   | CATEGORY_MASK_UTF_16_BE            \
   | CATEGORY_MASK_UTF_16_LE            \
   | CATEGORY_MASK_UTF_16_BE_NOSIG      \
   | CATEGORY_MASK_UTF_16_LE_NOSIG)

#define CATEGORY_MASK_UTF_8             \
  (CATEGORY_MASK_UTF_8_AUTO             \
   | CATEGORY_MASK_UTF_8_NOSIG          \
   | CATEGORY_MASK_UTF_8_SIG)
df7492f9
KH
690
691/* List of symbols `coding-category-xxx' ordered by priority. This
692 variable is exposed to Emacs Lisp. */
693static Lisp_Object Vcoding_category_list;
694
695/* Table of coding categories (Lisp symbols). This variable is for
ad1746f5 696 internal use only. */
df7492f9
KH
697static Lisp_Object Vcoding_category_table;
698
699/* Table of coding-categories ordered by priority. */
700static enum coding_category coding_priorities[coding_category_max];
701
702/* Nth element is a coding context for the coding system bound to the
703 Nth coding category. */
704static struct coding_system coding_categories[coding_category_max];
705
/*** Commonly used macros and functions ***/

/* Smaller / larger of A and B.  Both are function-like macros that
   evaluate the selected argument twice, so the arguments must be free
   of side effects.  Defined only if not already provided.  */
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
4ed46869 714
/* Set ATTRS to the attribute vector of CODING, and CHARSET_LIST to
   the charset list found in those attributes.  ATTRS and CHARSET_LIST
   must be assignable lvalues.  */
#define CODING_GET_INFO(coding, attrs, charset_list)    \
  do {                                                  \
    (attrs) = CODING_ID_ATTRS ((coding)->id);           \
    (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
  } while (0)
4ed46869 720
4ed46869 721
/* Safely get one byte from the source text pointed by SRC which ends
   at SRC_END, and set C to that byte.  If there are not enough bytes
   in the source, it jumps to `no_more_source', first recording
   CODING_RESULT_INSUFFICIENT_SRC if some bytes were already read
   since SRC_BASE.

   If multibytep is nonzero and the byte read has the high bit set:
   a two-byte sequence whose lead byte is 0xC0 or 0xC1 is folded into
   the single byte it represents; for any other multibyte character,
   C is set to the negative value of the character code and
   CODING_RESULT_INVALID_SRC is recorded.

   CONSUMED_CHARS is incremented once per call.  The caller should
   declare and set these variables appropriately in advance:
        src, src_base, src_end, multibytep, consumed_chars, coding
   and must provide the label `no_more_source'.  */

#define ONE_MORE_BYTE(c)                                \
  do {                                                  \
    if (src == src_end)                                 \
      {                                                 \
        if (src_base < src)                             \
          record_conversion_result                      \
            (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
        goto no_more_source;                            \
      }                                                 \
    c = *src++;                                         \
    if (multibytep && (c & 0x80))                       \
      {                                                 \
        if ((c & 0xFE) == 0xC0)                         \
          c = ((c & 1) << 6) | *src++;                  \
        else                                            \
          {                                             \
            src--;                                      \
            c = - string_char (src, &src, NULL);        \
            record_conversion_result                    \
              (coding, CODING_RESULT_INVALID_SRC);      \
          }                                             \
      }                                                 \
    consumed_chars++;                                   \
  } while (0)
754
/* Safely get two bytes from the source text pointed by SRC which ends
   at SRC_END, and set C1 and C2 to those bytes while skipping the
   heading multibyte characters.  If there are not enough bytes in the
   source, it jumps to `no_more_source'.  If multibytep is nonzero and
   a multibyte character (other than a two-byte sequence with lead
   byte 0xC0 or 0xC1, which is folded into one byte) is found for C2,
   set C2 to -1.  The caller should declare and set these variables
   appropriately in advance:
        src, src_end, multibytep
   It is intended that this macro is used in detect_coding_utf_16.  */

#define TWO_MORE_BYTES(c1, c2)                          \
  do {                                                  \
    do {                                                \
      if (src == src_end)                               \
        goto no_more_source;                            \
      c1 = *src++;                                      \
      if (multibytep && (c1 & 0x80))                    \
        {                                               \
          if ((c1 & 0xFE) == 0xC0)                      \
            c1 = ((c1 & 1) << 6) | *src++;              \
          else                                          \
            {                                           \
              src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
              c1 = -1;                                  \
            }                                           \
        }                                               \
    } while (c1 < 0);                                   \
    if (src == src_end)                                 \
      goto no_more_source;                              \
    c2 = *src++;                                        \
    if (multibytep && (c2 & 0x80))                      \
      {                                                 \
        if ((c2 & 0xFE) == 0xC0)                        \
          c2 = ((c2 & 1) << 6) | *src++;                \
        else                                            \
          c2 = -1;                                      \
      }                                                 \
  } while (0)
793
aa72b389 794
/* Like ONE_MORE_BYTE, but do not check for the end of the source:
   the byte at SRC is read unconditionally, so the caller must already
   know that SRC has not reached SRC_END.  The caller should declare
   and set these variables appropriately in advance:
        src, multibytep, consumed_chars, coding  */

#define ONE_MORE_BYTE_NO_CHECK(c)                       \
  do {                                                  \
    c = *src++;                                         \
    if (multibytep && (c & 0x80))                       \
      {                                                 \
        if ((c & 0xFE) == 0xC0)                         \
          c = ((c & 1) << 6) | *src++;                  \
        else                                            \
          {                                             \
            src--;                                      \
            c = - string_char (src, &src, NULL);        \
            record_conversion_result                    \
              (coding, CODING_RESULT_INVALID_SRC);      \
          }                                             \
      }                                                 \
    consumed_chars++;                                   \
  } while (0)
812
aa72b389 813
/* Store a byte C in the place pointed by DST and increment DST to the
   next free point, and increment PRODUCED_CHARS.  The caller should
   assure that C is 0..127, and declare and set the variables `dst'
   and `produced_chars' appropriately in advance.  */

#define EMIT_ONE_ASCII_BYTE(c)  \
  do {                          \
    produced_chars++;           \
    *dst++ = (c);               \
  } while (0)
aa72b389
KH
826
827
/* Two-byte variant of EMIT_ONE_ASCII_BYTE: count two produced
   characters and write C1 followed by C2 at DST.  */

#define EMIT_TWO_ASCII_BYTES(c1, c2)    \
  do {                                  \
    produced_chars += 2;                \
    *dst++ = (c1);                      \
    *dst++ = (c2);                      \
  } while (0)
aa72b389
KH
835
836
df7492f9
KH
/* Count one produced character and store the byte C at DST,
   advancing DST.  If MULTIBYTEP is nonzero the byte is stored in
   multibyte form: a value of 0x80 or more is first converted with
   BYTE8_TO_CHAR, and the result is written with CHAR_STRING_ADVANCE.
   Otherwise the byte is stored as is.  The caller should declare and
   set the variables `dst', `multibytep' and `produced_chars'
   appropriately in advance.  */

#define EMIT_ONE_BYTE(c)                                \
  do {                                                  \
    produced_chars++;                                   \
    if (! multibytep)                                   \
      *dst++ = (c);                                     \
    else                                                \
      {                                                 \
        int ch = (c);                                   \
        if (ch >= 0x80)                                 \
          ch = BYTE8_TO_CHAR (ch);                      \
        CHAR_STRING_ADVANCE (ch, dst);                  \
      }                                                 \
  } while (0)
aa72b389 856
aa72b389 857
/* Two-byte variant of EMIT_ONE_BYTE: count two produced characters
   and store C1 then C2 at DST, each in multibyte form when
   MULTIBYTEP is nonzero.  */

#define EMIT_TWO_BYTES(c1, c2)                          \
  do {                                                  \
    produced_chars += 2;                                \
    if (! multibytep)                                   \
      {                                                 \
        *dst++ = (c1);                                  \
        *dst++ = (c2);                                  \
      }                                                 \
    else                                                \
      {                                                 \
        int ch = (c1);                                  \
                                                        \
        if (ch >= 0x80)                                 \
          ch = BYTE8_TO_CHAR (ch);                      \
        CHAR_STRING_ADVANCE (ch, dst);                  \
        ch = (c2);                                      \
        if (ch >= 0x80)                                 \
          ch = BYTE8_TO_CHAR (ch);                      \
        CHAR_STRING_ADVANCE (ch, dst);                  \
      }                                                 \
  } while (0)
882
883
df7492f9
KH
/* Emit the three bytes C1, C2 and C3 in that order, with the same
   conventions as EMIT_ONE_BYTE.  */
#define EMIT_THREE_BYTES(c1, c2, c3)    \
  do {                                  \
    EMIT_TWO_BYTES (c1, c2);            \
    EMIT_ONE_BYTE (c3);                 \
  } while (0)
aa72b389 889
aa72b389 890
df7492f9
KH
/* Emit the four bytes C1, C2, C3 and C4 in that order, with the same
   conventions as EMIT_ONE_BYTE.  */
#define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
  do {                                          \
    EMIT_ONE_BYTE (c1);                         \
    EMIT_ONE_BYTE (c2);                         \
    EMIT_TWO_BYTES (c3, c4);                    \
  } while (0)
aa72b389 896
aa72b389 897
f6cbaf43
KH
898/* Prototypes for static functions. */
899static void record_conversion_result P_ ((struct coding_system *coding,
900 enum coding_result_code result));
901static int detect_coding_utf_8 P_ ((struct coding_system *,
902 struct coding_detection_info *info));
903static void decode_coding_utf_8 P_ ((struct coding_system *));
904static int encode_coding_utf_8 P_ ((struct coding_system *));
905
906static int detect_coding_utf_16 P_ ((struct coding_system *,
907 struct coding_detection_info *info));
908static void decode_coding_utf_16 P_ ((struct coding_system *));
909static int encode_coding_utf_16 P_ ((struct coding_system *));
910
911static int detect_coding_iso_2022 P_ ((struct coding_system *,
912 struct coding_detection_info *info));
913static void decode_coding_iso_2022 P_ ((struct coding_system *));
914static int encode_coding_iso_2022 P_ ((struct coding_system *));
915
916static int detect_coding_emacs_mule P_ ((struct coding_system *,
917 struct coding_detection_info *info));
918static void decode_coding_emacs_mule P_ ((struct coding_system *));
919static int encode_coding_emacs_mule P_ ((struct coding_system *));
920
921static int detect_coding_sjis P_ ((struct coding_system *,
922 struct coding_detection_info *info));
923static void decode_coding_sjis P_ ((struct coding_system *));
924static int encode_coding_sjis P_ ((struct coding_system *));
925
926static int detect_coding_big5 P_ ((struct coding_system *,
927 struct coding_detection_info *info));
928static void decode_coding_big5 P_ ((struct coding_system *));
929static int encode_coding_big5 P_ ((struct coding_system *));
930
931static int detect_coding_ccl P_ ((struct coding_system *,
932 struct coding_detection_info *info));
933static void decode_coding_ccl P_ ((struct coding_system *));
934static int encode_coding_ccl P_ ((struct coding_system *));
935
936static void decode_coding_raw_text P_ ((struct coding_system *));
937static int encode_coding_raw_text P_ ((struct coding_system *));
938
939static void coding_set_source P_ ((struct coding_system *));
940static void coding_set_destination P_ ((struct coding_system *));
941static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
942static void coding_alloc_by_making_gap P_ ((struct coding_system *,
287c57d7 943 EMACS_INT, EMACS_INT));
f6cbaf43
KH
944static unsigned char *alloc_destination P_ ((struct coding_system *,
945 EMACS_INT, unsigned char *));
946static void setup_iso_safe_charsets P_ ((Lisp_Object));
947static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
948 int *, int *,
949 unsigned char *));
950static int detect_eol P_ ((const unsigned char *,
951 EMACS_INT, enum coding_category));
952static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
953static void decode_eol P_ ((struct coding_system *));
954static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
e951386e 955static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *));
f6cbaf43 956static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
f6cbaf43
KH
957static INLINE void produce_charset P_ ((struct coding_system *, int *,
958 EMACS_INT));
959static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
960static int decode_coding P_ ((struct coding_system *));
961static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
3ed051d4 962 struct coding_system *,
f6cbaf43
KH
963 int *, EMACS_INT *));
964static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
965 struct coding_system *,
966 int *, EMACS_INT *));
967static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
968static int encode_coding P_ ((struct coding_system *));
969static Lisp_Object make_conversion_work_buffer P_ ((int));
970static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
971static INLINE int char_encodable_p P_ ((int, Lisp_Object));
972static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
973
065e3595
KH
974static void
975record_conversion_result (struct coding_system *coding,
976 enum coding_result_code result)
977{
978 coding->result = result;
979 switch (result)
980 {
981 case CODING_RESULT_INSUFFICIENT_SRC:
982 Vlast_code_conversion_error = Qinsufficient_source;
983 break;
984 case CODING_RESULT_INCONSISTENT_EOL:
985 Vlast_code_conversion_error = Qinconsistent_eol;
986 break;
987 case CODING_RESULT_INVALID_SRC:
988 Vlast_code_conversion_error = Qinvalid_source;
989 break;
990 case CODING_RESULT_INTERRUPT:
991 Vlast_code_conversion_error = Qinterrupted;
992 break;
993 case CODING_RESULT_INSUFFICIENT_MEM:
994 Vlast_code_conversion_error = Qinsufficient_memory;
995 break;
ebaf11b6
KH
996 case CODING_RESULT_INSUFFICIENT_DST:
997 /* Don't record this error in Vlast_code_conversion_error
998 because it happens just temporarily and is resolved when the
999 whole conversion is finished. */
1000 break;
409ea3a1
AS
1001 case CODING_RESULT_SUCCESS:
1002 break;
35befdaa
KH
1003 default:
1004 Vlast_code_conversion_error = intern ("Unknown error");
065e3595
KH
1005 }
1006}
1007
75f80e63
EZ
/* This wrapper macro is used to preserve validity of pointers into
   buffer text across calls to decode_char, which could cause
   relocation of buffers if it loads a charset map, because loading a
   charset map allocates large structures.

   C is set to DECODE_CHAR (CHARSET, CODE).  If `charset_map_loaded'
   became nonzero during that call, CODING->source is recomputed with
   coding_set_source and SRC, SRC_BASE and SRC_END are shifted by the
   distance the source moved.  Every argument is parenthesized in the
   expansion so that expressions such as `cs + 1' may be passed.  */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    (c) = DECODE_CHAR (charset, code);                                       \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        /* Still the address from before the relocation.  */                 \
        const unsigned char *orig = (coding)->source;                        \
        EMACS_INT offset;                                                    \
                                                                             \
        coding_set_source (coding);                                          \
        offset = (coding)->source - orig;                                    \
        (src) += offset;                                                     \
        (src_base) += offset;                                                \
        (src_end) += offset;                                                 \
      }                                                                      \
  } while (0)
1028
1029
119852e7
KH
/* Unless there is more than BYTES length of room at dst, allocate
   memory for coding->destination and update dst and dst_end.  The
   amount requested is BYTES plus the number of elements still
   pending in charbuf.  We don't have to take care of coding->source
   which will be relocated.  It is handled by calling
   coding_set_source in encode_coding.

   The caller must have `coding', `dst', `dst_end', `charbuf' and
   `charbuf_end' in scope.  */

#define ASSURE_DESTINATION(bytes)                               \
  do {                                                          \
    if (dst + (bytes) >= dst_end)                               \
      {                                                         \
        /* EMACS_INT, like the NBYTES parameter of              \
           alloc_destination, so that the pointer difference    \
           is not truncated to int.  */                         \
        EMACS_INT more_bytes = charbuf_end - charbuf + (bytes); \
                                                                \
        dst = alloc_destination (coding, more_bytes, dst);      \
        dst_end = coding->destination + coding->dst_bytes;      \
      }                                                         \
  } while (0)
aa72b389 1045
aa72b389 1046
db274c7a
KH
/* Store multibyte form of the character C in P, and advance P to the
   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
   never calls MAYBE_UNIFY_CHAR.

   C is evaluated several times, so it must be free of side effects;
   it is parenthesized at every use so that an expression argument
   such as `hi | lo' is encoded correctly.  Characters above
   MAX_5_BYTE_CHAR are handed to BYTE8_STRING after subtracting
   0x3FFF80.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
  } while (0)
1076
1077
/* Yield the character code of the multibyte form starting at P and
   move P just past that form.  Same job as STRING_CHAR_ADVANCE, but
   MAYBE_UNIFY_CHAR is never called.  The length (1 to 5 bytes) is
   chosen from the high bits of the leading byte; a two-byte form
   whose leading byte is below 0xC2 has 0x3FFF80 ORed into the
   result.  P is evaluated several times.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
  (((p)[0] & 0x80) == 0                                         \
   ? *(p)++                                                     \
   : ((p)[0] & 0x20) == 0                                       \
   ? ((p) += 2,                                                 \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)        \
       | (((p)[-2] & 0x1F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p)[0] & 0x10) == 0                                       \
   ? ((p) += 3,                                                 \
      ((((p)[-3] & 0x0F) << 12)                                 \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p)[0] & 0x08) == 0                                       \
   ? ((p) += 4,                                                 \
      ((((p)[-4] & 0xF) << 18)                                  \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p) += 5,                                                 \
      ((((p)[-4] & 0x3F) << 18)                                 \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F))))
1106
aa72b389 1107
df7492f9
KH
1108static void
1109coding_set_source (coding)
aa72b389 1110 struct coding_system *coding;
aa72b389 1111{
df7492f9
KH
1112 if (BUFFERP (coding->src_object))
1113 {
2cb26057 1114 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 1115
df7492f9 1116 if (coding->src_pos < 0)
2cb26057 1117 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 1118 else
2cb26057 1119 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 1120 }
df7492f9 1121 else if (STRINGP (coding->src_object))
aa72b389 1122 {
8f924df7 1123 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 1124 }
df7492f9
KH
1125 else
1126 /* Otherwise, the source is C string and is never relocated
1127 automatically. Thus we don't have to update anything. */
1128 ;
1129}
aa72b389 1130
df7492f9
KH
1131static void
1132coding_set_destination (coding)
1133 struct coding_system *coding;
1134{
1135 if (BUFFERP (coding->dst_object))
aa72b389 1136 {
df7492f9 1137 if (coding->src_pos < 0)
aa72b389 1138 {
13818c30 1139 coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
28f67a95
KH
1140 coding->dst_bytes = (GAP_END_ADDR
1141 - (coding->src_bytes - coding->consumed)
1142 - coding->destination);
aa72b389 1143 }
df7492f9 1144 else
28f67a95
KH
1145 {
1146 /* We are sure that coding->dst_pos_byte is before the gap
1147 of the buffer. */
1148 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
13818c30 1149 + coding->dst_pos_byte - BEG_BYTE);
28f67a95
KH
1150 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1151 - coding->destination);
1152 }
df7492f9
KH
1153 }
1154 else
1155 /* Otherwise, the destination is C string and is never relocated
1156 automatically. Thus we don't have to update anything. */
1157 ;
1158}
1159
1160
1161static void
1162coding_alloc_by_realloc (coding, bytes)
1163 struct coding_system *coding;
1164 EMACS_INT bytes;
1165{
1166 coding->destination = (unsigned char *) xrealloc (coding->destination,
1167 coding->dst_bytes + bytes);
1168 coding->dst_bytes += bytes;
1169}
1170
1171static void
db274c7a 1172coding_alloc_by_making_gap (coding, gap_head_used, bytes)
df7492f9 1173 struct coding_system *coding;
db274c7a 1174 EMACS_INT gap_head_used, bytes;
df7492f9 1175{
db274c7a 1176 if (EQ (coding->src_object, coding->dst_object))
df7492f9 1177 {
db274c7a
KH
1178 /* The gap may contain the produced data at the head and not-yet
1179 consumed data at the tail. To preserve those data, we at
1180 first make the gap size to zero, then increase the gap
1181 size. */
1182 EMACS_INT add = GAP_SIZE;
1183
1184 GPT += gap_head_used, GPT_BYTE += gap_head_used;
1185 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
df7492f9
KH
1186 make_gap (bytes);
1187 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
db274c7a 1188 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
df7492f9 1189 }
730fff51 1190 else
df7492f9 1191 {
2c78b7e1
KH
1192 Lisp_Object this_buffer;
1193
1194 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1195 set_buffer_internal (XBUFFER (coding->dst_object));
1196 make_gap (bytes);
1197 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1198 }
df7492f9 1199}
8f924df7 1200
df7492f9
KH
1201
1202static unsigned char *
1203alloc_destination (coding, nbytes, dst)
1204 struct coding_system *coding;
3e139625 1205 EMACS_INT nbytes;
df7492f9
KH
1206 unsigned char *dst;
1207{
1208 EMACS_INT offset = dst - coding->destination;
1209
1210 if (BUFFERP (coding->dst_object))
db274c7a
KH
1211 {
1212 struct buffer *buf = XBUFFER (coding->dst_object);
1213
1214 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1215 }
aa72b389 1216 else
df7492f9 1217 coding_alloc_by_realloc (coding, nbytes);
df7492f9
KH
1218 coding_set_destination (coding);
1219 dst = coding->destination + offset;
1220 return dst;
1221}
aa72b389 1222
ff0dacd7
KH
1223/** Macros for annotations. */
1224
ff0dacd7
KH
1225/* An annotation data is stored in the array coding->charbuf in this
1226 format:
69a80ea3 1227 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
ff0dacd7
KH
1228 LENGTH is the number of elements in the annotation.
1229 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
69a80ea3 1230 NCHARS is the number of characters in the text annotated.
ff0dacd7
KH
1231
1232 The format of the following elements depend on ANNOTATION_MASK.
1233
1234 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1235 follows:
e951386e
KH
1236 ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1237
1238 NBYTES is the number of bytes specified in the header part of
1239 old-style emacs-mule encoding, or 0 for the other kind of
1240 composition.
1241
ff0dacd7 1242 METHOD is one of enum composition_method.
e951386e 1243
ad1746f5 1244 Optional COMPOSITION-COMPONENTS are characters and composition
ff0dacd7
KH
1245 rules.
1246
1247 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
e951386e
KH
1248 follows.
1249
1250 If ANNOTATION_MASK is 0, this annotation is just a space holder to
1251 recover from an invalid annotation, and should be skipped by
1252 produce_annotation. */
1253
/* Maximum length of the header of annotation data.  */
#define MAX_ANNOTATION_LENGTH 5

/* Write the three-element annotation header [ -LEN MASK NCHARS ] at
   BUF, advance BUF past it, and mark `coding' as annotated.  The
   caller must have `coding' in scope.  The expansion deliberately
   has no trailing semicolon, so the macro can be used as a single
   statement, including as the body of an `if' that has an `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1264
/* Write a composition annotation at BUF and advance BUF past it: the
   common header with length 5 and CODING_ANNOTATE_COMPOSITION_MASK,
   followed by NBYTES and METHOD.  The arguments are parenthesized so
   that BUF may be an expression such as `*pp'.  */
#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                    \
  do {                                                                       \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars);  \
    *(buf)++ = (nbytes);                                                     \
    *(buf)++ = (method);                                                     \
  } while (0)
1271
1272
69a80ea3
KH
/* Write a charset annotation at BUF and advance BUF past it: the
   common header with length 4 and CODING_ANNOTATE_CHARSET_MASK,
   followed by the charset ID.  The arguments are parenthesized so
   that BUF may be an expression such as `*pp'.  */
#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1278
df7492f9
KH
1279\f
1280/*** 2. Emacs' internal format (emacs-utf-8) ***/
1281
1282
1283
1284\f
1285/*** 3. UTF-8 ***/
1286
1287/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1288 Check if a text is encoded in UTF-8. If it is, return 1, else
1289 return 0. */
df7492f9
KH
1290
/* Classification of a UTF-8 octet C by its high bits.  */
#define UTF_8_1_OCTET_P(c)         ((c) <= 0x7F)             /* 0xxxxxxx */
#define UTF_8_EXTRA_OCTET_P(c)     (0x80 == ((c) & 0xC0))    /* 10xxxxxx */
#define UTF_8_2_OCTET_LEADING_P(c) (0xC0 == ((c) & 0xE0))    /* 110xxxxx */
#define UTF_8_3_OCTET_LEADING_P(c) (0xE0 == ((c) & 0xF0))    /* 1110xxxx */
#define UTF_8_4_OCTET_LEADING_P(c) (0xF0 == ((c) & 0xF8))    /* 11110xxx */
#define UTF_8_5_OCTET_LEADING_P(c) (0xF8 == ((c) & 0xFC))    /* 111110xx */

/* The byte order mark as a character code, and the three octets of
   its UTF-8 form.  */
#define UTF_BOM 0xFEFF
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1302
df7492f9 1303static int
ff0dacd7 1304detect_coding_utf_8 (coding, detect_info)
df7492f9 1305 struct coding_system *coding;
ff0dacd7 1306 struct coding_detection_info *detect_info;
df7492f9 1307{
065e3595 1308 const unsigned char *src = coding->source, *src_base;
8f924df7 1309 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1310 int multibytep = coding->src_multibyte;
1311 int consumed_chars = 0;
a470d443 1312 int bom_found = 0;
df7492f9
KH
1313 int found = 0;
1314
ff0dacd7 1315 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1316 /* A coding system of this category is always ASCII compatible. */
1317 src += coding->head_ascii;
1318
1319 while (1)
aa72b389 1320 {
df7492f9 1321 int c, c1, c2, c3, c4;
aa72b389 1322
065e3595 1323 src_base = src;
df7492f9 1324 ONE_MORE_BYTE (c);
065e3595 1325 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1326 continue;
1327 ONE_MORE_BYTE (c1);
065e3595 1328 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1329 break;
1330 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1331 {
a470d443 1332 found = 1;
df7492f9 1333 continue;
aa72b389 1334 }
df7492f9 1335 ONE_MORE_BYTE (c2);
065e3595 1336 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1337 break;
1338 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1339 {
a470d443
KH
1340 found = 1;
1341 if (src_base == coding->source
1342 && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1343 bom_found = 1;
df7492f9 1344 continue;
aa72b389 1345 }
df7492f9 1346 ONE_MORE_BYTE (c3);
065e3595 1347 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1348 break;
1349 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1350 {
a470d443 1351 found = 1;
df7492f9
KH
1352 continue;
1353 }
1354 ONE_MORE_BYTE (c4);
065e3595 1355 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1356 break;
1357 if (UTF_8_5_OCTET_LEADING_P (c))
1358 {
a470d443 1359 found = 1;
df7492f9
KH
1360 continue;
1361 }
1362 break;
aa72b389 1363 }
ff0dacd7 1364 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1365 return 0;
aa72b389 1366
df7492f9 1367 no_more_source:
065e3595 1368 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1369 {
ff0dacd7 1370 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1371 return 0;
aa72b389 1372 }
a470d443
KH
1373 if (bom_found)
1374 {
1375 /* The first character 0xFFFE doesn't necessarily mean a BOM. */
1376 detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1377 }
1378 else
1379 {
1380 detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
0e17387a
KH
1381 if (found)
1382 detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
a470d443 1383 }
ff0dacd7 1384 return 1;
aa72b389
KH
1385}
1386
4ed46869 1387
b73bfc1c 1388static void
df7492f9 1389decode_coding_utf_8 (coding)
b73bfc1c 1390 struct coding_system *coding;
b73bfc1c 1391{
8f924df7
KH
1392 const unsigned char *src = coding->source + coding->consumed;
1393 const unsigned char *src_end = coding->source + coding->src_bytes;
1394 const unsigned char *src_base;
69a80ea3
KH
1395 int *charbuf = coding->charbuf + coding->charbuf_used;
1396 int *charbuf_end = coding->charbuf + coding->charbuf_size;
453b38f0 1397 int consumed_chars = 0, consumed_chars_base = 0;
df7492f9 1398 int multibytep = coding->src_multibyte;
a470d443 1399 enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
24a73b0a 1400 Lisp_Object attr, charset_list;
0a9564cb
EZ
1401 int eol_crlf =
1402 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 1403 int byte_after_cr = -1;
4ed46869 1404
24a73b0a 1405 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1406
a470d443
KH
1407 if (bom != utf_without_bom)
1408 {
1409 int c1, c2, c3;
1410
1411 src_base = src;
1412 ONE_MORE_BYTE (c1);
1413 if (! UTF_8_3_OCTET_LEADING_P (c1))
1414 src = src_base;
1415 else
1416 {
159bd5a2 1417 ONE_MORE_BYTE (c2);
a470d443
KH
1418 if (! UTF_8_EXTRA_OCTET_P (c2))
1419 src = src_base;
1420 else
1421 {
159bd5a2 1422 ONE_MORE_BYTE (c3);
a470d443
KH
1423 if (! UTF_8_EXTRA_OCTET_P (c3))
1424 src = src_base;
1425 else
1426 {
1427 if ((c1 != UTF_8_BOM_1)
1428 || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1429 src = src_base;
1430 else
1431 CODING_UTF_8_BOM (coding) = utf_without_bom;
1432 }
1433 }
1434 }
1435 }
1436 CODING_UTF_8_BOM (coding) = utf_without_bom;
1437
1438
1439
df7492f9 1440 while (1)
b73bfc1c 1441 {
df7492f9 1442 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1443
df7492f9
KH
1444 src_base = src;
1445 consumed_chars_base = consumed_chars;
4af310db 1446
df7492f9 1447 if (charbuf >= charbuf_end)
b71f6f73
KH
1448 {
1449 if (byte_after_cr >= 0)
1450 src_base--;
1451 break;
1452 }
df7492f9 1453
119852e7
KH
1454 if (byte_after_cr >= 0)
1455 c1 = byte_after_cr, byte_after_cr = -1;
1456 else
1457 ONE_MORE_BYTE (c1);
065e3595
KH
1458 if (c1 < 0)
1459 {
1460 c = - c1;
1461 }
1462 else if (UTF_8_1_OCTET_P(c1))
df7492f9 1463 {
119852e7
KH
1464 if (eol_crlf && c1 == '\r')
1465 ONE_MORE_BYTE (byte_after_cr);
df7492f9 1466 c = c1;
4af310db 1467 }
df7492f9 1468 else
4af310db 1469 {
df7492f9 1470 ONE_MORE_BYTE (c2);
065e3595 1471 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1472 goto invalid_code;
1473 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1474 {
b0edb2c5
DL
1475 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1476 /* Reject overlong sequences here and below. Encoders
1477 producing them are incorrect, they can be misleading,
1478 and they mess up read/write invariance. */
1479 if (c < 128)
1480 goto invalid_code;
4af310db 1481 }
df7492f9 1482 else
aa72b389 1483 {
df7492f9 1484 ONE_MORE_BYTE (c3);
065e3595 1485 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1486 goto invalid_code;
1487 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1488 {
1489 c = (((c1 & 0xF) << 12)
1490 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1491 if (c < 0x800
1492 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1493 goto invalid_code;
1494 }
df7492f9
KH
1495 else
1496 {
1497 ONE_MORE_BYTE (c4);
065e3595 1498 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1499 goto invalid_code;
1500 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1501 {
df7492f9
KH
1502 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1503 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1504 if (c < 0x10000)
1505 goto invalid_code;
1506 }
df7492f9
KH
1507 else
1508 {
1509 ONE_MORE_BYTE (c5);
065e3595 1510 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1511 goto invalid_code;
1512 if (UTF_8_5_OCTET_LEADING_P (c1))
1513 {
1514 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1515 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1516 | (c5 & 0x3F));
b0edb2c5 1517 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1518 goto invalid_code;
1519 }
1520 else
1521 goto invalid_code;
1522 }
1523 }
aa72b389 1524 }
b73bfc1c 1525 }
df7492f9
KH
1526
1527 *charbuf++ = c;
1528 continue;
1529
1530 invalid_code:
1531 src = src_base;
1532 consumed_chars = consumed_chars_base;
1533 ONE_MORE_BYTE (c);
1534 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1535 coding->errors++;
aa72b389
KH
1536 }
1537
df7492f9
KH
1538 no_more_source:
1539 coding->consumed_char += consumed_chars_base;
1540 coding->consumed = src_base - coding->source;
1541 coding->charbuf_used = charbuf - coding->charbuf;
1542}
1543
1544
1545static int
1546encode_coding_utf_8 (coding)
1547 struct coding_system *coding;
1548{
1549 int multibytep = coding->dst_multibyte;
1550 int *charbuf = coding->charbuf;
1551 int *charbuf_end = charbuf + coding->charbuf_used;
1552 unsigned char *dst = coding->destination + coding->produced;
1553 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1554 int produced_chars = 0;
df7492f9
KH
1555 int c;
1556
a470d443
KH
1557 if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1558 {
1559 ASSURE_DESTINATION (3);
1560 EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1561 CODING_UTF_8_BOM (coding) = utf_without_bom;
1562 }
1563
df7492f9 1564 if (multibytep)
aa72b389 1565 {
df7492f9
KH
1566 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1567
1568 while (charbuf < charbuf_end)
b73bfc1c 1569 {
df7492f9 1570 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1571
df7492f9
KH
1572 ASSURE_DESTINATION (safe_room);
1573 c = *charbuf++;
28f67a95
KH
1574 if (CHAR_BYTE8_P (c))
1575 {
1576 c = CHAR_TO_BYTE8 (c);
1577 EMIT_ONE_BYTE (c);
1578 }
1579 else
1580 {
db274c7a 1581 CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
28f67a95
KH
1582 for (p = str; p < pend; p++)
1583 EMIT_ONE_BYTE (*p);
1584 }
b73bfc1c 1585 }
aa72b389 1586 }
df7492f9
KH
1587 else
1588 {
1589 int safe_room = MAX_MULTIBYTE_LENGTH;
1590
1591 while (charbuf < charbuf_end)
b73bfc1c 1592 {
df7492f9
KH
1593 ASSURE_DESTINATION (safe_room);
1594 c = *charbuf++;
f03caae0
KH
1595 if (CHAR_BYTE8_P (c))
1596 *dst++ = CHAR_TO_BYTE8 (c);
1597 else
db274c7a 1598 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
df7492f9 1599 produced_chars++;
4ed46869
KH
1600 }
1601 }
065e3595 1602 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1603 coding->produced_char += produced_chars;
1604 coding->produced = dst - coding->destination;
1605 return 0;
4ed46869
KH
1606}
1607
b73bfc1c 1608
df7492f9 1609/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1610 Check if a text is encoded in one of UTF-16 based coding systems.
1611 If it is, return 1, else return 0. */
aa72b389 1612
df7492f9
KH
/* Nonzero if the 16-bit unit VAL is a high surrogate
   (0xD800..0xDBFF).  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (0xD800 == ((val) & 0xFC00))

/* Nonzero if the 16-bit unit VAL is a low surrogate
   (0xDC00..0xDFFF).  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (0xDC00 == ((val) & 0xFC00))

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
#define UTF_16_INVALID_P(val)           \
  ((0xFFFE == (val))                    \
   || (0xFFFF == (val))                 \
   || UTF_16_LOW_SURROGATE_P (val))
aa72b389 1623
aa72b389 1624
df7492f9 1625static int
ff0dacd7 1626detect_coding_utf_16 (coding, detect_info)
aa72b389 1627 struct coding_system *coding;
ff0dacd7 1628 struct coding_detection_info *detect_info;
aa72b389 1629{
8f924df7
KH
1630 const unsigned char *src = coding->source, *src_base = src;
1631 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1632 int multibytep = coding->src_multibyte;
1633 int consumed_chars = 0;
1634 int c1, c2;
aa72b389 1635
ff0dacd7 1636 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1637 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1638 && (coding->src_chars & 1))
ff0dacd7
KH
1639 {
1640 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1641 return 0;
1642 }
24a73b0a 1643
f56a4450 1644 TWO_MORE_BYTES (c1, c2);
df7492f9 1645 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1646 {
b49a1807
KH
1647 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1648 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1649 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1650 | CATEGORY_MASK_UTF_16_BE_NOSIG
1651 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1652 }
df7492f9 1653 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1654 {
b49a1807
KH
1655 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1656 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1657 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1658 | CATEGORY_MASK_UTF_16_BE_NOSIG
1659 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1660 }
220eeac9 1661 else if (c2 < 0)
f56a4450
KH
1662 {
1663 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1664 return 0;
1665 }
2f3cbb32 1666 else
24a73b0a 1667 {
2f3cbb32
KH
1668 /* We check the dispersion of Eth and Oth bytes where E is even and
1669 O is odd. If both are high, we assume binary data.*/
1670 unsigned char e[256], o[256];
1671 unsigned e_num = 1, o_num = 1;
1672
1673 memset (e, 0, 256);
1674 memset (o, 0, 256);
1675 e[c1] = 1;
1676 o[c2] = 1;
1677
cc13543e
KH
1678 detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1679 |CATEGORY_MASK_UTF_16_BE
1680 | CATEGORY_MASK_UTF_16_LE);
2f3cbb32 1681
7f1faf1c
KH
1682 while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1683 != CATEGORY_MASK_UTF_16)
2f3cbb32 1684 {
f56a4450 1685 TWO_MORE_BYTES (c1, c2);
220eeac9 1686 if (c2 < 0)
f56a4450 1687 break;
2f3cbb32
KH
1688 if (! e[c1])
1689 {
1690 e[c1] = 1;
1691 e_num++;
cc13543e
KH
1692 if (e_num >= 128)
1693 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
2f3cbb32
KH
1694 }
1695 if (! o[c2])
1696 {
977b85f4 1697 o[c2] = 1;
2f3cbb32 1698 o_num++;
cc13543e
KH
1699 if (o_num >= 128)
1700 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
2f3cbb32
KH
1701 }
1702 }
2f3cbb32 1703 return 0;
ff0dacd7 1704 }
2f3cbb32 1705
df7492f9 1706 no_more_source:
ff0dacd7 1707 return 1;
df7492f9 1708}
aa72b389 1709
df7492f9
KH
1710static void
1711decode_coding_utf_16 (coding)
1712 struct coding_system *coding;
1713{
8f924df7
KH
1714 const unsigned char *src = coding->source + coding->consumed;
1715 const unsigned char *src_end = coding->source + coding->src_bytes;
1716 const unsigned char *src_base;
69a80ea3 1717 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
1718 /* We may produces at most 3 chars in one loop. */
1719 int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
3a8406e1 1720 int consumed_chars = 0, consumed_chars_base = 0;
df7492f9 1721 int multibytep = coding->src_multibyte;
a470d443 1722 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
df7492f9
KH
1723 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1724 int surrogate = CODING_UTF_16_SURROGATE (coding);
24a73b0a 1725 Lisp_Object attr, charset_list;
0a9564cb
EZ
1726 int eol_crlf =
1727 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 1728 int byte_after_cr1 = -1, byte_after_cr2 = -1;
df7492f9 1729
24a73b0a 1730 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1731
a470d443 1732 if (bom == utf_with_bom)
aa72b389 1733 {
df7492f9 1734 int c, c1, c2;
4af310db 1735
aa72b389 1736 src_base = src;
df7492f9
KH
1737 ONE_MORE_BYTE (c1);
1738 ONE_MORE_BYTE (c2);
e19c3639 1739 c = (c1 << 8) | c2;
aa72b389 1740
b49a1807
KH
1741 if (endian == utf_16_big_endian
1742 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1743 {
b49a1807
KH
1744 /* The first two bytes are not BOM. Treat them as bytes
1745 for a normal character. */
1746 src = src_base;
1747 coding->errors++;
aa72b389 1748 }
a470d443 1749 CODING_UTF_16_BOM (coding) = utf_without_bom;
b49a1807 1750 }
a470d443 1751 else if (bom == utf_detect_bom)
b49a1807
KH
1752 {
1753 /* We have already tried to detect BOM and failed in
1754 detect_coding. */
a470d443 1755 CODING_UTF_16_BOM (coding) = utf_without_bom;
df7492f9 1756 }
aa72b389 1757
df7492f9
KH
1758 while (1)
1759 {
1760 int c, c1, c2;
1761
1762 src_base = src;
1763 consumed_chars_base = consumed_chars;
1764
df80c7f0 1765 if (charbuf >= charbuf_end)
b71f6f73
KH
1766 {
1767 if (byte_after_cr1 >= 0)
1768 src_base -= 2;
1769 break;
1770 }
df7492f9 1771
119852e7
KH
1772 if (byte_after_cr1 >= 0)
1773 c1 = byte_after_cr1, byte_after_cr1 = -1;
1774 else
1775 ONE_MORE_BYTE (c1);
065e3595
KH
1776 if (c1 < 0)
1777 {
1778 *charbuf++ = -c1;
1779 continue;
1780 }
119852e7
KH
1781 if (byte_after_cr2 >= 0)
1782 c2 = byte_after_cr2, byte_after_cr2 = -1;
1783 else
1784 ONE_MORE_BYTE (c2);
065e3595
KH
1785 if (c2 < 0)
1786 {
1787 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1788 *charbuf++ = -c2;
1789 continue;
1790 }
df7492f9 1791 c = (endian == utf_16_big_endian
e19c3639 1792 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
119852e7 1793
df7492f9 1794 if (surrogate)
fd3ae0b9 1795 {
df7492f9 1796 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1797 {
df7492f9
KH
1798 if (endian == utf_16_big_endian)
1799 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1800 else
1801 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1802 *charbuf++ = c1;
1803 *charbuf++ = c2;
1804 coding->errors++;
1805 if (UTF_16_HIGH_SURROGATE_P (c))
1806 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1807 else
df7492f9 1808 *charbuf++ = c;
fd3ae0b9
KH
1809 }
1810 else
df7492f9
KH
1811 {
1812 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1813 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1814 *charbuf++ = 0x10000 + c;
df7492f9 1815 }
fd3ae0b9 1816 }
aa72b389 1817 else
df7492f9
KH
1818 {
1819 if (UTF_16_HIGH_SURROGATE_P (c))
1820 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1821 else
119852e7
KH
1822 {
1823 if (eol_crlf && c == '\r')
1824 {
1825 ONE_MORE_BYTE (byte_after_cr1);
1826 ONE_MORE_BYTE (byte_after_cr2);
1827 }
1828 *charbuf++ = c;
1829 }
8f924df7 1830 }
aa72b389 1831 }
df7492f9
KH
1832
1833 no_more_source:
1834 coding->consumed_char += consumed_chars_base;
1835 coding->consumed = src_base - coding->source;
1836 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1837}
b73bfc1c 1838
df7492f9
KH
/* Encode the characters CODING->charbuf[0 .. CODING->charbuf_used) as
   UTF-16 and store the bytes in CODING->destination, starting at
   offset CODING->produced.

   If the BOM setting of CODING is anything but utf_without_bom, a
   byte order mark is emitted first and the setting is then changed
   to utf_without_bom.  The byte order of every 16-bit unit follows
   CODING_UTF_16_ENDIAN (CODING).

   On exit CODING->produced and CODING->produced_char are updated and
   CODING_RESULT_SUCCESS is recorded.  Always return 0.

   The locals MULTIBYTEP, CHARBUF, CHARBUF_END, DST, DST_END and
   PRODUCED_CHARS are presumably referenced by name from inside
   ASSURE_DESTINATION and the EMIT_* macros (their definitions are not
   in this part of the file), so they must not be renamed without
   checking those macros.  */
1839static int
1840encode_coding_utf_16 (coding)
1841 struct coding_system *coding;
1842{
1843 int multibytep = coding->dst_multibyte;
1844 int *charbuf = coding->charbuf;
1845 int *charbuf_end = charbuf + coding->charbuf_used;
1846 unsigned char *dst = coding->destination + coding->produced;
1847 unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Passed to ASSURE_DESTINATION before each emission; the largest
     emission below is four bytes (one surrogate pair).  */
1848 int safe_room = 8;
a470d443 1849 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
df7492f9
KH
1850 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1851 int produced_chars = 0;
24a73b0a 1852 Lisp_Object attrs, charset_list;
df7492f9 1853 int c;
4ed46869 1854
24a73b0a 1855 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 1856
  /* Emit the byte order mark U+FEFF in the selected byte order.  */
a470d443 1857 if (bom != utf_without_bom)
df7492f9
KH
1858 {
1859 ASSURE_DESTINATION (safe_room);
1860 if (big_endian)
df7492f9 1861 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1862 else
1863 EMIT_TWO_BYTES (0xFF, 0xFE);
a470d443 1864 CODING_UTF_16_BOM (coding) = utf_without_bom;
df7492f9
KH
1865 }
1866
1867 while (charbuf < charbuf_end)
1868 {
1869 ASSURE_DESTINATION (safe_room);
1870 c = *charbuf++;
      /* A character beyond MAX_UNICODE_CHAR is replaced by the
	 default character of CODING.  */
60afa08d 1871 if (c > MAX_UNICODE_CHAR)
e19c3639 1872 c = coding->default_char;
df7492f9
KH
1873
      /* Below 0x10000 the character is a single 16-bit unit.  */
1874 if (c < 0x10000)
1875 {
1876 if (big_endian)
1877 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1878 else
1879 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1880 }
1881 else
1882 {
	  /* Otherwise split C - 0x10000 into a surrogate pair: C1
	     carries the upper bits (based at 0xD800), C2 the low 10
	     bits (based at 0xDC00).  */
1883 int c1, c2;
1884
1885 c -= 0x10000;
1886 c1 = (c >> 10) + 0xD800;
1887 c2 = (c & 0x3FF) + 0xDC00;
1888 if (big_endian)
1889 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1890 else
1891 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1892 }
1893 }
065e3595 1894 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1895 coding->produced = dst - coding->destination;
1896 coding->produced_char += produced_chars;
1897 return 0;
1898}
1899
1900\f
1901/*** 6. Old Emacs' internal format (emacs-mule) ***/
1902
1903/* Emacs' internal format for representation of multiple character
1904 sets is a kind of multi-byte encoding, i.e. characters are
1905 represented by variable-length sequences of one-byte codes.
1906
1907 ASCII characters and control characters (e.g. `tab', `newline') are
1908 represented by one-byte sequences which are their ASCII codes, in
1909 the range 0x00 through 0x7F.
1910
1911 8-bit characters of the range 0x80..0x9F are represented by
1912 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1913 code + 0x20).
1914
1915 8-bit characters of the range 0xA0..0xFF are represented by
1916 one-byte sequences which are their 8-bit code.
1917
1918 The other characters are represented by a sequence of `base
1919 leading-code', optional `extended leading-code', and one or two
1920 `position-code's. The length of the sequence is determined by the
1921 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1922 whereas extended leading-code and position-code take the range 0xA0
1923 through 0xFF. See `charset.h' for more details about leading-code
1924 and position-code.
1925
1926 --- CODE RANGE of Emacs' internal format ---
1927 character set range
1928 ------------- -----
1929 ascii 0x00..0x7F
1930 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1931 eight-bit-graphic 0xA0..0xFF
1932 ELSE 0x81..0x9D + [0xA0..0xFF]+
1933 ---------------------------------------------
1934
1935 As this is the internal character representation, the format is
1936 usually not used externally (i.e. in a file or in a data sent to a
1937 process). But, it is possible to have a text externally in this
1938 format (i.e. by encoding by the coding system `emacs-mule').
1939
1940 In that case, a sequence of one-byte codes has a slightly different
1941 form.
1942
1943 At first, all characters in eight-bit-control are represented by
1944 one-byte sequences which are their 8-bit code.
1945
1946 Next, character composition data are represented by the byte
1947 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1948 where,
e951386e 1949 METHOD is 0xF2 plus one of composition method (enum
df7492f9
KH
1950 composition_method),
1951
1952 BYTES is 0xA0 plus a byte length of this composition data,
1953
e951386e 1954 CHARS is 0xA0 plus a number of characters composed by this
df7492f9
KH
1955 data,
1956
ad1746f5 1957 COMPONENTs are characters of multibyte form or composition
df7492f9
KH
1958 rules encoded by two-byte of ASCII codes.
1959
1960 In addition, for backward compatibility, the following formats are
1961 also recognized as composition data on decoding.
1962
1963 0x80 MSEQ ...
1964 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1965
1966 Here,
1967 MSEQ is a multibyte form but in this special format:
1968 ASCII: 0xA0 ASCII_CODE+0x80,
1969 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1970 RULE is a one byte code of the range 0xA0..0xF0 that
1971 represents a composition rule.
1972 */
1973
/* Table indexed by a byte value.  The functions below use
   emacs_mule_bytes[B] as the total byte length of the emacs-mule
   sequence whose first byte is B: detect_coding_emacs_mule expects
   that many bytes minus one to follow, and emacs_mule_char handles
   the values 1 through 4 and aborts on anything else.  The table is
   not filled in within this part of the file.  */
1974char emacs_mule_bytes[256];
1975
e951386e
KH
1976
1977/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1978 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1979 else return 0. */
1980
/* Scan the source of CODING, after skipping its first
   CODING->head_ascii bytes, and update DETECT_INFO for the category
   CATEGORY_MASK_EMACS_MULE: the category is always added to
   DETECT_INFO->checked, and then either to DETECT_INFO->rejected
   (return 0) or, when at least one multibyte or composition sequence
   was recognized, to DETECT_INFO->found (return 1).  Note that 1 is
   also returned, without adding anything to `found', when the source
   ends cleanly and no such sequence was seen.

   The label `no_more_source' is not the target of any goto written
   out in this function; presumably ONE_MORE_BYTE jumps there when SRC
   reaches SRC_END (the macro is defined elsewhere in the file).  */
1981static int
1982detect_coding_emacs_mule (coding, detect_info)
1983 struct coding_system *coding;
1984 struct coding_detection_info *detect_info;
1985{
1986 const unsigned char *src = coding->source, *src_base;
1987 const unsigned char *src_end = coding->source + coding->src_bytes;
1988 int multibytep = coding->src_multibyte;
1989 int consumed_chars = 0;
1990 int c;
1991 int found = 0;
1992
1993 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1994 /* A coding system of this category is always ASCII compatible. */
1995 src += coding->head_ascii;
1996
1997 while (1)
1998 {
      /* Remember where this sequence starts; tested at
	 no_more_source to see whether the source ended in the middle
	 of a sequence.  */
1999 src_base = src;
2000 ONE_MORE_BYTE (c);
      /* A negative value from ONE_MORE_BYTE is skipped without being
	 examined.  */
2001 if (c < 0)
2002 continue;
2003 if (c == 0x80)
2004 {
2005 /* Perhaps the start of composite character. We simply skip
2006 it because analyzing it is too heavy for detecting. But,
2007 at least, we check that the composite character
2008 constitutes of more than 4 bytes. */
	  /* This declaration shadows the function-level SRC_BASE, so
	     the outer one keeps pointing at the position recorded at
	     the top of the loop.  */
2009 const unsigned char *src_base;
2010
2011 repeat:
2012 src_base = src;
2013 do
2014 {
2015 ONE_MORE_BYTE (c);
2016 }
2017 while (c >= 0xA0);
2018
	  /* Too short for a composition: leave the loop and reject.  */
2019 if (src - src_base <= 4)
2020 break;
2021 found = CATEGORY_MASK_EMACS_MULE;
	  /* The byte that ended the run starts another composition.  */
2022 if (c == 0x80)
2023 goto repeat;
2024 }
2025
2026 if (c < 0x80)
2027 {
	  /* ESC, SI and SO make the text be rejected for this
	     category.  */
2028 if (c < 0x20
2029 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2030 break;
2031 }
2032 else
2033 {
	  /* Number of bytes that must follow the leading byte C.  */
396475b7 2034 int more_bytes = emacs_mule_bytes[c] - 1;
e951386e
KH
2035
2036 while (more_bytes > 0)
2037 {
2038 ONE_MORE_BYTE (c);
2039 if (c < 0xA0)
2040 {
2041 src--; /* Unread the last byte. */
2042 break;
2043 }
2044 more_bytes--;
2045 }
	  /* The sequence was cut short by a byte below 0xA0, or the
	     table entry for the leading byte was less than 1.  */
2046 if (more_bytes != 0)
2047 break;
2048 found = CATEGORY_MASK_EMACS_MULE;
2049 }
2050 }
  /* Reached only through one of the `break's above.  */
2051 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2052 return 0;
2053
2054 no_more_source:
  /* The source ended.  If that happened after the start of a sequence
     and this is the last block, the sequence is truncated: reject.  */
2055 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2056 {
2057 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2058 return 0;
2059 }
2060 detect_info->found |= found;
2061 return 1;
2062}
2063
2064
2065/* Parse emacs-mule multibyte sequence at SRC and return the decoded
2066 character. If CMP_STATUS indicates that we must expect MSEQ or
2067 RULE described above, decode it and return the negative value of
685ebdc8 2068 the decoded character or rule. If an invalid byte is found, return
e951386e
KH
2069 -1. If SRC is too short, return -2. */
2070
/* Additional notes on the interface, as implemented below:

   On success, the number of bytes read from SRC is stored in *NBYTES,
   the value of the local CONSUMED_CHARS in *NCHARS, and, if ID is not
   NULL, the id of the charset of the character in *ID.  On the error
   returns (-1 and -2) none of these is stored.

   When CMP_STATUS says an old-form composition is in progress and the
   first byte is 0xA0 or greater, either an MSEQ is decoded and the
   negated character is returned (state COMPOSING_CHAR), or the byte
   itself is returned negated as a rule (any other state); in the
   latter case *ID is not stored.

   The labels `no_more_source' and `invalid_code' are not the target
   of every jump that reaches them in the text below; presumably
   ONE_MORE_BYTE jumps to `no_more_source' when SRC reaches SRC_END
   (the macro is defined elsewhere in the file).  */
df7492f9 2071int
e951386e 2072emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
df7492f9 2073 struct coding_system *coding;
065e3595 2074 const unsigned char *src;
ff0dacd7 2075 int *nbytes, *nchars, *id;
e951386e 2076 struct composition_status *cmp_status;
df7492f9 2077{
8f924df7
KH
2078 const unsigned char *src_end = coding->source + coding->src_bytes;
2079 const unsigned char *src_base = src;
df7492f9 2080 int multibytep = coding->src_multibyte;
b84ae584 2081 int charset_id;
df7492f9
KH
2082 unsigned code;
2083 int c;
2084 int consumed_chars = 0;
  /* Set to 1 when the sequence was an MSEQ of an old-form
     composition; the result is then returned negated.  */
e951386e 2085 int mseq_found = 0;
df7492f9
KH
2086
2087 ONE_MORE_BYTE (c);
065e3595 2088 if (c < 0)
df7492f9 2089 {
      /* ONE_MORE_BYTE yielded a negative value: return its negation
	 as the character, with the charset id taken from
	 emacs_mule_charset[0].  */
065e3595 2090 c = -c;
b84ae584 2091 charset_id = emacs_mule_charset[0];
065e3595
KH
2092 }
2093 else
2094 {
      /* A first byte of 0xA0 or more is valid only as part of an
	 old-form composition.  */
4d41e8b7
KH
2095 if (c >= 0xA0)
2096 {
e951386e
KH
2097 if (cmp_status->state != COMPOSING_NO
2098 && cmp_status->old_form)
4d41e8b7 2099 {
e951386e
KH
2100 if (cmp_status->state == COMPOSING_CHAR)
2101 {
		  /* Undo the MSEQ transformation: `0xA0 ASCII+0x80'
		     for ASCII, `LEADING_CODE+0x20 ...' otherwise.  */
2102 if (c == 0xA0)
2103 {
2104 ONE_MORE_BYTE (c);
2105 c -= 0x80;
2106 if (c < 0)
2107 goto invalid_code;
2108 }
2109 else
2110 c -= 0x20;
2111 mseq_found = 1;
2112 }
2113 else
2114 {
		  /* Not expecting a character: hand the byte back
		     negated, as a rule.  */
2115 *nbytes = src - src_base;
2116 *nchars = consumed_chars;
2117 return -c;
2118 }
4d41e8b7
KH
2119 }
2120 else
e951386e 2121 goto invalid_code;
4d41e8b7
KH
2122 }
2123
      /* Dispatch on the total length of the sequence led by C.  */
065e3595 2124 switch (emacs_mule_bytes[c])
b73bfc1c 2125 {
065e3595 2126 case 2:
	  /* Leading code followed by one position code.  */
b84ae584 2127 if ((charset_id = emacs_mule_charset[c]) < 0)
df7492f9
KH
2128 goto invalid_code;
2129 ONE_MORE_BYTE (c);
9ffd559c 2130 if (c < 0xA0)
065e3595 2131 goto invalid_code;
df7492f9 2132 code = c & 0x7F;
065e3595
KH
2133 break;
2134
2135 case 3:
	  /* Either a private leading code, an extended leading code
	     and one position code, or a leading code and two position
	     codes.  */
2136 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2137 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2138 {
2139 ONE_MORE_BYTE (c);
b84ae584 2140 if (c < 0xA0 || (charset_id = emacs_mule_charset[c]) < 0)
065e3595
KH
2141 goto invalid_code;
2142 ONE_MORE_BYTE (c);
9ffd559c 2143 if (c < 0xA0)
065e3595
KH
2144 goto invalid_code;
2145 code = c & 0x7F;
2146 }
2147 else
2148 {
b84ae584 2149 if ((charset_id = emacs_mule_charset[c]) < 0)
065e3595
KH
2150 goto invalid_code;
2151 ONE_MORE_BYTE (c);
9ffd559c 2152 if (c < 0xA0)
065e3595
KH
2153 goto invalid_code;
2154 code = (c & 0x7F) << 8;
2155 ONE_MORE_BYTE (c);
9ffd559c 2156 if (c < 0xA0)
065e3595
KH
2157 goto invalid_code;
2158 code |= c & 0x7F;
2159 }
2160 break;
2161
2162 case 4:
	  /* The second byte selects the charset; two position codes
	     follow.  */
2163 ONE_MORE_BYTE (c);
b84ae584 2164 if (c < 0 || (charset_id = emacs_mule_charset[c]) < 0)
df7492f9
KH
2165 goto invalid_code;
2166 ONE_MORE_BYTE (c);
9ffd559c 2167 if (c < 0xA0)
065e3595 2168 goto invalid_code;
781d7a48 2169 code = (c & 0x7F) << 8;
df7492f9 2170 ONE_MORE_BYTE (c);
9ffd559c 2171 if (c < 0xA0)
065e3595 2172 goto invalid_code;
df7492f9 2173 code |= c & 0x7F;
065e3595 2174 break;
df7492f9 2175
065e3595
KH
2176 case 1:
	  /* A single byte stands for itself.  */
2177 code = c;
b84ae584 2178 charset_id = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
065e3595 2179 break;
df7492f9 2180
065e3595
KH
2181 default:
2182 abort ();
2183 }
      /* Map (charset, CODE) to a character; a negative result means
	 the code point is invalid.  */
b84ae584
KH
2184 CODING_DECODE_CHAR (coding, src, src_base, src_end,
2185 CHARSET_FROM_ID (charset_id), code, c);
065e3595
KH
2186 if (c < 0)
2187 goto invalid_code;
df7492f9 2188 }
df7492f9
KH
2189 *nbytes = src - src_base;
2190 *nchars = consumed_chars;
ff0dacd7 2191 if (id)
b84ae584 2192 *id = charset_id;
e951386e 2193 return (mseq_found ? -c : c);
df7492f9
KH
2194
2195 no_more_source:
2196 return -2;
2197
2198 invalid_code:
2199 return -1;
2200}
2201
2202
e951386e 2203/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
df7492f9 2204
e951386e
KH
2205/* Handle these composition sequence ('|': the end of header elements,
2206 BYTES and CHARS >= 0xA0):
df7492f9 2207
e951386e
KH
2208 (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2209 (2) altchar composition: 0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2210 (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
df7492f9 2211
e951386e
KH
2212 and these old form:
2213
2214 (4) relative composition: 0x80 | MSEQ ... MSEQ
2215 (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
df7492f9 2216
e951386e
KH
2217 When the starter 0x80 and the following header elements are found,
2218 this annotation header is produced.
df7492f9 2219
e951386e 2220 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
df7492f9 2221
e951386e
KH
2222 NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2223 NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
df7492f9 2224
e951386e
KH
2225 Then, upon reading the following elements, these codes are produced
2226 until the composition end is found:
df7492f9 2227
e951386e
KH
2228 (1) CHAR ... CHAR
2229 (2) ALT ... ALT CHAR ... CHAR
2230 (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2231 (4) CHAR ... CHAR
2232 (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
4ed46869 2233
e951386e
KH
2234 When the composition end is found, LENGTH and NCHARS in the
2235 annotation header are updated as below:
b73bfc1c 2236
e951386e
KH
2237 (1) LENGTH: unchanged, NCHARS: unchanged
2238 (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2239 (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2240 (4) LENGTH: unchanged, NCHARS: number of CHARs
2241 (5) LENGTH: unchanged, NCHARS: number of CHARs
df7492f9 2242
e951386e
KH
2243 If an error is found while composing, the annotation header is
2244 changed to the original composition header (plus filler -1s) as
2245 below:
2246
2247 (1),(2),(3) [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2248 (5) [ 0x80 0xFF -1 -1 -1 ]
2249
2250 and the sequence [ -2 DECODED-RULE ] is changed to the original
2251 byte sequence as below:
2252 o the original byte sequence is B: [ B -1 ]
2253 o the original byte sequence is B1 B2: [ B1 B2 ]
2254
2255 Most of the routines are implemented by macros because many
2256 variables and labels in the caller decode_coding_emacs_mule must be
2257 accessible, and they are usually called just once (thus doesn't
2258 increase the size of compiled object). */
2259
2260/* Decode a composition rule represented by C as a component of
2261 composition sequence of Emacs 20 style. Set RULE to the decoded
2262 rule. */
2263
/* Notes on the expansion: C must be a modifiable int lvalue, because
   0xA0 is subtracted from it in place.  If the result is outside
   0..80, control jumps to the label `invalid_code', which must exist
   where the macro is used.  Otherwise the result is split into
   C / 9 and C % 9, a value of 4 in either part is replaced by 10, and
   the pair is combined by COMPOSITION_ENCODE_RULE into RULE.  The
   arguments are not parenthesized in the body, so pass plain variable
   names.  */
2264#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule) \
df7492f9 2265 do { \
e951386e
KH
2266 int gref, nref; \
2267 \
4d41e8b7 2268 c -= 0xA0; \
df7492f9
KH
2269 if (c < 0 || c >= 81) \
2270 goto invalid_code; \
df7492f9 2271 gref = c / 9, nref = c % 9; \
e951386e
KH
2272 if (gref == 4) gref = 10; \
2273 if (nref == 4) nref = 10; \
2274 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
df7492f9
KH
2275 } while (0)
2276
2277
e951386e
KH
2278/* Decode a composition rule represented by C and the following byte
2279 at SRC as a component of composition sequence of Emacs 21 style.
2280 Set RULE to the decoded rule. */
781d7a48 2281
/* Notes on the expansion: the first reference point is C - 0x20; the
   second is read with ONE_MORE_BYTE into C (so C must be a modifiable
   int lvalue and is overwritten) and is again offset by 0x20.  If
   either value is outside 0..80, control jumps to the label
   `invalid_code', which must exist where the macro is used.  The pair
   is combined by COMPOSITION_ENCODE_RULE into RULE.  The arguments
   are not parenthesized in the body, so pass plain variable names.  */
e951386e 2282#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule) \
781d7a48
KH
2283 do { \
2284 int gref, nref; \
e951386e
KH
2285 \
2286 gref = c - 0x20; \
2287 if (gref < 0 || gref >= 81) \
781d7a48 2288 goto invalid_code; \
e951386e
KH
2289 ONE_MORE_BYTE (c); \
2290 nref = c - 0x20; \
2291 if (nref < 0 || nref >= 81) \
781d7a48 2292 goto invalid_code; \
e951386e 2293 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
781d7a48
KH
2294 } while (0)
2295
2296
e951386e
KH
2297/* Start of Emacs 21 style format. The first three bytes at SRC are
2298 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is the
2299 byte length of this composition information, CHARS is the number of
2300 characters composed by this composition. */
2301
/* Notes on the expansion: it uses the variables C, CHARBUF and
   CMP_STATUS and the label `invalid_code' of the place where it is
   used.  On entry C holds the METHOD byte.  The BYTES and CHARS bytes
   are then read with ONE_MORE_BYTE; the header is rejected (jump to
   `invalid_code') if the BYTES byte is negative, if BYTES - 0xA0 is
   less than 3, if the method is COMPOSITION_RELATIVE and
   BYTES - 0xA0 is not 4, or if CHARS - 0xA0 is not in
   1 .. MAX_COMPOSITION_COMPONENTS - 1.  Otherwise *CMP_STATUS is set
   up (state COMPOSING_CHAR for a relative composition,
   COMPOSING_COMPONENT_CHAR for the others; ncomps = BYTES - 0xA0 - 4)
   and the annotation header is added with ADD_COMPOSITION_DATA.
   CHARBUF_BASE is not referenced in the text of this macro;
   NOTE(review): check whether ADD_COMPOSITION_DATA needs it before
   removing it.  */
2302#define DECODE_EMACS_MULE_21_COMPOSITION() \
aa72b389 2303 do { \
781d7a48
KH
2304 enum composition_method method = c - 0xF2; \
2305 int *charbuf_base = charbuf; \
df7492f9 2306 int nbytes, nchars; \
e951386e 2307 \
df7492f9 2308 ONE_MORE_BYTE (c); \
065e3595
KH
2309 if (c < 0) \
2310 goto invalid_code; \
df7492f9 2311 nbytes = c - 0xA0; \
e951386e 2312 if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4)) \
df7492f9
KH
2313 goto invalid_code; \
2314 ONE_MORE_BYTE (c); \
2315 nchars = c - 0xA0; \
e951386e
KH
2316 if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS) \
2317 goto invalid_code; \
2318 cmp_status->old_form = 0; \
2319 cmp_status->method = method; \
2320 if (method == COMPOSITION_RELATIVE) \
2321 cmp_status->state = COMPOSING_CHAR; \
2322 else \
2323 cmp_status->state = COMPOSING_COMPONENT_CHAR; \
2324 cmp_status->length = MAX_ANNOTATION_LENGTH; \
2325 cmp_status->nchars = nchars; \
2326 cmp_status->ncomps = nbytes - 4; \
2327 ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method); \
aa72b389 2328 } while (0)
93dec019 2329
aa72b389 2330
e951386e
KH
2331/* Start of Emacs 20 style format for relative composition. */
2332
/* Notes on the expansion: it uses the variables CHARBUF and
   CMP_STATUS of the place where it is used.  *CMP_STATUS is marked as
   an old-form COMPOSITION_RELATIVE composition in state
   COMPOSING_CHAR with no characters or components counted yet, and an
   annotation header with zero counts is added with
   ADD_COMPOSITION_DATA.  */
2333#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION() \
2334 do { \
2335 cmp_status->old_form = 1; \
2336 cmp_status->method = COMPOSITION_RELATIVE; \
2337 cmp_status->state = COMPOSING_CHAR; \
2338 cmp_status->length = MAX_ANNOTATION_LENGTH; \
2339 cmp_status->nchars = cmp_status->ncomps = 0; \
2340 ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
2341 } while (0)
2342
2343
2344/* Start of Emacs 20 style format for rule-base composition. */
2345
/* Notes on the expansion: it uses the variables CHARBUF and
   CMP_STATUS of the place where it is used.  It differs from
   DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION only in the method, which
   is COMPOSITION_WITH_RULE.  */
2346#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION() \
2347 do { \
2348 cmp_status->old_form = 1; \
2349 cmp_status->method = COMPOSITION_WITH_RULE; \
2350 cmp_status->state = COMPOSING_CHAR; \
2351 cmp_status->length = MAX_ANNOTATION_LENGTH; \
2352 cmp_status->nchars = cmp_status->ncomps = 0; \
2353 ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
df7492f9
KH
2354 } while (0)
2355
2356
e951386e
KH
/* Parse what follows a composition starter byte 0x80.  The expansion
   uses the variables C and SRC and the label `invalid_code' of the
   place where it is used.  One byte is read into C and classified:

     negative		       -> invalid_code
     C - 0xF2 in COMPOSITION_RELATIVE
       .. COMPOSITION_WITH_RULE_ALTCHARS
			       -> DECODE_EMACS_MULE_21_COMPOSITION
     otherwise, below 0xA0     -> invalid_code
     otherwise, below 0xC0     -> DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION;
				  SRC is then moved back so that the
				  byte is read again as a component
     0xFF		       -> DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION
     anything else	       -> invalid_code  */
2357#define DECODE_EMACS_MULE_COMPOSITION_START() \
2358 do { \
2359 const unsigned char *current_src = src; \
2360 \
2361 ONE_MORE_BYTE (c); \
2362 if (c < 0) \
2363 goto invalid_code; \
2364 if (c - 0xF2 >= COMPOSITION_RELATIVE \
2365 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) \
2366 DECODE_EMACS_MULE_21_COMPOSITION (); \
2367 else if (c < 0xA0) \
2368 goto invalid_code; \
2369 else if (c < 0xC0) \
2370 { \
2371 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (); \
2372 /* Re-read C as a composition component. */ \
2373 src = current_src; \
2374 } \
2375 else if (c == 0xFF) \
2376 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (); \
2377 else \
2378 goto invalid_code; \
2379 } while (0)
2380
/* Finish a composition that ended normally.  The expansion uses the
   variables CHARBUF and CMP_STATUS of the place where it is used.
   IDX is minus the current CMP_STATUS->length, so CHARBUF[IDX] is the
   element that many positions before CHARBUF (the first element of
   the annotation header, if `length' has been counting the elements
   stored since the header was started).  For an old-form composition
   the count of characters is stored in the third header element; for
   a new-form composition other than COMPOSITION_RELATIVE the first
   header element is set to the third one minus CMP_STATUS->length.
   In every case the state becomes COMPOSING_NO.  */
2381#define EMACS_MULE_COMPOSITION_END() \
df7492f9 2382 do { \
e951386e 2383 int idx = - cmp_status->length; \
4d41e8b7 2384 \
e951386e
KH
2385 if (cmp_status->old_form) \
2386 charbuf[idx + 2] = cmp_status->nchars; \
2387 else if (cmp_status->method > COMPOSITION_RELATIVE) \
2388 charbuf[idx] = charbuf[idx + 2] - cmp_status->length; \
2389 cmp_status->state = COMPOSING_NO; \
2390 } while (0)
2391
2392
2393static int
2394emacs_mule_finish_composition (charbuf, cmp_status)
2395 int *charbuf;
2396 struct composition_status *cmp_status;
2397{
2398 int idx = - cmp_status->length;
2399 int new_chars;
2400
2401 if (cmp_status->old_form && cmp_status->nchars > 0)
2402 {
2403 charbuf[idx + 2] = cmp_status->nchars;
2404 new_chars = 0;
2405 if (cmp_status->method == COMPOSITION_WITH_RULE
2406 && cmp_status->state == COMPOSING_CHAR)
2407 {
2408 /* The last rule was invalid. */
2409 int rule = charbuf[-1] + 0xA0;
2410
2411 charbuf[-2] = BYTE8_TO_CHAR (rule);
2412 charbuf[-1] = -1;
2413 new_chars = 1;
2414 }
2415 }
2416 else
2417 {
2418 charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2419
2420 if (cmp_status->method == COMPOSITION_WITH_RULE)
2421 {
2422 charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2423 charbuf[idx++] = -3;
2424 charbuf[idx++] = 0;
2425 new_chars = 1;
2426 }
2427 else
2428 {
2429 int nchars = charbuf[idx + 1] + 0xA0;
2430 int nbytes = charbuf[idx + 2] + 0xA0;
2431
2432 charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2433 charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2434 charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2435 charbuf[idx++] = -1;
2436 new_chars = 4;
2437 }
2438 }
2439 cmp_status->state = COMPOSING_NO;
2440 return new_chars;
2441}
2442
/* If a composition is in progress, finish it with
   emacs_mule_finish_composition and add the value it returns to
   CHAR_OFFSET.  The expansion uses the variables CHARBUF, CMP_STATUS
   and CHAR_OFFSET of the place where it is used.  */
2443#define EMACS_MULE_MAYBE_FINISH_COMPOSITION() \
2444 do { \
2445 if (cmp_status->state != COMPOSING_NO) \
2446 char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
df7492f9
KH
2447 } while (0)
2448
aa72b389
KH
2449
/* Decode the emacs-mule bytes of CODING->source, starting at offset
   CODING->consumed, into CODING->charbuf, starting at index
   CODING->charbuf_used.

   If a composition was left unfinished by an earlier call
   (CODING->spec.emacs_mule.cmp_status.state != COMPOSING_NO), the
   elements saved in its `carryover' array are first put back into the
   buffer and CODING->annotated is set.

   The loop ends when the buffer is nearly full (CHARBUF_END keeps
   three times MAX_ANNOTATION_LENGTH elements in reserve), when
   emacs_mule_char reports that the source is too short, or through
   the label `no_more_source' (presumably jumped to by ONE_MORE_BYTE
   when SRC reaches SRC_END; the macro is defined elsewhere in the
   file).  A sequence that cannot be decoded is handled at
   `invalid_code': its first byte is stored as a character by itself
   and CODING->errors is incremented.

   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated; only the input up to SRC_BASE,
   the start of the last sequence not completely processed, counts as
   consumed.  */
2450static void
df7492f9 2451decode_coding_emacs_mule (coding)
aa72b389 2452 struct coding_system *coding;
aa72b389 2453{
8f924df7
KH
2454 const unsigned char *src = coding->source + coding->consumed;
2455 const unsigned char *src_end = coding->source + coding->src_bytes;
2456 const unsigned char *src_base;
69a80ea3 2457 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5
KH
2458 /* We may produce two annotations (charset and composition) in one
2459 loop and one more charset annotation at the end. */
69a80ea3 2460 int *charbuf_end
df80c7f0 2461 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
df7492f9 2462 int consumed_chars = 0, consumed_chars_base;
df7492f9 2463 int multibytep = coding->src_multibyte;
24a73b0a 2464 Lisp_Object attrs, charset_list;
  /* CHAR_OFFSET counts the characters produced so far.  LAST_OFFSET
     and LAST_ID describe the current run of characters of one
     charset, for which a charset annotation is added when the charset
     changes and at the end.  */
ff0dacd7
KH
2465 int char_offset = coding->produced_char;
2466 int last_offset = char_offset;
2467 int last_id = charset_ascii;
0a9564cb
EZ
2468 int eol_crlf =
2469 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* When a CR is seen and EOL_CRLF is nonzero, the byte after it is
     read ahead into this variable; -1 means nothing is pending.  */
119852e7 2470 int byte_after_cr = -1;
e951386e 2471 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
aa72b389 2472
24a73b0a 2473 CODING_GET_INFO (coding, attrs, charset_list);
aa72b389 2474
  /* Put back the elements of a composition carried over from the
     previous call.  */
e951386e
KH
2475 if (cmp_status->state != COMPOSING_NO)
2476 {
2477 int i;
2478
2479 for (i = 0; i < cmp_status->length; i++)
2480 *charbuf++ = cmp_status->carryover[i];
2481 coding->annotated = 1;
2482 }
2483
aa72b389
KH
2484 while (1)
2485 {
e951386e 2486 int c, id;
df7492f9 2487
aa72b389 2488 src_base = src;
df7492f9
KH
2489 consumed_chars_base = consumed_chars;
2490
2491 if (charbuf >= charbuf_end)
b71f6f73
KH
2492 {
	  /* The byte read ahead after a CR has not been processed;
	     make it count as not consumed.  */
2493 if (byte_after_cr >= 0)
2494 src_base--;
2495 break;
2496 }
aa72b389 2497
119852e7
KH
2498 if (byte_after_cr >= 0)
2499 c = byte_after_cr, byte_after_cr = -1;
2500 else
2501 ONE_MORE_BYTE (c);
e951386e
KH
2502
      /* A negative value from ONE_MORE_BYTE or a composition starter
	 ends any composition in progress.  The negative value is
	 stored negated as one character; the starter begins the
	 parsing of a new composition header.  */
2503 if (c < 0 || c == 0x80)
065e3595 2504 {
e951386e
KH
2505 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2506 if (c < 0)
2507 {
2508 *charbuf++ = -c;
2509 char_offset++;
2510 }
2511 else
2512 DECODE_EMACS_MULE_COMPOSITION_START ();
2513 continue;
065e3595 2514 }
e951386e
KH
2515
2516 if (c < 0x80)
aa72b389 2517 {
119852e7
KH
2518 if (eol_crlf && c == '\r')
2519 ONE_MORE_BYTE (byte_after_cr);
e951386e
KH
2520 id = charset_ascii;
2521 if (cmp_status->state != COMPOSING_NO)
2522 {
	      /* An ASCII byte ends an old-form composition; in a
		 new-form one it counts as one component while
		 components are being read.  */
2523 if (cmp_status->old_form)
2524 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2525 else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2526 cmp_status->ncomps--;
2527 }
2528 }
2529 else
2530 {
2531 int nchars, nbytes;
75f80e63
EZ
2532 /* emacs_mule_char can load a charset map from a file, which
2533 allocates a large structure and might cause buffer text
2534 to be relocated as result. Thus, we need to remember the
ad1746f5 2535 original pointer to buffer text, and fix up all related
75f80e63
EZ
2536 pointers after the call. */
2537 const unsigned char *orig = coding->source;
2538 EMACS_INT offset;
e951386e
KH
2539
2540 c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2541 cmp_status);
75f80e63
EZ
2542 offset = coding->source - orig;
2543 if (offset)
2544 {
2545 src += offset;
2546 src_base += offset;
2547 src_end += offset;
2548 }
	  /* -1 means an invalid sequence and -2 a source that is too
	     short; any other negative value is an old-form
	     composition component and is handled below.  */
e951386e
KH
2549 if (c < 0)
2550 {
2551 if (c == -1)
2552 goto invalid_code;
2553 if (c == -2)
2554 break;
2555 }
2556 src = src_base + nbytes;
2557 consumed_chars = consumed_chars_base + nchars;
2558 if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2559 cmp_status->ncomps -= nchars;
2560 }
2561
ad1746f5 2562 /* Now if C >= 0, we found a normally encoded character, if C <
e951386e
KH
2563 0, we found an old-style composition component character or
2564 rule. */
2565
2566 if (cmp_status->state == COMPOSING_NO)
2567 {
	  /* Ordinary character.  When the charset changes, close the
	     previous run with a charset annotation unless that run
	     was ASCII.  */
2568 if (last_id != id)
2569 {
2570 if (last_id != charset_ascii)
2571 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2572 last_id);
2573 last_id = id;
2574 last_offset = char_offset;
2575 }
df7492f9
KH
2576 *charbuf++ = c;
2577 char_offset++;
aa72b389 2578 }
e951386e 2579 else if (cmp_status->state == COMPOSING_CHAR)
df7492f9 2580 {
e951386e
KH
2581 if (cmp_status->old_form)
2582 {
	      /* In an old-form composition a normally encoded
		 character (C >= 0) ends the composition.  */
2583 if (c >= 0)
2584 {
2585 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2586 *charbuf++ = c;
2587 char_offset++;
2588 }
2589 else
2590 {
2591 *charbuf++ = -c;
2592 cmp_status->nchars++;
2593 cmp_status->length++;
2594 if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2595 EMACS_MULE_COMPOSITION_END ();
2596 else if (cmp_status->method == COMPOSITION_WITH_RULE)
2597 cmp_status->state = COMPOSING_RULE;
2598 }
2599 }
df7492f9 2600 else
e951386e
KH
2601 {
	      /* New form: NCHARS counts down the characters still
		 expected.  */
2602 *charbuf++ = c;
2603 cmp_status->length++;
2604 cmp_status->nchars--;
2605 if (cmp_status->nchars == 0)
2606 EMACS_MULE_COMPOSITION_END ();
2607 }
df7492f9 2608 }
e951386e 2609 else if (cmp_status->state == COMPOSING_RULE)
df7492f9 2610 {
e951386e 2611 int rule;
ff0dacd7 2612
e951386e 2613 if (c >= 0)
df7492f9 2614 {
e951386e
KH
2615 EMACS_MULE_COMPOSITION_END ();
2616 *charbuf++ = c;
2617 char_offset++;
df7492f9 2618 }
e951386e 2619 else
ff0dacd7 2620 {
e951386e
KH
2621 c = -c;
2622 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2623 if (rule < 0)
2624 goto invalid_code;
2625 *charbuf++ = -2;
2626 *charbuf++ = rule;
2627 cmp_status->length += 2;
2628 cmp_status->state = COMPOSING_CHAR;
ff0dacd7 2629 }
e951386e
KH
2630 }
2631 else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2632 {
df7492f9 2633 *charbuf++ = c;
e951386e
KH
2634 cmp_status->length++;
	  /* NCOMPS was already decremented above for this component.
	     Zero means all components were read; a negative value
	     means more were read than the header announced.  */
2635 if (cmp_status->ncomps == 0)
2636 cmp_status->state = COMPOSING_CHAR;
2637 else if (cmp_status->ncomps > 0)
2638 {
2639 if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2640 cmp_status->state = COMPOSING_COMPONENT_RULE;
2641 }
2642 else
2643 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
df7492f9 2644 }
e951386e
KH
2645 else /* COMPOSING_COMPONENT_RULE */
2646 {
2647 int rule;
2648
2649 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2650 if (rule < 0)
2651 goto invalid_code;
2652 *charbuf++ = -2;
2653 *charbuf++ = rule;
2654 cmp_status->length += 2;
2655 cmp_status->ncomps--;
2656 if (cmp_status->ncomps > 0)
2657 cmp_status->state = COMPOSING_COMPONENT_CHAR;
2658 else
2659 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2660 }
2661 continue;
2662
      /* No goto in the text of this function targets `retry'.
	 NOTE(review): check the macros used above before removing
	 it.  */
2663 retry:
2664 src = src_base;
2665 consumed_chars = consumed_chars_base;
df7492f9
KH
2666 continue;
2667
2668 invalid_code:
      /* Give up any composition in progress, go back to the start of
	 the offending sequence, and store its first byte as a
	 character by itself.  */
e951386e 2669 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
df7492f9
KH
2670 src = src_base;
2671 consumed_chars = consumed_chars_base;
2672 ONE_MORE_BYTE (c);
2673 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2674 char_offset++;
df7492f9
KH
2675 coding->errors++;
2676 }
2677
2678 no_more_source:
  /* A composition is still in progress.  In the last block it is
     finished here; otherwise its elements are taken out of the buffer
     and saved in `carryover' for the next call.  */
e951386e
KH
2679 if (cmp_status->state != COMPOSING_NO)
2680 {
2681 if (coding->mode & CODING_MODE_LAST_BLOCK)
2682 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2683 else
2684 {
2685 int i;
2686
2687 charbuf -= cmp_status->length;
2688 for (i = 0; i < cmp_status->length; i++)
2689 cmp_status->carryover[i] = charbuf[i];
2690 }
2691 }
ff0dacd7 2692 if (last_id != charset_ascii)
69a80ea3 2693 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
2694 coding->consumed_char += consumed_chars_base;
2695 coding->consumed = src_base - coding->source;
2696 coding->charbuf_used = charbuf - coding->charbuf;
2697}
2698
2699
/* Store in CODES[0] and CODES[1] the emacs-mule leading codes for the
   charset whose emacs-mule id is ID:

	ID < 0xA0		CODES[0] = ID,   CODES[1] = 0
	0xA0 <= ID < 0xE0	CODES[0] = 0x9A, CODES[1] = ID
	0xE0 <= ID < 0xF0	CODES[0] = 0x9B, CODES[1] = ID
	0xF0 <= ID < 0xF5	CODES[0] = 0x9C, CODES[1] = ID
	0xF5 <= ID		CODES[0] = 0x9D, CODES[1] = ID

   A zero in CODES[1] means that there is no second leading code.

   ID is evaluated exactly once.  The expansion is a single statement
   WITHOUT a trailing semicolon (the user writes the semicolon), so
   that the macro can be used as the body of an `if' that has an
   `else' part.  */

#define EMACS_MULE_LEADING_CODES(id, codes)			\
  do {								\
    int leading_codes_id = (id);				\
								\
    if (leading_codes_id < 0xA0)				\
      (codes)[0] = leading_codes_id, (codes)[1] = 0;		\
    else if (leading_codes_id < 0xE0)				\
      (codes)[0] = 0x9A, (codes)[1] = leading_codes_id;		\
    else if (leading_codes_id < 0xF0)				\
      (codes)[0] = 0x9B, (codes)[1] = leading_codes_id;		\
    else if (leading_codes_id < 0xF5)				\
      (codes)[0] = 0x9C, (codes)[1] = leading_codes_id;		\
    else							\
      (codes)[0] = 0x9D, (codes)[1] = leading_codes_id;		\
  } while (0)
2713
aa72b389 2714
df7492f9
KH
/* Encode the elements CODING->charbuf[0 .. CODING->charbuf_used) in
   the emacs-mule format and store the bytes in CODING->destination,
   starting at offset CODING->produced.

   A negative element starts an annotation of -ELEMENT elements; the
   element after it tells the kind.  A composition annotation is
   skipped (not yet implemented).  A charset annotation selects the
   charset tried first for the characters that follow, provided it is
   a member of the charset list.

   If the charset list of CODING is not Vemacs_mule_charset_list, it
   is replaced by that list, both locally and in the attributes of
   CODING.

   On exit CODING->produced and CODING->produced_char are updated and
   CODING_RESULT_SUCCESS is recorded.  Always return 0.

   The locals MULTIBYTEP, CHARBUF, CHARBUF_END, DST, DST_END and
   PRODUCED_CHARS are presumably referenced by name from inside
   ASSURE_DESTINATION and the EMIT_* macros (their definitions are not
   in this part of the file), so they must not be renamed without
   checking those macros.  */
2715static int
2716encode_coding_emacs_mule (coding)
2717 struct coding_system *coding;
2718{
2719 int multibytep = coding->dst_multibyte;
2720 int *charbuf = coding->charbuf;
2721 int *charbuf_end = charbuf + coding->charbuf_used;
2722 unsigned char *dst = coding->destination + coding->produced;
2723 unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Passed to ASSURE_DESTINATION once per loop; at most four bytes
     are emitted per character below.  */
2724 int safe_room = 8;
df7492f9 2725 int produced_chars = 0;
24a73b0a 2726 Lisp_Object attrs, charset_list;
df7492f9 2727 int c;
  /* Id of the charset requested by the latest charset annotation, or
     -1 if there is none.  */
ff0dacd7 2728 int preferred_charset_id = -1;
df7492f9 2729
24a73b0a 2730 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2731 if (! EQ (charset_list, Vemacs_mule_charset_list))
2732 {
2733 CODING_ATTR_CHARSET_LIST (attrs)
2734 = charset_list = Vemacs_mule_charset_list;
2735 }
df7492f9
KH
2736
2737 while (charbuf < charbuf_end)
2738 {
2739 ASSURE_DESTINATION (safe_room);
2740 c = *charbuf++;
ff0dacd7
KH
2741
2742 if (c < 0)
2743 {
2744 /* Handle an annotation. */
2745 switch (*charbuf)
2746 {
2747 case CODING_ANNOTATE_COMPOSITION_MASK:
2748 /* Not yet implemented. */
2749 break;
2750 case CODING_ANNOTATE_CHARSET_MASK:
	      /* CHARBUF already points one element past the length,
		 so charbuf[3] is the fifth element of the
		 annotation.  */
2751 preferred_charset_id = charbuf[3];
2752 if (preferred_charset_id >= 0
2753 && NILP (Fmemq (make_number (preferred_charset_id),
2754 charset_list)))
2755 preferred_charset_id = -1;
2756 break;
2757 default:
2758 abort ();
2759 }
	  /* Skip the rest of the annotation; its length element has
	     been consumed already.  */
2760 charbuf += -c - 1;
2761 continue;
2762 }
2763
df7492f9
KH
2764 if (ASCII_CHAR_P (c))
2765 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2766 else if (CHAR_BYTE8_P (c))
2767 {
	  /* A character standing for a raw 8-bit byte is emitted as
	     that byte.  */
2768 c = CHAR_TO_BYTE8 (c);
2769 EMIT_ONE_BYTE (c);
2770 }
df7492f9 2771 else
aa72b389 2772 {
df7492f9
KH
2773 struct charset *charset;
2774 unsigned code;
2775 int dimension;
2776 int emacs_mule_id;
2777 unsigned char leading_codes[2];
2778
	  /* Find a charset that contains C and the code of C in it,
	     trying the preferred charset first.  */
ff0dacd7
KH
2779 if (preferred_charset_id >= 0)
2780 {
2781 charset = CHARSET_FROM_ID (preferred_charset_id);
905ca9d2
KH
2782 if (CHAR_CHARSET_P (c, charset))
2783 code = ENCODE_CHAR (charset, c);
2784 else
2785 charset = char_charset (c, charset_list, &code);
ff0dacd7
KH
2786 }
2787 else
2788 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2789 if (! charset)
2790 {
	      /* No charset contains C: encode the default character
		 of CODING instead.  NOTE(review): the result of this
		 second lookup is not checked before CHARSET is used
		 below; confirm that the default character is always
		 in one of the charsets of the list.  */
2791 c = coding->default_char;
2792 if (ASCII_CHAR_P (c))
2793 {
2794 EMIT_ONE_ASCII_BYTE (c);
2795 continue;
2796 }
2797 charset = char_charset (c, charset_list, &code);
2798 }
2799 dimension = CHARSET_DIMENSION (charset);
2800 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2801 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2802 EMIT_ONE_BYTE (leading_codes[0]);
	  /* A zero second leading code means there is none.  */
2803 if (leading_codes[1])
2804 EMIT_ONE_BYTE (leading_codes[1]);
	  /* The position codes are the bytes of CODE with the bit
	     0x80 set in each.  */
2805 if (dimension == 1)
1fa663f9 2806 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2807 else
df7492f9 2808 {
1fa663f9 2809 code |= 0x8080;
df7492f9
KH
2810 EMIT_ONE_BYTE (code >> 8);
2811 EMIT_ONE_BYTE (code & 0xFF);
2812 }
aa72b389 2813 }
aa72b389 2814 }
065e3595 2815 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2816 coding->produced_char += produced_chars;
2817 coding->produced = dst - coding->destination;
2818 return 0;
aa72b389 2819}
b73bfc1c 2820
4ed46869 2821\f
df7492f9 2822/*** 7. ISO2022 handlers ***/
4ed46869
KH
2823
2824/* The following note describes the coding system ISO2022 briefly.
39787efd 2825 Since the intention of this note is to help understand the
5a936b46 2826 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2827 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2828 original document of ISO2022. This is equivalent to the standard
cfb43547 2829 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2830
2831 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2832 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2833 is encoded using bytes less than 128. This may make the encoded
2834 text a little bit longer, but the text passes more easily through
cfb43547 2835 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2836 Significant Bit).
b73bfc1c 2837
cfb43547
DL
2838 There are two kinds of character sets: control character sets and
2839 graphic character sets. The former contain control characters such
4ed46869 2840 as `newline' and `escape' to provide control functions (control
39787efd 2841 functions are also provided by escape sequences). The latter
cfb43547 2842 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2843 two control character sets and many graphic character sets.
2844
2845 Graphic character sets are classified into one of the following
39787efd
KH
2846 four classes, according to the number of bytes (DIMENSION) and
2847 number of characters in one dimension (CHARS) of the set:
2848 - DIMENSION1_CHARS94
2849 - DIMENSION1_CHARS96
2850 - DIMENSION2_CHARS94
2851 - DIMENSION2_CHARS96
2852
2853 In addition, each character set is assigned an identification tag,
cfb43547 2854 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2855 hereafter). The <F> of each character set is decided by ECMA(*)
2856 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2857 (0x30..0x3F are for private use only).
4ed46869
KH
2858
2859 Note (*): ECMA = European Computer Manufacturers Association
2860
cfb43547 2861 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2862 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2863 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2864 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2865 o DIMENSION2_CHARS96 -- none for the moment
2866
39787efd 2867 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2868 C0 [0x00..0x1F] -- control character plane 0
2869 GL [0x20..0x7F] -- graphic character plane 0
2870 C1 [0x80..0x9F] -- control character plane 1
2871 GR [0xA0..0xFF] -- graphic character plane 1
2872
2873 A control character set is directly designated and invoked to C0 or
39787efd
KH
2874 C1 by an escape sequence. The most common case is that:
2875 - ISO646's control character set is designated/invoked to C0, and
2876 - ISO6429's control character set is designated/invoked to C1,
2877 and usually these designations/invocations are omitted in encoded
2878 text. In a 7-bit environment, only C0 can be used, and a control
2879 character for C1 is encoded by an appropriate escape sequence to
2880 fit into the environment. All control characters for C1 are
2881 defined to have corresponding escape sequences.
4ed46869
KH
2882
2883 A graphic character set is at first designated to one of four
2884 graphic registers (G0 through G3), then these graphic registers are
2885 invoked to GL or GR. These designations and invocations can be
2886 done independently. The most common case is that G0 is invoked to
39787efd
KH
2887 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2888 these invocations and designations are omitted in encoded text.
2889 In a 7-bit environment, only GL can be used.
4ed46869 2890
39787efd
KH
2891 When a graphic character set of CHARS94 is invoked to GL, codes
2892 0x20 and 0x7F of the GL area work as control characters SPACE and
2893 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2894 be used.
4ed46869
KH
2895
2896 There are two ways of invocation: locking-shift and single-shift.
2897 With locking-shift, the invocation lasts until the next different
39787efd
KH
2898 invocation, whereas with single-shift, the invocation affects the
2899 following character only and doesn't affect the locking-shift
2900 state. Invocations are done by the following control characters or
2901 escape sequences:
4ed46869
KH
2902
2903 ----------------------------------------------------------------------
39787efd 2904 abbrev function cntrl escape seq description
4ed46869 2905 ----------------------------------------------------------------------
39787efd
KH
2906 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2907 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2908 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2909 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2910 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2911 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2912 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2913 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2914 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2915 ----------------------------------------------------------------------
39787efd
KH
2916 (*) These are not used by any known coding system.
2917
2918 Control characters for these functions are defined by macros
2919 ISO_CODE_XXX in `coding.h'.
4ed46869 2920
39787efd 2921 Designations are done by the following escape sequences:
4ed46869
KH
2922 ----------------------------------------------------------------------
2923 escape sequence description
2924 ----------------------------------------------------------------------
2925 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2926 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2927 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2928 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2929 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2930 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2931 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2932 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2933 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2934 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2935 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2936 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2937 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2938 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2939 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2940 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2941 ----------------------------------------------------------------------
2942
2943 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2944 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2945
2946 Note (*): Although these designations are not allowed in ISO2022,
2947 Emacs accepts them on decoding, and produces them on encoding
39787efd 2948 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2949 7-bit environment, non-locking-shift, and non-single-shift.
2950
2951 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2952 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2953
cfb43547 2954 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2955 same multilingual text in ISO2022. Actually, there exist many
2956 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2957 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2958 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2959 localized platforms), and all of these are variants of ISO2022.
2960
2961 In addition to the above, Emacs handles two more kinds of escape
2962 sequences: ISO6429's direction specification and Emacs' private
2963 sequence for specifying character composition.
2964
39787efd 2965 ISO6429's direction specification takes the following form:
4ed46869
KH
2966 o CSI ']' -- end of the current direction
2967 o CSI '0' ']' -- end of the current direction
2968 o CSI '1' ']' -- start of left-to-right text
2969 o CSI '2' ']' -- start of right-to-left text
2970 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2971 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2972
2973 Character composition specification takes the following form:
ec6d2bb8
KH
2974 o ESC '0' -- start relative composition
2975 o ESC '1' -- end composition
2976 o ESC '2' -- start rule-base composition (*)
2977 o ESC '3' -- start relative composition with alternate chars (**)
2978 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2979 Since these are not standard escape sequences of any ISO standard,
cfb43547 2980 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2981
5a936b46
DL
2982 (*) This form is used only in Emacs 20.7 and older versions,
2983 but newer versions can safely decode it.
cfb43547 2984 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2985 and older versions can't decode it.
ec6d2bb8 2986
cfb43547 2987 Here's a list of example usages of these composition escape
b73bfc1c 2988 sequences (categorized by `enum composition_method').
ec6d2bb8 2989
b73bfc1c 2990 COMPOSITION_RELATIVE:
ec6d2bb8 2991 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2992 COMPOSITION_WITH_RULE:
ec6d2bb8 2993 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2994 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2995 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2996 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2997 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2998
/* Table of `enum iso_code_class_type' values with 256 entries --
   presumably one entry per byte value; TODO confirm against the code
   that fills it in.  */
2999enum iso_code_class_type iso_code_class[256];
3000
df7492f9
KH
/* Nonzero if the charset whose id is ID has a usable entry in CODING's
   safe-charsets table: ID must lie in 0..CODING->max_charset_id and
   the table byte for ID must not be 255 (the filler value that
   setup_iso_safe_charsets stores for charsets without a register).

   The lower-bound test matters: charset lookup tables use negative
   ids for "no such charset", and without it such an id would index
   the table out of bounds.  ID is evaluated more than once, so it
   must not have side effects.  */
#define SAFE_CHARSET_P(coding, id)              \
  ((id) >= 0                                    \
   && (id) <= (coding)->max_charset_id          \
   && (coding)->safe_charsets[id] != 255)


/* Nonzero if CODING_ISO_INITIAL for register 1 of the coding system
   registered for CATEGORY in coding_categories is non-negative.  */
#define SHIFT_OUT_OK(category) \
  (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
3008
3009static void
f0064e1f
DL
3010setup_iso_safe_charsets (attrs)
3011 Lisp_Object attrs;
df7492f9
KH
3012{
3013 Lisp_Object charset_list, safe_charsets;
3014 Lisp_Object request;
3015 Lisp_Object reg_usage;
3016 Lisp_Object tail;
3017 int reg94, reg96;
3018 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
3019 int max_charset_id;
3020
3021 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3022 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
3023 && ! EQ (charset_list, Viso_2022_charset_list))
3024 {
3025 CODING_ATTR_CHARSET_LIST (attrs)
3026 = charset_list = Viso_2022_charset_list;
3027 ASET (attrs, coding_attr_safe_charsets, Qnil);
3028 }
3029
3030 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3031 return;
3032
3033 max_charset_id = 0;
3034 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3035 {
3036 int id = XINT (XCAR (tail));
3037 if (max_charset_id < id)
3038 max_charset_id = id;
3039 }
d46c5b12 3040
1b3b981b
AS
3041 safe_charsets = make_uninit_string (max_charset_id + 1);
3042 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9
KH
3043 request = AREF (attrs, coding_attr_iso_request);
3044 reg_usage = AREF (attrs, coding_attr_iso_usage);
3045 reg94 = XINT (XCAR (reg_usage));
3046 reg96 = XINT (XCDR (reg_usage));
3047
3048 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3049 {
3050 Lisp_Object id;
3051 Lisp_Object reg;
3052 struct charset *charset;
3053
3054 id = XCAR (tail);
3055 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 3056 reg = Fcdr (Fassq (id, request));
df7492f9 3057 if (! NILP (reg))
8f924df7 3058 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
3059 else if (charset->iso_chars_96)
3060 {
3061 if (reg96 < 4)
8f924df7 3062 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
3063 }
3064 else
3065 {
3066 if (reg94 < 4)
8f924df7 3067 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
3068 }
3069 }
3070 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3071}
d46c5b12 3072
b6871cc7 3073
4ed46869 3074/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ad1746f5 3075 Check if a text is encoded in one of ISO-2022 based coding systems.
ff0dacd7 3076 If it is, return 1, else return 0. */
4ed46869 3077
0a28aafb 3078static int
ff0dacd7 3079detect_coding_iso_2022 (coding, detect_info)
df7492f9 3080 struct coding_system *coding;
ff0dacd7 3081 struct coding_detection_info *detect_info;
4ed46869 3082{
8f924df7
KH
3083 const unsigned char *src = coding->source, *src_base = src;
3084 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 3085 int multibytep = coding->src_multibyte;
ff0dacd7 3086 int single_shifting = 0;
df7492f9
KH
3087 int id;
3088 int c, c1;
3089 int consumed_chars = 0;
3090 int i;
ff0dacd7
KH
3091 int rejected = 0;
3092 int found = 0;
cee53ed4 3093 int composition_count = -1;
ff0dacd7
KH
3094
3095 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
3096
3097 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3098 {
3099 struct coding_system *this = &(coding_categories[i]);
3100 Lisp_Object attrs, val;
3101
c6b278e7
KH
3102 if (this->id < 0)
3103 continue;
df7492f9
KH
3104 attrs = CODING_ID_ATTRS (this->id);
3105 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
1b3b981b 3106 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
df7492f9
KH
3107 setup_iso_safe_charsets (attrs);
3108 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 3109 this->max_charset_id = SCHARS (val) - 1;
1b3b981b 3110 this->safe_charsets = SDATA (val);
df7492f9
KH
3111 }
3112
3113 /* A coding system of this category is always ASCII compatible. */
3114 src += coding->head_ascii;
3f003981 3115
ff0dacd7 3116 while (rejected != CATEGORY_MASK_ISO)
4ed46869 3117 {
065e3595 3118 src_base = src;
df7492f9 3119 ONE_MORE_BYTE (c);
4ed46869
KH
3120 switch (c)
3121 {
3122 case ISO_CODE_ESC:
74383408
KH
3123 if (inhibit_iso_escape_detection)
3124 break;
f46869e4 3125 single_shifting = 0;
df7492f9 3126 ONE_MORE_BYTE (c);
d46c5b12 3127 if (c >= '(' && c <= '/')
4ed46869 3128 {
bf9cdd4e 3129 /* Designation sequence for a charset of dimension 1. */
df7492f9 3130 ONE_MORE_BYTE (c1);
d46c5b12 3131 if (c1 < ' ' || c1 >= 0x80
df7492f9 3132 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
3133 /* Invalid designation sequence. Just ignore. */
3134 break;
bf9cdd4e
KH
3135 }
3136 else if (c == '$')
3137 {
3138 /* Designation sequence for a charset of dimension 2. */
df7492f9 3139 ONE_MORE_BYTE (c);
bf9cdd4e
KH
3140 if (c >= '@' && c <= 'B')
3141 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 3142 id = iso_charset_table[1][0][c];
bf9cdd4e 3143 else if (c >= '(' && c <= '/')
bcf26d6a 3144 {
df7492f9 3145 ONE_MORE_BYTE (c1);
d46c5b12 3146 if (c1 < ' ' || c1 >= 0x80
df7492f9 3147 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
3148 /* Invalid designation sequence. Just ignore. */
3149 break;
bcf26d6a 3150 }
bf9cdd4e 3151 else
ff0dacd7 3152 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
3153 break;
3154 }
ae9ff118 3155 else if (c == 'N' || c == 'O')
d46c5b12 3156 {
ae9ff118 3157 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
3158 single_shifting = 1;
3159 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 3160 break;
4ed46869 3161 }
cee53ed4
KH
3162 else if (c == '1')
3163 {
3164 /* End of composition. */
3165 if (composition_count < 0
3166 || composition_count > MAX_COMPOSITION_COMPONENTS)
3167 /* Invalid */
3168 break;
3169 composition_count = -1;
3170 found |= CATEGORY_MASK_ISO;
3171 }
ec6d2bb8
KH
3172 else if (c >= '0' && c <= '4')
3173 {
3174 /* ESC <Fp> for start/end composition. */
cee53ed4 3175 composition_count = 0;
ec6d2bb8
KH
3176 break;
3177 }
bf9cdd4e 3178 else
df7492f9 3179 {
ff0dacd7 3180 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
3181 break;
3182 }
d46c5b12
KH
3183
3184 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 3185 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
3186 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3187 id))
ff0dacd7 3188 found |= CATEGORY_MASK_ISO_7;
d46c5b12 3189 else
ff0dacd7 3190 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
3191 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3192 id))
ff0dacd7 3193 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 3194 else
ff0dacd7 3195 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
3196 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3197 id))
ff0dacd7 3198 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 3199 else
ff0dacd7 3200 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
3201 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3202 id))
ff0dacd7 3203 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 3204 else
ff0dacd7 3205 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
3206 break;
3207
4ed46869 3208 case ISO_CODE_SO:
d46c5b12 3209 case ISO_CODE_SI:
ff0dacd7 3210 /* Locking shift out/in. */
74383408
KH
3211 if (inhibit_iso_escape_detection)
3212 break;
f46869e4 3213 single_shifting = 0;
ff0dacd7 3214 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12
KH
3215 break;
3216
4ed46869 3217 case ISO_CODE_CSI:
ff0dacd7 3218 /* Control sequence introducer. */
f46869e4 3219 single_shifting = 0;
ff0dacd7
KH
3220 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3221 found |= CATEGORY_MASK_ISO_8_ELSE;
3222 goto check_extra_latin;
3223
4ed46869
KH
3224 case ISO_CODE_SS2:
3225 case ISO_CODE_SS3:
ff0dacd7
KH
3226 /* Single shift. */
3227 if (inhibit_iso_escape_detection)
3228 break;
75e2a253 3229 single_shifting = 0;
ff0dacd7
KH
3230 rejected |= CATEGORY_MASK_ISO_7BIT;
3231 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3232 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253 3233 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
ff0dacd7
KH
3234 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3235 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253
KH
3236 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3237 if (single_shifting)
3238 break;
ff0dacd7 3239 goto check_extra_latin;
4ed46869
KH
3240
3241 default:
065e3595
KH
3242 if (c < 0)
3243 continue;
4ed46869 3244 if (c < 0x80)
f46869e4 3245 {
cee53ed4
KH
3246 if (composition_count >= 0)
3247 composition_count++;
f46869e4
KH
3248 single_shifting = 0;
3249 break;
3250 }
ff0dacd7 3251 if (c >= 0xA0)
c4825358 3252 {
ff0dacd7
KH
3253 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3254 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 3255 /* Check the length of succeeding codes of the range
ff0dacd7
KH
3256 0xA0..0FF. If the byte length is even, we include
3257 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
3258 only when we are not single shifting. */
3259 if (! single_shifting
3260 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 3261 {
e17de821 3262 int i = 1;
b73bfc1c
KH
3263 while (src < src_end)
3264 {
d12bd917 3265 src_base = src;
df7492f9 3266 ONE_MORE_BYTE (c);
b73bfc1c 3267 if (c < 0xA0)
d12bd917
KH
3268 {
3269 src = src_base;
3270 break;
3271 }
b73bfc1c
KH
3272 i++;
3273 }
3274
3275 if (i & 1 && src < src_end)
cee53ed4
KH
3276 {
3277 rejected |= CATEGORY_MASK_ISO_8_2;
3278 if (composition_count >= 0)
3279 composition_count += i;
3280 }
f46869e4 3281 else
cee53ed4
KH
3282 {
3283 found |= CATEGORY_MASK_ISO_8_2;
3284 if (composition_count >= 0)
3285 composition_count += i / 2;
3286 }
f46869e4 3287 }
ff0dacd7 3288 break;
4ed46869 3289 }
ff0dacd7
KH
3290 check_extra_latin:
3291 single_shifting = 0;
3292 if (! VECTORP (Vlatin_extra_code_table)
3293 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3294 {
3295 rejected = CATEGORY_MASK_ISO;
3296 break;
3297 }
3298 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3299 & CODING_ISO_FLAG_LATIN_EXTRA)
3300 found |= CATEGORY_MASK_ISO_8_1;
3301 else
3302 rejected |= CATEGORY_MASK_ISO_8_1;
75e2a253 3303 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
3304 }
3305 }
ff0dacd7
KH
3306 detect_info->rejected |= CATEGORY_MASK_ISO;
3307 return 0;
4ed46869 3308
df7492f9 3309 no_more_source:
ff0dacd7
KH
3310 detect_info->rejected |= rejected;
3311 detect_info->found |= (found & ~rejected);
df7492f9 3312 return 1;
4ed46869 3313}
ec6d2bb8 3314
4ed46869 3315
134b9549
KH
/* Record in CODING the designation of a charset to register REG.
   The charset is looked up with ISO_CHARSET_TABLE from DIM, CHARS_96
   and the final byte FINAL.

   When FINAL is outside '0'..127, no charset is found, or the charset
   is not safe for CODING, the register's designation is set to -2 and
   CHARS_96 is set to -1, meaning the escape sequence should be kept.

   Otherwise the charset id is stored for REG, after replacing
   JISX0201-Roman by ASCII under CODING_ISO_FLAG_USE_ROMAN and
   JISX0208.1978 by JISX0208 under CODING_ISO_FLAG_USE_OLDJIS.

   CHARS_96 must be an lvalue.  The macro refers to the variable
   `coding' of the expansion site.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int id, prev;                                                       \
                                                                        \
    if (final >= '0' && final < 128                                     \
        && (id = ISO_CHARSET_TABLE (dim, chars_96, final)) >= 0         \
        && SAFE_CHARSET_P (coding, id))                                 \
      {                                                                 \
        prev = CODING_ISO_DESIGNATION (coding, reg);                    \
        if (id == charset_jisx0201_roman)                               \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)  \
              id = charset_ascii;                                       \
          }                                                             \
        else if (id == charset_jisx0208_1978)                           \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
              id = charset_jisx0208;                                    \
          }                                                             \
        CODING_ISO_DESIGNATION (coding, reg) = id;                      \
        /* An ASCII designation that follows an invalid designation    \
           to the same register must be kept in the output too.  */    \
        if (prev == -2 && id == charset_ascii)                          \
          chars_96 = -1;                                                \
      }                                                                 \
    else                                                                \
      {                                                                 \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
      }                                                                 \
  } while (0)
3348
d46c5b12 3349
e951386e
KH
3350/* Handle these composition sequences (ALT: alternate char):
3351
3352 (1) relative composition: ESC 0 CHAR ... ESC 1
3353 (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3354 (3) altchar composition: ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3355 (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3356
3357 When the start sequence (ESC 0/2/3/4) is found, this annotation
3358 header is produced.
3359
3360 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3361
3362 Then, upon reading CHAR or RULE (one or two bytes), these codes are
3363 produced until the end sequence (ESC 1) is found:
3364
3365 (1) CHAR ... CHAR
3366 (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3367 (3) ALT ... ALT -1 -1 CHAR ... CHAR
3368 (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3369
3370 When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3371 annotation header are updated as below:
3372
3373 (1) LENGTH: unchanged, NCHARS: number of CHARs
3374 (2) LENGTH: unchanged, NCHARS: number of CHARs
3375 (3) LENGTH: += number of ALTs + 2, NCHARS: number of CHARs
3376 (4) LENGTH: += number of ALTs * 3, NCHARS: number of CHARs
3377
3378 If an error is found while composing, the annotation header is
3379 changed to:
3380
3381 [ ESC '0'/'2'/'3'/'4' -2 0 ]
3382
3383 and the sequence [ -2 DECODED-RULE ] is changed to the original
3384 byte sequence as below:
3385 o the original byte sequence is B: [ B -1 ]
3386 o the original byte sequence is B1 B2: [ B1 B2 ]
3387 and the sequence [ -1 -1 ] is changed to the original byte
3388 sequence:
3389 [ ESC '0' ]
3390*/
3391
/* Decode a composition rule that starts with the byte C1 (a variable
   of the expansion site) and store the encoded rule in RULE and the
   number of source bytes it occupied in NBYTES.

   C1 - 32 below 81 is the one-byte old format; 81 or more is the
   two-byte new format, whose second byte is fetched with
   ONE_MORE_BYTE.  New-format rules have 0x100 added so that they can
   be told apart from old-format ones.  If C1 - 32 is negative, RULE
   is left negative and NBYTES is not touched; a rule rejected by
   COMPOSITION_ENCODE_RULE is likewise left as that macro returned
   it.  */

#define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
  do {                                                                  \
    rule = c1 - 32;                                                     \
    if (rule >= 81)     /* new format (after ver.21) */                 \
      {                                                                 \
        int c;                                                          \
                                                                        \
        ONE_MORE_BYTE (c);                                              \
        rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32);             \
        if (rule >= 0)                                                  \
          rule += 0x100; /* to distinguish it from the old format */    \
        nbytes = 2;                                                     \
      }                                                                 \
    else if (rule >= 0) /* old format (before ver.21) */                \
      {                                                                 \
        int gref = (rule) / 9;                                          \
        int nref = (rule) % 9;                                          \
                                                                        \
        gref = (gref == 4 ? 10 : gref);                                 \
        nref = (nref == 4 ? 10 : nref);                                 \
        rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
        nbytes = 1;                                                     \
      }                                                                 \
  } while (0)
3422
/* Turn the decoded composition rule RULE back into the codes it was
   read from, storing them at charbuf[idx] and charbuf[idx + 1], and
   add the number of codes restored to new_chars (`charbuf', `idx' and
   `new_chars' are variables of the expansion site).

   A rule below 0x100 is in the old one-byte format: the byte goes to
   charbuf[idx] and charbuf[idx + 1] becomes -1.  Otherwise the rule
   is in the new format and both slots receive a byte.  RULE is read
   completely before either slot is written, so it may be
   charbuf[idx + 1] itself.  */
#define ENCODE_COMPOSITION_RULE(rule)                                   \
  do {                                                                  \
    int gref = (rule % 0x100) / 12;                                     \
    int nref = (rule % 0x100) % 12;                                     \
                                                                        \
    if (rule >= 0x100)  /* new format */                                \
      {                                                                 \
        charbuf[idx] = 32 + 81 + gref;                                  \
        charbuf[idx + 1] = 32 + nref;                                   \
        new_chars += 2;                                                 \
      }                                                                 \
    else                /* old format */                                \
      {                                                                 \
        gref = (gref == 10 ? 4 : gref);                                 \
        nref = (nref == 10 ? 4 : nref);                                 \
        charbuf[idx] = 32 + gref * 9 + nref;                            \
        charbuf[idx + 1] = -1;                                          \
        new_chars++;                                                    \
      }                                                                 \
  } while (0)
3442
e951386e
KH
3443/* Finish the current composition as invalid. */
3444
3445static int finish_composition P_ ((int *, struct composition_status *));
3446
3447static int
3448finish_composition (charbuf, cmp_status)
3449 int *charbuf;
3450 struct composition_status *cmp_status;
3451{
3452 int idx = - cmp_status->length;
3453 int new_chars;
3454
3455 /* Recover the original ESC sequence */
3456 charbuf[idx++] = ISO_CODE_ESC;
3457 charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3458 : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3459 : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3460 /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3461 : '4');
3462 charbuf[idx++] = -2;
3463 charbuf[idx++] = 0;
3464 charbuf[idx++] = -1;
3465 new_chars = cmp_status->nchars;
3466 if (cmp_status->method >= COMPOSITION_WITH_RULE)
3467 for (; idx < 0; idx++)
3468 {
3469 int elt = charbuf[idx];
3470
3471 if (elt == -2)
3472 {
3473 ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3474 idx++;
3475 }
3476 else if (elt == -1)
3477 {
3478 charbuf[idx++] = ISO_CODE_ESC;
3479 charbuf[idx] = '0';
3480 new_chars += 2;
3481 }
3482 }
3483 cmp_status->state = COMPOSING_NO;
3484 return new_chars;
3485}
3486
/* If a composition is in progress (cmp_status->state is not
   COMPOSING_NO), finish it with finish_composition and advance
   char_offset by the value that function returns.  Uses the variables
   `cmp_status', `charbuf' and `char_offset' of the expansion site.  */
#define MAYBE_FINISH_COMPOSITION()                              \
  do {                                                          \
    if (cmp_status->state == COMPOSING_NO)                      \
      break;                                                    \
    char_offset += finish_composition (charbuf, cmp_status);    \
  } while (0)
d46c5b12 3493
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4;
   C1 is the byte after ESC.

   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   An ESC 0 that arrives while the alternate chars of an ESC 3 or
   ESC 4 composition are being read only separates them from the
   composed chars: [ -1 -1 ] is stored and the state becomes
   COMPOSING_CHAR.

   Any other start sequence first finishes a composition still in
   progress, then begins a new one: the annotation header is produced
   with ADD_COMPOSITION_DATA (charbuf, 0, 0, METHOD), the recorded
   length is set to MAX_ANNOTATION_LENGTH and the counters are
   cleared.  */

#define DECODE_COMPOSITION_START(c1)                                       \
  do {                                                                     \
    if (c1 == '0'                                                          \
        && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
             && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
            || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
                && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
      {                                                                    \
        charbuf[0] = -1;                                                   \
        charbuf[1] = -1;                                                   \
        charbuf += 2;                                                      \
        cmp_status->length += 2;                                           \
        cmp_status->state = COMPOSING_CHAR;                                \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        MAYBE_FINISH_COMPOSITION ();                                       \
        switch (c1)                                                        \
          {                                                                \
          case '0':                                                        \
            cmp_status->method = COMPOSITION_RELATIVE;                     \
            break;                                                         \
          case '2':                                                        \
            cmp_status->method = COMPOSITION_WITH_RULE;                    \
            break;                                                         \
          case '3':                                                        \
            cmp_status->method = COMPOSITION_WITH_ALTCHARS;                \
            break;                                                         \
          default:                                                         \
            cmp_status->method = COMPOSITION_WITH_RULE_ALTCHARS;           \
            break;                                                         \
          }                                                                \
        cmp_status->state                                                  \
          = (c1 > '2' ? COMPOSING_COMPONENT_CHAR : COMPOSING_CHAR);        \
        ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
        cmp_status->ncomps = 0;                                            \
        cmp_status->nchars = 0;                                            \
        cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
        coding->annotated = 1;                                             \
      }                                                                    \
  } while (0)
3534
ec6d2bb8 3535
/* Handle composition end sequence ESC 1.

   The composition is invalid when no composed char has been read, or
   when the sequence ends in the wrong state for its method (state
   COMPOSING_CHAR under COMPOSITION_WITH_RULE, or any other state
   under the other methods).  In that case it is finished as invalid
   and control jumps to the `invalid_code' label of the expansion
   site.

   Otherwise the annotation header, which starts cmp_status->length
   slots before charbuf, is completed: its first slot is decreased by
   ncomps + 2 (COMPOSITION_WITH_ALTCHARS) or ncomps * 3
   (COMPOSITION_WITH_RULE_ALTCHARS), and its third slot receives the
   number of composed chars.  char_offset advances by that number and
   the state returns to COMPOSING_NO.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int *header;                                                        \
                                                                        \
    if (cmp_status->nchars == 0                                         \
        || ((cmp_status->state == COMPOSING_CHAR)                       \
            == (cmp_status->method == COMPOSITION_WITH_RULE)))          \
      {                                                                 \
        MAYBE_FINISH_COMPOSITION ();                                    \
        goto invalid_code;                                              \
      }                                                                 \
    header = &charbuf[- cmp_status->length];                            \
    switch (cmp_status->method)                                         \
      {                                                                 \
      case COMPOSITION_WITH_ALTCHARS:                                   \
        header[0] -= cmp_status->ncomps + 2;                            \
        break;                                                          \
      case COMPOSITION_WITH_RULE_ALTCHARS:                              \
        header[0] -= cmp_status->ncomps * 3;                            \
        break;                                                          \
      default:                                                          \
        break;                                                          \
      }                                                                 \
    header[2] = cmp_status->nchars;                                     \
    char_offset += cmp_status->nchars;                                  \
    cmp_status->state = COMPOSING_NO;                                   \
  } while (0)
3555
/* Append the marker -2 followed by the composition rule RULE to
   charbuf.  cmp_status->length grows by two and cmp_status->state
   steps back by one.  */

#define STORE_COMPOSITION_RULE(rule)    \
  do {                                  \
    *charbuf++ = -2;                    \
    *charbuf++ = rule;                  \
    cmp_status->state--;                \
    cmp_status->length += 2;            \
  } while (0)
ec6d2bb8 3565
e951386e
KH
/* Append C, a composed char or a component char, to charbuf and
   update cmp_status: the length grows by one, and nchars is counted
   up in state COMPOSING_CHAR, ncomps in any other state.  The state
   then advances by one when a rule has to follow the char, i.e.
   always under COMPOSITION_WITH_RULE, and under
   COMPOSITION_WITH_RULE_ALTCHARS only in state
   COMPOSING_COMPONENT_CHAR.  */

#define STORE_COMPOSITION_CHAR(c)                                       \
  do {                                                                  \
    *charbuf++ = (c);                                                   \
    cmp_status->length += 1;                                            \
    if (cmp_status->state != COMPOSING_CHAR)                            \
      cmp_status->ncomps++;                                             \
    else                                                                \
      cmp_status->nchars++;                                             \
    switch (cmp_status->method)                                         \
      {                                                                 \
      case COMPOSITION_WITH_RULE:                                       \
        cmp_status->state++;                                            \
        break;                                                          \
      case COMPOSITION_WITH_RULE_ALTCHARS:                              \
        if (cmp_status->state == COMPOSING_COMPONENT_CHAR)              \
          cmp_status->state++;                                          \
        break;                                                          \
      default:                                                          \
        break;                                                          \
      }                                                                 \
  } while (0)
88993dfd 3582
d46c5b12 3583
4ed46869
KH
3584/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
3585
b73bfc1c 3586static void
df7492f9 3587decode_coding_iso_2022 (coding)
4ed46869 3588 struct coding_system *coding;
4ed46869 3589{
8f924df7
KH
3590 const unsigned char *src = coding->source + coding->consumed;
3591 const unsigned char *src_end = coding->source + coding->src_bytes;
3592 const unsigned char *src_base;
69a80ea3 3593 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5
KH
3594 /* We may produce two annotations (charset and composition) in one
3595 loop and one more charset annotation at the end. */
ff0dacd7 3596 int *charbuf_end
df80c7f0 3597 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
df7492f9 3598 int consumed_chars = 0, consumed_chars_base;
df7492f9 3599 int multibytep = coding->src_multibyte;
4ed46869 3600 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
3601 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3602 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 3603 int charset_id_2, charset_id_3;
df7492f9
KH
3604 struct charset *charset;
3605 int c;
e951386e 3606 struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
24a73b0a 3607 Lisp_Object attrs, charset_list;
ff0dacd7
KH
3608 int char_offset = coding->produced_char;
3609 int last_offset = char_offset;
3610 int last_id = charset_ascii;
0a9564cb
EZ
3611 int eol_crlf =
3612 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 3613 int byte_after_cr = -1;
e951386e 3614 int i;
df7492f9 3615
24a73b0a 3616 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 3617 setup_iso_safe_charsets (attrs);
287c57d7
KH
3618 /* Charset list may have been changed. */
3619 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
1b3b981b 3620 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
b73bfc1c 3621
e951386e
KH
3622 if (cmp_status->state != COMPOSING_NO)
3623 {
3624 for (i = 0; i < cmp_status->length; i++)
3625 *charbuf++ = cmp_status->carryover[i];
3626 coding->annotated = 1;
3627 }
3628
b73bfc1c 3629 while (1)
4ed46869 3630 {
cf299835 3631 int c1, c2, c3;
b73bfc1c
KH
3632
3633 src_base = src;
df7492f9
KH
3634 consumed_chars_base = consumed_chars;
3635
3636 if (charbuf >= charbuf_end)
b71f6f73
KH
3637 {
3638 if (byte_after_cr >= 0)
3639 src_base--;
3640 break;
3641 }
df7492f9 3642
119852e7
KH
3643 if (byte_after_cr >= 0)
3644 c1 = byte_after_cr, byte_after_cr = -1;
3645 else
3646 ONE_MORE_BYTE (c1);
065e3595
KH
3647 if (c1 < 0)
3648 goto invalid_code;
4ed46869 3649
e951386e 3650 if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
4ed46869 3651 {
e951386e
KH
3652 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3653 char_offset++;
3654 CODING_ISO_EXTSEGMENT_LEN (coding)--;
3655 continue;
3656 }
3657
3658 if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3659 {
3660 if (c1 == ISO_CODE_ESC)
ec6d2bb8 3661 {
e951386e
KH
3662 if (src + 1 >= src_end)
3663 goto no_more_source;
3664 *charbuf++ = ISO_CODE_ESC;
3665 char_offset++;
3666 if (src[0] == '%' && src[1] == '@')
df7492f9 3667 {
e951386e
KH
3668 src += 2;
3669 consumed_chars += 2;
3670 char_offset += 2;
3671 /* We are sure charbuf can contain two more chars. */
3672 *charbuf++ = '%';
3673 *charbuf++ = '@';
3674 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
df7492f9 3675 }
4ed46869 3676 }
e951386e
KH
3677 else
3678 {
3679 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3680 char_offset++;
3681 }
3682 continue;
3683 }
3684
3685 if ((cmp_status->state == COMPOSING_RULE
3686 || cmp_status->state == COMPOSING_COMPONENT_RULE)
3687 && c1 != ISO_CODE_ESC)
3688 {
3689 int rule, nbytes;
3690
3691 DECODE_COMPOSITION_RULE (rule, nbytes);
3692 if (rule < 0)
3693 goto invalid_code;
3694 STORE_COMPOSITION_RULE (rule);
3695 continue;
3696 }
3697
3698 /* We produce at most one character. */
3699 switch (iso_code_class [c1])
3700 {
3701 case ISO_0x20_or_0x7F:
df7492f9
KH
3702 if (charset_id_0 < 0
3703 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
3704 /* This is SPACE or DEL. */
3705 charset = CHARSET_FROM_ID (charset_ascii);
3706 else
3707 charset = CHARSET_FROM_ID (charset_id_0);
3708 break;
4ed46869
KH
3709
3710 case ISO_graphic_plane_0:
134b9549
KH
3711 if (charset_id_0 < 0)
3712 charset = CHARSET_FROM_ID (charset_ascii);
3713 else
3714 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
3715 break;
3716
3717 case ISO_0xA0_or_0xFF:
df7492f9
KH
3718 if (charset_id_1 < 0
3719 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3720 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3721 goto invalid_code;
4ed46869
KH
3722 /* This is a graphic character, we fall down ... */
3723
3724 case ISO_graphic_plane_1:
df7492f9
KH
3725 if (charset_id_1 < 0)
3726 goto invalid_code;
3727 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
3728 break;
3729
df7492f9 3730 case ISO_control_0:
119852e7
KH
3731 if (eol_crlf && c1 == '\r')
3732 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
3733 MAYBE_FINISH_COMPOSITION ();
3734 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
3735 break;
3736
df7492f9 3737 case ISO_control_1:
df7492f9
KH
3738 goto invalid_code;
3739
4ed46869 3740 case ISO_shift_out:
df7492f9
KH
3741 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3742 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3743 goto invalid_code;
3744 CODING_ISO_INVOCATION (coding, 0) = 1;
3745 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3746 continue;
4ed46869
KH
3747
3748 case ISO_shift_in:
df7492f9
KH
3749 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3750 goto invalid_code;
3751 CODING_ISO_INVOCATION (coding, 0) = 0;
3752 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3753 continue;
4ed46869
KH
3754
3755 case ISO_single_shift_2_7:
a63dba42
KH
3756 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3757 goto invalid_code;
4ed46869 3758 case ISO_single_shift_2:
df7492f9
KH
3759 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3760 goto invalid_code;
4ed46869
KH
3761 /* SS2 is handled as an escape sequence of ESC 'N' */
3762 c1 = 'N';
3763 goto label_escape_sequence;
3764
3765 case ISO_single_shift_3:
df7492f9
KH
3766 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3767 goto invalid_code;
4ed46869
KH
3768 /* SS2 is handled as an escape sequence of ESC 'O' */
3769 c1 = 'O';
3770 goto label_escape_sequence;
3771
3772 case ISO_control_sequence_introducer:
3773 /* CSI is handled as an escape sequence of ESC '[' ... */
3774 c1 = '[';
3775 goto label_escape_sequence;
3776
3777 case ISO_escape:
3778 ONE_MORE_BYTE (c1);
3779 label_escape_sequence:
df7492f9 3780 /* Escape sequences handled here are invocation,
4ed46869
KH
3781 designation, direction specification, and character
3782 composition specification. */
3783 switch (c1)
3784 {
3785 case '&': /* revision of following character set */
3786 ONE_MORE_BYTE (c1);
3787 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3788 goto invalid_code;
4ed46869
KH
3789 ONE_MORE_BYTE (c1);
3790 if (c1 != ISO_CODE_ESC)
df7492f9 3791 goto invalid_code;
4ed46869
KH
3792 ONE_MORE_BYTE (c1);
3793 goto label_escape_sequence;
3794
3795 case '$': /* designation of 2-byte character set */
df7492f9
KH
3796 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3797 goto invalid_code;
134b9549
KH
3798 {
3799 int reg, chars96;
3800
3801 ONE_MORE_BYTE (c1);
3802 if (c1 >= '@' && c1 <= 'B')
3803 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3804 or JISX0208.1980 */
134b9549
KH
3805 reg = 0, chars96 = 0;
3806 }
3807 else if (c1 >= 0x28 && c1 <= 0x2B)
3808 { /* designation of DIMENSION2_CHARS94 character set */
3809 reg = c1 - 0x28, chars96 = 0;
3810 ONE_MORE_BYTE (c1);
3811 }
3812 else if (c1 >= 0x2C && c1 <= 0x2F)
3813 { /* designation of DIMENSION2_CHARS96 character set */
3814 reg = c1 - 0x2C, chars96 = 1;
3815 ONE_MORE_BYTE (c1);
3816 }
3817 else
3818 goto invalid_code;
3819 DECODE_DESIGNATION (reg, 2, chars96, c1);
3820 /* We must update these variables now. */
3821 if (reg == 0)
3822 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3823 else if (reg == 1)
3824 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3825 if (chars96 < 0)
3826 goto invalid_code;
3827 }
b73bfc1c 3828 continue;
4ed46869
KH
3829
3830 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3831 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3832 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3833 goto invalid_code;
3834 CODING_ISO_INVOCATION (coding, 0) = 2;
3835 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3836 continue;
4ed46869
KH
3837
3838 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3839 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3840 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3841 goto invalid_code;
3842 CODING_ISO_INVOCATION (coding, 0) = 3;
3843 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3844 continue;
4ed46869
KH
3845
3846 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3847 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3848 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3849 goto invalid_code;
134b9549
KH
3850 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3851 if (charset_id_2 < 0)
3852 charset = CHARSET_FROM_ID (charset_ascii);
3853 else
3854 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3855 ONE_MORE_BYTE (c1);
e7046a18 3856 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3857 goto invalid_code;
4ed46869
KH
3858 break;
3859
3860 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3861 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3862 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3863 goto invalid_code;
134b9549
KH
3864 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3865 if (charset_id_3 < 0)
3866 charset = CHARSET_FROM_ID (charset_ascii);
3867 else
3868 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3869 ONE_MORE_BYTE (c1);
e7046a18 3870 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3871 goto invalid_code;
4ed46869
KH
3872 break;
3873
ec6d2bb8 3874 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3875 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3876 goto invalid_code;
e951386e
KH
3877 if (last_id != charset_ascii)
3878 {
3879 ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3880 last_id = charset_ascii;
3881 last_offset = char_offset;
3882 }
ec6d2bb8 3883 DECODE_COMPOSITION_START (c1);
b73bfc1c 3884 continue;
4ed46869 3885
ec6d2bb8 3886 case '1': /* end composition */
e951386e 3887 if (cmp_status->state == COMPOSING_NO)
df7492f9
KH
3888 goto invalid_code;
3889 DECODE_COMPOSITION_END ();
b73bfc1c 3890 continue;
4ed46869
KH
3891
3892 case '[': /* specification of direction */
de59072a 3893 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
df7492f9 3894 goto invalid_code;
4ed46869 3895 /* For the moment, nested direction is not supported.
d46c5b12 3896 So, `coding->mode & CODING_MODE_DIRECTION' zero means
ad1746f5 3897 left-to-right, and nonzero means right-to-left. */
4ed46869
KH
3898 ONE_MORE_BYTE (c1);
3899 switch (c1)
3900 {
3901 case ']': /* end of the current direction */
d46c5b12 3902 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3903
3904 case '0': /* end of the current direction */
3905 case '1': /* start of left-to-right direction */
3906 ONE_MORE_BYTE (c1);
3907 if (c1 == ']')
d46c5b12 3908 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3909 else
df7492f9 3910 goto invalid_code;
4ed46869
KH
3911 break;
3912
3913 case '2': /* start of right-to-left direction */
3914 ONE_MORE_BYTE (c1);
3915 if (c1 == ']')
d46c5b12 3916 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3917 else
df7492f9 3918 goto invalid_code;
4ed46869
KH
3919 break;
3920
3921 default:
df7492f9 3922 goto invalid_code;
4ed46869 3923 }
b73bfc1c 3924 continue;
4ed46869 3925
103e0180 3926 case '%':
103e0180
KH
3927 ONE_MORE_BYTE (c1);
3928 if (c1 == '/')
3929 {
3930 /* CTEXT extended segment:
3931 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3932 We keep these bytes as is for the moment.
3933 They may be decoded by post-read-conversion. */
3934 int dim, M, L;
4776e638 3935 int size;
8f924df7 3936
103e0180 3937 ONE_MORE_BYTE (dim);
7a84eee5 3938 if (dim < '0' || dim > '4')
e951386e 3939 goto invalid_code;
103e0180 3940 ONE_MORE_BYTE (M);
e951386e
KH
3941 if (M < 128)
3942 goto invalid_code;
103e0180 3943 ONE_MORE_BYTE (L);
e951386e
KH
3944 if (L < 128)
3945 goto invalid_code;
103e0180 3946 size = ((M - 128) * 128) + (L - 128);
e951386e 3947 if (charbuf + 6 > charbuf_end)
4776e638
KH
3948 goto break_loop;
3949 *charbuf++ = ISO_CODE_ESC;
3950 *charbuf++ = '%';
3951 *charbuf++ = '/';
3952 *charbuf++ = dim;
3953 *charbuf++ = BYTE8_TO_CHAR (M);
3954 *charbuf++ = BYTE8_TO_CHAR (L);
e951386e 3955 CODING_ISO_EXTSEGMENT_LEN (coding) = size;
103e0180
KH
3956 }
3957 else if (c1 == 'G')
3958 {
103e0180
KH
3959 /* XFree86 extension for embedding UTF-8 in CTEXT:
3960 ESC % G --UTF-8-BYTES-- ESC % @
3961 We keep these bytes as is for the moment.
3962 They may be decoded by post-read-conversion. */
e951386e 3963 if (charbuf + 3 > charbuf_end)
4776e638 3964 goto break_loop;
e951386e
KH
3965 *charbuf++ = ISO_CODE_ESC;
3966 *charbuf++ = '%';
3967 *charbuf++ = 'G';
3968 CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
103e0180
KH
3969 }
3970 else
4776e638 3971 goto invalid_code;
103e0180 3972 continue;
4776e638 3973 break;
103e0180 3974
4ed46869 3975 default:
df7492f9
KH
3976 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3977 goto invalid_code;
134b9549
KH
3978 {
3979 int reg, chars96;
3980
3981 if (c1 >= 0x28 && c1 <= 0x2B)
3982 { /* designation of DIMENSION1_CHARS94 character set */
3983 reg = c1 - 0x28, chars96 = 0;
3984 ONE_MORE_BYTE (c1);
3985 }
3986 else if (c1 >= 0x2C && c1 <= 0x2F)
3987 { /* designation of DIMENSION1_CHARS96 character set */
3988 reg = c1 - 0x2C, chars96 = 1;
3989 ONE_MORE_BYTE (c1);
3990 }
3991 else
3992 goto invalid_code;
3993 DECODE_DESIGNATION (reg, 1, chars96, c1);
3994 /* We must update these variables now. */
3995 if (reg == 0)
3996 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3997 else if (reg == 1)
3998 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3999 if (chars96 < 0)
4000 goto invalid_code;
4001 }
b73bfc1c 4002 continue;
4ed46869 4003 }
b73bfc1c 4004 }
4ed46869 4005
e951386e
KH
4006 if (cmp_status->state == COMPOSING_NO
4007 && charset->id != charset_ascii
ff0dacd7
KH
4008 && last_id != charset->id)
4009 {
4010 if (last_id != charset_ascii)
69a80ea3 4011 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
4012 last_id = charset->id;
4013 last_offset = char_offset;
4014 }
4015
b73bfc1c 4016 /* Now we know CHARSET and 1st position code C1 of a character.
cf299835
KH
4017 Produce a decoded character while getting 2nd and 3rd
4018 position codes C2, C3 if necessary. */
df7492f9 4019 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
4020 {
4021 ONE_MORE_BYTE (c2);
cf299835
KH
4022 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
4023 || ((c1 & 0x80) != (c2 & 0x80)))
b73bfc1c 4024 /* C2 is not in a valid range. */
df7492f9 4025 goto invalid_code;
cf299835
KH
4026 if (CHARSET_DIMENSION (charset) == 2)
4027 c1 = (c1 << 8) | c2;
4028 else
df7492f9 4029 {
cf299835
KH
4030 ONE_MORE_BYTE (c3);
4031 if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
4032 || ((c1 & 0x80) != (c3 & 0x80)))
4033 /* C3 is not in a valid range. */
df7492f9 4034 goto invalid_code;
cf299835 4035 c1 = (c1 << 16) | (c2 << 8) | c2;
df7492f9
KH
4036 }
4037 }
cf299835 4038 c1 &= 0x7F7F7F;
df7492f9
KH
4039 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4040 if (c < 0)
4041 {
4042 MAYBE_FINISH_COMPOSITION ();
4043 for (; src_base < src; src_base++, char_offset++)
4044 {
4045 if (ASCII_BYTE_P (*src_base))
4046 *charbuf++ = *src_base;
4047 else
4048 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4049 }
4050 }
e951386e 4051 else if (cmp_status->state == COMPOSING_NO)
df7492f9
KH
4052 {
4053 *charbuf++ = c;
4054 char_offset++;
4ed46869 4055 }
e951386e
KH
4056 else if ((cmp_status->state == COMPOSING_CHAR
4057 ? cmp_status->nchars
4058 : cmp_status->ncomps)
4059 >= MAX_COMPOSITION_COMPONENTS)
781d7a48 4060 {
e951386e
KH
4061 /* Too long composition. */
4062 MAYBE_FINISH_COMPOSITION ();
4063 *charbuf++ = c;
4064 char_offset++;
4ed46869 4065 }
e951386e
KH
4066 else
4067 STORE_COMPOSITION_CHAR (c);
4ed46869
KH
4068 continue;
4069
df7492f9
KH
4070 invalid_code:
4071 MAYBE_FINISH_COMPOSITION ();
4ed46869 4072 src = src_base;
df7492f9
KH
4073 consumed_chars = consumed_chars_base;
4074 ONE_MORE_BYTE (c);
065e3595 4075 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4076 char_offset++;
df7492f9 4077 coding->errors++;
4776e638
KH
4078 continue;
4079
4080 break_loop:
4081 break;
4ed46869 4082 }
fb88bf2d 4083
df7492f9 4084 no_more_source:
e951386e
KH
4085 if (cmp_status->state != COMPOSING_NO)
4086 {
4087 if (coding->mode & CODING_MODE_LAST_BLOCK)
4088 MAYBE_FINISH_COMPOSITION ();
4089 else
4090 {
4091 charbuf -= cmp_status->length;
4092 for (i = 0; i < cmp_status->length; i++)
4093 cmp_status->carryover[i] = charbuf[i];
4094 }
4095 }
4096 else if (last_id != charset_ascii)
69a80ea3 4097 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4098 coding->consumed_char += consumed_chars_base;
4099 coding->consumed = src_base - coding->source;
4100 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4101}
4102
b73bfc1c 4103
f4dee582 4104/* ISO2022 encoding stuff. */
4ed46869
KH
4105
4106/*
f4dee582 4107 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 4108 specify more details. In Emacs, each coding system of ISO2022
4ed46869 4109 variant has the following specifications:
df7492f9 4110 1. Initial designation to G0 thru G3.
4ed46869
KH
4111 2. Allows short-form designation?
4112 3. ASCII should be designated to G0 before control characters?
4113 4. ASCII should be designated to G0 at end of line?
4114 5. 7-bit environment or 8-bit environment?
4115 6. Use locking-shift?
4116 7. Use Single-shift?
4117 And the following two are only for Japanese:
4118 8. Use ASCII in place of JIS0201-1976-Roman?
4119 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
4120 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4121 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 4122 details.
4ed46869
KH
4123*/
4124
4125/* Produce codes (escape sequence) for designating CHARSET to graphic
b73bfc1c
KH
4126 register REG at DST, and increment DST. If <final-char> of CHARSET is
4127 '@', 'A', or 'B' and the coding system CODING allows, produce
4128 designation sequence of short-form. */
4ed46869
KH
4129
#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    /* Intermediate bytes for 94- and 96-character sets, indexed by    \
       graphic register number.  */                                    \
    char *iso_inter_94 = "()*+";                                        \
    char *iso_inter_96 = ",-./";                                        \
    unsigned char iso_final = CHARSET_ISO_FINAL (charset);              \
    int iso_revision = -1;                                              \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      iso_revision = CHARSET_ISO_REVISION (charset);                    \
                                                                        \
    /* Optional revision prefix: ESC & <'@' + revision>.  */            \
    if (iso_revision >= 0)                                              \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + iso_revision);                             \
      }                                                                 \
                                                                        \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          EMIT_ONE_ASCII_BYTE (iso_inter_96[reg]);                      \
        /* The intermediate byte is left out (short form) only for     \
           register 0 with a final byte in '@'..'B', and only when     \
           the long form is not requested.  */                         \
        else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM  \
                 || (reg) != 0                                          \
                 || iso_final < '@' || iso_final > 'B')                 \
          EMIT_ONE_ASCII_BYTE (iso_inter_94[reg]);                      \
      }                                                                 \
    else                                                                \
      {                                                                 \
        int iso_inter;                                                  \
                                                                        \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          iso_inter = iso_inter_96[reg];                                \
        else                                                            \
          iso_inter = iso_inter_94[reg];                                \
        EMIT_ONE_ASCII_BYTE (iso_inter);                                \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (iso_final);                                    \
                                                                        \
    /* Record the new designation.  */                                  \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4172
df7492f9 4173
4ed46869
KH
4174/* The following two macros produce codes (control character or escape
4175 sequence) for ISO2022 single-shift functions (single-shift-2 and
4176 single-shift-3). */
4177
df7492f9
KH
#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    /* Without the seven-bits flag the SS2 byte itself is emitted;     \
       with it, the two-byte form ESC N is used instead.  */           \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    /* Mark that the next character is single-shifted.  */              \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4186
df7492f9
KH
4187
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    /* Without the seven-bits flag the SS3 byte itself is emitted;     \
       with it, the two-byte form ESC O is used instead.  */           \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    /* Mark that the next character is single-shifted.  */              \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4196
df7492f9 4197
4ed46869
KH
4198/* The following four macros produce codes (control character or
4199 escape sequence) for ISO2022 locking-shift functions (shift-in,
4200 shift-out, locking-shift-2, and locking-shift-3). */
4201
df7492f9
KH
#define ENCODE_SHIFT_IN                                                 \
  do {                                                                  \
    /* Emit SI and record register 0 as invoked to plane 0.  */         \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;                              \
  } while (0)
4207
df7492f9
KH
4208
#define ENCODE_SHIFT_OUT                                                \
  do {                                                                  \
    /* Emit SO and record register 1 as invoked to plane 0.  */         \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;                              \
  } while (0)
4214
df7492f9
KH
4215
#define ENCODE_LOCKING_SHIFT_2                                          \
  do {                                                                  \
    /* Emit ESC n and record register 2 as invoked to plane 0.  */      \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');                           \
    CODING_ISO_INVOCATION (coding, 0) = 2;                              \
  } while (0)
4221
df7492f9
KH
4222
#define ENCODE_LOCKING_SHIFT_3                                          \
  do {                                                                  \
    /* Locking-shift-3 is ESC o; ESC n is locking-shift-2 and would    \
       make a reader invoke register 2 instead of 3.  */               \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');                           \
    /* Record register 3 as invoked to plane 0.  */                     \
    CODING_ISO_INVOCATION (coding, 0) = 3;                              \
  } while (0)
4227 } while (0)
4228
df7492f9 4229
f4dee582
RS
4230/* Produce codes for a DIMENSION1 character whose character set is
4231 CHARSET and whose position-code is C1. Designation and invocation
4ed46869
KH
4232 sequences are also produced in advance if necessary. */
4233
6e85d753
KH
#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    /* With the use-roman flag, ASCII is sent as JISX0201-Roman.  */    \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* C1 is parenthesized so that any expression may be passed.  */ \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
4272
df7492f9 4273
f4dee582
RS
4274/* Produce codes for a DIMENSION2 character whose character set is
4275 CHARSET and whose position-codes are C1 and C2. Designation and
4ed46869
KH
4276 invocation codes are also produced in advance if necessary. */
4277
6e85d753
KH
#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    /* With the use-oldjis flag, JISX0208 is sent as JISX0208-1978.  */ \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single shift was just emitted; it covers this one         \
           character only.  */                                         \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        else                                                            \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to either graphic plane yet.  Emit the   \
       designation and/or invocation it needs, then go round the       \
       loop again to produce the character itself.  */                 \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
4316
05e6f5dc 4317
df7492f9
KH
#define ENCODE_ISO_CHARACTER(charset, c)                                \
  do {                                                                  \
    /* Code of C in CHARSET as given by ENCODE_CHAR.  */               \
    int iso_position_code = ENCODE_CHAR ((charset),(c));                \
                                                                        \
    /* Charsets of dimension other than 1 are emitted as two bytes:    \
       bits 8 and up of the code, then its low 8 bits.  */             \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset),                       \
                                       iso_position_code >> 8,          \
                                       iso_position_code & 0xFF);       \
    else                                                                \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), iso_position_code);   \
  } while (0)
bdd9fb48 4327
05e6f5dc 4328
4ed46869 4329/* Produce designation and invocation codes at a place pointed by DST
df7492f9 4330 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
4331 Return new DST. */
4332
4333unsigned char *
df7492f9
KH
4334encode_invocation_designation (charset, coding, dst, p_nchars)
4335 struct charset *charset;
4ed46869
KH
4336 struct coding_system *coding;
4337 unsigned char *dst;
df7492f9 4338 int *p_nchars;
4ed46869 4339{
df7492f9
KH
4340 int multibytep = coding->dst_multibyte;
4341 int produced_chars = *p_nchars;
4ed46869 4342 int reg; /* graphic register number */
df7492f9 4343 int id = CHARSET_ID (charset);
4ed46869
KH
4344
4345 /* At first, check designations. */
4346 for (reg = 0; reg < 4; reg++)
df7492f9 4347 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
4348 break;
4349
4350 if (reg >= 4)
4351 {
4352 /* CHARSET is not yet designated to any graphic registers. */
4353 /* At first check the requested designation. */
df7492f9
KH
4354 reg = CODING_ISO_REQUEST (coding, id);
4355 if (reg < 0)
1ba9e4ab
KH
4356 /* Since CHARSET requests no special designation, designate it
4357 to graphic register 0. */
4ed46869
KH
4358 reg = 0;
4359
4360 ENCODE_DESIGNATION (charset, reg, coding);
4361 }
4362
df7492f9
KH
4363 if (CODING_ISO_INVOCATION (coding, 0) != reg
4364 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
4365 {
4366 /* Since the graphic register REG is not invoked to any graphic
4367 planes, invoke it to graphic plane 0. */
4368 switch (reg)
4369 {
4370 case 0: /* graphic register 0 */
4371 ENCODE_SHIFT_IN;
4372 break;
4373
4374 case 1: /* graphic register 1 */
4375 ENCODE_SHIFT_OUT;
4376 break;
4377
4378 case 2: /* graphic register 2 */
df7492f9 4379 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
4380 ENCODE_SINGLE_SHIFT_2;
4381 else
4382 ENCODE_LOCKING_SHIFT_2;
4383 break;
4384
4385 case 3: /* graphic register 3 */
df7492f9 4386 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
4387 ENCODE_SINGLE_SHIFT_3;
4388 else
4389 ENCODE_LOCKING_SHIFT_3;
4390 break;
4391 }
4392 }
b73bfc1c 4393
df7492f9 4394 *p_nchars = produced_chars;
4ed46869
KH
4395 return dst;
4396}
4397
df7492f9
KH
4398/* The following three macros produce codes for indicating direction
4399 of text. */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    /* Test the seven-bits flag with `&': the flag word usually has    \
       other bits set too, so comparing it with `==' would pick the    \
       8-bit CSI byte even when the seven-bits flag is set.  */        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
4407
ec6d2bb8 4408
df7492f9
KH
#define ENCODE_DIRECTION_R2L()                                          \
  do {                                                                  \
    /* Start of right-to-left text: CSI 2 ].  The introducer macro is  \
       object-like, so it must be used without an argument list.  */   \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                                 \
    EMIT_TWO_ASCII_BYTES ('2', ']');                                    \
  } while (0)
4414
ec6d2bb8 4415
#define ENCODE_DIRECTION_L2R()                                          \
  do {                                                                  \
    /* End of the current direction: CSI 0 ].  The introducer macro    \
       is object-like, so it must be used without an argument list.  */ \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                                 \
    EMIT_TWO_ASCII_BYTES ('0', ']');                                    \
  } while (0)
4ed46869 4421
4ed46869
KH
4422
4423/* Produce codes for designation and invocation to reset the graphic
4424 planes and registers to initial state. */
df7492f9
KH
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg = 0;                                                        \
                                                                        \
    /* Bring register 0 back to plane 0 if something else is there.  */ \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
                                                                        \
    /* Re-designate every register whose current charset differs       \
       from its initial one.  Registers with no initial charset        \
       (negative id) are left alone.  */                               \
    while (reg < 4)                                                     \
      {                                                                 \
        if (! (CODING_ISO_INITIAL (coding, reg) < 0                     \
               || (CODING_ISO_DESIGNATION (coding, reg)                 \
                   == CODING_ISO_INITIAL (coding, reg))))               \
          {                                                             \
            struct charset *charset                                     \
              = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));     \
            ENCODE_DESIGNATION (charset, reg, coding);                  \
          }                                                             \
        reg++;                                                          \
      }                                                                 \
  } while (0)
4441
df7492f9 4442
bdd9fb48 4443/* Produce designation sequences of charsets in the line started from
b73bfc1c 4444 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
4445
4446 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
4447 find all the necessary designations. */
4448
b73bfc1c 4449static unsigned char *
df7492f9 4450encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 4451 struct coding_system *coding;
df7492f9
KH
4452 int *charbuf, *charbuf_end;
4453 unsigned char *dst;
e0e989f6 4454{
df7492f9 4455 struct charset *charset;
bdd9fb48
KH
4456 /* Table of charsets to be designated to each graphic register. */
4457 int r[4];
df7492f9
KH
4458 int c, found = 0, reg;
4459 int produced_chars = 0;
4460 int multibytep = coding->dst_multibyte;
4461 Lisp_Object attrs;
4462 Lisp_Object charset_list;
4463
4464 attrs = CODING_ID_ATTRS (coding->id);
4465 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4466 if (EQ (charset_list, Qiso_2022))
4467 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
4468
4469 for (reg = 0; reg < 4; reg++)
4470 r[reg] = -1;
4471
b73bfc1c 4472 while (found < 4)
e0e989f6 4473 {
df7492f9
KH
4474 int id;
4475
4476 c = *charbuf++;
b73bfc1c
KH
4477 if (c == '\n')
4478 break;
df7492f9
KH
4479 charset = char_charset (c, charset_list, NULL);
4480 id = CHARSET_ID (charset);
4481 reg = CODING_ISO_REQUEST (coding, id);
4482 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
4483 {
4484 found++;
df7492f9 4485 r[reg] = id;
bdd9fb48 4486 }
bdd9fb48
KH
4487 }
4488
4489 if (found)
4490 {
4491 for (reg = 0; reg < 4; reg++)
4492 if (r[reg] >= 0
df7492f9
KH
4493 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4494 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 4495 }
b73bfc1c
KH
4496
4497 return dst;
e0e989f6
KH
4498}
4499
4ed46869
KH
4500/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
4501
df7492f9
KH
4502static int
4503encode_coding_iso_2022 (coding)
4ed46869 4504 struct coding_system *coding;
4ed46869 4505{
df7492f9
KH
4506 int multibytep = coding->dst_multibyte;
4507 int *charbuf = coding->charbuf;
4508 int *charbuf_end = charbuf + coding->charbuf_used;
4509 unsigned char *dst = coding->destination + coding->produced;
4510 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4511 int safe_room = 16;
4512 int bol_designation
4513 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4514 && CODING_ISO_BOL (coding));
4515 int produced_chars = 0;
4516 Lisp_Object attrs, eol_type, charset_list;
4517 int ascii_compatible;
b73bfc1c 4518 int c;
ff0dacd7 4519 int preferred_charset_id = -1;
05e6f5dc 4520
24a73b0a 4521 CODING_GET_INFO (coding, attrs, charset_list);
0a9564cb 4522 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
24a73b0a
KH
4523 if (VECTORP (eol_type))
4524 eol_type = Qunix;
4525
004068e4 4526 setup_iso_safe_charsets (attrs);
ff0dacd7 4527 /* Charset list may have been changed. */
287c57d7 4528 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
1b3b981b 4529 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
0eecad43 4530
a552b35a
KH
4531 ascii_compatible
4532 = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
4533 && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
4534 | CODING_ISO_FLAG_LOCKING_SHIFT)));
bdd9fb48 4535
df7492f9 4536 while (charbuf < charbuf_end)
4ed46869 4537 {
df7492f9 4538 ASSURE_DESTINATION (safe_room);
b73bfc1c 4539
df7492f9 4540 if (bol_designation)
b73bfc1c 4541 {
df7492f9 4542 unsigned char *dst_prev = dst;
4ed46869 4543
bdd9fb48 4544 /* We have to produce designation sequences if any now. */
df7492f9
KH
4545 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4546 bol_designation = 0;
4547 /* We are sure that designation sequences are all ASCII bytes. */
4548 produced_chars += dst - dst_prev;
e0e989f6
KH
4549 }
4550
df7492f9 4551 c = *charbuf++;
ec6d2bb8 4552
ff0dacd7
KH
4553 if (c < 0)
4554 {
4555 /* Handle an annotation. */
4556 switch (*charbuf)
ec6d2bb8 4557 {
ff0dacd7
KH
4558 case CODING_ANNOTATE_COMPOSITION_MASK:
4559 /* Not yet implemented. */
4560 break;
4561 case CODING_ANNOTATE_CHARSET_MASK:
cf7dfdf5 4562 preferred_charset_id = charbuf[2];
ff0dacd7
KH
4563 if (preferred_charset_id >= 0
4564 && NILP (Fmemq (make_number (preferred_charset_id),
4565 charset_list)))
4566 preferred_charset_id = -1;
4567 break;
4568 default:
4569 abort ();
4ed46869 4570 }
ff0dacd7
KH
4571 charbuf += -c - 1;
4572 continue;
4ed46869 4573 }
ec6d2bb8 4574
b73bfc1c
KH
4575 /* Now encode the character C. */
4576 if (c < 0x20 || c == 0x7F)
4577 {
df7492f9
KH
4578 if (c == '\n'
4579 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 4580 {
df7492f9
KH
4581 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4582 ENCODE_RESET_PLANE_AND_REGISTER ();
4583 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 4584 {
df7492f9
KH
4585 int i;
4586
4587 for (i = 0; i < 4; i++)
4588 CODING_ISO_DESIGNATION (coding, i)
4589 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 4590 }
df7492f9
KH
4591 bol_designation
4592 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 4593 }
df7492f9
KH
4594 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4595 ENCODE_RESET_PLANE_AND_REGISTER ();
4596 EMIT_ONE_ASCII_BYTE (c);
4ed46869 4597 }
df7492f9 4598 else if (ASCII_CHAR_P (c))
88993dfd 4599 {
df7492f9
KH
4600 if (ascii_compatible)
4601 EMIT_ONE_ASCII_BYTE (c);
93dec019 4602 else
19a8d9e0 4603 {
bf16eb23
KH
4604 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4605 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 4606 }
4ed46869 4607 }
16eafb5d 4608 else if (CHAR_BYTE8_P (c))
88993dfd 4609 {
16eafb5d
KH
4610 c = CHAR_TO_BYTE8 (c);
4611 EMIT_ONE_BYTE (c);
88993dfd 4612 }
b73bfc1c 4613 else
df7492f9 4614 {
ff0dacd7 4615 struct charset *charset;
b73bfc1c 4616
ff0dacd7
KH
4617 if (preferred_charset_id >= 0)
4618 {
4619 charset = CHARSET_FROM_ID (preferred_charset_id);
4620 if (! CHAR_CHARSET_P (c, charset))
4621 charset = char_charset (c, charset_list, NULL);
4622 }
4623 else
4624 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
4625 if (!charset)
4626 {
41cbe562
KH
4627 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4628 {
4629 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4630 charset = CHARSET_FROM_ID (charset_ascii);
4631 }
4632 else
4633 {
4634 c = coding->default_char;
4635 charset = char_charset (c, charset_list, NULL);
4636 }
df7492f9
KH
4637 }
4638 ENCODE_ISO_CHARACTER (charset, c);
4639 }
84fbb8a0 4640 }
b73bfc1c 4641
df7492f9
KH
4642 if (coding->mode & CODING_MODE_LAST_BLOCK
4643 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4644 {
4645 ASSURE_DESTINATION (safe_room);
4646 ENCODE_RESET_PLANE_AND_REGISTER ();
4647 }
065e3595 4648 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4649 CODING_ISO_BOL (coding) = bol_designation;
4650 coding->produced_char += produced_chars;
4651 coding->produced = dst - coding->destination;
4652 return 0;
4ed46869
KH
4653}
4654
4655\f
df7492f9 4656/*** 8. Shift-JIS and BIG5 handlers ***/
4ed46869 4657
df7492f9 4658/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
4659 quite widely. So, for the moment, Emacs supports them in the bare
4660 C code. But, in the future, they may be supported only by CCL. */
4661
4662/* SJIS is a coding system encoding three character sets: ASCII, right
4663 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
4664 as is. A character of charset katakana-jisx0201 is encoded by
4665 "position-code + 0x80". A character of charset japanese-jisx0208
4666 is encoded in two bytes, but the two position-codes are divided and
df7492f9 4667 shifted so that they fit in the ranges below.
4ed46869
KH
4668
4669 --- CODE RANGE of SJIS ---
4670 (character set) (range)
4671 ASCII 0x00 .. 0x7F
df7492f9 4672 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 4673 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 4674 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
4675 -------------------------------
4676
4677*/
4678
4679/* BIG5 is a coding system encoding two character sets: ASCII and
4680 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 4681 character set and is encoded in two bytes.
4ed46869
KH
4682
4683 --- CODE RANGE of BIG5 ---
4684 (character set) (range)
4685 ASCII 0x00 .. 0x7F
4686 Big5 (1st byte) 0xA1 .. 0xFE
4687 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
4688 --------------------------
4689
df7492f9 4690 */
4ed46869
KH
4691
4692/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4693 Check if a text is encoded in SJIS. If it is, return
df7492f9 4694 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 4695
0a28aafb 4696static int
ff0dacd7 4697detect_coding_sjis (coding, detect_info)
df7492f9 4698 struct coding_system *coding;
ff0dacd7 4699 struct coding_detection_info *detect_info;
4ed46869 4700{
065e3595 4701 const unsigned char *src = coding->source, *src_base;
8f924df7 4702 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4703 int multibytep = coding->src_multibyte;
4704 int consumed_chars = 0;
4705 int found = 0;
b73bfc1c 4706 int c;
f07190ca
KH
4707 Lisp_Object attrs, charset_list;
4708 int max_first_byte_of_2_byte_code;
4709
4710 CODING_GET_INFO (coding, attrs, charset_list);
4711 max_first_byte_of_2_byte_code
4712 = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
df7492f9 4713
ff0dacd7 4714 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
4715 /* A coding system of this category is always ASCII compatible. */
4716 src += coding->head_ascii;
4ed46869 4717
b73bfc1c 4718 while (1)
4ed46869 4719 {
065e3595 4720 src_base = src;
df7492f9 4721 ONE_MORE_BYTE (c);
682169fe
KH
4722 if (c < 0x80)
4723 continue;
f07190ca
KH
4724 if ((c >= 0x81 && c <= 0x9F)
4725 || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4ed46869 4726 {
df7492f9 4727 ONE_MORE_BYTE (c);
682169fe 4728 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 4729 break;
ff0dacd7 4730 found = CATEGORY_MASK_SJIS;
4ed46869 4731 }
df7492f9 4732 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 4733 found = CATEGORY_MASK_SJIS;
df7492f9
KH
4734 else
4735 break;
4ed46869 4736 }
ff0dacd7 4737 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
4738 return 0;
4739
4740 no_more_source:
065e3595 4741 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4742 {
ff0dacd7 4743 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 4744 return 0;
4ed46869 4745 }
ff0dacd7
KH
4746 detect_info->found |= found;
4747 return 1;
4ed46869
KH
4748}
4749
4750/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4751 Check if a text is encoded in BIG5. If it is, return
df7492f9 4752 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 4753
0a28aafb 4754static int
ff0dacd7 4755detect_coding_big5 (coding, detect_info)
df7492f9 4756 struct coding_system *coding;
ff0dacd7 4757 struct coding_detection_info *detect_info;
4ed46869 4758{
065e3595 4759 const unsigned char *src = coding->source, *src_base;
8f924df7 4760 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4761 int multibytep = coding->src_multibyte;
4762 int consumed_chars = 0;
4763 int found = 0;
b73bfc1c 4764 int c;
fa42c37f 4765
ff0dacd7 4766 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4767 /* A coding system of this category is always ASCII compatible. */
4768 src += coding->head_ascii;
fa42c37f 4769
b73bfc1c 4770 while (1)
fa42c37f 4771 {
065e3595 4772 src_base = src;
df7492f9
KH
4773 ONE_MORE_BYTE (c);
4774 if (c < 0x80)
fa42c37f 4775 continue;
df7492f9 4776 if (c >= 0xA1)
fa42c37f 4777 {
df7492f9
KH
4778 ONE_MORE_BYTE (c);
4779 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4780 return 0;
ff0dacd7 4781 found = CATEGORY_MASK_BIG5;
fa42c37f 4782 }
df7492f9
KH
4783 else
4784 break;
fa42c37f 4785 }
ff0dacd7 4786 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4787 return 0;
fa42c37f 4788
df7492f9 4789 no_more_source:
065e3595 4790 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4791 {
ff0dacd7 4792 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4793 return 0;
4794 }
ff0dacd7
KH
4795 detect_info->found |= found;
4796 return 1;
fa42c37f
KH
4797}
4798
4ed46869
KH
4799/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4800 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
fa42c37f 4801
b73bfc1c 4802static void
df7492f9 4803decode_coding_sjis (coding)
4ed46869 4804 struct coding_system *coding;
4ed46869 4805{
8f924df7
KH
4806 const unsigned char *src = coding->source + coding->consumed;
4807 const unsigned char *src_end = coding->source + coding->src_bytes;
4808 const unsigned char *src_base;
69a80ea3 4809 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5 4810 /* We may produce one charset annotation in one loop and one more at
df80c7f0 4811 the end. */
69a80ea3 4812 int *charbuf_end
df80c7f0 4813 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
df7492f9
KH
4814 int consumed_chars = 0, consumed_chars_base;
4815 int multibytep = coding->src_multibyte;
4816 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4817 struct charset *charset_kanji2;
24a73b0a 4818 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4819 int char_offset = coding->produced_char;
4820 int last_offset = char_offset;
4821 int last_id = charset_ascii;
0a9564cb
EZ
4822 int eol_crlf =
4823 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 4824 int byte_after_cr = -1;
a5d301df 4825
24a73b0a 4826 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4827
4828 val = charset_list;
4829 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4830 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4831 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4832 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4833
b73bfc1c 4834 while (1)
4ed46869 4835 {
df7492f9 4836 int c, c1;
24a73b0a 4837 struct charset *charset;
fa42c37f 4838
b73bfc1c 4839 src_base = src;
df7492f9 4840 consumed_chars_base = consumed_chars;
fa42c37f 4841
df7492f9 4842 if (charbuf >= charbuf_end)
b71f6f73
KH
4843 {
4844 if (byte_after_cr >= 0)
4845 src_base--;
4846 break;
4847 }
df7492f9 4848
119852e7
KH
4849 if (byte_after_cr >= 0)
4850 c = byte_after_cr, byte_after_cr = -1;
4851 else
4852 ONE_MORE_BYTE (c);
065e3595
KH
4853 if (c < 0)
4854 goto invalid_code;
24a73b0a 4855 if (c < 0x80)
119852e7
KH
4856 {
4857 if (eol_crlf && c == '\r')
4858 ONE_MORE_BYTE (byte_after_cr);
4859 charset = charset_roman;
4860 }
57a47f8a 4861 else if (c == 0x80 || c == 0xA0)
8e921c4b 4862 goto invalid_code;
57a47f8a
KH
4863 else if (c >= 0xA1 && c <= 0xDF)
4864 {
4865 /* SJIS -> JISX0201-Kana */
4866 c &= 0x7F;
4867 charset = charset_kana;
4868 }
4869 else if (c <= 0xEF)
df7492f9 4870 {
57a47f8a
KH
4871 /* SJIS -> JISX0208 */
4872 ONE_MORE_BYTE (c1);
4873 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4874 goto invalid_code;
57a47f8a
KH
4875 c = (c << 8) | c1;
4876 SJIS_TO_JIS (c);
4877 charset = charset_kanji;
4878 }
4879 else if (c <= 0xFC && charset_kanji2)
4880 {
c6876370 4881 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4882 ONE_MORE_BYTE (c1);
4883 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4884 goto invalid_code;
57a47f8a
KH
4885 c = (c << 8) | c1;
4886 SJIS_TO_JIS2 (c);
4887 charset = charset_kanji2;
df7492f9 4888 }
57a47f8a
KH
4889 else
4890 goto invalid_code;
24a73b0a
KH
4891 if (charset->id != charset_ascii
4892 && last_id != charset->id)
4893 {
4894 if (last_id != charset_ascii)
69a80ea3 4895 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4896 last_id = charset->id;
4897 last_offset = char_offset;
4898 }
4899 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4900 *charbuf++ = c;
ff0dacd7 4901 char_offset++;
df7492f9 4902 continue;
b73bfc1c 4903
df7492f9
KH
4904 invalid_code:
4905 src = src_base;
4906 consumed_chars = consumed_chars_base;
4907 ONE_MORE_BYTE (c);
065e3595 4908 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4909 char_offset++;
df7492f9
KH
4910 coding->errors++;
4911 }
fa42c37f 4912
df7492f9 4913 no_more_source:
ff0dacd7 4914 if (last_id != charset_ascii)
69a80ea3 4915 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4916 coding->consumed_char += consumed_chars_base;
4917 coding->consumed = src_base - coding->source;
4918 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4919}
4920
b73bfc1c 4921static void
df7492f9 4922decode_coding_big5 (coding)
4ed46869 4923 struct coding_system *coding;
4ed46869 4924{
8f924df7
KH
4925 const unsigned char *src = coding->source + coding->consumed;
4926 const unsigned char *src_end = coding->source + coding->src_bytes;
4927 const unsigned char *src_base;
69a80ea3 4928 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5 4929 /* We may produce one charset annotation in one loop and one more at
df80c7f0 4930 the end. */
69a80ea3 4931 int *charbuf_end
df80c7f0 4932 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
df7492f9
KH
4933 int consumed_chars = 0, consumed_chars_base;
4934 int multibytep = coding->src_multibyte;
4935 struct charset *charset_roman, *charset_big5;
24a73b0a 4936 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4937 int char_offset = coding->produced_char;
4938 int last_offset = char_offset;
4939 int last_id = charset_ascii;
0a9564cb
EZ
4940 int eol_crlf =
4941 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 4942 int byte_after_cr = -1;
df7492f9 4943
24a73b0a 4944 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4945 val = charset_list;
4946 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4947 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4948
b73bfc1c 4949 while (1)
4ed46869 4950 {
df7492f9 4951 int c, c1;
24a73b0a 4952 struct charset *charset;
b73bfc1c
KH
4953
4954 src_base = src;
df7492f9
KH
4955 consumed_chars_base = consumed_chars;
4956
4957 if (charbuf >= charbuf_end)
b71f6f73
KH
4958 {
4959 if (byte_after_cr >= 0)
4960 src_base--;
4961 break;
4962 }
df7492f9 4963
119852e7 4964 if (byte_after_cr >= 0)
14daee73 4965 c = byte_after_cr, byte_after_cr = -1;
119852e7
KH
4966 else
4967 ONE_MORE_BYTE (c);
b73bfc1c 4968
065e3595
KH
4969 if (c < 0)
4970 goto invalid_code;
24a73b0a 4971 if (c < 0x80)
119852e7 4972 {
14daee73 4973 if (eol_crlf && c == '\r')
119852e7
KH
4974 ONE_MORE_BYTE (byte_after_cr);
4975 charset = charset_roman;
4976 }
24a73b0a 4977 else
4ed46869 4978 {
24a73b0a
KH
4979 /* BIG5 -> Big5 */
4980 if (c < 0xA1 || c > 0xFE)
4981 goto invalid_code;
4982 ONE_MORE_BYTE (c1);
4983 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4984 goto invalid_code;
4985 c = c << 8 | c1;
4986 charset = charset_big5;
4ed46869 4987 }
24a73b0a
KH
4988 if (charset->id != charset_ascii
4989 && last_id != charset->id)
df7492f9 4990 {
24a73b0a 4991 if (last_id != charset_ascii)
69a80ea3 4992 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4993 last_id = charset->id;
4994 last_offset = char_offset;
4ed46869 4995 }
24a73b0a 4996 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4997 *charbuf++ = c;
ff0dacd7 4998 char_offset++;
fb88bf2d
KH
4999 continue;
5000
df7492f9 5001 invalid_code:
4ed46869 5002 src = src_base;
df7492f9
KH
5003 consumed_chars = consumed_chars_base;
5004 ONE_MORE_BYTE (c);
065e3595 5005 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 5006 char_offset++;
df7492f9 5007 coding->errors++;
fb88bf2d 5008 }
d46c5b12 5009
df7492f9 5010 no_more_source:
ff0dacd7 5011 if (last_id != charset_ascii)
69a80ea3 5012 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
5013 coding->consumed_char += consumed_chars_base;
5014 coding->consumed = src_base - coding->source;
5015 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
5016}
5017
5018/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
5019 This function can encode charsets `ascii', `katakana-jisx0201',
5020 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
5021 are sure that all these charsets are registered as official charset
4ed46869
KH
5022 (i.e. do not have extended leading-codes). Characters of other
5023 charsets are produced without any encoding. If SJIS_P is 1, encode
5024 SJIS text, else encode BIG5 text. */
5025
df7492f9
KH
5026static int
5027encode_coding_sjis (coding)
4ed46869 5028 struct coding_system *coding;
4ed46869 5029{
df7492f9
KH
5030 int multibytep = coding->dst_multibyte;
5031 int *charbuf = coding->charbuf;
5032 int *charbuf_end = charbuf + coding->charbuf_used;
5033 unsigned char *dst = coding->destination + coding->produced;
5034 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5035 int safe_room = 4;
5036 int produced_chars = 0;
24a73b0a 5037 Lisp_Object attrs, charset_list, val;
df7492f9
KH
5038 int ascii_compatible;
5039 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 5040 struct charset *charset_kanji2;
df7492f9 5041 int c;
a5d301df 5042
24a73b0a 5043 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
5044 val = charset_list;
5045 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5046 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
5047 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5048 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 5049
df7492f9 5050 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 5051
df7492f9
KH
5052 while (charbuf < charbuf_end)
5053 {
5054 ASSURE_DESTINATION (safe_room);
5055 c = *charbuf++;
b73bfc1c 5056 /* Now encode the character C. */
df7492f9
KH
5057 if (ASCII_CHAR_P (c) && ascii_compatible)
5058 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
5059 else if (CHAR_BYTE8_P (c))
5060 {
5061 c = CHAR_TO_BYTE8 (c);
5062 EMIT_ONE_BYTE (c);
5063 }
df7492f9 5064 else
b73bfc1c 5065 {
df7492f9
KH
5066 unsigned code;
5067 struct charset *charset = char_charset (c, charset_list, &code);
5068
5069 if (!charset)
4ed46869 5070 {
41cbe562 5071 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 5072 {
41cbe562
KH
5073 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5074 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 5075 }
41cbe562 5076 else
b73bfc1c 5077 {
41cbe562
KH
5078 c = coding->default_char;
5079 charset = char_charset (c, charset_list, &code);
b73bfc1c 5080 }
b73bfc1c 5081 }
df7492f9
KH
5082 if (code == CHARSET_INVALID_CODE (charset))
5083 abort ();
5084 if (charset == charset_kanji)
5085 {
5086 int c1, c2;
5087 JIS_TO_SJIS (code);
5088 c1 = code >> 8, c2 = code & 0xFF;
5089 EMIT_TWO_BYTES (c1, c2);
5090 }
5091 else if (charset == charset_kana)
5092 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
5093 else if (charset_kanji2 && charset == charset_kanji2)
5094 {
5095 int c1, c2;
5096
5097 c1 = code >> 8;
f07190ca
KH
5098 if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5099 || c1 == 0x28
57a47f8a
KH
5100 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5101 {
5102 JIS_TO_SJIS2 (code);
5103 c1 = code >> 8, c2 = code & 0xFF;
5104 EMIT_TWO_BYTES (c1, c2);
5105 }
5106 else
5107 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5108 }
df7492f9
KH
5109 else
5110 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5111 }
5112 }
065e3595 5113 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5114 coding->produced_char += produced_chars;
5115 coding->produced = dst - coding->destination;
5116 return 0;
5117}
5118
5119static int
5120encode_coding_big5 (coding)
5121 struct coding_system *coding;
5122{
5123 int multibytep = coding->dst_multibyte;
5124 int *charbuf = coding->charbuf;
5125 int *charbuf_end = charbuf + coding->charbuf_used;
5126 unsigned char *dst = coding->destination + coding->produced;
5127 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5128 int safe_room = 4;
5129 int produced_chars = 0;
24a73b0a 5130 Lisp_Object attrs, charset_list, val;
df7492f9
KH
5131 int ascii_compatible;
5132 struct charset *charset_roman, *charset_big5;
5133 int c;
5134
24a73b0a 5135 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
5136 val = charset_list;
5137 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5138 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5139 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5140
5141 while (charbuf < charbuf_end)
5142 {
5143 ASSURE_DESTINATION (safe_room);
5144 c = *charbuf++;
5145 /* Now encode the character C. */
5146 if (ASCII_CHAR_P (c) && ascii_compatible)
5147 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
5148 else if (CHAR_BYTE8_P (c))
5149 {
5150 c = CHAR_TO_BYTE8 (c);
5151 EMIT_ONE_BYTE (c);
b73bfc1c
KH
5152 }
5153 else
5154 {
df7492f9
KH
5155 unsigned code;
5156 struct charset *charset = char_charset (c, charset_list, &code);
5157
5158 if (! charset)
b73bfc1c 5159 {
41cbe562 5160 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 5161 {
41cbe562
KH
5162 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5163 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 5164 }
41cbe562 5165 else
0eecad43 5166 {
41cbe562
KH
5167 c = coding->default_char;
5168 charset = char_charset (c, charset_list, &code);
0eecad43 5169 }
4ed46869 5170 }
df7492f9
KH
5171 if (code == CHARSET_INVALID_CODE (charset))
5172 abort ();
5173 if (charset == charset_big5)
b73bfc1c 5174 {
df7492f9
KH
5175 int c1, c2;
5176
5177 c1 = code >> 8, c2 = code & 0xFF;
5178 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 5179 }
df7492f9
KH
5180 else
5181 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 5182 }
4ed46869 5183 }
065e3595 5184 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5185 coding->produced_char += produced_chars;
5186 coding->produced = dst - coding->destination;
5187 return 0;
4ed46869
KH
5188}
5189
5190\f
df7492f9 5191/*** 9. CCL handlers ***/
1397dc18
KH
5192
5193/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5194 Check if a text is encoded in a coding system of which
5195 encoder/decoder are written in CCL program. If it is, return
df7492f9 5196 CATEGORY_MASK_CCL, else return 0. */
1397dc18 5197
0a28aafb 5198static int
ff0dacd7 5199detect_coding_ccl (coding, detect_info)
df7492f9 5200 struct coding_system *coding;
ff0dacd7 5201 struct coding_detection_info *detect_info;
1397dc18 5202{
065e3595 5203 const unsigned char *src = coding->source, *src_base;
8f924df7 5204 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
5205 int multibytep = coding->src_multibyte;
5206 int consumed_chars = 0;
5207 int found = 0;
0e219d54 5208 unsigned char *valids;
df7492f9
KH
5209 int head_ascii = coding->head_ascii;
5210 Lisp_Object attrs;
5211
ff0dacd7
KH
5212 detect_info->checked |= CATEGORY_MASK_CCL;
5213
df7492f9 5214 coding = &coding_categories[coding_category_ccl];
0e219d54 5215 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
5216 attrs = CODING_ID_ATTRS (coding->id);
5217 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5218 src += head_ascii;
1397dc18 5219
b73bfc1c 5220 while (1)
1397dc18 5221 {
df7492f9 5222 int c;
065e3595
KH
5223
5224 src_base = src;
df7492f9 5225 ONE_MORE_BYTE (c);
065e3595 5226 if (c < 0 || ! valids[c])
df7492f9 5227 break;
ff0dacd7
KH
5228 if ((valids[c] > 1))
5229 found = CATEGORY_MASK_CCL;
df7492f9 5230 }
ff0dacd7 5231 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
5232 return 0;
5233
5234 no_more_source:
ff0dacd7
KH
5235 detect_info->found |= found;
5236 return 1;
df7492f9
KH
5237}
5238
5239static void
5240decode_coding_ccl (coding)
5241 struct coding_system *coding;
5242{
7c78e542 5243 const unsigned char *src = coding->source + coding->consumed;
8f924df7 5244 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
5245 int *charbuf = coding->charbuf + coding->charbuf_used;
5246 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
5247 int consumed_chars = 0;
5248 int multibytep = coding->src_multibyte;
d0396581 5249 struct ccl_program *ccl = &coding->spec.ccl->ccl;
df7492f9 5250 int source_charbuf[1024];
fbdc1721 5251 int source_byteidx[1025];
24a73b0a 5252 Lisp_Object attrs, charset_list;
df7492f9 5253
24a73b0a 5254 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5255
d0396581 5256 while (1)
df7492f9 5257 {
7c78e542 5258 const unsigned char *p = src;
df7492f9
KH
5259 int i = 0;
5260
5261 if (multibytep)
fbdc1721
KH
5262 {
5263 while (i < 1024 && p < src_end)
5264 {
5265 source_byteidx[i] = p - src;
5266 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5267 }
5268 source_byteidx[i] = p - src;
5269 }
df7492f9
KH
5270 else
5271 while (i < 1024 && p < src_end)
5272 source_charbuf[i++] = *p++;
8f924df7 5273
df7492f9 5274 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
d0396581
KH
5275 ccl->last_block = 1;
5276 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5277 charset_list);
5278 charbuf += ccl->produced;
fbdc1721 5279 if (multibytep)
d0396581 5280 src += source_byteidx[ccl->consumed];
df7492f9 5281 else
d0396581
KH
5282 src += ccl->consumed;
5283 consumed_chars += ccl->consumed;
5284 if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
df7492f9
KH
5285 break;
5286 }
5287
d0396581 5288 switch (ccl->status)
df7492f9
KH
5289 {
5290 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 5291 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
5292 break;
5293 case CCL_STAT_SUSPEND_BY_DST:
d0396581 5294 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
5295 break;
5296 case CCL_STAT_QUIT:
5297 case CCL_STAT_INVALID_CMD:
065e3595 5298 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
5299 break;
5300 default:
065e3595 5301 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5302 break;
5303 }
5304 coding->consumed_char += consumed_chars;
5305 coding->consumed = src - coding->source;
5306 coding->charbuf_used = charbuf - coding->charbuf;
5307}
5308
5309static int
5310encode_coding_ccl (coding)
5311 struct coding_system *coding;
5312{
fb608df3 5313 struct ccl_program *ccl = &coding->spec.ccl->ccl;
df7492f9
KH
5314 int multibytep = coding->dst_multibyte;
5315 int *charbuf = coding->charbuf;
5316 int *charbuf_end = charbuf + coding->charbuf_used;
5317 unsigned char *dst = coding->destination + coding->produced;
5318 unsigned char *dst_end = coding->destination + coding->dst_bytes;
df7492f9
KH
5319 int destination_charbuf[1024];
5320 int i, produced_chars = 0;
24a73b0a 5321 Lisp_Object attrs, charset_list;
df7492f9 5322
24a73b0a 5323 CODING_GET_INFO (coding, attrs, charset_list);
fb608df3
KH
5324 if (coding->consumed_char == coding->src_chars
5325 && coding->mode & CODING_MODE_LAST_BLOCK)
5326 ccl->last_block = 1;
df7492f9 5327
8cffd3e7 5328 while (charbuf < charbuf_end)
df7492f9 5329 {
fb608df3 5330 ccl_driver (ccl, charbuf, destination_charbuf,
8cffd3e7 5331 charbuf_end - charbuf, 1024, charset_list);
df7492f9 5332 if (multibytep)
8cffd3e7 5333 {
fb608df3
KH
5334 ASSURE_DESTINATION (ccl->produced * 2);
5335 for (i = 0; i < ccl->produced; i++)
8cffd3e7
KH
5336 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5337 }
df7492f9
KH
5338 else
5339 {
fb608df3
KH
5340 ASSURE_DESTINATION (ccl->produced);
5341 for (i = 0; i < ccl->produced; i++)
df7492f9 5342 *dst++ = destination_charbuf[i] & 0xFF;
fb608df3 5343 produced_chars += ccl->produced;
df7492f9 5344 }
fb608df3
KH
5345 charbuf += ccl->consumed;
5346 if (ccl->status == CCL_STAT_QUIT
5347 || ccl->status == CCL_STAT_INVALID_CMD)
8cffd3e7 5348 break;
df7492f9
KH
5349 }
5350
fb608df3 5351 switch (ccl->status)
df7492f9
KH
5352 {
5353 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 5354 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
5355 break;
5356 case CCL_STAT_SUSPEND_BY_DST:
065e3595 5357 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
5358 break;
5359 case CCL_STAT_QUIT:
5360 case CCL_STAT_INVALID_CMD:
065e3595 5361 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
5362 break;
5363 default:
065e3595 5364 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 5365 break;
1397dc18 5366 }
df7492f9
KH
5367
5368 coding->produced_char += produced_chars;
5369 coding->produced = dst - coding->destination;
5370 return 0;
1397dc18
KH
5371}
5372
df7492f9 5373
1397dc18 5374\f
df7492f9 5375/*** 10, 11. no-conversion handlers ***/
4ed46869 5376
b73bfc1c 5377/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 5378
b73bfc1c 5379static void
df7492f9 5380decode_coding_raw_text (coding)
4ed46869 5381 struct coding_system *coding;
4ed46869 5382{
0a9564cb
EZ
5383 int eol_crlf =
5384 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 5385
df7492f9 5386 coding->chars_at_source = 1;
119852e7
KH
5387 coding->consumed_char = coding->src_chars;
5388 coding->consumed = coding->src_bytes;
5389 if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
5390 {
5391 coding->consumed_char--;
5392 coding->consumed--;
5393 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5394 }
5395 else
5396 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 5397}
4ed46869 5398
df7492f9
KH
5399static int
5400encode_coding_raw_text (coding)
5401 struct coding_system *coding;
5402{
5403 int multibytep = coding->dst_multibyte;
5404 int *charbuf = coding->charbuf;
5405 int *charbuf_end = coding->charbuf + coding->charbuf_used;
5406 unsigned char *dst = coding->destination + coding->produced;
5407 unsigned char *dst_end = coding->destination + coding->dst_bytes;
a0ed9b27 5408 int produced_chars = 0;
b73bfc1c
KH
5409 int c;
5410
df7492f9 5411 if (multibytep)
b73bfc1c 5412 {
df7492f9 5413 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 5414
df7492f9
KH
5415 if (coding->src_multibyte)
5416 while (charbuf < charbuf_end)
5417 {
5418 ASSURE_DESTINATION (safe_room);
5419 c = *charbuf++;
5420 if (ASCII_CHAR_P (c))
5421 EMIT_ONE_ASCII_BYTE (c);
5422 else if (CHAR_BYTE8_P (c))
5423 {
5424 c = CHAR_TO_BYTE8 (c);
5425 EMIT_ONE_BYTE (c);
5426 }
5427 else
5428 {
5429 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 5430
df7492f9
KH
5431 CHAR_STRING_ADVANCE (c, p1);
5432 while (p0 < p1)
9d123124
KH
5433 {
5434 EMIT_ONE_BYTE (*p0);
5435 p0++;
5436 }
df7492f9
KH
5437 }
5438 }
b73bfc1c 5439 else
df7492f9
KH
5440 while (charbuf < charbuf_end)
5441 {
5442 ASSURE_DESTINATION (safe_room);
5443 c = *charbuf++;
5444 EMIT_ONE_BYTE (c);
5445 }
5446 }
5447 else
4ed46869 5448 {
df7492f9 5449 if (coding->src_multibyte)
d46c5b12 5450 {
df7492f9
KH
5451 int safe_room = MAX_MULTIBYTE_LENGTH;
5452
5453 while (charbuf < charbuf_end)
d46c5b12 5454 {
df7492f9
KH
5455 ASSURE_DESTINATION (safe_room);
5456 c = *charbuf++;
5457 if (ASCII_CHAR_P (c))
5458 *dst++ = c;
5459 else if (CHAR_BYTE8_P (c))
5460 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 5461 else
df7492f9 5462 CHAR_STRING_ADVANCE (c, dst);
d46c5b12
KH
5463 }
5464 }
df7492f9
KH
5465 else
5466 {
5467 ASSURE_DESTINATION (charbuf_end - charbuf);
5468 while (charbuf < charbuf_end && dst < dst_end)
5469 *dst++ = *charbuf++;
8f924df7 5470 }
319a3947 5471 produced_chars = dst - (coding->destination + coding->produced);
4ed46869 5472 }
065e3595 5473 record_conversion_result (coding, CODING_RESULT_SUCCESS);
a0ed9b27 5474 coding->produced_char += produced_chars;
df7492f9
KH
5475 coding->produced = dst - coding->destination;
5476 return 0;
4ed46869
KH
5477}
5478
ff0dacd7
KH
5479/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5480 Check if a text is encoded in a charset-based coding system. If it
5481 is, return 1, else return 0. */
5482
0a28aafb 5483static int
ff0dacd7 5484detect_coding_charset (coding, detect_info)
df7492f9 5485 struct coding_system *coding;
ff0dacd7 5486 struct coding_detection_info *detect_info;
1397dc18 5487{
065e3595 5488 const unsigned char *src = coding->source, *src_base;
8f924df7 5489 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
5490 int multibytep = coding->src_multibyte;
5491 int consumed_chars = 0;
07295713 5492 Lisp_Object attrs, valids, name;
584948ac 5493 int found = 0;
716b3fa0 5494 int head_ascii = coding->head_ascii;
07295713 5495 int check_latin_extra = 0;
1397dc18 5496
ff0dacd7
KH
5497 detect_info->checked |= CATEGORY_MASK_CHARSET;
5498
df7492f9
KH
5499 coding = &coding_categories[coding_category_charset];
5500 attrs = CODING_ID_ATTRS (coding->id);
5501 valids = AREF (attrs, coding_attr_charset_valids);
07295713 5502 name = CODING_ID_NAME (coding->id);
237aabf4
JR
5503 if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5504 "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5505 || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5506 "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
07295713 5507 check_latin_extra = 1;
237aabf4 5508
df7492f9 5509 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
716b3fa0 5510 src += head_ascii;
1397dc18 5511
b73bfc1c 5512 while (1)
1397dc18 5513 {
df7492f9 5514 int c;
716b3fa0
KH
5515 Lisp_Object val;
5516 struct charset *charset;
5517 int dim, idx;
1397dc18 5518
065e3595 5519 src_base = src;
df7492f9 5520 ONE_MORE_BYTE (c);
065e3595
KH
5521 if (c < 0)
5522 continue;
716b3fa0
KH
5523 val = AREF (valids, c);
5524 if (NILP (val))
df7492f9 5525 break;
584948ac 5526 if (c >= 0x80)
07295713
KH
5527 {
5528 if (c < 0xA0
237aabf4
JR
5529 && check_latin_extra
5530 && (!VECTORP (Vlatin_extra_code_table)
9f0526cb 5531 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
07295713
KH
5532 break;
5533 found = CATEGORY_MASK_CHARSET;
5534 }
716b3fa0
KH
5535 if (INTEGERP (val))
5536 {
5537 charset = CHARSET_FROM_ID (XFASTINT (val));
5538 dim = CHARSET_DIMENSION (charset);
5539 for (idx = 1; idx < dim; idx++)
5540 {
5541 if (src == src_end)
5542 goto too_short;
5543 ONE_MORE_BYTE (c);
3ed051d4 5544 if (c < charset->code_space[(dim - 1 - idx) * 2]
716b3fa0
KH
5545 || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5546 break;
5547 }
5548 if (idx < dim)
5549 break;
5550 }
5551 else
5552 {
5553 idx = 1;
5554 for (; CONSP (val); val = XCDR (val))
5555 {
5556 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5557 dim = CHARSET_DIMENSION (charset);
5558 while (idx < dim)
5559 {
5560 if (src == src_end)
5561 goto too_short;
5562 ONE_MORE_BYTE (c);
5563 if (c < charset->code_space[(dim - 1 - idx) * 4]
5564 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5565 break;
5566 idx++;
5567 }
5568 if (idx == dim)
5569 {
5570 val = Qnil;
5571 break;
5572 }
5573 }
5574 if (CONSP (val))
5575 break;
5576 }
df7492f9 5577 }
716b3fa0 5578 too_short:
ff0dacd7 5579 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 5580 return 0;
4ed46869 5581
df7492f9 5582 no_more_source:
ff0dacd7
KH
5583 detect_info->found |= found;
5584 return 1;
df7492f9 5585}
b73bfc1c 5586
b73bfc1c 5587static void
df7492f9 5588decode_coding_charset (coding)
4ed46869 5589 struct coding_system *coding;
4ed46869 5590{
8f924df7
KH
5591 const unsigned char *src = coding->source + coding->consumed;
5592 const unsigned char *src_end = coding->source + coding->src_bytes;
5593 const unsigned char *src_base;
69a80ea3 5594 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5 5595 /* We may produce one charset annotation in one loop and one more at
df80c7f0 5596 the end. */
69a80ea3 5597 int *charbuf_end
df80c7f0 5598 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
df7492f9
KH
5599 int consumed_chars = 0, consumed_chars_base;
5600 int multibytep = coding->src_multibyte;
24a73b0a 5601 Lisp_Object attrs, charset_list, valids;
ff0dacd7
KH
5602 int char_offset = coding->produced_char;
5603 int last_offset = char_offset;
5604 int last_id = charset_ascii;
0a9564cb
EZ
5605 int eol_crlf =
5606 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 5607 int byte_after_cr = -1;
df7492f9 5608
24a73b0a 5609 CODING_GET_INFO (coding, attrs, charset_list);
4eb6d3f1 5610 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 5611
df7492f9 5612 while (1)
4ed46869 5613 {
4eb6d3f1 5614 int c;
24a73b0a
KH
5615 Lisp_Object val;
5616 struct charset *charset;
5617 int dim;
5618 int len = 1;
5619 unsigned code;
df7492f9
KH
5620
5621 src_base = src;
5622 consumed_chars_base = consumed_chars;
b73bfc1c 5623
df7492f9 5624 if (charbuf >= charbuf_end)
b71f6f73
KH
5625 {
5626 if (byte_after_cr >= 0)
5627 src_base--;
5628 break;
5629 }
df7492f9 5630
119852e7
KH
5631 if (byte_after_cr >= 0)
5632 {
5633 c = byte_after_cr;
5634 byte_after_cr = -1;
5635 }
5636 else
5637 {
5638 ONE_MORE_BYTE (c);
5639 if (eol_crlf && c == '\r')
5640 ONE_MORE_BYTE (byte_after_cr);
5641 }
065e3595
KH
5642 if (c < 0)
5643 goto invalid_code;
24a73b0a
KH
5644 code = c;
5645
5646 val = AREF (valids, c);
1b17adfd 5647 if (! INTEGERP (val) && ! CONSP (val))
24a73b0a
KH
5648 goto invalid_code;
5649 if (INTEGERP (val))
d46c5b12 5650 {
24a73b0a
KH
5651 charset = CHARSET_FROM_ID (XFASTINT (val));
5652 dim = CHARSET_DIMENSION (charset);
5653 while (len < dim)
b73bfc1c 5654 {
24a73b0a
KH
5655 ONE_MORE_BYTE (c);
5656 code = (code << 8) | c;
5657 len++;
b73bfc1c 5658 }
24a73b0a
KH
5659 CODING_DECODE_CHAR (coding, src, src_base, src_end,
5660 charset, code, c);
d46c5b12 5661 }
df7492f9 5662 else
d46c5b12 5663 {
24a73b0a
KH
5664 /* VAL is a list of charset IDs. It is assured that the
5665 list is sorted by charset dimensions (smaller one
5666 comes first). */
5667 while (CONSP (val))
4eb6d3f1 5668 {
24a73b0a 5669 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 5670 dim = CHARSET_DIMENSION (charset);
f9d71dcd 5671 while (len < dim)
4eb6d3f1 5672 {
acb2a965
KH
5673 ONE_MORE_BYTE (c);
5674 code = (code << 8) | c;
f9d71dcd 5675 len++;
4eb6d3f1 5676 }
24a73b0a
KH
5677 CODING_DECODE_CHAR (coding, src, src_base,
5678 src_end, charset, code, c);
5679 if (c >= 0)
5680 break;
5681 val = XCDR (val);
ff0dacd7 5682 }
d46c5b12 5683 }
24a73b0a
KH
5684 if (c < 0)
5685 goto invalid_code;
5686 if (charset->id != charset_ascii
5687 && last_id != charset->id)
5688 {
5689 if (last_id != charset_ascii)
69a80ea3 5690 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
5691 last_id = charset->id;
5692 last_offset = char_offset;
5693 }
5694
df7492f9 5695 *charbuf++ = c;
ff0dacd7 5696 char_offset++;
df7492f9
KH
5697 continue;
5698
5699 invalid_code:
5700 src = src_base;
5701 consumed_chars = consumed_chars_base;
5702 ONE_MORE_BYTE (c);
065e3595 5703 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 5704 char_offset++;
df7492f9 5705 coding->errors++;
4ed46869
KH
5706 }
5707
df7492f9 5708 no_more_source:
ff0dacd7 5709 if (last_id != charset_ascii)
69a80ea3 5710 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
5711 coding->consumed_char += consumed_chars_base;
5712 coding->consumed = src_base - coding->source;
5713 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
5714}
5715
df7492f9
KH
5716static int
5717encode_coding_charset (coding)
4ed46869 5718 struct coding_system *coding;
4ed46869 5719{
df7492f9
KH
5720 int multibytep = coding->dst_multibyte;
5721 int *charbuf = coding->charbuf;
5722 int *charbuf_end = charbuf + coding->charbuf_used;
5723 unsigned char *dst = coding->destination + coding->produced;
5724 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5725 int safe_room = MAX_MULTIBYTE_LENGTH;
5726 int produced_chars = 0;
24a73b0a 5727 Lisp_Object attrs, charset_list;
df7492f9 5728 int ascii_compatible;
b73bfc1c 5729 int c;
b73bfc1c 5730
24a73b0a 5731 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5732 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 5733
df7492f9 5734 while (charbuf < charbuf_end)
4ed46869 5735 {
4eb6d3f1 5736 struct charset *charset;
df7492f9 5737 unsigned code;
8f924df7 5738
df7492f9
KH
5739 ASSURE_DESTINATION (safe_room);
5740 c = *charbuf++;
5741 if (ascii_compatible && ASCII_CHAR_P (c))
5742 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 5743 else if (CHAR_BYTE8_P (c))
4ed46869 5744 {
16eafb5d
KH
5745 c = CHAR_TO_BYTE8 (c);
5746 EMIT_ONE_BYTE (c);
d46c5b12 5747 }
d46c5b12 5748 else
b73bfc1c 5749 {
4eb6d3f1
KH
5750 charset = char_charset (c, charset_list, &code);
5751 if (charset)
5752 {
5753 if (CHARSET_DIMENSION (charset) == 1)
5754 EMIT_ONE_BYTE (code);
5755 else if (CHARSET_DIMENSION (charset) == 2)
5756 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5757 else if (CHARSET_DIMENSION (charset) == 3)
5758 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5759 else
5760 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5761 (code >> 8) & 0xFF, code & 0xFF);
5762 }
5763 else
41cbe562
KH
5764 {
5765 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5766 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5767 else
5768 c = coding->default_char;
5769 EMIT_ONE_BYTE (c);
5770 }
4ed46869 5771 }
4ed46869
KH
5772 }
5773
065e3595 5774 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5775 coding->produced_char += produced_chars;
5776 coding->produced = dst - coding->destination;
5777 return 0;
4ed46869
KH
5778}
5779
5780\f
1397dc18 5781/*** 10. C library functions ***/
4ed46869 5782
df7492f9
KH
5783/* Setup coding context CODING from information about CODING_SYSTEM.
5784 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
5785 CODING_SYSTEM is invalid, signal an error. */
4ed46869 5786
ec6d2bb8 5787void
e0e989f6
KH
5788setup_coding_system (coding_system, coding)
5789 Lisp_Object coding_system;
4ed46869
KH
5790 struct coding_system *coding;
5791{
df7492f9
KH
5792 Lisp_Object attrs;
5793 Lisp_Object eol_type;
5794 Lisp_Object coding_type;
4608c386 5795 Lisp_Object val;
4ed46869 5796
df7492f9 5797 if (NILP (coding_system))
ae6f73fa 5798 coding_system = Qundecided;
c07c8e12 5799
df7492f9 5800 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 5801
df7492f9 5802 attrs = CODING_ID_ATTRS (coding->id);
0a9564cb 5803 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 5804
df7492f9
KH
5805 coding->mode = 0;
5806 coding->head_ascii = -1;
4a015c45
KH
5807 if (VECTORP (eol_type))
5808 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5809 | CODING_REQUIRE_DETECTION_MASK);
5810 else if (! EQ (eol_type, Qunix))
5811 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5812 | CODING_REQUIRE_ENCODING_MASK);
5813 else
5814 coding->common_flags = 0;
5e5c78be
KH
5815 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5816 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5817 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5818 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
5819 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5820 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 5821
df7492f9 5822 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 5823 coding->max_charset_id = SCHARS (val) - 1;
1b3b981b 5824 coding->safe_charsets = SDATA (val);
df7492f9 5825 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
624bda09 5826 coding->carryover_bytes = 0;
4608c386 5827
df7492f9
KH
5828 coding_type = CODING_ATTR_TYPE (attrs);
5829 if (EQ (coding_type, Qundecided))
d46c5b12 5830 {
df7492f9
KH
5831 coding->detector = NULL;
5832 coding->decoder = decode_coding_raw_text;
5833 coding->encoder = encode_coding_raw_text;
5834 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5835 }
df7492f9 5836 else if (EQ (coding_type, Qiso_2022))
d46c5b12 5837 {
df7492f9
KH
5838 int i;
5839 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5840
5841 /* Invoke graphic register 0 to plane 0. */
5842 CODING_ISO_INVOCATION (coding, 0) = 0;
5843 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
5844 CODING_ISO_INVOCATION (coding, 1)
5845 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5846 /* Setup the initial status of designation. */
5847 for (i = 0; i < 4; i++)
5848 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5849 /* Not single shifting initially. */
5850 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5851 /* Beginning of buffer should also be regarded as bol. */
5852 CODING_ISO_BOL (coding) = 1;
5853 coding->detector = detect_coding_iso_2022;
5854 coding->decoder = decode_coding_iso_2022;
5855 coding->encoder = encode_coding_iso_2022;
5856 if (flags & CODING_ISO_FLAG_SAFE)
5857 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 5858 coding->common_flags
df7492f9
KH
5859 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5860 | CODING_REQUIRE_FLUSHING_MASK);
5861 if (flags & CODING_ISO_FLAG_COMPOSITION)
5862 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
5863 if (flags & CODING_ISO_FLAG_DESIGNATION)
5864 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
5865 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5866 {
5867 setup_iso_safe_charsets (attrs);
5868 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 5869 coding->max_charset_id = SCHARS (val) - 1;
1b3b981b 5870 coding->safe_charsets = SDATA (val);
df7492f9
KH
5871 }
5872 CODING_ISO_FLAGS (coding) = flags;
e951386e
KH
5873 CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5874 CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5875 CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5876 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
d46c5b12 5877 }
df7492f9 5878 else if (EQ (coding_type, Qcharset))
d46c5b12 5879 {
df7492f9
KH
5880 coding->detector = detect_coding_charset;
5881 coding->decoder = decode_coding_charset;
5882 coding->encoder = encode_coding_charset;
d46c5b12 5883 coding->common_flags
df7492f9 5884 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5885 }
df7492f9 5886 else if (EQ (coding_type, Qutf_8))
d46c5b12 5887 {
a470d443
KH
5888 val = AREF (attrs, coding_attr_utf_bom);
5889 CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5890 : EQ (val, Qt) ? utf_with_bom
5891 : utf_without_bom);
df7492f9
KH
5892 coding->detector = detect_coding_utf_8;
5893 coding->decoder = decode_coding_utf_8;
5894 coding->encoder = encode_coding_utf_8;
5895 coding->common_flags
5896 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
a470d443
KH
5897 if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5898 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
df7492f9
KH
5899 }
5900 else if (EQ (coding_type, Qutf_16))
5901 {
a470d443
KH
5902 val = AREF (attrs, coding_attr_utf_bom);
5903 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5904 : EQ (val, Qt) ? utf_with_bom
5905 : utf_without_bom);
df7492f9 5906 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5907 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5908 : utf_16_little_endian);
e19c3639 5909 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5910 coding->detector = detect_coding_utf_16;
5911 coding->decoder = decode_coding_utf_16;
5912 coding->encoder = encode_coding_utf_16;
5913 coding->common_flags
5914 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
a470d443 5915 if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
b49a1807 5916 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5917 }
df7492f9 5918 else if (EQ (coding_type, Qccl))
4ed46869 5919 {
df7492f9
KH
5920 coding->detector = detect_coding_ccl;
5921 coding->decoder = decode_coding_ccl;
5922 coding->encoder = encode_coding_ccl;
c952af22 5923 coding->common_flags
df7492f9
KH
5924 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5925 | CODING_REQUIRE_FLUSHING_MASK);
5926 }
5927 else if (EQ (coding_type, Qemacs_mule))
5928 {
5929 coding->detector = detect_coding_emacs_mule;
5930 coding->decoder = decode_coding_emacs_mule;
5931 coding->encoder = encode_coding_emacs_mule;
c952af22 5932 coding->common_flags
df7492f9 5933 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
e951386e 5934 coding->spec.emacs_mule.full_support = 1;
df7492f9
KH
5935 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5936 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5937 {
5938 Lisp_Object tail, safe_charsets;
5939 int max_charset_id = 0;
5940
5941 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5942 tail = XCDR (tail))
5943 if (max_charset_id < XFASTINT (XCAR (tail)))
5944 max_charset_id = XFASTINT (XCAR (tail));
1b3b981b
AS
5945 safe_charsets = make_uninit_string (max_charset_id + 1);
5946 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9
KH
5947 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5948 tail = XCDR (tail))
8f924df7 5949 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5950 coding->max_charset_id = max_charset_id;
1b3b981b 5951 coding->safe_charsets = SDATA (safe_charsets);
e951386e 5952 coding->spec.emacs_mule.full_support = 1;
df7492f9 5953 }
e951386e
KH
5954 coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5955 coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
df7492f9
KH
5956 }
5957 else if (EQ (coding_type, Qshift_jis))
5958 {
5959 coding->detector = detect_coding_sjis;
5960 coding->decoder = decode_coding_sjis;
5961 coding->encoder = encode_coding_sjis;
c952af22 5962 coding->common_flags
df7492f9
KH
5963 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5964 }
5965 else if (EQ (coding_type, Qbig5))
5966 {
5967 coding->detector = detect_coding_big5;
5968 coding->decoder = decode_coding_big5;
5969 coding->encoder = encode_coding_big5;
c952af22 5970 coding->common_flags
df7492f9
KH
5971 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5972 }
5973 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5974 {
df7492f9
KH
5975 coding->detector = NULL;
5976 coding->decoder = decode_coding_raw_text;
5977 coding->encoder = encode_coding_raw_text;
ea29edf2
KH
5978 if (! EQ (eol_type, Qunix))
5979 {
5980 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5981 if (! VECTORP (eol_type))
5982 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5983 }
5984
4ed46869 5985 }
4ed46869 5986
df7492f9 5987 return;
4ed46869
KH
5988}
5989
0ff61e78
KH
5990/* Return a list of charsets supported by CODING. */
5991
5992Lisp_Object
5993coding_charset_list (coding)
5994 struct coding_system *coding;
5995{
35befdaa 5996 Lisp_Object attrs, charset_list;
0ff61e78
KH
5997
5998 CODING_GET_INFO (coding, attrs, charset_list);
5999 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
6000 {
6001 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
6002
6003 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
6004 charset_list = Viso_2022_charset_list;
6005 }
6006 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
6007 {
6008 charset_list = Vemacs_mule_charset_list;
6009 }
6010 return charset_list;
6011}
6012
6013
e9f91ece
KH
6014/* Return a list of charsets supported by CODING-SYSTEM. */
6015
6016Lisp_Object
6017coding_system_charset_list (coding_system)
6018 Lisp_Object coding_system;
6019{
6020 int id;
6021 Lisp_Object attrs, charset_list;
6022
6023 CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
6024 attrs = CODING_ID_ATTRS (id);
6025
6026 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
6027 {
6028 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
6029
6030 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
6031 charset_list = Viso_2022_charset_list;
6032 else
6033 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6034 }
6035 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
6036 {
6037 charset_list = Vemacs_mule_charset_list;
6038 }
6039 else
6040 {
6041 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6042 }
6043 return charset_list;
6044}
6045
6046
df7492f9
KH
6047/* Return raw-text or one of its subsidiaries that has the same
6048 eol_type as CODING-SYSTEM. */
ec6d2bb8 6049
df7492f9
KH
6050Lisp_Object
6051raw_text_coding_system (coding_system)
6052 Lisp_Object coding_system;
ec6d2bb8 6053{
0be8721c 6054 Lisp_Object spec, attrs;
df7492f9 6055 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 6056
d3e4cb56
KH
6057 if (NILP (coding_system))
6058 return Qraw_text;
df7492f9
KH
6059 spec = CODING_SYSTEM_SPEC (coding_system);
6060 attrs = AREF (spec, 0);
ec6d2bb8 6061
df7492f9
KH
6062 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
6063 return coding_system;
ec6d2bb8 6064
df7492f9
KH
6065 eol_type = AREF (spec, 2);
6066 if (VECTORP (eol_type))
6067 return Qraw_text;
6068 spec = CODING_SYSTEM_SPEC (Qraw_text);
6069 raw_text_eol_type = AREF (spec, 2);
6070 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6071 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6072 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
6073}
6074
54f78171 6075
1911a33b
KH
6076/* If CODING_SYSTEM doesn't specify end-of-line format, return one of
6077 the subsidiary that has the same eol-spec as PARENT (if it is not
6078 nil and specifies end-of-line format) or the system's setting
fcbcfb64 6079 (system_eol_type). */
df7492f9
KH
6080
6081Lisp_Object
6082coding_inherit_eol_type (coding_system, parent)
b74e4686 6083 Lisp_Object coding_system, parent;
54f78171 6084{
3e139625 6085 Lisp_Object spec, eol_type;
54f78171 6086
d3e4cb56
KH
6087 if (NILP (coding_system))
6088 coding_system = Qraw_text;
df7492f9 6089 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 6090 eol_type = AREF (spec, 2);
fcbcfb64 6091 if (VECTORP (eol_type))
df7492f9 6092 {
df7492f9
KH
6093 Lisp_Object parent_eol_type;
6094
fcbcfb64
KH
6095 if (! NILP (parent))
6096 {
6097 Lisp_Object parent_spec;
6098
4a015c45 6099 parent_spec = CODING_SYSTEM_SPEC (parent);
fcbcfb64 6100 parent_eol_type = AREF (parent_spec, 2);
1911a33b
KH
6101 if (VECTORP (parent_eol_type))
6102 parent_eol_type = system_eol_type;
fcbcfb64
KH
6103 }
6104 else
6105 parent_eol_type = system_eol_type;
df7492f9
KH
6106 if (EQ (parent_eol_type, Qunix))
6107 coding_system = AREF (eol_type, 0);
6108 else if (EQ (parent_eol_type, Qdos))
6109 coding_system = AREF (eol_type, 1);
6110 else if (EQ (parent_eol_type, Qmac))
6111 coding_system = AREF (eol_type, 2);
54f78171 6112 }
df7492f9 6113 return coding_system;
54f78171
KH
6114}
6115
fcaf8878
KH
6116
6117/* Check if text-conversion and eol-conversion of CODING_SYSTEM are
6118 decided for writing to a process. If not, complement them, and
6119 return a new coding system. */
6120
6121Lisp_Object
6122complement_process_encoding_system (coding_system)
6123 Lisp_Object coding_system;
6124{
5886ec9c
KH
6125 Lisp_Object coding_base = Qnil, eol_base = Qnil;
6126 Lisp_Object spec, attrs;
93d50df8 6127 int i;
fcaf8878 6128
93d50df8 6129 for (i = 0; i < 3; i++)
fcaf8878 6130 {
93d50df8
KH
6131 if (i == 1)
6132 coding_system = CDR_SAFE (Vdefault_process_coding_system);
6133 else if (i == 2)
6134 coding_system = preferred_coding_system ();
6135 spec = CODING_SYSTEM_SPEC (coding_system);
6136 if (NILP (spec))
6137 continue;
6138 attrs = AREF (spec, 0);
6139 if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6140 coding_base = CODING_ATTR_BASE_NAME (attrs);
6141 if (NILP (eol_base) && ! VECTORP (AREF (spec, 2)))
6142 eol_base = coding_system;
6143 if (! NILP (coding_base) && ! NILP (eol_base))
6144 break;
fcaf8878 6145 }
fcaf8878 6146
93d50df8
KH
6147 if (i > 0)
6148 /* The original CODING_SYSTEM didn't specify text-conversion or
6149 eol-conversion. Be sure that we return a fully complemented
6150 coding system. */
6151 coding_system = coding_inherit_eol_type (coding_base, eol_base);
6152 return coding_system;
fcaf8878
KH
6153}
6154
6155
4ed46869
KH
6156/* Emacs has a mechanism to automatically detect a coding system if it
6157 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
6158 it's impossible to distinguish some coding systems accurately
6159 because they use the same range of codes. So, at first, coding
6160 systems are categorized into 7, those are:
6161
0ef69138 6162 o coding-category-emacs-mule
4ed46869
KH
6163
6164 The category for a coding system which has the same code range
6165 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 6166 symbol) `emacs-mule' by default.
4ed46869
KH
6167
6168 o coding-category-sjis
6169
6170 The category for a coding system which has the same code range
6171 as SJIS. Assigned the coding-system (Lisp
7717c392 6172 symbol) `japanese-shift-jis' by default.
4ed46869
KH
6173
6174 o coding-category-iso-7
6175
6176 The category for a coding system which has the same code range
7717c392 6177 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
6178 shift and single shift functions. This can encode/decode all
6179 charsets. Assigned the coding-system (Lisp symbol)
6180 `iso-2022-7bit' by default.
6181
6182 o coding-category-iso-7-tight
6183
6184 Same as coding-category-iso-7 except that this can
6185 encode/decode only the specified charsets.
4ed46869
KH
6186
6187 o coding-category-iso-8-1
6188
6189 The category for a coding system which has the same code range
6190 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
6191 for DIMENSION1 charset. This doesn't use any locking shift
6192 and single shift functions. Assigned the coding-system (Lisp
6193 symbol) `iso-latin-1' by default.
4ed46869
KH
6194
6195 o coding-category-iso-8-2
6196
6197 The category for a coding system which has the same code range
6198 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
6199 for DIMENSION2 charset. This doesn't use any locking shift
6200 and single shift functions. Assigned the coding-system (Lisp
6201 symbol) `japanese-iso-8bit' by default.
4ed46869 6202
7717c392 6203 o coding-category-iso-7-else
4ed46869
KH
6204
6205 The category for a coding system which has the same code range
ad1746f5 6206 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
6207 single shift functions. Assigned the coding-system (Lisp
6208 symbol) `iso-2022-7bit-lock' by default.
6209
6210 o coding-category-iso-8-else
6211
6212 The category for a coding system which has the same code range
ad1746f5 6213 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
6214 single shift functions. Assigned the coding-system (Lisp
6215 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
6216
6217 o coding-category-big5
6218
6219 The category for a coding system which has the same code range
6220 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 6221 `cn-big5' by default.
4ed46869 6222
fa42c37f
KH
6223 o coding-category-utf-8
6224
6225 The category for a coding system which has the same code range
6e76ae91 6226 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
6227 symbol) `utf-8' by default.
6228
6229 o coding-category-utf-16-be
6230
6231 The category for a coding system in which a text has an
6232 Unicode signature (cf. Unicode Standard) in the order of BIG
6233 endian at the head. Assigned the coding-system (Lisp symbol)
6234 `utf-16-be' by default.
6235
6236 o coding-category-utf-16-le
6237
6238 The category for a coding system in which a text has an
6239 Unicode signature (cf. Unicode Standard) in the order of
6240 LITTLE endian at the head. Assigned the coding-system (Lisp
6241 symbol) `utf-16-le' by default.
6242
1397dc18
KH
6243 o coding-category-ccl
6244
6245 The category for a coding system of which encoder/decoder is
6246 written in CCL programs. The default value is nil, i.e., no
6247 coding system is assigned.
6248
4ed46869
KH
6249 o coding-category-binary
6250
6251 The category for a coding system not categorized in any of the
6252 above. Assigned the coding-system (Lisp symbol)
e0e989f6 6253 `no-conversion' by default.
4ed46869
KH
6254
6255 Each of them is a Lisp symbol and the value is an actual
df7492f9 6256 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
6257 What Emacs does actually is to detect a category of coding system.
6258 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 6259 decide only one possible category, it selects a category of the
4ed46869
KH
6260 highest priority. Priorities of categories are also specified by a
6261 user in a Lisp variable `coding-category-list'.
6262
6263*/
6264
df7492f9
KH
6265#define EOL_SEEN_NONE 0
6266#define EOL_SEEN_LF 1
6267#define EOL_SEEN_CR 2
6268#define EOL_SEEN_CRLF 4
66cfb530 6269
ff0dacd7
KH
6270/* Detect how end-of-line of a text of length SRC_BYTES pointed by
6271 SOURCE is encoded. If CATEGORY is one of
6272 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6273 two-byte, else they are encoded by one-byte.
6274
6275 Return one of EOL_SEEN_XXX. */
4ed46869 6276
bc4bc72a 6277#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
6278
6279static int
89528eb3 6280detect_eol (source, src_bytes, category)
f6cbaf43 6281 const unsigned char *source;
df7492f9 6282 EMACS_INT src_bytes;
89528eb3 6283 enum coding_category category;
4ed46869 6284{
f6cbaf43 6285 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 6286 unsigned char c;
df7492f9
KH
6287 int total = 0;
6288 int eol_seen = EOL_SEEN_NONE;
4ed46869 6289
89528eb3 6290 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 6291 {
df7492f9 6292 int msb, lsb;
fa42c37f 6293
89528eb3
KH
6294 msb = category == (coding_category_utf_16_le
6295 | coding_category_utf_16_le_nosig);
df7492f9 6296 lsb = 1 - msb;
fa42c37f 6297
df7492f9 6298 while (src + 1 < src_end)
fa42c37f 6299 {
df7492f9
KH
6300 c = src[lsb];
6301 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 6302 {
df7492f9
KH
6303 int this_eol;
6304
6305 if (c == '\n')
6306 this_eol = EOL_SEEN_LF;
6307 else if (src + 3 >= src_end
6308 || src[msb + 2] != 0
6309 || src[lsb + 2] != '\n')
6310 this_eol = EOL_SEEN_CR;
fa42c37f 6311 else
75f4f1ac
EZ
6312 {
6313 this_eol = EOL_SEEN_CRLF;
6314 src += 2;
6315 }
df7492f9
KH
6316
6317 if (eol_seen == EOL_SEEN_NONE)
6318 /* This is the first end-of-line. */
6319 eol_seen = this_eol;
6320 else if (eol_seen != this_eol)
fa42c37f 6321 {
75f4f1ac
EZ
6322 /* The found type is different from what found before.
6323 Allow for stray ^M characters in DOS EOL files. */
6324 if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6325 || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6326 eol_seen = EOL_SEEN_CRLF;
6327 else
6328 {
6329 eol_seen = EOL_SEEN_LF;
6330 break;
6331 }
fa42c37f 6332 }
df7492f9
KH
6333 if (++total == MAX_EOL_CHECK_COUNT)
6334 break;
fa42c37f 6335 }
df7492f9 6336 src += 2;
fa42c37f 6337 }
bcf26d6a 6338 }
d46c5b12 6339 else
c4825358 6340 {
df7492f9 6341 while (src < src_end)
27901516 6342 {
df7492f9
KH
6343 c = *src++;
6344 if (c == '\n' || c == '\r')
6345 {
6346 int this_eol;
d46c5b12 6347
df7492f9
KH
6348 if (c == '\n')
6349 this_eol = EOL_SEEN_LF;
6350 else if (src >= src_end || *src != '\n')
6351 this_eol = EOL_SEEN_CR;
6352 else
6353 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 6354
df7492f9
KH
6355 if (eol_seen == EOL_SEEN_NONE)
6356 /* This is the first end-of-line. */
6357 eol_seen = this_eol;
6358 else if (eol_seen != this_eol)
6359 {
75f4f1ac
EZ
6360 /* The found type is different from what found before.
6361 Allow for stray ^M characters in DOS EOL files. */
6362 if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6363 || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6364 eol_seen = EOL_SEEN_CRLF;
6365 else
6366 {
6367 eol_seen = EOL_SEEN_LF;
6368 break;
6369 }
df7492f9
KH
6370 }
6371 if (++total == MAX_EOL_CHECK_COUNT)
6372 break;
6373 }
6374 }
73be902c 6375 }
df7492f9 6376 return eol_seen;
73be902c
KH
6377}
6378
df7492f9 6379
24a73b0a 6380static Lisp_Object
df7492f9
KH
6381adjust_coding_eol_type (coding, eol_seen)
6382 struct coding_system *coding;
6383 int eol_seen;
73be902c 6384{
0be8721c 6385 Lisp_Object eol_type;
8f924df7 6386
df7492f9
KH
6387 eol_type = CODING_ID_EOL_TYPE (coding->id);
6388 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
6389 {
6390 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6391 eol_type = Qunix;
6392 }
6f197c07 6393 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
6394 {
6395 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6396 eol_type = Qdos;
6397 }
6f197c07 6398 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
6399 {
6400 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6401 eol_type = Qmac;
6402 }
6403 return eol_type;
d46c5b12 6404}
4ed46869 6405
df7492f9
KH
6406/* Detect how a text specified in CODING is encoded. If a coding
6407 system is detected, update fields of CODING by the detected coding
6408 system. */
0a28aafb 6409
df7492f9
KH
6410void
6411detect_coding (coding)
d46c5b12 6412 struct coding_system *coding;
d46c5b12 6413{
8f924df7 6414 const unsigned char *src, *src_end;
73cce38d 6415 int saved_mode = coding->mode;
d46c5b12 6416
df7492f9
KH
6417 coding->consumed = coding->consumed_char = 0;
6418 coding->produced = coding->produced_char = 0;
6419 coding_set_source (coding);
1c3478b0 6420
df7492f9 6421 src_end = coding->source + coding->src_bytes;
c0e16b14 6422 coding->head_ascii = 0;
1c3478b0 6423
df7492f9
KH
6424 /* If we have not yet decided the text encoding type, detect it
6425 now. */
6426 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 6427 {
df7492f9 6428 int c, i;
6cb21a4f 6429 struct coding_detection_info detect_info;
2f3cbb32 6430 int null_byte_found = 0, eight_bit_found = 0;
df7492f9 6431
6cb21a4f 6432 detect_info.checked = detect_info.found = detect_info.rejected = 0;
2f3cbb32 6433 for (src = coding->source; src < src_end; src++)
d46c5b12 6434 {
df7492f9 6435 c = *src;
6cb21a4f 6436 if (c & 0x80)
6cb21a4f 6437 {
2f3cbb32 6438 eight_bit_found = 1;
2f3cbb32
KH
6439 if (null_byte_found)
6440 break;
6441 }
6442 else if (c < 0x20)
6443 {
6444 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6445 && ! inhibit_iso_escape_detection
6446 && ! detect_info.checked)
6cb21a4f 6447 {
2f3cbb32
KH
6448 if (detect_coding_iso_2022 (coding, &detect_info))
6449 {
6450 /* We have scanned the whole data. */
6451 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
c0e16b14
KH
6452 {
6453 /* We didn't find an 8-bit code. We may
6454 have found a null-byte, but it's very
6455 rare that a binary file confirm to
6456 ISO-2022. */
6457 src = src_end;
6458 coding->head_ascii = src - coding->source;
6459 }
6460 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
2f3cbb32
KH
6461 break;
6462 }
6463 }
97b1b294 6464 else if (! c && !inhibit_null_byte_detection)
2f3cbb32
KH
6465 {
6466 null_byte_found = 1;
6467 if (eight_bit_found)
6468 break;
6cb21a4f 6469 }
c006c0c8
KH
6470 if (! eight_bit_found)
6471 coding->head_ascii++;
6cb21a4f 6472 }
c006c0c8 6473 else if (! eight_bit_found)
c0e16b14 6474 coding->head_ascii++;
d46c5b12 6475 }
df7492f9 6476
2f3cbb32
KH
6477 if (null_byte_found || eight_bit_found
6478 || coding->head_ascii < coding->src_bytes
6cb21a4f 6479 || detect_info.found)
d46c5b12 6480 {
ff0dacd7
KH
6481 enum coding_category category;
6482 struct coding_system *this;
df7492f9 6483
6cb21a4f
KH
6484 if (coding->head_ascii == coding->src_bytes)
6485 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
6486 for (i = 0; i < coding_category_raw_text; i++)
6487 {
6488 category = coding_priorities[i];
6489 this = coding_categories + category;
6490 if (detect_info.found & (1 << category))
24a73b0a 6491 break;
6cb21a4f
KH
6492 }
6493 else
2f3cbb32
KH
6494 {
6495 if (null_byte_found)
ff0dacd7 6496 {
2f3cbb32
KH
6497 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6498 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
ff0dacd7 6499 }
2f3cbb32
KH
6500 for (i = 0; i < coding_category_raw_text; i++)
6501 {
6502 category = coding_priorities[i];
6503 this = coding_categories + category;
6504 if (this->id < 0)
6505 {
6506 /* No coding system of this category is defined. */
6507 detect_info.rejected |= (1 << category);
6508 }
6509 else if (category >= coding_category_raw_text)
6510 continue;
6511 else if (detect_info.checked & (1 << category))
6512 {
6513 if (detect_info.found & (1 << category))
6514 break;
6515 }
6516 else if ((*(this->detector)) (coding, &detect_info)
6517 && detect_info.found & (1 << category))
6518 {
6519 if (category == coding_category_utf_16_auto)
6520 {
6521 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6522 category = coding_category_utf_16_le;
6523 else
6524 category = coding_category_utf_16_be;
6525 }
6526 break;
6527 }
6528 }
2f3cbb32 6529 }
c0e16b14
KH
6530
6531 if (i < coding_category_raw_text)
6532 setup_coding_system (CODING_ID_NAME (this->id), coding);
6533 else if (null_byte_found)
6534 setup_coding_system (Qno_conversion, coding);
6535 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6536 == CATEGORY_MASK_ANY)
6537 setup_coding_system (Qraw_text, coding);
6538 else if (detect_info.rejected)
6539 for (i = 0; i < coding_category_raw_text; i++)
6540 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6541 {
6542 this = coding_categories + coding_priorities[i];
6543 setup_coding_system (CODING_ID_NAME (this->id), coding);
6544 break;
6545 }
d46c5b12 6546 }
b73bfc1c 6547 }
a470d443
KH
6548 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6549 == coding_category_utf_8_auto)
6550 {
6551 Lisp_Object coding_systems;
6552 struct coding_detection_info detect_info;
6553
6554 coding_systems
6555 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6556 detect_info.found = detect_info.rejected = 0;
6557 coding->head_ascii = 0;
6558 if (CONSP (coding_systems)
6559 && detect_coding_utf_8 (coding, &detect_info))
6560 {
6561 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6562 setup_coding_system (XCAR (coding_systems), coding);
6563 else
6564 setup_coding_system (XCDR (coding_systems), coding);
6565 }
6566 }
24a73b0a
KH
6567 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6568 == coding_category_utf_16_auto)
b49a1807
KH
6569 {
6570 Lisp_Object coding_systems;
6571 struct coding_detection_info detect_info;
6572
6573 coding_systems
a470d443 6574 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
b49a1807 6575 detect_info.found = detect_info.rejected = 0;
a470d443 6576 coding->head_ascii = 0;
b49a1807 6577 if (CONSP (coding_systems)
24a73b0a 6578 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
6579 {
6580 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6581 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 6582 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
6583 setup_coding_system (XCDR (coding_systems), coding);
6584 }
6585 }
73cce38d 6586 coding->mode = saved_mode;
4ed46869 6587}
4ed46869 6588
d46c5b12 6589
aaaf0b1e 6590static void
df7492f9 6591decode_eol (coding)
aaaf0b1e 6592 struct coding_system *coding;
aaaf0b1e 6593{
24a73b0a
KH
6594 Lisp_Object eol_type;
6595 unsigned char *p, *pbeg, *pend;
3ed051d4 6596
24a73b0a 6597 eol_type = CODING_ID_EOL_TYPE (coding->id);
0a9564cb 6598 if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
24a73b0a
KH
6599 return;
6600
6601 if (NILP (coding->dst_object))
6602 pbeg = coding->destination;
6603 else
6604 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6605 pend = pbeg + coding->produced;
6606
6607 if (VECTORP (eol_type))
aaaf0b1e 6608 {
df7492f9 6609 int eol_seen = EOL_SEEN_NONE;
4ed46869 6610
24a73b0a 6611 for (p = pbeg; p < pend; p++)
aaaf0b1e 6612 {
df7492f9
KH
6613 if (*p == '\n')
6614 eol_seen |= EOL_SEEN_LF;
6615 else if (*p == '\r')
aaaf0b1e 6616 {
df7492f9 6617 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 6618 {
df7492f9
KH
6619 eol_seen |= EOL_SEEN_CRLF;
6620 p++;
aaaf0b1e 6621 }
aaaf0b1e 6622 else
df7492f9 6623 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 6624 }
aaaf0b1e 6625 }
75f4f1ac
EZ
6626 /* Handle DOS-style EOLs in a file with stray ^M characters. */
6627 if ((eol_seen & EOL_SEEN_CRLF) != 0
6628 && (eol_seen & EOL_SEEN_CR) != 0
6629 && (eol_seen & EOL_SEEN_LF) == 0)
6630 eol_seen = EOL_SEEN_CRLF;
6631 else if (eol_seen != EOL_SEEN_NONE
24a73b0a
KH
6632 && eol_seen != EOL_SEEN_LF
6633 && eol_seen != EOL_SEEN_CRLF
6634 && eol_seen != EOL_SEEN_CR)
6635 eol_seen = EOL_SEEN_LF;
df7492f9 6636 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 6637 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 6638 }
d46c5b12 6639
24a73b0a 6640 if (EQ (eol_type, Qmac))
27901516 6641 {
24a73b0a 6642 for (p = pbeg; p < pend; p++)
df7492f9
KH
6643 if (*p == '\r')
6644 *p = '\n';
4ed46869 6645 }
24a73b0a 6646 else if (EQ (eol_type, Qdos))
df7492f9 6647 {
24a73b0a 6648 int n = 0;
b73bfc1c 6649
24a73b0a
KH
6650 if (NILP (coding->dst_object))
6651 {
4347441b
KH
6652 /* Start deleting '\r' from the tail to minimize the memory
6653 movement. */
24a73b0a
KH
6654 for (p = pend - 2; p >= pbeg; p--)
6655 if (*p == '\r')
6656 {
6657 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6658 n++;
6659 }
6660 }
6661 else
6662 {
4347441b
KH
6663 int pos_byte = coding->dst_pos_byte;
6664 int pos = coding->dst_pos;
6665 int pos_end = pos + coding->produced_char - 1;
6666
6667 while (pos < pos_end)
6668 {
6669 p = BYTE_POS_ADDR (pos_byte);
6670 if (*p == '\r' && p[1] == '\n')
6671 {
6672 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6673 n++;
6674 pos_end--;
6675 }
6676 pos++;
69b8522d
KH
6677 if (coding->dst_multibyte)
6678 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6679 else
6680 pos_byte++;
4347441b 6681 }
24a73b0a
KH
6682 }
6683 coding->produced -= n;
6684 coding->produced_char -= n;
aaaf0b1e 6685 }
4ed46869
KH
6686}
6687
7d64c6ad 6688
a6f87d34
KH
6689/* Return a translation table (or list of them) from coding system
6690 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6691 decoding (ENCODEP is zero). */
7d64c6ad 6692
e6a54062 6693static Lisp_Object
09ee6fdd
KH
6694get_translation_table (attrs, encodep, max_lookup)
6695 Lisp_Object attrs;
6696 int encodep, *max_lookup;
7d64c6ad
KH
6697{
6698 Lisp_Object standard, translation_table;
09ee6fdd 6699 Lisp_Object val;
7d64c6ad 6700
4bed5909
CY
6701 if (NILP (Venable_character_translation))
6702 {
6703 if (max_lookup)
6704 *max_lookup = 0;
6705 return Qnil;
6706 }
7d64c6ad
KH
6707 if (encodep)
6708 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6709 standard = Vstandard_translation_table_for_encode;
6710 else
6711 translation_table = CODING_ATTR_DECODE_TBL (attrs),
6712 standard = Vstandard_translation_table_for_decode;
7d64c6ad 6713 if (NILP (translation_table))
09ee6fdd
KH
6714 translation_table = standard;
6715 else
a6f87d34 6716 {
09ee6fdd
KH
6717 if (SYMBOLP (translation_table))
6718 translation_table = Fget (translation_table, Qtranslation_table);
6719 else if (CONSP (translation_table))
6720 {
6721 translation_table = Fcopy_sequence (translation_table);
6722 for (val = translation_table; CONSP (val); val = XCDR (val))
6723 if (SYMBOLP (XCAR (val)))
6724 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6725 }
6726 if (CHAR_TABLE_P (standard))
6727 {
6728 if (CONSP (translation_table))
6729 translation_table = nconc2 (translation_table,
6730 Fcons (standard, Qnil));
6731 else
6732 translation_table = Fcons (translation_table,
6733 Fcons (standard, Qnil));
6734 }
a6f87d34 6735 }
2170c8f0
KH
6736
6737 if (max_lookup)
09ee6fdd 6738 {
2170c8f0
KH
6739 *max_lookup = 1;
6740 if (CHAR_TABLE_P (translation_table)
6741 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6742 {
6743 val = XCHAR_TABLE (translation_table)->extras[1];
6744 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6745 *max_lookup = XFASTINT (val);
6746 }
6747 else if (CONSP (translation_table))
6748 {
6749 Lisp_Object tail, val;
09ee6fdd 6750
2170c8f0
KH
6751 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6752 if (CHAR_TABLE_P (XCAR (tail))
6753 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6754 {
6755 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6756 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6757 *max_lookup = XFASTINT (val);
6758 }
6759 }
a6f87d34 6760 }
7d64c6ad
KH
6761 return translation_table;
6762}
6763
/* Look up character C in TABLE, which is a char table or a list
   whose char-table elements are consulted in order (other elements
   are skipped).  C and TRANS must be lvalues.

   When a lookup yields a character, C is replaced by it and TRANS is
   Qnil; in a list, the following tables are then consulted with the
   new C.  When a lookup yields some other non-nil value, TRANS holds
   that value (and, in a list, the search stops there).  If nothing
   is found, TRANS is Qnil.  TABLE is evaluated more than once.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    trans = Qnil;                                                       \
    if (CHAR_TABLE_P (table))                                           \
      {                                                                 \
        trans = CHAR_TABLE_REF (table, c);                              \
        if (CHARACTERP (trans))                                         \
          {                                                             \
            c = XFASTINT (trans);                                       \
            trans = Qnil;                                               \
          }                                                             \
      }                                                                 \
    else if (CONSP (table))                                             \
      {                                                                 \
        Lisp_Object tail;                                               \
                                                                        \
        for (tail = table; CONSP (tail); tail = XCDR (tail))            \
          {                                                             \
            if (! CHAR_TABLE_P (XCAR (tail)))                           \
              continue;                                                 \
            trans = CHAR_TABLE_REF (XCAR (tail), c);                    \
            if (CHARACTERP (trans))                                     \
              {                                                         \
                c = XFASTINT (trans);                                   \
                trans = Qnil;                                           \
              }                                                         \
            else if (! NILP (trans))                                    \
              break;                                                    \
          }                                                             \
      }                                                                 \
  } while (0)
6788
7d64c6ad 6789
e951386e
KH
6790/* Return a translation of character(s) at BUF according to TRANS.
6791 TRANS is TO-CHAR or ((FROM . TO) ...) where
6792 FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6793 The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6794 translation is found, and Qnil if not found..
6795 If BUF is too short to lookup characters in FROM, return Qt. */
6796
69a80ea3 6797static Lisp_Object
e951386e
KH
6798get_translation (trans, buf, buf_end)
6799 Lisp_Object trans;
69a80ea3 6800 int *buf, *buf_end;
69a80ea3 6801{
e951386e
KH
6802
6803 if (INTEGERP (trans))
6804 return trans;
6805 for (; CONSP (trans); trans = XCDR (trans))
69a80ea3 6806 {
e951386e
KH
6807 Lisp_Object val = XCAR (trans);
6808 Lisp_Object from = XCAR (val);
6809 int len = ASIZE (from);
6810 int i;
69a80ea3 6811
e951386e 6812 for (i = 0; i < len; i++)
69a80ea3 6813 {
e951386e
KH
6814 if (buf + i == buf_end)
6815 return Qt;
6816 if (XINT (AREF (from, i)) != buf[i])
6817 break;
69a80ea3 6818 }
e951386e
KH
6819 if (i == len)
6820 return val;
69a80ea3 6821 }
e951386e 6822 return Qnil;
69a80ea3
KH
6823}
6824
6825
d46c5b12 6826static int
69a80ea3 6827produce_chars (coding, translation_table, last_block)
df7492f9 6828 struct coding_system *coding;
69a80ea3
KH
6829 Lisp_Object translation_table;
6830 int last_block;
4ed46869 6831{
df7492f9
KH
6832 unsigned char *dst = coding->destination + coding->produced;
6833 unsigned char *dst_end = coding->destination + coding->dst_bytes;
119852e7
KH
6834 EMACS_INT produced;
6835 EMACS_INT produced_chars = 0;
69a80ea3 6836 int carryover = 0;
4ed46869 6837
df7492f9 6838 if (! coding->chars_at_source)
4ed46869 6839 {
119852e7 6840 /* Source characters are in coding->charbuf. */
fba4576f
AS
6841 int *buf = coding->charbuf;
6842 int *buf_end = buf + coding->charbuf_used;
4ed46869 6843
db274c7a
KH
6844 if (EQ (coding->src_object, coding->dst_object))
6845 {
6846 coding_set_source (coding);
6847 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6848 }
4ed46869 6849
df7492f9 6850 while (buf < buf_end)
4ed46869 6851 {
69a80ea3 6852 int c = *buf, i;
bc4bc72a 6853
df7492f9
KH
6854 if (c >= 0)
6855 {
69a80ea3
KH
6856 int from_nchars = 1, to_nchars = 1;
6857 Lisp_Object trans = Qnil;
6858
09ee6fdd 6859 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6860 if (! NILP (trans))
69a80ea3 6861 {
e951386e
KH
6862 trans = get_translation (trans, buf, buf_end);
6863 if (INTEGERP (trans))
6864 c = XINT (trans);
6865 else if (CONSP (trans))
6866 {
6867 from_nchars = ASIZE (XCAR (trans));
6868 trans = XCDR (trans);
6869 if (INTEGERP (trans))
6870 c = XINT (trans);
6871 else
6872 {
6873 to_nchars = ASIZE (trans);
6874 c = XINT (AREF (trans, 0));
6875 }
6876 }
6877 else if (EQ (trans, Qt) && ! last_block)
69a80ea3 6878 break;
69a80ea3
KH
6879 }
6880
6881 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6882 {
6883 dst = alloc_destination (coding,
6884 buf_end - buf
6885 + MAX_MULTIBYTE_LENGTH * to_nchars,
6886 dst);
db274c7a
KH
6887 if (EQ (coding->src_object, coding->dst_object))
6888 {
6889 coding_set_source (coding);
e951386e
KH
6890 dst_end = (((unsigned char *) coding->source)
6891 + coding->consumed);
db274c7a
KH
6892 }
6893 else
6894 dst_end = coding->destination + coding->dst_bytes;
69a80ea3
KH
6895 }
6896
433f7f87 6897 for (i = 0; i < to_nchars; i++)
69a80ea3 6898 {
433f7f87
KH
6899 if (i > 0)
6900 c = XINT (AREF (trans, i));
69a80ea3
KH
6901 if (coding->dst_multibyte
6902 || ! CHAR_BYTE8_P (c))
db274c7a 6903 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
69a80ea3
KH
6904 else
6905 *dst++ = CHAR_TO_BYTE8 (c);
6906 }
6907 produced_chars += to_nchars;
e951386e 6908 buf += from_nchars;
d46c5b12 6909 }
df7492f9 6910 else
69a80ea3
KH
6911 /* This is an annotation datum. (-C) is the length. */
6912 buf += -c;
4ed46869 6913 }
69a80ea3 6914 carryover = buf_end - buf;
4ed46869 6915 }
fa42c37f 6916 else
fa42c37f 6917 {
119852e7 6918 /* Source characters are at coding->source. */
8f924df7 6919 const unsigned char *src = coding->source;
119852e7 6920 const unsigned char *src_end = src + coding->consumed;
4ed46869 6921
db274c7a
KH
6922 if (EQ (coding->dst_object, coding->src_object))
6923 dst_end = (unsigned char *) src;
df7492f9 6924 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 6925 {
df7492f9 6926 if (coding->src_multibyte)
fa42c37f 6927 {
71c81426 6928 int multibytep = 1;
4533845d 6929 EMACS_INT consumed_chars = 0;
d46c5b12 6930
df7492f9
KH
6931 while (1)
6932 {
8f924df7 6933 const unsigned char *src_base = src;
df7492f9 6934 int c;
b73bfc1c 6935
df7492f9 6936 ONE_MORE_BYTE (c);
119852e7 6937 if (dst == dst_end)
df7492f9 6938 {
119852e7
KH
6939 if (EQ (coding->src_object, coding->dst_object))
6940 dst_end = (unsigned char *) src;
6941 if (dst == dst_end)
df7492f9 6942 {
119852e7
KH
6943 EMACS_INT offset = src - coding->source;
6944
6945 dst = alloc_destination (coding, src_end - src + 1,
6946 dst);
6947 dst_end = coding->destination + coding->dst_bytes;
6948 coding_set_source (coding);
6949 src = coding->source + offset;
6950 src_end = coding->source + coding->src_bytes;
db274c7a
KH
6951 if (EQ (coding->src_object, coding->dst_object))
6952 dst_end = (unsigned char *) src;
df7492f9 6953 }
df7492f9
KH
6954 }
6955 *dst++ = c;
6956 produced_chars++;
6957 }
6958 no_more_source:
6959 ;
fa42c37f
KH
6960 }
6961 else
df7492f9
KH
6962 while (src < src_end)
6963 {
71c81426 6964 int multibytep = 1;
df7492f9 6965 int c = *src++;
b73bfc1c 6966
df7492f9
KH
6967 if (dst >= dst_end - 1)
6968 {
2c78b7e1 6969 if (EQ (coding->src_object, coding->dst_object))
8f924df7 6970 dst_end = (unsigned char *) src;
2c78b7e1
KH
6971 if (dst >= dst_end - 1)
6972 {
119852e7 6973 EMACS_INT offset = src - coding->source;
db274c7a 6974 EMACS_INT more_bytes;
119852e7 6975
db274c7a
KH
6976 if (EQ (coding->src_object, coding->dst_object))
6977 more_bytes = ((src_end - src) / 2) + 2;
6978 else
6979 more_bytes = src_end - src + 2;
6980 dst = alloc_destination (coding, more_bytes, dst);
2c78b7e1
KH
6981 dst_end = coding->destination + coding->dst_bytes;
6982 coding_set_source (coding);
119852e7 6983 src = coding->source + offset;
2c78b7e1 6984 src_end = coding->source + coding->src_bytes;
db274c7a
KH
6985 if (EQ (coding->src_object, coding->dst_object))
6986 dst_end = (unsigned char *) src;
2c78b7e1 6987 }
df7492f9
KH
6988 }
6989 EMIT_ONE_BYTE (c);
6990 }
d46c5b12 6991 }
df7492f9
KH
6992 else
6993 {
6994 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 6995 {
119852e7 6996 EMACS_INT require = coding->src_bytes - coding->dst_bytes;
4ed46869 6997
df7492f9 6998 if (require > 0)
fa42c37f 6999 {
df7492f9
KH
7000 EMACS_INT offset = src - coding->source;
7001
7002 dst = alloc_destination (coding, require, dst);
7003 coding_set_source (coding);
7004 src = coding->source + offset;
7005 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
7006 }
7007 }
119852e7 7008 produced_chars = coding->consumed_char;
df7492f9 7009 while (src < src_end)
14daee73 7010 *dst++ = *src++;
fa42c37f
KH
7011 }
7012 }
7013
df7492f9 7014 produced = dst - (coding->destination + coding->produced);
284201e4 7015 if (BUFFERP (coding->dst_object) && produced_chars > 0)
df7492f9
KH
7016 insert_from_gap (produced_chars, produced);
7017 coding->produced += produced;
7018 coding->produced_char += produced_chars;
69a80ea3 7019 return carryover;
fa42c37f
KH
7020}
7021
ff0dacd7
KH
7022/* Compose text in CODING->object according to the annotation data at
7023 CHARBUF. CHARBUF is an array:
e951386e 7024 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
df7492f9 7025 */
4ed46869 7026
df7492f9 7027static INLINE void
69a80ea3 7028produce_composition (coding, charbuf, pos)
4ed46869 7029 struct coding_system *coding;
df7492f9 7030 int *charbuf;
69a80ea3 7031 EMACS_INT pos;
4ed46869 7032{
df7492f9 7033 int len;
69a80ea3 7034 EMACS_INT to;
df7492f9 7035 enum composition_method method;
df7492f9 7036 Lisp_Object components;
fa42c37f 7037
e951386e 7038 len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
69a80ea3 7039 to = pos + charbuf[2];
e951386e 7040 method = (enum composition_method) (charbuf[4]);
d46c5b12 7041
df7492f9
KH
7042 if (method == COMPOSITION_RELATIVE)
7043 components = Qnil;
e951386e 7044 else
d46c5b12 7045 {
df7492f9 7046 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
e951386e 7047 int i, j;
b73bfc1c 7048
e951386e
KH
7049 if (method == COMPOSITION_WITH_RULE)
7050 len = charbuf[2] * 3 - 2;
7051 charbuf += MAX_ANNOTATION_LENGTH;
7052 /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
7053 for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
9ffd559c 7054 {
e951386e
KH
7055 if (charbuf[i] >= 0)
7056 args[j] = make_number (charbuf[i]);
7057 else
7058 {
7059 i++;
7060 args[j] = make_number (charbuf[i] % 0x100);
7061 }
9ffd559c 7062 }
e951386e 7063 components = (i == j ? Fstring (j, args) : Fvector (j, args));
d46c5b12 7064 }
69a80ea3 7065 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
7066}
7067
d46c5b12 7068
ff0dacd7
KH
7069/* Put `charset' property on text in CODING->object according to
7070 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 7071 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 7072 */
d46c5b12 7073
ff0dacd7 7074static INLINE void
69a80ea3 7075produce_charset (coding, charbuf, pos)
d46c5b12 7076 struct coding_system *coding;
ff0dacd7 7077 int *charbuf;
69a80ea3 7078 EMACS_INT pos;
d46c5b12 7079{
69a80ea3
KH
7080 EMACS_INT from = pos - charbuf[2];
7081 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 7082
69a80ea3 7083 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
7084 Qcharset, CHARSET_NAME (charset),
7085 coding->dst_object);
d46c5b12
KH
7086}
7087
d46c5b12 7088
/* Number of elements first requested for the conversion work area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca, i.e. in the stack frame of
   the function that uses this macro, and record the number of
   elements in CODING->charbuf_size.  The request starts at
   CHARBUF_SIZE elements and is halved while alloca returns NULL and
   the size is still above 1024.

   If no area could be obtained, record
   CODING_RESULT_INSUFFICIENT_MEM and make the enclosing function
   return CODING->result, so this macro can only be used in a function
   returning such a value.

   NOTE(review): many alloca implementations never return NULL, in
   which case the retry loop never iterates twice -- confirm for the
   supported platforms.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size;                                                           \
                                                                        \
    coding->charbuf = NULL;                                             \
    for (size = CHARBUF_SIZE; size > 1024; size >>= 1)                  \
      {                                                                 \
        coding->charbuf = (int *) alloca (sizeof (int) * size);         \
        if (coding->charbuf)                                            \
          break;                                                        \
      }                                                                 \
    if (! coding->charbuf)                                              \
      {                                                                 \
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
        return coding->result;                                          \
      }                                                                 \
    coding->charbuf_size = size;                                        \
  } while (0)
4ed46869 7110
d46c5b12
KH
7111
7112static void
69a80ea3 7113produce_annotation (coding, pos)
d46c5b12 7114 struct coding_system *coding;
69a80ea3 7115 EMACS_INT pos;
d46c5b12 7116{
df7492f9
KH
7117 int *charbuf = coding->charbuf;
7118 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 7119
ff0dacd7
KH
7120 if (NILP (coding->dst_object))
7121 return;
d46c5b12 7122
df7492f9 7123 while (charbuf < charbuf_end)
a84f1519 7124 {
df7492f9 7125 if (*charbuf >= 0)
e951386e 7126 pos++, charbuf++;
d46c5b12 7127 else
d46c5b12 7128 {
df7492f9 7129 int len = -*charbuf;
e951386e
KH
7130
7131 if (len > 2)
7132 switch (charbuf[1])
7133 {
7134 case CODING_ANNOTATE_COMPOSITION_MASK:
7135 produce_composition (coding, charbuf, pos);
7136 break;
7137 case CODING_ANNOTATE_CHARSET_MASK:
7138 produce_charset (coding, charbuf, pos);
7139 break;
7140 }
df7492f9 7141 charbuf += len;
d46c5b12 7142 }
a84f1519 7143 }
d46c5b12
KH
7144}
7145
df7492f9
KH
7146/* Decode the data at CODING->src_object into CODING->dst_object.
7147 CODING->src_object is a buffer, a string, or nil.
7148 CODING->dst_object is a buffer.
d46c5b12 7149
df7492f9
KH
7150 If CODING->src_object is a buffer, it must be the current buffer.
7151 In this case, if CODING->src_pos is positive, it is a position of
7152 the source text in the buffer, otherwise, the source text is in the
7153 gap area of the buffer, and CODING->src_pos specifies the offset of
7154 the text from GPT (which must be the same as PT). If this is the
7155 same buffer as CODING->dst_object, CODING->src_pos must be
7156 negative.
d46c5b12 7157
b6828792 7158 If CODING->src_object is a string, CODING->src_pos is an index to
df7492f9 7159 that string.
d46c5b12 7160
df7492f9
KH
7161 If CODING->src_object is nil, CODING->source must already point to
7162 the non-relocatable memory area. In this case, CODING->src_pos is
7163 an offset from CODING->source.
73be902c 7164
df7492f9
KH
7165 The decoded data is inserted at the current point of the buffer
7166 CODING->dst_object.
7167*/
d46c5b12 7168
df7492f9
KH
7169static int
7170decode_coding (coding)
d46c5b12 7171 struct coding_system *coding;
d46c5b12 7172{
df7492f9 7173 Lisp_Object attrs;
24a73b0a 7174 Lisp_Object undo_list;
7d64c6ad 7175 Lisp_Object translation_table;
d0396581 7176 struct ccl_spec cclspec;
69a80ea3
KH
7177 int carryover;
7178 int i;
d46c5b12 7179
df7492f9
KH
7180 if (BUFFERP (coding->src_object)
7181 && coding->src_pos > 0
7182 && coding->src_pos < GPT
7183 && coding->src_pos + coding->src_chars > GPT)
7184 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 7185
24a73b0a 7186 undo_list = Qt;
df7492f9 7187 if (BUFFERP (coding->dst_object))
1c3478b0 7188 {
df7492f9
KH
7189 if (current_buffer != XBUFFER (coding->dst_object))
7190 set_buffer_internal (XBUFFER (coding->dst_object));
7191 if (GPT != PT)
7192 move_gap_both (PT, PT_BYTE);
24a73b0a
KH
7193 undo_list = current_buffer->undo_list;
7194 current_buffer->undo_list = Qt;
1c3478b0
KH
7195 }
7196
df7492f9
KH
7197 coding->consumed = coding->consumed_char = 0;
7198 coding->produced = coding->produced_char = 0;
7199 coding->chars_at_source = 0;
065e3595 7200 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 7201 coding->errors = 0;
1c3478b0 7202
df7492f9
KH
7203 ALLOC_CONVERSION_WORK_AREA (coding);
7204
7205 attrs = CODING_ID_ATTRS (coding->id);
2170c8f0 7206 translation_table = get_translation_table (attrs, 0, NULL);
df7492f9 7207
69a80ea3 7208 carryover = 0;
d0396581
KH
7209 if (coding->decoder == decode_coding_ccl)
7210 {
7211 coding->spec.ccl = &cclspec;
7212 setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7213 }
df7492f9 7214 do
b73bfc1c 7215 {
69a80ea3
KH
7216 EMACS_INT pos = coding->dst_pos + coding->produced_char;
7217
df7492f9
KH
7218 coding_set_source (coding);
7219 coding->annotated = 0;
69a80ea3 7220 coding->charbuf_used = carryover;
df7492f9 7221 (*(coding->decoder)) (coding);
df7492f9 7222 coding_set_destination (coding);
69a80ea3 7223 carryover = produce_chars (coding, translation_table, 0);
df7492f9 7224 if (coding->annotated)
69a80ea3
KH
7225 produce_annotation (coding, pos);
7226 for (i = 0; i < carryover; i++)
7227 coding->charbuf[i]
7228 = coding->charbuf[coding->charbuf_used - carryover + i];
d46c5b12 7229 }
d0396581
KH
7230 while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7231 || (coding->consumed < coding->src_bytes
7232 && (coding->result == CODING_RESULT_SUCCESS
7233 || coding->result == CODING_RESULT_INVALID_SRC)));
d46c5b12 7234
69a80ea3
KH
7235 if (carryover > 0)
7236 {
7237 coding_set_destination (coding);
7238 coding->charbuf_used = carryover;
7239 produce_chars (coding, translation_table, 1);
7240 }
7241
df7492f9
KH
7242 coding->carryover_bytes = 0;
7243 if (coding->consumed < coding->src_bytes)
d46c5b12 7244 {
df7492f9 7245 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 7246 const unsigned char *src;
df7492f9
KH
7247
7248 coding_set_source (coding);
7249 coding_set_destination (coding);
7250 src = coding->source + coding->consumed;
7251
7252 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 7253 {
df7492f9
KH
7254 /* Flush out unprocessed data as binary chars. We are sure
7255 that the number of data is less than the size of
7256 coding->charbuf. */
065e3595 7257 coding->charbuf_used = 0;
b2dab6c8
JR
7258 coding->chars_at_source = 0;
7259
df7492f9 7260 while (nbytes-- > 0)
1c3478b0 7261 {
df7492f9 7262 int c = *src++;
98725083 7263
1c91457d
KH
7264 if (c & 0x80)
7265 c = BYTE8_TO_CHAR (c);
7266 coding->charbuf[coding->charbuf_used++] = c;
1c3478b0 7267 }
f6cbaf43 7268 produce_chars (coding, Qnil, 1);
d46c5b12 7269 }
d46c5b12 7270 else
df7492f9
KH
7271 {
7272 /* Record unprocessed bytes in coding->carryover. We are
7273 sure that the number of data is less than the size of
7274 coding->carryover. */
7275 unsigned char *p = coding->carryover;
7276
f289d375
KH
7277 if (nbytes > sizeof coding->carryover)
7278 nbytes = sizeof coding->carryover;
df7492f9
KH
7279 coding->carryover_bytes = nbytes;
7280 while (nbytes-- > 0)
7281 *p++ = *src++;
1c3478b0 7282 }
df7492f9 7283 coding->consumed = coding->src_bytes;
b73bfc1c 7284 }
69f76525 7285
0a9564cb
EZ
7286 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7287 && !inhibit_eol_conversion)
4347441b 7288 decode_eol (coding);
24a73b0a
KH
7289 if (BUFFERP (coding->dst_object))
7290 {
7291 current_buffer->undo_list = undo_list;
7292 record_insert (coding->dst_pos, coding->produced_char);
7293 }
73be902c 7294 return coding->result;
4ed46869
KH
7295}
7296
aaaf0b1e 7297
e1c23804 7298/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
7299 ending before LIMIT of CODING->src_object (buffer or string), store
7300 the data in BUF, set *STOP to a starting position of the next
7301 composition (if any) or to LIMIT, and return the address of the
7302 next element of BUF.
7303
7304 If such an annotation is not found, set *STOP to a starting
7305 position of a composition after POS (if any) or to LIMIT, and
7306 return BUF. */
7307
7308static INLINE int *
7309handle_composition_annotation (pos, limit, coding, buf, stop)
7310 EMACS_INT pos, limit;
aaaf0b1e 7311 struct coding_system *coding;
ff0dacd7
KH
7312 int *buf;
7313 EMACS_INT *stop;
aaaf0b1e 7314{
ff0dacd7
KH
7315 EMACS_INT start, end;
7316 Lisp_Object prop;
aaaf0b1e 7317
ff0dacd7
KH
7318 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7319 || end > limit)
7320 *stop = limit;
7321 else if (start > pos)
7322 *stop = start;
7323 else
aaaf0b1e 7324 {
ff0dacd7 7325 if (start == pos)
aaaf0b1e 7326 {
ff0dacd7
KH
7327 /* We found a composition. Store the corresponding
7328 annotation data in BUF. */
7329 int *head = buf;
7330 enum composition_method method = COMPOSITION_METHOD (prop);
7331 int nchars = COMPOSITION_LENGTH (prop);
7332
e951386e 7333 ADD_COMPOSITION_DATA (buf, nchars, 0, method);
ff0dacd7 7334 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 7335 {
ff0dacd7
KH
7336 Lisp_Object components;
7337 int len, i, i_byte;
7338
7339 components = COMPOSITION_COMPONENTS (prop);
7340 if (VECTORP (components))
aaaf0b1e 7341 {
14fe7b53 7342 len = XVECTOR_SIZE (components);
ff0dacd7
KH
7343 for (i = 0; i < len; i++)
7344 *buf++ = XINT (AREF (components, i));
aaaf0b1e 7345 }
ff0dacd7 7346 else if (STRINGP (components))
aaaf0b1e 7347 {
8f924df7 7348 len = SCHARS (components);
ff0dacd7
KH
7349 i = i_byte = 0;
7350 while (i < len)
7351 {
7352 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7353 buf++;
7354 }
7355 }
7356 else if (INTEGERP (components))
7357 {
7358 len = 1;
7359 *buf++ = XINT (components);
7360 }
7361 else if (CONSP (components))
7362 {
7363 for (len = 0; CONSP (components);
7364 len++, components = XCDR (components))
7365 *buf++ = XINT (XCAR (components));
aaaf0b1e 7366 }
aaaf0b1e 7367 else
ff0dacd7
KH
7368 abort ();
7369 *head -= len;
aaaf0b1e 7370 }
aaaf0b1e 7371 }
ff0dacd7
KH
7372
7373 if (find_composition (end, limit, &start, &end, &prop,
7374 coding->src_object)
7375 && end <= limit)
7376 *stop = start;
7377 else
7378 *stop = limit;
aaaf0b1e 7379 }
ff0dacd7
KH
7380 return buf;
7381}
7382
7383
e1c23804 7384/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
7385 CODING->src_object (buffer of string), store the data in BUF, set
7386 *STOP to the position where the value of `charset' property changes
7387 (limiting by LIMIT), and return the address of the next element of
7388 BUF.
7389
7390 If the property value is nil, set *STOP to the position where the
7391 property value is non-nil (limiting by LIMIT), and return BUF. */
7392
7393static INLINE int *
7394handle_charset_annotation (pos, limit, coding, buf, stop)
7395 EMACS_INT pos, limit;
7396 struct coding_system *coding;
7397 int *buf;
7398 EMACS_INT *stop;
7399{
7400 Lisp_Object val, next;
7401 int id;
7402
7403 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7404 if (! NILP (val) && CHARSETP (val))
7405 id = XINT (CHARSET_SYMBOL_ID (val));
7406 else
7407 id = -1;
69a80ea3 7408 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
7409 next = Fnext_single_property_change (make_number (pos), Qcharset,
7410 coding->src_object,
7411 make_number (limit));
7412 *stop = XINT (next);
7413 return buf;
7414}
7415
7416
df7492f9 7417static void
09ee6fdd 7418consume_chars (coding, translation_table, max_lookup)
df7492f9 7419 struct coding_system *coding;
433f7f87 7420 Lisp_Object translation_table;
09ee6fdd 7421 int max_lookup;
df7492f9
KH
7422{
7423 int *buf = coding->charbuf;
ff0dacd7 7424 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 7425 const unsigned char *src = coding->source + coding->consumed;
4776e638 7426 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
7427 EMACS_INT pos = coding->src_pos + coding->consumed_char;
7428 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
7429 int multibytep = coding->src_multibyte;
7430 Lisp_Object eol_type;
7431 int c;
ff0dacd7 7432 EMACS_INT stop, stop_composition, stop_charset;
09ee6fdd 7433 int *lookup_buf = NULL;
433f7f87
KH
7434
7435 if (! NILP (translation_table))
09ee6fdd 7436 lookup_buf = alloca (sizeof (int) * max_lookup);
88993dfd 7437
0a9564cb 7438 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
df7492f9
KH
7439 if (VECTORP (eol_type))
7440 eol_type = Qunix;
88993dfd 7441
df7492f9
KH
7442 /* Note: composition handling is not yet implemented. */
7443 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 7444
0b5670c9
KH
7445 if (NILP (coding->src_object))
7446 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 7447 else
0b5670c9
KH
7448 {
7449 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7450 stop = stop_composition = pos;
7451 else
7452 stop = stop_composition = end_pos;
7453 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7454 stop = stop_charset = pos;
7455 else
7456 stop_charset = end_pos;
7457 }
ec6d2bb8 7458
24a73b0a 7459 /* Compensate for CRLF and conversion. */
ff0dacd7 7460 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 7461 while (buf < buf_end)
aaaf0b1e 7462 {
433f7f87
KH
7463 Lisp_Object trans;
7464
df7492f9 7465 if (pos == stop)
ec6d2bb8 7466 {
df7492f9
KH
7467 if (pos == end_pos)
7468 break;
ff0dacd7
KH
7469 if (pos == stop_composition)
7470 buf = handle_composition_annotation (pos, end_pos, coding,
7471 buf, &stop_composition);
7472 if (pos == stop_charset)
7473 buf = handle_charset_annotation (pos, end_pos, coding,
7474 buf, &stop_charset);
7475 stop = (stop_composition < stop_charset
7476 ? stop_composition : stop_charset);
df7492f9
KH
7477 }
7478
7479 if (! multibytep)
4776e638 7480 {
d3e4cb56 7481 EMACS_INT bytes;
aaaf0b1e 7482
4d1e6632
KH
7483 if (coding->encoder == encode_coding_raw_text
7484 || coding->encoder == encode_coding_ccl)
ea29edf2
KH
7485 c = *src++, pos++;
7486 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
db274c7a 7487 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
4776e638 7488 else
f03caae0 7489 c = BYTE8_TO_CHAR (*src), src++, pos++;
4776e638 7490 }
df7492f9 7491 else
db274c7a 7492 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
df7492f9
KH
7493 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7494 c = '\n';
7495 if (! EQ (eol_type, Qunix))
aaaf0b1e 7496 {
df7492f9 7497 if (c == '\n')
aaaf0b1e 7498 {
df7492f9
KH
7499 if (EQ (eol_type, Qdos))
7500 *buf++ = '\r';
7501 else
7502 c = '\r';
aaaf0b1e
KH
7503 }
7504 }
433f7f87 7505
e6a54062 7506 trans = Qnil;
09ee6fdd 7507 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 7508 if (NILP (trans))
433f7f87
KH
7509 *buf++ = c;
7510 else
7511 {
7512 int from_nchars = 1, to_nchars = 1;
7513 int *lookup_buf_end;
7514 const unsigned char *p = src;
7515 int i;
7516
7517 lookup_buf[0] = c;
7518 for (i = 1; i < max_lookup && p < src_end; i++)
7519 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7520 lookup_buf_end = lookup_buf + i;
e951386e
KH
7521 trans = get_translation (trans, lookup_buf, lookup_buf_end);
7522 if (INTEGERP (trans))
7523 c = XINT (trans);
7524 else if (CONSP (trans))
7525 {
7526 from_nchars = ASIZE (XCAR (trans));
7527 trans = XCDR (trans);
7528 if (INTEGERP (trans))
7529 c = XINT (trans);
7530 else
7531 {
7532 to_nchars = ASIZE (trans);
7533 if (buf + to_nchars > buf_end)
7534 break;
7535 c = XINT (AREF (trans, 0));
7536 }
7537 }
7538 else
433f7f87 7539 break;
e951386e 7540 *buf++ = c;
433f7f87
KH
7541 for (i = 1; i < to_nchars; i++)
7542 *buf++ = XINT (AREF (trans, i));
7543 for (i = 1; i < from_nchars; i++, pos++)
7544 src += MULTIBYTE_LENGTH_NO_CHECK (src);
7545 }
aaaf0b1e 7546 }
ec6d2bb8 7547
df7492f9
KH
7548 coding->consumed = src - coding->source;
7549 coding->consumed_char = pos - coding->src_pos;
7550 coding->charbuf_used = buf - coding->charbuf;
7551 coding->chars_at_source = 0;
aaaf0b1e
KH
7552}
7553
4ed46869 7554
df7492f9
KH
7555/* Encode the text at CODING->src_object into CODING->dst_object.
7556 CODING->src_object is a buffer or a string.
7557 CODING->dst_object is a buffer or nil.
7558
7559 If CODING->src_object is a buffer, it must be the current buffer.
7560 In this case, if CODING->src_pos is positive, it is a position of
7561 the source text in the buffer, otherwise. the source text is in the
7562 gap area of the buffer, and coding->src_pos specifies the offset of
7563 the text from GPT (which must be the same as PT). If this is the
7564 same buffer as CODING->dst_object, CODING->src_pos must be
7565 negative and CODING should not have `pre-write-conversion'.
7566
7567 If CODING->src_object is a string, CODING should not have
7568 `pre-write-conversion'.
7569
7570 If CODING->dst_object is a buffer, the encoded data is inserted at
7571 the current point of that buffer.
7572
7573 If CODING->dst_object is nil, the encoded data is placed at the
7574 memory area specified by CODING->destination. */
7575
7576static int
7577encode_coding (coding)
4ed46869 7578 struct coding_system *coding;
4ed46869 7579{
df7492f9 7580 Lisp_Object attrs;
7d64c6ad 7581 Lisp_Object translation_table;
09ee6fdd 7582 int max_lookup;
fb608df3 7583 struct ccl_spec cclspec;
9861e777 7584
df7492f9 7585 attrs = CODING_ID_ATTRS (coding->id);
ea29edf2
KH
7586 if (coding->encoder == encode_coding_raw_text)
7587 translation_table = Qnil, max_lookup = 0;
7588 else
7589 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 7590
df7492f9 7591 if (BUFFERP (coding->dst_object))
8844fa83 7592 {
df7492f9
KH
7593 set_buffer_internal (XBUFFER (coding->dst_object));
7594 coding->dst_multibyte
7595 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 7596 }
4ed46869 7597
b73bfc1c 7598 coding->consumed = coding->consumed_char = 0;
df7492f9 7599 coding->produced = coding->produced_char = 0;
065e3595 7600 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 7601 coding->errors = 0;
b73bfc1c 7602
df7492f9 7603 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 7604
fb608df3
KH
7605 if (coding->encoder == encode_coding_ccl)
7606 {
7607 coding->spec.ccl = &cclspec;
7608 setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7609 }
df7492f9
KH
7610 do {
7611 coding_set_source (coding);
09ee6fdd 7612 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
7613 coding_set_destination (coding);
7614 (*(coding->encoder)) (coding);
7615 } while (coding->consumed_char < coding->src_chars);
7616
284201e4 7617 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
df7492f9
KH
7618 insert_from_gap (coding->produced_char, coding->produced);
7619
7620 return (coding->result);
ec6d2bb8
KH
7621}
7622
fb88bf2d 7623
24a73b0a
KH
7624/* Name (or base name) of work buffer for code conversion. */
7625static Lisp_Object Vcode_conversion_workbuf_name;
d46c5b12 7626
24a73b0a
KH
7627/* A working buffer used by the top level conversion. Once it is
7628 created, it is never destroyed. It has the name
7629 Vcode_conversion_workbuf_name. The other working buffers are
7630 destroyed after the use is finished, and their names are modified
7631 versions of Vcode_conversion_workbuf_name. */
7632static Lisp_Object Vcode_conversion_reused_workbuf;
b73bfc1c 7633
24a73b0a
KH
7634/* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
   make_conversion_work_buffer post-increments this on every call, so
   the value may exceed 1; code_conversion_restore resets it to 0 when
   the reused buffer is released. */
7635static int reused_workbuf_in_use;
4ed46869 7636
24a73b0a 7637
ad1746f5 7638/* Return a working buffer of code conversion. MULTIBYTE specifies the
24a73b0a 7639 multibyteness of returning buffer. */
b73bfc1c 7640
f6cbaf43 7641static Lisp_Object
24a73b0a 7642make_conversion_work_buffer (multibyte)
f6cbaf43 7643 int multibyte;
df7492f9 7644{
24a73b0a
KH
7645 Lisp_Object name, workbuf;
7646 struct buffer *current;
4ed46869 7647
24a73b0a 7648 if (reused_workbuf_in_use++)
065e3595
KH
7649 {
7650 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7651 workbuf = Fget_buffer_create (name);
7652 }
df7492f9 7653 else
065e3595 7654 {
159bd5a2 7655 if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
a993c7a1
KH
7656 Vcode_conversion_reused_workbuf
7657 = Fget_buffer_create (Vcode_conversion_workbuf_name);
7658 workbuf = Vcode_conversion_reused_workbuf;
065e3595 7659 }
24a73b0a
KH
7660 current = current_buffer;
7661 set_buffer_internal (XBUFFER (workbuf));
df36ff1f
CY
7662 /* We can't allow modification hooks to run in the work buffer. For
7663 instance, directory_files_internal assumes that file decoding
7664 doesn't compile new regexps. */
7665 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
3ed051d4 7666 Ferase_buffer ();
df7492f9 7667 current_buffer->undo_list = Qt;
24a73b0a 7668 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
df7492f9 7669 set_buffer_internal (current);
24a73b0a 7670 return workbuf;
df7492f9 7671}
d46c5b12 7672
24a73b0a 7673
4776e638 7674static Lisp_Object
24a73b0a
KH
7675code_conversion_restore (arg)
7676 Lisp_Object arg;
4776e638 7677{
24a73b0a 7678 Lisp_Object current, workbuf;
948bdcf3 7679 struct gcpro gcpro1;
24a73b0a 7680
948bdcf3 7681 GCPRO1 (arg);
24a73b0a
KH
7682 current = XCAR (arg);
7683 workbuf = XCDR (arg);
7684 if (! NILP (workbuf))
7685 {
7686 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7687 reused_workbuf_in_use = 0;
7688 else if (! NILP (Fbuffer_live_p (workbuf)))
7689 Fkill_buffer (workbuf);
7690 }
7691 set_buffer_internal (XBUFFER (current));
948bdcf3 7692 UNGCPRO;
4776e638
KH
7693 return Qnil;
7694}
b73bfc1c 7695
24a73b0a
KH
7696Lisp_Object
7697code_conversion_save (with_work_buf, multibyte)
4776e638 7698 int with_work_buf, multibyte;
df7492f9 7699{
24a73b0a 7700 Lisp_Object workbuf = Qnil;
b73bfc1c 7701
4776e638 7702 if (with_work_buf)
24a73b0a
KH
7703 workbuf = make_conversion_work_buffer (multibyte);
7704 record_unwind_protect (code_conversion_restore,
7705 Fcons (Fcurrent_buffer (), workbuf));
4776e638 7706 return workbuf;
df7492f9 7707}
d46c5b12 7708
df7492f9
KH
7709int
7710decode_coding_gap (coding, chars, bytes)
7711 struct coding_system *coding;
7712 EMACS_INT chars, bytes;
7713{
7714 int count = specpdl_ptr - specpdl;
5e5c78be 7715 Lisp_Object attrs;
fb88bf2d 7716
24a73b0a 7717 code_conversion_save (0, 0);
ec6d2bb8 7718
24a73b0a 7719 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7720 coding->src_chars = chars;
7721 coding->src_bytes = bytes;
7722 coding->src_pos = -chars;
7723 coding->src_pos_byte = -bytes;
7724 coding->src_multibyte = chars < bytes;
24a73b0a 7725 coding->dst_object = coding->src_object;
df7492f9
KH
7726 coding->dst_pos = PT;
7727 coding->dst_pos_byte = PT_BYTE;
71c81426 7728 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869 7729
df7492f9
KH
7730 if (CODING_REQUIRE_DETECTION (coding))
7731 detect_coding (coding);
8f924df7 7732
9286b333 7733 coding->mode |= CODING_MODE_LAST_BLOCK;
287c57d7 7734 current_buffer->text->inhibit_shrinking = 1;
df7492f9 7735 decode_coding (coding);
287c57d7 7736 current_buffer->text->inhibit_shrinking = 0;
d46c5b12 7737
5e5c78be
KH
7738 attrs = CODING_ID_ATTRS (coding->id);
7739 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 7740 {
5e5c78be
KH
7741 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7742 Lisp_Object val;
7743
7744 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
7745 val = call1 (CODING_ATTR_POST_READ (attrs),
7746 make_number (coding->produced_char));
5e5c78be
KH
7747 CHECK_NATNUM (val);
7748 coding->produced_char += Z - prev_Z;
7749 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 7750 }
4ed46869 7751
df7492f9 7752 unbind_to (count, Qnil);
b73bfc1c
KH
7753 return coding->result;
7754}
52d41803 7755
4ed46869 7756int
df7492f9 7757encode_coding_gap (coding, chars, bytes)
4ed46869 7758 struct coding_system *coding;
df7492f9 7759 EMACS_INT chars, bytes;
4ed46869 7760{
df7492f9 7761 int count = specpdl_ptr - specpdl;
4ed46869 7762
24a73b0a 7763 code_conversion_save (0, 0);
4ed46869 7764
24a73b0a 7765 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7766 coding->src_chars = chars;
7767 coding->src_bytes = bytes;
7768 coding->src_pos = -chars;
7769 coding->src_pos_byte = -bytes;
7770 coding->src_multibyte = chars < bytes;
7771 coding->dst_object = coding->src_object;
7772 coding->dst_pos = PT;
7773 coding->dst_pos_byte = PT_BYTE;
4ed46869 7774
df7492f9 7775 encode_coding (coding);
b73bfc1c 7776
df7492f9
KH
7777 unbind_to (count, Qnil);
7778 return coding->result;
7779}
4ed46869 7780
d46c5b12 7781
df7492f9
KH
7782/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7783 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 7784
df7492f9 7785 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 7786
df7492f9
KH
7787 If it is a buffer, the text is at point of the buffer. FROM and TO
7788 are positions in the buffer.
b73bfc1c 7789
df7492f9
KH
7790 If it is a string, the text is at the beginning of the string.
7791 FROM and TO are indices to the string.
4ed46869 7792
df7492f9
KH
7793 If it is nil, the text is at coding->source. FROM and TO are
7794 indices to coding->source.
bb10be8b 7795
df7492f9 7796 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 7797
df7492f9
KH
7798 If it is a buffer, the decoded text is inserted at point of the
7799 buffer. If the buffer is the same as SRC_OBJECT, the source text
7800 is deleted.
4ed46869 7801
df7492f9
KH
7802 If it is Qt, a string is made from the decoded text, and
7803 set in CODING->dst_object.
d46c5b12 7804
df7492f9 7805 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 7806 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
7807 CODING->destination by xmalloc. If the decoded text is longer than
7808 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7809 */
d46c5b12 7810
df7492f9
KH
7811void
7812decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7813 dst_object)
d46c5b12 7814 struct coding_system *coding;
df7492f9
KH
7815 Lisp_Object src_object;
7816 EMACS_INT from, from_byte, to, to_byte;
7817 Lisp_Object dst_object;
d46c5b12 7818{
df7492f9
KH
7819 int count = specpdl_ptr - specpdl;
7820 unsigned char *destination;
7821 EMACS_INT dst_bytes;
7822 EMACS_INT chars = to - from;
7823 EMACS_INT bytes = to_byte - from_byte;
7824 Lisp_Object attrs;
4776e638 7825 int saved_pt = -1, saved_pt_byte;
64cedb0c 7826 int need_marker_adjustment = 0;
b3bfad50 7827 Lisp_Object old_deactivate_mark;
d46c5b12 7828
b3bfad50 7829 old_deactivate_mark = Vdeactivate_mark;
93dec019 7830
df7492f9 7831 if (NILP (dst_object))
d46c5b12 7832 {
df7492f9
KH
7833 destination = coding->destination;
7834 dst_bytes = coding->dst_bytes;
d46c5b12 7835 }
93dec019 7836
df7492f9
KH
7837 coding->src_object = src_object;
7838 coding->src_chars = chars;
7839 coding->src_bytes = bytes;
7840 coding->src_multibyte = chars < bytes;
70ad9fc4 7841
df7492f9 7842 if (STRINGP (src_object))
d46c5b12 7843 {
df7492f9
KH
7844 coding->src_pos = from;
7845 coding->src_pos_byte = from_byte;
d46c5b12 7846 }
df7492f9 7847 else if (BUFFERP (src_object))
88993dfd 7848 {
df7492f9
KH
7849 set_buffer_internal (XBUFFER (src_object));
7850 if (from != GPT)
7851 move_gap_both (from, from_byte);
7852 if (EQ (src_object, dst_object))
fb88bf2d 7853 {
64cedb0c
KH
7854 struct Lisp_Marker *tail;
7855
7856 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7857 {
7858 tail->need_adjustment
7859 = tail->charpos == (tail->insertion_type ? from : to);
7860 need_marker_adjustment |= tail->need_adjustment;
7861 }
4776e638 7862 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9 7863 TEMP_SET_PT_BOTH (from, from_byte);
f4a3cc44 7864 current_buffer->text->inhibit_shrinking = 1;
df7492f9
KH
7865 del_range_both (from, from_byte, to, to_byte, 1);
7866 coding->src_pos = -chars;
7867 coding->src_pos_byte = -bytes;
fb88bf2d 7868 }
df7492f9 7869 else
fb88bf2d 7870 {
df7492f9
KH
7871 coding->src_pos = from;
7872 coding->src_pos_byte = from_byte;
fb88bf2d 7873 }
88993dfd
KH
7874 }
7875
df7492f9
KH
7876 if (CODING_REQUIRE_DETECTION (coding))
7877 detect_coding (coding);
7878 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 7879
2cb26057
KH
7880 if (EQ (dst_object, Qt)
7881 || (! NILP (CODING_ATTR_POST_READ (attrs))
7882 && NILP (dst_object)))
b73bfc1c 7883 {
a1567c45
SM
7884 coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7885 coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
df7492f9
KH
7886 coding->dst_pos = BEG;
7887 coding->dst_pos_byte = BEG_BYTE;
b73bfc1c 7888 }
df7492f9 7889 else if (BUFFERP (dst_object))
d46c5b12 7890 {
24a73b0a 7891 code_conversion_save (0, 0);
df7492f9
KH
7892 coding->dst_object = dst_object;
7893 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7894 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7895 coding->dst_multibyte
7896 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
7897 }
7898 else
7899 {
24a73b0a 7900 code_conversion_save (0, 0);
df7492f9 7901 coding->dst_object = Qnil;
0154725e
SM
7902 /* Most callers presume this will return a multibyte result, and they
7903 won't use `binary' or `raw-text' anyway, so let's not worry about
7904 CODING_FOR_UNIBYTE. */
bb555731 7905 coding->dst_multibyte = 1;
d46c5b12
KH
7906 }
7907
df7492f9 7908 decode_coding (coding);
fa46990e 7909
df7492f9
KH
7910 if (BUFFERP (coding->dst_object))
7911 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 7912
df7492f9 7913 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 7914 {
b3bfad50 7915 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
df7492f9 7916 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 7917 Lisp_Object val;
d46c5b12 7918
c0cc7f7f 7919 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
b3bfad50
KH
7920 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7921 old_deactivate_mark);
d4850d67
KH
7922 val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7923 make_number (coding->produced_char));
df7492f9
KH
7924 UNGCPRO;
7925 CHECK_NATNUM (val);
7926 coding->produced_char += Z - prev_Z;
7927 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 7928 }
de79a6a5 7929
df7492f9 7930 if (EQ (dst_object, Qt))
ec6d2bb8 7931 {
df7492f9
KH
7932 coding->dst_object = Fbuffer_string ();
7933 }
7934 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7935 {
7936 set_buffer_internal (XBUFFER (coding->dst_object));
7937 if (dst_bytes < coding->produced)
7938 {
b3bfad50 7939 destination = xrealloc (destination, coding->produced);
df7492f9
KH
7940 if (! destination)
7941 {
065e3595 7942 record_conversion_result (coding,
ebaf11b6 7943 CODING_RESULT_INSUFFICIENT_MEM);
df7492f9
KH
7944 unbind_to (count, Qnil);
7945 return;
7946 }
7947 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7948 move_gap_both (BEGV, BEGV_BYTE);
7949 bcopy (BEGV_ADDR, destination, coding->produced);
7950 coding->destination = destination;
d46c5b12 7951 }
ec6d2bb8 7952 }
b73bfc1c 7953
4776e638
KH
7954 if (saved_pt >= 0)
7955 {
7956 /* This is the case of:
7957 (BUFFERP (src_object) && EQ (src_object, dst_object))
7958 As we have moved PT while replacing the original buffer
7959 contents, we must recover it now. */
7960 set_buffer_internal (XBUFFER (src_object));
f4a3cc44 7961 current_buffer->text->inhibit_shrinking = 0;
4776e638
KH
7962 if (saved_pt < from)
7963 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7964 else if (saved_pt < from + chars)
7965 TEMP_SET_PT_BOTH (from, from_byte);
7966 else if (! NILP (current_buffer->enable_multibyte_characters))
7967 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7968 saved_pt_byte + (coding->produced - bytes));
7969 else
7970 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7971 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7972
7973 if (need_marker_adjustment)
7974 {
7975 struct Lisp_Marker *tail;
7976
7977 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7978 if (tail->need_adjustment)
7979 {
7980 tail->need_adjustment = 0;
7981 if (tail->insertion_type)
7982 {
7983 tail->bytepos = from_byte;
7984 tail->charpos = from;
7985 }
7986 else
7987 {
7988 tail->bytepos = from_byte + coding->produced;
7989 tail->charpos
7990 = (NILP (current_buffer->enable_multibyte_characters)
7991 ? tail->bytepos : from + coding->produced_char);
7992 }
7993 }
7994 }
d46c5b12 7995 }
4776e638 7996
b3bfad50 7997 Vdeactivate_mark = old_deactivate_mark;
065e3595 7998 unbind_to (count, coding->dst_object);
d46c5b12
KH
7999}
8000
d46c5b12 8001
df7492f9
KH
8002void
8003encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
8004 dst_object)
d46c5b12 8005 struct coding_system *coding;
df7492f9
KH
8006 Lisp_Object src_object;
8007 EMACS_INT from, from_byte, to, to_byte;
8008 Lisp_Object dst_object;
d46c5b12 8009{
b73bfc1c 8010 int count = specpdl_ptr - specpdl;
df7492f9
KH
8011 EMACS_INT chars = to - from;
8012 EMACS_INT bytes = to_byte - from_byte;
8013 Lisp_Object attrs;
4776e638 8014 int saved_pt = -1, saved_pt_byte;
64cedb0c 8015 int need_marker_adjustment = 0;
c02d943b 8016 int kill_src_buffer = 0;
b3bfad50 8017 Lisp_Object old_deactivate_mark;
df7492f9 8018
b3bfad50 8019 old_deactivate_mark = Vdeactivate_mark;
df7492f9
KH
8020
8021 coding->src_object = src_object;
8022 coding->src_chars = chars;
8023 coding->src_bytes = bytes;
8024 coding->src_multibyte = chars < bytes;
8025
8026 attrs = CODING_ID_ATTRS (coding->id);
8027
64cedb0c
KH
8028 if (EQ (src_object, dst_object))
8029 {
8030 struct Lisp_Marker *tail;
8031
8032 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8033 {
8034 tail->need_adjustment
8035 = tail->charpos == (tail->insertion_type ? from : to);
8036 need_marker_adjustment |= tail->need_adjustment;
8037 }
8038 }
8039
df7492f9 8040 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 8041 {
24a73b0a 8042 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
8043 set_buffer_internal (XBUFFER (coding->src_object));
8044 if (STRINGP (src_object))
8045 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
8046 else if (BUFFERP (src_object))
8047 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
8048 else
8049 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 8050
df7492f9
KH
8051 if (EQ (src_object, dst_object))
8052 {
8053 set_buffer_internal (XBUFFER (src_object));
4776e638 8054 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
8055 del_range_both (from, from_byte, to, to_byte, 1);
8056 set_buffer_internal (XBUFFER (coding->src_object));
8057 }
8058
d4850d67
KH
8059 {
8060 Lisp_Object args[3];
b3bfad50 8061 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
d4850d67 8062
b3bfad50
KH
8063 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
8064 old_deactivate_mark);
d4850d67
KH
8065 args[0] = CODING_ATTR_PRE_WRITE (attrs);
8066 args[1] = make_number (BEG);
8067 args[2] = make_number (Z);
8068 safe_call (3, args);
b3bfad50 8069 UNGCPRO;
d4850d67 8070 }
c02d943b
KH
8071 if (XBUFFER (coding->src_object) != current_buffer)
8072 kill_src_buffer = 1;
ac87bbef 8073 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
8074 if (BEG != GPT)
8075 move_gap_both (BEG, BEG_BYTE);
8076 coding->src_chars = Z - BEG;
8077 coding->src_bytes = Z_BYTE - BEG_BYTE;
8078 coding->src_pos = BEG;
8079 coding->src_pos_byte = BEG_BYTE;
8080 coding->src_multibyte = Z < Z_BYTE;
8081 }
8082 else if (STRINGP (src_object))
d46c5b12 8083 {
24a73b0a 8084 code_conversion_save (0, 0);
df7492f9
KH
8085 coding->src_pos = from;
8086 coding->src_pos_byte = from_byte;
b73bfc1c 8087 }
df7492f9 8088 else if (BUFFERP (src_object))
b73bfc1c 8089 {
24a73b0a 8090 code_conversion_save (0, 0);
df7492f9 8091 set_buffer_internal (XBUFFER (src_object));
df7492f9 8092 if (EQ (src_object, dst_object))
d46c5b12 8093 {
4776e638 8094 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
8095 coding->src_object = del_range_1 (from, to, 1, 1);
8096 coding->src_pos = 0;
8097 coding->src_pos_byte = 0;
d46c5b12 8098 }
df7492f9 8099 else
d46c5b12 8100 {
ff0dacd7
KH
8101 if (from < GPT && to >= GPT)
8102 move_gap_both (from, from_byte);
df7492f9
KH
8103 coding->src_pos = from;
8104 coding->src_pos_byte = from_byte;
d46c5b12 8105 }
d46c5b12 8106 }
4776e638 8107 else
24a73b0a 8108 code_conversion_save (0, 0);
d46c5b12 8109
df7492f9 8110 if (BUFFERP (dst_object))
88993dfd 8111 {
df7492f9 8112 coding->dst_object = dst_object;
28f67a95
KH
8113 if (EQ (src_object, dst_object))
8114 {
8115 coding->dst_pos = from;
8116 coding->dst_pos_byte = from_byte;
8117 }
8118 else
8119 {
319a3947
KH
8120 struct buffer *current = current_buffer;
8121
8122 set_buffer_temp (XBUFFER (dst_object));
8123 coding->dst_pos = PT;
8124 coding->dst_pos_byte = PT_BYTE;
8125 move_gap_both (coding->dst_pos, coding->dst_pos_byte);
8126 set_buffer_temp (current);
28f67a95 8127 }
df7492f9
KH
8128 coding->dst_multibyte
8129 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 8130 }
df7492f9 8131 else if (EQ (dst_object, Qt))
d46c5b12 8132 {
df7492f9 8133 coding->dst_object = Qnil;
df7492f9 8134 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
8135 if (coding->dst_bytes == 0)
8136 coding->dst_bytes = 1;
8137 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 8138 coding->dst_multibyte = 0;
d46c5b12
KH
8139 }
8140 else
8141 {
df7492f9
KH
8142 coding->dst_object = Qnil;
8143 coding->dst_multibyte = 0;
d46c5b12
KH
8144 }
8145
df7492f9 8146 encode_coding (coding);
d46c5b12 8147
df7492f9 8148 if (EQ (dst_object, Qt))
d46c5b12 8149 {
df7492f9
KH
8150 if (BUFFERP (coding->dst_object))
8151 coding->dst_object = Fbuffer_string ();
8152 else
d46c5b12 8153 {
df7492f9
KH
8154 coding->dst_object
8155 = make_unibyte_string ((char *) coding->destination,
8156 coding->produced);
8157 xfree (coding->destination);
d46c5b12 8158 }
4ed46869 8159 }
d46c5b12 8160
4776e638
KH
8161 if (saved_pt >= 0)
8162 {
8163 /* This is the case of:
8164 (BUFFERP (src_object) && EQ (src_object, dst_object))
8165 As we have moved PT while replacing the original buffer
8166 contents, we must recover it now. */
8167 set_buffer_internal (XBUFFER (src_object));
8168 if (saved_pt < from)
8169 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8170 else if (saved_pt < from + chars)
8171 TEMP_SET_PT_BOTH (from, from_byte);
8172 else if (! NILP (current_buffer->enable_multibyte_characters))
8173 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8174 saved_pt_byte + (coding->produced - bytes));
d46c5b12 8175 else
4776e638
KH
8176 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8177 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
8178
8179 if (need_marker_adjustment)
8180 {
8181 struct Lisp_Marker *tail;
8182
8183 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8184 if (tail->need_adjustment)
8185 {
8186 tail->need_adjustment = 0;
8187 if (tail->insertion_type)
8188 {
8189 tail->bytepos = from_byte;
8190 tail->charpos = from;
8191 }
8192 else
8193 {
8194 tail->bytepos = from_byte + coding->produced;
8195 tail->charpos
8196 = (NILP (current_buffer->enable_multibyte_characters)
8197 ? tail->bytepos : from + coding->produced_char);
8198 }
8199 }
8200 }
4776e638
KH
8201 }
8202
c02d943b
KH
8203 if (kill_src_buffer)
8204 Fkill_buffer (coding->src_object);
b3bfad50
KH
8205
8206 Vdeactivate_mark = old_deactivate_mark;
df7492f9 8207 unbind_to (count, Qnil);
b73bfc1c
KH
8208}
8209
df7492f9 8210
b73bfc1c 8211Lisp_Object
df7492f9 8212preferred_coding_system ()
b73bfc1c 8213{
df7492f9 8214 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 8215
df7492f9 8216 return CODING_ID_NAME (id);
4ed46869
KH
8217}
8218
8219\f
8220#ifdef emacs
1397dc18 8221/*** 11. Emacs Lisp library functions ***/
4ed46869 8222
4ed46869 8223DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 8224 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 8225See the documentation of `define-coding-system' for information
48b0f3ae 8226about coding-system objects. */)
d4a1d553
JB
8227 (object)
8228 Lisp_Object object;
4ed46869 8229{
d4a1d553
JB
8230 if (NILP (object)
8231 || CODING_SYSTEM_ID (object) >= 0)
44e8490d 8232 return Qt;
d4a1d553
JB
8233 if (! SYMBOLP (object)
8234 || NILP (Fget (object, Qcoding_system_define_form)))
44e8490d
KH
8235 return Qnil;
8236 return Qt;
4ed46869
KH
8237}
8238
9d991de8
RS
8239DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8240 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
8241 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
8242 (prompt)
4ed46869
KH
8243 Lisp_Object prompt;
8244{
e0e989f6 8245 Lisp_Object val;
9d991de8
RS
8246 do
8247 {
4608c386
KH
8248 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8249 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 8250 }
8f924df7 8251 while (SCHARS (val) == 0);
e0e989f6 8252 return (Fintern (val, Qnil));
4ed46869
KH
8253}
8254
9b787f3e 8255DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae 8256 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
c7183fb8
GM
8257If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8258Ignores case when completing coding systems (all Emacs coding systems
8259are lower-case). */)
48b0f3ae 8260 (prompt, default_coding_system)
9b787f3e 8261 Lisp_Object prompt, default_coding_system;
4ed46869 8262{
f44d27ce 8263 Lisp_Object val;
c7183fb8
GM
8264 int count = SPECPDL_INDEX ();
8265
9b787f3e 8266 if (SYMBOLP (default_coding_system))
57d25e6f 8267 default_coding_system = SYMBOL_NAME (default_coding_system);
c7183fb8 8268 specbind (Qcompletion_ignore_case, Qt);
4608c386 8269 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
8270 Qt, Qnil, Qcoding_system_history,
8271 default_coding_system, Qnil);
c7183fb8 8272 unbind_to (count, Qnil);
8f924df7 8273 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
8274}
8275
8276DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8277 1, 1, 0,
48b0f3ae 8278 doc: /* Check validity of CODING-SYSTEM.
9ffd559c
KH
8279If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8280It is valid if it is nil or a symbol defined as a coding system by the
8281function `define-coding-system'. */)
df7492f9 8282 (coding_system)
4ed46869
KH
8283 Lisp_Object coding_system;
8284{
44e8490d
KH
8285 Lisp_Object define_form;
8286
8287 define_form = Fget (coding_system, Qcoding_system_define_form);
8288 if (! NILP (define_form))
8289 {
8290 Fput (coding_system, Qcoding_system_define_form, Qnil);
8291 safe_eval (define_form);
8292 }
4ed46869
KH
8293 if (!NILP (Fcoding_system_p (coding_system)))
8294 return coding_system;
fcad4ec4 8295 xsignal1 (Qcoding_system_error, coding_system);
4ed46869 8296}
df7492f9 8297
3a73fa5d 8298\f
89528eb3
KH
8299/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
8300 HIGHEST is nonzero, return the coding system of the highest
ad1746f5 8301 priority among the detected coding systems. Otherwise return a
89528eb3
KH
8302 list of detected coding systems sorted by their priorities. If
8303 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8304 multibyte form but contains only ASCII and eight-bit chars.
8305 Otherwise, the bytes are raw bytes.
8306
8307 CODING-SYSTEM controls the detection as below:
8308
8309 If it is nil, detect both text-format and eol-format. If the
8310 text-format part of CODING-SYSTEM is already specified
8311 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
8312 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8313 detect only text-format. */
8314
d46c5b12 8315Lisp_Object
24a73b0a
KH
8316detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
8317 coding_system)
8f924df7 8318 const unsigned char *src;
13818c30
SM
8319 EMACS_INT src_chars, src_bytes;
8320 int highest;
0a28aafb 8321 int multibytep;
df7492f9 8322 Lisp_Object coding_system;
4ed46869 8323{
8f924df7 8324 const unsigned char *src_end = src + src_bytes;
df7492f9 8325 Lisp_Object attrs, eol_type;
4533845d 8326 Lisp_Object val = Qnil;
df7492f9 8327 struct coding_system coding;
89528eb3 8328 int id;
ff0dacd7 8329 struct coding_detection_info detect_info;
24a73b0a 8330 enum coding_category base_category;
2f3cbb32 8331 int null_byte_found = 0, eight_bit_found = 0;
b73bfc1c 8332
df7492f9
KH
8333 if (NILP (coding_system))
8334 coding_system = Qundecided;
8335 setup_coding_system (coding_system, &coding);
8336 attrs = CODING_ID_ATTRS (coding.id);
8337 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 8338 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 8339
df7492f9 8340 coding.source = src;
24a73b0a 8341 coding.src_chars = src_chars;
df7492f9
KH
8342 coding.src_bytes = src_bytes;
8343 coding.src_multibyte = multibytep;
8344 coding.consumed = 0;
89528eb3 8345 coding.mode |= CODING_MODE_LAST_BLOCK;
c0e16b14 8346 coding.head_ascii = 0;
d46c5b12 8347
ff0dacd7 8348 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 8349
89528eb3 8350 /* At first, detect text-format if necessary. */
24a73b0a
KH
8351 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8352 if (base_category == coding_category_undecided)
4ed46869 8353 {
ff0dacd7
KH
8354 enum coding_category category;
8355 struct coding_system *this;
8356 int c, i;
88993dfd 8357
24a73b0a 8358 /* Skip all ASCII bytes except for a few ISO2022 controls. */
2f3cbb32 8359 for (; src < src_end; src++)
4ed46869 8360 {
df7492f9 8361 c = *src;
6cb21a4f 8362 if (c & 0x80)
6cb21a4f 8363 {
2f3cbb32 8364 eight_bit_found = 1;
2f3cbb32
KH
8365 if (null_byte_found)
8366 break;
8367 }
c0e16b14 8368 else if (c < 0x20)
2f3cbb32
KH
8369 {
8370 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8371 && ! inhibit_iso_escape_detection
8372 && ! detect_info.checked)
6cb21a4f 8373 {
2f3cbb32
KH
8374 if (detect_coding_iso_2022 (&coding, &detect_info))
8375 {
8376 /* We have scanned the whole data. */
8377 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
c0e16b14
KH
8378 {
8379 /* We didn't find an 8-bit code. We may
8380 have found a null-byte, but it's very
8381 rare that a binary file confirm to
8382 ISO-2022. */
8383 src = src_end;
8384 coding.head_ascii = src - coding.source;
8385 }
8386 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
2f3cbb32
KH
8387 break;
8388 }
8389 }
97b1b294 8390 else if (! c && !inhibit_null_byte_detection)
2f3cbb32
KH
8391 {
8392 null_byte_found = 1;
8393 if (eight_bit_found)
8394 break;
6cb21a4f 8395 }
c006c0c8
KH
8396 if (! eight_bit_found)
8397 coding.head_ascii++;
6cb21a4f 8398 }
c006c0c8 8399 else if (! eight_bit_found)
c0e16b14 8400 coding.head_ascii++;
4ed46869 8401 }
88993dfd 8402
2f3cbb32
KH
8403 if (null_byte_found || eight_bit_found
8404 || coding.head_ascii < coding.src_bytes
6cb21a4f
KH
8405 || detect_info.found)
8406 {
2f3cbb32 8407 if (coding.head_ascii == coding.src_bytes)
6cb21a4f
KH
8408 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
8409 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7 8410 {
6cb21a4f 8411 category = coding_priorities[i];
c7266f4a 8412 this = coding_categories + category;
6cb21a4f 8413 if (detect_info.found & (1 << category))
ff0dacd7
KH
8414 break;
8415 }
6cb21a4f 8416 else
2f3cbb32
KH
8417 {
8418 if (null_byte_found)
8419 {
8420 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8421 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8422 }
8423 for (i = 0; i < coding_category_raw_text; i++)
8424 {
8425 category = coding_priorities[i];
8426 this = coding_categories + category;
6cb21a4f 8427
2f3cbb32
KH
8428 if (this->id < 0)
8429 {
8430 /* No coding system of this category is defined. */
8431 detect_info.rejected |= (1 << category);
8432 }
8433 else if (category >= coding_category_raw_text)
8434 continue;
8435 else if (detect_info.checked & (1 << category))
8436 {
8437 if (highest
8438 && (detect_info.found & (1 << category)))
6cb21a4f 8439 break;
2f3cbb32
KH
8440 }
8441 else if ((*(this->detector)) (&coding, &detect_info)
8442 && highest
8443 && (detect_info.found & (1 << category)))
8444 {
8445 if (category == coding_category_utf_16_auto)
8446 {
8447 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8448 category = coding_category_utf_16_le;
8449 else
8450 category = coding_category_utf_16_be;
8451 }
8452 break;
8453 }
8454 }
8455 }
6cb21a4f 8456 }
ec6d2bb8 8457
4cddb209
KH
8458 if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8459 || null_byte_found)
ec6d2bb8 8460 {
ff0dacd7 8461 detect_info.found = CATEGORY_MASK_RAW_TEXT;
4cddb209 8462 id = CODING_SYSTEM_ID (Qno_conversion);
89528eb3
KH
8463 val = Fcons (make_number (id), Qnil);
8464 }
ff0dacd7 8465 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 8466 {
ff0dacd7 8467 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
8468 id = coding_categories[coding_category_undecided].id;
8469 val = Fcons (make_number (id), Qnil);
8470 }
8471 else if (highest)
8472 {
ff0dacd7 8473 if (detect_info.found)
ec6d2bb8 8474 {
ff0dacd7
KH
8475 detect_info.found = 1 << category;
8476 val = Fcons (make_number (this->id), Qnil);
8477 }
8478 else
8479 for (i = 0; i < coding_category_raw_text; i++)
8480 if (! (detect_info.rejected & (1 << coding_priorities[i])))
8481 {
8482 detect_info.found = 1 << coding_priorities[i];
8483 id = coding_categories[coding_priorities[i]].id;
8484 val = Fcons (make_number (id), Qnil);
8485 break;
8486 }
8487 }
89528eb3
KH
8488 else
8489 {
ff0dacd7
KH
8490 int mask = detect_info.rejected | detect_info.found;
8491 int found = 0;
ec6d2bb8 8492
89528eb3 8493 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
8494 {
8495 category = coding_priorities[i];
8496 if (! (mask & (1 << category)))
ec6d2bb8 8497 {
ff0dacd7
KH
8498 found |= 1 << category;
8499 id = coding_categories[category].id;
c7266f4a
KH
8500 if (id >= 0)
8501 val = Fcons (make_number (id), val);
ff0dacd7
KH
8502 }
8503 }
8504 for (i = coding_category_raw_text - 1; i >= 0; i--)
8505 {
8506 category = coding_priorities[i];
8507 if (detect_info.found & (1 << category))
8508 {
8509 id = coding_categories[category].id;
8510 val = Fcons (make_number (id), val);
ec6d2bb8 8511 }
ec6d2bb8 8512 }
ff0dacd7 8513 detect_info.found |= found;
ec6d2bb8 8514 }
ec6d2bb8 8515 }
a470d443
KH
8516 else if (base_category == coding_category_utf_8_auto)
8517 {
8518 if (detect_coding_utf_8 (&coding, &detect_info))
8519 {
8520 struct coding_system *this;
8521
8522 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8523 this = coding_categories + coding_category_utf_8_sig;
8524 else
8525 this = coding_categories + coding_category_utf_8_nosig;
8526 val = Fcons (make_number (this->id), Qnil);
8527 }
8528 }
24a73b0a
KH
8529 else if (base_category == coding_category_utf_16_auto)
8530 {
8531 if (detect_coding_utf_16 (&coding, &detect_info))
8532 {
24a73b0a
KH
8533 struct coding_system *this;
8534
8535 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8536 this = coding_categories + coding_category_utf_16_le;
8537 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8538 this = coding_categories + coding_category_utf_16_be;
8539 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8540 this = coding_categories + coding_category_utf_16_be_nosig;
8541 else
8542 this = coding_categories + coding_category_utf_16_le_nosig;
8543 val = Fcons (make_number (this->id), Qnil);
8544 }
8545 }
df7492f9
KH
8546 else
8547 {
ff0dacd7 8548 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 8549 val = Fcons (make_number (coding.id), Qnil);
4ed46869 8550 }
df7492f9 8551
89528eb3 8552 /* Then, detect eol-format if necessary. */
df7492f9 8553 {
4533845d 8554 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
df7492f9
KH
8555 Lisp_Object tail;
8556
89528eb3
KH
8557 if (VECTORP (eol_type))
8558 {
ff0dacd7 8559 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
2f3cbb32
KH
8560 {
8561 if (null_byte_found)
8562 normal_eol = EOL_SEEN_LF;
8563 else
8564 normal_eol = detect_eol (coding.source, src_bytes,
8565 coding_category_raw_text);
8566 }
ff0dacd7
KH
8567 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8568 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
8569 utf_16_be_eol = detect_eol (coding.source, src_bytes,
8570 coding_category_utf_16_be);
ff0dacd7
KH
8571 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8572 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
8573 utf_16_le_eol = detect_eol (coding.source, src_bytes,
8574 coding_category_utf_16_le);
8575 }
8576 else
8577 {
8578 if (EQ (eol_type, Qunix))
8579 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8580 else if (EQ (eol_type, Qdos))
8581 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8582 else
8583 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8584 }
8585
df7492f9
KH
8586 for (tail = val; CONSP (tail); tail = XCDR (tail))
8587 {
89528eb3 8588 enum coding_category category;
df7492f9 8589 int this_eol;
89528eb3
KH
8590
8591 id = XINT (XCAR (tail));
8592 attrs = CODING_ID_ATTRS (id);
8593 category = XINT (CODING_ATTR_CATEGORY (attrs));
8594 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
8595 if (VECTORP (eol_type))
8596 {
89528eb3
KH
8597 if (category == coding_category_utf_16_be
8598 || category == coding_category_utf_16_be_nosig)
8599 this_eol = utf_16_be_eol;
8600 else if (category == coding_category_utf_16_le
8601 || category == coding_category_utf_16_le_nosig)
8602 this_eol = utf_16_le_eol;
df7492f9 8603 else
89528eb3
KH
8604 this_eol = normal_eol;
8605
df7492f9
KH
8606 if (this_eol == EOL_SEEN_LF)
8607 XSETCAR (tail, AREF (eol_type, 0));
8608 else if (this_eol == EOL_SEEN_CRLF)
8609 XSETCAR (tail, AREF (eol_type, 1));
8610 else if (this_eol == EOL_SEEN_CR)
8611 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
8612 else
8613 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 8614 }
89528eb3
KH
8615 else
8616 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
8617 }
8618 }
ec6d2bb8 8619
4533845d 8620 return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
ec6d2bb8
KH
8621}
8622
ec6d2bb8 8623
d46c5b12
KH
8624DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8625 2, 3, 0,
48b0f3ae
PJ
8626 doc: /* Detect coding system of the text in the region between START and END.
8627Return a list of possible coding systems ordered by priority.
b811c52b
KH
8628The coding systems to try and their priorities follows what
8629the function `coding-system-priority-list' (which see) returns.
ec6d2bb8 8630
12e0131a 8631If only ASCII characters are found (except for such ISO-2022 control
d4a1d553
JB
8632characters as ESC), it returns a list of single element `undecided'
8633or its subsidiary coding system according to a detected end-of-line
8634format.
ec6d2bb8 8635
48b0f3ae
PJ
8636If optional argument HIGHEST is non-nil, return the coding system of
8637highest priority. */)
8638 (start, end, highest)
d46c5b12
KH
8639 Lisp_Object start, end, highest;
8640{
8641 int from, to;
8642 int from_byte, to_byte;
ec6d2bb8 8643
b7826503
PJ
8644 CHECK_NUMBER_COERCE_MARKER (start);
8645 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 8646
d46c5b12
KH
8647 validate_region (&start, &end);
8648 from = XINT (start), to = XINT (end);
8649 from_byte = CHAR_TO_BYTE (from);
8650 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 8651
d46c5b12
KH
8652 if (from < GPT && to >= GPT)
8653 move_gap_both (to, to_byte);
c210f766 8654
d46c5b12 8655 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 8656 to - from, to_byte - from_byte,
0a28aafb
KH
8657 !NILP (highest),
8658 !NILP (current_buffer
df7492f9
KH
8659 ->enable_multibyte_characters),
8660 Qnil);
ec6d2bb8
KH
8661}
8662
d46c5b12
KH
8663DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8664 1, 2, 0,
48b0f3ae
PJ
8665 doc: /* Detect coding system of the text in STRING.
8666Return a list of possible coding systems ordered by priority.
67ceab9d
KH
8667The coding systems to try and their priorities follows what
8668the function `coding-system-priority-list' (which see) returns.
fb88bf2d 8669
12e0131a 8670If only ASCII characters are found (except for such ISO-2022 control
d4a1d553
JB
8671characters as ESC), it returns a list of single element `undecided'
8672or its subsidiary coding system according to a detected end-of-line
8673format.
d46c5b12 8674
48b0f3ae
PJ
8675If optional argument HIGHEST is non-nil, return the coding system of
8676highest priority. */)
8677 (string, highest)
d46c5b12
KH
8678 Lisp_Object string, highest;
8679{
b7826503 8680 CHECK_STRING (string);
b73bfc1c 8681
24a73b0a
KH
8682 return detect_coding_system (SDATA (string),
8683 SCHARS (string), SBYTES (string),
8f924df7 8684 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 8685 Qnil);
4ed46869 8686}
4ed46869 8687
b73bfc1c 8688
df7492f9
KH
8689static INLINE int
8690char_encodable_p (c, attrs)
8691 int c;
8692 Lisp_Object attrs;
05e6f5dc 8693{
df7492f9 8694 Lisp_Object tail;
df7492f9 8695 struct charset *charset;
7d64c6ad 8696 Lisp_Object translation_table;
d46c5b12 8697
7d64c6ad 8698 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 8699 if (! NILP (translation_table))
7d64c6ad 8700 c = translate_char (translation_table, c);
df7492f9
KH
8701 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8702 CONSP (tail); tail = XCDR (tail))
e133c8fa 8703 {
df7492f9
KH
8704 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8705 if (CHAR_CHARSET_P (c, charset))
8706 break;
e133c8fa 8707 }
df7492f9 8708 return (! NILP (tail));
05e6f5dc 8709}
83fa074f 8710
fb88bf2d 8711
df7492f9
KH
8712/* Return a list of coding systems that safely encode the text between
8713 START and END. If EXCLUDE is non-nil, it is a list of coding
8714 systems not to check. The returned list doesn't contain any such
48468dac 8715 coding systems. In any case, if the text contains only ASCII or is
df7492f9 8716 unibyte, return t. */
e077cc80 8717
df7492f9
KH
8718DEFUN ("find-coding-systems-region-internal",
8719 Ffind_coding_systems_region_internal,
8720 Sfind_coding_systems_region_internal, 2, 3, 0,
8721 doc: /* Internal use only. */)
8722 (start, end, exclude)
8723 Lisp_Object start, end, exclude;
8724{
8725 Lisp_Object coding_attrs_list, safe_codings;
8726 EMACS_INT start_byte, end_byte;
7c78e542 8727 const unsigned char *p, *pbeg, *pend;
df7492f9 8728 int c;
0e727afa 8729 Lisp_Object tail, elt, work_table;
d46c5b12 8730
df7492f9
KH
8731 if (STRINGP (start))
8732 {
8733 if (!STRING_MULTIBYTE (start)
8f924df7 8734 || SCHARS (start) == SBYTES (start))
df7492f9
KH
8735 return Qt;
8736 start_byte = 0;
8f924df7 8737 end_byte = SBYTES (start);
df7492f9
KH
8738 }
8739 else
d46c5b12 8740 {
df7492f9
KH
8741 CHECK_NUMBER_COERCE_MARKER (start);
8742 CHECK_NUMBER_COERCE_MARKER (end);
8743 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8744 args_out_of_range (start, end);
8745 if (NILP (current_buffer->enable_multibyte_characters))
8746 return Qt;
8747 start_byte = CHAR_TO_BYTE (XINT (start));
8748 end_byte = CHAR_TO_BYTE (XINT (end));
8749 if (XINT (end) - XINT (start) == end_byte - start_byte)
8750 return Qt;
d46c5b12 8751
e1c23804 8752 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 8753 {
e1c23804
DL
8754 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8755 move_gap_both (XINT (start), start_byte);
df7492f9 8756 else
e1c23804 8757 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
8758 }
8759 }
8760
df7492f9
KH
8761 coding_attrs_list = Qnil;
8762 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8763 if (NILP (exclude)
8764 || NILP (Fmemq (XCAR (tail), exclude)))
8765 {
8766 Lisp_Object attrs;
d46c5b12 8767
df7492f9
KH
8768 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8769 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8770 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
8771 {
8772 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 8773 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
8774 coding_attrs_list = Fcons (attrs, coding_attrs_list);
8775 }
df7492f9 8776 }
d46c5b12 8777
df7492f9 8778 if (STRINGP (start))
8f924df7 8779 p = pbeg = SDATA (start);
df7492f9
KH
8780 else
8781 p = pbeg = BYTE_POS_ADDR (start_byte);
8782 pend = p + (end_byte - start_byte);
b843d1ae 8783
df7492f9
KH
8784 while (p < pend && ASCII_BYTE_P (*p)) p++;
8785 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 8786
0e727afa 8787 work_table = Fmake_char_table (Qnil, Qnil);
05e6f5dc 8788 while (p < pend)
72d1a715 8789 {
df7492f9
KH
8790 if (ASCII_BYTE_P (*p))
8791 p++;
72d1a715
RS
8792 else
8793 {
df7492f9 8794 c = STRING_CHAR_ADVANCE (p);
0e727afa
YM
8795 if (!NILP (char_table_ref (work_table, c)))
8796 /* This character was already checked. Ignore it. */
8797 continue;
12410ef1 8798
df7492f9
KH
8799 charset_map_loaded = 0;
8800 for (tail = coding_attrs_list; CONSP (tail);)
8801 {
8802 elt = XCAR (tail);
8803 if (NILP (elt))
8804 tail = XCDR (tail);
8805 else if (char_encodable_p (c, elt))
8806 tail = XCDR (tail);
8807 else if (CONSP (XCDR (tail)))
8808 {
8809 XSETCAR (tail, XCAR (XCDR (tail)));
8810 XSETCDR (tail, XCDR (XCDR (tail)));
8811 }
8812 else
8813 {
8814 XSETCAR (tail, Qnil);
8815 tail = XCDR (tail);
8816 }
8817 }
8818 if (charset_map_loaded)
8819 {
8820 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 8821
df7492f9 8822 if (STRINGP (start))
8f924df7 8823 pbeg = SDATA (start);
df7492f9
KH
8824 else
8825 pbeg = BYTE_POS_ADDR (start_byte);
8826 p = pbeg + p_offset;
8827 pend = pbeg + pend_offset;
8828 }
0e727afa 8829 char_table_set (work_table, c, Qt);
df7492f9 8830 }
ec6d2bb8 8831 }
fb88bf2d 8832
988b3759 8833 safe_codings = list2 (Qraw_text, Qno_conversion);
df7492f9
KH
8834 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8835 if (! NILP (XCAR (tail)))
8836 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 8837
05e6f5dc
KH
8838 return safe_codings;
8839}
4956c225 8840
d46c5b12 8841
8f924df7
KH
8842DEFUN ("unencodable-char-position", Funencodable_char_position,
8843 Sunencodable_char_position, 3, 5, 0,
8844 doc: /*
8845Return position of first un-encodable character in a region.
d4a1d553 8846START and END specify the region and CODING-SYSTEM specifies the
8f924df7 8847encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 8848
8f924df7
KH
8849If optional 4th argument COUNT is non-nil, it specifies at most how
8850many un-encodable characters to search. In this case, the value is a
8851list of positions.
d46c5b12 8852
8f924df7
KH
8853If optional 5th argument STRING is non-nil, it is a string to search
8854for un-encodable characters. In that case, START and END are indexes
8855to the string. */)
8856 (start, end, coding_system, count, string)
8857 Lisp_Object start, end, coding_system, count, string;
8858{
8859 int n;
8860 struct coding_system coding;
7d64c6ad 8861 Lisp_Object attrs, charset_list, translation_table;
8f924df7
KH
8862 Lisp_Object positions;
8863 int from, to;
8864 const unsigned char *p, *stop, *pend;
8865 int ascii_compatible;
fb88bf2d 8866
8f924df7
KH
8867 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8868 attrs = CODING_ID_ATTRS (coding.id);
8869 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8870 return Qnil;
8871 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8872 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 8873 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 8874
8f924df7
KH
8875 if (NILP (string))
8876 {
8877 validate_region (&start, &end);
8878 from = XINT (start);
8879 to = XINT (end);
8880 if (NILP (current_buffer->enable_multibyte_characters)
8881 || (ascii_compatible
8882 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8883 return Qnil;
8884 p = CHAR_POS_ADDR (from);
8885 pend = CHAR_POS_ADDR (to);
8886 if (from < GPT && to >= GPT)
8887 stop = GPT_ADDR;
8888 else
8889 stop = pend;
8890 }
8891 else
8892 {
8893 CHECK_STRING (string);
8894 CHECK_NATNUM (start);
8895 CHECK_NATNUM (end);
8896 from = XINT (start);
8897 to = XINT (end);
8898 if (from > to
8899 || to > SCHARS (string))
8900 args_out_of_range_3 (string, start, end);
8901 if (! STRING_MULTIBYTE (string))
8902 return Qnil;
8903 p = SDATA (string) + string_char_to_byte (string, from);
8904 stop = pend = SDATA (string) + string_char_to_byte (string, to);
8905 if (ascii_compatible && (to - from) == (pend - p))
8906 return Qnil;
8907 }
f2558efd 8908
8f924df7
KH
8909 if (NILP (count))
8910 n = 1;
8911 else
b73bfc1c 8912 {
8f924df7
KH
8913 CHECK_NATNUM (count);
8914 n = XINT (count);
b73bfc1c
KH
8915 }
8916
8f924df7
KH
8917 positions = Qnil;
8918 while (1)
d46c5b12 8919 {
8f924df7 8920 int c;
ec6d2bb8 8921
8f924df7
KH
8922 if (ascii_compatible)
8923 while (p < stop && ASCII_BYTE_P (*p))
8924 p++, from++;
8925 if (p >= stop)
0e79d667 8926 {
8f924df7
KH
8927 if (p >= pend)
8928 break;
8929 stop = pend;
8930 p = GAP_END_ADDR;
0e79d667 8931 }
ec6d2bb8 8932
8f924df7
KH
8933 c = STRING_CHAR_ADVANCE (p);
8934 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
8935 && ! char_charset (translate_char (translation_table, c),
8936 charset_list, NULL))
ec6d2bb8 8937 {
8f924df7
KH
8938 positions = Fcons (make_number (from), positions);
8939 n--;
8940 if (n == 0)
8941 break;
ec6d2bb8
KH
8942 }
8943
8f924df7
KH
8944 from++;
8945 }
d46c5b12 8946
8f924df7
KH
8947 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8948}
d46c5b12 8949
d46c5b12 8950
df7492f9
KH
8951DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8952 Scheck_coding_systems_region, 3, 3, 0,
8953 doc: /* Check if the region is encodable by coding systems.
d46c5b12 8954
df7492f9
KH
8955START and END are buffer positions specifying the region.
8956CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 8957
df7492f9 8958The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
d4a1d553 8959CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
df7492f9
KH
8960whole region, POS0, POS1, ... are buffer positions where non-encodable
8961characters are found.
93dec019 8962
df7492f9
KH
8963If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8964value is nil.
93dec019 8965
df7492f9
KH
8966START may be a string. In that case, check if the string is
8967encodable, and the value contains indices to the string instead of
5704f39a
KH
8968buffer positions. END is ignored.
8969
4c1958f4 8970If the current buffer (or START if it is a string) is unibyte, the value
5704f39a 8971is nil. */)
df7492f9
KH
8972 (start, end, coding_system_list)
8973 Lisp_Object start, end, coding_system_list;
05e6f5dc 8974{
df7492f9
KH
8975 Lisp_Object list;
8976 EMACS_INT start_byte, end_byte;
8977 int pos;
7c78e542 8978 const unsigned char *p, *pbeg, *pend;
df7492f9 8979 int c;
7d64c6ad 8980 Lisp_Object tail, elt, attrs;
70ad9fc4 8981
05e6f5dc
KH
8982 if (STRINGP (start))
8983 {
df7492f9 8984 if (!STRING_MULTIBYTE (start)
4c1958f4 8985 || SCHARS (start) == SBYTES (start))
df7492f9
KH
8986 return Qnil;
8987 start_byte = 0;
8f924df7 8988 end_byte = SBYTES (start);
df7492f9 8989 pos = 0;
d46c5b12 8990 }
05e6f5dc 8991 else
b73bfc1c 8992 {
b7826503
PJ
8993 CHECK_NUMBER_COERCE_MARKER (start);
8994 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
8995 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8996 args_out_of_range (start, end);
8997 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
8998 return Qnil;
8999 start_byte = CHAR_TO_BYTE (XINT (start));
9000 end_byte = CHAR_TO_BYTE (XINT (end));
9001 if (XINT (end) - XINT (start) == end_byte - start_byte)
5704f39a 9002 return Qnil;
df7492f9 9003
e1c23804 9004 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 9005 {
e1c23804
DL
9006 if ((GPT - XINT (start)) < (XINT (end) - GPT))
9007 move_gap_both (XINT (start), start_byte);
df7492f9 9008 else
e1c23804 9009 move_gap_both (XINT (end), end_byte);
b73bfc1c 9010 }
e1c23804 9011 pos = XINT (start);
b73bfc1c 9012 }
7553d0e1 9013
df7492f9
KH
9014 list = Qnil;
9015 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 9016 {
df7492f9 9017 elt = XCAR (tail);
7d64c6ad 9018 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
9019 ASET (attrs, coding_attr_trans_tbl,
9020 get_translation_table (attrs, 1, NULL));
7d64c6ad 9021 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
9022 }
9023
df7492f9 9024 if (STRINGP (start))
8f924df7 9025 p = pbeg = SDATA (start);
72d1a715 9026 else
df7492f9
KH
9027 p = pbeg = BYTE_POS_ADDR (start_byte);
9028 pend = p + (end_byte - start_byte);
4ed46869 9029
df7492f9
KH
9030 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
9031 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 9032
df7492f9 9033 while (p < pend)
d46c5b12 9034 {
df7492f9
KH
9035 if (ASCII_BYTE_P (*p))
9036 p++;
e133c8fa 9037 else
05e6f5dc 9038 {
df7492f9
KH
9039 c = STRING_CHAR_ADVANCE (p);
9040
9041 charset_map_loaded = 0;
9042 for (tail = list; CONSP (tail); tail = XCDR (tail))
9043 {
9044 elt = XCDR (XCAR (tail));
9045 if (! char_encodable_p (c, XCAR (elt)))
9046 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
9047 }
9048 if (charset_map_loaded)
9049 {
9050 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
9051
9052 if (STRINGP (start))
8f924df7 9053 pbeg = SDATA (start);
df7492f9
KH
9054 else
9055 pbeg = BYTE_POS_ADDR (start_byte);
9056 p = pbeg + p_offset;
9057 pend = pbeg + pend_offset;
9058 }
05e6f5dc 9059 }
df7492f9 9060 pos++;
d46c5b12 9061 }
4ed46869 9062
df7492f9
KH
9063 tail = list;
9064 list = Qnil;
9065 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 9066 {
df7492f9
KH
9067 elt = XCAR (tail);
9068 if (CONSP (XCDR (XCDR (elt))))
9069 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
9070 list);
ec6d2bb8 9071 }
2b4f9037 9072
df7492f9 9073 return list;
d46c5b12
KH
9074}
9075
3fd9494b 9076
b73bfc1c 9077Lisp_Object
df7492f9
KH
9078code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
9079 Lisp_Object start, end, coding_system, dst_object;
9080 int encodep, norecord;
4ed46869 9081{
3a73fa5d 9082 struct coding_system coding;
df7492f9
KH
9083 EMACS_INT from, from_byte, to, to_byte;
9084 Lisp_Object src_object;
4ed46869 9085
b7826503
PJ
9086 CHECK_NUMBER_COERCE_MARKER (start);
9087 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
9088 if (NILP (coding_system))
9089 coding_system = Qno_conversion;
9090 else
9091 CHECK_CODING_SYSTEM (coding_system);
9092 src_object = Fcurrent_buffer ();
9093 if (NILP (dst_object))
9094 dst_object = src_object;
9095 else if (! EQ (dst_object, Qt))
9096 CHECK_BUFFER (dst_object);
3a73fa5d 9097
d46c5b12
KH
9098 validate_region (&start, &end);
9099 from = XFASTINT (start);
df7492f9 9100 from_byte = CHAR_TO_BYTE (from);
d46c5b12 9101 to = XFASTINT (end);
df7492f9 9102 to_byte = CHAR_TO_BYTE (to);
764ca8da 9103
df7492f9
KH
9104 setup_coding_system (coding_system, &coding);
9105 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 9106
df7492f9
KH
9107 if (encodep)
9108 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9109 dst_object);
9110 else
9111 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9112 dst_object);
9113 if (! norecord)
9114 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 9115
df7492f9
KH
9116 return (BUFFERP (dst_object)
9117 ? make_number (coding.produced_char)
9118 : coding.dst_object);
4031e2bf 9119}
78108bcd 9120
4ed46869 9121
4031e2bf 9122DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 9123 3, 4, "r\nzCoding system: ",
48b0f3ae 9124 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
9125When called from a program, takes four arguments:
9126 START, END, CODING-SYSTEM, and DESTINATION.
9127START and END are buffer positions.
8844fa83 9128
df7492f9 9129Optional 4th arguments DESTINATION specifies where the decoded text goes.
2354b80b 9130If nil, the region between START and END is replaced by the decoded text.
1560f91a
EZ
9131If buffer, the decoded text is inserted in that buffer after point (point
9132does not move).
446dcd75 9133In those cases, the length of the decoded text is returned.
319a3947 9134If DESTINATION is t, the decoded text is returned.
8844fa83 9135
48b0f3ae
PJ
9136This function sets `last-coding-system-used' to the precise coding system
9137used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
319a3947 9138not fully specified.) */)
df7492f9
KH
9139 (start, end, coding_system, destination)
9140 Lisp_Object start, end, coding_system, destination;
4031e2bf 9141{
df7492f9 9142 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 9143}
8844fa83 9144
3a73fa5d 9145DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
9146 3, 4, "r\nzCoding system: ",
9147 doc: /* Encode the current region by specified coding system.
d4a1d553
JB
9148When called from a program, takes four arguments:
9149 START, END, CODING-SYSTEM and DESTINATION.
9150START and END are buffer positions.
d46c5b12 9151
df7492f9
KH
9152Optional 4th arguments DESTINATION specifies where the encoded text goes.
9153If nil, the region between START and END is replace by the encoded text.
1560f91a
EZ
9154If buffer, the encoded text is inserted in that buffer after point (point
9155does not move).
446dcd75 9156In those cases, the length of the encoded text is returned.
319a3947 9157If DESTINATION is t, the encoded text is returned.
2391eaa4 9158
48b0f3ae
PJ
9159This function sets `last-coding-system-used' to the precise coding system
9160used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
319a3947 9161not fully specified.) */)
df7492f9
KH
9162 (start, end, coding_system, destination)
9163 Lisp_Object start, end, coding_system, destination;
3a73fa5d 9164{
df7492f9 9165 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
9166}
9167
9168Lisp_Object
df7492f9
KH
9169code_convert_string (string, coding_system, dst_object,
9170 encodep, nocopy, norecord)
9171 Lisp_Object string, coding_system, dst_object;
9172 int encodep, nocopy, norecord;
b73bfc1c 9173{
4031e2bf 9174 struct coding_system coding;
df7492f9 9175 EMACS_INT chars, bytes;
ec6d2bb8 9176
b7826503 9177 CHECK_STRING (string);
d46c5b12 9178 if (NILP (coding_system))
4956c225 9179 {
df7492f9
KH
9180 if (! norecord)
9181 Vlast_coding_system_used = Qno_conversion;
9182 if (NILP (dst_object))
9183 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 9184 }
b73bfc1c 9185
df7492f9
KH
9186 if (NILP (coding_system))
9187 coding_system = Qno_conversion;
9188 else
9189 CHECK_CODING_SYSTEM (coding_system);
9190 if (NILP (dst_object))
9191 dst_object = Qt;
9192 else if (! EQ (dst_object, Qt))
9193 CHECK_BUFFER (dst_object);
73be902c 9194
df7492f9 9195 setup_coding_system (coding_system, &coding);
d46c5b12 9196 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
9197 chars = SCHARS (string);
9198 bytes = SBYTES (string);
df7492f9
KH
9199 if (encodep)
9200 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9201 else
9202 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9203 if (! norecord)
9204 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 9205
df7492f9
KH
9206 return (BUFFERP (dst_object)
9207 ? make_number (coding.produced_char)
9208 : coding.dst_object);
4ed46869 9209}
73be902c 9210
b73bfc1c 9211
ecec61c1 9212/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 9213 Do not set Vlast_coding_system_used.
4ed46869 9214
ec6d2bb8
KH
9215 This function is called only from macros DECODE_FILE and
9216 ENCODE_FILE, thus we ignore character composition. */
4ed46869 9217
ecec61c1
KH
9218Lisp_Object
9219code_convert_string_norecord (string, coding_system, encodep)
9220 Lisp_Object string, coding_system;
9221 int encodep;
4ed46869 9222{
0be8721c 9223 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
9224}
9225
4ed46869 9226
df7492f9
KH
9227DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9228 2, 4, 0,
9229 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9230
9231Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9232if the decoding operation is trivial.
ecec61c1 9233
d4a1d553 9234Optional fourth arg BUFFER non-nil means that the decoded text is
1560f91a
EZ
9235inserted in that buffer after point (point does not move). In this
9236case, the return value is the length of the decoded text.
ecec61c1 9237
df7492f9
KH
9238This function sets `last-coding-system-used' to the precise coding system
9239used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
d4a1d553 9240not fully specified.) */)
df7492f9
KH
9241 (string, coding_system, nocopy, buffer)
9242 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 9243{
df7492f9
KH
9244 return code_convert_string (string, coding_system, buffer,
9245 0, ! NILP (nocopy), 0);
4ed46869
KH
9246}
9247
df7492f9
KH
9248DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9249 2, 4, 0,
9250 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9251
9252Optional third arg NOCOPY non-nil means it is OK to return STRING
9253itself if the encoding operation is trivial.
9254
d4a1d553 9255Optional fourth arg BUFFER non-nil means that the encoded text is
1560f91a
EZ
9256inserted in that buffer after point (point does not move). In this
9257case, the return value is the length of the encoded text.
df7492f9
KH
9258
9259This function sets `last-coding-system-used' to the precise coding system
9260used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9261not fully specified.) */)
9262 (string, coding_system, nocopy, buffer)
9263 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 9264{
df7492f9 9265 return code_convert_string (string, coding_system, buffer,
c197f191 9266 1, ! NILP (nocopy), 1);
4ed46869 9267}
df7492f9 9268
3a73fa5d 9269\f
4ed46869 9270DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
9271 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9272Return the corresponding character. */)
9273 (code)
4ed46869 9274 Lisp_Object code;
4ed46869 9275{
df7492f9
KH
9276 Lisp_Object spec, attrs, val;
9277 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9278 int c;
4ed46869 9279
df7492f9
KH
9280 CHECK_NATNUM (code);
9281 c = XFASTINT (code);
9282 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9283 attrs = AREF (spec, 0);
4ed46869 9284
df7492f9
KH
9285 if (ASCII_BYTE_P (c)
9286 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9287 return code;
4ed46869 9288
df7492f9
KH
9289 val = CODING_ATTR_CHARSET_LIST (attrs);
9290 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
9291 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9292 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 9293
df7492f9
KH
9294 if (c <= 0x7F)
9295 charset = charset_roman;
9296 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 9297 {
df7492f9
KH
9298 charset = charset_kana;
9299 c -= 0x80;
4ed46869 9300 }
55ab7be3 9301 else
4ed46869 9302 {
004068e4 9303 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
9304
9305 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9306 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9307 error ("Invalid code: %d", code);
9308 SJIS_TO_JIS (c);
9309 charset = charset_kanji;
4ed46869 9310 }
df7492f9
KH
9311 c = DECODE_CHAR (charset, c);
9312 if (c < 0)
9313 error ("Invalid code: %d", code);
9314 return make_number (c);
93dec019 9315}
4ed46869 9316
48b0f3ae 9317
4ed46869 9318DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8acf0c0e 9319 doc: /* Encode a Japanese character CH to shift_jis encoding.
48b0f3ae
PJ
9320Return the corresponding code in SJIS. */)
9321 (ch)
df7492f9 9322 Lisp_Object ch;
4ed46869 9323{
df7492f9
KH
9324 Lisp_Object spec, attrs, charset_list;
9325 int c;
9326 struct charset *charset;
9327 unsigned code;
48b0f3ae 9328
df7492f9
KH
9329 CHECK_CHARACTER (ch);
9330 c = XFASTINT (ch);
9331 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9332 attrs = AREF (spec, 0);
9333
9334 if (ASCII_CHAR_P (c)
9335 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9336 return ch;
9337
9338 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9339 charset = char_charset (c, charset_list, &code);
9340 if (code == CHARSET_INVALID_CODE (charset))
9341 error ("Can't encode by shift_jis encoding: %d", c);
9342 JIS_TO_SJIS (code);
9343
9344 return make_number (code);
4ed46869
KH
9345}
9346
9347DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
9348 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9349Return the corresponding character. */)
9350 (code)
4ed46869 9351 Lisp_Object code;
d46c5b12 9352{
df7492f9
KH
9353 Lisp_Object spec, attrs, val;
9354 struct charset *charset_roman, *charset_big5, *charset;
9355 int c;
6289dd10 9356
df7492f9
KH
9357 CHECK_NATNUM (code);
9358 c = XFASTINT (code);
9359 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9360 attrs = AREF (spec, 0);
4ed46869 9361
df7492f9
KH
9362 if (ASCII_BYTE_P (c)
9363 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9364 return code;
6289dd10 9365
df7492f9
KH
9366 val = CODING_ATTR_CHARSET_LIST (attrs);
9367 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9368 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 9369
df7492f9
KH
9370 if (c <= 0x7F)
9371 charset = charset_roman;
c28a9453
KH
9372 else
9373 {
df7492f9
KH
9374 int b1 = c >> 8, b2 = c & 0x7F;
9375 if (b1 < 0xA1 || b1 > 0xFE
9376 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9377 error ("Invalid code: %d", code);
9378 charset = charset_big5;
c28a9453 9379 }
df7492f9
KH
9380 c = DECODE_CHAR (charset, (unsigned )c);
9381 if (c < 0)
9382 error ("Invalid code: %d", code);
9383 return make_number (c);
d46c5b12 9384}
6289dd10 9385
4ed46869 9386DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8acf0c0e 9387 doc: /* Encode the Big5 character CH to BIG5 coding system.
48b0f3ae
PJ
9388Return the corresponding character code in Big5. */)
9389 (ch)
4ed46869
KH
9390 Lisp_Object ch;
9391{
df7492f9
KH
9392 Lisp_Object spec, attrs, charset_list;
9393 struct charset *charset;
9394 int c;
9395 unsigned code;
9396
9397 CHECK_CHARACTER (ch);
9398 c = XFASTINT (ch);
9399 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9400 attrs = AREF (spec, 0);
9401 if (ASCII_CHAR_P (c)
9402 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9403 return ch;
9404
9405 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9406 charset = char_charset (c, charset_list, &code);
9407 if (code == CHARSET_INVALID_CODE (charset))
9408 error ("Can't encode by Big5 encoding: %d", c);
9409
9410 return make_number (code);
4ed46869 9411}
48b0f3ae 9412
3a73fa5d 9413\f
002fdb44 9414DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
68bba4e4 9415 Sset_terminal_coding_system_internal, 1, 2, 0,
48b0f3ae 9416 doc: /* Internal use only. */)
6ed8eeff 9417 (coding_system, terminal)
b74e4686 9418 Lisp_Object coding_system;
6ed8eeff 9419 Lisp_Object terminal;
4ed46869 9420{
6ed8eeff 9421 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
b7826503 9422 CHECK_SYMBOL (coding_system);
b8299c66 9423 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
70c22245 9424 /* We had better not send unsafe characters to terminal. */
c73bd236 9425 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
ad1746f5 9426 /* Character composition should be disabled. */
c73bd236 9427 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b8299c66
KL
9428 terminal_coding->src_multibyte = 1;
9429 terminal_coding->dst_multibyte = 0;
4ed46869
KH
9430 return Qnil;
9431}
9432
c4825358
KH
9433DEFUN ("set-safe-terminal-coding-system-internal",
9434 Fset_safe_terminal_coding_system_internal,
48b0f3ae 9435 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 9436 doc: /* Internal use only. */)
48b0f3ae 9437 (coding_system)
b74e4686 9438 Lisp_Object coding_system;
d46c5b12 9439{
b7826503 9440 CHECK_SYMBOL (coding_system);
c4825358
KH
9441 setup_coding_system (Fcheck_coding_system (coding_system),
9442 &safe_terminal_coding);
ad1746f5 9443 /* Character composition should be disabled. */
df7492f9 9444 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
9445 safe_terminal_coding.src_multibyte = 1;
9446 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
9447 return Qnil;
9448}
4ed46869 9449
002fdb44 9450DEFUN ("terminal-coding-system", Fterminal_coding_system,
68bba4e4 9451 Sterminal_coding_system, 0, 1, 0,
6ed8eeff 9452 doc: /* Return coding system specified for terminal output on the given terminal.
708e05dc 9453TERMINAL may be a terminal object, a frame, or nil for the selected
6ed8eeff
KL
9454frame's terminal device. */)
9455 (terminal)
9456 Lisp_Object terminal;
4ed46869 9457{
985773c9
MB
9458 struct coding_system *terminal_coding
9459 = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9460 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
ae6f73fa 9461
ae6f73fa 9462 /* For backward compatibility, return nil if it is `undecided'. */
f75c90a9 9463 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
4ed46869
KH
9464}
9465
002fdb44 9466DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
68bba4e4 9467 Sset_keyboard_coding_system_internal, 1, 2, 0,
48b0f3ae 9468 doc: /* Internal use only. */)
6ed8eeff 9469 (coding_system, terminal)
4ed46869 9470 Lisp_Object coding_system;
6ed8eeff 9471 Lisp_Object terminal;
4ed46869 9472{
6ed8eeff 9473 struct terminal *t = get_terminal (terminal, 1);
b7826503 9474 CHECK_SYMBOL (coding_system);
624bda09
KH
9475 if (NILP (coding_system))
9476 coding_system = Qno_conversion;
9477 else
9478 Fcheck_coding_system (coding_system);
9479 setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
ad1746f5 9480 /* Character composition should be disabled. */
c73bd236
MB
9481 TERMINAL_KEYBOARD_CODING (t)->common_flags
9482 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
9483 return Qnil;
9484}
9485
9486DEFUN ("keyboard-coding-system",
985773c9 9487 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
48b0f3ae 9488 doc: /* Return coding system specified for decoding keyboard input. */)
985773c9
MB
9489 (terminal)
9490 Lisp_Object terminal;
4ed46869 9491{
985773c9
MB
9492 return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9493 (get_terminal (terminal, 1))->id);
4ed46869
KH
9494}
9495
4ed46869 9496\f
a5d301df
KH
9497DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9498 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
9499 doc: /* Choose a coding system for an operation based on the target name.
9500The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9501DECODING-SYSTEM is the coding system to use for decoding
9502\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9503for encoding (in case OPERATION does encoding).
05e6f5dc 9504
48b0f3ae
PJ
9505The first argument OPERATION specifies an I/O primitive:
9506 For file I/O, `insert-file-contents' or `write-region'.
9507 For process I/O, `call-process', `call-process-region', or `start-process'.
9508 For network I/O, `open-network-stream'.
05e6f5dc 9509
48b0f3ae
PJ
9510The remaining arguments should be the same arguments that were passed
9511to the primitive. Depending on which primitive, one of those arguments
9512is selected as the TARGET. For example, if OPERATION does file I/O,
9513whichever argument specifies the file name is TARGET.
05e6f5dc 9514
48b0f3ae 9515TARGET has a meaning which depends on OPERATION:
b883cdb2 9516 For file I/O, TARGET is a file name (except for the special case below).
48b0f3ae 9517 For process I/O, TARGET is a process name.
d4a1d553 9518 For network I/O, TARGET is a service name or a port number.
05e6f5dc 9519
d4a1d553 9520This function looks up what is specified for TARGET in
48b0f3ae
PJ
9521`file-coding-system-alist', `process-coding-system-alist',
9522or `network-coding-system-alist' depending on OPERATION.
9523They may specify a coding system, a cons of coding systems,
9524or a function symbol to call.
9525In the last case, we call the function with one argument,
9526which is a list of all the arguments given to this function.
1011c487
MB
9527If the function can't decide a coding system, it can return
9528`undecided' so that the normal code-detection is performed.
48b0f3ae 9529
b883cdb2
MB
9530If OPERATION is `insert-file-contents', the argument corresponding to
9531TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
9532file name to look up, and BUFFER is a buffer that contains the file's
9533contents (not yet decoded). If `file-coding-system-alist' specifies a
9534function to call for FILENAME, that function should examine the
9535contents of BUFFER instead of reading the file.
9536
d918f936 9537usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
48b0f3ae 9538 (nargs, args)
4ed46869
KH
9539 int nargs;
9540 Lisp_Object *args;
6b89e3aa 9541{
4ed46869
KH
9542 Lisp_Object operation, target_idx, target, val;
9543 register Lisp_Object chain;
177c0ea7 9544
4ed46869
KH
9545 if (nargs < 2)
9546 error ("Too few arguments");
9547 operation = args[0];
9548 if (!SYMBOLP (operation)
9549 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
3ed051d4 9550 error ("Invalid first argument");
4ed46869
KH
9551 if (nargs < 1 + XINT (target_idx))
9552 error ("Too few arguments for operation: %s",
8f924df7 9553 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
9554 target = args[XINT (target_idx) + 1];
9555 if (!(STRINGP (target)
091a0ff0
KH
9556 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9557 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
4ed46869 9558 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 9559 error ("Invalid %dth argument", XINT (target_idx) + 1);
091a0ff0
KH
9560 if (CONSP (target))
9561 target = XCAR (target);
4ed46869 9562
2e34157c
RS
9563 chain = ((EQ (operation, Qinsert_file_contents)
9564 || EQ (operation, Qwrite_region))
02ba4723 9565 ? Vfile_coding_system_alist
2e34157c 9566 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
9567 ? Vnetwork_coding_system_alist
9568 : Vprocess_coding_system_alist));
4ed46869
KH
9569 if (NILP (chain))
9570 return Qnil;
9571
03699b14 9572 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 9573 {
f44d27ce 9574 Lisp_Object elt;
6b89e3aa 9575
df7492f9 9576 elt = XCAR (chain);
4ed46869
KH
9577 if (CONSP (elt)
9578 && ((STRINGP (target)
03699b14
KR
9579 && STRINGP (XCAR (elt))
9580 && fast_string_match (XCAR (elt), target) >= 0)
9581 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 9582 {
03699b14 9583 val = XCDR (elt);
b19fd4c5
KH
9584 /* Here, if VAL is both a valid coding system and a valid
9585 function symbol, we return VAL as a coding system. */
02ba4723
KH
9586 if (CONSP (val))
9587 return val;
9588 if (! SYMBOLP (val))
9589 return Qnil;
9590 if (! NILP (Fcoding_system_p (val)))
9591 return Fcons (val, val);
b19fd4c5 9592 if (! NILP (Ffboundp (val)))
6b89e3aa 9593 {
e2b97060
MB
9594 /* We use call1 rather than safe_call1
9595 so as to get bug reports about functions called here
9596 which don't handle the current interface. */
9597 val = call1 (val, Flist (nargs, args));
b19fd4c5
KH
9598 if (CONSP (val))
9599 return val;
9600 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9601 return Fcons (val, val);
6b89e3aa 9602 }
02ba4723 9603 return Qnil;
6b89e3aa
KH
9604 }
9605 }
4ed46869 9606 return Qnil;
6b89e3aa
KH
9607}
9608
df7492f9 9609DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 9610 Sset_coding_system_priority, 0, MANY, 0,
da7db224 9611 doc: /* Assign higher priority to the coding systems given as arguments.
d4a1d553 9612If multiple coding systems belong to the same category,
a3181084
DL
9613all but the first one are ignored.
9614
d4a1d553 9615usage: (set-coding-system-priority &rest coding-systems) */)
df7492f9
KH
9616 (nargs, args)
9617 int nargs;
9618 Lisp_Object *args;
9619{
9620 int i, j;
9621 int changed[coding_category_max];
9622 enum coding_category priorities[coding_category_max];
9623
9624 bzero (changed, sizeof changed);
6b89e3aa 9625
df7492f9 9626 for (i = j = 0; i < nargs; i++)
6b89e3aa 9627 {
df7492f9
KH
9628 enum coding_category category;
9629 Lisp_Object spec, attrs;
6b89e3aa 9630
df7492f9
KH
9631 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9632 attrs = AREF (spec, 0);
9633 category = XINT (CODING_ATTR_CATEGORY (attrs));
9634 if (changed[category])
9635 /* Ignore this coding system because a coding system of the
9636 same category already had a higher priority. */
9637 continue;
9638 changed[category] = 1;
9639 priorities[j++] = category;
9640 if (coding_categories[category].id >= 0
9641 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9642 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 9643 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 9644 }
6b89e3aa 9645
df7492f9
KH
9646 /* Now we have decided top J priorities. Reflect the order of the
9647 original priorities to the remaining priorities. */
6b89e3aa 9648
df7492f9 9649 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 9650 {
df7492f9
KH
9651 while (j < coding_category_max
9652 && changed[coding_priorities[j]])
9653 j++;
9654 if (j == coding_category_max)
9655 abort ();
9656 priorities[i] = coding_priorities[j];
9657 }
6b89e3aa 9658
df7492f9 9659 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 9660
ff563fce
KH
9661 /* Update `coding-category-list'. */
9662 Vcoding_category_list = Qnil;
9663 for (i = coding_category_max - 1; i >= 0; i--)
9664 Vcoding_category_list
9665 = Fcons (AREF (Vcoding_category_table, priorities[i]),
9666 Vcoding_category_list);
6b89e3aa 9667
df7492f9 9668 return Qnil;
6b89e3aa
KH
9669}
9670
df7492f9
KH
9671DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9672 Scoding_system_priority_list, 0, 1, 0,
da7db224 9673 doc: /* Return a list of coding systems ordered by their priorities.
b811c52b
KH
9674The list contains a subset of coding systems; i.e. coding systems
9675assigned to each coding category (see `coding-category-list').
9676
da7db224 9677HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
9678 (highestp)
9679 Lisp_Object highestp;
d46c5b12
KH
9680{
9681 int i;
df7492f9 9682 Lisp_Object val;
6b89e3aa 9683
df7492f9 9684 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 9685 {
df7492f9
KH
9686 enum coding_category category = coding_priorities[i];
9687 int id = coding_categories[category].id;
9688 Lisp_Object attrs;
068a9dbd 9689
df7492f9
KH
9690 if (id < 0)
9691 continue;
9692 attrs = CODING_ID_ATTRS (id);
9693 if (! NILP (highestp))
9694 return CODING_ATTR_BASE_NAME (attrs);
9695 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9696 }
9697 return Fnreverse (val);
9698}
068a9dbd 9699
91433552 9700static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
9701
9702static Lisp_Object
df7492f9
KH
9703make_subsidiaries (base)
9704 Lisp_Object base;
068a9dbd 9705{
df7492f9 9706 Lisp_Object subsidiaries;
8f924df7 9707 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
9708 char *buf = (char *) alloca (base_name_len + 6);
9709 int i;
068a9dbd 9710
8f924df7 9711 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
9712 subsidiaries = Fmake_vector (make_number (3), Qnil);
9713 for (i = 0; i < 3; i++)
068a9dbd 9714 {
df7492f9
KH
9715 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9716 ASET (subsidiaries, i, intern (buf));
068a9dbd 9717 }
df7492f9 9718 return subsidiaries;
068a9dbd
KH
9719}
9720
9721
df7492f9
KH
9722DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9723 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
9724 doc: /* For internal use only.
9725usage: (define-coding-system-internal ...) */)
df7492f9
KH
9726 (nargs, args)
9727 int nargs;
9728 Lisp_Object *args;
068a9dbd 9729{
df7492f9
KH
9730 Lisp_Object name;
9731 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
9732 Lisp_Object attrs; /* Vector of attributes. */
9733 Lisp_Object eol_type;
9734 Lisp_Object aliases;
9735 Lisp_Object coding_type, charset_list, safe_charsets;
9736 enum coding_category category;
9737 Lisp_Object tail, val;
9738 int max_charset_id = 0;
9739 int i;
068a9dbd 9740
df7492f9
KH
9741 if (nargs < coding_arg_max)
9742 goto short_args;
068a9dbd 9743
df7492f9 9744 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 9745
df7492f9
KH
9746 name = args[coding_arg_name];
9747 CHECK_SYMBOL (name);
9748 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 9749
df7492f9
KH
9750 val = args[coding_arg_mnemonic];
9751 if (! STRINGP (val))
9752 CHECK_CHARACTER (val);
9753 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 9754
df7492f9
KH
9755 coding_type = args[coding_arg_coding_type];
9756 CHECK_SYMBOL (coding_type);
9757 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 9758
df7492f9
KH
9759 charset_list = args[coding_arg_charset_list];
9760 if (SYMBOLP (charset_list))
9761 {
9762 if (EQ (charset_list, Qiso_2022))
9763 {
9764 if (! EQ (coding_type, Qiso_2022))
9765 error ("Invalid charset-list");
9766 charset_list = Viso_2022_charset_list;
9767 }
9768 else if (EQ (charset_list, Qemacs_mule))
9769 {
9770 if (! EQ (coding_type, Qemacs_mule))
9771 error ("Invalid charset-list");
9772 charset_list = Vemacs_mule_charset_list;
9773 }
9774 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9775 if (max_charset_id < XFASTINT (XCAR (tail)))
9776 max_charset_id = XFASTINT (XCAR (tail));
9777 }
068a9dbd
KH
9778 else
9779 {
df7492f9 9780 charset_list = Fcopy_sequence (charset_list);
985773c9 9781 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
068a9dbd 9782 {
df7492f9
KH
9783 struct charset *charset;
9784
985773c9 9785 val = XCAR (tail);
df7492f9
KH
9786 CHECK_CHARSET_GET_CHARSET (val, charset);
9787 if (EQ (coding_type, Qiso_2022)
9788 ? CHARSET_ISO_FINAL (charset) < 0
9789 : EQ (coding_type, Qemacs_mule)
9790 ? CHARSET_EMACS_MULE_ID (charset) < 0
9791 : 0)
9792 error ("Can't handle charset `%s'",
8f924df7 9793 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 9794
8f924df7 9795 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
9796 if (max_charset_id < charset->id)
9797 max_charset_id = charset->id;
068a9dbd
KH
9798 }
9799 }
df7492f9 9800 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 9801
1b3b981b
AS
9802 safe_charsets = make_uninit_string (max_charset_id + 1);
9803 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9 9804 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 9805 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 9806 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 9807
584948ac 9808 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 9809
df7492f9 9810 val = args[coding_arg_decode_translation_table];
a6f87d34 9811 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 9812 CHECK_SYMBOL (val);
df7492f9 9813 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 9814
df7492f9 9815 val = args[coding_arg_encode_translation_table];
a6f87d34 9816 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 9817 CHECK_SYMBOL (val);
df7492f9 9818 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 9819
df7492f9
KH
9820 val = args[coding_arg_post_read_conversion];
9821 CHECK_SYMBOL (val);
9822 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 9823
df7492f9
KH
9824 val = args[coding_arg_pre_write_conversion];
9825 CHECK_SYMBOL (val);
9826 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 9827
df7492f9
KH
9828 val = args[coding_arg_default_char];
9829 if (NILP (val))
9830 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9831 else
9832 {
8f924df7 9833 CHECK_CHARACTER (val);
df7492f9
KH
9834 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9835 }
4031e2bf 9836
8f924df7
KH
9837 val = args[coding_arg_for_unibyte];
9838 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 9839
df7492f9
KH
9840 val = args[coding_arg_plist];
9841 CHECK_LIST (val);
9842 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 9843
df7492f9
KH
9844 if (EQ (coding_type, Qcharset))
9845 {
c7c66a95
KH
9846 /* Generate a lisp vector of 256 elements. Each element is nil,
9847 integer, or a list of charset IDs.
3a73fa5d 9848
c7c66a95
KH
9849 If Nth element is nil, the byte code N is invalid in this
9850 coding system.
4ed46869 9851
c7c66a95
KH
9852 If Nth element is a number NUM, N is the first byte of a
9853 charset whose ID is NUM.
4ed46869 9854
c7c66a95
KH
9855 If Nth element is a list of charset IDs, N is the first byte
9856 of one of them. The list is sorted by dimensions of the
ad1746f5 9857 charsets. A charset of smaller dimension comes first. */
df7492f9 9858 val = Fmake_vector (make_number (256), Qnil);
4ed46869 9859
5c99c2e6 9860 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 9861 {
c7c66a95
KH
9862 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9863 int dim = CHARSET_DIMENSION (charset);
9864 int idx = (dim - 1) * 4;
4ed46869 9865
5c99c2e6 9866 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 9867 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 9868
15d143f7
KH
9869 for (i = charset->code_space[idx];
9870 i <= charset->code_space[idx + 1]; i++)
9871 {
c7c66a95
KH
9872 Lisp_Object tmp, tmp2;
9873 int dim2;
ec6d2bb8 9874
c7c66a95
KH
9875 tmp = AREF (val, i);
9876 if (NILP (tmp))
9877 tmp = XCAR (tail);
9878 else if (NUMBERP (tmp))
9879 {
9880 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9881 if (dim < dim2)
c7c66a95 9882 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
9883 else
9884 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 9885 }
15d143f7 9886 else
c7c66a95
KH
9887 {
9888 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9889 {
9890 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9891 if (dim < dim2)
9892 break;
9893 }
9894 if (NILP (tmp2))
9895 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9896 else
9897 {
9898 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9899 XSETCAR (tmp2, XCAR (tail));
9900 }
9901 }
9902 ASET (val, i, tmp);
15d143f7 9903 }
df7492f9
KH
9904 }
9905 ASET (attrs, coding_attr_charset_valids, val);
9906 category = coding_category_charset;
9907 }
9908 else if (EQ (coding_type, Qccl))
9909 {
9910 Lisp_Object valids;
ecec61c1 9911
df7492f9
KH
9912 if (nargs < coding_arg_ccl_max)
9913 goto short_args;
ecec61c1 9914
df7492f9
KH
9915 val = args[coding_arg_ccl_decoder];
9916 CHECK_CCL_PROGRAM (val);
9917 if (VECTORP (val))
9918 val = Fcopy_sequence (val);
9919 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 9920
df7492f9
KH
9921 val = args[coding_arg_ccl_encoder];
9922 CHECK_CCL_PROGRAM (val);
9923 if (VECTORP (val))
9924 val = Fcopy_sequence (val);
9925 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 9926
df7492f9
KH
9927 val = args[coding_arg_ccl_valids];
9928 valids = Fmake_string (make_number (256), make_number (0));
9929 for (tail = val; !NILP (tail); tail = Fcdr (tail))
9930 {
8dcbea82 9931 int from, to;
ecec61c1 9932
df7492f9
KH
9933 val = Fcar (tail);
9934 if (INTEGERP (val))
8dcbea82
KH
9935 {
9936 from = to = XINT (val);
9937 if (from < 0 || from > 255)
9938 args_out_of_range_3 (val, make_number (0), make_number (255));
9939 }
df7492f9
KH
9940 else
9941 {
df7492f9 9942 CHECK_CONS (val);
8f924df7
KH
9943 CHECK_NATNUM_CAR (val);
9944 CHECK_NATNUM_CDR (val);
df7492f9 9945 from = XINT (XCAR (val));
8f924df7 9946 if (from > 255)
8dcbea82
KH
9947 args_out_of_range_3 (XCAR (val),
9948 make_number (0), make_number (255));
df7492f9 9949 to = XINT (XCDR (val));
8dcbea82
KH
9950 if (to < from || to > 255)
9951 args_out_of_range_3 (XCDR (val),
9952 XCAR (val), make_number (255));
df7492f9 9953 }
8dcbea82 9954 for (i = from; i <= to; i++)
8f924df7 9955 SSET (valids, i, 1);
df7492f9
KH
9956 }
9957 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 9958
df7492f9 9959 category = coding_category_ccl;
55ab7be3 9960 }
df7492f9 9961 else if (EQ (coding_type, Qutf_16))
55ab7be3 9962 {
df7492f9 9963 Lisp_Object bom, endian;
4ed46869 9964
584948ac 9965 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 9966
df7492f9
KH
9967 if (nargs < coding_arg_utf16_max)
9968 goto short_args;
4ed46869 9969
df7492f9
KH
9970 bom = args[coding_arg_utf16_bom];
9971 if (! NILP (bom) && ! EQ (bom, Qt))
9972 {
9973 CHECK_CONS (bom);
8f924df7
KH
9974 val = XCAR (bom);
9975 CHECK_CODING_SYSTEM (val);
9976 val = XCDR (bom);
9977 CHECK_CODING_SYSTEM (val);
df7492f9 9978 }
a470d443 9979 ASET (attrs, coding_attr_utf_bom, bom);
df7492f9
KH
9980
9981 endian = args[coding_arg_utf16_endian];
b49a1807
KH
9982 CHECK_SYMBOL (endian);
9983 if (NILP (endian))
9984 endian = Qbig;
9985 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 9986 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
9987 ASET (attrs, coding_attr_utf_16_endian, endian);
9988
9989 category = (CONSP (bom)
9990 ? coding_category_utf_16_auto
9991 : NILP (bom)
b49a1807 9992 ? (EQ (endian, Qbig)
df7492f9
KH
9993 ? coding_category_utf_16_be_nosig
9994 : coding_category_utf_16_le_nosig)
b49a1807 9995 : (EQ (endian, Qbig)
df7492f9
KH
9996 ? coding_category_utf_16_be
9997 : coding_category_utf_16_le));
9998 }
9999 else if (EQ (coding_type, Qiso_2022))
10000 {
10001 Lisp_Object initial, reg_usage, request, flags;
4776e638 10002 int i;
1397dc18 10003
df7492f9
KH
10004 if (nargs < coding_arg_iso2022_max)
10005 goto short_args;
10006
10007 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
10008 CHECK_VECTOR (initial);
10009 for (i = 0; i < 4; i++)
10010 {
10011 val = Faref (initial, make_number (i));
10012 if (! NILP (val))
10013 {
584948ac
KH
10014 struct charset *charset;
10015
10016 CHECK_CHARSET_GET_CHARSET (val, charset);
10017 ASET (initial, i, make_number (CHARSET_ID (charset)));
10018 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
10019 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
10020 }
10021 else
10022 ASET (initial, i, make_number (-1));
10023 }
10024
10025 reg_usage = args[coding_arg_iso2022_reg_usage];
10026 CHECK_CONS (reg_usage);
8f924df7
KH
10027 CHECK_NUMBER_CAR (reg_usage);
10028 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
10029
10030 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
10031 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 10032 {
df7492f9 10033 int id;
8f924df7 10034 Lisp_Object tmp;
df7492f9
KH
10035
10036 val = Fcar (tail);
10037 CHECK_CONS (val);
8f924df7
KH
10038 tmp = XCAR (val);
10039 CHECK_CHARSET_GET_ID (tmp, id);
10040 CHECK_NATNUM_CDR (val);
df7492f9
KH
10041 if (XINT (XCDR (val)) >= 4)
10042 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 10043 XSETCAR (val, make_number (id));
1397dc18 10044 }
4ed46869 10045
df7492f9
KH
10046 flags = args[coding_arg_iso2022_flags];
10047 CHECK_NATNUM (flags);
10048 i = XINT (flags);
10049 if (EQ (args[coding_arg_charset_list], Qiso_2022))
10050 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
10051
10052 ASET (attrs, coding_attr_iso_initial, initial);
10053 ASET (attrs, coding_attr_iso_usage, reg_usage);
10054 ASET (attrs, coding_attr_iso_request, request);
10055 ASET (attrs, coding_attr_iso_flags, flags);
10056 setup_iso_safe_charsets (attrs);
10057
10058 if (i & CODING_ISO_FLAG_SEVEN_BITS)
10059 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
10060 | CODING_ISO_FLAG_SINGLE_SHIFT))
10061 ? coding_category_iso_7_else
10062 : EQ (args[coding_arg_charset_list], Qiso_2022)
10063 ? coding_category_iso_7
10064 : coding_category_iso_7_tight);
10065 else
10066 {
10067 int id = XINT (AREF (initial, 1));
10068
c6fb6e98 10069 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
10070 || EQ (args[coding_arg_charset_list], Qiso_2022)
10071 || id < 0)
10072 ? coding_category_iso_8_else
10073 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
10074 ? coding_category_iso_8_1
10075 : coding_category_iso_8_2);
10076 }
0ce7886f
KH
10077 if (category != coding_category_iso_8_1
10078 && category != coding_category_iso_8_2)
10079 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
10080 }
10081 else if (EQ (coding_type, Qemacs_mule))
c28a9453 10082 {
df7492f9
KH
10083 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
10084 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 10085 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 10086 category = coding_category_emacs_mule;
c28a9453 10087 }
df7492f9 10088 else if (EQ (coding_type, Qshift_jis))
c28a9453 10089 {
df7492f9
KH
10090
10091 struct charset *charset;
10092
7d64c6ad 10093 if (XINT (Flength (charset_list)) != 3
6e07c25f 10094 && XINT (Flength (charset_list)) != 4)
7d64c6ad 10095 error ("There should be three or four charsets");
df7492f9
KH
10096
10097 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10098 if (CHARSET_DIMENSION (charset) != 1)
10099 error ("Dimension of charset %s is not one",
8f924df7 10100 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
10101 if (CHARSET_ASCII_COMPATIBLE_P (charset))
10102 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
10103
10104 charset_list = XCDR (charset_list);
10105 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10106 if (CHARSET_DIMENSION (charset) != 1)
10107 error ("Dimension of charset %s is not one",
8f924df7 10108 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
10109
10110 charset_list = XCDR (charset_list);
10111 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10112 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
10113 error ("Dimension of charset %s is not two",
10114 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10115
10116 charset_list = XCDR (charset_list);
2b917a06
KH
10117 if (! NILP (charset_list))
10118 {
10119 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10120 if (CHARSET_DIMENSION (charset) != 2)
10121 error ("Dimension of charset %s is not two",
10122 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10123 }
df7492f9
KH
10124
10125 category = coding_category_sjis;
10126 Vsjis_coding_system = name;
c28a9453 10127 }
df7492f9
KH
10128 else if (EQ (coding_type, Qbig5))
10129 {
10130 struct charset *charset;
4ed46869 10131
df7492f9
KH
10132 if (XINT (Flength (charset_list)) != 2)
10133 error ("There should be just two charsets");
10134
10135 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10136 if (CHARSET_DIMENSION (charset) != 1)
10137 error ("Dimension of charset %s is not one",
8f924df7 10138 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
10139 if (CHARSET_ASCII_COMPATIBLE_P (charset))
10140 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
10141
10142 charset_list = XCDR (charset_list);
10143 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10144 if (CHARSET_DIMENSION (charset) != 2)
10145 error ("Dimension of charset %s is not two",
8f924df7 10146 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 10147
df7492f9
KH
10148 category = coding_category_big5;
10149 Vbig5_coding_system = name;
10150 }
10151 else if (EQ (coding_type, Qraw_text))
c28a9453 10152 {
584948ac
KH
10153 category = coding_category_raw_text;
10154 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 10155 }
df7492f9 10156 else if (EQ (coding_type, Qutf_8))
4ed46869 10157 {
a470d443
KH
10158 Lisp_Object bom;
10159
584948ac 10160 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
a470d443
KH
10161
10162 if (nargs < coding_arg_utf8_max)
10163 goto short_args;
10164
10165 bom = args[coding_arg_utf8_bom];
10166 if (! NILP (bom) && ! EQ (bom, Qt))
10167 {
10168 CHECK_CONS (bom);
10169 val = XCAR (bom);
10170 CHECK_CODING_SYSTEM (val);
10171 val = XCDR (bom);
10172 CHECK_CODING_SYSTEM (val);
10173 }
10174 ASET (attrs, coding_attr_utf_bom, bom);
10175
10176 category = (CONSP (bom) ? coding_category_utf_8_auto
10177 : NILP (bom) ? coding_category_utf_8_nosig
10178 : coding_category_utf_8_sig);
4ed46869 10179 }
df7492f9
KH
10180 else if (EQ (coding_type, Qundecided))
10181 category = coding_category_undecided;
4ed46869 10182 else
df7492f9 10183 error ("Invalid coding system type: %s",
8f924df7 10184 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 10185
df7492f9 10186 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
10187 CODING_ATTR_PLIST (attrs)
10188 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10189 CODING_ATTR_PLIST (attrs)));
35befdaa 10190 CODING_ATTR_PLIST (attrs)
3ed051d4 10191 = Fcons (QCascii_compatible_p,
35befdaa
KH
10192 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10193 CODING_ATTR_PLIST (attrs)));
c4825358 10194
df7492f9
KH
10195 eol_type = args[coding_arg_eol_type];
10196 if (! NILP (eol_type)
10197 && ! EQ (eol_type, Qunix)
10198 && ! EQ (eol_type, Qdos)
10199 && ! EQ (eol_type, Qmac))
10200 error ("Invalid eol-type");
4ed46869 10201
df7492f9 10202 aliases = Fcons (name, Qnil);
4ed46869 10203
df7492f9
KH
10204 if (NILP (eol_type))
10205 {
10206 eol_type = make_subsidiaries (name);
10207 for (i = 0; i < 3; i++)
1397dc18 10208 {
df7492f9
KH
10209 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10210
10211 this_name = AREF (eol_type, i);
10212 this_aliases = Fcons (this_name, Qnil);
10213 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10214 this_spec = Fmake_vector (make_number (3), attrs);
10215 ASET (this_spec, 1, this_aliases);
10216 ASET (this_spec, 2, this_eol_type);
10217 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10218 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
583f71ca
KH
10219 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10220 if (NILP (val))
10221 Vcoding_system_alist
10222 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10223 Vcoding_system_alist);
1397dc18 10224 }
d46c5b12 10225 }
4ed46869 10226
df7492f9
KH
10227 spec_vec = Fmake_vector (make_number (3), attrs);
10228 ASET (spec_vec, 1, aliases);
10229 ASET (spec_vec, 2, eol_type);
48b0f3ae 10230
df7492f9
KH
10231 Fputhash (name, spec_vec, Vcoding_system_hash_table);
10232 Vcoding_system_list = Fcons (name, Vcoding_system_list);
583f71ca
KH
10233 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10234 if (NILP (val))
10235 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10236 Vcoding_system_alist);
48b0f3ae 10237
df7492f9
KH
10238 {
10239 int id = coding_categories[category].id;
48b0f3ae 10240
df7492f9
KH
10241 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10242 setup_coding_system (name, &coding_categories[category]);
10243 }
48b0f3ae 10244
d46c5b12 10245 return Qnil;
48b0f3ae 10246
df7492f9
KH
10247 short_args:
10248 return Fsignal (Qwrong_number_of_arguments,
10249 Fcons (intern ("define-coding-system-internal"),
10250 make_number (nargs)));
d46c5b12 10251}
4ed46869 10252
d6925f38 10253
a6f87d34
KH
10254DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10255 3, 3, 0,
10256 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
10257 (coding_system, prop, val)
10258 Lisp_Object coding_system, prop, val;
10259{
3dbe7859 10260 Lisp_Object spec, attrs;
a6f87d34
KH
10261
10262 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10263 attrs = AREF (spec, 0);
10264 if (EQ (prop, QCmnemonic))
10265 {
10266 if (! STRINGP (val))
10267 CHECK_CHARACTER (val);
10268 CODING_ATTR_MNEMONIC (attrs) = val;
10269 }
2133e2d1 10270 else if (EQ (prop, QCdefault_char))
a6f87d34
KH
10271 {
10272 if (NILP (val))
10273 val = make_number (' ');
10274 else
10275 CHECK_CHARACTER (val);
10276 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10277 }
10278 else if (EQ (prop, QCdecode_translation_table))
10279 {
10280 if (! CHAR_TABLE_P (val) && ! CONSP (val))
10281 CHECK_SYMBOL (val);
10282 CODING_ATTR_DECODE_TBL (attrs) = val;
10283 }
10284 else if (EQ (prop, QCencode_translation_table))
10285 {
10286 if (! CHAR_TABLE_P (val) && ! CONSP (val))
10287 CHECK_SYMBOL (val);
10288 CODING_ATTR_ENCODE_TBL (attrs) = val;
10289 }
10290 else if (EQ (prop, QCpost_read_conversion))
10291 {
10292 CHECK_SYMBOL (val);
10293 CODING_ATTR_POST_READ (attrs) = val;
10294 }
10295 else if (EQ (prop, QCpre_write_conversion))
10296 {
10297 CHECK_SYMBOL (val);
10298 CODING_ATTR_PRE_WRITE (attrs) = val;
10299 }
35befdaa
KH
10300 else if (EQ (prop, QCascii_compatible_p))
10301 {
10302 CODING_ATTR_ASCII_COMPAT (attrs) = val;
10303 }
a6f87d34
KH
10304
10305 CODING_ATTR_PLIST (attrs)
10306 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10307 return val;
10308}
10309
10310
df7492f9
KH
10311DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10312 Sdefine_coding_system_alias, 2, 2, 0,
10313 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
10314 (alias, coding_system)
10315 Lisp_Object alias, coding_system;
66cfb530 10316{
583f71ca 10317 Lisp_Object spec, aliases, eol_type, val;
4ed46869 10318
df7492f9
KH
10319 CHECK_SYMBOL (alias);
10320 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10321 aliases = AREF (spec, 1);
d4a1d553 10322 /* ALIASES should be a list of length more than zero, and the first
d6925f38
KH
10323 element is a base coding system. Append ALIAS at the tail of the
10324 list. */
df7492f9
KH
10325 while (!NILP (XCDR (aliases)))
10326 aliases = XCDR (aliases);
8f924df7 10327 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 10328
df7492f9
KH
10329 eol_type = AREF (spec, 2);
10330 if (VECTORP (eol_type))
4ed46869 10331 {
df7492f9
KH
10332 Lisp_Object subsidiaries;
10333 int i;
4ed46869 10334
df7492f9
KH
10335 subsidiaries = make_subsidiaries (alias);
10336 for (i = 0; i < 3; i++)
10337 Fdefine_coding_system_alias (AREF (subsidiaries, i),
10338 AREF (eol_type, i));
4ed46869 10339 }
df7492f9
KH
10340
10341 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 10342 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
583f71ca
KH
10343 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10344 if (NILP (val))
10345 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10346 Vcoding_system_alist);
66cfb530 10347
4ed46869
KH
10348 return Qnil;
10349}
10350
df7492f9
KH
10351DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10352 1, 1, 0,
10353 doc: /* Return the base of CODING-SYSTEM.
da7db224 10354Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
10355 (coding_system)
10356 Lisp_Object coding_system;
d46c5b12 10357{
df7492f9 10358 Lisp_Object spec, attrs;
d46c5b12 10359
df7492f9
KH
10360 if (NILP (coding_system))
10361 return (Qno_conversion);
10362 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10363 attrs = AREF (spec, 0);
10364 return CODING_ATTR_BASE_NAME (attrs);
10365}
1397dc18 10366
df7492f9
KH
10367DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10368 1, 1, 0,
10369 doc: "Return the property list of CODING-SYSTEM.")
10370 (coding_system)
10371 Lisp_Object coding_system;
10372{
10373 Lisp_Object spec, attrs;
1397dc18 10374
df7492f9
KH
10375 if (NILP (coding_system))
10376 coding_system = Qno_conversion;
10377 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10378 attrs = AREF (spec, 0);
10379 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
10380}
10381
df7492f9
KH
10382
10383DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10384 1, 1, 0,
da7db224 10385 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
10386 (coding_system)
10387 Lisp_Object coding_system;
66cfb530 10388{
df7492f9 10389 Lisp_Object spec;
84d60297 10390
df7492f9
KH
10391 if (NILP (coding_system))
10392 coding_system = Qno_conversion;
10393 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 10394 return AREF (spec, 1);
df7492f9 10395}
66cfb530 10396
df7492f9
KH
10397DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10398 Scoding_system_eol_type, 1, 1, 0,
10399 doc: /* Return eol-type of CODING-SYSTEM.
d4a1d553 10400An eol-type is an integer 0, 1, 2, or a vector of coding systems.
66cfb530 10401
df7492f9
KH
10402Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10403and CR respectively.
66cfb530 10404
df7492f9
KH
10405A vector value indicates that a format of end-of-line should be
10406detected automatically. Nth element of the vector is the subsidiary
10407coding system whose eol-type is N. */)
6b89e3aa
KH
10408 (coding_system)
10409 Lisp_Object coding_system;
10410{
df7492f9
KH
10411 Lisp_Object spec, eol_type;
10412 int n;
6b89e3aa 10413
df7492f9
KH
10414 if (NILP (coding_system))
10415 coding_system = Qno_conversion;
10416 if (! CODING_SYSTEM_P (coding_system))
10417 return Qnil;
10418 spec = CODING_SYSTEM_SPEC (coding_system);
10419 eol_type = AREF (spec, 2);
10420 if (VECTORP (eol_type))
10421 return Fcopy_sequence (eol_type);
10422 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10423 return make_number (n);
6b89e3aa
KH
10424}
10425
4ed46869
KH
10426#endif /* emacs */
10427
10428\f
1397dc18 10429/*** 12. Postamble ***/
4ed46869 10430
dfcf069d 10431void
4ed46869
KH
10432init_coding_once ()
10433{
10434 int i;
10435
df7492f9
KH
10436 for (i = 0; i < coding_category_max; i++)
10437 {
10438 coding_categories[i].id = -1;
10439 coding_priorities[i] = i;
10440 }
4ed46869
KH
10441
10442 /* ISO2022 specific initialize routine. */
10443 for (i = 0; i < 0x20; i++)
b73bfc1c 10444 iso_code_class[i] = ISO_control_0;
4ed46869
KH
10445 for (i = 0x21; i < 0x7F; i++)
10446 iso_code_class[i] = ISO_graphic_plane_0;
10447 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 10448 iso_code_class[i] = ISO_control_1;
4ed46869
KH
10449 for (i = 0xA1; i < 0xFF; i++)
10450 iso_code_class[i] = ISO_graphic_plane_1;
10451 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10452 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
10453 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10454 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10455 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10456 iso_code_class[ISO_CODE_ESC] = ISO_escape;
10457 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10458 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10459 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10460
df7492f9
KH
10461 for (i = 0; i < 256; i++)
10462 {
10463 emacs_mule_bytes[i] = 1;
10464 }
7c78e542
KH
10465 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10466 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10467 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10468 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
10469}
10470
10471#ifdef emacs
10472
dfcf069d 10473void
e0e989f6
KH
10474syms_of_coding ()
10475{
df7492f9 10476 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
10477 {
10478 Lisp_Object args[2];
10479 args[0] = QCtest;
10480 args[1] = Qeq;
10481 Vcoding_system_hash_table = Fmake_hash_table (2, args);
10482 }
df7492f9
KH
10483
10484 staticpro (&Vsjis_coding_system);
10485 Vsjis_coding_system = Qnil;
e0e989f6 10486
df7492f9
KH
10487 staticpro (&Vbig5_coding_system);
10488 Vbig5_coding_system = Qnil;
10489
24a73b0a
KH
10490 staticpro (&Vcode_conversion_reused_workbuf);
10491 Vcode_conversion_reused_workbuf = Qnil;
10492
10493 staticpro (&Vcode_conversion_workbuf_name);
d67b4f80 10494 Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
e0e989f6 10495
24a73b0a 10496 reused_workbuf_in_use = 0;
df7492f9
KH
10497
10498 DEFSYM (Qcharset, "charset");
10499 DEFSYM (Qtarget_idx, "target-idx");
10500 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
10501 Fset (Qcoding_system_history, Qnil);
10502
9ce27fde 10503 /* Target FILENAME is the first argument. */
e0e989f6 10504 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 10505 /* Target FILENAME is the third argument. */
e0e989f6
KH
10506 Fput (Qwrite_region, Qtarget_idx, make_number (2));
10507
df7492f9 10508 DEFSYM (Qcall_process, "call-process");
9ce27fde 10509 /* Target PROGRAM is the first argument. */
e0e989f6
KH
10510 Fput (Qcall_process, Qtarget_idx, make_number (0));
10511
df7492f9 10512 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 10513 /* Target PROGRAM is the third argument. */
e0e989f6
KH
10514 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10515
df7492f9 10516 DEFSYM (Qstart_process, "start-process");
9ce27fde 10517 /* Target PROGRAM is the third argument. */
e0e989f6
KH
10518 Fput (Qstart_process, Qtarget_idx, make_number (2));
10519
df7492f9 10520 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 10521 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
10522 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10523
df7492f9
KH
10524 DEFSYM (Qcoding_system, "coding-system");
10525 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 10526
df7492f9
KH
10527 DEFSYM (Qeol_type, "eol-type");
10528 DEFSYM (Qunix, "unix");
10529 DEFSYM (Qdos, "dos");
4ed46869 10530
df7492f9
KH
10531 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10532 DEFSYM (Qpost_read_conversion, "post-read-conversion");
10533 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10534 DEFSYM (Qdefault_char, "default-char");
10535 DEFSYM (Qundecided, "undecided");
10536 DEFSYM (Qno_conversion, "no-conversion");
10537 DEFSYM (Qraw_text, "raw-text");
4ed46869 10538
df7492f9 10539 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 10540
df7492f9 10541 DEFSYM (Qutf_8, "utf-8");
8f924df7 10542 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 10543
df7492f9 10544 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
10545 DEFSYM (Qbig, "big");
10546 DEFSYM (Qlittle, "little");
27901516 10547
df7492f9
KH
10548 DEFSYM (Qshift_jis, "shift-jis");
10549 DEFSYM (Qbig5, "big5");
4ed46869 10550
df7492f9 10551 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 10552
df7492f9 10553 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869 10554 Fput (Qcoding_system_error, Qerror_conditions,
d67b4f80 10555 pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
4ed46869 10556 Fput (Qcoding_system_error, Qerror_message,
d67b4f80 10557 make_pure_c_string ("Invalid coding system"));
4ed46869 10558
05e6f5dc
KH
10559 /* Intern this now in case it isn't already done.
10560 Setting this variable twice is harmless.
10561 But don't staticpro it here--that is done in alloc.c. */
d67b4f80 10562 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
70c22245 10563
df7492f9 10564 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 10565 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
10566 DEFSYM (Qtranslation_table_id, "translation-table-id");
10567 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10568 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 10569
df7492f9 10570 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 10571
df7492f9 10572 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 10573
01378f49 10574 DEFSYM (QCcategory, ":category");
a6f87d34 10575 DEFSYM (QCmnemonic, ":mnemonic");
2133e2d1 10576 DEFSYM (QCdefault_char, ":default-char");
a6f87d34
KH
10577 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10578 DEFSYM (QCencode_translation_table, ":encode-translation-table");
10579 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10580 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
35befdaa 10581 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
01378f49 10582
df7492f9
KH
10583 Vcoding_category_table
10584 = Fmake_vector (make_number (coding_category_max), Qnil);
10585 staticpro (&Vcoding_category_table);
10586 /* The following are targets of code detection. */
10587 ASET (Vcoding_category_table, coding_category_iso_7,
d67b4f80 10588 intern_c_string ("coding-category-iso-7"));
df7492f9 10589 ASET (Vcoding_category_table, coding_category_iso_7_tight,
d67b4f80 10590 intern_c_string ("coding-category-iso-7-tight"));
df7492f9 10591 ASET (Vcoding_category_table, coding_category_iso_8_1,
d67b4f80 10592 intern_c_string ("coding-category-iso-8-1"));
df7492f9 10593 ASET (Vcoding_category_table, coding_category_iso_8_2,
d67b4f80 10594 intern_c_string ("coding-category-iso-8-2"));
df7492f9 10595 ASET (Vcoding_category_table, coding_category_iso_7_else,
d67b4f80 10596 intern_c_string ("coding-category-iso-7-else"));
df7492f9 10597 ASET (Vcoding_category_table, coding_category_iso_8_else,
d67b4f80 10598 intern_c_string ("coding-category-iso-8-else"));
a470d443 10599 ASET (Vcoding_category_table, coding_category_utf_8_auto,
d67b4f80 10600 intern_c_string ("coding-category-utf-8-auto"));
a470d443 10601 ASET (Vcoding_category_table, coding_category_utf_8_nosig,
d67b4f80 10602 intern_c_string ("coding-category-utf-8"));
a470d443 10603 ASET (Vcoding_category_table, coding_category_utf_8_sig,
d67b4f80 10604 intern_c_string ("coding-category-utf-8-sig"));
df7492f9 10605 ASET (Vcoding_category_table, coding_category_utf_16_be,
d67b4f80 10606 intern_c_string ("coding-category-utf-16-be"));
ff563fce 10607 ASET (Vcoding_category_table, coding_category_utf_16_auto,
d67b4f80 10608 intern_c_string ("coding-category-utf-16-auto"));
df7492f9 10609 ASET (Vcoding_category_table, coding_category_utf_16_le,
d67b4f80 10610 intern_c_string ("coding-category-utf-16-le"));
df7492f9 10611 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
d67b4f80 10612 intern_c_string ("coding-category-utf-16-be-nosig"));
df7492f9 10613 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
d67b4f80 10614 intern_c_string ("coding-category-utf-16-le-nosig"));
df7492f9 10615 ASET (Vcoding_category_table, coding_category_charset,
d67b4f80 10616 intern_c_string ("coding-category-charset"));
df7492f9 10617 ASET (Vcoding_category_table, coding_category_sjis,
d67b4f80 10618 intern_c_string ("coding-category-sjis"));
df7492f9 10619 ASET (Vcoding_category_table, coding_category_big5,
d67b4f80 10620 intern_c_string ("coding-category-big5"));
df7492f9 10621 ASET (Vcoding_category_table, coding_category_ccl,
d67b4f80 10622 intern_c_string ("coding-category-ccl"));
df7492f9 10623 ASET (Vcoding_category_table, coding_category_emacs_mule,
d67b4f80 10624 intern_c_string ("coding-category-emacs-mule"));
df7492f9
KH
10625 /* The following are NOT targets of code detection. */
10626 ASET (Vcoding_category_table, coding_category_raw_text,
d67b4f80 10627 intern_c_string ("coding-category-raw-text"));
df7492f9 10628 ASET (Vcoding_category_table, coding_category_undecided,
d67b4f80 10629 intern_c_string ("coding-category-undecided"));
ecf488bc 10630
065e3595
KH
10631 DEFSYM (Qinsufficient_source, "insufficient-source");
10632 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10633 DEFSYM (Qinvalid_source, "invalid-source");
10634 DEFSYM (Qinterrupted, "interrupted");
10635 DEFSYM (Qinsufficient_memory, "insufficient-memory");
44e8490d 10636 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
065e3595 10637
4ed46869
KH
10638 defsubr (&Scoding_system_p);
10639 defsubr (&Sread_coding_system);
10640 defsubr (&Sread_non_nil_coding_system);
10641 defsubr (&Scheck_coding_system);
10642 defsubr (&Sdetect_coding_region);
d46c5b12 10643 defsubr (&Sdetect_coding_string);
05e6f5dc 10644 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 10645 defsubr (&Sunencodable_char_position);
df7492f9 10646 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
10647 defsubr (&Sdecode_coding_region);
10648 defsubr (&Sencode_coding_region);
10649 defsubr (&Sdecode_coding_string);
10650 defsubr (&Sencode_coding_string);
10651 defsubr (&Sdecode_sjis_char);
10652 defsubr (&Sencode_sjis_char);
10653 defsubr (&Sdecode_big5_char);
10654 defsubr (&Sencode_big5_char);
1ba9e4ab 10655 defsubr (&Sset_terminal_coding_system_internal);
c4825358 10656 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 10657 defsubr (&Sterminal_coding_system);
1ba9e4ab 10658 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 10659 defsubr (&Skeyboard_coding_system);
a5d301df 10660 defsubr (&Sfind_operation_coding_system);
df7492f9 10661 defsubr (&Sset_coding_system_priority);
6b89e3aa 10662 defsubr (&Sdefine_coding_system_internal);
df7492f9 10663 defsubr (&Sdefine_coding_system_alias);
a6f87d34 10664 defsubr (&Scoding_system_put);
df7492f9
KH
10665 defsubr (&Scoding_system_base);
10666 defsubr (&Scoding_system_plist);
10667 defsubr (&Scoding_system_aliases);
10668 defsubr (&Scoding_system_eol_type);
10669 defsubr (&Scoding_system_priority_list);
4ed46869 10670
4608c386 10671 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
10672 doc: /* List of coding systems.
10673
10674Do not alter the value of this variable manually. This variable should be
df7492f9 10675updated by the functions `define-coding-system' and
48b0f3ae 10676`define-coding-system-alias'. */);
4608c386
KH
10677 Vcoding_system_list = Qnil;
10678
10679 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
10680 doc: /* Alist of coding system names.
10681Each element is one element list of coding system name.
446dcd75 10682This variable is given to `completing-read' as COLLECTION argument.
48b0f3ae
PJ
10683
10684Do not alter the value of this variable manually. This variable should be
10685updated by the functions `make-coding-system' and
10686`define-coding-system-alias'. */);
4608c386
KH
10687 Vcoding_system_alist = Qnil;
10688
4ed46869 10689 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
10690 doc: /* List of coding-categories (symbols) ordered by priority.
10691
10692On detecting a coding system, Emacs tries code detection algorithms
10693associated with each coding-category one by one in this order. When
10694one algorithm agrees with a byte sequence of source text, the coding
0ec31faf
KH
10695system bound to the corresponding coding-category is selected.
10696
42205607 10697Don't modify this variable directly, but use `set-coding-priority'. */);
4ed46869
KH
10698 {
10699 int i;
10700
10701 Vcoding_category_list = Qnil;
df7492f9 10702 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 10703 Vcoding_category_list
d46c5b12
KH
10704 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10705 Vcoding_category_list);
4ed46869
KH
10706 }
10707
10708 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
10709 doc: /* Specify the coding system for read operations.
10710It is useful to bind this variable with `let', but do not set it globally.
10711If the value is a coding system, it is used for decoding on read operation.
446dcd75
JB
10712If not, an appropriate element is used from one of the coding system alists.
10713There are three such tables: `file-coding-system-alist',
48b0f3ae 10714`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
10715 Vcoding_system_for_read = Qnil;
10716
10717 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
10718 doc: /* Specify the coding system for write operations.
10719Programs bind this variable with `let', but you should not set it globally.
10720If the value is a coding system, it is used for encoding of output,
10721when writing it to a file and when sending it to a file or subprocess.
10722
10723If this does not specify a coding system, an appropriate element
446dcd75
JB
10724is used from one of the coding system alists.
10725There are three such tables: `file-coding-system-alist',
48b0f3ae
PJ
10726`process-coding-system-alist', and `network-coding-system-alist'.
10727For output to files, if the above procedure does not specify a coding system,
10728the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
10729 Vcoding_system_for_write = Qnil;
10730
10731 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
10732 doc: /*
10733Coding system used in the latest file or process I/O. */);
4ed46869
KH
10734 Vlast_coding_system_used = Qnil;
10735
065e3595
KH
10736 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10737 doc: /*
10738Error status of the last code conversion.
10739
10740When an error was detected in the last code conversion, this variable
10741is set to one of the following symbols.
10742 `insufficient-source'
10743 `inconsistent-eol'
10744 `invalid-source'
10745 `interrupted'
10746 `insufficient-memory'
10747When no error was detected, the value doesn't change. So, to check
10748the error status of a code conversion by this variable, you must
10749explicitly set this variable to nil before performing code
10750conversion. */);
10751 Vlast_code_conversion_error = Qnil;
10752
9ce27fde 10753 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
10754 doc: /*
10755*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
10756See info node `Coding Systems' and info node `Text and Binary' concerning
10757such conversion. */);
9ce27fde
KH
10758 inhibit_eol_conversion = 0;
10759
ed29121d 10760 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
10761 doc: /*
10762Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
10763Bind it to t if the process output is to be treated as if it were a file
10764read from some filesystem. */);
ed29121d
EZ
10765 inherit_process_coding_system = 0;
10766
02ba4723 10767 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
10768 doc: /*
10769Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
10770The format is ((PATTERN . VAL) ...),
10771where PATTERN is a regular expression matching a file name,
10772VAL is a coding system, a cons of coding systems, or a function symbol.
10773If VAL is a coding system, it is used for both decoding and encoding
10774the file contents.
10775If VAL is a cons of coding systems, the car part is used for decoding,
10776and the cdr part is used for encoding.
10777If VAL is a function symbol, the function must return a coding system
2c53e699
KH
10778or a cons of coding systems which are used as above. The function is
10779called with an argument that is a list of the arguments with which
5a0bbd9a
KH
10780`find-operation-coding-system' was called. If the function can't decide
10781a coding system, it can return `undecided' so that the normal
10782code-detection is performed.
48b0f3ae
PJ
10783
10784See also the function `find-operation-coding-system'
10785and the variable `auto-coding-alist'. */);
02ba4723
KH
10786 Vfile_coding_system_alist = Qnil;
10787
10788 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
10789 doc: /*
10790Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
10791The format is ((PATTERN . VAL) ...),
10792where PATTERN is a regular expression matching a program name,
10793VAL is a coding system, a cons of coding systems, or a function symbol.
10794If VAL is a coding system, it is used for both decoding what received
10795from the program and encoding what sent to the program.
10796If VAL is a cons of coding systems, the car part is used for decoding,
10797and the cdr part is used for encoding.
10798If VAL is a function symbol, the function must return a coding system
10799or a cons of coding systems which are used as above.
10800
10801See also the function `find-operation-coding-system'. */);
02ba4723
KH
10802 Vprocess_coding_system_alist = Qnil;
10803
10804 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
10805 doc: /*
10806Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
10807The format is ((PATTERN . VAL) ...),
10808where PATTERN is a regular expression matching a network service name
10809or is a port number to connect to,
10810VAL is a coding system, a cons of coding systems, or a function symbol.
10811If VAL is a coding system, it is used for both decoding what received
10812from the network stream and encoding what sent to the network stream.
10813If VAL is a cons of coding systems, the car part is used for decoding,
10814and the cdr part is used for encoding.
10815If VAL is a function symbol, the function must return a coding system
10816or a cons of coding systems which are used as above.
10817
10818See also the function `find-operation-coding-system'. */);
02ba4723 10819 Vnetwork_coding_system_alist = Qnil;
4ed46869 10820
68c45bf0 10821 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
10822 doc: /* Coding system to use with system messages.
10823Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
10824 Vlocale_coding_system = Qnil;
10825
005f0d35 10826 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 10827 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
10828 doc: /*
10829*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
d67b4f80 10830 eol_mnemonic_unix = make_pure_c_string (":");
4ed46869 10831
7722baf9 10832 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
10833 doc: /*
10834*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
d67b4f80 10835 eol_mnemonic_dos = make_pure_c_string ("\\");
4ed46869 10836
7722baf9 10837 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
10838 doc: /*
10839*String displayed in mode line for MAC-like (CR) end-of-line format. */);
d67b4f80 10840 eol_mnemonic_mac = make_pure_c_string ("/");
4ed46869 10841
7722baf9 10842 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
10843 doc: /*
10844*String displayed in mode line when end-of-line format is not yet determined. */);
d67b4f80 10845 eol_mnemonic_undecided = make_pure_c_string (":");
4ed46869 10846
84fbb8a0 10847 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
10848 doc: /*
10849*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 10850 Venable_character_translation = Qt;
bdd9fb48 10851
f967223b 10852 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
10853 &Vstandard_translation_table_for_decode,
10854 doc: /* Table for translating characters while decoding. */);
f967223b 10855 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 10856
f967223b 10857 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
10858 &Vstandard_translation_table_for_encode,
10859 doc: /* Table for translating characters while encoding. */);
f967223b 10860 Vstandard_translation_table_for_encode = Qnil;
4ed46869 10861
df7492f9 10862 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
10863 doc: /* Alist of charsets vs revision numbers.
10864While encoding, if a charset (car part of an element) is found,
df7492f9
KH
10865designate it with the escape sequence identifying revision (cdr part
10866of the element). */);
10867 Vcharset_revision_table = Qnil;
02ba4723
KH
10868
10869 DEFVAR_LISP ("default-process-coding-system",
10870 &Vdefault_process_coding_system,
48b0f3ae
PJ
10871 doc: /* Cons of coding systems used for process I/O by default.
10872The car part is used for decoding a process output,
10873the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 10874 Vdefault_process_coding_system = Qnil;
c4825358 10875
3f003981 10876 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
10877 doc: /*
10878Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
10879This is a vector of length 256.
10880If Nth element is non-nil, the existence of code N in a file
10881\(or output of subprocess) doesn't prevent it to be detected as
10882a coding system of ISO 2022 variant which has a flag
10883`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10884or reading output of a subprocess.
446dcd75 10885Only 128th through 159th elements have a meaning. */);
3f003981 10886 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
10887
10888 DEFVAR_LISP ("select-safe-coding-system-function",
10889 &Vselect_safe_coding_system_function,
df7492f9
KH
10890 doc: /*
10891Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
10892
10893If set, this function is called to force a user to select a proper
10894coding system which can encode the text in the case that a default
fdecf907
GM
10895coding system used in each operation can't encode the text. The
10896function should take care that the buffer is not modified while
10897the coding system is being selected.
48b0f3ae
PJ
10898
10899The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
10900 Vselect_safe_coding_system_function = Qnil;
10901
5d5bf4d8
KH
10902 DEFVAR_BOOL ("coding-system-require-warning",
10903 &coding_system_require_warning,
10904 doc: /* Internal use only.
6b89e3aa
KH
10905If non-nil, on writing a file, `select-safe-coding-system-function' is
10906called even if `coding-system-for-write' is non-nil. The command
10907`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
10908 coding_system_require_warning = 0;
10909
10910
22ab2303 10911 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 10912 &inhibit_iso_escape_detection,
df7492f9 10913 doc: /*
97b1b294 10914If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
48b0f3ae 10915
97b1b294
EZ
10916When Emacs reads text, it tries to detect how the text is encoded.
10917This code detection is sensitive to escape sequences. If Emacs sees
10918a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10919of the ISO2022 encodings, and decodes text by the corresponding coding
10920system (e.g. `iso-2022-7bit').
48b0f3ae
PJ
10921
10922However, there may be a case that you want to read escape sequences in
10923a file as is. In such a case, you can set this variable to non-nil.
97b1b294
EZ
10924Then the code detection will ignore any escape sequences, and no text is
10925detected as encoded in some ISO-2022 encoding. The result is that all
48b0f3ae
PJ
10926escape sequences become visible in a buffer.
10927
10928The default value is nil, and it is strongly recommended not to change
10929it. That is because many Emacs Lisp source files that contain
10930non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10931in Emacs's distribution, and they won't be decoded correctly on
10932reading if you suppress escape sequence detection.
10933
10934The other way to read escape sequences in a file without decoding is
97b1b294 10935to explicitly specify some coding system that doesn't use ISO-2022
48b0f3ae 10936escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 10937 inhibit_iso_escape_detection = 0;
002fdb44 10938
97b1b294
EZ
10939 DEFVAR_BOOL ("inhibit-null-byte-detection",
10940 &inhibit_null_byte_detection,
10941 doc: /* If non-nil, Emacs ignores null bytes on code detection.
10942By default, Emacs treats it as binary data, and does not attempt to
10943decode it. The effect is as if you specified `no-conversion' for
10944reading that text.
10945
10946Set this to non-nil when a regular text happens to include null bytes.
10947Examples are Index nodes of Info files and null-byte delimited output
10948from GNU Find and GNU Grep. Emacs will then ignore the null bytes and
10949decode text as usual. */);
10950 inhibit_null_byte_detection = 0;
10951
002fdb44 10952 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1 10953 doc: /* Char table for translating self-inserting characters.
446dcd75 10954This is applied to the result of input methods, not their input.
8434d0b8
EZ
10955See also `keyboard-translate-table'.
10956
10957Use of this variable for character code unification was rendered
10958obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10959internal character representation. */);
002fdb44 10960 Vtranslation_table_for_input = Qnil;
8f924df7 10961
2c78b7e1
KH
10962 {
10963 Lisp_Object args[coding_arg_max];
8f924df7 10964 Lisp_Object plist[16];
2c78b7e1
KH
10965 int i;
10966
10967 for (i = 0; i < coding_arg_max; i++)
10968 args[i] = Qnil;
10969
d67b4f80 10970 plist[0] = intern_c_string (":name");
2c78b7e1 10971 plist[1] = args[coding_arg_name] = Qno_conversion;
d67b4f80 10972 plist[2] = intern_c_string (":mnemonic");
2c78b7e1 10973 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
d67b4f80 10974 plist[4] = intern_c_string (":coding-type");
2c78b7e1 10975 plist[5] = args[coding_arg_coding_type] = Qraw_text;
d67b4f80 10976 plist[6] = intern_c_string (":ascii-compatible-p");
2c78b7e1 10977 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
d67b4f80 10978 plist[8] = intern_c_string (":default-char");
2c78b7e1 10979 plist[9] = args[coding_arg_default_char] = make_number (0);
d67b4f80 10980 plist[10] = intern_c_string (":for-unibyte");
8f924df7 10981 plist[11] = args[coding_arg_for_unibyte] = Qt;
d67b4f80
DN
10982 plist[12] = intern_c_string (":docstring");
10983 plist[13] = make_pure_c_string ("Do no conversion.\n\
2c78b7e1
KH
10984\n\
10985When you visit a file with this coding, the file is read into a\n\
10986unibyte buffer as is, thus each byte of a file is treated as a\n\
10987character.");
d67b4f80 10988 plist[14] = intern_c_string (":eol-type");
8f924df7
KH
10989 plist[15] = args[coding_arg_eol_type] = Qunix;
10990 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1 10991 Fdefine_coding_system_internal (coding_arg_max, args);
ae6f73fa
KH
10992
10993 plist[1] = args[coding_arg_name] = Qundecided;
10994 plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10995 plist[5] = args[coding_arg_coding_type] = Qundecided;
10996 /* This is already set.
35befdaa 10997 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
d67b4f80 10998 plist[8] = intern_c_string (":charset-list");
ae6f73fa
KH
10999 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
11000 plist[11] = args[coding_arg_for_unibyte] = Qnil;
d67b4f80 11001 plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
ae6f73fa
KH
11002 plist[15] = args[coding_arg_eol_type] = Qnil;
11003 args[coding_arg_plist] = Flist (16, plist);
11004 Fdefine_coding_system_internal (coding_arg_max, args);
2c78b7e1
KH
11005 }
11006
2c78b7e1 11007 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
11008
11009 {
11010 int i;
11011
11012 for (i = 0; i < coding_category_max; i++)
11013 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
11014 }
fcbcfb64
KH
11015#if defined (MSDOS) || defined (WINDOWSNT)
11016 system_eol_type = Qdos;
11017#else
11018 system_eol_type = Qunix;
11019#endif
11020 staticpro (&system_eol_type);
4ed46869
KH
11021}
11022
68c45bf0
PE
11023char *
11024emacs_strerror (error_number)
11025 int error_number;
11026{
11027 char *str;
11028
ca9c0567 11029 synchronize_system_messages_locale ();
68c45bf0
PE
11030 str = strerror (error_number);
11031
11032 if (! NILP (Vlocale_coding_system))
11033 {
11034 Lisp_Object dec = code_convert_string_norecord (build_string (str),
11035 Vlocale_coding_system,
11036 0);
d5db4077 11037 str = (char *) SDATA (dec);
68c45bf0
PE
11038 }
11039
11040 return str;
11041}
11042
4ed46869 11043#endif /* emacs */
9ffd559c
KH
11044
11045/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
11046 (do not change this comment) */