* nsterm.m (ns_set_vertical_scroll_bar, ns_redeem_scroll_bar)
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
acaf905b 2 Copyright (C) 2001-2012 Free Software Foundation, Inc.
7976eda0 3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5df4f04c 4 2005, 2006, 2007, 2008, 2009, 2010, 2011
ce03bf76
KH
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
8f924df7 7 Copyright (C) 2003
df7492f9
KH
8 National Institute of Advanced Industrial Science and Technology (AIST)
9 Registration Number H13PRO009
4ed46869 10
369314dc
KH
11This file is part of GNU Emacs.
12
9ec0b715 13GNU Emacs is free software: you can redistribute it and/or modify
369314dc 14it under the terms of the GNU General Public License as published by
9ec0b715
GM
15the Free Software Foundation, either version 3 of the License, or
16(at your option) any later version.
4ed46869 17
369314dc
KH
18GNU Emacs is distributed in the hope that it will be useful,
19but WITHOUT ANY WARRANTY; without even the implied warranty of
20MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21GNU General Public License for more details.
4ed46869 22
369314dc 23You should have received a copy of the GNU General Public License
9ec0b715 24along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 56 coding system.
4ed46869 57
34809aa6
EZ
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. On
59 the C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
4ed46869 63
e19c3639 64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
4ed46869 66
df7492f9
KH
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
4ed46869
KH
82
83 The most famous coding system for multiple character sets. X's
df7492f9
KH
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
4ed46869 87
df7492f9 88 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on the operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is a two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
cfb43547 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
4ed46869
KH
138
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9 146 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
147 CODING conforms to the format of XXX, and update the members of
148 DETECT_INFO.
df7492f9 149
ff0dacd7 150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
151
152 Below is the template of these functions. */
153
#if 0
/* Template for the `detect_coding_XXX ()' functions.

   Scan the source bytes of CODING and report through DETECT_INFO
   whether they conform to the format of XXX.

   Return 1, after OR-ing any positive evidence into
   DETECT_INFO->found, when the source is exhausted without meeting a
   nonconforming byte.  Return 0, after setting CATEGORY_MASK_XXX in
   DETECT_INFO->rejected, as soon as a nonconforming byte is seen.  */
static int
detect_coding_XXX (struct coding_system *coding,
                   struct coding_detection_info *detect_info)
{
  const unsigned char *src = coding->source;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  ptrdiff_t consumed_chars = 0;
  int found = 0;
  ...;

  while (1)
    {
      /* Get one byte from the source.  If the source is exhausted, jump
         to no_more_source:.  */
      ONE_MORE_BYTE (c);

      if (! __C_conforms_to_XXX___ (c))
        break;
      /* Only a byte that actually points at XXX counts as positive
         evidence; a byte that merely conforms leaves FOUND alone.  */
      if (__C_strongly_suggests_XXX__ (c))
        found = CATEGORY_MASK_XXX;
    }
  /* The byte sequence is invalid for XXX.  */
  detect_info->rejected |= CATEGORY_MASK_XXX;
  return 0;

 no_more_source:
  /* The source exhausted successfully.  */
  detect_info->found |= found;
  return 1;
}
#endif
187
188/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
189
df7492f9
KH
190 These functions decode a byte sequence specified as a source by
191 CODING. The resulting multibyte text goes to a place pointed to by
192 CODING->charbuf, the length of which should not exceed
193 CODING->charbuf_size;
d46c5b12 194
df7492f9
KH
195 These functions set the information of original and decoded texts in
196 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
197 They also set CODING->result to one of CODING_RESULT_XXX indicating
198 how the decoding is finished.
d46c5b12 199
df7492f9 200 Below is the template of these functions. */
d46c5b12 201
#if 0
/* Template for the `decode_coding_XXX ()' functions.

   Decode the not-yet-consumed source bytes of CODING into
   CODING->charbuf, starting after the CODING->charbuf_used entries
   already there and never writing past CODING->charbuf_size entries.
   On exit CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used describe how far decoding got.  */
static void
decode_coding_XXXX (struct coding_system *coding)
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  /* SRC_BASE remembers the start position in source in each loop.
     The loop will be exited when there's not enough source code, or
     when there's no room in CHARBUF for a decoded character.  */
  const unsigned char *src_base;
  /* A buffer to produce decoded characters.  */
  int *charbuf = coding->charbuf + coding->charbuf_used;
  int *charbuf_end = coding->charbuf + coding->charbuf_size;
  int multibytep = coding->src_multibyte;

  while (1)
    {
      src_base = src;
      /* Stop only when CHARBUF is full; testing `<' here would leave
         the loop while there is still room.  */
      if (charbuf >= charbuf_end)
        /* No more room to produce a decoded character.  */
        break;
      ONE_MORE_BYTE (c);
      /* Decode it.  */
    }

 no_more_source:
  if (src_base < src_end
      && coding->mode & CODING_MODE_LAST_BLOCK)
    /* If the source ends by partial bytes to construct a character,
       treat them as eight-bit raw data.  */
    while (src_base < src_end && charbuf < charbuf_end)
      *charbuf++ = *src_base++;
  /* Remember how many bytes and characters we consumed.  If the
     source is multibyte, the bytes and chars are not identical.  */
  coding->consumed = coding->consumed_char = src_base - coding->source;
  /* Remember how many characters we produced.  */
  coding->charbuf_used = charbuf - coding->charbuf;
}
#endif
241
242/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
243
df7492f9
KH
244 These functions encode SRC_BYTES length text at SOURCE of Emacs'
245 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
246 goes to a place pointed to by DESTINATION, the length of which
247 should not exceed DST_BYTES.
d46c5b12 248
df7492f9
KH
249 These functions set the information of original and encoded texts in
250 the members produced, produced_char, consumed, and consumed_char of
251 the structure *CODING. They also set the member result to one of
252 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 253
df7492f9
KH
254 DST_BYTES zero means that source area and destination area are
255 overlapped, which means that we can produce encoded text until it
256 reaches the head of the not-yet-encoded source text.
d46c5b12 257
df7492f9 258 Below is a template of these functions. */
#if 0
/* Template for the `encode_coding_XXX ()' functions.

   Encode the CODING->charbuf_used characters held in CODING->charbuf
   into the destination area of CODING, appending after the
   CODING->produced bytes already there.  On exit CODING->produced
   and CODING->produced_char are updated.

   NOTE(review): the prototypes in this file declare the
   encode_coding_* functions as returning int, whereas this template
   returns void -- confirm which one is intended.  */
static void
encode_coding_XXX (struct coding_system *coding)
{
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  /* One past the last character to encode.  The base must be
     CODING->charbuf; CHARBUF itself is a plain `int *'.  */
  int *charbuf_end = coding->charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Stop early enough that one more loop iteration cannot overrun
     the destination.  */
  unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
  ptrdiff_t produced_chars = 0;

  for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
    {
      int c = *charbuf;
      /* Encode C into DST, and increment DST.  */
    }
 label_no_more_destination:
  /* How many chars and bytes we produced.  */
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
}
#endif
282
4ed46869
KH
283\f
284/*** 1. Preamble ***/
285
68c45bf0 286#include <config.h>
4ed46869 287#include <stdio.h>
d7306fe6 288#include <setjmp.h>
4ed46869 289
4ed46869 290#include "lisp.h"
df7492f9 291#include "character.h"
e5560ff7 292#include "buffer.h"
4ed46869
KH
293#include "charset.h"
294#include "ccl.h"
df7492f9 295#include "composite.h"
4ed46869
KH
296#include "coding.h"
297#include "window.h"
b8299c66
KL
298#include "frame.h"
299#include "termhooks.h"
4ed46869 300
df7492f9 301Lisp_Object Vcoding_system_hash_table;
4ed46869 302
955cbe7b
PE
303static Lisp_Object Qcoding_system, Qeol_type;
304static Lisp_Object Qcoding_aliases;
1965cb73 305Lisp_Object Qunix, Qdos;
4ed46869 306Lisp_Object Qbuffer_file_coding_system;
955cbe7b
PE
307static Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
308static Lisp_Object Qdefault_char;
27901516 309Lisp_Object Qno_conversion, Qundecided;
955cbe7b
PE
310Lisp_Object Qcharset, Qutf_8;
311static Lisp_Object Qiso_2022;
312static Lisp_Object Qutf_16, Qshift_jis, Qbig5;
313static Lisp_Object Qbig, Qlittle;
314static Lisp_Object Qcoding_system_history;
315static Lisp_Object Qvalid_codes;
316static Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
317static Lisp_Object QCdecode_translation_table, QCencode_translation_table;
318static Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
319static Lisp_Object QCascii_compatible_p;
4ed46869 320
387f6ba5 321Lisp_Object Qcall_process, Qcall_process_region;
4ed46869 322Lisp_Object Qstart_process, Qopen_network_stream;
955cbe7b 323static Lisp_Object Qtarget_idx;
4ed46869 324
955cbe7b
PE
325static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
326static Lisp_Object Qinterrupted, Qinsufficient_memory;
065e3595 327
44e8490d
KH
328/* If a symbol has this property, evaluate the value to define the
329 symbol as a coding system. */
330static Lisp_Object Qcoding_system_define_form;
331
fcbcfb64
KH
332/* Format of end-of-line decided by system. This is Qunix on
333 Unix and Mac, Qdos on DOS/Windows.
334 This has an effect only for external encoding (i.e. for output to
335 file and process), not for in-buffer or Lisp string encoding. */
336static Lisp_Object system_eol_type;
337
4ed46869
KH
338#ifdef emacs
339
4608c386 340Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 341
d46c5b12
KH
342/* Coding system emacs-mule and raw-text are for converting only
343 end-of-line format. */
344Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 345Lisp_Object Qutf_8_emacs;
ecf488bc 346
4ed46869
KH
347/* Coding-systems are handed between Emacs Lisp programs and C internal
348 routines by the following three variables. */
c4825358
KH
349/* Coding system to be used to encode text for terminal display when
350 terminal coding system is nil. */
351struct coding_system safe_terminal_coding;
352
4ed46869
KH
353#endif /* emacs */
354
f967223b
KH
355Lisp_Object Qtranslation_table;
356Lisp_Object Qtranslation_table_id;
955cbe7b
PE
357static Lisp_Object Qtranslation_table_for_decode;
358static Lisp_Object Qtranslation_table_for_encode;
4ed46869 359
df7492f9 360/* Two special coding systems. */
74ab6df5
PE
361static Lisp_Object Vsjis_coding_system;
362static Lisp_Object Vbig5_coding_system;
df7492f9 363
/* ISO2022 section */

/* Each macro below takes CODING, a pointer to a `struct
   coding_system', and reads one piece of its ISO2022 state.  */

/* Integer stored at index REG of the `coding_attr_iso_initial'
   attribute of CODING's attribute vector.  */
#define CODING_ISO_INITIAL(coding, reg)                 \
  (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
                     coding_attr_iso_initial),          \
               reg)))


/* Value of CODING->safe_charsets[CHARSET_ID], or -1 when CHARSET_ID
   exceeds CODING->max_charset_id or the stored value is 255.
   CHARSET_ID is evaluated more than once.  */
#define CODING_ISO_REQUEST(coding, charset_id)          \
  (((charset_id) <= (coding)->max_charset_id            \
    ? ((coding)->safe_charsets[charset_id] != 255       \
       ? (coding)->safe_charsets[charset_id]            \
       : -1)                                            \
    : -1))


#define CODING_ISO_FLAGS(coding)        \
  ((coding)->spec.iso_2022.flags)
#define CODING_ISO_DESIGNATION(coding, reg)     \
  ((coding)->spec.iso_2022.current_designation[reg])
#define CODING_ISO_INVOCATION(coding, plane)    \
  ((coding)->spec.iso_2022.current_invocation[plane])
#define CODING_ISO_SINGLE_SHIFTING(coding)      \
  ((coding)->spec.iso_2022.single_shifting)
#define CODING_ISO_BOL(coding)  \
  ((coding)->spec.iso_2022.bol)
/* Designation of the register currently invoked to PLANE.  */
#define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
  CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
#define CODING_ISO_CMP_STATUS(coding)   \
  (&(coding)->spec.iso_2022.cmp_status)
#define CODING_ISO_EXTSEGMENT_LEN(coding)       \
  ((coding)->spec.iso_2022.ctext_extended_segment_len)
#define CODING_ISO_EMBEDDED_UTF_8(coding)       \
  ((coding)->spec.iso_2022.embedded_utf_8)
df7492f9
KH
398
/* Control characters of ISO2022.  */
/*                      code               function */
#define ISO_CODE_SO     0x0E            /* shift-out */
#define ISO_CODE_SI     0x0F            /* shift-in */
#define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
#define ISO_CODE_ESC    0x1B            /* escape */
#define ISO_CODE_SS2    0x8E            /* single-shift-2 */
#define ISO_CODE_SS3    0x8F            /* single-shift-3 */
#define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */

/* All code (1-byte) of ISO2022 is classified into one of the
   following.  */
enum iso_code_class_type
  {
    ISO_control_0,              /* Control codes in the range
                                   0x00..0x1F and 0x7F, except for the
                                   following 4 codes.
                                   NOTE(review): 0x7F is also named by
                                   ISO_0x20_or_0x7F below -- confirm
                                   which class it really gets.  */
    ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
    ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
    ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
    ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
    ISO_control_1,              /* Control codes in the range
                                   0x80..0x9F, except for the
                                   following 3 codes.  */
    ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
    ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
    ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
    ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
    ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
    ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
    ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
  };
05e6f5dc 431
/** The macros CODING_ISO_FLAG_XXX define the flag bits of the
    `iso-flags' attribute of an iso2022 coding system.  */

/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
   instead of the correct short-form sequence (e.g. ESC $ A).  */
#define CODING_ISO_FLAG_LONG_FORM       0x0001

/* If set, reset graphic planes and registers at end-of-line to the
   initial state.  */
#define CODING_ISO_FLAG_RESET_AT_EOL    0x0002

/* If set, reset graphic planes and registers before any control
   characters to the initial state.  */
#define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004

/* If set, encode by 7-bit environment.  */
#define CODING_ISO_FLAG_SEVEN_BITS      0x0008

/* If set, use locking-shift function.  */
#define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010

/* If set, use single-shift function.  Overrides
   CODING_ISO_FLAG_LOCKING_SHIFT.  */
#define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020

/* If set, use designation escape sequence.  */
#define CODING_ISO_FLAG_DESIGNATION     0x0040

/* If set, produce revision number sequence.  */
#define CODING_ISO_FLAG_REVISION        0x0080

/* If set, produce ISO6429's direction specifying sequence.  */
#define CODING_ISO_FLAG_DIRECTION       0x0100

/* If set, assume designation states are reset at beginning of line on
   output.  */
#define CODING_ISO_FLAG_INIT_AT_BOL     0x0200

/* If set, designation sequence should be placed at beginning of line
   on output.  */
#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400

/* If set, do not encode unsafe characters on output.  */
#define CODING_ISO_FLAG_SAFE            0x0800

/* If set, extra latin codes (128..159) are accepted as a valid code
   on input.  */
#define CODING_ISO_FLAG_LATIN_EXTRA     0x1000

#define CODING_ISO_FLAG_COMPOSITION     0x2000

/* #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000 */

#define CODING_ISO_FLAG_USE_ROMAN       0x8000

#define CODING_ISO_FLAG_USE_OLDJIS      0x10000

#define CODING_ISO_FLAG_FULL_SUPPORT    0x100000

/* A character to be produced on output if encoding of the original
   character is prohibited by CODING_ISO_FLAG_SAFE.  */
#define CODING_INHIBIT_CHARACTER_SUBSTITUTION   '?'
aa72b389 494
/* Each macro below takes CODING, a pointer to a `struct
   coding_system'.  */

/* UTF-8 section */
#define CODING_UTF_8_BOM(coding)        \
  ((coding)->spec.utf_8_bom)

/* UTF-16 section */
#define CODING_UTF_16_BOM(coding)       \
  ((coding)->spec.utf_16.bom)

#define CODING_UTF_16_ENDIAN(coding)    \
  ((coding)->spec.utf_16.endian)

#define CODING_UTF_16_SURROGATE(coding) \
  ((coding)->spec.utf_16.surrogate)


/* CCL section.  These read the corresponding attribute from CODING's
   attribute vector.  */
#define CODING_CCL_DECODER(coding)      \
  AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
#define CODING_CCL_ENCODER(coding)      \
  AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
#define CODING_CCL_VALIDS(coding)                                          \
  (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 517
/* Index for each coding category in `coding_categories'.  The
   enumerators are also used as bit positions by the
   CATEGORY_MASK_XXX macros.  */

enum coding_category
  {
    coding_category_iso_7,
    coding_category_iso_7_tight,
    coding_category_iso_8_1,
    coding_category_iso_8_2,
    coding_category_iso_7_else,
    coding_category_iso_8_else,
    coding_category_utf_8_auto,
    coding_category_utf_8_nosig,
    coding_category_utf_8_sig,
    coding_category_utf_16_auto,
    coding_category_utf_16_be,
    coding_category_utf_16_le,
    coding_category_utf_16_be_nosig,
    coding_category_utf_16_le_nosig,
    coding_category_charset,
    coding_category_sjis,
    coding_category_big5,
    coding_category_ccl,
    coding_category_emacs_mule,
    /* All above are targets of code detection.  */
    coding_category_raw_text,
    coding_category_undecided,
    /* Number of categories; used as the size of the per-category
       tables.  */
    coding_category_max
  };
546
/* Definitions of flag bits used in detect_coding_XXXX.  Each mask is
   the single bit whose position is the matching `enum coding_category'
   value.  */
#define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
#define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
#define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
#define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
#define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
#define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
#define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
#define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
#define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
#define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
#define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
#define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
#define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
#define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
#define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
#define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
#define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
#define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
#define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
#define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)

/* This value is returned if detect_coding_mask () finds nothing other
   than ASCII characters.  */
#define CATEGORY_MASK_ANY               \
  (CATEGORY_MASK_ISO_7                  \
   | CATEGORY_MASK_ISO_7_TIGHT          \
   | CATEGORY_MASK_ISO_8_1              \
   | CATEGORY_MASK_ISO_8_2              \
   | CATEGORY_MASK_ISO_7_ELSE           \
   | CATEGORY_MASK_ISO_8_ELSE           \
   | CATEGORY_MASK_UTF_8_AUTO           \
   | CATEGORY_MASK_UTF_8_NOSIG          \
   | CATEGORY_MASK_UTF_8_SIG            \
   | CATEGORY_MASK_UTF_16_AUTO          \
   | CATEGORY_MASK_UTF_16_BE            \
   | CATEGORY_MASK_UTF_16_LE            \
   | CATEGORY_MASK_UTF_16_BE_NOSIG      \
   | CATEGORY_MASK_UTF_16_LE_NOSIG      \
   | CATEGORY_MASK_CHARSET              \
   | CATEGORY_MASK_SJIS                 \
   | CATEGORY_MASK_BIG5                 \
   | CATEGORY_MASK_CCL                  \
   | CATEGORY_MASK_EMACS_MULE)


#define CATEGORY_MASK_ISO_7BIT \
  (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)

#define CATEGORY_MASK_ISO_8BIT \
  (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)

#define CATEGORY_MASK_ISO_ELSE \
  (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)

#define CATEGORY_MASK_ISO_ESCAPE        \
  (CATEGORY_MASK_ISO_7                  \
   | CATEGORY_MASK_ISO_7_TIGHT          \
   | CATEGORY_MASK_ISO_7_ELSE           \
   | CATEGORY_MASK_ISO_8_ELSE)

#define CATEGORY_MASK_ISO       \
  (  CATEGORY_MASK_ISO_7BIT     \
   | CATEGORY_MASK_ISO_8BIT     \
   | CATEGORY_MASK_ISO_ELSE)

#define CATEGORY_MASK_UTF_16            \
  (CATEGORY_MASK_UTF_16_AUTO            \
   | CATEGORY_MASK_UTF_16_BE            \
   | CATEGORY_MASK_UTF_16_LE            \
   | CATEGORY_MASK_UTF_16_BE_NOSIG      \
   | CATEGORY_MASK_UTF_16_LE_NOSIG)

#define CATEGORY_MASK_UTF_8             \
  (CATEGORY_MASK_UTF_8_AUTO             \
   | CATEGORY_MASK_UTF_8_NOSIG          \
   | CATEGORY_MASK_UTF_8_SIG)
df7492f9 624
df7492f9 625/* Table of coding categories (Lisp symbols). This variable is for
ad1746f5 626 internal use only. */
df7492f9
KH
627static Lisp_Object Vcoding_category_table;
628
629/* Table of coding-categories ordered by priority. */
630static enum coding_category coding_priorities[coding_category_max];
631
632/* Nth element is a coding context for the coding system bound to the
633 Nth coding category. */
634static struct coding_system coding_categories[coding_category_max];
635
/*** Commonly used macros and functions ***/

/* Smaller and larger of A and B.  Defined only when no earlier
   definition exists.  The selected argument is evaluated twice, so
   do not pass expressions with side effects.  */
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
4ed46869 644
/* Set ATTRS to the attribute vector of CODING (a pointer to a struct
   coding_system), then set CHARSET_LIST to the charset list taken
   from those attributes.  ATTRS and CHARSET_LIST must be lvalues.  */
#define CODING_GET_INFO(coding, attrs, charset_list)    \
  do {                                                  \
    (attrs) = CODING_ID_ATTRS ((coding)->id);           \
    (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
  } while (0)
4ed46869 650
4ed46869 651
/* Safely get one byte from the source text pointed by SRC which ends
   at SRC_END, and set C to that byte.  If there are not enough bytes
   in the source, it jumps to `no_more_source'; in that case, if SRC
   has advanced past SRC_BASE, CODING_RESULT_INSUFFICIENT_SRC is
   recorded first.

   If multibytep is nonzero and the byte read has its high bit set:
     - a lead byte of 0xC0 or 0xC1 is combined with the next byte into
       the single value ((C & 1) << 6) | next-byte;
     - any other lead byte starts a multibyte character: C is set to
       the negative value of the character code, SRC is advanced past
       the character, and CODING_RESULT_INVALID_SRC is recorded.

   CONSUMED_CHARS is incremented once per successful call.  The
   caller should declare and set these variables appropriately in
   advance:
        src, src_end, src_base, multibytep, consumed_chars, coding
   and provide a `no_more_source' label.

   NOTE(review): the 0xC0/0xC1 branch reads the second byte without
   comparing SRC against SRC_END; presumably a multibyte source
   guarantees the trailing byte is present -- confirm.  */

#define ONE_MORE_BYTE(c)                                \
  do {                                                  \
    if (src == src_end)                                 \
      {                                                 \
        if (src_base < src)                             \
          record_conversion_result                      \
            (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
        goto no_more_source;                            \
      }                                                 \
    c = *src++;                                         \
    if (multibytep && (c & 0x80))                       \
      {                                                 \
        if ((c & 0xFE) == 0xC0)                         \
          c = ((c & 1) << 6) | *src++;                  \
        else                                            \
          {                                             \
            src--;                                      \
            c = - string_char (src, &src, NULL);        \
            record_conversion_result                    \
              (coding, CODING_RESULT_INVALID_SRC);      \
          }                                             \
      }                                                 \
    consumed_chars++;                                   \
  } while (0)
684
/* Safely get two bytes from the source text pointed by SRC which ends
   at SRC_END, and set C1 and C2 to those bytes while skipping the
   heading multibyte characters.  If there are not enough bytes in the
   source, it jumps to `no_more_source'.

   When multibytep is nonzero, a two-byte sequence led by 0xC0 or 0xC1
   is combined into the single value ((lead & 1) << 6) | next-byte.
   Any other multibyte character met while fetching C1 is skipped
   (using BYTES_BY_CHAR_HEAD) and the fetch is retried; if one is met
   for C2, C2 is set to -1.

   The caller should declare and set these variables appropriately in
   advance:
        src, src_end, multibytep
   and provide a `no_more_source' label.  C1 must be able to hold a
   negative value, because -1 is used internally as the "skip" marker.
   It is intended that this macro is used in detect_coding_utf_16.  */

#define TWO_MORE_BYTES(c1, c2)                          \
  do {                                                  \
    do {                                                \
      if (src == src_end)                               \
        goto no_more_source;                            \
      c1 = *src++;                                      \
      if (multibytep && (c1 & 0x80))                    \
        {                                               \
          if ((c1 & 0xFE) == 0xC0)                      \
            c1 = ((c1 & 1) << 6) | *src++;              \
          else                                          \
            {                                           \
              src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
              c1 = -1;                                  \
            }                                           \
        }                                               \
    } while (c1 < 0);                                   \
    if (src == src_end)                                 \
      goto no_more_source;                              \
    c2 = *src++;                                        \
    if (multibytep && (c2 & 0x80))                      \
      {                                                 \
        if ((c2 & 0xFE) == 0xC0)                        \
          c2 = ((c2 & 1) << 6) | *src++;                \
        else                                            \
          c2 = -1;                                      \
      }                                                 \
  } while (0)
723
aa72b389 724
/* Store a byte C in the place pointed by DST and increment DST to the
   next free point, and increment PRODUCED_CHARS.  The caller should
   assure that C is 0..127, and declare and set the variables `dst'
   and `produced_chars' appropriately in advance.  */

#define EMIT_ONE_ASCII_BYTE(c)  \
  do {                          \
    produced_chars++;           \
    *dst++ = (c);               \
  } while (0)


/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2.  */

#define EMIT_TWO_ASCII_BYTES(c1, c2)    \
  do {                                  \
    produced_chars += 2;                \
    *dst++ = (c1), *dst++ = (c2);       \
  } while (0)
aa72b389
KH
746
747
/* Store a byte C in the place pointed by DST and increment DST to the
   next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
   nonzero, store in an appropriate multibyte form: a value of 0x80 or
   above is first converted with BYTE8_TO_CHAR, and the result is
   written with CHAR_STRING_ADVANCE.  The caller should declare and
   set the variables `dst', `multibytep' and `produced_chars'
   appropriately in advance.  */

#define EMIT_ONE_BYTE(c)                        \
  do {                                          \
    produced_chars++;                           \
    if (multibytep)                             \
      {                                         \
        unsigned ch = (c);                      \
        if (ch >= 0x80)                         \
          ch = BYTE8_TO_CHAR (ch);              \
        CHAR_STRING_ADVANCE (ch, dst);          \
      }                                         \
    else                                        \
      *dst++ = (c);                             \
  } while (0)


/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */

#define EMIT_TWO_BYTES(c1, c2)                  \
  do {                                          \
    produced_chars += 2;                        \
    if (multibytep)                             \
      {                                         \
        unsigned ch;                            \
                                                \
        ch = (c1);                              \
        if (ch >= 0x80)                         \
          ch = BYTE8_TO_CHAR (ch);              \
        CHAR_STRING_ADVANCE (ch, dst);          \
        ch = (c2);                              \
        if (ch >= 0x80)                         \
          ch = BYTE8_TO_CHAR (ch);              \
        CHAR_STRING_ADVANCE (ch, dst);          \
      }                                         \
    else                                        \
      {                                         \
        *dst++ = (c1);                          \
        *dst++ = (c2);                          \
      }                                         \
  } while (0)


/* Emit three bytes C1, C2 and C3, in that order.  */

#define EMIT_THREE_BYTES(c1, c2, c3)    \
  do {                                  \
    EMIT_ONE_BYTE (c1);                 \
    EMIT_TWO_BYTES (c2, c3);            \
  } while (0)


/* Emit four bytes C1, C2, C3 and C4, in that order.  */

#define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
  do {                                          \
    EMIT_TWO_BYTES (c1, c2);                    \
    EMIT_TWO_BYTES (c3, c4);                    \
  } while (0)
aa72b389 807
aa72b389 808
f6cbaf43 809/* Prototypes for static functions. */
f57e2426
J
810static void record_conversion_result (struct coding_system *coding,
811 enum coding_result_code result);
812static int detect_coding_utf_8 (struct coding_system *,
813 struct coding_detection_info *info);
814static void decode_coding_utf_8 (struct coding_system *);
815static int encode_coding_utf_8 (struct coding_system *);
816
817static int detect_coding_utf_16 (struct coding_system *,
818 struct coding_detection_info *info);
819static void decode_coding_utf_16 (struct coding_system *);
820static int encode_coding_utf_16 (struct coding_system *);
821
822static int detect_coding_iso_2022 (struct coding_system *,
823 struct coding_detection_info *info);
824static void decode_coding_iso_2022 (struct coding_system *);
825static int encode_coding_iso_2022 (struct coding_system *);
826
827static int detect_coding_emacs_mule (struct coding_system *,
828 struct coding_detection_info *info);
829static void decode_coding_emacs_mule (struct coding_system *);
830static int encode_coding_emacs_mule (struct coding_system *);
831
832static int detect_coding_sjis (struct coding_system *,
833 struct coding_detection_info *info);
834static void decode_coding_sjis (struct coding_system *);
835static int encode_coding_sjis (struct coding_system *);
836
837static int detect_coding_big5 (struct coding_system *,
838 struct coding_detection_info *info);
839static void decode_coding_big5 (struct coding_system *);
840static int encode_coding_big5 (struct coding_system *);
841
842static int detect_coding_ccl (struct coding_system *,
843 struct coding_detection_info *info);
844static void decode_coding_ccl (struct coding_system *);
845static int encode_coding_ccl (struct coding_system *);
846
847static void decode_coding_raw_text (struct coding_system *);
848static int encode_coding_raw_text (struct coding_system *);
849
c1892f11
PE
850static void coding_set_source (struct coding_system *);
851static ptrdiff_t coding_change_source (struct coding_system *);
852static void coding_set_destination (struct coding_system *);
853static ptrdiff_t coding_change_destination (struct coding_system *);
d311d28c 854static void coding_alloc_by_realloc (struct coding_system *, ptrdiff_t);
f57e2426 855static void coding_alloc_by_making_gap (struct coding_system *,
d311d28c 856 ptrdiff_t, ptrdiff_t);
f57e2426 857static unsigned char *alloc_destination (struct coding_system *,
d311d28c 858 ptrdiff_t, unsigned char *);
f57e2426 859static void setup_iso_safe_charsets (Lisp_Object);
6e6c82a4 860static ptrdiff_t encode_designation_at_bol (struct coding_system *,
5eb05ea3 861 int *, int *, unsigned char *);
f57e2426 862static int detect_eol (const unsigned char *,
d311d28c 863 ptrdiff_t, enum coding_category);
f57e2426
J
864static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
865static void decode_eol (struct coding_system *);
866static Lisp_Object get_translation_table (Lisp_Object, int, int *);
867static Lisp_Object get_translation (Lisp_Object, int *, int *);
868static int produce_chars (struct coding_system *, Lisp_Object, int);
55d4c1b2 869static inline void produce_charset (struct coding_system *, int *,
d311d28c
PE
870 ptrdiff_t);
871static void produce_annotation (struct coding_system *, ptrdiff_t);
f57e2426 872static int decode_coding (struct coding_system *);
d311d28c 873static inline int *handle_composition_annotation (ptrdiff_t, ptrdiff_t,
f57e2426 874 struct coding_system *,
d311d28c
PE
875 int *, ptrdiff_t *);
876static inline int *handle_charset_annotation (ptrdiff_t, ptrdiff_t,
f57e2426 877 struct coding_system *,
d311d28c 878 int *, ptrdiff_t *);
f57e2426
J
879static void consume_chars (struct coding_system *, Lisp_Object, int);
880static int encode_coding (struct coding_system *);
881static Lisp_Object make_conversion_work_buffer (int);
882static Lisp_Object code_conversion_restore (Lisp_Object);
55d4c1b2 883static inline int char_encodable_p (int, Lisp_Object);
f57e2426 884static Lisp_Object make_subsidiaries (Lisp_Object);
f6cbaf43 885
065e3595
KH
886static void
887record_conversion_result (struct coding_system *coding,
888 enum coding_result_code result)
889{
890 coding->result = result;
891 switch (result)
892 {
893 case CODING_RESULT_INSUFFICIENT_SRC:
894 Vlast_code_conversion_error = Qinsufficient_source;
895 break;
896 case CODING_RESULT_INCONSISTENT_EOL:
897 Vlast_code_conversion_error = Qinconsistent_eol;
898 break;
899 case CODING_RESULT_INVALID_SRC:
900 Vlast_code_conversion_error = Qinvalid_source;
901 break;
902 case CODING_RESULT_INTERRUPT:
903 Vlast_code_conversion_error = Qinterrupted;
904 break;
905 case CODING_RESULT_INSUFFICIENT_MEM:
906 Vlast_code_conversion_error = Qinsufficient_memory;
907 break;
ebaf11b6
KH
908 case CODING_RESULT_INSUFFICIENT_DST:
909 /* Don't record this error in Vlast_code_conversion_error
910 because it happens just temporarily and is resolved when the
911 whole conversion is finished. */
912 break;
409ea3a1
AS
913 case CODING_RESULT_SUCCESS:
914 break;
35befdaa
KH
915 default:
916 Vlast_code_conversion_error = intern ("Unknown error");
065e3595
KH
917 }
918}
919
5eb05ea3
KH
920/* These wrapper macros are used to preserve validity of pointers into
921 buffer text across calls to decode_char, encode_char, etc, which
922 could cause relocation of buffers if it loads a charset map,
923 because loading a charset map allocates large structures. */
924
df7492f9
KH
925#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
926 do { \
8f50130c 927 ptrdiff_t offset; \
5eb05ea3 928 \
df7492f9
KH
929 charset_map_loaded = 0; \
930 c = DECODE_CHAR (charset, code); \
5eb05ea3 931 if (charset_map_loaded \
c1892f11 932 && (offset = coding_change_source (coding))) \
df7492f9 933 { \
df7492f9
KH
934 src += offset; \
935 src_base += offset; \
936 src_end += offset; \
937 } \
aa72b389
KH
938 } while (0)
939
5eb05ea3
KH
940#define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code) \
941 do { \
8f50130c 942 ptrdiff_t offset; \
5eb05ea3
KH
943 \
944 charset_map_loaded = 0; \
945 code = ENCODE_CHAR (charset, c); \
946 if (charset_map_loaded \
c1892f11 947 && (offset = coding_change_destination (coding))) \
5eb05ea3
KH
948 { \
949 dst += offset; \
950 dst_end += offset; \
951 } \
952 } while (0)
953
954#define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \
955 do { \
8f50130c 956 ptrdiff_t offset; \
5eb05ea3
KH
957 \
958 charset_map_loaded = 0; \
959 charset = char_charset (c, charset_list, code_return); \
960 if (charset_map_loaded \
c1892f11 961 && (offset = coding_change_destination (coding))) \
5eb05ea3
KH
962 { \
963 dst += offset; \
964 dst_end += offset; \
965 } \
966 } while (0)
967
968#define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \
969 do { \
8f50130c 970 ptrdiff_t offset; \
5eb05ea3
KH
971 \
972 charset_map_loaded = 0; \
973 result = CHAR_CHARSET_P (c, charset); \
974 if (charset_map_loaded \
c1892f11 975 && (offset = coding_change_destination (coding))) \
5eb05ea3
KH
976 { \
977 dst += offset; \
978 dst_end += offset; \
979 } \
980 } while (0)
981
aa72b389 982
119852e7
KH
983/* If there is not room at dst for BYTES more bytes, allocate memory
984 for coding->destination and update dst and dst_end. We don't have
985 to take care of coding->source, which may be relocated; that is
986 handled by calling coding_set_source in encode_coding. */
987
df7492f9
KH
988#define ASSURE_DESTINATION(bytes) \
989 do { \
990 if (dst + (bytes) >= dst_end) \
991 { \
d311d28c 992 ptrdiff_t more_bytes = charbuf_end - charbuf + (bytes); \
df7492f9
KH
993 \
994 dst = alloc_destination (coding, more_bytes, dst); \
995 dst_end = coding->destination + coding->dst_bytes; \
996 } \
997 } while (0)
aa72b389 998
aa72b389 999
db274c7a
KH
/* Store multibyte form of the character C in P, and advance P to the
   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
   never calls MAYBE_UNIFY_CHAR.

   C and P are each evaluated several times, so neither argument may
   have side effects, and P must be a modifiable pointer.  Every use
   of C is parenthesized so that an expression argument (e.g. `a | b')
   expands with the intended precedence in all branches.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
  } while (0)
1029
1030
1031/* Return the character code of character whose multibyte form is at
1032 P, and advance P to the end of the multibyte form. This is like
1033 STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR. */
1034
1035#define STRING_CHAR_ADVANCE_NO_UNIFY(p) \
1036 (!((p)[0] & 0x80) \
1037 ? *(p)++ \
1038 : ! ((p)[0] & 0x20) \
1039 ? ((p) += 2, \
1040 ((((p)[-2] & 0x1F) << 6) \
1041 | ((p)[-1] & 0x3F) \
1042 | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
1043 : ! ((p)[0] & 0x10) \
1044 ? ((p) += 3, \
1045 ((((p)[-3] & 0x0F) << 12) \
1046 | (((p)[-2] & 0x3F) << 6) \
1047 | ((p)[-1] & 0x3F))) \
1048 : ! ((p)[0] & 0x08) \
1049 ? ((p) += 4, \
1050 ((((p)[-4] & 0xF) << 18) \
1051 | (((p)[-3] & 0x3F) << 12) \
1052 | (((p)[-2] & 0x3F) << 6) \
1053 | ((p)[-1] & 0x3F))) \
1054 : ((p) += 5, \
1055 ((((p)[-4] & 0x3F) << 18) \
1056 | (((p)[-3] & 0x3F) << 12) \
1057 | (((p)[-2] & 0x3F) << 6) \
1058 | ((p)[-1] & 0x3F))))
1059
aa72b389 1060
c1892f11 1061/* Set coding->source from coding->src_object. */
5eb05ea3 1062
c1892f11 1063static void
971de7fb 1064coding_set_source (struct coding_system *coding)
aa72b389 1065{
df7492f9
KH
1066 if (BUFFERP (coding->src_object))
1067 {
2cb26057 1068 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 1069
df7492f9 1070 if (coding->src_pos < 0)
2cb26057 1071 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 1072 else
2cb26057 1073 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 1074 }
df7492f9 1075 else if (STRINGP (coding->src_object))
aa72b389 1076 {
8f924df7 1077 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 1078 }
df7492f9 1079 else
f38b440c
PE
1080 {
1081 /* Otherwise, the source is C string and is never relocated
1082 automatically. Thus we don't have to update anything. */
1083 }
df7492f9 1084}
aa72b389 1085
5eb05ea3 1086
c1892f11
PE
1087/* Set coding->source from coding->src_object, and return how many
1088 bytes coding->source was changed. */
5eb05ea3 1089
8f50130c 1090static ptrdiff_t
c1892f11 1091coding_change_source (struct coding_system *coding)
df7492f9 1092{
c1892f11
PE
1093 const unsigned char *orig = coding->source;
1094 coding_set_source (coding);
1095 return coding->source - orig;
1096}
1097
5eb05ea3 1098
c1892f11
PE
1099/* Set coding->destination from coding->dst_object. */
1100
1101static void
1102coding_set_destination (struct coding_system *coding)
1103{
df7492f9 1104 if (BUFFERP (coding->dst_object))
aa72b389 1105 {
a0241d01 1106 if (BUFFERP (coding->src_object) && coding->src_pos < 0)
aa72b389 1107 {
13818c30 1108 coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
28f67a95
KH
1109 coding->dst_bytes = (GAP_END_ADDR
1110 - (coding->src_bytes - coding->consumed)
1111 - coding->destination);
aa72b389 1112 }
df7492f9 1113 else
28f67a95
KH
1114 {
1115 /* We are sure that coding->dst_pos_byte is before the gap
1116 of the buffer. */
1117 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
13818c30 1118 + coding->dst_pos_byte - BEG_BYTE);
28f67a95
KH
1119 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1120 - coding->destination);
1121 }
df7492f9
KH
1122 }
1123 else
f38b440c
PE
1124 {
1125 /* Otherwise, the destination is C string and is never relocated
1126 automatically. Thus we don't have to update anything. */
1127 }
c1892f11
PE
1128}
1129
1130
1131/* Set coding->destination from coding->dst_object, and return how
1132 many bytes coding->destination was changed. */
1133
1134static ptrdiff_t
1135coding_change_destination (struct coding_system *coding)
1136{
1137 const unsigned char *orig = coding->destination;
1138 coding_set_destination (coding);
5eb05ea3 1139 return coding->destination - orig;
df7492f9
KH
1140}
1141
1142
1143static void
d311d28c 1144coding_alloc_by_realloc (struct coding_system *coding, ptrdiff_t bytes)
df7492f9 1145{
c9d624c6 1146 if (STRING_BYTES_BOUND - coding->dst_bytes < bytes)
d1f3d2af 1147 string_overflow ();
38182d90
PE
1148 coding->destination = xrealloc (coding->destination,
1149 coding->dst_bytes + bytes);
df7492f9
KH
1150 coding->dst_bytes += bytes;
1151}
1152
1153static void
cf84bb53 1154coding_alloc_by_making_gap (struct coding_system *coding,
d311d28c 1155 ptrdiff_t gap_head_used, ptrdiff_t bytes)
df7492f9 1156{
db274c7a 1157 if (EQ (coding->src_object, coding->dst_object))
df7492f9 1158 {
db274c7a
KH
1159 /* The gap may contain the produced data at the head and not-yet
1160 consumed data at the tail. To preserve those data, we at
1161 first make the gap size to zero, then increase the gap
1162 size. */
d311d28c 1163 ptrdiff_t add = GAP_SIZE;
db274c7a
KH
1164
1165 GPT += gap_head_used, GPT_BYTE += gap_head_used;
1166 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
df7492f9
KH
1167 make_gap (bytes);
1168 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
db274c7a 1169 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
df7492f9 1170 }
730fff51 1171 else
df7492f9 1172 {
2c78b7e1
KH
1173 Lisp_Object this_buffer;
1174
1175 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1176 set_buffer_internal (XBUFFER (coding->dst_object));
1177 make_gap (bytes);
1178 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1179 }
df7492f9 1180}
8f924df7 1181
df7492f9
KH
1182
1183static unsigned char *
d311d28c 1184alloc_destination (struct coding_system *coding, ptrdiff_t nbytes,
cf84bb53 1185 unsigned char *dst)
df7492f9 1186{
d311d28c 1187 ptrdiff_t offset = dst - coding->destination;
df7492f9
KH
1188
1189 if (BUFFERP (coding->dst_object))
db274c7a
KH
1190 {
1191 struct buffer *buf = XBUFFER (coding->dst_object);
1192
1193 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1194 }
aa72b389 1195 else
df7492f9 1196 coding_alloc_by_realloc (coding, nbytes);
df7492f9
KH
1197 coding_set_destination (coding);
1198 dst = coding->destination + offset;
1199 return dst;
1200}
aa72b389 1201
ff0dacd7
KH
1202/** Macros for annotations. */
1203
ff0dacd7
KH
1204/* An annotation data is stored in the array coding->charbuf in this
1205 format:
69a80ea3 1206 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
ff0dacd7
KH
1207 LENGTH is the number of elements in the annotation.
1208 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
69a80ea3 1209 NCHARS is the number of characters in the text annotated.
ff0dacd7
KH
1210
1211 The format of the following elements depends on ANNOTATION_MASK.
1212
1213 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1214 follow:
e951386e
KH
1215 ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1216
1217 NBYTES is the number of bytes specified in the header part of
1218 old-style emacs-mule encoding, or 0 for the other kind of
1219 composition.
1220
ff0dacd7 1221 METHOD is one of enum composition_method.
e951386e 1222
ad1746f5 1223 Optional COMPOSITION-COMPONENTS are characters and composition
ff0dacd7
KH
1224 rules.
1225
1226 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
e951386e
KH
1227 follows.
1228
1229 If ANNOTATION_MASK is 0, this annotation is just a space holder to
1230 recover from an invalid annotation, and should be skipped by
1231 produce_annotation. */
1232
1233/* Maximum length of the header of annotation data. */
1234#define MAX_ANNOTATION_LENGTH 5
ff0dacd7 1235
/* Store the three-element annotation header [ -LEN MASK NCHARS ] at
   BUF, advance BUF past it, and set coding->annotated.  `coding' is
   taken from the scope where the macro is expanded.

   The expansion deliberately does not end in a semicolon, so that
   `ADD_ANNOTATION_DATA (...);' is exactly one statement and can be
   used as the body of an `if' that has an `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1243
/* Store a composition annotation at BUF and advance BUF past it: the
   common header written by ADD_ANNOTATION_DATA (length 5,
   CODING_ANNOTATE_COMPOSITION_MASK, NCHARS) followed by NBYTES and
   METHOD.  The arguments are parenthesized, as in
   ADD_ANNOTATION_DATA, so that an expression such as `*pp' passed as
   BUF advances the intended pointer.  */
#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                     \
  do {                                                                        \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars);   \
    *(buf)++ = (nbytes);                                                      \
    *(buf)++ = (method);                                                      \
  } while (0)
1250
1251
69a80ea3
KH
/* Store a charset annotation at BUF and advance BUF past it: the
   common header written by ADD_ANNOTATION_DATA (length 4,
   CODING_ANNOTATE_CHARSET_MASK, NCHARS) followed by the charset ID.
   The arguments are parenthesized, as in ADD_ANNOTATION_DATA, so that
   an expression such as `*pp' passed as BUF advances the intended
   pointer.  */
#define ADD_CHARSET_DATA(buf, nchars, id)                                 \
  do {                                                                    \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars);   \
    *(buf)++ = (id);                                                      \
  } while (0)
1257
df7492f9
KH
1258\f
1259/*** 2. Emacs' internal format (emacs-utf-8) ***/
1260
1261
1262
1263\f
1264/*** 3. UTF-8 ***/
1265
1266/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1267 Check if a text is encoded in UTF-8. If it is, return 1, else
1268 return 0. */
df7492f9
KH
1269
1270#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1271#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1272#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1273#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1274#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1275#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1276
a470d443
KH
1277#define UTF_8_BOM_1 0xEF
1278#define UTF_8_BOM_2 0xBB
1279#define UTF_8_BOM_3 0xBF
1280
df7492f9 1281static int
cf84bb53
JB
1282detect_coding_utf_8 (struct coding_system *coding,
1283 struct coding_detection_info *detect_info)
df7492f9 1284{
065e3595 1285 const unsigned char *src = coding->source, *src_base;
8f924df7 1286 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 1287 int multibytep = coding->src_multibyte;
d311d28c 1288 ptrdiff_t consumed_chars = 0;
a470d443 1289 int bom_found = 0;
df7492f9
KH
1290 int found = 0;
1291
ff0dacd7 1292 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1293 /* A coding system of this category is always ASCII compatible. */
1294 src += coding->head_ascii;
1295
1296 while (1)
aa72b389 1297 {
df7492f9 1298 int c, c1, c2, c3, c4;
aa72b389 1299
065e3595 1300 src_base = src;
df7492f9 1301 ONE_MORE_BYTE (c);
065e3595 1302 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1303 continue;
1304 ONE_MORE_BYTE (c1);
065e3595 1305 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1306 break;
1307 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1308 {
a470d443 1309 found = 1;
df7492f9 1310 continue;
aa72b389 1311 }
df7492f9 1312 ONE_MORE_BYTE (c2);
065e3595 1313 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1314 break;
1315 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1316 {
a470d443
KH
1317 found = 1;
1318 if (src_base == coding->source
1319 && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1320 bom_found = 1;
df7492f9 1321 continue;
aa72b389 1322 }
df7492f9 1323 ONE_MORE_BYTE (c3);
065e3595 1324 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1325 break;
1326 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1327 {
a470d443 1328 found = 1;
df7492f9
KH
1329 continue;
1330 }
1331 ONE_MORE_BYTE (c4);
065e3595 1332 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1333 break;
1334 if (UTF_8_5_OCTET_LEADING_P (c))
1335 {
a470d443 1336 found = 1;
df7492f9
KH
1337 continue;
1338 }
1339 break;
aa72b389 1340 }
ff0dacd7 1341 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1342 return 0;
aa72b389 1343
df7492f9 1344 no_more_source:
065e3595 1345 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1346 {
ff0dacd7 1347 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1348 return 0;
aa72b389 1349 }
a470d443
KH
1350 if (bom_found)
1351 {
1352 /* A leading U+FEFF (bytes EF BB BF) doesn't necessarily mean a BOM. */
1353 detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1354 }
1355 else
1356 {
1357 detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
0e17387a
KH
1358 if (found)
1359 detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
a470d443 1360 }
ff0dacd7 1361 return 1;
aa72b389
KH
1362}
1363
4ed46869 1364
b73bfc1c 1365static void
971de7fb 1366decode_coding_utf_8 (struct coding_system *coding)
b73bfc1c 1367{
8f924df7
KH
1368 const unsigned char *src = coding->source + coding->consumed;
1369 const unsigned char *src_end = coding->source + coding->src_bytes;
1370 const unsigned char *src_base;
69a80ea3
KH
1371 int *charbuf = coding->charbuf + coding->charbuf_used;
1372 int *charbuf_end = coding->charbuf + coding->charbuf_size;
d311d28c 1373 ptrdiff_t consumed_chars = 0, consumed_chars_base = 0;
df7492f9 1374 int multibytep = coding->src_multibyte;
a470d443 1375 enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
2735d060 1376 int eol_dos =
0a9564cb 1377 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 1378 int byte_after_cr = -1;
4ed46869 1379
a470d443
KH
1380 if (bom != utf_without_bom)
1381 {
1382 int c1, c2, c3;
1383
1384 src_base = src;
1385 ONE_MORE_BYTE (c1);
1386 if (! UTF_8_3_OCTET_LEADING_P (c1))
1387 src = src_base;
1388 else
1389 {
159bd5a2 1390 ONE_MORE_BYTE (c2);
a470d443
KH
1391 if (! UTF_8_EXTRA_OCTET_P (c2))
1392 src = src_base;
1393 else
1394 {
159bd5a2 1395 ONE_MORE_BYTE (c3);
a470d443
KH
1396 if (! UTF_8_EXTRA_OCTET_P (c3))
1397 src = src_base;
1398 else
1399 {
1400 if ((c1 != UTF_8_BOM_1)
1401 || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1402 src = src_base;
1403 else
1404 CODING_UTF_8_BOM (coding) = utf_without_bom;
1405 }
1406 }
1407 }
1408 }
1409 CODING_UTF_8_BOM (coding) = utf_without_bom;
1410
df7492f9 1411 while (1)
b73bfc1c 1412 {
df7492f9 1413 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1414
df7492f9
KH
1415 src_base = src;
1416 consumed_chars_base = consumed_chars;
4af310db 1417
df7492f9 1418 if (charbuf >= charbuf_end)
b71f6f73
KH
1419 {
1420 if (byte_after_cr >= 0)
1421 src_base--;
1422 break;
1423 }
df7492f9 1424
119852e7
KH
1425 if (byte_after_cr >= 0)
1426 c1 = byte_after_cr, byte_after_cr = -1;
1427 else
1428 ONE_MORE_BYTE (c1);
065e3595
KH
1429 if (c1 < 0)
1430 {
1431 c = - c1;
1432 }
1a4990fb 1433 else if (UTF_8_1_OCTET_P (c1))
df7492f9 1434 {
2735d060 1435 if (eol_dos && c1 == '\r')
119852e7 1436 ONE_MORE_BYTE (byte_after_cr);
df7492f9 1437 c = c1;
4af310db 1438 }
df7492f9 1439 else
4af310db 1440 {
df7492f9 1441 ONE_MORE_BYTE (c2);
065e3595 1442 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1443 goto invalid_code;
1444 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1445 {
b0edb2c5
DL
1446 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1447 /* Reject overlong sequences here and below. Encoders
1448 producing them are incorrect, they can be misleading,
1449 and they mess up read/write invariance. */
1450 if (c < 128)
1451 goto invalid_code;
4af310db 1452 }
df7492f9 1453 else
aa72b389 1454 {
df7492f9 1455 ONE_MORE_BYTE (c3);
065e3595 1456 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1457 goto invalid_code;
1458 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1459 {
1460 c = (((c1 & 0xF) << 12)
1461 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1462 if (c < 0x800
1463 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1464 goto invalid_code;
1465 }
df7492f9
KH
1466 else
1467 {
1468 ONE_MORE_BYTE (c4);
065e3595 1469 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1470 goto invalid_code;
1471 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1472 {
df7492f9
KH
1473 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1474 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1475 if (c < 0x10000)
1476 goto invalid_code;
1477 }
df7492f9
KH
1478 else
1479 {
1480 ONE_MORE_BYTE (c5);
065e3595 1481 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1482 goto invalid_code;
1483 if (UTF_8_5_OCTET_LEADING_P (c1))
1484 {
1485 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1486 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1487 | (c5 & 0x3F));
b0edb2c5 1488 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1489 goto invalid_code;
1490 }
1491 else
1492 goto invalid_code;
1493 }
1494 }
aa72b389 1495 }
b73bfc1c 1496 }
df7492f9
KH
1497
1498 *charbuf++ = c;
1499 continue;
1500
1501 invalid_code:
1502 src = src_base;
1503 consumed_chars = consumed_chars_base;
1504 ONE_MORE_BYTE (c);
1505 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1506 coding->errors++;
aa72b389
KH
1507 }
1508
df7492f9
KH
1509 no_more_source:
1510 coding->consumed_char += consumed_chars_base;
1511 coding->consumed = src_base - coding->source;
1512 coding->charbuf_used = charbuf - coding->charbuf;
1513}
1514
1515
1516static int
971de7fb 1517encode_coding_utf_8 (struct coding_system *coding)
df7492f9
KH
1518{
1519 int multibytep = coding->dst_multibyte;
1520 int *charbuf = coding->charbuf;
1521 int *charbuf_end = charbuf + coding->charbuf_used;
1522 unsigned char *dst = coding->destination + coding->produced;
1523 unsigned char *dst_end = coding->destination + coding->dst_bytes;
d311d28c 1524 ptrdiff_t produced_chars = 0;
df7492f9
KH
1525 int c;
1526
a470d443
KH
1527 if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1528 {
1529 ASSURE_DESTINATION (3);
1530 EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1531 CODING_UTF_8_BOM (coding) = utf_without_bom;
1532 }
1533
df7492f9 1534 if (multibytep)
aa72b389 1535 {
df7492f9
KH
1536 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1537
1538 while (charbuf < charbuf_end)
b73bfc1c 1539 {
df7492f9 1540 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1541
df7492f9
KH
1542 ASSURE_DESTINATION (safe_room);
1543 c = *charbuf++;
28f67a95
KH
1544 if (CHAR_BYTE8_P (c))
1545 {
1546 c = CHAR_TO_BYTE8 (c);
1547 EMIT_ONE_BYTE (c);
1548 }
1549 else
1550 {
db274c7a 1551 CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
28f67a95
KH
1552 for (p = str; p < pend; p++)
1553 EMIT_ONE_BYTE (*p);
1554 }
b73bfc1c 1555 }
aa72b389 1556 }
df7492f9
KH
1557 else
1558 {
1559 int safe_room = MAX_MULTIBYTE_LENGTH;
1560
1561 while (charbuf < charbuf_end)
b73bfc1c 1562 {
df7492f9
KH
1563 ASSURE_DESTINATION (safe_room);
1564 c = *charbuf++;
f03caae0
KH
1565 if (CHAR_BYTE8_P (c))
1566 *dst++ = CHAR_TO_BYTE8 (c);
1567 else
db274c7a 1568 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
df7492f9 1569 produced_chars++;
4ed46869
KH
1570 }
1571 }
065e3595 1572 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1573 coding->produced_char += produced_chars;
1574 coding->produced = dst - coding->destination;
1575 return 0;
4ed46869
KH
1576}
1577
b73bfc1c 1578
df7492f9 1579/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1580 Check if a text is encoded in one of UTF-16 based coding systems.
1581 If it is, return 1, else return 0. */
aa72b389 1582
df7492f9
KH
1583#define UTF_16_HIGH_SURROGATE_P(val) \
1584 (((val) & 0xFC00) == 0xD800)
1585
1586#define UTF_16_LOW_SURROGATE_P(val) \
1587 (((val) & 0xFC00) == 0xDC00)
93dec019 1588
aa72b389 1589
df7492f9 1590static int
cf84bb53
JB
1591detect_coding_utf_16 (struct coding_system *coding,
1592 struct coding_detection_info *detect_info)
aa72b389 1593{
ef1b0ba7 1594 const unsigned char *src = coding->source;
8f924df7 1595 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 1596 int multibytep = coding->src_multibyte;
df7492f9 1597 int c1, c2;
aa72b389 1598
ff0dacd7 1599 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1600 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1601 && (coding->src_chars & 1))
ff0dacd7
KH
1602 {
1603 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1604 return 0;
1605 }
24a73b0a 1606
f56a4450 1607 TWO_MORE_BYTES (c1, c2);
df7492f9 1608 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1609 {
b49a1807
KH
1610 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1611 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1612 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1613 | CATEGORY_MASK_UTF_16_BE_NOSIG
1614 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1615 }
df7492f9 1616 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1617 {
b49a1807
KH
1618 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1619 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1620 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1621 | CATEGORY_MASK_UTF_16_BE_NOSIG
1622 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1623 }
220eeac9 1624 else if (c2 < 0)
f56a4450
KH
1625 {
1626 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1627 return 0;
1628 }
2f3cbb32 1629 else
24a73b0a 1630 {
2f3cbb32
KH
1631 /* We check the dispersion of Eth and Oth bytes where E is even and
1632 O is odd. If both are high, we assume binary data.*/
1633 unsigned char e[256], o[256];
1634 unsigned e_num = 1, o_num = 1;
1635
1636 memset (e, 0, 256);
1637 memset (o, 0, 256);
1638 e[c1] = 1;
1639 o[c2] = 1;
1640
cc13543e
KH
1641 detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1642 |CATEGORY_MASK_UTF_16_BE
1643 | CATEGORY_MASK_UTF_16_LE);
2f3cbb32 1644
7f1faf1c
KH
1645 while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1646 != CATEGORY_MASK_UTF_16)
2f3cbb32 1647 {
f56a4450 1648 TWO_MORE_BYTES (c1, c2);
220eeac9 1649 if (c2 < 0)
f56a4450 1650 break;
2f3cbb32
KH
1651 if (! e[c1])
1652 {
1653 e[c1] = 1;
1654 e_num++;
cc13543e
KH
1655 if (e_num >= 128)
1656 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
2f3cbb32
KH
1657 }
1658 if (! o[c2])
1659 {
977b85f4 1660 o[c2] = 1;
2f3cbb32 1661 o_num++;
cc13543e
KH
1662 if (o_num >= 128)
1663 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
2f3cbb32
KH
1664 }
1665 }
2f3cbb32 1666 return 0;
ff0dacd7 1667 }
2f3cbb32 1668
df7492f9 1669 no_more_source:
ff0dacd7 1670 return 1;
df7492f9 1671}
aa72b389 1672
df7492f9 1673static void
971de7fb 1674decode_coding_utf_16 (struct coding_system *coding)
df7492f9 1675{
8f924df7
KH
1676 const unsigned char *src = coding->source + coding->consumed;
1677 const unsigned char *src_end = coding->source + coding->src_bytes;
1678 const unsigned char *src_base;
69a80ea3 1679 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
1680 /* We may produces at most 3 chars in one loop. */
1681 int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
d311d28c 1682 ptrdiff_t consumed_chars = 0, consumed_chars_base = 0;
df7492f9 1683 int multibytep = coding->src_multibyte;
a470d443 1684 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
df7492f9
KH
1685 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1686 int surrogate = CODING_UTF_16_SURROGATE (coding);
2735d060 1687 int eol_dos =
0a9564cb 1688 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 1689 int byte_after_cr1 = -1, byte_after_cr2 = -1;
df7492f9 1690
a470d443 1691 if (bom == utf_with_bom)
aa72b389 1692 {
df7492f9 1693 int c, c1, c2;
4af310db 1694
aa72b389 1695 src_base = src;
df7492f9
KH
1696 ONE_MORE_BYTE (c1);
1697 ONE_MORE_BYTE (c2);
e19c3639 1698 c = (c1 << 8) | c2;
aa72b389 1699
b49a1807
KH
1700 if (endian == utf_16_big_endian
1701 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1702 {
b49a1807
KH
1703 /* The first two bytes are not BOM. Treat them as bytes
1704 for a normal character. */
1705 src = src_base;
1706 coding->errors++;
aa72b389 1707 }
a470d443 1708 CODING_UTF_16_BOM (coding) = utf_without_bom;
b49a1807 1709 }
a470d443 1710 else if (bom == utf_detect_bom)
b49a1807
KH
1711 {
1712 /* We have already tried to detect BOM and failed in
1713 detect_coding. */
a470d443 1714 CODING_UTF_16_BOM (coding) = utf_without_bom;
df7492f9 1715 }
aa72b389 1716
df7492f9
KH
1717 while (1)
1718 {
1719 int c, c1, c2;
1720
1721 src_base = src;
1722 consumed_chars_base = consumed_chars;
1723
df80c7f0 1724 if (charbuf >= charbuf_end)
b71f6f73
KH
1725 {
1726 if (byte_after_cr1 >= 0)
1727 src_base -= 2;
1728 break;
1729 }
df7492f9 1730
119852e7
KH
1731 if (byte_after_cr1 >= 0)
1732 c1 = byte_after_cr1, byte_after_cr1 = -1;
1733 else
1734 ONE_MORE_BYTE (c1);
065e3595
KH
1735 if (c1 < 0)
1736 {
1737 *charbuf++ = -c1;
1738 continue;
1739 }
119852e7
KH
1740 if (byte_after_cr2 >= 0)
1741 c2 = byte_after_cr2, byte_after_cr2 = -1;
1742 else
1743 ONE_MORE_BYTE (c2);
065e3595
KH
1744 if (c2 < 0)
1745 {
1746 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1747 *charbuf++ = -c2;
1748 continue;
1749 }
df7492f9 1750 c = (endian == utf_16_big_endian
e19c3639 1751 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
119852e7 1752
df7492f9 1753 if (surrogate)
fd3ae0b9 1754 {
df7492f9 1755 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1756 {
df7492f9
KH
1757 if (endian == utf_16_big_endian)
1758 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1759 else
1760 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1761 *charbuf++ = c1;
1762 *charbuf++ = c2;
1763 coding->errors++;
1764 if (UTF_16_HIGH_SURROGATE_P (c))
1765 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1766 else
df7492f9 1767 *charbuf++ = c;
fd3ae0b9
KH
1768 }
1769 else
df7492f9
KH
1770 {
1771 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1772 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1773 *charbuf++ = 0x10000 + c;
df7492f9 1774 }
fd3ae0b9 1775 }
aa72b389 1776 else
df7492f9
KH
1777 {
1778 if (UTF_16_HIGH_SURROGATE_P (c))
1779 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1780 else
119852e7 1781 {
2735d060 1782 if (eol_dos && c == '\r')
119852e7
KH
1783 {
1784 ONE_MORE_BYTE (byte_after_cr1);
1785 ONE_MORE_BYTE (byte_after_cr2);
1786 }
1787 *charbuf++ = c;
1788 }
8f924df7 1789 }
aa72b389 1790 }
df7492f9
KH
1791
1792 no_more_source:
1793 coding->consumed_char += consumed_chars_base;
1794 coding->consumed = src_base - coding->source;
1795 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1796}
b73bfc1c 1797
df7492f9 1798static int
971de7fb 1799encode_coding_utf_16 (struct coding_system *coding)
df7492f9
KH
1800{
1801 int multibytep = coding->dst_multibyte;
1802 int *charbuf = coding->charbuf;
1803 int *charbuf_end = charbuf + coding->charbuf_used;
1804 unsigned char *dst = coding->destination + coding->produced;
1805 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1806 int safe_room = 8;
a470d443 1807 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
df7492f9 1808 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
d311d28c 1809 ptrdiff_t produced_chars = 0;
df7492f9 1810 int c;
4ed46869 1811
a470d443 1812 if (bom != utf_without_bom)
df7492f9
KH
1813 {
1814 ASSURE_DESTINATION (safe_room);
1815 if (big_endian)
df7492f9 1816 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1817 else
1818 EMIT_TWO_BYTES (0xFF, 0xFE);
a470d443 1819 CODING_UTF_16_BOM (coding) = utf_without_bom;
df7492f9
KH
1820 }
1821
1822 while (charbuf < charbuf_end)
1823 {
1824 ASSURE_DESTINATION (safe_room);
1825 c = *charbuf++;
60afa08d 1826 if (c > MAX_UNICODE_CHAR)
e19c3639 1827 c = coding->default_char;
df7492f9
KH
1828
1829 if (c < 0x10000)
1830 {
1831 if (big_endian)
1832 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1833 else
1834 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1835 }
1836 else
1837 {
1838 int c1, c2;
1839
1840 c -= 0x10000;
1841 c1 = (c >> 10) + 0xD800;
1842 c2 = (c & 0x3FF) + 0xDC00;
1843 if (big_endian)
1844 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1845 else
1846 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1847 }
1848 }
065e3595 1849 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1850 coding->produced = dst - coding->destination;
1851 coding->produced_char += produced_chars;
1852 return 0;
1853}
1854
1855\f
1856/*** 6. Old Emacs' internal format (emacs-mule) ***/
1857
1858/* Emacs' internal format for representation of multiple character
1859 sets is a kind of multi-byte encoding, i.e. characters are
1860 represented by variable-length sequences of one-byte codes.
1861
1862 ASCII characters and control characters (e.g. `tab', `newline') are
1863 represented by one-byte sequences which are their ASCII codes, in
1864 the range 0x00 through 0x7F.
1865
1866 8-bit characters of the range 0x80..0x9F are represented by
1867 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1868 code + 0x20).
1869
1870 8-bit characters of the range 0xA0..0xFF are represented by
1871 one-byte sequences which are their 8-bit code.
1872
1873 The other characters are represented by a sequence of `base
1874 leading-code', optional `extended leading-code', and one or two
1875 `position-code's. The length of the sequence is determined by the
1876 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1877 whereas extended leading-code and position-code take the range 0xA0
1878 through 0xFF. See `charset.h' for more details about leading-code
1879 and position-code.
1880
1881 --- CODE RANGE of Emacs' internal format ---
1882 character set range
1883 ------------- -----
1884 ascii 0x00..0x7F
1885 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1886 eight-bit-graphic 0xA0..0xFF
1887 ELSE 0x81..0x9D + [0xA0..0xFF]+
1888 ---------------------------------------------
1889
1890 As this is the internal character representation, the format is
1891 usually not used externally (i.e. in a file or in a data sent to a
1892 process). But, it is possible to have a text externally in this
1893 format (i.e. by encoding by the coding system `emacs-mule').
1894
1895 In that case, a sequence of one-byte codes has a slightly different
1896 form.
1897
1898 At first, all characters in eight-bit-control are represented by
1899 one-byte sequences which are their 8-bit code.
1900
1901 Next, character composition data are represented by the byte
1902 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1903 where,
e951386e 1904 METHOD is 0xF2 plus one of composition method (enum
df7492f9
KH
1905 composition_method),
1906
1907 BYTES is 0xA0 plus a byte length of this composition data,
1908
e951386e 1909 CHARS is 0xA0 plus a number of characters composed by this
df7492f9
KH
1910 data,
1911
ad1746f5 1912 COMPONENTs are characters of multibyte form or composition
df7492f9
KH
1913 rules encoded by two-byte of ASCII codes.
1914
1915 In addition, for backward compatibility, the following formats are
1916 also recognized as composition data on decoding.
1917
1918 0x80 MSEQ ...
1919 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1920
1921 Here,
1922 MSEQ is a multibyte form, but in one of these special formats:
1923 ASCII: 0xA0 ASCII_CODE+0x80,
1924 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1925 RULE is a one byte code of the range 0xA0..0xF0 that
1926 represents a composition rule.
1927 */
1928
/* Table indexed by a byte value.  emacs_mule_bytes[B] is the total
   length in bytes (1 to 4, as handled by emacs_mule_char) of the
   emacs-mule sequence whose first byte is B.  The table is only
   declared here; it is not filled in at this point.  */
1929char emacs_mule_bytes[256];
1930
e951386e
KH
1931
1932/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1933 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1934 else return 0. */
1935
1936static int
cf84bb53
JB
1937detect_coding_emacs_mule (struct coding_system *coding,
1938 struct coding_detection_info *detect_info)
e951386e
KH
1939{
1940 const unsigned char *src = coding->source, *src_base;
1941 const unsigned char *src_end = coding->source + coding->src_bytes;
1942 int multibytep = coding->src_multibyte;
d311d28c 1943 ptrdiff_t consumed_chars = 0;
e951386e
KH
1944 int c;
1945 int found = 0;
1946
1947 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1948 /* A coding system of this category is always ASCII compatible. */
1949 src += coding->head_ascii;
1950
1951 while (1)
1952 {
1953 src_base = src;
1954 ONE_MORE_BYTE (c);
1955 if (c < 0)
1956 continue;
1957 if (c == 0x80)
1958 {
1959 /* Perhaps the start of composite character. We simply skip
1960 it because analyzing it is too heavy for detecting. But,
1961 at least, we check that the composite character
1962 constitutes of more than 4 bytes. */
2735d060 1963 const unsigned char *src_start;
e951386e
KH
1964
1965 repeat:
2735d060 1966 src_start = src;
e951386e
KH
1967 do
1968 {
1969 ONE_MORE_BYTE (c);
1970 }
1971 while (c >= 0xA0);
1972
2735d060 1973 if (src - src_start <= 4)
e951386e
KH
1974 break;
1975 found = CATEGORY_MASK_EMACS_MULE;
1976 if (c == 0x80)
1977 goto repeat;
1978 }
1979
1980 if (c < 0x80)
1981 {
1982 if (c < 0x20
1983 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1984 break;
1985 }
1986 else
1987 {
396475b7 1988 int more_bytes = emacs_mule_bytes[c] - 1;
e951386e
KH
1989
1990 while (more_bytes > 0)
1991 {
1992 ONE_MORE_BYTE (c);
1993 if (c < 0xA0)
1994 {
1995 src--; /* Unread the last byte. */
1996 break;
1997 }
1998 more_bytes--;
1999 }
2000 if (more_bytes != 0)
2001 break;
2002 found = CATEGORY_MASK_EMACS_MULE;
2003 }
2004 }
2005 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2006 return 0;
2007
2008 no_more_source:
2009 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2010 {
2011 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2012 return 0;
2013 }
2014 detect_info->found |= found;
2015 return 1;
2016}
2017
2018
2019/* Parse emacs-mule multibyte sequence at SRC and return the decoded
2020 character. If CMP_STATUS indicates that we must expect MSEQ or
2021 RULE described above, decode it and return the negative value of
685ebdc8 2022 the decoded character or rule. If an invalid byte is found, return
e951386e
KH
2023 -1. If SRC is too short, return -2. */
2024
e2f1bab9 2025static int
cf84bb53
JB
2026emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2027 int *nbytes, int *nchars, int *id,
2028 struct composition_status *cmp_status)
df7492f9 2029{
8f924df7
KH
2030 const unsigned char *src_end = coding->source + coding->src_bytes;
2031 const unsigned char *src_base = src;
df7492f9 2032 int multibytep = coding->src_multibyte;
2735d060 2033 int charset_ID;
df7492f9
KH
2034 unsigned code;
2035 int c;
2036 int consumed_chars = 0;
e951386e 2037 int mseq_found = 0;
df7492f9
KH
2038
2039 ONE_MORE_BYTE (c);
065e3595 2040 if (c < 0)
df7492f9 2041 {
065e3595 2042 c = -c;
2735d060 2043 charset_ID = emacs_mule_charset[0];
065e3595
KH
2044 }
2045 else
2046 {
4d41e8b7
KH
2047 if (c >= 0xA0)
2048 {
e951386e
KH
2049 if (cmp_status->state != COMPOSING_NO
2050 && cmp_status->old_form)
4d41e8b7 2051 {
e951386e
KH
2052 if (cmp_status->state == COMPOSING_CHAR)
2053 {
2054 if (c == 0xA0)
2055 {
2056 ONE_MORE_BYTE (c);
2057 c -= 0x80;
2058 if (c < 0)
2059 goto invalid_code;
2060 }
2061 else
2062 c -= 0x20;
2063 mseq_found = 1;
2064 }
2065 else
2066 {
2067 *nbytes = src - src_base;
2068 *nchars = consumed_chars;
2069 return -c;
2070 }
4d41e8b7
KH
2071 }
2072 else
e951386e 2073 goto invalid_code;
4d41e8b7
KH
2074 }
2075
065e3595 2076 switch (emacs_mule_bytes[c])
b73bfc1c 2077 {
065e3595 2078 case 2:
2735d060 2079 if ((charset_ID = emacs_mule_charset[c]) < 0)
df7492f9
KH
2080 goto invalid_code;
2081 ONE_MORE_BYTE (c);
9ffd559c 2082 if (c < 0xA0)
065e3595 2083 goto invalid_code;
df7492f9 2084 code = c & 0x7F;
065e3595
KH
2085 break;
2086
2087 case 3:
2088 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2089 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2090 {
2091 ONE_MORE_BYTE (c);
2735d060 2092 if (c < 0xA0 || (charset_ID = emacs_mule_charset[c]) < 0)
065e3595
KH
2093 goto invalid_code;
2094 ONE_MORE_BYTE (c);
9ffd559c 2095 if (c < 0xA0)
065e3595
KH
2096 goto invalid_code;
2097 code = c & 0x7F;
2098 }
2099 else
2100 {
2735d060 2101 if ((charset_ID = emacs_mule_charset[c]) < 0)
065e3595
KH
2102 goto invalid_code;
2103 ONE_MORE_BYTE (c);
9ffd559c 2104 if (c < 0xA0)
065e3595
KH
2105 goto invalid_code;
2106 code = (c & 0x7F) << 8;
2107 ONE_MORE_BYTE (c);
9ffd559c 2108 if (c < 0xA0)
065e3595
KH
2109 goto invalid_code;
2110 code |= c & 0x7F;
2111 }
2112 break;
2113
2114 case 4:
2115 ONE_MORE_BYTE (c);
2735d060 2116 if (c < 0 || (charset_ID = emacs_mule_charset[c]) < 0)
df7492f9
KH
2117 goto invalid_code;
2118 ONE_MORE_BYTE (c);
9ffd559c 2119 if (c < 0xA0)
065e3595 2120 goto invalid_code;
781d7a48 2121 code = (c & 0x7F) << 8;
df7492f9 2122 ONE_MORE_BYTE (c);
9ffd559c 2123 if (c < 0xA0)
065e3595 2124 goto invalid_code;
df7492f9 2125 code |= c & 0x7F;
065e3595 2126 break;
df7492f9 2127
065e3595
KH
2128 case 1:
2129 code = c;
2735d060 2130 charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
065e3595 2131 break;
df7492f9 2132
065e3595
KH
2133 default:
2134 abort ();
2135 }
b84ae584 2136 CODING_DECODE_CHAR (coding, src, src_base, src_end,
2735d060 2137 CHARSET_FROM_ID (charset_ID), code, c);
065e3595
KH
2138 if (c < 0)
2139 goto invalid_code;
df7492f9 2140 }
df7492f9
KH
2141 *nbytes = src - src_base;
2142 *nchars = consumed_chars;
ff0dacd7 2143 if (id)
2735d060 2144 *id = charset_ID;
e951386e 2145 return (mseq_found ? -c : c);
df7492f9
KH
2146
2147 no_more_source:
2148 return -2;
2149
2150 invalid_code:
2151 return -1;
2152}
2153
2154
e951386e 2155/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
df7492f9 2156
e951386e
KH
2157/* Handle these composition sequences ('|': the end of header elements,
2158 BYTES and CHARS >= 0xA0):
df7492f9 2159
e951386e
KH
2160 (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2161 (2) altchar composition: 0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2162 (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
df7492f9 2163
e951386e 2164 and these old form:
1a4990fb 2165
e951386e
KH
2166 (4) relative composition: 0x80 | MSEQ ... MSEQ
2167 (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
df7492f9 2168
e951386e
KH
2169 When the starter 0x80 and the following header elements are found,
2170 this annotation header is produced.
df7492f9 2171
e951386e 2172 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
df7492f9 2173
e951386e
KH
2174 NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2175 NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
df7492f9 2176
e951386e
KH
2177 Then, upon reading the following elements, these codes are produced
2178 until the composition end is found:
df7492f9 2179
e951386e
KH
2180 (1) CHAR ... CHAR
2181 (2) ALT ... ALT CHAR ... CHAR
2182 (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2183 (4) CHAR ... CHAR
2184 (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
4ed46869 2185
e951386e
KH
2186 When the composition end is found, LENGTH and NCHARS in the
2187 annotation header are updated as below:
b73bfc1c 2188
e951386e
KH
2189 (1) LENGTH: unchanged, NCHARS: unchanged
2190 (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2191 (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2192 (4) LENGTH: unchanged, NCHARS: number of CHARs
2193 (5) LENGTH: unchanged, NCHARS: number of CHARs
df7492f9 2194
e951386e
KH
2195 If an error is found while composing, the annotation header is
2196 changed to the original composition header (plus filler -1s) as
2197 below:
2198
2199 (1),(2),(3) [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2200 (5) [ 0x80 0xFF -1 -1 -1 ]
2201
2202 and the sequence [ -2 DECODED-RULE ] is changed to the original
2203 byte sequence as below:
2204 o the original byte sequence is B: [ B -1 ]
2205 o the original byte sequence is B1 B2: [ B1 B2 ]
2206
2207 Most of the routines are implemented by macros because many
2208 variables and labels in the caller decode_coding_emacs_mule must be
2209 accessible, and they are usually called just once (thus doesn't
2210 increase the size of compiled object). */
2211
/* Decode a composition rule represented by C as a component of a
   composition sequence of Emacs 20 style, and set RULE to the decoded
   rule.

   C must be a modifiable lvalue: 0xA0 is subtracted from it in place.
   The result must lie in 0..80 and is split into a global reference
   point (C / 9) and a new reference point (C % 9); a reference point
   of 4 is replaced by 10.  Jumps to the caller's `invalid_code' label
   when C is out of range.  Both arguments are evaluated more than
   once.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)          \
  do {                                                          \
    int gref, nref;                                             \
                                                                \
    (c) -= 0xA0;                                                \
    if ((c) < 0 || (c) >= 81)                                   \
      goto invalid_code;                                        \
    gref = (c) / 9, nref = (c) % 9;                             \
    if (gref == 4) gref = 10;                                   \
    if (nref == 4) nref = 10;                                   \
    (rule) = COMPOSITION_ENCODE_RULE (gref, nref);              \
  } while (0)
2228
2229
e951386e
KH
/* Decode a composition rule represented by C and the following byte
   at SRC as a component of a composition sequence of Emacs 21 style,
   and set RULE to the decoded rule.

   C supplies the global reference point (C - 0x20).  The next source
   byte is then read into C with ONE_MORE_BYTE and supplies the new
   reference point in the same way.  Jumps to the caller's
   `invalid_code' label when either value is outside 0..80.  C must
   be a modifiable lvalue.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)          \
  do {                                                          \
    int gref, nref;                                             \
                                                                \
    gref = (c) - 0x20;                                          \
    if (gref < 0 || gref >= 81)                                 \
      goto invalid_code;                                        \
    ONE_MORE_BYTE (c);                                          \
    nref = (c) - 0x20;                                          \
    if (nref < 0 || nref >= 81)                                 \
      goto invalid_code;                                        \
    (rule) = COMPOSITION_ENCODE_RULE (gref, nref);              \
  } while (0)
2247
2248
e951386e
KH
/* Start of Emacs 21 style format.  On entry C holds the method byte
   (0xF2 + METHOD).  The next two bytes are read from SRC: BYTES
   (0xA0 + the byte length of this composition information) and CHARS
   (0xA0 + the number of characters composed by this composition).

   Jumps to `invalid_code' if the header is malformed.  Otherwise
   CMP_STATUS is set up for the new composition and a composition
   annotation header is added to CHARBUF.  */

#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    if (method == COMPOSITION_RELATIVE && nbytes != 4)                  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
      goto invalid_code;                                                \
    cmp_status->old_form = 0;                                           \
    cmp_status->method = method;                                        \
    cmp_status->state = (method == COMPOSITION_RELATIVE                 \
                         ? COMPOSING_CHAR                               \
                         : COMPOSING_COMPONENT_CHAR);                   \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
93dec019 2280
aa72b389 2281
e951386e
KH
/* Start of Emacs 20 style format for relative composition.  Mark
   CMP_STATUS as an old-form relative composition with no characters
   or components counted yet, and add a composition annotation header
   (NCHARS and NBYTES both 0) to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
  do {                                                          \
    cmp_status->old_form = 1;                                   \
    cmp_status->method = COMPOSITION_RELATIVE;                  \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2293
2294
/* Start of Emacs 20 style format for rule-base composition.  Mark
   CMP_STATUS as an old-form composition with rules, with no
   characters or components counted yet, and add a composition
   annotation header (NCHARS and NBYTES both 0) to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
  do {                                                          \
    cmp_status->old_form = 1;                                   \
    cmp_status->method = COMPOSITION_WITH_RULE;                 \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2306
2307
e951386e
KH
/* Called just after the composition starter 0x80 has been read.
   Read the next byte into C and start the composition it announces:

     0xF2 + METHOD  Emacs 21 style header
     0xA0..0xBF     Emacs 20 relative composition; this byte is
                    already the first component, so SRC is rewound
                    for it to be read again
     0xFF           Emacs 20 rule-base composition

   Any other byte jumps to `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_START()                           \
  do {                                                                  \
    const unsigned char *current_src = src;                             \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    if (c - 0xF2 >= COMPOSITION_RELATIVE                                \
        && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)                  \
      {                                                                 \
        DECODE_EMACS_MULE_21_COMPOSITION ();                            \
      }                                                                 \
    else if (c == 0xFF)                                                 \
      {                                                                 \
        DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();                   \
      }                                                                 \
    else if (c >= 0xA0 && c < 0xC0)                                     \
      {                                                                 \
        DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();                   \
        /* Re-read C as a composition component.  */                    \
        src = current_src;                                              \
      }                                                                 \
    else                                                                \
      goto invalid_code;                                                \
  } while (0)
2331
/* Finish the current composition normally.  The annotation header
   starts CMP_STATUS->length elements before CHARBUF: element 0 is
   LENGTH and element 2 is NCHARS.  For an old-form composition NCHARS
   is set to the number of characters counted.  For a new-form
   composition other than a relative one, LENGTH is replaced by
   NCHARS minus the length of the whole sequence.  The state becomes
   COMPOSING_NO.  */

#define EMACS_MULE_COMPOSITION_END()                                    \
  do {                                                                  \
    int idx = - cmp_status->length;                                     \
                                                                        \
    if (! cmp_status->old_form)                                         \
      {                                                                 \
        if (cmp_status->method > COMPOSITION_RELATIVE)                  \
          charbuf[idx] = charbuf[idx + 2] - cmp_status->length;         \
      }                                                                 \
    else                                                                \
      charbuf[idx + 2] = cmp_status->nchars;                            \
    cmp_status->state = COMPOSING_NO;                                   \
  } while (0)
2342
2343
2344static int
cf84bb53
JB
2345emacs_mule_finish_composition (int *charbuf,
2346 struct composition_status *cmp_status)
e951386e
KH
2347{
2348 int idx = - cmp_status->length;
2349 int new_chars;
2350
2351 if (cmp_status->old_form && cmp_status->nchars > 0)
2352 {
2353 charbuf[idx + 2] = cmp_status->nchars;
2354 new_chars = 0;
2355 if (cmp_status->method == COMPOSITION_WITH_RULE
2356 && cmp_status->state == COMPOSING_CHAR)
2357 {
2358 /* The last rule was invalid. */
2359 int rule = charbuf[-1] + 0xA0;
2360
2361 charbuf[-2] = BYTE8_TO_CHAR (rule);
2362 charbuf[-1] = -1;
2363 new_chars = 1;
2364 }
2365 }
2366 else
2367 {
2368 charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2369
2370 if (cmp_status->method == COMPOSITION_WITH_RULE)
2371 {
2372 charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2373 charbuf[idx++] = -3;
2374 charbuf[idx++] = 0;
2375 new_chars = 1;
2376 }
2377 else
2378 {
2379 int nchars = charbuf[idx + 1] + 0xA0;
2380 int nbytes = charbuf[idx + 2] + 0xA0;
2381
2382 charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2383 charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2384 charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2385 charbuf[idx++] = -1;
2386 new_chars = 4;
2387 }
2388 }
2389 cmp_status->state = COMPOSING_NO;
2390 return new_chars;
2391}
2392
/* If a composition is in progress, finish it with
   emacs_mule_finish_composition and add the value it returns to
   CHAR_OFFSET.  */

#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                           \
  do {                                                                  \
    if (cmp_status->state != COMPOSING_NO)                              \
      {                                                                 \
        char_offset = (char_offset                                      \
                       + emacs_mule_finish_composition (charbuf,        \
                                                        cmp_status));   \
      }                                                                 \
  } while (0)
2398
aa72b389
KH
2399
2400static void
971de7fb 2401decode_coding_emacs_mule (struct coding_system *coding)
aa72b389 2402{
8f924df7
KH
2403 const unsigned char *src = coding->source + coding->consumed;
2404 const unsigned char *src_end = coding->source + coding->src_bytes;
2405 const unsigned char *src_base;
69a80ea3 2406 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5
KH
2407 /* We may produce two annotations (charset and composition) in one
2408 loop and one more charset annotation at the end. */
69a80ea3 2409 int *charbuf_end
15cbd324
EZ
2410 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3)
2411 /* We can produce up to 2 characters in a loop. */
2412 - 1;
d311d28c 2413 ptrdiff_t consumed_chars = 0, consumed_chars_base;
df7492f9 2414 int multibytep = coding->src_multibyte;
d311d28c
PE
2415 ptrdiff_t char_offset = coding->produced_char;
2416 ptrdiff_t last_offset = char_offset;
ff0dacd7 2417 int last_id = charset_ascii;
2735d060 2418 int eol_dos =
0a9564cb 2419 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 2420 int byte_after_cr = -1;
e951386e 2421 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
aa72b389 2422
e951386e
KH
2423 if (cmp_status->state != COMPOSING_NO)
2424 {
2425 int i;
2426
15cbd324
EZ
2427 if (charbuf_end - charbuf < cmp_status->length)
2428 abort ();
e951386e
KH
2429 for (i = 0; i < cmp_status->length; i++)
2430 *charbuf++ = cmp_status->carryover[i];
2431 coding->annotated = 1;
2432 }
2433
aa72b389
KH
2434 while (1)
2435 {
ee05f961 2436 int c, id IF_LINT (= 0);
df7492f9 2437
aa72b389 2438 src_base = src;
df7492f9
KH
2439 consumed_chars_base = consumed_chars;
2440
2441 if (charbuf >= charbuf_end)
b71f6f73
KH
2442 {
2443 if (byte_after_cr >= 0)
2444 src_base--;
2445 break;
2446 }
aa72b389 2447
119852e7
KH
2448 if (byte_after_cr >= 0)
2449 c = byte_after_cr, byte_after_cr = -1;
2450 else
2451 ONE_MORE_BYTE (c);
e951386e
KH
2452
2453 if (c < 0 || c == 0x80)
065e3595 2454 {
e951386e
KH
2455 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2456 if (c < 0)
2457 {
2458 *charbuf++ = -c;
2459 char_offset++;
2460 }
2461 else
2462 DECODE_EMACS_MULE_COMPOSITION_START ();
2463 continue;
065e3595 2464 }
e951386e
KH
2465
2466 if (c < 0x80)
aa72b389 2467 {
2735d060 2468 if (eol_dos && c == '\r')
119852e7 2469 ONE_MORE_BYTE (byte_after_cr);
e951386e
KH
2470 id = charset_ascii;
2471 if (cmp_status->state != COMPOSING_NO)
2472 {
2473 if (cmp_status->old_form)
2474 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2475 else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2476 cmp_status->ncomps--;
2477 }
2478 }
2479 else
2480 {
ee05f961 2481 int nchars IF_LINT (= 0), nbytes IF_LINT (= 0);
75f80e63
EZ
2482 /* emacs_mule_char can load a charset map from a file, which
2483 allocates a large structure and might cause buffer text
2484 to be relocated as result. Thus, we need to remember the
ad1746f5 2485 original pointer to buffer text, and fix up all related
75f80e63
EZ
2486 pointers after the call. */
2487 const unsigned char *orig = coding->source;
d311d28c 2488 ptrdiff_t offset;
e951386e
KH
2489
2490 c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2491 cmp_status);
75f80e63
EZ
2492 offset = coding->source - orig;
2493 if (offset)
2494 {
2495 src += offset;
2496 src_base += offset;
2497 src_end += offset;
2498 }
e951386e
KH
2499 if (c < 0)
2500 {
2501 if (c == -1)
2502 goto invalid_code;
2503 if (c == -2)
2504 break;
2505 }
2506 src = src_base + nbytes;
2507 consumed_chars = consumed_chars_base + nchars;
2508 if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2509 cmp_status->ncomps -= nchars;
2510 }
2511
ad1746f5 2512 /* Now if C >= 0, we found a normally encoded character, if C <
e951386e
KH
2513 0, we found an old-style composition component character or
2514 rule. */
2515
2516 if (cmp_status->state == COMPOSING_NO)
2517 {
2518 if (last_id != id)
2519 {
2520 if (last_id != charset_ascii)
2521 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2522 last_id);
2523 last_id = id;
2524 last_offset = char_offset;
2525 }
df7492f9
KH
2526 *charbuf++ = c;
2527 char_offset++;
aa72b389 2528 }
e951386e 2529 else if (cmp_status->state == COMPOSING_CHAR)
df7492f9 2530 {
e951386e
KH
2531 if (cmp_status->old_form)
2532 {
2533 if (c >= 0)
2534 {
2535 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2536 *charbuf++ = c;
2537 char_offset++;
2538 }
2539 else
2540 {
2541 *charbuf++ = -c;
2542 cmp_status->nchars++;
2543 cmp_status->length++;
2544 if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2545 EMACS_MULE_COMPOSITION_END ();
2546 else if (cmp_status->method == COMPOSITION_WITH_RULE)
2547 cmp_status->state = COMPOSING_RULE;
2548 }
2549 }
df7492f9 2550 else
e951386e
KH
2551 {
2552 *charbuf++ = c;
2553 cmp_status->length++;
2554 cmp_status->nchars--;
2555 if (cmp_status->nchars == 0)
2556 EMACS_MULE_COMPOSITION_END ();
2557 }
df7492f9 2558 }
e951386e 2559 else if (cmp_status->state == COMPOSING_RULE)
df7492f9 2560 {
e951386e 2561 int rule;
ff0dacd7 2562
e951386e 2563 if (c >= 0)
df7492f9 2564 {
e951386e
KH
2565 EMACS_MULE_COMPOSITION_END ();
2566 *charbuf++ = c;
2567 char_offset++;
df7492f9 2568 }
e951386e 2569 else
ff0dacd7 2570 {
e951386e
KH
2571 c = -c;
2572 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2573 if (rule < 0)
2574 goto invalid_code;
2575 *charbuf++ = -2;
2576 *charbuf++ = rule;
2577 cmp_status->length += 2;
2578 cmp_status->state = COMPOSING_CHAR;
ff0dacd7 2579 }
e951386e
KH
2580 }
2581 else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2582 {
df7492f9 2583 *charbuf++ = c;
e951386e
KH
2584 cmp_status->length++;
2585 if (cmp_status->ncomps == 0)
2586 cmp_status->state = COMPOSING_CHAR;
2587 else if (cmp_status->ncomps > 0)
2588 {
2589 if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2590 cmp_status->state = COMPOSING_COMPONENT_RULE;
2591 }
2592 else
2593 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
df7492f9 2594 }
e951386e
KH
2595 else /* COMPOSING_COMPONENT_RULE */
2596 {
2597 int rule;
2598
2599 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2600 if (rule < 0)
2601 goto invalid_code;
2602 *charbuf++ = -2;
2603 *charbuf++ = rule;
2604 cmp_status->length += 2;
2605 cmp_status->ncomps--;
2606 if (cmp_status->ncomps > 0)
2607 cmp_status->state = COMPOSING_COMPONENT_CHAR;
2608 else
2609 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2610 }
2611 continue;
2612
df7492f9 2613 invalid_code:
e951386e 2614 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
df7492f9
KH
2615 src = src_base;
2616 consumed_chars = consumed_chars_base;
2617 ONE_MORE_BYTE (c);
2618 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2619 char_offset++;
df7492f9
KH
2620 coding->errors++;
2621 }
2622
2623 no_more_source:
e951386e
KH
2624 if (cmp_status->state != COMPOSING_NO)
2625 {
2626 if (coding->mode & CODING_MODE_LAST_BLOCK)
2627 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2628 else
2629 {
2630 int i;
2631
2632 charbuf -= cmp_status->length;
2633 for (i = 0; i < cmp_status->length; i++)
2634 cmp_status->carryover[i] = charbuf[i];
2635 }
2636 }
ff0dacd7 2637 if (last_id != charset_ascii)
69a80ea3 2638 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
2639 coding->consumed_char += consumed_chars_base;
2640 coding->consumed = src_base - coding->source;
2641 coding->charbuf_used = charbuf - coding->charbuf;
2642}
2643
2644
/* Store in CODES[0] and CODES[1] the leading codes for the emacs-mule
   charset ID.  An ID below 0xA0 is itself the only leading code, and
   CODES[1] is set to 0.  Any other ID is stored in CODES[1], after
   one of the codes 0x9A, 0x9B, 0x9C or 0x9D selected by the range ID
   falls in.

   ID and CODES are evaluated more than once.  The definition does not
   end in a semicolon, so the macro can be used as a single statement,
   including in an unbraced `if ... else'.  */

#define EMACS_MULE_LEADING_CODES(id, codes)     \
  do {                                          \
    if ((id) < 0xA0)                            \
      (codes)[0] = (id), (codes)[1] = 0;        \
    else if ((id) < 0xE0)                       \
      (codes)[0] = 0x9A, (codes)[1] = (id);     \
    else if ((id) < 0xF0)                       \
      (codes)[0] = 0x9B, (codes)[1] = (id);     \
    else if ((id) < 0xF5)                       \
      (codes)[0] = 0x9C, (codes)[1] = (id);     \
    else                                        \
      (codes)[0] = 0x9D, (codes)[1] = (id);     \
  } while (0)
2658
aa72b389 2659
df7492f9 2660static int
971de7fb 2661encode_coding_emacs_mule (struct coding_system *coding)
df7492f9
KH
2662{
2663 int multibytep = coding->dst_multibyte;
2664 int *charbuf = coding->charbuf;
2665 int *charbuf_end = charbuf + coding->charbuf_used;
2666 unsigned char *dst = coding->destination + coding->produced;
2667 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2668 int safe_room = 8;
d311d28c 2669 ptrdiff_t produced_chars = 0;
24a73b0a 2670 Lisp_Object attrs, charset_list;
df7492f9 2671 int c;
ff0dacd7 2672 int preferred_charset_id = -1;
df7492f9 2673
24a73b0a 2674 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2675 if (! EQ (charset_list, Vemacs_mule_charset_list))
2676 {
2677 CODING_ATTR_CHARSET_LIST (attrs)
2678 = charset_list = Vemacs_mule_charset_list;
2679 }
df7492f9
KH
2680
2681 while (charbuf < charbuf_end)
2682 {
2683 ASSURE_DESTINATION (safe_room);
2684 c = *charbuf++;
ff0dacd7
KH
2685
2686 if (c < 0)
2687 {
2688 /* Handle an annotation. */
2689 switch (*charbuf)
2690 {
2691 case CODING_ANNOTATE_COMPOSITION_MASK:
2692 /* Not yet implemented. */
2693 break;
2694 case CODING_ANNOTATE_CHARSET_MASK:
2695 preferred_charset_id = charbuf[3];
2696 if (preferred_charset_id >= 0
2697 && NILP (Fmemq (make_number (preferred_charset_id),
2698 charset_list)))
2699 preferred_charset_id = -1;
2700 break;
2701 default:
2702 abort ();
2703 }
2704 charbuf += -c - 1;
2705 continue;
2706 }
2707
df7492f9
KH
2708 if (ASCII_CHAR_P (c))
2709 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2710 else if (CHAR_BYTE8_P (c))
2711 {
2712 c = CHAR_TO_BYTE8 (c);
2713 EMIT_ONE_BYTE (c);
2714 }
df7492f9 2715 else
aa72b389 2716 {
df7492f9
KH
2717 struct charset *charset;
2718 unsigned code;
2719 int dimension;
2720 int emacs_mule_id;
2721 unsigned char leading_codes[2];
2722
ff0dacd7
KH
2723 if (preferred_charset_id >= 0)
2724 {
5eb05ea3
KH
2725 int result;
2726
ff0dacd7 2727 charset = CHARSET_FROM_ID (preferred_charset_id);
5eb05ea3
KH
2728 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
2729 if (result)
905ca9d2
KH
2730 code = ENCODE_CHAR (charset, c);
2731 else
5eb05ea3
KH
2732 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2733 &code, charset);
ff0dacd7
KH
2734 }
2735 else
5eb05ea3
KH
2736 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2737 &code, charset);
df7492f9
KH
2738 if (! charset)
2739 {
2740 c = coding->default_char;
2741 if (ASCII_CHAR_P (c))
2742 {
2743 EMIT_ONE_ASCII_BYTE (c);
2744 continue;
2745 }
5eb05ea3
KH
2746 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2747 &code, charset);
df7492f9
KH
2748 }
2749 dimension = CHARSET_DIMENSION (charset);
2750 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2751 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2752 EMIT_ONE_BYTE (leading_codes[0]);
2753 if (leading_codes[1])
2754 EMIT_ONE_BYTE (leading_codes[1]);
2755 if (dimension == 1)
1fa663f9 2756 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2757 else
df7492f9 2758 {
1fa663f9 2759 code |= 0x8080;
df7492f9
KH
2760 EMIT_ONE_BYTE (code >> 8);
2761 EMIT_ONE_BYTE (code & 0xFF);
2762 }
aa72b389 2763 }
aa72b389 2764 }
065e3595 2765 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2766 coding->produced_char += produced_chars;
2767 coding->produced = dst - coding->destination;
2768 return 0;
aa72b389 2769}
b73bfc1c 2770
4ed46869 2771\f
df7492f9 2772/*** 7. ISO2022 handlers ***/
4ed46869
KH
2773
2774/* The following note describes the coding system ISO2022 briefly.
39787efd 2775 Since the intention of this note is to help understand the
5a936b46 2776 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2777 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2778 original document of ISO2022. This is equivalent to the standard
cfb43547 2779 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2780
2781 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2782 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2783 is encoded using bytes less than 128. This may make the encoded
2784 text a little bit longer, but the text passes more easily through
cfb43547 2785 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2786 Significant Bit).
b73bfc1c 2787
cfb43547
DL
2788 There are two kinds of character sets: control character sets and
2789 graphic character sets. The former contain control characters such
4ed46869 2790 as `newline' and `escape' to provide control functions (control
39787efd 2791 functions are also provided by escape sequences). The latter
cfb43547 2792 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2793 two control character sets and many graphic character sets.
2794
2795 Graphic character sets are classified into one of the following
39787efd
KH
2796 four classes, according to the number of bytes (DIMENSION) and
2797 number of characters in one dimension (CHARS) of the set:
2798 - DIMENSION1_CHARS94
2799 - DIMENSION1_CHARS96
2800 - DIMENSION2_CHARS94
2801 - DIMENSION2_CHARS96
2802
2803 In addition, each character set is assigned an identification tag,
cfb43547 2804 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2805 hereafter). The <F> of each character set is decided by ECMA(*)
2806 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2807 (0x30..0x3F are for private use only).
4ed46869
KH
2808
2809 Note (*): ECMA = European Computer Manufacturers Association
2810
cfb43547 2811 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2812 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2813 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2814 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2815 o DIMENSION2_CHARS96 -- none for the moment
2816
39787efd 2817 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2818 C0 [0x00..0x1F] -- control character plane 0
2819 GL [0x20..0x7F] -- graphic character plane 0
2820 C1 [0x80..0x9F] -- control character plane 1
2821 GR [0xA0..0xFF] -- graphic character plane 1
2822
2823 A control character set is directly designated and invoked to C0 or
39787efd
KH
2824 C1 by an escape sequence. The most common case is that:
2825 - ISO646's control character set is designated/invoked to C0, and
2826 - ISO6429's control character set is designated/invoked to C1,
2827 and usually these designations/invocations are omitted in encoded
2828 text. In a 7-bit environment, only C0 can be used, and a control
2829 character for C1 is encoded by an appropriate escape sequence to
2830 fit into the environment. All control characters for C1 are
2831 defined to have corresponding escape sequences.
4ed46869
KH
2832
2833 A graphic character set is at first designated to one of four
2834 graphic registers (G0 through G3), then these graphic registers are
2835 invoked to GL or GR. These designations and invocations can be
2836 done independently. The most common case is that G0 is invoked to
39787efd
KH
2837 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2838 these invocations and designations are omitted in encoded text.
2839 In a 7-bit environment, only GL can be used.
4ed46869 2840
39787efd
KH
2841 When a graphic character set of CHARS94 is invoked to GL, codes
2842 0x20 and 0x7F of the GL area work as control characters SPACE and
2843 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2844 be used.
4ed46869
KH
2845
2846 There are two ways of invocation: locking-shift and single-shift.
2847 With locking-shift, the invocation lasts until the next different
39787efd
KH
2848 invocation, whereas with single-shift, the invocation affects the
2849 following character only and doesn't affect the locking-shift
2850 state. Invocations are done by the following control characters or
2851 escape sequences:
4ed46869
KH
2852
2853 ----------------------------------------------------------------------
39787efd 2854 abbrev function cntrl escape seq description
4ed46869 2855 ----------------------------------------------------------------------
39787efd
KH
2856 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2857 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2858 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2859 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2860 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2861 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2862 LS3R (locking-shift-3 right) none ESC '|' invoke G3 into GR (*)
2863 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2864 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2865 ----------------------------------------------------------------------
39787efd
KH
2866 (*) These are not used by any known coding system.
2867
2868 Control characters for these functions are defined by macros
2869 ISO_CODE_XXX in `coding.h'.
4ed46869 2870
39787efd 2871 Designations are done by the following escape sequences:
4ed46869
KH
2872 ----------------------------------------------------------------------
2873 escape sequence description
2874 ----------------------------------------------------------------------
2875 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2876 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2877 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2878 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2879 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2880 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2881 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2882 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2883 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2884 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2885 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2886 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2887 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2888 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2889 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2890 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2891 ----------------------------------------------------------------------
2892
2893 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2894 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2895
2896 Note (*): Although these designations are not allowed in ISO2022,
2897 Emacs accepts them on decoding, and produces them on encoding
39787efd 2898 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2899 7-bit environment, non-locking-shift, and non-single-shift.
2900
2901 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2902 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2903
cfb43547 2904 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2905 same multilingual text in ISO2022. Actually, there exist many
2906 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2907 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2908 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2909 localized platforms), and all of these are variants of ISO2022.
2910
2911 In addition to the above, Emacs handles two more kinds of escape
2912 sequences: ISO6429's direction specification and Emacs' private
2913 sequence for specifying character composition.
2914
39787efd 2915 ISO6429's direction specification takes the following form:
4ed46869
KH
2916 o CSI ']' -- end of the current direction
2917 o CSI '0' ']' -- end of the current direction
2918 o CSI '1' ']' -- start of left-to-right text
2919 o CSI '2' ']' -- start of right-to-left text
2920 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2921 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2922
2923 Character composition specification takes the following form:
ec6d2bb8
KH
2924 o ESC '0' -- start relative composition
2925 o ESC '1' -- end composition
2926 o ESC '2' -- start rule-base composition (*)
2927 o ESC '3' -- start relative composition with alternate chars (**)
2928 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2929 Since these are not standard escape sequences of any ISO standard,
cfb43547 2930 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2931
5a936b46
DL
2932 (*) This form is used only in Emacs 20.7 and older versions,
2933 but newer versions can safely decode it.
cfb43547 2934 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2935 and older versions can't decode it.
ec6d2bb8 2936
cfb43547 2937 Here's a list of example usages of these composition escape
b73bfc1c 2938 sequences (categorized by `enum composition_method').
ec6d2bb8 2939
b73bfc1c 2940 COMPOSITION_RELATIVE:
ec6d2bb8 2941 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2942 COMPOSITION_WITH_RULE:
ec6d2bb8 2943 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2944 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2945 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2946 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2947 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869 2948
74ab6df5 2949static enum iso_code_class_type iso_code_class[256];
4ed46869 2950
df7492f9
KH
/* Return nonzero if the charset whose ID is ID may be used with
   CODING: ID must be a valid index into CODING->safe_charsets
   (0 <= ID <= CODING->max_charset_id) and the entry stored there must
   not be 255, the value setup_iso_safe_charsets fills the table with
   before recording the usable charsets.

   A negative ID (for instance an unset iso_charset_table entry) is
   never safe; rejecting it here keeps the table lookup in bounds.
   ID is evaluated more than once.  */
#define SAFE_CHARSET_P(coding, id)              \
  (0 <= (id)                                    \
   && (id) <= (coding)->max_charset_id          \
   && (coding)->safe_charsets[id] != 255)
df7492f9 2954
df7492f9 2955static void
971de7fb 2956setup_iso_safe_charsets (Lisp_Object attrs)
df7492f9
KH
2957{
2958 Lisp_Object charset_list, safe_charsets;
2959 Lisp_Object request;
2960 Lisp_Object reg_usage;
2961 Lisp_Object tail;
d311d28c 2962 EMACS_INT reg94, reg96;
df7492f9
KH
2963 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2964 int max_charset_id;
2965
2966 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2967 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2968 && ! EQ (charset_list, Viso_2022_charset_list))
2969 {
2970 CODING_ATTR_CHARSET_LIST (attrs)
2971 = charset_list = Viso_2022_charset_list;
2972 ASET (attrs, coding_attr_safe_charsets, Qnil);
2973 }
2974
2975 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2976 return;
2977
2978 max_charset_id = 0;
2979 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2980 {
2981 int id = XINT (XCAR (tail));
2982 if (max_charset_id < id)
2983 max_charset_id = id;
2984 }
d46c5b12 2985
1b3b981b
AS
2986 safe_charsets = make_uninit_string (max_charset_id + 1);
2987 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9
KH
2988 request = AREF (attrs, coding_attr_iso_request);
2989 reg_usage = AREF (attrs, coding_attr_iso_usage);
2990 reg94 = XINT (XCAR (reg_usage));
2991 reg96 = XINT (XCDR (reg_usage));
2992
2993 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2994 {
2995 Lisp_Object id;
2996 Lisp_Object reg;
2997 struct charset *charset;
2998
2999 id = XCAR (tail);
3000 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 3001 reg = Fcdr (Fassq (id, request));
df7492f9 3002 if (! NILP (reg))
8f924df7 3003 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
3004 else if (charset->iso_chars_96)
3005 {
3006 if (reg96 < 4)
8f924df7 3007 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
3008 }
3009 else
3010 {
3011 if (reg94 < 4)
8f924df7 3012 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
3013 }
3014 }
3015 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3016}
d46c5b12 3017
b6871cc7 3018
4ed46869 3019/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ad1746f5 3020 Check if a text is encoded in one of ISO-2022 based coding systems.
ff0dacd7 3021 If it is, return 1, else return 0. */
4ed46869 3022
0a28aafb 3023static int
cf84bb53
JB
3024detect_coding_iso_2022 (struct coding_system *coding,
3025 struct coding_detection_info *detect_info)
4ed46869 3026{
8f924df7
KH
3027 const unsigned char *src = coding->source, *src_base = src;
3028 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 3029 int multibytep = coding->src_multibyte;
ff0dacd7 3030 int single_shifting = 0;
0e48bb22 3031 int id;
df7492f9 3032 int c, c1;
d311d28c 3033 ptrdiff_t consumed_chars = 0;
df7492f9 3034 int i;
ff0dacd7
KH
3035 int rejected = 0;
3036 int found = 0;
cee53ed4 3037 int composition_count = -1;
ff0dacd7
KH
3038
3039 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
3040
3041 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3042 {
3043 struct coding_system *this = &(coding_categories[i]);
3044 Lisp_Object attrs, val;
3045
c6b278e7
KH
3046 if (this->id < 0)
3047 continue;
df7492f9
KH
3048 attrs = CODING_ID_ATTRS (this->id);
3049 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
1b3b981b 3050 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
df7492f9
KH
3051 setup_iso_safe_charsets (attrs);
3052 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 3053 this->max_charset_id = SCHARS (val) - 1;
1b3b981b 3054 this->safe_charsets = SDATA (val);
df7492f9
KH
3055 }
3056
3057 /* A coding system of this category is always ASCII compatible. */
3058 src += coding->head_ascii;
3f003981 3059
ff0dacd7 3060 while (rejected != CATEGORY_MASK_ISO)
4ed46869 3061 {
065e3595 3062 src_base = src;
df7492f9 3063 ONE_MORE_BYTE (c);
4ed46869
KH
3064 switch (c)
3065 {
3066 case ISO_CODE_ESC:
74383408
KH
3067 if (inhibit_iso_escape_detection)
3068 break;
f46869e4 3069 single_shifting = 0;
df7492f9 3070 ONE_MORE_BYTE (c);
0e48bb22 3071 if (c == 'N' || c == 'O')
d46c5b12 3072 {
ae9ff118 3073 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
3074 single_shifting = 1;
3075 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
4ed46869 3076 }
cee53ed4
KH
3077 else if (c == '1')
3078 {
3079 /* End of composition. */
3080 if (composition_count < 0
3081 || composition_count > MAX_COMPOSITION_COMPONENTS)
3082 /* Invalid */
3083 break;
3084 composition_count = -1;
3085 found |= CATEGORY_MASK_ISO;
3086 }
ec6d2bb8
KH
3087 else if (c >= '0' && c <= '4')
3088 {
3089 /* ESC <Fp> for start/end composition. */
cee53ed4 3090 composition_count = 0;
ec6d2bb8 3091 }
bf9cdd4e 3092 else
df7492f9 3093 {
0e48bb22
AS
3094 if (c >= '(' && c <= '/')
3095 {
3096 /* Designation sequence for a charset of dimension 1. */
3097 ONE_MORE_BYTE (c1);
3098 if (c1 < ' ' || c1 >= 0x80
3099 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3100 /* Invalid designation sequence. Just ignore. */
3101 break;
3102 }
3103 else if (c == '$')
3104 {
3105 /* Designation sequence for a charset of dimension 2. */
3106 ONE_MORE_BYTE (c);
3107 if (c >= '@' && c <= 'B')
3108 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
3109 id = iso_charset_table[1][0][c];
3110 else if (c >= '(' && c <= '/')
3111 {
3112 ONE_MORE_BYTE (c1);
3113 if (c1 < ' ' || c1 >= 0x80
3114 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3115 /* Invalid designation sequence. Just ignore. */
3116 break;
3117 }
3118 else
3119 /* Invalid designation sequence. Just ignore it. */
3120 break;
3121 }
3122 else
3123 {
3124 /* Invalid escape sequence. Just ignore it. */
3125 break;
3126 }
d46c5b12 3127
0e48bb22
AS
3128 /* We found a valid designation sequence for CHARSET. */
3129 rejected |= CATEGORY_MASK_ISO_8BIT;
3130 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3131 id))
3132 found |= CATEGORY_MASK_ISO_7;
3133 else
3134 rejected |= CATEGORY_MASK_ISO_7;
3135 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3136 id))
3137 found |= CATEGORY_MASK_ISO_7_TIGHT;
3138 else
3139 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3140 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3141 id))
3142 found |= CATEGORY_MASK_ISO_7_ELSE;
3143 else
3144 rejected |= CATEGORY_MASK_ISO_7_ELSE;
3145 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3146 id))
3147 found |= CATEGORY_MASK_ISO_8_ELSE;
3148 else
3149 rejected |= CATEGORY_MASK_ISO_8_ELSE;
3150 }
4ed46869
KH
3151 break;
3152
4ed46869 3153 case ISO_CODE_SO:
d46c5b12 3154 case ISO_CODE_SI:
ff0dacd7 3155 /* Locking shift out/in. */
74383408
KH
3156 if (inhibit_iso_escape_detection)
3157 break;
f46869e4 3158 single_shifting = 0;
ff0dacd7 3159 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12
KH
3160 break;
3161
4ed46869 3162 case ISO_CODE_CSI:
ff0dacd7 3163 /* Control sequence introducer. */
f46869e4 3164 single_shifting = 0;
ff0dacd7
KH
3165 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3166 found |= CATEGORY_MASK_ISO_8_ELSE;
3167 goto check_extra_latin;
3168
4ed46869
KH
3169 case ISO_CODE_SS2:
3170 case ISO_CODE_SS3:
ff0dacd7
KH
3171 /* Single shift. */
3172 if (inhibit_iso_escape_detection)
3173 break;
75e2a253 3174 single_shifting = 0;
ff0dacd7
KH
3175 rejected |= CATEGORY_MASK_ISO_7BIT;
3176 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3177 & CODING_ISO_FLAG_SINGLE_SHIFT)
0e48bb22
AS
3178 {
3179 found |= CATEGORY_MASK_ISO_8_1;
3180 single_shifting = 1;
3181 }
ff0dacd7
KH
3182 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3183 & CODING_ISO_FLAG_SINGLE_SHIFT)
0e48bb22
AS
3184 {
3185 found |= CATEGORY_MASK_ISO_8_2;
3186 single_shifting = 1;
3187 }
75e2a253
KH
3188 if (single_shifting)
3189 break;
0e48bb22
AS
3190 check_extra_latin:
3191 if (! VECTORP (Vlatin_extra_code_table)
28be1ada 3192 || NILP (AREF (Vlatin_extra_code_table, c)))
0e48bb22
AS
3193 {
3194 rejected = CATEGORY_MASK_ISO;
3195 break;
3196 }
3197 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3198 & CODING_ISO_FLAG_LATIN_EXTRA)
3199 found |= CATEGORY_MASK_ISO_8_1;
3200 else
3201 rejected |= CATEGORY_MASK_ISO_8_1;
3202 rejected |= CATEGORY_MASK_ISO_8_2;
3203 break;
4ed46869
KH
3204
3205 default:
065e3595
KH
3206 if (c < 0)
3207 continue;
4ed46869 3208 if (c < 0x80)
f46869e4 3209 {
cee53ed4
KH
3210 if (composition_count >= 0)
3211 composition_count++;
f46869e4
KH
3212 single_shifting = 0;
3213 break;
3214 }
ff0dacd7 3215 if (c >= 0xA0)
c4825358 3216 {
ff0dacd7
KH
3217 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3218 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 3219 /* Check the length of succeeding codes of the range
ff0dacd7
KH
3220 0xA0..0FF. If the byte length is even, we include
3221 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
3222 only when we are not single shifting. */
3223 if (! single_shifting
3224 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 3225 {
2735d060 3226 int len = 1;
b73bfc1c
KH
3227 while (src < src_end)
3228 {
d12bd917 3229 src_base = src;
df7492f9 3230 ONE_MORE_BYTE (c);
b73bfc1c 3231 if (c < 0xA0)
d12bd917
KH
3232 {
3233 src = src_base;
3234 break;
3235 }
2735d060 3236 len++;
b73bfc1c
KH
3237 }
3238
2735d060 3239 if (len & 1 && src < src_end)
cee53ed4
KH
3240 {
3241 rejected |= CATEGORY_MASK_ISO_8_2;
3242 if (composition_count >= 0)
2735d060 3243 composition_count += len;
cee53ed4 3244 }
f46869e4 3245 else
cee53ed4
KH
3246 {
3247 found |= CATEGORY_MASK_ISO_8_2;
3248 if (composition_count >= 0)
2735d060 3249 composition_count += len / 2;
cee53ed4 3250 }
f46869e4 3251 }
ff0dacd7 3252 break;
4ed46869 3253 }
4ed46869
KH
3254 }
3255 }
ff0dacd7
KH
3256 detect_info->rejected |= CATEGORY_MASK_ISO;
3257 return 0;
4ed46869 3258
df7492f9 3259 no_more_source:
ff0dacd7
KH
3260 detect_info->rejected |= rejected;
3261 detect_info->found |= (found & ~rejected);
df7492f9 3262 return 1;
4ed46869 3263}
ec6d2bb8 3264
4ed46869 3265
134b9549
KH
/* Record in CODING that the charset identified by dimension DIM,
   CHARS_96 and final byte FINAL is designated to graphic register
   REG.

   When FINAL is out of range, no charset is registered for the
   sequence, or the charset is not safe for CODING, the designation
   of REG is set to -2 and CHARS_96 is set to -1, which tells the
   caller to keep the escape sequence.  CHARS_96 is also set to -1
   when a previously invalid designation (-2) of REG is replaced by
   ASCII.

   JISX0201-Roman is treated as ASCII under CODING_ISO_FLAG_USE_ROMAN
   and JISX0208.1978 as JISX0208 under CODING_ISO_FLAG_USE_OLDJIS.
   CHARS_96 must be an lvalue; `coding' is taken from the caller's
   scope.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int id, prev;                                                       \
                                                                        \
    if (! (final < '0' || final >= 128)                                 \
        && (id = ISO_CHARSET_TABLE (dim, chars_96, final)) >= 0         \
        && SAFE_CHARSET_P (coding, id))                                 \
      {                                                                 \
        prev = CODING_ISO_DESIGNATION (coding, reg);                    \
        if (id == charset_jisx0201_roman)                               \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)  \
              id = charset_ascii;                                       \
          }                                                             \
        else if (id == charset_jisx0208_1978)                           \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
              id = charset_jisx0208;                                    \
          }                                                             \
        CODING_ISO_DESIGNATION (coding, reg) = id;                      \
        /* An earlier designation to REG was invalid and this one     \
           restores ASCII, so the sequence itself must be kept.  */     \
        if (prev == -2 && id == charset_ascii)                          \
          chars_96 = -1;                                                \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* Invalid or unsupported designation.  */                      \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
      }                                                                 \
  } while (0)
3298
d46c5b12 3299
e951386e
KH
3300/* Handle these composition sequences (ALT: alternate char):
3301
3302 (1) relative composition: ESC 0 CHAR ... ESC 1
3303 (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3304 (3) altchar composition: ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3305 (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3306
3307 When the start sequence (ESC 0/2/3/4) is found, this annotation
3308 header is produced.
3309
3310 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3311
3312 Then, upon reading CHAR or RULE (one or two bytes), these codes are
3313 produced until the end sequence (ESC 1) is found:
3314
3315 (1) CHAR ... CHAR
3316 (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3317 (3) ALT ... ALT -1 -1 CHAR ... CHAR
3318 (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3319
3320 When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3321 annotation header is updated as below:
3322
3323 (1) LENGTH: unchanged, NCHARS: number of CHARs
3324 (2) LENGTH: unchanged, NCHARS: number of CHARs
3325 (3) LENGTH: += number of ALTs + 2, NCHARS: number of CHARs
3326 (4) LENGTH: += number of ALTs * 3, NCHARS: number of CHARs
3327
3328 If an error is found while composing, the annotation header is
3329 changed to:
3330
3331 [ ESC '0'/'2'/'3'/'4' -2 0 ]
3332
3333 and the sequence [ -2 DECODED-RULE ] is changed to the original
3334 byte sequence as below:
3335 o the original byte sequence is B: [ B -1 ]
3336 o the original byte sequence is B1 B2: [ B1 B2 ]
3337 and the sequence [ -1 -1 ] is changed to the original byte
3338 sequence:
3339 [ ESC '0' ]
3340*/
3341
/* Turn the composition rule that starts with the byte C1 (taken from
   the caller's scope) into its encoded form and assign it to RULE.

   A first byte below 32 + 81 is a complete old-format rule.  Anything
   else is the first byte of a new-format rule, whose second byte is
   fetched with ONE_MORE_BYTE; the result then has 0x100 added so it
   can be told apart from an old-format rule.  On an invalid rule,
   control jumps to the caller's `invalid_code' label.  */

#define DECODE_COMPOSITION_RULE(rule)                                   \
  do {                                                                  \
    rule = c1 - 32;                                                     \
    if (rule < 0)                                                       \
      goto invalid_code;                                                \
    if (rule >= 81)             /* new format (after ver.21) */         \
      {                                                                 \
        int b;                                                          \
                                                                        \
        ONE_MORE_BYTE (b);                                              \
        if (! COMPOSITION_ENCODE_RULE_VALID (rule - 81, b - 32))        \
          goto invalid_code;                                            \
        rule = COMPOSITION_ENCODE_RULE (rule - 81, b - 32);             \
        /* Distinguish it from the old format.  */                      \
        rule += 0x100;                                                  \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (rule) / 9;                                          \
        int nref = (rule) % 9;                                          \
                                                                        \
        /* Reference point 4 of the old format is stored as 10.  */     \
        if (gref == 4)                                                  \
          gref = 10;                                                    \
        if (nref == 4)                                                  \
          nref = 10;                                                    \
        rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
      }                                                                 \
  } while (0)
3370
/* Write the original byte form of the decoded composition rule RULE
   back into charbuf[idx] and charbuf[idx + 1], and add the number of
   bytes restored to new_chars (charbuf, idx and new_chars come from
   the caller's scope).

   RULE below 0x100 is an old-format rule: one byte is restored and
   the second slot is set to -1.  Otherwise it is a new-format rule
   and two bytes are restored.  */
#define ENCODE_COMPOSITION_RULE(rule)                           \
  do {                                                          \
    int gref = (rule % 0x100) / 12;                             \
    int nref = (rule % 0x100) % 12;                             \
                                                                \
    if (rule >= 0x100)          /* new format */                \
      {                                                         \
        charbuf[idx] = 32 + 81 + gref;                          \
        charbuf[idx + 1] = 32 + nref;                           \
        new_chars += 2;                                         \
      }                                                         \
    else                        /* old format */                \
      {                                                         \
        /* Reference point 10 is written as 4 in this format.  */ \
        if (gref == 10)                                         \
          gref = 4;                                             \
        if (nref == 10)                                         \
          nref = 4;                                             \
        charbuf[idx] = 32 + gref * 9 + nref;                    \
        charbuf[idx + 1] = -1;                                  \
        new_chars++;                                            \
      }                                                         \
  } while (0)
3390
e951386e
KH
3391/* Finish the current composition as invalid. */
3392
f57e2426 3393static int finish_composition (int *, struct composition_status *);
e951386e
KH
3394
3395static int
971de7fb 3396finish_composition (int *charbuf, struct composition_status *cmp_status)
e951386e
KH
3397{
3398 int idx = - cmp_status->length;
3399 int new_chars;
3400
3401 /* Recover the original ESC sequence */
3402 charbuf[idx++] = ISO_CODE_ESC;
3403 charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3404 : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3405 : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3406 /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3407 : '4');
3408 charbuf[idx++] = -2;
3409 charbuf[idx++] = 0;
3410 charbuf[idx++] = -1;
3411 new_chars = cmp_status->nchars;
3412 if (cmp_status->method >= COMPOSITION_WITH_RULE)
3413 for (; idx < 0; idx++)
3414 {
3415 int elt = charbuf[idx];
3416
3417 if (elt == -2)
3418 {
3419 ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3420 idx++;
3421 }
3422 else if (elt == -1)
3423 {
3424 charbuf[idx++] = ISO_CODE_ESC;
3425 charbuf[idx] = '0';
3426 new_chars += 2;
3427 }
3428 }
3429 cmp_status->state = COMPOSING_NO;
3430 return new_chars;
3431}
3432
/* Do nothing unless a composition is in progress
   (cmp_status->state != COMPOSING_NO).  In that case, close it with
   finish_composition and advance char_offset by the value returned.
   cmp_status, charbuf and char_offset come from the caller's
   scope.  */
#define MAYBE_FINISH_COMPOSITION()                              \
  do {                                                          \
    if (cmp_status->state == COMPOSING_NO)                      \
      break;                                                    \
    char_offset += finish_composition (charbuf, cmp_status);    \
  } while (0)
d46c5b12 3439
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4;
   C1 is the byte that follows ESC.

   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition  : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   An ESC 0 that arrives while the alternate characters of an ESC 3
   or ESC 4 composition are being read only ends the alternate-char
   part: the marker [ -1 -1 ] is stored and the state becomes
   COMPOSING_CHAR.

   In every other case a composition still in progress is finished
   first, then a new one is started: the method and the initial state
   are derived from C1, ADD_COMPOSITION_DATA writes the annotation
   header with zero counts, the recorded length is reset to
   MAX_ANNOTATION_LENGTH, and CODING is flagged as annotated.  */

#define DECODE_COMPOSITION_START(c1)                                       \
  do {                                                                     \
    if (c1 == '0'                                                          \
        && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
             && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
            || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
                && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
      {                                                                    \
        /* End of the alternate chars; the composed chars follow.  */      \
        charbuf[0] = -1;                                                   \
        charbuf[1] = -1;                                                   \
        charbuf += 2;                                                      \
        cmp_status->state = COMPOSING_CHAR;                                \
        cmp_status->length += 2;                                           \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        MAYBE_FINISH_COMPOSITION ();                                       \
        if (c1 == '0')                                                     \
          cmp_status->method = COMPOSITION_RELATIVE;                       \
        else if (c1 == '2')                                                \
          cmp_status->method = COMPOSITION_WITH_RULE;                      \
        else if (c1 == '3')                                                \
          cmp_status->method = COMPOSITION_WITH_ALTCHARS;                  \
        else                                                               \
          cmp_status->method = COMPOSITION_WITH_RULE_ALTCHARS;             \
        if (c1 <= '2')                                                     \
          cmp_status->state = COMPOSING_CHAR;                              \
        else                                                               \
          cmp_status->state = COMPOSING_COMPONENT_CHAR;                    \
        ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
        cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
        cmp_status->ncomps = 0;                                            \
        cmp_status->nchars = 0;                                            \
        coding->annotated = 1;                                             \
      }                                                                    \
  } while (0)
3480
ec6d2bb8 3481
/* Handle composition end sequence ESC 1.

   The composition is accepted only if at least one character was
   composed and the state fits the method: a COMPOSITION_WITH_RULE
   composition must end in state COMPOSING_CHAR, and any other method
   must not.  Otherwise the composition is finished as invalid and
   control jumps to the caller's `invalid_code' label.

   On success the annotation header, located cmp_status->length
   elements before charbuf, is completed: its first element is
   lowered by the room taken by alternate chars (ncomps + 2, or
   ncomps * 3 when rules are interleaved), its third element receives
   nchars, char_offset advances by nchars, and the state returns to
   COMPOSING_NO.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    if (cmp_status->nchars != 0                                         \
        && ((cmp_status->state == COMPOSING_CHAR)                       \
            != (cmp_status->method == COMPOSITION_WITH_RULE)))          \
      {                                                                 \
        int *annotation_head = &charbuf[- cmp_status->length];          \
                                                                        \
        if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)            \
          annotation_head[0] -= cmp_status->ncomps + 2;                 \
        else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)  \
          annotation_head[0] -= cmp_status->ncomps * 3;                 \
        annotation_head[2] = cmp_status->nchars;                        \
        char_offset += cmp_status->nchars;                              \
        cmp_status->state = COMPOSING_NO;                               \
      }                                                                 \
    else                                                                \
      {                                                                 \
        MAYBE_FINISH_COMPOSITION ();                                    \
        goto invalid_code;                                              \
      }                                                                 \
  } while (0)
3501
/* Append the pair [ -2 RULE ] to charbuf, add 2 to the recorded
   composition length, and step cmp_status->state back by one.
   charbuf and cmp_status come from the caller's scope.  */

#define STORE_COMPOSITION_RULE(rule)    \
  do {                                  \
    charbuf[0] = -2;                    \
    charbuf[1] = rule;                  \
    charbuf += 2;                       \
    cmp_status->length += 2;            \
    cmp_status->state--;                \
  } while (0)
ec6d2bb8 3511
e951386e
KH
/* Append C, a composed character or a component (alternate)
   character, to charbuf and update cmp_status: the length grows by
   one, and nchars is counted in state COMPOSING_CHAR, ncomps in any
   other state.  When a rule must follow the character (method
   COMPOSITION_WITH_RULE, or COMPOSITION_WITH_RULE_ALTCHARS while in
   state COMPOSING_COMPONENT_CHAR), the state is advanced by one.  */

#define STORE_COMPOSITION_CHAR(c)                                       \
  do {                                                                  \
    *charbuf = (c);                                                     \
    charbuf++;                                                          \
    cmp_status->length += 1;                                            \
    if (cmp_status->state != COMPOSING_CHAR)                            \
      cmp_status->ncomps += 1;                                          \
    else                                                                \
      cmp_status->nchars += 1;                                          \
    if (cmp_status->method == COMPOSITION_WITH_RULE                     \
        || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS        \
            && cmp_status->state == COMPOSING_COMPONENT_CHAR))          \
      cmp_status->state++;                                              \
  } while (0)
88993dfd 3528
d46c5b12 3529
4ed46869
KH
3530/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
3531
b73bfc1c 3532static void
971de7fb 3533decode_coding_iso_2022 (struct coding_system *coding)
4ed46869 3534{
8f924df7
KH
3535 const unsigned char *src = coding->source + coding->consumed;
3536 const unsigned char *src_end = coding->source + coding->src_bytes;
3537 const unsigned char *src_base;
69a80ea3 3538 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5
KH
3539 /* We may produce two annotations (charset and composition) in one
3540 loop and one more charset annotation at the end. */
ff0dacd7 3541 int *charbuf_end
df80c7f0 3542 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
d311d28c 3543 ptrdiff_t consumed_chars = 0, consumed_chars_base;
df7492f9 3544 int multibytep = coding->src_multibyte;
4ed46869 3545 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
3546 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3547 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 3548 int charset_id_2, charset_id_3;
df7492f9
KH
3549 struct charset *charset;
3550 int c;
e951386e 3551 struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
66ebf983 3552 Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
d311d28c
PE
3553 ptrdiff_t char_offset = coding->produced_char;
3554 ptrdiff_t last_offset = char_offset;
ff0dacd7 3555 int last_id = charset_ascii;
2735d060 3556 int eol_dos =
0a9564cb 3557 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 3558 int byte_after_cr = -1;
e951386e 3559 int i;
df7492f9 3560
df7492f9 3561 setup_iso_safe_charsets (attrs);
1b3b981b 3562 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
b73bfc1c 3563
e951386e
KH
3564 if (cmp_status->state != COMPOSING_NO)
3565 {
15cbd324
EZ
3566 if (charbuf_end - charbuf < cmp_status->length)
3567 abort ();
e951386e
KH
3568 for (i = 0; i < cmp_status->length; i++)
3569 *charbuf++ = cmp_status->carryover[i];
3570 coding->annotated = 1;
3571 }
3572
b73bfc1c 3573 while (1)
4ed46869 3574 {
cf299835 3575 int c1, c2, c3;
b73bfc1c
KH
3576
3577 src_base = src;
df7492f9
KH
3578 consumed_chars_base = consumed_chars;
3579
3580 if (charbuf >= charbuf_end)
b71f6f73
KH
3581 {
3582 if (byte_after_cr >= 0)
3583 src_base--;
3584 break;
3585 }
df7492f9 3586
119852e7
KH
3587 if (byte_after_cr >= 0)
3588 c1 = byte_after_cr, byte_after_cr = -1;
3589 else
3590 ONE_MORE_BYTE (c1);
065e3595
KH
3591 if (c1 < 0)
3592 goto invalid_code;
4ed46869 3593
e951386e 3594 if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
4ed46869 3595 {
e951386e
KH
3596 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3597 char_offset++;
3598 CODING_ISO_EXTSEGMENT_LEN (coding)--;
3599 continue;
3600 }
3601
3602 if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3603 {
3604 if (c1 == ISO_CODE_ESC)
ec6d2bb8 3605 {
e951386e
KH
3606 if (src + 1 >= src_end)
3607 goto no_more_source;
3608 *charbuf++ = ISO_CODE_ESC;
3609 char_offset++;
3610 if (src[0] == '%' && src[1] == '@')
df7492f9 3611 {
e951386e
KH
3612 src += 2;
3613 consumed_chars += 2;
3614 char_offset += 2;
3615 /* We are sure charbuf can contain two more chars. */
3616 *charbuf++ = '%';
3617 *charbuf++ = '@';
3618 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
df7492f9 3619 }
4ed46869 3620 }
e951386e
KH
3621 else
3622 {
3623 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3624 char_offset++;
3625 }
3626 continue;
3627 }
3628
3629 if ((cmp_status->state == COMPOSING_RULE
3630 || cmp_status->state == COMPOSING_COMPONENT_RULE)
3631 && c1 != ISO_CODE_ESC)
3632 {
66ebf983 3633 int rule;
e951386e 3634
66ebf983 3635 DECODE_COMPOSITION_RULE (rule);
e951386e
KH
3636 STORE_COMPOSITION_RULE (rule);
3637 continue;
3638 }
3639
3640 /* We produce at most one character. */
3641 switch (iso_code_class [c1])
3642 {
3643 case ISO_0x20_or_0x7F:
df7492f9
KH
3644 if (charset_id_0 < 0
3645 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
3646 /* This is SPACE or DEL. */
3647 charset = CHARSET_FROM_ID (charset_ascii);
3648 else
3649 charset = CHARSET_FROM_ID (charset_id_0);
3650 break;
4ed46869
KH
3651
3652 case ISO_graphic_plane_0:
134b9549
KH
3653 if (charset_id_0 < 0)
3654 charset = CHARSET_FROM_ID (charset_ascii);
3655 else
3656 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
3657 break;
3658
3659 case ISO_0xA0_or_0xFF:
df7492f9
KH
3660 if (charset_id_1 < 0
3661 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3662 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3663 goto invalid_code;
4ed46869
KH
3664 /* This is a graphic character, we fall down ... */
3665
3666 case ISO_graphic_plane_1:
df7492f9
KH
3667 if (charset_id_1 < 0)
3668 goto invalid_code;
3669 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
3670 break;
3671
df7492f9 3672 case ISO_control_0:
2735d060 3673 if (eol_dos && c1 == '\r')
119852e7 3674 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
3675 MAYBE_FINISH_COMPOSITION ();
3676 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
3677 break;
3678
df7492f9 3679 case ISO_control_1:
df7492f9
KH
3680 goto invalid_code;
3681
4ed46869 3682 case ISO_shift_out:
df7492f9
KH
3683 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3684 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3685 goto invalid_code;
3686 CODING_ISO_INVOCATION (coding, 0) = 1;
3687 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3688 continue;
4ed46869
KH
3689
3690 case ISO_shift_in:
df7492f9
KH
3691 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3692 goto invalid_code;
3693 CODING_ISO_INVOCATION (coding, 0) = 0;
3694 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3695 continue;
4ed46869
KH
3696
3697 case ISO_single_shift_2_7:
a63dba42
KH
3698 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3699 goto invalid_code;
4ed46869 3700 case ISO_single_shift_2:
df7492f9
KH
3701 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3702 goto invalid_code;
4ed46869
KH
3703 /* SS2 is handled as an escape sequence of ESC 'N' */
3704 c1 = 'N';
3705 goto label_escape_sequence;
3706
3707 case ISO_single_shift_3:
df7492f9
KH
3708 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3709 goto invalid_code;
4ed46869
KH
3710 /* SS3 is handled as an escape sequence of ESC 'O' */
3711 c1 = 'O';
3712 goto label_escape_sequence;
3713
3714 case ISO_control_sequence_introducer:
3715 /* CSI is handled as an escape sequence of ESC '[' ... */
3716 c1 = '[';
3717 goto label_escape_sequence;
3718
3719 case ISO_escape:
3720 ONE_MORE_BYTE (c1);
3721 label_escape_sequence:
df7492f9 3722 /* Escape sequences handled here are invocation,
4ed46869
KH
3723 designation, direction specification, and character
3724 composition specification. */
3725 switch (c1)
3726 {
3727 case '&': /* revision of following character set */
3728 ONE_MORE_BYTE (c1);
3729 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3730 goto invalid_code;
4ed46869
KH
3731 ONE_MORE_BYTE (c1);
3732 if (c1 != ISO_CODE_ESC)
df7492f9 3733 goto invalid_code;
4ed46869
KH
3734 ONE_MORE_BYTE (c1);
3735 goto label_escape_sequence;
3736
3737 case '$': /* designation of 2-byte character set */
df7492f9
KH
3738 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3739 goto invalid_code;
134b9549
KH
3740 {
3741 int reg, chars96;
3742
3743 ONE_MORE_BYTE (c1);
3744 if (c1 >= '@' && c1 <= 'B')
3745 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3746 or JISX0208.1980 */
134b9549
KH
3747 reg = 0, chars96 = 0;
3748 }
3749 else if (c1 >= 0x28 && c1 <= 0x2B)
3750 { /* designation of DIMENSION2_CHARS94 character set */
3751 reg = c1 - 0x28, chars96 = 0;
3752 ONE_MORE_BYTE (c1);
3753 }
3754 else if (c1 >= 0x2C && c1 <= 0x2F)
3755 { /* designation of DIMENSION2_CHARS96 character set */
3756 reg = c1 - 0x2C, chars96 = 1;
3757 ONE_MORE_BYTE (c1);
3758 }
3759 else
3760 goto invalid_code;
3761 DECODE_DESIGNATION (reg, 2, chars96, c1);
3762 /* We must update these variables now. */
3763 if (reg == 0)
3764 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3765 else if (reg == 1)
3766 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3767 if (chars96 < 0)
3768 goto invalid_code;
3769 }
b73bfc1c 3770 continue;
4ed46869
KH
3771
3772 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3773 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3774 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3775 goto invalid_code;
3776 CODING_ISO_INVOCATION (coding, 0) = 2;
3777 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3778 continue;
4ed46869
KH
3779
3780 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3781 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3782 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3783 goto invalid_code;
3784 CODING_ISO_INVOCATION (coding, 0) = 3;
3785 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3786 continue;
4ed46869
KH
3787
3788 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3789 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3790 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3791 goto invalid_code;
134b9549
KH
3792 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3793 if (charset_id_2 < 0)
3794 charset = CHARSET_FROM_ID (charset_ascii);
3795 else
3796 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3797 ONE_MORE_BYTE (c1);
e7046a18 3798 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3799 goto invalid_code;
4ed46869
KH
3800 break;
3801
3802 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3803 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3804 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3805 goto invalid_code;
134b9549
KH
3806 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3807 if (charset_id_3 < 0)
3808 charset = CHARSET_FROM_ID (charset_ascii);
3809 else
3810 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3811 ONE_MORE_BYTE (c1);
e7046a18 3812 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3813 goto invalid_code;
4ed46869
KH
3814 break;
3815
ec6d2bb8 3816 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3817 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3818 goto invalid_code;
e951386e
KH
3819 if (last_id != charset_ascii)
3820 {
3821 ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3822 last_id = charset_ascii;
3823 last_offset = char_offset;
3824 }
ec6d2bb8 3825 DECODE_COMPOSITION_START (c1);
b73bfc1c 3826 continue;
4ed46869 3827
ec6d2bb8 3828 case '1': /* end composition */
e951386e 3829 if (cmp_status->state == COMPOSING_NO)
df7492f9
KH
3830 goto invalid_code;
3831 DECODE_COMPOSITION_END ();
b73bfc1c 3832 continue;
4ed46869
KH
3833
3834 case '[': /* specification of direction */
de59072a 3835 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
df7492f9 3836 goto invalid_code;
4ed46869 3837 /* For the moment, nested direction is not supported.
d46c5b12 3838 So, `coding->mode & CODING_MODE_DIRECTION' zero means
ad1746f5 3839 left-to-right, and nonzero means right-to-left. */
4ed46869
KH
3840 ONE_MORE_BYTE (c1);
3841 switch (c1)
3842 {
3843 case ']': /* end of the current direction */
d46c5b12 3844 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3845
3846 case '0': /* end of the current direction */
3847 case '1': /* start of left-to-right direction */
3848 ONE_MORE_BYTE (c1);
3849 if (c1 == ']')
d46c5b12 3850 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3851 else
df7492f9 3852 goto invalid_code;
4ed46869
KH
3853 break;
3854
3855 case '2': /* start of right-to-left direction */
3856 ONE_MORE_BYTE (c1);
3857 if (c1 == ']')
d46c5b12 3858 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3859 else
df7492f9 3860 goto invalid_code;
4ed46869
KH
3861 break;
3862
3863 default:
df7492f9 3864 goto invalid_code;
4ed46869 3865 }
b73bfc1c 3866 continue;
4ed46869 3867
103e0180 3868 case '%':
103e0180
KH
3869 ONE_MORE_BYTE (c1);
3870 if (c1 == '/')
3871 {
3872 /* CTEXT extended segment:
3873 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3874 We keep these bytes as is for the moment.
3875 They may be decoded by post-read-conversion. */
3876 int dim, M, L;
4776e638 3877 int size;
8f924df7 3878
103e0180 3879 ONE_MORE_BYTE (dim);
7a84eee5 3880 if (dim < '0' || dim > '4')
e951386e 3881 goto invalid_code;
103e0180 3882 ONE_MORE_BYTE (M);
e951386e
KH
3883 if (M < 128)
3884 goto invalid_code;
103e0180 3885 ONE_MORE_BYTE (L);
e951386e
KH
3886 if (L < 128)
3887 goto invalid_code;
103e0180 3888 size = ((M - 128) * 128) + (L - 128);
e951386e 3889 if (charbuf + 6 > charbuf_end)
4776e638
KH
3890 goto break_loop;
3891 *charbuf++ = ISO_CODE_ESC;
3892 *charbuf++ = '%';
3893 *charbuf++ = '/';
3894 *charbuf++ = dim;
3895 *charbuf++ = BYTE8_TO_CHAR (M);
3896 *charbuf++ = BYTE8_TO_CHAR (L);
e951386e 3897 CODING_ISO_EXTSEGMENT_LEN (coding) = size;
103e0180
KH
3898 }
3899 else if (c1 == 'G')
3900 {
103e0180
KH
3901 /* XFree86 extension for embedding UTF-8 in CTEXT:
3902 ESC % G --UTF-8-BYTES-- ESC % @
3903 We keep these bytes as is for the moment.
3904 They may be decoded by post-read-conversion. */
e951386e 3905 if (charbuf + 3 > charbuf_end)
4776e638 3906 goto break_loop;
e951386e
KH
3907 *charbuf++ = ISO_CODE_ESC;
3908 *charbuf++ = '%';
3909 *charbuf++ = 'G';
3910 CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
103e0180
KH
3911 }
3912 else
4776e638 3913 goto invalid_code;
103e0180 3914 continue;
4776e638 3915 break;
103e0180 3916
4ed46869 3917 default:
df7492f9
KH
3918 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3919 goto invalid_code;
134b9549
KH
3920 {
3921 int reg, chars96;
3922
3923 if (c1 >= 0x28 && c1 <= 0x2B)
3924 { /* designation of DIMENSION1_CHARS94 character set */
3925 reg = c1 - 0x28, chars96 = 0;
3926 ONE_MORE_BYTE (c1);
3927 }
3928 else if (c1 >= 0x2C && c1 <= 0x2F)
3929 { /* designation of DIMENSION1_CHARS96 character set */
3930 reg = c1 - 0x2C, chars96 = 1;
3931 ONE_MORE_BYTE (c1);
3932 }
3933 else
3934 goto invalid_code;
3935 DECODE_DESIGNATION (reg, 1, chars96, c1);
3936 /* We must update these variables now. */
3937 if (reg == 0)
3938 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3939 else if (reg == 1)
3940 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3941 if (chars96 < 0)
3942 goto invalid_code;
3943 }
b73bfc1c 3944 continue;
4ed46869 3945 }
413bb2db
PE
3946 break;
3947
3948 default:
3949 abort ();
b73bfc1c 3950 }
4ed46869 3951
e951386e
KH
3952 if (cmp_status->state == COMPOSING_NO
3953 && charset->id != charset_ascii
ff0dacd7
KH
3954 && last_id != charset->id)
3955 {
3956 if (last_id != charset_ascii)
69a80ea3 3957 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
3958 last_id = charset->id;
3959 last_offset = char_offset;
3960 }
3961
b73bfc1c 3962 /* Now we know CHARSET and 1st position code C1 of a character.
cf299835
KH
3963 Produce a decoded character while getting 2nd and 3rd
3964 position codes C2, C3 if necessary. */
df7492f9 3965 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3966 {
3967 ONE_MORE_BYTE (c2);
cf299835
KH
3968 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3969 || ((c1 & 0x80) != (c2 & 0x80)))
b73bfc1c 3970 /* C2 is not in a valid range. */
df7492f9 3971 goto invalid_code;
cf299835
KH
3972 if (CHARSET_DIMENSION (charset) == 2)
3973 c1 = (c1 << 8) | c2;
3974 else
df7492f9 3975 {
cf299835
KH
3976 ONE_MORE_BYTE (c3);
3977 if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
3978 || ((c1 & 0x80) != (c3 & 0x80)))
3979 /* C3 is not in a valid range. */
df7492f9 3980 goto invalid_code;
cf299835 3981 c1 = (c1 << 16) | (c2 << 8) | c2;
df7492f9
KH
3982 }
3983 }
cf299835 3984 c1 &= 0x7F7F7F;
df7492f9
KH
3985 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3986 if (c < 0)
3987 {
3988 MAYBE_FINISH_COMPOSITION ();
3989 for (; src_base < src; src_base++, char_offset++)
3990 {
3991 if (ASCII_BYTE_P (*src_base))
3992 *charbuf++ = *src_base;
3993 else
3994 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3995 }
3996 }
e951386e 3997 else if (cmp_status->state == COMPOSING_NO)
df7492f9
KH
3998 {
3999 *charbuf++ = c;
4000 char_offset++;
4ed46869 4001 }
e951386e
KH
4002 else if ((cmp_status->state == COMPOSING_CHAR
4003 ? cmp_status->nchars
4004 : cmp_status->ncomps)
4005 >= MAX_COMPOSITION_COMPONENTS)
781d7a48 4006 {
e951386e
KH
4007 /* Too long composition. */
4008 MAYBE_FINISH_COMPOSITION ();
4009 *charbuf++ = c;
4010 char_offset++;
4ed46869 4011 }
e951386e
KH
4012 else
4013 STORE_COMPOSITION_CHAR (c);
4ed46869
KH
4014 continue;
4015
df7492f9
KH
4016 invalid_code:
4017 MAYBE_FINISH_COMPOSITION ();
4ed46869 4018 src = src_base;
df7492f9
KH
4019 consumed_chars = consumed_chars_base;
4020 ONE_MORE_BYTE (c);
065e3595 4021 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4022 char_offset++;
df7492f9 4023 coding->errors++;
4776e638
KH
4024 continue;
4025
4026 break_loop:
4027 break;
4ed46869 4028 }
fb88bf2d 4029
df7492f9 4030 no_more_source:
e951386e
KH
4031 if (cmp_status->state != COMPOSING_NO)
4032 {
4033 if (coding->mode & CODING_MODE_LAST_BLOCK)
4034 MAYBE_FINISH_COMPOSITION ();
4035 else
4036 {
4037 charbuf -= cmp_status->length;
4038 for (i = 0; i < cmp_status->length; i++)
4039 cmp_status->carryover[i] = charbuf[i];
4040 }
4041 }
4042 else if (last_id != charset_ascii)
69a80ea3 4043 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4044 coding->consumed_char += consumed_chars_base;
4045 coding->consumed = src_base - coding->source;
4046 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4047}
4048
b73bfc1c 4049
f4dee582 4050/* ISO2022 encoding stuff. */
4ed46869
KH
4051
4052/*
f4dee582 4053 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 4054 specify more details. In Emacs, each coding system of ISO2022
4ed46869 4055 variant has the following specifications:
df7492f9 4056 1. Initial designation to G0 thru G3.
4ed46869
KH
4057 2. Allows short-form designation?
4058 3. ASCII should be designated to G0 before control characters?
4059 4. ASCII should be designated to G0 at end of line?
4060 5. 7-bit environment or 8-bit environment?
4061 6. Use locking-shift?
4062 7. Use Single-shift?
4063 And the following two are only for Japanese:
4064 8. Use ASCII in place of JIS0201-1976-Roman?
4065 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
4066 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4067 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 4068 details.
4ed46869
KH
4069*/
4070
/* Emit, at DST, the escape sequence that designates CHARSET to graphic
   register REG, and record the new designation in CODING.  DST is
   advanced by the EMIT_* macros.

   The sequence has this shape:

     [ESC '&' <'@' + revision>]  only when CODING has
                                 CODING_ISO_FLAG_REVISION and CHARSET
                                 has a non-negative revision number
     ESC
     ['$']                       only when CHARSET is not 1-dimensional
     [<intermediate-char>]       indexed by REG: one of "()*+" for a
                                 94-char set, one of ",-./" for a
                                 96-char set
     <final-char>

   Short form: for a multi-dimensional 94-char set the intermediate
   char is left out when REG is 0, <final-char> is '@', 'A', or 'B',
   and CODING does not have CODING_ISO_FLAG_LONG_FORM.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                          \
  do {                                                                    \
    const char *designator_94 = "()*+";                                   \
    const char *designator_96 = ",-./";                                   \
    unsigned char designated_final = CHARSET_ISO_FINAL (charset);         \
    int iso_revision = -1;                                                \
                                                                          \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)             \
      iso_revision = CHARSET_ISO_REVISION (charset);                      \
    if (iso_revision >= 0)                                                \
      {                                                                   \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                         \
        EMIT_ONE_BYTE ('@' + iso_revision);                               \
      }                                                                   \
                                                                          \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                   \
    if (CHARSET_DIMENSION (charset) != 1)                                 \
      {                                                                   \
        EMIT_ONE_ASCII_BYTE ('$');                                        \
        if (CHARSET_ISO_CHARS_96 (charset))                               \
          EMIT_ONE_ASCII_BYTE (designator_96[reg]);                       \
        else if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM)  \
                 || reg != 0                                              \
                 || designated_final < '@'                                \
                 || designated_final > 'B')                               \
          EMIT_ONE_ASCII_BYTE (designator_94[reg]);                       \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        int designator;                                                   \
        if (CHARSET_ISO_CHARS_96 (charset))                               \
          designator = designator_96[reg];                                \
        else                                                              \
          designator = designator_94[reg];                                \
        EMIT_ONE_ASCII_BYTE (designator);                                 \
      }                                                                   \
    EMIT_ONE_ASCII_BYTE (designated_final);                               \
                                                                          \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);          \
  } while (0)
4118
df7492f9 4119
4ed46869
KH
/* ISO2022 single-shift functions.  Each macro emits the shift code for
   graphic register 2 (SS2) or 3 (SS3) and then marks CODING as being
   in the single-shifting state.  When CODING has
   CODING_ISO_FLAG_SEVEN_BITS the shift is written as a two-byte
   escape sequence (ESC 'N' or ESC 'O'); otherwise it is written as
   the single control byte ISO_CODE_SS2 or ISO_CODE_SS3.  */

#define ENCODE_SINGLE_SHIFT_2                                             \
  do {                                                                    \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))       \
      {                                                                   \
        EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
      }                                                                   \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                              \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                             \
  do {                                                                    \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))       \
      {                                                                   \
        EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
      }                                                                   \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                              \
  } while (0)
4142
df7492f9 4143
4ed46869
KH
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one emits
   the code and then records in CODING which graphic register is now
   invoked to graphic plane 0:

     ENCODE_SHIFT_IN         ISO_CODE_SI   register 0
     ENCODE_SHIFT_OUT        ISO_CODE_SO   register 1
     ENCODE_LOCKING_SHIFT_2  ESC 'n'       register 2
     ENCODE_LOCKING_SHIFT_3  ESC 'o'       register 3

   These must match what decode_coding_iso_2022 accepts: there ESC 'n'
   invokes register 2 and ESC 'o' invokes register 3.  */

#define ENCODE_SHIFT_IN                         \
  do {                                          \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);          \
    CODING_ISO_INVOCATION (coding, 0) = 0;      \
  } while (0)


#define ENCODE_SHIFT_OUT                        \
  do {                                          \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);          \
    CODING_ISO_INVOCATION (coding, 0) = 1;      \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                  \
  do {                                          \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');   \
    CODING_ISO_INVOCATION (coding, 0) = 2;      \
  } while (0)


/* The final byte must be 'o' here.  Emitting 'n' would make a decoder
   invoke register 2 while CODING records register 3 as invoked.  */
#define ENCODE_LOCKING_SHIFT_3                  \
  do {                                          \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');   \
    CODING_ISO_INVOCATION (coding, 0) = 3;      \
  } while (0)
4174
df7492f9 4175
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has CODING_ISO_FLAG_USE_ROMAN
   and CHARSET is ASCII, it is replaced by the JISX0201-Roman charset.

   The body is a loop.  If a single shift is pending, or CHARSET is
   currently invoked to graphic plane 0 or 1, the byte is emitted
   (7-bit form C1 & 0x7F, or 8-bit form C1 | 0x80) and the loop is
   left.  Otherwise encode_invocation_designation is called to
   designate/invoke CHARSET and the loop runs again.

   C1 is parenthesized at every use so that an expression argument
   (e.g. `a | b') is masked as a whole, as the DIMENSION2 macro
   already does for its arguments.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                      \
  do {                                                                    \
    int id = CHARSET_ID (charset);                                        \
                                                                          \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)           \
        && id == charset_ascii)                                           \
      {                                                                   \
        id = charset_jisx0201_roman;                                      \
        charset = CHARSET_FROM_ID (id);                                   \
      }                                                                   \
                                                                          \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                              \
      {                                                                   \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)       \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        else                                                              \
          EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                          \
        break;                                                            \
      }                                                                   \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                \
      {                                                                   \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                                \
        break;                                                            \
      }                                                                   \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                \
      {                                                                   \
        EMIT_ONE_BYTE ((c1) | 0x80);                                      \
        break;                                                            \
      }                                                                   \
    else                                                                  \
      /* Since CHARSET is not yet invoked to any graphic planes, we       \
         must invoke it, or, at first, designate it to some graphic       \
         register.  Then repeat the loop to actually produce the          \
         character.  */                                                   \
      dst = encode_invocation_designation (charset, coding, dst,          \
                                           &produced_chars);              \
  } while (1)
4218
df7492f9 4219
f4dee582
RS
/* Emit the two bytes of a DIMENSION2 character of CHARSET whose
   position codes are C1 and C2, producing any designation and
   invocation codes that are needed first.

   CHARSET must be an lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS and CHARSET is JISX0208, it is replaced
   by JISX0208-1978.

   The body loops until the character has been emitted.  The bytes are
   written in 7-bit form (code & 0x7F) or 8-bit form (code | 0x80)
   depending on the pending single shift or on the graphic plane
   CHARSET is invoked to.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                  \
  do {                                                                    \
    int wanted_id = CHARSET_ID (charset);                                 \
                                                                          \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)          \
        && wanted_id == charset_jisx0208)                                 \
      {                                                                   \
        wanted_id = charset_jisx0208_1978;                                \
        charset = CHARSET_FROM_ID (wanted_id);                            \
      }                                                                   \
                                                                          \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                              \
      {                                                                   \
        /* A single shift is pending; it applies to this character. */    \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))   \
          {                                                               \
            EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
          }                                                               \
        else                                                              \
          {                                                               \
            EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
          }                                                               \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                          \
        break;                                                            \
      }                                                                   \
    if (wanted_id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                   \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                  \
        break;                                                            \
      }                                                                   \
    if (wanted_id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                   \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                        \
        break;                                                            \
      }                                                                   \
    /* CHARSET is not invoked to any graphic plane yet: designate         \
       and/or invoke it, then go round again to emit the character. */    \
    dst = encode_invocation_designation (charset, coding, dst,            \
                                         &produced_chars);                \
  } while (1)
4262
05e6f5dc 4263
df7492f9
KH
/* Encode character C of CHARSET.  CODING_ENCODE_CHAR stores C's code
   in CHARSET; that code is then handed to the DIMENSION1 emitter when
   CHARSET is 1-dimensional, and otherwise to the DIMENSION2 emitter
   as (code >> 8, code & 0xFF).  */

#define ENCODE_ISO_CHARACTER(charset, c)                                  \
  do {                                                                    \
    unsigned position_code;                                               \
                                                                          \
    CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c),             \
                        position_code);                                   \
    if (CHARSET_DIMENSION (charset) != 1)                                 \
      {                                                                   \
        ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), position_code >> 8,   \
                                         position_code & 0xFF);           \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), position_code);       \
      }                                                                   \
  } while (0)
bdd9fb48 4274
05e6f5dc 4275
4ed46869 4276/* Produce designation and invocation codes at a place pointed by DST
df7492f9 4277 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
4278 Return new DST. */
4279
e2f1bab9 4280static unsigned char *
cf84bb53
JB
4281encode_invocation_designation (struct charset *charset,
4282 struct coding_system *coding,
d311d28c 4283 unsigned char *dst, ptrdiff_t *p_nchars)
4ed46869 4284{
df7492f9 4285 int multibytep = coding->dst_multibyte;
d311d28c 4286 ptrdiff_t produced_chars = *p_nchars;
4ed46869 4287 int reg; /* graphic register number */
df7492f9 4288 int id = CHARSET_ID (charset);
4ed46869
KH
4289
4290 /* At first, check designations. */
4291 for (reg = 0; reg < 4; reg++)
df7492f9 4292 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
4293 break;
4294
4295 if (reg >= 4)
4296 {
4297 /* CHARSET is not yet designated to any graphic registers. */
4298 /* At first check the requested designation. */
df7492f9
KH
4299 reg = CODING_ISO_REQUEST (coding, id);
4300 if (reg < 0)
1ba9e4ab
KH
4301 /* Since CHARSET requests no special designation, designate it
4302 to graphic register 0. */
4ed46869
KH
4303 reg = 0;
4304
4305 ENCODE_DESIGNATION (charset, reg, coding);
4306 }
4307
df7492f9
KH
4308 if (CODING_ISO_INVOCATION (coding, 0) != reg
4309 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
4310 {
4311 /* Since the graphic register REG is not invoked to any graphic
4312 planes, invoke it to graphic plane 0. */
4313 switch (reg)
4314 {
4315 case 0: /* graphic register 0 */
4316 ENCODE_SHIFT_IN;
4317 break;
4318
4319 case 1: /* graphic register 1 */
4320 ENCODE_SHIFT_OUT;
4321 break;
4322
4323 case 2: /* graphic register 2 */
df7492f9 4324 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
4325 ENCODE_SINGLE_SHIFT_2;
4326 else
4327 ENCODE_LOCKING_SHIFT_2;
4328 break;
4329
4330 case 3: /* graphic register 3 */
df7492f9 4331 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
4332 ENCODE_SINGLE_SHIFT_3;
4333 else
4334 ENCODE_LOCKING_SHIFT_3;
4335 break;
4336 }
4337 }
b73bfc1c 4338
df7492f9 4339 *p_nchars = produced_chars;
4ed46869
KH
4340 return dst;
4341}
4342
4ed46869
KH
4343
/* Bring the graphic planes and registers of CODING back to their
   initial state: emit SI unless register 0 is already invoked to
   graphic plane 0, then re-designate every register whose initial
   charset is set (>= 0) and differs from its current designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                                 \
  do {                                                                    \
    int gr;                                                               \
                                                                          \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                           \
      ENCODE_SHIFT_IN;                                                    \
    for (gr = 0; gr < 4; gr++)                                            \
      {                                                                   \
        struct charset *initial_charset;                                  \
                                                                          \
        if (CODING_ISO_INITIAL (coding, gr) < 0                           \
            || (CODING_ISO_DESIGNATION (coding, gr)                       \
                == CODING_ISO_INITIAL (coding, gr)))                      \
          continue;                                                       \
        initial_charset                                                   \
          = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, gr));            \
        ENCODE_DESIGNATION (initial_charset, gr, coding);                 \
      }                                                                   \
  } while (0)
4362
df7492f9 4363
bdd9fb48 4364/* Produce designation sequences of charsets in the line started from
5eb05ea3
KH
4365 CHARBUF to a place pointed by DST, and return the number of
4366 produced bytes. DST should not directly point a buffer text area
4367 which may be relocated by char_charset call.
bdd9fb48
KH
4368
4369 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
4370 find all the necessary designations. */
4371
6e6c82a4 4372static ptrdiff_t
5eb05ea3
KH
4373encode_designation_at_bol (struct coding_system *coding,
4374 int *charbuf, int *charbuf_end,
461c2ab9 4375 unsigned char *dst)
e0e989f6 4376{
75a3b399 4377 unsigned char *orig = dst;
df7492f9 4378 struct charset *charset;
bdd9fb48
KH
4379 /* Table of charsets to be designated to each graphic register. */
4380 int r[4];
df7492f9 4381 int c, found = 0, reg;
d311d28c 4382 ptrdiff_t produced_chars = 0;
df7492f9
KH
4383 int multibytep = coding->dst_multibyte;
4384 Lisp_Object attrs;
4385 Lisp_Object charset_list;
4386
4387 attrs = CODING_ID_ATTRS (coding->id);
4388 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4389 if (EQ (charset_list, Qiso_2022))
4390 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
4391
4392 for (reg = 0; reg < 4; reg++)
4393 r[reg] = -1;
4394
5eb05ea3 4395 while (charbuf < charbuf_end && found < 4)
e0e989f6 4396 {
df7492f9
KH
4397 int id;
4398
4399 c = *charbuf++;
b73bfc1c
KH
4400 if (c == '\n')
4401 break;
df7492f9
KH
4402 charset = char_charset (c, charset_list, NULL);
4403 id = CHARSET_ID (charset);
4404 reg = CODING_ISO_REQUEST (coding, id);
4405 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
4406 {
4407 found++;
df7492f9 4408 r[reg] = id;
bdd9fb48 4409 }
bdd9fb48
KH
4410 }
4411
4412 if (found)
4413 {
4414 for (reg = 0; reg < 4; reg++)
4415 if (r[reg] >= 0
df7492f9
KH
4416 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4417 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 4418 }
b73bfc1c 4419
5eb05ea3 4420 return dst - orig;
e0e989f6
KH
4421}
4422
4ed46869
KH
4423/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
4424
df7492f9 4425static int
971de7fb 4426encode_coding_iso_2022 (struct coding_system *coding)
4ed46869 4427{
df7492f9
KH
4428 int multibytep = coding->dst_multibyte;
4429 int *charbuf = coding->charbuf;
4430 int *charbuf_end = charbuf + coding->charbuf_used;
4431 unsigned char *dst = coding->destination + coding->produced;
4432 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4433 int safe_room = 16;
4434 int bol_designation
4435 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4436 && CODING_ISO_BOL (coding));
d311d28c 4437 ptrdiff_t produced_chars = 0;
df7492f9
KH
4438 Lisp_Object attrs, eol_type, charset_list;
4439 int ascii_compatible;
b73bfc1c 4440 int c;
ff0dacd7 4441 int preferred_charset_id = -1;
05e6f5dc 4442
24a73b0a 4443 CODING_GET_INFO (coding, attrs, charset_list);
0a9564cb 4444 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
24a73b0a
KH
4445 if (VECTORP (eol_type))
4446 eol_type = Qunix;
4447
004068e4 4448 setup_iso_safe_charsets (attrs);
ff0dacd7 4449 /* Charset list may have been changed. */
287c57d7 4450 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
1b3b981b 4451 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
0eecad43 4452
a552b35a
KH
4453 ascii_compatible
4454 = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
4455 && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
4456 | CODING_ISO_FLAG_LOCKING_SHIFT)));
bdd9fb48 4457
df7492f9 4458 while (charbuf < charbuf_end)
4ed46869 4459 {
df7492f9 4460 ASSURE_DESTINATION (safe_room);
b73bfc1c 4461
df7492f9 4462 if (bol_designation)
b73bfc1c 4463 {
bdd9fb48 4464 /* We have to produce designation sequences if any now. */
5eb05ea3
KH
4465 unsigned char desig_buf[16];
4466 int nbytes;
8f50130c 4467 ptrdiff_t offset;
5eb05ea3
KH
4468
4469 charset_map_loaded = 0;
4470 nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end,
4471 desig_buf);
4472 if (charset_map_loaded
c1892f11 4473 && (offset = coding_change_destination (coding)))
5eb05ea3
KH
4474 {
4475 dst += offset;
4476 dst_end += offset;
4477 }
4478 memcpy (dst, desig_buf, nbytes);
4479 dst += nbytes;
df7492f9 4480 /* We are sure that designation sequences are all ASCII bytes. */
5eb05ea3
KH
4481 produced_chars += nbytes;
4482 bol_designation = 0;
4483 ASSURE_DESTINATION (safe_room);
e0e989f6
KH
4484 }
4485
df7492f9 4486 c = *charbuf++;
ec6d2bb8 4487
ff0dacd7
KH
4488 if (c < 0)
4489 {
4490 /* Handle an annotation. */
4491 switch (*charbuf)
ec6d2bb8 4492 {
ff0dacd7
KH
4493 case CODING_ANNOTATE_COMPOSITION_MASK:
4494 /* Not yet implemented. */
4495 break;
4496 case CODING_ANNOTATE_CHARSET_MASK:
cf7dfdf5 4497 preferred_charset_id = charbuf[2];
ff0dacd7
KH
4498 if (preferred_charset_id >= 0
4499 && NILP (Fmemq (make_number (preferred_charset_id),
4500 charset_list)))
4501 preferred_charset_id = -1;
4502 break;
4503 default:
4504 abort ();
4ed46869 4505 }
ff0dacd7
KH
4506 charbuf += -c - 1;
4507 continue;
4ed46869 4508 }
ec6d2bb8 4509
b73bfc1c
KH
4510 /* Now encode the character C. */
4511 if (c < 0x20 || c == 0x7F)
4512 {
df7492f9
KH
4513 if (c == '\n'
4514 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 4515 {
df7492f9
KH
4516 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4517 ENCODE_RESET_PLANE_AND_REGISTER ();
4518 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 4519 {
df7492f9
KH
4520 int i;
4521
4522 for (i = 0; i < 4; i++)
4523 CODING_ISO_DESIGNATION (coding, i)
4524 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 4525 }
df7492f9
KH
4526 bol_designation
4527 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 4528 }
df7492f9
KH
4529 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4530 ENCODE_RESET_PLANE_AND_REGISTER ();
4531 EMIT_ONE_ASCII_BYTE (c);
4ed46869 4532 }
df7492f9 4533 else if (ASCII_CHAR_P (c))
88993dfd 4534 {
df7492f9
KH
4535 if (ascii_compatible)
4536 EMIT_ONE_ASCII_BYTE (c);
93dec019 4537 else
19a8d9e0 4538 {
bf16eb23
KH
4539 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4540 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 4541 }
4ed46869 4542 }
16eafb5d 4543 else if (CHAR_BYTE8_P (c))
88993dfd 4544 {
16eafb5d
KH
4545 c = CHAR_TO_BYTE8 (c);
4546 EMIT_ONE_BYTE (c);
88993dfd 4547 }
b73bfc1c 4548 else
df7492f9 4549 {
ff0dacd7 4550 struct charset *charset;
b73bfc1c 4551
ff0dacd7
KH
4552 if (preferred_charset_id >= 0)
4553 {
5eb05ea3
KH
4554 int result;
4555
ff0dacd7 4556 charset = CHARSET_FROM_ID (preferred_charset_id);
5eb05ea3
KH
4557 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
4558 if (! result)
4559 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4560 NULL, charset);
ff0dacd7
KH
4561 }
4562 else
5eb05ea3
KH
4563 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4564 NULL, charset);
df7492f9
KH
4565 if (!charset)
4566 {
41cbe562
KH
4567 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4568 {
4569 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4570 charset = CHARSET_FROM_ID (charset_ascii);
4571 }
4572 else
4573 {
4574 c = coding->default_char;
5eb05ea3
KH
4575 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4576 charset_list, NULL, charset);
41cbe562 4577 }
df7492f9
KH
4578 }
4579 ENCODE_ISO_CHARACTER (charset, c);
4580 }
84fbb8a0 4581 }
b73bfc1c 4582
df7492f9
KH
4583 if (coding->mode & CODING_MODE_LAST_BLOCK
4584 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4585 {
4586 ASSURE_DESTINATION (safe_room);
4587 ENCODE_RESET_PLANE_AND_REGISTER ();
4588 }
065e3595 4589 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4590 CODING_ISO_BOL (coding) = bol_designation;
4591 coding->produced_char += produced_chars;
4592 coding->produced = dst - coding->destination;
4593 return 0;
4ed46869
KH
4594}
4595
4596\f
df7492f9 4597/*** 8. SJIS and BIG5 handlers ***/
4ed46869 4598
df7492f9 4599/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
4600 quite widely. So, for the moment, Emacs supports them in the bare
4601 C code. But, in the future, they may be supported only by CCL. */
4602
4603/* SJIS is a coding system encoding three character sets: ASCII, right
4604 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
4605 as is. A character of charset katakana-jisx0201 is encoded by
4606 "position-code + 0x80". A character of charset japanese-jisx0208
4607 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 4608 so that it fits in the range below.
4ed46869
KH
4609
4610 --- CODE RANGE of SJIS ---
4611 (character set) (range)
4612 ASCII 0x00 .. 0x7F
df7492f9 4613 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 4614 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 4615 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
4616 -------------------------------
4617
4618*/
4619
4620/* BIG5 is a coding system encoding two character sets: ASCII and
4621 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 4622 character set and is encoded in two-byte.
4ed46869
KH
4623
4624 --- CODE RANGE of BIG5 ---
4625 (character set) (range)
4626 ASCII 0x00 .. 0x7F
4627 Big5 (1st byte) 0xA1 .. 0xFE
4628 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
4629 --------------------------
4630
df7492f9 4631 */
4ed46869
KH
4632
4633/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4634 Check if a text is encoded in SJIS. If it is, return
df7492f9 4635 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 4636
0a28aafb 4637static int
cf84bb53
JB
4638detect_coding_sjis (struct coding_system *coding,
4639 struct coding_detection_info *detect_info)
4ed46869 4640{
065e3595 4641 const unsigned char *src = coding->source, *src_base;
8f924df7 4642 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 4643 int multibytep = coding->src_multibyte;
d311d28c 4644 ptrdiff_t consumed_chars = 0;
df7492f9 4645 int found = 0;
b73bfc1c 4646 int c;
f07190ca
KH
4647 Lisp_Object attrs, charset_list;
4648 int max_first_byte_of_2_byte_code;
4649
4650 CODING_GET_INFO (coding, attrs, charset_list);
4651 max_first_byte_of_2_byte_code
4652 = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
df7492f9 4653
ff0dacd7 4654 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
4655 /* A coding system of this category is always ASCII compatible. */
4656 src += coding->head_ascii;
4ed46869 4657
b73bfc1c 4658 while (1)
4ed46869 4659 {
065e3595 4660 src_base = src;
df7492f9 4661 ONE_MORE_BYTE (c);
682169fe
KH
4662 if (c < 0x80)
4663 continue;
f07190ca
KH
4664 if ((c >= 0x81 && c <= 0x9F)
4665 || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4ed46869 4666 {
df7492f9 4667 ONE_MORE_BYTE (c);
682169fe 4668 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 4669 break;
ff0dacd7 4670 found = CATEGORY_MASK_SJIS;
4ed46869 4671 }
df7492f9 4672 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 4673 found = CATEGORY_MASK_SJIS;
df7492f9
KH
4674 else
4675 break;
4ed46869 4676 }
ff0dacd7 4677 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
4678 return 0;
4679
4680 no_more_source:
065e3595 4681 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4682 {
ff0dacd7 4683 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 4684 return 0;
4ed46869 4685 }
ff0dacd7
KH
4686 detect_info->found |= found;
4687 return 1;
4ed46869
KH
4688}
4689
4690/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4691 Check if a text is encoded in BIG5. If it is, return
df7492f9 4692 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 4693
0a28aafb 4694static int
cf84bb53
JB
4695detect_coding_big5 (struct coding_system *coding,
4696 struct coding_detection_info *detect_info)
4ed46869 4697{
065e3595 4698 const unsigned char *src = coding->source, *src_base;
8f924df7 4699 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 4700 int multibytep = coding->src_multibyte;
d311d28c 4701 ptrdiff_t consumed_chars = 0;
df7492f9 4702 int found = 0;
b73bfc1c 4703 int c;
fa42c37f 4704
ff0dacd7 4705 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4706 /* A coding system of this category is always ASCII compatible. */
4707 src += coding->head_ascii;
fa42c37f 4708
b73bfc1c 4709 while (1)
fa42c37f 4710 {
065e3595 4711 src_base = src;
df7492f9
KH
4712 ONE_MORE_BYTE (c);
4713 if (c < 0x80)
fa42c37f 4714 continue;
df7492f9 4715 if (c >= 0xA1)
fa42c37f 4716 {
df7492f9
KH
4717 ONE_MORE_BYTE (c);
4718 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4719 return 0;
ff0dacd7 4720 found = CATEGORY_MASK_BIG5;
fa42c37f 4721 }
df7492f9
KH
4722 else
4723 break;
fa42c37f 4724 }
ff0dacd7 4725 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4726 return 0;
fa42c37f 4727
df7492f9 4728 no_more_source:
065e3595 4729 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4730 {
ff0dacd7 4731 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4732 return 0;
4733 }
ff0dacd7
KH
4734 detect_info->found |= found;
4735 return 1;
fa42c37f
KH
4736}
4737
4ed46869
KH
4738/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4739 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
fa42c37f 4740
b73bfc1c 4741static void
971de7fb 4742decode_coding_sjis (struct coding_system *coding)
4ed46869 4743{
8f924df7
KH
4744 const unsigned char *src = coding->source + coding->consumed;
4745 const unsigned char *src_end = coding->source + coding->src_bytes;
4746 const unsigned char *src_base;
69a80ea3 4747 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5 4748 /* We may produce one charset annotation in one loop and one more at
df80c7f0 4749 the end. */
69a80ea3 4750 int *charbuf_end
df80c7f0 4751 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
d311d28c 4752 ptrdiff_t consumed_chars = 0, consumed_chars_base;
df7492f9
KH
4753 int multibytep = coding->src_multibyte;
4754 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4755 struct charset *charset_kanji2;
24a73b0a 4756 Lisp_Object attrs, charset_list, val;
d311d28c
PE
4757 ptrdiff_t char_offset = coding->produced_char;
4758 ptrdiff_t last_offset = char_offset;
ff0dacd7 4759 int last_id = charset_ascii;
2735d060 4760 int eol_dos =
0a9564cb 4761 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 4762 int byte_after_cr = -1;
a5d301df 4763
24a73b0a 4764 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4765
4766 val = charset_list;
4767 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4768 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4769 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4770 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4771
b73bfc1c 4772 while (1)
4ed46869 4773 {
df7492f9 4774 int c, c1;
24a73b0a 4775 struct charset *charset;
fa42c37f 4776
b73bfc1c 4777 src_base = src;
df7492f9 4778 consumed_chars_base = consumed_chars;
fa42c37f 4779
df7492f9 4780 if (charbuf >= charbuf_end)
b71f6f73
KH
4781 {
4782 if (byte_after_cr >= 0)
4783 src_base--;
4784 break;
4785 }
df7492f9 4786
119852e7
KH
4787 if (byte_after_cr >= 0)
4788 c = byte_after_cr, byte_after_cr = -1;
4789 else
4790 ONE_MORE_BYTE (c);
065e3595
KH
4791 if (c < 0)
4792 goto invalid_code;
24a73b0a 4793 if (c < 0x80)
119852e7 4794 {
2735d060 4795 if (eol_dos && c == '\r')
119852e7
KH
4796 ONE_MORE_BYTE (byte_after_cr);
4797 charset = charset_roman;
4798 }
57a47f8a 4799 else if (c == 0x80 || c == 0xA0)
8e921c4b 4800 goto invalid_code;
57a47f8a
KH
4801 else if (c >= 0xA1 && c <= 0xDF)
4802 {
4803 /* SJIS -> JISX0201-Kana */
4804 c &= 0x7F;
4805 charset = charset_kana;
4806 }
4807 else if (c <= 0xEF)
df7492f9 4808 {
57a47f8a
KH
4809 /* SJIS -> JISX0208 */
4810 ONE_MORE_BYTE (c1);
4811 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4812 goto invalid_code;
57a47f8a
KH
4813 c = (c << 8) | c1;
4814 SJIS_TO_JIS (c);
4815 charset = charset_kanji;
4816 }
4817 else if (c <= 0xFC && charset_kanji2)
4818 {
c6876370 4819 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4820 ONE_MORE_BYTE (c1);
4821 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4822 goto invalid_code;
57a47f8a
KH
4823 c = (c << 8) | c1;
4824 SJIS_TO_JIS2 (c);
4825 charset = charset_kanji2;
df7492f9 4826 }
57a47f8a
KH
4827 else
4828 goto invalid_code;
24a73b0a
KH
4829 if (charset->id != charset_ascii
4830 && last_id != charset->id)
4831 {
4832 if (last_id != charset_ascii)
69a80ea3 4833 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4834 last_id = charset->id;
4835 last_offset = char_offset;
4836 }
4837 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4838 *charbuf++ = c;
ff0dacd7 4839 char_offset++;
df7492f9 4840 continue;
b73bfc1c 4841
df7492f9
KH
4842 invalid_code:
4843 src = src_base;
4844 consumed_chars = consumed_chars_base;
4845 ONE_MORE_BYTE (c);
065e3595 4846 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4847 char_offset++;
df7492f9
KH
4848 coding->errors++;
4849 }
fa42c37f 4850
df7492f9 4851 no_more_source:
ff0dacd7 4852 if (last_id != charset_ascii)
69a80ea3 4853 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4854 coding->consumed_char += consumed_chars_base;
4855 coding->consumed = src_base - coding->source;
4856 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4857}
4858
b73bfc1c 4859static void
971de7fb 4860decode_coding_big5 (struct coding_system *coding)
4ed46869 4861{
8f924df7
KH
4862 const unsigned char *src = coding->source + coding->consumed;
4863 const unsigned char *src_end = coding->source + coding->src_bytes;
4864 const unsigned char *src_base;
69a80ea3 4865 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5 4866 /* We may produce one charset annotation in one loop and one more at
df80c7f0 4867 the end. */
69a80ea3 4868 int *charbuf_end
df80c7f0 4869 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
d311d28c 4870 ptrdiff_t consumed_chars = 0, consumed_chars_base;
df7492f9
KH
4871 int multibytep = coding->src_multibyte;
4872 struct charset *charset_roman, *charset_big5;
24a73b0a 4873 Lisp_Object attrs, charset_list, val;
d311d28c
PE
4874 ptrdiff_t char_offset = coding->produced_char;
4875 ptrdiff_t last_offset = char_offset;
ff0dacd7 4876 int last_id = charset_ascii;
2735d060 4877 int eol_dos =
0a9564cb 4878 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 4879 int byte_after_cr = -1;
df7492f9 4880
24a73b0a 4881 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4882 val = charset_list;
4883 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4884 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4885
b73bfc1c 4886 while (1)
4ed46869 4887 {
df7492f9 4888 int c, c1;
24a73b0a 4889 struct charset *charset;
b73bfc1c
KH
4890
4891 src_base = src;
df7492f9
KH
4892 consumed_chars_base = consumed_chars;
4893
4894 if (charbuf >= charbuf_end)
b71f6f73
KH
4895 {
4896 if (byte_after_cr >= 0)
4897 src_base--;
4898 break;
4899 }
df7492f9 4900
119852e7 4901 if (byte_after_cr >= 0)
14daee73 4902 c = byte_after_cr, byte_after_cr = -1;
119852e7
KH
4903 else
4904 ONE_MORE_BYTE (c);
b73bfc1c 4905
065e3595
KH
4906 if (c < 0)
4907 goto invalid_code;
24a73b0a 4908 if (c < 0x80)
119852e7 4909 {
2735d060 4910 if (eol_dos && c == '\r')
119852e7
KH
4911 ONE_MORE_BYTE (byte_after_cr);
4912 charset = charset_roman;
4913 }
24a73b0a 4914 else
4ed46869 4915 {
24a73b0a
KH
4916 /* BIG5 -> Big5 */
4917 if (c < 0xA1 || c > 0xFE)
4918 goto invalid_code;
4919 ONE_MORE_BYTE (c1);
4920 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4921 goto invalid_code;
4922 c = c << 8 | c1;
4923 charset = charset_big5;
4ed46869 4924 }
24a73b0a
KH
4925 if (charset->id != charset_ascii
4926 && last_id != charset->id)
df7492f9 4927 {
24a73b0a 4928 if (last_id != charset_ascii)
69a80ea3 4929 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4930 last_id = charset->id;
4931 last_offset = char_offset;
4ed46869 4932 }
24a73b0a 4933 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4934 *charbuf++ = c;
ff0dacd7 4935 char_offset++;
fb88bf2d
KH
4936 continue;
4937
df7492f9 4938 invalid_code:
4ed46869 4939 src = src_base;
df7492f9
KH
4940 consumed_chars = consumed_chars_base;
4941 ONE_MORE_BYTE (c);
065e3595 4942 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4943 char_offset++;
df7492f9 4944 coding->errors++;
fb88bf2d 4945 }
d46c5b12 4946
df7492f9 4947 no_more_source:
ff0dacd7 4948 if (last_id != charset_ascii)
69a80ea3 4949 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4950 coding->consumed_char += consumed_chars_base;
4951 coding->consumed = src_base - coding->source;
4952 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4953}
4954
4955/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4956 This function can encode charsets `ascii', `katakana-jisx0201',
4957 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4958 are sure that all these charsets are registered as official charset
4ed46869
KH
4959 (i.e. do not have extended leading-codes). Characters of other
4960 charsets are produced without any encoding. If SJIS_P is 1, encode
4961 SJIS text, else encode BIG5 text. */
4962
df7492f9 4963static int
971de7fb 4964encode_coding_sjis (struct coding_system *coding)
4ed46869 4965{
df7492f9
KH
4966 int multibytep = coding->dst_multibyte;
4967 int *charbuf = coding->charbuf;
4968 int *charbuf_end = charbuf + coding->charbuf_used;
4969 unsigned char *dst = coding->destination + coding->produced;
4970 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4971 int safe_room = 4;
d311d28c 4972 ptrdiff_t produced_chars = 0;
24a73b0a 4973 Lisp_Object attrs, charset_list, val;
df7492f9 4974 int ascii_compatible;
66ebf983 4975 struct charset *charset_kanji, *charset_kana;
57a47f8a 4976 struct charset *charset_kanji2;
df7492f9 4977 int c;
a5d301df 4978
24a73b0a 4979 CODING_GET_INFO (coding, attrs, charset_list);
66ebf983 4980 val = XCDR (charset_list);
df7492f9 4981 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4982 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4983 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4984
df7492f9 4985 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 4986
df7492f9
KH
4987 while (charbuf < charbuf_end)
4988 {
4989 ASSURE_DESTINATION (safe_room);
4990 c = *charbuf++;
b73bfc1c 4991 /* Now encode the character C. */
df7492f9
KH
4992 if (ASCII_CHAR_P (c) && ascii_compatible)
4993 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4994 else if (CHAR_BYTE8_P (c))
4995 {
4996 c = CHAR_TO_BYTE8 (c);
4997 EMIT_ONE_BYTE (c);
4998 }
df7492f9 4999 else
b73bfc1c 5000 {
df7492f9 5001 unsigned code;
5eb05ea3
KH
5002 struct charset *charset;
5003 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5004 &code, charset);
df7492f9
KH
5005
5006 if (!charset)
4ed46869 5007 {
41cbe562 5008 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 5009 {
41cbe562
KH
5010 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5011 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 5012 }
41cbe562 5013 else
b73bfc1c 5014 {
41cbe562 5015 c = coding->default_char;
5eb05ea3
KH
5016 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
5017 charset_list, &code, charset);
b73bfc1c 5018 }
b73bfc1c 5019 }
df7492f9
KH
5020 if (code == CHARSET_INVALID_CODE (charset))
5021 abort ();
5022 if (charset == charset_kanji)
5023 {
5024 int c1, c2;
5025 JIS_TO_SJIS (code);
5026 c1 = code >> 8, c2 = code & 0xFF;
5027 EMIT_TWO_BYTES (c1, c2);
5028 }
5029 else if (charset == charset_kana)
5030 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
5031 else if (charset_kanji2 && charset == charset_kanji2)
5032 {
5033 int c1, c2;
5034
5035 c1 = code >> 8;
f07190ca
KH
5036 if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5037 || c1 == 0x28
57a47f8a
KH
5038 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5039 {
5040 JIS_TO_SJIS2 (code);
5041 c1 = code >> 8, c2 = code & 0xFF;
5042 EMIT_TWO_BYTES (c1, c2);
5043 }
5044 else
5045 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5046 }
df7492f9
KH
5047 else
5048 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5049 }
5050 }
065e3595 5051 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5052 coding->produced_char += produced_chars;
5053 coding->produced = dst - coding->destination;
5054 return 0;
5055}
5056
5057static int
971de7fb 5058encode_coding_big5 (struct coding_system *coding)
df7492f9
KH
5059{
5060 int multibytep = coding->dst_multibyte;
5061 int *charbuf = coding->charbuf;
5062 int *charbuf_end = charbuf + coding->charbuf_used;
5063 unsigned char *dst = coding->destination + coding->produced;
5064 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5065 int safe_room = 4;
d311d28c 5066 ptrdiff_t produced_chars = 0;
24a73b0a 5067 Lisp_Object attrs, charset_list, val;
df7492f9 5068 int ascii_compatible;
66ebf983 5069 struct charset *charset_big5;
df7492f9
KH
5070 int c;
5071
24a73b0a 5072 CODING_GET_INFO (coding, attrs, charset_list);
66ebf983 5073 val = XCDR (charset_list);
df7492f9
KH
5074 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5075 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5076
5077 while (charbuf < charbuf_end)
5078 {
5079 ASSURE_DESTINATION (safe_room);
5080 c = *charbuf++;
5081 /* Now encode the character C. */
5082 if (ASCII_CHAR_P (c) && ascii_compatible)
5083 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
5084 else if (CHAR_BYTE8_P (c))
5085 {
5086 c = CHAR_TO_BYTE8 (c);
5087 EMIT_ONE_BYTE (c);
b73bfc1c
KH
5088 }
5089 else
5090 {
df7492f9 5091 unsigned code;
5eb05ea3
KH
5092 struct charset *charset;
5093 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5094 &code, charset);
df7492f9
KH
5095
5096 if (! charset)
b73bfc1c 5097 {
41cbe562 5098 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 5099 {
41cbe562
KH
5100 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5101 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 5102 }
41cbe562 5103 else
0eecad43 5104 {
41cbe562 5105 c = coding->default_char;
5eb05ea3
KH
5106 CODING_CHAR_CHARSET (coding, dst, dst_end, c,
5107 charset_list, &code, charset);
0eecad43 5108 }
4ed46869 5109 }
df7492f9
KH
5110 if (code == CHARSET_INVALID_CODE (charset))
5111 abort ();
5112 if (charset == charset_big5)
b73bfc1c 5113 {
df7492f9
KH
5114 int c1, c2;
5115
5116 c1 = code >> 8, c2 = code & 0xFF;
5117 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 5118 }
df7492f9
KH
5119 else
5120 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 5121 }
4ed46869 5122 }
065e3595 5123 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5124 coding->produced_char += produced_chars;
5125 coding->produced = dst - coding->destination;
5126 return 0;
4ed46869
KH
5127}
5128
5129\f
df7492f9 5130/*** 9. CCL handlers ***/
1397dc18
KH
5131
5132/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5133 Check if a text is encoded in a coding system of which
5134 encoder/decoder are written in CCL program. If it is, return
df7492f9 5135 CATEGORY_MASK_CCL, else return 0. */
1397dc18 5136
0a28aafb 5137static int
cf84bb53
JB
5138detect_coding_ccl (struct coding_system *coding,
5139 struct coding_detection_info *detect_info)
1397dc18 5140{
065e3595 5141 const unsigned char *src = coding->source, *src_base;
8f924df7 5142 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 5143 int multibytep = coding->src_multibyte;
d311d28c 5144 ptrdiff_t consumed_chars = 0;
df7492f9 5145 int found = 0;
0e219d54 5146 unsigned char *valids;
d311d28c 5147 ptrdiff_t head_ascii = coding->head_ascii;
df7492f9
KH
5148 Lisp_Object attrs;
5149
ff0dacd7
KH
5150 detect_info->checked |= CATEGORY_MASK_CCL;
5151
df7492f9 5152 coding = &coding_categories[coding_category_ccl];
0e219d54 5153 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
5154 attrs = CODING_ID_ATTRS (coding->id);
5155 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5156 src += head_ascii;
1397dc18 5157
b73bfc1c 5158 while (1)
1397dc18 5159 {
df7492f9 5160 int c;
065e3595
KH
5161
5162 src_base = src;
df7492f9 5163 ONE_MORE_BYTE (c);
065e3595 5164 if (c < 0 || ! valids[c])
df7492f9 5165 break;
ff0dacd7
KH
5166 if ((valids[c] > 1))
5167 found = CATEGORY_MASK_CCL;
df7492f9 5168 }
ff0dacd7 5169 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
5170 return 0;
5171
5172 no_more_source:
ff0dacd7
KH
5173 detect_info->found |= found;
5174 return 1;
df7492f9
KH
5175}
5176
5177static void
971de7fb 5178decode_coding_ccl (struct coding_system *coding)
df7492f9 5179{
7c78e542 5180 const unsigned char *src = coding->source + coding->consumed;
8f924df7 5181 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
5182 int *charbuf = coding->charbuf + coding->charbuf_used;
5183 int *charbuf_end = coding->charbuf + coding->charbuf_size;
d311d28c 5184 ptrdiff_t consumed_chars = 0;
df7492f9 5185 int multibytep = coding->src_multibyte;
d0396581 5186 struct ccl_program *ccl = &coding->spec.ccl->ccl;
df7492f9 5187 int source_charbuf[1024];
fbdc1721 5188 int source_byteidx[1025];
24a73b0a 5189 Lisp_Object attrs, charset_list;
df7492f9 5190
24a73b0a 5191 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5192
d0396581 5193 while (1)
df7492f9 5194 {
7c78e542 5195 const unsigned char *p = src;
df7492f9
KH
5196 int i = 0;
5197
5198 if (multibytep)
fbdc1721
KH
5199 {
5200 while (i < 1024 && p < src_end)
5201 {
5202 source_byteidx[i] = p - src;
5203 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5204 }
5205 source_byteidx[i] = p - src;
5206 }
df7492f9
KH
5207 else
5208 while (i < 1024 && p < src_end)
5209 source_charbuf[i++] = *p++;
8f924df7 5210
df7492f9 5211 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
d0396581
KH
5212 ccl->last_block = 1;
5213 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5214 charset_list);
5215 charbuf += ccl->produced;
fbdc1721 5216 if (multibytep)
d0396581 5217 src += source_byteidx[ccl->consumed];
df7492f9 5218 else
d0396581
KH
5219 src += ccl->consumed;
5220 consumed_chars += ccl->consumed;
5221 if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
df7492f9
KH
5222 break;
5223 }
5224
d0396581 5225 switch (ccl->status)
df7492f9
KH
5226 {
5227 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 5228 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
5229 break;
5230 case CCL_STAT_SUSPEND_BY_DST:
d0396581 5231 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
5232 break;
5233 case CCL_STAT_QUIT:
5234 case CCL_STAT_INVALID_CMD:
065e3595 5235 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
5236 break;
5237 default:
065e3595 5238 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5239 break;
5240 }
5241 coding->consumed_char += consumed_chars;
5242 coding->consumed = src - coding->source;
5243 coding->charbuf_used = charbuf - coding->charbuf;
5244}
5245
5246static int
971de7fb 5247encode_coding_ccl (struct coding_system *coding)
df7492f9 5248{
fb608df3 5249 struct ccl_program *ccl = &coding->spec.ccl->ccl;
df7492f9
KH
5250 int multibytep = coding->dst_multibyte;
5251 int *charbuf = coding->charbuf;
5252 int *charbuf_end = charbuf + coding->charbuf_used;
5253 unsigned char *dst = coding->destination + coding->produced;
5254 unsigned char *dst_end = coding->destination + coding->dst_bytes;
df7492f9 5255 int destination_charbuf[1024];
d311d28c 5256 ptrdiff_t produced_chars = 0;
a53e2e89 5257 int i;
24a73b0a 5258 Lisp_Object attrs, charset_list;
df7492f9 5259
24a73b0a 5260 CODING_GET_INFO (coding, attrs, charset_list);
fb608df3
KH
5261 if (coding->consumed_char == coding->src_chars
5262 && coding->mode & CODING_MODE_LAST_BLOCK)
5263 ccl->last_block = 1;
df7492f9 5264
76470ad1 5265 do
df7492f9 5266 {
fb608df3 5267 ccl_driver (ccl, charbuf, destination_charbuf,
8cffd3e7 5268 charbuf_end - charbuf, 1024, charset_list);
df7492f9 5269 if (multibytep)
8cffd3e7 5270 {
fb608df3
KH
5271 ASSURE_DESTINATION (ccl->produced * 2);
5272 for (i = 0; i < ccl->produced; i++)
8cffd3e7
KH
5273 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5274 }
df7492f9
KH
5275 else
5276 {
fb608df3
KH
5277 ASSURE_DESTINATION (ccl->produced);
5278 for (i = 0; i < ccl->produced; i++)
df7492f9 5279 *dst++ = destination_charbuf[i] & 0xFF;
fb608df3 5280 produced_chars += ccl->produced;
df7492f9 5281 }
fb608df3
KH
5282 charbuf += ccl->consumed;
5283 if (ccl->status == CCL_STAT_QUIT
5284 || ccl->status == CCL_STAT_INVALID_CMD)
8cffd3e7 5285 break;
df7492f9 5286 }
76470ad1 5287 while (charbuf < charbuf_end);
df7492f9 5288
fb608df3 5289 switch (ccl->status)
df7492f9
KH
5290 {
5291 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 5292 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
5293 break;
5294 case CCL_STAT_SUSPEND_BY_DST:
065e3595 5295 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
5296 break;
5297 case CCL_STAT_QUIT:
5298 case CCL_STAT_INVALID_CMD:
065e3595 5299 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
5300 break;
5301 default:
065e3595 5302 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 5303 break;
1397dc18 5304 }
df7492f9
KH
5305
5306 coding->produced_char += produced_chars;
5307 coding->produced = dst - coding->destination;
5308 return 0;
1397dc18
KH
5309}
5310
df7492f9 5311
1397dc18 5312\f
df7492f9 5313/*** 10, 11. no-conversion handlers ***/
4ed46869 5314
b73bfc1c 5315/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 5316
b73bfc1c 5317static void
971de7fb 5318decode_coding_raw_text (struct coding_system *coding)
4ed46869 5319{
2735d060 5320 int eol_dos =
0a9564cb 5321 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 5322
df7492f9 5323 coding->chars_at_source = 1;
119852e7
KH
5324 coding->consumed_char = coding->src_chars;
5325 coding->consumed = coding->src_bytes;
2735d060 5326 if (eol_dos && coding->source[coding->src_bytes - 1] == '\r')
119852e7
KH
5327 {
5328 coding->consumed_char--;
5329 coding->consumed--;
5330 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5331 }
5332 else
5333 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 5334}
4ed46869 5335
df7492f9 5336static int
971de7fb 5337encode_coding_raw_text (struct coding_system *coding)
df7492f9
KH
5338{
5339 int multibytep = coding->dst_multibyte;
5340 int *charbuf = coding->charbuf;
5341 int *charbuf_end = coding->charbuf + coding->charbuf_used;
5342 unsigned char *dst = coding->destination + coding->produced;
5343 unsigned char *dst_end = coding->destination + coding->dst_bytes;
d311d28c 5344 ptrdiff_t produced_chars = 0;
b73bfc1c
KH
5345 int c;
5346
df7492f9 5347 if (multibytep)
b73bfc1c 5348 {
df7492f9 5349 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 5350
df7492f9
KH
5351 if (coding->src_multibyte)
5352 while (charbuf < charbuf_end)
5353 {
5354 ASSURE_DESTINATION (safe_room);
5355 c = *charbuf++;
5356 if (ASCII_CHAR_P (c))
5357 EMIT_ONE_ASCII_BYTE (c);
5358 else if (CHAR_BYTE8_P (c))
5359 {
5360 c = CHAR_TO_BYTE8 (c);
5361 EMIT_ONE_BYTE (c);
5362 }
5363 else
5364 {
5365 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 5366
df7492f9 5367 CHAR_STRING_ADVANCE (c, p1);
8abc3f12 5368 do
9d123124
KH
5369 {
5370 EMIT_ONE_BYTE (*p0);
5371 p0++;
5372 }
8abc3f12 5373 while (p0 < p1);
df7492f9
KH
5374 }
5375 }
b73bfc1c 5376 else
df7492f9
KH
5377 while (charbuf < charbuf_end)
5378 {
5379 ASSURE_DESTINATION (safe_room);
5380 c = *charbuf++;
5381 EMIT_ONE_BYTE (c);
5382 }
5383 }
5384 else
4ed46869 5385 {
df7492f9 5386 if (coding->src_multibyte)
d46c5b12 5387 {
df7492f9
KH
5388 int safe_room = MAX_MULTIBYTE_LENGTH;
5389
5390 while (charbuf < charbuf_end)
d46c5b12 5391 {
df7492f9
KH
5392 ASSURE_DESTINATION (safe_room);
5393 c = *charbuf++;
5394 if (ASCII_CHAR_P (c))
5395 *dst++ = c;
5396 else if (CHAR_BYTE8_P (c))
5397 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 5398 else
df7492f9 5399 CHAR_STRING_ADVANCE (c, dst);
d46c5b12
KH
5400 }
5401 }
df7492f9
KH
5402 else
5403 {
5404 ASSURE_DESTINATION (charbuf_end - charbuf);
5405 while (charbuf < charbuf_end && dst < dst_end)
5406 *dst++ = *charbuf++;
8f924df7 5407 }
319a3947 5408 produced_chars = dst - (coding->destination + coding->produced);
4ed46869 5409 }
065e3595 5410 record_conversion_result (coding, CODING_RESULT_SUCCESS);
a0ed9b27 5411 coding->produced_char += produced_chars;
df7492f9
KH
5412 coding->produced = dst - coding->destination;
5413 return 0;
4ed46869
KH
5414}
5415
ff0dacd7
KH
5416/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5417 Check if a text is encoded in a charset-based coding system. If it
5418 is, return 1, else return 0. */
5419
0a28aafb 5420static int
cf84bb53
JB
5421detect_coding_charset (struct coding_system *coding,
5422 struct coding_detection_info *detect_info)
1397dc18 5423{
065e3595 5424 const unsigned char *src = coding->source, *src_base;
8f924df7 5425 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 5426 int multibytep = coding->src_multibyte;
d311d28c 5427 ptrdiff_t consumed_chars = 0;
07295713 5428 Lisp_Object attrs, valids, name;
584948ac 5429 int found = 0;
d311d28c 5430 ptrdiff_t head_ascii = coding->head_ascii;
07295713 5431 int check_latin_extra = 0;
1397dc18 5432
ff0dacd7
KH
5433 detect_info->checked |= CATEGORY_MASK_CHARSET;
5434
df7492f9
KH
5435 coding = &coding_categories[coding_category_charset];
5436 attrs = CODING_ID_ATTRS (coding->id);
5437 valids = AREF (attrs, coding_attr_charset_valids);
07295713 5438 name = CODING_ID_NAME (coding->id);
51b59d79 5439 if (strncmp (SSDATA (SYMBOL_NAME (name)),
237aabf4 5440 "iso-8859-", sizeof ("iso-8859-") - 1) == 0
51b59d79 5441 || strncmp (SSDATA (SYMBOL_NAME (name)),
237aabf4 5442 "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
07295713 5443 check_latin_extra = 1;
237aabf4 5444
df7492f9 5445 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
716b3fa0 5446 src += head_ascii;
1397dc18 5447
b73bfc1c 5448 while (1)
1397dc18 5449 {
df7492f9 5450 int c;
716b3fa0
KH
5451 Lisp_Object val;
5452 struct charset *charset;
5453 int dim, idx;
1397dc18 5454
065e3595 5455 src_base = src;
df7492f9 5456 ONE_MORE_BYTE (c);
065e3595
KH
5457 if (c < 0)
5458 continue;
716b3fa0
KH
5459 val = AREF (valids, c);
5460 if (NILP (val))
df7492f9 5461 break;
584948ac 5462 if (c >= 0x80)
07295713
KH
5463 {
5464 if (c < 0xA0
237aabf4
JR
5465 && check_latin_extra
5466 && (!VECTORP (Vlatin_extra_code_table)
28be1ada 5467 || NILP (AREF (Vlatin_extra_code_table, c))))
07295713
KH
5468 break;
5469 found = CATEGORY_MASK_CHARSET;
5470 }
716b3fa0
KH
5471 if (INTEGERP (val))
5472 {
5473 charset = CHARSET_FROM_ID (XFASTINT (val));
5474 dim = CHARSET_DIMENSION (charset);
5475 for (idx = 1; idx < dim; idx++)
5476 {
5477 if (src == src_end)
5478 goto too_short;
5479 ONE_MORE_BYTE (c);
2f9442b8
PE
5480 if (c < charset->code_space[(dim - 1 - idx) * 4]
5481 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
716b3fa0
KH
5482 break;
5483 }
5484 if (idx < dim)
5485 break;
5486 }
5487 else
5488 {
5489 idx = 1;
5490 for (; CONSP (val); val = XCDR (val))
5491 {
5492 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5493 dim = CHARSET_DIMENSION (charset);
5494 while (idx < dim)
5495 {
5496 if (src == src_end)
5497 goto too_short;
5498 ONE_MORE_BYTE (c);
5499 if (c < charset->code_space[(dim - 1 - idx) * 4]
5500 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5501 break;
5502 idx++;
5503 }
5504 if (idx == dim)
5505 {
5506 val = Qnil;
5507 break;
5508 }
5509 }
5510 if (CONSP (val))
5511 break;
5512 }
df7492f9 5513 }
716b3fa0 5514 too_short:
ff0dacd7 5515 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 5516 return 0;
4ed46869 5517
df7492f9 5518 no_more_source:
ff0dacd7
KH
5519 detect_info->found |= found;
5520 return 1;
df7492f9 5521}
b73bfc1c 5522
b73bfc1c 5523static void
971de7fb 5524decode_coding_charset (struct coding_system *coding)
4ed46869 5525{
8f924df7
KH
5526 const unsigned char *src = coding->source + coding->consumed;
5527 const unsigned char *src_end = coding->source + coding->src_bytes;
5528 const unsigned char *src_base;
69a80ea3 5529 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5 5530 /* We may produce one charset annotation in one loop and one more at
df80c7f0 5531 the end. */
69a80ea3 5532 int *charbuf_end
df80c7f0 5533 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
d311d28c 5534 ptrdiff_t consumed_chars = 0, consumed_chars_base;
df7492f9 5535 int multibytep = coding->src_multibyte;
66ebf983
PE
5536 Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
5537 Lisp_Object valids;
d311d28c
PE
5538 ptrdiff_t char_offset = coding->produced_char;
5539 ptrdiff_t last_offset = char_offset;
ff0dacd7 5540 int last_id = charset_ascii;
2735d060 5541 int eol_dos =
0a9564cb 5542 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 5543 int byte_after_cr = -1;
df7492f9 5544
4eb6d3f1 5545 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 5546
df7492f9 5547 while (1)
4ed46869 5548 {
4eb6d3f1 5549 int c;
24a73b0a
KH
5550 Lisp_Object val;
5551 struct charset *charset;
5552 int dim;
5553 int len = 1;
5554 unsigned code;
df7492f9
KH
5555
5556 src_base = src;
5557 consumed_chars_base = consumed_chars;
b73bfc1c 5558
df7492f9 5559 if (charbuf >= charbuf_end)
b71f6f73
KH
5560 {
5561 if (byte_after_cr >= 0)
5562 src_base--;
5563 break;
5564 }
df7492f9 5565
119852e7
KH
5566 if (byte_after_cr >= 0)
5567 {
5568 c = byte_after_cr;
5569 byte_after_cr = -1;
5570 }
5571 else
5572 {
5573 ONE_MORE_BYTE (c);
2735d060 5574 if (eol_dos && c == '\r')
119852e7
KH
5575 ONE_MORE_BYTE (byte_after_cr);
5576 }
065e3595
KH
5577 if (c < 0)
5578 goto invalid_code;
24a73b0a
KH
5579 code = c;
5580
5581 val = AREF (valids, c);
1b17adfd 5582 if (! INTEGERP (val) && ! CONSP (val))
24a73b0a
KH
5583 goto invalid_code;
5584 if (INTEGERP (val))
d46c5b12 5585 {
24a73b0a
KH
5586 charset = CHARSET_FROM_ID (XFASTINT (val));
5587 dim = CHARSET_DIMENSION (charset);
5588 while (len < dim)
b73bfc1c 5589 {
24a73b0a
KH
5590 ONE_MORE_BYTE (c);
5591 code = (code << 8) | c;
5592 len++;
b73bfc1c 5593 }
24a73b0a
KH
5594 CODING_DECODE_CHAR (coding, src, src_base, src_end,
5595 charset, code, c);
d46c5b12 5596 }
df7492f9 5597 else
d46c5b12 5598 {
24a73b0a
KH
5599 /* VAL is a list of charset IDs. It is assured that the
5600 list is sorted by charset dimensions (smaller one
5601 comes first). */
5602 while (CONSP (val))
4eb6d3f1 5603 {
24a73b0a 5604 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 5605 dim = CHARSET_DIMENSION (charset);
f9d71dcd 5606 while (len < dim)
4eb6d3f1 5607 {
acb2a965
KH
5608 ONE_MORE_BYTE (c);
5609 code = (code << 8) | c;
f9d71dcd 5610 len++;
4eb6d3f1 5611 }
24a73b0a
KH
5612 CODING_DECODE_CHAR (coding, src, src_base,
5613 src_end, charset, code, c);
5614 if (c >= 0)
5615 break;
5616 val = XCDR (val);
ff0dacd7 5617 }
d46c5b12 5618 }
24a73b0a
KH
5619 if (c < 0)
5620 goto invalid_code;
5621 if (charset->id != charset_ascii
5622 && last_id != charset->id)
5623 {
5624 if (last_id != charset_ascii)
69a80ea3 5625 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
5626 last_id = charset->id;
5627 last_offset = char_offset;
5628 }
5629
df7492f9 5630 *charbuf++ = c;
ff0dacd7 5631 char_offset++;
df7492f9
KH
5632 continue;
5633
5634 invalid_code:
5635 src = src_base;
5636 consumed_chars = consumed_chars_base;
5637 ONE_MORE_BYTE (c);
065e3595 5638 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 5639 char_offset++;
df7492f9 5640 coding->errors++;
4ed46869
KH
5641 }
5642
df7492f9 5643 no_more_source:
ff0dacd7 5644 if (last_id != charset_ascii)
69a80ea3 5645 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
5646 coding->consumed_char += consumed_chars_base;
5647 coding->consumed = src_base - coding->source;
5648 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
5649}
5650
df7492f9 5651static int
971de7fb 5652encode_coding_charset (struct coding_system *coding)
4ed46869 5653{
df7492f9
KH
5654 int multibytep = coding->dst_multibyte;
5655 int *charbuf = coding->charbuf;
5656 int *charbuf_end = charbuf + coding->charbuf_used;
5657 unsigned char *dst = coding->destination + coding->produced;
5658 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5659 int safe_room = MAX_MULTIBYTE_LENGTH;
d311d28c 5660 ptrdiff_t produced_chars = 0;
24a73b0a 5661 Lisp_Object attrs, charset_list;
df7492f9 5662 int ascii_compatible;
b73bfc1c 5663 int c;
b73bfc1c 5664
24a73b0a 5665 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5666 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 5667
df7492f9 5668 while (charbuf < charbuf_end)
4ed46869 5669 {
4eb6d3f1 5670 struct charset *charset;
df7492f9 5671 unsigned code;
8f924df7 5672
df7492f9
KH
5673 ASSURE_DESTINATION (safe_room);
5674 c = *charbuf++;
5675 if (ascii_compatible && ASCII_CHAR_P (c))
5676 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 5677 else if (CHAR_BYTE8_P (c))
4ed46869 5678 {
16eafb5d
KH
5679 c = CHAR_TO_BYTE8 (c);
5680 EMIT_ONE_BYTE (c);
d46c5b12 5681 }
d46c5b12 5682 else
b73bfc1c 5683 {
5eb05ea3
KH
5684 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5685 &code, charset);
5686
4eb6d3f1
KH
5687 if (charset)
5688 {
5689 if (CHARSET_DIMENSION (charset) == 1)
5690 EMIT_ONE_BYTE (code);
5691 else if (CHARSET_DIMENSION (charset) == 2)
5692 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5693 else if (CHARSET_DIMENSION (charset) == 3)
5694 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5695 else
5696 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5697 (code >> 8) & 0xFF, code & 0xFF);
5698 }
5699 else
41cbe562
KH
5700 {
5701 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5702 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5703 else
5704 c = coding->default_char;
5705 EMIT_ONE_BYTE (c);
5706 }
4ed46869 5707 }
4ed46869
KH
5708 }
5709
065e3595 5710 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5711 coding->produced_char += produced_chars;
5712 coding->produced = dst - coding->destination;
5713 return 0;
4ed46869
KH
5714}
5715
5716\f
/*** 10. C library functions ***/
4ed46869 5718
df7492f9
KH
5719/* Setup coding context CODING from information about CODING_SYSTEM.
5720 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
5721 CODING_SYSTEM is invalid, signal an error. */
4ed46869 5722
ec6d2bb8 5723void
971de7fb 5724setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
4ed46869 5725{
df7492f9
KH
5726 Lisp_Object attrs;
5727 Lisp_Object eol_type;
5728 Lisp_Object coding_type;
4608c386 5729 Lisp_Object val;
4ed46869 5730
df7492f9 5731 if (NILP (coding_system))
ae6f73fa 5732 coding_system = Qundecided;
c07c8e12 5733
df7492f9 5734 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 5735
df7492f9 5736 attrs = CODING_ID_ATTRS (coding->id);
0a9564cb 5737 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 5738
df7492f9
KH
5739 coding->mode = 0;
5740 coding->head_ascii = -1;
4a015c45
KH
5741 if (VECTORP (eol_type))
5742 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5743 | CODING_REQUIRE_DETECTION_MASK);
5744 else if (! EQ (eol_type, Qunix))
5745 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5746 | CODING_REQUIRE_ENCODING_MASK);
5747 else
5748 coding->common_flags = 0;
5e5c78be
KH
5749 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5750 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5751 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5752 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
5753 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5754 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 5755
df7492f9 5756 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 5757 coding->max_charset_id = SCHARS (val) - 1;
1b3b981b 5758 coding->safe_charsets = SDATA (val);
df7492f9 5759 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
624bda09 5760 coding->carryover_bytes = 0;
4608c386 5761
df7492f9
KH
5762 coding_type = CODING_ATTR_TYPE (attrs);
5763 if (EQ (coding_type, Qundecided))
d46c5b12 5764 {
df7492f9
KH
5765 coding->detector = NULL;
5766 coding->decoder = decode_coding_raw_text;
5767 coding->encoder = encode_coding_raw_text;
5768 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5769 }
df7492f9 5770 else if (EQ (coding_type, Qiso_2022))
d46c5b12 5771 {
df7492f9
KH
5772 int i;
5773 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5774
5775 /* Invoke graphic register 0 to plane 0. */
5776 CODING_ISO_INVOCATION (coding, 0) = 0;
5777 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
5778 CODING_ISO_INVOCATION (coding, 1)
5779 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5780 /* Setup the initial status of designation. */
5781 for (i = 0; i < 4; i++)
5782 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5783 /* Not single shifting initially. */
5784 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5785 /* Beginning of buffer should also be regarded as bol. */
5786 CODING_ISO_BOL (coding) = 1;
5787 coding->detector = detect_coding_iso_2022;
5788 coding->decoder = decode_coding_iso_2022;
5789 coding->encoder = encode_coding_iso_2022;
5790 if (flags & CODING_ISO_FLAG_SAFE)
5791 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 5792 coding->common_flags
df7492f9
KH
5793 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5794 | CODING_REQUIRE_FLUSHING_MASK);
5795 if (flags & CODING_ISO_FLAG_COMPOSITION)
5796 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
5797 if (flags & CODING_ISO_FLAG_DESIGNATION)
5798 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
5799 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5800 {
5801 setup_iso_safe_charsets (attrs);
5802 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 5803 coding->max_charset_id = SCHARS (val) - 1;
1b3b981b 5804 coding->safe_charsets = SDATA (val);
df7492f9
KH
5805 }
5806 CODING_ISO_FLAGS (coding) = flags;
e951386e
KH
5807 CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5808 CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5809 CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5810 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
d46c5b12 5811 }
df7492f9 5812 else if (EQ (coding_type, Qcharset))
d46c5b12 5813 {
df7492f9
KH
5814 coding->detector = detect_coding_charset;
5815 coding->decoder = decode_coding_charset;
5816 coding->encoder = encode_coding_charset;
d46c5b12 5817 coding->common_flags
df7492f9 5818 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5819 }
df7492f9 5820 else if (EQ (coding_type, Qutf_8))
d46c5b12 5821 {
a470d443
KH
5822 val = AREF (attrs, coding_attr_utf_bom);
5823 CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5824 : EQ (val, Qt) ? utf_with_bom
5825 : utf_without_bom);
df7492f9
KH
5826 coding->detector = detect_coding_utf_8;
5827 coding->decoder = decode_coding_utf_8;
5828 coding->encoder = encode_coding_utf_8;
5829 coding->common_flags
5830 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
a470d443
KH
5831 if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5832 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
df7492f9
KH
5833 }
5834 else if (EQ (coding_type, Qutf_16))
5835 {
a470d443
KH
5836 val = AREF (attrs, coding_attr_utf_bom);
5837 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5838 : EQ (val, Qt) ? utf_with_bom
5839 : utf_without_bom);
df7492f9 5840 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5841 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5842 : utf_16_little_endian);
e19c3639 5843 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5844 coding->detector = detect_coding_utf_16;
5845 coding->decoder = decode_coding_utf_16;
5846 coding->encoder = encode_coding_utf_16;
5847 coding->common_flags
5848 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
a470d443 5849 if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
b49a1807 5850 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5851 }
df7492f9 5852 else if (EQ (coding_type, Qccl))
4ed46869 5853 {
df7492f9
KH
5854 coding->detector = detect_coding_ccl;
5855 coding->decoder = decode_coding_ccl;
5856 coding->encoder = encode_coding_ccl;
c952af22 5857 coding->common_flags
df7492f9
KH
5858 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5859 | CODING_REQUIRE_FLUSHING_MASK);
5860 }
5861 else if (EQ (coding_type, Qemacs_mule))
5862 {
5863 coding->detector = detect_coding_emacs_mule;
5864 coding->decoder = decode_coding_emacs_mule;
5865 coding->encoder = encode_coding_emacs_mule;
c952af22 5866 coding->common_flags
df7492f9 5867 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
e951386e 5868 coding->spec.emacs_mule.full_support = 1;
df7492f9
KH
5869 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5870 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5871 {
5872 Lisp_Object tail, safe_charsets;
5873 int max_charset_id = 0;
5874
5875 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5876 tail = XCDR (tail))
5877 if (max_charset_id < XFASTINT (XCAR (tail)))
5878 max_charset_id = XFASTINT (XCAR (tail));
1b3b981b
AS
5879 safe_charsets = make_uninit_string (max_charset_id + 1);
5880 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9
KH
5881 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5882 tail = XCDR (tail))
8f924df7 5883 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5884 coding->max_charset_id = max_charset_id;
1b3b981b 5885 coding->safe_charsets = SDATA (safe_charsets);
e951386e 5886 coding->spec.emacs_mule.full_support = 1;
df7492f9 5887 }
e951386e
KH
5888 coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5889 coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
df7492f9
KH
5890 }
5891 else if (EQ (coding_type, Qshift_jis))
5892 {
5893 coding->detector = detect_coding_sjis;
5894 coding->decoder = decode_coding_sjis;
5895 coding->encoder = encode_coding_sjis;
c952af22 5896 coding->common_flags
df7492f9
KH
5897 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5898 }
5899 else if (EQ (coding_type, Qbig5))
5900 {
5901 coding->detector = detect_coding_big5;
5902 coding->decoder = decode_coding_big5;
5903 coding->encoder = encode_coding_big5;
c952af22 5904 coding->common_flags
df7492f9
KH
5905 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5906 }
5907 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5908 {
df7492f9
KH
5909 coding->detector = NULL;
5910 coding->decoder = decode_coding_raw_text;
5911 coding->encoder = encode_coding_raw_text;
ea29edf2
KH
5912 if (! EQ (eol_type, Qunix))
5913 {
5914 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5915 if (! VECTORP (eol_type))
5916 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5917 }
5918
4ed46869 5919 }
4ed46869 5920
df7492f9 5921 return;
4ed46869
KH
5922}
5923
0ff61e78
KH
5924/* Return a list of charsets supported by CODING. */
5925
5926Lisp_Object
971de7fb 5927coding_charset_list (struct coding_system *coding)
0ff61e78 5928{
35befdaa 5929 Lisp_Object attrs, charset_list;
0ff61e78
KH
5930
5931 CODING_GET_INFO (coding, attrs, charset_list);
5932 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5933 {
5934 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5935
5936 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5937 charset_list = Viso_2022_charset_list;
5938 }
5939 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5940 {
5941 charset_list = Vemacs_mule_charset_list;
5942 }
5943 return charset_list;
5944}
5945
5946
e9f91ece
KH
5947/* Return a list of charsets supported by CODING-SYSTEM. */
5948
5949Lisp_Object
971de7fb 5950coding_system_charset_list (Lisp_Object coding_system)
e9f91ece 5951{
d3411f89 5952 ptrdiff_t id;
e9f91ece
KH
5953 Lisp_Object attrs, charset_list;
5954
5955 CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5956 attrs = CODING_ID_ATTRS (id);
5957
5958 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5959 {
5960 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5961
5962 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5963 charset_list = Viso_2022_charset_list;
5964 else
5965 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5966 }
5967 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5968 {
5969 charset_list = Vemacs_mule_charset_list;
5970 }
5971 else
5972 {
5973 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5974 }
5975 return charset_list;
5976}
5977
5978
df7492f9
KH
5979/* Return raw-text or one of its subsidiaries that has the same
5980 eol_type as CODING-SYSTEM. */
ec6d2bb8 5981
df7492f9 5982Lisp_Object
971de7fb 5983raw_text_coding_system (Lisp_Object coding_system)
ec6d2bb8 5984{
0be8721c 5985 Lisp_Object spec, attrs;
df7492f9 5986 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 5987
d3e4cb56
KH
5988 if (NILP (coding_system))
5989 return Qraw_text;
df7492f9
KH
5990 spec = CODING_SYSTEM_SPEC (coding_system);
5991 attrs = AREF (spec, 0);
ec6d2bb8 5992
df7492f9
KH
5993 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5994 return coding_system;
ec6d2bb8 5995
df7492f9
KH
5996 eol_type = AREF (spec, 2);
5997 if (VECTORP (eol_type))
5998 return Qraw_text;
5999 spec = CODING_SYSTEM_SPEC (Qraw_text);
6000 raw_text_eol_type = AREF (spec, 2);
6001 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6002 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6003 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
6004}
6005
54f78171 6006
1911a33b
KH
6007/* If CODING_SYSTEM doesn't specify end-of-line format, return one of
6008 the subsidiary that has the same eol-spec as PARENT (if it is not
6009 nil and specifies end-of-line format) or the system's setting
fcbcfb64 6010 (system_eol_type). */
df7492f9
KH
6011
6012Lisp_Object
971de7fb 6013coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
54f78171 6014{
3e139625 6015 Lisp_Object spec, eol_type;
54f78171 6016
d3e4cb56
KH
6017 if (NILP (coding_system))
6018 coding_system = Qraw_text;
df7492f9 6019 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 6020 eol_type = AREF (spec, 2);
fcbcfb64 6021 if (VECTORP (eol_type))
df7492f9 6022 {
df7492f9
KH
6023 Lisp_Object parent_eol_type;
6024
fcbcfb64
KH
6025 if (! NILP (parent))
6026 {
6027 Lisp_Object parent_spec;
6028
4a015c45 6029 parent_spec = CODING_SYSTEM_SPEC (parent);
fcbcfb64 6030 parent_eol_type = AREF (parent_spec, 2);
1911a33b 6031 if (VECTORP (parent_eol_type))
4628bef1 6032 parent_eol_type = system_eol_type;
fcbcfb64
KH
6033 }
6034 else
6035 parent_eol_type = system_eol_type;
df7492f9
KH
6036 if (EQ (parent_eol_type, Qunix))
6037 coding_system = AREF (eol_type, 0);
6038 else if (EQ (parent_eol_type, Qdos))
6039 coding_system = AREF (eol_type, 1);
6040 else if (EQ (parent_eol_type, Qmac))
6041 coding_system = AREF (eol_type, 2);
54f78171 6042 }
df7492f9 6043 return coding_system;
54f78171
KH
6044}
6045
fcaf8878
KH
6046
6047/* Check if text-conversion and eol-conversion of CODING_SYSTEM are
6048 decided for writing to a process. If not, complement them, and
6049 return a new coding system. */
6050
6051Lisp_Object
4628bef1 6052complement_process_encoding_system (Lisp_Object coding_system)
fcaf8878 6053{
5886ec9c
KH
6054 Lisp_Object coding_base = Qnil, eol_base = Qnil;
6055 Lisp_Object spec, attrs;
93d50df8 6056 int i;
fcaf8878 6057
93d50df8 6058 for (i = 0; i < 3; i++)
fcaf8878 6059 {
93d50df8
KH
6060 if (i == 1)
6061 coding_system = CDR_SAFE (Vdefault_process_coding_system);
6062 else if (i == 2)
6063 coding_system = preferred_coding_system ();
6064 spec = CODING_SYSTEM_SPEC (coding_system);
6065 if (NILP (spec))
6066 continue;
6067 attrs = AREF (spec, 0);
6068 if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6069 coding_base = CODING_ATTR_BASE_NAME (attrs);
6070 if (NILP (eol_base) && ! VECTORP (AREF (spec, 2)))
6071 eol_base = coding_system;
6072 if (! NILP (coding_base) && ! NILP (eol_base))
6073 break;
fcaf8878 6074 }
fcaf8878 6075
93d50df8
KH
6076 if (i > 0)
6077 /* The original CODING_SYSTEM didn't specify text-conversion or
6078 eol-conversion. Be sure that we return a fully complemented
6079 coding system. */
6080 coding_system = coding_inherit_eol_type (coding_base, eol_base);
6081 return coding_system;
fcaf8878
KH
6082}
6083
6084
4ed46869
KH
/* Emacs has a mechanism to automatically detect the coding system of
   a text if it is one of Emacs' internal format, ISO2022, SJIS, or
   BIG5.  However, it is impossible to distinguish some coding systems
   accurately because they use the same range of codes.  So, coding
   systems are first grouped into the following categories:
6090
0ef69138 6091 o coding-category-emacs-mule
4ed46869
KH
6092
6093 The category for a coding system which has the same code range
6094 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 6095 symbol) `emacs-mule' by default.
4ed46869
KH
6096
6097 o coding-category-sjis
6098
6099 The category for a coding system which has the same code range
6100 as SJIS. Assigned the coding-system (Lisp
7717c392 6101 symbol) `japanese-shift-jis' by default.
4ed46869
KH
6102
6103 o coding-category-iso-7
6104
6105 The category for a coding system which has the same code range
7717c392 6106 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
6107 shift and single shift functions. This can encode/decode all
6108 charsets. Assigned the coding-system (Lisp symbol)
6109 `iso-2022-7bit' by default.
6110
6111 o coding-category-iso-7-tight
6112
6113 Same as coding-category-iso-7 except that this can
6114 encode/decode only the specified charsets.
4ed46869
KH
6115
6116 o coding-category-iso-8-1
6117
6118 The category for a coding system which has the same code range
6119 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
6120 for DIMENSION1 charset. This doesn't use any locking shift
6121 and single shift functions. Assigned the coding-system (Lisp
6122 symbol) `iso-latin-1' by default.
4ed46869
KH
6123
6124 o coding-category-iso-8-2
6125
6126 The category for a coding system which has the same code range
6127 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
6128 for DIMENSION2 charset. This doesn't use any locking shift
6129 and single shift functions. Assigned the coding-system (Lisp
6130 symbol) `japanese-iso-8bit' by default.
4ed46869 6131
7717c392 6132 o coding-category-iso-7-else
4ed46869
KH
6133
6134 The category for a coding system which has the same code range
ad1746f5 6135 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
6136 single shift functions. Assigned the coding-system (Lisp
6137 symbol) `iso-2022-7bit-lock' by default.
6138
6139 o coding-category-iso-8-else
6140
6141 The category for a coding system which has the same code range
ad1746f5 6142 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
6143 single shift functions. Assigned the coding-system (Lisp
6144 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
6145
6146 o coding-category-big5
6147
6148 The category for a coding system which has the same code range
6149 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 6150 `cn-big5' by default.
4ed46869 6151
fa42c37f
KH
6152 o coding-category-utf-8
6153
6154 The category for a coding system which has the same code range
6e76ae91 6155 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
6156 symbol) `utf-8' by default.
6157
6158 o coding-category-utf-16-be
6159
	The category for a coding system in which a text has a
	Unicode signature (cf. Unicode Standard) in big-endian byte
	order at the head.  Assigned the coding-system (Lisp symbol)
	`utf-16-be' by default.
6164
6165 o coding-category-utf-16-le
6166
	The category for a coding system in which a text has a
	Unicode signature (cf. Unicode Standard) in little-endian
	byte order at the head.  Assigned the coding-system (Lisp
	symbol) `utf-16-le' by default.
6171
1397dc18
KH
6172 o coding-category-ccl
6173
6174 The category for a coding system of which encoder/decoder is
6175 written in CCL programs. The default value is nil, i.e., no
6176 coding system is assigned.
6177
4ed46869
KH
6178 o coding-category-binary
6179
6180 The category for a coding system not categorized in any of the
6181 above. Assigned the coding-system (Lisp symbol)
e0e989f6 6182 `no-conversion' by default.
4ed46869
KH
6183
   Each of them is a Lisp symbol whose value is the actual
   `coding-system' (also a Lisp symbol) assigned by the user.  What
   Emacs actually detects is the category of a coding system; it then
   uses the `coding-system' assigned to that category.  If Emacs
   cannot narrow the candidates down to a single category, it selects
   the category of the highest priority.  The priorities of the
   categories are also specified by the user, in the Lisp variable
   `coding-category-list'.
6191
6192*/
6193
df7492f9
KH
6194#define EOL_SEEN_NONE 0
6195#define EOL_SEEN_LF 1
6196#define EOL_SEEN_CR 2
6197#define EOL_SEEN_CRLF 4
66cfb530 6198
ff0dacd7
KH
6199/* Detect how end-of-line of a text of length SRC_BYTES pointed by
6200 SOURCE is encoded. If CATEGORY is one of
6201 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6202 two-byte, else they are encoded by one-byte.
6203
6204 Return one of EOL_SEEN_XXX. */
4ed46869 6205
bc4bc72a 6206#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
6207
6208static int
d311d28c 6209detect_eol (const unsigned char *source, ptrdiff_t src_bytes,
cf84bb53 6210 enum coding_category category)
4ed46869 6211{
f6cbaf43 6212 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 6213 unsigned char c;
df7492f9
KH
6214 int total = 0;
6215 int eol_seen = EOL_SEEN_NONE;
4ed46869 6216
89528eb3 6217 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 6218 {
df7492f9 6219 int msb, lsb;
fa42c37f 6220
89528eb3
KH
6221 msb = category == (coding_category_utf_16_le
6222 | coding_category_utf_16_le_nosig);
df7492f9 6223 lsb = 1 - msb;
fa42c37f 6224
df7492f9 6225 while (src + 1 < src_end)
fa42c37f 6226 {
df7492f9
KH
6227 c = src[lsb];
6228 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 6229 {
df7492f9
KH
6230 int this_eol;
6231
6232 if (c == '\n')
6233 this_eol = EOL_SEEN_LF;
6234 else if (src + 3 >= src_end
6235 || src[msb + 2] != 0
6236 || src[lsb + 2] != '\n')
6237 this_eol = EOL_SEEN_CR;
fa42c37f 6238 else
75f4f1ac
EZ
6239 {
6240 this_eol = EOL_SEEN_CRLF;
6241 src += 2;
6242 }
df7492f9
KH
6243
6244 if (eol_seen == EOL_SEEN_NONE)
6245 /* This is the first end-of-line. */
6246 eol_seen = this_eol;
6247 else if (eol_seen != this_eol)
fa42c37f 6248 {
75f4f1ac
EZ
6249 /* The found type is different from what found before.
6250 Allow for stray ^M characters in DOS EOL files. */
ef1b0ba7
SM
6251 if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6252 || (eol_seen == EOL_SEEN_CRLF
6253 && this_eol == EOL_SEEN_CR))
75f4f1ac
EZ
6254 eol_seen = EOL_SEEN_CRLF;
6255 else
6256 {
6257 eol_seen = EOL_SEEN_LF;
6258 break;
6259 }
fa42c37f 6260 }
df7492f9
KH
6261 if (++total == MAX_EOL_CHECK_COUNT)
6262 break;
fa42c37f 6263 }
df7492f9 6264 src += 2;
fa42c37f 6265 }
bcf26d6a 6266 }
d46c5b12 6267 else
ef1b0ba7
SM
6268 while (src < src_end)
6269 {
6270 c = *src++;
6271 if (c == '\n' || c == '\r')
6272 {
6273 int this_eol;
d46c5b12 6274
ef1b0ba7
SM
6275 if (c == '\n')
6276 this_eol = EOL_SEEN_LF;
6277 else if (src >= src_end || *src != '\n')
6278 this_eol = EOL_SEEN_CR;
6279 else
6280 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 6281
ef1b0ba7
SM
6282 if (eol_seen == EOL_SEEN_NONE)
6283 /* This is the first end-of-line. */
6284 eol_seen = this_eol;
6285 else if (eol_seen != this_eol)
6286 {
6287 /* The found type is different from what found before.
6288 Allow for stray ^M characters in DOS EOL files. */
6289 if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6290 || (eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR))
6291 eol_seen = EOL_SEEN_CRLF;
6292 else
6293 {
6294 eol_seen = EOL_SEEN_LF;
6295 break;
6296 }
6297 }
6298 if (++total == MAX_EOL_CHECK_COUNT)
6299 break;
6300 }
6301 }
df7492f9 6302 return eol_seen;
73be902c
KH
6303}
6304
df7492f9 6305
24a73b0a 6306static Lisp_Object
971de7fb 6307adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
73be902c 6308{
0be8721c 6309 Lisp_Object eol_type;
8f924df7 6310
df7492f9
KH
6311 eol_type = CODING_ID_EOL_TYPE (coding->id);
6312 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
6313 {
6314 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6315 eol_type = Qunix;
6316 }
6f197c07 6317 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
6318 {
6319 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6320 eol_type = Qdos;
6321 }
6f197c07 6322 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
6323 {
6324 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6325 eol_type = Qmac;
6326 }
6327 return eol_type;
d46c5b12 6328}
4ed46869 6329
df7492f9
KH
6330/* Detect how a text specified in CODING is encoded. If a coding
6331 system is detected, update fields of CODING by the detected coding
6332 system. */
0a28aafb 6333
74ab6df5 6334static void
971de7fb 6335detect_coding (struct coding_system *coding)
d46c5b12 6336{
  /* CODING->mode is saved here and written back just before
     returning, so whatever the calls below do to it is undone.  */
8f924df7 6337 const unsigned char *src, *src_end;
73cce38d 6338 int saved_mode = coding->mode;
d46c5b12 6339
df7492f9
KH
6340 coding->consumed = coding->consumed_char = 0;
6341 coding->produced = coding->produced_char = 0;
6342 coding_set_source (coding);
1c3478b0 6343
df7492f9 6344 src_end = coding->source + coding->src_bytes;
c0e16b14 6345 coding->head_ascii = 0;
1c3478b0 6346
df7492f9
KH
6347 /* If we have not yet decided the text encoding type, detect it
6348 now. */
6349 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 6350 {
df7492f9 6351 int c, i;
6cb21a4f 6352 struct coding_detection_info detect_info;
2f3cbb32 6353 int null_byte_found = 0, eight_bit_found = 0;
df7492f9 6354
6cb21a4f 6355 detect_info.checked = detect_info.found = detect_info.rejected = 0;
  /* First pass over the source bytes.  CODING->head_ascii counts the
     bytes seen before the first byte with the 0x80 bit set.  The scan
     stops early once both a null byte and an 8-bit byte have been
     seen, or when the ISO-2022 detector reports a result.  */
2f3cbb32 6356 for (src = coding->source; src < src_end; src++)
d46c5b12 6357 {
df7492f9 6358 c = *src;
6cb21a4f 6359 if (c & 0x80)
6cb21a4f 6360 {
2f3cbb32 6361 eight_bit_found = 1;
2f3cbb32
KH
6362 if (null_byte_found)
6363 break;
6364 }
6365 else if (c < 0x20)
6366 {
  /* ESC, SI and SO trigger the ISO-2022 detector, at most once
     (guarded by detect_info.checked) and only when
     inhibit_iso_escape_detection is zero.  */
6367 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6368 && ! inhibit_iso_escape_detection
6369 && ! detect_info.checked)
6cb21a4f 6370 {
2f3cbb32
KH
6371 if (detect_coding_iso_2022 (coding, &detect_info))
6372 {
6373 /* We have scanned the whole data. */
6374 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
c0e16b14
KH
6375 {
6376 /* We didn't find an 8-bit code. We may
6377 have found a null-byte, but it's very
ce5b453a 6378 rare that a binary file conforms to
c0e16b14
KH
6379 ISO-2022. */
6380 src = src_end;
6381 coding->head_ascii = src - coding->source;
6382 }
6383 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
2f3cbb32
KH
6384 break;
6385 }
6386 }
97b1b294 6387 else if (! c && !inhibit_null_byte_detection)
2f3cbb32
KH
6388 {
6389 null_byte_found = 1;
6390 if (eight_bit_found)
6391 break;
6cb21a4f 6392 }
c006c0c8
KH
6393 if (! eight_bit_found)
6394 coding->head_ascii++;
6cb21a4f 6395 }
c006c0c8 6396 else if (! eight_bit_found)
c0e16b14 6397 coding->head_ascii++;
d46c5b12 6398 }
df7492f9 6399
  /* Run the per-category detectors only if the scan above found
     something other than plain leading ASCII covering the whole
     source.  */
2f3cbb32
KH
6400 if (null_byte_found || eight_bit_found
6401 || coding->head_ascii < coding->src_bytes
6cb21a4f 6402 || detect_info.found)
d46c5b12 6403 {
ff0dacd7
KH
6404 enum coding_category category;
6405 struct coding_system *this;
df7492f9 6406
6cb21a4f
KH
6407 if (coding->head_ascii == coding->src_bytes)
6408 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
6409 for (i = 0; i < coding_category_raw_text; i++)
6410 {
6411 category = coding_priorities[i];
6412 this = coding_categories + category;
6413 if (detect_info.found & (1 << category))
24a73b0a 6414 break;
6cb21a4f
KH
6415 }
6416 else
2f3cbb32
KH
6417 {
  /* A null byte marks every category except the UTF-16 ones as
     already checked and rejected, so only UTF-16 detectors can still
     run in the loop below.  */
6418 if (null_byte_found)
ff0dacd7 6419 {
2f3cbb32
KH
6420 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6421 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
ff0dacd7 6422 }
  /* Walk the categories in priority order and stop at the first one
     that is found, calling its detector unless it has been checked
     already.  */
2f3cbb32
KH
6423 for (i = 0; i < coding_category_raw_text; i++)
6424 {
6425 category = coding_priorities[i];
6426 this = coding_categories + category;
6427 if (this->id < 0)
6428 {
6429 /* No coding system of this category is defined. */
6430 detect_info.rejected |= (1 << category);
6431 }
6432 else if (category >= coding_category_raw_text)
6433 continue;
6434 else if (detect_info.checked & (1 << category))
6435 {
6436 if (detect_info.found & (1 << category))
6437 break;
6438 }
6439 else if ((*(this->detector)) (coding, &detect_info)
6440 && detect_info.found & (1 << category))
6441 {
6442 if (category == coding_category_utf_16_auto)
6443 {
6444 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6445 category = coding_category_utf_16_le;
6446 else
6447 category = coding_category_utf_16_be;
6448 }
6449 break;
6450 }
6451 }
2f3cbb32 6452 }
c0e16b14
KH
6453
  /* I < coding_category_raw_text means one of the loops above stopped
     on a found category; THIS then points at it.  Otherwise fall
     back, in order, to no-conversion (null byte seen), raw-text
     (everything rejected), or the highest-priority category that was
     not rejected.  */
6454 if (i < coding_category_raw_text)
6455 setup_coding_system (CODING_ID_NAME (this->id), coding);
6456 else if (null_byte_found)
6457 setup_coding_system (Qno_conversion, coding);
6458 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6459 == CATEGORY_MASK_ANY)
6460 setup_coding_system (Qraw_text, coding);
6461 else if (detect_info.rejected)
6462 for (i = 0; i < coding_category_raw_text; i++)
6463 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6464 {
6465 this = coding_categories + coding_priorities[i];
6466 setup_coding_system (CODING_ID_NAME (this->id), coding);
6467 break;
6468 }
d46c5b12 6469 }
b73bfc1c 6470 }
a470d443
KH
6471 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6472 == coding_category_utf_8_auto)
6473 {
  /* The coding_attr_utf_bom attribute is used here as a cons: its car
     is selected when the UTF-8 signature category is found, its cdr
     otherwise.
     NOTE(review): only detect_info.found and detect_info.rejected are
     initialized in this branch; detect_info.checked is left unset.
     Confirm that detect_coding_utf_8 does not read it.  */
6474 Lisp_Object coding_systems;
6475 struct coding_detection_info detect_info;
6476
6477 coding_systems
6478 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6479 detect_info.found = detect_info.rejected = 0;
6480 coding->head_ascii = 0;
6481 if (CONSP (coding_systems)
6482 && detect_coding_utf_8 (coding, &detect_info))
6483 {
6484 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6485 setup_coding_system (XCAR (coding_systems), coding);
6486 else
6487 setup_coding_system (XCDR (coding_systems), coding);
6488 }
6489 }
24a73b0a
KH
6490 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6491 == coding_category_utf_16_auto)
b49a1807
KH
6492 {
  /* Same scheme for UTF-16: the car is selected for little endian,
     the cdr for big endian; if neither is found CODING is left as it
     is.
     NOTE(review): detect_info.checked is left unset here as well;
     confirm that detect_coding_utf_16 does not read it.  */
6493 Lisp_Object coding_systems;
6494 struct coding_detection_info detect_info;
6495
6496 coding_systems
a470d443 6497 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
b49a1807 6498 detect_info.found = detect_info.rejected = 0;
a470d443 6499 coding->head_ascii = 0;
b49a1807 6500 if (CONSP (coding_systems)
24a73b0a 6501 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
6502 {
6503 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6504 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 6505 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
6506 setup_coding_system (XCDR (coding_systems), coding);
6507 }
6508 }
73cce38d 6509 coding->mode = saved_mode;
4ed46869 6510}
4ed46869 6511
d46c5b12 6512
aaaf0b1e 6513static void
971de7fb 6514decode_eol (struct coding_system *coding)
aaaf0b1e 6515{
24a73b0a
KH
6516 Lisp_Object eol_type;
6517 unsigned char *p, *pbeg, *pend;
3ed051d4 6518
24a73b0a 6519 eol_type = CODING_ID_EOL_TYPE (coding->id);
0a9564cb 6520 if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
24a73b0a
KH
6521 return;
6522
6523 if (NILP (coding->dst_object))
6524 pbeg = coding->destination;
6525 else
6526 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6527 pend = pbeg + coding->produced;
6528
6529 if (VECTORP (eol_type))
aaaf0b1e 6530 {
df7492f9 6531 int eol_seen = EOL_SEEN_NONE;
4ed46869 6532
24a73b0a 6533 for (p = pbeg; p < pend; p++)
aaaf0b1e 6534 {
df7492f9
KH
6535 if (*p == '\n')
6536 eol_seen |= EOL_SEEN_LF;
6537 else if (*p == '\r')
aaaf0b1e 6538 {
df7492f9 6539 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 6540 {
df7492f9
KH
6541 eol_seen |= EOL_SEEN_CRLF;
6542 p++;
aaaf0b1e 6543 }
aaaf0b1e 6544 else
df7492f9 6545 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 6546 }
aaaf0b1e 6547 }
75f4f1ac
EZ
6548 /* Handle DOS-style EOLs in a file with stray ^M characters. */
6549 if ((eol_seen & EOL_SEEN_CRLF) != 0
6550 && (eol_seen & EOL_SEEN_CR) != 0
6551 && (eol_seen & EOL_SEEN_LF) == 0)
6552 eol_seen = EOL_SEEN_CRLF;
6553 else if (eol_seen != EOL_SEEN_NONE
24a73b0a
KH
6554 && eol_seen != EOL_SEEN_LF
6555 && eol_seen != EOL_SEEN_CRLF
6556 && eol_seen != EOL_SEEN_CR)
6557 eol_seen = EOL_SEEN_LF;
df7492f9 6558 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 6559 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 6560 }
d46c5b12 6561
24a73b0a 6562 if (EQ (eol_type, Qmac))
27901516 6563 {
24a73b0a 6564 for (p = pbeg; p < pend; p++)
df7492f9
KH
6565 if (*p == '\r')
6566 *p = '\n';
4ed46869 6567 }
24a73b0a 6568 else if (EQ (eol_type, Qdos))
df7492f9 6569 {
d311d28c 6570 ptrdiff_t n = 0;
b73bfc1c 6571
24a73b0a
KH
6572 if (NILP (coding->dst_object))
6573 {
4347441b
KH
6574 /* Start deleting '\r' from the tail to minimize the memory
6575 movement. */
24a73b0a
KH
6576 for (p = pend - 2; p >= pbeg; p--)
6577 if (*p == '\r')
6578 {
72af86bd 6579 memmove (p, p + 1, pend-- - p - 1);
24a73b0a
KH
6580 n++;
6581 }
6582 }
6583 else
6584 {
d311d28c
PE
6585 ptrdiff_t pos_byte = coding->dst_pos_byte;
6586 ptrdiff_t pos = coding->dst_pos;
6587 ptrdiff_t pos_end = pos + coding->produced_char - 1;
4347441b
KH
6588
6589 while (pos < pos_end)
6590 {
6591 p = BYTE_POS_ADDR (pos_byte);
6592 if (*p == '\r' && p[1] == '\n')
6593 {
6594 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6595 n++;
6596 pos_end--;
6597 }
6598 pos++;
69b8522d
KH
6599 if (coding->dst_multibyte)
6600 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6601 else
6602 pos_byte++;
4347441b 6603 }
24a73b0a
KH
6604 }
6605 coding->produced -= n;
6606 coding->produced_char -= n;
aaaf0b1e 6607 }
4ed46869
KH
6608}
6609
7d64c6ad 6610
a6f87d34
KH
6611/* Return a translation table (or list of them) from coding system
6612 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6613 decoding (ENCODEP is zero). */
7d64c6ad 6614
e6a54062 6615static Lisp_Object
971de7fb 6616get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
7d64c6ad
KH
6617{
6618 Lisp_Object standard, translation_table;
09ee6fdd 6619 Lisp_Object val;
7d64c6ad 6620
4bed5909
CY
6621 if (NILP (Venable_character_translation))
6622 {
6623 if (max_lookup)
6624 *max_lookup = 0;
6625 return Qnil;
6626 }
7d64c6ad
KH
6627 if (encodep)
6628 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6629 standard = Vstandard_translation_table_for_encode;
6630 else
6631 translation_table = CODING_ATTR_DECODE_TBL (attrs),
6632 standard = Vstandard_translation_table_for_decode;
7d64c6ad 6633 if (NILP (translation_table))
09ee6fdd
KH
6634 translation_table = standard;
6635 else
a6f87d34 6636 {
09ee6fdd
KH
6637 if (SYMBOLP (translation_table))
6638 translation_table = Fget (translation_table, Qtranslation_table);
6639 else if (CONSP (translation_table))
6640 {
6641 translation_table = Fcopy_sequence (translation_table);
6642 for (val = translation_table; CONSP (val); val = XCDR (val))
6643 if (SYMBOLP (XCAR (val)))
6644 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6645 }
6646 if (CHAR_TABLE_P (standard))
6647 {
6648 if (CONSP (translation_table))
6649 translation_table = nconc2 (translation_table,
6650 Fcons (standard, Qnil));
6651 else
6652 translation_table = Fcons (translation_table,
6653 Fcons (standard, Qnil));
6654 }
a6f87d34 6655 }
2170c8f0
KH
6656
6657 if (max_lookup)
09ee6fdd 6658 {
2170c8f0
KH
6659 *max_lookup = 1;
6660 if (CHAR_TABLE_P (translation_table)
6661 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6662 {
6663 val = XCHAR_TABLE (translation_table)->extras[1];
6664 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6665 *max_lookup = XFASTINT (val);
6666 }
6667 else if (CONSP (translation_table))
6668 {
2735d060 6669 Lisp_Object tail;
09ee6fdd 6670
2170c8f0
KH
6671 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6672 if (CHAR_TABLE_P (XCAR (tail))
6673 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6674 {
2735d060
PE
6675 Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1];
6676 if (NATNUMP (tailval) && *max_lookup < XFASTINT (tailval))
6677 *max_lookup = XFASTINT (tailval);
2170c8f0
KH
6678 }
6679 }
a6f87d34 6680 }
7d64c6ad
KH
6681 return translation_table;
6682}
6683
09ee6fdd
KH
/* Look up character C in TABLE, which is a char-table or a list whose
   char-table elements are consulted in order.

   Whenever a lookup yields a character, C is replaced by it and TRANS
   is reset to Qnil; in the list case the search then continues with
   the next table using the new C.  In the list case the search stops
   at the first lookup that yields a non-nil, non-character value,
   which is left in TRANS.  If nothing of that kind is found TRANS
   ends up as Qnil.

   C and TRANS are assigned to, so both must be lvalues; TABLE and C
   are evaluated more than once.  The expansion declares a local named
   `tail', so no argument may refer to a variable of that name.  */
6684#define LOOKUP_TRANSLATION_TABLE(table, c, trans) \
6685 do { \
6686 trans = Qnil; \
6687 if (CHAR_TABLE_P (table)) \
6688 { \
6689 trans = CHAR_TABLE_REF (table, c); \
6690 if (CHARACTERP (trans)) \
6691 c = XFASTINT (trans), trans = Qnil; \
6692 } \
6693 else if (CONSP (table)) \
6694 { \
6695 Lisp_Object tail; \
6696 \
6697 for (tail = table; CONSP (tail); tail = XCDR (tail)) \
6698 if (CHAR_TABLE_P (XCAR (tail))) \
6699 { \
6700 trans = CHAR_TABLE_REF (XCAR (tail), c); \
6701 if (CHARACTERP (trans)) \
6702 c = XFASTINT (trans), trans = Qnil; \
6703 else if (! NILP (trans)) \
6704 break; \
6705 } \
6706 } \
e6a54062
KH
6707 } while (0)
6708
7d64c6ad 6709
e951386e
KH
6710/* Return a translation of character(s) at BUF according to TRANS.
6711 TRANS is TO-CHAR or ((FROM . TO) ...) where
6712 FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6713 The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6714 translation is found, and Qnil if not found.
6715 If BUF is too short to lookup characters in FROM, return Qt. */
6716
69a80ea3 6717static Lisp_Object
971de7fb 6718get_translation (Lisp_Object trans, int *buf, int *buf_end)
69a80ea3 6719{
e951386e
KH
6720
6721 if (INTEGERP (trans))
6722 return trans;
6723 for (; CONSP (trans); trans = XCDR (trans))
69a80ea3 6724 {
e951386e
KH
6725 Lisp_Object val = XCAR (trans);
6726 Lisp_Object from = XCAR (val);
2c6a9faa
PE
6727 ptrdiff_t len = ASIZE (from);
6728 ptrdiff_t i;
69a80ea3 6729
e951386e 6730 for (i = 0; i < len; i++)
69a80ea3 6731 {
e951386e
KH
6732 if (buf + i == buf_end)
6733 return Qt;
6734 if (XINT (AREF (from, i)) != buf[i])
6735 break;
69a80ea3 6736 }
e951386e
KH
6737 if (i == len)
6738 return val;
69a80ea3 6739 }
e951386e 6740 return Qnil;
69a80ea3
KH
6741}
6742
6743
/* Write decoded text to the destination of CODING and update
   CODING->produced and CODING->produced_char accordingly.

   When CODING->chars_at_source is zero the characters are taken from
   CODING->charbuf (CODING->charbuf_used elements) and translated
   through TRANSLATION_TABLE; otherwise the bytes at CODING->source
   (CODING->consumed of them) are copied, converting between unibyte
   and multibyte representation if source and destination differ.

   The destination is enlarged with alloc_destination when it runs out
   of room.  If the destination is a buffer and at least one character
   was produced, the new text is made part of it with insert_from_gap.

   Return the number of elements left unprocessed at the end of
   CODING->charbuf; this is always 0 when CODING->chars_at_source is
   nonzero.  Elements are left only when LAST_BLOCK is zero and a
   multi-character translation could not be decided because the
   buffer ended.  */
d46c5b12 6744static int
cf84bb53
JB
6745produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6746 int last_block)
4ed46869 6747{
df7492f9
KH
6748 unsigned char *dst = coding->destination + coding->produced;
6749 unsigned char *dst_end = coding->destination + coding->dst_bytes;
d311d28c
PE
6750 ptrdiff_t produced;
6751 ptrdiff_t produced_chars = 0;
69a80ea3 6752 int carryover = 0;
4ed46869 6753
df7492f9 6754 if (! coding->chars_at_source)
4ed46869 6755 {
119852e7 6756 /* Source characters are in coding->charbuf. */
fba4576f
AS
6757 int *buf = coding->charbuf;
6758 int *buf_end = buf + coding->charbuf_used;
4ed46869 6759
  /* When source and destination are the same object, writing stops at
     the end of the already consumed part of the source.  */
db274c7a
KH
6760 if (EQ (coding->src_object, coding->dst_object))
6761 {
6762 coding_set_source (coding);
6763 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6764 }
4ed46869 6765
df7492f9 6766 while (buf < buf_end)
4ed46869 6767 {
27bb1ca4
PE
6768 int c = *buf;
6769 ptrdiff_t i;
bc4bc72a 6770
df7492f9
KH
6771 if (c >= 0)
6772 {
d311d28c 6773 ptrdiff_t from_nchars = 1, to_nchars = 1;
69a80ea3
KH
6774 Lisp_Object trans = Qnil;
6775
09ee6fdd 6776 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6777 if (! NILP (trans))
69a80ea3 6778 {
  /* get_translation returns an integer (single replacement), a cons
     whose car gives the number of source characters matched and whose
     cdr is the replacement, or Qt when BUF is too short to decide.  */
e951386e
KH
6779 trans = get_translation (trans, buf, buf_end);
6780 if (INTEGERP (trans))
6781 c = XINT (trans);
6782 else if (CONSP (trans))
6783 {
6784 from_nchars = ASIZE (XCAR (trans));
6785 trans = XCDR (trans);
6786 if (INTEGERP (trans))
6787 c = XINT (trans);
6788 else
6789 {
6790 to_nchars = ASIZE (trans);
6791 c = XINT (AREF (trans, 0));
6792 }
6793 }
  /* More input may complete the match, so leave the rest of BUF for
     the caller to carry over.  */
6794 else if (EQ (trans, Qt) && ! last_block)
69a80ea3 6795 break;
69a80ea3
KH
6796 }
6797
  /* Make sure there is room for TO_NCHARS characters of maximum
     length, signalling memory_full if the requested size would
     overflow.  */
5d009b3a 6798 if ((dst_end - dst) / MAX_MULTIBYTE_LENGTH < to_nchars)
69a80ea3 6799 {
5d009b3a
PE
6800 if (((min (PTRDIFF_MAX, SIZE_MAX) - (buf_end - buf))
6801 / MAX_MULTIBYTE_LENGTH)
6802 < to_nchars)
6803 memory_full (SIZE_MAX);
69a80ea3
KH
6804 dst = alloc_destination (coding,
6805 buf_end - buf
6806 + MAX_MULTIBYTE_LENGTH * to_nchars,
6807 dst);
db274c7a
KH
6808 if (EQ (coding->src_object, coding->dst_object))
6809 {
6810 coding_set_source (coding);
e951386e
KH
6811 dst_end = (((unsigned char *) coding->source)
6812 + coding->consumed);
db274c7a
KH
6813 }
6814 else
6815 dst_end = coding->destination + coding->dst_bytes;
69a80ea3
KH
6816 }
6817
433f7f87 6818 for (i = 0; i < to_nchars; i++)
69a80ea3 6819 {
433f7f87
KH
6820 if (i > 0)
6821 c = XINT (AREF (trans, i));
69a80ea3
KH
6822 if (coding->dst_multibyte
6823 || ! CHAR_BYTE8_P (c))
db274c7a 6824 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
69a80ea3
KH
6825 else
6826 *dst++ = CHAR_TO_BYTE8 (c);
6827 }
6828 produced_chars += to_nchars;
e951386e 6829 buf += from_nchars;
d46c5b12 6830 }
df7492f9 6831 else
69a80ea3
KH
6832 /* This is an annotation datum. (-C) is the length. */
6833 buf += -c;
4ed46869 6834 }
69a80ea3 6835 carryover = buf_end - buf;
4ed46869 6836 }
fa42c37f 6837 else
fa42c37f 6838 {
119852e7 6839 /* Source characters are at coding->source. */
8f924df7 6840 const unsigned char *src = coding->source;
119852e7 6841 const unsigned char *src_end = src + coding->consumed;
4ed46869 6842
db274c7a
KH
6843 if (EQ (coding->dst_object, coding->src_object))
6844 dst_end = (unsigned char *) src;
df7492f9 6845 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 6846 {
df7492f9 6847 if (coding->src_multibyte)
fa42c37f 6848 {
  /* Multibyte source, unibyte destination: every value fetched with
     ONE_MORE_BYTE is stored as a single byte.  The loop is left
     through the no_more_source label below.
     NOTE(review): ONE_MORE_BYTE is defined elsewhere; it presumably
     uses the locals multibytep, consumed_chars, src, src_end and
     src_base and jumps to no_more_source at the end of the source --
     confirm against its definition.  */
71c81426 6849 int multibytep = 1;
d311d28c 6850 ptrdiff_t consumed_chars = 0;
d46c5b12 6851
df7492f9
KH
6852 while (1)
6853 {
8f924df7 6854 const unsigned char *src_base = src;
df7492f9 6855 int c;
b73bfc1c 6856
df7492f9 6857 ONE_MORE_BYTE (c);
119852e7 6858 if (dst == dst_end)
df7492f9 6859 {
119852e7
KH
6860 if (EQ (coding->src_object, coding->dst_object))
6861 dst_end = (unsigned char *) src;
6862 if (dst == dst_end)
df7492f9 6863 {
  /* SRC is remembered as an offset and recomputed after the
     allocation, together with SRC_END and DST_END.  */
d311d28c 6864 ptrdiff_t offset = src - coding->source;
119852e7
KH
6865
6866 dst = alloc_destination (coding, src_end - src + 1,
6867 dst);
6868 dst_end = coding->destination + coding->dst_bytes;
6869 coding_set_source (coding);
6870 src = coding->source + offset;
5c1ca13d 6871 src_end = coding->source + coding->consumed;
db274c7a
KH
6872 if (EQ (coding->src_object, coding->dst_object))
6873 dst_end = (unsigned char *) src;
df7492f9 6874 }
df7492f9
KH
6875 }
6876 *dst++ = c;
6877 produced_chars++;
6878 }
6879 no_more_source:
6880 ;
fa42c37f
KH
6881 }
6882 else
  /* Unibyte source, multibyte destination.  The space check keeps at
     least two bytes free before each EMIT_ONE_BYTE.
     NOTE(review): EMIT_ONE_BYTE is defined elsewhere; it presumably
     uses the locals multibytep, dst and produced_chars -- confirm
     against its definition.  */
df7492f9
KH
6883 while (src < src_end)
6884 {
71c81426 6885 int multibytep = 1;
df7492f9 6886 int c = *src++;
b73bfc1c 6887
df7492f9
KH
6888 if (dst >= dst_end - 1)
6889 {
2c78b7e1 6890 if (EQ (coding->src_object, coding->dst_object))
8f924df7 6891 dst_end = (unsigned char *) src;
2c78b7e1
KH
6892 if (dst >= dst_end - 1)
6893 {
d311d28c
PE
6894 ptrdiff_t offset = src - coding->source;
6895 ptrdiff_t more_bytes;
119852e7 6896
db274c7a
KH
6897 if (EQ (coding->src_object, coding->dst_object))
6898 more_bytes = ((src_end - src) / 2) + 2;
6899 else
6900 more_bytes = src_end - src + 2;
6901 dst = alloc_destination (coding, more_bytes, dst);
2c78b7e1
KH
6902 dst_end = coding->destination + coding->dst_bytes;
6903 coding_set_source (coding);
119852e7 6904 src = coding->source + offset;
5c1ca13d 6905 src_end = coding->source + coding->consumed;
db274c7a
KH
6906 if (EQ (coding->src_object, coding->dst_object))
6907 dst_end = (unsigned char *) src;
2c78b7e1 6908 }
df7492f9
KH
6909 }
6910 EMIT_ONE_BYTE (c);
6911 }
d46c5b12 6912 }
df7492f9
KH
6913 else
6914 {
  /* Same representation on both sides: the bytes are copied as they
     are and the character count is taken from CODING->consumed_char.
     The destination is enlarged beforehand only when source and
     destination are different objects.  */
6915 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 6916 {
d311d28c 6917 ptrdiff_t require = coding->src_bytes - coding->dst_bytes;
4ed46869 6918
df7492f9 6919 if (require > 0)
fa42c37f 6920 {
d311d28c 6921 ptrdiff_t offset = src - coding->source;
df7492f9
KH
6922
6923 dst = alloc_destination (coding, require, dst);
6924 coding_set_source (coding);
6925 src = coding->source + offset;
5c1ca13d 6926 src_end = coding->source + coding->consumed;
fa42c37f
KH
6927 }
6928 }
119852e7 6929 produced_chars = coding->consumed_char;
df7492f9 6930 while (src < src_end)
14daee73 6931 *dst++ = *src++;
fa42c37f
KH
6932 }
6933 }
6934
  /* Number of bytes written by this call.  */
df7492f9 6935 produced = dst - (coding->destination + coding->produced);
284201e4 6936 if (BUFFERP (coding->dst_object) && produced_chars > 0)
df7492f9
KH
6937 insert_from_gap (produced_chars, produced);
6938 coding->produced += produced;
6939 coding->produced_char += produced_chars;
69a80ea3 6940 return carryover;
fa42c37f
KH
6941}
6942
ff0dacd7
KH
6943/* Compose text in CODING->object according to the annotation data at
6944 CHARBUF. CHARBUF is an array:
e951386e 6945 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
df7492f9 6946 */
4ed46869 6947
55d4c1b2 6948static inline void
d311d28c 6949produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
4ed46869 6950{
df7492f9 6951 int len;
d311d28c 6952 ptrdiff_t to;
df7492f9 6953 enum composition_method method;
df7492f9 6954 Lisp_Object components;
fa42c37f 6955
e951386e 6956 len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
69a80ea3 6957 to = pos + charbuf[2];
e951386e 6958 method = (enum composition_method) (charbuf[4]);
d46c5b12 6959
df7492f9
KH
6960 if (method == COMPOSITION_RELATIVE)
6961 components = Qnil;
e951386e 6962 else
d46c5b12 6963 {
df7492f9 6964 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
e951386e 6965 int i, j;
b73bfc1c 6966
e951386e
KH
6967 if (method == COMPOSITION_WITH_RULE)
6968 len = charbuf[2] * 3 - 2;
6969 charbuf += MAX_ANNOTATION_LENGTH;
6970 /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6971 for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
9ffd559c 6972 {
e951386e
KH
6973 if (charbuf[i] >= 0)
6974 args[j] = make_number (charbuf[i]);
6975 else
6976 {
6977 i++;
6978 args[j] = make_number (charbuf[i] % 0x100);
6979 }
9ffd559c 6980 }
e951386e 6981 components = (i == j ? Fstring (j, args) : Fvector (j, args));
d46c5b12 6982 }
69a80ea3 6983 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
6984}
6985
d46c5b12 6986
ff0dacd7
KH
6987/* Put `charset' property on text in CODING->object according to
6988 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 6989 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 6990 */
d46c5b12 6991
55d4c1b2 6992static inline void
d311d28c 6993produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
d46c5b12 6994{
d311d28c 6995 ptrdiff_t from = pos - charbuf[2];
69a80ea3 6996 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 6997
69a80ea3 6998 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
6999 Qcharset, CHARSET_NAME (charset),
7000 coding->dst_object);
d46c5b12
KH
7001}
7002
d46c5b12 7003
df7492f9
KH
/* Number of `int' elements first requested for the conversion work
   buffer.  */
7004#define CHARBUF_SIZE 0x4000
7005
/* Allocate CODING->charbuf with alloca and record its size, in
   elements, in CODING->charbuf_size.

   The request starts at CHARBUF_SIZE elements and is halved, as long
   as the size stays above 1024, each time alloca yields a null
   pointer.  If no buffer is obtained, the result
   CODING_RESULT_INSUFFICIENT_MEM is recorded and the *enclosing
   function* returns CODING->result, so this macro may only be used
   in functions whose return type is compatible with that.

   As the memory comes from alloca it belongs to the frame of the
   enclosing function.
   NOTE(review): alloca normally does not report failure by returning
   a null pointer, which would make the retry loop and the failure
   branch dead code -- confirm for the supported platforms.  */
7006#define ALLOC_CONVERSION_WORK_AREA(coding) \
7007 do { \
8510724d 7008 int size = CHARBUF_SIZE; \
df7492f9
KH
7009 \
7010 coding->charbuf = NULL; \
7011 while (size > 1024) \
7012 { \
38182d90 7013 coding->charbuf = alloca (sizeof (int) * size); \
df7492f9
KH
7014 if (coding->charbuf) \
7015 break; \
7016 size >>= 1; \
7017 } \
7018 if (! coding->charbuf) \
7019 { \
065e3595 7020 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
df7492f9
KH
7021 return coding->result; \
7022 } \
7023 coding->charbuf_size = size; \
7024 } while (0)
4ed46869 7025
d46c5b12
KH
7026
7027static void
d311d28c 7028produce_annotation (struct coding_system *coding, ptrdiff_t pos)
d46c5b12 7029{
df7492f9
KH
7030 int *charbuf = coding->charbuf;
7031 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 7032
ff0dacd7
KH
7033 if (NILP (coding->dst_object))
7034 return;
d46c5b12 7035
df7492f9 7036 while (charbuf < charbuf_end)
a84f1519 7037 {
df7492f9 7038 if (*charbuf >= 0)
e951386e 7039 pos++, charbuf++;
d46c5b12 7040 else
d46c5b12 7041 {
df7492f9 7042 int len = -*charbuf;
e951386e
KH
7043
7044 if (len > 2)
7045 switch (charbuf[1])
7046 {
7047 case CODING_ANNOTATE_COMPOSITION_MASK:
7048 produce_composition (coding, charbuf, pos);
7049 break;
7050 case CODING_ANNOTATE_CHARSET_MASK:
7051 produce_charset (coding, charbuf, pos);
7052 break;
7053 }
df7492f9 7054 charbuf += len;
d46c5b12 7055 }
a84f1519 7056 }
d46c5b12
KH
7057}
7058
df7492f9
KH
7059/* Decode the data at CODING->src_object into CODING->dst_object.
7060 CODING->src_object is a buffer, a string, or nil.
7061 CODING->dst_object is a buffer.
d46c5b12 7062
df7492f9
KH
7063 If CODING->src_object is a buffer, it must be the current buffer.
7064 In this case, if CODING->src_pos is positive, it is a position of
7065 the source text in the buffer, otherwise, the source text is in the
7066 gap area of the buffer, and CODING->src_pos specifies the offset of
7067 the text from GPT (which must be the same as PT). If this is the
7068 same buffer as CODING->dst_object, CODING->src_pos must be
7069 negative.
d46c5b12 7070
b6828792 7071 If CODING->src_object is a string, CODING->src_pos is an index to
df7492f9 7072 that string.
d46c5b12 7073
df7492f9
KH
7074 If CODING->src_object is nil, CODING->source must already point to
7075 the non-relocatable memory area. In this case, CODING->src_pos is
7076 an offset from CODING->source.
73be902c 7077
df7492f9
KH
7078 The decoded data is inserted at the current point of the buffer
7079 CODING->dst_object.
7080*/
d46c5b12 7081
df7492f9 7082static int
971de7fb 7083decode_coding (struct coding_system *coding)
d46c5b12 7084{
df7492f9 7085 Lisp_Object attrs;
24a73b0a 7086 Lisp_Object undo_list;
7d64c6ad 7087 Lisp_Object translation_table;
d0396581 7088 struct ccl_spec cclspec;
69a80ea3
KH
7089 int carryover;
7090 int i;
d46c5b12 7091
df7492f9
KH
7092 if (BUFFERP (coding->src_object)
7093 && coding->src_pos > 0
7094 && coding->src_pos < GPT
7095 && coding->src_pos + coding->src_chars > GPT)
7096 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 7097
24a73b0a 7098 undo_list = Qt;
df7492f9 7099 if (BUFFERP (coding->dst_object))
1c3478b0 7100 {
df7492f9
KH
7101 if (current_buffer != XBUFFER (coding->dst_object))
7102 set_buffer_internal (XBUFFER (coding->dst_object));
7103 if (GPT != PT)
7104 move_gap_both (PT, PT_BYTE);
4b4deea2
TT
7105 undo_list = BVAR (current_buffer, undo_list);
7106 BVAR (current_buffer, undo_list) = Qt;
1c3478b0
KH
7107 }
7108
df7492f9
KH
7109 coding->consumed = coding->consumed_char = 0;
7110 coding->produced = coding->produced_char = 0;
7111 coding->chars_at_source = 0;
065e3595 7112 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 7113 coding->errors = 0;
1c3478b0 7114
df7492f9
KH
7115 ALLOC_CONVERSION_WORK_AREA (coding);
7116
7117 attrs = CODING_ID_ATTRS (coding->id);
2170c8f0 7118 translation_table = get_translation_table (attrs, 0, NULL);
df7492f9 7119
69a80ea3 7120 carryover = 0;
d0396581
KH
7121 if (coding->decoder == decode_coding_ccl)
7122 {
7123 coding->spec.ccl = &cclspec;
7124 setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7125 }
df7492f9 7126 do
b73bfc1c 7127 {
d311d28c 7128 ptrdiff_t pos = coding->dst_pos + coding->produced_char;
69a80ea3 7129
df7492f9
KH
7130 coding_set_source (coding);
7131 coding->annotated = 0;
69a80ea3 7132 coding->charbuf_used = carryover;
df7492f9 7133 (*(coding->decoder)) (coding);
df7492f9 7134 coding_set_destination (coding);
69a80ea3 7135 carryover = produce_chars (coding, translation_table, 0);
df7492f9 7136 if (coding->annotated)
69a80ea3
KH
7137 produce_annotation (coding, pos);
7138 for (i = 0; i < carryover; i++)
7139 coding->charbuf[i]
7140 = coding->charbuf[coding->charbuf_used - carryover + i];
d46c5b12 7141 }
d0396581
KH
7142 while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7143 || (coding->consumed < coding->src_bytes
7144 && (coding->result == CODING_RESULT_SUCCESS
7145 || coding->result == CODING_RESULT_INVALID_SRC)));
d46c5b12 7146
69a80ea3
KH
7147 if (carryover > 0)
7148 {
7149 coding_set_destination (coding);
7150 coding->charbuf_used = carryover;
7151 produce_chars (coding, translation_table, 1);
7152 }
7153
df7492f9
KH
7154 coding->carryover_bytes = 0;
7155 if (coding->consumed < coding->src_bytes)
d46c5b12 7156 {
df7492f9 7157 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 7158 const unsigned char *src;
df7492f9
KH
7159
7160 coding_set_source (coding);
7161 coding_set_destination (coding);
7162 src = coding->source + coding->consumed;
7163
7164 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 7165 {
df7492f9
KH
7166 /* Flush out unprocessed data as binary chars. We are sure
7167 that the number of data is less than the size of
7168 coding->charbuf. */
065e3595 7169 coding->charbuf_used = 0;
b2dab6c8
JR
7170 coding->chars_at_source = 0;
7171
df7492f9 7172 while (nbytes-- > 0)
1c3478b0 7173 {
df7492f9 7174 int c = *src++;
98725083 7175
1c91457d
KH
7176 if (c & 0x80)
7177 c = BYTE8_TO_CHAR (c);
7178 coding->charbuf[coding->charbuf_used++] = c;
1c3478b0 7179 }
f6cbaf43 7180 produce_chars (coding, Qnil, 1);
d46c5b12 7181 }
d46c5b12 7182 else
df7492f9
KH
7183 {
7184 /* Record unprocessed bytes in coding->carryover. We are
7185 sure that the number of data is less than the size of
7186 coding->carryover. */
7187 unsigned char *p = coding->carryover;
7188
f289d375
KH
7189 if (nbytes > sizeof coding->carryover)
7190 nbytes = sizeof coding->carryover;
df7492f9
KH
7191 coding->carryover_bytes = nbytes;
7192 while (nbytes-- > 0)
7193 *p++ = *src++;
1c3478b0 7194 }
df7492f9 7195 coding->consumed = coding->src_bytes;
b73bfc1c 7196 }
69f76525 7197
0a9564cb
EZ
7198 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7199 && !inhibit_eol_conversion)
4347441b 7200 decode_eol (coding);
24a73b0a
KH
7201 if (BUFFERP (coding->dst_object))
7202 {
4b4deea2 7203 BVAR (current_buffer, undo_list) = undo_list;
24a73b0a
KH
7204 record_insert (coding->dst_pos, coding->produced_char);
7205 }
73be902c 7206 return coding->result;
4ed46869
KH
7207}
7208
aaaf0b1e 7209
e1c23804 7210/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
7211 ending before LIMIT of CODING->src_object (buffer or string), store
7212 the data in BUF, set *STOP to a starting position of the next
7213 composition (if any) or to LIMIT, and return the address of the
7214 next element of BUF.
7215
7216 If such an annotation is not found, set *STOP to a starting
7217 position of a composition after POS (if any) or to LIMIT, and
7218 return BUF. */
7219
55d4c1b2 7220static inline int *
d311d28c 7221handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
cf84bb53 7222 struct coding_system *coding, int *buf,
d311d28c 7223 ptrdiff_t *stop)
aaaf0b1e 7224{
d311d28c 7225 ptrdiff_t start, end;
ff0dacd7 7226 Lisp_Object prop;
aaaf0b1e 7227
ff0dacd7
KH
7228 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7229 || end > limit)
7230 *stop = limit;
7231 else if (start > pos)
7232 *stop = start;
7233 else
aaaf0b1e 7234 {
ff0dacd7 7235 if (start == pos)
aaaf0b1e 7236 {
ff0dacd7
KH
7237 /* We found a composition. Store the corresponding
7238 annotation data in BUF. */
7239 int *head = buf;
7240 enum composition_method method = COMPOSITION_METHOD (prop);
7241 int nchars = COMPOSITION_LENGTH (prop);
7242
e951386e 7243 ADD_COMPOSITION_DATA (buf, nchars, 0, method);
ff0dacd7 7244 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 7245 {
ff0dacd7 7246 Lisp_Object components;
2c6a9faa 7247 ptrdiff_t i, len, i_byte;
ff0dacd7
KH
7248
7249 components = COMPOSITION_COMPONENTS (prop);
7250 if (VECTORP (components))
aaaf0b1e 7251 {
77b37c05 7252 len = ASIZE (components);
ff0dacd7
KH
7253 for (i = 0; i < len; i++)
7254 *buf++ = XINT (AREF (components, i));
aaaf0b1e 7255 }
ff0dacd7 7256 else if (STRINGP (components))
aaaf0b1e 7257 {
8f924df7 7258 len = SCHARS (components);
ff0dacd7
KH
7259 i = i_byte = 0;
7260 while (i < len)
7261 {
7262 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7263 buf++;
7264 }
7265 }
7266 else if (INTEGERP (components))
7267 {
7268 len = 1;
7269 *buf++ = XINT (components);
7270 }
7271 else if (CONSP (components))
7272 {
7273 for (len = 0; CONSP (components);
7274 len++, components = XCDR (components))
7275 *buf++ = XINT (XCAR (components));
aaaf0b1e 7276 }
aaaf0b1e 7277 else
ff0dacd7
KH
7278 abort ();
7279 *head -= len;
aaaf0b1e 7280 }
aaaf0b1e 7281 }
ff0dacd7
KH
7282
7283 if (find_composition (end, limit, &start, &end, &prop,
7284 coding->src_object)
7285 && end <= limit)
7286 *stop = start;
7287 else
7288 *stop = limit;
aaaf0b1e 7289 }
ff0dacd7
KH
7290 return buf;
7291}
7292
7293
e1c23804 7294/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
7295 CODING->src_object (buffer or string), store the data in BUF, set
7296 *STOP to the position where the value of `charset' property changes
7297 (limiting by LIMIT), and return the address of the next element of
7298 BUF.
7299
7300 If the property value is nil, set *STOP to the position where the
7301 property value is non-nil (limiting by LIMIT), and return BUF. */
7302
55d4c1b2 7303static inline int *
d311d28c 7304handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit,
cf84bb53 7305 struct coding_system *coding, int *buf,
d311d28c 7306 ptrdiff_t *stop)
ff0dacd7
KH
7307{
7308 Lisp_Object val, next;
7309 int id;
7310
7311 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7312 if (! NILP (val) && CHARSETP (val))
7313 id = XINT (CHARSET_SYMBOL_ID (val));
7314 else
7315 id = -1;
69a80ea3 7316 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
7317 next = Fnext_single_property_change (make_number (pos), Qcharset,
7318 coding->src_object,
7319 make_number (limit));
7320 *stop = XINT (next);
7321 return buf;
7322}
7323
7324
df7492f9 7325static void
cf84bb53
JB
7326consume_chars (struct coding_system *coding, Lisp_Object translation_table,
7327 int max_lookup)
df7492f9
KH
7328{
7329 int *buf = coding->charbuf;
ff0dacd7 7330 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 7331 const unsigned char *src = coding->source + coding->consumed;
4776e638 7332 const unsigned char *src_end = coding->source + coding->src_bytes;
d311d28c
PE
7333 ptrdiff_t pos = coding->src_pos + coding->consumed_char;
7334 ptrdiff_t end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
7335 int multibytep = coding->src_multibyte;
7336 Lisp_Object eol_type;
7337 int c;
d311d28c 7338 ptrdiff_t stop, stop_composition, stop_charset;
09ee6fdd 7339 int *lookup_buf = NULL;
433f7f87
KH
7340
7341 if (! NILP (translation_table))
09ee6fdd 7342 lookup_buf = alloca (sizeof (int) * max_lookup);
88993dfd 7343
0a9564cb 7344 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
df7492f9
KH
7345 if (VECTORP (eol_type))
7346 eol_type = Qunix;
88993dfd 7347
df7492f9
KH
7348 /* Note: composition handling is not yet implemented. */
7349 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 7350
0b5670c9
KH
7351 if (NILP (coding->src_object))
7352 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 7353 else
0b5670c9
KH
7354 {
7355 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7356 stop = stop_composition = pos;
7357 else
7358 stop = stop_composition = end_pos;
7359 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7360 stop = stop_charset = pos;
7361 else
7362 stop_charset = end_pos;
7363 }
ec6d2bb8 7364
24a73b0a 7365 /* Compensate for CRLF and conversion. */
ff0dacd7 7366 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 7367 while (buf < buf_end)
aaaf0b1e 7368 {
433f7f87
KH
7369 Lisp_Object trans;
7370
df7492f9 7371 if (pos == stop)
ec6d2bb8 7372 {
df7492f9
KH
7373 if (pos == end_pos)
7374 break;
ff0dacd7
KH
7375 if (pos == stop_composition)
7376 buf = handle_composition_annotation (pos, end_pos, coding,
7377 buf, &stop_composition);
7378 if (pos == stop_charset)
7379 buf = handle_charset_annotation (pos, end_pos, coding,
7380 buf, &stop_charset);
7381 stop = (stop_composition < stop_charset
7382 ? stop_composition : stop_charset);
df7492f9
KH
7383 }
7384
7385 if (! multibytep)
4776e638 7386 {
d311d28c 7387 int bytes;
aaaf0b1e 7388
4d1e6632
KH
7389 if (coding->encoder == encode_coding_raw_text
7390 || coding->encoder == encode_coding_ccl)
ea29edf2
KH
7391 c = *src++, pos++;
7392 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
db274c7a 7393 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
4776e638 7394 else
f03caae0 7395 c = BYTE8_TO_CHAR (*src), src++, pos++;
4776e638 7396 }
df7492f9 7397 else
db274c7a 7398 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
df7492f9
KH
7399 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7400 c = '\n';
7401 if (! EQ (eol_type, Qunix))
aaaf0b1e 7402 {
df7492f9 7403 if (c == '\n')
aaaf0b1e 7404 {
df7492f9
KH
7405 if (EQ (eol_type, Qdos))
7406 *buf++ = '\r';
7407 else
7408 c = '\r';
aaaf0b1e
KH
7409 }
7410 }
433f7f87 7411
e6a54062 7412 trans = Qnil;
09ee6fdd 7413 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 7414 if (NILP (trans))
433f7f87
KH
7415 *buf++ = c;
7416 else
7417 {
2c6a9faa 7418 ptrdiff_t from_nchars = 1, to_nchars = 1;
433f7f87
KH
7419 int *lookup_buf_end;
7420 const unsigned char *p = src;
7421 int i;
7422
7423 lookup_buf[0] = c;
7424 for (i = 1; i < max_lookup && p < src_end; i++)
7425 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7426 lookup_buf_end = lookup_buf + i;
e951386e
KH
7427 trans = get_translation (trans, lookup_buf, lookup_buf_end);
7428 if (INTEGERP (trans))
7429 c = XINT (trans);
7430 else if (CONSP (trans))
7431 {
7432 from_nchars = ASIZE (XCAR (trans));
7433 trans = XCDR (trans);
7434 if (INTEGERP (trans))
7435 c = XINT (trans);
7436 else
7437 {
7438 to_nchars = ASIZE (trans);
2c6a9faa 7439 if (buf_end - buf < to_nchars)
e951386e
KH
7440 break;
7441 c = XINT (AREF (trans, 0));
7442 }
7443 }
7444 else
433f7f87 7445 break;
e951386e 7446 *buf++ = c;
433f7f87
KH
7447 for (i = 1; i < to_nchars; i++)
7448 *buf++ = XINT (AREF (trans, i));
7449 for (i = 1; i < from_nchars; i++, pos++)
7450 src += MULTIBYTE_LENGTH_NO_CHECK (src);
7451 }
aaaf0b1e 7452 }
ec6d2bb8 7453
df7492f9
KH
7454 coding->consumed = src - coding->source;
7455 coding->consumed_char = pos - coding->src_pos;
7456 coding->charbuf_used = buf - coding->charbuf;
7457 coding->chars_at_source = 0;
aaaf0b1e
KH
7458}
7459
4ed46869 7460
df7492f9
KH
7461/* Encode the text at CODING->src_object into CODING->dst_object.
7462 CODING->src_object is a buffer or a string.
7463 CODING->dst_object is a buffer or nil.
7464
7465 If CODING->src_object is a buffer, it must be the current buffer.
7466 In this case, if CODING->src_pos is positive, it is a position of
7467 the source text in the buffer, otherwise. the source text is in the
7468 gap area of the buffer, and coding->src_pos specifies the offset of
7469 the text from GPT (which must be the same as PT). If this is the
7470 same buffer as CODING->dst_object, CODING->src_pos must be
7471 negative and CODING should not have `pre-write-conversion'.
7472
7473 If CODING->src_object is a string, CODING should not have
7474 `pre-write-conversion'.
7475
7476 If CODING->dst_object is a buffer, the encoded data is inserted at
7477 the current point of that buffer.
7478
7479 If CODING->dst_object is nil, the encoded data is placed at the
7480 memory area specified by CODING->destination. */
7481
7482static int
971de7fb 7483encode_coding (struct coding_system *coding)
4ed46869 7484{
df7492f9 7485 Lisp_Object attrs;
7d64c6ad 7486 Lisp_Object translation_table;
09ee6fdd 7487 int max_lookup;
fb608df3 7488 struct ccl_spec cclspec;
9861e777 7489
df7492f9 7490 attrs = CODING_ID_ATTRS (coding->id);
ea29edf2
KH
7491 if (coding->encoder == encode_coding_raw_text)
7492 translation_table = Qnil, max_lookup = 0;
7493 else
7494 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 7495
df7492f9 7496 if (BUFFERP (coding->dst_object))
8844fa83 7497 {
df7492f9
KH
7498 set_buffer_internal (XBUFFER (coding->dst_object));
7499 coding->dst_multibyte
4b4deea2 7500 = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
8844fa83 7501 }
4ed46869 7502
b73bfc1c 7503 coding->consumed = coding->consumed_char = 0;
df7492f9 7504 coding->produced = coding->produced_char = 0;
065e3595 7505 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 7506 coding->errors = 0;
b73bfc1c 7507
df7492f9 7508 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 7509
fb608df3
KH
7510 if (coding->encoder == encode_coding_ccl)
7511 {
7512 coding->spec.ccl = &cclspec;
7513 setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7514 }
df7492f9
KH
7515 do {
7516 coding_set_source (coding);
09ee6fdd 7517 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
7518 coding_set_destination (coding);
7519 (*(coding->encoder)) (coding);
7520 } while (coding->consumed_char < coding->src_chars);
7521
284201e4 7522 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
df7492f9
KH
7523 insert_from_gap (coding->produced_char, coding->produced);
7524
7525 return (coding->result);
ec6d2bb8
KH
7526}
7527
fb88bf2d 7528
24a73b0a
KH
7529/* Name (or base name) of the work buffers used for code conversion. */
7530static Lisp_Object Vcode_conversion_workbuf_name;
d46c5b12 7531
24a73b0a
KH
7532/* A working buffer used by the top level conversion. Once it is
7533 created, it is never destroyed (it is re-created if found dead).
7534 It has the name Vcode_conversion_workbuf_name. The other working
7535 buffers are killed after the use is finished, and their names are
7536 generated from Vcode_conversion_workbuf_name. */
7537static Lisp_Object Vcode_conversion_reused_workbuf;
b73bfc1c 7538
24a73b0a
KH
7539/* Nonzero iff Vcode_conversion_reused_workbuf is already in use. */
7540static int reused_workbuf_in_use;
4ed46869 7541
24a73b0a 7542
ad1746f5 7543/* Return a working buffer of code conversion. MULTIBYTE specifies the
24a73b0a 7544 multibyteness of returning buffer. */
b73bfc1c 7545
f6cbaf43 7546static Lisp_Object
971de7fb 7547make_conversion_work_buffer (int multibyte)
df7492f9 7548{
24a73b0a
KH
7549 Lisp_Object name, workbuf;
7550 struct buffer *current;
4ed46869 7551
24a73b0a 7552 if (reused_workbuf_in_use++)
065e3595
KH
7553 {
7554 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7555 workbuf = Fget_buffer_create (name);
7556 }
df7492f9 7557 else
065e3595 7558 {
159bd5a2 7559 if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
a993c7a1
KH
7560 Vcode_conversion_reused_workbuf
7561 = Fget_buffer_create (Vcode_conversion_workbuf_name);
7562 workbuf = Vcode_conversion_reused_workbuf;
065e3595 7563 }
24a73b0a
KH
7564 current = current_buffer;
7565 set_buffer_internal (XBUFFER (workbuf));
df36ff1f
CY
7566 /* We can't allow modification hooks to run in the work buffer. For
7567 instance, directory_files_internal assumes that file decoding
7568 doesn't compile new regexps. */
7569 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
3ed051d4 7570 Ferase_buffer ();
4b4deea2
TT
7571 BVAR (current_buffer, undo_list) = Qt;
7572 BVAR (current_buffer, enable_multibyte_characters) = multibyte ? Qt : Qnil;
df7492f9 7573 set_buffer_internal (current);
24a73b0a 7574 return workbuf;
df7492f9 7575}
d46c5b12 7576
24a73b0a 7577
4776e638 7578static Lisp_Object
971de7fb 7579code_conversion_restore (Lisp_Object arg)
4776e638 7580{
24a73b0a 7581 Lisp_Object current, workbuf;
948bdcf3 7582 struct gcpro gcpro1;
24a73b0a 7583
948bdcf3 7584 GCPRO1 (arg);
24a73b0a
KH
7585 current = XCAR (arg);
7586 workbuf = XCDR (arg);
7587 if (! NILP (workbuf))
7588 {
7589 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7590 reused_workbuf_in_use = 0;
d17337e5 7591 else
24a73b0a
KH
7592 Fkill_buffer (workbuf);
7593 }
7594 set_buffer_internal (XBUFFER (current));
948bdcf3 7595 UNGCPRO;
4776e638
KH
7596 return Qnil;
7597}
b73bfc1c 7598
24a73b0a 7599Lisp_Object
971de7fb 7600code_conversion_save (int with_work_buf, int multibyte)
df7492f9 7601{
24a73b0a 7602 Lisp_Object workbuf = Qnil;
b73bfc1c 7603
4776e638 7604 if (with_work_buf)
24a73b0a
KH
7605 workbuf = make_conversion_work_buffer (multibyte);
7606 record_unwind_protect (code_conversion_restore,
7607 Fcons (Fcurrent_buffer (), workbuf));
4776e638 7608 return workbuf;
df7492f9 7609}
d46c5b12 7610
df7492f9 7611int
cf84bb53 7612decode_coding_gap (struct coding_system *coding,
d311d28c 7613 ptrdiff_t chars, ptrdiff_t bytes)
df7492f9 7614{
d311d28c 7615 ptrdiff_t count = SPECPDL_INDEX ();
5e5c78be 7616 Lisp_Object attrs;
fb88bf2d 7617
24a73b0a 7618 code_conversion_save (0, 0);
ec6d2bb8 7619
24a73b0a 7620 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7621 coding->src_chars = chars;
7622 coding->src_bytes = bytes;
7623 coding->src_pos = -chars;
7624 coding->src_pos_byte = -bytes;
7625 coding->src_multibyte = chars < bytes;
24a73b0a 7626 coding->dst_object = coding->src_object;
df7492f9
KH
7627 coding->dst_pos = PT;
7628 coding->dst_pos_byte = PT_BYTE;
4b4deea2 7629 coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
4ed46869 7630
df7492f9
KH
7631 if (CODING_REQUIRE_DETECTION (coding))
7632 detect_coding (coding);
8f924df7 7633
9286b333 7634 coding->mode |= CODING_MODE_LAST_BLOCK;
287c57d7 7635 current_buffer->text->inhibit_shrinking = 1;
df7492f9 7636 decode_coding (coding);
287c57d7 7637 current_buffer->text->inhibit_shrinking = 0;
d46c5b12 7638
5e5c78be
KH
7639 attrs = CODING_ID_ATTRS (coding->id);
7640 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 7641 {
d311d28c 7642 ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
5e5c78be
KH
7643 Lisp_Object val;
7644
7645 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
7646 val = call1 (CODING_ATTR_POST_READ (attrs),
7647 make_number (coding->produced_char));
5e5c78be
KH
7648 CHECK_NATNUM (val);
7649 coding->produced_char += Z - prev_Z;
7650 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 7651 }
4ed46869 7652
df7492f9 7653 unbind_to (count, Qnil);
b73bfc1c
KH
7654 return coding->result;
7655}
52d41803 7656
d46c5b12 7657
df7492f9
KH
7658/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7659 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 7660
df7492f9 7661 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 7662
df7492f9
KH
7663 If it is a buffer, the text is at point of the buffer. FROM and TO
7664 are positions in the buffer.
b73bfc1c 7665
df7492f9
KH
7666 If it is a string, the text is at the beginning of the string.
7667 FROM and TO are indices to the string.
4ed46869 7668
df7492f9
KH
7669 If it is nil, the text is at coding->source. FROM and TO are
7670 indices to coding->source.
bb10be8b 7671
df7492f9 7672 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 7673
df7492f9
KH
7674 If it is a buffer, the decoded text is inserted at point of the
7675 buffer. If the buffer is the same as SRC_OBJECT, the source text
7676 is deleted.
4ed46869 7677
df7492f9
KH
7678 If it is Qt, a string is made from the decoded text, and
7679 set in CODING->dst_object.
d46c5b12 7680
df7492f9 7681 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 7682 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
7683 CODING->destination by xmalloc. If the decoded text is longer than
7684 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7685 */
d46c5b12 7686
df7492f9 7687void
cf84bb53
JB
7688decode_coding_object (struct coding_system *coding,
7689 Lisp_Object src_object,
d311d28c
PE
7690 ptrdiff_t from, ptrdiff_t from_byte,
7691 ptrdiff_t to, ptrdiff_t to_byte,
cf84bb53 7692 Lisp_Object dst_object)
d46c5b12 7693{
d311d28c 7694 ptrdiff_t count = SPECPDL_INDEX ();
c4a63b12 7695 unsigned char *destination IF_LINT (= NULL);
d311d28c
PE
7696 ptrdiff_t dst_bytes IF_LINT (= 0);
7697 ptrdiff_t chars = to - from;
7698 ptrdiff_t bytes = to_byte - from_byte;
df7492f9 7699 Lisp_Object attrs;
c4a63b12 7700 int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
64cedb0c 7701 int need_marker_adjustment = 0;
b3bfad50 7702 Lisp_Object old_deactivate_mark;
d46c5b12 7703
b3bfad50 7704 old_deactivate_mark = Vdeactivate_mark;
93dec019 7705
df7492f9 7706 if (NILP (dst_object))
d46c5b12 7707 {
df7492f9
KH
7708 destination = coding->destination;
7709 dst_bytes = coding->dst_bytes;
d46c5b12 7710 }
93dec019 7711
df7492f9
KH
7712 coding->src_object = src_object;
7713 coding->src_chars = chars;
7714 coding->src_bytes = bytes;
7715 coding->src_multibyte = chars < bytes;
70ad9fc4 7716
df7492f9 7717 if (STRINGP (src_object))
d46c5b12 7718 {
df7492f9
KH
7719 coding->src_pos = from;
7720 coding->src_pos_byte = from_byte;
d46c5b12 7721 }
df7492f9 7722 else if (BUFFERP (src_object))
88993dfd 7723 {
df7492f9
KH
7724 set_buffer_internal (XBUFFER (src_object));
7725 if (from != GPT)
7726 move_gap_both (from, from_byte);
7727 if (EQ (src_object, dst_object))
fb88bf2d 7728 {
64cedb0c
KH
7729 struct Lisp_Marker *tail;
7730
7731 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7732 {
7733 tail->need_adjustment
7734 = tail->charpos == (tail->insertion_type ? from : to);
7735 need_marker_adjustment |= tail->need_adjustment;
7736 }
4776e638 7737 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9 7738 TEMP_SET_PT_BOTH (from, from_byte);
f4a3cc44 7739 current_buffer->text->inhibit_shrinking = 1;
df7492f9
KH
7740 del_range_both (from, from_byte, to, to_byte, 1);
7741 coding->src_pos = -chars;
7742 coding->src_pos_byte = -bytes;
fb88bf2d 7743 }
df7492f9 7744 else
fb88bf2d 7745 {
df7492f9
KH
7746 coding->src_pos = from;
7747 coding->src_pos_byte = from_byte;
fb88bf2d 7748 }
88993dfd
KH
7749 }
7750
df7492f9
KH
7751 if (CODING_REQUIRE_DETECTION (coding))
7752 detect_coding (coding);
7753 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 7754
2cb26057
KH
7755 if (EQ (dst_object, Qt)
7756 || (! NILP (CODING_ATTR_POST_READ (attrs))
7757 && NILP (dst_object)))
b73bfc1c 7758 {
a1567c45
SM
7759 coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7760 coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
df7492f9
KH
7761 coding->dst_pos = BEG;
7762 coding->dst_pos_byte = BEG_BYTE;
b73bfc1c 7763 }
df7492f9 7764 else if (BUFFERP (dst_object))
d46c5b12 7765 {
24a73b0a 7766 code_conversion_save (0, 0);
df7492f9
KH
7767 coding->dst_object = dst_object;
7768 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7769 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7770 coding->dst_multibyte
4b4deea2 7771 = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
d46c5b12
KH
7772 }
7773 else
7774 {
24a73b0a 7775 code_conversion_save (0, 0);
df7492f9 7776 coding->dst_object = Qnil;
0154725e
SM
7777 /* Most callers presume this will return a multibyte result, and they
7778 won't use `binary' or `raw-text' anyway, so let's not worry about
7779 CODING_FOR_UNIBYTE. */
bb555731 7780 coding->dst_multibyte = 1;
d46c5b12
KH
7781 }
7782
df7492f9 7783 decode_coding (coding);
fa46990e 7784
df7492f9
KH
7785 if (BUFFERP (coding->dst_object))
7786 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 7787
df7492f9 7788 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 7789 {
b3bfad50 7790 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
d311d28c 7791 ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 7792 Lisp_Object val;
d46c5b12 7793
c0cc7f7f 7794 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
b3bfad50
KH
7795 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7796 old_deactivate_mark);
d4850d67
KH
7797 val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7798 make_number (coding->produced_char));
df7492f9
KH
7799 UNGCPRO;
7800 CHECK_NATNUM (val);
7801 coding->produced_char += Z - prev_Z;
7802 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 7803 }
de79a6a5 7804
df7492f9 7805 if (EQ (dst_object, Qt))
ec6d2bb8 7806 {
df7492f9
KH
7807 coding->dst_object = Fbuffer_string ();
7808 }
7809 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7810 {
7811 set_buffer_internal (XBUFFER (coding->dst_object));
7812 if (dst_bytes < coding->produced)
7813 {
b3bfad50 7814 destination = xrealloc (destination, coding->produced);
df7492f9
KH
7815 if (! destination)
7816 {
065e3595 7817 record_conversion_result (coding,
ebaf11b6 7818 CODING_RESULT_INSUFFICIENT_MEM);
df7492f9
KH
7819 unbind_to (count, Qnil);
7820 return;
7821 }
7822 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7823 move_gap_both (BEGV, BEGV_BYTE);
72af86bd 7824 memcpy (destination, BEGV_ADDR, coding->produced);
df7492f9 7825 coding->destination = destination;
d46c5b12 7826 }
ec6d2bb8 7827 }
b73bfc1c 7828
4776e638
KH
7829 if (saved_pt >= 0)
7830 {
7831 /* This is the case of:
7832 (BUFFERP (src_object) && EQ (src_object, dst_object))
7833 As we have moved PT while replacing the original buffer
7834 contents, we must recover it now. */
7835 set_buffer_internal (XBUFFER (src_object));
f4a3cc44 7836 current_buffer->text->inhibit_shrinking = 0;
4776e638
KH
7837 if (saved_pt < from)
7838 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7839 else if (saved_pt < from + chars)
7840 TEMP_SET_PT_BOTH (from, from_byte);
4b4deea2 7841 else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
4776e638
KH
7842 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7843 saved_pt_byte + (coding->produced - bytes));
7844 else
7845 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7846 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7847
7848 if (need_marker_adjustment)
7849 {
7850 struct Lisp_Marker *tail;
7851
7852 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7853 if (tail->need_adjustment)
7854 {
7855 tail->need_adjustment = 0;
7856 if (tail->insertion_type)
7857 {
7858 tail->bytepos = from_byte;
7859 tail->charpos = from;
7860 }
7861 else
7862 {
7863 tail->bytepos = from_byte + coding->produced;
7864 tail->charpos
4b4deea2 7865 = (NILP (BVAR (current_buffer, enable_multibyte_characters))
64cedb0c
KH
7866 ? tail->bytepos : from + coding->produced_char);
7867 }
7868 }
7869 }
d46c5b12 7870 }
4776e638 7871
b3bfad50 7872 Vdeactivate_mark = old_deactivate_mark;
065e3595 7873 unbind_to (count, coding->dst_object);
d46c5b12
KH
7874}
7875
d46c5b12 7876
df7492f9 7877void
cf84bb53
JB
7878encode_coding_object (struct coding_system *coding,
7879 Lisp_Object src_object,
d311d28c
PE
7880 ptrdiff_t from, ptrdiff_t from_byte,
7881 ptrdiff_t to, ptrdiff_t to_byte,
cf84bb53 7882 Lisp_Object dst_object)
d46c5b12 7883{
d311d28c
PE
7884 ptrdiff_t count = SPECPDL_INDEX ();
7885 ptrdiff_t chars = to - from;
7886 ptrdiff_t bytes = to_byte - from_byte;
df7492f9 7887 Lisp_Object attrs;
c4a63b12 7888 int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
64cedb0c 7889 int need_marker_adjustment = 0;
c02d943b 7890 int kill_src_buffer = 0;
b3bfad50 7891 Lisp_Object old_deactivate_mark;
df7492f9 7892
b3bfad50 7893 old_deactivate_mark = Vdeactivate_mark;
df7492f9
KH
7894
7895 coding->src_object = src_object;
7896 coding->src_chars = chars;
7897 coding->src_bytes = bytes;
7898 coding->src_multibyte = chars < bytes;
7899
7900 attrs = CODING_ID_ATTRS (coding->id);
7901
64cedb0c
KH
7902 if (EQ (src_object, dst_object))
7903 {
7904 struct Lisp_Marker *tail;
7905
7906 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7907 {
7908 tail->need_adjustment
7909 = tail->charpos == (tail->insertion_type ? from : to);
7910 need_marker_adjustment |= tail->need_adjustment;
7911 }
7912 }
7913
df7492f9 7914 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 7915 {
24a73b0a 7916 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
7917 set_buffer_internal (XBUFFER (coding->src_object));
7918 if (STRINGP (src_object))
7919 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7920 else if (BUFFERP (src_object))
7921 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7922 else
b68864e5 7923 insert_1_both ((char *) coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 7924
df7492f9
KH
7925 if (EQ (src_object, dst_object))
7926 {
7927 set_buffer_internal (XBUFFER (src_object));
4776e638 7928 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
7929 del_range_both (from, from_byte, to, to_byte, 1);
7930 set_buffer_internal (XBUFFER (coding->src_object));
7931 }
7932
d4850d67 7933 {
b3bfad50 7934 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
d4850d67 7935
b3bfad50
KH
7936 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7937 old_deactivate_mark);
6cd7a139
DA
7938 safe_call2 (CODING_ATTR_PRE_WRITE (attrs),
7939 make_number (BEG), make_number (Z));
b3bfad50 7940 UNGCPRO;
d4850d67 7941 }
c02d943b
KH
7942 if (XBUFFER (coding->src_object) != current_buffer)
7943 kill_src_buffer = 1;
ac87bbef 7944 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7945 if (BEG != GPT)
7946 move_gap_both (BEG, BEG_BYTE);
7947 coding->src_chars = Z - BEG;
7948 coding->src_bytes = Z_BYTE - BEG_BYTE;
7949 coding->src_pos = BEG;
7950 coding->src_pos_byte = BEG_BYTE;
7951 coding->src_multibyte = Z < Z_BYTE;
7952 }
7953 else if (STRINGP (src_object))
d46c5b12 7954 {
24a73b0a 7955 code_conversion_save (0, 0);
df7492f9
KH
7956 coding->src_pos = from;
7957 coding->src_pos_byte = from_byte;
b73bfc1c 7958 }
df7492f9 7959 else if (BUFFERP (src_object))
b73bfc1c 7960 {
24a73b0a 7961 code_conversion_save (0, 0);
df7492f9 7962 set_buffer_internal (XBUFFER (src_object));
df7492f9 7963 if (EQ (src_object, dst_object))
d46c5b12 7964 {
4776e638 7965 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
7966 coding->src_object = del_range_1 (from, to, 1, 1);
7967 coding->src_pos = 0;
7968 coding->src_pos_byte = 0;
d46c5b12 7969 }
df7492f9 7970 else
d46c5b12 7971 {
ff0dacd7
KH
7972 if (from < GPT && to >= GPT)
7973 move_gap_both (from, from_byte);
df7492f9
KH
7974 coding->src_pos = from;
7975 coding->src_pos_byte = from_byte;
d46c5b12 7976 }
d46c5b12 7977 }
4776e638 7978 else
24a73b0a 7979 code_conversion_save (0, 0);
d46c5b12 7980
df7492f9 7981 if (BUFFERP (dst_object))
88993dfd 7982 {
df7492f9 7983 coding->dst_object = dst_object;
28f67a95
KH
7984 if (EQ (src_object, dst_object))
7985 {
7986 coding->dst_pos = from;
7987 coding->dst_pos_byte = from_byte;
7988 }
7989 else
7990 {
319a3947
KH
7991 struct buffer *current = current_buffer;
7992
7993 set_buffer_temp (XBUFFER (dst_object));
7994 coding->dst_pos = PT;
7995 coding->dst_pos_byte = PT_BYTE;
7996 move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7997 set_buffer_temp (current);
28f67a95 7998 }
df7492f9 7999 coding->dst_multibyte
4b4deea2 8000 = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
88993dfd 8001 }
df7492f9 8002 else if (EQ (dst_object, Qt))
d46c5b12 8003 {
5d009b3a 8004 ptrdiff_t dst_bytes = max (1, coding->src_chars);
df7492f9 8005 coding->dst_object = Qnil;
23f86fce 8006 coding->destination = xmalloc (dst_bytes);
5d009b3a 8007 coding->dst_bytes = dst_bytes;
df7492f9 8008 coding->dst_multibyte = 0;
d46c5b12
KH
8009 }
8010 else
8011 {
df7492f9
KH
8012 coding->dst_object = Qnil;
8013 coding->dst_multibyte = 0;
d46c5b12
KH
8014 }
8015
df7492f9 8016 encode_coding (coding);
d46c5b12 8017
df7492f9 8018 if (EQ (dst_object, Qt))
d46c5b12 8019 {
df7492f9
KH
8020 if (BUFFERP (coding->dst_object))
8021 coding->dst_object = Fbuffer_string ();
8022 else
d46c5b12 8023 {
df7492f9
KH
8024 coding->dst_object
8025 = make_unibyte_string ((char *) coding->destination,
8026 coding->produced);
8027 xfree (coding->destination);
d46c5b12 8028 }
4ed46869 8029 }
d46c5b12 8030
4776e638
KH
8031 if (saved_pt >= 0)
8032 {
8033 /* This is the case of:
8034 (BUFFERP (src_object) && EQ (src_object, dst_object))
8035 As we have moved PT while replacing the original buffer
8036 contents, we must recover it now. */
8037 set_buffer_internal (XBUFFER (src_object));
8038 if (saved_pt < from)
8039 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8040 else if (saved_pt < from + chars)
8041 TEMP_SET_PT_BOTH (from, from_byte);
4b4deea2 8042 else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
4776e638
KH
8043 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8044 saved_pt_byte + (coding->produced - bytes));
d46c5b12 8045 else
4776e638
KH
8046 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8047 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
8048
8049 if (need_marker_adjustment)
8050 {
8051 struct Lisp_Marker *tail;
8052
8053 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8054 if (tail->need_adjustment)
8055 {
8056 tail->need_adjustment = 0;
8057 if (tail->insertion_type)
8058 {
8059 tail->bytepos = from_byte;
8060 tail->charpos = from;
8061 }
8062 else
8063 {
8064 tail->bytepos = from_byte + coding->produced;
8065 tail->charpos
4b4deea2 8066 = (NILP (BVAR (current_buffer, enable_multibyte_characters))
64cedb0c
KH
8067 ? tail->bytepos : from + coding->produced_char);
8068 }
8069 }
8070 }
4776e638
KH
8071 }
8072
c02d943b
KH
8073 if (kill_src_buffer)
8074 Fkill_buffer (coding->src_object);
b3bfad50
KH
8075
8076 Vdeactivate_mark = old_deactivate_mark;
df7492f9 8077 unbind_to (count, Qnil);
b73bfc1c
KH
8078}
8079
df7492f9 8080
b73bfc1c 8081Lisp_Object
971de7fb 8082preferred_coding_system (void)
b73bfc1c 8083{
df7492f9 8084 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 8085
df7492f9 8086 return CODING_ID_NAME (id);
4ed46869
KH
8087}
8088
8089\f
8090#ifdef emacs
1397dc18 8091/*** 11. Emacs Lisp library functions ***/
4ed46869 8092
a7ca3326 8093DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 8094 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 8095See the documentation of `define-coding-system' for information
48b0f3ae 8096about coding-system objects. */)
5842a27b 8097 (Lisp_Object object)
4ed46869 8098{
d4a1d553
JB
8099 if (NILP (object)
8100 || CODING_SYSTEM_ID (object) >= 0)
44e8490d 8101 return Qt;
d4a1d553
JB
8102 if (! SYMBOLP (object)
8103 || NILP (Fget (object, Qcoding_system_define_form)))
44e8490d
KH
8104 return Qnil;
8105 return Qt;
4ed46869
KH
8106}
8107
a7ca3326 8108DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
9d991de8 8109 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae 8110 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
5842a27b 8111 (Lisp_Object prompt)
4ed46869 8112{
e0e989f6 8113 Lisp_Object val;
9d991de8
RS
8114 do
8115 {
4608c386
KH
8116 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8117 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 8118 }
8f924df7 8119 while (SCHARS (val) == 0);
e0e989f6 8120 return (Fintern (val, Qnil));
4ed46869
KH
8121}
8122
a7ca3326 8123DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae 8124 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
c7183fb8
GM
8125If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8126Ignores case when completing coding systems (all Emacs coding systems
8127are lower-case). */)
5842a27b 8128 (Lisp_Object prompt, Lisp_Object default_coding_system)
4ed46869 8129{
f44d27ce 8130 Lisp_Object val;
d311d28c 8131 ptrdiff_t count = SPECPDL_INDEX ();
c7183fb8 8132
9b787f3e 8133 if (SYMBOLP (default_coding_system))
57d25e6f 8134 default_coding_system = SYMBOL_NAME (default_coding_system);
c7183fb8 8135 specbind (Qcompletion_ignore_case, Qt);
4608c386 8136 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
8137 Qt, Qnil, Qcoding_system_history,
8138 default_coding_system, Qnil);
c7183fb8 8139 unbind_to (count, Qnil);
8f924df7 8140 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
8141}
8142
a7ca3326 8143DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4ed46869 8144 1, 1, 0,
48b0f3ae 8145 doc: /* Check validity of CODING-SYSTEM.
9ffd559c
KH
8146If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8147It is valid if it is nil or a symbol defined as a coding system by the
8148function `define-coding-system'. */)
5842a27b 8149 (Lisp_Object coding_system)
4ed46869 8150{
44e8490d
KH
8151 Lisp_Object define_form;
8152
8153 define_form = Fget (coding_system, Qcoding_system_define_form);
8154 if (! NILP (define_form))
8155 {
8156 Fput (coding_system, Qcoding_system_define_form, Qnil);
8157 safe_eval (define_form);
8158 }
4ed46869
KH
8159 if (!NILP (Fcoding_system_p (coding_system)))
8160 return coding_system;
fcad4ec4 8161 xsignal1 (Qcoding_system_error, coding_system);
4ed46869 8162}
df7492f9 8163
3a73fa5d 8164\f
89528eb3
KH
8165/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
8166 HIGHEST is nonzero, return the coding system of the highest
ad1746f5 8167 priority among the detected coding systems. Otherwise return a
89528eb3
KH
8168 list of detected coding systems sorted by their priorities. If
8169 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8170 multibyte form but contain only ASCII and eight-bit chars.
8171 Otherwise, the bytes are raw bytes.
8172
8173 CODING-SYSTEM controls the detection as below:
8174
8175 If it is nil, detect both text-format and eol-format. If the
8176 text-format part of CODING-SYSTEM is already specified
8177 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
8178 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8179 detect only text-format.

 SRC_CHARS is stored in the working coding structure as the number
 of characters in the source.

 When HIGHEST is nonzero and nothing was detected, the value is
 nil. */
8180
d46c5b12 8181Lisp_Object
cf84bb53 8182detect_coding_system (const unsigned char *src,
d311d28c 8183 ptrdiff_t src_chars, ptrdiff_t src_bytes,
cf84bb53
JB
8184 int highest, int multibytep,
8185 Lisp_Object coding_system)
4ed46869 8186{
8f924df7 8187 const unsigned char *src_end = src + src_bytes;
df7492f9 8188 Lisp_Object attrs, eol_type;
4533845d 8189 Lisp_Object val = Qnil;
df7492f9 8190 struct coding_system coding;
d3411f89 8191 ptrdiff_t id;
ff0dacd7 8192 struct coding_detection_info detect_info;
24a73b0a 8193 enum coding_category base_category;
2f3cbb32 8194 int null_byte_found = 0, eight_bit_found = 0;
b73bfc1c 8195
/* A nil CODING-SYSTEM is handled as `undecided'.  From here on
   CODING_SYSTEM holds the base name of the coding system.  */
df7492f9
KH
8196 if (NILP (coding_system))
8197 coding_system = Qundecided;
8198 setup_coding_system (coding_system, &coding);
8199 attrs = CODING_ID_ATTRS (coding.id);
8200 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 8201 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 8202
/* Describe the source text to the detector functions.  */
df7492f9 8203 coding.source = src;
24a73b0a 8204 coding.src_chars = src_chars;
df7492f9
KH
8205 coding.src_bytes = src_bytes;
8206 coding.src_multibyte = multibytep;
8207 coding.consumed = 0;
89528eb3 8208 coding.mode |= CODING_MODE_LAST_BLOCK;
c0e16b14 8209 coding.head_ascii = 0;
d46c5b12 8210
ff0dacd7 8211 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 8212
89528eb3 8213 /* At first, detect text-format if necessary. */
24a73b0a
KH
8214 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8215 if (base_category == coding_category_undecided)
4ed46869 8216 {
c4a63b12
PE
8217 enum coding_category category IF_LINT (= 0);
8218 struct coding_system *this IF_LINT (= NULL);
ff0dacd7 8219 int c, i;
88993dfd 8220
24a73b0a 8221 /* Skip all ASCII bytes except for a few ISO2022 controls. */
2f3cbb32 8222 for (; src < src_end; src++)
4ed46869 8223 {
df7492f9 8224 c = *src;
/* Eight-bit byte: remember it, and stop scanning as soon as a null
   byte has been seen as well.  */
6cb21a4f 8225 if (c & 0x80)
6cb21a4f 8226 {
2f3cbb32 8227 eight_bit_found = 1;
2f3cbb32
KH
8228 if (null_byte_found)
8229 break;
8230 }
c0e16b14 8231 else if (c < 0x20)
2f3cbb32
KH
8232 {
/* ESC, SI and SO trigger the ISO-2022 detector, at most once
   (DETECT_INFO.checked is zero only before any detector ran) and
   only when escape detection is not inhibited.  */
8233 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8234 && ! inhibit_iso_escape_detection
8235 && ! detect_info.checked)
6cb21a4f 8236 {
2f3cbb32
KH
8237 if (detect_coding_iso_2022 (&coding, &detect_info))
8238 {
8239 /* We have scanned the whole data. */
8240 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
c0e16b14
KH
8241 {
8242 /* We didn't find an 8-bit code. We may
8243 have found a null-byte, but it's very
8244 rare that a binary file conforms to
8245 ISO-2022. */
8246 src = src_end;
8247 coding.head_ascii = src - coding.source;
8248 }
8249 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
2f3cbb32
KH
8250 break;
8251 }
8252 }
/* Null byte (unless null-byte detection is inhibited): remember it,
   and stop scanning as soon as an eight-bit byte has been seen as
   well.  */
97b1b294 8253 else if (! c && !inhibit_null_byte_detection)
2f3cbb32
KH
8254 {
8255 null_byte_found = 1;
8256 if (eight_bit_found)
8257 break;
6cb21a4f 8258 }
/* HEAD_ASCII counts only the bytes that precede the first eight-bit
   byte.  */
c006c0c8
KH
8259 if (! eight_bit_found)
8260 coding.head_ascii++;
6cb21a4f 8261 }
c006c0c8 8262 else if (! eight_bit_found)
c0e16b14 8263 coding.head_ascii++;
4ed46869 8264 }
88993dfd 8265
/* Run the per-category detectors only if the scan above saw
   something other than plain leading ASCII.  */
2f3cbb32
KH
8266 if (null_byte_found || eight_bit_found
8267 || coding.head_ascii < coding.src_bytes
6cb21a4f
KH
8268 || detect_info.found)
8269 {
2f3cbb32 8270 if (coding.head_ascii == coding.src_bytes)
6cb21a4f
KH
8271 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
8272 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7 8273 {
6cb21a4f 8274 category = coding_priorities[i];
c7266f4a 8275 this = coding_categories + category;
6cb21a4f 8276 if (detect_info.found & (1 << category))
ff0dacd7
KH
8277 break;
8278 }
6cb21a4f 8279 else
2f3cbb32
KH
8280 {
/* With a null byte present, mark every category outside
   CATEGORY_MASK_UTF_16 as already checked and rejected.  */
8281 if (null_byte_found)
8282 {
8283 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8284 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8285 }
/* Try the categories in priority order.  When HIGHEST is nonzero the
   loop stops at the first category that is found.  */
8286 for (i = 0; i < coding_category_raw_text; i++)
8287 {
8288 category = coding_priorities[i];
8289 this = coding_categories + category;
6cb21a4f 8290
2f3cbb32
KH
8291 if (this->id < 0)
8292 {
8293 /* No coding system of this category is defined. */
8294 detect_info.rejected |= (1 << category);
8295 }
8296 else if (category >= coding_category_raw_text)
8297 continue;
8298 else if (detect_info.checked & (1 << category))
8299 {
8300 if (highest
8301 && (detect_info.found & (1 << category)))
6cb21a4f 8302 break;
2f3cbb32
KH
8303 }
8304 else if ((*(this->detector)) (&coding, &detect_info)
8305 && highest
8306 && (detect_info.found & (1 << category)))
8307 {
/* Replace the auto category by the concrete byte order that was
   found.  */
8308 if (category == coding_category_utf_16_auto)
8309 {
8310 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8311 category = coding_category_utf_16_le;
8312 else
8313 category = coding_category_utf_16_be;
8314 }
8315 break;
8316 }
8317 }
8318 }
6cb21a4f 8319 }
ec6d2bb8 8320
/* Build VAL, a list of coding-system ids.  The ids are replaced by
   coding-system names in the eol-format step below.  */
4cddb209
KH
8321 if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8322 || null_byte_found)
ec6d2bb8 8323 {
/* Everything was rejected, or a null byte was seen: answer
   `no-conversion'.  */
ff0dacd7 8324 detect_info.found = CATEGORY_MASK_RAW_TEXT;
4cddb209 8325 id = CODING_SYSTEM_ID (Qno_conversion);
89528eb3
KH
8326 val = Fcons (make_number (id), Qnil);
8327 }
ff0dacd7 8328 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 8329 {
/* Nothing was found and nothing was rejected: stay undecided.  */
ff0dacd7 8330 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
8331 id = coding_categories[coding_category_undecided].id;
8332 val = Fcons (make_number (id), Qnil);
8333 }
8334 else if (highest)
8335 {
ff0dacd7 8336 if (detect_info.found)
ec6d2bb8 8337 {
ff0dacd7
KH
8338 detect_info.found = 1 << category;
8339 val = Fcons (make_number (this->id), Qnil);
8340 }
8341 else
/* Nothing was positively found: take the highest-priority category
   that was not rejected.  */
8342 for (i = 0; i < coding_category_raw_text; i++)
8343 if (! (detect_info.rejected & (1 << coding_priorities[i])))
8344 {
8345 detect_info.found = 1 << coding_priorities[i];
8346 id = coding_categories[coding_priorities[i]].id;
8347 val = Fcons (make_number (id), Qnil);
8348 break;
8349 }
8350 }
89528eb3
KH
8351 else
8352 {
/* Full list requested.  Both loops walk from the lowest priority and
   cons onto the front, so each group ends up in priority order; the
   found categories are pushed last and therefore precede the ones
   that were merely not rejected.  */
ff0dacd7
KH
8353 int mask = detect_info.rejected | detect_info.found;
8354 int found = 0;
ec6d2bb8 8355
89528eb3 8356 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
8357 {
8358 category = coding_priorities[i];
8359 if (! (mask & (1 << category)))
ec6d2bb8 8360 {
ff0dacd7
KH
8361 found |= 1 << category;
8362 id = coding_categories[category].id;
c7266f4a
KH
8363 if (id >= 0)
8364 val = Fcons (make_number (id), val);
ff0dacd7
KH
8365 }
8366 }
8367 for (i = coding_category_raw_text - 1; i >= 0; i--)
8368 {
8369 category = coding_priorities[i];
8370 if (detect_info.found & (1 << category))
8371 {
8372 id = coding_categories[category].id;
8373 val = Fcons (make_number (id), val);
ec6d2bb8 8374 }
ec6d2bb8 8375 }
ff0dacd7 8376 detect_info.found |= found;
ec6d2bb8 8377 }
ec6d2bb8 8378 }
/* Choose between the with-signature and without-signature UTF-8
   categories.  VAL stays nil if the UTF-8 detector returns zero.  */
a470d443
KH
8379 else if (base_category == coding_category_utf_8_auto)
8380 {
8381 if (detect_coding_utf_8 (&coding, &detect_info))
8382 {
8383 struct coding_system *this;
8384
8385 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8386 this = coding_categories + coding_category_utf_8_sig;
8387 else
8388 this = coding_categories + coding_category_utf_8_nosig;
8389 val = Fcons (make_number (this->id), Qnil);
8390 }
8391 }
/* Choose a concrete UTF-16 category.  VAL stays nil if the UTF-16
   detector returns zero.  */
24a73b0a
KH
8392 else if (base_category == coding_category_utf_16_auto)
8393 {
8394 if (detect_coding_utf_16 (&coding, &detect_info))
8395 {
24a73b0a
KH
8396 struct coding_system *this;
8397
8398 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8399 this = coding_categories + coding_category_utf_16_le;
8400 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8401 this = coding_categories + coding_category_utf_16_be;
8402 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8403 this = coding_categories + coding_category_utf_16_be_nosig;
8404 else
8405 this = coding_categories + coding_category_utf_16_le_nosig;
8406 val = Fcons (make_number (this->id), Qnil);
8407 }
8408 }
/* Any other category: the text-format is taken as given and is the
   only candidate.  */
df7492f9
KH
8409 else
8410 {
ff0dacd7 8411 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 8412 val = Fcons (make_number (coding.id), Qnil);
4ed46869 8413 }
df7492f9 8414
89528eb3 8415 /* Then, detect eol-format if necessary. */
df7492f9 8416 {
4533845d 8417 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
df7492f9
KH
8418 Lisp_Object tail;
8419
/* A vector EOL_TYPE is the case in which the eol-format is detected
   from the data; otherwise EOL_TYPE names the format directly.  */
89528eb3
KH
8420 if (VECTORP (eol_type))
8421 {
ff0dacd7 8422 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
2f3cbb32
KH
8423 {
/* When a null byte was seen, LF is assumed without scanning.  */
8424 if (null_byte_found)
8425 normal_eol = EOL_SEEN_LF;
8426 else
8427 normal_eol = detect_eol (coding.source, src_bytes,
8428 coding_category_raw_text);
8429 }
ff0dacd7
KH
8430 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8431 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
8432 utf_16_be_eol = detect_eol (coding.source, src_bytes,
8433 coding_category_utf_16_be);
ff0dacd7
KH
8434 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8435 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
8436 utf_16_le_eol = detect_eol (coding.source, src_bytes,
8437 coding_category_utf_16_le);
8438 }
8439 else
8440 {
8441 if (EQ (eol_type, Qunix))
8442 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8443 else if (EQ (eol_type, Qdos))
8444 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8445 else
8446 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8447 }
8448
/* Replace each id in VAL by a coding-system name.  When the
   candidate's own eol type is a vector and an eol-format is known,
   the name comes from slot 0 (LF), 1 (CRLF) or 2 (CR) of that
   vector; otherwise the candidate's own name is used.  */
df7492f9
KH
8449 for (tail = val; CONSP (tail); tail = XCDR (tail))
8450 {
89528eb3 8451 enum coding_category category;
df7492f9 8452 int this_eol;
89528eb3
KH
8453
8454 id = XINT (XCAR (tail));
8455 attrs = CODING_ID_ATTRS (id);
8456 category = XINT (CODING_ATTR_CATEGORY (attrs));
8457 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
8458 if (VECTORP (eol_type))
8459 {
89528eb3
KH
8460 if (category == coding_category_utf_16_be
8461 || category == coding_category_utf_16_be_nosig)
8462 this_eol = utf_16_be_eol;
8463 else if (category == coding_category_utf_16_le
8464 || category == coding_category_utf_16_le_nosig)
8465 this_eol = utf_16_le_eol;
df7492f9 8466 else
89528eb3
KH
8467 this_eol = normal_eol;
8468
df7492f9
KH
8469 if (this_eol == EOL_SEEN_LF)
8470 XSETCAR (tail, AREF (eol_type, 0));
8471 else if (this_eol == EOL_SEEN_CRLF)
8472 XSETCAR (tail, AREF (eol_type, 1));
8473 else if (this_eol == EOL_SEEN_CR)
8474 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
8475 else
8476 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 8477 }
89528eb3
KH
8478 else
8479 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
8480 }
8481 }
ec6d2bb8 8482
/* With HIGHEST, return only the first element (or nil when VAL is
   empty); otherwise return the whole list.  */
4533845d 8483 return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
ec6d2bb8
KH
8484}
8485
ec6d2bb8 8486
d46c5b12
KH
8487DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8488 2, 3, 0,
48b0f3ae
PJ
8489 doc: /* Detect coding system of the text in the region between START and END.
8490Return a list of possible coding systems ordered by priority.
b811c52b
KH
8491The coding systems to try and their priorities follows what
8492the function `coding-system-priority-list' (which see) returns.
ec6d2bb8 8493
12e0131a 8494If only ASCII characters are found (except for such ISO-2022 control
d4a1d553
JB
8495characters as ESC), it returns a list of single element `undecided'
8496or its subsidiary coding system according to a detected end-of-line
8497format.
ec6d2bb8 8498
48b0f3ae
PJ
8499If optional argument HIGHEST is non-nil, return the coding system of
8500highest priority. */)
5842a27b 8501 (Lisp_Object start, Lisp_Object end, Lisp_Object highest)
d46c5b12 8502{
d311d28c
PE
8503 ptrdiff_t from, to;
8504 ptrdiff_t from_byte, to_byte;
ec6d2bb8 8505
b7826503
PJ
8506 CHECK_NUMBER_COERCE_MARKER (start);
8507 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 8508
d46c5b12
KH
8509 validate_region (&start, &end);
8510 from = XINT (start), to = XINT (end);
8511 from_byte = CHAR_TO_BYTE (from);
8512 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 8513
d46c5b12
KH
8514 if (from < GPT && to >= GPT)
8515 move_gap_both (to, to_byte);
c210f766 8516
d46c5b12 8517 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 8518 to - from, to_byte - from_byte,
0a28aafb 8519 !NILP (highest),
4b4deea2 8520 !NILP (BVAR (current_buffer
5d8ea120 8521 , enable_multibyte_characters)),
df7492f9 8522 Qnil);
ec6d2bb8
KH
8523}
8524
d46c5b12
KH
8525DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8526 1, 2, 0,
48b0f3ae
PJ
8527 doc: /* Detect coding system of the text in STRING.
8528Return a list of possible coding systems ordered by priority.
67ceab9d
KH
8529The coding systems to try and their priorities follows what
8530the function `coding-system-priority-list' (which see) returns.
fb88bf2d 8531
12e0131a 8532If only ASCII characters are found (except for such ISO-2022 control
d4a1d553
JB
8533characters as ESC), it returns a list of single element `undecided'
8534or its subsidiary coding system according to a detected end-of-line
8535format.
d46c5b12 8536
48b0f3ae
PJ
8537If optional argument HIGHEST is non-nil, return the coding system of
8538highest priority. */)
5842a27b 8539 (Lisp_Object string, Lisp_Object highest)
d46c5b12 8540{
b7826503 8541 CHECK_STRING (string);
b73bfc1c 8542
24a73b0a
KH
8543 return detect_coding_system (SDATA (string),
8544 SCHARS (string), SBYTES (string),
8f924df7 8545 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 8546 Qnil);
4ed46869 8547}
4ed46869 8548
b73bfc1c 8549
55d4c1b2 8550static inline int
971de7fb 8551char_encodable_p (int c, Lisp_Object attrs)
05e6f5dc 8552{
df7492f9 8553 Lisp_Object tail;
df7492f9 8554 struct charset *charset;
7d64c6ad 8555 Lisp_Object translation_table;
d46c5b12 8556
7d64c6ad 8557 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 8558 if (! NILP (translation_table))
7d64c6ad 8559 c = translate_char (translation_table, c);
df7492f9
KH
8560 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8561 CONSP (tail); tail = XCDR (tail))
e133c8fa 8562 {
df7492f9
KH
8563 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8564 if (CHAR_CHARSET_P (c, charset))
8565 break;
e133c8fa 8566 }
df7492f9 8567 return (! NILP (tail));
05e6f5dc 8568}
83fa074f 8569
fb88bf2d 8570
df7492f9
KH
8571/* Return a list of coding systems that safely encode the text between
8572 START and END. If EXCLUDE is non-nil, it is a list of coding
8573 systems not to check. The returned list doesn't contain any such
48468dac 8574 coding systems. In any case, if the text contains only ASCII or is
df7492f9 8575 unibyte, return t. */
e077cc80 8576
df7492f9
KH
8577DEFUN ("find-coding-systems-region-internal",
8578 Ffind_coding_systems_region_internal,
8579 Sfind_coding_systems_region_internal, 2, 3, 0,
8580 doc: /* Internal use only. */)
5842a27b 8581 (Lisp_Object start, Lisp_Object end, Lisp_Object exclude)
df7492f9
KH
8582{
8583 Lisp_Object coding_attrs_list, safe_codings;
d311d28c 8584 ptrdiff_t start_byte, end_byte;
7c78e542 8585 const unsigned char *p, *pbeg, *pend;
df7492f9 8586 int c;
0e727afa 8587 Lisp_Object tail, elt, work_table;
d46c5b12 8588
df7492f9
KH
8589 if (STRINGP (start))
8590 {
8591 if (!STRING_MULTIBYTE (start)
8f924df7 8592 || SCHARS (start) == SBYTES (start))
df7492f9
KH
8593 return Qt;
8594 start_byte = 0;
8f924df7 8595 end_byte = SBYTES (start);
df7492f9
KH
8596 }
8597 else
d46c5b12 8598 {
df7492f9
KH
8599 CHECK_NUMBER_COERCE_MARKER (start);
8600 CHECK_NUMBER_COERCE_MARKER (end);
8601 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8602 args_out_of_range (start, end);
4b4deea2 8603 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
df7492f9
KH
8604 return Qt;
8605 start_byte = CHAR_TO_BYTE (XINT (start));
8606 end_byte = CHAR_TO_BYTE (XINT (end));
8607 if (XINT (end) - XINT (start) == end_byte - start_byte)
8608 return Qt;
d46c5b12 8609
e1c23804 8610 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 8611 {
e1c23804
DL
8612 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8613 move_gap_both (XINT (start), start_byte);
df7492f9 8614 else
e1c23804 8615 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
8616 }
8617 }
8618
df7492f9
KH
8619 coding_attrs_list = Qnil;
8620 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8621 if (NILP (exclude)
8622 || NILP (Fmemq (XCAR (tail), exclude)))
8623 {
8624 Lisp_Object attrs;
d46c5b12 8625
df7492f9
KH
8626 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8627 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8628 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
8629 {
8630 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 8631 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
8632 coding_attrs_list = Fcons (attrs, coding_attrs_list);
8633 }
df7492f9 8634 }
d46c5b12 8635
df7492f9 8636 if (STRINGP (start))
8f924df7 8637 p = pbeg = SDATA (start);
df7492f9
KH
8638 else
8639 p = pbeg = BYTE_POS_ADDR (start_byte);
8640 pend = p + (end_byte - start_byte);
b843d1ae 8641
df7492f9
KH
8642 while (p < pend && ASCII_BYTE_P (*p)) p++;
8643 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 8644
0e727afa 8645 work_table = Fmake_char_table (Qnil, Qnil);
05e6f5dc 8646 while (p < pend)
72d1a715 8647 {
df7492f9
KH
8648 if (ASCII_BYTE_P (*p))
8649 p++;
72d1a715
RS
8650 else
8651 {
df7492f9 8652 c = STRING_CHAR_ADVANCE (p);
0e727afa
YM
8653 if (!NILP (char_table_ref (work_table, c)))
8654 /* This character was already checked. Ignore it. */
8655 continue;
12410ef1 8656
df7492f9
KH
8657 charset_map_loaded = 0;
8658 for (tail = coding_attrs_list; CONSP (tail);)
8659 {
8660 elt = XCAR (tail);
8661 if (NILP (elt))
8662 tail = XCDR (tail);
8663 else if (char_encodable_p (c, elt))
8664 tail = XCDR (tail);
8665 else if (CONSP (XCDR (tail)))
8666 {
8667 XSETCAR (tail, XCAR (XCDR (tail)));
8668 XSETCDR (tail, XCDR (XCDR (tail)));
8669 }
8670 else
8671 {
8672 XSETCAR (tail, Qnil);
8673 tail = XCDR (tail);
8674 }
8675 }
8676 if (charset_map_loaded)
8677 {
d311d28c 8678 ptrdiff_t p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 8679
df7492f9 8680 if (STRINGP (start))
8f924df7 8681 pbeg = SDATA (start);
df7492f9
KH
8682 else
8683 pbeg = BYTE_POS_ADDR (start_byte);
8684 p = pbeg + p_offset;
8685 pend = pbeg + pend_offset;
8686 }
0e727afa 8687 char_table_set (work_table, c, Qt);
df7492f9 8688 }
ec6d2bb8 8689 }
fb88bf2d 8690
988b3759 8691 safe_codings = list2 (Qraw_text, Qno_conversion);
df7492f9
KH
8692 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8693 if (! NILP (XCAR (tail)))
8694 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 8695
05e6f5dc
KH
8696 return safe_codings;
8697}
4956c225 8698
d46c5b12 8699
8f924df7
KH
8700DEFUN ("unencodable-char-position", Funencodable_char_position,
8701 Sunencodable_char_position, 3, 5, 0,
8702 doc: /*
8703Return position of first un-encodable character in a region.
d4a1d553 8704START and END specify the region and CODING-SYSTEM specifies the
8f924df7 8705encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 8706
8f924df7
KH
8707If optional 4th argument COUNT is non-nil, it specifies at most how
8708many un-encodable characters to search. In this case, the value is a
8709list of positions.
d46c5b12 8710
8f924df7
KH
8711If optional 5th argument STRING is non-nil, it is a string to search
8712for un-encodable characters. In that case, START and END are indexes
8713to the string. */)
5842a27b 8714 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
8f924df7 8715{
d311d28c 8716 EMACS_INT n;
8f924df7 8717 struct coding_system coding;
7d64c6ad 8718 Lisp_Object attrs, charset_list, translation_table;
8f924df7 8719 Lisp_Object positions;
d311d28c 8720 ptrdiff_t from, to;
8f924df7
KH
8721 const unsigned char *p, *stop, *pend;
8722 int ascii_compatible;
fb88bf2d 8723
/* A coding system of type `raw-text' yields nil at once.  */
8f924df7
KH
8724 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8725 attrs = CODING_ID_ATTRS (coding.id);
8726 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8727 return Qnil;
8728 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8729 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 8730 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 8731
8f924df7
KH
8732 if (NILP (string))
8733 {
/* Buffer case.  Return nil for a unibyte buffer, and also when the
   coding system is ASCII compatible and the region has as many bytes
   as characters.  */
8734 validate_region (&start, &end);
8735 from = XINT (start);
8736 to = XINT (end);
4b4deea2 8737 if (NILP (BVAR (current_buffer, enable_multibyte_characters))
8f924df7
KH
8738 || (ascii_compatible
8739 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8740 return Qnil;
8741 p = CHAR_POS_ADDR (from);
8742 pend = CHAR_POS_ADDR (to);
/* When the region spans the gap, scan up to the gap first; the main
   loop then continues from the end of the gap.  */
8743 if (from < GPT && to >= GPT)
8744 stop = GPT_ADDR;
8745 else
8746 stop = pend;
8747 }
8748 else
8749 {
/* String case.  START and END are character indices into STRING.  */
8750 CHECK_STRING (string);
8751 CHECK_NATNUM (start);
8752 CHECK_NATNUM (end);
d311d28c
PE
8753 if (! (XINT (start) <= XINT (end) && XINT (end) <= SCHARS (string)))
8754 args_out_of_range_3 (string, start, end);
8f924df7
KH
8755 from = XINT (start);
8756 to = XINT (end);
8f924df7
KH
8757 if (! STRING_MULTIBYTE (string))
8758 return Qnil;
8759 p = SDATA (string) + string_char_to_byte (string, from);
8760 stop = pend = SDATA (string) + string_char_to_byte (string, to);
8761 if (ascii_compatible && (to - from) == (pend - p))
8762 return Qnil;
8763 }
f2558efd 8764
/* N is the number of positions still wanted; it is 1 when COUNT is
   nil.
   NOTE(review): if COUNT is 0, N drops below zero on the first hit
   and the `n == 0' test below never fires, so every un-encodable
   position is collected -- confirm that this is intended.  */
8f924df7
KH
8765 if (NILP (count))
8766 n = 1;
8767 else
b73bfc1c 8768 {
8f924df7
KH
8769 CHECK_NATNUM (count);
8770 n = XINT (count);
b73bfc1c
KH
8771 }
8772
8f924df7 8773 positions = Qnil;
3633e3aa 8774 charset_map_loaded = 0;
8f924df7 8775 while (1)
d46c5b12 8776 {
8f924df7 8777 int c;
ec6d2bb8 8778
/* FROM is advanced together with P so that it always holds the
   position of the character at P.  */
8f924df7
KH
8779 if (ascii_compatible)
8780 while (p < stop && ASCII_BYTE_P (*p))
8781 p++, from++;
8782 if (p >= stop)
0e79d667 8783 {
/* Either the end of the text was reached, or only the gap was; in
   the latter case continue from the end of the gap.  */
8f924df7
KH
8784 if (p >= pend)
8785 break;
8786 stop = pend;
8787 p = GAP_END_ADDR;
0e79d667 8788 }
ec6d2bb8 8789
/* Record FROM when the translated character belongs to none of the
   charsets in CHARSET_LIST.  */
8f924df7
KH
8790 c = STRING_CHAR_ADVANCE (p);
8791 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
8792 && ! char_charset (translate_char (translation_table, c),
8793 charset_list, NULL))
ec6d2bb8 8794 {
8f924df7
KH
8795 positions = Fcons (make_number (from), positions);
8796 n--;
8797 if (n == 0)
8798 break;
ec6d2bb8
KH
8799 }
8800
8f924df7 8801 from++;
/* If a charset map was loaded during the check, recompute the buffer
   pointers from the character positions (presumably because the
   buffer text may have been relocated -- TODO confirm).  Nothing is
   recomputed in the string case.  */
3633e3aa
KH
8802 if (charset_map_loaded && NILP (string))
8803 {
8804 p = CHAR_POS_ADDR (from);
8805 pend = CHAR_POS_ADDR (to);
8806 if (from < GPT && to >= GPT)
8807 stop = GPT_ADDR;
8808 else
8809 stop = pend;
8810 charset_map_loaded = 0;
8811 }
8f924df7 8812 }
d46c5b12 8813
/* POSITIONS was built newest-first.  Without COUNT return its single
   element (or nil); with COUNT return the list in ascending order.  */
8f924df7
KH
8814 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8815}
d46c5b12 8816
d46c5b12 8817
df7492f9
KH
8818DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8819 Scheck_coding_systems_region, 3, 3, 0,
8820 doc: /* Check if the region is encodable by coding systems.
d46c5b12 8821
df7492f9
KH
8822START and END are buffer positions specifying the region.
8823CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 8824
df7492f9 8825The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
d4a1d553 8826CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
df7492f9
KH
8827whole region, POS0, POS1, ... are buffer positions where non-encodable
8828characters are found.
93dec019 8829
df7492f9
KH
8830If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8831value is nil.
93dec019 8832
df7492f9
KH
8833START may be a string. In that case, check if the string is
8834encodable, and the value contains indices to the string instead of
5704f39a
KH
8835buffer positions. END is ignored.
8836
4c1958f4 8837If the current buffer (or START if it is a string) is unibyte, the value
5704f39a 8838is nil. */)
5842a27b 8839 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system_list)
05e6f5dc 8840{
df7492f9 8841 Lisp_Object list;
d311d28c
PE
8842 ptrdiff_t start_byte, end_byte;
8843 ptrdiff_t pos;
7c78e542 8844 const unsigned char *p, *pbeg, *pend;
df7492f9 8845 int c;
7d64c6ad 8846 Lisp_Object tail, elt, attrs;
70ad9fc4 8847
05e6f5dc
KH
8848 if (STRINGP (start))
8849 {
df7492f9 8850 if (!STRING_MULTIBYTE (start)
4c1958f4 8851 || SCHARS (start) == SBYTES (start))
df7492f9
KH
8852 return Qnil;
8853 start_byte = 0;
8f924df7 8854 end_byte = SBYTES (start);
df7492f9 8855 pos = 0;
d46c5b12 8856 }
05e6f5dc 8857 else
b73bfc1c 8858 {
b7826503
PJ
8859 CHECK_NUMBER_COERCE_MARKER (start);
8860 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
8861 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8862 args_out_of_range (start, end);
4b4deea2 8863 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
df7492f9
KH
8864 return Qnil;
8865 start_byte = CHAR_TO_BYTE (XINT (start));
8866 end_byte = CHAR_TO_BYTE (XINT (end));
8867 if (XINT (end) - XINT (start) == end_byte - start_byte)
5704f39a 8868 return Qnil;
df7492f9 8869
e1c23804 8870 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 8871 {
e1c23804
DL
8872 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8873 move_gap_both (XINT (start), start_byte);
df7492f9 8874 else
e1c23804 8875 move_gap_both (XINT (end), end_byte);
b73bfc1c 8876 }
e1c23804 8877 pos = XINT (start);
b73bfc1c 8878 }
7553d0e1 8879
df7492f9
KH
8880 list = Qnil;
8881 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 8882 {
df7492f9 8883 elt = XCAR (tail);
7d64c6ad 8884 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
8885 ASET (attrs, coding_attr_trans_tbl,
8886 get_translation_table (attrs, 1, NULL));
7d64c6ad 8887 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
8888 }
8889
df7492f9 8890 if (STRINGP (start))
8f924df7 8891 p = pbeg = SDATA (start);
72d1a715 8892 else
df7492f9
KH
8893 p = pbeg = BYTE_POS_ADDR (start_byte);
8894 pend = p + (end_byte - start_byte);
4ed46869 8895
df7492f9
KH
8896 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8897 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 8898
df7492f9 8899 while (p < pend)
d46c5b12 8900 {
df7492f9
KH
8901 if (ASCII_BYTE_P (*p))
8902 p++;
e133c8fa 8903 else
05e6f5dc 8904 {
df7492f9
KH
8905 c = STRING_CHAR_ADVANCE (p);
8906
8907 charset_map_loaded = 0;
8908 for (tail = list; CONSP (tail); tail = XCDR (tail))
8909 {
8910 elt = XCDR (XCAR (tail));
8911 if (! char_encodable_p (c, XCAR (elt)))
8912 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8913 }
8914 if (charset_map_loaded)
8915 {
d311d28c 8916 ptrdiff_t p_offset = p - pbeg, pend_offset = pend - pbeg;
df7492f9
KH
8917
8918 if (STRINGP (start))
8f924df7 8919 pbeg = SDATA (start);
df7492f9
KH
8920 else
8921 pbeg = BYTE_POS_ADDR (start_byte);
8922 p = pbeg + p_offset;
8923 pend = pbeg + pend_offset;
8924 }
05e6f5dc 8925 }
df7492f9 8926 pos++;
d46c5b12 8927 }
4ed46869 8928
df7492f9
KH
8929 tail = list;
8930 list = Qnil;
8931 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 8932 {
df7492f9
KH
8933 elt = XCAR (tail);
8934 if (CONSP (XCDR (XCDR (elt))))
8935 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8936 list);
ec6d2bb8 8937 }
2b4f9037 8938
df7492f9 8939 return list;
d46c5b12
KH
8940}
8941
3fd9494b 8942
74ab6df5 8943static Lisp_Object
cf84bb53
JB
8944code_convert_region (Lisp_Object start, Lisp_Object end,
8945 Lisp_Object coding_system, Lisp_Object dst_object,
8946 int encodep, int norecord)
4ed46869 8947{
3a73fa5d 8948 struct coding_system coding;
d311d28c 8949 ptrdiff_t from, from_byte, to, to_byte;
df7492f9 8950 Lisp_Object src_object;
4ed46869 8951
b7826503
PJ
8952 CHECK_NUMBER_COERCE_MARKER (start);
8953 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
8954 if (NILP (coding_system))
8955 coding_system = Qno_conversion;
8956 else
8957 CHECK_CODING_SYSTEM (coding_system);
8958 src_object = Fcurrent_buffer ();
8959 if (NILP (dst_object))
8960 dst_object = src_object;
8961 else if (! EQ (dst_object, Qt))
8962 CHECK_BUFFER (dst_object);
3a73fa5d 8963
d46c5b12
KH
8964 validate_region (&start, &end);
8965 from = XFASTINT (start);
df7492f9 8966 from_byte = CHAR_TO_BYTE (from);
d46c5b12 8967 to = XFASTINT (end);
df7492f9 8968 to_byte = CHAR_TO_BYTE (to);
764ca8da 8969
df7492f9
KH
8970 setup_coding_system (coding_system, &coding);
8971 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 8972
df7492f9
KH
8973 if (encodep)
8974 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8975 dst_object);
8976 else
8977 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8978 dst_object);
8979 if (! norecord)
8980 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 8981
df7492f9
KH
8982 return (BUFFERP (dst_object)
8983 ? make_number (coding.produced_char)
8984 : coding.dst_object);
4031e2bf 8985}
78108bcd 8986
4ed46869 8987
4031e2bf 8988DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 8989 3, 4, "r\nzCoding system: ",
48b0f3ae 8990 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
8991When called from a program, takes four arguments:
8992 START, END, CODING-SYSTEM, and DESTINATION.
8993START and END are buffer positions.
8844fa83 8994
df7492f9 8995Optional 4th arguments DESTINATION specifies where the decoded text goes.
2354b80b 8996If nil, the region between START and END is replaced by the decoded text.
1560f91a
EZ
8997If buffer, the decoded text is inserted in that buffer after point (point
8998does not move).
446dcd75 8999In those cases, the length of the decoded text is returned.
319a3947 9000If DESTINATION is t, the decoded text is returned.
8844fa83 9001
48b0f3ae
PJ
9002This function sets `last-coding-system-used' to the precise coding system
9003used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
319a3947 9004not fully specified.) */)
5842a27b 9005 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
4031e2bf 9006{
df7492f9 9007 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 9008}
8844fa83 9009
3a73fa5d 9010DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
9011 3, 4, "r\nzCoding system: ",
9012 doc: /* Encode the current region by specified coding system.
d4a1d553
JB
9013When called from a program, takes four arguments:
9014 START, END, CODING-SYSTEM and DESTINATION.
9015START and END are buffer positions.
d46c5b12 9016
df7492f9
KH
9017Optional 4th arguments DESTINATION specifies where the encoded text goes.
9018If nil, the region between START and END is replace by the encoded text.
1560f91a
EZ
9019If buffer, the encoded text is inserted in that buffer after point (point
9020does not move).
446dcd75 9021In those cases, the length of the encoded text is returned.
319a3947 9022If DESTINATION is t, the encoded text is returned.
2391eaa4 9023
48b0f3ae
PJ
9024This function sets `last-coding-system-used' to the precise coding system
9025used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
319a3947 9026not fully specified.) */)
5842a27b 9027 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
3a73fa5d 9028{
df7492f9 9029 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
9030}
9031
9032Lisp_Object
6f704c76
DN
9033code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9034 Lisp_Object dst_object, int encodep, int nocopy, int norecord)
b73bfc1c 9035{
4031e2bf 9036 struct coding_system coding;
d311d28c 9037 ptrdiff_t chars, bytes;
ec6d2bb8 9038
b7826503 9039 CHECK_STRING (string);
d46c5b12 9040 if (NILP (coding_system))
4956c225 9041 {
df7492f9
KH
9042 if (! norecord)
9043 Vlast_coding_system_used = Qno_conversion;
9044 if (NILP (dst_object))
9045 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 9046 }
b73bfc1c 9047
df7492f9
KH
9048 if (NILP (coding_system))
9049 coding_system = Qno_conversion;
9050 else
9051 CHECK_CODING_SYSTEM (coding_system);
9052 if (NILP (dst_object))
9053 dst_object = Qt;
9054 else if (! EQ (dst_object, Qt))
9055 CHECK_BUFFER (dst_object);
73be902c 9056
df7492f9 9057 setup_coding_system (coding_system, &coding);
d46c5b12 9058 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
9059 chars = SCHARS (string);
9060 bytes = SBYTES (string);
df7492f9
KH
9061 if (encodep)
9062 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9063 else
9064 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9065 if (! norecord)
9066 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 9067
df7492f9
KH
9068 return (BUFFERP (dst_object)
9069 ? make_number (coding.produced_char)
9070 : coding.dst_object);
4ed46869 9071}
73be902c 9072
b73bfc1c 9073
ecec61c1 9074/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 9075 Do not set Vlast_coding_system_used.
4ed46869 9076
ec6d2bb8
KH
9077 This function is called only from macros DECODE_FILE and
9078 ENCODE_FILE, thus we ignore character composition. */
4ed46869 9079
ecec61c1 9080Lisp_Object
cf84bb53
JB
9081code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9082 int encodep)
4ed46869 9083{
0be8721c 9084 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
9085}
9086
4ed46869 9087
a7ca3326 9088DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
df7492f9
KH
9089 2, 4, 0,
9090 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9091
9092Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9093if the decoding operation is trivial.
ecec61c1 9094
d4a1d553 9095Optional fourth arg BUFFER non-nil means that the decoded text is
1560f91a
EZ
9096inserted in that buffer after point (point does not move). In this
9097case, the return value is the length of the decoded text.
ecec61c1 9098
df7492f9
KH
9099This function sets `last-coding-system-used' to the precise coding system
9100used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
d4a1d553 9101not fully specified.) */)
5842a27b 9102 (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
4ed46869 9103{
df7492f9
KH
9104 return code_convert_string (string, coding_system, buffer,
9105 0, ! NILP (nocopy), 0);
4ed46869
KH
9106}
9107
df7492f9
KH
9108DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9109 2, 4, 0,
9110 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9111
9112Optional third arg NOCOPY non-nil means it is OK to return STRING
9113itself if the encoding operation is trivial.
9114
d4a1d553 9115Optional fourth arg BUFFER non-nil means that the encoded text is
1560f91a
EZ
9116inserted in that buffer after point (point does not move). In this
9117case, the return value is the length of the encoded text.
df7492f9
KH
9118
9119This function sets `last-coding-system-used' to the precise coding system
9120used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9121not fully specified.) */)
5842a27b 9122 (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
4ed46869 9123{
df7492f9 9124 return code_convert_string (string, coding_system, buffer,
4550efdf 9125 1, ! NILP (nocopy), 0);
4ed46869 9126}
df7492f9 9127
3a73fa5d 9128\f
4ed46869 9129DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
9130 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9131Return the corresponding character. */)
5842a27b 9132 (Lisp_Object code)
4ed46869 9133{
df7492f9
KH
9134 Lisp_Object spec, attrs, val;
9135 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
5fdb398c
PE
9136 EMACS_INT ch;
9137 int c;
4ed46869 9138
df7492f9 9139 CHECK_NATNUM (code);
5fdb398c 9140 ch = XFASTINT (code);
df7492f9
KH
9141 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9142 attrs = AREF (spec, 0);
4ed46869 9143
5fdb398c 9144 if (ASCII_BYTE_P (ch)
df7492f9
KH
9145 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9146 return code;
4ed46869 9147
df7492f9
KH
9148 val = CODING_ATTR_CHARSET_LIST (attrs);
9149 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
9150 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9151 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 9152
5fdb398c
PE
9153 if (ch <= 0x7F)
9154 {
9155 c = ch;
9156 charset = charset_roman;
9157 }
9158 else if (ch >= 0xA0 && ch < 0xDF)
55ab7be3 9159 {
5fdb398c 9160 c = ch - 0x80;
df7492f9 9161 charset = charset_kana;
4ed46869 9162 }
55ab7be3 9163 else
4ed46869 9164 {
5fdb398c
PE
9165 EMACS_INT c1 = ch >> 8;
9166 int c2 = ch & 0xFF;
df7492f9 9167
2735d060
PE
9168 if (c1 < 0x81 || (c1 > 0x9F && c1 < 0xE0) || c1 > 0xEF
9169 || c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
c2982e87 9170 error ("Invalid code: %"pI"d", ch);
5fdb398c 9171 c = ch;
df7492f9
KH
9172 SJIS_TO_JIS (c);
9173 charset = charset_kanji;
4ed46869 9174 }
df7492f9
KH
9175 c = DECODE_CHAR (charset, c);
9176 if (c < 0)
c2982e87 9177 error ("Invalid code: %"pI"d", ch);
df7492f9 9178 return make_number (c);
93dec019 9179}
4ed46869 9180
48b0f3ae 9181
4ed46869 9182DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8acf0c0e 9183 doc: /* Encode a Japanese character CH to shift_jis encoding.
48b0f3ae 9184Return the corresponding code in SJIS. */)
5842a27b 9185 (Lisp_Object ch)
4ed46869 9186{
df7492f9
KH
9187 Lisp_Object spec, attrs, charset_list;
9188 int c;
9189 struct charset *charset;
9190 unsigned code;
48b0f3ae 9191
df7492f9
KH
9192 CHECK_CHARACTER (ch);
9193 c = XFASTINT (ch);
9194 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9195 attrs = AREF (spec, 0);
9196
9197 if (ASCII_CHAR_P (c)
9198 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9199 return ch;
9200
9201 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9202 charset = char_charset (c, charset_list, &code);
9203 if (code == CHARSET_INVALID_CODE (charset))
e6c3da20 9204 error ("Can't encode by shift_jis encoding: %c", c);
df7492f9
KH
9205 JIS_TO_SJIS (code);
9206
9207 return make_number (code);
4ed46869
KH
9208}
9209
9210DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
9211 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9212Return the corresponding character. */)
5842a27b 9213 (Lisp_Object code)
d46c5b12 9214{
df7492f9
KH
9215 Lisp_Object spec, attrs, val;
9216 struct charset *charset_roman, *charset_big5, *charset;
5fdb398c 9217 EMACS_INT ch;
df7492f9 9218 int c;
6289dd10 9219
df7492f9 9220 CHECK_NATNUM (code);
5fdb398c 9221 ch = XFASTINT (code);
df7492f9
KH
9222 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9223 attrs = AREF (spec, 0);
4ed46869 9224
5fdb398c 9225 if (ASCII_BYTE_P (ch)
df7492f9
KH
9226 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9227 return code;
6289dd10 9228
df7492f9
KH
9229 val = CODING_ATTR_CHARSET_LIST (attrs);
9230 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9231 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 9232
5fdb398c
PE
9233 if (ch <= 0x7F)
9234 {
9235 c = ch;
9236 charset = charset_roman;
9237 }
c28a9453
KH
9238 else
9239 {
5fdb398c
PE
9240 EMACS_INT b1 = ch >> 8;
9241 int b2 = ch & 0x7F;
df7492f9
KH
9242 if (b1 < 0xA1 || b1 > 0xFE
9243 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
c2982e87 9244 error ("Invalid code: %"pI"d", ch);
5fdb398c 9245 c = ch;
df7492f9 9246 charset = charset_big5;
c28a9453 9247 }
5fdb398c 9248 c = DECODE_CHAR (charset, c);
df7492f9 9249 if (c < 0)
c2982e87 9250 error ("Invalid code: %"pI"d", ch);
df7492f9 9251 return make_number (c);
d46c5b12 9252}
6289dd10 9253
4ed46869 9254DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8acf0c0e 9255 doc: /* Encode the Big5 character CH to BIG5 coding system.
48b0f3ae 9256Return the corresponding character code in Big5. */)
5842a27b 9257 (Lisp_Object ch)
4ed46869 9258{
df7492f9
KH
9259 Lisp_Object spec, attrs, charset_list;
9260 struct charset *charset;
9261 int c;
9262 unsigned code;
9263
9264 CHECK_CHARACTER (ch);
9265 c = XFASTINT (ch);
9266 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9267 attrs = AREF (spec, 0);
9268 if (ASCII_CHAR_P (c)
9269 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9270 return ch;
9271
9272 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9273 charset = char_charset (c, charset_list, &code);
9274 if (code == CHARSET_INVALID_CODE (charset))
e6c3da20 9275 error ("Can't encode by Big5 encoding: %c", c);
df7492f9
KH
9276
9277 return make_number (code);
4ed46869 9278}
48b0f3ae 9279
3a73fa5d 9280\f
002fdb44 9281DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
68bba4e4 9282 Sset_terminal_coding_system_internal, 1, 2, 0,
48b0f3ae 9283 doc: /* Internal use only. */)
5842a27b 9284 (Lisp_Object coding_system, Lisp_Object terminal)
4ed46869 9285{
b18fad6d
KH
9286 struct terminal *term = get_terminal (terminal, 1);
9287 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (term);
b7826503 9288 CHECK_SYMBOL (coding_system);
b8299c66 9289 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
70c22245 9290 /* We had better not send unsafe characters to terminal. */
c73bd236 9291 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
ad1746f5 9292 /* Character composition should be disabled. */
c73bd236 9293 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b8299c66
KL
9294 terminal_coding->src_multibyte = 1;
9295 terminal_coding->dst_multibyte = 0;
b18fad6d
KH
9296 if (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK)
9297 term->charset_list = coding_charset_list (terminal_coding);
9298 else
6b4bb703 9299 term->charset_list = Fcons (make_number (charset_ascii), Qnil);
4ed46869
KH
9300 return Qnil;
9301}
9302
c4825358
KH
9303DEFUN ("set-safe-terminal-coding-system-internal",
9304 Fset_safe_terminal_coding_system_internal,
48b0f3ae 9305 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 9306 doc: /* Internal use only. */)
5842a27b 9307 (Lisp_Object coding_system)
d46c5b12 9308{
b7826503 9309 CHECK_SYMBOL (coding_system);
c4825358
KH
9310 setup_coding_system (Fcheck_coding_system (coding_system),
9311 &safe_terminal_coding);
ad1746f5 9312 /* Character composition should be disabled. */
df7492f9 9313 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
9314 safe_terminal_coding.src_multibyte = 1;
9315 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
9316 return Qnil;
9317}
4ed46869 9318
002fdb44 9319DEFUN ("terminal-coding-system", Fterminal_coding_system,
68bba4e4 9320 Sterminal_coding_system, 0, 1, 0,
6ed8eeff 9321 doc: /* Return coding system specified for terminal output on the given terminal.
708e05dc 9322TERMINAL may be a terminal object, a frame, or nil for the selected
6ed8eeff 9323frame's terminal device. */)
5842a27b 9324 (Lisp_Object terminal)
4ed46869 9325{
985773c9
MB
9326 struct coding_system *terminal_coding
9327 = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9328 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
ae6f73fa 9329
6d5eb5b0 9330 /* For backward compatibility, return nil if it is `undecided'. */
f75c90a9 9331 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
4ed46869
KH
9332}
9333
002fdb44 9334DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
68bba4e4 9335 Sset_keyboard_coding_system_internal, 1, 2, 0,
48b0f3ae 9336 doc: /* Internal use only. */)
5842a27b 9337 (Lisp_Object coding_system, Lisp_Object terminal)
4ed46869 9338{
6ed8eeff 9339 struct terminal *t = get_terminal (terminal, 1);
b7826503 9340 CHECK_SYMBOL (coding_system);
624bda09
KH
9341 if (NILP (coding_system))
9342 coding_system = Qno_conversion;
9343 else
9344 Fcheck_coding_system (coding_system);
9345 setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
ad1746f5 9346 /* Character composition should be disabled. */
c73bd236
MB
9347 TERMINAL_KEYBOARD_CODING (t)->common_flags
9348 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
9349 return Qnil;
9350}
9351
9352DEFUN ("keyboard-coding-system",
985773c9 9353 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
48b0f3ae 9354 doc: /* Return coding system specified for decoding keyboard input. */)
5842a27b 9355 (Lisp_Object terminal)
4ed46869 9356{
985773c9
MB
9357 return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9358 (get_terminal (terminal, 1))->id);
4ed46869
KH
9359}
9360
4ed46869 9361\f
a7ca3326 9362DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
a5d301df 9363 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
9364 doc: /* Choose a coding system for an operation based on the target name.
9365The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9366DECODING-SYSTEM is the coding system to use for decoding
9367\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9368for encoding (in case OPERATION does encoding).
05e6f5dc 9369
48b0f3ae
PJ
9370The first argument OPERATION specifies an I/O primitive:
9371 For file I/O, `insert-file-contents' or `write-region'.
9372 For process I/O, `call-process', `call-process-region', or `start-process'.
9373 For network I/O, `open-network-stream'.
05e6f5dc 9374
48b0f3ae
PJ
9375The remaining arguments should be the same arguments that were passed
9376to the primitive. Depending on which primitive, one of those arguments
9377is selected as the TARGET. For example, if OPERATION does file I/O,
9378whichever argument specifies the file name is TARGET.
05e6f5dc 9379
48b0f3ae 9380TARGET has a meaning which depends on OPERATION:
b883cdb2 9381 For file I/O, TARGET is a file name (except for the special case below).
48b0f3ae 9382 For process I/O, TARGET is a process name.
d4a1d553 9383 For network I/O, TARGET is a service name or a port number.
05e6f5dc 9384
d4a1d553 9385This function looks up what is specified for TARGET in
48b0f3ae
PJ
9386`file-coding-system-alist', `process-coding-system-alist',
9387or `network-coding-system-alist' depending on OPERATION.
9388They may specify a coding system, a cons of coding systems,
9389or a function symbol to call.
9390In the last case, we call the function with one argument,
9391which is a list of all the arguments given to this function.
1011c487
MB
9392If the function can't decide a coding system, it can return
9393`undecided' so that the normal code-detection is performed.
48b0f3ae 9394
b883cdb2
MB
9395If OPERATION is `insert-file-contents', the argument corresponding to
9396TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
9397file name to look up, and BUFFER is a buffer that contains the file's
9398contents (not yet decoded). If `file-coding-system-alist' specifies a
9399function to call for FILENAME, that function should examine the
9400contents of BUFFER instead of reading the file.
9401
d918f936 9402usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
f66c7cf8 9403 (ptrdiff_t nargs, Lisp_Object *args)
6b89e3aa 9404{
4ed46869
KH
9405 Lisp_Object operation, target_idx, target, val;
9406 register Lisp_Object chain;
177c0ea7 9407
4ed46869
KH
9408 if (nargs < 2)
9409 error ("Too few arguments");
9410 operation = args[0];
9411 if (!SYMBOLP (operation)
d311d28c 9412 || (target_idx = Fget (operation, Qtarget_idx), !NATNUMP (target_idx)))
3ed051d4 9413 error ("Invalid first argument");
7b09a37a 9414 if (nargs <= 1 + XFASTINT (target_idx))
94dcfacf 9415 error ("Too few arguments for operation `%s'",
8f924df7 9416 SDATA (SYMBOL_NAME (operation)));
c5101a77 9417 target = args[XFASTINT (target_idx) + 1];
4ed46869 9418 if (!(STRINGP (target)
091a0ff0
KH
9419 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9420 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
4ed46869 9421 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
94dcfacf
EZ
9422 error ("Invalid argument %"pI"d of operation `%s'",
9423 XFASTINT (target_idx) + 1, SDATA (SYMBOL_NAME (operation)));
091a0ff0
KH
9424 if (CONSP (target))
9425 target = XCAR (target);
4ed46869 9426
2e34157c
RS
9427 chain = ((EQ (operation, Qinsert_file_contents)
9428 || EQ (operation, Qwrite_region))
02ba4723 9429 ? Vfile_coding_system_alist
2e34157c 9430 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
9431 ? Vnetwork_coding_system_alist
9432 : Vprocess_coding_system_alist));
4ed46869
KH
9433 if (NILP (chain))
9434 return Qnil;
9435
03699b14 9436 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 9437 {
f44d27ce 9438 Lisp_Object elt;
6b89e3aa 9439
df7492f9 9440 elt = XCAR (chain);
4ed46869
KH
9441 if (CONSP (elt)
9442 && ((STRINGP (target)
03699b14
KR
9443 && STRINGP (XCAR (elt))
9444 && fast_string_match (XCAR (elt), target) >= 0)
9445 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 9446 {
03699b14 9447 val = XCDR (elt);
b19fd4c5
KH
9448 /* Here, if VAL is both a valid coding system and a valid
9449 function symbol, we return VAL as a coding system. */
02ba4723
KH
9450 if (CONSP (val))
9451 return val;
9452 if (! SYMBOLP (val))
9453 return Qnil;
9454 if (! NILP (Fcoding_system_p (val)))
9455 return Fcons (val, val);
b19fd4c5 9456 if (! NILP (Ffboundp (val)))
6b89e3aa 9457 {
e2b97060
MB
9458 /* We use call1 rather than safe_call1
9459 so as to get bug reports about functions called here
9460 which don't handle the current interface. */
9461 val = call1 (val, Flist (nargs, args));
b19fd4c5
KH
9462 if (CONSP (val))
9463 return val;
9464 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9465 return Fcons (val, val);
6b89e3aa 9466 }
02ba4723 9467 return Qnil;
6b89e3aa
KH
9468 }
9469 }
4ed46869 9470 return Qnil;
6b89e3aa
KH
9471}
9472
df7492f9 9473DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 9474 Sset_coding_system_priority, 0, MANY, 0,
da7db224 9475 doc: /* Assign higher priority to the coding systems given as arguments.
d4a1d553 9476If multiple coding systems belong to the same category,
a3181084
DL
9477all but the first one are ignored.
9478
d4a1d553 9479usage: (set-coding-system-priority &rest coding-systems) */)
f66c7cf8 9480 (ptrdiff_t nargs, Lisp_Object *args)
df7492f9 9481{
f66c7cf8 9482 ptrdiff_t i, j;
df7492f9
KH
9483 int changed[coding_category_max];
9484 enum coding_category priorities[coding_category_max];
9485
72af86bd 9486 memset (changed, 0, sizeof changed);
6b89e3aa 9487
df7492f9 9488 for (i = j = 0; i < nargs; i++)
6b89e3aa 9489 {
df7492f9
KH
9490 enum coding_category category;
9491 Lisp_Object spec, attrs;
6b89e3aa 9492
df7492f9
KH
9493 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9494 attrs = AREF (spec, 0);
9495 category = XINT (CODING_ATTR_CATEGORY (attrs));
9496 if (changed[category])
9497 /* Ignore this coding system because a coding system of the
9498 same category already had a higher priority. */
9499 continue;
9500 changed[category] = 1;
9501 priorities[j++] = category;
9502 if (coding_categories[category].id >= 0
9503 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9504 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 9505 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 9506 }
6b89e3aa 9507
df7492f9
KH
9508 /* Now we have decided top J priorities. Reflect the order of the
9509 original priorities to the remaining priorities. */
6b89e3aa 9510
df7492f9 9511 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 9512 {
df7492f9
KH
9513 while (j < coding_category_max
9514 && changed[coding_priorities[j]])
9515 j++;
9516 if (j == coding_category_max)
9517 abort ();
9518 priorities[i] = coding_priorities[j];
9519 }
6b89e3aa 9520
72af86bd 9521 memcpy (coding_priorities, priorities, sizeof priorities);
177c0ea7 9522
ff563fce
KH
9523 /* Update `coding-category-list'. */
9524 Vcoding_category_list = Qnil;
c5101a77 9525 for (i = coding_category_max; i-- > 0; )
ff563fce
KH
9526 Vcoding_category_list
9527 = Fcons (AREF (Vcoding_category_table, priorities[i]),
9528 Vcoding_category_list);
6b89e3aa 9529
df7492f9 9530 return Qnil;
6b89e3aa
KH
9531}
9532
df7492f9
KH
9533DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9534 Scoding_system_priority_list, 0, 1, 0,
da7db224 9535 doc: /* Return a list of coding systems ordered by their priorities.
b811c52b
KH
9536The list contains a subset of coding systems; i.e. coding systems
9537assigned to each coding category (see `coding-category-list').
9538
da7db224 9539HIGHESTP non-nil means just return the highest priority one. */)
5842a27b 9540 (Lisp_Object highestp)
d46c5b12
KH
9541{
9542 int i;
df7492f9 9543 Lisp_Object val;
6b89e3aa 9544
df7492f9 9545 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 9546 {
df7492f9
KH
9547 enum coding_category category = coding_priorities[i];
9548 int id = coding_categories[category].id;
9549 Lisp_Object attrs;
068a9dbd 9550
df7492f9
KH
9551 if (id < 0)
9552 continue;
9553 attrs = CODING_ID_ATTRS (id);
9554 if (! NILP (highestp))
9555 return CODING_ATTR_BASE_NAME (attrs);
9556 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9557 }
9558 return Fnreverse (val);
9559}
068a9dbd 9560
91433552 9561static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
9562
9563static Lisp_Object
971de7fb 9564make_subsidiaries (Lisp_Object base)
068a9dbd 9565{
df7492f9 9566 Lisp_Object subsidiaries;
1bfdaf10 9567 ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base));
38182d90 9568 char *buf = alloca (base_name_len + 6);
df7492f9 9569 int i;
068a9dbd 9570
72af86bd 9571 memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
df7492f9
KH
9572 subsidiaries = Fmake_vector (make_number (3), Qnil);
9573 for (i = 0; i < 3; i++)
068a9dbd 9574 {
1bfdaf10 9575 strcpy (buf + base_name_len, suffixes[i]);
df7492f9 9576 ASET (subsidiaries, i, intern (buf));
068a9dbd 9577 }
df7492f9 9578 return subsidiaries;
068a9dbd
KH
9579}
9580
9581
df7492f9
KH
9582DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9583 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
9584 doc: /* For internal use only.
9585usage: (define-coding-system-internal ...) */)
f66c7cf8 9586 (ptrdiff_t nargs, Lisp_Object *args)
068a9dbd 9587{
df7492f9
KH
9588 Lisp_Object name;
9589 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
9590 Lisp_Object attrs; /* Vector of attributes. */
9591 Lisp_Object eol_type;
9592 Lisp_Object aliases;
9593 Lisp_Object coding_type, charset_list, safe_charsets;
9594 enum coding_category category;
9595 Lisp_Object tail, val;
9596 int max_charset_id = 0;
9597 int i;
068a9dbd 9598
df7492f9
KH
9599 if (nargs < coding_arg_max)
9600 goto short_args;
068a9dbd 9601
df7492f9 9602 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 9603
df7492f9
KH
9604 name = args[coding_arg_name];
9605 CHECK_SYMBOL (name);
9606 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 9607
df7492f9
KH
9608 val = args[coding_arg_mnemonic];
9609 if (! STRINGP (val))
9610 CHECK_CHARACTER (val);
9611 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 9612
df7492f9
KH
9613 coding_type = args[coding_arg_coding_type];
9614 CHECK_SYMBOL (coding_type);
9615 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 9616
df7492f9
KH
9617 charset_list = args[coding_arg_charset_list];
9618 if (SYMBOLP (charset_list))
9619 {
9620 if (EQ (charset_list, Qiso_2022))
9621 {
9622 if (! EQ (coding_type, Qiso_2022))
9623 error ("Invalid charset-list");
9624 charset_list = Viso_2022_charset_list;
9625 }
9626 else if (EQ (charset_list, Qemacs_mule))
9627 {
9628 if (! EQ (coding_type, Qemacs_mule))
9629 error ("Invalid charset-list");
9630 charset_list = Vemacs_mule_charset_list;
9631 }
9632 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
d311d28c
PE
9633 {
9634 if (! RANGED_INTEGERP (0, XCAR (tail), INT_MAX - 1))
9635 error ("Invalid charset-list");
9636 if (max_charset_id < XFASTINT (XCAR (tail)))
9637 max_charset_id = XFASTINT (XCAR (tail));
9638 }
df7492f9 9639 }
068a9dbd
KH
9640 else
9641 {
df7492f9 9642 charset_list = Fcopy_sequence (charset_list);
985773c9 9643 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
068a9dbd 9644 {
df7492f9
KH
9645 struct charset *charset;
9646
985773c9 9647 val = XCAR (tail);
df7492f9
KH
9648 CHECK_CHARSET_GET_CHARSET (val, charset);
9649 if (EQ (coding_type, Qiso_2022)
9650 ? CHARSET_ISO_FINAL (charset) < 0
9651 : EQ (coding_type, Qemacs_mule)
9652 ? CHARSET_EMACS_MULE_ID (charset) < 0
9653 : 0)
9654 error ("Can't handle charset `%s'",
8f924df7 9655 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 9656
8f924df7 9657 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
9658 if (max_charset_id < charset->id)
9659 max_charset_id = charset->id;
068a9dbd
KH
9660 }
9661 }
df7492f9 9662 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 9663
1b3b981b
AS
9664 safe_charsets = make_uninit_string (max_charset_id + 1);
9665 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9 9666 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 9667 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 9668 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 9669
584948ac 9670 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 9671
df7492f9 9672 val = args[coding_arg_decode_translation_table];
a6f87d34 9673 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 9674 CHECK_SYMBOL (val);
df7492f9 9675 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 9676
df7492f9 9677 val = args[coding_arg_encode_translation_table];
a6f87d34 9678 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 9679 CHECK_SYMBOL (val);
df7492f9 9680 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 9681
df7492f9
KH
9682 val = args[coding_arg_post_read_conversion];
9683 CHECK_SYMBOL (val);
9684 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 9685
df7492f9
KH
9686 val = args[coding_arg_pre_write_conversion];
9687 CHECK_SYMBOL (val);
9688 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 9689
df7492f9
KH
9690 val = args[coding_arg_default_char];
9691 if (NILP (val))
9692 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9693 else
9694 {
8f924df7 9695 CHECK_CHARACTER (val);
df7492f9
KH
9696 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9697 }
4031e2bf 9698
8f924df7
KH
9699 val = args[coding_arg_for_unibyte];
9700 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 9701
df7492f9
KH
9702 val = args[coding_arg_plist];
9703 CHECK_LIST (val);
9704 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 9705
df7492f9
KH
9706 if (EQ (coding_type, Qcharset))
9707 {
c7c66a95
KH
9708 /* Generate a lisp vector of 256 elements. Each element is nil,
9709 integer, or a list of charset IDs.
3a73fa5d 9710
c7c66a95
KH
9711 If Nth element is nil, the byte code N is invalid in this
9712 coding system.
4ed46869 9713
c7c66a95
KH
9714 If Nth element is a number NUM, N is the first byte of a
9715 charset whose ID is NUM.
4ed46869 9716
c7c66a95
KH
9717 If Nth element is a list of charset IDs, N is the first byte
9718 of one of them. The list is sorted by dimensions of the
ad1746f5 9719 charsets. A charset of smaller dimension comes first. */
df7492f9 9720 val = Fmake_vector (make_number (256), Qnil);
4ed46869 9721
5c99c2e6 9722 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 9723 {
c7c66a95
KH
9724 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9725 int dim = CHARSET_DIMENSION (charset);
9726 int idx = (dim - 1) * 4;
4ed46869 9727
5c99c2e6 9728 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 9729 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 9730
15d143f7
KH
9731 for (i = charset->code_space[idx];
9732 i <= charset->code_space[idx + 1]; i++)
9733 {
c7c66a95
KH
9734 Lisp_Object tmp, tmp2;
9735 int dim2;
ec6d2bb8 9736
c7c66a95
KH
9737 tmp = AREF (val, i);
9738 if (NILP (tmp))
9739 tmp = XCAR (tail);
9740 else if (NUMBERP (tmp))
9741 {
9742 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9743 if (dim < dim2)
c7c66a95 9744 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
9745 else
9746 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 9747 }
15d143f7 9748 else
c7c66a95
KH
9749 {
9750 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9751 {
9752 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9753 if (dim < dim2)
9754 break;
9755 }
9756 if (NILP (tmp2))
9757 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9758 else
9759 {
9760 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9761 XSETCAR (tmp2, XCAR (tail));
9762 }
9763 }
9764 ASET (val, i, tmp);
15d143f7 9765 }
df7492f9
KH
9766 }
9767 ASET (attrs, coding_attr_charset_valids, val);
9768 category = coding_category_charset;
9769 }
9770 else if (EQ (coding_type, Qccl))
9771 {
9772 Lisp_Object valids;
ecec61c1 9773
df7492f9
KH
9774 if (nargs < coding_arg_ccl_max)
9775 goto short_args;
ecec61c1 9776
df7492f9
KH
9777 val = args[coding_arg_ccl_decoder];
9778 CHECK_CCL_PROGRAM (val);
9779 if (VECTORP (val))
9780 val = Fcopy_sequence (val);
9781 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 9782
df7492f9
KH
9783 val = args[coding_arg_ccl_encoder];
9784 CHECK_CCL_PROGRAM (val);
9785 if (VECTORP (val))
9786 val = Fcopy_sequence (val);
9787 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 9788
df7492f9
KH
9789 val = args[coding_arg_ccl_valids];
9790 valids = Fmake_string (make_number (256), make_number (0));
7d7bbefd 9791 for (tail = val; CONSP (tail); tail = XCDR (tail))
df7492f9 9792 {
8dcbea82 9793 int from, to;
ecec61c1 9794
34348bd4 9795 val = XCAR (tail);
df7492f9 9796 if (INTEGERP (val))
8dcbea82 9797 {
d311d28c 9798 if (! (0 <= XINT (val) && XINT (val) <= 255))
8dcbea82 9799 args_out_of_range_3 (val, make_number (0), make_number (255));
d311d28c 9800 from = to = XINT (val);
8dcbea82 9801 }
df7492f9
KH
9802 else
9803 {
df7492f9 9804 CHECK_CONS (val);
8f924df7 9805 CHECK_NATNUM_CAR (val);
d311d28c
PE
9806 CHECK_NUMBER_CDR (val);
9807 if (XINT (XCAR (val)) > 255)
8dcbea82
KH
9808 args_out_of_range_3 (XCAR (val),
9809 make_number (0), make_number (255));
d311d28c
PE
9810 from = XINT (XCAR (val));
9811 if (! (from <= XINT (XCDR (val)) && XINT (XCDR (val)) <= 255))
8dcbea82
KH
9812 args_out_of_range_3 (XCDR (val),
9813 XCAR (val), make_number (255));
d311d28c 9814 to = XINT (XCDR (val));
df7492f9 9815 }
8dcbea82 9816 for (i = from; i <= to; i++)
8f924df7 9817 SSET (valids, i, 1);
df7492f9
KH
9818 }
9819 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 9820
df7492f9 9821 category = coding_category_ccl;
55ab7be3 9822 }
df7492f9 9823 else if (EQ (coding_type, Qutf_16))
55ab7be3 9824 {
df7492f9 9825 Lisp_Object bom, endian;
4ed46869 9826
584948ac 9827 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 9828
df7492f9
KH
9829 if (nargs < coding_arg_utf16_max)
9830 goto short_args;
4ed46869 9831
df7492f9
KH
9832 bom = args[coding_arg_utf16_bom];
9833 if (! NILP (bom) && ! EQ (bom, Qt))
9834 {
9835 CHECK_CONS (bom);
8f924df7
KH
9836 val = XCAR (bom);
9837 CHECK_CODING_SYSTEM (val);
9838 val = XCDR (bom);
9839 CHECK_CODING_SYSTEM (val);
df7492f9 9840 }
a470d443 9841 ASET (attrs, coding_attr_utf_bom, bom);
df7492f9
KH
9842
9843 endian = args[coding_arg_utf16_endian];
b49a1807
KH
9844 CHECK_SYMBOL (endian);
9845 if (NILP (endian))
9846 endian = Qbig;
9847 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 9848 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
9849 ASET (attrs, coding_attr_utf_16_endian, endian);
9850
9851 category = (CONSP (bom)
9852 ? coding_category_utf_16_auto
9853 : NILP (bom)
b49a1807 9854 ? (EQ (endian, Qbig)
df7492f9
KH
9855 ? coding_category_utf_16_be_nosig
9856 : coding_category_utf_16_le_nosig)
b49a1807 9857 : (EQ (endian, Qbig)
df7492f9
KH
9858 ? coding_category_utf_16_be
9859 : coding_category_utf_16_le));
9860 }
9861 else if (EQ (coding_type, Qiso_2022))
9862 {
9863 Lisp_Object initial, reg_usage, request, flags;
1397dc18 9864
df7492f9
KH
9865 if (nargs < coding_arg_iso2022_max)
9866 goto short_args;
9867
9868 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9869 CHECK_VECTOR (initial);
9870 for (i = 0; i < 4; i++)
9871 {
9872 val = Faref (initial, make_number (i));
9873 if (! NILP (val))
9874 {
584948ac
KH
9875 struct charset *charset;
9876
9877 CHECK_CHARSET_GET_CHARSET (val, charset);
9878 ASET (initial, i, make_number (CHARSET_ID (charset)));
9879 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9880 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9881 }
9882 else
9883 ASET (initial, i, make_number (-1));
9884 }
9885
9886 reg_usage = args[coding_arg_iso2022_reg_usage];
9887 CHECK_CONS (reg_usage);
8f924df7
KH
9888 CHECK_NUMBER_CAR (reg_usage);
9889 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
9890
9891 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7d7bbefd 9892 for (tail = request; CONSP (tail); tail = XCDR (tail))
1397dc18 9893 {
df7492f9 9894 int id;
2735d060 9895 Lisp_Object tmp1;
df7492f9 9896
34348bd4 9897 val = XCAR (tail);
df7492f9 9898 CHECK_CONS (val);
2735d060
PE
9899 tmp1 = XCAR (val);
9900 CHECK_CHARSET_GET_ID (tmp1, id);
8f924df7 9901 CHECK_NATNUM_CDR (val);
df7492f9 9902 if (XINT (XCDR (val)) >= 4)
c2982e87 9903 error ("Invalid graphic register number: %"pI"d", XINT (XCDR (val)));
8f924df7 9904 XSETCAR (val, make_number (id));
1397dc18 9905 }
4ed46869 9906
df7492f9
KH
9907 flags = args[coding_arg_iso2022_flags];
9908 CHECK_NATNUM (flags);
d311d28c 9909 i = XINT (flags) & INT_MAX;
df7492f9 9910 if (EQ (args[coding_arg_charset_list], Qiso_2022))
d311d28c
PE
9911 i |= CODING_ISO_FLAG_FULL_SUPPORT;
9912 flags = make_number (i);
df7492f9
KH
9913
9914 ASET (attrs, coding_attr_iso_initial, initial);
9915 ASET (attrs, coding_attr_iso_usage, reg_usage);
9916 ASET (attrs, coding_attr_iso_request, request);
9917 ASET (attrs, coding_attr_iso_flags, flags);
9918 setup_iso_safe_charsets (attrs);
9919
9920 if (i & CODING_ISO_FLAG_SEVEN_BITS)
9921 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9922 | CODING_ISO_FLAG_SINGLE_SHIFT))
9923 ? coding_category_iso_7_else
9924 : EQ (args[coding_arg_charset_list], Qiso_2022)
9925 ? coding_category_iso_7
9926 : coding_category_iso_7_tight);
9927 else
9928 {
9929 int id = XINT (AREF (initial, 1));
9930
c6fb6e98 9931 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
9932 || EQ (args[coding_arg_charset_list], Qiso_2022)
9933 || id < 0)
9934 ? coding_category_iso_8_else
9935 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9936 ? coding_category_iso_8_1
9937 : coding_category_iso_8_2);
9938 }
0ce7886f
KH
9939 if (category != coding_category_iso_8_1
9940 && category != coding_category_iso_8_2)
9941 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
9942 }
9943 else if (EQ (coding_type, Qemacs_mule))
c28a9453 9944 {
df7492f9
KH
9945 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9946 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 9947 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 9948 category = coding_category_emacs_mule;
c28a9453 9949 }
df7492f9 9950 else if (EQ (coding_type, Qshift_jis))
c28a9453 9951 {
df7492f9
KH
9952
9953 struct charset *charset;
9954
7d64c6ad 9955 if (XINT (Flength (charset_list)) != 3
6e07c25f 9956 && XINT (Flength (charset_list)) != 4)
7d64c6ad 9957 error ("There should be three or four charsets");
df7492f9
KH
9958
9959 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9960 if (CHARSET_DIMENSION (charset) != 1)
9961 error ("Dimension of charset %s is not one",
8f924df7 9962 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9963 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9964 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9965
9966 charset_list = XCDR (charset_list);
9967 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9968 if (CHARSET_DIMENSION (charset) != 1)
9969 error ("Dimension of charset %s is not one",
8f924df7 9970 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
9971
9972 charset_list = XCDR (charset_list);
9973 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9974 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
9975 error ("Dimension of charset %s is not two",
9976 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9977
9978 charset_list = XCDR (charset_list);
2b917a06
KH
9979 if (! NILP (charset_list))
9980 {
9981 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9982 if (CHARSET_DIMENSION (charset) != 2)
9983 error ("Dimension of charset %s is not two",
9984 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9985 }
df7492f9
KH
9986
9987 category = coding_category_sjis;
9988 Vsjis_coding_system = name;
c28a9453 9989 }
df7492f9
KH
9990 else if (EQ (coding_type, Qbig5))
9991 {
9992 struct charset *charset;
4ed46869 9993
df7492f9
KH
9994 if (XINT (Flength (charset_list)) != 2)
9995 error ("There should be just two charsets");
9996
9997 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9998 if (CHARSET_DIMENSION (charset) != 1)
9999 error ("Dimension of charset %s is not one",
8f924df7 10000 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
10001 if (CHARSET_ASCII_COMPATIBLE_P (charset))
10002 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
10003
10004 charset_list = XCDR (charset_list);
10005 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10006 if (CHARSET_DIMENSION (charset) != 2)
10007 error ("Dimension of charset %s is not two",
8f924df7 10008 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 10009
df7492f9
KH
10010 category = coding_category_big5;
10011 Vbig5_coding_system = name;
10012 }
10013 else if (EQ (coding_type, Qraw_text))
c28a9453 10014 {
584948ac
KH
10015 category = coding_category_raw_text;
10016 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 10017 }
df7492f9 10018 else if (EQ (coding_type, Qutf_8))
4ed46869 10019 {
a470d443
KH
10020 Lisp_Object bom;
10021
a470d443
KH
10022 if (nargs < coding_arg_utf8_max)
10023 goto short_args;
10024
10025 bom = args[coding_arg_utf8_bom];
10026 if (! NILP (bom) && ! EQ (bom, Qt))
10027 {
10028 CHECK_CONS (bom);
10029 val = XCAR (bom);
10030 CHECK_CODING_SYSTEM (val);
10031 val = XCDR (bom);
10032 CHECK_CODING_SYSTEM (val);
10033 }
10034 ASET (attrs, coding_attr_utf_bom, bom);
0e5317f7
KH
10035 if (NILP (bom))
10036 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
a470d443
KH
10037
10038 category = (CONSP (bom) ? coding_category_utf_8_auto
10039 : NILP (bom) ? coding_category_utf_8_nosig
10040 : coding_category_utf_8_sig);
4ed46869 10041 }
df7492f9
KH
10042 else if (EQ (coding_type, Qundecided))
10043 category = coding_category_undecided;
4ed46869 10044 else
df7492f9 10045 error ("Invalid coding system type: %s",
8f924df7 10046 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 10047
df7492f9 10048 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
10049 CODING_ATTR_PLIST (attrs)
10050 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10051 CODING_ATTR_PLIST (attrs)));
35befdaa 10052 CODING_ATTR_PLIST (attrs)
3ed051d4 10053 = Fcons (QCascii_compatible_p,
35befdaa
KH
10054 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10055 CODING_ATTR_PLIST (attrs)));
c4825358 10056
df7492f9
KH
10057 eol_type = args[coding_arg_eol_type];
10058 if (! NILP (eol_type)
10059 && ! EQ (eol_type, Qunix)
10060 && ! EQ (eol_type, Qdos)
10061 && ! EQ (eol_type, Qmac))
10062 error ("Invalid eol-type");
4ed46869 10063
df7492f9 10064 aliases = Fcons (name, Qnil);
4ed46869 10065
df7492f9
KH
10066 if (NILP (eol_type))
10067 {
10068 eol_type = make_subsidiaries (name);
10069 for (i = 0; i < 3; i++)
1397dc18 10070 {
df7492f9
KH
10071 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10072
10073 this_name = AREF (eol_type, i);
10074 this_aliases = Fcons (this_name, Qnil);
10075 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10076 this_spec = Fmake_vector (make_number (3), attrs);
10077 ASET (this_spec, 1, this_aliases);
10078 ASET (this_spec, 2, this_eol_type);
10079 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10080 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
583f71ca
KH
10081 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10082 if (NILP (val))
10083 Vcoding_system_alist
10084 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10085 Vcoding_system_alist);
1397dc18 10086 }
d46c5b12 10087 }
4ed46869 10088
df7492f9
KH
10089 spec_vec = Fmake_vector (make_number (3), attrs);
10090 ASET (spec_vec, 1, aliases);
10091 ASET (spec_vec, 2, eol_type);
48b0f3ae 10092
df7492f9
KH
10093 Fputhash (name, spec_vec, Vcoding_system_hash_table);
10094 Vcoding_system_list = Fcons (name, Vcoding_system_list);
583f71ca
KH
10095 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10096 if (NILP (val))
10097 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10098 Vcoding_system_alist);
48b0f3ae 10099
df7492f9
KH
10100 {
10101 int id = coding_categories[category].id;
48b0f3ae 10102
df7492f9
KH
10103 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10104 setup_coding_system (name, &coding_categories[category]);
10105 }
48b0f3ae 10106
d46c5b12 10107 return Qnil;
48b0f3ae 10108
df7492f9
KH
10109 short_args:
10110 return Fsignal (Qwrong_number_of_arguments,
10111 Fcons (intern ("define-coding-system-internal"),
10112 make_number (nargs)));
d46c5b12 10113}
4ed46869 10114
d6925f38 10115
a6f87d34
KH
10116DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10117 3, 3, 0,
10118 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
5842a27b 10119 (Lisp_Object coding_system, Lisp_Object prop, Lisp_Object val)
a6f87d34 10120{
3dbe7859 10121 Lisp_Object spec, attrs;
a6f87d34
KH
10122
10123 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10124 attrs = AREF (spec, 0);
10125 if (EQ (prop, QCmnemonic))
10126 {
10127 if (! STRINGP (val))
10128 CHECK_CHARACTER (val);
10129 CODING_ATTR_MNEMONIC (attrs) = val;
10130 }
2133e2d1 10131 else if (EQ (prop, QCdefault_char))
a6f87d34
KH
10132 {
10133 if (NILP (val))
10134 val = make_number (' ');
10135 else
10136 CHECK_CHARACTER (val);
10137 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10138 }
10139 else if (EQ (prop, QCdecode_translation_table))
10140 {
10141 if (! CHAR_TABLE_P (val) && ! CONSP (val))
10142 CHECK_SYMBOL (val);
10143 CODING_ATTR_DECODE_TBL (attrs) = val;
10144 }
10145 else if (EQ (prop, QCencode_translation_table))
10146 {
10147 if (! CHAR_TABLE_P (val) && ! CONSP (val))
10148 CHECK_SYMBOL (val);
10149 CODING_ATTR_ENCODE_TBL (attrs) = val;
10150 }
10151 else if (EQ (prop, QCpost_read_conversion))
10152 {
10153 CHECK_SYMBOL (val);
10154 CODING_ATTR_POST_READ (attrs) = val;
10155 }
10156 else if (EQ (prop, QCpre_write_conversion))
10157 {
10158 CHECK_SYMBOL (val);
10159 CODING_ATTR_PRE_WRITE (attrs) = val;
10160 }
35befdaa
KH
10161 else if (EQ (prop, QCascii_compatible_p))
10162 {
10163 CODING_ATTR_ASCII_COMPAT (attrs) = val;
10164 }
a6f87d34
KH
10165
10166 CODING_ATTR_PLIST (attrs)
10167 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10168 return val;
10169}
10170
10171
df7492f9
KH
10172DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10173 Sdefine_coding_system_alias, 2, 2, 0,
10174 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
5842a27b 10175 (Lisp_Object alias, Lisp_Object coding_system)
66cfb530 10176{
583f71ca 10177 Lisp_Object spec, aliases, eol_type, val;
4ed46869 10178
df7492f9
KH
10179 CHECK_SYMBOL (alias);
10180 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10181 aliases = AREF (spec, 1);
d4a1d553 10182 /* ALIASES should be a list of length more than zero, and the first
d6925f38
KH
10183 element is a base coding system. Append ALIAS at the tail of the
10184 list. */
df7492f9
KH
10185 while (!NILP (XCDR (aliases)))
10186 aliases = XCDR (aliases);
8f924df7 10187 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 10188
df7492f9
KH
10189 eol_type = AREF (spec, 2);
10190 if (VECTORP (eol_type))
4ed46869 10191 {
df7492f9
KH
10192 Lisp_Object subsidiaries;
10193 int i;
4ed46869 10194
df7492f9
KH
10195 subsidiaries = make_subsidiaries (alias);
10196 for (i = 0; i < 3; i++)
10197 Fdefine_coding_system_alias (AREF (subsidiaries, i),
10198 AREF (eol_type, i));
4ed46869 10199 }
df7492f9
KH
10200
10201 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 10202 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
583f71ca
KH
10203 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10204 if (NILP (val))
10205 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10206 Vcoding_system_alist);
66cfb530 10207
4ed46869
KH
10208 return Qnil;
10209}
10210
a7ca3326 10211DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
df7492f9
KH
10212 1, 1, 0,
10213 doc: /* Return the base of CODING-SYSTEM.
da7db224 10214Any alias or subsidiary coding system is not a base coding system. */)
5842a27b 10215 (Lisp_Object coding_system)
d46c5b12 10216{
df7492f9 10217 Lisp_Object spec, attrs;
d46c5b12 10218
df7492f9
KH
10219 if (NILP (coding_system))
10220 return (Qno_conversion);
10221 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10222 attrs = AREF (spec, 0);
10223 return CODING_ATTR_BASE_NAME (attrs);
10224}
1397dc18 10225
df7492f9
KH
10226DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10227 1, 1, 0,
10228 doc: "Return the property list of CODING-SYSTEM.")
5842a27b 10229 (Lisp_Object coding_system)
df7492f9
KH
10230{
10231 Lisp_Object spec, attrs;
1397dc18 10232
df7492f9
KH
10233 if (NILP (coding_system))
10234 coding_system = Qno_conversion;
10235 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10236 attrs = AREF (spec, 0);
10237 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
10238}
10239
df7492f9
KH
10240
10241DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10242 1, 1, 0,
da7db224 10243 doc: /* Return the list of aliases of CODING-SYSTEM. */)
5842a27b 10244 (Lisp_Object coding_system)
66cfb530 10245{
df7492f9 10246 Lisp_Object spec;
84d60297 10247
df7492f9
KH
10248 if (NILP (coding_system))
10249 coding_system = Qno_conversion;
10250 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 10251 return AREF (spec, 1);
df7492f9 10252}
66cfb530 10253
a7ca3326 10254DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
df7492f9
KH
10255 Scoding_system_eol_type, 1, 1, 0,
10256 doc: /* Return eol-type of CODING-SYSTEM.
d4a1d553 10257An eol-type is an integer 0, 1, 2, or a vector of coding systems.
66cfb530 10258
df7492f9
KH
10259Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10260and CR respectively.
66cfb530 10261
df7492f9
KH
10262A vector value indicates that a format of end-of-line should be
10263detected automatically. Nth element of the vector is the subsidiary
10264coding system whose eol-type is N. */)
5842a27b 10265 (Lisp_Object coding_system)
6b89e3aa 10266{
df7492f9
KH
10267 Lisp_Object spec, eol_type;
10268 int n;
6b89e3aa 10269
df7492f9
KH
10270 if (NILP (coding_system))
10271 coding_system = Qno_conversion;
10272 if (! CODING_SYSTEM_P (coding_system))
10273 return Qnil;
10274 spec = CODING_SYSTEM_SPEC (coding_system);
10275 eol_type = AREF (spec, 2);
10276 if (VECTORP (eol_type))
10277 return Fcopy_sequence (eol_type);
10278 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10279 return make_number (n);
6b89e3aa
KH
10280}
10281
4ed46869
KH
10282#endif /* emacs */
10283
10284\f
1397dc18 10285/*** 12. Postamble ***/
4ed46869 10286
dfcf069d 10287void
971de7fb 10288init_coding_once (void)
4ed46869
KH
10289{
10290 int i;
10291
df7492f9
KH
10292 for (i = 0; i < coding_category_max; i++)
10293 {
10294 coding_categories[i].id = -1;
10295 coding_priorities[i] = i;
10296 }
4ed46869
KH
10297
10298 /* ISO2022 specific initialize routine. */
10299 for (i = 0; i < 0x20; i++)
b73bfc1c 10300 iso_code_class[i] = ISO_control_0;
4ed46869
KH
10301 for (i = 0x21; i < 0x7F; i++)
10302 iso_code_class[i] = ISO_graphic_plane_0;
10303 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 10304 iso_code_class[i] = ISO_control_1;
4ed46869
KH
10305 for (i = 0xA1; i < 0xFF; i++)
10306 iso_code_class[i] = ISO_graphic_plane_1;
10307 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10308 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
10309 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10310 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10311 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10312 iso_code_class[ISO_CODE_ESC] = ISO_escape;
10313 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10314 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10315 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10316
df7492f9
KH
10317 for (i = 0; i < 256; i++)
10318 {
10319 emacs_mule_bytes[i] = 1;
10320 }
7c78e542
KH
10321 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10322 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10323 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10324 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
10325}
10326
10327#ifdef emacs
10328
dfcf069d 10329void
971de7fb 10330syms_of_coding (void)
e0e989f6 10331{
df7492f9 10332 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
10333 {
10334 Lisp_Object args[2];
10335 args[0] = QCtest;
10336 args[1] = Qeq;
10337 Vcoding_system_hash_table = Fmake_hash_table (2, args);
10338 }
df7492f9
KH
10339
10340 staticpro (&Vsjis_coding_system);
10341 Vsjis_coding_system = Qnil;
e0e989f6 10342
df7492f9
KH
10343 staticpro (&Vbig5_coding_system);
10344 Vbig5_coding_system = Qnil;
10345
24a73b0a
KH
10346 staticpro (&Vcode_conversion_reused_workbuf);
10347 Vcode_conversion_reused_workbuf = Qnil;
10348
10349 staticpro (&Vcode_conversion_workbuf_name);
2a0213a6 10350 Vcode_conversion_workbuf_name = build_pure_c_string (" *code-conversion-work*");
e0e989f6 10351
24a73b0a 10352 reused_workbuf_in_use = 0;
df7492f9
KH
10353
10354 DEFSYM (Qcharset, "charset");
10355 DEFSYM (Qtarget_idx, "target-idx");
10356 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
10357 Fset (Qcoding_system_history, Qnil);
10358
9ce27fde 10359 /* Target FILENAME is the first argument. */
e0e989f6 10360 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 10361 /* Target FILENAME is the third argument. */
e0e989f6
KH
10362 Fput (Qwrite_region, Qtarget_idx, make_number (2));
10363
df7492f9 10364 DEFSYM (Qcall_process, "call-process");
9ce27fde 10365 /* Target PROGRAM is the first argument. */
e0e989f6
KH
10366 Fput (Qcall_process, Qtarget_idx, make_number (0));
10367
df7492f9 10368 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 10369 /* Target PROGRAM is the third argument. */
e0e989f6
KH
10370 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10371
df7492f9 10372 DEFSYM (Qstart_process, "start-process");
9ce27fde 10373 /* Target PROGRAM is the third argument. */
e0e989f6
KH
10374 Fput (Qstart_process, Qtarget_idx, make_number (2));
10375
df7492f9 10376 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 10377 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
10378 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10379
df7492f9
KH
10380 DEFSYM (Qcoding_system, "coding-system");
10381 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 10382
df7492f9
KH
10383 DEFSYM (Qeol_type, "eol-type");
10384 DEFSYM (Qunix, "unix");
10385 DEFSYM (Qdos, "dos");
4ed46869 10386
df7492f9
KH
10387 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10388 DEFSYM (Qpost_read_conversion, "post-read-conversion");
10389 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10390 DEFSYM (Qdefault_char, "default-char");
10391 DEFSYM (Qundecided, "undecided");
10392 DEFSYM (Qno_conversion, "no-conversion");
10393 DEFSYM (Qraw_text, "raw-text");
4ed46869 10394
df7492f9 10395 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 10396
df7492f9 10397 DEFSYM (Qutf_8, "utf-8");
8f924df7 10398 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 10399
df7492f9 10400 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
10401 DEFSYM (Qbig, "big");
10402 DEFSYM (Qlittle, "little");
27901516 10403
df7492f9
KH
10404 DEFSYM (Qshift_jis, "shift-jis");
10405 DEFSYM (Qbig5, "big5");
4ed46869 10406
df7492f9 10407 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 10408
df7492f9 10409 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869 10410 Fput (Qcoding_system_error, Qerror_conditions,
3438fe21 10411 listn (CONSTYPE_PURE, 2, Qcoding_system_error, Qerror));
4ed46869 10412 Fput (Qcoding_system_error, Qerror_message,
2a0213a6 10413 build_pure_c_string ("Invalid coding system"));
4ed46869 10414
05e6f5dc
KH
10415 /* Intern this now in case it isn't already done.
10416 Setting this variable twice is harmless.
10417 But don't staticpro it here--that is done in alloc.c. */
d67b4f80 10418 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
70c22245 10419
df7492f9 10420 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 10421 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
10422 DEFSYM (Qtranslation_table_id, "translation-table-id");
10423 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10424 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 10425
df7492f9 10426 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 10427
df7492f9 10428 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 10429
01378f49 10430 DEFSYM (QCcategory, ":category");
a6f87d34 10431 DEFSYM (QCmnemonic, ":mnemonic");
2133e2d1 10432 DEFSYM (QCdefault_char, ":default-char");
a6f87d34
KH
10433 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10434 DEFSYM (QCencode_translation_table, ":encode-translation-table");
10435 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10436 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
35befdaa 10437 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
01378f49 10438
df7492f9
KH
10439 Vcoding_category_table
10440 = Fmake_vector (make_number (coding_category_max), Qnil);
10441 staticpro (&Vcoding_category_table);
10442 /* Followings are target of code detection. */
10443 ASET (Vcoding_category_table, coding_category_iso_7,
d67b4f80 10444 intern_c_string ("coding-category-iso-7"));
df7492f9 10445 ASET (Vcoding_category_table, coding_category_iso_7_tight,
d67b4f80 10446 intern_c_string ("coding-category-iso-7-tight"));
df7492f9 10447 ASET (Vcoding_category_table, coding_category_iso_8_1,
d67b4f80 10448 intern_c_string ("coding-category-iso-8-1"));
df7492f9 10449 ASET (Vcoding_category_table, coding_category_iso_8_2,
d67b4f80 10450 intern_c_string ("coding-category-iso-8-2"));
df7492f9 10451 ASET (Vcoding_category_table, coding_category_iso_7_else,
d67b4f80 10452 intern_c_string ("coding-category-iso-7-else"));
df7492f9 10453 ASET (Vcoding_category_table, coding_category_iso_8_else,
d67b4f80 10454 intern_c_string ("coding-category-iso-8-else"));
a470d443 10455 ASET (Vcoding_category_table, coding_category_utf_8_auto,
d67b4f80 10456 intern_c_string ("coding-category-utf-8-auto"));
a470d443 10457 ASET (Vcoding_category_table, coding_category_utf_8_nosig,
d67b4f80 10458 intern_c_string ("coding-category-utf-8"));
a470d443 10459 ASET (Vcoding_category_table, coding_category_utf_8_sig,
d67b4f80 10460 intern_c_string ("coding-category-utf-8-sig"));
df7492f9 10461 ASET (Vcoding_category_table, coding_category_utf_16_be,
d67b4f80 10462 intern_c_string ("coding-category-utf-16-be"));
ff563fce 10463 ASET (Vcoding_category_table, coding_category_utf_16_auto,
d67b4f80 10464 intern_c_string ("coding-category-utf-16-auto"));
df7492f9 10465 ASET (Vcoding_category_table, coding_category_utf_16_le,
d67b4f80 10466 intern_c_string ("coding-category-utf-16-le"));
df7492f9 10467 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
d67b4f80 10468 intern_c_string ("coding-category-utf-16-be-nosig"));
df7492f9 10469 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
d67b4f80 10470 intern_c_string ("coding-category-utf-16-le-nosig"));
df7492f9 10471 ASET (Vcoding_category_table, coding_category_charset,
d67b4f80 10472 intern_c_string ("coding-category-charset"));
df7492f9 10473 ASET (Vcoding_category_table, coding_category_sjis,
d67b4f80 10474 intern_c_string ("coding-category-sjis"));
df7492f9 10475 ASET (Vcoding_category_table, coding_category_big5,
d67b4f80 10476 intern_c_string ("coding-category-big5"));
df7492f9 10477 ASET (Vcoding_category_table, coding_category_ccl,
d67b4f80 10478 intern_c_string ("coding-category-ccl"));
df7492f9 10479 ASET (Vcoding_category_table, coding_category_emacs_mule,
d67b4f80 10480 intern_c_string ("coding-category-emacs-mule"));
df7492f9
KH
10481 /* Followings are NOT target of code detection. */
10482 ASET (Vcoding_category_table, coding_category_raw_text,
d67b4f80 10483 intern_c_string ("coding-category-raw-text"));
df7492f9 10484 ASET (Vcoding_category_table, coding_category_undecided,
d67b4f80 10485 intern_c_string ("coding-category-undecided"));
ecf488bc 10486
065e3595
KH
10487 DEFSYM (Qinsufficient_source, "insufficient-source");
10488 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10489 DEFSYM (Qinvalid_source, "invalid-source");
10490 DEFSYM (Qinterrupted, "interrupted");
10491 DEFSYM (Qinsufficient_memory, "insufficient-memory");
44e8490d 10492 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
065e3595 10493
4ed46869
KH
10494 defsubr (&Scoding_system_p);
10495 defsubr (&Sread_coding_system);
10496 defsubr (&Sread_non_nil_coding_system);
10497 defsubr (&Scheck_coding_system);
10498 defsubr (&Sdetect_coding_region);
d46c5b12 10499 defsubr (&Sdetect_coding_string);
05e6f5dc 10500 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 10501 defsubr (&Sunencodable_char_position);
df7492f9 10502 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
10503 defsubr (&Sdecode_coding_region);
10504 defsubr (&Sencode_coding_region);
10505 defsubr (&Sdecode_coding_string);
10506 defsubr (&Sencode_coding_string);
10507 defsubr (&Sdecode_sjis_char);
10508 defsubr (&Sencode_sjis_char);
10509 defsubr (&Sdecode_big5_char);
10510 defsubr (&Sencode_big5_char);
1ba9e4ab 10511 defsubr (&Sset_terminal_coding_system_internal);
c4825358 10512 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 10513 defsubr (&Sterminal_coding_system);
1ba9e4ab 10514 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 10515 defsubr (&Skeyboard_coding_system);
a5d301df 10516 defsubr (&Sfind_operation_coding_system);
df7492f9 10517 defsubr (&Sset_coding_system_priority);
6b89e3aa 10518 defsubr (&Sdefine_coding_system_internal);
df7492f9 10519 defsubr (&Sdefine_coding_system_alias);
a6f87d34 10520 defsubr (&Scoding_system_put);
df7492f9
KH
10521 defsubr (&Scoding_system_base);
10522 defsubr (&Scoding_system_plist);
10523 defsubr (&Scoding_system_aliases);
10524 defsubr (&Scoding_system_eol_type);
10525 defsubr (&Scoding_system_priority_list);
4ed46869 10526
29208e82 10527 DEFVAR_LISP ("coding-system-list", Vcoding_system_list,
48b0f3ae
PJ
10528 doc: /* List of coding systems.
10529
10530Do not alter the value of this variable manually. This variable should be
df7492f9 10531updated by the functions `define-coding-system' and
48b0f3ae 10532`define-coding-system-alias'. */);
4608c386
KH
10533 Vcoding_system_list = Qnil;
10534
29208e82 10535 DEFVAR_LISP ("coding-system-alist", Vcoding_system_alist,
48b0f3ae
PJ
10536 doc: /* Alist of coding system names.
10537Each element is one element list of coding system name.
446dcd75 10538This variable is given to `completing-read' as COLLECTION argument.
48b0f3ae
PJ
10539
10540Do not alter the value of this variable manually. This variable should be
10541updated by the functions `make-coding-system' and
10542`define-coding-system-alias'. */);
4608c386
KH
10543 Vcoding_system_alist = Qnil;
10544
29208e82 10545 DEFVAR_LISP ("coding-category-list", Vcoding_category_list,
48b0f3ae
PJ
10546 doc: /* List of coding-categories (symbols) ordered by priority.
10547
10548On detecting a coding system, Emacs tries code detection algorithms
10549associated with each coding-category one by one in this order. When
10550one algorithm agrees with a byte sequence of source text, the coding
0ec31faf
KH
10551system bound to the corresponding coding-category is selected.
10552
448e17d6 10553Don't modify this variable directly, but use `set-coding-system-priority'. */);
4ed46869
KH
10554 {
10555 int i;
10556
10557 Vcoding_category_list = Qnil;
df7492f9 10558 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 10559 Vcoding_category_list
28be1ada 10560 = Fcons (AREF (Vcoding_category_table, i),
d46c5b12 10561 Vcoding_category_list);
4ed46869
KH
10562 }
10563
29208e82 10564 DEFVAR_LISP ("coding-system-for-read", Vcoding_system_for_read,
48b0f3ae
PJ
10565 doc: /* Specify the coding system for read operations.
10566It is useful to bind this variable with `let', but do not set it globally.
10567If the value is a coding system, it is used for decoding on read operation.
446dcd75
JB
10568If not, an appropriate element is used from one of the coding system alists.
10569There are three such tables: `file-coding-system-alist',
48b0f3ae 10570`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
10571 Vcoding_system_for_read = Qnil;
10572
29208e82 10573 DEFVAR_LISP ("coding-system-for-write", Vcoding_system_for_write,
48b0f3ae
PJ
10574 doc: /* Specify the coding system for write operations.
10575Programs bind this variable with `let', but you should not set it globally.
10576If the value is a coding system, it is used for encoding of output,
10577when writing it to a file and when sending it to a file or subprocess.
10578
10579If this does not specify a coding system, an appropriate element
446dcd75
JB
10580is used from one of the coding system alists.
10581There are three such tables: `file-coding-system-alist',
48b0f3ae
PJ
10582`process-coding-system-alist', and `network-coding-system-alist'.
10583For output to files, if the above procedure does not specify a coding system,
10584the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
10585 Vcoding_system_for_write = Qnil;
10586
29208e82 10587 DEFVAR_LISP ("last-coding-system-used", Vlast_coding_system_used,
df7492f9
KH
10588 doc: /*
10589Coding system used in the latest file or process I/O. */);
4ed46869
KH
10590 Vlast_coding_system_used = Qnil;
10591
29208e82 10592 DEFVAR_LISP ("last-code-conversion-error", Vlast_code_conversion_error,
065e3595
KH
10593 doc: /*
10594Error status of the last code conversion.
10595
10596When an error was detected in the last code conversion, this variable
10597is set to one of the following symbols.
10598 `insufficient-source'
10599 `inconsistent-eol'
10600 `invalid-source'
10601 `interrupted'
10602 `insufficient-memory'
10603When no error was detected, the value doesn't change. So, to check
10604the error status of a code conversion by this variable, you must
10605explicitly set this variable to nil before performing code
10606conversion. */);
10607 Vlast_code_conversion_error = Qnil;
10608
29208e82 10609 DEFVAR_BOOL ("inhibit-eol-conversion", inhibit_eol_conversion,
df7492f9
KH
10610 doc: /*
10611*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
10612See info node `Coding Systems' and info node `Text and Binary' concerning
10613such conversion. */);
9ce27fde
KH
10614 inhibit_eol_conversion = 0;
10615
29208e82 10616 DEFVAR_BOOL ("inherit-process-coding-system", inherit_process_coding_system,
df7492f9
KH
10617 doc: /*
10618Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
10619Bind it to t if the process output is to be treated as if it were a file
10620read from some filesystem. */);
ed29121d
EZ
10621 inherit_process_coding_system = 0;
10622
29208e82 10623 DEFVAR_LISP ("file-coding-system-alist", Vfile_coding_system_alist,
df7492f9
KH
10624 doc: /*
10625Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
10626The format is ((PATTERN . VAL) ...),
10627where PATTERN is a regular expression matching a file name,
10628VAL is a coding system, a cons of coding systems, or a function symbol.
10629If VAL is a coding system, it is used for both decoding and encoding
10630the file contents.
10631If VAL is a cons of coding systems, the car part is used for decoding,
10632and the cdr part is used for encoding.
10633If VAL is a function symbol, the function must return a coding system
2c53e699
KH
10634or a cons of coding systems which are used as above. The function is
10635called with an argument that is a list of the arguments with which
5a0bbd9a
KH
10636`find-operation-coding-system' was called. If the function can't decide
10637a coding system, it can return `undecided' so that the normal
10638code-detection is performed.
48b0f3ae
PJ
10639
10640See also the function `find-operation-coding-system'
10641and the variable `auto-coding-alist'. */);
02ba4723
KH
10642 Vfile_coding_system_alist = Qnil;
10643
29208e82 10644 DEFVAR_LISP ("process-coding-system-alist", Vprocess_coding_system_alist,
df7492f9
KH
10645 doc: /*
10646Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
10647The format is ((PATTERN . VAL) ...),
10648where PATTERN is a regular expression matching a program name,
10649VAL is a coding system, a cons of coding systems, or a function symbol.
10650If VAL is a coding system, it is used for both decoding what received
10651from the program and encoding what sent to the program.
10652If VAL is a cons of coding systems, the car part is used for decoding,
10653and the cdr part is used for encoding.
10654If VAL is a function symbol, the function must return a coding system
10655or a cons of coding systems which are used as above.
10656
10657See also the function `find-operation-coding-system'. */);
02ba4723
KH
10658 Vprocess_coding_system_alist = Qnil;
10659
29208e82 10660 DEFVAR_LISP ("network-coding-system-alist", Vnetwork_coding_system_alist,
df7492f9
KH
10661 doc: /*
10662Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
10663The format is ((PATTERN . VAL) ...),
10664where PATTERN is a regular expression matching a network service name
10665or is a port number to connect to,
10666VAL is a coding system, a cons of coding systems, or a function symbol.
10667If VAL is a coding system, it is used for both decoding what received
10668from the network stream and encoding what sent to the network stream.
10669If VAL is a cons of coding systems, the car part is used for decoding,
10670and the cdr part is used for encoding.
10671If VAL is a function symbol, the function must return a coding system
10672or a cons of coding systems which are used as above.
10673
10674See also the function `find-operation-coding-system'. */);
02ba4723 10675 Vnetwork_coding_system_alist = Qnil;
4ed46869 10676
29208e82 10677 DEFVAR_LISP ("locale-coding-system", Vlocale_coding_system,
75205970
RS
10678 doc: /* Coding system to use with system messages.
10679Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
10680 Vlocale_coding_system = Qnil;
10681
005f0d35 10682 /* The eol mnemonics are reset in startup.el system-dependently. */
29208e82 10683 DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix,
df7492f9
KH
10684 doc: /*
10685*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
2a0213a6 10686 eol_mnemonic_unix = build_pure_c_string (":");
4ed46869 10687
29208e82 10688 DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos,
df7492f9
KH
10689 doc: /*
10690*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
2a0213a6 10691 eol_mnemonic_dos = build_pure_c_string ("\\");
4ed46869 10692
29208e82 10693 DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac,
df7492f9
KH
10694 doc: /*
10695*String displayed in mode line for MAC-like (CR) end-of-line format. */);
2a0213a6 10696 eol_mnemonic_mac = build_pure_c_string ("/");
4ed46869 10697
29208e82 10698 DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided,
df7492f9
KH
10699 doc: /*
10700*String displayed in mode line when end-of-line format is not yet determined. */);
2a0213a6 10701 eol_mnemonic_undecided = build_pure_c_string (":");
4ed46869 10702
29208e82 10703 DEFVAR_LISP ("enable-character-translation", Venable_character_translation,
df7492f9
KH
10704 doc: /*
10705*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 10706 Venable_character_translation = Qt;
bdd9fb48 10707
f967223b 10708 DEFVAR_LISP ("standard-translation-table-for-decode",
29208e82 10709 Vstandard_translation_table_for_decode,
48b0f3ae 10710 doc: /* Table for translating characters while decoding. */);
f967223b 10711 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 10712
f967223b 10713 DEFVAR_LISP ("standard-translation-table-for-encode",
29208e82 10714 Vstandard_translation_table_for_encode,
48b0f3ae 10715 doc: /* Table for translating characters while encoding. */);
f967223b 10716 Vstandard_translation_table_for_encode = Qnil;
4ed46869 10717
29208e82 10718 DEFVAR_LISP ("charset-revision-table", Vcharset_revision_table,
48b0f3ae
PJ
10719 doc: /* Alist of charsets vs revision numbers.
10720While encoding, if a charset (car part of an element) is found,
df7492f9
KH
10721designate it with the escape sequence identifying revision (cdr part
10722of the element). */);
10723 Vcharset_revision_table = Qnil;
02ba4723
KH
10724
10725 DEFVAR_LISP ("default-process-coding-system",
29208e82 10726 Vdefault_process_coding_system,
48b0f3ae
PJ
10727 doc: /* Cons of coding systems used for process I/O by default.
10728The car part is used for decoding a process output,
10729the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 10730 Vdefault_process_coding_system = Qnil;
c4825358 10731
29208e82 10732 DEFVAR_LISP ("latin-extra-code-table", Vlatin_extra_code_table,
df7492f9
KH
10733 doc: /*
10734Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
10735This is a vector of length 256.
10736If Nth element is non-nil, the existence of code N in a file
10737\(or output of subprocess) doesn't prevent it to be detected as
10738a coding system of ISO 2022 variant which has a flag
10739`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10740or reading output of a subprocess.
446dcd75 10741Only 128th through 159th elements have a meaning. */);
3f003981 10742 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
10743
10744 DEFVAR_LISP ("select-safe-coding-system-function",
29208e82 10745 Vselect_safe_coding_system_function,
df7492f9
KH
10746 doc: /*
10747Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
10748
10749If set, this function is called to force a user to select a proper
10750coding system which can encode the text in the case that a default
fdecf907
GM
10751coding system used in each operation can't encode the text. The
10752function should take care that the buffer is not modified while
10753the coding system is being selected.
48b0f3ae
PJ
10754
10755The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
10756 Vselect_safe_coding_system_function = Qnil;
10757
5d5bf4d8 10758 DEFVAR_BOOL ("coding-system-require-warning",
29208e82 10759 coding_system_require_warning,
5d5bf4d8 10760 doc: /* Internal use only.
6b89e3aa
KH
10761If non-nil, on writing a file, `select-safe-coding-system-function' is
10762called even if `coding-system-for-write' is non-nil. The command
10763`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
10764 coding_system_require_warning = 0;
10765
10766
22ab2303 10767 DEFVAR_BOOL ("inhibit-iso-escape-detection",
29208e82 10768 inhibit_iso_escape_detection,
df7492f9 10769 doc: /*
97b1b294 10770If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
48b0f3ae 10771
97b1b294
EZ
10772When Emacs reads text, it tries to detect how the text is encoded.
10773This code detection is sensitive to escape sequences. If Emacs sees
10774a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10775of the ISO2022 encodings, and decodes text by the corresponding coding
10776system (e.g. `iso-2022-7bit').
48b0f3ae
PJ
10777
10778However, there may be a case that you want to read escape sequences in
10779a file as is. In such a case, you can set this variable to non-nil.
97b1b294
EZ
10780Then the code detection will ignore any escape sequences, and no text is
10781detected as encoded in some ISO-2022 encoding. The result is that all
48b0f3ae
PJ
10782escape sequences become visible in a buffer.
10783
10784The default value is nil, and it is strongly recommended not to change
10785it. That is because many Emacs Lisp source files that contain
10786non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10787in Emacs's distribution, and they won't be decoded correctly on
10788reading if you suppress escape sequence detection.
10789
10790The other way to read escape sequences in a file without decoding is
97b1b294 10791to explicitly specify some coding system that doesn't use ISO-2022
48b0f3ae 10792escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 10793 inhibit_iso_escape_detection = 0;
002fdb44 10794
97b1b294 10795 DEFVAR_BOOL ("inhibit-null-byte-detection",
29208e82 10796 inhibit_null_byte_detection,
97b1b294
EZ
10797 doc: /* If non-nil, Emacs ignores null bytes on code detection.
10798By default, Emacs treats it as binary data, and does not attempt to
10799decode it. The effect is as if you specified `no-conversion' for
10800reading that text.
10801
10802Set this to non-nil when a regular text happens to include null bytes.
10803Examples are Index nodes of Info files and null-byte delimited output
10804from GNU Find and GNU Grep. Emacs will then ignore the null bytes and
10805decode text as usual. */);
10806 inhibit_null_byte_detection = 0;
10807
29208e82 10808 DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
15c8f9d1 10809 doc: /* Char table for translating self-inserting characters.
446dcd75 10810This is applied to the result of input methods, not their input.
8434d0b8
EZ
10811See also `keyboard-translate-table'.
10812
10813Use of this variable for character code unification was rendered
10814obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10815internal character representation. */);
002fdb44 10816 Vtranslation_table_for_input = Qnil;
8f924df7 10817
2c78b7e1
KH
10818 {
10819 Lisp_Object args[coding_arg_max];
8f924df7 10820 Lisp_Object plist[16];
2c78b7e1
KH
10821 int i;
10822
10823 for (i = 0; i < coding_arg_max; i++)
10824 args[i] = Qnil;
10825
d67b4f80 10826 plist[0] = intern_c_string (":name");
2c78b7e1 10827 plist[1] = args[coding_arg_name] = Qno_conversion;
d67b4f80 10828 plist[2] = intern_c_string (":mnemonic");
2c78b7e1 10829 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
d67b4f80 10830 plist[4] = intern_c_string (":coding-type");
2c78b7e1 10831 plist[5] = args[coding_arg_coding_type] = Qraw_text;
d67b4f80 10832 plist[6] = intern_c_string (":ascii-compatible-p");
2c78b7e1 10833 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
d67b4f80 10834 plist[8] = intern_c_string (":default-char");
2c78b7e1 10835 plist[9] = args[coding_arg_default_char] = make_number (0);
d67b4f80 10836 plist[10] = intern_c_string (":for-unibyte");
8f924df7 10837 plist[11] = args[coding_arg_for_unibyte] = Qt;
d67b4f80 10838 plist[12] = intern_c_string (":docstring");
2a0213a6 10839 plist[13] = build_pure_c_string ("Do no conversion.\n\
2c78b7e1
KH
10840\n\
10841When you visit a file with this coding, the file is read into a\n\
10842unibyte buffer as is, thus each byte of a file is treated as a\n\
10843character.");
d67b4f80 10844 plist[14] = intern_c_string (":eol-type");
8f924df7
KH
10845 plist[15] = args[coding_arg_eol_type] = Qunix;
10846 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1 10847 Fdefine_coding_system_internal (coding_arg_max, args);
ae6f73fa
KH
10848
10849 plist[1] = args[coding_arg_name] = Qundecided;
10850 plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10851 plist[5] = args[coding_arg_coding_type] = Qundecided;
10852 /* This is already set.
35befdaa 10853 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
d67b4f80 10854 plist[8] = intern_c_string (":charset-list");
ae6f73fa
KH
10855 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10856 plist[11] = args[coding_arg_for_unibyte] = Qnil;
2a0213a6 10857 plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
ae6f73fa
KH
10858 plist[15] = args[coding_arg_eol_type] = Qnil;
10859 args[coding_arg_plist] = Flist (16, plist);
10860 Fdefine_coding_system_internal (coding_arg_max, args);
2c78b7e1
KH
10861 }
10862
2c78b7e1 10863 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
10864
10865 {
10866 int i;
10867
10868 for (i = 0; i < coding_category_max; i++)
10869 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10870 }
1a4990fb 10871#if defined (DOS_NT)
fcbcfb64
KH
10872 system_eol_type = Qdos;
10873#else
10874 system_eol_type = Qunix;
10875#endif
10876 staticpro (&system_eol_type);
4ed46869
KH
10877}
10878
68c45bf0 10879char *
971de7fb 10880emacs_strerror (int error_number)
68c45bf0
PE
10881{
10882 char *str;
10883
ca9c0567 10884 synchronize_system_messages_locale ();
68c45bf0
PE
10885 str = strerror (error_number);
10886
10887 if (! NILP (Vlocale_coding_system))
10888 {
10889 Lisp_Object dec = code_convert_string_norecord (build_string (str),
10890 Vlocale_coding_system,
10891 0);
51b59d79 10892 str = SSDATA (dec);
68c45bf0
PE
10893 }
10894
10895 return str;
10896}
10897
4ed46869 10898#endif /* emacs */