* net/soap-client.el:
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
73b0cd50 2 Copyright (C) 2001-2011 Free Software Foundation, Inc.
7976eda0 3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5df4f04c 4 2005, 2006, 2007, 2008, 2009, 2010, 2011
ce03bf76
KH
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
8f924df7 7 Copyright (C) 2003
df7492f9
KH
8 National Institute of Advanced Industrial Science and Technology (AIST)
9 Registration Number H13PRO009
4ed46869 10
369314dc
KH
11This file is part of GNU Emacs.
12
9ec0b715 13GNU Emacs is free software: you can redistribute it and/or modify
369314dc 14it under the terms of the GNU General Public License as published by
9ec0b715
GM
15the Free Software Foundation, either version 3 of the License, or
16(at your option) any later version.
4ed46869 17
369314dc
KH
18GNU Emacs is distributed in the hope that it will be useful,
19but WITHOUT ANY WARRANTY; without even the implied warranty of
20MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21GNU General Public License for more details.
4ed46869 22
369314dc 23You should have received a copy of the GNU General Public License
9ec0b715 24along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 56 coding system.
4ed46869 57
e19c3639
KH
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
4ed46869 63
e19c3639 64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
4ed46869 66
df7492f9
KH
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
4ed46869
KH
82
83 The most famous coding system for multiple character sets. X's
df7492f9
KH
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
4ed46869 87
df7492f9 88 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
cfb43547 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we set up a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
4ed46869
KH
138
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9 146 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
147 CODING conforms to the format of XXX, and update the members of
148 DETECT_INFO.
df7492f9 149
ff0dacd7 150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
151
152 Below is the template of these functions. */
153
4ed46869 154#if 0
df7492f9 155static int
cf84bb53
JB
156detect_coding_XXX (struct coding_system *coding,
157 struct coding_detection_info *detect_info)
4ed46869 158{
f1d34bca
MB
159 const unsigned char *src = coding->source;
160 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 161 int multibytep = coding->src_multibyte;
ff0dacd7 162 int consumed_chars = 0;
df7492f9
KH
163 int found = 0;
164 ...;
165
166 while (1)
167 {
ad1746f5 168 /* Get one byte from the source. If the source is exhausted, jump
df7492f9
KH
169 to no_more_source:. */
170 ONE_MORE_BYTE (c);
ff0dacd7
KH
171
172 if (! __C_conforms_to_XXX___ (c))
173 break;
174 if (! __C_strongly_suggests_XXX__ (c))
175 found = CATEGORY_MASK_XXX;
df7492f9 176 }
ff0dacd7
KH
177 /* The byte sequence is invalid for XXX. */
178 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 179 return 0;
ff0dacd7 180
df7492f9 181 no_more_source:
ad1746f5 182 /* The source exhausted successfully. */
ff0dacd7 183 detect_info->found |= found;
df7492f9 184 return 1;
4ed46869
KH
185}
186#endif
187
188/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
189
df7492f9
KH
190 These functions decode a byte sequence specified as a source by
191 CODING. The resulting multibyte text goes to a place pointed to by
192 CODING->charbuf, the length of which should not exceed
193 CODING->charbuf_size;
d46c5b12 194
df7492f9
KH
195 These functions set the information of original and decoded texts in
196 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
197 They also set CODING->result to one of CODING_RESULT_XXX indicating
198 how the decoding is finished.
d46c5b12 199
df7492f9 200 Below is the template of these functions. */
d46c5b12 201
4ed46869 202#if 0
b73bfc1c 203static void
cf84bb53 204decode_coding_XXXX (struct coding_system *coding)
4ed46869 205{
f1d34bca
MB
206 const unsigned char *src = coding->source + coding->consumed;
207 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
208 /* SRC_BASE remembers the start position in source in each loop.
209 The loop will be exited when there's not enough source code, or
210 when there's no room in CHARBUF for a decoded character. */
f1d34bca 211 const unsigned char *src_base;
df7492f9 212 /* A buffer to produce decoded characters. */
69a80ea3
KH
213 int *charbuf = coding->charbuf + coding->charbuf_used;
214 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
215 int multibytep = coding->src_multibyte;
216
217 while (1)
218 {
219 src_base = src;
220 if (charbuf < charbuf_end)
221 /* No more room to produce a decoded character. */
222 break;
223 ONE_MORE_BYTE (c);
224 /* Decode it. */
225 }
226
227 no_more_source:
228 if (src_base < src_end
229 && coding->mode & CODING_MODE_LAST_BLOCK)
230 /* If the source ends by partial bytes to construct a character,
231 treat them as eight-bit raw data. */
232 while (src_base < src_end && charbuf < charbuf_end)
233 *charbuf++ = *src_base++;
234 /* Remember how many bytes and characters we consumed. If the
235 source is multibyte, the bytes and chars are not identical. */
236 coding->consumed = coding->consumed_char = src_base - coding->source;
237 /* Remember how many characters we produced. */
238 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
239}
240#endif
241
242/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
243
df7492f9
KH
244 These functions encode SRC_BYTES length text at SOURCE of Emacs'
245 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
246 goes to a place pointed to by DESTINATION, the length of which
247 should not exceed DST_BYTES.
d46c5b12 248
df7492f9
KH
249 These functions set the information of original and encoded texts in
250 the members produced, produced_char, consumed, and consumed_char of
251 the structure *CODING. They also set the member result to one of
252 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 253
df7492f9
KH
254 DST_BYTES zero means that source area and destination area are
255 overlapped, which means that we can produce an encoded text until it
256 reaches the head of the not-yet-encoded source text.
d46c5b12 257
df7492f9 258 Below is a template of these functions. */
4ed46869 259#if 0
b73bfc1c 260static void
cf84bb53 261encode_coding_XXX (struct coding_system *coding)
4ed46869 262{
df7492f9
KH
263 int multibytep = coding->dst_multibyte;
264 int *charbuf = coding->charbuf;
265 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
266 unsigned char *dst = coding->destination + coding->produced;
267 unsigned char *dst_end = coding->destination + coding->dst_bytes;
268 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
269 int produced_chars = 0;
270
271 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
272 {
273 int c = *charbuf;
274 /* Encode C into DST, and increment DST. */
275 }
276 label_no_more_destination:
277 /* How many chars and bytes we produced. */
278 coding->produced_char += produced_chars;
279 coding->produced = dst - coding->destination;
4ed46869
KH
280}
281#endif
282
4ed46869
KH
283\f
284/*** 1. Preamble ***/
285
68c45bf0 286#include <config.h>
4ed46869 287#include <stdio.h>
d7306fe6 288#include <setjmp.h>
4ed46869 289
4ed46869
KH
290#include "lisp.h"
291#include "buffer.h"
df7492f9 292#include "character.h"
4ed46869
KH
293#include "charset.h"
294#include "ccl.h"
df7492f9 295#include "composite.h"
4ed46869
KH
296#include "coding.h"
297#include "window.h"
b8299c66
KL
298#include "frame.h"
299#include "termhooks.h"
4ed46869 300
df7492f9 301Lisp_Object Vcoding_system_hash_table;
4ed46869 302
df7492f9 303Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73 304Lisp_Object Qunix, Qdos;
4ed46869
KH
305Lisp_Object Qbuffer_file_coding_system;
306Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 307Lisp_Object Qdefault_char;
27901516 308Lisp_Object Qno_conversion, Qundecided;
df7492f9 309Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 310Lisp_Object Qbig, Qlittle;
bb0115a2 311Lisp_Object Qcoding_system_history;
1397dc18 312Lisp_Object Qvalid_codes;
2133e2d1 313Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
a6f87d34
KH
314Lisp_Object QCdecode_translation_table, QCencode_translation_table;
315Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
35befdaa 316Lisp_Object QCascii_compatible_p;
4ed46869 317
387f6ba5 318Lisp_Object Qcall_process, Qcall_process_region;
4ed46869
KH
319Lisp_Object Qstart_process, Qopen_network_stream;
320Lisp_Object Qtarget_idx;
321
065e3595
KH
322Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
323Lisp_Object Qinterrupted, Qinsufficient_memory;
324
44e8490d
KH
325/* If a symbol has this property, evaluate the value to define the
326 symbol as a coding system. */
327static Lisp_Object Qcoding_system_define_form;
328
fcbcfb64
KH
329/* Format of end-of-line decided by system. This is Qunix on
330 Unix and Mac, Qdos on DOS/Windows.
331 This has an effect only for external encoding (i.e. for output to
332 file and process), not for in-buffer or Lisp string encoding. */
333static Lisp_Object system_eol_type;
334
4ed46869
KH
335#ifdef emacs
336
4608c386 337Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 338
d46c5b12
KH
339/* Coding system emacs-mule and raw-text are for converting only
340 end-of-line format. */
341Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 342Lisp_Object Qutf_8_emacs;
ecf488bc 343
4ed46869
KH
344/* Coding-systems are handed between Emacs Lisp programs and C internal
345 routines by the following three variables. */
c4825358
KH
346/* Coding system to be used to encode text for terminal display when
347 terminal coding system is nil. */
348struct coding_system safe_terminal_coding;
349
4ed46869
KH
350#endif /* emacs */
351
f967223b
KH
352Lisp_Object Qtranslation_table;
353Lisp_Object Qtranslation_table_id;
354Lisp_Object Qtranslation_table_for_decode;
355Lisp_Object Qtranslation_table_for_encode;
4ed46869 356
df7492f9
KH
357/* Two special coding systems. */
358Lisp_Object Vsjis_coding_system;
359Lisp_Object Vbig5_coding_system;
360
df7492f9
KH
361/* ISO2022 section */
362
363#define CODING_ISO_INITIAL(coding, reg) \
364 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
365 coding_attr_iso_initial), \
366 reg)))
367
368
1b3b981b
AS
369#define CODING_ISO_REQUEST(coding, charset_id) \
370 (((charset_id) <= (coding)->max_charset_id \
371 ? ((coding)->safe_charsets[charset_id] != 255 \
372 ? (coding)->safe_charsets[charset_id] \
373 : -1) \
df7492f9
KH
374 : -1))
375
376
377#define CODING_ISO_FLAGS(coding) \
378 ((coding)->spec.iso_2022.flags)
379#define CODING_ISO_DESIGNATION(coding, reg) \
380 ((coding)->spec.iso_2022.current_designation[reg])
381#define CODING_ISO_INVOCATION(coding, plane) \
382 ((coding)->spec.iso_2022.current_invocation[plane])
383#define CODING_ISO_SINGLE_SHIFTING(coding) \
384 ((coding)->spec.iso_2022.single_shifting)
385#define CODING_ISO_BOL(coding) \
386 ((coding)->spec.iso_2022.bol)
387#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
388 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
e951386e
KH
389#define CODING_ISO_CMP_STATUS(coding) \
390 (&(coding)->spec.iso_2022.cmp_status)
391#define CODING_ISO_EXTSEGMENT_LEN(coding) \
392 ((coding)->spec.iso_2022.ctext_extended_segment_len)
393#define CODING_ISO_EMBEDDED_UTF_8(coding) \
394 ((coding)->spec.iso_2022.embedded_utf_8)
df7492f9
KH
395
396/* Control characters of ISO2022. */
397 /* code */ /* function */
398#define ISO_CODE_LF 0x0A /* line-feed */
399#define ISO_CODE_CR 0x0D /* carriage-return */
400#define ISO_CODE_SO 0x0E /* shift-out */
401#define ISO_CODE_SI 0x0F /* shift-in */
402#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
403#define ISO_CODE_ESC 0x1B /* escape */
404#define ISO_CODE_SS2 0x8E /* single-shift-2 */
405#define ISO_CODE_SS3 0x8F /* single-shift-3 */
406#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
407
408/* Every 1-byte code of ISO2022 is classified into one of the
409 following classes. */
410enum iso_code_class_type
411 {
412 ISO_control_0, /* Control codes in the range
413 0x00..0x1F and 0x7F, except for the
414 following 4 codes. */
df7492f9
KH
415 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
416 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
417 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
418 ISO_escape, /* ISO_CODE_ESC (0x1B) */
419 ISO_control_1, /* Control codes in the range
420 0x80..0x9F, except for the
421 following 3 codes. */
422 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
423 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
424 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
425 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
426 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
427 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
428 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
429 };
05e6f5dc 430
df7492f9
KH
431/** The macros CODING_ISO_FLAG_XXX define the flag bits of the
432 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 433
df7492f9
KH
434/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
435 instead of the correct short-form sequence (e.g. ESC $ A). */
436#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 437
df7492f9
KH
438/* If set, reset graphic planes and registers at end-of-line to the
439 initial state. */
440#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 441
df7492f9
KH
442/* If set, reset graphic planes and registers before any control
443 characters to the initial state. */
444#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 445
df7492f9
KH
446/* If set, encode by 7-bit environment. */
447#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 448
df7492f9
KH
449/* If set, use locking-shift function. */
450#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 451
df7492f9
KH
452/* If set, use single-shift function. Overwrite
453 CODING_ISO_FLAG_LOCKING_SHIFT. */
454#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 455
df7492f9
KH
456/* If set, use designation escape sequence. */
457#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 458
df7492f9
KH
459/* If set, produce revision number sequence. */
460#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 461
df7492f9
KH
462/* If set, produce ISO6429's direction specifying sequence. */
463#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 464
df7492f9
KH
465/* If set, assume designation states are reset at beginning of line on
466 output. */
467#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 468
df7492f9
KH
469/* If set, designation sequence should be placed at beginning of line
470 on output. */
471#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 472
ad1746f5 473/* If set, do not encode unsafe characters on output. */
df7492f9 474#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 475
df7492f9
KH
476/* If set, extra latin codes (128..159) are accepted as a valid code
477 on input. */
478#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 479
df7492f9 480#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 481
df7492f9 482#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 483
bf16eb23 484#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 485
bf16eb23 486#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 487
bf16eb23 488#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 489
df7492f9
KH
490/* A character to be produced on output if encoding of the original
491 character is prohibited by CODING_ISO_FLAG_SAFE. */
492#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 493
a470d443
KH
494/* UTF-8 section */
495#define CODING_UTF_8_BOM(coding) \
496 ((coding)->spec.utf_8_bom)
4ed46869 497
df7492f9
KH
498/* UTF-16 section */
499#define CODING_UTF_16_BOM(coding) \
500 ((coding)->spec.utf_16.bom)
4ed46869 501
df7492f9
KH
502#define CODING_UTF_16_ENDIAN(coding) \
503 ((coding)->spec.utf_16.endian)
4ed46869 504
df7492f9
KH
505#define CODING_UTF_16_SURROGATE(coding) \
506 ((coding)->spec.utf_16.surrogate)
4ed46869 507
4ed46869 508
df7492f9
KH
509/* CCL section */
510#define CODING_CCL_DECODER(coding) \
511 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
512#define CODING_CCL_ENCODER(coding) \
513 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
514#define CODING_CCL_VALIDS(coding) \
8f924df7 515 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 516
5a936b46 517/* Index for each coding category in `coding_categories' */
4ed46869 518
df7492f9
KH
519enum coding_category
520 {
521 coding_category_iso_7,
522 coding_category_iso_7_tight,
523 coding_category_iso_8_1,
524 coding_category_iso_8_2,
525 coding_category_iso_7_else,
526 coding_category_iso_8_else,
a470d443
KH
527 coding_category_utf_8_auto,
528 coding_category_utf_8_nosig,
529 coding_category_utf_8_sig,
df7492f9
KH
530 coding_category_utf_16_auto,
531 coding_category_utf_16_be,
532 coding_category_utf_16_le,
533 coding_category_utf_16_be_nosig,
534 coding_category_utf_16_le_nosig,
535 coding_category_charset,
536 coding_category_sjis,
537 coding_category_big5,
538 coding_category_ccl,
539 coding_category_emacs_mule,
540 /* All above are targets of code detection. */
541 coding_category_raw_text,
542 coding_category_undecided,
543 coding_category_max
544 };
545
546/* Definitions of flag bits used in detect_coding_XXXX. */
547#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
548#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
549#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
550#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
551#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
552#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
a470d443
KH
553#define CATEGORY_MASK_UTF_8_AUTO (1 << coding_category_utf_8_auto)
554#define CATEGORY_MASK_UTF_8_NOSIG (1 << coding_category_utf_8_nosig)
555#define CATEGORY_MASK_UTF_8_SIG (1 << coding_category_utf_8_sig)
b49a1807 556#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
557#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
558#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
559#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
560#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
561#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
562#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
563#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
564#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
565#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 566#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
567
568/* This value is returned if detect_coding_mask () finds nothing other
569 than ASCII characters. */
570#define CATEGORY_MASK_ANY \
571 (CATEGORY_MASK_ISO_7 \
572 | CATEGORY_MASK_ISO_7_TIGHT \
573 | CATEGORY_MASK_ISO_8_1 \
574 | CATEGORY_MASK_ISO_8_2 \
575 | CATEGORY_MASK_ISO_7_ELSE \
576 | CATEGORY_MASK_ISO_8_ELSE \
a470d443
KH
577 | CATEGORY_MASK_UTF_8_AUTO \
578 | CATEGORY_MASK_UTF_8_NOSIG \
579 | CATEGORY_MASK_UTF_8_SIG \
2f3cbb32 580 | CATEGORY_MASK_UTF_16_AUTO \
df7492f9
KH
581 | CATEGORY_MASK_UTF_16_BE \
582 | CATEGORY_MASK_UTF_16_LE \
583 | CATEGORY_MASK_UTF_16_BE_NOSIG \
584 | CATEGORY_MASK_UTF_16_LE_NOSIG \
585 | CATEGORY_MASK_CHARSET \
586 | CATEGORY_MASK_SJIS \
587 | CATEGORY_MASK_BIG5 \
588 | CATEGORY_MASK_CCL \
589 | CATEGORY_MASK_EMACS_MULE)
590
591
592#define CATEGORY_MASK_ISO_7BIT \
593 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
594
595#define CATEGORY_MASK_ISO_8BIT \
596 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
597
598#define CATEGORY_MASK_ISO_ELSE \
599 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
600
601#define CATEGORY_MASK_ISO_ESCAPE \
602 (CATEGORY_MASK_ISO_7 \
603 | CATEGORY_MASK_ISO_7_TIGHT \
604 | CATEGORY_MASK_ISO_7_ELSE \
605 | CATEGORY_MASK_ISO_8_ELSE)
606
607#define CATEGORY_MASK_ISO \
608 ( CATEGORY_MASK_ISO_7BIT \
609 | CATEGORY_MASK_ISO_8BIT \
610 | CATEGORY_MASK_ISO_ELSE)
611
612#define CATEGORY_MASK_UTF_16 \
2f3cbb32
KH
613 (CATEGORY_MASK_UTF_16_AUTO \
614 | CATEGORY_MASK_UTF_16_BE \
df7492f9
KH
615 | CATEGORY_MASK_UTF_16_LE \
616 | CATEGORY_MASK_UTF_16_BE_NOSIG \
617 | CATEGORY_MASK_UTF_16_LE_NOSIG)
618
a470d443
KH
619#define CATEGORY_MASK_UTF_8 \
620 (CATEGORY_MASK_UTF_8_AUTO \
621 | CATEGORY_MASK_UTF_8_NOSIG \
622 | CATEGORY_MASK_UTF_8_SIG)
df7492f9 623
df7492f9 624/* Table of coding categories (Lisp symbols). This variable is for
ad1746f5 625 internal use only. */
df7492f9
KH
626static Lisp_Object Vcoding_category_table;
627
628/* Table of coding-categories ordered by priority. */
629static enum coding_category coding_priorities[coding_category_max];
630
631/* Nth element is a coding context for the coding system bound to the
632 Nth coding category. */
633static struct coding_system coding_categories[coding_category_max];
634
df7492f9
KH
635/*** Commonly used macros and functions ***/
636
637#ifndef min
638#define min(a, b) ((a) < (b) ? (a) : (b))
639#endif
640#ifndef max
641#define max(a, b) ((a) > (b) ? (a) : (b))
642#endif
4ed46869 643
24a73b0a
KH
644#define CODING_GET_INFO(coding, attrs, charset_list) \
645 do { \
646 (attrs) = CODING_ID_ATTRS ((coding)->id); \
647 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
df7492f9 648 } while (0)
4ed46869 649
4ed46869 650
df7492f9
KH
651/* Safely get one byte from the source text pointed by SRC which ends
652 at SRC_END, and set C to that byte. If there are not enough bytes
065e3595
KH
653 in the source, it jumps to `no_more_source'. If multibytep is
654 nonzero, and a multibyte character is found at SRC, set C to the
655 negative value of the character code. The caller should declare
656 and set these variables appropriately in advance:
657 src, src_end, src_base, multibytep, consumed_chars, coding
 When the source is exhausted and SRC_BASE is before SRC,
 CODING_RESULT_INSUFFICIENT_SRC is recorded in CODING before the
 jump. CONSUMED_CHARS is incremented for each value stored in C. */
aa72b389 658
065e3595
KH
659#define ONE_MORE_BYTE(c) \
660 do { \
661 if (src == src_end) \
662 { \
663 if (src_base < src) \
664 record_conversion_result \
665 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
666 goto no_more_source; \
667 } \
668 c = *src++; \
669 if (multibytep && (c & 0x80)) \
670 { \
671 if ((c & 0xFE) == 0xC0) \
672 c = ((c & 1) << 6) | *src++; \
673 else \
674 { \
35befdaa
KH
675 src--; \
676 c = - string_char (src, &src, NULL); \
065e3595
KH
677 record_conversion_result \
678 (coding, CODING_RESULT_INVALID_SRC); \
679 } \
680 } \
681 consumed_chars++; \
aa72b389
KH
682 } while (0)
683
f56a4450 684/* Safely get two bytes from the source text pointed by SRC which ends
220eeac9
KH
685 at SRC_END, and set C1 and C2 to those bytes while skipping the
686 heading multibyte characters. If there are not enough bytes in the
687 source, it jumps to `no_more_source'. If multibytep is nonzero and
688 the byte read for C2 has its high bit set but is neither 0xC0 nor
689 0xC1, set C2 to -1. The caller should declare and set these
690 variables appropriately in advance:
f56a4450
KH
691 src, src_end, multibytep
692 It is intended that this macro is used in detect_coding_utf_16. */
693
220eeac9
KH
694#define TWO_MORE_BYTES(c1, c2) \
695 do { \
696 do { \
697 if (src == src_end) \
698 goto no_more_source; \
699 c1 = *src++; \
700 if (multibytep && (c1 & 0x80)) \
701 { \
702 if ((c1 & 0xFE) == 0xC0) \
703 c1 = ((c1 & 1) << 6) | *src++; \
704 else \
705 { \
706 src += BYTES_BY_CHAR_HEAD (c1) - 1; \
707 c1 = -1; \
708 } \
709 } \
710 } while (c1 < 0); \
711 if (src == src_end) \
712 goto no_more_source; \
713 c2 = *src++; \
714 if (multibytep && (c2 & 0x80)) \
715 { \
716 if ((c2 & 0xFE) == 0xC0) \
717 c2 = ((c2 & 1) << 6) | *src++; \
718 else \
719 c2 = -1; \
720 } \
f56a4450
KH
721 } while (0)
722
aa72b389 723
065e3595
KH
/* Get one byte from the source text pointed by SRC and set C to it,
 without checking whether SRC has reached SRC_END. If multibytep is
 nonzero and the byte has its high bit set, C is set either to the
 byte combined from the 0xC0/0xC1 two-byte form or to the negative
 value of the character code; in the latter case
 CODING_RESULT_INVALID_SRC is recorded in CODING. The caller should
 declare and set these variables appropriately in advance:
 src, multibytep, consumed_chars, coding */
724#define ONE_MORE_BYTE_NO_CHECK(c) \
725 do { \
726 c = *src++; \
727 if (multibytep && (c & 0x80)) \
728 { \
729 if ((c & 0xFE) == 0xC0) \
730 c = ((c & 1) << 6) | *src++; \
731 else \
732 { \
35befdaa
KH
733 src--; \
734 c = - string_char (src, &src, NULL); \
065e3595
KH
735 record_conversion_result \
736 (coding, CODING_RESULT_INVALID_SRC); \
737 } \
738 } \
739 consumed_chars++; \
aa72b389
KH
740 } while (0)
741
aa72b389 742
df7492f9
KH
743/* Store a byte C in the place pointed by DST and increment DST to the
744 next free point, and increment PRODUCED_CHARS. The caller should
745 assure that C is 0..127, and declare and set the variable `dst'
746 appropriately in advance.
747*/
aa72b389
KH
748
749
df7492f9
KH
750#define EMIT_ONE_ASCII_BYTE(c) \
751 do { \
752 produced_chars++; \
753 *dst++ = (c); \
b6871cc7 754 } while (0)
aa72b389
KH
755
756
ad1746f5 757/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
aa72b389 758
df7492f9
KH
759#define EMIT_TWO_ASCII_BYTES(c1, c2) \
760 do { \
761 produced_chars += 2; \
762 *dst++ = (c1), *dst++ = (c2); \
763 } while (0)
aa72b389
KH
764
765
df7492f9
KH
766/* Store a byte C in the place pointed by DST and increment DST to the
767 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
768 nonzero, store in an appropriate multibyte form. The caller should
769 declare and set the variables `dst', `multibytep' and
770 `produced_chars' appropriately in advance. */
771
772#define EMIT_ONE_BYTE(c) \
773 do { \
774 produced_chars++; \
775 if (multibytep) \
776 { \
777 int ch = (c); \
778 if (ch >= 0x80) \
779 ch = BYTE8_TO_CHAR (ch); \
780 CHAR_STRING_ADVANCE (ch, dst); \
781 } \
782 else \
783 *dst++ = (c); \
aa72b389 784 } while (0)
aa72b389 785
aa72b389 786
df7492f9 787/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 788
e19c3639
KH
789#define EMIT_TWO_BYTES(c1, c2) \
790 do { \
791 produced_chars += 2; \
792 if (multibytep) \
793 { \
794 int ch; \
795 \
796 ch = (c1); \
797 if (ch >= 0x80) \
798 ch = BYTE8_TO_CHAR (ch); \
799 CHAR_STRING_ADVANCE (ch, dst); \
800 ch = (c2); \
801 if (ch >= 0x80) \
802 ch = BYTE8_TO_CHAR (ch); \
803 CHAR_STRING_ADVANCE (ch, dst); \
804 } \
805 else \
806 { \
807 *dst++ = (c1); \
808 *dst++ = (c2); \
809 } \
aa72b389
KH
810 } while (0)
811
812
df7492f9
KH
813#define EMIT_THREE_BYTES(c1, c2, c3) \
814 do { \
815 EMIT_ONE_BYTE (c1); \
816 EMIT_TWO_BYTES (c2, c3); \
817 } while (0)
aa72b389 818
aa72b389 819
df7492f9
KH
820#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
821 do { \
822 EMIT_TWO_BYTES (c1, c2); \
823 EMIT_TWO_BYTES (c3, c4); \
824 } while (0)
aa72b389 825
aa72b389 826
f6cbaf43 827/* Prototypes for static functions. */
f57e2426
J
828static void record_conversion_result (struct coding_system *coding,
829 enum coding_result_code result);
830static int detect_coding_utf_8 (struct coding_system *,
831 struct coding_detection_info *info);
832static void decode_coding_utf_8 (struct coding_system *);
833static int encode_coding_utf_8 (struct coding_system *);
834
835static int detect_coding_utf_16 (struct coding_system *,
836 struct coding_detection_info *info);
837static void decode_coding_utf_16 (struct coding_system *);
838static int encode_coding_utf_16 (struct coding_system *);
839
840static int detect_coding_iso_2022 (struct coding_system *,
841 struct coding_detection_info *info);
842static void decode_coding_iso_2022 (struct coding_system *);
843static int encode_coding_iso_2022 (struct coding_system *);
844
845static int detect_coding_emacs_mule (struct coding_system *,
846 struct coding_detection_info *info);
847static void decode_coding_emacs_mule (struct coding_system *);
848static int encode_coding_emacs_mule (struct coding_system *);
849
850static int detect_coding_sjis (struct coding_system *,
851 struct coding_detection_info *info);
852static void decode_coding_sjis (struct coding_system *);
853static int encode_coding_sjis (struct coding_system *);
854
855static int detect_coding_big5 (struct coding_system *,
856 struct coding_detection_info *info);
857static void decode_coding_big5 (struct coding_system *);
858static int encode_coding_big5 (struct coding_system *);
859
860static int detect_coding_ccl (struct coding_system *,
861 struct coding_detection_info *info);
862static void decode_coding_ccl (struct coding_system *);
863static int encode_coding_ccl (struct coding_system *);
864
865static void decode_coding_raw_text (struct coding_system *);
866static int encode_coding_raw_text (struct coding_system *);
867
868static void coding_set_source (struct coding_system *);
869static void coding_set_destination (struct coding_system *);
870static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT);
871static void coding_alloc_by_making_gap (struct coding_system *,
872 EMACS_INT, EMACS_INT);
873static unsigned char *alloc_destination (struct coding_system *,
874 EMACS_INT, unsigned char *);
875static void setup_iso_safe_charsets (Lisp_Object);
876static unsigned char *encode_designation_at_bol (struct coding_system *,
877 int *, int *,
878 unsigned char *);
879static int detect_eol (const unsigned char *,
880 EMACS_INT, enum coding_category);
881static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
882static void decode_eol (struct coding_system *);
883static Lisp_Object get_translation_table (Lisp_Object, int, int *);
884static Lisp_Object get_translation (Lisp_Object, int *, int *);
885static int produce_chars (struct coding_system *, Lisp_Object, int);
886static INLINE void produce_charset (struct coding_system *, int *,
887 EMACS_INT);
888static void produce_annotation (struct coding_system *, EMACS_INT);
889static int decode_coding (struct coding_system *);
890static INLINE int *handle_composition_annotation (EMACS_INT, EMACS_INT,
891 struct coding_system *,
892 int *, EMACS_INT *);
893static INLINE int *handle_charset_annotation (EMACS_INT, EMACS_INT,
894 struct coding_system *,
895 int *, EMACS_INT *);
896static void consume_chars (struct coding_system *, Lisp_Object, int);
897static int encode_coding (struct coding_system *);
898static Lisp_Object make_conversion_work_buffer (int);
899static Lisp_Object code_conversion_restore (Lisp_Object);
900static INLINE int char_encodable_p (int, Lisp_Object);
901static Lisp_Object make_subsidiaries (Lisp_Object);
f6cbaf43 902
065e3595
KH
903static void
904record_conversion_result (struct coding_system *coding,
905 enum coding_result_code result)
906{
907 coding->result = result;
908 switch (result)
909 {
910 case CODING_RESULT_INSUFFICIENT_SRC:
911 Vlast_code_conversion_error = Qinsufficient_source;
912 break;
913 case CODING_RESULT_INCONSISTENT_EOL:
914 Vlast_code_conversion_error = Qinconsistent_eol;
915 break;
916 case CODING_RESULT_INVALID_SRC:
917 Vlast_code_conversion_error = Qinvalid_source;
918 break;
919 case CODING_RESULT_INTERRUPT:
920 Vlast_code_conversion_error = Qinterrupted;
921 break;
922 case CODING_RESULT_INSUFFICIENT_MEM:
923 Vlast_code_conversion_error = Qinsufficient_memory;
924 break;
ebaf11b6
KH
925 case CODING_RESULT_INSUFFICIENT_DST:
926 /* Don't record this error in Vlast_code_conversion_error
927 because it happens just temporarily and is resolved when the
928 whole conversion is finished. */
929 break;
409ea3a1
AS
930 case CODING_RESULT_SUCCESS:
931 break;
35befdaa
KH
932 default:
933 Vlast_code_conversion_error = intern ("Unknown error");
065e3595
KH
934 }
935}
936
75f80e63
EZ
937/* This wrapper macro is used to preserve validity of pointers into
938 buffer text across calls to decode_char, which could cause
939 relocation of buffers if it loads a charset map, because loading a
940 charset map allocates large structures. */
df7492f9
KH
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                  \
    charset_map_loaded = 0;                                             \
    c = DECODE_CHAR (charset, code);                                    \
    if (charset_map_loaded)                                             \
      {                                                                 \
        /* A charset map was loaded, so the source text may have       \
           moved: refresh coding->source and shift the three cursors   \
           by the same distance.  */                                    \
        const unsigned char *old_source = coding->source;               \
        EMACS_INT shift;                                                \
                                                                        \
        coding_set_source (coding);                                     \
        shift = coding->source - old_source;                            \
        src_end += shift;                                               \
        src_base += shift;                                              \
        src += shift;                                                   \
      }                                                                 \
  } while (0)
957
958
119852e7
KH
/* If there is not at least BYTES of room left at dst, allocate more
   memory for coding->destination and update dst and dst_end.  We
   don't have to take care of coding->source which will be relocated.
   It is handled by calling coding_set_source in encode_coding.  */

#define ASSURE_DESTINATION(bytes)                                       \
  do {                                                                  \
    if (dst + (bytes) >= dst_end)                                       \
      {                                                                 \
        /* Request BYTES plus one byte per element still pending       \
           between charbuf and charbuf_end.  The count is kept in      \
           EMACS_INT, the type alloc_destination takes, so the         \
           pointer difference is not narrowed to int.  */              \
        EMACS_INT more_bytes = charbuf_end - charbuf + (bytes);         \
                                                                        \
        dst = alloc_destination (coding, more_bytes, dst);              \
        dst_end = coding->destination + coding->dst_bytes;              \
      }                                                                 \
  } while (0)
aa72b389 974
aa72b389 975
db274c7a
KH
976/* Store multibyte form of the character C in P, and advance P to the
977 end of the multibyte form. This is like CHAR_STRING_ADVANCE but it
978 never calls MAYBE_UNIFY_CHAR. */
979
/* Every use of C is parenthesized so that an argument containing a
   low-precedence operator (for instance `hi | lo') is shifted and
   masked as a whole.  C and P are evaluated more than once, so they
   must be free of side effects, and P must be an lvalue.  */
#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
  } while (0)
1005
1006
1007/* Return the character code of character whose multibyte form is at
1008 P, and advance P to the end of the multibyte form. This is like
1009 STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR. */
1010
/* The length of the sequence is chosen from the leading byte.  P is
   advanced first and the value is then assembled from bytes at
   negative offsets.  A two-byte sequence whose leading byte is below
   0xC2 yields a value with 0x3FFF80 or'ed in.  P is evaluated several
   times and must be a side-effect-free lvalue.  */
#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                                 \
  (((p)[0] & 0x80) == 0                                                 \
   ? *(p)++                                                             \
   : ((p)[0] & 0x20) == 0                                               \
   ? ((p) += 2,                                                         \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)                \
       | (((p)[-2] & 0x1F) << 6)                                        \
       | ((p)[-1] & 0x3F)))                                             \
   : ((p)[0] & 0x10) == 0                                               \
   ? ((p) += 3,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x0F) << 12)))                                     \
   : ((p)[0] & 0x08) == 0                                               \
   ? ((p) += 4,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x3F) << 12)                                       \
       | (((p)[-4] & 0x0F) << 18)))                                     \
   : ((p) += 5,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x3F) << 12)                                       \
       | (((p)[-4] & 0x3F) << 18))))
1035
aa72b389 1036
df7492f9 1037static void
971de7fb 1038coding_set_source (struct coding_system *coding)
aa72b389 1039{
df7492f9
KH
1040 if (BUFFERP (coding->src_object))
1041 {
2cb26057 1042 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 1043
df7492f9 1044 if (coding->src_pos < 0)
2cb26057 1045 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 1046 else
2cb26057 1047 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 1048 }
df7492f9 1049 else if (STRINGP (coding->src_object))
aa72b389 1050 {
8f924df7 1051 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 1052 }
df7492f9
KH
1053 else
1054 /* Otherwise, the source is C string and is never relocated
1055 automatically. Thus we don't have to update anything. */
1056 ;
1057}
aa72b389 1058
df7492f9 1059static void
971de7fb 1060coding_set_destination (struct coding_system *coding)
df7492f9
KH
1061{
1062 if (BUFFERP (coding->dst_object))
aa72b389 1063 {
df7492f9 1064 if (coding->src_pos < 0)
aa72b389 1065 {
13818c30 1066 coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
28f67a95
KH
1067 coding->dst_bytes = (GAP_END_ADDR
1068 - (coding->src_bytes - coding->consumed)
1069 - coding->destination);
aa72b389 1070 }
df7492f9 1071 else
28f67a95
KH
1072 {
1073 /* We are sure that coding->dst_pos_byte is before the gap
1074 of the buffer. */
1075 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
13818c30 1076 + coding->dst_pos_byte - BEG_BYTE);
28f67a95
KH
1077 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1078 - coding->destination);
1079 }
df7492f9
KH
1080 }
1081 else
1082 /* Otherwise, the destination is C string and is never relocated
1083 automatically. Thus we don't have to update anything. */
1084 ;
1085}
1086
1087
1088static void
971de7fb 1089coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes)
df7492f9
KH
1090{
1091 coding->destination = (unsigned char *) xrealloc (coding->destination,
1092 coding->dst_bytes + bytes);
1093 coding->dst_bytes += bytes;
1094}
1095
1096static void
cf84bb53
JB
1097coding_alloc_by_making_gap (struct coding_system *coding,
1098 EMACS_INT gap_head_used, EMACS_INT bytes)
df7492f9 1099{
db274c7a 1100 if (EQ (coding->src_object, coding->dst_object))
df7492f9 1101 {
db274c7a
KH
1102 /* The gap may contain the produced data at the head and not-yet
1103 consumed data at the tail. To preserve those data, we at
1104 first make the gap size to zero, then increase the gap
1105 size. */
1106 EMACS_INT add = GAP_SIZE;
1107
1108 GPT += gap_head_used, GPT_BYTE += gap_head_used;
1109 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
df7492f9
KH
1110 make_gap (bytes);
1111 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
db274c7a 1112 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
df7492f9 1113 }
730fff51 1114 else
df7492f9 1115 {
2c78b7e1
KH
1116 Lisp_Object this_buffer;
1117
1118 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1119 set_buffer_internal (XBUFFER (coding->dst_object));
1120 make_gap (bytes);
1121 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1122 }
df7492f9 1123}
8f924df7 1124
df7492f9
KH
1125
1126static unsigned char *
cf84bb53
JB
1127alloc_destination (struct coding_system *coding, EMACS_INT nbytes,
1128 unsigned char *dst)
df7492f9
KH
1129{
1130 EMACS_INT offset = dst - coding->destination;
1131
1132 if (BUFFERP (coding->dst_object))
db274c7a
KH
1133 {
1134 struct buffer *buf = XBUFFER (coding->dst_object);
1135
1136 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1137 }
aa72b389 1138 else
df7492f9 1139 coding_alloc_by_realloc (coding, nbytes);
df7492f9
KH
1140 coding_set_destination (coding);
1141 dst = coding->destination + offset;
1142 return dst;
1143}
aa72b389 1144
ff0dacd7
KH
1145/** Macros for annotations. */
1146
ff0dacd7
KH
1147/* An annotation data is stored in the array coding->charbuf in this
1148 format:
69a80ea3 1149 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
ff0dacd7
KH
1150 LENGTH is the number of elements in the annotation.
1151 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
69a80ea3 1152 NCHARS is the number of characters in the text annotated.
ff0dacd7
KH
1153
1154 The format of the following elements depends on ANNOTATION_MASK.
1155
1156 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1157 follow:
e951386e
KH
1158 ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1159
1160 NBYTES is the number of bytes specified in the header part of
1161 old-style emacs-mule encoding, or 0 for the other kind of
1162 composition.
1163
ff0dacd7 1164 METHOD is one of enum composition_method.
e951386e 1165
ad1746f5 1166 Optional COMPOSITION-COMPONENTS are characters and composition
ff0dacd7
KH
1167 rules.
1168
1169 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
e951386e
KH
1170 follows.
1171
1172 If ANNOTATION_MASK is 0, this annotation is just a space holder to
1173 recover from an invalid annotation, and should be skipped by
1174 produce_annotation. */
1175
1176/* Maximum length of the header of annotation data. */
1177#define MAX_ANNOTATION_LENGTH 5
ff0dacd7 1178
/* Store the three-element annotation header [ -LEN MASK NCHARS ] at
   BUF, advance BUF past it, and set coding->annotated.  The expansion
   deliberately has no trailing semicolon, so the macro acts as one
   statement and can be followed by `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1186
/* Store a composition annotation at BUF: the annotation header with
   length 5 and CODING_ANNOTATE_COMPOSITION_MASK, followed by NBYTES
   and METHOD.  BUF is advanced past all five elements.  The arguments
   are parenthesized so that BUF may be any lvalue expression.  */
#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (nbytes);                                                    \
    *(buf)++ = (method);                                                    \
  } while (0)
1193
1194
69a80ea3
KH
/* Store a charset annotation at BUF: the annotation header with
   length 4 and CODING_ANNOTATE_CHARSET_MASK, followed by ID.  BUF is
   advanced past all four elements.  The arguments are parenthesized
   so that BUF may be any lvalue expression.  */
#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
df7492f9
KH
1201\f
1202/*** 2. Emacs' internal format (emacs-utf-8) ***/
1203
1204
1205
1206\f
1207/*** 3. UTF-8 ***/
1208
1209/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1210 Check if a text is encoded in UTF-8. If it is, return 1, else
1211 return 0. */
df7492f9
KH
1212
1213#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1214#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1215#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1216#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1217#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1218#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1219
a470d443
KH
1220#define UTF_BOM 0xFEFF
1221#define UTF_8_BOM_1 0xEF
1222#define UTF_8_BOM_2 0xBB
1223#define UTF_8_BOM_3 0xBF
1224
/* Scan the source of CODING, after skipping its first
   coding->head_ascii bytes, for well-formed UTF-8 sequences and
   record the outcome in DETECT_INFO.

   Return 0, with CATEGORY_MASK_UTF_8 marked as rejected, when a byte
   that should be a continuation octet is not one, when a sequence has
   an unknown leading byte, or when the source runs out after a
   partial sequence in the last block.  Otherwise return 1 after
   marking which of the UTF-8 signature / no-signature categories were
   found or rejected.

   NOTE(review): ONE_MORE_BYTE is defined outside this excerpt; it
   presumably jumps to `no_more_source' when the source is exhausted
   -- confirm against its definition.  */
df7492f9 1225static int
cf84bb53
JB
1226detect_coding_utf_8 (struct coding_system *coding,
1227 struct coding_detection_info *detect_info)
df7492f9 1228{
065e3595 1229 const unsigned char *src = coding->source, *src_base;
8f924df7 1230 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1231 int multibytep = coding->src_multibyte;
1232 int consumed_chars = 0;
a470d443 1233 int bom_found = 0;
df7492f9
KH
1234 int found = 0;
1235
ff0dacd7 1236 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1237 /* A coding system of this category is always ASCII compatible. */
1238 src += coding->head_ascii;
1239
1240 while (1)
aa72b389 1241 {
df7492f9 1242 int c, c1, c2, c3, c4;
aa72b389 1243
/* Remember where this sequence starts; used for the BOM test and for
   the partial-sequence test at no_more_source. */
065e3595 1244 src_base = src;
df7492f9 1245 ONE_MORE_BYTE (c);
065e3595 1246 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1247 continue;
1248 ONE_MORE_BYTE (c1);
065e3595 1249 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1250 break;
1251 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1252 {
a470d443 1253 found = 1;
df7492f9 1254 continue;
aa72b389 1255 }
df7492f9 1256 ONE_MORE_BYTE (c2);
065e3595 1257 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1258 break;
1259 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1260 {
a470d443
KH
1261 found = 1;
/* A signature counts only when it sits at the very start of the
   source. */
1262 if (src_base == coding->source
1263 && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1264 bom_found = 1;
df7492f9 1265 continue;
aa72b389 1266 }
df7492f9 1267 ONE_MORE_BYTE (c3);
065e3595 1268 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1269 break;
1270 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1271 {
a470d443 1272 found = 1;
df7492f9
KH
1273 continue;
1274 }
1275 ONE_MORE_BYTE (c4);
065e3595 1276 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1277 break;
1278 if (UTF_8_5_OCTET_LEADING_P (c))
1279 {
a470d443 1280 found = 1;
df7492f9
KH
1281 continue;
1282 }
1283 break;
aa72b389 1284 }
/* Reached only through `break': the text is not valid UTF-8. */
ff0dacd7 1285 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1286 return 0;
aa72b389 1287
df7492f9 1288 no_more_source:
/* Bytes consumed since src_base in the last block mean that the
   source ended in the middle of a sequence. */
065e3595 1289 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1290 {
ff0dacd7 1291 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1292 return 0;
aa72b389 1293 }
a470d443
KH
1294 if (bom_found)
1295 {
1296 /* The first character 0xFEFF doesn't necessarily mean a BOM. */
1297 detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1298 }
1299 else
1300 {
1301 detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
0e17387a
KH
1302 if (found)
1303 detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
a470d443 1304 }
ff0dacd7 1305 return 1;
aa72b389
KH
1306}
4ed46869 1308
/* Decode UTF-8 from the source of CODING, starting at
   coding->consumed, into coding->charbuf, starting at
   coding->charbuf_used.  One int is stored per decoded character.  A
   byte that begins an invalid, overlong or surrogate-range sequence
   is stored as a single element (mapped with BYTE8_TO_CHAR unless
   ASCII_BYTE_P holds) and counted in coding->errors.  On exit
   coding->consumed, coding->consumed_char and coding->charbuf_used
   are updated.

   NOTE(review): ONE_MORE_BYTE is defined outside this excerpt; it
   presumably jumps to `no_more_source' when the source is exhausted
   and yields a negative value for some multibyte-source characters
   -- confirm against its definition.  */
b73bfc1c 1309static void
971de7fb 1310decode_coding_utf_8 (struct coding_system *coding)
b73bfc1c 1311{
8f924df7
KH
1312 const unsigned char *src = coding->source + coding->consumed;
1313 const unsigned char *src_end = coding->source + coding->src_bytes;
1314 const unsigned char *src_base;
69a80ea3
KH
1315 int *charbuf = coding->charbuf + coding->charbuf_used;
1316 int *charbuf_end = coding->charbuf + coding->charbuf_size;
453b38f0 1317 int consumed_chars = 0, consumed_chars_base = 0;
df7492f9 1318 int multibytep = coding->src_multibyte;
a470d443 1319 enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
24a73b0a 1320 Lisp_Object attr, charset_list;
0a9564cb
EZ
1321 int eol_crlf =
1322 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
/* Byte already read after a CR when eol_crlf is set, or -1 if none. */
119852e7 1323 int byte_after_cr = -1;
4ed46869 1324
24a73b0a 1325 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1326
/* Unless the coding system is known to have no BOM, look at the first
   three bytes: they are skipped only if they are exactly the UTF-8
   signature; otherwise src is rewound to src_base. */
a470d443
KH
1327 if (bom != utf_without_bom)
1328 {
1329 int c1, c2, c3;
1330
1331 src_base = src;
1332 ONE_MORE_BYTE (c1);
1333 if (! UTF_8_3_OCTET_LEADING_P (c1))
1334 src = src_base;
1335 else
1336 {
159bd5a2 1337 ONE_MORE_BYTE (c2);
a470d443
KH
1338 if (! UTF_8_EXTRA_OCTET_P (c2))
1339 src = src_base;
1340 else
1341 {
159bd5a2 1342 ONE_MORE_BYTE (c3);
a470d443
KH
1343 if (! UTF_8_EXTRA_OCTET_P (c3))
1344 src = src_base;
1345 else
1346 {
1347 if ((c1 != UTF_8_BOM_1)
1348 || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1349 src = src_base;
1350 else
1351 CODING_UTF_8_BOM (coding) = utf_without_bom;
1352 }
1353 }
1354 }
1355 }
/* NOTE(review): this unconditional store makes the identical
   assignment in the innermost `else' above redundant. */
1356 CODING_UTF_8_BOM (coding) = utf_without_bom;
1357
df7492f9 1358 while (1)
b73bfc1c 1359 {
df7492f9 1360 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1361
/* Checkpoint used to rewind on an invalid sequence and to report how
   much input was consumed. */
df7492f9
KH
1362 src_base = src;
1363 consumed_chars_base = consumed_chars;
4af310db 1364
df7492f9 1365 if (charbuf >= charbuf_end)
b71f6f73
KH
1366 {
/* The byte held in byte_after_cr has not been decoded yet, so it is
   given back by stepping src_base back one byte. */
1367 if (byte_after_cr >= 0)
1368 src_base--;
1369 break;
1370 }
df7492f9 1371
119852e7
KH
1372 if (byte_after_cr >= 0)
1373 c1 = byte_after_cr, byte_after_cr = -1;
1374 else
1375 ONE_MORE_BYTE (c1);
065e3595
KH
1376 if (c1 < 0)
1377 {
1378 c = - c1;
1379 }
1a4990fb 1380 else if (UTF_8_1_OCTET_P (c1))
df7492f9 1381 {
/* With DOS end-of-line conversion, the byte following a CR is read
   ahead here and handed to the next iteration. */
119852e7
KH
1382 if (eol_crlf && c1 == '\r')
1383 ONE_MORE_BYTE (byte_after_cr);
df7492f9 1384 c = c1;
4af310db 1385 }
df7492f9 1386 else
4af310db 1387 {
df7492f9 1388 ONE_MORE_BYTE (c2);
065e3595 1389 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1390 goto invalid_code;
1391 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1392 {
b0edb2c5
DL
1393 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1394 /* Reject overlong sequences here and below. Encoders
1395 producing them are incorrect, they can be misleading,
1396 and they mess up read/write invariance. */
1397 if (c < 128)
1398 goto invalid_code;
4af310db 1399 }
df7492f9 1400 else
aa72b389 1401 {
df7492f9 1402 ONE_MORE_BYTE (c3);
065e3595 1403 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1404 goto invalid_code;
1405 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1406 {
1407 c = (((c1 & 0xF) << 12)
1408 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1409 if (c < 0x800
1410 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1411 goto invalid_code;
1412 }
df7492f9
KH
1413 else
1414 {
1415 ONE_MORE_BYTE (c4);
065e3595 1416 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1417 goto invalid_code;
1418 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1419 {
df7492f9
KH
1420 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1421 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1422 if (c < 0x10000)
1423 goto invalid_code;
1424 }
df7492f9
KH
1425 else
1426 {
1427 ONE_MORE_BYTE (c5);
065e3595 1428 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1429 goto invalid_code;
1430 if (UTF_8_5_OCTET_LEADING_P (c1))
1431 {
1432 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1433 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1434 | (c5 & 0x3F));
b0edb2c5 1435 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1436 goto invalid_code;
1437 }
1438 else
1439 goto invalid_code;
1440 }
1441 }
aa72b389 1442 }
b73bfc1c 1443 }
df7492f9
KH
1444
1445 *charbuf++ = c;
1446 continue;
1447
1448 invalid_code:
/* Rewind to the checkpoint and emit just one byte as an error; the
   following bytes are decoded again on the next iteration.
   NOTE(review): when c1 came from byte_after_cr, src_base was taken
   after that byte had already been read, so rewinding to src_base
   appears to skip it rather than re-read it -- confirm against
   ONE_MORE_BYTE. */
1449 src = src_base;
1450 consumed_chars = consumed_chars_base;
1451 ONE_MORE_BYTE (c);
1452 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1453 coding->errors++;
aa72b389
KH
1454 }
1455
df7492f9
KH
1456 no_more_source:
/* Report only what was consumed up to the last checkpoint. */
1457 coding->consumed_char += consumed_chars_base;
1458 coding->consumed = src_base - coding->source;
1459 coding->charbuf_used = charbuf - coding->charbuf;
1460}
1462
1463static int
971de7fb 1464encode_coding_utf_8 (struct coding_system *coding)
df7492f9
KH
1465{
1466 int multibytep = coding->dst_multibyte;
1467 int *charbuf = coding->charbuf;
1468 int *charbuf_end = charbuf + coding->charbuf_used;
1469 unsigned char *dst = coding->destination + coding->produced;
1470 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1471 int produced_chars = 0;
df7492f9
KH
1472 int c;
1473
a470d443
KH
1474 if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1475 {
1476 ASSURE_DESTINATION (3);
1477 EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1478 CODING_UTF_8_BOM (coding) = utf_without_bom;
1479 }
1480
df7492f9 1481 if (multibytep)
aa72b389 1482 {
df7492f9
KH
1483 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1484
1485 while (charbuf < charbuf_end)
b73bfc1c 1486 {
df7492f9 1487 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1488
df7492f9
KH
1489 ASSURE_DESTINATION (safe_room);
1490 c = *charbuf++;
28f67a95
KH
1491 if (CHAR_BYTE8_P (c))
1492 {
1493 c = CHAR_TO_BYTE8 (c);
1494 EMIT_ONE_BYTE (c);
1495 }
1496 else
1497 {
db274c7a 1498 CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
28f67a95
KH
1499 for (p = str; p < pend; p++)
1500 EMIT_ONE_BYTE (*p);
1501 }
b73bfc1c 1502 }
aa72b389 1503 }
df7492f9
KH
1504 else
1505 {
1506 int safe_room = MAX_MULTIBYTE_LENGTH;
1507
1508 while (charbuf < charbuf_end)
b73bfc1c 1509 {
df7492f9
KH
1510 ASSURE_DESTINATION (safe_room);
1511 c = *charbuf++;
f03caae0
KH
1512 if (CHAR_BYTE8_P (c))
1513 *dst++ = CHAR_TO_BYTE8 (c);
1514 else
db274c7a 1515 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
df7492f9 1516 produced_chars++;
4ed46869
KH
1517 }
1518 }
065e3595 1519 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1520 coding->produced_char += produced_chars;
1521 coding->produced = dst - coding->destination;
1522 return 0;
4ed46869
KH
1523}
1524
b73bfc1c 1525
df7492f9 1526/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1527 Check if a text is encoded in one of UTF-16 based coding systems.
1528 If it is, return 1, else return 0. */
aa72b389 1529
df7492f9
KH
1530#define UTF_16_HIGH_SURROGATE_P(val) \
1531 (((val) & 0xFC00) == 0xD800)
1532
1533#define UTF_16_LOW_SURROGATE_P(val) \
1534 (((val) & 0xFC00) == 0xDC00)
93dec019 1535
df7492f9
KH
1536#define UTF_16_INVALID_P(val) \
1537 (((val) == 0xFFFE) \
1538 || ((val) == 0xFFFF) \
1539 || UTF_16_LOW_SURROGATE_P (val))
aa72b389 1540
aa72b389 1541
/* Examine the source of CODING for signs of UTF-16 and record the
   outcome in DETECT_INFO.

   Return 0, with CATEGORY_MASK_UTF_16 rejected, when the last block
   has an odd coding->src_chars or when the second byte read is
   negative.  When the first two bytes are a byte order mark, mark the
   matching endianness and the auto category as found, reject the
   others, and return 1.  Otherwise run the byte-dispersion heuristic
   below and return 0.

   NOTE(review): TWO_MORE_BYTES is defined outside this excerpt; it
   presumably jumps to `no_more_source' when fewer than two bytes
   remain -- confirm against its definition.  */
df7492f9 1542static int
cf84bb53
JB
1543detect_coding_utf_16 (struct coding_system *coding,
1544 struct coding_detection_info *detect_info)
aa72b389 1545{
ef1b0ba7 1546 const unsigned char *src = coding->source;
8f924df7 1547 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 1548 int multibytep = coding->src_multibyte;
df7492f9 1549 int c1, c2;
aa72b389 1550
ff0dacd7 1551 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1552 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1553 && (coding->src_chars & 1))
ff0dacd7
KH
1554 {
1555 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1556 return 0;
1557 }
24a73b0a 1558
f56a4450 1559 TWO_MORE_BYTES (c1, c2);
/* FF FE: little-endian byte order mark. */
df7492f9 1560 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1561 {
b49a1807
KH
1562 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1563 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1564 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1565 | CATEGORY_MASK_UTF_16_BE_NOSIG
1566 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1567 }
/* FE FF: big-endian byte order mark. */
df7492f9 1568 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1569 {
b49a1807
KH
1570 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1571 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1572 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1573 | CATEGORY_MASK_UTF_16_BE_NOSIG
1574 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1575 }
220eeac9 1576 else if (c2 < 0)
f56a4450
KH
1577 {
1578 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1579 return 0;
1580 }
2f3cbb32 1581 else
24a73b0a 1582 {
2f3cbb32
KH
1583 /* We check the dispersion of Eth and Oth bytes where E is even and
1584 O is odd. If both are high, we assume binary data.*/
/* e[] and o[] flag the byte values seen at even and odd positions;
   e_num and o_num count the distinct values seen so far. */
1585 unsigned char e[256], o[256];
1586 unsigned e_num = 1, o_num = 1;
1587
1588 memset (e, 0, 256);
1589 memset (o, 0, 256);
1590 e[c1] = 1;
1591 o[c2] = 1;
1592
/* No byte order mark was seen, so the signature-based categories are
   rejected outright. */
cc13543e
KH
1593 detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1594 |CATEGORY_MASK_UTF_16_BE
1595 | CATEGORY_MASK_UTF_16_LE);
2f3cbb32 1596
/* Keep scanning until every UTF-16 category has been rejected or the
   input ends. */
7f1faf1c
KH
1597 while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1598 != CATEGORY_MASK_UTF_16)
2f3cbb32 1599 {
f56a4450 1600 TWO_MORE_BYTES (c1, c2);
220eeac9 1601 if (c2 < 0)
f56a4450 1602 break;
2f3cbb32
KH
1603 if (! e[c1])
1604 {
1605 e[c1] = 1;
1606 e_num++;
cc13543e
KH
1607 if (e_num >= 128)
1608 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
2f3cbb32
KH
1609 }
1610 if (! o[c2])
1611 {
977b85f4 1612 o[c2] = 1;
2f3cbb32 1613 o_num++;
cc13543e
KH
1614 if (o_num >= 128)
1615 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
2f3cbb32
KH
1616 }
1617 }
2f3cbb32 1618 return 0;
ff0dacd7 1619 }
2f3cbb32 1620
df7492f9 1621 no_more_source:
ff0dacd7 1622 return 1;
df7492f9 1623}
aa72b389 1624
df7492f9 1625static void
971de7fb 1626decode_coding_utf_16 (struct coding_system *coding)
df7492f9 1627{
8f924df7
KH
1628 const unsigned char *src = coding->source + coding->consumed;
1629 const unsigned char *src_end = coding->source + coding->src_bytes;
1630 const unsigned char *src_base;
69a80ea3 1631 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
1632 /* We may produces at most 3 chars in one loop. */
1633 int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
3a8406e1 1634 int consumed_chars = 0, consumed_chars_base = 0;
df7492f9 1635 int multibytep = coding->src_multibyte;
a470d443 1636 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
df7492f9
KH
1637 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1638 int surrogate = CODING_UTF_16_SURROGATE (coding);
24a73b0a 1639 Lisp_Object attr, charset_list;
0a9564cb
EZ
1640 int eol_crlf =
1641 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 1642 int byte_after_cr1 = -1, byte_after_cr2 = -1;
df7492f9 1643
24a73b0a 1644 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1645
a470d443 1646 if (bom == utf_with_bom)
aa72b389 1647 {
df7492f9 1648 int c, c1, c2;
4af310db 1649
aa72b389 1650 src_base = src;
df7492f9
KH
1651 ONE_MORE_BYTE (c1);
1652 ONE_MORE_BYTE (c2);
e19c3639 1653 c = (c1 << 8) | c2;
aa72b389 1654
b49a1807
KH
1655 if (endian == utf_16_big_endian
1656 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1657 {
b49a1807
KH
1658 /* The first two bytes are not BOM. Treat them as bytes
1659 for a normal character. */
1660 src = src_base;
1661 coding->errors++;
aa72b389 1662 }
a470d443 1663 CODING_UTF_16_BOM (coding) = utf_without_bom;
b49a1807 1664 }
a470d443 1665 else if (bom == utf_detect_bom)
b49a1807
KH
1666 {
1667 /* We have already tried to detect BOM and failed in
1668 detect_coding. */
a470d443 1669 CODING_UTF_16_BOM (coding) = utf_without_bom;
df7492f9 1670 }
aa72b389 1671
df7492f9
KH
1672 while (1)
1673 {
1674 int c, c1, c2;
1675
1676 src_base = src;
1677 consumed_chars_base = consumed_chars;
1678
df80c7f0 1679 if (charbuf >= charbuf_end)
b71f6f73
KH
1680 {
1681 if (byte_after_cr1 >= 0)
1682 src_base -= 2;
1683 break;
1684 }
df7492f9 1685
119852e7
KH
1686 if (byte_after_cr1 >= 0)
1687 c1 = byte_after_cr1, byte_after_cr1 = -1;
1688 else
1689 ONE_MORE_BYTE (c1);
065e3595
KH
1690 if (c1 < 0)
1691 {
1692 *charbuf++ = -c1;
1693 continue;
1694 }
119852e7
KH
1695 if (byte_after_cr2 >= 0)
1696 c2 = byte_after_cr2, byte_after_cr2 = -1;
1697 else
1698 ONE_MORE_BYTE (c2);
065e3595
KH
1699 if (c2 < 0)
1700 {
1701 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1702 *charbuf++ = -c2;
1703 continue;
1704 }
df7492f9 1705 c = (endian == utf_16_big_endian
e19c3639 1706 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
119852e7 1707
df7492f9 1708 if (surrogate)
fd3ae0b9 1709 {
df7492f9 1710 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1711 {
df7492f9
KH
1712 if (endian == utf_16_big_endian)
1713 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1714 else
1715 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1716 *charbuf++ = c1;
1717 *charbuf++ = c2;
1718 coding->errors++;
1719 if (UTF_16_HIGH_SURROGATE_P (c))
1720 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1721 else
df7492f9 1722 *charbuf++ = c;
fd3ae0b9
KH
1723 }
1724 else
df7492f9
KH
1725 {
1726 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1727 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1728 *charbuf++ = 0x10000 + c;
df7492f9 1729 }
fd3ae0b9 1730 }
aa72b389 1731 else
df7492f9
KH
1732 {
1733 if (UTF_16_HIGH_SURROGATE_P (c))
1734 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1735 else
119852e7
KH
1736 {
1737 if (eol_crlf && c == '\r')
1738 {
1739 ONE_MORE_BYTE (byte_after_cr1);
1740 ONE_MORE_BYTE (byte_after_cr2);
1741 }
1742 *charbuf++ = c;
1743 }
8f924df7 1744 }
aa72b389 1745 }
df7492f9
KH
1746
1747 no_more_source:
1748 coding->consumed_char += consumed_chars_base;
1749 coding->consumed = src_base - coding->source;
1750 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1751}
b73bfc1c 1752
df7492f9 1753static int
971de7fb 1754encode_coding_utf_16 (struct coding_system *coding)
df7492f9
KH
1755{
1756 int multibytep = coding->dst_multibyte;
1757 int *charbuf = coding->charbuf;
1758 int *charbuf_end = charbuf + coding->charbuf_used;
1759 unsigned char *dst = coding->destination + coding->produced;
1760 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1761 int safe_room = 8;
a470d443 1762 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
df7492f9
KH
1763 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1764 int produced_chars = 0;
24a73b0a 1765 Lisp_Object attrs, charset_list;
df7492f9 1766 int c;
4ed46869 1767
24a73b0a 1768 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 1769
a470d443 1770 if (bom != utf_without_bom)
df7492f9
KH
1771 {
1772 ASSURE_DESTINATION (safe_room);
1773 if (big_endian)
df7492f9 1774 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1775 else
1776 EMIT_TWO_BYTES (0xFF, 0xFE);
a470d443 1777 CODING_UTF_16_BOM (coding) = utf_without_bom;
df7492f9
KH
1778 }
1779
1780 while (charbuf < charbuf_end)
1781 {
1782 ASSURE_DESTINATION (safe_room);
1783 c = *charbuf++;
60afa08d 1784 if (c > MAX_UNICODE_CHAR)
e19c3639 1785 c = coding->default_char;
df7492f9
KH
1786
1787 if (c < 0x10000)
1788 {
1789 if (big_endian)
1790 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1791 else
1792 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1793 }
1794 else
1795 {
1796 int c1, c2;
1797
1798 c -= 0x10000;
1799 c1 = (c >> 10) + 0xD800;
1800 c2 = (c & 0x3FF) + 0xDC00;
1801 if (big_endian)
1802 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1803 else
1804 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1805 }
1806 }
065e3595 1807 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1808 coding->produced = dst - coding->destination;
1809 coding->produced_char += produced_chars;
1810 return 0;
1811}
1812
1813\f
/*** 6. Old Emacs' internal format (emacs-mule) ***/

/* Emacs' internal format for representation of multiple character
   sets is a kind of multi-byte encoding, i.e. characters are
   represented by variable-length sequences of one-byte codes.

   ASCII characters and control characters (e.g. `tab', `newline') are
   represented by one-byte sequences which are their ASCII codes, in
   the range 0x00 through 0x7F.

   8-bit characters of the range 0x80..0x9F are represented by
   two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
   code + 0x20).

   8-bit characters of the range 0xA0..0xFF are represented by
   one-byte sequences which are their 8-bit code.

   The other characters are represented by a sequence of `base
   leading-code', optional `extended leading-code', and one or two
   `position-code's.  The length of the sequence is determined by the
   base leading-code.  Leading-code takes the range 0x81 through 0x9D,
   whereas extended leading-code and position-code take the range 0xA0
   through 0xFF.  See `charset.h' for more details about leading-code
   and position-code.

   --- CODE RANGE of Emacs' internal format ---
   character set        range
   -------------        -----
   ascii                0x00..0x7F
   eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
   eight-bit-graphic    0xA0..0xFF
   ELSE                 0x81..0x9D + [0xA0..0xFF]+
   ---------------------------------------------

   As this is the internal character representation, the format is
   usually not used externally (i.e. in a file or in data sent to a
   process).  But, it is possible to have a text externally in this
   format (i.e. by encoding by the coding system `emacs-mule').

   In that case, a sequence of one-byte codes has a slightly different
   form.

   First, all characters in eight-bit-control are represented by
   one-byte sequences which are their 8-bit code.

   Next, character composition data are represented by a byte
   sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
   where,
        METHOD is 0xF2 plus one of the composition methods (enum
        composition_method),

        BYTES is 0xA0 plus the byte length of this composition data,

        CHARS is 0xA0 plus the number of characters composed by this
        data,

        COMPONENTs are characters in multibyte form or composition
        rules encoded by two bytes of ASCII codes.

   In addition, for backward compatibility, the following formats are
   also recognized as composition data on decoding.

   0x80 MSEQ ...
   0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ

   Here,
        MSEQ is a multibyte form but in this special format:
          ASCII: 0xA0 ASCII_CODE+0x80,
          other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
        RULE is a one byte code of the range 0xA0..0xF0 that
        represents a composition rule.
  */
1886
/* Table indexed by the first byte of an emacs-mule sequence.  The
   code in this file uses the entry as the total byte length of the
   sequence starting with that byte: detect_coding_emacs_mule reads
   (entry - 1) further bytes, and emacs_mule_char accepts only the
   values 1 through 4.  The table is filled in elsewhere.  */
char emacs_mule_bytes[256];
1888
e951386e
KH
1889
1890/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1891 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1892 else return 0. */
1893
1894static int
cf84bb53
JB
1895detect_coding_emacs_mule (struct coding_system *coding,
1896 struct coding_detection_info *detect_info)
e951386e
KH
1897{
1898 const unsigned char *src = coding->source, *src_base;
1899 const unsigned char *src_end = coding->source + coding->src_bytes;
1900 int multibytep = coding->src_multibyte;
1901 int consumed_chars = 0;
1902 int c;
1903 int found = 0;
1904
1905 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1906 /* A coding system of this category is always ASCII compatible. */
1907 src += coding->head_ascii;
1908
1909 while (1)
1910 {
1911 src_base = src;
1912 ONE_MORE_BYTE (c);
1913 if (c < 0)
1914 continue;
1915 if (c == 0x80)
1916 {
1917 /* Perhaps the start of composite character. We simply skip
1918 it because analyzing it is too heavy for detecting. But,
1919 at least, we check that the composite character
1920 constitutes of more than 4 bytes. */
1921 const unsigned char *src_base;
1922
1923 repeat:
1924 src_base = src;
1925 do
1926 {
1927 ONE_MORE_BYTE (c);
1928 }
1929 while (c >= 0xA0);
1930
1931 if (src - src_base <= 4)
1932 break;
1933 found = CATEGORY_MASK_EMACS_MULE;
1934 if (c == 0x80)
1935 goto repeat;
1936 }
1937
1938 if (c < 0x80)
1939 {
1940 if (c < 0x20
1941 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1942 break;
1943 }
1944 else
1945 {
396475b7 1946 int more_bytes = emacs_mule_bytes[c] - 1;
e951386e
KH
1947
1948 while (more_bytes > 0)
1949 {
1950 ONE_MORE_BYTE (c);
1951 if (c < 0xA0)
1952 {
1953 src--; /* Unread the last byte. */
1954 break;
1955 }
1956 more_bytes--;
1957 }
1958 if (more_bytes != 0)
1959 break;
1960 found = CATEGORY_MASK_EMACS_MULE;
1961 }
1962 }
1963 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1964 return 0;
1965
1966 no_more_source:
1967 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1968 {
1969 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1970 return 0;
1971 }
1972 detect_info->found |= found;
1973 return 1;
1974}
1975
1976
1977/* Parse emacs-mule multibyte sequence at SRC and return the decoded
1978 character. If CMP_STATUS indicates that we must expect MSEQ or
1979 RULE described above, decode it and return the negative value of
685ebdc8 1980 the decoded character or rule. If an invalid byte is found, return
e951386e
KH
1981 -1. If SRC is too short, return -2. */
1982
df7492f9 1983int
cf84bb53
JB
1984emacs_mule_char (struct coding_system *coding, const unsigned char *src,
1985 int *nbytes, int *nchars, int *id,
1986 struct composition_status *cmp_status)
df7492f9 1987{
8f924df7
KH
1988 const unsigned char *src_end = coding->source + coding->src_bytes;
1989 const unsigned char *src_base = src;
df7492f9 1990 int multibytep = coding->src_multibyte;
b84ae584 1991 int charset_id;
df7492f9
KH
1992 unsigned code;
1993 int c;
1994 int consumed_chars = 0;
e951386e 1995 int mseq_found = 0;
df7492f9
KH
1996
1997 ONE_MORE_BYTE (c);
065e3595 1998 if (c < 0)
df7492f9 1999 {
065e3595 2000 c = -c;
b84ae584 2001 charset_id = emacs_mule_charset[0];
065e3595
KH
2002 }
2003 else
2004 {
4d41e8b7
KH
2005 if (c >= 0xA0)
2006 {
e951386e
KH
2007 if (cmp_status->state != COMPOSING_NO
2008 && cmp_status->old_form)
4d41e8b7 2009 {
e951386e
KH
2010 if (cmp_status->state == COMPOSING_CHAR)
2011 {
2012 if (c == 0xA0)
2013 {
2014 ONE_MORE_BYTE (c);
2015 c -= 0x80;
2016 if (c < 0)
2017 goto invalid_code;
2018 }
2019 else
2020 c -= 0x20;
2021 mseq_found = 1;
2022 }
2023 else
2024 {
2025 *nbytes = src - src_base;
2026 *nchars = consumed_chars;
2027 return -c;
2028 }
4d41e8b7
KH
2029 }
2030 else
e951386e 2031 goto invalid_code;
4d41e8b7
KH
2032 }
2033
065e3595 2034 switch (emacs_mule_bytes[c])
b73bfc1c 2035 {
065e3595 2036 case 2:
b84ae584 2037 if ((charset_id = emacs_mule_charset[c]) < 0)
df7492f9
KH
2038 goto invalid_code;
2039 ONE_MORE_BYTE (c);
9ffd559c 2040 if (c < 0xA0)
065e3595 2041 goto invalid_code;
df7492f9 2042 code = c & 0x7F;
065e3595
KH
2043 break;
2044
2045 case 3:
2046 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2047 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2048 {
2049 ONE_MORE_BYTE (c);
b84ae584 2050 if (c < 0xA0 || (charset_id = emacs_mule_charset[c]) < 0)
065e3595
KH
2051 goto invalid_code;
2052 ONE_MORE_BYTE (c);
9ffd559c 2053 if (c < 0xA0)
065e3595
KH
2054 goto invalid_code;
2055 code = c & 0x7F;
2056 }
2057 else
2058 {
b84ae584 2059 if ((charset_id = emacs_mule_charset[c]) < 0)
065e3595
KH
2060 goto invalid_code;
2061 ONE_MORE_BYTE (c);
9ffd559c 2062 if (c < 0xA0)
065e3595
KH
2063 goto invalid_code;
2064 code = (c & 0x7F) << 8;
2065 ONE_MORE_BYTE (c);
9ffd559c 2066 if (c < 0xA0)
065e3595
KH
2067 goto invalid_code;
2068 code |= c & 0x7F;
2069 }
2070 break;
2071
2072 case 4:
2073 ONE_MORE_BYTE (c);
b84ae584 2074 if (c < 0 || (charset_id = emacs_mule_charset[c]) < 0)
df7492f9
KH
2075 goto invalid_code;
2076 ONE_MORE_BYTE (c);
9ffd559c 2077 if (c < 0xA0)
065e3595 2078 goto invalid_code;
781d7a48 2079 code = (c & 0x7F) << 8;
df7492f9 2080 ONE_MORE_BYTE (c);
9ffd559c 2081 if (c < 0xA0)
065e3595 2082 goto invalid_code;
df7492f9 2083 code |= c & 0x7F;
065e3595 2084 break;
df7492f9 2085
065e3595
KH
2086 case 1:
2087 code = c;
b84ae584 2088 charset_id = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
065e3595 2089 break;
df7492f9 2090
065e3595
KH
2091 default:
2092 abort ();
2093 }
b84ae584
KH
2094 CODING_DECODE_CHAR (coding, src, src_base, src_end,
2095 CHARSET_FROM_ID (charset_id), code, c);
065e3595
KH
2096 if (c < 0)
2097 goto invalid_code;
df7492f9 2098 }
df7492f9
KH
2099 *nbytes = src - src_base;
2100 *nchars = consumed_chars;
ff0dacd7 2101 if (id)
b84ae584 2102 *id = charset_id;
e951386e 2103 return (mseq_found ? -c : c);
df7492f9
KH
2104
2105 no_more_source:
2106 return -2;
2107
2108 invalid_code:
2109 return -1;
2110}
2111
2112
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */

/* Handle these composition sequences ('|': the end of header elements,
   BYTES and CHARS >= 0xA0):

   (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
   (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
   (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...

   and these old forms:

   (4) relative composition: 0x80 | MSEQ ... MSEQ
   (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ

   When the starter 0x80 and the following header elements are found,
   this annotation header is produced.

        [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]

   NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
   NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).

   Then, upon reading the following elements, these codes are produced
   until the composition end is found:

   (1) CHAR ... CHAR
   (2) ALT ... ALT CHAR ... CHAR
   (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
   (4) CHAR ... CHAR
   (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR

   When the composition end is found, LENGTH and NCHARS in the
   annotation header are updated as below:

   (1) LENGTH: unchanged, NCHARS: unchanged
   (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
   (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
   (4) LENGTH: unchanged, NCHARS: number of CHARs
   (5) LENGTH: unchanged, NCHARS: number of CHARs

   If an error is found while composing, the annotation header is
   changed to the original composition header (plus filler -1s) as
   below:

   (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
   (5)          [ 0x80 0xFF -1 -1 -1 ]

   and the sequence [ -2 DECODED-RULE ] is changed to the original
   byte sequence as below:
        o the original byte sequence is B: [ B -1 ]
        o the original byte sequence is B1 B2: [ B1 B2 ]

   Most of the routines are implemented by macros because many
   variables and labels in the caller decode_coding_emacs_mule must be
   accessible, and they are usually called just once (thus they don't
   increase the size of the compiled object).  */
2169
/* Decode a composition rule represented by C as a component of
   composition sequence of Emacs 20 style.  Set RULE to the decoded
   rule.

   C is modified: 0xA0 is subtracted from it, and the result must be
   in the range 0..80, otherwise control jumps to the caller's label
   `invalid_code'.  The result is split into two base-9 digits (GREF
   and NREF); a digit of 4 is mapped to 10 before both are passed to
   COMPOSITION_ENCODE_RULE.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    c -= 0xA0;                                          \
    if (c < 0 || c >= 81)                               \
      goto invalid_code;                                \
    gref = c / 9, nref = c % 9;                         \
    if (gref == 4) gref = 10;                           \
    if (nref == 4) nref = 10;                           \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2186
2187
e951386e
KH
/* Decode a composition rule represented by C and the following byte
   at SRC as a component of composition sequence of Emacs 21 style.
   Set RULE to the decoded rule.

   C supplies GREF and the next byte, read with ONE_MORE_BYTE into C,
   supplies NREF; each is the byte value minus 0x20 and must be in the
   range 0..80, otherwise control jumps to the caller's label
   `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    gref = c - 0x20;                                    \
    if (gref < 0 || gref >= 81)                         \
      goto invalid_code;                                \
    ONE_MORE_BYTE (c);                                  \
    nref = c - 0x20;                                    \
    if (nref < 0 || nref >= 81)                         \
      goto invalid_code;                                \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2205
2206
e951386e
KH
/* Start of Emacs 21 style format.  On entry the caller's variable C
   holds the method byte (composition method + 0xF2).  The next two
   bytes, read here with ONE_MORE_BYTE, are the byte length of this
   composition information + 0xA0 and the number of characters
   composed by this composition + 0xA0.

   The byte length must be at least 3, and exactly 4 for a relative
   composition; the character count must be positive and less than
   MAX_COMPOSITION_COMPONENTS.  Otherwise control jumps to the
   caller's label `invalid_code'.

   On success the caller's CMP_STATUS is initialized and an annotation
   header is added to CHARBUF with ADD_COMPOSITION_DATA.  */

#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4))  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
      goto invalid_code;                                                \
    cmp_status->old_form = 0;                                           \
    cmp_status->method = method;                                        \
    if (method == COMPOSITION_RELATIVE)                                 \
      cmp_status->state = COMPOSING_CHAR;                               \
    else                                                                \
      cmp_status->state = COMPOSING_COMPONENT_CHAR;                     \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
93dec019 2238
aa72b389 2239
e951386e
KH
/* Start of Emacs 20 style format for relative composition.
   Initialize the caller's CMP_STATUS for an old-form relative
   composition with zero characters and components so far, and add an
   annotation header to CHARBUF with ADD_COMPOSITION_DATA.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
  do {                                                          \
    cmp_status->old_form = 1;                                   \
    cmp_status->method = COMPOSITION_RELATIVE;                  \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->nchars = cmp_status->ncomps = 0;                \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2251
2252
/* Start of Emacs 20 style format for rule-base composition.
   Initialize the caller's CMP_STATUS for an old-form composition
   with rules (COMPOSITION_WITH_RULE) with zero characters and
   components so far, and add an annotation header to CHARBUF with
   ADD_COMPOSITION_DATA.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
  do {                                                          \
    cmp_status->old_form = 1;                                   \
    cmp_status->method = COMPOSITION_WITH_RULE;                 \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->nchars = cmp_status->ncomps = 0;                \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2264
2265
e951386e
KH
2266#define DECODE_EMACS_MULE_COMPOSITION_START() \
2267 do { \
2268 const unsigned char *current_src = src; \
2269 \
2270 ONE_MORE_BYTE (c); \
2271 if (c < 0) \
2272 goto invalid_code; \
2273 if (c - 0xF2 >= COMPOSITION_RELATIVE \
2274 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) \
2275 DECODE_EMACS_MULE_21_COMPOSITION (); \
2276 else if (c < 0xA0) \
2277 goto invalid_code; \
2278 else if (c < 0xC0) \
2279 { \
2280 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (); \
2281 /* Re-read C as a composition component. */ \
2282 src = current_src; \
2283 } \
2284 else if (c == 0xFF) \
2285 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (); \
2286 else \
2287 goto invalid_code; \
2288 } while (0)
2289
/* Finish a successfully decoded composition.  The annotation header
   starts CMP_STATUS->length elements before the caller's CHARBUF
   pointer.  For an old-form composition, store the number of
   characters read into the third header element.  For a new-form
   composition whose method is greater than COMPOSITION_RELATIVE,
   store (third header element - CMP_STATUS->length) into the first
   header element.  Then reset the state to COMPOSING_NO.  */

#define EMACS_MULE_COMPOSITION_END()                            \
  do {                                                          \
    int idx = - cmp_status->length;                             \
                                                                \
    if (cmp_status->old_form)                                   \
      charbuf[idx + 2] = cmp_status->nchars;                    \
    else if (cmp_status->method > COMPOSITION_RELATIVE)         \
      charbuf[idx] = charbuf[idx + 2] - cmp_status->length;     \
    cmp_status->state = COMPOSING_NO;                           \
  } while (0)
2300
2301
2302static int
cf84bb53
JB
2303emacs_mule_finish_composition (int *charbuf,
2304 struct composition_status *cmp_status)
e951386e
KH
2305{
2306 int idx = - cmp_status->length;
2307 int new_chars;
2308
2309 if (cmp_status->old_form && cmp_status->nchars > 0)
2310 {
2311 charbuf[idx + 2] = cmp_status->nchars;
2312 new_chars = 0;
2313 if (cmp_status->method == COMPOSITION_WITH_RULE
2314 && cmp_status->state == COMPOSING_CHAR)
2315 {
2316 /* The last rule was invalid. */
2317 int rule = charbuf[-1] + 0xA0;
2318
2319 charbuf[-2] = BYTE8_TO_CHAR (rule);
2320 charbuf[-1] = -1;
2321 new_chars = 1;
2322 }
2323 }
2324 else
2325 {
2326 charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2327
2328 if (cmp_status->method == COMPOSITION_WITH_RULE)
2329 {
2330 charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2331 charbuf[idx++] = -3;
2332 charbuf[idx++] = 0;
2333 new_chars = 1;
2334 }
2335 else
2336 {
2337 int nchars = charbuf[idx + 1] + 0xA0;
2338 int nbytes = charbuf[idx + 2] + 0xA0;
2339
2340 charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2341 charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2342 charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2343 charbuf[idx++] = -1;
2344 new_chars = 4;
2345 }
2346 }
2347 cmp_status->state = COMPOSING_NO;
2348 return new_chars;
2349}
2350
/* If a composition is in progress in the caller's CMP_STATUS, finish
   it with emacs_mule_finish_composition and add the number of
   characters it reports to the caller's CHAR_OFFSET.  */

#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                           \
  do {                                                                  \
    if (cmp_status->state != COMPOSING_NO)                              \
      char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
  } while (0)
2356
aa72b389
KH
2357
2358static void
971de7fb 2359decode_coding_emacs_mule (struct coding_system *coding)
aa72b389 2360{
8f924df7
KH
2361 const unsigned char *src = coding->source + coding->consumed;
2362 const unsigned char *src_end = coding->source + coding->src_bytes;
2363 const unsigned char *src_base;
69a80ea3 2364 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5
KH
2365 /* We may produce two annotations (charset and composition) in one
2366 loop and one more charset annotation at the end. */
69a80ea3 2367 int *charbuf_end
df80c7f0 2368 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
df7492f9 2369 int consumed_chars = 0, consumed_chars_base;
df7492f9 2370 int multibytep = coding->src_multibyte;
24a73b0a 2371 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2372 int char_offset = coding->produced_char;
2373 int last_offset = char_offset;
2374 int last_id = charset_ascii;
0a9564cb
EZ
2375 int eol_crlf =
2376 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 2377 int byte_after_cr = -1;
e951386e 2378 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
aa72b389 2379
24a73b0a 2380 CODING_GET_INFO (coding, attrs, charset_list);
aa72b389 2381
e951386e
KH
2382 if (cmp_status->state != COMPOSING_NO)
2383 {
2384 int i;
2385
2386 for (i = 0; i < cmp_status->length; i++)
2387 *charbuf++ = cmp_status->carryover[i];
2388 coding->annotated = 1;
2389 }
2390
aa72b389
KH
2391 while (1)
2392 {
e951386e 2393 int c, id;
df7492f9 2394
aa72b389 2395 src_base = src;
df7492f9
KH
2396 consumed_chars_base = consumed_chars;
2397
2398 if (charbuf >= charbuf_end)
b71f6f73
KH
2399 {
2400 if (byte_after_cr >= 0)
2401 src_base--;
2402 break;
2403 }
aa72b389 2404
119852e7
KH
2405 if (byte_after_cr >= 0)
2406 c = byte_after_cr, byte_after_cr = -1;
2407 else
2408 ONE_MORE_BYTE (c);
e951386e
KH
2409
2410 if (c < 0 || c == 0x80)
065e3595 2411 {
e951386e
KH
2412 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2413 if (c < 0)
2414 {
2415 *charbuf++ = -c;
2416 char_offset++;
2417 }
2418 else
2419 DECODE_EMACS_MULE_COMPOSITION_START ();
2420 continue;
065e3595 2421 }
e951386e
KH
2422
2423 if (c < 0x80)
aa72b389 2424 {
119852e7
KH
2425 if (eol_crlf && c == '\r')
2426 ONE_MORE_BYTE (byte_after_cr);
e951386e
KH
2427 id = charset_ascii;
2428 if (cmp_status->state != COMPOSING_NO)
2429 {
2430 if (cmp_status->old_form)
2431 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2432 else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2433 cmp_status->ncomps--;
2434 }
2435 }
2436 else
2437 {
2438 int nchars, nbytes;
75f80e63
EZ
2439 /* emacs_mule_char can load a charset map from a file, which
2440 allocates a large structure and might cause buffer text
2441 to be relocated as result. Thus, we need to remember the
ad1746f5 2442 original pointer to buffer text, and fix up all related
75f80e63
EZ
2443 pointers after the call. */
2444 const unsigned char *orig = coding->source;
2445 EMACS_INT offset;
e951386e
KH
2446
2447 c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2448 cmp_status);
75f80e63
EZ
2449 offset = coding->source - orig;
2450 if (offset)
2451 {
2452 src += offset;
2453 src_base += offset;
2454 src_end += offset;
2455 }
e951386e
KH
2456 if (c < 0)
2457 {
2458 if (c == -1)
2459 goto invalid_code;
2460 if (c == -2)
2461 break;
2462 }
2463 src = src_base + nbytes;
2464 consumed_chars = consumed_chars_base + nchars;
2465 if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2466 cmp_status->ncomps -= nchars;
2467 }
2468
ad1746f5 2469 /* Now if C >= 0, we found a normally encoded character, if C <
e951386e
KH
2470 0, we found an old-style composition component character or
2471 rule. */
2472
2473 if (cmp_status->state == COMPOSING_NO)
2474 {
2475 if (last_id != id)
2476 {
2477 if (last_id != charset_ascii)
2478 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2479 last_id);
2480 last_id = id;
2481 last_offset = char_offset;
2482 }
df7492f9
KH
2483 *charbuf++ = c;
2484 char_offset++;
aa72b389 2485 }
e951386e 2486 else if (cmp_status->state == COMPOSING_CHAR)
df7492f9 2487 {
e951386e
KH
2488 if (cmp_status->old_form)
2489 {
2490 if (c >= 0)
2491 {
2492 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2493 *charbuf++ = c;
2494 char_offset++;
2495 }
2496 else
2497 {
2498 *charbuf++ = -c;
2499 cmp_status->nchars++;
2500 cmp_status->length++;
2501 if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2502 EMACS_MULE_COMPOSITION_END ();
2503 else if (cmp_status->method == COMPOSITION_WITH_RULE)
2504 cmp_status->state = COMPOSING_RULE;
2505 }
2506 }
df7492f9 2507 else
e951386e
KH
2508 {
2509 *charbuf++ = c;
2510 cmp_status->length++;
2511 cmp_status->nchars--;
2512 if (cmp_status->nchars == 0)
2513 EMACS_MULE_COMPOSITION_END ();
2514 }
df7492f9 2515 }
e951386e 2516 else if (cmp_status->state == COMPOSING_RULE)
df7492f9 2517 {
e951386e 2518 int rule;
ff0dacd7 2519
e951386e 2520 if (c >= 0)
df7492f9 2521 {
e951386e
KH
2522 EMACS_MULE_COMPOSITION_END ();
2523 *charbuf++ = c;
2524 char_offset++;
df7492f9 2525 }
e951386e 2526 else
ff0dacd7 2527 {
e951386e
KH
2528 c = -c;
2529 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2530 if (rule < 0)
2531 goto invalid_code;
2532 *charbuf++ = -2;
2533 *charbuf++ = rule;
2534 cmp_status->length += 2;
2535 cmp_status->state = COMPOSING_CHAR;
ff0dacd7 2536 }
e951386e
KH
2537 }
2538 else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2539 {
df7492f9 2540 *charbuf++ = c;
e951386e
KH
2541 cmp_status->length++;
2542 if (cmp_status->ncomps == 0)
2543 cmp_status->state = COMPOSING_CHAR;
2544 else if (cmp_status->ncomps > 0)
2545 {
2546 if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2547 cmp_status->state = COMPOSING_COMPONENT_RULE;
2548 }
2549 else
2550 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
df7492f9 2551 }
e951386e
KH
2552 else /* COMPOSING_COMPONENT_RULE */
2553 {
2554 int rule;
2555
2556 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2557 if (rule < 0)
2558 goto invalid_code;
2559 *charbuf++ = -2;
2560 *charbuf++ = rule;
2561 cmp_status->length += 2;
2562 cmp_status->ncomps--;
2563 if (cmp_status->ncomps > 0)
2564 cmp_status->state = COMPOSING_COMPONENT_CHAR;
2565 else
2566 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2567 }
2568 continue;
2569
df7492f9 2570 invalid_code:
e951386e 2571 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
df7492f9
KH
2572 src = src_base;
2573 consumed_chars = consumed_chars_base;
2574 ONE_MORE_BYTE (c);
2575 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2576 char_offset++;
df7492f9
KH
2577 coding->errors++;
2578 }
2579
2580 no_more_source:
e951386e
KH
2581 if (cmp_status->state != COMPOSING_NO)
2582 {
2583 if (coding->mode & CODING_MODE_LAST_BLOCK)
2584 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2585 else
2586 {
2587 int i;
2588
2589 charbuf -= cmp_status->length;
2590 for (i = 0; i < cmp_status->length; i++)
2591 cmp_status->carryover[i] = charbuf[i];
2592 }
2593 }
ff0dacd7 2594 if (last_id != charset_ascii)
69a80ea3 2595 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
2596 coding->consumed_char += consumed_chars_base;
2597 coding->consumed = src_base - coding->source;
2598 coding->charbuf_used = charbuf - coding->charbuf;
2599}
2600
2601
/* Store in CODES[0] and CODES[1] the leading codes for the emacs-mule
   charset id ID.

     ID < 0xA0          CODES = { ID, 0 }       (no extended code)
     0xA0 <= ID < 0xE0  CODES = { 0x9A, ID }
     0xE0 <= ID < 0xF0  CODES = { 0x9B, ID }
     0xF0 <= ID < 0xF5  CODES = { 0x9C, ID }
     0xF5 <= ID         CODES = { 0x9D, ID }

   The expansion is a single statement with no trailing semicolon, so
   the macro can be used as the body of an `if' that has an `else'.
   Both arguments are parenthesized so that expressions may be
   passed.  ID is evaluated more than once; do not pass an expression
   with side effects.  */

#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    if ((id) < 0xA0)                                    \
      (codes)[0] = (id), (codes)[1] = 0;                \
    else if ((id) < 0xE0)                               \
      (codes)[0] = 0x9A, (codes)[1] = (id);             \
    else if ((id) < 0xF0)                               \
      (codes)[0] = 0x9B, (codes)[1] = (id);             \
    else if ((id) < 0xF5)                               \
      (codes)[0] = 0x9C, (codes)[1] = (id);             \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = (id);             \
  } while (0)
2615
aa72b389 2616
df7492f9 2617static int
971de7fb 2618encode_coding_emacs_mule (struct coding_system *coding)
df7492f9
KH
2619{
2620 int multibytep = coding->dst_multibyte;
2621 int *charbuf = coding->charbuf;
2622 int *charbuf_end = charbuf + coding->charbuf_used;
2623 unsigned char *dst = coding->destination + coding->produced;
2624 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2625 int safe_room = 8;
df7492f9 2626 int produced_chars = 0;
24a73b0a 2627 Lisp_Object attrs, charset_list;
df7492f9 2628 int c;
ff0dacd7 2629 int preferred_charset_id = -1;
df7492f9 2630
24a73b0a 2631 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2632 if (! EQ (charset_list, Vemacs_mule_charset_list))
2633 {
2634 CODING_ATTR_CHARSET_LIST (attrs)
2635 = charset_list = Vemacs_mule_charset_list;
2636 }
df7492f9
KH
2637
2638 while (charbuf < charbuf_end)
2639 {
2640 ASSURE_DESTINATION (safe_room);
2641 c = *charbuf++;
ff0dacd7
KH
2642
2643 if (c < 0)
2644 {
2645 /* Handle an annotation. */
2646 switch (*charbuf)
2647 {
2648 case CODING_ANNOTATE_COMPOSITION_MASK:
2649 /* Not yet implemented. */
2650 break;
2651 case CODING_ANNOTATE_CHARSET_MASK:
2652 preferred_charset_id = charbuf[3];
2653 if (preferred_charset_id >= 0
2654 && NILP (Fmemq (make_number (preferred_charset_id),
2655 charset_list)))
2656 preferred_charset_id = -1;
2657 break;
2658 default:
2659 abort ();
2660 }
2661 charbuf += -c - 1;
2662 continue;
2663 }
2664
df7492f9
KH
2665 if (ASCII_CHAR_P (c))
2666 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2667 else if (CHAR_BYTE8_P (c))
2668 {
2669 c = CHAR_TO_BYTE8 (c);
2670 EMIT_ONE_BYTE (c);
2671 }
df7492f9 2672 else
aa72b389 2673 {
df7492f9
KH
2674 struct charset *charset;
2675 unsigned code;
2676 int dimension;
2677 int emacs_mule_id;
2678 unsigned char leading_codes[2];
2679
ff0dacd7
KH
2680 if (preferred_charset_id >= 0)
2681 {
2682 charset = CHARSET_FROM_ID (preferred_charset_id);
905ca9d2
KH
2683 if (CHAR_CHARSET_P (c, charset))
2684 code = ENCODE_CHAR (charset, c);
2685 else
2686 charset = char_charset (c, charset_list, &code);
ff0dacd7
KH
2687 }
2688 else
2689 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2690 if (! charset)
2691 {
2692 c = coding->default_char;
2693 if (ASCII_CHAR_P (c))
2694 {
2695 EMIT_ONE_ASCII_BYTE (c);
2696 continue;
2697 }
2698 charset = char_charset (c, charset_list, &code);
2699 }
2700 dimension = CHARSET_DIMENSION (charset);
2701 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2702 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2703 EMIT_ONE_BYTE (leading_codes[0]);
2704 if (leading_codes[1])
2705 EMIT_ONE_BYTE (leading_codes[1]);
2706 if (dimension == 1)
1fa663f9 2707 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2708 else
df7492f9 2709 {
1fa663f9 2710 code |= 0x8080;
df7492f9
KH
2711 EMIT_ONE_BYTE (code >> 8);
2712 EMIT_ONE_BYTE (code & 0xFF);
2713 }
aa72b389 2714 }
aa72b389 2715 }
065e3595 2716 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2717 coding->produced_char += produced_chars;
2718 coding->produced = dst - coding->destination;
2719 return 0;
aa72b389 2720}
b73bfc1c 2721
4ed46869 2722\f
df7492f9 2723/*** 7. ISO2022 handlers ***/
4ed46869
KH
2724
2725/* The following note describes the coding system ISO2022 briefly.
39787efd 2726 Since the intention of this note is to help understand the
5a936b46 2727 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2728 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2729 original document of ISO2022. This is equivalent to the standard
cfb43547 2730 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2731
2732 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2733 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2734 is encoded using bytes less than 128. This may make the encoded
2735 text a little bit longer, but the text passes more easily through
cfb43547 2736 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2737 Significant Bit).
b73bfc1c 2738
cfb43547
DL
2739 There are two kinds of character sets: control character sets and
2740 graphic character sets. The former contain control characters such
4ed46869 2741 as `newline' and `escape' to provide control functions (control
39787efd 2742 functions are also provided by escape sequences). The latter
cfb43547 2743 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2744 two control character sets and many graphic character sets.
2745
2746 Graphic character sets are classified into one of the following
39787efd
KH
2747 four classes, according to the number of bytes (DIMENSION) and
2748 number of characters in one dimension (CHARS) of the set:
2749 - DIMENSION1_CHARS94
2750 - DIMENSION1_CHARS96
2751 - DIMENSION2_CHARS94
2752 - DIMENSION2_CHARS96
2753
2754 In addition, each character set is assigned an identification tag,
cfb43547 2755 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2756 hereafter). The <F> of each character set is decided by ECMA(*)
2757 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2758 (0x30..0x3F are for private use only).
4ed46869
KH
2759
2760 Note (*): ECMA = European Computer Manufacturers Association
2761
cfb43547 2762 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2763 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2764 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2765 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2766 o DIMENSION2_CHARS96 -- none for the moment
2767
39787efd 2768 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2769 C0 [0x00..0x1F] -- control character plane 0
2770 GL [0x20..0x7F] -- graphic character plane 0
2771 C1 [0x80..0x9F] -- control character plane 1
2772 GR [0xA0..0xFF] -- graphic character plane 1
2773
2774 A control character set is directly designated and invoked to C0 or
39787efd
KH
2775 C1 by an escape sequence. The most common case is that:
2776 - ISO646's control character set is designated/invoked to C0, and
2777 - ISO6429's control character set is designated/invoked to C1,
2778 and usually these designations/invocations are omitted in encoded
2779 text. In a 7-bit environment, only C0 can be used, and a control
2780 character for C1 is encoded by an appropriate escape sequence to
2781 fit into the environment. All control characters for C1 are
2782 defined to have corresponding escape sequences.
4ed46869
KH
2783
2784 A graphic character set is at first designated to one of four
2785 graphic registers (G0 through G3), then these graphic registers are
2786 invoked to GL or GR. These designations and invocations can be
2787 done independently. The most common case is that G0 is invoked to
39787efd
KH
2788 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2789 these invocations and designations are omitted in encoded text.
2790 In a 7-bit environment, only GL can be used.
4ed46869 2791
39787efd
KH
2792 When a graphic character set of CHARS94 is invoked to GL, codes
2793 0x20 and 0x7F of the GL area work as control characters SPACE and
2794 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2795 be used.
4ed46869
KH
2796
2797 There are two ways of invocation: locking-shift and single-shift.
2798 With locking-shift, the invocation lasts until the next different
39787efd
KH
2799 invocation, whereas with single-shift, the invocation affects the
2800 following character only and doesn't affect the locking-shift
2801 state. Invocations are done by the following control characters or
2802 escape sequences:
4ed46869
KH
2803
2804 ----------------------------------------------------------------------
39787efd 2805 abbrev function cntrl escape seq description
4ed46869 2806 ----------------------------------------------------------------------
39787efd
KH
2807 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2808 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2809 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2810 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2811 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2812 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2813 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2814 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2815 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2816 ----------------------------------------------------------------------
39787efd
KH
2817 (*) These are not used by any known coding system.
2818
2819 Control characters for these functions are defined by macros
2820 ISO_CODE_XXX in `coding.h'.
4ed46869 2821
39787efd 2822 Designations are done by the following escape sequences:
4ed46869
KH
2823 ----------------------------------------------------------------------
2824 escape sequence description
2825 ----------------------------------------------------------------------
2826 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2827 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2828 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2829 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2830 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2831 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2832 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2833 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2834 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2835 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2836 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2837 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2838 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2839 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2840 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2841 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2842 ----------------------------------------------------------------------
2843
2844 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2845 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2846
2847 Note (*): Although these designations are not allowed in ISO2022,
2848 Emacs accepts them on decoding, and produces them on encoding
39787efd 2849 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2850 7-bit environment, non-locking-shift, and non-single-shift.
2851
2852 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2853 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2854
cfb43547 2855 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2856 same multilingual text in ISO2022. Actually, there exist many
2857 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2858 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2859 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2860 localized platforms), and all of these are variants of ISO2022.
2861
2862 In addition to the above, Emacs handles two more kinds of escape
2863 sequences: ISO6429's direction specification and Emacs' private
2864 sequence for specifying character composition.
2865
39787efd 2866 ISO6429's direction specification takes the following form:
4ed46869
KH
2867 o CSI ']' -- end of the current direction
2868 o CSI '0' ']' -- end of the current direction
2869 o CSI '1' ']' -- start of left-to-right text
2870 o CSI '2' ']' -- start of right-to-left text
2871 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2872 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2873
2874 Character composition specification takes the following form:
ec6d2bb8
KH
2875 o ESC '0' -- start relative composition
2876 o ESC '1' -- end composition
2877 o ESC '2' -- start rule-base composition (*)
2878 o ESC '3' -- start relative composition with alternate chars (**)
2879 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2880 Since these are not standard escape sequences of any ISO standard,
cfb43547 2881 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2882
5a936b46
DL
2883 (*) This form is used only in Emacs 20.7 and older versions,
2884 but newer versions can safely decode it.
cfb43547 2885 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2886 and older versions can't decode it.
ec6d2bb8 2887
cfb43547 2888 Here's a list of example usages of these composition escape
b73bfc1c 2889 sequences (categorized by `enum composition_method').
ec6d2bb8 2890
b73bfc1c 2891 COMPOSITION_RELATIVE:
ec6d2bb8 2892 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2893 COMPOSITION_WITH_RULE:
ec6d2bb8 2894 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2895 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2896 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2897 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2898 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2899
/* Table with one entry per possible byte value (0..255) giving the
   `enum iso_code_class_type' of that byte.  decode_coding_iso_2022
   dispatches on iso_code_class[c1] for each source byte C1.  */
2900enum iso_code_class_type iso_code_class[256];
2901
df7492f9
KH
/* Return nonzero if the charset whose ID is ID may be used with CODING.
   ID must lie within 0..CODING->max_charset_id, and its entry in
   CODING->safe_charsets must not be 255 (the filler value
   setup_iso_safe_charsets leaves for charsets that get no graphic
   register).  A negative ID, i.e. an undefined charset, is never safe;
   testing that first keeps the table from being indexed out of bounds.
   ID is evaluated more than once.  */
#define SAFE_CHARSET_P(coding, id) \
  ((id) >= 0 \
   && (id) <= (coding)->max_charset_id \
   && (coding)->safe_charsets[id] != 255)
df7492f9
KH
2905
2906
/* Nonzero if CODING_ISO_INITIAL yields a non-negative value for graphic
   register 1 of the coding system stored for CATEGORY in
   coding_categories.  */
#define SHIFT_OUT_OK(category) \
  (0 <= CODING_ISO_INITIAL (coding_categories + (category), 1))
2909
2910static void
971de7fb 2911setup_iso_safe_charsets (Lisp_Object attrs)
df7492f9
KH
2912{
2913 Lisp_Object charset_list, safe_charsets;
2914 Lisp_Object request;
2915 Lisp_Object reg_usage;
2916 Lisp_Object tail;
2917 int reg94, reg96;
2918 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2919 int max_charset_id;
2920
2921 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2922 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2923 && ! EQ (charset_list, Viso_2022_charset_list))
2924 {
2925 CODING_ATTR_CHARSET_LIST (attrs)
2926 = charset_list = Viso_2022_charset_list;
2927 ASET (attrs, coding_attr_safe_charsets, Qnil);
2928 }
2929
2930 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2931 return;
2932
2933 max_charset_id = 0;
2934 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2935 {
2936 int id = XINT (XCAR (tail));
2937 if (max_charset_id < id)
2938 max_charset_id = id;
2939 }
d46c5b12 2940
1b3b981b
AS
2941 safe_charsets = make_uninit_string (max_charset_id + 1);
2942 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9
KH
2943 request = AREF (attrs, coding_attr_iso_request);
2944 reg_usage = AREF (attrs, coding_attr_iso_usage);
2945 reg94 = XINT (XCAR (reg_usage));
2946 reg96 = XINT (XCDR (reg_usage));
2947
2948 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2949 {
2950 Lisp_Object id;
2951 Lisp_Object reg;
2952 struct charset *charset;
2953
2954 id = XCAR (tail);
2955 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2956 reg = Fcdr (Fassq (id, request));
df7492f9 2957 if (! NILP (reg))
8f924df7 2958 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
2959 else if (charset->iso_chars_96)
2960 {
2961 if (reg96 < 4)
8f924df7 2962 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
2963 }
2964 else
2965 {
2966 if (reg94 < 4)
8f924df7 2967 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
2968 }
2969 }
2970 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2971}
d46c5b12 2972
b6871cc7 2973
4ed46869 2974/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ad1746f5 2975 Check if a text is encoded in one of ISO-2022 based coding systems.
ff0dacd7 2976 If it is, return 1, else return 0. */
4ed46869 2977
0a28aafb 2978static int
cf84bb53
JB
2979detect_coding_iso_2022 (struct coding_system *coding,
2980 struct coding_detection_info *detect_info)
4ed46869 2981{
8f924df7
KH
2982 const unsigned char *src = coding->source, *src_base = src;
2983 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 2984 int multibytep = coding->src_multibyte;
ff0dacd7 2985 int single_shifting = 0;
df7492f9
KH
2986 int id;
2987 int c, c1;
2988 int consumed_chars = 0;
2989 int i;
ff0dacd7
KH
2990 int rejected = 0;
2991 int found = 0;
cee53ed4 2992 int composition_count = -1;
ff0dacd7
KH
2993
2994 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2995
2996 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2997 {
2998 struct coding_system *this = &(coding_categories[i]);
2999 Lisp_Object attrs, val;
3000
c6b278e7
KH
3001 if (this->id < 0)
3002 continue;
df7492f9
KH
3003 attrs = CODING_ID_ATTRS (this->id);
3004 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
1b3b981b 3005 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
df7492f9
KH
3006 setup_iso_safe_charsets (attrs);
3007 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 3008 this->max_charset_id = SCHARS (val) - 1;
1b3b981b 3009 this->safe_charsets = SDATA (val);
df7492f9
KH
3010 }
3011
3012 /* A coding system of this category is always ASCII compatible. */
3013 src += coding->head_ascii;
3f003981 3014
ff0dacd7 3015 while (rejected != CATEGORY_MASK_ISO)
4ed46869 3016 {
065e3595 3017 src_base = src;
df7492f9 3018 ONE_MORE_BYTE (c);
4ed46869
KH
3019 switch (c)
3020 {
3021 case ISO_CODE_ESC:
74383408
KH
3022 if (inhibit_iso_escape_detection)
3023 break;
f46869e4 3024 single_shifting = 0;
df7492f9 3025 ONE_MORE_BYTE (c);
d46c5b12 3026 if (c >= '(' && c <= '/')
4ed46869 3027 {
bf9cdd4e 3028 /* Designation sequence for a charset of dimension 1. */
df7492f9 3029 ONE_MORE_BYTE (c1);
d46c5b12 3030 if (c1 < ' ' || c1 >= 0x80
df7492f9 3031 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
3032 /* Invalid designation sequence. Just ignore. */
3033 break;
bf9cdd4e
KH
3034 }
3035 else if (c == '$')
3036 {
3037 /* Designation sequence for a charset of dimension 2. */
df7492f9 3038 ONE_MORE_BYTE (c);
bf9cdd4e
KH
3039 if (c >= '@' && c <= 'B')
3040 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 3041 id = iso_charset_table[1][0][c];
bf9cdd4e 3042 else if (c >= '(' && c <= '/')
bcf26d6a 3043 {
df7492f9 3044 ONE_MORE_BYTE (c1);
d46c5b12 3045 if (c1 < ' ' || c1 >= 0x80
df7492f9 3046 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
3047 /* Invalid designation sequence. Just ignore. */
3048 break;
bcf26d6a 3049 }
bf9cdd4e 3050 else
ff0dacd7 3051 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
3052 break;
3053 }
ae9ff118 3054 else if (c == 'N' || c == 'O')
d46c5b12 3055 {
ae9ff118 3056 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
3057 single_shifting = 1;
3058 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 3059 break;
4ed46869 3060 }
cee53ed4
KH
3061 else if (c == '1')
3062 {
3063 /* End of composition. */
3064 if (composition_count < 0
3065 || composition_count > MAX_COMPOSITION_COMPONENTS)
3066 /* Invalid */
3067 break;
3068 composition_count = -1;
3069 found |= CATEGORY_MASK_ISO;
3070 }
ec6d2bb8
KH
3071 else if (c >= '0' && c <= '4')
3072 {
3073 /* ESC <Fp> for start/end composition. */
cee53ed4 3074 composition_count = 0;
ec6d2bb8
KH
3075 break;
3076 }
bf9cdd4e 3077 else
df7492f9 3078 {
ff0dacd7 3079 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
3080 break;
3081 }
d46c5b12
KH
3082
3083 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 3084 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
3085 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3086 id))
ff0dacd7 3087 found |= CATEGORY_MASK_ISO_7;
d46c5b12 3088 else
ff0dacd7 3089 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
3090 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3091 id))
ff0dacd7 3092 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 3093 else
ff0dacd7 3094 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
3095 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3096 id))
ff0dacd7 3097 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 3098 else
ff0dacd7 3099 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
3100 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3101 id))
ff0dacd7 3102 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 3103 else
ff0dacd7 3104 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
3105 break;
3106
4ed46869 3107 case ISO_CODE_SO:
d46c5b12 3108 case ISO_CODE_SI:
ff0dacd7 3109 /* Locking shift out/in. */
74383408
KH
3110 if (inhibit_iso_escape_detection)
3111 break;
f46869e4 3112 single_shifting = 0;
ff0dacd7 3113 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12
KH
3114 break;
3115
4ed46869 3116 case ISO_CODE_CSI:
ff0dacd7 3117 /* Control sequence introducer. */
f46869e4 3118 single_shifting = 0;
ff0dacd7
KH
3119 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3120 found |= CATEGORY_MASK_ISO_8_ELSE;
3121 goto check_extra_latin;
3122
4ed46869
KH
3123 case ISO_CODE_SS2:
3124 case ISO_CODE_SS3:
ff0dacd7
KH
3125 /* Single shift. */
3126 if (inhibit_iso_escape_detection)
3127 break;
75e2a253 3128 single_shifting = 0;
ff0dacd7
KH
3129 rejected |= CATEGORY_MASK_ISO_7BIT;
3130 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3131 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253 3132 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
ff0dacd7
KH
3133 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3134 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253
KH
3135 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3136 if (single_shifting)
3137 break;
ff0dacd7 3138 goto check_extra_latin;
4ed46869
KH
3139
3140 default:
065e3595
KH
3141 if (c < 0)
3142 continue;
4ed46869 3143 if (c < 0x80)
f46869e4 3144 {
cee53ed4
KH
3145 if (composition_count >= 0)
3146 composition_count++;
f46869e4
KH
3147 single_shifting = 0;
3148 break;
3149 }
ff0dacd7 3150 if (c >= 0xA0)
c4825358 3151 {
ff0dacd7
KH
3152 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3153 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 3154 /* Check the length of succeeding codes of the range
ff0dacd7
KH
3155 0xA0..0FF. If the byte length is even, we include
3156 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
3157 only when we are not single shifting. */
3158 if (! single_shifting
3159 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 3160 {
e17de821 3161 int i = 1;
b73bfc1c
KH
3162 while (src < src_end)
3163 {
d12bd917 3164 src_base = src;
df7492f9 3165 ONE_MORE_BYTE (c);
b73bfc1c 3166 if (c < 0xA0)
d12bd917
KH
3167 {
3168 src = src_base;
3169 break;
3170 }
b73bfc1c
KH
3171 i++;
3172 }
3173
3174 if (i & 1 && src < src_end)
cee53ed4
KH
3175 {
3176 rejected |= CATEGORY_MASK_ISO_8_2;
3177 if (composition_count >= 0)
3178 composition_count += i;
3179 }
f46869e4 3180 else
cee53ed4
KH
3181 {
3182 found |= CATEGORY_MASK_ISO_8_2;
3183 if (composition_count >= 0)
3184 composition_count += i / 2;
3185 }
f46869e4 3186 }
ff0dacd7 3187 break;
4ed46869 3188 }
ff0dacd7
KH
3189 check_extra_latin:
3190 single_shifting = 0;
3191 if (! VECTORP (Vlatin_extra_code_table)
3192 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3193 {
3194 rejected = CATEGORY_MASK_ISO;
3195 break;
3196 }
3197 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3198 & CODING_ISO_FLAG_LATIN_EXTRA)
3199 found |= CATEGORY_MASK_ISO_8_1;
3200 else
3201 rejected |= CATEGORY_MASK_ISO_8_1;
75e2a253 3202 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
3203 }
3204 }
ff0dacd7
KH
3205 detect_info->rejected |= CATEGORY_MASK_ISO;
3206 return 0;
4ed46869 3207
df7492f9 3208 no_more_source:
ff0dacd7
KH
3209 detect_info->rejected |= rejected;
3210 detect_info->found |= (found & ~rejected);
df7492f9 3211 return 1;
4ed46869 3212}
ec6d2bb8 3213
4ed46869 3214
134b9549
KH
/* Set designation state into CODING (the variable `coding' of the
   invoking scope): designate to graphic register REG the charset that
   ISO_CHARSET_TABLE yields for dimension DIM, CHARS_96 and final byte
   FINAL.

   If FINAL is outside '0'..127, no charset is registered for it, or
   the charset is not safe for CODING, the designation of REG is set to
   -2 and CHARS_96 is set to -1.  Otherwise the charset ID is stored,
   after substituting ASCII for JISX0201-Roman when
   CODING_ISO_FLAG_USE_ROMAN is set and JISX0208 for JISX0208.1978 when
   CODING_ISO_FLAG_USE_OLDJIS is set.

   Set CHARS_96 to -1 if the escape sequence should be kept.  CHARS_96
   must therefore be an lvalue.  FINAL is evaluated more than once.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
  do { \
    int id, prev; \
 \
    if ((final) < '0' || (final) >= 128 \
        || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
        || !SAFE_CHARSET_P (coding, id)) \
      { \
        CODING_ISO_DESIGNATION (coding, reg) = -2; \
        (chars_96) = -1; \
        break; \
      } \
    prev = CODING_ISO_DESIGNATION (coding, reg); \
    if (id == charset_jisx0201_roman) \
      { \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
          id = charset_ascii; \
      } \
    else if (id == charset_jisx0208_1978) \
      { \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
          id = charset_jisx0208; \
      } \
    CODING_ISO_DESIGNATION (coding, reg) = id; \
    /* If there was an invalid designation to REG previously, and this \
       designation is ASCII to REG, we should keep this designation \
       sequence.  */ \
    if (prev == -2 && id == charset_ascii) \
      (chars_96) = -1; \
  } while (0)
3247
d46c5b12 3248
e951386e
KH
3249/* Handle these composition sequences (ALT: alternate char):
3250
3251 (1) relative composition: ESC 0 CHAR ... ESC 1
3252 (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3253 (3) altchar composition: ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3254 (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3255
3256 When the start sequence (ESC 0/2/3/4) is found, this annotation
3257 header is produced.
3258
3259 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3260
3261 Then, upon reading CHAR or RULE (one or two bytes), these codes are
3262 produced until the end sequence (ESC 1) is found:
3263
3264 (1) CHAR ... CHAR
3265 (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3266 (3) ALT ... ALT -1 -1 CHAR ... CHAR
3267 (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3268
3269 When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3270 annotation header are updated as below:
3271
3272 (1) LENGTH: unchanged, NCHARS: number of CHARs
3273 (2) LENGTH: unchanged, NCHARS: number of CHARs
3274 (3) LENGTH: += number of ALTs + 2, NCHARS: number of CHARs
3275 (4) LENGTH: += number of ALTs * 3, NCHARS: number of CHARs
3276
3277 If an error is found while composing, the annotation header is
3278 changed to:
3279
3280 [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3281
3282 and the sequence [ -2 DECODED-RULE ] is changed to the original
3283 byte sequence as below:
3284 o the original byte sequence is B: [ B -1 ]
3285 o the original byte sequence is B1 B2: [ B1 B2 ]
3286 and the sequence [ -1 -1 ] is changed to the original byte
3287 sequence:
3288 [ ESC '0' ]
3289*/
3290
/* Decode the composition rule that starts with the byte C1 (a variable
   of the invoking scope), reading one more source byte when the rule
   uses the two-byte format.  Store the encoded rule in RULE and the
   number of bytes the rule occupied in NBYTES.  If C1 is below 32,
   RULE is left negative and NBYTES is not set; COMPOSITION_ENCODE_RULE
   may also yield a negative RULE for an invalid rule.  */

#define DECODE_COMPOSITION_RULE(rule, nbytes) \
  do { \
    rule = c1 - 32; \
    if (rule >= 0 && rule < 81) /* old format (before ver.21) */ \
      { \
        int gref = (rule) / 9, nref = (rule) % 9; \
 \
        /* Reference point 4 of the old format is point 10 now.  */ \
        rule = COMPOSITION_ENCODE_RULE ((gref == 4 ? 10 : gref), \
                                        (nref == 4 ? 10 : nref)); \
        nbytes = 1; \
      } \
    else if (rule >= 81) /* new format (after ver.21) */ \
      { \
        int c; \
 \
        ONE_MORE_BYTE (c); \
        rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32); \
        if (rule >= 0) \
          rule += 0x100; /* to distinguish it from the old format */ \
        nbytes = 2; \
      } \
  } while (0)
3321
/* Turn the decoded composition rule RULE back into its original byte
   form, storing it in charbuf[idx] and charbuf[idx + 1] (`charbuf',
   `idx' and `new_chars' belong to the invoking scope).

   A RULE below 0x100 came from the one-byte old format: store that
   byte followed by -1 and add 1 to new_chars.  Any other RULE came
   from the two-byte new format: store both bytes and add 2 to
   new_chars.  RULE is evaluated more than once.  */
#define ENCODE_COMPOSITION_RULE(rule) \
  do { \
    int gref = ((rule) % 0x100) / 12, nref = ((rule) % 0x100) % 12; \
 \
    if ((rule) < 0x100) /* old format */ \
      { \
        if (gref == 10) gref = 4; \
        if (nref == 10) nref = 4; \
        charbuf[idx] = 32 + gref * 9 + nref; \
        charbuf[idx + 1] = -1; \
        new_chars++; \
      } \
    else /* new format */ \
      { \
        charbuf[idx] = 32 + 81 + gref; \
        charbuf[idx + 1] = 32 + nref; \
        new_chars += 2; \
      } \
  } while (0)
3341
e951386e
KH
3342/* Finish the current composition as invalid. */
3343
f57e2426 3344static int finish_composition (int *, struct composition_status *);
e951386e
KH
3345
3346static int
971de7fb 3347finish_composition (int *charbuf, struct composition_status *cmp_status)
e951386e
KH
3348{
3349 int idx = - cmp_status->length;
3350 int new_chars;
3351
3352 /* Recover the original ESC sequence */
3353 charbuf[idx++] = ISO_CODE_ESC;
3354 charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3355 : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3356 : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3357 /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3358 : '4');
3359 charbuf[idx++] = -2;
3360 charbuf[idx++] = 0;
3361 charbuf[idx++] = -1;
3362 new_chars = cmp_status->nchars;
3363 if (cmp_status->method >= COMPOSITION_WITH_RULE)
3364 for (; idx < 0; idx++)
3365 {
3366 int elt = charbuf[idx];
3367
3368 if (elt == -2)
3369 {
3370 ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3371 idx++;
3372 }
3373 else if (elt == -1)
3374 {
3375 charbuf[idx++] = ISO_CODE_ESC;
3376 charbuf[idx] = '0';
3377 new_chars += 2;
3378 }
3379 }
3380 cmp_status->state = COMPOSING_NO;
3381 return new_chars;
3382}
3383
/* If a composition is in progress (cmp_status->state is not
   COMPOSING_NO), close it with finish_composition and advance
   char_offset by the count that function returns.  Uses `charbuf',
   `cmp_status' and `char_offset' of the invoking scope.  */
#define MAYBE_FINISH_COMPOSITION() \
  do { \
    if (cmp_status->state == COMPOSING_NO) \
      break; \
    char_offset += finish_composition (charbuf, cmp_status); \
  } while (0)
d46c5b12 3390
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4,
   where C1 is the byte that followed ESC.

   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   An ESC 0 met while the alternate chars of an ESC 3 or ESC 4
   composition are being collected only separates them from the
   composed chars: store the marker [ -1 -1 ], add 2 to the recorded
   length and switch the state to COMPOSING_CHAR.

   In every other case finish any composition still in progress, then
   produce this annotation sequence now (the layout
   ADD_COMPOSITION_DATA is expected to emit for the arguments 0, 0 and
   METHOD):

   [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]

   C1 is evaluated more than once.  Uses `charbuf', `cmp_status' and
   `coding' of the invoking scope.  */

#define DECODE_COMPOSITION_START(c1) \
  do { \
    if ((c1) == '0' \
        && ((cmp_status->state == COMPOSING_COMPONENT_CHAR \
             && cmp_status->method == COMPOSITION_WITH_ALTCHARS) \
            || (cmp_status->state == COMPOSING_COMPONENT_RULE \
                && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
      { \
        *charbuf++ = -1; \
        *charbuf++ = -1; \
        cmp_status->state = COMPOSING_CHAR; \
        cmp_status->length += 2; \
      } \
    else \
      { \
        MAYBE_FINISH_COMPOSITION (); \
        cmp_status->method = ((c1) == '0' ? COMPOSITION_RELATIVE \
                              : (c1) == '2' ? COMPOSITION_WITH_RULE \
                              : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS \
                              : COMPOSITION_WITH_RULE_ALTCHARS); \
        cmp_status->state \
          = ((c1) <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR); \
        ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
        cmp_status->length = MAX_ANNOTATION_LENGTH; \
        cmp_status->nchars = cmp_status->ncomps = 0; \
        coding->annotated = 1; \
      } \
  } while (0)
3431
ec6d2bb8 3432
/* Handle composition end sequence ESC 1.

   The end is invalid when no composed char has been stored yet, or
   when the state does not fit the method: a rule-base composition
   must not end in COMPOSING_CHAR state, and every other method must.
   In that case finish the composition as invalid and jump to the
   label `invalid_code' of the invoking function.

   Otherwise fix up the annotation header at
   charbuf[-cmp_status->length]: enlarge the negated length stored
   there by NCOMPS + 2 for COMPOSITION_WITH_ALTCHARS or by NCOMPS * 3
   for COMPOSITION_WITH_RULE_ALTCHARS, store NCHARS in its third
   element, add NCHARS to char_offset and leave composing state.  */

#define DECODE_COMPOSITION_END() \
  do { \
    if (cmp_status->nchars == 0 \
        || (cmp_status->state == COMPOSING_CHAR \
            ? cmp_status->method == COMPOSITION_WITH_RULE \
            : cmp_status->method != COMPOSITION_WITH_RULE)) \
      { \
        MAYBE_FINISH_COMPOSITION (); \
        goto invalid_code; \
      } \
    switch (cmp_status->method) \
      { \
      case COMPOSITION_WITH_ALTCHARS: \
        charbuf[- cmp_status->length] -= cmp_status->ncomps + 2; \
        break; \
      case COMPOSITION_WITH_RULE_ALTCHARS: \
        charbuf[- cmp_status->length] -= cmp_status->ncomps * 3; \
        break; \
      default: \
        break; \
      } \
    charbuf[2 - cmp_status->length] = cmp_status->nchars; \
    char_offset += cmp_status->nchars; \
    cmp_status->state = COMPOSING_NO; \
  } while (0)
3452
/* Append the pair [ -2 RULE ] to charbuf for composition rule RULE,
   add 2 to cmp_status->length, and step cmp_status->state back by one
   (undoing the increment STORE_COMPOSITION_CHAR applies when a rule is
   due next).  */

#define STORE_COMPOSITION_RULE(rule) \
  do { \
    *charbuf = -2; \
    charbuf++; \
    *charbuf = rule; \
    charbuf++; \
    cmp_status->length += 2; \
    cmp_status->state--; \
  } while (0)
ec6d2bb8 3462
e951386e
KH
/* Append C, a composed char or a component (alternate) char, to
   charbuf and update cmp_status: add 1 to its length, count C in
   nchars when the state is COMPOSING_CHAR and in ncomps otherwise, and
   advance the state by one when a composition rule is due next (any
   char of a rule-base composition, or a component char of an alt&rule
   composition).  */

#define STORE_COMPOSITION_CHAR(c) \
  do { \
    *charbuf = (c); \
    charbuf++; \
    cmp_status->length += 1; \
    if (cmp_status->state != COMPOSING_CHAR) \
      cmp_status->ncomps += 1; \
    else \
      cmp_status->nchars += 1; \
    if (cmp_status->method == COMPOSITION_WITH_RULE \
        || (cmp_status->state == COMPOSING_COMPONENT_CHAR \
            && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)) \
      cmp_status->state++; \
  } while (0)
88993dfd 3479
d46c5b12 3480
4ed46869
KH
3481/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
3482
b73bfc1c 3483static void
971de7fb 3484decode_coding_iso_2022 (struct coding_system *coding)
4ed46869 3485{
8f924df7
KH
3486 const unsigned char *src = coding->source + coding->consumed;
3487 const unsigned char *src_end = coding->source + coding->src_bytes;
3488 const unsigned char *src_base;
69a80ea3 3489 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5
KH
3490 /* We may produce two annotations (charset and composition) in one
3491 loop and one more charset annotation at the end. */
ff0dacd7 3492 int *charbuf_end
df80c7f0 3493 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
df7492f9 3494 int consumed_chars = 0, consumed_chars_base;
df7492f9 3495 int multibytep = coding->src_multibyte;
4ed46869 3496 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
3497 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3498 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 3499 int charset_id_2, charset_id_3;
df7492f9
KH
3500 struct charset *charset;
3501 int c;
e951386e 3502 struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
24a73b0a 3503 Lisp_Object attrs, charset_list;
ff0dacd7
KH
3504 int char_offset = coding->produced_char;
3505 int last_offset = char_offset;
3506 int last_id = charset_ascii;
0a9564cb
EZ
3507 int eol_crlf =
3508 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 3509 int byte_after_cr = -1;
e951386e 3510 int i;
df7492f9 3511
24a73b0a 3512 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 3513 setup_iso_safe_charsets (attrs);
287c57d7
KH
3514 /* Charset list may have been changed. */
3515 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
1b3b981b 3516 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
b73bfc1c 3517
e951386e
KH
3518 if (cmp_status->state != COMPOSING_NO)
3519 {
3520 for (i = 0; i < cmp_status->length; i++)
3521 *charbuf++ = cmp_status->carryover[i];
3522 coding->annotated = 1;
3523 }
3524
b73bfc1c 3525 while (1)
4ed46869 3526 {
cf299835 3527 int c1, c2, c3;
b73bfc1c
KH
3528
3529 src_base = src;
df7492f9
KH
3530 consumed_chars_base = consumed_chars;
3531
3532 if (charbuf >= charbuf_end)
b71f6f73
KH
3533 {
3534 if (byte_after_cr >= 0)
3535 src_base--;
3536 break;
3537 }
df7492f9 3538
119852e7
KH
3539 if (byte_after_cr >= 0)
3540 c1 = byte_after_cr, byte_after_cr = -1;
3541 else
3542 ONE_MORE_BYTE (c1);
065e3595
KH
3543 if (c1 < 0)
3544 goto invalid_code;
4ed46869 3545
e951386e 3546 if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
4ed46869 3547 {
e951386e
KH
3548 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3549 char_offset++;
3550 CODING_ISO_EXTSEGMENT_LEN (coding)--;
3551 continue;
3552 }
3553
3554 if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3555 {
3556 if (c1 == ISO_CODE_ESC)
ec6d2bb8 3557 {
e951386e
KH
3558 if (src + 1 >= src_end)
3559 goto no_more_source;
3560 *charbuf++ = ISO_CODE_ESC;
3561 char_offset++;
3562 if (src[0] == '%' && src[1] == '@')
df7492f9 3563 {
e951386e
KH
3564 src += 2;
3565 consumed_chars += 2;
3566 char_offset += 2;
3567 /* We are sure charbuf can contain two more chars. */
3568 *charbuf++ = '%';
3569 *charbuf++ = '@';
3570 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
df7492f9 3571 }
4ed46869 3572 }
e951386e
KH
3573 else
3574 {
3575 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3576 char_offset++;
3577 }
3578 continue;
3579 }
3580
3581 if ((cmp_status->state == COMPOSING_RULE
3582 || cmp_status->state == COMPOSING_COMPONENT_RULE)
3583 && c1 != ISO_CODE_ESC)
3584 {
3585 int rule, nbytes;
3586
3587 DECODE_COMPOSITION_RULE (rule, nbytes);
3588 if (rule < 0)
3589 goto invalid_code;
3590 STORE_COMPOSITION_RULE (rule);
3591 continue;
3592 }
3593
3594 /* We produce at most one character. */
3595 switch (iso_code_class [c1])
3596 {
3597 case ISO_0x20_or_0x7F:
df7492f9
KH
3598 if (charset_id_0 < 0
3599 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
3600 /* This is SPACE or DEL. */
3601 charset = CHARSET_FROM_ID (charset_ascii);
3602 else
3603 charset = CHARSET_FROM_ID (charset_id_0);
3604 break;
4ed46869
KH
3605
3606 case ISO_graphic_plane_0:
134b9549
KH
3607 if (charset_id_0 < 0)
3608 charset = CHARSET_FROM_ID (charset_ascii);
3609 else
3610 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
3611 break;
3612
3613 case ISO_0xA0_or_0xFF:
df7492f9
KH
3614 if (charset_id_1 < 0
3615 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3616 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3617 goto invalid_code;
4ed46869
KH
3618 /* This is a graphic character, we fall down ... */
3619
3620 case ISO_graphic_plane_1:
df7492f9
KH
3621 if (charset_id_1 < 0)
3622 goto invalid_code;
3623 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
3624 break;
3625
df7492f9 3626 case ISO_control_0:
119852e7
KH
3627 if (eol_crlf && c1 == '\r')
3628 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
3629 MAYBE_FINISH_COMPOSITION ();
3630 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
3631 break;
3632
df7492f9 3633 case ISO_control_1:
df7492f9
KH
3634 goto invalid_code;
3635
4ed46869 3636 case ISO_shift_out:
df7492f9
KH
3637 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3638 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3639 goto invalid_code;
3640 CODING_ISO_INVOCATION (coding, 0) = 1;
3641 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3642 continue;
4ed46869
KH
3643
3644 case ISO_shift_in:
df7492f9
KH
3645 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3646 goto invalid_code;
3647 CODING_ISO_INVOCATION (coding, 0) = 0;
3648 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3649 continue;
4ed46869
KH
3650
3651 case ISO_single_shift_2_7:
a63dba42
KH
3652 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3653 goto invalid_code;
4ed46869 3654 case ISO_single_shift_2:
df7492f9
KH
3655 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3656 goto invalid_code;
4ed46869
KH
3657 /* SS2 is handled as an escape sequence of ESC 'N' */
3658 c1 = 'N';
3659 goto label_escape_sequence;
3660
3661 case ISO_single_shift_3:
df7492f9
KH
3662 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3663 goto invalid_code;
4ed46869
KH
3664 /* SS3 is handled as an escape sequence of ESC 'O' */
3665 c1 = 'O';
3666 goto label_escape_sequence;
3667
3668 case ISO_control_sequence_introducer:
3669 /* CSI is handled as an escape sequence of ESC '[' ... */
3670 c1 = '[';
3671 goto label_escape_sequence;
3672
3673 case ISO_escape:
3674 ONE_MORE_BYTE (c1);
3675 label_escape_sequence:
df7492f9 3676 /* Escape sequences handled here are invocation,
4ed46869
KH
3677 designation, direction specification, and character
3678 composition specification. */
3679 switch (c1)
3680 {
3681 case '&': /* revision of following character set */
3682 ONE_MORE_BYTE (c1);
3683 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3684 goto invalid_code;
4ed46869
KH
3685 ONE_MORE_BYTE (c1);
3686 if (c1 != ISO_CODE_ESC)
df7492f9 3687 goto invalid_code;
4ed46869
KH
3688 ONE_MORE_BYTE (c1);
3689 goto label_escape_sequence;
3690
3691 case '$': /* designation of 2-byte character set */
df7492f9
KH
3692 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3693 goto invalid_code;
134b9549
KH
3694 {
3695 int reg, chars96;
3696
3697 ONE_MORE_BYTE (c1);
3698 if (c1 >= '@' && c1 <= 'B')
3699 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3700 or JISX0208.1980 */
134b9549
KH
3701 reg = 0, chars96 = 0;
3702 }
3703 else if (c1 >= 0x28 && c1 <= 0x2B)
3704 { /* designation of DIMENSION2_CHARS94 character set */
3705 reg = c1 - 0x28, chars96 = 0;
3706 ONE_MORE_BYTE (c1);
3707 }
3708 else if (c1 >= 0x2C && c1 <= 0x2F)
3709 { /* designation of DIMENSION2_CHARS96 character set */
3710 reg = c1 - 0x2C, chars96 = 1;
3711 ONE_MORE_BYTE (c1);
3712 }
3713 else
3714 goto invalid_code;
3715 DECODE_DESIGNATION (reg, 2, chars96, c1);
3716 /* We must update these variables now. */
3717 if (reg == 0)
3718 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3719 else if (reg == 1)
3720 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3721 if (chars96 < 0)
3722 goto invalid_code;
3723 }
b73bfc1c 3724 continue;
4ed46869
KH
3725
3726 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3727 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3728 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3729 goto invalid_code;
3730 CODING_ISO_INVOCATION (coding, 0) = 2;
3731 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3732 continue;
4ed46869
KH
3733
3734 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3735 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3736 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3737 goto invalid_code;
3738 CODING_ISO_INVOCATION (coding, 0) = 3;
3739 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3740 continue;
4ed46869
KH
3741
3742 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3743 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3744 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3745 goto invalid_code;
134b9549
KH
3746 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3747 if (charset_id_2 < 0)
3748 charset = CHARSET_FROM_ID (charset_ascii);
3749 else
3750 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3751 ONE_MORE_BYTE (c1);
e7046a18 3752 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3753 goto invalid_code;
4ed46869
KH
3754 break;
3755
3756 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3757 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3758 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3759 goto invalid_code;
134b9549
KH
3760 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3761 if (charset_id_3 < 0)
3762 charset = CHARSET_FROM_ID (charset_ascii);
3763 else
3764 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3765 ONE_MORE_BYTE (c1);
e7046a18 3766 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3767 goto invalid_code;
4ed46869
KH
3768 break;
3769
ec6d2bb8 3770 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3771 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3772 goto invalid_code;
e951386e
KH
3773 if (last_id != charset_ascii)
3774 {
3775 ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3776 last_id = charset_ascii;
3777 last_offset = char_offset;
3778 }
ec6d2bb8 3779 DECODE_COMPOSITION_START (c1);
b73bfc1c 3780 continue;
4ed46869 3781
ec6d2bb8 3782 case '1': /* end composition */
e951386e 3783 if (cmp_status->state == COMPOSING_NO)
df7492f9
KH
3784 goto invalid_code;
3785 DECODE_COMPOSITION_END ();
b73bfc1c 3786 continue;
4ed46869
KH
3787
3788 case '[': /* specification of direction */
de59072a 3789 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
df7492f9 3790 goto invalid_code;
4ed46869 3791 /* For the moment, nested direction is not supported.
d46c5b12 3792 So, `coding->mode & CODING_MODE_DIRECTION' zero means
ad1746f5 3793 left-to-right, and nonzero means right-to-left. */
4ed46869
KH
3794 ONE_MORE_BYTE (c1);
3795 switch (c1)
3796 {
3797 case ']': /* end of the current direction */
d46c5b12 3798 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3799
3800 case '0': /* end of the current direction */
3801 case '1': /* start of left-to-right direction */
3802 ONE_MORE_BYTE (c1);
3803 if (c1 == ']')
d46c5b12 3804 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3805 else
df7492f9 3806 goto invalid_code;
4ed46869
KH
3807 break;
3808
3809 case '2': /* start of right-to-left direction */
3810 ONE_MORE_BYTE (c1);
3811 if (c1 == ']')
d46c5b12 3812 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3813 else
df7492f9 3814 goto invalid_code;
4ed46869
KH
3815 break;
3816
3817 default:
df7492f9 3818 goto invalid_code;
4ed46869 3819 }
b73bfc1c 3820 continue;
4ed46869 3821
103e0180 3822 case '%':
103e0180
KH
3823 ONE_MORE_BYTE (c1);
3824 if (c1 == '/')
3825 {
3826 /* CTEXT extended segment:
3827 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3828 We keep these bytes as is for the moment.
3829 They may be decoded by post-read-conversion. */
3830 int dim, M, L;
4776e638 3831 int size;
8f924df7 3832
103e0180 3833 ONE_MORE_BYTE (dim);
7a84eee5 3834 if (dim < '0' || dim > '4')
e951386e 3835 goto invalid_code;
103e0180 3836 ONE_MORE_BYTE (M);
e951386e
KH
3837 if (M < 128)
3838 goto invalid_code;
103e0180 3839 ONE_MORE_BYTE (L);
e951386e
KH
3840 if (L < 128)
3841 goto invalid_code;
103e0180 3842 size = ((M - 128) * 128) + (L - 128);
e951386e 3843 if (charbuf + 6 > charbuf_end)
4776e638
KH
3844 goto break_loop;
3845 *charbuf++ = ISO_CODE_ESC;
3846 *charbuf++ = '%';
3847 *charbuf++ = '/';
3848 *charbuf++ = dim;
3849 *charbuf++ = BYTE8_TO_CHAR (M);
3850 *charbuf++ = BYTE8_TO_CHAR (L);
e951386e 3851 CODING_ISO_EXTSEGMENT_LEN (coding) = size;
103e0180
KH
3852 }
3853 else if (c1 == 'G')
3854 {
103e0180
KH
3855 /* XFree86 extension for embedding UTF-8 in CTEXT:
3856 ESC % G --UTF-8-BYTES-- ESC % @
3857 We keep these bytes as is for the moment.
3858 They may be decoded by post-read-conversion. */
e951386e 3859 if (charbuf + 3 > charbuf_end)
4776e638 3860 goto break_loop;
e951386e
KH
3861 *charbuf++ = ISO_CODE_ESC;
3862 *charbuf++ = '%';
3863 *charbuf++ = 'G';
3864 CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
103e0180
KH
3865 }
3866 else
4776e638 3867 goto invalid_code;
103e0180 3868 continue;
4776e638 3869 break;
103e0180 3870
4ed46869 3871 default:
df7492f9
KH
3872 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3873 goto invalid_code;
134b9549
KH
3874 {
3875 int reg, chars96;
3876
3877 if (c1 >= 0x28 && c1 <= 0x2B)
3878 { /* designation of DIMENSION1_CHARS94 character set */
3879 reg = c1 - 0x28, chars96 = 0;
3880 ONE_MORE_BYTE (c1);
3881 }
3882 else if (c1 >= 0x2C && c1 <= 0x2F)
3883 { /* designation of DIMENSION1_CHARS96 character set */
3884 reg = c1 - 0x2C, chars96 = 1;
3885 ONE_MORE_BYTE (c1);
3886 }
3887 else
3888 goto invalid_code;
3889 DECODE_DESIGNATION (reg, 1, chars96, c1);
3890 /* We must update these variables now. */
3891 if (reg == 0)
3892 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3893 else if (reg == 1)
3894 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3895 if (chars96 < 0)
3896 goto invalid_code;
3897 }
b73bfc1c 3898 continue;
4ed46869 3899 }
b73bfc1c 3900 }
4ed46869 3901
e951386e
KH
3902 if (cmp_status->state == COMPOSING_NO
3903 && charset->id != charset_ascii
ff0dacd7
KH
3904 && last_id != charset->id)
3905 {
3906 if (last_id != charset_ascii)
69a80ea3 3907 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
3908 last_id = charset->id;
3909 last_offset = char_offset;
3910 }
3911
b73bfc1c 3912 /* Now we know CHARSET and 1st position code C1 of a character.
cf299835
KH
3913 Produce a decoded character while getting 2nd and 3rd
3914 position codes C2, C3 if necessary. */
df7492f9 3915 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3916 {
3917 ONE_MORE_BYTE (c2);
cf299835
KH
3918 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3919 || ((c1 & 0x80) != (c2 & 0x80)))
b73bfc1c 3920 /* C2 is not in a valid range. */
df7492f9 3921 goto invalid_code;
cf299835
KH
3922 if (CHARSET_DIMENSION (charset) == 2)
3923 c1 = (c1 << 8) | c2;
3924 else
df7492f9 3925 {
cf299835
KH
3926 ONE_MORE_BYTE (c3);
3927 if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
3928 || ((c1 & 0x80) != (c3 & 0x80)))
3929 /* C3 is not in a valid range. */
df7492f9 3930 goto invalid_code;
cf299835 3931 c1 = (c1 << 16) | (c2 << 8) | c2;
df7492f9
KH
3932 }
3933 }
cf299835 3934 c1 &= 0x7F7F7F;
df7492f9
KH
3935 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3936 if (c < 0)
3937 {
3938 MAYBE_FINISH_COMPOSITION ();
3939 for (; src_base < src; src_base++, char_offset++)
3940 {
3941 if (ASCII_BYTE_P (*src_base))
3942 *charbuf++ = *src_base;
3943 else
3944 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3945 }
3946 }
e951386e 3947 else if (cmp_status->state == COMPOSING_NO)
df7492f9
KH
3948 {
3949 *charbuf++ = c;
3950 char_offset++;
4ed46869 3951 }
e951386e
KH
3952 else if ((cmp_status->state == COMPOSING_CHAR
3953 ? cmp_status->nchars
3954 : cmp_status->ncomps)
3955 >= MAX_COMPOSITION_COMPONENTS)
781d7a48 3956 {
e951386e
KH
3957 /* Too long composition. */
3958 MAYBE_FINISH_COMPOSITION ();
3959 *charbuf++ = c;
3960 char_offset++;
4ed46869 3961 }
e951386e
KH
3962 else
3963 STORE_COMPOSITION_CHAR (c);
4ed46869
KH
3964 continue;
3965
df7492f9
KH
3966 invalid_code:
3967 MAYBE_FINISH_COMPOSITION ();
4ed46869 3968 src = src_base;
df7492f9
KH
3969 consumed_chars = consumed_chars_base;
3970 ONE_MORE_BYTE (c);
065e3595 3971 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3972 char_offset++;
df7492f9 3973 coding->errors++;
4776e638
KH
3974 continue;
3975
3976 break_loop:
3977 break;
4ed46869 3978 }
fb88bf2d 3979
df7492f9 3980 no_more_source:
e951386e
KH
3981 if (cmp_status->state != COMPOSING_NO)
3982 {
3983 if (coding->mode & CODING_MODE_LAST_BLOCK)
3984 MAYBE_FINISH_COMPOSITION ();
3985 else
3986 {
3987 charbuf -= cmp_status->length;
3988 for (i = 0; i < cmp_status->length; i++)
3989 cmp_status->carryover[i] = charbuf[i];
3990 }
3991 }
3992 else if (last_id != charset_ascii)
69a80ea3 3993 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
3994 coding->consumed_char += consumed_chars_base;
3995 coding->consumed = src_base - coding->source;
3996 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3997}
3998
b73bfc1c 3999
f4dee582 4000/* ISO2022 encoding stuff. */
4ed46869
KH
4001
4002/*
f4dee582 4003 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 4004 specify more details. In Emacs, each coding system of ISO2022
4ed46869 4005 variant has the following specifications:
df7492f9 4006 1. Initial designation to G0 thru G3.
4ed46869
KH
4007 2. Allows short-form designation?
4008 3. ASCII should be designated to G0 before control characters?
4009 4. ASCII should be designated to G0 at end of line?
4010 5. 7-bit environment or 8-bit environment?
4011 6. Use locking-shift?
4012 7. Use Single-shift?
4013 And the following two are only for Japanese:
4014 8. Use ASCII in place of JIS0201-1976-Roman?
4015 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
4016 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4017 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 4018 details.
4ed46869
KH
4019*/
4020
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG (0..3), and record the new designation in CODING.

   The sequence is: an optional revision prefix (ESC '&' '@'+REVISION,
   only when CODING has CODING_ISO_FLAG_REVISION and CHARSET carries a
   non-negative revision), then ESC, then '$' for a charset whose
   dimension is not 1, then an intermediate byte selected by REG from
   "()*+" (94-charsets) or ",-./" (96-charsets), then the final byte
   of CHARSET.

   Short form: for a multi-dimension 94-charset going to register 0
   whose final byte is '@', 'A' or 'B', the intermediate byte is
   omitted unless CODING has CODING_ISO_FLAG_LONG_FORM.

   CHARSET, REG and CODING are each evaluated more than once.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    const char *inter_94 = "()*+";                                      \
    const char *inter_96 = ",-./";                                      \
    unsigned char final_byte = CHARSET_ISO_FINAL (charset);             \
    int rev = -1;                                                       \
    int inter;                                                          \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      rev = CHARSET_ISO_REVISION (charset);                             \
    if (rev >= 0)                                                       \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + rev);                                      \
      }                                                                 \
                                                                        \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          EMIT_ONE_ASCII_BYTE (inter_96[reg]);                          \
        else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM  \
                 || reg != 0                                            \
                 || final_byte < '@' || final_byte > 'B')               \
          EMIT_ONE_ASCII_BYTE (inter_94[reg]);                          \
      }                                                                 \
    else                                                                \
      {                                                                 \
        inter = (CHARSET_ISO_CHARS_96 (charset)                         \
                 ? inter_96[reg] : inter_94[reg]);                      \
        EMIT_ONE_ASCII_BYTE (inter);                                    \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (final_byte);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4068
df7492f9 4069
4ed46869
KH
/* The two macros below emit the ISO2022 single-shift functions
   (single-shift-2 and single-shift-3).  When CODING has
   CODING_ISO_FLAG_SEVEN_BITS the shift is written as an escape
   sequence (ESC 'N' / ESC 'O'); otherwise the single control byte
   ISO_CODE_SS2 / ISO_CODE_SS3 is written.  Both set the
   single-shifting state of CODING to 1.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4092
df7492f9 4093
4ed46869
KH
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one also
   records in CODING which graphic register is now invoked to graphic
   plane 0.  They rely on a variable named `coding' being in scope at
   the point of use.  */

/* Shift-in: emit ISO_CODE_SI and invoke G0 to graphic plane 0.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


/* Shift-out: emit ISO_CODE_SO and invoke G1 to graphic plane 0.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


/* Locking-shift-2: emit ESC 'n' and invoke G2 to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* Locking-shift-3: emit ESC 'o' and invoke G3 to graphic plane 0.
   The final byte must be 'o', not 'n': the ISO2022 decoder in this
   file treats ESC 'n' as locking-shift-2 and ESC 'o' as
   locking-shift-3, so emitting 'n' here would make a reader invoke
   G2 while the encoder state says G3.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
4124
df7492f9 4125
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be a modifiable lvalue: when CODING has
   CODING_ISO_FLAG_USE_ROMAN and CHARSET is ASCII, it is replaced by
   the charset whose id is charset_jisx0201_roman.

   The body is a loop that is left by `break' once the byte has been
   emitted:
     - right after a single shift, C1 is emitted with the 8th bit
       cleared (7-bit coding) or set (otherwise), and the
       single-shifting state is cleared;
     - if CHARSET is invoked to graphic plane 0, C1 is emitted with
       the 8th bit cleared;
     - if CHARSET is invoked to graphic plane 1, C1 is emitted with
       the 8th bit set;
     - otherwise encode_invocation_designation is called and the loop
       runs again.

   C1 is parenthesized at every use, as in
   ENCODE_ISO_CHARACTER_DIMENSION2, so that an argument containing an
   operator of lower precedence than `&' is masked as a whole.

   Relies on `coding', `dst' and `produced_chars' being in scope at
   the point of use.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
4168
df7492f9 4169
f4dee582
RS
/* Emit the two bytes of a DIMENSION2 character of CHARSET whose
   position codes are C1 and C2, first producing whatever designation
   or invocation sequence is still needed.

   CHARSET must be a modifiable lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS and CHARSET is charset_jisx0208, it is
   replaced by the charset whose id is charset_jisx0208_1978.

   The body loops until the bytes have been written; each pass that
   finds CHARSET not invoked to plane 0 or 1 calls
   encode_invocation_designation and tries again.

   Relies on `coding', `dst' and `produced_chars' being in scope at
   the point of use.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int cs_id = CHARSET_ID (charset);                                   \
                                                                        \
    if (cs_id == charset_jisx0208                                       \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        cs_id = charset_jisx0208_1978;                                  \
        charset = CHARSET_FROM_ID (cs_id);                              \
      }                                                                 \
                                                                        \
    /* Right after a single shift: one character, then the shift        \
       state is over.  */                                               \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        else                                                            \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* Invoked to graphic plane 0: 8th bits cleared.  */                \
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 0))                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* Invoked to graphic plane 1: 8th bits set.  */                    \
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 1))                \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* Not invoked anywhere yet: designate and/or invoke it, then go    \
       round the loop again to emit the character itself.  */           \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
4212
05e6f5dc 4213
df7492f9
KH
/* Encode character C of CHARSET: compute its code with ENCODE_CHAR,
   then hand it to the DIMENSION1 macro when CHARSET has dimension 1,
   and otherwise to the DIMENSION2 macro with the code split into its
   high part (code >> 8) and low byte.  CHARSET is passed on to those
   macros, which may assign to it.  */

#define ENCODE_ISO_CHARACTER(charset, c)                                \
  do {                                                                  \
    int pos_code = ENCODE_CHAR ((charset), (c));                        \
                                                                        \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), pos_code >> 8,        \
                                       pos_code & 0xFF);                \
    else                                                                \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), pos_code);            \
  } while (0)
bdd9fb48 4223
05e6f5dc 4224
4ed46869 4225/* Produce designation and invocation codes at a place pointed by DST
df7492f9 4226 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
4227 Return new DST. */
4228
4229unsigned char *
cf84bb53
JB
4230encode_invocation_designation (struct charset *charset,
4231 struct coding_system *coding,
4232 unsigned char *dst, int *p_nchars)
4ed46869 4233{
df7492f9
KH
4234 int multibytep = coding->dst_multibyte;
4235 int produced_chars = *p_nchars;
4ed46869 4236 int reg; /* graphic register number */
df7492f9 4237 int id = CHARSET_ID (charset);
4ed46869
KH
4238
4239 /* At first, check designations. */
4240 for (reg = 0; reg < 4; reg++)
df7492f9 4241 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
4242 break;
4243
4244 if (reg >= 4)
4245 {
4246 /* CHARSET is not yet designated to any graphic registers. */
4247 /* At first check the requested designation. */
df7492f9
KH
4248 reg = CODING_ISO_REQUEST (coding, id);
4249 if (reg < 0)
1ba9e4ab
KH
4250 /* Since CHARSET requests no special designation, designate it
4251 to graphic register 0. */
4ed46869
KH
4252 reg = 0;
4253
4254 ENCODE_DESIGNATION (charset, reg, coding);
4255 }
4256
df7492f9
KH
4257 if (CODING_ISO_INVOCATION (coding, 0) != reg
4258 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
4259 {
4260 /* Since the graphic register REG is not invoked to any graphic
4261 planes, invoke it to graphic plane 0. */
4262 switch (reg)
4263 {
4264 case 0: /* graphic register 0 */
4265 ENCODE_SHIFT_IN;
4266 break;
4267
4268 case 1: /* graphic register 1 */
4269 ENCODE_SHIFT_OUT;
4270 break;
4271
4272 case 2: /* graphic register 2 */
df7492f9 4273 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
4274 ENCODE_SINGLE_SHIFT_2;
4275 else
4276 ENCODE_LOCKING_SHIFT_2;
4277 break;
4278
4279 case 3: /* graphic register 3 */
df7492f9 4280 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
4281 ENCODE_SINGLE_SHIFT_3;
4282 else
4283 ENCODE_LOCKING_SHIFT_3;
4284 break;
4285 }
4286 }
b73bfc1c 4287
df7492f9 4288 *p_nchars = produced_chars;
4ed46869
KH
4289 return dst;
4290}
4291
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.  They rely on a variable named `coding' being in scope at
   the point of use.  */

/* Emit a control sequence introducer: ESC '[' when CODING has
   CODING_ISO_FLAG_SEVEN_BITS, otherwise the single byte ISO_CODE_CSI.
   The flag is tested with `&', like every other flag test in the
   ISO2022 encoder; comparing the whole flag word with `==' would
   select the 7-bit form only when no other flag at all is set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)


/* Emit "CSI 2 ]", the start of right-to-left text.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is used without an argument list; writing `(dst)' after it would
   leave a stray parenthesized expression after `while (0)'.  */
#define ENCODE_DIRECTION_R2L()                                          \
  do {                                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                                 \
    EMIT_TWO_ASCII_BYTES ('2', ']');                                    \
  } while (0)


/* Emit "CSI 0 ]", the end of the current direction.  */
#define ENCODE_DIRECTION_L2R()                                          \
  do {                                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                                 \
    EMIT_TWO_ASCII_BYTES ('0', ']');                                    \
  } while (0)
4ed46869 4315
4ed46869
KH
4316
/* Bring graphic plane 0 and the four graphic registers back to their
   initial state: emit a shift-in if plane 0 does not currently invoke
   register 0, then re-designate every register whose initial charset
   is defined (non-negative) and differs from what is designated to it
   now.  Relies on `coding' being in scope at the point of use.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int gr;                                                             \
    struct charset *initial_charset;                                    \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (gr = 0; gr < 4; gr++)                                          \
      {                                                                 \
        if (CODING_ISO_INITIAL (coding, gr) < 0                         \
            || (CODING_ISO_DESIGNATION (coding, gr)                     \
                == CODING_ISO_INITIAL (coding, gr)))                    \
          continue;                                                     \
        initial_charset                                                 \
          = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, gr));          \
        ENCODE_DESIGNATION (initial_charset, gr, coding);               \
      }                                                                 \
  } while (0)
4335
df7492f9 4336
bdd9fb48 4337/* Produce designation sequences of charsets in the line started from
b73bfc1c 4338 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
4339
4340 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
4341 find all the necessary designations. */
4342
b73bfc1c 4343static unsigned char *
cf84bb53
JB
4344encode_designation_at_bol (struct coding_system *coding, int *charbuf,
4345 int *charbuf_end, unsigned char *dst)
e0e989f6 4346{
df7492f9 4347 struct charset *charset;
bdd9fb48
KH
4348 /* Table of charsets to be designated to each graphic register. */
4349 int r[4];
df7492f9
KH
4350 int c, found = 0, reg;
4351 int produced_chars = 0;
4352 int multibytep = coding->dst_multibyte;
4353 Lisp_Object attrs;
4354 Lisp_Object charset_list;
4355
4356 attrs = CODING_ID_ATTRS (coding->id);
4357 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4358 if (EQ (charset_list, Qiso_2022))
4359 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
4360
4361 for (reg = 0; reg < 4; reg++)
4362 r[reg] = -1;
4363
b73bfc1c 4364 while (found < 4)
e0e989f6 4365 {
df7492f9
KH
4366 int id;
4367
4368 c = *charbuf++;
b73bfc1c
KH
4369 if (c == '\n')
4370 break;
df7492f9
KH
4371 charset = char_charset (c, charset_list, NULL);
4372 id = CHARSET_ID (charset);
4373 reg = CODING_ISO_REQUEST (coding, id);
4374 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
4375 {
4376 found++;
df7492f9 4377 r[reg] = id;
bdd9fb48 4378 }
bdd9fb48
KH
4379 }
4380
4381 if (found)
4382 {
4383 for (reg = 0; reg < 4; reg++)
4384 if (r[reg] >= 0
df7492f9
KH
4385 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4386 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 4387 }
b73bfc1c
KH
4388
4389 return dst;
e0e989f6
KH
4390}
4391
4ed46869
KH
4392/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
4393
df7492f9 4394static int
971de7fb 4395encode_coding_iso_2022 (struct coding_system *coding)
4ed46869 4396{
df7492f9
KH
4397 int multibytep = coding->dst_multibyte;
4398 int *charbuf = coding->charbuf;
4399 int *charbuf_end = charbuf + coding->charbuf_used;
4400 unsigned char *dst = coding->destination + coding->produced;
4401 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4402 int safe_room = 16;
4403 int bol_designation
4404 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4405 && CODING_ISO_BOL (coding));
4406 int produced_chars = 0;
4407 Lisp_Object attrs, eol_type, charset_list;
4408 int ascii_compatible;
b73bfc1c 4409 int c;
ff0dacd7 4410 int preferred_charset_id = -1;
05e6f5dc 4411
24a73b0a 4412 CODING_GET_INFO (coding, attrs, charset_list);
0a9564cb 4413 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
24a73b0a
KH
4414 if (VECTORP (eol_type))
4415 eol_type = Qunix;
4416
004068e4 4417 setup_iso_safe_charsets (attrs);
ff0dacd7 4418 /* Charset list may have been changed. */
287c57d7 4419 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
1b3b981b 4420 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
0eecad43 4421
a552b35a
KH
4422 ascii_compatible
4423 = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
4424 && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
4425 | CODING_ISO_FLAG_LOCKING_SHIFT)));
bdd9fb48 4426
df7492f9 4427 while (charbuf < charbuf_end)
4ed46869 4428 {
df7492f9 4429 ASSURE_DESTINATION (safe_room);
b73bfc1c 4430
df7492f9 4431 if (bol_designation)
b73bfc1c 4432 {
df7492f9 4433 unsigned char *dst_prev = dst;
4ed46869 4434
bdd9fb48 4435 /* We have to produce designation sequences if any now. */
df7492f9
KH
4436 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4437 bol_designation = 0;
4438 /* We are sure that designation sequences are all ASCII bytes. */
4439 produced_chars += dst - dst_prev;
e0e989f6
KH
4440 }
4441
df7492f9 4442 c = *charbuf++;
ec6d2bb8 4443
ff0dacd7
KH
4444 if (c < 0)
4445 {
4446 /* Handle an annotation. */
4447 switch (*charbuf)
ec6d2bb8 4448 {
ff0dacd7
KH
4449 case CODING_ANNOTATE_COMPOSITION_MASK:
4450 /* Not yet implemented. */
4451 break;
4452 case CODING_ANNOTATE_CHARSET_MASK:
cf7dfdf5 4453 preferred_charset_id = charbuf[2];
ff0dacd7
KH
4454 if (preferred_charset_id >= 0
4455 && NILP (Fmemq (make_number (preferred_charset_id),
4456 charset_list)))
4457 preferred_charset_id = -1;
4458 break;
4459 default:
4460 abort ();
4ed46869 4461 }
ff0dacd7
KH
4462 charbuf += -c - 1;
4463 continue;
4ed46869 4464 }
ec6d2bb8 4465
b73bfc1c
KH
4466 /* Now encode the character C. */
4467 if (c < 0x20 || c == 0x7F)
4468 {
df7492f9
KH
4469 if (c == '\n'
4470 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 4471 {
df7492f9
KH
4472 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4473 ENCODE_RESET_PLANE_AND_REGISTER ();
4474 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 4475 {
df7492f9
KH
4476 int i;
4477
4478 for (i = 0; i < 4; i++)
4479 CODING_ISO_DESIGNATION (coding, i)
4480 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 4481 }
df7492f9
KH
4482 bol_designation
4483 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 4484 }
df7492f9
KH
4485 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4486 ENCODE_RESET_PLANE_AND_REGISTER ();
4487 EMIT_ONE_ASCII_BYTE (c);
4ed46869 4488 }
df7492f9 4489 else if (ASCII_CHAR_P (c))
88993dfd 4490 {
df7492f9
KH
4491 if (ascii_compatible)
4492 EMIT_ONE_ASCII_BYTE (c);
93dec019 4493 else
19a8d9e0 4494 {
bf16eb23
KH
4495 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4496 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 4497 }
4ed46869 4498 }
16eafb5d 4499 else if (CHAR_BYTE8_P (c))
88993dfd 4500 {
16eafb5d
KH
4501 c = CHAR_TO_BYTE8 (c);
4502 EMIT_ONE_BYTE (c);
88993dfd 4503 }
b73bfc1c 4504 else
df7492f9 4505 {
ff0dacd7 4506 struct charset *charset;
b73bfc1c 4507
ff0dacd7
KH
4508 if (preferred_charset_id >= 0)
4509 {
4510 charset = CHARSET_FROM_ID (preferred_charset_id);
4511 if (! CHAR_CHARSET_P (c, charset))
4512 charset = char_charset (c, charset_list, NULL);
4513 }
4514 else
4515 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
4516 if (!charset)
4517 {
41cbe562
KH
4518 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4519 {
4520 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4521 charset = CHARSET_FROM_ID (charset_ascii);
4522 }
4523 else
4524 {
4525 c = coding->default_char;
4526 charset = char_charset (c, charset_list, NULL);
4527 }
df7492f9
KH
4528 }
4529 ENCODE_ISO_CHARACTER (charset, c);
4530 }
84fbb8a0 4531 }
b73bfc1c 4532
df7492f9
KH
4533 if (coding->mode & CODING_MODE_LAST_BLOCK
4534 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4535 {
4536 ASSURE_DESTINATION (safe_room);
4537 ENCODE_RESET_PLANE_AND_REGISTER ();
4538 }
065e3595 4539 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4540 CODING_ISO_BOL (coding) = bol_designation;
4541 coding->produced_char += produced_chars;
4542 coding->produced = dst - coding->destination;
4543 return 0;
4ed46869
KH
4544}
4545
4546\f
df7492f9 4547 /*** 8. SJIS and BIG5 handlers ***/
4ed46869 4548
df7492f9 4549/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
4550 quite widely. So, for the moment, Emacs supports them in the bare
4551 C code. But, in the future, they may be supported only by CCL. */
4552
4553/* SJIS is a coding system encoding three character sets: ASCII, right
4554 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
4555 as is. A character of charset katakana-jisx0201 is encoded by
4556 "position-code + 0x80". A character of charset japanese-jisx0208
4557 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 4558 so that it fit in the range below.
4ed46869
KH
4559
4560 --- CODE RANGE of SJIS ---
4561 (character set) (range)
4562 ASCII 0x00 .. 0x7F
df7492f9 4563 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 4564 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 4565 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
4566 -------------------------------
4567
4568*/
4569
4570/* BIG5 is a coding system encoding two character sets: ASCII and
4571 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 4572 character set and is encoded in two-byte.
4ed46869
KH
4573
4574 --- CODE RANGE of BIG5 ---
4575 (character set) (range)
4576 ASCII 0x00 .. 0x7F
4577 Big5 (1st byte) 0xA1 .. 0xFE
4578 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
4579 --------------------------
4580
df7492f9 4581 */
4ed46869
KH
4582
4583/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4584 Check if a text is encoded in SJIS. If it is, return
df7492f9 4585 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 4586
0a28aafb 4587static int
cf84bb53
JB
4588detect_coding_sjis (struct coding_system *coding,
4589 struct coding_detection_info *detect_info)
4ed46869 4590{
065e3595 4591 const unsigned char *src = coding->source, *src_base;
8f924df7 4592 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4593 int multibytep = coding->src_multibyte;
4594 int consumed_chars = 0;
4595 int found = 0;
b73bfc1c 4596 int c;
f07190ca
KH
4597 Lisp_Object attrs, charset_list;
4598 int max_first_byte_of_2_byte_code;
4599
4600 CODING_GET_INFO (coding, attrs, charset_list);
4601 max_first_byte_of_2_byte_code
4602 = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
df7492f9 4603
ff0dacd7 4604 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
4605 /* A coding system of this category is always ASCII compatible. */
4606 src += coding->head_ascii;
4ed46869 4607
b73bfc1c 4608 while (1)
4ed46869 4609 {
065e3595 4610 src_base = src;
df7492f9 4611 ONE_MORE_BYTE (c);
682169fe
KH
4612 if (c < 0x80)
4613 continue;
f07190ca
KH
4614 if ((c >= 0x81 && c <= 0x9F)
4615 || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4ed46869 4616 {
df7492f9 4617 ONE_MORE_BYTE (c);
682169fe 4618 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 4619 break;
ff0dacd7 4620 found = CATEGORY_MASK_SJIS;
4ed46869 4621 }
df7492f9 4622 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 4623 found = CATEGORY_MASK_SJIS;
df7492f9
KH
4624 else
4625 break;
4ed46869 4626 }
ff0dacd7 4627 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
4628 return 0;
4629
4630 no_more_source:
065e3595 4631 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4632 {
ff0dacd7 4633 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 4634 return 0;
4ed46869 4635 }
ff0dacd7
KH
4636 detect_info->found |= found;
4637 return 1;
4ed46869
KH
4638}
4639
4640/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4641 Check if a text is encoded in BIG5. If it is, return
df7492f9 4642 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 4643
0a28aafb 4644static int
cf84bb53
JB
4645detect_coding_big5 (struct coding_system *coding,
4646 struct coding_detection_info *detect_info)
4ed46869 4647{
065e3595 4648 const unsigned char *src = coding->source, *src_base;
8f924df7 4649 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4650 int multibytep = coding->src_multibyte;
4651 int consumed_chars = 0;
4652 int found = 0;
b73bfc1c 4653 int c;
fa42c37f 4654
ff0dacd7 4655 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4656 /* A coding system of this category is always ASCII compatible. */
4657 src += coding->head_ascii;
fa42c37f 4658
b73bfc1c 4659 while (1)
fa42c37f 4660 {
065e3595 4661 src_base = src;
df7492f9
KH
4662 ONE_MORE_BYTE (c);
4663 if (c < 0x80)
fa42c37f 4664 continue;
df7492f9 4665 if (c >= 0xA1)
fa42c37f 4666 {
df7492f9
KH
4667 ONE_MORE_BYTE (c);
4668 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4669 return 0;
ff0dacd7 4670 found = CATEGORY_MASK_BIG5;
fa42c37f 4671 }
df7492f9
KH
4672 else
4673 break;
fa42c37f 4674 }
ff0dacd7 4675 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4676 return 0;
fa42c37f 4677
df7492f9 4678 no_more_source:
065e3595 4679 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4680 {
ff0dacd7 4681 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4682 return 0;
4683 }
ff0dacd7
KH
4684 detect_info->found |= found;
4685 return 1;
fa42c37f
KH
4686}
4687
4ed46869
KH
4688/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4689 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
fa42c37f 4690
b73bfc1c 4691static void
971de7fb 4692decode_coding_sjis (struct coding_system *coding)
4ed46869 4693{
8f924df7
KH
4694 const unsigned char *src = coding->source + coding->consumed;
4695 const unsigned char *src_end = coding->source + coding->src_bytes;
4696 const unsigned char *src_base;
69a80ea3 4697 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5 4698 /* We may produce one charset annotation in one loop and one more at
df80c7f0 4699 the end. */
69a80ea3 4700 int *charbuf_end
df80c7f0 4701 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
df7492f9
KH
4702 int consumed_chars = 0, consumed_chars_base;
4703 int multibytep = coding->src_multibyte;
4704 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4705 struct charset *charset_kanji2;
24a73b0a 4706 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4707 int char_offset = coding->produced_char;
4708 int last_offset = char_offset;
4709 int last_id = charset_ascii;
0a9564cb
EZ
4710 int eol_crlf =
4711 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 4712 int byte_after_cr = -1;
a5d301df 4713
24a73b0a 4714 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4715
4716 val = charset_list;
4717 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4718 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4719 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4720 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4721
b73bfc1c 4722 while (1)
4ed46869 4723 {
df7492f9 4724 int c, c1;
24a73b0a 4725 struct charset *charset;
fa42c37f 4726
b73bfc1c 4727 src_base = src;
df7492f9 4728 consumed_chars_base = consumed_chars;
fa42c37f 4729
df7492f9 4730 if (charbuf >= charbuf_end)
b71f6f73
KH
4731 {
4732 if (byte_after_cr >= 0)
4733 src_base--;
4734 break;
4735 }
df7492f9 4736
119852e7
KH
4737 if (byte_after_cr >= 0)
4738 c = byte_after_cr, byte_after_cr = -1;
4739 else
4740 ONE_MORE_BYTE (c);
065e3595
KH
4741 if (c < 0)
4742 goto invalid_code;
24a73b0a 4743 if (c < 0x80)
119852e7
KH
4744 {
4745 if (eol_crlf && c == '\r')
4746 ONE_MORE_BYTE (byte_after_cr);
4747 charset = charset_roman;
4748 }
57a47f8a 4749 else if (c == 0x80 || c == 0xA0)
8e921c4b 4750 goto invalid_code;
57a47f8a
KH
4751 else if (c >= 0xA1 && c <= 0xDF)
4752 {
4753 /* SJIS -> JISX0201-Kana */
4754 c &= 0x7F;
4755 charset = charset_kana;
4756 }
4757 else if (c <= 0xEF)
df7492f9 4758 {
57a47f8a
KH
4759 /* SJIS -> JISX0208 */
4760 ONE_MORE_BYTE (c1);
4761 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4762 goto invalid_code;
57a47f8a
KH
4763 c = (c << 8) | c1;
4764 SJIS_TO_JIS (c);
4765 charset = charset_kanji;
4766 }
4767 else if (c <= 0xFC && charset_kanji2)
4768 {
c6876370 4769 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4770 ONE_MORE_BYTE (c1);
4771 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4772 goto invalid_code;
57a47f8a
KH
4773 c = (c << 8) | c1;
4774 SJIS_TO_JIS2 (c);
4775 charset = charset_kanji2;
df7492f9 4776 }
57a47f8a
KH
4777 else
4778 goto invalid_code;
24a73b0a
KH
4779 if (charset->id != charset_ascii
4780 && last_id != charset->id)
4781 {
4782 if (last_id != charset_ascii)
69a80ea3 4783 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4784 last_id = charset->id;
4785 last_offset = char_offset;
4786 }
4787 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4788 *charbuf++ = c;
ff0dacd7 4789 char_offset++;
df7492f9 4790 continue;
b73bfc1c 4791
df7492f9
KH
4792 invalid_code:
4793 src = src_base;
4794 consumed_chars = consumed_chars_base;
4795 ONE_MORE_BYTE (c);
065e3595 4796 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4797 char_offset++;
df7492f9
KH
4798 coding->errors++;
4799 }
fa42c37f 4800
df7492f9 4801 no_more_source:
ff0dacd7 4802 if (last_id != charset_ascii)
69a80ea3 4803 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4804 coding->consumed_char += consumed_chars_base;
4805 coding->consumed = src_base - coding->source;
4806 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4807}
4808
b73bfc1c 4809static void
971de7fb 4810decode_coding_big5 (struct coding_system *coding)
4ed46869 4811{
8f924df7
KH
4812 const unsigned char *src = coding->source + coding->consumed;
4813 const unsigned char *src_end = coding->source + coding->src_bytes;
4814 const unsigned char *src_base;
69a80ea3 4815 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5 4816 /* We may produce one charset annotation in one loop and one more at
df80c7f0 4817 the end. */
69a80ea3 4818 int *charbuf_end
df80c7f0 4819 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
df7492f9
KH
4820 int consumed_chars = 0, consumed_chars_base;
4821 int multibytep = coding->src_multibyte;
4822 struct charset *charset_roman, *charset_big5;
24a73b0a 4823 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4824 int char_offset = coding->produced_char;
4825 int last_offset = char_offset;
4826 int last_id = charset_ascii;
0a9564cb
EZ
4827 int eol_crlf =
4828 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 4829 int byte_after_cr = -1;
df7492f9 4830
24a73b0a 4831 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4832 val = charset_list;
4833 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4834 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4835
b73bfc1c 4836 while (1)
4ed46869 4837 {
df7492f9 4838 int c, c1;
24a73b0a 4839 struct charset *charset;
b73bfc1c
KH
4840
4841 src_base = src;
df7492f9
KH
4842 consumed_chars_base = consumed_chars;
4843
4844 if (charbuf >= charbuf_end)
b71f6f73
KH
4845 {
4846 if (byte_after_cr >= 0)
4847 src_base--;
4848 break;
4849 }
df7492f9 4850
119852e7 4851 if (byte_after_cr >= 0)
14daee73 4852 c = byte_after_cr, byte_after_cr = -1;
119852e7
KH
4853 else
4854 ONE_MORE_BYTE (c);
b73bfc1c 4855
065e3595
KH
4856 if (c < 0)
4857 goto invalid_code;
24a73b0a 4858 if (c < 0x80)
119852e7 4859 {
14daee73 4860 if (eol_crlf && c == '\r')
119852e7
KH
4861 ONE_MORE_BYTE (byte_after_cr);
4862 charset = charset_roman;
4863 }
24a73b0a 4864 else
4ed46869 4865 {
24a73b0a
KH
4866 /* BIG5 -> Big5 */
4867 if (c < 0xA1 || c > 0xFE)
4868 goto invalid_code;
4869 ONE_MORE_BYTE (c1);
4870 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4871 goto invalid_code;
4872 c = c << 8 | c1;
4873 charset = charset_big5;
4ed46869 4874 }
24a73b0a
KH
4875 if (charset->id != charset_ascii
4876 && last_id != charset->id)
df7492f9 4877 {
24a73b0a 4878 if (last_id != charset_ascii)
69a80ea3 4879 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4880 last_id = charset->id;
4881 last_offset = char_offset;
4ed46869 4882 }
24a73b0a 4883 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4884 *charbuf++ = c;
ff0dacd7 4885 char_offset++;
fb88bf2d
KH
4886 continue;
4887
df7492f9 4888 invalid_code:
4ed46869 4889 src = src_base;
df7492f9
KH
4890 consumed_chars = consumed_chars_base;
4891 ONE_MORE_BYTE (c);
065e3595 4892 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4893 char_offset++;
df7492f9 4894 coding->errors++;
fb88bf2d 4895 }
d46c5b12 4896
df7492f9 4897 no_more_source:
ff0dacd7 4898 if (last_id != charset_ascii)
69a80ea3 4899 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4900 coding->consumed_char += consumed_chars_base;
4901 coding->consumed = src_base - coding->source;
4902 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4903}
4904
4905/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4906 This function can encode charsets `ascii', `katakana-jisx0201',
4907 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4908 are sure that all these charsets are registered as official charset
4ed46869
KH
4909 (i.e. do not have extended leading-codes). Characters of other
4910 charsets are produced without any encoding. If SJIS_P is 1, encode
4911 SJIS text, else encode BIG5 text. */
4912
df7492f9 4913static int
971de7fb 4914encode_coding_sjis (struct coding_system *coding)
4ed46869 4915{
df7492f9
KH
4916 int multibytep = coding->dst_multibyte;
4917 int *charbuf = coding->charbuf;
4918 int *charbuf_end = charbuf + coding->charbuf_used;
4919 unsigned char *dst = coding->destination + coding->produced;
4920 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4921 int safe_room = 4;
4922 int produced_chars = 0;
24a73b0a 4923 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4924 int ascii_compatible;
4925 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4926 struct charset *charset_kanji2;
df7492f9 4927 int c;
a5d301df 4928
24a73b0a 4929 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4930 val = charset_list;
4931 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4932 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4933 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4934 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4935
df7492f9 4936 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 4937
df7492f9
KH
4938 while (charbuf < charbuf_end)
4939 {
4940 ASSURE_DESTINATION (safe_room);
4941 c = *charbuf++;
b73bfc1c 4942 /* Now encode the character C. */
df7492f9
KH
4943 if (ASCII_CHAR_P (c) && ascii_compatible)
4944 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4945 else if (CHAR_BYTE8_P (c))
4946 {
4947 c = CHAR_TO_BYTE8 (c);
4948 EMIT_ONE_BYTE (c);
4949 }
df7492f9 4950 else
b73bfc1c 4951 {
df7492f9
KH
4952 unsigned code;
4953 struct charset *charset = char_charset (c, charset_list, &code);
4954
4955 if (!charset)
4ed46869 4956 {
41cbe562 4957 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4958 {
41cbe562
KH
4959 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4960 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4961 }
41cbe562 4962 else
b73bfc1c 4963 {
41cbe562
KH
4964 c = coding->default_char;
4965 charset = char_charset (c, charset_list, &code);
b73bfc1c 4966 }
b73bfc1c 4967 }
df7492f9
KH
4968 if (code == CHARSET_INVALID_CODE (charset))
4969 abort ();
4970 if (charset == charset_kanji)
4971 {
4972 int c1, c2;
4973 JIS_TO_SJIS (code);
4974 c1 = code >> 8, c2 = code & 0xFF;
4975 EMIT_TWO_BYTES (c1, c2);
4976 }
4977 else if (charset == charset_kana)
4978 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
4979 else if (charset_kanji2 && charset == charset_kanji2)
4980 {
4981 int c1, c2;
4982
4983 c1 = code >> 8;
f07190ca
KH
4984 if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
4985 || c1 == 0x28
57a47f8a
KH
4986 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4987 {
4988 JIS_TO_SJIS2 (code);
4989 c1 = code >> 8, c2 = code & 0xFF;
4990 EMIT_TWO_BYTES (c1, c2);
4991 }
4992 else
4993 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4994 }
df7492f9
KH
4995 else
4996 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4997 }
4998 }
065e3595 4999 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5000 coding->produced_char += produced_chars;
5001 coding->produced = dst - coding->destination;
5002 return 0;
5003}
5004
5005static int
971de7fb 5006encode_coding_big5 (struct coding_system *coding)
df7492f9
KH
5007{
5008 int multibytep = coding->dst_multibyte;
5009 int *charbuf = coding->charbuf;
5010 int *charbuf_end = charbuf + coding->charbuf_used;
5011 unsigned char *dst = coding->destination + coding->produced;
5012 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5013 int safe_room = 4;
5014 int produced_chars = 0;
24a73b0a 5015 Lisp_Object attrs, charset_list, val;
df7492f9
KH
5016 int ascii_compatible;
5017 struct charset *charset_roman, *charset_big5;
5018 int c;
5019
24a73b0a 5020 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
5021 val = charset_list;
5022 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5023 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5024 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5025
5026 while (charbuf < charbuf_end)
5027 {
5028 ASSURE_DESTINATION (safe_room);
5029 c = *charbuf++;
5030 /* Now encode the character C. */
5031 if (ASCII_CHAR_P (c) && ascii_compatible)
5032 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
5033 else if (CHAR_BYTE8_P (c))
5034 {
5035 c = CHAR_TO_BYTE8 (c);
5036 EMIT_ONE_BYTE (c);
b73bfc1c
KH
5037 }
5038 else
5039 {
df7492f9
KH
5040 unsigned code;
5041 struct charset *charset = char_charset (c, charset_list, &code);
5042
5043 if (! charset)
b73bfc1c 5044 {
41cbe562 5045 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 5046 {
41cbe562
KH
5047 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5048 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 5049 }
41cbe562 5050 else
0eecad43 5051 {
41cbe562
KH
5052 c = coding->default_char;
5053 charset = char_charset (c, charset_list, &code);
0eecad43 5054 }
4ed46869 5055 }
df7492f9
KH
5056 if (code == CHARSET_INVALID_CODE (charset))
5057 abort ();
5058 if (charset == charset_big5)
b73bfc1c 5059 {
df7492f9
KH
5060 int c1, c2;
5061
5062 c1 = code >> 8, c2 = code & 0xFF;
5063 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 5064 }
df7492f9
KH
5065 else
5066 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 5067 }
4ed46869 5068 }
065e3595 5069 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5070 coding->produced_char += produced_chars;
5071 coding->produced = dst - coding->destination;
5072 return 0;
4ed46869
KH
5073}
5074
5075\f
df7492f9 5076/*** 9. CCL handlers ***/
1397dc18
KH
5077
5078/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5079 Check if a text is encoded in a coding system of which
5080 encoder/decoder are written in CCL program. If it is, return
df7492f9 5081 CATEGORY_MASK_CCL, else return 0. */
1397dc18 5082
0a28aafb 5083static int
cf84bb53
JB
5084detect_coding_ccl (struct coding_system *coding,
5085 struct coding_detection_info *detect_info)
1397dc18 5086{
065e3595 5087 const unsigned char *src = coding->source, *src_base;
8f924df7 5088 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
5089 int multibytep = coding->src_multibyte;
5090 int consumed_chars = 0;
5091 int found = 0;
0e219d54 5092 unsigned char *valids;
df7492f9
KH
5093 int head_ascii = coding->head_ascii;
5094 Lisp_Object attrs;
5095
ff0dacd7
KH
5096 detect_info->checked |= CATEGORY_MASK_CCL;
5097
df7492f9 5098 coding = &coding_categories[coding_category_ccl];
0e219d54 5099 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
5100 attrs = CODING_ID_ATTRS (coding->id);
5101 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5102 src += head_ascii;
1397dc18 5103
b73bfc1c 5104 while (1)
1397dc18 5105 {
df7492f9 5106 int c;
065e3595
KH
5107
5108 src_base = src;
df7492f9 5109 ONE_MORE_BYTE (c);
065e3595 5110 if (c < 0 || ! valids[c])
df7492f9 5111 break;
ff0dacd7
KH
5112 if ((valids[c] > 1))
5113 found = CATEGORY_MASK_CCL;
df7492f9 5114 }
ff0dacd7 5115 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
5116 return 0;
5117
5118 no_more_source:
ff0dacd7
KH
5119 detect_info->found |= found;
5120 return 1;
df7492f9
KH
5121}
5122
5123static void
971de7fb 5124decode_coding_ccl (struct coding_system *coding)
df7492f9 5125{
7c78e542 5126 const unsigned char *src = coding->source + coding->consumed;
8f924df7 5127 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
5128 int *charbuf = coding->charbuf + coding->charbuf_used;
5129 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
5130 int consumed_chars = 0;
5131 int multibytep = coding->src_multibyte;
d0396581 5132 struct ccl_program *ccl = &coding->spec.ccl->ccl;
df7492f9 5133 int source_charbuf[1024];
fbdc1721 5134 int source_byteidx[1025];
24a73b0a 5135 Lisp_Object attrs, charset_list;
df7492f9 5136
24a73b0a 5137 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5138
d0396581 5139 while (1)
df7492f9 5140 {
7c78e542 5141 const unsigned char *p = src;
df7492f9
KH
5142 int i = 0;
5143
5144 if (multibytep)
fbdc1721
KH
5145 {
5146 while (i < 1024 && p < src_end)
5147 {
5148 source_byteidx[i] = p - src;
5149 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5150 }
5151 source_byteidx[i] = p - src;
5152 }
df7492f9
KH
5153 else
5154 while (i < 1024 && p < src_end)
5155 source_charbuf[i++] = *p++;
8f924df7 5156
df7492f9 5157 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
d0396581
KH
5158 ccl->last_block = 1;
5159 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5160 charset_list);
5161 charbuf += ccl->produced;
fbdc1721 5162 if (multibytep)
d0396581 5163 src += source_byteidx[ccl->consumed];
df7492f9 5164 else
d0396581
KH
5165 src += ccl->consumed;
5166 consumed_chars += ccl->consumed;
5167 if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
df7492f9
KH
5168 break;
5169 }
5170
d0396581 5171 switch (ccl->status)
df7492f9
KH
5172 {
5173 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 5174 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
5175 break;
5176 case CCL_STAT_SUSPEND_BY_DST:
d0396581 5177 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
5178 break;
5179 case CCL_STAT_QUIT:
5180 case CCL_STAT_INVALID_CMD:
065e3595 5181 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
5182 break;
5183 default:
065e3595 5184 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5185 break;
5186 }
5187 coding->consumed_char += consumed_chars;
5188 coding->consumed = src - coding->source;
5189 coding->charbuf_used = charbuf - coding->charbuf;
5190}
5191
5192static int
971de7fb 5193encode_coding_ccl (struct coding_system *coding)
df7492f9 5194{
fb608df3 5195 struct ccl_program *ccl = &coding->spec.ccl->ccl;
df7492f9
KH
5196 int multibytep = coding->dst_multibyte;
5197 int *charbuf = coding->charbuf;
5198 int *charbuf_end = charbuf + coding->charbuf_used;
5199 unsigned char *dst = coding->destination + coding->produced;
5200 unsigned char *dst_end = coding->destination + coding->dst_bytes;
df7492f9
KH
5201 int destination_charbuf[1024];
5202 int i, produced_chars = 0;
24a73b0a 5203 Lisp_Object attrs, charset_list;
df7492f9 5204
24a73b0a 5205 CODING_GET_INFO (coding, attrs, charset_list);
fb608df3
KH
5206 if (coding->consumed_char == coding->src_chars
5207 && coding->mode & CODING_MODE_LAST_BLOCK)
5208 ccl->last_block = 1;
df7492f9 5209
8cffd3e7 5210 while (charbuf < charbuf_end)
df7492f9 5211 {
fb608df3 5212 ccl_driver (ccl, charbuf, destination_charbuf,
8cffd3e7 5213 charbuf_end - charbuf, 1024, charset_list);
df7492f9 5214 if (multibytep)
8cffd3e7 5215 {
fb608df3
KH
5216 ASSURE_DESTINATION (ccl->produced * 2);
5217 for (i = 0; i < ccl->produced; i++)
8cffd3e7
KH
5218 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5219 }
df7492f9
KH
5220 else
5221 {
fb608df3
KH
5222 ASSURE_DESTINATION (ccl->produced);
5223 for (i = 0; i < ccl->produced; i++)
df7492f9 5224 *dst++ = destination_charbuf[i] & 0xFF;
fb608df3 5225 produced_chars += ccl->produced;
df7492f9 5226 }
fb608df3
KH
5227 charbuf += ccl->consumed;
5228 if (ccl->status == CCL_STAT_QUIT
5229 || ccl->status == CCL_STAT_INVALID_CMD)
8cffd3e7 5230 break;
df7492f9
KH
5231 }
5232
fb608df3 5233 switch (ccl->status)
df7492f9
KH
5234 {
5235 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 5236 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
5237 break;
5238 case CCL_STAT_SUSPEND_BY_DST:
065e3595 5239 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
5240 break;
5241 case CCL_STAT_QUIT:
5242 case CCL_STAT_INVALID_CMD:
065e3595 5243 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
5244 break;
5245 default:
065e3595 5246 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 5247 break;
1397dc18 5248 }
df7492f9
KH
5249
5250 coding->produced_char += produced_chars;
5251 coding->produced = dst - coding->destination;
5252 return 0;
1397dc18
KH
5253}
5254
df7492f9 5255
1397dc18 5256\f
df7492f9 5257/*** 10, 11. no-conversion handlers ***/
4ed46869 5258
b73bfc1c 5259/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 5260
b73bfc1c 5261static void
971de7fb 5262decode_coding_raw_text (struct coding_system *coding)
4ed46869 5263{
0a9564cb
EZ
5264 int eol_crlf =
5265 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 5266
df7492f9 5267 coding->chars_at_source = 1;
119852e7
KH
5268 coding->consumed_char = coding->src_chars;
5269 coding->consumed = coding->src_bytes;
5270 if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
5271 {
5272 coding->consumed_char--;
5273 coding->consumed--;
5274 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5275 }
5276 else
5277 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 5278}
4ed46869 5279
df7492f9 5280static int
971de7fb 5281encode_coding_raw_text (struct coding_system *coding)
df7492f9
KH
5282{
5283 int multibytep = coding->dst_multibyte;
5284 int *charbuf = coding->charbuf;
5285 int *charbuf_end = coding->charbuf + coding->charbuf_used;
5286 unsigned char *dst = coding->destination + coding->produced;
5287 unsigned char *dst_end = coding->destination + coding->dst_bytes;
a0ed9b27 5288 int produced_chars = 0;
b73bfc1c
KH
5289 int c;
5290
df7492f9 5291 if (multibytep)
b73bfc1c 5292 {
df7492f9 5293 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 5294
df7492f9
KH
5295 if (coding->src_multibyte)
5296 while (charbuf < charbuf_end)
5297 {
5298 ASSURE_DESTINATION (safe_room);
5299 c = *charbuf++;
5300 if (ASCII_CHAR_P (c))
5301 EMIT_ONE_ASCII_BYTE (c);
5302 else if (CHAR_BYTE8_P (c))
5303 {
5304 c = CHAR_TO_BYTE8 (c);
5305 EMIT_ONE_BYTE (c);
5306 }
5307 else
5308 {
5309 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 5310
df7492f9
KH
5311 CHAR_STRING_ADVANCE (c, p1);
5312 while (p0 < p1)
9d123124
KH
5313 {
5314 EMIT_ONE_BYTE (*p0);
5315 p0++;
5316 }
df7492f9
KH
5317 }
5318 }
b73bfc1c 5319 else
df7492f9
KH
5320 while (charbuf < charbuf_end)
5321 {
5322 ASSURE_DESTINATION (safe_room);
5323 c = *charbuf++;
5324 EMIT_ONE_BYTE (c);
5325 }
5326 }
5327 else
4ed46869 5328 {
df7492f9 5329 if (coding->src_multibyte)
d46c5b12 5330 {
df7492f9
KH
5331 int safe_room = MAX_MULTIBYTE_LENGTH;
5332
5333 while (charbuf < charbuf_end)
d46c5b12 5334 {
df7492f9
KH
5335 ASSURE_DESTINATION (safe_room);
5336 c = *charbuf++;
5337 if (ASCII_CHAR_P (c))
5338 *dst++ = c;
5339 else if (CHAR_BYTE8_P (c))
5340 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 5341 else
df7492f9 5342 CHAR_STRING_ADVANCE (c, dst);
d46c5b12
KH
5343 }
5344 }
df7492f9
KH
5345 else
5346 {
5347 ASSURE_DESTINATION (charbuf_end - charbuf);
5348 while (charbuf < charbuf_end && dst < dst_end)
5349 *dst++ = *charbuf++;
8f924df7 5350 }
319a3947 5351 produced_chars = dst - (coding->destination + coding->produced);
4ed46869 5352 }
065e3595 5353 record_conversion_result (coding, CODING_RESULT_SUCCESS);
a0ed9b27 5354 coding->produced_char += produced_chars;
df7492f9
KH
5355 coding->produced = dst - coding->destination;
5356 return 0;
4ed46869
KH
5357}
5358
ff0dacd7
KH
5359/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5360 Check if a text is encoded in a charset-based coding system. If it
5361 is, return 1, else return 0. */
5362
0a28aafb 5363static int
cf84bb53
JB
5364detect_coding_charset (struct coding_system *coding,
5365 struct coding_detection_info *detect_info)
1397dc18 5366{
065e3595 5367 const unsigned char *src = coding->source, *src_base;
8f924df7 5368 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
5369 int multibytep = coding->src_multibyte;
5370 int consumed_chars = 0;
07295713 5371 Lisp_Object attrs, valids, name;
584948ac 5372 int found = 0;
716b3fa0 5373 int head_ascii = coding->head_ascii;
07295713 5374 int check_latin_extra = 0;
1397dc18 5375
ff0dacd7
KH
5376 detect_info->checked |= CATEGORY_MASK_CHARSET;
5377
df7492f9
KH
5378 coding = &coding_categories[coding_category_charset];
5379 attrs = CODING_ID_ATTRS (coding->id);
5380 valids = AREF (attrs, coding_attr_charset_valids);
07295713 5381 name = CODING_ID_NAME (coding->id);
51b59d79 5382 if (strncmp (SSDATA (SYMBOL_NAME (name)),
237aabf4 5383 "iso-8859-", sizeof ("iso-8859-") - 1) == 0
51b59d79 5384 || strncmp (SSDATA (SYMBOL_NAME (name)),
237aabf4 5385 "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
07295713 5386 check_latin_extra = 1;
237aabf4 5387
df7492f9 5388 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
716b3fa0 5389 src += head_ascii;
1397dc18 5390
b73bfc1c 5391 while (1)
1397dc18 5392 {
df7492f9 5393 int c;
716b3fa0
KH
5394 Lisp_Object val;
5395 struct charset *charset;
5396 int dim, idx;
1397dc18 5397
065e3595 5398 src_base = src;
df7492f9 5399 ONE_MORE_BYTE (c);
065e3595
KH
5400 if (c < 0)
5401 continue;
716b3fa0
KH
5402 val = AREF (valids, c);
5403 if (NILP (val))
df7492f9 5404 break;
584948ac 5405 if (c >= 0x80)
07295713
KH
5406 {
5407 if (c < 0xA0
237aabf4
JR
5408 && check_latin_extra
5409 && (!VECTORP (Vlatin_extra_code_table)
9f0526cb 5410 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
07295713
KH
5411 break;
5412 found = CATEGORY_MASK_CHARSET;
5413 }
716b3fa0
KH
5414 if (INTEGERP (val))
5415 {
5416 charset = CHARSET_FROM_ID (XFASTINT (val));
5417 dim = CHARSET_DIMENSION (charset);
5418 for (idx = 1; idx < dim; idx++)
5419 {
5420 if (src == src_end)
5421 goto too_short;
5422 ONE_MORE_BYTE (c);
3ed051d4 5423 if (c < charset->code_space[(dim - 1 - idx) * 2]
716b3fa0
KH
5424 || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5425 break;
5426 }
5427 if (idx < dim)
5428 break;
5429 }
5430 else
5431 {
5432 idx = 1;
5433 for (; CONSP (val); val = XCDR (val))
5434 {
5435 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5436 dim = CHARSET_DIMENSION (charset);
5437 while (idx < dim)
5438 {
5439 if (src == src_end)
5440 goto too_short;
5441 ONE_MORE_BYTE (c);
5442 if (c < charset->code_space[(dim - 1 - idx) * 4]
5443 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5444 break;
5445 idx++;
5446 }
5447 if (idx == dim)
5448 {
5449 val = Qnil;
5450 break;
5451 }
5452 }
5453 if (CONSP (val))
5454 break;
5455 }
df7492f9 5456 }
716b3fa0 5457 too_short:
ff0dacd7 5458 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 5459 return 0;
4ed46869 5460
df7492f9 5461 no_more_source:
ff0dacd7
KH
5462 detect_info->found |= found;
5463 return 1;
df7492f9 5464}
b73bfc1c 5465
b73bfc1c 5466static void
971de7fb 5467decode_coding_charset (struct coding_system *coding)
4ed46869 5468{
8f924df7
KH
5469 const unsigned char *src = coding->source + coding->consumed;
5470 const unsigned char *src_end = coding->source + coding->src_bytes;
5471 const unsigned char *src_base;
69a80ea3 5472 int *charbuf = coding->charbuf + coding->charbuf_used;
ad1746f5 5473 /* We may produce one charset annotation in one loop and one more at
df80c7f0 5474 the end. */
69a80ea3 5475 int *charbuf_end
df80c7f0 5476 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
df7492f9
KH
5477 int consumed_chars = 0, consumed_chars_base;
5478 int multibytep = coding->src_multibyte;
24a73b0a 5479 Lisp_Object attrs, charset_list, valids;
ff0dacd7
KH
5480 int char_offset = coding->produced_char;
5481 int last_offset = char_offset;
5482 int last_id = charset_ascii;
0a9564cb
EZ
5483 int eol_crlf =
5484 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 5485 int byte_after_cr = -1;
df7492f9 5486
24a73b0a 5487 CODING_GET_INFO (coding, attrs, charset_list);
4eb6d3f1 5488 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 5489
df7492f9 5490 while (1)
4ed46869 5491 {
4eb6d3f1 5492 int c;
24a73b0a
KH
5493 Lisp_Object val;
5494 struct charset *charset;
5495 int dim;
5496 int len = 1;
5497 unsigned code;
df7492f9
KH
5498
5499 src_base = src;
5500 consumed_chars_base = consumed_chars;
b73bfc1c 5501
df7492f9 5502 if (charbuf >= charbuf_end)
b71f6f73
KH
5503 {
5504 if (byte_after_cr >= 0)
5505 src_base--;
5506 break;
5507 }
df7492f9 5508
119852e7
KH
5509 if (byte_after_cr >= 0)
5510 {
5511 c = byte_after_cr;
5512 byte_after_cr = -1;
5513 }
5514 else
5515 {
5516 ONE_MORE_BYTE (c);
5517 if (eol_crlf && c == '\r')
5518 ONE_MORE_BYTE (byte_after_cr);
5519 }
065e3595
KH
5520 if (c < 0)
5521 goto invalid_code;
24a73b0a
KH
5522 code = c;
5523
5524 val = AREF (valids, c);
1b17adfd 5525 if (! INTEGERP (val) && ! CONSP (val))
24a73b0a
KH
5526 goto invalid_code;
5527 if (INTEGERP (val))
d46c5b12 5528 {
24a73b0a
KH
5529 charset = CHARSET_FROM_ID (XFASTINT (val));
5530 dim = CHARSET_DIMENSION (charset);
5531 while (len < dim)
b73bfc1c 5532 {
24a73b0a
KH
5533 ONE_MORE_BYTE (c);
5534 code = (code << 8) | c;
5535 len++;
b73bfc1c 5536 }
24a73b0a
KH
5537 CODING_DECODE_CHAR (coding, src, src_base, src_end,
5538 charset, code, c);
d46c5b12 5539 }
df7492f9 5540 else
d46c5b12 5541 {
24a73b0a
KH
5542 /* VAL is a list of charset IDs. It is assured that the
5543 list is sorted by charset dimensions (smaller one
5544 comes first). */
5545 while (CONSP (val))
4eb6d3f1 5546 {
24a73b0a 5547 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 5548 dim = CHARSET_DIMENSION (charset);
f9d71dcd 5549 while (len < dim)
4eb6d3f1 5550 {
acb2a965
KH
5551 ONE_MORE_BYTE (c);
5552 code = (code << 8) | c;
f9d71dcd 5553 len++;
4eb6d3f1 5554 }
24a73b0a
KH
5555 CODING_DECODE_CHAR (coding, src, src_base,
5556 src_end, charset, code, c);
5557 if (c >= 0)
5558 break;
5559 val = XCDR (val);
ff0dacd7 5560 }
d46c5b12 5561 }
24a73b0a
KH
5562 if (c < 0)
5563 goto invalid_code;
5564 if (charset->id != charset_ascii
5565 && last_id != charset->id)
5566 {
5567 if (last_id != charset_ascii)
69a80ea3 5568 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
5569 last_id = charset->id;
5570 last_offset = char_offset;
5571 }
5572
df7492f9 5573 *charbuf++ = c;
ff0dacd7 5574 char_offset++;
df7492f9
KH
5575 continue;
5576
5577 invalid_code:
5578 src = src_base;
5579 consumed_chars = consumed_chars_base;
5580 ONE_MORE_BYTE (c);
065e3595 5581 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 5582 char_offset++;
df7492f9 5583 coding->errors++;
4ed46869
KH
5584 }
5585
df7492f9 5586 no_more_source:
ff0dacd7 5587 if (last_id != charset_ascii)
69a80ea3 5588 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
5589 coding->consumed_char += consumed_chars_base;
5590 coding->consumed = src_base - coding->source;
5591 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
5592}
5593
df7492f9 5594static int
971de7fb 5595encode_coding_charset (struct coding_system *coding)
4ed46869 5596{
df7492f9
KH
5597 int multibytep = coding->dst_multibyte;
5598 int *charbuf = coding->charbuf;
5599 int *charbuf_end = charbuf + coding->charbuf_used;
5600 unsigned char *dst = coding->destination + coding->produced;
5601 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5602 int safe_room = MAX_MULTIBYTE_LENGTH;
5603 int produced_chars = 0;
24a73b0a 5604 Lisp_Object attrs, charset_list;
df7492f9 5605 int ascii_compatible;
b73bfc1c 5606 int c;
b73bfc1c 5607
24a73b0a 5608 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5609 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 5610
df7492f9 5611 while (charbuf < charbuf_end)
4ed46869 5612 {
4eb6d3f1 5613 struct charset *charset;
df7492f9 5614 unsigned code;
8f924df7 5615
df7492f9
KH
5616 ASSURE_DESTINATION (safe_room);
5617 c = *charbuf++;
5618 if (ascii_compatible && ASCII_CHAR_P (c))
5619 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 5620 else if (CHAR_BYTE8_P (c))
4ed46869 5621 {
16eafb5d
KH
5622 c = CHAR_TO_BYTE8 (c);
5623 EMIT_ONE_BYTE (c);
d46c5b12 5624 }
d46c5b12 5625 else
b73bfc1c 5626 {
4eb6d3f1
KH
5627 charset = char_charset (c, charset_list, &code);
5628 if (charset)
5629 {
5630 if (CHARSET_DIMENSION (charset) == 1)
5631 EMIT_ONE_BYTE (code);
5632 else if (CHARSET_DIMENSION (charset) == 2)
5633 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5634 else if (CHARSET_DIMENSION (charset) == 3)
5635 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5636 else
5637 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5638 (code >> 8) & 0xFF, code & 0xFF);
5639 }
5640 else
41cbe562
KH
5641 {
5642 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5643 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5644 else
5645 c = coding->default_char;
5646 EMIT_ONE_BYTE (c);
5647 }
4ed46869 5648 }
4ed46869
KH
5649 }
5650
065e3595 5651 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5652 coding->produced_char += produced_chars;
5653 coding->produced = dst - coding->destination;
5654 return 0;
4ed46869
KH
5655}
5656
5657\f
/*** 10. C library functions ***/
4ed46869 5659
df7492f9
KH
5660/* Setup coding context CODING from information about CODING_SYSTEM.
5661 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
5662 CODING_SYSTEM is invalid, signal an error. */
4ed46869 5663
ec6d2bb8 5664void
971de7fb 5665setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
4ed46869 5666{
df7492f9
KH
5667 Lisp_Object attrs;
5668 Lisp_Object eol_type;
5669 Lisp_Object coding_type;
4608c386 5670 Lisp_Object val;
4ed46869 5671
df7492f9 5672 if (NILP (coding_system))
ae6f73fa 5673 coding_system = Qundecided;
c07c8e12 5674
df7492f9 5675 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 5676
df7492f9 5677 attrs = CODING_ID_ATTRS (coding->id);
0a9564cb 5678 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 5679
df7492f9
KH
5680 coding->mode = 0;
5681 coding->head_ascii = -1;
4a015c45
KH
5682 if (VECTORP (eol_type))
5683 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5684 | CODING_REQUIRE_DETECTION_MASK);
5685 else if (! EQ (eol_type, Qunix))
5686 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5687 | CODING_REQUIRE_ENCODING_MASK);
5688 else
5689 coding->common_flags = 0;
5e5c78be
KH
5690 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5691 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5692 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5693 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
5694 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5695 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 5696
df7492f9 5697 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 5698 coding->max_charset_id = SCHARS (val) - 1;
1b3b981b 5699 coding->safe_charsets = SDATA (val);
df7492f9 5700 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
624bda09 5701 coding->carryover_bytes = 0;
4608c386 5702
df7492f9
KH
5703 coding_type = CODING_ATTR_TYPE (attrs);
5704 if (EQ (coding_type, Qundecided))
d46c5b12 5705 {
df7492f9
KH
5706 coding->detector = NULL;
5707 coding->decoder = decode_coding_raw_text;
5708 coding->encoder = encode_coding_raw_text;
5709 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5710 }
df7492f9 5711 else if (EQ (coding_type, Qiso_2022))
d46c5b12 5712 {
df7492f9
KH
5713 int i;
5714 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5715
5716 /* Invoke graphic register 0 to plane 0. */
5717 CODING_ISO_INVOCATION (coding, 0) = 0;
5718 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
5719 CODING_ISO_INVOCATION (coding, 1)
5720 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5721 /* Setup the initial status of designation. */
5722 for (i = 0; i < 4; i++)
5723 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5724 /* Not single shifting initially. */
5725 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5726 /* Beginning of buffer should also be regarded as bol. */
5727 CODING_ISO_BOL (coding) = 1;
5728 coding->detector = detect_coding_iso_2022;
5729 coding->decoder = decode_coding_iso_2022;
5730 coding->encoder = encode_coding_iso_2022;
5731 if (flags & CODING_ISO_FLAG_SAFE)
5732 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 5733 coding->common_flags
df7492f9
KH
5734 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5735 | CODING_REQUIRE_FLUSHING_MASK);
5736 if (flags & CODING_ISO_FLAG_COMPOSITION)
5737 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
5738 if (flags & CODING_ISO_FLAG_DESIGNATION)
5739 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
5740 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5741 {
5742 setup_iso_safe_charsets (attrs);
5743 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 5744 coding->max_charset_id = SCHARS (val) - 1;
1b3b981b 5745 coding->safe_charsets = SDATA (val);
df7492f9
KH
5746 }
5747 CODING_ISO_FLAGS (coding) = flags;
e951386e
KH
5748 CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5749 CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5750 CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5751 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
d46c5b12 5752 }
df7492f9 5753 else if (EQ (coding_type, Qcharset))
d46c5b12 5754 {
df7492f9
KH
5755 coding->detector = detect_coding_charset;
5756 coding->decoder = decode_coding_charset;
5757 coding->encoder = encode_coding_charset;
d46c5b12 5758 coding->common_flags
df7492f9 5759 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5760 }
df7492f9 5761 else if (EQ (coding_type, Qutf_8))
d46c5b12 5762 {
a470d443
KH
5763 val = AREF (attrs, coding_attr_utf_bom);
5764 CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5765 : EQ (val, Qt) ? utf_with_bom
5766 : utf_without_bom);
df7492f9
KH
5767 coding->detector = detect_coding_utf_8;
5768 coding->decoder = decode_coding_utf_8;
5769 coding->encoder = encode_coding_utf_8;
5770 coding->common_flags
5771 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
a470d443
KH
5772 if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5773 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
df7492f9
KH
5774 }
5775 else if (EQ (coding_type, Qutf_16))
5776 {
a470d443
KH
5777 val = AREF (attrs, coding_attr_utf_bom);
5778 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5779 : EQ (val, Qt) ? utf_with_bom
5780 : utf_without_bom);
df7492f9 5781 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5782 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5783 : utf_16_little_endian);
e19c3639 5784 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5785 coding->detector = detect_coding_utf_16;
5786 coding->decoder = decode_coding_utf_16;
5787 coding->encoder = encode_coding_utf_16;
5788 coding->common_flags
5789 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
a470d443 5790 if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
b49a1807 5791 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5792 }
df7492f9 5793 else if (EQ (coding_type, Qccl))
4ed46869 5794 {
df7492f9
KH
5795 coding->detector = detect_coding_ccl;
5796 coding->decoder = decode_coding_ccl;
5797 coding->encoder = encode_coding_ccl;
c952af22 5798 coding->common_flags
df7492f9
KH
5799 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5800 | CODING_REQUIRE_FLUSHING_MASK);
5801 }
5802 else if (EQ (coding_type, Qemacs_mule))
5803 {
5804 coding->detector = detect_coding_emacs_mule;
5805 coding->decoder = decode_coding_emacs_mule;
5806 coding->encoder = encode_coding_emacs_mule;
c952af22 5807 coding->common_flags
df7492f9 5808 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
e951386e 5809 coding->spec.emacs_mule.full_support = 1;
df7492f9
KH
5810 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5811 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5812 {
5813 Lisp_Object tail, safe_charsets;
5814 int max_charset_id = 0;
5815
5816 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5817 tail = XCDR (tail))
5818 if (max_charset_id < XFASTINT (XCAR (tail)))
5819 max_charset_id = XFASTINT (XCAR (tail));
1b3b981b
AS
5820 safe_charsets = make_uninit_string (max_charset_id + 1);
5821 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9
KH
5822 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5823 tail = XCDR (tail))
8f924df7 5824 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5825 coding->max_charset_id = max_charset_id;
1b3b981b 5826 coding->safe_charsets = SDATA (safe_charsets);
e951386e 5827 coding->spec.emacs_mule.full_support = 1;
df7492f9 5828 }
e951386e
KH
5829 coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5830 coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
df7492f9
KH
5831 }
5832 else if (EQ (coding_type, Qshift_jis))
5833 {
5834 coding->detector = detect_coding_sjis;
5835 coding->decoder = decode_coding_sjis;
5836 coding->encoder = encode_coding_sjis;
c952af22 5837 coding->common_flags
df7492f9
KH
5838 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5839 }
5840 else if (EQ (coding_type, Qbig5))
5841 {
5842 coding->detector = detect_coding_big5;
5843 coding->decoder = decode_coding_big5;
5844 coding->encoder = encode_coding_big5;
c952af22 5845 coding->common_flags
df7492f9
KH
5846 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5847 }
5848 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5849 {
df7492f9
KH
5850 coding->detector = NULL;
5851 coding->decoder = decode_coding_raw_text;
5852 coding->encoder = encode_coding_raw_text;
ea29edf2
KH
5853 if (! EQ (eol_type, Qunix))
5854 {
5855 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5856 if (! VECTORP (eol_type))
5857 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5858 }
5859
4ed46869 5860 }
4ed46869 5861
df7492f9 5862 return;
4ed46869
KH
5863}
5864
0ff61e78
KH
5865/* Return a list of charsets supported by CODING. */
5866
5867Lisp_Object
971de7fb 5868coding_charset_list (struct coding_system *coding)
0ff61e78 5869{
35befdaa 5870 Lisp_Object attrs, charset_list;
0ff61e78
KH
5871
5872 CODING_GET_INFO (coding, attrs, charset_list);
5873 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5874 {
5875 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5876
5877 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5878 charset_list = Viso_2022_charset_list;
5879 }
5880 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5881 {
5882 charset_list = Vemacs_mule_charset_list;
5883 }
5884 return charset_list;
5885}
5886
5887
e9f91ece
KH
5888/* Return a list of charsets supported by CODING-SYSTEM. */
5889
5890Lisp_Object
971de7fb 5891coding_system_charset_list (Lisp_Object coding_system)
e9f91ece
KH
5892{
5893 int id;
5894 Lisp_Object attrs, charset_list;
5895
5896 CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5897 attrs = CODING_ID_ATTRS (id);
5898
5899 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5900 {
5901 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5902
5903 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5904 charset_list = Viso_2022_charset_list;
5905 else
5906 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5907 }
5908 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5909 {
5910 charset_list = Vemacs_mule_charset_list;
5911 }
5912 else
5913 {
5914 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5915 }
5916 return charset_list;
5917}
5918
5919
df7492f9
KH
5920/* Return raw-text or one of its subsidiaries that has the same
5921 eol_type as CODING-SYSTEM. */
ec6d2bb8 5922
df7492f9 5923Lisp_Object
971de7fb 5924raw_text_coding_system (Lisp_Object coding_system)
ec6d2bb8 5925{
0be8721c 5926 Lisp_Object spec, attrs;
df7492f9 5927 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 5928
d3e4cb56
KH
5929 if (NILP (coding_system))
5930 return Qraw_text;
df7492f9
KH
5931 spec = CODING_SYSTEM_SPEC (coding_system);
5932 attrs = AREF (spec, 0);
ec6d2bb8 5933
df7492f9
KH
5934 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5935 return coding_system;
ec6d2bb8 5936
df7492f9
KH
5937 eol_type = AREF (spec, 2);
5938 if (VECTORP (eol_type))
5939 return Qraw_text;
5940 spec = CODING_SYSTEM_SPEC (Qraw_text);
5941 raw_text_eol_type = AREF (spec, 2);
5942 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5943 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5944 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
5945}
5946
54f78171 5947
1911a33b
KH
5948/* If CODING_SYSTEM doesn't specify end-of-line format, return one of
5949 the subsidiary that has the same eol-spec as PARENT (if it is not
5950 nil and specifies end-of-line format) or the system's setting
fcbcfb64 5951 (system_eol_type). */
df7492f9
KH
5952
5953Lisp_Object
971de7fb 5954coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
54f78171 5955{
3e139625 5956 Lisp_Object spec, eol_type;
54f78171 5957
d3e4cb56
KH
5958 if (NILP (coding_system))
5959 coding_system = Qraw_text;
df7492f9 5960 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 5961 eol_type = AREF (spec, 2);
fcbcfb64 5962 if (VECTORP (eol_type))
df7492f9 5963 {
df7492f9
KH
5964 Lisp_Object parent_eol_type;
5965
fcbcfb64
KH
5966 if (! NILP (parent))
5967 {
5968 Lisp_Object parent_spec;
5969
4a015c45 5970 parent_spec = CODING_SYSTEM_SPEC (parent);
fcbcfb64 5971 parent_eol_type = AREF (parent_spec, 2);
1911a33b 5972 if (VECTORP (parent_eol_type))
4628bef1 5973 parent_eol_type = system_eol_type;
fcbcfb64
KH
5974 }
5975 else
5976 parent_eol_type = system_eol_type;
df7492f9
KH
5977 if (EQ (parent_eol_type, Qunix))
5978 coding_system = AREF (eol_type, 0);
5979 else if (EQ (parent_eol_type, Qdos))
5980 coding_system = AREF (eol_type, 1);
5981 else if (EQ (parent_eol_type, Qmac))
5982 coding_system = AREF (eol_type, 2);
54f78171 5983 }
df7492f9 5984 return coding_system;
54f78171
KH
5985}
5986
fcaf8878
KH
5987
5988/* Check if text-conversion and eol-conversion of CODING_SYSTEM are
5989 decided for writing to a process. If not, complement them, and
5990 return a new coding system. */
5991
5992Lisp_Object
4628bef1 5993complement_process_encoding_system (Lisp_Object coding_system)
fcaf8878 5994{
5886ec9c
KH
5995 Lisp_Object coding_base = Qnil, eol_base = Qnil;
5996 Lisp_Object spec, attrs;
93d50df8 5997 int i;
fcaf8878 5998
93d50df8 5999 for (i = 0; i < 3; i++)
fcaf8878 6000 {
93d50df8
KH
6001 if (i == 1)
6002 coding_system = CDR_SAFE (Vdefault_process_coding_system);
6003 else if (i == 2)
6004 coding_system = preferred_coding_system ();
6005 spec = CODING_SYSTEM_SPEC (coding_system);
6006 if (NILP (spec))
6007 continue;
6008 attrs = AREF (spec, 0);
6009 if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6010 coding_base = CODING_ATTR_BASE_NAME (attrs);
6011 if (NILP (eol_base) && ! VECTORP (AREF (spec, 2)))
6012 eol_base = coding_system;
6013 if (! NILP (coding_base) && ! NILP (eol_base))
6014 break;
fcaf8878 6015 }
fcaf8878 6016
93d50df8
KH
6017 if (i > 0)
6018 /* The original CODING_SYSTEM didn't specify text-conversion or
6019 eol-conversion. Be sure that we return a fully complemented
6020 coding system. */
6021 coding_system = coding_inherit_eol_type (coding_base, eol_base);
6022 return coding_system;
fcaf8878
KH
6023}
6024
6025
4ed46869
KH
/* Emacs has a mechanism to automatically detect a coding system if it
   is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But
   it's impossible to distinguish some coding systems accurately
   because they use the same range of codes.  So, at first, coding
   systems are classified into the following categories:
6031
0ef69138 6032 o coding-category-emacs-mule
4ed46869
KH
6033
6034 The category for a coding system which has the same code range
6035 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 6036 symbol) `emacs-mule' by default.
4ed46869
KH
6037
6038 o coding-category-sjis
6039
6040 The category for a coding system which has the same code range
6041 as SJIS. Assigned the coding-system (Lisp
7717c392 6042 symbol) `japanese-shift-jis' by default.
4ed46869
KH
6043
6044 o coding-category-iso-7
6045
6046 The category for a coding system which has the same code range
7717c392 6047 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
6048 shift and single shift functions. This can encode/decode all
6049 charsets. Assigned the coding-system (Lisp symbol)
6050 `iso-2022-7bit' by default.
6051
6052 o coding-category-iso-7-tight
6053
6054 Same as coding-category-iso-7 except that this can
6055 encode/decode only the specified charsets.
4ed46869
KH
6056
6057 o coding-category-iso-8-1
6058
6059 The category for a coding system which has the same code range
6060 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
6061 for DIMENSION1 charset. This doesn't use any locking shift
6062 and single shift functions. Assigned the coding-system (Lisp
6063 symbol) `iso-latin-1' by default.
4ed46869
KH
6064
6065 o coding-category-iso-8-2
6066
6067 The category for a coding system which has the same code range
6068 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
6069 for DIMENSION2 charset. This doesn't use any locking shift
6070 and single shift functions. Assigned the coding-system (Lisp
6071 symbol) `japanese-iso-8bit' by default.
4ed46869 6072
7717c392 6073 o coding-category-iso-7-else
4ed46869
KH
6074
6075 The category for a coding system which has the same code range
ad1746f5 6076 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
6077 single shift functions. Assigned the coding-system (Lisp
6078 symbol) `iso-2022-7bit-lock' by default.
6079
6080 o coding-category-iso-8-else
6081
6082 The category for a coding system which has the same code range
ad1746f5 6083 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
6084 single shift functions. Assigned the coding-system (Lisp
6085 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
6086
6087 o coding-category-big5
6088
6089 The category for a coding system which has the same code range
6090 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 6091 `cn-big5' by default.
4ed46869 6092
fa42c37f
KH
6093 o coding-category-utf-8
6094
6095 The category for a coding system which has the same code range
6e76ae91 6096 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
6097 symbol) `utf-8' by default.
6098
6099 o coding-category-utf-16-be
6100
6101 The category for a coding system in which a text has an
6102 Unicode signature (cf. Unicode Standard) in the order of BIG
6103 endian at the head. Assigned the coding-system (Lisp symbol)
6104 `utf-16-be' by default.
6105
6106 o coding-category-utf-16-le
6107
6108 The category for a coding system in which a text has an
6109 Unicode signature (cf. Unicode Standard) in the order of
6110 LITTLE endian at the head. Assigned the coding-system (Lisp
6111 symbol) `utf-16-le' by default.
6112
1397dc18
KH
6113 o coding-category-ccl
6114
6115 The category for a coding system of which encoder/decoder is
6116 written in CCL programs. The default value is nil, i.e., no
6117 coding system is assigned.
6118
4ed46869
KH
6119 o coding-category-binary
6120
6121 The category for a coding system not categorized in any of the
6122 above. Assigned the coding-system (Lisp symbol)
e0e989f6 6123 `no-conversion' by default.
4ed46869
KH
6124
   Each of them is a Lisp symbol whose value is an actual
   `coding-system' (also a Lisp symbol) assigned by a user.
   What Emacs actually does is detect the category of a coding system,
   and then use the `coding-system' assigned to that category.  If
   Emacs can't narrow the candidates down to a single category, it
   selects the category of the highest priority.  Priorities of
   categories are also specified by a user in a Lisp variable
   `coding-category-list'.
6132
6133*/
6134
df7492f9
KH
/* End-of-line formats seen while scanning a text.  The non-zero
   values occupy distinct bits so that they can be tested with `&'.  */
#define EOL_SEEN_NONE   0x0
#define EOL_SEEN_LF     0x1
#define EOL_SEEN_CR     0x2
#define EOL_SEEN_CRLF   0x4
66cfb530 6139
ff0dacd7
KH
6140/* Detect how end-of-line of a text of length SRC_BYTES pointed by
6141 SOURCE is encoded. If CATEGORY is one of
6142 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6143 two-byte, else they are encoded by one-byte.
6144
6145 Return one of EOL_SEEN_XXX. */
4ed46869 6146
bc4bc72a 6147#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
6148
6149static int
cf84bb53
JB
6150detect_eol (const unsigned char *source, EMACS_INT src_bytes,
6151 enum coding_category category)
4ed46869 6152{
f6cbaf43 6153 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 6154 unsigned char c;
df7492f9
KH
6155 int total = 0;
6156 int eol_seen = EOL_SEEN_NONE;
4ed46869 6157
89528eb3 6158 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 6159 {
df7492f9 6160 int msb, lsb;
fa42c37f 6161
89528eb3
KH
6162 msb = category == (coding_category_utf_16_le
6163 | coding_category_utf_16_le_nosig);
df7492f9 6164 lsb = 1 - msb;
fa42c37f 6165
df7492f9 6166 while (src + 1 < src_end)
fa42c37f 6167 {
df7492f9
KH
6168 c = src[lsb];
6169 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 6170 {
df7492f9
KH
6171 int this_eol;
6172
6173 if (c == '\n')
6174 this_eol = EOL_SEEN_LF;
6175 else if (src + 3 >= src_end
6176 || src[msb + 2] != 0
6177 || src[lsb + 2] != '\n')
6178 this_eol = EOL_SEEN_CR;
fa42c37f 6179 else
75f4f1ac
EZ
6180 {
6181 this_eol = EOL_SEEN_CRLF;
6182 src += 2;
6183 }
df7492f9
KH
6184
6185 if (eol_seen == EOL_SEEN_NONE)
6186 /* This is the first end-of-line. */
6187 eol_seen = this_eol;
6188 else if (eol_seen != this_eol)
fa42c37f 6189 {
75f4f1ac
EZ
6190 /* The found type is different from what found before.
6191 Allow for stray ^M characters in DOS EOL files. */
ef1b0ba7
SM
6192 if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6193 || (eol_seen == EOL_SEEN_CRLF
6194 && this_eol == EOL_SEEN_CR))
75f4f1ac
EZ
6195 eol_seen = EOL_SEEN_CRLF;
6196 else
6197 {
6198 eol_seen = EOL_SEEN_LF;
6199 break;
6200 }
fa42c37f 6201 }
df7492f9
KH
6202 if (++total == MAX_EOL_CHECK_COUNT)
6203 break;
fa42c37f 6204 }
df7492f9 6205 src += 2;
fa42c37f 6206 }
bcf26d6a 6207 }
d46c5b12 6208 else
ef1b0ba7
SM
6209 while (src < src_end)
6210 {
6211 c = *src++;
6212 if (c == '\n' || c == '\r')
6213 {
6214 int this_eol;
d46c5b12 6215
ef1b0ba7
SM
6216 if (c == '\n')
6217 this_eol = EOL_SEEN_LF;
6218 else if (src >= src_end || *src != '\n')
6219 this_eol = EOL_SEEN_CR;
6220 else
6221 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 6222
ef1b0ba7
SM
6223 if (eol_seen == EOL_SEEN_NONE)
6224 /* This is the first end-of-line. */
6225 eol_seen = this_eol;
6226 else if (eol_seen != this_eol)
6227 {
6228 /* The found type is different from what found before.
6229 Allow for stray ^M characters in DOS EOL files. */
6230 if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6231 || (eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR))
6232 eol_seen = EOL_SEEN_CRLF;
6233 else
6234 {
6235 eol_seen = EOL_SEEN_LF;
6236 break;
6237 }
6238 }
6239 if (++total == MAX_EOL_CHECK_COUNT)
6240 break;
6241 }
6242 }
df7492f9 6243 return eol_seen;
73be902c
KH
6244}
6245
df7492f9 6246
24a73b0a 6247static Lisp_Object
971de7fb 6248adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
73be902c 6249{
0be8721c 6250 Lisp_Object eol_type;
8f924df7 6251
df7492f9
KH
6252 eol_type = CODING_ID_EOL_TYPE (coding->id);
6253 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
6254 {
6255 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6256 eol_type = Qunix;
6257 }
6f197c07 6258 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
6259 {
6260 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6261 eol_type = Qdos;
6262 }
6f197c07 6263 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
6264 {
6265 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6266 eol_type = Qmac;
6267 }
6268 return eol_type;
d46c5b12 6269}
4ed46869 6270
df7492f9
KH
6271/* Detect how a text specified in CODING is encoded. If a coding
6272 system is detected, update fields of CODING by the detected coding
6273 system. */
0a28aafb 6274
df7492f9 6275void
971de7fb 6276detect_coding (struct coding_system *coding)
d46c5b12 6277{
8f924df7 6278 const unsigned char *src, *src_end;
73cce38d 6279 int saved_mode = coding->mode;
d46c5b12 6280
df7492f9
KH
6281 coding->consumed = coding->consumed_char = 0;
6282 coding->produced = coding->produced_char = 0;
6283 coding_set_source (coding);
1c3478b0 6284
df7492f9 6285 src_end = coding->source + coding->src_bytes;
c0e16b14 6286 coding->head_ascii = 0;
1c3478b0 6287
df7492f9
KH
6288 /* If we have not yet decided the text encoding type, detect it
6289 now. */
6290 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 6291 {
df7492f9 6292 int c, i;
6cb21a4f 6293 struct coding_detection_info detect_info;
2f3cbb32 6294 int null_byte_found = 0, eight_bit_found = 0;
df7492f9 6295
6cb21a4f 6296 detect_info.checked = detect_info.found = detect_info.rejected = 0;
2f3cbb32 6297 for (src = coding->source; src < src_end; src++)
d46c5b12 6298 {
df7492f9 6299 c = *src;
6cb21a4f 6300 if (c & 0x80)
6cb21a4f 6301 {
2f3cbb32 6302 eight_bit_found = 1;
2f3cbb32
KH
6303 if (null_byte_found)
6304 break;
6305 }
6306 else if (c < 0x20)
6307 {
6308 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6309 && ! inhibit_iso_escape_detection
6310 && ! detect_info.checked)
6cb21a4f 6311 {
2f3cbb32
KH
6312 if (detect_coding_iso_2022 (coding, &detect_info))
6313 {
6314 /* We have scanned the whole data. */
6315 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
c0e16b14
KH
6316 {
6317 /* We didn't find an 8-bit code. We may
6318 have found a null-byte, but it's very
ce5b453a 6319 rare that a binary file conforms to
c0e16b14
KH
6320 ISO-2022. */
6321 src = src_end;
6322 coding->head_ascii = src - coding->source;
6323 }
6324 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
2f3cbb32
KH
6325 break;
6326 }
6327 }
97b1b294 6328 else if (! c && !inhibit_null_byte_detection)
2f3cbb32
KH
6329 {
6330 null_byte_found = 1;
6331 if (eight_bit_found)
6332 break;
6cb21a4f 6333 }
c006c0c8
KH
6334 if (! eight_bit_found)
6335 coding->head_ascii++;
6cb21a4f 6336 }
c006c0c8 6337 else if (! eight_bit_found)
c0e16b14 6338 coding->head_ascii++;
d46c5b12 6339 }
df7492f9 6340
2f3cbb32
KH
6341 if (null_byte_found || eight_bit_found
6342 || coding->head_ascii < coding->src_bytes
6cb21a4f 6343 || detect_info.found)
d46c5b12 6344 {
ff0dacd7
KH
6345 enum coding_category category;
6346 struct coding_system *this;
df7492f9 6347
6cb21a4f
KH
6348 if (coding->head_ascii == coding->src_bytes)
6349 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
6350 for (i = 0; i < coding_category_raw_text; i++)
6351 {
6352 category = coding_priorities[i];
6353 this = coding_categories + category;
6354 if (detect_info.found & (1 << category))
24a73b0a 6355 break;
6cb21a4f
KH
6356 }
6357 else
2f3cbb32
KH
6358 {
6359 if (null_byte_found)
ff0dacd7 6360 {
2f3cbb32
KH
6361 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6362 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
ff0dacd7 6363 }
2f3cbb32
KH
6364 for (i = 0; i < coding_category_raw_text; i++)
6365 {
6366 category = coding_priorities[i];
6367 this = coding_categories + category;
6368 if (this->id < 0)
6369 {
6370 /* No coding system of this category is defined. */
6371 detect_info.rejected |= (1 << category);
6372 }
6373 else if (category >= coding_category_raw_text)
6374 continue;
6375 else if (detect_info.checked & (1 << category))
6376 {
6377 if (detect_info.found & (1 << category))
6378 break;
6379 }
6380 else if ((*(this->detector)) (coding, &detect_info)
6381 && detect_info.found & (1 << category))
6382 {
6383 if (category == coding_category_utf_16_auto)
6384 {
6385 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6386 category = coding_category_utf_16_le;
6387 else
6388 category = coding_category_utf_16_be;
6389 }
6390 break;
6391 }
6392 }
2f3cbb32 6393 }
c0e16b14
KH
6394
6395 if (i < coding_category_raw_text)
6396 setup_coding_system (CODING_ID_NAME (this->id), coding);
6397 else if (null_byte_found)
6398 setup_coding_system (Qno_conversion, coding);
6399 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6400 == CATEGORY_MASK_ANY)
6401 setup_coding_system (Qraw_text, coding);
6402 else if (detect_info.rejected)
6403 for (i = 0; i < coding_category_raw_text; i++)
6404 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6405 {
6406 this = coding_categories + coding_priorities[i];
6407 setup_coding_system (CODING_ID_NAME (this->id), coding);
6408 break;
6409 }
d46c5b12 6410 }
b73bfc1c 6411 }
a470d443
KH
6412 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6413 == coding_category_utf_8_auto)
6414 {
6415 Lisp_Object coding_systems;
6416 struct coding_detection_info detect_info;
6417
6418 coding_systems
6419 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6420 detect_info.found = detect_info.rejected = 0;
6421 coding->head_ascii = 0;
6422 if (CONSP (coding_systems)
6423 && detect_coding_utf_8 (coding, &detect_info))
6424 {
6425 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6426 setup_coding_system (XCAR (coding_systems), coding);
6427 else
6428 setup_coding_system (XCDR (coding_systems), coding);
6429 }
6430 }
24a73b0a
KH
6431 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6432 == coding_category_utf_16_auto)
b49a1807
KH
6433 {
6434 Lisp_Object coding_systems;
6435 struct coding_detection_info detect_info;
6436
6437 coding_systems
a470d443 6438 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
b49a1807 6439 detect_info.found = detect_info.rejected = 0;
a470d443 6440 coding->head_ascii = 0;
b49a1807 6441 if (CONSP (coding_systems)
24a73b0a 6442 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
6443 {
6444 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6445 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 6446 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
6447 setup_coding_system (XCDR (coding_systems), coding);
6448 }
6449 }
73cce38d 6450 coding->mode = saved_mode;
4ed46869 6451}
4ed46869 6452
d46c5b12 6453
aaaf0b1e 6454static void
971de7fb 6455decode_eol (struct coding_system *coding)
aaaf0b1e 6456{
24a73b0a
KH
6457 Lisp_Object eol_type;
6458 unsigned char *p, *pbeg, *pend;
3ed051d4 6459
24a73b0a 6460 eol_type = CODING_ID_EOL_TYPE (coding->id);
0a9564cb 6461 if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
24a73b0a
KH
6462 return;
6463
6464 if (NILP (coding->dst_object))
6465 pbeg = coding->destination;
6466 else
6467 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6468 pend = pbeg + coding->produced;
6469
6470 if (VECTORP (eol_type))
aaaf0b1e 6471 {
df7492f9 6472 int eol_seen = EOL_SEEN_NONE;
4ed46869 6473
24a73b0a 6474 for (p = pbeg; p < pend; p++)
aaaf0b1e 6475 {
df7492f9
KH
6476 if (*p == '\n')
6477 eol_seen |= EOL_SEEN_LF;
6478 else if (*p == '\r')
aaaf0b1e 6479 {
df7492f9 6480 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 6481 {
df7492f9
KH
6482 eol_seen |= EOL_SEEN_CRLF;
6483 p++;
aaaf0b1e 6484 }
aaaf0b1e 6485 else
df7492f9 6486 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 6487 }
aaaf0b1e 6488 }
75f4f1ac
EZ
6489 /* Handle DOS-style EOLs in a file with stray ^M characters. */
6490 if ((eol_seen & EOL_SEEN_CRLF) != 0
6491 && (eol_seen & EOL_SEEN_CR) != 0
6492 && (eol_seen & EOL_SEEN_LF) == 0)
6493 eol_seen = EOL_SEEN_CRLF;
6494 else if (eol_seen != EOL_SEEN_NONE
24a73b0a
KH
6495 && eol_seen != EOL_SEEN_LF
6496 && eol_seen != EOL_SEEN_CRLF
6497 && eol_seen != EOL_SEEN_CR)
6498 eol_seen = EOL_SEEN_LF;
df7492f9 6499 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 6500 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 6501 }
d46c5b12 6502
24a73b0a 6503 if (EQ (eol_type, Qmac))
27901516 6504 {
24a73b0a 6505 for (p = pbeg; p < pend; p++)
df7492f9
KH
6506 if (*p == '\r')
6507 *p = '\n';
4ed46869 6508 }
24a73b0a 6509 else if (EQ (eol_type, Qdos))
df7492f9 6510 {
24a73b0a 6511 int n = 0;
b73bfc1c 6512
24a73b0a
KH
6513 if (NILP (coding->dst_object))
6514 {
4347441b
KH
6515 /* Start deleting '\r' from the tail to minimize the memory
6516 movement. */
24a73b0a
KH
6517 for (p = pend - 2; p >= pbeg; p--)
6518 if (*p == '\r')
6519 {
72af86bd 6520 memmove (p, p + 1, pend-- - p - 1);
24a73b0a
KH
6521 n++;
6522 }
6523 }
6524 else
6525 {
4347441b
KH
6526 int pos_byte = coding->dst_pos_byte;
6527 int pos = coding->dst_pos;
6528 int pos_end = pos + coding->produced_char - 1;
6529
6530 while (pos < pos_end)
6531 {
6532 p = BYTE_POS_ADDR (pos_byte);
6533 if (*p == '\r' && p[1] == '\n')
6534 {
6535 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6536 n++;
6537 pos_end--;
6538 }
6539 pos++;
69b8522d
KH
6540 if (coding->dst_multibyte)
6541 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6542 else
6543 pos_byte++;
4347441b 6544 }
24a73b0a
KH
6545 }
6546 coding->produced -= n;
6547 coding->produced_char -= n;
aaaf0b1e 6548 }
4ed46869
KH
6549}
6550
7d64c6ad 6551
a6f87d34
KH
6552/* Return a translation table (or list of them) from coding system
6553 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6554 decoding (ENCODEP is zero). */
7d64c6ad 6555
e6a54062 6556static Lisp_Object
971de7fb 6557get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
7d64c6ad
KH
6558{
6559 Lisp_Object standard, translation_table;
09ee6fdd 6560 Lisp_Object val;
7d64c6ad 6561
4bed5909
CY
6562 if (NILP (Venable_character_translation))
6563 {
6564 if (max_lookup)
6565 *max_lookup = 0;
6566 return Qnil;
6567 }
7d64c6ad
KH
6568 if (encodep)
6569 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6570 standard = Vstandard_translation_table_for_encode;
6571 else
6572 translation_table = CODING_ATTR_DECODE_TBL (attrs),
6573 standard = Vstandard_translation_table_for_decode;
7d64c6ad 6574 if (NILP (translation_table))
09ee6fdd
KH
6575 translation_table = standard;
6576 else
a6f87d34 6577 {
09ee6fdd
KH
6578 if (SYMBOLP (translation_table))
6579 translation_table = Fget (translation_table, Qtranslation_table);
6580 else if (CONSP (translation_table))
6581 {
6582 translation_table = Fcopy_sequence (translation_table);
6583 for (val = translation_table; CONSP (val); val = XCDR (val))
6584 if (SYMBOLP (XCAR (val)))
6585 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6586 }
6587 if (CHAR_TABLE_P (standard))
6588 {
6589 if (CONSP (translation_table))
6590 translation_table = nconc2 (translation_table,
6591 Fcons (standard, Qnil));
6592 else
6593 translation_table = Fcons (translation_table,
6594 Fcons (standard, Qnil));
6595 }
a6f87d34 6596 }
2170c8f0
KH
6597
6598 if (max_lookup)
09ee6fdd 6599 {
2170c8f0
KH
6600 *max_lookup = 1;
6601 if (CHAR_TABLE_P (translation_table)
6602 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6603 {
6604 val = XCHAR_TABLE (translation_table)->extras[1];
6605 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6606 *max_lookup = XFASTINT (val);
6607 }
6608 else if (CONSP (translation_table))
6609 {
6610 Lisp_Object tail, val;
09ee6fdd 6611
2170c8f0
KH
6612 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6613 if (CHAR_TABLE_P (XCAR (tail))
6614 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6615 {
6616 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6617 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6618 *max_lookup = XFASTINT (val);
6619 }
6620 }
a6f87d34 6621 }
7d64c6ad
KH
6622 return translation_table;
6623}
6624
09ee6fdd
KH
/* Look up character C in TABLE, which is a char-table or a list of
   char-tables; anything else leaves C unchanged and TRANS nil.

   If the entry found for C is a character, C is replaced by it and
   TRANS is set to Qnil.  Otherwise TRANS is set to the entry itself
   (Qnil if there is none).  When TABLE is a list, the char-tables in
   it are applied one after another to the possibly already replaced
   C, and the walk stops at the first entry that is neither nil nor a
   character; that entry is left in TRANS.

   C and TRANS must be lvalues.  TABLE and C are evaluated more than
   once.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
  do {                                                          \
    trans = Qnil;                                               \
    if (CHAR_TABLE_P (table))                                   \
      {                                                         \
        trans = CHAR_TABLE_REF (table, c);                      \
        if (CHARACTERP (trans))                                 \
          c = XFASTINT (trans), trans = Qnil;                   \
      }                                                         \
    else if (CONSP (table))                                     \
      {                                                         \
        Lisp_Object tail;                                       \
                                                                \
        for (tail = table; CONSP (tail); tail = XCDR (tail))    \
          if (CHAR_TABLE_P (XCAR (tail)))                       \
            {                                                   \
              trans = CHAR_TABLE_REF (XCAR (tail), c);          \
              if (CHARACTERP (trans))                           \
                c = XFASTINT (trans), trans = Qnil;             \
              else if (! NILP (trans))                          \
                break;                                          \
            }                                                   \
      }                                                         \
  } while (0)
6649
7d64c6ad 6650
e951386e
KH
6651/* Return a translation of character(s) at BUF according to TRANS.
6652 TRANS is TO-CHAR or ((FROM . TO) ...) where
6653 FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6654 The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6655 translation is found, and Qnil if not found..
6656 If BUF is too short to lookup characters in FROM, return Qt. */
6657
69a80ea3 6658static Lisp_Object
971de7fb 6659get_translation (Lisp_Object trans, int *buf, int *buf_end)
69a80ea3 6660{
e951386e
KH
6661
6662 if (INTEGERP (trans))
6663 return trans;
6664 for (; CONSP (trans); trans = XCDR (trans))
69a80ea3 6665 {
e951386e
KH
6666 Lisp_Object val = XCAR (trans);
6667 Lisp_Object from = XCAR (val);
6668 int len = ASIZE (from);
6669 int i;
69a80ea3 6670
e951386e 6671 for (i = 0; i < len; i++)
69a80ea3 6672 {
e951386e
KH
6673 if (buf + i == buf_end)
6674 return Qt;
6675 if (XINT (AREF (from, i)) != buf[i])
6676 break;
69a80ea3 6677 }
e951386e
KH
6678 if (i == len)
6679 return val;
69a80ea3 6680 }
e951386e 6681 return Qnil;
69a80ea3
KH
6682}
6683
6684
/* Store decoded text in the destination of CODING, starting at
   CODING->destination + CODING->produced, and add the number of
   bytes and characters stored to CODING->produced and
   CODING->produced_char.  If the destination is a buffer and at
   least one character was produced, the new text is made part of the
   buffer with insert_from_gap.

   If CODING->chars_at_source is zero, the characters are taken from
   CODING->charbuf (CODING->charbuf_used elements) and translated by
   TRANSLATION_TABLE; negative elements are annotation data and are
   skipped here.  Otherwise the bytes are taken from CODING->source
   (CODING->consumed bytes) and TRANSLATION_TABLE is not consulted.

   LAST_BLOCK zero means more source may follow: when a translation
   rule cannot be decided with the characters left in the charbuf
   (get_translation returns Qt), processing stops there.

   Return the number of charbuf elements left unprocessed at the end
   of the charbuf; this is always 0 in the chars_at_source case.  */

static int
produce_chars (struct coding_system *coding, Lisp_Object translation_table,
               int last_block)
{
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  EMACS_INT produced;
  EMACS_INT produced_chars = 0;
  int carryover = 0;

  if (! coding->chars_at_source)
    {
      /* Source characters are in coding->charbuf.  */
      int *buf = coding->charbuf;
      int *buf_end = buf + coding->charbuf_used;

      /* When source and destination are the same object, the
         destination may only grow up to the already consumed part of
         the source.  */
      if (EQ (coding->src_object, coding->dst_object))
        {
          coding_set_source (coding);
          dst_end = ((unsigned char *) coding->source) + coding->consumed;
        }

      while (buf < buf_end)
        {
          int c = *buf, i;

          if (c >= 0)
            {
              /* FROM_NCHARS charbuf elements are replaced by
                 TO_NCHARS characters.  */
              int from_nchars = 1, to_nchars = 1;
              Lisp_Object trans = Qnil;

              LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
              if (! NILP (trans))
                {
                  trans = get_translation (trans, buf, buf_end);
                  if (INTEGERP (trans))
                    c = XINT (trans);
                  else if (CONSP (trans))
                    {
                      from_nchars = ASIZE (XCAR (trans));
                      trans = XCDR (trans);
                      if (INTEGERP (trans))
                        c = XINT (trans);
                      else
                        {
                          to_nchars = ASIZE (trans);
                          c = XINT (AREF (trans, 0));
                        }
                    }
                  /* Not enough characters to decide; leave the rest
                     as carryover unless this is the last block.  */
                  else if (EQ (trans, Qt) && ! last_block)
                    break;
                }

              if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
                {
                  dst = alloc_destination (coding,
                                           buf_end - buf
                                           + MAX_MULTIBYTE_LENGTH * to_nchars,
                                           dst);
                  /* Recompute DST_END for the enlarged destination.  */
                  if (EQ (coding->src_object, coding->dst_object))
                    {
                      coding_set_source (coding);
                      dst_end = (((unsigned char *) coding->source)
                                 + coding->consumed);
                    }
                  else
                    dst_end = coding->destination + coding->dst_bytes;
                }

              for (i = 0; i < to_nchars; i++)
                {
                  /* The first character is already in C; the others
                     come from the TO vector.  */
                  if (i > 0)
                    c = XINT (AREF (trans, i));
                  if (coding->dst_multibyte
                      || ! CHAR_BYTE8_P (c))
                    CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
                  else
                    *dst++ = CHAR_TO_BYTE8 (c);
                }
              produced_chars += to_nchars;
              buf += from_nchars;
            }
          else
            /* This is an annotation datum.  (-C) is the length.  */
            buf += -c;
        }
      carryover = buf_end - buf;
    }
  else
    {
      /* Source characters are at coding->source.  */
      const unsigned char *src = coding->source;
      const unsigned char *src_end = src + coding->consumed;

      if (EQ (coding->dst_object, coding->src_object))
        dst_end = (unsigned char *) src;
      if (coding->src_multibyte != coding->dst_multibyte)
        {
          if (coding->src_multibyte)
            {
              /* MULTIBYTEP, CONSUMED_CHARS and SRC_BASE are not used
                 directly below.  NOTE(review): presumably they are
                 referenced by ONE_MORE_BYTE, which is defined
                 elsewhere -- confirm.  */
              int multibytep = 1;
              EMACS_INT consumed_chars = 0;

              while (1)
                {
                  const unsigned char *src_base = src;
                  int c;

                  /* NOTE(review): this loop has no visible exit;
                     presumably ONE_MORE_BYTE jumps to
                     `no_more_source' when the source is exhausted --
                     confirm.  */
                  ONE_MORE_BYTE (c);
                  if (dst == dst_end)
                    {
                      if (EQ (coding->src_object, coding->dst_object))
                        dst_end = (unsigned char *) src;
                      if (dst == dst_end)
                        {
                          /* Remember SRC as an offset, then
                             recompute the pointers after the
                             allocation.  */
                          EMACS_INT offset = src - coding->source;

                          dst = alloc_destination (coding, src_end - src + 1,
                                                   dst);
                          dst_end = coding->destination + coding->dst_bytes;
                          coding_set_source (coding);
                          src = coding->source + offset;
                          src_end = coding->source + coding->src_bytes;
                          if (EQ (coding->src_object, coding->dst_object))
                            dst_end = (unsigned char *) src;
                        }
                    }
                  *dst++ = c;
                  produced_chars++;
                }
            no_more_source:
              ;
            }
          else
            while (src < src_end)
              {
                /* NOTE(review): MULTIBYTEP is presumably read by
                   EMIT_ONE_BYTE, which is defined elsewhere --
                   confirm.  */
                int multibytep = 1;
                int c = *src++;

                if (dst >= dst_end - 1)
                  {
                    if (EQ (coding->src_object, coding->dst_object))
                      dst_end = (unsigned char *) src;
                    if (dst >= dst_end - 1)
                      {
                        EMACS_INT offset = src - coding->source;
                        EMACS_INT more_bytes;

                        if (EQ (coding->src_object, coding->dst_object))
                          more_bytes = ((src_end - src) / 2) + 2;
                        else
                          more_bytes = src_end - src + 2;
                        dst = alloc_destination (coding, more_bytes, dst);
                        dst_end = coding->destination + coding->dst_bytes;
                        coding_set_source (coding);
                        src = coding->source + offset;
                        src_end = coding->source + coding->src_bytes;
                        if (EQ (coding->src_object, coding->dst_object))
                          dst_end = (unsigned char *) src;
                      }
                  }
                EMIT_ONE_BYTE (c);
              }
        }
      else
        {
          /* Source and destination have the same multibyteness; the
             bytes are copied verbatim.  */
          if (!EQ (coding->src_object, coding->dst_object))
            {
              EMACS_INT require = coding->src_bytes - coding->dst_bytes;

              if (require > 0)
                {
                  EMACS_INT offset = src - coding->source;

                  dst = alloc_destination (coding, require, dst);
                  coding_set_source (coding);
                  src = coding->source + offset;
                  src_end = coding->source + coding->src_bytes;
                }
            }
          produced_chars = coding->consumed_char;
          while (src < src_end)
            *dst++ = *src++;
        }
    }

  produced = dst - (coding->destination + coding->produced);
  if (BUFFERP (coding->dst_object) && produced_chars > 0)
    insert_from_gap (produced_chars, produced);
  coding->produced += produced;
  coding->produced_char += produced_chars;
  return carryover;
}
6878
ff0dacd7
KH
6879/* Compose text in CODING->object according to the annotation data at
6880 CHARBUF. CHARBUF is an array:
e951386e 6881 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
df7492f9 6882 */
4ed46869 6883
df7492f9 6884static INLINE void
971de7fb 6885produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
4ed46869 6886{
df7492f9 6887 int len;
69a80ea3 6888 EMACS_INT to;
df7492f9 6889 enum composition_method method;
df7492f9 6890 Lisp_Object components;
fa42c37f 6891
e951386e 6892 len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
69a80ea3 6893 to = pos + charbuf[2];
e951386e 6894 method = (enum composition_method) (charbuf[4]);
d46c5b12 6895
df7492f9
KH
6896 if (method == COMPOSITION_RELATIVE)
6897 components = Qnil;
e951386e 6898 else
d46c5b12 6899 {
df7492f9 6900 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
e951386e 6901 int i, j;
b73bfc1c 6902
e951386e
KH
6903 if (method == COMPOSITION_WITH_RULE)
6904 len = charbuf[2] * 3 - 2;
6905 charbuf += MAX_ANNOTATION_LENGTH;
6906 /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6907 for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
9ffd559c 6908 {
e951386e
KH
6909 if (charbuf[i] >= 0)
6910 args[j] = make_number (charbuf[i]);
6911 else
6912 {
6913 i++;
6914 args[j] = make_number (charbuf[i] % 0x100);
6915 }
9ffd559c 6916 }
e951386e 6917 components = (i == j ? Fstring (j, args) : Fvector (j, args));
d46c5b12 6918 }
69a80ea3 6919 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
6920}
6921
d46c5b12 6922
ff0dacd7
KH
6923/* Put `charset' property on text in CODING->object according to
6924 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 6925 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 6926 */
d46c5b12 6927
ff0dacd7 6928static INLINE void
971de7fb 6929produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos)
d46c5b12 6930{
69a80ea3
KH
6931 EMACS_INT from = pos - charbuf[2];
6932 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 6933
69a80ea3 6934 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
6935 Qcharset, CHARSET_NAME (charset),
6936 coding->dst_object);
d46c5b12
KH
6937}
6938
d46c5b12 6939
df7492f9
KH
/* Number of `int' elements first requested for CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the work area CODING->charbuf with alloca and store its
   size, counted in `int' elements, in CODING->charbuf_size.  The
   request starts at CHARBUF_SIZE elements and is halved for as long
   as alloca yields NULL and the size is still above 1024.

   If no memory was obtained, record CODING_RESULT_INSUFFICIENT_MEM
   and execute `return coding->result;'.  The macro can therefore
   only be used in a function that returns that value.  As the memory
   comes from alloca, it lives in the stack frame of the function
   that uses the macro.  */

#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    coding->charbuf = NULL;                                             \
    while (size > 1024)                                                 \
      {                                                                 \
        coding->charbuf = (int *) alloca (sizeof (int) * size);         \
        if (coding->charbuf)                                            \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! coding->charbuf)                                              \
      {                                                                 \
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
        return coding->result;                                          \
      }                                                                 \
    coding->charbuf_size = size;                                        \
  } while (0)
4ed46869 6961
d46c5b12
KH
6962
6963static void
971de7fb 6964produce_annotation (struct coding_system *coding, EMACS_INT pos)
d46c5b12 6965{
df7492f9
KH
6966 int *charbuf = coding->charbuf;
6967 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 6968
ff0dacd7
KH
6969 if (NILP (coding->dst_object))
6970 return;
d46c5b12 6971
df7492f9 6972 while (charbuf < charbuf_end)
a84f1519 6973 {
df7492f9 6974 if (*charbuf >= 0)
e951386e 6975 pos++, charbuf++;
d46c5b12 6976 else
d46c5b12 6977 {
df7492f9 6978 int len = -*charbuf;
e951386e
KH
6979
6980 if (len > 2)
6981 switch (charbuf[1])
6982 {
6983 case CODING_ANNOTATE_COMPOSITION_MASK:
6984 produce_composition (coding, charbuf, pos);
6985 break;
6986 case CODING_ANNOTATE_CHARSET_MASK:
6987 produce_charset (coding, charbuf, pos);
6988 break;
6989 }
df7492f9 6990 charbuf += len;
d46c5b12 6991 }
a84f1519 6992 }
d46c5b12
KH
6993}
6994
df7492f9
KH
/* Decode the data at CODING->src_object into CODING->dst_object.
   CODING->src_object is a buffer, a string, or nil.
   CODING->dst_object is a buffer.

   If CODING->src_object is a buffer, it must be the current buffer.
   In this case, if CODING->src_pos is positive, it is a position of
   the source text in the buffer, otherwise, the source text is in the
   gap area of the buffer, and CODING->src_pos specifies the offset of
   the text from GPT (which must be the same as PT).  If this is the
   same buffer as CODING->dst_object, CODING->src_pos must be
   negative.

   If CODING->src_object is a string, CODING->src_pos is an index to
   that string.

   If CODING->src_object is nil, CODING->source must already point to
   the non-relocatable memory area.  In this case, CODING->src_pos is
   an offset from CODING->source.

   The decoded data is inserted at the current point of the buffer
   CODING->dst_object.

   Source bytes that the decoder leaves unconsumed are emitted as raw
   bytes if CODING->mode has CODING_MODE_LAST_BLOCK, and are saved in
   CODING->carryover otherwise.

   Return CODING->result.  */

static int
decode_coding (struct coding_system *coding)
{
  Lisp_Object attrs;
  Lisp_Object undo_list;
  Lisp_Object translation_table;
  struct ccl_spec cclspec;
  int carryover;
  int i;

  /* If the source text straddles the gap, move the gap to the start
     of the source text.  */
  if (BUFFERP (coding->src_object)
      && coding->src_pos > 0
      && coding->src_pos < GPT
      && coding->src_pos + coding->src_chars > GPT)
    move_gap_both (coding->src_pos, coding->src_pos_byte);

  undo_list = Qt;
  if (BUFFERP (coding->dst_object))
    {
      if (current_buffer != XBUFFER (coding->dst_object))
        set_buffer_internal (XBUFFER (coding->dst_object));
      if (GPT != PT)
        move_gap_both (PT, PT_BYTE);
      /* Save the undo list and set it to t while decoding; it is
         restored, and the whole insertion recorded once, at the end
         of this function.  */
      undo_list = B_ (current_buffer, undo_list);
      B_ (current_buffer, undo_list) = Qt;
    }

  coding->consumed = coding->consumed_char = 0;
  coding->produced = coding->produced_char = 0;
  coding->chars_at_source = 0;
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->errors = 0;

  /* This macro may return from this function on allocation
     failure.  */
  ALLOC_CONVERSION_WORK_AREA (coding);

  attrs = CODING_ID_ATTRS (coding->id);
  translation_table = get_translation_table (attrs, 0, NULL);

  carryover = 0;
  if (coding->decoder == decode_coding_ccl)
    {
      coding->spec.ccl = &cclspec;
      setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
    }
  do
    {
      /* Destination position of the characters produced in this
         round; annotations are applied relative to it.  */
      EMACS_INT pos = coding->dst_pos + coding->produced_char;

      coding_set_source (coding);
      coding->annotated = 0;
      /* The characters carried over from the previous round are
         already at the head of the charbuf.  */
      coding->charbuf_used = carryover;
      (*(coding->decoder)) (coding);
      coding_set_destination (coding);
      carryover = produce_chars (coding, translation_table, 0);
      if (coding->annotated)
        produce_annotation (coding, pos);
      /* Move the unprocessed tail of the charbuf to its head.  */
      for (i = 0; i < carryover; i++)
        coding->charbuf[i]
          = coding->charbuf[coding->charbuf_used - carryover + i];
    }
  while (coding->result == CODING_RESULT_INSUFFICIENT_DST
         || (coding->consumed < coding->src_bytes
             && (coding->result == CODING_RESULT_SUCCESS
                 || coding->result == CODING_RESULT_INVALID_SRC)));

  /* Produce what is still carried over, this time as the last
     block.  */
  if (carryover > 0)
    {
      coding_set_destination (coding);
      coding->charbuf_used = carryover;
      produce_chars (coding, translation_table, 1);
    }

  coding->carryover_bytes = 0;
  if (coding->consumed < coding->src_bytes)
    {
      int nbytes = coding->src_bytes - coding->consumed;
      const unsigned char *src;

      coding_set_source (coding);
      coding_set_destination (coding);
      src = coding->source + coding->consumed;

      if (coding->mode & CODING_MODE_LAST_BLOCK)
        {
          /* Flush out unprocessed data as binary chars.  We are sure
             that the number of data is less than the size of
             coding->charbuf.  */
          coding->charbuf_used = 0;
          coding->chars_at_source = 0;

          while (nbytes-- > 0)
            {
              int c = *src++;

              if (c & 0x80)
                c = BYTE8_TO_CHAR (c);
              coding->charbuf[coding->charbuf_used++] = c;
            }
          /* No translation table is applied to these raw bytes.  */
          produce_chars (coding, Qnil, 1);
        }
      else
        {
          /* Record unprocessed bytes in coding->carryover.  We are
             sure that the number of data is less than the size of
             coding->carryover.  */
          unsigned char *p = coding->carryover;

          if (nbytes > sizeof coding->carryover)
            nbytes = sizeof coding->carryover;
          coding->carryover_bytes = nbytes;
          while (nbytes-- > 0)
            *p++ = *src++;
        }
      coding->consumed = coding->src_bytes;
    }

  if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
      && !inhibit_eol_conversion)
    decode_eol (coding);
  if (BUFFERP (coding->dst_object))
    {
      B_ (current_buffer, undo_list) = undo_list;
      record_insert (coding->dst_pos, coding->produced_char);
    }
  return coding->result;
}
7144
aaaf0b1e 7145
e1c23804 7146/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
7147 ending before LIMIT of CODING->src_object (buffer or string), store
7148 the data in BUF, set *STOP to a starting position of the next
7149 composition (if any) or to LIMIT, and return the address of the
7150 next element of BUF.
7151
7152 If such an annotation is not found, set *STOP to a starting
7153 position of a composition after POS (if any) or to LIMIT, and
7154 return BUF. */
7155
7156static INLINE int *
cf84bb53
JB
7157handle_composition_annotation (EMACS_INT pos, EMACS_INT limit,
7158 struct coding_system *coding, int *buf,
7159 EMACS_INT *stop)
aaaf0b1e 7160{
ff0dacd7
KH
7161 EMACS_INT start, end;
7162 Lisp_Object prop;
aaaf0b1e 7163
ff0dacd7
KH
7164 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7165 || end > limit)
7166 *stop = limit;
7167 else if (start > pos)
7168 *stop = start;
7169 else
aaaf0b1e 7170 {
ff0dacd7 7171 if (start == pos)
aaaf0b1e 7172 {
ff0dacd7
KH
7173 /* We found a composition. Store the corresponding
7174 annotation data in BUF. */
7175 int *head = buf;
7176 enum composition_method method = COMPOSITION_METHOD (prop);
7177 int nchars = COMPOSITION_LENGTH (prop);
7178
e951386e 7179 ADD_COMPOSITION_DATA (buf, nchars, 0, method);
ff0dacd7 7180 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 7181 {
ff0dacd7
KH
7182 Lisp_Object components;
7183 int len, i, i_byte;
7184
7185 components = COMPOSITION_COMPONENTS (prop);
7186 if (VECTORP (components))
aaaf0b1e 7187 {
ff0dacd7
KH
7188 len = XVECTOR (components)->size;
7189 for (i = 0; i < len; i++)
7190 *buf++ = XINT (AREF (components, i));
aaaf0b1e 7191 }
ff0dacd7 7192 else if (STRINGP (components))
aaaf0b1e 7193 {
8f924df7 7194 len = SCHARS (components);
ff0dacd7
KH
7195 i = i_byte = 0;
7196 while (i < len)
7197 {
7198 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7199 buf++;
7200 }
7201 }
7202 else if (INTEGERP (components))
7203 {
7204 len = 1;
7205 *buf++ = XINT (components);
7206 }
7207 else if (CONSP (components))
7208 {
7209 for (len = 0; CONSP (components);
7210 len++, components = XCDR (components))
7211 *buf++ = XINT (XCAR (components));
aaaf0b1e 7212 }
aaaf0b1e 7213 else
ff0dacd7
KH
7214 abort ();
7215 *head -= len;
aaaf0b1e 7216 }
aaaf0b1e 7217 }
ff0dacd7
KH
7218
7219 if (find_composition (end, limit, &start, &end, &prop,
7220 coding->src_object)
7221 && end <= limit)
7222 *stop = start;
7223 else
7224 *stop = limit;
aaaf0b1e 7225 }
ff0dacd7
KH
7226 return buf;
7227}
7228
7229
e1c23804 7230/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
7231 CODING->src_object (buffer of string), store the data in BUF, set
7232 *STOP to the position where the value of `charset' property changes
7233 (limiting by LIMIT), and return the address of the next element of
7234 BUF.
7235
7236 If the property value is nil, set *STOP to the position where the
7237 property value is non-nil (limiting by LIMIT), and return BUF. */
7238
7239static INLINE int *
cf84bb53
JB
7240handle_charset_annotation (EMACS_INT pos, EMACS_INT limit,
7241 struct coding_system *coding, int *buf,
7242 EMACS_INT *stop)
ff0dacd7
KH
7243{
7244 Lisp_Object val, next;
7245 int id;
7246
7247 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7248 if (! NILP (val) && CHARSETP (val))
7249 id = XINT (CHARSET_SYMBOL_ID (val));
7250 else
7251 id = -1;
69a80ea3 7252 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
7253 next = Fnext_single_property_change (make_number (pos), Qcharset,
7254 coding->src_object,
7255 make_number (limit));
7256 *stop = XINT (next);
7257 return buf;
7258}
7259
7260
/* Read characters from the source text of CODING, starting after the
   CODING->consumed bytes / CODING->consumed_char characters already
   read, and store them in CODING->charbuf until the charbuf is
   almost full or the end of the source text is reached.

   While doing so, convert LF according to the eol type of CODING (to
   CR LF for `dos', to CR for any other non-`unix' type), translate
   the characters by TRANSLATION_TABLE, and, if
   CODING->src_object is not nil, store annotation data for `charset'
   text properties if CODING->common_flags asks for it.

   MAX_LOOKUP is the number of characters the lookup buffer for
   multi-character translation rules can hold; that buffer is only
   allocated when TRANSLATION_TABLE is not nil.

   On return CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated, and CODING->chars_at_source is
   0.  */

static void
consume_chars (struct coding_system *coding, Lisp_Object translation_table,
               int max_lookup)
{
  int *buf = coding->charbuf;
  int *buf_end = coding->charbuf + coding->charbuf_size;
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  EMACS_INT pos = coding->src_pos + coding->consumed_char;
  EMACS_INT end_pos = coding->src_pos + coding->src_chars;
  int multibytep = coding->src_multibyte;
  Lisp_Object eol_type;
  int c;
  /* STOP is the next position at which annotations must be checked:
     the smaller of STOP_COMPOSITION and STOP_CHARSET.  */
  EMACS_INT stop, stop_composition, stop_charset;
  int *lookup_buf = NULL;

  if (! NILP (translation_table))
    lookup_buf = alloca (sizeof (int) * max_lookup);

  /* An undecided eol type (a vector) is treated like `unix'.  */
  eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
  if (VECTORP (eol_type))
    eol_type = Qunix;

  /* Note: composition handling is not yet implemented.  */
  coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;

  if (NILP (coding->src_object))
    stop = stop_composition = stop_charset = end_pos;
  else
    {
      if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
        stop = stop_composition = pos;
      else
        stop = stop_composition = end_pos;
      if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
        stop = stop_charset = pos;
      else
        stop_charset = end_pos;
    }

  /* Compensate for CRLF and conversion.  */
  buf_end -= 1 + MAX_ANNOTATION_LENGTH;
  while (buf < buf_end)
    {
      Lisp_Object trans;

      if (pos == stop)
        {
          /* The end of the source text is detected here; the loop
             has no separate test of SRC against SRC_END.  */
          if (pos == end_pos)
            break;
          if (pos == stop_composition)
            buf = handle_composition_annotation (pos, end_pos, coding,
                                                 buf, &stop_composition);
          if (pos == stop_charset)
            buf = handle_charset_annotation (pos, end_pos, coding,
                                             buf, &stop_charset);
          stop = (stop_composition < stop_charset
                  ? stop_composition : stop_charset);
        }

      /* Fetch the next character into C, advancing SRC and POS.  */
      if (! multibytep)
        {
          EMACS_INT bytes;

          if (coding->encoder == encode_coding_raw_text
              || coding->encoder == encode_coding_ccl)
            c = *src++, pos++;
          /* In a unibyte source, POS advances by the byte length of
             the multibyte sequence found.  */
          else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
            c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
          else
            c = BYTE8_TO_CHAR (*src), src++, pos++;
        }
      else
        c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
      if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
        c = '\n';
      if (! EQ (eol_type, Qunix))
        {
          if (c == '\n')
            {
              /* For `dos' a CR is stored in front of the LF;
                 otherwise the LF itself becomes a CR.  */
              if (EQ (eol_type, Qdos))
                *buf++ = '\r';
              else
                c = '\r';
            }
        }

      trans = Qnil;
      LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
      if (NILP (trans))
        *buf++ = c;
      else
        {
          /* FROM_NCHARS source characters are replaced by TO_NCHARS
             characters.  */
          int from_nchars = 1, to_nchars = 1;
          int *lookup_buf_end;
          const unsigned char *p = src;
          int i;

          /* Collect C and up to MAX_LOOKUP - 1 following characters,
             without advancing SRC, to match them against the
             rules.  */
          lookup_buf[0] = c;
          for (i = 1; i < max_lookup && p < src_end; i++)
            lookup_buf[i] = STRING_CHAR_ADVANCE (p);
          lookup_buf_end = lookup_buf + i;
          trans = get_translation (trans, lookup_buf, lookup_buf_end);
          if (INTEGERP (trans))
            c = XINT (trans);
          else if (CONSP (trans))
            {
              from_nchars = ASIZE (XCAR (trans));
              trans = XCDR (trans);
              if (INTEGERP (trans))
                c = XINT (trans);
              else
                {
                  to_nchars = ASIZE (trans);
                  if (buf + to_nchars > buf_end)
                    break;
                  c = XINT (AREF (trans, 0));
                }
            }
          else
            /* NOTE(review): at this point and at the `break' above,
               SRC and POS have already been advanced past C, but C
               has not been stored -- confirm that this is
               intended.  */
            break;
          *buf++ = c;
          for (i = 1; i < to_nchars; i++)
            *buf++ = XINT (AREF (trans, i));
          /* Skip the remaining source characters that the rule
             matched.  */
          for (i = 1; i < from_nchars; i++, pos++)
            src += MULTIBYTE_LENGTH_NO_CHECK (src);
        }
    }

  coding->consumed = src - coding->source;
  coding->consumed_char = pos - coding->src_pos;
  coding->charbuf_used = buf - coding->charbuf;
  coding->chars_at_source = 0;
}
7395
4ed46869 7396
df7492f9
KH
7397/* Encode the text at CODING->src_object into CODING->dst_object.
7398 CODING->src_object is a buffer or a string.
7399 CODING->dst_object is a buffer or nil.
7400
7401 If CODING->src_object is a buffer, it must be the current buffer.
7402 In this case, if CODING->src_pos is positive, it is a position of
7403 the source text in the buffer; otherwise, the source text is in the
7404 gap area of the buffer, and coding->src_pos specifies the offset of
7405 the text from GPT (which must be the same as PT). If this is the
7406 same buffer as CODING->dst_object, CODING->src_pos must be
7407 negative and CODING should not have `pre-write-conversion'.
7408
7409 If CODING->src_object is a string, CODING should not have
7410 `pre-write-conversion'.
7411
7412 If CODING->dst_object is a buffer, the encoded data is inserted at
7413 the current point of that buffer.
7414
7415 If CODING->dst_object is nil, the encoded data is placed at the
7416 memory area specified by CODING->destination. */
7417
7418static int
971de7fb 7419encode_coding (struct coding_system *coding)
4ed46869 7420{
df7492f9 7421 Lisp_Object attrs;
7d64c6ad 7422 Lisp_Object translation_table;
09ee6fdd 7423 int max_lookup;
fb608df3 7424 struct ccl_spec cclspec;
9861e777 7425
df7492f9 7426 attrs = CODING_ID_ATTRS (coding->id);
ea29edf2
KH
7427 if (coding->encoder == encode_coding_raw_text)
7428 translation_table = Qnil, max_lookup = 0;
7429 else
7430 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 7431
df7492f9 7432 if (BUFFERP (coding->dst_object))
8844fa83 7433 {
df7492f9
KH
7434 set_buffer_internal (XBUFFER (coding->dst_object));
7435 coding->dst_multibyte
5d8ea120 7436 = ! NILP (B_ (current_buffer, enable_multibyte_characters));
8844fa83 7437 }
4ed46869 7438
b73bfc1c 7439 coding->consumed = coding->consumed_char = 0;
df7492f9 7440 coding->produced = coding->produced_char = 0;
065e3595 7441 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 7442 coding->errors = 0;
b73bfc1c 7443
df7492f9 7444 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 7445
fb608df3
KH
7446 if (coding->encoder == encode_coding_ccl)
7447 {
7448 coding->spec.ccl = &cclspec;
7449 setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7450 }
df7492f9
KH
7451 do {
7452 coding_set_source (coding);
09ee6fdd 7453 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
7454 coding_set_destination (coding);
7455 (*(coding->encoder)) (coding);
7456 } while (coding->consumed_char < coding->src_chars);
7457
284201e4 7458 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
df7492f9
KH
7459 insert_from_gap (coding->produced_char, coding->produced);
7460
7461 return (coding->result);
ec6d2bb8
KH
7462}
7463
fb88bf2d 7464
24a73b0a
KH
7465/* Name (or base name) of work buffer for code conversion. */
7466static Lisp_Object Vcode_conversion_workbuf_name;
d46c5b12 7467
24a73b0a
KH
7468/* A working buffer used by the top level conversion. Once it is
7469 created, it is never destroyed. It has the name
7470 Vcode_conversion_workbuf_name. The other working buffers are
7471 destroyed after the use is finished, and their names are modified
7472 versions of Vcode_conversion_workbuf_name. */
7473static Lisp_Object Vcode_conversion_reused_workbuf;
b73bfc1c 7474
24a73b0a
KH
7475/* Nonzero iff Vcode_conversion_reused_workbuf is already in use.  */
7476static int reused_workbuf_in_use;
4ed46869 7477
24a73b0a 7478
ad1746f5 7479/* Return a working buffer of code conversion. MULTIBYTE specifies the
24a73b0a 7480 multibyteness of returning buffer. */
b73bfc1c 7481
f6cbaf43 7482static Lisp_Object
971de7fb 7483make_conversion_work_buffer (int multibyte)
df7492f9 7484{
24a73b0a
KH
7485 Lisp_Object name, workbuf;
7486 struct buffer *current;
4ed46869 7487
24a73b0a 7488 if (reused_workbuf_in_use++)
065e3595
KH
7489 {
7490 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7491 workbuf = Fget_buffer_create (name);
7492 }
df7492f9 7493 else
065e3595 7494 {
159bd5a2 7495 if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
a993c7a1
KH
7496 Vcode_conversion_reused_workbuf
7497 = Fget_buffer_create (Vcode_conversion_workbuf_name);
7498 workbuf = Vcode_conversion_reused_workbuf;
065e3595 7499 }
24a73b0a
KH
7500 current = current_buffer;
7501 set_buffer_internal (XBUFFER (workbuf));
df36ff1f
CY
7502 /* We can't allow modification hooks to run in the work buffer. For
7503 instance, directory_files_internal assumes that file decoding
7504 doesn't compile new regexps. */
7505 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
3ed051d4 7506 Ferase_buffer ();
5d8ea120
TT
7507 B_ (current_buffer, undo_list) = Qt;
7508 B_ (current_buffer, enable_multibyte_characters) = multibyte ? Qt : Qnil;
df7492f9 7509 set_buffer_internal (current);
24a73b0a 7510 return workbuf;
df7492f9 7511}
d46c5b12 7512
24a73b0a 7513
4776e638 7514static Lisp_Object
971de7fb 7515code_conversion_restore (Lisp_Object arg)
4776e638 7516{
24a73b0a 7517 Lisp_Object current, workbuf;
948bdcf3 7518 struct gcpro gcpro1;
24a73b0a 7519
948bdcf3 7520 GCPRO1 (arg);
24a73b0a
KH
7521 current = XCAR (arg);
7522 workbuf = XCDR (arg);
7523 if (! NILP (workbuf))
7524 {
7525 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7526 reused_workbuf_in_use = 0;
7527 else if (! NILP (Fbuffer_live_p (workbuf)))
7528 Fkill_buffer (workbuf);
7529 }
7530 set_buffer_internal (XBUFFER (current));
948bdcf3 7531 UNGCPRO;
4776e638
KH
7532 return Qnil;
7533}
b73bfc1c 7534
24a73b0a 7535Lisp_Object
971de7fb 7536code_conversion_save (int with_work_buf, int multibyte)
df7492f9 7537{
24a73b0a 7538 Lisp_Object workbuf = Qnil;
b73bfc1c 7539
4776e638 7540 if (with_work_buf)
24a73b0a
KH
7541 workbuf = make_conversion_work_buffer (multibyte);
7542 record_unwind_protect (code_conversion_restore,
7543 Fcons (Fcurrent_buffer (), workbuf));
4776e638 7544 return workbuf;
df7492f9 7545}
d46c5b12 7546
df7492f9 7547int
cf84bb53
JB
7548decode_coding_gap (struct coding_system *coding,
7549 EMACS_INT chars, EMACS_INT bytes)
df7492f9 7550{
1a4990fb 7551 int count = SPECPDL_INDEX ();
5e5c78be 7552 Lisp_Object attrs;
fb88bf2d 7553
24a73b0a 7554 code_conversion_save (0, 0);
ec6d2bb8 7555
24a73b0a 7556 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7557 coding->src_chars = chars;
7558 coding->src_bytes = bytes;
7559 coding->src_pos = -chars;
7560 coding->src_pos_byte = -bytes;
7561 coding->src_multibyte = chars < bytes;
24a73b0a 7562 coding->dst_object = coding->src_object;
df7492f9
KH
7563 coding->dst_pos = PT;
7564 coding->dst_pos_byte = PT_BYTE;
5d8ea120 7565 coding->dst_multibyte = ! NILP (B_ (current_buffer, enable_multibyte_characters));
4ed46869 7566
df7492f9
KH
7567 if (CODING_REQUIRE_DETECTION (coding))
7568 detect_coding (coding);
8f924df7 7569
9286b333 7570 coding->mode |= CODING_MODE_LAST_BLOCK;
287c57d7 7571 current_buffer->text->inhibit_shrinking = 1;
df7492f9 7572 decode_coding (coding);
287c57d7 7573 current_buffer->text->inhibit_shrinking = 0;
d46c5b12 7574
5e5c78be
KH
7575 attrs = CODING_ID_ATTRS (coding->id);
7576 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 7577 {
5e5c78be
KH
7578 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7579 Lisp_Object val;
7580
7581 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
7582 val = call1 (CODING_ATTR_POST_READ (attrs),
7583 make_number (coding->produced_char));
5e5c78be
KH
7584 CHECK_NATNUM (val);
7585 coding->produced_char += Z - prev_Z;
7586 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 7587 }
4ed46869 7588
df7492f9 7589 unbind_to (count, Qnil);
b73bfc1c
KH
7590 return coding->result;
7591}
52d41803 7592
4ed46869 7593int
cf84bb53
JB
7594encode_coding_gap (struct coding_system *coding,
7595 EMACS_INT chars, EMACS_INT bytes)
4ed46869 7596{
1a4990fb 7597 int count = SPECPDL_INDEX ();
4ed46869 7598
24a73b0a 7599 code_conversion_save (0, 0);
4ed46869 7600
24a73b0a 7601 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7602 coding->src_chars = chars;
7603 coding->src_bytes = bytes;
7604 coding->src_pos = -chars;
7605 coding->src_pos_byte = -bytes;
7606 coding->src_multibyte = chars < bytes;
7607 coding->dst_object = coding->src_object;
7608 coding->dst_pos = PT;
7609 coding->dst_pos_byte = PT_BYTE;
4ed46869 7610
df7492f9 7611 encode_coding (coding);
b73bfc1c 7612
df7492f9
KH
7613 unbind_to (count, Qnil);
7614 return coding->result;
7615}
4ed46869 7616
d46c5b12 7617
df7492f9
KH
7618/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7619 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 7620
df7492f9 7621 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 7622
df7492f9
KH
7623 If it is a buffer, the text is at point of the buffer. FROM and TO
7624 are positions in the buffer.
b73bfc1c 7625
df7492f9
KH
7626 If it is a string, the text is at the beginning of the string.
7627 FROM and TO are indices to the string.
4ed46869 7628
df7492f9
KH
7629 If it is nil, the text is at coding->source. FROM and TO are
7630 indices to coding->source.
bb10be8b 7631
df7492f9 7632 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 7633
df7492f9
KH
7634 If it is a buffer, the decoded text is inserted at point of the
7635 buffer. If the buffer is the same as SRC_OBJECT, the source text
7636 is deleted.
4ed46869 7637
df7492f9
KH
7638 If it is Qt, a string is made from the decoded text, and
7639 set in CODING->dst_object.
d46c5b12 7640
df7492f9 7641 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 7642 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
7643 CODING->destination by xmalloc. If the decoded text is longer than
7644 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7645 */
d46c5b12 7646
df7492f9 7647void
cf84bb53
JB
7648decode_coding_object (struct coding_system *coding,
7649 Lisp_Object src_object,
7650 EMACS_INT from, EMACS_INT from_byte,
7651 EMACS_INT to, EMACS_INT to_byte,
7652 Lisp_Object dst_object)
d46c5b12 7653{
1a4990fb 7654 int count = SPECPDL_INDEX ();
df7492f9
KH
7655 unsigned char *destination;
7656 EMACS_INT dst_bytes;
7657 EMACS_INT chars = to - from;
7658 EMACS_INT bytes = to_byte - from_byte;
7659 Lisp_Object attrs;
4776e638 7660 int saved_pt = -1, saved_pt_byte;
64cedb0c 7661 int need_marker_adjustment = 0;
b3bfad50 7662 Lisp_Object old_deactivate_mark;
d46c5b12 7663
b3bfad50 7664 old_deactivate_mark = Vdeactivate_mark;
93dec019 7665
df7492f9 7666 if (NILP (dst_object))
d46c5b12 7667 {
df7492f9
KH
7668 destination = coding->destination;
7669 dst_bytes = coding->dst_bytes;
d46c5b12 7670 }
93dec019 7671
df7492f9
KH
7672 coding->src_object = src_object;
7673 coding->src_chars = chars;
7674 coding->src_bytes = bytes;
7675 coding->src_multibyte = chars < bytes;
70ad9fc4 7676
df7492f9 7677 if (STRINGP (src_object))
d46c5b12 7678 {
df7492f9
KH
7679 coding->src_pos = from;
7680 coding->src_pos_byte = from_byte;
d46c5b12 7681 }
df7492f9 7682 else if (BUFFERP (src_object))
88993dfd 7683 {
df7492f9
KH
7684 set_buffer_internal (XBUFFER (src_object));
7685 if (from != GPT)
7686 move_gap_both (from, from_byte);
7687 if (EQ (src_object, dst_object))
fb88bf2d 7688 {
64cedb0c
KH
7689 struct Lisp_Marker *tail;
7690
7691 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7692 {
7693 tail->need_adjustment
7694 = tail->charpos == (tail->insertion_type ? from : to);
7695 need_marker_adjustment |= tail->need_adjustment;
7696 }
4776e638 7697 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9 7698 TEMP_SET_PT_BOTH (from, from_byte);
f4a3cc44 7699 current_buffer->text->inhibit_shrinking = 1;
df7492f9
KH
7700 del_range_both (from, from_byte, to, to_byte, 1);
7701 coding->src_pos = -chars;
7702 coding->src_pos_byte = -bytes;
fb88bf2d 7703 }
df7492f9 7704 else
fb88bf2d 7705 {
df7492f9
KH
7706 coding->src_pos = from;
7707 coding->src_pos_byte = from_byte;
fb88bf2d 7708 }
88993dfd
KH
7709 }
7710
df7492f9
KH
7711 if (CODING_REQUIRE_DETECTION (coding))
7712 detect_coding (coding);
7713 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 7714
2cb26057
KH
7715 if (EQ (dst_object, Qt)
7716 || (! NILP (CODING_ATTR_POST_READ (attrs))
7717 && NILP (dst_object)))
b73bfc1c 7718 {
a1567c45
SM
7719 coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7720 coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
df7492f9
KH
7721 coding->dst_pos = BEG;
7722 coding->dst_pos_byte = BEG_BYTE;
b73bfc1c 7723 }
df7492f9 7724 else if (BUFFERP (dst_object))
d46c5b12 7725 {
24a73b0a 7726 code_conversion_save (0, 0);
df7492f9
KH
7727 coding->dst_object = dst_object;
7728 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7729 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7730 coding->dst_multibyte
5d8ea120 7731 = ! NILP (B_ (XBUFFER (dst_object), enable_multibyte_characters));
d46c5b12
KH
7732 }
7733 else
7734 {
24a73b0a 7735 code_conversion_save (0, 0);
df7492f9 7736 coding->dst_object = Qnil;
0154725e
SM
7737 /* Most callers presume this will return a multibyte result, and they
7738 won't use `binary' or `raw-text' anyway, so let's not worry about
7739 CODING_FOR_UNIBYTE. */
bb555731 7740 coding->dst_multibyte = 1;
d46c5b12
KH
7741 }
7742
df7492f9 7743 decode_coding (coding);
fa46990e 7744
df7492f9
KH
7745 if (BUFFERP (coding->dst_object))
7746 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 7747
df7492f9 7748 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 7749 {
b3bfad50 7750 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
df7492f9 7751 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 7752 Lisp_Object val;
d46c5b12 7753
c0cc7f7f 7754 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
b3bfad50
KH
7755 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7756 old_deactivate_mark);
d4850d67
KH
7757 val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7758 make_number (coding->produced_char));
df7492f9
KH
7759 UNGCPRO;
7760 CHECK_NATNUM (val);
7761 coding->produced_char += Z - prev_Z;
7762 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 7763 }
de79a6a5 7764
df7492f9 7765 if (EQ (dst_object, Qt))
ec6d2bb8 7766 {
df7492f9
KH
7767 coding->dst_object = Fbuffer_string ();
7768 }
7769 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7770 {
7771 set_buffer_internal (XBUFFER (coding->dst_object));
7772 if (dst_bytes < coding->produced)
7773 {
b3bfad50 7774 destination = xrealloc (destination, coding->produced);
df7492f9
KH
7775 if (! destination)
7776 {
065e3595 7777 record_conversion_result (coding,
ebaf11b6 7778 CODING_RESULT_INSUFFICIENT_MEM);
df7492f9
KH
7779 unbind_to (count, Qnil);
7780 return;
7781 }
7782 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7783 move_gap_both (BEGV, BEGV_BYTE);
72af86bd 7784 memcpy (destination, BEGV_ADDR, coding->produced);
df7492f9 7785 coding->destination = destination;
d46c5b12 7786 }
ec6d2bb8 7787 }
b73bfc1c 7788
4776e638
KH
7789 if (saved_pt >= 0)
7790 {
7791 /* This is the case of:
7792 (BUFFERP (src_object) && EQ (src_object, dst_object))
7793 As we have moved PT while replacing the original buffer
7794 contents, we must recover it now. */
7795 set_buffer_internal (XBUFFER (src_object));
f4a3cc44 7796 current_buffer->text->inhibit_shrinking = 0;
4776e638
KH
7797 if (saved_pt < from)
7798 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7799 else if (saved_pt < from + chars)
7800 TEMP_SET_PT_BOTH (from, from_byte);
5d8ea120 7801 else if (! NILP (B_ (current_buffer, enable_multibyte_characters)))
4776e638
KH
7802 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7803 saved_pt_byte + (coding->produced - bytes));
7804 else
7805 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7806 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7807
7808 if (need_marker_adjustment)
7809 {
7810 struct Lisp_Marker *tail;
7811
7812 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7813 if (tail->need_adjustment)
7814 {
7815 tail->need_adjustment = 0;
7816 if (tail->insertion_type)
7817 {
7818 tail->bytepos = from_byte;
7819 tail->charpos = from;
7820 }
7821 else
7822 {
7823 tail->bytepos = from_byte + coding->produced;
7824 tail->charpos
5d8ea120 7825 = (NILP (B_ (current_buffer, enable_multibyte_characters))
64cedb0c
KH
7826 ? tail->bytepos : from + coding->produced_char);
7827 }
7828 }
7829 }
d46c5b12 7830 }
4776e638 7831
b3bfad50 7832 Vdeactivate_mark = old_deactivate_mark;
065e3595 7833 unbind_to (count, coding->dst_object);
d46c5b12
KH
7834}
7835
d46c5b12 7836
df7492f9 7837void
cf84bb53
JB
7838encode_coding_object (struct coding_system *coding,
7839 Lisp_Object src_object,
7840 EMACS_INT from, EMACS_INT from_byte,
7841 EMACS_INT to, EMACS_INT to_byte,
7842 Lisp_Object dst_object)
d46c5b12 7843{
1a4990fb 7844 int count = SPECPDL_INDEX ();
df7492f9
KH
7845 EMACS_INT chars = to - from;
7846 EMACS_INT bytes = to_byte - from_byte;
7847 Lisp_Object attrs;
4776e638 7848 int saved_pt = -1, saved_pt_byte;
64cedb0c 7849 int need_marker_adjustment = 0;
c02d943b 7850 int kill_src_buffer = 0;
b3bfad50 7851 Lisp_Object old_deactivate_mark;
df7492f9 7852
b3bfad50 7853 old_deactivate_mark = Vdeactivate_mark;
df7492f9
KH
7854
7855 coding->src_object = src_object;
7856 coding->src_chars = chars;
7857 coding->src_bytes = bytes;
7858 coding->src_multibyte = chars < bytes;
7859
7860 attrs = CODING_ID_ATTRS (coding->id);
7861
64cedb0c
KH
7862 if (EQ (src_object, dst_object))
7863 {
7864 struct Lisp_Marker *tail;
7865
7866 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7867 {
7868 tail->need_adjustment
7869 = tail->charpos == (tail->insertion_type ? from : to);
7870 need_marker_adjustment |= tail->need_adjustment;
7871 }
7872 }
7873
df7492f9 7874 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 7875 {
24a73b0a 7876 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
7877 set_buffer_internal (XBUFFER (coding->src_object));
7878 if (STRINGP (src_object))
7879 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7880 else if (BUFFERP (src_object))
7881 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7882 else
b68864e5 7883 insert_1_both ((char *) coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 7884
df7492f9
KH
7885 if (EQ (src_object, dst_object))
7886 {
7887 set_buffer_internal (XBUFFER (src_object));
4776e638 7888 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
7889 del_range_both (from, from_byte, to, to_byte, 1);
7890 set_buffer_internal (XBUFFER (coding->src_object));
7891 }
7892
d4850d67
KH
7893 {
7894 Lisp_Object args[3];
b3bfad50 7895 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
d4850d67 7896
b3bfad50
KH
7897 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7898 old_deactivate_mark);
d4850d67
KH
7899 args[0] = CODING_ATTR_PRE_WRITE (attrs);
7900 args[1] = make_number (BEG);
7901 args[2] = make_number (Z);
7902 safe_call (3, args);
b3bfad50 7903 UNGCPRO;
d4850d67 7904 }
c02d943b
KH
7905 if (XBUFFER (coding->src_object) != current_buffer)
7906 kill_src_buffer = 1;
ac87bbef 7907 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7908 if (BEG != GPT)
7909 move_gap_both (BEG, BEG_BYTE);
7910 coding->src_chars = Z - BEG;
7911 coding->src_bytes = Z_BYTE - BEG_BYTE;
7912 coding->src_pos = BEG;
7913 coding->src_pos_byte = BEG_BYTE;
7914 coding->src_multibyte = Z < Z_BYTE;
7915 }
7916 else if (STRINGP (src_object))
d46c5b12 7917 {
24a73b0a 7918 code_conversion_save (0, 0);
df7492f9
KH
7919 coding->src_pos = from;
7920 coding->src_pos_byte = from_byte;
b73bfc1c 7921 }
df7492f9 7922 else if (BUFFERP (src_object))
b73bfc1c 7923 {
24a73b0a 7924 code_conversion_save (0, 0);
df7492f9 7925 set_buffer_internal (XBUFFER (src_object));
df7492f9 7926 if (EQ (src_object, dst_object))
d46c5b12 7927 {
4776e638 7928 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
7929 coding->src_object = del_range_1 (from, to, 1, 1);
7930 coding->src_pos = 0;
7931 coding->src_pos_byte = 0;
d46c5b12 7932 }
df7492f9 7933 else
d46c5b12 7934 {
ff0dacd7
KH
7935 if (from < GPT && to >= GPT)
7936 move_gap_both (from, from_byte);
df7492f9
KH
7937 coding->src_pos = from;
7938 coding->src_pos_byte = from_byte;
d46c5b12 7939 }
d46c5b12 7940 }
4776e638 7941 else
24a73b0a 7942 code_conversion_save (0, 0);
d46c5b12 7943
df7492f9 7944 if (BUFFERP (dst_object))
88993dfd 7945 {
df7492f9 7946 coding->dst_object = dst_object;
28f67a95
KH
7947 if (EQ (src_object, dst_object))
7948 {
7949 coding->dst_pos = from;
7950 coding->dst_pos_byte = from_byte;
7951 }
7952 else
7953 {
319a3947
KH
7954 struct buffer *current = current_buffer;
7955
7956 set_buffer_temp (XBUFFER (dst_object));
7957 coding->dst_pos = PT;
7958 coding->dst_pos_byte = PT_BYTE;
7959 move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7960 set_buffer_temp (current);
28f67a95 7961 }
df7492f9 7962 coding->dst_multibyte
5d8ea120 7963 = ! NILP (B_ (XBUFFER (dst_object), enable_multibyte_characters));
88993dfd 7964 }
df7492f9 7965 else if (EQ (dst_object, Qt))
d46c5b12 7966 {
df7492f9 7967 coding->dst_object = Qnil;
df7492f9 7968 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
7969 if (coding->dst_bytes == 0)
7970 coding->dst_bytes = 1;
7971 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 7972 coding->dst_multibyte = 0;
d46c5b12
KH
7973 }
7974 else
7975 {
df7492f9
KH
7976 coding->dst_object = Qnil;
7977 coding->dst_multibyte = 0;
d46c5b12
KH
7978 }
7979
df7492f9 7980 encode_coding (coding);
d46c5b12 7981
df7492f9 7982 if (EQ (dst_object, Qt))
d46c5b12 7983 {
df7492f9
KH
7984 if (BUFFERP (coding->dst_object))
7985 coding->dst_object = Fbuffer_string ();
7986 else
d46c5b12 7987 {
df7492f9
KH
7988 coding->dst_object
7989 = make_unibyte_string ((char *) coding->destination,
7990 coding->produced);
7991 xfree (coding->destination);
d46c5b12 7992 }
4ed46869 7993 }
d46c5b12 7994
4776e638
KH
7995 if (saved_pt >= 0)
7996 {
7997 /* This is the case of:
7998 (BUFFERP (src_object) && EQ (src_object, dst_object))
7999 As we have moved PT while replacing the original buffer
8000 contents, we must recover it now. */
8001 set_buffer_internal (XBUFFER (src_object));
8002 if (saved_pt < from)
8003 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8004 else if (saved_pt < from + chars)
8005 TEMP_SET_PT_BOTH (from, from_byte);
5d8ea120 8006 else if (! NILP (B_ (current_buffer, enable_multibyte_characters)))
4776e638
KH
8007 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8008 saved_pt_byte + (coding->produced - bytes));
d46c5b12 8009 else
4776e638
KH
8010 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8011 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
8012
8013 if (need_marker_adjustment)
8014 {
8015 struct Lisp_Marker *tail;
8016
8017 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8018 if (tail->need_adjustment)
8019 {
8020 tail->need_adjustment = 0;
8021 if (tail->insertion_type)
8022 {
8023 tail->bytepos = from_byte;
8024 tail->charpos = from;
8025 }
8026 else
8027 {
8028 tail->bytepos = from_byte + coding->produced;
8029 tail->charpos
5d8ea120 8030 = (NILP (B_ (current_buffer, enable_multibyte_characters))
64cedb0c
KH
8031 ? tail->bytepos : from + coding->produced_char);
8032 }
8033 }
8034 }
4776e638
KH
8035 }
8036
c02d943b
KH
8037 if (kill_src_buffer)
8038 Fkill_buffer (coding->src_object);
b3bfad50
KH
8039
8040 Vdeactivate_mark = old_deactivate_mark;
df7492f9 8041 unbind_to (count, Qnil);
b73bfc1c
KH
8042}
8043
df7492f9 8044
b73bfc1c 8045Lisp_Object
971de7fb 8046preferred_coding_system (void)
b73bfc1c 8047{
df7492f9 8048 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 8049
df7492f9 8050 return CODING_ID_NAME (id);
4ed46869
KH
8051}
8052
8053\f
8054#ifdef emacs
1397dc18 8055/*** 11. Emacs Lisp library functions ***/
4ed46869 8056
4ed46869 8057DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 8058 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 8059See the documentation of `define-coding-system' for information
48b0f3ae 8060about coding-system objects. */)
5842a27b 8061 (Lisp_Object object)
4ed46869 8062{
d4a1d553
JB
8063 if (NILP (object)
8064 || CODING_SYSTEM_ID (object) >= 0)
44e8490d 8065 return Qt;
d4a1d553
JB
8066 if (! SYMBOLP (object)
8067 || NILP (Fget (object, Qcoding_system_define_form)))
44e8490d
KH
8068 return Qnil;
8069 return Qt;
4ed46869
KH
8070}
8071
9d991de8
RS
8072DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8073 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae 8074 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
5842a27b 8075 (Lisp_Object prompt)
4ed46869 8076{
e0e989f6 8077 Lisp_Object val;
9d991de8
RS
8078 do
8079 {
4608c386
KH
8080 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8081 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 8082 }
8f924df7 8083 while (SCHARS (val) == 0);
e0e989f6 8084 return (Fintern (val, Qnil));
4ed46869
KH
8085}
8086
9b787f3e 8087DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae 8088 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
c7183fb8
GM
8089If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8090Ignores case when completing coding systems (all Emacs coding systems
8091are lower-case). */)
5842a27b 8092 (Lisp_Object prompt, Lisp_Object default_coding_system)
4ed46869 8093{
f44d27ce 8094 Lisp_Object val;
c7183fb8
GM
8095 int count = SPECPDL_INDEX ();
8096
9b787f3e 8097 if (SYMBOLP (default_coding_system))
57d25e6f 8098 default_coding_system = SYMBOL_NAME (default_coding_system);
c7183fb8 8099 specbind (Qcompletion_ignore_case, Qt);
4608c386 8100 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
8101 Qt, Qnil, Qcoding_system_history,
8102 default_coding_system, Qnil);
c7183fb8 8103 unbind_to (count, Qnil);
8f924df7 8104 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
8105}
8106
8107DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8108 1, 1, 0,
48b0f3ae 8109 doc: /* Check validity of CODING-SYSTEM.
9ffd559c
KH
8110If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8111It is valid if it is nil or a symbol defined as a coding system by the
8112function `define-coding-system'. */)
5842a27b 8113 (Lisp_Object coding_system)
4ed46869 8114{
44e8490d
KH
8115 Lisp_Object define_form;
8116
8117 define_form = Fget (coding_system, Qcoding_system_define_form);
8118 if (! NILP (define_form))
8119 {
8120 Fput (coding_system, Qcoding_system_define_form, Qnil);
8121 safe_eval (define_form);
8122 }
4ed46869
KH
8123 if (!NILP (Fcoding_system_p (coding_system)))
8124 return coding_system;
fcad4ec4 8125 xsignal1 (Qcoding_system_error, coding_system);
4ed46869 8126}
df7492f9 8127
3a73fa5d 8128\f
89528eb3
KH
8129/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
8130 HIGHEST is nonzero, return the coding system of the highest
ad1746f5 8131 priority among the detected coding systems. Otherwise return a
89528eb3
KH
8132 list of detected coding systems sorted by their priorities. If
8133 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8134 multibyte form but contains only ASCII and eight-bit chars.
8135 Otherwise, the bytes are raw bytes.
8136
8137 CODING-SYSTEM controls the detection as below:
8138
8139 If it is nil, detect both text-format and eol-format. If the
8140 text-format part of CODING-SYSTEM is already specified
8141 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
8142 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8143 detect only text-format. */
8144
d46c5b12 8145Lisp_Object
cf84bb53
JB
8146detect_coding_system (const unsigned char *src,
8147 EMACS_INT src_chars, EMACS_INT src_bytes,
8148 int highest, int multibytep,
8149 Lisp_Object coding_system)
4ed46869 8150{
8f924df7 8151 const unsigned char *src_end = src + src_bytes;
df7492f9 8152 Lisp_Object attrs, eol_type;
4533845d 8153 Lisp_Object val = Qnil;
df7492f9 8154 struct coding_system coding;
89528eb3 8155 int id;
ff0dacd7 8156 struct coding_detection_info detect_info;
24a73b0a 8157 enum coding_category base_category;
2f3cbb32 8158 int null_byte_found = 0, eight_bit_found = 0;
b73bfc1c 8159
df7492f9
KH
8160 if (NILP (coding_system))
8161 coding_system = Qundecided;
8162 setup_coding_system (coding_system, &coding);
8163 attrs = CODING_ID_ATTRS (coding.id);
8164 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 8165 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 8166
df7492f9 8167 coding.source = src;
24a73b0a 8168 coding.src_chars = src_chars;
df7492f9
KH
8169 coding.src_bytes = src_bytes;
8170 coding.src_multibyte = multibytep;
8171 coding.consumed = 0;
89528eb3 8172 coding.mode |= CODING_MODE_LAST_BLOCK;
c0e16b14 8173 coding.head_ascii = 0;
d46c5b12 8174
ff0dacd7 8175 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 8176
89528eb3 8177 /* At first, detect text-format if necessary. */
24a73b0a
KH
8178 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8179 if (base_category == coding_category_undecided)
4ed46869 8180 {
ff0dacd7
KH
8181 enum coding_category category;
8182 struct coding_system *this;
8183 int c, i;
88993dfd 8184
24a73b0a 8185 /* Skip all ASCII bytes except for a few ISO2022 controls. */
2f3cbb32 8186 for (; src < src_end; src++)
4ed46869 8187 {
df7492f9 8188 c = *src;
6cb21a4f 8189 if (c & 0x80)
6cb21a4f 8190 {
2f3cbb32 8191 eight_bit_found = 1;
2f3cbb32
KH
8192 if (null_byte_found)
8193 break;
8194 }
c0e16b14 8195 else if (c < 0x20)
2f3cbb32
KH
8196 {
8197 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8198 && ! inhibit_iso_escape_detection
8199 && ! detect_info.checked)
6cb21a4f 8200 {
2f3cbb32
KH
8201 if (detect_coding_iso_2022 (&coding, &detect_info))
8202 {
8203 /* We have scanned the whole data. */
8204 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
c0e16b14
KH
8205 {
8206 /* We didn't find an 8-bit code. We may
8207 have found a null-byte, but it's very
8208 rare that a binary file confirm to
8209 ISO-2022. */
8210 src = src_end;
8211 coding.head_ascii = src - coding.source;
8212 }
8213 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
2f3cbb32
KH
8214 break;
8215 }
8216 }
97b1b294 8217 else if (! c && !inhibit_null_byte_detection)
2f3cbb32
KH
8218 {
8219 null_byte_found = 1;
8220 if (eight_bit_found)
8221 break;
6cb21a4f 8222 }
c006c0c8
KH
8223 if (! eight_bit_found)
8224 coding.head_ascii++;
6cb21a4f 8225 }
c006c0c8 8226 else if (! eight_bit_found)
c0e16b14 8227 coding.head_ascii++;
4ed46869 8228 }
88993dfd 8229
2f3cbb32
KH
8230 if (null_byte_found || eight_bit_found
8231 || coding.head_ascii < coding.src_bytes
6cb21a4f
KH
8232 || detect_info.found)
8233 {
2f3cbb32 8234 if (coding.head_ascii == coding.src_bytes)
6cb21a4f
KH
8235 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
8236 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7 8237 {
6cb21a4f 8238 category = coding_priorities[i];
c7266f4a 8239 this = coding_categories + category;
6cb21a4f 8240 if (detect_info.found & (1 << category))
ff0dacd7
KH
8241 break;
8242 }
6cb21a4f 8243 else
2f3cbb32
KH
8244 {
8245 if (null_byte_found)
8246 {
8247 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8248 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8249 }
8250 for (i = 0; i < coding_category_raw_text; i++)
8251 {
8252 category = coding_priorities[i];
8253 this = coding_categories + category;
6cb21a4f 8254
2f3cbb32
KH
8255 if (this->id < 0)
8256 {
8257 /* No coding system of this category is defined. */
8258 detect_info.rejected |= (1 << category);
8259 }
8260 else if (category >= coding_category_raw_text)
8261 continue;
8262 else if (detect_info.checked & (1 << category))
8263 {
8264 if (highest
8265 && (detect_info.found & (1 << category)))
6cb21a4f 8266 break;
2f3cbb32
KH
8267 }
8268 else if ((*(this->detector)) (&coding, &detect_info)
8269 && highest
8270 && (detect_info.found & (1 << category)))
8271 {
8272 if (category == coding_category_utf_16_auto)
8273 {
8274 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8275 category = coding_category_utf_16_le;
8276 else
8277 category = coding_category_utf_16_be;
8278 }
8279 break;
8280 }
8281 }
8282 }
6cb21a4f 8283 }
ec6d2bb8 8284
4cddb209
KH
8285 if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8286 || null_byte_found)
ec6d2bb8 8287 {
ff0dacd7 8288 detect_info.found = CATEGORY_MASK_RAW_TEXT;
4cddb209 8289 id = CODING_SYSTEM_ID (Qno_conversion);
89528eb3
KH
8290 val = Fcons (make_number (id), Qnil);
8291 }
ff0dacd7 8292 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 8293 {
ff0dacd7 8294 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
8295 id = coding_categories[coding_category_undecided].id;
8296 val = Fcons (make_number (id), Qnil);
8297 }
8298 else if (highest)
8299 {
ff0dacd7 8300 if (detect_info.found)
ec6d2bb8 8301 {
ff0dacd7
KH
8302 detect_info.found = 1 << category;
8303 val = Fcons (make_number (this->id), Qnil);
8304 }
8305 else
8306 for (i = 0; i < coding_category_raw_text; i++)
8307 if (! (detect_info.rejected & (1 << coding_priorities[i])))
8308 {
8309 detect_info.found = 1 << coding_priorities[i];
8310 id = coding_categories[coding_priorities[i]].id;
8311 val = Fcons (make_number (id), Qnil);
8312 break;
8313 }
8314 }
89528eb3
KH
8315 else
8316 {
ff0dacd7
KH
8317 int mask = detect_info.rejected | detect_info.found;
8318 int found = 0;
ec6d2bb8 8319
89528eb3 8320 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
8321 {
8322 category = coding_priorities[i];
8323 if (! (mask & (1 << category)))
ec6d2bb8 8324 {
ff0dacd7
KH
8325 found |= 1 << category;
8326 id = coding_categories[category].id;
c7266f4a
KH
8327 if (id >= 0)
8328 val = Fcons (make_number (id), val);
ff0dacd7
KH
8329 }
8330 }
8331 for (i = coding_category_raw_text - 1; i >= 0; i--)
8332 {
8333 category = coding_priorities[i];
8334 if (detect_info.found & (1 << category))
8335 {
8336 id = coding_categories[category].id;
8337 val = Fcons (make_number (id), val);
ec6d2bb8 8338 }
ec6d2bb8 8339 }
ff0dacd7 8340 detect_info.found |= found;
ec6d2bb8 8341 }
ec6d2bb8 8342 }
a470d443
KH
8343 else if (base_category == coding_category_utf_8_auto)
8344 {
8345 if (detect_coding_utf_8 (&coding, &detect_info))
8346 {
8347 struct coding_system *this;
8348
8349 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8350 this = coding_categories + coding_category_utf_8_sig;
8351 else
8352 this = coding_categories + coding_category_utf_8_nosig;
8353 val = Fcons (make_number (this->id), Qnil);
8354 }
8355 }
24a73b0a
KH
8356 else if (base_category == coding_category_utf_16_auto)
8357 {
8358 if (detect_coding_utf_16 (&coding, &detect_info))
8359 {
24a73b0a
KH
8360 struct coding_system *this;
8361
8362 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8363 this = coding_categories + coding_category_utf_16_le;
8364 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8365 this = coding_categories + coding_category_utf_16_be;
8366 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8367 this = coding_categories + coding_category_utf_16_be_nosig;
8368 else
8369 this = coding_categories + coding_category_utf_16_le_nosig;
8370 val = Fcons (make_number (this->id), Qnil);
8371 }
8372 }
df7492f9
KH
8373 else
8374 {
ff0dacd7 8375 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 8376 val = Fcons (make_number (coding.id), Qnil);
4ed46869 8377 }
df7492f9 8378
89528eb3 8379 /* Then, detect eol-format if necessary. */
df7492f9 8380 {
4533845d 8381 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
df7492f9
KH
8382 Lisp_Object tail;
8383
89528eb3
KH
8384 if (VECTORP (eol_type))
8385 {
ff0dacd7 8386 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
2f3cbb32
KH
8387 {
8388 if (null_byte_found)
8389 normal_eol = EOL_SEEN_LF;
8390 else
8391 normal_eol = detect_eol (coding.source, src_bytes,
8392 coding_category_raw_text);
8393 }
ff0dacd7
KH
8394 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8395 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
8396 utf_16_be_eol = detect_eol (coding.source, src_bytes,
8397 coding_category_utf_16_be);
ff0dacd7
KH
8398 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8399 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
8400 utf_16_le_eol = detect_eol (coding.source, src_bytes,
8401 coding_category_utf_16_le);
8402 }
8403 else
8404 {
8405 if (EQ (eol_type, Qunix))
8406 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8407 else if (EQ (eol_type, Qdos))
8408 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8409 else
8410 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8411 }
8412
df7492f9
KH
8413 for (tail = val; CONSP (tail); tail = XCDR (tail))
8414 {
89528eb3 8415 enum coding_category category;
df7492f9 8416 int this_eol;
89528eb3
KH
8417
8418 id = XINT (XCAR (tail));
8419 attrs = CODING_ID_ATTRS (id);
8420 category = XINT (CODING_ATTR_CATEGORY (attrs));
8421 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
8422 if (VECTORP (eol_type))
8423 {
89528eb3
KH
8424 if (category == coding_category_utf_16_be
8425 || category == coding_category_utf_16_be_nosig)
8426 this_eol = utf_16_be_eol;
8427 else if (category == coding_category_utf_16_le
8428 || category == coding_category_utf_16_le_nosig)
8429 this_eol = utf_16_le_eol;
df7492f9 8430 else
89528eb3
KH
8431 this_eol = normal_eol;
8432
df7492f9
KH
8433 if (this_eol == EOL_SEEN_LF)
8434 XSETCAR (tail, AREF (eol_type, 0));
8435 else if (this_eol == EOL_SEEN_CRLF)
8436 XSETCAR (tail, AREF (eol_type, 1));
8437 else if (this_eol == EOL_SEEN_CR)
8438 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
8439 else
8440 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 8441 }
89528eb3
KH
8442 else
8443 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
8444 }
8445 }
ec6d2bb8 8446
4533845d 8447 return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
ec6d2bb8
KH
8448}
8449
ec6d2bb8 8450
d46c5b12
KH
8451DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8452 2, 3, 0,
48b0f3ae
PJ
8453 doc: /* Detect coding system of the text in the region between START and END.
8454Return a list of possible coding systems ordered by priority.
b811c52b
KH
8455The coding systems to try and their priorities follows what
8456the function `coding-system-priority-list' (which see) returns.
ec6d2bb8 8457
12e0131a 8458If only ASCII characters are found (except for such ISO-2022 control
d4a1d553
JB
8459characters as ESC), it returns a list of single element `undecided'
8460or its subsidiary coding system according to a detected end-of-line
8461format.
ec6d2bb8 8462
48b0f3ae
PJ
8463If optional argument HIGHEST is non-nil, return the coding system of
8464highest priority. */)
5842a27b 8465 (Lisp_Object start, Lisp_Object end, Lisp_Object highest)
d46c5b12
KH
8466{
8467 int from, to;
8468 int from_byte, to_byte;
ec6d2bb8 8469
b7826503
PJ
8470 CHECK_NUMBER_COERCE_MARKER (start);
8471 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 8472
d46c5b12
KH
8473 validate_region (&start, &end);
8474 from = XINT (start), to = XINT (end);
8475 from_byte = CHAR_TO_BYTE (from);
8476 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 8477
d46c5b12
KH
8478 if (from < GPT && to >= GPT)
8479 move_gap_both (to, to_byte);
c210f766 8480
d46c5b12 8481 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 8482 to - from, to_byte - from_byte,
0a28aafb 8483 !NILP (highest),
5d8ea120
TT
8484 !NILP (B_ (current_buffer
8485 , enable_multibyte_characters)),
df7492f9 8486 Qnil);
ec6d2bb8
KH
8487}
8488
d46c5b12
KH
8489DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8490 1, 2, 0,
48b0f3ae
PJ
8491 doc: /* Detect coding system of the text in STRING.
8492Return a list of possible coding systems ordered by priority.
67ceab9d
KH
8493The coding systems to try and their priorities follows what
8494the function `coding-system-priority-list' (which see) returns.
fb88bf2d 8495
12e0131a 8496If only ASCII characters are found (except for such ISO-2022 control
d4a1d553
JB
8497characters as ESC), it returns a list of single element `undecided'
8498or its subsidiary coding system according to a detected end-of-line
8499format.
d46c5b12 8500
48b0f3ae
PJ
8501If optional argument HIGHEST is non-nil, return the coding system of
8502highest priority. */)
5842a27b 8503 (Lisp_Object string, Lisp_Object highest)
d46c5b12 8504{
b7826503 8505 CHECK_STRING (string);
b73bfc1c 8506
24a73b0a
KH
8507 return detect_coding_system (SDATA (string),
8508 SCHARS (string), SBYTES (string),
8f924df7 8509 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 8510 Qnil);
4ed46869 8511}
4ed46869 8512
b73bfc1c 8513
df7492f9 8514static INLINE int
971de7fb 8515char_encodable_p (int c, Lisp_Object attrs)
05e6f5dc 8516{
df7492f9 8517 Lisp_Object tail;
df7492f9 8518 struct charset *charset;
7d64c6ad 8519 Lisp_Object translation_table;
d46c5b12 8520
7d64c6ad 8521 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 8522 if (! NILP (translation_table))
7d64c6ad 8523 c = translate_char (translation_table, c);
df7492f9
KH
8524 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8525 CONSP (tail); tail = XCDR (tail))
e133c8fa 8526 {
df7492f9
KH
8527 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8528 if (CHAR_CHARSET_P (c, charset))
8529 break;
e133c8fa 8530 }
df7492f9 8531 return (! NILP (tail));
05e6f5dc 8532}
83fa074f 8533
fb88bf2d 8534
df7492f9
KH
8535/* Return a list of coding systems that safely encode the text between
8536 START and END. If EXCLUDE is non-nil, it is a list of coding
8537 systems not to check. The returned list doesn't contain any such
48468dac 8538 coding systems. In any case, if the text contains only ASCII or is
df7492f9 8539 unibyte, return t. */
e077cc80 8540
df7492f9
KH
8541DEFUN ("find-coding-systems-region-internal",
8542 Ffind_coding_systems_region_internal,
8543 Sfind_coding_systems_region_internal, 2, 3, 0,
8544 doc: /* Internal use only. */)
5842a27b 8545 (Lisp_Object start, Lisp_Object end, Lisp_Object exclude)
df7492f9
KH
8546{
8547 Lisp_Object coding_attrs_list, safe_codings;
8548 EMACS_INT start_byte, end_byte;
7c78e542 8549 const unsigned char *p, *pbeg, *pend;
df7492f9 8550 int c;
0e727afa 8551 Lisp_Object tail, elt, work_table;
d46c5b12 8552
df7492f9
KH
8553 if (STRINGP (start))
8554 {
8555 if (!STRING_MULTIBYTE (start)
8f924df7 8556 || SCHARS (start) == SBYTES (start))
df7492f9
KH
8557 return Qt;
8558 start_byte = 0;
8f924df7 8559 end_byte = SBYTES (start);
df7492f9
KH
8560 }
8561 else
d46c5b12 8562 {
df7492f9
KH
8563 CHECK_NUMBER_COERCE_MARKER (start);
8564 CHECK_NUMBER_COERCE_MARKER (end);
8565 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8566 args_out_of_range (start, end);
5d8ea120 8567 if (NILP (B_ (current_buffer, enable_multibyte_characters)))
df7492f9
KH
8568 return Qt;
8569 start_byte = CHAR_TO_BYTE (XINT (start));
8570 end_byte = CHAR_TO_BYTE (XINT (end));
8571 if (XINT (end) - XINT (start) == end_byte - start_byte)
8572 return Qt;
d46c5b12 8573
e1c23804 8574 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 8575 {
e1c23804
DL
8576 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8577 move_gap_both (XINT (start), start_byte);
df7492f9 8578 else
e1c23804 8579 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
8580 }
8581 }
8582
df7492f9
KH
8583 coding_attrs_list = Qnil;
8584 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8585 if (NILP (exclude)
8586 || NILP (Fmemq (XCAR (tail), exclude)))
8587 {
8588 Lisp_Object attrs;
d46c5b12 8589
df7492f9
KH
8590 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8591 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8592 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
8593 {
8594 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 8595 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
8596 coding_attrs_list = Fcons (attrs, coding_attrs_list);
8597 }
df7492f9 8598 }
d46c5b12 8599
df7492f9 8600 if (STRINGP (start))
8f924df7 8601 p = pbeg = SDATA (start);
df7492f9
KH
8602 else
8603 p = pbeg = BYTE_POS_ADDR (start_byte);
8604 pend = p + (end_byte - start_byte);
b843d1ae 8605
df7492f9
KH
8606 while (p < pend && ASCII_BYTE_P (*p)) p++;
8607 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 8608
0e727afa 8609 work_table = Fmake_char_table (Qnil, Qnil);
05e6f5dc 8610 while (p < pend)
72d1a715 8611 {
df7492f9
KH
8612 if (ASCII_BYTE_P (*p))
8613 p++;
72d1a715
RS
8614 else
8615 {
df7492f9 8616 c = STRING_CHAR_ADVANCE (p);
0e727afa
YM
8617 if (!NILP (char_table_ref (work_table, c)))
8618 /* This character was already checked. Ignore it. */
8619 continue;
12410ef1 8620
df7492f9
KH
8621 charset_map_loaded = 0;
8622 for (tail = coding_attrs_list; CONSP (tail);)
8623 {
8624 elt = XCAR (tail);
8625 if (NILP (elt))
8626 tail = XCDR (tail);
8627 else if (char_encodable_p (c, elt))
8628 tail = XCDR (tail);
8629 else if (CONSP (XCDR (tail)))
8630 {
8631 XSETCAR (tail, XCAR (XCDR (tail)));
8632 XSETCDR (tail, XCDR (XCDR (tail)));
8633 }
8634 else
8635 {
8636 XSETCAR (tail, Qnil);
8637 tail = XCDR (tail);
8638 }
8639 }
8640 if (charset_map_loaded)
8641 {
8642 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 8643
df7492f9 8644 if (STRINGP (start))
8f924df7 8645 pbeg = SDATA (start);
df7492f9
KH
8646 else
8647 pbeg = BYTE_POS_ADDR (start_byte);
8648 p = pbeg + p_offset;
8649 pend = pbeg + pend_offset;
8650 }
0e727afa 8651 char_table_set (work_table, c, Qt);
df7492f9 8652 }
ec6d2bb8 8653 }
fb88bf2d 8654
988b3759 8655 safe_codings = list2 (Qraw_text, Qno_conversion);
df7492f9
KH
8656 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8657 if (! NILP (XCAR (tail)))
8658 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 8659
05e6f5dc
KH
8660 return safe_codings;
8661}
4956c225 8662
d46c5b12 8663
8f924df7
KH
8664DEFUN ("unencodable-char-position", Funencodable_char_position,
8665 Sunencodable_char_position, 3, 5, 0,
8666 doc: /*
8667Return position of first un-encodable character in a region.
d4a1d553 8668START and END specify the region and CODING-SYSTEM specifies the
8f924df7 8669encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 8670
8f924df7
KH
8671If optional 4th argument COUNT is non-nil, it specifies at most how
8672many un-encodable characters to search. In this case, the value is a
8673list of positions.
d46c5b12 8674
8f924df7
KH
8675If optional 5th argument STRING is non-nil, it is a string to search
8676for un-encodable characters. In that case, START and END are indexes
8677to the string. */)
5842a27b 8678 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
8f924df7
KH
8679{
8680 int n;
8681 struct coding_system coding;
7d64c6ad 8682 Lisp_Object attrs, charset_list, translation_table;
8f924df7
KH
8683 Lisp_Object positions;
8684 int from, to;
8685 const unsigned char *p, *stop, *pend;
8686 int ascii_compatible;
fb88bf2d 8687
8f924df7
KH
8688 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8689 attrs = CODING_ID_ATTRS (coding.id);
8690 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8691 return Qnil;
8692 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8693 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 8694 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 8695
8f924df7
KH
8696 if (NILP (string))
8697 {
8698 validate_region (&start, &end);
8699 from = XINT (start);
8700 to = XINT (end);
5d8ea120 8701 if (NILP (B_ (current_buffer, enable_multibyte_characters))
8f924df7
KH
8702 || (ascii_compatible
8703 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8704 return Qnil;
8705 p = CHAR_POS_ADDR (from);
8706 pend = CHAR_POS_ADDR (to);
8707 if (from < GPT && to >= GPT)
8708 stop = GPT_ADDR;
8709 else
8710 stop = pend;
8711 }
8712 else
8713 {
8714 CHECK_STRING (string);
8715 CHECK_NATNUM (start);
8716 CHECK_NATNUM (end);
8717 from = XINT (start);
8718 to = XINT (end);
8719 if (from > to
8720 || to > SCHARS (string))
8721 args_out_of_range_3 (string, start, end);
8722 if (! STRING_MULTIBYTE (string))
8723 return Qnil;
8724 p = SDATA (string) + string_char_to_byte (string, from);
8725 stop = pend = SDATA (string) + string_char_to_byte (string, to);
8726 if (ascii_compatible && (to - from) == (pend - p))
8727 return Qnil;
8728 }
f2558efd 8729
8f924df7
KH
8730 if (NILP (count))
8731 n = 1;
8732 else
b73bfc1c 8733 {
8f924df7
KH
8734 CHECK_NATNUM (count);
8735 n = XINT (count);
b73bfc1c
KH
8736 }
8737
8f924df7
KH
8738 positions = Qnil;
8739 while (1)
d46c5b12 8740 {
8f924df7 8741 int c;
ec6d2bb8 8742
8f924df7
KH
8743 if (ascii_compatible)
8744 while (p < stop && ASCII_BYTE_P (*p))
8745 p++, from++;
8746 if (p >= stop)
0e79d667 8747 {
8f924df7
KH
8748 if (p >= pend)
8749 break;
8750 stop = pend;
8751 p = GAP_END_ADDR;
0e79d667 8752 }
ec6d2bb8 8753
8f924df7
KH
8754 c = STRING_CHAR_ADVANCE (p);
8755 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
8756 && ! char_charset (translate_char (translation_table, c),
8757 charset_list, NULL))
ec6d2bb8 8758 {
8f924df7
KH
8759 positions = Fcons (make_number (from), positions);
8760 n--;
8761 if (n == 0)
8762 break;
ec6d2bb8
KH
8763 }
8764
8f924df7
KH
8765 from++;
8766 }
d46c5b12 8767
8f924df7
KH
8768 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8769}
d46c5b12 8770
d46c5b12 8771
df7492f9
KH
8772DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8773 Scheck_coding_systems_region, 3, 3, 0,
8774 doc: /* Check if the region is encodable by coding systems.
d46c5b12 8775
df7492f9
KH
8776START and END are buffer positions specifying the region.
8777CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 8778
df7492f9 8779The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
d4a1d553 8780CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
df7492f9
KH
8781whole region, POS0, POS1, ... are buffer positions where non-encodable
8782characters are found.
93dec019 8783
df7492f9
KH
8784If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8785value is nil.
93dec019 8786
df7492f9
KH
8787START may be a string. In that case, check if the string is
8788encodable, and the value contains indices to the string instead of
5704f39a
KH
8789buffer positions. END is ignored.
8790
4c1958f4 8791If the current buffer (or START if it is a string) is unibyte, the value
5704f39a 8792is nil. */)
5842a27b 8793 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system_list)
05e6f5dc 8794{
df7492f9
KH
8795 Lisp_Object list;
8796 EMACS_INT start_byte, end_byte;
8797 int pos;
7c78e542 8798 const unsigned char *p, *pbeg, *pend;
df7492f9 8799 int c;
7d64c6ad 8800 Lisp_Object tail, elt, attrs;
70ad9fc4 8801
05e6f5dc
KH
8802 if (STRINGP (start))
8803 {
df7492f9 8804 if (!STRING_MULTIBYTE (start)
4c1958f4 8805 || SCHARS (start) == SBYTES (start))
df7492f9
KH
8806 return Qnil;
8807 start_byte = 0;
8f924df7 8808 end_byte = SBYTES (start);
df7492f9 8809 pos = 0;
d46c5b12 8810 }
05e6f5dc 8811 else
b73bfc1c 8812 {
b7826503
PJ
8813 CHECK_NUMBER_COERCE_MARKER (start);
8814 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
8815 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8816 args_out_of_range (start, end);
5d8ea120 8817 if (NILP (B_ (current_buffer, enable_multibyte_characters)))
df7492f9
KH
8818 return Qnil;
8819 start_byte = CHAR_TO_BYTE (XINT (start));
8820 end_byte = CHAR_TO_BYTE (XINT (end));
8821 if (XINT (end) - XINT (start) == end_byte - start_byte)
5704f39a 8822 return Qnil;
df7492f9 8823
e1c23804 8824 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 8825 {
e1c23804
DL
8826 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8827 move_gap_both (XINT (start), start_byte);
df7492f9 8828 else
e1c23804 8829 move_gap_both (XINT (end), end_byte);
b73bfc1c 8830 }
e1c23804 8831 pos = XINT (start);
b73bfc1c 8832 }
7553d0e1 8833
df7492f9
KH
8834 list = Qnil;
8835 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 8836 {
df7492f9 8837 elt = XCAR (tail);
7d64c6ad 8838 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
8839 ASET (attrs, coding_attr_trans_tbl,
8840 get_translation_table (attrs, 1, NULL));
7d64c6ad 8841 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
8842 }
8843
df7492f9 8844 if (STRINGP (start))
8f924df7 8845 p = pbeg = SDATA (start);
72d1a715 8846 else
df7492f9
KH
8847 p = pbeg = BYTE_POS_ADDR (start_byte);
8848 pend = p + (end_byte - start_byte);
4ed46869 8849
df7492f9
KH
8850 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8851 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 8852
df7492f9 8853 while (p < pend)
d46c5b12 8854 {
df7492f9
KH
8855 if (ASCII_BYTE_P (*p))
8856 p++;
e133c8fa 8857 else
05e6f5dc 8858 {
df7492f9
KH
8859 c = STRING_CHAR_ADVANCE (p);
8860
8861 charset_map_loaded = 0;
8862 for (tail = list; CONSP (tail); tail = XCDR (tail))
8863 {
8864 elt = XCDR (XCAR (tail));
8865 if (! char_encodable_p (c, XCAR (elt)))
8866 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8867 }
8868 if (charset_map_loaded)
8869 {
8870 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8871
8872 if (STRINGP (start))
8f924df7 8873 pbeg = SDATA (start);
df7492f9
KH
8874 else
8875 pbeg = BYTE_POS_ADDR (start_byte);
8876 p = pbeg + p_offset;
8877 pend = pbeg + pend_offset;
8878 }
05e6f5dc 8879 }
df7492f9 8880 pos++;
d46c5b12 8881 }
4ed46869 8882
df7492f9
KH
8883 tail = list;
8884 list = Qnil;
8885 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 8886 {
df7492f9
KH
8887 elt = XCAR (tail);
8888 if (CONSP (XCDR (XCDR (elt))))
8889 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8890 list);
ec6d2bb8 8891 }
2b4f9037 8892
df7492f9 8893 return list;
d46c5b12
KH
8894}
8895
3fd9494b 8896
b73bfc1c 8897Lisp_Object
cf84bb53
JB
8898code_convert_region (Lisp_Object start, Lisp_Object end,
8899 Lisp_Object coding_system, Lisp_Object dst_object,
8900 int encodep, int norecord)
4ed46869 8901{
3a73fa5d 8902 struct coding_system coding;
df7492f9
KH
8903 EMACS_INT from, from_byte, to, to_byte;
8904 Lisp_Object src_object;
4ed46869 8905
b7826503
PJ
8906 CHECK_NUMBER_COERCE_MARKER (start);
8907 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
8908 if (NILP (coding_system))
8909 coding_system = Qno_conversion;
8910 else
8911 CHECK_CODING_SYSTEM (coding_system);
8912 src_object = Fcurrent_buffer ();
8913 if (NILP (dst_object))
8914 dst_object = src_object;
8915 else if (! EQ (dst_object, Qt))
8916 CHECK_BUFFER (dst_object);
3a73fa5d 8917
d46c5b12
KH
8918 validate_region (&start, &end);
8919 from = XFASTINT (start);
df7492f9 8920 from_byte = CHAR_TO_BYTE (from);
d46c5b12 8921 to = XFASTINT (end);
df7492f9 8922 to_byte = CHAR_TO_BYTE (to);
764ca8da 8923
df7492f9
KH
8924 setup_coding_system (coding_system, &coding);
8925 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 8926
df7492f9
KH
8927 if (encodep)
8928 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8929 dst_object);
8930 else
8931 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8932 dst_object);
8933 if (! norecord)
8934 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 8935
df7492f9
KH
8936 return (BUFFERP (dst_object)
8937 ? make_number (coding.produced_char)
8938 : coding.dst_object);
4031e2bf 8939}
78108bcd 8940
4ed46869 8941
4031e2bf 8942DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 8943 3, 4, "r\nzCoding system: ",
48b0f3ae 8944 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
8945When called from a program, takes four arguments:
8946 START, END, CODING-SYSTEM, and DESTINATION.
8947START and END are buffer positions.
8844fa83 8948
df7492f9 8949Optional 4th arguments DESTINATION specifies where the decoded text goes.
2354b80b 8950If nil, the region between START and END is replaced by the decoded text.
1560f91a
EZ
8951If buffer, the decoded text is inserted in that buffer after point (point
8952does not move).
446dcd75 8953In those cases, the length of the decoded text is returned.
319a3947 8954If DESTINATION is t, the decoded text is returned.
8844fa83 8955
48b0f3ae
PJ
8956This function sets `last-coding-system-used' to the precise coding system
8957used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
319a3947 8958not fully specified.) */)
5842a27b 8959 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
4031e2bf 8960{
df7492f9 8961 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 8962}
8844fa83 8963
3a73fa5d 8964DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
8965 3, 4, "r\nzCoding system: ",
8966 doc: /* Encode the current region by specified coding system.
d4a1d553
JB
8967When called from a program, takes four arguments:
8968 START, END, CODING-SYSTEM and DESTINATION.
8969START and END are buffer positions.
d46c5b12 8970
df7492f9
KH
8971Optional 4th arguments DESTINATION specifies where the encoded text goes.
8972If nil, the region between START and END is replace by the encoded text.
1560f91a
EZ
8973If buffer, the encoded text is inserted in that buffer after point (point
8974does not move).
446dcd75 8975In those cases, the length of the encoded text is returned.
319a3947 8976If DESTINATION is t, the encoded text is returned.
2391eaa4 8977
48b0f3ae
PJ
8978This function sets `last-coding-system-used' to the precise coding system
8979used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
319a3947 8980not fully specified.) */)
5842a27b 8981 (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
3a73fa5d 8982{
df7492f9 8983 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
8984}
8985
8986Lisp_Object
6f704c76
DN
8987code_convert_string (Lisp_Object string, Lisp_Object coding_system,
8988 Lisp_Object dst_object, int encodep, int nocopy, int norecord)
b73bfc1c 8989{
4031e2bf 8990 struct coding_system coding;
df7492f9 8991 EMACS_INT chars, bytes;
ec6d2bb8 8992
b7826503 8993 CHECK_STRING (string);
d46c5b12 8994 if (NILP (coding_system))
4956c225 8995 {
df7492f9
KH
8996 if (! norecord)
8997 Vlast_coding_system_used = Qno_conversion;
8998 if (NILP (dst_object))
8999 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 9000 }
b73bfc1c 9001
df7492f9
KH
9002 if (NILP (coding_system))
9003 coding_system = Qno_conversion;
9004 else
9005 CHECK_CODING_SYSTEM (coding_system);
9006 if (NILP (dst_object))
9007 dst_object = Qt;
9008 else if (! EQ (dst_object, Qt))
9009 CHECK_BUFFER (dst_object);
73be902c 9010
df7492f9 9011 setup_coding_system (coding_system, &coding);
d46c5b12 9012 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
9013 chars = SCHARS (string);
9014 bytes = SBYTES (string);
df7492f9
KH
9015 if (encodep)
9016 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9017 else
9018 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9019 if (! norecord)
9020 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 9021
df7492f9
KH
9022 return (BUFFERP (dst_object)
9023 ? make_number (coding.produced_char)
9024 : coding.dst_object);
4ed46869 9025}
73be902c 9026
b73bfc1c 9027
ecec61c1 9028/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 9029 Do not set Vlast_coding_system_used.
4ed46869 9030
ec6d2bb8
KH
9031 This function is called only from macros DECODE_FILE and
9032 ENCODE_FILE, thus we ignore character composition. */
4ed46869 9033
ecec61c1 9034Lisp_Object
cf84bb53
JB
9035code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9036 int encodep)
4ed46869 9037{
0be8721c 9038 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
9039}
9040
4ed46869 9041
df7492f9
KH
9042DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9043 2, 4, 0,
9044 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9045
9046Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9047if the decoding operation is trivial.
ecec61c1 9048
d4a1d553 9049Optional fourth arg BUFFER non-nil means that the decoded text is
1560f91a
EZ
9050inserted in that buffer after point (point does not move). In this
9051case, the return value is the length of the decoded text.
ecec61c1 9052
df7492f9
KH
9053This function sets `last-coding-system-used' to the precise coding system
9054used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
d4a1d553 9055not fully specified.) */)
5842a27b 9056 (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
4ed46869 9057{
df7492f9
KH
9058 return code_convert_string (string, coding_system, buffer,
9059 0, ! NILP (nocopy), 0);
4ed46869
KH
9060}
9061
df7492f9
KH
9062DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9063 2, 4, 0,
9064 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9065
9066Optional third arg NOCOPY non-nil means it is OK to return STRING
9067itself if the encoding operation is trivial.
9068
d4a1d553 9069Optional fourth arg BUFFER non-nil means that the encoded text is
1560f91a
EZ
9070inserted in that buffer after point (point does not move). In this
9071case, the return value is the length of the encoded text.
df7492f9
KH
9072
9073This function sets `last-coding-system-used' to the precise coding system
9074used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9075not fully specified.) */)
5842a27b 9076 (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
4ed46869 9077{
df7492f9 9078 return code_convert_string (string, coding_system, buffer,
c197f191 9079 1, ! NILP (nocopy), 1);
4ed46869 9080}
df7492f9 9081
3a73fa5d 9082\f
4ed46869 9083DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
9084 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9085Return the corresponding character. */)
5842a27b 9086 (Lisp_Object code)
4ed46869 9087{
df7492f9
KH
9088 Lisp_Object spec, attrs, val;
9089 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9090 int c;
4ed46869 9091
df7492f9
KH
9092 CHECK_NATNUM (code);
9093 c = XFASTINT (code);
9094 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9095 attrs = AREF (spec, 0);
4ed46869 9096
df7492f9
KH
9097 if (ASCII_BYTE_P (c)
9098 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9099 return code;
4ed46869 9100
df7492f9
KH
9101 val = CODING_ATTR_CHARSET_LIST (attrs);
9102 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
9103 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9104 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 9105
df7492f9
KH
9106 if (c <= 0x7F)
9107 charset = charset_roman;
9108 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 9109 {
df7492f9
KH
9110 charset = charset_kana;
9111 c -= 0x80;
4ed46869 9112 }
55ab7be3 9113 else
4ed46869 9114 {
004068e4 9115 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
9116
9117 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9118 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9119 error ("Invalid code: %d", code);
9120 SJIS_TO_JIS (c);
9121 charset = charset_kanji;
4ed46869 9122 }
df7492f9
KH
9123 c = DECODE_CHAR (charset, c);
9124 if (c < 0)
9125 error ("Invalid code: %d", code);
9126 return make_number (c);
93dec019 9127}
4ed46869 9128
48b0f3ae 9129
4ed46869 9130DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8acf0c0e 9131 doc: /* Encode a Japanese character CH to shift_jis encoding.
48b0f3ae 9132Return the corresponding code in SJIS. */)
5842a27b 9133 (Lisp_Object ch)
4ed46869 9134{
df7492f9
KH
9135 Lisp_Object spec, attrs, charset_list;
9136 int c;
9137 struct charset *charset;
9138 unsigned code;
48b0f3ae 9139
df7492f9
KH
9140 CHECK_CHARACTER (ch);
9141 c = XFASTINT (ch);
9142 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9143 attrs = AREF (spec, 0);
9144
9145 if (ASCII_CHAR_P (c)
9146 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9147 return ch;
9148
9149 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9150 charset = char_charset (c, charset_list, &code);
9151 if (code == CHARSET_INVALID_CODE (charset))
9152 error ("Can't encode by shift_jis encoding: %d", c);
9153 JIS_TO_SJIS (code);
9154
9155 return make_number (code);
4ed46869
KH
9156}
9157
9158DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
9159 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9160Return the corresponding character. */)
5842a27b 9161 (Lisp_Object code)
d46c5b12 9162{
df7492f9
KH
9163 Lisp_Object spec, attrs, val;
9164 struct charset *charset_roman, *charset_big5, *charset;
9165 int c;
6289dd10 9166
df7492f9
KH
9167 CHECK_NATNUM (code);
9168 c = XFASTINT (code);
9169 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9170 attrs = AREF (spec, 0);
4ed46869 9171
df7492f9
KH
9172 if (ASCII_BYTE_P (c)
9173 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9174 return code;
6289dd10 9175
df7492f9
KH
9176 val = CODING_ATTR_CHARSET_LIST (attrs);
9177 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9178 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 9179
df7492f9
KH
9180 if (c <= 0x7F)
9181 charset = charset_roman;
c28a9453
KH
9182 else
9183 {
df7492f9
KH
9184 int b1 = c >> 8, b2 = c & 0x7F;
9185 if (b1 < 0xA1 || b1 > 0xFE
9186 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9187 error ("Invalid code: %d", code);
9188 charset = charset_big5;
c28a9453 9189 }
df7492f9
KH
9190 c = DECODE_CHAR (charset, (unsigned )c);
9191 if (c < 0)
9192 error ("Invalid code: %d", code);
9193 return make_number (c);
d46c5b12 9194}
6289dd10 9195
4ed46869 9196DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8acf0c0e 9197 doc: /* Encode the Big5 character CH to BIG5 coding system.
48b0f3ae 9198Return the corresponding character code in Big5. */)
5842a27b 9199 (Lisp_Object ch)
4ed46869 9200{
df7492f9
KH
9201 Lisp_Object spec, attrs, charset_list;
9202 struct charset *charset;
9203 int c;
9204 unsigned code;
9205
9206 CHECK_CHARACTER (ch);
9207 c = XFASTINT (ch);
9208 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9209 attrs = AREF (spec, 0);
9210 if (ASCII_CHAR_P (c)
9211 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9212 return ch;
9213
9214 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9215 charset = char_charset (c, charset_list, &code);
9216 if (code == CHARSET_INVALID_CODE (charset))
9217 error ("Can't encode by Big5 encoding: %d", c);
9218
9219 return make_number (code);
4ed46869 9220}
48b0f3ae 9221
3a73fa5d 9222\f
002fdb44 9223DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
68bba4e4 9224 Sset_terminal_coding_system_internal, 1, 2, 0,
48b0f3ae 9225 doc: /* Internal use only. */)
5842a27b 9226 (Lisp_Object coding_system, Lisp_Object terminal)
4ed46869 9227{
b18fad6d
KH
9228 struct terminal *term = get_terminal (terminal, 1);
9229 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (term);
b7826503 9230 CHECK_SYMBOL (coding_system);
b8299c66 9231 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
70c22245 9232 /* We had better not send unsafe characters to terminal. */
c73bd236 9233 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
ad1746f5 9234 /* Character composition should be disabled. */
c73bd236 9235 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b8299c66
KL
9236 terminal_coding->src_multibyte = 1;
9237 terminal_coding->dst_multibyte = 0;
b18fad6d
KH
9238 if (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK)
9239 term->charset_list = coding_charset_list (terminal_coding);
9240 else
6b4bb703 9241 term->charset_list = Fcons (make_number (charset_ascii), Qnil);
4ed46869
KH
9242 return Qnil;
9243}
9244
c4825358
KH
9245DEFUN ("set-safe-terminal-coding-system-internal",
9246 Fset_safe_terminal_coding_system_internal,
48b0f3ae 9247 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 9248 doc: /* Internal use only. */)
5842a27b 9249 (Lisp_Object coding_system)
d46c5b12 9250{
b7826503 9251 CHECK_SYMBOL (coding_system);
c4825358
KH
9252 setup_coding_system (Fcheck_coding_system (coding_system),
9253 &safe_terminal_coding);
ad1746f5 9254 /* Character composition should be disabled. */
df7492f9 9255 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
9256 safe_terminal_coding.src_multibyte = 1;
9257 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
9258 return Qnil;
9259}
4ed46869 9260
002fdb44 9261DEFUN ("terminal-coding-system", Fterminal_coding_system,
68bba4e4 9262 Sterminal_coding_system, 0, 1, 0,
6ed8eeff 9263 doc: /* Return coding system specified for terminal output on the given terminal.
708e05dc 9264TERMINAL may be a terminal object, a frame, or nil for the selected
6ed8eeff 9265frame's terminal device. */)
5842a27b 9266 (Lisp_Object terminal)
4ed46869 9267{
985773c9
MB
9268 struct coding_system *terminal_coding
9269 = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9270 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
ae6f73fa 9271
ae6f73fa 9272 /* For backward compatibility, return nil if it is `undecided'. */
f75c90a9 9273 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
4ed46869
KH
9274}
9275
002fdb44 9276DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
68bba4e4 9277 Sset_keyboard_coding_system_internal, 1, 2, 0,
48b0f3ae 9278 doc: /* Internal use only. */)
5842a27b 9279 (Lisp_Object coding_system, Lisp_Object terminal)
4ed46869 9280{
6ed8eeff 9281 struct terminal *t = get_terminal (terminal, 1);
b7826503 9282 CHECK_SYMBOL (coding_system);
624bda09
KH
9283 if (NILP (coding_system))
9284 coding_system = Qno_conversion;
9285 else
9286 Fcheck_coding_system (coding_system);
9287 setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
ad1746f5 9288 /* Character composition should be disabled. */
c73bd236
MB
9289 TERMINAL_KEYBOARD_CODING (t)->common_flags
9290 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
9291 return Qnil;
9292}
9293
9294DEFUN ("keyboard-coding-system",
985773c9 9295 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
48b0f3ae 9296 doc: /* Return coding system specified for decoding keyboard input. */)
5842a27b 9297 (Lisp_Object terminal)
4ed46869 9298{
985773c9
MB
9299 return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9300 (get_terminal (terminal, 1))->id);
4ed46869
KH
9301}
9302
4ed46869 9303\f
a5d301df
KH
9304DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9305 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
9306 doc: /* Choose a coding system for an operation based on the target name.
9307The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9308DECODING-SYSTEM is the coding system to use for decoding
9309\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9310for encoding (in case OPERATION does encoding).
05e6f5dc 9311
48b0f3ae
PJ
9312The first argument OPERATION specifies an I/O primitive:
9313 For file I/O, `insert-file-contents' or `write-region'.
9314 For process I/O, `call-process', `call-process-region', or `start-process'.
9315 For network I/O, `open-network-stream'.
05e6f5dc 9316
48b0f3ae
PJ
9317The remaining arguments should be the same arguments that were passed
9318to the primitive. Depending on which primitive, one of those arguments
9319is selected as the TARGET. For example, if OPERATION does file I/O,
9320whichever argument specifies the file name is TARGET.
05e6f5dc 9321
48b0f3ae 9322TARGET has a meaning which depends on OPERATION:
b883cdb2 9323 For file I/O, TARGET is a file name (except for the special case below).
48b0f3ae 9324 For process I/O, TARGET is a process name.
d4a1d553 9325 For network I/O, TARGET is a service name or a port number.
05e6f5dc 9326
d4a1d553 9327This function looks up what is specified for TARGET in
48b0f3ae
PJ
9328`file-coding-system-alist', `process-coding-system-alist',
9329or `network-coding-system-alist' depending on OPERATION.
9330They may specify a coding system, a cons of coding systems,
9331or a function symbol to call.
9332In the last case, we call the function with one argument,
9333which is a list of all the arguments given to this function.
1011c487
MB
9334If the function can't decide a coding system, it can return
9335`undecided' so that the normal code-detection is performed.
48b0f3ae 9336
b883cdb2
MB
9337If OPERATION is `insert-file-contents', the argument corresponding to
9338TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
9339file name to look up, and BUFFER is a buffer that contains the file's
9340contents (not yet decoded). If `file-coding-system-alist' specifies a
9341function to call for FILENAME, that function should examine the
9342contents of BUFFER instead of reading the file.
9343
d918f936 9344usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
5842a27b 9345 (int nargs, Lisp_Object *args)
6b89e3aa 9346{
4ed46869
KH
9347 Lisp_Object operation, target_idx, target, val;
9348 register Lisp_Object chain;
177c0ea7 9349
4ed46869
KH
9350 if (nargs < 2)
9351 error ("Too few arguments");
9352 operation = args[0];
9353 if (!SYMBOLP (operation)
9354 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
3ed051d4 9355 error ("Invalid first argument");
4ed46869
KH
9356 if (nargs < 1 + XINT (target_idx))
9357 error ("Too few arguments for operation: %s",
8f924df7 9358 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
9359 target = args[XINT (target_idx) + 1];
9360 if (!(STRINGP (target)
091a0ff0
KH
9361 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9362 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
4ed46869 9363 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 9364 error ("Invalid %dth argument", XINT (target_idx) + 1);
091a0ff0
KH
9365 if (CONSP (target))
9366 target = XCAR (target);
4ed46869 9367
2e34157c
RS
9368 chain = ((EQ (operation, Qinsert_file_contents)
9369 || EQ (operation, Qwrite_region))
02ba4723 9370 ? Vfile_coding_system_alist
2e34157c 9371 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
9372 ? Vnetwork_coding_system_alist
9373 : Vprocess_coding_system_alist));
4ed46869
KH
9374 if (NILP (chain))
9375 return Qnil;
9376
03699b14 9377 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 9378 {
f44d27ce 9379 Lisp_Object elt;
6b89e3aa 9380
df7492f9 9381 elt = XCAR (chain);
4ed46869
KH
9382 if (CONSP (elt)
9383 && ((STRINGP (target)
03699b14
KR
9384 && STRINGP (XCAR (elt))
9385 && fast_string_match (XCAR (elt), target) >= 0)
9386 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 9387 {
03699b14 9388 val = XCDR (elt);
b19fd4c5
KH
9389 /* Here, if VAL is both a valid coding system and a valid
9390 function symbol, we return VAL as a coding system. */
02ba4723
KH
9391 if (CONSP (val))
9392 return val;
9393 if (! SYMBOLP (val))
9394 return Qnil;
9395 if (! NILP (Fcoding_system_p (val)))
9396 return Fcons (val, val);
b19fd4c5 9397 if (! NILP (Ffboundp (val)))
6b89e3aa 9398 {
e2b97060
MB
9399 /* We use call1 rather than safe_call1
9400 so as to get bug reports about functions called here
9401 which don't handle the current interface. */
9402 val = call1 (val, Flist (nargs, args));
b19fd4c5
KH
9403 if (CONSP (val))
9404 return val;
9405 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9406 return Fcons (val, val);
6b89e3aa 9407 }
02ba4723 9408 return Qnil;
6b89e3aa
KH
9409 }
9410 }
4ed46869 9411 return Qnil;
6b89e3aa
KH
9412}
9413
df7492f9 9414DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 9415 Sset_coding_system_priority, 0, MANY, 0,
da7db224 9416 doc: /* Assign higher priority to the coding systems given as arguments.
d4a1d553 9417If multiple coding systems belong to the same category,
a3181084
DL
9418all but the first one are ignored.
9419
d4a1d553 9420usage: (set-coding-system-priority &rest coding-systems) */)
5842a27b 9421 (int nargs, Lisp_Object *args)
df7492f9
KH
9422{
9423 int i, j;
9424 int changed[coding_category_max];
9425 enum coding_category priorities[coding_category_max];
9426
72af86bd 9427 memset (changed, 0, sizeof changed);
6b89e3aa 9428
df7492f9 9429 for (i = j = 0; i < nargs; i++)
6b89e3aa 9430 {
df7492f9
KH
9431 enum coding_category category;
9432 Lisp_Object spec, attrs;
6b89e3aa 9433
df7492f9
KH
9434 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9435 attrs = AREF (spec, 0);
9436 category = XINT (CODING_ATTR_CATEGORY (attrs));
9437 if (changed[category])
9438 /* Ignore this coding system because a coding system of the
9439 same category already had a higher priority. */
9440 continue;
9441 changed[category] = 1;
9442 priorities[j++] = category;
9443 if (coding_categories[category].id >= 0
9444 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9445 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 9446 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 9447 }
6b89e3aa 9448
df7492f9
KH
9449 /* Now we have decided top J priorities. Reflect the order of the
9450 original priorities to the remaining priorities. */
6b89e3aa 9451
df7492f9 9452 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 9453 {
df7492f9
KH
9454 while (j < coding_category_max
9455 && changed[coding_priorities[j]])
9456 j++;
9457 if (j == coding_category_max)
9458 abort ();
9459 priorities[i] = coding_priorities[j];
9460 }
6b89e3aa 9461
72af86bd 9462 memcpy (coding_priorities, priorities, sizeof priorities);
177c0ea7 9463
ff563fce
KH
9464 /* Update `coding-category-list'. */
9465 Vcoding_category_list = Qnil;
9466 for (i = coding_category_max - 1; i >= 0; i--)
9467 Vcoding_category_list
9468 = Fcons (AREF (Vcoding_category_table, priorities[i]),
9469 Vcoding_category_list);
6b89e3aa 9470
df7492f9 9471 return Qnil;
6b89e3aa
KH
9472}
9473
df7492f9
KH
9474DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9475 Scoding_system_priority_list, 0, 1, 0,
da7db224 9476 doc: /* Return a list of coding systems ordered by their priorities.
b811c52b
KH
9477The list contains a subset of coding systems; i.e. coding systems
9478assigned to each coding category (see `coding-category-list').
9479
da7db224 9480HIGHESTP non-nil means just return the highest priority one. */)
5842a27b 9481 (Lisp_Object highestp)
d46c5b12
KH
9482{
9483 int i;
df7492f9 9484 Lisp_Object val;
6b89e3aa 9485
df7492f9 9486 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 9487 {
df7492f9
KH
9488 enum coding_category category = coding_priorities[i];
9489 int id = coding_categories[category].id;
9490 Lisp_Object attrs;
068a9dbd 9491
df7492f9
KH
9492 if (id < 0)
9493 continue;
9494 attrs = CODING_ID_ATTRS (id);
9495 if (! NILP (highestp))
9496 return CODING_ATTR_BASE_NAME (attrs);
9497 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9498 }
9499 return Fnreverse (val);
9500}
068a9dbd 9501
91433552 9502static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
9503
9504static Lisp_Object
971de7fb 9505make_subsidiaries (Lisp_Object base)
068a9dbd 9506{
df7492f9 9507 Lisp_Object subsidiaries;
8f924df7 9508 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
9509 char *buf = (char *) alloca (base_name_len + 6);
9510 int i;
068a9dbd 9511
72af86bd 9512 memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
df7492f9
KH
9513 subsidiaries = Fmake_vector (make_number (3), Qnil);
9514 for (i = 0; i < 3; i++)
068a9dbd 9515 {
72af86bd 9516 memcpy (buf + base_name_len, suffixes[i], strlen (suffixes[i]) + 1);
df7492f9 9517 ASET (subsidiaries, i, intern (buf));
068a9dbd 9518 }
df7492f9 9519 return subsidiaries;
068a9dbd
KH
9520}
9521
9522
df7492f9
KH
9523DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9524 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
9525 doc: /* For internal use only.
9526usage: (define-coding-system-internal ...) */)
5842a27b 9527 (int nargs, Lisp_Object *args)
068a9dbd 9528{
df7492f9
KH
9529 Lisp_Object name;
9530 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
9531 Lisp_Object attrs; /* Vector of attributes. */
9532 Lisp_Object eol_type;
9533 Lisp_Object aliases;
9534 Lisp_Object coding_type, charset_list, safe_charsets;
9535 enum coding_category category;
9536 Lisp_Object tail, val;
9537 int max_charset_id = 0;
9538 int i;
068a9dbd 9539
df7492f9
KH
9540 if (nargs < coding_arg_max)
9541 goto short_args;
068a9dbd 9542
df7492f9 9543 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 9544
df7492f9
KH
9545 name = args[coding_arg_name];
9546 CHECK_SYMBOL (name);
9547 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 9548
df7492f9
KH
9549 val = args[coding_arg_mnemonic];
9550 if (! STRINGP (val))
9551 CHECK_CHARACTER (val);
9552 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 9553
df7492f9
KH
9554 coding_type = args[coding_arg_coding_type];
9555 CHECK_SYMBOL (coding_type);
9556 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 9557
df7492f9
KH
9558 charset_list = args[coding_arg_charset_list];
9559 if (SYMBOLP (charset_list))
9560 {
9561 if (EQ (charset_list, Qiso_2022))
9562 {
9563 if (! EQ (coding_type, Qiso_2022))
9564 error ("Invalid charset-list");
9565 charset_list = Viso_2022_charset_list;
9566 }
9567 else if (EQ (charset_list, Qemacs_mule))
9568 {
9569 if (! EQ (coding_type, Qemacs_mule))
9570 error ("Invalid charset-list");
9571 charset_list = Vemacs_mule_charset_list;
9572 }
9573 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9574 if (max_charset_id < XFASTINT (XCAR (tail)))
9575 max_charset_id = XFASTINT (XCAR (tail));
9576 }
068a9dbd
KH
9577 else
9578 {
df7492f9 9579 charset_list = Fcopy_sequence (charset_list);
985773c9 9580 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
068a9dbd 9581 {
df7492f9
KH
9582 struct charset *charset;
9583
985773c9 9584 val = XCAR (tail);
df7492f9
KH
9585 CHECK_CHARSET_GET_CHARSET (val, charset);
9586 if (EQ (coding_type, Qiso_2022)
9587 ? CHARSET_ISO_FINAL (charset) < 0
9588 : EQ (coding_type, Qemacs_mule)
9589 ? CHARSET_EMACS_MULE_ID (charset) < 0
9590 : 0)
9591 error ("Can't handle charset `%s'",
8f924df7 9592 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 9593
8f924df7 9594 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
9595 if (max_charset_id < charset->id)
9596 max_charset_id = charset->id;
068a9dbd
KH
9597 }
9598 }
df7492f9 9599 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 9600
1b3b981b
AS
9601 safe_charsets = make_uninit_string (max_charset_id + 1);
9602 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9 9603 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 9604 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 9605 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 9606
584948ac 9607 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 9608
df7492f9 9609 val = args[coding_arg_decode_translation_table];
a6f87d34 9610 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 9611 CHECK_SYMBOL (val);
df7492f9 9612 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 9613
df7492f9 9614 val = args[coding_arg_encode_translation_table];
a6f87d34 9615 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 9616 CHECK_SYMBOL (val);
df7492f9 9617 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 9618
df7492f9
KH
9619 val = args[coding_arg_post_read_conversion];
9620 CHECK_SYMBOL (val);
9621 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 9622
df7492f9
KH
9623 val = args[coding_arg_pre_write_conversion];
9624 CHECK_SYMBOL (val);
9625 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 9626
df7492f9
KH
9627 val = args[coding_arg_default_char];
9628 if (NILP (val))
9629 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9630 else
9631 {
8f924df7 9632 CHECK_CHARACTER (val);
df7492f9
KH
9633 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9634 }
4031e2bf 9635
8f924df7
KH
9636 val = args[coding_arg_for_unibyte];
9637 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 9638
df7492f9
KH
9639 val = args[coding_arg_plist];
9640 CHECK_LIST (val);
9641 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 9642
df7492f9
KH
9643 if (EQ (coding_type, Qcharset))
9644 {
c7c66a95
KH
9645 /* Generate a lisp vector of 256 elements. Each element is nil,
9646 integer, or a list of charset IDs.
3a73fa5d 9647
c7c66a95
KH
9648 If Nth element is nil, the byte code N is invalid in this
9649 coding system.
4ed46869 9650
c7c66a95
KH
9651 If Nth element is a number NUM, N is the first byte of a
9652 charset whose ID is NUM.
4ed46869 9653
c7c66a95
KH
9654 If Nth element is a list of charset IDs, N is the first byte
9655 of one of them. The list is sorted by dimensions of the
ad1746f5 9656 charsets. A charset of smaller dimension comes first. */
df7492f9 9657 val = Fmake_vector (make_number (256), Qnil);
4ed46869 9658
5c99c2e6 9659 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 9660 {
c7c66a95
KH
9661 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9662 int dim = CHARSET_DIMENSION (charset);
9663 int idx = (dim - 1) * 4;
4ed46869 9664
5c99c2e6 9665 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 9666 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 9667
15d143f7
KH
9668 for (i = charset->code_space[idx];
9669 i <= charset->code_space[idx + 1]; i++)
9670 {
c7c66a95
KH
9671 Lisp_Object tmp, tmp2;
9672 int dim2;
ec6d2bb8 9673
c7c66a95
KH
9674 tmp = AREF (val, i);
9675 if (NILP (tmp))
9676 tmp = XCAR (tail);
9677 else if (NUMBERP (tmp))
9678 {
9679 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9680 if (dim < dim2)
c7c66a95 9681 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
9682 else
9683 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 9684 }
15d143f7 9685 else
c7c66a95
KH
9686 {
9687 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9688 {
9689 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9690 if (dim < dim2)
9691 break;
9692 }
9693 if (NILP (tmp2))
9694 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9695 else
9696 {
9697 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9698 XSETCAR (tmp2, XCAR (tail));
9699 }
9700 }
9701 ASET (val, i, tmp);
15d143f7 9702 }
df7492f9
KH
9703 }
9704 ASET (attrs, coding_attr_charset_valids, val);
9705 category = coding_category_charset;
9706 }
9707 else if (EQ (coding_type, Qccl))
9708 {
9709 Lisp_Object valids;
ecec61c1 9710
df7492f9
KH
9711 if (nargs < coding_arg_ccl_max)
9712 goto short_args;
ecec61c1 9713
df7492f9
KH
9714 val = args[coding_arg_ccl_decoder];
9715 CHECK_CCL_PROGRAM (val);
9716 if (VECTORP (val))
9717 val = Fcopy_sequence (val);
9718 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 9719
df7492f9
KH
9720 val = args[coding_arg_ccl_encoder];
9721 CHECK_CCL_PROGRAM (val);
9722 if (VECTORP (val))
9723 val = Fcopy_sequence (val);
9724 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 9725
df7492f9
KH
9726 val = args[coding_arg_ccl_valids];
9727 valids = Fmake_string (make_number (256), make_number (0));
9728 for (tail = val; !NILP (tail); tail = Fcdr (tail))
9729 {
8dcbea82 9730 int from, to;
ecec61c1 9731
df7492f9
KH
9732 val = Fcar (tail);
9733 if (INTEGERP (val))
8dcbea82
KH
9734 {
9735 from = to = XINT (val);
9736 if (from < 0 || from > 255)
9737 args_out_of_range_3 (val, make_number (0), make_number (255));
9738 }
df7492f9
KH
9739 else
9740 {
df7492f9 9741 CHECK_CONS (val);
8f924df7
KH
9742 CHECK_NATNUM_CAR (val);
9743 CHECK_NATNUM_CDR (val);
df7492f9 9744 from = XINT (XCAR (val));
8f924df7 9745 if (from > 255)
8dcbea82
KH
9746 args_out_of_range_3 (XCAR (val),
9747 make_number (0), make_number (255));
df7492f9 9748 to = XINT (XCDR (val));
8dcbea82
KH
9749 if (to < from || to > 255)
9750 args_out_of_range_3 (XCDR (val),
9751 XCAR (val), make_number (255));
df7492f9 9752 }
8dcbea82 9753 for (i = from; i <= to; i++)
8f924df7 9754 SSET (valids, i, 1);
df7492f9
KH
9755 }
9756 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 9757
df7492f9 9758 category = coding_category_ccl;
55ab7be3 9759 }
df7492f9 9760 else if (EQ (coding_type, Qutf_16))
55ab7be3 9761 {
df7492f9 9762 Lisp_Object bom, endian;
4ed46869 9763
584948ac 9764 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 9765
df7492f9
KH
9766 if (nargs < coding_arg_utf16_max)
9767 goto short_args;
4ed46869 9768
df7492f9
KH
9769 bom = args[coding_arg_utf16_bom];
9770 if (! NILP (bom) && ! EQ (bom, Qt))
9771 {
9772 CHECK_CONS (bom);
8f924df7
KH
9773 val = XCAR (bom);
9774 CHECK_CODING_SYSTEM (val);
9775 val = XCDR (bom);
9776 CHECK_CODING_SYSTEM (val);
df7492f9 9777 }
a470d443 9778 ASET (attrs, coding_attr_utf_bom, bom);
df7492f9
KH
9779
9780 endian = args[coding_arg_utf16_endian];
b49a1807
KH
9781 CHECK_SYMBOL (endian);
9782 if (NILP (endian))
9783 endian = Qbig;
9784 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 9785 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
9786 ASET (attrs, coding_attr_utf_16_endian, endian);
9787
9788 category = (CONSP (bom)
9789 ? coding_category_utf_16_auto
9790 : NILP (bom)
b49a1807 9791 ? (EQ (endian, Qbig)
df7492f9
KH
9792 ? coding_category_utf_16_be_nosig
9793 : coding_category_utf_16_le_nosig)
b49a1807 9794 : (EQ (endian, Qbig)
df7492f9
KH
9795 ? coding_category_utf_16_be
9796 : coding_category_utf_16_le));
9797 }
9798 else if (EQ (coding_type, Qiso_2022))
9799 {
9800 Lisp_Object initial, reg_usage, request, flags;
4776e638 9801 int i;
1397dc18 9802
df7492f9
KH
9803 if (nargs < coding_arg_iso2022_max)
9804 goto short_args;
9805
9806 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9807 CHECK_VECTOR (initial);
9808 for (i = 0; i < 4; i++)
9809 {
9810 val = Faref (initial, make_number (i));
9811 if (! NILP (val))
9812 {
584948ac
KH
9813 struct charset *charset;
9814
9815 CHECK_CHARSET_GET_CHARSET (val, charset);
9816 ASET (initial, i, make_number (CHARSET_ID (charset)));
9817 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9818 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9819 }
9820 else
9821 ASET (initial, i, make_number (-1));
9822 }
9823
9824 reg_usage = args[coding_arg_iso2022_reg_usage];
9825 CHECK_CONS (reg_usage);
8f924df7
KH
9826 CHECK_NUMBER_CAR (reg_usage);
9827 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
9828
9829 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9830 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 9831 {
df7492f9 9832 int id;
8f924df7 9833 Lisp_Object tmp;
df7492f9
KH
9834
9835 val = Fcar (tail);
9836 CHECK_CONS (val);
8f924df7
KH
9837 tmp = XCAR (val);
9838 CHECK_CHARSET_GET_ID (tmp, id);
9839 CHECK_NATNUM_CDR (val);
df7492f9
KH
9840 if (XINT (XCDR (val)) >= 4)
9841 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 9842 XSETCAR (val, make_number (id));
1397dc18 9843 }
4ed46869 9844
df7492f9
KH
9845 flags = args[coding_arg_iso2022_flags];
9846 CHECK_NATNUM (flags);
9847 i = XINT (flags);
9848 if (EQ (args[coding_arg_charset_list], Qiso_2022))
9849 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9850
9851 ASET (attrs, coding_attr_iso_initial, initial);
9852 ASET (attrs, coding_attr_iso_usage, reg_usage);
9853 ASET (attrs, coding_attr_iso_request, request);
9854 ASET (attrs, coding_attr_iso_flags, flags);
9855 setup_iso_safe_charsets (attrs);
9856
9857 if (i & CODING_ISO_FLAG_SEVEN_BITS)
9858 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9859 | CODING_ISO_FLAG_SINGLE_SHIFT))
9860 ? coding_category_iso_7_else
9861 : EQ (args[coding_arg_charset_list], Qiso_2022)
9862 ? coding_category_iso_7
9863 : coding_category_iso_7_tight);
9864 else
9865 {
9866 int id = XINT (AREF (initial, 1));
9867
c6fb6e98 9868 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
9869 || EQ (args[coding_arg_charset_list], Qiso_2022)
9870 || id < 0)
9871 ? coding_category_iso_8_else
9872 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9873 ? coding_category_iso_8_1
9874 : coding_category_iso_8_2);
9875 }
0ce7886f
KH
9876 if (category != coding_category_iso_8_1
9877 && category != coding_category_iso_8_2)
9878 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
9879 }
9880 else if (EQ (coding_type, Qemacs_mule))
c28a9453 9881 {
df7492f9
KH
9882 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9883 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 9884 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 9885 category = coding_category_emacs_mule;
c28a9453 9886 }
df7492f9 9887 else if (EQ (coding_type, Qshift_jis))
c28a9453 9888 {
df7492f9
KH
9889
9890 struct charset *charset;
9891
7d64c6ad 9892 if (XINT (Flength (charset_list)) != 3
6e07c25f 9893 && XINT (Flength (charset_list)) != 4)
7d64c6ad 9894 error ("There should be three or four charsets");
df7492f9
KH
9895
9896 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9897 if (CHARSET_DIMENSION (charset) != 1)
9898 error ("Dimension of charset %s is not one",
8f924df7 9899 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9900 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9901 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9902
9903 charset_list = XCDR (charset_list);
9904 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9905 if (CHARSET_DIMENSION (charset) != 1)
9906 error ("Dimension of charset %s is not one",
8f924df7 9907 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
9908
9909 charset_list = XCDR (charset_list);
9910 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9911 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
9912 error ("Dimension of charset %s is not two",
9913 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9914
9915 charset_list = XCDR (charset_list);
2b917a06
KH
9916 if (! NILP (charset_list))
9917 {
9918 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9919 if (CHARSET_DIMENSION (charset) != 2)
9920 error ("Dimension of charset %s is not two",
9921 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9922 }
df7492f9
KH
9923
9924 category = coding_category_sjis;
9925 Vsjis_coding_system = name;
c28a9453 9926 }
df7492f9
KH
9927 else if (EQ (coding_type, Qbig5))
9928 {
9929 struct charset *charset;
4ed46869 9930
df7492f9
KH
9931 if (XINT (Flength (charset_list)) != 2)
9932 error ("There should be just two charsets");
9933
9934 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9935 if (CHARSET_DIMENSION (charset) != 1)
9936 error ("Dimension of charset %s is not one",
8f924df7 9937 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9938 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9939 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9940
9941 charset_list = XCDR (charset_list);
9942 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9943 if (CHARSET_DIMENSION (charset) != 2)
9944 error ("Dimension of charset %s is not two",
8f924df7 9945 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 9946
df7492f9
KH
9947 category = coding_category_big5;
9948 Vbig5_coding_system = name;
9949 }
9950 else if (EQ (coding_type, Qraw_text))
c28a9453 9951 {
584948ac
KH
9952 category = coding_category_raw_text;
9953 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 9954 }
df7492f9 9955 else if (EQ (coding_type, Qutf_8))
4ed46869 9956 {
a470d443
KH
9957 Lisp_Object bom;
9958
584948ac 9959 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
a470d443
KH
9960
9961 if (nargs < coding_arg_utf8_max)
9962 goto short_args;
9963
9964 bom = args[coding_arg_utf8_bom];
9965 if (! NILP (bom) && ! EQ (bom, Qt))
9966 {
9967 CHECK_CONS (bom);
9968 val = XCAR (bom);
9969 CHECK_CODING_SYSTEM (val);
9970 val = XCDR (bom);
9971 CHECK_CODING_SYSTEM (val);
9972 }
9973 ASET (attrs, coding_attr_utf_bom, bom);
9974
9975 category = (CONSP (bom) ? coding_category_utf_8_auto
9976 : NILP (bom) ? coding_category_utf_8_nosig
9977 : coding_category_utf_8_sig);
4ed46869 9978 }
df7492f9
KH
9979 else if (EQ (coding_type, Qundecided))
9980 category = coding_category_undecided;
4ed46869 9981 else
df7492f9 9982 error ("Invalid coding system type: %s",
8f924df7 9983 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 9984
df7492f9 9985 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
9986 CODING_ATTR_PLIST (attrs)
9987 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9988 CODING_ATTR_PLIST (attrs)));
35befdaa 9989 CODING_ATTR_PLIST (attrs)
3ed051d4 9990 = Fcons (QCascii_compatible_p,
35befdaa
KH
9991 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9992 CODING_ATTR_PLIST (attrs)));
c4825358 9993
df7492f9
KH
9994 eol_type = args[coding_arg_eol_type];
9995 if (! NILP (eol_type)
9996 && ! EQ (eol_type, Qunix)
9997 && ! EQ (eol_type, Qdos)
9998 && ! EQ (eol_type, Qmac))
9999 error ("Invalid eol-type");
4ed46869 10000
df7492f9 10001 aliases = Fcons (name, Qnil);
4ed46869 10002
df7492f9
KH
10003 if (NILP (eol_type))
10004 {
10005 eol_type = make_subsidiaries (name);
10006 for (i = 0; i < 3; i++)
1397dc18 10007 {
df7492f9
KH
10008 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10009
10010 this_name = AREF (eol_type, i);
10011 this_aliases = Fcons (this_name, Qnil);
10012 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10013 this_spec = Fmake_vector (make_number (3), attrs);
10014 ASET (this_spec, 1, this_aliases);
10015 ASET (this_spec, 2, this_eol_type);
10016 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10017 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
583f71ca
KH
10018 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10019 if (NILP (val))
10020 Vcoding_system_alist
10021 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10022 Vcoding_system_alist);
1397dc18 10023 }
d46c5b12 10024 }
4ed46869 10025
df7492f9
KH
10026 spec_vec = Fmake_vector (make_number (3), attrs);
10027 ASET (spec_vec, 1, aliases);
10028 ASET (spec_vec, 2, eol_type);
48b0f3ae 10029
df7492f9
KH
10030 Fputhash (name, spec_vec, Vcoding_system_hash_table);
10031 Vcoding_system_list = Fcons (name, Vcoding_system_list);
583f71ca
KH
10032 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10033 if (NILP (val))
10034 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10035 Vcoding_system_alist);
48b0f3ae 10036
df7492f9
KH
10037 {
10038 int id = coding_categories[category].id;
48b0f3ae 10039
df7492f9
KH
10040 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10041 setup_coding_system (name, &coding_categories[category]);
10042 }
48b0f3ae 10043
d46c5b12 10044 return Qnil;
48b0f3ae 10045
df7492f9
KH
10046 short_args:
10047 return Fsignal (Qwrong_number_of_arguments,
10048 Fcons (intern ("define-coding-system-internal"),
10049 make_number (nargs)));
d46c5b12 10050}
4ed46869 10051
d6925f38 10052
a6f87d34
KH
10053DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10054 3, 3, 0,
10055 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
5842a27b 10056 (Lisp_Object coding_system, Lisp_Object prop, Lisp_Object val)
a6f87d34 10057{
3dbe7859 10058 Lisp_Object spec, attrs;
a6f87d34
KH
10059
10060 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10061 attrs = AREF (spec, 0);
10062 if (EQ (prop, QCmnemonic))
10063 {
10064 if (! STRINGP (val))
10065 CHECK_CHARACTER (val);
10066 CODING_ATTR_MNEMONIC (attrs) = val;
10067 }
2133e2d1 10068 else if (EQ (prop, QCdefault_char))
a6f87d34
KH
10069 {
10070 if (NILP (val))
10071 val = make_number (' ');
10072 else
10073 CHECK_CHARACTER (val);
10074 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10075 }
10076 else if (EQ (prop, QCdecode_translation_table))
10077 {
10078 if (! CHAR_TABLE_P (val) && ! CONSP (val))
10079 CHECK_SYMBOL (val);
10080 CODING_ATTR_DECODE_TBL (attrs) = val;
10081 }
10082 else if (EQ (prop, QCencode_translation_table))
10083 {
10084 if (! CHAR_TABLE_P (val) && ! CONSP (val))
10085 CHECK_SYMBOL (val);
10086 CODING_ATTR_ENCODE_TBL (attrs) = val;
10087 }
10088 else if (EQ (prop, QCpost_read_conversion))
10089 {
10090 CHECK_SYMBOL (val);
10091 CODING_ATTR_POST_READ (attrs) = val;
10092 }
10093 else if (EQ (prop, QCpre_write_conversion))
10094 {
10095 CHECK_SYMBOL (val);
10096 CODING_ATTR_PRE_WRITE (attrs) = val;
10097 }
35befdaa
KH
10098 else if (EQ (prop, QCascii_compatible_p))
10099 {
10100 CODING_ATTR_ASCII_COMPAT (attrs) = val;
10101 }
a6f87d34
KH
10102
10103 CODING_ATTR_PLIST (attrs)
10104 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10105 return val;
10106}
10107
10108
df7492f9
KH
10109DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10110 Sdefine_coding_system_alias, 2, 2, 0,
10111 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
5842a27b 10112 (Lisp_Object alias, Lisp_Object coding_system)
66cfb530 10113{
583f71ca 10114 Lisp_Object spec, aliases, eol_type, val;
4ed46869 10115
df7492f9
KH
10116 CHECK_SYMBOL (alias);
10117 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10118 aliases = AREF (spec, 1);
d4a1d553 10119 /* ALIASES should be a list of length more than zero, and the first
d6925f38
KH
10120 element is a base coding system. Append ALIAS at the tail of the
10121 list. */
df7492f9
KH
10122 while (!NILP (XCDR (aliases)))
10123 aliases = XCDR (aliases);
8f924df7 10124 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 10125
df7492f9
KH
10126 eol_type = AREF (spec, 2);
10127 if (VECTORP (eol_type))
4ed46869 10128 {
df7492f9
KH
10129 Lisp_Object subsidiaries;
10130 int i;
4ed46869 10131
df7492f9
KH
10132 subsidiaries = make_subsidiaries (alias);
10133 for (i = 0; i < 3; i++)
10134 Fdefine_coding_system_alias (AREF (subsidiaries, i),
10135 AREF (eol_type, i));
4ed46869 10136 }
df7492f9
KH
10137
10138 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 10139 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
583f71ca
KH
10140 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10141 if (NILP (val))
10142 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10143 Vcoding_system_alist);
66cfb530 10144
4ed46869
KH
10145 return Qnil;
10146}
10147
df7492f9
KH
10148DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10149 1, 1, 0,
10150 doc: /* Return the base of CODING-SYSTEM.
da7db224 10151Any alias or subsidiary coding system is not a base coding system. */)
5842a27b 10152 (Lisp_Object coding_system)
d46c5b12 10153{
df7492f9 10154 Lisp_Object spec, attrs;
d46c5b12 10155
df7492f9
KH
10156 if (NILP (coding_system))
10157 return (Qno_conversion);
10158 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10159 attrs = AREF (spec, 0);
10160 return CODING_ATTR_BASE_NAME (attrs);
10161}
1397dc18 10162
df7492f9
KH
10163DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10164 1, 1, 0,
10165 doc: "Return the property list of CODING-SYSTEM.")
5842a27b 10166 (Lisp_Object coding_system)
df7492f9
KH
10167{
10168 Lisp_Object spec, attrs;
1397dc18 10169
df7492f9
KH
10170 if (NILP (coding_system))
10171 coding_system = Qno_conversion;
10172 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10173 attrs = AREF (spec, 0);
10174 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
10175}
10176
df7492f9
KH
10177
10178DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10179 1, 1, 0,
da7db224 10180 doc: /* Return the list of aliases of CODING-SYSTEM. */)
5842a27b 10181 (Lisp_Object coding_system)
66cfb530 10182{
df7492f9 10183 Lisp_Object spec;
84d60297 10184
df7492f9
KH
10185 if (NILP (coding_system))
10186 coding_system = Qno_conversion;
10187 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 10188 return AREF (spec, 1);
df7492f9 10189}
66cfb530 10190
df7492f9
KH
10191DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10192 Scoding_system_eol_type, 1, 1, 0,
10193 doc: /* Return eol-type of CODING-SYSTEM.
d4a1d553 10194An eol-type is an integer 0, 1, 2, or a vector of coding systems.
66cfb530 10195
df7492f9
KH
10196Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10197and CR respectively.
66cfb530 10198
df7492f9
KH
10199A vector value indicates that a format of end-of-line should be
10200detected automatically. Nth element of the vector is the subsidiary
10201coding system whose eol-type is N. */)
5842a27b 10202 (Lisp_Object coding_system)
6b89e3aa 10203{
df7492f9
KH
10204 Lisp_Object spec, eol_type;
10205 int n;
6b89e3aa 10206
df7492f9
KH
10207 if (NILP (coding_system))
10208 coding_system = Qno_conversion;
10209 if (! CODING_SYSTEM_P (coding_system))
10210 return Qnil;
10211 spec = CODING_SYSTEM_SPEC (coding_system);
10212 eol_type = AREF (spec, 2);
10213 if (VECTORP (eol_type))
10214 return Fcopy_sequence (eol_type);
10215 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10216 return make_number (n);
6b89e3aa
KH
10217}
10218
4ed46869
KH
10219#endif /* emacs */
10220
10221\f
1397dc18 10222/*** 12. Post-amble ***/
4ed46869 10223
dfcf069d 10224void
971de7fb 10225init_coding_once (void)
4ed46869
KH
10226{
10227 int i;
10228
df7492f9
KH
10229 for (i = 0; i < coding_category_max; i++)
10230 {
10231 coding_categories[i].id = -1;
10232 coding_priorities[i] = i;
10233 }
4ed46869
KH
10234
10235 /* ISO2022 specific initialize routine. */
10236 for (i = 0; i < 0x20; i++)
b73bfc1c 10237 iso_code_class[i] = ISO_control_0;
4ed46869
KH
10238 for (i = 0x21; i < 0x7F; i++)
10239 iso_code_class[i] = ISO_graphic_plane_0;
10240 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 10241 iso_code_class[i] = ISO_control_1;
4ed46869
KH
10242 for (i = 0xA1; i < 0xFF; i++)
10243 iso_code_class[i] = ISO_graphic_plane_1;
10244 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10245 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
10246 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10247 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10248 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10249 iso_code_class[ISO_CODE_ESC] = ISO_escape;
10250 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10251 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10252 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10253
df7492f9
KH
10254 for (i = 0; i < 256; i++)
10255 {
10256 emacs_mule_bytes[i] = 1;
10257 }
7c78e542
KH
10258 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10259 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10260 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10261 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
10262}
10263
10264#ifdef emacs
10265
dfcf069d 10266void
971de7fb 10267syms_of_coding (void)
e0e989f6 10268{
df7492f9 10269 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
10270 {
10271 Lisp_Object args[2];
10272 args[0] = QCtest;
10273 args[1] = Qeq;
10274 Vcoding_system_hash_table = Fmake_hash_table (2, args);
10275 }
df7492f9
KH
10276
10277 staticpro (&Vsjis_coding_system);
10278 Vsjis_coding_system = Qnil;
e0e989f6 10279
df7492f9
KH
10280 staticpro (&Vbig5_coding_system);
10281 Vbig5_coding_system = Qnil;
10282
24a73b0a
KH
10283 staticpro (&Vcode_conversion_reused_workbuf);
10284 Vcode_conversion_reused_workbuf = Qnil;
10285
10286 staticpro (&Vcode_conversion_workbuf_name);
d67b4f80 10287 Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
e0e989f6 10288
24a73b0a 10289 reused_workbuf_in_use = 0;
df7492f9
KH
10290
10291 DEFSYM (Qcharset, "charset");
10292 DEFSYM (Qtarget_idx, "target-idx");
10293 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
10294 Fset (Qcoding_system_history, Qnil);
10295
9ce27fde 10296 /* Target FILENAME is the first argument. */
e0e989f6 10297 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 10298 /* Target FILENAME is the third argument. */
e0e989f6
KH
10299 Fput (Qwrite_region, Qtarget_idx, make_number (2));
10300
df7492f9 10301 DEFSYM (Qcall_process, "call-process");
9ce27fde 10302 /* Target PROGRAM is the first argument. */
e0e989f6
KH
10303 Fput (Qcall_process, Qtarget_idx, make_number (0));
10304
df7492f9 10305 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 10306 /* Target PROGRAM is the third argument. */
e0e989f6
KH
10307 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10308
df7492f9 10309 DEFSYM (Qstart_process, "start-process");
9ce27fde 10310 /* Target PROGRAM is the third argument. */
e0e989f6
KH
10311 Fput (Qstart_process, Qtarget_idx, make_number (2));
10312
df7492f9 10313 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 10314 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
10315 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10316
df7492f9
KH
10317 DEFSYM (Qcoding_system, "coding-system");
10318 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 10319
df7492f9
KH
10320 DEFSYM (Qeol_type, "eol-type");
10321 DEFSYM (Qunix, "unix");
10322 DEFSYM (Qdos, "dos");
4ed46869 10323
df7492f9
KH
10324 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10325 DEFSYM (Qpost_read_conversion, "post-read-conversion");
10326 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10327 DEFSYM (Qdefault_char, "default-char");
10328 DEFSYM (Qundecided, "undecided");
10329 DEFSYM (Qno_conversion, "no-conversion");
10330 DEFSYM (Qraw_text, "raw-text");
4ed46869 10331
df7492f9 10332 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 10333
df7492f9 10334 DEFSYM (Qutf_8, "utf-8");
8f924df7 10335 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 10336
df7492f9 10337 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
10338 DEFSYM (Qbig, "big");
10339 DEFSYM (Qlittle, "little");
27901516 10340
df7492f9
KH
10341 DEFSYM (Qshift_jis, "shift-jis");
10342 DEFSYM (Qbig5, "big5");
4ed46869 10343
df7492f9 10344 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 10345
df7492f9 10346 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869 10347 Fput (Qcoding_system_error, Qerror_conditions,
d67b4f80 10348 pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
4ed46869 10349 Fput (Qcoding_system_error, Qerror_message,
d67b4f80 10350 make_pure_c_string ("Invalid coding system"));
4ed46869 10351
05e6f5dc
KH
10352 /* Intern this now in case it isn't already done.
10353 Setting this variable twice is harmless.
10354 But don't staticpro it here--that is done in alloc.c. */
d67b4f80 10355 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
70c22245 10356
df7492f9 10357 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 10358 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
10359 DEFSYM (Qtranslation_table_id, "translation-table-id");
10360 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10361 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 10362
df7492f9 10363 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 10364
df7492f9 10365 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 10366
01378f49 10367 DEFSYM (QCcategory, ":category");
a6f87d34 10368 DEFSYM (QCmnemonic, ":mnemonic");
2133e2d1 10369 DEFSYM (QCdefault_char, ":default-char");
a6f87d34
KH
10370 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10371 DEFSYM (QCencode_translation_table, ":encode-translation-table");
10372 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10373 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
35befdaa 10374 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
01378f49 10375
df7492f9
KH
10376 Vcoding_category_table
10377 = Fmake_vector (make_number (coding_category_max), Qnil);
10378 staticpro (&Vcoding_category_table);
10379 /* Followings are target of code detection. */
10380 ASET (Vcoding_category_table, coding_category_iso_7,
d67b4f80 10381 intern_c_string ("coding-category-iso-7"));
df7492f9 10382 ASET (Vcoding_category_table, coding_category_iso_7_tight,
d67b4f80 10383 intern_c_string ("coding-category-iso-7-tight"));
df7492f9 10384 ASET (Vcoding_category_table, coding_category_iso_8_1,
d67b4f80 10385 intern_c_string ("coding-category-iso-8-1"));
df7492f9 10386 ASET (Vcoding_category_table, coding_category_iso_8_2,
d67b4f80 10387 intern_c_string ("coding-category-iso-8-2"));
df7492f9 10388 ASET (Vcoding_category_table, coding_category_iso_7_else,
d67b4f80 10389 intern_c_string ("coding-category-iso-7-else"));
df7492f9 10390 ASET (Vcoding_category_table, coding_category_iso_8_else,
d67b4f80 10391 intern_c_string ("coding-category-iso-8-else"));
a470d443 10392 ASET (Vcoding_category_table, coding_category_utf_8_auto,
d67b4f80 10393 intern_c_string ("coding-category-utf-8-auto"));
a470d443 10394 ASET (Vcoding_category_table, coding_category_utf_8_nosig,
d67b4f80 10395 intern_c_string ("coding-category-utf-8"));
a470d443 10396 ASET (Vcoding_category_table, coding_category_utf_8_sig,
d67b4f80 10397 intern_c_string ("coding-category-utf-8-sig"));
df7492f9 10398 ASET (Vcoding_category_table, coding_category_utf_16_be,
d67b4f80 10399 intern_c_string ("coding-category-utf-16-be"));
ff563fce 10400 ASET (Vcoding_category_table, coding_category_utf_16_auto,
d67b4f80 10401 intern_c_string ("coding-category-utf-16-auto"));
df7492f9 10402 ASET (Vcoding_category_table, coding_category_utf_16_le,
d67b4f80 10403 intern_c_string ("coding-category-utf-16-le"));
df7492f9 10404 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
d67b4f80 10405 intern_c_string ("coding-category-utf-16-be-nosig"));
df7492f9 10406 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
d67b4f80 10407 intern_c_string ("coding-category-utf-16-le-nosig"));
df7492f9 10408 ASET (Vcoding_category_table, coding_category_charset,
d67b4f80 10409 intern_c_string ("coding-category-charset"));
df7492f9 10410 ASET (Vcoding_category_table, coding_category_sjis,
d67b4f80 10411 intern_c_string ("coding-category-sjis"));
df7492f9 10412 ASET (Vcoding_category_table, coding_category_big5,
d67b4f80 10413 intern_c_string ("coding-category-big5"));
df7492f9 10414 ASET (Vcoding_category_table, coding_category_ccl,
d67b4f80 10415 intern_c_string ("coding-category-ccl"));
df7492f9 10416 ASET (Vcoding_category_table, coding_category_emacs_mule,
d67b4f80 10417 intern_c_string ("coding-category-emacs-mule"));
df7492f9
KH
10418 /* Followings are NOT target of code detection. */
10419 ASET (Vcoding_category_table, coding_category_raw_text,
d67b4f80 10420 intern_c_string ("coding-category-raw-text"));
df7492f9 10421 ASET (Vcoding_category_table, coding_category_undecided,
d67b4f80 10422 intern_c_string ("coding-category-undecided"));
ecf488bc 10423
065e3595
KH
10424 DEFSYM (Qinsufficient_source, "insufficient-source");
10425 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10426 DEFSYM (Qinvalid_source, "invalid-source");
10427 DEFSYM (Qinterrupted, "interrupted");
10428 DEFSYM (Qinsufficient_memory, "insufficient-memory");
44e8490d 10429 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
065e3595 10430
4ed46869
KH
10431 defsubr (&Scoding_system_p);
10432 defsubr (&Sread_coding_system);
10433 defsubr (&Sread_non_nil_coding_system);
10434 defsubr (&Scheck_coding_system);
10435 defsubr (&Sdetect_coding_region);
d46c5b12 10436 defsubr (&Sdetect_coding_string);
05e6f5dc 10437 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 10438 defsubr (&Sunencodable_char_position);
df7492f9 10439 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
10440 defsubr (&Sdecode_coding_region);
10441 defsubr (&Sencode_coding_region);
10442 defsubr (&Sdecode_coding_string);
10443 defsubr (&Sencode_coding_string);
10444 defsubr (&Sdecode_sjis_char);
10445 defsubr (&Sencode_sjis_char);
10446 defsubr (&Sdecode_big5_char);
10447 defsubr (&Sencode_big5_char);
1ba9e4ab 10448 defsubr (&Sset_terminal_coding_system_internal);
c4825358 10449 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 10450 defsubr (&Sterminal_coding_system);
1ba9e4ab 10451 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 10452 defsubr (&Skeyboard_coding_system);
a5d301df 10453 defsubr (&Sfind_operation_coding_system);
df7492f9 10454 defsubr (&Sset_coding_system_priority);
6b89e3aa 10455 defsubr (&Sdefine_coding_system_internal);
df7492f9 10456 defsubr (&Sdefine_coding_system_alias);
a6f87d34 10457 defsubr (&Scoding_system_put);
df7492f9
KH
10458 defsubr (&Scoding_system_base);
10459 defsubr (&Scoding_system_plist);
10460 defsubr (&Scoding_system_aliases);
10461 defsubr (&Scoding_system_eol_type);
10462 defsubr (&Scoding_system_priority_list);
4ed46869 10463
29208e82 10464 DEFVAR_LISP ("coding-system-list", Vcoding_system_list,
48b0f3ae
PJ
10465 doc: /* List of coding systems.
10466
10467Do not alter the value of this variable manually. This variable should be
df7492f9 10468updated by the functions `define-coding-system' and
48b0f3ae 10469`define-coding-system-alias'. */);
4608c386
KH
10470 Vcoding_system_list = Qnil;
10471
29208e82 10472 DEFVAR_LISP ("coding-system-alist", Vcoding_system_alist,
48b0f3ae
PJ
10473 doc: /* Alist of coding system names.
10474Each element is one element list of coding system name.
446dcd75 10475This variable is given to `completing-read' as COLLECTION argument.
48b0f3ae
PJ
10476
10477Do not alter the value of this variable manually. This variable should be
10478updated by the functions `make-coding-system' and
10479`define-coding-system-alias'. */);
4608c386
KH
10480 Vcoding_system_alist = Qnil;
10481
29208e82 10482 DEFVAR_LISP ("coding-category-list", Vcoding_category_list,
48b0f3ae
PJ
10483 doc: /* List of coding-categories (symbols) ordered by priority.
10484
10485On detecting a coding system, Emacs tries code detection algorithms
10486associated with each coding-category one by one in this order. When
10487one algorithm agrees with a byte sequence of source text, the coding
0ec31faf
KH
10488system bound to the corresponding coding-category is selected.
10489
448e17d6 10490Don't modify this variable directly, but use `set-coding-system-priority'. */);
4ed46869
KH
10491 {
10492 int i;
10493
10494 Vcoding_category_list = Qnil;
df7492f9 10495 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 10496 Vcoding_category_list
d46c5b12
KH
10497 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10498 Vcoding_category_list);
4ed46869
KH
10499 }
10500
29208e82 10501 DEFVAR_LISP ("coding-system-for-read", Vcoding_system_for_read,
48b0f3ae
PJ
10502 doc: /* Specify the coding system for read operations.
10503It is useful to bind this variable with `let', but do not set it globally.
10504If the value is a coding system, it is used for decoding on read operation.
446dcd75
JB
10505If not, an appropriate element is used from one of the coding system alists.
10506There are three such tables: `file-coding-system-alist',
48b0f3ae 10507`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
10508 Vcoding_system_for_read = Qnil;
10509
29208e82 10510 DEFVAR_LISP ("coding-system-for-write", Vcoding_system_for_write,
48b0f3ae
PJ
10511 doc: /* Specify the coding system for write operations.
10512Programs bind this variable with `let', but you should not set it globally.
10513If the value is a coding system, it is used for encoding of output,
10514when writing it to a file and when sending it to a file or subprocess.
10515
10516If this does not specify a coding system, an appropriate element
446dcd75
JB
10517is used from one of the coding system alists.
10518There are three such tables: `file-coding-system-alist',
48b0f3ae
PJ
10519`process-coding-system-alist', and `network-coding-system-alist'.
10520For output to files, if the above procedure does not specify a coding system,
10521the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
10522 Vcoding_system_for_write = Qnil;
10523
29208e82 10524 DEFVAR_LISP ("last-coding-system-used", Vlast_coding_system_used,
df7492f9
KH
10525 doc: /*
10526Coding system used in the latest file or process I/O. */);
4ed46869
KH
10527 Vlast_coding_system_used = Qnil;
10528
29208e82 10529 DEFVAR_LISP ("last-code-conversion-error", Vlast_code_conversion_error,
065e3595
KH
10530 doc: /*
10531Error status of the last code conversion.
10532
10533When an error was detected in the last code conversion, this variable
10534is set to one of the following symbols.
10535 `insufficient-source'
10536 `inconsistent-eol'
10537 `invalid-source'
10538 `interrupted'
10539 `insufficient-memory'
10540When no error was detected, the value doesn't change. So, to check
10541the error status of a code conversion by this variable, you must
10542explicitly set this variable to nil before performing code
10543conversion. */);
10544 Vlast_code_conversion_error = Qnil;
10545
29208e82 10546 DEFVAR_BOOL ("inhibit-eol-conversion", inhibit_eol_conversion,
df7492f9
KH
10547 doc: /*
10548*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
10549See info node `Coding Systems' and info node `Text and Binary' concerning
10550such conversion. */);
9ce27fde
KH
10551 inhibit_eol_conversion = 0;
10552
29208e82 10553 DEFVAR_BOOL ("inherit-process-coding-system", inherit_process_coding_system,
df7492f9
KH
10554 doc: /*
10555Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
10556Bind it to t if the process output is to be treated as if it were a file
10557read from some filesystem. */);
ed29121d
EZ
10558 inherit_process_coding_system = 0;
10559
29208e82 10560 DEFVAR_LISP ("file-coding-system-alist", Vfile_coding_system_alist,
df7492f9
KH
10561 doc: /*
10562Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
10563The format is ((PATTERN . VAL) ...),
10564where PATTERN is a regular expression matching a file name,
10565VAL is a coding system, a cons of coding systems, or a function symbol.
10566If VAL is a coding system, it is used for both decoding and encoding
10567the file contents.
10568If VAL is a cons of coding systems, the car part is used for decoding,
10569and the cdr part is used for encoding.
10570If VAL is a function symbol, the function must return a coding system
2c53e699
KH
10571or a cons of coding systems which are used as above. The function is
10572called with an argument that is a list of the arguments with which
5a0bbd9a
KH
10573`find-operation-coding-system' was called. If the function can't decide
10574a coding system, it can return `undecided' so that the normal
10575code-detection is performed.
48b0f3ae
PJ
10576
10577See also the function `find-operation-coding-system'
10578and the variable `auto-coding-alist'. */);
02ba4723
KH
10579 Vfile_coding_system_alist = Qnil;
10580
29208e82 10581 DEFVAR_LISP ("process-coding-system-alist", Vprocess_coding_system_alist,
df7492f9
KH
10582 doc: /*
10583Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
10584The format is ((PATTERN . VAL) ...),
10585where PATTERN is a regular expression matching a program name,
10586VAL is a coding system, a cons of coding systems, or a function symbol.
10587If VAL is a coding system, it is used for both decoding what received
10588from the program and encoding what sent to the program.
10589If VAL is a cons of coding systems, the car part is used for decoding,
10590and the cdr part is used for encoding.
10591If VAL is a function symbol, the function must return a coding system
10592or a cons of coding systems which are used as above.
10593
10594See also the function `find-operation-coding-system'. */);
02ba4723
KH
10595 Vprocess_coding_system_alist = Qnil;
10596
29208e82 10597 DEFVAR_LISP ("network-coding-system-alist", Vnetwork_coding_system_alist,
df7492f9
KH
10598 doc: /*
10599Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
10600The format is ((PATTERN . VAL) ...),
10601where PATTERN is a regular expression matching a network service name
10602or is a port number to connect to,
10603VAL is a coding system, a cons of coding systems, or a function symbol.
10604If VAL is a coding system, it is used for both decoding what received
10605from the network stream and encoding what sent to the network stream.
10606If VAL is a cons of coding systems, the car part is used for decoding,
10607and the cdr part is used for encoding.
10608If VAL is a function symbol, the function must return a coding system
10609or a cons of coding systems which are used as above.
10610
10611See also the function `find-operation-coding-system'. */);
02ba4723 10612 Vnetwork_coding_system_alist = Qnil;
4ed46869 10613
29208e82 10614 DEFVAR_LISP ("locale-coding-system", Vlocale_coding_system,
75205970
RS
10615 doc: /* Coding system to use with system messages.
10616Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
10617 Vlocale_coding_system = Qnil;
10618
005f0d35 10619 /* The eol mnemonics are reset in startup.el system-dependently. */
29208e82 10620 DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix,
df7492f9
KH
10621 doc: /*
10622*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
d67b4f80 10623 eol_mnemonic_unix = make_pure_c_string (":");
4ed46869 10624
29208e82 10625 DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos,
df7492f9
KH
10626 doc: /*
10627*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
d67b4f80 10628 eol_mnemonic_dos = make_pure_c_string ("\\");
4ed46869 10629
29208e82 10630 DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac,
df7492f9
KH
10631 doc: /*
10632*String displayed in mode line for MAC-like (CR) end-of-line format. */);
d67b4f80 10633 eol_mnemonic_mac = make_pure_c_string ("/");
4ed46869 10634
29208e82 10635 DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided,
df7492f9
KH
10636 doc: /*
10637*String displayed in mode line when end-of-line format is not yet determined. */);
d67b4f80 10638 eol_mnemonic_undecided = make_pure_c_string (":");
4ed46869 10639
29208e82 10640 DEFVAR_LISP ("enable-character-translation", Venable_character_translation,
df7492f9
KH
10641 doc: /*
10642*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 10643 Venable_character_translation = Qt;
bdd9fb48 10644
f967223b 10645 DEFVAR_LISP ("standard-translation-table-for-decode",
29208e82 10646 Vstandard_translation_table_for_decode,
48b0f3ae 10647 doc: /* Table for translating characters while decoding. */);
f967223b 10648 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 10649
f967223b 10650 DEFVAR_LISP ("standard-translation-table-for-encode",
29208e82 10651 Vstandard_translation_table_for_encode,
48b0f3ae 10652 doc: /* Table for translating characters while encoding. */);
f967223b 10653 Vstandard_translation_table_for_encode = Qnil;
4ed46869 10654
29208e82 10655 DEFVAR_LISP ("charset-revision-table", Vcharset_revision_table,
48b0f3ae
PJ
10656 doc: /* Alist of charsets vs revision numbers.
10657While encoding, if a charset (car part of an element) is found,
df7492f9
KH
10658designate it with the escape sequence identifying revision (cdr part
10659of the element). */);
10660 Vcharset_revision_table = Qnil;
02ba4723
KH
10661
10662 DEFVAR_LISP ("default-process-coding-system",
29208e82 10663 Vdefault_process_coding_system,
48b0f3ae
PJ
10664 doc: /* Cons of coding systems used for process I/O by default.
10665The car part is used for decoding a process output,
10666the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 10667 Vdefault_process_coding_system = Qnil;
c4825358 10668
29208e82 10669 DEFVAR_LISP ("latin-extra-code-table", Vlatin_extra_code_table,
df7492f9
KH
10670 doc: /*
10671Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
10672This is a vector of length 256.
10673If Nth element is non-nil, the existence of code N in a file
10674\(or output of subprocess) doesn't prevent it from being detected as
10675a coding system of ISO 2022 variant which has a flag
10676`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10677or reading output of a subprocess.
446dcd75 10678Only 128th through 159th elements have a meaning. */);
3f003981 10679 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
10680
10681 DEFVAR_LISP ("select-safe-coding-system-function",
29208e82 10682 Vselect_safe_coding_system_function,
df7492f9
KH
10683 doc: /*
10684Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
10685
10686If set, this function is called to force a user to select a proper
10687coding system which can encode the text in the case that a default
fdecf907
GM
10688coding system used in each operation can't encode the text. The
10689function should take care that the buffer is not modified while
10690the coding system is being selected.
48b0f3ae
PJ
10691
10692The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
10693 Vselect_safe_coding_system_function = Qnil;
10694
5d5bf4d8 10695 DEFVAR_BOOL ("coding-system-require-warning",
29208e82 10696 coding_system_require_warning,
5d5bf4d8 10697 doc: /* Internal use only.
6b89e3aa
KH
10698If non-nil, on writing a file, `select-safe-coding-system-function' is
10699called even if `coding-system-for-write' is non-nil. The command
10700`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
10701 coding_system_require_warning = 0;
10702
10703
22ab2303 10704 DEFVAR_BOOL ("inhibit-iso-escape-detection",
29208e82 10705 inhibit_iso_escape_detection,
df7492f9 10706 doc: /*
97b1b294 10707If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
48b0f3ae 10708
97b1b294
EZ
10709When Emacs reads text, it tries to detect how the text is encoded.
10710This code detection is sensitive to escape sequences. If Emacs sees
10711a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10712of the ISO2022 encodings, and decodes text by the corresponding coding
10713system (e.g. `iso-2022-7bit').
48b0f3ae
PJ
10714
10715However, there may be a case that you want to read escape sequences in
10716a file as is. In such a case, you can set this variable to non-nil.
97b1b294
EZ
10717Then the code detection will ignore any escape sequences, and no text is
10718detected as encoded in some ISO-2022 encoding. The result is that all
48b0f3ae
PJ
10719escape sequences become visible in a buffer.
10720
10721The default value is nil, and it is strongly recommended not to change
10722it. That is because many Emacs Lisp source files that contain
10723non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10724in Emacs's distribution, and they won't be decoded correctly on
10725reading if you suppress escape sequence detection.
10726
10727The other way to read escape sequences in a file without decoding is
97b1b294 10728to explicitly specify some coding system that doesn't use ISO-2022
48b0f3ae 10729escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 10730 inhibit_iso_escape_detection = 0;
002fdb44 10731
97b1b294 10732 DEFVAR_BOOL ("inhibit-null-byte-detection",
29208e82 10733 inhibit_null_byte_detection,
97b1b294
EZ
10734 doc: /* If non-nil, Emacs ignores null bytes on code detection.
10735By default, Emacs treats it as binary data, and does not attempt to
10736decode it. The effect is as if you specified `no-conversion' for
10737reading that text.
10738
10739Set this to non-nil when a regular text happens to include null bytes.
10740Examples are Index nodes of Info files and null-byte delimited output
10741from GNU Find and GNU Grep. Emacs will then ignore the null bytes and
10742decode text as usual. */);
10743 inhibit_null_byte_detection = 0;
10744
29208e82 10745 DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
15c8f9d1 10746 doc: /* Char table for translating self-inserting characters.
446dcd75 10747This is applied to the result of input methods, not their input.
8434d0b8
EZ
10748See also `keyboard-translate-table'.
10749
10750Use of this variable for character code unification was rendered
10751obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10752internal character representation. */);
002fdb44 10753 Vtranslation_table_for_input = Qnil;
8f924df7 10754
2c78b7e1
KH
10755 {
10756 Lisp_Object args[coding_arg_max];
8f924df7 10757 Lisp_Object plist[16];
2c78b7e1
KH
10758 int i;
10759
10760 for (i = 0; i < coding_arg_max; i++)
10761 args[i] = Qnil;
10762
d67b4f80 10763 plist[0] = intern_c_string (":name");
2c78b7e1 10764 plist[1] = args[coding_arg_name] = Qno_conversion;
d67b4f80 10765 plist[2] = intern_c_string (":mnemonic");
2c78b7e1 10766 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
d67b4f80 10767 plist[4] = intern_c_string (":coding-type");
2c78b7e1 10768 plist[5] = args[coding_arg_coding_type] = Qraw_text;
d67b4f80 10769 plist[6] = intern_c_string (":ascii-compatible-p");
2c78b7e1 10770 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
d67b4f80 10771 plist[8] = intern_c_string (":default-char");
2c78b7e1 10772 plist[9] = args[coding_arg_default_char] = make_number (0);
d67b4f80 10773 plist[10] = intern_c_string (":for-unibyte");
8f924df7 10774 plist[11] = args[coding_arg_for_unibyte] = Qt;
d67b4f80
DN
10775 plist[12] = intern_c_string (":docstring");
10776 plist[13] = make_pure_c_string ("Do no conversion.\n\
2c78b7e1
KH
10777\n\
10778When you visit a file with this coding, the file is read into a\n\
10779unibyte buffer as is, thus each byte of a file is treated as a\n\
10780character.");
d67b4f80 10781 plist[14] = intern_c_string (":eol-type");
8f924df7
KH
10782 plist[15] = args[coding_arg_eol_type] = Qunix;
10783 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1 10784 Fdefine_coding_system_internal (coding_arg_max, args);
ae6f73fa
KH
10785
10786 plist[1] = args[coding_arg_name] = Qundecided;
10787 plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10788 plist[5] = args[coding_arg_coding_type] = Qundecided;
10789 /* This is already set.
35befdaa 10790 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
d67b4f80 10791 plist[8] = intern_c_string (":charset-list");
ae6f73fa
KH
10792 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10793 plist[11] = args[coding_arg_for_unibyte] = Qnil;
d67b4f80 10794 plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
ae6f73fa
KH
10795 plist[15] = args[coding_arg_eol_type] = Qnil;
10796 args[coding_arg_plist] = Flist (16, plist);
10797 Fdefine_coding_system_internal (coding_arg_max, args);
2c78b7e1
KH
10798 }
10799
2c78b7e1 10800 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
10801
10802 {
10803 int i;
10804
10805 for (i = 0; i < coding_category_max; i++)
10806 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10807 }
1a4990fb 10808#if defined (DOS_NT)
fcbcfb64
KH
10809 system_eol_type = Qdos;
10810#else
10811 system_eol_type = Qunix;
10812#endif
10813 staticpro (&system_eol_type);
4ed46869
KH
10814}
10815
68c45bf0 10816char *
971de7fb 10817emacs_strerror (int error_number)
68c45bf0
PE
10818{
10819 char *str;
10820
ca9c0567 10821 synchronize_system_messages_locale ();
68c45bf0
PE
10822 str = strerror (error_number);
10823
10824 if (! NILP (Vlocale_coding_system))
10825 {
10826 Lisp_Object dec = code_convert_string_norecord (build_string (str),
10827 Vlocale_coding_system,
10828 0);
51b59d79 10829 str = SSDATA (dec);
68c45bf0
PE
10830 }
10831
10832 return str;
10833}
10834
4ed46869 10835#endif /* emacs */