(decode_coding_object): Revert part of last change.
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
aaef169d 2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
8cabe764 3 2006, 2007, 2008 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
8cabe764 5 2005, 2006, 2007, 2008
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8f924df7 8 Copyright (C) 2003
df7492f9
KH
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
4ed46869 11
369314dc
KH
12This file is part of GNU Emacs.
13
14GNU Emacs is free software; you can redistribute it and/or modify
15it under the terms of the GNU General Public License as published by
1427aa65 16the Free Software Foundation; either version 3, or (at your option)
369314dc 17any later version.
4ed46869 18
369314dc
KH
19GNU Emacs is distributed in the hope that it will be useful,
20but WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22GNU General Public License for more details.
4ed46869 23
369314dc
KH
24You should have received a copy of the GNU General Public License
25along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
26the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27Boston, MA 02110-1301, USA. */
4ed46869
KH
28
29/*** TABLE OF CONTENTS ***
30
b73bfc1c 31 0. General comments
4ed46869 32 1. Preamble
df7492f9
KH
33 2. Emacs' internal format (emacs-utf-8) handlers
34 3. UTF-8 handlers
35 4. UTF-16 handlers
36 5. Charset-base coding systems handlers
37 6. emacs-mule (old Emacs' internal format) handlers
38 7. ISO2022 handlers
39 8. Shift-JIS and BIG5 handlers
40 9. CCL handlers
41 10. C library functions
42 11. Emacs Lisp library functions
43 12. Postamble
4ed46869
KH
44
45*/
46
df7492f9 47/*** 0. General comments ***
b73bfc1c
KH
48
49
df7492f9 50CODING SYSTEM
4ed46869 51
5bad0796
DL
52 A coding system is an object for an encoding mechanism that contains
53 information about how to convert byte sequences to character
e19c3639
KH
54 sequences and vice versa. When we say "decode", it means converting
55 a byte sequence of a specific coding system into a character
56 sequence that is represented by Emacs' internal coding system
57 `emacs-utf-8', and when we say "encode", it means converting a
58 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 59 coding system.
4ed46869 60
e19c3639
KH
61 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
62 C level, a coding system is represented by a vector of attributes
5bad0796 63 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
64 coding system symbol to attributes vector is done by looking up
65 Vcoding_system_hash_table by the symbol.
4ed46869 66
e19c3639 67 Coding systems are classified into the following types depending on
5bad0796 68 the encoding mechanism. Here's a brief description of the types.
4ed46869 69
df7492f9
KH
70 o UTF-8
71
72 o UTF-16
73
74 o Charset-base coding system
75
76 A coding system defined by one or more (coded) character sets.
5bad0796 77 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
78 character set.
79
5bad0796 80 o Old Emacs internal format (emacs-mule)
df7492f9 81
5bad0796 82 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 83
df7492f9 84 o ISO2022-base coding system
4ed46869
KH
85
86 The most famous coding system for multiple character sets. X's
df7492f9
KH
87 Compound Text, various EUCs (Extended Unix Code), and coding systems
88 used in the Internet communication such as ISO-2022-JP are all
89 variants of ISO2022.
4ed46869 90
df7492f9 91 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 92
4ed46869
KH
93 A coding system to encode character sets: ASCII, JISX0201, and
94 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 95 section 8.
4ed46869 96
df7492f9 97 o BIG5
4ed46869 98
df7492f9 99 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 100 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
101 described in section 8. In this file, when we write "big5" (all
102 lowercase), we mean the coding system, and when we write "Big5"
103 (capitalized), we mean the character set.
4ed46869 104
df7492f9 105 o CCL
27901516 106
5bad0796 107 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
108 not listed above, he can supply a decoder and an encoder for it in
109 CCL (Code Conversion Language) programs. Emacs executes the CCL
110 program while decoding/encoding.
27901516 111
df7492f9 112 o Raw-text
4ed46869 113
5a936b46 114 A coding system for text containing raw eight-bit data. Emacs
5bad0796 115 treats each byte of source text as a character (except for
df7492f9 116 end-of-line conversion).
4ed46869 117
df7492f9
KH
118 o No-conversion
119
120 Like raw text, but don't do end-of-line conversion.
4ed46869 121
4ed46869 122
df7492f9 123END-OF-LINE FORMAT
4ed46869 124
5bad0796 125 How the end of a line of text is encoded depends on the operating system. For
df7492f9 126 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 127 whereas DOS's format is a two-byte sequence of `carriage-return' and
d46c5b12
KH
128 `line-feed' codes. MacOS's format is usually one byte of
129 `carriage-return'.
4ed46869 130
cfb43547 131 Since text character encoding and end-of-line encoding are
df7492f9
KH
132 independent, any coding system described above can take any format
133 of end-of-line (except for no-conversion).
4ed46869 134
e19c3639
KH
135STRUCT CODING_SYSTEM
136
137 Before using a coding system for code conversion (i.e. decoding and
138 encoding), we setup a structure of type `struct coding_system'.
139 This structure keeps various information about a specific code
5bad0796 140 conversion (e.g. the location of source and destination data).
4ed46869
KH
141
142*/
143
df7492f9
KH
144/* COMMON MACROS */
145
146
4ed46869
KH
147/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
148
df7492f9 149 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
150 CODING conforms to the format of XXX, and update the members of
151 DETECT_INFO.
df7492f9 152
ff0dacd7 153 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
154
155 Below is the template of these functions. */
156
4ed46869 157#if 0
df7492f9 158static int
ff0dacd7 159detect_coding_XXX (coding, detect_info)
df7492f9 160 struct coding_system *coding;
ff0dacd7 161 struct coding_detection_info *detect_info;
4ed46869 162{
f1d34bca
MB
163 const unsigned char *src = coding->source;
164 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 165 int multibytep = coding->src_multibyte;
ff0dacd7 166 int consumed_chars = 0;
df7492f9
KH
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the source is exhausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
ff0dacd7
KH
175
176 if (! __C_conforms_to_XXX___ (c))
177 break;
178 if (! __C_strongly_suggests_XXX__ (c))
179 found = CATEGORY_MASK_XXX;
df7492f9 180 }
ff0dacd7
KH
181 /* The byte sequence is invalid for XXX. */
182 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 183 return 0;
ff0dacd7 184
df7492f9 185 no_more_source:
ff0dacd7
KH
186 /* The source was exhausted successfully. */
187 detect_info->found |= found;
df7492f9 188 return 1;
4ed46869
KH
189}
190#endif
191
192/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
193
df7492f9
KH
194 These functions decode a byte sequence specified as a source by
195 CODING. The resulting multibyte text goes to a place pointed to by
196 CODING->charbuf, the length of which should not exceed
197 CODING->charbuf_size;
d46c5b12 198
df7492f9
KH
199 These functions set the information of original and decoded texts in
200 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
201 They also set CODING->result to one of CODING_RESULT_XXX indicating
202 how the decoding is finished.
d46c5b12 203
df7492f9 204 Below is the template of these functions. */
d46c5b12 205
4ed46869 206#if 0
b73bfc1c 207static void
df7492f9 208decode_coding_XXXX (coding)
4ed46869 209 struct coding_system *coding;
4ed46869 210{
f1d34bca
MB
211 const unsigned char *src = coding->source + coding->consumed;
212 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
213 /* SRC_BASE remembers the start position in source in each loop.
214 The loop will be exited when there's not enough source code, or
215 when there's no room in CHARBUF for a decoded character. */
f1d34bca 216 const unsigned char *src_base;
df7492f9 217 /* A buffer to produce decoded characters. */
69a80ea3
KH
218 int *charbuf = coding->charbuf + coding->charbuf_used;
219 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
220 int multibytep = coding->src_multibyte;
221
222 while (1)
223 {
224 src_base = src;
225 if (charbuf >= charbuf_end)
226 /* No more room to produce a decoded character. */
227 break;
228 ONE_MORE_BYTE (c);
229 /* Decode it. */
230 }
231
232 no_more_source:
233 if (src_base < src_end
234 && coding->mode & CODING_MODE_LAST_BLOCK)
235 /* If the source ends by partial bytes to construct a character,
236 treat them as eight-bit raw data. */
237 while (src_base < src_end && charbuf < charbuf_end)
238 *charbuf++ = *src_base++;
239 /* Remember how many bytes and characters we consumed. If the
240 source is multibyte, the bytes and chars are not identical. */
241 coding->consumed = coding->consumed_char = src_base - coding->source;
242 /* Remember how many characters we produced. */
243 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
244}
245#endif
246
247/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
248
df7492f9
KH
249 These functions encode SRC_BYTES length text at SOURCE of Emacs'
250 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
251 goes to a place pointed to by DESTINATION, the length of which
252 should not exceed DST_BYTES.
d46c5b12 253
df7492f9
KH
254 These functions set the information of original and encoded texts in
255 the members produced, produced_char, consumed, and consumed_char of
256 the structure *CODING. They also set the member result to one of
257 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 258
df7492f9
KH
259 DST_BYTES zero means that source area and destination area are
260 overlapped, which means that we can produce encoded text until it
261 reaches the head of the not-yet-encoded source text.
d46c5b12 262
df7492f9 263 Below is a template of these functions. */
4ed46869 264#if 0
b73bfc1c 265static void
df7492f9 266encode_coding_XXX (coding)
4ed46869 267 struct coding_system *coding;
4ed46869 268{
df7492f9
KH
269 int multibytep = coding->dst_multibyte;
270 int *charbuf = coding->charbuf;
271 int *charbuf_end = coding->charbuf + coding->charbuf_used;
272 unsigned char *dst = coding->destination + coding->produced;
273 unsigned char *dst_end = coding->destination + coding->dst_bytes;
274 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
275 int produced_chars = 0;
276
277 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
278 {
279 int c = *charbuf;
280 /* Encode C into DST, and increment DST. */
281 }
282 label_no_more_destination:
283 /* How many chars and bytes we produced. */
284 coding->produced_char += produced_chars;
285 coding->produced = dst - coding->destination;
4ed46869
KH
286}
287#endif
288
4ed46869
KH
289\f
290/*** 1. Preamble ***/
291
68c45bf0 292#include <config.h>
4ed46869
KH
293#include <stdio.h>
294
4ed46869
KH
295#include "lisp.h"
296#include "buffer.h"
df7492f9 297#include "character.h"
4ed46869
KH
298#include "charset.h"
299#include "ccl.h"
df7492f9 300#include "composite.h"
4ed46869
KH
301#include "coding.h"
302#include "window.h"
b8299c66
KL
303#include "frame.h"
304#include "termhooks.h"
4ed46869 305
df7492f9 306Lisp_Object Vcoding_system_hash_table;
4ed46869 307
df7492f9 308Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
309Lisp_Object Qunix, Qdos;
310extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
311Lisp_Object Qbuffer_file_coding_system;
312Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 313Lisp_Object Qdefault_char;
27901516 314Lisp_Object Qno_conversion, Qundecided;
df7492f9 315Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 316Lisp_Object Qbig, Qlittle;
bb0115a2 317Lisp_Object Qcoding_system_history;
1397dc18 318Lisp_Object Qvalid_codes;
a6f87d34
KH
319Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
320Lisp_Object QCdecode_translation_table, QCencode_translation_table;
321Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
35befdaa 322Lisp_Object QCascii_compatible_p;
4ed46869
KH
323
324extern Lisp_Object Qinsert_file_contents, Qwrite_region;
387f6ba5 325Lisp_Object Qcall_process, Qcall_process_region;
4ed46869
KH
326Lisp_Object Qstart_process, Qopen_network_stream;
327Lisp_Object Qtarget_idx;
328
065e3595
KH
329Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
330Lisp_Object Qinterrupted, Qinsufficient_memory;
331
c7183fb8
GM
332extern Lisp_Object Qcompletion_ignore_case;
333
44e8490d
KH
334/* If a symbol has this property, evaluate the value to define the
335 symbol as a coding system. */
336static Lisp_Object Qcoding_system_define_form;
337
5d5bf4d8
KH
338int coding_system_require_warning;
339
d46c5b12
KH
340Lisp_Object Vselect_safe_coding_system_function;
341
7722baf9
EZ
342/* Mnemonic string for each format of end-of-line. */
343Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
344/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 345 decided. */
7722baf9 346Lisp_Object eol_mnemonic_undecided;
4ed46869 347
fcbcfb64
KH
348/* Format of end-of-line decided by system. This is Qunix on
349 Unix and Mac, Qdos on DOS/Windows.
350 This has an effect only for external encoding (i.e. for output to
351 file and process), not for in-buffer or Lisp string encoding. */
352static Lisp_Object system_eol_type;
353
4ed46869
KH
354#ifdef emacs
355
4608c386
KH
356Lisp_Object Vcoding_system_list, Vcoding_system_alist;
357
358Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 359
d46c5b12
KH
360/* Coding system emacs-mule and raw-text are for converting only
361 end-of-line format. */
362Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 363Lisp_Object Qutf_8_emacs;
ecf488bc 364
4ed46869
KH
365/* Coding-systems are handed between Emacs Lisp programs and C internal
366 routines by the following three variables. */
367/* Coding-system for reading files and receiving data from process. */
368Lisp_Object Vcoding_system_for_read;
369/* Coding-system for writing files and sending data to process. */
370Lisp_Object Vcoding_system_for_write;
371/* Coding-system actually used in the latest I/O. */
372Lisp_Object Vlast_coding_system_used;
065e3595
KH
373/* Set to non-nil when an error is detected while code conversion. */
374Lisp_Object Vlast_code_conversion_error;
c4825358 375/* A vector of length 256 which contains information about special
94487c4e 376 Latin codes (especially for dealing with Microsoft codes). */
3f003981 377Lisp_Object Vlatin_extra_code_table;
c4825358 378
9ce27fde
KH
379/* Flag to inhibit code conversion of end-of-line format. */
380int inhibit_eol_conversion;
381
74383408
KH
382/* Flag to inhibit ISO2022 escape sequence detection. */
383int inhibit_iso_escape_detection;
384
ed29121d
EZ
385/* Flag to make buffer-file-coding-system inherit from process-coding. */
386int inherit_process_coding_system;
387
c4825358
KH
388/* Coding system to be used to encode text for terminal display when
389 terminal coding system is nil. */
390struct coding_system safe_terminal_coding;
391
02ba4723
KH
392Lisp_Object Vfile_coding_system_alist;
393Lisp_Object Vprocess_coding_system_alist;
394Lisp_Object Vnetwork_coding_system_alist;
4ed46869 395
68c45bf0
PE
396Lisp_Object Vlocale_coding_system;
397
4ed46869
KH
398#endif /* emacs */
399
f967223b
KH
400/* Flag to tell if we look up translation table on character code
401 conversion. */
84fbb8a0 402Lisp_Object Venable_character_translation;
f967223b
KH
403/* Standard translation table to look up on decoding (reading). */
404Lisp_Object Vstandard_translation_table_for_decode;
405/* Standard translation table to look up on encoding (writing). */
406Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 407
f967223b
KH
408Lisp_Object Qtranslation_table;
409Lisp_Object Qtranslation_table_id;
410Lisp_Object Qtranslation_table_for_decode;
411Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
412
413/* Alist of charsets vs revision number. */
df7492f9 414static Lisp_Object Vcharset_revision_table;
4ed46869 415
02ba4723
KH
416/* Default coding systems used for process I/O. */
417Lisp_Object Vdefault_process_coding_system;
418
002fdb44
DL
419/* Char table for translating Quail and self-inserting input. */
420Lisp_Object Vtranslation_table_for_input;
421
df7492f9
KH
422/* Two special coding systems. */
423Lisp_Object Vsjis_coding_system;
424Lisp_Object Vbig5_coding_system;
425
df7492f9
KH
426/* ISO2022 section */
427
/* Return, as a C integer, element REG of the vector stored in the
   coding_attr_iso_initial slot of CODING's attribute vector. */
428#define CODING_ISO_INITIAL(coding, reg) \
429 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
430 coding_attr_iso_initial), \
431 reg)))
432
433
/* Return CODING->safe_charsets[CHARSET_ID] if CHARSET_ID does not
   exceed CODING->max_charset_id, otherwise -1.

   CHARSET_ID is parenthesized at every use so that an argument built
   from low-precedence operators (e.g. `p ? a : b') is compared and
   indexed as a single value.  Note that CHARSET_ID is evaluated
   twice, so it must not have side effects.  */
#define CODING_ISO_REQUEST(coding, charset_id)		\
  (((charset_id) <= (coding)->max_charset_id		\
    ? (coding)->safe_charsets[(charset_id)]		\
    : -1))
438
439
/* Accessors for the members of CODING->spec.iso_2022. */

/* The `flags' member. */
440#define CODING_ISO_FLAGS(coding) \
441 ((coding)->spec.iso_2022.flags)
/* Element REG of the `current_designation' array. */
442#define CODING_ISO_DESIGNATION(coding, reg) \
443 ((coding)->spec.iso_2022.current_designation[reg])
/* Element PLANE of the `current_invocation' array. */
444#define CODING_ISO_INVOCATION(coding, plane) \
445 ((coding)->spec.iso_2022.current_invocation[plane])
/* The `single_shifting' member. */
446#define CODING_ISO_SINGLE_SHIFTING(coding) \
447 ((coding)->spec.iso_2022.single_shifting)
/* The `bol' member. */
448#define CODING_ISO_BOL(coding) \
449 ((coding)->spec.iso_2022.bol)
/* The current_designation entry selected by the current_invocation
   entry for PLANE, i.e. a two-step lookup through the two macros
   above. */
450#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
451 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
452
453/* Control characters of ISO2022. */
454 /* code */ /* function */
455#define ISO_CODE_LF 0x0A /* line-feed */
456#define ISO_CODE_CR 0x0D /* carriage-return */
457#define ISO_CODE_SO 0x0E /* shift-out */
458#define ISO_CODE_SI 0x0F /* shift-in */
459#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
460#define ISO_CODE_ESC 0x1B /* escape */
461#define ISO_CODE_SS2 0x8E /* single-shift-2 */
462#define ISO_CODE_SS3 0x8F /* single-shift-3 */
463#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
464
465/* Every 1-byte code of ISO2022 is classified into one of the
466 following classes. */
467enum iso_code_class_type
468 {
469 ISO_control_0, /* Control codes in the range
470 0x00..0x1F and 0x7F, except for the
471 following 4 codes. */
df7492f9
KH
472 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
473 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
474 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
475 ISO_escape, /* ISO_CODE_ESC (0x1B) */
476 ISO_control_1, /* Control codes in the range
477 0x80..0x9F, except for the
478 following 3 codes. */
479 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
480 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
481 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
482 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
483 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
484 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
485 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
486 };
05e6f5dc 487
df7492f9
KH
488/** The macros CODING_ISO_FLAG_XXX each define a flag bit of the
489 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 490
df7492f9
KH
491/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
492 instead of the correct short-form sequence (e.g. ESC $ A). */
493#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 494
df7492f9
KH
495/* If set, reset graphic planes and registers at end-of-line to the
496 initial state. */
497#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 498
df7492f9
KH
499/* If set, reset graphic planes and registers before any control
500 characters to the initial state. */
501#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 502
df7492f9
KH
503/* If set, encode by 7-bit environment. */
504#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 505
df7492f9
KH
506/* If set, use locking-shift function. */
507#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 508
df7492f9
KH
509/* If set, use single-shift function. Overwrite
510 CODING_ISO_FLAG_LOCKING_SHIFT. */
511#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 512
df7492f9
KH
513/* If set, use designation escape sequence. */
514#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 515
df7492f9
KH
516/* If set, produce revision number sequence. */
517#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 518
df7492f9
KH
519/* If set, produce ISO6429's direction specifying sequence. */
520#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 521
df7492f9
KH
522/* If set, assume designation states are reset at beginning of line on
523 output. */
524#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 525
df7492f9
KH
526/* If set, designation sequence should be placed at beginning of line
527 on output. */
528#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 529
df7492f9
KH
530/* If set, do not encode unsafe characters on output. */
531#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 532
df7492f9
KH
533/* If set, extra latin codes (128..159) are accepted as a valid code
534 on input. */
535#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 536
df7492f9 537#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 538
df7492f9 539#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 540
bf16eb23 541#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 542
bf16eb23 543#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 544
bf16eb23 545#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 546
df7492f9
KH
547/* A character to be produced on output if encoding of the original
548 character is prohibited by CODING_ISO_FLAG_SAFE. */
549#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 550
4ed46869 551
df7492f9
KH
552/* UTF-16 section */
/* Accessors for the members of CODING->spec.utf_16. */

/* The `bom' member. */
553#define CODING_UTF_16_BOM(coding) \
554 ((coding)->spec.utf_16.bom)
4ed46869 555
df7492f9
KH
/* The `endian' member. */
556#define CODING_UTF_16_ENDIAN(coding) \
557 ((coding)->spec.utf_16.endian)
4ed46869 558
df7492f9
KH
/* The `surrogate' member. */
559#define CODING_UTF_16_SURROGATE(coding) \
560 ((coding)->spec.utf_16.surrogate)
4ed46869 561
4ed46869 562
df7492f9
KH
563/* CCL section */
/* Each macro below fetches one slot of CODING's attribute vector
   (CODING_ID_ATTRS ((coding)->id)). */

/* The coding_attr_ccl_decoder slot. */
564#define CODING_CCL_DECODER(coding) \
565 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
/* The coding_attr_ccl_encoder slot. */
566#define CODING_CCL_ENCODER(coding) \
567 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
/* SDATA applied to the coding_attr_ccl_valids slot (presumably the
   slot holds a Lisp string and this yields its byte data -- confirm
   against the SDATA definition). */
568#define CODING_CCL_VALIDS(coding) \
8f924df7 569 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 570
5a936b46 571/* Index for each coding category in `coding_categories' */
4ed46869 572
df7492f9
KH
/* Each enumerator through coding_category_raw_text is also used as the
   bit position of the corresponding CATEGORY_MASK_XXX flag, which is
   defined as (1 << enumerator). */
573enum coding_category
574 {
575 coding_category_iso_7,
576 coding_category_iso_7_tight,
577 coding_category_iso_8_1,
578 coding_category_iso_8_2,
579 coding_category_iso_7_else,
580 coding_category_iso_8_else,
581 coding_category_utf_8,
582 coding_category_utf_16_auto,
583 coding_category_utf_16_be,
584 coding_category_utf_16_le,
585 coding_category_utf_16_be_nosig,
586 coding_category_utf_16_le_nosig,
587 coding_category_charset,
588 coding_category_sjis,
589 coding_category_big5,
590 coding_category_ccl,
591 coding_category_emacs_mule,
592 /* All above are targets of code detection. */
593 coding_category_raw_text,
594 coding_category_undecided,
/* Number of categories; used as the length of the coding_priorities
   and coding_categories arrays. */
595 coding_category_max
596 };
597
598/* Definitions of flag bits used in detect_coding_XXXX. */
599#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
600#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
601#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
602#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
603#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
604#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
605#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
b49a1807 606#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
607#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
608#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
609#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
610#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
611#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
612#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
613#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
614#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
615#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 616#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
617
618/* This value is returned if detect_coding_mask () finds nothing other
619 than ASCII characters.

   NOTE(review): CATEGORY_MASK_UTF_16_AUTO and CATEGORY_MASK_RAW_TEXT
   are defined above but are not part of this mask -- confirm that
   the omission is intentional. */
620#define CATEGORY_MASK_ANY \
621 (CATEGORY_MASK_ISO_7 \
622 | CATEGORY_MASK_ISO_7_TIGHT \
623 | CATEGORY_MASK_ISO_8_1 \
624 | CATEGORY_MASK_ISO_8_2 \
625 | CATEGORY_MASK_ISO_7_ELSE \
626 | CATEGORY_MASK_ISO_8_ELSE \
627 | CATEGORY_MASK_UTF_8 \
628 | CATEGORY_MASK_UTF_16_BE \
629 | CATEGORY_MASK_UTF_16_LE \
630 | CATEGORY_MASK_UTF_16_BE_NOSIG \
631 | CATEGORY_MASK_UTF_16_LE_NOSIG \
632 | CATEGORY_MASK_CHARSET \
633 | CATEGORY_MASK_SJIS \
634 | CATEGORY_MASK_BIG5 \
635 | CATEGORY_MASK_CCL \
636 | CATEGORY_MASK_EMACS_MULE)
637
638
639#define CATEGORY_MASK_ISO_7BIT \
640 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
641
642#define CATEGORY_MASK_ISO_8BIT \
643 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
644
645#define CATEGORY_MASK_ISO_ELSE \
646 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
647
648#define CATEGORY_MASK_ISO_ESCAPE \
649 (CATEGORY_MASK_ISO_7 \
650 | CATEGORY_MASK_ISO_7_TIGHT \
651 | CATEGORY_MASK_ISO_7_ELSE \
652 | CATEGORY_MASK_ISO_8_ELSE)
653
654#define CATEGORY_MASK_ISO \
655 ( CATEGORY_MASK_ISO_7BIT \
656 | CATEGORY_MASK_ISO_8BIT \
657 | CATEGORY_MASK_ISO_ELSE)
658
659#define CATEGORY_MASK_UTF_16 \
660 (CATEGORY_MASK_UTF_16_BE \
661 | CATEGORY_MASK_UTF_16_LE \
662 | CATEGORY_MASK_UTF_16_BE_NOSIG \
663 | CATEGORY_MASK_UTF_16_LE_NOSIG)
664
665
666/* List of symbols `coding-category-xxx' ordered by priority. This
667 variable is exposed to Emacs Lisp. */
668static Lisp_Object Vcoding_category_list;
669
670/* Table of coding categories (Lisp symbols). This variable is for
671 internal use only. */
672static Lisp_Object Vcoding_category_table;
673
674/* Table of coding-categories ordered by priority. */
675static enum coding_category coding_priorities[coding_category_max];
676
677/* Nth element is a coding context for the coding system bound to the
678 Nth coding category. */
679static struct coding_system coding_categories[coding_category_max];
680
df7492f9
KH
681/*** Commonly used macros and functions ***/
682
/* Fallback definitions of min and max, supplied only when no earlier
   definition exists.  Each expands its arguments more than once, so
   arguments with side effects (e.g. `i++') must be avoided. */
683#ifndef min
684#define min(a, b) ((a) < (b) ? (a) : (b))
685#endif
686#ifndef max
687#define max(a, b) ((a) > (b) ? (a) : (b))
688#endif
4ed46869 689
24a73b0a
KH
/* Set ATTRS to CODING's attribute vector (CODING_ID_ATTRS of its id)
   and CHARSET_LIST to CODING_ATTR_CHARSET_LIST of that vector.  ATTRS
   and CHARSET_LIST must be assignable lvalues; ATTRS is assigned
   first and then read again to compute CHARSET_LIST. */
690#define CODING_GET_INFO(coding, attrs, charset_list) \
691 do { \
692 (attrs) = CODING_ID_ATTRS ((coding)->id); \
693 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
df7492f9 694 } while (0)
4ed46869 695
4ed46869 696
df7492f9
KH
697/* Safely get one byte from the source text pointed by SRC which ends
698 at SRC_END, and set C to that byte. If there are not enough bytes
065e3595
KH
699 in the source, it jumps to `no_more_source'. If multibytep is
700 nonzero, and a multibyte character is found at SRC, set C to the
701 negative value of the character code. The caller should declare
702 and set these variables appropriately in advance:
703 src, src_end, src_base, multibytep, coding, consumed_chars

   Details of what the macro does:

   - When SRC has reached SRC_END and SRC_BASE < SRC, it records
     CODING_RESULT_INSUFFICIENT_SRC in CODING before jumping to
     `no_more_source'.

   - When multibytep is nonzero and the fetched byte has its high bit
     set, a lead byte of 0xC0 or 0xC1 is merged with the following
     byte as ((C & 1) << 6) | next-byte, consuming both bytes (the
     second byte is read without comparing SRC against SRC_END).  Any
     other lead byte is re-read through string_char, C receives the
     negated result, and CODING_RESULT_INVALID_SRC is recorded in
     CODING.

   - consumed_chars is incremented once per successful invocation. */
aa72b389 704
065e3595
KH
705#define ONE_MORE_BYTE(c) \
706 do { \
707 if (src == src_end) \
708 { \
709 if (src_base < src) \
710 record_conversion_result \
711 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
712 goto no_more_source; \
713 } \
714 c = *src++; \
715 if (multibytep && (c & 0x80)) \
716 { \
717 if ((c & 0xFE) == 0xC0) \
718 c = ((c & 1) << 6) | *src++; \
719 else \
720 { \
35befdaa
KH
721 src--; \
722 c = - string_char (src, &src, NULL); \
065e3595
KH
723 record_conversion_result \
724 (coding, CODING_RESULT_INVALID_SRC); \
725 } \
726 } \
727 consumed_chars++; \
aa72b389
KH
728 } while (0)
729
aa72b389 730
065e3595
KH
/* Same byte-fetching steps as ONE_MORE_BYTE, but without the
   `src == src_end' test and without the jump to `no_more_source':
   SRC is dereferenced unconditionally, so the caller is presumably
   expected to have verified that enough source bytes remain.  Uses
   the caller's variables src, multibytep, coding and consumed_chars. */
731#define ONE_MORE_BYTE_NO_CHECK(c) \
732 do { \
733 c = *src++; \
734 if (multibytep && (c & 0x80)) \
735 { \
736 if ((c & 0xFE) == 0xC0) \
737 c = ((c & 1) << 6) | *src++; \
738 else \
739 { \
35befdaa
KH
740 src--; \
741 c = - string_char (src, &src, NULL); \
065e3595
KH
742 record_conversion_result \
743 (coding, CODING_RESULT_INVALID_SRC); \
744 } \
745 } \
746 consumed_chars++; \
aa72b389
KH
747 } while (0)
748
aa72b389 749
df7492f9
KH
750/* Store a byte C in the place pointed by DST and increment DST to the
751 next free point, and increment PRODUCED_CHARS. The caller should
752 assure that C is 0..127, and declare and set the variables `dst'
753 and `produced_chars' appropriately in advance.
754*/
aa72b389
KH
755
756
df7492f9
KH
757#define EMIT_ONE_ASCII_BYTE(c) \
758 do { \
759 produced_chars++; \
760 *dst++ = (c); \
b6871cc7 761 } while (0)
aa72b389
KH
762
763
df7492f9 764/* Like EMIT_ONE_ASCII_BYTE, but store two bytes, C1 then C2, and
   add 2 to PRODUCED_CHARS. */
aa72b389 765
df7492f9
KH
766#define EMIT_TWO_ASCII_BYTES(c1, c2) \
767 do { \
768 produced_chars += 2; \
769 *dst++ = (c1), *dst++ = (c2); \
770 } while (0)
aa72b389
KH
771
772
df7492f9
KH
773/* Store a byte C in the place pointed by DST and increment DST to the
774 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
775 nonzero, store in an appropriate multibyte form: a value of 0x80 or
   above is first converted with BYTE8_TO_CHAR, and the result is
   written with CHAR_STRING_ADVANCE. The caller should
776 declare and set the variables `dst', `multibytep' and
   `produced_chars' appropriately
777 in advance. */
778
779#define EMIT_ONE_BYTE(c) \
780 do { \
781 produced_chars++; \
782 if (multibytep) \
783 { \
784 int ch = (c); \
785 if (ch >= 0x80) \
786 ch = BYTE8_TO_CHAR (ch); \
787 CHAR_STRING_ADVANCE (ch, dst); \
788 } \
789 else \
790 *dst++ = (c); \
aa72b389 791 } while (0)
aa72b389 792
aa72b389 793
df7492f9 794/* Like EMIT_ONE_BYTE, but emit two bytes, C1 then C2, and add 2 to
   PRODUCED_CHARS.  Uses the caller's variables `dst', `multibytep'
   and `produced_chars'. */
aa72b389 795
e19c3639
KH
796#define EMIT_TWO_BYTES(c1, c2) \
797 do { \
798 produced_chars += 2; \
799 if (multibytep) \
800 { \
801 int ch; \
802 \
803 ch = (c1); \
804 if (ch >= 0x80) \
805 ch = BYTE8_TO_CHAR (ch); \
806 CHAR_STRING_ADVANCE (ch, dst); \
807 ch = (c2); \
808 if (ch >= 0x80) \
809 ch = BYTE8_TO_CHAR (ch); \
810 CHAR_STRING_ADVANCE (ch, dst); \
811 } \
812 else \
813 { \
814 *dst++ = (c1); \
815 *dst++ = (c2); \
816 } \
aa72b389
KH
817 } while (0)
818
819
df7492f9
KH
/* Emit three bytes C1, C2, C3 in that order, as one EMIT_ONE_BYTE
   followed by one EMIT_TWO_BYTES. */
820#define EMIT_THREE_BYTES(c1, c2, c3) \
821 do { \
822 EMIT_ONE_BYTE (c1); \
823 EMIT_TWO_BYTES (c2, c3); \
824 } while (0)
aa72b389 825
aa72b389 826
df7492f9
KH
/* Emit four bytes C1, C2, C3, C4 in that order, as two consecutive
   EMIT_TWO_BYTES. */
827#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
828 do { \
829 EMIT_TWO_BYTES (c1, c2); \
830 EMIT_TWO_BYTES (c3, c4); \
831 } while (0)
aa72b389 832
aa72b389 833
f6cbaf43
KH
834/* Prototypes for static functions. */
835static void record_conversion_result P_ ((struct coding_system *coding,
836 enum coding_result_code result));
837static int detect_coding_utf_8 P_ ((struct coding_system *,
838 struct coding_detection_info *info));
839static void decode_coding_utf_8 P_ ((struct coding_system *));
840static int encode_coding_utf_8 P_ ((struct coding_system *));
841
842static int detect_coding_utf_16 P_ ((struct coding_system *,
843 struct coding_detection_info *info));
844static void decode_coding_utf_16 P_ ((struct coding_system *));
845static int encode_coding_utf_16 P_ ((struct coding_system *));
846
847static int detect_coding_iso_2022 P_ ((struct coding_system *,
848 struct coding_detection_info *info));
849static void decode_coding_iso_2022 P_ ((struct coding_system *));
850static int encode_coding_iso_2022 P_ ((struct coding_system *));
851
852static int detect_coding_emacs_mule P_ ((struct coding_system *,
853 struct coding_detection_info *info));
854static void decode_coding_emacs_mule P_ ((struct coding_system *));
855static int encode_coding_emacs_mule P_ ((struct coding_system *));
856
857static int detect_coding_sjis P_ ((struct coding_system *,
858 struct coding_detection_info *info));
859static void decode_coding_sjis P_ ((struct coding_system *));
860static int encode_coding_sjis P_ ((struct coding_system *));
861
862static int detect_coding_big5 P_ ((struct coding_system *,
863 struct coding_detection_info *info));
864static void decode_coding_big5 P_ ((struct coding_system *));
865static int encode_coding_big5 P_ ((struct coding_system *));
866
867static int detect_coding_ccl P_ ((struct coding_system *,
868 struct coding_detection_info *info));
869static void decode_coding_ccl P_ ((struct coding_system *));
870static int encode_coding_ccl P_ ((struct coding_system *));
871
872static void decode_coding_raw_text P_ ((struct coding_system *));
873static int encode_coding_raw_text P_ ((struct coding_system *));
874
875static void coding_set_source P_ ((struct coding_system *));
876static void coding_set_destination P_ ((struct coding_system *));
877static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
878static void coding_alloc_by_making_gap P_ ((struct coding_system *,
287c57d7 879 EMACS_INT, EMACS_INT));
f6cbaf43
KH
880static unsigned char *alloc_destination P_ ((struct coding_system *,
881 EMACS_INT, unsigned char *));
882static void setup_iso_safe_charsets P_ ((Lisp_Object));
883static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
884 int *, int *,
885 unsigned char *));
886static int detect_eol P_ ((const unsigned char *,
887 EMACS_INT, enum coding_category));
888static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
889static void decode_eol P_ ((struct coding_system *));
890static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
891static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
892 int, int *, int *));
893static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
894static INLINE void produce_composition P_ ((struct coding_system *, int *,
895 EMACS_INT));
896static INLINE void produce_charset P_ ((struct coding_system *, int *,
897 EMACS_INT));
898static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
899static int decode_coding P_ ((struct coding_system *));
900static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
901 struct coding_system *,
902 int *, EMACS_INT *));
903static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
904 struct coding_system *,
905 int *, EMACS_INT *));
906static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
907static int encode_coding P_ ((struct coding_system *));
908static Lisp_Object make_conversion_work_buffer P_ ((int));
909static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
910static INLINE int char_encodable_p P_ ((int, Lisp_Object));
911static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
912
065e3595
KH
913static void
914record_conversion_result (struct coding_system *coding,
915 enum coding_result_code result)
916{
917 coding->result = result;
918 switch (result)
919 {
920 case CODING_RESULT_INSUFFICIENT_SRC:
921 Vlast_code_conversion_error = Qinsufficient_source;
922 break;
923 case CODING_RESULT_INCONSISTENT_EOL:
924 Vlast_code_conversion_error = Qinconsistent_eol;
925 break;
926 case CODING_RESULT_INVALID_SRC:
927 Vlast_code_conversion_error = Qinvalid_source;
928 break;
929 case CODING_RESULT_INTERRUPT:
930 Vlast_code_conversion_error = Qinterrupted;
931 break;
932 case CODING_RESULT_INSUFFICIENT_MEM:
933 Vlast_code_conversion_error = Qinsufficient_memory;
934 break;
35befdaa
KH
935 default:
936 Vlast_code_conversion_error = intern ("Unknown error");
065e3595
KH
937 }
938}
939
df7492f9
KH
/* Set C to the result of DECODE_CHAR (CHARSET, CODE).

   charset_map_loaded is cleared before the call.  If it is nonzero
   afterwards, CODING->source is recomputed with coding_set_source
   and SRC, SRC_BASE and SRC_END are shifted by the distance
   CODING->source moved, so that they keep pointing at the same
   positions of the source text.

   CODING, SRC, SRC_BASE, SRC_END and C are parenthesized in the
   expansion so that any lvalue expression may be passed for them.  */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    (c) = DECODE_CHAR (charset, code);                                       \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        const unsigned char *orig = (coding)->source;                        \
        EMACS_INT offset;                                                    \
                                                                             \
        coding_set_source (coding);                                          \
        offset = (coding)->source - orig;                                    \
        (src) += offset;                                                     \
        (src_base) += offset;                                                \
        (src_end) += offset;                                                 \
      }                                                                      \
  } while (0)
956
957
119852e7
KH
/* Make sure that more than BYTES bytes are free between dst and
   dst_end.  If they are not, enlarge coding->destination with
   alloc_destination (asking for the number of charbuf elements still
   pending plus BYTES) and update dst and dst_end.  We don't have to
   take care of coding->source which will be relocated.  It is
   handled by calling coding_set_source in encode_coding.  */

#define ASSURE_DESTINATION(bytes)                               \
  do {                                                          \
    if (dst_end <= dst + (bytes))                               \
      {                                                         \
        int more_bytes = charbuf_end - charbuf + (bytes);       \
                                                                \
        dst = alloc_destination (coding, more_bytes, dst);      \
        dst_end = coding->destination + coding->dst_bytes;      \
      }                                                         \
  } while (0)
aa72b389 973
aa72b389 974
db274c7a
KH
/* Store multibyte form of the character C in P, and advance P to the
   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
   never calls MAYBE_UNIFY_CHAR.

   The form written has one to five bytes depending on how C compares
   with MAX_1_BYTE_CHAR ... MAX_5_BYTE_CHAR; any larger C is written
   by BYTE8_STRING after subtracting 0x3FFF80.

   C is parenthesized at every use so that an argument such as
   `a | b' is encoded as a whole; P and C are evaluated more than
   once, so they must not have side effects.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
  } while (0)
1004
1005
/* Return the character code of the character whose multibyte form
   starts at P, and advance P to the end of that form.  This is like
   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   The length of the form is chosen from the first byte: bit 0x80
   clear means one byte, then bits 0x20, 0x10 and 0x08 being clear
   select two, three and four bytes respectively; anything else is
   read as five bytes.  A two-byte form whose first byte is below
   0xC2 additionally gets 0x3FFF80 or'ed into the result.  P is
   evaluated several times, so it must not have side effects.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                                 \
  (((p)[0] & 0x80) == 0                                                 \
   ? *(p)++                                                             \
   : ((p)[0] & 0x20) == 0                                               \
   ? ((p) += 2,                                                         \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)                \
       | ((p)[-1] & 0x3F)                                               \
       | (((p)[-2] & 0x1F) << 6)))                                      \
   : ((p)[0] & 0x10) == 0                                               \
   ? ((p) += 3,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x0F) << 12)))                                     \
   : ((p)[0] & 0x08) == 0                                               \
   ? ((p) += 4,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x3F) << 12)                                       \
       | (((p)[-4] & 0xF) << 18)))                                      \
   : ((p) += 5,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x3F) << 12)                                       \
       | (((p)[-4] & 0x3F) << 18))))
1034
aa72b389 1035
df7492f9
KH
1036static void
1037coding_set_source (coding)
aa72b389 1038 struct coding_system *coding;
aa72b389 1039{
df7492f9
KH
1040 if (BUFFERP (coding->src_object))
1041 {
2cb26057 1042 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 1043
df7492f9 1044 if (coding->src_pos < 0)
2cb26057 1045 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 1046 else
2cb26057 1047 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 1048 }
df7492f9 1049 else if (STRINGP (coding->src_object))
aa72b389 1050 {
8f924df7 1051 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 1052 }
df7492f9
KH
1053 else
1054 /* Otherwise, the source is C string and is never relocated
1055 automatically. Thus we don't have to update anything. */
1056 ;
1057}
aa72b389 1058
df7492f9
KH
1059static void
1060coding_set_destination (coding)
1061 struct coding_system *coding;
1062{
1063 if (BUFFERP (coding->dst_object))
aa72b389 1064 {
df7492f9 1065 if (coding->src_pos < 0)
aa72b389 1066 {
13818c30 1067 coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
28f67a95
KH
1068 coding->dst_bytes = (GAP_END_ADDR
1069 - (coding->src_bytes - coding->consumed)
1070 - coding->destination);
aa72b389 1071 }
df7492f9 1072 else
28f67a95
KH
1073 {
1074 /* We are sure that coding->dst_pos_byte is before the gap
1075 of the buffer. */
1076 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
13818c30 1077 + coding->dst_pos_byte - BEG_BYTE);
28f67a95
KH
1078 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1079 - coding->destination);
1080 }
df7492f9
KH
1081 }
1082 else
1083 /* Otherwise, the destination is C string and is never relocated
1084 automatically. Thus we don't have to update anything. */
1085 ;
1086}
1087
1088
1089static void
1090coding_alloc_by_realloc (coding, bytes)
1091 struct coding_system *coding;
1092 EMACS_INT bytes;
1093{
1094 coding->destination = (unsigned char *) xrealloc (coding->destination,
1095 coding->dst_bytes + bytes);
1096 coding->dst_bytes += bytes;
1097}
1098
1099static void
db274c7a 1100coding_alloc_by_making_gap (coding, gap_head_used, bytes)
df7492f9 1101 struct coding_system *coding;
db274c7a 1102 EMACS_INT gap_head_used, bytes;
df7492f9 1103{
db274c7a 1104 if (EQ (coding->src_object, coding->dst_object))
df7492f9 1105 {
db274c7a
KH
1106 /* The gap may contain the produced data at the head and not-yet
1107 consumed data at the tail. To preserve those data, we at
1108 first make the gap size to zero, then increase the gap
1109 size. */
1110 EMACS_INT add = GAP_SIZE;
1111
1112 GPT += gap_head_used, GPT_BYTE += gap_head_used;
1113 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
df7492f9
KH
1114 make_gap (bytes);
1115 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
db274c7a 1116 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
df7492f9 1117 }
730fff51 1118 else
df7492f9 1119 {
2c78b7e1
KH
1120 Lisp_Object this_buffer;
1121
1122 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1123 set_buffer_internal (XBUFFER (coding->dst_object));
1124 make_gap (bytes);
1125 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1126 }
df7492f9 1127}
8f924df7 1128
df7492f9
KH
1129
1130static unsigned char *
1131alloc_destination (coding, nbytes, dst)
1132 struct coding_system *coding;
3e139625 1133 EMACS_INT nbytes;
df7492f9
KH
1134 unsigned char *dst;
1135{
1136 EMACS_INT offset = dst - coding->destination;
1137
1138 if (BUFFERP (coding->dst_object))
db274c7a
KH
1139 {
1140 struct buffer *buf = XBUFFER (coding->dst_object);
1141
1142 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1143 }
aa72b389 1144 else
df7492f9 1145 coding_alloc_by_realloc (coding, nbytes);
065e3595 1146 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1147 coding_set_destination (coding);
1148 dst = coding->destination + offset;
1149 return dst;
1150}
aa72b389 1151
ff0dacd7
KH
/** Macros for annotations.  */

/* Maximum length of annotation data (sum of annotations for
   composition and charset).  */
#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)

/* An annotation data is stored in the array coding->charbuf in this
   format:
     [ -LENGTH ANNOTATION_MASK NCHARS ... ]
   LENGTH is the number of elements in the annotation.
   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
   NCHARS is the number of characters in the text annotated.

   The format of the following elements depends on ANNOTATION_MASK.

   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
     ... METHOD [ COMPOSITION-COMPONENTS ... ]
   METHOD is one of enum composition_method.
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.

   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
   follows.  */

/* Store the three-element annotation header -LEN, MASK, NCHARS at
   BUF, advance BUF past it, and set coding->annotated.  The caller
   must have `coding' in scope.  The expansion has no trailing
   semicolon, so the macro can be used as a single statement, e.g. in
   an unbraced if/else.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1184
69a80ea3
KH
/* Store a composition annotation at BUF and advance BUF past it: the
   header written by ADD_ANNOTATION_DATA (length 4, mask
   CODING_ANNOTATE_COMPOSITION_MASK, NCHARS) followed by METHOD.  BUF
   and METHOD are parenthesized so that BUF is advanced the same way
   here as inside ADD_ANNOTATION_DATA even when it is an expression
   such as `*pp'.  */
#define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method);                                                    \
  } while (0)
1190
1191
69a80ea3
KH
/* Store a charset annotation at BUF and advance BUF past it: the
   header written by ADD_ANNOTATION_DATA (length 4, mask
   CODING_ANNOTATE_CHARSET_MASK, NCHARS) followed by the charset ID.
   BUF and ID are parenthesized so that BUF is advanced the same way
   here as inside ADD_ANNOTATION_DATA even when it is an expression
   such as `*pp'.  */
#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1197
df7492f9
KH
1198\f
1199/*** 2. Emacs' internal format (emacs-utf-8) ***/
1200
1201
1202
1203\f
1204/*** 3. UTF-8 ***/
1205
1206/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1207 Check if a text is encoded in UTF-8. If it is, return 1, else
1208 return 0. */
df7492f9
KH
1209
/* Classification of one octet C of a UTF-8 sequence by its high
   bits.  */

/* 0xxxxxxx: a complete one-octet sequence.  */
#define UTF_8_1_OCTET_P(c)              ((c) < 0x80)
/* 10xxxxxx: a second or later octet of a sequence.  */
#define UTF_8_EXTRA_OCTET_P(c)          (((c) & 0xC0) == 0x80)
/* 110xxxxx: first octet of a two-octet sequence.  */
#define UTF_8_2_OCTET_LEADING_P(c)      (((c) & 0xE0) == 0xC0)
/* 1110xxxx: first octet of a three-octet sequence.  */
#define UTF_8_3_OCTET_LEADING_P(c)      (((c) & 0xF0) == 0xE0)
/* 11110xxx: first octet of a four-octet sequence.  */
#define UTF_8_4_OCTET_LEADING_P(c)      (((c) & 0xF8) == 0xF0)
/* 111110xx: first octet of a five-octet sequence.  */
#define UTF_8_5_OCTET_LEADING_P(c)      (((c) & 0xFC) == 0xF8)
1216
1217static int
ff0dacd7 1218detect_coding_utf_8 (coding, detect_info)
df7492f9 1219 struct coding_system *coding;
ff0dacd7 1220 struct coding_detection_info *detect_info;
df7492f9 1221{
065e3595 1222 const unsigned char *src = coding->source, *src_base;
8f924df7 1223 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1224 int multibytep = coding->src_multibyte;
1225 int consumed_chars = 0;
1226 int found = 0;
1227
ff0dacd7 1228 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1229 /* A coding system of this category is always ASCII compatible. */
1230 src += coding->head_ascii;
1231
1232 while (1)
aa72b389 1233 {
df7492f9 1234 int c, c1, c2, c3, c4;
aa72b389 1235
065e3595 1236 src_base = src;
df7492f9 1237 ONE_MORE_BYTE (c);
065e3595 1238 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1239 continue;
1240 ONE_MORE_BYTE (c1);
065e3595 1241 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1242 break;
1243 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1244 {
ff0dacd7 1245 found = CATEGORY_MASK_UTF_8;
df7492f9 1246 continue;
aa72b389 1247 }
df7492f9 1248 ONE_MORE_BYTE (c2);
065e3595 1249 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1250 break;
1251 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1252 {
ff0dacd7 1253 found = CATEGORY_MASK_UTF_8;
df7492f9 1254 continue;
aa72b389 1255 }
df7492f9 1256 ONE_MORE_BYTE (c3);
065e3595 1257 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1258 break;
1259 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1260 {
ff0dacd7 1261 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1262 continue;
1263 }
1264 ONE_MORE_BYTE (c4);
065e3595 1265 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1266 break;
1267 if (UTF_8_5_OCTET_LEADING_P (c))
1268 {
ff0dacd7 1269 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1270 continue;
1271 }
1272 break;
aa72b389 1273 }
ff0dacd7 1274 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1275 return 0;
aa72b389 1276
df7492f9 1277 no_more_source:
065e3595 1278 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1279 {
ff0dacd7 1280 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1281 return 0;
aa72b389 1282 }
ff0dacd7
KH
1283 detect_info->found |= found;
1284 return 1;
aa72b389
KH
1285}
1286
4ed46869 1287
b73bfc1c 1288static void
df7492f9 1289decode_coding_utf_8 (coding)
b73bfc1c 1290 struct coding_system *coding;
b73bfc1c 1291{
8f924df7
KH
1292 const unsigned char *src = coding->source + coding->consumed;
1293 const unsigned char *src_end = coding->source + coding->src_bytes;
1294 const unsigned char *src_base;
69a80ea3
KH
1295 int *charbuf = coding->charbuf + coding->charbuf_used;
1296 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1297 int consumed_chars = 0, consumed_chars_base;
1298 int multibytep = coding->src_multibyte;
24a73b0a 1299 Lisp_Object attr, charset_list;
119852e7
KH
1300 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1301 int byte_after_cr = -1;
4ed46869 1302
24a73b0a 1303 CODING_GET_INFO (coding, attr, charset_list);
df7492f9
KH
1304
1305 while (1)
b73bfc1c 1306 {
df7492f9 1307 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1308
df7492f9
KH
1309 src_base = src;
1310 consumed_chars_base = consumed_chars;
4af310db 1311
df7492f9
KH
1312 if (charbuf >= charbuf_end)
1313 break;
1314
119852e7
KH
1315 if (byte_after_cr >= 0)
1316 c1 = byte_after_cr, byte_after_cr = -1;
1317 else
1318 ONE_MORE_BYTE (c1);
065e3595
KH
1319 if (c1 < 0)
1320 {
1321 c = - c1;
1322 }
1323 else if (UTF_8_1_OCTET_P(c1))
df7492f9 1324 {
119852e7
KH
1325 if (eol_crlf && c1 == '\r')
1326 ONE_MORE_BYTE (byte_after_cr);
df7492f9 1327 c = c1;
4af310db 1328 }
df7492f9 1329 else
4af310db 1330 {
df7492f9 1331 ONE_MORE_BYTE (c2);
065e3595 1332 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1333 goto invalid_code;
1334 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1335 {
b0edb2c5
DL
1336 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1337 /* Reject overlong sequences here and below. Encoders
1338 producing them are incorrect, they can be misleading,
1339 and they mess up read/write invariance. */
1340 if (c < 128)
1341 goto invalid_code;
4af310db 1342 }
df7492f9 1343 else
aa72b389 1344 {
df7492f9 1345 ONE_MORE_BYTE (c3);
065e3595 1346 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1347 goto invalid_code;
1348 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1349 {
1350 c = (((c1 & 0xF) << 12)
1351 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1352 if (c < 0x800
1353 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1354 goto invalid_code;
1355 }
df7492f9
KH
1356 else
1357 {
1358 ONE_MORE_BYTE (c4);
065e3595 1359 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1360 goto invalid_code;
1361 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1362 {
df7492f9
KH
1363 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1364 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1365 if (c < 0x10000)
1366 goto invalid_code;
1367 }
df7492f9
KH
1368 else
1369 {
1370 ONE_MORE_BYTE (c5);
065e3595 1371 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1372 goto invalid_code;
1373 if (UTF_8_5_OCTET_LEADING_P (c1))
1374 {
1375 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1376 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1377 | (c5 & 0x3F));
b0edb2c5 1378 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1379 goto invalid_code;
1380 }
1381 else
1382 goto invalid_code;
1383 }
1384 }
aa72b389 1385 }
b73bfc1c 1386 }
df7492f9
KH
1387
1388 *charbuf++ = c;
1389 continue;
1390
1391 invalid_code:
1392 src = src_base;
1393 consumed_chars = consumed_chars_base;
1394 ONE_MORE_BYTE (c);
1395 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1396 coding->errors++;
aa72b389
KH
1397 }
1398
df7492f9
KH
1399 no_more_source:
1400 coding->consumed_char += consumed_chars_base;
1401 coding->consumed = src_base - coding->source;
1402 coding->charbuf_used = charbuf - coding->charbuf;
1403}
1404
1405
1406static int
1407encode_coding_utf_8 (coding)
1408 struct coding_system *coding;
1409{
1410 int multibytep = coding->dst_multibyte;
1411 int *charbuf = coding->charbuf;
1412 int *charbuf_end = charbuf + coding->charbuf_used;
1413 unsigned char *dst = coding->destination + coding->produced;
1414 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1415 int produced_chars = 0;
df7492f9
KH
1416 int c;
1417
1418 if (multibytep)
aa72b389 1419 {
df7492f9
KH
1420 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1421
1422 while (charbuf < charbuf_end)
b73bfc1c 1423 {
df7492f9 1424 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1425
df7492f9
KH
1426 ASSURE_DESTINATION (safe_room);
1427 c = *charbuf++;
28f67a95
KH
1428 if (CHAR_BYTE8_P (c))
1429 {
1430 c = CHAR_TO_BYTE8 (c);
1431 EMIT_ONE_BYTE (c);
1432 }
1433 else
1434 {
db274c7a 1435 CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
28f67a95
KH
1436 for (p = str; p < pend; p++)
1437 EMIT_ONE_BYTE (*p);
1438 }
b73bfc1c 1439 }
aa72b389 1440 }
df7492f9
KH
1441 else
1442 {
1443 int safe_room = MAX_MULTIBYTE_LENGTH;
1444
1445 while (charbuf < charbuf_end)
b73bfc1c 1446 {
df7492f9
KH
1447 ASSURE_DESTINATION (safe_room);
1448 c = *charbuf++;
f03caae0
KH
1449 if (CHAR_BYTE8_P (c))
1450 *dst++ = CHAR_TO_BYTE8 (c);
1451 else
db274c7a 1452 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
df7492f9 1453 produced_chars++;
4ed46869
KH
1454 }
1455 }
065e3595 1456 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1457 coding->produced_char += produced_chars;
1458 coding->produced = dst - coding->destination;
1459 return 0;
4ed46869
KH
1460}
1461
b73bfc1c 1462
df7492f9 1463/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1464 Check if a text is encoded in one of UTF-16 based coding systems.
1465 If it is, return 1, else return 0. */
aa72b389 1466
df7492f9
KH
/* Nonzero if the 16-bit code unit VAL is in 0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit code unit VAL is in 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) (((val) & 0xFC00) == 0xDC00)

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
#define UTF_16_INVALID_P(val)           \
  (UTF_16_LOW_SURROGATE_P (val)         \
   || ((val) == 0xFFFF)                 \
   || ((val) == 0xFFFE))
aa72b389 1477
aa72b389 1478
df7492f9 1479static int
ff0dacd7 1480detect_coding_utf_16 (coding, detect_info)
aa72b389 1481 struct coding_system *coding;
ff0dacd7 1482 struct coding_detection_info *detect_info;
aa72b389 1483{
8f924df7
KH
1484 const unsigned char *src = coding->source, *src_base = src;
1485 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1486 int multibytep = coding->src_multibyte;
1487 int consumed_chars = 0;
1488 int c1, c2;
aa72b389 1489
ff0dacd7 1490 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1491 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1492 && (coding->src_chars & 1))
ff0dacd7
KH
1493 {
1494 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1495 return 0;
1496 }
24a73b0a 1497
df7492f9
KH
1498 ONE_MORE_BYTE (c1);
1499 ONE_MORE_BYTE (c2);
df7492f9 1500 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1501 {
b49a1807
KH
1502 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1503 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1504 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1505 | CATEGORY_MASK_UTF_16_BE_NOSIG
1506 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1507 }
df7492f9 1508 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1509 {
b49a1807
KH
1510 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1511 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1512 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1513 | CATEGORY_MASK_UTF_16_BE_NOSIG
1514 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1515 }
065e3595 1516 else if (c1 >= 0 && c2 >= 0)
24a73b0a 1517 {
24a73b0a
KH
1518 detect_info->rejected
1519 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
ff0dacd7 1520 }
df7492f9 1521 no_more_source:
ff0dacd7 1522 return 1;
df7492f9 1523}
aa72b389 1524
df7492f9
KH
1525static void
1526decode_coding_utf_16 (coding)
1527 struct coding_system *coding;
1528{
8f924df7
KH
1529 const unsigned char *src = coding->source + coding->consumed;
1530 const unsigned char *src_end = coding->source + coding->src_bytes;
1531 const unsigned char *src_base;
69a80ea3
KH
1532 int *charbuf = coding->charbuf + coding->charbuf_used;
1533 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1534 int consumed_chars = 0, consumed_chars_base;
1535 int multibytep = coding->src_multibyte;
1536 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1537 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1538 int surrogate = CODING_UTF_16_SURROGATE (coding);
24a73b0a 1539 Lisp_Object attr, charset_list;
119852e7
KH
1540 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1541 int byte_after_cr1 = -1, byte_after_cr2 = -1;
df7492f9 1542
24a73b0a 1543 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1544
b49a1807 1545 if (bom == utf_16_with_bom)
aa72b389 1546 {
df7492f9 1547 int c, c1, c2;
4af310db 1548
aa72b389 1549 src_base = src;
df7492f9
KH
1550 ONE_MORE_BYTE (c1);
1551 ONE_MORE_BYTE (c2);
e19c3639 1552 c = (c1 << 8) | c2;
aa72b389 1553
b49a1807
KH
1554 if (endian == utf_16_big_endian
1555 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1556 {
b49a1807
KH
1557 /* The first two bytes are not BOM. Treat them as bytes
1558 for a normal character. */
1559 src = src_base;
1560 coding->errors++;
aa72b389 1561 }
b49a1807
KH
1562 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1563 }
1564 else if (bom == utf_16_detect_bom)
1565 {
1566 /* We have already tried to detect BOM and failed in
1567 detect_coding. */
1568 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
df7492f9 1569 }
aa72b389 1570
df7492f9
KH
1571 while (1)
1572 {
1573 int c, c1, c2;
1574
1575 src_base = src;
1576 consumed_chars_base = consumed_chars;
1577
1578 if (charbuf + 2 >= charbuf_end)
1579 break;
1580
119852e7
KH
1581 if (byte_after_cr1 >= 0)
1582 c1 = byte_after_cr1, byte_after_cr1 = -1;
1583 else
1584 ONE_MORE_BYTE (c1);
065e3595
KH
1585 if (c1 < 0)
1586 {
1587 *charbuf++ = -c1;
1588 continue;
1589 }
119852e7
KH
1590 if (byte_after_cr2 >= 0)
1591 c2 = byte_after_cr2, byte_after_cr2 = -1;
1592 else
1593 ONE_MORE_BYTE (c2);
065e3595
KH
1594 if (c2 < 0)
1595 {
1596 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1597 *charbuf++ = -c2;
1598 continue;
1599 }
df7492f9 1600 c = (endian == utf_16_big_endian
e19c3639 1601 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
119852e7 1602
df7492f9 1603 if (surrogate)
fd3ae0b9 1604 {
df7492f9 1605 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1606 {
df7492f9
KH
1607 if (endian == utf_16_big_endian)
1608 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1609 else
1610 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1611 *charbuf++ = c1;
1612 *charbuf++ = c2;
1613 coding->errors++;
1614 if (UTF_16_HIGH_SURROGATE_P (c))
1615 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1616 else
df7492f9 1617 *charbuf++ = c;
fd3ae0b9
KH
1618 }
1619 else
df7492f9
KH
1620 {
1621 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1622 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1623 *charbuf++ = 0x10000 + c;
df7492f9 1624 }
fd3ae0b9 1625 }
aa72b389 1626 else
df7492f9
KH
1627 {
1628 if (UTF_16_HIGH_SURROGATE_P (c))
1629 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1630 else
119852e7
KH
1631 {
1632 if (eol_crlf && c == '\r')
1633 {
1634 ONE_MORE_BYTE (byte_after_cr1);
1635 ONE_MORE_BYTE (byte_after_cr2);
1636 }
1637 *charbuf++ = c;
1638 }
8f924df7 1639 }
aa72b389 1640 }
df7492f9
KH
1641
1642 no_more_source:
1643 coding->consumed_char += consumed_chars_base;
1644 coding->consumed = src_base - coding->source;
1645 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1646}
b73bfc1c 1647
df7492f9
KH
1648static int
1649encode_coding_utf_16 (coding)
1650 struct coding_system *coding;
1651{
1652 int multibytep = coding->dst_multibyte;
1653 int *charbuf = coding->charbuf;
1654 int *charbuf_end = charbuf + coding->charbuf_used;
1655 unsigned char *dst = coding->destination + coding->produced;
1656 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1657 int safe_room = 8;
1658 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1659 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1660 int produced_chars = 0;
24a73b0a 1661 Lisp_Object attrs, charset_list;
df7492f9 1662 int c;
4ed46869 1663
24a73b0a 1664 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 1665
b49a1807 1666 if (bom != utf_16_without_bom)
df7492f9
KH
1667 {
1668 ASSURE_DESTINATION (safe_room);
1669 if (big_endian)
df7492f9 1670 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1671 else
1672 EMIT_TWO_BYTES (0xFF, 0xFE);
df7492f9
KH
1673 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1674 }
1675
1676 while (charbuf < charbuf_end)
1677 {
1678 ASSURE_DESTINATION (safe_room);
1679 c = *charbuf++;
e19c3639
KH
1680 if (c >= MAX_UNICODE_CHAR)
1681 c = coding->default_char;
df7492f9
KH
1682
1683 if (c < 0x10000)
1684 {
1685 if (big_endian)
1686 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1687 else
1688 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1689 }
1690 else
1691 {
1692 int c1, c2;
1693
1694 c -= 0x10000;
1695 c1 = (c >> 10) + 0xD800;
1696 c2 = (c & 0x3FF) + 0xDC00;
1697 if (big_endian)
1698 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1699 else
1700 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1701 }
1702 }
065e3595 1703 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1704 coding->produced = dst - coding->destination;
1705 coding->produced_char += produced_chars;
1706 return 0;
1707}
1708
1709\f
1710/*** 6. Old Emacs' internal format (emacs-mule) ***/
1711
1712/* Emacs' internal format for representation of multiple character
1713 sets is a kind of multi-byte encoding, i.e. characters are
1714 represented by variable-length sequences of one-byte codes.
1715
1716 ASCII characters and control characters (e.g. `tab', `newline') are
1717 represented by one-byte sequences which are their ASCII codes, in
1718 the range 0x00 through 0x7F.
1719
1720 8-bit characters of the range 0x80..0x9F are represented by
1721 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1722 code + 0x20).
1723
1724 8-bit characters of the range 0xA0..0xFF are represented by
1725 one-byte sequences which are their 8-bit code.
1726
1727 The other characters are represented by a sequence of `base
1728 leading-code', optional `extended leading-code', and one or two
1729 `position-code's. The length of the sequence is determined by the
1730 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1731 whereas extended leading-code and position-code take the range 0xA0
1732 through 0xFF. See `charset.h' for more details about leading-code
1733 and position-code.
1734
1735 --- CODE RANGE of Emacs' internal format ---
1736 character set range
1737 ------------- -----
1738 ascii 0x00..0x7F
1739 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1740 eight-bit-graphic 0xA0..0xFF
1741 ELSE 0x81..0x9D + [0xA0..0xFF]+
1742 ---------------------------------------------
1743
1744 As this is the internal character representation, the format is
1745 usually not used externally (i.e. in a file or in a data sent to a
1746 process). But, it is possible to have a text externally in this
1747 format (i.e. by encoding by the coding system `emacs-mule').
1748
1749 In that case, a sequence of one-byte codes has a slightly different
1750 form.
1751
1752 At first, all characters in eight-bit-control are represented by
1753 one-byte sequences which are their 8-bit code.
1754
1755 Next, character composition data are represented by the byte
1756 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1757 where,
1758 METHOD is 0xF0 plus one of composition method (enum
1759 composition_method),
1760
1761 BYTES is 0xA0 plus a byte length of this composition data,
1762
1763 CHARS is 0x20 plus a number of characters composed by this
1764 data,
1765
1766 COMPONENTs are characters of multibyte form or composition
1767 rules encoded by two-byte of ASCII codes.
1768
1769 In addition, for backward compatibility, the following formats are
1770 also recognized as composition data on decoding.
1771
1772 0x80 MSEQ ...
1773 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1774
1775 Here,
1776 MSEQ is a multibyte form, but in one of these special formats:
1777 ASCII: 0xA0 ASCII_CODE+0x80,
1778 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1779 RULE is a one byte code of the range 0xA0..0xF0 that
1780 represents a composition rule.
1781 */
1782
1783char emacs_mule_bytes[256];
1784
df7492f9 1785int
ff0dacd7 1786emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1787 struct coding_system *coding;
065e3595 1788 const unsigned char *src;
ff0dacd7 1789 int *nbytes, *nchars, *id;
df7492f9 1790{
8f924df7
KH
1791 const unsigned char *src_end = coding->source + coding->src_bytes;
1792 const unsigned char *src_base = src;
df7492f9 1793 int multibytep = coding->src_multibyte;
df7492f9
KH
1794 struct charset *charset;
1795 unsigned code;
1796 int c;
1797 int consumed_chars = 0;
1798
1799 ONE_MORE_BYTE (c);
065e3595 1800 if (c < 0)
df7492f9 1801 {
065e3595
KH
1802 c = -c;
1803 charset = emacs_mule_charset[0];
1804 }
1805 else
1806 {
4d41e8b7
KH
1807 if (c >= 0xA0)
1808 {
b3af4b28 1809 /* Old style component character of a composition. */
4d41e8b7
KH
1810 if (c == 0xA0)
1811 {
1812 ONE_MORE_BYTE (c);
1813 c -= 0x80;
1814 }
1815 else
1816 c -= 0x20;
1817 }
1818
065e3595 1819 switch (emacs_mule_bytes[c])
b73bfc1c 1820 {
065e3595 1821 case 2:
df7492f9
KH
1822 if (! (charset = emacs_mule_charset[c]))
1823 goto invalid_code;
1824 ONE_MORE_BYTE (c);
9ffd559c 1825 if (c < 0xA0)
065e3595 1826 goto invalid_code;
df7492f9 1827 code = c & 0x7F;
065e3595
KH
1828 break;
1829
1830 case 3:
1831 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1832 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1833 {
1834 ONE_MORE_BYTE (c);
9ffd559c 1835 if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
065e3595
KH
1836 goto invalid_code;
1837 ONE_MORE_BYTE (c);
9ffd559c 1838 if (c < 0xA0)
065e3595
KH
1839 goto invalid_code;
1840 code = c & 0x7F;
1841 }
1842 else
1843 {
1844 if (! (charset = emacs_mule_charset[c]))
1845 goto invalid_code;
1846 ONE_MORE_BYTE (c);
9ffd559c 1847 if (c < 0xA0)
065e3595
KH
1848 goto invalid_code;
1849 code = (c & 0x7F) << 8;
1850 ONE_MORE_BYTE (c);
9ffd559c 1851 if (c < 0xA0)
065e3595
KH
1852 goto invalid_code;
1853 code |= c & 0x7F;
1854 }
1855 break;
1856
1857 case 4:
1858 ONE_MORE_BYTE (c);
1859 if (c < 0 || ! (charset = emacs_mule_charset[c]))
df7492f9
KH
1860 goto invalid_code;
1861 ONE_MORE_BYTE (c);
9ffd559c 1862 if (c < 0xA0)
065e3595 1863 goto invalid_code;
781d7a48 1864 code = (c & 0x7F) << 8;
df7492f9 1865 ONE_MORE_BYTE (c);
9ffd559c 1866 if (c < 0xA0)
065e3595 1867 goto invalid_code;
df7492f9 1868 code |= c & 0x7F;
065e3595 1869 break;
df7492f9 1870
065e3595
KH
1871 case 1:
1872 code = c;
1873 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1874 ? charset_ascii : charset_eight_bit);
1875 break;
df7492f9 1876
065e3595
KH
1877 default:
1878 abort ();
1879 }
1880 c = DECODE_CHAR (charset, code);
1881 if (c < 0)
1882 goto invalid_code;
df7492f9 1883 }
df7492f9
KH
1884 *nbytes = src - src_base;
1885 *nchars = consumed_chars;
ff0dacd7
KH
1886 if (id)
1887 *id = charset->id;
df7492f9
KH
1888 return c;
1889
1890 no_more_source:
1891 return -2;
1892
1893 invalid_code:
1894 return -1;
1895}
1896
1897
1898/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1899 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1900 else return 0. */
df7492f9
KH
1901
1902static int
ff0dacd7 1903detect_coding_emacs_mule (coding, detect_info)
df7492f9 1904 struct coding_system *coding;
ff0dacd7 1905 struct coding_detection_info *detect_info;
df7492f9 1906{
065e3595 1907 const unsigned char *src = coding->source, *src_base;
8f924df7 1908 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1909 int multibytep = coding->src_multibyte;
1910 int consumed_chars = 0;
1911 int c;
1912 int found = 0;
1913
ff0dacd7 1914 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1915 /* A coding system of this category is always ASCII compatible. */
1916 src += coding->head_ascii;
1917
1918 while (1)
1919 {
065e3595 1920 src_base = src;
df7492f9 1921 ONE_MORE_BYTE (c);
065e3595
KH
1922 if (c < 0)
1923 continue;
df7492f9
KH
1924 if (c == 0x80)
1925 {
1926 /* Perhaps the start of composite character. We simple skip
1927 it because analyzing it is too heavy for detecting. But,
1928 at least, we check that the composite character
1929 constitues of more than 4 bytes. */
8f924df7 1930 const unsigned char *src_base;
df7492f9
KH
1931
1932 repeat:
1933 src_base = src;
1934 do
1935 {
1936 ONE_MORE_BYTE (c);
1937 }
1938 while (c >= 0xA0);
1939
1940 if (src - src_base <= 4)
1941 break;
ff0dacd7 1942 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1943 if (c == 0x80)
1944 goto repeat;
b73bfc1c 1945 }
df7492f9
KH
1946
1947 if (c < 0x80)
b73bfc1c 1948 {
df7492f9
KH
1949 if (c < 0x20
1950 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1951 break;
1952 }
1953 else
1954 {
0e219d54 1955 int more_bytes = emacs_mule_bytes[*src_base] - 1;
df7492f9 1956
0e219d54 1957 while (more_bytes > 0)
df7492f9
KH
1958 {
1959 ONE_MORE_BYTE (c);
0e219d54
KH
1960 if (c < 0xA0)
1961 {
1962 src--; /* Unread the last byte. */
1963 break;
1964 }
1965 more_bytes--;
df7492f9 1966 }
0e219d54 1967 if (more_bytes != 0)
df7492f9 1968 break;
ff0dacd7 1969 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
1970 }
1971 }
ff0dacd7 1972 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1973 return 0;
1974
1975 no_more_source:
065e3595 1976 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 1977 {
ff0dacd7 1978 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
1979 return 0;
1980 }
ff0dacd7
KH
1981 detect_info->found |= found;
1982 return 1;
4ed46869
KH
1983}
1984
b73bfc1c 1985
df7492f9
KH
1986/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1987
/* Decode a character represented as a component of composition
   sequence of Emacs 20/21 style at SRC.  Store the character in *BUF,
   increment BUF, and advance SRC and CONSUMED_CHARS past it.

   If SRC has reached SRC_END, or emacs_mule_char returns -2, `break'
   out of the loop enclosing the macro call without consuming
   anything.  If emacs_mule_char returns any other negative value,
   jump to the label `invalid_code'.

   The body is deliberately `if (1) { ... } else' and not
   `do { ... } while (0)': the `break' statements must leave the
   caller's loop.  Inside a do-while wrapper they would only leave the
   wrapper, and a caller that loops until enough has been consumed
   would never terminate on truncated input.  Always follow the call
   with a semicolon.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
  if (1)                                                        \
    {                                                           \
      int c;                                                    \
      int nbytes, nchars;                                       \
                                                                \
      if (src == src_end)                                       \
        break;                                                  \
      c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
      if (c < 0)                                                \
        {                                                       \
          if (c == -2)                                          \
            break;                                              \
          goto invalid_code;                                    \
        }                                                       \
      *buf++ = c;                                               \
      src += nbytes;                                            \
      consumed_chars += nchars;                                 \
    }                                                           \
  else
df7492f9
KH
2015
2016
2017/* Decode a composition rule represented as a component of composition
781d7a48
KH
2018 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
2019 and increment BUF. If SRC points to an invalid byte sequence, jump
2020 to the label `invalid_code'. */
df7492f9 2021
781d7a48 2022#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
df7492f9
KH
2023 do { \
2024 int c, gref, nref; \
2025 \
781d7a48 2026 if (src >= src_end) \
df7492f9
KH
2027 goto invalid_code; \
2028 ONE_MORE_BYTE_NO_CHECK (c); \
4d41e8b7 2029 c -= 0xA0; \
df7492f9
KH
2030 if (c < 0 || c >= 81) \
2031 goto invalid_code; \
2032 \
2033 gref = c / 9, nref = c % 9; \
2034 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
2035 } while (0)
2036
2037
781d7a48
KH
2038/* Decode a composition rule represented as a component of composition
2039 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
2040 and increment BUF. If SRC points to an invalid byte sequence, jump
2041 to the label `invalid_code'. */
2042
2043#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
2044 do { \
2045 int gref, nref; \
2046 \
2047 if (src + 1>= src_end) \
2048 goto invalid_code; \
2049 ONE_MORE_BYTE_NO_CHECK (gref); \
2050 gref -= 0x20; \
2051 ONE_MORE_BYTE_NO_CHECK (nref); \
2052 nref -= 0x20; \
2053 if (gref < 0 || gref >= 81 \
2054 || nref < 0 || nref >= 81) \
2055 goto invalid_code; \
2056 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
2057 } while (0)
2058
2059
df7492f9 2060#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
aa72b389 2061 do { \
df7492f9 2062 /* Emacs 21 style format. The first three bytes at SRC are \
781d7a48 2063 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
df7492f9
KH
2064 the byte length of this composition information, CHARS is the \
2065 number of characters composed by this composition. */ \
781d7a48
KH
2066 enum composition_method method = c - 0xF2; \
2067 int *charbuf_base = charbuf; \
df7492f9
KH
2068 int consumed_chars_limit; \
2069 int nbytes, nchars; \
2070 \
2071 ONE_MORE_BYTE (c); \
065e3595
KH
2072 if (c < 0) \
2073 goto invalid_code; \
df7492f9
KH
2074 nbytes = c - 0xA0; \
2075 if (nbytes < 3) \
2076 goto invalid_code; \
2077 ONE_MORE_BYTE (c); \
065e3595
KH
2078 if (c < 0) \
2079 goto invalid_code; \
df7492f9 2080 nchars = c - 0xA0; \
69a80ea3 2081 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
df7492f9
KH
2082 consumed_chars_limit = consumed_chars_base + nbytes; \
2083 if (method != COMPOSITION_RELATIVE) \
aa72b389 2084 { \
df7492f9
KH
2085 int i = 0; \
2086 while (consumed_chars < consumed_chars_limit) \
aa72b389 2087 { \
df7492f9 2088 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
781d7a48 2089 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
df7492f9
KH
2090 else \
2091 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
781d7a48 2092 i++; \
aa72b389 2093 } \
df7492f9
KH
2094 if (consumed_chars < consumed_chars_limit) \
2095 goto invalid_code; \
781d7a48 2096 charbuf_base[0] -= i; \
aa72b389
KH
2097 } \
2098 } while (0)
93dec019 2099
aa72b389 2100
d959f512
KH
2101#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
2102 do { \
2103 /* Emacs 20 style format for relative composition. */ \
2104 /* Store multibyte form of characters to be composed. */ \
2105 enum composition_method method = COMPOSITION_RELATIVE; \
2106 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
2107 int *buf = components; \
2108 int i, j; \
2109 \
2110 src = src_base; \
2111 ONE_MORE_BYTE (c); /* skip 0x80 */ \
2112 for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++) \
2113 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2114 if (i < 2) \
2115 goto invalid_code; \
2116 ADD_COMPOSITION_DATA (charbuf, i, method); \
2117 for (j = 0; j < i; j++) \
2118 *charbuf++ = components[j]; \
df7492f9
KH
2119 } while (0)
2120
2121
2122#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
2123 do { \
2124 /* Emacs 20 style format for rule-base composition. */ \
2125 /* Store multibyte form of characters to be composed. */ \
ff0dacd7 2126 enum composition_method method = COMPOSITION_WITH_RULE; \
4d41e8b7 2127 int *charbuf_base = charbuf; \
df7492f9
KH
2128 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
2129 int *buf = components; \
2130 int i, j; \
4d41e8b7 2131 \
df7492f9 2132 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
4d41e8b7 2133 for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++) \
df7492f9 2134 { \
4d41e8b7
KH
2135 if (*src < 0xA0) \
2136 break; \
781d7a48 2137 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
df7492f9
KH
2138 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2139 } \
4d41e8b7 2140 if (i <= 1 || (buf - components) % 2 == 0) \
df7492f9 2141 goto invalid_code; \
4d41e8b7 2142 if (charbuf + i + (i / 2) + 1 >= charbuf_end) \
df7492f9 2143 goto no_more_source; \
4d41e8b7
KH
2144 ADD_COMPOSITION_DATA (charbuf, i, method); \
2145 i = i * 2 - 1; \
df7492f9
KH
2146 for (j = 0; j < i; j++) \
2147 *charbuf++ = components[j]; \
4d41e8b7 2148 charbuf_base[0] -= i; \
df7492f9
KH
2149 for (j = 0; j < i; j += 2) \
2150 *charbuf++ = components[j]; \
2151 } while (0)
2152
aa72b389
KH
2153
2154static void
df7492f9 2155decode_coding_emacs_mule (coding)
aa72b389 2156 struct coding_system *coding;
aa72b389 2157{
8f924df7
KH
2158 const unsigned char *src = coding->source + coding->consumed;
2159 const unsigned char *src_end = coding->source + coding->src_bytes;
2160 const unsigned char *src_base;
69a80ea3
KH
2161 int *charbuf = coding->charbuf + coding->charbuf_used;
2162 int *charbuf_end
2163 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 2164 int consumed_chars = 0, consumed_chars_base;
df7492f9 2165 int multibytep = coding->src_multibyte;
24a73b0a 2166 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2167 int char_offset = coding->produced_char;
2168 int last_offset = char_offset;
2169 int last_id = charset_ascii;
119852e7
KH
2170 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2171 int byte_after_cr = -1;
aa72b389 2172
24a73b0a 2173 CODING_GET_INFO (coding, attrs, charset_list);
aa72b389 2174
aa72b389
KH
2175 while (1)
2176 {
df7492f9
KH
2177 int c;
2178
aa72b389 2179 src_base = src;
df7492f9
KH
2180 consumed_chars_base = consumed_chars;
2181
2182 if (charbuf >= charbuf_end)
2183 break;
aa72b389 2184
119852e7
KH
2185 if (byte_after_cr >= 0)
2186 c = byte_after_cr, byte_after_cr = -1;
2187 else
2188 ONE_MORE_BYTE (c);
065e3595
KH
2189 if (c < 0)
2190 {
2191 *charbuf++ = -c;
2192 char_offset++;
2193 }
2194 else if (c < 0x80)
aa72b389 2195 {
119852e7
KH
2196 if (eol_crlf && c == '\r')
2197 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
2198 *charbuf++ = c;
2199 char_offset++;
aa72b389 2200 }
df7492f9
KH
2201 else if (c == 0x80)
2202 {
df7492f9 2203 ONE_MORE_BYTE (c);
065e3595
KH
2204 if (c < 0)
2205 goto invalid_code;
781d7a48
KH
2206 if (c - 0xF2 >= COMPOSITION_RELATIVE
2207 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
2208 DECODE_EMACS_MULE_21_COMPOSITION (c);
2209 else if (c < 0xC0)
2210 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2211 else if (c == 0xFF)
2212 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2213 else
2214 goto invalid_code;
2215 }
2216 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2217 {
2218 int nbytes, nchars;
ff0dacd7
KH
2219 int id;
2220
781d7a48
KH
2221 src = src_base;
2222 consumed_chars = consumed_chars_base;
ff0dacd7 2223 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
2224 if (c < 0)
2225 {
2226 if (c == -2)
2227 break;
2228 goto invalid_code;
2229 }
ff0dacd7
KH
2230 if (last_id != id)
2231 {
2232 if (last_id != charset_ascii)
69a80ea3 2233 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
2234 last_id = id;
2235 last_offset = char_offset;
2236 }
df7492f9 2237 *charbuf++ = c;
781d7a48
KH
2238 src += nbytes;
2239 consumed_chars += nchars;
df7492f9
KH
2240 char_offset++;
2241 }
4d41e8b7
KH
2242 else
2243 goto invalid_code;
df7492f9
KH
2244 continue;
2245
2246 invalid_code:
2247 src = src_base;
2248 consumed_chars = consumed_chars_base;
2249 ONE_MORE_BYTE (c);
2250 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2251 char_offset++;
df7492f9
KH
2252 coding->errors++;
2253 }
2254
2255 no_more_source:
ff0dacd7 2256 if (last_id != charset_ascii)
69a80ea3 2257 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
2258 coding->consumed_char += consumed_chars_base;
2259 coding->consumed = src_base - coding->source;
2260 coding->charbuf_used = charbuf - coding->charbuf;
2261}
2262
2263
/* Store in CODES[0] and CODES[1] the leading byte(s) that introduce
   the emacs-mule charset id ID.  An ID below 0xA0 is its own single
   leading byte and CODES[1] is set to 0.  A larger ID is stored in
   CODES[1], preceded by one of 0x9A..0x9D chosen by the range ID
   falls in.

   ID is evaluated more than once; pass an expression without side
   effects.  The expansion has no trailing semicolon, so the macro is
   a single statement and is safe before `else'.  */

#define EMACS_MULE_LEADING_CODES(id, codes)     \
  do {                                          \
    if ((id) < 0xA0)                            \
      (codes)[0] = (id), (codes)[1] = 0;        \
    else if ((id) < 0xE0)                       \
      (codes)[0] = 0x9A, (codes)[1] = (id);     \
    else if ((id) < 0xF0)                       \
      (codes)[0] = 0x9B, (codes)[1] = (id);     \
    else if ((id) < 0xF5)                       \
      (codes)[0] = 0x9C, (codes)[1] = (id);     \
    else                                        \
      (codes)[0] = 0x9D, (codes)[1] = (id);     \
  } while (0)
2277
aa72b389 2278
df7492f9
KH
2279static int
2280encode_coding_emacs_mule (coding)
2281 struct coding_system *coding;
2282{
2283 int multibytep = coding->dst_multibyte;
2284 int *charbuf = coding->charbuf;
2285 int *charbuf_end = charbuf + coding->charbuf_used;
2286 unsigned char *dst = coding->destination + coding->produced;
2287 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2288 int safe_room = 8;
df7492f9 2289 int produced_chars = 0;
24a73b0a 2290 Lisp_Object attrs, charset_list;
df7492f9 2291 int c;
ff0dacd7 2292 int preferred_charset_id = -1;
df7492f9 2293
24a73b0a 2294 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2295 if (! EQ (charset_list, Vemacs_mule_charset_list))
2296 {
2297 CODING_ATTR_CHARSET_LIST (attrs)
2298 = charset_list = Vemacs_mule_charset_list;
2299 }
df7492f9
KH
2300
2301 while (charbuf < charbuf_end)
2302 {
2303 ASSURE_DESTINATION (safe_room);
2304 c = *charbuf++;
ff0dacd7
KH
2305
2306 if (c < 0)
2307 {
2308 /* Handle an annotation. */
2309 switch (*charbuf)
2310 {
2311 case CODING_ANNOTATE_COMPOSITION_MASK:
2312 /* Not yet implemented. */
2313 break;
2314 case CODING_ANNOTATE_CHARSET_MASK:
2315 preferred_charset_id = charbuf[3];
2316 if (preferred_charset_id >= 0
2317 && NILP (Fmemq (make_number (preferred_charset_id),
2318 charset_list)))
2319 preferred_charset_id = -1;
2320 break;
2321 default:
2322 abort ();
2323 }
2324 charbuf += -c - 1;
2325 continue;
2326 }
2327
df7492f9
KH
2328 if (ASCII_CHAR_P (c))
2329 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2330 else if (CHAR_BYTE8_P (c))
2331 {
2332 c = CHAR_TO_BYTE8 (c);
2333 EMIT_ONE_BYTE (c);
2334 }
df7492f9 2335 else
aa72b389 2336 {
df7492f9
KH
2337 struct charset *charset;
2338 unsigned code;
2339 int dimension;
2340 int emacs_mule_id;
2341 unsigned char leading_codes[2];
2342
ff0dacd7
KH
2343 if (preferred_charset_id >= 0)
2344 {
2345 charset = CHARSET_FROM_ID (preferred_charset_id);
2346 if (! CHAR_CHARSET_P (c, charset))
2347 charset = char_charset (c, charset_list, NULL);
2348 }
2349 else
2350 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2351 if (! charset)
2352 {
2353 c = coding->default_char;
2354 if (ASCII_CHAR_P (c))
2355 {
2356 EMIT_ONE_ASCII_BYTE (c);
2357 continue;
2358 }
2359 charset = char_charset (c, charset_list, &code);
2360 }
2361 dimension = CHARSET_DIMENSION (charset);
2362 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2363 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2364 EMIT_ONE_BYTE (leading_codes[0]);
2365 if (leading_codes[1])
2366 EMIT_ONE_BYTE (leading_codes[1]);
2367 if (dimension == 1)
1fa663f9 2368 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2369 else
df7492f9 2370 {
1fa663f9 2371 code |= 0x8080;
df7492f9
KH
2372 EMIT_ONE_BYTE (code >> 8);
2373 EMIT_ONE_BYTE (code & 0xFF);
2374 }
aa72b389 2375 }
aa72b389 2376 }
065e3595 2377 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2378 coding->produced_char += produced_chars;
2379 coding->produced = dst - coding->destination;
2380 return 0;
aa72b389 2381}
b73bfc1c 2382
4ed46869 2383\f
df7492f9 2384/*** 7. ISO2022 handlers ***/
4ed46869
KH
2385
2386/* The following note describes the coding system ISO2022 briefly.
39787efd 2387 Since the intention of this note is to help understand the
5a936b46 2388 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2389 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2390 original document of ISO2022. This is equivalent to the standard
cfb43547 2391 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2392
2393 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2394 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2395 is encoded using bytes less than 128. This may make the encoded
2396 text a little bit longer, but the text passes more easily through
cfb43547 2397 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2398 Significant Bit).
b73bfc1c 2399
cfb43547
DL
2400 There are two kinds of character sets: control character sets and
2401 graphic character sets. The former contain control characters such
4ed46869 2402 as `newline' and `escape' to provide control functions (control
39787efd 2403 functions are also provided by escape sequences). The latter
cfb43547 2404 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2405 two control character sets and many graphic character sets.
2406
2407 Graphic character sets are classified into one of the following
39787efd
KH
2408 four classes, according to the number of bytes (DIMENSION) and
2409 number of characters in one dimension (CHARS) of the set:
2410 - DIMENSION1_CHARS94
2411 - DIMENSION1_CHARS96
2412 - DIMENSION2_CHARS94
2413 - DIMENSION2_CHARS96
2414
2415 In addition, each character set is assigned an identification tag,
cfb43547 2416 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2417 hereafter). The <F> of each character set is decided by ECMA(*)
2418 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2419 (0x30..0x3F are for private use only).
4ed46869
KH
2420
2421 Note (*): ECMA = European Computer Manufacturers Association
2422
cfb43547 2423 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2424 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2425 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2426 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2427 o DIMENSION2_CHARS96 -- none for the moment
2428
39787efd 2429 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2430 C0 [0x00..0x1F] -- control character plane 0
2431 GL [0x20..0x7F] -- graphic character plane 0
2432 C1 [0x80..0x9F] -- control character plane 1
2433 GR [0xA0..0xFF] -- graphic character plane 1
2434
2435 A control character set is directly designated and invoked to C0 or
39787efd
KH
2436 C1 by an escape sequence. The most common case is that:
2437 - ISO646's control character set is designated/invoked to C0, and
2438 - ISO6429's control character set is designated/invoked to C1,
2439 and usually these designations/invocations are omitted in encoded
2440 text. In a 7-bit environment, only C0 can be used, and a control
2441 character for C1 is encoded by an appropriate escape sequence to
2442 fit into the environment. All control characters for C1 are
2443 defined to have corresponding escape sequences.
4ed46869
KH
2444
2445 A graphic character set is at first designated to one of four
2446 graphic registers (G0 through G3), then these graphic registers are
2447 invoked to GL or GR. These designations and invocations can be
2448 done independently. The most common case is that G0 is invoked to
39787efd
KH
2449 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2450 these invocations and designations are omitted in encoded text.
2451 In a 7-bit environment, only GL can be used.
4ed46869 2452
39787efd
KH
2453 When a graphic character set of CHARS94 is invoked to GL, codes
2454 0x20 and 0x7F of the GL area work as control characters SPACE and
2455 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2456 be used.
4ed46869
KH
2457
2458 There are two ways of invocation: locking-shift and single-shift.
2459 With locking-shift, the invocation lasts until the next different
39787efd
KH
2460 invocation, whereas with single-shift, the invocation affects the
2461 following character only and doesn't affect the locking-shift
2462 state. Invocations are done by the following control characters or
2463 escape sequences:
4ed46869
KH
2464
2465 ----------------------------------------------------------------------
39787efd 2466 abbrev function cntrl escape seq description
4ed46869 2467 ----------------------------------------------------------------------
39787efd
KH
2468 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2469 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2470 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2471 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2472 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2473 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2474 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2475 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2476 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2477 ----------------------------------------------------------------------
39787efd
KH
2478 (*) These are not used by any known coding system.
2479
2480 Control characters for these functions are defined by macros
2481 ISO_CODE_XXX in `coding.h'.
4ed46869 2482
39787efd 2483 Designations are done by the following escape sequences:
4ed46869
KH
2484 ----------------------------------------------------------------------
2485 escape sequence description
2486 ----------------------------------------------------------------------
2487 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2488 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2489 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2490 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2491 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2492 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2493 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2494 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2495 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2496 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2497 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2498 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2499 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2500 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2501 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2502 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2503 ----------------------------------------------------------------------
2504
2505 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2506 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2507
2508 Note (*): Although these designations are not allowed in ISO2022,
2509 Emacs accepts them on decoding, and produces them on encoding
39787efd 2510 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2511 7-bit environment, non-locking-shift, and non-single-shift.
2512
2513 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2514 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2515
cfb43547 2516 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2517 same multilingual text in ISO2022. Actually, there exist many
2518 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2519 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2520 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2521 localized platforms), and all of these are variants of ISO2022.
2522
2523 In addition to the above, Emacs handles two more kinds of escape
2524 sequences: ISO6429's direction specification and Emacs' private
2525 sequence for specifying character composition.
2526
39787efd 2527 ISO6429's direction specification takes the following form:
4ed46869
KH
2528 o CSI ']' -- end of the current direction
2529 o CSI '0' ']' -- end of the current direction
2530 o CSI '1' ']' -- start of left-to-right text
2531 o CSI '2' ']' -- start of right-to-left text
2532 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2533 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2534
2535 Character composition specification takes the following form:
ec6d2bb8
KH
2536 o ESC '0' -- start relative composition
2537 o ESC '1' -- end composition
2538 o ESC '2' -- start rule-base composition (*)
2539 o ESC '3' -- start relative composition with alternate chars (**)
2540 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2541 Since these are not standard escape sequences of any ISO standard,
cfb43547 2542 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2543
5a936b46
DL
2544 (*) This form is used only in Emacs 20.7 and older versions,
2545 but newer versions can safely decode it.
cfb43547 2546 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2547 and older versions can't decode it.
ec6d2bb8 2548
cfb43547 2549 Here's a list of example usages of these composition escape
b73bfc1c 2550 sequences (categorized by `enum composition_method').
ec6d2bb8 2551
b73bfc1c 2552 COMPOSITION_RELATIVE:
ec6d2bb8 2553 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2554 COMPOSITION_WITH_RULE:
ec6d2bb8 2555 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2556 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2557 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2558 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2559 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2560
2561enum iso_code_class_type iso_code_class[256];
2562
df7492f9
KH
2563#define SAFE_CHARSET_P(coding, id) \
2564 ((id) <= (coding)->max_charset_id \
2565 && (coding)->safe_charsets[id] >= 0)
2566
2567
2568#define SHIFT_OUT_OK(category) \
2569 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2570
2571static void
f0064e1f
DL
2572setup_iso_safe_charsets (attrs)
2573 Lisp_Object attrs;
df7492f9
KH
2574{
2575 Lisp_Object charset_list, safe_charsets;
2576 Lisp_Object request;
2577 Lisp_Object reg_usage;
2578 Lisp_Object tail;
2579 int reg94, reg96;
2580 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2581 int max_charset_id;
2582
2583 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2584 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2585 && ! EQ (charset_list, Viso_2022_charset_list))
2586 {
2587 CODING_ATTR_CHARSET_LIST (attrs)
2588 = charset_list = Viso_2022_charset_list;
2589 ASET (attrs, coding_attr_safe_charsets, Qnil);
2590 }
2591
2592 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2593 return;
2594
2595 max_charset_id = 0;
2596 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2597 {
2598 int id = XINT (XCAR (tail));
2599 if (max_charset_id < id)
2600 max_charset_id = id;
2601 }
d46c5b12 2602
df7492f9
KH
2603 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2604 make_number (255));
2605 request = AREF (attrs, coding_attr_iso_request);
2606 reg_usage = AREF (attrs, coding_attr_iso_usage);
2607 reg94 = XINT (XCAR (reg_usage));
2608 reg96 = XINT (XCDR (reg_usage));
2609
2610 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2611 {
2612 Lisp_Object id;
2613 Lisp_Object reg;
2614 struct charset *charset;
2615
2616 id = XCAR (tail);
2617 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2618 reg = Fcdr (Fassq (id, request));
df7492f9 2619 if (! NILP (reg))
8f924df7 2620 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
2621 else if (charset->iso_chars_96)
2622 {
2623 if (reg96 < 4)
8f924df7 2624 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
2625 }
2626 else
2627 {
2628 if (reg94 < 4)
8f924df7 2629 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
2630 }
2631 }
2632 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2633}
d46c5b12 2634
b6871cc7 2635
4ed46869 2636/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2637 Check if a text is encoded in one of ISO-2022 based coding systems.
2638 If it is, return 1, else return 0. */
4ed46869 2639
0a28aafb 2640static int
ff0dacd7 2641detect_coding_iso_2022 (coding, detect_info)
df7492f9 2642 struct coding_system *coding;
ff0dacd7 2643 struct coding_detection_info *detect_info;
4ed46869 2644{
8f924df7
KH
2645 const unsigned char *src = coding->source, *src_base = src;
2646 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 2647 int multibytep = coding->src_multibyte;
ff0dacd7 2648 int single_shifting = 0;
df7492f9
KH
2649 int id;
2650 int c, c1;
2651 int consumed_chars = 0;
2652 int i;
ff0dacd7
KH
2653 int rejected = 0;
2654 int found = 0;
2655
2656 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2657
2658 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2659 {
2660 struct coding_system *this = &(coding_categories[i]);
2661 Lisp_Object attrs, val;
2662
2663 attrs = CODING_ID_ATTRS (this->id);
2664 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2665 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2666 setup_iso_safe_charsets (attrs);
2667 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
2668 this->max_charset_id = SCHARS (val) - 1;
2669 this->safe_charsets = (char *) SDATA (val);
df7492f9
KH
2670 }
2671
2672 /* A coding system of this category is always ASCII compatible. */
2673 src += coding->head_ascii;
3f003981 2674
ff0dacd7 2675 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2676 {
065e3595 2677 src_base = src;
df7492f9 2678 ONE_MORE_BYTE (c);
4ed46869
KH
2679 switch (c)
2680 {
2681 case ISO_CODE_ESC:
74383408
KH
2682 if (inhibit_iso_escape_detection)
2683 break;
f46869e4 2684 single_shifting = 0;
df7492f9 2685 ONE_MORE_BYTE (c);
d46c5b12 2686 if (c >= '(' && c <= '/')
4ed46869 2687 {
bf9cdd4e 2688 /* Designation sequence for a charset of dimension 1. */
df7492f9 2689 ONE_MORE_BYTE (c1);
d46c5b12 2690 if (c1 < ' ' || c1 >= 0x80
df7492f9 2691 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2692 /* Invalid designation sequence. Just ignore. */
2693 break;
bf9cdd4e
KH
2694 }
2695 else if (c == '$')
2696 {
2697 /* Designation sequence for a charset of dimension 2. */
df7492f9 2698 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2699 if (c >= '@' && c <= 'B')
2700 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2701 id = iso_charset_table[1][0][c];
bf9cdd4e 2702 else if (c >= '(' && c <= '/')
bcf26d6a 2703 {
df7492f9 2704 ONE_MORE_BYTE (c1);
d46c5b12 2705 if (c1 < ' ' || c1 >= 0x80
df7492f9 2706 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2707 /* Invalid designation sequence. Just ignore. */
2708 break;
bcf26d6a 2709 }
bf9cdd4e 2710 else
ff0dacd7 2711 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2712 break;
2713 }
ae9ff118 2714 else if (c == 'N' || c == 'O')
d46c5b12 2715 {
ae9ff118 2716 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2717 single_shifting = 1;
2718 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2719 break;
4ed46869 2720 }
ec6d2bb8
KH
2721 else if (c >= '0' && c <= '4')
2722 {
2723 /* ESC <Fp> for start/end composition. */
ff0dacd7 2724 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2725 break;
2726 }
bf9cdd4e 2727 else
df7492f9 2728 {
ff0dacd7 2729 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2730 break;
2731 }
d46c5b12
KH
2732
2733 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2734 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2735 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2736 id))
ff0dacd7 2737 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2738 else
ff0dacd7 2739 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2740 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2741 id))
ff0dacd7 2742 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2743 else
ff0dacd7 2744 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2745 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2746 id))
ff0dacd7 2747 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2748 else
ff0dacd7 2749 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2750 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2751 id))
ff0dacd7 2752 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2753 else
ff0dacd7 2754 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2755 break;
2756
4ed46869 2757 case ISO_CODE_SO:
d46c5b12 2758 case ISO_CODE_SI:
ff0dacd7 2759 /* Locking shift out/in. */
74383408
KH
2760 if (inhibit_iso_escape_detection)
2761 break;
f46869e4 2762 single_shifting = 0;
ff0dacd7 2763 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12
KH
2764 break;
2765
4ed46869 2766 case ISO_CODE_CSI:
ff0dacd7 2767 /* Control sequence introducer. */
f46869e4 2768 single_shifting = 0;
ff0dacd7
KH
2769 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2770 found |= CATEGORY_MASK_ISO_8_ELSE;
2771 goto check_extra_latin;
2772
4ed46869
KH
2773 case ISO_CODE_SS2:
2774 case ISO_CODE_SS3:
ff0dacd7
KH
2775 /* Single shift. */
2776 if (inhibit_iso_escape_detection)
2777 break;
75e2a253 2778 single_shifting = 0;
ff0dacd7
KH
2779 rejected |= CATEGORY_MASK_ISO_7BIT;
2780 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2781 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253 2782 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
ff0dacd7
KH
2783 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2784 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253
KH
2785 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2786 if (single_shifting)
2787 break;
ff0dacd7 2788 goto check_extra_latin;
4ed46869
KH
2789
2790 default:
065e3595
KH
2791 if (c < 0)
2792 continue;
4ed46869 2793 if (c < 0x80)
f46869e4
KH
2794 {
2795 single_shifting = 0;
2796 break;
2797 }
ff0dacd7 2798 if (c >= 0xA0)
c4825358 2799 {
ff0dacd7
KH
2800 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2801 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2802 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2803 0xA0..0FF. If the byte length is even, we include
2804 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2805 only when we are not single shifting. */
2806 if (! single_shifting
2807 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2808 {
e17de821 2809 int i = 1;
b73bfc1c
KH
2810 while (src < src_end)
2811 {
df7492f9 2812 ONE_MORE_BYTE (c);
b73bfc1c
KH
2813 if (c < 0xA0)
2814 break;
2815 i++;
2816 }
2817
2818 if (i & 1 && src < src_end)
ff0dacd7 2819 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2820 else
ff0dacd7 2821 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2822 }
ff0dacd7 2823 break;
4ed46869 2824 }
ff0dacd7
KH
2825 check_extra_latin:
2826 single_shifting = 0;
2827 if (! VECTORP (Vlatin_extra_code_table)
2828 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2829 {
2830 rejected = CATEGORY_MASK_ISO;
2831 break;
2832 }
2833 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2834 & CODING_ISO_FLAG_LATIN_EXTRA)
2835 found |= CATEGORY_MASK_ISO_8_1;
2836 else
2837 rejected |= CATEGORY_MASK_ISO_8_1;
75e2a253 2838 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2839 }
2840 }
ff0dacd7
KH
2841 detect_info->rejected |= CATEGORY_MASK_ISO;
2842 return 0;
4ed46869 2843
df7492f9 2844 no_more_source:
ff0dacd7
KH
2845 detect_info->rejected |= rejected;
2846 detect_info->found |= (found & ~rejected);
df7492f9 2847 return 1;
4ed46869 2848}
ec6d2bb8 2849
4ed46869 2850
/* Record in CODING that the charset selected by DIM, CHARS_96 and
   FINAL is designated to graphic register REG.  CHARS_96 is set to -1
   when the escape sequence should be kept: either the designation is
   invalid or unsafe (REG is then marked with -2), or it designates
   ASCII to a register whose previous designation was invalid.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int desig_id = -1;                                                  \
    int desig_ok = 0;                                                   \
                                                                        \
    if (final >= '0' && final < 128)                                    \
      {                                                                 \
        desig_id = ISO_CHARSET_TABLE (dim, chars_96, final);            \
        desig_ok = (desig_id >= 0                                       \
                    && SAFE_CHARSET_P (coding, desig_id));              \
      }                                                                 \
    if (! desig_ok)                                                     \
      {                                                                 \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
      }                                                                 \
    else                                                                \
      {                                                                 \
        int desig_prev = CODING_ISO_DESIGNATION (coding, reg);          \
                                                                        \
        /* Optionally map JISX0201-Roman to ASCII and JISX0208.1978     \
           to JISX0208, as requested by the coding system flags.  */    \
        if (desig_id == charset_jisx0201_roman)                         \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)  \
              desig_id = charset_ascii;                                 \
          }                                                             \
        else if (desig_id == charset_jisx0208_1978)                     \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
              desig_id = charset_jisx0208;                              \
          }                                                             \
        CODING_ISO_DESIGNATION (coding, reg) = desig_id;                \
        /* If there was an invalid designation to REG previously, and   \
           this designation is ASCII to REG, we should keep this        \
           designation sequence.  */                                    \
        if (desig_prev == -2 && desig_id == charset_ascii)              \
          chars_96 = -1;                                                \
      }                                                                 \
  } while (0)
2883
d46c5b12 2884
/* If a composition is in progress, abandon it: copy the characters
   collected so far in `components' to CHARBUF as ordinary characters,
   advance CHAR_OFFSET, and reset the composition state.  Jumps to
   `no_more_source' when CHARBUF lacks room.  Does nothing when no
   composition is in progress.  */
#define MAYBE_FINISH_COMPOSITION()                              \
  do {                                                          \
    if (composition_state != COMPOSING_NO)                      \
      {                                                         \
        int comp_i;                                             \
        int comp_plain = (method == COMPOSITION_RELATIVE        \
                          || method == COMPOSITION_WITH_ALTCHARS); \
                                                                \
        /* It is assured that we have enough room for producing \
           characters stored in the table `components'.  */     \
        if (charbuf + component_idx > charbuf_end)              \
          goto no_more_source;                                  \
        composition_state = COMPOSING_NO;                       \
        if (comp_plain)                                         \
          {                                                     \
            /* Every slot is a character.  */                   \
            comp_i = 0;                                         \
            while (comp_i < component_idx)                      \
              {                                                 \
                *charbuf++ = components[comp_i];                \
                comp_i++;                                       \
              }                                                 \
            char_offset += component_idx;                       \
          }                                                     \
        else                                                    \
          {                                                     \
            /* Characters sit in the even slots only.  */       \
            comp_i = 0;                                         \
            while (comp_i < component_idx)                      \
              {                                                 \
                *charbuf++ = components[comp_i];                \
                comp_i += 2;                                    \
              }                                                 \
            char_offset += (component_idx / 2) + 1;             \
          }                                                     \
      }                                                         \
  } while (0)
2909
d46c5b12 2910
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte following ESC.  An ESC 0 seen while the state is
   COMPOSING_COMPONENT_RULE only ends the component list of the
   composition in progress.  Otherwise any pending composition is
   finished and a new one is started, but only if the terminating
   ESC 1 is present in the remaining source.  If it is not, the
   conversion result is recorded as CODING_RESULT_INSUFFICIENT_SRC and
   control jumps to `no_more_source'.  */

#define DECODE_COMPOSITION_START(c1)                                    \
  do {                                                                  \
    if ((c1) == '0'                                                     \
        && composition_state == COMPOSING_COMPONENT_RULE)               \
      {                                                                 \
        component_len = component_idx;                                  \
        composition_state = COMPOSING_CHAR;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        const unsigned char *p;                                         \
                                                                        \
        MAYBE_FINISH_COMPOSITION ();                                    \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
          goto no_more_source;                                          \
        for (p = src; p < src_end - 1; p++)                             \
          if (*p == ISO_CODE_ESC && p[1] == '1')                        \
            break;                                                      \
        /* `>=' rather than `==': when SRC is already at SRC_END the    \
           loop above does not run and P == SRC_END, which must also    \
           count as "terminator not found".  */                         \
        if (p >= src_end - 1)                                           \
          {                                                             \
            /* The current composition doesn't end in the current       \
               source.  */                                              \
            record_conversion_result                                    \
              (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
            goto no_more_source;                                        \
          }                                                             \
                                                                        \
        /* This is surely the start of a composition.  */               \
        method = ((c1) == '0' ? COMPOSITION_RELATIVE                    \
                  : (c1) == '2' ? COMPOSITION_WITH_RULE                 \
                  : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS             \
                  : COMPOSITION_WITH_RULE_ALTCHARS);                    \
        composition_state = ((c1) <= '2' ? COMPOSING_CHAR               \
                             : COMPOSING_COMPONENT_CHAR);               \
        component_idx = component_len = 0;                              \
      }                                                                 \
  } while (0)
2955
ec6d2bb8 2956
/* Handle composition end sequence ESC 1.  Emit the composition
   annotation through ADD_COMPOSITION_DATA, then (for every method but
   the relative one) the component list, then the composed characters
   themselves, advancing CHAR_OFFSET once per character.  Finally mark
   CODING as annotated and leave the composing state.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int nchars;                                                         \
    int end_i;                                                          \
    int *annotation_head = charbuf;                                     \
                                                                        \
    if (component_len > 0)                                              \
      nchars = component_idx - component_len;                           \
    else if (method == COMPOSITION_RELATIVE)                            \
      nchars = component_idx;                                           \
    else                                                                \
      nchars = (component_idx + 1) / 2;                                 \
                                                                        \
    ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        /* Without an explicit component list, everything collected     \
           so far serves as the components.  */                         \
        int end_limit = (component_len == 0                             \
                         ? component_idx : component_len);              \
                                                                        \
        for (end_i = 0; end_i < end_limit; end_i++)                     \
          *charbuf++ = components[end_i];                               \
        /* Store the (negative) distance from the annotation head to    \
           the current position in the first annotation element.  */    \
        *annotation_head = annotation_head - charbuf;                   \
      }                                                                 \
    if (method == COMPOSITION_WITH_RULE)                                \
      {                                                                 \
        /* Characters and rules alternate; copy characters only.  */    \
        for (end_i = 0; end_i < component_idx; end_i += 2)              \
          {                                                             \
            *charbuf++ = components[end_i];                             \
            char_offset++;                                              \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        for (end_i = component_len; end_i < component_idx; end_i++)     \
          {                                                             \
            *charbuf++ = components[end_i];                             \
            char_offset++;                                              \
          }                                                             \
      }                                                                 \
    coding->annotated = 1;                                              \
    composition_state = COMPOSING_NO;                                   \
  } while (0)
2987
df7492f9 2988
/* Decode a composition rule from the byte C1 (and maybe one more byte
   read from SRC into the enclosing scope's C2) and store the encoded
   rule back into C1.  C1 becomes 0 when the byte is outside both
   rule formats.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if (c1 >= 93)                                                       \
      {                                                                 \
        /* Neither format.  */                                          \
        c1 = 0;                                                         \
      }                                                                 \
    else if (c1 >= 81)          /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (c1) / 9;                                            \
        int nref = (c1) % 9;                                            \
                                                                        \
        /* Reference point 4 of the old format maps to 10.  */          \
        gref = (gref == 4 ? 10 : gref);                                 \
        nref = (nref == 4 ? 10 : nref);                                 \
        c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
      }                                                                 \
  } while (0)
88993dfd 3012
d46c5b12 3013
4ed46869
KH
3014/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
3015
b73bfc1c 3016static void
df7492f9 3017decode_coding_iso_2022 (coding)
4ed46869 3018 struct coding_system *coding;
4ed46869 3019{
8f924df7
KH
3020 const unsigned char *src = coding->source + coding->consumed;
3021 const unsigned char *src_end = coding->source + coding->src_bytes;
3022 const unsigned char *src_base;
69a80ea3 3023 int *charbuf = coding->charbuf + coding->charbuf_used;
ff0dacd7 3024 int *charbuf_end
69a80ea3 3025 = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 3026 int consumed_chars = 0, consumed_chars_base;
df7492f9 3027 int multibytep = coding->src_multibyte;
4ed46869 3028 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
3029 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3030 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 3031 int charset_id_2, charset_id_3;
df7492f9
KH
3032 struct charset *charset;
3033 int c;
3034 /* For handling composition sequence. */
3035#define COMPOSING_NO 0
3036#define COMPOSING_CHAR 1
3037#define COMPOSING_RULE 2
3038#define COMPOSING_COMPONENT_CHAR 3
3039#define COMPOSING_COMPONENT_RULE 4
3040
3041 int composition_state = COMPOSING_NO;
3042 enum composition_method method;
3043 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3044 int component_idx;
3045 int component_len;
24a73b0a 3046 Lisp_Object attrs, charset_list;
ff0dacd7
KH
3047 int char_offset = coding->produced_char;
3048 int last_offset = char_offset;
3049 int last_id = charset_ascii;
119852e7
KH
3050 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3051 int byte_after_cr = -1;
df7492f9 3052
24a73b0a 3053 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 3054 setup_iso_safe_charsets (attrs);
287c57d7
KH
3055 /* Charset list may have been changed. */
3056 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3057 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
b73bfc1c
KH
3058
3059 while (1)
4ed46869 3060 {
463f5630 3061 int c1, c2;
b73bfc1c
KH
3062
3063 src_base = src;
df7492f9
KH
3064 consumed_chars_base = consumed_chars;
3065
3066 if (charbuf >= charbuf_end)
3067 break;
3068
119852e7
KH
3069 if (byte_after_cr >= 0)
3070 c1 = byte_after_cr, byte_after_cr = -1;
3071 else
3072 ONE_MORE_BYTE (c1);
065e3595
KH
3073 if (c1 < 0)
3074 goto invalid_code;
4ed46869 3075
98725083 3076 /* We produce at most one character. */
4ed46869
KH
3077 switch (iso_code_class [c1])
3078 {
3079 case ISO_0x20_or_0x7F:
df7492f9 3080 if (composition_state != COMPOSING_NO)
ec6d2bb8 3081 {
df7492f9
KH
3082 if (composition_state == COMPOSING_RULE
3083 || composition_state == COMPOSING_COMPONENT_RULE)
3084 {
3085 DECODE_COMPOSITION_RULE (c1);
3086 components[component_idx++] = c1;
3087 composition_state--;
3088 continue;
3089 }
4ed46869 3090 }
df7492f9
KH
3091 if (charset_id_0 < 0
3092 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
3093 /* This is SPACE or DEL. */
3094 charset = CHARSET_FROM_ID (charset_ascii);
3095 else
3096 charset = CHARSET_FROM_ID (charset_id_0);
3097 break;
4ed46869
KH
3098
3099 case ISO_graphic_plane_0:
781d7a48 3100 if (composition_state != COMPOSING_NO)
b73bfc1c 3101 {
781d7a48
KH
3102 if (composition_state == COMPOSING_RULE
3103 || composition_state == COMPOSING_COMPONENT_RULE)
3104 {
3105 DECODE_COMPOSITION_RULE (c1);
3106 components[component_idx++] = c1;
3107 composition_state--;
3108 continue;
3109 }
b73bfc1c 3110 }
134b9549
KH
3111 if (charset_id_0 < 0)
3112 charset = CHARSET_FROM_ID (charset_ascii);
3113 else
3114 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
3115 break;
3116
3117 case ISO_0xA0_or_0xFF:
df7492f9
KH
3118 if (charset_id_1 < 0
3119 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3120 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3121 goto invalid_code;
4ed46869
KH
3122 /* This is a graphic character, we fall down ... */
3123
3124 case ISO_graphic_plane_1:
df7492f9
KH
3125 if (charset_id_1 < 0)
3126 goto invalid_code;
3127 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
3128 break;
3129
df7492f9 3130 case ISO_control_0:
119852e7
KH
3131 if (eol_crlf && c1 == '\r')
3132 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
3133 MAYBE_FINISH_COMPOSITION ();
3134 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
3135 break;
3136
df7492f9
KH
3137 case ISO_control_1:
3138 MAYBE_FINISH_COMPOSITION ();
3139 goto invalid_code;
3140
4ed46869 3141 case ISO_shift_out:
df7492f9
KH
3142 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3143 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3144 goto invalid_code;
3145 CODING_ISO_INVOCATION (coding, 0) = 1;
3146 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3147 continue;
4ed46869
KH
3148
3149 case ISO_shift_in:
df7492f9
KH
3150 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3151 goto invalid_code;
3152 CODING_ISO_INVOCATION (coding, 0) = 0;
3153 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3154 continue;
4ed46869
KH
3155
3156 case ISO_single_shift_2_7:
3157 case ISO_single_shift_2:
df7492f9
KH
3158 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3159 goto invalid_code;
4ed46869
KH
3160 /* SS2 is handled as an escape sequence of ESC 'N' */
3161 c1 = 'N';
3162 goto label_escape_sequence;
3163
3164 case ISO_single_shift_3:
df7492f9
KH
3165 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3166 goto invalid_code;
4ed46869
KH
3167 /* SS2 is handled as an escape sequence of ESC 'O' */
3168 c1 = 'O';
3169 goto label_escape_sequence;
3170
3171 case ISO_control_sequence_introducer:
3172 /* CSI is handled as an escape sequence of ESC '[' ... */
3173 c1 = '[';
3174 goto label_escape_sequence;
3175
3176 case ISO_escape:
3177 ONE_MORE_BYTE (c1);
3178 label_escape_sequence:
df7492f9 3179 /* Escape sequences handled here are invocation,
4ed46869
KH
3180 designation, direction specification, and character
3181 composition specification. */
3182 switch (c1)
3183 {
3184 case '&': /* revision of following character set */
3185 ONE_MORE_BYTE (c1);
3186 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3187 goto invalid_code;
4ed46869
KH
3188 ONE_MORE_BYTE (c1);
3189 if (c1 != ISO_CODE_ESC)
df7492f9 3190 goto invalid_code;
4ed46869
KH
3191 ONE_MORE_BYTE (c1);
3192 goto label_escape_sequence;
3193
3194 case '$': /* designation of 2-byte character set */
df7492f9
KH
3195 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3196 goto invalid_code;
134b9549
KH
3197 {
3198 int reg, chars96;
3199
3200 ONE_MORE_BYTE (c1);
3201 if (c1 >= '@' && c1 <= 'B')
3202 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3203 or JISX0208.1980 */
134b9549
KH
3204 reg = 0, chars96 = 0;
3205 }
3206 else if (c1 >= 0x28 && c1 <= 0x2B)
3207 { /* designation of DIMENSION2_CHARS94 character set */
3208 reg = c1 - 0x28, chars96 = 0;
3209 ONE_MORE_BYTE (c1);
3210 }
3211 else if (c1 >= 0x2C && c1 <= 0x2F)
3212 { /* designation of DIMENSION2_CHARS96 character set */
3213 reg = c1 - 0x2C, chars96 = 1;
3214 ONE_MORE_BYTE (c1);
3215 }
3216 else
3217 goto invalid_code;
3218 DECODE_DESIGNATION (reg, 2, chars96, c1);
3219 /* We must update these variables now. */
3220 if (reg == 0)
3221 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3222 else if (reg == 1)
3223 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3224 if (chars96 < 0)
3225 goto invalid_code;
3226 }
b73bfc1c 3227 continue;
4ed46869
KH
3228
3229 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3230 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3231 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3232 goto invalid_code;
3233 CODING_ISO_INVOCATION (coding, 0) = 2;
3234 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3235 continue;
4ed46869
KH
3236
3237 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3238 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3239 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3240 goto invalid_code;
3241 CODING_ISO_INVOCATION (coding, 0) = 3;
3242 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3243 continue;
4ed46869
KH
3244
3245 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3246 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3247 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3248 goto invalid_code;
134b9549
KH
3249 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3250 if (charset_id_2 < 0)
3251 charset = CHARSET_FROM_ID (charset_ascii);
3252 else
3253 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3254 ONE_MORE_BYTE (c1);
e7046a18 3255 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3256 goto invalid_code;
4ed46869
KH
3257 break;
3258
3259 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3260 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3261 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3262 goto invalid_code;
134b9549
KH
3263 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3264 if (charset_id_3 < 0)
3265 charset = CHARSET_FROM_ID (charset_ascii);
3266 else
3267 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3268 ONE_MORE_BYTE (c1);
e7046a18 3269 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3270 goto invalid_code;
4ed46869
KH
3271 break;
3272
ec6d2bb8 3273 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3274 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3275 goto invalid_code;
ec6d2bb8 3276 DECODE_COMPOSITION_START (c1);
b73bfc1c 3277 continue;
4ed46869 3278
ec6d2bb8 3279 case '1': /* end composition */
df7492f9
KH
3280 if (composition_state == COMPOSING_NO)
3281 goto invalid_code;
3282 DECODE_COMPOSITION_END ();
b73bfc1c 3283 continue;
4ed46869
KH
3284
3285 case '[': /* specification of direction */
df7492f9
KH
3286 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3287 goto invalid_code;
4ed46869 3288 /* For the moment, nested direction is not supported.
d46c5b12 3289 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3290 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3291 ONE_MORE_BYTE (c1);
3292 switch (c1)
3293 {
3294 case ']': /* end of the current direction */
d46c5b12 3295 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3296
3297 case '0': /* end of the current direction */
3298 case '1': /* start of left-to-right direction */
3299 ONE_MORE_BYTE (c1);
3300 if (c1 == ']')
d46c5b12 3301 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3302 else
df7492f9 3303 goto invalid_code;
4ed46869
KH
3304 break;
3305
3306 case '2': /* start of right-to-left direction */
3307 ONE_MORE_BYTE (c1);
3308 if (c1 == ']')
d46c5b12 3309 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3310 else
df7492f9 3311 goto invalid_code;
4ed46869
KH
3312 break;
3313
3314 default:
df7492f9 3315 goto invalid_code;
4ed46869 3316 }
b73bfc1c 3317 continue;
4ed46869 3318
103e0180 3319 case '%':
103e0180
KH
3320 ONE_MORE_BYTE (c1);
3321 if (c1 == '/')
3322 {
3323 /* CTEXT extended segment:
3324 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3325 We keep these bytes as is for the moment.
3326 They may be decoded by post-read-conversion. */
3327 int dim, M, L;
4776e638 3328 int size;
8f924df7 3329
103e0180
KH
3330 ONE_MORE_BYTE (dim);
3331 ONE_MORE_BYTE (M);
3332 ONE_MORE_BYTE (L);
3333 size = ((M - 128) * 128) + (L - 128);
4776e638
KH
3334 if (charbuf + 8 + size > charbuf_end)
3335 goto break_loop;
3336 *charbuf++ = ISO_CODE_ESC;
3337 *charbuf++ = '%';
3338 *charbuf++ = '/';
3339 *charbuf++ = dim;
3340 *charbuf++ = BYTE8_TO_CHAR (M);
3341 *charbuf++ = BYTE8_TO_CHAR (L);
103e0180
KH
3342 while (size-- > 0)
3343 {
3344 ONE_MORE_BYTE (c1);
4776e638 3345 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3346 }
103e0180
KH
3347 }
3348 else if (c1 == 'G')
3349 {
103e0180
KH
3350 /* XFree86 extension for embedding UTF-8 in CTEXT:
3351 ESC % G --UTF-8-BYTES-- ESC % @
3352 We keep these bytes as is for the moment.
3353 They may be decoded by post-read-conversion. */
4776e638
KH
3354 int *p = charbuf;
3355
3356 if (p + 6 > charbuf_end)
3357 goto break_loop;
3358 *p++ = ISO_CODE_ESC;
3359 *p++ = '%';
3360 *p++ = 'G';
3361 while (p < charbuf_end)
103e0180
KH
3362 {
3363 ONE_MORE_BYTE (c1);
3364 if (c1 == ISO_CODE_ESC
3365 && src + 1 < src_end
3366 && src[0] == '%'
3367 && src[1] == '@')
9ffd559c
KH
3368 {
3369 src += 2;
3370 break;
3371 }
4776e638 3372 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3373 }
4776e638
KH
3374 if (p + 3 > charbuf_end)
3375 goto break_loop;
3376 *p++ = ISO_CODE_ESC;
3377 *p++ = '%';
3378 *p++ = '@';
3379 charbuf = p;
103e0180
KH
3380 }
3381 else
4776e638 3382 goto invalid_code;
103e0180 3383 continue;
4776e638 3384 break;
103e0180 3385
4ed46869 3386 default:
df7492f9
KH
3387 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3388 goto invalid_code;
134b9549
KH
3389 {
3390 int reg, chars96;
3391
3392 if (c1 >= 0x28 && c1 <= 0x2B)
3393 { /* designation of DIMENSION1_CHARS94 character set */
3394 reg = c1 - 0x28, chars96 = 0;
3395 ONE_MORE_BYTE (c1);
3396 }
3397 else if (c1 >= 0x2C && c1 <= 0x2F)
3398 { /* designation of DIMENSION1_CHARS96 character set */
3399 reg = c1 - 0x2C, chars96 = 1;
3400 ONE_MORE_BYTE (c1);
3401 }
3402 else
3403 goto invalid_code;
3404 DECODE_DESIGNATION (reg, 1, chars96, c1);
3405 /* We must update these variables now. */
3406 if (reg == 0)
3407 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3408 else if (reg == 1)
3409 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3410 if (chars96 < 0)
3411 goto invalid_code;
3412 }
b73bfc1c 3413 continue;
4ed46869 3414 }
b73bfc1c 3415 }
4ed46869 3416
ff0dacd7
KH
3417 if (charset->id != charset_ascii
3418 && last_id != charset->id)
3419 {
3420 if (last_id != charset_ascii)
69a80ea3 3421 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
3422 last_id = charset->id;
3423 last_offset = char_offset;
3424 }
3425
b73bfc1c 3426 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3427 Produce a decoded character while getting 2nd position code
3428 C2 if necessary. */
3429 c1 &= 0x7F;
3430 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3431 {
3432 ONE_MORE_BYTE (c2);
df7492f9 3433 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3434 /* C2 is not in a valid range. */
df7492f9
KH
3435 goto invalid_code;
3436 c1 = (c1 << 8) | (c2 & 0x7F);
3437 if (CHARSET_DIMENSION (charset) > 2)
3438 {
3439 ONE_MORE_BYTE (c2);
3440 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3441 /* C2 is not in a valid range. */
3442 goto invalid_code;
3443 c1 = (c1 << 8) | (c2 & 0x7F);
3444 }
3445 }
3446
3447 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3448 if (c < 0)
3449 {
3450 MAYBE_FINISH_COMPOSITION ();
3451 for (; src_base < src; src_base++, char_offset++)
3452 {
3453 if (ASCII_BYTE_P (*src_base))
3454 *charbuf++ = *src_base;
3455 else
3456 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3457 }
3458 }
3459 else if (composition_state == COMPOSING_NO)
3460 {
3461 *charbuf++ = c;
3462 char_offset++;
4ed46869 3463 }
df7492f9 3464 else
781d7a48
KH
3465 {
3466 components[component_idx++] = c;
3467 if (method == COMPOSITION_WITH_RULE
3468 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3469 && composition_state == COMPOSING_COMPONENT_CHAR))
3470 composition_state++;
4ed46869
KH
3471 }
3472 continue;
3473
df7492f9
KH
3474 invalid_code:
3475 MAYBE_FINISH_COMPOSITION ();
4ed46869 3476 src = src_base;
df7492f9
KH
3477 consumed_chars = consumed_chars_base;
3478 ONE_MORE_BYTE (c);
065e3595 3479 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3480 char_offset++;
df7492f9 3481 coding->errors++;
4776e638
KH
3482 continue;
3483
3484 break_loop:
3485 break;
4ed46869 3486 }
fb88bf2d 3487
df7492f9 3488 no_more_source:
ff0dacd7 3489 if (last_id != charset_ascii)
69a80ea3 3490 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
3491 coding->consumed_char += consumed_chars_base;
3492 coding->consumed = src_base - coding->source;
3493 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3494}
3495
b73bfc1c 3496
f4dee582 3497/* ISO2022 encoding stuff. */
4ed46869
KH
3498
3499/*
f4dee582 3500 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3501 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3502 variant has the following specifications:
df7492f9 3503 1. Initial designation to G0 thru G3.
4ed46869
KH
3504 2. Allows short-form designation?
3505 3. ASCII should be designated to G0 before control characters?
3506 4. ASCII should be designated to G0 at end of line?
3507 5. 7-bit environment or 8-bit environment?
3508 6. Use locking-shift?
3509 7. Use Single-shift?
3510 And the following two are only for Japanese:
3511 8. Use ASCII in place of JIS0201-1976-Roman?
3512 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3513 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3514 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3515 details.
4ed46869
KH
3516*/
3517
/* Emit the escape sequence that designates CHARSET to graphic
   register REG, and record the designation in CODING.  When the
   coding system has CODING_ISO_FLAG_REVISION set and CHARSET has a
   non-negative revision, an `ESC & <rev>' prefix is emitted first.
   For a 94-char multi-dimension charset designated to G0 whose final
   character is '@', 'A', or 'B', the intermediate character is
   omitted (short form) unless CODING_ISO_FLAG_LONG_FORM is set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    char *inter_94 = "()*+";                                            \
    char *inter_96 = ",-./";                                            \
    int revision = -1;                                                  \
    int c;                                                              \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      revision = CHARSET_ISO_REVISION (charset);                        \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          {                                                             \
            EMIT_ONE_ASCII_BYTE (inter_96[reg]);                        \
          }                                                             \
        else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM  \
                 || reg != 0                                            \
                 || final_char < '@' || final_char > 'B')               \
          {                                                             \
            EMIT_ONE_ASCII_BYTE (inter_94[reg]);                        \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          c = inter_96[reg];                                            \
        else                                                            \
          c = inter_94[reg];                                            \
        EMIT_ONE_ASCII_BYTE (c);                                        \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3565
df7492f9 3566
/* Emit an ISO2022 single-shift function: the two-byte escape form
   (ESC N / ESC O) when the coding system is flagged seven-bit,
   otherwise the one-byte control code (SS2 / SS3).  Either way,
   CODING is marked as being in the single-shifting state.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS2);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS3);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3589
df7492f9 3590
4ed46869
KH
3591/* The following four macros produce codes (control character or
3592 escape sequence) for ISO2022 locking-shift functions (shift-in,
3593 shift-out, locking-shift-2, and locking-shift-3). */
3594
df7492f9
KH
3595#define ENCODE_SHIFT_IN \
3596 do { \
3597 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3598 CODING_ISO_INVOCATION (coding, 0) = 0; \
4ed46869
KH
3599 } while (0)
3600
df7492f9
KH
3601
3602#define ENCODE_SHIFT_OUT \
3603 do { \
3604 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3605 CODING_ISO_INVOCATION (coding, 0) = 1; \
4ed46869
KH
3606 } while (0)
3607
df7492f9
KH
3608
3609#define ENCODE_LOCKING_SHIFT_2 \
3610 do { \
3611 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3612 CODING_ISO_INVOCATION (coding, 0) = 2; \
4ed46869
KH
3613 } while (0)
3614
df7492f9
KH
3615
/* Produce the locking-shift-3 (LS3) escape sequence, ESC 'o', and
   record that graphic register 3 is now invoked to graphic plane 0.
   (ESC 'n' is locking-shift-2 and must not be used here.)  */
#define ENCODE_LOCKING_SHIFT_3				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');		\
    CODING_ISO_INVOCATION (coding, 0) = 3;		\
  } while (0)
3621
df7492f9 3622
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: it is reassigned to the JISX0201-Roman
   charset when the coding system has CODING_ISO_FLAG_USE_ROMAN set
   and CHARSET is ASCII.  C1 may be an arbitrary expression; it is
   parenthesized at every use.  The macro uses `coding', `dst' and
   `produced_chars' from the expansion context.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)			\
  do {									\
    int id = CHARSET_ID (charset);					\
									\
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)	\
	&& id == charset_ascii)						\
      {									\
	id = charset_jisx0201_roman;					\
	charset = CHARSET_FROM_ID (id);					\
      }									\
									\
    if (CODING_ISO_SINGLE_SHIFTING (coding))				\
      {									\
	if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
	  EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);				\
	else								\
	  EMIT_ONE_BYTE ((c1) | 0x80);					\
	CODING_ISO_SINGLE_SHIFTING (coding) = 0;			\
	break;								\
      }									\
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))		\
      {									\
	EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);				\
	break;								\
      }									\
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))		\
      {									\
	EMIT_ONE_BYTE ((c1) | 0x80);					\
	break;								\
      }									\
    else								\
      /* Since CHARSET is not yet invoked to any graphic planes, we	\
	 must invoke it, or, at first, designate it to some graphic	\
	 register.  Then repeat the loop to actually produce the	\
	 character.  */							\
      dst = encode_invocation_designation (charset, coding, dst,	\
					   &produced_chars);		\
  } while (1)
3665
df7492f9 3666
f4dee582
RS
3667/* Produce codes for a DIMENSION2 character whose character set is
3668 CHARSET and whose position-codes are C1 and C2. Designation and
4ed46869
KH
3669 invocation codes are also produced in advance if necessary. */
3670
6e85d753
KH
3671#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3672 do { \
df7492f9 3673 int id = CHARSET_ID (charset); \
bf16eb23
KH
3674 \
3675 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3676 && id == charset_jisx0208) \
3677 { \
3678 id = charset_jisx0208_1978; \
3679 charset = CHARSET_FROM_ID (id); \
3680 } \
3681 \
df7492f9 3682 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3683 { \
df7492f9
KH
3684 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3685 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753 3686 else \
df7492f9
KH
3687 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3688 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3689 break; \
3690 } \
df7492f9 3691 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3692 { \
df7492f9 3693 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753
KH
3694 break; \
3695 } \
df7492f9 3696 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3697 { \
df7492f9 3698 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
6e85d753
KH
3699 break; \
3700 } \
6e85d753
KH
3701 else \
3702 /* Since CHARSET is not yet invoked to any graphic planes, we \
3703 must invoke it, or, at first, designate it to some graphic \
3704 register. Then repeat the loop to actually produce the \
3705 character. */ \
df7492f9
KH
3706 dst = encode_invocation_designation (charset, coding, dst, \
3707 &produced_chars); \
4ed46869
KH
3708 } while (1)
3709
05e6f5dc 3710
df7492f9
KH
3711#define ENCODE_ISO_CHARACTER(charset, c) \
3712 do { \
3713 int code = ENCODE_CHAR ((charset),(c)); \
3714 \
3715 if (CHARSET_DIMENSION (charset) == 1) \
3716 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3717 else \
3718 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
84fbb8a0 3719 } while (0)
bdd9fb48 3720
05e6f5dc 3721
4ed46869 3722/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3723 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3724 Return new DST. */
3725
3726unsigned char *
df7492f9
KH
3727encode_invocation_designation (charset, coding, dst, p_nchars)
3728 struct charset *charset;
4ed46869
KH
3729 struct coding_system *coding;
3730 unsigned char *dst;
df7492f9 3731 int *p_nchars;
4ed46869 3732{
df7492f9
KH
3733 int multibytep = coding->dst_multibyte;
3734 int produced_chars = *p_nchars;
4ed46869 3735 int reg; /* graphic register number */
df7492f9 3736 int id = CHARSET_ID (charset);
4ed46869
KH
3737
3738 /* At first, check designations. */
3739 for (reg = 0; reg < 4; reg++)
df7492f9 3740 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3741 break;
3742
3743 if (reg >= 4)
3744 {
3745 /* CHARSET is not yet designated to any graphic registers. */
3746 /* At first check the requested designation. */
df7492f9
KH
3747 reg = CODING_ISO_REQUEST (coding, id);
3748 if (reg < 0)
1ba9e4ab
KH
3749 /* Since CHARSET requests no special designation, designate it
3750 to graphic register 0. */
4ed46869
KH
3751 reg = 0;
3752
3753 ENCODE_DESIGNATION (charset, reg, coding);
3754 }
3755
df7492f9
KH
3756 if (CODING_ISO_INVOCATION (coding, 0) != reg
3757 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3758 {
3759 /* Since the graphic register REG is not invoked to any graphic
3760 planes, invoke it to graphic plane 0. */
3761 switch (reg)
3762 {
3763 case 0: /* graphic register 0 */
3764 ENCODE_SHIFT_IN;
3765 break;
3766
3767 case 1: /* graphic register 1 */
3768 ENCODE_SHIFT_OUT;
3769 break;
3770
3771 case 2: /* graphic register 2 */
df7492f9 3772 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3773 ENCODE_SINGLE_SHIFT_2;
3774 else
3775 ENCODE_LOCKING_SHIFT_2;
3776 break;
3777
3778 case 3: /* graphic register 3 */
df7492f9 3779 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3780 ENCODE_SINGLE_SHIFT_3;
3781 else
3782 ENCODE_LOCKING_SHIFT_3;
3783 break;
3784 }
3785 }
b73bfc1c 3786
df7492f9 3787 *p_nchars = produced_chars;
4ed46869
KH
3788 return dst;
3789}
3790
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.  They use `coding' from the expansion context.  */

/* Produce the control sequence introducer: ESC '[' when the coding
   system has CODING_ISO_FLAG_SEVEN_BITS set, otherwise the single
   byte ISO_CODE_CSI.  The flag word is a bit mask, so the flag is
   tested with `&' rather than compared for equality.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER				\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');				\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_CSI);					\
  } while (0)


/* Produce the sequence CSI '2' ']' (right-to-left direction).  */
#define ENCODE_DIRECTION_R2L()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('2', ']');		\
  } while (0)


/* Produce the sequence CSI '0' ']' (left-to-right direction).  */
#define ENCODE_DIRECTION_L2R()			\
  do {						\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;		\
    EMIT_TWO_ASCII_BYTES ('0', ']');		\
  } while (0)
4ed46869 3814
4ed46869
KH
3815
3816/* Produce codes for designation and invocation to reset the graphic
3817 planes and registers to initial state. */
df7492f9
KH
3818#define ENCODE_RESET_PLANE_AND_REGISTER() \
3819 do { \
3820 int reg; \
3821 struct charset *charset; \
3822 \
3823 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3824 ENCODE_SHIFT_IN; \
3825 for (reg = 0; reg < 4; reg++) \
3826 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3827 && (CODING_ISO_DESIGNATION (coding, reg) \
3828 != CODING_ISO_INITIAL (coding, reg))) \
3829 { \
3830 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3831 ENCODE_DESIGNATION (charset, reg, coding); \
3832 } \
4ed46869
KH
3833 } while (0)
3834
df7492f9 3835
bdd9fb48 3836/* Produce designation sequences of charsets in the line started from
b73bfc1c 3837 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3838
3839 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3840 find all the necessary designations. */
3841
b73bfc1c 3842static unsigned char *
df7492f9 3843encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3844 struct coding_system *coding;
df7492f9
KH
3845 int *charbuf, *charbuf_end;
3846 unsigned char *dst;
e0e989f6 3847{
df7492f9 3848 struct charset *charset;
bdd9fb48
KH
3849 /* Table of charsets to be designated to each graphic register. */
3850 int r[4];
df7492f9
KH
3851 int c, found = 0, reg;
3852 int produced_chars = 0;
3853 int multibytep = coding->dst_multibyte;
3854 Lisp_Object attrs;
3855 Lisp_Object charset_list;
3856
3857 attrs = CODING_ID_ATTRS (coding->id);
3858 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3859 if (EQ (charset_list, Qiso_2022))
3860 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3861
3862 for (reg = 0; reg < 4; reg++)
3863 r[reg] = -1;
3864
b73bfc1c 3865 while (found < 4)
e0e989f6 3866 {
df7492f9
KH
3867 int id;
3868
3869 c = *charbuf++;
b73bfc1c
KH
3870 if (c == '\n')
3871 break;
df7492f9
KH
3872 charset = char_charset (c, charset_list, NULL);
3873 id = CHARSET_ID (charset);
3874 reg = CODING_ISO_REQUEST (coding, id);
3875 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3876 {
3877 found++;
df7492f9 3878 r[reg] = id;
bdd9fb48 3879 }
bdd9fb48
KH
3880 }
3881
3882 if (found)
3883 {
3884 for (reg = 0; reg < 4; reg++)
3885 if (r[reg] >= 0
df7492f9
KH
3886 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3887 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3888 }
b73bfc1c
KH
3889
3890 return dst;
e0e989f6
KH
3891}
3892
4ed46869
KH
3893/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3894
df7492f9
KH
3895static int
3896encode_coding_iso_2022 (coding)
4ed46869 3897 struct coding_system *coding;
4ed46869 3898{
df7492f9
KH
3899 int multibytep = coding->dst_multibyte;
3900 int *charbuf = coding->charbuf;
3901 int *charbuf_end = charbuf + coding->charbuf_used;
3902 unsigned char *dst = coding->destination + coding->produced;
3903 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3904 int safe_room = 16;
3905 int bol_designation
3906 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3907 && CODING_ISO_BOL (coding));
3908 int produced_chars = 0;
3909 Lisp_Object attrs, eol_type, charset_list;
3910 int ascii_compatible;
b73bfc1c 3911 int c;
ff0dacd7 3912 int preferred_charset_id = -1;
05e6f5dc 3913
24a73b0a
KH
3914 CODING_GET_INFO (coding, attrs, charset_list);
3915 eol_type = CODING_ID_EOL_TYPE (coding->id);
3916 if (VECTORP (eol_type))
3917 eol_type = Qunix;
3918
004068e4 3919 setup_iso_safe_charsets (attrs);
ff0dacd7 3920 /* Charset list may have been changed. */
287c57d7 3921 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8f924df7 3922 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
0eecad43 3923
df7492f9 3924 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
bdd9fb48 3925
df7492f9 3926 while (charbuf < charbuf_end)
4ed46869 3927 {
df7492f9 3928 ASSURE_DESTINATION (safe_room);
b73bfc1c 3929
df7492f9 3930 if (bol_designation)
b73bfc1c 3931 {
df7492f9 3932 unsigned char *dst_prev = dst;
4ed46869 3933
bdd9fb48 3934 /* We have to produce designation sequences if any now. */
df7492f9
KH
3935 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3936 bol_designation = 0;
3937 /* We are sure that designation sequences are all ASCII bytes. */
3938 produced_chars += dst - dst_prev;
e0e989f6
KH
3939 }
3940
df7492f9 3941 c = *charbuf++;
ec6d2bb8 3942
ff0dacd7
KH
3943 if (c < 0)
3944 {
3945 /* Handle an annotation. */
3946 switch (*charbuf)
ec6d2bb8 3947 {
ff0dacd7
KH
3948 case CODING_ANNOTATE_COMPOSITION_MASK:
3949 /* Not yet implemented. */
3950 break;
3951 case CODING_ANNOTATE_CHARSET_MASK:
cf7dfdf5 3952 preferred_charset_id = charbuf[2];
ff0dacd7
KH
3953 if (preferred_charset_id >= 0
3954 && NILP (Fmemq (make_number (preferred_charset_id),
3955 charset_list)))
3956 preferred_charset_id = -1;
3957 break;
3958 default:
3959 abort ();
4ed46869 3960 }
ff0dacd7
KH
3961 charbuf += -c - 1;
3962 continue;
4ed46869 3963 }
ec6d2bb8 3964
b73bfc1c
KH
3965 /* Now encode the character C. */
3966 if (c < 0x20 || c == 0x7F)
3967 {
df7492f9
KH
3968 if (c == '\n'
3969 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3970 {
df7492f9
KH
3971 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3972 ENCODE_RESET_PLANE_AND_REGISTER ();
3973 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3974 {
df7492f9
KH
3975 int i;
3976
3977 for (i = 0; i < 4; i++)
3978 CODING_ISO_DESIGNATION (coding, i)
3979 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3980 }
df7492f9
KH
3981 bol_designation
3982 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3983 }
df7492f9
KH
3984 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3985 ENCODE_RESET_PLANE_AND_REGISTER ();
3986 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3987 }
df7492f9 3988 else if (ASCII_CHAR_P (c))
88993dfd 3989 {
df7492f9
KH
3990 if (ascii_compatible)
3991 EMIT_ONE_ASCII_BYTE (c);
93dec019 3992 else
19a8d9e0 3993 {
bf16eb23
KH
3994 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3995 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 3996 }
4ed46869 3997 }
16eafb5d 3998 else if (CHAR_BYTE8_P (c))
88993dfd 3999 {
16eafb5d
KH
4000 c = CHAR_TO_BYTE8 (c);
4001 EMIT_ONE_BYTE (c);
88993dfd 4002 }
b73bfc1c 4003 else
df7492f9 4004 {
ff0dacd7 4005 struct charset *charset;
b73bfc1c 4006
ff0dacd7
KH
4007 if (preferred_charset_id >= 0)
4008 {
4009 charset = CHARSET_FROM_ID (preferred_charset_id);
4010 if (! CHAR_CHARSET_P (c, charset))
4011 charset = char_charset (c, charset_list, NULL);
4012 }
4013 else
4014 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
4015 if (!charset)
4016 {
41cbe562
KH
4017 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4018 {
4019 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4020 charset = CHARSET_FROM_ID (charset_ascii);
4021 }
4022 else
4023 {
4024 c = coding->default_char;
4025 charset = char_charset (c, charset_list, NULL);
4026 }
df7492f9
KH
4027 }
4028 ENCODE_ISO_CHARACTER (charset, c);
4029 }
84fbb8a0 4030 }
b73bfc1c 4031
df7492f9
KH
4032 if (coding->mode & CODING_MODE_LAST_BLOCK
4033 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4034 {
4035 ASSURE_DESTINATION (safe_room);
4036 ENCODE_RESET_PLANE_AND_REGISTER ();
4037 }
065e3595 4038 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4039 CODING_ISO_BOL (coding) = bol_designation;
4040 coding->produced_char += produced_chars;
4041 coding->produced = dst - coding->destination;
4042 return 0;
4ed46869
KH
4043}
4044
4045\f
df7492f9 4046/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 4047
df7492f9 4048/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
4049 quite widely. So, for the moment, Emacs supports them in the bare
4050 C code. But, in the future, they may be supported only by CCL. */
4051
4052/* SJIS is a coding system encoding three character sets: ASCII, right
4053 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
4054 as is. A character of charset katakana-jisx0201 is encoded by
4055 "position-code + 0x80". A character of charset japanese-jisx0208
4056 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 4057 so that it fit in the range below.
4ed46869
KH
4058
4059 --- CODE RANGE of SJIS ---
4060 (character set) (range)
4061 ASCII 0x00 .. 0x7F
df7492f9 4062 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 4063 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 4064 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
4065 -------------------------------
4066
4067*/
4068
4069/* BIG5 is a coding system encoding two character sets: ASCII and
4070 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 4071 character set and is encoded in two-byte.
4ed46869
KH
4072
4073 --- CODE RANGE of BIG5 ---
4074 (character set) (range)
4075 ASCII 0x00 .. 0x7F
4076 Big5 (1st byte) 0xA1 .. 0xFE
4077 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
4078 --------------------------
4079
df7492f9 4080 */
4ed46869
KH
4081
4082/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4083 Check if a text is encoded in SJIS. If it is, return
df7492f9 4084 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 4085
0a28aafb 4086static int
ff0dacd7 4087detect_coding_sjis (coding, detect_info)
df7492f9 4088 struct coding_system *coding;
ff0dacd7 4089 struct coding_detection_info *detect_info;
4ed46869 4090{
065e3595 4091 const unsigned char *src = coding->source, *src_base;
8f924df7 4092 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4093 int multibytep = coding->src_multibyte;
4094 int consumed_chars = 0;
4095 int found = 0;
b73bfc1c 4096 int c;
df7492f9 4097
ff0dacd7 4098 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
4099 /* A coding system of this category is always ASCII compatible. */
4100 src += coding->head_ascii;
4ed46869 4101
b73bfc1c 4102 while (1)
4ed46869 4103 {
065e3595 4104 src_base = src;
df7492f9 4105 ONE_MORE_BYTE (c);
682169fe
KH
4106 if (c < 0x80)
4107 continue;
df7492f9 4108 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 4109 {
df7492f9 4110 ONE_MORE_BYTE (c);
682169fe 4111 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 4112 break;
ff0dacd7 4113 found = CATEGORY_MASK_SJIS;
4ed46869 4114 }
df7492f9 4115 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 4116 found = CATEGORY_MASK_SJIS;
df7492f9
KH
4117 else
4118 break;
4ed46869 4119 }
ff0dacd7 4120 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
4121 return 0;
4122
4123 no_more_source:
065e3595 4124 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4125 {
ff0dacd7 4126 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 4127 return 0;
4ed46869 4128 }
ff0dacd7
KH
4129 detect_info->found |= found;
4130 return 1;
4ed46869
KH
4131}
4132
4133/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4134 Check if a text is encoded in BIG5. If it is, return
df7492f9 4135 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 4136
0a28aafb 4137static int
ff0dacd7 4138detect_coding_big5 (coding, detect_info)
df7492f9 4139 struct coding_system *coding;
ff0dacd7 4140 struct coding_detection_info *detect_info;
4ed46869 4141{
065e3595 4142 const unsigned char *src = coding->source, *src_base;
8f924df7 4143 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4144 int multibytep = coding->src_multibyte;
4145 int consumed_chars = 0;
4146 int found = 0;
b73bfc1c 4147 int c;
fa42c37f 4148
ff0dacd7 4149 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4150 /* A coding system of this category is always ASCII compatible. */
4151 src += coding->head_ascii;
fa42c37f 4152
b73bfc1c 4153 while (1)
fa42c37f 4154 {
065e3595 4155 src_base = src;
df7492f9
KH
4156 ONE_MORE_BYTE (c);
4157 if (c < 0x80)
fa42c37f 4158 continue;
df7492f9 4159 if (c >= 0xA1)
fa42c37f 4160 {
df7492f9
KH
4161 ONE_MORE_BYTE (c);
4162 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4163 return 0;
ff0dacd7 4164 found = CATEGORY_MASK_BIG5;
fa42c37f 4165 }
df7492f9
KH
4166 else
4167 break;
fa42c37f 4168 }
ff0dacd7 4169 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4170 return 0;
fa42c37f 4171
df7492f9 4172 no_more_source:
065e3595 4173 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4174 {
ff0dacd7 4175 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4176 return 0;
4177 }
ff0dacd7
KH
4178 detect_info->found |= found;
4179 return 1;
fa42c37f
KH
4180}
4181
4ed46869
KH
4182/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4183 If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */
fa42c37f 4184
b73bfc1c 4185static void
df7492f9 4186decode_coding_sjis (coding)
4ed46869 4187 struct coding_system *coding;
4ed46869 4188{
8f924df7
KH
4189 const unsigned char *src = coding->source + coding->consumed;
4190 const unsigned char *src_end = coding->source + coding->src_bytes;
4191 const unsigned char *src_base;
69a80ea3
KH
4192 int *charbuf = coding->charbuf + coding->charbuf_used;
4193 int *charbuf_end
4194 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4195 int consumed_chars = 0, consumed_chars_base;
4196 int multibytep = coding->src_multibyte;
4197 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4198 struct charset *charset_kanji2;
24a73b0a 4199 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4200 int char_offset = coding->produced_char;
4201 int last_offset = char_offset;
4202 int last_id = charset_ascii;
119852e7
KH
4203 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4204 int byte_after_cr = -1;
a5d301df 4205
24a73b0a 4206 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4207
4208 val = charset_list;
4209 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4210 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4211 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4212 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4213
b73bfc1c 4214 while (1)
4ed46869 4215 {
df7492f9 4216 int c, c1;
24a73b0a 4217 struct charset *charset;
fa42c37f 4218
b73bfc1c 4219 src_base = src;
df7492f9 4220 consumed_chars_base = consumed_chars;
fa42c37f 4221
df7492f9
KH
4222 if (charbuf >= charbuf_end)
4223 break;
4224
119852e7
KH
4225 if (byte_after_cr >= 0)
4226 c = byte_after_cr, byte_after_cr = -1;
4227 else
4228 ONE_MORE_BYTE (c);
065e3595
KH
4229 if (c < 0)
4230 goto invalid_code;
24a73b0a 4231 if (c < 0x80)
119852e7
KH
4232 {
4233 if (eol_crlf && c == '\r')
4234 ONE_MORE_BYTE (byte_after_cr);
4235 charset = charset_roman;
4236 }
57a47f8a 4237 else if (c == 0x80 || c == 0xA0)
8e921c4b 4238 goto invalid_code;
57a47f8a
KH
4239 else if (c >= 0xA1 && c <= 0xDF)
4240 {
4241 /* SJIS -> JISX0201-Kana */
4242 c &= 0x7F;
4243 charset = charset_kana;
4244 }
4245 else if (c <= 0xEF)
df7492f9 4246 {
57a47f8a
KH
4247 /* SJIS -> JISX0208 */
4248 ONE_MORE_BYTE (c1);
4249 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4250 goto invalid_code;
57a47f8a
KH
4251 c = (c << 8) | c1;
4252 SJIS_TO_JIS (c);
4253 charset = charset_kanji;
4254 }
4255 else if (c <= 0xFC && charset_kanji2)
4256 {
c6876370 4257 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4258 ONE_MORE_BYTE (c1);
4259 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4260 goto invalid_code;
57a47f8a
KH
4261 c = (c << 8) | c1;
4262 SJIS_TO_JIS2 (c);
4263 charset = charset_kanji2;
df7492f9 4264 }
57a47f8a
KH
4265 else
4266 goto invalid_code;
24a73b0a
KH
4267 if (charset->id != charset_ascii
4268 && last_id != charset->id)
4269 {
4270 if (last_id != charset_ascii)
69a80ea3 4271 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4272 last_id = charset->id;
4273 last_offset = char_offset;
4274 }
4275 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4276 *charbuf++ = c;
ff0dacd7 4277 char_offset++;
df7492f9 4278 continue;
b73bfc1c 4279
df7492f9
KH
4280 invalid_code:
4281 src = src_base;
4282 consumed_chars = consumed_chars_base;
4283 ONE_MORE_BYTE (c);
065e3595 4284 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4285 char_offset++;
df7492f9
KH
4286 coding->errors++;
4287 }
fa42c37f 4288
df7492f9 4289 no_more_source:
ff0dacd7 4290 if (last_id != charset_ascii)
69a80ea3 4291 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4292 coding->consumed_char += consumed_chars_base;
4293 coding->consumed = src_base - coding->source;
4294 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4295}
4296
b73bfc1c 4297static void
df7492f9 4298decode_coding_big5 (coding)
4ed46869 4299 struct coding_system *coding;
4ed46869 4300{
8f924df7
KH
4301 const unsigned char *src = coding->source + coding->consumed;
4302 const unsigned char *src_end = coding->source + coding->src_bytes;
4303 const unsigned char *src_base;
69a80ea3
KH
4304 int *charbuf = coding->charbuf + coding->charbuf_used;
4305 int *charbuf_end
4306 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4307 int consumed_chars = 0, consumed_chars_base;
4308 int multibytep = coding->src_multibyte;
4309 struct charset *charset_roman, *charset_big5;
24a73b0a 4310 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4311 int char_offset = coding->produced_char;
4312 int last_offset = char_offset;
4313 int last_id = charset_ascii;
119852e7
KH
4314 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4315 int byte_after_cr = -1;
df7492f9 4316
24a73b0a 4317 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4318 val = charset_list;
4319 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4320 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4321
b73bfc1c 4322 while (1)
4ed46869 4323 {
df7492f9 4324 int c, c1;
24a73b0a 4325 struct charset *charset;
b73bfc1c
KH
4326
4327 src_base = src;
df7492f9
KH
4328 consumed_chars_base = consumed_chars;
4329
4330 if (charbuf >= charbuf_end)
4331 break;
4332
119852e7 4333 if (byte_after_cr >= 0)
14daee73 4334 c = byte_after_cr, byte_after_cr = -1;
119852e7
KH
4335 else
4336 ONE_MORE_BYTE (c);
b73bfc1c 4337
065e3595
KH
4338 if (c < 0)
4339 goto invalid_code;
24a73b0a 4340 if (c < 0x80)
119852e7 4341 {
14daee73 4342 if (eol_crlf && c == '\r')
119852e7
KH
4343 ONE_MORE_BYTE (byte_after_cr);
4344 charset = charset_roman;
4345 }
24a73b0a 4346 else
4ed46869 4347 {
24a73b0a
KH
4348 /* BIG5 -> Big5 */
4349 if (c < 0xA1 || c > 0xFE)
4350 goto invalid_code;
4351 ONE_MORE_BYTE (c1);
4352 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4353 goto invalid_code;
4354 c = c << 8 | c1;
4355 charset = charset_big5;
4ed46869 4356 }
24a73b0a
KH
4357 if (charset->id != charset_ascii
4358 && last_id != charset->id)
df7492f9 4359 {
24a73b0a 4360 if (last_id != charset_ascii)
69a80ea3 4361 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4362 last_id = charset->id;
4363 last_offset = char_offset;
4ed46869 4364 }
24a73b0a 4365 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4366 *charbuf++ = c;
ff0dacd7 4367 char_offset++;
fb88bf2d
KH
4368 continue;
4369
df7492f9 4370 invalid_code:
4ed46869 4371 src = src_base;
df7492f9
KH
4372 consumed_chars = consumed_chars_base;
4373 ONE_MORE_BYTE (c);
065e3595 4374 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4375 char_offset++;
df7492f9 4376 coding->errors++;
fb88bf2d 4377 }
d46c5b12 4378
df7492f9 4379 no_more_source:
ff0dacd7 4380 if (last_id != charset_ascii)
69a80ea3 4381 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4382 coding->consumed_char += consumed_chars_base;
4383 coding->consumed = src_base - coding->source;
4384 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4385}
4386
4387/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4388 This function can encode charsets `ascii', `katakana-jisx0201',
4389 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4390 are sure that all these charsets are registered as official charset
4ed46869
KH
4391 (i.e. do not have extended leading-codes). Characters of other
4392 charsets are produced without any encoding. If SJIS_P is 1, encode
4393 SJIS text, else encode BIG5 text. */
4394
df7492f9
KH
4395static int
4396encode_coding_sjis (coding)
4ed46869 4397 struct coding_system *coding;
4ed46869 4398{
df7492f9
KH
4399 int multibytep = coding->dst_multibyte;
4400 int *charbuf = coding->charbuf;
4401 int *charbuf_end = charbuf + coding->charbuf_used;
4402 unsigned char *dst = coding->destination + coding->produced;
4403 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4404 int safe_room = 4;
4405 int produced_chars = 0;
24a73b0a 4406 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4407 int ascii_compatible;
4408 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4409 struct charset *charset_kanji2;
df7492f9 4410 int c;
a5d301df 4411
24a73b0a 4412 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4413 val = charset_list;
4414 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4415 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4416 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4417 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4418
df7492f9 4419 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 4420
df7492f9
KH
4421 while (charbuf < charbuf_end)
4422 {
4423 ASSURE_DESTINATION (safe_room);
4424 c = *charbuf++;
b73bfc1c 4425 /* Now encode the character C. */
df7492f9
KH
4426 if (ASCII_CHAR_P (c) && ascii_compatible)
4427 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4428 else if (CHAR_BYTE8_P (c))
4429 {
4430 c = CHAR_TO_BYTE8 (c);
4431 EMIT_ONE_BYTE (c);
4432 }
df7492f9 4433 else
b73bfc1c 4434 {
df7492f9
KH
4435 unsigned code;
4436 struct charset *charset = char_charset (c, charset_list, &code);
4437
4438 if (!charset)
4ed46869 4439 {
41cbe562 4440 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4441 {
41cbe562
KH
4442 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4443 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4444 }
41cbe562 4445 else
b73bfc1c 4446 {
41cbe562
KH
4447 c = coding->default_char;
4448 charset = char_charset (c, charset_list, &code);
b73bfc1c 4449 }
b73bfc1c 4450 }
df7492f9
KH
4451 if (code == CHARSET_INVALID_CODE (charset))
4452 abort ();
4453 if (charset == charset_kanji)
4454 {
4455 int c1, c2;
4456 JIS_TO_SJIS (code);
4457 c1 = code >> 8, c2 = code & 0xFF;
4458 EMIT_TWO_BYTES (c1, c2);
4459 }
4460 else if (charset == charset_kana)
4461 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
4462 else if (charset_kanji2 && charset == charset_kanji2)
4463 {
4464 int c1, c2;
4465
4466 c1 = code >> 8;
4467 if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4468 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4469 {
4470 JIS_TO_SJIS2 (code);
4471 c1 = code >> 8, c2 = code & 0xFF;
4472 EMIT_TWO_BYTES (c1, c2);
4473 }
4474 else
4475 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4476 }
df7492f9
KH
4477 else
4478 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4479 }
4480 }
065e3595 4481 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4482 coding->produced_char += produced_chars;
4483 coding->produced = dst - coding->destination;
4484 return 0;
4485}
4486
4487static int
4488encode_coding_big5 (coding)
4489 struct coding_system *coding;
4490{
4491 int multibytep = coding->dst_multibyte;
4492 int *charbuf = coding->charbuf;
4493 int *charbuf_end = charbuf + coding->charbuf_used;
4494 unsigned char *dst = coding->destination + coding->produced;
4495 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4496 int safe_room = 4;
4497 int produced_chars = 0;
24a73b0a 4498 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4499 int ascii_compatible;
4500 struct charset *charset_roman, *charset_big5;
4501 int c;
4502
24a73b0a 4503 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4504 val = charset_list;
4505 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4506 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4507 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4508
4509 while (charbuf < charbuf_end)
4510 {
4511 ASSURE_DESTINATION (safe_room);
4512 c = *charbuf++;
4513 /* Now encode the character C. */
4514 if (ASCII_CHAR_P (c) && ascii_compatible)
4515 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4516 else if (CHAR_BYTE8_P (c))
4517 {
4518 c = CHAR_TO_BYTE8 (c);
4519 EMIT_ONE_BYTE (c);
b73bfc1c
KH
4520 }
4521 else
4522 {
df7492f9
KH
4523 unsigned code;
4524 struct charset *charset = char_charset (c, charset_list, &code);
4525
4526 if (! charset)
b73bfc1c 4527 {
41cbe562 4528 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4529 {
41cbe562
KH
4530 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4531 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4532 }
41cbe562 4533 else
0eecad43 4534 {
41cbe562
KH
4535 c = coding->default_char;
4536 charset = char_charset (c, charset_list, &code);
0eecad43 4537 }
4ed46869 4538 }
df7492f9
KH
4539 if (code == CHARSET_INVALID_CODE (charset))
4540 abort ();
4541 if (charset == charset_big5)
b73bfc1c 4542 {
df7492f9
KH
4543 int c1, c2;
4544
4545 c1 = code >> 8, c2 = code & 0xFF;
4546 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 4547 }
df7492f9
KH
4548 else
4549 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 4550 }
4ed46869 4551 }
065e3595 4552 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4553 coding->produced_char += produced_chars;
4554 coding->produced = dst - coding->destination;
4555 return 0;
4ed46869
KH
4556}
4557
4558\f
df7492f9 4559/*** 10. CCL handlers ***/
1397dc18
KH
4560
4561/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4562 Check if a text is encoded in a coding system of which
4563 encoder/decoder are written in CCL program. If it is, return
df7492f9 4564 CATEGORY_MASK_CCL, else return 0. */
1397dc18 4565
0a28aafb 4566static int
ff0dacd7 4567detect_coding_ccl (coding, detect_info)
df7492f9 4568 struct coding_system *coding;
ff0dacd7 4569 struct coding_detection_info *detect_info;
1397dc18 4570{
065e3595 4571 const unsigned char *src = coding->source, *src_base;
8f924df7 4572 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4573 int multibytep = coding->src_multibyte;
4574 int consumed_chars = 0;
4575 int found = 0;
0e219d54 4576 unsigned char *valids;
df7492f9
KH
4577 int head_ascii = coding->head_ascii;
4578 Lisp_Object attrs;
4579
ff0dacd7
KH
4580 detect_info->checked |= CATEGORY_MASK_CCL;
4581
df7492f9 4582 coding = &coding_categories[coding_category_ccl];
0e219d54 4583 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
4584 attrs = CODING_ID_ATTRS (coding->id);
4585 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4586 src += head_ascii;
1397dc18 4587
b73bfc1c 4588 while (1)
1397dc18 4589 {
df7492f9 4590 int c;
065e3595
KH
4591
4592 src_base = src;
df7492f9 4593 ONE_MORE_BYTE (c);
065e3595 4594 if (c < 0 || ! valids[c])
df7492f9 4595 break;
ff0dacd7
KH
4596 if ((valids[c] > 1))
4597 found = CATEGORY_MASK_CCL;
df7492f9 4598 }
ff0dacd7 4599 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4600 return 0;
4601
4602 no_more_source:
ff0dacd7
KH
4603 detect_info->found |= found;
4604 return 1;
df7492f9
KH
4605}
4606
4607static void
4608decode_coding_ccl (coding)
4609 struct coding_system *coding;
4610{
7c78e542 4611 const unsigned char *src = coding->source + coding->consumed;
8f924df7 4612 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
4613 int *charbuf = coding->charbuf + coding->charbuf_used;
4614 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
4615 int consumed_chars = 0;
4616 int multibytep = coding->src_multibyte;
4617 struct ccl_program ccl;
4618 int source_charbuf[1024];
4619 int source_byteidx[1024];
24a73b0a 4620 Lisp_Object attrs, charset_list;
df7492f9 4621
24a73b0a 4622 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4623 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4624
4625 while (src < src_end)
4626 {
7c78e542 4627 const unsigned char *p = src;
df7492f9
KH
4628 int *source, *source_end;
4629 int i = 0;
4630
4631 if (multibytep)
4632 while (i < 1024 && p < src_end)
4633 {
4634 source_byteidx[i] = p - src;
4635 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4636 }
4637 else
4638 while (i < 1024 && p < src_end)
4639 source_charbuf[i++] = *p++;
8f924df7 4640
df7492f9
KH
4641 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4642 ccl.last_block = 1;
4643
4644 source = source_charbuf;
4645 source_end = source + i;
4646 while (source < source_end)
4647 {
4648 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4649 source_end - source, charbuf_end - charbuf,
4650 charset_list);
df7492f9
KH
4651 source += ccl.consumed;
4652 charbuf += ccl.produced;
4653 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4654 break;
4655 }
4656 if (source < source_end)
4657 src += source_byteidx[source - source_charbuf];
4658 else
4659 src = p;
4660 consumed_chars += source - source_charbuf;
4661
4662 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4663 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4664 break;
4665 }
4666
4667 switch (ccl.status)
4668 {
4669 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4670 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4671 break;
4672 case CCL_STAT_SUSPEND_BY_DST:
4673 break;
4674 case CCL_STAT_QUIT:
4675 case CCL_STAT_INVALID_CMD:
065e3595 4676 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4677 break;
4678 default:
065e3595 4679 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4680 break;
4681 }
4682 coding->consumed_char += consumed_chars;
4683 coding->consumed = src - coding->source;
4684 coding->charbuf_used = charbuf - coding->charbuf;
4685}
4686
4687static int
4688encode_coding_ccl (coding)
4689 struct coding_system *coding;
4690{
4691 struct ccl_program ccl;
4692 int multibytep = coding->dst_multibyte;
4693 int *charbuf = coding->charbuf;
4694 int *charbuf_end = charbuf + coding->charbuf_used;
4695 unsigned char *dst = coding->destination + coding->produced;
4696 unsigned char *dst_end = coding->destination + coding->dst_bytes;
df7492f9
KH
4697 int destination_charbuf[1024];
4698 int i, produced_chars = 0;
24a73b0a 4699 Lisp_Object attrs, charset_list;
df7492f9 4700
24a73b0a 4701 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4702 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4703
4704 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4705 ccl.dst_multibyte = coding->dst_multibyte;
4706
8cffd3e7 4707 while (charbuf < charbuf_end)
df7492f9 4708 {
df7492f9 4709 ccl_driver (&ccl, charbuf, destination_charbuf,
8cffd3e7 4710 charbuf_end - charbuf, 1024, charset_list);
df7492f9 4711 if (multibytep)
8cffd3e7
KH
4712 {
4713 ASSURE_DESTINATION (ccl.produced * 2);
4714 for (i = 0; i < ccl.produced; i++)
4715 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4716 }
df7492f9
KH
4717 else
4718 {
8cffd3e7 4719 ASSURE_DESTINATION (ccl.produced);
df7492f9
KH
4720 for (i = 0; i < ccl.produced; i++)
4721 *dst++ = destination_charbuf[i] & 0xFF;
4722 produced_chars += ccl.produced;
4723 }
8cffd3e7
KH
4724 charbuf += ccl.consumed;
4725 if (ccl.status == CCL_STAT_QUIT
4726 || ccl.status == CCL_STAT_INVALID_CMD)
4727 break;
df7492f9
KH
4728 }
4729
4730 switch (ccl.status)
4731 {
4732 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4733 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4734 break;
4735 case CCL_STAT_SUSPEND_BY_DST:
065e3595 4736 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
4737 break;
4738 case CCL_STAT_QUIT:
4739 case CCL_STAT_INVALID_CMD:
065e3595 4740 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4741 break;
4742 default:
065e3595 4743 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4744 break;
1397dc18 4745 }
df7492f9
KH
4746
4747 coding->produced_char += produced_chars;
4748 coding->produced = dst - coding->destination;
4749 return 0;
1397dc18
KH
4750}
4751
df7492f9 4752
1397dc18 4753\f
df7492f9 4754/*** 10, 11. no-conversion handlers ***/
4ed46869 4755
b73bfc1c 4756/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4757
b73bfc1c 4758static void
df7492f9 4759decode_coding_raw_text (coding)
4ed46869 4760 struct coding_system *coding;
4ed46869 4761{
119852e7
KH
4762 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4763
df7492f9 4764 coding->chars_at_source = 1;
119852e7
KH
4765 coding->consumed_char = coding->src_chars;
4766 coding->consumed = coding->src_bytes;
4767 if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4768 {
4769 coding->consumed_char--;
4770 coding->consumed--;
4771 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4772 }
4773 else
4774 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4775}
4ed46869 4776
df7492f9
KH
4777static int
4778encode_coding_raw_text (coding)
4779 struct coding_system *coding;
4780{
4781 int multibytep = coding->dst_multibyte;
4782 int *charbuf = coding->charbuf;
4783 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4784 unsigned char *dst = coding->destination + coding->produced;
4785 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4786 int produced_chars = 0;
b73bfc1c
KH
4787 int c;
4788
df7492f9 4789 if (multibytep)
b73bfc1c 4790 {
df7492f9 4791 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4792
df7492f9
KH
4793 if (coding->src_multibyte)
4794 while (charbuf < charbuf_end)
4795 {
4796 ASSURE_DESTINATION (safe_room);
4797 c = *charbuf++;
4798 if (ASCII_CHAR_P (c))
4799 EMIT_ONE_ASCII_BYTE (c);
4800 else if (CHAR_BYTE8_P (c))
4801 {
4802 c = CHAR_TO_BYTE8 (c);
4803 EMIT_ONE_BYTE (c);
4804 }
4805 else
4806 {
4807 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4808
df7492f9
KH
4809 CHAR_STRING_ADVANCE (c, p1);
4810 while (p0 < p1)
9d123124
KH
4811 {
4812 EMIT_ONE_BYTE (*p0);
4813 p0++;
4814 }
df7492f9
KH
4815 }
4816 }
b73bfc1c 4817 else
df7492f9
KH
4818 while (charbuf < charbuf_end)
4819 {
4820 ASSURE_DESTINATION (safe_room);
4821 c = *charbuf++;
4822 EMIT_ONE_BYTE (c);
4823 }
4824 }
4825 else
4ed46869 4826 {
df7492f9 4827 if (coding->src_multibyte)
d46c5b12 4828 {
df7492f9
KH
4829 int safe_room = MAX_MULTIBYTE_LENGTH;
4830
4831 while (charbuf < charbuf_end)
d46c5b12 4832 {
df7492f9
KH
4833 ASSURE_DESTINATION (safe_room);
4834 c = *charbuf++;
4835 if (ASCII_CHAR_P (c))
4836 *dst++ = c;
4837 else if (CHAR_BYTE8_P (c))
4838 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4839 else
df7492f9
KH
4840 CHAR_STRING_ADVANCE (c, dst);
4841 produced_chars++;
d46c5b12
KH
4842 }
4843 }
df7492f9
KH
4844 else
4845 {
4846 ASSURE_DESTINATION (charbuf_end - charbuf);
4847 while (charbuf < charbuf_end && dst < dst_end)
4848 *dst++ = *charbuf++;
4849 produced_chars = dst - (coding->destination + coding->dst_bytes);
8f924df7 4850 }
4ed46869 4851 }
065e3595 4852 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4853 coding->produced_char += produced_chars;
4854 coding->produced = dst - coding->destination;
4855 return 0;
4ed46869
KH
4856}
4857
ff0dacd7
KH
4858/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4859 Check if a text is encoded in a charset-based coding system. If it
4860 is, return 1, else return 0. */
4861
0a28aafb 4862static int
ff0dacd7 4863detect_coding_charset (coding, detect_info)
df7492f9 4864 struct coding_system *coding;
ff0dacd7 4865 struct coding_detection_info *detect_info;
1397dc18 4866{
065e3595 4867 const unsigned char *src = coding->source, *src_base;
8f924df7 4868 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4869 int multibytep = coding->src_multibyte;
4870 int consumed_chars = 0;
4871 Lisp_Object attrs, valids;
584948ac 4872 int found = 0;
716b3fa0 4873 int head_ascii = coding->head_ascii;
1397dc18 4874
ff0dacd7
KH
4875 detect_info->checked |= CATEGORY_MASK_CHARSET;
4876
df7492f9
KH
4877 coding = &coding_categories[coding_category_charset];
4878 attrs = CODING_ID_ATTRS (coding->id);
4879 valids = AREF (attrs, coding_attr_charset_valids);
4880
4881 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
716b3fa0 4882 src += head_ascii;
1397dc18 4883
b73bfc1c 4884 while (1)
1397dc18 4885 {
df7492f9 4886 int c;
716b3fa0
KH
4887 Lisp_Object val;
4888 struct charset *charset;
4889 int dim, idx;
1397dc18 4890
065e3595 4891 src_base = src;
df7492f9 4892 ONE_MORE_BYTE (c);
065e3595
KH
4893 if (c < 0)
4894 continue;
716b3fa0
KH
4895 val = AREF (valids, c);
4896 if (NILP (val))
df7492f9 4897 break;
584948ac 4898 if (c >= 0x80)
ff0dacd7 4899 found = CATEGORY_MASK_CHARSET;
716b3fa0
KH
4900 if (INTEGERP (val))
4901 {
4902 charset = CHARSET_FROM_ID (XFASTINT (val));
4903 dim = CHARSET_DIMENSION (charset);
4904 for (idx = 1; idx < dim; idx++)
4905 {
4906 if (src == src_end)
4907 goto too_short;
4908 ONE_MORE_BYTE (c);
4909 if (c < charset->code_space[(dim - 1 - idx) * 2]
4910 || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
4911 break;
4912 }
4913 if (idx < dim)
4914 break;
4915 }
4916 else
4917 {
4918 idx = 1;
4919 for (; CONSP (val); val = XCDR (val))
4920 {
4921 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4922 dim = CHARSET_DIMENSION (charset);
4923 while (idx < dim)
4924 {
4925 if (src == src_end)
4926 goto too_short;
4927 ONE_MORE_BYTE (c);
4928 if (c < charset->code_space[(dim - 1 - idx) * 4]
4929 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
4930 break;
4931 idx++;
4932 }
4933 if (idx == dim)
4934 {
4935 val = Qnil;
4936 break;
4937 }
4938 }
4939 if (CONSP (val))
4940 break;
4941 }
df7492f9 4942 }
716b3fa0 4943 too_short:
ff0dacd7 4944 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4945 return 0;
4ed46869 4946
df7492f9 4947 no_more_source:
ff0dacd7
KH
4948 detect_info->found |= found;
4949 return 1;
df7492f9 4950}
b73bfc1c 4951
b73bfc1c 4952static void
df7492f9 4953decode_coding_charset (coding)
4ed46869 4954 struct coding_system *coding;
4ed46869 4955{
8f924df7
KH
4956 const unsigned char *src = coding->source + coding->consumed;
4957 const unsigned char *src_end = coding->source + coding->src_bytes;
4958 const unsigned char *src_base;
69a80ea3
KH
4959 int *charbuf = coding->charbuf + coding->charbuf_used;
4960 int *charbuf_end
4961 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4962 int consumed_chars = 0, consumed_chars_base;
4963 int multibytep = coding->src_multibyte;
24a73b0a 4964 Lisp_Object attrs, charset_list, valids;
ff0dacd7
KH
4965 int char_offset = coding->produced_char;
4966 int last_offset = char_offset;
4967 int last_id = charset_ascii;
119852e7
KH
4968 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4969 int byte_after_cr = -1;
df7492f9 4970
24a73b0a 4971 CODING_GET_INFO (coding, attrs, charset_list);
4eb6d3f1 4972 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4973
df7492f9 4974 while (1)
4ed46869 4975 {
4eb6d3f1 4976 int c;
24a73b0a
KH
4977 Lisp_Object val;
4978 struct charset *charset;
4979 int dim;
4980 int len = 1;
4981 unsigned code;
df7492f9
KH
4982
4983 src_base = src;
4984 consumed_chars_base = consumed_chars;
b73bfc1c 4985
df7492f9
KH
4986 if (charbuf >= charbuf_end)
4987 break;
4988
119852e7
KH
4989 if (byte_after_cr >= 0)
4990 {
4991 c = byte_after_cr;
4992 byte_after_cr = -1;
4993 }
4994 else
4995 {
4996 ONE_MORE_BYTE (c);
4997 if (eol_crlf && c == '\r')
4998 ONE_MORE_BYTE (byte_after_cr);
4999 }
065e3595
KH
5000 if (c < 0)
5001 goto invalid_code;
24a73b0a
KH
5002 code = c;
5003
5004 val = AREF (valids, c);
5005 if (NILP (val))
5006 goto invalid_code;
5007 if (INTEGERP (val))
d46c5b12 5008 {
24a73b0a
KH
5009 charset = CHARSET_FROM_ID (XFASTINT (val));
5010 dim = CHARSET_DIMENSION (charset);
5011 while (len < dim)
b73bfc1c 5012 {
24a73b0a
KH
5013 ONE_MORE_BYTE (c);
5014 code = (code << 8) | c;
5015 len++;
b73bfc1c 5016 }
24a73b0a
KH
5017 CODING_DECODE_CHAR (coding, src, src_base, src_end,
5018 charset, code, c);
d46c5b12 5019 }
df7492f9 5020 else
d46c5b12 5021 {
24a73b0a
KH
5022 /* VAL is a list of charset IDs. It is assured that the
5023 list is sorted by charset dimensions (smaller one
5024 comes first). */
5025 while (CONSP (val))
4eb6d3f1 5026 {
24a73b0a 5027 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 5028 dim = CHARSET_DIMENSION (charset);
f9d71dcd 5029 while (len < dim)
4eb6d3f1 5030 {
acb2a965
KH
5031 ONE_MORE_BYTE (c);
5032 code = (code << 8) | c;
f9d71dcd 5033 len++;
4eb6d3f1 5034 }
24a73b0a
KH
5035 CODING_DECODE_CHAR (coding, src, src_base,
5036 src_end, charset, code, c);
5037 if (c >= 0)
5038 break;
5039 val = XCDR (val);
ff0dacd7 5040 }
d46c5b12 5041 }
24a73b0a
KH
5042 if (c < 0)
5043 goto invalid_code;
5044 if (charset->id != charset_ascii
5045 && last_id != charset->id)
5046 {
5047 if (last_id != charset_ascii)
69a80ea3 5048 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
5049 last_id = charset->id;
5050 last_offset = char_offset;
5051 }
5052
df7492f9 5053 *charbuf++ = c;
ff0dacd7 5054 char_offset++;
df7492f9
KH
5055 continue;
5056
5057 invalid_code:
5058 src = src_base;
5059 consumed_chars = consumed_chars_base;
5060 ONE_MORE_BYTE (c);
065e3595 5061 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 5062 char_offset++;
df7492f9 5063 coding->errors++;
4ed46869
KH
5064 }
5065
df7492f9 5066 no_more_source:
ff0dacd7 5067 if (last_id != charset_ascii)
69a80ea3 5068 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
5069 coding->consumed_char += consumed_chars_base;
5070 coding->consumed = src_base - coding->source;
5071 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
5072}
5073
df7492f9
KH
5074static int
5075encode_coding_charset (coding)
4ed46869 5076 struct coding_system *coding;
4ed46869 5077{
df7492f9
KH
5078 int multibytep = coding->dst_multibyte;
5079 int *charbuf = coding->charbuf;
5080 int *charbuf_end = charbuf + coding->charbuf_used;
5081 unsigned char *dst = coding->destination + coding->produced;
5082 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5083 int safe_room = MAX_MULTIBYTE_LENGTH;
5084 int produced_chars = 0;
24a73b0a 5085 Lisp_Object attrs, charset_list;
df7492f9 5086 int ascii_compatible;
b73bfc1c 5087 int c;
b73bfc1c 5088
24a73b0a 5089 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5090 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 5091
df7492f9 5092 while (charbuf < charbuf_end)
4ed46869 5093 {
4eb6d3f1 5094 struct charset *charset;
df7492f9 5095 unsigned code;
8f924df7 5096
df7492f9
KH
5097 ASSURE_DESTINATION (safe_room);
5098 c = *charbuf++;
5099 if (ascii_compatible && ASCII_CHAR_P (c))
5100 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 5101 else if (CHAR_BYTE8_P (c))
4ed46869 5102 {
16eafb5d
KH
5103 c = CHAR_TO_BYTE8 (c);
5104 EMIT_ONE_BYTE (c);
d46c5b12 5105 }
d46c5b12 5106 else
b73bfc1c 5107 {
4eb6d3f1
KH
5108 charset = char_charset (c, charset_list, &code);
5109 if (charset)
5110 {
5111 if (CHARSET_DIMENSION (charset) == 1)
5112 EMIT_ONE_BYTE (code);
5113 else if (CHARSET_DIMENSION (charset) == 2)
5114 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5115 else if (CHARSET_DIMENSION (charset) == 3)
5116 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5117 else
5118 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5119 (code >> 8) & 0xFF, code & 0xFF);
5120 }
5121 else
41cbe562
KH
5122 {
5123 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5124 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5125 else
5126 c = coding->default_char;
5127 EMIT_ONE_BYTE (c);
5128 }
4ed46869 5129 }
4ed46869
KH
5130 }
5131
065e3595 5132 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5133 coding->produced_char += produced_chars;
5134 coding->produced = dst - coding->destination;
5135 return 0;
4ed46869
KH
5136}
5137
5138\f
1397dc18 5139/*** 7. C library functions ***/
4ed46869 5140
df7492f9
KH
5141/* Setup coding context CODING from information about CODING_SYSTEM.
5142 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
5143 CODING_SYSTEM is invalid, signal an error. */
4ed46869 5144
ec6d2bb8 5145void
e0e989f6
KH
5146setup_coding_system (coding_system, coding)
5147 Lisp_Object coding_system;
4ed46869
KH
5148 struct coding_system *coding;
5149{
df7492f9
KH
5150 Lisp_Object attrs;
5151 Lisp_Object eol_type;
5152 Lisp_Object coding_type;
4608c386 5153 Lisp_Object val;
4ed46869 5154
df7492f9 5155 if (NILP (coding_system))
ae6f73fa 5156 coding_system = Qundecided;
c07c8e12 5157
df7492f9 5158 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 5159
df7492f9
KH
5160 attrs = CODING_ID_ATTRS (coding->id);
5161 eol_type = CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 5162
df7492f9
KH
5163 coding->mode = 0;
5164 coding->head_ascii = -1;
4a015c45
KH
5165 if (VECTORP (eol_type))
5166 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5167 | CODING_REQUIRE_DETECTION_MASK);
5168 else if (! EQ (eol_type, Qunix))
5169 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5170 | CODING_REQUIRE_ENCODING_MASK);
5171 else
5172 coding->common_flags = 0;
5e5c78be
KH
5173 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5174 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5175 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5176 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
5177 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5178 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 5179
df7492f9 5180 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
5181 coding->max_charset_id = SCHARS (val) - 1;
5182 coding->safe_charsets = (char *) SDATA (val);
df7492f9 5183 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4608c386 5184
df7492f9
KH
5185 coding_type = CODING_ATTR_TYPE (attrs);
5186 if (EQ (coding_type, Qundecided))
d46c5b12 5187 {
df7492f9
KH
5188 coding->detector = NULL;
5189 coding->decoder = decode_coding_raw_text;
5190 coding->encoder = encode_coding_raw_text;
5191 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5192 }
df7492f9 5193 else if (EQ (coding_type, Qiso_2022))
d46c5b12 5194 {
df7492f9
KH
5195 int i;
5196 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5197
5198 /* Invoke graphic register 0 to plane 0. */
5199 CODING_ISO_INVOCATION (coding, 0) = 0;
5200 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
5201 CODING_ISO_INVOCATION (coding, 1)
5202 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5203 /* Setup the initial status of designation. */
5204 for (i = 0; i < 4; i++)
5205 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5206 /* Not single shifting initially. */
5207 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5208 /* Beginning of buffer should also be regarded as bol. */
5209 CODING_ISO_BOL (coding) = 1;
5210 coding->detector = detect_coding_iso_2022;
5211 coding->decoder = decode_coding_iso_2022;
5212 coding->encoder = encode_coding_iso_2022;
5213 if (flags & CODING_ISO_FLAG_SAFE)
5214 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 5215 coding->common_flags
df7492f9
KH
5216 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5217 | CODING_REQUIRE_FLUSHING_MASK);
5218 if (flags & CODING_ISO_FLAG_COMPOSITION)
5219 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
5220 if (flags & CODING_ISO_FLAG_DESIGNATION)
5221 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
5222 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5223 {
5224 setup_iso_safe_charsets (attrs);
5225 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
5226 coding->max_charset_id = SCHARS (val) - 1;
5227 coding->safe_charsets = (char *) SDATA (val);
df7492f9
KH
5228 }
5229 CODING_ISO_FLAGS (coding) = flags;
d46c5b12 5230 }
df7492f9 5231 else if (EQ (coding_type, Qcharset))
d46c5b12 5232 {
df7492f9
KH
5233 coding->detector = detect_coding_charset;
5234 coding->decoder = decode_coding_charset;
5235 coding->encoder = encode_coding_charset;
d46c5b12 5236 coding->common_flags
df7492f9 5237 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5238 }
df7492f9 5239 else if (EQ (coding_type, Qutf_8))
d46c5b12 5240 {
df7492f9
KH
5241 coding->detector = detect_coding_utf_8;
5242 coding->decoder = decode_coding_utf_8;
5243 coding->encoder = encode_coding_utf_8;
5244 coding->common_flags
5245 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5246 }
5247 else if (EQ (coding_type, Qutf_16))
5248 {
5249 val = AREF (attrs, coding_attr_utf_16_bom);
5250 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
5251 : EQ (val, Qt) ? utf_16_with_bom
5252 : utf_16_without_bom);
5253 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5254 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5255 : utf_16_little_endian);
e19c3639 5256 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5257 coding->detector = detect_coding_utf_16;
5258 coding->decoder = decode_coding_utf_16;
5259 coding->encoder = encode_coding_utf_16;
5260 coding->common_flags
5261 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
b49a1807
KH
5262 if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5263 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5264 }
df7492f9 5265 else if (EQ (coding_type, Qccl))
4ed46869 5266 {
df7492f9
KH
5267 coding->detector = detect_coding_ccl;
5268 coding->decoder = decode_coding_ccl;
5269 coding->encoder = encode_coding_ccl;
c952af22 5270 coding->common_flags
df7492f9
KH
5271 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5272 | CODING_REQUIRE_FLUSHING_MASK);
5273 }
5274 else if (EQ (coding_type, Qemacs_mule))
5275 {
5276 coding->detector = detect_coding_emacs_mule;
5277 coding->decoder = decode_coding_emacs_mule;
5278 coding->encoder = encode_coding_emacs_mule;
c952af22 5279 coding->common_flags
df7492f9
KH
5280 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5281 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5282 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5283 {
5284 Lisp_Object tail, safe_charsets;
5285 int max_charset_id = 0;
5286
5287 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5288 tail = XCDR (tail))
5289 if (max_charset_id < XFASTINT (XCAR (tail)))
5290 max_charset_id = XFASTINT (XCAR (tail));
5291 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5292 make_number (255));
5293 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5294 tail = XCDR (tail))
8f924df7 5295 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5296 coding->max_charset_id = max_charset_id;
8f924df7 5297 coding->safe_charsets = (char *) SDATA (safe_charsets);
df7492f9
KH
5298 }
5299 }
5300 else if (EQ (coding_type, Qshift_jis))
5301 {
5302 coding->detector = detect_coding_sjis;
5303 coding->decoder = decode_coding_sjis;
5304 coding->encoder = encode_coding_sjis;
c952af22 5305 coding->common_flags
df7492f9
KH
5306 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5307 }
5308 else if (EQ (coding_type, Qbig5))
5309 {
5310 coding->detector = detect_coding_big5;
5311 coding->decoder = decode_coding_big5;
5312 coding->encoder = encode_coding_big5;
c952af22 5313 coding->common_flags
df7492f9
KH
5314 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5315 }
5316 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5317 {
df7492f9
KH
5318 coding->detector = NULL;
5319 coding->decoder = decode_coding_raw_text;
5320 coding->encoder = encode_coding_raw_text;
ea29edf2
KH
5321 if (! EQ (eol_type, Qunix))
5322 {
5323 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5324 if (! VECTORP (eol_type))
5325 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5326 }
5327
4ed46869 5328 }
4ed46869 5329
df7492f9 5330 return;
4ed46869
KH
5331}
5332
0ff61e78
KH
5333/* Return a list of charsets supported by CODING. */
5334
5335Lisp_Object
5336coding_charset_list (coding)
5337 struct coding_system *coding;
5338{
35befdaa 5339 Lisp_Object attrs, charset_list;
0ff61e78
KH
5340
5341 CODING_GET_INFO (coding, attrs, charset_list);
5342 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5343 {
5344 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5345
5346 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5347 charset_list = Viso_2022_charset_list;
5348 }
5349 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5350 {
5351 charset_list = Vemacs_mule_charset_list;
5352 }
5353 return charset_list;
5354}
5355
5356
df7492f9
KH
5357/* Return raw-text or one of its subsidiaries that has the same
5358 eol_type as CODING-SYSTEM. */
ec6d2bb8 5359
df7492f9
KH
5360Lisp_Object
5361raw_text_coding_system (coding_system)
5362 Lisp_Object coding_system;
ec6d2bb8 5363{
0be8721c 5364 Lisp_Object spec, attrs;
df7492f9 5365 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 5366
d3e4cb56
KH
5367 if (NILP (coding_system))
5368 return Qraw_text;
df7492f9
KH
5369 spec = CODING_SYSTEM_SPEC (coding_system);
5370 attrs = AREF (spec, 0);
ec6d2bb8 5371
df7492f9
KH
5372 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5373 return coding_system;
ec6d2bb8 5374
df7492f9
KH
5375 eol_type = AREF (spec, 2);
5376 if (VECTORP (eol_type))
5377 return Qraw_text;
5378 spec = CODING_SYSTEM_SPEC (Qraw_text);
5379 raw_text_eol_type = AREF (spec, 2);
5380 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5381 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5382 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
5383}
5384
54f78171 5385
df7492f9
KH
5386/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5387 does, return one of the subsidiary that has the same eol-spec as
fcbcfb64
KH
5388 PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil,
5389 inherit end-of-line format from the system's setting
5390 (system_eol_type). */
df7492f9
KH
5391
5392Lisp_Object
5393coding_inherit_eol_type (coding_system, parent)
b74e4686 5394 Lisp_Object coding_system, parent;
54f78171 5395{
3e139625 5396 Lisp_Object spec, eol_type;
54f78171 5397
d3e4cb56
KH
5398 if (NILP (coding_system))
5399 coding_system = Qraw_text;
df7492f9 5400 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 5401 eol_type = AREF (spec, 2);
fcbcfb64 5402 if (VECTORP (eol_type))
df7492f9 5403 {
df7492f9
KH
5404 Lisp_Object parent_eol_type;
5405
fcbcfb64
KH
5406 if (! NILP (parent))
5407 {
5408 Lisp_Object parent_spec;
5409
4a015c45 5410 parent_spec = CODING_SYSTEM_SPEC (parent);
fcbcfb64
KH
5411 parent_eol_type = AREF (parent_spec, 2);
5412 }
5413 else
5414 parent_eol_type = system_eol_type;
df7492f9
KH
5415 if (EQ (parent_eol_type, Qunix))
5416 coding_system = AREF (eol_type, 0);
5417 else if (EQ (parent_eol_type, Qdos))
5418 coding_system = AREF (eol_type, 1);
5419 else if (EQ (parent_eol_type, Qmac))
5420 coding_system = AREF (eol_type, 2);
54f78171 5421 }
df7492f9 5422 return coding_system;
54f78171
KH
5423}
5424
4ed46869
KH
5425/* Emacs has a mechanism to automatically detect a coding system if it
5426 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5427 it's impossible to distinguish some coding systems accurately
5428 because they use the same range of codes. So, at first, coding
5429 systems are categorized into 7, those are:
5430
0ef69138 5431 o coding-category-emacs-mule
4ed46869
KH
5432
5433 The category for a coding system which has the same code range
5434 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5435 symbol) `emacs-mule' by default.
4ed46869
KH
5436
5437 o coding-category-sjis
5438
5439 The category for a coding system which has the same code range
5440 as SJIS. Assigned the coding-system (Lisp
7717c392 5441 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5442
5443 o coding-category-iso-7
5444
5445 The category for a coding system which has the same code range
7717c392 5446 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5447 shift and single shift functions. This can encode/decode all
5448 charsets. Assigned the coding-system (Lisp symbol)
5449 `iso-2022-7bit' by default.
5450
5451 o coding-category-iso-7-tight
5452
5453 Same as coding-category-iso-7 except that this can
5454 encode/decode only the specified charsets.
4ed46869
KH
5455
5456 o coding-category-iso-8-1
5457
5458 The category for a coding system which has the same code range
5459 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5460 for DIMENSION1 charset. This doesn't use any locking shift
5461 and single shift functions. Assigned the coding-system (Lisp
5462 symbol) `iso-latin-1' by default.
4ed46869
KH
5463
5464 o coding-category-iso-8-2
5465
5466 The category for a coding system which has the same code range
5467 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5468 for DIMENSION2 charset. This doesn't use any locking shift
5469 and single shift functions. Assigned the coding-system (Lisp
5470 symbol) `japanese-iso-8bit' by default.
4ed46869 5471
7717c392 5472 o coding-category-iso-7-else
4ed46869
KH
5473
5474 The category for a coding system which has the same code range
df7492f9 5475 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
5476 single shift functions. Assigned the coding-system (Lisp
5477 symbol) `iso-2022-7bit-lock' by default.
5478
5479 o coding-category-iso-8-else
5480
5481 The category for a coding system which has the same code range
df7492f9 5482 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
5483 single shift functions. Assigned the coding-system (Lisp
5484 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5485
5486 o coding-category-big5
5487
5488 The category for a coding system which has the same code range
5489 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5490 `cn-big5' by default.
4ed46869 5491
fa42c37f
KH
5492 o coding-category-utf-8
5493
5494 The category for a coding system which has the same code range
6e76ae91 5495 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
5496 symbol) `utf-8' by default.
5497
5498 o coding-category-utf-16-be
5499
5500 The category for a coding system in which a text has an
5501 Unicode signature (cf. Unicode Standard) in the order of BIG
5502 endian at the head. Assigned the coding-system (Lisp symbol)
5503 `utf-16-be' by default.
5504
5505 o coding-category-utf-16-le
5506
5507 The category for a coding system in which a text has an
5508 Unicode signature (cf. Unicode Standard) in the order of
5509 LITTLE endian at the head. Assigned the coding-system (Lisp
5510 symbol) `utf-16-le' by default.
5511
1397dc18
KH
5512 o coding-category-ccl
5513
5514 The category for a coding system of which encoder/decoder is
5515 written in CCL programs. The default value is nil, i.e., no
5516 coding system is assigned.
5517
4ed46869
KH
5518 o coding-category-binary
5519
5520 The category for a coding system not categorized in any of the
5521 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5522 `no-conversion' by default.
4ed46869
KH
5523
5524 Each of them is a Lisp symbol and the value is an actual
df7492f9 5525 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5526 What Emacs does actually is to detect a category of coding system.
5527 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5528 decide only one possible category, it selects a category of the
4ed46869
KH
5529 highest priority. Priorities of categories are also specified by a
5530 user in a Lisp variable `coding-category-list'.
5531
5532*/
5533
df7492f9
KH
/* Bit flags recording which end-of-line forms have been seen in a
   text.  They may be OR'ed together while scanning.  */
#define EOL_SEEN_NONE   0
#define EOL_SEEN_LF     (1 << 0)
#define EOL_SEEN_CR     (1 << 1)
#define EOL_SEEN_CRLF   (1 << 2)
66cfb530 5538
ff0dacd7
KH
5539/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5540 SOURCE is encoded. If CATEGORY is one of
5541 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5542 two-byte, else they are encoded by one-byte.
5543
5544 Return one of EOL_SEEN_XXX. */
4ed46869 5545
bc4bc72a 5546#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
5547
5548static int
89528eb3 5549detect_eol (source, src_bytes, category)
f6cbaf43 5550 const unsigned char *source;
df7492f9 5551 EMACS_INT src_bytes;
89528eb3 5552 enum coding_category category;
4ed46869 5553{
f6cbaf43 5554 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5555 unsigned char c;
df7492f9
KH
5556 int total = 0;
5557 int eol_seen = EOL_SEEN_NONE;
4ed46869 5558
89528eb3 5559 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5560 {
df7492f9 5561 int msb, lsb;
fa42c37f 5562
89528eb3
KH
5563 msb = category == (coding_category_utf_16_le
5564 | coding_category_utf_16_le_nosig);
df7492f9 5565 lsb = 1 - msb;
fa42c37f 5566
df7492f9 5567 while (src + 1 < src_end)
fa42c37f 5568 {
df7492f9
KH
5569 c = src[lsb];
5570 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5571 {
df7492f9
KH
5572 int this_eol;
5573
5574 if (c == '\n')
5575 this_eol = EOL_SEEN_LF;
5576 else if (src + 3 >= src_end
5577 || src[msb + 2] != 0
5578 || src[lsb + 2] != '\n')
5579 this_eol = EOL_SEEN_CR;
fa42c37f 5580 else
8f924df7 5581 this_eol = EOL_SEEN_CRLF;
df7492f9
KH
5582
5583 if (eol_seen == EOL_SEEN_NONE)
5584 /* This is the first end-of-line. */
5585 eol_seen = this_eol;
5586 else if (eol_seen != this_eol)
fa42c37f 5587 {
df7492f9
KH
5588 /* The found type is different from what found before. */
5589 eol_seen = EOL_SEEN_LF;
5590 break;
fa42c37f 5591 }
df7492f9
KH
5592 if (++total == MAX_EOL_CHECK_COUNT)
5593 break;
fa42c37f 5594 }
df7492f9 5595 src += 2;
fa42c37f 5596 }
bcf26d6a 5597 }
d46c5b12 5598 else
c4825358 5599 {
df7492f9 5600 while (src < src_end)
27901516 5601 {
df7492f9
KH
5602 c = *src++;
5603 if (c == '\n' || c == '\r')
5604 {
5605 int this_eol;
d46c5b12 5606
df7492f9
KH
5607 if (c == '\n')
5608 this_eol = EOL_SEEN_LF;
5609 else if (src >= src_end || *src != '\n')
5610 this_eol = EOL_SEEN_CR;
5611 else
5612 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5613
df7492f9
KH
5614 if (eol_seen == EOL_SEEN_NONE)
5615 /* This is the first end-of-line. */
5616 eol_seen = this_eol;
5617 else if (eol_seen != this_eol)
5618 {
5619 /* The found type is different from what found before. */
5620 eol_seen = EOL_SEEN_LF;
5621 break;
5622 }
5623 if (++total == MAX_EOL_CHECK_COUNT)
5624 break;
5625 }
5626 }
73be902c 5627 }
df7492f9 5628 return eol_seen;
73be902c
KH
5629}
5630
df7492f9 5631
24a73b0a 5632static Lisp_Object
df7492f9
KH
5633adjust_coding_eol_type (coding, eol_seen)
5634 struct coding_system *coding;
5635 int eol_seen;
73be902c 5636{
0be8721c 5637 Lisp_Object eol_type;
8f924df7 5638
df7492f9
KH
5639 eol_type = CODING_ID_EOL_TYPE (coding->id);
5640 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
5641 {
5642 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5643 eol_type = Qunix;
5644 }
6f197c07 5645 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
5646 {
5647 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5648 eol_type = Qdos;
5649 }
6f197c07 5650 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
5651 {
5652 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5653 eol_type = Qmac;
5654 }
5655 return eol_type;
d46c5b12 5656}
4ed46869 5657
df7492f9
KH
5658/* Detect how a text specified in CODING is encoded. If a coding
5659 system is detected, update fields of CODING by the detected coding
5660 system. */
0a28aafb 5661
df7492f9
KH
5662void
5663detect_coding (coding)
d46c5b12 5664 struct coding_system *coding;
d46c5b12 5665{
8f924df7 5666 const unsigned char *src, *src_end;
d46c5b12 5667
df7492f9
KH
5668 coding->consumed = coding->consumed_char = 0;
5669 coding->produced = coding->produced_char = 0;
5670 coding_set_source (coding);
1c3478b0 5671
df7492f9 5672 src_end = coding->source + coding->src_bytes;
1c3478b0 5673
df7492f9
KH
5674 /* If we have not yet decided the text encoding type, detect it
5675 now. */
5676 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5677 {
df7492f9 5678 int c, i;
6cb21a4f 5679 struct coding_detection_info detect_info;
df7492f9 5680
6cb21a4f 5681 detect_info.checked = detect_info.found = detect_info.rejected = 0;
24a73b0a 5682 for (i = 0, src = coding->source; src < src_end; i++, src++)
d46c5b12 5683 {
df7492f9 5684 c = *src;
6cb21a4f 5685 if (c & 0x80)
df7492f9 5686 break;
6cb21a4f
KH
5687 if (c < 0x20
5688 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5689 && ! inhibit_iso_escape_detection
5690 && ! detect_info.checked)
5691 {
5692 coding->head_ascii = src - (coding->source + coding->consumed);
5693 if (detect_coding_iso_2022 (coding, &detect_info))
5694 {
5695 /* We have scanned the whole data. */
5696 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5697 /* We didn't find an 8-bit code. */
5698 src = src_end;
5699 break;
5700 }
5701 }
d46c5b12 5702 }
df7492f9
KH
5703 coding->head_ascii = src - (coding->source + coding->consumed);
5704
3aef54f3 5705 if (coding->head_ascii < coding->src_bytes
6cb21a4f 5706 || detect_info.found)
d46c5b12 5707 {
ff0dacd7
KH
5708 enum coding_category category;
5709 struct coding_system *this;
df7492f9 5710
6cb21a4f
KH
5711 if (coding->head_ascii == coding->src_bytes)
5712 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
5713 for (i = 0; i < coding_category_raw_text; i++)
5714 {
5715 category = coding_priorities[i];
5716 this = coding_categories + category;
5717 if (detect_info.found & (1 << category))
24a73b0a 5718 break;
6cb21a4f
KH
5719 }
5720 else
5721 for (i = 0; i < coding_category_raw_text; i++)
5722 {
5723 category = coding_priorities[i];
5724 this = coding_categories + category;
5725 if (this->id < 0)
5726 {
5727 /* No coding system of this category is defined. */
5728 detect_info.rejected |= (1 << category);
5729 }
5730 else if (category >= coding_category_raw_text)
5731 continue;
5732 else if (detect_info.checked & (1 << category))
5733 {
5734 if (detect_info.found & (1 << category))
5735 break;
5736 }
5737 else if ((*(this->detector)) (coding, &detect_info)
5738 && detect_info.found & (1 << category))
5739 {
5740 if (category == coding_category_utf_16_auto)
5741 {
5742 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5743 category = coding_category_utf_16_le;
5744 else
5745 category = coding_category_utf_16_be;
5746 }
5747 break;
5748 }
5749 }
5750
ff0dacd7
KH
5751 if (i < coding_category_raw_text)
5752 setup_coding_system (CODING_ID_NAME (this->id), coding);
5753 else if (detect_info.rejected == CATEGORY_MASK_ANY)
df7492f9 5754 setup_coding_system (Qraw_text, coding);
ff0dacd7 5755 else if (detect_info.rejected)
df7492f9 5756 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7
KH
5757 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5758 {
5759 this = coding_categories + coding_priorities[i];
5760 setup_coding_system (CODING_ID_NAME (this->id), coding);
5761 break;
5762 }
d46c5b12 5763 }
b73bfc1c 5764 }
24a73b0a
KH
5765 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5766 == coding_category_utf_16_auto)
b49a1807
KH
5767 {
5768 Lisp_Object coding_systems;
5769 struct coding_detection_info detect_info;
5770
5771 coding_systems
5772 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5773 detect_info.found = detect_info.rejected = 0;
5774 if (CONSP (coding_systems)
24a73b0a 5775 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
5776 {
5777 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5778 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 5779 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
5780 setup_coding_system (XCDR (coding_systems), coding);
5781 }
5782 }
4ed46869 5783}
4ed46869 5784
d46c5b12 5785
aaaf0b1e 5786static void
df7492f9 5787decode_eol (coding)
aaaf0b1e 5788 struct coding_system *coding;
aaaf0b1e 5789{
24a73b0a
KH
5790 Lisp_Object eol_type;
5791 unsigned char *p, *pbeg, *pend;
5792
5793 eol_type = CODING_ID_EOL_TYPE (coding->id);
5794 if (EQ (eol_type, Qunix))
5795 return;
5796
5797 if (NILP (coding->dst_object))
5798 pbeg = coding->destination;
5799 else
5800 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5801 pend = pbeg + coding->produced;
5802
5803 if (VECTORP (eol_type))
aaaf0b1e 5804 {
df7492f9 5805 int eol_seen = EOL_SEEN_NONE;
4ed46869 5806
24a73b0a 5807 for (p = pbeg; p < pend; p++)
aaaf0b1e 5808 {
df7492f9
KH
5809 if (*p == '\n')
5810 eol_seen |= EOL_SEEN_LF;
5811 else if (*p == '\r')
aaaf0b1e 5812 {
df7492f9 5813 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5814 {
df7492f9
KH
5815 eol_seen |= EOL_SEEN_CRLF;
5816 p++;
aaaf0b1e 5817 }
aaaf0b1e 5818 else
df7492f9 5819 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5820 }
aaaf0b1e 5821 }
24a73b0a
KH
5822 if (eol_seen != EOL_SEEN_NONE
5823 && eol_seen != EOL_SEEN_LF
5824 && eol_seen != EOL_SEEN_CRLF
5825 && eol_seen != EOL_SEEN_CR)
5826 eol_seen = EOL_SEEN_LF;
df7492f9 5827 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 5828 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5829 }
d46c5b12 5830
24a73b0a 5831 if (EQ (eol_type, Qmac))
27901516 5832 {
24a73b0a 5833 for (p = pbeg; p < pend; p++)
df7492f9
KH
5834 if (*p == '\r')
5835 *p = '\n';
4ed46869 5836 }
24a73b0a 5837 else if (EQ (eol_type, Qdos))
df7492f9 5838 {
24a73b0a 5839 int n = 0;
b73bfc1c 5840
24a73b0a
KH
5841 if (NILP (coding->dst_object))
5842 {
4347441b
KH
5843 /* Start deleting '\r' from the tail to minimize the memory
5844 movement. */
24a73b0a
KH
5845 for (p = pend - 2; p >= pbeg; p--)
5846 if (*p == '\r')
5847 {
5848 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5849 n++;
5850 }
5851 }
5852 else
5853 {
4347441b
KH
5854 int pos_byte = coding->dst_pos_byte;
5855 int pos = coding->dst_pos;
5856 int pos_end = pos + coding->produced_char - 1;
5857
5858 while (pos < pos_end)
5859 {
5860 p = BYTE_POS_ADDR (pos_byte);
5861 if (*p == '\r' && p[1] == '\n')
5862 {
5863 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5864 n++;
5865 pos_end--;
5866 }
5867 pos++;
69b8522d
KH
5868 if (coding->dst_multibyte)
5869 pos_byte += BYTES_BY_CHAR_HEAD (*p);
5870 else
5871 pos_byte++;
4347441b 5872 }
24a73b0a
KH
5873 }
5874 coding->produced -= n;
5875 coding->produced_char -= n;
aaaf0b1e 5876 }
4ed46869
KH
5877}
5878
7d64c6ad 5879
a6f87d34
KH
5880/* Return a translation table (or list of them) from coding system
5881 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5882 decoding (ENCODEP is zero). */
7d64c6ad 5883
e6a54062 5884static Lisp_Object
09ee6fdd
KH
5885get_translation_table (attrs, encodep, max_lookup)
5886 Lisp_Object attrs;
5887 int encodep, *max_lookup;
7d64c6ad
KH
5888{
5889 Lisp_Object standard, translation_table;
09ee6fdd 5890 Lisp_Object val;
7d64c6ad
KH
5891
5892 if (encodep)
5893 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5894 standard = Vstandard_translation_table_for_encode;
5895 else
5896 translation_table = CODING_ATTR_DECODE_TBL (attrs),
5897 standard = Vstandard_translation_table_for_decode;
7d64c6ad 5898 if (NILP (translation_table))
09ee6fdd
KH
5899 translation_table = standard;
5900 else
a6f87d34 5901 {
09ee6fdd
KH
5902 if (SYMBOLP (translation_table))
5903 translation_table = Fget (translation_table, Qtranslation_table);
5904 else if (CONSP (translation_table))
5905 {
5906 translation_table = Fcopy_sequence (translation_table);
5907 for (val = translation_table; CONSP (val); val = XCDR (val))
5908 if (SYMBOLP (XCAR (val)))
5909 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5910 }
5911 if (CHAR_TABLE_P (standard))
5912 {
5913 if (CONSP (translation_table))
5914 translation_table = nconc2 (translation_table,
5915 Fcons (standard, Qnil));
5916 else
5917 translation_table = Fcons (translation_table,
5918 Fcons (standard, Qnil));
5919 }
a6f87d34 5920 }
2170c8f0
KH
5921
5922 if (max_lookup)
09ee6fdd 5923 {
2170c8f0
KH
5924 *max_lookup = 1;
5925 if (CHAR_TABLE_P (translation_table)
5926 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5927 {
5928 val = XCHAR_TABLE (translation_table)->extras[1];
5929 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5930 *max_lookup = XFASTINT (val);
5931 }
5932 else if (CONSP (translation_table))
5933 {
5934 Lisp_Object tail, val;
09ee6fdd 5935
2170c8f0
KH
5936 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5937 if (CHAR_TABLE_P (XCAR (tail))
5938 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5939 {
5940 val = XCHAR_TABLE (XCAR (tail))->extras[1];
5941 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5942 *max_lookup = XFASTINT (val);
5943 }
5944 }
a6f87d34 5945 }
7d64c6ad
KH
5946 return translation_table;
5947}
5948
09ee6fdd
KH
/* Look up character C in TABLE, which is a char table or a list of
   char tables, and set TRANS to the result.

   If a table maps C to a character, C is replaced by that character
   and TRANS is reset to Qnil (in a list, the lookup then goes on
   with the next table).  If a table in a list yields any other
   non-nil value, the lookup stops with TRANS set to that value.  C
   and TRANS must be lvalues; they are evaluated more than once.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    trans = Qnil;                                                       \
    if (CHAR_TABLE_P (table))                                           \
      {                                                                 \
        trans = CHAR_TABLE_REF (table, c);                              \
        if (CHARACTERP (trans))                                         \
          {                                                             \
            c = XFASTINT (trans);                                       \
            trans = Qnil;                                               \
          }                                                             \
      }                                                                 \
    else if (CONSP (table))                                             \
      {                                                                 \
        Lisp_Object tail = table;                                       \
                                                                        \
        while (CONSP (tail))                                            \
          {                                                             \
            if (CHAR_TABLE_P (XCAR (tail)))                             \
              {                                                         \
                trans = CHAR_TABLE_REF (XCAR (tail), c);                \
                if (CHARACTERP (trans))                                 \
                  {                                                     \
                    c = XFASTINT (trans);                               \
                    trans = Qnil;                                       \
                  }                                                     \
                else if (! NILP (trans))                                \
                  break;                                                \
              }                                                         \
            tail = XCDR (tail);                                         \
          }                                                             \
      }                                                                 \
  } while (0)
5973
7d64c6ad 5974
69a80ea3
KH
5975static Lisp_Object
5976get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5977 Lisp_Object val;
5978 int *buf, *buf_end;
5979 int last_block;
5980 int *from_nchars, *to_nchars;
5981{
433f7f87
KH
5982 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
5983 [TO-CHAR ...]. */
69a80ea3
KH
5984 if (CONSP (val))
5985 {
433f7f87 5986 Lisp_Object from, tail;
69a80ea3
KH
5987 int i, len;
5988
433f7f87 5989 for (tail = val; CONSP (tail); tail = XCDR (tail))
69a80ea3 5990 {
433f7f87
KH
5991 val = XCAR (tail);
5992 from = XCAR (val);
5993 len = ASIZE (from);
5994 for (i = 0; i < len; i++)
5995 {
5996 if (buf + i == buf_end)
5997 {
5998 if (! last_block)
5999 return Qt;
6000 break;
6001 }
6002 if (XINT (AREF (from, i)) != buf[i])
6003 break;
6004 }
6005 if (i == len)
6006 {
6007 val = XCDR (val);
6008 *from_nchars = len;
6009 break;
6010 }
69a80ea3 6011 }
433f7f87
KH
6012 if (! CONSP (tail))
6013 return Qnil;
69a80ea3
KH
6014 }
6015 if (VECTORP (val))
6016 *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6017 else
6018 *buf = XINT (val);
6019 return val;
6020}
6021
6022
d46c5b12 6023static int
69a80ea3 6024produce_chars (coding, translation_table, last_block)
df7492f9 6025 struct coding_system *coding;
69a80ea3
KH
6026 Lisp_Object translation_table;
6027 int last_block;
4ed46869 6028{
df7492f9
KH
6029 unsigned char *dst = coding->destination + coding->produced;
6030 unsigned char *dst_end = coding->destination + coding->dst_bytes;
119852e7
KH
6031 EMACS_INT produced;
6032 EMACS_INT produced_chars = 0;
69a80ea3 6033 int carryover = 0;
4ed46869 6034
df7492f9 6035 if (! coding->chars_at_source)
4ed46869 6036 {
119852e7 6037 /* Source characters are in coding->charbuf. */
fba4576f
AS
6038 int *buf = coding->charbuf;
6039 int *buf_end = buf + coding->charbuf_used;
4ed46869 6040
db274c7a
KH
6041 if (EQ (coding->src_object, coding->dst_object))
6042 {
6043 coding_set_source (coding);
6044 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6045 }
4ed46869 6046
df7492f9 6047 while (buf < buf_end)
4ed46869 6048 {
69a80ea3 6049 int c = *buf, i;
bc4bc72a 6050
df7492f9
KH
6051 if (c >= 0)
6052 {
69a80ea3
KH
6053 int from_nchars = 1, to_nchars = 1;
6054 Lisp_Object trans = Qnil;
6055
09ee6fdd 6056 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6057 if (! NILP (trans))
69a80ea3
KH
6058 {
6059 trans = get_translation (trans, buf, buf_end, last_block,
6060 &from_nchars, &to_nchars);
6061 if (EQ (trans, Qt))
6062 break;
6063 c = *buf;
6064 }
6065
6066 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6067 {
6068 dst = alloc_destination (coding,
6069 buf_end - buf
6070 + MAX_MULTIBYTE_LENGTH * to_nchars,
6071 dst);
db274c7a
KH
6072 if (EQ (coding->src_object, coding->dst_object))
6073 {
6074 coding_set_source (coding);
6075 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6076 }
6077 else
6078 dst_end = coding->destination + coding->dst_bytes;
69a80ea3
KH
6079 }
6080
433f7f87 6081 for (i = 0; i < to_nchars; i++)
69a80ea3 6082 {
433f7f87
KH
6083 if (i > 0)
6084 c = XINT (AREF (trans, i));
69a80ea3
KH
6085 if (coding->dst_multibyte
6086 || ! CHAR_BYTE8_P (c))
db274c7a 6087 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
69a80ea3
KH
6088 else
6089 *dst++ = CHAR_TO_BYTE8 (c);
6090 }
6091 produced_chars += to_nchars;
6092 *buf++ = to_nchars;
6093 while (--from_nchars > 0)
6094 *buf++ = 0;
d46c5b12 6095 }
df7492f9 6096 else
69a80ea3
KH
6097 /* This is an annotation datum. (-C) is the length. */
6098 buf += -c;
4ed46869 6099 }
69a80ea3 6100 carryover = buf_end - buf;
4ed46869 6101 }
fa42c37f 6102 else
fa42c37f 6103 {
119852e7 6104 /* Source characters are at coding->source. */
8f924df7 6105 const unsigned char *src = coding->source;
119852e7 6106 const unsigned char *src_end = src + coding->consumed;
4ed46869 6107
db274c7a
KH
6108 if (EQ (coding->dst_object, coding->src_object))
6109 dst_end = (unsigned char *) src;
df7492f9 6110 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 6111 {
df7492f9 6112 if (coding->src_multibyte)
fa42c37f 6113 {
71c81426 6114 int multibytep = 1;
119852e7 6115 EMACS_INT consumed_chars;
d46c5b12 6116
df7492f9
KH
6117 while (1)
6118 {
8f924df7 6119 const unsigned char *src_base = src;
df7492f9 6120 int c;
b73bfc1c 6121
df7492f9 6122 ONE_MORE_BYTE (c);
119852e7 6123 if (dst == dst_end)
df7492f9 6124 {
119852e7
KH
6125 if (EQ (coding->src_object, coding->dst_object))
6126 dst_end = (unsigned char *) src;
6127 if (dst == dst_end)
df7492f9 6128 {
119852e7
KH
6129 EMACS_INT offset = src - coding->source;
6130
6131 dst = alloc_destination (coding, src_end - src + 1,
6132 dst);
6133 dst_end = coding->destination + coding->dst_bytes;
6134 coding_set_source (coding);
6135 src = coding->source + offset;
6136 src_end = coding->source + coding->src_bytes;
db274c7a
KH
6137 if (EQ (coding->src_object, coding->dst_object))
6138 dst_end = (unsigned char *) src;
df7492f9 6139 }
df7492f9
KH
6140 }
6141 *dst++ = c;
6142 produced_chars++;
6143 }
6144 no_more_source:
6145 ;
fa42c37f
KH
6146 }
6147 else
df7492f9
KH
6148 while (src < src_end)
6149 {
71c81426 6150 int multibytep = 1;
df7492f9 6151 int c = *src++;
b73bfc1c 6152
df7492f9
KH
6153 if (dst >= dst_end - 1)
6154 {
2c78b7e1 6155 if (EQ (coding->src_object, coding->dst_object))
8f924df7 6156 dst_end = (unsigned char *) src;
2c78b7e1
KH
6157 if (dst >= dst_end - 1)
6158 {
119852e7 6159 EMACS_INT offset = src - coding->source;
db274c7a 6160 EMACS_INT more_bytes;
119852e7 6161
db274c7a
KH
6162 if (EQ (coding->src_object, coding->dst_object))
6163 more_bytes = ((src_end - src) / 2) + 2;
6164 else
6165 more_bytes = src_end - src + 2;
6166 dst = alloc_destination (coding, more_bytes, dst);
2c78b7e1
KH
6167 dst_end = coding->destination + coding->dst_bytes;
6168 coding_set_source (coding);
119852e7 6169 src = coding->source + offset;
2c78b7e1 6170 src_end = coding->source + coding->src_bytes;
db274c7a
KH
6171 if (EQ (coding->src_object, coding->dst_object))
6172 dst_end = (unsigned char *) src;
2c78b7e1 6173 }
df7492f9
KH
6174 }
6175 EMIT_ONE_BYTE (c);
6176 }
d46c5b12 6177 }
df7492f9
KH
6178 else
6179 {
6180 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 6181 {
119852e7 6182 EMACS_INT require = coding->src_bytes - coding->dst_bytes;
4ed46869 6183
df7492f9 6184 if (require > 0)
fa42c37f 6185 {
df7492f9
KH
6186 EMACS_INT offset = src - coding->source;
6187
6188 dst = alloc_destination (coding, require, dst);
6189 coding_set_source (coding);
6190 src = coding->source + offset;
6191 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
6192 }
6193 }
119852e7 6194 produced_chars = coding->consumed_char;
df7492f9 6195 while (src < src_end)
14daee73 6196 *dst++ = *src++;
fa42c37f
KH
6197 }
6198 }
6199
df7492f9 6200 produced = dst - (coding->destination + coding->produced);
284201e4 6201 if (BUFFERP (coding->dst_object) && produced_chars > 0)
df7492f9
KH
6202 insert_from_gap (produced_chars, produced);
6203 coding->produced += produced;
6204 coding->produced_char += produced_chars;
69a80ea3 6205 return carryover;
fa42c37f
KH
6206}
6207
ff0dacd7
KH
6208/* Compose text in CODING->object according to the annotation data at
6209 CHARBUF. CHARBUF is an array:
6210 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 6211 */
4ed46869 6212
df7492f9 6213static INLINE void
69a80ea3 6214produce_composition (coding, charbuf, pos)
4ed46869 6215 struct coding_system *coding;
df7492f9 6216 int *charbuf;
69a80ea3 6217 EMACS_INT pos;
4ed46869 6218{
df7492f9 6219 int len;
69a80ea3 6220 EMACS_INT to;
df7492f9 6221 enum composition_method method;
df7492f9 6222 Lisp_Object components;
fa42c37f 6223
df7492f9 6224 len = -charbuf[0];
69a80ea3 6225 to = pos + charbuf[2];
9ffd559c
KH
6226 if (to <= pos)
6227 return;
69a80ea3 6228 method = (enum composition_method) (charbuf[3]);
d46c5b12 6229
df7492f9
KH
6230 if (method == COMPOSITION_RELATIVE)
6231 components = Qnil;
9ffd559c
KH
6232 else if (method >= COMPOSITION_WITH_RULE
6233 && method <= COMPOSITION_WITH_RULE_ALTCHARS)
d46c5b12 6234 {
df7492f9
KH
6235 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6236 int i;
b73bfc1c 6237
69a80ea3
KH
6238 len -= 4;
6239 charbuf += 4;
df7492f9 6240 for (i = 0; i < len; i++)
9ffd559c
KH
6241 {
6242 args[i] = make_number (charbuf[i]);
f75c90a9 6243 if (charbuf[i] < 0)
9ffd559c
KH
6244 return;
6245 }
df7492f9
KH
6246 components = (method == COMPOSITION_WITH_ALTCHARS
6247 ? Fstring (len, args) : Fvector (len, args));
d46c5b12 6248 }
9ffd559c
KH
6249 else
6250 return;
69a80ea3 6251 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
6252}
6253
d46c5b12 6254
ff0dacd7
KH
6255/* Put `charset' property on text in CODING->object according to
6256 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 6257 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 6258 */
d46c5b12 6259
ff0dacd7 6260static INLINE void
69a80ea3 6261produce_charset (coding, charbuf, pos)
d46c5b12 6262 struct coding_system *coding;
ff0dacd7 6263 int *charbuf;
69a80ea3 6264 EMACS_INT pos;
d46c5b12 6265{
69a80ea3
KH
6266 EMACS_INT from = pos - charbuf[2];
6267 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 6268
69a80ea3 6269 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
6270 Qcharset, CHARSET_NAME (charset),
6271 coding->dst_object);
d46c5b12
KH
6272}
6273
d46c5b12 6274
df7492f9
KH
/* Initial number of elements tried for CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca and set CODING->charbuf_size
   to its number of elements.  The size starts at CHARBUF_SIZE and is
   halved after each failed attempt while it is above 1024.

   If no attempt succeeds, record CODING_RESULT_INSUFFICIENT_MEM and
   make the *calling function* return CODING->result.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size;                                                           \
                                                                        \
    coding->charbuf = NULL;                                             \
    for (size = CHARBUF_SIZE; size > 1024; size >>= 1)                  \
      {                                                                 \
        coding->charbuf = (int *) alloca (sizeof (int) * size);         \
        if (coding->charbuf)                                            \
          break;                                                        \
      }                                                                 \
    if (! coding->charbuf)                                              \
      {                                                                 \
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
        return coding->result;                                          \
      }                                                                 \
    coding->charbuf_size = size;                                        \
  } while (0)
4ed46869 6296
d46c5b12
KH
6297
6298static void
69a80ea3 6299produce_annotation (coding, pos)
d46c5b12 6300 struct coding_system *coding;
69a80ea3 6301 EMACS_INT pos;
d46c5b12 6302{
df7492f9
KH
6303 int *charbuf = coding->charbuf;
6304 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 6305
ff0dacd7
KH
6306 if (NILP (coding->dst_object))
6307 return;
d46c5b12 6308
df7492f9 6309 while (charbuf < charbuf_end)
a84f1519 6310 {
df7492f9 6311 if (*charbuf >= 0)
69a80ea3 6312 pos += *charbuf++;
d46c5b12 6313 else
d46c5b12 6314 {
df7492f9 6315 int len = -*charbuf;
ff0dacd7 6316 switch (charbuf[1])
df7492f9
KH
6317 {
6318 case CODING_ANNOTATE_COMPOSITION_MASK:
69a80ea3 6319 produce_composition (coding, charbuf, pos);
df7492f9 6320 break;
ff0dacd7 6321 case CODING_ANNOTATE_CHARSET_MASK:
69a80ea3 6322 produce_charset (coding, charbuf, pos);
ff0dacd7 6323 break;
df7492f9
KH
6324 default:
6325 abort ();
6326 }
6327 charbuf += len;
d46c5b12 6328 }
a84f1519 6329 }
d46c5b12
KH
6330}
6331
df7492f9
KH
6332/* Decode the data at CODING->src_object into CODING->dst_object.
6333 CODING->src_object is a buffer, a string, or nil.
6334 CODING->dst_object is a buffer.
d46c5b12 6335
df7492f9
KH
6336 If CODING->src_object is a buffer, it must be the current buffer.
6337 In this case, if CODING->src_pos is positive, it is a position of
6338 the source text in the buffer, otherwise, the source text is in the
6339 gap area of the buffer, and CODING->src_pos specifies the offset of
6340 the text from GPT (which must be the same as PT). If this is the
6341 same buffer as CODING->dst_object, CODING->src_pos must be
6342 negative.
d46c5b12 6343
b6828792 6344 If CODING->src_object is a string, CODING->src_pos is an index to
df7492f9 6345 that string.
d46c5b12 6346
df7492f9
KH
6347 If CODING->src_object is nil, CODING->source must already point to
6348 the non-relocatable memory area. In this case, CODING->src_pos is
6349 an offset from CODING->source.
73be902c 6350
df7492f9
KH
6351 The decoded data is inserted at the current point of the buffer
6352 CODING->dst_object.
6353*/
d46c5b12 6354
df7492f9
KH
6355static int
6356decode_coding (coding)
d46c5b12 6357 struct coding_system *coding;
d46c5b12 6358{
df7492f9 6359 Lisp_Object attrs;
24a73b0a 6360 Lisp_Object undo_list;
7d64c6ad 6361 Lisp_Object translation_table;
69a80ea3
KH
6362 int carryover;
6363 int i;
d46c5b12 6364
df7492f9
KH
6365 if (BUFFERP (coding->src_object)
6366 && coding->src_pos > 0
6367 && coding->src_pos < GPT
6368 && coding->src_pos + coding->src_chars > GPT)
6369 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 6370
24a73b0a 6371 undo_list = Qt;
df7492f9 6372 if (BUFFERP (coding->dst_object))
1c3478b0 6373 {
df7492f9
KH
6374 if (current_buffer != XBUFFER (coding->dst_object))
6375 set_buffer_internal (XBUFFER (coding->dst_object));
6376 if (GPT != PT)
6377 move_gap_both (PT, PT_BYTE);
24a73b0a
KH
6378 undo_list = current_buffer->undo_list;
6379 current_buffer->undo_list = Qt;
1c3478b0
KH
6380 }
6381
df7492f9
KH
6382 coding->consumed = coding->consumed_char = 0;
6383 coding->produced = coding->produced_char = 0;
6384 coding->chars_at_source = 0;
065e3595 6385 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 6386 coding->errors = 0;
1c3478b0 6387
df7492f9
KH
6388 ALLOC_CONVERSION_WORK_AREA (coding);
6389
6390 attrs = CODING_ID_ATTRS (coding->id);
2170c8f0 6391 translation_table = get_translation_table (attrs, 0, NULL);
df7492f9 6392
69a80ea3 6393 carryover = 0;
df7492f9 6394 do
b73bfc1c 6395 {
69a80ea3
KH
6396 EMACS_INT pos = coding->dst_pos + coding->produced_char;
6397
df7492f9
KH
6398 coding_set_source (coding);
6399 coding->annotated = 0;
69a80ea3 6400 coding->charbuf_used = carryover;
df7492f9 6401 (*(coding->decoder)) (coding);
df7492f9 6402 coding_set_destination (coding);
69a80ea3 6403 carryover = produce_chars (coding, translation_table, 0);
df7492f9 6404 if (coding->annotated)
69a80ea3
KH
6405 produce_annotation (coding, pos);
6406 for (i = 0; i < carryover; i++)
6407 coding->charbuf[i]
6408 = coding->charbuf[coding->charbuf_used - carryover + i];
d46c5b12 6409 }
df7492f9 6410 while (coding->consumed < coding->src_bytes
54b367bb
KH
6411 && (coding->result == CODING_RESULT_SUCCESS
6412 || coding->result == CODING_RESULT_INVALID_SRC));
d46c5b12 6413
69a80ea3
KH
6414 if (carryover > 0)
6415 {
6416 coding_set_destination (coding);
6417 coding->charbuf_used = carryover;
6418 produce_chars (coding, translation_table, 1);
6419 }
6420
df7492f9
KH
6421 coding->carryover_bytes = 0;
6422 if (coding->consumed < coding->src_bytes)
d46c5b12 6423 {
df7492f9 6424 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 6425 const unsigned char *src;
df7492f9
KH
6426
6427 coding_set_source (coding);
6428 coding_set_destination (coding);
6429 src = coding->source + coding->consumed;
6430
6431 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 6432 {
df7492f9
KH
6433 /* Flush out unprocessed data as binary chars. We are sure
6434 that the number of data is less than the size of
6435 coding->charbuf. */
065e3595 6436 coding->charbuf_used = 0;
df7492f9 6437 while (nbytes-- > 0)
1c3478b0 6438 {
df7492f9 6439 int c = *src++;
98725083 6440
1c91457d
KH
6441 if (c & 0x80)
6442 c = BYTE8_TO_CHAR (c);
6443 coding->charbuf[coding->charbuf_used++] = c;
1c3478b0 6444 }
f6cbaf43 6445 produce_chars (coding, Qnil, 1);
d46c5b12 6446 }
d46c5b12 6447 else
df7492f9
KH
6448 {
6449 /* Record unprocessed bytes in coding->carryover. We are
6450 sure that the number of data is less than the size of
6451 coding->carryover. */
6452 unsigned char *p = coding->carryover;
6453
6454 coding->carryover_bytes = nbytes;
6455 while (nbytes-- > 0)
6456 *p++ = *src++;
1c3478b0 6457 }
df7492f9 6458 coding->consumed = coding->src_bytes;
b73bfc1c 6459 }
69f76525 6460
4347441b
KH
6461 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6462 decode_eol (coding);
24a73b0a
KH
6463 if (BUFFERP (coding->dst_object))
6464 {
6465 current_buffer->undo_list = undo_list;
6466 record_insert (coding->dst_pos, coding->produced_char);
6467 }
73be902c 6468 return coding->result;
4ed46869
KH
6469}
6470
aaaf0b1e 6471
e1c23804 6472/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
6473 ending before LIMIT of CODING->src_object (buffer or string), store
6474 the data in BUF, set *STOP to a starting position of the next
6475 composition (if any) or to LIMIT, and return the address of the
6476 next element of BUF.
6477
6478 If such an annotation is not found, set *STOP to a starting
6479 position of a composition after POS (if any) or to LIMIT, and
6480 return BUF. */
6481
6482static INLINE int *
6483handle_composition_annotation (pos, limit, coding, buf, stop)
6484 EMACS_INT pos, limit;
aaaf0b1e 6485 struct coding_system *coding;
ff0dacd7
KH
6486 int *buf;
6487 EMACS_INT *stop;
aaaf0b1e 6488{
ff0dacd7
KH
6489 EMACS_INT start, end;
6490 Lisp_Object prop;
aaaf0b1e 6491
ff0dacd7
KH
6492 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6493 || end > limit)
6494 *stop = limit;
6495 else if (start > pos)
6496 *stop = start;
6497 else
aaaf0b1e 6498 {
ff0dacd7 6499 if (start == pos)
aaaf0b1e 6500 {
ff0dacd7
KH
6501 /* We found a composition. Store the corresponding
6502 annotation data in BUF. */
6503 int *head = buf;
6504 enum composition_method method = COMPOSITION_METHOD (prop);
6505 int nchars = COMPOSITION_LENGTH (prop);
6506
69a80ea3 6507 ADD_COMPOSITION_DATA (buf, nchars, method);
ff0dacd7 6508 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 6509 {
ff0dacd7
KH
6510 Lisp_Object components;
6511 int len, i, i_byte;
6512
6513 components = COMPOSITION_COMPONENTS (prop);
6514 if (VECTORP (components))
aaaf0b1e 6515 {
ff0dacd7
KH
6516 len = XVECTOR (components)->size;
6517 for (i = 0; i < len; i++)
6518 *buf++ = XINT (AREF (components, i));
aaaf0b1e 6519 }
ff0dacd7 6520 else if (STRINGP (components))
aaaf0b1e 6521 {
8f924df7 6522 len = SCHARS (components);
ff0dacd7
KH
6523 i = i_byte = 0;
6524 while (i < len)
6525 {
6526 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6527 buf++;
6528 }
6529 }
6530 else if (INTEGERP (components))
6531 {
6532 len = 1;
6533 *buf++ = XINT (components);
6534 }
6535 else if (CONSP (components))
6536 {
6537 for (len = 0; CONSP (components);
6538 len++, components = XCDR (components))
6539 *buf++ = XINT (XCAR (components));
aaaf0b1e 6540 }
aaaf0b1e 6541 else
ff0dacd7
KH
6542 abort ();
6543 *head -= len;
aaaf0b1e 6544 }
aaaf0b1e 6545 }
ff0dacd7
KH
6546
6547 if (find_composition (end, limit, &start, &end, &prop,
6548 coding->src_object)
6549 && end <= limit)
6550 *stop = start;
6551 else
6552 *stop = limit;
aaaf0b1e 6553 }
ff0dacd7
KH
6554 return buf;
6555}
6556
6557
e1c23804 6558/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
6559 CODING->src_object (buffer of string), store the data in BUF, set
6560 *STOP to the position where the value of `charset' property changes
6561 (limiting by LIMIT), and return the address of the next element of
6562 BUF.
6563
6564 If the property value is nil, set *STOP to the position where the
6565 property value is non-nil (limiting by LIMIT), and return BUF. */
6566
6567static INLINE int *
6568handle_charset_annotation (pos, limit, coding, buf, stop)
6569 EMACS_INT pos, limit;
6570 struct coding_system *coding;
6571 int *buf;
6572 EMACS_INT *stop;
6573{
6574 Lisp_Object val, next;
6575 int id;
6576
6577 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6578 if (! NILP (val) && CHARSETP (val))
6579 id = XINT (CHARSET_SYMBOL_ID (val));
6580 else
6581 id = -1;
69a80ea3 6582 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
6583 next = Fnext_single_property_change (make_number (pos), Qcharset,
6584 coding->src_object,
6585 make_number (limit));
6586 *stop = XINT (next);
6587 return buf;
6588}
6589
6590
df7492f9 6591static void
09ee6fdd 6592consume_chars (coding, translation_table, max_lookup)
df7492f9 6593 struct coding_system *coding;
433f7f87 6594 Lisp_Object translation_table;
09ee6fdd 6595 int max_lookup;
df7492f9
KH
6596{
6597 int *buf = coding->charbuf;
ff0dacd7 6598 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 6599 const unsigned char *src = coding->source + coding->consumed;
4776e638 6600 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
6601 EMACS_INT pos = coding->src_pos + coding->consumed_char;
6602 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
6603 int multibytep = coding->src_multibyte;
6604 Lisp_Object eol_type;
6605 int c;
ff0dacd7 6606 EMACS_INT stop, stop_composition, stop_charset;
09ee6fdd 6607 int *lookup_buf = NULL;
433f7f87
KH
6608
6609 if (! NILP (translation_table))
09ee6fdd 6610 lookup_buf = alloca (sizeof (int) * max_lookup);
88993dfd 6611
df7492f9
KH
6612 eol_type = CODING_ID_EOL_TYPE (coding->id);
6613 if (VECTORP (eol_type))
6614 eol_type = Qunix;
88993dfd 6615
df7492f9
KH
6616 /* Note: composition handling is not yet implemented. */
6617 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 6618
0b5670c9
KH
6619 if (NILP (coding->src_object))
6620 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 6621 else
0b5670c9
KH
6622 {
6623 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6624 stop = stop_composition = pos;
6625 else
6626 stop = stop_composition = end_pos;
6627 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6628 stop = stop_charset = pos;
6629 else
6630 stop_charset = end_pos;
6631 }
ec6d2bb8 6632
24a73b0a 6633 /* Compensate for CRLF and conversion. */
ff0dacd7 6634 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 6635 while (buf < buf_end)
aaaf0b1e 6636 {
433f7f87
KH
6637 Lisp_Object trans;
6638
df7492f9 6639 if (pos == stop)
ec6d2bb8 6640 {
df7492f9
KH
6641 if (pos == end_pos)
6642 break;
ff0dacd7
KH
6643 if (pos == stop_composition)
6644 buf = handle_composition_annotation (pos, end_pos, coding,
6645 buf, &stop_composition);
6646 if (pos == stop_charset)
6647 buf = handle_charset_annotation (pos, end_pos, coding,
6648 buf, &stop_charset);
6649 stop = (stop_composition < stop_charset
6650 ? stop_composition : stop_charset);
df7492f9
KH
6651 }
6652
6653 if (! multibytep)
4776e638 6654 {
d3e4cb56 6655 EMACS_INT bytes;
aaaf0b1e 6656
ea29edf2
KH
6657 if (coding->encoder == encode_coding_raw_text)
6658 c = *src++, pos++;
6659 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
db274c7a 6660 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
4776e638 6661 else
f03caae0 6662 c = BYTE8_TO_CHAR (*src), src++, pos++;
4776e638 6663 }
df7492f9 6664 else
db274c7a 6665 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
df7492f9
KH
6666 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6667 c = '\n';
6668 if (! EQ (eol_type, Qunix))
aaaf0b1e 6669 {
df7492f9 6670 if (c == '\n')
aaaf0b1e 6671 {
df7492f9
KH
6672 if (EQ (eol_type, Qdos))
6673 *buf++ = '\r';
6674 else
6675 c = '\r';
aaaf0b1e
KH
6676 }
6677 }
433f7f87 6678
e6a54062 6679 trans = Qnil;
09ee6fdd 6680 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6681 if (NILP (trans))
433f7f87
KH
6682 *buf++ = c;
6683 else
6684 {
6685 int from_nchars = 1, to_nchars = 1;
6686 int *lookup_buf_end;
6687 const unsigned char *p = src;
6688 int i;
6689
6690 lookup_buf[0] = c;
6691 for (i = 1; i < max_lookup && p < src_end; i++)
6692 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6693 lookup_buf_end = lookup_buf + i;
6694 trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6695 &from_nchars, &to_nchars);
6696 if (EQ (trans, Qt)
6697 || buf + to_nchars > buf_end)
6698 break;
6699 *buf++ = *lookup_buf;
6700 for (i = 1; i < to_nchars; i++)
6701 *buf++ = XINT (AREF (trans, i));
6702 for (i = 1; i < from_nchars; i++, pos++)
6703 src += MULTIBYTE_LENGTH_NO_CHECK (src);
6704 }
aaaf0b1e 6705 }
ec6d2bb8 6706
df7492f9
KH
6707 coding->consumed = src - coding->source;
6708 coding->consumed_char = pos - coding->src_pos;
6709 coding->charbuf_used = buf - coding->charbuf;
6710 coding->chars_at_source = 0;
aaaf0b1e
KH
6711}
6712
4ed46869 6713
df7492f9
KH
6714/* Encode the text at CODING->src_object into CODING->dst_object.
6715 CODING->src_object is a buffer or a string.
6716 CODING->dst_object is a buffer or nil.
6717
6718 If CODING->src_object is a buffer, it must be the current buffer.
6719 In this case, if CODING->src_pos is positive, it is a position of
6720 the source text in the buffer, otherwise. the source text is in the
6721 gap area of the buffer, and coding->src_pos specifies the offset of
6722 the text from GPT (which must be the same as PT). If this is the
6723 same buffer as CODING->dst_object, CODING->src_pos must be
6724 negative and CODING should not have `pre-write-conversion'.
6725
6726 If CODING->src_object is a string, CODING should not have
6727 `pre-write-conversion'.
6728
6729 If CODING->dst_object is a buffer, the encoded data is inserted at
6730 the current point of that buffer.
6731
6732 If CODING->dst_object is nil, the encoded data is placed at the
6733 memory area specified by CODING->destination. */
6734
6735static int
6736encode_coding (coding)
4ed46869 6737 struct coding_system *coding;
4ed46869 6738{
df7492f9 6739 Lisp_Object attrs;
7d64c6ad 6740 Lisp_Object translation_table;
09ee6fdd 6741 int max_lookup;
9861e777 6742
df7492f9 6743 attrs = CODING_ID_ATTRS (coding->id);
ea29edf2
KH
6744 if (coding->encoder == encode_coding_raw_text)
6745 translation_table = Qnil, max_lookup = 0;
6746 else
6747 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 6748
df7492f9 6749 if (BUFFERP (coding->dst_object))
8844fa83 6750 {
df7492f9
KH
6751 set_buffer_internal (XBUFFER (coding->dst_object));
6752 coding->dst_multibyte
6753 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 6754 }
4ed46869 6755
b73bfc1c 6756 coding->consumed = coding->consumed_char = 0;
df7492f9 6757 coding->produced = coding->produced_char = 0;
065e3595 6758 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 6759 coding->errors = 0;
b73bfc1c 6760
df7492f9 6761 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 6762
df7492f9
KH
6763 do {
6764 coding_set_source (coding);
09ee6fdd 6765 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
6766 coding_set_destination (coding);
6767 (*(coding->encoder)) (coding);
6768 } while (coding->consumed_char < coding->src_chars);
6769
284201e4 6770 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
df7492f9
KH
6771 insert_from_gap (coding->produced_char, coding->produced);
6772
6773 return (coding->result);
ec6d2bb8
KH
6774}
6775
fb88bf2d 6776
24a73b0a
KH
6777/* Name (or base name) of work buffer for code conversion. */
6778static Lisp_Object Vcode_conversion_workbuf_name;
d46c5b12 6779
24a73b0a
KH
6780/* A working buffer used by the top level conversion. Once it is
6781 created, it is never destroyed. It has the name
6782 Vcode_conversion_workbuf_name. The other working buffers are
6783 destroyed after the use is finished, and their names are modified
6784 versions of Vcode_conversion_workbuf_name. */
6785static Lisp_Object Vcode_conversion_reused_workbuf;
b73bfc1c 6786
24a73b0a
KH
6787/* 1 iff Vcode_conversion_reused_workbuf is already in use. */
6788static int reused_workbuf_in_use;
4ed46869 6789
24a73b0a
KH
6790
6791/* Return a working buffer of code convesion. MULTIBYTE specifies the
6792 multibyteness of returning buffer. */
b73bfc1c 6793
f6cbaf43 6794static Lisp_Object
24a73b0a 6795make_conversion_work_buffer (multibyte)
f6cbaf43 6796 int multibyte;
df7492f9 6797{
24a73b0a
KH
6798 Lisp_Object name, workbuf;
6799 struct buffer *current;
4ed46869 6800
24a73b0a 6801 if (reused_workbuf_in_use++)
065e3595
KH
6802 {
6803 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6804 workbuf = Fget_buffer_create (name);
6805 }
df7492f9 6806 else
065e3595
KH
6807 {
6808 name = Vcode_conversion_workbuf_name;
6809 workbuf = Fget_buffer_create (name);
6810 if (NILP (Vcode_conversion_reused_workbuf))
6811 Vcode_conversion_reused_workbuf = workbuf;
6812 }
24a73b0a
KH
6813 current = current_buffer;
6814 set_buffer_internal (XBUFFER (workbuf));
6815 Ferase_buffer ();
df7492f9 6816 current_buffer->undo_list = Qt;
24a73b0a 6817 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
df7492f9 6818 set_buffer_internal (current);
24a73b0a 6819 return workbuf;
df7492f9 6820}
d46c5b12 6821
24a73b0a 6822
4776e638 6823static Lisp_Object
24a73b0a
KH
6824code_conversion_restore (arg)
6825 Lisp_Object arg;
4776e638 6826{
24a73b0a 6827 Lisp_Object current, workbuf;
948bdcf3 6828 struct gcpro gcpro1;
24a73b0a 6829
948bdcf3 6830 GCPRO1 (arg);
24a73b0a
KH
6831 current = XCAR (arg);
6832 workbuf = XCDR (arg);
6833 if (! NILP (workbuf))
6834 {
6835 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6836 reused_workbuf_in_use = 0;
6837 else if (! NILP (Fbuffer_live_p (workbuf)))
6838 Fkill_buffer (workbuf);
6839 }
6840 set_buffer_internal (XBUFFER (current));
948bdcf3 6841 UNGCPRO;
4776e638
KH
6842 return Qnil;
6843}
b73bfc1c 6844
24a73b0a
KH
6845Lisp_Object
6846code_conversion_save (with_work_buf, multibyte)
4776e638 6847 int with_work_buf, multibyte;
df7492f9 6848{
24a73b0a 6849 Lisp_Object workbuf = Qnil;
b73bfc1c 6850
4776e638 6851 if (with_work_buf)
24a73b0a
KH
6852 workbuf = make_conversion_work_buffer (multibyte);
6853 record_unwind_protect (code_conversion_restore,
6854 Fcons (Fcurrent_buffer (), workbuf));
4776e638 6855 return workbuf;
df7492f9 6856}
d46c5b12 6857
df7492f9
KH
6858int
6859decode_coding_gap (coding, chars, bytes)
6860 struct coding_system *coding;
6861 EMACS_INT chars, bytes;
6862{
6863 int count = specpdl_ptr - specpdl;
5e5c78be 6864 Lisp_Object attrs;
fb88bf2d 6865
24a73b0a 6866 code_conversion_save (0, 0);
ec6d2bb8 6867
24a73b0a 6868 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6869 coding->src_chars = chars;
6870 coding->src_bytes = bytes;
6871 coding->src_pos = -chars;
6872 coding->src_pos_byte = -bytes;
6873 coding->src_multibyte = chars < bytes;
24a73b0a 6874 coding->dst_object = coding->src_object;
df7492f9
KH
6875 coding->dst_pos = PT;
6876 coding->dst_pos_byte = PT_BYTE;
71c81426 6877 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869 6878
df7492f9
KH
6879 if (CODING_REQUIRE_DETECTION (coding))
6880 detect_coding (coding);
8f924df7 6881
9286b333 6882 coding->mode |= CODING_MODE_LAST_BLOCK;
287c57d7 6883 current_buffer->text->inhibit_shrinking = 1;
df7492f9 6884 decode_coding (coding);
287c57d7 6885 current_buffer->text->inhibit_shrinking = 0;
d46c5b12 6886
5e5c78be
KH
6887 attrs = CODING_ID_ATTRS (coding->id);
6888 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 6889 {
5e5c78be
KH
6890 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6891 Lisp_Object val;
6892
6893 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
6894 val = call1 (CODING_ATTR_POST_READ (attrs),
6895 make_number (coding->produced_char));
5e5c78be
KH
6896 CHECK_NATNUM (val);
6897 coding->produced_char += Z - prev_Z;
6898 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 6899 }
4ed46869 6900
df7492f9 6901 unbind_to (count, Qnil);
b73bfc1c
KH
6902 return coding->result;
6903}
52d41803 6904
4ed46869 6905int
df7492f9 6906encode_coding_gap (coding, chars, bytes)
4ed46869 6907 struct coding_system *coding;
df7492f9 6908 EMACS_INT chars, bytes;
4ed46869 6909{
df7492f9 6910 int count = specpdl_ptr - specpdl;
4ed46869 6911
24a73b0a 6912 code_conversion_save (0, 0);
4ed46869 6913
24a73b0a 6914 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6915 coding->src_chars = chars;
6916 coding->src_bytes = bytes;
6917 coding->src_pos = -chars;
6918 coding->src_pos_byte = -bytes;
6919 coding->src_multibyte = chars < bytes;
6920 coding->dst_object = coding->src_object;
6921 coding->dst_pos = PT;
6922 coding->dst_pos_byte = PT_BYTE;
4ed46869 6923
df7492f9 6924 encode_coding (coding);
b73bfc1c 6925
df7492f9
KH
6926 unbind_to (count, Qnil);
6927 return coding->result;
6928}
4ed46869 6929
d46c5b12 6930
df7492f9
KH
6931/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6932 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 6933
df7492f9 6934 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 6935
df7492f9
KH
6936 If it is a buffer, the text is at point of the buffer. FROM and TO
6937 are positions in the buffer.
b73bfc1c 6938
df7492f9
KH
6939 If it is a string, the text is at the beginning of the string.
6940 FROM and TO are indices to the string.
4ed46869 6941
df7492f9
KH
6942 If it is nil, the text is at coding->source. FROM and TO are
6943 indices to coding->source.
bb10be8b 6944
df7492f9 6945 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 6946
df7492f9
KH
6947 If it is a buffer, the decoded text is inserted at point of the
6948 buffer. If the buffer is the same as SRC_OBJECT, the source text
6949 is deleted.
4ed46869 6950
df7492f9
KH
6951 If it is Qt, a string is made from the decoded text, and
6952 set in CODING->dst_object.
d46c5b12 6953
df7492f9 6954 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 6955 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
6956 CODING->destination by xmalloc. If the decoded text is longer than
6957 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6958 */
d46c5b12 6959
df7492f9
KH
6960void
6961decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6962 dst_object)
d46c5b12 6963 struct coding_system *coding;
df7492f9
KH
6964 Lisp_Object src_object;
6965 EMACS_INT from, from_byte, to, to_byte;
6966 Lisp_Object dst_object;
d46c5b12 6967{
df7492f9
KH
6968 int count = specpdl_ptr - specpdl;
6969 unsigned char *destination;
6970 EMACS_INT dst_bytes;
6971 EMACS_INT chars = to - from;
6972 EMACS_INT bytes = to_byte - from_byte;
6973 Lisp_Object attrs;
4776e638 6974 int saved_pt = -1, saved_pt_byte;
64cedb0c 6975 int need_marker_adjustment = 0;
b3bfad50 6976 Lisp_Object old_deactivate_mark;
d46c5b12 6977
b3bfad50 6978 old_deactivate_mark = Vdeactivate_mark;
93dec019 6979
df7492f9 6980 if (NILP (dst_object))
d46c5b12 6981 {
df7492f9
KH
6982 destination = coding->destination;
6983 dst_bytes = coding->dst_bytes;
d46c5b12 6984 }
93dec019 6985
df7492f9
KH
6986 coding->src_object = src_object;
6987 coding->src_chars = chars;
6988 coding->src_bytes = bytes;
6989 coding->src_multibyte = chars < bytes;
70ad9fc4 6990
df7492f9 6991 if (STRINGP (src_object))
d46c5b12 6992 {
df7492f9
KH
6993 coding->src_pos = from;
6994 coding->src_pos_byte = from_byte;
d46c5b12 6995 }
df7492f9 6996 else if (BUFFERP (src_object))
88993dfd 6997 {
df7492f9
KH
6998 set_buffer_internal (XBUFFER (src_object));
6999 if (from != GPT)
7000 move_gap_both (from, from_byte);
7001 if (EQ (src_object, dst_object))
fb88bf2d 7002 {
64cedb0c
KH
7003 struct Lisp_Marker *tail;
7004
7005 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7006 {
7007 tail->need_adjustment
7008 = tail->charpos == (tail->insertion_type ? from : to);
7009 need_marker_adjustment |= tail->need_adjustment;
7010 }
4776e638 7011 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9 7012 TEMP_SET_PT_BOTH (from, from_byte);
f4a3cc44 7013 current_buffer->text->inhibit_shrinking = 1;
df7492f9
KH
7014 del_range_both (from, from_byte, to, to_byte, 1);
7015 coding->src_pos = -chars;
7016 coding->src_pos_byte = -bytes;
fb88bf2d 7017 }
df7492f9 7018 else
fb88bf2d 7019 {
df7492f9
KH
7020 coding->src_pos = from;
7021 coding->src_pos_byte = from_byte;
fb88bf2d 7022 }
88993dfd
KH
7023 }
7024
df7492f9
KH
7025 if (CODING_REQUIRE_DETECTION (coding))
7026 detect_coding (coding);
7027 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 7028
2cb26057
KH
7029 if (EQ (dst_object, Qt)
7030 || (! NILP (CODING_ATTR_POST_READ (attrs))
7031 && NILP (dst_object)))
b73bfc1c 7032 {
a1567c45
SM
7033 coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7034 coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
df7492f9
KH
7035 coding->dst_pos = BEG;
7036 coding->dst_pos_byte = BEG_BYTE;
b73bfc1c 7037 }
df7492f9 7038 else if (BUFFERP (dst_object))
d46c5b12 7039 {
24a73b0a 7040 code_conversion_save (0, 0);
df7492f9
KH
7041 coding->dst_object = dst_object;
7042 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7043 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7044 coding->dst_multibyte
7045 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
7046 }
7047 else
7048 {
24a73b0a 7049 code_conversion_save (0, 0);
df7492f9 7050 coding->dst_object = Qnil;
0154725e
SM
7051 /* Most callers presume this will return a multibyte result, and they
7052 won't use `binary' or `raw-text' anyway, so let's not worry about
7053 CODING_FOR_UNIBYTE. */
7054 coding->dst_multibyte = Qt;
d46c5b12
KH
7055 }
7056
df7492f9 7057 decode_coding (coding);
fa46990e 7058
df7492f9
KH
7059 if (BUFFERP (coding->dst_object))
7060 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 7061
df7492f9 7062 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 7063 {
b3bfad50 7064 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
df7492f9 7065 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 7066 Lisp_Object val;
d46c5b12 7067
c0cc7f7f 7068 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
b3bfad50
KH
7069 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7070 old_deactivate_mark);
d4850d67
KH
7071 val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7072 make_number (coding->produced_char));
df7492f9
KH
7073 UNGCPRO;
7074 CHECK_NATNUM (val);
7075 coding->produced_char += Z - prev_Z;
7076 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 7077 }
de79a6a5 7078
df7492f9 7079 if (EQ (dst_object, Qt))
ec6d2bb8 7080 {
df7492f9
KH
7081 coding->dst_object = Fbuffer_string ();
7082 }
7083 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7084 {
7085 set_buffer_internal (XBUFFER (coding->dst_object));
7086 if (dst_bytes < coding->produced)
7087 {
b3bfad50 7088 destination = xrealloc (destination, coding->produced);
df7492f9
KH
7089 if (! destination)
7090 {
065e3595
KH
7091 record_conversion_result (coding,
7092 CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
7093 unbind_to (count, Qnil);
7094 return;
7095 }
7096 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7097 move_gap_both (BEGV, BEGV_BYTE);
7098 bcopy (BEGV_ADDR, destination, coding->produced);
7099 coding->destination = destination;
d46c5b12 7100 }
ec6d2bb8 7101 }
b73bfc1c 7102
4776e638
KH
7103 if (saved_pt >= 0)
7104 {
7105 /* This is the case of:
7106 (BUFFERP (src_object) && EQ (src_object, dst_object))
7107 As we have moved PT while replacing the original buffer
7108 contents, we must recover it now. */
7109 set_buffer_internal (XBUFFER (src_object));
f4a3cc44 7110 current_buffer->text->inhibit_shrinking = 0;
4776e638
KH
7111 if (saved_pt < from)
7112 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7113 else if (saved_pt < from + chars)
7114 TEMP_SET_PT_BOTH (from, from_byte);
7115 else if (! NILP (current_buffer->enable_multibyte_characters))
7116 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7117 saved_pt_byte + (coding->produced - bytes));
7118 else
7119 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7120 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7121
7122 if (need_marker_adjustment)
7123 {
7124 struct Lisp_Marker *tail;
7125
7126 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7127 if (tail->need_adjustment)
7128 {
7129 tail->need_adjustment = 0;
7130 if (tail->insertion_type)
7131 {
7132 tail->bytepos = from_byte;
7133 tail->charpos = from;
7134 }
7135 else
7136 {
7137 tail->bytepos = from_byte + coding->produced;
7138 tail->charpos
7139 = (NILP (current_buffer->enable_multibyte_characters)
7140 ? tail->bytepos : from + coding->produced_char);
7141 }
7142 }
7143 }
d46c5b12 7144 }
4776e638 7145
b3bfad50 7146 Vdeactivate_mark = old_deactivate_mark;
065e3595 7147 unbind_to (count, coding->dst_object);
d46c5b12
KH
7148}
7149
d46c5b12 7150
df7492f9
KH
7151void
7152encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7153 dst_object)
d46c5b12 7154 struct coding_system *coding;
df7492f9
KH
7155 Lisp_Object src_object;
7156 EMACS_INT from, from_byte, to, to_byte;
7157 Lisp_Object dst_object;
d46c5b12 7158{
b73bfc1c 7159 int count = specpdl_ptr - specpdl;
df7492f9
KH
7160 EMACS_INT chars = to - from;
7161 EMACS_INT bytes = to_byte - from_byte;
7162 Lisp_Object attrs;
4776e638 7163 int saved_pt = -1, saved_pt_byte;
64cedb0c 7164 int need_marker_adjustment = 0;
c02d943b 7165 int kill_src_buffer = 0;
b3bfad50 7166 Lisp_Object old_deactivate_mark;
df7492f9 7167
b3bfad50 7168 old_deactivate_mark = Vdeactivate_mark;
df7492f9
KH
7169
7170 coding->src_object = src_object;
7171 coding->src_chars = chars;
7172 coding->src_bytes = bytes;
7173 coding->src_multibyte = chars < bytes;
7174
7175 attrs = CODING_ID_ATTRS (coding->id);
7176
64cedb0c
KH
7177 if (EQ (src_object, dst_object))
7178 {
7179 struct Lisp_Marker *tail;
7180
7181 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7182 {
7183 tail->need_adjustment
7184 = tail->charpos == (tail->insertion_type ? from : to);
7185 need_marker_adjustment |= tail->need_adjustment;
7186 }
7187 }
7188
df7492f9 7189 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 7190 {
24a73b0a 7191 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
7192 set_buffer_internal (XBUFFER (coding->src_object));
7193 if (STRINGP (src_object))
7194 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7195 else if (BUFFERP (src_object))
7196 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7197 else
7198 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 7199
df7492f9
KH
7200 if (EQ (src_object, dst_object))
7201 {
7202 set_buffer_internal (XBUFFER (src_object));
4776e638 7203 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
7204 del_range_both (from, from_byte, to, to_byte, 1);
7205 set_buffer_internal (XBUFFER (coding->src_object));
7206 }
7207
d4850d67
KH
7208 {
7209 Lisp_Object args[3];
b3bfad50 7210 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
d4850d67 7211
b3bfad50
KH
7212 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7213 old_deactivate_mark);
d4850d67
KH
7214 args[0] = CODING_ATTR_PRE_WRITE (attrs);
7215 args[1] = make_number (BEG);
7216 args[2] = make_number (Z);
7217 safe_call (3, args);
b3bfad50 7218 UNGCPRO;
d4850d67 7219 }
c02d943b
KH
7220 if (XBUFFER (coding->src_object) != current_buffer)
7221 kill_src_buffer = 1;
ac87bbef 7222 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7223 if (BEG != GPT)
7224 move_gap_both (BEG, BEG_BYTE);
7225 coding->src_chars = Z - BEG;
7226 coding->src_bytes = Z_BYTE - BEG_BYTE;
7227 coding->src_pos = BEG;
7228 coding->src_pos_byte = BEG_BYTE;
7229 coding->src_multibyte = Z < Z_BYTE;
7230 }
7231 else if (STRINGP (src_object))
d46c5b12 7232 {
24a73b0a 7233 code_conversion_save (0, 0);
df7492f9
KH
7234 coding->src_pos = from;
7235 coding->src_pos_byte = from_byte;
b73bfc1c 7236 }
df7492f9 7237 else if (BUFFERP (src_object))
b73bfc1c 7238 {
24a73b0a 7239 code_conversion_save (0, 0);
df7492f9 7240 set_buffer_internal (XBUFFER (src_object));
df7492f9 7241 if (EQ (src_object, dst_object))
d46c5b12 7242 {
4776e638 7243 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
7244 coding->src_object = del_range_1 (from, to, 1, 1);
7245 coding->src_pos = 0;
7246 coding->src_pos_byte = 0;
d46c5b12 7247 }
df7492f9 7248 else
d46c5b12 7249 {
ff0dacd7
KH
7250 if (from < GPT && to >= GPT)
7251 move_gap_both (from, from_byte);
df7492f9
KH
7252 coding->src_pos = from;
7253 coding->src_pos_byte = from_byte;
d46c5b12 7254 }
d46c5b12 7255 }
4776e638 7256 else
24a73b0a 7257 code_conversion_save (0, 0);
d46c5b12 7258
df7492f9 7259 if (BUFFERP (dst_object))
88993dfd 7260 {
df7492f9 7261 coding->dst_object = dst_object;
28f67a95
KH
7262 if (EQ (src_object, dst_object))
7263 {
7264 coding->dst_pos = from;
7265 coding->dst_pos_byte = from_byte;
7266 }
7267 else
7268 {
7269 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7270 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7271 }
df7492f9
KH
7272 coding->dst_multibyte
7273 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 7274 }
df7492f9 7275 else if (EQ (dst_object, Qt))
d46c5b12 7276 {
df7492f9 7277 coding->dst_object = Qnil;
df7492f9 7278 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
7279 if (coding->dst_bytes == 0)
7280 coding->dst_bytes = 1;
7281 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 7282 coding->dst_multibyte = 0;
d46c5b12
KH
7283 }
7284 else
7285 {
df7492f9
KH
7286 coding->dst_object = Qnil;
7287 coding->dst_multibyte = 0;
d46c5b12
KH
7288 }
7289
df7492f9 7290 encode_coding (coding);
d46c5b12 7291
df7492f9 7292 if (EQ (dst_object, Qt))
d46c5b12 7293 {
df7492f9
KH
7294 if (BUFFERP (coding->dst_object))
7295 coding->dst_object = Fbuffer_string ();
7296 else
d46c5b12 7297 {
df7492f9
KH
7298 coding->dst_object
7299 = make_unibyte_string ((char *) coding->destination,
7300 coding->produced);
7301 xfree (coding->destination);
d46c5b12 7302 }
4ed46869 7303 }
d46c5b12 7304
4776e638
KH
7305 if (saved_pt >= 0)
7306 {
7307 /* This is the case of:
7308 (BUFFERP (src_object) && EQ (src_object, dst_object))
7309 As we have moved PT while replacing the original buffer
7310 contents, we must recover it now. */
7311 set_buffer_internal (XBUFFER (src_object));
7312 if (saved_pt < from)
7313 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7314 else if (saved_pt < from + chars)
7315 TEMP_SET_PT_BOTH (from, from_byte);
7316 else if (! NILP (current_buffer->enable_multibyte_characters))
7317 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7318 saved_pt_byte + (coding->produced - bytes));
d46c5b12 7319 else
4776e638
KH
7320 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7321 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7322
7323 if (need_marker_adjustment)
7324 {
7325 struct Lisp_Marker *tail;
7326
7327 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7328 if (tail->need_adjustment)
7329 {
7330 tail->need_adjustment = 0;
7331 if (tail->insertion_type)
7332 {
7333 tail->bytepos = from_byte;
7334 tail->charpos = from;
7335 }
7336 else
7337 {
7338 tail->bytepos = from_byte + coding->produced;
7339 tail->charpos
7340 = (NILP (current_buffer->enable_multibyte_characters)
7341 ? tail->bytepos : from + coding->produced_char);
7342 }
7343 }
7344 }
4776e638
KH
7345 }
7346
c02d943b
KH
7347 if (kill_src_buffer)
7348 Fkill_buffer (coding->src_object);
b3bfad50
KH
7349
7350 Vdeactivate_mark = old_deactivate_mark;
df7492f9 7351 unbind_to (count, Qnil);
b73bfc1c
KH
7352}
7353
df7492f9 7354
b73bfc1c 7355Lisp_Object
df7492f9 7356preferred_coding_system ()
b73bfc1c 7357{
df7492f9 7358 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 7359
df7492f9 7360 return CODING_ID_NAME (id);
4ed46869
KH
7361}
7362
7363\f
7364#ifdef emacs
1397dc18 7365/*** 11. Emacs Lisp library functions ***/
4ed46869 7366
4ed46869 7367DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 7368 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 7369See the documentation of `define-coding-system' for information
48b0f3ae
PJ
7370about coding-system objects. */)
7371 (obj)
4ed46869
KH
7372 Lisp_Object obj;
7373{
44e8490d
KH
7374 if (NILP (obj)
7375 || CODING_SYSTEM_ID (obj) >= 0)
7376 return Qt;
7377 if (! SYMBOLP (obj)
7378 || NILP (Fget (obj, Qcoding_system_define_form)))
7379 return Qnil;
7380 return Qt;
4ed46869
KH
7381}
7382
9d991de8
RS
7383DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7384 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
7385 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
7386 (prompt)
4ed46869
KH
7387 Lisp_Object prompt;
7388{
e0e989f6 7389 Lisp_Object val;
9d991de8
RS
7390 do
7391 {
4608c386
KH
7392 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7393 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 7394 }
8f924df7 7395 while (SCHARS (val) == 0);
e0e989f6 7396 return (Fintern (val, Qnil));
4ed46869
KH
7397}
7398
9b787f3e 7399DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae 7400 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
c7183fb8
GM
7401If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7402Ignores case when completing coding systems (all Emacs coding systems
7403are lower-case). */)
48b0f3ae 7404 (prompt, default_coding_system)
9b787f3e 7405 Lisp_Object prompt, default_coding_system;
4ed46869 7406{
f44d27ce 7407 Lisp_Object val;
c7183fb8
GM
7408 int count = SPECPDL_INDEX ();
7409
9b787f3e 7410 if (SYMBOLP (default_coding_system))
57d25e6f 7411 default_coding_system = SYMBOL_NAME (default_coding_system);
c7183fb8 7412 specbind (Qcompletion_ignore_case, Qt);
4608c386 7413 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
7414 Qt, Qnil, Qcoding_system_history,
7415 default_coding_system, Qnil);
c7183fb8 7416 unbind_to (count, Qnil);
8f924df7 7417 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
7418}
7419
7420DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7421 1, 1, 0,
48b0f3ae 7422 doc: /* Check validity of CODING-SYSTEM.
9ffd559c
KH
7423If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7424It is valid if it is nil or a symbol defined as a coding system by the
7425function `define-coding-system'. */)
df7492f9 7426 (coding_system)
4ed46869
KH
7427 Lisp_Object coding_system;
7428{
44e8490d
KH
7429 Lisp_Object define_form;
7430
7431 define_form = Fget (coding_system, Qcoding_system_define_form);
7432 if (! NILP (define_form))
7433 {
7434 Fput (coding_system, Qcoding_system_define_form, Qnil);
7435 safe_eval (define_form);
7436 }
4ed46869
KH
7437 if (!NILP (Fcoding_system_p (coding_system)))
7438 return coding_system;
fcad4ec4 7439 xsignal1 (Qcoding_system_error, coding_system);
4ed46869 7440}
df7492f9 7441
3a73fa5d 7442\f
89528eb3
KH
7443/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
7444 HIGHEST is nonzero, return the coding system of the highest
7445 priority among the detected coding systems. Otherwize return a
7446 list of detected coding systems sorted by their priorities. If
7447 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7448 multibyte form but contains only ASCII and eight-bit chars.
7449 Otherwise, the bytes are raw bytes.
7450
7451 CODING-SYSTEM controls the detection as below:
7452
7453 If it is nil, detect both text-format and eol-format. If the
7454 text-format part of CODING-SYSTEM is already specified
7455 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
7456 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7457 detect only text-format. */
7458
d46c5b12 7459Lisp_Object
24a73b0a
KH
7460detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7461 coding_system)
8f924df7 7462 const unsigned char *src;
13818c30
SM
7463 EMACS_INT src_chars, src_bytes;
7464 int highest;
0a28aafb 7465 int multibytep;
df7492f9 7466 Lisp_Object coding_system;
4ed46869 7467{
8f924df7 7468 const unsigned char *src_end = src + src_bytes;
df7492f9
KH
7469 Lisp_Object attrs, eol_type;
7470 Lisp_Object val;
7471 struct coding_system coding;
89528eb3 7472 int id;
ff0dacd7 7473 struct coding_detection_info detect_info;
24a73b0a 7474 enum coding_category base_category;
b73bfc1c 7475
df7492f9
KH
7476 if (NILP (coding_system))
7477 coding_system = Qundecided;
7478 setup_coding_system (coding_system, &coding);
7479 attrs = CODING_ID_ATTRS (coding.id);
7480 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 7481 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 7482
df7492f9 7483 coding.source = src;
24a73b0a 7484 coding.src_chars = src_chars;
df7492f9
KH
7485 coding.src_bytes = src_bytes;
7486 coding.src_multibyte = multibytep;
7487 coding.consumed = 0;
89528eb3 7488 coding.mode |= CODING_MODE_LAST_BLOCK;
d46c5b12 7489
ff0dacd7 7490 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 7491
89528eb3 7492 /* At first, detect text-format if necessary. */
24a73b0a
KH
7493 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7494 if (base_category == coding_category_undecided)
4ed46869 7495 {
ff0dacd7
KH
7496 enum coding_category category;
7497 struct coding_system *this;
7498 int c, i;
88993dfd 7499
24a73b0a
KH
7500 /* Skip all ASCII bytes except for a few ISO2022 controls. */
7501 for (i = 0; src < src_end; i++, src++)
4ed46869 7502 {
df7492f9 7503 c = *src;
6cb21a4f 7504 if (c & 0x80)
d46c5b12 7505 break;
6cb21a4f
KH
7506 if (c < 0x20
7507 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
c7266f4a 7508 && ! inhibit_iso_escape_detection)
6cb21a4f
KH
7509 {
7510 coding.head_ascii = src - coding.source;
7511 if (detect_coding_iso_2022 (&coding, &detect_info))
7512 {
7513 /* We have scanned the whole data. */
7514 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7515 /* We didn't find an 8-bit code. */
7516 src = src_end;
7517 break;
7518 }
7519 }
4ed46869 7520 }
df7492f9 7521 coding.head_ascii = src - coding.source;
88993dfd 7522
6cb21a4f
KH
7523 if (src < src_end
7524 || detect_info.found)
7525 {
7526 if (src == src_end)
7527 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
7528 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7 7529 {
6cb21a4f 7530 category = coding_priorities[i];
c7266f4a 7531 this = coding_categories + category;
6cb21a4f 7532 if (detect_info.found & (1 << category))
ff0dacd7
KH
7533 break;
7534 }
6cb21a4f
KH
7535 else
7536 for (i = 0; i < coding_category_raw_text; i++)
df7492f9 7537 {
6cb21a4f
KH
7538 category = coding_priorities[i];
7539 this = coding_categories + category;
7540
7541 if (this->id < 0)
24a73b0a 7542 {
6cb21a4f
KH
7543 /* No coding system of this category is defined. */
7544 detect_info.rejected |= (1 << category);
7545 }
7546 else if (category >= coding_category_raw_text)
7547 continue;
7548 else if (detect_info.checked & (1 << category))
7549 {
7550 if (highest
7551 && (detect_info.found & (1 << category)))
7552 break;
7553 }
7554 else
7555 {
7556 if ((*(this->detector)) (&coding, &detect_info)
7557 && highest
7558 && (detect_info.found & (1 << category)))
24a73b0a 7559 {
6cb21a4f
KH
7560 if (category == coding_category_utf_16_auto)
7561 {
7562 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7563 category = coding_category_utf_16_le;
7564 else
7565 category = coding_category_utf_16_be;
7566 }
7567 break;
24a73b0a 7568 }
24a73b0a 7569 }
df7492f9 7570 }
6cb21a4f 7571 }
ec6d2bb8 7572
ff0dacd7 7573 if (detect_info.rejected == CATEGORY_MASK_ANY)
ec6d2bb8 7574 {
ff0dacd7 7575 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
7576 id = coding_categories[coding_category_raw_text].id;
7577 val = Fcons (make_number (id), Qnil);
7578 }
ff0dacd7 7579 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 7580 {
ff0dacd7 7581 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
7582 id = coding_categories[coding_category_undecided].id;
7583 val = Fcons (make_number (id), Qnil);
7584 }
7585 else if (highest)
7586 {
ff0dacd7 7587 if (detect_info.found)
ec6d2bb8 7588 {
ff0dacd7
KH
7589 detect_info.found = 1 << category;
7590 val = Fcons (make_number (this->id), Qnil);
7591 }
7592 else
7593 for (i = 0; i < coding_category_raw_text; i++)
7594 if (! (detect_info.rejected & (1 << coding_priorities[i])))
7595 {
7596 detect_info.found = 1 << coding_priorities[i];
7597 id = coding_categories[coding_priorities[i]].id;
7598 val = Fcons (make_number (id), Qnil);
7599 break;
7600 }
7601 }
89528eb3
KH
7602 else
7603 {
ff0dacd7
KH
7604 int mask = detect_info.rejected | detect_info.found;
7605 int found = 0;
89528eb3 7606 val = Qnil;
ec6d2bb8 7607
89528eb3 7608 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
7609 {
7610 category = coding_priorities[i];
7611 if (! (mask & (1 << category)))
ec6d2bb8 7612 {
ff0dacd7
KH
7613 found |= 1 << category;
7614 id = coding_categories[category].id;
c7266f4a
KH
7615 if (id >= 0)
7616 val = Fcons (make_number (id), val);
ff0dacd7
KH
7617 }
7618 }
7619 for (i = coding_category_raw_text - 1; i >= 0; i--)
7620 {
7621 category = coding_priorities[i];
7622 if (detect_info.found & (1 << category))
7623 {
7624 id = coding_categories[category].id;
7625 val = Fcons (make_number (id), val);
ec6d2bb8 7626 }
ec6d2bb8 7627 }
ff0dacd7 7628 detect_info.found |= found;
ec6d2bb8 7629 }
ec6d2bb8 7630 }
24a73b0a
KH
7631 else if (base_category == coding_category_utf_16_auto)
7632 {
7633 if (detect_coding_utf_16 (&coding, &detect_info))
7634 {
24a73b0a
KH
7635 struct coding_system *this;
7636
7637 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7638 this = coding_categories + coding_category_utf_16_le;
7639 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7640 this = coding_categories + coding_category_utf_16_be;
7641 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7642 this = coding_categories + coding_category_utf_16_be_nosig;
7643 else
7644 this = coding_categories + coding_category_utf_16_le_nosig;
7645 val = Fcons (make_number (this->id), Qnil);
7646 }
7647 }
df7492f9
KH
7648 else
7649 {
ff0dacd7 7650 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 7651 val = Fcons (make_number (coding.id), Qnil);
4ed46869 7652 }
df7492f9 7653
89528eb3 7654 /* Then, detect eol-format if necessary. */
df7492f9 7655 {
89528eb3 7656 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
7657 Lisp_Object tail;
7658
89528eb3
KH
7659 if (VECTORP (eol_type))
7660 {
ff0dacd7 7661 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
89528eb3
KH
7662 normal_eol = detect_eol (coding.source, src_bytes,
7663 coding_category_raw_text);
ff0dacd7
KH
7664 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7665 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
7666 utf_16_be_eol = detect_eol (coding.source, src_bytes,
7667 coding_category_utf_16_be);
ff0dacd7
KH
7668 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7669 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
7670 utf_16_le_eol = detect_eol (coding.source, src_bytes,
7671 coding_category_utf_16_le);
7672 }
7673 else
7674 {
7675 if (EQ (eol_type, Qunix))
7676 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7677 else if (EQ (eol_type, Qdos))
7678 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7679 else
7680 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7681 }
7682
df7492f9
KH
7683 for (tail = val; CONSP (tail); tail = XCDR (tail))
7684 {
89528eb3 7685 enum coding_category category;
df7492f9 7686 int this_eol;
89528eb3
KH
7687
7688 id = XINT (XCAR (tail));
7689 attrs = CODING_ID_ATTRS (id);
7690 category = XINT (CODING_ATTR_CATEGORY (attrs));
7691 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
7692 if (VECTORP (eol_type))
7693 {
89528eb3
KH
7694 if (category == coding_category_utf_16_be
7695 || category == coding_category_utf_16_be_nosig)
7696 this_eol = utf_16_be_eol;
7697 else if (category == coding_category_utf_16_le
7698 || category == coding_category_utf_16_le_nosig)
7699 this_eol = utf_16_le_eol;
df7492f9 7700 else
89528eb3
KH
7701 this_eol = normal_eol;
7702
df7492f9
KH
7703 if (this_eol == EOL_SEEN_LF)
7704 XSETCAR (tail, AREF (eol_type, 0));
7705 else if (this_eol == EOL_SEEN_CRLF)
7706 XSETCAR (tail, AREF (eol_type, 1));
7707 else if (this_eol == EOL_SEEN_CR)
7708 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
7709 else
7710 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 7711 }
89528eb3
KH
7712 else
7713 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
7714 }
7715 }
ec6d2bb8 7716
03699b14 7717 return (highest ? XCAR (val) : val);
ec6d2bb8
KH
7718}
7719
ec6d2bb8 7720
d46c5b12
KH
7721DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7722 2, 3, 0,
48b0f3ae
PJ
7723 doc: /* Detect coding system of the text in the region between START and END.
7724Return a list of possible coding systems ordered by priority.
ec6d2bb8 7725
12e0131a
KH
7726If only ASCII characters are found (except for such ISO-2022 control
7727characters ISO-2022 as ESC), it returns a list of single element
48b0f3ae
PJ
7728`undecided' or its subsidiary coding system according to a detected
7729end-of-line format.
ec6d2bb8 7730
48b0f3ae
PJ
7731If optional argument HIGHEST is non-nil, return the coding system of
7732highest priority. */)
7733 (start, end, highest)
d46c5b12
KH
7734 Lisp_Object start, end, highest;
7735{
7736 int from, to;
7737 int from_byte, to_byte;
ec6d2bb8 7738
b7826503
PJ
7739 CHECK_NUMBER_COERCE_MARKER (start);
7740 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 7741
d46c5b12
KH
7742 validate_region (&start, &end);
7743 from = XINT (start), to = XINT (end);
7744 from_byte = CHAR_TO_BYTE (from);
7745 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 7746
d46c5b12
KH
7747 if (from < GPT && to >= GPT)
7748 move_gap_both (to, to_byte);
c210f766 7749
d46c5b12 7750 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 7751 to - from, to_byte - from_byte,
0a28aafb
KH
7752 !NILP (highest),
7753 !NILP (current_buffer
df7492f9
KH
7754 ->enable_multibyte_characters),
7755 Qnil);
ec6d2bb8
KH
7756}
7757
d46c5b12
KH
7758DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7759 1, 2, 0,
48b0f3ae
PJ
7760 doc: /* Detect coding system of the text in STRING.
7761Return a list of possible coding systems ordered by priority.
fb88bf2d 7762
12e0131a
KH
7763If only ASCII characters are found (except for such ISO-2022 control
7764characters ISO-2022 as ESC), it returns a list of single element
48b0f3ae
PJ
7765`undecided' or its subsidiary coding system according to a detected
7766end-of-line format.
d46c5b12 7767
48b0f3ae
PJ
7768If optional argument HIGHEST is non-nil, return the coding system of
7769highest priority. */)
7770 (string, highest)
d46c5b12
KH
7771 Lisp_Object string, highest;
7772{
b7826503 7773 CHECK_STRING (string);
b73bfc1c 7774
24a73b0a
KH
7775 return detect_coding_system (SDATA (string),
7776 SCHARS (string), SBYTES (string),
8f924df7 7777 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 7778 Qnil);
4ed46869 7779}
4ed46869 7780
b73bfc1c 7781
df7492f9
KH
7782static INLINE int
7783char_encodable_p (c, attrs)
7784 int c;
7785 Lisp_Object attrs;
05e6f5dc 7786{
df7492f9 7787 Lisp_Object tail;
df7492f9 7788 struct charset *charset;
7d64c6ad 7789 Lisp_Object translation_table;
d46c5b12 7790
7d64c6ad 7791 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 7792 if (! NILP (translation_table))
7d64c6ad 7793 c = translate_char (translation_table, c);
df7492f9
KH
7794 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7795 CONSP (tail); tail = XCDR (tail))
e133c8fa 7796 {
df7492f9
KH
7797 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7798 if (CHAR_CHARSET_P (c, charset))
7799 break;
e133c8fa 7800 }
df7492f9 7801 return (! NILP (tail));
05e6f5dc 7802}
83fa074f 7803
fb88bf2d 7804
df7492f9
KH
7805/* Return a list of coding systems that safely encode the text between
7806 START and END. If EXCLUDE is non-nil, it is a list of coding
7807 systems not to check. The returned list doesn't contain any such
48468dac 7808 coding systems. In any case, if the text contains only ASCII or is
df7492f9 7809 unibyte, return t. */
e077cc80 7810
df7492f9
KH
7811DEFUN ("find-coding-systems-region-internal",
7812 Ffind_coding_systems_region_internal,
7813 Sfind_coding_systems_region_internal, 2, 3, 0,
7814 doc: /* Internal use only. */)
7815 (start, end, exclude)
7816 Lisp_Object start, end, exclude;
7817{
7818 Lisp_Object coding_attrs_list, safe_codings;
7819 EMACS_INT start_byte, end_byte;
7c78e542 7820 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7821 int c;
7822 Lisp_Object tail, elt;
d46c5b12 7823
df7492f9
KH
7824 if (STRINGP (start))
7825 {
7826 if (!STRING_MULTIBYTE (start)
8f924df7 7827 || SCHARS (start) == SBYTES (start))
df7492f9
KH
7828 return Qt;
7829 start_byte = 0;
8f924df7 7830 end_byte = SBYTES (start);
df7492f9
KH
7831 }
7832 else
d46c5b12 7833 {
df7492f9
KH
7834 CHECK_NUMBER_COERCE_MARKER (start);
7835 CHECK_NUMBER_COERCE_MARKER (end);
7836 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7837 args_out_of_range (start, end);
7838 if (NILP (current_buffer->enable_multibyte_characters))
7839 return Qt;
7840 start_byte = CHAR_TO_BYTE (XINT (start));
7841 end_byte = CHAR_TO_BYTE (XINT (end));
7842 if (XINT (end) - XINT (start) == end_byte - start_byte)
7843 return Qt;
d46c5b12 7844
e1c23804 7845 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 7846 {
e1c23804
DL
7847 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7848 move_gap_both (XINT (start), start_byte);
df7492f9 7849 else
e1c23804 7850 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
7851 }
7852 }
7853
df7492f9
KH
7854 coding_attrs_list = Qnil;
7855 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7856 if (NILP (exclude)
7857 || NILP (Fmemq (XCAR (tail), exclude)))
7858 {
7859 Lisp_Object attrs;
d46c5b12 7860
df7492f9
KH
7861 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7862 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7863 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
7864 {
7865 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 7866 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
7867 coding_attrs_list = Fcons (attrs, coding_attrs_list);
7868 }
df7492f9 7869 }
d46c5b12 7870
df7492f9 7871 if (STRINGP (start))
8f924df7 7872 p = pbeg = SDATA (start);
df7492f9
KH
7873 else
7874 p = pbeg = BYTE_POS_ADDR (start_byte);
7875 pend = p + (end_byte - start_byte);
b843d1ae 7876
df7492f9
KH
7877 while (p < pend && ASCII_BYTE_P (*p)) p++;
7878 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 7879
05e6f5dc 7880 while (p < pend)
72d1a715 7881 {
df7492f9
KH
7882 if (ASCII_BYTE_P (*p))
7883 p++;
72d1a715
RS
7884 else
7885 {
df7492f9 7886 c = STRING_CHAR_ADVANCE (p);
12410ef1 7887
df7492f9
KH
7888 charset_map_loaded = 0;
7889 for (tail = coding_attrs_list; CONSP (tail);)
7890 {
7891 elt = XCAR (tail);
7892 if (NILP (elt))
7893 tail = XCDR (tail);
7894 else if (char_encodable_p (c, elt))
7895 tail = XCDR (tail);
7896 else if (CONSP (XCDR (tail)))
7897 {
7898 XSETCAR (tail, XCAR (XCDR (tail)));
7899 XSETCDR (tail, XCDR (XCDR (tail)));
7900 }
7901 else
7902 {
7903 XSETCAR (tail, Qnil);
7904 tail = XCDR (tail);
7905 }
7906 }
7907 if (charset_map_loaded)
7908 {
7909 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 7910
df7492f9 7911 if (STRINGP (start))
8f924df7 7912 pbeg = SDATA (start);
df7492f9
KH
7913 else
7914 pbeg = BYTE_POS_ADDR (start_byte);
7915 p = pbeg + p_offset;
7916 pend = pbeg + pend_offset;
7917 }
7918 }
ec6d2bb8 7919 }
fb88bf2d 7920
988b3759 7921 safe_codings = list2 (Qraw_text, Qno_conversion);
df7492f9
KH
7922 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7923 if (! NILP (XCAR (tail)))
7924 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 7925
05e6f5dc
KH
7926 return safe_codings;
7927}
4956c225 7928
d46c5b12 7929
8f924df7
KH
7930DEFUN ("unencodable-char-position", Funencodable_char_position,
7931 Sunencodable_char_position, 3, 5, 0,
7932 doc: /*
7933Return position of first un-encodable character in a region.
7934START and END specfiy the region and CODING-SYSTEM specifies the
7935encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 7936
8f924df7
KH
7937If optional 4th argument COUNT is non-nil, it specifies at most how
7938many un-encodable characters to search. In this case, the value is a
7939list of positions.
d46c5b12 7940
8f924df7
KH
7941If optional 5th argument STRING is non-nil, it is a string to search
7942for un-encodable characters. In that case, START and END are indexes
7943to the string. */)
7944 (start, end, coding_system, count, string)
7945 Lisp_Object start, end, coding_system, count, string;
7946{
7947 int n;
7948 struct coding_system coding;
7d64c6ad 7949 Lisp_Object attrs, charset_list, translation_table;
8f924df7
KH
7950 Lisp_Object positions;
7951 int from, to;
7952 const unsigned char *p, *stop, *pend;
7953 int ascii_compatible;
fb88bf2d 7954
8f924df7
KH
7955 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7956 attrs = CODING_ID_ATTRS (coding.id);
7957 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7958 return Qnil;
7959 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7960 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 7961 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 7962
8f924df7
KH
7963 if (NILP (string))
7964 {
7965 validate_region (&start, &end);
7966 from = XINT (start);
7967 to = XINT (end);
7968 if (NILP (current_buffer->enable_multibyte_characters)
7969 || (ascii_compatible
7970 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7971 return Qnil;
7972 p = CHAR_POS_ADDR (from);
7973 pend = CHAR_POS_ADDR (to);
7974 if (from < GPT && to >= GPT)
7975 stop = GPT_ADDR;
7976 else
7977 stop = pend;
7978 }
7979 else
7980 {
7981 CHECK_STRING (string);
7982 CHECK_NATNUM (start);
7983 CHECK_NATNUM (end);
7984 from = XINT (start);
7985 to = XINT (end);
7986 if (from > to
7987 || to > SCHARS (string))
7988 args_out_of_range_3 (string, start, end);
7989 if (! STRING_MULTIBYTE (string))
7990 return Qnil;
7991 p = SDATA (string) + string_char_to_byte (string, from);
7992 stop = pend = SDATA (string) + string_char_to_byte (string, to);
7993 if (ascii_compatible && (to - from) == (pend - p))
7994 return Qnil;
7995 }
f2558efd 7996
8f924df7
KH
7997 if (NILP (count))
7998 n = 1;
7999 else
b73bfc1c 8000 {
8f924df7
KH
8001 CHECK_NATNUM (count);
8002 n = XINT (count);
b73bfc1c
KH
8003 }
8004
8f924df7
KH
8005 positions = Qnil;
8006 while (1)
d46c5b12 8007 {
8f924df7 8008 int c;
ec6d2bb8 8009
8f924df7
KH
8010 if (ascii_compatible)
8011 while (p < stop && ASCII_BYTE_P (*p))
8012 p++, from++;
8013 if (p >= stop)
0e79d667 8014 {
8f924df7
KH
8015 if (p >= pend)
8016 break;
8017 stop = pend;
8018 p = GAP_END_ADDR;
0e79d667 8019 }
ec6d2bb8 8020
8f924df7
KH
8021 c = STRING_CHAR_ADVANCE (p);
8022 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
8023 && ! char_charset (translate_char (translation_table, c),
8024 charset_list, NULL))
ec6d2bb8 8025 {
8f924df7
KH
8026 positions = Fcons (make_number (from), positions);
8027 n--;
8028 if (n == 0)
8029 break;
ec6d2bb8
KH
8030 }
8031
8f924df7
KH
8032 from++;
8033 }
d46c5b12 8034
8f924df7
KH
8035 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8036}
d46c5b12 8037
d46c5b12 8038
df7492f9
KH
8039DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8040 Scheck_coding_systems_region, 3, 3, 0,
8041 doc: /* Check if the region is encodable by coding systems.
d46c5b12 8042
df7492f9
KH
8043START and END are buffer positions specifying the region.
8044CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 8045
df7492f9
KH
8046The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8047CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
8048whole region, POS0, POS1, ... are buffer positions where non-encodable
8049characters are found.
93dec019 8050
df7492f9
KH
8051If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8052value is nil.
93dec019 8053
df7492f9
KH
8054START may be a string. In that case, check if the string is
8055encodable, and the value contains indices to the string instead of
8056buffer positions. END is ignored. */)
8057 (start, end, coding_system_list)
8058 Lisp_Object start, end, coding_system_list;
05e6f5dc 8059{
df7492f9
KH
8060 Lisp_Object list;
8061 EMACS_INT start_byte, end_byte;
8062 int pos;
7c78e542 8063 const unsigned char *p, *pbeg, *pend;
df7492f9 8064 int c;
7d64c6ad 8065 Lisp_Object tail, elt, attrs;
70ad9fc4 8066
05e6f5dc
KH
8067 if (STRINGP (start))
8068 {
df7492f9 8069 if (!STRING_MULTIBYTE (start)
8f924df7 8070 && SCHARS (start) != SBYTES (start))
df7492f9
KH
8071 return Qnil;
8072 start_byte = 0;
8f924df7 8073 end_byte = SBYTES (start);
df7492f9 8074 pos = 0;
d46c5b12 8075 }
05e6f5dc 8076 else
b73bfc1c 8077 {
b7826503
PJ
8078 CHECK_NUMBER_COERCE_MARKER (start);
8079 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
8080 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8081 args_out_of_range (start, end);
8082 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
8083 return Qnil;
8084 start_byte = CHAR_TO_BYTE (XINT (start));
8085 end_byte = CHAR_TO_BYTE (XINT (end));
8086 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 8087 return Qt;
df7492f9 8088
e1c23804 8089 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 8090 {
e1c23804
DL
8091 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8092 move_gap_both (XINT (start), start_byte);
df7492f9 8093 else
e1c23804 8094 move_gap_both (XINT (end), end_byte);
b73bfc1c 8095 }
e1c23804 8096 pos = XINT (start);
b73bfc1c 8097 }
7553d0e1 8098
df7492f9
KH
8099 list = Qnil;
8100 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 8101 {
df7492f9 8102 elt = XCAR (tail);
7d64c6ad 8103 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
8104 ASET (attrs, coding_attr_trans_tbl,
8105 get_translation_table (attrs, 1, NULL));
7d64c6ad 8106 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
8107 }
8108
df7492f9 8109 if (STRINGP (start))
8f924df7 8110 p = pbeg = SDATA (start);
72d1a715 8111 else
df7492f9
KH
8112 p = pbeg = BYTE_POS_ADDR (start_byte);
8113 pend = p + (end_byte - start_byte);
4ed46869 8114
df7492f9
KH
8115 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8116 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 8117
df7492f9 8118 while (p < pend)
d46c5b12 8119 {
df7492f9
KH
8120 if (ASCII_BYTE_P (*p))
8121 p++;
e133c8fa 8122 else
05e6f5dc 8123 {
df7492f9
KH
8124 c = STRING_CHAR_ADVANCE (p);
8125
8126 charset_map_loaded = 0;
8127 for (tail = list; CONSP (tail); tail = XCDR (tail))
8128 {
8129 elt = XCDR (XCAR (tail));
8130 if (! char_encodable_p (c, XCAR (elt)))
8131 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8132 }
8133 if (charset_map_loaded)
8134 {
8135 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8136
8137 if (STRINGP (start))
8f924df7 8138 pbeg = SDATA (start);
df7492f9
KH
8139 else
8140 pbeg = BYTE_POS_ADDR (start_byte);
8141 p = pbeg + p_offset;
8142 pend = pbeg + pend_offset;
8143 }
05e6f5dc 8144 }
df7492f9 8145 pos++;
d46c5b12 8146 }
4ed46869 8147
df7492f9
KH
8148 tail = list;
8149 list = Qnil;
8150 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 8151 {
df7492f9
KH
8152 elt = XCAR (tail);
8153 if (CONSP (XCDR (XCDR (elt))))
8154 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8155 list);
ec6d2bb8 8156 }
2b4f9037 8157
df7492f9 8158 return list;
d46c5b12
KH
8159}
8160
3fd9494b 8161
b73bfc1c 8162Lisp_Object
df7492f9
KH
8163code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8164 Lisp_Object start, end, coding_system, dst_object;
8165 int encodep, norecord;
4ed46869 8166{
3a73fa5d 8167 struct coding_system coding;
df7492f9
KH
8168 EMACS_INT from, from_byte, to, to_byte;
8169 Lisp_Object src_object;
4ed46869 8170
b7826503
PJ
8171 CHECK_NUMBER_COERCE_MARKER (start);
8172 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
8173 if (NILP (coding_system))
8174 coding_system = Qno_conversion;
8175 else
8176 CHECK_CODING_SYSTEM (coding_system);
8177 src_object = Fcurrent_buffer ();
8178 if (NILP (dst_object))
8179 dst_object = src_object;
8180 else if (! EQ (dst_object, Qt))
8181 CHECK_BUFFER (dst_object);
3a73fa5d 8182
d46c5b12
KH
8183 validate_region (&start, &end);
8184 from = XFASTINT (start);
df7492f9 8185 from_byte = CHAR_TO_BYTE (from);
d46c5b12 8186 to = XFASTINT (end);
df7492f9 8187 to_byte = CHAR_TO_BYTE (to);
764ca8da 8188
df7492f9
KH
8189 setup_coding_system (coding_system, &coding);
8190 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 8191
df7492f9
KH
8192 if (encodep)
8193 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8194 dst_object);
8195 else
8196 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8197 dst_object);
8198 if (! norecord)
8199 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 8200
df7492f9
KH
8201 return (BUFFERP (dst_object)
8202 ? make_number (coding.produced_char)
8203 : coding.dst_object);
4031e2bf 8204}
78108bcd 8205
4ed46869 8206
4031e2bf 8207DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 8208 3, 4, "r\nzCoding system: ",
48b0f3ae 8209 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
8210When called from a program, takes four arguments:
8211 START, END, CODING-SYSTEM, and DESTINATION.
8212START and END are buffer positions.
8844fa83 8213
df7492f9 8214Optional 4th arguments DESTINATION specifies where the decoded text goes.
2354b80b 8215If nil, the region between START and END is replaced by the decoded text.
df7492f9
KH
8216If buffer, the decoded text is inserted in the buffer.
8217If t, the decoded text is returned.
8844fa83 8218
48b0f3ae
PJ
8219This function sets `last-coding-system-used' to the precise coding system
8220used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8221not fully specified.)
8222It returns the length of the decoded text. */)
df7492f9
KH
8223 (start, end, coding_system, destination)
8224 Lisp_Object start, end, coding_system, destination;
4031e2bf 8225{
df7492f9 8226 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 8227}
8844fa83 8228
3a73fa5d 8229DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
8230 3, 4, "r\nzCoding system: ",
8231 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
8232When called from a program, takes three arguments:
8233START, END, and CODING-SYSTEM. START and END are buffer positions.
d46c5b12 8234
df7492f9
KH
8235Optional 4th arguments DESTINATION specifies where the encoded text goes.
8236If nil, the region between START and END is replace by the encoded text.
8237If buffer, the encoded text is inserted in the buffer.
8238If t, the encoded text is returned.
2391eaa4 8239
48b0f3ae
PJ
8240This function sets `last-coding-system-used' to the precise coding system
8241used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8242not fully specified.)
8243It returns the length of the encoded text. */)
df7492f9
KH
8244 (start, end, coding_system, destination)
8245 Lisp_Object start, end, coding_system, destination;
3a73fa5d 8246{
df7492f9 8247 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
8248}
8249
8250Lisp_Object
df7492f9
KH
8251code_convert_string (string, coding_system, dst_object,
8252 encodep, nocopy, norecord)
8253 Lisp_Object string, coding_system, dst_object;
8254 int encodep, nocopy, norecord;
b73bfc1c 8255{
4031e2bf 8256 struct coding_system coding;
df7492f9 8257 EMACS_INT chars, bytes;
ec6d2bb8 8258
b7826503 8259 CHECK_STRING (string);
d46c5b12 8260 if (NILP (coding_system))
4956c225 8261 {
df7492f9
KH
8262 if (! norecord)
8263 Vlast_coding_system_used = Qno_conversion;
8264 if (NILP (dst_object))
8265 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 8266 }
b73bfc1c 8267
df7492f9
KH
8268 if (NILP (coding_system))
8269 coding_system = Qno_conversion;
8270 else
8271 CHECK_CODING_SYSTEM (coding_system);
8272 if (NILP (dst_object))
8273 dst_object = Qt;
8274 else if (! EQ (dst_object, Qt))
8275 CHECK_BUFFER (dst_object);
73be902c 8276
df7492f9 8277 setup_coding_system (coding_system, &coding);
d46c5b12 8278 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
8279 chars = SCHARS (string);
8280 bytes = SBYTES (string);
df7492f9
KH
8281 if (encodep)
8282 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8283 else
8284 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8285 if (! norecord)
8286 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 8287
df7492f9
KH
8288 return (BUFFERP (dst_object)
8289 ? make_number (coding.produced_char)
8290 : coding.dst_object);
4ed46869 8291}
73be902c 8292
b73bfc1c 8293
ecec61c1 8294/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 8295 Do not set Vlast_coding_system_used.
4ed46869 8296
ec6d2bb8
KH
8297 This function is called only from macros DECODE_FILE and
8298 ENCODE_FILE, thus we ignore character composition. */
4ed46869 8299
ecec61c1
KH
8300Lisp_Object
8301code_convert_string_norecord (string, coding_system, encodep)
8302 Lisp_Object string, coding_system;
8303 int encodep;
4ed46869 8304{
0be8721c 8305 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
8306}
8307
4ed46869 8308
df7492f9
KH
8309DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8310 2, 4, 0,
8311 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8312
8313Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8314if the decoding operation is trivial.
ecec61c1 8315
df7492f9 8316Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 8317inserted in BUFFER instead of returned as a string. In this case,
df7492f9 8318the return value is BUFFER.
ecec61c1 8319
df7492f9
KH
8320This function sets `last-coding-system-used' to the precise coding system
8321used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8322not fully specified. */)
8323 (string, coding_system, nocopy, buffer)
8324 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 8325{
df7492f9
KH
8326 return code_convert_string (string, coding_system, buffer,
8327 0, ! NILP (nocopy), 0);
4ed46869
KH
8328}
8329
df7492f9
KH
8330DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8331 2, 4, 0,
8332 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8333
8334Optional third arg NOCOPY non-nil means it is OK to return STRING
8335itself if the encoding operation is trivial.
8336
8337Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 8338inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
8339the return value is BUFFER.
8340
8341This function sets `last-coding-system-used' to the precise coding system
8342used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8343not fully specified.) */)
8344 (string, coding_system, nocopy, buffer)
8345 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 8346{
df7492f9 8347 return code_convert_string (string, coding_system, buffer,
c197f191 8348 1, ! NILP (nocopy), 1);
4ed46869 8349}
df7492f9 8350
3a73fa5d 8351\f
4ed46869 8352DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
8353 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8354Return the corresponding character. */)
8355 (code)
4ed46869 8356 Lisp_Object code;
4ed46869 8357{
df7492f9
KH
8358 Lisp_Object spec, attrs, val;
8359 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8360 int c;
4ed46869 8361
df7492f9
KH
8362 CHECK_NATNUM (code);
8363 c = XFASTINT (code);
8364 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8365 attrs = AREF (spec, 0);
4ed46869 8366
df7492f9
KH
8367 if (ASCII_BYTE_P (c)
8368 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8369 return code;
4ed46869 8370
df7492f9
KH
8371 val = CODING_ATTR_CHARSET_LIST (attrs);
8372 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
8373 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8374 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 8375
df7492f9
KH
8376 if (c <= 0x7F)
8377 charset = charset_roman;
8378 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 8379 {
df7492f9
KH
8380 charset = charset_kana;
8381 c -= 0x80;
4ed46869 8382 }
55ab7be3 8383 else
4ed46869 8384 {
004068e4 8385 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
8386
8387 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8388 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8389 error ("Invalid code: %d", code);
8390 SJIS_TO_JIS (c);
8391 charset = charset_kanji;
4ed46869 8392 }
df7492f9
KH
8393 c = DECODE_CHAR (charset, c);
8394 if (c < 0)
8395 error ("Invalid code: %d", code);
8396 return make_number (c);
93dec019 8397}
4ed46869 8398
48b0f3ae 8399
4ed46869 8400DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8acf0c0e 8401 doc: /* Encode a Japanese character CH to shift_jis encoding.
48b0f3ae
PJ
8402Return the corresponding code in SJIS. */)
8403 (ch)
df7492f9 8404 Lisp_Object ch;
4ed46869 8405{
df7492f9
KH
8406 Lisp_Object spec, attrs, charset_list;
8407 int c;
8408 struct charset *charset;
8409 unsigned code;
48b0f3ae 8410
df7492f9
KH
8411 CHECK_CHARACTER (ch);
8412 c = XFASTINT (ch);
8413 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8414 attrs = AREF (spec, 0);
8415
8416 if (ASCII_CHAR_P (c)
8417 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8418 return ch;
8419
8420 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8421 charset = char_charset (c, charset_list, &code);
8422 if (code == CHARSET_INVALID_CODE (charset))
8423 error ("Can't encode by shift_jis encoding: %d", c);
8424 JIS_TO_SJIS (code);
8425
8426 return make_number (code);
4ed46869
KH
8427}
8428
8429DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
8430 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8431Return the corresponding character. */)
8432 (code)
4ed46869 8433 Lisp_Object code;
d46c5b12 8434{
df7492f9
KH
8435 Lisp_Object spec, attrs, val;
8436 struct charset *charset_roman, *charset_big5, *charset;
8437 int c;
6289dd10 8438
df7492f9
KH
8439 CHECK_NATNUM (code);
8440 c = XFASTINT (code);
8441 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8442 attrs = AREF (spec, 0);
4ed46869 8443
df7492f9
KH
8444 if (ASCII_BYTE_P (c)
8445 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8446 return code;
6289dd10 8447
df7492f9
KH
8448 val = CODING_ATTR_CHARSET_LIST (attrs);
8449 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8450 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 8451
df7492f9
KH
8452 if (c <= 0x7F)
8453 charset = charset_roman;
c28a9453
KH
8454 else
8455 {
df7492f9
KH
8456 int b1 = c >> 8, b2 = c & 0x7F;
8457 if (b1 < 0xA1 || b1 > 0xFE
8458 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8459 error ("Invalid code: %d", code);
8460 charset = charset_big5;
c28a9453 8461 }
df7492f9
KH
8462 c = DECODE_CHAR (charset, (unsigned )c);
8463 if (c < 0)
8464 error ("Invalid code: %d", code);
8465 return make_number (c);
d46c5b12 8466}
6289dd10 8467
4ed46869 8468DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8acf0c0e 8469 doc: /* Encode the Big5 character CH to BIG5 coding system.
48b0f3ae
PJ
8470Return the corresponding character code in Big5. */)
8471 (ch)
4ed46869
KH
8472 Lisp_Object ch;
8473{
df7492f9
KH
8474 Lisp_Object spec, attrs, charset_list;
8475 struct charset *charset;
8476 int c;
8477 unsigned code;
8478
8479 CHECK_CHARACTER (ch);
8480 c = XFASTINT (ch);
8481 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8482 attrs = AREF (spec, 0);
8483 if (ASCII_CHAR_P (c)
8484 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8485 return ch;
8486
8487 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8488 charset = char_charset (c, charset_list, &code);
8489 if (code == CHARSET_INVALID_CODE (charset))
8490 error ("Can't encode by Big5 encoding: %d", c);
8491
8492 return make_number (code);
4ed46869 8493}
48b0f3ae 8494
3a73fa5d 8495\f
002fdb44 8496DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
68bba4e4 8497 Sset_terminal_coding_system_internal, 1, 2, 0,
48b0f3ae 8498 doc: /* Internal use only. */)
6ed8eeff 8499 (coding_system, terminal)
b74e4686 8500 Lisp_Object coding_system;
6ed8eeff 8501 Lisp_Object terminal;
4ed46869 8502{
6ed8eeff 8503 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
b7826503 8504 CHECK_SYMBOL (coding_system);
b8299c66 8505 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
70c22245 8506 /* We had better not send unsafe characters to terminal. */
c73bd236 8507 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
df7492f9 8508 /* Characer composition should be disabled. */
c73bd236 8509 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b8299c66
KL
8510 terminal_coding->src_multibyte = 1;
8511 terminal_coding->dst_multibyte = 0;
4ed46869
KH
8512 return Qnil;
8513}
8514
c4825358
KH
8515DEFUN ("set-safe-terminal-coding-system-internal",
8516 Fset_safe_terminal_coding_system_internal,
48b0f3ae 8517 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 8518 doc: /* Internal use only. */)
48b0f3ae 8519 (coding_system)
b74e4686 8520 Lisp_Object coding_system;
d46c5b12 8521{
b7826503 8522 CHECK_SYMBOL (coding_system);
c4825358
KH
8523 setup_coding_system (Fcheck_coding_system (coding_system),
8524 &safe_terminal_coding);
df7492f9
KH
8525 /* Characer composition should be disabled. */
8526 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
8527 safe_terminal_coding.src_multibyte = 1;
8528 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
8529 return Qnil;
8530}
4ed46869 8531
002fdb44 8532DEFUN ("terminal-coding-system", Fterminal_coding_system,
68bba4e4 8533 Sterminal_coding_system, 0, 1, 0,
6ed8eeff
KL
8534 doc: /* Return coding system specified for terminal output on the given terminal.
8535TERMINAL may be a terminal id, a frame, or nil for the selected
8536frame's terminal device. */)
8537 (terminal)
8538 Lisp_Object terminal;
4ed46869 8539{
985773c9
MB
8540 struct coding_system *terminal_coding
8541 = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8542 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
ae6f73fa 8543
ae6f73fa 8544 /* For backward compatibility, return nil if it is `undecided'. */
f75c90a9 8545 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
4ed46869
KH
8546}
8547
002fdb44 8548DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
68bba4e4 8549 Sset_keyboard_coding_system_internal, 1, 2, 0,
48b0f3ae 8550 doc: /* Internal use only. */)
6ed8eeff 8551 (coding_system, terminal)
4ed46869 8552 Lisp_Object coding_system;
6ed8eeff 8553 Lisp_Object terminal;
4ed46869 8554{
6ed8eeff 8555 struct terminal *t = get_terminal (terminal, 1);
b7826503 8556 CHECK_SYMBOL (coding_system);
df7492f9 8557 setup_coding_system (Fcheck_coding_system (coding_system),
c73bd236 8558 TERMINAL_KEYBOARD_CODING (t));
df7492f9 8559 /* Characer composition should be disabled. */
c73bd236
MB
8560 TERMINAL_KEYBOARD_CODING (t)->common_flags
8561 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
8562 return Qnil;
8563}
8564
8565DEFUN ("keyboard-coding-system",
985773c9 8566 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
48b0f3ae 8567 doc: /* Return coding system specified for decoding keyboard input. */)
985773c9
MB
8568 (terminal)
8569 Lisp_Object terminal;
4ed46869 8570{
985773c9
MB
8571 return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8572 (get_terminal (terminal, 1))->id);
4ed46869
KH
8573}
8574
4ed46869 8575\f
a5d301df
KH
8576DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8577 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
8578 doc: /* Choose a coding system for an operation based on the target name.
8579The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8580DECODING-SYSTEM is the coding system to use for decoding
8581\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8582for encoding (in case OPERATION does encoding).
05e6f5dc 8583
48b0f3ae
PJ
8584The first argument OPERATION specifies an I/O primitive:
8585 For file I/O, `insert-file-contents' or `write-region'.
8586 For process I/O, `call-process', `call-process-region', or `start-process'.
8587 For network I/O, `open-network-stream'.
05e6f5dc 8588
48b0f3ae
PJ
8589The remaining arguments should be the same arguments that were passed
8590to the primitive. Depending on which primitive, one of those arguments
8591is selected as the TARGET. For example, if OPERATION does file I/O,
8592whichever argument specifies the file name is TARGET.
05e6f5dc 8593
48b0f3ae 8594TARGET has a meaning which depends on OPERATION:
b883cdb2 8595 For file I/O, TARGET is a file name (except for the special case below).
48b0f3ae
PJ
8596 For process I/O, TARGET is a process name.
8597 For network I/O, TARGET is a service name or a port number
05e6f5dc 8598
48b0f3ae
PJ
8599This function looks up what specified for TARGET in,
8600`file-coding-system-alist', `process-coding-system-alist',
8601or `network-coding-system-alist' depending on OPERATION.
8602They may specify a coding system, a cons of coding systems,
8603or a function symbol to call.
8604In the last case, we call the function with one argument,
8605which is a list of all the arguments given to this function.
1011c487
MB
8606If the function can't decide a coding system, it can return
8607`undecided' so that the normal code-detection is performed.
48b0f3ae 8608
b883cdb2
MB
8609If OPERATION is `insert-file-contents', the argument corresponding to
8610TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
8611file name to look up, and BUFFER is a buffer that contains the file's
8612contents (not yet decoded). If `file-coding-system-alist' specifies a
8613function to call for FILENAME, that function should examine the
8614contents of BUFFER instead of reading the file.
8615
d918f936 8616usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
48b0f3ae 8617 (nargs, args)
4ed46869
KH
8618 int nargs;
8619 Lisp_Object *args;
6b89e3aa 8620{
4ed46869
KH
8621 Lisp_Object operation, target_idx, target, val;
8622 register Lisp_Object chain;
177c0ea7 8623
4ed46869
KH
8624 if (nargs < 2)
8625 error ("Too few arguments");
8626 operation = args[0];
8627 if (!SYMBOLP (operation)
8628 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 8629 error ("Invalid first arguement");
4ed46869
KH
8630 if (nargs < 1 + XINT (target_idx))
8631 error ("Too few arguments for operation: %s",
8f924df7 8632 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
8633 target = args[XINT (target_idx) + 1];
8634 if (!(STRINGP (target)
091a0ff0
KH
8635 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8636 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
4ed46869 8637 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 8638 error ("Invalid %dth argument", XINT (target_idx) + 1);
091a0ff0
KH
8639 if (CONSP (target))
8640 target = XCAR (target);
4ed46869 8641
2e34157c
RS
8642 chain = ((EQ (operation, Qinsert_file_contents)
8643 || EQ (operation, Qwrite_region))
02ba4723 8644 ? Vfile_coding_system_alist
2e34157c 8645 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
8646 ? Vnetwork_coding_system_alist
8647 : Vprocess_coding_system_alist));
4ed46869
KH
8648 if (NILP (chain))
8649 return Qnil;
8650
03699b14 8651 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 8652 {
f44d27ce 8653 Lisp_Object elt;
6b89e3aa 8654
df7492f9 8655 elt = XCAR (chain);
4ed46869
KH
8656 if (CONSP (elt)
8657 && ((STRINGP (target)
03699b14
KR
8658 && STRINGP (XCAR (elt))
8659 && fast_string_match (XCAR (elt), target) >= 0)
8660 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 8661 {
03699b14 8662 val = XCDR (elt);
b19fd4c5
KH
8663 /* Here, if VAL is both a valid coding system and a valid
8664 function symbol, we return VAL as a coding system. */
02ba4723
KH
8665 if (CONSP (val))
8666 return val;
8667 if (! SYMBOLP (val))
8668 return Qnil;
8669 if (! NILP (Fcoding_system_p (val)))
8670 return Fcons (val, val);
b19fd4c5 8671 if (! NILP (Ffboundp (val)))
6b89e3aa 8672 {
e2b97060
MB
8673 /* We use call1 rather than safe_call1
8674 so as to get bug reports about functions called here
8675 which don't handle the current interface. */
8676 val = call1 (val, Flist (nargs, args));
b19fd4c5
KH
8677 if (CONSP (val))
8678 return val;
8679 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8680 return Fcons (val, val);
6b89e3aa 8681 }
02ba4723 8682 return Qnil;
6b89e3aa
KH
8683 }
8684 }
4ed46869 8685 return Qnil;
6b89e3aa
KH
8686}
8687
df7492f9 8688DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 8689 Sset_coding_system_priority, 0, MANY, 0,
da7db224 8690 doc: /* Assign higher priority to the coding systems given as arguments.
ff563fce 8691If multiple coding systems belongs to the same category,
a3181084
DL
8692all but the first one are ignored.
8693
8694usage: (set-coding-system-priority ...) */)
df7492f9
KH
8695 (nargs, args)
8696 int nargs;
8697 Lisp_Object *args;
8698{
8699 int i, j;
8700 int changed[coding_category_max];
8701 enum coding_category priorities[coding_category_max];
8702
8703 bzero (changed, sizeof changed);
6b89e3aa 8704
df7492f9 8705 for (i = j = 0; i < nargs; i++)
6b89e3aa 8706 {
df7492f9
KH
8707 enum coding_category category;
8708 Lisp_Object spec, attrs;
6b89e3aa 8709
df7492f9
KH
8710 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8711 attrs = AREF (spec, 0);
8712 category = XINT (CODING_ATTR_CATEGORY (attrs));
8713 if (changed[category])
8714 /* Ignore this coding system because a coding system of the
8715 same category already had a higher priority. */
8716 continue;
8717 changed[category] = 1;
8718 priorities[j++] = category;
8719 if (coding_categories[category].id >= 0
8720 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8721 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 8722 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 8723 }
6b89e3aa 8724
df7492f9
KH
8725 /* Now we have decided top J priorities. Reflect the order of the
8726 original priorities to the remaining priorities. */
6b89e3aa 8727
df7492f9 8728 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 8729 {
df7492f9
KH
8730 while (j < coding_category_max
8731 && changed[coding_priorities[j]])
8732 j++;
8733 if (j == coding_category_max)
8734 abort ();
8735 priorities[i] = coding_priorities[j];
8736 }
6b89e3aa 8737
df7492f9 8738 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 8739
ff563fce
KH
8740 /* Update `coding-category-list'. */
8741 Vcoding_category_list = Qnil;
8742 for (i = coding_category_max - 1; i >= 0; i--)
8743 Vcoding_category_list
8744 = Fcons (AREF (Vcoding_category_table, priorities[i]),
8745 Vcoding_category_list);
6b89e3aa 8746
df7492f9 8747 return Qnil;
6b89e3aa
KH
8748}
8749
df7492f9
KH
8750DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8751 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
8752 doc: /* Return a list of coding systems ordered by their priorities.
8753HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
8754 (highestp)
8755 Lisp_Object highestp;
d46c5b12
KH
8756{
8757 int i;
df7492f9 8758 Lisp_Object val;
6b89e3aa 8759
df7492f9 8760 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 8761 {
df7492f9
KH
8762 enum coding_category category = coding_priorities[i];
8763 int id = coding_categories[category].id;
8764 Lisp_Object attrs;
068a9dbd 8765
df7492f9
KH
8766 if (id < 0)
8767 continue;
8768 attrs = CODING_ID_ATTRS (id);
8769 if (! NILP (highestp))
8770 return CODING_ATTR_BASE_NAME (attrs);
8771 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8772 }
8773 return Fnreverse (val);
8774}
068a9dbd 8775
f0064e1f 8776static char *suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
8777
8778static Lisp_Object
df7492f9
KH
8779make_subsidiaries (base)
8780 Lisp_Object base;
068a9dbd 8781{
df7492f9 8782 Lisp_Object subsidiaries;
8f924df7 8783 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
8784 char *buf = (char *) alloca (base_name_len + 6);
8785 int i;
068a9dbd 8786
8f924df7 8787 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
8788 subsidiaries = Fmake_vector (make_number (3), Qnil);
8789 for (i = 0; i < 3; i++)
068a9dbd 8790 {
df7492f9
KH
8791 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8792 ASET (subsidiaries, i, intern (buf));
068a9dbd 8793 }
df7492f9 8794 return subsidiaries;
068a9dbd
KH
8795}
8796
8797
df7492f9
KH
8798DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8799 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
8800 doc: /* For internal use only.
8801usage: (define-coding-system-internal ...) */)
df7492f9
KH
8802 (nargs, args)
8803 int nargs;
8804 Lisp_Object *args;
068a9dbd 8805{
df7492f9
KH
8806 Lisp_Object name;
8807 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
8808 Lisp_Object attrs; /* Vector of attributes. */
8809 Lisp_Object eol_type;
8810 Lisp_Object aliases;
8811 Lisp_Object coding_type, charset_list, safe_charsets;
8812 enum coding_category category;
8813 Lisp_Object tail, val;
8814 int max_charset_id = 0;
8815 int i;
068a9dbd 8816
df7492f9
KH
8817 if (nargs < coding_arg_max)
8818 goto short_args;
068a9dbd 8819
df7492f9 8820 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 8821
df7492f9
KH
8822 name = args[coding_arg_name];
8823 CHECK_SYMBOL (name);
8824 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 8825
df7492f9
KH
8826 val = args[coding_arg_mnemonic];
8827 if (! STRINGP (val))
8828 CHECK_CHARACTER (val);
8829 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 8830
df7492f9
KH
8831 coding_type = args[coding_arg_coding_type];
8832 CHECK_SYMBOL (coding_type);
8833 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 8834
df7492f9
KH
8835 charset_list = args[coding_arg_charset_list];
8836 if (SYMBOLP (charset_list))
8837 {
8838 if (EQ (charset_list, Qiso_2022))
8839 {
8840 if (! EQ (coding_type, Qiso_2022))
8841 error ("Invalid charset-list");
8842 charset_list = Viso_2022_charset_list;
8843 }
8844 else if (EQ (charset_list, Qemacs_mule))
8845 {
8846 if (! EQ (coding_type, Qemacs_mule))
8847 error ("Invalid charset-list");
8848 charset_list = Vemacs_mule_charset_list;
8849 }
8850 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8851 if (max_charset_id < XFASTINT (XCAR (tail)))
8852 max_charset_id = XFASTINT (XCAR (tail));
8853 }
068a9dbd
KH
8854 else
8855 {
df7492f9 8856 charset_list = Fcopy_sequence (charset_list);
985773c9 8857 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
068a9dbd 8858 {
df7492f9
KH
8859 struct charset *charset;
8860
985773c9 8861 val = XCAR (tail);
df7492f9
KH
8862 CHECK_CHARSET_GET_CHARSET (val, charset);
8863 if (EQ (coding_type, Qiso_2022)
8864 ? CHARSET_ISO_FINAL (charset) < 0
8865 : EQ (coding_type, Qemacs_mule)
8866 ? CHARSET_EMACS_MULE_ID (charset) < 0
8867 : 0)
8868 error ("Can't handle charset `%s'",
8f924df7 8869 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 8870
8f924df7 8871 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
8872 if (max_charset_id < charset->id)
8873 max_charset_id = charset->id;
068a9dbd
KH
8874 }
8875 }
df7492f9 8876 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 8877
df7492f9
KH
8878 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8879 make_number (255));
8880 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 8881 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 8882 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 8883
584948ac 8884 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 8885
df7492f9 8886 val = args[coding_arg_decode_translation_table];
a6f87d34 8887 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8888 CHECK_SYMBOL (val);
df7492f9 8889 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 8890
df7492f9 8891 val = args[coding_arg_encode_translation_table];
a6f87d34 8892 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8893 CHECK_SYMBOL (val);
df7492f9 8894 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 8895
df7492f9
KH
8896 val = args[coding_arg_post_read_conversion];
8897 CHECK_SYMBOL (val);
8898 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 8899
df7492f9
KH
8900 val = args[coding_arg_pre_write_conversion];
8901 CHECK_SYMBOL (val);
8902 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 8903
df7492f9
KH
8904 val = args[coding_arg_default_char];
8905 if (NILP (val))
8906 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8907 else
8908 {
8f924df7 8909 CHECK_CHARACTER (val);
df7492f9
KH
8910 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8911 }
4031e2bf 8912
8f924df7
KH
8913 val = args[coding_arg_for_unibyte];
8914 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 8915
df7492f9
KH
8916 val = args[coding_arg_plist];
8917 CHECK_LIST (val);
8918 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 8919
df7492f9
KH
8920 if (EQ (coding_type, Qcharset))
8921 {
c7c66a95
KH
8922 /* Generate a lisp vector of 256 elements. Each element is nil,
8923 integer, or a list of charset IDs.
3a73fa5d 8924
c7c66a95
KH
8925 If Nth element is nil, the byte code N is invalid in this
8926 coding system.
4ed46869 8927
c7c66a95
KH
8928 If Nth element is a number NUM, N is the first byte of a
8929 charset whose ID is NUM.
4ed46869 8930
c7c66a95
KH
8931 If Nth element is a list of charset IDs, N is the first byte
8932 of one of them. The list is sorted by dimensions of the
2bc515e4 8933 charsets. A charset of smaller dimension comes first. */
df7492f9 8934 val = Fmake_vector (make_number (256), Qnil);
4ed46869 8935
5c99c2e6 8936 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 8937 {
c7c66a95
KH
8938 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8939 int dim = CHARSET_DIMENSION (charset);
8940 int idx = (dim - 1) * 4;
4ed46869 8941
5c99c2e6 8942 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 8943 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 8944
15d143f7
KH
8945 for (i = charset->code_space[idx];
8946 i <= charset->code_space[idx + 1]; i++)
8947 {
c7c66a95
KH
8948 Lisp_Object tmp, tmp2;
8949 int dim2;
ec6d2bb8 8950
c7c66a95
KH
8951 tmp = AREF (val, i);
8952 if (NILP (tmp))
8953 tmp = XCAR (tail);
8954 else if (NUMBERP (tmp))
8955 {
8956 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8957 if (dim < dim2)
c7c66a95 8958 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
8959 else
8960 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 8961 }
15d143f7 8962 else
c7c66a95
KH
8963 {
8964 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8965 {
8966 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8967 if (dim < dim2)
8968 break;
8969 }
8970 if (NILP (tmp2))
8971 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8972 else
8973 {
8974 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8975 XSETCAR (tmp2, XCAR (tail));
8976 }
8977 }
8978 ASET (val, i, tmp);
15d143f7 8979 }
df7492f9
KH
8980 }
8981 ASET (attrs, coding_attr_charset_valids, val);
8982 category = coding_category_charset;
8983 }
8984 else if (EQ (coding_type, Qccl))
8985 {
8986 Lisp_Object valids;
ecec61c1 8987
df7492f9
KH
8988 if (nargs < coding_arg_ccl_max)
8989 goto short_args;
ecec61c1 8990
df7492f9
KH
8991 val = args[coding_arg_ccl_decoder];
8992 CHECK_CCL_PROGRAM (val);
8993 if (VECTORP (val))
8994 val = Fcopy_sequence (val);
8995 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 8996
df7492f9
KH
8997 val = args[coding_arg_ccl_encoder];
8998 CHECK_CCL_PROGRAM (val);
8999 if (VECTORP (val))
9000 val = Fcopy_sequence (val);
9001 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 9002
df7492f9
KH
9003 val = args[coding_arg_ccl_valids];
9004 valids = Fmake_string (make_number (256), make_number (0));
9005 for (tail = val; !NILP (tail); tail = Fcdr (tail))
9006 {
8dcbea82 9007 int from, to;
ecec61c1 9008
df7492f9
KH
9009 val = Fcar (tail);
9010 if (INTEGERP (val))
8dcbea82
KH
9011 {
9012 from = to = XINT (val);
9013 if (from < 0 || from > 255)
9014 args_out_of_range_3 (val, make_number (0), make_number (255));
9015 }
df7492f9
KH
9016 else
9017 {
df7492f9 9018 CHECK_CONS (val);
8f924df7
KH
9019 CHECK_NATNUM_CAR (val);
9020 CHECK_NATNUM_CDR (val);
df7492f9 9021 from = XINT (XCAR (val));
8f924df7 9022 if (from > 255)
8dcbea82
KH
9023 args_out_of_range_3 (XCAR (val),
9024 make_number (0), make_number (255));
df7492f9 9025 to = XINT (XCDR (val));
8dcbea82
KH
9026 if (to < from || to > 255)
9027 args_out_of_range_3 (XCDR (val),
9028 XCAR (val), make_number (255));
df7492f9 9029 }
8dcbea82 9030 for (i = from; i <= to; i++)
8f924df7 9031 SSET (valids, i, 1);
df7492f9
KH
9032 }
9033 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 9034
df7492f9 9035 category = coding_category_ccl;
55ab7be3 9036 }
df7492f9 9037 else if (EQ (coding_type, Qutf_16))
55ab7be3 9038 {
df7492f9 9039 Lisp_Object bom, endian;
4ed46869 9040
584948ac 9041 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 9042
df7492f9
KH
9043 if (nargs < coding_arg_utf16_max)
9044 goto short_args;
4ed46869 9045
df7492f9
KH
9046 bom = args[coding_arg_utf16_bom];
9047 if (! NILP (bom) && ! EQ (bom, Qt))
9048 {
9049 CHECK_CONS (bom);
8f924df7
KH
9050 val = XCAR (bom);
9051 CHECK_CODING_SYSTEM (val);
9052 val = XCDR (bom);
9053 CHECK_CODING_SYSTEM (val);
df7492f9
KH
9054 }
9055 ASET (attrs, coding_attr_utf_16_bom, bom);
9056
9057 endian = args[coding_arg_utf16_endian];
b49a1807
KH
9058 CHECK_SYMBOL (endian);
9059 if (NILP (endian))
9060 endian = Qbig;
9061 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 9062 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
9063 ASET (attrs, coding_attr_utf_16_endian, endian);
9064
9065 category = (CONSP (bom)
9066 ? coding_category_utf_16_auto
9067 : NILP (bom)
b49a1807 9068 ? (EQ (endian, Qbig)
df7492f9
KH
9069 ? coding_category_utf_16_be_nosig
9070 : coding_category_utf_16_le_nosig)
b49a1807 9071 : (EQ (endian, Qbig)
df7492f9
KH
9072 ? coding_category_utf_16_be
9073 : coding_category_utf_16_le));
9074 }
9075 else if (EQ (coding_type, Qiso_2022))
9076 {
9077 Lisp_Object initial, reg_usage, request, flags;
4776e638 9078 int i;
1397dc18 9079
df7492f9
KH
9080 if (nargs < coding_arg_iso2022_max)
9081 goto short_args;
9082
9083 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9084 CHECK_VECTOR (initial);
9085 for (i = 0; i < 4; i++)
9086 {
9087 val = Faref (initial, make_number (i));
9088 if (! NILP (val))
9089 {
584948ac
KH
9090 struct charset *charset;
9091
9092 CHECK_CHARSET_GET_CHARSET (val, charset);
9093 ASET (initial, i, make_number (CHARSET_ID (charset)));
9094 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9095 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9096 }
9097 else
9098 ASET (initial, i, make_number (-1));
9099 }
9100
9101 reg_usage = args[coding_arg_iso2022_reg_usage];
9102 CHECK_CONS (reg_usage);
8f924df7
KH
9103 CHECK_NUMBER_CAR (reg_usage);
9104 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
9105
9106 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9107 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 9108 {
df7492f9 9109 int id;
8f924df7 9110 Lisp_Object tmp;
df7492f9
KH
9111
9112 val = Fcar (tail);
9113 CHECK_CONS (val);
8f924df7
KH
9114 tmp = XCAR (val);
9115 CHECK_CHARSET_GET_ID (tmp, id);
9116 CHECK_NATNUM_CDR (val);
df7492f9
KH
9117 if (XINT (XCDR (val)) >= 4)
9118 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 9119 XSETCAR (val, make_number (id));
1397dc18 9120 }
4ed46869 9121
df7492f9
KH
9122 flags = args[coding_arg_iso2022_flags];
9123 CHECK_NATNUM (flags);
9124 i = XINT (flags);
9125 if (EQ (args[coding_arg_charset_list], Qiso_2022))
9126 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9127
9128 ASET (attrs, coding_attr_iso_initial, initial);
9129 ASET (attrs, coding_attr_iso_usage, reg_usage);
9130 ASET (attrs, coding_attr_iso_request, request);
9131 ASET (attrs, coding_attr_iso_flags, flags);
9132 setup_iso_safe_charsets (attrs);
9133
9134 if (i & CODING_ISO_FLAG_SEVEN_BITS)
9135 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9136 | CODING_ISO_FLAG_SINGLE_SHIFT))
9137 ? coding_category_iso_7_else
9138 : EQ (args[coding_arg_charset_list], Qiso_2022)
9139 ? coding_category_iso_7
9140 : coding_category_iso_7_tight);
9141 else
9142 {
9143 int id = XINT (AREF (initial, 1));
9144
c6fb6e98 9145 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
9146 || EQ (args[coding_arg_charset_list], Qiso_2022)
9147 || id < 0)
9148 ? coding_category_iso_8_else
9149 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9150 ? coding_category_iso_8_1
9151 : coding_category_iso_8_2);
9152 }
0ce7886f
KH
9153 if (category != coding_category_iso_8_1
9154 && category != coding_category_iso_8_2)
9155 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
9156 }
9157 else if (EQ (coding_type, Qemacs_mule))
c28a9453 9158 {
df7492f9
KH
9159 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9160 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 9161 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 9162 category = coding_category_emacs_mule;
c28a9453 9163 }
df7492f9 9164 else if (EQ (coding_type, Qshift_jis))
c28a9453 9165 {
df7492f9
KH
9166
9167 struct charset *charset;
9168
7d64c6ad 9169 if (XINT (Flength (charset_list)) != 3
6e07c25f 9170 && XINT (Flength (charset_list)) != 4)
7d64c6ad 9171 error ("There should be three or four charsets");
df7492f9
KH
9172
9173 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9174 if (CHARSET_DIMENSION (charset) != 1)
9175 error ("Dimension of charset %s is not one",
8f924df7 9176 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9177 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9178 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9179
9180 charset_list = XCDR (charset_list);
9181 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9182 if (CHARSET_DIMENSION (charset) != 1)
9183 error ("Dimension of charset %s is not one",
8f924df7 9184 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
9185
9186 charset_list = XCDR (charset_list);
9187 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9188 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
9189 error ("Dimension of charset %s is not two",
9190 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9191
9192 charset_list = XCDR (charset_list);
2b917a06
KH
9193 if (! NILP (charset_list))
9194 {
9195 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9196 if (CHARSET_DIMENSION (charset) != 2)
9197 error ("Dimension of charset %s is not two",
9198 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9199 }
df7492f9
KH
9200
9201 category = coding_category_sjis;
9202 Vsjis_coding_system = name;
c28a9453 9203 }
df7492f9
KH
9204 else if (EQ (coding_type, Qbig5))
9205 {
9206 struct charset *charset;
4ed46869 9207
df7492f9
KH
9208 if (XINT (Flength (charset_list)) != 2)
9209 error ("There should be just two charsets");
9210
9211 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9212 if (CHARSET_DIMENSION (charset) != 1)
9213 error ("Dimension of charset %s is not one",
8f924df7 9214 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9215 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9216 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9217
9218 charset_list = XCDR (charset_list);
9219 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9220 if (CHARSET_DIMENSION (charset) != 2)
9221 error ("Dimension of charset %s is not two",
8f924df7 9222 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 9223
df7492f9
KH
9224 category = coding_category_big5;
9225 Vbig5_coding_system = name;
9226 }
9227 else if (EQ (coding_type, Qraw_text))
c28a9453 9228 {
584948ac
KH
9229 category = coding_category_raw_text;
9230 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 9231 }
df7492f9 9232 else if (EQ (coding_type, Qutf_8))
4ed46869 9233 {
584948ac
KH
9234 category = coding_category_utf_8;
9235 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4ed46869 9236 }
df7492f9
KH
9237 else if (EQ (coding_type, Qundecided))
9238 category = coding_category_undecided;
4ed46869 9239 else
df7492f9 9240 error ("Invalid coding system type: %s",
8f924df7 9241 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 9242
df7492f9 9243 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
9244 CODING_ATTR_PLIST (attrs)
9245 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9246 CODING_ATTR_PLIST (attrs)));
35befdaa
KH
9247 CODING_ATTR_PLIST (attrs)
9248 = Fcons (QCascii_compatible_p,
9249 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9250 CODING_ATTR_PLIST (attrs)));
c4825358 9251
df7492f9
KH
9252 eol_type = args[coding_arg_eol_type];
9253 if (! NILP (eol_type)
9254 && ! EQ (eol_type, Qunix)
9255 && ! EQ (eol_type, Qdos)
9256 && ! EQ (eol_type, Qmac))
9257 error ("Invalid eol-type");
4ed46869 9258
df7492f9 9259 aliases = Fcons (name, Qnil);
4ed46869 9260
df7492f9
KH
9261 if (NILP (eol_type))
9262 {
9263 eol_type = make_subsidiaries (name);
9264 for (i = 0; i < 3; i++)
1397dc18 9265 {
df7492f9
KH
9266 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9267
9268 this_name = AREF (eol_type, i);
9269 this_aliases = Fcons (this_name, Qnil);
9270 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9271 this_spec = Fmake_vector (make_number (3), attrs);
9272 ASET (this_spec, 1, this_aliases);
9273 ASET (this_spec, 2, this_eol_type);
9274 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9275 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
583f71ca
KH
9276 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9277 if (NILP (val))
9278 Vcoding_system_alist
9279 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9280 Vcoding_system_alist);
1397dc18 9281 }
d46c5b12 9282 }
4ed46869 9283
df7492f9
KH
9284 spec_vec = Fmake_vector (make_number (3), attrs);
9285 ASET (spec_vec, 1, aliases);
9286 ASET (spec_vec, 2, eol_type);
48b0f3ae 9287
df7492f9
KH
9288 Fputhash (name, spec_vec, Vcoding_system_hash_table);
9289 Vcoding_system_list = Fcons (name, Vcoding_system_list);
583f71ca
KH
9290 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9291 if (NILP (val))
9292 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9293 Vcoding_system_alist);
48b0f3ae 9294
df7492f9
KH
9295 {
9296 int id = coding_categories[category].id;
48b0f3ae 9297
df7492f9
KH
9298 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9299 setup_coding_system (name, &coding_categories[category]);
9300 }
48b0f3ae 9301
d46c5b12 9302 return Qnil;
48b0f3ae 9303
df7492f9
KH
9304 short_args:
9305 return Fsignal (Qwrong_number_of_arguments,
9306 Fcons (intern ("define-coding-system-internal"),
9307 make_number (nargs)));
d46c5b12 9308}
4ed46869 9309
d6925f38 9310
a6f87d34
KH
9311DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9312 3, 3, 0,
9313 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
9314 (coding_system, prop, val)
9315 Lisp_Object coding_system, prop, val;
9316{
3dbe7859 9317 Lisp_Object spec, attrs;
a6f87d34
KH
9318
9319 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9320 attrs = AREF (spec, 0);
9321 if (EQ (prop, QCmnemonic))
9322 {
9323 if (! STRINGP (val))
9324 CHECK_CHARACTER (val);
9325 CODING_ATTR_MNEMONIC (attrs) = val;
9326 }
9327 else if (EQ (prop, QCdefalut_char))
9328 {
9329 if (NILP (val))
9330 val = make_number (' ');
9331 else
9332 CHECK_CHARACTER (val);
9333 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9334 }
9335 else if (EQ (prop, QCdecode_translation_table))
9336 {
9337 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9338 CHECK_SYMBOL (val);
9339 CODING_ATTR_DECODE_TBL (attrs) = val;
9340 }
9341 else if (EQ (prop, QCencode_translation_table))
9342 {
9343 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9344 CHECK_SYMBOL (val);
9345 CODING_ATTR_ENCODE_TBL (attrs) = val;
9346 }
9347 else if (EQ (prop, QCpost_read_conversion))
9348 {
9349 CHECK_SYMBOL (val);
9350 CODING_ATTR_POST_READ (attrs) = val;
9351 }
9352 else if (EQ (prop, QCpre_write_conversion))
9353 {
9354 CHECK_SYMBOL (val);
9355 CODING_ATTR_PRE_WRITE (attrs) = val;
9356 }
35befdaa
KH
9357 else if (EQ (prop, QCascii_compatible_p))
9358 {
9359 CODING_ATTR_ASCII_COMPAT (attrs) = val;
9360 }
a6f87d34
KH
9361
9362 CODING_ATTR_PLIST (attrs)
9363 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9364 return val;
9365}
9366
9367
df7492f9
KH
9368DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9369 Sdefine_coding_system_alias, 2, 2, 0,
9370 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
9371 (alias, coding_system)
9372 Lisp_Object alias, coding_system;
66cfb530 9373{
583f71ca 9374 Lisp_Object spec, aliases, eol_type, val;
4ed46869 9375
df7492f9
KH
9376 CHECK_SYMBOL (alias);
9377 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9378 aliases = AREF (spec, 1);
d6925f38
KH
9379 /* ALISES should be a list of length more than zero, and the first
9380 element is a base coding system. Append ALIAS at the tail of the
9381 list. */
df7492f9
KH
9382 while (!NILP (XCDR (aliases)))
9383 aliases = XCDR (aliases);
8f924df7 9384 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 9385
df7492f9
KH
9386 eol_type = AREF (spec, 2);
9387 if (VECTORP (eol_type))
4ed46869 9388 {
df7492f9
KH
9389 Lisp_Object subsidiaries;
9390 int i;
4ed46869 9391
df7492f9
KH
9392 subsidiaries = make_subsidiaries (alias);
9393 for (i = 0; i < 3; i++)
9394 Fdefine_coding_system_alias (AREF (subsidiaries, i),
9395 AREF (eol_type, i));
4ed46869 9396 }
df7492f9
KH
9397
9398 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 9399 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
583f71ca
KH
9400 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9401 if (NILP (val))
9402 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9403 Vcoding_system_alist);
66cfb530 9404
4ed46869
KH
9405 return Qnil;
9406}
9407
df7492f9
KH
9408DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9409 1, 1, 0,
9410 doc: /* Return the base of CODING-SYSTEM.
da7db224 9411Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
9412 (coding_system)
9413 Lisp_Object coding_system;
d46c5b12 9414{
df7492f9 9415 Lisp_Object spec, attrs;
d46c5b12 9416
df7492f9
KH
9417 if (NILP (coding_system))
9418 return (Qno_conversion);
9419 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9420 attrs = AREF (spec, 0);
9421 return CODING_ATTR_BASE_NAME (attrs);
9422}
1397dc18 9423
df7492f9
KH
9424DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9425 1, 1, 0,
9426 doc: "Return the property list of CODING-SYSTEM.")
9427 (coding_system)
9428 Lisp_Object coding_system;
9429{
9430 Lisp_Object spec, attrs;
1397dc18 9431
df7492f9
KH
9432 if (NILP (coding_system))
9433 coding_system = Qno_conversion;
9434 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9435 attrs = AREF (spec, 0);
9436 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
9437}
9438
df7492f9
KH
9439
9440DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9441 1, 1, 0,
da7db224 9442 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
9443 (coding_system)
9444 Lisp_Object coding_system;
66cfb530 9445{
df7492f9 9446 Lisp_Object spec;
84d60297 9447
df7492f9
KH
9448 if (NILP (coding_system))
9449 coding_system = Qno_conversion;
9450 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 9451 return AREF (spec, 1);
df7492f9 9452}
66cfb530 9453
df7492f9
KH
9454DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9455 Scoding_system_eol_type, 1, 1, 0,
9456 doc: /* Return eol-type of CODING-SYSTEM.
9457An eol-type is integer 0, 1, 2, or a vector of coding systems.
66cfb530 9458
df7492f9
KH
9459Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9460and CR respectively.
66cfb530 9461
df7492f9
KH
9462A vector value indicates that a format of end-of-line should be
9463detected automatically. Nth element of the vector is the subsidiary
9464coding system whose eol-type is N. */)
6b89e3aa
KH
9465 (coding_system)
9466 Lisp_Object coding_system;
9467{
df7492f9
KH
9468 Lisp_Object spec, eol_type;
9469 int n;
6b89e3aa 9470
df7492f9
KH
9471 if (NILP (coding_system))
9472 coding_system = Qno_conversion;
9473 if (! CODING_SYSTEM_P (coding_system))
9474 return Qnil;
9475 spec = CODING_SYSTEM_SPEC (coding_system);
9476 eol_type = AREF (spec, 2);
9477 if (VECTORP (eol_type))
9478 return Fcopy_sequence (eol_type);
9479 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9480 return make_number (n);
6b89e3aa
KH
9481}
9482
4ed46869
KH
9483#endif /* emacs */
9484
9485\f
/*** 12. Postamble ***/
4ed46869 9487
dfcf069d 9488void
4ed46869
KH
9489init_coding_once ()
9490{
9491 int i;
9492
df7492f9
KH
9493 for (i = 0; i < coding_category_max; i++)
9494 {
9495 coding_categories[i].id = -1;
9496 coding_priorities[i] = i;
9497 }
4ed46869
KH
9498
9499 /* ISO2022 specific initialize routine. */
9500 for (i = 0; i < 0x20; i++)
b73bfc1c 9501 iso_code_class[i] = ISO_control_0;
4ed46869
KH
9502 for (i = 0x21; i < 0x7F; i++)
9503 iso_code_class[i] = ISO_graphic_plane_0;
9504 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 9505 iso_code_class[i] = ISO_control_1;
4ed46869
KH
9506 for (i = 0xA1; i < 0xFF; i++)
9507 iso_code_class[i] = ISO_graphic_plane_1;
9508 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9509 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
9510 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9511 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9512 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9513 iso_code_class[ISO_CODE_ESC] = ISO_escape;
9514 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9515 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9516 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9517
df7492f9
KH
9518 for (i = 0; i < 256; i++)
9519 {
9520 emacs_mule_bytes[i] = 1;
9521 }
7c78e542
KH
9522 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9523 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9524 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9525 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
9526}
9527
9528#ifdef emacs
9529
dfcf069d 9530void
e0e989f6
KH
9531syms_of_coding ()
9532{
df7492f9 9533 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
9534 {
9535 Lisp_Object args[2];
9536 args[0] = QCtest;
9537 args[1] = Qeq;
9538 Vcoding_system_hash_table = Fmake_hash_table (2, args);
9539 }
df7492f9
KH
9540
9541 staticpro (&Vsjis_coding_system);
9542 Vsjis_coding_system = Qnil;
e0e989f6 9543
df7492f9
KH
9544 staticpro (&Vbig5_coding_system);
9545 Vbig5_coding_system = Qnil;
9546
24a73b0a
KH
9547 staticpro (&Vcode_conversion_reused_workbuf);
9548 Vcode_conversion_reused_workbuf = Qnil;
9549
9550 staticpro (&Vcode_conversion_workbuf_name);
9551 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
e0e989f6 9552
24a73b0a 9553 reused_workbuf_in_use = 0;
df7492f9
KH
9554
9555 DEFSYM (Qcharset, "charset");
9556 DEFSYM (Qtarget_idx, "target-idx");
9557 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
9558 Fset (Qcoding_system_history, Qnil);
9559
9ce27fde 9560 /* Target FILENAME is the first argument. */
e0e989f6 9561 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 9562 /* Target FILENAME is the third argument. */
e0e989f6
KH
9563 Fput (Qwrite_region, Qtarget_idx, make_number (2));
9564
df7492f9 9565 DEFSYM (Qcall_process, "call-process");
9ce27fde 9566 /* Target PROGRAM is the first argument. */
e0e989f6
KH
9567 Fput (Qcall_process, Qtarget_idx, make_number (0));
9568
df7492f9 9569 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 9570 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9571 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9572
df7492f9 9573 DEFSYM (Qstart_process, "start-process");
9ce27fde 9574 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9575 Fput (Qstart_process, Qtarget_idx, make_number (2));
9576
df7492f9 9577 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 9578 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
9579 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9580
df7492f9
KH
9581 DEFSYM (Qcoding_system, "coding-system");
9582 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 9583
df7492f9
KH
9584 DEFSYM (Qeol_type, "eol-type");
9585 DEFSYM (Qunix, "unix");
9586 DEFSYM (Qdos, "dos");
4ed46869 9587
df7492f9
KH
9588 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9589 DEFSYM (Qpost_read_conversion, "post-read-conversion");
9590 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9591 DEFSYM (Qdefault_char, "default-char");
9592 DEFSYM (Qundecided, "undecided");
9593 DEFSYM (Qno_conversion, "no-conversion");
9594 DEFSYM (Qraw_text, "raw-text");
4ed46869 9595
df7492f9 9596 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 9597
df7492f9 9598 DEFSYM (Qutf_8, "utf-8");
8f924df7 9599 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 9600
df7492f9 9601 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
9602 DEFSYM (Qbig, "big");
9603 DEFSYM (Qlittle, "little");
27901516 9604
df7492f9
KH
9605 DEFSYM (Qshift_jis, "shift-jis");
9606 DEFSYM (Qbig5, "big5");
4ed46869 9607
df7492f9 9608 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 9609
df7492f9 9610 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
9611 Fput (Qcoding_system_error, Qerror_conditions,
9612 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9613 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 9614 build_string ("Invalid coding system"));
4ed46869 9615
05e6f5dc
KH
9616 /* Intern this now in case it isn't already done.
9617 Setting this variable twice is harmless.
9618 But don't staticpro it here--that is done in alloc.c. */
9619 Qchar_table_extra_slots = intern ("char-table-extra-slots");
70c22245 9620
df7492f9 9621 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 9622 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
9623 DEFSYM (Qtranslation_table_id, "translation-table-id");
9624 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9625 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 9626
df7492f9 9627 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 9628
df7492f9 9629 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 9630
01378f49 9631 DEFSYM (QCcategory, ":category");
a6f87d34
KH
9632 DEFSYM (QCmnemonic, ":mnemonic");
9633 DEFSYM (QCdefalut_char, ":default-char");
9634 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9635 DEFSYM (QCencode_translation_table, ":encode-translation-table");
9636 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9637 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
35befdaa 9638 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
01378f49 9639
df7492f9
KH
9640 Vcoding_category_table
9641 = Fmake_vector (make_number (coding_category_max), Qnil);
9642 staticpro (&Vcoding_category_table);
9643 /* Followings are target of code detection. */
9644 ASET (Vcoding_category_table, coding_category_iso_7,
9645 intern ("coding-category-iso-7"));
9646 ASET (Vcoding_category_table, coding_category_iso_7_tight,
9647 intern ("coding-category-iso-7-tight"));
9648 ASET (Vcoding_category_table, coding_category_iso_8_1,
9649 intern ("coding-category-iso-8-1"));
9650 ASET (Vcoding_category_table, coding_category_iso_8_2,
9651 intern ("coding-category-iso-8-2"));
9652 ASET (Vcoding_category_table, coding_category_iso_7_else,
9653 intern ("coding-category-iso-7-else"));
9654 ASET (Vcoding_category_table, coding_category_iso_8_else,
9655 intern ("coding-category-iso-8-else"));
9656 ASET (Vcoding_category_table, coding_category_utf_8,
9657 intern ("coding-category-utf-8"));
9658 ASET (Vcoding_category_table, coding_category_utf_16_be,
9659 intern ("coding-category-utf-16-be"));
ff563fce
KH
9660 ASET (Vcoding_category_table, coding_category_utf_16_auto,
9661 intern ("coding-category-utf-16-auto"));
df7492f9
KH
9662 ASET (Vcoding_category_table, coding_category_utf_16_le,
9663 intern ("coding-category-utf-16-le"));
9664 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9665 intern ("coding-category-utf-16-be-nosig"));
9666 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9667 intern ("coding-category-utf-16-le-nosig"));
9668 ASET (Vcoding_category_table, coding_category_charset,
9669 intern ("coding-category-charset"));
9670 ASET (Vcoding_category_table, coding_category_sjis,
9671 intern ("coding-category-sjis"));
9672 ASET (Vcoding_category_table, coding_category_big5,
9673 intern ("coding-category-big5"));
9674 ASET (Vcoding_category_table, coding_category_ccl,
9675 intern ("coding-category-ccl"));
9676 ASET (Vcoding_category_table, coding_category_emacs_mule,
9677 intern ("coding-category-emacs-mule"));
9678 /* Followings are NOT target of code detection. */
9679 ASET (Vcoding_category_table, coding_category_raw_text,
9680 intern ("coding-category-raw-text"));
9681 ASET (Vcoding_category_table, coding_category_undecided,
9682 intern ("coding-category-undecided"));
ecf488bc 9683
065e3595
KH
9684 DEFSYM (Qinsufficient_source, "insufficient-source");
9685 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9686 DEFSYM (Qinvalid_source, "invalid-source");
9687 DEFSYM (Qinterrupted, "interrupted");
9688 DEFSYM (Qinsufficient_memory, "insufficient-memory");
44e8490d 9689 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
065e3595 9690
4ed46869
KH
9691 defsubr (&Scoding_system_p);
9692 defsubr (&Sread_coding_system);
9693 defsubr (&Sread_non_nil_coding_system);
9694 defsubr (&Scheck_coding_system);
9695 defsubr (&Sdetect_coding_region);
d46c5b12 9696 defsubr (&Sdetect_coding_string);
05e6f5dc 9697 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 9698 defsubr (&Sunencodable_char_position);
df7492f9 9699 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
9700 defsubr (&Sdecode_coding_region);
9701 defsubr (&Sencode_coding_region);
9702 defsubr (&Sdecode_coding_string);
9703 defsubr (&Sencode_coding_string);
9704 defsubr (&Sdecode_sjis_char);
9705 defsubr (&Sencode_sjis_char);
9706 defsubr (&Sdecode_big5_char);
9707 defsubr (&Sencode_big5_char);
1ba9e4ab 9708 defsubr (&Sset_terminal_coding_system_internal);
c4825358 9709 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 9710 defsubr (&Sterminal_coding_system);
1ba9e4ab 9711 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 9712 defsubr (&Skeyboard_coding_system);
a5d301df 9713 defsubr (&Sfind_operation_coding_system);
df7492f9 9714 defsubr (&Sset_coding_system_priority);
6b89e3aa 9715 defsubr (&Sdefine_coding_system_internal);
df7492f9 9716 defsubr (&Sdefine_coding_system_alias);
a6f87d34 9717 defsubr (&Scoding_system_put);
df7492f9
KH
9718 defsubr (&Scoding_system_base);
9719 defsubr (&Scoding_system_plist);
9720 defsubr (&Scoding_system_aliases);
9721 defsubr (&Scoding_system_eol_type);
9722 defsubr (&Scoding_system_priority_list);
4ed46869 9723
4608c386 9724 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
9725 doc: /* List of coding systems.
9726
9727Do not alter the value of this variable manually. This variable should be
df7492f9 9728updated by the functions `define-coding-system' and
48b0f3ae 9729`define-coding-system-alias'. */);
4608c386
KH
9730 Vcoding_system_list = Qnil;
9731
9732 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
9733 doc: /* Alist of coding system names.
9734Each element is one element list of coding system name.
9735This variable is given to `completing-read' as TABLE argument.
9736
9737Do not alter the value of this variable manually. This variable should be
9738updated by the functions `make-coding-system' and
9739`define-coding-system-alias'. */);
4608c386
KH
9740 Vcoding_system_alist = Qnil;
9741
4ed46869 9742 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
9743 doc: /* List of coding-categories (symbols) ordered by priority.
9744
9745On detecting a coding system, Emacs tries code detection algorithms
9746associated with each coding-category one by one in this order. When
9747one algorithm agrees with a byte sequence of source text, the coding
0ec31faf
KH
9748system bound to the corresponding coding-category is selected.
9749
42205607 9750Don't modify this variable directly, but use `set-coding-priority'. */);
4ed46869
KH
9751 {
9752 int i;
9753
9754 Vcoding_category_list = Qnil;
df7492f9 9755 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 9756 Vcoding_category_list
d46c5b12
KH
9757 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9758 Vcoding_category_list);
4ed46869
KH
9759 }
9760
9761 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
9762 doc: /* Specify the coding system for read operations.
9763It is useful to bind this variable with `let', but do not set it globally.
9764If the value is a coding system, it is used for decoding on read operation.
9765If not, an appropriate element is used from one of the coding system alists:
9766There are three such tables, `file-coding-system-alist',
9767`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
9768 Vcoding_system_for_read = Qnil;
9769
9770 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
9771 doc: /* Specify the coding system for write operations.
9772Programs bind this variable with `let', but you should not set it globally.
9773If the value is a coding system, it is used for encoding of output,
9774when writing it to a file and when sending it to a file or subprocess.
9775
9776If this does not specify a coding system, an appropriate element
9777is used from one of the coding system alists:
9778There are three such tables, `file-coding-system-alist',
9779`process-coding-system-alist', and `network-coding-system-alist'.
9780For output to files, if the above procedure does not specify a coding system,
9781the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
9782 Vcoding_system_for_write = Qnil;
9783
9784 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
9785 doc: /*
9786Coding system used in the latest file or process I/O. */);
4ed46869
KH
9787 Vlast_coding_system_used = Qnil;
9788
065e3595
KH
9789 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9790 doc: /*
9791Error status of the last code conversion.
9792
9793When an error was detected in the last code conversion, this variable
9794is set to one of the following symbols.
9795 `insufficient-source'
9796 `inconsistent-eol'
9797 `invalid-source'
9798 `interrupted'
9799 `insufficient-memory'
9800When no error was detected, the value doesn't change. So, to check
9801the error status of a code conversion by this variable, you must
9802explicitly set this variable to nil before performing code
9803conversion. */);
9804 Vlast_code_conversion_error = Qnil;
9805
9ce27fde 9806 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
9807 doc: /*
9808*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
9809See info node `Coding Systems' and info node `Text and Binary' concerning
9810such conversion. */);
9ce27fde
KH
9811 inhibit_eol_conversion = 0;
9812
ed29121d 9813 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
9814 doc: /*
9815Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
9816Bind it to t if the process output is to be treated as if it were a file
9817read from some filesystem. */);
ed29121d
EZ
9818 inherit_process_coding_system = 0;
9819
02ba4723 9820 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
9821 doc: /*
9822Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
9823The format is ((PATTERN . VAL) ...),
9824where PATTERN is a regular expression matching a file name,
9825VAL is a coding system, a cons of coding systems, or a function symbol.
9826If VAL is a coding system, it is used for both decoding and encoding
9827the file contents.
9828If VAL is a cons of coding systems, the car part is used for decoding,
9829and the cdr part is used for encoding.
9830If VAL is a function symbol, the function must return a coding system
2c53e699
KH
9831or a cons of coding systems which are used as above. The function is
9832called with an argument that is a list of the arguments with which
5a0bbd9a
KH
9833`find-operation-coding-system' was called. If the function can't decide
9834a coding system, it can return `undecided' so that the normal
9835code-detection is performed.
48b0f3ae
PJ
9836
9837See also the function `find-operation-coding-system'
9838and the variable `auto-coding-alist'. */);
02ba4723
KH
9839 Vfile_coding_system_alist = Qnil;
9840
9841 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
9842 doc: /*
9843Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
9844The format is ((PATTERN . VAL) ...),
9845where PATTERN is a regular expression matching a program name,
9846VAL is a coding system, a cons of coding systems, or a function symbol.
9847If VAL is a coding system, it is used for both decoding what received
9848from the program and encoding what sent to the program.
9849If VAL is a cons of coding systems, the car part is used for decoding,
9850and the cdr part is used for encoding.
9851If VAL is a function symbol, the function must return a coding system
9852or a cons of coding systems which are used as above.
9853
9854See also the function `find-operation-coding-system'. */);
02ba4723
KH
9855 Vprocess_coding_system_alist = Qnil;
9856
9857 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
9858 doc: /*
9859Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
9860The format is ((PATTERN . VAL) ...),
9861where PATTERN is a regular expression matching a network service name
9862or is a port number to connect to,
9863VAL is a coding system, a cons of coding systems, or a function symbol.
9864If VAL is a coding system, it is used for both decoding what received
9865from the network stream and encoding what sent to the network stream.
9866If VAL is a cons of coding systems, the car part is used for decoding,
9867and the cdr part is used for encoding.
9868If VAL is a function symbol, the function must return a coding system
9869or a cons of coding systems which are used as above.
9870
9871See also the function `find-operation-coding-system'. */);
02ba4723 9872 Vnetwork_coding_system_alist = Qnil;
4ed46869 9873
68c45bf0 9874 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
9875 doc: /* Coding system to use with system messages.
9876Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
9877 Vlocale_coding_system = Qnil;
9878
005f0d35 9879 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 9880 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
9881 doc: /*
9882*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 9883 eol_mnemonic_unix = build_string (":");
4ed46869 9884
7722baf9 9885 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
9886 doc: /*
9887*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 9888 eol_mnemonic_dos = build_string ("\\");
4ed46869 9889
7722baf9 9890 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
9891 doc: /*
9892*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 9893 eol_mnemonic_mac = build_string ("/");
4ed46869 9894
7722baf9 9895 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
9896 doc: /*
9897*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 9898 eol_mnemonic_undecided = build_string (":");
4ed46869 9899
84fbb8a0 9900 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
9901 doc: /*
9902*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 9903 Venable_character_translation = Qt;
bdd9fb48 9904
f967223b 9905 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
9906 &Vstandard_translation_table_for_decode,
9907 doc: /* Table for translating characters while decoding. */);
f967223b 9908 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 9909
f967223b 9910 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
9911 &Vstandard_translation_table_for_encode,
9912 doc: /* Table for translating characters while encoding. */);
f967223b 9913 Vstandard_translation_table_for_encode = Qnil;
4ed46869 9914
df7492f9 9915 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
9916 doc: /* Alist of charsets vs revision numbers.
9917While encoding, if a charset (car part of an element) is found,
df7492f9
KH
9918designate it with the escape sequence identifying revision (cdr part
9919of the element). */);
9920 Vcharset_revision_table = Qnil;
02ba4723
KH
9921
9922 DEFVAR_LISP ("default-process-coding-system",
9923 &Vdefault_process_coding_system,
48b0f3ae
PJ
9924 doc: /* Cons of coding systems used for process I/O by default.
9925The car part is used for decoding a process output,
9926the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 9927 Vdefault_process_coding_system = Qnil;
c4825358 9928
3f003981 9929 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
9930 doc: /*
9931Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
9932This is a vector of length 256.
9933If Nth element is non-nil, the existence of code N in a file
9934\(or output of subprocess) doesn't prevent it to be detected as
9935a coding system of ISO 2022 variant which has a flag
9936`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9937or reading output of a subprocess.
9938Only 128th through 159th elements has a meaning. */);
3f003981 9939 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
9940
9941 DEFVAR_LISP ("select-safe-coding-system-function",
9942 &Vselect_safe_coding_system_function,
df7492f9
KH
9943 doc: /*
9944Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
9945
9946If set, this function is called to force a user to select a proper
9947coding system which can encode the text in the case that a default
fdecf907
GM
9948coding system used in each operation can't encode the text. The
9949function should take care that the buffer is not modified while
9950the coding system is being selected.
48b0f3ae
PJ
9951
9952The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
9953 Vselect_safe_coding_system_function = Qnil;
9954
5d5bf4d8
KH
9955 DEFVAR_BOOL ("coding-system-require-warning",
9956 &coding_system_require_warning,
9957 doc: /* Internal use only.
6b89e3aa
KH
9958If non-nil, on writing a file, `select-safe-coding-system-function' is
9959called even if `coding-system-for-write' is non-nil. The command
9960`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
9961 coding_system_require_warning = 0;
9962
9963
22ab2303 9964 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 9965 &inhibit_iso_escape_detection,
df7492f9
KH
9966 doc: /*
9967If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
9968
9969By default, on reading a file, Emacs tries to detect how the text is
9970encoded. This code detection is sensitive to escape sequences. If
9971the sequence is valid as ISO2022, the code is determined as one of
9972the ISO2022 encodings, and the file is decoded by the corresponding
9973coding system (e.g. `iso-2022-7bit').
9974
9975However, there may be a case that you want to read escape sequences in
9976a file as is. In such a case, you can set this variable to non-nil.
9977Then, as the code detection ignores any escape sequences, no file is
9978detected as encoded in some ISO2022 encoding. The result is that all
9979escape sequences become visible in a buffer.
9980
9981The default value is nil, and it is strongly recommended not to change
9982it. That is because many Emacs Lisp source files that contain
9983non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9984in Emacs's distribution, and they won't be decoded correctly on
9985reading if you suppress escape sequence detection.
9986
9987The other way to read escape sequences in a file without decoding is
9988to explicitly specify some coding system that doesn't use ISO2022's
9989escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 9990 inhibit_iso_escape_detection = 0;
002fdb44
DL
9991
9992 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1
DL
9993 doc: /* Char table for translating self-inserting characters.
9994This is applied to the result of input methods, not their input. See also
9995`keyboard-translate-table'. */);
002fdb44 9996 Vtranslation_table_for_input = Qnil;
8f924df7 9997
2c78b7e1
KH
9998 {
9999 Lisp_Object args[coding_arg_max];
8f924df7 10000 Lisp_Object plist[16];
2c78b7e1
KH
10001 int i;
10002
10003 for (i = 0; i < coding_arg_max; i++)
10004 args[i] = Qnil;
10005
10006 plist[0] = intern (":name");
10007 plist[1] = args[coding_arg_name] = Qno_conversion;
10008 plist[2] = intern (":mnemonic");
10009 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10010 plist[4] = intern (":coding-type");
10011 plist[5] = args[coding_arg_coding_type] = Qraw_text;
10012 plist[6] = intern (":ascii-compatible-p");
10013 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10014 plist[8] = intern (":default-char");
10015 plist[9] = args[coding_arg_default_char] = make_number (0);
8f924df7
KH
10016 plist[10] = intern (":for-unibyte");
10017 plist[11] = args[coding_arg_for_unibyte] = Qt;
10018 plist[12] = intern (":docstring");
10019 plist[13] = build_string ("Do no conversion.\n\
2c78b7e1
KH
10020\n\
10021When you visit a file with this coding, the file is read into a\n\
10022unibyte buffer as is, thus each byte of a file is treated as a\n\
10023character.");
8f924df7
KH
10024 plist[14] = intern (":eol-type");
10025 plist[15] = args[coding_arg_eol_type] = Qunix;
10026 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1 10027 Fdefine_coding_system_internal (coding_arg_max, args);
ae6f73fa
KH
10028
10029 plist[1] = args[coding_arg_name] = Qundecided;
10030 plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10031 plist[5] = args[coding_arg_coding_type] = Qundecided;
10032 /* This is already set.
35befdaa 10033 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
ae6f73fa
KH
10034 plist[8] = intern (":charset-list");
10035 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10036 plist[11] = args[coding_arg_for_unibyte] = Qnil;
10037 plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10038 plist[15] = args[coding_arg_eol_type] = Qnil;
10039 args[coding_arg_plist] = Flist (16, plist);
10040 Fdefine_coding_system_internal (coding_arg_max, args);
2c78b7e1
KH
10041 }
10042
2c78b7e1 10043 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
10044
10045 {
10046 int i;
10047
10048 for (i = 0; i < coding_category_max; i++)
10049 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10050 }
fcbcfb64
KH
10051#if defined (MSDOS) || defined (WINDOWSNT)
10052 system_eol_type = Qdos;
10053#else
10054 system_eol_type = Qunix;
10055#endif
10056 staticpro (&system_eol_type);
4ed46869
KH
10057}
10058
68c45bf0
PE
10059char *
10060emacs_strerror (error_number)
10061 int error_number;
10062{
10063 char *str;
10064
ca9c0567 10065 synchronize_system_messages_locale ();
68c45bf0
PE
10066 str = strerror (error_number);
10067
10068 if (! NILP (Vlocale_coding_system))
10069 {
10070 Lisp_Object dec = code_convert_string_norecord (build_string (str),
10071 Vlocale_coding_system,
10072 0);
d5db4077 10073 str = (char *) SDATA (dec);
68c45bf0
PE
10074 }
10075
10076 return str;
10077}
10078
4ed46869 10079#endif /* emacs */
9ffd559c
KH
10080
10081/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10082 (do not change this comment) */