(CATEGORY_MASK_ANY): Add CATEGORY_MASK_UTF_16_AUTO.
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
aaef169d 2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
8cabe764 3 2006, 2007, 2008 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
8cabe764 5 2005, 2006, 2007, 2008
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8f924df7 8 Copyright (C) 2003
df7492f9
KH
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
4ed46869 11
369314dc
KH
12This file is part of GNU Emacs.
13
14GNU Emacs is free software; you can redistribute it and/or modify
15it under the terms of the GNU General Public License as published by
1427aa65 16the Free Software Foundation; either version 3, or (at your option)
369314dc 17any later version.
4ed46869 18
369314dc
KH
19GNU Emacs is distributed in the hope that it will be useful,
20but WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22GNU General Public License for more details.
4ed46869 23
369314dc
KH
24You should have received a copy of the GNU General Public License
25along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
26the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27Boston, MA 02110-1301, USA. */
4ed46869
KH
28
29/*** TABLE OF CONTENTS ***
30
b73bfc1c 31 0. General comments
4ed46869 32 1. Preamble
df7492f9
KH
33 2. Emacs' internal format (emacs-utf-8) handlers
34 3. UTF-8 handlers
35 4. UTF-16 handlers
36 5. Charset-base coding systems handlers
37 6. emacs-mule (old Emacs' internal format) handlers
38 7. ISO2022 handlers
39 8. Shift-JIS and BIG5 handlers
40 9. CCL handlers
41 10. C library functions
42 11. Emacs Lisp library functions
43 12. Postamble
4ed46869
KH
44
45*/
46
df7492f9 47/*** 0. General comments ***
b73bfc1c
KH
48
49
df7492f9 50CODING SYSTEM
4ed46869 51
5bad0796
DL
52 A coding system is an object for an encoding mechanism that contains
53 information about how to convert byte sequences to character
e19c3639
KH
54 sequences and vice versa. When we say "decode", it means converting
55 a byte sequence of a specific coding system into a character
56 sequence that is represented by Emacs' internal coding system
57 `emacs-utf-8', and when we say "encode", it means converting a
58 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 59 coding system.
4ed46869 60
e19c3639
KH
61 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
62 C level, a coding system is represented by a vector of attributes
5bad0796 63 stored in the hash table Vcharset_hash_table. The conversion from
e19c3639
KH
64 coding system symbol to attributes vector is done by looking up
65 Vcharset_hash_table by the symbol.
4ed46869 66
e19c3639 67 Coding systems are classified into the following types depending on
5bad0796 68 the encoding mechanism. Here's a brief description of the types.
4ed46869 69
df7492f9
KH
70 o UTF-8
71
72 o UTF-16
73
74 o Charset-base coding system
75
76 A coding system defined by one or more (coded) character sets.
5bad0796 77 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
78 character set.
79
5bad0796 80 o Old Emacs internal format (emacs-mule)
df7492f9 81
5bad0796 82 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 83
df7492f9 84 o ISO2022-base coding system
4ed46869
KH
85
86 The most famous coding system for multiple character sets. X's
df7492f9
KH
87 Compound Text, various EUCs (Extended Unix Code), and coding systems
88 used in the Internet communication such as ISO-2022-JP are all
89 variants of ISO2022.
4ed46869 90
df7492f9 91 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 92
4ed46869
KH
93 A coding system to encode character sets: ASCII, JISX0201, and
94 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 95 section 8.
4ed46869 96
df7492f9 97 o BIG5
4ed46869 98
df7492f9 99 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 100 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
101 described in section 8. In this file, when we write "big5" (all
102 lowercase), we mean the coding system, and when we write "Big5"
103 (capitalized), we mean the character set.
4ed46869 104
df7492f9 105 o CCL
27901516 106
5bad0796 107 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
108 not listed above, he can supply a decoder and an encoder for it in
109 CCL (Code Conversion Language) programs. Emacs executes the CCL
110 program while decoding/encoding.
27901516 111
df7492f9 112 o Raw-text
4ed46869 113
5a936b46 114 A coding system for text containing raw eight-bit data. Emacs
5bad0796 115 treats each byte of source text as a character (except for
df7492f9 116 end-of-line conversion).
4ed46869 117
df7492f9
KH
118 o No-conversion
119
120 Like raw text, but don't do end-of-line conversion.
4ed46869 121
4ed46869 122
df7492f9 123END-OF-LINE FORMAT
4ed46869 124
5bad0796 125 How text end-of-line is encoded depends on operating system. For
df7492f9 126 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 127 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
128 `line-feed' codes. MacOS's format is usually one byte of
129 `carriage-return'.
4ed46869 130
cfb43547 131 Since text character encoding and end-of-line encoding are
df7492f9
KH
132 independent, any coding system described above can take any format
133 of end-of-line (except for no-conversion).
4ed46869 134
e19c3639
KH
135STRUCT CODING_SYSTEM
136
137 Before using a coding system for code conversion (i.e. decoding and
138 encoding), we setup a structure of type `struct coding_system'.
139 This structure keeps various information about a specific code
5bad0796 140 conversion (e.g. the location of source and destination data).
4ed46869
KH
141
142*/
143
df7492f9
KH
144/* COMMON MACROS */
145
146
4ed46869
KH
147/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
148
df7492f9 149 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
150 CODING conforms to the format of XXX, and update the members of
151 DETECT_INFO.
df7492f9 152
ff0dacd7 153 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
154
155 Below is the template of these functions. */
156
4ed46869 157#if 0
df7492f9 158static int
ff0dacd7 159detect_coding_XXX (coding, detect_info)
df7492f9 160 struct coding_system *coding;
ff0dacd7 161 struct coding_detection_info *detect_info;
4ed46869 162{
f1d34bca
MB
163 const unsigned char *src = coding->source;
164 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 165 int multibytep = coding->src_multibyte;
ff0dacd7 166 int consumed_chars = 0;
df7492f9
KH
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
ff0dacd7
KH
175
176 if (! __C_conforms_to_XXX___ (c))
177 break;
178 if (! __C_strongly_suggests_XXX__ (c))
179 found = CATEGORY_MASK_XXX;
df7492f9 180 }
ff0dacd7
KH
181 /* The byte sequence is invalid for XXX. */
182 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 183 return 0;
ff0dacd7 184
df7492f9 185 no_more_source:
ff0dacd7
KH
186 /* The source exausted successfully. */
187 detect_info->found |= found;
df7492f9 188 return 1;
4ed46869
KH
189}
190#endif
191
192/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
193
df7492f9
KH
194 These functions decode a byte sequence specified as a source by
195 CODING. The resulting multibyte text goes to a place pointed to by
196 CODING->charbuf, the length of which should not exceed
197 CODING->charbuf_size;
d46c5b12 198
df7492f9
KH
199 These functions set the information of original and decoded texts in
200 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
201 They also set CODING->result to one of CODING_RESULT_XXX indicating
202 how the decoding is finished.
d46c5b12 203
df7492f9 204 Below is the template of these functions. */
d46c5b12 205
4ed46869 206#if 0
b73bfc1c 207static void
df7492f9 208decode_coding_XXXX (coding)
4ed46869 209 struct coding_system *coding;
4ed46869 210{
f1d34bca
MB
211 const unsigned char *src = coding->source + coding->consumed;
212 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
213 /* SRC_BASE remembers the start position in source in each loop.
214 The loop will be exited when there's not enough source code, or
215 when there's no room in CHARBUF for a decoded character. */
f1d34bca 216 const unsigned char *src_base;
df7492f9 217 /* A buffer to produce decoded characters. */
69a80ea3
KH
218 int *charbuf = coding->charbuf + coding->charbuf_used;
219 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
220 int multibytep = coding->src_multibyte;
221
222 while (1)
223 {
224 src_base = src;
225 if (charbuf < charbuf_end)
226 /* No more room to produce a decoded character. */
227 break;
228 ONE_MORE_BYTE (c);
229 /* Decode it. */
230 }
231
232 no_more_source:
233 if (src_base < src_end
234 && coding->mode & CODING_MODE_LAST_BLOCK)
235 /* If the source ends by partial bytes to construct a character,
236 treat them as eight-bit raw data. */
237 while (src_base < src_end && charbuf < charbuf_end)
238 *charbuf++ = *src_base++;
239 /* Remember how many bytes and characters we consumed. If the
240 source is multibyte, the bytes and chars are not identical. */
241 coding->consumed = coding->consumed_char = src_base - coding->source;
242 /* Remember how many characters we produced. */
243 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
244}
245#endif
246
247/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
248
df7492f9
KH
249 These functions encode SRC_BYTES length text at SOURCE of Emacs'
250 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
251 goes to a place pointed to by DESTINATION, the length of which
252 should not exceed DST_BYTES.
d46c5b12 253
df7492f9
KH
254 These functions set the information of original and encoded texts in
255 the members produced, produced_char, consumed, and consumed_char of
256 the structure *CODING. They also set the member result to one of
257 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 258
df7492f9
KH
259 DST_BYTES zero means that source area and destination area are
260 overlapped, which means that we can produce a encoded text until it
261 reaches at the head of not-yet-encoded source text.
d46c5b12 262
df7492f9 263 Below is a template of these functions. */
4ed46869 264#if 0
b73bfc1c 265static void
df7492f9 266encode_coding_XXX (coding)
4ed46869 267 struct coding_system *coding;
4ed46869 268{
df7492f9
KH
269 int multibytep = coding->dst_multibyte;
270 int *charbuf = coding->charbuf;
271 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
272 unsigned char *dst = coding->destination + coding->produced;
273 unsigned char *dst_end = coding->destination + coding->dst_bytes;
274 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
275 int produced_chars = 0;
276
277 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
278 {
279 int c = *charbuf;
280 /* Encode C into DST, and increment DST. */
281 }
282 label_no_more_destination:
283 /* How many chars and bytes we produced. */
284 coding->produced_char += produced_chars;
285 coding->produced = dst - coding->destination;
4ed46869
KH
286}
287#endif
288
4ed46869
KH
289\f
290/*** 1. Preamble ***/
291
68c45bf0 292#include <config.h>
4ed46869
KH
293#include <stdio.h>
294
4ed46869
KH
295#include "lisp.h"
296#include "buffer.h"
df7492f9 297#include "character.h"
4ed46869
KH
298#include "charset.h"
299#include "ccl.h"
df7492f9 300#include "composite.h"
4ed46869
KH
301#include "coding.h"
302#include "window.h"
b8299c66
KL
303#include "frame.h"
304#include "termhooks.h"
4ed46869 305
df7492f9 306Lisp_Object Vcoding_system_hash_table;
4ed46869 307
df7492f9 308Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
309Lisp_Object Qunix, Qdos;
310extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
311Lisp_Object Qbuffer_file_coding_system;
312Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 313Lisp_Object Qdefault_char;
27901516 314Lisp_Object Qno_conversion, Qundecided;
df7492f9 315Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 316Lisp_Object Qbig, Qlittle;
bb0115a2 317Lisp_Object Qcoding_system_history;
1397dc18 318Lisp_Object Qvalid_codes;
a6f87d34
KH
319Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
320Lisp_Object QCdecode_translation_table, QCencode_translation_table;
321Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
35befdaa 322Lisp_Object QCascii_compatible_p;
4ed46869
KH
323
324extern Lisp_Object Qinsert_file_contents, Qwrite_region;
387f6ba5 325Lisp_Object Qcall_process, Qcall_process_region;
4ed46869
KH
326Lisp_Object Qstart_process, Qopen_network_stream;
327Lisp_Object Qtarget_idx;
328
065e3595
KH
329Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
330Lisp_Object Qinterrupted, Qinsufficient_memory;
331
c7183fb8
GM
332extern Lisp_Object Qcompletion_ignore_case;
333
44e8490d
KH
334/* If a symbol has this property, evaluate the value to define the
335 symbol as a coding system. */
336static Lisp_Object Qcoding_system_define_form;
337
5d5bf4d8
KH
338int coding_system_require_warning;
339
d46c5b12
KH
340Lisp_Object Vselect_safe_coding_system_function;
341
7722baf9
EZ
342/* Mnemonic string for each format of end-of-line. */
343Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
344/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 345 decided. */
7722baf9 346Lisp_Object eol_mnemonic_undecided;
4ed46869 347
fcbcfb64
KH
348/* Format of end-of-line decided by system. This is Qunix on
349 Unix and Mac, Qdos on DOS/Windows.
350 This has an effect only for external encoding (i.e. for output to
351 file and process), not for in-buffer or Lisp string encoding. */
352static Lisp_Object system_eol_type;
353
4ed46869
KH
354#ifdef emacs
355
4608c386
KH
356Lisp_Object Vcoding_system_list, Vcoding_system_alist;
357
358Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 359
d46c5b12
KH
360/* Coding system emacs-mule and raw-text are for converting only
361 end-of-line format. */
362Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 363Lisp_Object Qutf_8_emacs;
ecf488bc 364
4ed46869
KH
365/* Coding-systems are handed between Emacs Lisp programs and C internal
366 routines by the following three variables. */
367/* Coding-system for reading files and receiving data from process. */
368Lisp_Object Vcoding_system_for_read;
369/* Coding-system for writing files and sending data to process. */
370Lisp_Object Vcoding_system_for_write;
371/* Coding-system actually used in the latest I/O. */
372Lisp_Object Vlast_coding_system_used;
065e3595
KH
373/* Set to non-nil when an error is detected while code conversion. */
374Lisp_Object Vlast_code_conversion_error;
c4825358 375/* A vector of length 256 which contains information about special
94487c4e 376 Latin codes (especially for dealing with Microsoft codes). */
3f003981 377Lisp_Object Vlatin_extra_code_table;
c4825358 378
9ce27fde
KH
379/* Flag to inhibit code conversion of end-of-line format. */
380int inhibit_eol_conversion;
381
74383408
KH
382/* Flag to inhibit ISO2022 escape sequence detection. */
383int inhibit_iso_escape_detection;
384
ed29121d
EZ
385/* Flag to make buffer-file-coding-system inherit from process-coding. */
386int inherit_process_coding_system;
387
c4825358
KH
388/* Coding system to be used to encode text for terminal display when
389 terminal coding system is nil. */
390struct coding_system safe_terminal_coding;
391
02ba4723
KH
392Lisp_Object Vfile_coding_system_alist;
393Lisp_Object Vprocess_coding_system_alist;
394Lisp_Object Vnetwork_coding_system_alist;
4ed46869 395
68c45bf0
PE
396Lisp_Object Vlocale_coding_system;
397
4ed46869
KH
398#endif /* emacs */
399
f967223b
KH
400/* Flag to tell if we look up translation table on character code
401 conversion. */
84fbb8a0 402Lisp_Object Venable_character_translation;
f967223b
KH
403/* Standard translation table to look up on decoding (reading). */
404Lisp_Object Vstandard_translation_table_for_decode;
405/* Standard translation table to look up on encoding (writing). */
406Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 407
f967223b
KH
408Lisp_Object Qtranslation_table;
409Lisp_Object Qtranslation_table_id;
410Lisp_Object Qtranslation_table_for_decode;
411Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
412
413/* Alist of charsets vs revision number. */
df7492f9 414static Lisp_Object Vcharset_revision_table;
4ed46869 415
02ba4723
KH
416/* Default coding systems used for process I/O. */
417Lisp_Object Vdefault_process_coding_system;
418
002fdb44
DL
419/* Char table for translating Quail and self-inserting input. */
420Lisp_Object Vtranslation_table_for_input;
421
df7492f9
KH
422/* Two special coding systems. */
423Lisp_Object Vsjis_coding_system;
424Lisp_Object Vbig5_coding_system;
425
df7492f9
KH
426/* ISO2022 section */
427
428#define CODING_ISO_INITIAL(coding, reg) \
429 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
430 coding_attr_iso_initial), \
431 reg)))
432
433
434#define CODING_ISO_REQUEST(coding, charset_id) \
435 ((charset_id <= (coding)->max_charset_id \
436 ? (coding)->safe_charsets[charset_id] \
437 : -1))
438
439
440#define CODING_ISO_FLAGS(coding) \
441 ((coding)->spec.iso_2022.flags)
442#define CODING_ISO_DESIGNATION(coding, reg) \
443 ((coding)->spec.iso_2022.current_designation[reg])
444#define CODING_ISO_INVOCATION(coding, plane) \
445 ((coding)->spec.iso_2022.current_invocation[plane])
446#define CODING_ISO_SINGLE_SHIFTING(coding) \
447 ((coding)->spec.iso_2022.single_shifting)
448#define CODING_ISO_BOL(coding) \
449 ((coding)->spec.iso_2022.bol)
450#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
451 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
452
453/* Control characters of ISO2022. */
454 /* code */ /* function */
455#define ISO_CODE_LF 0x0A /* line-feed */
456#define ISO_CODE_CR 0x0D /* carriage-return */
457#define ISO_CODE_SO 0x0E /* shift-out */
458#define ISO_CODE_SI 0x0F /* shift-in */
459#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
460#define ISO_CODE_ESC 0x1B /* escape */
461#define ISO_CODE_SS2 0x8E /* single-shift-2 */
462#define ISO_CODE_SS3 0x8F /* single-shift-3 */
463#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
464
465/* All code (1-byte) of ISO2022 is classified into one of the
466 followings. */
467enum iso_code_class_type
468 {
469 ISO_control_0, /* Control codes in the range
470 0x00..0x1F and 0x7F, except for the
471 following 5 codes. */
df7492f9
KH
472 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
473 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
474 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
475 ISO_escape, /* ISO_CODE_SO (0x1B) */
476 ISO_control_1, /* Control codes in the range
477 0x80..0x9F, except for the
478 following 3 codes. */
479 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
480 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
481 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
482 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
483 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
484 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
485 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
486 };
05e6f5dc 487
df7492f9
KH
488/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
489 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 490
df7492f9
KH
491/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
492 instead of the correct short-form sequence (e.g. ESC $ A). */
493#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 494
df7492f9
KH
495/* If set, reset graphic planes and registers at end-of-line to the
496 initial state. */
497#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 498
df7492f9
KH
499/* If set, reset graphic planes and registers before any control
500 characters to the initial state. */
501#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 502
df7492f9
KH
503/* If set, encode by 7-bit environment. */
504#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 505
df7492f9
KH
506/* If set, use locking-shift function. */
507#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 508
df7492f9
KH
509/* If set, use single-shift function. Overwrite
510 CODING_ISO_FLAG_LOCKING_SHIFT. */
511#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 512
df7492f9
KH
513/* If set, use designation escape sequence. */
514#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 515
df7492f9
KH
516/* If set, produce revision number sequence. */
517#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 518
df7492f9
KH
519/* If set, produce ISO6429's direction specifying sequence. */
520#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 521
df7492f9
KH
522/* If set, assume designation states are reset at beginning of line on
523 output. */
524#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 525
df7492f9
KH
526/* If set, designation sequence should be placed at beginning of line
527 on output. */
528#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 529
df7492f9
KH
530/* If set, do not encode unsafe charactes on output. */
531#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 532
df7492f9
KH
533/* If set, extra latin codes (128..159) are accepted as a valid code
534 on input. */
535#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 536
df7492f9 537#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 538
df7492f9 539#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 540
bf16eb23 541#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 542
bf16eb23 543#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 544
bf16eb23 545#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 546
df7492f9
KH
547/* A character to be produced on output if encoding of the original
548 character is prohibited by CODING_ISO_FLAG_SAFE. */
549#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 550
4ed46869 551
df7492f9
KH
552/* UTF-16 section */
553#define CODING_UTF_16_BOM(coding) \
554 ((coding)->spec.utf_16.bom)
4ed46869 555
df7492f9
KH
556#define CODING_UTF_16_ENDIAN(coding) \
557 ((coding)->spec.utf_16.endian)
4ed46869 558
df7492f9
KH
559#define CODING_UTF_16_SURROGATE(coding) \
560 ((coding)->spec.utf_16.surrogate)
4ed46869 561
4ed46869 562
df7492f9
KH
563/* CCL section */
564#define CODING_CCL_DECODER(coding) \
565 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
566#define CODING_CCL_ENCODER(coding) \
567 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
568#define CODING_CCL_VALIDS(coding) \
8f924df7 569 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 570
5a936b46 571/* Index for each coding category in `coding_categories' */
4ed46869 572
df7492f9
KH
573enum coding_category
574 {
575 coding_category_iso_7,
576 coding_category_iso_7_tight,
577 coding_category_iso_8_1,
578 coding_category_iso_8_2,
579 coding_category_iso_7_else,
580 coding_category_iso_8_else,
581 coding_category_utf_8,
582 coding_category_utf_16_auto,
583 coding_category_utf_16_be,
584 coding_category_utf_16_le,
585 coding_category_utf_16_be_nosig,
586 coding_category_utf_16_le_nosig,
587 coding_category_charset,
588 coding_category_sjis,
589 coding_category_big5,
590 coding_category_ccl,
591 coding_category_emacs_mule,
592 /* All above are targets of code detection. */
593 coding_category_raw_text,
594 coding_category_undecided,
595 coding_category_max
596 };
597
598/* Definitions of flag bits used in detect_coding_XXXX. */
599#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
600#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
601#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
602#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
603#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
604#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
605#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
b49a1807 606#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
607#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
608#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
609#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
610#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
611#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
612#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
613#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
614#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
615#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 616#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
617
618/* This value is returned if detect_coding_mask () find nothing other
619 than ASCII characters. */
620#define CATEGORY_MASK_ANY \
621 (CATEGORY_MASK_ISO_7 \
622 | CATEGORY_MASK_ISO_7_TIGHT \
623 | CATEGORY_MASK_ISO_8_1 \
624 | CATEGORY_MASK_ISO_8_2 \
625 | CATEGORY_MASK_ISO_7_ELSE \
626 | CATEGORY_MASK_ISO_8_ELSE \
627 | CATEGORY_MASK_UTF_8 \
2f3cbb32 628 | CATEGORY_MASK_UTF_16_AUTO \
df7492f9
KH
629 | CATEGORY_MASK_UTF_16_BE \
630 | CATEGORY_MASK_UTF_16_LE \
631 | CATEGORY_MASK_UTF_16_BE_NOSIG \
632 | CATEGORY_MASK_UTF_16_LE_NOSIG \
633 | CATEGORY_MASK_CHARSET \
634 | CATEGORY_MASK_SJIS \
635 | CATEGORY_MASK_BIG5 \
636 | CATEGORY_MASK_CCL \
637 | CATEGORY_MASK_EMACS_MULE)
638
639
640#define CATEGORY_MASK_ISO_7BIT \
641 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
642
643#define CATEGORY_MASK_ISO_8BIT \
644 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
645
646#define CATEGORY_MASK_ISO_ELSE \
647 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
648
649#define CATEGORY_MASK_ISO_ESCAPE \
650 (CATEGORY_MASK_ISO_7 \
651 | CATEGORY_MASK_ISO_7_TIGHT \
652 | CATEGORY_MASK_ISO_7_ELSE \
653 | CATEGORY_MASK_ISO_8_ELSE)
654
655#define CATEGORY_MASK_ISO \
656 ( CATEGORY_MASK_ISO_7BIT \
657 | CATEGORY_MASK_ISO_8BIT \
658 | CATEGORY_MASK_ISO_ELSE)
659
660#define CATEGORY_MASK_UTF_16 \
2f3cbb32
KH
661 (CATEGORY_MASK_UTF_16_AUTO \
662 | CATEGORY_MASK_UTF_16_BE \
df7492f9
KH
663 | CATEGORY_MASK_UTF_16_LE \
664 | CATEGORY_MASK_UTF_16_BE_NOSIG \
665 | CATEGORY_MASK_UTF_16_LE_NOSIG)
666
667
668/* List of symbols `coding-category-xxx' ordered by priority. This
669 variable is exposed to Emacs Lisp. */
670static Lisp_Object Vcoding_category_list;
671
672/* Table of coding categories (Lisp symbols). This variable is for
673 internal use oly. */
674static Lisp_Object Vcoding_category_table;
675
676/* Table of coding-categories ordered by priority. */
677static enum coding_category coding_priorities[coding_category_max];
678
679/* Nth element is a coding context for the coding system bound to the
680 Nth coding category. */
681static struct coding_system coding_categories[coding_category_max];
682
df7492f9
KH
683/*** Commonly used macros and functions ***/
684
685#ifndef min
686#define min(a, b) ((a) < (b) ? (a) : (b))
687#endif
688#ifndef max
689#define max(a, b) ((a) > (b) ? (a) : (b))
690#endif
4ed46869 691
24a73b0a
KH
692#define CODING_GET_INFO(coding, attrs, charset_list) \
693 do { \
694 (attrs) = CODING_ID_ATTRS ((coding)->id); \
695 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
df7492f9 696 } while (0)
4ed46869 697
4ed46869 698
df7492f9
KH
699/* Safely get one byte from the source text pointed by SRC which ends
700 at SRC_END, and set C to that byte. If there are not enough bytes
065e3595
KH
701 in the source, it jumps to `no_more_source'. If multibytep is
702 nonzero, and a multibyte character is found at SRC, set C to the
703 negative value of the character code. The caller should declare
704 and set these variables appropriately in advance:
705 src, src_end, multibytep */
aa72b389 706
065e3595
KH
707#define ONE_MORE_BYTE(c) \
708 do { \
709 if (src == src_end) \
710 { \
711 if (src_base < src) \
712 record_conversion_result \
713 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
714 goto no_more_source; \
715 } \
716 c = *src++; \
717 if (multibytep && (c & 0x80)) \
718 { \
719 if ((c & 0xFE) == 0xC0) \
720 c = ((c & 1) << 6) | *src++; \
721 else \
722 { \
35befdaa
KH
723 src--; \
724 c = - string_char (src, &src, NULL); \
065e3595
KH
725 record_conversion_result \
726 (coding, CODING_RESULT_INVALID_SRC); \
727 } \
728 } \
729 consumed_chars++; \
aa72b389
KH
730 } while (0)
731
aa72b389 732
065e3595
KH
733#define ONE_MORE_BYTE_NO_CHECK(c) \
734 do { \
735 c = *src++; \
736 if (multibytep && (c & 0x80)) \
737 { \
738 if ((c & 0xFE) == 0xC0) \
739 c = ((c & 1) << 6) | *src++; \
740 else \
741 { \
35befdaa
KH
742 src--; \
743 c = - string_char (src, &src, NULL); \
065e3595
KH
744 record_conversion_result \
745 (coding, CODING_RESULT_INVALID_SRC); \
746 } \
747 } \
748 consumed_chars++; \
aa72b389
KH
749 } while (0)
750
aa72b389 751
df7492f9
KH
752/* Store a byte C in the place pointed by DST and increment DST to the
753 next free point, and increment PRODUCED_CHARS. The caller should
754 assure that C is 0..127, and declare and set the variable `dst'
755 appropriately in advance.
756*/
aa72b389
KH
757
758
df7492f9
KH
759#define EMIT_ONE_ASCII_BYTE(c) \
760 do { \
761 produced_chars++; \
762 *dst++ = (c); \
b6871cc7 763 } while (0)
aa72b389
KH
764
765
df7492f9 766/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
aa72b389 767
df7492f9
KH
768#define EMIT_TWO_ASCII_BYTES(c1, c2) \
769 do { \
770 produced_chars += 2; \
771 *dst++ = (c1), *dst++ = (c2); \
772 } while (0)
aa72b389
KH
773
774
df7492f9
KH
775/* Store a byte C in the place pointed by DST and increment DST to the
776 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
777 nonzero, store in an appropriate multibyte from. The caller should
778 declare and set the variables `dst' and `multibytep' appropriately
779 in advance. */
780
781#define EMIT_ONE_BYTE(c) \
782 do { \
783 produced_chars++; \
784 if (multibytep) \
785 { \
786 int ch = (c); \
787 if (ch >= 0x80) \
788 ch = BYTE8_TO_CHAR (ch); \
789 CHAR_STRING_ADVANCE (ch, dst); \
790 } \
791 else \
792 *dst++ = (c); \
aa72b389 793 } while (0)
aa72b389 794
aa72b389 795
df7492f9 796/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 797
e19c3639
KH
798#define EMIT_TWO_BYTES(c1, c2) \
799 do { \
800 produced_chars += 2; \
801 if (multibytep) \
802 { \
803 int ch; \
804 \
805 ch = (c1); \
806 if (ch >= 0x80) \
807 ch = BYTE8_TO_CHAR (ch); \
808 CHAR_STRING_ADVANCE (ch, dst); \
809 ch = (c2); \
810 if (ch >= 0x80) \
811 ch = BYTE8_TO_CHAR (ch); \
812 CHAR_STRING_ADVANCE (ch, dst); \
813 } \
814 else \
815 { \
816 *dst++ = (c1); \
817 *dst++ = (c2); \
818 } \
aa72b389
KH
819 } while (0)
820
821
df7492f9
KH
822#define EMIT_THREE_BYTES(c1, c2, c3) \
823 do { \
824 EMIT_ONE_BYTE (c1); \
825 EMIT_TWO_BYTES (c2, c3); \
826 } while (0)
aa72b389 827
aa72b389 828
df7492f9
KH
829#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
830 do { \
831 EMIT_TWO_BYTES (c1, c2); \
832 EMIT_TWO_BYTES (c3, c4); \
833 } while (0)
aa72b389 834
aa72b389 835
f6cbaf43
KH
836/* Prototypes for static functions. */
837static void record_conversion_result P_ ((struct coding_system *coding,
838 enum coding_result_code result));
839static int detect_coding_utf_8 P_ ((struct coding_system *,
840 struct coding_detection_info *info));
841static void decode_coding_utf_8 P_ ((struct coding_system *));
842static int encode_coding_utf_8 P_ ((struct coding_system *));
843
844static int detect_coding_utf_16 P_ ((struct coding_system *,
845 struct coding_detection_info *info));
846static void decode_coding_utf_16 P_ ((struct coding_system *));
847static int encode_coding_utf_16 P_ ((struct coding_system *));
848
849static int detect_coding_iso_2022 P_ ((struct coding_system *,
850 struct coding_detection_info *info));
851static void decode_coding_iso_2022 P_ ((struct coding_system *));
852static int encode_coding_iso_2022 P_ ((struct coding_system *));
853
854static int detect_coding_emacs_mule P_ ((struct coding_system *,
855 struct coding_detection_info *info));
856static void decode_coding_emacs_mule P_ ((struct coding_system *));
857static int encode_coding_emacs_mule P_ ((struct coding_system *));
858
859static int detect_coding_sjis P_ ((struct coding_system *,
860 struct coding_detection_info *info));
861static void decode_coding_sjis P_ ((struct coding_system *));
862static int encode_coding_sjis P_ ((struct coding_system *));
863
864static int detect_coding_big5 P_ ((struct coding_system *,
865 struct coding_detection_info *info));
866static void decode_coding_big5 P_ ((struct coding_system *));
867static int encode_coding_big5 P_ ((struct coding_system *));
868
869static int detect_coding_ccl P_ ((struct coding_system *,
870 struct coding_detection_info *info));
871static void decode_coding_ccl P_ ((struct coding_system *));
872static int encode_coding_ccl P_ ((struct coding_system *));
873
874static void decode_coding_raw_text P_ ((struct coding_system *));
875static int encode_coding_raw_text P_ ((struct coding_system *));
876
877static void coding_set_source P_ ((struct coding_system *));
878static void coding_set_destination P_ ((struct coding_system *));
879static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
880static void coding_alloc_by_making_gap P_ ((struct coding_system *,
287c57d7 881 EMACS_INT, EMACS_INT));
f6cbaf43
KH
882static unsigned char *alloc_destination P_ ((struct coding_system *,
883 EMACS_INT, unsigned char *));
884static void setup_iso_safe_charsets P_ ((Lisp_Object));
885static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
886 int *, int *,
887 unsigned char *));
888static int detect_eol P_ ((const unsigned char *,
889 EMACS_INT, enum coding_category));
890static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
891static void decode_eol P_ ((struct coding_system *));
892static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
893static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
894 int, int *, int *));
895static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
896static INLINE void produce_composition P_ ((struct coding_system *, int *,
897 EMACS_INT));
898static INLINE void produce_charset P_ ((struct coding_system *, int *,
899 EMACS_INT));
900static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
901static int decode_coding P_ ((struct coding_system *));
902static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
903 struct coding_system *,
904 int *, EMACS_INT *));
905static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
906 struct coding_system *,
907 int *, EMACS_INT *));
908static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
909static int encode_coding P_ ((struct coding_system *));
910static Lisp_Object make_conversion_work_buffer P_ ((int));
911static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
912static INLINE int char_encodable_p P_ ((int, Lisp_Object));
913static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
914
065e3595
KH
915static void
916record_conversion_result (struct coding_system *coding,
917 enum coding_result_code result)
918{
919 coding->result = result;
920 switch (result)
921 {
922 case CODING_RESULT_INSUFFICIENT_SRC:
923 Vlast_code_conversion_error = Qinsufficient_source;
924 break;
925 case CODING_RESULT_INCONSISTENT_EOL:
926 Vlast_code_conversion_error = Qinconsistent_eol;
927 break;
928 case CODING_RESULT_INVALID_SRC:
929 Vlast_code_conversion_error = Qinvalid_source;
930 break;
931 case CODING_RESULT_INTERRUPT:
932 Vlast_code_conversion_error = Qinterrupted;
933 break;
934 case CODING_RESULT_INSUFFICIENT_MEM:
935 Vlast_code_conversion_error = Qinsufficient_memory;
936 break;
35befdaa
KH
937 default:
938 Vlast_code_conversion_error = intern ("Unknown error");
065e3595
KH
939 }
940}
941
df7492f9
KH
942#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
943 do { \
944 charset_map_loaded = 0; \
945 c = DECODE_CHAR (charset, code); \
946 if (charset_map_loaded) \
947 { \
8f924df7 948 const unsigned char *orig = coding->source; \
df7492f9
KH
949 EMACS_INT offset; \
950 \
951 coding_set_source (coding); \
952 offset = coding->source - orig; \
953 src += offset; \
954 src_base += offset; \
955 src_end += offset; \
956 } \
aa72b389
KH
957 } while (0)
958
959
119852e7
KH
960/* If there are at least BYTES length of room at dst, allocate memory
961 for coding->destination and update dst and dst_end. We don't have
962 to take care of coding->source which will be relocated. It is
963 handled by calling coding_set_source in encode_coding. */
964
df7492f9
KH
965#define ASSURE_DESTINATION(bytes) \
966 do { \
967 if (dst + (bytes) >= dst_end) \
968 { \
969 int more_bytes = charbuf_end - charbuf + (bytes); \
970 \
971 dst = alloc_destination (coding, more_bytes, dst); \
972 dst_end = coding->destination + coding->dst_bytes; \
973 } \
974 } while (0)
aa72b389 975
aa72b389 976
db274c7a
KH
977/* Store multibyte form of the character C in P, and advance P to the
978 end of the multibyte form. This is like CHAR_STRING_ADVANCE but it
979 never calls MAYBE_UNIFY_CHAR. */
980
981#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) \
982 do { \
983 if ((c) <= MAX_1_BYTE_CHAR) \
984 *(p)++ = (c); \
985 else if ((c) <= MAX_2_BYTE_CHAR) \
986 *(p)++ = (0xC0 | ((c) >> 6)), \
987 *(p)++ = (0x80 | ((c) & 0x3F)); \
988 else if ((c) <= MAX_3_BYTE_CHAR) \
989 *(p)++ = (0xE0 | ((c) >> 12)), \
990 *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \
991 *(p)++ = (0x80 | ((c) & 0x3F)); \
992 else if ((c) <= MAX_4_BYTE_CHAR) \
993 *(p)++ = (0xF0 | (c >> 18)), \
994 *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \
995 *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \
996 *(p)++ = (0x80 | (c & 0x3F)); \
997 else if ((c) <= MAX_5_BYTE_CHAR) \
998 *(p)++ = 0xF8, \
999 *(p)++ = (0x80 | ((c >> 18) & 0x0F)), \
1000 *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \
1001 *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \
1002 *(p)++ = (0x80 | (c & 0x3F)); \
1003 else \
1004 (p) += BYTE8_STRING ((c) - 0x3FFF80, p); \
1005 } while (0)
1006
1007
1008/* Return the character code of character whose multibyte form is at
1009 P, and advance P to the end of the multibyte form. This is like
1010 STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR. */
1011
1012#define STRING_CHAR_ADVANCE_NO_UNIFY(p) \
1013 (!((p)[0] & 0x80) \
1014 ? *(p)++ \
1015 : ! ((p)[0] & 0x20) \
1016 ? ((p) += 2, \
1017 ((((p)[-2] & 0x1F) << 6) \
1018 | ((p)[-1] & 0x3F) \
1019 | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
1020 : ! ((p)[0] & 0x10) \
1021 ? ((p) += 3, \
1022 ((((p)[-3] & 0x0F) << 12) \
1023 | (((p)[-2] & 0x3F) << 6) \
1024 | ((p)[-1] & 0x3F))) \
1025 : ! ((p)[0] & 0x08) \
1026 ? ((p) += 4, \
1027 ((((p)[-4] & 0xF) << 18) \
1028 | (((p)[-3] & 0x3F) << 12) \
1029 | (((p)[-2] & 0x3F) << 6) \
1030 | ((p)[-1] & 0x3F))) \
1031 : ((p) += 5, \
1032 ((((p)[-4] & 0x3F) << 18) \
1033 | (((p)[-3] & 0x3F) << 12) \
1034 | (((p)[-2] & 0x3F) << 6) \
1035 | ((p)[-1] & 0x3F))))
1036
aa72b389 1037
df7492f9
KH
1038static void
1039coding_set_source (coding)
aa72b389 1040 struct coding_system *coding;
aa72b389 1041{
df7492f9
KH
1042 if (BUFFERP (coding->src_object))
1043 {
2cb26057 1044 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 1045
df7492f9 1046 if (coding->src_pos < 0)
2cb26057 1047 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 1048 else
2cb26057 1049 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 1050 }
df7492f9 1051 else if (STRINGP (coding->src_object))
aa72b389 1052 {
8f924df7 1053 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 1054 }
df7492f9
KH
1055 else
1056 /* Otherwise, the source is C string and is never relocated
1057 automatically. Thus we don't have to update anything. */
1058 ;
1059}
aa72b389 1060
df7492f9
KH
1061static void
1062coding_set_destination (coding)
1063 struct coding_system *coding;
1064{
1065 if (BUFFERP (coding->dst_object))
aa72b389 1066 {
df7492f9 1067 if (coding->src_pos < 0)
aa72b389 1068 {
13818c30 1069 coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
28f67a95
KH
1070 coding->dst_bytes = (GAP_END_ADDR
1071 - (coding->src_bytes - coding->consumed)
1072 - coding->destination);
aa72b389 1073 }
df7492f9 1074 else
28f67a95
KH
1075 {
1076 /* We are sure that coding->dst_pos_byte is before the gap
1077 of the buffer. */
1078 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
13818c30 1079 + coding->dst_pos_byte - BEG_BYTE);
28f67a95
KH
1080 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1081 - coding->destination);
1082 }
df7492f9
KH
1083 }
1084 else
1085 /* Otherwise, the destination is C string and is never relocated
1086 automatically. Thus we don't have to update anything. */
1087 ;
1088}
1089
1090
1091static void
1092coding_alloc_by_realloc (coding, bytes)
1093 struct coding_system *coding;
1094 EMACS_INT bytes;
1095{
1096 coding->destination = (unsigned char *) xrealloc (coding->destination,
1097 coding->dst_bytes + bytes);
1098 coding->dst_bytes += bytes;
1099}
1100
1101static void
db274c7a 1102coding_alloc_by_making_gap (coding, gap_head_used, bytes)
df7492f9 1103 struct coding_system *coding;
db274c7a 1104 EMACS_INT gap_head_used, bytes;
df7492f9 1105{
db274c7a 1106 if (EQ (coding->src_object, coding->dst_object))
df7492f9 1107 {
db274c7a
KH
1108 /* The gap may contain the produced data at the head and not-yet
1109 consumed data at the tail. To preserve those data, we at
1110 first make the gap size to zero, then increase the gap
1111 size. */
1112 EMACS_INT add = GAP_SIZE;
1113
1114 GPT += gap_head_used, GPT_BYTE += gap_head_used;
1115 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
df7492f9
KH
1116 make_gap (bytes);
1117 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
db274c7a 1118 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
df7492f9 1119 }
730fff51 1120 else
df7492f9 1121 {
2c78b7e1
KH
1122 Lisp_Object this_buffer;
1123
1124 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1125 set_buffer_internal (XBUFFER (coding->dst_object));
1126 make_gap (bytes);
1127 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1128 }
df7492f9 1129}
8f924df7 1130
df7492f9
KH
1131
1132static unsigned char *
1133alloc_destination (coding, nbytes, dst)
1134 struct coding_system *coding;
3e139625 1135 EMACS_INT nbytes;
df7492f9
KH
1136 unsigned char *dst;
1137{
1138 EMACS_INT offset = dst - coding->destination;
1139
1140 if (BUFFERP (coding->dst_object))
db274c7a
KH
1141 {
1142 struct buffer *buf = XBUFFER (coding->dst_object);
1143
1144 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1145 }
aa72b389 1146 else
df7492f9 1147 coding_alloc_by_realloc (coding, nbytes);
065e3595 1148 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1149 coding_set_destination (coding);
1150 dst = coding->destination + offset;
1151 return dst;
1152}
aa72b389 1153
ff0dacd7
KH
1154/** Macros for annotations. */
1155
1156/* Maximum length of annotation data (sum of annotations for
1157 composition and charset). */
69a80ea3 1158#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
ff0dacd7
KH
1159
1160/* An annotation data is stored in the array coding->charbuf in this
1161 format:
69a80ea3 1162 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
ff0dacd7
KH
1163 LENGTH is the number of elements in the annotation.
1164 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
69a80ea3 1165 NCHARS is the number of characters in the text annotated.
ff0dacd7
KH
1166
1167 The format of the following elements depend on ANNOTATION_MASK.
1168
1169 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1170 follows:
1171 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1172 METHOD is one of enum composition_method.
1173 Optionnal COMPOSITION-COMPONENTS are characters and composition
1174 rules.
1175
1176 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1177 follows. */
1178
69a80ea3 1179#define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \
ff0dacd7
KH
1180 do { \
1181 *(buf)++ = -(len); \
1182 *(buf)++ = (mask); \
69a80ea3 1183 *(buf)++ = (nchars); \
ff0dacd7
KH
1184 coding->annotated = 1; \
1185 } while (0);
1186
69a80ea3
KH
1187#define ADD_COMPOSITION_DATA(buf, nchars, method) \
1188 do { \
1189 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1190 *buf++ = method; \
ff0dacd7
KH
1191 } while (0)
1192
1193
69a80ea3
KH
1194#define ADD_CHARSET_DATA(buf, nchars, id) \
1195 do { \
1196 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1197 *buf++ = id; \
ff0dacd7
KH
1198 } while (0)
1199
df7492f9
KH
1200\f
1201/*** 2. Emacs' internal format (emacs-utf-8) ***/
1202
1203
1204
1205\f
1206/*** 3. UTF-8 ***/
1207
1208/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1209 Check if a text is encoded in UTF-8. If it is, return 1, else
1210 return 0. */
df7492f9
KH
1211
1212#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1213#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1214#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1215#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1216#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1217#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1218
1219static int
ff0dacd7 1220detect_coding_utf_8 (coding, detect_info)
df7492f9 1221 struct coding_system *coding;
ff0dacd7 1222 struct coding_detection_info *detect_info;
df7492f9 1223{
065e3595 1224 const unsigned char *src = coding->source, *src_base;
8f924df7 1225 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1226 int multibytep = coding->src_multibyte;
1227 int consumed_chars = 0;
1228 int found = 0;
1229
ff0dacd7 1230 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1231 /* A coding system of this category is always ASCII compatible. */
1232 src += coding->head_ascii;
1233
1234 while (1)
aa72b389 1235 {
df7492f9 1236 int c, c1, c2, c3, c4;
aa72b389 1237
065e3595 1238 src_base = src;
df7492f9 1239 ONE_MORE_BYTE (c);
065e3595 1240 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1241 continue;
1242 ONE_MORE_BYTE (c1);
065e3595 1243 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1244 break;
1245 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1246 {
ff0dacd7 1247 found = CATEGORY_MASK_UTF_8;
df7492f9 1248 continue;
aa72b389 1249 }
df7492f9 1250 ONE_MORE_BYTE (c2);
065e3595 1251 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1252 break;
1253 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1254 {
ff0dacd7 1255 found = CATEGORY_MASK_UTF_8;
df7492f9 1256 continue;
aa72b389 1257 }
df7492f9 1258 ONE_MORE_BYTE (c3);
065e3595 1259 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1260 break;
1261 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1262 {
ff0dacd7 1263 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1264 continue;
1265 }
1266 ONE_MORE_BYTE (c4);
065e3595 1267 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1268 break;
1269 if (UTF_8_5_OCTET_LEADING_P (c))
1270 {
ff0dacd7 1271 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1272 continue;
1273 }
1274 break;
aa72b389 1275 }
ff0dacd7 1276 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1277 return 0;
aa72b389 1278
df7492f9 1279 no_more_source:
065e3595 1280 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1281 {
ff0dacd7 1282 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1283 return 0;
aa72b389 1284 }
ff0dacd7
KH
1285 detect_info->found |= found;
1286 return 1;
aa72b389
KH
1287}
1288
4ed46869 1289
b73bfc1c 1290static void
df7492f9 1291decode_coding_utf_8 (coding)
b73bfc1c 1292 struct coding_system *coding;
b73bfc1c 1293{
8f924df7
KH
1294 const unsigned char *src = coding->source + coding->consumed;
1295 const unsigned char *src_end = coding->source + coding->src_bytes;
1296 const unsigned char *src_base;
69a80ea3
KH
1297 int *charbuf = coding->charbuf + coding->charbuf_used;
1298 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1299 int consumed_chars = 0, consumed_chars_base;
1300 int multibytep = coding->src_multibyte;
24a73b0a 1301 Lisp_Object attr, charset_list;
119852e7
KH
1302 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1303 int byte_after_cr = -1;
4ed46869 1304
24a73b0a 1305 CODING_GET_INFO (coding, attr, charset_list);
df7492f9
KH
1306
1307 while (1)
b73bfc1c 1308 {
df7492f9 1309 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1310
df7492f9
KH
1311 src_base = src;
1312 consumed_chars_base = consumed_chars;
4af310db 1313
df7492f9
KH
1314 if (charbuf >= charbuf_end)
1315 break;
1316
119852e7
KH
1317 if (byte_after_cr >= 0)
1318 c1 = byte_after_cr, byte_after_cr = -1;
1319 else
1320 ONE_MORE_BYTE (c1);
065e3595
KH
1321 if (c1 < 0)
1322 {
1323 c = - c1;
1324 }
1325 else if (UTF_8_1_OCTET_P(c1))
df7492f9 1326 {
119852e7
KH
1327 if (eol_crlf && c1 == '\r')
1328 ONE_MORE_BYTE (byte_after_cr);
df7492f9 1329 c = c1;
4af310db 1330 }
df7492f9 1331 else
4af310db 1332 {
df7492f9 1333 ONE_MORE_BYTE (c2);
065e3595 1334 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1335 goto invalid_code;
1336 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1337 {
b0edb2c5
DL
1338 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1339 /* Reject overlong sequences here and below. Encoders
1340 producing them are incorrect, they can be misleading,
1341 and they mess up read/write invariance. */
1342 if (c < 128)
1343 goto invalid_code;
4af310db 1344 }
df7492f9 1345 else
aa72b389 1346 {
df7492f9 1347 ONE_MORE_BYTE (c3);
065e3595 1348 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1349 goto invalid_code;
1350 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1351 {
1352 c = (((c1 & 0xF) << 12)
1353 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1354 if (c < 0x800
1355 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1356 goto invalid_code;
1357 }
df7492f9
KH
1358 else
1359 {
1360 ONE_MORE_BYTE (c4);
065e3595 1361 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1362 goto invalid_code;
1363 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1364 {
df7492f9
KH
1365 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1366 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1367 if (c < 0x10000)
1368 goto invalid_code;
1369 }
df7492f9
KH
1370 else
1371 {
1372 ONE_MORE_BYTE (c5);
065e3595 1373 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1374 goto invalid_code;
1375 if (UTF_8_5_OCTET_LEADING_P (c1))
1376 {
1377 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1378 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1379 | (c5 & 0x3F));
b0edb2c5 1380 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1381 goto invalid_code;
1382 }
1383 else
1384 goto invalid_code;
1385 }
1386 }
aa72b389 1387 }
b73bfc1c 1388 }
df7492f9
KH
1389
1390 *charbuf++ = c;
1391 continue;
1392
1393 invalid_code:
1394 src = src_base;
1395 consumed_chars = consumed_chars_base;
1396 ONE_MORE_BYTE (c);
1397 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1398 coding->errors++;
aa72b389
KH
1399 }
1400
df7492f9
KH
1401 no_more_source:
1402 coding->consumed_char += consumed_chars_base;
1403 coding->consumed = src_base - coding->source;
1404 coding->charbuf_used = charbuf - coding->charbuf;
1405}
1406
1407
1408static int
1409encode_coding_utf_8 (coding)
1410 struct coding_system *coding;
1411{
1412 int multibytep = coding->dst_multibyte;
1413 int *charbuf = coding->charbuf;
1414 int *charbuf_end = charbuf + coding->charbuf_used;
1415 unsigned char *dst = coding->destination + coding->produced;
1416 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1417 int produced_chars = 0;
df7492f9
KH
1418 int c;
1419
1420 if (multibytep)
aa72b389 1421 {
df7492f9
KH
1422 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1423
1424 while (charbuf < charbuf_end)
b73bfc1c 1425 {
df7492f9 1426 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1427
df7492f9
KH
1428 ASSURE_DESTINATION (safe_room);
1429 c = *charbuf++;
28f67a95
KH
1430 if (CHAR_BYTE8_P (c))
1431 {
1432 c = CHAR_TO_BYTE8 (c);
1433 EMIT_ONE_BYTE (c);
1434 }
1435 else
1436 {
db274c7a 1437 CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
28f67a95
KH
1438 for (p = str; p < pend; p++)
1439 EMIT_ONE_BYTE (*p);
1440 }
b73bfc1c 1441 }
aa72b389 1442 }
df7492f9
KH
1443 else
1444 {
1445 int safe_room = MAX_MULTIBYTE_LENGTH;
1446
1447 while (charbuf < charbuf_end)
b73bfc1c 1448 {
df7492f9
KH
1449 ASSURE_DESTINATION (safe_room);
1450 c = *charbuf++;
f03caae0
KH
1451 if (CHAR_BYTE8_P (c))
1452 *dst++ = CHAR_TO_BYTE8 (c);
1453 else
db274c7a 1454 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
df7492f9 1455 produced_chars++;
4ed46869
KH
1456 }
1457 }
065e3595 1458 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1459 coding->produced_char += produced_chars;
1460 coding->produced = dst - coding->destination;
1461 return 0;
4ed46869
KH
1462}
1463
b73bfc1c 1464
df7492f9 1465/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1466 Check if a text is encoded in one of UTF-16 based coding systems.
1467 If it is, return 1, else return 0. */
aa72b389 1468
df7492f9
KH
1469#define UTF_16_HIGH_SURROGATE_P(val) \
1470 (((val) & 0xFC00) == 0xD800)
1471
1472#define UTF_16_LOW_SURROGATE_P(val) \
1473 (((val) & 0xFC00) == 0xDC00)
93dec019 1474
df7492f9
KH
1475#define UTF_16_INVALID_P(val) \
1476 (((val) == 0xFFFE) \
1477 || ((val) == 0xFFFF) \
1478 || UTF_16_LOW_SURROGATE_P (val))
aa72b389 1479
aa72b389 1480
df7492f9 1481static int
ff0dacd7 1482detect_coding_utf_16 (coding, detect_info)
aa72b389 1483 struct coding_system *coding;
ff0dacd7 1484 struct coding_detection_info *detect_info;
aa72b389 1485{
8f924df7
KH
1486 const unsigned char *src = coding->source, *src_base = src;
1487 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1488 int multibytep = coding->src_multibyte;
1489 int consumed_chars = 0;
1490 int c1, c2;
aa72b389 1491
ff0dacd7 1492 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1493 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1494 && (coding->src_chars & 1))
ff0dacd7
KH
1495 {
1496 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1497 return 0;
1498 }
24a73b0a 1499
df7492f9
KH
1500 ONE_MORE_BYTE (c1);
1501 ONE_MORE_BYTE (c2);
df7492f9 1502 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1503 {
b49a1807
KH
1504 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1505 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1506 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1507 | CATEGORY_MASK_UTF_16_BE_NOSIG
1508 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1509 }
df7492f9 1510 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1511 {
b49a1807
KH
1512 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1513 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1514 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1515 | CATEGORY_MASK_UTF_16_BE_NOSIG
1516 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1517 }
2f3cbb32 1518 else
24a73b0a 1519 {
2f3cbb32
KH
1520 /* We check the dispersion of Eth and Oth bytes where E is even and
1521 O is odd. If both are high, we assume binary data.*/
1522 unsigned char e[256], o[256];
1523 unsigned e_num = 1, o_num = 1;
1524
1525 memset (e, 0, 256);
1526 memset (o, 0, 256);
1527 e[c1] = 1;
1528 o[c2] = 1;
1529
24a73b0a
KH
1530 detect_info->rejected
1531 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
2f3cbb32
KH
1532
1533 while (1)
1534 {
1535 ONE_MORE_BYTE (c1);
1536 ONE_MORE_BYTE (c2);
1537 if (! e[c1])
1538 {
1539 e[c1] = 1;
1540 e_num++;
1541 if (e_num >= 128)
1542 break;
1543 }
1544 if (! o[c2])
1545 {
1546 o[c1] = 1;
1547 o_num++;
1548 if (o_num >= 128)
1549 break;
1550 }
1551 }
1552 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1553 return 0;
ff0dacd7 1554 }
2f3cbb32 1555
df7492f9 1556 no_more_source:
ff0dacd7 1557 return 1;
df7492f9 1558}
aa72b389 1559
df7492f9
KH
1560static void
1561decode_coding_utf_16 (coding)
1562 struct coding_system *coding;
1563{
8f924df7
KH
1564 const unsigned char *src = coding->source + coding->consumed;
1565 const unsigned char *src_end = coding->source + coding->src_bytes;
1566 const unsigned char *src_base;
69a80ea3
KH
1567 int *charbuf = coding->charbuf + coding->charbuf_used;
1568 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1569 int consumed_chars = 0, consumed_chars_base;
1570 int multibytep = coding->src_multibyte;
1571 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1572 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1573 int surrogate = CODING_UTF_16_SURROGATE (coding);
24a73b0a 1574 Lisp_Object attr, charset_list;
119852e7
KH
1575 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1576 int byte_after_cr1 = -1, byte_after_cr2 = -1;
df7492f9 1577
24a73b0a 1578 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1579
b49a1807 1580 if (bom == utf_16_with_bom)
aa72b389 1581 {
df7492f9 1582 int c, c1, c2;
4af310db 1583
aa72b389 1584 src_base = src;
df7492f9
KH
1585 ONE_MORE_BYTE (c1);
1586 ONE_MORE_BYTE (c2);
e19c3639 1587 c = (c1 << 8) | c2;
aa72b389 1588
b49a1807
KH
1589 if (endian == utf_16_big_endian
1590 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1591 {
b49a1807
KH
1592 /* The first two bytes are not BOM. Treat them as bytes
1593 for a normal character. */
1594 src = src_base;
1595 coding->errors++;
aa72b389 1596 }
b49a1807
KH
1597 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1598 }
1599 else if (bom == utf_16_detect_bom)
1600 {
1601 /* We have already tried to detect BOM and failed in
1602 detect_coding. */
1603 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
df7492f9 1604 }
aa72b389 1605
df7492f9
KH
1606 while (1)
1607 {
1608 int c, c1, c2;
1609
1610 src_base = src;
1611 consumed_chars_base = consumed_chars;
1612
1613 if (charbuf + 2 >= charbuf_end)
1614 break;
1615
119852e7
KH
1616 if (byte_after_cr1 >= 0)
1617 c1 = byte_after_cr1, byte_after_cr1 = -1;
1618 else
1619 ONE_MORE_BYTE (c1);
065e3595
KH
1620 if (c1 < 0)
1621 {
1622 *charbuf++ = -c1;
1623 continue;
1624 }
119852e7
KH
1625 if (byte_after_cr2 >= 0)
1626 c2 = byte_after_cr2, byte_after_cr2 = -1;
1627 else
1628 ONE_MORE_BYTE (c2);
065e3595
KH
1629 if (c2 < 0)
1630 {
1631 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1632 *charbuf++ = -c2;
1633 continue;
1634 }
df7492f9 1635 c = (endian == utf_16_big_endian
e19c3639 1636 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
119852e7 1637
df7492f9 1638 if (surrogate)
fd3ae0b9 1639 {
df7492f9 1640 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1641 {
df7492f9
KH
1642 if (endian == utf_16_big_endian)
1643 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1644 else
1645 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1646 *charbuf++ = c1;
1647 *charbuf++ = c2;
1648 coding->errors++;
1649 if (UTF_16_HIGH_SURROGATE_P (c))
1650 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1651 else
df7492f9 1652 *charbuf++ = c;
fd3ae0b9
KH
1653 }
1654 else
df7492f9
KH
1655 {
1656 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1657 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1658 *charbuf++ = 0x10000 + c;
df7492f9 1659 }
fd3ae0b9 1660 }
aa72b389 1661 else
df7492f9
KH
1662 {
1663 if (UTF_16_HIGH_SURROGATE_P (c))
1664 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1665 else
119852e7
KH
1666 {
1667 if (eol_crlf && c == '\r')
1668 {
1669 ONE_MORE_BYTE (byte_after_cr1);
1670 ONE_MORE_BYTE (byte_after_cr2);
1671 }
1672 *charbuf++ = c;
1673 }
8f924df7 1674 }
aa72b389 1675 }
df7492f9
KH
1676
1677 no_more_source:
1678 coding->consumed_char += consumed_chars_base;
1679 coding->consumed = src_base - coding->source;
1680 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1681}
b73bfc1c 1682
df7492f9
KH
1683static int
1684encode_coding_utf_16 (coding)
1685 struct coding_system *coding;
1686{
1687 int multibytep = coding->dst_multibyte;
1688 int *charbuf = coding->charbuf;
1689 int *charbuf_end = charbuf + coding->charbuf_used;
1690 unsigned char *dst = coding->destination + coding->produced;
1691 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1692 int safe_room = 8;
1693 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1694 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1695 int produced_chars = 0;
24a73b0a 1696 Lisp_Object attrs, charset_list;
df7492f9 1697 int c;
4ed46869 1698
24a73b0a 1699 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 1700
b49a1807 1701 if (bom != utf_16_without_bom)
df7492f9
KH
1702 {
1703 ASSURE_DESTINATION (safe_room);
1704 if (big_endian)
df7492f9 1705 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1706 else
1707 EMIT_TWO_BYTES (0xFF, 0xFE);
df7492f9
KH
1708 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1709 }
1710
1711 while (charbuf < charbuf_end)
1712 {
1713 ASSURE_DESTINATION (safe_room);
1714 c = *charbuf++;
e19c3639
KH
1715 if (c >= MAX_UNICODE_CHAR)
1716 c = coding->default_char;
df7492f9
KH
1717
1718 if (c < 0x10000)
1719 {
1720 if (big_endian)
1721 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1722 else
1723 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1724 }
1725 else
1726 {
1727 int c1, c2;
1728
1729 c -= 0x10000;
1730 c1 = (c >> 10) + 0xD800;
1731 c2 = (c & 0x3FF) + 0xDC00;
1732 if (big_endian)
1733 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1734 else
1735 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1736 }
1737 }
065e3595 1738 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1739 coding->produced = dst - coding->destination;
1740 coding->produced_char += produced_chars;
1741 return 0;
1742}
1743
1744\f
1745/*** 6. Old Emacs' internal format (emacs-mule) ***/
1746
1747/* Emacs' internal format for representation of multiple character
1748 sets is a kind of multi-byte encoding, i.e. characters are
1749 represented by variable-length sequences of one-byte codes.
1750
1751 ASCII characters and control characters (e.g. `tab', `newline') are
1752 represented by one-byte sequences which are their ASCII codes, in
1753 the range 0x00 through 0x7F.
1754
1755 8-bit characters of the range 0x80..0x9F are represented by
1756 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1757 code + 0x20).
1758
1759 8-bit characters of the range 0xA0..0xFF are represented by
1760 one-byte sequences which are their 8-bit code.
1761
1762 The other characters are represented by a sequence of `base
1763 leading-code', optional `extended leading-code', and one or two
1764 `position-code's. The length of the sequence is determined by the
1765 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1766 whereas extended leading-code and position-code take the range 0xA0
1767 through 0xFF. See `charset.h' for more details about leading-code
1768 and position-code.
1769
1770 --- CODE RANGE of Emacs' internal format ---
1771 character set range
1772 ------------- -----
1773 ascii 0x00..0x7F
1774 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1775 eight-bit-graphic 0xA0..0xBF
1776 ELSE 0x81..0x9D + [0xA0..0xFF]+
1777 ---------------------------------------------
1778
1779 As this is the internal character representation, the format is
1780 usually not used externally (i.e. in a file or in a data sent to a
1781 process). But, it is possible to have a text externally in this
1782 format (i.e. by encoding by the coding system `emacs-mule').
1783
1784 In that case, a sequence of one-byte codes has a slightly different
1785 form.
1786
1787 At first, all characters in eight-bit-control are represented by
1788 one-byte sequences which are their 8-bit code.
1789
1790 Next, character composition data are represented by the byte
1791 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1792 where,
1793 METHOD is 0xF0 plus one of composition method (enum
1794 composition_method),
1795
1796 BYTES is 0xA0 plus a byte length of this composition data,
1797
1798 CHARS is 0x20 plus a number of characters composed by this
1799 data,
1800
1801 COMPONENTs are characters of multibye form or composition
1802 rules encoded by two-byte of ASCII codes.
1803
1804 In addition, for backward compatibility, the following formats are
1805 also recognized as composition data on decoding.
1806
1807 0x80 MSEQ ...
1808 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1809
1810 Here,
1811 MSEQ is a multibyte form but in these special format:
1812 ASCII: 0xA0 ASCII_CODE+0x80,
1813 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1814 RULE is a one byte code of the range 0xA0..0xF0 that
1815 represents a composition rule.
1816 */
1817
1818char emacs_mule_bytes[256];
1819
df7492f9 1820int
ff0dacd7 1821emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1822 struct coding_system *coding;
065e3595 1823 const unsigned char *src;
ff0dacd7 1824 int *nbytes, *nchars, *id;
df7492f9 1825{
8f924df7
KH
1826 const unsigned char *src_end = coding->source + coding->src_bytes;
1827 const unsigned char *src_base = src;
df7492f9 1828 int multibytep = coding->src_multibyte;
df7492f9
KH
1829 struct charset *charset;
1830 unsigned code;
1831 int c;
1832 int consumed_chars = 0;
1833
1834 ONE_MORE_BYTE (c);
065e3595 1835 if (c < 0)
df7492f9 1836 {
065e3595
KH
1837 c = -c;
1838 charset = emacs_mule_charset[0];
1839 }
1840 else
1841 {
4d41e8b7
KH
1842 if (c >= 0xA0)
1843 {
b3af4b28 1844 /* Old style component character of a composition. */
4d41e8b7
KH
1845 if (c == 0xA0)
1846 {
1847 ONE_MORE_BYTE (c);
1848 c -= 0x80;
1849 }
1850 else
1851 c -= 0x20;
1852 }
1853
065e3595 1854 switch (emacs_mule_bytes[c])
b73bfc1c 1855 {
065e3595 1856 case 2:
df7492f9
KH
1857 if (! (charset = emacs_mule_charset[c]))
1858 goto invalid_code;
1859 ONE_MORE_BYTE (c);
9ffd559c 1860 if (c < 0xA0)
065e3595 1861 goto invalid_code;
df7492f9 1862 code = c & 0x7F;
065e3595
KH
1863 break;
1864
1865 case 3:
1866 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1867 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1868 {
1869 ONE_MORE_BYTE (c);
9ffd559c 1870 if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
065e3595
KH
1871 goto invalid_code;
1872 ONE_MORE_BYTE (c);
9ffd559c 1873 if (c < 0xA0)
065e3595
KH
1874 goto invalid_code;
1875 code = c & 0x7F;
1876 }
1877 else
1878 {
1879 if (! (charset = emacs_mule_charset[c]))
1880 goto invalid_code;
1881 ONE_MORE_BYTE (c);
9ffd559c 1882 if (c < 0xA0)
065e3595
KH
1883 goto invalid_code;
1884 code = (c & 0x7F) << 8;
1885 ONE_MORE_BYTE (c);
9ffd559c 1886 if (c < 0xA0)
065e3595
KH
1887 goto invalid_code;
1888 code |= c & 0x7F;
1889 }
1890 break;
1891
1892 case 4:
1893 ONE_MORE_BYTE (c);
1894 if (c < 0 || ! (charset = emacs_mule_charset[c]))
df7492f9
KH
1895 goto invalid_code;
1896 ONE_MORE_BYTE (c);
9ffd559c 1897 if (c < 0xA0)
065e3595 1898 goto invalid_code;
781d7a48 1899 code = (c & 0x7F) << 8;
df7492f9 1900 ONE_MORE_BYTE (c);
9ffd559c 1901 if (c < 0xA0)
065e3595 1902 goto invalid_code;
df7492f9 1903 code |= c & 0x7F;
065e3595 1904 break;
df7492f9 1905
065e3595
KH
1906 case 1:
1907 code = c;
1908 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1909 ? charset_ascii : charset_eight_bit);
1910 break;
df7492f9 1911
065e3595
KH
1912 default:
1913 abort ();
1914 }
1915 c = DECODE_CHAR (charset, code);
1916 if (c < 0)
1917 goto invalid_code;
df7492f9 1918 }
df7492f9
KH
1919 *nbytes = src - src_base;
1920 *nchars = consumed_chars;
ff0dacd7
KH
1921 if (id)
1922 *id = charset->id;
df7492f9
KH
1923 return c;
1924
1925 no_more_source:
1926 return -2;
1927
1928 invalid_code:
1929 return -1;
1930}
1931
1932
1933/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1934 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1935 else return 0. */
df7492f9
KH
1936
1937static int
ff0dacd7 1938detect_coding_emacs_mule (coding, detect_info)
df7492f9 1939 struct coding_system *coding;
ff0dacd7 1940 struct coding_detection_info *detect_info;
df7492f9 1941{
065e3595 1942 const unsigned char *src = coding->source, *src_base;
8f924df7 1943 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1944 int multibytep = coding->src_multibyte;
1945 int consumed_chars = 0;
1946 int c;
1947 int found = 0;
1948
ff0dacd7 1949 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1950 /* A coding system of this category is always ASCII compatible. */
1951 src += coding->head_ascii;
1952
1953 while (1)
1954 {
065e3595 1955 src_base = src;
df7492f9 1956 ONE_MORE_BYTE (c);
065e3595
KH
1957 if (c < 0)
1958 continue;
df7492f9
KH
1959 if (c == 0x80)
1960 {
1961 /* Perhaps the start of composite character. We simple skip
1962 it because analyzing it is too heavy for detecting. But,
1963 at least, we check that the composite character
1964 constitues of more than 4 bytes. */
8f924df7 1965 const unsigned char *src_base;
df7492f9
KH
1966
1967 repeat:
1968 src_base = src;
1969 do
1970 {
1971 ONE_MORE_BYTE (c);
1972 }
1973 while (c >= 0xA0);
1974
1975 if (src - src_base <= 4)
1976 break;
ff0dacd7 1977 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1978 if (c == 0x80)
1979 goto repeat;
b73bfc1c 1980 }
df7492f9
KH
1981
1982 if (c < 0x80)
b73bfc1c 1983 {
df7492f9
KH
1984 if (c < 0x20
1985 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1986 break;
1987 }
1988 else
1989 {
0e219d54 1990 int more_bytes = emacs_mule_bytes[*src_base] - 1;
df7492f9 1991
0e219d54 1992 while (more_bytes > 0)
df7492f9
KH
1993 {
1994 ONE_MORE_BYTE (c);
0e219d54
KH
1995 if (c < 0xA0)
1996 {
1997 src--; /* Unread the last byte. */
1998 break;
1999 }
2000 more_bytes--;
df7492f9 2001 }
0e219d54 2002 if (more_bytes != 0)
df7492f9 2003 break;
ff0dacd7 2004 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
2005 }
2006 }
ff0dacd7 2007 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
2008 return 0;
2009
2010 no_more_source:
065e3595 2011 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 2012 {
ff0dacd7 2013 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
2014 return 0;
2015 }
ff0dacd7
KH
2016 detect_info->found |= found;
2017 return 1;
4ed46869
KH
2018}
2019
b73bfc1c 2020
df7492f9
KH
2021/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2022
2023/* Decode a character represented as a component of composition
2024 sequence of Emacs 20/21 style at SRC. Set C to that character and
2025 update SRC to the head of next character (or an encoded composition
2026 rule). If SRC doesn't points a composition component, set C to -1.
2027 If SRC points an invalid byte sequence, global exit by a return
2028 value 0. */
2029
2030#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
f937a7db 2031 do \
df7492f9
KH
2032 { \
2033 int c; \
2034 int nbytes, nchars; \
2035 \
2036 if (src == src_end) \
2037 break; \
ff0dacd7 2038 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
df7492f9
KH
2039 if (c < 0) \
2040 { \
2041 if (c == -2) \
2042 break; \
2043 goto invalid_code; \
2044 } \
2045 *buf++ = c; \
2046 src += nbytes; \
2047 consumed_chars += nchars; \
2048 } \
f937a7db 2049 while (0)
df7492f9
KH
2050
2051
2052/* Decode a composition rule represented as a component of composition
781d7a48
KH
2053 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
2054 and increment BUF. If SRC points an invalid byte sequence, set C
2055 to -1. */
df7492f9 2056
781d7a48 2057#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
df7492f9
KH
2058 do { \
2059 int c, gref, nref; \
2060 \
781d7a48 2061 if (src >= src_end) \
df7492f9
KH
2062 goto invalid_code; \
2063 ONE_MORE_BYTE_NO_CHECK (c); \
4d41e8b7 2064 c -= 0xA0; \
df7492f9
KH
2065 if (c < 0 || c >= 81) \
2066 goto invalid_code; \
2067 \
2068 gref = c / 9, nref = c % 9; \
2069 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
2070 } while (0)
2071
2072
781d7a48
KH
2073/* Decode a composition rule represented as a component of composition
2074 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
2075 and increment BUF. If SRC points an invalid byte sequence, set C
2076 to -1. */
2077
2078#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
2079 do { \
2080 int gref, nref; \
2081 \
2082 if (src + 1>= src_end) \
2083 goto invalid_code; \
2084 ONE_MORE_BYTE_NO_CHECK (gref); \
2085 gref -= 0x20; \
2086 ONE_MORE_BYTE_NO_CHECK (nref); \
2087 nref -= 0x20; \
2088 if (gref < 0 || gref >= 81 \
2089 || nref < 0 || nref >= 81) \
2090 goto invalid_code; \
2091 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
2092 } while (0)
2093
2094
df7492f9 2095#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
aa72b389 2096 do { \
df7492f9 2097 /* Emacs 21 style format. The first three bytes at SRC are \
781d7a48 2098 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
df7492f9
KH
2099 the byte length of this composition information, CHARS is the \
2100 number of characters composed by this composition. */ \
781d7a48
KH
2101 enum composition_method method = c - 0xF2; \
2102 int *charbuf_base = charbuf; \
df7492f9
KH
2103 int consumed_chars_limit; \
2104 int nbytes, nchars; \
2105 \
2106 ONE_MORE_BYTE (c); \
065e3595
KH
2107 if (c < 0) \
2108 goto invalid_code; \
df7492f9
KH
2109 nbytes = c - 0xA0; \
2110 if (nbytes < 3) \
2111 goto invalid_code; \
2112 ONE_MORE_BYTE (c); \
065e3595
KH
2113 if (c < 0) \
2114 goto invalid_code; \
df7492f9 2115 nchars = c - 0xA0; \
69a80ea3 2116 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
df7492f9
KH
2117 consumed_chars_limit = consumed_chars_base + nbytes; \
2118 if (method != COMPOSITION_RELATIVE) \
aa72b389 2119 { \
df7492f9
KH
2120 int i = 0; \
2121 while (consumed_chars < consumed_chars_limit) \
aa72b389 2122 { \
df7492f9 2123 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
781d7a48 2124 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
df7492f9
KH
2125 else \
2126 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
781d7a48 2127 i++; \
aa72b389 2128 } \
df7492f9
KH
2129 if (consumed_chars < consumed_chars_limit) \
2130 goto invalid_code; \
781d7a48 2131 charbuf_base[0] -= i; \
aa72b389
KH
2132 } \
2133 } while (0)
93dec019 2134
aa72b389 2135
d959f512
KH
2136#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
2137 do { \
2138 /* Emacs 20 style format for relative composition. */ \
2139 /* Store multibyte form of characters to be composed. */ \
2140 enum composition_method method = COMPOSITION_RELATIVE; \
2141 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
2142 int *buf = components; \
2143 int i, j; \
2144 \
2145 src = src_base; \
2146 ONE_MORE_BYTE (c); /* skip 0x80 */ \
2147 for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++) \
2148 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2149 if (i < 2) \
2150 goto invalid_code; \
2151 ADD_COMPOSITION_DATA (charbuf, i, method); \
2152 for (j = 0; j < i; j++) \
2153 *charbuf++ = components[j]; \
df7492f9
KH
2154 } while (0)
2155
2156
2157#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
2158 do { \
2159 /* Emacs 20 style format for rule-base composition. */ \
2160 /* Store multibyte form of characters to be composed. */ \
ff0dacd7 2161 enum composition_method method = COMPOSITION_WITH_RULE; \
4d41e8b7 2162 int *charbuf_base = charbuf; \
df7492f9
KH
2163 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
2164 int *buf = components; \
2165 int i, j; \
4d41e8b7 2166 \
df7492f9 2167 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
4d41e8b7 2168 for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++) \
df7492f9 2169 { \
4d41e8b7
KH
2170 if (*src < 0xA0) \
2171 break; \
781d7a48 2172 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
df7492f9
KH
2173 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2174 } \
4d41e8b7 2175 if (i <= 1 || (buf - components) % 2 == 0) \
df7492f9 2176 goto invalid_code; \
4d41e8b7 2177 if (charbuf + i + (i / 2) + 1 >= charbuf_end) \
df7492f9 2178 goto no_more_source; \
4d41e8b7
KH
2179 ADD_COMPOSITION_DATA (charbuf, i, method); \
2180 i = i * 2 - 1; \
df7492f9
KH
2181 for (j = 0; j < i; j++) \
2182 *charbuf++ = components[j]; \
4d41e8b7 2183 charbuf_base[0] -= i; \
df7492f9
KH
2184 for (j = 0; j < i; j += 2) \
2185 *charbuf++ = components[j]; \
2186 } while (0)
2187
aa72b389
KH
2188
2189static void
df7492f9 2190decode_coding_emacs_mule (coding)
aa72b389 2191 struct coding_system *coding;
aa72b389 2192{
8f924df7
KH
2193 const unsigned char *src = coding->source + coding->consumed;
2194 const unsigned char *src_end = coding->source + coding->src_bytes;
2195 const unsigned char *src_base;
69a80ea3
KH
2196 int *charbuf = coding->charbuf + coding->charbuf_used;
2197 int *charbuf_end
2198 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 2199 int consumed_chars = 0, consumed_chars_base;
df7492f9 2200 int multibytep = coding->src_multibyte;
24a73b0a 2201 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2202 int char_offset = coding->produced_char;
2203 int last_offset = char_offset;
2204 int last_id = charset_ascii;
119852e7
KH
2205 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2206 int byte_after_cr = -1;
aa72b389 2207
24a73b0a 2208 CODING_GET_INFO (coding, attrs, charset_list);
aa72b389 2209
aa72b389
KH
2210 while (1)
2211 {
df7492f9
KH
2212 int c;
2213
aa72b389 2214 src_base = src;
df7492f9
KH
2215 consumed_chars_base = consumed_chars;
2216
2217 if (charbuf >= charbuf_end)
2218 break;
aa72b389 2219
119852e7
KH
2220 if (byte_after_cr >= 0)
2221 c = byte_after_cr, byte_after_cr = -1;
2222 else
2223 ONE_MORE_BYTE (c);
065e3595
KH
2224 if (c < 0)
2225 {
2226 *charbuf++ = -c;
2227 char_offset++;
2228 }
2229 else if (c < 0x80)
aa72b389 2230 {
119852e7
KH
2231 if (eol_crlf && c == '\r')
2232 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
2233 *charbuf++ = c;
2234 char_offset++;
aa72b389 2235 }
df7492f9
KH
2236 else if (c == 0x80)
2237 {
df7492f9 2238 ONE_MORE_BYTE (c);
065e3595
KH
2239 if (c < 0)
2240 goto invalid_code;
781d7a48
KH
2241 if (c - 0xF2 >= COMPOSITION_RELATIVE
2242 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
2243 DECODE_EMACS_MULE_21_COMPOSITION (c);
2244 else if (c < 0xC0)
2245 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2246 else if (c == 0xFF)
2247 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2248 else
2249 goto invalid_code;
2250 }
2251 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2252 {
2253 int nbytes, nchars;
ff0dacd7
KH
2254 int id;
2255
781d7a48
KH
2256 src = src_base;
2257 consumed_chars = consumed_chars_base;
ff0dacd7 2258 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
2259 if (c < 0)
2260 {
2261 if (c == -2)
2262 break;
2263 goto invalid_code;
2264 }
ff0dacd7
KH
2265 if (last_id != id)
2266 {
2267 if (last_id != charset_ascii)
69a80ea3 2268 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
2269 last_id = id;
2270 last_offset = char_offset;
2271 }
df7492f9 2272 *charbuf++ = c;
781d7a48
KH
2273 src += nbytes;
2274 consumed_chars += nchars;
df7492f9
KH
2275 char_offset++;
2276 }
4d41e8b7
KH
2277 else
2278 goto invalid_code;
df7492f9
KH
2279 continue;
2280
2281 invalid_code:
2282 src = src_base;
2283 consumed_chars = consumed_chars_base;
2284 ONE_MORE_BYTE (c);
2285 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2286 char_offset++;
df7492f9
KH
2287 coding->errors++;
2288 }
2289
2290 no_more_source:
ff0dacd7 2291 if (last_id != charset_ascii)
69a80ea3 2292 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
2293 coding->consumed_char += consumed_chars_base;
2294 coding->consumed = src_base - coding->source;
2295 coding->charbuf_used = charbuf - coding->charbuf;
2296}
2297
2298
2299#define EMACS_MULE_LEADING_CODES(id, codes) \
2300 do { \
2301 if (id < 0xA0) \
2302 codes[0] = id, codes[1] = 0; \
2303 else if (id < 0xE0) \
2304 codes[0] = 0x9A, codes[1] = id; \
2305 else if (id < 0xF0) \
2306 codes[0] = 0x9B, codes[1] = id; \
2307 else if (id < 0xF5) \
2308 codes[0] = 0x9C, codes[1] = id; \
2309 else \
2310 codes[0] = 0x9D, codes[1] = id; \
2311 } while (0);
2312
aa72b389 2313
df7492f9
KH
2314static int
2315encode_coding_emacs_mule (coding)
2316 struct coding_system *coding;
2317{
2318 int multibytep = coding->dst_multibyte;
2319 int *charbuf = coding->charbuf;
2320 int *charbuf_end = charbuf + coding->charbuf_used;
2321 unsigned char *dst = coding->destination + coding->produced;
2322 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2323 int safe_room = 8;
df7492f9 2324 int produced_chars = 0;
24a73b0a 2325 Lisp_Object attrs, charset_list;
df7492f9 2326 int c;
ff0dacd7 2327 int preferred_charset_id = -1;
df7492f9 2328
24a73b0a 2329 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2330 if (! EQ (charset_list, Vemacs_mule_charset_list))
2331 {
2332 CODING_ATTR_CHARSET_LIST (attrs)
2333 = charset_list = Vemacs_mule_charset_list;
2334 }
df7492f9
KH
2335
2336 while (charbuf < charbuf_end)
2337 {
2338 ASSURE_DESTINATION (safe_room);
2339 c = *charbuf++;
ff0dacd7
KH
2340
2341 if (c < 0)
2342 {
2343 /* Handle an annotation. */
2344 switch (*charbuf)
2345 {
2346 case CODING_ANNOTATE_COMPOSITION_MASK:
2347 /* Not yet implemented. */
2348 break;
2349 case CODING_ANNOTATE_CHARSET_MASK:
2350 preferred_charset_id = charbuf[3];
2351 if (preferred_charset_id >= 0
2352 && NILP (Fmemq (make_number (preferred_charset_id),
2353 charset_list)))
2354 preferred_charset_id = -1;
2355 break;
2356 default:
2357 abort ();
2358 }
2359 charbuf += -c - 1;
2360 continue;
2361 }
2362
df7492f9
KH
2363 if (ASCII_CHAR_P (c))
2364 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2365 else if (CHAR_BYTE8_P (c))
2366 {
2367 c = CHAR_TO_BYTE8 (c);
2368 EMIT_ONE_BYTE (c);
2369 }
df7492f9 2370 else
aa72b389 2371 {
df7492f9
KH
2372 struct charset *charset;
2373 unsigned code;
2374 int dimension;
2375 int emacs_mule_id;
2376 unsigned char leading_codes[2];
2377
ff0dacd7
KH
2378 if (preferred_charset_id >= 0)
2379 {
2380 charset = CHARSET_FROM_ID (preferred_charset_id);
2381 if (! CHAR_CHARSET_P (c, charset))
2382 charset = char_charset (c, charset_list, NULL);
2383 }
2384 else
2385 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2386 if (! charset)
2387 {
2388 c = coding->default_char;
2389 if (ASCII_CHAR_P (c))
2390 {
2391 EMIT_ONE_ASCII_BYTE (c);
2392 continue;
2393 }
2394 charset = char_charset (c, charset_list, &code);
2395 }
2396 dimension = CHARSET_DIMENSION (charset);
2397 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2398 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2399 EMIT_ONE_BYTE (leading_codes[0]);
2400 if (leading_codes[1])
2401 EMIT_ONE_BYTE (leading_codes[1]);
2402 if (dimension == 1)
1fa663f9 2403 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2404 else
df7492f9 2405 {
1fa663f9 2406 code |= 0x8080;
df7492f9
KH
2407 EMIT_ONE_BYTE (code >> 8);
2408 EMIT_ONE_BYTE (code & 0xFF);
2409 }
aa72b389 2410 }
aa72b389 2411 }
065e3595 2412 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2413 coding->produced_char += produced_chars;
2414 coding->produced = dst - coding->destination;
2415 return 0;
aa72b389 2416}
b73bfc1c 2417
4ed46869 2418\f
df7492f9 2419/*** 7. ISO2022 handlers ***/
4ed46869
KH
2420
2421/* The following note describes the coding system ISO2022 briefly.
39787efd 2422 Since the intention of this note is to help understand the
5a936b46 2423 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2424 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2425 original document of ISO2022. This is equivalent to the standard
cfb43547 2426 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2427
2428 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2429 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2430 is encoded using bytes less than 128. This may make the encoded
2431 text a little bit longer, but the text passes more easily through
cfb43547 2432 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2433 Significant Bit).
b73bfc1c 2434
cfb43547
DL
2435 There are two kinds of character sets: control character sets and
2436 graphic character sets. The former contain control characters such
4ed46869 2437 as `newline' and `escape' to provide control functions (control
39787efd 2438 functions are also provided by escape sequences). The latter
cfb43547 2439 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2440 two control character sets and many graphic character sets.
2441
2442 Graphic character sets are classified into one of the following
39787efd
KH
2443 four classes, according to the number of bytes (DIMENSION) and
2444 number of characters in one dimension (CHARS) of the set:
2445 - DIMENSION1_CHARS94
2446 - DIMENSION1_CHARS96
2447 - DIMENSION2_CHARS94
2448 - DIMENSION2_CHARS96
2449
2450 In addition, each character set is assigned an identification tag,
cfb43547 2451 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2452 hereafter). The <F> of each character set is decided by ECMA(*)
2453 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2454 (0x30..0x3F are for private use only).
4ed46869
KH
2455
2456 Note (*): ECMA = European Computer Manufacturers Association
2457
cfb43547 2458 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2459 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2460 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2461 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2462 o DIMENSION2_CHARS96 -- none for the moment
2463
39787efd 2464 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2465 C0 [0x00..0x1F] -- control character plane 0
2466 GL [0x20..0x7F] -- graphic character plane 0
2467 C1 [0x80..0x9F] -- control character plane 1
2468 GR [0xA0..0xFF] -- graphic character plane 1
2469
2470 A control character set is directly designated and invoked to C0 or
39787efd
KH
2471 C1 by an escape sequence. The most common case is that:
2472 - ISO646's control character set is designated/invoked to C0, and
2473 - ISO6429's control character set is designated/invoked to C1,
2474 and usually these designations/invocations are omitted in encoded
2475 text. In a 7-bit environment, only C0 can be used, and a control
2476 character for C1 is encoded by an appropriate escape sequence to
2477 fit into the environment. All control characters for C1 are
2478 defined to have corresponding escape sequences.
4ed46869
KH
2479
2480 A graphic character set is at first designated to one of four
2481 graphic registers (G0 through G3), then these graphic registers are
2482 invoked to GL or GR. These designations and invocations can be
2483 done independently. The most common case is that G0 is invoked to
39787efd
KH
2484 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2485 these invocations and designations are omitted in encoded text.
2486 In a 7-bit environment, only GL can be used.
4ed46869 2487
39787efd
KH
2488 When a graphic character set of CHARS94 is invoked to GL, codes
2489 0x20 and 0x7F of the GL area work as control characters SPACE and
2490 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2491 be used.
4ed46869
KH
2492
2493 There are two ways of invocation: locking-shift and single-shift.
2494 With locking-shift, the invocation lasts until the next different
39787efd
KH
2495 invocation, whereas with single-shift, the invocation affects the
2496 following character only and doesn't affect the locking-shift
2497 state. Invocations are done by the following control characters or
2498 escape sequences:
4ed46869
KH
2499
2500 ----------------------------------------------------------------------
39787efd 2501 abbrev function cntrl escape seq description
4ed46869 2502 ----------------------------------------------------------------------
39787efd
KH
2503 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2504 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2505 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2506 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2507 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2508 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2509 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2510 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2511 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2512 ----------------------------------------------------------------------
39787efd
KH
2513 (*) These are not used by any known coding system.
2514
2515 Control characters for these functions are defined by macros
2516 ISO_CODE_XXX in `coding.h'.
4ed46869 2517
39787efd 2518 Designations are done by the following escape sequences:
4ed46869
KH
2519 ----------------------------------------------------------------------
2520 escape sequence description
2521 ----------------------------------------------------------------------
2522 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2523 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2524 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2525 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2526 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2527 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2528 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2529 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2530 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2531 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2532 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2533 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2534 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2535 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2536 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2537 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2538 ----------------------------------------------------------------------
2539
2540 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2541 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2542
2543 Note (*): Although these designations are not allowed in ISO2022,
2544 Emacs accepts them on decoding, and produces them on encoding
39787efd 2545 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2546 7-bit environment, non-locking-shift, and non-single-shift.
2547
2548 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2549 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2550
cfb43547 2551 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2552 same multilingual text in ISO2022. Actually, there exist many
2553 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2554 communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2555 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2556 localized platforms), and all of these are variants of ISO2022.
2557
2558 In addition to the above, Emacs handles two more kinds of escape
2559 sequences: ISO6429's direction specification and Emacs' private
2560 sequence for specifying character composition.
2561
39787efd 2562 ISO6429's direction specification takes the following form:
4ed46869
KH
2563 o CSI ']' -- end of the current direction
2564 o CSI '0' ']' -- end of the current direction
2565 o CSI '1' ']' -- start of left-to-right text
2566 o CSI '2' ']' -- start of right-to-left text
2567 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2568 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2569
2570 Character composition specification takes the following form:
ec6d2bb8
KH
2571 o ESC '0' -- start relative composition
2572 o ESC '1' -- end composition
2573 o ESC '2' -- start rule-base composition (*)
2574 o ESC '3' -- start relative composition with alternate chars (**)
2575 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2576 Since these are not standard escape sequences of any ISO standard,
cfb43547 2577 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2578
5a936b46
DL
2579 (*) This form is used only in Emacs 20.7 and older versions,
2580 but newer versions can safely decode it.
cfb43547 2581 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2582 and older versions can't decode it.
ec6d2bb8 2583
cfb43547 2584 Here's a list of example usages of these composition escape
b73bfc1c 2585 sequences (categorized by `enum composition_method').
ec6d2bb8 2586
b73bfc1c 2587 COMPOSITION_RELATIVE:
ec6d2bb8 2588 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2589 COMPOSITION_WITH_RULE:
ec6d2bb8 2590 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2591 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2592 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2593 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2594 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2595
2596enum iso_code_class_type iso_code_class[256];
2597
df7492f9
KH
2598#define SAFE_CHARSET_P(coding, id) \
2599 ((id) <= (coding)->max_charset_id \
2600 && (coding)->safe_charsets[id] >= 0)
2601
2602
2603#define SHIFT_OUT_OK(category) \
2604 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2605
2606static void
f0064e1f
DL
2607setup_iso_safe_charsets (attrs)
2608 Lisp_Object attrs;
df7492f9
KH
2609{
2610 Lisp_Object charset_list, safe_charsets;
2611 Lisp_Object request;
2612 Lisp_Object reg_usage;
2613 Lisp_Object tail;
2614 int reg94, reg96;
2615 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2616 int max_charset_id;
2617
2618 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2619 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2620 && ! EQ (charset_list, Viso_2022_charset_list))
2621 {
2622 CODING_ATTR_CHARSET_LIST (attrs)
2623 = charset_list = Viso_2022_charset_list;
2624 ASET (attrs, coding_attr_safe_charsets, Qnil);
2625 }
2626
2627 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2628 return;
2629
2630 max_charset_id = 0;
2631 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2632 {
2633 int id = XINT (XCAR (tail));
2634 if (max_charset_id < id)
2635 max_charset_id = id;
2636 }
d46c5b12 2637
df7492f9
KH
2638 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2639 make_number (255));
2640 request = AREF (attrs, coding_attr_iso_request);
2641 reg_usage = AREF (attrs, coding_attr_iso_usage);
2642 reg94 = XINT (XCAR (reg_usage));
2643 reg96 = XINT (XCDR (reg_usage));
2644
2645 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2646 {
2647 Lisp_Object id;
2648 Lisp_Object reg;
2649 struct charset *charset;
2650
2651 id = XCAR (tail);
2652 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2653 reg = Fcdr (Fassq (id, request));
df7492f9 2654 if (! NILP (reg))
8f924df7 2655 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
2656 else if (charset->iso_chars_96)
2657 {
2658 if (reg96 < 4)
8f924df7 2659 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
2660 }
2661 else
2662 {
2663 if (reg94 < 4)
8f924df7 2664 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
2665 }
2666 }
2667 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2668}
d46c5b12 2669
b6871cc7 2670
4ed46869 2671/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2672 Check if a text is encoded in one of ISO-2022 based codig systems.
2673 If it is, return 1, else return 0. */
4ed46869 2674
0a28aafb 2675static int
ff0dacd7 2676detect_coding_iso_2022 (coding, detect_info)
df7492f9 2677 struct coding_system *coding;
ff0dacd7 2678 struct coding_detection_info *detect_info;
4ed46869 2679{
8f924df7
KH
2680 const unsigned char *src = coding->source, *src_base = src;
2681 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 2682 int multibytep = coding->src_multibyte;
ff0dacd7 2683 int single_shifting = 0;
df7492f9
KH
2684 int id;
2685 int c, c1;
2686 int consumed_chars = 0;
2687 int i;
ff0dacd7
KH
2688 int rejected = 0;
2689 int found = 0;
2690
2691 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2692
2693 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2694 {
2695 struct coding_system *this = &(coding_categories[i]);
2696 Lisp_Object attrs, val;
2697
2698 attrs = CODING_ID_ATTRS (this->id);
2699 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2700 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2701 setup_iso_safe_charsets (attrs);
2702 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
2703 this->max_charset_id = SCHARS (val) - 1;
2704 this->safe_charsets = (char *) SDATA (val);
df7492f9
KH
2705 }
2706
2707 /* A coding system of this category is always ASCII compatible. */
2708 src += coding->head_ascii;
3f003981 2709
ff0dacd7 2710 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2711 {
065e3595 2712 src_base = src;
df7492f9 2713 ONE_MORE_BYTE (c);
4ed46869
KH
2714 switch (c)
2715 {
2716 case ISO_CODE_ESC:
74383408
KH
2717 if (inhibit_iso_escape_detection)
2718 break;
f46869e4 2719 single_shifting = 0;
df7492f9 2720 ONE_MORE_BYTE (c);
d46c5b12 2721 if (c >= '(' && c <= '/')
4ed46869 2722 {
bf9cdd4e 2723 /* Designation sequence for a charset of dimension 1. */
df7492f9 2724 ONE_MORE_BYTE (c1);
d46c5b12 2725 if (c1 < ' ' || c1 >= 0x80
df7492f9 2726 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2727 /* Invalid designation sequence. Just ignore. */
2728 break;
bf9cdd4e
KH
2729 }
2730 else if (c == '$')
2731 {
2732 /* Designation sequence for a charset of dimension 2. */
df7492f9 2733 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2734 if (c >= '@' && c <= 'B')
2735 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2736 id = iso_charset_table[1][0][c];
bf9cdd4e 2737 else if (c >= '(' && c <= '/')
bcf26d6a 2738 {
df7492f9 2739 ONE_MORE_BYTE (c1);
d46c5b12 2740 if (c1 < ' ' || c1 >= 0x80
df7492f9 2741 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2742 /* Invalid designation sequence. Just ignore. */
2743 break;
bcf26d6a 2744 }
bf9cdd4e 2745 else
ff0dacd7 2746 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2747 break;
2748 }
ae9ff118 2749 else if (c == 'N' || c == 'O')
d46c5b12 2750 {
ae9ff118 2751 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2752 single_shifting = 1;
2753 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2754 break;
4ed46869 2755 }
ec6d2bb8
KH
2756 else if (c >= '0' && c <= '4')
2757 {
2758 /* ESC <Fp> for start/end composition. */
ff0dacd7 2759 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2760 break;
2761 }
bf9cdd4e 2762 else
df7492f9 2763 {
ff0dacd7 2764 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2765 break;
2766 }
d46c5b12
KH
2767
2768 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2769 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2770 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2771 id))
ff0dacd7 2772 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2773 else
ff0dacd7 2774 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2775 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2776 id))
ff0dacd7 2777 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2778 else
ff0dacd7 2779 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2780 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2781 id))
ff0dacd7 2782 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2783 else
ff0dacd7 2784 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2785 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2786 id))
ff0dacd7 2787 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2788 else
ff0dacd7 2789 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2790 break;
2791
4ed46869 2792 case ISO_CODE_SO:
d46c5b12 2793 case ISO_CODE_SI:
ff0dacd7 2794 /* Locking shift out/in. */
74383408
KH
2795 if (inhibit_iso_escape_detection)
2796 break;
f46869e4 2797 single_shifting = 0;
ff0dacd7 2798 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12
KH
2799 break;
2800
4ed46869 2801 case ISO_CODE_CSI:
ff0dacd7 2802 /* Control sequence introducer. */
f46869e4 2803 single_shifting = 0;
ff0dacd7
KH
2804 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2805 found |= CATEGORY_MASK_ISO_8_ELSE;
2806 goto check_extra_latin;
2807
4ed46869
KH
2808 case ISO_CODE_SS2:
2809 case ISO_CODE_SS3:
ff0dacd7
KH
2810 /* Single shift. */
2811 if (inhibit_iso_escape_detection)
2812 break;
75e2a253 2813 single_shifting = 0;
ff0dacd7
KH
2814 rejected |= CATEGORY_MASK_ISO_7BIT;
2815 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2816 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253 2817 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
ff0dacd7
KH
2818 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2819 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253
KH
2820 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2821 if (single_shifting)
2822 break;
ff0dacd7 2823 goto check_extra_latin;
4ed46869
KH
2824
2825 default:
065e3595
KH
2826 if (c < 0)
2827 continue;
4ed46869 2828 if (c < 0x80)
f46869e4
KH
2829 {
2830 single_shifting = 0;
2831 break;
2832 }
ff0dacd7 2833 if (c >= 0xA0)
c4825358 2834 {
ff0dacd7
KH
2835 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2836 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2837 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2838 0xA0..0FF. If the byte length is even, we include
2839 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2840 only when we are not single shifting. */
2841 if (! single_shifting
2842 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2843 {
e17de821 2844 int i = 1;
b73bfc1c
KH
2845 while (src < src_end)
2846 {
df7492f9 2847 ONE_MORE_BYTE (c);
b73bfc1c
KH
2848 if (c < 0xA0)
2849 break;
2850 i++;
2851 }
2852
2853 if (i & 1 && src < src_end)
ff0dacd7 2854 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2855 else
ff0dacd7 2856 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2857 }
ff0dacd7 2858 break;
4ed46869 2859 }
ff0dacd7
KH
2860 check_extra_latin:
2861 single_shifting = 0;
2862 if (! VECTORP (Vlatin_extra_code_table)
2863 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2864 {
2865 rejected = CATEGORY_MASK_ISO;
2866 break;
2867 }
2868 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2869 & CODING_ISO_FLAG_LATIN_EXTRA)
2870 found |= CATEGORY_MASK_ISO_8_1;
2871 else
2872 rejected |= CATEGORY_MASK_ISO_8_1;
75e2a253 2873 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2874 }
2875 }
ff0dacd7
KH
2876 detect_info->rejected |= CATEGORY_MASK_ISO;
2877 return 0;
4ed46869 2878
df7492f9 2879 no_more_source:
ff0dacd7
KH
2880 detect_info->rejected |= rejected;
2881 detect_info->found |= (found & ~rejected);
df7492f9 2882 return 1;
4ed46869 2883}
ec6d2bb8 2884
4ed46869 2885
134b9549
KH
2886/* Set designation state into CODING. Set CHARS_96 to -1 if the
2887 escape sequence should be kept. */
df7492f9
KH
2888#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2889 do { \
2890 int id, prev; \
2891 \
2892 if (final < '0' || final >= 128 \
2893 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2894 || !SAFE_CHARSET_P (coding, id)) \
2895 { \
2896 CODING_ISO_DESIGNATION (coding, reg) = -2; \
134b9549
KH
2897 chars_96 = -1; \
2898 break; \
df7492f9
KH
2899 } \
2900 prev = CODING_ISO_DESIGNATION (coding, reg); \
bf16eb23
KH
2901 if (id == charset_jisx0201_roman) \
2902 { \
2903 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2904 id = charset_ascii; \
2905 } \
2906 else if (id == charset_jisx0208_1978) \
2907 { \
2908 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2909 id = charset_jisx0208; \
2910 } \
df7492f9
KH
2911 CODING_ISO_DESIGNATION (coding, reg) = id; \
2912 /* If there was an invalid designation to REG previously, and this \
2913 designation is ASCII to REG, we should keep this designation \
2914 sequence. */ \
2915 if (prev == -2 && id == charset_ascii) \
134b9549 2916 chars_96 = -1; \
4ed46869
KH
2917 } while (0)
2918
d46c5b12 2919
df7492f9
KH
2920#define MAYBE_FINISH_COMPOSITION() \
2921 do { \
2922 int i; \
2923 if (composition_state == COMPOSING_NO) \
2924 break; \
2925 /* It is assured that we have enough room for producing \
2926 characters stored in the table `components'. */ \
2927 if (charbuf + component_idx > charbuf_end) \
2928 goto no_more_source; \
2929 composition_state = COMPOSING_NO; \
2930 if (method == COMPOSITION_RELATIVE \
2931 || method == COMPOSITION_WITH_ALTCHARS) \
2932 { \
2933 for (i = 0; i < component_idx; i++) \
2934 *charbuf++ = components[i]; \
2935 char_offset += component_idx; \
2936 } \
2937 else \
2938 { \
2939 for (i = 0; i < component_idx; i += 2) \
2940 *charbuf++ = components[i]; \
2941 char_offset += (component_idx / 2) + 1; \
2942 } \
2943 } while (0)
2944
d46c5b12 2945
aa72b389
KH
2946/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2947 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2948 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
df7492f9
KH
2949 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2950 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
aa72b389 2951 */
ec6d2bb8 2952
df7492f9
KH
2953#define DECODE_COMPOSITION_START(c1) \
2954 do { \
2955 if (c1 == '0' \
781d7a48 2956 && composition_state == COMPOSING_COMPONENT_RULE) \
df7492f9
KH
2957 { \
2958 component_len = component_idx; \
2959 composition_state = COMPOSING_CHAR; \
2960 } \
2961 else \
2962 { \
8f924df7 2963 const unsigned char *p; \
df7492f9
KH
2964 \
2965 MAYBE_FINISH_COMPOSITION (); \
2966 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2967 goto no_more_source; \
2968 for (p = src; p < src_end - 1; p++) \
2969 if (*p == ISO_CODE_ESC && p[1] == '1') \
2970 break; \
2971 if (p == src_end - 1) \
2972 { \
9286b333
KH
2973 /* The current composition doesn't end in the current \
2974 source. */ \
2975 record_conversion_result \
2976 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
df7492f9
KH
2977 goto no_more_source; \
2978 } \
2979 \
2980 /* This is surely the start of a composition. */ \
2981 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2982 : c1 == '2' ? COMPOSITION_WITH_RULE \
2983 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2984 : COMPOSITION_WITH_RULE_ALTCHARS); \
2985 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2986 : COMPOSING_COMPONENT_CHAR); \
2987 component_idx = component_len = 0; \
2988 } \
ec6d2bb8
KH
2989 } while (0)
2990
ec6d2bb8 2991
df7492f9
KH
2992/* Handle compositoin end sequence ESC 1. */
2993
2994#define DECODE_COMPOSITION_END() \
ec6d2bb8 2995 do { \
df7492f9
KH
2996 int nchars = (component_len > 0 ? component_idx - component_len \
2997 : method == COMPOSITION_RELATIVE ? component_idx \
2998 : (component_idx + 1) / 2); \
2999 int i; \
3000 int *saved_charbuf = charbuf; \
3001 \
69a80ea3 3002 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
df7492f9 3003 if (method != COMPOSITION_RELATIVE) \
ec6d2bb8 3004 { \
df7492f9
KH
3005 if (component_len == 0) \
3006 for (i = 0; i < component_idx; i++) \
3007 *charbuf++ = components[i]; \
3008 else \
3009 for (i = 0; i < component_len; i++) \
3010 *charbuf++ = components[i]; \
3011 *saved_charbuf = saved_charbuf - charbuf; \
ec6d2bb8 3012 } \
df7492f9
KH
3013 if (method == COMPOSITION_WITH_RULE) \
3014 for (i = 0; i < component_idx; i += 2, char_offset++) \
3015 *charbuf++ = components[i]; \
ec6d2bb8 3016 else \
df7492f9
KH
3017 for (i = component_len; i < component_idx; i++, char_offset++) \
3018 *charbuf++ = components[i]; \
3019 coding->annotated = 1; \
3020 composition_state = COMPOSING_NO; \
ec6d2bb8
KH
3021 } while (0)
3022
df7492f9 3023
ec6d2bb8
KH
3024/* Decode a composition rule from the byte C1 (and maybe one more byte
3025 from SRC) and store one encoded composition rule in
3026 coding->cmp_data. */
3027
3028#define DECODE_COMPOSITION_RULE(c1) \
3029 do { \
ec6d2bb8
KH
3030 (c1) -= 32; \
3031 if (c1 < 81) /* old format (before ver.21) */ \
3032 { \
3033 int gref = (c1) / 9; \
3034 int nref = (c1) % 9; \
3035 if (gref == 4) gref = 10; \
3036 if (nref == 4) nref = 10; \
df7492f9 3037 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
ec6d2bb8 3038 } \
b73bfc1c 3039 else if (c1 < 93) /* new format (after ver.21) */ \
ec6d2bb8
KH
3040 { \
3041 ONE_MORE_BYTE (c2); \
df7492f9 3042 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
ec6d2bb8 3043 } \
df7492f9
KH
3044 else \
3045 c1 = 0; \
ec6d2bb8 3046 } while (0)
88993dfd 3047
d46c5b12 3048
4ed46869
KH
3049/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
3050
b73bfc1c 3051static void
df7492f9 3052decode_coding_iso_2022 (coding)
4ed46869 3053 struct coding_system *coding;
4ed46869 3054{
8f924df7
KH
3055 const unsigned char *src = coding->source + coding->consumed;
3056 const unsigned char *src_end = coding->source + coding->src_bytes;
3057 const unsigned char *src_base;
69a80ea3 3058 int *charbuf = coding->charbuf + coding->charbuf_used;
ff0dacd7 3059 int *charbuf_end
69a80ea3 3060 = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 3061 int consumed_chars = 0, consumed_chars_base;
df7492f9 3062 int multibytep = coding->src_multibyte;
4ed46869 3063 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
3064 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3065 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 3066 int charset_id_2, charset_id_3;
df7492f9
KH
3067 struct charset *charset;
3068 int c;
3069 /* For handling composition sequence. */
3070#define COMPOSING_NO 0
3071#define COMPOSING_CHAR 1
3072#define COMPOSING_RULE 2
3073#define COMPOSING_COMPONENT_CHAR 3
3074#define COMPOSING_COMPONENT_RULE 4
3075
3076 int composition_state = COMPOSING_NO;
3077 enum composition_method method;
3078 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3079 int component_idx;
3080 int component_len;
24a73b0a 3081 Lisp_Object attrs, charset_list;
ff0dacd7
KH
3082 int char_offset = coding->produced_char;
3083 int last_offset = char_offset;
3084 int last_id = charset_ascii;
119852e7
KH
3085 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3086 int byte_after_cr = -1;
df7492f9 3087
24a73b0a 3088 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 3089 setup_iso_safe_charsets (attrs);
287c57d7
KH
3090 /* Charset list may have been changed. */
3091 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3092 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
b73bfc1c
KH
3093
3094 while (1)
4ed46869 3095 {
463f5630 3096 int c1, c2;
b73bfc1c
KH
3097
3098 src_base = src;
df7492f9
KH
3099 consumed_chars_base = consumed_chars;
3100
3101 if (charbuf >= charbuf_end)
3102 break;
3103
119852e7
KH
3104 if (byte_after_cr >= 0)
3105 c1 = byte_after_cr, byte_after_cr = -1;
3106 else
3107 ONE_MORE_BYTE (c1);
065e3595
KH
3108 if (c1 < 0)
3109 goto invalid_code;
4ed46869 3110
98725083 3111 /* We produce at most one character. */
4ed46869
KH
3112 switch (iso_code_class [c1])
3113 {
3114 case ISO_0x20_or_0x7F:
df7492f9 3115 if (composition_state != COMPOSING_NO)
ec6d2bb8 3116 {
df7492f9
KH
3117 if (composition_state == COMPOSING_RULE
3118 || composition_state == COMPOSING_COMPONENT_RULE)
3119 {
3120 DECODE_COMPOSITION_RULE (c1);
3121 components[component_idx++] = c1;
3122 composition_state--;
3123 continue;
3124 }
4ed46869 3125 }
df7492f9
KH
3126 if (charset_id_0 < 0
3127 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
3128 /* This is SPACE or DEL. */
3129 charset = CHARSET_FROM_ID (charset_ascii);
3130 else
3131 charset = CHARSET_FROM_ID (charset_id_0);
3132 break;
4ed46869
KH
3133
3134 case ISO_graphic_plane_0:
781d7a48 3135 if (composition_state != COMPOSING_NO)
b73bfc1c 3136 {
781d7a48
KH
3137 if (composition_state == COMPOSING_RULE
3138 || composition_state == COMPOSING_COMPONENT_RULE)
3139 {
3140 DECODE_COMPOSITION_RULE (c1);
3141 components[component_idx++] = c1;
3142 composition_state--;
3143 continue;
3144 }
b73bfc1c 3145 }
134b9549
KH
3146 if (charset_id_0 < 0)
3147 charset = CHARSET_FROM_ID (charset_ascii);
3148 else
3149 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
3150 break;
3151
3152 case ISO_0xA0_or_0xFF:
df7492f9
KH
3153 if (charset_id_1 < 0
3154 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3155 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3156 goto invalid_code;
4ed46869
KH
3157 /* This is a graphic character, we fall down ... */
3158
3159 case ISO_graphic_plane_1:
df7492f9
KH
3160 if (charset_id_1 < 0)
3161 goto invalid_code;
3162 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
3163 break;
3164
df7492f9 3165 case ISO_control_0:
119852e7
KH
3166 if (eol_crlf && c1 == '\r')
3167 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
3168 MAYBE_FINISH_COMPOSITION ();
3169 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
3170 break;
3171
df7492f9
KH
3172 case ISO_control_1:
3173 MAYBE_FINISH_COMPOSITION ();
3174 goto invalid_code;
3175
4ed46869 3176 case ISO_shift_out:
df7492f9
KH
3177 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3178 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3179 goto invalid_code;
3180 CODING_ISO_INVOCATION (coding, 0) = 1;
3181 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3182 continue;
4ed46869
KH
3183
3184 case ISO_shift_in:
df7492f9
KH
3185 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3186 goto invalid_code;
3187 CODING_ISO_INVOCATION (coding, 0) = 0;
3188 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3189 continue;
4ed46869
KH
3190
3191 case ISO_single_shift_2_7:
3192 case ISO_single_shift_2:
df7492f9
KH
3193 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3194 goto invalid_code;
4ed46869
KH
3195 /* SS2 is handled as an escape sequence of ESC 'N' */
3196 c1 = 'N';
3197 goto label_escape_sequence;
3198
3199 case ISO_single_shift_3:
df7492f9
KH
3200 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3201 goto invalid_code;
4ed46869
KH
3202 /* SS2 is handled as an escape sequence of ESC 'O' */
3203 c1 = 'O';
3204 goto label_escape_sequence;
3205
3206 case ISO_control_sequence_introducer:
3207 /* CSI is handled as an escape sequence of ESC '[' ... */
3208 c1 = '[';
3209 goto label_escape_sequence;
3210
3211 case ISO_escape:
3212 ONE_MORE_BYTE (c1);
3213 label_escape_sequence:
df7492f9 3214 /* Escape sequences handled here are invocation,
4ed46869
KH
3215 designation, direction specification, and character
3216 composition specification. */
3217 switch (c1)
3218 {
3219 case '&': /* revision of following character set */
3220 ONE_MORE_BYTE (c1);
3221 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3222 goto invalid_code;
4ed46869
KH
3223 ONE_MORE_BYTE (c1);
3224 if (c1 != ISO_CODE_ESC)
df7492f9 3225 goto invalid_code;
4ed46869
KH
3226 ONE_MORE_BYTE (c1);
3227 goto label_escape_sequence;
3228
3229 case '$': /* designation of 2-byte character set */
df7492f9
KH
3230 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3231 goto invalid_code;
134b9549
KH
3232 {
3233 int reg, chars96;
3234
3235 ONE_MORE_BYTE (c1);
3236 if (c1 >= '@' && c1 <= 'B')
3237 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3238 or JISX0208.1980 */
134b9549
KH
3239 reg = 0, chars96 = 0;
3240 }
3241 else if (c1 >= 0x28 && c1 <= 0x2B)
3242 { /* designation of DIMENSION2_CHARS94 character set */
3243 reg = c1 - 0x28, chars96 = 0;
3244 ONE_MORE_BYTE (c1);
3245 }
3246 else if (c1 >= 0x2C && c1 <= 0x2F)
3247 { /* designation of DIMENSION2_CHARS96 character set */
3248 reg = c1 - 0x2C, chars96 = 1;
3249 ONE_MORE_BYTE (c1);
3250 }
3251 else
3252 goto invalid_code;
3253 DECODE_DESIGNATION (reg, 2, chars96, c1);
3254 /* We must update these variables now. */
3255 if (reg == 0)
3256 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3257 else if (reg == 1)
3258 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3259 if (chars96 < 0)
3260 goto invalid_code;
3261 }
b73bfc1c 3262 continue;
4ed46869
KH
3263
3264 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3265 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3266 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3267 goto invalid_code;
3268 CODING_ISO_INVOCATION (coding, 0) = 2;
3269 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3270 continue;
4ed46869
KH
3271
3272 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3273 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3274 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3275 goto invalid_code;
3276 CODING_ISO_INVOCATION (coding, 0) = 3;
3277 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3278 continue;
4ed46869
KH
3279
3280 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3281 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3282 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3283 goto invalid_code;
134b9549
KH
3284 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3285 if (charset_id_2 < 0)
3286 charset = CHARSET_FROM_ID (charset_ascii);
3287 else
3288 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3289 ONE_MORE_BYTE (c1);
e7046a18 3290 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3291 goto invalid_code;
4ed46869
KH
3292 break;
3293
3294 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3295 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3296 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3297 goto invalid_code;
134b9549
KH
3298 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3299 if (charset_id_3 < 0)
3300 charset = CHARSET_FROM_ID (charset_ascii);
3301 else
3302 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3303 ONE_MORE_BYTE (c1);
e7046a18 3304 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3305 goto invalid_code;
4ed46869
KH
3306 break;
3307
ec6d2bb8 3308 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3309 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3310 goto invalid_code;
ec6d2bb8 3311 DECODE_COMPOSITION_START (c1);
b73bfc1c 3312 continue;
4ed46869 3313
ec6d2bb8 3314 case '1': /* end composition */
df7492f9
KH
3315 if (composition_state == COMPOSING_NO)
3316 goto invalid_code;
3317 DECODE_COMPOSITION_END ();
b73bfc1c 3318 continue;
4ed46869
KH
3319
3320 case '[': /* specification of direction */
df7492f9
KH
3321 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3322 goto invalid_code;
4ed46869 3323 /* For the moment, nested direction is not supported.
d46c5b12 3324 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3325 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3326 ONE_MORE_BYTE (c1);
3327 switch (c1)
3328 {
3329 case ']': /* end of the current direction */
d46c5b12 3330 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3331
3332 case '0': /* end of the current direction */
3333 case '1': /* start of left-to-right direction */
3334 ONE_MORE_BYTE (c1);
3335 if (c1 == ']')
d46c5b12 3336 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3337 else
df7492f9 3338 goto invalid_code;
4ed46869
KH
3339 break;
3340
3341 case '2': /* start of right-to-left direction */
3342 ONE_MORE_BYTE (c1);
3343 if (c1 == ']')
d46c5b12 3344 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3345 else
df7492f9 3346 goto invalid_code;
4ed46869
KH
3347 break;
3348
3349 default:
df7492f9 3350 goto invalid_code;
4ed46869 3351 }
b73bfc1c 3352 continue;
4ed46869 3353
103e0180 3354 case '%':
103e0180
KH
3355 ONE_MORE_BYTE (c1);
3356 if (c1 == '/')
3357 {
3358 /* CTEXT extended segment:
3359 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3360 We keep these bytes as is for the moment.
3361 They may be decoded by post-read-conversion. */
3362 int dim, M, L;
4776e638 3363 int size;
8f924df7 3364
103e0180
KH
3365 ONE_MORE_BYTE (dim);
3366 ONE_MORE_BYTE (M);
3367 ONE_MORE_BYTE (L);
3368 size = ((M - 128) * 128) + (L - 128);
4776e638
KH
3369 if (charbuf + 8 + size > charbuf_end)
3370 goto break_loop;
3371 *charbuf++ = ISO_CODE_ESC;
3372 *charbuf++ = '%';
3373 *charbuf++ = '/';
3374 *charbuf++ = dim;
3375 *charbuf++ = BYTE8_TO_CHAR (M);
3376 *charbuf++ = BYTE8_TO_CHAR (L);
103e0180
KH
3377 while (size-- > 0)
3378 {
3379 ONE_MORE_BYTE (c1);
4776e638 3380 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3381 }
103e0180
KH
3382 }
3383 else if (c1 == 'G')
3384 {
103e0180
KH
3385 /* XFree86 extension for embedding UTF-8 in CTEXT:
3386 ESC % G --UTF-8-BYTES-- ESC % @
3387 We keep these bytes as is for the moment.
3388 They may be decoded by post-read-conversion. */
4776e638
KH
3389 int *p = charbuf;
3390
3391 if (p + 6 > charbuf_end)
3392 goto break_loop;
3393 *p++ = ISO_CODE_ESC;
3394 *p++ = '%';
3395 *p++ = 'G';
3396 while (p < charbuf_end)
103e0180
KH
3397 {
3398 ONE_MORE_BYTE (c1);
3399 if (c1 == ISO_CODE_ESC
3400 && src + 1 < src_end
3401 && src[0] == '%'
3402 && src[1] == '@')
9ffd559c
KH
3403 {
3404 src += 2;
3405 break;
3406 }
4776e638 3407 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3408 }
4776e638
KH
3409 if (p + 3 > charbuf_end)
3410 goto break_loop;
3411 *p++ = ISO_CODE_ESC;
3412 *p++ = '%';
3413 *p++ = '@';
3414 charbuf = p;
103e0180
KH
3415 }
3416 else
4776e638 3417 goto invalid_code;
103e0180 3418 continue;
4776e638 3419 break;
103e0180 3420
4ed46869 3421 default:
df7492f9
KH
3422 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3423 goto invalid_code;
134b9549
KH
3424 {
3425 int reg, chars96;
3426
3427 if (c1 >= 0x28 && c1 <= 0x2B)
3428 { /* designation of DIMENSION1_CHARS94 character set */
3429 reg = c1 - 0x28, chars96 = 0;
3430 ONE_MORE_BYTE (c1);
3431 }
3432 else if (c1 >= 0x2C && c1 <= 0x2F)
3433 { /* designation of DIMENSION1_CHARS96 character set */
3434 reg = c1 - 0x2C, chars96 = 1;
3435 ONE_MORE_BYTE (c1);
3436 }
3437 else
3438 goto invalid_code;
3439 DECODE_DESIGNATION (reg, 1, chars96, c1);
3440 /* We must update these variables now. */
3441 if (reg == 0)
3442 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3443 else if (reg == 1)
3444 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3445 if (chars96 < 0)
3446 goto invalid_code;
3447 }
b73bfc1c 3448 continue;
4ed46869 3449 }
b73bfc1c 3450 }
4ed46869 3451
ff0dacd7
KH
3452 if (charset->id != charset_ascii
3453 && last_id != charset->id)
3454 {
3455 if (last_id != charset_ascii)
69a80ea3 3456 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
3457 last_id = charset->id;
3458 last_offset = char_offset;
3459 }
3460
b73bfc1c 3461 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3462 Produce a decoded character while getting 2nd position code
3463 C2 if necessary. */
3464 c1 &= 0x7F;
3465 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3466 {
3467 ONE_MORE_BYTE (c2);
df7492f9 3468 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3469 /* C2 is not in a valid range. */
df7492f9
KH
3470 goto invalid_code;
3471 c1 = (c1 << 8) | (c2 & 0x7F);
3472 if (CHARSET_DIMENSION (charset) > 2)
3473 {
3474 ONE_MORE_BYTE (c2);
3475 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3476 /* C2 is not in a valid range. */
3477 goto invalid_code;
3478 c1 = (c1 << 8) | (c2 & 0x7F);
3479 }
3480 }
3481
3482 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3483 if (c < 0)
3484 {
3485 MAYBE_FINISH_COMPOSITION ();
3486 for (; src_base < src; src_base++, char_offset++)
3487 {
3488 if (ASCII_BYTE_P (*src_base))
3489 *charbuf++ = *src_base;
3490 else
3491 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3492 }
3493 }
3494 else if (composition_state == COMPOSING_NO)
3495 {
3496 *charbuf++ = c;
3497 char_offset++;
4ed46869 3498 }
df7492f9 3499 else
781d7a48
KH
3500 {
3501 components[component_idx++] = c;
3502 if (method == COMPOSITION_WITH_RULE
3503 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3504 && composition_state == COMPOSING_COMPONENT_CHAR))
3505 composition_state++;
4ed46869
KH
3506 }
3507 continue;
3508
df7492f9
KH
3509 invalid_code:
3510 MAYBE_FINISH_COMPOSITION ();
4ed46869 3511 src = src_base;
df7492f9
KH
3512 consumed_chars = consumed_chars_base;
3513 ONE_MORE_BYTE (c);
065e3595 3514 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3515 char_offset++;
df7492f9 3516 coding->errors++;
4776e638
KH
3517 continue;
3518
3519 break_loop:
3520 break;
4ed46869 3521 }
fb88bf2d 3522
df7492f9 3523 no_more_source:
ff0dacd7 3524 if (last_id != charset_ascii)
69a80ea3 3525 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
3526 coding->consumed_char += consumed_chars_base;
3527 coding->consumed = src_base - coding->source;
3528 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3529}
3530
b73bfc1c 3531
f4dee582 3532/* ISO2022 encoding stuff. */
4ed46869
KH
3533
3534/*
f4dee582 3535 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3536 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3537 variant has the following specifications:
df7492f9 3538 1. Initial designation to G0 thru G3.
4ed46869
KH
3539 2. Allows short-form designation?
3540 3. ASCII should be designated to G0 before control characters?
3541 4. ASCII should be designated to G0 at end of line?
3542 5. 7-bit environment or 8-bit environment?
3543 6. Use locking-shift?
3544 7. Use Single-shift?
3545 And the following two are only for Japanese:
3546 8. Use ASCII in place of JIS0201-1976-Roman?
3547 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3548 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3549 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3550 details.
4ed46869
KH
3551*/
3552
3553/* Produce codes (escape sequence) for designating CHARSET to graphic
b73bfc1c
KH
3554 register REG at DST, and increment DST. If <final-char> of CHARSET is
3555 '@', 'A', or 'B' and the coding system CODING allows, produce
3556 designation sequence of short-form. */
4ed46869
KH
3557
3558#define ENCODE_DESIGNATION(charset, reg, coding) \
3559 do { \
df7492f9 3560 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
4ed46869
KH
3561 char *intermediate_char_94 = "()*+"; \
3562 char *intermediate_char_96 = ",-./"; \
df7492f9
KH
3563 int revision = -1; \
3564 int c; \
3565 \
3566 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
c197f191 3567 revision = CHARSET_ISO_REVISION (charset); \
df7492f9
KH
3568 \
3569 if (revision >= 0) \
70c22245 3570 { \
df7492f9
KH
3571 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3572 EMIT_ONE_BYTE ('@' + revision); \
4ed46869 3573 } \
df7492f9 3574 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
4ed46869
KH
3575 if (CHARSET_DIMENSION (charset) == 1) \
3576 { \
df7492f9
KH
3577 if (! CHARSET_ISO_CHARS_96 (charset)) \
3578 c = intermediate_char_94[reg]; \
4ed46869 3579 else \
df7492f9
KH
3580 c = intermediate_char_96[reg]; \
3581 EMIT_ONE_ASCII_BYTE (c); \
4ed46869
KH
3582 } \
3583 else \
3584 { \
df7492f9
KH
3585 EMIT_ONE_ASCII_BYTE ('$'); \
3586 if (! CHARSET_ISO_CHARS_96 (charset)) \
4ed46869 3587 { \
df7492f9 3588 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
b73bfc1c
KH
3589 || reg != 0 \
3590 || final_char < '@' || final_char > 'B') \
df7492f9 3591 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
4ed46869
KH
3592 } \
3593 else \
df7492f9 3594 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
4ed46869 3595 } \
df7492f9
KH
3596 EMIT_ONE_ASCII_BYTE (final_char); \
3597 \
3598 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
4ed46869
KH
3599 } while (0)
3600
df7492f9 3601
4ed46869
KH
3602/* The following two macros produce codes (control character or escape
3603 sequence) for ISO2022 single-shift functions (single-shift-2 and
3604 single-shift-3). */
3605
df7492f9
KH
3606#define ENCODE_SINGLE_SHIFT_2 \
3607 do { \
3608 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3609 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3610 else \
3611 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3612 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3613 } while (0)
3614
df7492f9
KH
3615
3616#define ENCODE_SINGLE_SHIFT_3 \
3617 do { \
3618 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3619 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3620 else \
3621 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3622 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3623 } while (0)
3624
df7492f9 3625
4ed46869
KH
3626/* The following four macros produce codes (control character or
3627 escape sequence) for ISO2022 locking-shift functions (shift-in,
3628 shift-out, locking-shift-2, and locking-shift-3). */
3629
df7492f9
KH
3630#define ENCODE_SHIFT_IN \
3631 do { \
3632 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3633 CODING_ISO_INVOCATION (coding, 0) = 0; \
4ed46869
KH
3634 } while (0)
3635
df7492f9
KH
3636
3637#define ENCODE_SHIFT_OUT \
3638 do { \
3639 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3640 CODING_ISO_INVOCATION (coding, 0) = 1; \
4ed46869
KH
3641 } while (0)
3642
df7492f9
KH
3643
3644#define ENCODE_LOCKING_SHIFT_2 \
3645 do { \
3646 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3647 CODING_ISO_INVOCATION (coding, 0) = 2; \
4ed46869
KH
3648 } while (0)
3649
df7492f9
KH
3650
3651#define ENCODE_LOCKING_SHIFT_3 \
3652 do { \
3653 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3654 CODING_ISO_INVOCATION (coding, 0) = 3; \
4ed46869
KH
3655 } while (0)
3656
df7492f9 3657
f4dee582
RS
3658/* Produce codes for a DIMENSION1 character whose character set is
3659 CHARSET and whose position-code is C1. Designation and invocation
4ed46869
KH
3660 sequences are also produced in advance if necessary. */
3661
6e85d753
KH
3662#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3663 do { \
df7492f9 3664 int id = CHARSET_ID (charset); \
bf16eb23
KH
3665 \
3666 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3667 && id == charset_ascii) \
3668 { \
3669 id = charset_jisx0201_roman; \
3670 charset = CHARSET_FROM_ID (id); \
3671 } \
3672 \
df7492f9 3673 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3674 { \
df7492f9
KH
3675 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3676 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753 3677 else \
df7492f9
KH
3678 EMIT_ONE_BYTE (c1 | 0x80); \
3679 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3680 break; \
3681 } \
df7492f9 3682 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3683 { \
df7492f9 3684 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753
KH
3685 break; \
3686 } \
df7492f9 3687 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3688 { \
df7492f9 3689 EMIT_ONE_BYTE (c1 | 0x80); \
6e85d753
KH
3690 break; \
3691 } \
6e85d753
KH
3692 else \
3693 /* Since CHARSET is not yet invoked to any graphic planes, we \
3694 must invoke it, or, at first, designate it to some graphic \
3695 register. Then repeat the loop to actually produce the \
3696 character. */ \
df7492f9
KH
3697 dst = encode_invocation_designation (charset, coding, dst, \
3698 &produced_chars); \
4ed46869
KH
3699 } while (1)
3700
df7492f9 3701
f4dee582
RS
3702/* Produce codes for a DIMENSION2 character whose character set is
3703 CHARSET and whose position-codes are C1 and C2. Designation and
4ed46869
KH
3704 invocation codes are also produced in advance if necessary. */
3705
6e85d753
KH
3706#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3707 do { \
df7492f9 3708 int id = CHARSET_ID (charset); \
bf16eb23
KH
3709 \
3710 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3711 && id == charset_jisx0208) \
3712 { \
3713 id = charset_jisx0208_1978; \
3714 charset = CHARSET_FROM_ID (id); \
3715 } \
3716 \
df7492f9 3717 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3718 { \
df7492f9
KH
3719 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3720 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753 3721 else \
df7492f9
KH
3722 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3723 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3724 break; \
3725 } \
df7492f9 3726 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3727 { \
df7492f9 3728 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753
KH
3729 break; \
3730 } \
df7492f9 3731 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3732 { \
df7492f9 3733 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
6e85d753
KH
3734 break; \
3735 } \
6e85d753
KH
3736 else \
3737 /* Since CHARSET is not yet invoked to any graphic planes, we \
3738 must invoke it, or, at first, designate it to some graphic \
3739 register. Then repeat the loop to actually produce the \
3740 character. */ \
df7492f9
KH
3741 dst = encode_invocation_designation (charset, coding, dst, \
3742 &produced_chars); \
4ed46869
KH
3743 } while (1)
3744
05e6f5dc 3745
df7492f9
KH
3746#define ENCODE_ISO_CHARACTER(charset, c) \
3747 do { \
3748 int code = ENCODE_CHAR ((charset),(c)); \
3749 \
3750 if (CHARSET_DIMENSION (charset) == 1) \
3751 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3752 else \
3753 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
84fbb8a0 3754 } while (0)
bdd9fb48 3755
05e6f5dc 3756
4ed46869 3757/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3758 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3759 Return new DST. */
3760
3761unsigned char *
df7492f9
KH
3762encode_invocation_designation (charset, coding, dst, p_nchars)
3763 struct charset *charset;
4ed46869
KH
3764 struct coding_system *coding;
3765 unsigned char *dst;
df7492f9 3766 int *p_nchars;
4ed46869 3767{
df7492f9
KH
3768 int multibytep = coding->dst_multibyte;
3769 int produced_chars = *p_nchars;
4ed46869 3770 int reg; /* graphic register number */
df7492f9 3771 int id = CHARSET_ID (charset);
4ed46869
KH
3772
3773 /* At first, check designations. */
3774 for (reg = 0; reg < 4; reg++)
df7492f9 3775 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3776 break;
3777
3778 if (reg >= 4)
3779 {
3780 /* CHARSET is not yet designated to any graphic registers. */
3781 /* At first check the requested designation. */
df7492f9
KH
3782 reg = CODING_ISO_REQUEST (coding, id);
3783 if (reg < 0)
1ba9e4ab
KH
3784 /* Since CHARSET requests no special designation, designate it
3785 to graphic register 0. */
4ed46869
KH
3786 reg = 0;
3787
3788 ENCODE_DESIGNATION (charset, reg, coding);
3789 }
3790
df7492f9
KH
3791 if (CODING_ISO_INVOCATION (coding, 0) != reg
3792 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3793 {
3794 /* Since the graphic register REG is not invoked to any graphic
3795 planes, invoke it to graphic plane 0. */
3796 switch (reg)
3797 {
3798 case 0: /* graphic register 0 */
3799 ENCODE_SHIFT_IN;
3800 break;
3801
3802 case 1: /* graphic register 1 */
3803 ENCODE_SHIFT_OUT;
3804 break;
3805
3806 case 2: /* graphic register 2 */
df7492f9 3807 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3808 ENCODE_SINGLE_SHIFT_2;
3809 else
3810 ENCODE_LOCKING_SHIFT_2;
3811 break;
3812
3813 case 3: /* graphic register 3 */
df7492f9 3814 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3815 ENCODE_SINGLE_SHIFT_3;
3816 else
3817 ENCODE_LOCKING_SHIFT_3;
3818 break;
3819 }
3820 }
b73bfc1c 3821
df7492f9 3822 *p_nchars = produced_chars;
4ed46869
KH
3823 return dst;
3824}
3825
df7492f9
KH
3826/* The following three macros produce codes for indicating direction
3827 of text. */
3828#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
ec6d2bb8 3829 do { \
df7492f9
KH
3830 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3831 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
ec6d2bb8 3832 else \
df7492f9 3833 EMIT_ONE_BYTE (ISO_CODE_CSI); \
ec6d2bb8
KH
3834 } while (0)
3835
ec6d2bb8 3836
df7492f9
KH
3837#define ENCODE_DIRECTION_R2L() \
3838 do { \
3839 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3840 EMIT_TWO_ASCII_BYTES ('2', ']'); \
ec6d2bb8
KH
3841 } while (0)
3842
ec6d2bb8 3843
df7492f9 3844#define ENCODE_DIRECTION_L2R() \
ec6d2bb8 3845 do { \
df7492f9
KH
3846 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3847 EMIT_TWO_ASCII_BYTES ('0', ']'); \
ec6d2bb8 3848 } while (0)
4ed46869 3849
4ed46869
KH
3850
3851/* Produce codes for designation and invocation to reset the graphic
3852 planes and registers to initial state. */
df7492f9
KH
3853#define ENCODE_RESET_PLANE_AND_REGISTER() \
3854 do { \
3855 int reg; \
3856 struct charset *charset; \
3857 \
3858 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3859 ENCODE_SHIFT_IN; \
3860 for (reg = 0; reg < 4; reg++) \
3861 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3862 && (CODING_ISO_DESIGNATION (coding, reg) \
3863 != CODING_ISO_INITIAL (coding, reg))) \
3864 { \
3865 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3866 ENCODE_DESIGNATION (charset, reg, coding); \
3867 } \
4ed46869
KH
3868 } while (0)
3869
df7492f9 3870
bdd9fb48 3871/* Produce designation sequences of charsets in the line started from
b73bfc1c 3872 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3873
3874 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3875 find all the necessary designations. */
3876
b73bfc1c 3877static unsigned char *
df7492f9 3878encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3879 struct coding_system *coding;
df7492f9
KH
3880 int *charbuf, *charbuf_end;
3881 unsigned char *dst;
e0e989f6 3882{
df7492f9 3883 struct charset *charset;
bdd9fb48
KH
3884 /* Table of charsets to be designated to each graphic register. */
3885 int r[4];
df7492f9
KH
3886 int c, found = 0, reg;
3887 int produced_chars = 0;
3888 int multibytep = coding->dst_multibyte;
3889 Lisp_Object attrs;
3890 Lisp_Object charset_list;
3891
3892 attrs = CODING_ID_ATTRS (coding->id);
3893 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3894 if (EQ (charset_list, Qiso_2022))
3895 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3896
3897 for (reg = 0; reg < 4; reg++)
3898 r[reg] = -1;
3899
b73bfc1c 3900 while (found < 4)
e0e989f6 3901 {
df7492f9
KH
3902 int id;
3903
3904 c = *charbuf++;
b73bfc1c
KH
3905 if (c == '\n')
3906 break;
df7492f9
KH
3907 charset = char_charset (c, charset_list, NULL);
3908 id = CHARSET_ID (charset);
3909 reg = CODING_ISO_REQUEST (coding, id);
3910 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3911 {
3912 found++;
df7492f9 3913 r[reg] = id;
bdd9fb48 3914 }
bdd9fb48
KH
3915 }
3916
3917 if (found)
3918 {
3919 for (reg = 0; reg < 4; reg++)
3920 if (r[reg] >= 0
df7492f9
KH
3921 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3922 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3923 }
b73bfc1c
KH
3924
3925 return dst;
e0e989f6
KH
3926}
3927
4ed46869
KH
3928/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3929
df7492f9
KH
3930static int
3931encode_coding_iso_2022 (coding)
4ed46869 3932 struct coding_system *coding;
4ed46869 3933{
df7492f9
KH
3934 int multibytep = coding->dst_multibyte;
3935 int *charbuf = coding->charbuf;
3936 int *charbuf_end = charbuf + coding->charbuf_used;
3937 unsigned char *dst = coding->destination + coding->produced;
3938 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3939 int safe_room = 16;
3940 int bol_designation
3941 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3942 && CODING_ISO_BOL (coding));
3943 int produced_chars = 0;
3944 Lisp_Object attrs, eol_type, charset_list;
3945 int ascii_compatible;
b73bfc1c 3946 int c;
ff0dacd7 3947 int preferred_charset_id = -1;
05e6f5dc 3948
24a73b0a
KH
3949 CODING_GET_INFO (coding, attrs, charset_list);
3950 eol_type = CODING_ID_EOL_TYPE (coding->id);
3951 if (VECTORP (eol_type))
3952 eol_type = Qunix;
3953
004068e4 3954 setup_iso_safe_charsets (attrs);
ff0dacd7 3955 /* Charset list may have been changed. */
287c57d7 3956 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8f924df7 3957 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
0eecad43 3958
df7492f9 3959 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
bdd9fb48 3960
df7492f9 3961 while (charbuf < charbuf_end)
4ed46869 3962 {
df7492f9 3963 ASSURE_DESTINATION (safe_room);
b73bfc1c 3964
df7492f9 3965 if (bol_designation)
b73bfc1c 3966 {
df7492f9 3967 unsigned char *dst_prev = dst;
4ed46869 3968
bdd9fb48 3969 /* We have to produce designation sequences if any now. */
df7492f9
KH
3970 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3971 bol_designation = 0;
3972 /* We are sure that designation sequences are all ASCII bytes. */
3973 produced_chars += dst - dst_prev;
e0e989f6
KH
3974 }
3975
df7492f9 3976 c = *charbuf++;
ec6d2bb8 3977
ff0dacd7
KH
3978 if (c < 0)
3979 {
3980 /* Handle an annotation. */
3981 switch (*charbuf)
ec6d2bb8 3982 {
ff0dacd7
KH
3983 case CODING_ANNOTATE_COMPOSITION_MASK:
3984 /* Not yet implemented. */
3985 break;
3986 case CODING_ANNOTATE_CHARSET_MASK:
cf7dfdf5 3987 preferred_charset_id = charbuf[2];
ff0dacd7
KH
3988 if (preferred_charset_id >= 0
3989 && NILP (Fmemq (make_number (preferred_charset_id),
3990 charset_list)))
3991 preferred_charset_id = -1;
3992 break;
3993 default:
3994 abort ();
4ed46869 3995 }
ff0dacd7
KH
3996 charbuf += -c - 1;
3997 continue;
4ed46869 3998 }
ec6d2bb8 3999
b73bfc1c
KH
4000 /* Now encode the character C. */
4001 if (c < 0x20 || c == 0x7F)
4002 {
df7492f9
KH
4003 if (c == '\n'
4004 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 4005 {
df7492f9
KH
4006 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4007 ENCODE_RESET_PLANE_AND_REGISTER ();
4008 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 4009 {
df7492f9
KH
4010 int i;
4011
4012 for (i = 0; i < 4; i++)
4013 CODING_ISO_DESIGNATION (coding, i)
4014 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 4015 }
df7492f9
KH
4016 bol_designation
4017 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 4018 }
df7492f9
KH
4019 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4020 ENCODE_RESET_PLANE_AND_REGISTER ();
4021 EMIT_ONE_ASCII_BYTE (c);
4ed46869 4022 }
df7492f9 4023 else if (ASCII_CHAR_P (c))
88993dfd 4024 {
df7492f9
KH
4025 if (ascii_compatible)
4026 EMIT_ONE_ASCII_BYTE (c);
93dec019 4027 else
19a8d9e0 4028 {
bf16eb23
KH
4029 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4030 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 4031 }
4ed46869 4032 }
16eafb5d 4033 else if (CHAR_BYTE8_P (c))
88993dfd 4034 {
16eafb5d
KH
4035 c = CHAR_TO_BYTE8 (c);
4036 EMIT_ONE_BYTE (c);
88993dfd 4037 }
b73bfc1c 4038 else
df7492f9 4039 {
ff0dacd7 4040 struct charset *charset;
b73bfc1c 4041
ff0dacd7
KH
4042 if (preferred_charset_id >= 0)
4043 {
4044 charset = CHARSET_FROM_ID (preferred_charset_id);
4045 if (! CHAR_CHARSET_P (c, charset))
4046 charset = char_charset (c, charset_list, NULL);
4047 }
4048 else
4049 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
4050 if (!charset)
4051 {
41cbe562
KH
4052 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4053 {
4054 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4055 charset = CHARSET_FROM_ID (charset_ascii);
4056 }
4057 else
4058 {
4059 c = coding->default_char;
4060 charset = char_charset (c, charset_list, NULL);
4061 }
df7492f9
KH
4062 }
4063 ENCODE_ISO_CHARACTER (charset, c);
4064 }
84fbb8a0 4065 }
b73bfc1c 4066
df7492f9
KH
4067 if (coding->mode & CODING_MODE_LAST_BLOCK
4068 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4069 {
4070 ASSURE_DESTINATION (safe_room);
4071 ENCODE_RESET_PLANE_AND_REGISTER ();
4072 }
065e3595 4073 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4074 CODING_ISO_BOL (coding) = bol_designation;
4075 coding->produced_char += produced_chars;
4076 coding->produced = dst - coding->destination;
4077 return 0;
4ed46869
KH
4078}
4079
4080\f
df7492f9 4081/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 4082
df7492f9 4083/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
4084 quite widely. So, for the moment, Emacs supports them in the bare
4085 C code. But, in the future, they may be supported only by CCL. */
4086
4087/* SJIS is a coding system encoding three character sets: ASCII, right
4088 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
4089 as is. A character of charset katakana-jisx0201 is encoded by
4090 "position-code + 0x80". A character of charset japanese-jisx0208
4091 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 4092 so that it fit in the range below.
4ed46869
KH
4093
4094 --- CODE RANGE of SJIS ---
4095 (character set) (range)
4096 ASCII 0x00 .. 0x7F
df7492f9 4097 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 4098 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 4099 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
4100 -------------------------------
4101
4102*/
4103
4104/* BIG5 is a coding system encoding two character sets: ASCII and
4105 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 4106 character set and is encoded in two-byte.
4ed46869
KH
4107
4108 --- CODE RANGE of BIG5 ---
4109 (character set) (range)
4110 ASCII 0x00 .. 0x7F
4111 Big5 (1st byte) 0xA1 .. 0xFE
4112 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
4113 --------------------------
4114
df7492f9 4115 */
4ed46869
KH
4116
4117/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4118 Check if a text is encoded in SJIS. If it is, return
df7492f9 4119 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 4120
0a28aafb 4121static int
ff0dacd7 4122detect_coding_sjis (coding, detect_info)
df7492f9 4123 struct coding_system *coding;
ff0dacd7 4124 struct coding_detection_info *detect_info;
4ed46869 4125{
065e3595 4126 const unsigned char *src = coding->source, *src_base;
8f924df7 4127 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4128 int multibytep = coding->src_multibyte;
4129 int consumed_chars = 0;
4130 int found = 0;
b73bfc1c 4131 int c;
df7492f9 4132
ff0dacd7 4133 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
4134 /* A coding system of this category is always ASCII compatible. */
4135 src += coding->head_ascii;
4ed46869 4136
b73bfc1c 4137 while (1)
4ed46869 4138 {
065e3595 4139 src_base = src;
df7492f9 4140 ONE_MORE_BYTE (c);
682169fe
KH
4141 if (c < 0x80)
4142 continue;
df7492f9 4143 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 4144 {
df7492f9 4145 ONE_MORE_BYTE (c);
682169fe 4146 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 4147 break;
ff0dacd7 4148 found = CATEGORY_MASK_SJIS;
4ed46869 4149 }
df7492f9 4150 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 4151 found = CATEGORY_MASK_SJIS;
df7492f9
KH
4152 else
4153 break;
4ed46869 4154 }
ff0dacd7 4155 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
4156 return 0;
4157
4158 no_more_source:
065e3595 4159 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4160 {
ff0dacd7 4161 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 4162 return 0;
4ed46869 4163 }
ff0dacd7
KH
4164 detect_info->found |= found;
4165 return 1;
4ed46869
KH
4166}
4167
4168/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4169 Check if a text is encoded in BIG5. If it is, return
df7492f9 4170 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 4171
0a28aafb 4172static int
ff0dacd7 4173detect_coding_big5 (coding, detect_info)
df7492f9 4174 struct coding_system *coding;
ff0dacd7 4175 struct coding_detection_info *detect_info;
4ed46869 4176{
065e3595 4177 const unsigned char *src = coding->source, *src_base;
8f924df7 4178 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4179 int multibytep = coding->src_multibyte;
4180 int consumed_chars = 0;
4181 int found = 0;
b73bfc1c 4182 int c;
fa42c37f 4183
ff0dacd7 4184 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4185 /* A coding system of this category is always ASCII compatible. */
4186 src += coding->head_ascii;
fa42c37f 4187
b73bfc1c 4188 while (1)
fa42c37f 4189 {
065e3595 4190 src_base = src;
df7492f9
KH
4191 ONE_MORE_BYTE (c);
4192 if (c < 0x80)
fa42c37f 4193 continue;
df7492f9 4194 if (c >= 0xA1)
fa42c37f 4195 {
df7492f9
KH
4196 ONE_MORE_BYTE (c);
4197 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4198 return 0;
ff0dacd7 4199 found = CATEGORY_MASK_BIG5;
fa42c37f 4200 }
df7492f9
KH
4201 else
4202 break;
fa42c37f 4203 }
ff0dacd7 4204 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4205 return 0;
fa42c37f 4206
df7492f9 4207 no_more_source:
065e3595 4208 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4209 {
ff0dacd7 4210 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4211 return 0;
4212 }
ff0dacd7
KH
4213 detect_info->found |= found;
4214 return 1;
fa42c37f
KH
4215}
4216
4ed46869
KH
4217/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4218 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
fa42c37f 4219
b73bfc1c 4220static void
df7492f9 4221decode_coding_sjis (coding)
4ed46869 4222 struct coding_system *coding;
4ed46869 4223{
8f924df7
KH
4224 const unsigned char *src = coding->source + coding->consumed;
4225 const unsigned char *src_end = coding->source + coding->src_bytes;
4226 const unsigned char *src_base;
69a80ea3
KH
4227 int *charbuf = coding->charbuf + coding->charbuf_used;
4228 int *charbuf_end
4229 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4230 int consumed_chars = 0, consumed_chars_base;
4231 int multibytep = coding->src_multibyte;
4232 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4233 struct charset *charset_kanji2;
24a73b0a 4234 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4235 int char_offset = coding->produced_char;
4236 int last_offset = char_offset;
4237 int last_id = charset_ascii;
119852e7
KH
4238 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4239 int byte_after_cr = -1;
a5d301df 4240
24a73b0a 4241 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4242
4243 val = charset_list;
4244 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4245 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4246 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4247 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4248
b73bfc1c 4249 while (1)
4ed46869 4250 {
df7492f9 4251 int c, c1;
24a73b0a 4252 struct charset *charset;
fa42c37f 4253
b73bfc1c 4254 src_base = src;
df7492f9 4255 consumed_chars_base = consumed_chars;
fa42c37f 4256
df7492f9
KH
4257 if (charbuf >= charbuf_end)
4258 break;
4259
119852e7
KH
4260 if (byte_after_cr >= 0)
4261 c = byte_after_cr, byte_after_cr = -1;
4262 else
4263 ONE_MORE_BYTE (c);
065e3595
KH
4264 if (c < 0)
4265 goto invalid_code;
24a73b0a 4266 if (c < 0x80)
119852e7
KH
4267 {
4268 if (eol_crlf && c == '\r')
4269 ONE_MORE_BYTE (byte_after_cr);
4270 charset = charset_roman;
4271 }
57a47f8a 4272 else if (c == 0x80 || c == 0xA0)
8e921c4b 4273 goto invalid_code;
57a47f8a
KH
4274 else if (c >= 0xA1 && c <= 0xDF)
4275 {
4276 /* SJIS -> JISX0201-Kana */
4277 c &= 0x7F;
4278 charset = charset_kana;
4279 }
4280 else if (c <= 0xEF)
df7492f9 4281 {
57a47f8a
KH
4282 /* SJIS -> JISX0208 */
4283 ONE_MORE_BYTE (c1);
4284 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4285 goto invalid_code;
57a47f8a
KH
4286 c = (c << 8) | c1;
4287 SJIS_TO_JIS (c);
4288 charset = charset_kanji;
4289 }
4290 else if (c <= 0xFC && charset_kanji2)
4291 {
c6876370 4292 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4293 ONE_MORE_BYTE (c1);
4294 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4295 goto invalid_code;
57a47f8a
KH
4296 c = (c << 8) | c1;
4297 SJIS_TO_JIS2 (c);
4298 charset = charset_kanji2;
df7492f9 4299 }
57a47f8a
KH
4300 else
4301 goto invalid_code;
24a73b0a
KH
4302 if (charset->id != charset_ascii
4303 && last_id != charset->id)
4304 {
4305 if (last_id != charset_ascii)
69a80ea3 4306 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4307 last_id = charset->id;
4308 last_offset = char_offset;
4309 }
4310 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4311 *charbuf++ = c;
ff0dacd7 4312 char_offset++;
df7492f9 4313 continue;
b73bfc1c 4314
df7492f9
KH
4315 invalid_code:
4316 src = src_base;
4317 consumed_chars = consumed_chars_base;
4318 ONE_MORE_BYTE (c);
065e3595 4319 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4320 char_offset++;
df7492f9
KH
4321 coding->errors++;
4322 }
fa42c37f 4323
df7492f9 4324 no_more_source:
ff0dacd7 4325 if (last_id != charset_ascii)
69a80ea3 4326 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4327 coding->consumed_char += consumed_chars_base;
4328 coding->consumed = src_base - coding->source;
4329 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4330}
4331
b73bfc1c 4332static void
df7492f9 4333decode_coding_big5 (coding)
4ed46869 4334 struct coding_system *coding;
4ed46869 4335{
8f924df7
KH
4336 const unsigned char *src = coding->source + coding->consumed;
4337 const unsigned char *src_end = coding->source + coding->src_bytes;
4338 const unsigned char *src_base;
69a80ea3
KH
4339 int *charbuf = coding->charbuf + coding->charbuf_used;
4340 int *charbuf_end
4341 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4342 int consumed_chars = 0, consumed_chars_base;
4343 int multibytep = coding->src_multibyte;
4344 struct charset *charset_roman, *charset_big5;
24a73b0a 4345 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4346 int char_offset = coding->produced_char;
4347 int last_offset = char_offset;
4348 int last_id = charset_ascii;
119852e7
KH
4349 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4350 int byte_after_cr = -1;
df7492f9 4351
24a73b0a 4352 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4353 val = charset_list;
4354 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4355 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4356
b73bfc1c 4357 while (1)
4ed46869 4358 {
df7492f9 4359 int c, c1;
24a73b0a 4360 struct charset *charset;
b73bfc1c
KH
4361
4362 src_base = src;
df7492f9
KH
4363 consumed_chars_base = consumed_chars;
4364
4365 if (charbuf >= charbuf_end)
4366 break;
4367
119852e7 4368 if (byte_after_cr >= 0)
14daee73 4369 c = byte_after_cr, byte_after_cr = -1;
119852e7
KH
4370 else
4371 ONE_MORE_BYTE (c);
b73bfc1c 4372
065e3595
KH
4373 if (c < 0)
4374 goto invalid_code;
24a73b0a 4375 if (c < 0x80)
119852e7 4376 {
14daee73 4377 if (eol_crlf && c == '\r')
119852e7
KH
4378 ONE_MORE_BYTE (byte_after_cr);
4379 charset = charset_roman;
4380 }
24a73b0a 4381 else
4ed46869 4382 {
24a73b0a
KH
4383 /* BIG5 -> Big5 */
4384 if (c < 0xA1 || c > 0xFE)
4385 goto invalid_code;
4386 ONE_MORE_BYTE (c1);
4387 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4388 goto invalid_code;
4389 c = c << 8 | c1;
4390 charset = charset_big5;
4ed46869 4391 }
24a73b0a
KH
4392 if (charset->id != charset_ascii
4393 && last_id != charset->id)
df7492f9 4394 {
24a73b0a 4395 if (last_id != charset_ascii)
69a80ea3 4396 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4397 last_id = charset->id;
4398 last_offset = char_offset;
4ed46869 4399 }
24a73b0a 4400 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4401 *charbuf++ = c;
ff0dacd7 4402 char_offset++;
fb88bf2d
KH
4403 continue;
4404
df7492f9 4405 invalid_code:
4ed46869 4406 src = src_base;
df7492f9
KH
4407 consumed_chars = consumed_chars_base;
4408 ONE_MORE_BYTE (c);
065e3595 4409 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4410 char_offset++;
df7492f9 4411 coding->errors++;
fb88bf2d 4412 }
d46c5b12 4413
df7492f9 4414 no_more_source:
ff0dacd7 4415 if (last_id != charset_ascii)
69a80ea3 4416 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4417 coding->consumed_char += consumed_chars_base;
4418 coding->consumed = src_base - coding->source;
4419 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4420}
4421
4422/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4423 This function can encode charsets `ascii', `katakana-jisx0201',
4424 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4425 are sure that all these charsets are registered as official charset
4ed46869
KH
4426 (i.e. do not have extended leading-codes). Characters of other
4427 charsets are produced without any encoding. If SJIS_P is 1, encode
4428 SJIS text, else encode BIG5 text. */
4429
df7492f9
KH
4430static int
4431encode_coding_sjis (coding)
4ed46869 4432 struct coding_system *coding;
4ed46869 4433{
df7492f9
KH
4434 int multibytep = coding->dst_multibyte;
4435 int *charbuf = coding->charbuf;
4436 int *charbuf_end = charbuf + coding->charbuf_used;
4437 unsigned char *dst = coding->destination + coding->produced;
4438 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4439 int safe_room = 4;
4440 int produced_chars = 0;
24a73b0a 4441 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4442 int ascii_compatible;
4443 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4444 struct charset *charset_kanji2;
df7492f9 4445 int c;
a5d301df 4446
24a73b0a 4447 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4448 val = charset_list;
4449 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4450 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4451 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4452 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4453
df7492f9 4454 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 4455
df7492f9
KH
4456 while (charbuf < charbuf_end)
4457 {
4458 ASSURE_DESTINATION (safe_room);
4459 c = *charbuf++;
b73bfc1c 4460 /* Now encode the character C. */
df7492f9
KH
4461 if (ASCII_CHAR_P (c) && ascii_compatible)
4462 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4463 else if (CHAR_BYTE8_P (c))
4464 {
4465 c = CHAR_TO_BYTE8 (c);
4466 EMIT_ONE_BYTE (c);
4467 }
df7492f9 4468 else
b73bfc1c 4469 {
df7492f9
KH
4470 unsigned code;
4471 struct charset *charset = char_charset (c, charset_list, &code);
4472
4473 if (!charset)
4ed46869 4474 {
41cbe562 4475 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4476 {
41cbe562
KH
4477 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4478 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4479 }
41cbe562 4480 else
b73bfc1c 4481 {
41cbe562
KH
4482 c = coding->default_char;
4483 charset = char_charset (c, charset_list, &code);
b73bfc1c 4484 }
b73bfc1c 4485 }
df7492f9
KH
4486 if (code == CHARSET_INVALID_CODE (charset))
4487 abort ();
4488 if (charset == charset_kanji)
4489 {
4490 int c1, c2;
4491 JIS_TO_SJIS (code);
4492 c1 = code >> 8, c2 = code & 0xFF;
4493 EMIT_TWO_BYTES (c1, c2);
4494 }
4495 else if (charset == charset_kana)
4496 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
4497 else if (charset_kanji2 && charset == charset_kanji2)
4498 {
4499 int c1, c2;
4500
4501 c1 = code >> 8;
4502 if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4503 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4504 {
4505 JIS_TO_SJIS2 (code);
4506 c1 = code >> 8, c2 = code & 0xFF;
4507 EMIT_TWO_BYTES (c1, c2);
4508 }
4509 else
4510 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4511 }
df7492f9
KH
4512 else
4513 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4514 }
4515 }
065e3595 4516 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4517 coding->produced_char += produced_chars;
4518 coding->produced = dst - coding->destination;
4519 return 0;
4520}
4521
4522static int
4523encode_coding_big5 (coding)
4524 struct coding_system *coding;
4525{
4526 int multibytep = coding->dst_multibyte;
4527 int *charbuf = coding->charbuf;
4528 int *charbuf_end = charbuf + coding->charbuf_used;
4529 unsigned char *dst = coding->destination + coding->produced;
4530 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4531 int safe_room = 4;
4532 int produced_chars = 0;
24a73b0a 4533 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4534 int ascii_compatible;
4535 struct charset *charset_roman, *charset_big5;
4536 int c;
4537
24a73b0a 4538 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4539 val = charset_list;
4540 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4541 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4542 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4543
4544 while (charbuf < charbuf_end)
4545 {
4546 ASSURE_DESTINATION (safe_room);
4547 c = *charbuf++;
4548 /* Now encode the character C. */
4549 if (ASCII_CHAR_P (c) && ascii_compatible)
4550 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4551 else if (CHAR_BYTE8_P (c))
4552 {
4553 c = CHAR_TO_BYTE8 (c);
4554 EMIT_ONE_BYTE (c);
b73bfc1c
KH
4555 }
4556 else
4557 {
df7492f9
KH
4558 unsigned code;
4559 struct charset *charset = char_charset (c, charset_list, &code);
4560
4561 if (! charset)
b73bfc1c 4562 {
41cbe562 4563 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4564 {
41cbe562
KH
4565 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4566 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4567 }
41cbe562 4568 else
0eecad43 4569 {
41cbe562
KH
4570 c = coding->default_char;
4571 charset = char_charset (c, charset_list, &code);
0eecad43 4572 }
4ed46869 4573 }
df7492f9
KH
4574 if (code == CHARSET_INVALID_CODE (charset))
4575 abort ();
4576 if (charset == charset_big5)
b73bfc1c 4577 {
df7492f9
KH
4578 int c1, c2;
4579
4580 c1 = code >> 8, c2 = code & 0xFF;
4581 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 4582 }
df7492f9
KH
4583 else
4584 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 4585 }
4ed46869 4586 }
065e3595 4587 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4588 coding->produced_char += produced_chars;
4589 coding->produced = dst - coding->destination;
4590 return 0;
4ed46869
KH
4591}
4592
4593\f
df7492f9 4594/*** 10. CCL handlers ***/
1397dc18
KH
4595
4596/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4597 Check if a text is encoded in a coding system of which
4598 encoder/decoder are written in CCL program. If it is, return
df7492f9 4599 CATEGORY_MASK_CCL, else return 0. */
1397dc18 4600
0a28aafb 4601static int
ff0dacd7 4602detect_coding_ccl (coding, detect_info)
df7492f9 4603 struct coding_system *coding;
ff0dacd7 4604 struct coding_detection_info *detect_info;
1397dc18 4605{
065e3595 4606 const unsigned char *src = coding->source, *src_base;
8f924df7 4607 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4608 int multibytep = coding->src_multibyte;
4609 int consumed_chars = 0;
4610 int found = 0;
0e219d54 4611 unsigned char *valids;
df7492f9
KH
4612 int head_ascii = coding->head_ascii;
4613 Lisp_Object attrs;
4614
ff0dacd7
KH
4615 detect_info->checked |= CATEGORY_MASK_CCL;
4616
df7492f9 4617 coding = &coding_categories[coding_category_ccl];
0e219d54 4618 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
4619 attrs = CODING_ID_ATTRS (coding->id);
4620 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4621 src += head_ascii;
1397dc18 4622
b73bfc1c 4623 while (1)
1397dc18 4624 {
df7492f9 4625 int c;
065e3595
KH
4626
4627 src_base = src;
df7492f9 4628 ONE_MORE_BYTE (c);
065e3595 4629 if (c < 0 || ! valids[c])
df7492f9 4630 break;
ff0dacd7
KH
4631 if ((valids[c] > 1))
4632 found = CATEGORY_MASK_CCL;
df7492f9 4633 }
ff0dacd7 4634 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4635 return 0;
4636
4637 no_more_source:
ff0dacd7
KH
4638 detect_info->found |= found;
4639 return 1;
df7492f9
KH
4640}
4641
4642static void
4643decode_coding_ccl (coding)
4644 struct coding_system *coding;
4645{
7c78e542 4646 const unsigned char *src = coding->source + coding->consumed;
8f924df7 4647 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
4648 int *charbuf = coding->charbuf + coding->charbuf_used;
4649 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
4650 int consumed_chars = 0;
4651 int multibytep = coding->src_multibyte;
4652 struct ccl_program ccl;
4653 int source_charbuf[1024];
4654 int source_byteidx[1024];
24a73b0a 4655 Lisp_Object attrs, charset_list;
df7492f9 4656
24a73b0a 4657 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4658 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4659
4660 while (src < src_end)
4661 {
7c78e542 4662 const unsigned char *p = src;
df7492f9
KH
4663 int *source, *source_end;
4664 int i = 0;
4665
4666 if (multibytep)
4667 while (i < 1024 && p < src_end)
4668 {
4669 source_byteidx[i] = p - src;
4670 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4671 }
4672 else
4673 while (i < 1024 && p < src_end)
4674 source_charbuf[i++] = *p++;
8f924df7 4675
df7492f9
KH
4676 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4677 ccl.last_block = 1;
4678
4679 source = source_charbuf;
4680 source_end = source + i;
4681 while (source < source_end)
4682 {
4683 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4684 source_end - source, charbuf_end - charbuf,
4685 charset_list);
df7492f9
KH
4686 source += ccl.consumed;
4687 charbuf += ccl.produced;
4688 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4689 break;
4690 }
4691 if (source < source_end)
4692 src += source_byteidx[source - source_charbuf];
4693 else
4694 src = p;
4695 consumed_chars += source - source_charbuf;
4696
4697 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4698 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4699 break;
4700 }
4701
4702 switch (ccl.status)
4703 {
4704 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4705 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4706 break;
4707 case CCL_STAT_SUSPEND_BY_DST:
4708 break;
4709 case CCL_STAT_QUIT:
4710 case CCL_STAT_INVALID_CMD:
065e3595 4711 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4712 break;
4713 default:
065e3595 4714 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4715 break;
4716 }
4717 coding->consumed_char += consumed_chars;
4718 coding->consumed = src - coding->source;
4719 coding->charbuf_used = charbuf - coding->charbuf;
4720}
4721
4722static int
4723encode_coding_ccl (coding)
4724 struct coding_system *coding;
4725{
4726 struct ccl_program ccl;
4727 int multibytep = coding->dst_multibyte;
4728 int *charbuf = coding->charbuf;
4729 int *charbuf_end = charbuf + coding->charbuf_used;
4730 unsigned char *dst = coding->destination + coding->produced;
4731 unsigned char *dst_end = coding->destination + coding->dst_bytes;
df7492f9
KH
4732 int destination_charbuf[1024];
4733 int i, produced_chars = 0;
24a73b0a 4734 Lisp_Object attrs, charset_list;
df7492f9 4735
24a73b0a 4736 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4737 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4738
4739 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4740 ccl.dst_multibyte = coding->dst_multibyte;
4741
8cffd3e7 4742 while (charbuf < charbuf_end)
df7492f9 4743 {
df7492f9 4744 ccl_driver (&ccl, charbuf, destination_charbuf,
8cffd3e7 4745 charbuf_end - charbuf, 1024, charset_list);
df7492f9 4746 if (multibytep)
8cffd3e7
KH
4747 {
4748 ASSURE_DESTINATION (ccl.produced * 2);
4749 for (i = 0; i < ccl.produced; i++)
4750 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4751 }
df7492f9
KH
4752 else
4753 {
8cffd3e7 4754 ASSURE_DESTINATION (ccl.produced);
df7492f9
KH
4755 for (i = 0; i < ccl.produced; i++)
4756 *dst++ = destination_charbuf[i] & 0xFF;
4757 produced_chars += ccl.produced;
4758 }
8cffd3e7
KH
4759 charbuf += ccl.consumed;
4760 if (ccl.status == CCL_STAT_QUIT
4761 || ccl.status == CCL_STAT_INVALID_CMD)
4762 break;
df7492f9
KH
4763 }
4764
4765 switch (ccl.status)
4766 {
4767 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4768 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4769 break;
4770 case CCL_STAT_SUSPEND_BY_DST:
065e3595 4771 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
4772 break;
4773 case CCL_STAT_QUIT:
4774 case CCL_STAT_INVALID_CMD:
065e3595 4775 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4776 break;
4777 default:
065e3595 4778 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4779 break;
1397dc18 4780 }
df7492f9
KH
4781
4782 coding->produced_char += produced_chars;
4783 coding->produced = dst - coding->destination;
4784 return 0;
1397dc18
KH
4785}
4786
df7492f9 4787
1397dc18 4788\f
df7492f9 4789/*** 10, 11. no-conversion handlers ***/
4ed46869 4790
b73bfc1c 4791/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4792
b73bfc1c 4793static void
df7492f9 4794decode_coding_raw_text (coding)
4ed46869 4795 struct coding_system *coding;
4ed46869 4796{
119852e7
KH
4797 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4798
df7492f9 4799 coding->chars_at_source = 1;
119852e7
KH
4800 coding->consumed_char = coding->src_chars;
4801 coding->consumed = coding->src_bytes;
4802 if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4803 {
4804 coding->consumed_char--;
4805 coding->consumed--;
4806 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4807 }
4808 else
4809 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4810}
4ed46869 4811
df7492f9
KH
4812static int
4813encode_coding_raw_text (coding)
4814 struct coding_system *coding;
4815{
4816 int multibytep = coding->dst_multibyte;
4817 int *charbuf = coding->charbuf;
4818 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4819 unsigned char *dst = coding->destination + coding->produced;
4820 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4821 int produced_chars = 0;
b73bfc1c
KH
4822 int c;
4823
df7492f9 4824 if (multibytep)
b73bfc1c 4825 {
df7492f9 4826 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4827
df7492f9
KH
4828 if (coding->src_multibyte)
4829 while (charbuf < charbuf_end)
4830 {
4831 ASSURE_DESTINATION (safe_room);
4832 c = *charbuf++;
4833 if (ASCII_CHAR_P (c))
4834 EMIT_ONE_ASCII_BYTE (c);
4835 else if (CHAR_BYTE8_P (c))
4836 {
4837 c = CHAR_TO_BYTE8 (c);
4838 EMIT_ONE_BYTE (c);
4839 }
4840 else
4841 {
4842 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4843
df7492f9
KH
4844 CHAR_STRING_ADVANCE (c, p1);
4845 while (p0 < p1)
9d123124
KH
4846 {
4847 EMIT_ONE_BYTE (*p0);
4848 p0++;
4849 }
df7492f9
KH
4850 }
4851 }
b73bfc1c 4852 else
df7492f9
KH
4853 while (charbuf < charbuf_end)
4854 {
4855 ASSURE_DESTINATION (safe_room);
4856 c = *charbuf++;
4857 EMIT_ONE_BYTE (c);
4858 }
4859 }
4860 else
4ed46869 4861 {
df7492f9 4862 if (coding->src_multibyte)
d46c5b12 4863 {
df7492f9
KH
4864 int safe_room = MAX_MULTIBYTE_LENGTH;
4865
4866 while (charbuf < charbuf_end)
d46c5b12 4867 {
df7492f9
KH
4868 ASSURE_DESTINATION (safe_room);
4869 c = *charbuf++;
4870 if (ASCII_CHAR_P (c))
4871 *dst++ = c;
4872 else if (CHAR_BYTE8_P (c))
4873 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4874 else
df7492f9
KH
4875 CHAR_STRING_ADVANCE (c, dst);
4876 produced_chars++;
d46c5b12
KH
4877 }
4878 }
df7492f9
KH
4879 else
4880 {
4881 ASSURE_DESTINATION (charbuf_end - charbuf);
4882 while (charbuf < charbuf_end && dst < dst_end)
4883 *dst++ = *charbuf++;
4884 produced_chars = dst - (coding->destination + coding->dst_bytes);
8f924df7 4885 }
4ed46869 4886 }
065e3595 4887 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4888 coding->produced_char += produced_chars;
4889 coding->produced = dst - coding->destination;
4890 return 0;
4ed46869
KH
4891}
4892
ff0dacd7
KH
4893/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4894 Check if a text is encoded in a charset-based coding system. If it
4895 is, return 1, else return 0. */
4896
0a28aafb 4897static int
ff0dacd7 4898detect_coding_charset (coding, detect_info)
df7492f9 4899 struct coding_system *coding;
ff0dacd7 4900 struct coding_detection_info *detect_info;
1397dc18 4901{
065e3595 4902 const unsigned char *src = coding->source, *src_base;
8f924df7 4903 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4904 int multibytep = coding->src_multibyte;
4905 int consumed_chars = 0;
4906 Lisp_Object attrs, valids;
584948ac 4907 int found = 0;
716b3fa0 4908 int head_ascii = coding->head_ascii;
1397dc18 4909
ff0dacd7
KH
4910 detect_info->checked |= CATEGORY_MASK_CHARSET;
4911
df7492f9
KH
4912 coding = &coding_categories[coding_category_charset];
4913 attrs = CODING_ID_ATTRS (coding->id);
4914 valids = AREF (attrs, coding_attr_charset_valids);
4915
4916 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
716b3fa0 4917 src += head_ascii;
1397dc18 4918
b73bfc1c 4919 while (1)
1397dc18 4920 {
df7492f9 4921 int c;
716b3fa0
KH
4922 Lisp_Object val;
4923 struct charset *charset;
4924 int dim, idx;
1397dc18 4925
065e3595 4926 src_base = src;
df7492f9 4927 ONE_MORE_BYTE (c);
065e3595
KH
4928 if (c < 0)
4929 continue;
716b3fa0
KH
4930 val = AREF (valids, c);
4931 if (NILP (val))
df7492f9 4932 break;
584948ac 4933 if (c >= 0x80)
ff0dacd7 4934 found = CATEGORY_MASK_CHARSET;
716b3fa0
KH
4935 if (INTEGERP (val))
4936 {
4937 charset = CHARSET_FROM_ID (XFASTINT (val));
4938 dim = CHARSET_DIMENSION (charset);
4939 for (idx = 1; idx < dim; idx++)
4940 {
4941 if (src == src_end)
4942 goto too_short;
4943 ONE_MORE_BYTE (c);
4944 if (c < charset->code_space[(dim - 1 - idx) * 2]
4945 || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
4946 break;
4947 }
4948 if (idx < dim)
4949 break;
4950 }
4951 else
4952 {
4953 idx = 1;
4954 for (; CONSP (val); val = XCDR (val))
4955 {
4956 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4957 dim = CHARSET_DIMENSION (charset);
4958 while (idx < dim)
4959 {
4960 if (src == src_end)
4961 goto too_short;
4962 ONE_MORE_BYTE (c);
4963 if (c < charset->code_space[(dim - 1 - idx) * 4]
4964 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
4965 break;
4966 idx++;
4967 }
4968 if (idx == dim)
4969 {
4970 val = Qnil;
4971 break;
4972 }
4973 }
4974 if (CONSP (val))
4975 break;
4976 }
df7492f9 4977 }
716b3fa0 4978 too_short:
ff0dacd7 4979 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4980 return 0;
4ed46869 4981
df7492f9 4982 no_more_source:
ff0dacd7
KH
4983 detect_info->found |= found;
4984 return 1;
df7492f9 4985}
b73bfc1c 4986
b73bfc1c 4987static void
df7492f9 4988decode_coding_charset (coding)
4ed46869 4989 struct coding_system *coding;
4ed46869 4990{
8f924df7
KH
4991 const unsigned char *src = coding->source + coding->consumed;
4992 const unsigned char *src_end = coding->source + coding->src_bytes;
4993 const unsigned char *src_base;
69a80ea3
KH
4994 int *charbuf = coding->charbuf + coding->charbuf_used;
4995 int *charbuf_end
4996 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4997 int consumed_chars = 0, consumed_chars_base;
4998 int multibytep = coding->src_multibyte;
24a73b0a 4999 Lisp_Object attrs, charset_list, valids;
ff0dacd7
KH
5000 int char_offset = coding->produced_char;
5001 int last_offset = char_offset;
5002 int last_id = charset_ascii;
119852e7
KH
5003 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5004 int byte_after_cr = -1;
df7492f9 5005
24a73b0a 5006 CODING_GET_INFO (coding, attrs, charset_list);
4eb6d3f1 5007 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 5008
df7492f9 5009 while (1)
4ed46869 5010 {
4eb6d3f1 5011 int c;
24a73b0a
KH
5012 Lisp_Object val;
5013 struct charset *charset;
5014 int dim;
5015 int len = 1;
5016 unsigned code;
df7492f9
KH
5017
5018 src_base = src;
5019 consumed_chars_base = consumed_chars;
b73bfc1c 5020
df7492f9
KH
5021 if (charbuf >= charbuf_end)
5022 break;
5023
119852e7
KH
5024 if (byte_after_cr >= 0)
5025 {
5026 c = byte_after_cr;
5027 byte_after_cr = -1;
5028 }
5029 else
5030 {
5031 ONE_MORE_BYTE (c);
5032 if (eol_crlf && c == '\r')
5033 ONE_MORE_BYTE (byte_after_cr);
5034 }
065e3595
KH
5035 if (c < 0)
5036 goto invalid_code;
24a73b0a
KH
5037 code = c;
5038
5039 val = AREF (valids, c);
5040 if (NILP (val))
5041 goto invalid_code;
5042 if (INTEGERP (val))
d46c5b12 5043 {
24a73b0a
KH
5044 charset = CHARSET_FROM_ID (XFASTINT (val));
5045 dim = CHARSET_DIMENSION (charset);
5046 while (len < dim)
b73bfc1c 5047 {
24a73b0a
KH
5048 ONE_MORE_BYTE (c);
5049 code = (code << 8) | c;
5050 len++;
b73bfc1c 5051 }
24a73b0a
KH
5052 CODING_DECODE_CHAR (coding, src, src_base, src_end,
5053 charset, code, c);
d46c5b12 5054 }
df7492f9 5055 else
d46c5b12 5056 {
24a73b0a
KH
5057 /* VAL is a list of charset IDs. It is assured that the
5058 list is sorted by charset dimensions (smaller one
5059 comes first). */
5060 while (CONSP (val))
4eb6d3f1 5061 {
24a73b0a 5062 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 5063 dim = CHARSET_DIMENSION (charset);
f9d71dcd 5064 while (len < dim)
4eb6d3f1 5065 {
acb2a965
KH
5066 ONE_MORE_BYTE (c);
5067 code = (code << 8) | c;
f9d71dcd 5068 len++;
4eb6d3f1 5069 }
24a73b0a
KH
5070 CODING_DECODE_CHAR (coding, src, src_base,
5071 src_end, charset, code, c);
5072 if (c >= 0)
5073 break;
5074 val = XCDR (val);
ff0dacd7 5075 }
d46c5b12 5076 }
24a73b0a
KH
5077 if (c < 0)
5078 goto invalid_code;
5079 if (charset->id != charset_ascii
5080 && last_id != charset->id)
5081 {
5082 if (last_id != charset_ascii)
69a80ea3 5083 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
5084 last_id = charset->id;
5085 last_offset = char_offset;
5086 }
5087
df7492f9 5088 *charbuf++ = c;
ff0dacd7 5089 char_offset++;
df7492f9
KH
5090 continue;
5091
5092 invalid_code:
5093 src = src_base;
5094 consumed_chars = consumed_chars_base;
5095 ONE_MORE_BYTE (c);
065e3595 5096 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 5097 char_offset++;
df7492f9 5098 coding->errors++;
4ed46869
KH
5099 }
5100
df7492f9 5101 no_more_source:
ff0dacd7 5102 if (last_id != charset_ascii)
69a80ea3 5103 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
5104 coding->consumed_char += consumed_chars_base;
5105 coding->consumed = src_base - coding->source;
5106 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
5107}
5108
df7492f9
KH
5109static int
5110encode_coding_charset (coding)
4ed46869 5111 struct coding_system *coding;
4ed46869 5112{
df7492f9
KH
5113 int multibytep = coding->dst_multibyte;
5114 int *charbuf = coding->charbuf;
5115 int *charbuf_end = charbuf + coding->charbuf_used;
5116 unsigned char *dst = coding->destination + coding->produced;
5117 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5118 int safe_room = MAX_MULTIBYTE_LENGTH;
5119 int produced_chars = 0;
24a73b0a 5120 Lisp_Object attrs, charset_list;
df7492f9 5121 int ascii_compatible;
b73bfc1c 5122 int c;
b73bfc1c 5123
24a73b0a 5124 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5125 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 5126
df7492f9 5127 while (charbuf < charbuf_end)
4ed46869 5128 {
4eb6d3f1 5129 struct charset *charset;
df7492f9 5130 unsigned code;
8f924df7 5131
df7492f9
KH
5132 ASSURE_DESTINATION (safe_room);
5133 c = *charbuf++;
5134 if (ascii_compatible && ASCII_CHAR_P (c))
5135 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 5136 else if (CHAR_BYTE8_P (c))
4ed46869 5137 {
16eafb5d
KH
5138 c = CHAR_TO_BYTE8 (c);
5139 EMIT_ONE_BYTE (c);
d46c5b12 5140 }
d46c5b12 5141 else
b73bfc1c 5142 {
4eb6d3f1
KH
5143 charset = char_charset (c, charset_list, &code);
5144 if (charset)
5145 {
5146 if (CHARSET_DIMENSION (charset) == 1)
5147 EMIT_ONE_BYTE (code);
5148 else if (CHARSET_DIMENSION (charset) == 2)
5149 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5150 else if (CHARSET_DIMENSION (charset) == 3)
5151 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5152 else
5153 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5154 (code >> 8) & 0xFF, code & 0xFF);
5155 }
5156 else
41cbe562
KH
5157 {
5158 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5159 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5160 else
5161 c = coding->default_char;
5162 EMIT_ONE_BYTE (c);
5163 }
4ed46869 5164 }
4ed46869
KH
5165 }
5166
065e3595 5167 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5168 coding->produced_char += produced_chars;
5169 coding->produced = dst - coding->destination;
5170 return 0;
4ed46869
KH
5171}
5172
5173\f
1397dc18 5174/*** 7. C library functions ***/
4ed46869 5175
df7492f9
KH
5176/* Setup coding context CODING from information about CODING_SYSTEM.
5177 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
5178 CODING_SYSTEM is invalid, signal an error. */
4ed46869 5179
ec6d2bb8 5180void
e0e989f6
KH
5181setup_coding_system (coding_system, coding)
5182 Lisp_Object coding_system;
4ed46869
KH
5183 struct coding_system *coding;
5184{
df7492f9
KH
5185 Lisp_Object attrs;
5186 Lisp_Object eol_type;
5187 Lisp_Object coding_type;
4608c386 5188 Lisp_Object val;
4ed46869 5189
df7492f9 5190 if (NILP (coding_system))
ae6f73fa 5191 coding_system = Qundecided;
c07c8e12 5192
df7492f9 5193 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 5194
df7492f9
KH
5195 attrs = CODING_ID_ATTRS (coding->id);
5196 eol_type = CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 5197
df7492f9
KH
5198 coding->mode = 0;
5199 coding->head_ascii = -1;
4a015c45
KH
5200 if (VECTORP (eol_type))
5201 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5202 | CODING_REQUIRE_DETECTION_MASK);
5203 else if (! EQ (eol_type, Qunix))
5204 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5205 | CODING_REQUIRE_ENCODING_MASK);
5206 else
5207 coding->common_flags = 0;
5e5c78be
KH
5208 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5209 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5210 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5211 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
5212 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5213 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 5214
df7492f9 5215 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
5216 coding->max_charset_id = SCHARS (val) - 1;
5217 coding->safe_charsets = (char *) SDATA (val);
df7492f9 5218 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4608c386 5219
df7492f9
KH
5220 coding_type = CODING_ATTR_TYPE (attrs);
5221 if (EQ (coding_type, Qundecided))
d46c5b12 5222 {
df7492f9
KH
5223 coding->detector = NULL;
5224 coding->decoder = decode_coding_raw_text;
5225 coding->encoder = encode_coding_raw_text;
5226 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5227 }
df7492f9 5228 else if (EQ (coding_type, Qiso_2022))
d46c5b12 5229 {
df7492f9
KH
5230 int i;
5231 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5232
5233 /* Invoke graphic register 0 to plane 0. */
5234 CODING_ISO_INVOCATION (coding, 0) = 0;
5235 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
5236 CODING_ISO_INVOCATION (coding, 1)
5237 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5238 /* Setup the initial status of designation. */
5239 for (i = 0; i < 4; i++)
5240 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5241 /* Not single shifting initially. */
5242 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5243 /* Beginning of buffer should also be regarded as bol. */
5244 CODING_ISO_BOL (coding) = 1;
5245 coding->detector = detect_coding_iso_2022;
5246 coding->decoder = decode_coding_iso_2022;
5247 coding->encoder = encode_coding_iso_2022;
5248 if (flags & CODING_ISO_FLAG_SAFE)
5249 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 5250 coding->common_flags
df7492f9
KH
5251 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5252 | CODING_REQUIRE_FLUSHING_MASK);
5253 if (flags & CODING_ISO_FLAG_COMPOSITION)
5254 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
5255 if (flags & CODING_ISO_FLAG_DESIGNATION)
5256 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
5257 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5258 {
5259 setup_iso_safe_charsets (attrs);
5260 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
5261 coding->max_charset_id = SCHARS (val) - 1;
5262 coding->safe_charsets = (char *) SDATA (val);
df7492f9
KH
5263 }
5264 CODING_ISO_FLAGS (coding) = flags;
d46c5b12 5265 }
df7492f9 5266 else if (EQ (coding_type, Qcharset))
d46c5b12 5267 {
df7492f9
KH
5268 coding->detector = detect_coding_charset;
5269 coding->decoder = decode_coding_charset;
5270 coding->encoder = encode_coding_charset;
d46c5b12 5271 coding->common_flags
df7492f9 5272 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5273 }
df7492f9 5274 else if (EQ (coding_type, Qutf_8))
d46c5b12 5275 {
df7492f9
KH
5276 coding->detector = detect_coding_utf_8;
5277 coding->decoder = decode_coding_utf_8;
5278 coding->encoder = encode_coding_utf_8;
5279 coding->common_flags
5280 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5281 }
5282 else if (EQ (coding_type, Qutf_16))
5283 {
5284 val = AREF (attrs, coding_attr_utf_16_bom);
5285 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
5286 : EQ (val, Qt) ? utf_16_with_bom
5287 : utf_16_without_bom);
5288 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5289 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5290 : utf_16_little_endian);
e19c3639 5291 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5292 coding->detector = detect_coding_utf_16;
5293 coding->decoder = decode_coding_utf_16;
5294 coding->encoder = encode_coding_utf_16;
5295 coding->common_flags
5296 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
b49a1807
KH
5297 if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5298 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5299 }
df7492f9 5300 else if (EQ (coding_type, Qccl))
4ed46869 5301 {
df7492f9
KH
5302 coding->detector = detect_coding_ccl;
5303 coding->decoder = decode_coding_ccl;
5304 coding->encoder = encode_coding_ccl;
c952af22 5305 coding->common_flags
df7492f9
KH
5306 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5307 | CODING_REQUIRE_FLUSHING_MASK);
5308 }
5309 else if (EQ (coding_type, Qemacs_mule))
5310 {
5311 coding->detector = detect_coding_emacs_mule;
5312 coding->decoder = decode_coding_emacs_mule;
5313 coding->encoder = encode_coding_emacs_mule;
c952af22 5314 coding->common_flags
df7492f9
KH
5315 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5316 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5317 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5318 {
5319 Lisp_Object tail, safe_charsets;
5320 int max_charset_id = 0;
5321
5322 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5323 tail = XCDR (tail))
5324 if (max_charset_id < XFASTINT (XCAR (tail)))
5325 max_charset_id = XFASTINT (XCAR (tail));
5326 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5327 make_number (255));
5328 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5329 tail = XCDR (tail))
8f924df7 5330 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5331 coding->max_charset_id = max_charset_id;
8f924df7 5332 coding->safe_charsets = (char *) SDATA (safe_charsets);
df7492f9
KH
5333 }
5334 }
5335 else if (EQ (coding_type, Qshift_jis))
5336 {
5337 coding->detector = detect_coding_sjis;
5338 coding->decoder = decode_coding_sjis;
5339 coding->encoder = encode_coding_sjis;
c952af22 5340 coding->common_flags
df7492f9
KH
5341 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5342 }
5343 else if (EQ (coding_type, Qbig5))
5344 {
5345 coding->detector = detect_coding_big5;
5346 coding->decoder = decode_coding_big5;
5347 coding->encoder = encode_coding_big5;
c952af22 5348 coding->common_flags
df7492f9
KH
5349 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5350 }
5351 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5352 {
df7492f9
KH
5353 coding->detector = NULL;
5354 coding->decoder = decode_coding_raw_text;
5355 coding->encoder = encode_coding_raw_text;
ea29edf2
KH
5356 if (! EQ (eol_type, Qunix))
5357 {
5358 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5359 if (! VECTORP (eol_type))
5360 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5361 }
5362
4ed46869 5363 }
4ed46869 5364
df7492f9 5365 return;
4ed46869
KH
5366}
5367
0ff61e78
KH
5368/* Return a list of charsets supported by CODING. */
5369
5370Lisp_Object
5371coding_charset_list (coding)
5372 struct coding_system *coding;
5373{
35befdaa 5374 Lisp_Object attrs, charset_list;
0ff61e78
KH
5375
5376 CODING_GET_INFO (coding, attrs, charset_list);
5377 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5378 {
5379 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5380
5381 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5382 charset_list = Viso_2022_charset_list;
5383 }
5384 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5385 {
5386 charset_list = Vemacs_mule_charset_list;
5387 }
5388 return charset_list;
5389}
5390
5391
df7492f9
KH
5392/* Return raw-text or one of its subsidiaries that has the same
5393 eol_type as CODING-SYSTEM. */
ec6d2bb8 5394
df7492f9
KH
5395Lisp_Object
5396raw_text_coding_system (coding_system)
5397 Lisp_Object coding_system;
ec6d2bb8 5398{
0be8721c 5399 Lisp_Object spec, attrs;
df7492f9 5400 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 5401
d3e4cb56
KH
5402 if (NILP (coding_system))
5403 return Qraw_text;
df7492f9
KH
5404 spec = CODING_SYSTEM_SPEC (coding_system);
5405 attrs = AREF (spec, 0);
ec6d2bb8 5406
df7492f9
KH
5407 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5408 return coding_system;
ec6d2bb8 5409
df7492f9
KH
5410 eol_type = AREF (spec, 2);
5411 if (VECTORP (eol_type))
5412 return Qraw_text;
5413 spec = CODING_SYSTEM_SPEC (Qraw_text);
5414 raw_text_eol_type = AREF (spec, 2);
5415 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5416 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5417 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
5418}
5419
54f78171 5420
df7492f9
KH
5421/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5422 does, return one of the subsidiary that has the same eol-spec as
fcbcfb64
KH
5423 PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil,
5424 inherit end-of-line format from the system's setting
5425 (system_eol_type). */
df7492f9
KH
5426
5427Lisp_Object
5428coding_inherit_eol_type (coding_system, parent)
b74e4686 5429 Lisp_Object coding_system, parent;
54f78171 5430{
3e139625 5431 Lisp_Object spec, eol_type;
54f78171 5432
d3e4cb56
KH
5433 if (NILP (coding_system))
5434 coding_system = Qraw_text;
df7492f9 5435 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 5436 eol_type = AREF (spec, 2);
fcbcfb64 5437 if (VECTORP (eol_type))
df7492f9 5438 {
df7492f9
KH
5439 Lisp_Object parent_eol_type;
5440
fcbcfb64
KH
5441 if (! NILP (parent))
5442 {
5443 Lisp_Object parent_spec;
5444
4a015c45 5445 parent_spec = CODING_SYSTEM_SPEC (parent);
fcbcfb64
KH
5446 parent_eol_type = AREF (parent_spec, 2);
5447 }
5448 else
5449 parent_eol_type = system_eol_type;
df7492f9
KH
5450 if (EQ (parent_eol_type, Qunix))
5451 coding_system = AREF (eol_type, 0);
5452 else if (EQ (parent_eol_type, Qdos))
5453 coding_system = AREF (eol_type, 1);
5454 else if (EQ (parent_eol_type, Qmac))
5455 coding_system = AREF (eol_type, 2);
54f78171 5456 }
df7492f9 5457 return coding_system;
54f78171
KH
5458}
5459
4ed46869
KH
5460/* Emacs has a mechanism to automatically detect a coding system if it
5461 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5462 it's impossible to distinguish some coding systems accurately
5463 because they use the same range of codes. So, at first, coding
5464 systems are categorized into 7, those are:
5465
0ef69138 5466 o coding-category-emacs-mule
4ed46869
KH
5467
5468 The category for a coding system which has the same code range
5469 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5470 symbol) `emacs-mule' by default.
4ed46869
KH
5471
5472 o coding-category-sjis
5473
5474 The category for a coding system which has the same code range
5475 as SJIS. Assigned the coding-system (Lisp
7717c392 5476 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5477
5478 o coding-category-iso-7
5479
5480 The category for a coding system which has the same code range
7717c392 5481 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5482 shift and single shift functions. This can encode/decode all
5483 charsets. Assigned the coding-system (Lisp symbol)
5484 `iso-2022-7bit' by default.
5485
5486 o coding-category-iso-7-tight
5487
5488 Same as coding-category-iso-7 except that this can
5489 encode/decode only the specified charsets.
4ed46869
KH
5490
5491 o coding-category-iso-8-1
5492
5493 The category for a coding system which has the same code range
5494 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5495 for DIMENSION1 charset. This doesn't use any locking shift
5496 and single shift functions. Assigned the coding-system (Lisp
5497 symbol) `iso-latin-1' by default.
4ed46869
KH
5498
5499 o coding-category-iso-8-2
5500
5501 The category for a coding system which has the same code range
5502 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5503 for DIMENSION2 charset. This doesn't use any locking shift
5504 and single shift functions. Assigned the coding-system (Lisp
5505 symbol) `japanese-iso-8bit' by default.
4ed46869 5506
7717c392 5507 o coding-category-iso-7-else
4ed46869
KH
5508
5509 The category for a coding system which has the same code range
df7492f9 5510 as ISO2022 of 7-bit environemnt but uses locking shift or
7717c392
KH
5511 single shift functions. Assigned the coding-system (Lisp
5512 symbol) `iso-2022-7bit-lock' by default.
5513
5514 o coding-category-iso-8-else
5515
5516 The category for a coding system which has the same code range
df7492f9 5517 as ISO2022 of 8-bit environemnt but uses locking shift or
7717c392
KH
5518 single shift functions. Assigned the coding-system (Lisp
5519 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5520
5521 o coding-category-big5
5522
5523 The category for a coding system which has the same code range
5524 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5525 `cn-big5' by default.
4ed46869 5526
fa42c37f
KH
5527 o coding-category-utf-8
5528
5529 The category for a coding system which has the same code range
6e76ae91 5530 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
5531 symbol) `utf-8' by default.
5532
5533 o coding-category-utf-16-be
5534
5535 The category for a coding system in which a text has an
5536 Unicode signature (cf. Unicode Standard) in the order of BIG
5537 endian at the head. Assigned the coding-system (Lisp symbol)
5538 `utf-16-be' by default.
5539
5540 o coding-category-utf-16-le
5541
5542 The category for a coding system in which a text has an
5543 Unicode signature (cf. Unicode Standard) in the order of
5544 LITTLE endian at the head. Assigned the coding-system (Lisp
5545 symbol) `utf-16-le' by default.
5546
1397dc18
KH
5547 o coding-category-ccl
5548
5549 The category for a coding system of which encoder/decoder is
5550 written in CCL programs. The default value is nil, i.e., no
5551 coding system is assigned.
5552
4ed46869
KH
5553 o coding-category-binary
5554
5555 The category for a coding system not categorized in any of the
5556 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5557 `no-conversion' by default.
4ed46869
KH
5558
5559 Each of them is a Lisp symbol and the value is an actual
df7492f9 5560 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5561 What Emacs does actually is to detect a category of coding system.
5562 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5563 decide only one possible category, it selects a category of the
4ed46869
KH
5564 highest priority. Priorities of categories are also specified by a
5565 user in a Lisp variable `coding-category-list'.
5566
5567*/
5568
df7492f9
KH
5569#define EOL_SEEN_NONE 0
5570#define EOL_SEEN_LF 1
5571#define EOL_SEEN_CR 2
5572#define EOL_SEEN_CRLF 4
66cfb530 5573
ff0dacd7
KH
5574/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5575 SOURCE is encoded. If CATEGORY is one of
5576 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5577 two-byte, else they are encoded by one-byte.
5578
5579 Return one of EOL_SEEN_XXX. */
4ed46869 5580
bc4bc72a 5581#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
5582
5583static int
89528eb3 5584detect_eol (source, src_bytes, category)
f6cbaf43 5585 const unsigned char *source;
df7492f9 5586 EMACS_INT src_bytes;
89528eb3 5587 enum coding_category category;
4ed46869 5588{
f6cbaf43 5589 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5590 unsigned char c;
df7492f9
KH
5591 int total = 0;
5592 int eol_seen = EOL_SEEN_NONE;
4ed46869 5593
89528eb3 5594 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5595 {
df7492f9 5596 int msb, lsb;
fa42c37f 5597
89528eb3
KH
5598 msb = category == (coding_category_utf_16_le
5599 | coding_category_utf_16_le_nosig);
df7492f9 5600 lsb = 1 - msb;
fa42c37f 5601
df7492f9 5602 while (src + 1 < src_end)
fa42c37f 5603 {
df7492f9
KH
5604 c = src[lsb];
5605 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5606 {
df7492f9
KH
5607 int this_eol;
5608
5609 if (c == '\n')
5610 this_eol = EOL_SEEN_LF;
5611 else if (src + 3 >= src_end
5612 || src[msb + 2] != 0
5613 || src[lsb + 2] != '\n')
5614 this_eol = EOL_SEEN_CR;
fa42c37f 5615 else
8f924df7 5616 this_eol = EOL_SEEN_CRLF;
df7492f9
KH
5617
5618 if (eol_seen == EOL_SEEN_NONE)
5619 /* This is the first end-of-line. */
5620 eol_seen = this_eol;
5621 else if (eol_seen != this_eol)
fa42c37f 5622 {
df7492f9
KH
5623 /* The found type is different from what found before. */
5624 eol_seen = EOL_SEEN_LF;
5625 break;
fa42c37f 5626 }
df7492f9
KH
5627 if (++total == MAX_EOL_CHECK_COUNT)
5628 break;
fa42c37f 5629 }
df7492f9 5630 src += 2;
fa42c37f 5631 }
bcf26d6a 5632 }
d46c5b12 5633 else
c4825358 5634 {
df7492f9 5635 while (src < src_end)
27901516 5636 {
df7492f9
KH
5637 c = *src++;
5638 if (c == '\n' || c == '\r')
5639 {
5640 int this_eol;
d46c5b12 5641
df7492f9
KH
5642 if (c == '\n')
5643 this_eol = EOL_SEEN_LF;
5644 else if (src >= src_end || *src != '\n')
5645 this_eol = EOL_SEEN_CR;
5646 else
5647 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5648
df7492f9
KH
5649 if (eol_seen == EOL_SEEN_NONE)
5650 /* This is the first end-of-line. */
5651 eol_seen = this_eol;
5652 else if (eol_seen != this_eol)
5653 {
5654 /* The found type is different from what found before. */
5655 eol_seen = EOL_SEEN_LF;
5656 break;
5657 }
5658 if (++total == MAX_EOL_CHECK_COUNT)
5659 break;
5660 }
5661 }
73be902c 5662 }
df7492f9 5663 return eol_seen;
73be902c
KH
5664}
5665
df7492f9 5666
24a73b0a 5667static Lisp_Object
df7492f9
KH
5668adjust_coding_eol_type (coding, eol_seen)
5669 struct coding_system *coding;
5670 int eol_seen;
73be902c 5671{
0be8721c 5672 Lisp_Object eol_type;
8f924df7 5673
df7492f9
KH
5674 eol_type = CODING_ID_EOL_TYPE (coding->id);
5675 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
5676 {
5677 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5678 eol_type = Qunix;
5679 }
6f197c07 5680 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
5681 {
5682 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5683 eol_type = Qdos;
5684 }
6f197c07 5685 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
5686 {
5687 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5688 eol_type = Qmac;
5689 }
5690 return eol_type;
d46c5b12 5691}
4ed46869 5692
df7492f9
KH
5693/* Detect how a text specified in CODING is encoded. If a coding
5694 system is detected, update fields of CODING by the detected coding
5695 system. */
0a28aafb 5696
df7492f9
KH
5697void
5698detect_coding (coding)
d46c5b12 5699 struct coding_system *coding;
d46c5b12 5700{
8f924df7 5701 const unsigned char *src, *src_end;
d46c5b12 5702
df7492f9
KH
5703 coding->consumed = coding->consumed_char = 0;
5704 coding->produced = coding->produced_char = 0;
5705 coding_set_source (coding);
1c3478b0 5706
df7492f9 5707 src_end = coding->source + coding->src_bytes;
1c3478b0 5708
df7492f9
KH
5709 /* If we have not yet decided the text encoding type, detect it
5710 now. */
5711 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5712 {
df7492f9 5713 int c, i;
6cb21a4f 5714 struct coding_detection_info detect_info;
2f3cbb32 5715 int null_byte_found = 0, eight_bit_found = 0;
df7492f9 5716
6cb21a4f 5717 detect_info.checked = detect_info.found = detect_info.rejected = 0;
2f3cbb32
KH
5718 coding->head_ascii = -1;
5719 for (src = coding->source; src < src_end; src++)
d46c5b12 5720 {
df7492f9 5721 c = *src;
6cb21a4f 5722 if (c & 0x80)
6cb21a4f 5723 {
2f3cbb32
KH
5724 eight_bit_found = 1;
5725 if (coding->head_ascii < 0)
5726 coding->head_ascii = src - coding->source;
5727 if (null_byte_found)
5728 break;
5729 }
5730 else if (c < 0x20)
5731 {
5732 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5733 && ! inhibit_iso_escape_detection
5734 && ! detect_info.checked)
6cb21a4f 5735 {
2f3cbb32
KH
5736 if (coding->head_ascii < 0)
5737 coding->head_ascii = src - coding->source;
5738 if (detect_coding_iso_2022 (coding, &detect_info))
5739 {
5740 /* We have scanned the whole data. */
5741 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5742 /* We didn't find an 8-bit code. We may have
5743 found a null-byte, but it's very rare that
5744 a binary file confirm to ISO-2022. */
5745 src = src_end;
5746 break;
5747 }
5748 }
5749 else if (! c)
5750 {
5751 null_byte_found = 1;
5752 if (eight_bit_found)
5753 break;
6cb21a4f
KH
5754 }
5755 }
d46c5b12 5756 }
2f3cbb32
KH
5757 if (coding->head_ascii < 0)
5758 coding->head_ascii = src - coding->source;
df7492f9 5759
2f3cbb32
KH
5760 if (null_byte_found || eight_bit_found
5761 || coding->head_ascii < coding->src_bytes
6cb21a4f 5762 || detect_info.found)
d46c5b12 5763 {
ff0dacd7
KH
5764 enum coding_category category;
5765 struct coding_system *this;
df7492f9 5766
6cb21a4f
KH
5767 if (coding->head_ascii == coding->src_bytes)
5768 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
5769 for (i = 0; i < coding_category_raw_text; i++)
5770 {
5771 category = coding_priorities[i];
5772 this = coding_categories + category;
5773 if (detect_info.found & (1 << category))
24a73b0a 5774 break;
6cb21a4f
KH
5775 }
5776 else
2f3cbb32
KH
5777 {
5778 if (null_byte_found)
ff0dacd7 5779 {
2f3cbb32
KH
5780 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
5781 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
ff0dacd7 5782 }
2f3cbb32
KH
5783 for (i = 0; i < coding_category_raw_text; i++)
5784 {
5785 category = coding_priorities[i];
5786 this = coding_categories + category;
5787 if (this->id < 0)
5788 {
5789 /* No coding system of this category is defined. */
5790 detect_info.rejected |= (1 << category);
5791 }
5792 else if (category >= coding_category_raw_text)
5793 continue;
5794 else if (detect_info.checked & (1 << category))
5795 {
5796 if (detect_info.found & (1 << category))
5797 break;
5798 }
5799 else if ((*(this->detector)) (coding, &detect_info)
5800 && detect_info.found & (1 << category))
5801 {
5802 if (category == coding_category_utf_16_auto)
5803 {
5804 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5805 category = coding_category_utf_16_le;
5806 else
5807 category = coding_category_utf_16_be;
5808 }
5809 break;
5810 }
5811 }
5812
5813 if (i < coding_category_raw_text)
5814 setup_coding_system (CODING_ID_NAME (this->id), coding);
5815 else if (null_byte_found)
5816 setup_coding_system (Qno_conversion, coding);
5817 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
5818 == CATEGORY_MASK_ANY)
5819 setup_coding_system (Qraw_text, coding);
5820 else if (detect_info.rejected)
5821 for (i = 0; i < coding_category_raw_text; i++)
5822 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5823 {
5824 this = coding_categories + coding_priorities[i];
5825 setup_coding_system (CODING_ID_NAME (this->id), coding);
5826 break;
5827 }
5828 }
d46c5b12 5829 }
b73bfc1c 5830 }
24a73b0a
KH
5831 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5832 == coding_category_utf_16_auto)
b49a1807
KH
5833 {
5834 Lisp_Object coding_systems;
5835 struct coding_detection_info detect_info;
5836
5837 coding_systems
5838 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5839 detect_info.found = detect_info.rejected = 0;
5840 if (CONSP (coding_systems)
24a73b0a 5841 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
5842 {
5843 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5844 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 5845 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
5846 setup_coding_system (XCDR (coding_systems), coding);
5847 }
5848 }
4ed46869 5849}
4ed46869 5850
d46c5b12 5851
aaaf0b1e 5852static void
df7492f9 5853decode_eol (coding)
aaaf0b1e 5854 struct coding_system *coding;
aaaf0b1e 5855{
24a73b0a
KH
5856 Lisp_Object eol_type;
5857 unsigned char *p, *pbeg, *pend;
5858
5859 eol_type = CODING_ID_EOL_TYPE (coding->id);
5860 if (EQ (eol_type, Qunix))
5861 return;
5862
5863 if (NILP (coding->dst_object))
5864 pbeg = coding->destination;
5865 else
5866 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5867 pend = pbeg + coding->produced;
5868
5869 if (VECTORP (eol_type))
aaaf0b1e 5870 {
df7492f9 5871 int eol_seen = EOL_SEEN_NONE;
4ed46869 5872
24a73b0a 5873 for (p = pbeg; p < pend; p++)
aaaf0b1e 5874 {
df7492f9
KH
5875 if (*p == '\n')
5876 eol_seen |= EOL_SEEN_LF;
5877 else if (*p == '\r')
aaaf0b1e 5878 {
df7492f9 5879 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5880 {
df7492f9
KH
5881 eol_seen |= EOL_SEEN_CRLF;
5882 p++;
aaaf0b1e 5883 }
aaaf0b1e 5884 else
df7492f9 5885 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5886 }
aaaf0b1e 5887 }
24a73b0a
KH
5888 if (eol_seen != EOL_SEEN_NONE
5889 && eol_seen != EOL_SEEN_LF
5890 && eol_seen != EOL_SEEN_CRLF
5891 && eol_seen != EOL_SEEN_CR)
5892 eol_seen = EOL_SEEN_LF;
df7492f9 5893 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 5894 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5895 }
d46c5b12 5896
24a73b0a 5897 if (EQ (eol_type, Qmac))
27901516 5898 {
24a73b0a 5899 for (p = pbeg; p < pend; p++)
df7492f9
KH
5900 if (*p == '\r')
5901 *p = '\n';
4ed46869 5902 }
24a73b0a 5903 else if (EQ (eol_type, Qdos))
df7492f9 5904 {
24a73b0a 5905 int n = 0;
b73bfc1c 5906
24a73b0a
KH
5907 if (NILP (coding->dst_object))
5908 {
4347441b
KH
5909 /* Start deleting '\r' from the tail to minimize the memory
5910 movement. */
24a73b0a
KH
5911 for (p = pend - 2; p >= pbeg; p--)
5912 if (*p == '\r')
5913 {
5914 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5915 n++;
5916 }
5917 }
5918 else
5919 {
4347441b
KH
5920 int pos_byte = coding->dst_pos_byte;
5921 int pos = coding->dst_pos;
5922 int pos_end = pos + coding->produced_char - 1;
5923
5924 while (pos < pos_end)
5925 {
5926 p = BYTE_POS_ADDR (pos_byte);
5927 if (*p == '\r' && p[1] == '\n')
5928 {
5929 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5930 n++;
5931 pos_end--;
5932 }
5933 pos++;
69b8522d
KH
5934 if (coding->dst_multibyte)
5935 pos_byte += BYTES_BY_CHAR_HEAD (*p);
5936 else
5937 pos_byte++;
4347441b 5938 }
24a73b0a
KH
5939 }
5940 coding->produced -= n;
5941 coding->produced_char -= n;
aaaf0b1e 5942 }
4ed46869
KH
5943}
5944
7d64c6ad 5945
a6f87d34
KH
5946/* Return a translation table (or list of them) from coding system
5947 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5948 decoding (ENCODEP is zero). */
7d64c6ad 5949
e6a54062 5950static Lisp_Object
09ee6fdd
KH
5951get_translation_table (attrs, encodep, max_lookup)
5952 Lisp_Object attrs;
5953 int encodep, *max_lookup;
7d64c6ad
KH
5954{
5955 Lisp_Object standard, translation_table;
09ee6fdd 5956 Lisp_Object val;
7d64c6ad
KH
5957
5958 if (encodep)
5959 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5960 standard = Vstandard_translation_table_for_encode;
5961 else
5962 translation_table = CODING_ATTR_DECODE_TBL (attrs),
5963 standard = Vstandard_translation_table_for_decode;
7d64c6ad 5964 if (NILP (translation_table))
09ee6fdd
KH
5965 translation_table = standard;
5966 else
a6f87d34 5967 {
09ee6fdd
KH
5968 if (SYMBOLP (translation_table))
5969 translation_table = Fget (translation_table, Qtranslation_table);
5970 else if (CONSP (translation_table))
5971 {
5972 translation_table = Fcopy_sequence (translation_table);
5973 for (val = translation_table; CONSP (val); val = XCDR (val))
5974 if (SYMBOLP (XCAR (val)))
5975 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5976 }
5977 if (CHAR_TABLE_P (standard))
5978 {
5979 if (CONSP (translation_table))
5980 translation_table = nconc2 (translation_table,
5981 Fcons (standard, Qnil));
5982 else
5983 translation_table = Fcons (translation_table,
5984 Fcons (standard, Qnil));
5985 }
a6f87d34 5986 }
2170c8f0
KH
5987
5988 if (max_lookup)
09ee6fdd 5989 {
2170c8f0
KH
5990 *max_lookup = 1;
5991 if (CHAR_TABLE_P (translation_table)
5992 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5993 {
5994 val = XCHAR_TABLE (translation_table)->extras[1];
5995 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5996 *max_lookup = XFASTINT (val);
5997 }
5998 else if (CONSP (translation_table))
5999 {
6000 Lisp_Object tail, val;
09ee6fdd 6001
2170c8f0
KH
6002 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6003 if (CHAR_TABLE_P (XCAR (tail))
6004 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6005 {
6006 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6007 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6008 *max_lookup = XFASTINT (val);
6009 }
6010 }
a6f87d34 6011 }
7d64c6ad
KH
6012 return translation_table;
6013}
6014
09ee6fdd
KH
6015#define LOOKUP_TRANSLATION_TABLE(table, c, trans) \
6016 do { \
6017 trans = Qnil; \
6018 if (CHAR_TABLE_P (table)) \
6019 { \
6020 trans = CHAR_TABLE_REF (table, c); \
6021 if (CHARACTERP (trans)) \
6022 c = XFASTINT (trans), trans = Qnil; \
6023 } \
6024 else if (CONSP (table)) \
6025 { \
6026 Lisp_Object tail; \
6027 \
6028 for (tail = table; CONSP (tail); tail = XCDR (tail)) \
6029 if (CHAR_TABLE_P (XCAR (tail))) \
6030 { \
6031 trans = CHAR_TABLE_REF (XCAR (tail), c); \
6032 if (CHARACTERP (trans)) \
6033 c = XFASTINT (trans), trans = Qnil; \
6034 else if (! NILP (trans)) \
6035 break; \
6036 } \
6037 } \
e6a54062
KH
6038 } while (0)
6039
7d64c6ad 6040
69a80ea3
KH
6041static Lisp_Object
6042get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
6043 Lisp_Object val;
6044 int *buf, *buf_end;
6045 int last_block;
6046 int *from_nchars, *to_nchars;
6047{
433f7f87
KH
6048 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
6049 [TO-CHAR ...]. */
69a80ea3
KH
6050 if (CONSP (val))
6051 {
433f7f87 6052 Lisp_Object from, tail;
69a80ea3
KH
6053 int i, len;
6054
433f7f87 6055 for (tail = val; CONSP (tail); tail = XCDR (tail))
69a80ea3 6056 {
433f7f87
KH
6057 val = XCAR (tail);
6058 from = XCAR (val);
6059 len = ASIZE (from);
6060 for (i = 0; i < len; i++)
6061 {
6062 if (buf + i == buf_end)
6063 {
6064 if (! last_block)
6065 return Qt;
6066 break;
6067 }
6068 if (XINT (AREF (from, i)) != buf[i])
6069 break;
6070 }
6071 if (i == len)
6072 {
6073 val = XCDR (val);
6074 *from_nchars = len;
6075 break;
6076 }
69a80ea3 6077 }
433f7f87
KH
6078 if (! CONSP (tail))
6079 return Qnil;
69a80ea3
KH
6080 }
6081 if (VECTORP (val))
6082 *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6083 else
6084 *buf = XINT (val);
6085 return val;
6086}
6087
6088
d46c5b12 6089static int
69a80ea3 6090produce_chars (coding, translation_table, last_block)
df7492f9 6091 struct coding_system *coding;
69a80ea3
KH
6092 Lisp_Object translation_table;
6093 int last_block;
4ed46869 6094{
df7492f9
KH
6095 unsigned char *dst = coding->destination + coding->produced;
6096 unsigned char *dst_end = coding->destination + coding->dst_bytes;
119852e7
KH
6097 EMACS_INT produced;
6098 EMACS_INT produced_chars = 0;
69a80ea3 6099 int carryover = 0;
4ed46869 6100
df7492f9 6101 if (! coding->chars_at_source)
4ed46869 6102 {
119852e7 6103 /* Source characters are in coding->charbuf. */
fba4576f
AS
6104 int *buf = coding->charbuf;
6105 int *buf_end = buf + coding->charbuf_used;
4ed46869 6106
db274c7a
KH
6107 if (EQ (coding->src_object, coding->dst_object))
6108 {
6109 coding_set_source (coding);
6110 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6111 }
4ed46869 6112
df7492f9 6113 while (buf < buf_end)
4ed46869 6114 {
69a80ea3 6115 int c = *buf, i;
bc4bc72a 6116
df7492f9
KH
6117 if (c >= 0)
6118 {
69a80ea3
KH
6119 int from_nchars = 1, to_nchars = 1;
6120 Lisp_Object trans = Qnil;
6121
09ee6fdd 6122 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6123 if (! NILP (trans))
69a80ea3
KH
6124 {
6125 trans = get_translation (trans, buf, buf_end, last_block,
6126 &from_nchars, &to_nchars);
6127 if (EQ (trans, Qt))
6128 break;
6129 c = *buf;
6130 }
6131
6132 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6133 {
6134 dst = alloc_destination (coding,
6135 buf_end - buf
6136 + MAX_MULTIBYTE_LENGTH * to_nchars,
6137 dst);
db274c7a
KH
6138 if (EQ (coding->src_object, coding->dst_object))
6139 {
6140 coding_set_source (coding);
6141 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6142 }
6143 else
6144 dst_end = coding->destination + coding->dst_bytes;
69a80ea3
KH
6145 }
6146
433f7f87 6147 for (i = 0; i < to_nchars; i++)
69a80ea3 6148 {
433f7f87
KH
6149 if (i > 0)
6150 c = XINT (AREF (trans, i));
69a80ea3
KH
6151 if (coding->dst_multibyte
6152 || ! CHAR_BYTE8_P (c))
db274c7a 6153 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
69a80ea3
KH
6154 else
6155 *dst++ = CHAR_TO_BYTE8 (c);
6156 }
6157 produced_chars += to_nchars;
6158 *buf++ = to_nchars;
6159 while (--from_nchars > 0)
6160 *buf++ = 0;
d46c5b12 6161 }
df7492f9 6162 else
69a80ea3
KH
6163 /* This is an annotation datum. (-C) is the length. */
6164 buf += -c;
4ed46869 6165 }
69a80ea3 6166 carryover = buf_end - buf;
4ed46869 6167 }
fa42c37f 6168 else
fa42c37f 6169 {
119852e7 6170 /* Source characters are at coding->source. */
8f924df7 6171 const unsigned char *src = coding->source;
119852e7 6172 const unsigned char *src_end = src + coding->consumed;
4ed46869 6173
db274c7a
KH
6174 if (EQ (coding->dst_object, coding->src_object))
6175 dst_end = (unsigned char *) src;
df7492f9 6176 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 6177 {
df7492f9 6178 if (coding->src_multibyte)
fa42c37f 6179 {
71c81426 6180 int multibytep = 1;
119852e7 6181 EMACS_INT consumed_chars;
d46c5b12 6182
df7492f9
KH
6183 while (1)
6184 {
8f924df7 6185 const unsigned char *src_base = src;
df7492f9 6186 int c;
b73bfc1c 6187
df7492f9 6188 ONE_MORE_BYTE (c);
119852e7 6189 if (dst == dst_end)
df7492f9 6190 {
119852e7
KH
6191 if (EQ (coding->src_object, coding->dst_object))
6192 dst_end = (unsigned char *) src;
6193 if (dst == dst_end)
df7492f9 6194 {
119852e7
KH
6195 EMACS_INT offset = src - coding->source;
6196
6197 dst = alloc_destination (coding, src_end - src + 1,
6198 dst);
6199 dst_end = coding->destination + coding->dst_bytes;
6200 coding_set_source (coding);
6201 src = coding->source + offset;
6202 src_end = coding->source + coding->src_bytes;
db274c7a
KH
6203 if (EQ (coding->src_object, coding->dst_object))
6204 dst_end = (unsigned char *) src;
df7492f9 6205 }
df7492f9
KH
6206 }
6207 *dst++ = c;
6208 produced_chars++;
6209 }
6210 no_more_source:
6211 ;
fa42c37f
KH
6212 }
6213 else
df7492f9
KH
6214 while (src < src_end)
6215 {
71c81426 6216 int multibytep = 1;
df7492f9 6217 int c = *src++;
b73bfc1c 6218
df7492f9
KH
6219 if (dst >= dst_end - 1)
6220 {
2c78b7e1 6221 if (EQ (coding->src_object, coding->dst_object))
8f924df7 6222 dst_end = (unsigned char *) src;
2c78b7e1
KH
6223 if (dst >= dst_end - 1)
6224 {
119852e7 6225 EMACS_INT offset = src - coding->source;
db274c7a 6226 EMACS_INT more_bytes;
119852e7 6227
db274c7a
KH
6228 if (EQ (coding->src_object, coding->dst_object))
6229 more_bytes = ((src_end - src) / 2) + 2;
6230 else
6231 more_bytes = src_end - src + 2;
6232 dst = alloc_destination (coding, more_bytes, dst);
2c78b7e1
KH
6233 dst_end = coding->destination + coding->dst_bytes;
6234 coding_set_source (coding);
119852e7 6235 src = coding->source + offset;
2c78b7e1 6236 src_end = coding->source + coding->src_bytes;
db274c7a
KH
6237 if (EQ (coding->src_object, coding->dst_object))
6238 dst_end = (unsigned char *) src;
2c78b7e1 6239 }
df7492f9
KH
6240 }
6241 EMIT_ONE_BYTE (c);
6242 }
d46c5b12 6243 }
df7492f9
KH
6244 else
6245 {
6246 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 6247 {
119852e7 6248 EMACS_INT require = coding->src_bytes - coding->dst_bytes;
4ed46869 6249
df7492f9 6250 if (require > 0)
fa42c37f 6251 {
df7492f9
KH
6252 EMACS_INT offset = src - coding->source;
6253
6254 dst = alloc_destination (coding, require, dst);
6255 coding_set_source (coding);
6256 src = coding->source + offset;
6257 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
6258 }
6259 }
119852e7 6260 produced_chars = coding->consumed_char;
df7492f9 6261 while (src < src_end)
14daee73 6262 *dst++ = *src++;
fa42c37f
KH
6263 }
6264 }
6265
df7492f9 6266 produced = dst - (coding->destination + coding->produced);
284201e4 6267 if (BUFFERP (coding->dst_object) && produced_chars > 0)
df7492f9
KH
6268 insert_from_gap (produced_chars, produced);
6269 coding->produced += produced;
6270 coding->produced_char += produced_chars;
69a80ea3 6271 return carryover;
fa42c37f
KH
6272}
6273
ff0dacd7
KH
6274/* Compose text in CODING->object according to the annotation data at
6275 CHARBUF. CHARBUF is an array:
6276 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 6277 */
4ed46869 6278
df7492f9 6279static INLINE void
69a80ea3 6280produce_composition (coding, charbuf, pos)
4ed46869 6281 struct coding_system *coding;
df7492f9 6282 int *charbuf;
69a80ea3 6283 EMACS_INT pos;
4ed46869 6284{
df7492f9 6285 int len;
69a80ea3 6286 EMACS_INT to;
df7492f9 6287 enum composition_method method;
df7492f9 6288 Lisp_Object components;
fa42c37f 6289
df7492f9 6290 len = -charbuf[0];
69a80ea3 6291 to = pos + charbuf[2];
9ffd559c
KH
6292 if (to <= pos)
6293 return;
69a80ea3 6294 method = (enum composition_method) (charbuf[3]);
d46c5b12 6295
df7492f9
KH
6296 if (method == COMPOSITION_RELATIVE)
6297 components = Qnil;
9ffd559c
KH
6298 else if (method >= COMPOSITION_WITH_RULE
6299 && method <= COMPOSITION_WITH_RULE_ALTCHARS)
d46c5b12 6300 {
df7492f9
KH
6301 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6302 int i;
b73bfc1c 6303
69a80ea3
KH
6304 len -= 4;
6305 charbuf += 4;
df7492f9 6306 for (i = 0; i < len; i++)
9ffd559c
KH
6307 {
6308 args[i] = make_number (charbuf[i]);
f75c90a9 6309 if (charbuf[i] < 0)
9ffd559c
KH
6310 return;
6311 }
df7492f9
KH
6312 components = (method == COMPOSITION_WITH_ALTCHARS
6313 ? Fstring (len, args) : Fvector (len, args));
d46c5b12 6314 }
9ffd559c
KH
6315 else
6316 return;
69a80ea3 6317 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
6318}
6319
d46c5b12 6320
ff0dacd7
KH
6321/* Put `charset' property on text in CODING->object according to
6322 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 6323 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 6324 */
d46c5b12 6325
ff0dacd7 6326static INLINE void
69a80ea3 6327produce_charset (coding, charbuf, pos)
d46c5b12 6328 struct coding_system *coding;
ff0dacd7 6329 int *charbuf;
69a80ea3 6330 EMACS_INT pos;
d46c5b12 6331{
69a80ea3
KH
6332 EMACS_INT from = pos - charbuf[2];
6333 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 6334
69a80ea3 6335 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
6336 Qcharset, CHARSET_NAME (charset),
6337 coding->dst_object);
d46c5b12
KH
6338}
6339
d46c5b12 6340
df7492f9
KH
6341#define CHARBUF_SIZE 0x4000
6342
6343#define ALLOC_CONVERSION_WORK_AREA(coding) \
6344 do { \
6345 int size = CHARBUF_SIZE;; \
6346 \
6347 coding->charbuf = NULL; \
6348 while (size > 1024) \
6349 { \
6350 coding->charbuf = (int *) alloca (sizeof (int) * size); \
6351 if (coding->charbuf) \
6352 break; \
6353 size >>= 1; \
6354 } \
6355 if (! coding->charbuf) \
6356 { \
065e3595 6357 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
df7492f9
KH
6358 return coding->result; \
6359 } \
6360 coding->charbuf_size = size; \
6361 } while (0)
4ed46869 6362
d46c5b12
KH
6363
6364static void
69a80ea3 6365produce_annotation (coding, pos)
d46c5b12 6366 struct coding_system *coding;
69a80ea3 6367 EMACS_INT pos;
d46c5b12 6368{
df7492f9
KH
6369 int *charbuf = coding->charbuf;
6370 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 6371
ff0dacd7
KH
6372 if (NILP (coding->dst_object))
6373 return;
d46c5b12 6374
df7492f9 6375 while (charbuf < charbuf_end)
a84f1519 6376 {
df7492f9 6377 if (*charbuf >= 0)
69a80ea3 6378 pos += *charbuf++;
d46c5b12 6379 else
d46c5b12 6380 {
df7492f9 6381 int len = -*charbuf;
ff0dacd7 6382 switch (charbuf[1])
df7492f9
KH
6383 {
6384 case CODING_ANNOTATE_COMPOSITION_MASK:
69a80ea3 6385 produce_composition (coding, charbuf, pos);
df7492f9 6386 break;
ff0dacd7 6387 case CODING_ANNOTATE_CHARSET_MASK:
69a80ea3 6388 produce_charset (coding, charbuf, pos);
ff0dacd7 6389 break;
df7492f9
KH
6390 default:
6391 abort ();
6392 }
6393 charbuf += len;
d46c5b12 6394 }
a84f1519 6395 }
d46c5b12
KH
6396}
6397
df7492f9
KH
6398/* Decode the data at CODING->src_object into CODING->dst_object.
6399 CODING->src_object is a buffer, a string, or nil.
6400 CODING->dst_object is a buffer.
d46c5b12 6401
df7492f9
KH
6402 If CODING->src_object is a buffer, it must be the current buffer.
6403 In this case, if CODING->src_pos is positive, it is a position of
6404 the source text in the buffer, otherwise, the source text is in the
6405 gap area of the buffer, and CODING->src_pos specifies the offset of
6406 the text from GPT (which must be the same as PT). If this is the
6407 same buffer as CODING->dst_object, CODING->src_pos must be
6408 negative.
d46c5b12 6409
b6828792 6410 If CODING->src_object is a string, CODING->src_pos is an index to
df7492f9 6411 that string.
d46c5b12 6412
df7492f9
KH
6413 If CODING->src_object is nil, CODING->source must already point to
6414 the non-relocatable memory area. In this case, CODING->src_pos is
6415 an offset from CODING->source.
73be902c 6416
df7492f9
KH
6417 The decoded data is inserted at the current point of the buffer
6418 CODING->dst_object.
6419*/
d46c5b12 6420
df7492f9
KH
6421static int
6422decode_coding (coding)
d46c5b12 6423 struct coding_system *coding;
d46c5b12 6424{
df7492f9 6425 Lisp_Object attrs;
24a73b0a 6426 Lisp_Object undo_list;
7d64c6ad 6427 Lisp_Object translation_table;
69a80ea3
KH
6428 int carryover;
6429 int i;
d46c5b12 6430
df7492f9
KH
6431 if (BUFFERP (coding->src_object)
6432 && coding->src_pos > 0
6433 && coding->src_pos < GPT
6434 && coding->src_pos + coding->src_chars > GPT)
6435 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 6436
24a73b0a 6437 undo_list = Qt;
df7492f9 6438 if (BUFFERP (coding->dst_object))
1c3478b0 6439 {
df7492f9
KH
6440 if (current_buffer != XBUFFER (coding->dst_object))
6441 set_buffer_internal (XBUFFER (coding->dst_object));
6442 if (GPT != PT)
6443 move_gap_both (PT, PT_BYTE);
24a73b0a
KH
6444 undo_list = current_buffer->undo_list;
6445 current_buffer->undo_list = Qt;
1c3478b0
KH
6446 }
6447
df7492f9
KH
6448 coding->consumed = coding->consumed_char = 0;
6449 coding->produced = coding->produced_char = 0;
6450 coding->chars_at_source = 0;
065e3595 6451 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 6452 coding->errors = 0;
1c3478b0 6453
df7492f9
KH
6454 ALLOC_CONVERSION_WORK_AREA (coding);
6455
6456 attrs = CODING_ID_ATTRS (coding->id);
2170c8f0 6457 translation_table = get_translation_table (attrs, 0, NULL);
df7492f9 6458
69a80ea3 6459 carryover = 0;
df7492f9 6460 do
b73bfc1c 6461 {
69a80ea3
KH
6462 EMACS_INT pos = coding->dst_pos + coding->produced_char;
6463
df7492f9
KH
6464 coding_set_source (coding);
6465 coding->annotated = 0;
69a80ea3 6466 coding->charbuf_used = carryover;
df7492f9 6467 (*(coding->decoder)) (coding);
df7492f9 6468 coding_set_destination (coding);
69a80ea3 6469 carryover = produce_chars (coding, translation_table, 0);
df7492f9 6470 if (coding->annotated)
69a80ea3
KH
6471 produce_annotation (coding, pos);
6472 for (i = 0; i < carryover; i++)
6473 coding->charbuf[i]
6474 = coding->charbuf[coding->charbuf_used - carryover + i];
d46c5b12 6475 }
df7492f9 6476 while (coding->consumed < coding->src_bytes
54b367bb
KH
6477 && (coding->result == CODING_RESULT_SUCCESS
6478 || coding->result == CODING_RESULT_INVALID_SRC));
d46c5b12 6479
69a80ea3
KH
6480 if (carryover > 0)
6481 {
6482 coding_set_destination (coding);
6483 coding->charbuf_used = carryover;
6484 produce_chars (coding, translation_table, 1);
6485 }
6486
df7492f9
KH
6487 coding->carryover_bytes = 0;
6488 if (coding->consumed < coding->src_bytes)
d46c5b12 6489 {
df7492f9 6490 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 6491 const unsigned char *src;
df7492f9
KH
6492
6493 coding_set_source (coding);
6494 coding_set_destination (coding);
6495 src = coding->source + coding->consumed;
6496
6497 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 6498 {
df7492f9
KH
6499 /* Flush out unprocessed data as binary chars. We are sure
6500 that the number of data is less than the size of
6501 coding->charbuf. */
065e3595 6502 coding->charbuf_used = 0;
df7492f9 6503 while (nbytes-- > 0)
1c3478b0 6504 {
df7492f9 6505 int c = *src++;
98725083 6506
1c91457d
KH
6507 if (c & 0x80)
6508 c = BYTE8_TO_CHAR (c);
6509 coding->charbuf[coding->charbuf_used++] = c;
1c3478b0 6510 }
f6cbaf43 6511 produce_chars (coding, Qnil, 1);
d46c5b12 6512 }
d46c5b12 6513 else
df7492f9
KH
6514 {
6515 /* Record unprocessed bytes in coding->carryover. We are
6516 sure that the number of data is less than the size of
6517 coding->carryover. */
6518 unsigned char *p = coding->carryover;
6519
6520 coding->carryover_bytes = nbytes;
6521 while (nbytes-- > 0)
6522 *p++ = *src++;
1c3478b0 6523 }
df7492f9 6524 coding->consumed = coding->src_bytes;
b73bfc1c 6525 }
69f76525 6526
4347441b
KH
6527 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6528 decode_eol (coding);
24a73b0a
KH
6529 if (BUFFERP (coding->dst_object))
6530 {
6531 current_buffer->undo_list = undo_list;
6532 record_insert (coding->dst_pos, coding->produced_char);
6533 }
73be902c 6534 return coding->result;
4ed46869
KH
6535}
6536
aaaf0b1e 6537
e1c23804 6538/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
6539 ending before LIMIT of CODING->src_object (buffer or string), store
6540 the data in BUF, set *STOP to a starting position of the next
6541 composition (if any) or to LIMIT, and return the address of the
6542 next element of BUF.
6543
6544 If such an annotation is not found, set *STOP to a starting
6545 position of a composition after POS (if any) or to LIMIT, and
6546 return BUF. */
6547
6548static INLINE int *
6549handle_composition_annotation (pos, limit, coding, buf, stop)
6550 EMACS_INT pos, limit;
aaaf0b1e 6551 struct coding_system *coding;
ff0dacd7
KH
6552 int *buf;
6553 EMACS_INT *stop;
aaaf0b1e 6554{
ff0dacd7
KH
6555 EMACS_INT start, end;
6556 Lisp_Object prop;
aaaf0b1e 6557
ff0dacd7
KH
6558 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6559 || end > limit)
6560 *stop = limit;
6561 else if (start > pos)
6562 *stop = start;
6563 else
aaaf0b1e 6564 {
ff0dacd7 6565 if (start == pos)
aaaf0b1e 6566 {
ff0dacd7
KH
6567 /* We found a composition. Store the corresponding
6568 annotation data in BUF. */
6569 int *head = buf;
6570 enum composition_method method = COMPOSITION_METHOD (prop);
6571 int nchars = COMPOSITION_LENGTH (prop);
6572
69a80ea3 6573 ADD_COMPOSITION_DATA (buf, nchars, method);
ff0dacd7 6574 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 6575 {
ff0dacd7
KH
6576 Lisp_Object components;
6577 int len, i, i_byte;
6578
6579 components = COMPOSITION_COMPONENTS (prop);
6580 if (VECTORP (components))
aaaf0b1e 6581 {
ff0dacd7
KH
6582 len = XVECTOR (components)->size;
6583 for (i = 0; i < len; i++)
6584 *buf++ = XINT (AREF (components, i));
aaaf0b1e 6585 }
ff0dacd7 6586 else if (STRINGP (components))
aaaf0b1e 6587 {
8f924df7 6588 len = SCHARS (components);
ff0dacd7
KH
6589 i = i_byte = 0;
6590 while (i < len)
6591 {
6592 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6593 buf++;
6594 }
6595 }
6596 else if (INTEGERP (components))
6597 {
6598 len = 1;
6599 *buf++ = XINT (components);
6600 }
6601 else if (CONSP (components))
6602 {
6603 for (len = 0; CONSP (components);
6604 len++, components = XCDR (components))
6605 *buf++ = XINT (XCAR (components));
aaaf0b1e 6606 }
aaaf0b1e 6607 else
ff0dacd7
KH
6608 abort ();
6609 *head -= len;
aaaf0b1e 6610 }
aaaf0b1e 6611 }
ff0dacd7
KH
6612
6613 if (find_composition (end, limit, &start, &end, &prop,
6614 coding->src_object)
6615 && end <= limit)
6616 *stop = start;
6617 else
6618 *stop = limit;
aaaf0b1e 6619 }
ff0dacd7
KH
6620 return buf;
6621}
6622
6623
e1c23804 6624/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
6625 CODING->src_object (buffer of string), store the data in BUF, set
6626 *STOP to the position where the value of `charset' property changes
6627 (limiting by LIMIT), and return the address of the next element of
6628 BUF.
6629
6630 If the property value is nil, set *STOP to the position where the
6631 property value is non-nil (limiting by LIMIT), and return BUF. */
6632
6633static INLINE int *
6634handle_charset_annotation (pos, limit, coding, buf, stop)
6635 EMACS_INT pos, limit;
6636 struct coding_system *coding;
6637 int *buf;
6638 EMACS_INT *stop;
6639{
6640 Lisp_Object val, next;
6641 int id;
6642
6643 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6644 if (! NILP (val) && CHARSETP (val))
6645 id = XINT (CHARSET_SYMBOL_ID (val));
6646 else
6647 id = -1;
69a80ea3 6648 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
6649 next = Fnext_single_property_change (make_number (pos), Qcharset,
6650 coding->src_object,
6651 make_number (limit));
6652 *stop = XINT (next);
6653 return buf;
6654}
6655
6656
df7492f9 6657static void
09ee6fdd 6658consume_chars (coding, translation_table, max_lookup)
df7492f9 6659 struct coding_system *coding;
433f7f87 6660 Lisp_Object translation_table;
09ee6fdd 6661 int max_lookup;
df7492f9
KH
6662{
6663 int *buf = coding->charbuf;
ff0dacd7 6664 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 6665 const unsigned char *src = coding->source + coding->consumed;
4776e638 6666 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
6667 EMACS_INT pos = coding->src_pos + coding->consumed_char;
6668 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
6669 int multibytep = coding->src_multibyte;
6670 Lisp_Object eol_type;
6671 int c;
ff0dacd7 6672 EMACS_INT stop, stop_composition, stop_charset;
09ee6fdd 6673 int *lookup_buf = NULL;
433f7f87
KH
6674
6675 if (! NILP (translation_table))
09ee6fdd 6676 lookup_buf = alloca (sizeof (int) * max_lookup);
88993dfd 6677
df7492f9
KH
6678 eol_type = CODING_ID_EOL_TYPE (coding->id);
6679 if (VECTORP (eol_type))
6680 eol_type = Qunix;
88993dfd 6681
df7492f9
KH
6682 /* Note: composition handling is not yet implemented. */
6683 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 6684
0b5670c9
KH
6685 if (NILP (coding->src_object))
6686 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 6687 else
0b5670c9
KH
6688 {
6689 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6690 stop = stop_composition = pos;
6691 else
6692 stop = stop_composition = end_pos;
6693 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6694 stop = stop_charset = pos;
6695 else
6696 stop_charset = end_pos;
6697 }
ec6d2bb8 6698
24a73b0a 6699 /* Compensate for CRLF and conversion. */
ff0dacd7 6700 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 6701 while (buf < buf_end)
aaaf0b1e 6702 {
433f7f87
KH
6703 Lisp_Object trans;
6704
df7492f9 6705 if (pos == stop)
ec6d2bb8 6706 {
df7492f9
KH
6707 if (pos == end_pos)
6708 break;
ff0dacd7
KH
6709 if (pos == stop_composition)
6710 buf = handle_composition_annotation (pos, end_pos, coding,
6711 buf, &stop_composition);
6712 if (pos == stop_charset)
6713 buf = handle_charset_annotation (pos, end_pos, coding,
6714 buf, &stop_charset);
6715 stop = (stop_composition < stop_charset
6716 ? stop_composition : stop_charset);
df7492f9
KH
6717 }
6718
6719 if (! multibytep)
4776e638 6720 {
d3e4cb56 6721 EMACS_INT bytes;
aaaf0b1e 6722
ea29edf2
KH
6723 if (coding->encoder == encode_coding_raw_text)
6724 c = *src++, pos++;
6725 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
db274c7a 6726 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
4776e638 6727 else
f03caae0 6728 c = BYTE8_TO_CHAR (*src), src++, pos++;
4776e638 6729 }
df7492f9 6730 else
db274c7a 6731 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
df7492f9
KH
6732 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6733 c = '\n';
6734 if (! EQ (eol_type, Qunix))
aaaf0b1e 6735 {
df7492f9 6736 if (c == '\n')
aaaf0b1e 6737 {
df7492f9
KH
6738 if (EQ (eol_type, Qdos))
6739 *buf++ = '\r';
6740 else
6741 c = '\r';
aaaf0b1e
KH
6742 }
6743 }
433f7f87 6744
e6a54062 6745 trans = Qnil;
09ee6fdd 6746 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6747 if (NILP (trans))
433f7f87
KH
6748 *buf++ = c;
6749 else
6750 {
6751 int from_nchars = 1, to_nchars = 1;
6752 int *lookup_buf_end;
6753 const unsigned char *p = src;
6754 int i;
6755
6756 lookup_buf[0] = c;
6757 for (i = 1; i < max_lookup && p < src_end; i++)
6758 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6759 lookup_buf_end = lookup_buf + i;
6760 trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6761 &from_nchars, &to_nchars);
6762 if (EQ (trans, Qt)
6763 || buf + to_nchars > buf_end)
6764 break;
6765 *buf++ = *lookup_buf;
6766 for (i = 1; i < to_nchars; i++)
6767 *buf++ = XINT (AREF (trans, i));
6768 for (i = 1; i < from_nchars; i++, pos++)
6769 src += MULTIBYTE_LENGTH_NO_CHECK (src);
6770 }
aaaf0b1e 6771 }
ec6d2bb8 6772
df7492f9
KH
6773 coding->consumed = src - coding->source;
6774 coding->consumed_char = pos - coding->src_pos;
6775 coding->charbuf_used = buf - coding->charbuf;
6776 coding->chars_at_source = 0;
aaaf0b1e
KH
6777}
6778
4ed46869 6779
df7492f9
KH
6780/* Encode the text at CODING->src_object into CODING->dst_object.
6781 CODING->src_object is a buffer or a string.
6782 CODING->dst_object is a buffer or nil.
6783
6784 If CODING->src_object is a buffer, it must be the current buffer.
6785 In this case, if CODING->src_pos is positive, it is a position of
6786 the source text in the buffer, otherwise. the source text is in the
6787 gap area of the buffer, and coding->src_pos specifies the offset of
6788 the text from GPT (which must be the same as PT). If this is the
6789 same buffer as CODING->dst_object, CODING->src_pos must be
6790 negative and CODING should not have `pre-write-conversion'.
6791
6792 If CODING->src_object is a string, CODING should not have
6793 `pre-write-conversion'.
6794
6795 If CODING->dst_object is a buffer, the encoded data is inserted at
6796 the current point of that buffer.
6797
6798 If CODING->dst_object is nil, the encoded data is placed at the
6799 memory area specified by CODING->destination. */
6800
6801static int
6802encode_coding (coding)
4ed46869 6803 struct coding_system *coding;
4ed46869 6804{
df7492f9 6805 Lisp_Object attrs;
7d64c6ad 6806 Lisp_Object translation_table;
09ee6fdd 6807 int max_lookup;
9861e777 6808
df7492f9 6809 attrs = CODING_ID_ATTRS (coding->id);
ea29edf2
KH
6810 if (coding->encoder == encode_coding_raw_text)
6811 translation_table = Qnil, max_lookup = 0;
6812 else
6813 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 6814
df7492f9 6815 if (BUFFERP (coding->dst_object))
8844fa83 6816 {
df7492f9
KH
6817 set_buffer_internal (XBUFFER (coding->dst_object));
6818 coding->dst_multibyte
6819 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 6820 }
4ed46869 6821
b73bfc1c 6822 coding->consumed = coding->consumed_char = 0;
df7492f9 6823 coding->produced = coding->produced_char = 0;
065e3595 6824 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 6825 coding->errors = 0;
b73bfc1c 6826
df7492f9 6827 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 6828
df7492f9
KH
6829 do {
6830 coding_set_source (coding);
09ee6fdd 6831 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
6832 coding_set_destination (coding);
6833 (*(coding->encoder)) (coding);
6834 } while (coding->consumed_char < coding->src_chars);
6835
284201e4 6836 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
df7492f9
KH
6837 insert_from_gap (coding->produced_char, coding->produced);
6838
6839 return (coding->result);
ec6d2bb8
KH
6840}
6841
fb88bf2d 6842
24a73b0a
KH
6843/* Name (or base name) of work buffer for code conversion. */
6844static Lisp_Object Vcode_conversion_workbuf_name;
d46c5b12 6845
24a73b0a
KH
6846/* A working buffer used by the top level conversion. Once it is
6847 created, it is never destroyed. It has the name
6848 Vcode_conversion_workbuf_name. The other working buffers are
6849 destroyed after the use is finished, and their names are modified
6850 versions of Vcode_conversion_workbuf_name. */
6851static Lisp_Object Vcode_conversion_reused_workbuf;
b73bfc1c 6852
24a73b0a
KH
6853/* 1 iff Vcode_conversion_reused_workbuf is already in use. */
6854static int reused_workbuf_in_use;
4ed46869 6855
24a73b0a
KH
6856
6857/* Return a working buffer of code convesion. MULTIBYTE specifies the
6858 multibyteness of returning buffer. */
b73bfc1c 6859
f6cbaf43 6860static Lisp_Object
24a73b0a 6861make_conversion_work_buffer (multibyte)
f6cbaf43 6862 int multibyte;
df7492f9 6863{
24a73b0a
KH
6864 Lisp_Object name, workbuf;
6865 struct buffer *current;
4ed46869 6866
24a73b0a 6867 if (reused_workbuf_in_use++)
065e3595
KH
6868 {
6869 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6870 workbuf = Fget_buffer_create (name);
6871 }
df7492f9 6872 else
065e3595
KH
6873 {
6874 name = Vcode_conversion_workbuf_name;
6875 workbuf = Fget_buffer_create (name);
6876 if (NILP (Vcode_conversion_reused_workbuf))
6877 Vcode_conversion_reused_workbuf = workbuf;
6878 }
24a73b0a
KH
6879 current = current_buffer;
6880 set_buffer_internal (XBUFFER (workbuf));
6881 Ferase_buffer ();
df7492f9 6882 current_buffer->undo_list = Qt;
24a73b0a 6883 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
df7492f9 6884 set_buffer_internal (current);
24a73b0a 6885 return workbuf;
df7492f9 6886}
d46c5b12 6887
24a73b0a 6888
4776e638 6889static Lisp_Object
24a73b0a
KH
6890code_conversion_restore (arg)
6891 Lisp_Object arg;
4776e638 6892{
24a73b0a 6893 Lisp_Object current, workbuf;
948bdcf3 6894 struct gcpro gcpro1;
24a73b0a 6895
948bdcf3 6896 GCPRO1 (arg);
24a73b0a
KH
6897 current = XCAR (arg);
6898 workbuf = XCDR (arg);
6899 if (! NILP (workbuf))
6900 {
6901 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6902 reused_workbuf_in_use = 0;
6903 else if (! NILP (Fbuffer_live_p (workbuf)))
6904 Fkill_buffer (workbuf);
6905 }
6906 set_buffer_internal (XBUFFER (current));
948bdcf3 6907 UNGCPRO;
4776e638
KH
6908 return Qnil;
6909}
b73bfc1c 6910
24a73b0a
KH
6911Lisp_Object
6912code_conversion_save (with_work_buf, multibyte)
4776e638 6913 int with_work_buf, multibyte;
df7492f9 6914{
24a73b0a 6915 Lisp_Object workbuf = Qnil;
b73bfc1c 6916
4776e638 6917 if (with_work_buf)
24a73b0a
KH
6918 workbuf = make_conversion_work_buffer (multibyte);
6919 record_unwind_protect (code_conversion_restore,
6920 Fcons (Fcurrent_buffer (), workbuf));
4776e638 6921 return workbuf;
df7492f9 6922}
d46c5b12 6923
df7492f9
KH
6924int
6925decode_coding_gap (coding, chars, bytes)
6926 struct coding_system *coding;
6927 EMACS_INT chars, bytes;
6928{
6929 int count = specpdl_ptr - specpdl;
5e5c78be 6930 Lisp_Object attrs;
fb88bf2d 6931
24a73b0a 6932 code_conversion_save (0, 0);
ec6d2bb8 6933
24a73b0a 6934 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6935 coding->src_chars = chars;
6936 coding->src_bytes = bytes;
6937 coding->src_pos = -chars;
6938 coding->src_pos_byte = -bytes;
6939 coding->src_multibyte = chars < bytes;
24a73b0a 6940 coding->dst_object = coding->src_object;
df7492f9
KH
6941 coding->dst_pos = PT;
6942 coding->dst_pos_byte = PT_BYTE;
71c81426 6943 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869 6944
df7492f9
KH
6945 if (CODING_REQUIRE_DETECTION (coding))
6946 detect_coding (coding);
8f924df7 6947
9286b333 6948 coding->mode |= CODING_MODE_LAST_BLOCK;
287c57d7 6949 current_buffer->text->inhibit_shrinking = 1;
df7492f9 6950 decode_coding (coding);
287c57d7 6951 current_buffer->text->inhibit_shrinking = 0;
d46c5b12 6952
5e5c78be
KH
6953 attrs = CODING_ID_ATTRS (coding->id);
6954 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 6955 {
5e5c78be
KH
6956 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6957 Lisp_Object val;
6958
6959 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
6960 val = call1 (CODING_ATTR_POST_READ (attrs),
6961 make_number (coding->produced_char));
5e5c78be
KH
6962 CHECK_NATNUM (val);
6963 coding->produced_char += Z - prev_Z;
6964 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 6965 }
4ed46869 6966
df7492f9 6967 unbind_to (count, Qnil);
b73bfc1c
KH
6968 return coding->result;
6969}
52d41803 6970
4ed46869 6971int
df7492f9 6972encode_coding_gap (coding, chars, bytes)
4ed46869 6973 struct coding_system *coding;
df7492f9 6974 EMACS_INT chars, bytes;
4ed46869 6975{
df7492f9 6976 int count = specpdl_ptr - specpdl;
4ed46869 6977
24a73b0a 6978 code_conversion_save (0, 0);
4ed46869 6979
24a73b0a 6980 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6981 coding->src_chars = chars;
6982 coding->src_bytes = bytes;
6983 coding->src_pos = -chars;
6984 coding->src_pos_byte = -bytes;
6985 coding->src_multibyte = chars < bytes;
6986 coding->dst_object = coding->src_object;
6987 coding->dst_pos = PT;
6988 coding->dst_pos_byte = PT_BYTE;
4ed46869 6989
df7492f9 6990 encode_coding (coding);
b73bfc1c 6991
df7492f9
KH
6992 unbind_to (count, Qnil);
6993 return coding->result;
6994}
4ed46869 6995
d46c5b12 6996
df7492f9
KH
6997/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6998 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 6999
df7492f9 7000 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 7001
df7492f9
KH
7002 If it is a buffer, the text is at point of the buffer. FROM and TO
7003 are positions in the buffer.
b73bfc1c 7004
df7492f9
KH
7005 If it is a string, the text is at the beginning of the string.
7006 FROM and TO are indices to the string.
4ed46869 7007
df7492f9
KH
7008 If it is nil, the text is at coding->source. FROM and TO are
7009 indices to coding->source.
bb10be8b 7010
df7492f9 7011 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 7012
df7492f9
KH
7013 If it is a buffer, the decoded text is inserted at point of the
7014 buffer. If the buffer is the same as SRC_OBJECT, the source text
7015 is deleted.
4ed46869 7016
df7492f9
KH
7017 If it is Qt, a string is made from the decoded text, and
7018 set in CODING->dst_object.
d46c5b12 7019
df7492f9 7020 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 7021 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
7022 CODING->destination by xmalloc. If the decoded text is longer than
7023 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7024 */
d46c5b12 7025
df7492f9
KH
7026void
7027decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7028 dst_object)
d46c5b12 7029 struct coding_system *coding;
df7492f9
KH
7030 Lisp_Object src_object;
7031 EMACS_INT from, from_byte, to, to_byte;
7032 Lisp_Object dst_object;
d46c5b12 7033{
df7492f9
KH
7034 int count = specpdl_ptr - specpdl;
7035 unsigned char *destination;
7036 EMACS_INT dst_bytes;
7037 EMACS_INT chars = to - from;
7038 EMACS_INT bytes = to_byte - from_byte;
7039 Lisp_Object attrs;
4776e638 7040 int saved_pt = -1, saved_pt_byte;
64cedb0c 7041 int need_marker_adjustment = 0;
b3bfad50 7042 Lisp_Object old_deactivate_mark;
d46c5b12 7043
b3bfad50 7044 old_deactivate_mark = Vdeactivate_mark;
93dec019 7045
df7492f9 7046 if (NILP (dst_object))
d46c5b12 7047 {
df7492f9
KH
7048 destination = coding->destination;
7049 dst_bytes = coding->dst_bytes;
d46c5b12 7050 }
93dec019 7051
df7492f9
KH
7052 coding->src_object = src_object;
7053 coding->src_chars = chars;
7054 coding->src_bytes = bytes;
7055 coding->src_multibyte = chars < bytes;
70ad9fc4 7056
df7492f9 7057 if (STRINGP (src_object))
d46c5b12 7058 {
df7492f9
KH
7059 coding->src_pos = from;
7060 coding->src_pos_byte = from_byte;
d46c5b12 7061 }
df7492f9 7062 else if (BUFFERP (src_object))
88993dfd 7063 {
df7492f9
KH
7064 set_buffer_internal (XBUFFER (src_object));
7065 if (from != GPT)
7066 move_gap_both (from, from_byte);
7067 if (EQ (src_object, dst_object))
fb88bf2d 7068 {
64cedb0c
KH
7069 struct Lisp_Marker *tail;
7070
7071 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7072 {
7073 tail->need_adjustment
7074 = tail->charpos == (tail->insertion_type ? from : to);
7075 need_marker_adjustment |= tail->need_adjustment;
7076 }
4776e638 7077 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9 7078 TEMP_SET_PT_BOTH (from, from_byte);
f4a3cc44 7079 current_buffer->text->inhibit_shrinking = 1;
df7492f9
KH
7080 del_range_both (from, from_byte, to, to_byte, 1);
7081 coding->src_pos = -chars;
7082 coding->src_pos_byte = -bytes;
fb88bf2d 7083 }
df7492f9 7084 else
fb88bf2d 7085 {
df7492f9
KH
7086 coding->src_pos = from;
7087 coding->src_pos_byte = from_byte;
fb88bf2d 7088 }
88993dfd
KH
7089 }
7090
df7492f9
KH
7091 if (CODING_REQUIRE_DETECTION (coding))
7092 detect_coding (coding);
7093 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 7094
2cb26057
KH
7095 if (EQ (dst_object, Qt)
7096 || (! NILP (CODING_ATTR_POST_READ (attrs))
7097 && NILP (dst_object)))
b73bfc1c 7098 {
a1567c45
SM
7099 coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7100 coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
df7492f9
KH
7101 coding->dst_pos = BEG;
7102 coding->dst_pos_byte = BEG_BYTE;
b73bfc1c 7103 }
df7492f9 7104 else if (BUFFERP (dst_object))
d46c5b12 7105 {
24a73b0a 7106 code_conversion_save (0, 0);
df7492f9
KH
7107 coding->dst_object = dst_object;
7108 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7109 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7110 coding->dst_multibyte
7111 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
7112 }
7113 else
7114 {
24a73b0a 7115 code_conversion_save (0, 0);
df7492f9 7116 coding->dst_object = Qnil;
0154725e
SM
7117 /* Most callers presume this will return a multibyte result, and they
7118 won't use `binary' or `raw-text' anyway, so let's not worry about
7119 CODING_FOR_UNIBYTE. */
bb555731 7120 coding->dst_multibyte = 1;
d46c5b12
KH
7121 }
7122
df7492f9 7123 decode_coding (coding);
fa46990e 7124
df7492f9
KH
7125 if (BUFFERP (coding->dst_object))
7126 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 7127
df7492f9 7128 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 7129 {
b3bfad50 7130 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
df7492f9 7131 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 7132 Lisp_Object val;
d46c5b12 7133
c0cc7f7f 7134 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
b3bfad50
KH
7135 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7136 old_deactivate_mark);
d4850d67
KH
7137 val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7138 make_number (coding->produced_char));
df7492f9
KH
7139 UNGCPRO;
7140 CHECK_NATNUM (val);
7141 coding->produced_char += Z - prev_Z;
7142 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 7143 }
de79a6a5 7144
df7492f9 7145 if (EQ (dst_object, Qt))
ec6d2bb8 7146 {
df7492f9
KH
7147 coding->dst_object = Fbuffer_string ();
7148 }
7149 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7150 {
7151 set_buffer_internal (XBUFFER (coding->dst_object));
7152 if (dst_bytes < coding->produced)
7153 {
b3bfad50 7154 destination = xrealloc (destination, coding->produced);
df7492f9
KH
7155 if (! destination)
7156 {
065e3595
KH
7157 record_conversion_result (coding,
7158 CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
7159 unbind_to (count, Qnil);
7160 return;
7161 }
7162 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7163 move_gap_both (BEGV, BEGV_BYTE);
7164 bcopy (BEGV_ADDR, destination, coding->produced);
7165 coding->destination = destination;
d46c5b12 7166 }
ec6d2bb8 7167 }
b73bfc1c 7168
4776e638
KH
7169 if (saved_pt >= 0)
7170 {
7171 /* This is the case of:
7172 (BUFFERP (src_object) && EQ (src_object, dst_object))
7173 As we have moved PT while replacing the original buffer
7174 contents, we must recover it now. */
7175 set_buffer_internal (XBUFFER (src_object));
f4a3cc44 7176 current_buffer->text->inhibit_shrinking = 0;
4776e638
KH
7177 if (saved_pt < from)
7178 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7179 else if (saved_pt < from + chars)
7180 TEMP_SET_PT_BOTH (from, from_byte);
7181 else if (! NILP (current_buffer->enable_multibyte_characters))
7182 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7183 saved_pt_byte + (coding->produced - bytes));
7184 else
7185 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7186 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7187
7188 if (need_marker_adjustment)
7189 {
7190 struct Lisp_Marker *tail;
7191
7192 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7193 if (tail->need_adjustment)
7194 {
7195 tail->need_adjustment = 0;
7196 if (tail->insertion_type)
7197 {
7198 tail->bytepos = from_byte;
7199 tail->charpos = from;
7200 }
7201 else
7202 {
7203 tail->bytepos = from_byte + coding->produced;
7204 tail->charpos
7205 = (NILP (current_buffer->enable_multibyte_characters)
7206 ? tail->bytepos : from + coding->produced_char);
7207 }
7208 }
7209 }
d46c5b12 7210 }
4776e638 7211
b3bfad50 7212 Vdeactivate_mark = old_deactivate_mark;
065e3595 7213 unbind_to (count, coding->dst_object);
d46c5b12
KH
7214}
7215
d46c5b12 7216
df7492f9
KH
7217void
7218encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7219 dst_object)
d46c5b12 7220 struct coding_system *coding;
df7492f9
KH
7221 Lisp_Object src_object;
7222 EMACS_INT from, from_byte, to, to_byte;
7223 Lisp_Object dst_object;
d46c5b12 7224{
b73bfc1c 7225 int count = specpdl_ptr - specpdl;
df7492f9
KH
7226 EMACS_INT chars = to - from;
7227 EMACS_INT bytes = to_byte - from_byte;
7228 Lisp_Object attrs;
4776e638 7229 int saved_pt = -1, saved_pt_byte;
64cedb0c 7230 int need_marker_adjustment = 0;
c02d943b 7231 int kill_src_buffer = 0;
b3bfad50 7232 Lisp_Object old_deactivate_mark;
df7492f9 7233
b3bfad50 7234 old_deactivate_mark = Vdeactivate_mark;
df7492f9
KH
7235
7236 coding->src_object = src_object;
7237 coding->src_chars = chars;
7238 coding->src_bytes = bytes;
7239 coding->src_multibyte = chars < bytes;
7240
7241 attrs = CODING_ID_ATTRS (coding->id);
7242
64cedb0c
KH
7243 if (EQ (src_object, dst_object))
7244 {
7245 struct Lisp_Marker *tail;
7246
7247 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7248 {
7249 tail->need_adjustment
7250 = tail->charpos == (tail->insertion_type ? from : to);
7251 need_marker_adjustment |= tail->need_adjustment;
7252 }
7253 }
7254
df7492f9 7255 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 7256 {
24a73b0a 7257 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
7258 set_buffer_internal (XBUFFER (coding->src_object));
7259 if (STRINGP (src_object))
7260 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7261 else if (BUFFERP (src_object))
7262 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7263 else
7264 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 7265
df7492f9
KH
7266 if (EQ (src_object, dst_object))
7267 {
7268 set_buffer_internal (XBUFFER (src_object));
4776e638 7269 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
7270 del_range_both (from, from_byte, to, to_byte, 1);
7271 set_buffer_internal (XBUFFER (coding->src_object));
7272 }
7273
d4850d67
KH
7274 {
7275 Lisp_Object args[3];
b3bfad50 7276 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
d4850d67 7277
b3bfad50
KH
7278 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7279 old_deactivate_mark);
d4850d67
KH
7280 args[0] = CODING_ATTR_PRE_WRITE (attrs);
7281 args[1] = make_number (BEG);
7282 args[2] = make_number (Z);
7283 safe_call (3, args);
b3bfad50 7284 UNGCPRO;
d4850d67 7285 }
c02d943b
KH
7286 if (XBUFFER (coding->src_object) != current_buffer)
7287 kill_src_buffer = 1;
ac87bbef 7288 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7289 if (BEG != GPT)
7290 move_gap_both (BEG, BEG_BYTE);
7291 coding->src_chars = Z - BEG;
7292 coding->src_bytes = Z_BYTE - BEG_BYTE;
7293 coding->src_pos = BEG;
7294 coding->src_pos_byte = BEG_BYTE;
7295 coding->src_multibyte = Z < Z_BYTE;
7296 }
7297 else if (STRINGP (src_object))
d46c5b12 7298 {
24a73b0a 7299 code_conversion_save (0, 0);
df7492f9
KH
7300 coding->src_pos = from;
7301 coding->src_pos_byte = from_byte;
b73bfc1c 7302 }
df7492f9 7303 else if (BUFFERP (src_object))
b73bfc1c 7304 {
24a73b0a 7305 code_conversion_save (0, 0);
df7492f9 7306 set_buffer_internal (XBUFFER (src_object));
df7492f9 7307 if (EQ (src_object, dst_object))
d46c5b12 7308 {
4776e638 7309 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
7310 coding->src_object = del_range_1 (from, to, 1, 1);
7311 coding->src_pos = 0;
7312 coding->src_pos_byte = 0;
d46c5b12 7313 }
df7492f9 7314 else
d46c5b12 7315 {
ff0dacd7
KH
7316 if (from < GPT && to >= GPT)
7317 move_gap_both (from, from_byte);
df7492f9
KH
7318 coding->src_pos = from;
7319 coding->src_pos_byte = from_byte;
d46c5b12 7320 }
d46c5b12 7321 }
4776e638 7322 else
24a73b0a 7323 code_conversion_save (0, 0);
d46c5b12 7324
df7492f9 7325 if (BUFFERP (dst_object))
88993dfd 7326 {
df7492f9 7327 coding->dst_object = dst_object;
28f67a95
KH
7328 if (EQ (src_object, dst_object))
7329 {
7330 coding->dst_pos = from;
7331 coding->dst_pos_byte = from_byte;
7332 }
7333 else
7334 {
7335 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7336 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7337 }
df7492f9
KH
7338 coding->dst_multibyte
7339 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 7340 }
df7492f9 7341 else if (EQ (dst_object, Qt))
d46c5b12 7342 {
df7492f9 7343 coding->dst_object = Qnil;
df7492f9 7344 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
7345 if (coding->dst_bytes == 0)
7346 coding->dst_bytes = 1;
7347 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 7348 coding->dst_multibyte = 0;
d46c5b12
KH
7349 }
7350 else
7351 {
df7492f9
KH
7352 coding->dst_object = Qnil;
7353 coding->dst_multibyte = 0;
d46c5b12
KH
7354 }
7355
df7492f9 7356 encode_coding (coding);
d46c5b12 7357
df7492f9 7358 if (EQ (dst_object, Qt))
d46c5b12 7359 {
df7492f9
KH
7360 if (BUFFERP (coding->dst_object))
7361 coding->dst_object = Fbuffer_string ();
7362 else
d46c5b12 7363 {
df7492f9
KH
7364 coding->dst_object
7365 = make_unibyte_string ((char *) coding->destination,
7366 coding->produced);
7367 xfree (coding->destination);
d46c5b12 7368 }
4ed46869 7369 }
d46c5b12 7370
4776e638
KH
7371 if (saved_pt >= 0)
7372 {
7373 /* This is the case of:
7374 (BUFFERP (src_object) && EQ (src_object, dst_object))
7375 As we have moved PT while replacing the original buffer
7376 contents, we must recover it now. */
7377 set_buffer_internal (XBUFFER (src_object));
7378 if (saved_pt < from)
7379 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7380 else if (saved_pt < from + chars)
7381 TEMP_SET_PT_BOTH (from, from_byte);
7382 else if (! NILP (current_buffer->enable_multibyte_characters))
7383 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7384 saved_pt_byte + (coding->produced - bytes));
d46c5b12 7385 else
4776e638
KH
7386 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7387 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7388
7389 if (need_marker_adjustment)
7390 {
7391 struct Lisp_Marker *tail;
7392
7393 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7394 if (tail->need_adjustment)
7395 {
7396 tail->need_adjustment = 0;
7397 if (tail->insertion_type)
7398 {
7399 tail->bytepos = from_byte;
7400 tail->charpos = from;
7401 }
7402 else
7403 {
7404 tail->bytepos = from_byte + coding->produced;
7405 tail->charpos
7406 = (NILP (current_buffer->enable_multibyte_characters)
7407 ? tail->bytepos : from + coding->produced_char);
7408 }
7409 }
7410 }
4776e638
KH
7411 }
7412
c02d943b
KH
7413 if (kill_src_buffer)
7414 Fkill_buffer (coding->src_object);
b3bfad50
KH
7415
7416 Vdeactivate_mark = old_deactivate_mark;
df7492f9 7417 unbind_to (count, Qnil);
b73bfc1c
KH
7418}
7419
df7492f9 7420
b73bfc1c 7421Lisp_Object
df7492f9 7422preferred_coding_system ()
b73bfc1c 7423{
df7492f9 7424 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 7425
df7492f9 7426 return CODING_ID_NAME (id);
4ed46869
KH
7427}
7428
7429\f
7430#ifdef emacs
1397dc18 7431/*** 8. Emacs Lisp library functions ***/
4ed46869 7432
4ed46869 7433DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 7434 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 7435See the documentation of `define-coding-system' for information
48b0f3ae
PJ
7436about coding-system objects. */)
7437 (obj)
4ed46869
KH
7438 Lisp_Object obj;
7439{
44e8490d
KH
7440 if (NILP (obj)
7441 || CODING_SYSTEM_ID (obj) >= 0)
7442 return Qt;
7443 if (! SYMBOLP (obj)
7444 || NILP (Fget (obj, Qcoding_system_define_form)))
7445 return Qnil;
7446 return Qt;
4ed46869
KH
7447}
7448
9d991de8
RS
7449DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7450 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
7451 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
7452 (prompt)
4ed46869
KH
7453 Lisp_Object prompt;
7454{
e0e989f6 7455 Lisp_Object val;
9d991de8
RS
7456 do
7457 {
4608c386
KH
7458 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7459 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 7460 }
8f924df7 7461 while (SCHARS (val) == 0);
e0e989f6 7462 return (Fintern (val, Qnil));
4ed46869
KH
7463}
7464
9b787f3e 7465DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae 7466 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
c7183fb8
GM
7467If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7468Ignores case when completing coding systems (all Emacs coding systems
7469are lower-case). */)
48b0f3ae 7470 (prompt, default_coding_system)
9b787f3e 7471 Lisp_Object prompt, default_coding_system;
4ed46869 7472{
f44d27ce 7473 Lisp_Object val;
c7183fb8
GM
7474 int count = SPECPDL_INDEX ();
7475
9b787f3e 7476 if (SYMBOLP (default_coding_system))
57d25e6f 7477 default_coding_system = SYMBOL_NAME (default_coding_system);
c7183fb8 7478 specbind (Qcompletion_ignore_case, Qt);
4608c386 7479 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
7480 Qt, Qnil, Qcoding_system_history,
7481 default_coding_system, Qnil);
c7183fb8 7482 unbind_to (count, Qnil);
8f924df7 7483 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
7484}
7485
7486DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7487 1, 1, 0,
48b0f3ae 7488 doc: /* Check validity of CODING-SYSTEM.
9ffd559c
KH
7489If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7490It is valid if it is nil or a symbol defined as a coding system by the
7491function `define-coding-system'. */)
df7492f9 7492 (coding_system)
4ed46869
KH
7493 Lisp_Object coding_system;
7494{
44e8490d
KH
7495 Lisp_Object define_form;
7496
7497 define_form = Fget (coding_system, Qcoding_system_define_form);
7498 if (! NILP (define_form))
7499 {
7500 Fput (coding_system, Qcoding_system_define_form, Qnil);
7501 safe_eval (define_form);
7502 }
4ed46869
KH
7503 if (!NILP (Fcoding_system_p (coding_system)))
7504 return coding_system;
fcad4ec4 7505 xsignal1 (Qcoding_system_error, coding_system);
4ed46869 7506}
df7492f9 7507
3a73fa5d 7508\f
89528eb3
KH
7509/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
7510 HIGHEST is nonzero, return the coding system of the highest
7511 priority among the detected coding systems. Otherwize return a
7512 list of detected coding systems sorted by their priorities. If
7513 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7514 multibyte form but contains only ASCII and eight-bit chars.
7515 Otherwise, the bytes are raw bytes.
7516
7517 CODING-SYSTEM controls the detection as below:
7518
7519 If it is nil, detect both text-format and eol-format. If the
7520 text-format part of CODING-SYSTEM is already specified
7521 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
7522 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7523 detect only text-format. */
7524
d46c5b12 7525Lisp_Object
24a73b0a
KH
7526detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7527 coding_system)
8f924df7 7528 const unsigned char *src;
13818c30
SM
7529 EMACS_INT src_chars, src_bytes;
7530 int highest;
0a28aafb 7531 int multibytep;
df7492f9 7532 Lisp_Object coding_system;
4ed46869 7533{
8f924df7 7534 const unsigned char *src_end = src + src_bytes;
df7492f9
KH
7535 Lisp_Object attrs, eol_type;
7536 Lisp_Object val;
7537 struct coding_system coding;
89528eb3 7538 int id;
ff0dacd7 7539 struct coding_detection_info detect_info;
24a73b0a 7540 enum coding_category base_category;
2f3cbb32 7541 int null_byte_found = 0, eight_bit_found = 0;
b73bfc1c 7542
df7492f9
KH
7543 if (NILP (coding_system))
7544 coding_system = Qundecided;
7545 setup_coding_system (coding_system, &coding);
7546 attrs = CODING_ID_ATTRS (coding.id);
7547 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 7548 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 7549
df7492f9 7550 coding.source = src;
24a73b0a 7551 coding.src_chars = src_chars;
df7492f9
KH
7552 coding.src_bytes = src_bytes;
7553 coding.src_multibyte = multibytep;
7554 coding.consumed = 0;
89528eb3 7555 coding.mode |= CODING_MODE_LAST_BLOCK;
d46c5b12 7556
ff0dacd7 7557 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 7558
89528eb3 7559 /* At first, detect text-format if necessary. */
24a73b0a
KH
7560 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7561 if (base_category == coding_category_undecided)
4ed46869 7562 {
ff0dacd7
KH
7563 enum coding_category category;
7564 struct coding_system *this;
7565 int c, i;
88993dfd 7566
2f3cbb32 7567 coding.head_ascii = -1;
24a73b0a 7568 /* Skip all ASCII bytes except for a few ISO2022 controls. */
2f3cbb32 7569 for (; src < src_end; src++)
4ed46869 7570 {
df7492f9 7571 c = *src;
6cb21a4f 7572 if (c & 0x80)
6cb21a4f 7573 {
2f3cbb32
KH
7574 eight_bit_found = 1;
7575 if (coding.head_ascii < 0)
7576 coding.head_ascii = src - coding.source;
7577 if (null_byte_found)
7578 break;
7579 }
7580 if (c < 0x20)
7581 {
7582 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7583 && ! inhibit_iso_escape_detection
7584 && ! detect_info.checked)
6cb21a4f 7585 {
2f3cbb32
KH
7586 if (coding.head_ascii < 0)
7587 coding.head_ascii = src - coding.source;
7588 if (detect_coding_iso_2022 (&coding, &detect_info))
7589 {
7590 /* We have scanned the whole data. */
7591 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7592 /* We didn't find an 8-bit code. We may have
7593 found a null-byte, but it's very rare that
7594 a binary file confirm to ISO-2022. */
7595 src = src_end;
7596 break;
7597 }
7598 }
7599 else if (! c)
7600 {
7601 null_byte_found = 1;
7602 if (eight_bit_found)
7603 break;
6cb21a4f
KH
7604 }
7605 }
4ed46869 7606 }
2f3cbb32
KH
7607 if (coding.head_ascii < 0)
7608 coding.head_ascii = src - coding.source;
88993dfd 7609
2f3cbb32
KH
7610 if (null_byte_found || eight_bit_found
7611 || coding.head_ascii < coding.src_bytes
6cb21a4f
KH
7612 || detect_info.found)
7613 {
2f3cbb32 7614 if (coding.head_ascii == coding.src_bytes)
6cb21a4f
KH
7615 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
7616 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7 7617 {
6cb21a4f 7618 category = coding_priorities[i];
c7266f4a 7619 this = coding_categories + category;
6cb21a4f 7620 if (detect_info.found & (1 << category))
ff0dacd7
KH
7621 break;
7622 }
6cb21a4f 7623 else
2f3cbb32
KH
7624 {
7625 if (null_byte_found)
7626 {
7627 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
7628 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
7629 }
7630 for (i = 0; i < coding_category_raw_text; i++)
7631 {
7632 category = coding_priorities[i];
7633 this = coding_categories + category;
6cb21a4f 7634
2f3cbb32
KH
7635 if (this->id < 0)
7636 {
7637 /* No coding system of this category is defined. */
7638 detect_info.rejected |= (1 << category);
7639 }
7640 else if (category >= coding_category_raw_text)
7641 continue;
7642 else if (detect_info.checked & (1 << category))
7643 {
7644 if (highest
7645 && (detect_info.found & (1 << category)))
6cb21a4f 7646 break;
2f3cbb32
KH
7647 }
7648 else if ((*(this->detector)) (&coding, &detect_info)
7649 && highest
7650 && (detect_info.found & (1 << category)))
7651 {
7652 if (category == coding_category_utf_16_auto)
7653 {
7654 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7655 category = coding_category_utf_16_le;
7656 else
7657 category = coding_category_utf_16_be;
7658 }
7659 break;
7660 }
7661 }
7662 }
6cb21a4f 7663 }
ec6d2bb8 7664
2f3cbb32 7665 if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY)
ec6d2bb8 7666 {
ff0dacd7 7667 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
7668 id = coding_categories[coding_category_raw_text].id;
7669 val = Fcons (make_number (id), Qnil);
7670 }
ff0dacd7 7671 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 7672 {
ff0dacd7 7673 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
7674 id = coding_categories[coding_category_undecided].id;
7675 val = Fcons (make_number (id), Qnil);
7676 }
7677 else if (highest)
7678 {
ff0dacd7 7679 if (detect_info.found)
ec6d2bb8 7680 {
ff0dacd7
KH
7681 detect_info.found = 1 << category;
7682 val = Fcons (make_number (this->id), Qnil);
7683 }
7684 else
7685 for (i = 0; i < coding_category_raw_text; i++)
7686 if (! (detect_info.rejected & (1 << coding_priorities[i])))
7687 {
7688 detect_info.found = 1 << coding_priorities[i];
7689 id = coding_categories[coding_priorities[i]].id;
7690 val = Fcons (make_number (id), Qnil);
7691 break;
7692 }
7693 }
89528eb3
KH
7694 else
7695 {
ff0dacd7
KH
7696 int mask = detect_info.rejected | detect_info.found;
7697 int found = 0;
89528eb3 7698 val = Qnil;
ec6d2bb8 7699
89528eb3 7700 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
7701 {
7702 category = coding_priorities[i];
7703 if (! (mask & (1 << category)))
ec6d2bb8 7704 {
ff0dacd7
KH
7705 found |= 1 << category;
7706 id = coding_categories[category].id;
c7266f4a
KH
7707 if (id >= 0)
7708 val = Fcons (make_number (id), val);
ff0dacd7
KH
7709 }
7710 }
7711 for (i = coding_category_raw_text - 1; i >= 0; i--)
7712 {
7713 category = coding_priorities[i];
7714 if (detect_info.found & (1 << category))
7715 {
7716 id = coding_categories[category].id;
7717 val = Fcons (make_number (id), val);
ec6d2bb8 7718 }
ec6d2bb8 7719 }
ff0dacd7 7720 detect_info.found |= found;
ec6d2bb8 7721 }
ec6d2bb8 7722 }
24a73b0a
KH
7723 else if (base_category == coding_category_utf_16_auto)
7724 {
7725 if (detect_coding_utf_16 (&coding, &detect_info))
7726 {
24a73b0a
KH
7727 struct coding_system *this;
7728
7729 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7730 this = coding_categories + coding_category_utf_16_le;
7731 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7732 this = coding_categories + coding_category_utf_16_be;
7733 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7734 this = coding_categories + coding_category_utf_16_be_nosig;
7735 else
7736 this = coding_categories + coding_category_utf_16_le_nosig;
7737 val = Fcons (make_number (this->id), Qnil);
7738 }
7739 }
df7492f9
KH
7740 else
7741 {
ff0dacd7 7742 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 7743 val = Fcons (make_number (coding.id), Qnil);
4ed46869 7744 }
df7492f9 7745
89528eb3 7746 /* Then, detect eol-format if necessary. */
df7492f9 7747 {
89528eb3 7748 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
7749 Lisp_Object tail;
7750
89528eb3
KH
7751 if (VECTORP (eol_type))
7752 {
ff0dacd7 7753 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
2f3cbb32
KH
7754 {
7755 if (null_byte_found)
7756 normal_eol = EOL_SEEN_LF;
7757 else
7758 normal_eol = detect_eol (coding.source, src_bytes,
7759 coding_category_raw_text);
7760 }
ff0dacd7
KH
7761 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7762 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
7763 utf_16_be_eol = detect_eol (coding.source, src_bytes,
7764 coding_category_utf_16_be);
ff0dacd7
KH
7765 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7766 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
7767 utf_16_le_eol = detect_eol (coding.source, src_bytes,
7768 coding_category_utf_16_le);
7769 }
7770 else
7771 {
7772 if (EQ (eol_type, Qunix))
7773 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7774 else if (EQ (eol_type, Qdos))
7775 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7776 else
7777 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7778 }
7779
df7492f9
KH
7780 for (tail = val; CONSP (tail); tail = XCDR (tail))
7781 {
89528eb3 7782 enum coding_category category;
df7492f9 7783 int this_eol;
89528eb3
KH
7784
7785 id = XINT (XCAR (tail));
7786 attrs = CODING_ID_ATTRS (id);
7787 category = XINT (CODING_ATTR_CATEGORY (attrs));
7788 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
7789 if (VECTORP (eol_type))
7790 {
89528eb3
KH
7791 if (category == coding_category_utf_16_be
7792 || category == coding_category_utf_16_be_nosig)
7793 this_eol = utf_16_be_eol;
7794 else if (category == coding_category_utf_16_le
7795 || category == coding_category_utf_16_le_nosig)
7796 this_eol = utf_16_le_eol;
df7492f9 7797 else
89528eb3
KH
7798 this_eol = normal_eol;
7799
df7492f9
KH
7800 if (this_eol == EOL_SEEN_LF)
7801 XSETCAR (tail, AREF (eol_type, 0));
7802 else if (this_eol == EOL_SEEN_CRLF)
7803 XSETCAR (tail, AREF (eol_type, 1));
7804 else if (this_eol == EOL_SEEN_CR)
7805 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
7806 else
7807 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 7808 }
89528eb3
KH
7809 else
7810 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
7811 }
7812 }
ec6d2bb8 7813
03699b14 7814 return (highest ? XCAR (val) : val);
ec6d2bb8
KH
7815}
7816
ec6d2bb8 7817
d46c5b12
KH
7818DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7819 2, 3, 0,
48b0f3ae
PJ
7820 doc: /* Detect coding system of the text in the region between START and END.
7821Return a list of possible coding systems ordered by priority.
ec6d2bb8 7822
12e0131a
KH
7823If only ASCII characters are found (except for such ISO-2022 control
7824characters ISO-2022 as ESC), it returns a list of single element
48b0f3ae
PJ
7825`undecided' or its subsidiary coding system according to a detected
7826end-of-line format.
ec6d2bb8 7827
48b0f3ae
PJ
7828If optional argument HIGHEST is non-nil, return the coding system of
7829highest priority. */)
7830 (start, end, highest)
d46c5b12
KH
7831 Lisp_Object start, end, highest;
7832{
7833 int from, to;
7834 int from_byte, to_byte;
ec6d2bb8 7835
b7826503
PJ
7836 CHECK_NUMBER_COERCE_MARKER (start);
7837 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 7838
d46c5b12
KH
7839 validate_region (&start, &end);
7840 from = XINT (start), to = XINT (end);
7841 from_byte = CHAR_TO_BYTE (from);
7842 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 7843
d46c5b12
KH
7844 if (from < GPT && to >= GPT)
7845 move_gap_both (to, to_byte);
c210f766 7846
d46c5b12 7847 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 7848 to - from, to_byte - from_byte,
0a28aafb
KH
7849 !NILP (highest),
7850 !NILP (current_buffer
df7492f9
KH
7851 ->enable_multibyte_characters),
7852 Qnil);
ec6d2bb8
KH
7853}
7854
d46c5b12
KH
7855DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7856 1, 2, 0,
48b0f3ae
PJ
7857 doc: /* Detect coding system of the text in STRING.
7858Return a list of possible coding systems ordered by priority.
fb88bf2d 7859
12e0131a
KH
7860If only ASCII characters are found (except for such ISO-2022 control
7861characters ISO-2022 as ESC), it returns a list of single element
48b0f3ae
PJ
7862`undecided' or its subsidiary coding system according to a detected
7863end-of-line format.
d46c5b12 7864
48b0f3ae
PJ
7865If optional argument HIGHEST is non-nil, return the coding system of
7866highest priority. */)
7867 (string, highest)
d46c5b12
KH
7868 Lisp_Object string, highest;
7869{
b7826503 7870 CHECK_STRING (string);
b73bfc1c 7871
24a73b0a
KH
7872 return detect_coding_system (SDATA (string),
7873 SCHARS (string), SBYTES (string),
8f924df7 7874 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 7875 Qnil);
4ed46869 7876}
4ed46869 7877
b73bfc1c 7878
df7492f9
KH
7879static INLINE int
7880char_encodable_p (c, attrs)
7881 int c;
7882 Lisp_Object attrs;
05e6f5dc 7883{
df7492f9 7884 Lisp_Object tail;
df7492f9 7885 struct charset *charset;
7d64c6ad 7886 Lisp_Object translation_table;
d46c5b12 7887
7d64c6ad 7888 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 7889 if (! NILP (translation_table))
7d64c6ad 7890 c = translate_char (translation_table, c);
df7492f9
KH
7891 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7892 CONSP (tail); tail = XCDR (tail))
e133c8fa 7893 {
df7492f9
KH
7894 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7895 if (CHAR_CHARSET_P (c, charset))
7896 break;
e133c8fa 7897 }
df7492f9 7898 return (! NILP (tail));
05e6f5dc 7899}
83fa074f 7900
fb88bf2d 7901
df7492f9
KH
7902/* Return a list of coding systems that safely encode the text between
7903 START and END. If EXCLUDE is non-nil, it is a list of coding
7904 systems not to check. The returned list doesn't contain any such
48468dac 7905 coding systems. In any case, if the text contains only ASCII or is
df7492f9 7906 unibyte, return t. */
e077cc80 7907
df7492f9
KH
7908DEFUN ("find-coding-systems-region-internal",
7909 Ffind_coding_systems_region_internal,
7910 Sfind_coding_systems_region_internal, 2, 3, 0,
7911 doc: /* Internal use only. */)
7912 (start, end, exclude)
7913 Lisp_Object start, end, exclude;
7914{
7915 Lisp_Object coding_attrs_list, safe_codings;
7916 EMACS_INT start_byte, end_byte;
7c78e542 7917 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7918 int c;
7919 Lisp_Object tail, elt;
d46c5b12 7920
df7492f9
KH
7921 if (STRINGP (start))
7922 {
7923 if (!STRING_MULTIBYTE (start)
8f924df7 7924 || SCHARS (start) == SBYTES (start))
df7492f9
KH
7925 return Qt;
7926 start_byte = 0;
8f924df7 7927 end_byte = SBYTES (start);
df7492f9
KH
7928 }
7929 else
d46c5b12 7930 {
df7492f9
KH
7931 CHECK_NUMBER_COERCE_MARKER (start);
7932 CHECK_NUMBER_COERCE_MARKER (end);
7933 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7934 args_out_of_range (start, end);
7935 if (NILP (current_buffer->enable_multibyte_characters))
7936 return Qt;
7937 start_byte = CHAR_TO_BYTE (XINT (start));
7938 end_byte = CHAR_TO_BYTE (XINT (end));
7939 if (XINT (end) - XINT (start) == end_byte - start_byte)
7940 return Qt;
d46c5b12 7941
e1c23804 7942 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 7943 {
e1c23804
DL
7944 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7945 move_gap_both (XINT (start), start_byte);
df7492f9 7946 else
e1c23804 7947 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
7948 }
7949 }
7950
df7492f9
KH
7951 coding_attrs_list = Qnil;
7952 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7953 if (NILP (exclude)
7954 || NILP (Fmemq (XCAR (tail), exclude)))
7955 {
7956 Lisp_Object attrs;
d46c5b12 7957
df7492f9
KH
7958 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7959 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7960 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
7961 {
7962 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 7963 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
7964 coding_attrs_list = Fcons (attrs, coding_attrs_list);
7965 }
df7492f9 7966 }
d46c5b12 7967
df7492f9 7968 if (STRINGP (start))
8f924df7 7969 p = pbeg = SDATA (start);
df7492f9
KH
7970 else
7971 p = pbeg = BYTE_POS_ADDR (start_byte);
7972 pend = p + (end_byte - start_byte);
b843d1ae 7973
df7492f9
KH
7974 while (p < pend && ASCII_BYTE_P (*p)) p++;
7975 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 7976
05e6f5dc 7977 while (p < pend)
72d1a715 7978 {
df7492f9
KH
7979 if (ASCII_BYTE_P (*p))
7980 p++;
72d1a715
RS
7981 else
7982 {
df7492f9 7983 c = STRING_CHAR_ADVANCE (p);
12410ef1 7984
df7492f9
KH
7985 charset_map_loaded = 0;
7986 for (tail = coding_attrs_list; CONSP (tail);)
7987 {
7988 elt = XCAR (tail);
7989 if (NILP (elt))
7990 tail = XCDR (tail);
7991 else if (char_encodable_p (c, elt))
7992 tail = XCDR (tail);
7993 else if (CONSP (XCDR (tail)))
7994 {
7995 XSETCAR (tail, XCAR (XCDR (tail)));
7996 XSETCDR (tail, XCDR (XCDR (tail)));
7997 }
7998 else
7999 {
8000 XSETCAR (tail, Qnil);
8001 tail = XCDR (tail);
8002 }
8003 }
8004 if (charset_map_loaded)
8005 {
8006 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 8007
df7492f9 8008 if (STRINGP (start))
8f924df7 8009 pbeg = SDATA (start);
df7492f9
KH
8010 else
8011 pbeg = BYTE_POS_ADDR (start_byte);
8012 p = pbeg + p_offset;
8013 pend = pbeg + pend_offset;
8014 }
8015 }
ec6d2bb8 8016 }
fb88bf2d 8017
988b3759 8018 safe_codings = list2 (Qraw_text, Qno_conversion);
df7492f9
KH
8019 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8020 if (! NILP (XCAR (tail)))
8021 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 8022
05e6f5dc
KH
8023 return safe_codings;
8024}
4956c225 8025
d46c5b12 8026
8f924df7
KH
8027DEFUN ("unencodable-char-position", Funencodable_char_position,
8028 Sunencodable_char_position, 3, 5, 0,
8029 doc: /*
8030Return position of first un-encodable character in a region.
8031START and END specfiy the region and CODING-SYSTEM specifies the
8032encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 8033
8f924df7
KH
8034If optional 4th argument COUNT is non-nil, it specifies at most how
8035many un-encodable characters to search. In this case, the value is a
8036list of positions.
d46c5b12 8037
8f924df7
KH
8038If optional 5th argument STRING is non-nil, it is a string to search
8039for un-encodable characters. In that case, START and END are indexes
8040to the string. */)
8041 (start, end, coding_system, count, string)
8042 Lisp_Object start, end, coding_system, count, string;
8043{
8044 int n;
8045 struct coding_system coding;
7d64c6ad 8046 Lisp_Object attrs, charset_list, translation_table;
8f924df7
KH
8047 Lisp_Object positions;
8048 int from, to;
8049 const unsigned char *p, *stop, *pend;
8050 int ascii_compatible;
fb88bf2d 8051
8f924df7
KH
8052 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8053 attrs = CODING_ID_ATTRS (coding.id);
8054 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8055 return Qnil;
8056 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8057 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 8058 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 8059
8f924df7
KH
8060 if (NILP (string))
8061 {
8062 validate_region (&start, &end);
8063 from = XINT (start);
8064 to = XINT (end);
8065 if (NILP (current_buffer->enable_multibyte_characters)
8066 || (ascii_compatible
8067 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8068 return Qnil;
8069 p = CHAR_POS_ADDR (from);
8070 pend = CHAR_POS_ADDR (to);
8071 if (from < GPT && to >= GPT)
8072 stop = GPT_ADDR;
8073 else
8074 stop = pend;
8075 }
8076 else
8077 {
8078 CHECK_STRING (string);
8079 CHECK_NATNUM (start);
8080 CHECK_NATNUM (end);
8081 from = XINT (start);
8082 to = XINT (end);
8083 if (from > to
8084 || to > SCHARS (string))
8085 args_out_of_range_3 (string, start, end);
8086 if (! STRING_MULTIBYTE (string))
8087 return Qnil;
8088 p = SDATA (string) + string_char_to_byte (string, from);
8089 stop = pend = SDATA (string) + string_char_to_byte (string, to);
8090 if (ascii_compatible && (to - from) == (pend - p))
8091 return Qnil;
8092 }
f2558efd 8093
8f924df7
KH
8094 if (NILP (count))
8095 n = 1;
8096 else
b73bfc1c 8097 {
8f924df7
KH
8098 CHECK_NATNUM (count);
8099 n = XINT (count);
b73bfc1c
KH
8100 }
8101
8f924df7
KH
8102 positions = Qnil;
8103 while (1)
d46c5b12 8104 {
8f924df7 8105 int c;
ec6d2bb8 8106
8f924df7
KH
8107 if (ascii_compatible)
8108 while (p < stop && ASCII_BYTE_P (*p))
8109 p++, from++;
8110 if (p >= stop)
0e79d667 8111 {
8f924df7
KH
8112 if (p >= pend)
8113 break;
8114 stop = pend;
8115 p = GAP_END_ADDR;
0e79d667 8116 }
ec6d2bb8 8117
8f924df7
KH
8118 c = STRING_CHAR_ADVANCE (p);
8119 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
8120 && ! char_charset (translate_char (translation_table, c),
8121 charset_list, NULL))
ec6d2bb8 8122 {
8f924df7
KH
8123 positions = Fcons (make_number (from), positions);
8124 n--;
8125 if (n == 0)
8126 break;
ec6d2bb8
KH
8127 }
8128
8f924df7
KH
8129 from++;
8130 }
d46c5b12 8131
8f924df7
KH
8132 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8133}
d46c5b12 8134
d46c5b12 8135
df7492f9
KH
8136DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8137 Scheck_coding_systems_region, 3, 3, 0,
8138 doc: /* Check if the region is encodable by coding systems.
d46c5b12 8139
df7492f9
KH
8140START and END are buffer positions specifying the region.
8141CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 8142
df7492f9
KH
8143The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8144CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
8145whole region, POS0, POS1, ... are buffer positions where non-encodable
8146characters are found.
93dec019 8147
df7492f9
KH
8148If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8149value is nil.
93dec019 8150
df7492f9
KH
8151START may be a string. In that case, check if the string is
8152encodable, and the value contains indices to the string instead of
8153buffer positions. END is ignored. */)
8154 (start, end, coding_system_list)
8155 Lisp_Object start, end, coding_system_list;
05e6f5dc 8156{
df7492f9
KH
8157 Lisp_Object list;
8158 EMACS_INT start_byte, end_byte;
8159 int pos;
7c78e542 8160 const unsigned char *p, *pbeg, *pend;
df7492f9 8161 int c;
7d64c6ad 8162 Lisp_Object tail, elt, attrs;
70ad9fc4 8163
05e6f5dc
KH
8164 if (STRINGP (start))
8165 {
df7492f9 8166 if (!STRING_MULTIBYTE (start)
8f924df7 8167 && SCHARS (start) != SBYTES (start))
df7492f9
KH
8168 return Qnil;
8169 start_byte = 0;
8f924df7 8170 end_byte = SBYTES (start);
df7492f9 8171 pos = 0;
d46c5b12 8172 }
05e6f5dc 8173 else
b73bfc1c 8174 {
b7826503
PJ
8175 CHECK_NUMBER_COERCE_MARKER (start);
8176 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
8177 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8178 args_out_of_range (start, end);
8179 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
8180 return Qnil;
8181 start_byte = CHAR_TO_BYTE (XINT (start));
8182 end_byte = CHAR_TO_BYTE (XINT (end));
8183 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 8184 return Qt;
df7492f9 8185
e1c23804 8186 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 8187 {
e1c23804
DL
8188 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8189 move_gap_both (XINT (start), start_byte);
df7492f9 8190 else
e1c23804 8191 move_gap_both (XINT (end), end_byte);
b73bfc1c 8192 }
e1c23804 8193 pos = XINT (start);
b73bfc1c 8194 }
7553d0e1 8195
df7492f9
KH
8196 list = Qnil;
8197 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 8198 {
df7492f9 8199 elt = XCAR (tail);
7d64c6ad 8200 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
8201 ASET (attrs, coding_attr_trans_tbl,
8202 get_translation_table (attrs, 1, NULL));
7d64c6ad 8203 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
8204 }
8205
df7492f9 8206 if (STRINGP (start))
8f924df7 8207 p = pbeg = SDATA (start);
72d1a715 8208 else
df7492f9
KH
8209 p = pbeg = BYTE_POS_ADDR (start_byte);
8210 pend = p + (end_byte - start_byte);
4ed46869 8211
df7492f9
KH
8212 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8213 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 8214
df7492f9 8215 while (p < pend)
d46c5b12 8216 {
df7492f9
KH
8217 if (ASCII_BYTE_P (*p))
8218 p++;
e133c8fa 8219 else
05e6f5dc 8220 {
df7492f9
KH
8221 c = STRING_CHAR_ADVANCE (p);
8222
8223 charset_map_loaded = 0;
8224 for (tail = list; CONSP (tail); tail = XCDR (tail))
8225 {
8226 elt = XCDR (XCAR (tail));
8227 if (! char_encodable_p (c, XCAR (elt)))
8228 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8229 }
8230 if (charset_map_loaded)
8231 {
8232 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8233
8234 if (STRINGP (start))
8f924df7 8235 pbeg = SDATA (start);
df7492f9
KH
8236 else
8237 pbeg = BYTE_POS_ADDR (start_byte);
8238 p = pbeg + p_offset;
8239 pend = pbeg + pend_offset;
8240 }
05e6f5dc 8241 }
df7492f9 8242 pos++;
d46c5b12 8243 }
4ed46869 8244
df7492f9
KH
8245 tail = list;
8246 list = Qnil;
8247 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 8248 {
df7492f9
KH
8249 elt = XCAR (tail);
8250 if (CONSP (XCDR (XCDR (elt))))
8251 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8252 list);
ec6d2bb8 8253 }
2b4f9037 8254
df7492f9 8255 return list;
d46c5b12
KH
8256}
8257
3fd9494b 8258
b73bfc1c 8259Lisp_Object
df7492f9
KH
8260code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8261 Lisp_Object start, end, coding_system, dst_object;
8262 int encodep, norecord;
4ed46869 8263{
3a73fa5d 8264 struct coding_system coding;
df7492f9
KH
8265 EMACS_INT from, from_byte, to, to_byte;
8266 Lisp_Object src_object;
4ed46869 8267
b7826503
PJ
8268 CHECK_NUMBER_COERCE_MARKER (start);
8269 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
8270 if (NILP (coding_system))
8271 coding_system = Qno_conversion;
8272 else
8273 CHECK_CODING_SYSTEM (coding_system);
8274 src_object = Fcurrent_buffer ();
8275 if (NILP (dst_object))
8276 dst_object = src_object;
8277 else if (! EQ (dst_object, Qt))
8278 CHECK_BUFFER (dst_object);
3a73fa5d 8279
d46c5b12
KH
8280 validate_region (&start, &end);
8281 from = XFASTINT (start);
df7492f9 8282 from_byte = CHAR_TO_BYTE (from);
d46c5b12 8283 to = XFASTINT (end);
df7492f9 8284 to_byte = CHAR_TO_BYTE (to);
764ca8da 8285
df7492f9
KH
8286 setup_coding_system (coding_system, &coding);
8287 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 8288
df7492f9
KH
8289 if (encodep)
8290 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8291 dst_object);
8292 else
8293 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8294 dst_object);
8295 if (! norecord)
8296 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 8297
df7492f9
KH
8298 return (BUFFERP (dst_object)
8299 ? make_number (coding.produced_char)
8300 : coding.dst_object);
4031e2bf 8301}
78108bcd 8302
4ed46869 8303
4031e2bf 8304DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 8305 3, 4, "r\nzCoding system: ",
48b0f3ae 8306 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
8307When called from a program, takes four arguments:
8308 START, END, CODING-SYSTEM, and DESTINATION.
8309START and END are buffer positions.
8844fa83 8310
df7492f9 8311Optional 4th arguments DESTINATION specifies where the decoded text goes.
2354b80b 8312If nil, the region between START and END is replaced by the decoded text.
df7492f9
KH
8313If buffer, the decoded text is inserted in the buffer.
8314If t, the decoded text is returned.
8844fa83 8315
48b0f3ae
PJ
8316This function sets `last-coding-system-used' to the precise coding system
8317used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8318not fully specified.)
8319It returns the length of the decoded text. */)
df7492f9
KH
8320 (start, end, coding_system, destination)
8321 Lisp_Object start, end, coding_system, destination;
4031e2bf 8322{
df7492f9 8323 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 8324}
8844fa83 8325
3a73fa5d 8326DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
8327 3, 4, "r\nzCoding system: ",
8328 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
8329When called from a program, takes three arguments:
8330START, END, and CODING-SYSTEM. START and END are buffer positions.
d46c5b12 8331
df7492f9
KH
8332Optional 4th arguments DESTINATION specifies where the encoded text goes.
8333If nil, the region between START and END is replace by the encoded text.
8334If buffer, the encoded text is inserted in the buffer.
8335If t, the encoded text is returned.
2391eaa4 8336
48b0f3ae
PJ
8337This function sets `last-coding-system-used' to the precise coding system
8338used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8339not fully specified.)
8340It returns the length of the encoded text. */)
df7492f9
KH
8341 (start, end, coding_system, destination)
8342 Lisp_Object start, end, coding_system, destination;
3a73fa5d 8343{
df7492f9 8344 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
8345}
8346
8347Lisp_Object
df7492f9
KH
8348code_convert_string (string, coding_system, dst_object,
8349 encodep, nocopy, norecord)
8350 Lisp_Object string, coding_system, dst_object;
8351 int encodep, nocopy, norecord;
b73bfc1c 8352{
4031e2bf 8353 struct coding_system coding;
df7492f9 8354 EMACS_INT chars, bytes;
ec6d2bb8 8355
b7826503 8356 CHECK_STRING (string);
d46c5b12 8357 if (NILP (coding_system))
4956c225 8358 {
df7492f9
KH
8359 if (! norecord)
8360 Vlast_coding_system_used = Qno_conversion;
8361 if (NILP (dst_object))
8362 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 8363 }
b73bfc1c 8364
df7492f9
KH
8365 if (NILP (coding_system))
8366 coding_system = Qno_conversion;
8367 else
8368 CHECK_CODING_SYSTEM (coding_system);
8369 if (NILP (dst_object))
8370 dst_object = Qt;
8371 else if (! EQ (dst_object, Qt))
8372 CHECK_BUFFER (dst_object);
73be902c 8373
df7492f9 8374 setup_coding_system (coding_system, &coding);
d46c5b12 8375 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
8376 chars = SCHARS (string);
8377 bytes = SBYTES (string);
df7492f9
KH
8378 if (encodep)
8379 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8380 else
8381 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8382 if (! norecord)
8383 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 8384
df7492f9
KH
8385 return (BUFFERP (dst_object)
8386 ? make_number (coding.produced_char)
8387 : coding.dst_object);
4ed46869 8388}
73be902c 8389
b73bfc1c 8390
ecec61c1 8391/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 8392 Do not set Vlast_coding_system_used.
4ed46869 8393
ec6d2bb8
KH
8394 This function is called only from macros DECODE_FILE and
8395 ENCODE_FILE, thus we ignore character composition. */
4ed46869 8396
ecec61c1
KH
8397Lisp_Object
8398code_convert_string_norecord (string, coding_system, encodep)
8399 Lisp_Object string, coding_system;
8400 int encodep;
4ed46869 8401{
0be8721c 8402 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
8403}
8404
4ed46869 8405
df7492f9
KH
8406DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8407 2, 4, 0,
8408 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8409
8410Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8411if the decoding operation is trivial.
ecec61c1 8412
df7492f9 8413Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 8414inserted in BUFFER instead of returned as a string. In this case,
df7492f9 8415the return value is BUFFER.
ecec61c1 8416
df7492f9
KH
8417This function sets `last-coding-system-used' to the precise coding system
8418used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8419not fully specified. */)
8420 (string, coding_system, nocopy, buffer)
8421 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 8422{
df7492f9
KH
8423 return code_convert_string (string, coding_system, buffer,
8424 0, ! NILP (nocopy), 0);
4ed46869
KH
8425}
8426
df7492f9
KH
8427DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8428 2, 4, 0,
8429 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8430
8431Optional third arg NOCOPY non-nil means it is OK to return STRING
8432itself if the encoding operation is trivial.
8433
8434Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 8435inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
8436the return value is BUFFER.
8437
8438This function sets `last-coding-system-used' to the precise coding system
8439used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8440not fully specified.) */)
8441 (string, coding_system, nocopy, buffer)
8442 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 8443{
df7492f9 8444 return code_convert_string (string, coding_system, buffer,
c197f191 8445 1, ! NILP (nocopy), 1);
4ed46869 8446}
df7492f9 8447
3a73fa5d 8448\f
4ed46869 8449DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
8450 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8451Return the corresponding character. */)
8452 (code)
4ed46869 8453 Lisp_Object code;
4ed46869 8454{
df7492f9
KH
8455 Lisp_Object spec, attrs, val;
8456 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8457 int c;
4ed46869 8458
df7492f9
KH
8459 CHECK_NATNUM (code);
8460 c = XFASTINT (code);
8461 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8462 attrs = AREF (spec, 0);
4ed46869 8463
df7492f9
KH
8464 if (ASCII_BYTE_P (c)
8465 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8466 return code;
4ed46869 8467
df7492f9
KH
8468 val = CODING_ATTR_CHARSET_LIST (attrs);
8469 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
8470 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8471 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 8472
df7492f9
KH
8473 if (c <= 0x7F)
8474 charset = charset_roman;
8475 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 8476 {
df7492f9
KH
8477 charset = charset_kana;
8478 c -= 0x80;
4ed46869 8479 }
55ab7be3 8480 else
4ed46869 8481 {
004068e4 8482 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
8483
8484 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8485 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8486 error ("Invalid code: %d", code);
8487 SJIS_TO_JIS (c);
8488 charset = charset_kanji;
4ed46869 8489 }
df7492f9
KH
8490 c = DECODE_CHAR (charset, c);
8491 if (c < 0)
8492 error ("Invalid code: %d", code);
8493 return make_number (c);
93dec019 8494}
4ed46869 8495
48b0f3ae 8496
4ed46869 8497DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8acf0c0e 8498 doc: /* Encode a Japanese character CH to shift_jis encoding.
48b0f3ae
PJ
8499Return the corresponding code in SJIS. */)
8500 (ch)
df7492f9 8501 Lisp_Object ch;
4ed46869 8502{
df7492f9
KH
8503 Lisp_Object spec, attrs, charset_list;
8504 int c;
8505 struct charset *charset;
8506 unsigned code;
48b0f3ae 8507
df7492f9
KH
8508 CHECK_CHARACTER (ch);
8509 c = XFASTINT (ch);
8510 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8511 attrs = AREF (spec, 0);
8512
8513 if (ASCII_CHAR_P (c)
8514 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8515 return ch;
8516
8517 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8518 charset = char_charset (c, charset_list, &code);
8519 if (code == CHARSET_INVALID_CODE (charset))
8520 error ("Can't encode by shift_jis encoding: %d", c);
8521 JIS_TO_SJIS (code);
8522
8523 return make_number (code);
4ed46869
KH
8524}
8525
8526DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
8527 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8528Return the corresponding character. */)
8529 (code)
4ed46869 8530 Lisp_Object code;
d46c5b12 8531{
df7492f9
KH
8532 Lisp_Object spec, attrs, val;
8533 struct charset *charset_roman, *charset_big5, *charset;
8534 int c;
6289dd10 8535
df7492f9
KH
8536 CHECK_NATNUM (code);
8537 c = XFASTINT (code);
8538 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8539 attrs = AREF (spec, 0);
4ed46869 8540
df7492f9
KH
8541 if (ASCII_BYTE_P (c)
8542 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8543 return code;
6289dd10 8544
df7492f9
KH
8545 val = CODING_ATTR_CHARSET_LIST (attrs);
8546 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8547 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 8548
df7492f9
KH
8549 if (c <= 0x7F)
8550 charset = charset_roman;
c28a9453
KH
8551 else
8552 {
df7492f9
KH
8553 int b1 = c >> 8, b2 = c & 0x7F;
8554 if (b1 < 0xA1 || b1 > 0xFE
8555 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8556 error ("Invalid code: %d", code);
8557 charset = charset_big5;
c28a9453 8558 }
df7492f9
KH
8559 c = DECODE_CHAR (charset, (unsigned )c);
8560 if (c < 0)
8561 error ("Invalid code: %d", code);
8562 return make_number (c);
d46c5b12 8563}
6289dd10 8564
4ed46869 8565DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8acf0c0e 8566 doc: /* Encode the Big5 character CH to BIG5 coding system.
48b0f3ae
PJ
8567Return the corresponding character code in Big5. */)
8568 (ch)
4ed46869
KH
8569 Lisp_Object ch;
8570{
df7492f9
KH
8571 Lisp_Object spec, attrs, charset_list;
8572 struct charset *charset;
8573 int c;
8574 unsigned code;
8575
8576 CHECK_CHARACTER (ch);
8577 c = XFASTINT (ch);
8578 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8579 attrs = AREF (spec, 0);
8580 if (ASCII_CHAR_P (c)
8581 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8582 return ch;
8583
8584 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8585 charset = char_charset (c, charset_list, &code);
8586 if (code == CHARSET_INVALID_CODE (charset))
8587 error ("Can't encode by Big5 encoding: %d", c);
8588
8589 return make_number (code);
4ed46869 8590}
48b0f3ae 8591
3a73fa5d 8592\f
002fdb44 8593DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
68bba4e4 8594 Sset_terminal_coding_system_internal, 1, 2, 0,
48b0f3ae 8595 doc: /* Internal use only. */)
6ed8eeff 8596 (coding_system, terminal)
b74e4686 8597 Lisp_Object coding_system;
6ed8eeff 8598 Lisp_Object terminal;
4ed46869 8599{
6ed8eeff 8600 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
b7826503 8601 CHECK_SYMBOL (coding_system);
b8299c66 8602 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
70c22245 8603 /* We had better not send unsafe characters to terminal. */
c73bd236 8604 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
df7492f9 8605 /* Characer composition should be disabled. */
c73bd236 8606 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b8299c66
KL
8607 terminal_coding->src_multibyte = 1;
8608 terminal_coding->dst_multibyte = 0;
4ed46869
KH
8609 return Qnil;
8610}
8611
c4825358
KH
8612DEFUN ("set-safe-terminal-coding-system-internal",
8613 Fset_safe_terminal_coding_system_internal,
48b0f3ae 8614 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 8615 doc: /* Internal use only. */)
48b0f3ae 8616 (coding_system)
b74e4686 8617 Lisp_Object coding_system;
d46c5b12 8618{
b7826503 8619 CHECK_SYMBOL (coding_system);
c4825358
KH
8620 setup_coding_system (Fcheck_coding_system (coding_system),
8621 &safe_terminal_coding);
df7492f9
KH
8622 /* Characer composition should be disabled. */
8623 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
8624 safe_terminal_coding.src_multibyte = 1;
8625 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
8626 return Qnil;
8627}
4ed46869 8628
002fdb44 8629DEFUN ("terminal-coding-system", Fterminal_coding_system,
68bba4e4 8630 Sterminal_coding_system, 0, 1, 0,
6ed8eeff
KL
8631 doc: /* Return coding system specified for terminal output on the given terminal.
8632TERMINAL may be a terminal id, a frame, or nil for the selected
8633frame's terminal device. */)
8634 (terminal)
8635 Lisp_Object terminal;
4ed46869 8636{
985773c9
MB
8637 struct coding_system *terminal_coding
8638 = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8639 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
ae6f73fa 8640
ae6f73fa 8641 /* For backward compatibility, return nil if it is `undecided'. */
f75c90a9 8642 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
4ed46869
KH
8643}
8644
002fdb44 8645DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
68bba4e4 8646 Sset_keyboard_coding_system_internal, 1, 2, 0,
48b0f3ae 8647 doc: /* Internal use only. */)
6ed8eeff 8648 (coding_system, terminal)
4ed46869 8649 Lisp_Object coding_system;
6ed8eeff 8650 Lisp_Object terminal;
4ed46869 8651{
6ed8eeff 8652 struct terminal *t = get_terminal (terminal, 1);
b7826503 8653 CHECK_SYMBOL (coding_system);
df7492f9 8654 setup_coding_system (Fcheck_coding_system (coding_system),
c73bd236 8655 TERMINAL_KEYBOARD_CODING (t));
df7492f9 8656 /* Characer composition should be disabled. */
c73bd236
MB
8657 TERMINAL_KEYBOARD_CODING (t)->common_flags
8658 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
8659 return Qnil;
8660}
8661
8662DEFUN ("keyboard-coding-system",
985773c9 8663 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
48b0f3ae 8664 doc: /* Return coding system specified for decoding keyboard input. */)
985773c9
MB
8665 (terminal)
8666 Lisp_Object terminal;
4ed46869 8667{
985773c9
MB
8668 return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8669 (get_terminal (terminal, 1))->id);
4ed46869
KH
8670}
8671
4ed46869 8672\f
a5d301df
KH
8673DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8674 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
8675 doc: /* Choose a coding system for an operation based on the target name.
8676The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8677DECODING-SYSTEM is the coding system to use for decoding
8678\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8679for encoding (in case OPERATION does encoding).
05e6f5dc 8680
48b0f3ae
PJ
8681The first argument OPERATION specifies an I/O primitive:
8682 For file I/O, `insert-file-contents' or `write-region'.
8683 For process I/O, `call-process', `call-process-region', or `start-process'.
8684 For network I/O, `open-network-stream'.
05e6f5dc 8685
48b0f3ae
PJ
8686The remaining arguments should be the same arguments that were passed
8687to the primitive. Depending on which primitive, one of those arguments
8688is selected as the TARGET. For example, if OPERATION does file I/O,
8689whichever argument specifies the file name is TARGET.
05e6f5dc 8690
48b0f3ae 8691TARGET has a meaning which depends on OPERATION:
b883cdb2 8692 For file I/O, TARGET is a file name (except for the special case below).
48b0f3ae
PJ
8693 For process I/O, TARGET is a process name.
8694 For network I/O, TARGET is a service name or a port number
05e6f5dc 8695
48b0f3ae
PJ
8696This function looks up what specified for TARGET in,
8697`file-coding-system-alist', `process-coding-system-alist',
8698or `network-coding-system-alist' depending on OPERATION.
8699They may specify a coding system, a cons of coding systems,
8700or a function symbol to call.
8701In the last case, we call the function with one argument,
8702which is a list of all the arguments given to this function.
1011c487
MB
8703If the function can't decide a coding system, it can return
8704`undecided' so that the normal code-detection is performed.
48b0f3ae 8705
b883cdb2
MB
8706If OPERATION is `insert-file-contents', the argument corresponding to
8707TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
8708file name to look up, and BUFFER is a buffer that contains the file's
8709contents (not yet decoded). If `file-coding-system-alist' specifies a
8710function to call for FILENAME, that function should examine the
8711contents of BUFFER instead of reading the file.
8712
d918f936 8713usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
48b0f3ae 8714 (nargs, args)
4ed46869
KH
8715 int nargs;
8716 Lisp_Object *args;
6b89e3aa 8717{
4ed46869
KH
8718 Lisp_Object operation, target_idx, target, val;
8719 register Lisp_Object chain;
177c0ea7 8720
4ed46869
KH
8721 if (nargs < 2)
8722 error ("Too few arguments");
8723 operation = args[0];
8724 if (!SYMBOLP (operation)
8725 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 8726 error ("Invalid first arguement");
4ed46869
KH
8727 if (nargs < 1 + XINT (target_idx))
8728 error ("Too few arguments for operation: %s",
8f924df7 8729 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
8730 target = args[XINT (target_idx) + 1];
8731 if (!(STRINGP (target)
091a0ff0
KH
8732 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8733 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
4ed46869 8734 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 8735 error ("Invalid %dth argument", XINT (target_idx) + 1);
091a0ff0
KH
8736 if (CONSP (target))
8737 target = XCAR (target);
4ed46869 8738
2e34157c
RS
8739 chain = ((EQ (operation, Qinsert_file_contents)
8740 || EQ (operation, Qwrite_region))
02ba4723 8741 ? Vfile_coding_system_alist
2e34157c 8742 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
8743 ? Vnetwork_coding_system_alist
8744 : Vprocess_coding_system_alist));
4ed46869
KH
8745 if (NILP (chain))
8746 return Qnil;
8747
03699b14 8748 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 8749 {
f44d27ce 8750 Lisp_Object elt;
6b89e3aa 8751
df7492f9 8752 elt = XCAR (chain);
4ed46869
KH
8753 if (CONSP (elt)
8754 && ((STRINGP (target)
03699b14
KR
8755 && STRINGP (XCAR (elt))
8756 && fast_string_match (XCAR (elt), target) >= 0)
8757 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 8758 {
03699b14 8759 val = XCDR (elt);
b19fd4c5
KH
8760 /* Here, if VAL is both a valid coding system and a valid
8761 function symbol, we return VAL as a coding system. */
02ba4723
KH
8762 if (CONSP (val))
8763 return val;
8764 if (! SYMBOLP (val))
8765 return Qnil;
8766 if (! NILP (Fcoding_system_p (val)))
8767 return Fcons (val, val);
b19fd4c5 8768 if (! NILP (Ffboundp (val)))
6b89e3aa 8769 {
e2b97060
MB
8770 /* We use call1 rather than safe_call1
8771 so as to get bug reports about functions called here
8772 which don't handle the current interface. */
8773 val = call1 (val, Flist (nargs, args));
b19fd4c5
KH
8774 if (CONSP (val))
8775 return val;
8776 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8777 return Fcons (val, val);
6b89e3aa 8778 }
02ba4723 8779 return Qnil;
6b89e3aa
KH
8780 }
8781 }
4ed46869 8782 return Qnil;
6b89e3aa
KH
8783}
8784
df7492f9 8785DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 8786 Sset_coding_system_priority, 0, MANY, 0,
da7db224 8787 doc: /* Assign higher priority to the coding systems given as arguments.
ff563fce 8788If multiple coding systems belongs to the same category,
a3181084
DL
8789all but the first one are ignored.
8790
8791usage: (set-coding-system-priority ...) */)
df7492f9
KH
8792 (nargs, args)
8793 int nargs;
8794 Lisp_Object *args;
8795{
8796 int i, j;
8797 int changed[coding_category_max];
8798 enum coding_category priorities[coding_category_max];
8799
8800 bzero (changed, sizeof changed);
6b89e3aa 8801
df7492f9 8802 for (i = j = 0; i < nargs; i++)
6b89e3aa 8803 {
df7492f9
KH
8804 enum coding_category category;
8805 Lisp_Object spec, attrs;
6b89e3aa 8806
df7492f9
KH
8807 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8808 attrs = AREF (spec, 0);
8809 category = XINT (CODING_ATTR_CATEGORY (attrs));
8810 if (changed[category])
8811 /* Ignore this coding system because a coding system of the
8812 same category already had a higher priority. */
8813 continue;
8814 changed[category] = 1;
8815 priorities[j++] = category;
8816 if (coding_categories[category].id >= 0
8817 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8818 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 8819 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 8820 }
6b89e3aa 8821
df7492f9
KH
8822 /* Now we have decided top J priorities. Reflect the order of the
8823 original priorities to the remaining priorities. */
6b89e3aa 8824
df7492f9 8825 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 8826 {
df7492f9
KH
8827 while (j < coding_category_max
8828 && changed[coding_priorities[j]])
8829 j++;
8830 if (j == coding_category_max)
8831 abort ();
8832 priorities[i] = coding_priorities[j];
8833 }
6b89e3aa 8834
df7492f9 8835 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 8836
ff563fce
KH
8837 /* Update `coding-category-list'. */
8838 Vcoding_category_list = Qnil;
8839 for (i = coding_category_max - 1; i >= 0; i--)
8840 Vcoding_category_list
8841 = Fcons (AREF (Vcoding_category_table, priorities[i]),
8842 Vcoding_category_list);
6b89e3aa 8843
df7492f9 8844 return Qnil;
6b89e3aa
KH
8845}
8846
df7492f9
KH
8847DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8848 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
8849 doc: /* Return a list of coding systems ordered by their priorities.
8850HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
8851 (highestp)
8852 Lisp_Object highestp;
d46c5b12
KH
8853{
8854 int i;
df7492f9 8855 Lisp_Object val;
6b89e3aa 8856
df7492f9 8857 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 8858 {
df7492f9
KH
8859 enum coding_category category = coding_priorities[i];
8860 int id = coding_categories[category].id;
8861 Lisp_Object attrs;
068a9dbd 8862
df7492f9
KH
8863 if (id < 0)
8864 continue;
8865 attrs = CODING_ID_ATTRS (id);
8866 if (! NILP (highestp))
8867 return CODING_ATTR_BASE_NAME (attrs);
8868 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8869 }
8870 return Fnreverse (val);
8871}
068a9dbd 8872
f0064e1f 8873static char *suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
8874
8875static Lisp_Object
df7492f9
KH
8876make_subsidiaries (base)
8877 Lisp_Object base;
068a9dbd 8878{
df7492f9 8879 Lisp_Object subsidiaries;
8f924df7 8880 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
8881 char *buf = (char *) alloca (base_name_len + 6);
8882 int i;
068a9dbd 8883
8f924df7 8884 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
8885 subsidiaries = Fmake_vector (make_number (3), Qnil);
8886 for (i = 0; i < 3; i++)
068a9dbd 8887 {
df7492f9
KH
8888 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8889 ASET (subsidiaries, i, intern (buf));
068a9dbd 8890 }
df7492f9 8891 return subsidiaries;
068a9dbd
KH
8892}
8893
8894
df7492f9
KH
8895DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8896 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
8897 doc: /* For internal use only.
8898usage: (define-coding-system-internal ...) */)
df7492f9
KH
8899 (nargs, args)
8900 int nargs;
8901 Lisp_Object *args;
068a9dbd 8902{
df7492f9
KH
8903 Lisp_Object name;
8904 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
8905 Lisp_Object attrs; /* Vector of attributes. */
8906 Lisp_Object eol_type;
8907 Lisp_Object aliases;
8908 Lisp_Object coding_type, charset_list, safe_charsets;
8909 enum coding_category category;
8910 Lisp_Object tail, val;
8911 int max_charset_id = 0;
8912 int i;
068a9dbd 8913
df7492f9
KH
8914 if (nargs < coding_arg_max)
8915 goto short_args;
068a9dbd 8916
df7492f9 8917 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 8918
df7492f9
KH
8919 name = args[coding_arg_name];
8920 CHECK_SYMBOL (name);
8921 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 8922
df7492f9
KH
8923 val = args[coding_arg_mnemonic];
8924 if (! STRINGP (val))
8925 CHECK_CHARACTER (val);
8926 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 8927
df7492f9
KH
8928 coding_type = args[coding_arg_coding_type];
8929 CHECK_SYMBOL (coding_type);
8930 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 8931
df7492f9
KH
8932 charset_list = args[coding_arg_charset_list];
8933 if (SYMBOLP (charset_list))
8934 {
8935 if (EQ (charset_list, Qiso_2022))
8936 {
8937 if (! EQ (coding_type, Qiso_2022))
8938 error ("Invalid charset-list");
8939 charset_list = Viso_2022_charset_list;
8940 }
8941 else if (EQ (charset_list, Qemacs_mule))
8942 {
8943 if (! EQ (coding_type, Qemacs_mule))
8944 error ("Invalid charset-list");
8945 charset_list = Vemacs_mule_charset_list;
8946 }
8947 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8948 if (max_charset_id < XFASTINT (XCAR (tail)))
8949 max_charset_id = XFASTINT (XCAR (tail));
8950 }
068a9dbd
KH
8951 else
8952 {
df7492f9 8953 charset_list = Fcopy_sequence (charset_list);
985773c9 8954 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
068a9dbd 8955 {
df7492f9
KH
8956 struct charset *charset;
8957
985773c9 8958 val = XCAR (tail);
df7492f9
KH
8959 CHECK_CHARSET_GET_CHARSET (val, charset);
8960 if (EQ (coding_type, Qiso_2022)
8961 ? CHARSET_ISO_FINAL (charset) < 0
8962 : EQ (coding_type, Qemacs_mule)
8963 ? CHARSET_EMACS_MULE_ID (charset) < 0
8964 : 0)
8965 error ("Can't handle charset `%s'",
8f924df7 8966 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 8967
8f924df7 8968 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
8969 if (max_charset_id < charset->id)
8970 max_charset_id = charset->id;
068a9dbd
KH
8971 }
8972 }
df7492f9 8973 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 8974
df7492f9
KH
8975 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8976 make_number (255));
8977 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 8978 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 8979 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 8980
584948ac 8981 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 8982
df7492f9 8983 val = args[coding_arg_decode_translation_table];
a6f87d34 8984 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8985 CHECK_SYMBOL (val);
df7492f9 8986 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 8987
df7492f9 8988 val = args[coding_arg_encode_translation_table];
a6f87d34 8989 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8990 CHECK_SYMBOL (val);
df7492f9 8991 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 8992
df7492f9
KH
8993 val = args[coding_arg_post_read_conversion];
8994 CHECK_SYMBOL (val);
8995 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 8996
df7492f9
KH
8997 val = args[coding_arg_pre_write_conversion];
8998 CHECK_SYMBOL (val);
8999 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 9000
df7492f9
KH
9001 val = args[coding_arg_default_char];
9002 if (NILP (val))
9003 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9004 else
9005 {
8f924df7 9006 CHECK_CHARACTER (val);
df7492f9
KH
9007 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9008 }
4031e2bf 9009
8f924df7
KH
9010 val = args[coding_arg_for_unibyte];
9011 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 9012
df7492f9
KH
9013 val = args[coding_arg_plist];
9014 CHECK_LIST (val);
9015 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 9016
df7492f9
KH
9017 if (EQ (coding_type, Qcharset))
9018 {
c7c66a95
KH
9019 /* Generate a lisp vector of 256 elements. Each element is nil,
9020 integer, or a list of charset IDs.
3a73fa5d 9021
c7c66a95
KH
9022 If Nth element is nil, the byte code N is invalid in this
9023 coding system.
4ed46869 9024
c7c66a95
KH
9025 If Nth element is a number NUM, N is the first byte of a
9026 charset whose ID is NUM.
4ed46869 9027
c7c66a95
KH
9028 If Nth element is a list of charset IDs, N is the first byte
9029 of one of them. The list is sorted by dimensions of the
2bc515e4 9030 charsets. A charset of smaller dimension comes firtst. */
df7492f9 9031 val = Fmake_vector (make_number (256), Qnil);
4ed46869 9032
5c99c2e6 9033 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 9034 {
c7c66a95
KH
9035 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9036 int dim = CHARSET_DIMENSION (charset);
9037 int idx = (dim - 1) * 4;
4ed46869 9038
5c99c2e6 9039 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 9040 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 9041
15d143f7
KH
9042 for (i = charset->code_space[idx];
9043 i <= charset->code_space[idx + 1]; i++)
9044 {
c7c66a95
KH
9045 Lisp_Object tmp, tmp2;
9046 int dim2;
ec6d2bb8 9047
c7c66a95
KH
9048 tmp = AREF (val, i);
9049 if (NILP (tmp))
9050 tmp = XCAR (tail);
9051 else if (NUMBERP (tmp))
9052 {
9053 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9054 if (dim < dim2)
c7c66a95 9055 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
9056 else
9057 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 9058 }
15d143f7 9059 else
c7c66a95
KH
9060 {
9061 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9062 {
9063 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9064 if (dim < dim2)
9065 break;
9066 }
9067 if (NILP (tmp2))
9068 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9069 else
9070 {
9071 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9072 XSETCAR (tmp2, XCAR (tail));
9073 }
9074 }
9075 ASET (val, i, tmp);
15d143f7 9076 }
df7492f9
KH
9077 }
9078 ASET (attrs, coding_attr_charset_valids, val);
9079 category = coding_category_charset;
9080 }
9081 else if (EQ (coding_type, Qccl))
9082 {
9083 Lisp_Object valids;
ecec61c1 9084
df7492f9
KH
9085 if (nargs < coding_arg_ccl_max)
9086 goto short_args;
ecec61c1 9087
df7492f9
KH
9088 val = args[coding_arg_ccl_decoder];
9089 CHECK_CCL_PROGRAM (val);
9090 if (VECTORP (val))
9091 val = Fcopy_sequence (val);
9092 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 9093
df7492f9
KH
9094 val = args[coding_arg_ccl_encoder];
9095 CHECK_CCL_PROGRAM (val);
9096 if (VECTORP (val))
9097 val = Fcopy_sequence (val);
9098 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 9099
df7492f9
KH
9100 val = args[coding_arg_ccl_valids];
9101 valids = Fmake_string (make_number (256), make_number (0));
9102 for (tail = val; !NILP (tail); tail = Fcdr (tail))
9103 {
8dcbea82 9104 int from, to;
ecec61c1 9105
df7492f9
KH
9106 val = Fcar (tail);
9107 if (INTEGERP (val))
8dcbea82
KH
9108 {
9109 from = to = XINT (val);
9110 if (from < 0 || from > 255)
9111 args_out_of_range_3 (val, make_number (0), make_number (255));
9112 }
df7492f9
KH
9113 else
9114 {
df7492f9 9115 CHECK_CONS (val);
8f924df7
KH
9116 CHECK_NATNUM_CAR (val);
9117 CHECK_NATNUM_CDR (val);
df7492f9 9118 from = XINT (XCAR (val));
8f924df7 9119 if (from > 255)
8dcbea82
KH
9120 args_out_of_range_3 (XCAR (val),
9121 make_number (0), make_number (255));
df7492f9 9122 to = XINT (XCDR (val));
8dcbea82
KH
9123 if (to < from || to > 255)
9124 args_out_of_range_3 (XCDR (val),
9125 XCAR (val), make_number (255));
df7492f9 9126 }
8dcbea82 9127 for (i = from; i <= to; i++)
8f924df7 9128 SSET (valids, i, 1);
df7492f9
KH
9129 }
9130 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 9131
df7492f9 9132 category = coding_category_ccl;
55ab7be3 9133 }
df7492f9 9134 else if (EQ (coding_type, Qutf_16))
55ab7be3 9135 {
df7492f9 9136 Lisp_Object bom, endian;
4ed46869 9137
584948ac 9138 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 9139
df7492f9
KH
9140 if (nargs < coding_arg_utf16_max)
9141 goto short_args;
4ed46869 9142
df7492f9
KH
9143 bom = args[coding_arg_utf16_bom];
9144 if (! NILP (bom) && ! EQ (bom, Qt))
9145 {
9146 CHECK_CONS (bom);
8f924df7
KH
9147 val = XCAR (bom);
9148 CHECK_CODING_SYSTEM (val);
9149 val = XCDR (bom);
9150 CHECK_CODING_SYSTEM (val);
df7492f9
KH
9151 }
9152 ASET (attrs, coding_attr_utf_16_bom, bom);
9153
9154 endian = args[coding_arg_utf16_endian];
b49a1807
KH
9155 CHECK_SYMBOL (endian);
9156 if (NILP (endian))
9157 endian = Qbig;
9158 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 9159 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
9160 ASET (attrs, coding_attr_utf_16_endian, endian);
9161
9162 category = (CONSP (bom)
9163 ? coding_category_utf_16_auto
9164 : NILP (bom)
b49a1807 9165 ? (EQ (endian, Qbig)
df7492f9
KH
9166 ? coding_category_utf_16_be_nosig
9167 : coding_category_utf_16_le_nosig)
b49a1807 9168 : (EQ (endian, Qbig)
df7492f9
KH
9169 ? coding_category_utf_16_be
9170 : coding_category_utf_16_le));
9171 }
9172 else if (EQ (coding_type, Qiso_2022))
9173 {
9174 Lisp_Object initial, reg_usage, request, flags;
4776e638 9175 int i;
1397dc18 9176
df7492f9
KH
9177 if (nargs < coding_arg_iso2022_max)
9178 goto short_args;
9179
9180 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9181 CHECK_VECTOR (initial);
9182 for (i = 0; i < 4; i++)
9183 {
9184 val = Faref (initial, make_number (i));
9185 if (! NILP (val))
9186 {
584948ac
KH
9187 struct charset *charset;
9188
9189 CHECK_CHARSET_GET_CHARSET (val, charset);
9190 ASET (initial, i, make_number (CHARSET_ID (charset)));
9191 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9192 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9193 }
9194 else
9195 ASET (initial, i, make_number (-1));
9196 }
9197
9198 reg_usage = args[coding_arg_iso2022_reg_usage];
9199 CHECK_CONS (reg_usage);
8f924df7
KH
9200 CHECK_NUMBER_CAR (reg_usage);
9201 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
9202
9203 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9204 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 9205 {
df7492f9 9206 int id;
8f924df7 9207 Lisp_Object tmp;
df7492f9
KH
9208
9209 val = Fcar (tail);
9210 CHECK_CONS (val);
8f924df7
KH
9211 tmp = XCAR (val);
9212 CHECK_CHARSET_GET_ID (tmp, id);
9213 CHECK_NATNUM_CDR (val);
df7492f9
KH
9214 if (XINT (XCDR (val)) >= 4)
9215 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 9216 XSETCAR (val, make_number (id));
1397dc18 9217 }
4ed46869 9218
df7492f9
KH
9219 flags = args[coding_arg_iso2022_flags];
9220 CHECK_NATNUM (flags);
9221 i = XINT (flags);
9222 if (EQ (args[coding_arg_charset_list], Qiso_2022))
9223 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9224
9225 ASET (attrs, coding_attr_iso_initial, initial);
9226 ASET (attrs, coding_attr_iso_usage, reg_usage);
9227 ASET (attrs, coding_attr_iso_request, request);
9228 ASET (attrs, coding_attr_iso_flags, flags);
9229 setup_iso_safe_charsets (attrs);
9230
9231 if (i & CODING_ISO_FLAG_SEVEN_BITS)
9232 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9233 | CODING_ISO_FLAG_SINGLE_SHIFT))
9234 ? coding_category_iso_7_else
9235 : EQ (args[coding_arg_charset_list], Qiso_2022)
9236 ? coding_category_iso_7
9237 : coding_category_iso_7_tight);
9238 else
9239 {
9240 int id = XINT (AREF (initial, 1));
9241
c6fb6e98 9242 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
9243 || EQ (args[coding_arg_charset_list], Qiso_2022)
9244 || id < 0)
9245 ? coding_category_iso_8_else
9246 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9247 ? coding_category_iso_8_1
9248 : coding_category_iso_8_2);
9249 }
0ce7886f
KH
9250 if (category != coding_category_iso_8_1
9251 && category != coding_category_iso_8_2)
9252 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
9253 }
9254 else if (EQ (coding_type, Qemacs_mule))
c28a9453 9255 {
df7492f9
KH
9256 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9257 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 9258 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 9259 category = coding_category_emacs_mule;
c28a9453 9260 }
df7492f9 9261 else if (EQ (coding_type, Qshift_jis))
c28a9453 9262 {
df7492f9
KH
9263
9264 struct charset *charset;
9265
7d64c6ad 9266 if (XINT (Flength (charset_list)) != 3
6e07c25f 9267 && XINT (Flength (charset_list)) != 4)
7d64c6ad 9268 error ("There should be three or four charsets");
df7492f9
KH
9269
9270 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9271 if (CHARSET_DIMENSION (charset) != 1)
9272 error ("Dimension of charset %s is not one",
8f924df7 9273 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9274 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9275 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9276
9277 charset_list = XCDR (charset_list);
9278 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9279 if (CHARSET_DIMENSION (charset) != 1)
9280 error ("Dimension of charset %s is not one",
8f924df7 9281 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
9282
9283 charset_list = XCDR (charset_list);
9284 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9285 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
9286 error ("Dimension of charset %s is not two",
9287 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9288
9289 charset_list = XCDR (charset_list);
2b917a06
KH
9290 if (! NILP (charset_list))
9291 {
9292 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9293 if (CHARSET_DIMENSION (charset) != 2)
9294 error ("Dimension of charset %s is not two",
9295 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9296 }
df7492f9
KH
9297
9298 category = coding_category_sjis;
9299 Vsjis_coding_system = name;
c28a9453 9300 }
df7492f9
KH
9301 else if (EQ (coding_type, Qbig5))
9302 {
9303 struct charset *charset;
4ed46869 9304
df7492f9
KH
9305 if (XINT (Flength (charset_list)) != 2)
9306 error ("There should be just two charsets");
9307
9308 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9309 if (CHARSET_DIMENSION (charset) != 1)
9310 error ("Dimension of charset %s is not one",
8f924df7 9311 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9312 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9313 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9314
9315 charset_list = XCDR (charset_list);
9316 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9317 if (CHARSET_DIMENSION (charset) != 2)
9318 error ("Dimension of charset %s is not two",
8f924df7 9319 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 9320
df7492f9
KH
9321 category = coding_category_big5;
9322 Vbig5_coding_system = name;
9323 }
9324 else if (EQ (coding_type, Qraw_text))
c28a9453 9325 {
584948ac
KH
9326 category = coding_category_raw_text;
9327 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 9328 }
df7492f9 9329 else if (EQ (coding_type, Qutf_8))
4ed46869 9330 {
584948ac
KH
9331 category = coding_category_utf_8;
9332 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4ed46869 9333 }
df7492f9
KH
9334 else if (EQ (coding_type, Qundecided))
9335 category = coding_category_undecided;
4ed46869 9336 else
df7492f9 9337 error ("Invalid coding system type: %s",
8f924df7 9338 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 9339
df7492f9 9340 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
9341 CODING_ATTR_PLIST (attrs)
9342 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9343 CODING_ATTR_PLIST (attrs)));
35befdaa
KH
9344 CODING_ATTR_PLIST (attrs)
9345 = Fcons (QCascii_compatible_p,
9346 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9347 CODING_ATTR_PLIST (attrs)));
c4825358 9348
df7492f9
KH
9349 eol_type = args[coding_arg_eol_type];
9350 if (! NILP (eol_type)
9351 && ! EQ (eol_type, Qunix)
9352 && ! EQ (eol_type, Qdos)
9353 && ! EQ (eol_type, Qmac))
9354 error ("Invalid eol-type");
4ed46869 9355
df7492f9 9356 aliases = Fcons (name, Qnil);
4ed46869 9357
df7492f9
KH
9358 if (NILP (eol_type))
9359 {
9360 eol_type = make_subsidiaries (name);
9361 for (i = 0; i < 3; i++)
1397dc18 9362 {
df7492f9
KH
9363 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9364
9365 this_name = AREF (eol_type, i);
9366 this_aliases = Fcons (this_name, Qnil);
9367 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9368 this_spec = Fmake_vector (make_number (3), attrs);
9369 ASET (this_spec, 1, this_aliases);
9370 ASET (this_spec, 2, this_eol_type);
9371 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9372 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
583f71ca
KH
9373 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9374 if (NILP (val))
9375 Vcoding_system_alist
9376 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9377 Vcoding_system_alist);
1397dc18 9378 }
d46c5b12 9379 }
4ed46869 9380
df7492f9
KH
9381 spec_vec = Fmake_vector (make_number (3), attrs);
9382 ASET (spec_vec, 1, aliases);
9383 ASET (spec_vec, 2, eol_type);
48b0f3ae 9384
df7492f9
KH
9385 Fputhash (name, spec_vec, Vcoding_system_hash_table);
9386 Vcoding_system_list = Fcons (name, Vcoding_system_list);
583f71ca
KH
9387 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9388 if (NILP (val))
9389 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9390 Vcoding_system_alist);
48b0f3ae 9391
df7492f9
KH
9392 {
9393 int id = coding_categories[category].id;
48b0f3ae 9394
df7492f9
KH
9395 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9396 setup_coding_system (name, &coding_categories[category]);
9397 }
48b0f3ae 9398
d46c5b12 9399 return Qnil;
48b0f3ae 9400
df7492f9
KH
9401 short_args:
9402 return Fsignal (Qwrong_number_of_arguments,
9403 Fcons (intern ("define-coding-system-internal"),
9404 make_number (nargs)));
d46c5b12 9405}
4ed46869 9406
d6925f38 9407
a6f87d34
KH
9408DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9409 3, 3, 0,
9410 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
9411 (coding_system, prop, val)
9412 Lisp_Object coding_system, prop, val;
9413{
3dbe7859 9414 Lisp_Object spec, attrs;
a6f87d34
KH
9415
9416 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9417 attrs = AREF (spec, 0);
9418 if (EQ (prop, QCmnemonic))
9419 {
9420 if (! STRINGP (val))
9421 CHECK_CHARACTER (val);
9422 CODING_ATTR_MNEMONIC (attrs) = val;
9423 }
9424 else if (EQ (prop, QCdefalut_char))
9425 {
9426 if (NILP (val))
9427 val = make_number (' ');
9428 else
9429 CHECK_CHARACTER (val);
9430 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9431 }
9432 else if (EQ (prop, QCdecode_translation_table))
9433 {
9434 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9435 CHECK_SYMBOL (val);
9436 CODING_ATTR_DECODE_TBL (attrs) = val;
9437 }
9438 else if (EQ (prop, QCencode_translation_table))
9439 {
9440 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9441 CHECK_SYMBOL (val);
9442 CODING_ATTR_ENCODE_TBL (attrs) = val;
9443 }
9444 else if (EQ (prop, QCpost_read_conversion))
9445 {
9446 CHECK_SYMBOL (val);
9447 CODING_ATTR_POST_READ (attrs) = val;
9448 }
9449 else if (EQ (prop, QCpre_write_conversion))
9450 {
9451 CHECK_SYMBOL (val);
9452 CODING_ATTR_PRE_WRITE (attrs) = val;
9453 }
35befdaa
KH
9454 else if (EQ (prop, QCascii_compatible_p))
9455 {
9456 CODING_ATTR_ASCII_COMPAT (attrs) = val;
9457 }
a6f87d34
KH
9458
9459 CODING_ATTR_PLIST (attrs)
9460 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9461 return val;
9462}
9463
9464
df7492f9
KH
9465DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9466 Sdefine_coding_system_alias, 2, 2, 0,
9467 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
9468 (alias, coding_system)
9469 Lisp_Object alias, coding_system;
66cfb530 9470{
583f71ca 9471 Lisp_Object spec, aliases, eol_type, val;
4ed46869 9472
df7492f9
KH
9473 CHECK_SYMBOL (alias);
9474 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9475 aliases = AREF (spec, 1);
d6925f38
KH
9476 /* ALISES should be a list of length more than zero, and the first
9477 element is a base coding system. Append ALIAS at the tail of the
9478 list. */
df7492f9
KH
9479 while (!NILP (XCDR (aliases)))
9480 aliases = XCDR (aliases);
8f924df7 9481 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 9482
df7492f9
KH
9483 eol_type = AREF (spec, 2);
9484 if (VECTORP (eol_type))
4ed46869 9485 {
df7492f9
KH
9486 Lisp_Object subsidiaries;
9487 int i;
4ed46869 9488
df7492f9
KH
9489 subsidiaries = make_subsidiaries (alias);
9490 for (i = 0; i < 3; i++)
9491 Fdefine_coding_system_alias (AREF (subsidiaries, i),
9492 AREF (eol_type, i));
4ed46869 9493 }
df7492f9
KH
9494
9495 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 9496 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
583f71ca
KH
9497 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9498 if (NILP (val))
9499 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9500 Vcoding_system_alist);
66cfb530 9501
4ed46869
KH
9502 return Qnil;
9503}
9504
df7492f9
KH
9505DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9506 1, 1, 0,
9507 doc: /* Return the base of CODING-SYSTEM.
da7db224 9508Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
9509 (coding_system)
9510 Lisp_Object coding_system;
d46c5b12 9511{
df7492f9 9512 Lisp_Object spec, attrs;
d46c5b12 9513
df7492f9
KH
9514 if (NILP (coding_system))
9515 return (Qno_conversion);
9516 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9517 attrs = AREF (spec, 0);
9518 return CODING_ATTR_BASE_NAME (attrs);
9519}
1397dc18 9520
df7492f9
KH
9521DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9522 1, 1, 0,
9523 doc: "Return the property list of CODING-SYSTEM.")
9524 (coding_system)
9525 Lisp_Object coding_system;
9526{
9527 Lisp_Object spec, attrs;
1397dc18 9528
df7492f9
KH
9529 if (NILP (coding_system))
9530 coding_system = Qno_conversion;
9531 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9532 attrs = AREF (spec, 0);
9533 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
9534}
9535
df7492f9
KH
9536
9537DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9538 1, 1, 0,
da7db224 9539 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
9540 (coding_system)
9541 Lisp_Object coding_system;
66cfb530 9542{
df7492f9 9543 Lisp_Object spec;
84d60297 9544
df7492f9
KH
9545 if (NILP (coding_system))
9546 coding_system = Qno_conversion;
9547 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 9548 return AREF (spec, 1);
df7492f9 9549}
66cfb530 9550
df7492f9
KH
9551DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9552 Scoding_system_eol_type, 1, 1, 0,
9553 doc: /* Return eol-type of CODING-SYSTEM.
9554An eol-type is integer 0, 1, 2, or a vector of coding systems.
66cfb530 9555
df7492f9
KH
9556Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9557and CR respectively.
66cfb530 9558
df7492f9
KH
9559A vector value indicates that a format of end-of-line should be
9560detected automatically. Nth element of the vector is the subsidiary
9561coding system whose eol-type is N. */)
6b89e3aa
KH
9562 (coding_system)
9563 Lisp_Object coding_system;
9564{
df7492f9
KH
9565 Lisp_Object spec, eol_type;
9566 int n;
6b89e3aa 9567
df7492f9
KH
9568 if (NILP (coding_system))
9569 coding_system = Qno_conversion;
9570 if (! CODING_SYSTEM_P (coding_system))
9571 return Qnil;
9572 spec = CODING_SYSTEM_SPEC (coding_system);
9573 eol_type = AREF (spec, 2);
9574 if (VECTORP (eol_type))
9575 return Fcopy_sequence (eol_type);
9576 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9577 return make_number (n);
6b89e3aa
KH
9578}
9579
4ed46869
KH
9580#endif /* emacs */
9581
9582\f
1397dc18 9583/*** 9. Post-amble ***/
4ed46869 9584
dfcf069d 9585void
4ed46869
KH
9586init_coding_once ()
9587{
9588 int i;
9589
df7492f9
KH
9590 for (i = 0; i < coding_category_max; i++)
9591 {
9592 coding_categories[i].id = -1;
9593 coding_priorities[i] = i;
9594 }
4ed46869
KH
9595
9596 /* ISO2022 specific initialize routine. */
9597 for (i = 0; i < 0x20; i++)
b73bfc1c 9598 iso_code_class[i] = ISO_control_0;
4ed46869
KH
9599 for (i = 0x21; i < 0x7F; i++)
9600 iso_code_class[i] = ISO_graphic_plane_0;
9601 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 9602 iso_code_class[i] = ISO_control_1;
4ed46869
KH
9603 for (i = 0xA1; i < 0xFF; i++)
9604 iso_code_class[i] = ISO_graphic_plane_1;
9605 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9606 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
9607 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9608 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9609 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9610 iso_code_class[ISO_CODE_ESC] = ISO_escape;
9611 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9612 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9613 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9614
df7492f9
KH
9615 for (i = 0; i < 256; i++)
9616 {
9617 emacs_mule_bytes[i] = 1;
9618 }
7c78e542
KH
9619 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9620 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9621 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9622 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
9623}
9624
9625#ifdef emacs
9626
dfcf069d 9627void
e0e989f6
KH
9628syms_of_coding ()
9629{
df7492f9 9630 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
9631 {
9632 Lisp_Object args[2];
9633 args[0] = QCtest;
9634 args[1] = Qeq;
9635 Vcoding_system_hash_table = Fmake_hash_table (2, args);
9636 }
df7492f9
KH
9637
9638 staticpro (&Vsjis_coding_system);
9639 Vsjis_coding_system = Qnil;
e0e989f6 9640
df7492f9
KH
9641 staticpro (&Vbig5_coding_system);
9642 Vbig5_coding_system = Qnil;
9643
24a73b0a
KH
9644 staticpro (&Vcode_conversion_reused_workbuf);
9645 Vcode_conversion_reused_workbuf = Qnil;
9646
9647 staticpro (&Vcode_conversion_workbuf_name);
9648 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
e0e989f6 9649
24a73b0a 9650 reused_workbuf_in_use = 0;
df7492f9
KH
9651
9652 DEFSYM (Qcharset, "charset");
9653 DEFSYM (Qtarget_idx, "target-idx");
9654 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
9655 Fset (Qcoding_system_history, Qnil);
9656
9ce27fde 9657 /* Target FILENAME is the first argument. */
e0e989f6 9658 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 9659 /* Target FILENAME is the third argument. */
e0e989f6
KH
9660 Fput (Qwrite_region, Qtarget_idx, make_number (2));
9661
df7492f9 9662 DEFSYM (Qcall_process, "call-process");
9ce27fde 9663 /* Target PROGRAM is the first argument. */
e0e989f6
KH
9664 Fput (Qcall_process, Qtarget_idx, make_number (0));
9665
df7492f9 9666 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 9667 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9668 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9669
df7492f9 9670 DEFSYM (Qstart_process, "start-process");
9ce27fde 9671 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9672 Fput (Qstart_process, Qtarget_idx, make_number (2));
9673
df7492f9 9674 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 9675 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
9676 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9677
df7492f9
KH
9678 DEFSYM (Qcoding_system, "coding-system");
9679 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 9680
df7492f9
KH
9681 DEFSYM (Qeol_type, "eol-type");
9682 DEFSYM (Qunix, "unix");
9683 DEFSYM (Qdos, "dos");
4ed46869 9684
df7492f9
KH
9685 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9686 DEFSYM (Qpost_read_conversion, "post-read-conversion");
9687 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9688 DEFSYM (Qdefault_char, "default-char");
9689 DEFSYM (Qundecided, "undecided");
9690 DEFSYM (Qno_conversion, "no-conversion");
9691 DEFSYM (Qraw_text, "raw-text");
4ed46869 9692
df7492f9 9693 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 9694
df7492f9 9695 DEFSYM (Qutf_8, "utf-8");
8f924df7 9696 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 9697
df7492f9 9698 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
9699 DEFSYM (Qbig, "big");
9700 DEFSYM (Qlittle, "little");
27901516 9701
df7492f9
KH
9702 DEFSYM (Qshift_jis, "shift-jis");
9703 DEFSYM (Qbig5, "big5");
4ed46869 9704
df7492f9 9705 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 9706
df7492f9 9707 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
9708 Fput (Qcoding_system_error, Qerror_conditions,
9709 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9710 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 9711 build_string ("Invalid coding system"));
4ed46869 9712
05e6f5dc
KH
9713 /* Intern this now in case it isn't already done.
9714 Setting this variable twice is harmless.
9715 But don't staticpro it here--that is done in alloc.c. */
9716 Qchar_table_extra_slots = intern ("char-table-extra-slots");
70c22245 9717
df7492f9 9718 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 9719 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
9720 DEFSYM (Qtranslation_table_id, "translation-table-id");
9721 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9722 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 9723
df7492f9 9724 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 9725
df7492f9 9726 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 9727
01378f49 9728 DEFSYM (QCcategory, ":category");
a6f87d34
KH
9729 DEFSYM (QCmnemonic, ":mnemonic");
9730 DEFSYM (QCdefalut_char, ":default-char");
9731 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9732 DEFSYM (QCencode_translation_table, ":encode-translation-table");
9733 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9734 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
35befdaa 9735 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
01378f49 9736
df7492f9
KH
9737 Vcoding_category_table
9738 = Fmake_vector (make_number (coding_category_max), Qnil);
9739 staticpro (&Vcoding_category_table);
9740 /* Followings are target of code detection. */
9741 ASET (Vcoding_category_table, coding_category_iso_7,
9742 intern ("coding-category-iso-7"));
9743 ASET (Vcoding_category_table, coding_category_iso_7_tight,
9744 intern ("coding-category-iso-7-tight"));
9745 ASET (Vcoding_category_table, coding_category_iso_8_1,
9746 intern ("coding-category-iso-8-1"));
9747 ASET (Vcoding_category_table, coding_category_iso_8_2,
9748 intern ("coding-category-iso-8-2"));
9749 ASET (Vcoding_category_table, coding_category_iso_7_else,
9750 intern ("coding-category-iso-7-else"));
9751 ASET (Vcoding_category_table, coding_category_iso_8_else,
9752 intern ("coding-category-iso-8-else"));
9753 ASET (Vcoding_category_table, coding_category_utf_8,
9754 intern ("coding-category-utf-8"));
9755 ASET (Vcoding_category_table, coding_category_utf_16_be,
9756 intern ("coding-category-utf-16-be"));
ff563fce
KH
9757 ASET (Vcoding_category_table, coding_category_utf_16_auto,
9758 intern ("coding-category-utf-16-auto"));
df7492f9
KH
9759 ASET (Vcoding_category_table, coding_category_utf_16_le,
9760 intern ("coding-category-utf-16-le"));
9761 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9762 intern ("coding-category-utf-16-be-nosig"));
9763 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9764 intern ("coding-category-utf-16-le-nosig"));
9765 ASET (Vcoding_category_table, coding_category_charset,
9766 intern ("coding-category-charset"));
9767 ASET (Vcoding_category_table, coding_category_sjis,
9768 intern ("coding-category-sjis"));
9769 ASET (Vcoding_category_table, coding_category_big5,
9770 intern ("coding-category-big5"));
9771 ASET (Vcoding_category_table, coding_category_ccl,
9772 intern ("coding-category-ccl"));
9773 ASET (Vcoding_category_table, coding_category_emacs_mule,
9774 intern ("coding-category-emacs-mule"));
9775 /* Followings are NOT target of code detection. */
9776 ASET (Vcoding_category_table, coding_category_raw_text,
9777 intern ("coding-category-raw-text"));
9778 ASET (Vcoding_category_table, coding_category_undecided,
9779 intern ("coding-category-undecided"));
ecf488bc 9780
065e3595
KH
9781 DEFSYM (Qinsufficient_source, "insufficient-source");
9782 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9783 DEFSYM (Qinvalid_source, "invalid-source");
9784 DEFSYM (Qinterrupted, "interrupted");
9785 DEFSYM (Qinsufficient_memory, "insufficient-memory");
44e8490d 9786 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
065e3595 9787
4ed46869
KH
9788 defsubr (&Scoding_system_p);
9789 defsubr (&Sread_coding_system);
9790 defsubr (&Sread_non_nil_coding_system);
9791 defsubr (&Scheck_coding_system);
9792 defsubr (&Sdetect_coding_region);
d46c5b12 9793 defsubr (&Sdetect_coding_string);
05e6f5dc 9794 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 9795 defsubr (&Sunencodable_char_position);
df7492f9 9796 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
9797 defsubr (&Sdecode_coding_region);
9798 defsubr (&Sencode_coding_region);
9799 defsubr (&Sdecode_coding_string);
9800 defsubr (&Sencode_coding_string);
9801 defsubr (&Sdecode_sjis_char);
9802 defsubr (&Sencode_sjis_char);
9803 defsubr (&Sdecode_big5_char);
9804 defsubr (&Sencode_big5_char);
1ba9e4ab 9805 defsubr (&Sset_terminal_coding_system_internal);
c4825358 9806 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 9807 defsubr (&Sterminal_coding_system);
1ba9e4ab 9808 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 9809 defsubr (&Skeyboard_coding_system);
a5d301df 9810 defsubr (&Sfind_operation_coding_system);
df7492f9 9811 defsubr (&Sset_coding_system_priority);
6b89e3aa 9812 defsubr (&Sdefine_coding_system_internal);
df7492f9 9813 defsubr (&Sdefine_coding_system_alias);
a6f87d34 9814 defsubr (&Scoding_system_put);
df7492f9
KH
9815 defsubr (&Scoding_system_base);
9816 defsubr (&Scoding_system_plist);
9817 defsubr (&Scoding_system_aliases);
9818 defsubr (&Scoding_system_eol_type);
9819 defsubr (&Scoding_system_priority_list);
4ed46869 9820
4608c386 9821 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
9822 doc: /* List of coding systems.
9823
9824Do not alter the value of this variable manually. This variable should be
df7492f9 9825updated by the functions `define-coding-system' and
48b0f3ae 9826`define-coding-system-alias'. */);
4608c386
KH
9827 Vcoding_system_list = Qnil;
9828
9829 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
9830 doc: /* Alist of coding system names.
9831Each element is one element list of coding system name.
9832This variable is given to `completing-read' as TABLE argument.
9833
9834Do not alter the value of this variable manually. This variable should be
9835updated by the functions `make-coding-system' and
9836`define-coding-system-alias'. */);
4608c386
KH
9837 Vcoding_system_alist = Qnil;
9838
4ed46869 9839 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
9840 doc: /* List of coding-categories (symbols) ordered by priority.
9841
9842On detecting a coding system, Emacs tries code detection algorithms
9843associated with each coding-category one by one in this order. When
9844one algorithm agrees with a byte sequence of source text, the coding
0ec31faf
KH
9845system bound to the corresponding coding-category is selected.
9846
42205607 9847Don't modify this variable directly, but use `set-coding-priority'. */);
4ed46869
KH
9848 {
9849 int i;
9850
9851 Vcoding_category_list = Qnil;
df7492f9 9852 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 9853 Vcoding_category_list
d46c5b12
KH
9854 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9855 Vcoding_category_list);
4ed46869
KH
9856 }
9857
9858 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
9859 doc: /* Specify the coding system for read operations.
9860It is useful to bind this variable with `let', but do not set it globally.
9861If the value is a coding system, it is used for decoding on read operation.
9862If not, an appropriate element is used from one of the coding system alists:
9863There are three such tables, `file-coding-system-alist',
9864`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
9865 Vcoding_system_for_read = Qnil;
9866
9867 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
9868 doc: /* Specify the coding system for write operations.
9869Programs bind this variable with `let', but you should not set it globally.
9870If the value is a coding system, it is used for encoding of output,
9871when writing it to a file and when sending it to a file or subprocess.
9872
9873If this does not specify a coding system, an appropriate element
9874is used from one of the coding system alists:
9875There are three such tables, `file-coding-system-alist',
9876`process-coding-system-alist', and `network-coding-system-alist'.
9877For output to files, if the above procedure does not specify a coding system,
9878the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
9879 Vcoding_system_for_write = Qnil;
9880
9881 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
9882 doc: /*
9883Coding system used in the latest file or process I/O. */);
4ed46869
KH
9884 Vlast_coding_system_used = Qnil;
9885
065e3595
KH
9886 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9887 doc: /*
9888Error status of the last code conversion.
9889
9890When an error was detected in the last code conversion, this variable
9891is set to one of the following symbols.
9892 `insufficient-source'
9893 `inconsistent-eol'
9894 `invalid-source'
9895 `interrupted'
9896 `insufficient-memory'
9897When no error was detected, the value doesn't change. So, to check
9898the error status of a code conversion by this variable, you must
9899explicitly set this variable to nil before performing code
9900conversion. */);
9901 Vlast_code_conversion_error = Qnil;
9902
9ce27fde 9903 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
9904 doc: /*
9905*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
9906See info node `Coding Systems' and info node `Text and Binary' concerning
9907such conversion. */);
9ce27fde
KH
9908 inhibit_eol_conversion = 0;
9909
ed29121d 9910 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
9911 doc: /*
9912Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
9913Bind it to t if the process output is to be treated as if it were a file
9914read from some filesystem. */);
ed29121d
EZ
9915 inherit_process_coding_system = 0;
9916
02ba4723 9917 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
9918 doc: /*
9919Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
9920The format is ((PATTERN . VAL) ...),
9921where PATTERN is a regular expression matching a file name,
9922VAL is a coding system, a cons of coding systems, or a function symbol.
9923If VAL is a coding system, it is used for both decoding and encoding
9924the file contents.
9925If VAL is a cons of coding systems, the car part is used for decoding,
9926and the cdr part is used for encoding.
9927If VAL is a function symbol, the function must return a coding system
2c53e699
KH
9928or a cons of coding systems which are used as above. The function is
9929called with an argument that is a list of the arguments with which
5a0bbd9a
KH
9930`find-operation-coding-system' was called. If the function can't decide
9931a coding system, it can return `undecided' so that the normal
9932code-detection is performed.
48b0f3ae
PJ
9933
9934See also the function `find-operation-coding-system'
9935and the variable `auto-coding-alist'. */);
02ba4723
KH
9936 Vfile_coding_system_alist = Qnil;
9937
9938 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
9939 doc: /*
9940Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
9941The format is ((PATTERN . VAL) ...),
9942where PATTERN is a regular expression matching a program name,
9943VAL is a coding system, a cons of coding systems, or a function symbol.
9944If VAL is a coding system, it is used for both decoding what received
9945from the program and encoding what sent to the program.
9946If VAL is a cons of coding systems, the car part is used for decoding,
9947and the cdr part is used for encoding.
9948If VAL is a function symbol, the function must return a coding system
9949or a cons of coding systems which are used as above.
9950
9951See also the function `find-operation-coding-system'. */);
02ba4723
KH
9952 Vprocess_coding_system_alist = Qnil;
9953
9954 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
9955 doc: /*
9956Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
9957The format is ((PATTERN . VAL) ...),
9958where PATTERN is a regular expression matching a network service name
9959or is a port number to connect to,
9960VAL is a coding system, a cons of coding systems, or a function symbol.
9961If VAL is a coding system, it is used for both decoding what received
9962from the network stream and encoding what sent to the network stream.
9963If VAL is a cons of coding systems, the car part is used for decoding,
9964and the cdr part is used for encoding.
9965If VAL is a function symbol, the function must return a coding system
9966or a cons of coding systems which are used as above.
9967
9968See also the function `find-operation-coding-system'. */);
02ba4723 9969 Vnetwork_coding_system_alist = Qnil;
4ed46869 9970
68c45bf0 9971 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
9972 doc: /* Coding system to use with system messages.
9973Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
9974 Vlocale_coding_system = Qnil;
9975
005f0d35 9976 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 9977 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
9978 doc: /*
9979*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 9980 eol_mnemonic_unix = build_string (":");
4ed46869 9981
7722baf9 9982 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
9983 doc: /*
9984*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 9985 eol_mnemonic_dos = build_string ("\\");
4ed46869 9986
7722baf9 9987 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
9988 doc: /*
9989*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 9990 eol_mnemonic_mac = build_string ("/");
4ed46869 9991
7722baf9 9992 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
9993 doc: /*
9994*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 9995 eol_mnemonic_undecided = build_string (":");
4ed46869 9996
84fbb8a0 9997 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
9998 doc: /*
9999*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 10000 Venable_character_translation = Qt;
bdd9fb48 10001
f967223b 10002 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
10003 &Vstandard_translation_table_for_decode,
10004 doc: /* Table for translating characters while decoding. */);
f967223b 10005 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 10006
f967223b 10007 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
10008 &Vstandard_translation_table_for_encode,
10009 doc: /* Table for translating characters while encoding. */);
f967223b 10010 Vstandard_translation_table_for_encode = Qnil;
4ed46869 10011
df7492f9 10012 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
10013 doc: /* Alist of charsets vs revision numbers.
10014While encoding, if a charset (car part of an element) is found,
df7492f9
KH
10015designate it with the escape sequence identifying revision (cdr part
10016of the element). */);
10017 Vcharset_revision_table = Qnil;
02ba4723
KH
10018
10019 DEFVAR_LISP ("default-process-coding-system",
10020 &Vdefault_process_coding_system,
48b0f3ae
PJ
10021 doc: /* Cons of coding systems used for process I/O by default.
10022The car part is used for decoding a process output,
10023the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 10024 Vdefault_process_coding_system = Qnil;
c4825358 10025
3f003981 10026 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
10027 doc: /*
10028Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
10029This is a vector of length 256.
10030If Nth element is non-nil, the existence of code N in a file
10031\(or output of subprocess) doesn't prevent it to be detected as
10032a coding system of ISO 2022 variant which has a flag
10033`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10034or reading output of a subprocess.
10035Only 128th through 159th elements has a meaning. */);
3f003981 10036 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
10037
10038 DEFVAR_LISP ("select-safe-coding-system-function",
10039 &Vselect_safe_coding_system_function,
df7492f9
KH
10040 doc: /*
10041Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
10042
10043If set, this function is called to force a user to select a proper
10044coding system which can encode the text in the case that a default
fdecf907
GM
10045coding system used in each operation can't encode the text. The
10046function should take care that the buffer is not modified while
10047the coding system is being selected.
48b0f3ae
PJ
10048
10049The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
10050 Vselect_safe_coding_system_function = Qnil;
10051
5d5bf4d8
KH
10052 DEFVAR_BOOL ("coding-system-require-warning",
10053 &coding_system_require_warning,
10054 doc: /* Internal use only.
6b89e3aa
KH
10055If non-nil, on writing a file, `select-safe-coding-system-function' is
10056called even if `coding-system-for-write' is non-nil. The command
10057`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
10058 coding_system_require_warning = 0;
10059
10060
22ab2303 10061 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 10062 &inhibit_iso_escape_detection,
df7492f9
KH
10063 doc: /*
10064If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
10065
10066By default, on reading a file, Emacs tries to detect how the text is
10067encoded. This code detection is sensitive to escape sequences. If
10068the sequence is valid as ISO2022, the code is determined as one of
10069the ISO2022 encodings, and the file is decoded by the corresponding
10070coding system (e.g. `iso-2022-7bit').
10071
10072However, there may be a case that you want to read escape sequences in
10073a file as is. In such a case, you can set this variable to non-nil.
10074Then, as the code detection ignores any escape sequences, no file is
10075detected as encoded in some ISO2022 encoding. The result is that all
10076escape sequences become visible in a buffer.
10077
10078The default value is nil, and it is strongly recommended not to change
10079it. That is because many Emacs Lisp source files that contain
10080non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10081in Emacs's distribution, and they won't be decoded correctly on
10082reading if you suppress escape sequence detection.
10083
10084The other way to read escape sequences in a file without decoding is
10085to explicitly specify some coding system that doesn't use ISO2022's
10086escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 10087 inhibit_iso_escape_detection = 0;
002fdb44
DL
10088
10089 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1
DL
10090 doc: /* Char table for translating self-inserting characters.
10091This is applied to the result of input methods, not their input. See also
10092`keyboard-translate-table'. */);
002fdb44 10093 Vtranslation_table_for_input = Qnil;
8f924df7 10094
2c78b7e1
KH
10095 {
10096 Lisp_Object args[coding_arg_max];
8f924df7 10097 Lisp_Object plist[16];
2c78b7e1
KH
10098 int i;
10099
10100 for (i = 0; i < coding_arg_max; i++)
10101 args[i] = Qnil;
10102
10103 plist[0] = intern (":name");
10104 plist[1] = args[coding_arg_name] = Qno_conversion;
10105 plist[2] = intern (":mnemonic");
10106 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10107 plist[4] = intern (":coding-type");
10108 plist[5] = args[coding_arg_coding_type] = Qraw_text;
10109 plist[6] = intern (":ascii-compatible-p");
10110 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10111 plist[8] = intern (":default-char");
10112 plist[9] = args[coding_arg_default_char] = make_number (0);
8f924df7
KH
10113 plist[10] = intern (":for-unibyte");
10114 plist[11] = args[coding_arg_for_unibyte] = Qt;
10115 plist[12] = intern (":docstring");
10116 plist[13] = build_string ("Do no conversion.\n\
2c78b7e1
KH
10117\n\
10118When you visit a file with this coding, the file is read into a\n\
10119unibyte buffer as is, thus each byte of a file is treated as a\n\
10120character.");
8f924df7
KH
10121 plist[14] = intern (":eol-type");
10122 plist[15] = args[coding_arg_eol_type] = Qunix;
10123 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1 10124 Fdefine_coding_system_internal (coding_arg_max, args);
ae6f73fa
KH
10125
10126 plist[1] = args[coding_arg_name] = Qundecided;
10127 plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10128 plist[5] = args[coding_arg_coding_type] = Qundecided;
10129 /* This is already set.
35befdaa 10130 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
ae6f73fa
KH
10131 plist[8] = intern (":charset-list");
10132 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10133 plist[11] = args[coding_arg_for_unibyte] = Qnil;
10134 plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10135 plist[15] = args[coding_arg_eol_type] = Qnil;
10136 args[coding_arg_plist] = Flist (16, plist);
10137 Fdefine_coding_system_internal (coding_arg_max, args);
2c78b7e1
KH
10138 }
10139
2c78b7e1 10140 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
10141
10142 {
10143 int i;
10144
10145 for (i = 0; i < coding_category_max; i++)
10146 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10147 }
fcbcfb64
KH
10148#if defined (MSDOS) || defined (WINDOWSNT)
10149 system_eol_type = Qdos;
10150#else
10151 system_eol_type = Qunix;
10152#endif
10153 staticpro (&system_eol_type);
4ed46869
KH
10154}
10155
68c45bf0
PE
10156char *
10157emacs_strerror (error_number)
10158 int error_number;
10159{
10160 char *str;
10161
ca9c0567 10162 synchronize_system_messages_locale ();
68c45bf0
PE
10163 str = strerror (error_number);
10164
10165 if (! NILP (Vlocale_coding_system))
10166 {
10167 Lisp_Object dec = code_convert_string_norecord (build_string (str),
10168 Vlocale_coding_system,
10169 0);
d5db4077 10170 str = (char *) SDATA (dec);
68c45bf0
PE
10171 }
10172
10173 return str;
10174}
10175
4ed46869 10176#endif /* emacs */
9ffd559c
KH
10177
10178/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10179 (do not change this comment) */