(decode_coding_big5, produce_chars): Fix typos in last
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
aaef169d 2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
8cabe764 3 2006, 2007, 2008 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
8cabe764 5 2005, 2006, 2007, 2008
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8f924df7 8 Copyright (C) 2003
df7492f9
KH
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
4ed46869 11
369314dc
KH
12This file is part of GNU Emacs.
13
14GNU Emacs is free software; you can redistribute it and/or modify
15it under the terms of the GNU General Public License as published by
1427aa65 16the Free Software Foundation; either version 3, or (at your option)
369314dc 17any later version.
4ed46869 18
369314dc
KH
19GNU Emacs is distributed in the hope that it will be useful,
20but WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22GNU General Public License for more details.
4ed46869 23
369314dc
KH
24You should have received a copy of the GNU General Public License
25along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
26the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
27Boston, MA 02110-1301, USA. */
4ed46869
KH
28
29/*** TABLE OF CONTENTS ***
30
b73bfc1c 31 0. General comments
4ed46869 32 1. Preamble
df7492f9
KH
33 2. Emacs' internal format (emacs-utf-8) handlers
34 3. UTF-8 handlers
35 4. UTF-16 handlers
36 5. Charset-base coding systems handlers
37 6. emacs-mule (old Emacs' internal format) handlers
38 7. ISO2022 handlers
39 8. Shift-JIS and BIG5 handlers
40 9. CCL handlers
41 10. C library functions
42 11. Emacs Lisp library functions
43 12. Postamble
4ed46869
KH
44
45*/
46
df7492f9 47/*** 0. General comments ***
b73bfc1c
KH
48
49
df7492f9 50CODING SYSTEM
4ed46869 51
5bad0796
DL
52 A coding system is an object for an encoding mechanism that contains
53 information about how to convert byte sequences to character
e19c3639
KH
54 sequences and vice versa. When we say "decode", it means converting
55 a byte sequence of a specific coding system into a character
56 sequence that is represented by Emacs' internal coding system
57 `emacs-utf-8', and when we say "encode", it means converting a
58 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 59 coding system.
4ed46869 60
e19c3639
KH
61 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
62 C level, a coding system is represented by a vector of attributes
5bad0796 63 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
64 coding system symbol to attributes vector is done by looking up
65 Vcoding_system_hash_table by the symbol.
4ed46869 66
e19c3639 67 Coding systems are classified into the following types depending on
5bad0796 68 the encoding mechanism. Here's a brief description of the types.
4ed46869 69
df7492f9
KH
70 o UTF-8
71
72 o UTF-16
73
74 o Charset-base coding system
75
76 A coding system defined by one or more (coded) character sets.
5bad0796 77 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
78 character set.
79
5bad0796 80 o Old Emacs internal format (emacs-mule)
df7492f9 81
5bad0796 82 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 83
df7492f9 84 o ISO2022-base coding system
4ed46869
KH
85
86 The most famous coding system for multiple character sets. X's
df7492f9
KH
87 Compound Text, various EUCs (Extended Unix Code), and coding systems
88 used in the Internet communication such as ISO-2022-JP are all
89 variants of ISO2022.
4ed46869 90
df7492f9 91 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 92
4ed46869
KH
93 A coding system to encode character sets: ASCII, JISX0201, and
94 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 95 section 8.
4ed46869 96
df7492f9 97 o BIG5
4ed46869 98
df7492f9 99 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 100 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
101 described in section 8. In this file, when we write "big5" (all
102 lowercase), we mean the coding system, and when we write "Big5"
103 (capitalized), we mean the character set.
4ed46869 104
df7492f9 105 o CCL
27901516 106
5bad0796 107 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
108 not listed above, he can supply a decoder and an encoder for it in
109 CCL (Code Conversion Language) programs. Emacs executes the CCL
110 program while decoding/encoding.
27901516 111
df7492f9 112 o Raw-text
4ed46869 113
5a936b46 114 A coding system for text containing raw eight-bit data. Emacs
5bad0796 115 treats each byte of source text as a character (except for
df7492f9 116 end-of-line conversion).
4ed46869 117
df7492f9
KH
118 o No-conversion
119
120 Like raw text, but don't do end-of-line conversion.
4ed46869 121
4ed46869 122
df7492f9 123END-OF-LINE FORMAT
4ed46869 124
5bad0796 125 How text end-of-line is encoded depends on operating system. For
df7492f9 126 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 127 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
128 `line-feed' codes. MacOS's format is usually one byte of
129 `carriage-return'.
4ed46869 130
cfb43547 131 Since text character encoding and end-of-line encoding are
df7492f9
KH
132 independent, any coding system described above can take any format
133 of end-of-line (except for no-conversion).
4ed46869 134
e19c3639
KH
135STRUCT CODING_SYSTEM
136
137 Before using a coding system for code conversion (i.e. decoding and
138 encoding), we set up a structure of type `struct coding_system'.
139 This structure keeps various information about a specific code
5bad0796 140 conversion (e.g. the location of source and destination data).
4ed46869
KH
141
142*/
143
df7492f9
KH
144/* COMMON MACROS */
145
146
4ed46869
KH
147/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
148
df7492f9 149 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
150 CODING conforms to the format of XXX, and update the members of
151 DETECT_INFO.
df7492f9 152
ff0dacd7 153 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
154
155 Below is the template of these functions. */
156
4ed46869 157#if 0
df7492f9 158static int
ff0dacd7 159detect_coding_XXX (coding, detect_info)
df7492f9 160 struct coding_system *coding;
ff0dacd7 161 struct coding_detection_info *detect_info;
4ed46869 162{
f1d34bca
MB
163 const unsigned char *src = coding->source;
164 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 165 int multibytep = coding->src_multibyte;
ff0dacd7 166 int consumed_chars = 0;
df7492f9
KH
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the source is exhausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
ff0dacd7
KH
175
176 if (! __C_conforms_to_XXX___ (c))
177 break;
178 if (__C_strongly_suggests_XXX__ (c))
179 found = CATEGORY_MASK_XXX; /* Positive evidence for XXX. */
df7492f9 180 }
ff0dacd7
KH
181 /* The byte sequence is invalid for XXX. */
182 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 183 return 0;
ff0dacd7 184
df7492f9 185 no_more_source:
ff0dacd7
KH
186 /* The source was exhausted without finding an invalid byte. */
187 detect_info->found |= found;
df7492f9 188 return 1;
4ed46869
KH
189}
190#endif
191
192/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
193
df7492f9
KH
194 These functions decode a byte sequence specified as a source by
195 CODING. The resulting multibyte text goes to a place pointed to by
196 CODING->charbuf, the length of which should not exceed
197 CODING->charbuf_size;
d46c5b12 198
df7492f9
KH
199 These functions set the information of original and decoded texts in
200 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
201 They also set CODING->result to one of CODING_RESULT_XXX indicating
202 how the decoding is finished.
d46c5b12 203
df7492f9 204 Below is the template of these functions. */
d46c5b12 205
4ed46869 206#if 0
b73bfc1c 207static void
df7492f9 208decode_coding_XXXX (coding)
4ed46869 209 struct coding_system *coding;
4ed46869 210{
f1d34bca
MB
211 const unsigned char *src = coding->source + coding->consumed;
212 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
213 /* SRC_BASE remembers the start position in source in each loop.
214 The loop will be exited when there's not enough source text, or
215 when there's no room in CHARBUF for a decoded character. */
f1d34bca 216 const unsigned char *src_base;
df7492f9 217 /* A buffer to produce decoded characters. */
69a80ea3
KH
218 int *charbuf = coding->charbuf + coding->charbuf_used;
219 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
220 int multibytep = coding->src_multibyte;
221
222 while (1)
223 {
224 src_base = src;
225 if (charbuf >= charbuf_end)
226 /* CHARBUF is full: no more room to produce a decoded character. */
227 break;
228 ONE_MORE_BYTE (c);
229 /* Decode it. */
230 }
231
232 no_more_source:
233 if (src_base < src_end
234 && coding->mode & CODING_MODE_LAST_BLOCK)
235 /* If the source ends by partial bytes to construct a character,
236 treat them as eight-bit raw data. */
237 while (src_base < src_end && charbuf < charbuf_end)
238 *charbuf++ = *src_base++;
239 /* Remember how many bytes and characters we consumed. If the
240 source is multibyte, the bytes and chars are not identical. */
241 coding->consumed = coding->consumed_char = src_base - coding->source;
242 /* Remember how many characters we produced. */
243 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
244}
245#endif
246
247/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
248
df7492f9
KH
249 These functions encode SRC_BYTES length text at SOURCE of Emacs'
250 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
251 goes to a place pointed to by DESTINATION, the length of which
252 should not exceed DST_BYTES.
d46c5b12 253
df7492f9
KH
254 These functions set the information of original and encoded texts in
255 the members produced, produced_char, consumed, and consumed_char of
256 the structure *CODING. They also set the member result to one of
257 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 258
df7492f9
KH
259 DST_BYTES zero means that source area and destination area are
260 overlapped, which means that we can produce an encoded text until it
261 reaches at the head of not-yet-encoded source text.
d46c5b12 262
df7492f9 263 Below is a template of these functions. */
4ed46869 264#if 0
b73bfc1c 265static void
df7492f9 266encode_coding_XXX (coding)
4ed46869 267 struct coding_system *coding;
4ed46869 268{
df7492f9
KH
269 int multibytep = coding->dst_multibyte;
270 int *charbuf = coding->charbuf;
271 int *charbuf_end = coding->charbuf + coding->charbuf_used;
272 unsigned char *dst = coding->destination + coding->produced;
273 unsigned char *dst_end = coding->destination + coding->dst_bytes;
274 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
275 int produced_chars = 0;
276
277 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
278 {
279 int c = *charbuf;
280 /* Encode C into DST, and increment DST. */
281 }
282 label_no_more_destination:
283 /* How many chars and bytes we produced. */
284 coding->produced_char += produced_chars;
285 coding->produced = dst - coding->destination;
4ed46869
KH
286}
287#endif
288
4ed46869
KH
289\f
290/*** 1. Preamble ***/
291
68c45bf0 292#include <config.h>
4ed46869
KH
293#include <stdio.h>
294
4ed46869
KH
295#include "lisp.h"
296#include "buffer.h"
df7492f9 297#include "character.h"
4ed46869
KH
298#include "charset.h"
299#include "ccl.h"
df7492f9 300#include "composite.h"
4ed46869
KH
301#include "coding.h"
302#include "window.h"
b8299c66
KL
303#include "frame.h"
304#include "termhooks.h"
4ed46869 305
df7492f9 306Lisp_Object Vcoding_system_hash_table;
4ed46869 307
df7492f9 308Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
309Lisp_Object Qunix, Qdos;
310extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
311Lisp_Object Qbuffer_file_coding_system;
312Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 313Lisp_Object Qdefault_char;
27901516 314Lisp_Object Qno_conversion, Qundecided;
df7492f9 315Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 316Lisp_Object Qbig, Qlittle;
bb0115a2 317Lisp_Object Qcoding_system_history;
1397dc18 318Lisp_Object Qvalid_codes;
a6f87d34
KH
319Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
320Lisp_Object QCdecode_translation_table, QCencode_translation_table;
321Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
35befdaa 322Lisp_Object QCascii_compatible_p;
4ed46869
KH
323
324extern Lisp_Object Qinsert_file_contents, Qwrite_region;
387f6ba5 325Lisp_Object Qcall_process, Qcall_process_region;
4ed46869
KH
326Lisp_Object Qstart_process, Qopen_network_stream;
327Lisp_Object Qtarget_idx;
328
065e3595
KH
329Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
330Lisp_Object Qinterrupted, Qinsufficient_memory;
331
c7183fb8
GM
332extern Lisp_Object Qcompletion_ignore_case;
333
44e8490d
KH
334/* If a symbol has this property, evaluate the value to define the
335 symbol as a coding system. */
336static Lisp_Object Qcoding_system_define_form;
337
5d5bf4d8
KH
338int coding_system_require_warning;
339
d46c5b12
KH
340Lisp_Object Vselect_safe_coding_system_function;
341
7722baf9
EZ
342/* Mnemonic string for each format of end-of-line. */
343Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
344/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 345 decided. */
7722baf9 346Lisp_Object eol_mnemonic_undecided;
4ed46869 347
fcbcfb64
KH
348/* Format of end-of-line decided by system. This is Qunix on
349 Unix and Mac, Qdos on DOS/Windows.
350 This has an effect only for external encoding (i.e. for output to
351 file and process), not for in-buffer or Lisp string encoding. */
352static Lisp_Object system_eol_type;
353
4ed46869
KH
354#ifdef emacs
355
4608c386
KH
356Lisp_Object Vcoding_system_list, Vcoding_system_alist;
357
358Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 359
d46c5b12
KH
360/* Coding system emacs-mule and raw-text are for converting only
361 end-of-line format. */
362Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 363Lisp_Object Qutf_8_emacs;
ecf488bc 364
4ed46869
KH
365/* Coding-systems are handed between Emacs Lisp programs and C internal
366 routines by the following three variables. */
367/* Coding-system for reading files and receiving data from process. */
368Lisp_Object Vcoding_system_for_read;
369/* Coding-system for writing files and sending data to process. */
370Lisp_Object Vcoding_system_for_write;
371/* Coding-system actually used in the latest I/O. */
372Lisp_Object Vlast_coding_system_used;
065e3595
KH
373/* Set to non-nil when an error is detected during code conversion. */
374Lisp_Object Vlast_code_conversion_error;
c4825358 375/* A vector of length 256 which contains information about special
94487c4e 376 Latin codes (especially for dealing with Microsoft codes). */
3f003981 377Lisp_Object Vlatin_extra_code_table;
c4825358 378
9ce27fde
KH
379/* Flag to inhibit code conversion of end-of-line format. */
380int inhibit_eol_conversion;
381
74383408
KH
382/* Flag to inhibit ISO2022 escape sequence detection. */
383int inhibit_iso_escape_detection;
384
ed29121d
EZ
385/* Flag to make buffer-file-coding-system inherit from process-coding. */
386int inherit_process_coding_system;
387
c4825358
KH
388/* Coding system to be used to encode text for terminal display when
389 terminal coding system is nil. */
390struct coding_system safe_terminal_coding;
391
02ba4723
KH
392Lisp_Object Vfile_coding_system_alist;
393Lisp_Object Vprocess_coding_system_alist;
394Lisp_Object Vnetwork_coding_system_alist;
4ed46869 395
68c45bf0
PE
396Lisp_Object Vlocale_coding_system;
397
4ed46869
KH
398#endif /* emacs */
399
f967223b
KH
400/* Flag to tell if we look up translation table on character code
401 conversion. */
84fbb8a0 402Lisp_Object Venable_character_translation;
f967223b
KH
403/* Standard translation table to look up on decoding (reading). */
404Lisp_Object Vstandard_translation_table_for_decode;
405/* Standard translation table to look up on encoding (writing). */
406Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 407
f967223b
KH
408Lisp_Object Qtranslation_table;
409Lisp_Object Qtranslation_table_id;
410Lisp_Object Qtranslation_table_for_decode;
411Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
412
413/* Alist of charsets vs revision number. */
df7492f9 414static Lisp_Object Vcharset_revision_table;
4ed46869 415
02ba4723
KH
416/* Default coding systems used for process I/O. */
417Lisp_Object Vdefault_process_coding_system;
418
002fdb44
DL
419/* Char table for translating Quail and self-inserting input. */
420Lisp_Object Vtranslation_table_for_input;
421
df7492f9
KH
422/* Two special coding systems. */
423Lisp_Object Vsjis_coding_system;
424Lisp_Object Vbig5_coding_system;
425
df7492f9
KH
426/* ISO2022 section */
427
428#define CODING_ISO_INITIAL(coding, reg) \
429 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
430 coding_attr_iso_initial), \
431 reg)))
432
433
/* Return the entry of CODING's safe_charsets table for CHARSET_ID, or
   -1 if CHARSET_ID is greater than CODING's max_charset_id.
   CHARSET_ID is evaluated twice, so it must have no side effects. */
434#define CODING_ISO_REQUEST(coding, charset_id) \
435 (((charset_id) <= (coding)->max_charset_id \
436 ? (coding)->safe_charsets[charset_id] \
437 : -1))
438
439
440#define CODING_ISO_FLAGS(coding) \
441 ((coding)->spec.iso_2022.flags)
442#define CODING_ISO_DESIGNATION(coding, reg) \
443 ((coding)->spec.iso_2022.current_designation[reg])
444#define CODING_ISO_INVOCATION(coding, plane) \
445 ((coding)->spec.iso_2022.current_invocation[plane])
446#define CODING_ISO_SINGLE_SHIFTING(coding) \
447 ((coding)->spec.iso_2022.single_shifting)
448#define CODING_ISO_BOL(coding) \
449 ((coding)->spec.iso_2022.bol)
450#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
451 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
452
453/* Control characters of ISO2022. */
454 /* code */ /* function */
455#define ISO_CODE_LF 0x0A /* line-feed */
456#define ISO_CODE_CR 0x0D /* carriage-return */
457#define ISO_CODE_SO 0x0E /* shift-out */
458#define ISO_CODE_SI 0x0F /* shift-in */
459#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
460#define ISO_CODE_ESC 0x1B /* escape */
461#define ISO_CODE_SS2 0x8E /* single-shift-2 */
462#define ISO_CODE_SS3 0x8F /* single-shift-3 */
463#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
464
465/* All code (1-byte) of ISO2022 is classified into one of the
466 following classes. */
467enum iso_code_class_type
468 {
469 ISO_control_0, /* Control codes in the range
470 0x00..0x1F and 0x7F, except for the
471 following 4 codes. */
df7492f9
KH
472 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
473 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
474 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
475 ISO_escape, /* ISO_CODE_ESC (0x1B) */
476 ISO_control_1, /* Control codes in the range
477 0x80..0x9F, except for the
478 following 3 codes. */
479 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
480 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
481 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
482 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
483 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
484 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
485 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
486 };
05e6f5dc 487
df7492f9
KH
488/** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
489 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 490
df7492f9
KH
491/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
492 instead of the correct short-form sequence (e.g. ESC $ A). */
493#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 494
df7492f9
KH
495/* If set, reset graphic planes and registers at end-of-line to the
496 initial state. */
497#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 498
df7492f9
KH
499/* If set, reset graphic planes and registers before any control
500 characters to the initial state. */
501#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 502
df7492f9
KH
503/* If set, encode by 7-bit environment. */
504#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 505
df7492f9
KH
506/* If set, use locking-shift function. */
507#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 508
df7492f9
KH
509/* If set, use single-shift function. Overwrite
510 CODING_ISO_FLAG_LOCKING_SHIFT. */
511#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 512
df7492f9
KH
513/* If set, use designation escape sequence. */
514#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 515
df7492f9
KH
516/* If set, produce revision number sequence. */
517#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 518
df7492f9
KH
519/* If set, produce ISO6429's direction specifying sequence. */
520#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 521
df7492f9
KH
522/* If set, assume designation states are reset at beginning of line on
523 output. */
524#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 525
df7492f9
KH
526/* If set, designation sequence should be placed at beginning of line
527 on output. */
528#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 529
df7492f9
KH
530/* If set, do not encode unsafe characters on output. */
531#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 532
df7492f9
KH
533/* If set, extra latin codes (128..159) are accepted as a valid code
534 on input. */
535#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 536
df7492f9 537#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 538
df7492f9 539#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 540
bf16eb23 541#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 542
bf16eb23 543#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 544
bf16eb23 545#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 546
df7492f9
KH
547/* A character to be produced on output if encoding of the original
548 character is prohibited by CODING_ISO_FLAG_SAFE. */
549#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 550
4ed46869 551
df7492f9
KH
552/* UTF-16 section */
553#define CODING_UTF_16_BOM(coding) \
554 ((coding)->spec.utf_16.bom)
4ed46869 555
df7492f9
KH
556#define CODING_UTF_16_ENDIAN(coding) \
557 ((coding)->spec.utf_16.endian)
4ed46869 558
df7492f9
KH
559#define CODING_UTF_16_SURROGATE(coding) \
560 ((coding)->spec.utf_16.surrogate)
4ed46869 561
4ed46869 562
df7492f9
KH
563/* CCL section */
564#define CODING_CCL_DECODER(coding) \
565 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
566#define CODING_CCL_ENCODER(coding) \
567 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
568#define CODING_CCL_VALIDS(coding) \
8f924df7 569 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 570
5a936b46 571/* Index for each coding category in `coding_categories' */
4ed46869 572
df7492f9
KH
573enum coding_category
574 {
575 coding_category_iso_7,
576 coding_category_iso_7_tight,
577 coding_category_iso_8_1,
578 coding_category_iso_8_2,
579 coding_category_iso_7_else,
580 coding_category_iso_8_else,
581 coding_category_utf_8,
582 coding_category_utf_16_auto,
583 coding_category_utf_16_be,
584 coding_category_utf_16_le,
585 coding_category_utf_16_be_nosig,
586 coding_category_utf_16_le_nosig,
587 coding_category_charset,
588 coding_category_sjis,
589 coding_category_big5,
590 coding_category_ccl,
591 coding_category_emacs_mule,
592 /* All above are targets of code detection. */
593 coding_category_raw_text,
594 coding_category_undecided,
595 coding_category_max
596 };
597
598/* Definitions of flag bits used in detect_coding_XXXX. */
599#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
600#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
601#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
602#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
603#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
604#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
605#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
b49a1807 606#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
607#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
608#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
609#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
610#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
611#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
612#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
613#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
614#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
615#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 616#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
617
618/* This value is returned if detect_coding_mask () finds nothing other
619 than ASCII characters. */
620#define CATEGORY_MASK_ANY \
621 (CATEGORY_MASK_ISO_7 \
622 | CATEGORY_MASK_ISO_7_TIGHT \
623 | CATEGORY_MASK_ISO_8_1 \
624 | CATEGORY_MASK_ISO_8_2 \
625 | CATEGORY_MASK_ISO_7_ELSE \
626 | CATEGORY_MASK_ISO_8_ELSE \
627 | CATEGORY_MASK_UTF_8 \
628 | CATEGORY_MASK_UTF_16_BE \
629 | CATEGORY_MASK_UTF_16_LE \
630 | CATEGORY_MASK_UTF_16_BE_NOSIG \
631 | CATEGORY_MASK_UTF_16_LE_NOSIG \
632 | CATEGORY_MASK_CHARSET \
633 | CATEGORY_MASK_SJIS \
634 | CATEGORY_MASK_BIG5 \
635 | CATEGORY_MASK_CCL \
636 | CATEGORY_MASK_EMACS_MULE)
637
638
639#define CATEGORY_MASK_ISO_7BIT \
640 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
641
642#define CATEGORY_MASK_ISO_8BIT \
643 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
644
645#define CATEGORY_MASK_ISO_ELSE \
646 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
647
648#define CATEGORY_MASK_ISO_ESCAPE \
649 (CATEGORY_MASK_ISO_7 \
650 | CATEGORY_MASK_ISO_7_TIGHT \
651 | CATEGORY_MASK_ISO_7_ELSE \
652 | CATEGORY_MASK_ISO_8_ELSE)
653
654#define CATEGORY_MASK_ISO \
655 ( CATEGORY_MASK_ISO_7BIT \
656 | CATEGORY_MASK_ISO_8BIT \
657 | CATEGORY_MASK_ISO_ELSE)
658
659#define CATEGORY_MASK_UTF_16 \
660 (CATEGORY_MASK_UTF_16_BE \
661 | CATEGORY_MASK_UTF_16_LE \
662 | CATEGORY_MASK_UTF_16_BE_NOSIG \
663 | CATEGORY_MASK_UTF_16_LE_NOSIG)
664
665
666/* List of symbols `coding-category-xxx' ordered by priority. This
667 variable is exposed to Emacs Lisp. */
668static Lisp_Object Vcoding_category_list;
669
670/* Table of coding categories (Lisp symbols). This variable is for
671 internal use only. */
672static Lisp_Object Vcoding_category_table;
673
674/* Table of coding-categories ordered by priority. */
675static enum coding_category coding_priorities[coding_category_max];
676
677/* Nth element is a coding context for the coding system bound to the
678 Nth coding category. */
679static struct coding_system coding_categories[coding_category_max];
680
df7492f9
KH
681/*** Commonly used macros and functions ***/
682
683#ifndef min
684#define min(a, b) ((a) < (b) ? (a) : (b))
685#endif
686#ifndef max
687#define max(a, b) ((a) > (b) ? (a) : (b))
688#endif
4ed46869 689
24a73b0a
KH
690#define CODING_GET_INFO(coding, attrs, charset_list) \
691 do { \
692 (attrs) = CODING_ID_ATTRS ((coding)->id); \
693 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
df7492f9 694 } while (0)
4ed46869 695
4ed46869 696
df7492f9
KH
697/* Safely get one byte from the source text pointed by SRC which ends
698 at SRC_END, and set C to that byte. If there are not enough bytes
065e3595
KH
699 in the source, it jumps to `no_more_source'. If multibytep is
700 nonzero, and a multibyte character is found at SRC, set C to the
701 negative value of the character code. The caller should declare
702 and set these variables appropriately in advance:
703 src, src_base, src_end, multibytep, consumed_chars, coding */
aa72b389 704
065e3595
KH
705#define ONE_MORE_BYTE(c) \
706 do { \
707 if (src == src_end) \
708 { \
709 if (src_base < src) \
710 record_conversion_result \
711 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
712 goto no_more_source; \
713 } \
714 c = *src++; \
715 if (multibytep && (c & 0x80)) \
716 { \
717 if ((c & 0xFE) == 0xC0) \
718 c = ((c & 1) << 6) | *src++; \
719 else \
720 { \
35befdaa
KH
721 src--; \
722 c = - string_char (src, &src, NULL); \
065e3595
KH
723 record_conversion_result \
724 (coding, CODING_RESULT_INVALID_SRC); \
725 } \
726 } \
727 consumed_chars++; \
aa72b389
KH
728 } while (0)
729
aa72b389 730
065e3595
KH
/* Like ONE_MORE_BYTE, but without the `src == src_end' test, so it
   never jumps to `no_more_source'; presumably it is meant for callers
   that already know a byte is available (TODO confirm against
   callers). It reads and updates the caller's `src' and
   `consumed_chars', reads `multibytep', and may report
   CODING_RESULT_INVALID_SRC on `coding'. */
731#define ONE_MORE_BYTE_NO_CHECK(c) \
732 do { \
733 c = *src++; \
734 if (multibytep && (c & 0x80)) \
735 { \
736 if ((c & 0xFE) == 0xC0) \
737 c = ((c & 1) << 6) | *src++; \
738 else \
739 { \
35befdaa
KH
740 src--; \
741 c = - string_char (src, &src, NULL); \
065e3595
KH
742 record_conversion_result \
743 (coding, CODING_RESULT_INVALID_SRC); \
744 } \
745 } \
746 consumed_chars++; \
aa72b389
KH
747 } while (0)
748
aa72b389 749
df7492f9
KH
750/* Store a byte C in the place pointed by DST and increment DST to the
751 next free point, and increment PRODUCED_CHARS. The caller should
752 assure that C is 0..127, and declare and set the variable `dst'
753 appropriately in advance.
754*/
aa72b389
KH
755
756
df7492f9
KH
757#define EMIT_ONE_ASCII_BYTE(c) \
758 do { \
759 produced_chars++; \
760 *dst++ = (c); \
b6871cc7 761 } while (0)
aa72b389
KH
762
763
df7492f9 764/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
aa72b389 765
df7492f9
KH
766#define EMIT_TWO_ASCII_BYTES(c1, c2) \
767 do { \
768 produced_chars += 2; \
769 *dst++ = (c1), *dst++ = (c2); \
770 } while (0)
aa72b389
KH
771
772
df7492f9
KH
773/* Store a byte C in the place pointed by DST and increment DST to the
774 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
775 nonzero, store in an appropriate multibyte form. The caller should
776 declare and set the variables `dst' and `multibytep' appropriately
777 in advance. */
778
779#define EMIT_ONE_BYTE(c) \
780 do { \
781 produced_chars++; \
782 if (multibytep) \
783 { \
784 int ch = (c); \
785 if (ch >= 0x80) \
786 ch = BYTE8_TO_CHAR (ch); \
787 CHAR_STRING_ADVANCE (ch, dst); \
788 } \
789 else \
790 *dst++ = (c); \
aa72b389 791 } while (0)
aa72b389 792
aa72b389 793
df7492f9 794/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 795
e19c3639
KH
796#define EMIT_TWO_BYTES(c1, c2) \
797 do { \
798 produced_chars += 2; \
799 if (multibytep) \
800 { \
801 int ch; \
802 \
803 ch = (c1); \
804 if (ch >= 0x80) \
805 ch = BYTE8_TO_CHAR (ch); \
806 CHAR_STRING_ADVANCE (ch, dst); \
807 ch = (c2); \
808 if (ch >= 0x80) \
809 ch = BYTE8_TO_CHAR (ch); \
810 CHAR_STRING_ADVANCE (ch, dst); \
811 } \
812 else \
813 { \
814 *dst++ = (c1); \
815 *dst++ = (c2); \
816 } \
aa72b389
KH
817 } while (0)
818
819
df7492f9
KH
820#define EMIT_THREE_BYTES(c1, c2, c3) \
821 do { \
822 EMIT_ONE_BYTE (c1); \
823 EMIT_TWO_BYTES (c2, c3); \
824 } while (0)
aa72b389 825
aa72b389 826
df7492f9
KH
827#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
828 do { \
829 EMIT_TWO_BYTES (c1, c2); \
830 EMIT_TWO_BYTES (c3, c4); \
831 } while (0)
aa72b389 832
aa72b389 833
f6cbaf43
KH
834/* Prototypes for static functions. */
835static void record_conversion_result P_ ((struct coding_system *coding,
836 enum coding_result_code result));
837static int detect_coding_utf_8 P_ ((struct coding_system *,
838 struct coding_detection_info *info));
839static void decode_coding_utf_8 P_ ((struct coding_system *));
840static int encode_coding_utf_8 P_ ((struct coding_system *));
841
842static int detect_coding_utf_16 P_ ((struct coding_system *,
843 struct coding_detection_info *info));
844static void decode_coding_utf_16 P_ ((struct coding_system *));
845static int encode_coding_utf_16 P_ ((struct coding_system *));
846
847static int detect_coding_iso_2022 P_ ((struct coding_system *,
848 struct coding_detection_info *info));
849static void decode_coding_iso_2022 P_ ((struct coding_system *));
850static int encode_coding_iso_2022 P_ ((struct coding_system *));
851
852static int detect_coding_emacs_mule P_ ((struct coding_system *,
853 struct coding_detection_info *info));
854static void decode_coding_emacs_mule P_ ((struct coding_system *));
855static int encode_coding_emacs_mule P_ ((struct coding_system *));
856
857static int detect_coding_sjis P_ ((struct coding_system *,
858 struct coding_detection_info *info));
859static void decode_coding_sjis P_ ((struct coding_system *));
860static int encode_coding_sjis P_ ((struct coding_system *));
861
862static int detect_coding_big5 P_ ((struct coding_system *,
863 struct coding_detection_info *info));
864static void decode_coding_big5 P_ ((struct coding_system *));
865static int encode_coding_big5 P_ ((struct coding_system *));
866
867static int detect_coding_ccl P_ ((struct coding_system *,
868 struct coding_detection_info *info));
869static void decode_coding_ccl P_ ((struct coding_system *));
870static int encode_coding_ccl P_ ((struct coding_system *));
871
872static void decode_coding_raw_text P_ ((struct coding_system *));
873static int encode_coding_raw_text P_ ((struct coding_system *));
874
875static void coding_set_source P_ ((struct coding_system *));
876static void coding_set_destination P_ ((struct coding_system *));
877static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
878static void coding_alloc_by_making_gap P_ ((struct coding_system *,
287c57d7 879 EMACS_INT, EMACS_INT));
f6cbaf43
KH
880static unsigned char *alloc_destination P_ ((struct coding_system *,
881 EMACS_INT, unsigned char *));
882static void setup_iso_safe_charsets P_ ((Lisp_Object));
883static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
884 int *, int *,
885 unsigned char *));
886static int detect_eol P_ ((const unsigned char *,
887 EMACS_INT, enum coding_category));
888static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
889static void decode_eol P_ ((struct coding_system *));
890static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
891static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
892 int, int *, int *));
893static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
894static INLINE void produce_composition P_ ((struct coding_system *, int *,
895 EMACS_INT));
896static INLINE void produce_charset P_ ((struct coding_system *, int *,
897 EMACS_INT));
898static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
899static int decode_coding P_ ((struct coding_system *));
900static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
901 struct coding_system *,
902 int *, EMACS_INT *));
903static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
904 struct coding_system *,
905 int *, EMACS_INT *));
906static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
907static int encode_coding P_ ((struct coding_system *));
908static Lisp_Object make_conversion_work_buffer P_ ((int));
909static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
910static INLINE int char_encodable_p P_ ((int, Lisp_Object));
911static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
912
065e3595
KH
913static void
914record_conversion_result (struct coding_system *coding,
915 enum coding_result_code result)
916{
917 coding->result = result;
918 switch (result)
919 {
920 case CODING_RESULT_INSUFFICIENT_SRC:
921 Vlast_code_conversion_error = Qinsufficient_source;
922 break;
923 case CODING_RESULT_INCONSISTENT_EOL:
924 Vlast_code_conversion_error = Qinconsistent_eol;
925 break;
926 case CODING_RESULT_INVALID_SRC:
927 Vlast_code_conversion_error = Qinvalid_source;
928 break;
929 case CODING_RESULT_INTERRUPT:
930 Vlast_code_conversion_error = Qinterrupted;
931 break;
932 case CODING_RESULT_INSUFFICIENT_MEM:
933 Vlast_code_conversion_error = Qinsufficient_memory;
934 break;
35befdaa
KH
935 default:
936 Vlast_code_conversion_error = intern ("Unknown error");
065e3595
KH
937 }
938}
939
df7492f9
KH
/* Decode CODE of CHARSET and store the resulting character in C.

   charset_map_loaded is cleared before calling DECODE_CHAR.  If it
   is nonzero afterwards, CODING->source is recomputed with
   coding_set_source, and SRC, SRC_BASE and SRC_END are all shifted
   by the distance CODING->source moved, so that they keep pointing
   at the same source bytes.  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do { \
    charset_map_loaded = 0; \
    c = DECODE_CHAR (charset, code); \
    if (charset_map_loaded) \
      { \
        const unsigned char *old_source = coding->source; \
        EMACS_INT moved; \
        \
        coding_set_source (coding); \
        moved = coding->source - old_source; \
        src += moved; \
        src_base += moved; \
        src_end += moved; \
      } \
  } while (0)
956
957
119852e7
KH
/* If DST + BYTES reaches or passes DST_END, i.e. there is not more
   than BYTES bytes of room left at dst, allocate more memory for
   coding->destination with alloc_destination and update dst and
   dst_end.  The amount requested is BYTES plus one byte for each
   element still pending between charbuf and charbuf_end.  We don't
   have to take care of coding->source which will be relocated.  It
   is handled by calling coding_set_source in encode_coding.  */

#define ASSURE_DESTINATION(bytes) \
  do { \
    if (dst_end <= dst + (bytes)) \
      { \
        int room_wanted = charbuf_end - charbuf + (bytes); \
        \
        dst = alloc_destination (coding, room_wanted, dst); \
        dst_end = coding->destination + coding->dst_bytes; \
      } \
  } while (0)
aa72b389 973
aa72b389 974
aa72b389 975
df7492f9
KH
976static void
977coding_set_source (coding)
aa72b389 978 struct coding_system *coding;
aa72b389 979{
df7492f9
KH
980 if (BUFFERP (coding->src_object))
981 {
2cb26057 982 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 983
df7492f9 984 if (coding->src_pos < 0)
2cb26057 985 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 986 else
2cb26057 987 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 988 }
df7492f9 989 else if (STRINGP (coding->src_object))
aa72b389 990 {
8f924df7 991 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 992 }
df7492f9
KH
993 else
994 /* Otherwise, the source is C string and is never relocated
995 automatically. Thus we don't have to update anything. */
996 ;
997}
aa72b389 998
df7492f9
KH
999static void
1000coding_set_destination (coding)
1001 struct coding_system *coding;
1002{
1003 if (BUFFERP (coding->dst_object))
aa72b389 1004 {
df7492f9 1005 if (coding->src_pos < 0)
aa72b389 1006 {
13818c30 1007 coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
28f67a95
KH
1008 coding->dst_bytes = (GAP_END_ADDR
1009 - (coding->src_bytes - coding->consumed)
1010 - coding->destination);
aa72b389 1011 }
df7492f9 1012 else
28f67a95
KH
1013 {
1014 /* We are sure that coding->dst_pos_byte is before the gap
1015 of the buffer. */
1016 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
13818c30 1017 + coding->dst_pos_byte - BEG_BYTE);
28f67a95
KH
1018 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1019 - coding->destination);
1020 }
df7492f9
KH
1021 }
1022 else
1023 /* Otherwise, the destination is C string and is never relocated
1024 automatically. Thus we don't have to update anything. */
1025 ;
1026}
1027
1028
1029static void
1030coding_alloc_by_realloc (coding, bytes)
1031 struct coding_system *coding;
1032 EMACS_INT bytes;
1033{
1034 coding->destination = (unsigned char *) xrealloc (coding->destination,
1035 coding->dst_bytes + bytes);
1036 coding->dst_bytes += bytes;
1037}
1038
1039static void
287c57d7 1040coding_alloc_by_making_gap (coding, offset, bytes)
df7492f9 1041 struct coding_system *coding;
287c57d7 1042 EMACS_INT offset, bytes;
df7492f9 1043{
2c78b7e1
KH
1044 if (BUFFERP (coding->dst_object)
1045 && EQ (coding->src_object, coding->dst_object))
df7492f9 1046 {
287c57d7 1047 EMACS_INT add = offset + (coding->src_bytes - coding->consumed);
df7492f9 1048
287c57d7 1049 GPT += offset, GPT_BYTE += offset;
df7492f9
KH
1050 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1051 make_gap (bytes);
1052 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
287c57d7 1053 GPT -= offset, GPT_BYTE -= offset;
df7492f9 1054 }
730fff51 1055 else
df7492f9 1056 {
2c78b7e1
KH
1057 Lisp_Object this_buffer;
1058
1059 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1060 set_buffer_internal (XBUFFER (coding->dst_object));
1061 make_gap (bytes);
1062 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1063 }
df7492f9 1064}
8f924df7 1065
df7492f9
KH
1066
1067static unsigned char *
1068alloc_destination (coding, nbytes, dst)
1069 struct coding_system *coding;
3e139625 1070 EMACS_INT nbytes;
df7492f9
KH
1071 unsigned char *dst;
1072{
1073 EMACS_INT offset = dst - coding->destination;
1074
1075 if (BUFFERP (coding->dst_object))
287c57d7 1076 coding_alloc_by_making_gap (coding, offset, nbytes);
aa72b389 1077 else
df7492f9 1078 coding_alloc_by_realloc (coding, nbytes);
065e3595 1079 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1080 coding_set_destination (coding);
1081 dst = coding->destination + offset;
1082 return dst;
1083}
aa72b389 1084
ff0dacd7
KH
1085/** Macros for annotations. */
1086
1087/* Maximum length of annotation data (sum of annotations for
1088 composition and charset). */
69a80ea3 1089#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
ff0dacd7
KH
1090
1091/* An annotation data is stored in the array coding->charbuf in this
1092 format:
69a80ea3 1093 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
ff0dacd7
KH
1094 LENGTH is the number of elements in the annotation.
1095 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
69a80ea3 1096 NCHARS is the number of characters in the text annotated.
ff0dacd7
KH
1097
1098 The format of the following elements depends on ANNOTATION_MASK.
1099
1100 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1101 follow:
1102 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1103 METHOD is one of enum composition_method.
1104 Optional COMPOSITION-COMPONENTS are characters and composition
1105 rules.
1106
1107 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1108 follows. */
1109
/* Append an annotation header to the int array that BUF points into,
   advancing BUF: the negated length LEN, then MASK, then NCHARS.
   Also mark CODING (a variable the caller must have in scope) as
   annotated.

   The expansion deliberately does NOT end in a semicolon, so that
   `ADD_ANNOTATION_DATA (...);' is exactly one statement and can be
   used as the body of an `if' that has an `else'.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \
  do { \
    *(buf)++ = -(len); \
    *(buf)++ = (mask); \
    *(buf)++ = (nchars); \
    coding->annotated = 1; \
  } while (0)

/* Append a composition annotation of four elements to BUF: the
   header for NCHARS characters followed by METHOD.  */

#define ADD_COMPOSITION_DATA(buf, nchars, method) \
  do { \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method); \
  } while (0)


/* Append a charset annotation of four elements to BUF: the header
   for NCHARS characters followed by the charset ID.  */

#define ADD_CHARSET_DATA(buf, nchars, id) \
  do { \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id); \
  } while (0)
1130
df7492f9
KH
1131\f
1132/*** 2. Emacs' internal format (emacs-utf-8) ***/
1133
1134
1135
1136\f
1137/*** 3. UTF-8 ***/
1138
1139/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1140 Check if a text is encoded in UTF-8. If it is, return 1, else
1141 return 0. */
df7492f9
KH
1142
/* Predicates classifying one UTF-8 octet C by its high bits:
   UTF_8_1_OCTET_P is true for a value below 0x80,
   UTF_8_EXTRA_OCTET_P for the bit pattern 10xxxxxx, and
   UTF_8_n_OCTET_LEADING_P for the patterns 110xxxxx (n = 2),
   1110xxxx (n = 3), 11110xxx (n = 4) and 111110xx (n = 5).  */
1143#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1144#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1145#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1146#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1147#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1148#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1149
1150static int
ff0dacd7 1151detect_coding_utf_8 (coding, detect_info)
df7492f9 1152 struct coding_system *coding;
ff0dacd7 1153 struct coding_detection_info *detect_info;
df7492f9 1154{
065e3595 1155 const unsigned char *src = coding->source, *src_base;
8f924df7 1156 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1157 int multibytep = coding->src_multibyte;
1158 int consumed_chars = 0;
1159 int found = 0;
1160
ff0dacd7 1161 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1162 /* A coding system of this category is always ASCII compatible. */
1163 src += coding->head_ascii;
1164
1165 while (1)
aa72b389 1166 {
df7492f9 1167 int c, c1, c2, c3, c4;
aa72b389 1168
065e3595 1169 src_base = src;
df7492f9 1170 ONE_MORE_BYTE (c);
065e3595 1171 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1172 continue;
1173 ONE_MORE_BYTE (c1);
065e3595 1174 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1175 break;
1176 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1177 {
ff0dacd7 1178 found = CATEGORY_MASK_UTF_8;
df7492f9 1179 continue;
aa72b389 1180 }
df7492f9 1181 ONE_MORE_BYTE (c2);
065e3595 1182 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1183 break;
1184 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1185 {
ff0dacd7 1186 found = CATEGORY_MASK_UTF_8;
df7492f9 1187 continue;
aa72b389 1188 }
df7492f9 1189 ONE_MORE_BYTE (c3);
065e3595 1190 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1191 break;
1192 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1193 {
ff0dacd7 1194 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1195 continue;
1196 }
1197 ONE_MORE_BYTE (c4);
065e3595 1198 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1199 break;
1200 if (UTF_8_5_OCTET_LEADING_P (c))
1201 {
ff0dacd7 1202 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1203 continue;
1204 }
1205 break;
aa72b389 1206 }
ff0dacd7 1207 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1208 return 0;
aa72b389 1209
df7492f9 1210 no_more_source:
065e3595 1211 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1212 {
ff0dacd7 1213 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1214 return 0;
aa72b389 1215 }
ff0dacd7
KH
1216 detect_info->found |= found;
1217 return 1;
aa72b389
KH
1218}
1219
4ed46869 1220
b73bfc1c 1221static void
df7492f9 1222decode_coding_utf_8 (coding)
b73bfc1c 1223 struct coding_system *coding;
b73bfc1c 1224{
8f924df7
KH
1225 const unsigned char *src = coding->source + coding->consumed;
1226 const unsigned char *src_end = coding->source + coding->src_bytes;
1227 const unsigned char *src_base;
69a80ea3
KH
1228 int *charbuf = coding->charbuf + coding->charbuf_used;
1229 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1230 int consumed_chars = 0, consumed_chars_base;
1231 int multibytep = coding->src_multibyte;
24a73b0a 1232 Lisp_Object attr, charset_list;
119852e7
KH
1233 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1234 int byte_after_cr = -1;
4ed46869 1235
24a73b0a 1236 CODING_GET_INFO (coding, attr, charset_list);
df7492f9
KH
1237
1238 while (1)
b73bfc1c 1239 {
df7492f9 1240 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1241
df7492f9
KH
1242 src_base = src;
1243 consumed_chars_base = consumed_chars;
4af310db 1244
df7492f9
KH
1245 if (charbuf >= charbuf_end)
1246 break;
1247
119852e7
KH
1248 if (byte_after_cr >= 0)
1249 c1 = byte_after_cr, byte_after_cr = -1;
1250 else
1251 ONE_MORE_BYTE (c1);
065e3595
KH
1252 if (c1 < 0)
1253 {
1254 c = - c1;
1255 }
1256 else if (UTF_8_1_OCTET_P(c1))
df7492f9 1257 {
119852e7
KH
1258 if (eol_crlf && c1 == '\r')
1259 ONE_MORE_BYTE (byte_after_cr);
df7492f9 1260 c = c1;
4af310db 1261 }
df7492f9 1262 else
4af310db 1263 {
df7492f9 1264 ONE_MORE_BYTE (c2);
065e3595 1265 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1266 goto invalid_code;
1267 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1268 {
b0edb2c5
DL
1269 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1270 /* Reject overlong sequences here and below. Encoders
1271 producing them are incorrect, they can be misleading,
1272 and they mess up read/write invariance. */
1273 if (c < 128)
1274 goto invalid_code;
4af310db 1275 }
df7492f9 1276 else
aa72b389 1277 {
df7492f9 1278 ONE_MORE_BYTE (c3);
065e3595 1279 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1280 goto invalid_code;
1281 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1282 {
1283 c = (((c1 & 0xF) << 12)
1284 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1285 if (c < 0x800
1286 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1287 goto invalid_code;
1288 }
df7492f9
KH
1289 else
1290 {
1291 ONE_MORE_BYTE (c4);
065e3595 1292 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1293 goto invalid_code;
1294 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1295 {
df7492f9
KH
1296 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1297 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1298 if (c < 0x10000)
1299 goto invalid_code;
1300 }
df7492f9
KH
1301 else
1302 {
1303 ONE_MORE_BYTE (c5);
065e3595 1304 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1305 goto invalid_code;
1306 if (UTF_8_5_OCTET_LEADING_P (c1))
1307 {
1308 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1309 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1310 | (c5 & 0x3F));
b0edb2c5 1311 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1312 goto invalid_code;
1313 }
1314 else
1315 goto invalid_code;
1316 }
1317 }
aa72b389 1318 }
b73bfc1c 1319 }
df7492f9
KH
1320
1321 *charbuf++ = c;
1322 continue;
1323
1324 invalid_code:
1325 src = src_base;
1326 consumed_chars = consumed_chars_base;
1327 ONE_MORE_BYTE (c);
1328 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1329 coding->errors++;
aa72b389
KH
1330 }
1331
df7492f9
KH
1332 no_more_source:
1333 coding->consumed_char += consumed_chars_base;
1334 coding->consumed = src_base - coding->source;
1335 coding->charbuf_used = charbuf - coding->charbuf;
1336}
1337
1338
1339static int
1340encode_coding_utf_8 (coding)
1341 struct coding_system *coding;
1342{
1343 int multibytep = coding->dst_multibyte;
1344 int *charbuf = coding->charbuf;
1345 int *charbuf_end = charbuf + coding->charbuf_used;
1346 unsigned char *dst = coding->destination + coding->produced;
1347 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1348 int produced_chars = 0;
df7492f9
KH
1349 int c;
1350
1351 if (multibytep)
aa72b389 1352 {
df7492f9
KH
1353 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1354
1355 while (charbuf < charbuf_end)
b73bfc1c 1356 {
df7492f9 1357 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1358
df7492f9
KH
1359 ASSURE_DESTINATION (safe_room);
1360 c = *charbuf++;
28f67a95
KH
1361 if (CHAR_BYTE8_P (c))
1362 {
1363 c = CHAR_TO_BYTE8 (c);
1364 EMIT_ONE_BYTE (c);
1365 }
1366 else
1367 {
1368 CHAR_STRING_ADVANCE (c, pend);
1369 for (p = str; p < pend; p++)
1370 EMIT_ONE_BYTE (*p);
1371 }
b73bfc1c 1372 }
aa72b389 1373 }
df7492f9
KH
1374 else
1375 {
1376 int safe_room = MAX_MULTIBYTE_LENGTH;
1377
1378 while (charbuf < charbuf_end)
b73bfc1c 1379 {
df7492f9
KH
1380 ASSURE_DESTINATION (safe_room);
1381 c = *charbuf++;
f03caae0
KH
1382 if (CHAR_BYTE8_P (c))
1383 *dst++ = CHAR_TO_BYTE8 (c);
1384 else
1385 dst += CHAR_STRING (c, dst);
df7492f9 1386 produced_chars++;
4ed46869
KH
1387 }
1388 }
065e3595 1389 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1390 coding->produced_char += produced_chars;
1391 coding->produced = dst - coding->destination;
1392 return 0;
4ed46869
KH
1393}
1394
b73bfc1c 1395
df7492f9 1396/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1397 Check if a text is encoded in one of UTF-16 based coding systems.
1398 If it is, return 1, else return 0. */
aa72b389 1399
df7492f9
KH
/* Predicates on a 16-bit UTF-16 code unit VAL.  A high surrogate has
   its top six bits equal to those of 0xD800 (so it lies in
   0xD800..0xDBFF); a low surrogate has them equal to those of 0xDC00
   (0xDC00..0xDFFF).  Only the low 16 bits of VAL are examined.  */
1400#define UTF_16_HIGH_SURROGATE_P(val) \
1401 (((val) & 0xFC00) == 0xD800)
1402
1403#define UTF_16_LOW_SURROGATE_P(val) \
1404 (((val) & 0xFC00) == 0xDC00)
93dec019 1405
df7492f9
KH
/* True if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
1406#define UTF_16_INVALID_P(val) \
1407 (((val) == 0xFFFE) \
1408 || ((val) == 0xFFFF) \
1409 || UTF_16_LOW_SURROGATE_P (val))
aa72b389 1410
aa72b389 1411
df7492f9 1412static int
ff0dacd7 1413detect_coding_utf_16 (coding, detect_info)
aa72b389 1414 struct coding_system *coding;
ff0dacd7 1415 struct coding_detection_info *detect_info;
aa72b389 1416{
8f924df7
KH
1417 const unsigned char *src = coding->source, *src_base = src;
1418 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1419 int multibytep = coding->src_multibyte;
1420 int consumed_chars = 0;
1421 int c1, c2;
aa72b389 1422
ff0dacd7 1423 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1424 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1425 && (coding->src_chars & 1))
ff0dacd7
KH
1426 {
1427 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1428 return 0;
1429 }
24a73b0a 1430
df7492f9
KH
1431 ONE_MORE_BYTE (c1);
1432 ONE_MORE_BYTE (c2);
df7492f9 1433 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1434 {
b49a1807
KH
1435 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1436 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1437 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1438 | CATEGORY_MASK_UTF_16_BE_NOSIG
1439 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1440 }
df7492f9 1441 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1442 {
b49a1807
KH
1443 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1444 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1445 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1446 | CATEGORY_MASK_UTF_16_BE_NOSIG
1447 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1448 }
065e3595 1449 else if (c1 >= 0 && c2 >= 0)
24a73b0a 1450 {
24a73b0a
KH
1451 detect_info->rejected
1452 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
ff0dacd7 1453 }
df7492f9 1454 no_more_source:
ff0dacd7 1455 return 1;
df7492f9 1456}
aa72b389 1457
df7492f9
KH
1458static void
1459decode_coding_utf_16 (coding)
1460 struct coding_system *coding;
1461{
8f924df7
KH
1462 const unsigned char *src = coding->source + coding->consumed;
1463 const unsigned char *src_end = coding->source + coding->src_bytes;
1464 const unsigned char *src_base;
69a80ea3
KH
1465 int *charbuf = coding->charbuf + coding->charbuf_used;
1466 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1467 int consumed_chars = 0, consumed_chars_base;
1468 int multibytep = coding->src_multibyte;
1469 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1470 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1471 int surrogate = CODING_UTF_16_SURROGATE (coding);
24a73b0a 1472 Lisp_Object attr, charset_list;
119852e7
KH
1473 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1474 int byte_after_cr1 = -1, byte_after_cr2 = -1;
df7492f9 1475
24a73b0a 1476 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1477
b49a1807 1478 if (bom == utf_16_with_bom)
aa72b389 1479 {
df7492f9 1480 int c, c1, c2;
4af310db 1481
aa72b389 1482 src_base = src;
df7492f9
KH
1483 ONE_MORE_BYTE (c1);
1484 ONE_MORE_BYTE (c2);
e19c3639 1485 c = (c1 << 8) | c2;
aa72b389 1486
b49a1807
KH
1487 if (endian == utf_16_big_endian
1488 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1489 {
b49a1807
KH
1490 /* The first two bytes are not BOM. Treat them as bytes
1491 for a normal character. */
1492 src = src_base;
1493 coding->errors++;
aa72b389 1494 }
b49a1807
KH
1495 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1496 }
1497 else if (bom == utf_16_detect_bom)
1498 {
1499 /* We have already tried to detect BOM and failed in
1500 detect_coding. */
1501 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
df7492f9 1502 }
aa72b389 1503
df7492f9
KH
1504 while (1)
1505 {
1506 int c, c1, c2;
1507
1508 src_base = src;
1509 consumed_chars_base = consumed_chars;
1510
1511 if (charbuf + 2 >= charbuf_end)
1512 break;
1513
119852e7
KH
1514 if (byte_after_cr1 >= 0)
1515 c1 = byte_after_cr1, byte_after_cr1 = -1;
1516 else
1517 ONE_MORE_BYTE (c1);
065e3595
KH
1518 if (c1 < 0)
1519 {
1520 *charbuf++ = -c1;
1521 continue;
1522 }
119852e7
KH
1523 if (byte_after_cr2 >= 0)
1524 c2 = byte_after_cr2, byte_after_cr2 = -1;
1525 else
1526 ONE_MORE_BYTE (c2);
065e3595
KH
1527 if (c2 < 0)
1528 {
1529 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1530 *charbuf++ = -c2;
1531 continue;
1532 }
df7492f9 1533 c = (endian == utf_16_big_endian
e19c3639 1534 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
119852e7 1535
df7492f9 1536 if (surrogate)
fd3ae0b9 1537 {
df7492f9 1538 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1539 {
df7492f9
KH
1540 if (endian == utf_16_big_endian)
1541 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1542 else
1543 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1544 *charbuf++ = c1;
1545 *charbuf++ = c2;
1546 coding->errors++;
1547 if (UTF_16_HIGH_SURROGATE_P (c))
1548 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1549 else
df7492f9 1550 *charbuf++ = c;
fd3ae0b9
KH
1551 }
1552 else
df7492f9
KH
1553 {
1554 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1555 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1556 *charbuf++ = 0x10000 + c;
df7492f9 1557 }
fd3ae0b9 1558 }
aa72b389 1559 else
df7492f9
KH
1560 {
1561 if (UTF_16_HIGH_SURROGATE_P (c))
1562 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1563 else
119852e7
KH
1564 {
1565 if (eol_crlf && c == '\r')
1566 {
1567 ONE_MORE_BYTE (byte_after_cr1);
1568 ONE_MORE_BYTE (byte_after_cr2);
1569 }
1570 *charbuf++ = c;
1571 }
8f924df7 1572 }
aa72b389 1573 }
df7492f9
KH
1574
1575 no_more_source:
1576 coding->consumed_char += consumed_chars_base;
1577 coding->consumed = src_base - coding->source;
1578 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1579}
b73bfc1c 1580
df7492f9
KH
1581static int
1582encode_coding_utf_16 (coding)
1583 struct coding_system *coding;
1584{
1585 int multibytep = coding->dst_multibyte;
1586 int *charbuf = coding->charbuf;
1587 int *charbuf_end = charbuf + coding->charbuf_used;
1588 unsigned char *dst = coding->destination + coding->produced;
1589 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1590 int safe_room = 8;
1591 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1592 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1593 int produced_chars = 0;
24a73b0a 1594 Lisp_Object attrs, charset_list;
df7492f9 1595 int c;
4ed46869 1596
24a73b0a 1597 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 1598
b49a1807 1599 if (bom != utf_16_without_bom)
df7492f9
KH
1600 {
1601 ASSURE_DESTINATION (safe_room);
1602 if (big_endian)
df7492f9 1603 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1604 else
1605 EMIT_TWO_BYTES (0xFF, 0xFE);
df7492f9
KH
1606 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1607 }
1608
1609 while (charbuf < charbuf_end)
1610 {
1611 ASSURE_DESTINATION (safe_room);
1612 c = *charbuf++;
e19c3639
KH
1613 if (c >= MAX_UNICODE_CHAR)
1614 c = coding->default_char;
df7492f9
KH
1615
1616 if (c < 0x10000)
1617 {
1618 if (big_endian)
1619 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1620 else
1621 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1622 }
1623 else
1624 {
1625 int c1, c2;
1626
1627 c -= 0x10000;
1628 c1 = (c >> 10) + 0xD800;
1629 c2 = (c & 0x3FF) + 0xDC00;
1630 if (big_endian)
1631 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1632 else
1633 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1634 }
1635 }
065e3595 1636 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1637 coding->produced = dst - coding->destination;
1638 coding->produced_char += produced_chars;
1639 return 0;
1640}
1641
1642\f
1643/*** 6. Old Emacs' internal format (emacs-mule) ***/
1644
1645/* Emacs' internal format for representation of multiple character
1646 sets is a kind of multi-byte encoding, i.e. characters are
1647 represented by variable-length sequences of one-byte codes.
1648
1649 ASCII characters and control characters (e.g. `tab', `newline') are
1650 represented by one-byte sequences which are their ASCII codes, in
1651 the range 0x00 through 0x7F.
1652
1653 8-bit characters of the range 0x80..0x9F are represented by
1654 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1655 code + 0x20).
1656
1657 8-bit characters of the range 0xA0..0xFF are represented by
1658 one-byte sequences which are their 8-bit code.
1659
1660 The other characters are represented by a sequence of `base
1661 leading-code', optional `extended leading-code', and one or two
1662 `position-code's. The length of the sequence is determined by the
1663 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1664 whereas extended leading-code and position-code take the range 0xA0
1665 through 0xFF. See `charset.h' for more details about leading-code
1666 and position-code.
1667
1668 --- CODE RANGE of Emacs' internal format ---
1669 character set range
1670 ------------- -----
1671 ascii 0x00..0x7F
1672 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1673 eight-bit-graphic 0xA0..0xFF
1674 ELSE 0x81..0x9D + [0xA0..0xFF]+
1675 ---------------------------------------------
1676
1677 As this is the internal character representation, the format is
1678 usually not used externally (i.e. in a file or in a data sent to a
1679 process). But, it is possible to have a text externally in this
1680 format (i.e. by encoding by the coding system `emacs-mule').
1681
1682 In that case, a sequence of one-byte codes has a slightly different
1683 form.
1684
1685 At first, all characters in eight-bit-control are represented by
1686 one-byte sequences which are their 8-bit code.
1687
1688 Next, character composition data are represented by the byte
1689 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1690 where,
1691 METHOD is 0xF0 plus one of composition method (enum
1692 composition_method),
1693
1694 BYTES is 0xA0 plus a byte length of this composition data,
1695
1696 CHARS is 0x20 plus a number of characters composed by this
1697 data,
1698
1699 COMPONENTs are characters of multibyte form or composition
1700 rules encoded by two-byte of ASCII codes.
1701
1702 In addition, for backward compatibility, the following formats are
1703 also recognized as composition data on decoding.
1704
1705 0x80 MSEQ ...
1706 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1707
1708 Here,
1709 MSEQ is a multibyte form but in these special format:
1710 ASCII: 0xA0 ASCII_CODE+0x80,
1711 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1712 RULE is a one byte code of the range 0xA0..0xF0 that
1713 represents a composition rule.
1714 */
1715
1716char emacs_mule_bytes[256];
1717
df7492f9 1718int
ff0dacd7 1719emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1720 struct coding_system *coding;
065e3595 1721 const unsigned char *src;
ff0dacd7 1722 int *nbytes, *nchars, *id;
df7492f9 1723{
8f924df7
KH
1724 const unsigned char *src_end = coding->source + coding->src_bytes;
1725 const unsigned char *src_base = src;
df7492f9 1726 int multibytep = coding->src_multibyte;
df7492f9
KH
1727 struct charset *charset;
1728 unsigned code;
1729 int c;
1730 int consumed_chars = 0;
1731
1732 ONE_MORE_BYTE (c);
065e3595 1733 if (c < 0)
df7492f9 1734 {
065e3595
KH
1735 c = -c;
1736 charset = emacs_mule_charset[0];
1737 }
1738 else
1739 {
4d41e8b7
KH
1740 if (c >= 0xA0)
1741 {
b3af4b28 1742 /* Old style component character of a composition. */
4d41e8b7
KH
1743 if (c == 0xA0)
1744 {
1745 ONE_MORE_BYTE (c);
1746 c -= 0x80;
1747 }
1748 else
1749 c -= 0x20;
1750 }
1751
065e3595 1752 switch (emacs_mule_bytes[c])
b73bfc1c 1753 {
065e3595 1754 case 2:
df7492f9
KH
1755 if (! (charset = emacs_mule_charset[c]))
1756 goto invalid_code;
1757 ONE_MORE_BYTE (c);
9ffd559c 1758 if (c < 0xA0)
065e3595 1759 goto invalid_code;
df7492f9 1760 code = c & 0x7F;
065e3595
KH
1761 break;
1762
1763 case 3:
1764 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1765 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1766 {
1767 ONE_MORE_BYTE (c);
9ffd559c 1768 if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
065e3595
KH
1769 goto invalid_code;
1770 ONE_MORE_BYTE (c);
9ffd559c 1771 if (c < 0xA0)
065e3595
KH
1772 goto invalid_code;
1773 code = c & 0x7F;
1774 }
1775 else
1776 {
1777 if (! (charset = emacs_mule_charset[c]))
1778 goto invalid_code;
1779 ONE_MORE_BYTE (c);
9ffd559c 1780 if (c < 0xA0)
065e3595
KH
1781 goto invalid_code;
1782 code = (c & 0x7F) << 8;
1783 ONE_MORE_BYTE (c);
9ffd559c 1784 if (c < 0xA0)
065e3595
KH
1785 goto invalid_code;
1786 code |= c & 0x7F;
1787 }
1788 break;
1789
1790 case 4:
1791 ONE_MORE_BYTE (c);
1792 if (c < 0 || ! (charset = emacs_mule_charset[c]))
df7492f9
KH
1793 goto invalid_code;
1794 ONE_MORE_BYTE (c);
9ffd559c 1795 if (c < 0xA0)
065e3595 1796 goto invalid_code;
781d7a48 1797 code = (c & 0x7F) << 8;
df7492f9 1798 ONE_MORE_BYTE (c);
9ffd559c 1799 if (c < 0xA0)
065e3595 1800 goto invalid_code;
df7492f9 1801 code |= c & 0x7F;
065e3595 1802 break;
df7492f9 1803
065e3595
KH
1804 case 1:
1805 code = c;
1806 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1807 ? charset_ascii : charset_eight_bit);
1808 break;
df7492f9 1809
065e3595
KH
1810 default:
1811 abort ();
1812 }
1813 c = DECODE_CHAR (charset, code);
1814 if (c < 0)
1815 goto invalid_code;
df7492f9 1816 }
df7492f9
KH
1817 *nbytes = src - src_base;
1818 *nchars = consumed_chars;
ff0dacd7
KH
1819 if (id)
1820 *id = charset->id;
df7492f9
KH
1821 return c;
1822
1823 no_more_source:
1824 return -2;
1825
1826 invalid_code:
1827 return -1;
1828}
1829
1830
1831/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1832 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1833 else return 0. */
df7492f9
KH
1834
1835static int
ff0dacd7 1836detect_coding_emacs_mule (coding, detect_info)
df7492f9 1837 struct coding_system *coding;
ff0dacd7 1838 struct coding_detection_info *detect_info;
df7492f9 1839{
065e3595 1840 const unsigned char *src = coding->source, *src_base;
8f924df7 1841 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1842 int multibytep = coding->src_multibyte;
1843 int consumed_chars = 0;
1844 int c;
1845 int found = 0;
1846
ff0dacd7 1847 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1848 /* A coding system of this category is always ASCII compatible. */
1849 src += coding->head_ascii;
1850
1851 while (1)
1852 {
065e3595 1853 src_base = src;
df7492f9 1854 ONE_MORE_BYTE (c);
065e3595
KH
1855 if (c < 0)
1856 continue;
df7492f9
KH
1857 if (c == 0x80)
1858 {
1859 /* Perhaps the start of composite character. We simple skip
1860 it because analyzing it is too heavy for detecting. But,
1861 at least, we check that the composite character
1862 constitues of more than 4 bytes. */
8f924df7 1863 const unsigned char *src_base;
df7492f9
KH
1864
1865 repeat:
1866 src_base = src;
1867 do
1868 {
1869 ONE_MORE_BYTE (c);
1870 }
1871 while (c >= 0xA0);
1872
1873 if (src - src_base <= 4)
1874 break;
ff0dacd7 1875 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1876 if (c == 0x80)
1877 goto repeat;
b73bfc1c 1878 }
df7492f9
KH
1879
1880 if (c < 0x80)
b73bfc1c 1881 {
df7492f9
KH
1882 if (c < 0x20
1883 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1884 break;
1885 }
1886 else
1887 {
0e219d54 1888 int more_bytes = emacs_mule_bytes[*src_base] - 1;
df7492f9 1889
0e219d54 1890 while (more_bytes > 0)
df7492f9
KH
1891 {
1892 ONE_MORE_BYTE (c);
0e219d54
KH
1893 if (c < 0xA0)
1894 {
1895 src--; /* Unread the last byte. */
1896 break;
1897 }
1898 more_bytes--;
df7492f9 1899 }
0e219d54 1900 if (more_bytes != 0)
df7492f9 1901 break;
ff0dacd7 1902 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
1903 }
1904 }
ff0dacd7 1905 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1906 return 0;
1907
1908 no_more_source:
065e3595 1909 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 1910 {
ff0dacd7 1911 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
1912 return 0;
1913 }
ff0dacd7
KH
1914 detect_info->found |= found;
1915 return 1;
4ed46869
KH
1916}
1917
b73bfc1c 1918
df7492f9
KH
1919/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1920
/* Decode one component character of an Emacs 20/21 style composition
   sequence at SRC by calling emacs_mule_char.

   On success, store the character at *BUF and advance BUF, advance
   SRC by the number of bytes consumed, and add the number of
   characters consumed to CONSUMED_CHARS.  If SRC is already at
   SRC_END, or emacs_mule_char reports that the source is exhausted
   (-2), do nothing.  For any other negative result, jump to the
   `invalid_code' label of the enclosing function.

   Relies on `coding', `src', `src_end' and `consumed_chars' being in
   scope at the point of use.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                         \
  do                                                                    \
    {                                                                   \
      int ch, len, cnt;                                                 \
                                                                        \
      if (src == src_end)                                               \
        break;                                                          \
      ch = emacs_mule_char (coding, src, &len, &cnt, NULL);             \
      if (ch == -2)                                                     \
        break;                                                          \
      if (ch < 0)                                                       \
        goto invalid_code;                                              \
      *buf++ = ch;                                                      \
      src += len;                                                       \
      consumed_chars += cnt;                                            \
    }                                                                   \
  while (0)
df7492f9
KH
1948
1949
/* Decode a composition rule that appears as a component of an Emacs
   20 style composition sequence at SRC.  The rule is one byte whose
   value minus 0xA0 must lie in 0..80; it is split into GREF (value / 9)
   and NREF (value % 9), encoded with COMPOSITION_ENCODE_RULE, stored at
   *BUF, and BUF is advanced.  If no byte is left at SRC or the byte is
   out of range, jump to the `invalid_code' label of the enclosing
   function.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)              \
  do {                                                          \
    int rule_byte, gref, nref;                                  \
                                                                \
    if (src >= src_end)                                         \
      goto invalid_code;                                        \
    ONE_MORE_BYTE_NO_CHECK (rule_byte);                         \
    rule_byte -= 0xA0;                                          \
    if (! (rule_byte >= 0 && rule_byte < 81))                   \
      goto invalid_code;                                        \
                                                                \
    gref = rule_byte / 9;                                       \
    nref = rule_byte % 9;                                       \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);              \
  } while (0)
1969
1970
781d7a48
KH
/* Decode a composition rule that appears as a component of an Emacs
   21 style composition sequence at SRC.  The rule occupies two bytes,
   GREF + 0x20 followed by NREF + 0x20.  The rule is encoded with
   COMPOSITION_ENCODE_RULE, stored at *BUF, and BUF is advanced.  If
   fewer than two bytes remain at SRC, or either value falls outside
   0..80, jump to the `invalid_code' label of the enclosing
   function.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)              \
  do {                                                          \
    int ref_g, ref_n;                                           \
                                                                \
    if (src + 1 >= src_end)                                     \
      goto invalid_code;                                        \
    ONE_MORE_BYTE_NO_CHECK (ref_g);                             \
    ref_g -= 0x20;                                              \
    ONE_MORE_BYTE_NO_CHECK (ref_n);                             \
    ref_n -= 0x20;                                              \
    if (! (ref_g >= 0 && ref_g < 81                             \
           && ref_n >= 0 && ref_n < 81))                        \
      goto invalid_code;                                        \
    *buf++ = COMPOSITION_ENCODE_RULE (ref_g, ref_n);            \
  } while (0)
1991
1992
/* Decode an Emacs 21 style composition whose METHOD byte is C.

   The three bytes starting with C are (METHOD + 0xF2), (BYTES + 0xA0),
   (CHARS + 0xA0), where BYTES is the byte length of this composition
   information and CHARS is the number of characters composed by this
   composition.  A composition annotation is added to CHARBUF with
   ADD_COMPOSITION_DATA.  Unless METHOD is COMPOSITION_RELATIVE, the
   components that follow are decoded into CHARBUF as well (rules
   alternate with characters, except for COMPOSITION_WITH_ALTCHARS),
   and the first annotation element is decreased by the number of
   components decoded.  Invalid input jumps to `invalid_code'.

   Relies on `charbuf', `consumed_chars' and `consumed_chars_base'
   being in scope at the point of use.  */

#define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int *annotation_head = charbuf;                                     \
    int limit;                                                          \
    int seq_bytes, nchars;                                              \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    seq_bytes = c - 0xA0;                                               \
    if (seq_bytes < 3)                                                  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nchars = c - 0xA0;                                                  \
    ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
    limit = consumed_chars_base + seq_bytes;                            \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int ncomponents;                                                \
                                                                        \
        for (ncomponents = 0; consumed_chars < limit; ncomponents++)    \
          {                                                             \
            /* Odd positions hold rules unless only alternate           \
               characters are listed.  */                               \
            if (ncomponents % 2 && method != COMPOSITION_WITH_ALTCHARS) \
              DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
            else                                                        \
              DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
          }                                                             \
        if (consumed_chars < limit)                                     \
          goto invalid_code;                                            \
        annotation_head[0] -= ncomponents;                              \
      }                                                                 \
  } while (0)
93dec019 2032
aa72b389 2033
d959f512
KH
/* Decode an Emacs 20 style relative composition.

   SRC is rewound to SRC_BASE and the 0x80 marker is skipped.  Then up
   to MAX_COMPOSITION_COMPONENTS component characters (each starting
   with a byte >= 0xA0) are decoded into a local array.  Fewer than two
   components is treated as invalid (jump to `invalid_code').
   Otherwise a COMPOSITION_RELATIVE annotation followed by the
   components is appended to CHARBUF.

   The loop stops at SRC_END so that *SRC is never read past the end of
   the source.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
  do {                                                          \
    /* Emacs 20 style format for relative composition.  */      \
    /* Store multibyte form of characters to be composed.  */   \
    enum composition_method method = COMPOSITION_RELATIVE;      \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
    int *buf = components;                                      \
    int i, j;                                                   \
                                                                \
    src = src_base;                                             \
    ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
    for (i = 0;                                                 \
         (src < src_end && *src >= 0xA0                         \
          && i < MAX_COMPOSITION_COMPONENTS);                   \
         i++)                                                   \
      DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
    if (i < 2)                                                  \
      goto invalid_code;                                        \
    ADD_COMPOSITION_DATA (charbuf, i, method);                  \
    for (j = 0; j < i; j++)                                     \
      *charbuf++ = components[j];                               \
  } while (0)
2053
2054
/* Decode an Emacs 20 style rule-base composition.

   The components at SRC are CHAR [ RULE CHAR ] ..., with at most
   MAX_COMPOSITION_COMPONENTS characters.  They are decoded into a
   local array first.  If fewer than two characters were found, or the
   array does not hold an odd number of entries (characters and rules
   properly interleaved), jump to `invalid_code'.  If CHARBUF lacks
   room, jump to `no_more_source'.  Otherwise append to CHARBUF a
   COMPOSITION_WITH_RULE annotation, the interleaved components, and
   then the characters alone; the first annotation element is decreased
   by the number of interleaved components.

   The loop stops at SRC_END so that *SRC is never read past the end of
   the source.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
  do {                                                          \
    /* Emacs 20 style format for rule-base composition.  */     \
    /* Store multibyte form of characters to be composed.  */   \
    enum composition_method method = COMPOSITION_WITH_RULE;     \
    int *charbuf_base = charbuf;                                \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
    int *buf = components;                                      \
    int i, j;                                                   \
                                                                \
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
    for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)            \
      {                                                         \
        if (src >= src_end || *src < 0xA0)                      \
          break;                                                \
        DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
      }                                                         \
    if (i <= 1 || (buf - components) % 2 == 0)                  \
      goto invalid_code;                                        \
    if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
      goto no_more_source;                                      \
    ADD_COMPOSITION_DATA (charbuf, i, method);                  \
    i = i * 2 - 1;                                              \
    for (j = 0; j < i; j++)                                     \
      *charbuf++ = components[j];                               \
    charbuf_base[0] -= i;                                       \
    /* Even slots of COMPONENTS are the characters.  */         \
    for (j = 0; j < i; j += 2)                                  \
      *charbuf++ = components[j];                               \
  } while (0)
2085
aa72b389
KH
2086
2087static void
df7492f9 2088decode_coding_emacs_mule (coding)
aa72b389 2089 struct coding_system *coding;
aa72b389 2090{
8f924df7
KH
2091 const unsigned char *src = coding->source + coding->consumed;
2092 const unsigned char *src_end = coding->source + coding->src_bytes;
2093 const unsigned char *src_base;
69a80ea3
KH
2094 int *charbuf = coding->charbuf + coding->charbuf_used;
2095 int *charbuf_end
2096 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 2097 int consumed_chars = 0, consumed_chars_base;
df7492f9 2098 int multibytep = coding->src_multibyte;
24a73b0a 2099 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2100 int char_offset = coding->produced_char;
2101 int last_offset = char_offset;
2102 int last_id = charset_ascii;
119852e7
KH
2103 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2104 int byte_after_cr = -1;
aa72b389 2105
24a73b0a 2106 CODING_GET_INFO (coding, attrs, charset_list);
aa72b389 2107
aa72b389
KH
2108 while (1)
2109 {
df7492f9
KH
2110 int c;
2111
aa72b389 2112 src_base = src;
df7492f9
KH
2113 consumed_chars_base = consumed_chars;
2114
2115 if (charbuf >= charbuf_end)
2116 break;
aa72b389 2117
119852e7
KH
2118 if (byte_after_cr >= 0)
2119 c = byte_after_cr, byte_after_cr = -1;
2120 else
2121 ONE_MORE_BYTE (c);
065e3595
KH
2122 if (c < 0)
2123 {
2124 *charbuf++ = -c;
2125 char_offset++;
2126 }
2127 else if (c < 0x80)
aa72b389 2128 {
119852e7
KH
2129 if (eol_crlf && c == '\r')
2130 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
2131 *charbuf++ = c;
2132 char_offset++;
aa72b389 2133 }
df7492f9
KH
2134 else if (c == 0x80)
2135 {
df7492f9 2136 ONE_MORE_BYTE (c);
065e3595
KH
2137 if (c < 0)
2138 goto invalid_code;
781d7a48
KH
2139 if (c - 0xF2 >= COMPOSITION_RELATIVE
2140 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
2141 DECODE_EMACS_MULE_21_COMPOSITION (c);
2142 else if (c < 0xC0)
2143 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2144 else if (c == 0xFF)
2145 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2146 else
2147 goto invalid_code;
2148 }
2149 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2150 {
2151 int nbytes, nchars;
ff0dacd7
KH
2152 int id;
2153
781d7a48
KH
2154 src = src_base;
2155 consumed_chars = consumed_chars_base;
ff0dacd7 2156 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
2157 if (c < 0)
2158 {
2159 if (c == -2)
2160 break;
2161 goto invalid_code;
2162 }
ff0dacd7
KH
2163 if (last_id != id)
2164 {
2165 if (last_id != charset_ascii)
69a80ea3 2166 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
2167 last_id = id;
2168 last_offset = char_offset;
2169 }
df7492f9 2170 *charbuf++ = c;
781d7a48
KH
2171 src += nbytes;
2172 consumed_chars += nchars;
df7492f9
KH
2173 char_offset++;
2174 }
4d41e8b7
KH
2175 else
2176 goto invalid_code;
df7492f9
KH
2177 continue;
2178
2179 invalid_code:
2180 src = src_base;
2181 consumed_chars = consumed_chars_base;
2182 ONE_MORE_BYTE (c);
2183 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2184 char_offset++;
df7492f9
KH
2185 coding->errors++;
2186 }
2187
2188 no_more_source:
ff0dacd7 2189 if (last_id != charset_ascii)
69a80ea3 2190 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
2191 coding->consumed_char += consumed_chars_base;
2192 coding->consumed = src_base - coding->source;
2193 coding->charbuf_used = charbuf - coding->charbuf;
2194}
2195
2196
/* Store in CODES[0] and CODES[1] the leading code(s) that introduce a
   character of the charset whose emacs-mule id is ID.

   An ID below 0xA0 is its own single leading code, and CODES[1] is set
   to 0.  A larger ID is prefixed by 0x9A, 0x9B, 0x9C or 0x9D depending
   on its range, with ID itself stored in CODES[1].

   The expansion is a single statement without a trailing semicolon, so
   the macro can be used as the body of an unbraced `if' that has an
   `else'.  */

#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    if ((id) < 0xA0)                                    \
      (codes)[0] = (id), (codes)[1] = 0;                \
    else if ((id) < 0xE0)                               \
      (codes)[0] = 0x9A, (codes)[1] = (id);             \
    else if ((id) < 0xF0)                               \
      (codes)[0] = 0x9B, (codes)[1] = (id);             \
    else if ((id) < 0xF5)                               \
      (codes)[0] = 0x9C, (codes)[1] = (id);             \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = (id);             \
  } while (0)
2210
aa72b389 2211
df7492f9
KH
2212static int
2213encode_coding_emacs_mule (coding)
2214 struct coding_system *coding;
2215{
2216 int multibytep = coding->dst_multibyte;
2217 int *charbuf = coding->charbuf;
2218 int *charbuf_end = charbuf + coding->charbuf_used;
2219 unsigned char *dst = coding->destination + coding->produced;
2220 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2221 int safe_room = 8;
df7492f9 2222 int produced_chars = 0;
24a73b0a 2223 Lisp_Object attrs, charset_list;
df7492f9 2224 int c;
ff0dacd7 2225 int preferred_charset_id = -1;
df7492f9 2226
24a73b0a 2227 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2228 if (! EQ (charset_list, Vemacs_mule_charset_list))
2229 {
2230 CODING_ATTR_CHARSET_LIST (attrs)
2231 = charset_list = Vemacs_mule_charset_list;
2232 }
df7492f9
KH
2233
2234 while (charbuf < charbuf_end)
2235 {
2236 ASSURE_DESTINATION (safe_room);
2237 c = *charbuf++;
ff0dacd7
KH
2238
2239 if (c < 0)
2240 {
2241 /* Handle an annotation. */
2242 switch (*charbuf)
2243 {
2244 case CODING_ANNOTATE_COMPOSITION_MASK:
2245 /* Not yet implemented. */
2246 break;
2247 case CODING_ANNOTATE_CHARSET_MASK:
2248 preferred_charset_id = charbuf[3];
2249 if (preferred_charset_id >= 0
2250 && NILP (Fmemq (make_number (preferred_charset_id),
2251 charset_list)))
2252 preferred_charset_id = -1;
2253 break;
2254 default:
2255 abort ();
2256 }
2257 charbuf += -c - 1;
2258 continue;
2259 }
2260
df7492f9
KH
2261 if (ASCII_CHAR_P (c))
2262 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2263 else if (CHAR_BYTE8_P (c))
2264 {
2265 c = CHAR_TO_BYTE8 (c);
2266 EMIT_ONE_BYTE (c);
2267 }
df7492f9 2268 else
aa72b389 2269 {
df7492f9
KH
2270 struct charset *charset;
2271 unsigned code;
2272 int dimension;
2273 int emacs_mule_id;
2274 unsigned char leading_codes[2];
2275
ff0dacd7
KH
2276 if (preferred_charset_id >= 0)
2277 {
2278 charset = CHARSET_FROM_ID (preferred_charset_id);
2279 if (! CHAR_CHARSET_P (c, charset))
2280 charset = char_charset (c, charset_list, NULL);
2281 }
2282 else
2283 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2284 if (! charset)
2285 {
2286 c = coding->default_char;
2287 if (ASCII_CHAR_P (c))
2288 {
2289 EMIT_ONE_ASCII_BYTE (c);
2290 continue;
2291 }
2292 charset = char_charset (c, charset_list, &code);
2293 }
2294 dimension = CHARSET_DIMENSION (charset);
2295 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2296 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2297 EMIT_ONE_BYTE (leading_codes[0]);
2298 if (leading_codes[1])
2299 EMIT_ONE_BYTE (leading_codes[1]);
2300 if (dimension == 1)
1fa663f9 2301 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2302 else
df7492f9 2303 {
1fa663f9 2304 code |= 0x8080;
df7492f9
KH
2305 EMIT_ONE_BYTE (code >> 8);
2306 EMIT_ONE_BYTE (code & 0xFF);
2307 }
aa72b389 2308 }
aa72b389 2309 }
065e3595 2310 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2311 coding->produced_char += produced_chars;
2312 coding->produced = dst - coding->destination;
2313 return 0;
aa72b389 2314}
b73bfc1c 2315
4ed46869 2316\f
df7492f9 2317/*** 7. ISO2022 handlers ***/
4ed46869
KH
2318
2319/* The following note describes the coding system ISO2022 briefly.
39787efd 2320 Since the intention of this note is to help understand the
5a936b46 2321 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2322 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2323 original document of ISO2022. This is equivalent to the standard
cfb43547 2324 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2325
2326 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2327 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2328 is encoded using bytes less than 128. This may make the encoded
2329 text a little bit longer, but the text passes more easily through
cfb43547 2330 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2331 Significant Bit).
b73bfc1c 2332
cfb43547
DL
2333 There are two kinds of character sets: control character sets and
2334 graphic character sets. The former contain control characters such
4ed46869 2335 as `newline' and `escape' to provide control functions (control
39787efd 2336 functions are also provided by escape sequences). The latter
cfb43547 2337 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2338 two control character sets and many graphic character sets.
2339
2340 Graphic character sets are classified into one of the following
39787efd
KH
2341 four classes, according to the number of bytes (DIMENSION) and
2342 number of characters in one dimension (CHARS) of the set:
2343 - DIMENSION1_CHARS94
2344 - DIMENSION1_CHARS96
2345 - DIMENSION2_CHARS94
2346 - DIMENSION2_CHARS96
2347
2348 In addition, each character set is assigned an identification tag,
cfb43547 2349 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2350 hereafter). The <F> of each character set is decided by ECMA(*)
2351 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2352 (0x30..0x3F are for private use only).
4ed46869
KH
2353
2354 Note (*): ECMA = European Computer Manufacturers Association
2355
cfb43547 2356 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2357 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2358 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2359 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2360 o DIMENSION2_CHARS96 -- none for the moment
2361
39787efd 2362 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2363 C0 [0x00..0x1F] -- control character plane 0
2364 GL [0x20..0x7F] -- graphic character plane 0
2365 C1 [0x80..0x9F] -- control character plane 1
2366 GR [0xA0..0xFF] -- graphic character plane 1
2367
2368 A control character set is directly designated and invoked to C0 or
39787efd
KH
2369 C1 by an escape sequence. The most common case is that:
2370 - ISO646's control character set is designated/invoked to C0, and
2371 - ISO6429's control character set is designated/invoked to C1,
2372 and usually these designations/invocations are omitted in encoded
2373 text. In a 7-bit environment, only C0 can be used, and a control
2374 character for C1 is encoded by an appropriate escape sequence to
2375 fit into the environment. All control characters for C1 are
2376 defined to have corresponding escape sequences.
4ed46869
KH
2377
2378 A graphic character set is at first designated to one of four
2379 graphic registers (G0 through G3), then these graphic registers are
2380 invoked to GL or GR. These designations and invocations can be
2381 done independently. The most common case is that G0 is invoked to
39787efd
KH
2382 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2383 these invocations and designations are omitted in encoded text.
2384 In a 7-bit environment, only GL can be used.
4ed46869 2385
39787efd
KH
2386 When a graphic character set of CHARS94 is invoked to GL, codes
2387 0x20 and 0x7F of the GL area work as control characters SPACE and
2388 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2389 be used.
4ed46869
KH
2390
2391 There are two ways of invocation: locking-shift and single-shift.
2392 With locking-shift, the invocation lasts until the next different
39787efd
KH
2393 invocation, whereas with single-shift, the invocation affects the
2394 following character only and doesn't affect the locking-shift
2395 state. Invocations are done by the following control characters or
2396 escape sequences:
4ed46869
KH
2397
2398 ----------------------------------------------------------------------
39787efd 2399 abbrev function cntrl escape seq description
4ed46869 2400 ----------------------------------------------------------------------
39787efd
KH
2401 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2402 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2403 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2404 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2405 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2406 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2407 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2408 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2409 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2410 ----------------------------------------------------------------------
39787efd
KH
2411 (*) These are not used by any known coding system.
2412
2413 Control characters for these functions are defined by macros
2414 ISO_CODE_XXX in `coding.h'.
4ed46869 2415
39787efd 2416 Designations are done by the following escape sequences:
4ed46869
KH
2417 ----------------------------------------------------------------------
2418 escape sequence description
2419 ----------------------------------------------------------------------
2420 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2421 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2422 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2423 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2424 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2425 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2426 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2427 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2428 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2429 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2430 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2431 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2432 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2433 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2434 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2435 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2436 ----------------------------------------------------------------------
2437
2438 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2439 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2440
2441 Note (*): Although these designations are not allowed in ISO2022,
2442 Emacs accepts them on decoding, and produces them on encoding
39787efd 2443 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2444 7-bit environment, non-locking-shift, and non-single-shift.
2445
2446 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2447 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2448
cfb43547 2449 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2450 same multilingual text in ISO2022. Actually, there exist many
2451 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2452 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2453 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2454 localized platforms), and all of these are variants of ISO2022.
2455
2456 In addition to the above, Emacs handles two more kinds of escape
2457 sequences: ISO6429's direction specification and Emacs' private
2458 sequence for specifying character composition.
2459
39787efd 2460 ISO6429's direction specification takes the following form:
4ed46869
KH
2461 o CSI ']' -- end of the current direction
2462 o CSI '0' ']' -- end of the current direction
2463 o CSI '1' ']' -- start of left-to-right text
2464 o CSI '2' ']' -- start of right-to-left text
2465 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2466 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2467
2468 Character composition specification takes the following form:
ec6d2bb8
KH
2469 o ESC '0' -- start relative composition
2470 o ESC '1' -- end composition
2471 o ESC '2' -- start rule-base composition (*)
2472 o ESC '3' -- start relative composition with alternate chars (**)
2473 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2474 Since these are not standard escape sequences of any ISO standard,
cfb43547 2475 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2476
5a936b46
DL
2477 (*) This form is used only in Emacs 20.7 and older versions,
2478 but newer versions can safely decode it.
cfb43547 2479 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2480 and older versions can't decode it.
ec6d2bb8 2481
cfb43547 2482 Here's a list of example usages of these composition escape
b73bfc1c 2483 sequences (categorized by `enum composition_method').
ec6d2bb8 2484
b73bfc1c 2485 COMPOSITION_RELATIVE:
ec6d2bb8 2486 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2487 COMPOSITION_WITH_RULE:
ec6d2bb8 2488 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2489 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2490 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2491 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2492 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2493
2494enum iso_code_class_type iso_code_class[256];
2495
df7492f9
KH
/* Non-zero if charset ID is usable in CODING: ID must not exceed
   CODING's max_charset_id, and its safe_charsets entry must not be
   255, the filler byte that marks an unsafe charset.  The entry is
   compared as `unsigned char' so that the result does not depend on
   whether plain `char' is signed.  */

#define SAFE_CHARSET_P(coding, id)                                      \
  ((id) <= (coding)->max_charset_id                                     \
   && (unsigned char) (coding)->safe_charsets[(id)] != 255)


/* Non-zero if the coding system registered for CATEGORY has a
   non-negative initial designation for graphic register 1.  */

#define SHIFT_OUT_OK(category)                                          \
  (CODING_ISO_INITIAL (&coding_categories[(category)], 1) >= 0)
2503
2504static void
f0064e1f
DL
2505setup_iso_safe_charsets (attrs)
2506 Lisp_Object attrs;
df7492f9
KH
2507{
2508 Lisp_Object charset_list, safe_charsets;
2509 Lisp_Object request;
2510 Lisp_Object reg_usage;
2511 Lisp_Object tail;
2512 int reg94, reg96;
2513 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2514 int max_charset_id;
2515
2516 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2517 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2518 && ! EQ (charset_list, Viso_2022_charset_list))
2519 {
2520 CODING_ATTR_CHARSET_LIST (attrs)
2521 = charset_list = Viso_2022_charset_list;
2522 ASET (attrs, coding_attr_safe_charsets, Qnil);
2523 }
2524
2525 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2526 return;
2527
2528 max_charset_id = 0;
2529 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2530 {
2531 int id = XINT (XCAR (tail));
2532 if (max_charset_id < id)
2533 max_charset_id = id;
2534 }
d46c5b12 2535
df7492f9
KH
2536 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2537 make_number (255));
2538 request = AREF (attrs, coding_attr_iso_request);
2539 reg_usage = AREF (attrs, coding_attr_iso_usage);
2540 reg94 = XINT (XCAR (reg_usage));
2541 reg96 = XINT (XCDR (reg_usage));
2542
2543 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2544 {
2545 Lisp_Object id;
2546 Lisp_Object reg;
2547 struct charset *charset;
2548
2549 id = XCAR (tail);
2550 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2551 reg = Fcdr (Fassq (id, request));
df7492f9 2552 if (! NILP (reg))
8f924df7 2553 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
2554 else if (charset->iso_chars_96)
2555 {
2556 if (reg96 < 4)
8f924df7 2557 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
2558 }
2559 else
2560 {
2561 if (reg94 < 4)
8f924df7 2562 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
2563 }
2564 }
2565 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2566}
d46c5b12 2567
b6871cc7 2568
4ed46869 2569/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2570 Check if a text is encoded in one of ISO-2022 based coding systems.
2571 If it is, return 1, else return 0. */
4ed46869 2572
0a28aafb 2573static int
ff0dacd7 2574detect_coding_iso_2022 (coding, detect_info)
df7492f9 2575 struct coding_system *coding;
ff0dacd7 2576 struct coding_detection_info *detect_info;
4ed46869 2577{
8f924df7
KH
2578 const unsigned char *src = coding->source, *src_base = src;
2579 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 2580 int multibytep = coding->src_multibyte;
ff0dacd7 2581 int single_shifting = 0;
df7492f9
KH
2582 int id;
2583 int c, c1;
2584 int consumed_chars = 0;
2585 int i;
ff0dacd7
KH
2586 int rejected = 0;
2587 int found = 0;
2588
2589 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2590
2591 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2592 {
2593 struct coding_system *this = &(coding_categories[i]);
2594 Lisp_Object attrs, val;
2595
2596 attrs = CODING_ID_ATTRS (this->id);
2597 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2598 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2599 setup_iso_safe_charsets (attrs);
2600 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
2601 this->max_charset_id = SCHARS (val) - 1;
2602 this->safe_charsets = (char *) SDATA (val);
df7492f9
KH
2603 }
2604
2605 /* A coding system of this category is always ASCII compatible. */
2606 src += coding->head_ascii;
3f003981 2607
ff0dacd7 2608 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2609 {
065e3595 2610 src_base = src;
df7492f9 2611 ONE_MORE_BYTE (c);
4ed46869
KH
2612 switch (c)
2613 {
2614 case ISO_CODE_ESC:
74383408
KH
2615 if (inhibit_iso_escape_detection)
2616 break;
f46869e4 2617 single_shifting = 0;
df7492f9 2618 ONE_MORE_BYTE (c);
d46c5b12 2619 if (c >= '(' && c <= '/')
4ed46869 2620 {
bf9cdd4e 2621 /* Designation sequence for a charset of dimension 1. */
df7492f9 2622 ONE_MORE_BYTE (c1);
d46c5b12 2623 if (c1 < ' ' || c1 >= 0x80
df7492f9 2624 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2625 /* Invalid designation sequence. Just ignore. */
2626 break;
bf9cdd4e
KH
2627 }
2628 else if (c == '$')
2629 {
2630 /* Designation sequence for a charset of dimension 2. */
df7492f9 2631 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2632 if (c >= '@' && c <= 'B')
2633 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2634 id = iso_charset_table[1][0][c];
bf9cdd4e 2635 else if (c >= '(' && c <= '/')
bcf26d6a 2636 {
df7492f9 2637 ONE_MORE_BYTE (c1);
d46c5b12 2638 if (c1 < ' ' || c1 >= 0x80
df7492f9 2639 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2640 /* Invalid designation sequence. Just ignore. */
2641 break;
bcf26d6a 2642 }
bf9cdd4e 2643 else
ff0dacd7 2644 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2645 break;
2646 }
ae9ff118 2647 else if (c == 'N' || c == 'O')
d46c5b12 2648 {
ae9ff118 2649 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2650 single_shifting = 1;
2651 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2652 break;
4ed46869 2653 }
ec6d2bb8
KH
2654 else if (c >= '0' && c <= '4')
2655 {
2656 /* ESC <Fp> for start/end composition. */
ff0dacd7 2657 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2658 break;
2659 }
bf9cdd4e 2660 else
df7492f9 2661 {
ff0dacd7 2662 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2663 break;
2664 }
d46c5b12
KH
2665
2666 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2667 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2668 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2669 id))
ff0dacd7 2670 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2671 else
ff0dacd7 2672 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2673 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2674 id))
ff0dacd7 2675 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2676 else
ff0dacd7 2677 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2678 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2679 id))
ff0dacd7 2680 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2681 else
ff0dacd7 2682 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2683 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2684 id))
ff0dacd7 2685 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2686 else
ff0dacd7 2687 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2688 break;
2689
4ed46869 2690 case ISO_CODE_SO:
d46c5b12 2691 case ISO_CODE_SI:
ff0dacd7 2692 /* Locking shift out/in. */
74383408
KH
2693 if (inhibit_iso_escape_detection)
2694 break;
f46869e4 2695 single_shifting = 0;
ff0dacd7 2696 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12
KH
2697 break;
2698
4ed46869 2699 case ISO_CODE_CSI:
ff0dacd7 2700 /* Control sequence introducer. */
f46869e4 2701 single_shifting = 0;
ff0dacd7
KH
2702 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2703 found |= CATEGORY_MASK_ISO_8_ELSE;
2704 goto check_extra_latin;
2705
4ed46869
KH
2706 case ISO_CODE_SS2:
2707 case ISO_CODE_SS3:
ff0dacd7
KH
2708 /* Single shift. */
2709 if (inhibit_iso_escape_detection)
2710 break;
75e2a253 2711 single_shifting = 0;
ff0dacd7
KH
2712 rejected |= CATEGORY_MASK_ISO_7BIT;
2713 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2714 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253 2715 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
ff0dacd7
KH
2716 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2717 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253
KH
2718 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2719 if (single_shifting)
2720 break;
ff0dacd7 2721 goto check_extra_latin;
4ed46869
KH
2722
2723 default:
065e3595
KH
2724 if (c < 0)
2725 continue;
4ed46869 2726 if (c < 0x80)
f46869e4
KH
2727 {
2728 single_shifting = 0;
2729 break;
2730 }
ff0dacd7 2731 if (c >= 0xA0)
c4825358 2732 {
ff0dacd7
KH
2733 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2734 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2735 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2736 0xA0..0FF. If the byte length is even, we include
2737 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2738 only when we are not single shifting. */
2739 if (! single_shifting
2740 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2741 {
e17de821 2742 int i = 1;
b73bfc1c
KH
2743 while (src < src_end)
2744 {
df7492f9 2745 ONE_MORE_BYTE (c);
b73bfc1c
KH
2746 if (c < 0xA0)
2747 break;
2748 i++;
2749 }
2750
2751 if (i & 1 && src < src_end)
ff0dacd7 2752 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2753 else
ff0dacd7 2754 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2755 }
ff0dacd7 2756 break;
4ed46869 2757 }
ff0dacd7
KH
2758 check_extra_latin:
2759 single_shifting = 0;
2760 if (! VECTORP (Vlatin_extra_code_table)
2761 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2762 {
2763 rejected = CATEGORY_MASK_ISO;
2764 break;
2765 }
2766 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2767 & CODING_ISO_FLAG_LATIN_EXTRA)
2768 found |= CATEGORY_MASK_ISO_8_1;
2769 else
2770 rejected |= CATEGORY_MASK_ISO_8_1;
75e2a253 2771 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2772 }
2773 }
ff0dacd7
KH
2774 detect_info->rejected |= CATEGORY_MASK_ISO;
2775 return 0;
4ed46869 2776
df7492f9 2777 no_more_source:
ff0dacd7
KH
2778 detect_info->rejected |= rejected;
2779 detect_info->found |= (found & ~rejected);
df7492f9 2780 return 1;
4ed46869 2781}
ec6d2bb8 2782
4ed46869 2783
134b9549
KH
/* Record in CODING the designation of a charset to graphic register
   REG.  The charset is looked up from DIM, CHARS_96 and FINAL via
   ISO_CHARSET_TABLE.  CHARS_96 is overwritten with -1 when the escape
   sequence itself should be kept.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int id, prev;                                                       \
                                                                        \
    if (final < '0' || final >= 128                                     \
        || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
        || !SAFE_CHARSET_P (coding, id))                                \
      {                                                                 \
        /* No usable charset for this sequence: flag REG with -2 and    \
           request that the sequence be kept.  */                       \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
        break;                                                          \
      }                                                                 \
    prev = CODING_ISO_DESIGNATION (coding, reg);                        \
    /* Optional substitutions selected by the coding system flags.  */  \
    if (id == charset_jisx0201_roman                                    \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN))     \
      id = charset_ascii;                                               \
    else if (id == charset_jisx0208_1978                                \
             && (CODING_ISO_FLAGS (coding)                              \
                 & CODING_ISO_FLAG_USE_OLDJIS))                         \
      id = charset_jisx0208;                                            \
    CODING_ISO_DESIGNATION (coding, reg) = id;                          \
    /* An ASCII designation that follows an invalid designation to      \
       the same register must be kept as well.  */                      \
    if (prev == -2 && id == charset_ascii)                              \
      chars_96 = -1;                                                    \
  } while (0)
2816
d46c5b12 2817
df7492f9
KH
/* If a composition is in progress, abandon it: flush the characters
   collected so far in `components' to CHARBUF as plain characters and
   reset COMPOSITION_STATE to COMPOSING_NO.  Does nothing when no
   composition is in progress.  Jumps to `no_more_source' if CHARBUF
   cannot hold COMPONENT_IDX more elements.

   For rule-based methods only the even-indexed entries of
   `components' are stored, so exactly (COMPONENT_IDX + 1) / 2
   characters are produced and CHAR_OFFSET advances by that amount.  */
#define MAYBE_FINISH_COMPOSITION()                              \
  do {                                                          \
    int i;                                                      \
    if (composition_state == COMPOSING_NO)                      \
      break;                                                    \
    /* It is assured that we have enough room for producing     \
       characters stored in the table `components'.  */         \
    if (charbuf + component_idx > charbuf_end)                  \
      goto no_more_source;                                      \
    composition_state = COMPOSING_NO;                           \
    if (method == COMPOSITION_RELATIVE                          \
        || method == COMPOSITION_WITH_ALTCHARS)                 \
      {                                                         \
        for (i = 0; i < component_idx; i++)                     \
          *charbuf++ = components[i];                           \
        char_offset += component_idx;                           \
      }                                                         \
    else                                                        \
      {                                                         \
        for (i = 0; i < component_idx; i += 2)                  \
          *charbuf++ = components[i];                           \
        /* Count what the loop above actually stored.  */       \
        char_offset += (component_idx + 1) / 2;                 \
      }                                                         \
  } while (0)
2842
d46c5b12 2843
aa72b389
KH
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte following ESC.  An ESC 0 seen while
   COMPOSITION_STATE is COMPOSING_COMPONENT_RULE only records the
   number of components read so far in COMPONENT_LEN and switches to
   COMPOSING_CHAR.  Otherwise any composition in progress is finished
   and a new one is started, but only if the terminating ESC 1 can be
   found in the remaining source; if it cannot,
   CODING_RESULT_INSUFFICIENT_SRC is recorded and control jumps to
   `no_more_source'.  */

#define DECODE_COMPOSITION_START(c1)                                    \
  do {                                                                  \
    if ((c1) == '0'                                                     \
        && composition_state == COMPOSING_COMPONENT_RULE)               \
      {                                                                 \
        component_len = component_idx;                                  \
        composition_state = COMPOSING_CHAR;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        const unsigned char *p;                                         \
                                                                        \
        MAYBE_FINISH_COMPOSITION ();                                    \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
          goto no_more_source;                                          \
        for (p = src; p < src_end - 1; p++)                             \
          if (*p == ISO_CODE_ESC && p[1] == '1')                        \
            break;                                                      \
        /* `>=' rather than `==': when SRC is already at SRC_END the    \
           loop above never runs and P starts beyond SRC_END - 1.  */   \
        if (p >= src_end - 1)                                           \
          {                                                             \
            /* The current composition doesn't end in the current       \
               source.  */                                              \
            record_conversion_result                                    \
              (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
            goto no_more_source;                                        \
          }                                                             \
                                                                        \
        /* This is surely the start of a composition.  */               \
        method = ((c1) == '0' ? COMPOSITION_RELATIVE                    \
                  : (c1) == '2' ? COMPOSITION_WITH_RULE                 \
                  : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS             \
                  : COMPOSITION_WITH_RULE_ALTCHARS);                    \
        composition_state = ((c1) <= '2' ? COMPOSING_CHAR               \
                             : COMPOSING_COMPONENT_CHAR);               \
        component_idx = component_len = 0;                              \
      }                                                                 \
  } while (0)
2888
ec6d2bb8 2889
df7492f9
KH
/* Handle composition end sequence ESC 1.

   Emit the composition annotation via ADD_COMPOSITION_DATA.  For
   every method except COMPOSITION_RELATIVE, also copy the collected
   components into the annotation and patch its first element with
   the (negative) total length.  Then store the composed characters
   into CHARBUF, advancing CHAR_OFFSET once per stored character.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int *annotation_head = charbuf;                                     \
    int annotation_len = (component_len == 0                            \
                          ? component_idx : component_len);             \
    int first, step, nchars, i;                                         \
                                                                        \
    if (component_len > 0)                                              \
      nchars = component_idx - component_len;                           \
    else if (method == COMPOSITION_RELATIVE)                            \
      nchars = component_idx;                                           \
    else                                                                \
      nchars = (component_idx + 1) / 2;                                 \
                                                                        \
    ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        for (i = 0; i < annotation_len; i++)                            \
          *charbuf++ = components[i];                                   \
        *annotation_head = annotation_head - charbuf;                   \
      }                                                                 \
                                                                        \
    /* With COMPOSITION_WITH_RULE the characters sit at the even        \
       indices; otherwise they follow the first COMPONENT_LEN           \
       entries.  */                                                     \
    if (method == COMPOSITION_WITH_RULE)                                \
      first = 0, step = 2;                                              \
    else                                                                \
      first = component_len, step = 1;                                  \
    for (i = first; i < component_idx; i += step, char_offset++)        \
      *charbuf++ = components[i];                                       \
                                                                        \
    coding->annotated = 1;                                              \
    composition_state = COMPOSING_NO;                                   \
  } while (0)
2920
df7492f9 2921
ec6d2bb8
KH
/* Decode a composition rule from the byte C1 (and maybe one more byte
   from SRC, read into C2 with ONE_MORE_BYTE) and store the encoded
   composition rule back into C1.  C1 must be an lvalue.

   After subtracting 32:
     0..80   old format (before ver.21): global and new reference
             points are C1 / 9 and C1 % 9, with 4 mapped to 10;
     81..92  new format (after ver.21): one more byte is read;
     other   C1 is set to 0.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if ((c1) < 81)              /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (c1) / 9;                                            \
        int nref = (c1) % 9;                                            \
        if (gref == 4) gref = 10;                                       \
        if (nref == 4) nref = 10;                                       \
        (c1) = COMPOSITION_ENCODE_RULE (gref, nref);                    \
      }                                                                 \
    else if ((c1) < 93)         /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        (c1) = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32);            \
      }                                                                 \
    else                                                                \
      (c1) = 0;                                                         \
  } while (0)
88993dfd 2945
d46c5b12 2946
4ed46869
KH
2947/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2948
b73bfc1c 2949static void
df7492f9 2950decode_coding_iso_2022 (coding)
4ed46869 2951 struct coding_system *coding;
4ed46869 2952{
8f924df7
KH
2953 const unsigned char *src = coding->source + coding->consumed;
2954 const unsigned char *src_end = coding->source + coding->src_bytes;
2955 const unsigned char *src_base;
69a80ea3 2956 int *charbuf = coding->charbuf + coding->charbuf_used;
ff0dacd7 2957 int *charbuf_end
69a80ea3 2958 = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 2959 int consumed_chars = 0, consumed_chars_base;
df7492f9 2960 int multibytep = coding->src_multibyte;
4ed46869 2961 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
2962 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2963 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 2964 int charset_id_2, charset_id_3;
df7492f9
KH
2965 struct charset *charset;
2966 int c;
2967 /* For handling composition sequence. */
2968#define COMPOSING_NO 0
2969#define COMPOSING_CHAR 1
2970#define COMPOSING_RULE 2
2971#define COMPOSING_COMPONENT_CHAR 3
2972#define COMPOSING_COMPONENT_RULE 4
2973
2974 int composition_state = COMPOSING_NO;
2975 enum composition_method method;
2976 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2977 int component_idx;
2978 int component_len;
24a73b0a 2979 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2980 int char_offset = coding->produced_char;
2981 int last_offset = char_offset;
2982 int last_id = charset_ascii;
119852e7
KH
2983 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2984 int byte_after_cr = -1;
df7492f9 2985
24a73b0a 2986 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 2987 setup_iso_safe_charsets (attrs);
287c57d7
KH
2988 /* Charset list may have been changed. */
2989 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2990 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
b73bfc1c
KH
2991
2992 while (1)
4ed46869 2993 {
463f5630 2994 int c1, c2;
b73bfc1c
KH
2995
2996 src_base = src;
df7492f9
KH
2997 consumed_chars_base = consumed_chars;
2998
2999 if (charbuf >= charbuf_end)
3000 break;
3001
119852e7
KH
3002 if (byte_after_cr >= 0)
3003 c1 = byte_after_cr, byte_after_cr = -1;
3004 else
3005 ONE_MORE_BYTE (c1);
065e3595
KH
3006 if (c1 < 0)
3007 goto invalid_code;
4ed46869 3008
98725083 3009 /* We produce at most one character. */
4ed46869
KH
3010 switch (iso_code_class [c1])
3011 {
3012 case ISO_0x20_or_0x7F:
df7492f9 3013 if (composition_state != COMPOSING_NO)
ec6d2bb8 3014 {
df7492f9
KH
3015 if (composition_state == COMPOSING_RULE
3016 || composition_state == COMPOSING_COMPONENT_RULE)
3017 {
3018 DECODE_COMPOSITION_RULE (c1);
3019 components[component_idx++] = c1;
3020 composition_state--;
3021 continue;
3022 }
4ed46869 3023 }
df7492f9
KH
3024 if (charset_id_0 < 0
3025 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
3026 /* This is SPACE or DEL. */
3027 charset = CHARSET_FROM_ID (charset_ascii);
3028 else
3029 charset = CHARSET_FROM_ID (charset_id_0);
3030 break;
4ed46869
KH
3031
3032 case ISO_graphic_plane_0:
781d7a48 3033 if (composition_state != COMPOSING_NO)
b73bfc1c 3034 {
781d7a48
KH
3035 if (composition_state == COMPOSING_RULE
3036 || composition_state == COMPOSING_COMPONENT_RULE)
3037 {
3038 DECODE_COMPOSITION_RULE (c1);
3039 components[component_idx++] = c1;
3040 composition_state--;
3041 continue;
3042 }
b73bfc1c 3043 }
134b9549
KH
3044 if (charset_id_0 < 0)
3045 charset = CHARSET_FROM_ID (charset_ascii);
3046 else
3047 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
3048 break;
3049
3050 case ISO_0xA0_or_0xFF:
df7492f9
KH
3051 if (charset_id_1 < 0
3052 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3053 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3054 goto invalid_code;
4ed46869
KH
3055 /* This is a graphic character, we fall down ... */
3056
3057 case ISO_graphic_plane_1:
df7492f9
KH
3058 if (charset_id_1 < 0)
3059 goto invalid_code;
3060 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
3061 break;
3062
df7492f9 3063 case ISO_control_0:
119852e7
KH
3064 if (eol_crlf && c1 == '\r')
3065 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
3066 MAYBE_FINISH_COMPOSITION ();
3067 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
3068 break;
3069
df7492f9
KH
3070 case ISO_control_1:
3071 MAYBE_FINISH_COMPOSITION ();
3072 goto invalid_code;
3073
4ed46869 3074 case ISO_shift_out:
df7492f9
KH
3075 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3076 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3077 goto invalid_code;
3078 CODING_ISO_INVOCATION (coding, 0) = 1;
3079 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3080 continue;
4ed46869
KH
3081
3082 case ISO_shift_in:
df7492f9
KH
3083 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3084 goto invalid_code;
3085 CODING_ISO_INVOCATION (coding, 0) = 0;
3086 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3087 continue;
4ed46869
KH
3088
3089 case ISO_single_shift_2_7:
3090 case ISO_single_shift_2:
df7492f9
KH
3091 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3092 goto invalid_code;
4ed46869
KH
3093 /* SS2 is handled as an escape sequence of ESC 'N' */
3094 c1 = 'N';
3095 goto label_escape_sequence;
3096
3097 case ISO_single_shift_3:
df7492f9
KH
3098 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3099 goto invalid_code;
4ed46869
KH
3100 /* SS2 is handled as an escape sequence of ESC 'O' */
3101 c1 = 'O';
3102 goto label_escape_sequence;
3103
3104 case ISO_control_sequence_introducer:
3105 /* CSI is handled as an escape sequence of ESC '[' ... */
3106 c1 = '[';
3107 goto label_escape_sequence;
3108
3109 case ISO_escape:
3110 ONE_MORE_BYTE (c1);
3111 label_escape_sequence:
df7492f9 3112 /* Escape sequences handled here are invocation,
4ed46869
KH
3113 designation, direction specification, and character
3114 composition specification. */
3115 switch (c1)
3116 {
3117 case '&': /* revision of following character set */
3118 ONE_MORE_BYTE (c1);
3119 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3120 goto invalid_code;
4ed46869
KH
3121 ONE_MORE_BYTE (c1);
3122 if (c1 != ISO_CODE_ESC)
df7492f9 3123 goto invalid_code;
4ed46869
KH
3124 ONE_MORE_BYTE (c1);
3125 goto label_escape_sequence;
3126
3127 case '$': /* designation of 2-byte character set */
df7492f9
KH
3128 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3129 goto invalid_code;
134b9549
KH
3130 {
3131 int reg, chars96;
3132
3133 ONE_MORE_BYTE (c1);
3134 if (c1 >= '@' && c1 <= 'B')
3135 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3136 or JISX0208.1980 */
134b9549
KH
3137 reg = 0, chars96 = 0;
3138 }
3139 else if (c1 >= 0x28 && c1 <= 0x2B)
3140 { /* designation of DIMENSION2_CHARS94 character set */
3141 reg = c1 - 0x28, chars96 = 0;
3142 ONE_MORE_BYTE (c1);
3143 }
3144 else if (c1 >= 0x2C && c1 <= 0x2F)
3145 { /* designation of DIMENSION2_CHARS96 character set */
3146 reg = c1 - 0x2C, chars96 = 1;
3147 ONE_MORE_BYTE (c1);
3148 }
3149 else
3150 goto invalid_code;
3151 DECODE_DESIGNATION (reg, 2, chars96, c1);
3152 /* We must update these variables now. */
3153 if (reg == 0)
3154 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3155 else if (reg == 1)
3156 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3157 if (chars96 < 0)
3158 goto invalid_code;
3159 }
b73bfc1c 3160 continue;
4ed46869
KH
3161
3162 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3163 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3164 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3165 goto invalid_code;
3166 CODING_ISO_INVOCATION (coding, 0) = 2;
3167 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3168 continue;
4ed46869
KH
3169
3170 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3171 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3172 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3173 goto invalid_code;
3174 CODING_ISO_INVOCATION (coding, 0) = 3;
3175 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3176 continue;
4ed46869
KH
3177
3178 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3179 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3180 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3181 goto invalid_code;
134b9549
KH
3182 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3183 if (charset_id_2 < 0)
3184 charset = CHARSET_FROM_ID (charset_ascii);
3185 else
3186 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3187 ONE_MORE_BYTE (c1);
e7046a18 3188 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3189 goto invalid_code;
4ed46869
KH
3190 break;
3191
3192 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3193 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3194 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3195 goto invalid_code;
134b9549
KH
3196 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3197 if (charset_id_3 < 0)
3198 charset = CHARSET_FROM_ID (charset_ascii);
3199 else
3200 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3201 ONE_MORE_BYTE (c1);
e7046a18 3202 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3203 goto invalid_code;
4ed46869
KH
3204 break;
3205
ec6d2bb8 3206 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3207 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3208 goto invalid_code;
ec6d2bb8 3209 DECODE_COMPOSITION_START (c1);
b73bfc1c 3210 continue;
4ed46869 3211
ec6d2bb8 3212 case '1': /* end composition */
df7492f9
KH
3213 if (composition_state == COMPOSING_NO)
3214 goto invalid_code;
3215 DECODE_COMPOSITION_END ();
b73bfc1c 3216 continue;
4ed46869
KH
3217
3218 case '[': /* specification of direction */
df7492f9
KH
3219 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3220 goto invalid_code;
4ed46869 3221 /* For the moment, nested direction is not supported.
d46c5b12 3222 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3223 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3224 ONE_MORE_BYTE (c1);
3225 switch (c1)
3226 {
3227 case ']': /* end of the current direction */
d46c5b12 3228 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3229
3230 case '0': /* end of the current direction */
3231 case '1': /* start of left-to-right direction */
3232 ONE_MORE_BYTE (c1);
3233 if (c1 == ']')
d46c5b12 3234 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3235 else
df7492f9 3236 goto invalid_code;
4ed46869
KH
3237 break;
3238
3239 case '2': /* start of right-to-left direction */
3240 ONE_MORE_BYTE (c1);
3241 if (c1 == ']')
d46c5b12 3242 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3243 else
df7492f9 3244 goto invalid_code;
4ed46869
KH
3245 break;
3246
3247 default:
df7492f9 3248 goto invalid_code;
4ed46869 3249 }
b73bfc1c 3250 continue;
4ed46869 3251
103e0180 3252 case '%':
103e0180
KH
3253 ONE_MORE_BYTE (c1);
3254 if (c1 == '/')
3255 {
3256 /* CTEXT extended segment:
3257 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3258 We keep these bytes as is for the moment.
3259 They may be decoded by post-read-conversion. */
3260 int dim, M, L;
4776e638 3261 int size;
8f924df7 3262
103e0180
KH
3263 ONE_MORE_BYTE (dim);
3264 ONE_MORE_BYTE (M);
3265 ONE_MORE_BYTE (L);
3266 size = ((M - 128) * 128) + (L - 128);
4776e638
KH
3267 if (charbuf + 8 + size > charbuf_end)
3268 goto break_loop;
3269 *charbuf++ = ISO_CODE_ESC;
3270 *charbuf++ = '%';
3271 *charbuf++ = '/';
3272 *charbuf++ = dim;
3273 *charbuf++ = BYTE8_TO_CHAR (M);
3274 *charbuf++ = BYTE8_TO_CHAR (L);
103e0180
KH
3275 while (size-- > 0)
3276 {
3277 ONE_MORE_BYTE (c1);
4776e638 3278 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3279 }
103e0180
KH
3280 }
3281 else if (c1 == 'G')
3282 {
103e0180
KH
3283 /* XFree86 extension for embedding UTF-8 in CTEXT:
3284 ESC % G --UTF-8-BYTES-- ESC % @
3285 We keep these bytes as is for the moment.
3286 They may be decoded by post-read-conversion. */
4776e638
KH
3287 int *p = charbuf;
3288
3289 if (p + 6 > charbuf_end)
3290 goto break_loop;
3291 *p++ = ISO_CODE_ESC;
3292 *p++ = '%';
3293 *p++ = 'G';
3294 while (p < charbuf_end)
103e0180
KH
3295 {
3296 ONE_MORE_BYTE (c1);
3297 if (c1 == ISO_CODE_ESC
3298 && src + 1 < src_end
3299 && src[0] == '%'
3300 && src[1] == '@')
9ffd559c
KH
3301 {
3302 src += 2;
3303 break;
3304 }
4776e638 3305 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3306 }
4776e638
KH
3307 if (p + 3 > charbuf_end)
3308 goto break_loop;
3309 *p++ = ISO_CODE_ESC;
3310 *p++ = '%';
3311 *p++ = '@';
3312 charbuf = p;
103e0180
KH
3313 }
3314 else
4776e638 3315 goto invalid_code;
103e0180 3316 continue;
4776e638 3317 break;
103e0180 3318
4ed46869 3319 default:
df7492f9
KH
3320 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3321 goto invalid_code;
134b9549
KH
3322 {
3323 int reg, chars96;
3324
3325 if (c1 >= 0x28 && c1 <= 0x2B)
3326 { /* designation of DIMENSION1_CHARS94 character set */
3327 reg = c1 - 0x28, chars96 = 0;
3328 ONE_MORE_BYTE (c1);
3329 }
3330 else if (c1 >= 0x2C && c1 <= 0x2F)
3331 { /* designation of DIMENSION1_CHARS96 character set */
3332 reg = c1 - 0x2C, chars96 = 1;
3333 ONE_MORE_BYTE (c1);
3334 }
3335 else
3336 goto invalid_code;
3337 DECODE_DESIGNATION (reg, 1, chars96, c1);
3338 /* We must update these variables now. */
3339 if (reg == 0)
3340 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3341 else if (reg == 1)
3342 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3343 if (chars96 < 0)
3344 goto invalid_code;
3345 }
b73bfc1c 3346 continue;
4ed46869 3347 }
b73bfc1c 3348 }
4ed46869 3349
ff0dacd7
KH
3350 if (charset->id != charset_ascii
3351 && last_id != charset->id)
3352 {
3353 if (last_id != charset_ascii)
69a80ea3 3354 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
3355 last_id = charset->id;
3356 last_offset = char_offset;
3357 }
3358
b73bfc1c 3359 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3360 Produce a decoded character while getting 2nd position code
3361 C2 if necessary. */
3362 c1 &= 0x7F;
3363 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3364 {
3365 ONE_MORE_BYTE (c2);
df7492f9 3366 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3367 /* C2 is not in a valid range. */
df7492f9
KH
3368 goto invalid_code;
3369 c1 = (c1 << 8) | (c2 & 0x7F);
3370 if (CHARSET_DIMENSION (charset) > 2)
3371 {
3372 ONE_MORE_BYTE (c2);
3373 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3374 /* C2 is not in a valid range. */
3375 goto invalid_code;
3376 c1 = (c1 << 8) | (c2 & 0x7F);
3377 }
3378 }
3379
3380 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3381 if (c < 0)
3382 {
3383 MAYBE_FINISH_COMPOSITION ();
3384 for (; src_base < src; src_base++, char_offset++)
3385 {
3386 if (ASCII_BYTE_P (*src_base))
3387 *charbuf++ = *src_base;
3388 else
3389 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3390 }
3391 }
3392 else if (composition_state == COMPOSING_NO)
3393 {
3394 *charbuf++ = c;
3395 char_offset++;
4ed46869 3396 }
df7492f9 3397 else
781d7a48
KH
3398 {
3399 components[component_idx++] = c;
3400 if (method == COMPOSITION_WITH_RULE
3401 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3402 && composition_state == COMPOSING_COMPONENT_CHAR))
3403 composition_state++;
4ed46869
KH
3404 }
3405 continue;
3406
df7492f9
KH
3407 invalid_code:
3408 MAYBE_FINISH_COMPOSITION ();
4ed46869 3409 src = src_base;
df7492f9
KH
3410 consumed_chars = consumed_chars_base;
3411 ONE_MORE_BYTE (c);
065e3595 3412 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3413 char_offset++;
df7492f9 3414 coding->errors++;
4776e638
KH
3415 continue;
3416
3417 break_loop:
3418 break;
4ed46869 3419 }
fb88bf2d 3420
df7492f9 3421 no_more_source:
ff0dacd7 3422 if (last_id != charset_ascii)
69a80ea3 3423 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
3424 coding->consumed_char += consumed_chars_base;
3425 coding->consumed = src_base - coding->source;
3426 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3427}
3428
b73bfc1c 3429
f4dee582 3430/* ISO2022 encoding stuff. */
4ed46869
KH
3431
3432/*
f4dee582 3433 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3434 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3435 variant has the following specifications:
df7492f9 3436 1. Initial designation to G0 thru G3.
4ed46869
KH
3437 2. Allows short-form designation?
3438 3. ASCII should be designated to G0 before control characters?
3439 4. ASCII should be designated to G0 at end of line?
3440 5. 7-bit environment or 8-bit environment?
3441 6. Use locking-shift?
3442 7. Use Single-shift?
3443 And the following two are only for Japanese:
3444 8. Use ASCII in place of JIS0201-1976-Roman?
3445 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3446 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3447 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3448 details.
4ed46869
KH
3449*/
3450
/* Emit the escape sequence that designates CHARSET to graphic
   register REG, and record the designation in CODING.  The sequence
   is: an optional revision prefix (ESC & <rev>), ESC, `$' for
   charsets of dimension other than 1, an intermediate character
   selected by REG and by whether CHARSET has 96 characters, and
   the final character of CHARSET.  For a 94-character
   multi-dimensional charset designated to register 0 whose final
   character is '@', 'A', or 'B', the intermediate character is
   omitted (short form) unless CODING requests the long form.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    const char *intermediate_char_94 = "()*+";                          \
    const char *intermediate_char_96 = ",-./";                          \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    int revision = -1;                                                  \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      revision = CHARSET_ISO_REVISION (charset);                        \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
        else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM  \
                 || reg != 0                                            \
                 || final_char < '@' || final_char > 'B')               \
          EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);              \
      }                                                                 \
    else if (CHARSET_ISO_CHARS_96 (charset))                            \
      EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);                  \
    else                                                                \
      EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);                  \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3498
df7492f9 3499
4ed46869
KH
/* ENCODE_SINGLE_SHIFT_2 and ENCODE_SINGLE_SHIFT_3 produce the code
   (a single control character, or an escape sequence when the coding
   system is flagged as seven-bit) for the ISO2022 single-shift-2 and
   single-shift-3 functions, and mark CODING as single shifting.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3512
df7492f9
KH
3513
/* Produce the single-shift-3 code: ESC 'O' when the coding system is
   flagged as seven-bit, the SS3 control character otherwise.  Mark
   CODING as single shifting.  */
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3522
df7492f9 3523
4ed46869
KH
/* ENCODE_SHIFT_IN, ENCODE_SHIFT_OUT, ENCODE_LOCKING_SHIFT_2 and
   ENCODE_LOCKING_SHIFT_3 produce the code (control character or
   escape sequence) for the ISO2022 locking-shift functions and
   record the register newly invoked to graphic plane 0.  */

/* Shift-in: invoke register 0.  */
#define ENCODE_SHIFT_IN                         \
  do {                                          \
    CODING_ISO_INVOCATION (coding, 0) = 0;      \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);          \
  } while (0)
3533
df7492f9
KH
3534
/* Shift-out: emit SO and record register 1 as invoked to graphic
   plane 0.  */
#define ENCODE_SHIFT_OUT                        \
  do {                                          \
    CODING_ISO_INVOCATION (coding, 0) = 1;      \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);          \
  } while (0)
3540
df7492f9
KH
3541
/* Locking-shift-2: emit ESC 'n' and record register 2 as invoked to
   graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2                  \
  do {                                          \
    CODING_ISO_INVOCATION (coding, 0) = 2;      \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');   \
  } while (0)
3547
df7492f9
KH
3548
/* Locking-shift-3: emit ESC 'o' and record register 3 as invoked to
   graphic plane 0.  The final byte must be 'o'; ESC 'n' is
   locking-shift-2, and the decoder in this file treats ESC 'o' as
   locking-shift-3.  */
#define ENCODE_LOCKING_SHIFT_3                  \
  do {                                          \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');   \
    CODING_ISO_INVOCATION (coding, 0) = 3;      \
  } while (0)
3554
df7492f9 3555
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when the coding system has
   CODING_ISO_FLAG_USE_ROMAN set and CHARSET is ASCII, it is replaced
   by the JISX0201-Roman charset.  C1 may be any expression; it is
   parenthesized wherever it is used.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single shift has just been emitted; it covers exactly      \
           this one character.  */                                      \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3598
df7492f9 3599
f4dee582
RS
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   CHARSET must be a modifiable lvalue: when the coding system has
   CODING_ISO_FLAG_USE_OLDJIS set and CHARSET is charset_jisx0208, it
   is replaced by the charset whose id is charset_jisx0208_1978.
   `coding', `dst' and `produced_chars' are taken from the expansion
   site.

   The body loops until one of the emitting cases is reached; each of
   them ends with `break'.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do                                                                    \
    {                                                                   \
      int id = CHARSET_ID (charset);                                    \
                                                                        \
      if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)      \
          && id == charset_jisx0208)                                    \
        {                                                               \
          id = charset_jisx0208_1978;                                   \
          charset = CHARSET_FROM_ID (id);                               \
        }                                                               \
                                                                        \
      /* Case 1: a single shift is pending; it applies to this        \
         character only.  */                                            \
      if (CODING_ISO_SINGLE_SHIFTING (coding))                          \
        {                                                               \
          if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)   \
            EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);            \
          else                                                          \
            EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                  \
          CODING_ISO_SINGLE_SHIFTING (coding) = 0;                      \
          break;                                                        \
        }                                                               \
                                                                        \
      /* Case 2: CHARSET is invoked to graphic plane 0.  */             \
      if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                 \
        {                                                               \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
          break;                                                        \
        }                                                               \
                                                                        \
      /* Case 3: CHARSET is invoked to graphic plane 1.  */             \
      if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                 \
        {                                                               \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
          break;                                                        \
        }                                                               \
                                                                        \
      /* Otherwise CHARSET is not invoked to any graphic plane yet.    \
         Designate and/or invoke it, then go round again to emit      \
         the character itself.  */                                      \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
    }                                                                   \
  while (1)
3642
05e6f5dc 3643
df7492f9
KH
/* Encode character C of CHARSET: obtain its code with ENCODE_CHAR
   and pass it to the emitter matching CHARSET_DIMENSION (CHARSET).
   For a charset whose dimension is not 1, the code is split into its
   high byte (code >> 8) and low byte (code & 0xFF).  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do                                                                       \
    {                                                                      \
      int code = ENCODE_CHAR ((charset), (c));                             \
                                                                           \
      if (CHARSET_DIMENSION (charset) != 1)                                \
        ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
      else                                                                 \
        ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                 \
    }                                                                      \
  while (0)
bdd9fb48 3653
05e6f5dc 3654
4ed46869 3655/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3656 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3657 Return new DST. */
3658
3659unsigned char *
df7492f9
KH
3660encode_invocation_designation (charset, coding, dst, p_nchars)
3661 struct charset *charset;
4ed46869
KH
3662 struct coding_system *coding;
3663 unsigned char *dst;
df7492f9 3664 int *p_nchars;
4ed46869 3665{
df7492f9
KH
3666 int multibytep = coding->dst_multibyte;
3667 int produced_chars = *p_nchars;
4ed46869 3668 int reg; /* graphic register number */
df7492f9 3669 int id = CHARSET_ID (charset);
4ed46869
KH
3670
3671 /* At first, check designations. */
3672 for (reg = 0; reg < 4; reg++)
df7492f9 3673 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3674 break;
3675
3676 if (reg >= 4)
3677 {
3678 /* CHARSET is not yet designated to any graphic registers. */
3679 /* At first check the requested designation. */
df7492f9
KH
3680 reg = CODING_ISO_REQUEST (coding, id);
3681 if (reg < 0)
1ba9e4ab
KH
3682 /* Since CHARSET requests no special designation, designate it
3683 to graphic register 0. */
4ed46869
KH
3684 reg = 0;
3685
3686 ENCODE_DESIGNATION (charset, reg, coding);
3687 }
3688
df7492f9
KH
3689 if (CODING_ISO_INVOCATION (coding, 0) != reg
3690 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3691 {
3692 /* Since the graphic register REG is not invoked to any graphic
3693 planes, invoke it to graphic plane 0. */
3694 switch (reg)
3695 {
3696 case 0: /* graphic register 0 */
3697 ENCODE_SHIFT_IN;
3698 break;
3699
3700 case 1: /* graphic register 1 */
3701 ENCODE_SHIFT_OUT;
3702 break;
3703
3704 case 2: /* graphic register 2 */
df7492f9 3705 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3706 ENCODE_SINGLE_SHIFT_2;
3707 else
3708 ENCODE_LOCKING_SHIFT_2;
3709 break;
3710
3711 case 3: /* graphic register 3 */
df7492f9 3712 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3713 ENCODE_SINGLE_SHIFT_3;
3714 else
3715 ENCODE_LOCKING_SHIFT_3;
3716 break;
3717 }
3718 }
b73bfc1c 3719
df7492f9 3720 *p_nchars = produced_chars;
4ed46869
KH
3721 return dst;
3722}
3723
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.  */

/* Emit a control sequence introducer: the two bytes ESC [ when the
   coding system has CODING_ISO_FLAG_SEVEN_BITS set, else the single
   byte ISO_CODE_CSI.  CODING_ISO_FLAGS is a set of bit flags, so the
   seven-bit flag has to be tested with `&'; comparing the whole flag
   word with `==' would select the seven-bit form only when no other
   flag is set.  `coding' is taken from the expansion site.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
3733
ec6d2bb8 3734
df7492f9
KH
/* Emit a control sequence introducer followed by the bytes `2' and
   `]'.  ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro,
   so it must be used without an argument list; writing `(dst)'
   after it would leave a stray `(dst)' behind its `while (0)'.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)
3740
ec6d2bb8 3741
/* Emit a control sequence introducer followed by the bytes `0' and
   `]'.  ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro,
   so it must be used without an argument list; writing `(dst)'
   after it would leave a stray `(dst)' behind its `while (0)'.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
4ed46869 3747
4ed46869
KH
3748
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state.

   Graphic plane 0 is shifted back in when something other than
   register 0 is invoked to it.  Then every register that has an
   initial charset (CODING_ISO_INITIAL >= 0) and currently holds a
   different one is designated again.  `coding' is taken from the
   expansion site.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do                                                                    \
    {                                                                   \
      int reg;                                                          \
      struct charset *charset;                                          \
                                                                        \
      if (CODING_ISO_INVOCATION (coding, 0) != 0)                       \
        ENCODE_SHIFT_IN;                                                \
                                                                        \
      for (reg = 0; reg < 4; reg++)                                     \
        {                                                               \
          /* Nothing to do for a register without an initial          \
             charset, or one that already holds it.  */                 \
          if (CODING_ISO_INITIAL (coding, reg) < 0                      \
              || (CODING_ISO_DESIGNATION (coding, reg)                  \
                  == CODING_ISO_INITIAL (coding, reg)))                 \
            continue;                                                   \
                                                                        \
          charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
          ENCODE_DESIGNATION (charset, reg, coding);                    \
        }                                                               \
    }                                                                   \
  while (0)
3767
df7492f9 3768
bdd9fb48 3769/* Produce designation sequences of charsets in the line started from
b73bfc1c 3770 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3771
3772 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3773 find all the necessary designations. */
3774
b73bfc1c 3775static unsigned char *
df7492f9 3776encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3777 struct coding_system *coding;
df7492f9
KH
3778 int *charbuf, *charbuf_end;
3779 unsigned char *dst;
e0e989f6 3780{
df7492f9 3781 struct charset *charset;
bdd9fb48
KH
3782 /* Table of charsets to be designated to each graphic register. */
3783 int r[4];
df7492f9
KH
3784 int c, found = 0, reg;
3785 int produced_chars = 0;
3786 int multibytep = coding->dst_multibyte;
3787 Lisp_Object attrs;
3788 Lisp_Object charset_list;
3789
3790 attrs = CODING_ID_ATTRS (coding->id);
3791 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3792 if (EQ (charset_list, Qiso_2022))
3793 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3794
3795 for (reg = 0; reg < 4; reg++)
3796 r[reg] = -1;
3797
b73bfc1c 3798 while (found < 4)
e0e989f6 3799 {
df7492f9
KH
3800 int id;
3801
3802 c = *charbuf++;
b73bfc1c
KH
3803 if (c == '\n')
3804 break;
df7492f9
KH
3805 charset = char_charset (c, charset_list, NULL);
3806 id = CHARSET_ID (charset);
3807 reg = CODING_ISO_REQUEST (coding, id);
3808 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3809 {
3810 found++;
df7492f9 3811 r[reg] = id;
bdd9fb48 3812 }
bdd9fb48
KH
3813 }
3814
3815 if (found)
3816 {
3817 for (reg = 0; reg < 4; reg++)
3818 if (r[reg] >= 0
df7492f9
KH
3819 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3820 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3821 }
b73bfc1c
KH
3822
3823 return dst;
e0e989f6
KH
3824}
3825
4ed46869
KH
3826/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3827
df7492f9
KH
3828static int
3829encode_coding_iso_2022 (coding)
4ed46869 3830 struct coding_system *coding;
4ed46869 3831{
df7492f9
KH
3832 int multibytep = coding->dst_multibyte;
3833 int *charbuf = coding->charbuf;
3834 int *charbuf_end = charbuf + coding->charbuf_used;
3835 unsigned char *dst = coding->destination + coding->produced;
3836 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3837 int safe_room = 16;
3838 int bol_designation
3839 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3840 && CODING_ISO_BOL (coding));
3841 int produced_chars = 0;
3842 Lisp_Object attrs, eol_type, charset_list;
3843 int ascii_compatible;
b73bfc1c 3844 int c;
ff0dacd7 3845 int preferred_charset_id = -1;
05e6f5dc 3846
24a73b0a
KH
3847 CODING_GET_INFO (coding, attrs, charset_list);
3848 eol_type = CODING_ID_EOL_TYPE (coding->id);
3849 if (VECTORP (eol_type))
3850 eol_type = Qunix;
3851
004068e4 3852 setup_iso_safe_charsets (attrs);
ff0dacd7 3853 /* Charset list may have been changed. */
287c57d7 3854 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8f924df7 3855 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
0eecad43 3856
df7492f9 3857 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
bdd9fb48 3858
df7492f9 3859 while (charbuf < charbuf_end)
4ed46869 3860 {
df7492f9 3861 ASSURE_DESTINATION (safe_room);
b73bfc1c 3862
df7492f9 3863 if (bol_designation)
b73bfc1c 3864 {
df7492f9 3865 unsigned char *dst_prev = dst;
4ed46869 3866
bdd9fb48 3867 /* We have to produce designation sequences if any now. */
df7492f9
KH
3868 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3869 bol_designation = 0;
3870 /* We are sure that designation sequences are all ASCII bytes. */
3871 produced_chars += dst - dst_prev;
e0e989f6
KH
3872 }
3873
df7492f9 3874 c = *charbuf++;
ec6d2bb8 3875
ff0dacd7
KH
3876 if (c < 0)
3877 {
3878 /* Handle an annotation. */
3879 switch (*charbuf)
ec6d2bb8 3880 {
ff0dacd7
KH
3881 case CODING_ANNOTATE_COMPOSITION_MASK:
3882 /* Not yet implemented. */
3883 break;
3884 case CODING_ANNOTATE_CHARSET_MASK:
cf7dfdf5 3885 preferred_charset_id = charbuf[2];
ff0dacd7
KH
3886 if (preferred_charset_id >= 0
3887 && NILP (Fmemq (make_number (preferred_charset_id),
3888 charset_list)))
3889 preferred_charset_id = -1;
3890 break;
3891 default:
3892 abort ();
4ed46869 3893 }
ff0dacd7
KH
3894 charbuf += -c - 1;
3895 continue;
4ed46869 3896 }
ec6d2bb8 3897
b73bfc1c
KH
3898 /* Now encode the character C. */
3899 if (c < 0x20 || c == 0x7F)
3900 {
df7492f9
KH
3901 if (c == '\n'
3902 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3903 {
df7492f9
KH
3904 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3905 ENCODE_RESET_PLANE_AND_REGISTER ();
3906 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3907 {
df7492f9
KH
3908 int i;
3909
3910 for (i = 0; i < 4; i++)
3911 CODING_ISO_DESIGNATION (coding, i)
3912 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3913 }
df7492f9
KH
3914 bol_designation
3915 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3916 }
df7492f9
KH
3917 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3918 ENCODE_RESET_PLANE_AND_REGISTER ();
3919 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3920 }
df7492f9 3921 else if (ASCII_CHAR_P (c))
88993dfd 3922 {
df7492f9
KH
3923 if (ascii_compatible)
3924 EMIT_ONE_ASCII_BYTE (c);
93dec019 3925 else
19a8d9e0 3926 {
bf16eb23
KH
3927 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3928 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 3929 }
4ed46869 3930 }
16eafb5d 3931 else if (CHAR_BYTE8_P (c))
88993dfd 3932 {
16eafb5d
KH
3933 c = CHAR_TO_BYTE8 (c);
3934 EMIT_ONE_BYTE (c);
88993dfd 3935 }
b73bfc1c 3936 else
df7492f9 3937 {
ff0dacd7 3938 struct charset *charset;
b73bfc1c 3939
ff0dacd7
KH
3940 if (preferred_charset_id >= 0)
3941 {
3942 charset = CHARSET_FROM_ID (preferred_charset_id);
3943 if (! CHAR_CHARSET_P (c, charset))
3944 charset = char_charset (c, charset_list, NULL);
3945 }
3946 else
3947 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
3948 if (!charset)
3949 {
41cbe562
KH
3950 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3951 {
3952 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3953 charset = CHARSET_FROM_ID (charset_ascii);
3954 }
3955 else
3956 {
3957 c = coding->default_char;
3958 charset = char_charset (c, charset_list, NULL);
3959 }
df7492f9
KH
3960 }
3961 ENCODE_ISO_CHARACTER (charset, c);
3962 }
84fbb8a0 3963 }
b73bfc1c 3964
df7492f9
KH
3965 if (coding->mode & CODING_MODE_LAST_BLOCK
3966 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3967 {
3968 ASSURE_DESTINATION (safe_room);
3969 ENCODE_RESET_PLANE_AND_REGISTER ();
3970 }
065e3595 3971 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
3972 CODING_ISO_BOL (coding) = bol_designation;
3973 coding->produced_char += produced_chars;
3974 coding->produced = dst - coding->destination;
3975 return 0;
4ed46869
KH
3976}
3977
3978\f
df7492f9 3979/*** 8. SJIS and BIG5 handlers ***/
4ed46869 3980
df7492f9 3981/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3982 quite widely. So, for the moment, Emacs supports them in the bare
3983 C code. But, in the future, they may be supported only by CCL. */
3984
3985/* SJIS is a coding system encoding three character sets: ASCII, right
3986 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3987 as is. A character of charset katakana-jisx0201 is encoded by
3988 "position-code + 0x80". A character of charset japanese-jisx0208
3989 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 3990 so that they fit in the ranges below.
4ed46869
KH
3991
3992 --- CODE RANGE of SJIS ---
3993 (character set) (range)
3994 ASCII 0x00 .. 0x7F
df7492f9 3995 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3996 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3997 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3998 -------------------------------
3999
4000*/
4001
4002/* BIG5 is a coding system encoding two character sets: ASCII and
4003 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 4004 character set and is encoded in two bytes.
4ed46869
KH
4005
4006 --- CODE RANGE of BIG5 ---
4007 (character set) (range)
4008 ASCII 0x00 .. 0x7F
4009 Big5 (1st byte) 0xA1 .. 0xFE
4010 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
4011 --------------------------
4012
df7492f9 4013 */
4ed46869
KH
4014
4015/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4016 Check if a text is encoded in SJIS. If it is, return
df7492f9 4017 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 4018
0a28aafb 4019static int
ff0dacd7 4020detect_coding_sjis (coding, detect_info)
df7492f9 4021 struct coding_system *coding;
ff0dacd7 4022 struct coding_detection_info *detect_info;
4ed46869 4023{
065e3595 4024 const unsigned char *src = coding->source, *src_base;
8f924df7 4025 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4026 int multibytep = coding->src_multibyte;
4027 int consumed_chars = 0;
4028 int found = 0;
b73bfc1c 4029 int c;
df7492f9 4030
ff0dacd7 4031 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
4032 /* A coding system of this category is always ASCII compatible. */
4033 src += coding->head_ascii;
4ed46869 4034
b73bfc1c 4035 while (1)
4ed46869 4036 {
065e3595 4037 src_base = src;
df7492f9 4038 ONE_MORE_BYTE (c);
682169fe
KH
4039 if (c < 0x80)
4040 continue;
df7492f9 4041 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 4042 {
df7492f9 4043 ONE_MORE_BYTE (c);
682169fe 4044 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 4045 break;
ff0dacd7 4046 found = CATEGORY_MASK_SJIS;
4ed46869 4047 }
df7492f9 4048 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 4049 found = CATEGORY_MASK_SJIS;
df7492f9
KH
4050 else
4051 break;
4ed46869 4052 }
ff0dacd7 4053 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
4054 return 0;
4055
4056 no_more_source:
065e3595 4057 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4058 {
ff0dacd7 4059 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 4060 return 0;
4ed46869 4061 }
ff0dacd7
KH
4062 detect_info->found |= found;
4063 return 1;
4ed46869
KH
4064}
4065
4066/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4067 Check if a text is encoded in BIG5. If it is, return
df7492f9 4068 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 4069
0a28aafb 4070static int
ff0dacd7 4071detect_coding_big5 (coding, detect_info)
df7492f9 4072 struct coding_system *coding;
ff0dacd7 4073 struct coding_detection_info *detect_info;
4ed46869 4074{
065e3595 4075 const unsigned char *src = coding->source, *src_base;
8f924df7 4076 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4077 int multibytep = coding->src_multibyte;
4078 int consumed_chars = 0;
4079 int found = 0;
b73bfc1c 4080 int c;
fa42c37f 4081
ff0dacd7 4082 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4083 /* A coding system of this category is always ASCII compatible. */
4084 src += coding->head_ascii;
fa42c37f 4085
b73bfc1c 4086 while (1)
fa42c37f 4087 {
065e3595 4088 src_base = src;
df7492f9
KH
4089 ONE_MORE_BYTE (c);
4090 if (c < 0x80)
fa42c37f 4091 continue;
df7492f9 4092 if (c >= 0xA1)
fa42c37f 4093 {
df7492f9
KH
4094 ONE_MORE_BYTE (c);
4095 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4096 return 0;
ff0dacd7 4097 found = CATEGORY_MASK_BIG5;
fa42c37f 4098 }
df7492f9
KH
4099 else
4100 break;
fa42c37f 4101 }
ff0dacd7 4102 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4103 return 0;
fa42c37f 4104
df7492f9 4105 no_more_source:
065e3595 4106 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4107 {
ff0dacd7 4108 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4109 return 0;
4110 }
ff0dacd7
KH
4111 detect_info->found |= found;
4112 return 1;
fa42c37f
KH
4113}
4114
4ed46869
KH
4115/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4116 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
fa42c37f 4117
b73bfc1c 4118static void
df7492f9 4119decode_coding_sjis (coding)
4ed46869 4120 struct coding_system *coding;
4ed46869 4121{
8f924df7
KH
4122 const unsigned char *src = coding->source + coding->consumed;
4123 const unsigned char *src_end = coding->source + coding->src_bytes;
4124 const unsigned char *src_base;
69a80ea3
KH
4125 int *charbuf = coding->charbuf + coding->charbuf_used;
4126 int *charbuf_end
4127 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4128 int consumed_chars = 0, consumed_chars_base;
4129 int multibytep = coding->src_multibyte;
4130 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4131 struct charset *charset_kanji2;
24a73b0a 4132 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4133 int char_offset = coding->produced_char;
4134 int last_offset = char_offset;
4135 int last_id = charset_ascii;
119852e7
KH
4136 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4137 int byte_after_cr = -1;
a5d301df 4138
24a73b0a 4139 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4140
4141 val = charset_list;
4142 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4143 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4144 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4145 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4146
b73bfc1c 4147 while (1)
4ed46869 4148 {
df7492f9 4149 int c, c1;
24a73b0a 4150 struct charset *charset;
fa42c37f 4151
b73bfc1c 4152 src_base = src;
df7492f9 4153 consumed_chars_base = consumed_chars;
fa42c37f 4154
df7492f9
KH
4155 if (charbuf >= charbuf_end)
4156 break;
4157
119852e7
KH
4158 if (byte_after_cr >= 0)
4159 c = byte_after_cr, byte_after_cr = -1;
4160 else
4161 ONE_MORE_BYTE (c);
065e3595
KH
4162 if (c < 0)
4163 goto invalid_code;
24a73b0a 4164 if (c < 0x80)
119852e7
KH
4165 {
4166 if (eol_crlf && c == '\r')
4167 ONE_MORE_BYTE (byte_after_cr);
4168 charset = charset_roman;
4169 }
57a47f8a 4170 else if (c == 0x80 || c == 0xA0)
8e921c4b 4171 goto invalid_code;
57a47f8a
KH
4172 else if (c >= 0xA1 && c <= 0xDF)
4173 {
4174 /* SJIS -> JISX0201-Kana */
4175 c &= 0x7F;
4176 charset = charset_kana;
4177 }
4178 else if (c <= 0xEF)
df7492f9 4179 {
57a47f8a
KH
4180 /* SJIS -> JISX0208 */
4181 ONE_MORE_BYTE (c1);
4182 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4183 goto invalid_code;
57a47f8a
KH
4184 c = (c << 8) | c1;
4185 SJIS_TO_JIS (c);
4186 charset = charset_kanji;
4187 }
4188 else if (c <= 0xFC && charset_kanji2)
4189 {
c6876370 4190 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4191 ONE_MORE_BYTE (c1);
4192 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4193 goto invalid_code;
57a47f8a
KH
4194 c = (c << 8) | c1;
4195 SJIS_TO_JIS2 (c);
4196 charset = charset_kanji2;
df7492f9 4197 }
57a47f8a
KH
4198 else
4199 goto invalid_code;
24a73b0a
KH
4200 if (charset->id != charset_ascii
4201 && last_id != charset->id)
4202 {
4203 if (last_id != charset_ascii)
69a80ea3 4204 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4205 last_id = charset->id;
4206 last_offset = char_offset;
4207 }
4208 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4209 *charbuf++ = c;
ff0dacd7 4210 char_offset++;
df7492f9 4211 continue;
b73bfc1c 4212
df7492f9
KH
4213 invalid_code:
4214 src = src_base;
4215 consumed_chars = consumed_chars_base;
4216 ONE_MORE_BYTE (c);
065e3595 4217 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4218 char_offset++;
df7492f9
KH
4219 coding->errors++;
4220 }
fa42c37f 4221
df7492f9 4222 no_more_source:
ff0dacd7 4223 if (last_id != charset_ascii)
69a80ea3 4224 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4225 coding->consumed_char += consumed_chars_base;
4226 coding->consumed = src_base - coding->source;
4227 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4228}
4229
b73bfc1c 4230static void
df7492f9 4231decode_coding_big5 (coding)
4ed46869 4232 struct coding_system *coding;
4ed46869 4233{
8f924df7
KH
4234 const unsigned char *src = coding->source + coding->consumed;
4235 const unsigned char *src_end = coding->source + coding->src_bytes;
4236 const unsigned char *src_base;
69a80ea3
KH
4237 int *charbuf = coding->charbuf + coding->charbuf_used;
4238 int *charbuf_end
4239 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4240 int consumed_chars = 0, consumed_chars_base;
4241 int multibytep = coding->src_multibyte;
4242 struct charset *charset_roman, *charset_big5;
24a73b0a 4243 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4244 int char_offset = coding->produced_char;
4245 int last_offset = char_offset;
4246 int last_id = charset_ascii;
119852e7
KH
4247 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4248 int byte_after_cr = -1;
df7492f9 4249
24a73b0a 4250 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4251 val = charset_list;
4252 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4253 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4254
b73bfc1c 4255 while (1)
4ed46869 4256 {
df7492f9 4257 int c, c1;
24a73b0a 4258 struct charset *charset;
b73bfc1c
KH
4259
4260 src_base = src;
df7492f9
KH
4261 consumed_chars_base = consumed_chars;
4262
4263 if (charbuf >= charbuf_end)
4264 break;
4265
119852e7 4266 if (byte_after_cr >= 0)
14daee73 4267 c = byte_after_cr, byte_after_cr = -1;
119852e7
KH
4268 else
4269 ONE_MORE_BYTE (c);
b73bfc1c 4270
065e3595
KH
4271 if (c < 0)
4272 goto invalid_code;
24a73b0a 4273 if (c < 0x80)
119852e7 4274 {
14daee73 4275 if (eol_crlf && c == '\r')
119852e7
KH
4276 ONE_MORE_BYTE (byte_after_cr);
4277 charset = charset_roman;
4278 }
24a73b0a 4279 else
4ed46869 4280 {
24a73b0a
KH
4281 /* BIG5 -> Big5 */
4282 if (c < 0xA1 || c > 0xFE)
4283 goto invalid_code;
4284 ONE_MORE_BYTE (c1);
4285 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4286 goto invalid_code;
4287 c = c << 8 | c1;
4288 charset = charset_big5;
4ed46869 4289 }
24a73b0a
KH
4290 if (charset->id != charset_ascii
4291 && last_id != charset->id)
df7492f9 4292 {
24a73b0a 4293 if (last_id != charset_ascii)
69a80ea3 4294 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4295 last_id = charset->id;
4296 last_offset = char_offset;
4ed46869 4297 }
24a73b0a 4298 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4299 *charbuf++ = c;
ff0dacd7 4300 char_offset++;
fb88bf2d
KH
4301 continue;
4302
df7492f9 4303 invalid_code:
4ed46869 4304 src = src_base;
df7492f9
KH
4305 consumed_chars = consumed_chars_base;
4306 ONE_MORE_BYTE (c);
065e3595 4307 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4308 char_offset++;
df7492f9 4309 coding->errors++;
fb88bf2d 4310 }
d46c5b12 4311
df7492f9 4312 no_more_source:
ff0dacd7 4313 if (last_id != charset_ascii)
69a80ea3 4314 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4315 coding->consumed_char += consumed_chars_base;
4316 coding->consumed = src_base - coding->source;
4317 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4318}
4319
4320/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4321 This function can encode charsets `ascii', `katakana-jisx0201',
4322 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4323 are sure that all these charsets are registered as official charset
4ed46869
KH
4324 (i.e. do not have extended leading-codes). Characters of other
4325 charsets are produced without any encoding. If SJIS_P is 1, encode
4326 SJIS text, else encode BIG5 text. */
4327
df7492f9
KH
4328static int
4329encode_coding_sjis (coding)
4ed46869 4330 struct coding_system *coding;
4ed46869 4331{
df7492f9
KH
4332 int multibytep = coding->dst_multibyte;
4333 int *charbuf = coding->charbuf;
4334 int *charbuf_end = charbuf + coding->charbuf_used;
4335 unsigned char *dst = coding->destination + coding->produced;
4336 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4337 int safe_room = 4;
4338 int produced_chars = 0;
24a73b0a 4339 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4340 int ascii_compatible;
4341 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4342 struct charset *charset_kanji2;
df7492f9 4343 int c;
a5d301df 4344
24a73b0a 4345 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4346 val = charset_list;
4347 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4348 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4349 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4350 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4351
df7492f9 4352 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 4353
df7492f9
KH
4354 while (charbuf < charbuf_end)
4355 {
4356 ASSURE_DESTINATION (safe_room);
4357 c = *charbuf++;
b73bfc1c 4358 /* Now encode the character C. */
df7492f9
KH
4359 if (ASCII_CHAR_P (c) && ascii_compatible)
4360 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4361 else if (CHAR_BYTE8_P (c))
4362 {
4363 c = CHAR_TO_BYTE8 (c);
4364 EMIT_ONE_BYTE (c);
4365 }
df7492f9 4366 else
b73bfc1c 4367 {
df7492f9
KH
4368 unsigned code;
4369 struct charset *charset = char_charset (c, charset_list, &code);
4370
4371 if (!charset)
4ed46869 4372 {
41cbe562 4373 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4374 {
41cbe562
KH
4375 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4376 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4377 }
41cbe562 4378 else
b73bfc1c 4379 {
41cbe562
KH
4380 c = coding->default_char;
4381 charset = char_charset (c, charset_list, &code);
b73bfc1c 4382 }
b73bfc1c 4383 }
df7492f9
KH
4384 if (code == CHARSET_INVALID_CODE (charset))
4385 abort ();
4386 if (charset == charset_kanji)
4387 {
4388 int c1, c2;
4389 JIS_TO_SJIS (code);
4390 c1 = code >> 8, c2 = code & 0xFF;
4391 EMIT_TWO_BYTES (c1, c2);
4392 }
4393 else if (charset == charset_kana)
4394 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
4395 else if (charset_kanji2 && charset == charset_kanji2)
4396 {
4397 int c1, c2;
4398
4399 c1 = code >> 8;
4400 if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4401 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4402 {
4403 JIS_TO_SJIS2 (code);
4404 c1 = code >> 8, c2 = code & 0xFF;
4405 EMIT_TWO_BYTES (c1, c2);
4406 }
4407 else
4408 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4409 }
df7492f9
KH
4410 else
4411 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4412 }
4413 }
065e3595 4414 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4415 coding->produced_char += produced_chars;
4416 coding->produced = dst - coding->destination;
4417 return 0;
4418}
4419
4420static int
4421encode_coding_big5 (coding)
4422 struct coding_system *coding;
4423{
4424 int multibytep = coding->dst_multibyte;
4425 int *charbuf = coding->charbuf;
4426 int *charbuf_end = charbuf + coding->charbuf_used;
4427 unsigned char *dst = coding->destination + coding->produced;
4428 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4429 int safe_room = 4;
4430 int produced_chars = 0;
24a73b0a 4431 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4432 int ascii_compatible;
4433 struct charset *charset_roman, *charset_big5;
4434 int c;
4435
24a73b0a 4436 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4437 val = charset_list;
4438 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4439 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4440 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4441
4442 while (charbuf < charbuf_end)
4443 {
4444 ASSURE_DESTINATION (safe_room);
4445 c = *charbuf++;
4446 /* Now encode the character C. */
4447 if (ASCII_CHAR_P (c) && ascii_compatible)
4448 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4449 else if (CHAR_BYTE8_P (c))
4450 {
4451 c = CHAR_TO_BYTE8 (c);
4452 EMIT_ONE_BYTE (c);
b73bfc1c
KH
4453 }
4454 else
4455 {
df7492f9
KH
4456 unsigned code;
4457 struct charset *charset = char_charset (c, charset_list, &code);
4458
4459 if (! charset)
b73bfc1c 4460 {
41cbe562 4461 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4462 {
41cbe562
KH
4463 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4464 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4465 }
41cbe562 4466 else
0eecad43 4467 {
41cbe562
KH
4468 c = coding->default_char;
4469 charset = char_charset (c, charset_list, &code);
0eecad43 4470 }
4ed46869 4471 }
df7492f9
KH
4472 if (code == CHARSET_INVALID_CODE (charset))
4473 abort ();
4474 if (charset == charset_big5)
b73bfc1c 4475 {
df7492f9
KH
4476 int c1, c2;
4477
4478 c1 = code >> 8, c2 = code & 0xFF;
4479 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 4480 }
df7492f9
KH
4481 else
4482 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 4483 }
4ed46869 4484 }
065e3595 4485 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4486 coding->produced_char += produced_chars;
4487 coding->produced = dst - coding->destination;
4488 return 0;
4ed46869
KH
4489}
4490
4491\f
df7492f9 4492/*** 9. CCL handlers ***/
1397dc18
KH
4493
4494/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4495 Check if a text is encoded in a coding system of which
4496 encoder/decoder are written in CCL program. If it is, return
df7492f9 4497 CATEGORY_MASK_CCL, else return 0. */
1397dc18 4498
0a28aafb 4499static int
ff0dacd7 4500detect_coding_ccl (coding, detect_info)
df7492f9 4501 struct coding_system *coding;
ff0dacd7 4502 struct coding_detection_info *detect_info;
1397dc18 4503{
065e3595 4504 const unsigned char *src = coding->source, *src_base;
8f924df7 4505 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4506 int multibytep = coding->src_multibyte;
4507 int consumed_chars = 0;
4508 int found = 0;
0e219d54 4509 unsigned char *valids;
df7492f9
KH
4510 int head_ascii = coding->head_ascii;
4511 Lisp_Object attrs;
4512
ff0dacd7
KH
4513 detect_info->checked |= CATEGORY_MASK_CCL;
4514
df7492f9 4515 coding = &coding_categories[coding_category_ccl];
0e219d54 4516 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
4517 attrs = CODING_ID_ATTRS (coding->id);
4518 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4519 src += head_ascii;
1397dc18 4520
b73bfc1c 4521 while (1)
1397dc18 4522 {
df7492f9 4523 int c;
065e3595
KH
4524
4525 src_base = src;
df7492f9 4526 ONE_MORE_BYTE (c);
065e3595 4527 if (c < 0 || ! valids[c])
df7492f9 4528 break;
ff0dacd7
KH
4529 if ((valids[c] > 1))
4530 found = CATEGORY_MASK_CCL;
df7492f9 4531 }
ff0dacd7 4532 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4533 return 0;
4534
4535 no_more_source:
ff0dacd7
KH
4536 detect_info->found |= found;
4537 return 1;
df7492f9
KH
4538}
4539
4540static void
4541decode_coding_ccl (coding)
4542 struct coding_system *coding;
4543{
7c78e542 4544 const unsigned char *src = coding->source + coding->consumed;
8f924df7 4545 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
4546 int *charbuf = coding->charbuf + coding->charbuf_used;
4547 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
4548 int consumed_chars = 0;
4549 int multibytep = coding->src_multibyte;
4550 struct ccl_program ccl;
4551 int source_charbuf[1024];
4552 int source_byteidx[1024];
24a73b0a 4553 Lisp_Object attrs, charset_list;
df7492f9 4554
24a73b0a 4555 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4556 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4557
4558 while (src < src_end)
4559 {
7c78e542 4560 const unsigned char *p = src;
df7492f9
KH
4561 int *source, *source_end;
4562 int i = 0;
4563
4564 if (multibytep)
4565 while (i < 1024 && p < src_end)
4566 {
4567 source_byteidx[i] = p - src;
4568 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4569 }
4570 else
4571 while (i < 1024 && p < src_end)
4572 source_charbuf[i++] = *p++;
8f924df7 4573
df7492f9
KH
4574 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4575 ccl.last_block = 1;
4576
4577 source = source_charbuf;
4578 source_end = source + i;
4579 while (source < source_end)
4580 {
4581 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4582 source_end - source, charbuf_end - charbuf,
4583 charset_list);
df7492f9
KH
4584 source += ccl.consumed;
4585 charbuf += ccl.produced;
4586 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4587 break;
4588 }
4589 if (source < source_end)
4590 src += source_byteidx[source - source_charbuf];
4591 else
4592 src = p;
4593 consumed_chars += source - source_charbuf;
4594
4595 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4596 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4597 break;
4598 }
4599
4600 switch (ccl.status)
4601 {
4602 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4603 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4604 break;
4605 case CCL_STAT_SUSPEND_BY_DST:
4606 break;
4607 case CCL_STAT_QUIT:
4608 case CCL_STAT_INVALID_CMD:
065e3595 4609 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4610 break;
4611 default:
065e3595 4612 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4613 break;
4614 }
4615 coding->consumed_char += consumed_chars;
4616 coding->consumed = src - coding->source;
4617 coding->charbuf_used = charbuf - coding->charbuf;
4618}
4619
4620static int
4621encode_coding_ccl (coding)
4622 struct coding_system *coding;
4623{
4624 struct ccl_program ccl;
4625 int multibytep = coding->dst_multibyte;
4626 int *charbuf = coding->charbuf;
4627 int *charbuf_end = charbuf + coding->charbuf_used;
4628 unsigned char *dst = coding->destination + coding->produced;
4629 unsigned char *dst_end = coding->destination + coding->dst_bytes;
df7492f9
KH
4630 int destination_charbuf[1024];
4631 int i, produced_chars = 0;
24a73b0a 4632 Lisp_Object attrs, charset_list;
df7492f9 4633
24a73b0a 4634 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4635 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4636
4637 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4638 ccl.dst_multibyte = coding->dst_multibyte;
4639
8cffd3e7 4640 while (charbuf < charbuf_end)
df7492f9 4641 {
df7492f9 4642 ccl_driver (&ccl, charbuf, destination_charbuf,
8cffd3e7 4643 charbuf_end - charbuf, 1024, charset_list);
df7492f9 4644 if (multibytep)
8cffd3e7
KH
4645 {
4646 ASSURE_DESTINATION (ccl.produced * 2);
4647 for (i = 0; i < ccl.produced; i++)
4648 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4649 }
df7492f9
KH
4650 else
4651 {
8cffd3e7 4652 ASSURE_DESTINATION (ccl.produced);
df7492f9
KH
4653 for (i = 0; i < ccl.produced; i++)
4654 *dst++ = destination_charbuf[i] & 0xFF;
4655 produced_chars += ccl.produced;
4656 }
8cffd3e7
KH
4657 charbuf += ccl.consumed;
4658 if (ccl.status == CCL_STAT_QUIT
4659 || ccl.status == CCL_STAT_INVALID_CMD)
4660 break;
df7492f9
KH
4661 }
4662
4663 switch (ccl.status)
4664 {
4665 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4666 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4667 break;
4668 case CCL_STAT_SUSPEND_BY_DST:
065e3595 4669 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
4670 break;
4671 case CCL_STAT_QUIT:
4672 case CCL_STAT_INVALID_CMD:
065e3595 4673 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4674 break;
4675 default:
065e3595 4676 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4677 break;
1397dc18 4678 }
df7492f9
KH
4679
4680 coding->produced_char += produced_chars;
4681 coding->produced = dst - coding->destination;
4682 return 0;
1397dc18
KH
4683}
4684
df7492f9 4685
1397dc18 4686\f
df7492f9 4687/*** 10, 11. no-conversion handlers ***/
4ed46869 4688
b73bfc1c 4689/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4690
b73bfc1c 4691static void
df7492f9 4692decode_coding_raw_text (coding)
4ed46869 4693 struct coding_system *coding;
4ed46869 4694{
119852e7
KH
4695 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4696
df7492f9 4697 coding->chars_at_source = 1;
119852e7
KH
4698 coding->consumed_char = coding->src_chars;
4699 coding->consumed = coding->src_bytes;
4700 if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4701 {
4702 coding->consumed_char--;
4703 coding->consumed--;
4704 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4705 }
4706 else
4707 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4708}
4ed46869 4709
df7492f9
KH
4710static int
4711encode_coding_raw_text (coding)
4712 struct coding_system *coding;
4713{
4714 int multibytep = coding->dst_multibyte;
4715 int *charbuf = coding->charbuf;
4716 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4717 unsigned char *dst = coding->destination + coding->produced;
4718 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4719 int produced_chars = 0;
b73bfc1c
KH
4720 int c;
4721
df7492f9 4722 if (multibytep)
b73bfc1c 4723 {
df7492f9 4724 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4725
df7492f9
KH
4726 if (coding->src_multibyte)
4727 while (charbuf < charbuf_end)
4728 {
4729 ASSURE_DESTINATION (safe_room);
4730 c = *charbuf++;
4731 if (ASCII_CHAR_P (c))
4732 EMIT_ONE_ASCII_BYTE (c);
4733 else if (CHAR_BYTE8_P (c))
4734 {
4735 c = CHAR_TO_BYTE8 (c);
4736 EMIT_ONE_BYTE (c);
4737 }
4738 else
4739 {
4740 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4741
df7492f9
KH
4742 CHAR_STRING_ADVANCE (c, p1);
4743 while (p0 < p1)
9d123124
KH
4744 {
4745 EMIT_ONE_BYTE (*p0);
4746 p0++;
4747 }
df7492f9
KH
4748 }
4749 }
b73bfc1c 4750 else
df7492f9
KH
4751 while (charbuf < charbuf_end)
4752 {
4753 ASSURE_DESTINATION (safe_room);
4754 c = *charbuf++;
4755 EMIT_ONE_BYTE (c);
4756 }
4757 }
4758 else
4ed46869 4759 {
df7492f9 4760 if (coding->src_multibyte)
d46c5b12 4761 {
df7492f9
KH
4762 int safe_room = MAX_MULTIBYTE_LENGTH;
4763
4764 while (charbuf < charbuf_end)
d46c5b12 4765 {
df7492f9
KH
4766 ASSURE_DESTINATION (safe_room);
4767 c = *charbuf++;
4768 if (ASCII_CHAR_P (c))
4769 *dst++ = c;
4770 else if (CHAR_BYTE8_P (c))
4771 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4772 else
df7492f9
KH
4773 CHAR_STRING_ADVANCE (c, dst);
4774 produced_chars++;
d46c5b12
KH
4775 }
4776 }
df7492f9
KH
4777 else
4778 {
4779 ASSURE_DESTINATION (charbuf_end - charbuf);
4780 while (charbuf < charbuf_end && dst < dst_end)
4781 *dst++ = *charbuf++;
4782 produced_chars = dst - (coding->destination + coding->dst_bytes);
8f924df7 4783 }
4ed46869 4784 }
065e3595 4785 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4786 coding->produced_char += produced_chars;
4787 coding->produced = dst - coding->destination;
4788 return 0;
4ed46869
KH
4789}
4790
ff0dacd7
KH
4791/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4792 Check if a text is encoded in a charset-based coding system. If it
4793 is, return 1, else return 0. */
4794
0a28aafb 4795static int
ff0dacd7 4796detect_coding_charset (coding, detect_info)
df7492f9 4797 struct coding_system *coding;
ff0dacd7 4798 struct coding_detection_info *detect_info;
1397dc18 4799{
065e3595 4800 const unsigned char *src = coding->source, *src_base;
8f924df7 4801 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4802 int multibytep = coding->src_multibyte;
4803 int consumed_chars = 0;
4804 Lisp_Object attrs, valids;
584948ac 4805 int found = 0;
716b3fa0 4806 int head_ascii = coding->head_ascii;
1397dc18 4807
ff0dacd7
KH
4808 detect_info->checked |= CATEGORY_MASK_CHARSET;
4809
df7492f9
KH
4810 coding = &coding_categories[coding_category_charset];
4811 attrs = CODING_ID_ATTRS (coding->id);
4812 valids = AREF (attrs, coding_attr_charset_valids);
4813
4814 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
716b3fa0 4815 src += head_ascii;
1397dc18 4816
b73bfc1c 4817 while (1)
1397dc18 4818 {
df7492f9 4819 int c;
716b3fa0
KH
4820 Lisp_Object val;
4821 struct charset *charset;
4822 int dim, idx;
1397dc18 4823
065e3595 4824 src_base = src;
df7492f9 4825 ONE_MORE_BYTE (c);
065e3595
KH
4826 if (c < 0)
4827 continue;
716b3fa0
KH
4828 val = AREF (valids, c);
4829 if (NILP (val))
df7492f9 4830 break;
584948ac 4831 if (c >= 0x80)
ff0dacd7 4832 found = CATEGORY_MASK_CHARSET;
716b3fa0
KH
4833 if (INTEGERP (val))
4834 {
4835 charset = CHARSET_FROM_ID (XFASTINT (val));
4836 dim = CHARSET_DIMENSION (charset);
4837 for (idx = 1; idx < dim; idx++)
4838 {
4839 if (src == src_end)
4840 goto too_short;
4841 ONE_MORE_BYTE (c);
4842 if (c < charset->code_space[(dim - 1 - idx) * 2]
4843 || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
4844 break;
4845 }
4846 if (idx < dim)
4847 break;
4848 }
4849 else
4850 {
4851 idx = 1;
4852 for (; CONSP (val); val = XCDR (val))
4853 {
4854 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4855 dim = CHARSET_DIMENSION (charset);
4856 while (idx < dim)
4857 {
4858 if (src == src_end)
4859 goto too_short;
4860 ONE_MORE_BYTE (c);
4861 if (c < charset->code_space[(dim - 1 - idx) * 4]
4862 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
4863 break;
4864 idx++;
4865 }
4866 if (idx == dim)
4867 {
4868 val = Qnil;
4869 break;
4870 }
4871 }
4872 if (CONSP (val))
4873 break;
4874 }
df7492f9 4875 }
716b3fa0 4876 too_short:
ff0dacd7 4877 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4878 return 0;
4ed46869 4879
df7492f9 4880 no_more_source:
ff0dacd7
KH
4881 detect_info->found |= found;
4882 return 1;
df7492f9 4883}
b73bfc1c 4884
b73bfc1c 4885static void
df7492f9 4886decode_coding_charset (coding)
4ed46869 4887 struct coding_system *coding;
4ed46869 4888{
8f924df7
KH
4889 const unsigned char *src = coding->source + coding->consumed;
4890 const unsigned char *src_end = coding->source + coding->src_bytes;
4891 const unsigned char *src_base;
69a80ea3
KH
4892 int *charbuf = coding->charbuf + coding->charbuf_used;
4893 int *charbuf_end
4894 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4895 int consumed_chars = 0, consumed_chars_base;
4896 int multibytep = coding->src_multibyte;
24a73b0a 4897 Lisp_Object attrs, charset_list, valids;
ff0dacd7
KH
4898 int char_offset = coding->produced_char;
4899 int last_offset = char_offset;
4900 int last_id = charset_ascii;
119852e7
KH
4901 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4902 int byte_after_cr = -1;
df7492f9 4903
24a73b0a 4904 CODING_GET_INFO (coding, attrs, charset_list);
4eb6d3f1 4905 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4906
df7492f9 4907 while (1)
4ed46869 4908 {
4eb6d3f1 4909 int c;
24a73b0a
KH
4910 Lisp_Object val;
4911 struct charset *charset;
4912 int dim;
4913 int len = 1;
4914 unsigned code;
df7492f9
KH
4915
4916 src_base = src;
4917 consumed_chars_base = consumed_chars;
b73bfc1c 4918
df7492f9
KH
4919 if (charbuf >= charbuf_end)
4920 break;
4921
119852e7
KH
4922 if (byte_after_cr >= 0)
4923 {
4924 c = byte_after_cr;
4925 byte_after_cr = -1;
4926 }
4927 else
4928 {
4929 ONE_MORE_BYTE (c);
4930 if (eol_crlf && c == '\r')
4931 ONE_MORE_BYTE (byte_after_cr);
4932 }
065e3595
KH
4933 if (c < 0)
4934 goto invalid_code;
24a73b0a
KH
4935 code = c;
4936
4937 val = AREF (valids, c);
4938 if (NILP (val))
4939 goto invalid_code;
4940 if (INTEGERP (val))
d46c5b12 4941 {
24a73b0a
KH
4942 charset = CHARSET_FROM_ID (XFASTINT (val));
4943 dim = CHARSET_DIMENSION (charset);
4944 while (len < dim)
b73bfc1c 4945 {
24a73b0a
KH
4946 ONE_MORE_BYTE (c);
4947 code = (code << 8) | c;
4948 len++;
b73bfc1c 4949 }
24a73b0a
KH
4950 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4951 charset, code, c);
d46c5b12 4952 }
df7492f9 4953 else
d46c5b12 4954 {
24a73b0a
KH
4955 /* VAL is a list of charset IDs. It is assured that the
4956 list is sorted by charset dimensions (smaller one
4957 comes first). */
4958 while (CONSP (val))
4eb6d3f1 4959 {
24a73b0a 4960 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 4961 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4962 while (len < dim)
4eb6d3f1 4963 {
acb2a965
KH
4964 ONE_MORE_BYTE (c);
4965 code = (code << 8) | c;
f9d71dcd 4966 len++;
4eb6d3f1 4967 }
24a73b0a
KH
4968 CODING_DECODE_CHAR (coding, src, src_base,
4969 src_end, charset, code, c);
4970 if (c >= 0)
4971 break;
4972 val = XCDR (val);
ff0dacd7 4973 }
d46c5b12 4974 }
24a73b0a
KH
4975 if (c < 0)
4976 goto invalid_code;
4977 if (charset->id != charset_ascii
4978 && last_id != charset->id)
4979 {
4980 if (last_id != charset_ascii)
69a80ea3 4981 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4982 last_id = charset->id;
4983 last_offset = char_offset;
4984 }
4985
df7492f9 4986 *charbuf++ = c;
ff0dacd7 4987 char_offset++;
df7492f9
KH
4988 continue;
4989
4990 invalid_code:
4991 src = src_base;
4992 consumed_chars = consumed_chars_base;
4993 ONE_MORE_BYTE (c);
065e3595 4994 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4995 char_offset++;
df7492f9 4996 coding->errors++;
4ed46869
KH
4997 }
4998
df7492f9 4999 no_more_source:
ff0dacd7 5000 if (last_id != charset_ascii)
69a80ea3 5001 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
5002 coding->consumed_char += consumed_chars_base;
5003 coding->consumed = src_base - coding->source;
5004 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
5005}
5006
df7492f9
KH
5007static int
5008encode_coding_charset (coding)
4ed46869 5009 struct coding_system *coding;
4ed46869 5010{
df7492f9
KH
5011 int multibytep = coding->dst_multibyte;
5012 int *charbuf = coding->charbuf;
5013 int *charbuf_end = charbuf + coding->charbuf_used;
5014 unsigned char *dst = coding->destination + coding->produced;
5015 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5016 int safe_room = MAX_MULTIBYTE_LENGTH;
5017 int produced_chars = 0;
24a73b0a 5018 Lisp_Object attrs, charset_list;
df7492f9 5019 int ascii_compatible;
b73bfc1c 5020 int c;
b73bfc1c 5021
24a73b0a 5022 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5023 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 5024
df7492f9 5025 while (charbuf < charbuf_end)
4ed46869 5026 {
4eb6d3f1 5027 struct charset *charset;
df7492f9 5028 unsigned code;
8f924df7 5029
df7492f9
KH
5030 ASSURE_DESTINATION (safe_room);
5031 c = *charbuf++;
5032 if (ascii_compatible && ASCII_CHAR_P (c))
5033 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 5034 else if (CHAR_BYTE8_P (c))
4ed46869 5035 {
16eafb5d
KH
5036 c = CHAR_TO_BYTE8 (c);
5037 EMIT_ONE_BYTE (c);
d46c5b12 5038 }
d46c5b12 5039 else
b73bfc1c 5040 {
4eb6d3f1
KH
5041 charset = char_charset (c, charset_list, &code);
5042 if (charset)
5043 {
5044 if (CHARSET_DIMENSION (charset) == 1)
5045 EMIT_ONE_BYTE (code);
5046 else if (CHARSET_DIMENSION (charset) == 2)
5047 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5048 else if (CHARSET_DIMENSION (charset) == 3)
5049 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5050 else
5051 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5052 (code >> 8) & 0xFF, code & 0xFF);
5053 }
5054 else
41cbe562
KH
5055 {
5056 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5057 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5058 else
5059 c = coding->default_char;
5060 EMIT_ONE_BYTE (c);
5061 }
4ed46869 5062 }
4ed46869
KH
5063 }
5064
065e3595 5065 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5066 coding->produced_char += produced_chars;
5067 coding->produced = dst - coding->destination;
5068 return 0;
4ed46869
KH
5069}
5070
5071\f
/*** 10. C library functions ***/
4ed46869 5073
df7492f9
KH
5074/* Setup coding context CODING from information about CODING_SYSTEM.
5075 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
5076 CODING_SYSTEM is invalid, signal an error. */
4ed46869 5077
ec6d2bb8 5078void
e0e989f6
KH
5079setup_coding_system (coding_system, coding)
5080 Lisp_Object coding_system;
4ed46869
KH
5081 struct coding_system *coding;
5082{
df7492f9
KH
5083 Lisp_Object attrs;
5084 Lisp_Object eol_type;
5085 Lisp_Object coding_type;
4608c386 5086 Lisp_Object val;
4ed46869 5087
df7492f9 5088 if (NILP (coding_system))
ae6f73fa 5089 coding_system = Qundecided;
c07c8e12 5090
df7492f9 5091 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 5092
df7492f9
KH
5093 attrs = CODING_ID_ATTRS (coding->id);
5094 eol_type = CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 5095
df7492f9
KH
5096 coding->mode = 0;
5097 coding->head_ascii = -1;
4a015c45
KH
5098 if (VECTORP (eol_type))
5099 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5100 | CODING_REQUIRE_DETECTION_MASK);
5101 else if (! EQ (eol_type, Qunix))
5102 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5103 | CODING_REQUIRE_ENCODING_MASK);
5104 else
5105 coding->common_flags = 0;
5e5c78be
KH
5106 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5107 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5108 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5109 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
5110 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5111 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 5112
df7492f9 5113 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
5114 coding->max_charset_id = SCHARS (val) - 1;
5115 coding->safe_charsets = (char *) SDATA (val);
df7492f9 5116 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4608c386 5117
df7492f9
KH
5118 coding_type = CODING_ATTR_TYPE (attrs);
5119 if (EQ (coding_type, Qundecided))
d46c5b12 5120 {
df7492f9
KH
5121 coding->detector = NULL;
5122 coding->decoder = decode_coding_raw_text;
5123 coding->encoder = encode_coding_raw_text;
5124 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5125 }
df7492f9 5126 else if (EQ (coding_type, Qiso_2022))
d46c5b12 5127 {
df7492f9
KH
5128 int i;
5129 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5130
5131 /* Invoke graphic register 0 to plane 0. */
5132 CODING_ISO_INVOCATION (coding, 0) = 0;
5133 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
5134 CODING_ISO_INVOCATION (coding, 1)
5135 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5136 /* Setup the initial status of designation. */
5137 for (i = 0; i < 4; i++)
5138 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5139 /* Not single shifting initially. */
5140 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5141 /* Beginning of buffer should also be regarded as bol. */
5142 CODING_ISO_BOL (coding) = 1;
5143 coding->detector = detect_coding_iso_2022;
5144 coding->decoder = decode_coding_iso_2022;
5145 coding->encoder = encode_coding_iso_2022;
5146 if (flags & CODING_ISO_FLAG_SAFE)
5147 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 5148 coding->common_flags
df7492f9
KH
5149 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5150 | CODING_REQUIRE_FLUSHING_MASK);
5151 if (flags & CODING_ISO_FLAG_COMPOSITION)
5152 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
5153 if (flags & CODING_ISO_FLAG_DESIGNATION)
5154 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
5155 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5156 {
5157 setup_iso_safe_charsets (attrs);
5158 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
5159 coding->max_charset_id = SCHARS (val) - 1;
5160 coding->safe_charsets = (char *) SDATA (val);
df7492f9
KH
5161 }
5162 CODING_ISO_FLAGS (coding) = flags;
d46c5b12 5163 }
df7492f9 5164 else if (EQ (coding_type, Qcharset))
d46c5b12 5165 {
df7492f9
KH
5166 coding->detector = detect_coding_charset;
5167 coding->decoder = decode_coding_charset;
5168 coding->encoder = encode_coding_charset;
d46c5b12 5169 coding->common_flags
df7492f9 5170 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5171 }
df7492f9 5172 else if (EQ (coding_type, Qutf_8))
d46c5b12 5173 {
df7492f9
KH
5174 coding->detector = detect_coding_utf_8;
5175 coding->decoder = decode_coding_utf_8;
5176 coding->encoder = encode_coding_utf_8;
5177 coding->common_flags
5178 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5179 }
5180 else if (EQ (coding_type, Qutf_16))
5181 {
5182 val = AREF (attrs, coding_attr_utf_16_bom);
5183 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
5184 : EQ (val, Qt) ? utf_16_with_bom
5185 : utf_16_without_bom);
5186 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5187 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5188 : utf_16_little_endian);
e19c3639 5189 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5190 coding->detector = detect_coding_utf_16;
5191 coding->decoder = decode_coding_utf_16;
5192 coding->encoder = encode_coding_utf_16;
5193 coding->common_flags
5194 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
b49a1807
KH
5195 if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5196 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5197 }
df7492f9 5198 else if (EQ (coding_type, Qccl))
4ed46869 5199 {
df7492f9
KH
5200 coding->detector = detect_coding_ccl;
5201 coding->decoder = decode_coding_ccl;
5202 coding->encoder = encode_coding_ccl;
c952af22 5203 coding->common_flags
df7492f9
KH
5204 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5205 | CODING_REQUIRE_FLUSHING_MASK);
5206 }
5207 else if (EQ (coding_type, Qemacs_mule))
5208 {
5209 coding->detector = detect_coding_emacs_mule;
5210 coding->decoder = decode_coding_emacs_mule;
5211 coding->encoder = encode_coding_emacs_mule;
c952af22 5212 coding->common_flags
df7492f9
KH
5213 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5214 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5215 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5216 {
5217 Lisp_Object tail, safe_charsets;
5218 int max_charset_id = 0;
5219
5220 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5221 tail = XCDR (tail))
5222 if (max_charset_id < XFASTINT (XCAR (tail)))
5223 max_charset_id = XFASTINT (XCAR (tail));
5224 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5225 make_number (255));
5226 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5227 tail = XCDR (tail))
8f924df7 5228 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5229 coding->max_charset_id = max_charset_id;
8f924df7 5230 coding->safe_charsets = (char *) SDATA (safe_charsets);
df7492f9
KH
5231 }
5232 }
5233 else if (EQ (coding_type, Qshift_jis))
5234 {
5235 coding->detector = detect_coding_sjis;
5236 coding->decoder = decode_coding_sjis;
5237 coding->encoder = encode_coding_sjis;
c952af22 5238 coding->common_flags
df7492f9
KH
5239 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5240 }
5241 else if (EQ (coding_type, Qbig5))
5242 {
5243 coding->detector = detect_coding_big5;
5244 coding->decoder = decode_coding_big5;
5245 coding->encoder = encode_coding_big5;
c952af22 5246 coding->common_flags
df7492f9
KH
5247 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5248 }
5249 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5250 {
df7492f9
KH
5251 coding->detector = NULL;
5252 coding->decoder = decode_coding_raw_text;
5253 coding->encoder = encode_coding_raw_text;
ea29edf2
KH
5254 if (! EQ (eol_type, Qunix))
5255 {
5256 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5257 if (! VECTORP (eol_type))
5258 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5259 }
5260
4ed46869 5261 }
4ed46869 5262
df7492f9 5263 return;
4ed46869
KH
5264}
5265
0ff61e78
KH
5266/* Return a list of charsets supported by CODING. */
5267
5268Lisp_Object
5269coding_charset_list (coding)
5270 struct coding_system *coding;
5271{
35befdaa 5272 Lisp_Object attrs, charset_list;
0ff61e78
KH
5273
5274 CODING_GET_INFO (coding, attrs, charset_list);
5275 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5276 {
5277 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5278
5279 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5280 charset_list = Viso_2022_charset_list;
5281 }
5282 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5283 {
5284 charset_list = Vemacs_mule_charset_list;
5285 }
5286 return charset_list;
5287}
5288
5289
df7492f9
KH
5290/* Return raw-text or one of its subsidiaries that has the same
5291 eol_type as CODING-SYSTEM. */
ec6d2bb8 5292
df7492f9
KH
5293Lisp_Object
5294raw_text_coding_system (coding_system)
5295 Lisp_Object coding_system;
ec6d2bb8 5296{
0be8721c 5297 Lisp_Object spec, attrs;
df7492f9 5298 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 5299
d3e4cb56
KH
5300 if (NILP (coding_system))
5301 return Qraw_text;
df7492f9
KH
5302 spec = CODING_SYSTEM_SPEC (coding_system);
5303 attrs = AREF (spec, 0);
ec6d2bb8 5304
df7492f9
KH
5305 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5306 return coding_system;
ec6d2bb8 5307
df7492f9
KH
5308 eol_type = AREF (spec, 2);
5309 if (VECTORP (eol_type))
5310 return Qraw_text;
5311 spec = CODING_SYSTEM_SPEC (Qraw_text);
5312 raw_text_eol_type = AREF (spec, 2);
5313 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5314 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5315 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
5316}
5317
54f78171 5318
df7492f9
KH
5319/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5320 does, return one of the subsidiary that has the same eol-spec as
fcbcfb64
KH
5321 PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil,
5322 inherit end-of-line format from the system's setting
5323 (system_eol_type). */
df7492f9
KH
5324
5325Lisp_Object
5326coding_inherit_eol_type (coding_system, parent)
b74e4686 5327 Lisp_Object coding_system, parent;
54f78171 5328{
3e139625 5329 Lisp_Object spec, eol_type;
54f78171 5330
d3e4cb56
KH
5331 if (NILP (coding_system))
5332 coding_system = Qraw_text;
df7492f9 5333 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 5334 eol_type = AREF (spec, 2);
fcbcfb64 5335 if (VECTORP (eol_type))
df7492f9 5336 {
df7492f9
KH
5337 Lisp_Object parent_eol_type;
5338
fcbcfb64
KH
5339 if (! NILP (parent))
5340 {
5341 Lisp_Object parent_spec;
5342
4a015c45 5343 parent_spec = CODING_SYSTEM_SPEC (parent);
fcbcfb64
KH
5344 parent_eol_type = AREF (parent_spec, 2);
5345 }
5346 else
5347 parent_eol_type = system_eol_type;
df7492f9
KH
5348 if (EQ (parent_eol_type, Qunix))
5349 coding_system = AREF (eol_type, 0);
5350 else if (EQ (parent_eol_type, Qdos))
5351 coding_system = AREF (eol_type, 1);
5352 else if (EQ (parent_eol_type, Qmac))
5353 coding_system = AREF (eol_type, 2);
54f78171 5354 }
df7492f9 5355 return coding_system;
54f78171
KH
5356}
5357
4ed46869
KH
5358/* Emacs has a mechanism to automatically detect a coding system if it
5359 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5360 it's impossible to distinguish some coding systems accurately
5361 because they use the same range of codes. So, at first, coding
5362 systems are categorized into 7, those are:
5363
0ef69138 5364 o coding-category-emacs-mule
4ed46869
KH
5365
5366 The category for a coding system which has the same code range
5367 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5368 symbol) `emacs-mule' by default.
4ed46869
KH
5369
5370 o coding-category-sjis
5371
5372 The category for a coding system which has the same code range
5373 as SJIS. Assigned the coding-system (Lisp
7717c392 5374 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5375
5376 o coding-category-iso-7
5377
5378 The category for a coding system which has the same code range
7717c392 5379 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5380 shift and single shift functions. This can encode/decode all
5381 charsets. Assigned the coding-system (Lisp symbol)
5382 `iso-2022-7bit' by default.
5383
5384 o coding-category-iso-7-tight
5385
5386 Same as coding-category-iso-7 except that this can
5387 encode/decode only the specified charsets.
4ed46869
KH
5388
5389 o coding-category-iso-8-1
5390
5391 The category for a coding system which has the same code range
5392 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5393 for DIMENSION1 charset. This doesn't use any locking shift
5394 and single shift functions. Assigned the coding-system (Lisp
5395 symbol) `iso-latin-1' by default.
4ed46869
KH
5396
5397 o coding-category-iso-8-2
5398
5399 The category for a coding system which has the same code range
5400 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5401 for DIMENSION2 charset. This doesn't use any locking shift
5402 and single shift functions. Assigned the coding-system (Lisp
5403 symbol) `japanese-iso-8bit' by default.
4ed46869 5404
7717c392 5405 o coding-category-iso-7-else
4ed46869
KH
5406
5407 The category for a coding system which has the same code range
      as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
5409 single shift functions. Assigned the coding-system (Lisp
5410 symbol) `iso-2022-7bit-lock' by default.
5411
5412 o coding-category-iso-8-else
5413
5414 The category for a coding system which has the same code range
      as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
5416 single shift functions. Assigned the coding-system (Lisp
5417 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5418
5419 o coding-category-big5
5420
5421 The category for a coding system which has the same code range
5422 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5423 `cn-big5' by default.
4ed46869 5424
fa42c37f
KH
5425 o coding-category-utf-8
5426
5427 The category for a coding system which has the same code range
6e76ae91 5428 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
5429 symbol) `utf-8' by default.
5430
5431 o coding-category-utf-16-be
5432
5433 The category for a coding system in which a text has an
5434 Unicode signature (cf. Unicode Standard) in the order of BIG
5435 endian at the head. Assigned the coding-system (Lisp symbol)
5436 `utf-16-be' by default.
5437
5438 o coding-category-utf-16-le
5439
5440 The category for a coding system in which a text has an
5441 Unicode signature (cf. Unicode Standard) in the order of
5442 LITTLE endian at the head. Assigned the coding-system (Lisp
5443 symbol) `utf-16-le' by default.
5444
1397dc18
KH
5445 o coding-category-ccl
5446
5447 The category for a coding system of which encoder/decoder is
5448 written in CCL programs. The default value is nil, i.e., no
5449 coding system is assigned.
5450
4ed46869
KH
5451 o coding-category-binary
5452
5453 The category for a coding system not categorized in any of the
5454 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5455 `no-conversion' by default.
4ed46869
KH
5456
5457 Each of them is a Lisp symbol and the value is an actual
df7492f9 5458 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5459 What Emacs does actually is to detect a category of coding system.
5460 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5461 decide only one possible category, it selects a category of the
4ed46869
KH
5462 highest priority. Priorities of categories are also specified by a
5463 user in a Lisp variable `coding-category-list'.
5464
5465*/
5466
df7492f9
KH
5467#define EOL_SEEN_NONE 0
5468#define EOL_SEEN_LF 1
5469#define EOL_SEEN_CR 2
5470#define EOL_SEEN_CRLF 4
66cfb530 5471
ff0dacd7
KH
5472/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5473 SOURCE is encoded. If CATEGORY is one of
5474 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5475 two-byte, else they are encoded by one-byte.
5476
5477 Return one of EOL_SEEN_XXX. */
4ed46869 5478
bc4bc72a 5479#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
5480
5481static int
89528eb3 5482detect_eol (source, src_bytes, category)
f6cbaf43 5483 const unsigned char *source;
df7492f9 5484 EMACS_INT src_bytes;
89528eb3 5485 enum coding_category category;
4ed46869 5486{
f6cbaf43 5487 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5488 unsigned char c;
df7492f9
KH
5489 int total = 0;
5490 int eol_seen = EOL_SEEN_NONE;
4ed46869 5491
89528eb3 5492 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5493 {
df7492f9 5494 int msb, lsb;
fa42c37f 5495
89528eb3
KH
5496 msb = category == (coding_category_utf_16_le
5497 | coding_category_utf_16_le_nosig);
df7492f9 5498 lsb = 1 - msb;
fa42c37f 5499
df7492f9 5500 while (src + 1 < src_end)
fa42c37f 5501 {
df7492f9
KH
5502 c = src[lsb];
5503 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5504 {
df7492f9
KH
5505 int this_eol;
5506
5507 if (c == '\n')
5508 this_eol = EOL_SEEN_LF;
5509 else if (src + 3 >= src_end
5510 || src[msb + 2] != 0
5511 || src[lsb + 2] != '\n')
5512 this_eol = EOL_SEEN_CR;
fa42c37f 5513 else
8f924df7 5514 this_eol = EOL_SEEN_CRLF;
df7492f9
KH
5515
5516 if (eol_seen == EOL_SEEN_NONE)
5517 /* This is the first end-of-line. */
5518 eol_seen = this_eol;
5519 else if (eol_seen != this_eol)
fa42c37f 5520 {
df7492f9
KH
5521 /* The found type is different from what found before. */
5522 eol_seen = EOL_SEEN_LF;
5523 break;
fa42c37f 5524 }
df7492f9
KH
5525 if (++total == MAX_EOL_CHECK_COUNT)
5526 break;
fa42c37f 5527 }
df7492f9 5528 src += 2;
fa42c37f 5529 }
bcf26d6a 5530 }
d46c5b12 5531 else
c4825358 5532 {
df7492f9 5533 while (src < src_end)
27901516 5534 {
df7492f9
KH
5535 c = *src++;
5536 if (c == '\n' || c == '\r')
5537 {
5538 int this_eol;
d46c5b12 5539
df7492f9
KH
5540 if (c == '\n')
5541 this_eol = EOL_SEEN_LF;
5542 else if (src >= src_end || *src != '\n')
5543 this_eol = EOL_SEEN_CR;
5544 else
5545 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5546
df7492f9
KH
5547 if (eol_seen == EOL_SEEN_NONE)
5548 /* This is the first end-of-line. */
5549 eol_seen = this_eol;
5550 else if (eol_seen != this_eol)
5551 {
5552 /* The found type is different from what found before. */
5553 eol_seen = EOL_SEEN_LF;
5554 break;
5555 }
5556 if (++total == MAX_EOL_CHECK_COUNT)
5557 break;
5558 }
5559 }
73be902c 5560 }
df7492f9 5561 return eol_seen;
73be902c
KH
5562}
5563
df7492f9 5564
24a73b0a 5565static Lisp_Object
df7492f9
KH
5566adjust_coding_eol_type (coding, eol_seen)
5567 struct coding_system *coding;
5568 int eol_seen;
73be902c 5569{
0be8721c 5570 Lisp_Object eol_type;
8f924df7 5571
df7492f9
KH
5572 eol_type = CODING_ID_EOL_TYPE (coding->id);
5573 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
5574 {
5575 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5576 eol_type = Qunix;
5577 }
6f197c07 5578 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
5579 {
5580 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5581 eol_type = Qdos;
5582 }
6f197c07 5583 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
5584 {
5585 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5586 eol_type = Qmac;
5587 }
5588 return eol_type;
d46c5b12 5589}
4ed46869 5590
df7492f9
KH
5591/* Detect how a text specified in CODING is encoded. If a coding
5592 system is detected, update fields of CODING by the detected coding
5593 system. */
0a28aafb 5594
df7492f9
KH
5595void
5596detect_coding (coding)
d46c5b12 5597 struct coding_system *coding;
d46c5b12 5598{
8f924df7 5599 const unsigned char *src, *src_end;
d46c5b12 5600
df7492f9
KH
5601 coding->consumed = coding->consumed_char = 0;
5602 coding->produced = coding->produced_char = 0;
5603 coding_set_source (coding);
1c3478b0 5604
df7492f9 5605 src_end = coding->source + coding->src_bytes;
1c3478b0 5606
df7492f9
KH
5607 /* If we have not yet decided the text encoding type, detect it
5608 now. */
5609 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5610 {
df7492f9 5611 int c, i;
6cb21a4f 5612 struct coding_detection_info detect_info;
df7492f9 5613
6cb21a4f 5614 detect_info.checked = detect_info.found = detect_info.rejected = 0;
24a73b0a 5615 for (i = 0, src = coding->source; src < src_end; i++, src++)
d46c5b12 5616 {
df7492f9 5617 c = *src;
6cb21a4f 5618 if (c & 0x80)
df7492f9 5619 break;
6cb21a4f
KH
5620 if (c < 0x20
5621 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5622 && ! inhibit_iso_escape_detection
5623 && ! detect_info.checked)
5624 {
5625 coding->head_ascii = src - (coding->source + coding->consumed);
5626 if (detect_coding_iso_2022 (coding, &detect_info))
5627 {
5628 /* We have scanned the whole data. */
5629 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5630 /* We didn't find an 8-bit code. */
5631 src = src_end;
5632 break;
5633 }
5634 }
d46c5b12 5635 }
df7492f9
KH
5636 coding->head_ascii = src - (coding->source + coding->consumed);
5637
3aef54f3 5638 if (coding->head_ascii < coding->src_bytes
6cb21a4f 5639 || detect_info.found)
d46c5b12 5640 {
ff0dacd7
KH
5641 enum coding_category category;
5642 struct coding_system *this;
df7492f9 5643
6cb21a4f
KH
5644 if (coding->head_ascii == coding->src_bytes)
5645 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
5646 for (i = 0; i < coding_category_raw_text; i++)
5647 {
5648 category = coding_priorities[i];
5649 this = coding_categories + category;
5650 if (detect_info.found & (1 << category))
24a73b0a 5651 break;
6cb21a4f
KH
5652 }
5653 else
5654 for (i = 0; i < coding_category_raw_text; i++)
5655 {
5656 category = coding_priorities[i];
5657 this = coding_categories + category;
5658 if (this->id < 0)
5659 {
5660 /* No coding system of this category is defined. */
5661 detect_info.rejected |= (1 << category);
5662 }
5663 else if (category >= coding_category_raw_text)
5664 continue;
5665 else if (detect_info.checked & (1 << category))
5666 {
5667 if (detect_info.found & (1 << category))
5668 break;
5669 }
5670 else if ((*(this->detector)) (coding, &detect_info)
5671 && detect_info.found & (1 << category))
5672 {
5673 if (category == coding_category_utf_16_auto)
5674 {
5675 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5676 category = coding_category_utf_16_le;
5677 else
5678 category = coding_category_utf_16_be;
5679 }
5680 break;
5681 }
5682 }
5683
ff0dacd7
KH
5684 if (i < coding_category_raw_text)
5685 setup_coding_system (CODING_ID_NAME (this->id), coding);
5686 else if (detect_info.rejected == CATEGORY_MASK_ANY)
df7492f9 5687 setup_coding_system (Qraw_text, coding);
ff0dacd7 5688 else if (detect_info.rejected)
df7492f9 5689 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7
KH
5690 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5691 {
5692 this = coding_categories + coding_priorities[i];
5693 setup_coding_system (CODING_ID_NAME (this->id), coding);
5694 break;
5695 }
d46c5b12 5696 }
b73bfc1c 5697 }
24a73b0a
KH
5698 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5699 == coding_category_utf_16_auto)
b49a1807
KH
5700 {
5701 Lisp_Object coding_systems;
5702 struct coding_detection_info detect_info;
5703
5704 coding_systems
5705 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5706 detect_info.found = detect_info.rejected = 0;
5707 if (CONSP (coding_systems)
24a73b0a 5708 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
5709 {
5710 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5711 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 5712 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
5713 setup_coding_system (XCDR (coding_systems), coding);
5714 }
5715 }
4ed46869 5716}
4ed46869 5717
d46c5b12 5718
aaaf0b1e 5719static void
df7492f9 5720decode_eol (coding)
aaaf0b1e 5721 struct coding_system *coding;
aaaf0b1e 5722{
24a73b0a
KH
5723 Lisp_Object eol_type;
5724 unsigned char *p, *pbeg, *pend;
5725
5726 eol_type = CODING_ID_EOL_TYPE (coding->id);
5727 if (EQ (eol_type, Qunix))
5728 return;
5729
5730 if (NILP (coding->dst_object))
5731 pbeg = coding->destination;
5732 else
5733 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5734 pend = pbeg + coding->produced;
5735
5736 if (VECTORP (eol_type))
aaaf0b1e 5737 {
df7492f9 5738 int eol_seen = EOL_SEEN_NONE;
4ed46869 5739
24a73b0a 5740 for (p = pbeg; p < pend; p++)
aaaf0b1e 5741 {
df7492f9
KH
5742 if (*p == '\n')
5743 eol_seen |= EOL_SEEN_LF;
5744 else if (*p == '\r')
aaaf0b1e 5745 {
df7492f9 5746 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5747 {
df7492f9
KH
5748 eol_seen |= EOL_SEEN_CRLF;
5749 p++;
aaaf0b1e 5750 }
aaaf0b1e 5751 else
df7492f9 5752 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5753 }
aaaf0b1e 5754 }
24a73b0a
KH
5755 if (eol_seen != EOL_SEEN_NONE
5756 && eol_seen != EOL_SEEN_LF
5757 && eol_seen != EOL_SEEN_CRLF
5758 && eol_seen != EOL_SEEN_CR)
5759 eol_seen = EOL_SEEN_LF;
df7492f9 5760 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 5761 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5762 }
d46c5b12 5763
24a73b0a 5764 if (EQ (eol_type, Qmac))
27901516 5765 {
24a73b0a 5766 for (p = pbeg; p < pend; p++)
df7492f9
KH
5767 if (*p == '\r')
5768 *p = '\n';
4ed46869 5769 }
24a73b0a 5770 else if (EQ (eol_type, Qdos))
df7492f9 5771 {
24a73b0a 5772 int n = 0;
b73bfc1c 5773
24a73b0a
KH
5774 if (NILP (coding->dst_object))
5775 {
4347441b
KH
5776 /* Start deleting '\r' from the tail to minimize the memory
5777 movement. */
24a73b0a
KH
5778 for (p = pend - 2; p >= pbeg; p--)
5779 if (*p == '\r')
5780 {
5781 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5782 n++;
5783 }
5784 }
5785 else
5786 {
4347441b
KH
5787 int pos_byte = coding->dst_pos_byte;
5788 int pos = coding->dst_pos;
5789 int pos_end = pos + coding->produced_char - 1;
5790
5791 while (pos < pos_end)
5792 {
5793 p = BYTE_POS_ADDR (pos_byte);
5794 if (*p == '\r' && p[1] == '\n')
5795 {
5796 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5797 n++;
5798 pos_end--;
5799 }
5800 pos++;
69b8522d
KH
5801 if (coding->dst_multibyte)
5802 pos_byte += BYTES_BY_CHAR_HEAD (*p);
5803 else
5804 pos_byte++;
4347441b 5805 }
24a73b0a
KH
5806 }
5807 coding->produced -= n;
5808 coding->produced_char -= n;
aaaf0b1e 5809 }
4ed46869
KH
5810}
5811
7d64c6ad 5812
a6f87d34
KH
5813/* Return a translation table (or list of them) from coding system
5814 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5815 decoding (ENCODEP is zero). */
7d64c6ad 5816
e6a54062 5817static Lisp_Object
09ee6fdd
KH
5818get_translation_table (attrs, encodep, max_lookup)
5819 Lisp_Object attrs;
5820 int encodep, *max_lookup;
7d64c6ad
KH
5821{
5822 Lisp_Object standard, translation_table;
09ee6fdd 5823 Lisp_Object val;
7d64c6ad
KH
5824
5825 if (encodep)
5826 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5827 standard = Vstandard_translation_table_for_encode;
5828 else
5829 translation_table = CODING_ATTR_DECODE_TBL (attrs),
5830 standard = Vstandard_translation_table_for_decode;
7d64c6ad 5831 if (NILP (translation_table))
09ee6fdd
KH
5832 translation_table = standard;
5833 else
a6f87d34 5834 {
09ee6fdd
KH
5835 if (SYMBOLP (translation_table))
5836 translation_table = Fget (translation_table, Qtranslation_table);
5837 else if (CONSP (translation_table))
5838 {
5839 translation_table = Fcopy_sequence (translation_table);
5840 for (val = translation_table; CONSP (val); val = XCDR (val))
5841 if (SYMBOLP (XCAR (val)))
5842 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5843 }
5844 if (CHAR_TABLE_P (standard))
5845 {
5846 if (CONSP (translation_table))
5847 translation_table = nconc2 (translation_table,
5848 Fcons (standard, Qnil));
5849 else
5850 translation_table = Fcons (translation_table,
5851 Fcons (standard, Qnil));
5852 }
a6f87d34 5853 }
2170c8f0
KH
5854
5855 if (max_lookup)
09ee6fdd 5856 {
2170c8f0
KH
5857 *max_lookup = 1;
5858 if (CHAR_TABLE_P (translation_table)
5859 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5860 {
5861 val = XCHAR_TABLE (translation_table)->extras[1];
5862 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5863 *max_lookup = XFASTINT (val);
5864 }
5865 else if (CONSP (translation_table))
5866 {
5867 Lisp_Object tail, val;
09ee6fdd 5868
2170c8f0
KH
5869 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5870 if (CHAR_TABLE_P (XCAR (tail))
5871 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5872 {
5873 val = XCHAR_TABLE (XCAR (tail))->extras[1];
5874 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5875 *max_lookup = XFASTINT (val);
5876 }
5877 }
a6f87d34 5878 }
7d64c6ad
KH
5879 return translation_table;
5880}
5881
09ee6fdd
KH
/* Look up character C in TABLE, which is a char-table or a list of
   char-tables, and set TRANS to the result.  C and TRANS must be
   lvalues.

   TRANS is first set to Qnil.  When a looked-up value satisfies
   CHARACTERP, C is replaced by it and TRANS is reset to Qnil; with
   a list, the lookup then goes on with the next table using the new
   C.  With a list, the first value that is neither a character nor
   nil stops the search and is left in TRANS.

   TABLE is evaluated exactly once.  The locals end in an underscore
   so that they do not capture a variable of the caller.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)			\
  do {									\
    Lisp_Object lookup_table_ = (table);				\
									\
    (trans) = Qnil;							\
    if (CHAR_TABLE_P (lookup_table_))					\
      {									\
	(trans) = CHAR_TABLE_REF (lookup_table_, (c));			\
	if (CHARACTERP (trans))						\
	  (c) = XFASTINT (trans), (trans) = Qnil;			\
      }									\
    else if (CONSP (lookup_table_))					\
      {									\
	Lisp_Object lookup_tail_;					\
									\
	for (lookup_tail_ = lookup_table_; CONSP (lookup_tail_);	\
	     lookup_tail_ = XCDR (lookup_tail_))			\
	  if (CHAR_TABLE_P (XCAR (lookup_tail_)))			\
	    {								\
	      (trans) = CHAR_TABLE_REF (XCAR (lookup_tail_), (c));	\
	      if (CHARACTERP (trans))					\
		(c) = XFASTINT (trans), (trans) = Qnil;			\
	      else if (! NILP (trans))					\
		break;							\
	    }								\
      }									\
  } while (0)
5906
7d64c6ad 5907
69a80ea3
KH
5908static Lisp_Object
5909get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5910 Lisp_Object val;
5911 int *buf, *buf_end;
5912 int last_block;
5913 int *from_nchars, *to_nchars;
5914{
433f7f87
KH
5915 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
5916 [TO-CHAR ...]. */
69a80ea3
KH
5917 if (CONSP (val))
5918 {
433f7f87 5919 Lisp_Object from, tail;
69a80ea3
KH
5920 int i, len;
5921
433f7f87 5922 for (tail = val; CONSP (tail); tail = XCDR (tail))
69a80ea3 5923 {
433f7f87
KH
5924 val = XCAR (tail);
5925 from = XCAR (val);
5926 len = ASIZE (from);
5927 for (i = 0; i < len; i++)
5928 {
5929 if (buf + i == buf_end)
5930 {
5931 if (! last_block)
5932 return Qt;
5933 break;
5934 }
5935 if (XINT (AREF (from, i)) != buf[i])
5936 break;
5937 }
5938 if (i == len)
5939 {
5940 val = XCDR (val);
5941 *from_nchars = len;
5942 break;
5943 }
69a80ea3 5944 }
433f7f87
KH
5945 if (! CONSP (tail))
5946 return Qnil;
69a80ea3
KH
5947 }
5948 if (VECTORP (val))
5949 *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
5950 else
5951 *buf = XINT (val);
5952 return val;
5953}
5954
5955
d46c5b12 5956static int
69a80ea3 5957produce_chars (coding, translation_table, last_block)
df7492f9 5958 struct coding_system *coding;
69a80ea3
KH
5959 Lisp_Object translation_table;
5960 int last_block;
4ed46869 5961{
df7492f9
KH
5962 unsigned char *dst = coding->destination + coding->produced;
5963 unsigned char *dst_end = coding->destination + coding->dst_bytes;
119852e7
KH
5964 EMACS_INT produced;
5965 EMACS_INT produced_chars = 0;
69a80ea3 5966 int carryover = 0;
4ed46869 5967
df7492f9 5968 if (! coding->chars_at_source)
4ed46869 5969 {
119852e7 5970 /* Source characters are in coding->charbuf. */
fba4576f
AS
5971 int *buf = coding->charbuf;
5972 int *buf_end = buf + coding->charbuf_used;
4ed46869 5973
df7492f9
KH
5974 if (BUFFERP (coding->src_object)
5975 && EQ (coding->src_object, coding->dst_object))
8f924df7 5976 dst_end = ((unsigned char *) coding->source) + coding->consumed;
4ed46869 5977
df7492f9 5978 while (buf < buf_end)
4ed46869 5979 {
69a80ea3 5980 int c = *buf, i;
bc4bc72a 5981
df7492f9
KH
5982 if (c >= 0)
5983 {
69a80ea3
KH
5984 int from_nchars = 1, to_nchars = 1;
5985 Lisp_Object trans = Qnil;
5986
09ee6fdd 5987 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 5988 if (! NILP (trans))
69a80ea3
KH
5989 {
5990 trans = get_translation (trans, buf, buf_end, last_block,
5991 &from_nchars, &to_nchars);
5992 if (EQ (trans, Qt))
5993 break;
5994 c = *buf;
5995 }
5996
5997 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
5998 {
5999 dst = alloc_destination (coding,
6000 buf_end - buf
6001 + MAX_MULTIBYTE_LENGTH * to_nchars,
6002 dst);
6003 dst_end = coding->destination + coding->dst_bytes;
6004 }
6005
433f7f87 6006 for (i = 0; i < to_nchars; i++)
69a80ea3 6007 {
433f7f87
KH
6008 if (i > 0)
6009 c = XINT (AREF (trans, i));
69a80ea3
KH
6010 if (coding->dst_multibyte
6011 || ! CHAR_BYTE8_P (c))
6012 CHAR_STRING_ADVANCE (c, dst);
6013 else
6014 *dst++ = CHAR_TO_BYTE8 (c);
6015 }
6016 produced_chars += to_nchars;
6017 *buf++ = to_nchars;
6018 while (--from_nchars > 0)
6019 *buf++ = 0;
d46c5b12 6020 }
df7492f9 6021 else
69a80ea3
KH
6022 /* This is an annotation datum. (-C) is the length. */
6023 buf += -c;
4ed46869 6024 }
69a80ea3 6025 carryover = buf_end - buf;
4ed46869 6026 }
fa42c37f 6027 else
fa42c37f 6028 {
119852e7 6029 /* Source characters are at coding->source. */
8f924df7 6030 const unsigned char *src = coding->source;
119852e7 6031 const unsigned char *src_end = src + coding->consumed;
4ed46869 6032
df7492f9 6033 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 6034 {
df7492f9 6035 if (coding->src_multibyte)
fa42c37f 6036 {
71c81426 6037 int multibytep = 1;
119852e7 6038 EMACS_INT consumed_chars;
d46c5b12 6039
df7492f9
KH
6040 while (1)
6041 {
8f924df7 6042 const unsigned char *src_base = src;
df7492f9 6043 int c;
b73bfc1c 6044
df7492f9 6045 ONE_MORE_BYTE (c);
119852e7 6046 if (dst == dst_end)
df7492f9 6047 {
119852e7
KH
6048 if (EQ (coding->src_object, coding->dst_object))
6049 dst_end = (unsigned char *) src;
6050 if (dst == dst_end)
df7492f9 6051 {
119852e7
KH
6052 EMACS_INT offset = src - coding->source;
6053
6054 dst = alloc_destination (coding, src_end - src + 1,
6055 dst);
6056 dst_end = coding->destination + coding->dst_bytes;
6057 coding_set_source (coding);
6058 src = coding->source + offset;
6059 src_end = coding->source + coding->src_bytes;
df7492f9 6060 }
df7492f9
KH
6061 }
6062 *dst++ = c;
6063 produced_chars++;
6064 }
6065 no_more_source:
6066 ;
fa42c37f
KH
6067 }
6068 else
df7492f9
KH
6069 while (src < src_end)
6070 {
71c81426 6071 int multibytep = 1;
df7492f9 6072 int c = *src++;
b73bfc1c 6073
df7492f9
KH
6074 if (dst >= dst_end - 1)
6075 {
2c78b7e1 6076 if (EQ (coding->src_object, coding->dst_object))
8f924df7 6077 dst_end = (unsigned char *) src;
2c78b7e1
KH
6078 if (dst >= dst_end - 1)
6079 {
119852e7
KH
6080 EMACS_INT offset = src - coding->source;
6081
2c78b7e1
KH
6082 dst = alloc_destination (coding, src_end - src + 2,
6083 dst);
6084 dst_end = coding->destination + coding->dst_bytes;
6085 coding_set_source (coding);
119852e7 6086 src = coding->source + offset;
2c78b7e1
KH
6087 src_end = coding->source + coding->src_bytes;
6088 }
df7492f9
KH
6089 }
6090 EMIT_ONE_BYTE (c);
6091 }
d46c5b12 6092 }
df7492f9
KH
6093 else
6094 {
6095 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 6096 {
119852e7 6097 EMACS_INT require = coding->src_bytes - coding->dst_bytes;
4ed46869 6098
df7492f9 6099 if (require > 0)
fa42c37f 6100 {
df7492f9
KH
6101 EMACS_INT offset = src - coding->source;
6102
6103 dst = alloc_destination (coding, require, dst);
6104 coding_set_source (coding);
6105 src = coding->source + offset;
6106 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
6107 }
6108 }
119852e7 6109 produced_chars = coding->consumed_char;
df7492f9 6110 while (src < src_end)
14daee73 6111 *dst++ = *src++;
fa42c37f
KH
6112 }
6113 }
6114
df7492f9 6115 produced = dst - (coding->destination + coding->produced);
284201e4 6116 if (BUFFERP (coding->dst_object) && produced_chars > 0)
df7492f9
KH
6117 insert_from_gap (produced_chars, produced);
6118 coding->produced += produced;
6119 coding->produced_char += produced_chars;
69a80ea3 6120 return carryover;
fa42c37f
KH
6121}
6122
ff0dacd7
KH
6123/* Compose text in CODING->object according to the annotation data at
6124 CHARBUF. CHARBUF is an array:
6125 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 6126 */
4ed46869 6127
df7492f9 6128static INLINE void
69a80ea3 6129produce_composition (coding, charbuf, pos)
4ed46869 6130 struct coding_system *coding;
df7492f9 6131 int *charbuf;
69a80ea3 6132 EMACS_INT pos;
4ed46869 6133{
df7492f9 6134 int len;
69a80ea3 6135 EMACS_INT to;
df7492f9 6136 enum composition_method method;
df7492f9 6137 Lisp_Object components;
fa42c37f 6138
df7492f9 6139 len = -charbuf[0];
69a80ea3 6140 to = pos + charbuf[2];
9ffd559c
KH
6141 if (to <= pos)
6142 return;
69a80ea3 6143 method = (enum composition_method) (charbuf[3]);
d46c5b12 6144
df7492f9
KH
6145 if (method == COMPOSITION_RELATIVE)
6146 components = Qnil;
9ffd559c
KH
6147 else if (method >= COMPOSITION_WITH_RULE
6148 && method <= COMPOSITION_WITH_RULE_ALTCHARS)
d46c5b12 6149 {
df7492f9
KH
6150 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6151 int i;
b73bfc1c 6152
69a80ea3
KH
6153 len -= 4;
6154 charbuf += 4;
df7492f9 6155 for (i = 0; i < len; i++)
9ffd559c
KH
6156 {
6157 args[i] = make_number (charbuf[i]);
f75c90a9 6158 if (charbuf[i] < 0)
9ffd559c
KH
6159 return;
6160 }
df7492f9
KH
6161 components = (method == COMPOSITION_WITH_ALTCHARS
6162 ? Fstring (len, args) : Fvector (len, args));
d46c5b12 6163 }
9ffd559c
KH
6164 else
6165 return;
69a80ea3 6166 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
6167}
6168
d46c5b12 6169
ff0dacd7
KH
6170/* Put `charset' property on text in CODING->object according to
6171 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 6172 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 6173 */
d46c5b12 6174
ff0dacd7 6175static INLINE void
69a80ea3 6176produce_charset (coding, charbuf, pos)
d46c5b12 6177 struct coding_system *coding;
ff0dacd7 6178 int *charbuf;
69a80ea3 6179 EMACS_INT pos;
d46c5b12 6180{
69a80ea3
KH
6181 EMACS_INT from = pos - charbuf[2];
6182 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 6183
69a80ea3 6184 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
6185 Qcharset, CHARSET_NAME (charset),
6186 coding->dst_object);
d46c5b12
KH
6187}
6188
d46c5b12 6189
df7492f9
KH
/* Number of `int' elements first tried for the conversion work
   area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the work area CODING->charbuf with alloca, and record its
   size (in elements) in CODING->charbuf_size.  CHARBUF_SIZE elements
   are tried first; on a NULL result the size is halved and the
   allocation retried while the size stays above 1024.

   If no area could be allocated, CODING_RESULT_INSUFFICIENT_MEM is
   recorded and the macro executes `return CODING->result;', so it
   may only be used in a function that returns that value.  As the
   memory comes from alloca, it lives in the frame of the function
   that uses the macro.

   CODING may be any pointer expression; it is evaluated more than
   once.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)				\
  do {									\
    int work_area_size_ = CHARBUF_SIZE;					\
									\
    (coding)->charbuf = NULL;						\
    while (work_area_size_ > 1024)					\
      {									\
	(coding)->charbuf						\
	  = (int *) alloca (sizeof (int) * work_area_size_);		\
	if ((coding)->charbuf)						\
	  break;							\
	work_area_size_ >>= 1;						\
      }									\
    if (! (coding)->charbuf)						\
      {									\
	record_conversion_result ((coding),				\
				  CODING_RESULT_INSUFFICIENT_MEM);	\
	return (coding)->result;					\
      }									\
    (coding)->charbuf_size = work_area_size_;				\
  } while (0)
4ed46869 6211
d46c5b12
KH
6212
6213static void
69a80ea3 6214produce_annotation (coding, pos)
d46c5b12 6215 struct coding_system *coding;
69a80ea3 6216 EMACS_INT pos;
d46c5b12 6217{
df7492f9
KH
6218 int *charbuf = coding->charbuf;
6219 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 6220
ff0dacd7
KH
6221 if (NILP (coding->dst_object))
6222 return;
d46c5b12 6223
df7492f9 6224 while (charbuf < charbuf_end)
a84f1519 6225 {
df7492f9 6226 if (*charbuf >= 0)
69a80ea3 6227 pos += *charbuf++;
d46c5b12 6228 else
d46c5b12 6229 {
df7492f9 6230 int len = -*charbuf;
ff0dacd7 6231 switch (charbuf[1])
df7492f9
KH
6232 {
6233 case CODING_ANNOTATE_COMPOSITION_MASK:
69a80ea3 6234 produce_composition (coding, charbuf, pos);
df7492f9 6235 break;
ff0dacd7 6236 case CODING_ANNOTATE_CHARSET_MASK:
69a80ea3 6237 produce_charset (coding, charbuf, pos);
ff0dacd7 6238 break;
df7492f9
KH
6239 default:
6240 abort ();
6241 }
6242 charbuf += len;
d46c5b12 6243 }
a84f1519 6244 }
d46c5b12
KH
6245}
6246
df7492f9
KH
6247/* Decode the data at CODING->src_object into CODING->dst_object.
6248 CODING->src_object is a buffer, a string, or nil.
6249 CODING->dst_object is a buffer.
d46c5b12 6250
df7492f9
KH
6251 If CODING->src_object is a buffer, it must be the current buffer.
6252 In this case, if CODING->src_pos is positive, it is a position of
6253 the source text in the buffer, otherwise, the source text is in the
6254 gap area of the buffer, and CODING->src_pos specifies the offset of
6255 the text from GPT (which must be the same as PT). If this is the
6256 same buffer as CODING->dst_object, CODING->src_pos must be
6257 negative.
d46c5b12 6258
b6828792 6259 If CODING->src_object is a string, CODING->src_pos is an index to
df7492f9 6260 that string.
d46c5b12 6261
df7492f9
KH
6262 If CODING->src_object is nil, CODING->source must already point to
6263 the non-relocatable memory area. In this case, CODING->src_pos is
6264 an offset from CODING->source.
73be902c 6265
df7492f9
KH
6266 The decoded data is inserted at the current point of the buffer
6267 CODING->dst_object.
6268*/
d46c5b12 6269
df7492f9
KH
6270static int
6271decode_coding (coding)
d46c5b12 6272 struct coding_system *coding;
d46c5b12 6273{
df7492f9 6274 Lisp_Object attrs;
24a73b0a 6275 Lisp_Object undo_list;
7d64c6ad 6276 Lisp_Object translation_table;
69a80ea3
KH
6277 int carryover;
6278 int i;
d46c5b12 6279
df7492f9
KH
6280 if (BUFFERP (coding->src_object)
6281 && coding->src_pos > 0
6282 && coding->src_pos < GPT
6283 && coding->src_pos + coding->src_chars > GPT)
6284 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 6285
24a73b0a 6286 undo_list = Qt;
df7492f9 6287 if (BUFFERP (coding->dst_object))
1c3478b0 6288 {
df7492f9
KH
6289 if (current_buffer != XBUFFER (coding->dst_object))
6290 set_buffer_internal (XBUFFER (coding->dst_object));
6291 if (GPT != PT)
6292 move_gap_both (PT, PT_BYTE);
24a73b0a
KH
6293 undo_list = current_buffer->undo_list;
6294 current_buffer->undo_list = Qt;
1c3478b0
KH
6295 }
6296
df7492f9
KH
6297 coding->consumed = coding->consumed_char = 0;
6298 coding->produced = coding->produced_char = 0;
6299 coding->chars_at_source = 0;
065e3595 6300 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 6301 coding->errors = 0;
1c3478b0 6302
df7492f9
KH
6303 ALLOC_CONVERSION_WORK_AREA (coding);
6304
6305 attrs = CODING_ID_ATTRS (coding->id);
2170c8f0 6306 translation_table = get_translation_table (attrs, 0, NULL);
df7492f9 6307
69a80ea3 6308 carryover = 0;
df7492f9 6309 do
b73bfc1c 6310 {
69a80ea3
KH
6311 EMACS_INT pos = coding->dst_pos + coding->produced_char;
6312
df7492f9
KH
6313 coding_set_source (coding);
6314 coding->annotated = 0;
69a80ea3 6315 coding->charbuf_used = carryover;
df7492f9 6316 (*(coding->decoder)) (coding);
df7492f9 6317 coding_set_destination (coding);
69a80ea3 6318 carryover = produce_chars (coding, translation_table, 0);
df7492f9 6319 if (coding->annotated)
69a80ea3
KH
6320 produce_annotation (coding, pos);
6321 for (i = 0; i < carryover; i++)
6322 coding->charbuf[i]
6323 = coding->charbuf[coding->charbuf_used - carryover + i];
d46c5b12 6324 }
df7492f9 6325 while (coding->consumed < coding->src_bytes
54b367bb
KH
6326 && (coding->result == CODING_RESULT_SUCCESS
6327 || coding->result == CODING_RESULT_INVALID_SRC));
d46c5b12 6328
69a80ea3
KH
6329 if (carryover > 0)
6330 {
6331 coding_set_destination (coding);
6332 coding->charbuf_used = carryover;
6333 produce_chars (coding, translation_table, 1);
6334 }
6335
df7492f9
KH
6336 coding->carryover_bytes = 0;
6337 if (coding->consumed < coding->src_bytes)
d46c5b12 6338 {
df7492f9 6339 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 6340 const unsigned char *src;
df7492f9
KH
6341
6342 coding_set_source (coding);
6343 coding_set_destination (coding);
6344 src = coding->source + coding->consumed;
6345
6346 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 6347 {
df7492f9
KH
6348 /* Flush out unprocessed data as binary chars. We are sure
6349 that the number of data is less than the size of
6350 coding->charbuf. */
065e3595 6351 coding->charbuf_used = 0;
df7492f9 6352 while (nbytes-- > 0)
1c3478b0 6353 {
df7492f9 6354 int c = *src++;
98725083 6355
1c91457d
KH
6356 if (c & 0x80)
6357 c = BYTE8_TO_CHAR (c);
6358 coding->charbuf[coding->charbuf_used++] = c;
1c3478b0 6359 }
f6cbaf43 6360 produce_chars (coding, Qnil, 1);
d46c5b12 6361 }
d46c5b12 6362 else
df7492f9
KH
6363 {
6364 /* Record unprocessed bytes in coding->carryover. We are
6365 sure that the number of data is less than the size of
6366 coding->carryover. */
6367 unsigned char *p = coding->carryover;
6368
6369 coding->carryover_bytes = nbytes;
6370 while (nbytes-- > 0)
6371 *p++ = *src++;
1c3478b0 6372 }
df7492f9 6373 coding->consumed = coding->src_bytes;
b73bfc1c 6374 }
69f76525 6375
4347441b
KH
6376 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6377 decode_eol (coding);
24a73b0a
KH
6378 if (BUFFERP (coding->dst_object))
6379 {
6380 current_buffer->undo_list = undo_list;
6381 record_insert (coding->dst_pos, coding->produced_char);
6382 }
73be902c 6383 return coding->result;
4ed46869
KH
6384}
6385
aaaf0b1e 6386
e1c23804 6387/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
6388 ending before LIMIT of CODING->src_object (buffer or string), store
6389 the data in BUF, set *STOP to a starting position of the next
6390 composition (if any) or to LIMIT, and return the address of the
6391 next element of BUF.
6392
6393 If such an annotation is not found, set *STOP to a starting
6394 position of a composition after POS (if any) or to LIMIT, and
6395 return BUF. */
6396
6397static INLINE int *
6398handle_composition_annotation (pos, limit, coding, buf, stop)
6399 EMACS_INT pos, limit;
aaaf0b1e 6400 struct coding_system *coding;
ff0dacd7
KH
6401 int *buf;
6402 EMACS_INT *stop;
aaaf0b1e 6403{
ff0dacd7
KH
6404 EMACS_INT start, end;
6405 Lisp_Object prop;
aaaf0b1e 6406
ff0dacd7
KH
6407 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6408 || end > limit)
6409 *stop = limit;
6410 else if (start > pos)
6411 *stop = start;
6412 else
aaaf0b1e 6413 {
ff0dacd7 6414 if (start == pos)
aaaf0b1e 6415 {
ff0dacd7
KH
6416 /* We found a composition. Store the corresponding
6417 annotation data in BUF. */
6418 int *head = buf;
6419 enum composition_method method = COMPOSITION_METHOD (prop);
6420 int nchars = COMPOSITION_LENGTH (prop);
6421
69a80ea3 6422 ADD_COMPOSITION_DATA (buf, nchars, method);
ff0dacd7 6423 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 6424 {
ff0dacd7
KH
6425 Lisp_Object components;
6426 int len, i, i_byte;
6427
6428 components = COMPOSITION_COMPONENTS (prop);
6429 if (VECTORP (components))
aaaf0b1e 6430 {
ff0dacd7
KH
6431 len = XVECTOR (components)->size;
6432 for (i = 0; i < len; i++)
6433 *buf++ = XINT (AREF (components, i));
aaaf0b1e 6434 }
ff0dacd7 6435 else if (STRINGP (components))
aaaf0b1e 6436 {
8f924df7 6437 len = SCHARS (components);
ff0dacd7
KH
6438 i = i_byte = 0;
6439 while (i < len)
6440 {
6441 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6442 buf++;
6443 }
6444 }
6445 else if (INTEGERP (components))
6446 {
6447 len = 1;
6448 *buf++ = XINT (components);
6449 }
6450 else if (CONSP (components))
6451 {
6452 for (len = 0; CONSP (components);
6453 len++, components = XCDR (components))
6454 *buf++ = XINT (XCAR (components));
aaaf0b1e 6455 }
aaaf0b1e 6456 else
ff0dacd7
KH
6457 abort ();
6458 *head -= len;
aaaf0b1e 6459 }
aaaf0b1e 6460 }
ff0dacd7
KH
6461
6462 if (find_composition (end, limit, &start, &end, &prop,
6463 coding->src_object)
6464 && end <= limit)
6465 *stop = start;
6466 else
6467 *stop = limit;
aaaf0b1e 6468 }
ff0dacd7
KH
6469 return buf;
6470}
6471
6472
e1c23804 6473/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
6474 CODING->src_object (buffer of string), store the data in BUF, set
6475 *STOP to the position where the value of `charset' property changes
6476 (limiting by LIMIT), and return the address of the next element of
6477 BUF.
6478
6479 If the property value is nil, set *STOP to the position where the
6480 property value is non-nil (limiting by LIMIT), and return BUF. */
6481
6482static INLINE int *
6483handle_charset_annotation (pos, limit, coding, buf, stop)
6484 EMACS_INT pos, limit;
6485 struct coding_system *coding;
6486 int *buf;
6487 EMACS_INT *stop;
6488{
6489 Lisp_Object val, next;
6490 int id;
6491
6492 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6493 if (! NILP (val) && CHARSETP (val))
6494 id = XINT (CHARSET_SYMBOL_ID (val));
6495 else
6496 id = -1;
69a80ea3 6497 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
6498 next = Fnext_single_property_change (make_number (pos), Qcharset,
6499 coding->src_object,
6500 make_number (limit));
6501 *stop = XINT (next);
6502 return buf;
6503}
6504
6505
df7492f9 6506static void
09ee6fdd 6507consume_chars (coding, translation_table, max_lookup)
df7492f9 6508 struct coding_system *coding;
433f7f87 6509 Lisp_Object translation_table;
09ee6fdd 6510 int max_lookup;
df7492f9
KH
6511{
6512 int *buf = coding->charbuf;
ff0dacd7 6513 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 6514 const unsigned char *src = coding->source + coding->consumed;
4776e638 6515 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
6516 EMACS_INT pos = coding->src_pos + coding->consumed_char;
6517 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
6518 int multibytep = coding->src_multibyte;
6519 Lisp_Object eol_type;
6520 int c;
ff0dacd7 6521 EMACS_INT stop, stop_composition, stop_charset;
09ee6fdd 6522 int *lookup_buf = NULL;
433f7f87
KH
6523
6524 if (! NILP (translation_table))
09ee6fdd 6525 lookup_buf = alloca (sizeof (int) * max_lookup);
88993dfd 6526
df7492f9
KH
6527 eol_type = CODING_ID_EOL_TYPE (coding->id);
6528 if (VECTORP (eol_type))
6529 eol_type = Qunix;
88993dfd 6530
df7492f9
KH
6531 /* Note: composition handling is not yet implemented. */
6532 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 6533
0b5670c9
KH
6534 if (NILP (coding->src_object))
6535 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 6536 else
0b5670c9
KH
6537 {
6538 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6539 stop = stop_composition = pos;
6540 else
6541 stop = stop_composition = end_pos;
6542 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6543 stop = stop_charset = pos;
6544 else
6545 stop_charset = end_pos;
6546 }
ec6d2bb8 6547
24a73b0a 6548 /* Compensate for CRLF and conversion. */
ff0dacd7 6549 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 6550 while (buf < buf_end)
aaaf0b1e 6551 {
433f7f87
KH
6552 Lisp_Object trans;
6553
df7492f9 6554 if (pos == stop)
ec6d2bb8 6555 {
df7492f9
KH
6556 if (pos == end_pos)
6557 break;
ff0dacd7
KH
6558 if (pos == stop_composition)
6559 buf = handle_composition_annotation (pos, end_pos, coding,
6560 buf, &stop_composition);
6561 if (pos == stop_charset)
6562 buf = handle_charset_annotation (pos, end_pos, coding,
6563 buf, &stop_charset);
6564 stop = (stop_composition < stop_charset
6565 ? stop_composition : stop_charset);
df7492f9
KH
6566 }
6567
6568 if (! multibytep)
4776e638 6569 {
d3e4cb56 6570 EMACS_INT bytes;
aaaf0b1e 6571
ea29edf2
KH
6572 if (coding->encoder == encode_coding_raw_text)
6573 c = *src++, pos++;
6574 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
4776e638
KH
6575 c = STRING_CHAR_ADVANCE (src), pos += bytes;
6576 else
f03caae0 6577 c = BYTE8_TO_CHAR (*src), src++, pos++;
4776e638 6578 }
df7492f9 6579 else
4776e638 6580 c = STRING_CHAR_ADVANCE (src), pos++;
df7492f9
KH
6581 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6582 c = '\n';
6583 if (! EQ (eol_type, Qunix))
aaaf0b1e 6584 {
df7492f9 6585 if (c == '\n')
aaaf0b1e 6586 {
df7492f9
KH
6587 if (EQ (eol_type, Qdos))
6588 *buf++ = '\r';
6589 else
6590 c = '\r';
aaaf0b1e
KH
6591 }
6592 }
433f7f87 6593
e6a54062 6594 trans = Qnil;
09ee6fdd 6595 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6596 if (NILP (trans))
433f7f87
KH
6597 *buf++ = c;
6598 else
6599 {
6600 int from_nchars = 1, to_nchars = 1;
6601 int *lookup_buf_end;
6602 const unsigned char *p = src;
6603 int i;
6604
6605 lookup_buf[0] = c;
6606 for (i = 1; i < max_lookup && p < src_end; i++)
6607 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6608 lookup_buf_end = lookup_buf + i;
6609 trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6610 &from_nchars, &to_nchars);
6611 if (EQ (trans, Qt)
6612 || buf + to_nchars > buf_end)
6613 break;
6614 *buf++ = *lookup_buf;
6615 for (i = 1; i < to_nchars; i++)
6616 *buf++ = XINT (AREF (trans, i));
6617 for (i = 1; i < from_nchars; i++, pos++)
6618 src += MULTIBYTE_LENGTH_NO_CHECK (src);
6619 }
aaaf0b1e 6620 }
ec6d2bb8 6621
df7492f9
KH
6622 coding->consumed = src - coding->source;
6623 coding->consumed_char = pos - coding->src_pos;
6624 coding->charbuf_used = buf - coding->charbuf;
6625 coding->chars_at_source = 0;
aaaf0b1e
KH
6626}
6627
4ed46869 6628
df7492f9
KH
6629/* Encode the text at CODING->src_object into CODING->dst_object.
6630 CODING->src_object is a buffer or a string.
6631 CODING->dst_object is a buffer or nil.
6632
6633 If CODING->src_object is a buffer, it must be the current buffer.
6634 In this case, if CODING->src_pos is positive, it is a position of
6635 the source text in the buffer, otherwise. the source text is in the
6636 gap area of the buffer, and coding->src_pos specifies the offset of
6637 the text from GPT (which must be the same as PT). If this is the
6638 same buffer as CODING->dst_object, CODING->src_pos must be
6639 negative and CODING should not have `pre-write-conversion'.
6640
6641 If CODING->src_object is a string, CODING should not have
6642 `pre-write-conversion'.
6643
6644 If CODING->dst_object is a buffer, the encoded data is inserted at
6645 the current point of that buffer.
6646
6647 If CODING->dst_object is nil, the encoded data is placed at the
6648 memory area specified by CODING->destination. */
6649
6650static int
6651encode_coding (coding)
4ed46869 6652 struct coding_system *coding;
4ed46869 6653{
df7492f9 6654 Lisp_Object attrs;
7d64c6ad 6655 Lisp_Object translation_table;
09ee6fdd 6656 int max_lookup;
9861e777 6657
df7492f9 6658 attrs = CODING_ID_ATTRS (coding->id);
ea29edf2
KH
6659 if (coding->encoder == encode_coding_raw_text)
6660 translation_table = Qnil, max_lookup = 0;
6661 else
6662 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 6663
df7492f9 6664 if (BUFFERP (coding->dst_object))
8844fa83 6665 {
df7492f9
KH
6666 set_buffer_internal (XBUFFER (coding->dst_object));
6667 coding->dst_multibyte
6668 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 6669 }
4ed46869 6670
b73bfc1c 6671 coding->consumed = coding->consumed_char = 0;
df7492f9 6672 coding->produced = coding->produced_char = 0;
065e3595 6673 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 6674 coding->errors = 0;
b73bfc1c 6675
df7492f9 6676 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 6677
df7492f9
KH
6678 do {
6679 coding_set_source (coding);
09ee6fdd 6680 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
6681 coding_set_destination (coding);
6682 (*(coding->encoder)) (coding);
6683 } while (coding->consumed_char < coding->src_chars);
6684
284201e4 6685 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
df7492f9
KH
6686 insert_from_gap (coding->produced_char, coding->produced);
6687
6688 return (coding->result);
ec6d2bb8
KH
6689}
6690
fb88bf2d 6691
24a73b0a
KH
6692/* Name (or base name) of work buffer for code conversion. */
6693static Lisp_Object Vcode_conversion_workbuf_name;
d46c5b12 6694
24a73b0a
KH
6695/* A working buffer used by the top level conversion. Once it is
6696 created, it is never destroyed. It has the name
6697 Vcode_conversion_workbuf_name. The other working buffers are
6698 destroyed after the use is finished, and their names are modified
6699 versions of Vcode_conversion_workbuf_name. */
6700static Lisp_Object Vcode_conversion_reused_workbuf;
b73bfc1c 6701
24a73b0a
KH
6702/* Nonzero iff Vcode_conversion_reused_workbuf is already in use. */
6703static int reused_workbuf_in_use;
4ed46869 6704
24a73b0a
KH
6705
6706/* Return a working buffer of code convesion. MULTIBYTE specifies the
6707 multibyteness of returning buffer. */
b73bfc1c 6708
f6cbaf43 6709static Lisp_Object
24a73b0a 6710make_conversion_work_buffer (multibyte)
f6cbaf43 6711 int multibyte;
df7492f9 6712{
24a73b0a
KH
6713 Lisp_Object name, workbuf;
6714 struct buffer *current;
4ed46869 6715
24a73b0a 6716 if (reused_workbuf_in_use++)
065e3595
KH
6717 {
6718 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6719 workbuf = Fget_buffer_create (name);
6720 }
df7492f9 6721 else
065e3595
KH
6722 {
6723 name = Vcode_conversion_workbuf_name;
6724 workbuf = Fget_buffer_create (name);
6725 if (NILP (Vcode_conversion_reused_workbuf))
6726 Vcode_conversion_reused_workbuf = workbuf;
6727 }
24a73b0a
KH
6728 current = current_buffer;
6729 set_buffer_internal (XBUFFER (workbuf));
6730 Ferase_buffer ();
df7492f9 6731 current_buffer->undo_list = Qt;
24a73b0a 6732 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
df7492f9 6733 set_buffer_internal (current);
24a73b0a 6734 return workbuf;
df7492f9 6735}
d46c5b12 6736
24a73b0a 6737
4776e638 6738static Lisp_Object
24a73b0a
KH
6739code_conversion_restore (arg)
6740 Lisp_Object arg;
4776e638 6741{
24a73b0a 6742 Lisp_Object current, workbuf;
948bdcf3 6743 struct gcpro gcpro1;
24a73b0a 6744
948bdcf3 6745 GCPRO1 (arg);
24a73b0a
KH
6746 current = XCAR (arg);
6747 workbuf = XCDR (arg);
6748 if (! NILP (workbuf))
6749 {
6750 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6751 reused_workbuf_in_use = 0;
6752 else if (! NILP (Fbuffer_live_p (workbuf)))
6753 Fkill_buffer (workbuf);
6754 }
6755 set_buffer_internal (XBUFFER (current));
948bdcf3 6756 UNGCPRO;
4776e638
KH
6757 return Qnil;
6758}
b73bfc1c 6759
24a73b0a
KH
6760Lisp_Object
6761code_conversion_save (with_work_buf, multibyte)
4776e638 6762 int with_work_buf, multibyte;
df7492f9 6763{
24a73b0a 6764 Lisp_Object workbuf = Qnil;
b73bfc1c 6765
4776e638 6766 if (with_work_buf)
24a73b0a
KH
6767 workbuf = make_conversion_work_buffer (multibyte);
6768 record_unwind_protect (code_conversion_restore,
6769 Fcons (Fcurrent_buffer (), workbuf));
4776e638 6770 return workbuf;
df7492f9 6771}
d46c5b12 6772
df7492f9
KH
6773int
6774decode_coding_gap (coding, chars, bytes)
6775 struct coding_system *coding;
6776 EMACS_INT chars, bytes;
6777{
6778 int count = specpdl_ptr - specpdl;
5e5c78be 6779 Lisp_Object attrs;
fb88bf2d 6780
24a73b0a 6781 code_conversion_save (0, 0);
ec6d2bb8 6782
24a73b0a 6783 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6784 coding->src_chars = chars;
6785 coding->src_bytes = bytes;
6786 coding->src_pos = -chars;
6787 coding->src_pos_byte = -bytes;
6788 coding->src_multibyte = chars < bytes;
24a73b0a 6789 coding->dst_object = coding->src_object;
df7492f9
KH
6790 coding->dst_pos = PT;
6791 coding->dst_pos_byte = PT_BYTE;
71c81426 6792 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869 6793
df7492f9
KH
6794 if (CODING_REQUIRE_DETECTION (coding))
6795 detect_coding (coding);
8f924df7 6796
9286b333 6797 coding->mode |= CODING_MODE_LAST_BLOCK;
287c57d7 6798 current_buffer->text->inhibit_shrinking = 1;
df7492f9 6799 decode_coding (coding);
287c57d7 6800 current_buffer->text->inhibit_shrinking = 0;
d46c5b12 6801
5e5c78be
KH
6802 attrs = CODING_ID_ATTRS (coding->id);
6803 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 6804 {
5e5c78be
KH
6805 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6806 Lisp_Object val;
6807
6808 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
6809 val = call1 (CODING_ATTR_POST_READ (attrs),
6810 make_number (coding->produced_char));
5e5c78be
KH
6811 CHECK_NATNUM (val);
6812 coding->produced_char += Z - prev_Z;
6813 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 6814 }
4ed46869 6815
df7492f9 6816 unbind_to (count, Qnil);
b73bfc1c
KH
6817 return coding->result;
6818}
52d41803 6819
4ed46869 6820int
df7492f9 6821encode_coding_gap (coding, chars, bytes)
4ed46869 6822 struct coding_system *coding;
df7492f9 6823 EMACS_INT chars, bytes;
4ed46869 6824{
df7492f9 6825 int count = specpdl_ptr - specpdl;
4ed46869 6826
24a73b0a 6827 code_conversion_save (0, 0);
4ed46869 6828
24a73b0a 6829 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6830 coding->src_chars = chars;
6831 coding->src_bytes = bytes;
6832 coding->src_pos = -chars;
6833 coding->src_pos_byte = -bytes;
6834 coding->src_multibyte = chars < bytes;
6835 coding->dst_object = coding->src_object;
6836 coding->dst_pos = PT;
6837 coding->dst_pos_byte = PT_BYTE;
4ed46869 6838
df7492f9 6839 encode_coding (coding);
b73bfc1c 6840
df7492f9
KH
6841 unbind_to (count, Qnil);
6842 return coding->result;
6843}
4ed46869 6844
d46c5b12 6845
df7492f9
KH
6846/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6847 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 6848
df7492f9 6849 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 6850
df7492f9
KH
6851 If it is a buffer, the text is at point of the buffer. FROM and TO
6852 are positions in the buffer.
b73bfc1c 6853
df7492f9
KH
6854 If it is a string, the text is at the beginning of the string.
6855 FROM and TO are indices to the string.
4ed46869 6856
df7492f9
KH
6857 If it is nil, the text is at coding->source. FROM and TO are
6858 indices to coding->source.
bb10be8b 6859
df7492f9 6860 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 6861
df7492f9
KH
6862 If it is a buffer, the decoded text is inserted at point of the
6863 buffer. If the buffer is the same as SRC_OBJECT, the source text
6864 is deleted.
4ed46869 6865
df7492f9
KH
6866 If it is Qt, a string is made from the decoded text, and
6867 set in CODING->dst_object.
d46c5b12 6868
df7492f9 6869 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 6870 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
6871 CODING->destination by xmalloc. If the decoded text is longer than
6872 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6873 */
d46c5b12 6874
df7492f9
KH
6875void
6876decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6877 dst_object)
d46c5b12 6878 struct coding_system *coding;
df7492f9
KH
6879 Lisp_Object src_object;
6880 EMACS_INT from, from_byte, to, to_byte;
6881 Lisp_Object dst_object;
d46c5b12 6882{
df7492f9
KH
6883 int count = specpdl_ptr - specpdl;
6884 unsigned char *destination;
6885 EMACS_INT dst_bytes;
6886 EMACS_INT chars = to - from;
6887 EMACS_INT bytes = to_byte - from_byte;
6888 Lisp_Object attrs;
4776e638 6889 int saved_pt = -1, saved_pt_byte;
64cedb0c 6890 int need_marker_adjustment = 0;
b3bfad50 6891 Lisp_Object old_deactivate_mark;
d46c5b12 6892
b3bfad50 6893 old_deactivate_mark = Vdeactivate_mark;
93dec019 6894
df7492f9 6895 if (NILP (dst_object))
d46c5b12 6896 {
df7492f9
KH
6897 destination = coding->destination;
6898 dst_bytes = coding->dst_bytes;
d46c5b12 6899 }
93dec019 6900
df7492f9
KH
6901 coding->src_object = src_object;
6902 coding->src_chars = chars;
6903 coding->src_bytes = bytes;
6904 coding->src_multibyte = chars < bytes;
70ad9fc4 6905
df7492f9 6906 if (STRINGP (src_object))
d46c5b12 6907 {
df7492f9
KH
6908 coding->src_pos = from;
6909 coding->src_pos_byte = from_byte;
d46c5b12 6910 }
df7492f9 6911 else if (BUFFERP (src_object))
88993dfd 6912 {
df7492f9
KH
6913 set_buffer_internal (XBUFFER (src_object));
6914 if (from != GPT)
6915 move_gap_both (from, from_byte);
6916 if (EQ (src_object, dst_object))
fb88bf2d 6917 {
64cedb0c
KH
6918 struct Lisp_Marker *tail;
6919
6920 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
6921 {
6922 tail->need_adjustment
6923 = tail->charpos == (tail->insertion_type ? from : to);
6924 need_marker_adjustment |= tail->need_adjustment;
6925 }
4776e638 6926 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6927 TEMP_SET_PT_BOTH (from, from_byte);
6928 del_range_both (from, from_byte, to, to_byte, 1);
6929 coding->src_pos = -chars;
6930 coding->src_pos_byte = -bytes;
fb88bf2d 6931 }
df7492f9 6932 else
fb88bf2d 6933 {
df7492f9
KH
6934 coding->src_pos = from;
6935 coding->src_pos_byte = from_byte;
fb88bf2d 6936 }
88993dfd
KH
6937 }
6938
df7492f9
KH
6939 if (CODING_REQUIRE_DETECTION (coding))
6940 detect_coding (coding);
6941 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 6942
2cb26057
KH
6943 if (EQ (dst_object, Qt)
6944 || (! NILP (CODING_ATTR_POST_READ (attrs))
6945 && NILP (dst_object)))
b73bfc1c 6946 {
24a73b0a 6947 coding->dst_object = code_conversion_save (1, 1);
df7492f9
KH
6948 coding->dst_pos = BEG;
6949 coding->dst_pos_byte = BEG_BYTE;
6950 coding->dst_multibyte = 1;
b73bfc1c 6951 }
df7492f9 6952 else if (BUFFERP (dst_object))
d46c5b12 6953 {
24a73b0a 6954 code_conversion_save (0, 0);
df7492f9
KH
6955 coding->dst_object = dst_object;
6956 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6957 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6958 coding->dst_multibyte
6959 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
6960 }
6961 else
6962 {
24a73b0a 6963 code_conversion_save (0, 0);
df7492f9
KH
6964 coding->dst_object = Qnil;
6965 coding->dst_multibyte = 1;
d46c5b12
KH
6966 }
6967
df7492f9 6968 decode_coding (coding);
fa46990e 6969
df7492f9
KH
6970 if (BUFFERP (coding->dst_object))
6971 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 6972
df7492f9 6973 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6974 {
b3bfad50 6975 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
df7492f9 6976 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6977 Lisp_Object val;
d46c5b12 6978
c0cc7f7f 6979 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
b3bfad50
KH
6980 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
6981 old_deactivate_mark);
d4850d67
KH
6982 val = safe_call1 (CODING_ATTR_POST_READ (attrs),
6983 make_number (coding->produced_char));
df7492f9
KH
6984 UNGCPRO;
6985 CHECK_NATNUM (val);
6986 coding->produced_char += Z - prev_Z;
6987 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6988 }
de79a6a5 6989
df7492f9 6990 if (EQ (dst_object, Qt))
ec6d2bb8 6991 {
df7492f9
KH
6992 coding->dst_object = Fbuffer_string ();
6993 }
6994 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6995 {
6996 set_buffer_internal (XBUFFER (coding->dst_object));
6997 if (dst_bytes < coding->produced)
6998 {
b3bfad50 6999 destination = xrealloc (destination, coding->produced);
df7492f9
KH
7000 if (! destination)
7001 {
065e3595
KH
7002 record_conversion_result (coding,
7003 CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
7004 unbind_to (count, Qnil);
7005 return;
7006 }
7007 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7008 move_gap_both (BEGV, BEGV_BYTE);
7009 bcopy (BEGV_ADDR, destination, coding->produced);
7010 coding->destination = destination;
d46c5b12 7011 }
ec6d2bb8 7012 }
b73bfc1c 7013
4776e638
KH
7014 if (saved_pt >= 0)
7015 {
7016 /* This is the case of:
7017 (BUFFERP (src_object) && EQ (src_object, dst_object))
7018 As we have moved PT while replacing the original buffer
7019 contents, we must recover it now. */
7020 set_buffer_internal (XBUFFER (src_object));
7021 if (saved_pt < from)
7022 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7023 else if (saved_pt < from + chars)
7024 TEMP_SET_PT_BOTH (from, from_byte);
7025 else if (! NILP (current_buffer->enable_multibyte_characters))
7026 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7027 saved_pt_byte + (coding->produced - bytes));
7028 else
7029 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7030 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7031
7032 if (need_marker_adjustment)
7033 {
7034 struct Lisp_Marker *tail;
7035
7036 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7037 if (tail->need_adjustment)
7038 {
7039 tail->need_adjustment = 0;
7040 if (tail->insertion_type)
7041 {
7042 tail->bytepos = from_byte;
7043 tail->charpos = from;
7044 }
7045 else
7046 {
7047 tail->bytepos = from_byte + coding->produced;
7048 tail->charpos
7049 = (NILP (current_buffer->enable_multibyte_characters)
7050 ? tail->bytepos : from + coding->produced_char);
7051 }
7052 }
7053 }
d46c5b12 7054 }
4776e638 7055
b3bfad50 7056 Vdeactivate_mark = old_deactivate_mark;
065e3595 7057 unbind_to (count, coding->dst_object);
d46c5b12
KH
7058}
7059
d46c5b12 7060
df7492f9
KH
7061void
7062encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7063 dst_object)
d46c5b12 7064 struct coding_system *coding;
df7492f9
KH
7065 Lisp_Object src_object;
7066 EMACS_INT from, from_byte, to, to_byte;
7067 Lisp_Object dst_object;
d46c5b12 7068{
b73bfc1c 7069 int count = specpdl_ptr - specpdl;
df7492f9
KH
7070 EMACS_INT chars = to - from;
7071 EMACS_INT bytes = to_byte - from_byte;
7072 Lisp_Object attrs;
4776e638 7073 int saved_pt = -1, saved_pt_byte;
64cedb0c 7074 int need_marker_adjustment = 0;
c02d943b 7075 int kill_src_buffer = 0;
b3bfad50 7076 Lisp_Object old_deactivate_mark;
df7492f9 7077
b3bfad50 7078 old_deactivate_mark = Vdeactivate_mark;
df7492f9
KH
7079
7080 coding->src_object = src_object;
7081 coding->src_chars = chars;
7082 coding->src_bytes = bytes;
7083 coding->src_multibyte = chars < bytes;
7084
7085 attrs = CODING_ID_ATTRS (coding->id);
7086
64cedb0c
KH
7087 if (EQ (src_object, dst_object))
7088 {
7089 struct Lisp_Marker *tail;
7090
7091 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7092 {
7093 tail->need_adjustment
7094 = tail->charpos == (tail->insertion_type ? from : to);
7095 need_marker_adjustment |= tail->need_adjustment;
7096 }
7097 }
7098
df7492f9 7099 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 7100 {
24a73b0a 7101 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
7102 set_buffer_internal (XBUFFER (coding->src_object));
7103 if (STRINGP (src_object))
7104 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7105 else if (BUFFERP (src_object))
7106 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7107 else
7108 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 7109
df7492f9
KH
7110 if (EQ (src_object, dst_object))
7111 {
7112 set_buffer_internal (XBUFFER (src_object));
4776e638 7113 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
7114 del_range_both (from, from_byte, to, to_byte, 1);
7115 set_buffer_internal (XBUFFER (coding->src_object));
7116 }
7117
d4850d67
KH
7118 {
7119 Lisp_Object args[3];
b3bfad50 7120 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
d4850d67 7121
b3bfad50
KH
7122 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7123 old_deactivate_mark);
d4850d67
KH
7124 args[0] = CODING_ATTR_PRE_WRITE (attrs);
7125 args[1] = make_number (BEG);
7126 args[2] = make_number (Z);
7127 safe_call (3, args);
b3bfad50 7128 UNGCPRO;
d4850d67 7129 }
c02d943b
KH
7130 if (XBUFFER (coding->src_object) != current_buffer)
7131 kill_src_buffer = 1;
ac87bbef 7132 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7133 if (BEG != GPT)
7134 move_gap_both (BEG, BEG_BYTE);
7135 coding->src_chars = Z - BEG;
7136 coding->src_bytes = Z_BYTE - BEG_BYTE;
7137 coding->src_pos = BEG;
7138 coding->src_pos_byte = BEG_BYTE;
7139 coding->src_multibyte = Z < Z_BYTE;
7140 }
7141 else if (STRINGP (src_object))
d46c5b12 7142 {
24a73b0a 7143 code_conversion_save (0, 0);
df7492f9
KH
7144 coding->src_pos = from;
7145 coding->src_pos_byte = from_byte;
b73bfc1c 7146 }
df7492f9 7147 else if (BUFFERP (src_object))
b73bfc1c 7148 {
24a73b0a 7149 code_conversion_save (0, 0);
df7492f9 7150 set_buffer_internal (XBUFFER (src_object));
df7492f9 7151 if (EQ (src_object, dst_object))
d46c5b12 7152 {
4776e638 7153 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
7154 coding->src_object = del_range_1 (from, to, 1, 1);
7155 coding->src_pos = 0;
7156 coding->src_pos_byte = 0;
d46c5b12 7157 }
df7492f9 7158 else
d46c5b12 7159 {
ff0dacd7
KH
7160 if (from < GPT && to >= GPT)
7161 move_gap_both (from, from_byte);
df7492f9
KH
7162 coding->src_pos = from;
7163 coding->src_pos_byte = from_byte;
d46c5b12 7164 }
d46c5b12 7165 }
4776e638 7166 else
24a73b0a 7167 code_conversion_save (0, 0);
d46c5b12 7168
df7492f9 7169 if (BUFFERP (dst_object))
88993dfd 7170 {
df7492f9 7171 coding->dst_object = dst_object;
28f67a95
KH
7172 if (EQ (src_object, dst_object))
7173 {
7174 coding->dst_pos = from;
7175 coding->dst_pos_byte = from_byte;
7176 }
7177 else
7178 {
7179 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7180 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7181 }
df7492f9
KH
7182 coding->dst_multibyte
7183 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 7184 }
df7492f9 7185 else if (EQ (dst_object, Qt))
d46c5b12 7186 {
df7492f9 7187 coding->dst_object = Qnil;
df7492f9 7188 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
7189 if (coding->dst_bytes == 0)
7190 coding->dst_bytes = 1;
7191 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 7192 coding->dst_multibyte = 0;
d46c5b12
KH
7193 }
7194 else
7195 {
df7492f9
KH
7196 coding->dst_object = Qnil;
7197 coding->dst_multibyte = 0;
d46c5b12
KH
7198 }
7199
df7492f9 7200 encode_coding (coding);
d46c5b12 7201
df7492f9 7202 if (EQ (dst_object, Qt))
d46c5b12 7203 {
df7492f9
KH
7204 if (BUFFERP (coding->dst_object))
7205 coding->dst_object = Fbuffer_string ();
7206 else
d46c5b12 7207 {
df7492f9
KH
7208 coding->dst_object
7209 = make_unibyte_string ((char *) coding->destination,
7210 coding->produced);
7211 xfree (coding->destination);
d46c5b12 7212 }
4ed46869 7213 }
d46c5b12 7214
4776e638
KH
7215 if (saved_pt >= 0)
7216 {
7217 /* This is the case of:
7218 (BUFFERP (src_object) && EQ (src_object, dst_object))
7219 As we have moved PT while replacing the original buffer
7220 contents, we must recover it now. */
7221 set_buffer_internal (XBUFFER (src_object));
7222 if (saved_pt < from)
7223 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7224 else if (saved_pt < from + chars)
7225 TEMP_SET_PT_BOTH (from, from_byte);
7226 else if (! NILP (current_buffer->enable_multibyte_characters))
7227 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7228 saved_pt_byte + (coding->produced - bytes));
d46c5b12 7229 else
4776e638
KH
7230 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7231 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7232
7233 if (need_marker_adjustment)
7234 {
7235 struct Lisp_Marker *tail;
7236
7237 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7238 if (tail->need_adjustment)
7239 {
7240 tail->need_adjustment = 0;
7241 if (tail->insertion_type)
7242 {
7243 tail->bytepos = from_byte;
7244 tail->charpos = from;
7245 }
7246 else
7247 {
7248 tail->bytepos = from_byte + coding->produced;
7249 tail->charpos
7250 = (NILP (current_buffer->enable_multibyte_characters)
7251 ? tail->bytepos : from + coding->produced_char);
7252 }
7253 }
7254 }
4776e638
KH
7255 }
7256
c02d943b
KH
7257 if (kill_src_buffer)
7258 Fkill_buffer (coding->src_object);
b3bfad50
KH
7259
7260 Vdeactivate_mark = old_deactivate_mark;
df7492f9 7261 unbind_to (count, Qnil);
b73bfc1c
KH
7262}
7263
df7492f9 7264
b73bfc1c 7265Lisp_Object
df7492f9 7266preferred_coding_system ()
b73bfc1c 7267{
df7492f9 7268 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 7269
df7492f9 7270 return CODING_ID_NAME (id);
4ed46869
KH
7271}
7272
7273\f
7274#ifdef emacs
1397dc18 7275/*** 11. Emacs Lisp library functions ***/
4ed46869 7276
4ed46869 7277DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 7278 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 7279See the documentation of `define-coding-system' for information
48b0f3ae
PJ
7280about coding-system objects. */)
7281 (obj)
4ed46869
KH
7282 Lisp_Object obj;
7283{
44e8490d
KH
7284 if (NILP (obj)
7285 || CODING_SYSTEM_ID (obj) >= 0)
7286 return Qt;
7287 if (! SYMBOLP (obj)
7288 || NILP (Fget (obj, Qcoding_system_define_form)))
7289 return Qnil;
7290 return Qt;
4ed46869
KH
7291}
7292
9d991de8
RS
7293DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7294 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
7295 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
7296 (prompt)
4ed46869
KH
7297 Lisp_Object prompt;
7298{
e0e989f6 7299 Lisp_Object val;
9d991de8
RS
7300 do
7301 {
4608c386
KH
7302 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7303 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 7304 }
8f924df7 7305 while (SCHARS (val) == 0);
e0e989f6 7306 return (Fintern (val, Qnil));
4ed46869
KH
7307}
7308
9b787f3e 7309DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae 7310 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
c7183fb8
GM
7311If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7312Ignores case when completing coding systems (all Emacs coding systems
7313are lower-case). */)
48b0f3ae 7314 (prompt, default_coding_system)
9b787f3e 7315 Lisp_Object prompt, default_coding_system;
4ed46869 7316{
f44d27ce 7317 Lisp_Object val;
c7183fb8
GM
7318 int count = SPECPDL_INDEX ();
7319
9b787f3e 7320 if (SYMBOLP (default_coding_system))
57d25e6f 7321 default_coding_system = SYMBOL_NAME (default_coding_system);
c7183fb8 7322 specbind (Qcompletion_ignore_case, Qt);
4608c386 7323 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
7324 Qt, Qnil, Qcoding_system_history,
7325 default_coding_system, Qnil);
c7183fb8 7326 unbind_to (count, Qnil);
8f924df7 7327 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
7328}
7329
7330DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7331 1, 1, 0,
48b0f3ae 7332 doc: /* Check validity of CODING-SYSTEM.
9ffd559c
KH
7333If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7334It is valid if it is nil or a symbol defined as a coding system by the
7335function `define-coding-system'. */)
df7492f9 7336 (coding_system)
4ed46869
KH
7337 Lisp_Object coding_system;
7338{
44e8490d
KH
7339 Lisp_Object define_form;
7340
7341 define_form = Fget (coding_system, Qcoding_system_define_form);
7342 if (! NILP (define_form))
7343 {
7344 Fput (coding_system, Qcoding_system_define_form, Qnil);
7345 safe_eval (define_form);
7346 }
4ed46869
KH
7347 if (!NILP (Fcoding_system_p (coding_system)))
7348 return coding_system;
fcad4ec4 7349 xsignal1 (Qcoding_system_error, coding_system);
4ed46869 7350}
df7492f9 7351
3a73fa5d 7352\f
89528eb3
KH
7353/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
7354 HIGHEST is nonzero, return the coding system of the highest
7355 priority among the detected coding systems. Otherwize return a
7356 list of detected coding systems sorted by their priorities. If
7357 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7358 multibyte form but contains only ASCII and eight-bit chars.
7359 Otherwise, the bytes are raw bytes.
7360
7361 CODING-SYSTEM controls the detection as below:
7362
7363 If it is nil, detect both text-format and eol-format. If the
7364 text-format part of CODING-SYSTEM is already specified
7365 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
7366 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7367 detect only text-format. */
7368
d46c5b12 7369Lisp_Object
24a73b0a
KH
7370detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7371 coding_system)
8f924df7 7372 const unsigned char *src;
13818c30
SM
7373 EMACS_INT src_chars, src_bytes;
7374 int highest;
0a28aafb 7375 int multibytep;
df7492f9 7376 Lisp_Object coding_system;
4ed46869 7377{
8f924df7 7378 const unsigned char *src_end = src + src_bytes;
df7492f9
KH
7379 Lisp_Object attrs, eol_type;
7380 Lisp_Object val;
7381 struct coding_system coding;
89528eb3 7382 int id;
ff0dacd7 7383 struct coding_detection_info detect_info;
24a73b0a 7384 enum coding_category base_category;
b73bfc1c 7385
df7492f9
KH
7386 if (NILP (coding_system))
7387 coding_system = Qundecided;
7388 setup_coding_system (coding_system, &coding);
7389 attrs = CODING_ID_ATTRS (coding.id);
7390 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 7391 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 7392
df7492f9 7393 coding.source = src;
24a73b0a 7394 coding.src_chars = src_chars;
df7492f9
KH
7395 coding.src_bytes = src_bytes;
7396 coding.src_multibyte = multibytep;
7397 coding.consumed = 0;
89528eb3 7398 coding.mode |= CODING_MODE_LAST_BLOCK;
d46c5b12 7399
ff0dacd7 7400 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 7401
89528eb3 7402 /* At first, detect text-format if necessary. */
24a73b0a
KH
7403 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7404 if (base_category == coding_category_undecided)
4ed46869 7405 {
ff0dacd7
KH
7406 enum coding_category category;
7407 struct coding_system *this;
7408 int c, i;
88993dfd 7409
24a73b0a
KH
7410 /* Skip all ASCII bytes except for a few ISO2022 controls. */
7411 for (i = 0; src < src_end; i++, src++)
4ed46869 7412 {
df7492f9 7413 c = *src;
6cb21a4f 7414 if (c & 0x80)
d46c5b12 7415 break;
6cb21a4f
KH
7416 if (c < 0x20
7417 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
c7266f4a 7418 && ! inhibit_iso_escape_detection)
6cb21a4f
KH
7419 {
7420 coding.head_ascii = src - coding.source;
7421 if (detect_coding_iso_2022 (&coding, &detect_info))
7422 {
7423 /* We have scanned the whole data. */
7424 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7425 /* We didn't find an 8-bit code. */
7426 src = src_end;
7427 break;
7428 }
7429 }
4ed46869 7430 }
df7492f9 7431 coding.head_ascii = src - coding.source;
88993dfd 7432
6cb21a4f
KH
7433 if (src < src_end
7434 || detect_info.found)
7435 {
7436 if (src == src_end)
7437 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
7438 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7 7439 {
6cb21a4f 7440 category = coding_priorities[i];
c7266f4a 7441 this = coding_categories + category;
6cb21a4f 7442 if (detect_info.found & (1 << category))
ff0dacd7
KH
7443 break;
7444 }
6cb21a4f
KH
7445 else
7446 for (i = 0; i < coding_category_raw_text; i++)
df7492f9 7447 {
6cb21a4f
KH
7448 category = coding_priorities[i];
7449 this = coding_categories + category;
7450
7451 if (this->id < 0)
24a73b0a 7452 {
6cb21a4f
KH
7453 /* No coding system of this category is defined. */
7454 detect_info.rejected |= (1 << category);
7455 }
7456 else if (category >= coding_category_raw_text)
7457 continue;
7458 else if (detect_info.checked & (1 << category))
7459 {
7460 if (highest
7461 && (detect_info.found & (1 << category)))
7462 break;
7463 }
7464 else
7465 {
7466 if ((*(this->detector)) (&coding, &detect_info)
7467 && highest
7468 && (detect_info.found & (1 << category)))
24a73b0a 7469 {
6cb21a4f
KH
7470 if (category == coding_category_utf_16_auto)
7471 {
7472 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7473 category = coding_category_utf_16_le;
7474 else
7475 category = coding_category_utf_16_be;
7476 }
7477 break;
24a73b0a 7478 }
24a73b0a 7479 }
df7492f9 7480 }
6cb21a4f 7481 }
ec6d2bb8 7482
ff0dacd7 7483 if (detect_info.rejected == CATEGORY_MASK_ANY)
ec6d2bb8 7484 {
ff0dacd7 7485 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
7486 id = coding_categories[coding_category_raw_text].id;
7487 val = Fcons (make_number (id), Qnil);
7488 }
ff0dacd7 7489 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 7490 {
ff0dacd7 7491 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
7492 id = coding_categories[coding_category_undecided].id;
7493 val = Fcons (make_number (id), Qnil);
7494 }
7495 else if (highest)
7496 {
ff0dacd7 7497 if (detect_info.found)
ec6d2bb8 7498 {
ff0dacd7
KH
7499 detect_info.found = 1 << category;
7500 val = Fcons (make_number (this->id), Qnil);
7501 }
7502 else
7503 for (i = 0; i < coding_category_raw_text; i++)
7504 if (! (detect_info.rejected & (1 << coding_priorities[i])))
7505 {
7506 detect_info.found = 1 << coding_priorities[i];
7507 id = coding_categories[coding_priorities[i]].id;
7508 val = Fcons (make_number (id), Qnil);
7509 break;
7510 }
7511 }
89528eb3
KH
7512 else
7513 {
ff0dacd7
KH
7514 int mask = detect_info.rejected | detect_info.found;
7515 int found = 0;
89528eb3 7516 val = Qnil;
ec6d2bb8 7517
89528eb3 7518 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
7519 {
7520 category = coding_priorities[i];
7521 if (! (mask & (1 << category)))
ec6d2bb8 7522 {
ff0dacd7
KH
7523 found |= 1 << category;
7524 id = coding_categories[category].id;
c7266f4a
KH
7525 if (id >= 0)
7526 val = Fcons (make_number (id), val);
ff0dacd7
KH
7527 }
7528 }
7529 for (i = coding_category_raw_text - 1; i >= 0; i--)
7530 {
7531 category = coding_priorities[i];
7532 if (detect_info.found & (1 << category))
7533 {
7534 id = coding_categories[category].id;
7535 val = Fcons (make_number (id), val);
ec6d2bb8 7536 }
ec6d2bb8 7537 }
ff0dacd7 7538 detect_info.found |= found;
ec6d2bb8 7539 }
ec6d2bb8 7540 }
24a73b0a
KH
7541 else if (base_category == coding_category_utf_16_auto)
7542 {
7543 if (detect_coding_utf_16 (&coding, &detect_info))
7544 {
24a73b0a
KH
7545 struct coding_system *this;
7546
7547 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7548 this = coding_categories + coding_category_utf_16_le;
7549 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7550 this = coding_categories + coding_category_utf_16_be;
7551 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7552 this = coding_categories + coding_category_utf_16_be_nosig;
7553 else
7554 this = coding_categories + coding_category_utf_16_le_nosig;
7555 val = Fcons (make_number (this->id), Qnil);
7556 }
7557 }
df7492f9
KH
7558 else
7559 {
ff0dacd7 7560 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 7561 val = Fcons (make_number (coding.id), Qnil);
4ed46869 7562 }
df7492f9 7563
89528eb3 7564 /* Then, detect eol-format if necessary. */
df7492f9 7565 {
89528eb3 7566 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
7567 Lisp_Object tail;
7568
89528eb3
KH
7569 if (VECTORP (eol_type))
7570 {
ff0dacd7 7571 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
89528eb3
KH
7572 normal_eol = detect_eol (coding.source, src_bytes,
7573 coding_category_raw_text);
ff0dacd7
KH
7574 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7575 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
7576 utf_16_be_eol = detect_eol (coding.source, src_bytes,
7577 coding_category_utf_16_be);
ff0dacd7
KH
7578 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7579 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
7580 utf_16_le_eol = detect_eol (coding.source, src_bytes,
7581 coding_category_utf_16_le);
7582 }
7583 else
7584 {
7585 if (EQ (eol_type, Qunix))
7586 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7587 else if (EQ (eol_type, Qdos))
7588 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7589 else
7590 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7591 }
7592
df7492f9
KH
7593 for (tail = val; CONSP (tail); tail = XCDR (tail))
7594 {
89528eb3 7595 enum coding_category category;
df7492f9 7596 int this_eol;
89528eb3
KH
7597
7598 id = XINT (XCAR (tail));
7599 attrs = CODING_ID_ATTRS (id);
7600 category = XINT (CODING_ATTR_CATEGORY (attrs));
7601 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
7602 if (VECTORP (eol_type))
7603 {
89528eb3
KH
7604 if (category == coding_category_utf_16_be
7605 || category == coding_category_utf_16_be_nosig)
7606 this_eol = utf_16_be_eol;
7607 else if (category == coding_category_utf_16_le
7608 || category == coding_category_utf_16_le_nosig)
7609 this_eol = utf_16_le_eol;
df7492f9 7610 else
89528eb3
KH
7611 this_eol = normal_eol;
7612
df7492f9
KH
7613 if (this_eol == EOL_SEEN_LF)
7614 XSETCAR (tail, AREF (eol_type, 0));
7615 else if (this_eol == EOL_SEEN_CRLF)
7616 XSETCAR (tail, AREF (eol_type, 1));
7617 else if (this_eol == EOL_SEEN_CR)
7618 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
7619 else
7620 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 7621 }
89528eb3
KH
7622 else
7623 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
7624 }
7625 }
ec6d2bb8 7626
03699b14 7627 return (highest ? XCAR (val) : val);
ec6d2bb8
KH
7628}
7629
ec6d2bb8 7630
d46c5b12
KH
7631DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7632 2, 3, 0,
48b0f3ae
PJ
7633 doc: /* Detect coding system of the text in the region between START and END.
7634Return a list of possible coding systems ordered by priority.
ec6d2bb8 7635
12e0131a
KH
7636If only ASCII characters are found (except for such ISO-2022 control
7637characters ISO-2022 as ESC), it returns a list of single element
48b0f3ae
PJ
7638`undecided' or its subsidiary coding system according to a detected
7639end-of-line format.
ec6d2bb8 7640
48b0f3ae
PJ
7641If optional argument HIGHEST is non-nil, return the coding system of
7642highest priority. */)
7643 (start, end, highest)
d46c5b12
KH
7644 Lisp_Object start, end, highest;
7645{
7646 int from, to;
7647 int from_byte, to_byte;
ec6d2bb8 7648
b7826503
PJ
7649 CHECK_NUMBER_COERCE_MARKER (start);
7650 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 7651
d46c5b12
KH
7652 validate_region (&start, &end);
7653 from = XINT (start), to = XINT (end);
7654 from_byte = CHAR_TO_BYTE (from);
7655 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 7656
d46c5b12
KH
7657 if (from < GPT && to >= GPT)
7658 move_gap_both (to, to_byte);
c210f766 7659
d46c5b12 7660 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 7661 to - from, to_byte - from_byte,
0a28aafb
KH
7662 !NILP (highest),
7663 !NILP (current_buffer
df7492f9
KH
7664 ->enable_multibyte_characters),
7665 Qnil);
ec6d2bb8
KH
7666}
7667
d46c5b12
KH
7668DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7669 1, 2, 0,
48b0f3ae
PJ
7670 doc: /* Detect coding system of the text in STRING.
7671Return a list of possible coding systems ordered by priority.
fb88bf2d 7672
12e0131a
KH
7673If only ASCII characters are found (except for such ISO-2022 control
7674characters ISO-2022 as ESC), it returns a list of single element
48b0f3ae
PJ
7675`undecided' or its subsidiary coding system according to a detected
7676end-of-line format.
d46c5b12 7677
48b0f3ae
PJ
7678If optional argument HIGHEST is non-nil, return the coding system of
7679highest priority. */)
7680 (string, highest)
d46c5b12
KH
7681 Lisp_Object string, highest;
7682{
b7826503 7683 CHECK_STRING (string);
b73bfc1c 7684
24a73b0a
KH
7685 return detect_coding_system (SDATA (string),
7686 SCHARS (string), SBYTES (string),
8f924df7 7687 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 7688 Qnil);
4ed46869 7689}
4ed46869 7690
b73bfc1c 7691
df7492f9
KH
7692static INLINE int
7693char_encodable_p (c, attrs)
7694 int c;
7695 Lisp_Object attrs;
05e6f5dc 7696{
df7492f9 7697 Lisp_Object tail;
df7492f9 7698 struct charset *charset;
7d64c6ad 7699 Lisp_Object translation_table;
d46c5b12 7700
7d64c6ad 7701 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 7702 if (! NILP (translation_table))
7d64c6ad 7703 c = translate_char (translation_table, c);
df7492f9
KH
7704 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7705 CONSP (tail); tail = XCDR (tail))
e133c8fa 7706 {
df7492f9
KH
7707 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7708 if (CHAR_CHARSET_P (c, charset))
7709 break;
e133c8fa 7710 }
df7492f9 7711 return (! NILP (tail));
05e6f5dc 7712}
83fa074f 7713
fb88bf2d 7714
df7492f9
KH
7715/* Return a list of coding systems that safely encode the text between
7716 START and END. If EXCLUDE is non-nil, it is a list of coding
7717 systems not to check. The returned list doesn't contain any such
48468dac 7718 coding systems. In any case, if the text contains only ASCII or is
df7492f9 7719 unibyte, return t. */
e077cc80 7720
df7492f9
KH
7721DEFUN ("find-coding-systems-region-internal",
7722 Ffind_coding_systems_region_internal,
7723 Sfind_coding_systems_region_internal, 2, 3, 0,
7724 doc: /* Internal use only. */)
7725 (start, end, exclude)
7726 Lisp_Object start, end, exclude;
7727{
7728 Lisp_Object coding_attrs_list, safe_codings;
7729 EMACS_INT start_byte, end_byte;
7c78e542 7730 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7731 int c;
7732 Lisp_Object tail, elt;
d46c5b12 7733
df7492f9
KH
7734 if (STRINGP (start))
7735 {
7736 if (!STRING_MULTIBYTE (start)
8f924df7 7737 || SCHARS (start) == SBYTES (start))
df7492f9
KH
7738 return Qt;
7739 start_byte = 0;
8f924df7 7740 end_byte = SBYTES (start);
df7492f9
KH
7741 }
7742 else
d46c5b12 7743 {
df7492f9
KH
7744 CHECK_NUMBER_COERCE_MARKER (start);
7745 CHECK_NUMBER_COERCE_MARKER (end);
7746 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7747 args_out_of_range (start, end);
7748 if (NILP (current_buffer->enable_multibyte_characters))
7749 return Qt;
7750 start_byte = CHAR_TO_BYTE (XINT (start));
7751 end_byte = CHAR_TO_BYTE (XINT (end));
7752 if (XINT (end) - XINT (start) == end_byte - start_byte)
7753 return Qt;
d46c5b12 7754
e1c23804 7755 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 7756 {
e1c23804
DL
7757 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7758 move_gap_both (XINT (start), start_byte);
df7492f9 7759 else
e1c23804 7760 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
7761 }
7762 }
7763
df7492f9
KH
7764 coding_attrs_list = Qnil;
7765 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7766 if (NILP (exclude)
7767 || NILP (Fmemq (XCAR (tail), exclude)))
7768 {
7769 Lisp_Object attrs;
d46c5b12 7770
df7492f9
KH
7771 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7772 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7773 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
7774 {
7775 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 7776 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
7777 coding_attrs_list = Fcons (attrs, coding_attrs_list);
7778 }
df7492f9 7779 }
d46c5b12 7780
df7492f9 7781 if (STRINGP (start))
8f924df7 7782 p = pbeg = SDATA (start);
df7492f9
KH
7783 else
7784 p = pbeg = BYTE_POS_ADDR (start_byte);
7785 pend = p + (end_byte - start_byte);
b843d1ae 7786
df7492f9
KH
7787 while (p < pend && ASCII_BYTE_P (*p)) p++;
7788 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 7789
05e6f5dc 7790 while (p < pend)
72d1a715 7791 {
df7492f9
KH
7792 if (ASCII_BYTE_P (*p))
7793 p++;
72d1a715
RS
7794 else
7795 {
df7492f9 7796 c = STRING_CHAR_ADVANCE (p);
12410ef1 7797
df7492f9
KH
7798 charset_map_loaded = 0;
7799 for (tail = coding_attrs_list; CONSP (tail);)
7800 {
7801 elt = XCAR (tail);
7802 if (NILP (elt))
7803 tail = XCDR (tail);
7804 else if (char_encodable_p (c, elt))
7805 tail = XCDR (tail);
7806 else if (CONSP (XCDR (tail)))
7807 {
7808 XSETCAR (tail, XCAR (XCDR (tail)));
7809 XSETCDR (tail, XCDR (XCDR (tail)));
7810 }
7811 else
7812 {
7813 XSETCAR (tail, Qnil);
7814 tail = XCDR (tail);
7815 }
7816 }
7817 if (charset_map_loaded)
7818 {
7819 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 7820
df7492f9 7821 if (STRINGP (start))
8f924df7 7822 pbeg = SDATA (start);
df7492f9
KH
7823 else
7824 pbeg = BYTE_POS_ADDR (start_byte);
7825 p = pbeg + p_offset;
7826 pend = pbeg + pend_offset;
7827 }
7828 }
ec6d2bb8 7829 }
fb88bf2d 7830
988b3759 7831 safe_codings = list2 (Qraw_text, Qno_conversion);
df7492f9
KH
7832 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7833 if (! NILP (XCAR (tail)))
7834 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 7835
05e6f5dc
KH
7836 return safe_codings;
7837}
4956c225 7838
d46c5b12 7839
8f924df7
KH
7840DEFUN ("unencodable-char-position", Funencodable_char_position,
7841 Sunencodable_char_position, 3, 5, 0,
7842 doc: /*
7843Return position of first un-encodable character in a region.
7844START and END specfiy the region and CODING-SYSTEM specifies the
7845encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 7846
8f924df7
KH
7847If optional 4th argument COUNT is non-nil, it specifies at most how
7848many un-encodable characters to search. In this case, the value is a
7849list of positions.
d46c5b12 7850
8f924df7
KH
7851If optional 5th argument STRING is non-nil, it is a string to search
7852for un-encodable characters. In that case, START and END are indexes
7853to the string. */)
7854 (start, end, coding_system, count, string)
7855 Lisp_Object start, end, coding_system, count, string;
7856{
7857 int n;
7858 struct coding_system coding;
7d64c6ad 7859 Lisp_Object attrs, charset_list, translation_table;
8f924df7
KH
7860 Lisp_Object positions;
7861 int from, to;
7862 const unsigned char *p, *stop, *pend;
7863 int ascii_compatible;
fb88bf2d 7864
8f924df7
KH
7865 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7866 attrs = CODING_ID_ATTRS (coding.id);
7867 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7868 return Qnil;
7869 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7870 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 7871 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 7872
8f924df7
KH
7873 if (NILP (string))
7874 {
7875 validate_region (&start, &end);
7876 from = XINT (start);
7877 to = XINT (end);
7878 if (NILP (current_buffer->enable_multibyte_characters)
7879 || (ascii_compatible
7880 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7881 return Qnil;
7882 p = CHAR_POS_ADDR (from);
7883 pend = CHAR_POS_ADDR (to);
7884 if (from < GPT && to >= GPT)
7885 stop = GPT_ADDR;
7886 else
7887 stop = pend;
7888 }
7889 else
7890 {
7891 CHECK_STRING (string);
7892 CHECK_NATNUM (start);
7893 CHECK_NATNUM (end);
7894 from = XINT (start);
7895 to = XINT (end);
7896 if (from > to
7897 || to > SCHARS (string))
7898 args_out_of_range_3 (string, start, end);
7899 if (! STRING_MULTIBYTE (string))
7900 return Qnil;
7901 p = SDATA (string) + string_char_to_byte (string, from);
7902 stop = pend = SDATA (string) + string_char_to_byte (string, to);
7903 if (ascii_compatible && (to - from) == (pend - p))
7904 return Qnil;
7905 }
f2558efd 7906
8f924df7
KH
7907 if (NILP (count))
7908 n = 1;
7909 else
b73bfc1c 7910 {
8f924df7
KH
7911 CHECK_NATNUM (count);
7912 n = XINT (count);
b73bfc1c
KH
7913 }
7914
8f924df7
KH
7915 positions = Qnil;
7916 while (1)
d46c5b12 7917 {
8f924df7 7918 int c;
ec6d2bb8 7919
8f924df7
KH
7920 if (ascii_compatible)
7921 while (p < stop && ASCII_BYTE_P (*p))
7922 p++, from++;
7923 if (p >= stop)
0e79d667 7924 {
8f924df7
KH
7925 if (p >= pend)
7926 break;
7927 stop = pend;
7928 p = GAP_END_ADDR;
0e79d667 7929 }
ec6d2bb8 7930
8f924df7
KH
7931 c = STRING_CHAR_ADVANCE (p);
7932 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
7933 && ! char_charset (translate_char (translation_table, c),
7934 charset_list, NULL))
ec6d2bb8 7935 {
8f924df7
KH
7936 positions = Fcons (make_number (from), positions);
7937 n--;
7938 if (n == 0)
7939 break;
ec6d2bb8
KH
7940 }
7941
8f924df7
KH
7942 from++;
7943 }
d46c5b12 7944
8f924df7
KH
7945 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7946}
d46c5b12 7947
d46c5b12 7948
df7492f9
KH
7949DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7950 Scheck_coding_systems_region, 3, 3, 0,
7951 doc: /* Check if the region is encodable by coding systems.
d46c5b12 7952
df7492f9
KH
7953START and END are buffer positions specifying the region.
7954CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 7955
df7492f9
KH
7956The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7957CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7958whole region, POS0, POS1, ... are buffer positions where non-encodable
7959characters are found.
93dec019 7960
df7492f9
KH
7961If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7962value is nil.
93dec019 7963
df7492f9
KH
7964START may be a string. In that case, check if the string is
7965encodable, and the value contains indices to the string instead of
7966buffer positions. END is ignored. */)
7967 (start, end, coding_system_list)
7968 Lisp_Object start, end, coding_system_list;
05e6f5dc 7969{
df7492f9
KH
7970 Lisp_Object list;
7971 EMACS_INT start_byte, end_byte;
7972 int pos;
7c78e542 7973 const unsigned char *p, *pbeg, *pend;
df7492f9 7974 int c;
7d64c6ad 7975 Lisp_Object tail, elt, attrs;
70ad9fc4 7976
05e6f5dc
KH
7977 if (STRINGP (start))
7978 {
df7492f9 7979 if (!STRING_MULTIBYTE (start)
8f924df7 7980 && SCHARS (start) != SBYTES (start))
df7492f9
KH
7981 return Qnil;
7982 start_byte = 0;
8f924df7 7983 end_byte = SBYTES (start);
df7492f9 7984 pos = 0;
d46c5b12 7985 }
05e6f5dc 7986 else
b73bfc1c 7987 {
b7826503
PJ
7988 CHECK_NUMBER_COERCE_MARKER (start);
7989 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
7990 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7991 args_out_of_range (start, end);
7992 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
7993 return Qnil;
7994 start_byte = CHAR_TO_BYTE (XINT (start));
7995 end_byte = CHAR_TO_BYTE (XINT (end));
7996 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 7997 return Qt;
df7492f9 7998
e1c23804 7999 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 8000 {
e1c23804
DL
8001 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8002 move_gap_both (XINT (start), start_byte);
df7492f9 8003 else
e1c23804 8004 move_gap_both (XINT (end), end_byte);
b73bfc1c 8005 }
e1c23804 8006 pos = XINT (start);
b73bfc1c 8007 }
7553d0e1 8008
df7492f9
KH
8009 list = Qnil;
8010 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 8011 {
df7492f9 8012 elt = XCAR (tail);
7d64c6ad 8013 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
8014 ASET (attrs, coding_attr_trans_tbl,
8015 get_translation_table (attrs, 1, NULL));
7d64c6ad 8016 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
8017 }
8018
df7492f9 8019 if (STRINGP (start))
8f924df7 8020 p = pbeg = SDATA (start);
72d1a715 8021 else
df7492f9
KH
8022 p = pbeg = BYTE_POS_ADDR (start_byte);
8023 pend = p + (end_byte - start_byte);
4ed46869 8024
df7492f9
KH
8025 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8026 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 8027
df7492f9 8028 while (p < pend)
d46c5b12 8029 {
df7492f9
KH
8030 if (ASCII_BYTE_P (*p))
8031 p++;
e133c8fa 8032 else
05e6f5dc 8033 {
df7492f9
KH
8034 c = STRING_CHAR_ADVANCE (p);
8035
8036 charset_map_loaded = 0;
8037 for (tail = list; CONSP (tail); tail = XCDR (tail))
8038 {
8039 elt = XCDR (XCAR (tail));
8040 if (! char_encodable_p (c, XCAR (elt)))
8041 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8042 }
8043 if (charset_map_loaded)
8044 {
8045 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8046
8047 if (STRINGP (start))
8f924df7 8048 pbeg = SDATA (start);
df7492f9
KH
8049 else
8050 pbeg = BYTE_POS_ADDR (start_byte);
8051 p = pbeg + p_offset;
8052 pend = pbeg + pend_offset;
8053 }
05e6f5dc 8054 }
df7492f9 8055 pos++;
d46c5b12 8056 }
4ed46869 8057
df7492f9
KH
8058 tail = list;
8059 list = Qnil;
8060 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 8061 {
df7492f9
KH
8062 elt = XCAR (tail);
8063 if (CONSP (XCDR (XCDR (elt))))
8064 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8065 list);
ec6d2bb8 8066 }
2b4f9037 8067
df7492f9 8068 return list;
d46c5b12
KH
8069}
8070
3fd9494b 8071
b73bfc1c 8072Lisp_Object
df7492f9
KH
8073code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8074 Lisp_Object start, end, coding_system, dst_object;
8075 int encodep, norecord;
4ed46869 8076{
3a73fa5d 8077 struct coding_system coding;
df7492f9
KH
8078 EMACS_INT from, from_byte, to, to_byte;
8079 Lisp_Object src_object;
4ed46869 8080
b7826503
PJ
8081 CHECK_NUMBER_COERCE_MARKER (start);
8082 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
8083 if (NILP (coding_system))
8084 coding_system = Qno_conversion;
8085 else
8086 CHECK_CODING_SYSTEM (coding_system);
8087 src_object = Fcurrent_buffer ();
8088 if (NILP (dst_object))
8089 dst_object = src_object;
8090 else if (! EQ (dst_object, Qt))
8091 CHECK_BUFFER (dst_object);
3a73fa5d 8092
d46c5b12
KH
8093 validate_region (&start, &end);
8094 from = XFASTINT (start);
df7492f9 8095 from_byte = CHAR_TO_BYTE (from);
d46c5b12 8096 to = XFASTINT (end);
df7492f9 8097 to_byte = CHAR_TO_BYTE (to);
764ca8da 8098
df7492f9
KH
8099 setup_coding_system (coding_system, &coding);
8100 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 8101
df7492f9
KH
8102 if (encodep)
8103 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8104 dst_object);
8105 else
8106 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8107 dst_object);
8108 if (! norecord)
8109 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 8110
df7492f9
KH
8111 return (BUFFERP (dst_object)
8112 ? make_number (coding.produced_char)
8113 : coding.dst_object);
4031e2bf 8114}
78108bcd 8115
4ed46869 8116
4031e2bf 8117DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 8118 3, 4, "r\nzCoding system: ",
48b0f3ae 8119 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
8120When called from a program, takes four arguments:
8121 START, END, CODING-SYSTEM, and DESTINATION.
8122START and END are buffer positions.
8844fa83 8123
df7492f9 8124Optional 4th arguments DESTINATION specifies where the decoded text goes.
2354b80b 8125If nil, the region between START and END is replaced by the decoded text.
df7492f9
KH
8126If buffer, the decoded text is inserted in the buffer.
8127If t, the decoded text is returned.
8844fa83 8128
48b0f3ae
PJ
8129This function sets `last-coding-system-used' to the precise coding system
8130used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8131not fully specified.)
8132It returns the length of the decoded text. */)
df7492f9
KH
8133 (start, end, coding_system, destination)
8134 Lisp_Object start, end, coding_system, destination;
4031e2bf 8135{
df7492f9 8136 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 8137}
8844fa83 8138
3a73fa5d 8139DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
8140 3, 4, "r\nzCoding system: ",
8141 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
8142When called from a program, takes three arguments:
8143START, END, and CODING-SYSTEM. START and END are buffer positions.
d46c5b12 8144
df7492f9
KH
8145Optional 4th arguments DESTINATION specifies where the encoded text goes.
8146If nil, the region between START and END is replace by the encoded text.
8147If buffer, the encoded text is inserted in the buffer.
8148If t, the encoded text is returned.
2391eaa4 8149
48b0f3ae
PJ
8150This function sets `last-coding-system-used' to the precise coding system
8151used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8152not fully specified.)
8153It returns the length of the encoded text. */)
df7492f9
KH
8154 (start, end, coding_system, destination)
8155 Lisp_Object start, end, coding_system, destination;
3a73fa5d 8156{
df7492f9 8157 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
8158}
8159
8160Lisp_Object
df7492f9
KH
8161code_convert_string (string, coding_system, dst_object,
8162 encodep, nocopy, norecord)
8163 Lisp_Object string, coding_system, dst_object;
8164 int encodep, nocopy, norecord;
b73bfc1c 8165{
4031e2bf 8166 struct coding_system coding;
df7492f9 8167 EMACS_INT chars, bytes;
ec6d2bb8 8168
b7826503 8169 CHECK_STRING (string);
d46c5b12 8170 if (NILP (coding_system))
4956c225 8171 {
df7492f9
KH
8172 if (! norecord)
8173 Vlast_coding_system_used = Qno_conversion;
8174 if (NILP (dst_object))
8175 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 8176 }
b73bfc1c 8177
df7492f9
KH
8178 if (NILP (coding_system))
8179 coding_system = Qno_conversion;
8180 else
8181 CHECK_CODING_SYSTEM (coding_system);
8182 if (NILP (dst_object))
8183 dst_object = Qt;
8184 else if (! EQ (dst_object, Qt))
8185 CHECK_BUFFER (dst_object);
73be902c 8186
df7492f9 8187 setup_coding_system (coding_system, &coding);
d46c5b12 8188 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
8189 chars = SCHARS (string);
8190 bytes = SBYTES (string);
df7492f9
KH
8191 if (encodep)
8192 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8193 else
8194 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8195 if (! norecord)
8196 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 8197
df7492f9
KH
8198 return (BUFFERP (dst_object)
8199 ? make_number (coding.produced_char)
8200 : coding.dst_object);
4ed46869 8201}
73be902c 8202
b73bfc1c 8203
ecec61c1 8204/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 8205 Do not set Vlast_coding_system_used.
4ed46869 8206
ec6d2bb8
KH
8207 This function is called only from macros DECODE_FILE and
8208 ENCODE_FILE, thus we ignore character composition. */
4ed46869 8209
ecec61c1
KH
8210Lisp_Object
8211code_convert_string_norecord (string, coding_system, encodep)
8212 Lisp_Object string, coding_system;
8213 int encodep;
4ed46869 8214{
0be8721c 8215 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
8216}
8217
4ed46869 8218
df7492f9
KH
8219DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8220 2, 4, 0,
8221 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8222
8223Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8224if the decoding operation is trivial.
ecec61c1 8225
df7492f9 8226Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 8227inserted in BUFFER instead of returned as a string. In this case,
df7492f9 8228the return value is BUFFER.
ecec61c1 8229
df7492f9
KH
8230This function sets `last-coding-system-used' to the precise coding system
8231used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8232not fully specified. */)
8233 (string, coding_system, nocopy, buffer)
8234 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 8235{
df7492f9
KH
8236 return code_convert_string (string, coding_system, buffer,
8237 0, ! NILP (nocopy), 0);
4ed46869
KH
8238}
8239
df7492f9
KH
8240DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8241 2, 4, 0,
8242 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8243
8244Optional third arg NOCOPY non-nil means it is OK to return STRING
8245itself if the encoding operation is trivial.
8246
8247Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 8248inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
8249the return value is BUFFER.
8250
8251This function sets `last-coding-system-used' to the precise coding system
8252used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8253not fully specified.) */)
8254 (string, coding_system, nocopy, buffer)
8255 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 8256{
df7492f9 8257 return code_convert_string (string, coding_system, buffer,
c197f191 8258 1, ! NILP (nocopy), 1);
4ed46869 8259}
df7492f9 8260
3a73fa5d 8261\f
4ed46869 8262DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
8263 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8264Return the corresponding character. */)
8265 (code)
4ed46869 8266 Lisp_Object code;
4ed46869 8267{
df7492f9
KH
8268 Lisp_Object spec, attrs, val;
8269 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8270 int c;
4ed46869 8271
df7492f9
KH
8272 CHECK_NATNUM (code);
8273 c = XFASTINT (code);
8274 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8275 attrs = AREF (spec, 0);
4ed46869 8276
df7492f9
KH
8277 if (ASCII_BYTE_P (c)
8278 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8279 return code;
4ed46869 8280
df7492f9
KH
8281 val = CODING_ATTR_CHARSET_LIST (attrs);
8282 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
8283 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8284 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 8285
df7492f9
KH
8286 if (c <= 0x7F)
8287 charset = charset_roman;
8288 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 8289 {
df7492f9
KH
8290 charset = charset_kana;
8291 c -= 0x80;
4ed46869 8292 }
55ab7be3 8293 else
4ed46869 8294 {
004068e4 8295 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
8296
8297 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8298 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8299 error ("Invalid code: %d", code);
8300 SJIS_TO_JIS (c);
8301 charset = charset_kanji;
4ed46869 8302 }
df7492f9
KH
8303 c = DECODE_CHAR (charset, c);
8304 if (c < 0)
8305 error ("Invalid code: %d", code);
8306 return make_number (c);
93dec019 8307}
4ed46869 8308
48b0f3ae 8309
4ed46869 8310DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8acf0c0e 8311 doc: /* Encode a Japanese character CH to shift_jis encoding.
48b0f3ae
PJ
8312Return the corresponding code in SJIS. */)
8313 (ch)
df7492f9 8314 Lisp_Object ch;
4ed46869 8315{
df7492f9
KH
8316 Lisp_Object spec, attrs, charset_list;
8317 int c;
8318 struct charset *charset;
8319 unsigned code;
48b0f3ae 8320
df7492f9
KH
8321 CHECK_CHARACTER (ch);
8322 c = XFASTINT (ch);
8323 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8324 attrs = AREF (spec, 0);
8325
8326 if (ASCII_CHAR_P (c)
8327 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8328 return ch;
8329
8330 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8331 charset = char_charset (c, charset_list, &code);
8332 if (code == CHARSET_INVALID_CODE (charset))
8333 error ("Can't encode by shift_jis encoding: %d", c);
8334 JIS_TO_SJIS (code);
8335
8336 return make_number (code);
4ed46869
KH
8337}
8338
8339DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
8340 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8341Return the corresponding character. */)
8342 (code)
4ed46869 8343 Lisp_Object code;
d46c5b12 8344{
df7492f9
KH
8345 Lisp_Object spec, attrs, val;
8346 struct charset *charset_roman, *charset_big5, *charset;
8347 int c;
6289dd10 8348
df7492f9
KH
8349 CHECK_NATNUM (code);
8350 c = XFASTINT (code);
8351 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8352 attrs = AREF (spec, 0);
4ed46869 8353
df7492f9
KH
8354 if (ASCII_BYTE_P (c)
8355 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8356 return code;
6289dd10 8357
df7492f9
KH
8358 val = CODING_ATTR_CHARSET_LIST (attrs);
8359 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8360 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 8361
df7492f9
KH
8362 if (c <= 0x7F)
8363 charset = charset_roman;
c28a9453
KH
8364 else
8365 {
df7492f9
KH
8366 int b1 = c >> 8, b2 = c & 0x7F;
8367 if (b1 < 0xA1 || b1 > 0xFE
8368 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8369 error ("Invalid code: %d", code);
8370 charset = charset_big5;
c28a9453 8371 }
df7492f9
KH
8372 c = DECODE_CHAR (charset, (unsigned )c);
8373 if (c < 0)
8374 error ("Invalid code: %d", code);
8375 return make_number (c);
d46c5b12 8376}
6289dd10 8377
4ed46869 8378DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8acf0c0e 8379 doc: /* Encode the Big5 character CH to BIG5 coding system.
48b0f3ae
PJ
8380Return the corresponding character code in Big5. */)
8381 (ch)
4ed46869
KH
8382 Lisp_Object ch;
8383{
df7492f9
KH
8384 Lisp_Object spec, attrs, charset_list;
8385 struct charset *charset;
8386 int c;
8387 unsigned code;
8388
8389 CHECK_CHARACTER (ch);
8390 c = XFASTINT (ch);
8391 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8392 attrs = AREF (spec, 0);
8393 if (ASCII_CHAR_P (c)
8394 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8395 return ch;
8396
8397 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8398 charset = char_charset (c, charset_list, &code);
8399 if (code == CHARSET_INVALID_CODE (charset))
8400 error ("Can't encode by Big5 encoding: %d", c);
8401
8402 return make_number (code);
4ed46869 8403}
48b0f3ae 8404
3a73fa5d 8405\f
002fdb44 8406DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
68bba4e4 8407 Sset_terminal_coding_system_internal, 1, 2, 0,
48b0f3ae 8408 doc: /* Internal use only. */)
6ed8eeff 8409 (coding_system, terminal)
b74e4686 8410 Lisp_Object coding_system;
6ed8eeff 8411 Lisp_Object terminal;
4ed46869 8412{
6ed8eeff 8413 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
b7826503 8414 CHECK_SYMBOL (coding_system);
b8299c66 8415 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
70c22245 8416 /* We had better not send unsafe characters to terminal. */
c73bd236 8417 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
df7492f9 8418 /* Characer composition should be disabled. */
c73bd236 8419 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b8299c66
KL
8420 terminal_coding->src_multibyte = 1;
8421 terminal_coding->dst_multibyte = 0;
4ed46869
KH
8422 return Qnil;
8423}
8424
c4825358
KH
8425DEFUN ("set-safe-terminal-coding-system-internal",
8426 Fset_safe_terminal_coding_system_internal,
48b0f3ae 8427 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 8428 doc: /* Internal use only. */)
48b0f3ae 8429 (coding_system)
b74e4686 8430 Lisp_Object coding_system;
d46c5b12 8431{
b7826503 8432 CHECK_SYMBOL (coding_system);
c4825358
KH
8433 setup_coding_system (Fcheck_coding_system (coding_system),
8434 &safe_terminal_coding);
df7492f9
KH
8435 /* Characer composition should be disabled. */
8436 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
8437 safe_terminal_coding.src_multibyte = 1;
8438 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
8439 return Qnil;
8440}
4ed46869 8441
002fdb44 8442DEFUN ("terminal-coding-system", Fterminal_coding_system,
68bba4e4 8443 Sterminal_coding_system, 0, 1, 0,
6ed8eeff
KL
8444 doc: /* Return coding system specified for terminal output on the given terminal.
8445TERMINAL may be a terminal id, a frame, or nil for the selected
8446frame's terminal device. */)
8447 (terminal)
8448 Lisp_Object terminal;
4ed46869 8449{
985773c9
MB
8450 struct coding_system *terminal_coding
8451 = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8452 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
ae6f73fa 8453
ae6f73fa 8454 /* For backward compatibility, return nil if it is `undecided'. */
f75c90a9 8455 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
4ed46869
KH
8456}
8457
002fdb44 8458DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
68bba4e4 8459 Sset_keyboard_coding_system_internal, 1, 2, 0,
48b0f3ae 8460 doc: /* Internal use only. */)
6ed8eeff 8461 (coding_system, terminal)
4ed46869 8462 Lisp_Object coding_system;
6ed8eeff 8463 Lisp_Object terminal;
4ed46869 8464{
6ed8eeff 8465 struct terminal *t = get_terminal (terminal, 1);
b7826503 8466 CHECK_SYMBOL (coding_system);
df7492f9 8467 setup_coding_system (Fcheck_coding_system (coding_system),
c73bd236 8468 TERMINAL_KEYBOARD_CODING (t));
df7492f9 8469 /* Characer composition should be disabled. */
c73bd236
MB
8470 TERMINAL_KEYBOARD_CODING (t)->common_flags
8471 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
8472 return Qnil;
8473}
8474
8475DEFUN ("keyboard-coding-system",
985773c9 8476 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
48b0f3ae 8477 doc: /* Return coding system specified for decoding keyboard input. */)
985773c9
MB
8478 (terminal)
8479 Lisp_Object terminal;
4ed46869 8480{
985773c9
MB
8481 return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8482 (get_terminal (terminal, 1))->id);
4ed46869
KH
8483}
8484
4ed46869 8485\f
a5d301df
KH
8486DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8487 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
8488 doc: /* Choose a coding system for an operation based on the target name.
8489The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8490DECODING-SYSTEM is the coding system to use for decoding
8491\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8492for encoding (in case OPERATION does encoding).
05e6f5dc 8493
48b0f3ae
PJ
8494The first argument OPERATION specifies an I/O primitive:
8495 For file I/O, `insert-file-contents' or `write-region'.
8496 For process I/O, `call-process', `call-process-region', or `start-process'.
8497 For network I/O, `open-network-stream'.
05e6f5dc 8498
48b0f3ae
PJ
8499The remaining arguments should be the same arguments that were passed
8500to the primitive. Depending on which primitive, one of those arguments
8501is selected as the TARGET. For example, if OPERATION does file I/O,
8502whichever argument specifies the file name is TARGET.
05e6f5dc 8503
48b0f3ae 8504TARGET has a meaning which depends on OPERATION:
b883cdb2 8505 For file I/O, TARGET is a file name (except for the special case below).
48b0f3ae
PJ
8506 For process I/O, TARGET is a process name.
8507 For network I/O, TARGET is a service name or a port number
05e6f5dc 8508
48b0f3ae
PJ
8509This function looks up what specified for TARGET in,
8510`file-coding-system-alist', `process-coding-system-alist',
8511or `network-coding-system-alist' depending on OPERATION.
8512They may specify a coding system, a cons of coding systems,
8513or a function symbol to call.
8514In the last case, we call the function with one argument,
8515which is a list of all the arguments given to this function.
1011c487
MB
8516If the function can't decide a coding system, it can return
8517`undecided' so that the normal code-detection is performed.
48b0f3ae 8518
b883cdb2
MB
8519If OPERATION is `insert-file-contents', the argument corresponding to
8520TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
8521file name to look up, and BUFFER is a buffer that contains the file's
8522contents (not yet decoded). If `file-coding-system-alist' specifies a
8523function to call for FILENAME, that function should examine the
8524contents of BUFFER instead of reading the file.
8525
d918f936 8526usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
48b0f3ae 8527 (nargs, args)
4ed46869
KH
8528 int nargs;
8529 Lisp_Object *args;
6b89e3aa 8530{
4ed46869
KH
8531 Lisp_Object operation, target_idx, target, val;
8532 register Lisp_Object chain;
177c0ea7 8533
4ed46869
KH
8534 if (nargs < 2)
8535 error ("Too few arguments");
8536 operation = args[0];
8537 if (!SYMBOLP (operation)
8538 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 8539 error ("Invalid first arguement");
4ed46869
KH
8540 if (nargs < 1 + XINT (target_idx))
8541 error ("Too few arguments for operation: %s",
8f924df7 8542 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
8543 target = args[XINT (target_idx) + 1];
8544 if (!(STRINGP (target)
091a0ff0
KH
8545 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8546 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
4ed46869 8547 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 8548 error ("Invalid %dth argument", XINT (target_idx) + 1);
091a0ff0
KH
8549 if (CONSP (target))
8550 target = XCAR (target);
4ed46869 8551
2e34157c
RS
8552 chain = ((EQ (operation, Qinsert_file_contents)
8553 || EQ (operation, Qwrite_region))
02ba4723 8554 ? Vfile_coding_system_alist
2e34157c 8555 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
8556 ? Vnetwork_coding_system_alist
8557 : Vprocess_coding_system_alist));
4ed46869
KH
8558 if (NILP (chain))
8559 return Qnil;
8560
03699b14 8561 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 8562 {
f44d27ce 8563 Lisp_Object elt;
6b89e3aa 8564
df7492f9 8565 elt = XCAR (chain);
4ed46869
KH
8566 if (CONSP (elt)
8567 && ((STRINGP (target)
03699b14
KR
8568 && STRINGP (XCAR (elt))
8569 && fast_string_match (XCAR (elt), target) >= 0)
8570 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 8571 {
03699b14 8572 val = XCDR (elt);
b19fd4c5
KH
8573 /* Here, if VAL is both a valid coding system and a valid
8574 function symbol, we return VAL as a coding system. */
02ba4723
KH
8575 if (CONSP (val))
8576 return val;
8577 if (! SYMBOLP (val))
8578 return Qnil;
8579 if (! NILP (Fcoding_system_p (val)))
8580 return Fcons (val, val);
b19fd4c5 8581 if (! NILP (Ffboundp (val)))
6b89e3aa 8582 {
e2b97060
MB
8583 /* We use call1 rather than safe_call1
8584 so as to get bug reports about functions called here
8585 which don't handle the current interface. */
8586 val = call1 (val, Flist (nargs, args));
b19fd4c5
KH
8587 if (CONSP (val))
8588 return val;
8589 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8590 return Fcons (val, val);
6b89e3aa 8591 }
02ba4723 8592 return Qnil;
6b89e3aa
KH
8593 }
8594 }
4ed46869 8595 return Qnil;
6b89e3aa
KH
8596}
8597
df7492f9 8598DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 8599 Sset_coding_system_priority, 0, MANY, 0,
da7db224 8600 doc: /* Assign higher priority to the coding systems given as arguments.
ff563fce 8601If multiple coding systems belongs to the same category,
a3181084
DL
8602all but the first one are ignored.
8603
8604usage: (set-coding-system-priority ...) */)
df7492f9
KH
8605 (nargs, args)
8606 int nargs;
8607 Lisp_Object *args;
8608{
8609 int i, j;
8610 int changed[coding_category_max];
8611 enum coding_category priorities[coding_category_max];
8612
8613 bzero (changed, sizeof changed);
6b89e3aa 8614
df7492f9 8615 for (i = j = 0; i < nargs; i++)
6b89e3aa 8616 {
df7492f9
KH
8617 enum coding_category category;
8618 Lisp_Object spec, attrs;
6b89e3aa 8619
df7492f9
KH
8620 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8621 attrs = AREF (spec, 0);
8622 category = XINT (CODING_ATTR_CATEGORY (attrs));
8623 if (changed[category])
8624 /* Ignore this coding system because a coding system of the
8625 same category already had a higher priority. */
8626 continue;
8627 changed[category] = 1;
8628 priorities[j++] = category;
8629 if (coding_categories[category].id >= 0
8630 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8631 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 8632 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 8633 }
6b89e3aa 8634
df7492f9
KH
8635 /* Now we have decided top J priorities. Reflect the order of the
8636 original priorities to the remaining priorities. */
6b89e3aa 8637
df7492f9 8638 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 8639 {
df7492f9
KH
8640 while (j < coding_category_max
8641 && changed[coding_priorities[j]])
8642 j++;
8643 if (j == coding_category_max)
8644 abort ();
8645 priorities[i] = coding_priorities[j];
8646 }
6b89e3aa 8647
df7492f9 8648 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 8649
ff563fce
KH
8650 /* Update `coding-category-list'. */
8651 Vcoding_category_list = Qnil;
8652 for (i = coding_category_max - 1; i >= 0; i--)
8653 Vcoding_category_list
8654 = Fcons (AREF (Vcoding_category_table, priorities[i]),
8655 Vcoding_category_list);
6b89e3aa 8656
df7492f9 8657 return Qnil;
6b89e3aa
KH
8658}
8659
df7492f9
KH
8660DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8661 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
8662 doc: /* Return a list of coding systems ordered by their priorities.
8663HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
8664 (highestp)
8665 Lisp_Object highestp;
d46c5b12
KH
8666{
8667 int i;
df7492f9 8668 Lisp_Object val;
6b89e3aa 8669
df7492f9 8670 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 8671 {
df7492f9
KH
8672 enum coding_category category = coding_priorities[i];
8673 int id = coding_categories[category].id;
8674 Lisp_Object attrs;
068a9dbd 8675
df7492f9
KH
8676 if (id < 0)
8677 continue;
8678 attrs = CODING_ID_ATTRS (id);
8679 if (! NILP (highestp))
8680 return CODING_ATTR_BASE_NAME (attrs);
8681 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8682 }
8683 return Fnreverse (val);
8684}
068a9dbd 8685
f0064e1f 8686static char *suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
8687
8688static Lisp_Object
df7492f9
KH
8689make_subsidiaries (base)
8690 Lisp_Object base;
068a9dbd 8691{
df7492f9 8692 Lisp_Object subsidiaries;
8f924df7 8693 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
8694 char *buf = (char *) alloca (base_name_len + 6);
8695 int i;
068a9dbd 8696
8f924df7 8697 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
8698 subsidiaries = Fmake_vector (make_number (3), Qnil);
8699 for (i = 0; i < 3; i++)
068a9dbd 8700 {
df7492f9
KH
8701 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8702 ASET (subsidiaries, i, intern (buf));
068a9dbd 8703 }
df7492f9 8704 return subsidiaries;
068a9dbd
KH
8705}
8706
8707
df7492f9
KH
8708DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8709 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
8710 doc: /* For internal use only.
8711usage: (define-coding-system-internal ...) */)
df7492f9
KH
8712 (nargs, args)
8713 int nargs;
8714 Lisp_Object *args;
068a9dbd 8715{
df7492f9
KH
8716 Lisp_Object name;
8717 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
8718 Lisp_Object attrs; /* Vector of attributes. */
8719 Lisp_Object eol_type;
8720 Lisp_Object aliases;
8721 Lisp_Object coding_type, charset_list, safe_charsets;
8722 enum coding_category category;
8723 Lisp_Object tail, val;
8724 int max_charset_id = 0;
8725 int i;
068a9dbd 8726
df7492f9
KH
8727 if (nargs < coding_arg_max)
8728 goto short_args;
068a9dbd 8729
df7492f9 8730 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 8731
df7492f9
KH
8732 name = args[coding_arg_name];
8733 CHECK_SYMBOL (name);
8734 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 8735
df7492f9
KH
8736 val = args[coding_arg_mnemonic];
8737 if (! STRINGP (val))
8738 CHECK_CHARACTER (val);
8739 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 8740
df7492f9
KH
8741 coding_type = args[coding_arg_coding_type];
8742 CHECK_SYMBOL (coding_type);
8743 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 8744
df7492f9
KH
8745 charset_list = args[coding_arg_charset_list];
8746 if (SYMBOLP (charset_list))
8747 {
8748 if (EQ (charset_list, Qiso_2022))
8749 {
8750 if (! EQ (coding_type, Qiso_2022))
8751 error ("Invalid charset-list");
8752 charset_list = Viso_2022_charset_list;
8753 }
8754 else if (EQ (charset_list, Qemacs_mule))
8755 {
8756 if (! EQ (coding_type, Qemacs_mule))
8757 error ("Invalid charset-list");
8758 charset_list = Vemacs_mule_charset_list;
8759 }
8760 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8761 if (max_charset_id < XFASTINT (XCAR (tail)))
8762 max_charset_id = XFASTINT (XCAR (tail));
8763 }
068a9dbd
KH
8764 else
8765 {
df7492f9 8766 charset_list = Fcopy_sequence (charset_list);
985773c9 8767 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
068a9dbd 8768 {
df7492f9
KH
8769 struct charset *charset;
8770
985773c9 8771 val = XCAR (tail);
df7492f9
KH
8772 CHECK_CHARSET_GET_CHARSET (val, charset);
8773 if (EQ (coding_type, Qiso_2022)
8774 ? CHARSET_ISO_FINAL (charset) < 0
8775 : EQ (coding_type, Qemacs_mule)
8776 ? CHARSET_EMACS_MULE_ID (charset) < 0
8777 : 0)
8778 error ("Can't handle charset `%s'",
8f924df7 8779 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 8780
8f924df7 8781 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
8782 if (max_charset_id < charset->id)
8783 max_charset_id = charset->id;
068a9dbd
KH
8784 }
8785 }
df7492f9 8786 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 8787
df7492f9
KH
8788 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8789 make_number (255));
8790 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 8791 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 8792 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 8793
584948ac 8794 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 8795
df7492f9 8796 val = args[coding_arg_decode_translation_table];
a6f87d34 8797 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8798 CHECK_SYMBOL (val);
df7492f9 8799 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 8800
df7492f9 8801 val = args[coding_arg_encode_translation_table];
a6f87d34 8802 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8803 CHECK_SYMBOL (val);
df7492f9 8804 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 8805
df7492f9
KH
8806 val = args[coding_arg_post_read_conversion];
8807 CHECK_SYMBOL (val);
8808 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 8809
df7492f9
KH
8810 val = args[coding_arg_pre_write_conversion];
8811 CHECK_SYMBOL (val);
8812 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 8813
df7492f9
KH
8814 val = args[coding_arg_default_char];
8815 if (NILP (val))
8816 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8817 else
8818 {
8f924df7 8819 CHECK_CHARACTER (val);
df7492f9
KH
8820 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8821 }
4031e2bf 8822
8f924df7
KH
8823 val = args[coding_arg_for_unibyte];
8824 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 8825
df7492f9
KH
8826 val = args[coding_arg_plist];
8827 CHECK_LIST (val);
8828 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 8829
df7492f9
KH
8830 if (EQ (coding_type, Qcharset))
8831 {
c7c66a95
KH
8832 /* Generate a lisp vector of 256 elements. Each element is nil,
8833 integer, or a list of charset IDs.
3a73fa5d 8834
c7c66a95
KH
8835 If Nth element is nil, the byte code N is invalid in this
8836 coding system.
4ed46869 8837
c7c66a95
KH
8838 If Nth element is a number NUM, N is the first byte of a
8839 charset whose ID is NUM.
4ed46869 8840
c7c66a95
KH
8841 If Nth element is a list of charset IDs, N is the first byte
8842 of one of them. The list is sorted by dimensions of the
2bc515e4 8843 charsets. A charset of smaller dimension comes first. */
df7492f9 8844 val = Fmake_vector (make_number (256), Qnil);
4ed46869 8845
5c99c2e6 8846 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 8847 {
c7c66a95
KH
8848 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8849 int dim = CHARSET_DIMENSION (charset);
8850 int idx = (dim - 1) * 4;
4ed46869 8851
5c99c2e6 8852 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 8853 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 8854
15d143f7
KH
8855 for (i = charset->code_space[idx];
8856 i <= charset->code_space[idx + 1]; i++)
8857 {
c7c66a95
KH
8858 Lisp_Object tmp, tmp2;
8859 int dim2;
ec6d2bb8 8860
c7c66a95
KH
8861 tmp = AREF (val, i);
8862 if (NILP (tmp))
8863 tmp = XCAR (tail);
8864 else if (NUMBERP (tmp))
8865 {
8866 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8867 if (dim < dim2)
c7c66a95 8868 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
8869 else
8870 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 8871 }
15d143f7 8872 else
c7c66a95
KH
8873 {
8874 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8875 {
8876 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8877 if (dim < dim2)
8878 break;
8879 }
8880 if (NILP (tmp2))
8881 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8882 else
8883 {
8884 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8885 XSETCAR (tmp2, XCAR (tail));
8886 }
8887 }
8888 ASET (val, i, tmp);
15d143f7 8889 }
df7492f9
KH
8890 }
8891 ASET (attrs, coding_attr_charset_valids, val);
8892 category = coding_category_charset;
8893 }
8894 else if (EQ (coding_type, Qccl))
8895 {
8896 Lisp_Object valids;
ecec61c1 8897
df7492f9
KH
8898 if (nargs < coding_arg_ccl_max)
8899 goto short_args;
ecec61c1 8900
df7492f9
KH
8901 val = args[coding_arg_ccl_decoder];
8902 CHECK_CCL_PROGRAM (val);
8903 if (VECTORP (val))
8904 val = Fcopy_sequence (val);
8905 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 8906
df7492f9
KH
8907 val = args[coding_arg_ccl_encoder];
8908 CHECK_CCL_PROGRAM (val);
8909 if (VECTORP (val))
8910 val = Fcopy_sequence (val);
8911 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 8912
df7492f9
KH
8913 val = args[coding_arg_ccl_valids];
8914 valids = Fmake_string (make_number (256), make_number (0));
8915 for (tail = val; !NILP (tail); tail = Fcdr (tail))
8916 {
8dcbea82 8917 int from, to;
ecec61c1 8918
df7492f9
KH
8919 val = Fcar (tail);
8920 if (INTEGERP (val))
8dcbea82
KH
8921 {
8922 from = to = XINT (val);
8923 if (from < 0 || from > 255)
8924 args_out_of_range_3 (val, make_number (0), make_number (255));
8925 }
df7492f9
KH
8926 else
8927 {
df7492f9 8928 CHECK_CONS (val);
8f924df7
KH
8929 CHECK_NATNUM_CAR (val);
8930 CHECK_NATNUM_CDR (val);
df7492f9 8931 from = XINT (XCAR (val));
8f924df7 8932 if (from > 255)
8dcbea82
KH
8933 args_out_of_range_3 (XCAR (val),
8934 make_number (0), make_number (255));
df7492f9 8935 to = XINT (XCDR (val));
8dcbea82
KH
8936 if (to < from || to > 255)
8937 args_out_of_range_3 (XCDR (val),
8938 XCAR (val), make_number (255));
df7492f9 8939 }
8dcbea82 8940 for (i = from; i <= to; i++)
8f924df7 8941 SSET (valids, i, 1);
df7492f9
KH
8942 }
8943 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 8944
df7492f9 8945 category = coding_category_ccl;
55ab7be3 8946 }
df7492f9 8947 else if (EQ (coding_type, Qutf_16))
55ab7be3 8948 {
df7492f9 8949 Lisp_Object bom, endian;
4ed46869 8950
584948ac 8951 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 8952
df7492f9
KH
8953 if (nargs < coding_arg_utf16_max)
8954 goto short_args;
4ed46869 8955
df7492f9
KH
8956 bom = args[coding_arg_utf16_bom];
8957 if (! NILP (bom) && ! EQ (bom, Qt))
8958 {
8959 CHECK_CONS (bom);
8f924df7
KH
8960 val = XCAR (bom);
8961 CHECK_CODING_SYSTEM (val);
8962 val = XCDR (bom);
8963 CHECK_CODING_SYSTEM (val);
df7492f9
KH
8964 }
8965 ASET (attrs, coding_attr_utf_16_bom, bom);
8966
8967 endian = args[coding_arg_utf16_endian];
b49a1807
KH
8968 CHECK_SYMBOL (endian);
8969 if (NILP (endian))
8970 endian = Qbig;
8971 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 8972 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
8973 ASET (attrs, coding_attr_utf_16_endian, endian);
8974
8975 category = (CONSP (bom)
8976 ? coding_category_utf_16_auto
8977 : NILP (bom)
b49a1807 8978 ? (EQ (endian, Qbig)
df7492f9
KH
8979 ? coding_category_utf_16_be_nosig
8980 : coding_category_utf_16_le_nosig)
b49a1807 8981 : (EQ (endian, Qbig)
df7492f9
KH
8982 ? coding_category_utf_16_be
8983 : coding_category_utf_16_le));
8984 }
8985 else if (EQ (coding_type, Qiso_2022))
8986 {
8987 Lisp_Object initial, reg_usage, request, flags;
4776e638 8988 int i;
1397dc18 8989
df7492f9
KH
8990 if (nargs < coding_arg_iso2022_max)
8991 goto short_args;
8992
8993 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8994 CHECK_VECTOR (initial);
8995 for (i = 0; i < 4; i++)
8996 {
8997 val = Faref (initial, make_number (i));
8998 if (! NILP (val))
8999 {
584948ac
KH
9000 struct charset *charset;
9001
9002 CHECK_CHARSET_GET_CHARSET (val, charset);
9003 ASET (initial, i, make_number (CHARSET_ID (charset)));
9004 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9005 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9006 }
9007 else
9008 ASET (initial, i, make_number (-1));
9009 }
9010
9011 reg_usage = args[coding_arg_iso2022_reg_usage];
9012 CHECK_CONS (reg_usage);
8f924df7
KH
9013 CHECK_NUMBER_CAR (reg_usage);
9014 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
9015
9016 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9017 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 9018 {
df7492f9 9019 int id;
8f924df7 9020 Lisp_Object tmp;
df7492f9
KH
9021
9022 val = Fcar (tail);
9023 CHECK_CONS (val);
8f924df7
KH
9024 tmp = XCAR (val);
9025 CHECK_CHARSET_GET_ID (tmp, id);
9026 CHECK_NATNUM_CDR (val);
df7492f9
KH
9027 if (XINT (XCDR (val)) >= 4)
9028 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 9029 XSETCAR (val, make_number (id));
1397dc18 9030 }
4ed46869 9031
df7492f9
KH
9032 flags = args[coding_arg_iso2022_flags];
9033 CHECK_NATNUM (flags);
9034 i = XINT (flags);
9035 if (EQ (args[coding_arg_charset_list], Qiso_2022))
9036 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9037
9038 ASET (attrs, coding_attr_iso_initial, initial);
9039 ASET (attrs, coding_attr_iso_usage, reg_usage);
9040 ASET (attrs, coding_attr_iso_request, request);
9041 ASET (attrs, coding_attr_iso_flags, flags);
9042 setup_iso_safe_charsets (attrs);
9043
9044 if (i & CODING_ISO_FLAG_SEVEN_BITS)
9045 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9046 | CODING_ISO_FLAG_SINGLE_SHIFT))
9047 ? coding_category_iso_7_else
9048 : EQ (args[coding_arg_charset_list], Qiso_2022)
9049 ? coding_category_iso_7
9050 : coding_category_iso_7_tight);
9051 else
9052 {
9053 int id = XINT (AREF (initial, 1));
9054
c6fb6e98 9055 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
9056 || EQ (args[coding_arg_charset_list], Qiso_2022)
9057 || id < 0)
9058 ? coding_category_iso_8_else
9059 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9060 ? coding_category_iso_8_1
9061 : coding_category_iso_8_2);
9062 }
0ce7886f
KH
9063 if (category != coding_category_iso_8_1
9064 && category != coding_category_iso_8_2)
9065 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
9066 }
9067 else if (EQ (coding_type, Qemacs_mule))
c28a9453 9068 {
df7492f9
KH
9069 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9070 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 9071 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 9072 category = coding_category_emacs_mule;
c28a9453 9073 }
df7492f9 9074 else if (EQ (coding_type, Qshift_jis))
c28a9453 9075 {
df7492f9
KH
9076
9077 struct charset *charset;
9078
7d64c6ad 9079 if (XINT (Flength (charset_list)) != 3
6e07c25f 9080 && XINT (Flength (charset_list)) != 4)
7d64c6ad 9081 error ("There should be three or four charsets");
df7492f9
KH
9082
9083 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9084 if (CHARSET_DIMENSION (charset) != 1)
9085 error ("Dimension of charset %s is not one",
8f924df7 9086 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9087 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9088 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9089
9090 charset_list = XCDR (charset_list);
9091 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9092 if (CHARSET_DIMENSION (charset) != 1)
9093 error ("Dimension of charset %s is not one",
8f924df7 9094 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
9095
9096 charset_list = XCDR (charset_list);
9097 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9098 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
9099 error ("Dimension of charset %s is not two",
9100 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9101
9102 charset_list = XCDR (charset_list);
2b917a06
KH
9103 if (! NILP (charset_list))
9104 {
9105 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9106 if (CHARSET_DIMENSION (charset) != 2)
9107 error ("Dimension of charset %s is not two",
9108 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9109 }
df7492f9
KH
9110
9111 category = coding_category_sjis;
9112 Vsjis_coding_system = name;
c28a9453 9113 }
df7492f9
KH
9114 else if (EQ (coding_type, Qbig5))
9115 {
9116 struct charset *charset;
4ed46869 9117
df7492f9
KH
9118 if (XINT (Flength (charset_list)) != 2)
9119 error ("There should be just two charsets");
9120
9121 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9122 if (CHARSET_DIMENSION (charset) != 1)
9123 error ("Dimension of charset %s is not one",
8f924df7 9124 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9125 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9126 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9127
9128 charset_list = XCDR (charset_list);
9129 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9130 if (CHARSET_DIMENSION (charset) != 2)
9131 error ("Dimension of charset %s is not two",
8f924df7 9132 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 9133
df7492f9
KH
9134 category = coding_category_big5;
9135 Vbig5_coding_system = name;
9136 }
9137 else if (EQ (coding_type, Qraw_text))
c28a9453 9138 {
584948ac
KH
9139 category = coding_category_raw_text;
9140 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 9141 }
df7492f9 9142 else if (EQ (coding_type, Qutf_8))
4ed46869 9143 {
584948ac
KH
9144 category = coding_category_utf_8;
9145 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4ed46869 9146 }
df7492f9
KH
9147 else if (EQ (coding_type, Qundecided))
9148 category = coding_category_undecided;
4ed46869 9149 else
df7492f9 9150 error ("Invalid coding system type: %s",
8f924df7 9151 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 9152
df7492f9 9153 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
9154 CODING_ATTR_PLIST (attrs)
9155 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9156 CODING_ATTR_PLIST (attrs)));
35befdaa
KH
9157 CODING_ATTR_PLIST (attrs)
9158 = Fcons (QCascii_compatible_p,
9159 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9160 CODING_ATTR_PLIST (attrs)));
c4825358 9161
df7492f9
KH
9162 eol_type = args[coding_arg_eol_type];
9163 if (! NILP (eol_type)
9164 && ! EQ (eol_type, Qunix)
9165 && ! EQ (eol_type, Qdos)
9166 && ! EQ (eol_type, Qmac))
9167 error ("Invalid eol-type");
4ed46869 9168
df7492f9 9169 aliases = Fcons (name, Qnil);
4ed46869 9170
df7492f9
KH
9171 if (NILP (eol_type))
9172 {
9173 eol_type = make_subsidiaries (name);
9174 for (i = 0; i < 3; i++)
1397dc18 9175 {
df7492f9
KH
9176 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9177
9178 this_name = AREF (eol_type, i);
9179 this_aliases = Fcons (this_name, Qnil);
9180 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9181 this_spec = Fmake_vector (make_number (3), attrs);
9182 ASET (this_spec, 1, this_aliases);
9183 ASET (this_spec, 2, this_eol_type);
9184 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9185 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
583f71ca
KH
9186 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9187 if (NILP (val))
9188 Vcoding_system_alist
9189 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9190 Vcoding_system_alist);
1397dc18 9191 }
d46c5b12 9192 }
4ed46869 9193
df7492f9
KH
9194 spec_vec = Fmake_vector (make_number (3), attrs);
9195 ASET (spec_vec, 1, aliases);
9196 ASET (spec_vec, 2, eol_type);
48b0f3ae 9197
df7492f9
KH
9198 Fputhash (name, spec_vec, Vcoding_system_hash_table);
9199 Vcoding_system_list = Fcons (name, Vcoding_system_list);
583f71ca
KH
9200 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9201 if (NILP (val))
9202 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9203 Vcoding_system_alist);
48b0f3ae 9204
df7492f9
KH
9205 {
9206 int id = coding_categories[category].id;
48b0f3ae 9207
df7492f9
KH
9208 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9209 setup_coding_system (name, &coding_categories[category]);
9210 }
48b0f3ae 9211
d46c5b12 9212 return Qnil;
48b0f3ae 9213
df7492f9
KH
9214 short_args:
9215 return Fsignal (Qwrong_number_of_arguments,
9216 Fcons (intern ("define-coding-system-internal"),
9217 make_number (nargs)));
d46c5b12 9218}
4ed46869 9219
d6925f38 9220
a6f87d34
KH
9221DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9222 3, 3, 0,
9223 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
9224 (coding_system, prop, val)
9225 Lisp_Object coding_system, prop, val;
9226{
3dbe7859 9227 Lisp_Object spec, attrs;
a6f87d34
KH
9228
9229 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9230 attrs = AREF (spec, 0);
9231 if (EQ (prop, QCmnemonic))
9232 {
9233 if (! STRINGP (val))
9234 CHECK_CHARACTER (val);
9235 CODING_ATTR_MNEMONIC (attrs) = val;
9236 }
9237 else if (EQ (prop, QCdefalut_char))
9238 {
9239 if (NILP (val))
9240 val = make_number (' ');
9241 else
9242 CHECK_CHARACTER (val);
9243 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9244 }
9245 else if (EQ (prop, QCdecode_translation_table))
9246 {
9247 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9248 CHECK_SYMBOL (val);
9249 CODING_ATTR_DECODE_TBL (attrs) = val;
9250 }
9251 else if (EQ (prop, QCencode_translation_table))
9252 {
9253 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9254 CHECK_SYMBOL (val);
9255 CODING_ATTR_ENCODE_TBL (attrs) = val;
9256 }
9257 else if (EQ (prop, QCpost_read_conversion))
9258 {
9259 CHECK_SYMBOL (val);
9260 CODING_ATTR_POST_READ (attrs) = val;
9261 }
9262 else if (EQ (prop, QCpre_write_conversion))
9263 {
9264 CHECK_SYMBOL (val);
9265 CODING_ATTR_PRE_WRITE (attrs) = val;
9266 }
35befdaa
KH
9267 else if (EQ (prop, QCascii_compatible_p))
9268 {
9269 CODING_ATTR_ASCII_COMPAT (attrs) = val;
9270 }
a6f87d34
KH
9271
9272 CODING_ATTR_PLIST (attrs)
9273 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9274 return val;
9275}
9276
9277
df7492f9
KH
9278DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9279 Sdefine_coding_system_alias, 2, 2, 0,
9280 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
9281 (alias, coding_system)
9282 Lisp_Object alias, coding_system;
66cfb530 9283{
583f71ca 9284 Lisp_Object spec, aliases, eol_type, val;
4ed46869 9285
df7492f9
KH
9286 CHECK_SYMBOL (alias);
9287 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9288 aliases = AREF (spec, 1);
d6925f38
KH
9289 /* ALISES should be a list of length more than zero, and the first
9290 element is a base coding system. Append ALIAS at the tail of the
9291 list. */
df7492f9
KH
9292 while (!NILP (XCDR (aliases)))
9293 aliases = XCDR (aliases);
8f924df7 9294 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 9295
df7492f9
KH
9296 eol_type = AREF (spec, 2);
9297 if (VECTORP (eol_type))
4ed46869 9298 {
df7492f9
KH
9299 Lisp_Object subsidiaries;
9300 int i;
4ed46869 9301
df7492f9
KH
9302 subsidiaries = make_subsidiaries (alias);
9303 for (i = 0; i < 3; i++)
9304 Fdefine_coding_system_alias (AREF (subsidiaries, i),
9305 AREF (eol_type, i));
4ed46869 9306 }
df7492f9
KH
9307
9308 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 9309 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
583f71ca
KH
9310 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9311 if (NILP (val))
9312 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9313 Vcoding_system_alist);
66cfb530 9314
4ed46869
KH
9315 return Qnil;
9316}
9317
df7492f9
KH
9318DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9319 1, 1, 0,
9320 doc: /* Return the base of CODING-SYSTEM.
da7db224 9321Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
9322 (coding_system)
9323 Lisp_Object coding_system;
d46c5b12 9324{
df7492f9 9325 Lisp_Object spec, attrs;
d46c5b12 9326
df7492f9
KH
9327 if (NILP (coding_system))
9328 return (Qno_conversion);
9329 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9330 attrs = AREF (spec, 0);
9331 return CODING_ATTR_BASE_NAME (attrs);
9332}
1397dc18 9333
df7492f9
KH
9334DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9335 1, 1, 0,
9336 doc: "Return the property list of CODING-SYSTEM.")
9337 (coding_system)
9338 Lisp_Object coding_system;
9339{
9340 Lisp_Object spec, attrs;
1397dc18 9341
df7492f9
KH
9342 if (NILP (coding_system))
9343 coding_system = Qno_conversion;
9344 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9345 attrs = AREF (spec, 0);
9346 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
9347}
9348
df7492f9
KH
9349
9350DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9351 1, 1, 0,
da7db224 9352 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
9353 (coding_system)
9354 Lisp_Object coding_system;
66cfb530 9355{
df7492f9 9356 Lisp_Object spec;
84d60297 9357
df7492f9
KH
9358 if (NILP (coding_system))
9359 coding_system = Qno_conversion;
9360 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 9361 return AREF (spec, 1);
df7492f9 9362}
66cfb530 9363
df7492f9
KH
9364DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9365 Scoding_system_eol_type, 1, 1, 0,
9366 doc: /* Return eol-type of CODING-SYSTEM.
9367An eol-type is integer 0, 1, 2, or a vector of coding systems.
66cfb530 9368
df7492f9
KH
9369Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9370and CR respectively.
66cfb530 9371
df7492f9
KH
9372A vector value indicates that a format of end-of-line should be
9373detected automatically. Nth element of the vector is the subsidiary
9374coding system whose eol-type is N. */)
6b89e3aa
KH
9375 (coding_system)
9376 Lisp_Object coding_system;
9377{
df7492f9
KH
9378 Lisp_Object spec, eol_type;
9379 int n;
6b89e3aa 9380
df7492f9
KH
9381 if (NILP (coding_system))
9382 coding_system = Qno_conversion;
9383 if (! CODING_SYSTEM_P (coding_system))
9384 return Qnil;
9385 spec = CODING_SYSTEM_SPEC (coding_system);
9386 eol_type = AREF (spec, 2);
9387 if (VECTORP (eol_type))
9388 return Fcopy_sequence (eol_type);
9389 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9390 return make_number (n);
6b89e3aa
KH
9391}
9392
4ed46869
KH
9393#endif /* emacs */
9394
9395\f
1397dc18 9396/*** 12. Postamble ***/
4ed46869 9397
dfcf069d 9398void
4ed46869
KH
9399init_coding_once ()
9400{
9401 int i;
9402
df7492f9
KH
9403 for (i = 0; i < coding_category_max; i++)
9404 {
9405 coding_categories[i].id = -1;
9406 coding_priorities[i] = i;
9407 }
4ed46869
KH
9408
9409 /* ISO2022 specific initialize routine. */
9410 for (i = 0; i < 0x20; i++)
b73bfc1c 9411 iso_code_class[i] = ISO_control_0;
4ed46869
KH
9412 for (i = 0x21; i < 0x7F; i++)
9413 iso_code_class[i] = ISO_graphic_plane_0;
9414 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 9415 iso_code_class[i] = ISO_control_1;
4ed46869
KH
9416 for (i = 0xA1; i < 0xFF; i++)
9417 iso_code_class[i] = ISO_graphic_plane_1;
9418 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9419 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
9420 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9421 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9422 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9423 iso_code_class[ISO_CODE_ESC] = ISO_escape;
9424 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9425 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9426 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9427
df7492f9
KH
9428 for (i = 0; i < 256; i++)
9429 {
9430 emacs_mule_bytes[i] = 1;
9431 }
7c78e542
KH
9432 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9433 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9434 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9435 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
9436}
9437
9438#ifdef emacs
9439
dfcf069d 9440void
e0e989f6
KH
9441syms_of_coding ()
9442{
df7492f9 9443 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
9444 {
9445 Lisp_Object args[2];
9446 args[0] = QCtest;
9447 args[1] = Qeq;
9448 Vcoding_system_hash_table = Fmake_hash_table (2, args);
9449 }
df7492f9
KH
9450
9451 staticpro (&Vsjis_coding_system);
9452 Vsjis_coding_system = Qnil;
e0e989f6 9453
df7492f9
KH
9454 staticpro (&Vbig5_coding_system);
9455 Vbig5_coding_system = Qnil;
9456
24a73b0a
KH
9457 staticpro (&Vcode_conversion_reused_workbuf);
9458 Vcode_conversion_reused_workbuf = Qnil;
9459
9460 staticpro (&Vcode_conversion_workbuf_name);
9461 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
e0e989f6 9462
24a73b0a 9463 reused_workbuf_in_use = 0;
df7492f9
KH
9464
9465 DEFSYM (Qcharset, "charset");
9466 DEFSYM (Qtarget_idx, "target-idx");
9467 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
9468 Fset (Qcoding_system_history, Qnil);
9469
9ce27fde 9470 /* Target FILENAME is the first argument. */
e0e989f6 9471 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 9472 /* Target FILENAME is the third argument. */
e0e989f6
KH
9473 Fput (Qwrite_region, Qtarget_idx, make_number (2));
9474
df7492f9 9475 DEFSYM (Qcall_process, "call-process");
9ce27fde 9476 /* Target PROGRAM is the first argument. */
e0e989f6
KH
9477 Fput (Qcall_process, Qtarget_idx, make_number (0));
9478
df7492f9 9479 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 9480 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9481 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9482
df7492f9 9483 DEFSYM (Qstart_process, "start-process");
9ce27fde 9484 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9485 Fput (Qstart_process, Qtarget_idx, make_number (2));
9486
df7492f9 9487 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 9488 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
9489 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9490
df7492f9
KH
9491 DEFSYM (Qcoding_system, "coding-system");
9492 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 9493
df7492f9
KH
9494 DEFSYM (Qeol_type, "eol-type");
9495 DEFSYM (Qunix, "unix");
9496 DEFSYM (Qdos, "dos");
4ed46869 9497
df7492f9
KH
9498 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9499 DEFSYM (Qpost_read_conversion, "post-read-conversion");
9500 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9501 DEFSYM (Qdefault_char, "default-char");
9502 DEFSYM (Qundecided, "undecided");
9503 DEFSYM (Qno_conversion, "no-conversion");
9504 DEFSYM (Qraw_text, "raw-text");
4ed46869 9505
df7492f9 9506 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 9507
df7492f9 9508 DEFSYM (Qutf_8, "utf-8");
8f924df7 9509 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 9510
df7492f9 9511 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
9512 DEFSYM (Qbig, "big");
9513 DEFSYM (Qlittle, "little");
27901516 9514
df7492f9
KH
9515 DEFSYM (Qshift_jis, "shift-jis");
9516 DEFSYM (Qbig5, "big5");
4ed46869 9517
df7492f9 9518 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 9519
df7492f9 9520 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
9521 Fput (Qcoding_system_error, Qerror_conditions,
9522 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9523 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 9524 build_string ("Invalid coding system"));
4ed46869 9525
05e6f5dc
KH
9526 /* Intern this now in case it isn't already done.
9527 Setting this variable twice is harmless.
9528 But don't staticpro it here--that is done in alloc.c. */
9529 Qchar_table_extra_slots = intern ("char-table-extra-slots");
70c22245 9530
df7492f9 9531 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 9532 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
9533 DEFSYM (Qtranslation_table_id, "translation-table-id");
9534 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9535 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 9536
df7492f9 9537 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 9538
df7492f9 9539 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 9540
01378f49 9541 DEFSYM (QCcategory, ":category");
a6f87d34
KH
9542 DEFSYM (QCmnemonic, ":mnemonic");
9543 DEFSYM (QCdefalut_char, ":default-char");
9544 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9545 DEFSYM (QCencode_translation_table, ":encode-translation-table");
9546 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9547 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
35befdaa 9548 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
01378f49 9549
df7492f9
KH
9550 Vcoding_category_table
9551 = Fmake_vector (make_number (coding_category_max), Qnil);
9552 staticpro (&Vcoding_category_table);
9553 /* Followings are target of code detection. */
9554 ASET (Vcoding_category_table, coding_category_iso_7,
9555 intern ("coding-category-iso-7"));
9556 ASET (Vcoding_category_table, coding_category_iso_7_tight,
9557 intern ("coding-category-iso-7-tight"));
9558 ASET (Vcoding_category_table, coding_category_iso_8_1,
9559 intern ("coding-category-iso-8-1"));
9560 ASET (Vcoding_category_table, coding_category_iso_8_2,
9561 intern ("coding-category-iso-8-2"));
9562 ASET (Vcoding_category_table, coding_category_iso_7_else,
9563 intern ("coding-category-iso-7-else"));
9564 ASET (Vcoding_category_table, coding_category_iso_8_else,
9565 intern ("coding-category-iso-8-else"));
9566 ASET (Vcoding_category_table, coding_category_utf_8,
9567 intern ("coding-category-utf-8"));
9568 ASET (Vcoding_category_table, coding_category_utf_16_be,
9569 intern ("coding-category-utf-16-be"));
ff563fce
KH
9570 ASET (Vcoding_category_table, coding_category_utf_16_auto,
9571 intern ("coding-category-utf-16-auto"));
df7492f9
KH
9572 ASET (Vcoding_category_table, coding_category_utf_16_le,
9573 intern ("coding-category-utf-16-le"));
9574 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9575 intern ("coding-category-utf-16-be-nosig"));
9576 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9577 intern ("coding-category-utf-16-le-nosig"));
9578 ASET (Vcoding_category_table, coding_category_charset,
9579 intern ("coding-category-charset"));
9580 ASET (Vcoding_category_table, coding_category_sjis,
9581 intern ("coding-category-sjis"));
9582 ASET (Vcoding_category_table, coding_category_big5,
9583 intern ("coding-category-big5"));
9584 ASET (Vcoding_category_table, coding_category_ccl,
9585 intern ("coding-category-ccl"));
9586 ASET (Vcoding_category_table, coding_category_emacs_mule,
9587 intern ("coding-category-emacs-mule"));
9588 /* Followings are NOT target of code detection. */
9589 ASET (Vcoding_category_table, coding_category_raw_text,
9590 intern ("coding-category-raw-text"));
9591 ASET (Vcoding_category_table, coding_category_undecided,
9592 intern ("coding-category-undecided"));
ecf488bc 9593
065e3595
KH
9594 DEFSYM (Qinsufficient_source, "insufficient-source");
9595 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9596 DEFSYM (Qinvalid_source, "invalid-source");
9597 DEFSYM (Qinterrupted, "interrupted");
9598 DEFSYM (Qinsufficient_memory, "insufficient-memory");
44e8490d 9599 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
065e3595 9600
4ed46869
KH
9601 defsubr (&Scoding_system_p);
9602 defsubr (&Sread_coding_system);
9603 defsubr (&Sread_non_nil_coding_system);
9604 defsubr (&Scheck_coding_system);
9605 defsubr (&Sdetect_coding_region);
d46c5b12 9606 defsubr (&Sdetect_coding_string);
05e6f5dc 9607 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 9608 defsubr (&Sunencodable_char_position);
df7492f9 9609 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
9610 defsubr (&Sdecode_coding_region);
9611 defsubr (&Sencode_coding_region);
9612 defsubr (&Sdecode_coding_string);
9613 defsubr (&Sencode_coding_string);
9614 defsubr (&Sdecode_sjis_char);
9615 defsubr (&Sencode_sjis_char);
9616 defsubr (&Sdecode_big5_char);
9617 defsubr (&Sencode_big5_char);
1ba9e4ab 9618 defsubr (&Sset_terminal_coding_system_internal);
c4825358 9619 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 9620 defsubr (&Sterminal_coding_system);
1ba9e4ab 9621 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 9622 defsubr (&Skeyboard_coding_system);
a5d301df 9623 defsubr (&Sfind_operation_coding_system);
df7492f9 9624 defsubr (&Sset_coding_system_priority);
6b89e3aa 9625 defsubr (&Sdefine_coding_system_internal);
df7492f9 9626 defsubr (&Sdefine_coding_system_alias);
a6f87d34 9627 defsubr (&Scoding_system_put);
df7492f9
KH
9628 defsubr (&Scoding_system_base);
9629 defsubr (&Scoding_system_plist);
9630 defsubr (&Scoding_system_aliases);
9631 defsubr (&Scoding_system_eol_type);
9632 defsubr (&Scoding_system_priority_list);
4ed46869 9633
4608c386 9634 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
9635 doc: /* List of coding systems.
9636
9637Do not alter the value of this variable manually. This variable should be
df7492f9 9638updated by the functions `define-coding-system' and
48b0f3ae 9639`define-coding-system-alias'. */);
4608c386
KH
9640 Vcoding_system_list = Qnil;
9641
9642 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
9643 doc: /* Alist of coding system names.
9644Each element is one element list of coding system name.
9645This variable is given to `completing-read' as TABLE argument.
9646
9647Do not alter the value of this variable manually. This variable should be
9648updated by the functions `make-coding-system' and
9649`define-coding-system-alias'. */);
4608c386
KH
9650 Vcoding_system_alist = Qnil;
9651
4ed46869 9652 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
9653 doc: /* List of coding-categories (symbols) ordered by priority.
9654
9655On detecting a coding system, Emacs tries code detection algorithms
9656associated with each coding-category one by one in this order. When
9657one algorithm agrees with a byte sequence of source text, the coding
0ec31faf
KH
9658system bound to the corresponding coding-category is selected.
9659
42205607 9660Don't modify this variable directly, but use `set-coding-priority'. */);
4ed46869
KH
9661 {
9662 int i;
9663
9664 Vcoding_category_list = Qnil;
df7492f9 9665 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 9666 Vcoding_category_list
d46c5b12
KH
9667 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9668 Vcoding_category_list);
4ed46869
KH
9669 }
9670
9671 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
9672 doc: /* Specify the coding system for read operations.
9673It is useful to bind this variable with `let', but do not set it globally.
9674If the value is a coding system, it is used for decoding on read operation.
9675If not, an appropriate element is used from one of the coding system alists:
9676There are three such tables, `file-coding-system-alist',
9677`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
9678 Vcoding_system_for_read = Qnil;
9679
9680 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
9681 doc: /* Specify the coding system for write operations.
9682Programs bind this variable with `let', but you should not set it globally.
9683If the value is a coding system, it is used for encoding of output,
9684when writing it to a file and when sending it to a file or subprocess.
9685
9686If this does not specify a coding system, an appropriate element
9687is used from one of the coding system alists:
9688There are three such tables, `file-coding-system-alist',
9689`process-coding-system-alist', and `network-coding-system-alist'.
9690For output to files, if the above procedure does not specify a coding system,
9691the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
9692 Vcoding_system_for_write = Qnil;
9693
9694 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
9695 doc: /*
9696Coding system used in the latest file or process I/O. */);
4ed46869
KH
9697 Vlast_coding_system_used = Qnil;
9698
065e3595
KH
9699 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9700 doc: /*
9701Error status of the last code conversion.
9702
9703When an error was detected in the last code conversion, this variable
9704is set to one of the following symbols.
9705 `insufficient-source'
9706 `inconsistent-eol'
9707 `invalid-source'
9708 `interrupted'
9709 `insufficient-memory'
9710When no error was detected, the value doesn't change. So, to check
9711the error status of a code conversion by this variable, you must
9712explicitly set this variable to nil before performing code
9713conversion. */);
9714 Vlast_code_conversion_error = Qnil;
9715
9ce27fde 9716 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
9717 doc: /*
9718*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
9719See info node `Coding Systems' and info node `Text and Binary' concerning
9720such conversion. */);
9ce27fde
KH
9721 inhibit_eol_conversion = 0;
9722
ed29121d 9723 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
9724 doc: /*
9725Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
9726Bind it to t if the process output is to be treated as if it were a file
9727read from some filesystem. */);
ed29121d
EZ
9728 inherit_process_coding_system = 0;
9729
02ba4723 9730 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
9731 doc: /*
9732Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
9733The format is ((PATTERN . VAL) ...),
9734where PATTERN is a regular expression matching a file name,
9735VAL is a coding system, a cons of coding systems, or a function symbol.
9736If VAL is a coding system, it is used for both decoding and encoding
9737the file contents.
9738If VAL is a cons of coding systems, the car part is used for decoding,
9739and the cdr part is used for encoding.
9740If VAL is a function symbol, the function must return a coding system
2c53e699
KH
9741or a cons of coding systems which are used as above. The function is
9742called with an argument that is a list of the arguments with which
5a0bbd9a
KH
9743`find-operation-coding-system' was called. If the function can't decide
9744a coding system, it can return `undecided' so that the normal
9745code-detection is performed.
48b0f3ae
PJ
9746
9747See also the function `find-operation-coding-system'
9748and the variable `auto-coding-alist'. */);
02ba4723
KH
9749 Vfile_coding_system_alist = Qnil;
9750
9751 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
9752 doc: /*
9753Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
9754The format is ((PATTERN . VAL) ...),
9755where PATTERN is a regular expression matching a program name,
9756VAL is a coding system, a cons of coding systems, or a function symbol.
9757If VAL is a coding system, it is used for both decoding what received
9758from the program and encoding what sent to the program.
9759If VAL is a cons of coding systems, the car part is used for decoding,
9760and the cdr part is used for encoding.
9761If VAL is a function symbol, the function must return a coding system
9762or a cons of coding systems which are used as above.
9763
9764See also the function `find-operation-coding-system'. */);
02ba4723
KH
9765 Vprocess_coding_system_alist = Qnil;
9766
9767 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
9768 doc: /*
9769Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
9770The format is ((PATTERN . VAL) ...),
9771where PATTERN is a regular expression matching a network service name
9772or is a port number to connect to,
9773VAL is a coding system, a cons of coding systems, or a function symbol.
9774If VAL is a coding system, it is used for both decoding what received
9775from the network stream and encoding what sent to the network stream.
9776If VAL is a cons of coding systems, the car part is used for decoding,
9777and the cdr part is used for encoding.
9778If VAL is a function symbol, the function must return a coding system
9779or a cons of coding systems which are used as above.
9780
9781See also the function `find-operation-coding-system'. */);
02ba4723 9782 Vnetwork_coding_system_alist = Qnil;
4ed46869 9783
68c45bf0 9784 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
9785 doc: /* Coding system to use with system messages.
9786Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
9787 Vlocale_coding_system = Qnil;
9788
005f0d35 9789 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 9790 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
9791 doc: /*
9792*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 9793 eol_mnemonic_unix = build_string (":");
4ed46869 9794
7722baf9 9795 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
9796 doc: /*
9797*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 9798 eol_mnemonic_dos = build_string ("\\");
4ed46869 9799
7722baf9 9800 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
9801 doc: /*
9802*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 9803 eol_mnemonic_mac = build_string ("/");
4ed46869 9804
7722baf9 9805 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
9806 doc: /*
9807*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 9808 eol_mnemonic_undecided = build_string (":");
4ed46869 9809
84fbb8a0 9810 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
9811 doc: /*
9812*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 9813 Venable_character_translation = Qt;
bdd9fb48 9814
f967223b 9815 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
9816 &Vstandard_translation_table_for_decode,
9817 doc: /* Table for translating characters while decoding. */);
f967223b 9818 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 9819
f967223b 9820 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
9821 &Vstandard_translation_table_for_encode,
9822 doc: /* Table for translating characters while encoding. */);
f967223b 9823 Vstandard_translation_table_for_encode = Qnil;
4ed46869 9824
df7492f9 9825 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
9826 doc: /* Alist of charsets vs revision numbers.
9827While encoding, if a charset (car part of an element) is found,
df7492f9
KH
9828designate it with the escape sequence identifying revision (cdr part
9829of the element). */);
9830 Vcharset_revision_table = Qnil;
02ba4723
KH
9831
9832 DEFVAR_LISP ("default-process-coding-system",
9833 &Vdefault_process_coding_system,
48b0f3ae
PJ
9834 doc: /* Cons of coding systems used for process I/O by default.
9835The car part is used for decoding a process output,
9836the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 9837 Vdefault_process_coding_system = Qnil;
c4825358 9838
3f003981 9839 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
9840 doc: /*
9841Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
9842This is a vector of length 256.
9843If Nth element is non-nil, the existence of code N in a file
9844\(or output of subprocess) doesn't prevent it to be detected as
9845a coding system of ISO 2022 variant which has a flag
9846`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9847or reading output of a subprocess.
9848Only 128th through 159th elements has a meaning. */);
3f003981 9849 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
9850
9851 DEFVAR_LISP ("select-safe-coding-system-function",
9852 &Vselect_safe_coding_system_function,
df7492f9
KH
9853 doc: /*
9854Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
9855
9856If set, this function is called to force a user to select a proper
9857coding system which can encode the text in the case that a default
fdecf907
GM
9858coding system used in each operation can't encode the text. The
9859function should take care that the buffer is not modified while
9860the coding system is being selected.
48b0f3ae
PJ
9861
9862The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
9863 Vselect_safe_coding_system_function = Qnil;
9864
5d5bf4d8
KH
9865 DEFVAR_BOOL ("coding-system-require-warning",
9866 &coding_system_require_warning,
9867 doc: /* Internal use only.
6b89e3aa
KH
9868If non-nil, on writing a file, `select-safe-coding-system-function' is
9869called even if `coding-system-for-write' is non-nil. The command
9870`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
9871 coding_system_require_warning = 0;
9872
9873
22ab2303 9874 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 9875 &inhibit_iso_escape_detection,
df7492f9
KH
9876 doc: /*
9877If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
9878
9879By default, on reading a file, Emacs tries to detect how the text is
9880encoded. This code detection is sensitive to escape sequences. If
9881the sequence is valid as ISO2022, the code is determined as one of
9882the ISO2022 encodings, and the file is decoded by the corresponding
9883coding system (e.g. `iso-2022-7bit').
9884
9885However, there may be a case that you want to read escape sequences in
9886a file as is. In such a case, you can set this variable to non-nil.
9887Then, as the code detection ignores any escape sequences, no file is
9888detected as encoded in some ISO2022 encoding. The result is that all
9889escape sequences become visible in a buffer.
9890
9891The default value is nil, and it is strongly recommended not to change
9892it. That is because many Emacs Lisp source files that contain
9893non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9894in Emacs's distribution, and they won't be decoded correctly on
9895reading if you suppress escape sequence detection.
9896
9897The other way to read escape sequences in a file without decoding is
9898to explicitly specify some coding system that doesn't use ISO2022's
9899escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 9900 inhibit_iso_escape_detection = 0;
002fdb44
DL
9901
9902 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1
DL
9903 doc: /* Char table for translating self-inserting characters.
9904This is applied to the result of input methods, not their input. See also
9905`keyboard-translate-table'. */);
002fdb44 9906 Vtranslation_table_for_input = Qnil;
8f924df7 9907
2c78b7e1
KH
9908 {
9909 Lisp_Object args[coding_arg_max];
8f924df7 9910 Lisp_Object plist[16];
2c78b7e1
KH
9911 int i;
9912
9913 for (i = 0; i < coding_arg_max; i++)
9914 args[i] = Qnil;
9915
9916 plist[0] = intern (":name");
9917 plist[1] = args[coding_arg_name] = Qno_conversion;
9918 plist[2] = intern (":mnemonic");
9919 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9920 plist[4] = intern (":coding-type");
9921 plist[5] = args[coding_arg_coding_type] = Qraw_text;
9922 plist[6] = intern (":ascii-compatible-p");
9923 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9924 plist[8] = intern (":default-char");
9925 plist[9] = args[coding_arg_default_char] = make_number (0);
8f924df7
KH
9926 plist[10] = intern (":for-unibyte");
9927 plist[11] = args[coding_arg_for_unibyte] = Qt;
9928 plist[12] = intern (":docstring");
9929 plist[13] = build_string ("Do no conversion.\n\
2c78b7e1
KH
9930\n\
9931When you visit a file with this coding, the file is read into a\n\
9932unibyte buffer as is, thus each byte of a file is treated as a\n\
9933character.");
8f924df7
KH
9934 plist[14] = intern (":eol-type");
9935 plist[15] = args[coding_arg_eol_type] = Qunix;
9936 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1 9937 Fdefine_coding_system_internal (coding_arg_max, args);
ae6f73fa
KH
9938
9939 plist[1] = args[coding_arg_name] = Qundecided;
9940 plist[3] = args[coding_arg_mnemonic] = make_number ('-');
9941 plist[5] = args[coding_arg_coding_type] = Qundecided;
9942 /* This is already set.
35befdaa 9943 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
ae6f73fa
KH
9944 plist[8] = intern (":charset-list");
9945 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
9946 plist[11] = args[coding_arg_for_unibyte] = Qnil;
9947 plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
9948 plist[15] = args[coding_arg_eol_type] = Qnil;
9949 args[coding_arg_plist] = Flist (16, plist);
9950 Fdefine_coding_system_internal (coding_arg_max, args);
2c78b7e1
KH
9951 }
9952
2c78b7e1 9953 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
9954
9955 {
9956 int i;
9957
9958 for (i = 0; i < coding_category_max; i++)
9959 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9960 }
fcbcfb64
KH
9961#if defined (MSDOS) || defined (WINDOWSNT)
9962 system_eol_type = Qdos;
9963#else
9964 system_eol_type = Qunix;
9965#endif
9966 staticpro (&system_eol_type);
4ed46869
KH
9967}
9968
68c45bf0
PE
9969char *
9970emacs_strerror (error_number)
9971 int error_number;
9972{
9973 char *str;
9974
ca9c0567 9975 synchronize_system_messages_locale ();
68c45bf0
PE
9976 str = strerror (error_number);
9977
9978 if (! NILP (Vlocale_coding_system))
9979 {
9980 Lisp_Object dec = code_convert_string_norecord (build_string (str),
9981 Vlocale_coding_system,
9982 0);
d5db4077 9983 str = (char *) SDATA (dec);
68c45bf0
PE
9984 }
9985
9986 return str;
9987}
9988
4ed46869 9989#endif /* emacs */
9ffd559c
KH
9990
9991/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
9992 (do not change this comment) */