* process.c (ifflag_def): Make flag_sym constant.
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
aaef169d 2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
76b6f707 3 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
76b6f707 5 2005, 2006, 2007, 2008, 2009
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8f924df7 8 Copyright (C) 2003
df7492f9
KH
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
4ed46869 11
369314dc
KH
12This file is part of GNU Emacs.
13
9ec0b715 14GNU Emacs is free software: you can redistribute it and/or modify
369314dc 15it under the terms of the GNU General Public License as published by
9ec0b715
GM
16the Free Software Foundation, either version 3 of the License, or
17(at your option) any later version.
4ed46869 18
369314dc
KH
19GNU Emacs is distributed in the hope that it will be useful,
20but WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22GNU General Public License for more details.
4ed46869 23
369314dc 24You should have received a copy of the GNU General Public License
9ec0b715 25along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869
KH
26
27/*** TABLE OF CONTENTS ***
28
b73bfc1c 29 0. General comments
4ed46869 30 1. Preamble
df7492f9
KH
31 2. Emacs' internal format (emacs-utf-8) handlers
32 3. UTF-8 handlers
33 4. UTF-16 handlers
34 5. Charset-base coding systems handlers
35 6. emacs-mule (old Emacs' internal format) handlers
36 7. ISO2022 handlers
37 8. Shift-JIS and BIG5 handlers
38 9. CCL handlers
39 10. C library functions
40 11. Emacs Lisp library functions
41 12. Postamble
4ed46869
KH
42
43*/
44
df7492f9 45/*** 0. General comments ***
b73bfc1c
KH
46
47
df7492f9 48CODING SYSTEM
4ed46869 49
5bad0796
DL
50 A coding system is an object for an encoding mechanism that contains
51 information about how to convert byte sequences to character
e19c3639
KH
52 sequences and vice versa. When we say "decode", it means converting
53 a byte sequence of a specific coding system into a character
54 sequence that is represented by Emacs' internal coding system
55 `emacs-utf-8', and when we say "encode", it means converting a
56 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 57 coding system.
4ed46869 58
e19c3639
KH
59 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
60 C level, a coding system is represented by a vector of attributes
5bad0796 61 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
62 coding system symbol to attributes vector is done by looking up
63 Vcoding_system_hash_table by the symbol.
4ed46869 64
e19c3639 65 Coding systems are classified into the following types depending on
5bad0796 66 the encoding mechanism. Here's a brief description of the types.
4ed46869 67
df7492f9
KH
68 o UTF-8
69
70 o UTF-16
71
72 o Charset-base coding system
73
74 A coding system defined by one or more (coded) character sets.
5bad0796 75 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
76 character set.
77
5bad0796 78 o Old Emacs internal format (emacs-mule)
df7492f9 79
5bad0796 80 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 81
df7492f9 82 o ISO2022-base coding system
4ed46869
KH
83
84 The most famous coding system for multiple character sets. X's
df7492f9
KH
85 Compound Text, various EUCs (Extended Unix Code), and coding systems
86 used in the Internet communication such as ISO-2022-JP are all
87 variants of ISO2022.
4ed46869 88
df7492f9 89 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 90
4ed46869
KH
91 A coding system to encode character sets: ASCII, JISX0201, and
92 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 93 section 8.
4ed46869 94
df7492f9 95 o BIG5
4ed46869 96
df7492f9 97 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 98 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
99 described in section 8. In this file, when we write "big5" (all
100 lowercase), we mean the coding system, and when we write "Big5"
101 (capitalized), we mean the character set.
4ed46869 102
df7492f9 103 o CCL
27901516 104
5bad0796 105 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
106 not listed above, he can supply a decoder and an encoder for it in
107 CCL (Code Conversion Language) programs. Emacs executes the CCL
108 program while decoding/encoding.
27901516 109
df7492f9 110 o Raw-text
4ed46869 111
5a936b46 112 A coding system for text containing raw eight-bit data. Emacs
5bad0796 113 treats each byte of source text as a character (except for
df7492f9 114 end-of-line conversion).
4ed46869 115
df7492f9
KH
116 o No-conversion
117
118 Like raw text, but don't do end-of-line conversion.
4ed46869 119
4ed46869 120
df7492f9 121END-OF-LINE FORMAT
4ed46869 122
5bad0796 123 How text end-of-line is encoded depends on the operating system. For
df7492f9 124 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 125 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
126 `line-feed' codes. MacOS's format is usually one byte of
127 `carriage-return'.
4ed46869 128
cfb43547 129 Since text character encoding and end-of-line encoding are
df7492f9
KH
130 independent, any coding system described above can take any format
131 of end-of-line (except for no-conversion).
4ed46869 132
e19c3639
KH
133STRUCT CODING_SYSTEM
134
135 Before using a coding system for code conversion (i.e. decoding and
136 encoding), we setup a structure of type `struct coding_system'.
137 This structure keeps various information about a specific code
5bad0796 138 conversion (e.g. the location of source and destination data).
4ed46869
KH
139
140*/
141
df7492f9
KH
142/* COMMON MACROS */
143
144
4ed46869
KH
145/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146
df7492f9 147 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
148 CODING conforms to the format of XXX, and update the members of
149 DETECT_INFO.
df7492f9 150
ff0dacd7 151 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
152
153 Below is the template of these functions. */
154
#if 0
/* Template of a `detect_coding_XXX ()' function.

   Scan the source bytes of CODING and check whether they conform to
   the format of XXX.  If the source is exhausted without meeting a
   nonconforming byte, merge the categories found so far into
   DETECT_INFO->found and return 1.  Otherwise, set CATEGORY_MASK_XXX
   in DETECT_INFO->rejected and return 0.  */
static int
detect_coding_XXX (coding, detect_info)
     struct coding_system *coding;
     struct coding_detection_info *detect_info;
{
  const unsigned char *src = coding->source;
  /* ONE_MORE_BYTE compares SRC_BASE with SRC when the source runs
     out, so it must be kept up to date in the loop below.  */
  const unsigned char *src_base;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  int found = 0;
  int c;
  ...;

  while (1)
    {
      src_base = src;
      /* Get one byte from the source.  If the source is exhausted,
         jump to no_more_source:.  */
      ONE_MORE_BYTE (c);

      if (! __C_conforms_to_XXX___ (c))
        break;
      /* Record the category only when C is positive evidence for
         XXX; a byte that merely conforms proves nothing.  */
      if (__C_strongly_suggests_XXX__ (c))
        found = CATEGORY_MASK_XXX;
    }
  /* The byte sequence is invalid for XXX.  */
  detect_info->rejected |= CATEGORY_MASK_XXX;
  return 0;

 no_more_source:
  /* The source was exhausted successfully.  */
  detect_info->found |= found;
  return 1;
}
#endif
189
190/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191
df7492f9
KH
192 These functions decode a byte sequence specified as a source by
193 CODING. The resulting multibyte text goes to a place pointed to by
194 CODING->charbuf, the length of which should not exceed
195 CODING->charbuf_size.
d46c5b12 196
df7492f9
KH
197 These functions set the information of original and decoded texts in
198 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
199 They also set CODING->result to one of CODING_RESULT_XXX indicating
200 how the decoding is finished.
d46c5b12 201
df7492f9 202 Below is the template of these functions. */
d46c5b12 203
#if 0
/* Template of a `decode_coding_XXX ()' function.

   Decode the not-yet-consumed part of CODING's source into
   CODING->charbuf, stopping when the source runs out or when
   CODING->charbuf has no room left.  On return, CODING->consumed,
   CODING->consumed_char and CODING->charbuf_used are updated.  */
static void
decode_coding_XXXX (coding)
     struct coding_system *coding;
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  /* SRC_BASE remembers the start position in source in each loop.
     The loop will be exited when there's not enough source code, or
     when there's no room in CHARBUF for a decoded character.  */
  const unsigned char *src_base;
  /* A buffer to produce decoded characters.  */
  int *charbuf = coding->charbuf + coding->charbuf_used;
  int *charbuf_end = coding->charbuf + coding->charbuf_size;
  int multibytep = coding->src_multibyte;
  /* Both of these are referenced by ONE_MORE_BYTE.  */
  int consumed_chars = 0;
  int c;

  while (1)
    {
      src_base = src;
      if (charbuf >= charbuf_end)
        /* No more room to produce a decoded character.  */
        break;
      ONE_MORE_BYTE (c);
      /* Decode it.  */
    }

 no_more_source:
  if (src_base < src_end
      && coding->mode & CODING_MODE_LAST_BLOCK)
    /* If the source ends by partial bytes to construct a character,
       treat them as eight-bit raw data.  */
    while (src_base < src_end && charbuf < charbuf_end)
      *charbuf++ = *src_base++;
  /* Remember how many bytes and characters we consumed.
     NOTE(review): this stores the same byte offset in both members;
     for a multibyte source the character count presumably should
     come from CONSUMED_CHARS instead -- confirm against the real
     decoders.  */
  coding->consumed = coding->consumed_char = src_base - coding->source;
  /* Remember how many characters we produced.  */
  coding->charbuf_used = charbuf - coding->charbuf;
}
#endif
244
245/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246
df7492f9
KH
247 These functions encode SRC_BYTES length text at SOURCE of Emacs'
248 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
249 goes to a place pointed to by DESTINATION, the length of which
250 should not exceed DST_BYTES.
d46c5b12 251
df7492f9
KH
252 These functions set the information of original and encoded texts in
253 the members produced, produced_char, consumed, and consumed_char of
254 the structure *CODING. They also set the member result to one of
255 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 256
df7492f9
KH
257 DST_BYTES zero means that source area and destination area are
258 overlapped, which means that we can produce an encoded text until it
259 reaches the head of not-yet-encoded source text.
d46c5b12 260
df7492f9 261 Below is a template of these functions. */
#if 0
/* Template of an `encode_coding_XXX ()' function.

   Encode the characters stored in CODING->charbuf (CODING->charbuf_used
   of them) into CODING->destination, starting after the bytes already
   produced.  On return, CODING->produced_char and CODING->produced
   are updated.  */
static void
encode_coding_XXX (coding)
     struct coding_system *coding;
{
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  /* CHARBUF is a plain `int *', so the end must be computed from it
     (or from CODING->charbuf), not through a member access.  */
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Stop early enough that one more loop iteration cannot overrun
     the destination.  */
  unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
  int produced_chars = 0;

  for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
    {
      int c = *charbuf;
      /* Encode C into DST, and increment DST.  */
    }
 label_no_more_destination:
  /* How many chars and bytes we produced.  */
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
}
#endif
286
4ed46869
KH
287\f
288/*** 1. Preamble ***/
289
68c45bf0 290#include <config.h>
4ed46869 291#include <stdio.h>
d7306fe6 292#include <setjmp.h>
4ed46869 293
4ed46869
KH
294#include "lisp.h"
295#include "buffer.h"
df7492f9 296#include "character.h"
4ed46869
KH
297#include "charset.h"
298#include "ccl.h"
df7492f9 299#include "composite.h"
4ed46869
KH
300#include "coding.h"
301#include "window.h"
b8299c66
KL
302#include "frame.h"
303#include "termhooks.h"
4ed46869 304
df7492f9 305Lisp_Object Vcoding_system_hash_table;
4ed46869 306
df7492f9 307Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
308Lisp_Object Qunix, Qdos;
309extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
310Lisp_Object Qbuffer_file_coding_system;
311Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 312Lisp_Object Qdefault_char;
27901516 313Lisp_Object Qno_conversion, Qundecided;
df7492f9 314Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 315Lisp_Object Qbig, Qlittle;
bb0115a2 316Lisp_Object Qcoding_system_history;
1397dc18 317Lisp_Object Qvalid_codes;
2133e2d1 318Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
a6f87d34
KH
319Lisp_Object QCdecode_translation_table, QCencode_translation_table;
320Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
35befdaa 321Lisp_Object QCascii_compatible_p;
4ed46869
KH
322
323extern Lisp_Object Qinsert_file_contents, Qwrite_region;
387f6ba5 324Lisp_Object Qcall_process, Qcall_process_region;
4ed46869
KH
325Lisp_Object Qstart_process, Qopen_network_stream;
326Lisp_Object Qtarget_idx;
327
065e3595
KH
328Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
329Lisp_Object Qinterrupted, Qinsufficient_memory;
330
c7183fb8
GM
331extern Lisp_Object Qcompletion_ignore_case;
332
44e8490d
KH
333/* If a symbol has this property, evaluate the value to define the
334 symbol as a coding system. */
335static Lisp_Object Qcoding_system_define_form;
336
5d5bf4d8
KH
337int coding_system_require_warning;
338
d46c5b12
KH
339Lisp_Object Vselect_safe_coding_system_function;
340
7722baf9
EZ
341/* Mnemonic string for each format of end-of-line. */
342Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
343/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 344 decided. */
7722baf9 345Lisp_Object eol_mnemonic_undecided;
4ed46869 346
fcbcfb64
KH
347/* Format of end-of-line decided by system. This is Qunix on
348 Unix and Mac, Qdos on DOS/Windows.
349 This has an effect only for external encoding (i.e. for output to
350 file and process), not for in-buffer or Lisp string encoding. */
351static Lisp_Object system_eol_type;
352
4ed46869
KH
353#ifdef emacs
354
4608c386
KH
355Lisp_Object Vcoding_system_list, Vcoding_system_alist;
356
357Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 358
d46c5b12
KH
359/* Coding system emacs-mule and raw-text are for converting only
360 end-of-line format. */
361Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 362Lisp_Object Qutf_8_emacs;
ecf488bc 363
4ed46869
KH
364/* Coding-systems are handed between Emacs Lisp programs and C internal
365 routines by the following three variables. */
366/* Coding-system for reading files and receiving data from process. */
367Lisp_Object Vcoding_system_for_read;
368/* Coding-system for writing files and sending data to process. */
369Lisp_Object Vcoding_system_for_write;
370/* Coding-system actually used in the latest I/O. */
371Lisp_Object Vlast_coding_system_used;
065e3595
KH
372/* Set to non-nil when an error is detected while code conversion. */
373Lisp_Object Vlast_code_conversion_error;
c4825358 374/* A vector of length 256 which contains information about special
94487c4e 375 Latin codes (especially for dealing with Microsoft codes). */
3f003981 376Lisp_Object Vlatin_extra_code_table;
c4825358 377
9ce27fde
KH
378/* Flag to inhibit code conversion of end-of-line format. */
379int inhibit_eol_conversion;
380
74383408
KH
381/* Flag to inhibit ISO2022 escape sequence detection. */
382int inhibit_iso_escape_detection;
383
97b1b294
EZ
384/* Flag to inhibit detection of binary files through null bytes. */
385int inhibit_null_byte_detection;
386
ed29121d
EZ
387/* Flag to make buffer-file-coding-system inherit from process-coding. */
388int inherit_process_coding_system;
389
c4825358
KH
390/* Coding system to be used to encode text for terminal display when
391 terminal coding system is nil. */
392struct coding_system safe_terminal_coding;
393
02ba4723
KH
394Lisp_Object Vfile_coding_system_alist;
395Lisp_Object Vprocess_coding_system_alist;
396Lisp_Object Vnetwork_coding_system_alist;
4ed46869 397
68c45bf0
PE
398Lisp_Object Vlocale_coding_system;
399
4ed46869
KH
400#endif /* emacs */
401
f967223b
KH
402/* Flag to tell if we look up translation table on character code
403 conversion. */
84fbb8a0 404Lisp_Object Venable_character_translation;
f967223b
KH
405/* Standard translation table to look up on decoding (reading). */
406Lisp_Object Vstandard_translation_table_for_decode;
407/* Standard translation table to look up on encoding (writing). */
408Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 409
f967223b
KH
410Lisp_Object Qtranslation_table;
411Lisp_Object Qtranslation_table_id;
412Lisp_Object Qtranslation_table_for_decode;
413Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
414
415/* Alist of charsets vs revision number. */
df7492f9 416static Lisp_Object Vcharset_revision_table;
4ed46869 417
02ba4723
KH
418/* Default coding systems used for process I/O. */
419Lisp_Object Vdefault_process_coding_system;
420
002fdb44
DL
421/* Char table for translating Quail and self-inserting input. */
422Lisp_Object Vtranslation_table_for_input;
423
df7492f9
KH
424/* Two special coding systems. */
425Lisp_Object Vsjis_coding_system;
426Lisp_Object Vbig5_coding_system;
427
df7492f9
KH
428/* ISO2022 section */
429
430#define CODING_ISO_INITIAL(coding, reg) \
431 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
432 coding_attr_iso_initial), \
433 reg)))
434
435
1b3b981b
AS
/* Return the entry of CODING's safe_charsets table for CHARSET_ID, or
   -1 if CHARSET_ID is greater than CODING's max_charset_id or if the
   entry is 255.  */
436#define CODING_ISO_REQUEST(coding, charset_id) \
437 (((charset_id) <= (coding)->max_charset_id \
438 ? ((coding)->safe_charsets[charset_id] != 255 \
439 ? (coding)->safe_charsets[charset_id] \
440 : -1) \
df7492f9
KH
441 : -1))
442
443
444#define CODING_ISO_FLAGS(coding) \
445 ((coding)->spec.iso_2022.flags)
446#define CODING_ISO_DESIGNATION(coding, reg) \
447 ((coding)->spec.iso_2022.current_designation[reg])
448#define CODING_ISO_INVOCATION(coding, plane) \
449 ((coding)->spec.iso_2022.current_invocation[plane])
450#define CODING_ISO_SINGLE_SHIFTING(coding) \
451 ((coding)->spec.iso_2022.single_shifting)
452#define CODING_ISO_BOL(coding) \
453 ((coding)->spec.iso_2022.bol)
454#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
455 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
e951386e
KH
456#define CODING_ISO_CMP_STATUS(coding) \
457 (&(coding)->spec.iso_2022.cmp_status)
458#define CODING_ISO_EXTSEGMENT_LEN(coding) \
459 ((coding)->spec.iso_2022.ctext_extended_segment_len)
460#define CODING_ISO_EMBEDDED_UTF_8(coding) \
461 ((coding)->spec.iso_2022.embedded_utf_8)
df7492f9
KH
462
463/* Control characters of ISO2022. */
464 /* code */ /* function */
465#define ISO_CODE_LF 0x0A /* line-feed */
466#define ISO_CODE_CR 0x0D /* carriage-return */
467#define ISO_CODE_SO 0x0E /* shift-out */
468#define ISO_CODE_SI 0x0F /* shift-in */
469#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
470#define ISO_CODE_ESC 0x1B /* escape */
471#define ISO_CODE_SS2 0x8E /* single-shift-2 */
472#define ISO_CODE_SS3 0x8F /* single-shift-3 */
473#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
474
475/* Every 1-byte code of ISO2022 is classified into one of the
476 following classes. */
477enum iso_code_class_type
478 {
479 ISO_control_0, /* Control codes in the range
480 0x00..0x1F and 0x7F, except for the
481 following 4 codes. */
df7492f9
KH
482 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
483 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
484 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
485 ISO_escape, /* ISO_CODE_ESC (0x1B) */
486 ISO_control_1, /* Control codes in the range
487 0x80..0x9F, except for the
488 following 3 codes. */
489 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
490 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
491 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
492 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
493 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
494 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
495 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
496 };
05e6f5dc 497
df7492f9
KH
498/** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
499 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 500
df7492f9
KH
501/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
502 instead of the correct short-form sequence (e.g. ESC $ A). */
503#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 504
df7492f9
KH
505/* If set, reset graphic planes and registers at end-of-line to the
506 initial state. */
507#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 508
df7492f9
KH
509/* If set, reset graphic planes and registers before any control
510 characters to the initial state. */
511#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 512
df7492f9
KH
513/* If set, encode by 7-bit environment. */
514#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 515
df7492f9
KH
516/* If set, use locking-shift function. */
517#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 518
df7492f9
KH
519/* If set, use single-shift function. Overwrite
520 CODING_ISO_FLAG_LOCKING_SHIFT. */
521#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 522
df7492f9
KH
523/* If set, use designation escape sequence. */
524#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 525
df7492f9
KH
526/* If set, produce revision number sequence. */
527#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 528
df7492f9
KH
529/* If set, produce ISO6429's direction specifying sequence. */
530#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 531
df7492f9
KH
532/* If set, assume designation states are reset at beginning of line on
533 output. */
534#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 535
df7492f9
KH
536/* If set, designation sequence should be placed at beginning of line
537 on output. */
538#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 539
df7492f9
KH
540/* If set, do not encode unsafe characters on output. */
541#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 542
df7492f9
KH
543/* If set, extra latin codes (128..159) are accepted as a valid code
544 on input. */
545#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 546
df7492f9 547#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 548
df7492f9 549#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 550
bf16eb23 551#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 552
bf16eb23 553#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 554
bf16eb23 555#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 556
df7492f9
KH
557/* A character to be produced on output if encoding of the original
558 character is prohibited by CODING_ISO_FLAG_SAFE. */
559#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 560
a470d443
KH
561/* UTF-8 section */
562#define CODING_UTF_8_BOM(coding) \
563 ((coding)->spec.utf_8_bom)
4ed46869 564
df7492f9
KH
565/* UTF-16 section */
566#define CODING_UTF_16_BOM(coding) \
567 ((coding)->spec.utf_16.bom)
4ed46869 568
df7492f9
KH
569#define CODING_UTF_16_ENDIAN(coding) \
570 ((coding)->spec.utf_16.endian)
4ed46869 571
df7492f9
KH
572#define CODING_UTF_16_SURROGATE(coding) \
573 ((coding)->spec.utf_16.surrogate)
4ed46869 574
4ed46869 575
df7492f9
KH
576/* CCL section */
577#define CODING_CCL_DECODER(coding) \
578 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
579#define CODING_CCL_ENCODER(coding) \
580 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
581#define CODING_CCL_VALIDS(coding) \
8f924df7 582 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 583
5a936b46 584/* Index for each coding category in `coding_categories' */
4ed46869 585
df7492f9
KH
586enum coding_category
587 {
588 coding_category_iso_7,
589 coding_category_iso_7_tight,
590 coding_category_iso_8_1,
591 coding_category_iso_8_2,
592 coding_category_iso_7_else,
593 coding_category_iso_8_else,
a470d443
KH
594 coding_category_utf_8_auto,
595 coding_category_utf_8_nosig,
596 coding_category_utf_8_sig,
df7492f9
KH
597 coding_category_utf_16_auto,
598 coding_category_utf_16_be,
599 coding_category_utf_16_le,
600 coding_category_utf_16_be_nosig,
601 coding_category_utf_16_le_nosig,
602 coding_category_charset,
603 coding_category_sjis,
604 coding_category_big5,
605 coding_category_ccl,
606 coding_category_emacs_mule,
607 /* All above are targets of code detection. */
608 coding_category_raw_text,
609 coding_category_undecided,
610 coding_category_max
611 };
612
613/* Definitions of flag bits used in detect_coding_XXXX. */
614#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
615#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
616#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
617#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
618#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
619#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
a470d443
KH
620#define CATEGORY_MASK_UTF_8_AUTO (1 << coding_category_utf_8_auto)
621#define CATEGORY_MASK_UTF_8_NOSIG (1 << coding_category_utf_8_nosig)
622#define CATEGORY_MASK_UTF_8_SIG (1 << coding_category_utf_8_sig)
b49a1807 623#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
624#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
625#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
626#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
627#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
628#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
629#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
630#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
631#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
632#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 633#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
634
635/* This value is returned if detect_coding_mask () finds nothing other
636 than ASCII characters. */
637#define CATEGORY_MASK_ANY \
638 (CATEGORY_MASK_ISO_7 \
639 | CATEGORY_MASK_ISO_7_TIGHT \
640 | CATEGORY_MASK_ISO_8_1 \
641 | CATEGORY_MASK_ISO_8_2 \
642 | CATEGORY_MASK_ISO_7_ELSE \
643 | CATEGORY_MASK_ISO_8_ELSE \
a470d443
KH
644 | CATEGORY_MASK_UTF_8_AUTO \
645 | CATEGORY_MASK_UTF_8_NOSIG \
646 | CATEGORY_MASK_UTF_8_SIG \
2f3cbb32 647 | CATEGORY_MASK_UTF_16_AUTO \
df7492f9
KH
648 | CATEGORY_MASK_UTF_16_BE \
649 | CATEGORY_MASK_UTF_16_LE \
650 | CATEGORY_MASK_UTF_16_BE_NOSIG \
651 | CATEGORY_MASK_UTF_16_LE_NOSIG \
652 | CATEGORY_MASK_CHARSET \
653 | CATEGORY_MASK_SJIS \
654 | CATEGORY_MASK_BIG5 \
655 | CATEGORY_MASK_CCL \
656 | CATEGORY_MASK_EMACS_MULE)
657
658
659#define CATEGORY_MASK_ISO_7BIT \
660 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
661
662#define CATEGORY_MASK_ISO_8BIT \
663 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
664
665#define CATEGORY_MASK_ISO_ELSE \
666 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
667
668#define CATEGORY_MASK_ISO_ESCAPE \
669 (CATEGORY_MASK_ISO_7 \
670 | CATEGORY_MASK_ISO_7_TIGHT \
671 | CATEGORY_MASK_ISO_7_ELSE \
672 | CATEGORY_MASK_ISO_8_ELSE)
673
674#define CATEGORY_MASK_ISO \
675 ( CATEGORY_MASK_ISO_7BIT \
676 | CATEGORY_MASK_ISO_8BIT \
677 | CATEGORY_MASK_ISO_ELSE)
678
679#define CATEGORY_MASK_UTF_16 \
2f3cbb32
KH
680 (CATEGORY_MASK_UTF_16_AUTO \
681 | CATEGORY_MASK_UTF_16_BE \
df7492f9
KH
682 | CATEGORY_MASK_UTF_16_LE \
683 | CATEGORY_MASK_UTF_16_BE_NOSIG \
684 | CATEGORY_MASK_UTF_16_LE_NOSIG)
685
a470d443
KH
686#define CATEGORY_MASK_UTF_8 \
687 (CATEGORY_MASK_UTF_8_AUTO \
688 | CATEGORY_MASK_UTF_8_NOSIG \
689 | CATEGORY_MASK_UTF_8_SIG)
df7492f9
KH
690
691/* List of symbols `coding-category-xxx' ordered by priority. This
692 variable is exposed to Emacs Lisp. */
693static Lisp_Object Vcoding_category_list;
694
695/* Table of coding categories (Lisp symbols). This variable is for
696 internal use only. */
697static Lisp_Object Vcoding_category_table;
698
699/* Table of coding-categories ordered by priority. */
700static enum coding_category coding_priorities[coding_category_max];
701
702/* Nth element is a coding context for the coding system bound to the
703 Nth coding category. */
704static struct coding_system coding_categories[coding_category_max];
705
df7492f9
KH
706/*** Commonly used macros and functions ***/
707
708#ifndef min
709#define min(a, b) ((a) < (b) ? (a) : (b))
710#endif
711#ifndef max
712#define max(a, b) ((a) > (b) ? (a) : (b))
713#endif
4ed46869 714
24a73b0a
KH
/* Set ATTRS to the attributes of the coding system identified by
   CODING->id (via CODING_ID_ATTRS), and CHARSET_LIST to the charset
   list taken from those attributes (via CODING_ATTR_CHARSET_LIST).
   ATTRS and CHARSET_LIST must be assignable lvalues.  */
715#define CODING_GET_INFO(coding, attrs, charset_list) \
716 do { \
717 (attrs) = CODING_ID_ATTRS ((coding)->id); \
718 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
df7492f9 719 } while (0)
4ed46869 720
4ed46869 721
df7492f9
KH
722/* Safely get one byte from the source text pointed by SRC which ends
723 at SRC_END, and set C to that byte. If there are not enough bytes
065e3595
KH
724 in the source, it jumps to `no_more_source'. If multibytep is
725 nonzero, and a multibyte character is found at SRC, set C to the
726 negative value of the character code. The caller should declare
727 and set these variables appropriately in advance:
728 src, src_end, src_base, multibytep, coding, consumed_chars */
aa72b389 729
065e3595
KH
730#define ONE_MORE_BYTE(c) \
731 do { \
732 if (src == src_end) \
733 { \
734 if (src_base < src) \
735 record_conversion_result \
736 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
737 goto no_more_source; \
738 } \
739 c = *src++; \
740 if (multibytep && (c & 0x80)) \
741 { \
742 if ((c & 0xFE) == 0xC0) \
743 c = ((c & 1) << 6) | *src++; \
744 else \
745 { \
35befdaa
KH
746 src--; \
747 c = - string_char (src, &src, NULL); \
065e3595
KH
748 record_conversion_result \
749 (coding, CODING_RESULT_INVALID_SRC); \
750 } \
751 } \
752 consumed_chars++; \
aa72b389
KH
753 } while (0)
754
f56a4450 755/* Safely get two bytes from the source text pointed by SRC which ends
220eeac9
KH
756 at SRC_END, and set C1 and C2 to those bytes while skipping the
757 heading multibyte characters. If there are not enough bytes in the
758 source, it jumps to `no_more_source'. If multibytep is nonzero and
759 a multibyte character is found for C2, set C2 to -1 (the character
760 code itself is not computed). The caller should declare and set these
761 variables appropriately in advance:
f56a4450
KH
762 src, src_end, multibytep
763 It is intended that this macro is used in detect_coding_utf_16. */
764
220eeac9
KH
765#define TWO_MORE_BYTES(c1, c2) \
766 do { \
767 do { \
768 if (src == src_end) \
769 goto no_more_source; \
770 c1 = *src++; \
771 if (multibytep && (c1 & 0x80)) \
772 { \
773 if ((c1 & 0xFE) == 0xC0) \
774 c1 = ((c1 & 1) << 6) | *src++; \
775 else \
776 { \
777 src += BYTES_BY_CHAR_HEAD (c1) - 1; \
778 c1 = -1; \
779 } \
780 } \
781 } while (c1 < 0); \
782 if (src == src_end) \
783 goto no_more_source; \
784 c2 = *src++; \
785 if (multibytep && (c2 & 0x80)) \
786 { \
787 if ((c2 & 0xFE) == 0xC0) \
788 c2 = ((c2 & 1) << 6) | *src++; \
789 else \
790 c2 = -1; \
791 } \
f56a4450
KH
792 } while (0)
793
aa72b389 794
065e3595
KH
795#define ONE_MORE_BYTE_NO_CHECK(c) \
796 do { \
797 c = *src++; \
798 if (multibytep && (c & 0x80)) \
799 { \
800 if ((c & 0xFE) == 0xC0) \
801 c = ((c & 1) << 6) | *src++; \
802 else \
803 { \
35befdaa
KH
804 src--; \
805 c = - string_char (src, &src, NULL); \
065e3595
KH
806 record_conversion_result \
807 (coding, CODING_RESULT_INVALID_SRC); \
808 } \
809 } \
810 consumed_chars++; \
aa72b389
KH
811 } while (0)
812
aa72b389 813
df7492f9
KH
/* Write the byte C at DST, step DST to the next free position, and
   count one more produced character in PRODUCED_CHARS.  The caller
   guarantees that C is in the range 0..127 and has declared and set
   `dst' and `produced_chars' beforehand.  */

#define EMIT_ONE_ASCII_BYTE(c)	\
  do {				\
    ++produced_chars;		\
    *dst++ = (c);		\
  } while (0)
aa72b389
KH
826
827
/* Like EMIT_ONE_ASCII_BYTE, but store two bytes, C1 then C2, and
   count two produced characters.  */

#define EMIT_TWO_ASCII_BYTES(c1, c2)	\
  do {					\
    produced_chars += 2;		\
    *dst++ = (c1);			\
    *dst++ = (c2);			\
  } while (0)
aa72b389
KH
835
836
df7492f9
KH
/* Write the byte C at DST, step DST to the next free position, and
   count one more produced character in PRODUCED_CHARS.  When
   MULTIBYTEP is nonzero the byte is written in multibyte form: a
   value of 0x80 or above is first mapped through BYTE8_TO_CHAR, and
   the result is stored with CHAR_STRING_ADVANCE.  The caller must
   have declared and set `dst', `multibytep' and `produced_chars'.  */

#define EMIT_ONE_BYTE(c)				\
  do {							\
    produced_chars++;					\
    if (! multibytep)					\
      *dst++ = (c);					\
    else						\
      {							\
	int emit_ch = (c);				\
							\
	if (emit_ch >= 0x80)				\
	  emit_ch = BYTE8_TO_CHAR (emit_ch);		\
	CHAR_STRING_ADVANCE (emit_ch, dst);		\
      }							\
  } while (0)
aa72b389 856
aa72b389 857
/* Like EMIT_ONE_BYTE, but emit two bytes, C1 then C2, and count two
   produced characters.  */

#define EMIT_TWO_BYTES(c1, c2)				\
  do {							\
    produced_chars += 2;				\
    if (! multibytep)					\
      {							\
	*dst++ = (c1);					\
	*dst++ = (c2);					\
      }							\
    else						\
      {							\
	int emit_ch = (c1);				\
							\
	if (emit_ch >= 0x80)				\
	  emit_ch = BYTE8_TO_CHAR (emit_ch);		\
	CHAR_STRING_ADVANCE (emit_ch, dst);		\
							\
	emit_ch = (c2);					\
	if (emit_ch >= 0x80)				\
	  emit_ch = BYTE8_TO_CHAR (emit_ch);		\
	CHAR_STRING_ADVANCE (emit_ch, dst);		\
      }							\
  } while (0)
882
883
df7492f9
KH
/* Emit the three bytes C1, C2 and C3, in that order, by way of
   EMIT_TWO_BYTES and EMIT_ONE_BYTE.  */

#define EMIT_THREE_BYTES(c1, c2, c3)	\
  do {					\
    EMIT_TWO_BYTES (c1, c2);		\
    EMIT_ONE_BYTE (c3);			\
  } while (0)
aa72b389 889
aa72b389 890
df7492f9
KH
/* Emit the four bytes C1, C2, C3 and C4, in that order, by way of
   EMIT_ONE_BYTE and EMIT_TWO_BYTES.  */

#define EMIT_FOUR_BYTES(c1, c2, c3, c4)	\
  do {					\
    EMIT_ONE_BYTE (c1);			\
    EMIT_TWO_BYTES (c2, c3);		\
    EMIT_ONE_BYTE (c4);			\
  } while (0)
aa72b389 896
aa72b389 897
f6cbaf43
KH
898/* Prototypes for static functions. */
899static void record_conversion_result P_ ((struct coding_system *coding,
900 enum coding_result_code result));
901static int detect_coding_utf_8 P_ ((struct coding_system *,
902 struct coding_detection_info *info));
903static void decode_coding_utf_8 P_ ((struct coding_system *));
904static int encode_coding_utf_8 P_ ((struct coding_system *));
905
906static int detect_coding_utf_16 P_ ((struct coding_system *,
907 struct coding_detection_info *info));
908static void decode_coding_utf_16 P_ ((struct coding_system *));
909static int encode_coding_utf_16 P_ ((struct coding_system *));
910
911static int detect_coding_iso_2022 P_ ((struct coding_system *,
912 struct coding_detection_info *info));
913static void decode_coding_iso_2022 P_ ((struct coding_system *));
914static int encode_coding_iso_2022 P_ ((struct coding_system *));
915
916static int detect_coding_emacs_mule P_ ((struct coding_system *,
917 struct coding_detection_info *info));
918static void decode_coding_emacs_mule P_ ((struct coding_system *));
919static int encode_coding_emacs_mule P_ ((struct coding_system *));
920
921static int detect_coding_sjis P_ ((struct coding_system *,
922 struct coding_detection_info *info));
923static void decode_coding_sjis P_ ((struct coding_system *));
924static int encode_coding_sjis P_ ((struct coding_system *));
925
926static int detect_coding_big5 P_ ((struct coding_system *,
927 struct coding_detection_info *info));
928static void decode_coding_big5 P_ ((struct coding_system *));
929static int encode_coding_big5 P_ ((struct coding_system *));
930
931static int detect_coding_ccl P_ ((struct coding_system *,
932 struct coding_detection_info *info));
933static void decode_coding_ccl P_ ((struct coding_system *));
934static int encode_coding_ccl P_ ((struct coding_system *));
935
936static void decode_coding_raw_text P_ ((struct coding_system *));
937static int encode_coding_raw_text P_ ((struct coding_system *));
938
939static void coding_set_source P_ ((struct coding_system *));
940static void coding_set_destination P_ ((struct coding_system *));
941static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
942static void coding_alloc_by_making_gap P_ ((struct coding_system *,
287c57d7 943 EMACS_INT, EMACS_INT));
f6cbaf43
KH
944static unsigned char *alloc_destination P_ ((struct coding_system *,
945 EMACS_INT, unsigned char *));
946static void setup_iso_safe_charsets P_ ((Lisp_Object));
947static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
948 int *, int *,
949 unsigned char *));
950static int detect_eol P_ ((const unsigned char *,
951 EMACS_INT, enum coding_category));
952static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
953static void decode_eol P_ ((struct coding_system *));
954static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
e951386e 955static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *));
f6cbaf43 956static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
f6cbaf43
KH
957static INLINE void produce_charset P_ ((struct coding_system *, int *,
958 EMACS_INT));
959static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
960static int decode_coding P_ ((struct coding_system *));
961static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
3ed051d4 962 struct coding_system *,
f6cbaf43
KH
963 int *, EMACS_INT *));
964static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
965 struct coding_system *,
966 int *, EMACS_INT *));
967static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
968static int encode_coding P_ ((struct coding_system *));
969static Lisp_Object make_conversion_work_buffer P_ ((int));
970static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
971static INLINE int char_encodable_p P_ ((int, Lisp_Object));
972static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
973
065e3595
KH
974static void
975record_conversion_result (struct coding_system *coding,
976 enum coding_result_code result)
977{
978 coding->result = result;
979 switch (result)
980 {
981 case CODING_RESULT_INSUFFICIENT_SRC:
982 Vlast_code_conversion_error = Qinsufficient_source;
983 break;
984 case CODING_RESULT_INCONSISTENT_EOL:
985 Vlast_code_conversion_error = Qinconsistent_eol;
986 break;
987 case CODING_RESULT_INVALID_SRC:
988 Vlast_code_conversion_error = Qinvalid_source;
989 break;
990 case CODING_RESULT_INTERRUPT:
991 Vlast_code_conversion_error = Qinterrupted;
992 break;
993 case CODING_RESULT_INSUFFICIENT_MEM:
994 Vlast_code_conversion_error = Qinsufficient_memory;
995 break;
409ea3a1
AS
996 case CODING_RESULT_SUCCESS:
997 break;
35befdaa
KH
998 default:
999 Vlast_code_conversion_error = intern ("Unknown error");
065e3595
KH
1000 }
1001}
1002
df7492f9
KH
/* Decode CODE in CHARSET with DECODE_CHAR and store the result in C.
   If charset_map_loaded is nonzero afterwards, coding->source is
   recomputed with coding_set_source and SRC, SRC_BASE and SRC_END are
   shifted by however far coding->source moved.  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {									\
    charset_map_loaded = 0;						\
    c = DECODE_CHAR (charset, code);					\
    if (charset_map_loaded)						\
      {									\
	const unsigned char *old_source = coding->source;		\
	EMACS_INT shift;						\
									\
	coding_set_source (coding);					\
	shift = coding->source - old_source;				\
	src += shift;							\
	src_base += shift;						\
	src_end += shift;						\
      }									\
  } while (0)
1019
1020
119852e7
KH
/* If there is not more than BYTES bytes of room between DST and
   DST_END, enlarge coding->destination with alloc_destination and
   update DST and DST_END.  The extra room requested is BYTES plus the
   number of elements still pending in charbuf.  We don't have to take
   care of coding->source, which may be relocated; that is handled by
   calling coding_set_source in encode_coding.  */

#define ASSURE_DESTINATION(bytes)				\
  do {								\
    if (dst + (bytes) >= dst_end)				\
      {								\
	int extra_bytes = charbuf_end - charbuf + (bytes);	\
								\
	dst = alloc_destination (coding, extra_bytes, dst);	\
	dst_end = coding->destination + coding->dst_bytes;	\
      }								\
  } while (0)
aa72b389 1036
aa72b389 1037
db274c7a
KH
/* Store the multibyte form of the character C at P, and advance P to
   the end of that multibyte form.  This is like CHAR_STRING_ADVANCE
   but it never calls MAYBE_UNIFY_CHAR.

   C is evaluated several times, so it must be free of side effects.
   Every use of C is parenthesized so that an expression argument
   such as `hi | lo' is shifted and masked as a whole.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)		\
  do {							\
    if ((c) <= MAX_1_BYTE_CHAR)				\
      *(p)++ = (c);					\
    else if ((c) <= MAX_2_BYTE_CHAR)			\
      *(p)++ = (0xC0 | ((c) >> 6)),			\
	*(p)++ = (0x80 | ((c) & 0x3F));			\
    else if ((c) <= MAX_3_BYTE_CHAR)			\
      *(p)++ = (0xE0 | ((c) >> 12)),			\
	*(p)++ = (0x80 | (((c) >> 6) & 0x3F)),		\
	*(p)++ = (0x80 | ((c) & 0x3F));			\
    else if ((c) <= MAX_4_BYTE_CHAR)			\
      *(p)++ = (0xF0 | ((c) >> 18)),			\
	*(p)++ = (0x80 | (((c) >> 12) & 0x3F)),		\
	*(p)++ = (0x80 | (((c) >> 6) & 0x3F)),		\
	*(p)++ = (0x80 | ((c) & 0x3F));			\
    else if ((c) <= MAX_5_BYTE_CHAR)			\
      *(p)++ = 0xF8,					\
	*(p)++ = (0x80 | (((c) >> 18) & 0x0F)),		\
	*(p)++ = (0x80 | (((c) >> 12) & 0x3F)),		\
	*(p)++ = (0x80 | (((c) >> 6) & 0x3F)),		\
	*(p)++ = (0x80 | ((c) & 0x3F));			\
    else						\
      /* Beyond the 5-byte range: store as a raw 8-bit byte.  */ \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);		\
  } while (0)
1067
1068
/* Return the character code of the character whose multibyte form
   starts at P, and advance P to the end of that multibyte form.
   This is like STRING_CHAR_ADVANCE, but it never calls
   MAYBE_UNIFY_CHAR.

   The length (1 to 5 bytes) is chosen from the high bits of the
   leading byte.  A two-byte form whose leading byte is below 0xC2
   has 0x3FFF80 OR-ed into the result.  P is evaluated several
   times.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)				\
  (((p)[0] & 0x80) == 0						\
   ? *(p)++							\
   : ((p)[0] & 0x20) == 0					\
   ? ((p) += 2,							\
      ((((p)[-2] & 0x1F) << 6)					\
       | ((p)[-1] & 0x3F)					\
       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))	\
   : ((p)[0] & 0x10) == 0					\
   ? ((p) += 3,							\
      ((((p)[-3] & 0x0F) << 12)					\
       | (((p)[-2] & 0x3F) << 6)				\
       | ((p)[-1] & 0x3F)))					\
   : ((p)[0] & 0x08) == 0					\
   ? ((p) += 4,							\
      ((((p)[-4] & 0xF) << 18)					\
       | (((p)[-3] & 0x3F) << 12)				\
       | (((p)[-2] & 0x3F) << 6)				\
       | ((p)[-1] & 0x3F)))					\
   : ((p) += 5,							\
      ((((p)[-4] & 0x3F) << 18)					\
       | (((p)[-3] & 0x3F) << 12)				\
       | (((p)[-2] & 0x3F) << 6)				\
       | ((p)[-1] & 0x3F))))
1097
aa72b389 1098
df7492f9
KH
1099static void
1100coding_set_source (coding)
aa72b389 1101 struct coding_system *coding;
aa72b389 1102{
df7492f9
KH
1103 if (BUFFERP (coding->src_object))
1104 {
2cb26057 1105 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 1106
df7492f9 1107 if (coding->src_pos < 0)
2cb26057 1108 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 1109 else
2cb26057 1110 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 1111 }
df7492f9 1112 else if (STRINGP (coding->src_object))
aa72b389 1113 {
8f924df7 1114 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 1115 }
df7492f9
KH
1116 else
1117 /* Otherwise, the source is C string and is never relocated
1118 automatically. Thus we don't have to update anything. */
1119 ;
1120}
aa72b389 1121
df7492f9
KH
1122static void
1123coding_set_destination (coding)
1124 struct coding_system *coding;
1125{
1126 if (BUFFERP (coding->dst_object))
aa72b389 1127 {
df7492f9 1128 if (coding->src_pos < 0)
aa72b389 1129 {
13818c30 1130 coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
28f67a95
KH
1131 coding->dst_bytes = (GAP_END_ADDR
1132 - (coding->src_bytes - coding->consumed)
1133 - coding->destination);
aa72b389 1134 }
df7492f9 1135 else
28f67a95
KH
1136 {
1137 /* We are sure that coding->dst_pos_byte is before the gap
1138 of the buffer. */
1139 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
13818c30 1140 + coding->dst_pos_byte - BEG_BYTE);
28f67a95
KH
1141 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1142 - coding->destination);
1143 }
df7492f9
KH
1144 }
1145 else
1146 /* Otherwise, the destination is C string and is never relocated
1147 automatically. Thus we don't have to update anything. */
1148 ;
1149}
1150
1151
1152static void
1153coding_alloc_by_realloc (coding, bytes)
1154 struct coding_system *coding;
1155 EMACS_INT bytes;
1156{
1157 coding->destination = (unsigned char *) xrealloc (coding->destination,
1158 coding->dst_bytes + bytes);
1159 coding->dst_bytes += bytes;
1160}
1161
1162static void
db274c7a 1163coding_alloc_by_making_gap (coding, gap_head_used, bytes)
df7492f9 1164 struct coding_system *coding;
db274c7a 1165 EMACS_INT gap_head_used, bytes;
df7492f9 1166{
db274c7a 1167 if (EQ (coding->src_object, coding->dst_object))
df7492f9 1168 {
db274c7a
KH
1169 /* The gap may contain the produced data at the head and not-yet
1170 consumed data at the tail. To preserve those data, we at
1171 first make the gap size to zero, then increase the gap
1172 size. */
1173 EMACS_INT add = GAP_SIZE;
1174
1175 GPT += gap_head_used, GPT_BYTE += gap_head_used;
1176 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
df7492f9
KH
1177 make_gap (bytes);
1178 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
db274c7a 1179 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
df7492f9 1180 }
730fff51 1181 else
df7492f9 1182 {
2c78b7e1
KH
1183 Lisp_Object this_buffer;
1184
1185 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1186 set_buffer_internal (XBUFFER (coding->dst_object));
1187 make_gap (bytes);
1188 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1189 }
df7492f9 1190}
8f924df7 1191
df7492f9
KH
1192
1193static unsigned char *
1194alloc_destination (coding, nbytes, dst)
1195 struct coding_system *coding;
3e139625 1196 EMACS_INT nbytes;
df7492f9
KH
1197 unsigned char *dst;
1198{
1199 EMACS_INT offset = dst - coding->destination;
1200
1201 if (BUFFERP (coding->dst_object))
db274c7a
KH
1202 {
1203 struct buffer *buf = XBUFFER (coding->dst_object);
1204
1205 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1206 }
aa72b389 1207 else
df7492f9 1208 coding_alloc_by_realloc (coding, nbytes);
df7492f9
KH
1209 coding_set_destination (coding);
1210 dst = coding->destination + offset;
1211 return dst;
1212}
aa72b389 1213
ff0dacd7
KH
1214/** Macros for annotations. */
1215
ff0dacd7
KH
/* Annotation data is stored in the array coding->charbuf in this
   format:
     [ -LENGTH ANNOTATION_MASK NCHARS ... ]
   LENGTH is the number of elements in the annotation.
   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
   NCHARS is the number of characters in the text annotated.

   The format of the following elements depends on ANNOTATION_MASK.

   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
     ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]

   NBYTES is the number of bytes specified in the header part of
   old-style emacs-mule encoding, or 0 for the other kind of
   composition.

   METHOD is one of enum composition_method.

   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.

   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
   follows.

   If ANNOTATION_MASK is 0, this annotation is just a space holder to
   recover from an invalid annotation, and should be skipped by
   produce_annotation.  */
1244
/* Maximum length of the header of annotation data.  */
#define MAX_ANNOTATION_LENGTH 5

/* Append the three common header elements of an annotation to BUF:
   the negated length LEN, the annotation MASK, and NCHARS.  BUF is
   advanced past them and coding->annotated is set to 1.  The caller
   must have `coding' in scope.

   The expansion deliberately has no trailing semicolon, so that
   `if (x) ADD_ANNOTATION_DATA (...); else ...' parses as intended.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)	\
  do {							\
    *(buf)++ = -(len);					\
    *(buf)++ = (mask);					\
    *(buf)++ = (nchars);				\
    coding->annotated = 1;				\
  } while (0)
1255
/* Append a composition annotation header to BUF: the common header
   (length 5, CODING_ANNOTATE_COMPOSITION_MASK, NCHARS) followed by
   NBYTES and METHOD.  BUF is advanced past all five elements.  */
#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)		\
  do {									\
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK,	\
			 nchars);					\
    *(buf)++ = (nbytes);						\
    *(buf)++ = (method);						\
  } while (0)
1262
1263
69a80ea3
KH
/* Append a charset annotation to BUF: the common header (length 4,
   CODING_ANNOTATE_CHARSET_MASK, NCHARS) followed by the charset ID.
   BUF is advanced past all four elements.  */
#define ADD_CHARSET_DATA(buf, nchars, id)				\
  do {									\
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK,		\
			 nchars);					\
    *(buf)++ = (id);							\
  } while (0)
1269
df7492f9
KH
1270\f
1271/*** 2. Emacs' internal format (emacs-utf-8) ***/
1272
1273
1274
1275\f
1276/*** 3. UTF-8 ***/
1277
1278/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1279 Check if a text is encoded in UTF-8. If it is, return 1, else
1280 return 0. */
df7492f9
KH
1281
/* Classification of a single octet C of a UTF-8 sequence by its
   high bits.  */

/* 0xxxxxxx: a complete one-octet character.  */
#define UTF_8_1_OCTET_P(c)		((c) < 0x80)
/* 10xxxxxx: a trailing octet.  */
#define UTF_8_EXTRA_OCTET_P(c)		(0x80 == ((c) & 0xC0))
/* 110xxxxx: leads a two-octet sequence.  */
#define UTF_8_2_OCTET_LEADING_P(c)	(0xC0 == ((c) & 0xE0))
/* 1110xxxx: leads a three-octet sequence.  */
#define UTF_8_3_OCTET_LEADING_P(c)	(0xE0 == ((c) & 0xF0))
/* 11110xxx: leads a four-octet sequence.  */
#define UTF_8_4_OCTET_LEADING_P(c)	(0xF0 == ((c) & 0xF8))
/* 111110xx: leads a five-octet sequence.  */
#define UTF_8_5_OCTET_LEADING_P(c)	(0xF8 == ((c) & 0xFC))

/* The byte order mark character, and the three octets that encode it
   in UTF-8.  */
#define UTF_BOM 0xFEFF
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1293
df7492f9 1294static int
ff0dacd7 1295detect_coding_utf_8 (coding, detect_info)
df7492f9 1296 struct coding_system *coding;
ff0dacd7 1297 struct coding_detection_info *detect_info;
df7492f9 1298{
065e3595 1299 const unsigned char *src = coding->source, *src_base;
8f924df7 1300 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1301 int multibytep = coding->src_multibyte;
1302 int consumed_chars = 0;
a470d443 1303 int bom_found = 0;
df7492f9
KH
1304 int found = 0;
1305
ff0dacd7 1306 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1307 /* A coding system of this category is always ASCII compatible. */
1308 src += coding->head_ascii;
1309
1310 while (1)
aa72b389 1311 {
df7492f9 1312 int c, c1, c2, c3, c4;
aa72b389 1313
065e3595 1314 src_base = src;
df7492f9 1315 ONE_MORE_BYTE (c);
065e3595 1316 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1317 continue;
1318 ONE_MORE_BYTE (c1);
065e3595 1319 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1320 break;
1321 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1322 {
a470d443 1323 found = 1;
df7492f9 1324 continue;
aa72b389 1325 }
df7492f9 1326 ONE_MORE_BYTE (c2);
065e3595 1327 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1328 break;
1329 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1330 {
a470d443
KH
1331 found = 1;
1332 if (src_base == coding->source
1333 && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1334 bom_found = 1;
df7492f9 1335 continue;
aa72b389 1336 }
df7492f9 1337 ONE_MORE_BYTE (c3);
065e3595 1338 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1339 break;
1340 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1341 {
a470d443 1342 found = 1;
df7492f9
KH
1343 continue;
1344 }
1345 ONE_MORE_BYTE (c4);
065e3595 1346 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1347 break;
1348 if (UTF_8_5_OCTET_LEADING_P (c))
1349 {
a470d443 1350 found = 1;
df7492f9
KH
1351 continue;
1352 }
1353 break;
aa72b389 1354 }
ff0dacd7 1355 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1356 return 0;
aa72b389 1357
df7492f9 1358 no_more_source:
065e3595 1359 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1360 {
ff0dacd7 1361 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1362 return 0;
aa72b389 1363 }
a470d443
KH
1364 if (bom_found)
1365 {
1366 /* The first character 0xFFFE doesn't necessarily mean a BOM. */
1367 detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1368 }
1369 else
1370 {
1371 detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
0e17387a
KH
1372 if (found)
1373 detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
a470d443 1374 }
ff0dacd7 1375 return 1;
aa72b389
KH
1376}
1377
4ed46869 1378
b73bfc1c 1379static void
df7492f9 1380decode_coding_utf_8 (coding)
b73bfc1c 1381 struct coding_system *coding;
b73bfc1c 1382{
8f924df7
KH
1383 const unsigned char *src = coding->source + coding->consumed;
1384 const unsigned char *src_end = coding->source + coding->src_bytes;
1385 const unsigned char *src_base;
69a80ea3
KH
1386 int *charbuf = coding->charbuf + coding->charbuf_used;
1387 int *charbuf_end = coding->charbuf + coding->charbuf_size;
453b38f0 1388 int consumed_chars = 0, consumed_chars_base = 0;
df7492f9 1389 int multibytep = coding->src_multibyte;
a470d443 1390 enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
24a73b0a 1391 Lisp_Object attr, charset_list;
0a9564cb
EZ
1392 int eol_crlf =
1393 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 1394 int byte_after_cr = -1;
4ed46869 1395
24a73b0a 1396 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1397
a470d443
KH
1398 if (bom != utf_without_bom)
1399 {
1400 int c1, c2, c3;
1401
1402 src_base = src;
1403 ONE_MORE_BYTE (c1);
1404 if (! UTF_8_3_OCTET_LEADING_P (c1))
1405 src = src_base;
1406 else
1407 {
159bd5a2 1408 ONE_MORE_BYTE (c2);
a470d443
KH
1409 if (! UTF_8_EXTRA_OCTET_P (c2))
1410 src = src_base;
1411 else
1412 {
159bd5a2 1413 ONE_MORE_BYTE (c3);
a470d443
KH
1414 if (! UTF_8_EXTRA_OCTET_P (c3))
1415 src = src_base;
1416 else
1417 {
1418 if ((c1 != UTF_8_BOM_1)
1419 || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1420 src = src_base;
1421 else
1422 CODING_UTF_8_BOM (coding) = utf_without_bom;
1423 }
1424 }
1425 }
1426 }
1427 CODING_UTF_8_BOM (coding) = utf_without_bom;
1428
1429
1430
df7492f9 1431 while (1)
b73bfc1c 1432 {
df7492f9 1433 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1434
df7492f9
KH
1435 src_base = src;
1436 consumed_chars_base = consumed_chars;
4af310db 1437
df7492f9 1438 if (charbuf >= charbuf_end)
b71f6f73
KH
1439 {
1440 if (byte_after_cr >= 0)
1441 src_base--;
1442 break;
1443 }
df7492f9 1444
119852e7
KH
1445 if (byte_after_cr >= 0)
1446 c1 = byte_after_cr, byte_after_cr = -1;
1447 else
1448 ONE_MORE_BYTE (c1);
065e3595
KH
1449 if (c1 < 0)
1450 {
1451 c = - c1;
1452 }
1453 else if (UTF_8_1_OCTET_P(c1))
df7492f9 1454 {
119852e7
KH
1455 if (eol_crlf && c1 == '\r')
1456 ONE_MORE_BYTE (byte_after_cr);
df7492f9 1457 c = c1;
4af310db 1458 }
df7492f9 1459 else
4af310db 1460 {
df7492f9 1461 ONE_MORE_BYTE (c2);
065e3595 1462 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1463 goto invalid_code;
1464 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1465 {
b0edb2c5
DL
1466 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1467 /* Reject overlong sequences here and below. Encoders
1468 producing them are incorrect, they can be misleading,
1469 and they mess up read/write invariance. */
1470 if (c < 128)
1471 goto invalid_code;
4af310db 1472 }
df7492f9 1473 else
aa72b389 1474 {
df7492f9 1475 ONE_MORE_BYTE (c3);
065e3595 1476 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1477 goto invalid_code;
1478 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1479 {
1480 c = (((c1 & 0xF) << 12)
1481 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1482 if (c < 0x800
1483 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1484 goto invalid_code;
1485 }
df7492f9
KH
1486 else
1487 {
1488 ONE_MORE_BYTE (c4);
065e3595 1489 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1490 goto invalid_code;
1491 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1492 {
df7492f9
KH
1493 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1494 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1495 if (c < 0x10000)
1496 goto invalid_code;
1497 }
df7492f9
KH
1498 else
1499 {
1500 ONE_MORE_BYTE (c5);
065e3595 1501 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1502 goto invalid_code;
1503 if (UTF_8_5_OCTET_LEADING_P (c1))
1504 {
1505 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1506 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1507 | (c5 & 0x3F));
b0edb2c5 1508 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1509 goto invalid_code;
1510 }
1511 else
1512 goto invalid_code;
1513 }
1514 }
aa72b389 1515 }
b73bfc1c 1516 }
df7492f9
KH
1517
1518 *charbuf++ = c;
1519 continue;
1520
1521 invalid_code:
1522 src = src_base;
1523 consumed_chars = consumed_chars_base;
1524 ONE_MORE_BYTE (c);
1525 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1526 coding->errors++;
aa72b389
KH
1527 }
1528
df7492f9
KH
1529 no_more_source:
1530 coding->consumed_char += consumed_chars_base;
1531 coding->consumed = src_base - coding->source;
1532 coding->charbuf_used = charbuf - coding->charbuf;
1533}
1534
1535
1536static int
1537encode_coding_utf_8 (coding)
1538 struct coding_system *coding;
1539{
1540 int multibytep = coding->dst_multibyte;
1541 int *charbuf = coding->charbuf;
1542 int *charbuf_end = charbuf + coding->charbuf_used;
1543 unsigned char *dst = coding->destination + coding->produced;
1544 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1545 int produced_chars = 0;
df7492f9
KH
1546 int c;
1547
a470d443
KH
1548 if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1549 {
1550 ASSURE_DESTINATION (3);
1551 EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1552 CODING_UTF_8_BOM (coding) = utf_without_bom;
1553 }
1554
df7492f9 1555 if (multibytep)
aa72b389 1556 {
df7492f9
KH
1557 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1558
1559 while (charbuf < charbuf_end)
b73bfc1c 1560 {
df7492f9 1561 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1562
df7492f9
KH
1563 ASSURE_DESTINATION (safe_room);
1564 c = *charbuf++;
28f67a95
KH
1565 if (CHAR_BYTE8_P (c))
1566 {
1567 c = CHAR_TO_BYTE8 (c);
1568 EMIT_ONE_BYTE (c);
1569 }
1570 else
1571 {
db274c7a 1572 CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
28f67a95
KH
1573 for (p = str; p < pend; p++)
1574 EMIT_ONE_BYTE (*p);
1575 }
b73bfc1c 1576 }
aa72b389 1577 }
df7492f9
KH
1578 else
1579 {
1580 int safe_room = MAX_MULTIBYTE_LENGTH;
1581
1582 while (charbuf < charbuf_end)
b73bfc1c 1583 {
df7492f9
KH
1584 ASSURE_DESTINATION (safe_room);
1585 c = *charbuf++;
f03caae0
KH
1586 if (CHAR_BYTE8_P (c))
1587 *dst++ = CHAR_TO_BYTE8 (c);
1588 else
db274c7a 1589 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
df7492f9 1590 produced_chars++;
4ed46869
KH
1591 }
1592 }
065e3595 1593 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1594 coding->produced_char += produced_chars;
1595 coding->produced = dst - coding->destination;
1596 return 0;
4ed46869
KH
1597}
1598
b73bfc1c 1599
df7492f9 1600/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1601 Check if a text is encoded in one of UTF-16 based coding systems.
1602 If it is, return 1, else return 0. */
aa72b389 1603
df7492f9
KH
/* Nonzero if the 16-bit code unit VAL is in 0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val)	\
  (0xD800 == ((val) & 0xFC00))

/* Nonzero if the 16-bit code unit VAL is in 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val)	\
  (0xDC00 == ((val) & 0xFC00))

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
#define UTF_16_INVALID_P(val)		\
  (UTF_16_LOW_SURROGATE_P (val)		\
   || ((val) == 0xFFFE)			\
   || ((val) == 0xFFFF))
aa72b389 1614
aa72b389 1615
df7492f9 1616static int
ff0dacd7 1617detect_coding_utf_16 (coding, detect_info)
aa72b389 1618 struct coding_system *coding;
ff0dacd7 1619 struct coding_detection_info *detect_info;
aa72b389 1620{
8f924df7
KH
1621 const unsigned char *src = coding->source, *src_base = src;
1622 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1623 int multibytep = coding->src_multibyte;
1624 int consumed_chars = 0;
1625 int c1, c2;
aa72b389 1626
ff0dacd7 1627 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1628 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1629 && (coding->src_chars & 1))
ff0dacd7
KH
1630 {
1631 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1632 return 0;
1633 }
24a73b0a 1634
f56a4450 1635 TWO_MORE_BYTES (c1, c2);
df7492f9 1636 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1637 {
b49a1807
KH
1638 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1639 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1640 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1641 | CATEGORY_MASK_UTF_16_BE_NOSIG
1642 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1643 }
df7492f9 1644 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1645 {
b49a1807
KH
1646 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1647 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1648 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1649 | CATEGORY_MASK_UTF_16_BE_NOSIG
1650 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1651 }
220eeac9 1652 else if (c2 < 0)
f56a4450
KH
1653 {
1654 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1655 return 0;
1656 }
2f3cbb32 1657 else
24a73b0a 1658 {
2f3cbb32
KH
1659 /* We check the dispersion of Eth and Oth bytes where E is even and
1660 O is odd. If both are high, we assume binary data.*/
1661 unsigned char e[256], o[256];
1662 unsigned e_num = 1, o_num = 1;
1663
1664 memset (e, 0, 256);
1665 memset (o, 0, 256);
1666 e[c1] = 1;
1667 o[c2] = 1;
1668
cc13543e
KH
1669 detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1670 |CATEGORY_MASK_UTF_16_BE
1671 | CATEGORY_MASK_UTF_16_LE);
2f3cbb32 1672
7f1faf1c
KH
1673 while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1674 != CATEGORY_MASK_UTF_16)
2f3cbb32 1675 {
f56a4450 1676 TWO_MORE_BYTES (c1, c2);
220eeac9 1677 if (c2 < 0)
f56a4450 1678 break;
2f3cbb32
KH
1679 if (! e[c1])
1680 {
1681 e[c1] = 1;
1682 e_num++;
cc13543e
KH
1683 if (e_num >= 128)
1684 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
2f3cbb32
KH
1685 }
1686 if (! o[c2])
1687 {
977b85f4 1688 o[c2] = 1;
2f3cbb32 1689 o_num++;
cc13543e
KH
1690 if (o_num >= 128)
1691 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
2f3cbb32
KH
1692 }
1693 }
2f3cbb32 1694 return 0;
ff0dacd7 1695 }
2f3cbb32 1696
df7492f9 1697 no_more_source:
ff0dacd7 1698 return 1;
df7492f9 1699}
aa72b389 1700
df7492f9
KH
1701static void
1702decode_coding_utf_16 (coding)
1703 struct coding_system *coding;
1704{
8f924df7
KH
1705 const unsigned char *src = coding->source + coding->consumed;
1706 const unsigned char *src_end = coding->source + coding->src_bytes;
1707 const unsigned char *src_base;
69a80ea3 1708 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
1709 /* We may produces at most 3 chars in one loop. */
1710 int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
3a8406e1 1711 int consumed_chars = 0, consumed_chars_base = 0;
df7492f9 1712 int multibytep = coding->src_multibyte;
a470d443 1713 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
df7492f9
KH
1714 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1715 int surrogate = CODING_UTF_16_SURROGATE (coding);
24a73b0a 1716 Lisp_Object attr, charset_list;
0a9564cb
EZ
1717 int eol_crlf =
1718 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 1719 int byte_after_cr1 = -1, byte_after_cr2 = -1;
df7492f9 1720
24a73b0a 1721 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1722
a470d443 1723 if (bom == utf_with_bom)
aa72b389 1724 {
df7492f9 1725 int c, c1, c2;
4af310db 1726
aa72b389 1727 src_base = src;
df7492f9
KH
1728 ONE_MORE_BYTE (c1);
1729 ONE_MORE_BYTE (c2);
e19c3639 1730 c = (c1 << 8) | c2;
aa72b389 1731
b49a1807
KH
1732 if (endian == utf_16_big_endian
1733 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1734 {
b49a1807
KH
1735 /* The first two bytes are not BOM. Treat them as bytes
1736 for a normal character. */
1737 src = src_base;
1738 coding->errors++;
aa72b389 1739 }
a470d443 1740 CODING_UTF_16_BOM (coding) = utf_without_bom;
b49a1807 1741 }
a470d443 1742 else if (bom == utf_detect_bom)
b49a1807
KH
1743 {
1744 /* We have already tried to detect BOM and failed in
1745 detect_coding. */
a470d443 1746 CODING_UTF_16_BOM (coding) = utf_without_bom;
df7492f9 1747 }
aa72b389 1748
df7492f9
KH
1749 while (1)
1750 {
1751 int c, c1, c2;
1752
1753 src_base = src;
1754 consumed_chars_base = consumed_chars;
1755
df80c7f0 1756 if (charbuf >= charbuf_end)
b71f6f73
KH
1757 {
1758 if (byte_after_cr1 >= 0)
1759 src_base -= 2;
1760 break;
1761 }
df7492f9 1762
119852e7
KH
1763 if (byte_after_cr1 >= 0)
1764 c1 = byte_after_cr1, byte_after_cr1 = -1;
1765 else
1766 ONE_MORE_BYTE (c1);
065e3595
KH
1767 if (c1 < 0)
1768 {
1769 *charbuf++ = -c1;
1770 continue;
1771 }
119852e7
KH
1772 if (byte_after_cr2 >= 0)
1773 c2 = byte_after_cr2, byte_after_cr2 = -1;
1774 else
1775 ONE_MORE_BYTE (c2);
065e3595
KH
1776 if (c2 < 0)
1777 {
1778 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1779 *charbuf++ = -c2;
1780 continue;
1781 }
df7492f9 1782 c = (endian == utf_16_big_endian
e19c3639 1783 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
119852e7 1784
df7492f9 1785 if (surrogate)
fd3ae0b9 1786 {
df7492f9 1787 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1788 {
df7492f9
KH
1789 if (endian == utf_16_big_endian)
1790 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1791 else
1792 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1793 *charbuf++ = c1;
1794 *charbuf++ = c2;
1795 coding->errors++;
1796 if (UTF_16_HIGH_SURROGATE_P (c))
1797 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1798 else
df7492f9 1799 *charbuf++ = c;
fd3ae0b9
KH
1800 }
1801 else
df7492f9
KH
1802 {
1803 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1804 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1805 *charbuf++ = 0x10000 + c;
df7492f9 1806 }
fd3ae0b9 1807 }
aa72b389 1808 else
df7492f9
KH
1809 {
1810 if (UTF_16_HIGH_SURROGATE_P (c))
1811 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1812 else
119852e7
KH
1813 {
1814 if (eol_crlf && c == '\r')
1815 {
1816 ONE_MORE_BYTE (byte_after_cr1);
1817 ONE_MORE_BYTE (byte_after_cr2);
1818 }
1819 *charbuf++ = c;
1820 }
8f924df7 1821 }
aa72b389 1822 }
df7492f9
KH
1823
1824 no_more_source:
1825 coding->consumed_char += consumed_chars_base;
1826 coding->consumed = src_base - coding->source;
1827 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1828}
b73bfc1c 1829
df7492f9
KH
1830static int
1831encode_coding_utf_16 (coding)
1832 struct coding_system *coding;
1833{
1834 int multibytep = coding->dst_multibyte;
1835 int *charbuf = coding->charbuf;
1836 int *charbuf_end = charbuf + coding->charbuf_used;
1837 unsigned char *dst = coding->destination + coding->produced;
1838 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1839 int safe_room = 8;
a470d443 1840 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
df7492f9
KH
1841 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1842 int produced_chars = 0;
24a73b0a 1843 Lisp_Object attrs, charset_list;
df7492f9 1844 int c;
4ed46869 1845
24a73b0a 1846 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 1847
a470d443 1848 if (bom != utf_without_bom)
df7492f9
KH
1849 {
1850 ASSURE_DESTINATION (safe_room);
1851 if (big_endian)
df7492f9 1852 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1853 else
1854 EMIT_TWO_BYTES (0xFF, 0xFE);
a470d443 1855 CODING_UTF_16_BOM (coding) = utf_without_bom;
df7492f9
KH
1856 }
1857
1858 while (charbuf < charbuf_end)
1859 {
1860 ASSURE_DESTINATION (safe_room);
1861 c = *charbuf++;
60afa08d 1862 if (c > MAX_UNICODE_CHAR)
e19c3639 1863 c = coding->default_char;
df7492f9
KH
1864
1865 if (c < 0x10000)
1866 {
1867 if (big_endian)
1868 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1869 else
1870 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1871 }
1872 else
1873 {
1874 int c1, c2;
1875
1876 c -= 0x10000;
1877 c1 = (c >> 10) + 0xD800;
1878 c2 = (c & 0x3FF) + 0xDC00;
1879 if (big_endian)
1880 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1881 else
1882 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1883 }
1884 }
065e3595 1885 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1886 coding->produced = dst - coding->destination;
1887 coding->produced_char += produced_chars;
1888 return 0;
1889}
1890
1891\f
1892/*** 6. Old Emacs' internal format (emacs-mule) ***/
1893
1894/* Emacs' internal format for representation of multiple character
1895 sets is a kind of multi-byte encoding, i.e. characters are
1896 represented by variable-length sequences of one-byte codes.
1897
1898 ASCII characters and control characters (e.g. `tab', `newline') are
1899 represented by one-byte sequences which are their ASCII codes, in
1900 the range 0x00 through 0x7F.
1901
1902 8-bit characters of the range 0x80..0x9F are represented by
1903 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1904 code + 0x20).
1905
1906 8-bit characters of the range 0xA0..0xFF are represented by
1907 one-byte sequences which are their 8-bit code.
1908
1909 The other characters are represented by a sequence of `base
1910 leading-code', optional `extended leading-code', and one or two
1911 `position-code's. The length of the sequence is determined by the
1912 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1913 whereas extended leading-code and position-code take the range 0xA0
1914 through 0xFF. See `charset.h' for more details about leading-code
1915 and position-code.
1916
1917 --- CODE RANGE of Emacs' internal format ---
1918 character set range
1919 ------------- -----
1920 ascii 0x00..0x7F
1921 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1922 eight-bit-graphic 0xA0..0xFF
1923 ELSE 0x81..0x9D + [0xA0..0xFF]+
1924 ---------------------------------------------
1925
1926 As this is the internal character representation, the format is
1927 usually not used externally (i.e. in a file or in a data sent to a
1928 process). But, it is possible to have a text externally in this
1929 format (i.e. by encoding by the coding system `emacs-mule').
1930
1931 In that case, a sequence of one-byte codes has a slightly different
1932 form.
1933
1934 At first, all characters in eight-bit-control are represented by
1935 one-byte sequences which are their 8-bit code.
1936
1937 Next, character composition data are represented by the byte
1938 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1939 where,
e951386e 1940 METHOD is 0xF2 plus one of composition method (enum
df7492f9
KH
1941 composition_method),
1942
1943 BYTES is 0xA0 plus a byte length of this composition data,
1944
e951386e 1945 CHARS is 0xA0 plus a number of characters composed by this
df7492f9
KH
1946 data,
1947
1948 COMPONENTs are characters of multibyte form or composition
1949 rules encoded by two-byte of ASCII codes.
1950
1951 In addition, for backward compatibility, the following formats are
1952 also recognized as composition data on decoding.
1953
1954 0x80 MSEQ ...
1955 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1956
1957 Here,
1958 MSEQ is a multibyte form but in this special format:
1959 ASCII: 0xA0 ASCII_CODE+0x80,
1960 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1961 RULE is a one byte code of the range 0xA0..0xF0 that
1962 represents a composition rule.
1963 */
1964
/* Table indexed by a byte value.  detect_coding_emacs_mule and
   emacs_mule_char in this file read the entry for a leading byte as
   the total number of bytes in the emacs-mule sequence that the byte
   starts (emacs_mule_char handles the values 1 through 4 and aborts
   on anything else).  The table is not filled in here.  */
1965char emacs_mule_bytes[256];
1966
e951386e
KH
1967
1968/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1969 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1970 else return 0. */
1971
1972static int
1973detect_coding_emacs_mule (coding, detect_info)
1974 struct coding_system *coding;
1975 struct coding_detection_info *detect_info;
1976{
1977 const unsigned char *src = coding->source, *src_base;
1978 const unsigned char *src_end = coding->source + coding->src_bytes;
1979 int multibytep = coding->src_multibyte;
1980 int consumed_chars = 0;
1981 int c;
1982 int found = 0;
1983
1984 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1985 /* A coding system of this category is always ASCII compatible. */
1986 src += coding->head_ascii;
1987
1988 while (1)
1989 {
1990 src_base = src;
1991 ONE_MORE_BYTE (c);
1992 if (c < 0)
1993 continue;
1994 if (c == 0x80)
1995 {
1996 /* Perhaps the start of composite character. We simply skip
1997 it because analyzing it is too heavy for detecting. But,
1998 at least, we check that the composite character
1999 constitutes of more than 4 bytes. */
2000 const unsigned char *src_base;
2001
2002 repeat:
2003 src_base = src;
2004 do
2005 {
2006 ONE_MORE_BYTE (c);
2007 }
2008 while (c >= 0xA0);
2009
2010 if (src - src_base <= 4)
2011 break;
2012 found = CATEGORY_MASK_EMACS_MULE;
2013 if (c == 0x80)
2014 goto repeat;
2015 }
2016
2017 if (c < 0x80)
2018 {
2019 if (c < 0x20
2020 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2021 break;
2022 }
2023 else
2024 {
2025 int more_bytes = emacs_mule_bytes[*src_base] - 1;
2026
2027 while (more_bytes > 0)
2028 {
2029 ONE_MORE_BYTE (c);
2030 if (c < 0xA0)
2031 {
2032 src--; /* Unread the last byte. */
2033 break;
2034 }
2035 more_bytes--;
2036 }
2037 if (more_bytes != 0)
2038 break;
2039 found = CATEGORY_MASK_EMACS_MULE;
2040 }
2041 }
2042 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2043 return 0;
2044
2045 no_more_source:
2046 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2047 {
2048 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2049 return 0;
2050 }
2051 detect_info->found |= found;
2052 return 1;
2053}
2054
2055
2056/* Parse emacs-mule multibyte sequence at SRC and return the decoded
2057 character. If CMP_STATUS indicates that we must expect MSEQ or
2058 RULE described above, decode it and return the negative value of
2059 the deocded character or rule. If an invalid byte is found, return
2060 -1. If SRC is too short, return -2. */
2061
df7492f9 2062int
e951386e 2063emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
df7492f9 2064 struct coding_system *coding;
065e3595 2065 const unsigned char *src;
ff0dacd7 2066 int *nbytes, *nchars, *id;
e951386e 2067 struct composition_status *cmp_status;
df7492f9 2068{
8f924df7
KH
2069 const unsigned char *src_end = coding->source + coding->src_bytes;
2070 const unsigned char *src_base = src;
df7492f9 2071 int multibytep = coding->src_multibyte;
df7492f9
KH
2072 struct charset *charset;
2073 unsigned code;
2074 int c;
2075 int consumed_chars = 0;
e951386e 2076 int mseq_found = 0;
df7492f9
KH
2077
2078 ONE_MORE_BYTE (c);
065e3595 2079 if (c < 0)
df7492f9 2080 {
065e3595
KH
2081 c = -c;
2082 charset = emacs_mule_charset[0];
2083 }
2084 else
2085 {
4d41e8b7
KH
2086 if (c >= 0xA0)
2087 {
e951386e
KH
2088 if (cmp_status->state != COMPOSING_NO
2089 && cmp_status->old_form)
4d41e8b7 2090 {
e951386e
KH
2091 if (cmp_status->state == COMPOSING_CHAR)
2092 {
2093 if (c == 0xA0)
2094 {
2095 ONE_MORE_BYTE (c);
2096 c -= 0x80;
2097 if (c < 0)
2098 goto invalid_code;
2099 }
2100 else
2101 c -= 0x20;
2102 mseq_found = 1;
2103 }
2104 else
2105 {
2106 *nbytes = src - src_base;
2107 *nchars = consumed_chars;
2108 return -c;
2109 }
4d41e8b7
KH
2110 }
2111 else
e951386e 2112 goto invalid_code;
4d41e8b7
KH
2113 }
2114
065e3595 2115 switch (emacs_mule_bytes[c])
b73bfc1c 2116 {
065e3595 2117 case 2:
df7492f9
KH
2118 if (! (charset = emacs_mule_charset[c]))
2119 goto invalid_code;
2120 ONE_MORE_BYTE (c);
9ffd559c 2121 if (c < 0xA0)
065e3595 2122 goto invalid_code;
df7492f9 2123 code = c & 0x7F;
065e3595
KH
2124 break;
2125
2126 case 3:
2127 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2128 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2129 {
2130 ONE_MORE_BYTE (c);
9ffd559c 2131 if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
065e3595
KH
2132 goto invalid_code;
2133 ONE_MORE_BYTE (c);
9ffd559c 2134 if (c < 0xA0)
065e3595
KH
2135 goto invalid_code;
2136 code = c & 0x7F;
2137 }
2138 else
2139 {
2140 if (! (charset = emacs_mule_charset[c]))
2141 goto invalid_code;
2142 ONE_MORE_BYTE (c);
9ffd559c 2143 if (c < 0xA0)
065e3595
KH
2144 goto invalid_code;
2145 code = (c & 0x7F) << 8;
2146 ONE_MORE_BYTE (c);
9ffd559c 2147 if (c < 0xA0)
065e3595
KH
2148 goto invalid_code;
2149 code |= c & 0x7F;
2150 }
2151 break;
2152
2153 case 4:
2154 ONE_MORE_BYTE (c);
2155 if (c < 0 || ! (charset = emacs_mule_charset[c]))
df7492f9
KH
2156 goto invalid_code;
2157 ONE_MORE_BYTE (c);
9ffd559c 2158 if (c < 0xA0)
065e3595 2159 goto invalid_code;
781d7a48 2160 code = (c & 0x7F) << 8;
df7492f9 2161 ONE_MORE_BYTE (c);
9ffd559c 2162 if (c < 0xA0)
065e3595 2163 goto invalid_code;
df7492f9 2164 code |= c & 0x7F;
065e3595 2165 break;
df7492f9 2166
065e3595
KH
2167 case 1:
2168 code = c;
2169 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2170 ? charset_ascii : charset_eight_bit);
2171 break;
df7492f9 2172
065e3595
KH
2173 default:
2174 abort ();
2175 }
2176 c = DECODE_CHAR (charset, code);
2177 if (c < 0)
2178 goto invalid_code;
df7492f9 2179 }
df7492f9
KH
2180 *nbytes = src - src_base;
2181 *nchars = consumed_chars;
ff0dacd7
KH
2182 if (id)
2183 *id = charset->id;
e951386e 2184 return (mseq_found ? -c : c);
df7492f9
KH
2185
2186 no_more_source:
2187 return -2;
2188
2189 invalid_code:
2190 return -1;
2191}
2192
2193
e951386e 2194/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
df7492f9 2195
e951386e
KH
2196/* Handle these composition sequences ('|': the end of header elements,
2197 BYTES and CHARS >= 0xA0):
df7492f9 2198
e951386e
KH
2199 (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2200 (2) altchar composition: 0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2201 (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
df7492f9 2202
e951386e
KH
2203 and these old forms:
2204
2205 (4) relative composition: 0x80 | MSEQ ... MSEQ
2206 (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
df7492f9 2207
e951386e
KH
2208 When the starter 0x80 and the following header elements are found,
2209 this annotation header is produced.
df7492f9 2210
e951386e 2211 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
df7492f9 2212
e951386e
KH
2213 NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2214 NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
df7492f9 2215
e951386e
KH
2216 Then, upon reading the following elements, these codes are produced
2217 until the composition end is found:
df7492f9 2218
e951386e
KH
2219 (1) CHAR ... CHAR
2220 (2) ALT ... ALT CHAR ... CHAR
2221 (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2222 (4) CHAR ... CHAR
2223 (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
4ed46869 2224
e951386e
KH
2225 When the composition end is found, LENGTH and NCHARS in the
2226 annotation header are updated as below:
b73bfc1c 2227
e951386e
KH
2228 (1) LENGTH: unchanged, NCHARS: unchanged
2229 (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2230 (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2231 (4) LENGTH: unchanged, NCHARS: number of CHARs
2232 (5) LENGTH: unchanged, NCHARS: number of CHARs
df7492f9 2233
e951386e
KH
2234 If an error is found while composing, the annotation header is
2235 changed to the original composition header (plus filler -1s) as
2236 below:
2237
2238 (1),(2),(3) [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2239 (5) [ 0x80 0xFF -1 -1 -1 ]
2240
2241 and the sequence [ -2 DECODED-RULE ] is changed to the original
2242 byte sequence as below:
2243 o the original byte sequence is B: [ B -1 ]
2244 o the original byte sequence is B1 B2: [ B1 B2 ]
2245
2246 Most of the routines are implemented by macros because many
2247 variables and labels in the caller decode_coding_emacs_mule must be
2248 accessible, and they are usually called just once (thus doesn't
2249 increase the size of compiled object). */
2250
/* Decode the composition rule byte C of an Emacs 20 style composition
   sequence.  C is reduced by 0xA0 in place; unless the result lies in
   the range 0..80, control jumps to the caller's label
   `invalid_code'.  Otherwise the result is split into GREF (C / 9)
   and NREF (C % 9), a value of 4 in either of them is replaced by
   10, and RULE is set to COMPOSITION_ENCODE_RULE (GREF, NREF).  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)          \
  do {                                                          \
    int gref, nref;                                             \
                                                                \
    c -= 0xA0;                                                  \
    if (! (c >= 0 && c < 81))                                   \
      goto invalid_code;                                        \
    gref = c / 9;                                               \
    nref = c % 9;                                               \
    if (gref == 4)                                              \
      gref = 10;                                                \
    if (nref == 4)                                              \
      nref = 10;                                                \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);                \
  } while (0)
2267
2268
e951386e
KH
/* Decode a two-byte composition rule of an Emacs 21 style composition
   sequence.  C holds the first byte; the second one is read into C
   with ONE_MORE_BYTE.  Each byte minus 0x20 must lie in the range
   0..80, otherwise control jumps to the caller's label
   `invalid_code'.  RULE is set to COMPOSITION_ENCODE_RULE of the two
   reduced values, first byte first.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)          \
  do {                                                          \
    int gref = c - 0x20;                                        \
    int nref;                                                   \
                                                                \
    if (! (gref >= 0 && gref < 81))                             \
      goto invalid_code;                                        \
    ONE_MORE_BYTE (c);                                          \
    nref = c - 0x20;                                            \
    if (! (nref >= 0 && nref < 81))                             \
      goto invalid_code;                                        \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);                \
  } while (0)
2286
2287
e951386e
KH
/* Start of Emacs 21 style format.  On entry C holds the method byte
   (METHOD + 0xF2).  The next two bytes are read with ONE_MORE_BYTE:
   BYTES + 0xA0, the byte length of this composition information, and
   CHARS + 0xA0, the number of characters composed by it.  Control
   jumps to the caller's label `invalid_code' if a byte is unusable.
   Otherwise *CMP_STATUS is set up for the new composition and the
   annotation header is added to CHARBUF.  */

#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int *charbuf_base = charbuf;                                        \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    if (method == COMPOSITION_RELATIVE && nbytes != 4)                  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
      goto invalid_code;                                                \
    cmp_status->old_form = 0;                                           \
    cmp_status->method = method;                                        \
    cmp_status->state = (method == COMPOSITION_RELATIVE                 \
                         ? COMPOSING_CHAR                               \
                         : COMPOSING_COMPONENT_CHAR);                   \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
93dec019 2320
aa72b389 2321
e951386e
KH
/* Start of Emacs 20 style format for relative composition.  Mark
   *CMP_STATUS as an old-form relative composition that expects a
   character next, with no characters or components counted yet, and
   add the annotation header to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
  do {                                                          \
    cmp_status->method = COMPOSITION_RELATIVE;                  \
    cmp_status->old_form = 1;                                   \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2333
2334
/* Start of Emacs 20 style format for rule-base composition.  Mark
   *CMP_STATUS as an old-form composition with rules that expects a
   character next, with no characters or components counted yet, and
   add the annotation header to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
  do {                                                          \
    cmp_status->method = COMPOSITION_WITH_RULE;                 \
    cmp_status->old_form = 1;                                   \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2346
2347
e951386e
KH
/* Handle the byte that follows a composition starter 0x80.  Read it
   into C and pick the composition format from it:
     a method byte (0xF2 + a composition method)  Emacs 21 style,
     0xA0..0xBF  Emacs 20 relative composition; the byte belongs to
                 the first component, so SRC is moved back to re-read
                 it,
     0xFF        Emacs 20 rule-base composition.
   For any other byte, control jumps to the caller's label
   `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_START()                           \
  do {                                                                  \
    const unsigned char *current_src = src;                             \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    if (c - 0xF2 >= COMPOSITION_RELATIVE                                \
        && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)                  \
      DECODE_EMACS_MULE_21_COMPOSITION ();                              \
    else if (c >= 0xA0 && c < 0xC0)                                     \
      {                                                                 \
        DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();                   \
        /* Re-read C as a composition component.  */                    \
        src = current_src;                                              \
      }                                                                 \
    else if (c == 0xFF)                                                 \
      DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();                     \
    else                                                                \
      goto invalid_code;                                                \
  } while (0)
2371
/* Finish a composition that ended normally.  The annotation header
   starts CMP_STATUS->length elements before CHARBUF.  For an old-form
   composition, store the number of characters read in element 2 of
   the header.  Otherwise, for a method beyond COMPOSITION_RELATIVE,
   set element 0 to element 2 minus CMP_STATUS->length.  In all cases
   leave the composing state.  */

#define EMACS_MULE_COMPOSITION_END()                                    \
  do {                                                                  \
    int *annotation = charbuf - cmp_status->length;                     \
                                                                        \
    if (cmp_status->old_form)                                           \
      annotation[2] = cmp_status->nchars;                               \
    else if (cmp_status->method > COMPOSITION_RELATIVE)                 \
      annotation[0] = annotation[2] - cmp_status->length;               \
    cmp_status->state = COMPOSING_NO;                                   \
  } while (0)
2382
2383
2384static int
2385emacs_mule_finish_composition (charbuf, cmp_status)
2386 int *charbuf;
2387 struct composition_status *cmp_status;
2388{
2389 int idx = - cmp_status->length;
2390 int new_chars;
2391
2392 if (cmp_status->old_form && cmp_status->nchars > 0)
2393 {
2394 charbuf[idx + 2] = cmp_status->nchars;
2395 new_chars = 0;
2396 if (cmp_status->method == COMPOSITION_WITH_RULE
2397 && cmp_status->state == COMPOSING_CHAR)
2398 {
2399 /* The last rule was invalid. */
2400 int rule = charbuf[-1] + 0xA0;
2401
2402 charbuf[-2] = BYTE8_TO_CHAR (rule);
2403 charbuf[-1] = -1;
2404 new_chars = 1;
2405 }
2406 }
2407 else
2408 {
2409 charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2410
2411 if (cmp_status->method == COMPOSITION_WITH_RULE)
2412 {
2413 charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2414 charbuf[idx++] = -3;
2415 charbuf[idx++] = 0;
2416 new_chars = 1;
2417 }
2418 else
2419 {
2420 int nchars = charbuf[idx + 1] + 0xA0;
2421 int nbytes = charbuf[idx + 2] + 0xA0;
2422
2423 charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2424 charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2425 charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2426 charbuf[idx++] = -1;
2427 new_chars = 4;
2428 }
2429 }
2430 cmp_status->state = COMPOSING_NO;
2431 return new_chars;
2432}
2433
/* If a composition is in progress, close it with
   emacs_mule_finish_composition and advance CHAR_OFFSET by the number
   of characters that call reports.  */

#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                           \
  do {                                                                  \
    if (cmp_status->state != COMPOSING_NO)                              \
      {                                                                 \
        int finished_chars                                              \
          = emacs_mule_finish_composition (charbuf, cmp_status);        \
                                                                        \
        char_offset += finished_chars;                                  \
      }                                                                 \
  } while (0)
2439
aa72b389
KH
2440
2441static void
df7492f9 2442decode_coding_emacs_mule (coding)
aa72b389 2443 struct coding_system *coding;
aa72b389 2444{
8f924df7
KH
2445 const unsigned char *src = coding->source + coding->consumed;
2446 const unsigned char *src_end = coding->source + coding->src_bytes;
2447 const unsigned char *src_base;
69a80ea3 2448 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
2449 /* We may produce two annocations (charset and composition) in one
2450 loop and one more charset annocation at the end. */
69a80ea3 2451 int *charbuf_end
df80c7f0 2452 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
df7492f9 2453 int consumed_chars = 0, consumed_chars_base;
df7492f9 2454 int multibytep = coding->src_multibyte;
24a73b0a 2455 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2456 int char_offset = coding->produced_char;
2457 int last_offset = char_offset;
2458 int last_id = charset_ascii;
0a9564cb
EZ
2459 int eol_crlf =
2460 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 2461 int byte_after_cr = -1;
e951386e 2462 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
aa72b389 2463
24a73b0a 2464 CODING_GET_INFO (coding, attrs, charset_list);
aa72b389 2465
e951386e
KH
2466 if (cmp_status->state != COMPOSING_NO)
2467 {
2468 int i;
2469
2470 for (i = 0; i < cmp_status->length; i++)
2471 *charbuf++ = cmp_status->carryover[i];
2472 coding->annotated = 1;
2473 }
2474
aa72b389
KH
2475 while (1)
2476 {
e951386e 2477 int c, id;
df7492f9 2478
aa72b389 2479 src_base = src;
df7492f9
KH
2480 consumed_chars_base = consumed_chars;
2481
2482 if (charbuf >= charbuf_end)
b71f6f73
KH
2483 {
2484 if (byte_after_cr >= 0)
2485 src_base--;
2486 break;
2487 }
aa72b389 2488
119852e7
KH
2489 if (byte_after_cr >= 0)
2490 c = byte_after_cr, byte_after_cr = -1;
2491 else
2492 ONE_MORE_BYTE (c);
e951386e
KH
2493
2494 if (c < 0 || c == 0x80)
065e3595 2495 {
e951386e
KH
2496 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2497 if (c < 0)
2498 {
2499 *charbuf++ = -c;
2500 char_offset++;
2501 }
2502 else
2503 DECODE_EMACS_MULE_COMPOSITION_START ();
2504 continue;
065e3595 2505 }
e951386e
KH
2506
2507 if (c < 0x80)
aa72b389 2508 {
119852e7
KH
2509 if (eol_crlf && c == '\r')
2510 ONE_MORE_BYTE (byte_after_cr);
e951386e
KH
2511 id = charset_ascii;
2512 if (cmp_status->state != COMPOSING_NO)
2513 {
2514 if (cmp_status->old_form)
2515 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2516 else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2517 cmp_status->ncomps--;
2518 }
2519 }
2520 else
2521 {
2522 int nchars, nbytes;
2523
2524 c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2525 cmp_status);
2526 if (c < 0)
2527 {
2528 if (c == -1)
2529 goto invalid_code;
2530 if (c == -2)
2531 break;
2532 }
2533 src = src_base + nbytes;
2534 consumed_chars = consumed_chars_base + nchars;
2535 if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2536 cmp_status->ncomps -= nchars;
2537 }
2538
2539 /* Now if C >= 0, we found a normally encoded characer, if C <
2540 0, we found an old-style composition component character or
2541 rule. */
2542
2543 if (cmp_status->state == COMPOSING_NO)
2544 {
2545 if (last_id != id)
2546 {
2547 if (last_id != charset_ascii)
2548 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2549 last_id);
2550 last_id = id;
2551 last_offset = char_offset;
2552 }
df7492f9
KH
2553 *charbuf++ = c;
2554 char_offset++;
aa72b389 2555 }
e951386e 2556 else if (cmp_status->state == COMPOSING_CHAR)
df7492f9 2557 {
e951386e
KH
2558 if (cmp_status->old_form)
2559 {
2560 if (c >= 0)
2561 {
2562 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2563 *charbuf++ = c;
2564 char_offset++;
2565 }
2566 else
2567 {
2568 *charbuf++ = -c;
2569 cmp_status->nchars++;
2570 cmp_status->length++;
2571 if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2572 EMACS_MULE_COMPOSITION_END ();
2573 else if (cmp_status->method == COMPOSITION_WITH_RULE)
2574 cmp_status->state = COMPOSING_RULE;
2575 }
2576 }
df7492f9 2577 else
e951386e
KH
2578 {
2579 *charbuf++ = c;
2580 cmp_status->length++;
2581 cmp_status->nchars--;
2582 if (cmp_status->nchars == 0)
2583 EMACS_MULE_COMPOSITION_END ();
2584 }
df7492f9 2585 }
e951386e 2586 else if (cmp_status->state == COMPOSING_RULE)
df7492f9 2587 {
e951386e 2588 int rule;
ff0dacd7 2589
e951386e 2590 if (c >= 0)
df7492f9 2591 {
e951386e
KH
2592 EMACS_MULE_COMPOSITION_END ();
2593 *charbuf++ = c;
2594 char_offset++;
df7492f9 2595 }
e951386e 2596 else
ff0dacd7 2597 {
e951386e
KH
2598 c = -c;
2599 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2600 if (rule < 0)
2601 goto invalid_code;
2602 *charbuf++ = -2;
2603 *charbuf++ = rule;
2604 cmp_status->length += 2;
2605 cmp_status->state = COMPOSING_CHAR;
ff0dacd7 2606 }
e951386e
KH
2607 }
2608 else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2609 {
df7492f9 2610 *charbuf++ = c;
e951386e
KH
2611 cmp_status->length++;
2612 if (cmp_status->ncomps == 0)
2613 cmp_status->state = COMPOSING_CHAR;
2614 else if (cmp_status->ncomps > 0)
2615 {
2616 if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2617 cmp_status->state = COMPOSING_COMPONENT_RULE;
2618 }
2619 else
2620 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
df7492f9 2621 }
e951386e
KH
2622 else /* COMPOSING_COMPONENT_RULE */
2623 {
2624 int rule;
2625
2626 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2627 if (rule < 0)
2628 goto invalid_code;
2629 *charbuf++ = -2;
2630 *charbuf++ = rule;
2631 cmp_status->length += 2;
2632 cmp_status->ncomps--;
2633 if (cmp_status->ncomps > 0)
2634 cmp_status->state = COMPOSING_COMPONENT_CHAR;
2635 else
2636 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2637 }
2638 continue;
2639
2640 retry:
2641 src = src_base;
2642 consumed_chars = consumed_chars_base;
df7492f9
KH
2643 continue;
2644
2645 invalid_code:
e951386e 2646 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
df7492f9
KH
2647 src = src_base;
2648 consumed_chars = consumed_chars_base;
2649 ONE_MORE_BYTE (c);
2650 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2651 char_offset++;
df7492f9
KH
2652 coding->errors++;
2653 }
2654
2655 no_more_source:
e951386e
KH
2656 if (cmp_status->state != COMPOSING_NO)
2657 {
2658 if (coding->mode & CODING_MODE_LAST_BLOCK)
2659 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2660 else
2661 {
2662 int i;
2663
2664 charbuf -= cmp_status->length;
2665 for (i = 0; i < cmp_status->length; i++)
2666 cmp_status->carryover[i] = charbuf[i];
2667 }
2668 }
ff0dacd7 2669 if (last_id != charset_ascii)
69a80ea3 2670 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
2671 coding->consumed_char += consumed_chars_base;
2672 coding->consumed = src_base - coding->source;
2673 coding->charbuf_used = charbuf - coding->charbuf;
2674}
2675
2676
/* Store in CODES[0] and CODES[1] the leading code(s) for the
   emacs-mule charset id ID.  An ID below 0xA0 is its own leading code
   and CODES[1] is set to 0.  Any other ID goes to CODES[1], preceded
   in CODES[0] by 0x9A, 0x9B, 0x9C or 0x9D for the ranges 0xA0..0xDF,
   0xE0..0xEF, 0xF0..0xF4 and 0xF5 upward respectively.

   ID is evaluated exactly once.  The expansion has no trailing
   semicolon, so the macro can be used as the body of an `if' that
   has an `else'.  */

#define EMACS_MULE_LEADING_CODES(id, codes)                     \
  do {                                                          \
    const int leading_id_ = (id);                               \
                                                                \
    if (leading_id_ < 0xA0)                                     \
      (codes)[0] = leading_id_, (codes)[1] = 0;                 \
    else if (leading_id_ < 0xE0)                                \
      (codes)[0] = 0x9A, (codes)[1] = leading_id_;              \
    else if (leading_id_ < 0xF0)                                \
      (codes)[0] = 0x9B, (codes)[1] = leading_id_;              \
    else if (leading_id_ < 0xF5)                                \
      (codes)[0] = 0x9C, (codes)[1] = leading_id_;              \
    else                                                        \
      (codes)[0] = 0x9D, (codes)[1] = leading_id_;              \
  } while (0)
2690
aa72b389 2691
df7492f9
KH
2692static int
2693encode_coding_emacs_mule (coding)
2694 struct coding_system *coding;
2695{
2696 int multibytep = coding->dst_multibyte;
2697 int *charbuf = coding->charbuf;
2698 int *charbuf_end = charbuf + coding->charbuf_used;
2699 unsigned char *dst = coding->destination + coding->produced;
2700 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2701 int safe_room = 8;
df7492f9 2702 int produced_chars = 0;
24a73b0a 2703 Lisp_Object attrs, charset_list;
df7492f9 2704 int c;
ff0dacd7 2705 int preferred_charset_id = -1;
df7492f9 2706
24a73b0a 2707 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2708 if (! EQ (charset_list, Vemacs_mule_charset_list))
2709 {
2710 CODING_ATTR_CHARSET_LIST (attrs)
2711 = charset_list = Vemacs_mule_charset_list;
2712 }
df7492f9
KH
2713
2714 while (charbuf < charbuf_end)
2715 {
2716 ASSURE_DESTINATION (safe_room);
2717 c = *charbuf++;
ff0dacd7
KH
2718
2719 if (c < 0)
2720 {
2721 /* Handle an annotation. */
2722 switch (*charbuf)
2723 {
2724 case CODING_ANNOTATE_COMPOSITION_MASK:
2725 /* Not yet implemented. */
2726 break;
2727 case CODING_ANNOTATE_CHARSET_MASK:
2728 preferred_charset_id = charbuf[3];
2729 if (preferred_charset_id >= 0
2730 && NILP (Fmemq (make_number (preferred_charset_id),
2731 charset_list)))
2732 preferred_charset_id = -1;
2733 break;
2734 default:
2735 abort ();
2736 }
2737 charbuf += -c - 1;
2738 continue;
2739 }
2740
df7492f9
KH
2741 if (ASCII_CHAR_P (c))
2742 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2743 else if (CHAR_BYTE8_P (c))
2744 {
2745 c = CHAR_TO_BYTE8 (c);
2746 EMIT_ONE_BYTE (c);
2747 }
df7492f9 2748 else
aa72b389 2749 {
df7492f9
KH
2750 struct charset *charset;
2751 unsigned code;
2752 int dimension;
2753 int emacs_mule_id;
2754 unsigned char leading_codes[2];
2755
ff0dacd7
KH
2756 if (preferred_charset_id >= 0)
2757 {
2758 charset = CHARSET_FROM_ID (preferred_charset_id);
905ca9d2
KH
2759 if (CHAR_CHARSET_P (c, charset))
2760 code = ENCODE_CHAR (charset, c);
2761 else
2762 charset = char_charset (c, charset_list, &code);
ff0dacd7
KH
2763 }
2764 else
2765 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2766 if (! charset)
2767 {
2768 c = coding->default_char;
2769 if (ASCII_CHAR_P (c))
2770 {
2771 EMIT_ONE_ASCII_BYTE (c);
2772 continue;
2773 }
2774 charset = char_charset (c, charset_list, &code);
2775 }
2776 dimension = CHARSET_DIMENSION (charset);
2777 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2778 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2779 EMIT_ONE_BYTE (leading_codes[0]);
2780 if (leading_codes[1])
2781 EMIT_ONE_BYTE (leading_codes[1]);
2782 if (dimension == 1)
1fa663f9 2783 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2784 else
df7492f9 2785 {
1fa663f9 2786 code |= 0x8080;
df7492f9
KH
2787 EMIT_ONE_BYTE (code >> 8);
2788 EMIT_ONE_BYTE (code & 0xFF);
2789 }
aa72b389 2790 }
aa72b389 2791 }
065e3595 2792 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2793 coding->produced_char += produced_chars;
2794 coding->produced = dst - coding->destination;
2795 return 0;
aa72b389 2796}
b73bfc1c 2797
4ed46869 2798\f
df7492f9 2799/*** 7. ISO2022 handlers ***/
4ed46869
KH
2800
2801/* The following note describes the coding system ISO2022 briefly.
39787efd 2802 Since the intention of this note is to help understand the
5a936b46 2803 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2804 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2805 original document of ISO2022. This is equivalent to the standard
cfb43547 2806 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2807
2808 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2809 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2810 is encoded using bytes less than 128. This may make the encoded
2811 text a little bit longer, but the text passes more easily through
cfb43547 2812 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2813 Significant Bit).
b73bfc1c 2814
cfb43547
DL
2815 There are two kinds of character sets: control character sets and
2816 graphic character sets. The former contain control characters such
4ed46869 2817 as `newline' and `escape' to provide control functions (control
39787efd 2818 functions are also provided by escape sequences). The latter
cfb43547 2819 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2820 two control character sets and many graphic character sets.
2821
2822 Graphic character sets are classified into one of the following
39787efd
KH
2823 four classes, according to the number of bytes (DIMENSION) and
2824 number of characters in one dimension (CHARS) of the set:
2825 - DIMENSION1_CHARS94
2826 - DIMENSION1_CHARS96
2827 - DIMENSION2_CHARS94
2828 - DIMENSION2_CHARS96
2829
2830 In addition, each character set is assigned an identification tag,
cfb43547 2831 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2832 hereafter). The <F> of each character set is decided by ECMA(*)
2833 when it is registered in ISO. The code range of <F> is 0x30..0x7E
2834 (0x30..0x3F are for private use only).
4ed46869
KH
2835
2836 Note (*): ECMA = European Computer Manufacturers Association
2837
cfb43547 2838 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2839 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2840 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2841 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2842 o DIMENSION2_CHARS96 -- none for the moment
2843
39787efd 2844 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2845 C0 [0x00..0x1F] -- control character plane 0
2846 GL [0x20..0x7F] -- graphic character plane 0
2847 C1 [0x80..0x9F] -- control character plane 1
2848 GR [0xA0..0xFF] -- graphic character plane 1
2849
2850 A control character set is directly designated and invoked to C0 or
39787efd
KH
2851 C1 by an escape sequence. The most common case is that:
2852 - ISO646's control character set is designated/invoked to C0, and
2853 - ISO6429's control character set is designated/invoked to C1,
2854 and usually these designations/invocations are omitted in encoded
2855 text. In a 7-bit environment, only C0 can be used, and a control
2856 character for C1 is encoded by an appropriate escape sequence to
2857 fit into the environment. All control characters for C1 are
2858 defined to have corresponding escape sequences.
4ed46869
KH
2859
2860 A graphic character set is at first designated to one of four
2861 graphic registers (G0 through G3), then these graphic registers are
2862 invoked to GL or GR. These designations and invocations can be
2863 done independently. The most common case is that G0 is invoked to
39787efd
KH
2864 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2865 these invocations and designations are omitted in encoded text.
2866 In a 7-bit environment, only GL can be used.
4ed46869 2867
39787efd
KH
2868 When a graphic character set of CHARS94 is invoked to GL, codes
2869 0x20 and 0x7F of the GL area work as control characters SPACE and
2870 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2871 be used.
4ed46869
KH
2872
2873 There are two ways of invocation: locking-shift and single-shift.
2874 With locking-shift, the invocation lasts until the next different
39787efd
KH
2875 invocation, whereas with single-shift, the invocation affects the
2876 following character only and doesn't affect the locking-shift
2877 state. Invocations are done by the following control characters or
2878 escape sequences:
4ed46869
KH
2879
2880 ----------------------------------------------------------------------
39787efd 2881 abbrev function cntrl escape seq description
4ed46869 2882 ----------------------------------------------------------------------
39787efd
KH
2883 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2884 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2885 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2886 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2887 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2888 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2889 LS3R (locking-shift-3 right) none ESC '|' invoke G3 into GR (*)
2890 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2891 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2892 ----------------------------------------------------------------------
39787efd
KH
2893 (*) These are not used by any known coding system.
2894
2895 Control characters for these functions are defined by macros
2896 ISO_CODE_XXX in `coding.h'.
4ed46869 2897
39787efd 2898 Designations are done by the following escape sequences:
4ed46869
KH
2899 ----------------------------------------------------------------------
2900 escape sequence description
2901 ----------------------------------------------------------------------
2902 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2903 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2904 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2905 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2906 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2907 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2908 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2909 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2910 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2911 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2912 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2913 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2914 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2915 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2916 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2917 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2918 ----------------------------------------------------------------------
2919
2920 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2921 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2922
2923 Note (*): Although these designations are not allowed in ISO2022,
2924 Emacs accepts them on decoding, and produces them on encoding
39787efd 2925 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2926 7-bit environment, non-locking-shift, and non-single-shift.
2927
2928 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2929 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2930
cfb43547 2931 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2932 same multilingual text in ISO2022. Actually, there exist many
2933 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2934 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2935 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2936 localized platforms), and all of these are variants of ISO2022.
2937
2938 In addition to the above, Emacs handles two more kinds of escape
2939 sequences: ISO6429's direction specification and Emacs' private
2940 sequence for specifying character composition.
2941
39787efd 2942 ISO6429's direction specification takes the following form:
4ed46869
KH
2943 o CSI ']' -- end of the current direction
2944 o CSI '0' ']' -- end of the current direction
2945 o CSI '1' ']' -- start of left-to-right text
2946 o CSI '2' ']' -- start of right-to-left text
2947 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2948 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2949
2950 Character composition specification takes the following form:
ec6d2bb8
KH
2951 o ESC '0' -- start relative composition
2952 o ESC '1' -- end composition
2953 o ESC '2' -- start rule-base composition (*)
2954 o ESC '3' -- start relative composition with alternate chars (**)
2955 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2956 Since these are not standard escape sequences of any ISO standard,
cfb43547 2957 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2958
5a936b46
DL
2959 (*) This form is used only in Emacs 20.7 and older versions,
2960 but newer versions can safely decode it.
cfb43547 2961 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2962 and older versions can't decode it.
ec6d2bb8 2963
cfb43547 2964 Here's a list of example usages of these composition escape
b73bfc1c 2965 sequences (categorized by `enum composition_method').
ec6d2bb8 2966
b73bfc1c 2967 COMPOSITION_RELATIVE:
ec6d2bb8 2968 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2969 COMPOSITION_WITH_RULE:
ec6d2bb8 2970 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2971 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2972 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2973 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2974 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2975
2976enum iso_code_class_type iso_code_class[256];
2977
df7492f9
KH
/* Nonzero if charset ID is covered by CODING's safe_charsets table
   (ID <= CODING->max_charset_id) and its entry there is not 255.
   Note that ID is evaluated twice and is used unparenthesized as the
   array subscript, so pass a simple expression without side effects.  */
2978#define SAFE_CHARSET_P(coding, id) \
2979 ((id) <= (coding)->max_charset_id \
1b3b981b 2980 && (coding)->safe_charsets[id] != 255)
df7492f9
KH
2981
2982
/* Nonzero if CODING_ISO_INITIAL for register 1 of
   coding_categories[CATEGORY] is non-negative.
   NOTE(review): presumably this means some charset is initially
   designated to G1, which is what SO would invoke -- confirm against
   the definition of CODING_ISO_INITIAL.  */
2983#define SHIFT_OUT_OK(category) \
2984 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2985
/* Make sure the coding_attr_safe_charsets slot of the attribute
   vector ATTRS holds a string indexed by charset id.

   Each byte of that string is either the graphic register number
   chosen for the charset or 255 for a charset that got no register.
   If the slot already holds a string, nothing is recomputed.  When
   the CODING_ISO_FLAG_FULL_SUPPORT flag is set and the charset list
   of ATTRS is not Viso_2022_charset_list, the list is replaced by
   Viso_2022_charset_list and the table is rebuilt.  */
2986static void
f0064e1f
DL
2987setup_iso_safe_charsets (attrs)
2988 Lisp_Object attrs;
df7492f9
KH
2989{
2990 Lisp_Object charset_list, safe_charsets;
2991 Lisp_Object request;
2992 Lisp_Object reg_usage;
2993 Lisp_Object tail;
2994 int reg94, reg96;
2995 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2996 int max_charset_id;
2997
2998 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2999 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
3000 && ! EQ (charset_list, Viso_2022_charset_list))
3001 {
/* Switch to the full ISO-2022 charset list and drop any cached table
   so that it is rebuilt below.  */
3002 CODING_ATTR_CHARSET_LIST (attrs)
3003 = charset_list = Viso_2022_charset_list;
3004 ASET (attrs, coding_attr_safe_charsets, Qnil);
3005 }
3006
/* A string in the slot means the table was already computed.  */
3007 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3008 return;
3009
/* First pass: find the largest charset id so the table can be sized.  */
3010 max_charset_id = 0;
3011 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3012 {
3013 int id = XINT (XCAR (tail));
3014 if (max_charset_id < id)
3015 max_charset_id = id;
3016 }
d46c5b12 3017
1b3b981b
AS
/* Every entry starts as 255; SAFE_CHARSET_P treats 255 as "not safe".  */
3018 safe_charsets = make_uninit_string (max_charset_id + 1);
3019 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9
KH
3020 request = AREF (attrs, coding_attr_iso_request);
3021 reg_usage = AREF (attrs, coding_attr_iso_usage);
3022 reg94 = XINT (XCAR (reg_usage));
3023 reg96 = XINT (XCDR (reg_usage));
3024
/* Second pass: record a register for each charset.  An explicit entry
   in REQUEST wins; otherwise REG96 or REG94 is used depending on
   charset->iso_chars_96, but only when that value is below 4.  */
3025 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3026 {
3027 Lisp_Object id;
3028 Lisp_Object reg;
3029 struct charset *charset;
3030
3031 id = XCAR (tail);
3032 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 3033 reg = Fcdr (Fassq (id, request));
df7492f9 3034 if (! NILP (reg))
8f924df7 3035 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
3036 else if (charset->iso_chars_96)
3037 {
3038 if (reg96 < 4)
8f924df7 3039 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
3040 }
3041 else
3042 {
3043 if (reg94 < 4)
8f924df7 3044 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
3045 }
3046 }
3047 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3048}
d46c5b12 3049
b6871cc7 3050
4ed46869 3051/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
3052 Check if a text is encoded in one of ISO-2022 based coding systems.
3053 If it is, return 1, else return 0. */
4ed46869 3054
/* Scan the bytes of CODING->source and accumulate, in the local masks
   `found' and `rejected', which ISO-2022 coding categories the text
   is compatible with.

   CATEGORY_MASK_ISO is always added to DETECT_INFO->checked.  The
   scan stops as soon as `rejected' equals CATEGORY_MASK_ISO; in that
   case CATEGORY_MASK_ISO is added to DETECT_INFO->rejected and 0 is
   returned.  Otherwise control reaches the `no_more_source' label
   (NOTE(review): presumably via ONE_MORE_BYTE when the source is
   exhausted -- the macro is not defined in this part of the file),
   where `rejected' and `found & ~rejected' are merged into
   DETECT_INFO and 1 is returned.  */
0a28aafb 3055static int
ff0dacd7 3056detect_coding_iso_2022 (coding, detect_info)
df7492f9 3057 struct coding_system *coding;
ff0dacd7 3058 struct coding_detection_info *detect_info;
4ed46869 3059{
8f924df7
KH
3060 const unsigned char *src = coding->source, *src_base = src;
3061 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 3062 int multibytep = coding->src_multibyte;
ff0dacd7 3063 int single_shifting = 0;
df7492f9
KH
3064 int id;
3065 int c, c1;
3066 int consumed_chars = 0;
3067 int i;
ff0dacd7
KH
3068 int rejected = 0;
3069 int found = 0;
/* -1 while no composition is open; otherwise a running count that is
   checked against MAX_COMPOSITION_COMPONENTS when ESC 1 is seen.  */
cee53ed4 3070 int composition_count = -1;
ff0dacd7
KH
3071
3072 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
3073
/* Refresh max_charset_id and safe_charsets of every ISO category that
   has a coding system assigned (id >= 0), so that SAFE_CHARSET_P can
   be applied to them below.  */
3074 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3075 {
3076 struct coding_system *this = &(coding_categories[i]);
3077 Lisp_Object attrs, val;
3078
c6b278e7
KH
3079 if (this->id < 0)
3080 continue;
df7492f9
KH
3081 attrs = CODING_ID_ATTRS (this->id);
3082 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
1b3b981b 3083 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
df7492f9
KH
3084 setup_iso_safe_charsets (attrs);
3085 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 3086 this->max_charset_id = SCHARS (val) - 1;
1b3b981b 3087 this->safe_charsets = SDATA (val);
df7492f9
KH
3088 }
3089
3090 /* A coding system of this category is always ASCII compatible. */
3091 src += coding->head_ascii;
3f003981 3092
ff0dacd7 3093 while (rejected != CATEGORY_MASK_ISO)
4ed46869 3094 {
065e3595 3095 src_base = src;
df7492f9 3096 ONE_MORE_BYTE (c);
4ed46869
KH
3097 switch (c)
3098 {
3099 case ISO_CODE_ESC:
74383408
KH
3100 if (inhibit_iso_escape_detection)
3101 break;
f46869e4 3102 single_shifting = 0;
df7492f9 3103 ONE_MORE_BYTE (c);
d46c5b12 3104 if (c >= '(' && c <= '/')
4ed46869 3105 {
bf9cdd4e 3106 /* Designation sequence for a charset of dimension 1. */
df7492f9 3107 ONE_MORE_BYTE (c1);
d46c5b12 3108 if (c1 < ' ' || c1 >= 0x80
df7492f9 3109 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
3110 /* Invalid designation sequence. Just ignore. */
3111 break;
bf9cdd4e
KH
3112 }
3113 else if (c == '$')
3114 {
3115 /* Designation sequence for a charset of dimension 2. */
df7492f9 3116 ONE_MORE_BYTE (c);
bf9cdd4e
KH
3117 if (c >= '@' && c <= 'B')
3118 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 3119 id = iso_charset_table[1][0][c];
bf9cdd4e 3120 else if (c >= '(' && c <= '/')
bcf26d6a 3121 {
df7492f9 3122 ONE_MORE_BYTE (c1);
d46c5b12 3123 if (c1 < ' ' || c1 >= 0x80
df7492f9 3124 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
3125 /* Invalid designation sequence. Just ignore. */
3126 break;
bcf26d6a 3127 }
bf9cdd4e 3128 else
ff0dacd7 3129 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
3130 break;
3131 }
ae9ff118 3132 else if (c == 'N' || c == 'O')
d46c5b12 3133 {
ae9ff118 3134 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
3135 single_shifting = 1;
3136 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 3137 break;
4ed46869 3138 }
cee53ed4
KH
3139 else if (c == '1')
3140 {
3141 /* End of composition. */
3142 if (composition_count < 0
3143 || composition_count > MAX_COMPOSITION_COMPONENTS)
3144 /* Invalid */
3145 break;
3146 composition_count = -1;
3147 found |= CATEGORY_MASK_ISO;
/* NOTE(review): unlike the neighbouring branches, this one does not
   end with `break', so control continues into the designation
   bookkeeping after the if-chain.  That code tests `id', which this
   branch never assigns (it is either uninitialized or left over from
   an earlier designation).  Confirm whether a `break' is missing.  */
3148 }
ec6d2bb8
KH
3149 else if (c >= '0' && c <= '4')
3150 {
3151 /* ESC <Fp> for start/end composition. */
cee53ed4 3152 composition_count = 0;
ec6d2bb8
KH
3153 break;
3154 }
bf9cdd4e 3155 else
df7492f9 3156 {
ff0dacd7 3157 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
3158 break;
3159 }
d46c5b12
KH
3160
3161 /* We found a valid designation sequence for CHARSET. */
/* Each 7-bit/else category is marked found or rejected according to
   whether the designated charset ID is safe for that category.  */
ff0dacd7 3162 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
3163 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3164 id))
ff0dacd7 3165 found |= CATEGORY_MASK_ISO_7;
d46c5b12 3166 else
ff0dacd7 3167 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
3168 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3169 id))
ff0dacd7 3170 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 3171 else
ff0dacd7 3172 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
3173 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3174 id))
ff0dacd7 3175 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 3176 else
ff0dacd7 3177 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
3178 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3179 id))
ff0dacd7 3180 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 3181 else
ff0dacd7 3182 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
3183 break;
3184
4ed46869 3185 case ISO_CODE_SO:
d46c5b12 3186 case ISO_CODE_SI:
ff0dacd7 3187 /* Locking shift out/in. */
74383408
KH
3188 if (inhibit_iso_escape_detection)
3189 break;
f46869e4 3190 single_shifting = 0;
ff0dacd7 3191 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12
KH
3192 break;
3193
4ed46869 3194 case ISO_CODE_CSI:
ff0dacd7 3195 /* Control sequence introducer. */
f46869e4 3196 single_shifting = 0;
ff0dacd7
KH
3197 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3198 found |= CATEGORY_MASK_ISO_8_ELSE;
3199 goto check_extra_latin;
3200
4ed46869
KH
3201 case ISO_CODE_SS2:
3202 case ISO_CODE_SS3:
ff0dacd7
KH
3203 /* Single shift. */
3204 if (inhibit_iso_escape_detection)
3205 break;
75e2a253 3206 single_shifting = 0;
ff0dacd7
KH
3207 rejected |= CATEGORY_MASK_ISO_7BIT;
3208 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3209 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253 3210 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
ff0dacd7
KH
3211 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3212 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253
KH
3213 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3214 if (single_shifting)
3215 break;
ff0dacd7 3216 goto check_extra_latin;
4ed46869
KH
3217
3218 default:
065e3595
KH
3219 if (c < 0)
3220 continue;
4ed46869 3221 if (c < 0x80)
f46869e4 3222 {
cee53ed4
KH
3223 if (composition_count >= 0)
3224 composition_count++;
f46869e4
KH
3225 single_shifting = 0;
3226 break;
3227 }
ff0dacd7 3228 if (c >= 0xA0)
c4825358 3229 {
ff0dacd7
KH
3230 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3231 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 3232 /* Check the length of succeeding codes of the range
ff0dacd7
KH
3233 0xA0..0xFF. If the byte length is even, we include
3234 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
3235 only when we are not single shifting. */
3236 if (! single_shifting
3237 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 3238 {
/* This `i' shadows the function-level `i'; it counts the bytes
   >= 0xA0 in the current run, starting with the one already read.  */
e17de821 3239 int i = 1;
b73bfc1c
KH
3240 while (src < src_end)
3241 {
df7492f9 3242 ONE_MORE_BYTE (c);
b73bfc1c
KH
3243 if (c < 0xA0)
3244 break;
3245 i++;
3246 }
3247
3248 if (i & 1 && src < src_end)
cee53ed4
KH
3249 {
3250 rejected |= CATEGORY_MASK_ISO_8_2;
3251 if (composition_count >= 0)
3252 composition_count += i;
3253 }
f46869e4 3254 else
cee53ed4
KH
3255 {
3256 found |= CATEGORY_MASK_ISO_8_2;
3257 if (composition_count >= 0)
3258 composition_count += i / 2;
3259 }
f46869e4 3260 }
ff0dacd7 3261 break;
4ed46869 3262 }
ff0dacd7
KH
/* Reached for CSI, for SS2/SS3 when neither 8-bit category uses
   single shift, and for other bytes in 0x80..0x9F: the byte is only
   acceptable if Vlatin_extra_code_table has a non-nil entry for it.  */
3263 check_extra_latin:
3264 single_shifting = 0;
3265 if (! VECTORP (Vlatin_extra_code_table)
3266 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3267 {
3268 rejected = CATEGORY_MASK_ISO;
3269 break;
3270 }
3271 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3272 & CODING_ISO_FLAG_LATIN_EXTRA)
3273 found |= CATEGORY_MASK_ISO_8_1;
3274 else
3275 rejected |= CATEGORY_MASK_ISO_8_1;
75e2a253 3276 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
3277 }
3278 }
ff0dacd7
KH
/* The loop ended because every ISO category was rejected.  */
3279 detect_info->rejected |= CATEGORY_MASK_ISO;
3280 return 0;
4ed46869 3281
df7492f9 3282 no_more_source:
ff0dacd7
KH
/* Report only the categories that were found and never rejected.  */
3283 detect_info->rejected |= rejected;
3284 detect_info->found |= (found & ~rejected);
df7492f9 3285 return 1;
4ed46869 3286}
ec6d2bb8 3287
4ed46869 3288
134b9549
KH
3289/* Set designation state into CODING. Set CHARS_96 to -1 if the
3290 escape sequence should be kept. */
df7492f9
KH
/* Expansion-site requirements: a variable `coding' must be in scope,
   and CHARS_96 must be an assignable lvalue because the macro stores
   -1 into it.  FINAL is evaluated more than once.

   The charset id is looked up with ISO_CHARSET_TABLE (DIM, CHARS_96,
   FINAL).  If FINAL is below '0' or not below 128, the lookup is
   negative, or the charset fails SAFE_CHARSET_P for `coding', then
   register REG is marked with designation -2, CHARS_96 becomes -1,
   and the macro body is left early.  Otherwise the id is stored as
   the designation of REG, after mapping charset_jisx0201_roman to
   charset_ascii under CODING_ISO_FLAG_USE_ROMAN and
   charset_jisx0208_1978 to charset_jisx0208 under
   CODING_ISO_FLAG_USE_OLDJIS.  */
3291#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
3292 do { \
3293 int id, prev; \
3294 \
3295 if (final < '0' || final >= 128 \
3296 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
3297 || !SAFE_CHARSET_P (coding, id)) \
3298 { \
3299 CODING_ISO_DESIGNATION (coding, reg) = -2; \
134b9549
KH
3300 chars_96 = -1; \
3301 break; \
df7492f9
KH
3302 } \
3303 prev = CODING_ISO_DESIGNATION (coding, reg); \
bf16eb23
KH
3304 if (id == charset_jisx0201_roman) \
3305 { \
3306 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3307 id = charset_ascii; \
3308 } \
3309 else if (id == charset_jisx0208_1978) \
3310 { \
3311 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3312 id = charset_jisx0208; \
3313 } \
df7492f9
KH
3314 CODING_ISO_DESIGNATION (coding, reg) = id; \
3315 /* If there was an invalid designation to REG previously, and this \
3316 designation is ASCII to REG, we should keep this designation \
3317 sequence. */ \
3318 if (prev == -2 && id == charset_ascii) \
134b9549 3319 chars_96 = -1; \
4ed46869
KH
3320 } while (0)
3321
d46c5b12 3322
e951386e
KH
3323/* Handle these composition sequences (ALT: alternate char):
3324
3325 (1) relative composition: ESC 0 CHAR ... ESC 1
3326 (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3327 (3) altchar composition: ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3328 (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3329
3330 When the start sequence (ESC 0/2/3/4) is found, this annotation
3331 header is produced.
3332
3333 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3334
3335 Then, upon reading CHAR or RULE (one or two bytes), these codes are
3336 produced until the end sequence (ESC 1) is found:
3337
3338 (1) CHAR ... CHAR
3339 (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3340 (3) ALT ... ALT -1 -1 CHAR ... CHAR
3341 (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3342
3343 When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3344 annotation header is updated as below:
3345
3346 (1) LENGTH: unchanged, NCHARS: number of CHARs
3347 (2) LENGTH: unchanged, NCHARS: number of CHARs
3348 (3) LENGTH: += number of ALTs + 2, NCHARS: number of CHARs
3349 (4) LENGTH: += number of ALTs * 3, NCHARS: number of CHARs
3350
3351 If an error is found while composing, the annotation header is
3352 changed to:
3353
3354 [ ESC '0'/'2'/'3'/'4' -2 0 ]
3355
3356 and the sequence [ -2 DECODED-RULE ] is changed to the original
3357 byte sequence as below:
3358 o the original byte sequence is B: [ B -1 ]
3359 o the original byte sequence is B1 B2: [ B1 B2 ]
3360 and the sequence [ -1 -1 ] is changed to the original byte
3361 sequence:
3362 [ ESC '0' ]
3363*/
3364
3365/* Decode a composition rule C1 and maybe one more byte from the
3366 source, and set RULE to the encoded composition rule, NBYTES to the
3367 length of the composition rule. If the rule is invalid, set RULE
3368 to some negative value. */
3369
/* Expansion-site requirements: the first rule byte is read from a
   variable `c1' that must be in scope, and the new-format branch
   fetches a second byte with ONE_MORE_BYTE.  RULE and NBYTES must be
   assignable lvalues.  When the first check finds RULE negative, the
   macro is left early and NBYTES is not assigned.  The local `c' in
   the new-format branch shadows any `c' at the expansion site.  */
3370#define DECODE_COMPOSITION_RULE(rule, nbytes) \
3371 do { \
3372 rule = c1 - 32; \
3373 if (rule < 0) \
3374 break; \
3375 if (rule < 81) /* old format (before ver.21) */ \
3376 { \
3377 int gref = (rule) / 9; \
3378 int nref = (rule) % 9; \
3379 if (gref == 4) gref = 10; \
3380 if (nref == 4) nref = 10; \
3381 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
3382 nbytes = 1; \
3383 } \
3384 else /* new format (after ver.21) */ \
3385 { \
3386 int c; \
3387 \
3388 ONE_MORE_BYTE (c); \
3389 rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32); \
3390 if (rule >= 0) \
3391 rule += 0x100; /* to distinguish it from the old format */ \
3392 nbytes = 2; \
3393 } \
3394 } while (0)
3395
/* Turn a decoded composition rule RULE back into the byte(s) it was
   read from, writing them to charbuf[idx] and charbuf[idx + 1].

   Expansion-site requirements: variables `charbuf', `idx' and
   `new_chars' must be in scope.  RULE is evaluated several times and
   is not parenthesized in the expansion.  A rule below 0x100 is
   treated as the one-byte old format: the byte goes to charbuf[idx],
   -1 goes to charbuf[idx + 1], and new_chars grows by 1.  Otherwise
   two bytes are written and new_chars grows by 2.  */
3396#define ENCODE_COMPOSITION_RULE(rule) \
df7492f9 3397 do { \
e951386e
KH
3398 int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \
3399 \
3400 if (rule < 0x100) /* old format */ \
df7492f9 3401 { \
e951386e
KH
3402 if (gref == 10) gref = 4; \
3403 if (nref == 10) nref = 4; \
3404 charbuf[idx] = 32 + gref * 9 + nref; \
3405 charbuf[idx + 1] = -1; \
3406 new_chars++; \
df7492f9 3407 } \
e951386e 3408 else /* new format */ \
df7492f9 3409 { \
e951386e
KH
3410 charbuf[idx] = 32 + 81 + gref; \
3411 charbuf[idx + 1] = 32 + nref; \
3412 new_chars += 2; \
df7492f9
KH
3413 } \
3414 } while (0)
3415
e951386e
KH
3416/* Finish the current composition as invalid. */
3417
3418static int finish_composition P_ ((int *, struct composition_status *));
3419
/* Abandon the composition described by CMP_STATUS and rewrite its
   data in place as ordinary codes.

   CHARBUF must point just past the composition data: the data is
   addressed with negative indices starting at -CMP_STATUS->length.
   The first five slots are overwritten with ESC, the digit naming the
   composition method, -2, 0 and -1.  For methods at or above
   COMPOSITION_WITH_RULE the rest is scanned: a [-2 RULE] pair is
   replaced by the rule's original byte(s) and a [-1 -1] pair by
   ESC '0'.  CMP_STATUS->state is reset to COMPOSING_NO.

   Returns CMP_STATUS->nchars plus the number of codes added by those
   replacements.
   NOTE(review): the `>= COMPOSITION_WITH_RULE' test relies on the
   ordering of `enum composition_method', which is declared elsewhere
   -- confirm.  */
3420static int
3421finish_composition (charbuf, cmp_status)
3422 int *charbuf;
3423 struct composition_status *cmp_status;
3424{
3425 int idx = - cmp_status->length;
3426 int new_chars;
3427
3428 /* Recover the original ESC sequence */
3429 charbuf[idx++] = ISO_CODE_ESC;
3430 charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3431 : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3432 : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3433 /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3434 : '4');
3435 charbuf[idx++] = -2;
3436 charbuf[idx++] = 0;
3437 charbuf[idx++] = -1;
3438 new_chars = cmp_status->nchars;
3439 if (cmp_status->method >= COMPOSITION_WITH_RULE)
3440 for (; idx < 0; idx++)
3441 {
3442 int elt = charbuf[idx];
3443
3444 if (elt == -2)
3445 {
/* Overwrites charbuf[idx] and charbuf[idx + 1] and bumps new_chars.  */
3446 ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3447 idx++;
3448 }
3449 else if (elt == -1)
3450 {
3451 charbuf[idx++] = ISO_CODE_ESC;
3452 charbuf[idx] = '0';
3453 new_chars += 2;
3454 }
3455 }
3456 cmp_status->state = COMPOSING_NO;
3457 return new_chars;
3458}
3459
3460/* If characters are under composition, finish the composition.
   Requires `cmp_status', `charbuf' and `char_offset' to be in scope
   at the expansion site; char_offset is advanced by the value that
   finish_composition returns.  */
3461#define MAYBE_FINISH_COMPOSITION() \
3462 do { \
3463 if (cmp_status->state != COMPOSING_NO) \
3464 char_offset += finish_composition (charbuf, cmp_status); \
3465 } while (0)
d46c5b12 3466
aa72b389 3467/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
e951386e 3468
aa72b389
KH
3469 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3470 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
df7492f9
KH
3471 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3472 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
ec6d2bb8 3473
e951386e
KH
3474 Produce this annotation sequence now:
3475
3476 [ -LENGTH(==-4) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) METHOD ]
3477*/
3478
/* Expansion-site requirements: `cmp_status', `charbuf', `coding' and
   (through MAYBE_FINISH_COMPOSITION) `char_offset' must be in scope.
   C1 is evaluated several times.

   ESC 0 seen while collecting alternate chars (state
   COMPOSING_COMPONENT_CHAR with COMPOSITION_WITH_ALTCHARS, or state
   COMPOSING_COMPONENT_RULE with COMPOSITION_WITH_RULE_ALTCHARS) does
   not start a new composition: it stores the separator [-1 -1] and
   switches to COMPOSING_CHAR.  Any other case first finishes a
   pending composition and then begins a new one whose method is
   chosen from C1.
   NOTE(review): ADD_COMPOSITION_DATA is passed three values (0, 0,
   method) and length is set to MAX_ANNOTATION_LENGTH, which may not
   match the four-element header with LENGTH == -4 described in the
   comment above -- confirm against the macro definitions.  */
3479#define DECODE_COMPOSITION_START(c1) \
3480 do { \
3481 if (c1 == '0' \
3482 && ((cmp_status->state == COMPOSING_COMPONENT_CHAR \
3483 && cmp_status->method == COMPOSITION_WITH_ALTCHARS) \
3484 || (cmp_status->state == COMPOSING_COMPONENT_RULE \
3485 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3486 { \
3487 *charbuf++ = -1; \
3488 *charbuf++= -1; \
3489 cmp_status->state = COMPOSING_CHAR; \
3490 cmp_status->length += 2; \
3491 } \
3492 else \
3493 { \
3494 MAYBE_FINISH_COMPOSITION (); \
3495 cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE \
3496 : c1 == '2' ? COMPOSITION_WITH_RULE \
3497 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
3498 : COMPOSITION_WITH_RULE_ALTCHARS); \
3499 cmp_status->state \
3500 = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR); \
3501 ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
3502 cmp_status->length = MAX_ANNOTATION_LENGTH; \
3503 cmp_status->nchars = cmp_status->ncomps = 0; \
3504 coding->annotated = 1; \
3505 } \
ec6d2bb8
KH
3506 } while (0)
3507
ec6d2bb8 3508
e951386e 3509/* Handle composition end sequence ESC 1. */
df7492f9
KH
3510
/* Expansion-site requirements: `cmp_status', `charbuf' and
   `char_offset' must be in scope, and a label `invalid_code' must be
   reachable, because the macro jumps there on error.

   The sequence is treated as invalid when no character has been
   composed yet (nchars == 0), or when "state is COMPOSING_CHAR" and
   "method is COMPOSITION_WITH_RULE" have the same truth value.  In
   that case a pending composition is finished as invalid before the
   jump.  Otherwise the annotation header located at
   charbuf[-cmp_status->length] is patched: its length slot is
   decreased for the two ALTCHARS methods, and the slot two past it
   receives nchars.  */
3511#define DECODE_COMPOSITION_END() \
ec6d2bb8 3512 do { \
e951386e
KH
3513 if (cmp_status->nchars == 0 \
3514 || ((cmp_status->state == COMPOSING_CHAR) \
3515 == (cmp_status->method == COMPOSITION_WITH_RULE))) \
ec6d2bb8 3516 { \
e951386e
KH
3517 MAYBE_FINISH_COMPOSITION (); \
3518 goto invalid_code; \
ec6d2bb8 3519 } \
e951386e
KH
3520 if (cmp_status->method == COMPOSITION_WITH_ALTCHARS) \
3521 charbuf[- cmp_status->length] -= cmp_status->ncomps + 2; \
3522 else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS) \
3523 charbuf[- cmp_status->length] -= cmp_status->ncomps * 3; \
3524 charbuf[- cmp_status->length + 2] = cmp_status->nchars; \
3525 char_offset += cmp_status->nchars; \
3526 cmp_status->state = COMPOSING_NO; \
ec6d2bb8
KH
3527 } while (0)
3528
e951386e 3529/* Store a composition rule RULE in charbuf, and update cmp_status. */
df7492f9 3530
e951386e
KH
/* Appends the pair [-2 RULE] to charbuf and adds 2 to
   cmp_status->length.  Requires `charbuf' and `cmp_status' in scope.
   NOTE(review): `state--' presumably steps from a *_RULE state back
   to the matching *_CHAR state; that depends on the enumerator order
   of the composing states, declared elsewhere -- confirm.  */
3531#define STORE_COMPOSITION_RULE(rule) \
3532 do { \
3533 *charbuf++ = -2; \
3534 *charbuf++ = rule; \
3535 cmp_status->length += 2; \
3536 cmp_status->state--; \
3537 } while (0)
ec6d2bb8 3538
e951386e
KH
3539/* Store a composed char or a component char C in charbuf, and update
3540 cmp_status. */
3541
/* Appends C to charbuf and adds 1 to cmp_status->length.  In state
   COMPOSING_CHAR the character counts towards nchars, otherwise
   towards ncomps.  Requires `charbuf' and `cmp_status' in scope.
   NOTE(review): `state++' presumably advances to the matching *_RULE
   state so that a rule is expected next; that depends on the
   enumerator order of the composing states -- confirm.  */
3542#define STORE_COMPOSITION_CHAR(c) \
ec6d2bb8 3543 do { \
e951386e
KH
3544 *charbuf++ = (c); \
3545 cmp_status->length++; \
3546 if (cmp_status->state == COMPOSING_CHAR) \
3547 cmp_status->nchars++; \
df7492f9 3548 else \
e951386e
KH
3549 cmp_status->ncomps++; \
3550 if (cmp_status->method == COMPOSITION_WITH_RULE \
3551 || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS \
3552 && cmp_status->state == COMPOSING_COMPONENT_CHAR)) \
3553 cmp_status->state++; \
ec6d2bb8 3554 } while (0)
88993dfd 3555
d46c5b12 3556
4ed46869
KH
3557/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
3558
b73bfc1c 3559static void
df7492f9 3560decode_coding_iso_2022 (coding)
4ed46869 3561 struct coding_system *coding;
4ed46869 3562{
8f924df7
KH
3563 const unsigned char *src = coding->source + coding->consumed;
3564 const unsigned char *src_end = coding->source + coding->src_bytes;
3565 const unsigned char *src_base;
69a80ea3 3566 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
3567 /* We may produce two annotations (charset and composition) in one
3568 loop and one more charset annotation at the end. */
ff0dacd7 3569 int *charbuf_end
df80c7f0 3570 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
df7492f9 3571 int consumed_chars = 0, consumed_chars_base;
df7492f9 3572 int multibytep = coding->src_multibyte;
4ed46869 3573 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
3574 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3575 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 3576 int charset_id_2, charset_id_3;
df7492f9
KH
3577 struct charset *charset;
3578 int c;
e951386e 3579 struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
24a73b0a 3580 Lisp_Object attrs, charset_list;
ff0dacd7
KH
3581 int char_offset = coding->produced_char;
3582 int last_offset = char_offset;
3583 int last_id = charset_ascii;
0a9564cb
EZ
3584 int eol_crlf =
3585 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 3586 int byte_after_cr = -1;
e951386e 3587 int i;
df7492f9 3588
24a73b0a 3589 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 3590 setup_iso_safe_charsets (attrs);
287c57d7
KH
3591 /* Charset list may have been changed. */
3592 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
1b3b981b 3593 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
b73bfc1c 3594
e951386e
KH
3595 if (cmp_status->state != COMPOSING_NO)
3596 {
3597 for (i = 0; i < cmp_status->length; i++)
3598 *charbuf++ = cmp_status->carryover[i];
3599 coding->annotated = 1;
3600 }
3601
b73bfc1c 3602 while (1)
4ed46869 3603 {
cf299835 3604 int c1, c2, c3;
b73bfc1c
KH
3605
3606 src_base = src;
df7492f9
KH
3607 consumed_chars_base = consumed_chars;
3608
3609 if (charbuf >= charbuf_end)
b71f6f73
KH
3610 {
3611 if (byte_after_cr >= 0)
3612 src_base--;
3613 break;
3614 }
df7492f9 3615
119852e7
KH
3616 if (byte_after_cr >= 0)
3617 c1 = byte_after_cr, byte_after_cr = -1;
3618 else
3619 ONE_MORE_BYTE (c1);
065e3595
KH
3620 if (c1 < 0)
3621 goto invalid_code;
4ed46869 3622
e951386e 3623 if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
4ed46869 3624 {
e951386e
KH
3625 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3626 char_offset++;
3627 CODING_ISO_EXTSEGMENT_LEN (coding)--;
3628 continue;
3629 }
3630
3631 if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3632 {
3633 if (c1 == ISO_CODE_ESC)
ec6d2bb8 3634 {
e951386e
KH
3635 if (src + 1 >= src_end)
3636 goto no_more_source;
3637 *charbuf++ = ISO_CODE_ESC;
3638 char_offset++;
3639 if (src[0] == '%' && src[1] == '@')
df7492f9 3640 {
e951386e
KH
3641 src += 2;
3642 consumed_chars += 2;
3643 char_offset += 2;
3644 /* We are sure charbuf can contain two more chars. */
3645 *charbuf++ = '%';
3646 *charbuf++ = '@';
3647 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
df7492f9 3648 }
4ed46869 3649 }
e951386e
KH
3650 else
3651 {
3652 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3653 char_offset++;
3654 }
3655 continue;
3656 }
3657
3658 if ((cmp_status->state == COMPOSING_RULE
3659 || cmp_status->state == COMPOSING_COMPONENT_RULE)
3660 && c1 != ISO_CODE_ESC)
3661 {
3662 int rule, nbytes;
3663
3664 DECODE_COMPOSITION_RULE (rule, nbytes);
3665 if (rule < 0)
3666 goto invalid_code;
3667 STORE_COMPOSITION_RULE (rule);
3668 continue;
3669 }
3670
3671 /* We produce at most one character. */
3672 switch (iso_code_class [c1])
3673 {
3674 case ISO_0x20_or_0x7F:
df7492f9
KH
3675 if (charset_id_0 < 0
3676 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
3677 /* This is SPACE or DEL. */
3678 charset = CHARSET_FROM_ID (charset_ascii);
3679 else
3680 charset = CHARSET_FROM_ID (charset_id_0);
3681 break;
4ed46869
KH
3682
3683 case ISO_graphic_plane_0:
134b9549
KH
3684 if (charset_id_0 < 0)
3685 charset = CHARSET_FROM_ID (charset_ascii);
3686 else
3687 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
3688 break;
3689
3690 case ISO_0xA0_or_0xFF:
df7492f9
KH
3691 if (charset_id_1 < 0
3692 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3693 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3694 goto invalid_code;
4ed46869
KH
3695 /* This is a graphic character, we fall down ... */
3696
3697 case ISO_graphic_plane_1:
df7492f9
KH
3698 if (charset_id_1 < 0)
3699 goto invalid_code;
3700 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
3701 break;
3702
df7492f9 3703 case ISO_control_0:
119852e7
KH
3704 if (eol_crlf && c1 == '\r')
3705 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
3706 MAYBE_FINISH_COMPOSITION ();
3707 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
3708 break;
3709
df7492f9 3710 case ISO_control_1:
df7492f9
KH
3711 goto invalid_code;
3712
4ed46869 3713 case ISO_shift_out:
df7492f9
KH
3714 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3715 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3716 goto invalid_code;
3717 CODING_ISO_INVOCATION (coding, 0) = 1;
3718 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3719 continue;
4ed46869
KH
3720
3721 case ISO_shift_in:
df7492f9
KH
3722 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3723 goto invalid_code;
3724 CODING_ISO_INVOCATION (coding, 0) = 0;
3725 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3726 continue;
4ed46869
KH
3727
3728 case ISO_single_shift_2_7:
3729 case ISO_single_shift_2:
df7492f9
KH
3730 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3731 goto invalid_code;
4ed46869
KH
3732 /* SS2 is handled as an escape sequence of ESC 'N' */
3733 c1 = 'N';
3734 goto label_escape_sequence;
3735
3736 case ISO_single_shift_3:
df7492f9
KH
3737 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3738 goto invalid_code;
4ed46869
KH
3739 /* SS3 is handled as an escape sequence of ESC 'O' */
3740 c1 = 'O';
3741 goto label_escape_sequence;
3742
3743 case ISO_control_sequence_introducer:
3744 /* CSI is handled as an escape sequence of ESC '[' ... */
3745 c1 = '[';
3746 goto label_escape_sequence;
3747
3748 case ISO_escape:
3749 ONE_MORE_BYTE (c1);
3750 label_escape_sequence:
df7492f9 3751 /* Escape sequences handled here are invocation,
4ed46869
KH
3752 designation, direction specification, and character
3753 composition specification. */
3754 switch (c1)
3755 {
3756 case '&': /* revision of following character set */
3757 ONE_MORE_BYTE (c1);
3758 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3759 goto invalid_code;
4ed46869
KH
3760 ONE_MORE_BYTE (c1);
3761 if (c1 != ISO_CODE_ESC)
df7492f9 3762 goto invalid_code;
4ed46869
KH
3763 ONE_MORE_BYTE (c1);
3764 goto label_escape_sequence;
3765
3766 case '$': /* designation of 2-byte character set */
df7492f9
KH
3767 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3768 goto invalid_code;
134b9549
KH
3769 {
3770 int reg, chars96;
3771
3772 ONE_MORE_BYTE (c1);
3773 if (c1 >= '@' && c1 <= 'B')
3774 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3775 or JISX0208.1980 */
134b9549
KH
3776 reg = 0, chars96 = 0;
3777 }
3778 else if (c1 >= 0x28 && c1 <= 0x2B)
3779 { /* designation of DIMENSION2_CHARS94 character set */
3780 reg = c1 - 0x28, chars96 = 0;
3781 ONE_MORE_BYTE (c1);
3782 }
3783 else if (c1 >= 0x2C && c1 <= 0x2F)
3784 { /* designation of DIMENSION2_CHARS96 character set */
3785 reg = c1 - 0x2C, chars96 = 1;
3786 ONE_MORE_BYTE (c1);
3787 }
3788 else
3789 goto invalid_code;
3790 DECODE_DESIGNATION (reg, 2, chars96, c1);
3791 /* We must update these variables now. */
3792 if (reg == 0)
3793 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3794 else if (reg == 1)
3795 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3796 if (chars96 < 0)
3797 goto invalid_code;
3798 }
b73bfc1c 3799 continue;
4ed46869
KH
3800
3801 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3802 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3803 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3804 goto invalid_code;
3805 CODING_ISO_INVOCATION (coding, 0) = 2;
3806 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3807 continue;
4ed46869
KH
3808
3809 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3810 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3811 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3812 goto invalid_code;
3813 CODING_ISO_INVOCATION (coding, 0) = 3;
3814 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3815 continue;
4ed46869
KH
3816
3817 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3818 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3819 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3820 goto invalid_code;
134b9549
KH
3821 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3822 if (charset_id_2 < 0)
3823 charset = CHARSET_FROM_ID (charset_ascii);
3824 else
3825 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3826 ONE_MORE_BYTE (c1);
e7046a18 3827 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3828 goto invalid_code;
4ed46869
KH
3829 break;
3830
3831 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3832 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3833 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3834 goto invalid_code;
134b9549
KH
3835 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3836 if (charset_id_3 < 0)
3837 charset = CHARSET_FROM_ID (charset_ascii);
3838 else
3839 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3840 ONE_MORE_BYTE (c1);
e7046a18 3841 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3842 goto invalid_code;
4ed46869
KH
3843 break;
3844
ec6d2bb8 3845 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3846 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3847 goto invalid_code;
e951386e
KH
3848 if (last_id != charset_ascii)
3849 {
3850 ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3851 last_id = charset_ascii;
3852 last_offset = char_offset;
3853 }
ec6d2bb8 3854 DECODE_COMPOSITION_START (c1);
b73bfc1c 3855 continue;
4ed46869 3856
ec6d2bb8 3857 case '1': /* end composition */
e951386e 3858 if (cmp_status->state == COMPOSING_NO)
df7492f9
KH
3859 goto invalid_code;
3860 DECODE_COMPOSITION_END ();
b73bfc1c 3861 continue;
4ed46869
KH
3862
3863 case '[': /* specification of direction */
de59072a 3864 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
df7492f9 3865 goto invalid_code;
4ed46869 3866 /* For the moment, nested direction is not supported.
d46c5b12 3867 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3868 left-to-right, and nonzero means right-to-left. */
4ed46869
KH
3869 ONE_MORE_BYTE (c1);
3870 switch (c1)
3871 {
3872 case ']': /* end of the current direction */
d46c5b12 3873 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3874
3875 case '0': /* end of the current direction */
3876 case '1': /* start of left-to-right direction */
3877 ONE_MORE_BYTE (c1);
3878 if (c1 == ']')
d46c5b12 3879 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3880 else
df7492f9 3881 goto invalid_code;
4ed46869
KH
3882 break;
3883
3884 case '2': /* start of right-to-left direction */
3885 ONE_MORE_BYTE (c1);
3886 if (c1 == ']')
d46c5b12 3887 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3888 else
df7492f9 3889 goto invalid_code;
4ed46869
KH
3890 break;
3891
3892 default:
df7492f9 3893 goto invalid_code;
4ed46869 3894 }
b73bfc1c 3895 continue;
4ed46869 3896
103e0180 3897 case '%':
103e0180
KH
3898 ONE_MORE_BYTE (c1);
3899 if (c1 == '/')
3900 {
3901 /* CTEXT extended segment:
3902 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3903 We keep these bytes as is for the moment.
3904 They may be decoded by post-read-conversion. */
3905 int dim, M, L;
4776e638 3906 int size;
8f924df7 3907
103e0180 3908 ONE_MORE_BYTE (dim);
e951386e
KH
3909 if (dim < 0 || dim > 4)
3910 goto invalid_code;
103e0180 3911 ONE_MORE_BYTE (M);
e951386e
KH
3912 if (M < 128)
3913 goto invalid_code;
103e0180 3914 ONE_MORE_BYTE (L);
e951386e
KH
3915 if (L < 128)
3916 goto invalid_code;
103e0180 3917 size = ((M - 128) * 128) + (L - 128);
e951386e 3918 if (charbuf + 6 > charbuf_end)
4776e638
KH
3919 goto break_loop;
3920 *charbuf++ = ISO_CODE_ESC;
3921 *charbuf++ = '%';
3922 *charbuf++ = '/';
3923 *charbuf++ = dim;
3924 *charbuf++ = BYTE8_TO_CHAR (M);
3925 *charbuf++ = BYTE8_TO_CHAR (L);
e951386e 3926 CODING_ISO_EXTSEGMENT_LEN (coding) = size;
103e0180
KH
3927 }
3928 else if (c1 == 'G')
3929 {
103e0180
KH
3930 /* XFree86 extension for embedding UTF-8 in CTEXT:
3931 ESC % G --UTF-8-BYTES-- ESC % @
3932 We keep these bytes as is for the moment.
3933 They may be decoded by post-read-conversion. */
e951386e 3934 if (charbuf + 3 > charbuf_end)
4776e638 3935 goto break_loop;
e951386e
KH
3936 *charbuf++ = ISO_CODE_ESC;
3937 *charbuf++ = '%';
3938 *charbuf++ = 'G';
3939 CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
103e0180
KH
3940 }
3941 else
4776e638 3942 goto invalid_code;
103e0180 3943 continue;
4776e638 3944 break;
103e0180 3945
4ed46869 3946 default:
df7492f9
KH
3947 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3948 goto invalid_code;
134b9549
KH
3949 {
3950 int reg, chars96;
3951
3952 if (c1 >= 0x28 && c1 <= 0x2B)
3953 { /* designation of DIMENSION1_CHARS94 character set */
3954 reg = c1 - 0x28, chars96 = 0;
3955 ONE_MORE_BYTE (c1);
3956 }
3957 else if (c1 >= 0x2C && c1 <= 0x2F)
3958 { /* designation of DIMENSION1_CHARS96 character set */
3959 reg = c1 - 0x2C, chars96 = 1;
3960 ONE_MORE_BYTE (c1);
3961 }
3962 else
3963 goto invalid_code;
3964 DECODE_DESIGNATION (reg, 1, chars96, c1);
3965 /* We must update these variables now. */
3966 if (reg == 0)
3967 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3968 else if (reg == 1)
3969 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3970 if (chars96 < 0)
3971 goto invalid_code;
3972 }
b73bfc1c 3973 continue;
4ed46869 3974 }
b73bfc1c 3975 }
4ed46869 3976
e951386e
KH
3977 if (cmp_status->state == COMPOSING_NO
3978 && charset->id != charset_ascii
ff0dacd7
KH
3979 && last_id != charset->id)
3980 {
3981 if (last_id != charset_ascii)
69a80ea3 3982 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
3983 last_id = charset->id;
3984 last_offset = char_offset;
3985 }
3986
b73bfc1c 3987 /* Now we know CHARSET and 1st position code C1 of a character.
cf299835
KH
3988 Produce a decoded character while getting 2nd and 3rd
3989 position codes C2, C3 if necessary. */
df7492f9 3990 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3991 {
3992 ONE_MORE_BYTE (c2);
cf299835
KH
3993 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3994 || ((c1 & 0x80) != (c2 & 0x80)))
b73bfc1c 3995 /* C2 is not in a valid range. */
df7492f9 3996 goto invalid_code;
cf299835
KH
3997 if (CHARSET_DIMENSION (charset) == 2)
3998 c1 = (c1 << 8) | c2;
3999 else
df7492f9 4000 {
cf299835
KH
4001 ONE_MORE_BYTE (c3);
4002 if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
4003 || ((c1 & 0x80) != (c3 & 0x80)))
4004 /* C3 is not in a valid range. */
df7492f9 4005 goto invalid_code;
cf299835 4006 c1 = (c1 << 16) | (c2 << 8) | c2;
df7492f9
KH
4007 }
4008 }
cf299835 4009 c1 &= 0x7F7F7F;
df7492f9
KH
4010 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4011 if (c < 0)
4012 {
4013 MAYBE_FINISH_COMPOSITION ();
4014 for (; src_base < src; src_base++, char_offset++)
4015 {
4016 if (ASCII_BYTE_P (*src_base))
4017 *charbuf++ = *src_base;
4018 else
4019 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4020 }
4021 }
e951386e 4022 else if (cmp_status->state == COMPOSING_NO)
df7492f9
KH
4023 {
4024 *charbuf++ = c;
4025 char_offset++;
4ed46869 4026 }
e951386e
KH
4027 else if ((cmp_status->state == COMPOSING_CHAR
4028 ? cmp_status->nchars
4029 : cmp_status->ncomps)
4030 >= MAX_COMPOSITION_COMPONENTS)
781d7a48 4031 {
e951386e
KH
4032 /* Too long composition. */
4033 MAYBE_FINISH_COMPOSITION ();
4034 *charbuf++ = c;
4035 char_offset++;
4ed46869 4036 }
e951386e
KH
4037 else
4038 STORE_COMPOSITION_CHAR (c);
4ed46869
KH
4039 continue;
4040
df7492f9
KH
4041 invalid_code:
4042 MAYBE_FINISH_COMPOSITION ();
4ed46869 4043 src = src_base;
df7492f9
KH
4044 consumed_chars = consumed_chars_base;
4045 ONE_MORE_BYTE (c);
065e3595 4046 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4047 char_offset++;
df7492f9 4048 coding->errors++;
4776e638
KH
4049 continue;
4050
4051 break_loop:
4052 break;
4ed46869 4053 }
fb88bf2d 4054
df7492f9 4055 no_more_source:
e951386e
KH
4056 if (cmp_status->state != COMPOSING_NO)
4057 {
4058 if (coding->mode & CODING_MODE_LAST_BLOCK)
4059 MAYBE_FINISH_COMPOSITION ();
4060 else
4061 {
4062 charbuf -= cmp_status->length;
4063 for (i = 0; i < cmp_status->length; i++)
4064 cmp_status->carryover[i] = charbuf[i];
4065 }
4066 }
4067 else if (last_id != charset_ascii)
69a80ea3 4068 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4069 coding->consumed_char += consumed_chars_base;
4070 coding->consumed = src_base - coding->source;
4071 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4072}
4073
b73bfc1c 4074
f4dee582 4075/* ISO2022 encoding stuff. */
4ed46869
KH
4076
4077/*
f4dee582 4078 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 4079 specify more details. In Emacs, each coding system of ISO2022
4ed46869 4080 variant has the following specifications:
df7492f9 4081 1. Initial designation to G0 thru G3.
4ed46869
KH
4082 2. Allows short-form designation?
4083 3. ASCII should be designated to G0 before control characters?
4084 4. ASCII should be designated to G0 at end of line?
4085 5. 7-bit environment or 8-bit environment?
4086 6. Use locking-shift?
4087 7. Use Single-shift?
4088 And the following two are only for Japanese:
4089 8. Use ASCII in place of JIS0201-1976-Roman?
4090 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
4091 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4092 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 4093 details.
4ed46869
KH
4094*/
4095
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, advancing DST through the EMIT_* macros, and then
   record CHARSET as the current designation of REG in CODING.

   The sequence consists of:
     - `ESC & <'@' + revision>', only when CODING has
       CODING_ISO_FLAG_REVISION and the revision of CHARSET is
       non-negative;
     - ESC;
     - `$' when the dimension of CHARSET is not 1;
     - an intermediate character selecting REG, taken from "()*+" for
       a 94-character set or ",-./" for a 96-character set;
     - the final character of CHARSET.

   For a 94-character set whose dimension is not 1, the intermediate
   character is omitted (short form) when REG is 0, the final
   character is '@', 'A' or 'B', and CODING does not have
   CODING_ISO_FLAG_LONG_FORM.

   CHARSET, REG and CODING are evaluated more than once.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    const char *inter_94 = "()*+";                                      \
    const char *inter_96 = ",-./";                                      \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    int revision                                                        \
      = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)         \
         ? CHARSET_ISO_REVISION (charset) : -1);                        \
    int c;                                                              \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) == 1)                               \
      {                                                                 \
        c = (CHARSET_ISO_CHARS_96 (charset)                             \
             ? inter_96[reg] : inter_94[reg]);                          \
        EMIT_ONE_ASCII_BYTE (c);                                        \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          EMIT_ONE_ASCII_BYTE (inter_96[reg]);                          \
        else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM  \
                 || reg != 0                                            \
                 || final_char < '@' || final_char > 'B')               \
          EMIT_ONE_ASCII_BYTE (inter_94[reg]);                          \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4143
df7492f9 4144
4ed46869
KH
/* The following two macros emit the ISO 2022 single-shift functions
   single-shift-2 and single-shift-3.  When CODING has
   CODING_ISO_FLAG_SEVEN_BITS the escape-sequence form (ESC 'N' or
   ESC 'O') is emitted; otherwise the single byte ISO_CODE_SS2 or
   ISO_CODE_SS3 is emitted.  In both cases CODING is then marked as
   being in single-shift state.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4167
df7492f9 4168
4ed46869
KH
/* The following four macros emit the ISO 2022 locking-shift functions
   and record in CODING which graphic register is now invoked to
   graphic plane 0:

     ENCODE_SHIFT_IN          ISO_CODE_SI   register 0
     ENCODE_SHIFT_OUT         ISO_CODE_SO   register 1
     ENCODE_LOCKING_SHIFT_2   ESC 'n'       register 2
     ENCODE_LOCKING_SHIFT_3   ESC 'o'       register 3

   The escape-sequence final bytes must match what the decoder in
   this file accepts: 'n' invokes locking-shift-2 and 'o' invokes
   locking-shift-3.  */

#define ENCODE_SHIFT_IN                         \
  do {                                          \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);          \
    CODING_ISO_INVOCATION (coding, 0) = 0;      \
  } while (0)


#define ENCODE_SHIFT_OUT                        \
  do {                                          \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);          \
    CODING_ISO_INVOCATION (coding, 0) = 1;      \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                  \
  do {                                          \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');   \
    CODING_ISO_INVOCATION (coding, 0) = 2;      \
  } while (0)


/* Locking-shift-3 is ESC 'o'; emitting ESC 'n' here would make a
   decoder invoke register 2 instead of register 3.  */
#define ENCODE_LOCKING_SHIFT_3                  \
  do {                                          \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');   \
    CODING_ISO_INVOCATION (coding, 0) = 3;      \
  } while (0)
4199
df7492f9 4200
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be a modifiable lvalue: when CODING has
   CODING_ISO_FLAG_USE_ROMAN and CHARSET is ASCII, it is replaced by
   the charset whose id is charset_jisx0201_roman.

   The byte is emitted with the 8th bit cleared when CHARSET is
   invoked to graphic plane 0 (or when single-shifting in a 7-bit
   coding system), and with the 8th bit set when it is invoked to
   graphic plane 1 (or when single-shifting otherwise).  C1 is
   parenthesized at every use so that an expression argument is
   masked as a whole.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
4243
df7492f9 4244
f4dee582
RS
/* Emit a two-byte (DIMENSION2) character of CHARSET whose position
   codes are C1 and C2, first emitting whatever designation and
   invocation sequences are needed.

   CHARSET must be a modifiable lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS and CHARSET is charset_jisx0208, it is
   replaced by the charset whose id is charset_jisx0208_1978.

   Each pass of the loop either emits the two bytes and leaves, or
   calls encode_invocation_designation and tries again.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
        && id == charset_jisx0208)                                      \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    /* A pending single shift decides the form by itself.  */           \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        else                                                            \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    /* Invoked to graphic plane 0: 8th bits cleared.  */                \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    /* Invoked to graphic plane 1: 8th bits set.  */                    \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
    /* Not invoked anywhere yet: designate and/or invoke it, then       \
       go round the loop again to emit the character itself.  */        \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
4287
05e6f5dc 4288
df7492f9
KH
/* Encode character C of CHARSET.  The code point is obtained with
   ENCODE_CHAR; a charset of dimension 1 is emitted through
   ENCODE_ISO_CHARACTER_DIMENSION1, any other charset through
   ENCODE_ISO_CHARACTER_DIMENSION2 with the code split into its
   second-lowest and lowest bytes.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset),(c));                                \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
  } while (0)
bdd9fb48 4298
05e6f5dc 4299
4ed46869 4300/* Produce designation and invocation codes at a place pointed by DST
df7492f9 4301 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
4302 Return new DST. */
4303
4304unsigned char *
df7492f9
KH
4305encode_invocation_designation (charset, coding, dst, p_nchars)
4306 struct charset *charset;
4ed46869
KH
4307 struct coding_system *coding;
4308 unsigned char *dst;
df7492f9 4309 int *p_nchars;
4ed46869 4310{
df7492f9
KH
4311 int multibytep = coding->dst_multibyte;
4312 int produced_chars = *p_nchars;
4ed46869 4313 int reg; /* graphic register number */
df7492f9 4314 int id = CHARSET_ID (charset);
4ed46869
KH
4315
4316 /* At first, check designations. */
4317 for (reg = 0; reg < 4; reg++)
df7492f9 4318 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
4319 break;
4320
4321 if (reg >= 4)
4322 {
4323 /* CHARSET is not yet designated to any graphic registers. */
4324 /* At first check the requested designation. */
df7492f9
KH
4325 reg = CODING_ISO_REQUEST (coding, id);
4326 if (reg < 0)
1ba9e4ab
KH
4327 /* Since CHARSET requests no special designation, designate it
4328 to graphic register 0. */
4ed46869
KH
4329 reg = 0;
4330
4331 ENCODE_DESIGNATION (charset, reg, coding);
4332 }
4333
df7492f9
KH
4334 if (CODING_ISO_INVOCATION (coding, 0) != reg
4335 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
4336 {
4337 /* Since the graphic register REG is not invoked to any graphic
4338 planes, invoke it to graphic plane 0. */
4339 switch (reg)
4340 {
4341 case 0: /* graphic register 0 */
4342 ENCODE_SHIFT_IN;
4343 break;
4344
4345 case 1: /* graphic register 1 */
4346 ENCODE_SHIFT_OUT;
4347 break;
4348
4349 case 2: /* graphic register 2 */
df7492f9 4350 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
4351 ENCODE_SINGLE_SHIFT_2;
4352 else
4353 ENCODE_LOCKING_SHIFT_2;
4354 break;
4355
4356 case 3: /* graphic register 3 */
df7492f9 4357 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
4358 ENCODE_SINGLE_SHIFT_3;
4359 else
4360 ENCODE_LOCKING_SHIFT_3;
4361 break;
4362 }
4363 }
b73bfc1c 4364
df7492f9 4365 *p_nchars = produced_chars;
4ed46869
KH
4366 return dst;
4367}
4368
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the control sequence
   introducer: ESC '[' when CODING has CODING_ISO_FLAG_SEVEN_BITS,
   the single byte ISO_CODE_CSI otherwise.  The flag is tested with
   `&' because CODING_ISO_FLAGS is a set of bit flags; comparing the
   whole word with `==' would only match when no other flag is set.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so the
   two direction macros must invoke it without an argument list.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)


/* Emit CSI '2' ']'.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)


/* Emit CSI '0' ']'.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
4ed46869 4392
4ed46869
KH
4393
/* Emit the codes that bring the graphic planes and registers back to
   their initial state: a shift-in when graphic plane 0 does not have
   register 0 invoked, followed by a designation for every register
   whose initial charset is set (non-negative) and differs from its
   current designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        /* No initial charset for this register.  */                    \
        if (CODING_ISO_INITIAL (coding, reg) < 0)                       \
          continue;                                                     \
        /* Already designated as initially.  */                         \
        if (CODING_ISO_DESIGNATION (coding, reg)                        \
            == CODING_ISO_INITIAL (coding, reg))                        \
          continue;                                                     \
        charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));   \
        ENCODE_DESIGNATION (charset, reg, coding);                      \
      }                                                                 \
  } while (0)
4412
df7492f9 4413
bdd9fb48 4414/* Produce designation sequences of charsets in the line started from
b73bfc1c 4415 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
4416
4417 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
4418 find all the necessary designations. */
4419
b73bfc1c 4420static unsigned char *
df7492f9 4421encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 4422 struct coding_system *coding;
df7492f9
KH
4423 int *charbuf, *charbuf_end;
4424 unsigned char *dst;
e0e989f6 4425{
df7492f9 4426 struct charset *charset;
bdd9fb48
KH
4427 /* Table of charsets to be designated to each graphic register. */
4428 int r[4];
df7492f9
KH
4429 int c, found = 0, reg;
4430 int produced_chars = 0;
4431 int multibytep = coding->dst_multibyte;
4432 Lisp_Object attrs;
4433 Lisp_Object charset_list;
4434
4435 attrs = CODING_ID_ATTRS (coding->id);
4436 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4437 if (EQ (charset_list, Qiso_2022))
4438 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
4439
4440 for (reg = 0; reg < 4; reg++)
4441 r[reg] = -1;
4442
b73bfc1c 4443 while (found < 4)
e0e989f6 4444 {
df7492f9
KH
4445 int id;
4446
4447 c = *charbuf++;
b73bfc1c
KH
4448 if (c == '\n')
4449 break;
df7492f9
KH
4450 charset = char_charset (c, charset_list, NULL);
4451 id = CHARSET_ID (charset);
4452 reg = CODING_ISO_REQUEST (coding, id);
4453 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
4454 {
4455 found++;
df7492f9 4456 r[reg] = id;
bdd9fb48 4457 }
bdd9fb48
KH
4458 }
4459
4460 if (found)
4461 {
4462 for (reg = 0; reg < 4; reg++)
4463 if (r[reg] >= 0
df7492f9
KH
4464 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4465 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 4466 }
b73bfc1c
KH
4467
4468 return dst;
e0e989f6
KH
4469}
4470
4ed46869
KH
4471/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
4472
df7492f9
KH
4473static int
4474encode_coding_iso_2022 (coding)
4ed46869 4475 struct coding_system *coding;
4ed46869 4476{
df7492f9
KH
4477 int multibytep = coding->dst_multibyte;
4478 int *charbuf = coding->charbuf;
4479 int *charbuf_end = charbuf + coding->charbuf_used;
4480 unsigned char *dst = coding->destination + coding->produced;
4481 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4482 int safe_room = 16;
4483 int bol_designation
4484 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4485 && CODING_ISO_BOL (coding));
4486 int produced_chars = 0;
4487 Lisp_Object attrs, eol_type, charset_list;
4488 int ascii_compatible;
b73bfc1c 4489 int c;
ff0dacd7 4490 int preferred_charset_id = -1;
05e6f5dc 4491
24a73b0a 4492 CODING_GET_INFO (coding, attrs, charset_list);
0a9564cb 4493 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
24a73b0a
KH
4494 if (VECTORP (eol_type))
4495 eol_type = Qunix;
4496
004068e4 4497 setup_iso_safe_charsets (attrs);
ff0dacd7 4498 /* Charset list may have been changed. */
287c57d7 4499 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
1b3b981b 4500 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
0eecad43 4501
df7492f9 4502 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
bdd9fb48 4503
df7492f9 4504 while (charbuf < charbuf_end)
4ed46869 4505 {
df7492f9 4506 ASSURE_DESTINATION (safe_room);
b73bfc1c 4507
df7492f9 4508 if (bol_designation)
b73bfc1c 4509 {
df7492f9 4510 unsigned char *dst_prev = dst;
4ed46869 4511
bdd9fb48 4512 /* We have to produce designation sequences if any now. */
df7492f9
KH
4513 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4514 bol_designation = 0;
4515 /* We are sure that designation sequences are all ASCII bytes. */
4516 produced_chars += dst - dst_prev;
e0e989f6
KH
4517 }
4518
df7492f9 4519 c = *charbuf++;
ec6d2bb8 4520
ff0dacd7
KH
4521 if (c < 0)
4522 {
4523 /* Handle an annotation. */
4524 switch (*charbuf)
ec6d2bb8 4525 {
ff0dacd7
KH
4526 case CODING_ANNOTATE_COMPOSITION_MASK:
4527 /* Not yet implemented. */
4528 break;
4529 case CODING_ANNOTATE_CHARSET_MASK:
cf7dfdf5 4530 preferred_charset_id = charbuf[2];
ff0dacd7
KH
4531 if (preferred_charset_id >= 0
4532 && NILP (Fmemq (make_number (preferred_charset_id),
4533 charset_list)))
4534 preferred_charset_id = -1;
4535 break;
4536 default:
4537 abort ();
4ed46869 4538 }
ff0dacd7
KH
4539 charbuf += -c - 1;
4540 continue;
4ed46869 4541 }
ec6d2bb8 4542
b73bfc1c
KH
4543 /* Now encode the character C. */
4544 if (c < 0x20 || c == 0x7F)
4545 {
df7492f9
KH
4546 if (c == '\n'
4547 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 4548 {
df7492f9
KH
4549 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4550 ENCODE_RESET_PLANE_AND_REGISTER ();
4551 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 4552 {
df7492f9
KH
4553 int i;
4554
4555 for (i = 0; i < 4; i++)
4556 CODING_ISO_DESIGNATION (coding, i)
4557 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 4558 }
df7492f9
KH
4559 bol_designation
4560 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 4561 }
df7492f9
KH
4562 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4563 ENCODE_RESET_PLANE_AND_REGISTER ();
4564 EMIT_ONE_ASCII_BYTE (c);
4ed46869 4565 }
df7492f9 4566 else if (ASCII_CHAR_P (c))
88993dfd 4567 {
df7492f9
KH
4568 if (ascii_compatible)
4569 EMIT_ONE_ASCII_BYTE (c);
93dec019 4570 else
19a8d9e0 4571 {
bf16eb23
KH
4572 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4573 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 4574 }
4ed46869 4575 }
16eafb5d 4576 else if (CHAR_BYTE8_P (c))
88993dfd 4577 {
16eafb5d
KH
4578 c = CHAR_TO_BYTE8 (c);
4579 EMIT_ONE_BYTE (c);
88993dfd 4580 }
b73bfc1c 4581 else
df7492f9 4582 {
ff0dacd7 4583 struct charset *charset;
b73bfc1c 4584
ff0dacd7
KH
4585 if (preferred_charset_id >= 0)
4586 {
4587 charset = CHARSET_FROM_ID (preferred_charset_id);
4588 if (! CHAR_CHARSET_P (c, charset))
4589 charset = char_charset (c, charset_list, NULL);
4590 }
4591 else
4592 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
4593 if (!charset)
4594 {
41cbe562
KH
4595 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4596 {
4597 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4598 charset = CHARSET_FROM_ID (charset_ascii);
4599 }
4600 else
4601 {
4602 c = coding->default_char;
4603 charset = char_charset (c, charset_list, NULL);
4604 }
df7492f9
KH
4605 }
4606 ENCODE_ISO_CHARACTER (charset, c);
4607 }
84fbb8a0 4608 }
b73bfc1c 4609
df7492f9
KH
4610 if (coding->mode & CODING_MODE_LAST_BLOCK
4611 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4612 {
4613 ASSURE_DESTINATION (safe_room);
4614 ENCODE_RESET_PLANE_AND_REGISTER ();
4615 }
065e3595 4616 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4617 CODING_ISO_BOL (coding) = bol_designation;
4618 coding->produced_char += produced_chars;
4619 coding->produced = dst - coding->destination;
4620 return 0;
4ed46869
KH
4621}
4622
4623\f
df7492f9 4624/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 4625
/* Although SJIS and BIG5 are not ISO coding systems, they are used
   quite widely.  So, for the moment, Emacs supports them directly in
   C code.  In the future, however, they may be supported only by
   CCL.  */
4629
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the ranges below.

   --- CODE RANGE of SJIS ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   KATAKANA-JISX0201    0xA0 .. 0xDF
   JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
            (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
   -------------------------------

*/
4646
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and each of its characters is encoded in two bytes.

   --- CODE RANGE of BIG5 ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   Big5 (1st byte)      0xA1 .. 0xFE
        (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
   --------------------------

 */
4ed46869
KH
4659
4660/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4661 Check if a text is encoded in SJIS. If it is, return
df7492f9 4662 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 4663
0a28aafb 4664static int
ff0dacd7 4665detect_coding_sjis (coding, detect_info)
df7492f9 4666 struct coding_system *coding;
ff0dacd7 4667 struct coding_detection_info *detect_info;
4ed46869 4668{
065e3595 4669 const unsigned char *src = coding->source, *src_base;
8f924df7 4670 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4671 int multibytep = coding->src_multibyte;
4672 int consumed_chars = 0;
4673 int found = 0;
b73bfc1c 4674 int c;
f07190ca
KH
4675 Lisp_Object attrs, charset_list;
4676 int max_first_byte_of_2_byte_code;
4677
4678 CODING_GET_INFO (coding, attrs, charset_list);
4679 max_first_byte_of_2_byte_code
4680 = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
df7492f9 4681
ff0dacd7 4682 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
4683 /* A coding system of this category is always ASCII compatible. */
4684 src += coding->head_ascii;
4ed46869 4685
b73bfc1c 4686 while (1)
4ed46869 4687 {
065e3595 4688 src_base = src;
df7492f9 4689 ONE_MORE_BYTE (c);
682169fe
KH
4690 if (c < 0x80)
4691 continue;
f07190ca
KH
4692 if ((c >= 0x81 && c <= 0x9F)
4693 || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4ed46869 4694 {
df7492f9 4695 ONE_MORE_BYTE (c);
682169fe 4696 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 4697 break;
ff0dacd7 4698 found = CATEGORY_MASK_SJIS;
4ed46869 4699 }
df7492f9 4700 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 4701 found = CATEGORY_MASK_SJIS;
df7492f9
KH
4702 else
4703 break;
4ed46869 4704 }
ff0dacd7 4705 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
4706 return 0;
4707
4708 no_more_source:
065e3595 4709 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4710 {
ff0dacd7 4711 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 4712 return 0;
4ed46869 4713 }
ff0dacd7
KH
4714 detect_info->found |= found;
4715 return 1;
4ed46869
KH
4716}
4717
4718/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4719 Check if a text is encoded in BIG5. If it is, return
df7492f9 4720 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 4721
0a28aafb 4722static int
ff0dacd7 4723detect_coding_big5 (coding, detect_info)
df7492f9 4724 struct coding_system *coding;
ff0dacd7 4725 struct coding_detection_info *detect_info;
4ed46869 4726{
065e3595 4727 const unsigned char *src = coding->source, *src_base;
8f924df7 4728 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4729 int multibytep = coding->src_multibyte;
4730 int consumed_chars = 0;
4731 int found = 0;
b73bfc1c 4732 int c;
fa42c37f 4733
ff0dacd7 4734 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4735 /* A coding system of this category is always ASCII compatible. */
4736 src += coding->head_ascii;
fa42c37f 4737
b73bfc1c 4738 while (1)
fa42c37f 4739 {
065e3595 4740 src_base = src;
df7492f9
KH
4741 ONE_MORE_BYTE (c);
4742 if (c < 0x80)
fa42c37f 4743 continue;
df7492f9 4744 if (c >= 0xA1)
fa42c37f 4745 {
df7492f9
KH
4746 ONE_MORE_BYTE (c);
4747 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4748 return 0;
ff0dacd7 4749 found = CATEGORY_MASK_BIG5;
fa42c37f 4750 }
df7492f9
KH
4751 else
4752 break;
fa42c37f 4753 }
ff0dacd7 4754 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4755 return 0;
fa42c37f 4756
df7492f9 4757 no_more_source:
065e3595 4758 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4759 {
ff0dacd7 4760 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4761 return 0;
4762 }
ff0dacd7
KH
4763 detect_info->found |= found;
4764 return 1;
fa42c37f
KH
4765}
4766
4ed46869
KH
4767/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4768 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
fa42c37f 4769
b73bfc1c 4770static void
df7492f9 4771decode_coding_sjis (coding)
4ed46869 4772 struct coding_system *coding;
4ed46869 4773{
8f924df7
KH
4774 const unsigned char *src = coding->source + coding->consumed;
4775 const unsigned char *src_end = coding->source + coding->src_bytes;
4776 const unsigned char *src_base;
69a80ea3 4777 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
4778 /* We may produce one charset annocation in one loop and one more at
4779 the end. */
69a80ea3 4780 int *charbuf_end
df80c7f0 4781 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
df7492f9
KH
4782 int consumed_chars = 0, consumed_chars_base;
4783 int multibytep = coding->src_multibyte;
4784 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4785 struct charset *charset_kanji2;
24a73b0a 4786 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4787 int char_offset = coding->produced_char;
4788 int last_offset = char_offset;
4789 int last_id = charset_ascii;
0a9564cb
EZ
4790 int eol_crlf =
4791 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 4792 int byte_after_cr = -1;
a5d301df 4793
24a73b0a 4794 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4795
4796 val = charset_list;
4797 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4798 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4799 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4800 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4801
b73bfc1c 4802 while (1)
4ed46869 4803 {
df7492f9 4804 int c, c1;
24a73b0a 4805 struct charset *charset;
fa42c37f 4806
b73bfc1c 4807 src_base = src;
df7492f9 4808 consumed_chars_base = consumed_chars;
fa42c37f 4809
df7492f9 4810 if (charbuf >= charbuf_end)
b71f6f73
KH
4811 {
4812 if (byte_after_cr >= 0)
4813 src_base--;
4814 break;
4815 }
df7492f9 4816
119852e7
KH
4817 if (byte_after_cr >= 0)
4818 c = byte_after_cr, byte_after_cr = -1;
4819 else
4820 ONE_MORE_BYTE (c);
065e3595
KH
4821 if (c < 0)
4822 goto invalid_code;
24a73b0a 4823 if (c < 0x80)
119852e7
KH
4824 {
4825 if (eol_crlf && c == '\r')
4826 ONE_MORE_BYTE (byte_after_cr);
4827 charset = charset_roman;
4828 }
57a47f8a 4829 else if (c == 0x80 || c == 0xA0)
8e921c4b 4830 goto invalid_code;
57a47f8a
KH
4831 else if (c >= 0xA1 && c <= 0xDF)
4832 {
4833 /* SJIS -> JISX0201-Kana */
4834 c &= 0x7F;
4835 charset = charset_kana;
4836 }
4837 else if (c <= 0xEF)
df7492f9 4838 {
57a47f8a
KH
4839 /* SJIS -> JISX0208 */
4840 ONE_MORE_BYTE (c1);
4841 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4842 goto invalid_code;
57a47f8a
KH
4843 c = (c << 8) | c1;
4844 SJIS_TO_JIS (c);
4845 charset = charset_kanji;
4846 }
4847 else if (c <= 0xFC && charset_kanji2)
4848 {
c6876370 4849 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4850 ONE_MORE_BYTE (c1);
4851 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4852 goto invalid_code;
57a47f8a
KH
4853 c = (c << 8) | c1;
4854 SJIS_TO_JIS2 (c);
4855 charset = charset_kanji2;
df7492f9 4856 }
57a47f8a
KH
4857 else
4858 goto invalid_code;
24a73b0a
KH
4859 if (charset->id != charset_ascii
4860 && last_id != charset->id)
4861 {
4862 if (last_id != charset_ascii)
69a80ea3 4863 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4864 last_id = charset->id;
4865 last_offset = char_offset;
4866 }
4867 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4868 *charbuf++ = c;
ff0dacd7 4869 char_offset++;
df7492f9 4870 continue;
b73bfc1c 4871
df7492f9
KH
4872 invalid_code:
4873 src = src_base;
4874 consumed_chars = consumed_chars_base;
4875 ONE_MORE_BYTE (c);
065e3595 4876 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4877 char_offset++;
df7492f9
KH
4878 coding->errors++;
4879 }
fa42c37f 4880
df7492f9 4881 no_more_source:
ff0dacd7 4882 if (last_id != charset_ascii)
69a80ea3 4883 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4884 coding->consumed_char += consumed_chars_base;
4885 coding->consumed = src_base - coding->source;
4886 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4887}
4888
b73bfc1c 4889static void
df7492f9 4890decode_coding_big5 (coding)
4ed46869 4891 struct coding_system *coding;
4ed46869 4892{
8f924df7
KH
4893 const unsigned char *src = coding->source + coding->consumed;
4894 const unsigned char *src_end = coding->source + coding->src_bytes;
4895 const unsigned char *src_base;
69a80ea3 4896 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
4897 /* We may produce one charset annocation in one loop and one more at
4898 the end. */
69a80ea3 4899 int *charbuf_end
df80c7f0 4900 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
df7492f9
KH
4901 int consumed_chars = 0, consumed_chars_base;
4902 int multibytep = coding->src_multibyte;
4903 struct charset *charset_roman, *charset_big5;
24a73b0a 4904 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4905 int char_offset = coding->produced_char;
4906 int last_offset = char_offset;
4907 int last_id = charset_ascii;
0a9564cb
EZ
4908 int eol_crlf =
4909 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 4910 int byte_after_cr = -1;
df7492f9 4911
24a73b0a 4912 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4913 val = charset_list;
4914 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4915 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4916
b73bfc1c 4917 while (1)
4ed46869 4918 {
df7492f9 4919 int c, c1;
24a73b0a 4920 struct charset *charset;
b73bfc1c
KH
4921
4922 src_base = src;
df7492f9
KH
4923 consumed_chars_base = consumed_chars;
4924
4925 if (charbuf >= charbuf_end)
b71f6f73
KH
4926 {
4927 if (byte_after_cr >= 0)
4928 src_base--;
4929 break;
4930 }
df7492f9 4931
119852e7 4932 if (byte_after_cr >= 0)
14daee73 4933 c = byte_after_cr, byte_after_cr = -1;
119852e7
KH
4934 else
4935 ONE_MORE_BYTE (c);
b73bfc1c 4936
065e3595
KH
4937 if (c < 0)
4938 goto invalid_code;
24a73b0a 4939 if (c < 0x80)
119852e7 4940 {
14daee73 4941 if (eol_crlf && c == '\r')
119852e7
KH
4942 ONE_MORE_BYTE (byte_after_cr);
4943 charset = charset_roman;
4944 }
24a73b0a 4945 else
4ed46869 4946 {
24a73b0a
KH
4947 /* BIG5 -> Big5 */
4948 if (c < 0xA1 || c > 0xFE)
4949 goto invalid_code;
4950 ONE_MORE_BYTE (c1);
4951 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4952 goto invalid_code;
4953 c = c << 8 | c1;
4954 charset = charset_big5;
4ed46869 4955 }
24a73b0a
KH
4956 if (charset->id != charset_ascii
4957 && last_id != charset->id)
df7492f9 4958 {
24a73b0a 4959 if (last_id != charset_ascii)
69a80ea3 4960 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4961 last_id = charset->id;
4962 last_offset = char_offset;
4ed46869 4963 }
24a73b0a 4964 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4965 *charbuf++ = c;
ff0dacd7 4966 char_offset++;
fb88bf2d
KH
4967 continue;
4968
df7492f9 4969 invalid_code:
4ed46869 4970 src = src_base;
df7492f9
KH
4971 consumed_chars = consumed_chars_base;
4972 ONE_MORE_BYTE (c);
065e3595 4973 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4974 char_offset++;
df7492f9 4975 coding->errors++;
fb88bf2d 4976 }
d46c5b12 4977
df7492f9 4978 no_more_source:
ff0dacd7 4979 if (last_id != charset_ascii)
69a80ea3 4980 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4981 coding->consumed_char += consumed_chars_base;
4982 coding->consumed = src_base - coding->source;
4983 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4984}
4985
4986/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4987 This function can encode charsets `ascii', `katakana-jisx0201',
4988 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4989 are sure that all these charsets are registered as official charset
4ed46869
KH
4990 (i.e. do not have extended leading-codes). Characters of other
4991 charsets are produced without any encoding. If SJIS_P is 1, encode
4992 SJIS text, else encode BIG5 text. */
4993
df7492f9
KH
4994static int
4995encode_coding_sjis (coding)
4ed46869 4996 struct coding_system *coding;
4ed46869 4997{
df7492f9
KH
4998 int multibytep = coding->dst_multibyte;
4999 int *charbuf = coding->charbuf;
5000 int *charbuf_end = charbuf + coding->charbuf_used;
5001 unsigned char *dst = coding->destination + coding->produced;
5002 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5003 int safe_room = 4;
5004 int produced_chars = 0;
24a73b0a 5005 Lisp_Object attrs, charset_list, val;
df7492f9
KH
5006 int ascii_compatible;
5007 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 5008 struct charset *charset_kanji2;
df7492f9 5009 int c;
a5d301df 5010
24a73b0a 5011 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
5012 val = charset_list;
5013 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5014 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
5015 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5016 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 5017
df7492f9 5018 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 5019
df7492f9
KH
5020 while (charbuf < charbuf_end)
5021 {
5022 ASSURE_DESTINATION (safe_room);
5023 c = *charbuf++;
b73bfc1c 5024 /* Now encode the character C. */
df7492f9
KH
5025 if (ASCII_CHAR_P (c) && ascii_compatible)
5026 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
5027 else if (CHAR_BYTE8_P (c))
5028 {
5029 c = CHAR_TO_BYTE8 (c);
5030 EMIT_ONE_BYTE (c);
5031 }
df7492f9 5032 else
b73bfc1c 5033 {
df7492f9
KH
5034 unsigned code;
5035 struct charset *charset = char_charset (c, charset_list, &code);
5036
5037 if (!charset)
4ed46869 5038 {
41cbe562 5039 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 5040 {
41cbe562
KH
5041 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5042 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 5043 }
41cbe562 5044 else
b73bfc1c 5045 {
41cbe562
KH
5046 c = coding->default_char;
5047 charset = char_charset (c, charset_list, &code);
b73bfc1c 5048 }
b73bfc1c 5049 }
df7492f9
KH
5050 if (code == CHARSET_INVALID_CODE (charset))
5051 abort ();
5052 if (charset == charset_kanji)
5053 {
5054 int c1, c2;
5055 JIS_TO_SJIS (code);
5056 c1 = code >> 8, c2 = code & 0xFF;
5057 EMIT_TWO_BYTES (c1, c2);
5058 }
5059 else if (charset == charset_kana)
5060 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
5061 else if (charset_kanji2 && charset == charset_kanji2)
5062 {
5063 int c1, c2;
5064
5065 c1 = code >> 8;
f07190ca
KH
5066 if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5067 || c1 == 0x28
57a47f8a
KH
5068 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5069 {
5070 JIS_TO_SJIS2 (code);
5071 c1 = code >> 8, c2 = code & 0xFF;
5072 EMIT_TWO_BYTES (c1, c2);
5073 }
5074 else
5075 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5076 }
df7492f9
KH
5077 else
5078 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5079 }
5080 }
065e3595 5081 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5082 coding->produced_char += produced_chars;
5083 coding->produced = dst - coding->destination;
5084 return 0;
5085}
5086
5087static int
5088encode_coding_big5 (coding)
5089 struct coding_system *coding;
5090{
5091 int multibytep = coding->dst_multibyte;
5092 int *charbuf = coding->charbuf;
5093 int *charbuf_end = charbuf + coding->charbuf_used;
5094 unsigned char *dst = coding->destination + coding->produced;
5095 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5096 int safe_room = 4;
5097 int produced_chars = 0;
24a73b0a 5098 Lisp_Object attrs, charset_list, val;
df7492f9
KH
5099 int ascii_compatible;
5100 struct charset *charset_roman, *charset_big5;
5101 int c;
5102
24a73b0a 5103 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
5104 val = charset_list;
5105 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5106 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5107 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5108
5109 while (charbuf < charbuf_end)
5110 {
5111 ASSURE_DESTINATION (safe_room);
5112 c = *charbuf++;
5113 /* Now encode the character C. */
5114 if (ASCII_CHAR_P (c) && ascii_compatible)
5115 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
5116 else if (CHAR_BYTE8_P (c))
5117 {
5118 c = CHAR_TO_BYTE8 (c);
5119 EMIT_ONE_BYTE (c);
b73bfc1c
KH
5120 }
5121 else
5122 {
df7492f9
KH
5123 unsigned code;
5124 struct charset *charset = char_charset (c, charset_list, &code);
5125
5126 if (! charset)
b73bfc1c 5127 {
41cbe562 5128 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 5129 {
41cbe562
KH
5130 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5131 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 5132 }
41cbe562 5133 else
0eecad43 5134 {
41cbe562
KH
5135 c = coding->default_char;
5136 charset = char_charset (c, charset_list, &code);
0eecad43 5137 }
4ed46869 5138 }
df7492f9
KH
5139 if (code == CHARSET_INVALID_CODE (charset))
5140 abort ();
5141 if (charset == charset_big5)
b73bfc1c 5142 {
df7492f9
KH
5143 int c1, c2;
5144
5145 c1 = code >> 8, c2 = code & 0xFF;
5146 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 5147 }
df7492f9
KH
5148 else
5149 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 5150 }
4ed46869 5151 }
065e3595 5152 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5153 coding->produced_char += produced_chars;
5154 coding->produced = dst - coding->destination;
5155 return 0;
4ed46869
KH
5156}
5157
5158\f
df7492f9 5159/*** 10. CCL handlers ***/
1397dc18
KH
5160
5161/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5162 Check if a text is encoded in a coding system of which
5163 encoder/decoder are written in CCL program. If it is, return
df7492f9 5164 CATEGORY_MASK_CCL, else return 0. */
1397dc18 5165
0a28aafb 5166static int
ff0dacd7 5167detect_coding_ccl (coding, detect_info)
df7492f9 5168 struct coding_system *coding;
ff0dacd7 5169 struct coding_detection_info *detect_info;
1397dc18 5170{
065e3595 5171 const unsigned char *src = coding->source, *src_base;
8f924df7 5172 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
5173 int multibytep = coding->src_multibyte;
5174 int consumed_chars = 0;
5175 int found = 0;
0e219d54 5176 unsigned char *valids;
df7492f9
KH
5177 int head_ascii = coding->head_ascii;
5178 Lisp_Object attrs;
5179
ff0dacd7
KH
5180 detect_info->checked |= CATEGORY_MASK_CCL;
5181
df7492f9 5182 coding = &coding_categories[coding_category_ccl];
0e219d54 5183 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
5184 attrs = CODING_ID_ATTRS (coding->id);
5185 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5186 src += head_ascii;
1397dc18 5187
b73bfc1c 5188 while (1)
1397dc18 5189 {
df7492f9 5190 int c;
065e3595
KH
5191
5192 src_base = src;
df7492f9 5193 ONE_MORE_BYTE (c);
065e3595 5194 if (c < 0 || ! valids[c])
df7492f9 5195 break;
ff0dacd7
KH
5196 if ((valids[c] > 1))
5197 found = CATEGORY_MASK_CCL;
df7492f9 5198 }
ff0dacd7 5199 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
5200 return 0;
5201
5202 no_more_source:
ff0dacd7
KH
5203 detect_info->found |= found;
5204 return 1;
df7492f9
KH
5205}
5206
5207static void
5208decode_coding_ccl (coding)
5209 struct coding_system *coding;
5210{
7c78e542 5211 const unsigned char *src = coding->source + coding->consumed;
8f924df7 5212 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
5213 int *charbuf = coding->charbuf + coding->charbuf_used;
5214 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
5215 int consumed_chars = 0;
5216 int multibytep = coding->src_multibyte;
5217 struct ccl_program ccl;
5218 int source_charbuf[1024];
5219 int source_byteidx[1024];
24a73b0a 5220 Lisp_Object attrs, charset_list;
df7492f9 5221
24a73b0a 5222 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
5223 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
5224
5225 while (src < src_end)
5226 {
7c78e542 5227 const unsigned char *p = src;
df7492f9
KH
5228 int *source, *source_end;
5229 int i = 0;
5230
5231 if (multibytep)
5232 while (i < 1024 && p < src_end)
5233 {
5234 source_byteidx[i] = p - src;
5235 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5236 }
5237 else
5238 while (i < 1024 && p < src_end)
5239 source_charbuf[i++] = *p++;
8f924df7 5240
df7492f9
KH
5241 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5242 ccl.last_block = 1;
5243
5244 source = source_charbuf;
5245 source_end = source + i;
5246 while (source < source_end)
5247 {
5248 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
5249 source_end - source, charbuf_end - charbuf,
5250 charset_list);
df7492f9
KH
5251 source += ccl.consumed;
5252 charbuf += ccl.produced;
5253 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
5254 break;
5255 }
5256 if (source < source_end)
5257 src += source_byteidx[source - source_charbuf];
5258 else
5259 src = p;
5260 consumed_chars += source - source_charbuf;
5261
5262 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
5263 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
5264 break;
5265 }
5266
5267 switch (ccl.status)
5268 {
5269 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 5270 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
5271 break;
5272 case CCL_STAT_SUSPEND_BY_DST:
5273 break;
5274 case CCL_STAT_QUIT:
5275 case CCL_STAT_INVALID_CMD:
065e3595 5276 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
5277 break;
5278 default:
065e3595 5279 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5280 break;
5281 }
5282 coding->consumed_char += consumed_chars;
5283 coding->consumed = src - coding->source;
5284 coding->charbuf_used = charbuf - coding->charbuf;
5285}
5286
5287static int
5288encode_coding_ccl (coding)
5289 struct coding_system *coding;
5290{
5291 struct ccl_program ccl;
5292 int multibytep = coding->dst_multibyte;
5293 int *charbuf = coding->charbuf;
5294 int *charbuf_end = charbuf + coding->charbuf_used;
5295 unsigned char *dst = coding->destination + coding->produced;
5296 unsigned char *dst_end = coding->destination + coding->dst_bytes;
df7492f9
KH
5297 int destination_charbuf[1024];
5298 int i, produced_chars = 0;
24a73b0a 5299 Lisp_Object attrs, charset_list;
df7492f9 5300
24a73b0a 5301 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
5302 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
5303
5304 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
5305 ccl.dst_multibyte = coding->dst_multibyte;
5306
8cffd3e7 5307 while (charbuf < charbuf_end)
df7492f9 5308 {
df7492f9 5309 ccl_driver (&ccl, charbuf, destination_charbuf,
8cffd3e7 5310 charbuf_end - charbuf, 1024, charset_list);
df7492f9 5311 if (multibytep)
8cffd3e7
KH
5312 {
5313 ASSURE_DESTINATION (ccl.produced * 2);
5314 for (i = 0; i < ccl.produced; i++)
5315 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5316 }
df7492f9
KH
5317 else
5318 {
8cffd3e7 5319 ASSURE_DESTINATION (ccl.produced);
3ed051d4 5320 for (i = 0; i < ccl.produced; i++)
df7492f9
KH
5321 *dst++ = destination_charbuf[i] & 0xFF;
5322 produced_chars += ccl.produced;
5323 }
8cffd3e7
KH
5324 charbuf += ccl.consumed;
5325 if (ccl.status == CCL_STAT_QUIT
5326 || ccl.status == CCL_STAT_INVALID_CMD)
5327 break;
df7492f9
KH
5328 }
5329
5330 switch (ccl.status)
5331 {
5332 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 5333 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
5334 break;
5335 case CCL_STAT_SUSPEND_BY_DST:
065e3595 5336 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
5337 break;
5338 case CCL_STAT_QUIT:
5339 case CCL_STAT_INVALID_CMD:
065e3595 5340 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
5341 break;
5342 default:
065e3595 5343 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 5344 break;
1397dc18 5345 }
df7492f9
KH
5346
5347 coding->produced_char += produced_chars;
5348 coding->produced = dst - coding->destination;
5349 return 0;
1397dc18
KH
5350}
5351
df7492f9 5352
1397dc18 5353\f
df7492f9 5354/*** 10, 11. no-conversion handlers ***/
4ed46869 5355
b73bfc1c 5356/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 5357
b73bfc1c 5358static void
df7492f9 5359decode_coding_raw_text (coding)
4ed46869 5360 struct coding_system *coding;
4ed46869 5361{
0a9564cb
EZ
5362 int eol_crlf =
5363 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 5364
df7492f9 5365 coding->chars_at_source = 1;
119852e7
KH
5366 coding->consumed_char = coding->src_chars;
5367 coding->consumed = coding->src_bytes;
5368 if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
5369 {
5370 coding->consumed_char--;
5371 coding->consumed--;
5372 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5373 }
5374 else
5375 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 5376}
4ed46869 5377
df7492f9
KH
5378static int
5379encode_coding_raw_text (coding)
5380 struct coding_system *coding;
5381{
5382 int multibytep = coding->dst_multibyte;
5383 int *charbuf = coding->charbuf;
5384 int *charbuf_end = coding->charbuf + coding->charbuf_used;
5385 unsigned char *dst = coding->destination + coding->produced;
5386 unsigned char *dst_end = coding->destination + coding->dst_bytes;
a0ed9b27 5387 int produced_chars = 0;
b73bfc1c
KH
5388 int c;
5389
df7492f9 5390 if (multibytep)
b73bfc1c 5391 {
df7492f9 5392 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 5393
df7492f9
KH
5394 if (coding->src_multibyte)
5395 while (charbuf < charbuf_end)
5396 {
5397 ASSURE_DESTINATION (safe_room);
5398 c = *charbuf++;
5399 if (ASCII_CHAR_P (c))
5400 EMIT_ONE_ASCII_BYTE (c);
5401 else if (CHAR_BYTE8_P (c))
5402 {
5403 c = CHAR_TO_BYTE8 (c);
5404 EMIT_ONE_BYTE (c);
5405 }
5406 else
5407 {
5408 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 5409
df7492f9
KH
5410 CHAR_STRING_ADVANCE (c, p1);
5411 while (p0 < p1)
9d123124
KH
5412 {
5413 EMIT_ONE_BYTE (*p0);
5414 p0++;
5415 }
df7492f9
KH
5416 }
5417 }
b73bfc1c 5418 else
df7492f9
KH
5419 while (charbuf < charbuf_end)
5420 {
5421 ASSURE_DESTINATION (safe_room);
5422 c = *charbuf++;
5423 EMIT_ONE_BYTE (c);
5424 }
5425 }
5426 else
4ed46869 5427 {
df7492f9 5428 if (coding->src_multibyte)
d46c5b12 5429 {
df7492f9
KH
5430 int safe_room = MAX_MULTIBYTE_LENGTH;
5431
5432 while (charbuf < charbuf_end)
d46c5b12 5433 {
df7492f9
KH
5434 ASSURE_DESTINATION (safe_room);
5435 c = *charbuf++;
5436 if (ASCII_CHAR_P (c))
5437 *dst++ = c;
5438 else if (CHAR_BYTE8_P (c))
5439 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 5440 else
df7492f9 5441 CHAR_STRING_ADVANCE (c, dst);
d46c5b12
KH
5442 }
5443 }
df7492f9
KH
5444 else
5445 {
5446 ASSURE_DESTINATION (charbuf_end - charbuf);
5447 while (charbuf < charbuf_end && dst < dst_end)
5448 *dst++ = *charbuf++;
8f924df7 5449 }
319a3947 5450 produced_chars = dst - (coding->destination + coding->produced);
4ed46869 5451 }
065e3595 5452 record_conversion_result (coding, CODING_RESULT_SUCCESS);
a0ed9b27 5453 coding->produced_char += produced_chars;
df7492f9
KH
5454 coding->produced = dst - coding->destination;
5455 return 0;
4ed46869
KH
5456}
5457
ff0dacd7
KH
5458/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5459 Check if a text is encoded in a charset-based coding system. If it
5460 is, return 1, else return 0. */
5461
0a28aafb 5462static int
ff0dacd7 5463detect_coding_charset (coding, detect_info)
df7492f9 5464 struct coding_system *coding;
ff0dacd7 5465 struct coding_detection_info *detect_info;
1397dc18 5466{
065e3595 5467 const unsigned char *src = coding->source, *src_base;
8f924df7 5468 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
5469 int multibytep = coding->src_multibyte;
5470 int consumed_chars = 0;
07295713 5471 Lisp_Object attrs, valids, name;
584948ac 5472 int found = 0;
716b3fa0 5473 int head_ascii = coding->head_ascii;
07295713 5474 int check_latin_extra = 0;
1397dc18 5475
ff0dacd7
KH
5476 detect_info->checked |= CATEGORY_MASK_CHARSET;
5477
df7492f9
KH
5478 coding = &coding_categories[coding_category_charset];
5479 attrs = CODING_ID_ATTRS (coding->id);
5480 valids = AREF (attrs, coding_attr_charset_valids);
07295713 5481 name = CODING_ID_NAME (coding->id);
237aabf4
JR
5482 if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5483 "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5484 || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5485 "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
07295713 5486 check_latin_extra = 1;
237aabf4 5487
df7492f9 5488 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
716b3fa0 5489 src += head_ascii;
1397dc18 5490
b73bfc1c 5491 while (1)
1397dc18 5492 {
df7492f9 5493 int c;
716b3fa0
KH
5494 Lisp_Object val;
5495 struct charset *charset;
5496 int dim, idx;
1397dc18 5497
065e3595 5498 src_base = src;
df7492f9 5499 ONE_MORE_BYTE (c);
065e3595
KH
5500 if (c < 0)
5501 continue;
716b3fa0
KH
5502 val = AREF (valids, c);
5503 if (NILP (val))
df7492f9 5504 break;
584948ac 5505 if (c >= 0x80)
07295713
KH
5506 {
5507 if (c < 0xA0
237aabf4
JR
5508 && check_latin_extra
5509 && (!VECTORP (Vlatin_extra_code_table)
9f0526cb 5510 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
07295713
KH
5511 break;
5512 found = CATEGORY_MASK_CHARSET;
5513 }
716b3fa0
KH
5514 if (INTEGERP (val))
5515 {
5516 charset = CHARSET_FROM_ID (XFASTINT (val));
5517 dim = CHARSET_DIMENSION (charset);
5518 for (idx = 1; idx < dim; idx++)
5519 {
5520 if (src == src_end)
5521 goto too_short;
5522 ONE_MORE_BYTE (c);
3ed051d4 5523 if (c < charset->code_space[(dim - 1 - idx) * 2]
716b3fa0
KH
5524 || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5525 break;
5526 }
5527 if (idx < dim)
5528 break;
5529 }
5530 else
5531 {
5532 idx = 1;
5533 for (; CONSP (val); val = XCDR (val))
5534 {
5535 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5536 dim = CHARSET_DIMENSION (charset);
5537 while (idx < dim)
5538 {
5539 if (src == src_end)
5540 goto too_short;
5541 ONE_MORE_BYTE (c);
5542 if (c < charset->code_space[(dim - 1 - idx) * 4]
5543 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5544 break;
5545 idx++;
5546 }
5547 if (idx == dim)
5548 {
5549 val = Qnil;
5550 break;
5551 }
5552 }
5553 if (CONSP (val))
5554 break;
5555 }
df7492f9 5556 }
716b3fa0 5557 too_short:
ff0dacd7 5558 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 5559 return 0;
4ed46869 5560
df7492f9 5561 no_more_source:
ff0dacd7
KH
5562 detect_info->found |= found;
5563 return 1;
df7492f9 5564}
b73bfc1c 5565
b73bfc1c 5566static void
df7492f9 5567decode_coding_charset (coding)
4ed46869 5568 struct coding_system *coding;
4ed46869 5569{
8f924df7
KH
5570 const unsigned char *src = coding->source + coding->consumed;
5571 const unsigned char *src_end = coding->source + coding->src_bytes;
5572 const unsigned char *src_base;
69a80ea3 5573 int *charbuf = coding->charbuf + coding->charbuf_used;
df80c7f0
KH
5574 /* We may produce one charset annocation in one loop and one more at
5575 the end. */
69a80ea3 5576 int *charbuf_end
df80c7f0 5577 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
df7492f9
KH
5578 int consumed_chars = 0, consumed_chars_base;
5579 int multibytep = coding->src_multibyte;
24a73b0a 5580 Lisp_Object attrs, charset_list, valids;
ff0dacd7
KH
5581 int char_offset = coding->produced_char;
5582 int last_offset = char_offset;
5583 int last_id = charset_ascii;
0a9564cb
EZ
5584 int eol_crlf =
5585 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
119852e7 5586 int byte_after_cr = -1;
df7492f9 5587
24a73b0a 5588 CODING_GET_INFO (coding, attrs, charset_list);
4eb6d3f1 5589 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 5590
df7492f9 5591 while (1)
4ed46869 5592 {
4eb6d3f1 5593 int c;
24a73b0a
KH
5594 Lisp_Object val;
5595 struct charset *charset;
5596 int dim;
5597 int len = 1;
5598 unsigned code;
df7492f9
KH
5599
5600 src_base = src;
5601 consumed_chars_base = consumed_chars;
b73bfc1c 5602
df7492f9 5603 if (charbuf >= charbuf_end)
b71f6f73
KH
5604 {
5605 if (byte_after_cr >= 0)
5606 src_base--;
5607 break;
5608 }
df7492f9 5609
119852e7
KH
5610 if (byte_after_cr >= 0)
5611 {
5612 c = byte_after_cr;
5613 byte_after_cr = -1;
5614 }
5615 else
5616 {
5617 ONE_MORE_BYTE (c);
5618 if (eol_crlf && c == '\r')
5619 ONE_MORE_BYTE (byte_after_cr);
5620 }
065e3595
KH
5621 if (c < 0)
5622 goto invalid_code;
24a73b0a
KH
5623 code = c;
5624
5625 val = AREF (valids, c);
1b17adfd 5626 if (! INTEGERP (val) && ! CONSP (val))
24a73b0a
KH
5627 goto invalid_code;
5628 if (INTEGERP (val))
d46c5b12 5629 {
24a73b0a
KH
5630 charset = CHARSET_FROM_ID (XFASTINT (val));
5631 dim = CHARSET_DIMENSION (charset);
5632 while (len < dim)
b73bfc1c 5633 {
24a73b0a
KH
5634 ONE_MORE_BYTE (c);
5635 code = (code << 8) | c;
5636 len++;
b73bfc1c 5637 }
24a73b0a
KH
5638 CODING_DECODE_CHAR (coding, src, src_base, src_end,
5639 charset, code, c);
d46c5b12 5640 }
df7492f9 5641 else
d46c5b12 5642 {
24a73b0a
KH
5643 /* VAL is a list of charset IDs. It is assured that the
5644 list is sorted by charset dimensions (smaller one
5645 comes first). */
5646 while (CONSP (val))
4eb6d3f1 5647 {
24a73b0a 5648 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 5649 dim = CHARSET_DIMENSION (charset);
f9d71dcd 5650 while (len < dim)
4eb6d3f1 5651 {
acb2a965
KH
5652 ONE_MORE_BYTE (c);
5653 code = (code << 8) | c;
f9d71dcd 5654 len++;
4eb6d3f1 5655 }
24a73b0a
KH
5656 CODING_DECODE_CHAR (coding, src, src_base,
5657 src_end, charset, code, c);
5658 if (c >= 0)
5659 break;
5660 val = XCDR (val);
ff0dacd7 5661 }
d46c5b12 5662 }
24a73b0a
KH
5663 if (c < 0)
5664 goto invalid_code;
5665 if (charset->id != charset_ascii
5666 && last_id != charset->id)
5667 {
5668 if (last_id != charset_ascii)
69a80ea3 5669 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
5670 last_id = charset->id;
5671 last_offset = char_offset;
5672 }
5673
df7492f9 5674 *charbuf++ = c;
ff0dacd7 5675 char_offset++;
df7492f9
KH
5676 continue;
5677
5678 invalid_code:
5679 src = src_base;
5680 consumed_chars = consumed_chars_base;
5681 ONE_MORE_BYTE (c);
065e3595 5682 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 5683 char_offset++;
df7492f9 5684 coding->errors++;
4ed46869
KH
5685 }
5686
df7492f9 5687 no_more_source:
ff0dacd7 5688 if (last_id != charset_ascii)
69a80ea3 5689 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
5690 coding->consumed_char += consumed_chars_base;
5691 coding->consumed = src_base - coding->source;
5692 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
5693}
5694
df7492f9
KH
5695static int
5696encode_coding_charset (coding)
4ed46869 5697 struct coding_system *coding;
4ed46869 5698{
df7492f9
KH
5699 int multibytep = coding->dst_multibyte;
5700 int *charbuf = coding->charbuf;
5701 int *charbuf_end = charbuf + coding->charbuf_used;
5702 unsigned char *dst = coding->destination + coding->produced;
5703 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5704 int safe_room = MAX_MULTIBYTE_LENGTH;
5705 int produced_chars = 0;
24a73b0a 5706 Lisp_Object attrs, charset_list;
df7492f9 5707 int ascii_compatible;
b73bfc1c 5708 int c;
b73bfc1c 5709
24a73b0a 5710 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5711 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 5712
df7492f9 5713 while (charbuf < charbuf_end)
4ed46869 5714 {
4eb6d3f1 5715 struct charset *charset;
df7492f9 5716 unsigned code;
8f924df7 5717
df7492f9
KH
5718 ASSURE_DESTINATION (safe_room);
5719 c = *charbuf++;
5720 if (ascii_compatible && ASCII_CHAR_P (c))
5721 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 5722 else if (CHAR_BYTE8_P (c))
4ed46869 5723 {
16eafb5d
KH
5724 c = CHAR_TO_BYTE8 (c);
5725 EMIT_ONE_BYTE (c);
d46c5b12 5726 }
d46c5b12 5727 else
b73bfc1c 5728 {
4eb6d3f1
KH
5729 charset = char_charset (c, charset_list, &code);
5730 if (charset)
5731 {
5732 if (CHARSET_DIMENSION (charset) == 1)
5733 EMIT_ONE_BYTE (code);
5734 else if (CHARSET_DIMENSION (charset) == 2)
5735 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5736 else if (CHARSET_DIMENSION (charset) == 3)
5737 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5738 else
5739 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5740 (code >> 8) & 0xFF, code & 0xFF);
5741 }
5742 else
41cbe562
KH
5743 {
5744 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5745 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5746 else
5747 c = coding->default_char;
5748 EMIT_ONE_BYTE (c);
5749 }
4ed46869 5750 }
4ed46869
KH
5751 }
5752
065e3595 5753 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5754 coding->produced_char += produced_chars;
5755 coding->produced = dst - coding->destination;
5756 return 0;
4ed46869
KH
5757}
5758
5759\f
1397dc18 5760/*** 10. C library functions ***/
4ed46869 5761
df7492f9
KH
5762/* Setup coding context CODING from information about CODING_SYSTEM.
5763 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
5764 CODING_SYSTEM is invalid, signal an error. */
4ed46869 5765
ec6d2bb8 5766void
e0e989f6
KH
5767setup_coding_system (coding_system, coding)
5768 Lisp_Object coding_system;
4ed46869
KH
5769 struct coding_system *coding;
5770{
df7492f9
KH
5771 Lisp_Object attrs;
5772 Lisp_Object eol_type;
5773 Lisp_Object coding_type;
4608c386 5774 Lisp_Object val;
4ed46869 5775
df7492f9 5776 if (NILP (coding_system))
ae6f73fa 5777 coding_system = Qundecided;
c07c8e12 5778
df7492f9 5779 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 5780
df7492f9 5781 attrs = CODING_ID_ATTRS (coding->id);
0a9564cb 5782 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 5783
df7492f9
KH
5784 coding->mode = 0;
5785 coding->head_ascii = -1;
4a015c45
KH
5786 if (VECTORP (eol_type))
5787 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5788 | CODING_REQUIRE_DETECTION_MASK);
5789 else if (! EQ (eol_type, Qunix))
5790 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5791 | CODING_REQUIRE_ENCODING_MASK);
5792 else
5793 coding->common_flags = 0;
5e5c78be
KH
5794 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5795 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5796 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5797 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
5798 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5799 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 5800
df7492f9 5801 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 5802 coding->max_charset_id = SCHARS (val) - 1;
1b3b981b 5803 coding->safe_charsets = SDATA (val);
df7492f9 5804 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
624bda09 5805 coding->carryover_bytes = 0;
4608c386 5806
df7492f9
KH
5807 coding_type = CODING_ATTR_TYPE (attrs);
5808 if (EQ (coding_type, Qundecided))
d46c5b12 5809 {
df7492f9
KH
5810 coding->detector = NULL;
5811 coding->decoder = decode_coding_raw_text;
5812 coding->encoder = encode_coding_raw_text;
5813 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5814 }
df7492f9 5815 else if (EQ (coding_type, Qiso_2022))
d46c5b12 5816 {
df7492f9
KH
5817 int i;
5818 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5819
5820 /* Invoke graphic register 0 to plane 0. */
5821 CODING_ISO_INVOCATION (coding, 0) = 0;
5822 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
5823 CODING_ISO_INVOCATION (coding, 1)
5824 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5825 /* Setup the initial status of designation. */
5826 for (i = 0; i < 4; i++)
5827 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5828 /* Not single shifting initially. */
5829 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5830 /* Beginning of buffer should also be regarded as bol. */
5831 CODING_ISO_BOL (coding) = 1;
5832 coding->detector = detect_coding_iso_2022;
5833 coding->decoder = decode_coding_iso_2022;
5834 coding->encoder = encode_coding_iso_2022;
5835 if (flags & CODING_ISO_FLAG_SAFE)
5836 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 5837 coding->common_flags
df7492f9
KH
5838 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5839 | CODING_REQUIRE_FLUSHING_MASK);
5840 if (flags & CODING_ISO_FLAG_COMPOSITION)
5841 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
5842 if (flags & CODING_ISO_FLAG_DESIGNATION)
5843 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
5844 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5845 {
5846 setup_iso_safe_charsets (attrs);
5847 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 5848 coding->max_charset_id = SCHARS (val) - 1;
1b3b981b 5849 coding->safe_charsets = SDATA (val);
df7492f9
KH
5850 }
5851 CODING_ISO_FLAGS (coding) = flags;
e951386e
KH
5852 CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5853 CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5854 CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5855 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
d46c5b12 5856 }
df7492f9 5857 else if (EQ (coding_type, Qcharset))
d46c5b12 5858 {
df7492f9
KH
5859 coding->detector = detect_coding_charset;
5860 coding->decoder = decode_coding_charset;
5861 coding->encoder = encode_coding_charset;
d46c5b12 5862 coding->common_flags
df7492f9 5863 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5864 }
df7492f9 5865 else if (EQ (coding_type, Qutf_8))
d46c5b12 5866 {
a470d443
KH
5867 val = AREF (attrs, coding_attr_utf_bom);
5868 CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5869 : EQ (val, Qt) ? utf_with_bom
5870 : utf_without_bom);
df7492f9
KH
5871 coding->detector = detect_coding_utf_8;
5872 coding->decoder = decode_coding_utf_8;
5873 coding->encoder = encode_coding_utf_8;
5874 coding->common_flags
5875 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
a470d443
KH
5876 if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5877 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
df7492f9
KH
5878 }
5879 else if (EQ (coding_type, Qutf_16))
5880 {
a470d443
KH
5881 val = AREF (attrs, coding_attr_utf_bom);
5882 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5883 : EQ (val, Qt) ? utf_with_bom
5884 : utf_without_bom);
df7492f9 5885 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5886 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5887 : utf_16_little_endian);
e19c3639 5888 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5889 coding->detector = detect_coding_utf_16;
5890 coding->decoder = decode_coding_utf_16;
5891 coding->encoder = encode_coding_utf_16;
5892 coding->common_flags
5893 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
a470d443 5894 if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
b49a1807 5895 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5896 }
df7492f9 5897 else if (EQ (coding_type, Qccl))
4ed46869 5898 {
df7492f9
KH
5899 coding->detector = detect_coding_ccl;
5900 coding->decoder = decode_coding_ccl;
5901 coding->encoder = encode_coding_ccl;
c952af22 5902 coding->common_flags
df7492f9
KH
5903 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5904 | CODING_REQUIRE_FLUSHING_MASK);
5905 }
5906 else if (EQ (coding_type, Qemacs_mule))
5907 {
5908 coding->detector = detect_coding_emacs_mule;
5909 coding->decoder = decode_coding_emacs_mule;
5910 coding->encoder = encode_coding_emacs_mule;
c952af22 5911 coding->common_flags
df7492f9 5912 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
e951386e 5913 coding->spec.emacs_mule.full_support = 1;
df7492f9
KH
5914 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5915 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5916 {
5917 Lisp_Object tail, safe_charsets;
5918 int max_charset_id = 0;
5919
5920 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5921 tail = XCDR (tail))
5922 if (max_charset_id < XFASTINT (XCAR (tail)))
5923 max_charset_id = XFASTINT (XCAR (tail));
1b3b981b
AS
5924 safe_charsets = make_uninit_string (max_charset_id + 1);
5925 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9
KH
5926 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5927 tail = XCDR (tail))
8f924df7 5928 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5929 coding->max_charset_id = max_charset_id;
1b3b981b 5930 coding->safe_charsets = SDATA (safe_charsets);
e951386e 5931 coding->spec.emacs_mule.full_support = 1;
df7492f9 5932 }
e951386e
KH
5933 coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5934 coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
df7492f9
KH
5935 }
5936 else if (EQ (coding_type, Qshift_jis))
5937 {
5938 coding->detector = detect_coding_sjis;
5939 coding->decoder = decode_coding_sjis;
5940 coding->encoder = encode_coding_sjis;
c952af22 5941 coding->common_flags
df7492f9
KH
5942 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5943 }
5944 else if (EQ (coding_type, Qbig5))
5945 {
5946 coding->detector = detect_coding_big5;
5947 coding->decoder = decode_coding_big5;
5948 coding->encoder = encode_coding_big5;
c952af22 5949 coding->common_flags
df7492f9
KH
5950 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5951 }
5952 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5953 {
df7492f9
KH
5954 coding->detector = NULL;
5955 coding->decoder = decode_coding_raw_text;
5956 coding->encoder = encode_coding_raw_text;
ea29edf2
KH
5957 if (! EQ (eol_type, Qunix))
5958 {
5959 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5960 if (! VECTORP (eol_type))
5961 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5962 }
5963
4ed46869 5964 }
4ed46869 5965
df7492f9 5966 return;
4ed46869
KH
5967}
5968
0ff61e78
KH
5969/* Return a list of charsets supported by CODING. */
5970
5971Lisp_Object
5972coding_charset_list (coding)
5973 struct coding_system *coding;
5974{
35befdaa 5975 Lisp_Object attrs, charset_list;
0ff61e78
KH
5976
5977 CODING_GET_INFO (coding, attrs, charset_list);
5978 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5979 {
5980 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5981
5982 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5983 charset_list = Viso_2022_charset_list;
5984 }
5985 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5986 {
5987 charset_list = Vemacs_mule_charset_list;
5988 }
5989 return charset_list;
5990}
5991
5992
e9f91ece
KH
5993/* Return a list of charsets supported by CODING-SYSTEM. */
5994
5995Lisp_Object
5996coding_system_charset_list (coding_system)
5997 Lisp_Object coding_system;
5998{
5999 int id;
6000 Lisp_Object attrs, charset_list;
6001
6002 CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
6003 attrs = CODING_ID_ATTRS (id);
6004
6005 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
6006 {
6007 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
6008
6009 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
6010 charset_list = Viso_2022_charset_list;
6011 else
6012 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6013 }
6014 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
6015 {
6016 charset_list = Vemacs_mule_charset_list;
6017 }
6018 else
6019 {
6020 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6021 }
6022 return charset_list;
6023}
6024
6025
df7492f9
KH
6026/* Return raw-text or one of its subsidiaries that has the same
6027 eol_type as CODING-SYSTEM. */
ec6d2bb8 6028
df7492f9
KH
6029Lisp_Object
6030raw_text_coding_system (coding_system)
6031 Lisp_Object coding_system;
ec6d2bb8 6032{
0be8721c 6033 Lisp_Object spec, attrs;
df7492f9 6034 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 6035
d3e4cb56
KH
6036 if (NILP (coding_system))
6037 return Qraw_text;
df7492f9
KH
6038 spec = CODING_SYSTEM_SPEC (coding_system);
6039 attrs = AREF (spec, 0);
ec6d2bb8 6040
df7492f9
KH
6041 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
6042 return coding_system;
ec6d2bb8 6043
df7492f9
KH
6044 eol_type = AREF (spec, 2);
6045 if (VECTORP (eol_type))
6046 return Qraw_text;
6047 spec = CODING_SYSTEM_SPEC (Qraw_text);
6048 raw_text_eol_type = AREF (spec, 2);
6049 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6050 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6051 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
6052}
6053
54f78171 6054
df7492f9
KH
6055/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6056 does, return one of the subsidiary that has the same eol-spec as
fcbcfb64
KH
6057 PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil,
6058 inherit end-of-line format from the system's setting
6059 (system_eol_type). */
df7492f9
KH
6060
6061Lisp_Object
6062coding_inherit_eol_type (coding_system, parent)
b74e4686 6063 Lisp_Object coding_system, parent;
54f78171 6064{
3e139625 6065 Lisp_Object spec, eol_type;
54f78171 6066
d3e4cb56
KH
6067 if (NILP (coding_system))
6068 coding_system = Qraw_text;
df7492f9 6069 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 6070 eol_type = AREF (spec, 2);
fcbcfb64 6071 if (VECTORP (eol_type))
df7492f9 6072 {
df7492f9
KH
6073 Lisp_Object parent_eol_type;
6074
fcbcfb64
KH
6075 if (! NILP (parent))
6076 {
6077 Lisp_Object parent_spec;
6078
4a015c45 6079 parent_spec = CODING_SYSTEM_SPEC (parent);
fcbcfb64
KH
6080 parent_eol_type = AREF (parent_spec, 2);
6081 }
6082 else
6083 parent_eol_type = system_eol_type;
df7492f9
KH
6084 if (EQ (parent_eol_type, Qunix))
6085 coding_system = AREF (eol_type, 0);
6086 else if (EQ (parent_eol_type, Qdos))
6087 coding_system = AREF (eol_type, 1);
6088 else if (EQ (parent_eol_type, Qmac))
6089 coding_system = AREF (eol_type, 2);
54f78171 6090 }
df7492f9 6091 return coding_system;
54f78171
KH
6092}
6093
4ed46869
KH
6094/* Emacs has a mechanism to automatically detect a coding system if it
6095 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
6096 it's impossible to distinguish some coding systems accurately
6097 because they use the same range of codes. So, at first, coding
6098 systems are categorized into 7, those are:
6099
0ef69138 6100 o coding-category-emacs-mule
4ed46869
KH
6101
6102 The category for a coding system which has the same code range
6103 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 6104 symbol) `emacs-mule' by default.
4ed46869
KH
6105
6106 o coding-category-sjis
6107
6108 The category for a coding system which has the same code range
6109 as SJIS. Assigned the coding-system (Lisp
7717c392 6110 symbol) `japanese-shift-jis' by default.
4ed46869
KH
6111
6112 o coding-category-iso-7
6113
6114 The category for a coding system which has the same code range
7717c392 6115 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
6116 shift and single shift functions. This can encode/decode all
6117 charsets. Assigned the coding-system (Lisp symbol)
6118 `iso-2022-7bit' by default.
6119
6120 o coding-category-iso-7-tight
6121
6122 Same as coding-category-iso-7 except that this can
6123 encode/decode only the specified charsets.
4ed46869
KH
6124
6125 o coding-category-iso-8-1
6126
6127 The category for a coding system which has the same code range
6128 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
6129 for DIMENSION1 charset. This doesn't use any locking shift
6130 and single shift functions. Assigned the coding-system (Lisp
6131 symbol) `iso-latin-1' by default.
4ed46869
KH
6132
6133 o coding-category-iso-8-2
6134
6135 The category for a coding system which has the same code range
6136 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
6137 for DIMENSION2 charset. This doesn't use any locking shift
6138 and single shift functions. Assigned the coding-system (Lisp
6139 symbol) `japanese-iso-8bit' by default.
4ed46869 6140
7717c392 6141 o coding-category-iso-7-else
4ed46869
KH
6142
6143 The category for a coding system which has the same code range
df7492f9 6144 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
6145 single shift functions. Assigned the coding-system (Lisp
6146 symbol) `iso-2022-7bit-lock' by default.
6147
6148 o coding-category-iso-8-else
6149
6150 The category for a coding system which has the same code range
df7492f9 6151 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
6152 single shift functions. Assigned the coding-system (Lisp
6153 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
6154
6155 o coding-category-big5
6156
6157 The category for a coding system which has the same code range
6158 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 6159 `cn-big5' by default.
4ed46869 6160
fa42c37f
KH
6161 o coding-category-utf-8
6162
6163 The category for a coding system which has the same code range
6e76ae91 6164 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
6165 symbol) `utf-8' by default.
6166
6167 o coding-category-utf-16-be
6168
6169 The category for a coding system in which a text has an
6170 Unicode signature (cf. Unicode Standard) in the order of BIG
6171 endian at the head. Assigned the coding-system (Lisp symbol)
6172 `utf-16-be' by default.
6173
6174 o coding-category-utf-16-le
6175
6176 The category for a coding system in which a text has an
6177 Unicode signature (cf. Unicode Standard) in the order of
6178 LITTLE endian at the head. Assigned the coding-system (Lisp
6179 symbol) `utf-16-le' by default.
6180
1397dc18
KH
6181 o coding-category-ccl
6182
6183 The category for a coding system of which encoder/decoder is
6184 written in CCL programs. The default value is nil, i.e., no
6185 coding system is assigned.
6186
4ed46869
KH
6187 o coding-category-binary
6188
6189 The category for a coding system not categorized in any of the
6190 above. Assigned the coding-system (Lisp symbol)
e0e989f6 6191 `no-conversion' by default.
4ed46869
KH
6192
6193 Each of them is a Lisp symbol and the value is an actual
df7492f9 6194 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
6195 What Emacs does actually is to detect a category of coding system.
6196 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 6197 decide only one possible category, it selects a category of the
4ed46869
KH
6198 highest priority. Priorities of categories are also specified by a
6199 user in a Lisp variable `coding-category-list'.
6200
6201*/
6202
df7492f9
KH
6203#define EOL_SEEN_NONE 0
6204#define EOL_SEEN_LF 1
6205#define EOL_SEEN_CR 2
6206#define EOL_SEEN_CRLF 4
66cfb530 6207
ff0dacd7
KH
6208/* Detect how end-of-line of a text of length SRC_BYTES pointed by
6209 SOURCE is encoded. If CATEGORY is one of
6210 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6211 two-byte, else they are encoded by one-byte.
6212
6213 Return one of EOL_SEEN_XXX. */
4ed46869 6214
bc4bc72a 6215#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
6216
6217static int
89528eb3 6218detect_eol (source, src_bytes, category)
f6cbaf43 6219 const unsigned char *source;
df7492f9 6220 EMACS_INT src_bytes;
89528eb3 6221 enum coding_category category;
4ed46869 6222{
f6cbaf43 6223 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 6224 unsigned char c;
df7492f9
KH
6225 int total = 0;
6226 int eol_seen = EOL_SEEN_NONE;
4ed46869 6227
89528eb3 6228 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 6229 {
df7492f9 6230 int msb, lsb;
fa42c37f 6231
89528eb3
KH
6232 msb = category == (coding_category_utf_16_le
6233 | coding_category_utf_16_le_nosig);
df7492f9 6234 lsb = 1 - msb;
fa42c37f 6235
df7492f9 6236 while (src + 1 < src_end)
fa42c37f 6237 {
df7492f9
KH
6238 c = src[lsb];
6239 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 6240 {
df7492f9
KH
6241 int this_eol;
6242
6243 if (c == '\n')
6244 this_eol = EOL_SEEN_LF;
6245 else if (src + 3 >= src_end
6246 || src[msb + 2] != 0
6247 || src[lsb + 2] != '\n')
6248 this_eol = EOL_SEEN_CR;
fa42c37f 6249 else
75f4f1ac
EZ
6250 {
6251 this_eol = EOL_SEEN_CRLF;
6252 src += 2;
6253 }
df7492f9
KH
6254
6255 if (eol_seen == EOL_SEEN_NONE)
6256 /* This is the first end-of-line. */
6257 eol_seen = this_eol;
6258 else if (eol_seen != this_eol)
fa42c37f 6259 {
75f4f1ac
EZ
6260 /* The found type is different from what found before.
6261 Allow for stray ^M characters in DOS EOL files. */
6262 if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6263 || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6264 eol_seen = EOL_SEEN_CRLF;
6265 else
6266 {
6267 eol_seen = EOL_SEEN_LF;
6268 break;
6269 }
fa42c37f 6270 }
df7492f9
KH
6271 if (++total == MAX_EOL_CHECK_COUNT)
6272 break;
fa42c37f 6273 }
df7492f9 6274 src += 2;
fa42c37f 6275 }
bcf26d6a 6276 }
d46c5b12 6277 else
c4825358 6278 {
df7492f9 6279 while (src < src_end)
27901516 6280 {
df7492f9
KH
6281 c = *src++;
6282 if (c == '\n' || c == '\r')
6283 {
6284 int this_eol;
d46c5b12 6285
df7492f9
KH
6286 if (c == '\n')
6287 this_eol = EOL_SEEN_LF;
6288 else if (src >= src_end || *src != '\n')
6289 this_eol = EOL_SEEN_CR;
6290 else
6291 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 6292
df7492f9
KH
6293 if (eol_seen == EOL_SEEN_NONE)
6294 /* This is the first end-of-line. */
6295 eol_seen = this_eol;
6296 else if (eol_seen != this_eol)
6297 {
75f4f1ac
EZ
6298 /* The found type is different from what found before.
6299 Allow for stray ^M characters in DOS EOL files. */
6300 if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6301 || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6302 eol_seen = EOL_SEEN_CRLF;
6303 else
6304 {
6305 eol_seen = EOL_SEEN_LF;
6306 break;
6307 }
df7492f9
KH
6308 }
6309 if (++total == MAX_EOL_CHECK_COUNT)
6310 break;
6311 }
6312 }
73be902c 6313 }
df7492f9 6314 return eol_seen;
73be902c
KH
6315}
6316
df7492f9 6317
24a73b0a 6318static Lisp_Object
df7492f9
KH
6319adjust_coding_eol_type (coding, eol_seen)
6320 struct coding_system *coding;
6321 int eol_seen;
73be902c 6322{
0be8721c 6323 Lisp_Object eol_type;
8f924df7 6324
df7492f9
KH
6325 eol_type = CODING_ID_EOL_TYPE (coding->id);
6326 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
6327 {
6328 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6329 eol_type = Qunix;
6330 }
6f197c07 6331 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
6332 {
6333 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6334 eol_type = Qdos;
6335 }
6f197c07 6336 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
6337 {
6338 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6339 eol_type = Qmac;
6340 }
6341 return eol_type;
d46c5b12 6342}
4ed46869 6343
df7492f9
KH
6344/* Detect how a text specified in CODING is encoded. If a coding
6345 system is detected, update fields of CODING by the detected coding
6346 system. */
0a28aafb 6347
df7492f9
KH
6348void
6349detect_coding (coding)
d46c5b12 6350 struct coding_system *coding;
d46c5b12 6351{
8f924df7 6352 const unsigned char *src, *src_end;
73cce38d 6353 int saved_mode = coding->mode;
d46c5b12 6354
df7492f9
KH
6355 coding->consumed = coding->consumed_char = 0;
6356 coding->produced = coding->produced_char = 0;
6357 coding_set_source (coding);
1c3478b0 6358
df7492f9 6359 src_end = coding->source + coding->src_bytes;
c0e16b14 6360 coding->head_ascii = 0;
1c3478b0 6361
df7492f9
KH
6362 /* If we have not yet decided the text encoding type, detect it
6363 now. */
6364 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 6365 {
df7492f9 6366 int c, i;
6cb21a4f 6367 struct coding_detection_info detect_info;
2f3cbb32 6368 int null_byte_found = 0, eight_bit_found = 0;
df7492f9 6369
6cb21a4f 6370 detect_info.checked = detect_info.found = detect_info.rejected = 0;
2f3cbb32 6371 for (src = coding->source; src < src_end; src++)
d46c5b12 6372 {
df7492f9 6373 c = *src;
6cb21a4f 6374 if (c & 0x80)
6cb21a4f 6375 {
2f3cbb32 6376 eight_bit_found = 1;
2f3cbb32
KH
6377 if (null_byte_found)
6378 break;
6379 }
6380 else if (c < 0x20)
6381 {
6382 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6383 && ! inhibit_iso_escape_detection
6384 && ! detect_info.checked)
6cb21a4f 6385 {
2f3cbb32
KH
6386 if (detect_coding_iso_2022 (coding, &detect_info))
6387 {
6388 /* We have scanned the whole data. */
6389 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
c0e16b14
KH
6390 {
6391 /* We didn't find an 8-bit code. We may
6392 have found a null-byte, but it's very
6393 rare that a binary file conforms to
6394 ISO-2022. */
6395 src = src_end;
6396 coding->head_ascii = src - coding->source;
6397 }
6398 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
2f3cbb32
KH
6399 break;
6400 }
6401 }
97b1b294 6402 else if (! c && !inhibit_null_byte_detection)
2f3cbb32
KH
6403 {
6404 null_byte_found = 1;
6405 if (eight_bit_found)
6406 break;
6cb21a4f 6407 }
c006c0c8
KH
6408 if (! eight_bit_found)
6409 coding->head_ascii++;
6cb21a4f 6410 }
c006c0c8 6411 else if (! eight_bit_found)
c0e16b14 6412 coding->head_ascii++;
d46c5b12 6413 }
df7492f9 6414
2f3cbb32
KH
6415 if (null_byte_found || eight_bit_found
6416 || coding->head_ascii < coding->src_bytes
6cb21a4f 6417 || detect_info.found)
d46c5b12 6418 {
ff0dacd7
KH
6419 enum coding_category category;
6420 struct coding_system *this;
df7492f9 6421
6cb21a4f
KH
6422 if (coding->head_ascii == coding->src_bytes)
6423 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
6424 for (i = 0; i < coding_category_raw_text; i++)
6425 {
6426 category = coding_priorities[i];
6427 this = coding_categories + category;
6428 if (detect_info.found & (1 << category))
24a73b0a 6429 break;
6cb21a4f
KH
6430 }
6431 else
2f3cbb32
KH
6432 {
6433 if (null_byte_found)
ff0dacd7 6434 {
2f3cbb32
KH
6435 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6436 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
ff0dacd7 6437 }
2f3cbb32
KH
6438 for (i = 0; i < coding_category_raw_text; i++)
6439 {
6440 category = coding_priorities[i];
6441 this = coding_categories + category;
6442 if (this->id < 0)
6443 {
6444 /* No coding system of this category is defined. */
6445 detect_info.rejected |= (1 << category);
6446 }
6447 else if (category >= coding_category_raw_text)
6448 continue;
6449 else if (detect_info.checked & (1 << category))
6450 {
6451 if (detect_info.found & (1 << category))
6452 break;
6453 }
6454 else if ((*(this->detector)) (coding, &detect_info)
6455 && detect_info.found & (1 << category))
6456 {
6457 if (category == coding_category_utf_16_auto)
6458 {
6459 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6460 category = coding_category_utf_16_le;
6461 else
6462 category = coding_category_utf_16_be;
6463 }
6464 break;
6465 }
6466 }
2f3cbb32 6467 }
c0e16b14
KH
6468
6469 if (i < coding_category_raw_text)
6470 setup_coding_system (CODING_ID_NAME (this->id), coding);
6471 else if (null_byte_found)
6472 setup_coding_system (Qno_conversion, coding);
6473 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6474 == CATEGORY_MASK_ANY)
6475 setup_coding_system (Qraw_text, coding);
6476 else if (detect_info.rejected)
6477 for (i = 0; i < coding_category_raw_text; i++)
6478 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6479 {
6480 this = coding_categories + coding_priorities[i];
6481 setup_coding_system (CODING_ID_NAME (this->id), coding);
6482 break;
6483 }
d46c5b12 6484 }
b73bfc1c 6485 }
a470d443
KH
6486 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6487 == coding_category_utf_8_auto)
6488 {
6489 Lisp_Object coding_systems;
6490 struct coding_detection_info detect_info;
6491
6492 coding_systems
6493 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6494 detect_info.found = detect_info.rejected = 0;
6495 coding->head_ascii = 0;
6496 if (CONSP (coding_systems)
6497 && detect_coding_utf_8 (coding, &detect_info))
6498 {
6499 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6500 setup_coding_system (XCAR (coding_systems), coding);
6501 else
6502 setup_coding_system (XCDR (coding_systems), coding);
6503 }
6504 }
24a73b0a
KH
6505 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6506 == coding_category_utf_16_auto)
b49a1807
KH
6507 {
6508 Lisp_Object coding_systems;
6509 struct coding_detection_info detect_info;
6510
6511 coding_systems
a470d443 6512 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
b49a1807 6513 detect_info.found = detect_info.rejected = 0;
a470d443 6514 coding->head_ascii = 0;
b49a1807 6515 if (CONSP (coding_systems)
24a73b0a 6516 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
6517 {
6518 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6519 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 6520 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
6521 setup_coding_system (XCDR (coding_systems), coding);
6522 }
6523 }
73cce38d 6524 coding->mode = saved_mode;
4ed46869 6525}
4ed46869 6526
d46c5b12 6527
aaaf0b1e 6528static void
df7492f9 6529decode_eol (coding)
aaaf0b1e 6530 struct coding_system *coding;
aaaf0b1e 6531{
24a73b0a
KH
6532 Lisp_Object eol_type;
6533 unsigned char *p, *pbeg, *pend;
3ed051d4 6534
24a73b0a 6535 eol_type = CODING_ID_EOL_TYPE (coding->id);
0a9564cb 6536 if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
24a73b0a
KH
6537 return;
6538
6539 if (NILP (coding->dst_object))
6540 pbeg = coding->destination;
6541 else
6542 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6543 pend = pbeg + coding->produced;
6544
6545 if (VECTORP (eol_type))
aaaf0b1e 6546 {
df7492f9 6547 int eol_seen = EOL_SEEN_NONE;
4ed46869 6548
24a73b0a 6549 for (p = pbeg; p < pend; p++)
aaaf0b1e 6550 {
df7492f9
KH
6551 if (*p == '\n')
6552 eol_seen |= EOL_SEEN_LF;
6553 else if (*p == '\r')
aaaf0b1e 6554 {
df7492f9 6555 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 6556 {
df7492f9
KH
6557 eol_seen |= EOL_SEEN_CRLF;
6558 p++;
aaaf0b1e 6559 }
aaaf0b1e 6560 else
df7492f9 6561 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 6562 }
aaaf0b1e 6563 }
75f4f1ac
EZ
6564 /* Handle DOS-style EOLs in a file with stray ^M characters. */
6565 if ((eol_seen & EOL_SEEN_CRLF) != 0
6566 && (eol_seen & EOL_SEEN_CR) != 0
6567 && (eol_seen & EOL_SEEN_LF) == 0)
6568 eol_seen = EOL_SEEN_CRLF;
6569 else if (eol_seen != EOL_SEEN_NONE
24a73b0a
KH
6570 && eol_seen != EOL_SEEN_LF
6571 && eol_seen != EOL_SEEN_CRLF
6572 && eol_seen != EOL_SEEN_CR)
6573 eol_seen = EOL_SEEN_LF;
df7492f9 6574 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 6575 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 6576 }
d46c5b12 6577
24a73b0a 6578 if (EQ (eol_type, Qmac))
27901516 6579 {
24a73b0a 6580 for (p = pbeg; p < pend; p++)
df7492f9
KH
6581 if (*p == '\r')
6582 *p = '\n';
4ed46869 6583 }
24a73b0a 6584 else if (EQ (eol_type, Qdos))
df7492f9 6585 {
24a73b0a 6586 int n = 0;
b73bfc1c 6587
24a73b0a
KH
6588 if (NILP (coding->dst_object))
6589 {
4347441b
KH
6590 /* Start deleting '\r' from the tail to minimize the memory
6591 movement. */
24a73b0a
KH
6592 for (p = pend - 2; p >= pbeg; p--)
6593 if (*p == '\r')
6594 {
6595 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6596 n++;
6597 }
6598 }
6599 else
6600 {
4347441b
KH
6601 int pos_byte = coding->dst_pos_byte;
6602 int pos = coding->dst_pos;
6603 int pos_end = pos + coding->produced_char - 1;
6604
6605 while (pos < pos_end)
6606 {
6607 p = BYTE_POS_ADDR (pos_byte);
6608 if (*p == '\r' && p[1] == '\n')
6609 {
6610 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6611 n++;
6612 pos_end--;
6613 }
6614 pos++;
69b8522d
KH
6615 if (coding->dst_multibyte)
6616 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6617 else
6618 pos_byte++;
4347441b 6619 }
24a73b0a
KH
6620 }
6621 coding->produced -= n;
6622 coding->produced_char -= n;
aaaf0b1e 6623 }
4ed46869
KH
6624}
6625
7d64c6ad 6626
a6f87d34
KH
6627/* Return a translation table (or list of them) from coding system
6628 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6629 decoding (ENCODEP is zero). */
7d64c6ad 6630
e6a54062 6631static Lisp_Object
09ee6fdd
KH
6632get_translation_table (attrs, encodep, max_lookup)
6633 Lisp_Object attrs;
6634 int encodep, *max_lookup;
7d64c6ad
KH
6635{
6636 Lisp_Object standard, translation_table;
09ee6fdd 6637 Lisp_Object val;
7d64c6ad 6638
4bed5909
CY
6639 if (NILP (Venable_character_translation))
6640 {
6641 if (max_lookup)
6642 *max_lookup = 0;
6643 return Qnil;
6644 }
7d64c6ad
KH
6645 if (encodep)
6646 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6647 standard = Vstandard_translation_table_for_encode;
6648 else
6649 translation_table = CODING_ATTR_DECODE_TBL (attrs),
6650 standard = Vstandard_translation_table_for_decode;
7d64c6ad 6651 if (NILP (translation_table))
09ee6fdd
KH
6652 translation_table = standard;
6653 else
a6f87d34 6654 {
09ee6fdd
KH
6655 if (SYMBOLP (translation_table))
6656 translation_table = Fget (translation_table, Qtranslation_table);
6657 else if (CONSP (translation_table))
6658 {
6659 translation_table = Fcopy_sequence (translation_table);
6660 for (val = translation_table; CONSP (val); val = XCDR (val))
6661 if (SYMBOLP (XCAR (val)))
6662 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6663 }
6664 if (CHAR_TABLE_P (standard))
6665 {
6666 if (CONSP (translation_table))
6667 translation_table = nconc2 (translation_table,
6668 Fcons (standard, Qnil));
6669 else
6670 translation_table = Fcons (translation_table,
6671 Fcons (standard, Qnil));
6672 }
a6f87d34 6673 }
2170c8f0
KH
6674
6675 if (max_lookup)
09ee6fdd 6676 {
2170c8f0
KH
6677 *max_lookup = 1;
6678 if (CHAR_TABLE_P (translation_table)
6679 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6680 {
6681 val = XCHAR_TABLE (translation_table)->extras[1];
6682 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6683 *max_lookup = XFASTINT (val);
6684 }
6685 else if (CONSP (translation_table))
6686 {
6687 Lisp_Object tail, val;
09ee6fdd 6688
2170c8f0
KH
6689 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6690 if (CHAR_TABLE_P (XCAR (tail))
6691 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6692 {
6693 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6694 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6695 *max_lookup = XFASTINT (val);
6696 }
6697 }
a6f87d34 6698 }
7d64c6ad
KH
6699 return translation_table;
6700}
6701
09ee6fdd
KH
/* Look up character C in TABLE, which is a char-table, a list of
   char-tables, or anything else (in which case nothing is looked up).
   C and TRANS must be lvalues.

   If a char-table maps C to a character, C is replaced by that
   character and TRANS is set to Qnil; in a list, the lookup then
   continues in the following tables with the new C.  If a table in a
   list yields any other non-nil value, the lookup stops with that
   value in TRANS.  TRANS is Qnil if nothing is found.

   TABLE is evaluated exactly once, and the internal variables carry
   a trailing underscore so that they cannot capture an identifier
   passed as an argument.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                        \
  do {                                                                   \
    Lisp_Object lookup_table_ = (table);                                 \
                                                                         \
    (trans) = Qnil;                                                      \
    if (CHAR_TABLE_P (lookup_table_))                                    \
      {                                                                  \
        (trans) = CHAR_TABLE_REF (lookup_table_, (c));                   \
        if (CHARACTERP (trans))                                          \
          (c) = XFASTINT (trans), (trans) = Qnil;                        \
      }                                                                  \
    else if (CONSP (lookup_table_))                                      \
      {                                                                  \
        Lisp_Object lookup_tail_;                                        \
                                                                         \
        for (lookup_tail_ = lookup_table_; CONSP (lookup_tail_);         \
             lookup_tail_ = XCDR (lookup_tail_))                         \
          if (CHAR_TABLE_P (XCAR (lookup_tail_)))                        \
            {                                                            \
              (trans) = CHAR_TABLE_REF (XCAR (lookup_tail_), (c));       \
              if (CHARACTERP (trans))                                    \
                (c) = XFASTINT (trans), (trans) = Qnil;                  \
              else if (! NILP (trans))                                   \
                break;                                                   \
            }                                                            \
      }                                                                  \
  } while (0)
6726
7d64c6ad 6727
e951386e
KH
6728/* Return a translation of character(s) at BUF according to TRANS.
6729 TRANS is TO-CHAR or ((FROM . TO) ...) where
6730 FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6731 The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6732 translation is found, and Qnil if not found.
6733 If BUF is too short to lookup characters in FROM, return Qt. */
6734
69a80ea3 6735static Lisp_Object
e951386e
KH
6736get_translation (trans, buf, buf_end)
6737 Lisp_Object trans;
69a80ea3 6738 int *buf, *buf_end;
69a80ea3 6739{
e951386e
KH
6740
6741 if (INTEGERP (trans))
6742 return trans;
6743 for (; CONSP (trans); trans = XCDR (trans))
69a80ea3 6744 {
e951386e
KH
6745 Lisp_Object val = XCAR (trans);
6746 Lisp_Object from = XCAR (val);
6747 int len = ASIZE (from);
6748 int i;
69a80ea3 6749
e951386e 6750 for (i = 0; i < len; i++)
69a80ea3 6751 {
e951386e
KH
6752 if (buf + i == buf_end)
6753 return Qt;
6754 if (XINT (AREF (from, i)) != buf[i])
6755 break;
69a80ea3 6756 }
e951386e
KH
6757 if (i == len)
6758 return val;
69a80ea3 6759 }
e951386e 6760 return Qnil;
69a80ea3
KH
6761}
6762
6763
d46c5b12 6764static int
69a80ea3 6765produce_chars (coding, translation_table, last_block)
df7492f9 6766 struct coding_system *coding;
69a80ea3
KH
6767 Lisp_Object translation_table;
6768 int last_block;
4ed46869 6769{
df7492f9
KH
6770 unsigned char *dst = coding->destination + coding->produced;
6771 unsigned char *dst_end = coding->destination + coding->dst_bytes;
119852e7
KH
6772 EMACS_INT produced;
6773 EMACS_INT produced_chars = 0;
69a80ea3 6774 int carryover = 0;
4ed46869 6775
df7492f9 6776 if (! coding->chars_at_source)
4ed46869 6777 {
119852e7 6778 /* Source characters are in coding->charbuf. */
fba4576f
AS
6779 int *buf = coding->charbuf;
6780 int *buf_end = buf + coding->charbuf_used;
4ed46869 6781
db274c7a
KH
6782 if (EQ (coding->src_object, coding->dst_object))
6783 {
6784 coding_set_source (coding);
6785 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6786 }
4ed46869 6787
df7492f9 6788 while (buf < buf_end)
4ed46869 6789 {
69a80ea3 6790 int c = *buf, i;
bc4bc72a 6791
df7492f9
KH
6792 if (c >= 0)
6793 {
69a80ea3
KH
6794 int from_nchars = 1, to_nchars = 1;
6795 Lisp_Object trans = Qnil;
6796
09ee6fdd 6797 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6798 if (! NILP (trans))
69a80ea3 6799 {
e951386e
KH
6800 trans = get_translation (trans, buf, buf_end);
6801 if (INTEGERP (trans))
6802 c = XINT (trans);
6803 else if (CONSP (trans))
6804 {
6805 from_nchars = ASIZE (XCAR (trans));
6806 trans = XCDR (trans);
6807 if (INTEGERP (trans))
6808 c = XINT (trans);
6809 else
6810 {
6811 to_nchars = ASIZE (trans);
6812 c = XINT (AREF (trans, 0));
6813 }
6814 }
6815 else if (EQ (trans, Qt) && ! last_block)
69a80ea3 6816 break;
69a80ea3
KH
6817 }
6818
6819 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6820 {
6821 dst = alloc_destination (coding,
6822 buf_end - buf
6823 + MAX_MULTIBYTE_LENGTH * to_nchars,
6824 dst);
db274c7a
KH
6825 if (EQ (coding->src_object, coding->dst_object))
6826 {
6827 coding_set_source (coding);
e951386e
KH
6828 dst_end = (((unsigned char *) coding->source)
6829 + coding->consumed);
db274c7a
KH
6830 }
6831 else
6832 dst_end = coding->destination + coding->dst_bytes;
69a80ea3
KH
6833 }
6834
433f7f87 6835 for (i = 0; i < to_nchars; i++)
69a80ea3 6836 {
433f7f87
KH
6837 if (i > 0)
6838 c = XINT (AREF (trans, i));
69a80ea3
KH
6839 if (coding->dst_multibyte
6840 || ! CHAR_BYTE8_P (c))
db274c7a 6841 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
69a80ea3
KH
6842 else
6843 *dst++ = CHAR_TO_BYTE8 (c);
6844 }
6845 produced_chars += to_nchars;
e951386e 6846 buf += from_nchars;
d46c5b12 6847 }
df7492f9 6848 else
69a80ea3
KH
6849 /* This is an annotation datum. (-C) is the length. */
6850 buf += -c;
4ed46869 6851 }
69a80ea3 6852 carryover = buf_end - buf;
4ed46869 6853 }
fa42c37f 6854 else
fa42c37f 6855 {
119852e7 6856 /* Source characters are at coding->source. */
8f924df7 6857 const unsigned char *src = coding->source;
119852e7 6858 const unsigned char *src_end = src + coding->consumed;
4ed46869 6859
db274c7a
KH
6860 if (EQ (coding->dst_object, coding->src_object))
6861 dst_end = (unsigned char *) src;
df7492f9 6862 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 6863 {
df7492f9 6864 if (coding->src_multibyte)
fa42c37f 6865 {
71c81426 6866 int multibytep = 1;
4533845d 6867 EMACS_INT consumed_chars = 0;
d46c5b12 6868
df7492f9
KH
6869 while (1)
6870 {
8f924df7 6871 const unsigned char *src_base = src;
df7492f9 6872 int c;
b73bfc1c 6873
df7492f9 6874 ONE_MORE_BYTE (c);
119852e7 6875 if (dst == dst_end)
df7492f9 6876 {
119852e7
KH
6877 if (EQ (coding->src_object, coding->dst_object))
6878 dst_end = (unsigned char *) src;
6879 if (dst == dst_end)
df7492f9 6880 {
119852e7
KH
6881 EMACS_INT offset = src - coding->source;
6882
6883 dst = alloc_destination (coding, src_end - src + 1,
6884 dst);
6885 dst_end = coding->destination + coding->dst_bytes;
6886 coding_set_source (coding);
6887 src = coding->source + offset;
6888 src_end = coding->source + coding->src_bytes;
db274c7a
KH
6889 if (EQ (coding->src_object, coding->dst_object))
6890 dst_end = (unsigned char *) src;
df7492f9 6891 }
df7492f9
KH
6892 }
6893 *dst++ = c;
6894 produced_chars++;
6895 }
6896 no_more_source:
6897 ;
fa42c37f
KH
6898 }
6899 else
df7492f9
KH
6900 while (src < src_end)
6901 {
71c81426 6902 int multibytep = 1;
df7492f9 6903 int c = *src++;
b73bfc1c 6904
df7492f9
KH
6905 if (dst >= dst_end - 1)
6906 {
2c78b7e1 6907 if (EQ (coding->src_object, coding->dst_object))
8f924df7 6908 dst_end = (unsigned char *) src;
2c78b7e1
KH
6909 if (dst >= dst_end - 1)
6910 {
119852e7 6911 EMACS_INT offset = src - coding->source;
db274c7a 6912 EMACS_INT more_bytes;
119852e7 6913
db274c7a
KH
6914 if (EQ (coding->src_object, coding->dst_object))
6915 more_bytes = ((src_end - src) / 2) + 2;
6916 else
6917 more_bytes = src_end - src + 2;
6918 dst = alloc_destination (coding, more_bytes, dst);
2c78b7e1
KH
6919 dst_end = coding->destination + coding->dst_bytes;
6920 coding_set_source (coding);
119852e7 6921 src = coding->source + offset;
2c78b7e1 6922 src_end = coding->source + coding->src_bytes;
db274c7a
KH
6923 if (EQ (coding->src_object, coding->dst_object))
6924 dst_end = (unsigned char *) src;
2c78b7e1 6925 }
df7492f9
KH
6926 }
6927 EMIT_ONE_BYTE (c);
6928 }
d46c5b12 6929 }
df7492f9
KH
6930 else
6931 {
6932 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 6933 {
119852e7 6934 EMACS_INT require = coding->src_bytes - coding->dst_bytes;
4ed46869 6935
df7492f9 6936 if (require > 0)
fa42c37f 6937 {
df7492f9
KH
6938 EMACS_INT offset = src - coding->source;
6939
6940 dst = alloc_destination (coding, require, dst);
6941 coding_set_source (coding);
6942 src = coding->source + offset;
6943 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
6944 }
6945 }
119852e7 6946 produced_chars = coding->consumed_char;
df7492f9 6947 while (src < src_end)
14daee73 6948 *dst++ = *src++;
fa42c37f
KH
6949 }
6950 }
6951
df7492f9 6952 produced = dst - (coding->destination + coding->produced);
284201e4 6953 if (BUFFERP (coding->dst_object) && produced_chars > 0)
df7492f9
KH
6954 insert_from_gap (produced_chars, produced);
6955 coding->produced += produced;
6956 coding->produced_char += produced_chars;
69a80ea3 6957 return carryover;
fa42c37f
KH
6958}
6959
ff0dacd7
KH
6960/* Compose text in CODING->object according to the annotation data at
6961 CHARBUF. CHARBUF is an array:
e951386e 6962 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
df7492f9 6963 */
4ed46869 6964
df7492f9 6965static INLINE void
69a80ea3 6966produce_composition (coding, charbuf, pos)
4ed46869 6967 struct coding_system *coding;
df7492f9 6968 int *charbuf;
69a80ea3 6969 EMACS_INT pos;
4ed46869 6970{
df7492f9 6971 int len;
69a80ea3 6972 EMACS_INT to;
df7492f9 6973 enum composition_method method;
df7492f9 6974 Lisp_Object components;
fa42c37f 6975
e951386e 6976 len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
69a80ea3 6977 to = pos + charbuf[2];
e951386e 6978 method = (enum composition_method) (charbuf[4]);
d46c5b12 6979
df7492f9
KH
6980 if (method == COMPOSITION_RELATIVE)
6981 components = Qnil;
e951386e 6982 else
d46c5b12 6983 {
df7492f9 6984 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
e951386e 6985 int i, j;
b73bfc1c 6986
e951386e
KH
6987 if (method == COMPOSITION_WITH_RULE)
6988 len = charbuf[2] * 3 - 2;
6989 charbuf += MAX_ANNOTATION_LENGTH;
6990 /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6991 for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
9ffd559c 6992 {
e951386e
KH
6993 if (charbuf[i] >= 0)
6994 args[j] = make_number (charbuf[i]);
6995 else
6996 {
6997 i++;
6998 args[j] = make_number (charbuf[i] % 0x100);
6999 }
9ffd559c 7000 }
e951386e 7001 components = (i == j ? Fstring (j, args) : Fvector (j, args));
d46c5b12 7002 }
69a80ea3 7003 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
7004}
7005
d46c5b12 7006
ff0dacd7
KH
7007/* Put `charset' property on text in CODING->object according to
7008 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 7009 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 7010 */
d46c5b12 7011
ff0dacd7 7012static INLINE void
69a80ea3 7013produce_charset (coding, charbuf, pos)
d46c5b12 7014 struct coding_system *coding;
ff0dacd7 7015 int *charbuf;
69a80ea3 7016 EMACS_INT pos;
d46c5b12 7017{
69a80ea3
KH
7018 EMACS_INT from = pos - charbuf[2];
7019 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 7020
69a80ea3 7021 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
7022 Qcharset, CHARSET_NAME (charset),
7023 coding->dst_object);
d46c5b12
KH
7024}
7025
d46c5b12 7026
df7492f9
KH
/* Number of elements first tried for the conversion work area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca and record the number of
   elements in CODING->charbuf_size.  The size starts at CHARBUF_SIZE
   and is halved after each allocation that yields a null pointer,
   as long as it stays above 1024.  If no allocation succeeds, record
   CODING_RESULT_INSUFFICIENT_MEM and make the *calling function*
   return CODING->result.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                               \
  do {                                                                   \
    int size;                                                            \
                                                                         \
    (coding)->charbuf = NULL;                                            \
    for (size = CHARBUF_SIZE; size > 1024; size >>= 1)                   \
      {                                                                  \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);        \
        if ((coding)->charbuf)                                           \
          break;                                                         \
      }                                                                  \
    if (! (coding)->charbuf)                                             \
      {                                                                  \
        record_conversion_result ((coding),                              \
                                  CODING_RESULT_INSUFFICIENT_MEM);       \
        return (coding)->result;                                         \
      }                                                                  \
    (coding)->charbuf_size = size;                                       \
  } while (0)
4ed46869 7048
d46c5b12
KH
7049
7050static void
69a80ea3 7051produce_annotation (coding, pos)
d46c5b12 7052 struct coding_system *coding;
69a80ea3 7053 EMACS_INT pos;
d46c5b12 7054{
df7492f9
KH
7055 int *charbuf = coding->charbuf;
7056 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 7057
ff0dacd7
KH
7058 if (NILP (coding->dst_object))
7059 return;
d46c5b12 7060
df7492f9 7061 while (charbuf < charbuf_end)
a84f1519 7062 {
df7492f9 7063 if (*charbuf >= 0)
e951386e 7064 pos++, charbuf++;
d46c5b12 7065 else
d46c5b12 7066 {
df7492f9 7067 int len = -*charbuf;
e951386e
KH
7068
7069 if (len > 2)
7070 switch (charbuf[1])
7071 {
7072 case CODING_ANNOTATE_COMPOSITION_MASK:
7073 produce_composition (coding, charbuf, pos);
7074 break;
7075 case CODING_ANNOTATE_CHARSET_MASK:
7076 produce_charset (coding, charbuf, pos);
7077 break;
7078 }
df7492f9 7079 charbuf += len;
d46c5b12 7080 }
a84f1519 7081 }
d46c5b12
KH
7082}
7083
df7492f9
KH
7084/* Decode the data at CODING->src_object into CODING->dst_object.
7085 CODING->src_object is a buffer, a string, or nil.
7086 CODING->dst_object is a buffer.
d46c5b12 7087
df7492f9
KH
7088 If CODING->src_object is a buffer, it must be the current buffer.
7089 In this case, if CODING->src_pos is positive, it is a position of
7090 the source text in the buffer, otherwise, the source text is in the
7091 gap area of the buffer, and CODING->src_pos specifies the offset of
7092 the text from GPT (which must be the same as PT). If this is the
7093 same buffer as CODING->dst_object, CODING->src_pos must be
7094 negative.
d46c5b12 7095
b6828792 7096 If CODING->src_object is a string, CODING->src_pos is an index to
df7492f9 7097 that string.
d46c5b12 7098
df7492f9
KH
7099 If CODING->src_object is nil, CODING->source must already point to
7100 the non-relocatable memory area. In this case, CODING->src_pos is
7101 an offset from CODING->source.
73be902c 7102
df7492f9
KH
7103 The decoded data is inserted at the current point of the buffer
7104 CODING->dst_object.
7105*/
d46c5b12 7106
df7492f9
KH
7107static int
7108decode_coding (coding)
d46c5b12 7109 struct coding_system *coding;
d46c5b12 7110{
df7492f9 7111 Lisp_Object attrs;
24a73b0a 7112 Lisp_Object undo_list;
7d64c6ad 7113 Lisp_Object translation_table;
69a80ea3
KH
7114 int carryover;
7115 int i;
d46c5b12 7116
df7492f9
KH
7117 if (BUFFERP (coding->src_object)
7118 && coding->src_pos > 0
7119 && coding->src_pos < GPT
7120 && coding->src_pos + coding->src_chars > GPT)
7121 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 7122
24a73b0a 7123 undo_list = Qt;
df7492f9 7124 if (BUFFERP (coding->dst_object))
1c3478b0 7125 {
df7492f9
KH
7126 if (current_buffer != XBUFFER (coding->dst_object))
7127 set_buffer_internal (XBUFFER (coding->dst_object));
7128 if (GPT != PT)
7129 move_gap_both (PT, PT_BYTE);
24a73b0a
KH
7130 undo_list = current_buffer->undo_list;
7131 current_buffer->undo_list = Qt;
1c3478b0
KH
7132 }
7133
df7492f9
KH
7134 coding->consumed = coding->consumed_char = 0;
7135 coding->produced = coding->produced_char = 0;
7136 coding->chars_at_source = 0;
065e3595 7137 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 7138 coding->errors = 0;
1c3478b0 7139
df7492f9
KH
7140 ALLOC_CONVERSION_WORK_AREA (coding);
7141
7142 attrs = CODING_ID_ATTRS (coding->id);
2170c8f0 7143 translation_table = get_translation_table (attrs, 0, NULL);
df7492f9 7144
69a80ea3 7145 carryover = 0;
df7492f9 7146 do
b73bfc1c 7147 {
69a80ea3
KH
7148 EMACS_INT pos = coding->dst_pos + coding->produced_char;
7149
df7492f9
KH
7150 coding_set_source (coding);
7151 coding->annotated = 0;
69a80ea3 7152 coding->charbuf_used = carryover;
df7492f9 7153 (*(coding->decoder)) (coding);
df7492f9 7154 coding_set_destination (coding);
69a80ea3 7155 carryover = produce_chars (coding, translation_table, 0);
df7492f9 7156 if (coding->annotated)
69a80ea3
KH
7157 produce_annotation (coding, pos);
7158 for (i = 0; i < carryover; i++)
7159 coding->charbuf[i]
7160 = coding->charbuf[coding->charbuf_used - carryover + i];
d46c5b12 7161 }
df7492f9 7162 while (coding->consumed < coding->src_bytes
54b367bb
KH
7163 && (coding->result == CODING_RESULT_SUCCESS
7164 || coding->result == CODING_RESULT_INVALID_SRC));
d46c5b12 7165
69a80ea3
KH
7166 if (carryover > 0)
7167 {
7168 coding_set_destination (coding);
7169 coding->charbuf_used = carryover;
7170 produce_chars (coding, translation_table, 1);
7171 }
7172
df7492f9
KH
7173 coding->carryover_bytes = 0;
7174 if (coding->consumed < coding->src_bytes)
d46c5b12 7175 {
df7492f9 7176 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 7177 const unsigned char *src;
df7492f9
KH
7178
7179 coding_set_source (coding);
7180 coding_set_destination (coding);
7181 src = coding->source + coding->consumed;
7182
7183 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 7184 {
df7492f9
KH
7185 /* Flush out unprocessed data as binary chars. We are sure
7186 that the number of data is less than the size of
7187 coding->charbuf. */
065e3595 7188 coding->charbuf_used = 0;
b2dab6c8
JR
7189 coding->chars_at_source = 0;
7190
df7492f9 7191 while (nbytes-- > 0)
1c3478b0 7192 {
df7492f9 7193 int c = *src++;
98725083 7194
1c91457d
KH
7195 if (c & 0x80)
7196 c = BYTE8_TO_CHAR (c);
7197 coding->charbuf[coding->charbuf_used++] = c;
1c3478b0 7198 }
f6cbaf43 7199 produce_chars (coding, Qnil, 1);
d46c5b12 7200 }
d46c5b12 7201 else
df7492f9
KH
7202 {
7203 /* Record unprocessed bytes in coding->carryover. We are
7204 sure that the number of data is less than the size of
7205 coding->carryover. */
7206 unsigned char *p = coding->carryover;
7207
f289d375
KH
7208 if (nbytes > sizeof coding->carryover)
7209 nbytes = sizeof coding->carryover;
df7492f9
KH
7210 coding->carryover_bytes = nbytes;
7211 while (nbytes-- > 0)
7212 *p++ = *src++;
1c3478b0 7213 }
df7492f9 7214 coding->consumed = coding->src_bytes;
b73bfc1c 7215 }
69f76525 7216
0a9564cb
EZ
7217 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7218 && !inhibit_eol_conversion)
4347441b 7219 decode_eol (coding);
24a73b0a
KH
7220 if (BUFFERP (coding->dst_object))
7221 {
7222 current_buffer->undo_list = undo_list;
7223 record_insert (coding->dst_pos, coding->produced_char);
7224 }
73be902c 7225 return coding->result;
4ed46869
KH
7226}
7227
aaaf0b1e 7228
e1c23804 7229/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
7230 ending before LIMIT of CODING->src_object (buffer or string), store
7231 the data in BUF, set *STOP to a starting position of the next
7232 composition (if any) or to LIMIT, and return the address of the
7233 next element of BUF.
7234
7235 If such an annotation is not found, set *STOP to a starting
7236 position of a composition after POS (if any) or to LIMIT, and
7237 return BUF. */
7238
7239static INLINE int *
7240handle_composition_annotation (pos, limit, coding, buf, stop)
7241 EMACS_INT pos, limit;
aaaf0b1e 7242 struct coding_system *coding;
ff0dacd7
KH
7243 int *buf;
7244 EMACS_INT *stop;
aaaf0b1e 7245{
ff0dacd7
KH
7246 EMACS_INT start, end;
7247 Lisp_Object prop;
aaaf0b1e 7248
ff0dacd7
KH
7249 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7250 || end > limit)
7251 *stop = limit;
7252 else if (start > pos)
7253 *stop = start;
7254 else
aaaf0b1e 7255 {
ff0dacd7 7256 if (start == pos)
aaaf0b1e 7257 {
ff0dacd7
KH
7258 /* We found a composition. Store the corresponding
7259 annotation data in BUF. */
7260 int *head = buf;
7261 enum composition_method method = COMPOSITION_METHOD (prop);
7262 int nchars = COMPOSITION_LENGTH (prop);
7263
e951386e 7264 ADD_COMPOSITION_DATA (buf, nchars, 0, method);
ff0dacd7 7265 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 7266 {
ff0dacd7
KH
7267 Lisp_Object components;
7268 int len, i, i_byte;
7269
7270 components = COMPOSITION_COMPONENTS (prop);
7271 if (VECTORP (components))
aaaf0b1e 7272 {
ff0dacd7
KH
7273 len = XVECTOR (components)->size;
7274 for (i = 0; i < len; i++)
7275 *buf++ = XINT (AREF (components, i));
aaaf0b1e 7276 }
ff0dacd7 7277 else if (STRINGP (components))
aaaf0b1e 7278 {
8f924df7 7279 len = SCHARS (components);
ff0dacd7
KH
7280 i = i_byte = 0;
7281 while (i < len)
7282 {
7283 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7284 buf++;
7285 }
7286 }
7287 else if (INTEGERP (components))
7288 {
7289 len = 1;
7290 *buf++ = XINT (components);
7291 }
7292 else if (CONSP (components))
7293 {
7294 for (len = 0; CONSP (components);
7295 len++, components = XCDR (components))
7296 *buf++ = XINT (XCAR (components));
aaaf0b1e 7297 }
aaaf0b1e 7298 else
ff0dacd7
KH
7299 abort ();
7300 *head -= len;
aaaf0b1e 7301 }
aaaf0b1e 7302 }
ff0dacd7
KH
7303
7304 if (find_composition (end, limit, &start, &end, &prop,
7305 coding->src_object)
7306 && end <= limit)
7307 *stop = start;
7308 else
7309 *stop = limit;
aaaf0b1e 7310 }
ff0dacd7
KH
7311 return buf;
7312}
7313
7314
e1c23804 7315/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
7316 CODING->src_object (buffer or string), store the data in BUF, set
7317 *STOP to the position where the value of `charset' property changes
7318 (limiting by LIMIT), and return the address of the next element of
7319 BUF.
7320
7321 If the property value is nil, set *STOP to the position where the
7322 property value is non-nil (limiting by LIMIT), and return BUF. */
7323
7324static INLINE int *
7325handle_charset_annotation (pos, limit, coding, buf, stop)
7326 EMACS_INT pos, limit;
7327 struct coding_system *coding;
7328 int *buf;
7329 EMACS_INT *stop;
7330{
7331 Lisp_Object val, next;
7332 int id;
7333
7334 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7335 if (! NILP (val) && CHARSETP (val))
7336 id = XINT (CHARSET_SYMBOL_ID (val));
7337 else
7338 id = -1;
69a80ea3 7339 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
7340 next = Fnext_single_property_change (make_number (pos), Qcharset,
7341 coding->src_object,
7342 make_number (limit));
7343 *stop = XINT (next);
7344 return buf;
7345}
7346
7347
df7492f9 7348static void
09ee6fdd 7349consume_chars (coding, translation_table, max_lookup)
df7492f9 7350 struct coding_system *coding;
433f7f87 7351 Lisp_Object translation_table;
09ee6fdd 7352 int max_lookup;
df7492f9
KH
7353{
7354 int *buf = coding->charbuf;
ff0dacd7 7355 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 7356 const unsigned char *src = coding->source + coding->consumed;
4776e638 7357 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
7358 EMACS_INT pos = coding->src_pos + coding->consumed_char;
7359 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
7360 int multibytep = coding->src_multibyte;
7361 Lisp_Object eol_type;
7362 int c;
ff0dacd7 7363 EMACS_INT stop, stop_composition, stop_charset;
09ee6fdd 7364 int *lookup_buf = NULL;
433f7f87
KH
7365
7366 if (! NILP (translation_table))
09ee6fdd 7367 lookup_buf = alloca (sizeof (int) * max_lookup);
88993dfd 7368
0a9564cb 7369 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
df7492f9
KH
7370 if (VECTORP (eol_type))
7371 eol_type = Qunix;
88993dfd 7372
df7492f9
KH
7373 /* Note: composition handling is not yet implemented. */
7374 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 7375
0b5670c9
KH
7376 if (NILP (coding->src_object))
7377 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 7378 else
0b5670c9
KH
7379 {
7380 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7381 stop = stop_composition = pos;
7382 else
7383 stop = stop_composition = end_pos;
7384 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7385 stop = stop_charset = pos;
7386 else
7387 stop_charset = end_pos;
7388 }
ec6d2bb8 7389
24a73b0a 7390 /* Compensate for CRLF and conversion. */
ff0dacd7 7391 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 7392 while (buf < buf_end)
aaaf0b1e 7393 {
433f7f87
KH
7394 Lisp_Object trans;
7395
df7492f9 7396 if (pos == stop)
ec6d2bb8 7397 {
df7492f9
KH
7398 if (pos == end_pos)
7399 break;
ff0dacd7
KH
7400 if (pos == stop_composition)
7401 buf = handle_composition_annotation (pos, end_pos, coding,
7402 buf, &stop_composition);
7403 if (pos == stop_charset)
7404 buf = handle_charset_annotation (pos, end_pos, coding,
7405 buf, &stop_charset);
7406 stop = (stop_composition < stop_charset
7407 ? stop_composition : stop_charset);
df7492f9
KH
7408 }
7409
7410 if (! multibytep)
4776e638 7411 {
d3e4cb56 7412 EMACS_INT bytes;
aaaf0b1e 7413
ea29edf2
KH
7414 if (coding->encoder == encode_coding_raw_text)
7415 c = *src++, pos++;
7416 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
db274c7a 7417 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
4776e638 7418 else
f03caae0 7419 c = BYTE8_TO_CHAR (*src), src++, pos++;
4776e638 7420 }
df7492f9 7421 else
db274c7a 7422 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
df7492f9
KH
7423 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7424 c = '\n';
7425 if (! EQ (eol_type, Qunix))
aaaf0b1e 7426 {
df7492f9 7427 if (c == '\n')
aaaf0b1e 7428 {
df7492f9
KH
7429 if (EQ (eol_type, Qdos))
7430 *buf++ = '\r';
7431 else
7432 c = '\r';
aaaf0b1e
KH
7433 }
7434 }
433f7f87 7435
e6a54062 7436 trans = Qnil;
09ee6fdd 7437 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 7438 if (NILP (trans))
433f7f87
KH
7439 *buf++ = c;
7440 else
7441 {
7442 int from_nchars = 1, to_nchars = 1;
7443 int *lookup_buf_end;
7444 const unsigned char *p = src;
7445 int i;
7446
7447 lookup_buf[0] = c;
7448 for (i = 1; i < max_lookup && p < src_end; i++)
7449 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7450 lookup_buf_end = lookup_buf + i;
e951386e
KH
7451 trans = get_translation (trans, lookup_buf, lookup_buf_end);
7452 if (INTEGERP (trans))
7453 c = XINT (trans);
7454 else if (CONSP (trans))
7455 {
7456 from_nchars = ASIZE (XCAR (trans));
7457 trans = XCDR (trans);
7458 if (INTEGERP (trans))
7459 c = XINT (trans);
7460 else
7461 {
7462 to_nchars = ASIZE (trans);
7463 if (buf + to_nchars > buf_end)
7464 break;
7465 c = XINT (AREF (trans, 0));
7466 }
7467 }
7468 else
433f7f87 7469 break;
e951386e 7470 *buf++ = c;
433f7f87
KH
7471 for (i = 1; i < to_nchars; i++)
7472 *buf++ = XINT (AREF (trans, i));
7473 for (i = 1; i < from_nchars; i++, pos++)
7474 src += MULTIBYTE_LENGTH_NO_CHECK (src);
7475 }
aaaf0b1e 7476 }
ec6d2bb8 7477
df7492f9
KH
7478 coding->consumed = src - coding->source;
7479 coding->consumed_char = pos - coding->src_pos;
7480 coding->charbuf_used = buf - coding->charbuf;
7481 coding->chars_at_source = 0;
aaaf0b1e
KH
7482}
7483
4ed46869 7484
df7492f9
KH
7485/* Encode the text at CODING->src_object into CODING->dst_object.
7486 CODING->src_object is a buffer or a string.
7487 CODING->dst_object is a buffer or nil.
7488
7489 If CODING->src_object is a buffer, it must be the current buffer.
7490 In this case, if CODING->src_pos is positive, it is a position of
7491 the source text in the buffer, otherwise. the source text is in the
7492 gap area of the buffer, and coding->src_pos specifies the offset of
7493 the text from GPT (which must be the same as PT). If this is the
7494 same buffer as CODING->dst_object, CODING->src_pos must be
7495 negative and CODING should not have `pre-write-conversion'.
7496
7497 If CODING->src_object is a string, CODING should not have
7498 `pre-write-conversion'.
7499
7500 If CODING->dst_object is a buffer, the encoded data is inserted at
7501 the current point of that buffer.
7502
7503 If CODING->dst_object is nil, the encoded data is placed at the
7504 memory area specified by CODING->destination. */
7505
7506static int
7507encode_coding (coding)
4ed46869 7508 struct coding_system *coding;
4ed46869 7509{
df7492f9 7510 Lisp_Object attrs;
7d64c6ad 7511 Lisp_Object translation_table;
09ee6fdd 7512 int max_lookup;
9861e777 7513
df7492f9 7514 attrs = CODING_ID_ATTRS (coding->id);
ea29edf2
KH
7515 if (coding->encoder == encode_coding_raw_text)
7516 translation_table = Qnil, max_lookup = 0;
7517 else
7518 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 7519
df7492f9 7520 if (BUFFERP (coding->dst_object))
8844fa83 7521 {
df7492f9
KH
7522 set_buffer_internal (XBUFFER (coding->dst_object));
7523 coding->dst_multibyte
7524 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 7525 }
4ed46869 7526
b73bfc1c 7527 coding->consumed = coding->consumed_char = 0;
df7492f9 7528 coding->produced = coding->produced_char = 0;
065e3595 7529 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 7530 coding->errors = 0;
b73bfc1c 7531
df7492f9 7532 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 7533
df7492f9
KH
7534 do {
7535 coding_set_source (coding);
09ee6fdd 7536 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
7537 coding_set_destination (coding);
7538 (*(coding->encoder)) (coding);
7539 } while (coding->consumed_char < coding->src_chars);
7540
284201e4 7541 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
df7492f9
KH
7542 insert_from_gap (coding->produced_char, coding->produced);
7543
7544 return (coding->result);
ec6d2bb8
KH
7545}
7546
fb88bf2d 7547
24a73b0a
KH
7548/* Name (or base name) of work buffer for code conversion. */
7549static Lisp_Object Vcode_conversion_workbuf_name;
d46c5b12 7550
24a73b0a
KH
7551/* A working buffer used by the top level conversion. Once it is
7552 created, it is never destroyed. It has the name
7553 Vcode_conversion_workbuf_name. The other working buffers are
7554 destroyed after the use is finished, and their names are modified
7555 versions of Vcode_conversion_workbuf_name. */
7556static Lisp_Object Vcode_conversion_reused_workbuf;
b73bfc1c 7557
24a73b0a
KH
7558/* Nonzero iff Vcode_conversion_reused_workbuf is already in use. */
7559static int reused_workbuf_in_use;
4ed46869 7560
24a73b0a
KH
7561
7562/* Return a working buffer of code convesion. MULTIBYTE specifies the
7563 multibyteness of returning buffer. */
b73bfc1c 7564
f6cbaf43 7565static Lisp_Object
24a73b0a 7566make_conversion_work_buffer (multibyte)
f6cbaf43 7567 int multibyte;
df7492f9 7568{
24a73b0a
KH
7569 Lisp_Object name, workbuf;
7570 struct buffer *current;
4ed46869 7571
24a73b0a 7572 if (reused_workbuf_in_use++)
065e3595
KH
7573 {
7574 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7575 workbuf = Fget_buffer_create (name);
7576 }
df7492f9 7577 else
065e3595 7578 {
159bd5a2 7579 if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
a993c7a1
KH
7580 Vcode_conversion_reused_workbuf
7581 = Fget_buffer_create (Vcode_conversion_workbuf_name);
7582 workbuf = Vcode_conversion_reused_workbuf;
065e3595 7583 }
24a73b0a
KH
7584 current = current_buffer;
7585 set_buffer_internal (XBUFFER (workbuf));
df36ff1f
CY
7586 /* We can't allow modification hooks to run in the work buffer. For
7587 instance, directory_files_internal assumes that file decoding
7588 doesn't compile new regexps. */
7589 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
3ed051d4 7590 Ferase_buffer ();
df7492f9 7591 current_buffer->undo_list = Qt;
24a73b0a 7592 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
df7492f9 7593 set_buffer_internal (current);
24a73b0a 7594 return workbuf;
df7492f9 7595}
d46c5b12 7596
24a73b0a 7597
4776e638 7598static Lisp_Object
24a73b0a
KH
7599code_conversion_restore (arg)
7600 Lisp_Object arg;
4776e638 7601{
24a73b0a 7602 Lisp_Object current, workbuf;
948bdcf3 7603 struct gcpro gcpro1;
24a73b0a 7604
948bdcf3 7605 GCPRO1 (arg);
24a73b0a
KH
7606 current = XCAR (arg);
7607 workbuf = XCDR (arg);
7608 if (! NILP (workbuf))
7609 {
7610 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7611 reused_workbuf_in_use = 0;
7612 else if (! NILP (Fbuffer_live_p (workbuf)))
7613 Fkill_buffer (workbuf);
7614 }
7615 set_buffer_internal (XBUFFER (current));
948bdcf3 7616 UNGCPRO;
4776e638
KH
7617 return Qnil;
7618}
b73bfc1c 7619
24a73b0a
KH
7620Lisp_Object
7621code_conversion_save (with_work_buf, multibyte)
4776e638 7622 int with_work_buf, multibyte;
df7492f9 7623{
24a73b0a 7624 Lisp_Object workbuf = Qnil;
b73bfc1c 7625
4776e638 7626 if (with_work_buf)
24a73b0a
KH
7627 workbuf = make_conversion_work_buffer (multibyte);
7628 record_unwind_protect (code_conversion_restore,
7629 Fcons (Fcurrent_buffer (), workbuf));
4776e638 7630 return workbuf;
df7492f9 7631}
d46c5b12 7632
df7492f9
KH
7633int
7634decode_coding_gap (coding, chars, bytes)
7635 struct coding_system *coding;
7636 EMACS_INT chars, bytes;
7637{
7638 int count = specpdl_ptr - specpdl;
5e5c78be 7639 Lisp_Object attrs;
fb88bf2d 7640
24a73b0a 7641 code_conversion_save (0, 0);
ec6d2bb8 7642
24a73b0a 7643 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7644 coding->src_chars = chars;
7645 coding->src_bytes = bytes;
7646 coding->src_pos = -chars;
7647 coding->src_pos_byte = -bytes;
7648 coding->src_multibyte = chars < bytes;
24a73b0a 7649 coding->dst_object = coding->src_object;
df7492f9
KH
7650 coding->dst_pos = PT;
7651 coding->dst_pos_byte = PT_BYTE;
71c81426 7652 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869 7653
df7492f9
KH
7654 if (CODING_REQUIRE_DETECTION (coding))
7655 detect_coding (coding);
8f924df7 7656
9286b333 7657 coding->mode |= CODING_MODE_LAST_BLOCK;
287c57d7 7658 current_buffer->text->inhibit_shrinking = 1;
df7492f9 7659 decode_coding (coding);
287c57d7 7660 current_buffer->text->inhibit_shrinking = 0;
d46c5b12 7661
5e5c78be
KH
7662 attrs = CODING_ID_ATTRS (coding->id);
7663 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 7664 {
5e5c78be
KH
7665 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7666 Lisp_Object val;
7667
7668 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
7669 val = call1 (CODING_ATTR_POST_READ (attrs),
7670 make_number (coding->produced_char));
5e5c78be
KH
7671 CHECK_NATNUM (val);
7672 coding->produced_char += Z - prev_Z;
7673 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 7674 }
4ed46869 7675
df7492f9 7676 unbind_to (count, Qnil);
b73bfc1c
KH
7677 return coding->result;
7678}
52d41803 7679
4ed46869 7680int
df7492f9 7681encode_coding_gap (coding, chars, bytes)
4ed46869 7682 struct coding_system *coding;
df7492f9 7683 EMACS_INT chars, bytes;
4ed46869 7684{
df7492f9 7685 int count = specpdl_ptr - specpdl;
4ed46869 7686
24a73b0a 7687 code_conversion_save (0, 0);
4ed46869 7688
24a73b0a 7689 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7690 coding->src_chars = chars;
7691 coding->src_bytes = bytes;
7692 coding->src_pos = -chars;
7693 coding->src_pos_byte = -bytes;
7694 coding->src_multibyte = chars < bytes;
7695 coding->dst_object = coding->src_object;
7696 coding->dst_pos = PT;
7697 coding->dst_pos_byte = PT_BYTE;
4ed46869 7698
df7492f9 7699 encode_coding (coding);
b73bfc1c 7700
df7492f9
KH
7701 unbind_to (count, Qnil);
7702 return coding->result;
7703}
4ed46869 7704
d46c5b12 7705
df7492f9
KH
7706/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7707 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 7708
df7492f9 7709 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 7710
df7492f9
KH
7711 If it is a buffer, the text is at point of the buffer. FROM and TO
7712 are positions in the buffer.
b73bfc1c 7713
df7492f9
KH
7714 If it is a string, the text is at the beginning of the string.
7715 FROM and TO are indices to the string.
4ed46869 7716
df7492f9
KH
7717 If it is nil, the text is at coding->source. FROM and TO are
7718 indices to coding->source.
bb10be8b 7719
df7492f9 7720 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 7721
df7492f9
KH
7722 If it is a buffer, the decoded text is inserted at point of the
7723 buffer. If the buffer is the same as SRC_OBJECT, the source text
7724 is deleted.
4ed46869 7725
df7492f9
KH
7726 If it is Qt, a string is made from the decoded text, and
7727 set in CODING->dst_object.
d46c5b12 7728
df7492f9 7729 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 7730 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
7731 CODING->destination by xmalloc. If the decoded text is longer than
7732 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7733 */
d46c5b12 7734
df7492f9
KH
7735void
7736decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7737 dst_object)
d46c5b12 7738 struct coding_system *coding;
df7492f9
KH
7739 Lisp_Object src_object;
7740 EMACS_INT from, from_byte, to, to_byte;
7741 Lisp_Object dst_object;
d46c5b12 7742{
df7492f9
KH
7743 int count = specpdl_ptr - specpdl;
7744 unsigned char *destination;
7745 EMACS_INT dst_bytes;
7746 EMACS_INT chars = to - from;
7747 EMACS_INT bytes = to_byte - from_byte;
7748 Lisp_Object attrs;
4776e638 7749 int saved_pt = -1, saved_pt_byte;
64cedb0c 7750 int need_marker_adjustment = 0;
b3bfad50 7751 Lisp_Object old_deactivate_mark;
d46c5b12 7752
b3bfad50 7753 old_deactivate_mark = Vdeactivate_mark;
93dec019 7754
df7492f9 7755 if (NILP (dst_object))
d46c5b12 7756 {
df7492f9
KH
7757 destination = coding->destination;
7758 dst_bytes = coding->dst_bytes;
d46c5b12 7759 }
93dec019 7760
df7492f9
KH
7761 coding->src_object = src_object;
7762 coding->src_chars = chars;
7763 coding->src_bytes = bytes;
7764 coding->src_multibyte = chars < bytes;
70ad9fc4 7765
df7492f9 7766 if (STRINGP (src_object))
d46c5b12 7767 {
df7492f9
KH
7768 coding->src_pos = from;
7769 coding->src_pos_byte = from_byte;
d46c5b12 7770 }
df7492f9 7771 else if (BUFFERP (src_object))
88993dfd 7772 {
df7492f9
KH
7773 set_buffer_internal (XBUFFER (src_object));
7774 if (from != GPT)
7775 move_gap_both (from, from_byte);
7776 if (EQ (src_object, dst_object))
fb88bf2d 7777 {
64cedb0c
KH
7778 struct Lisp_Marker *tail;
7779
7780 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7781 {
7782 tail->need_adjustment
7783 = tail->charpos == (tail->insertion_type ? from : to);
7784 need_marker_adjustment |= tail->need_adjustment;
7785 }
4776e638 7786 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9 7787 TEMP_SET_PT_BOTH (from, from_byte);
f4a3cc44 7788 current_buffer->text->inhibit_shrinking = 1;
df7492f9
KH
7789 del_range_both (from, from_byte, to, to_byte, 1);
7790 coding->src_pos = -chars;
7791 coding->src_pos_byte = -bytes;
fb88bf2d 7792 }
df7492f9 7793 else
fb88bf2d 7794 {
df7492f9
KH
7795 coding->src_pos = from;
7796 coding->src_pos_byte = from_byte;
fb88bf2d 7797 }
88993dfd
KH
7798 }
7799
df7492f9
KH
7800 if (CODING_REQUIRE_DETECTION (coding))
7801 detect_coding (coding);
7802 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 7803
2cb26057
KH
7804 if (EQ (dst_object, Qt)
7805 || (! NILP (CODING_ATTR_POST_READ (attrs))
7806 && NILP (dst_object)))
b73bfc1c 7807 {
a1567c45
SM
7808 coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7809 coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
df7492f9
KH
7810 coding->dst_pos = BEG;
7811 coding->dst_pos_byte = BEG_BYTE;
b73bfc1c 7812 }
df7492f9 7813 else if (BUFFERP (dst_object))
d46c5b12 7814 {
24a73b0a 7815 code_conversion_save (0, 0);
df7492f9
KH
7816 coding->dst_object = dst_object;
7817 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7818 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7819 coding->dst_multibyte
7820 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
7821 }
7822 else
7823 {
24a73b0a 7824 code_conversion_save (0, 0);
df7492f9 7825 coding->dst_object = Qnil;
0154725e
SM
7826 /* Most callers presume this will return a multibyte result, and they
7827 won't use `binary' or `raw-text' anyway, so let's not worry about
7828 CODING_FOR_UNIBYTE. */
bb555731 7829 coding->dst_multibyte = 1;
d46c5b12
KH
7830 }
7831
df7492f9 7832 decode_coding (coding);
fa46990e 7833
df7492f9
KH
7834 if (BUFFERP (coding->dst_object))
7835 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 7836
df7492f9 7837 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 7838 {
b3bfad50 7839 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
df7492f9 7840 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 7841 Lisp_Object val;
d46c5b12 7842
c0cc7f7f 7843 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
b3bfad50
KH
7844 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7845 old_deactivate_mark);
d4850d67
KH
7846 val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7847 make_number (coding->produced_char));
df7492f9
KH
7848 UNGCPRO;
7849 CHECK_NATNUM (val);
7850 coding->produced_char += Z - prev_Z;
7851 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 7852 }
de79a6a5 7853
df7492f9 7854 if (EQ (dst_object, Qt))
ec6d2bb8 7855 {
df7492f9
KH
7856 coding->dst_object = Fbuffer_string ();
7857 }
7858 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7859 {
7860 set_buffer_internal (XBUFFER (coding->dst_object));
7861 if (dst_bytes < coding->produced)
7862 {
b3bfad50 7863 destination = xrealloc (destination, coding->produced);
df7492f9
KH
7864 if (! destination)
7865 {
065e3595
KH
7866 record_conversion_result (coding,
7867 CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
7868 unbind_to (count, Qnil);
7869 return;
7870 }
7871 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7872 move_gap_both (BEGV, BEGV_BYTE);
7873 bcopy (BEGV_ADDR, destination, coding->produced);
7874 coding->destination = destination;
d46c5b12 7875 }
ec6d2bb8 7876 }
b73bfc1c 7877
4776e638
KH
7878 if (saved_pt >= 0)
7879 {
7880 /* This is the case of:
7881 (BUFFERP (src_object) && EQ (src_object, dst_object))
7882 As we have moved PT while replacing the original buffer
7883 contents, we must recover it now. */
7884 set_buffer_internal (XBUFFER (src_object));
f4a3cc44 7885 current_buffer->text->inhibit_shrinking = 0;
4776e638
KH
7886 if (saved_pt < from)
7887 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7888 else if (saved_pt < from + chars)
7889 TEMP_SET_PT_BOTH (from, from_byte);
7890 else if (! NILP (current_buffer->enable_multibyte_characters))
7891 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7892 saved_pt_byte + (coding->produced - bytes));
7893 else
7894 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7895 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7896
7897 if (need_marker_adjustment)
7898 {
7899 struct Lisp_Marker *tail;
7900
7901 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7902 if (tail->need_adjustment)
7903 {
7904 tail->need_adjustment = 0;
7905 if (tail->insertion_type)
7906 {
7907 tail->bytepos = from_byte;
7908 tail->charpos = from;
7909 }
7910 else
7911 {
7912 tail->bytepos = from_byte + coding->produced;
7913 tail->charpos
7914 = (NILP (current_buffer->enable_multibyte_characters)
7915 ? tail->bytepos : from + coding->produced_char);
7916 }
7917 }
7918 }
d46c5b12 7919 }
4776e638 7920
b3bfad50 7921 Vdeactivate_mark = old_deactivate_mark;
065e3595 7922 unbind_to (count, coding->dst_object);
d46c5b12
KH
7923}
7924
d46c5b12 7925
df7492f9
KH
7926void
7927encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7928 dst_object)
d46c5b12 7929 struct coding_system *coding;
df7492f9
KH
7930 Lisp_Object src_object;
7931 EMACS_INT from, from_byte, to, to_byte;
7932 Lisp_Object dst_object;
d46c5b12 7933{
b73bfc1c 7934 int count = specpdl_ptr - specpdl;
df7492f9
KH
7935 EMACS_INT chars = to - from;
7936 EMACS_INT bytes = to_byte - from_byte;
7937 Lisp_Object attrs;
4776e638 7938 int saved_pt = -1, saved_pt_byte;
64cedb0c 7939 int need_marker_adjustment = 0;
c02d943b 7940 int kill_src_buffer = 0;
b3bfad50 7941 Lisp_Object old_deactivate_mark;
df7492f9 7942
b3bfad50 7943 old_deactivate_mark = Vdeactivate_mark;
df7492f9
KH
7944
7945 coding->src_object = src_object;
7946 coding->src_chars = chars;
7947 coding->src_bytes = bytes;
7948 coding->src_multibyte = chars < bytes;
7949
7950 attrs = CODING_ID_ATTRS (coding->id);
7951
64cedb0c
KH
7952 if (EQ (src_object, dst_object))
7953 {
7954 struct Lisp_Marker *tail;
7955
7956 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7957 {
7958 tail->need_adjustment
7959 = tail->charpos == (tail->insertion_type ? from : to);
7960 need_marker_adjustment |= tail->need_adjustment;
7961 }
7962 }
7963
df7492f9 7964 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 7965 {
24a73b0a 7966 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
7967 set_buffer_internal (XBUFFER (coding->src_object));
7968 if (STRINGP (src_object))
7969 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7970 else if (BUFFERP (src_object))
7971 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7972 else
7973 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 7974
df7492f9
KH
7975 if (EQ (src_object, dst_object))
7976 {
7977 set_buffer_internal (XBUFFER (src_object));
4776e638 7978 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
7979 del_range_both (from, from_byte, to, to_byte, 1);
7980 set_buffer_internal (XBUFFER (coding->src_object));
7981 }
7982
d4850d67
KH
7983 {
7984 Lisp_Object args[3];
b3bfad50 7985 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
d4850d67 7986
b3bfad50
KH
7987 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7988 old_deactivate_mark);
d4850d67
KH
7989 args[0] = CODING_ATTR_PRE_WRITE (attrs);
7990 args[1] = make_number (BEG);
7991 args[2] = make_number (Z);
7992 safe_call (3, args);
b3bfad50 7993 UNGCPRO;
d4850d67 7994 }
c02d943b
KH
7995 if (XBUFFER (coding->src_object) != current_buffer)
7996 kill_src_buffer = 1;
ac87bbef 7997 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7998 if (BEG != GPT)
7999 move_gap_both (BEG, BEG_BYTE);
8000 coding->src_chars = Z - BEG;
8001 coding->src_bytes = Z_BYTE - BEG_BYTE;
8002 coding->src_pos = BEG;
8003 coding->src_pos_byte = BEG_BYTE;
8004 coding->src_multibyte = Z < Z_BYTE;
8005 }
8006 else if (STRINGP (src_object))
d46c5b12 8007 {
24a73b0a 8008 code_conversion_save (0, 0);
df7492f9
KH
8009 coding->src_pos = from;
8010 coding->src_pos_byte = from_byte;
b73bfc1c 8011 }
df7492f9 8012 else if (BUFFERP (src_object))
b73bfc1c 8013 {
24a73b0a 8014 code_conversion_save (0, 0);
df7492f9 8015 set_buffer_internal (XBUFFER (src_object));
df7492f9 8016 if (EQ (src_object, dst_object))
d46c5b12 8017 {
4776e638 8018 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
8019 coding->src_object = del_range_1 (from, to, 1, 1);
8020 coding->src_pos = 0;
8021 coding->src_pos_byte = 0;
d46c5b12 8022 }
df7492f9 8023 else
d46c5b12 8024 {
ff0dacd7
KH
8025 if (from < GPT && to >= GPT)
8026 move_gap_both (from, from_byte);
df7492f9
KH
8027 coding->src_pos = from;
8028 coding->src_pos_byte = from_byte;
d46c5b12 8029 }
d46c5b12 8030 }
4776e638 8031 else
24a73b0a 8032 code_conversion_save (0, 0);
d46c5b12 8033
df7492f9 8034 if (BUFFERP (dst_object))
88993dfd 8035 {
df7492f9 8036 coding->dst_object = dst_object;
28f67a95
KH
8037 if (EQ (src_object, dst_object))
8038 {
8039 coding->dst_pos = from;
8040 coding->dst_pos_byte = from_byte;
8041 }
8042 else
8043 {
319a3947
KH
8044 struct buffer *current = current_buffer;
8045
8046 set_buffer_temp (XBUFFER (dst_object));
8047 coding->dst_pos = PT;
8048 coding->dst_pos_byte = PT_BYTE;
8049 move_gap_both (coding->dst_pos, coding->dst_pos_byte);
8050 set_buffer_temp (current);
28f67a95 8051 }
df7492f9
KH
8052 coding->dst_multibyte
8053 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 8054 }
df7492f9 8055 else if (EQ (dst_object, Qt))
d46c5b12 8056 {
df7492f9 8057 coding->dst_object = Qnil;
df7492f9 8058 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
8059 if (coding->dst_bytes == 0)
8060 coding->dst_bytes = 1;
8061 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 8062 coding->dst_multibyte = 0;
d46c5b12
KH
8063 }
8064 else
8065 {
df7492f9
KH
8066 coding->dst_object = Qnil;
8067 coding->dst_multibyte = 0;
d46c5b12
KH
8068 }
8069
df7492f9 8070 encode_coding (coding);
d46c5b12 8071
df7492f9 8072 if (EQ (dst_object, Qt))
d46c5b12 8073 {
df7492f9
KH
8074 if (BUFFERP (coding->dst_object))
8075 coding->dst_object = Fbuffer_string ();
8076 else
d46c5b12 8077 {
df7492f9
KH
8078 coding->dst_object
8079 = make_unibyte_string ((char *) coding->destination,
8080 coding->produced);
8081 xfree (coding->destination);
d46c5b12 8082 }
4ed46869 8083 }
d46c5b12 8084
4776e638
KH
8085 if (saved_pt >= 0)
8086 {
8087 /* This is the case of:
8088 (BUFFERP (src_object) && EQ (src_object, dst_object))
8089 As we have moved PT while replacing the original buffer
8090 contents, we must recover it now. */
8091 set_buffer_internal (XBUFFER (src_object));
8092 if (saved_pt < from)
8093 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8094 else if (saved_pt < from + chars)
8095 TEMP_SET_PT_BOTH (from, from_byte);
8096 else if (! NILP (current_buffer->enable_multibyte_characters))
8097 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8098 saved_pt_byte + (coding->produced - bytes));
d46c5b12 8099 else
4776e638
KH
8100 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8101 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
8102
8103 if (need_marker_adjustment)
8104 {
8105 struct Lisp_Marker *tail;
8106
8107 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8108 if (tail->need_adjustment)
8109 {
8110 tail->need_adjustment = 0;
8111 if (tail->insertion_type)
8112 {
8113 tail->bytepos = from_byte;
8114 tail->charpos = from;
8115 }
8116 else
8117 {
8118 tail->bytepos = from_byte + coding->produced;
8119 tail->charpos
8120 = (NILP (current_buffer->enable_multibyte_characters)
8121 ? tail->bytepos : from + coding->produced_char);
8122 }
8123 }
8124 }
4776e638
KH
8125 }
8126
c02d943b
KH
8127 if (kill_src_buffer)
8128 Fkill_buffer (coding->src_object);
b3bfad50
KH
8129
8130 Vdeactivate_mark = old_deactivate_mark;
df7492f9 8131 unbind_to (count, Qnil);
b73bfc1c
KH
8132}
8133
df7492f9 8134
b73bfc1c 8135Lisp_Object
df7492f9 8136preferred_coding_system ()
b73bfc1c 8137{
df7492f9 8138 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 8139
df7492f9 8140 return CODING_ID_NAME (id);
4ed46869
KH
8141}
8142
8143\f
8144#ifdef emacs
1397dc18 8145/*** 11. Emacs Lisp library functions ***/
4ed46869 8146
4ed46869 8147DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 8148 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 8149See the documentation of `define-coding-system' for information
48b0f3ae 8150about coding-system objects. */)
d4a1d553
JB
8151 (object)
8152 Lisp_Object object;
4ed46869 8153{
d4a1d553
JB
8154 if (NILP (object)
8155 || CODING_SYSTEM_ID (object) >= 0)
44e8490d 8156 return Qt;
d4a1d553
JB
8157 if (! SYMBOLP (object)
8158 || NILP (Fget (object, Qcoding_system_define_form)))
44e8490d
KH
8159 return Qnil;
8160 return Qt;
4ed46869
KH
8161}
8162
9d991de8
RS
8163DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8164 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
8165 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
8166 (prompt)
4ed46869
KH
8167 Lisp_Object prompt;
8168{
e0e989f6 8169 Lisp_Object val;
9d991de8
RS
8170 do
8171 {
4608c386
KH
8172 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8173 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 8174 }
8f924df7 8175 while (SCHARS (val) == 0);
e0e989f6 8176 return (Fintern (val, Qnil));
4ed46869
KH
8177}
8178
9b787f3e 8179DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae 8180 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
c7183fb8
GM
8181If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8182Ignores case when completing coding systems (all Emacs coding systems
8183are lower-case). */)
48b0f3ae 8184 (prompt, default_coding_system)
9b787f3e 8185 Lisp_Object prompt, default_coding_system;
4ed46869 8186{
f44d27ce 8187 Lisp_Object val;
c7183fb8
GM
8188 int count = SPECPDL_INDEX ();
8189
9b787f3e 8190 if (SYMBOLP (default_coding_system))
57d25e6f 8191 default_coding_system = SYMBOL_NAME (default_coding_system);
c7183fb8 8192 specbind (Qcompletion_ignore_case, Qt);
4608c386 8193 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
8194 Qt, Qnil, Qcoding_system_history,
8195 default_coding_system, Qnil);
c7183fb8 8196 unbind_to (count, Qnil);
8f924df7 8197 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
8198}
8199
8200DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8201 1, 1, 0,
48b0f3ae 8202 doc: /* Check validity of CODING-SYSTEM.
9ffd559c
KH
8203If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8204It is valid if it is nil or a symbol defined as a coding system by the
8205function `define-coding-system'. */)
df7492f9 8206 (coding_system)
4ed46869
KH
8207 Lisp_Object coding_system;
8208{
44e8490d
KH
8209 Lisp_Object define_form;
8210
8211 define_form = Fget (coding_system, Qcoding_system_define_form);
8212 if (! NILP (define_form))
8213 {
8214 Fput (coding_system, Qcoding_system_define_form, Qnil);
8215 safe_eval (define_form);
8216 }
4ed46869
KH
8217 if (!NILP (Fcoding_system_p (coding_system)))
8218 return coding_system;
fcad4ec4 8219 xsignal1 (Qcoding_system_error, coding_system);
4ed46869 8220}
df7492f9 8221
3a73fa5d 8222\f
89528eb3
KH
8223/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
8224 HIGHEST is nonzero, return the coding system of the highest
8225 priority among the detected coding systems. Otherwise return a
8226 list of detected coding systems sorted by their priorities. If
8227 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8228 multibyte form but contains only ASCII and eight-bit chars.
8229 Otherwise, the bytes are raw bytes.
8230
8231 CODING-SYSTEM controls the detection as below:
8232
8233 If it is nil, detect both text-format and eol-format. If the
8234 text-format part of CODING-SYSTEM is already specified
8235 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
8236 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8237 detect only text-format. */
8238
d46c5b12 8239Lisp_Object
24a73b0a
KH
8240detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
8241 coding_system)
8f924df7 8242 const unsigned char *src;
13818c30
SM
8243 EMACS_INT src_chars, src_bytes;
8244 int highest;
0a28aafb 8245 int multibytep;
df7492f9 8246 Lisp_Object coding_system;
4ed46869 8247{
8f924df7 8248 const unsigned char *src_end = src + src_bytes;
df7492f9 8249 Lisp_Object attrs, eol_type;
4533845d 8250 Lisp_Object val = Qnil;
df7492f9 8251 struct coding_system coding;
89528eb3 8252 int id;
ff0dacd7 8253 struct coding_detection_info detect_info;
24a73b0a 8254 enum coding_category base_category;
2f3cbb32 8255 int null_byte_found = 0, eight_bit_found = 0;
b73bfc1c 8256
df7492f9
KH
8257 if (NILP (coding_system))
8258 coding_system = Qundecided;
8259 setup_coding_system (coding_system, &coding);
8260 attrs = CODING_ID_ATTRS (coding.id);
8261 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 8262 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 8263
df7492f9 8264 coding.source = src;
24a73b0a 8265 coding.src_chars = src_chars;
df7492f9
KH
8266 coding.src_bytes = src_bytes;
8267 coding.src_multibyte = multibytep;
8268 coding.consumed = 0;
89528eb3 8269 coding.mode |= CODING_MODE_LAST_BLOCK;
c0e16b14 8270 coding.head_ascii = 0;
d46c5b12 8271
ff0dacd7 8272 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 8273
89528eb3 8274 /* At first, detect text-format if necessary. */
24a73b0a
KH
8275 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8276 if (base_category == coding_category_undecided)
4ed46869 8277 {
ff0dacd7
KH
8278 enum coding_category category;
8279 struct coding_system *this;
8280 int c, i;
88993dfd 8281
24a73b0a 8282 /* Skip all ASCII bytes except for a few ISO2022 controls. */
2f3cbb32 8283 for (; src < src_end; src++)
4ed46869 8284 {
df7492f9 8285 c = *src;
6cb21a4f 8286 if (c & 0x80)
6cb21a4f 8287 {
2f3cbb32 8288 eight_bit_found = 1;
2f3cbb32
KH
8289 if (null_byte_found)
8290 break;
8291 }
c0e16b14 8292 else if (c < 0x20)
2f3cbb32
KH
8293 {
8294 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8295 && ! inhibit_iso_escape_detection
8296 && ! detect_info.checked)
6cb21a4f 8297 {
2f3cbb32
KH
8298 if (detect_coding_iso_2022 (&coding, &detect_info))
8299 {
8300 /* We have scanned the whole data. */
8301 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
c0e16b14
KH
8302 {
8303 /* We didn't find an 8-bit code. We may
8304 have found a null-byte, but it's very
8305 rare that a binary file confirm to
8306 ISO-2022. */
8307 src = src_end;
8308 coding.head_ascii = src - coding.source;
8309 }
8310 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
2f3cbb32
KH
8311 break;
8312 }
8313 }
97b1b294 8314 else if (! c && !inhibit_null_byte_detection)
2f3cbb32
KH
8315 {
8316 null_byte_found = 1;
8317 if (eight_bit_found)
8318 break;
6cb21a4f 8319 }
c006c0c8
KH
8320 if (! eight_bit_found)
8321 coding.head_ascii++;
6cb21a4f 8322 }
c006c0c8 8323 else if (! eight_bit_found)
c0e16b14 8324 coding.head_ascii++;
4ed46869 8325 }
88993dfd 8326
2f3cbb32
KH
8327 if (null_byte_found || eight_bit_found
8328 || coding.head_ascii < coding.src_bytes
6cb21a4f
KH
8329 || detect_info.found)
8330 {
2f3cbb32 8331 if (coding.head_ascii == coding.src_bytes)
6cb21a4f
KH
8332 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
8333 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7 8334 {
6cb21a4f 8335 category = coding_priorities[i];
c7266f4a 8336 this = coding_categories + category;
6cb21a4f 8337 if (detect_info.found & (1 << category))
ff0dacd7
KH
8338 break;
8339 }
6cb21a4f 8340 else
2f3cbb32
KH
8341 {
8342 if (null_byte_found)
8343 {
8344 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8345 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8346 }
8347 for (i = 0; i < coding_category_raw_text; i++)
8348 {
8349 category = coding_priorities[i];
8350 this = coding_categories + category;
6cb21a4f 8351
2f3cbb32
KH
8352 if (this->id < 0)
8353 {
8354 /* No coding system of this category is defined. */
8355 detect_info.rejected |= (1 << category);
8356 }
8357 else if (category >= coding_category_raw_text)
8358 continue;
8359 else if (detect_info.checked & (1 << category))
8360 {
8361 if (highest
8362 && (detect_info.found & (1 << category)))
6cb21a4f 8363 break;
2f3cbb32
KH
8364 }
8365 else if ((*(this->detector)) (&coding, &detect_info)
8366 && highest
8367 && (detect_info.found & (1 << category)))
8368 {
8369 if (category == coding_category_utf_16_auto)
8370 {
8371 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8372 category = coding_category_utf_16_le;
8373 else
8374 category = coding_category_utf_16_be;
8375 }
8376 break;
8377 }
8378 }
8379 }
6cb21a4f 8380 }
ec6d2bb8 8381
4cddb209
KH
8382 if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8383 || null_byte_found)
ec6d2bb8 8384 {
ff0dacd7 8385 detect_info.found = CATEGORY_MASK_RAW_TEXT;
4cddb209 8386 id = CODING_SYSTEM_ID (Qno_conversion);
89528eb3
KH
8387 val = Fcons (make_number (id), Qnil);
8388 }
ff0dacd7 8389 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 8390 {
ff0dacd7 8391 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
8392 id = coding_categories[coding_category_undecided].id;
8393 val = Fcons (make_number (id), Qnil);
8394 }
8395 else if (highest)
8396 {
ff0dacd7 8397 if (detect_info.found)
ec6d2bb8 8398 {
ff0dacd7
KH
8399 detect_info.found = 1 << category;
8400 val = Fcons (make_number (this->id), Qnil);
8401 }
8402 else
8403 for (i = 0; i < coding_category_raw_text; i++)
8404 if (! (detect_info.rejected & (1 << coding_priorities[i])))
8405 {
8406 detect_info.found = 1 << coding_priorities[i];
8407 id = coding_categories[coding_priorities[i]].id;
8408 val = Fcons (make_number (id), Qnil);
8409 break;
8410 }
8411 }
89528eb3
KH
8412 else
8413 {
ff0dacd7
KH
8414 int mask = detect_info.rejected | detect_info.found;
8415 int found = 0;
ec6d2bb8 8416
89528eb3 8417 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
8418 {
8419 category = coding_priorities[i];
8420 if (! (mask & (1 << category)))
ec6d2bb8 8421 {
ff0dacd7
KH
8422 found |= 1 << category;
8423 id = coding_categories[category].id;
c7266f4a
KH
8424 if (id >= 0)
8425 val = Fcons (make_number (id), val);
ff0dacd7
KH
8426 }
8427 }
8428 for (i = coding_category_raw_text - 1; i >= 0; i--)
8429 {
8430 category = coding_priorities[i];
8431 if (detect_info.found & (1 << category))
8432 {
8433 id = coding_categories[category].id;
8434 val = Fcons (make_number (id), val);
ec6d2bb8 8435 }
ec6d2bb8 8436 }
ff0dacd7 8437 detect_info.found |= found;
ec6d2bb8 8438 }
ec6d2bb8 8439 }
a470d443
KH
8440 else if (base_category == coding_category_utf_8_auto)
8441 {
8442 if (detect_coding_utf_8 (&coding, &detect_info))
8443 {
8444 struct coding_system *this;
8445
8446 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8447 this = coding_categories + coding_category_utf_8_sig;
8448 else
8449 this = coding_categories + coding_category_utf_8_nosig;
8450 val = Fcons (make_number (this->id), Qnil);
8451 }
8452 }
24a73b0a
KH
8453 else if (base_category == coding_category_utf_16_auto)
8454 {
8455 if (detect_coding_utf_16 (&coding, &detect_info))
8456 {
24a73b0a
KH
8457 struct coding_system *this;
8458
8459 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8460 this = coding_categories + coding_category_utf_16_le;
8461 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8462 this = coding_categories + coding_category_utf_16_be;
8463 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8464 this = coding_categories + coding_category_utf_16_be_nosig;
8465 else
8466 this = coding_categories + coding_category_utf_16_le_nosig;
8467 val = Fcons (make_number (this->id), Qnil);
8468 }
8469 }
df7492f9
KH
8470 else
8471 {
ff0dacd7 8472 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 8473 val = Fcons (make_number (coding.id), Qnil);
4ed46869 8474 }
df7492f9 8475
89528eb3 8476 /* Then, detect eol-format if necessary. */
df7492f9 8477 {
4533845d 8478 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
df7492f9
KH
8479 Lisp_Object tail;
8480
89528eb3
KH
8481 if (VECTORP (eol_type))
8482 {
ff0dacd7 8483 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
2f3cbb32
KH
8484 {
8485 if (null_byte_found)
8486 normal_eol = EOL_SEEN_LF;
8487 else
8488 normal_eol = detect_eol (coding.source, src_bytes,
8489 coding_category_raw_text);
8490 }
ff0dacd7
KH
8491 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8492 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
8493 utf_16_be_eol = detect_eol (coding.source, src_bytes,
8494 coding_category_utf_16_be);
ff0dacd7
KH
8495 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8496 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
8497 utf_16_le_eol = detect_eol (coding.source, src_bytes,
8498 coding_category_utf_16_le);
8499 }
8500 else
8501 {
8502 if (EQ (eol_type, Qunix))
8503 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8504 else if (EQ (eol_type, Qdos))
8505 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8506 else
8507 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8508 }
8509
df7492f9
KH
8510 for (tail = val; CONSP (tail); tail = XCDR (tail))
8511 {
89528eb3 8512 enum coding_category category;
df7492f9 8513 int this_eol;
89528eb3
KH
8514
8515 id = XINT (XCAR (tail));
8516 attrs = CODING_ID_ATTRS (id);
8517 category = XINT (CODING_ATTR_CATEGORY (attrs));
8518 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
8519 if (VECTORP (eol_type))
8520 {
89528eb3
KH
8521 if (category == coding_category_utf_16_be
8522 || category == coding_category_utf_16_be_nosig)
8523 this_eol = utf_16_be_eol;
8524 else if (category == coding_category_utf_16_le
8525 || category == coding_category_utf_16_le_nosig)
8526 this_eol = utf_16_le_eol;
df7492f9 8527 else
89528eb3
KH
8528 this_eol = normal_eol;
8529
df7492f9
KH
8530 if (this_eol == EOL_SEEN_LF)
8531 XSETCAR (tail, AREF (eol_type, 0));
8532 else if (this_eol == EOL_SEEN_CRLF)
8533 XSETCAR (tail, AREF (eol_type, 1));
8534 else if (this_eol == EOL_SEEN_CR)
8535 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
8536 else
8537 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 8538 }
89528eb3
KH
8539 else
8540 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
8541 }
8542 }
ec6d2bb8 8543
4533845d 8544 return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
ec6d2bb8
KH
8545}
8546
ec6d2bb8 8547
d46c5b12
KH
8548DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8549 2, 3, 0,
48b0f3ae
PJ
8550 doc: /* Detect coding system of the text in the region between START and END.
8551Return a list of possible coding systems ordered by priority.
b811c52b
KH
8552The coding systems to try and their priorities follows what
8553the function `coding-system-priority-list' (which see) returns.
ec6d2bb8 8554
12e0131a 8555If only ASCII characters are found (except for such ISO-2022 control
d4a1d553
JB
8556characters as ESC), it returns a list of single element `undecided'
8557or its subsidiary coding system according to a detected end-of-line
8558format.
ec6d2bb8 8559
48b0f3ae
PJ
8560If optional argument HIGHEST is non-nil, return the coding system of
8561highest priority. */)
8562 (start, end, highest)
d46c5b12
KH
8563 Lisp_Object start, end, highest;
8564{
8565 int from, to;
8566 int from_byte, to_byte;
ec6d2bb8 8567
b7826503
PJ
8568 CHECK_NUMBER_COERCE_MARKER (start);
8569 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 8570
d46c5b12
KH
8571 validate_region (&start, &end);
8572 from = XINT (start), to = XINT (end);
8573 from_byte = CHAR_TO_BYTE (from);
8574 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 8575
d46c5b12
KH
8576 if (from < GPT && to >= GPT)
8577 move_gap_both (to, to_byte);
c210f766 8578
d46c5b12 8579 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 8580 to - from, to_byte - from_byte,
0a28aafb
KH
8581 !NILP (highest),
8582 !NILP (current_buffer
df7492f9
KH
8583 ->enable_multibyte_characters),
8584 Qnil);
ec6d2bb8
KH
8585}
8586
d46c5b12
KH
8587DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8588 1, 2, 0,
48b0f3ae
PJ
8589 doc: /* Detect coding system of the text in STRING.
8590Return a list of possible coding systems ordered by priority.
67ceab9d
KH
8591The coding systems to try and their priorities follows what
8592the function `coding-system-priority-list' (which see) returns.
fb88bf2d 8593
12e0131a 8594If only ASCII characters are found (except for such ISO-2022 control
d4a1d553
JB
8595characters as ESC), it returns a list of single element `undecided'
8596or its subsidiary coding system according to a detected end-of-line
8597format.
d46c5b12 8598
48b0f3ae
PJ
8599If optional argument HIGHEST is non-nil, return the coding system of
8600highest priority. */)
8601 (string, highest)
d46c5b12
KH
8602 Lisp_Object string, highest;
8603{
b7826503 8604 CHECK_STRING (string);
b73bfc1c 8605
24a73b0a
KH
8606 return detect_coding_system (SDATA (string),
8607 SCHARS (string), SBYTES (string),
8f924df7 8608 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 8609 Qnil);
4ed46869 8610}
4ed46869 8611
b73bfc1c 8612
df7492f9
KH
8613static INLINE int
8614char_encodable_p (c, attrs)
8615 int c;
8616 Lisp_Object attrs;
05e6f5dc 8617{
df7492f9 8618 Lisp_Object tail;
df7492f9 8619 struct charset *charset;
7d64c6ad 8620 Lisp_Object translation_table;
d46c5b12 8621
7d64c6ad 8622 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 8623 if (! NILP (translation_table))
7d64c6ad 8624 c = translate_char (translation_table, c);
df7492f9
KH
8625 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8626 CONSP (tail); tail = XCDR (tail))
e133c8fa 8627 {
df7492f9
KH
8628 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8629 if (CHAR_CHARSET_P (c, charset))
8630 break;
e133c8fa 8631 }
df7492f9 8632 return (! NILP (tail));
05e6f5dc 8633}
83fa074f 8634
fb88bf2d 8635
df7492f9
KH
8636/* Return a list of coding systems that safely encode the text between
8637 START and END. If EXCLUDE is non-nil, it is a list of coding
8638 systems not to check. The returned list doesn't contain any such
48468dac 8639 coding systems. In any case, if the text contains only ASCII or is
df7492f9 8640 unibyte, return t. */
e077cc80 8641
df7492f9
KH
8642DEFUN ("find-coding-systems-region-internal",
8643 Ffind_coding_systems_region_internal,
8644 Sfind_coding_systems_region_internal, 2, 3, 0,
8645 doc: /* Internal use only. */)
8646 (start, end, exclude)
8647 Lisp_Object start, end, exclude;
8648{
8649 Lisp_Object coding_attrs_list, safe_codings;
8650 EMACS_INT start_byte, end_byte;
7c78e542 8651 const unsigned char *p, *pbeg, *pend;
df7492f9 8652 int c;
0e727afa 8653 Lisp_Object tail, elt, work_table;
d46c5b12 8654
df7492f9
KH
8655 if (STRINGP (start))
8656 {
8657 if (!STRING_MULTIBYTE (start)
8f924df7 8658 || SCHARS (start) == SBYTES (start))
df7492f9
KH
8659 return Qt;
8660 start_byte = 0;
8f924df7 8661 end_byte = SBYTES (start);
df7492f9
KH
8662 }
8663 else
d46c5b12 8664 {
df7492f9
KH
8665 CHECK_NUMBER_COERCE_MARKER (start);
8666 CHECK_NUMBER_COERCE_MARKER (end);
8667 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8668 args_out_of_range (start, end);
8669 if (NILP (current_buffer->enable_multibyte_characters))
8670 return Qt;
8671 start_byte = CHAR_TO_BYTE (XINT (start));
8672 end_byte = CHAR_TO_BYTE (XINT (end));
8673 if (XINT (end) - XINT (start) == end_byte - start_byte)
8674 return Qt;
d46c5b12 8675
e1c23804 8676 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 8677 {
e1c23804
DL
8678 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8679 move_gap_both (XINT (start), start_byte);
df7492f9 8680 else
e1c23804 8681 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
8682 }
8683 }
8684
df7492f9
KH
8685 coding_attrs_list = Qnil;
8686 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8687 if (NILP (exclude)
8688 || NILP (Fmemq (XCAR (tail), exclude)))
8689 {
8690 Lisp_Object attrs;
d46c5b12 8691
df7492f9
KH
8692 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8693 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8694 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
8695 {
8696 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 8697 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
8698 coding_attrs_list = Fcons (attrs, coding_attrs_list);
8699 }
df7492f9 8700 }
d46c5b12 8701
df7492f9 8702 if (STRINGP (start))
8f924df7 8703 p = pbeg = SDATA (start);
df7492f9
KH
8704 else
8705 p = pbeg = BYTE_POS_ADDR (start_byte);
8706 pend = p + (end_byte - start_byte);
b843d1ae 8707
df7492f9
KH
8708 while (p < pend && ASCII_BYTE_P (*p)) p++;
8709 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 8710
0e727afa 8711 work_table = Fmake_char_table (Qnil, Qnil);
05e6f5dc 8712 while (p < pend)
72d1a715 8713 {
df7492f9
KH
8714 if (ASCII_BYTE_P (*p))
8715 p++;
72d1a715
RS
8716 else
8717 {
df7492f9 8718 c = STRING_CHAR_ADVANCE (p);
0e727afa
YM
8719 if (!NILP (char_table_ref (work_table, c)))
8720 /* This character was already checked. Ignore it. */
8721 continue;
12410ef1 8722
df7492f9
KH
8723 charset_map_loaded = 0;
8724 for (tail = coding_attrs_list; CONSP (tail);)
8725 {
8726 elt = XCAR (tail);
8727 if (NILP (elt))
8728 tail = XCDR (tail);
8729 else if (char_encodable_p (c, elt))
8730 tail = XCDR (tail);
8731 else if (CONSP (XCDR (tail)))
8732 {
8733 XSETCAR (tail, XCAR (XCDR (tail)));
8734 XSETCDR (tail, XCDR (XCDR (tail)));
8735 }
8736 else
8737 {
8738 XSETCAR (tail, Qnil);
8739 tail = XCDR (tail);
8740 }
8741 }
8742 if (charset_map_loaded)
8743 {
8744 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 8745
df7492f9 8746 if (STRINGP (start))
8f924df7 8747 pbeg = SDATA (start);
df7492f9
KH
8748 else
8749 pbeg = BYTE_POS_ADDR (start_byte);
8750 p = pbeg + p_offset;
8751 pend = pbeg + pend_offset;
8752 }
0e727afa 8753 char_table_set (work_table, c, Qt);
df7492f9 8754 }
ec6d2bb8 8755 }
fb88bf2d 8756
988b3759 8757 safe_codings = list2 (Qraw_text, Qno_conversion);
df7492f9
KH
8758 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8759 if (! NILP (XCAR (tail)))
8760 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 8761
05e6f5dc
KH
8762 return safe_codings;
8763}
4956c225 8764
d46c5b12 8765
8f924df7
KH
8766DEFUN ("unencodable-char-position", Funencodable_char_position,
8767 Sunencodable_char_position, 3, 5, 0,
8768 doc: /*
8769Return position of first un-encodable character in a region.
d4a1d553 8770START and END specify the region and CODING-SYSTEM specifies the
8f924df7 8771encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 8772
8f924df7
KH
8773If optional 4th argument COUNT is non-nil, it specifies at most how
8774many un-encodable characters to search. In this case, the value is a
8775list of positions.
d46c5b12 8776
8f924df7
KH
8777If optional 5th argument STRING is non-nil, it is a string to search
8778for un-encodable characters. In that case, START and END are indexes
8779to the string. */)
8780 (start, end, coding_system, count, string)
8781 Lisp_Object start, end, coding_system, count, string;
8782{
8783 int n;
8784 struct coding_system coding;
7d64c6ad 8785 Lisp_Object attrs, charset_list, translation_table;
8f924df7
KH
8786 Lisp_Object positions;
8787 int from, to;
8788 const unsigned char *p, *stop, *pend;
8789 int ascii_compatible;
fb88bf2d 8790
8f924df7
KH
8791 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8792 attrs = CODING_ID_ATTRS (coding.id);
8793 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8794 return Qnil;
8795 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8796 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 8797 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 8798
8f924df7
KH
8799 if (NILP (string))
8800 {
8801 validate_region (&start, &end);
8802 from = XINT (start);
8803 to = XINT (end);
8804 if (NILP (current_buffer->enable_multibyte_characters)
8805 || (ascii_compatible
8806 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8807 return Qnil;
8808 p = CHAR_POS_ADDR (from);
8809 pend = CHAR_POS_ADDR (to);
8810 if (from < GPT && to >= GPT)
8811 stop = GPT_ADDR;
8812 else
8813 stop = pend;
8814 }
8815 else
8816 {
8817 CHECK_STRING (string);
8818 CHECK_NATNUM (start);
8819 CHECK_NATNUM (end);
8820 from = XINT (start);
8821 to = XINT (end);
8822 if (from > to
8823 || to > SCHARS (string))
8824 args_out_of_range_3 (string, start, end);
8825 if (! STRING_MULTIBYTE (string))
8826 return Qnil;
8827 p = SDATA (string) + string_char_to_byte (string, from);
8828 stop = pend = SDATA (string) + string_char_to_byte (string, to);
8829 if (ascii_compatible && (to - from) == (pend - p))
8830 return Qnil;
8831 }
f2558efd 8832
8f924df7
KH
8833 if (NILP (count))
8834 n = 1;
8835 else
b73bfc1c 8836 {
8f924df7
KH
8837 CHECK_NATNUM (count);
8838 n = XINT (count);
b73bfc1c
KH
8839 }
8840
8f924df7
KH
8841 positions = Qnil;
8842 while (1)
d46c5b12 8843 {
8f924df7 8844 int c;
ec6d2bb8 8845
8f924df7
KH
8846 if (ascii_compatible)
8847 while (p < stop && ASCII_BYTE_P (*p))
8848 p++, from++;
8849 if (p >= stop)
0e79d667 8850 {
8f924df7
KH
8851 if (p >= pend)
8852 break;
8853 stop = pend;
8854 p = GAP_END_ADDR;
0e79d667 8855 }
ec6d2bb8 8856
8f924df7
KH
8857 c = STRING_CHAR_ADVANCE (p);
8858 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
8859 && ! char_charset (translate_char (translation_table, c),
8860 charset_list, NULL))
ec6d2bb8 8861 {
8f924df7
KH
8862 positions = Fcons (make_number (from), positions);
8863 n--;
8864 if (n == 0)
8865 break;
ec6d2bb8
KH
8866 }
8867
8f924df7
KH
8868 from++;
8869 }
d46c5b12 8870
8f924df7
KH
8871 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8872}
d46c5b12 8873
d46c5b12 8874
df7492f9
KH
8875DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8876 Scheck_coding_systems_region, 3, 3, 0,
8877 doc: /* Check if the region is encodable by coding systems.
d46c5b12 8878
df7492f9
KH
8879START and END are buffer positions specifying the region.
8880CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 8881
df7492f9 8882The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
d4a1d553 8883CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
df7492f9
KH
8884whole region, POS0, POS1, ... are buffer positions where non-encodable
8885characters are found.
93dec019 8886
df7492f9
KH
8887If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8888value is nil.
93dec019 8889
df7492f9
KH
8890START may be a string. In that case, check if the string is
8891encodable, and the value contains indices to the string instead of
5704f39a
KH
8892buffer positions. END is ignored.
8893
4c1958f4 8894If the current buffer (or START if it is a string) is unibyte, the value
5704f39a 8895is nil. */)
df7492f9
KH
8896 (start, end, coding_system_list)
8897 Lisp_Object start, end, coding_system_list;
05e6f5dc 8898{
df7492f9
KH
8899 Lisp_Object list;
8900 EMACS_INT start_byte, end_byte;
8901 int pos;
7c78e542 8902 const unsigned char *p, *pbeg, *pend;
df7492f9 8903 int c;
7d64c6ad 8904 Lisp_Object tail, elt, attrs;
70ad9fc4 8905
05e6f5dc
KH
8906 if (STRINGP (start))
8907 {
df7492f9 8908 if (!STRING_MULTIBYTE (start)
4c1958f4 8909 || SCHARS (start) == SBYTES (start))
df7492f9
KH
8910 return Qnil;
8911 start_byte = 0;
8f924df7 8912 end_byte = SBYTES (start);
df7492f9 8913 pos = 0;
d46c5b12 8914 }
05e6f5dc 8915 else
b73bfc1c 8916 {
b7826503
PJ
8917 CHECK_NUMBER_COERCE_MARKER (start);
8918 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
8919 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8920 args_out_of_range (start, end);
8921 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
8922 return Qnil;
8923 start_byte = CHAR_TO_BYTE (XINT (start));
8924 end_byte = CHAR_TO_BYTE (XINT (end));
8925 if (XINT (end) - XINT (start) == end_byte - start_byte)
5704f39a 8926 return Qnil;
df7492f9 8927
e1c23804 8928 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 8929 {
e1c23804
DL
8930 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8931 move_gap_both (XINT (start), start_byte);
df7492f9 8932 else
e1c23804 8933 move_gap_both (XINT (end), end_byte);
b73bfc1c 8934 }
e1c23804 8935 pos = XINT (start);
b73bfc1c 8936 }
7553d0e1 8937
df7492f9
KH
8938 list = Qnil;
8939 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 8940 {
df7492f9 8941 elt = XCAR (tail);
7d64c6ad 8942 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
8943 ASET (attrs, coding_attr_trans_tbl,
8944 get_translation_table (attrs, 1, NULL));
7d64c6ad 8945 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
8946 }
8947
df7492f9 8948 if (STRINGP (start))
8f924df7 8949 p = pbeg = SDATA (start);
72d1a715 8950 else
df7492f9
KH
8951 p = pbeg = BYTE_POS_ADDR (start_byte);
8952 pend = p + (end_byte - start_byte);
4ed46869 8953
df7492f9
KH
8954 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8955 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 8956
df7492f9 8957 while (p < pend)
d46c5b12 8958 {
df7492f9
KH
8959 if (ASCII_BYTE_P (*p))
8960 p++;
e133c8fa 8961 else
05e6f5dc 8962 {
df7492f9
KH
8963 c = STRING_CHAR_ADVANCE (p);
8964
8965 charset_map_loaded = 0;
8966 for (tail = list; CONSP (tail); tail = XCDR (tail))
8967 {
8968 elt = XCDR (XCAR (tail));
8969 if (! char_encodable_p (c, XCAR (elt)))
8970 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8971 }
8972 if (charset_map_loaded)
8973 {
8974 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8975
8976 if (STRINGP (start))
8f924df7 8977 pbeg = SDATA (start);
df7492f9
KH
8978 else
8979 pbeg = BYTE_POS_ADDR (start_byte);
8980 p = pbeg + p_offset;
8981 pend = pbeg + pend_offset;
8982 }
05e6f5dc 8983 }
df7492f9 8984 pos++;
d46c5b12 8985 }
4ed46869 8986
df7492f9
KH
8987 tail = list;
8988 list = Qnil;
8989 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 8990 {
df7492f9
KH
8991 elt = XCAR (tail);
8992 if (CONSP (XCDR (XCDR (elt))))
8993 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8994 list);
ec6d2bb8 8995 }
2b4f9037 8996
df7492f9 8997 return list;
d46c5b12
KH
8998}
8999
3fd9494b 9000
b73bfc1c 9001Lisp_Object
df7492f9
KH
9002code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
9003 Lisp_Object start, end, coding_system, dst_object;
9004 int encodep, norecord;
4ed46869 9005{
3a73fa5d 9006 struct coding_system coding;
df7492f9
KH
9007 EMACS_INT from, from_byte, to, to_byte;
9008 Lisp_Object src_object;
4ed46869 9009
b7826503
PJ
9010 CHECK_NUMBER_COERCE_MARKER (start);
9011 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
9012 if (NILP (coding_system))
9013 coding_system = Qno_conversion;
9014 else
9015 CHECK_CODING_SYSTEM (coding_system);
9016 src_object = Fcurrent_buffer ();
9017 if (NILP (dst_object))
9018 dst_object = src_object;
9019 else if (! EQ (dst_object, Qt))
9020 CHECK_BUFFER (dst_object);
3a73fa5d 9021
d46c5b12
KH
9022 validate_region (&start, &end);
9023 from = XFASTINT (start);
df7492f9 9024 from_byte = CHAR_TO_BYTE (from);
d46c5b12 9025 to = XFASTINT (end);
df7492f9 9026 to_byte = CHAR_TO_BYTE (to);
764ca8da 9027
df7492f9
KH
9028 setup_coding_system (coding_system, &coding);
9029 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 9030
df7492f9
KH
9031 if (encodep)
9032 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9033 dst_object);
9034 else
9035 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9036 dst_object);
9037 if (! norecord)
9038 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 9039
df7492f9
KH
9040 return (BUFFERP (dst_object)
9041 ? make_number (coding.produced_char)
9042 : coding.dst_object);
4031e2bf 9043}
78108bcd 9044
4ed46869 9045
4031e2bf 9046DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 9047 3, 4, "r\nzCoding system: ",
48b0f3ae 9048 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
9049When called from a program, takes four arguments:
9050 START, END, CODING-SYSTEM, and DESTINATION.
9051START and END are buffer positions.
8844fa83 9052
df7492f9 9053Optional 4th arguments DESTINATION specifies where the decoded text goes.
2354b80b 9054If nil, the region between START and END is replaced by the decoded text.
1560f91a
EZ
9055If buffer, the decoded text is inserted in that buffer after point (point
9056does not move).
446dcd75 9057In those cases, the length of the decoded text is returned.
319a3947 9058If DESTINATION is t, the decoded text is returned.
8844fa83 9059
48b0f3ae
PJ
9060This function sets `last-coding-system-used' to the precise coding system
9061used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
319a3947 9062not fully specified.) */)
df7492f9
KH
9063 (start, end, coding_system, destination)
9064 Lisp_Object start, end, coding_system, destination;
4031e2bf 9065{
df7492f9 9066 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 9067}
8844fa83 9068
3a73fa5d 9069DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
9070 3, 4, "r\nzCoding system: ",
9071 doc: /* Encode the current region by specified coding system.
d4a1d553
JB
9072When called from a program, takes four arguments:
9073 START, END, CODING-SYSTEM and DESTINATION.
9074START and END are buffer positions.
d46c5b12 9075
df7492f9
KH
9076Optional 4th arguments DESTINATION specifies where the encoded text goes.
9077If nil, the region between START and END is replace by the encoded text.
1560f91a
EZ
9078If buffer, the encoded text is inserted in that buffer after point (point
9079does not move).
446dcd75 9080In those cases, the length of the encoded text is returned.
319a3947 9081If DESTINATION is t, the encoded text is returned.
2391eaa4 9082
48b0f3ae
PJ
9083This function sets `last-coding-system-used' to the precise coding system
9084used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
319a3947 9085not fully specified.) */)
df7492f9
KH
9086 (start, end, coding_system, destination)
9087 Lisp_Object start, end, coding_system, destination;
3a73fa5d 9088{
df7492f9 9089 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
9090}
9091
9092Lisp_Object
df7492f9
KH
9093code_convert_string (string, coding_system, dst_object,
9094 encodep, nocopy, norecord)
9095 Lisp_Object string, coding_system, dst_object;
9096 int encodep, nocopy, norecord;
b73bfc1c 9097{
4031e2bf 9098 struct coding_system coding;
df7492f9 9099 EMACS_INT chars, bytes;
ec6d2bb8 9100
b7826503 9101 CHECK_STRING (string);
d46c5b12 9102 if (NILP (coding_system))
4956c225 9103 {
df7492f9
KH
9104 if (! norecord)
9105 Vlast_coding_system_used = Qno_conversion;
9106 if (NILP (dst_object))
9107 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 9108 }
b73bfc1c 9109
df7492f9
KH
9110 if (NILP (coding_system))
9111 coding_system = Qno_conversion;
9112 else
9113 CHECK_CODING_SYSTEM (coding_system);
9114 if (NILP (dst_object))
9115 dst_object = Qt;
9116 else if (! EQ (dst_object, Qt))
9117 CHECK_BUFFER (dst_object);
73be902c 9118
df7492f9 9119 setup_coding_system (coding_system, &coding);
d46c5b12 9120 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
9121 chars = SCHARS (string);
9122 bytes = SBYTES (string);
df7492f9
KH
9123 if (encodep)
9124 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9125 else
9126 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9127 if (! norecord)
9128 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 9129
df7492f9
KH
9130 return (BUFFERP (dst_object)
9131 ? make_number (coding.produced_char)
9132 : coding.dst_object);
4ed46869 9133}
73be902c 9134
b73bfc1c 9135
ecec61c1 9136/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 9137 Do not set Vlast_coding_system_used.
4ed46869 9138
ec6d2bb8
KH
9139 This function is called only from macros DECODE_FILE and
9140 ENCODE_FILE, thus we ignore character composition. */
4ed46869 9141
ecec61c1
KH
9142Lisp_Object
9143code_convert_string_norecord (string, coding_system, encodep)
9144 Lisp_Object string, coding_system;
9145 int encodep;
4ed46869 9146{
0be8721c 9147 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
9148}
9149
4ed46869 9150
df7492f9
KH
9151DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9152 2, 4, 0,
9153 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9154
9155Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9156if the decoding operation is trivial.
ecec61c1 9157
d4a1d553 9158Optional fourth arg BUFFER non-nil means that the decoded text is
1560f91a
EZ
9159inserted in that buffer after point (point does not move). In this
9160case, the return value is the length of the decoded text.
ecec61c1 9161
df7492f9
KH
9162This function sets `last-coding-system-used' to the precise coding system
9163used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
d4a1d553 9164not fully specified.) */)
df7492f9
KH
9165 (string, coding_system, nocopy, buffer)
9166 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 9167{
df7492f9
KH
9168 return code_convert_string (string, coding_system, buffer,
9169 0, ! NILP (nocopy), 0);
4ed46869
KH
9170}
9171
df7492f9
KH
9172DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9173 2, 4, 0,
9174 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9175
9176Optional third arg NOCOPY non-nil means it is OK to return STRING
9177itself if the encoding operation is trivial.
9178
d4a1d553 9179Optional fourth arg BUFFER non-nil means that the encoded text is
1560f91a
EZ
9180inserted in that buffer after point (point does not move). In this
9181case, the return value is the length of the encoded text.
df7492f9
KH
9182
9183This function sets `last-coding-system-used' to the precise coding system
9184used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9185not fully specified.) */)
9186 (string, coding_system, nocopy, buffer)
9187 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 9188{
df7492f9 9189 return code_convert_string (string, coding_system, buffer,
c197f191 9190 1, ! NILP (nocopy), 1);
4ed46869 9191}
df7492f9 9192
3a73fa5d 9193\f
4ed46869 9194DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
9195 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9196Return the corresponding character. */)
9197 (code)
4ed46869 9198 Lisp_Object code;
4ed46869 9199{
df7492f9
KH
9200 Lisp_Object spec, attrs, val;
9201 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9202 int c;
4ed46869 9203
df7492f9
KH
9204 CHECK_NATNUM (code);
9205 c = XFASTINT (code);
9206 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9207 attrs = AREF (spec, 0);
4ed46869 9208
df7492f9
KH
9209 if (ASCII_BYTE_P (c)
9210 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9211 return code;
4ed46869 9212
df7492f9
KH
9213 val = CODING_ATTR_CHARSET_LIST (attrs);
9214 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
9215 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9216 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 9217
df7492f9
KH
9218 if (c <= 0x7F)
9219 charset = charset_roman;
9220 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 9221 {
df7492f9
KH
9222 charset = charset_kana;
9223 c -= 0x80;
4ed46869 9224 }
55ab7be3 9225 else
4ed46869 9226 {
004068e4 9227 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
9228
9229 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9230 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9231 error ("Invalid code: %d", code);
9232 SJIS_TO_JIS (c);
9233 charset = charset_kanji;
4ed46869 9234 }
df7492f9
KH
9235 c = DECODE_CHAR (charset, c);
9236 if (c < 0)
9237 error ("Invalid code: %d", code);
9238 return make_number (c);
93dec019 9239}
4ed46869 9240
48b0f3ae 9241
4ed46869 9242DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8acf0c0e 9243 doc: /* Encode a Japanese character CH to shift_jis encoding.
48b0f3ae
PJ
9244Return the corresponding code in SJIS. */)
9245 (ch)
df7492f9 9246 Lisp_Object ch;
4ed46869 9247{
df7492f9
KH
9248 Lisp_Object spec, attrs, charset_list;
9249 int c;
9250 struct charset *charset;
9251 unsigned code;
48b0f3ae 9252
df7492f9
KH
9253 CHECK_CHARACTER (ch);
9254 c = XFASTINT (ch);
9255 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9256 attrs = AREF (spec, 0);
9257
9258 if (ASCII_CHAR_P (c)
9259 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9260 return ch;
9261
9262 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9263 charset = char_charset (c, charset_list, &code);
9264 if (code == CHARSET_INVALID_CODE (charset))
9265 error ("Can't encode by shift_jis encoding: %d", c);
9266 JIS_TO_SJIS (code);
9267
9268 return make_number (code);
4ed46869
KH
9269}
9270
9271DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
9272 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9273Return the corresponding character. */)
9274 (code)
4ed46869 9275 Lisp_Object code;
d46c5b12 9276{
df7492f9
KH
9277 Lisp_Object spec, attrs, val;
9278 struct charset *charset_roman, *charset_big5, *charset;
9279 int c;
6289dd10 9280
df7492f9
KH
9281 CHECK_NATNUM (code);
9282 c = XFASTINT (code);
9283 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9284 attrs = AREF (spec, 0);
4ed46869 9285
df7492f9
KH
9286 if (ASCII_BYTE_P (c)
9287 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9288 return code;
6289dd10 9289
df7492f9
KH
9290 val = CODING_ATTR_CHARSET_LIST (attrs);
9291 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9292 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 9293
df7492f9
KH
9294 if (c <= 0x7F)
9295 charset = charset_roman;
c28a9453
KH
9296 else
9297 {
df7492f9
KH
9298 int b1 = c >> 8, b2 = c & 0x7F;
9299 if (b1 < 0xA1 || b1 > 0xFE
9300 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9301 error ("Invalid code: %d", code);
9302 charset = charset_big5;
c28a9453 9303 }
df7492f9
KH
9304 c = DECODE_CHAR (charset, (unsigned )c);
9305 if (c < 0)
9306 error ("Invalid code: %d", code);
9307 return make_number (c);
d46c5b12 9308}
6289dd10 9309
4ed46869 9310DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8acf0c0e 9311 doc: /* Encode the Big5 character CH to BIG5 coding system.
48b0f3ae
PJ
9312Return the corresponding character code in Big5. */)
9313 (ch)
4ed46869
KH
9314 Lisp_Object ch;
9315{
df7492f9
KH
9316 Lisp_Object spec, attrs, charset_list;
9317 struct charset *charset;
9318 int c;
9319 unsigned code;
9320
9321 CHECK_CHARACTER (ch);
9322 c = XFASTINT (ch);
9323 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9324 attrs = AREF (spec, 0);
9325 if (ASCII_CHAR_P (c)
9326 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9327 return ch;
9328
9329 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9330 charset = char_charset (c, charset_list, &code);
9331 if (code == CHARSET_INVALID_CODE (charset))
9332 error ("Can't encode by Big5 encoding: %d", c);
9333
9334 return make_number (code);
4ed46869 9335}
48b0f3ae 9336
3a73fa5d 9337\f
002fdb44 9338DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
68bba4e4 9339 Sset_terminal_coding_system_internal, 1, 2, 0,
48b0f3ae 9340 doc: /* Internal use only. */)
6ed8eeff 9341 (coding_system, terminal)
b74e4686 9342 Lisp_Object coding_system;
6ed8eeff 9343 Lisp_Object terminal;
4ed46869 9344{
6ed8eeff 9345 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
b7826503 9346 CHECK_SYMBOL (coding_system);
b8299c66 9347 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
70c22245 9348 /* We had better not send unsafe characters to terminal. */
c73bd236 9349 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
df7492f9 9350 /* Characer composition should be disabled. */
c73bd236 9351 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b8299c66
KL
9352 terminal_coding->src_multibyte = 1;
9353 terminal_coding->dst_multibyte = 0;
4ed46869
KH
9354 return Qnil;
9355}
9356
c4825358
KH
9357DEFUN ("set-safe-terminal-coding-system-internal",
9358 Fset_safe_terminal_coding_system_internal,
48b0f3ae 9359 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 9360 doc: /* Internal use only. */)
48b0f3ae 9361 (coding_system)
b74e4686 9362 Lisp_Object coding_system;
d46c5b12 9363{
b7826503 9364 CHECK_SYMBOL (coding_system);
c4825358
KH
9365 setup_coding_system (Fcheck_coding_system (coding_system),
9366 &safe_terminal_coding);
df7492f9
KH
9367 /* Characer composition should be disabled. */
9368 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
9369 safe_terminal_coding.src_multibyte = 1;
9370 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
9371 return Qnil;
9372}
4ed46869 9373
002fdb44 9374DEFUN ("terminal-coding-system", Fterminal_coding_system,
68bba4e4 9375 Sterminal_coding_system, 0, 1, 0,
6ed8eeff 9376 doc: /* Return coding system specified for terminal output on the given terminal.
708e05dc 9377TERMINAL may be a terminal object, a frame, or nil for the selected
6ed8eeff
KL
9378frame's terminal device. */)
9379 (terminal)
9380 Lisp_Object terminal;
4ed46869 9381{
985773c9
MB
9382 struct coding_system *terminal_coding
9383 = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9384 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
ae6f73fa 9385
ae6f73fa 9386 /* For backward compatibility, return nil if it is `undecided'. */
f75c90a9 9387 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
4ed46869
KH
9388}
9389
002fdb44 9390DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
68bba4e4 9391 Sset_keyboard_coding_system_internal, 1, 2, 0,
48b0f3ae 9392 doc: /* Internal use only. */)
6ed8eeff 9393 (coding_system, terminal)
4ed46869 9394 Lisp_Object coding_system;
6ed8eeff 9395 Lisp_Object terminal;
4ed46869 9396{
6ed8eeff 9397 struct terminal *t = get_terminal (terminal, 1);
b7826503 9398 CHECK_SYMBOL (coding_system);
624bda09
KH
9399 if (NILP (coding_system))
9400 coding_system = Qno_conversion;
9401 else
9402 Fcheck_coding_system (coding_system);
9403 setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
df7492f9 9404 /* Characer composition should be disabled. */
c73bd236
MB
9405 TERMINAL_KEYBOARD_CODING (t)->common_flags
9406 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
9407 return Qnil;
9408}
9409
9410DEFUN ("keyboard-coding-system",
985773c9 9411 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
48b0f3ae 9412 doc: /* Return coding system specified for decoding keyboard input. */)
985773c9
MB
9413 (terminal)
9414 Lisp_Object terminal;
4ed46869 9415{
985773c9
MB
9416 return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9417 (get_terminal (terminal, 1))->id);
4ed46869
KH
9418}
9419
4ed46869 9420\f
a5d301df
KH
9421DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9422 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
9423 doc: /* Choose a coding system for an operation based on the target name.
9424The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9425DECODING-SYSTEM is the coding system to use for decoding
9426\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9427for encoding (in case OPERATION does encoding).
05e6f5dc 9428
48b0f3ae
PJ
9429The first argument OPERATION specifies an I/O primitive:
9430 For file I/O, `insert-file-contents' or `write-region'.
9431 For process I/O, `call-process', `call-process-region', or `start-process'.
9432 For network I/O, `open-network-stream'.
05e6f5dc 9433
48b0f3ae
PJ
9434The remaining arguments should be the same arguments that were passed
9435to the primitive. Depending on which primitive, one of those arguments
9436is selected as the TARGET. For example, if OPERATION does file I/O,
9437whichever argument specifies the file name is TARGET.
05e6f5dc 9438
48b0f3ae 9439TARGET has a meaning which depends on OPERATION:
b883cdb2 9440 For file I/O, TARGET is a file name (except for the special case below).
48b0f3ae 9441 For process I/O, TARGET is a process name.
d4a1d553 9442 For network I/O, TARGET is a service name or a port number.
05e6f5dc 9443
d4a1d553 9444This function looks up what is specified for TARGET in
48b0f3ae
PJ
9445`file-coding-system-alist', `process-coding-system-alist',
9446or `network-coding-system-alist' depending on OPERATION.
9447They may specify a coding system, a cons of coding systems,
9448or a function symbol to call.
9449In the last case, we call the function with one argument,
9450which is a list of all the arguments given to this function.
1011c487
MB
9451If the function can't decide a coding system, it can return
9452`undecided' so that the normal code-detection is performed.
48b0f3ae 9453
b883cdb2
MB
9454If OPERATION is `insert-file-contents', the argument corresponding to
9455TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
9456file name to look up, and BUFFER is a buffer that contains the file's
9457contents (not yet decoded). If `file-coding-system-alist' specifies a
9458function to call for FILENAME, that function should examine the
9459contents of BUFFER instead of reading the file.
9460
d918f936 9461usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
48b0f3ae 9462 (nargs, args)
4ed46869
KH
9463 int nargs;
9464 Lisp_Object *args;
6b89e3aa 9465{
4ed46869
KH
9466 Lisp_Object operation, target_idx, target, val;
9467 register Lisp_Object chain;
177c0ea7 9468
4ed46869
KH
9469 if (nargs < 2)
9470 error ("Too few arguments");
9471 operation = args[0];
9472 if (!SYMBOLP (operation)
9473 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
3ed051d4 9474 error ("Invalid first argument");
4ed46869
KH
9475 if (nargs < 1 + XINT (target_idx))
9476 error ("Too few arguments for operation: %s",
8f924df7 9477 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
9478 target = args[XINT (target_idx) + 1];
9479 if (!(STRINGP (target)
091a0ff0
KH
9480 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9481 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
4ed46869 9482 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 9483 error ("Invalid %dth argument", XINT (target_idx) + 1);
091a0ff0
KH
9484 if (CONSP (target))
9485 target = XCAR (target);
4ed46869 9486
2e34157c
RS
9487 chain = ((EQ (operation, Qinsert_file_contents)
9488 || EQ (operation, Qwrite_region))
02ba4723 9489 ? Vfile_coding_system_alist
2e34157c 9490 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
9491 ? Vnetwork_coding_system_alist
9492 : Vprocess_coding_system_alist));
4ed46869
KH
9493 if (NILP (chain))
9494 return Qnil;
9495
03699b14 9496 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 9497 {
f44d27ce 9498 Lisp_Object elt;
6b89e3aa 9499
df7492f9 9500 elt = XCAR (chain);
4ed46869
KH
9501 if (CONSP (elt)
9502 && ((STRINGP (target)
03699b14
KR
9503 && STRINGP (XCAR (elt))
9504 && fast_string_match (XCAR (elt), target) >= 0)
9505 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 9506 {
03699b14 9507 val = XCDR (elt);
b19fd4c5
KH
9508 /* Here, if VAL is both a valid coding system and a valid
9509 function symbol, we return VAL as a coding system. */
02ba4723
KH
9510 if (CONSP (val))
9511 return val;
9512 if (! SYMBOLP (val))
9513 return Qnil;
9514 if (! NILP (Fcoding_system_p (val)))
9515 return Fcons (val, val);
b19fd4c5 9516 if (! NILP (Ffboundp (val)))
6b89e3aa 9517 {
e2b97060
MB
9518 /* We use call1 rather than safe_call1
9519 so as to get bug reports about functions called here
9520 which don't handle the current interface. */
9521 val = call1 (val, Flist (nargs, args));
b19fd4c5
KH
9522 if (CONSP (val))
9523 return val;
9524 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9525 return Fcons (val, val);
6b89e3aa 9526 }
02ba4723 9527 return Qnil;
6b89e3aa
KH
9528 }
9529 }
4ed46869 9530 return Qnil;
6b89e3aa
KH
9531}
9532
df7492f9 9533DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 9534 Sset_coding_system_priority, 0, MANY, 0,
da7db224 9535 doc: /* Assign higher priority to the coding systems given as arguments.
d4a1d553 9536If multiple coding systems belong to the same category,
a3181084
DL
9537all but the first one are ignored.
9538
d4a1d553 9539usage: (set-coding-system-priority &rest coding-systems) */)
df7492f9
KH
9540 (nargs, args)
9541 int nargs;
9542 Lisp_Object *args;
9543{
9544 int i, j;
9545 int changed[coding_category_max];
9546 enum coding_category priorities[coding_category_max];
9547
9548 bzero (changed, sizeof changed);
6b89e3aa 9549
df7492f9 9550 for (i = j = 0; i < nargs; i++)
6b89e3aa 9551 {
df7492f9
KH
9552 enum coding_category category;
9553 Lisp_Object spec, attrs;
6b89e3aa 9554
df7492f9
KH
9555 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9556 attrs = AREF (spec, 0);
9557 category = XINT (CODING_ATTR_CATEGORY (attrs));
9558 if (changed[category])
9559 /* Ignore this coding system because a coding system of the
9560 same category already had a higher priority. */
9561 continue;
9562 changed[category] = 1;
9563 priorities[j++] = category;
9564 if (coding_categories[category].id >= 0
9565 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9566 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 9567 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 9568 }
6b89e3aa 9569
df7492f9
KH
9570 /* Now we have decided top J priorities. Reflect the order of the
9571 original priorities to the remaining priorities. */
6b89e3aa 9572
df7492f9 9573 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 9574 {
df7492f9
KH
9575 while (j < coding_category_max
9576 && changed[coding_priorities[j]])
9577 j++;
9578 if (j == coding_category_max)
9579 abort ();
9580 priorities[i] = coding_priorities[j];
9581 }
6b89e3aa 9582
df7492f9 9583 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 9584
ff563fce
KH
9585 /* Update `coding-category-list'. */
9586 Vcoding_category_list = Qnil;
9587 for (i = coding_category_max - 1; i >= 0; i--)
9588 Vcoding_category_list
9589 = Fcons (AREF (Vcoding_category_table, priorities[i]),
9590 Vcoding_category_list);
6b89e3aa 9591
df7492f9 9592 return Qnil;
6b89e3aa
KH
9593}
9594
df7492f9
KH
9595DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9596 Scoding_system_priority_list, 0, 1, 0,
da7db224 9597 doc: /* Return a list of coding systems ordered by their priorities.
b811c52b
KH
9598The list contains a subset of coding systems; i.e. coding systems
9599assigned to each coding category (see `coding-category-list').
9600
da7db224 9601HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
9602 (highestp)
9603 Lisp_Object highestp;
d46c5b12
KH
9604{
9605 int i;
df7492f9 9606 Lisp_Object val;
6b89e3aa 9607
df7492f9 9608 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 9609 {
df7492f9
KH
9610 enum coding_category category = coding_priorities[i];
9611 int id = coding_categories[category].id;
9612 Lisp_Object attrs;
068a9dbd 9613
df7492f9
KH
9614 if (id < 0)
9615 continue;
9616 attrs = CODING_ID_ATTRS (id);
9617 if (! NILP (highestp))
9618 return CODING_ATTR_BASE_NAME (attrs);
9619 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9620 }
9621 return Fnreverse (val);
9622}
068a9dbd 9623
91433552 9624static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
9625
9626static Lisp_Object
df7492f9
KH
9627make_subsidiaries (base)
9628 Lisp_Object base;
068a9dbd 9629{
df7492f9 9630 Lisp_Object subsidiaries;
8f924df7 9631 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
9632 char *buf = (char *) alloca (base_name_len + 6);
9633 int i;
068a9dbd 9634
8f924df7 9635 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
9636 subsidiaries = Fmake_vector (make_number (3), Qnil);
9637 for (i = 0; i < 3; i++)
068a9dbd 9638 {
df7492f9
KH
9639 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9640 ASET (subsidiaries, i, intern (buf));
068a9dbd 9641 }
df7492f9 9642 return subsidiaries;
068a9dbd
KH
9643}
9644
9645
df7492f9
KH
9646DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9647 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
9648 doc: /* For internal use only.
9649usage: (define-coding-system-internal ...) */)
df7492f9
KH
9650 (nargs, args)
9651 int nargs;
9652 Lisp_Object *args;
068a9dbd 9653{
df7492f9
KH
9654 Lisp_Object name;
9655 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
9656 Lisp_Object attrs; /* Vector of attributes. */
9657 Lisp_Object eol_type;
9658 Lisp_Object aliases;
9659 Lisp_Object coding_type, charset_list, safe_charsets;
9660 enum coding_category category;
9661 Lisp_Object tail, val;
9662 int max_charset_id = 0;
9663 int i;
068a9dbd 9664
df7492f9
KH
9665 if (nargs < coding_arg_max)
9666 goto short_args;
068a9dbd 9667
df7492f9 9668 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 9669
df7492f9
KH
9670 name = args[coding_arg_name];
9671 CHECK_SYMBOL (name);
9672 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 9673
df7492f9
KH
9674 val = args[coding_arg_mnemonic];
9675 if (! STRINGP (val))
9676 CHECK_CHARACTER (val);
9677 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 9678
df7492f9
KH
9679 coding_type = args[coding_arg_coding_type];
9680 CHECK_SYMBOL (coding_type);
9681 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 9682
df7492f9
KH
9683 charset_list = args[coding_arg_charset_list];
9684 if (SYMBOLP (charset_list))
9685 {
9686 if (EQ (charset_list, Qiso_2022))
9687 {
9688 if (! EQ (coding_type, Qiso_2022))
9689 error ("Invalid charset-list");
9690 charset_list = Viso_2022_charset_list;
9691 }
9692 else if (EQ (charset_list, Qemacs_mule))
9693 {
9694 if (! EQ (coding_type, Qemacs_mule))
9695 error ("Invalid charset-list");
9696 charset_list = Vemacs_mule_charset_list;
9697 }
9698 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9699 if (max_charset_id < XFASTINT (XCAR (tail)))
9700 max_charset_id = XFASTINT (XCAR (tail));
9701 }
068a9dbd
KH
9702 else
9703 {
df7492f9 9704 charset_list = Fcopy_sequence (charset_list);
985773c9 9705 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
068a9dbd 9706 {
df7492f9
KH
9707 struct charset *charset;
9708
985773c9 9709 val = XCAR (tail);
df7492f9
KH
9710 CHECK_CHARSET_GET_CHARSET (val, charset);
9711 if (EQ (coding_type, Qiso_2022)
9712 ? CHARSET_ISO_FINAL (charset) < 0
9713 : EQ (coding_type, Qemacs_mule)
9714 ? CHARSET_EMACS_MULE_ID (charset) < 0
9715 : 0)
9716 error ("Can't handle charset `%s'",
8f924df7 9717 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 9718
8f924df7 9719 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
9720 if (max_charset_id < charset->id)
9721 max_charset_id = charset->id;
068a9dbd
KH
9722 }
9723 }
df7492f9 9724 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 9725
1b3b981b
AS
9726 safe_charsets = make_uninit_string (max_charset_id + 1);
9727 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9 9728 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 9729 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 9730 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 9731
584948ac 9732 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 9733
df7492f9 9734 val = args[coding_arg_decode_translation_table];
a6f87d34 9735 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 9736 CHECK_SYMBOL (val);
df7492f9 9737 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 9738
df7492f9 9739 val = args[coding_arg_encode_translation_table];
a6f87d34 9740 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 9741 CHECK_SYMBOL (val);
df7492f9 9742 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 9743
df7492f9
KH
9744 val = args[coding_arg_post_read_conversion];
9745 CHECK_SYMBOL (val);
9746 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 9747
df7492f9
KH
9748 val = args[coding_arg_pre_write_conversion];
9749 CHECK_SYMBOL (val);
9750 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 9751
df7492f9
KH
9752 val = args[coding_arg_default_char];
9753 if (NILP (val))
9754 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9755 else
9756 {
8f924df7 9757 CHECK_CHARACTER (val);
df7492f9
KH
9758 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9759 }
4031e2bf 9760
8f924df7
KH
9761 val = args[coding_arg_for_unibyte];
9762 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 9763
df7492f9
KH
9764 val = args[coding_arg_plist];
9765 CHECK_LIST (val);
9766 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 9767
df7492f9
KH
9768 if (EQ (coding_type, Qcharset))
9769 {
c7c66a95
KH
9770 /* Generate a lisp vector of 256 elements. Each element is nil,
9771 integer, or a list of charset IDs.
3a73fa5d 9772
c7c66a95
KH
9773 If Nth element is nil, the byte code N is invalid in this
9774 coding system.
4ed46869 9775
c7c66a95
KH
9776 If Nth element is a number NUM, N is the first byte of a
9777 charset whose ID is NUM.
4ed46869 9778
c7c66a95
KH
9779 If Nth element is a list of charset IDs, N is the first byte
9780 of one of them. The list is sorted by dimensions of the
2bc515e4 9781 charsets. A charset of smaller dimension comes first. */
df7492f9 9782 val = Fmake_vector (make_number (256), Qnil);
4ed46869 9783
5c99c2e6 9784 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 9785 {
c7c66a95
KH
9786 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9787 int dim = CHARSET_DIMENSION (charset);
9788 int idx = (dim - 1) * 4;
4ed46869 9789
5c99c2e6 9790 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 9791 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 9792
15d143f7
KH
9793 for (i = charset->code_space[idx];
9794 i <= charset->code_space[idx + 1]; i++)
9795 {
c7c66a95
KH
9796 Lisp_Object tmp, tmp2;
9797 int dim2;
ec6d2bb8 9798
c7c66a95
KH
9799 tmp = AREF (val, i);
9800 if (NILP (tmp))
9801 tmp = XCAR (tail);
9802 else if (NUMBERP (tmp))
9803 {
9804 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9805 if (dim < dim2)
c7c66a95 9806 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
9807 else
9808 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 9809 }
15d143f7 9810 else
c7c66a95
KH
9811 {
9812 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9813 {
9814 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9815 if (dim < dim2)
9816 break;
9817 }
9818 if (NILP (tmp2))
9819 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9820 else
9821 {
9822 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9823 XSETCAR (tmp2, XCAR (tail));
9824 }
9825 }
9826 ASET (val, i, tmp);
15d143f7 9827 }
df7492f9
KH
9828 }
9829 ASET (attrs, coding_attr_charset_valids, val);
9830 category = coding_category_charset;
9831 }
9832 else if (EQ (coding_type, Qccl))
9833 {
9834 Lisp_Object valids;
ecec61c1 9835
df7492f9
KH
9836 if (nargs < coding_arg_ccl_max)
9837 goto short_args;
ecec61c1 9838
df7492f9
KH
9839 val = args[coding_arg_ccl_decoder];
9840 CHECK_CCL_PROGRAM (val);
9841 if (VECTORP (val))
9842 val = Fcopy_sequence (val);
9843 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 9844
df7492f9
KH
9845 val = args[coding_arg_ccl_encoder];
9846 CHECK_CCL_PROGRAM (val);
9847 if (VECTORP (val))
9848 val = Fcopy_sequence (val);
9849 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 9850
df7492f9
KH
9851 val = args[coding_arg_ccl_valids];
9852 valids = Fmake_string (make_number (256), make_number (0));
9853 for (tail = val; !NILP (tail); tail = Fcdr (tail))
9854 {
8dcbea82 9855 int from, to;
ecec61c1 9856
df7492f9
KH
9857 val = Fcar (tail);
9858 if (INTEGERP (val))
8dcbea82
KH
9859 {
9860 from = to = XINT (val);
9861 if (from < 0 || from > 255)
9862 args_out_of_range_3 (val, make_number (0), make_number (255));
9863 }
df7492f9
KH
9864 else
9865 {
df7492f9 9866 CHECK_CONS (val);
8f924df7
KH
9867 CHECK_NATNUM_CAR (val);
9868 CHECK_NATNUM_CDR (val);
df7492f9 9869 from = XINT (XCAR (val));
8f924df7 9870 if (from > 255)
8dcbea82
KH
9871 args_out_of_range_3 (XCAR (val),
9872 make_number (0), make_number (255));
df7492f9 9873 to = XINT (XCDR (val));
8dcbea82
KH
9874 if (to < from || to > 255)
9875 args_out_of_range_3 (XCDR (val),
9876 XCAR (val), make_number (255));
df7492f9 9877 }
8dcbea82 9878 for (i = from; i <= to; i++)
8f924df7 9879 SSET (valids, i, 1);
df7492f9
KH
9880 }
9881 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 9882
df7492f9 9883 category = coding_category_ccl;
55ab7be3 9884 }
df7492f9 9885 else if (EQ (coding_type, Qutf_16))
55ab7be3 9886 {
df7492f9 9887 Lisp_Object bom, endian;
4ed46869 9888
584948ac 9889 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 9890
df7492f9
KH
9891 if (nargs < coding_arg_utf16_max)
9892 goto short_args;
4ed46869 9893
df7492f9
KH
9894 bom = args[coding_arg_utf16_bom];
9895 if (! NILP (bom) && ! EQ (bom, Qt))
9896 {
9897 CHECK_CONS (bom);
8f924df7
KH
9898 val = XCAR (bom);
9899 CHECK_CODING_SYSTEM (val);
9900 val = XCDR (bom);
9901 CHECK_CODING_SYSTEM (val);
df7492f9 9902 }
a470d443 9903 ASET (attrs, coding_attr_utf_bom, bom);
df7492f9
KH
9904
9905 endian = args[coding_arg_utf16_endian];
b49a1807
KH
9906 CHECK_SYMBOL (endian);
9907 if (NILP (endian))
9908 endian = Qbig;
9909 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 9910 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
9911 ASET (attrs, coding_attr_utf_16_endian, endian);
9912
9913 category = (CONSP (bom)
9914 ? coding_category_utf_16_auto
9915 : NILP (bom)
b49a1807 9916 ? (EQ (endian, Qbig)
df7492f9
KH
9917 ? coding_category_utf_16_be_nosig
9918 : coding_category_utf_16_le_nosig)
b49a1807 9919 : (EQ (endian, Qbig)
df7492f9
KH
9920 ? coding_category_utf_16_be
9921 : coding_category_utf_16_le));
9922 }
9923 else if (EQ (coding_type, Qiso_2022))
9924 {
9925 Lisp_Object initial, reg_usage, request, flags;
4776e638 9926 int i;
1397dc18 9927
df7492f9
KH
9928 if (nargs < coding_arg_iso2022_max)
9929 goto short_args;
9930
9931 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9932 CHECK_VECTOR (initial);
9933 for (i = 0; i < 4; i++)
9934 {
9935 val = Faref (initial, make_number (i));
9936 if (! NILP (val))
9937 {
584948ac
KH
9938 struct charset *charset;
9939
9940 CHECK_CHARSET_GET_CHARSET (val, charset);
9941 ASET (initial, i, make_number (CHARSET_ID (charset)));
9942 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9943 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9944 }
9945 else
9946 ASET (initial, i, make_number (-1));
9947 }
9948
9949 reg_usage = args[coding_arg_iso2022_reg_usage];
9950 CHECK_CONS (reg_usage);
8f924df7
KH
9951 CHECK_NUMBER_CAR (reg_usage);
9952 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
9953
9954 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9955 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 9956 {
df7492f9 9957 int id;
8f924df7 9958 Lisp_Object tmp;
df7492f9
KH
9959
9960 val = Fcar (tail);
9961 CHECK_CONS (val);
8f924df7
KH
9962 tmp = XCAR (val);
9963 CHECK_CHARSET_GET_ID (tmp, id);
9964 CHECK_NATNUM_CDR (val);
df7492f9
KH
9965 if (XINT (XCDR (val)) >= 4)
9966 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 9967 XSETCAR (val, make_number (id));
1397dc18 9968 }
4ed46869 9969
df7492f9
KH
9970 flags = args[coding_arg_iso2022_flags];
9971 CHECK_NATNUM (flags);
9972 i = XINT (flags);
9973 if (EQ (args[coding_arg_charset_list], Qiso_2022))
9974 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9975
9976 ASET (attrs, coding_attr_iso_initial, initial);
9977 ASET (attrs, coding_attr_iso_usage, reg_usage);
9978 ASET (attrs, coding_attr_iso_request, request);
9979 ASET (attrs, coding_attr_iso_flags, flags);
9980 setup_iso_safe_charsets (attrs);
9981
9982 if (i & CODING_ISO_FLAG_SEVEN_BITS)
9983 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9984 | CODING_ISO_FLAG_SINGLE_SHIFT))
9985 ? coding_category_iso_7_else
9986 : EQ (args[coding_arg_charset_list], Qiso_2022)
9987 ? coding_category_iso_7
9988 : coding_category_iso_7_tight);
9989 else
9990 {
9991 int id = XINT (AREF (initial, 1));
9992
c6fb6e98 9993 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
9994 || EQ (args[coding_arg_charset_list], Qiso_2022)
9995 || id < 0)
9996 ? coding_category_iso_8_else
9997 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9998 ? coding_category_iso_8_1
9999 : coding_category_iso_8_2);
10000 }
0ce7886f
KH
10001 if (category != coding_category_iso_8_1
10002 && category != coding_category_iso_8_2)
10003 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
10004 }
10005 else if (EQ (coding_type, Qemacs_mule))
c28a9453 10006 {
df7492f9
KH
10007 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
10008 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 10009 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 10010 category = coding_category_emacs_mule;
c28a9453 10011 }
df7492f9 10012 else if (EQ (coding_type, Qshift_jis))
c28a9453 10013 {
df7492f9
KH
10014
10015 struct charset *charset;
10016
7d64c6ad 10017 if (XINT (Flength (charset_list)) != 3
6e07c25f 10018 && XINT (Flength (charset_list)) != 4)
7d64c6ad 10019 error ("There should be three or four charsets");
df7492f9
KH
10020
10021 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10022 if (CHARSET_DIMENSION (charset) != 1)
10023 error ("Dimension of charset %s is not one",
8f924df7 10024 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
10025 if (CHARSET_ASCII_COMPATIBLE_P (charset))
10026 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
10027
10028 charset_list = XCDR (charset_list);
10029 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10030 if (CHARSET_DIMENSION (charset) != 1)
10031 error ("Dimension of charset %s is not one",
8f924df7 10032 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
10033
10034 charset_list = XCDR (charset_list);
10035 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10036 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
10037 error ("Dimension of charset %s is not two",
10038 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10039
10040 charset_list = XCDR (charset_list);
2b917a06
KH
10041 if (! NILP (charset_list))
10042 {
10043 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10044 if (CHARSET_DIMENSION (charset) != 2)
10045 error ("Dimension of charset %s is not two",
10046 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10047 }
df7492f9
KH
10048
10049 category = coding_category_sjis;
10050 Vsjis_coding_system = name;
c28a9453 10051 }
df7492f9
KH
10052 else if (EQ (coding_type, Qbig5))
10053 {
10054 struct charset *charset;
4ed46869 10055
df7492f9
KH
10056 if (XINT (Flength (charset_list)) != 2)
10057 error ("There should be just two charsets");
10058
10059 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10060 if (CHARSET_DIMENSION (charset) != 1)
10061 error ("Dimension of charset %s is not one",
8f924df7 10062 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
10063 if (CHARSET_ASCII_COMPATIBLE_P (charset))
10064 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
10065
10066 charset_list = XCDR (charset_list);
10067 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10068 if (CHARSET_DIMENSION (charset) != 2)
10069 error ("Dimension of charset %s is not two",
8f924df7 10070 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 10071
df7492f9
KH
10072 category = coding_category_big5;
10073 Vbig5_coding_system = name;
10074 }
10075 else if (EQ (coding_type, Qraw_text))
c28a9453 10076 {
584948ac
KH
10077 category = coding_category_raw_text;
10078 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 10079 }
df7492f9 10080 else if (EQ (coding_type, Qutf_8))
4ed46869 10081 {
a470d443
KH
10082 Lisp_Object bom;
10083
584948ac 10084 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
a470d443
KH
10085
10086 if (nargs < coding_arg_utf8_max)
10087 goto short_args;
10088
10089 bom = args[coding_arg_utf8_bom];
10090 if (! NILP (bom) && ! EQ (bom, Qt))
10091 {
10092 CHECK_CONS (bom);
10093 val = XCAR (bom);
10094 CHECK_CODING_SYSTEM (val);
10095 val = XCDR (bom);
10096 CHECK_CODING_SYSTEM (val);
10097 }
10098 ASET (attrs, coding_attr_utf_bom, bom);
10099
10100 category = (CONSP (bom) ? coding_category_utf_8_auto
10101 : NILP (bom) ? coding_category_utf_8_nosig
10102 : coding_category_utf_8_sig);
4ed46869 10103 }
df7492f9
KH
10104 else if (EQ (coding_type, Qundecided))
10105 category = coding_category_undecided;
4ed46869 10106 else
df7492f9 10107 error ("Invalid coding system type: %s",
8f924df7 10108 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 10109
df7492f9 10110 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
10111 CODING_ATTR_PLIST (attrs)
10112 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10113 CODING_ATTR_PLIST (attrs)));
35befdaa 10114 CODING_ATTR_PLIST (attrs)
3ed051d4 10115 = Fcons (QCascii_compatible_p,
35befdaa
KH
10116 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10117 CODING_ATTR_PLIST (attrs)));
c4825358 10118
df7492f9
KH
10119 eol_type = args[coding_arg_eol_type];
10120 if (! NILP (eol_type)
10121 && ! EQ (eol_type, Qunix)
10122 && ! EQ (eol_type, Qdos)
10123 && ! EQ (eol_type, Qmac))
10124 error ("Invalid eol-type");
4ed46869 10125
df7492f9 10126 aliases = Fcons (name, Qnil);
4ed46869 10127
df7492f9
KH
10128 if (NILP (eol_type))
10129 {
10130 eol_type = make_subsidiaries (name);
10131 for (i = 0; i < 3; i++)
1397dc18 10132 {
df7492f9
KH
10133 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10134
10135 this_name = AREF (eol_type, i);
10136 this_aliases = Fcons (this_name, Qnil);
10137 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10138 this_spec = Fmake_vector (make_number (3), attrs);
10139 ASET (this_spec, 1, this_aliases);
10140 ASET (this_spec, 2, this_eol_type);
10141 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10142 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
583f71ca
KH
10143 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10144 if (NILP (val))
10145 Vcoding_system_alist
10146 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10147 Vcoding_system_alist);
1397dc18 10148 }
d46c5b12 10149 }
4ed46869 10150
df7492f9
KH
10151 spec_vec = Fmake_vector (make_number (3), attrs);
10152 ASET (spec_vec, 1, aliases);
10153 ASET (spec_vec, 2, eol_type);
48b0f3ae 10154
df7492f9
KH
10155 Fputhash (name, spec_vec, Vcoding_system_hash_table);
10156 Vcoding_system_list = Fcons (name, Vcoding_system_list);
583f71ca
KH
10157 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10158 if (NILP (val))
10159 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10160 Vcoding_system_alist);
48b0f3ae 10161
df7492f9
KH
10162 {
10163 int id = coding_categories[category].id;
48b0f3ae 10164
df7492f9
KH
10165 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10166 setup_coding_system (name, &coding_categories[category]);
10167 }
48b0f3ae 10168
d46c5b12 10169 return Qnil;
48b0f3ae 10170
df7492f9
KH
10171 short_args:
10172 return Fsignal (Qwrong_number_of_arguments,
10173 Fcons (intern ("define-coding-system-internal"),
10174 make_number (nargs)));
d46c5b12 10175}
4ed46869 10176
d6925f38 10177
a6f87d34
KH
10178DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10179 3, 3, 0,
10180 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
10181 (coding_system, prop, val)
10182 Lisp_Object coding_system, prop, val;
10183{
3dbe7859 10184 Lisp_Object spec, attrs;
a6f87d34
KH
10185
10186 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10187 attrs = AREF (spec, 0);
10188 if (EQ (prop, QCmnemonic))
10189 {
10190 if (! STRINGP (val))
10191 CHECK_CHARACTER (val);
10192 CODING_ATTR_MNEMONIC (attrs) = val;
10193 }
2133e2d1 10194 else if (EQ (prop, QCdefault_char))
a6f87d34
KH
10195 {
10196 if (NILP (val))
10197 val = make_number (' ');
10198 else
10199 CHECK_CHARACTER (val);
10200 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10201 }
10202 else if (EQ (prop, QCdecode_translation_table))
10203 {
10204 if (! CHAR_TABLE_P (val) && ! CONSP (val))
10205 CHECK_SYMBOL (val);
10206 CODING_ATTR_DECODE_TBL (attrs) = val;
10207 }
10208 else if (EQ (prop, QCencode_translation_table))
10209 {
10210 if (! CHAR_TABLE_P (val) && ! CONSP (val))
10211 CHECK_SYMBOL (val);
10212 CODING_ATTR_ENCODE_TBL (attrs) = val;
10213 }
10214 else if (EQ (prop, QCpost_read_conversion))
10215 {
10216 CHECK_SYMBOL (val);
10217 CODING_ATTR_POST_READ (attrs) = val;
10218 }
10219 else if (EQ (prop, QCpre_write_conversion))
10220 {
10221 CHECK_SYMBOL (val);
10222 CODING_ATTR_PRE_WRITE (attrs) = val;
10223 }
35befdaa
KH
10224 else if (EQ (prop, QCascii_compatible_p))
10225 {
10226 CODING_ATTR_ASCII_COMPAT (attrs) = val;
10227 }
a6f87d34
KH
10228
10229 CODING_ATTR_PLIST (attrs)
10230 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10231 return val;
10232}
10233
10234
df7492f9
KH
10235DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10236 Sdefine_coding_system_alias, 2, 2, 0,
10237 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
10238 (alias, coding_system)
10239 Lisp_Object alias, coding_system;
66cfb530 10240{
583f71ca 10241 Lisp_Object spec, aliases, eol_type, val;
4ed46869 10242
df7492f9
KH
10243 CHECK_SYMBOL (alias);
10244 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10245 aliases = AREF (spec, 1);
d4a1d553 10246 /* ALIASES should be a list of length more than zero, and the first
d6925f38
KH
10247 element is a base coding system. Append ALIAS at the tail of the
10248 list. */
df7492f9
KH
10249 while (!NILP (XCDR (aliases)))
10250 aliases = XCDR (aliases);
8f924df7 10251 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 10252
df7492f9
KH
10253 eol_type = AREF (spec, 2);
10254 if (VECTORP (eol_type))
4ed46869 10255 {
df7492f9
KH
10256 Lisp_Object subsidiaries;
10257 int i;
4ed46869 10258
df7492f9
KH
10259 subsidiaries = make_subsidiaries (alias);
10260 for (i = 0; i < 3; i++)
10261 Fdefine_coding_system_alias (AREF (subsidiaries, i),
10262 AREF (eol_type, i));
4ed46869 10263 }
df7492f9
KH
10264
10265 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 10266 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
583f71ca
KH
10267 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10268 if (NILP (val))
10269 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10270 Vcoding_system_alist);
66cfb530 10271
4ed46869
KH
10272 return Qnil;
10273}
10274
df7492f9
KH
10275DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10276 1, 1, 0,
10277 doc: /* Return the base of CODING-SYSTEM.
da7db224 10278Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
10279 (coding_system)
10280 Lisp_Object coding_system;
d46c5b12 10281{
df7492f9 10282 Lisp_Object spec, attrs;
d46c5b12 10283
df7492f9
KH
10284 if (NILP (coding_system))
10285 return (Qno_conversion);
10286 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10287 attrs = AREF (spec, 0);
10288 return CODING_ATTR_BASE_NAME (attrs);
10289}
1397dc18 10290
df7492f9
KH
10291DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10292 1, 1, 0,
10293 doc: "Return the property list of CODING-SYSTEM.")
10294 (coding_system)
10295 Lisp_Object coding_system;
10296{
10297 Lisp_Object spec, attrs;
1397dc18 10298
df7492f9
KH
10299 if (NILP (coding_system))
10300 coding_system = Qno_conversion;
10301 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10302 attrs = AREF (spec, 0);
10303 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
10304}
10305
df7492f9
KH
10306
10307DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10308 1, 1, 0,
da7db224 10309 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
10310 (coding_system)
10311 Lisp_Object coding_system;
66cfb530 10312{
df7492f9 10313 Lisp_Object spec;
84d60297 10314
df7492f9
KH
10315 if (NILP (coding_system))
10316 coding_system = Qno_conversion;
10317 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 10318 return AREF (spec, 1);
df7492f9 10319}
66cfb530 10320
df7492f9
KH
10321DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10322 Scoding_system_eol_type, 1, 1, 0,
10323 doc: /* Return eol-type of CODING-SYSTEM.
d4a1d553 10324An eol-type is an integer 0, 1, 2, or a vector of coding systems.
66cfb530 10325
df7492f9
KH
10326Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10327and CR respectively.
66cfb530 10328
df7492f9
KH
10329A vector value indicates that a format of end-of-line should be
10330detected automatically. Nth element of the vector is the subsidiary
10331coding system whose eol-type is N. */)
6b89e3aa
KH
10332 (coding_system)
10333 Lisp_Object coding_system;
10334{
df7492f9
KH
10335 Lisp_Object spec, eol_type;
10336 int n;
6b89e3aa 10337
df7492f9
KH
10338 if (NILP (coding_system))
10339 coding_system = Qno_conversion;
10340 if (! CODING_SYSTEM_P (coding_system))
10341 return Qnil;
10342 spec = CODING_SYSTEM_SPEC (coding_system);
10343 eol_type = AREF (spec, 2);
10344 if (VECTORP (eol_type))
10345 return Fcopy_sequence (eol_type);
10346 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10347 return make_number (n);
6b89e3aa
KH
10348}
10349
4ed46869
KH
10350#endif /* emacs */
10351
10352\f
1397dc18 10353/*** 12. Postamble ***/
4ed46869 10354
dfcf069d 10355void
4ed46869
KH
10356init_coding_once ()
10357{
10358 int i;
10359
df7492f9
KH
10360 for (i = 0; i < coding_category_max; i++)
10361 {
10362 coding_categories[i].id = -1;
10363 coding_priorities[i] = i;
10364 }
4ed46869
KH
10365
10366 /* ISO2022 specific initialize routine. */
10367 for (i = 0; i < 0x20; i++)
b73bfc1c 10368 iso_code_class[i] = ISO_control_0;
4ed46869
KH
10369 for (i = 0x21; i < 0x7F; i++)
10370 iso_code_class[i] = ISO_graphic_plane_0;
10371 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 10372 iso_code_class[i] = ISO_control_1;
4ed46869
KH
10373 for (i = 0xA1; i < 0xFF; i++)
10374 iso_code_class[i] = ISO_graphic_plane_1;
10375 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10376 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
10377 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10378 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10379 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10380 iso_code_class[ISO_CODE_ESC] = ISO_escape;
10381 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10382 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10383 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10384
df7492f9
KH
10385 for (i = 0; i < 256; i++)
10386 {
10387 emacs_mule_bytes[i] = 1;
10388 }
7c78e542
KH
10389 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10390 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10391 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10392 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
10393}
10394
10395#ifdef emacs
10396
dfcf069d 10397void
e0e989f6
KH
10398syms_of_coding ()
10399{
df7492f9 10400 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
10401 {
10402 Lisp_Object args[2];
10403 args[0] = QCtest;
10404 args[1] = Qeq;
10405 Vcoding_system_hash_table = Fmake_hash_table (2, args);
10406 }
df7492f9
KH
10407
10408 staticpro (&Vsjis_coding_system);
10409 Vsjis_coding_system = Qnil;
e0e989f6 10410
df7492f9
KH
10411 staticpro (&Vbig5_coding_system);
10412 Vbig5_coding_system = Qnil;
10413
24a73b0a
KH
10414 staticpro (&Vcode_conversion_reused_workbuf);
10415 Vcode_conversion_reused_workbuf = Qnil;
10416
10417 staticpro (&Vcode_conversion_workbuf_name);
d67b4f80 10418 Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
e0e989f6 10419
24a73b0a 10420 reused_workbuf_in_use = 0;
df7492f9
KH
10421
10422 DEFSYM (Qcharset, "charset");
10423 DEFSYM (Qtarget_idx, "target-idx");
10424 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
10425 Fset (Qcoding_system_history, Qnil);
10426
9ce27fde 10427 /* Target FILENAME is the first argument. */
e0e989f6 10428 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 10429 /* Target FILENAME is the third argument. */
e0e989f6
KH
10430 Fput (Qwrite_region, Qtarget_idx, make_number (2));
10431
df7492f9 10432 DEFSYM (Qcall_process, "call-process");
9ce27fde 10433 /* Target PROGRAM is the first argument. */
e0e989f6
KH
10434 Fput (Qcall_process, Qtarget_idx, make_number (0));
10435
df7492f9 10436 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 10437 /* Target PROGRAM is the third argument. */
e0e989f6
KH
10438 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10439
df7492f9 10440 DEFSYM (Qstart_process, "start-process");
9ce27fde 10441 /* Target PROGRAM is the third argument. */
e0e989f6
KH
10442 Fput (Qstart_process, Qtarget_idx, make_number (2));
10443
df7492f9 10444 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 10445 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
10446 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10447
df7492f9
KH
10448 DEFSYM (Qcoding_system, "coding-system");
10449 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 10450
df7492f9
KH
10451 DEFSYM (Qeol_type, "eol-type");
10452 DEFSYM (Qunix, "unix");
10453 DEFSYM (Qdos, "dos");
4ed46869 10454
df7492f9
KH
10455 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10456 DEFSYM (Qpost_read_conversion, "post-read-conversion");
10457 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10458 DEFSYM (Qdefault_char, "default-char");
10459 DEFSYM (Qundecided, "undecided");
10460 DEFSYM (Qno_conversion, "no-conversion");
10461 DEFSYM (Qraw_text, "raw-text");
4ed46869 10462
df7492f9 10463 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 10464
df7492f9 10465 DEFSYM (Qutf_8, "utf-8");
8f924df7 10466 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 10467
df7492f9 10468 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
10469 DEFSYM (Qbig, "big");
10470 DEFSYM (Qlittle, "little");
27901516 10471
df7492f9
KH
10472 DEFSYM (Qshift_jis, "shift-jis");
10473 DEFSYM (Qbig5, "big5");
4ed46869 10474
df7492f9 10475 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 10476
df7492f9 10477 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869 10478 Fput (Qcoding_system_error, Qerror_conditions,
d67b4f80 10479 pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
4ed46869 10480 Fput (Qcoding_system_error, Qerror_message,
d67b4f80 10481 make_pure_c_string ("Invalid coding system"));
4ed46869 10482
05e6f5dc
KH
10483 /* Intern this now in case it isn't already done.
10484 Setting this variable twice is harmless.
10485 But don't staticpro it here--that is done in alloc.c. */
d67b4f80 10486 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
70c22245 10487
df7492f9 10488 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 10489 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
10490 DEFSYM (Qtranslation_table_id, "translation-table-id");
10491 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10492 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 10493
df7492f9 10494 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 10495
df7492f9 10496 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 10497
01378f49 10498 DEFSYM (QCcategory, ":category");
a6f87d34 10499 DEFSYM (QCmnemonic, ":mnemonic");
2133e2d1 10500 DEFSYM (QCdefault_char, ":default-char");
a6f87d34
KH
10501 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10502 DEFSYM (QCencode_translation_table, ":encode-translation-table");
10503 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10504 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
35befdaa 10505 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
01378f49 10506
df7492f9
KH
10507 Vcoding_category_table
10508 = Fmake_vector (make_number (coding_category_max), Qnil);
10509 staticpro (&Vcoding_category_table);
10510 /* Followings are target of code detection. */
10511 ASET (Vcoding_category_table, coding_category_iso_7,
d67b4f80 10512 intern_c_string ("coding-category-iso-7"));
df7492f9 10513 ASET (Vcoding_category_table, coding_category_iso_7_tight,
d67b4f80 10514 intern_c_string ("coding-category-iso-7-tight"));
df7492f9 10515 ASET (Vcoding_category_table, coding_category_iso_8_1,
d67b4f80 10516 intern_c_string ("coding-category-iso-8-1"));
df7492f9 10517 ASET (Vcoding_category_table, coding_category_iso_8_2,
d67b4f80 10518 intern_c_string ("coding-category-iso-8-2"));
df7492f9 10519 ASET (Vcoding_category_table, coding_category_iso_7_else,
d67b4f80 10520 intern_c_string ("coding-category-iso-7-else"));
df7492f9 10521 ASET (Vcoding_category_table, coding_category_iso_8_else,
d67b4f80 10522 intern_c_string ("coding-category-iso-8-else"));
a470d443 10523 ASET (Vcoding_category_table, coding_category_utf_8_auto,
d67b4f80 10524 intern_c_string ("coding-category-utf-8-auto"));
a470d443 10525 ASET (Vcoding_category_table, coding_category_utf_8_nosig,
d67b4f80 10526 intern_c_string ("coding-category-utf-8"));
a470d443 10527 ASET (Vcoding_category_table, coding_category_utf_8_sig,
d67b4f80 10528 intern_c_string ("coding-category-utf-8-sig"));
df7492f9 10529 ASET (Vcoding_category_table, coding_category_utf_16_be,
d67b4f80 10530 intern_c_string ("coding-category-utf-16-be"));
ff563fce 10531 ASET (Vcoding_category_table, coding_category_utf_16_auto,
d67b4f80 10532 intern_c_string ("coding-category-utf-16-auto"));
df7492f9 10533 ASET (Vcoding_category_table, coding_category_utf_16_le,
d67b4f80 10534 intern_c_string ("coding-category-utf-16-le"));
df7492f9 10535 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
d67b4f80 10536 intern_c_string ("coding-category-utf-16-be-nosig"));
df7492f9 10537 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
d67b4f80 10538 intern_c_string ("coding-category-utf-16-le-nosig"));
df7492f9 10539 ASET (Vcoding_category_table, coding_category_charset,
d67b4f80 10540 intern_c_string ("coding-category-charset"));
df7492f9 10541 ASET (Vcoding_category_table, coding_category_sjis,
d67b4f80 10542 intern_c_string ("coding-category-sjis"));
df7492f9 10543 ASET (Vcoding_category_table, coding_category_big5,
d67b4f80 10544 intern_c_string ("coding-category-big5"));
df7492f9 10545 ASET (Vcoding_category_table, coding_category_ccl,
d67b4f80 10546 intern_c_string ("coding-category-ccl"));
df7492f9 10547 ASET (Vcoding_category_table, coding_category_emacs_mule,
d67b4f80 10548 intern_c_string ("coding-category-emacs-mule"));
df7492f9
KH
10549 /* Followings are NOT target of code detection. */
10550 ASET (Vcoding_category_table, coding_category_raw_text,
d67b4f80 10551 intern_c_string ("coding-category-raw-text"));
df7492f9 10552 ASET (Vcoding_category_table, coding_category_undecided,
d67b4f80 10553 intern_c_string ("coding-category-undecided"));
ecf488bc 10554
065e3595
KH
10555 DEFSYM (Qinsufficient_source, "insufficient-source");
10556 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10557 DEFSYM (Qinvalid_source, "invalid-source");
10558 DEFSYM (Qinterrupted, "interrupted");
10559 DEFSYM (Qinsufficient_memory, "insufficient-memory");
44e8490d 10560 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
065e3595 10561
4ed46869
KH
10562 defsubr (&Scoding_system_p);
10563 defsubr (&Sread_coding_system);
10564 defsubr (&Sread_non_nil_coding_system);
10565 defsubr (&Scheck_coding_system);
10566 defsubr (&Sdetect_coding_region);
d46c5b12 10567 defsubr (&Sdetect_coding_string);
05e6f5dc 10568 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 10569 defsubr (&Sunencodable_char_position);
df7492f9 10570 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
10571 defsubr (&Sdecode_coding_region);
10572 defsubr (&Sencode_coding_region);
10573 defsubr (&Sdecode_coding_string);
10574 defsubr (&Sencode_coding_string);
10575 defsubr (&Sdecode_sjis_char);
10576 defsubr (&Sencode_sjis_char);
10577 defsubr (&Sdecode_big5_char);
10578 defsubr (&Sencode_big5_char);
1ba9e4ab 10579 defsubr (&Sset_terminal_coding_system_internal);
c4825358 10580 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 10581 defsubr (&Sterminal_coding_system);
1ba9e4ab 10582 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 10583 defsubr (&Skeyboard_coding_system);
a5d301df 10584 defsubr (&Sfind_operation_coding_system);
df7492f9 10585 defsubr (&Sset_coding_system_priority);
6b89e3aa 10586 defsubr (&Sdefine_coding_system_internal);
df7492f9 10587 defsubr (&Sdefine_coding_system_alias);
a6f87d34 10588 defsubr (&Scoding_system_put);
df7492f9
KH
10589 defsubr (&Scoding_system_base);
10590 defsubr (&Scoding_system_plist);
10591 defsubr (&Scoding_system_aliases);
10592 defsubr (&Scoding_system_eol_type);
10593 defsubr (&Scoding_system_priority_list);
4ed46869 10594
4608c386 10595 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
10596 doc: /* List of coding systems.
10597
10598Do not alter the value of this variable manually. This variable should be
df7492f9 10599updated by the functions `define-coding-system' and
48b0f3ae 10600`define-coding-system-alias'. */);
4608c386
KH
10601 Vcoding_system_list = Qnil;
10602
10603 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
10604 doc: /* Alist of coding system names.
10605Each element is one element list of coding system name.
446dcd75 10606This variable is given to `completing-read' as COLLECTION argument.
48b0f3ae
PJ
10607
10608Do not alter the value of this variable manually. This variable should be
10609updated by the functions `make-coding-system' and
10610`define-coding-system-alias'. */);
4608c386
KH
10611 Vcoding_system_alist = Qnil;
10612
4ed46869 10613 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
10614 doc: /* List of coding-categories (symbols) ordered by priority.
10615
10616On detecting a coding system, Emacs tries code detection algorithms
10617associated with each coding-category one by one in this order. When
10618one algorithm agrees with a byte sequence of source text, the coding
0ec31faf
KH
10619system bound to the corresponding coding-category is selected.
10620
42205607 10621Don't modify this variable directly, but use `set-coding-priority'. */);
4ed46869
KH
10622 {
10623 int i;
10624
10625 Vcoding_category_list = Qnil;
df7492f9 10626 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 10627 Vcoding_category_list
d46c5b12
KH
10628 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10629 Vcoding_category_list);
4ed46869
KH
10630 }
10631
10632 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
10633 doc: /* Specify the coding system for read operations.
10634It is useful to bind this variable with `let', but do not set it globally.
10635If the value is a coding system, it is used for decoding on read operation.
446dcd75
JB
10636If not, an appropriate element is used from one of the coding system alists.
10637There are three such tables: `file-coding-system-alist',
48b0f3ae 10638`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
10639 Vcoding_system_for_read = Qnil;
10640
10641 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
10642 doc: /* Specify the coding system for write operations.
10643Programs bind this variable with `let', but you should not set it globally.
10644If the value is a coding system, it is used for encoding of output,
10645when writing it to a file and when sending it to a file or subprocess.
10646
10647If this does not specify a coding system, an appropriate element
446dcd75
JB
10648is used from one of the coding system alists.
10649There are three such tables: `file-coding-system-alist',
48b0f3ae
PJ
10650`process-coding-system-alist', and `network-coding-system-alist'.
10651For output to files, if the above procedure does not specify a coding system,
10652the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
10653 Vcoding_system_for_write = Qnil;
10654
10655 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
10656 doc: /*
10657Coding system used in the latest file or process I/O. */);
4ed46869
KH
10658 Vlast_coding_system_used = Qnil;
10659
065e3595
KH
10660 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10661 doc: /*
10662Error status of the last code conversion.
10663
10664When an error was detected in the last code conversion, this variable
10665is set to one of the following symbols.
10666 `insufficient-source'
10667 `inconsistent-eol'
10668 `invalid-source'
10669 `interrupted'
10670 `insufficient-memory'
10671When no error was detected, the value doesn't change. So, to check
10672the error status of a code conversion by this variable, you must
10673explicitly set this variable to nil before performing code
10674conversion. */);
10675 Vlast_code_conversion_error = Qnil;
10676
9ce27fde 10677 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
10678 doc: /*
10679*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
10680See info node `Coding Systems' and info node `Text and Binary' concerning
10681such conversion. */);
9ce27fde
KH
10682 inhibit_eol_conversion = 0;
10683
ed29121d 10684 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
10685 doc: /*
10686Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
10687Bind it to t if the process output is to be treated as if it were a file
10688read from some filesystem. */);
ed29121d
EZ
10689 inherit_process_coding_system = 0;
10690
02ba4723 10691 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
10692 doc: /*
10693Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
10694The format is ((PATTERN . VAL) ...),
10695where PATTERN is a regular expression matching a file name,
10696VAL is a coding system, a cons of coding systems, or a function symbol.
10697If VAL is a coding system, it is used for both decoding and encoding
10698the file contents.
10699If VAL is a cons of coding systems, the car part is used for decoding,
10700and the cdr part is used for encoding.
10701If VAL is a function symbol, the function must return a coding system
2c53e699
KH
10702or a cons of coding systems which are used as above. The function is
10703called with an argument that is a list of the arguments with which
5a0bbd9a
KH
10704`find-operation-coding-system' was called. If the function can't decide
10705a coding system, it can return `undecided' so that the normal
10706code-detection is performed.
48b0f3ae
PJ
10707
10708See also the function `find-operation-coding-system'
10709and the variable `auto-coding-alist'. */);
02ba4723
KH
10710 Vfile_coding_system_alist = Qnil;
10711
10712 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
10713 doc: /*
10714Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
10715The format is ((PATTERN . VAL) ...),
10716where PATTERN is a regular expression matching a program name,
10717VAL is a coding system, a cons of coding systems, or a function symbol.
10718If VAL is a coding system, it is used for both decoding what received
10719from the program and encoding what sent to the program.
10720If VAL is a cons of coding systems, the car part is used for decoding,
10721and the cdr part is used for encoding.
10722If VAL is a function symbol, the function must return a coding system
10723or a cons of coding systems which are used as above.
10724
10725See also the function `find-operation-coding-system'. */);
02ba4723
KH
10726 Vprocess_coding_system_alist = Qnil;
10727
10728 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
10729 doc: /*
10730Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
10731The format is ((PATTERN . VAL) ...),
10732where PATTERN is a regular expression matching a network service name
10733or is a port number to connect to,
10734VAL is a coding system, a cons of coding systems, or a function symbol.
10735If VAL is a coding system, it is used for both decoding what received
10736from the network stream and encoding what sent to the network stream.
10737If VAL is a cons of coding systems, the car part is used for decoding,
10738and the cdr part is used for encoding.
10739If VAL is a function symbol, the function must return a coding system
10740or a cons of coding systems which are used as above.
10741
10742See also the function `find-operation-coding-system'. */);
02ba4723 10743 Vnetwork_coding_system_alist = Qnil;
4ed46869 10744
68c45bf0 10745 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
10746 doc: /* Coding system to use with system messages.
10747Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
10748 Vlocale_coding_system = Qnil;
10749
005f0d35 10750 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 10751 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
10752 doc: /*
10753*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
d67b4f80 10754 eol_mnemonic_unix = make_pure_c_string (":");
4ed46869 10755
7722baf9 10756 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
10757 doc: /*
10758*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
d67b4f80 10759 eol_mnemonic_dos = make_pure_c_string ("\\");
4ed46869 10760
7722baf9 10761 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
10762 doc: /*
10763*String displayed in mode line for MAC-like (CR) end-of-line format. */);
d67b4f80 10764 eol_mnemonic_mac = make_pure_c_string ("/");
4ed46869 10765
7722baf9 10766 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
10767 doc: /*
10768*String displayed in mode line when end-of-line format is not yet determined. */);
d67b4f80 10769 eol_mnemonic_undecided = make_pure_c_string (":");
4ed46869 10770
84fbb8a0 10771 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
10772 doc: /*
10773*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 10774 Venable_character_translation = Qt;
bdd9fb48 10775
f967223b 10776 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
10777 &Vstandard_translation_table_for_decode,
10778 doc: /* Table for translating characters while decoding. */);
f967223b 10779 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 10780
f967223b 10781 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
10782 &Vstandard_translation_table_for_encode,
10783 doc: /* Table for translating characters while encoding. */);
f967223b 10784 Vstandard_translation_table_for_encode = Qnil;
4ed46869 10785
df7492f9 10786 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
10787 doc: /* Alist of charsets vs revision numbers.
10788While encoding, if a charset (car part of an element) is found,
df7492f9
KH
10789designate it with the escape sequence identifying revision (cdr part
10790of the element). */);
10791 Vcharset_revision_table = Qnil;
02ba4723
KH
10792
10793 DEFVAR_LISP ("default-process-coding-system",
10794 &Vdefault_process_coding_system,
48b0f3ae
PJ
10795 doc: /* Cons of coding systems used for process I/O by default.
10796The car part is used for decoding a process output,
10797the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 10798 Vdefault_process_coding_system = Qnil;
c4825358 10799
3f003981 10800 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
10801 doc: /*
10802Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
10803This is a vector of length 256.
10804If Nth element is non-nil, the existence of code N in a file
10805\(or output of subprocess) doesn't prevent it from being detected as
10806a coding system of ISO 2022 variant which has a flag
10807`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10808or reading output of a subprocess.
446dcd75 10809Only 128th through 159th elements have a meaning. */);
3f003981 10810 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
10811
10812 DEFVAR_LISP ("select-safe-coding-system-function",
10813 &Vselect_safe_coding_system_function,
df7492f9
KH
10814 doc: /*
10815Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
10816
10817If set, this function is called to force a user to select a proper
10818coding system which can encode the text in the case that a default
fdecf907
GM
10819coding system used in each operation can't encode the text. The
10820function should take care that the buffer is not modified while
10821the coding system is being selected.
48b0f3ae
PJ
10822
10823The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
10824 Vselect_safe_coding_system_function = Qnil;
10825
5d5bf4d8
KH
10826 DEFVAR_BOOL ("coding-system-require-warning",
10827 &coding_system_require_warning,
10828 doc: /* Internal use only.
6b89e3aa
KH
10829If non-nil, on writing a file, `select-safe-coding-system-function' is
10830called even if `coding-system-for-write' is non-nil. The command
10831`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
10832 coding_system_require_warning = 0;
10833
10834
22ab2303 10835 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 10836 &inhibit_iso_escape_detection,
df7492f9 10837 doc: /*
97b1b294 10838If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
48b0f3ae 10839
97b1b294
EZ
10840When Emacs reads text, it tries to detect how the text is encoded.
10841This code detection is sensitive to escape sequences. If Emacs sees
10842a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10843of the ISO2022 encodings, and decodes text by the corresponding coding
10844system (e.g. `iso-2022-7bit').
48b0f3ae
PJ
10845
10846However, there may be a case that you want to read escape sequences in
10847a file as is. In such a case, you can set this variable to non-nil.
97b1b294
EZ
10848Then the code detection will ignore any escape sequences, and no text is
10849detected as encoded in some ISO-2022 encoding. The result is that all
48b0f3ae
PJ
10850escape sequences become visible in a buffer.
10851
10852The default value is nil, and it is strongly recommended not to change
10853it. That is because many Emacs Lisp source files that contain
10854non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10855in Emacs's distribution, and they won't be decoded correctly on
10856reading if you suppress escape sequence detection.
10857
10858The other way to read escape sequences in a file without decoding is
97b1b294 10859to explicitly specify some coding system that doesn't use ISO-2022
48b0f3ae 10860escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 10861 inhibit_iso_escape_detection = 0;
002fdb44 10862
97b1b294
EZ
10863 DEFVAR_BOOL ("inhibit-null-byte-detection",
10864 &inhibit_null_byte_detection,
10865 doc: /* If non-nil, Emacs ignores null bytes on code detection.
10866By default, Emacs treats it as binary data, and does not attempt to
10867decode it. The effect is as if you specified `no-conversion' for
10868reading that text.
10869
10870Set this to non-nil when a regular text happens to include null bytes.
10871Examples are Index nodes of Info files and null-byte delimited output
10872from GNU Find and GNU Grep. Emacs will then ignore the null bytes and
10873decode text as usual. */);
10874 inhibit_null_byte_detection = 0;
10875
002fdb44 10876 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1 10877 doc: /* Char table for translating self-inserting characters.
446dcd75 10878This is applied to the result of input methods, not their input.
8434d0b8
EZ
10879See also `keyboard-translate-table'.
10880
10881Use of this variable for character code unification was rendered
10882obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10883internal character representation. */);
002fdb44 10884 Vtranslation_table_for_input = Qnil;
8f924df7 10885
2c78b7e1
KH
10886 {
10887 Lisp_Object args[coding_arg_max];
8f924df7 10888 Lisp_Object plist[16];
2c78b7e1
KH
10889 int i;
10890
10891 for (i = 0; i < coding_arg_max; i++)
10892 args[i] = Qnil;
10893
d67b4f80 10894 plist[0] = intern_c_string (":name");
2c78b7e1 10895 plist[1] = args[coding_arg_name] = Qno_conversion;
d67b4f80 10896 plist[2] = intern_c_string (":mnemonic");
2c78b7e1 10897 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
d67b4f80 10898 plist[4] = intern_c_string (":coding-type");
2c78b7e1 10899 plist[5] = args[coding_arg_coding_type] = Qraw_text;
d67b4f80 10900 plist[6] = intern_c_string (":ascii-compatible-p");
2c78b7e1 10901 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
d67b4f80 10902 plist[8] = intern_c_string (":default-char");
2c78b7e1 10903 plist[9] = args[coding_arg_default_char] = make_number (0);
d67b4f80 10904 plist[10] = intern_c_string (":for-unibyte");
8f924df7 10905 plist[11] = args[coding_arg_for_unibyte] = Qt;
d67b4f80
DN
10906 plist[12] = intern_c_string (":docstring");
10907 plist[13] = make_pure_c_string ("Do no conversion.\n\
2c78b7e1
KH
10908\n\
10909When you visit a file with this coding, the file is read into a\n\
10910unibyte buffer as is, thus each byte of a file is treated as a\n\
10911character.");
d67b4f80 10912 plist[14] = intern_c_string (":eol-type");
8f924df7
KH
10913 plist[15] = args[coding_arg_eol_type] = Qunix;
10914 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1 10915 Fdefine_coding_system_internal (coding_arg_max, args);
ae6f73fa
KH
10916
10917 plist[1] = args[coding_arg_name] = Qundecided;
10918 plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10919 plist[5] = args[coding_arg_coding_type] = Qundecided;
10920 /* This is already set.
35befdaa 10921 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
d67b4f80 10922 plist[8] = intern_c_string (":charset-list");
ae6f73fa
KH
10923 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10924 plist[11] = args[coding_arg_for_unibyte] = Qnil;
d67b4f80 10925 plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
ae6f73fa
KH
10926 plist[15] = args[coding_arg_eol_type] = Qnil;
10927 args[coding_arg_plist] = Flist (16, plist);
10928 Fdefine_coding_system_internal (coding_arg_max, args);
2c78b7e1
KH
10929 }
10930
2c78b7e1 10931 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
10932
10933 {
10934 int i;
10935
10936 for (i = 0; i < coding_category_max; i++)
10937 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10938 }
fcbcfb64
KH
10939#if defined (MSDOS) || defined (WINDOWSNT)
10940 system_eol_type = Qdos;
10941#else
10942 system_eol_type = Qunix;
10943#endif
10944 staticpro (&system_eol_type);
4ed46869
KH
10945}
10946
68c45bf0
PE
10947char *
10948emacs_strerror (error_number)
10949 int error_number;
10950{
10951 char *str;
10952
ca9c0567 10953 synchronize_system_messages_locale ();
68c45bf0
PE
10954 str = strerror (error_number);
10955
10956 if (! NILP (Vlocale_coding_system))
10957 {
10958 Lisp_Object dec = code_convert_string_norecord (build_string (str),
10959 Vlocale_coding_system,
10960 0);
d5db4077 10961 str = (char *) SDATA (dec);
68c45bf0
PE
10962 }
10963
10964 return str;
10965}
10966
4ed46869 10967#endif /* emacs */
9ffd559c
KH
10968
10969/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10970 (do not change this comment) */