* coding.h (struct coding_system): Make safe_charsets a pointer to
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
aaef169d 2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
76b6f707 3 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
76b6f707 5 2005, 2006, 2007, 2008, 2009
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8f924df7 8 Copyright (C) 2003
df7492f9
KH
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
4ed46869 11
369314dc
KH
12This file is part of GNU Emacs.
13
9ec0b715 14GNU Emacs is free software: you can redistribute it and/or modify
369314dc 15it under the terms of the GNU General Public License as published by
9ec0b715
GM
16the Free Software Foundation, either version 3 of the License, or
17(at your option) any later version.
4ed46869 18
369314dc
KH
19GNU Emacs is distributed in the hope that it will be useful,
20but WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22GNU General Public License for more details.
4ed46869 23
369314dc 24You should have received a copy of the GNU General Public License
9ec0b715 25along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869
KH
26
27/*** TABLE OF CONTENTS ***
28
b73bfc1c 29 0. General comments
4ed46869 30 1. Preamble
df7492f9
KH
31 2. Emacs' internal format (emacs-utf-8) handlers
32 3. UTF-8 handlers
33 4. UTF-16 handlers
34 5. Charset-base coding systems handlers
35 6. emacs-mule (old Emacs' internal format) handlers
36 7. ISO2022 handlers
37 8. Shift-JIS and BIG5 handlers
38 9. CCL handlers
39 10. C library functions
40 11. Emacs Lisp library functions
41 12. Postamble
4ed46869
KH
42
43*/
44
df7492f9 45/*** 0. General comments ***
b73bfc1c
KH
46
47
df7492f9 48CODING SYSTEM
4ed46869 49
5bad0796
DL
50 A coding system is an object for an encoding mechanism that contains
51 information about how to convert byte sequences to character
e19c3639
KH
52 sequences and vice versa. When we say "decode", it means converting
53 a byte sequence of a specific coding system into a character
54 sequence that is represented by Emacs' internal coding system
55 `emacs-utf-8', and when we say "encode", it means converting a
56 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 57 coding system.
4ed46869 58
e19c3639
KH
59 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
60 C level, a coding system is represented by a vector of attributes
5bad0796 61 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
62 coding system symbol to attributes vector is done by looking up
63 Vcoding_system_hash_table by the symbol.
4ed46869 64
e19c3639 65 Coding systems are classified into the following types depending on
5bad0796 66 the encoding mechanism. Here's a brief description of the types.
4ed46869 67
df7492f9
KH
68 o UTF-8
69
70 o UTF-16
71
72 o Charset-base coding system
73
74 A coding system defined by one or more (coded) character sets.
5bad0796 75 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
76 character set.
77
5bad0796 78 o Old Emacs internal format (emacs-mule)
df7492f9 79
5bad0796 80 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 81
df7492f9 82 o ISO2022-base coding system
4ed46869
KH
83
84 The most famous coding system for multiple character sets. X's
df7492f9
KH
85 Compound Text, various EUCs (Extended Unix Code), and coding systems
86 used in the Internet communication such as ISO-2022-JP are all
87 variants of ISO2022.
4ed46869 88
df7492f9 89 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 90
4ed46869
KH
91 A coding system to encode character sets: ASCII, JISX0201, and
92 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 93 section 8.
4ed46869 94
df7492f9 95 o BIG5
4ed46869 96
df7492f9 97 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 98 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
99 described in section 8. In this file, when we write "big5" (all
100 lowercase), we mean the coding system, and when we write "Big5"
101 (capitalized), we mean the character set.
4ed46869 102
df7492f9 103 o CCL
27901516 104
5bad0796 105 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
106 not listed above, he can supply a decoder and an encoder for it in
107 CCL (Code Conversion Language) programs. Emacs executes the CCL
108 program while decoding/encoding.
27901516 109
df7492f9 110 o Raw-text
4ed46869 111
5a936b46 112 A coding system for text containing raw eight-bit data. Emacs
5bad0796 113 treats each byte of source text as a character (except for
df7492f9 114 end-of-line conversion).
4ed46869 115
df7492f9
KH
116 o No-conversion
117
118 Like raw text, but don't do end-of-line conversion.
4ed46869 119
4ed46869 120
df7492f9 121END-OF-LINE FORMAT
4ed46869 122
5bad0796 123 How text end-of-line is encoded depends on the operating system. For
df7492f9 124 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 125 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
126 `line-feed' codes. MacOS's format is usually one byte of
127 `carriage-return'.
4ed46869 128
cfb43547 129 Since text character encoding and end-of-line encoding are
df7492f9
KH
130 independent, any coding system described above can take any format
131 of end-of-line (except for no-conversion).
4ed46869 132
e19c3639
KH
133STRUCT CODING_SYSTEM
134
135 Before using a coding system for code conversion (i.e. decoding and
136 encoding), we set up a structure of type `struct coding_system'.
137 This structure keeps various information about a specific code
5bad0796 138 conversion (e.g. the location of source and destination data).
4ed46869
KH
139
140*/
141
df7492f9
KH
142/* COMMON MACROS */
143
144
4ed46869
KH
145/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146
df7492f9 147 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
148 CODING conforms to the format of XXX, and update the members of
149 DETECT_INFO.
df7492f9 150
ff0dacd7 151 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
152
153 Below is the template of these functions. */
154
4ed46869 155#if 0
df7492f9 156static int
ff0dacd7 157detect_coding_XXX (coding, detect_info)
df7492f9 158 struct coding_system *coding;
ff0dacd7 159 struct coding_detection_info *detect_info;
4ed46869 160{
f1d34bca
MB
161 const unsigned char *src = coding->source;
162 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 163 int multibytep = coding->src_multibyte;
ff0dacd7 164 int consumed_chars = 0;
df7492f9
KH
165 int found = 0;
166 ...;
167
168 while (1)
169 {
170 /* Get one byte from the source. If the source is exhausted, jump
171 to no_more_source:. */
172 ONE_MORE_BYTE (c);
ff0dacd7
KH
173
174 if (! __C_conforms_to_XXX___ (c))
175 break;
176 if (__C_strongly_suggests_XXX__ (c)) /* Record positive evidence for XXX. */
177 found = CATEGORY_MASK_XXX;
df7492f9 178 }
ff0dacd7
KH
179 /* The byte sequence is invalid for XXX. */
180 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 181 return 0;
ff0dacd7 182
df7492f9 183 no_more_source:
ff0dacd7
KH
184 /* The source was exhausted successfully. */
185 detect_info->found |= found;
df7492f9 186 return 1;
4ed46869
KH
187}
188#endif
189
190/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191
df7492f9
KH
192 These functions decode a byte sequence specified as a source by
193 CODING. The resulting multibyte text goes to a place pointed to by
194 CODING->charbuf, the length of which should not exceed
195 CODING->charbuf_size;
d46c5b12 196
df7492f9
KH
197 These functions set the information of original and decoded texts in
198 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
199 They also set CODING->result to one of CODING_RESULT_XXX indicating
200 how the decoding is finished.
d46c5b12 201
df7492f9 202 Below is the template of these functions. */
d46c5b12 203
4ed46869 204#if 0
b73bfc1c 205static void
df7492f9 206decode_coding_XXXX (coding)
4ed46869 207 struct coding_system *coding;
4ed46869 208{
f1d34bca
MB
209 const unsigned char *src = coding->source + coding->consumed;
210 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
211 /* SRC_BASE remembers the start position in source in each loop.
212 The loop will be exited when there's not enough source code, or
213 when there's no room in CHARBUF for a decoded character. */
f1d34bca 214 const unsigned char *src_base;
df7492f9 215 /* A buffer to produce decoded characters. */
69a80ea3
KH
216 int *charbuf = coding->charbuf + coding->charbuf_used;
217 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
218 int multibytep = coding->src_multibyte;
219
220 while (1)
221 {
222 src_base = src;
223 if (charbuf >= charbuf_end)
224 /* No more room to produce a decoded character. */
225 break;
226 ONE_MORE_BYTE (c);
227 /* Decode it. */
228 }
229
230 no_more_source:
231 if (src_base < src_end
232 && coding->mode & CODING_MODE_LAST_BLOCK)
233 /* If the source ends by partial bytes to construct a character,
234 treat them as eight-bit raw data. */
235 while (src_base < src_end && charbuf < charbuf_end)
236 *charbuf++ = *src_base++;
237 /* Remember how many bytes and characters we consumed. If the
238 source is multibyte, the bytes and chars are not identical. */
239 coding->consumed = coding->consumed_char = src_base - coding->source;
240 /* Remember how many characters we produced. */
241 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
242}
243#endif
244
245/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246
df7492f9
KH
247 These functions encode SRC_BYTES length text at SOURCE of Emacs'
248 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
249 goes to a place pointed to by DESTINATION, the length of which
250 should not exceed DST_BYTES.
d46c5b12 251
df7492f9
KH
252 These functions set the information of original and encoded texts in
253 the members produced, produced_char, consumed, and consumed_char of
254 the structure *CODING. They also set the member result to one of
255 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 256
df7492f9
KH
257 DST_BYTES zero means that source area and destination area are
258 overlapped, which means that we can produce an encoded text until it
259 reaches the head of not-yet-encoded source text.
d46c5b12 260
df7492f9 261 Below is a template of these functions. */
4ed46869 262#if 0
b73bfc1c 263static void
df7492f9 264encode_coding_XXX (coding)
4ed46869 265 struct coding_system *coding;
4ed46869 266{
df7492f9
KH
267 int multibytep = coding->dst_multibyte;
268 int *charbuf = coding->charbuf;
269 int *charbuf_end = coding->charbuf + coding->charbuf_used;
270 unsigned char *dst = coding->destination + coding->produced;
271 unsigned char *dst_end = coding->destination + coding->dst_bytes;
272 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
273 int produced_chars = 0;
274
275 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
276 {
277 int c = *charbuf;
278 /* Encode C into DST, and increment DST. */
279 }
280 label_no_more_destination:
281 /* How many chars and bytes we produced. */
282 coding->produced_char += produced_chars;
283 coding->produced = dst - coding->destination;
4ed46869
KH
284}
285#endif
286
4ed46869
KH
287\f
288/*** 1. Preamble ***/
289
68c45bf0 290#include <config.h>
4ed46869
KH
291#include <stdio.h>
292
4ed46869
KH
293#include "lisp.h"
294#include "buffer.h"
df7492f9 295#include "character.h"
4ed46869
KH
296#include "charset.h"
297#include "ccl.h"
df7492f9 298#include "composite.h"
4ed46869
KH
299#include "coding.h"
300#include "window.h"
b8299c66
KL
301#include "frame.h"
302#include "termhooks.h"
4ed46869 303
df7492f9 304Lisp_Object Vcoding_system_hash_table;
4ed46869 305
df7492f9 306Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
307Lisp_Object Qunix, Qdos;
308extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
309Lisp_Object Qbuffer_file_coding_system;
310Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 311Lisp_Object Qdefault_char;
27901516 312Lisp_Object Qno_conversion, Qundecided;
df7492f9 313Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 314Lisp_Object Qbig, Qlittle;
bb0115a2 315Lisp_Object Qcoding_system_history;
1397dc18 316Lisp_Object Qvalid_codes;
2133e2d1 317Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
a6f87d34
KH
318Lisp_Object QCdecode_translation_table, QCencode_translation_table;
319Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
35befdaa 320Lisp_Object QCascii_compatible_p;
4ed46869
KH
321
322extern Lisp_Object Qinsert_file_contents, Qwrite_region;
387f6ba5 323Lisp_Object Qcall_process, Qcall_process_region;
4ed46869
KH
324Lisp_Object Qstart_process, Qopen_network_stream;
325Lisp_Object Qtarget_idx;
326
065e3595
KH
327Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
328Lisp_Object Qinterrupted, Qinsufficient_memory;
329
c7183fb8
GM
330extern Lisp_Object Qcompletion_ignore_case;
331
44e8490d
KH
332/* If a symbol has this property, evaluate the value to define the
333 symbol as a coding system. */
334static Lisp_Object Qcoding_system_define_form;
335
5d5bf4d8
KH
336int coding_system_require_warning;
337
d46c5b12
KH
338Lisp_Object Vselect_safe_coding_system_function;
339
7722baf9
EZ
340/* Mnemonic string for each format of end-of-line. */
341Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
342/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 343 decided. */
7722baf9 344Lisp_Object eol_mnemonic_undecided;
4ed46869 345
fcbcfb64
KH
346/* Format of end-of-line decided by system. This is Qunix on
347 Unix and Mac, Qdos on DOS/Windows.
348 This has an effect only for external encoding (i.e. for output to
349 file and process), not for in-buffer or Lisp string encoding. */
350static Lisp_Object system_eol_type;
351
4ed46869
KH
352#ifdef emacs
353
4608c386
KH
354Lisp_Object Vcoding_system_list, Vcoding_system_alist;
355
356Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 357
d46c5b12
KH
358/* Coding system emacs-mule and raw-text are for converting only
359 end-of-line format. */
360Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 361Lisp_Object Qutf_8_emacs;
ecf488bc 362
4ed46869
KH
363/* Coding-systems are handed between Emacs Lisp programs and C internal
364 routines by the following three variables. */
365/* Coding-system for reading files and receiving data from process. */
366Lisp_Object Vcoding_system_for_read;
367/* Coding-system for writing files and sending data to process. */
368Lisp_Object Vcoding_system_for_write;
369/* Coding-system actually used in the latest I/O. */
370Lisp_Object Vlast_coding_system_used;
065e3595
KH
371/* Set to non-nil when an error is detected while code conversion. */
372Lisp_Object Vlast_code_conversion_error;
c4825358 373/* A vector of length 256 which contains information about special
94487c4e 374 Latin codes (especially for dealing with Microsoft codes). */
3f003981 375Lisp_Object Vlatin_extra_code_table;
c4825358 376
9ce27fde
KH
377/* Flag to inhibit code conversion of end-of-line format. */
378int inhibit_eol_conversion;
379
74383408
KH
380/* Flag to inhibit ISO2022 escape sequence detection. */
381int inhibit_iso_escape_detection;
382
97b1b294
EZ
383/* Flag to inhibit detection of binary files through null bytes. */
384int inhibit_null_byte_detection;
385
ed29121d
EZ
386/* Flag to make buffer-file-coding-system inherit from process-coding. */
387int inherit_process_coding_system;
388
c4825358
KH
389/* Coding system to be used to encode text for terminal display when
390 terminal coding system is nil. */
391struct coding_system safe_terminal_coding;
392
02ba4723
KH
393Lisp_Object Vfile_coding_system_alist;
394Lisp_Object Vprocess_coding_system_alist;
395Lisp_Object Vnetwork_coding_system_alist;
4ed46869 396
68c45bf0
PE
397Lisp_Object Vlocale_coding_system;
398
4ed46869
KH
399#endif /* emacs */
400
f967223b
KH
401/* Flag to tell if we look up translation table on character code
402 conversion. */
84fbb8a0 403Lisp_Object Venable_character_translation;
f967223b
KH
404/* Standard translation table to look up on decoding (reading). */
405Lisp_Object Vstandard_translation_table_for_decode;
406/* Standard translation table to look up on encoding (writing). */
407Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 408
f967223b
KH
409Lisp_Object Qtranslation_table;
410Lisp_Object Qtranslation_table_id;
411Lisp_Object Qtranslation_table_for_decode;
412Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
413
414/* Alist of charsets vs revision number. */
df7492f9 415static Lisp_Object Vcharset_revision_table;
4ed46869 416
02ba4723
KH
417/* Default coding systems used for process I/O. */
418Lisp_Object Vdefault_process_coding_system;
419
002fdb44
DL
420/* Char table for translating Quail and self-inserting input. */
421Lisp_Object Vtranslation_table_for_input;
422
df7492f9
KH
423/* Two special coding systems. */
424Lisp_Object Vsjis_coding_system;
425Lisp_Object Vbig5_coding_system;
426
df7492f9
KH
427/* ISO2022 section */
428
/* Return, as a C integer, element REG of the `coding_attr_iso_initial'
   attribute found in the attributes of CODING (looked up by CODING's id). */
429#define CODING_ISO_INITIAL(coding, reg) \
430 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
431 coding_attr_iso_initial), \
432 reg)))
433
434
1b3b981b
AS
/* Return the entry of CODING's safe_charsets for CHARSET_ID, or -1 if
   CHARSET_ID is greater than CODING's max_charset_id or the entry is
   255. Note that CHARSET_ID and CODING are evaluated more than once. */
435#define CODING_ISO_REQUEST(coding, charset_id) \
436 (((charset_id) <= (coding)->max_charset_id \
437 ? ((coding)->safe_charsets[charset_id] != 255 \
438 ? (coding)->safe_charsets[charset_id] \
439 : -1) \
df7492f9
KH
440 : -1))
441
442
443#define CODING_ISO_FLAGS(coding) \
444 ((coding)->spec.iso_2022.flags)
445#define CODING_ISO_DESIGNATION(coding, reg) \
446 ((coding)->spec.iso_2022.current_designation[reg])
447#define CODING_ISO_INVOCATION(coding, plane) \
448 ((coding)->spec.iso_2022.current_invocation[plane])
449#define CODING_ISO_SINGLE_SHIFTING(coding) \
450 ((coding)->spec.iso_2022.single_shifting)
451#define CODING_ISO_BOL(coding) \
452 ((coding)->spec.iso_2022.bol)
453#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
454 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
455
456/* Control characters of ISO2022. */
457 /* code */ /* function */
458#define ISO_CODE_LF 0x0A /* line-feed */
459#define ISO_CODE_CR 0x0D /* carriage-return */
460#define ISO_CODE_SO 0x0E /* shift-out */
461#define ISO_CODE_SI 0x0F /* shift-in */
462#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
463#define ISO_CODE_ESC 0x1B /* escape */
464#define ISO_CODE_SS2 0x8E /* single-shift-2 */
465#define ISO_CODE_SS3 0x8F /* single-shift-3 */
466#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
467
468/* Every 1-byte code of ISO2022 is classified into one of the
469 following classes. */
470enum iso_code_class_type
471 {
472 ISO_control_0, /* Control codes in the range
473 0x00..0x1F and 0x7F, except for the
474 following 4 codes. */
df7492f9
KH
475 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
476 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
477 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
478 ISO_escape, /* ISO_CODE_ESC (0x1B) */
479 ISO_control_1, /* Control codes in the range
480 0x80..0x9F, except for the
481 following 3 codes. */
482 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
483 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
484 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
485 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
486 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
487 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
488 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
489 };
05e6f5dc 490
df7492f9
KH
491/** Each of the macros CODING_ISO_FLAG_XXX defines a flag bit of the
492 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 493
df7492f9
KH
494/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
495 instead of the correct short-form sequence (e.g. ESC $ A). */
496#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 497
df7492f9
KH
498/* If set, reset graphic planes and registers at end-of-line to the
499 initial state. */
500#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 501
df7492f9
KH
502/* If set, reset graphic planes and registers before any control
503 characters to the initial state. */
504#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 505
df7492f9
KH
506/* If set, encode by 7-bit environment. */
507#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 508
df7492f9
KH
509/* If set, use locking-shift function. */
510#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 511
df7492f9
KH
512/* If set, use single-shift function. Overwrite
513 CODING_ISO_FLAG_LOCKING_SHIFT. */
514#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 515
df7492f9
KH
516/* If set, use designation escape sequence. */
517#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 518
df7492f9
KH
519/* If set, produce revision number sequence. */
520#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 521
df7492f9
KH
522/* If set, produce ISO6429's direction specifying sequence. */
523#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 524
df7492f9
KH
525/* If set, assume designation states are reset at beginning of line on
526 output. */
527#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 528
df7492f9
KH
529/* If set, designation sequence should be placed at beginning of line
530 on output. */
531#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 532
df7492f9
KH
533/* If set, do not encode unsafe characters on output. */
534#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 535
df7492f9
KH
536/* If set, extra latin codes (128..159) are accepted as a valid code
537 on input. */
538#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 539
df7492f9 540#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 541
df7492f9 542#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 543
bf16eb23 544#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 545
bf16eb23 546#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 547
bf16eb23 548#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 549
df7492f9
KH
550/* A character to be produced on output if encoding of the original
551 character is prohibited by CODING_ISO_FLAG_SAFE. */
552#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 553
a470d443
KH
554/* UTF-8 section */
555#define CODING_UTF_8_BOM(coding) \
556 ((coding)->spec.utf_8_bom)
4ed46869 557
df7492f9
KH
558/* UTF-16 section */
559#define CODING_UTF_16_BOM(coding) \
560 ((coding)->spec.utf_16.bom)
4ed46869 561
df7492f9
KH
562#define CODING_UTF_16_ENDIAN(coding) \
563 ((coding)->spec.utf_16.endian)
4ed46869 564
df7492f9
KH
565#define CODING_UTF_16_SURROGATE(coding) \
566 ((coding)->spec.utf_16.surrogate)
4ed46869 567
4ed46869 568
df7492f9
KH
569/* CCL section */
570#define CODING_CCL_DECODER(coding) \
571 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
572#define CODING_CCL_ENCODER(coding) \
573 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
574#define CODING_CCL_VALIDS(coding) \
8f924df7 575 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 576
5a936b46 577/* Index for each coding category in `coding_categories' */
4ed46869 578
df7492f9
KH
579enum coding_category
580 {
581 coding_category_iso_7,
582 coding_category_iso_7_tight,
583 coding_category_iso_8_1,
584 coding_category_iso_8_2,
585 coding_category_iso_7_else,
586 coding_category_iso_8_else,
a470d443
KH
587 coding_category_utf_8_auto,
588 coding_category_utf_8_nosig,
589 coding_category_utf_8_sig,
df7492f9
KH
590 coding_category_utf_16_auto,
591 coding_category_utf_16_be,
592 coding_category_utf_16_le,
593 coding_category_utf_16_be_nosig,
594 coding_category_utf_16_le_nosig,
595 coding_category_charset,
596 coding_category_sjis,
597 coding_category_big5,
598 coding_category_ccl,
599 coding_category_emacs_mule,
600 /* All above are targets of code detection. */
601 coding_category_raw_text,
602 coding_category_undecided,
603 coding_category_max
604 };
605
606/* Definitions of flag bits used in detect_coding_XXXX. */
607#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
608#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
609#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
610#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
611#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
612#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
a470d443
KH
613#define CATEGORY_MASK_UTF_8_AUTO (1 << coding_category_utf_8_auto)
614#define CATEGORY_MASK_UTF_8_NOSIG (1 << coding_category_utf_8_nosig)
615#define CATEGORY_MASK_UTF_8_SIG (1 << coding_category_utf_8_sig)
b49a1807 616#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
617#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
618#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
619#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
620#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
621#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
622#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
623#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
624#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
625#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 626#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
627
628/* This value is returned if detect_coding_mask () finds nothing other
629 than ASCII characters. */
630#define CATEGORY_MASK_ANY \
631 (CATEGORY_MASK_ISO_7 \
632 | CATEGORY_MASK_ISO_7_TIGHT \
633 | CATEGORY_MASK_ISO_8_1 \
634 | CATEGORY_MASK_ISO_8_2 \
635 | CATEGORY_MASK_ISO_7_ELSE \
636 | CATEGORY_MASK_ISO_8_ELSE \
a470d443
KH
637 | CATEGORY_MASK_UTF_8_AUTO \
638 | CATEGORY_MASK_UTF_8_NOSIG \
639 | CATEGORY_MASK_UTF_8_SIG \
2f3cbb32 640 | CATEGORY_MASK_UTF_16_AUTO \
df7492f9
KH
641 | CATEGORY_MASK_UTF_16_BE \
642 | CATEGORY_MASK_UTF_16_LE \
643 | CATEGORY_MASK_UTF_16_BE_NOSIG \
644 | CATEGORY_MASK_UTF_16_LE_NOSIG \
645 | CATEGORY_MASK_CHARSET \
646 | CATEGORY_MASK_SJIS \
647 | CATEGORY_MASK_BIG5 \
648 | CATEGORY_MASK_CCL \
649 | CATEGORY_MASK_EMACS_MULE)
650
651
652#define CATEGORY_MASK_ISO_7BIT \
653 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
654
655#define CATEGORY_MASK_ISO_8BIT \
656 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
657
658#define CATEGORY_MASK_ISO_ELSE \
659 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
660
661#define CATEGORY_MASK_ISO_ESCAPE \
662 (CATEGORY_MASK_ISO_7 \
663 | CATEGORY_MASK_ISO_7_TIGHT \
664 | CATEGORY_MASK_ISO_7_ELSE \
665 | CATEGORY_MASK_ISO_8_ELSE)
666
667#define CATEGORY_MASK_ISO \
668 ( CATEGORY_MASK_ISO_7BIT \
669 | CATEGORY_MASK_ISO_8BIT \
670 | CATEGORY_MASK_ISO_ELSE)
671
672#define CATEGORY_MASK_UTF_16 \
2f3cbb32
KH
673 (CATEGORY_MASK_UTF_16_AUTO \
674 | CATEGORY_MASK_UTF_16_BE \
df7492f9
KH
675 | CATEGORY_MASK_UTF_16_LE \
676 | CATEGORY_MASK_UTF_16_BE_NOSIG \
677 | CATEGORY_MASK_UTF_16_LE_NOSIG)
678
a470d443
KH
679#define CATEGORY_MASK_UTF_8 \
680 (CATEGORY_MASK_UTF_8_AUTO \
681 | CATEGORY_MASK_UTF_8_NOSIG \
682 | CATEGORY_MASK_UTF_8_SIG)
df7492f9
KH
683
684/* List of symbols `coding-category-xxx' ordered by priority. This
685 variable is exposed to Emacs Lisp. */
686static Lisp_Object Vcoding_category_list;
687
688/* Table of coding categories (Lisp symbols). This variable is for
689 internal use only. */
690static Lisp_Object Vcoding_category_table;
691
692/* Table of coding-categories ordered by priority. */
693static enum coding_category coding_priorities[coding_category_max];
694
695/* Nth element is a coding context for the coding system bound to the
696 Nth coding category. */
697static struct coding_system coding_categories[coding_category_max];
698
df7492f9
KH
699/*** Commonly used macros and functions ***/
700
701#ifndef min
702#define min(a, b) ((a) < (b) ? (a) : (b))
703#endif
704#ifndef max
705#define max(a, b) ((a) > (b) ? (a) : (b))
706#endif
4ed46869 707
24a73b0a
KH
/* Set ATTRS to the attributes of CODING (looked up by CODING's id),
   and set CHARSET_LIST to the charset list recorded in ATTRS. */
708#define CODING_GET_INFO(coding, attrs, charset_list) \
709 do { \
710 (attrs) = CODING_ID_ATTRS ((coding)->id); \
711 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
df7492f9 712 } while (0)
4ed46869 713
4ed46869 714
df7492f9
KH
715/* Safely get one byte from the source text pointed by SRC which ends
716 at SRC_END, and set C to that byte. If there are not enough bytes
065e3595
KH
717 in the source, it jumps to `no_more_source'. If multibytep is
718 nonzero, and a multibyte character is found at SRC, set C to the
719 negative value of the character code. The caller should declare
720 and set these variables appropriately in advance:
721 src, src_end, src_base, multibytep, coding, consumed_chars */
aa72b389 722
065e3595
KH
723#define ONE_MORE_BYTE(c) \
724 do { \
725 if (src == src_end) \
726 { \
727 if (src_base < src) \
728 record_conversion_result \
729 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
730 goto no_more_source; \
731 } \
732 c = *src++; \
733 if (multibytep && (c & 0x80)) \
734 { \
735 if ((c & 0xFE) == 0xC0) \
736 c = ((c & 1) << 6) | *src++; \
737 else \
738 { \
35befdaa
KH
739 src--; \
740 c = - string_char (src, &src, NULL); \
065e3595
KH
741 record_conversion_result \
742 (coding, CODING_RESULT_INVALID_SRC); \
743 } \
744 } \
745 consumed_chars++; \
aa72b389
KH
746 } while (0)
747
f56a4450 748/* Safely get two bytes from the source text pointed by SRC which ends
220eeac9
KH
749 at SRC_END, and set C1 and C2 to those bytes while skipping the
750 heading multibyte characters. If there are not enough bytes in the
751 source, it jumps to `no_more_source'. If multibytep is nonzero and
752 a multibyte character is found for C2, set C2 to the negative value
753 of the character code. The caller should declare and set these
754 variables appropriately in advance:
f56a4450
KH
755 src, src_end, multibytep
756 It is intended that this macro is used in detect_coding_utf_16. */
757
220eeac9
KH
758#define TWO_MORE_BYTES(c1, c2) \
759 do { \
760 do { \
761 if (src == src_end) \
762 goto no_more_source; \
763 c1 = *src++; \
764 if (multibytep && (c1 & 0x80)) \
765 { \
766 if ((c1 & 0xFE) == 0xC0) \
767 c1 = ((c1 & 1) << 6) | *src++; \
768 else \
769 { \
770 src += BYTES_BY_CHAR_HEAD (c1) - 1; \
771 c1 = -1; \
772 } \
773 } \
774 } while (c1 < 0); \
775 if (src == src_end) \
776 goto no_more_source; \
777 c2 = *src++; \
778 if (multibytep && (c2 & 0x80)) \
779 { \
780 if ((c2 & 0xFE) == 0xC0) \
781 c2 = ((c2 & 1) << 6) | *src++; \
782 else \
783 c2 = -1; \
784 } \
f56a4450
KH
785 } while (0)
786
aa72b389 787
065e3595
KH
/* Like ONE_MORE_BYTE, but do not check whether SRC has reached SRC_END
   before reading, so it never jumps to `no_more_source'. It is up to
   the caller to ensure that the needed source bytes are available.
   The variables `src', `multibytep', `coding' and `consumed_chars'
   must be declared and set by the caller. */
788#define ONE_MORE_BYTE_NO_CHECK(c) \
789 do { \
790 c = *src++; \
791 if (multibytep && (c & 0x80)) \
792 { \
793 if ((c & 0xFE) == 0xC0) \
794 c = ((c & 1) << 6) | *src++; \
795 else \
796 { \
35befdaa
KH
797 src--; \
798 c = - string_char (src, &src, NULL); \
065e3595
KH
799 record_conversion_result \
800 (coding, CODING_RESULT_INVALID_SRC); \
801 } \
802 } \
803 consumed_chars++; \
aa72b389
KH
804 } while (0)
805
aa72b389 806
df7492f9
KH
807/* Store a byte C in the place pointed by DST and increment DST to the
808 next free point, and increment PRODUCED_CHARS. The caller should
809 assure that C is 0..127, and declare and set the variable `dst'
810 appropriately in advance.
811*/
aa72b389
KH
812
813
df7492f9
KH
814#define EMIT_ONE_ASCII_BYTE(c) \
815 do { \
816 produced_chars++; \
817 *dst++ = (c); \
b6871cc7 818 } while (0)
aa72b389
KH
819
820
df7492f9 821/* Like EMIT_ONE_ASCII_BYTE, but store two bytes; C1 and C2. */
aa72b389 822
df7492f9
KH
823#define EMIT_TWO_ASCII_BYTES(c1, c2) \
824 do { \
825 produced_chars += 2; \
826 *dst++ = (c1), *dst++ = (c2); \
827 } while (0)
aa72b389
KH
828
829
df7492f9
KH
/* Emit the byte C at DST and count it in PRODUCED_CHARS.  For a
   unibyte destination the byte is stored as is.  When MULTIBYTEP is
   nonzero, a value of 0x80 or above is first mapped through
   BYTE8_TO_CHAR and the result is written in multibyte form with
   CHAR_STRING_ADVANCE.  The caller provides `dst', `multibytep' and
   `produced_chars'.  */

#define EMIT_ONE_BYTE(c)                        \
  do {                                          \
    produced_chars++;                           \
    if (! multibytep)                           \
      *dst++ = (c);                             \
    else                                        \
      {                                         \
        int ch = (c);                           \
        if (ch >= 0x80)                         \
          ch = BYTE8_TO_CHAR (ch);              \
        CHAR_STRING_ADVANCE (ch, dst);          \
      }                                         \
  } while (0)
aa72b389 849
aa72b389 850
/* Like EMIT_ONE_BYTE, but emit two bytes, C1 then C2.  Each byte
   goes through the same unibyte/multibyte handling and each adds one
   to PRODUCED_CHARS.  */

#define EMIT_TWO_BYTES(c1, c2)  \
  do {                          \
    EMIT_ONE_BYTE (c1);         \
    EMIT_ONE_BYTE (c2);         \
  } while (0)
875
876
df7492f9
KH
/* Emit the three bytes C1, C2 and C3, in that order.  */

#define EMIT_THREE_BYTES(c1, c2, c3)    \
  do {                                  \
    EMIT_TWO_BYTES (c1, c2);            \
    EMIT_ONE_BYTE (c3);                 \
  } while (0)
aa72b389 882
aa72b389 883
df7492f9
KH
/* Emit the four bytes C1, C2, C3 and C4, in that order.  */

#define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
  do {                                          \
    EMIT_ONE_BYTE (c1);                         \
    EMIT_THREE_BYTES (c2, c3, c4);              \
  } while (0)
aa72b389 889
aa72b389 890
f6cbaf43
KH
891/* Prototypes for static functions. */
892static void record_conversion_result P_ ((struct coding_system *coding,
893 enum coding_result_code result));
894static int detect_coding_utf_8 P_ ((struct coding_system *,
895 struct coding_detection_info *info));
896static void decode_coding_utf_8 P_ ((struct coding_system *));
897static int encode_coding_utf_8 P_ ((struct coding_system *));
898
899static int detect_coding_utf_16 P_ ((struct coding_system *,
900 struct coding_detection_info *info));
901static void decode_coding_utf_16 P_ ((struct coding_system *));
902static int encode_coding_utf_16 P_ ((struct coding_system *));
903
904static int detect_coding_iso_2022 P_ ((struct coding_system *,
905 struct coding_detection_info *info));
906static void decode_coding_iso_2022 P_ ((struct coding_system *));
907static int encode_coding_iso_2022 P_ ((struct coding_system *));
908
909static int detect_coding_emacs_mule P_ ((struct coding_system *,
910 struct coding_detection_info *info));
911static void decode_coding_emacs_mule P_ ((struct coding_system *));
912static int encode_coding_emacs_mule P_ ((struct coding_system *));
913
914static int detect_coding_sjis P_ ((struct coding_system *,
915 struct coding_detection_info *info));
916static void decode_coding_sjis P_ ((struct coding_system *));
917static int encode_coding_sjis P_ ((struct coding_system *));
918
919static int detect_coding_big5 P_ ((struct coding_system *,
920 struct coding_detection_info *info));
921static void decode_coding_big5 P_ ((struct coding_system *));
922static int encode_coding_big5 P_ ((struct coding_system *));
923
924static int detect_coding_ccl P_ ((struct coding_system *,
925 struct coding_detection_info *info));
926static void decode_coding_ccl P_ ((struct coding_system *));
927static int encode_coding_ccl P_ ((struct coding_system *));
928
929static void decode_coding_raw_text P_ ((struct coding_system *));
930static int encode_coding_raw_text P_ ((struct coding_system *));
931
932static void coding_set_source P_ ((struct coding_system *));
933static void coding_set_destination P_ ((struct coding_system *));
934static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
935static void coding_alloc_by_making_gap P_ ((struct coding_system *,
287c57d7 936 EMACS_INT, EMACS_INT));
f6cbaf43
KH
937static unsigned char *alloc_destination P_ ((struct coding_system *,
938 EMACS_INT, unsigned char *));
939static void setup_iso_safe_charsets P_ ((Lisp_Object));
940static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
941 int *, int *,
942 unsigned char *));
943static int detect_eol P_ ((const unsigned char *,
944 EMACS_INT, enum coding_category));
945static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
946static void decode_eol P_ ((struct coding_system *));
947static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
948static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
949 int, int *, int *));
950static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
951static INLINE void produce_composition P_ ((struct coding_system *, int *,
952 EMACS_INT));
953static INLINE void produce_charset P_ ((struct coding_system *, int *,
954 EMACS_INT));
955static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
956static int decode_coding P_ ((struct coding_system *));
957static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
3ed051d4 958 struct coding_system *,
f6cbaf43
KH
959 int *, EMACS_INT *));
960static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
961 struct coding_system *,
962 int *, EMACS_INT *));
963static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
964static int encode_coding P_ ((struct coding_system *));
965static Lisp_Object make_conversion_work_buffer P_ ((int));
966static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
967static INLINE int char_encodable_p P_ ((int, Lisp_Object));
968static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
969
065e3595
KH
970static void
971record_conversion_result (struct coding_system *coding,
972 enum coding_result_code result)
973{
974 coding->result = result;
975 switch (result)
976 {
977 case CODING_RESULT_INSUFFICIENT_SRC:
978 Vlast_code_conversion_error = Qinsufficient_source;
979 break;
980 case CODING_RESULT_INCONSISTENT_EOL:
981 Vlast_code_conversion_error = Qinconsistent_eol;
982 break;
983 case CODING_RESULT_INVALID_SRC:
984 Vlast_code_conversion_error = Qinvalid_source;
985 break;
986 case CODING_RESULT_INTERRUPT:
987 Vlast_code_conversion_error = Qinterrupted;
988 break;
989 case CODING_RESULT_INSUFFICIENT_MEM:
990 Vlast_code_conversion_error = Qinsufficient_memory;
991 break;
35befdaa
KH
992 default:
993 Vlast_code_conversion_error = intern ("Unknown error");
065e3595
KH
994 }
995}
996
df7492f9
KH
/* Set C to DECODE_CHAR (CHARSET, CODE).  If that call sets
   `charset_map_loaded', recompute CODING->source with
   coding_set_source and shift SRC, SRC_BASE and SRC_END by the
   distance the source moved, so that they keep pointing at the same
   text.  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                  \
    const unsigned char *source_before;                                 \
    EMACS_INT moved_by;                                                 \
                                                                        \
    charset_map_loaded = 0;                                             \
    c = DECODE_CHAR (charset, code);                                    \
    if (! charset_map_loaded)                                           \
      break;                                                            \
    source_before = coding->source;                                     \
    coding_set_source (coding);                                         \
    moved_by = coding->source - source_before;                          \
    src += moved_by;                                                    \
    src_base += moved_by;                                               \
    src_end += moved_by;                                                \
  } while (0)
1013
1014
119852e7
KH
/* Make sure there is room for BYTES more bytes at DST.  If
   DST + BYTES reaches or passes DST_END, enlarge coding->destination
   by BYTES plus the number of elements still pending in the charbuf
   (charbuf_end - charbuf), then refresh DST and DST_END.  We don't
   have to take care of coding->source, which may be relocated; that
   is handled by calling coding_set_source in encode_coding.

   The growth amount is kept in an EMACS_INT, the type that
   alloc_destination takes, so the pointer difference is not narrowed
   through `int'.  */

#define ASSURE_DESTINATION(bytes)                                       \
  do {                                                                  \
    if (dst + (bytes) >= dst_end)                                       \
      {                                                                 \
        EMACS_INT more_bytes = charbuf_end - charbuf + (bytes);         \
                                                                        \
        dst = alloc_destination (coding, more_bytes, dst);              \
        dst_end = coding->destination + coding->dst_bytes;              \
      }                                                                 \
  } while (0)
aa72b389 1030
aa72b389 1031
db274c7a
KH
/* Store multibyte form of the character C in P, and advance P to the
   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
   never calls MAYBE_UNIFY_CHAR.

   C is evaluated several times, so it must not have side effects.
   Every use of C is parenthesized so that an argument such as
   `a | b' is shifted and masked as a whole.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      /* Anything larger is passed to BYTE8_STRING.  */ \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
  } while (0)
1061
1062
/* Yield the code of the character whose multibyte form starts at P,
   and move P past that form.  This is like STRING_CHAR_ADVANCE, but
   it never calls MAYBE_UNIFY_CHAR.

   The length is chosen from the lead byte: one byte when bit 0x80 is
   clear, otherwise two, three or four bytes according to the first
   clear bit among 0x20, 0x10 and 0x08, and five bytes when all are
   set.  A two-byte form whose lead byte is below 0xC2 has 0x3FFF80
   OR'ed into the result.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
  (((p)[0] & 0x80) == 0                                         \
   ? *(p)++                                                     \
   : ((p)[0] & 0x20) == 0                                       \
   ? ((p) += 2,                                                 \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)        \
       | (((p)[-2] & 0x1F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p)[0] & 0x10) == 0                                       \
   ? ((p) += 3,                                                 \
      (((p)[-1] & 0x3F)                                         \
       | (((p)[-2] & 0x3F) << 6)                                \
       | (((p)[-3] & 0x0F) << 12)))                             \
   : ((p)[0] & 0x08) == 0                                       \
   ? ((p) += 4,                                                 \
      (((p)[-1] & 0x3F)                                         \
       | (((p)[-2] & 0x3F) << 6)                                \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-4] & 0xF) << 18)))                              \
   : ((p) += 5,                                                 \
      (((p)[-1] & 0x3F)                                         \
       | (((p)[-2] & 0x3F) << 6)                                \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-4] & 0x3F) << 18))))
1091
aa72b389 1092
df7492f9
KH
1093static void
1094coding_set_source (coding)
aa72b389 1095 struct coding_system *coding;
aa72b389 1096{
df7492f9
KH
1097 if (BUFFERP (coding->src_object))
1098 {
2cb26057 1099 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 1100
df7492f9 1101 if (coding->src_pos < 0)
2cb26057 1102 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 1103 else
2cb26057 1104 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 1105 }
df7492f9 1106 else if (STRINGP (coding->src_object))
aa72b389 1107 {
8f924df7 1108 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 1109 }
df7492f9
KH
1110 else
1111 /* Otherwise, the source is C string and is never relocated
1112 automatically. Thus we don't have to update anything. */
1113 ;
1114}
aa72b389 1115
df7492f9
KH
1116static void
1117coding_set_destination (coding)
1118 struct coding_system *coding;
1119{
1120 if (BUFFERP (coding->dst_object))
aa72b389 1121 {
df7492f9 1122 if (coding->src_pos < 0)
aa72b389 1123 {
13818c30 1124 coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
28f67a95
KH
1125 coding->dst_bytes = (GAP_END_ADDR
1126 - (coding->src_bytes - coding->consumed)
1127 - coding->destination);
aa72b389 1128 }
df7492f9 1129 else
28f67a95
KH
1130 {
1131 /* We are sure that coding->dst_pos_byte is before the gap
1132 of the buffer. */
1133 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
13818c30 1134 + coding->dst_pos_byte - BEG_BYTE);
28f67a95
KH
1135 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1136 - coding->destination);
1137 }
df7492f9
KH
1138 }
1139 else
1140 /* Otherwise, the destination is C string and is never relocated
1141 automatically. Thus we don't have to update anything. */
1142 ;
1143}
1144
1145
1146static void
1147coding_alloc_by_realloc (coding, bytes)
1148 struct coding_system *coding;
1149 EMACS_INT bytes;
1150{
1151 coding->destination = (unsigned char *) xrealloc (coding->destination,
1152 coding->dst_bytes + bytes);
1153 coding->dst_bytes += bytes;
1154}
1155
1156static void
db274c7a 1157coding_alloc_by_making_gap (coding, gap_head_used, bytes)
df7492f9 1158 struct coding_system *coding;
db274c7a 1159 EMACS_INT gap_head_used, bytes;
df7492f9 1160{
db274c7a 1161 if (EQ (coding->src_object, coding->dst_object))
df7492f9 1162 {
db274c7a
KH
1163 /* The gap may contain the produced data at the head and not-yet
1164 consumed data at the tail. To preserve those data, we at
1165 first make the gap size to zero, then increase the gap
1166 size. */
1167 EMACS_INT add = GAP_SIZE;
1168
1169 GPT += gap_head_used, GPT_BYTE += gap_head_used;
1170 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
df7492f9
KH
1171 make_gap (bytes);
1172 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
db274c7a 1173 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
df7492f9 1174 }
730fff51 1175 else
df7492f9 1176 {
2c78b7e1
KH
1177 Lisp_Object this_buffer;
1178
1179 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1180 set_buffer_internal (XBUFFER (coding->dst_object));
1181 make_gap (bytes);
1182 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1183 }
df7492f9 1184}
8f924df7 1185
df7492f9
KH
1186
1187static unsigned char *
1188alloc_destination (coding, nbytes, dst)
1189 struct coding_system *coding;
3e139625 1190 EMACS_INT nbytes;
df7492f9
KH
1191 unsigned char *dst;
1192{
1193 EMACS_INT offset = dst - coding->destination;
1194
1195 if (BUFFERP (coding->dst_object))
db274c7a
KH
1196 {
1197 struct buffer *buf = XBUFFER (coding->dst_object);
1198
1199 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1200 }
aa72b389 1201 else
df7492f9 1202 coding_alloc_by_realloc (coding, nbytes);
065e3595 1203 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1204 coding_set_destination (coding);
1205 dst = coding->destination + offset;
1206 return dst;
1207}
aa72b389 1208
ff0dacd7
KH
1209/** Macros for annotations. */
1210
/* Maximum length of annotation data (sum of annotations for
   composition and charset).  */
#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)

/* An annotation data is stored in the array coding->charbuf in this
   format:
     [ -LENGTH ANNOTATION_MASK NCHARS ... ]
   LENGTH is the number of elements in the annotation.
   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
   NCHARS is the number of characters in the text annotated.

   The format of the following elements depends on ANNOTATION_MASK.

   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
     ... METHOD [ COMPOSITION-COMPONENTS ... ]
   METHOD is one of enum composition_method.
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.

   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
   follows.  */

/* Store the three-element annotation header (-LEN, MASK, NCHARS) at
   BUF, advance BUF past it, and mark CODING as annotated.  The macro
   expands to a single statement without a trailing semicolon, so it
   can be used as the body of an `if' that has an `else'.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)

/* Store a four-element composition annotation at BUF: the header
   with length 4 followed by METHOD.  */

#define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method);                                                    \
  } while (0)


/* Store a four-element charset annotation at BUF: the header with
   length 4 followed by the charset ID.  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1254
df7492f9
KH
1255\f
1256/*** 2. Emacs' internal format (emacs-utf-8) ***/
1257
1258
1259
1260\f
1261/*** 3. UTF-8 ***/
1262
1263/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1264 Check if a text is encoded in UTF-8. If it is, return 1, else
1265 return 0. */
df7492f9
KH
1266
/* Nonzero if the bits of C selected by MASK equal TAG.  */
#define UTF_8_OCTET_CLASS_P(c, mask, tag) (((c) & (mask)) == (tag))

/* Classification of a UTF-8 octet by its high bits: a single-octet
   character (below 0x80), a trailing octet (10xxxxxx), or the
   leading octet of a 2-, 3-, 4- or 5-octet sequence.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c)     UTF_8_OCTET_CLASS_P (c, 0xC0, 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_OCTET_CLASS_P (c, 0xE0, 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_OCTET_CLASS_P (c, 0xF0, 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_OCTET_CLASS_P (c, 0xF8, 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_OCTET_CLASS_P (c, 0xFC, 0xF8)

/* The byte order mark character and the three octets of its UTF-8
   form.  */
#define UTF_BOM 0xFEFF
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1278
df7492f9 1279static int
ff0dacd7 1280detect_coding_utf_8 (coding, detect_info)
df7492f9 1281 struct coding_system *coding;
ff0dacd7 1282 struct coding_detection_info *detect_info;
df7492f9 1283{
065e3595 1284 const unsigned char *src = coding->source, *src_base;
8f924df7 1285 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1286 int multibytep = coding->src_multibyte;
1287 int consumed_chars = 0;
a470d443 1288 int bom_found = 0;
df7492f9
KH
1289 int found = 0;
1290
ff0dacd7 1291 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1292 /* A coding system of this category is always ASCII compatible. */
1293 src += coding->head_ascii;
1294
1295 while (1)
aa72b389 1296 {
df7492f9 1297 int c, c1, c2, c3, c4;
aa72b389 1298
065e3595 1299 src_base = src;
df7492f9 1300 ONE_MORE_BYTE (c);
065e3595 1301 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1302 continue;
1303 ONE_MORE_BYTE (c1);
065e3595 1304 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1305 break;
1306 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1307 {
a470d443 1308 found = 1;
df7492f9 1309 continue;
aa72b389 1310 }
df7492f9 1311 ONE_MORE_BYTE (c2);
065e3595 1312 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1313 break;
1314 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1315 {
a470d443
KH
1316 found = 1;
1317 if (src_base == coding->source
1318 && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1319 bom_found = 1;
df7492f9 1320 continue;
aa72b389 1321 }
df7492f9 1322 ONE_MORE_BYTE (c3);
065e3595 1323 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1324 break;
1325 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1326 {
a470d443 1327 found = 1;
df7492f9
KH
1328 continue;
1329 }
1330 ONE_MORE_BYTE (c4);
065e3595 1331 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1332 break;
1333 if (UTF_8_5_OCTET_LEADING_P (c))
1334 {
a470d443 1335 found = 1;
df7492f9
KH
1336 continue;
1337 }
1338 break;
aa72b389 1339 }
ff0dacd7 1340 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1341 return 0;
aa72b389 1342
df7492f9 1343 no_more_source:
065e3595 1344 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1345 {
ff0dacd7 1346 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1347 return 0;
aa72b389 1348 }
a470d443
KH
1349 if (bom_found)
1350 {
1351 /* The first character 0xFFFE doesn't necessarily mean a BOM. */
1352 detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1353 }
1354 else
1355 {
1356 detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
0e17387a
KH
1357 if (found)
1358 detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
a470d443 1359 }
ff0dacd7 1360 return 1;
aa72b389
KH
1361}
1362
4ed46869 1363
b73bfc1c 1364static void
df7492f9 1365decode_coding_utf_8 (coding)
b73bfc1c 1366 struct coding_system *coding;
b73bfc1c 1367{
8f924df7
KH
1368 const unsigned char *src = coding->source + coding->consumed;
1369 const unsigned char *src_end = coding->source + coding->src_bytes;
1370 const unsigned char *src_base;
69a80ea3
KH
1371 int *charbuf = coding->charbuf + coding->charbuf_used;
1372 int *charbuf_end = coding->charbuf + coding->charbuf_size;
453b38f0 1373 int consumed_chars = 0, consumed_chars_base = 0;
df7492f9 1374 int multibytep = coding->src_multibyte;
a470d443 1375 enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
24a73b0a 1376 Lisp_Object attr, charset_list;
119852e7
KH
1377 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1378 int byte_after_cr = -1;
4ed46869 1379
24a73b0a 1380 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1381
a470d443
KH
1382 if (bom != utf_without_bom)
1383 {
1384 int c1, c2, c3;
1385
1386 src_base = src;
1387 ONE_MORE_BYTE (c1);
1388 if (! UTF_8_3_OCTET_LEADING_P (c1))
1389 src = src_base;
1390 else
1391 {
159bd5a2 1392 ONE_MORE_BYTE (c2);
a470d443
KH
1393 if (! UTF_8_EXTRA_OCTET_P (c2))
1394 src = src_base;
1395 else
1396 {
159bd5a2 1397 ONE_MORE_BYTE (c3);
a470d443
KH
1398 if (! UTF_8_EXTRA_OCTET_P (c3))
1399 src = src_base;
1400 else
1401 {
1402 if ((c1 != UTF_8_BOM_1)
1403 || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1404 src = src_base;
1405 else
1406 CODING_UTF_8_BOM (coding) = utf_without_bom;
1407 }
1408 }
1409 }
1410 }
1411 CODING_UTF_8_BOM (coding) = utf_without_bom;
1412
1413
1414
df7492f9 1415 while (1)
b73bfc1c 1416 {
df7492f9 1417 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1418
df7492f9
KH
1419 src_base = src;
1420 consumed_chars_base = consumed_chars;
4af310db 1421
df7492f9 1422 if (charbuf >= charbuf_end)
b71f6f73
KH
1423 {
1424 if (byte_after_cr >= 0)
1425 src_base--;
1426 break;
1427 }
df7492f9 1428
119852e7
KH
1429 if (byte_after_cr >= 0)
1430 c1 = byte_after_cr, byte_after_cr = -1;
1431 else
1432 ONE_MORE_BYTE (c1);
065e3595
KH
1433 if (c1 < 0)
1434 {
1435 c = - c1;
1436 }
1437 else if (UTF_8_1_OCTET_P(c1))
df7492f9 1438 {
119852e7
KH
1439 if (eol_crlf && c1 == '\r')
1440 ONE_MORE_BYTE (byte_after_cr);
df7492f9 1441 c = c1;
4af310db 1442 }
df7492f9 1443 else
4af310db 1444 {
df7492f9 1445 ONE_MORE_BYTE (c2);
065e3595 1446 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1447 goto invalid_code;
1448 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1449 {
b0edb2c5
DL
1450 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1451 /* Reject overlong sequences here and below. Encoders
1452 producing them are incorrect, they can be misleading,
1453 and they mess up read/write invariance. */
1454 if (c < 128)
1455 goto invalid_code;
4af310db 1456 }
df7492f9 1457 else
aa72b389 1458 {
df7492f9 1459 ONE_MORE_BYTE (c3);
065e3595 1460 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1461 goto invalid_code;
1462 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1463 {
1464 c = (((c1 & 0xF) << 12)
1465 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1466 if (c < 0x800
1467 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1468 goto invalid_code;
1469 }
df7492f9
KH
1470 else
1471 {
1472 ONE_MORE_BYTE (c4);
065e3595 1473 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1474 goto invalid_code;
1475 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1476 {
df7492f9
KH
1477 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1478 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1479 if (c < 0x10000)
1480 goto invalid_code;
1481 }
df7492f9
KH
1482 else
1483 {
1484 ONE_MORE_BYTE (c5);
065e3595 1485 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1486 goto invalid_code;
1487 if (UTF_8_5_OCTET_LEADING_P (c1))
1488 {
1489 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1490 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1491 | (c5 & 0x3F));
b0edb2c5 1492 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1493 goto invalid_code;
1494 }
1495 else
1496 goto invalid_code;
1497 }
1498 }
aa72b389 1499 }
b73bfc1c 1500 }
df7492f9
KH
1501
1502 *charbuf++ = c;
1503 continue;
1504
1505 invalid_code:
1506 src = src_base;
1507 consumed_chars = consumed_chars_base;
1508 ONE_MORE_BYTE (c);
1509 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1510 coding->errors++;
aa72b389
KH
1511 }
1512
df7492f9
KH
1513 no_more_source:
1514 coding->consumed_char += consumed_chars_base;
1515 coding->consumed = src_base - coding->source;
1516 coding->charbuf_used = charbuf - coding->charbuf;
1517}
1518
1519
1520static int
1521encode_coding_utf_8 (coding)
1522 struct coding_system *coding;
1523{
1524 int multibytep = coding->dst_multibyte;
1525 int *charbuf = coding->charbuf;
1526 int *charbuf_end = charbuf + coding->charbuf_used;
1527 unsigned char *dst = coding->destination + coding->produced;
1528 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1529 int produced_chars = 0;
df7492f9
KH
1530 int c;
1531
a470d443
KH
1532 if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1533 {
1534 ASSURE_DESTINATION (3);
1535 EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1536 CODING_UTF_8_BOM (coding) = utf_without_bom;
1537 }
1538
df7492f9 1539 if (multibytep)
aa72b389 1540 {
df7492f9
KH
1541 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1542
1543 while (charbuf < charbuf_end)
b73bfc1c 1544 {
df7492f9 1545 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1546
df7492f9
KH
1547 ASSURE_DESTINATION (safe_room);
1548 c = *charbuf++;
28f67a95
KH
1549 if (CHAR_BYTE8_P (c))
1550 {
1551 c = CHAR_TO_BYTE8 (c);
1552 EMIT_ONE_BYTE (c);
1553 }
1554 else
1555 {
db274c7a 1556 CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
28f67a95
KH
1557 for (p = str; p < pend; p++)
1558 EMIT_ONE_BYTE (*p);
1559 }
b73bfc1c 1560 }
aa72b389 1561 }
df7492f9
KH
1562 else
1563 {
1564 int safe_room = MAX_MULTIBYTE_LENGTH;
1565
1566 while (charbuf < charbuf_end)
b73bfc1c 1567 {
df7492f9
KH
1568 ASSURE_DESTINATION (safe_room);
1569 c = *charbuf++;
f03caae0
KH
1570 if (CHAR_BYTE8_P (c))
1571 *dst++ = CHAR_TO_BYTE8 (c);
1572 else
db274c7a 1573 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
df7492f9 1574 produced_chars++;
4ed46869
KH
1575 }
1576 }
065e3595 1577 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1578 coding->produced_char += produced_chars;
1579 coding->produced = dst - coding->destination;
1580 return 0;
4ed46869
KH
1581}
1582
b73bfc1c 1583
df7492f9 1584/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1585 Check if a text is encoded in one of UTF-16 based coding systems.
1586 If it is, return 1, else return 0. */
aa72b389 1587
df7492f9
KH
/* The top six bits of a 16-bit code unit, which tell the two
   surrogate halves apart.  */
#define UTF_16_SURROGATE_BITS(val) ((val) & 0xFC00)

/* Nonzero if VAL is a high (leading) surrogate, 0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (UTF_16_SURROGATE_BITS (val) == 0xD800)

/* Nonzero if VAL is a low (trailing) surrogate, 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (UTF_16_SURROGATE_BITS (val) == 0xDC00)

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF)         \
   || UTF_16_LOW_SURROGATE_P (val))
aa72b389 1598
aa72b389 1599
df7492f9 1600static int
ff0dacd7 1601detect_coding_utf_16 (coding, detect_info)
aa72b389 1602 struct coding_system *coding;
ff0dacd7 1603 struct coding_detection_info *detect_info;
aa72b389 1604{
8f924df7
KH
1605 const unsigned char *src = coding->source, *src_base = src;
1606 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1607 int multibytep = coding->src_multibyte;
1608 int consumed_chars = 0;
1609 int c1, c2;
aa72b389 1610
ff0dacd7 1611 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1612 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1613 && (coding->src_chars & 1))
ff0dacd7
KH
1614 {
1615 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1616 return 0;
1617 }
24a73b0a 1618
f56a4450 1619 TWO_MORE_BYTES (c1, c2);
df7492f9 1620 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1621 {
b49a1807
KH
1622 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1623 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1624 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1625 | CATEGORY_MASK_UTF_16_BE_NOSIG
1626 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1627 }
df7492f9 1628 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1629 {
b49a1807
KH
1630 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1631 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1632 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1633 | CATEGORY_MASK_UTF_16_BE_NOSIG
1634 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1635 }
220eeac9 1636 else if (c2 < 0)
f56a4450
KH
1637 {
1638 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1639 return 0;
1640 }
2f3cbb32 1641 else
24a73b0a 1642 {
2f3cbb32
KH
1643 /* We check the dispersion of Eth and Oth bytes where E is even and
1644 O is odd. If both are high, we assume binary data.*/
1645 unsigned char e[256], o[256];
1646 unsigned e_num = 1, o_num = 1;
1647
1648 memset (e, 0, 256);
1649 memset (o, 0, 256);
1650 e[c1] = 1;
1651 o[c2] = 1;
1652
24a73b0a
KH
1653 detect_info->rejected
1654 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
2f3cbb32
KH
1655
1656 while (1)
1657 {
f56a4450 1658 TWO_MORE_BYTES (c1, c2);
220eeac9 1659 if (c2 < 0)
f56a4450 1660 break;
2f3cbb32
KH
1661 if (! e[c1])
1662 {
1663 e[c1] = 1;
1664 e_num++;
1665 if (e_num >= 128)
1666 break;
1667 }
1668 if (! o[c2])
1669 {
1670 o[c1] = 1;
1671 o_num++;
1672 if (o_num >= 128)
1673 break;
1674 }
1675 }
1676 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1677 return 0;
ff0dacd7 1678 }
2f3cbb32 1679
df7492f9 1680 no_more_source:
ff0dacd7 1681 return 1;
df7492f9 1682}
aa72b389 1683
df7492f9
KH
1684static void
1685decode_coding_utf_16 (coding)
1686 struct coding_system *coding;
1687{
8f924df7
KH
1688 const unsigned char *src = coding->source + coding->consumed;
1689 const unsigned char *src_end = coding->source + coding->src_bytes;
1690 const unsigned char *src_base;
69a80ea3
KH
1691 int *charbuf = coding->charbuf + coding->charbuf_used;
1692 int *charbuf_end = coding->charbuf + coding->charbuf_size;
3a8406e1 1693 int consumed_chars = 0, consumed_chars_base = 0;
df7492f9 1694 int multibytep = coding->src_multibyte;
a470d443 1695 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
df7492f9
KH
1696 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1697 int surrogate = CODING_UTF_16_SURROGATE (coding);
24a73b0a 1698 Lisp_Object attr, charset_list;
119852e7
KH
1699 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1700 int byte_after_cr1 = -1, byte_after_cr2 = -1;
df7492f9 1701
24a73b0a 1702 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1703
a470d443 1704 if (bom == utf_with_bom)
aa72b389 1705 {
df7492f9 1706 int c, c1, c2;
4af310db 1707
aa72b389 1708 src_base = src;
df7492f9
KH
1709 ONE_MORE_BYTE (c1);
1710 ONE_MORE_BYTE (c2);
e19c3639 1711 c = (c1 << 8) | c2;
aa72b389 1712
b49a1807
KH
1713 if (endian == utf_16_big_endian
1714 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1715 {
b49a1807
KH
1716 /* The first two bytes are not BOM. Treat them as bytes
1717 for a normal character. */
1718 src = src_base;
1719 coding->errors++;
aa72b389 1720 }
a470d443 1721 CODING_UTF_16_BOM (coding) = utf_without_bom;
b49a1807 1722 }
a470d443 1723 else if (bom == utf_detect_bom)
b49a1807
KH
1724 {
1725 /* We have already tried to detect BOM and failed in
1726 detect_coding. */
a470d443 1727 CODING_UTF_16_BOM (coding) = utf_without_bom;
df7492f9 1728 }
aa72b389 1729
df7492f9
KH
1730 while (1)
1731 {
1732 int c, c1, c2;
1733
1734 src_base = src;
1735 consumed_chars_base = consumed_chars;
1736
1737 if (charbuf + 2 >= charbuf_end)
b71f6f73
KH
1738 {
1739 if (byte_after_cr1 >= 0)
1740 src_base -= 2;
1741 break;
1742 }
df7492f9 1743
119852e7
KH
1744 if (byte_after_cr1 >= 0)
1745 c1 = byte_after_cr1, byte_after_cr1 = -1;
1746 else
1747 ONE_MORE_BYTE (c1);
065e3595
KH
1748 if (c1 < 0)
1749 {
1750 *charbuf++ = -c1;
1751 continue;
1752 }
119852e7
KH
1753 if (byte_after_cr2 >= 0)
1754 c2 = byte_after_cr2, byte_after_cr2 = -1;
1755 else
1756 ONE_MORE_BYTE (c2);
065e3595
KH
1757 if (c2 < 0)
1758 {
1759 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1760 *charbuf++ = -c2;
1761 continue;
1762 }
df7492f9 1763 c = (endian == utf_16_big_endian
e19c3639 1764 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
119852e7 1765
df7492f9 1766 if (surrogate)
fd3ae0b9 1767 {
df7492f9 1768 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1769 {
df7492f9
KH
1770 if (endian == utf_16_big_endian)
1771 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1772 else
1773 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1774 *charbuf++ = c1;
1775 *charbuf++ = c2;
1776 coding->errors++;
1777 if (UTF_16_HIGH_SURROGATE_P (c))
1778 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1779 else
df7492f9 1780 *charbuf++ = c;
fd3ae0b9
KH
1781 }
1782 else
df7492f9
KH
1783 {
1784 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1785 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1786 *charbuf++ = 0x10000 + c;
df7492f9 1787 }
fd3ae0b9 1788 }
aa72b389 1789 else
df7492f9
KH
1790 {
1791 if (UTF_16_HIGH_SURROGATE_P (c))
1792 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1793 else
119852e7
KH
1794 {
1795 if (eol_crlf && c == '\r')
1796 {
1797 ONE_MORE_BYTE (byte_after_cr1);
1798 ONE_MORE_BYTE (byte_after_cr2);
1799 }
1800 *charbuf++ = c;
1801 }
8f924df7 1802 }
aa72b389 1803 }
df7492f9
KH
1804
1805 no_more_source:
1806 coding->consumed_char += consumed_chars_base;
1807 coding->consumed = src_base - coding->source;
1808 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1809}
b73bfc1c 1810
df7492f9
KH
1811static int
1812encode_coding_utf_16 (coding)
1813 struct coding_system *coding;
1814{
1815 int multibytep = coding->dst_multibyte;
1816 int *charbuf = coding->charbuf;
1817 int *charbuf_end = charbuf + coding->charbuf_used;
1818 unsigned char *dst = coding->destination + coding->produced;
1819 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1820 int safe_room = 8;
a470d443 1821 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
df7492f9
KH
1822 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1823 int produced_chars = 0;
24a73b0a 1824 Lisp_Object attrs, charset_list;
df7492f9 1825 int c;
4ed46869 1826
24a73b0a 1827 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 1828
a470d443 1829 if (bom != utf_without_bom)
df7492f9
KH
1830 {
1831 ASSURE_DESTINATION (safe_room);
1832 if (big_endian)
df7492f9 1833 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1834 else
1835 EMIT_TWO_BYTES (0xFF, 0xFE);
a470d443 1836 CODING_UTF_16_BOM (coding) = utf_without_bom;
df7492f9
KH
1837 }
1838
1839 while (charbuf < charbuf_end)
1840 {
1841 ASSURE_DESTINATION (safe_room);
1842 c = *charbuf++;
e19c3639
KH
1843 if (c >= MAX_UNICODE_CHAR)
1844 c = coding->default_char;
df7492f9
KH
1845
1846 if (c < 0x10000)
1847 {
1848 if (big_endian)
1849 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1850 else
1851 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1852 }
1853 else
1854 {
1855 int c1, c2;
1856
1857 c -= 0x10000;
1858 c1 = (c >> 10) + 0xD800;
1859 c2 = (c & 0x3FF) + 0xDC00;
1860 if (big_endian)
1861 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1862 else
1863 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1864 }
1865 }
065e3595 1866 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1867 coding->produced = dst - coding->destination;
1868 coding->produced_char += produced_chars;
1869 return 0;
1870}
1871
1872\f
1873/*** 6. Old Emacs' internal format (emacs-mule) ***/
1874
1875/* Emacs' internal format for representation of multiple character
1876 sets is a kind of multi-byte encoding, i.e. characters are
1877 represented by variable-length sequences of one-byte codes.
1878
1879 ASCII characters and control characters (e.g. `tab', `newline') are
1880 represented by one-byte sequences which are their ASCII codes, in
1881 the range 0x00 through 0x7F.
1882
1883 8-bit characters of the range 0x80..0x9F are represented by
1884 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1885 code + 0x20).
1886
1887 8-bit characters of the range 0xA0..0xFF are represented by
1888 one-byte sequences which are their 8-bit code.
1889
1890 The other characters are represented by a sequence of `base
1891 leading-code', optional `extended leading-code', and one or two
1892 `position-code's. The length of the sequence is determined by the
1893 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1894 whereas extended leading-code and position-code take the range 0xA0
1895 through 0xFF. See `charset.h' for more details about leading-code
1896 and position-code.
1897
1898 --- CODE RANGE of Emacs' internal format ---
1899 character set range
1900 ------------- -----
1901 ascii 0x00..0x7F
1902 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1903 eight-bit-graphic 0xA0..0xFF
1904 ELSE 0x81..0x9D + [0xA0..0xFF]+
1905 ---------------------------------------------
1906
1907 As this is the internal character representation, the format is
1908 usually not used externally (i.e. in a file or in a data sent to a
1909 process). But, it is possible to have a text externally in this
1910 format (i.e. by encoding by the coding system `emacs-mule').
1911
1912 In that case, a sequence of one-byte codes has a slightly different
1913 form.
1914
1915 At first, all characters in eight-bit-control are represented by
1916 one-byte sequences which are their 8-bit code.
1917
1918 Next, character composition data are represented by the byte
1919 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1920 where,
1921 METHOD is 0xF2 plus one of the composition methods (enum
1922 composition_method),
1923
1924 BYTES is 0xA0 plus a byte length of this composition data,
1925
1926 CHARS is 0xA0 plus the number of characters composed by this
1927 data,
1928
1929 COMPONENTs are characters in multibyte form or composition
1930 rules, each rule encoded as two bytes of ASCII codes.
1931
1932 In addition, for backward compatibility, the following formats are
1933 also recognized as composition data on decoding.
1934
1935 0x80 MSEQ ...
1936 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1937
1938 Here,
1939 MSEQ is a multibyte form but in these special format:
1940 ASCII: 0xA0 ASCII_CODE+0x80,
1941 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1942 RULE is a one byte code of the range 0xA0..0xF0 that
1943 represents a composition rule.
1944 */
1945
/* Table indexed by a byte value.  The lookups in this file treat an
   entry as the total byte length (1 to 4) of the emacs-mule sequence
   introduced by that byte.
   NOTE(review): the table is filled in elsewhere -- confirm its exact
   contents there.  */
1946char emacs_mule_bytes[256];
1947
df7492f9 1948int
ff0dacd7 1949emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1950 struct coding_system *coding;
065e3595 1951 const unsigned char *src;
ff0dacd7 1952 int *nbytes, *nchars, *id;
df7492f9 1953{
8f924df7
KH
1954 const unsigned char *src_end = coding->source + coding->src_bytes;
1955 const unsigned char *src_base = src;
df7492f9 1956 int multibytep = coding->src_multibyte;
df7492f9
KH
1957 struct charset *charset;
1958 unsigned code;
1959 int c;
1960 int consumed_chars = 0;
1961
1962 ONE_MORE_BYTE (c);
065e3595 1963 if (c < 0)
df7492f9 1964 {
065e3595
KH
1965 c = -c;
1966 charset = emacs_mule_charset[0];
1967 }
1968 else
1969 {
4d41e8b7
KH
1970 if (c >= 0xA0)
1971 {
b3af4b28 1972 /* Old style component character of a composition. */
4d41e8b7
KH
1973 if (c == 0xA0)
1974 {
1975 ONE_MORE_BYTE (c);
1976 c -= 0x80;
1977 }
1978 else
1979 c -= 0x20;
1980 }
1981
065e3595 1982 switch (emacs_mule_bytes[c])
b73bfc1c 1983 {
065e3595 1984 case 2:
df7492f9
KH
1985 if (! (charset = emacs_mule_charset[c]))
1986 goto invalid_code;
1987 ONE_MORE_BYTE (c);
9ffd559c 1988 if (c < 0xA0)
065e3595 1989 goto invalid_code;
df7492f9 1990 code = c & 0x7F;
065e3595
KH
1991 break;
1992
1993 case 3:
1994 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1995 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1996 {
1997 ONE_MORE_BYTE (c);
9ffd559c 1998 if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
065e3595
KH
1999 goto invalid_code;
2000 ONE_MORE_BYTE (c);
9ffd559c 2001 if (c < 0xA0)
065e3595
KH
2002 goto invalid_code;
2003 code = c & 0x7F;
2004 }
2005 else
2006 {
2007 if (! (charset = emacs_mule_charset[c]))
2008 goto invalid_code;
2009 ONE_MORE_BYTE (c);
9ffd559c 2010 if (c < 0xA0)
065e3595
KH
2011 goto invalid_code;
2012 code = (c & 0x7F) << 8;
2013 ONE_MORE_BYTE (c);
9ffd559c 2014 if (c < 0xA0)
065e3595
KH
2015 goto invalid_code;
2016 code |= c & 0x7F;
2017 }
2018 break;
2019
2020 case 4:
2021 ONE_MORE_BYTE (c);
2022 if (c < 0 || ! (charset = emacs_mule_charset[c]))
df7492f9
KH
2023 goto invalid_code;
2024 ONE_MORE_BYTE (c);
9ffd559c 2025 if (c < 0xA0)
065e3595 2026 goto invalid_code;
781d7a48 2027 code = (c & 0x7F) << 8;
df7492f9 2028 ONE_MORE_BYTE (c);
9ffd559c 2029 if (c < 0xA0)
065e3595 2030 goto invalid_code;
df7492f9 2031 code |= c & 0x7F;
065e3595 2032 break;
df7492f9 2033
065e3595
KH
2034 case 1:
2035 code = c;
2036 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2037 ? charset_ascii : charset_eight_bit);
2038 break;
df7492f9 2039
065e3595
KH
2040 default:
2041 abort ();
2042 }
2043 c = DECODE_CHAR (charset, code);
2044 if (c < 0)
2045 goto invalid_code;
df7492f9 2046 }
df7492f9
KH
2047 *nbytes = src - src_base;
2048 *nchars = consumed_chars;
ff0dacd7
KH
2049 if (id)
2050 *id = charset->id;
df7492f9
KH
2051 return c;
2052
2053 no_more_source:
2054 return -2;
2055
2056 invalid_code:
2057 return -1;
2058}
2059
2060
2061/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2062 Check if a text is encoded in `emacs-mule'. If it is, return 1,
2063 else return 0. */
df7492f9
KH
2064
2065static int
ff0dacd7 2066detect_coding_emacs_mule (coding, detect_info)
df7492f9 2067 struct coding_system *coding;
ff0dacd7 2068 struct coding_detection_info *detect_info;
df7492f9 2069{
065e3595 2070 const unsigned char *src = coding->source, *src_base;
8f924df7 2071 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
2072 int multibytep = coding->src_multibyte;
2073 int consumed_chars = 0;
2074 int c;
2075 int found = 0;
2076
ff0dacd7 2077 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
2078 /* A coding system of this category is always ASCII compatible. */
2079 src += coding->head_ascii;
2080
2081 while (1)
2082 {
065e3595 2083 src_base = src;
df7492f9 2084 ONE_MORE_BYTE (c);
065e3595
KH
2085 if (c < 0)
2086 continue;
df7492f9
KH
2087 if (c == 0x80)
2088 {
2089 /* Perhaps the start of composite character. We simple skip
2090 it because analyzing it is too heavy for detecting. But,
2091 at least, we check that the composite character
3ed051d4 2092 constitutes of more than 4 bytes. */
8f924df7 2093 const unsigned char *src_base;
df7492f9
KH
2094
2095 repeat:
2096 src_base = src;
2097 do
2098 {
2099 ONE_MORE_BYTE (c);
2100 }
2101 while (c >= 0xA0);
2102
2103 if (src - src_base <= 4)
2104 break;
ff0dacd7 2105 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
2106 if (c == 0x80)
2107 goto repeat;
b73bfc1c 2108 }
df7492f9
KH
2109
2110 if (c < 0x80)
b73bfc1c 2111 {
df7492f9
KH
2112 if (c < 0x20
2113 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2114 break;
2115 }
2116 else
2117 {
0e219d54 2118 int more_bytes = emacs_mule_bytes[*src_base] - 1;
df7492f9 2119
0e219d54 2120 while (more_bytes > 0)
df7492f9
KH
2121 {
2122 ONE_MORE_BYTE (c);
0e219d54
KH
2123 if (c < 0xA0)
2124 {
2125 src--; /* Unread the last byte. */
2126 break;
2127 }
2128 more_bytes--;
df7492f9 2129 }
0e219d54 2130 if (more_bytes != 0)
df7492f9 2131 break;
ff0dacd7 2132 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
2133 }
2134 }
ff0dacd7 2135 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
2136 return 0;
2137
2138 no_more_source:
065e3595 2139 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 2140 {
ff0dacd7 2141 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
2142 return 0;
2143 }
ff0dacd7
KH
2144 detect_info->found |= found;
2145 return 1;
4ed46869
KH
2146}
2147
b73bfc1c 2148
df7492f9
KH
2149/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2150
2151/* Decode a character represented as a component of composition
2152 sequence of Emacs 20/21 style at SRC, using emacs_mule_char.  Store
2153 the character in *BUF, increment BUF, and advance SRC and
2154 CONSUMED_CHARS past it.  If SRC is already at SRC_END, or if
2155 emacs_mule_char returns -2, do nothing.  If emacs_mule_char reports
2156 any other error, jump to the label `invalid_code'. */
2157
2158#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
f937a7db 2159 do \
df7492f9
KH
2160 { \
2161 int c; \
2162 int nbytes, nchars; \
2163 \
2164 if (src == src_end) \
2165 break; \
ff0dacd7 2166 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
df7492f9
KH
2167 if (c < 0) \
2168 { \
2169 if (c == -2) \
2170 break; \
2171 goto invalid_code; \
2172 } \
2173 *buf++ = c; \
2174 src += nbytes; \
2175 consumed_chars += nchars; \
2176 } \
f937a7db 2177 while (0)
df7492f9
KH
2178
2179
2180/* Decode a composition rule represented as a component of composition
781d7a48
KH
2181 sequence of Emacs 20 style at SRC. The rule is one byte whose value
2182 is 0xA0 + GREF * 9 + NREF. Store the decoded rule in *BUF and
2183 increment BUF. If SRC is at SRC_END or the byte is outside
 0xA0..0xF0, jump to the label `invalid_code'. */
df7492f9 2184
781d7a48 2185#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
df7492f9
KH
2186 do { \
2187 int c, gref, nref; \
2188 \
781d7a48 2189 if (src >= src_end) \
df7492f9
KH
2190 goto invalid_code; \
2191 ONE_MORE_BYTE_NO_CHECK (c); \
4d41e8b7 2192 c -= 0xA0; \
df7492f9
KH
2193 if (c < 0 || c >= 81) \
2194 goto invalid_code; \
2195 \
2196 gref = c / 9, nref = c % 9; \
2197 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
2198 } while (0)
2199
2200
781d7a48
KH
2201/* Decode a composition rule represented as a component of composition
2202 sequence of Emacs 21 style at SRC. The rule is two bytes, GREF + 0x20
2203 followed by NREF + 0x20, each value in the range 0..80. Store the
2204 decoded rule in *BUF and increment BUF. If fewer than two bytes
 remain before SRC_END or a value is out of range, jump to the
 label `invalid_code'. */
2205
2206#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
2207 do { \
2208 int gref, nref; \
2209 \
2210 if (src + 1>= src_end) \
2211 goto invalid_code; \
2212 ONE_MORE_BYTE_NO_CHECK (gref); \
2213 gref -= 0x20; \
2214 ONE_MORE_BYTE_NO_CHECK (nref); \
2215 nref -= 0x20; \
2216 if (gref < 0 || gref >= 81 \
2217 || nref < 0 || nref >= 81) \
2218 goto invalid_code; \
2219 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
2220 } while (0)
2221
2222
/* Decode the header of an Emacs 21 style composition whose METHOD
   byte has just been read into C.  The composition method is
   C - 0xF2.  The next two bytes give the byte length of the
   composition data (byte - 0xA0, which must be at least 3) and the
   number of composed characters (byte - 0xA0).  The annotation is
   added to CHARBUF by ADD_COMPOSITION_DATA.  For any method other
   than COMPOSITION_RELATIVE, the components are then decoded into
   CHARBUF: a character at each even position, and at each odd
   position a rule, or a character if the method is
   COMPOSITION_WITH_ALTCHARS.  The first annotation element is then
   decreased by the number of components decoded.  Malformed input
   jumps to the label `invalid_code'.

   NOTE(review): DECODE_EMACS_MULE_COMPOSITION_CHAR consumes nothing
   when SRC is at SRC_END or when emacs_mule_char returns -2, so the
   component loop appears unable to make progress in that case, and
   the check following the loop looks unreachable -- confirm.  */
df7492f9 2223#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
aa72b389 2224 do { \
df7492f9 2225 /* Emacs 21 style format. The first three bytes at SRC are \
781d7a48 2226 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
df7492f9
KH
2227 the byte length of this composition information, CHARS is the \
2228 number of characters composed by this composition. */ \
781d7a48
KH
2229 enum composition_method method = c - 0xF2; \
2230 int *charbuf_base = charbuf; \
df7492f9
KH
2231 int consumed_chars_limit; \
2232 int nbytes, nchars; \
2233 \
2234 ONE_MORE_BYTE (c); \
065e3595
KH
2235 if (c < 0) \
2236 goto invalid_code; \
df7492f9
KH
2237 nbytes = c - 0xA0; \
2238 if (nbytes < 3) \
2239 goto invalid_code; \
2240 ONE_MORE_BYTE (c); \
065e3595
KH
2241 if (c < 0) \
2242 goto invalid_code; \
df7492f9 2243 nchars = c - 0xA0; \
69a80ea3 2244 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
df7492f9
KH
2245 consumed_chars_limit = consumed_chars_base + nbytes; \
2246 if (method != COMPOSITION_RELATIVE) \
aa72b389 2247 { \
df7492f9
KH
2248 int i = 0; \
2249 while (consumed_chars < consumed_chars_limit) \
aa72b389 2250 { \
df7492f9 2251 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
781d7a48 2252 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
df7492f9
KH
2253 else \
2254 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
781d7a48 2255 i++; \
aa72b389 2256 } \
df7492f9
KH
2257 if (consumed_chars < consumed_chars_limit) \
2258 goto invalid_code; \
781d7a48 2259 charbuf_base[0] -= i; \
aa72b389
KH
2260 } \
2261 } while (0)
93dec019 2262
aa72b389 2263
d959f512
KH
/* Decode an Emacs 20 style relative composition.  SRC is rewound to
   SRC_BASE and the leading 0x80 is skipped.  Then up to
   MAX_COMPOSITION_COMPONENTS component characters are decoded into a
   local array for as long as the next source byte is at least 0xA0.
   Fewer than two components jumps to the label `invalid_code'.
   Otherwise a COMPOSITION_RELATIVE annotation for that many
   characters is added to CHARBUF, followed by the components.

   NOTE(review): `*src' is read without comparing SRC against
   SRC_END, and DECODE_EMACS_MULE_COMPOSITION_CHAR stores nothing
   when it cannot decode, while I is still incremented -- confirm
   that the input cannot end inside such a sequence.  */
2264#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
2265 do { \
2266 /* Emacs 20 style format for relative composition. */ \
2267 /* Store multibyte form of characters to be composed. */ \
2268 enum composition_method method = COMPOSITION_RELATIVE; \
2269 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
2270 int *buf = components; \
2271 int i, j; \
2272 \
2273 src = src_base; \
2274 ONE_MORE_BYTE (c); /* skip 0x80 */ \
2275 for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++) \
2276 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2277 if (i < 2) \
2278 goto invalid_code; \
2279 ADD_COMPOSITION_DATA (charbuf, i, method); \
2280 for (j = 0; j < i; j++) \
2281 *charbuf++ = components[j]; \
df7492f9
KH
2282 } while (0)
2283
2284
/* Decode an Emacs 20 style rule-base composition.  A first character
   is decoded into a local array, then (rule, character) pairs for as
   long as the next source byte is at least 0xA0 and fewer than
   MAX_COMPOSITION_COMPONENTS characters have been read.  If only one
   character was read, or the array does not hold an odd number of
   elements (characters separated by rules), jump to the label
   `invalid_code'.  If CHARBUF lacks room, jump to `no_more_source'.
   Otherwise a COMPOSITION_WITH_RULE annotation is added to CHARBUF,
   followed by the characters and rules in source order, followed by
   the characters alone (every second element).  The first annotation
   element is decreased by the number of characters-and-rules
   elements.

   NOTE(review): `*src' is read without comparing SRC against
   SRC_END -- confirm that the input cannot end inside such a
   sequence.  */
2285#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
2286 do { \
2287 /* Emacs 20 style format for rule-base composition. */ \
2288 /* Store multibyte form of characters to be composed. */ \
ff0dacd7 2289 enum composition_method method = COMPOSITION_WITH_RULE; \
4d41e8b7 2290 int *charbuf_base = charbuf; \
df7492f9
KH
2291 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
2292 int *buf = components; \
2293 int i, j; \
4d41e8b7 2294 \
df7492f9 2295 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
4d41e8b7 2296 for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++) \
df7492f9 2297 { \
4d41e8b7
KH
2298 if (*src < 0xA0) \
2299 break; \
781d7a48 2300 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
df7492f9
KH
2301 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2302 } \
4d41e8b7 2303 if (i <= 1 || (buf - components) % 2 == 0) \
df7492f9 2304 goto invalid_code; \
4d41e8b7 2305 if (charbuf + i + (i / 2) + 1 >= charbuf_end) \
df7492f9 2306 goto no_more_source; \
4d41e8b7
KH
2307 ADD_COMPOSITION_DATA (charbuf, i, method); \
2308 i = i * 2 - 1; \
df7492f9
KH
2309 for (j = 0; j < i; j++) \
2310 *charbuf++ = components[j]; \
4d41e8b7 2311 charbuf_base[0] -= i; \
df7492f9
KH
2312 for (j = 0; j < i; j += 2) \
2313 *charbuf++ = components[j]; \
2314 } while (0)
2315
aa72b389
KH
2316
/* Decode the emacs-mule encoded bytes of CODING->source, starting at
   offset CODING->consumed, into character codes appended to
   CODING->charbuf.  Runs of characters that belong to a charset other
   than ASCII are recorded with ADD_CHARSET_DATA.  A byte that cannot
   be decoded is passed through (as itself if ASCII, otherwise via
   BYTE8_TO_CHAR) and counted in CODING->errors.  On return
   CODING->consumed_char, CODING->consumed and CODING->charbuf_used
   describe how far decoding got.

   NOTE(review): ONE_MORE_BYTE (defined elsewhere) presumably jumps
   to `no_more_source' when the source is exhausted, and the
   composition macros used below jump to `invalid_code' and
   `no_more_source'.  */
2317static void
df7492f9 2318decode_coding_emacs_mule (coding)
aa72b389 2319 struct coding_system *coding;
aa72b389 2320{
8f924df7
KH
2321 const unsigned char *src = coding->source + coding->consumed;
2322 const unsigned char *src_end = coding->source + coding->src_bytes;
2323 const unsigned char *src_base;
69a80ea3
KH
2324 int *charbuf = coding->charbuf + coding->charbuf_used;
2325 int *charbuf_end
2326 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 2327 int consumed_chars = 0, consumed_chars_base;
df7492f9 2328 int multibytep = coding->src_multibyte;
24a73b0a 2329 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2330 int char_offset = coding->produced_char;
2331 int last_offset = char_offset;
2332 int last_id = charset_ascii;
119852e7
KH
2333 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2334 int byte_after_cr = -1;
aa72b389 2335
24a73b0a 2336 CODING_GET_INFO (coding, attrs, charset_list);
aa72b389 2337
aa72b389
KH
2338 while (1)
2339 {
df7492f9
KH
2340 int c;
2341
/* Remember where this character starts so that an invalid or
   incomplete sequence can be rewound to it. */
aa72b389 2342 src_base = src;
df7492f9
KH
2343 consumed_chars_base = consumed_chars;
2344
/* Output buffer full: stop. A byte already read ahead after a CR
   is given back by stepping SRC_BASE back by one. */
2345 if (charbuf >= charbuf_end)
b71f6f73
KH
2346 {
2347 if (byte_after_cr >= 0)
2348 src_base--;
2349 break;
2350 }
aa72b389 2351
119852e7
KH
2352 if (byte_after_cr >= 0)
2353 c = byte_after_cr, byte_after_cr = -1;
2354 else
2355 ONE_MORE_BYTE (c);
065e3595
KH
2356 if (c < 0)
2357 {
2358 *charbuf++ = -c;
2359 char_offset++;
2360 }
2361 else if (c < 0x80)
aa72b389 2362 {
119852e7
KH
/* With DOS end-of-line, read the byte following a CR ahead; it is
   processed on the next iteration. */
2363 if (eol_crlf && c == '\r')
2364 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
2365 *charbuf++ = c;
2366 char_offset++;
aa72b389 2367 }
df7492f9
KH
/* 0x80 introduces composition data; the following byte selects
   which of the three formats is decoded. */
2368 else if (c == 0x80)
2369 {
df7492f9 2370 ONE_MORE_BYTE (c);
065e3595
KH
2371 if (c < 0)
2372 goto invalid_code;
781d7a48
KH
2373 if (c - 0xF2 >= COMPOSITION_RELATIVE
2374 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
2375 DECODE_EMACS_MULE_21_COMPOSITION (c);
2376 else if (c < 0xC0)
2377 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2378 else if (c == 0xFF)
2379 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2380 else
2381 goto invalid_code;
2382 }
/* A leading code of a multi-byte sequence: rewind to its first byte
   and let emacs_mule_char decode the whole character. */
2383 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2384 {
2385 int nbytes, nchars;
ff0dacd7
KH
2386 int id;
2387
781d7a48
KH
2388 src = src_base;
2389 consumed_chars = consumed_chars_base;
ff0dacd7 2390 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
2391 if (c < 0)
2392 {
/* -2 is what emacs_mule_char returns from its `no_more_source'
   label; stop and leave the sequence unconsumed. */
2393 if (c == -2)
2394 break;
2395 goto invalid_code;
2396 }
ff0dacd7
KH
/* The charset changed: record the run that just ended unless it
   was ASCII, and start a new run here. */
2397 if (last_id != id)
2398 {
2399 if (last_id != charset_ascii)
69a80ea3 2400 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
2401 last_id = id;
2402 last_offset = char_offset;
2403 }
df7492f9 2404 *charbuf++ = c;
781d7a48
KH
2405 src += nbytes;
2406 consumed_chars += nchars;
df7492f9
KH
2407 char_offset++;
2408 }
4d41e8b7
KH
2409 else
2410 goto invalid_code;
df7492f9
KH
2411 continue;
2412
/* Rewind, re-read only the first byte of the offending sequence and
   pass it through, counting an error. */
2413 invalid_code:
2414 src = src_base;
2415 consumed_chars = consumed_chars_base;
2416 ONE_MORE_BYTE (c);
2417 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2418 char_offset++;
df7492f9
KH
2419 coding->errors++;
2420 }
2421
2422 no_more_source:
/* Record the charset run still open, then report progress up to
   the start of the last unfinished character. */
ff0dacd7 2423 if (last_id != charset_ascii)
69a80ea3 2424 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
2425 coding->consumed_char += consumed_chars_base;
2426 coding->consumed = src_base - coding->source;
2427 coding->charbuf_used = charbuf - coding->charbuf;
2428}
2429
2430
/* Store in CODES the leading code(s) of the emacs-mule form for the
   charset whose emacs-mule id is ID.  CODES[0] receives the first
   leading byte; CODES[1] receives the second leading byte, or 0 if
   there is none.  An ID below 0xA0 is itself the only leading code.
   A larger ID is preceded by 0x9A, 0x9B, 0x9C or 0x9D, chosen by the
   range ID falls in.

   The macro expands to a single statement without a trailing
   semicolon, so it can be used as `MACRO (...);' anywhere, including
   as the body of an `if' that has an `else'.  ID and CODES are
   evaluated more than once; pass expressions without side effects.  */

#define EMACS_MULE_LEADING_CODES(id, codes)       \
  do {                                            \
    if ((id) < 0xA0)                              \
      (codes)[0] = (id), (codes)[1] = 0;          \
    else if ((id) < 0xE0)                         \
      (codes)[0] = 0x9A, (codes)[1] = (id);       \
    else if ((id) < 0xF0)                         \
      (codes)[0] = 0x9B, (codes)[1] = (id);       \
    else if ((id) < 0xF5)                         \
      (codes)[0] = 0x9C, (codes)[1] = (id);       \
    else                                          \
      (codes)[0] = 0x9D, (codes)[1] = (id);       \
  } while (0)
2444
aa72b389 2445
df7492f9
KH
2446static int
2447encode_coding_emacs_mule (coding)
2448 struct coding_system *coding;
2449{
2450 int multibytep = coding->dst_multibyte;
2451 int *charbuf = coding->charbuf;
2452 int *charbuf_end = charbuf + coding->charbuf_used;
2453 unsigned char *dst = coding->destination + coding->produced;
2454 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2455 int safe_room = 8;
df7492f9 2456 int produced_chars = 0;
24a73b0a 2457 Lisp_Object attrs, charset_list;
df7492f9 2458 int c;
ff0dacd7 2459 int preferred_charset_id = -1;
df7492f9 2460
24a73b0a 2461 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2462 if (! EQ (charset_list, Vemacs_mule_charset_list))
2463 {
2464 CODING_ATTR_CHARSET_LIST (attrs)
2465 = charset_list = Vemacs_mule_charset_list;
2466 }
df7492f9
KH
2467
2468 while (charbuf < charbuf_end)
2469 {
2470 ASSURE_DESTINATION (safe_room);
2471 c = *charbuf++;
ff0dacd7
KH
2472
2473 if (c < 0)
2474 {
2475 /* Handle an annotation. */
2476 switch (*charbuf)
2477 {
2478 case CODING_ANNOTATE_COMPOSITION_MASK:
2479 /* Not yet implemented. */
2480 break;
2481 case CODING_ANNOTATE_CHARSET_MASK:
2482 preferred_charset_id = charbuf[3];
2483 if (preferred_charset_id >= 0
2484 && NILP (Fmemq (make_number (preferred_charset_id),
2485 charset_list)))
2486 preferred_charset_id = -1;
2487 break;
2488 default:
2489 abort ();
2490 }
2491 charbuf += -c - 1;
2492 continue;
2493 }
2494
df7492f9
KH
2495 if (ASCII_CHAR_P (c))
2496 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2497 else if (CHAR_BYTE8_P (c))
2498 {
2499 c = CHAR_TO_BYTE8 (c);
2500 EMIT_ONE_BYTE (c);
2501 }
df7492f9 2502 else
aa72b389 2503 {
df7492f9
KH
2504 struct charset *charset;
2505 unsigned code;
2506 int dimension;
2507 int emacs_mule_id;
2508 unsigned char leading_codes[2];
2509
ff0dacd7
KH
2510 if (preferred_charset_id >= 0)
2511 {
2512 charset = CHARSET_FROM_ID (preferred_charset_id);
905ca9d2
KH
2513 if (CHAR_CHARSET_P (c, charset))
2514 code = ENCODE_CHAR (charset, c);
2515 else
2516 charset = char_charset (c, charset_list, &code);
ff0dacd7
KH
2517 }
2518 else
2519 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2520 if (! charset)
2521 {
2522 c = coding->default_char;
2523 if (ASCII_CHAR_P (c))
2524 {
2525 EMIT_ONE_ASCII_BYTE (c);
2526 continue;
2527 }
2528 charset = char_charset (c, charset_list, &code);
2529 }
2530 dimension = CHARSET_DIMENSION (charset);
2531 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2532 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2533 EMIT_ONE_BYTE (leading_codes[0]);
2534 if (leading_codes[1])
2535 EMIT_ONE_BYTE (leading_codes[1]);
2536 if (dimension == 1)
1fa663f9 2537 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2538 else
df7492f9 2539 {
1fa663f9 2540 code |= 0x8080;
df7492f9
KH
2541 EMIT_ONE_BYTE (code >> 8);
2542 EMIT_ONE_BYTE (code & 0xFF);
2543 }
aa72b389 2544 }
aa72b389 2545 }
065e3595 2546 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2547 coding->produced_char += produced_chars;
2548 coding->produced = dst - coding->destination;
2549 return 0;
aa72b389 2550}
b73bfc1c 2551
4ed46869 2552\f
df7492f9 2553/*** 7. ISO2022 handlers ***/
4ed46869
KH
2554
2555/* The following note describes the coding system ISO2022 briefly.
39787efd 2556 Since the intention of this note is to help understand the
5a936b46 2557 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2558 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2559 original document of ISO2022. This is equivalent to the standard
cfb43547 2560 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2561
2562 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2563 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2564 is encoded using bytes less than 128. This may make the encoded
2565 text a little bit longer, but the text passes more easily through
cfb43547 2566 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2567 Significant Bit).
b73bfc1c 2568
cfb43547
DL
2569 There are two kinds of character sets: control character sets and
2570 graphic character sets. The former contain control characters such
4ed46869 2571 as `newline' and `escape' to provide control functions (control
39787efd 2572 functions are also provided by escape sequences). The latter
cfb43547 2573 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2574 two control character sets and many graphic character sets.
2575
2576 Graphic character sets are classified into one of the following
39787efd
KH
2577 four classes, according to the number of bytes (DIMENSION) and
2578 number of characters in one dimension (CHARS) of the set:
2579 - DIMENSION1_CHARS94
2580 - DIMENSION1_CHARS96
2581 - DIMENSION2_CHARS94
2582 - DIMENSION2_CHARS96
2583
2584 In addition, each character set is assigned an identification tag,
cfb43547 2585 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2586 hereafter). The <F> of each character set is decided by ECMA(*)
2587 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2588 (0x30..0x3F are for private use only).
4ed46869
KH
2589
2590 Note (*): ECMA = European Computer Manufacturers Association
2591
cfb43547 2592 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2593 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2594 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2595 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2596 o DIMENSION2_CHARS96 -- none for the moment
2597
39787efd 2598 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2599 C0 [0x00..0x1F] -- control character plane 0
2600 GL [0x20..0x7F] -- graphic character plane 0
2601 C1 [0x80..0x9F] -- control character plane 1
2602 GR [0xA0..0xFF] -- graphic character plane 1
2603
2604 A control character set is directly designated and invoked to C0 or
39787efd
KH
2605 C1 by an escape sequence. The most common case is that:
2606 - ISO646's control character set is designated/invoked to C0, and
2607 - ISO6429's control character set is designated/invoked to C1,
2608 and usually these designations/invocations are omitted in encoded
2609 text. In a 7-bit environment, only C0 can be used, and a control
2610 character for C1 is encoded by an appropriate escape sequence to
2611 fit into the environment. All control characters for C1 are
2612 defined to have corresponding escape sequences.
4ed46869
KH
2613
2614 A graphic character set is at first designated to one of four
2615 graphic registers (G0 through G3), then these graphic registers are
2616 invoked to GL or GR. These designations and invocations can be
2617 done independently. The most common case is that G0 is invoked to
39787efd
KH
2618 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2619 these invocations and designations are omitted in encoded text.
2620 In a 7-bit environment, only GL can be used.
4ed46869 2621
39787efd
KH
2622 When a graphic character set of CHARS94 is invoked to GL, codes
2623 0x20 and 0x7F of the GL area work as control characters SPACE and
2624 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2625 be used.
4ed46869
KH
2626
2627 There are two ways of invocation: locking-shift and single-shift.
2628 With locking-shift, the invocation lasts until the next different
39787efd
KH
2629 invocation, whereas with single-shift, the invocation affects the
2630 following character only and doesn't affect the locking-shift
2631 state. Invocations are done by the following control characters or
2632 escape sequences:
4ed46869
KH
2633
2634 ----------------------------------------------------------------------
39787efd 2635 abbrev function cntrl escape seq description
4ed46869 2636 ----------------------------------------------------------------------
39787efd
KH
2637 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2638 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2639 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2640 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2641 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2642 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2643 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2644 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2645 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2646 ----------------------------------------------------------------------
39787efd
KH
2647 (*) These are not used by any known coding system.
2648
2649 Control characters for these functions are defined by macros
2650 ISO_CODE_XXX in `coding.h'.
4ed46869 2651
39787efd 2652 Designations are done by the following escape sequences:
4ed46869
KH
2653 ----------------------------------------------------------------------
2654 escape sequence description
2655 ----------------------------------------------------------------------
2656 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2657 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2658 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2659 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2660 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2661 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2662 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2663 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2664 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2665 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2666 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2667 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2668 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2669 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2670 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2671 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2672 ----------------------------------------------------------------------
2673
2674 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2675 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2676
2677 Note (*): Although these designations are not allowed in ISO2022,
2678 Emacs accepts them on decoding, and produces them on encoding
39787efd 2679 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2680 7-bit environment, non-locking-shift, and non-single-shift.
2681
2682 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2683 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2684
cfb43547 2685 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2686 same multilingual text in ISO2022. Actually, there exist many
2687 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2688 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2689 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2690 localized platforms), and all of these are variants of ISO2022.
2691
2692 In addition to the above, Emacs handles two more kinds of escape
2693 sequences: ISO6429's direction specification and Emacs' private
2694 sequence for specifying character composition.
2695
39787efd 2696 ISO6429's direction specification takes the following form:
4ed46869
KH
2697 o CSI ']' -- end of the current direction
2698 o CSI '0' ']' -- end of the current direction
2699 o CSI '1' ']' -- start of left-to-right text
2700 o CSI '2' ']' -- start of right-to-left text
2701 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2702 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2703
2704 Character composition specification takes the following form:
ec6d2bb8
KH
2705 o ESC '0' -- start relative composition
2706 o ESC '1' -- end composition
2707 o ESC '2' -- start rule-base composition (*)
2708 o ESC '3' -- start relative composition with alternate chars (**)
2709 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2710 Since these are not standard escape sequences of any ISO standard,
cfb43547 2711 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2712
5a936b46
DL
2713 (*) This form is used only in Emacs 20.7 and older versions,
2714 but newer versions can safely decode it.
cfb43547 2715 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2716 and older versions can't decode it.
ec6d2bb8 2717
cfb43547 2718 Here's a list of example usages of these composition escape
b73bfc1c 2719 sequences (categorized by `enum composition_method').
ec6d2bb8 2720
b73bfc1c 2721 COMPOSITION_RELATIVE:
ec6d2bb8 2722 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2723 COMPOSITION_WITH_RULE:
ec6d2bb8 2724 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2725 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2726 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2727 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2728 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2729
/* Table of 256 entries, one per byte value, giving the
   `enum iso_code_class_type' of that byte.  decode_coding_iso_2022
   dispatches on iso_code_class[c1] for each source byte C1.  */
2730enum iso_code_class_type iso_code_class[256];
2731
df7492f9
KH
/* Return nonzero if the charset whose ID is ID is safe for CODING,
   i.e. ID lies within CODING's safe_charsets table and its entry is
   not 255.  setup_iso_safe_charsets fills that table with 255 and
   then stores a graphic register number for each charset in the
   charset list that gets one.

   A negative ID (the value the charset tables yield for an
   unregistered final byte) is never safe; checking it here keeps the
   table lookup inside its bounds.  Both arguments are evaluated more
   than once.  */
#define SAFE_CHARSET_P(coding, id)              \
  ((id) >= 0                                    \
   && (id) <= (coding)->max_charset_id          \
   && (coding)->safe_charsets[id] != 255)


/* Return nonzero if the value CODING_ISO_INITIAL yields for graphic
   register 1 of the coding system registered for CATEGORY in
   coding_categories is not negative.  */
#define SHIFT_OUT_OK(category)  \
  (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2739
2740static void
f0064e1f
DL
2741setup_iso_safe_charsets (attrs)
2742 Lisp_Object attrs;
df7492f9
KH
2743{
2744 Lisp_Object charset_list, safe_charsets;
2745 Lisp_Object request;
2746 Lisp_Object reg_usage;
2747 Lisp_Object tail;
2748 int reg94, reg96;
2749 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2750 int max_charset_id;
2751
2752 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2753 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2754 && ! EQ (charset_list, Viso_2022_charset_list))
2755 {
2756 CODING_ATTR_CHARSET_LIST (attrs)
2757 = charset_list = Viso_2022_charset_list;
2758 ASET (attrs, coding_attr_safe_charsets, Qnil);
2759 }
2760
2761 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2762 return;
2763
2764 max_charset_id = 0;
2765 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2766 {
2767 int id = XINT (XCAR (tail));
2768 if (max_charset_id < id)
2769 max_charset_id = id;
2770 }
d46c5b12 2771
1b3b981b
AS
2772 safe_charsets = make_uninit_string (max_charset_id + 1);
2773 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9
KH
2774 request = AREF (attrs, coding_attr_iso_request);
2775 reg_usage = AREF (attrs, coding_attr_iso_usage);
2776 reg94 = XINT (XCAR (reg_usage));
2777 reg96 = XINT (XCDR (reg_usage));
2778
2779 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2780 {
2781 Lisp_Object id;
2782 Lisp_Object reg;
2783 struct charset *charset;
2784
2785 id = XCAR (tail);
2786 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2787 reg = Fcdr (Fassq (id, request));
df7492f9 2788 if (! NILP (reg))
8f924df7 2789 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
2790 else if (charset->iso_chars_96)
2791 {
2792 if (reg96 < 4)
8f924df7 2793 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
2794 }
2795 else
2796 {
2797 if (reg94 < 4)
8f924df7 2798 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
2799 }
2800 }
2801 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2802}
d46c5b12 2803
b6871cc7 2804
4ed46869 2805/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2806 Check if a text is encoded in one of ISO-2022 based codig systems.
2807 If it is, return 1, else return 0. */
4ed46869 2808
0a28aafb 2809static int
ff0dacd7 2810detect_coding_iso_2022 (coding, detect_info)
df7492f9 2811 struct coding_system *coding;
ff0dacd7 2812 struct coding_detection_info *detect_info;
4ed46869 2813{
8f924df7
KH
2814 const unsigned char *src = coding->source, *src_base = src;
2815 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 2816 int multibytep = coding->src_multibyte;
ff0dacd7 2817 int single_shifting = 0;
df7492f9
KH
2818 int id;
2819 int c, c1;
2820 int consumed_chars = 0;
2821 int i;
ff0dacd7
KH
2822 int rejected = 0;
2823 int found = 0;
cee53ed4 2824 int composition_count = -1;
ff0dacd7
KH
2825
2826 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2827
2828 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2829 {
2830 struct coding_system *this = &(coding_categories[i]);
2831 Lisp_Object attrs, val;
2832
c6b278e7
KH
2833 if (this->id < 0)
2834 continue;
df7492f9
KH
2835 attrs = CODING_ID_ATTRS (this->id);
2836 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
1b3b981b 2837 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
df7492f9
KH
2838 setup_iso_safe_charsets (attrs);
2839 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 2840 this->max_charset_id = SCHARS (val) - 1;
1b3b981b 2841 this->safe_charsets = SDATA (val);
df7492f9
KH
2842 }
2843
2844 /* A coding system of this category is always ASCII compatible. */
2845 src += coding->head_ascii;
3f003981 2846
ff0dacd7 2847 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2848 {
065e3595 2849 src_base = src;
df7492f9 2850 ONE_MORE_BYTE (c);
4ed46869
KH
2851 switch (c)
2852 {
2853 case ISO_CODE_ESC:
74383408
KH
2854 if (inhibit_iso_escape_detection)
2855 break;
f46869e4 2856 single_shifting = 0;
df7492f9 2857 ONE_MORE_BYTE (c);
d46c5b12 2858 if (c >= '(' && c <= '/')
4ed46869 2859 {
bf9cdd4e 2860 /* Designation sequence for a charset of dimension 1. */
df7492f9 2861 ONE_MORE_BYTE (c1);
d46c5b12 2862 if (c1 < ' ' || c1 >= 0x80
df7492f9 2863 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2864 /* Invalid designation sequence. Just ignore. */
2865 break;
bf9cdd4e
KH
2866 }
2867 else if (c == '$')
2868 {
2869 /* Designation sequence for a charset of dimension 2. */
df7492f9 2870 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2871 if (c >= '@' && c <= 'B')
2872 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2873 id = iso_charset_table[1][0][c];
bf9cdd4e 2874 else if (c >= '(' && c <= '/')
bcf26d6a 2875 {
df7492f9 2876 ONE_MORE_BYTE (c1);
d46c5b12 2877 if (c1 < ' ' || c1 >= 0x80
df7492f9 2878 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2879 /* Invalid designation sequence. Just ignore. */
2880 break;
bcf26d6a 2881 }
bf9cdd4e 2882 else
ff0dacd7 2883 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2884 break;
2885 }
ae9ff118 2886 else if (c == 'N' || c == 'O')
d46c5b12 2887 {
ae9ff118 2888 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2889 single_shifting = 1;
2890 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2891 break;
4ed46869 2892 }
cee53ed4
KH
2893 else if (c == '1')
2894 {
2895 /* End of composition. */
2896 if (composition_count < 0
2897 || composition_count > MAX_COMPOSITION_COMPONENTS)
2898 /* Invalid */
2899 break;
2900 composition_count = -1;
2901 found |= CATEGORY_MASK_ISO;
2902 }
ec6d2bb8
KH
2903 else if (c >= '0' && c <= '4')
2904 {
2905 /* ESC <Fp> for start/end composition. */
cee53ed4 2906 composition_count = 0;
ec6d2bb8
KH
2907 break;
2908 }
bf9cdd4e 2909 else
df7492f9 2910 {
ff0dacd7 2911 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2912 break;
2913 }
d46c5b12
KH
2914
2915 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2916 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2917 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2918 id))
ff0dacd7 2919 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2920 else
ff0dacd7 2921 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2922 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2923 id))
ff0dacd7 2924 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2925 else
ff0dacd7 2926 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2927 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2928 id))
ff0dacd7 2929 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2930 else
ff0dacd7 2931 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2932 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2933 id))
ff0dacd7 2934 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2935 else
ff0dacd7 2936 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2937 break;
2938
4ed46869 2939 case ISO_CODE_SO:
d46c5b12 2940 case ISO_CODE_SI:
ff0dacd7 2941 /* Locking shift out/in. */
74383408
KH
2942 if (inhibit_iso_escape_detection)
2943 break;
f46869e4 2944 single_shifting = 0;
ff0dacd7 2945 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12
KH
2946 break;
2947
4ed46869 2948 case ISO_CODE_CSI:
ff0dacd7 2949 /* Control sequence introducer. */
f46869e4 2950 single_shifting = 0;
ff0dacd7
KH
2951 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2952 found |= CATEGORY_MASK_ISO_8_ELSE;
2953 goto check_extra_latin;
2954
4ed46869
KH
2955 case ISO_CODE_SS2:
2956 case ISO_CODE_SS3:
ff0dacd7
KH
2957 /* Single shift. */
2958 if (inhibit_iso_escape_detection)
2959 break;
75e2a253 2960 single_shifting = 0;
ff0dacd7
KH
2961 rejected |= CATEGORY_MASK_ISO_7BIT;
2962 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2963 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253 2964 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
ff0dacd7
KH
2965 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2966 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253
KH
2967 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2968 if (single_shifting)
2969 break;
ff0dacd7 2970 goto check_extra_latin;
4ed46869
KH
2971
2972 default:
065e3595
KH
2973 if (c < 0)
2974 continue;
4ed46869 2975 if (c < 0x80)
f46869e4 2976 {
cee53ed4
KH
2977 if (composition_count >= 0)
2978 composition_count++;
f46869e4
KH
2979 single_shifting = 0;
2980 break;
2981 }
ff0dacd7 2982 if (c >= 0xA0)
c4825358 2983 {
ff0dacd7
KH
2984 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2985 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2986 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2987 0xA0..0FF. If the byte length is even, we include
2988 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2989 only when we are not single shifting. */
2990 if (! single_shifting
2991 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2992 {
e17de821 2993 int i = 1;
b73bfc1c
KH
2994 while (src < src_end)
2995 {
df7492f9 2996 ONE_MORE_BYTE (c);
b73bfc1c
KH
2997 if (c < 0xA0)
2998 break;
2999 i++;
3000 }
3001
3002 if (i & 1 && src < src_end)
cee53ed4
KH
3003 {
3004 rejected |= CATEGORY_MASK_ISO_8_2;
3005 if (composition_count >= 0)
3006 composition_count += i;
3007 }
f46869e4 3008 else
cee53ed4
KH
3009 {
3010 found |= CATEGORY_MASK_ISO_8_2;
3011 if (composition_count >= 0)
3012 composition_count += i / 2;
3013 }
f46869e4 3014 }
ff0dacd7 3015 break;
4ed46869 3016 }
ff0dacd7
KH
3017 check_extra_latin:
3018 single_shifting = 0;
3019 if (! VECTORP (Vlatin_extra_code_table)
3020 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3021 {
3022 rejected = CATEGORY_MASK_ISO;
3023 break;
3024 }
3025 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3026 & CODING_ISO_FLAG_LATIN_EXTRA)
3027 found |= CATEGORY_MASK_ISO_8_1;
3028 else
3029 rejected |= CATEGORY_MASK_ISO_8_1;
75e2a253 3030 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
3031 }
3032 }
ff0dacd7
KH
3033 detect_info->rejected |= CATEGORY_MASK_ISO;
3034 return 0;
4ed46869 3035
df7492f9 3036 no_more_source:
ff0dacd7
KH
3037 detect_info->rejected |= rejected;
3038 detect_info->found |= (found & ~rejected);
df7492f9 3039 return 1;
4ed46869 3040}
ec6d2bb8 3041
4ed46869 3042
134b9549
KH
/* Record in CODING that the charset of dimension DIM, chars-96 flag
   CHARS_96 and final byte FINAL is designated to graphic register
   REG.

   If FINAL is out of range, names no charset, or names a charset
   that is not safe for CODING, the designation of REG is set to -2
   and CHARS_96 is set to -1.  CHARS_96 is also set to -1 when an
   earlier designation to REG was invalid and this one designates
   ASCII.  In both cases -1 tells the caller that the escape sequence
   should be kept.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int new_id, old_id;                                                 \
                                                                        \
    if ((final) < '0' || (final) >= 128                                 \
        || (new_id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0      \
        || ! SAFE_CHARSET_P (coding, new_id))                           \
      {                                                                 \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
        break;                                                          \
      }                                                                 \
    old_id = CODING_ISO_DESIGNATION (coding, reg);                      \
    /* Apply the charset substitutions the coding system's flags        \
       ask for.  */                                                     \
    if (new_id == charset_jisx0201_roman                                \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN))     \
      new_id = charset_ascii;                                           \
    else if (new_id == charset_jisx0208_1978                            \
             && (CODING_ISO_FLAGS (coding)                              \
                 & CODING_ISO_FLAG_USE_OLDJIS))                         \
      new_id = charset_jisx0208;                                        \
    CODING_ISO_DESIGNATION (coding, reg) = new_id;                      \
    /* If there was an invalid designation to REG previously, and       \
       this designation is ASCII to REG, we should keep this            \
       designation sequence.  */                                        \
    if (old_id == -2 && new_id == charset_ascii)                        \
      chars_96 = -1;                                                    \
  } while (0)
3075
d46c5b12 3076
df7492f9
KH
/* If a composition is in progress, abandon it: copy the characters
   collected in `components' to CHARBUF, advance CHAR_OFFSET, and
   reset the state to COMPOSING_NO.  Do nothing when no composition is
   in progress.  Jump to `no_more_source' if CHARBUF cannot hold
   COMPONENT_IDX more elements.

   For the methods without rules every element of `components' is
   copied; for the other methods only every second element is.  */
#define MAYBE_FINISH_COMPOSITION()                                      \
  do {                                                                  \
    int idx, stride;                                                    \
                                                                        \
    if (composition_state == COMPOSING_NO)                              \
      break;                                                            \
    if (charbuf + component_idx > charbuf_end)                          \
      goto no_more_source;                                              \
    composition_state = COMPOSING_NO;                                   \
    stride = ((method == COMPOSITION_RELATIVE                           \
               || method == COMPOSITION_WITH_ALTCHARS)                  \
              ? 1 : 2);                                                 \
    for (idx = 0; idx < component_idx; idx += stride)                   \
      *charbuf++ = components[idx];                                     \
    if (stride == 1)                                                    \
      char_offset += component_idx;                                     \
    else                                                                \
      char_offset += (component_idx / 2) + 1;                           \
  } while (0)
3101
d46c5b12 3102
aa72b389
KH
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar  composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte following ESC.  An ESC 0 seen in the state
   COMPOSING_COMPONENT_RULE only marks the end of the alternate
   components.  Otherwise any composition in progress is finished and
   a new one is started, provided the terminating ESC 1 is present in
   the remaining source.  If it is not, jump to `invalid_code' when
   this is the last block, else record CODING_RESULT_INSUFFICIENT_SRC
   and jump to `no_more_source'.  */

#define DECODE_COMPOSITION_START(c1)                                    \
  do {                                                                  \
    if ((c1) == '0'                                                     \
        && composition_state == COMPOSING_COMPONENT_RULE)               \
      {                                                                 \
        component_len = component_idx;                                  \
        composition_state = COMPOSING_CHAR;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        const unsigned char *p;                                         \
                                                                        \
        MAYBE_FINISH_COMPOSITION ();                                    \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
          goto no_more_source;                                          \
        for (p = src; p < src_end - 1; p++)                             \
          if (*p == ISO_CODE_ESC && p[1] == '1')                        \
            break;                                                      \
        /* Use >= and not ==: when the start sequence is the very       \
           last thing in the source, SRC equals SRC_END, the loop       \
           above does not run, and P is already past SRC_END - 1.  */   \
        if (p >= src_end - 1)                                           \
          {                                                             \
            if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
              goto invalid_code;                                        \
            /* The current composition doesn't end in the current       \
               source.  */                                              \
            record_conversion_result                                    \
              (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
            goto no_more_source;                                        \
          }                                                             \
                                                                        \
        /* This is surely the start of a composition.  */               \
        method = ((c1) == '0' ? COMPOSITION_RELATIVE                    \
                  : (c1) == '2' ? COMPOSITION_WITH_RULE                 \
                  : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS             \
                  : COMPOSITION_WITH_RULE_ALTCHARS);                    \
        composition_state = ((c1) <= '2' ? COMPOSING_CHAR               \
                             : COMPOSING_COMPONENT_CHAR);               \
        component_idx = component_len = 0;                              \
      }                                                                 \
  } while (0)
3149
ec6d2bb8 3150
df7492f9
KH
/* Handle composition end sequence ESC 1.

   Emit a composition annotation with ADD_COMPOSITION_DATA.  For every
   method other than COMPOSITION_RELATIVE, append the leading part of
   `components' (all of it when COMPONENT_LEN is zero) and overwrite
   the first annotation element with the negative distance from the
   start of the annotation to the current position.  Then emit the
   composed characters themselves, advancing CHAR_OFFSET for each one,
   flag CODING as annotated, and leave the composing state.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int *annotation_head = charbuf;                                     \
    int nchars;                                                         \
    int k;                                                              \
                                                                        \
    if (component_len > 0)                                              \
      nchars = component_idx - component_len;                           \
    else if (method == COMPOSITION_RELATIVE)                            \
      nchars = component_idx;                                           \
    else                                                                \
      nchars = (component_idx + 1) / 2;                                 \
                                                                        \
    ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int nleading = (component_len == 0                              \
                        ? component_idx : component_len);               \
                                                                        \
        for (k = 0; k < nleading; k++)                                  \
          *charbuf++ = components[k];                                   \
        *annotation_head = annotation_head - charbuf;                   \
      }                                                                 \
    if (method == COMPOSITION_WITH_RULE)                                \
      {                                                                 \
        /* Characters and rules alternate; emit only characters.  */    \
        for (k = 0; k < component_idx; k += 2)                          \
          {                                                             \
            *charbuf++ = components[k];                                 \
            char_offset++;                                              \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        for (k = component_len; k < component_idx; k++)                 \
          {                                                             \
            *charbuf++ = components[k];                                 \
            char_offset++;                                              \
          }                                                             \
      }                                                                 \
    coding->annotated = 1;                                              \
    composition_state = COMPOSING_NO;                                   \
  } while (0)
3181
df7492f9 3182
ec6d2bb8
KH
/* Decode a composition rule from the byte C1 (and maybe one more byte
   read from SRC into the enclosing variable `c2'), and store the
   encoded rule back in C1.  C1 is set to 0 when its value is outside
   the ranges of both rule formats.  ONE_MORE_BYTE may jump out when
   the source is exhausted.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if ((c1) >= 93)                                                     \
      (c1) = 0;                                                         \
    else if ((c1) >= 81)        /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        (c1) = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32);            \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (c1) / 9;                                            \
        int nref = (c1) % 9;                                            \
                                                                        \
        if (gref == 4)                                                  \
          gref = 10;                                                    \
        if (nref == 4)                                                  \
          nref = 10;                                                    \
        (c1) = COMPOSITION_ENCODE_RULE (gref, nref);                    \
      }                                                                 \
  } while (0)
88993dfd 3206
d46c5b12 3207
4ed46869
KH
3208/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
3209
b73bfc1c 3210static void
df7492f9 3211decode_coding_iso_2022 (coding)
4ed46869 3212 struct coding_system *coding;
4ed46869 3213{
8f924df7
KH
3214 const unsigned char *src = coding->source + coding->consumed;
3215 const unsigned char *src_end = coding->source + coding->src_bytes;
3216 const unsigned char *src_base;
69a80ea3 3217 int *charbuf = coding->charbuf + coding->charbuf_used;
ff0dacd7 3218 int *charbuf_end
69a80ea3 3219 = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 3220 int consumed_chars = 0, consumed_chars_base;
df7492f9 3221 int multibytep = coding->src_multibyte;
4ed46869 3222 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
3223 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3224 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 3225 int charset_id_2, charset_id_3;
df7492f9
KH
3226 struct charset *charset;
3227 int c;
3228 /* For handling composition sequence. */
3229#define COMPOSING_NO 0
3230#define COMPOSING_CHAR 1
3231#define COMPOSING_RULE 2
3232#define COMPOSING_COMPONENT_CHAR 3
3233#define COMPOSING_COMPONENT_RULE 4
3234
3235 int composition_state = COMPOSING_NO;
3236 enum composition_method method;
3237 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3238 int component_idx;
3239 int component_len;
24a73b0a 3240 Lisp_Object attrs, charset_list;
ff0dacd7
KH
3241 int char_offset = coding->produced_char;
3242 int last_offset = char_offset;
3243 int last_id = charset_ascii;
119852e7
KH
3244 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3245 int byte_after_cr = -1;
df7492f9 3246
24a73b0a 3247 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 3248 setup_iso_safe_charsets (attrs);
287c57d7
KH
3249 /* Charset list may have been changed. */
3250 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
1b3b981b 3251 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
b73bfc1c
KH
3252
3253 while (1)
4ed46869 3254 {
463f5630 3255 int c1, c2;
b73bfc1c
KH
3256
3257 src_base = src;
df7492f9
KH
3258 consumed_chars_base = consumed_chars;
3259
3260 if (charbuf >= charbuf_end)
b71f6f73
KH
3261 {
3262 if (byte_after_cr >= 0)
3263 src_base--;
3264 break;
3265 }
df7492f9 3266
119852e7
KH
3267 if (byte_after_cr >= 0)
3268 c1 = byte_after_cr, byte_after_cr = -1;
3269 else
3270 ONE_MORE_BYTE (c1);
065e3595
KH
3271 if (c1 < 0)
3272 goto invalid_code;
4ed46869 3273
98725083 3274 /* We produce at most one character. */
4ed46869
KH
3275 switch (iso_code_class [c1])
3276 {
3277 case ISO_0x20_or_0x7F:
df7492f9 3278 if (composition_state != COMPOSING_NO)
ec6d2bb8 3279 {
df7492f9
KH
3280 if (composition_state == COMPOSING_RULE
3281 || composition_state == COMPOSING_COMPONENT_RULE)
3282 {
cee53ed4
KH
3283 if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3284 {
3285 DECODE_COMPOSITION_RULE (c1);
3286 components[component_idx++] = c1;
3287 composition_state--;
3288 continue;
3289 }
3290 /* Too long composition. */
3291 MAYBE_FINISH_COMPOSITION ();
df7492f9 3292 }
4ed46869 3293 }
df7492f9
KH
3294 if (charset_id_0 < 0
3295 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
3296 /* This is SPACE or DEL. */
3297 charset = CHARSET_FROM_ID (charset_ascii);
3298 else
3299 charset = CHARSET_FROM_ID (charset_id_0);
3300 break;
4ed46869
KH
3301
3302 case ISO_graphic_plane_0:
781d7a48 3303 if (composition_state != COMPOSING_NO)
b73bfc1c 3304 {
781d7a48
KH
3305 if (composition_state == COMPOSING_RULE
3306 || composition_state == COMPOSING_COMPONENT_RULE)
3307 {
cee53ed4
KH
3308 if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3309 {
3310 DECODE_COMPOSITION_RULE (c1);
3311 components[component_idx++] = c1;
3312 composition_state--;
3313 continue;
3314 }
3315 MAYBE_FINISH_COMPOSITION ();
781d7a48 3316 }
b73bfc1c 3317 }
134b9549
KH
3318 if (charset_id_0 < 0)
3319 charset = CHARSET_FROM_ID (charset_ascii);
3320 else
3321 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
3322 break;
3323
3324 case ISO_0xA0_or_0xFF:
df7492f9
KH
3325 if (charset_id_1 < 0
3326 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3327 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3328 goto invalid_code;
4ed46869
KH
3329 /* This is a graphic character, we fall down ... */
3330
3331 case ISO_graphic_plane_1:
df7492f9
KH
3332 if (charset_id_1 < 0)
3333 goto invalid_code;
3334 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
3335 break;
3336
df7492f9 3337 case ISO_control_0:
119852e7
KH
3338 if (eol_crlf && c1 == '\r')
3339 ONE_MORE_BYTE (byte_after_cr);
df7492f9
KH
3340 MAYBE_FINISH_COMPOSITION ();
3341 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
3342 break;
3343
df7492f9
KH
3344 case ISO_control_1:
3345 MAYBE_FINISH_COMPOSITION ();
3346 goto invalid_code;
3347
4ed46869 3348 case ISO_shift_out:
df7492f9
KH
3349 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3350 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3351 goto invalid_code;
3352 CODING_ISO_INVOCATION (coding, 0) = 1;
3353 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3354 continue;
4ed46869
KH
3355
3356 case ISO_shift_in:
df7492f9
KH
3357 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3358 goto invalid_code;
3359 CODING_ISO_INVOCATION (coding, 0) = 0;
3360 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3361 continue;
4ed46869
KH
3362
3363 case ISO_single_shift_2_7:
3364 case ISO_single_shift_2:
df7492f9
KH
3365 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3366 goto invalid_code;
4ed46869
KH
3367 /* SS2 is handled as an escape sequence of ESC 'N' */
3368 c1 = 'N';
3369 goto label_escape_sequence;
3370
3371 case ISO_single_shift_3:
df7492f9
KH
3372 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3373 goto invalid_code;
4ed46869
KH
3374 /* SS2 is handled as an escape sequence of ESC 'O' */
3375 c1 = 'O';
3376 goto label_escape_sequence;
3377
3378 case ISO_control_sequence_introducer:
3379 /* CSI is handled as an escape sequence of ESC '[' ... */
3380 c1 = '[';
3381 goto label_escape_sequence;
3382
3383 case ISO_escape:
3384 ONE_MORE_BYTE (c1);
3385 label_escape_sequence:
df7492f9 3386 /* Escape sequences handled here are invocation,
4ed46869
KH
3387 designation, direction specification, and character
3388 composition specification. */
3389 switch (c1)
3390 {
3391 case '&': /* revision of following character set */
3392 ONE_MORE_BYTE (c1);
3393 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3394 goto invalid_code;
4ed46869
KH
3395 ONE_MORE_BYTE (c1);
3396 if (c1 != ISO_CODE_ESC)
df7492f9 3397 goto invalid_code;
4ed46869
KH
3398 ONE_MORE_BYTE (c1);
3399 goto label_escape_sequence;
3400
3401 case '$': /* designation of 2-byte character set */
df7492f9
KH
3402 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3403 goto invalid_code;
134b9549
KH
3404 {
3405 int reg, chars96;
3406
3407 ONE_MORE_BYTE (c1);
3408 if (c1 >= '@' && c1 <= 'B')
3409 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3410 or JISX0208.1980 */
134b9549
KH
3411 reg = 0, chars96 = 0;
3412 }
3413 else if (c1 >= 0x28 && c1 <= 0x2B)
3414 { /* designation of DIMENSION2_CHARS94 character set */
3415 reg = c1 - 0x28, chars96 = 0;
3416 ONE_MORE_BYTE (c1);
3417 }
3418 else if (c1 >= 0x2C && c1 <= 0x2F)
3419 { /* designation of DIMENSION2_CHARS96 character set */
3420 reg = c1 - 0x2C, chars96 = 1;
3421 ONE_MORE_BYTE (c1);
3422 }
3423 else
3424 goto invalid_code;
3425 DECODE_DESIGNATION (reg, 2, chars96, c1);
3426 /* We must update these variables now. */
3427 if (reg == 0)
3428 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3429 else if (reg == 1)
3430 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3431 if (chars96 < 0)
3432 goto invalid_code;
3433 }
b73bfc1c 3434 continue;
4ed46869
KH
3435
3436 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3437 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3438 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3439 goto invalid_code;
3440 CODING_ISO_INVOCATION (coding, 0) = 2;
3441 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3442 continue;
4ed46869
KH
3443
3444 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3445 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3446 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3447 goto invalid_code;
3448 CODING_ISO_INVOCATION (coding, 0) = 3;
3449 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3450 continue;
4ed46869
KH
3451
3452 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3453 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3454 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3455 goto invalid_code;
134b9549
KH
3456 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3457 if (charset_id_2 < 0)
3458 charset = CHARSET_FROM_ID (charset_ascii);
3459 else
3460 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3461 ONE_MORE_BYTE (c1);
e7046a18 3462 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3463 goto invalid_code;
4ed46869
KH
3464 break;
3465
3466 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3467 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3468 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3469 goto invalid_code;
134b9549
KH
3470 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3471 if (charset_id_3 < 0)
3472 charset = CHARSET_FROM_ID (charset_ascii);
3473 else
3474 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3475 ONE_MORE_BYTE (c1);
e7046a18 3476 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3477 goto invalid_code;
4ed46869
KH
3478 break;
3479
ec6d2bb8 3480 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3481 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3482 goto invalid_code;
ec6d2bb8 3483 DECODE_COMPOSITION_START (c1);
b73bfc1c 3484 continue;
4ed46869 3485
ec6d2bb8 3486 case '1': /* end composition */
df7492f9
KH
3487 if (composition_state == COMPOSING_NO)
3488 goto invalid_code;
3489 DECODE_COMPOSITION_END ();
b73bfc1c 3490 continue;
4ed46869
KH
3491
3492 case '[': /* specification of direction */
df7492f9
KH
3493 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3494 goto invalid_code;
4ed46869 3495 /* For the moment, nested direction is not supported.
d46c5b12 3496 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3497 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3498 ONE_MORE_BYTE (c1);
3499 switch (c1)
3500 {
3501 case ']': /* end of the current direction */
d46c5b12 3502 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3503
3504 case '0': /* end of the current direction */
3505 case '1': /* start of left-to-right direction */
3506 ONE_MORE_BYTE (c1);
3507 if (c1 == ']')
d46c5b12 3508 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3509 else
df7492f9 3510 goto invalid_code;
4ed46869
KH
3511 break;
3512
3513 case '2': /* start of right-to-left direction */
3514 ONE_MORE_BYTE (c1);
3515 if (c1 == ']')
d46c5b12 3516 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3517 else
df7492f9 3518 goto invalid_code;
4ed46869
KH
3519 break;
3520
3521 default:
df7492f9 3522 goto invalid_code;
4ed46869 3523 }
b73bfc1c 3524 continue;
4ed46869 3525
103e0180 3526 case '%':
103e0180
KH
3527 ONE_MORE_BYTE (c1);
3528 if (c1 == '/')
3529 {
3530 /* CTEXT extended segment:
3531 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3532 We keep these bytes as is for the moment.
3533 They may be decoded by post-read-conversion. */
3534 int dim, M, L;
4776e638 3535 int size;
8f924df7 3536
103e0180
KH
3537 ONE_MORE_BYTE (dim);
3538 ONE_MORE_BYTE (M);
3539 ONE_MORE_BYTE (L);
3540 size = ((M - 128) * 128) + (L - 128);
4776e638
KH
3541 if (charbuf + 8 + size > charbuf_end)
3542 goto break_loop;
3543 *charbuf++ = ISO_CODE_ESC;
3544 *charbuf++ = '%';
3545 *charbuf++ = '/';
3546 *charbuf++ = dim;
3547 *charbuf++ = BYTE8_TO_CHAR (M);
3548 *charbuf++ = BYTE8_TO_CHAR (L);
103e0180
KH
3549 while (size-- > 0)
3550 {
3551 ONE_MORE_BYTE (c1);
4776e638 3552 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3553 }
103e0180
KH
3554 }
3555 else if (c1 == 'G')
3556 {
103e0180
KH
3557 /* XFree86 extension for embedding UTF-8 in CTEXT:
3558 ESC % G --UTF-8-BYTES-- ESC % @
3559 We keep these bytes as is for the moment.
3560 They may be decoded by post-read-conversion. */
4776e638
KH
3561 int *p = charbuf;
3562
3563 if (p + 6 > charbuf_end)
3564 goto break_loop;
3565 *p++ = ISO_CODE_ESC;
3566 *p++ = '%';
3567 *p++ = 'G';
3568 while (p < charbuf_end)
103e0180
KH
3569 {
3570 ONE_MORE_BYTE (c1);
3571 if (c1 == ISO_CODE_ESC
3572 && src + 1 < src_end
3573 && src[0] == '%'
3574 && src[1] == '@')
9ffd559c
KH
3575 {
3576 src += 2;
3577 break;
3578 }
4776e638 3579 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3580 }
4776e638
KH
3581 if (p + 3 > charbuf_end)
3582 goto break_loop;
3583 *p++ = ISO_CODE_ESC;
3584 *p++ = '%';
3585 *p++ = '@';
3586 charbuf = p;
103e0180
KH
3587 }
3588 else
4776e638 3589 goto invalid_code;
103e0180 3590 continue;
4776e638 3591 break;
103e0180 3592
4ed46869 3593 default:
df7492f9
KH
3594 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3595 goto invalid_code;
134b9549
KH
3596 {
3597 int reg, chars96;
3598
3599 if (c1 >= 0x28 && c1 <= 0x2B)
3600 { /* designation of DIMENSION1_CHARS94 character set */
3601 reg = c1 - 0x28, chars96 = 0;
3602 ONE_MORE_BYTE (c1);
3603 }
3604 else if (c1 >= 0x2C && c1 <= 0x2F)
3605 { /* designation of DIMENSION1_CHARS96 character set */
3606 reg = c1 - 0x2C, chars96 = 1;
3607 ONE_MORE_BYTE (c1);
3608 }
3609 else
3610 goto invalid_code;
3611 DECODE_DESIGNATION (reg, 1, chars96, c1);
3612 /* We must update these variables now. */
3613 if (reg == 0)
3614 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3615 else if (reg == 1)
3616 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3617 if (chars96 < 0)
3618 goto invalid_code;
3619 }
b73bfc1c 3620 continue;
4ed46869 3621 }
b73bfc1c 3622 }
4ed46869 3623
ff0dacd7
KH
3624 if (charset->id != charset_ascii
3625 && last_id != charset->id)
3626 {
3627 if (last_id != charset_ascii)
69a80ea3 3628 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
3629 last_id = charset->id;
3630 last_offset = char_offset;
3631 }
3632
b73bfc1c 3633 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3634 Produce a decoded character while getting 2nd position code
3635 C2 if necessary. */
3636 c1 &= 0x7F;
3637 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3638 {
3639 ONE_MORE_BYTE (c2);
df7492f9 3640 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3641 /* C2 is not in a valid range. */
df7492f9
KH
3642 goto invalid_code;
3643 c1 = (c1 << 8) | (c2 & 0x7F);
3644 if (CHARSET_DIMENSION (charset) > 2)
3645 {
3646 ONE_MORE_BYTE (c2);
3647 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3648 /* C2 is not in a valid range. */
3649 goto invalid_code;
3650 c1 = (c1 << 8) | (c2 & 0x7F);
3651 }
3652 }
3653
3654 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3655 if (c < 0)
3656 {
3657 MAYBE_FINISH_COMPOSITION ();
3658 for (; src_base < src; src_base++, char_offset++)
3659 {
3660 if (ASCII_BYTE_P (*src_base))
3661 *charbuf++ = *src_base;
3662 else
3663 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3664 }
3665 }
3666 else if (composition_state == COMPOSING_NO)
3667 {
3668 *charbuf++ = c;
3669 char_offset++;
4ed46869 3670 }
df7492f9 3671 else
781d7a48 3672 {
cee53ed4
KH
3673 if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3674 {
3675 components[component_idx++] = c;
3676 if (method == COMPOSITION_WITH_RULE
3677 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3678 && composition_state == COMPOSING_COMPONENT_CHAR))
3679 composition_state++;
3680 }
3681 else
3682 {
3683 MAYBE_FINISH_COMPOSITION ();
3684 *charbuf++ = c;
3685 char_offset++;
3686 }
4ed46869
KH
3687 }
3688 continue;
3689
df7492f9
KH
3690 invalid_code:
3691 MAYBE_FINISH_COMPOSITION ();
4ed46869 3692 src = src_base;
df7492f9
KH
3693 consumed_chars = consumed_chars_base;
3694 ONE_MORE_BYTE (c);
065e3595 3695 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3696 char_offset++;
df7492f9 3697 coding->errors++;
4776e638
KH
3698 continue;
3699
3700 break_loop:
3701 break;
4ed46869 3702 }
fb88bf2d 3703
df7492f9 3704 no_more_source:
ff0dacd7 3705 if (last_id != charset_ascii)
69a80ea3 3706 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
3707 coding->consumed_char += consumed_chars_base;
3708 coding->consumed = src_base - coding->source;
3709 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3710}
3711
b73bfc1c 3712
f4dee582 3713/* ISO2022 encoding stuff. */
4ed46869
KH
3714
3715/*
f4dee582 3716 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3717 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3718 variant has the following specifications:
df7492f9 3719 1. Initial designation to G0 thru G3.
4ed46869
KH
3720 2. Allows short-form designation?
3721 3. ASCII should be designated to G0 before control characters?
3722 4. ASCII should be designated to G0 at end of line?
3723 5. 7-bit environment or 8-bit environment?
3724 6. Use locking-shift?
3725 7. Use Single-shift?
3726 And the following two are only for Japanese:
3727 8. Use ASCII in place of JIS0201-1976-Roman?
3728 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3729 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3730 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3731 details.
4ed46869
KH
3732*/
3733
/* Emit, through the EMIT_* macros, the escape sequence designating
   CHARSET to graphic register REG, then record that designation in
   CODING.

   If CODING has CODING_ISO_FLAG_REVISION set and CHARSET has a
   non-negative revision number, `ESC & <'@' + revision>' is emitted
   first.  A one-dimensional charset is designated with
   `ESC <intermediate> <final>'; any other charset with
   `ESC $ [<intermediate>] <final>'.  The intermediate byte is taken
   from "()*+" for a 94-charset and from ",-./" for a 96-charset,
   indexed by REG.  For a multi-dimensional 94-charset the
   intermediate byte is left out (short form) only when REG is 0, the
   final byte is '@', 'A' or 'B', and CODING_ISO_FLAG_LONG_FORM is
   not set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    const char *designation_inter_94 = "()*+";                          \
    const char *designation_inter_96 = ",-./";                          \
    unsigned char designation_final = CHARSET_ISO_FINAL (charset);      \
    int designation_revision = -1;                                      \
    int designation_inter;                                              \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      designation_revision = CHARSET_ISO_REVISION (charset);            \
                                                                        \
    /* Optional revision announcement.  */                              \
    if (designation_revision >= 0)                                      \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + designation_revision);                     \
      }                                                                 \
                                                                        \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          EMIT_ONE_ASCII_BYTE (designation_inter_96[reg]);              \
        else if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM) \
                 || (reg) != 0                                          \
                 || designation_final < '@'                             \
                 || designation_final > 'B')                            \
          /* Long form: the intermediate byte cannot be omitted.  */    \
          EMIT_ONE_ASCII_BYTE (designation_inter_94[reg]);              \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          designation_inter = designation_inter_96[reg];                \
        else                                                            \
          designation_inter = designation_inter_94[reg];                \
        EMIT_ONE_ASCII_BYTE (designation_inter);                        \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (designation_final);                            \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3781
df7492f9 3782
4ed46869
KH
/* ISO2022 single-shift functions.  ENCODE_SINGLE_SHIFT_2 and
   ENCODE_SINGLE_SHIFT_3 emit single-shift-2 and single-shift-3
   respectively: the two-byte escape sequence (ESC N / ESC O) when
   the coding system has CODING_ISO_FLAG_SEVEN_BITS set, otherwise
   the single control byte ISO_CODE_SS2 / ISO_CODE_SS3.  Both then
   mark the variable `coding' as being in single-shifting state.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3805
df7492f9 3806
4ed46869
KH
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one emits
   the code and then records in the variable `coding' which graphic
   register (0, 1, 2 or 3) is now invoked to graphic plane 0.

   Codes emitted:
     shift-in          ISO_CODE_SI
     shift-out         ISO_CODE_SO
     locking-shift-2   ESC n
     locking-shift-3   ESC o  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* The final byte must be 'o' here; 'n' is locking-shift-2 and would
   make a reader invoke G2 instead of G3.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
3837
df7492f9 3838
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when the coding system has
   CODING_ISO_FLAG_USE_ROMAN set and CHARSET is ASCII, it is replaced
   by the JISX0201-Roman charset.  The macro uses the variables
   `coding', `dst' and `produced_chars' of the expansion site.

   The body is a loop: if CHARSET is not currently invoked,
   encode_invocation_designation is called and the tests are
   repeated; the loop is left as soon as the byte has been emitted.
   C1 is parenthesized at every use so that an expression argument is
   masked as a whole.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single-shift was just emitted; it covers this one          \
           character only.  */                                          \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3881
df7492f9 3882
f4dee582
RS
/* Emit the two bytes of a DIMENSION2 character of CHARSET whose
   position-codes are C1 and C2, first producing whatever designation
   and invocation codes are needed.

   CHARSET must be an lvalue: when the coding system has
   CODING_ISO_FLAG_USE_OLDJIS set and CHARSET is charset_jisx0208, it
   is replaced by charset_jisx0208_1978.  The macro uses the
   variables `coding', `dst' and `produced_chars' of the expansion
   site.  The body loops until the character has been emitted.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* Character following a single-shift.  */                      \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        else                                                            \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        /* CHARSET is invoked to graphic plane 0.  */                   \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        /* CHARSET is invoked to graphic plane 1.  */                   \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
    /* Not invoked to any graphic plane yet: designate and/or invoke    \
       CHARSET, then go round again to emit the character.  */          \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
3925
05e6f5dc 3926
df7492f9
KH
/* Encode character C of CHARSET: obtain its code point with
   ENCODE_CHAR and hand it to the DIMENSION1 macro when CHARSET is
   one-dimensional, otherwise to the DIMENSION2 macro with the code
   point split into (code >> 8) and (code & 0xFF).  CHARSET must be
   an lvalue because those macros may assign to it.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                \
  do {                                                                  \
    int position_code = ENCODE_CHAR ((charset), (c));                   \
                                                                        \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), position_code >> 8,   \
                                       position_code & 0xFF);           \
    else                                                                \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), position_code);       \
  } while (0)
bdd9fb48 3936
05e6f5dc 3937
4ed46869 3938/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3939 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3940 Return new DST. */
3941
3942unsigned char *
df7492f9
KH
3943encode_invocation_designation (charset, coding, dst, p_nchars)
3944 struct charset *charset;
4ed46869
KH
3945 struct coding_system *coding;
3946 unsigned char *dst;
df7492f9 3947 int *p_nchars;
4ed46869 3948{
df7492f9
KH
3949 int multibytep = coding->dst_multibyte;
3950 int produced_chars = *p_nchars;
4ed46869 3951 int reg; /* graphic register number */
df7492f9 3952 int id = CHARSET_ID (charset);
4ed46869
KH
3953
3954 /* At first, check designations. */
3955 for (reg = 0; reg < 4; reg++)
df7492f9 3956 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3957 break;
3958
3959 if (reg >= 4)
3960 {
3961 /* CHARSET is not yet designated to any graphic registers. */
3962 /* At first check the requested designation. */
df7492f9
KH
3963 reg = CODING_ISO_REQUEST (coding, id);
3964 if (reg < 0)
1ba9e4ab
KH
3965 /* Since CHARSET requests no special designation, designate it
3966 to graphic register 0. */
4ed46869
KH
3967 reg = 0;
3968
3969 ENCODE_DESIGNATION (charset, reg, coding);
3970 }
3971
df7492f9
KH
3972 if (CODING_ISO_INVOCATION (coding, 0) != reg
3973 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3974 {
3975 /* Since the graphic register REG is not invoked to any graphic
3976 planes, invoke it to graphic plane 0. */
3977 switch (reg)
3978 {
3979 case 0: /* graphic register 0 */
3980 ENCODE_SHIFT_IN;
3981 break;
3982
3983 case 1: /* graphic register 1 */
3984 ENCODE_SHIFT_OUT;
3985 break;
3986
3987 case 2: /* graphic register 2 */
df7492f9 3988 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3989 ENCODE_SINGLE_SHIFT_2;
3990 else
3991 ENCODE_LOCKING_SHIFT_2;
3992 break;
3993
3994 case 3: /* graphic register 3 */
df7492f9 3995 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3996 ENCODE_SINGLE_SHIFT_3;
3997 else
3998 ENCODE_LOCKING_SHIFT_3;
3999 break;
4000 }
4001 }
b73bfc1c 4002
df7492f9 4003 *p_nchars = produced_chars;
4ed46869
KH
4004 return dst;
4005}
4006
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits `ESC [' when the coding
   system has the CODING_ISO_FLAG_SEVEN_BITS bit set and the single
   byte ISO_CODE_CSI otherwise.  The flag is tested as a bit, not
   compared against the whole flag word, so other flags being set
   does not change the choice.  It is an object-like macro and must
   be used without an argument list.

   ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R emit the introducer
   followed by `2 ]' and `0 ]' respectively.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)


#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)


#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
4ed46869 4030
4ed46869
KH
4031
/* Bring the graphic planes and registers back to their initial
   state: emit shift-in if graphic plane 0 does not currently invoke
   register 0, then, for every register that has an initial charset
   (initial value >= 0) differing from its current designation,
   emit the designation of that initial charset.  Uses the variable
   `coding' of the expansion site.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        int initial_id = CODING_ISO_INITIAL (coding, reg);              \
                                                                        \
        if (initial_id < 0                                              \
            || CODING_ISO_DESIGNATION (coding, reg) == initial_id)      \
          continue;                                                     \
        charset = CHARSET_FROM_ID (initial_id);                         \
        ENCODE_DESIGNATION (charset, reg, coding);                      \
      }                                                                 \
  } while (0)
4050
df7492f9 4051
bdd9fb48 4052/* Produce designation sequences of charsets in the line started from
b73bfc1c 4053 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
4054
4055 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
4056 find all the necessary designations. */
4057
b73bfc1c 4058static unsigned char *
df7492f9 4059encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 4060 struct coding_system *coding;
df7492f9
KH
4061 int *charbuf, *charbuf_end;
4062 unsigned char *dst;
e0e989f6 4063{
df7492f9 4064 struct charset *charset;
bdd9fb48
KH
4065 /* Table of charsets to be designated to each graphic register. */
4066 int r[4];
df7492f9
KH
4067 int c, found = 0, reg;
4068 int produced_chars = 0;
4069 int multibytep = coding->dst_multibyte;
4070 Lisp_Object attrs;
4071 Lisp_Object charset_list;
4072
4073 attrs = CODING_ID_ATTRS (coding->id);
4074 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4075 if (EQ (charset_list, Qiso_2022))
4076 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
4077
4078 for (reg = 0; reg < 4; reg++)
4079 r[reg] = -1;
4080
b73bfc1c 4081 while (found < 4)
e0e989f6 4082 {
df7492f9
KH
4083 int id;
4084
4085 c = *charbuf++;
b73bfc1c
KH
4086 if (c == '\n')
4087 break;
df7492f9
KH
4088 charset = char_charset (c, charset_list, NULL);
4089 id = CHARSET_ID (charset);
4090 reg = CODING_ISO_REQUEST (coding, id);
4091 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
4092 {
4093 found++;
df7492f9 4094 r[reg] = id;
bdd9fb48 4095 }
bdd9fb48
KH
4096 }
4097
4098 if (found)
4099 {
4100 for (reg = 0; reg < 4; reg++)
4101 if (r[reg] >= 0
df7492f9
KH
4102 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4103 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 4104 }
b73bfc1c
KH
4105
4106 return dst;
e0e989f6
KH
4107}
4108
4ed46869
KH
4109/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
4110
df7492f9
KH
4111static int
4112encode_coding_iso_2022 (coding)
4ed46869 4113 struct coding_system *coding;
4ed46869 4114{
df7492f9
KH
4115 int multibytep = coding->dst_multibyte;
4116 int *charbuf = coding->charbuf;
4117 int *charbuf_end = charbuf + coding->charbuf_used;
4118 unsigned char *dst = coding->destination + coding->produced;
4119 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4120 int safe_room = 16;
4121 int bol_designation
4122 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4123 && CODING_ISO_BOL (coding));
4124 int produced_chars = 0;
4125 Lisp_Object attrs, eol_type, charset_list;
4126 int ascii_compatible;
b73bfc1c 4127 int c;
ff0dacd7 4128 int preferred_charset_id = -1;
05e6f5dc 4129
24a73b0a
KH
4130 CODING_GET_INFO (coding, attrs, charset_list);
4131 eol_type = CODING_ID_EOL_TYPE (coding->id);
4132 if (VECTORP (eol_type))
4133 eol_type = Qunix;
4134
004068e4 4135 setup_iso_safe_charsets (attrs);
ff0dacd7 4136 /* Charset list may have been changed. */
287c57d7 4137 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
1b3b981b 4138 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
0eecad43 4139
df7492f9 4140 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
bdd9fb48 4141
df7492f9 4142 while (charbuf < charbuf_end)
4ed46869 4143 {
df7492f9 4144 ASSURE_DESTINATION (safe_room);
b73bfc1c 4145
df7492f9 4146 if (bol_designation)
b73bfc1c 4147 {
df7492f9 4148 unsigned char *dst_prev = dst;
4ed46869 4149
bdd9fb48 4150 /* We have to produce designation sequences if any now. */
df7492f9
KH
4151 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4152 bol_designation = 0;
4153 /* We are sure that designation sequences are all ASCII bytes. */
4154 produced_chars += dst - dst_prev;
e0e989f6
KH
4155 }
4156
df7492f9 4157 c = *charbuf++;
ec6d2bb8 4158
ff0dacd7
KH
4159 if (c < 0)
4160 {
4161 /* Handle an annotation. */
4162 switch (*charbuf)
ec6d2bb8 4163 {
ff0dacd7
KH
4164 case CODING_ANNOTATE_COMPOSITION_MASK:
4165 /* Not yet implemented. */
4166 break;
4167 case CODING_ANNOTATE_CHARSET_MASK:
cf7dfdf5 4168 preferred_charset_id = charbuf[2];
ff0dacd7
KH
4169 if (preferred_charset_id >= 0
4170 && NILP (Fmemq (make_number (preferred_charset_id),
4171 charset_list)))
4172 preferred_charset_id = -1;
4173 break;
4174 default:
4175 abort ();
4ed46869 4176 }
ff0dacd7
KH
4177 charbuf += -c - 1;
4178 continue;
4ed46869 4179 }
ec6d2bb8 4180
b73bfc1c
KH
4181 /* Now encode the character C. */
4182 if (c < 0x20 || c == 0x7F)
4183 {
df7492f9
KH
4184 if (c == '\n'
4185 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 4186 {
df7492f9
KH
4187 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4188 ENCODE_RESET_PLANE_AND_REGISTER ();
4189 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 4190 {
df7492f9
KH
4191 int i;
4192
4193 for (i = 0; i < 4; i++)
4194 CODING_ISO_DESIGNATION (coding, i)
4195 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 4196 }
df7492f9
KH
4197 bol_designation
4198 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 4199 }
df7492f9
KH
4200 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4201 ENCODE_RESET_PLANE_AND_REGISTER ();
4202 EMIT_ONE_ASCII_BYTE (c);
4ed46869 4203 }
df7492f9 4204 else if (ASCII_CHAR_P (c))
88993dfd 4205 {
df7492f9
KH
4206 if (ascii_compatible)
4207 EMIT_ONE_ASCII_BYTE (c);
93dec019 4208 else
19a8d9e0 4209 {
bf16eb23
KH
4210 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4211 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 4212 }
4ed46869 4213 }
16eafb5d 4214 else if (CHAR_BYTE8_P (c))
88993dfd 4215 {
16eafb5d
KH
4216 c = CHAR_TO_BYTE8 (c);
4217 EMIT_ONE_BYTE (c);
88993dfd 4218 }
b73bfc1c 4219 else
df7492f9 4220 {
ff0dacd7 4221 struct charset *charset;
b73bfc1c 4222
ff0dacd7
KH
4223 if (preferred_charset_id >= 0)
4224 {
4225 charset = CHARSET_FROM_ID (preferred_charset_id);
4226 if (! CHAR_CHARSET_P (c, charset))
4227 charset = char_charset (c, charset_list, NULL);
4228 }
4229 else
4230 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
4231 if (!charset)
4232 {
41cbe562
KH
4233 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4234 {
4235 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4236 charset = CHARSET_FROM_ID (charset_ascii);
4237 }
4238 else
4239 {
4240 c = coding->default_char;
4241 charset = char_charset (c, charset_list, NULL);
4242 }
df7492f9
KH
4243 }
4244 ENCODE_ISO_CHARACTER (charset, c);
4245 }
84fbb8a0 4246 }
b73bfc1c 4247
df7492f9
KH
4248 if (coding->mode & CODING_MODE_LAST_BLOCK
4249 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4250 {
4251 ASSURE_DESTINATION (safe_room);
4252 ENCODE_RESET_PLANE_AND_REGISTER ();
4253 }
065e3595 4254 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4255 CODING_ISO_BOL (coding) = bol_designation;
4256 coding->produced_char += produced_chars;
4257 coding->produced = dst - coding->destination;
4258 return 0;
4ed46869
KH
4259}
4260
4261\f
df7492f9 4262/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 4263
df7492f9 4264/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
4265 quite widely. So, for the moment, Emacs supports them in the bare
4266 C code. But, in the future, they may be supported only by CCL. */
4267
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, with the two position-codes divided and
   shifted so that they fit in the ranges below.

   --- CODE RANGE of SJIS ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   KATAKANA-JISX0201    0xA0 .. 0xDF
   JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
            (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
   -------------------------------

*/
4284
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and is encoded in two bytes.

   --- CODE RANGE of BIG5 ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   Big5 (1st byte)      0xA1 .. 0xFE
        (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
   --------------------------

  */
4ed46869
KH
4297
4298/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4299 Check if a text is encoded in SJIS. If it is, return
df7492f9 4300 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 4301
0a28aafb 4302static int
ff0dacd7 4303detect_coding_sjis (coding, detect_info)
df7492f9 4304 struct coding_system *coding;
ff0dacd7 4305 struct coding_detection_info *detect_info;
4ed46869 4306{
065e3595 4307 const unsigned char *src = coding->source, *src_base;
8f924df7 4308 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4309 int multibytep = coding->src_multibyte;
4310 int consumed_chars = 0;
4311 int found = 0;
b73bfc1c 4312 int c;
df7492f9 4313
ff0dacd7 4314 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
4315 /* A coding system of this category is always ASCII compatible. */
4316 src += coding->head_ascii;
4ed46869 4317
b73bfc1c 4318 while (1)
4ed46869 4319 {
065e3595 4320 src_base = src;
df7492f9 4321 ONE_MORE_BYTE (c);
682169fe
KH
4322 if (c < 0x80)
4323 continue;
df7492f9 4324 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 4325 {
df7492f9 4326 ONE_MORE_BYTE (c);
682169fe 4327 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 4328 break;
ff0dacd7 4329 found = CATEGORY_MASK_SJIS;
4ed46869 4330 }
df7492f9 4331 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 4332 found = CATEGORY_MASK_SJIS;
df7492f9
KH
4333 else
4334 break;
4ed46869 4335 }
ff0dacd7 4336 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
4337 return 0;
4338
4339 no_more_source:
065e3595 4340 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4341 {
ff0dacd7 4342 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 4343 return 0;
4ed46869 4344 }
ff0dacd7
KH
4345 detect_info->found |= found;
4346 return 1;
4ed46869
KH
4347}
4348
4349/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4350 Check if a text is encoded in BIG5. If it is, return
df7492f9 4351 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 4352
0a28aafb 4353static int
ff0dacd7 4354detect_coding_big5 (coding, detect_info)
df7492f9 4355 struct coding_system *coding;
ff0dacd7 4356 struct coding_detection_info *detect_info;
4ed46869 4357{
065e3595 4358 const unsigned char *src = coding->source, *src_base;
8f924df7 4359 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4360 int multibytep = coding->src_multibyte;
4361 int consumed_chars = 0;
4362 int found = 0;
b73bfc1c 4363 int c;
fa42c37f 4364
ff0dacd7 4365 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4366 /* A coding system of this category is always ASCII compatible. */
4367 src += coding->head_ascii;
fa42c37f 4368
b73bfc1c 4369 while (1)
fa42c37f 4370 {
065e3595 4371 src_base = src;
df7492f9
KH
4372 ONE_MORE_BYTE (c);
4373 if (c < 0x80)
fa42c37f 4374 continue;
df7492f9 4375 if (c >= 0xA1)
fa42c37f 4376 {
df7492f9
KH
4377 ONE_MORE_BYTE (c);
4378 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4379 return 0;
ff0dacd7 4380 found = CATEGORY_MASK_BIG5;
fa42c37f 4381 }
df7492f9
KH
4382 else
4383 break;
fa42c37f 4384 }
ff0dacd7 4385 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4386 return 0;
fa42c37f 4387
df7492f9 4388 no_more_source:
065e3595 4389 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4390 {
ff0dacd7 4391 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4392 return 0;
4393 }
ff0dacd7
KH
4394 detect_info->found |= found;
4395 return 1;
fa42c37f
KH
4396}
4397
4ed46869
KH
4398/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4399 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
fa42c37f 4400
b73bfc1c 4401static void
df7492f9 4402decode_coding_sjis (coding)
4ed46869 4403 struct coding_system *coding;
4ed46869 4404{
8f924df7
KH
4405 const unsigned char *src = coding->source + coding->consumed;
4406 const unsigned char *src_end = coding->source + coding->src_bytes;
4407 const unsigned char *src_base;
69a80ea3
KH
4408 int *charbuf = coding->charbuf + coding->charbuf_used;
4409 int *charbuf_end
4410 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4411 int consumed_chars = 0, consumed_chars_base;
4412 int multibytep = coding->src_multibyte;
4413 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4414 struct charset *charset_kanji2;
24a73b0a 4415 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4416 int char_offset = coding->produced_char;
4417 int last_offset = char_offset;
4418 int last_id = charset_ascii;
119852e7
KH
4419 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4420 int byte_after_cr = -1;
a5d301df 4421
24a73b0a 4422 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4423
4424 val = charset_list;
4425 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4426 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4427 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4428 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4429
b73bfc1c 4430 while (1)
4ed46869 4431 {
df7492f9 4432 int c, c1;
24a73b0a 4433 struct charset *charset;
fa42c37f 4434
b73bfc1c 4435 src_base = src;
df7492f9 4436 consumed_chars_base = consumed_chars;
fa42c37f 4437
df7492f9 4438 if (charbuf >= charbuf_end)
b71f6f73
KH
4439 {
4440 if (byte_after_cr >= 0)
4441 src_base--;
4442 break;
4443 }
df7492f9 4444
119852e7
KH
4445 if (byte_after_cr >= 0)
4446 c = byte_after_cr, byte_after_cr = -1;
4447 else
4448 ONE_MORE_BYTE (c);
065e3595
KH
4449 if (c < 0)
4450 goto invalid_code;
24a73b0a 4451 if (c < 0x80)
119852e7
KH
4452 {
4453 if (eol_crlf && c == '\r')
4454 ONE_MORE_BYTE (byte_after_cr);
4455 charset = charset_roman;
4456 }
57a47f8a 4457 else if (c == 0x80 || c == 0xA0)
8e921c4b 4458 goto invalid_code;
57a47f8a
KH
4459 else if (c >= 0xA1 && c <= 0xDF)
4460 {
4461 /* SJIS -> JISX0201-Kana */
4462 c &= 0x7F;
4463 charset = charset_kana;
4464 }
4465 else if (c <= 0xEF)
df7492f9 4466 {
57a47f8a
KH
4467 /* SJIS -> JISX0208 */
4468 ONE_MORE_BYTE (c1);
4469 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4470 goto invalid_code;
57a47f8a
KH
4471 c = (c << 8) | c1;
4472 SJIS_TO_JIS (c);
4473 charset = charset_kanji;
4474 }
4475 else if (c <= 0xFC && charset_kanji2)
4476 {
c6876370 4477 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4478 ONE_MORE_BYTE (c1);
4479 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4480 goto invalid_code;
57a47f8a
KH
4481 c = (c << 8) | c1;
4482 SJIS_TO_JIS2 (c);
4483 charset = charset_kanji2;
df7492f9 4484 }
57a47f8a
KH
4485 else
4486 goto invalid_code;
24a73b0a
KH
4487 if (charset->id != charset_ascii
4488 && last_id != charset->id)
4489 {
4490 if (last_id != charset_ascii)
69a80ea3 4491 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4492 last_id = charset->id;
4493 last_offset = char_offset;
4494 }
4495 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4496 *charbuf++ = c;
ff0dacd7 4497 char_offset++;
df7492f9 4498 continue;
b73bfc1c 4499
df7492f9
KH
4500 invalid_code:
4501 src = src_base;
4502 consumed_chars = consumed_chars_base;
4503 ONE_MORE_BYTE (c);
065e3595 4504 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4505 char_offset++;
df7492f9
KH
4506 coding->errors++;
4507 }
fa42c37f 4508
df7492f9 4509 no_more_source:
ff0dacd7 4510 if (last_id != charset_ascii)
69a80ea3 4511 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4512 coding->consumed_char += consumed_chars_base;
4513 coding->consumed = src_base - coding->source;
4514 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4515}
4516
b73bfc1c 4517static void
df7492f9 4518decode_coding_big5 (coding)
4ed46869 4519 struct coding_system *coding;
4ed46869 4520{
8f924df7
KH
4521 const unsigned char *src = coding->source + coding->consumed;
4522 const unsigned char *src_end = coding->source + coding->src_bytes;
4523 const unsigned char *src_base;
69a80ea3
KH
4524 int *charbuf = coding->charbuf + coding->charbuf_used;
4525 int *charbuf_end
4526 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4527 int consumed_chars = 0, consumed_chars_base;
4528 int multibytep = coding->src_multibyte;
4529 struct charset *charset_roman, *charset_big5;
24a73b0a 4530 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4531 int char_offset = coding->produced_char;
4532 int last_offset = char_offset;
4533 int last_id = charset_ascii;
119852e7
KH
4534 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4535 int byte_after_cr = -1;
df7492f9 4536
24a73b0a 4537 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4538 val = charset_list;
4539 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4540 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4541
b73bfc1c 4542 while (1)
4ed46869 4543 {
df7492f9 4544 int c, c1;
24a73b0a 4545 struct charset *charset;
b73bfc1c
KH
4546
4547 src_base = src;
df7492f9
KH
4548 consumed_chars_base = consumed_chars;
4549
4550 if (charbuf >= charbuf_end)
b71f6f73
KH
4551 {
4552 if (byte_after_cr >= 0)
4553 src_base--;
4554 break;
4555 }
df7492f9 4556
119852e7 4557 if (byte_after_cr >= 0)
14daee73 4558 c = byte_after_cr, byte_after_cr = -1;
119852e7
KH
4559 else
4560 ONE_MORE_BYTE (c);
b73bfc1c 4561
065e3595
KH
4562 if (c < 0)
4563 goto invalid_code;
24a73b0a 4564 if (c < 0x80)
119852e7 4565 {
14daee73 4566 if (eol_crlf && c == '\r')
119852e7
KH
4567 ONE_MORE_BYTE (byte_after_cr);
4568 charset = charset_roman;
4569 }
24a73b0a 4570 else
4ed46869 4571 {
24a73b0a
KH
4572 /* BIG5 -> Big5 */
4573 if (c < 0xA1 || c > 0xFE)
4574 goto invalid_code;
4575 ONE_MORE_BYTE (c1);
4576 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4577 goto invalid_code;
4578 c = c << 8 | c1;
4579 charset = charset_big5;
4ed46869 4580 }
24a73b0a
KH
4581 if (charset->id != charset_ascii
4582 && last_id != charset->id)
df7492f9 4583 {
24a73b0a 4584 if (last_id != charset_ascii)
69a80ea3 4585 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4586 last_id = charset->id;
4587 last_offset = char_offset;
4ed46869 4588 }
24a73b0a 4589 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4590 *charbuf++ = c;
ff0dacd7 4591 char_offset++;
fb88bf2d
KH
4592 continue;
4593
df7492f9 4594 invalid_code:
4ed46869 4595 src = src_base;
df7492f9
KH
4596 consumed_chars = consumed_chars_base;
4597 ONE_MORE_BYTE (c);
065e3595 4598 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4599 char_offset++;
df7492f9 4600 coding->errors++;
fb88bf2d 4601 }
d46c5b12 4602
df7492f9 4603 no_more_source:
ff0dacd7 4604 if (last_id != charset_ascii)
69a80ea3 4605 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4606 coding->consumed_char += consumed_chars_base;
4607 coding->consumed = src_base - coding->source;
4608 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4609}
4610
4611/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4612 This function can encode charsets `ascii', `katakana-jisx0201',
4613 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4614 are sure that all these charsets are registered as official charset
4ed46869
KH
4615 (i.e. do not have extended leading-codes). Characters of other
4616 charsets are produced without any encoding. If SJIS_P is 1, encode
4617 SJIS text, else encode BIG5 text. */
4618
df7492f9
KH
4619static int
4620encode_coding_sjis (coding)
4ed46869 4621 struct coding_system *coding;
4ed46869 4622{
df7492f9
KH
4623 int multibytep = coding->dst_multibyte;
4624 int *charbuf = coding->charbuf;
4625 int *charbuf_end = charbuf + coding->charbuf_used;
4626 unsigned char *dst = coding->destination + coding->produced;
4627 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4628 int safe_room = 4;
4629 int produced_chars = 0;
24a73b0a 4630 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4631 int ascii_compatible;
4632 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4633 struct charset *charset_kanji2;
df7492f9 4634 int c;
a5d301df 4635
24a73b0a 4636 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4637 val = charset_list;
4638 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4639 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4640 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4641 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4642
df7492f9 4643 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 4644
df7492f9
KH
4645 while (charbuf < charbuf_end)
4646 {
4647 ASSURE_DESTINATION (safe_room);
4648 c = *charbuf++;
b73bfc1c 4649 /* Now encode the character C. */
df7492f9
KH
4650 if (ASCII_CHAR_P (c) && ascii_compatible)
4651 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4652 else if (CHAR_BYTE8_P (c))
4653 {
4654 c = CHAR_TO_BYTE8 (c);
4655 EMIT_ONE_BYTE (c);
4656 }
df7492f9 4657 else
b73bfc1c 4658 {
df7492f9
KH
4659 unsigned code;
4660 struct charset *charset = char_charset (c, charset_list, &code);
4661
4662 if (!charset)
4ed46869 4663 {
41cbe562 4664 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4665 {
41cbe562
KH
4666 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4667 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4668 }
41cbe562 4669 else
b73bfc1c 4670 {
41cbe562
KH
4671 c = coding->default_char;
4672 charset = char_charset (c, charset_list, &code);
b73bfc1c 4673 }
b73bfc1c 4674 }
df7492f9
KH
4675 if (code == CHARSET_INVALID_CODE (charset))
4676 abort ();
4677 if (charset == charset_kanji)
4678 {
4679 int c1, c2;
4680 JIS_TO_SJIS (code);
4681 c1 = code >> 8, c2 = code & 0xFF;
4682 EMIT_TWO_BYTES (c1, c2);
4683 }
4684 else if (charset == charset_kana)
4685 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
4686 else if (charset_kanji2 && charset == charset_kanji2)
4687 {
4688 int c1, c2;
4689
4690 c1 = code >> 8;
4691 if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4692 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4693 {
4694 JIS_TO_SJIS2 (code);
4695 c1 = code >> 8, c2 = code & 0xFF;
4696 EMIT_TWO_BYTES (c1, c2);
4697 }
4698 else
4699 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4700 }
df7492f9
KH
4701 else
4702 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4703 }
4704 }
065e3595 4705 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4706 coding->produced_char += produced_chars;
4707 coding->produced = dst - coding->destination;
4708 return 0;
4709}
4710
4711static int
4712encode_coding_big5 (coding)
4713 struct coding_system *coding;
4714{
4715 int multibytep = coding->dst_multibyte;
4716 int *charbuf = coding->charbuf;
4717 int *charbuf_end = charbuf + coding->charbuf_used;
4718 unsigned char *dst = coding->destination + coding->produced;
4719 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4720 int safe_room = 4;
4721 int produced_chars = 0;
24a73b0a 4722 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4723 int ascii_compatible;
4724 struct charset *charset_roman, *charset_big5;
4725 int c;
4726
24a73b0a 4727 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4728 val = charset_list;
4729 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4730 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4731 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4732
4733 while (charbuf < charbuf_end)
4734 {
4735 ASSURE_DESTINATION (safe_room);
4736 c = *charbuf++;
4737 /* Now encode the character C. */
4738 if (ASCII_CHAR_P (c) && ascii_compatible)
4739 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4740 else if (CHAR_BYTE8_P (c))
4741 {
4742 c = CHAR_TO_BYTE8 (c);
4743 EMIT_ONE_BYTE (c);
b73bfc1c
KH
4744 }
4745 else
4746 {
df7492f9
KH
4747 unsigned code;
4748 struct charset *charset = char_charset (c, charset_list, &code);
4749
4750 if (! charset)
b73bfc1c 4751 {
41cbe562 4752 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4753 {
41cbe562
KH
4754 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4755 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4756 }
41cbe562 4757 else
0eecad43 4758 {
41cbe562
KH
4759 c = coding->default_char;
4760 charset = char_charset (c, charset_list, &code);
0eecad43 4761 }
4ed46869 4762 }
df7492f9
KH
4763 if (code == CHARSET_INVALID_CODE (charset))
4764 abort ();
4765 if (charset == charset_big5)
b73bfc1c 4766 {
df7492f9
KH
4767 int c1, c2;
4768
4769 c1 = code >> 8, c2 = code & 0xFF;
4770 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 4771 }
df7492f9
KH
4772 else
4773 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 4774 }
4ed46869 4775 }
065e3595 4776 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4777 coding->produced_char += produced_chars;
4778 coding->produced = dst - coding->destination;
4779 return 0;
4ed46869
KH
4780}
4781
4782\f
/*** 9. CCL handlers ***/
1397dc18
KH
4784
4785/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4786 Check if a text is encoded in a coding system of which
4787 encoder/decoder are written in CCL program. If it is, return
df7492f9 4788 CATEGORY_MASK_CCL, else return 0. */
1397dc18 4789
0a28aafb 4790static int
ff0dacd7 4791detect_coding_ccl (coding, detect_info)
df7492f9 4792 struct coding_system *coding;
ff0dacd7 4793 struct coding_detection_info *detect_info;
1397dc18 4794{
065e3595 4795 const unsigned char *src = coding->source, *src_base;
8f924df7 4796 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4797 int multibytep = coding->src_multibyte;
4798 int consumed_chars = 0;
4799 int found = 0;
0e219d54 4800 unsigned char *valids;
df7492f9
KH
4801 int head_ascii = coding->head_ascii;
4802 Lisp_Object attrs;
4803
ff0dacd7
KH
4804 detect_info->checked |= CATEGORY_MASK_CCL;
4805
df7492f9 4806 coding = &coding_categories[coding_category_ccl];
0e219d54 4807 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
4808 attrs = CODING_ID_ATTRS (coding->id);
4809 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4810 src += head_ascii;
1397dc18 4811
b73bfc1c 4812 while (1)
1397dc18 4813 {
df7492f9 4814 int c;
065e3595
KH
4815
4816 src_base = src;
df7492f9 4817 ONE_MORE_BYTE (c);
065e3595 4818 if (c < 0 || ! valids[c])
df7492f9 4819 break;
ff0dacd7
KH
4820 if ((valids[c] > 1))
4821 found = CATEGORY_MASK_CCL;
df7492f9 4822 }
ff0dacd7 4823 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4824 return 0;
4825
4826 no_more_source:
ff0dacd7
KH
4827 detect_info->found |= found;
4828 return 1;
df7492f9
KH
4829}
4830
4831static void
4832decode_coding_ccl (coding)
4833 struct coding_system *coding;
4834{
7c78e542 4835 const unsigned char *src = coding->source + coding->consumed;
8f924df7 4836 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
4837 int *charbuf = coding->charbuf + coding->charbuf_used;
4838 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
4839 int consumed_chars = 0;
4840 int multibytep = coding->src_multibyte;
4841 struct ccl_program ccl;
4842 int source_charbuf[1024];
4843 int source_byteidx[1024];
24a73b0a 4844 Lisp_Object attrs, charset_list;
df7492f9 4845
24a73b0a 4846 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4847 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4848
4849 while (src < src_end)
4850 {
7c78e542 4851 const unsigned char *p = src;
df7492f9
KH
4852 int *source, *source_end;
4853 int i = 0;
4854
4855 if (multibytep)
4856 while (i < 1024 && p < src_end)
4857 {
4858 source_byteidx[i] = p - src;
4859 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4860 }
4861 else
4862 while (i < 1024 && p < src_end)
4863 source_charbuf[i++] = *p++;
8f924df7 4864
df7492f9
KH
4865 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4866 ccl.last_block = 1;
4867
4868 source = source_charbuf;
4869 source_end = source + i;
4870 while (source < source_end)
4871 {
4872 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4873 source_end - source, charbuf_end - charbuf,
4874 charset_list);
df7492f9
KH
4875 source += ccl.consumed;
4876 charbuf += ccl.produced;
4877 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4878 break;
4879 }
4880 if (source < source_end)
4881 src += source_byteidx[source - source_charbuf];
4882 else
4883 src = p;
4884 consumed_chars += source - source_charbuf;
4885
4886 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4887 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4888 break;
4889 }
4890
4891 switch (ccl.status)
4892 {
4893 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4894 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4895 break;
4896 case CCL_STAT_SUSPEND_BY_DST:
4897 break;
4898 case CCL_STAT_QUIT:
4899 case CCL_STAT_INVALID_CMD:
065e3595 4900 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4901 break;
4902 default:
065e3595 4903 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4904 break;
4905 }
4906 coding->consumed_char += consumed_chars;
4907 coding->consumed = src - coding->source;
4908 coding->charbuf_used = charbuf - coding->charbuf;
4909}
4910
4911static int
4912encode_coding_ccl (coding)
4913 struct coding_system *coding;
4914{
4915 struct ccl_program ccl;
4916 int multibytep = coding->dst_multibyte;
4917 int *charbuf = coding->charbuf;
4918 int *charbuf_end = charbuf + coding->charbuf_used;
4919 unsigned char *dst = coding->destination + coding->produced;
4920 unsigned char *dst_end = coding->destination + coding->dst_bytes;
df7492f9
KH
4921 int destination_charbuf[1024];
4922 int i, produced_chars = 0;
24a73b0a 4923 Lisp_Object attrs, charset_list;
df7492f9 4924
24a73b0a 4925 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4926 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4927
4928 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4929 ccl.dst_multibyte = coding->dst_multibyte;
4930
8cffd3e7 4931 while (charbuf < charbuf_end)
df7492f9 4932 {
df7492f9 4933 ccl_driver (&ccl, charbuf, destination_charbuf,
8cffd3e7 4934 charbuf_end - charbuf, 1024, charset_list);
df7492f9 4935 if (multibytep)
8cffd3e7
KH
4936 {
4937 ASSURE_DESTINATION (ccl.produced * 2);
4938 for (i = 0; i < ccl.produced; i++)
4939 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4940 }
df7492f9
KH
4941 else
4942 {
8cffd3e7 4943 ASSURE_DESTINATION (ccl.produced);
3ed051d4 4944 for (i = 0; i < ccl.produced; i++)
df7492f9
KH
4945 *dst++ = destination_charbuf[i] & 0xFF;
4946 produced_chars += ccl.produced;
4947 }
8cffd3e7
KH
4948 charbuf += ccl.consumed;
4949 if (ccl.status == CCL_STAT_QUIT
4950 || ccl.status == CCL_STAT_INVALID_CMD)
4951 break;
df7492f9
KH
4952 }
4953
4954 switch (ccl.status)
4955 {
4956 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4957 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4958 break;
4959 case CCL_STAT_SUSPEND_BY_DST:
065e3595 4960 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
4961 break;
4962 case CCL_STAT_QUIT:
4963 case CCL_STAT_INVALID_CMD:
065e3595 4964 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4965 break;
4966 default:
065e3595 4967 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4968 break;
1397dc18 4969 }
df7492f9
KH
4970
4971 coding->produced_char += produced_chars;
4972 coding->produced = dst - coding->destination;
4973 return 0;
1397dc18
KH
4974}
4975
df7492f9 4976
1397dc18 4977\f
df7492f9 4978/*** 10, 11. no-conversion handlers ***/
4ed46869 4979
b73bfc1c 4980/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4981
b73bfc1c 4982static void
df7492f9 4983decode_coding_raw_text (coding)
4ed46869 4984 struct coding_system *coding;
4ed46869 4985{
119852e7
KH
4986 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4987
df7492f9 4988 coding->chars_at_source = 1;
119852e7
KH
4989 coding->consumed_char = coding->src_chars;
4990 coding->consumed = coding->src_bytes;
4991 if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4992 {
4993 coding->consumed_char--;
4994 coding->consumed--;
4995 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4996 }
4997 else
4998 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4999}
4ed46869 5000
df7492f9
KH
5001static int
5002encode_coding_raw_text (coding)
5003 struct coding_system *coding;
5004{
5005 int multibytep = coding->dst_multibyte;
5006 int *charbuf = coding->charbuf;
5007 int *charbuf_end = coding->charbuf + coding->charbuf_used;
5008 unsigned char *dst = coding->destination + coding->produced;
5009 unsigned char *dst_end = coding->destination + coding->dst_bytes;
a0ed9b27 5010 int produced_chars = 0;
b73bfc1c
KH
5011 int c;
5012
df7492f9 5013 if (multibytep)
b73bfc1c 5014 {
df7492f9 5015 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 5016
df7492f9
KH
5017 if (coding->src_multibyte)
5018 while (charbuf < charbuf_end)
5019 {
5020 ASSURE_DESTINATION (safe_room);
5021 c = *charbuf++;
5022 if (ASCII_CHAR_P (c))
5023 EMIT_ONE_ASCII_BYTE (c);
5024 else if (CHAR_BYTE8_P (c))
5025 {
5026 c = CHAR_TO_BYTE8 (c);
5027 EMIT_ONE_BYTE (c);
5028 }
5029 else
5030 {
5031 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 5032
df7492f9
KH
5033 CHAR_STRING_ADVANCE (c, p1);
5034 while (p0 < p1)
9d123124
KH
5035 {
5036 EMIT_ONE_BYTE (*p0);
5037 p0++;
5038 }
df7492f9
KH
5039 }
5040 }
b73bfc1c 5041 else
df7492f9
KH
5042 while (charbuf < charbuf_end)
5043 {
5044 ASSURE_DESTINATION (safe_room);
5045 c = *charbuf++;
5046 EMIT_ONE_BYTE (c);
5047 }
5048 }
5049 else
4ed46869 5050 {
df7492f9 5051 if (coding->src_multibyte)
d46c5b12 5052 {
df7492f9
KH
5053 int safe_room = MAX_MULTIBYTE_LENGTH;
5054
5055 while (charbuf < charbuf_end)
d46c5b12 5056 {
df7492f9
KH
5057 ASSURE_DESTINATION (safe_room);
5058 c = *charbuf++;
5059 if (ASCII_CHAR_P (c))
5060 *dst++ = c;
5061 else if (CHAR_BYTE8_P (c))
5062 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 5063 else
df7492f9 5064 CHAR_STRING_ADVANCE (c, dst);
d46c5b12
KH
5065 }
5066 }
df7492f9
KH
5067 else
5068 {
5069 ASSURE_DESTINATION (charbuf_end - charbuf);
5070 while (charbuf < charbuf_end && dst < dst_end)
5071 *dst++ = *charbuf++;
8f924df7 5072 }
319a3947 5073 produced_chars = dst - (coding->destination + coding->produced);
4ed46869 5074 }
065e3595 5075 record_conversion_result (coding, CODING_RESULT_SUCCESS);
a0ed9b27 5076 coding->produced_char += produced_chars;
df7492f9
KH
5077 coding->produced = dst - coding->destination;
5078 return 0;
4ed46869
KH
5079}
5080
ff0dacd7
KH
5081/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5082 Check if a text is encoded in a charset-based coding system. If it
5083 is, return 1, else return 0. */
5084
0a28aafb 5085static int
ff0dacd7 5086detect_coding_charset (coding, detect_info)
df7492f9 5087 struct coding_system *coding;
ff0dacd7 5088 struct coding_detection_info *detect_info;
1397dc18 5089{
065e3595 5090 const unsigned char *src = coding->source, *src_base;
8f924df7 5091 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
5092 int multibytep = coding->src_multibyte;
5093 int consumed_chars = 0;
07295713 5094 Lisp_Object attrs, valids, name;
584948ac 5095 int found = 0;
716b3fa0 5096 int head_ascii = coding->head_ascii;
07295713 5097 int check_latin_extra = 0;
1397dc18 5098
ff0dacd7
KH
5099 detect_info->checked |= CATEGORY_MASK_CHARSET;
5100
df7492f9
KH
5101 coding = &coding_categories[coding_category_charset];
5102 attrs = CODING_ID_ATTRS (coding->id);
5103 valids = AREF (attrs, coding_attr_charset_valids);
07295713
KH
5104 name = CODING_ID_NAME (coding->id);
5105 if (VECTORP (Vlatin_extra_code_table)
cb84a2be 5106 && strcmp ((char *) SDATA (SYMBOL_NAME (name)), "iso-8859-") == 0)
07295713 5107 check_latin_extra = 1;
df7492f9 5108 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
716b3fa0 5109 src += head_ascii;
1397dc18 5110
b73bfc1c 5111 while (1)
1397dc18 5112 {
df7492f9 5113 int c;
716b3fa0
KH
5114 Lisp_Object val;
5115 struct charset *charset;
5116 int dim, idx;
1397dc18 5117
065e3595 5118 src_base = src;
df7492f9 5119 ONE_MORE_BYTE (c);
065e3595
KH
5120 if (c < 0)
5121 continue;
716b3fa0
KH
5122 val = AREF (valids, c);
5123 if (NILP (val))
df7492f9 5124 break;
584948ac 5125 if (c >= 0x80)
07295713
KH
5126 {
5127 if (c < 0xA0
5128 && check_latin_extra
5129 && NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
5130 break;
5131 found = CATEGORY_MASK_CHARSET;
5132 }
716b3fa0
KH
5133 if (INTEGERP (val))
5134 {
5135 charset = CHARSET_FROM_ID (XFASTINT (val));
5136 dim = CHARSET_DIMENSION (charset);
5137 for (idx = 1; idx < dim; idx++)
5138 {
5139 if (src == src_end)
5140 goto too_short;
5141 ONE_MORE_BYTE (c);
3ed051d4 5142 if (c < charset->code_space[(dim - 1 - idx) * 2]
716b3fa0
KH
5143 || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5144 break;
5145 }
5146 if (idx < dim)
5147 break;
5148 }
5149 else
5150 {
5151 idx = 1;
5152 for (; CONSP (val); val = XCDR (val))
5153 {
5154 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5155 dim = CHARSET_DIMENSION (charset);
5156 while (idx < dim)
5157 {
5158 if (src == src_end)
5159 goto too_short;
5160 ONE_MORE_BYTE (c);
5161 if (c < charset->code_space[(dim - 1 - idx) * 4]
5162 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5163 break;
5164 idx++;
5165 }
5166 if (idx == dim)
5167 {
5168 val = Qnil;
5169 break;
5170 }
5171 }
5172 if (CONSP (val))
5173 break;
5174 }
df7492f9 5175 }
716b3fa0 5176 too_short:
ff0dacd7 5177 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 5178 return 0;
4ed46869 5179
df7492f9 5180 no_more_source:
ff0dacd7
KH
5181 detect_info->found |= found;
5182 return 1;
df7492f9 5183}
b73bfc1c 5184
b73bfc1c 5185static void
df7492f9 5186decode_coding_charset (coding)
4ed46869 5187 struct coding_system *coding;
4ed46869 5188{
8f924df7
KH
5189 const unsigned char *src = coding->source + coding->consumed;
5190 const unsigned char *src_end = coding->source + coding->src_bytes;
5191 const unsigned char *src_base;
69a80ea3
KH
5192 int *charbuf = coding->charbuf + coding->charbuf_used;
5193 int *charbuf_end
5194 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
5195 int consumed_chars = 0, consumed_chars_base;
5196 int multibytep = coding->src_multibyte;
24a73b0a 5197 Lisp_Object attrs, charset_list, valids;
ff0dacd7
KH
5198 int char_offset = coding->produced_char;
5199 int last_offset = char_offset;
5200 int last_id = charset_ascii;
119852e7
KH
5201 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5202 int byte_after_cr = -1;
df7492f9 5203
24a73b0a 5204 CODING_GET_INFO (coding, attrs, charset_list);
4eb6d3f1 5205 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 5206
df7492f9 5207 while (1)
4ed46869 5208 {
4eb6d3f1 5209 int c;
24a73b0a
KH
5210 Lisp_Object val;
5211 struct charset *charset;
5212 int dim;
5213 int len = 1;
5214 unsigned code;
df7492f9
KH
5215
5216 src_base = src;
5217 consumed_chars_base = consumed_chars;
b73bfc1c 5218
df7492f9 5219 if (charbuf >= charbuf_end)
b71f6f73
KH
5220 {
5221 if (byte_after_cr >= 0)
5222 src_base--;
5223 break;
5224 }
df7492f9 5225
119852e7
KH
5226 if (byte_after_cr >= 0)
5227 {
5228 c = byte_after_cr;
5229 byte_after_cr = -1;
5230 }
5231 else
5232 {
5233 ONE_MORE_BYTE (c);
5234 if (eol_crlf && c == '\r')
5235 ONE_MORE_BYTE (byte_after_cr);
5236 }
065e3595
KH
5237 if (c < 0)
5238 goto invalid_code;
24a73b0a
KH
5239 code = c;
5240
5241 val = AREF (valids, c);
1b17adfd 5242 if (! INTEGERP (val) && ! CONSP (val))
24a73b0a
KH
5243 goto invalid_code;
5244 if (INTEGERP (val))
d46c5b12 5245 {
24a73b0a
KH
5246 charset = CHARSET_FROM_ID (XFASTINT (val));
5247 dim = CHARSET_DIMENSION (charset);
5248 while (len < dim)
b73bfc1c 5249 {
24a73b0a
KH
5250 ONE_MORE_BYTE (c);
5251 code = (code << 8) | c;
5252 len++;
b73bfc1c 5253 }
24a73b0a
KH
5254 CODING_DECODE_CHAR (coding, src, src_base, src_end,
5255 charset, code, c);
d46c5b12 5256 }
df7492f9 5257 else
d46c5b12 5258 {
24a73b0a
KH
5259 /* VAL is a list of charset IDs. It is assured that the
5260 list is sorted by charset dimensions (smaller one
5261 comes first). */
5262 while (CONSP (val))
4eb6d3f1 5263 {
24a73b0a 5264 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 5265 dim = CHARSET_DIMENSION (charset);
f9d71dcd 5266 while (len < dim)
4eb6d3f1 5267 {
acb2a965
KH
5268 ONE_MORE_BYTE (c);
5269 code = (code << 8) | c;
f9d71dcd 5270 len++;
4eb6d3f1 5271 }
24a73b0a
KH
5272 CODING_DECODE_CHAR (coding, src, src_base,
5273 src_end, charset, code, c);
5274 if (c >= 0)
5275 break;
5276 val = XCDR (val);
ff0dacd7 5277 }
d46c5b12 5278 }
24a73b0a
KH
5279 if (c < 0)
5280 goto invalid_code;
5281 if (charset->id != charset_ascii
5282 && last_id != charset->id)
5283 {
5284 if (last_id != charset_ascii)
69a80ea3 5285 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
5286 last_id = charset->id;
5287 last_offset = char_offset;
5288 }
5289
df7492f9 5290 *charbuf++ = c;
ff0dacd7 5291 char_offset++;
df7492f9
KH
5292 continue;
5293
5294 invalid_code:
5295 src = src_base;
5296 consumed_chars = consumed_chars_base;
5297 ONE_MORE_BYTE (c);
065e3595 5298 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 5299 char_offset++;
df7492f9 5300 coding->errors++;
4ed46869
KH
5301 }
5302
df7492f9 5303 no_more_source:
ff0dacd7 5304 if (last_id != charset_ascii)
69a80ea3 5305 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
5306 coding->consumed_char += consumed_chars_base;
5307 coding->consumed = src_base - coding->source;
5308 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
5309}
5310
df7492f9
KH
5311static int
5312encode_coding_charset (coding)
4ed46869 5313 struct coding_system *coding;
4ed46869 5314{
df7492f9
KH
5315 int multibytep = coding->dst_multibyte;
5316 int *charbuf = coding->charbuf;
5317 int *charbuf_end = charbuf + coding->charbuf_used;
5318 unsigned char *dst = coding->destination + coding->produced;
5319 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5320 int safe_room = MAX_MULTIBYTE_LENGTH;
5321 int produced_chars = 0;
24a73b0a 5322 Lisp_Object attrs, charset_list;
df7492f9 5323 int ascii_compatible;
b73bfc1c 5324 int c;
b73bfc1c 5325
24a73b0a 5326 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 5327 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 5328
df7492f9 5329 while (charbuf < charbuf_end)
4ed46869 5330 {
4eb6d3f1 5331 struct charset *charset;
df7492f9 5332 unsigned code;
8f924df7 5333
df7492f9
KH
5334 ASSURE_DESTINATION (safe_room);
5335 c = *charbuf++;
5336 if (ascii_compatible && ASCII_CHAR_P (c))
5337 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 5338 else if (CHAR_BYTE8_P (c))
4ed46869 5339 {
16eafb5d
KH
5340 c = CHAR_TO_BYTE8 (c);
5341 EMIT_ONE_BYTE (c);
d46c5b12 5342 }
d46c5b12 5343 else
b73bfc1c 5344 {
4eb6d3f1
KH
5345 charset = char_charset (c, charset_list, &code);
5346 if (charset)
5347 {
5348 if (CHARSET_DIMENSION (charset) == 1)
5349 EMIT_ONE_BYTE (code);
5350 else if (CHARSET_DIMENSION (charset) == 2)
5351 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5352 else if (CHARSET_DIMENSION (charset) == 3)
5353 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5354 else
5355 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5356 (code >> 8) & 0xFF, code & 0xFF);
5357 }
5358 else
41cbe562
KH
5359 {
5360 if (coding->mode & CODING_MODE_SAFE_ENCODING)
5361 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5362 else
5363 c = coding->default_char;
5364 EMIT_ONE_BYTE (c);
5365 }
4ed46869 5366 }
4ed46869
KH
5367 }
5368
065e3595 5369 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
5370 coding->produced_char += produced_chars;
5371 coding->produced = dst - coding->destination;
5372 return 0;
4ed46869
KH
5373}
5374
5375\f
/*** 10. C library functions ***/
4ed46869 5377
df7492f9
KH
5378/* Setup coding context CODING from information about CODING_SYSTEM.
5379 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
5380 CODING_SYSTEM is invalid, signal an error. */
4ed46869 5381
ec6d2bb8 5382void
e0e989f6
KH
5383setup_coding_system (coding_system, coding)
5384 Lisp_Object coding_system;
4ed46869
KH
5385 struct coding_system *coding;
5386{
df7492f9
KH
5387 Lisp_Object attrs;
5388 Lisp_Object eol_type;
5389 Lisp_Object coding_type;
4608c386 5390 Lisp_Object val;
4ed46869 5391
df7492f9 5392 if (NILP (coding_system))
ae6f73fa 5393 coding_system = Qundecided;
c07c8e12 5394
df7492f9 5395 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 5396
df7492f9
KH
5397 attrs = CODING_ID_ATTRS (coding->id);
5398 eol_type = CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 5399
df7492f9
KH
5400 coding->mode = 0;
5401 coding->head_ascii = -1;
4a015c45
KH
5402 if (VECTORP (eol_type))
5403 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5404 | CODING_REQUIRE_DETECTION_MASK);
5405 else if (! EQ (eol_type, Qunix))
5406 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5407 | CODING_REQUIRE_ENCODING_MASK);
5408 else
5409 coding->common_flags = 0;
5e5c78be
KH
5410 if (! NILP (CODING_ATTR_POST_READ (attrs)))
5411 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5412 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5413 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
5414 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5415 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 5416
df7492f9 5417 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 5418 coding->max_charset_id = SCHARS (val) - 1;
1b3b981b 5419 coding->safe_charsets = SDATA (val);
df7492f9 5420 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4608c386 5421
df7492f9
KH
5422 coding_type = CODING_ATTR_TYPE (attrs);
5423 if (EQ (coding_type, Qundecided))
d46c5b12 5424 {
df7492f9
KH
5425 coding->detector = NULL;
5426 coding->decoder = decode_coding_raw_text;
5427 coding->encoder = encode_coding_raw_text;
5428 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5429 }
df7492f9 5430 else if (EQ (coding_type, Qiso_2022))
d46c5b12 5431 {
df7492f9
KH
5432 int i;
5433 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5434
5435 /* Invoke graphic register 0 to plane 0. */
5436 CODING_ISO_INVOCATION (coding, 0) = 0;
5437 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
5438 CODING_ISO_INVOCATION (coding, 1)
5439 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5440 /* Setup the initial status of designation. */
5441 for (i = 0; i < 4; i++)
5442 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5443 /* Not single shifting initially. */
5444 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5445 /* Beginning of buffer should also be regarded as bol. */
5446 CODING_ISO_BOL (coding) = 1;
5447 coding->detector = detect_coding_iso_2022;
5448 coding->decoder = decode_coding_iso_2022;
5449 coding->encoder = encode_coding_iso_2022;
5450 if (flags & CODING_ISO_FLAG_SAFE)
5451 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 5452 coding->common_flags
df7492f9
KH
5453 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5454 | CODING_REQUIRE_FLUSHING_MASK);
5455 if (flags & CODING_ISO_FLAG_COMPOSITION)
5456 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
5457 if (flags & CODING_ISO_FLAG_DESIGNATION)
5458 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
5459 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5460 {
5461 setup_iso_safe_charsets (attrs);
5462 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7 5463 coding->max_charset_id = SCHARS (val) - 1;
1b3b981b 5464 coding->safe_charsets = SDATA (val);
df7492f9
KH
5465 }
5466 CODING_ISO_FLAGS (coding) = flags;
d46c5b12 5467 }
df7492f9 5468 else if (EQ (coding_type, Qcharset))
d46c5b12 5469 {
df7492f9
KH
5470 coding->detector = detect_coding_charset;
5471 coding->decoder = decode_coding_charset;
5472 coding->encoder = encode_coding_charset;
d46c5b12 5473 coding->common_flags
df7492f9 5474 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5475 }
df7492f9 5476 else if (EQ (coding_type, Qutf_8))
d46c5b12 5477 {
a470d443
KH
5478 val = AREF (attrs, coding_attr_utf_bom);
5479 CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5480 : EQ (val, Qt) ? utf_with_bom
5481 : utf_without_bom);
df7492f9
KH
5482 coding->detector = detect_coding_utf_8;
5483 coding->decoder = decode_coding_utf_8;
5484 coding->encoder = encode_coding_utf_8;
5485 coding->common_flags
5486 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
a470d443
KH
5487 if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5488 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
df7492f9
KH
5489 }
5490 else if (EQ (coding_type, Qutf_16))
5491 {
a470d443
KH
5492 val = AREF (attrs, coding_attr_utf_bom);
5493 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5494 : EQ (val, Qt) ? utf_with_bom
5495 : utf_without_bom);
df7492f9 5496 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5497 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5498 : utf_16_little_endian);
e19c3639 5499 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5500 coding->detector = detect_coding_utf_16;
5501 coding->decoder = decode_coding_utf_16;
5502 coding->encoder = encode_coding_utf_16;
5503 coding->common_flags
5504 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
a470d443 5505 if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
b49a1807 5506 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5507 }
df7492f9 5508 else if (EQ (coding_type, Qccl))
4ed46869 5509 {
df7492f9
KH
5510 coding->detector = detect_coding_ccl;
5511 coding->decoder = decode_coding_ccl;
5512 coding->encoder = encode_coding_ccl;
c952af22 5513 coding->common_flags
df7492f9
KH
5514 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5515 | CODING_REQUIRE_FLUSHING_MASK);
5516 }
5517 else if (EQ (coding_type, Qemacs_mule))
5518 {
5519 coding->detector = detect_coding_emacs_mule;
5520 coding->decoder = decode_coding_emacs_mule;
5521 coding->encoder = encode_coding_emacs_mule;
c952af22 5522 coding->common_flags
df7492f9
KH
5523 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5524 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5525 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5526 {
5527 Lisp_Object tail, safe_charsets;
5528 int max_charset_id = 0;
5529
5530 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5531 tail = XCDR (tail))
5532 if (max_charset_id < XFASTINT (XCAR (tail)))
5533 max_charset_id = XFASTINT (XCAR (tail));
1b3b981b
AS
5534 safe_charsets = make_uninit_string (max_charset_id + 1);
5535 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9
KH
5536 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5537 tail = XCDR (tail))
8f924df7 5538 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5539 coding->max_charset_id = max_charset_id;
1b3b981b 5540 coding->safe_charsets = SDATA (safe_charsets);
df7492f9
KH
5541 }
5542 }
5543 else if (EQ (coding_type, Qshift_jis))
5544 {
5545 coding->detector = detect_coding_sjis;
5546 coding->decoder = decode_coding_sjis;
5547 coding->encoder = encode_coding_sjis;
c952af22 5548 coding->common_flags
df7492f9
KH
5549 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5550 }
5551 else if (EQ (coding_type, Qbig5))
5552 {
5553 coding->detector = detect_coding_big5;
5554 coding->decoder = decode_coding_big5;
5555 coding->encoder = encode_coding_big5;
c952af22 5556 coding->common_flags
df7492f9
KH
5557 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5558 }
5559 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5560 {
df7492f9
KH
5561 coding->detector = NULL;
5562 coding->decoder = decode_coding_raw_text;
5563 coding->encoder = encode_coding_raw_text;
ea29edf2
KH
5564 if (! EQ (eol_type, Qunix))
5565 {
5566 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5567 if (! VECTORP (eol_type))
5568 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5569 }
5570
4ed46869 5571 }
4ed46869 5572
df7492f9 5573 return;
4ed46869
KH
5574}
5575
0ff61e78
KH
5576/* Return a list of charsets supported by CODING. */
5577
5578Lisp_Object
5579coding_charset_list (coding)
5580 struct coding_system *coding;
5581{
35befdaa 5582 Lisp_Object attrs, charset_list;
0ff61e78
KH
5583
5584 CODING_GET_INFO (coding, attrs, charset_list);
5585 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5586 {
5587 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5588
5589 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5590 charset_list = Viso_2022_charset_list;
5591 }
5592 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5593 {
5594 charset_list = Vemacs_mule_charset_list;
5595 }
5596 return charset_list;
5597}
5598
5599
e9f91ece
KH
5600/* Return a list of charsets supported by CODING-SYSTEM. */
5601
5602Lisp_Object
5603coding_system_charset_list (coding_system)
5604 Lisp_Object coding_system;
5605{
5606 int id;
5607 Lisp_Object attrs, charset_list;
5608
5609 CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5610 attrs = CODING_ID_ATTRS (id);
5611
5612 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5613 {
5614 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5615
5616 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5617 charset_list = Viso_2022_charset_list;
5618 else
5619 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5620 }
5621 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5622 {
5623 charset_list = Vemacs_mule_charset_list;
5624 }
5625 else
5626 {
5627 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5628 }
5629 return charset_list;
5630}
5631
5632
df7492f9
KH
5633/* Return raw-text or one of its subsidiaries that has the same
5634 eol_type as CODING-SYSTEM. */
ec6d2bb8 5635
df7492f9
KH
5636Lisp_Object
5637raw_text_coding_system (coding_system)
5638 Lisp_Object coding_system;
ec6d2bb8 5639{
0be8721c 5640 Lisp_Object spec, attrs;
df7492f9 5641 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 5642
d3e4cb56
KH
5643 if (NILP (coding_system))
5644 return Qraw_text;
df7492f9
KH
5645 spec = CODING_SYSTEM_SPEC (coding_system);
5646 attrs = AREF (spec, 0);
ec6d2bb8 5647
df7492f9
KH
5648 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5649 return coding_system;
ec6d2bb8 5650
df7492f9
KH
5651 eol_type = AREF (spec, 2);
5652 if (VECTORP (eol_type))
5653 return Qraw_text;
5654 spec = CODING_SYSTEM_SPEC (Qraw_text);
5655 raw_text_eol_type = AREF (spec, 2);
5656 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5657 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5658 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
5659}
5660
54f78171 5661
df7492f9
KH
5662/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5663 does, return one of the subsidiary that has the same eol-spec as
fcbcfb64
KH
5664 PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil,
5665 inherit end-of-line format from the system's setting
5666 (system_eol_type). */
df7492f9
KH
5667
5668Lisp_Object
5669coding_inherit_eol_type (coding_system, parent)
b74e4686 5670 Lisp_Object coding_system, parent;
54f78171 5671{
3e139625 5672 Lisp_Object spec, eol_type;
54f78171 5673
d3e4cb56
KH
5674 if (NILP (coding_system))
5675 coding_system = Qraw_text;
df7492f9 5676 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 5677 eol_type = AREF (spec, 2);
fcbcfb64 5678 if (VECTORP (eol_type))
df7492f9 5679 {
df7492f9
KH
5680 Lisp_Object parent_eol_type;
5681
fcbcfb64
KH
5682 if (! NILP (parent))
5683 {
5684 Lisp_Object parent_spec;
5685
4a015c45 5686 parent_spec = CODING_SYSTEM_SPEC (parent);
fcbcfb64
KH
5687 parent_eol_type = AREF (parent_spec, 2);
5688 }
5689 else
5690 parent_eol_type = system_eol_type;
df7492f9
KH
5691 if (EQ (parent_eol_type, Qunix))
5692 coding_system = AREF (eol_type, 0);
5693 else if (EQ (parent_eol_type, Qdos))
5694 coding_system = AREF (eol_type, 1);
5695 else if (EQ (parent_eol_type, Qmac))
5696 coding_system = AREF (eol_type, 2);
54f78171 5697 }
df7492f9 5698 return coding_system;
54f78171
KH
5699}
5700
4ed46869
KH
5701/* Emacs has a mechanism to automatically detect a coding system if it
5702 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5703 it's impossible to distinguish some coding systems accurately
5704 because they use the same range of codes. So, at first, coding
5705 systems are first grouped into the following categories:
5706
0ef69138 5707 o coding-category-emacs-mule
4ed46869
KH
5708
5709 The category for a coding system which has the same code range
5710 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5711 symbol) `emacs-mule' by default.
4ed46869
KH
5712
5713 o coding-category-sjis
5714
5715 The category for a coding system which has the same code range
5716 as SJIS. Assigned the coding-system (Lisp
7717c392 5717 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5718
5719 o coding-category-iso-7
5720
5721 The category for a coding system which has the same code range
7717c392 5722 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5723 shift and single shift functions. This can encode/decode all
5724 charsets. Assigned the coding-system (Lisp symbol)
5725 `iso-2022-7bit' by default.
5726
5727 o coding-category-iso-7-tight
5728
5729 Same as coding-category-iso-7 except that this can
5730 encode/decode only the specified charsets.
4ed46869
KH
5731
5732 o coding-category-iso-8-1
5733
5734 The category for a coding system which has the same code range
5735 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5736 for DIMENSION1 charset. This doesn't use any locking shift
5737 and single shift functions. Assigned the coding-system (Lisp
5738 symbol) `iso-latin-1' by default.
4ed46869
KH
5739
5740 o coding-category-iso-8-2
5741
5742 The category for a coding system which has the same code range
5743 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5744 for DIMENSION2 charset. This doesn't use any locking shift
5745 and single shift functions. Assigned the coding-system (Lisp
5746 symbol) `japanese-iso-8bit' by default.
4ed46869 5747
7717c392 5748 o coding-category-iso-7-else
4ed46869
KH
5749
5750 The category for a coding system which has the same code range
df7492f9 5751 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
5752 single shift functions. Assigned the coding-system (Lisp
5753 symbol) `iso-2022-7bit-lock' by default.
5754
5755 o coding-category-iso-8-else
5756
5757 The category for a coding system which has the same code range
df7492f9 5758 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
5759 single shift functions. Assigned the coding-system (Lisp
5760 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5761
5762 o coding-category-big5
5763
5764 The category for a coding system which has the same code range
5765 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5766 `cn-big5' by default.
4ed46869 5767
fa42c37f
KH
5768 o coding-category-utf-8
5769
5770 The category for a coding system which has the same code range
6e76ae91 5771 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
5772 symbol) `utf-8' by default.
5773
5774 o coding-category-utf-16-be
5775
5776 The category for a coding system in which a text has an
5777 Unicode signature (cf. Unicode Standard) in the order of BIG
5778 endian at the head. Assigned the coding-system (Lisp symbol)
5779 `utf-16-be' by default.
5780
5781 o coding-category-utf-16-le
5782
5783 The category for a coding system in which a text has an
5784 Unicode signature (cf. Unicode Standard) in the order of
5785 LITTLE endian at the head. Assigned the coding-system (Lisp
5786 symbol) `utf-16-le' by default.
5787
1397dc18
KH
5788 o coding-category-ccl
5789
5790 The category for a coding system of which encoder/decoder is
5791 written in CCL programs. The default value is nil, i.e., no
5792 coding system is assigned.
5793
4ed46869
KH
5794 o coding-category-binary
5795
5796 The category for a coding system not categorized in any of the
5797 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5798 `no-conversion' by default.
4ed46869
KH
5799
5800 Each of them is a Lisp symbol and the value is an actual
df7492f9 5801 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5802 What Emacs does actually is to detect a category of coding system.
5803 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5804 decide only one possible category, it selects a category of the
4ed46869
KH
5805 highest priority. Priorities of categories are also specified by a
5806 user in a Lisp variable `coding-category-list'.
5807
5808*/
5809
df7492f9
KH
5810#define EOL_SEEN_NONE 0
5811#define EOL_SEEN_LF 1
5812#define EOL_SEEN_CR 2
5813#define EOL_SEEN_CRLF 4
66cfb530 5814
ff0dacd7
KH
5815/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5816 SOURCE is encoded. If CATEGORY is one of
5817 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5818 two-byte, else they are encoded by one-byte.
5819
5820 Return one of EOL_SEEN_XXX. */
4ed46869 5821
bc4bc72a 5822#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
5823
5824static int
89528eb3 5825detect_eol (source, src_bytes, category)
f6cbaf43 5826 const unsigned char *source;
df7492f9 5827 EMACS_INT src_bytes;
89528eb3 5828 enum coding_category category;
4ed46869 5829{
f6cbaf43 5830 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5831 unsigned char c;
df7492f9
KH
5832 int total = 0;
5833 int eol_seen = EOL_SEEN_NONE;
4ed46869 5834
89528eb3 5835 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5836 {
df7492f9 5837 int msb, lsb;
fa42c37f 5838
89528eb3
KH
5839 msb = category == (coding_category_utf_16_le
5840 | coding_category_utf_16_le_nosig);
df7492f9 5841 lsb = 1 - msb;
fa42c37f 5842
df7492f9 5843 while (src + 1 < src_end)
fa42c37f 5844 {
df7492f9
KH
5845 c = src[lsb];
5846 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5847 {
df7492f9
KH
5848 int this_eol;
5849
5850 if (c == '\n')
5851 this_eol = EOL_SEEN_LF;
5852 else if (src + 3 >= src_end
5853 || src[msb + 2] != 0
5854 || src[lsb + 2] != '\n')
5855 this_eol = EOL_SEEN_CR;
fa42c37f 5856 else
75f4f1ac
EZ
5857 {
5858 this_eol = EOL_SEEN_CRLF;
5859 src += 2;
5860 }
df7492f9
KH
5861
5862 if (eol_seen == EOL_SEEN_NONE)
5863 /* This is the first end-of-line. */
5864 eol_seen = this_eol;
5865 else if (eol_seen != this_eol)
fa42c37f 5866 {
75f4f1ac
EZ
5867 /* The found type is different from what found before.
5868 Allow for stray ^M characters in DOS EOL files. */
5869 if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
5870 || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
5871 eol_seen = EOL_SEEN_CRLF;
5872 else
5873 {
5874 eol_seen = EOL_SEEN_LF;
5875 break;
5876 }
fa42c37f 5877 }
df7492f9
KH
5878 if (++total == MAX_EOL_CHECK_COUNT)
5879 break;
fa42c37f 5880 }
df7492f9 5881 src += 2;
fa42c37f 5882 }
bcf26d6a 5883 }
d46c5b12 5884 else
c4825358 5885 {
df7492f9 5886 while (src < src_end)
27901516 5887 {
df7492f9
KH
5888 c = *src++;
5889 if (c == '\n' || c == '\r')
5890 {
5891 int this_eol;
d46c5b12 5892
df7492f9
KH
5893 if (c == '\n')
5894 this_eol = EOL_SEEN_LF;
5895 else if (src >= src_end || *src != '\n')
5896 this_eol = EOL_SEEN_CR;
5897 else
5898 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5899
df7492f9
KH
5900 if (eol_seen == EOL_SEEN_NONE)
5901 /* This is the first end-of-line. */
5902 eol_seen = this_eol;
5903 else if (eol_seen != this_eol)
5904 {
75f4f1ac
EZ
5905 /* The found type is different from what found before.
5906 Allow for stray ^M characters in DOS EOL files. */
5907 if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
5908 || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
5909 eol_seen = EOL_SEEN_CRLF;
5910 else
5911 {
5912 eol_seen = EOL_SEEN_LF;
5913 break;
5914 }
df7492f9
KH
5915 }
5916 if (++total == MAX_EOL_CHECK_COUNT)
5917 break;
5918 }
5919 }
73be902c 5920 }
df7492f9 5921 return eol_seen;
73be902c
KH
5922}
5923
df7492f9 5924
24a73b0a 5925static Lisp_Object
df7492f9
KH
5926adjust_coding_eol_type (coding, eol_seen)
5927 struct coding_system *coding;
5928 int eol_seen;
73be902c 5929{
0be8721c 5930 Lisp_Object eol_type;
8f924df7 5931
df7492f9
KH
5932 eol_type = CODING_ID_EOL_TYPE (coding->id);
5933 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
5934 {
5935 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5936 eol_type = Qunix;
5937 }
6f197c07 5938 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
5939 {
5940 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5941 eol_type = Qdos;
5942 }
6f197c07 5943 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
5944 {
5945 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5946 eol_type = Qmac;
5947 }
5948 return eol_type;
d46c5b12 5949}
4ed46869 5950
df7492f9
KH
5951/* Detect how a text specified in CODING is encoded. If a coding
5952 system is detected, update fields of CODING by the detected coding
5953 system. */
/* Three cases are handled, selected by CODING's current coding system:
   - its type is `undecided': scan the source bytes and run the
     per-category detectors in priority order;
   - its category is coding_category_utf_8_auto: choose between the
     two coding systems stored as a cons in coding_attr_utf_bom;
   - its category is coding_category_utf_16_auto: likewise, for
     little/big endian.
   In every case the consumed/produced counters and head_ascii are
   reset first, and CODING->mode is saved on entry and restored on
   exit.  */
0a28aafb 5954
df7492f9
KH
5955void
5956detect_coding (coding)
d46c5b12 5957 struct coding_system *coding;
d46c5b12 5958{
8f924df7 5959 const unsigned char *src, *src_end;
73cce38d 5960 int saved_mode = coding->mode;
d46c5b12 5961
df7492f9
KH
5962 coding->consumed = coding->consumed_char = 0;
5963 coding->produced = coding->produced_char = 0;
5964 coding_set_source (coding);
1c3478b0 5965
df7492f9 5966 src_end = coding->source + coding->src_bytes;
c0e16b14 5967 coding->head_ascii = 0;
1c3478b0 5968
df7492f9
KH
5969 /* If we have not yet decided the text encoding type, detect it
5970 now. */
5971 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5972 {
df7492f9 5973 int c, i;
6cb21a4f 5974 struct coding_detection_info detect_info;
2f3cbb32 5975 int null_byte_found = 0, eight_bit_found = 0;
df7492f9 5976
6cb21a4f 5977 detect_info.checked = detect_info.found = detect_info.rejected = 0;
      /* Pre-scan the source.  head_ascii counts the bytes seen before
         the first 8-bit byte.  The scan stops early once both an 8-bit
         byte and a null byte have been seen, or when the ISO-2022
         detector (tried at the first ESC, SI or SO, unless inhibited)
         returns nonzero.  */
2f3cbb32 5978 for (src = coding->source; src < src_end; src++)
d46c5b12 5979 {
df7492f9 5980 c = *src;
6cb21a4f 5981 if (c & 0x80)
6cb21a4f 5982 {
2f3cbb32 5983 eight_bit_found = 1;
2f3cbb32
KH
5984 if (null_byte_found)
5985 break;
5986 }
5987 else if (c < 0x20)
5988 {
5989 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5990 && ! inhibit_iso_escape_detection
5991 && ! detect_info.checked)
6cb21a4f 5992 {
2f3cbb32
KH
5993 if (detect_coding_iso_2022 (coding, &detect_info))
5994 {
5995 /* We have scanned the whole data. */
5996 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
c0e16b14
KH
5997 {
5998 /* We didn't find an 8-bit code. We may
5999 have found a null-byte, but it's very
6000 rare that a binary file conform to
6001 ISO-2022. */
6002 src = src_end;
6003 coding->head_ascii = src - coding->source;
6004 }
                      /* Reject every category outside
                         CATEGORY_MASK_ISO_ESCAPE.  */
6005 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
2f3cbb32
KH
6006 break;
6007 }
6008 }
97b1b294 6009 else if (! c && !inhibit_null_byte_detection)
2f3cbb32
KH
6010 {
6011 null_byte_found = 1;
6012 if (eight_bit_found)
6013 break;
6cb21a4f 6014 }
c006c0c8
KH
6015 if (! eight_bit_found)
6016 coding->head_ascii++;
6cb21a4f 6017 }
c006c0c8 6018 else if (! eight_bit_found)
c0e16b14 6019 coding->head_ascii++;
d46c5b12 6020 }
df7492f9 6021
      /* Detection proper is needed only if the pre-scan saw something
         other than a pure run of counted ASCII bytes, or the ISO-2022
         detector already found a category.  */
2f3cbb32
KH
6022 if (null_byte_found || eight_bit_found
6023 || coding->head_ascii < coding->src_bytes
6cb21a4f 6024 || detect_info.found)
d46c5b12 6025 {
ff0dacd7
KH
6026 enum coding_category category;
6027 struct coding_system *this;
df7492f9 6028
6cb21a4f
KH
6029 if (coding->head_ascii == coding->src_bytes)
6030 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
6031 for (i = 0; i < coding_category_raw_text; i++)
6032 {
6033 category = coding_priorities[i];
6034 this = coding_categories + category;
6035 if (detect_info.found & (1 << category))
24a73b0a 6036 break;
6cb21a4f
KH
6037 }
6038 else
2f3cbb32
KH
6039 {
6040 if (null_byte_found)
ff0dacd7 6041 {
                  /* With a null byte present, mark every category
                     outside CATEGORY_MASK_UTF_16 as already checked
                     and rejected, so only the UTF-16 detectors can
                     still run below.  */
2f3cbb32
KH
6042 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6043 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
ff0dacd7 6044 }
              /* Walk the categories in priority order; stop at the
                 first one that is (or becomes, after running its
                 detector) marked as found.  */
2f3cbb32
KH
6045 for (i = 0; i < coding_category_raw_text; i++)
6046 {
6047 category = coding_priorities[i];
6048 this = coding_categories + category;
6049 if (this->id < 0)
6050 {
6051 /* No coding system of this category is defined. */
6052 detect_info.rejected |= (1 << category);
6053 }
6054 else if (category >= coding_category_raw_text)
6055 continue;
6056 else if (detect_info.checked & (1 << category))
6057 {
6058 if (detect_info.found & (1 << category))
6059 break;
6060 }
6061 else if ((*(this->detector)) (coding, &detect_info)
6062 && detect_info.found & (1 << category))
6063 {
6064 if (category == coding_category_utf_16_auto)
6065 {
6066 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6067 category = coding_category_utf_16_le;
6068 else
6069 category = coding_category_utf_16_be;
6070 }
6071 break;
6072 }
6073 }
2f3cbb32 6074 }
c0e16b14
KH
6075
          /* Apply the outcome: the category at which a loop above
             stopped (I is below coding_category_raw_text only if one
             of them broke out); else no-conversion if a null byte was
             seen; else raw-text if every category was rejected; else,
             if anything was rejected, the highest-priority category
             that was not.  Otherwise CODING is left as it was.  */
6076 if (i < coding_category_raw_text)
6077 setup_coding_system (CODING_ID_NAME (this->id), coding);
6078 else if (null_byte_found)
6079 setup_coding_system (Qno_conversion, coding);
6080 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6081 == CATEGORY_MASK_ANY)
6082 setup_coding_system (Qraw_text, coding);
6083 else if (detect_info.rejected)
6084 for (i = 0; i < coding_category_raw_text; i++)
6085 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6086 {
6087 this = coding_categories + coding_priorities[i];
6088 setup_coding_system (CODING_ID_NAME (this->id), coding);
6089 break;
6090 }
d46c5b12 6091 }
b73bfc1c 6092 }
a470d443
KH
6093 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6094 == coding_category_utf_8_auto)
6095 {
6096 Lisp_Object coding_systems;
6097 struct coding_detection_info detect_info;
6098
6099 coding_systems
6100 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6101 detect_info.found = detect_info.rejected = 0;
6102 coding->head_ascii = 0;
      /* When the attribute is a cons and the UTF-8 detector succeeds,
         use its car if the detector reported CATEGORY_MASK_UTF_8_SIG
         and its cdr otherwise.  */
6103 if (CONSP (coding_systems)
6104 && detect_coding_utf_8 (coding, &detect_info))
6105 {
6106 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6107 setup_coding_system (XCAR (coding_systems), coding);
6108 else
6109 setup_coding_system (XCDR (coding_systems), coding);
6110 }
6111 }
24a73b0a
KH
6112 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6113 == coding_category_utf_16_auto)
b49a1807
KH
6114 {
6115 Lisp_Object coding_systems;
6116 struct coding_detection_info detect_info;
6117
6118 coding_systems
a470d443 6119 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
b49a1807 6120 detect_info.found = detect_info.rejected = 0;
a470d443 6121 coding->head_ascii = 0;
      /* When the attribute is a cons and the UTF-16 detector succeeds,
         use its car for CATEGORY_MASK_UTF_16_LE and its cdr for
         CATEGORY_MASK_UTF_16_BE; if neither bit is reported, CODING
         is left unchanged.  */
b49a1807 6122 if (CONSP (coding_systems)
24a73b0a 6123 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
6124 {
6125 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6126 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 6127 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
6128 setup_coding_system (XCDR (coding_systems), coding);
6129 }
6130 }
73cce38d 6131 coding->mode = saved_mode;
4ed46869 6132}
4ed46869 6133
d46c5b12 6134
aaaf0b1e 6135static void
df7492f9 6136decode_eol (coding)
aaaf0b1e 6137 struct coding_system *coding;
aaaf0b1e 6138{
24a73b0a
KH
6139 Lisp_Object eol_type;
6140 unsigned char *p, *pbeg, *pend;
3ed051d4 6141
24a73b0a
KH
6142 eol_type = CODING_ID_EOL_TYPE (coding->id);
6143 if (EQ (eol_type, Qunix))
6144 return;
6145
6146 if (NILP (coding->dst_object))
6147 pbeg = coding->destination;
6148 else
6149 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6150 pend = pbeg + coding->produced;
6151
6152 if (VECTORP (eol_type))
aaaf0b1e 6153 {
df7492f9 6154 int eol_seen = EOL_SEEN_NONE;
4ed46869 6155
24a73b0a 6156 for (p = pbeg; p < pend; p++)
aaaf0b1e 6157 {
df7492f9
KH
6158 if (*p == '\n')
6159 eol_seen |= EOL_SEEN_LF;
6160 else if (*p == '\r')
aaaf0b1e 6161 {
df7492f9 6162 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 6163 {
df7492f9
KH
6164 eol_seen |= EOL_SEEN_CRLF;
6165 p++;
aaaf0b1e 6166 }
aaaf0b1e 6167 else
df7492f9 6168 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 6169 }
aaaf0b1e 6170 }
75f4f1ac
EZ
6171 /* Handle DOS-style EOLs in a file with stray ^M characters. */
6172 if ((eol_seen & EOL_SEEN_CRLF) != 0
6173 && (eol_seen & EOL_SEEN_CR) != 0
6174 && (eol_seen & EOL_SEEN_LF) == 0)
6175 eol_seen = EOL_SEEN_CRLF;
6176 else if (eol_seen != EOL_SEEN_NONE
24a73b0a
KH
6177 && eol_seen != EOL_SEEN_LF
6178 && eol_seen != EOL_SEEN_CRLF
6179 && eol_seen != EOL_SEEN_CR)
6180 eol_seen = EOL_SEEN_LF;
df7492f9 6181 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 6182 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 6183 }
d46c5b12 6184
24a73b0a 6185 if (EQ (eol_type, Qmac))
27901516 6186 {
24a73b0a 6187 for (p = pbeg; p < pend; p++)
df7492f9
KH
6188 if (*p == '\r')
6189 *p = '\n';
4ed46869 6190 }
24a73b0a 6191 else if (EQ (eol_type, Qdos))
df7492f9 6192 {
24a73b0a 6193 int n = 0;
b73bfc1c 6194
24a73b0a
KH
6195 if (NILP (coding->dst_object))
6196 {
4347441b
KH
6197 /* Start deleting '\r' from the tail to minimize the memory
6198 movement. */
24a73b0a
KH
6199 for (p = pend - 2; p >= pbeg; p--)
6200 if (*p == '\r')
6201 {
6202 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6203 n++;
6204 }
6205 }
6206 else
6207 {
4347441b
KH
6208 int pos_byte = coding->dst_pos_byte;
6209 int pos = coding->dst_pos;
6210 int pos_end = pos + coding->produced_char - 1;
6211
6212 while (pos < pos_end)
6213 {
6214 p = BYTE_POS_ADDR (pos_byte);
6215 if (*p == '\r' && p[1] == '\n')
6216 {
6217 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6218 n++;
6219 pos_end--;
6220 }
6221 pos++;
69b8522d
KH
6222 if (coding->dst_multibyte)
6223 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6224 else
6225 pos_byte++;
4347441b 6226 }
24a73b0a
KH
6227 }
6228 coding->produced -= n;
6229 coding->produced_char -= n;
aaaf0b1e 6230 }
4ed46869
KH
6231}
6232
7d64c6ad 6233
a6f87d34
KH
6234/* Return a translation table (or list of them) from coding system
6235 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6236 decoding (ENCODEP is zero). */
7d64c6ad 6237
e6a54062 6238static Lisp_Object
09ee6fdd
KH
6239get_translation_table (attrs, encodep, max_lookup)
6240 Lisp_Object attrs;
6241 int encodep, *max_lookup;
7d64c6ad
KH
6242{
6243 Lisp_Object standard, translation_table;
09ee6fdd 6244 Lisp_Object val;
7d64c6ad
KH
6245
6246 if (encodep)
6247 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6248 standard = Vstandard_translation_table_for_encode;
6249 else
6250 translation_table = CODING_ATTR_DECODE_TBL (attrs),
6251 standard = Vstandard_translation_table_for_decode;
7d64c6ad 6252 if (NILP (translation_table))
09ee6fdd
KH
6253 translation_table = standard;
6254 else
a6f87d34 6255 {
09ee6fdd
KH
6256 if (SYMBOLP (translation_table))
6257 translation_table = Fget (translation_table, Qtranslation_table);
6258 else if (CONSP (translation_table))
6259 {
6260 translation_table = Fcopy_sequence (translation_table);
6261 for (val = translation_table; CONSP (val); val = XCDR (val))
6262 if (SYMBOLP (XCAR (val)))
6263 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6264 }
6265 if (CHAR_TABLE_P (standard))
6266 {
6267 if (CONSP (translation_table))
6268 translation_table = nconc2 (translation_table,
6269 Fcons (standard, Qnil));
6270 else
6271 translation_table = Fcons (translation_table,
6272 Fcons (standard, Qnil));
6273 }
a6f87d34 6274 }
2170c8f0
KH
6275
6276 if (max_lookup)
09ee6fdd 6277 {
2170c8f0
KH
6278 *max_lookup = 1;
6279 if (CHAR_TABLE_P (translation_table)
6280 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6281 {
6282 val = XCHAR_TABLE (translation_table)->extras[1];
6283 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6284 *max_lookup = XFASTINT (val);
6285 }
6286 else if (CONSP (translation_table))
6287 {
6288 Lisp_Object tail, val;
09ee6fdd 6289
2170c8f0
KH
6290 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6291 if (CHAR_TABLE_P (XCAR (tail))
6292 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6293 {
6294 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6295 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6296 *max_lookup = XFASTINT (val);
6297 }
6298 }
a6f87d34 6299 }
7d64c6ad
KH
6300 return translation_table;
6301}
6302
09ee6fdd
KH
/* Look up character C in TABLE, which is a char-table or a list of
   char-tables, and store the outcome in C and TRANS (both must be
   lvalues; C is evaluated more than once).

   For each char-table consulted, if the entry for C is a character,
   C is replaced by it and TRANS is reset to Qnil; with a list, the
   next table is then consulted using the new C.  If the entry is
   non-nil but not a character, it is left in TRANS and, with a list,
   the search stops.  TRANS is Qnil when no such entry was found.

   TABLE is evaluated exactly once, and the internal variable names
   carry a trailing underscore so that they cannot capture an
   identifier passed as an argument.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    Lisp_Object lookup_table_ = (table);                                \
                                                                        \
    (trans) = Qnil;                                                     \
    if (CHAR_TABLE_P (lookup_table_))                                   \
      {                                                                 \
        (trans) = CHAR_TABLE_REF (lookup_table_, (c));                  \
        if (CHARACTERP (trans))                                         \
          (c) = XFASTINT (trans), (trans) = Qnil;                       \
      }                                                                 \
    else if (CONSP (lookup_table_))                                     \
      {                                                                 \
        Lisp_Object lookup_tail_;                                       \
                                                                        \
        for (lookup_tail_ = lookup_table_; CONSP (lookup_tail_);        \
             lookup_tail_ = XCDR (lookup_tail_))                        \
          if (CHAR_TABLE_P (XCAR (lookup_tail_)))                       \
            {                                                           \
              (trans) = CHAR_TABLE_REF (XCAR (lookup_tail_), (c));      \
              if (CHARACTERP (trans))                                   \
                (c) = XFASTINT (trans), (trans) = Qnil;                 \
              else if (! NILP (trans))                                  \
                break;                                                  \
            }                                                           \
      }                                                                 \
  } while (0)
6327
7d64c6ad 6328
69a80ea3
KH
6329static Lisp_Object
6330get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
6331 Lisp_Object val;
6332 int *buf, *buf_end;
6333 int last_block;
6334 int *from_nchars, *to_nchars;
6335{
433f7f87
KH
6336 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
6337 [TO-CHAR ...]. */
69a80ea3
KH
6338 if (CONSP (val))
6339 {
433f7f87 6340 Lisp_Object from, tail;
69a80ea3
KH
6341 int i, len;
6342
433f7f87 6343 for (tail = val; CONSP (tail); tail = XCDR (tail))
69a80ea3 6344 {
433f7f87
KH
6345 val = XCAR (tail);
6346 from = XCAR (val);
6347 len = ASIZE (from);
6348 for (i = 0; i < len; i++)
6349 {
6350 if (buf + i == buf_end)
6351 {
6352 if (! last_block)
6353 return Qt;
6354 break;
6355 }
6356 if (XINT (AREF (from, i)) != buf[i])
6357 break;
6358 }
6359 if (i == len)
6360 {
6361 val = XCDR (val);
6362 *from_nchars = len;
6363 break;
6364 }
69a80ea3 6365 }
433f7f87
KH
6366 if (! CONSP (tail))
6367 return Qnil;
69a80ea3
KH
6368 }
6369 if (VECTORP (val))
6370 *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6371 else
6372 *buf = XINT (val);
6373 return val;
6374}
6375
6376
d46c5b12 6377static int
69a80ea3 6378produce_chars (coding, translation_table, last_block)
df7492f9 6379 struct coding_system *coding;
69a80ea3
KH
6380 Lisp_Object translation_table;
6381 int last_block;
4ed46869 6382{
df7492f9
KH
6383 unsigned char *dst = coding->destination + coding->produced;
6384 unsigned char *dst_end = coding->destination + coding->dst_bytes;
119852e7
KH
6385 EMACS_INT produced;
6386 EMACS_INT produced_chars = 0;
69a80ea3 6387 int carryover = 0;
4ed46869 6388
df7492f9 6389 if (! coding->chars_at_source)
4ed46869 6390 {
119852e7 6391 /* Source characters are in coding->charbuf. */
fba4576f
AS
6392 int *buf = coding->charbuf;
6393 int *buf_end = buf + coding->charbuf_used;
4ed46869 6394
db274c7a
KH
6395 if (EQ (coding->src_object, coding->dst_object))
6396 {
6397 coding_set_source (coding);
6398 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6399 }
4ed46869 6400
df7492f9 6401 while (buf < buf_end)
4ed46869 6402 {
69a80ea3 6403 int c = *buf, i;
bc4bc72a 6404
df7492f9
KH
6405 if (c >= 0)
6406 {
69a80ea3
KH
6407 int from_nchars = 1, to_nchars = 1;
6408 Lisp_Object trans = Qnil;
6409
09ee6fdd 6410 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6411 if (! NILP (trans))
69a80ea3
KH
6412 {
6413 trans = get_translation (trans, buf, buf_end, last_block,
6414 &from_nchars, &to_nchars);
6415 if (EQ (trans, Qt))
6416 break;
6417 c = *buf;
6418 }
6419
6420 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6421 {
6422 dst = alloc_destination (coding,
6423 buf_end - buf
6424 + MAX_MULTIBYTE_LENGTH * to_nchars,
6425 dst);
db274c7a
KH
6426 if (EQ (coding->src_object, coding->dst_object))
6427 {
6428 coding_set_source (coding);
6429 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6430 }
6431 else
6432 dst_end = coding->destination + coding->dst_bytes;
69a80ea3
KH
6433 }
6434
433f7f87 6435 for (i = 0; i < to_nchars; i++)
69a80ea3 6436 {
433f7f87
KH
6437 if (i > 0)
6438 c = XINT (AREF (trans, i));
69a80ea3
KH
6439 if (coding->dst_multibyte
6440 || ! CHAR_BYTE8_P (c))
db274c7a 6441 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
69a80ea3
KH
6442 else
6443 *dst++ = CHAR_TO_BYTE8 (c);
6444 }
6445 produced_chars += to_nchars;
6446 *buf++ = to_nchars;
6447 while (--from_nchars > 0)
6448 *buf++ = 0;
d46c5b12 6449 }
df7492f9 6450 else
69a80ea3
KH
6451 /* This is an annotation datum. (-C) is the length. */
6452 buf += -c;
4ed46869 6453 }
69a80ea3 6454 carryover = buf_end - buf;
4ed46869 6455 }
fa42c37f 6456 else
fa42c37f 6457 {
119852e7 6458 /* Source characters are at coding->source. */
8f924df7 6459 const unsigned char *src = coding->source;
119852e7 6460 const unsigned char *src_end = src + coding->consumed;
4ed46869 6461
db274c7a
KH
6462 if (EQ (coding->dst_object, coding->src_object))
6463 dst_end = (unsigned char *) src;
df7492f9 6464 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 6465 {
df7492f9 6466 if (coding->src_multibyte)
fa42c37f 6467 {
71c81426 6468 int multibytep = 1;
4533845d 6469 EMACS_INT consumed_chars = 0;
d46c5b12 6470
df7492f9
KH
6471 while (1)
6472 {
8f924df7 6473 const unsigned char *src_base = src;
df7492f9 6474 int c;
b73bfc1c 6475
df7492f9 6476 ONE_MORE_BYTE (c);
119852e7 6477 if (dst == dst_end)
df7492f9 6478 {
119852e7
KH
6479 if (EQ (coding->src_object, coding->dst_object))
6480 dst_end = (unsigned char *) src;
6481 if (dst == dst_end)
df7492f9 6482 {
119852e7
KH
6483 EMACS_INT offset = src - coding->source;
6484
6485 dst = alloc_destination (coding, src_end - src + 1,
6486 dst);
6487 dst_end = coding->destination + coding->dst_bytes;
6488 coding_set_source (coding);
6489 src = coding->source + offset;
6490 src_end = coding->source + coding->src_bytes;
db274c7a
KH
6491 if (EQ (coding->src_object, coding->dst_object))
6492 dst_end = (unsigned char *) src;
df7492f9 6493 }
df7492f9
KH
6494 }
6495 *dst++ = c;
6496 produced_chars++;
6497 }
6498 no_more_source:
6499 ;
fa42c37f
KH
6500 }
6501 else
df7492f9
KH
6502 while (src < src_end)
6503 {
71c81426 6504 int multibytep = 1;
df7492f9 6505 int c = *src++;
b73bfc1c 6506
df7492f9
KH
6507 if (dst >= dst_end - 1)
6508 {
2c78b7e1 6509 if (EQ (coding->src_object, coding->dst_object))
8f924df7 6510 dst_end = (unsigned char *) src;
2c78b7e1
KH
6511 if (dst >= dst_end - 1)
6512 {
119852e7 6513 EMACS_INT offset = src - coding->source;
db274c7a 6514 EMACS_INT more_bytes;
119852e7 6515
db274c7a
KH
6516 if (EQ (coding->src_object, coding->dst_object))
6517 more_bytes = ((src_end - src) / 2) + 2;
6518 else
6519 more_bytes = src_end - src + 2;
6520 dst = alloc_destination (coding, more_bytes, dst);
2c78b7e1
KH
6521 dst_end = coding->destination + coding->dst_bytes;
6522 coding_set_source (coding);
119852e7 6523 src = coding->source + offset;
2c78b7e1 6524 src_end = coding->source + coding->src_bytes;
db274c7a
KH
6525 if (EQ (coding->src_object, coding->dst_object))
6526 dst_end = (unsigned char *) src;
2c78b7e1 6527 }
df7492f9
KH
6528 }
6529 EMIT_ONE_BYTE (c);
6530 }
d46c5b12 6531 }
df7492f9
KH
6532 else
6533 {
6534 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 6535 {
119852e7 6536 EMACS_INT require = coding->src_bytes - coding->dst_bytes;
4ed46869 6537
df7492f9 6538 if (require > 0)
fa42c37f 6539 {
df7492f9
KH
6540 EMACS_INT offset = src - coding->source;
6541
6542 dst = alloc_destination (coding, require, dst);
6543 coding_set_source (coding);
6544 src = coding->source + offset;
6545 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
6546 }
6547 }
119852e7 6548 produced_chars = coding->consumed_char;
df7492f9 6549 while (src < src_end)
14daee73 6550 *dst++ = *src++;
fa42c37f
KH
6551 }
6552 }
6553
df7492f9 6554 produced = dst - (coding->destination + coding->produced);
284201e4 6555 if (BUFFERP (coding->dst_object) && produced_chars > 0)
df7492f9
KH
6556 insert_from_gap (produced_chars, produced);
6557 coding->produced += produced;
6558 coding->produced_char += produced_chars;
69a80ea3 6559 return carryover;
fa42c37f
KH
6560}
6561
ff0dacd7
KH
6562/* Compose text in CODING->object according to the annotation data at
6563 CHARBUF. CHARBUF is an array:
6564 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 6565 */
4ed46869 6566
df7492f9 6567static INLINE void
69a80ea3 6568produce_composition (coding, charbuf, pos)
4ed46869 6569 struct coding_system *coding;
df7492f9 6570 int *charbuf;
69a80ea3 6571 EMACS_INT pos;
4ed46869 6572{
df7492f9 6573 int len;
69a80ea3 6574 EMACS_INT to;
df7492f9 6575 enum composition_method method;
df7492f9 6576 Lisp_Object components;
fa42c37f 6577
df7492f9 6578 len = -charbuf[0];
69a80ea3 6579 to = pos + charbuf[2];
9ffd559c
KH
6580 if (to <= pos)
6581 return;
69a80ea3 6582 method = (enum composition_method) (charbuf[3]);
d46c5b12 6583
df7492f9
KH
6584 if (method == COMPOSITION_RELATIVE)
6585 components = Qnil;
9ffd559c
KH
6586 else if (method >= COMPOSITION_WITH_RULE
6587 && method <= COMPOSITION_WITH_RULE_ALTCHARS)
d46c5b12 6588 {
df7492f9
KH
6589 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6590 int i;
b73bfc1c 6591
69a80ea3
KH
6592 len -= 4;
6593 charbuf += 4;
df7492f9 6594 for (i = 0; i < len; i++)
9ffd559c
KH
6595 {
6596 args[i] = make_number (charbuf[i]);
f75c90a9 6597 if (charbuf[i] < 0)
9ffd559c
KH
6598 return;
6599 }
df7492f9
KH
6600 components = (method == COMPOSITION_WITH_ALTCHARS
6601 ? Fstring (len, args) : Fvector (len, args));
d46c5b12 6602 }
9ffd559c
KH
6603 else
6604 return;
69a80ea3 6605 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
6606}
6607
d46c5b12 6608
ff0dacd7
KH
6609/* Put `charset' property on text in CODING->object according to
6610 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 6611 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 6612 */
d46c5b12 6613
ff0dacd7 6614static INLINE void
69a80ea3 6615produce_charset (coding, charbuf, pos)
d46c5b12 6616 struct coding_system *coding;
ff0dacd7 6617 int *charbuf;
69a80ea3 6618 EMACS_INT pos;
d46c5b12 6619{
69a80ea3
KH
6620 EMACS_INT from = pos - charbuf[2];
6621 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 6622
69a80ea3 6623 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
6624 Qcharset, CHARSET_NAME (charset),
6625 coding->dst_object);
d46c5b12
KH
6626}
6627
d46c5b12 6628
/* Initial number of `int' elements tried for CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the work area CODING->charbuf with alloca and record its
   element count in CODING->charbuf_size.  The size starts at
   CHARBUF_SIZE and is halved after each failed attempt while it is
   still larger than 1024.

   This must be a macro: the memory comes from alloca, so it has to be
   taken in the frame of the function that uses it.  If no buffer
   could be obtained, the macro records
   CODING_RESULT_INSUFFICIENT_MEM and executes `return
   CODING->result' in the ENCLOSING function, which therefore must
   return a value compatible with that.

   CODING is evaluated more than once; pass an expression without
   side effects.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        record_conversion_result ((coding),                             \
                                  CODING_RESULT_INSUFFICIENT_MEM);      \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
4ed46869 6650
d46c5b12
KH
6651
6652static void
69a80ea3 6653produce_annotation (coding, pos)
d46c5b12 6654 struct coding_system *coding;
69a80ea3 6655 EMACS_INT pos;
d46c5b12 6656{
df7492f9
KH
6657 int *charbuf = coding->charbuf;
6658 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 6659
ff0dacd7
KH
6660 if (NILP (coding->dst_object))
6661 return;
d46c5b12 6662
df7492f9 6663 while (charbuf < charbuf_end)
a84f1519 6664 {
df7492f9 6665 if (*charbuf >= 0)
69a80ea3 6666 pos += *charbuf++;
d46c5b12 6667 else
d46c5b12 6668 {
df7492f9 6669 int len = -*charbuf;
ff0dacd7 6670 switch (charbuf[1])
df7492f9
KH
6671 {
6672 case CODING_ANNOTATE_COMPOSITION_MASK:
69a80ea3 6673 produce_composition (coding, charbuf, pos);
df7492f9 6674 break;
ff0dacd7 6675 case CODING_ANNOTATE_CHARSET_MASK:
69a80ea3 6676 produce_charset (coding, charbuf, pos);
ff0dacd7 6677 break;
df7492f9
KH
6678 default:
6679 abort ();
6680 }
6681 charbuf += len;
d46c5b12 6682 }
a84f1519 6683 }
d46c5b12
KH
6684}
6685
df7492f9
KH
6686/* Decode the data at CODING->src_object into CODING->dst_object.
6687 CODING->src_object is a buffer, a string, or nil.
6688 CODING->dst_object is a buffer.
d46c5b12 6689
df7492f9
KH
6690 If CODING->src_object is a buffer, it must be the current buffer.
6691 In this case, if CODING->src_pos is positive, it is a position of
6692 the source text in the buffer, otherwise, the source text is in the
6693 gap area of the buffer, and CODING->src_pos specifies the offset of
6694 the text from GPT (which must be the same as PT). If this is the
6695 same buffer as CODING->dst_object, CODING->src_pos must be
6696 negative.
d46c5b12 6697
b6828792 6698 If CODING->src_object is a string, CODING->src_pos is an index to
df7492f9 6699 that string.
d46c5b12 6700
df7492f9
KH
6701 If CODING->src_object is nil, CODING->source must already point to
6702 the non-relocatable memory area. In this case, CODING->src_pos is
6703 an offset from CODING->source.
73be902c 6704
df7492f9
KH
6705 The decoded data is inserted at the current point of the buffer
6706 CODING->dst_object.
6707*/
d46c5b12 6708
df7492f9
KH
6709static int
6710decode_coding (coding)
d46c5b12 6711 struct coding_system *coding;
d46c5b12 6712{
df7492f9 6713 Lisp_Object attrs;
24a73b0a 6714 Lisp_Object undo_list;
7d64c6ad 6715 Lisp_Object translation_table;
69a80ea3
KH
6716 int carryover;
6717 int i;
d46c5b12 6718
df7492f9
KH
6719 if (BUFFERP (coding->src_object)
6720 && coding->src_pos > 0
6721 && coding->src_pos < GPT
6722 && coding->src_pos + coding->src_chars > GPT)
6723 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 6724
24a73b0a 6725 undo_list = Qt;
df7492f9 6726 if (BUFFERP (coding->dst_object))
1c3478b0 6727 {
df7492f9
KH
6728 if (current_buffer != XBUFFER (coding->dst_object))
6729 set_buffer_internal (XBUFFER (coding->dst_object));
6730 if (GPT != PT)
6731 move_gap_both (PT, PT_BYTE);
24a73b0a
KH
6732 undo_list = current_buffer->undo_list;
6733 current_buffer->undo_list = Qt;
1c3478b0
KH
6734 }
6735
df7492f9
KH
6736 coding->consumed = coding->consumed_char = 0;
6737 coding->produced = coding->produced_char = 0;
6738 coding->chars_at_source = 0;
065e3595 6739 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 6740 coding->errors = 0;
1c3478b0 6741
df7492f9
KH
6742 ALLOC_CONVERSION_WORK_AREA (coding);
6743
6744 attrs = CODING_ID_ATTRS (coding->id);
2170c8f0 6745 translation_table = get_translation_table (attrs, 0, NULL);
df7492f9 6746
69a80ea3 6747 carryover = 0;
df7492f9 6748 do
b73bfc1c 6749 {
69a80ea3
KH
6750 EMACS_INT pos = coding->dst_pos + coding->produced_char;
6751
df7492f9
KH
6752 coding_set_source (coding);
6753 coding->annotated = 0;
69a80ea3 6754 coding->charbuf_used = carryover;
df7492f9 6755 (*(coding->decoder)) (coding);
df7492f9 6756 coding_set_destination (coding);
69a80ea3 6757 carryover = produce_chars (coding, translation_table, 0);
df7492f9 6758 if (coding->annotated)
69a80ea3
KH
6759 produce_annotation (coding, pos);
6760 for (i = 0; i < carryover; i++)
6761 coding->charbuf[i]
6762 = coding->charbuf[coding->charbuf_used - carryover + i];
d46c5b12 6763 }
df7492f9 6764 while (coding->consumed < coding->src_bytes
54b367bb
KH
6765 && (coding->result == CODING_RESULT_SUCCESS
6766 || coding->result == CODING_RESULT_INVALID_SRC));
d46c5b12 6767
69a80ea3
KH
6768 if (carryover > 0)
6769 {
6770 coding_set_destination (coding);
6771 coding->charbuf_used = carryover;
6772 produce_chars (coding, translation_table, 1);
6773 }
6774
df7492f9
KH
6775 coding->carryover_bytes = 0;
6776 if (coding->consumed < coding->src_bytes)
d46c5b12 6777 {
df7492f9 6778 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 6779 const unsigned char *src;
df7492f9
KH
6780
6781 coding_set_source (coding);
6782 coding_set_destination (coding);
6783 src = coding->source + coding->consumed;
6784
6785 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 6786 {
df7492f9
KH
6787 /* Flush out unprocessed data as binary chars. We are sure
6788 that the number of data is less than the size of
6789 coding->charbuf. */
065e3595 6790 coding->charbuf_used = 0;
b2dab6c8
JR
6791 coding->chars_at_source = 0;
6792
df7492f9 6793 while (nbytes-- > 0)
1c3478b0 6794 {
df7492f9 6795 int c = *src++;
98725083 6796
1c91457d
KH
6797 if (c & 0x80)
6798 c = BYTE8_TO_CHAR (c);
6799 coding->charbuf[coding->charbuf_used++] = c;
1c3478b0 6800 }
f6cbaf43 6801 produce_chars (coding, Qnil, 1);
d46c5b12 6802 }
d46c5b12 6803 else
df7492f9
KH
6804 {
6805 /* Record unprocessed bytes in coding->carryover. We are
6806 sure that the number of data is less than the size of
6807 coding->carryover. */
6808 unsigned char *p = coding->carryover;
6809
f289d375
KH
6810 if (nbytes > sizeof coding->carryover)
6811 nbytes = sizeof coding->carryover;
df7492f9
KH
6812 coding->carryover_bytes = nbytes;
6813 while (nbytes-- > 0)
6814 *p++ = *src++;
1c3478b0 6815 }
df7492f9 6816 coding->consumed = coding->src_bytes;
b73bfc1c 6817 }
69f76525 6818
4347441b
KH
6819 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6820 decode_eol (coding);
24a73b0a
KH
6821 if (BUFFERP (coding->dst_object))
6822 {
6823 current_buffer->undo_list = undo_list;
6824 record_insert (coding->dst_pos, coding->produced_char);
6825 }
73be902c 6826 return coding->result;
4ed46869
KH
6827}
6828
aaaf0b1e 6829
e1c23804 6830/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
6831 ending before LIMIT of CODING->src_object (buffer or string), store
6832 the data in BUF, set *STOP to a starting position of the next
6833 composition (if any) or to LIMIT, and return the address of the
6834 next element of BUF.
6835
6836 If such an annotation is not found, set *STOP to a starting
6837 position of a composition after POS (if any) or to LIMIT, and
6838 return BUF. */
6839
6840static INLINE int *
6841handle_composition_annotation (pos, limit, coding, buf, stop)
6842 EMACS_INT pos, limit;
aaaf0b1e 6843 struct coding_system *coding;
ff0dacd7
KH
6844 int *buf;
6845 EMACS_INT *stop;
aaaf0b1e 6846{
ff0dacd7
KH
6847 EMACS_INT start, end;
6848 Lisp_Object prop;
aaaf0b1e 6849
ff0dacd7
KH
6850 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6851 || end > limit)
6852 *stop = limit;
6853 else if (start > pos)
6854 *stop = start;
6855 else
aaaf0b1e 6856 {
ff0dacd7 6857 if (start == pos)
aaaf0b1e 6858 {
ff0dacd7
KH
6859 /* We found a composition. Store the corresponding
6860 annotation data in BUF. */
6861 int *head = buf;
6862 enum composition_method method = COMPOSITION_METHOD (prop);
6863 int nchars = COMPOSITION_LENGTH (prop);
6864
69a80ea3 6865 ADD_COMPOSITION_DATA (buf, nchars, method);
ff0dacd7 6866 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 6867 {
ff0dacd7
KH
6868 Lisp_Object components;
6869 int len, i, i_byte;
6870
6871 components = COMPOSITION_COMPONENTS (prop);
6872 if (VECTORP (components))
aaaf0b1e 6873 {
ff0dacd7
KH
6874 len = XVECTOR (components)->size;
6875 for (i = 0; i < len; i++)
6876 *buf++ = XINT (AREF (components, i));
aaaf0b1e 6877 }
ff0dacd7 6878 else if (STRINGP (components))
aaaf0b1e 6879 {
8f924df7 6880 len = SCHARS (components);
ff0dacd7
KH
6881 i = i_byte = 0;
6882 while (i < len)
6883 {
6884 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6885 buf++;
6886 }
6887 }
6888 else if (INTEGERP (components))
6889 {
6890 len = 1;
6891 *buf++ = XINT (components);
6892 }
6893 else if (CONSP (components))
6894 {
6895 for (len = 0; CONSP (components);
6896 len++, components = XCDR (components))
6897 *buf++ = XINT (XCAR (components));
aaaf0b1e 6898 }
aaaf0b1e 6899 else
ff0dacd7
KH
6900 abort ();
6901 *head -= len;
aaaf0b1e 6902 }
aaaf0b1e 6903 }
ff0dacd7
KH
6904
6905 if (find_composition (end, limit, &start, &end, &prop,
6906 coding->src_object)
6907 && end <= limit)
6908 *stop = start;
6909 else
6910 *stop = limit;
aaaf0b1e 6911 }
ff0dacd7
KH
6912 return buf;
6913}
6914
6915
e1c23804 6916/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
6917 CODING->src_object (buffer of string), store the data in BUF, set
6918 *STOP to the position where the value of `charset' property changes
6919 (limiting by LIMIT), and return the address of the next element of
6920 BUF.
6921
6922 If the property value is nil, set *STOP to the position where the
6923 property value is non-nil (limiting by LIMIT), and return BUF. */
6924
6925static INLINE int *
6926handle_charset_annotation (pos, limit, coding, buf, stop)
6927 EMACS_INT pos, limit;
6928 struct coding_system *coding;
6929 int *buf;
6930 EMACS_INT *stop;
6931{
6932 Lisp_Object val, next;
6933 int id;
6934
6935 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6936 if (! NILP (val) && CHARSETP (val))
6937 id = XINT (CHARSET_SYMBOL_ID (val));
6938 else
6939 id = -1;
69a80ea3 6940 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
6941 next = Fnext_single_property_change (make_number (pos), Qcharset,
6942 coding->src_object,
6943 make_number (limit));
6944 *stop = XINT (next);
6945 return buf;
6946}
6947
6948
df7492f9 6949static void
09ee6fdd 6950consume_chars (coding, translation_table, max_lookup)
df7492f9 6951 struct coding_system *coding;
433f7f87 6952 Lisp_Object translation_table;
09ee6fdd 6953 int max_lookup;
df7492f9
KH
6954{
6955 int *buf = coding->charbuf;
ff0dacd7 6956 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 6957 const unsigned char *src = coding->source + coding->consumed;
4776e638 6958 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
6959 EMACS_INT pos = coding->src_pos + coding->consumed_char;
6960 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
6961 int multibytep = coding->src_multibyte;
6962 Lisp_Object eol_type;
6963 int c;
ff0dacd7 6964 EMACS_INT stop, stop_composition, stop_charset;
09ee6fdd 6965 int *lookup_buf = NULL;
433f7f87
KH
6966
6967 if (! NILP (translation_table))
09ee6fdd 6968 lookup_buf = alloca (sizeof (int) * max_lookup);
88993dfd 6969
df7492f9
KH
6970 eol_type = CODING_ID_EOL_TYPE (coding->id);
6971 if (VECTORP (eol_type))
6972 eol_type = Qunix;
88993dfd 6973
df7492f9
KH
6974 /* Note: composition handling is not yet implemented. */
6975 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 6976
0b5670c9
KH
6977 if (NILP (coding->src_object))
6978 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 6979 else
0b5670c9
KH
6980 {
6981 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6982 stop = stop_composition = pos;
6983 else
6984 stop = stop_composition = end_pos;
6985 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6986 stop = stop_charset = pos;
6987 else
6988 stop_charset = end_pos;
6989 }
ec6d2bb8 6990
24a73b0a 6991 /* Compensate for CRLF and conversion. */
ff0dacd7 6992 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 6993 while (buf < buf_end)
aaaf0b1e 6994 {
433f7f87
KH
6995 Lisp_Object trans;
6996
df7492f9 6997 if (pos == stop)
ec6d2bb8 6998 {
df7492f9
KH
6999 if (pos == end_pos)
7000 break;
ff0dacd7
KH
7001 if (pos == stop_composition)
7002 buf = handle_composition_annotation (pos, end_pos, coding,
7003 buf, &stop_composition);
7004 if (pos == stop_charset)
7005 buf = handle_charset_annotation (pos, end_pos, coding,
7006 buf, &stop_charset);
7007 stop = (stop_composition < stop_charset
7008 ? stop_composition : stop_charset);
df7492f9
KH
7009 }
7010
7011 if (! multibytep)
4776e638 7012 {
d3e4cb56 7013 EMACS_INT bytes;
aaaf0b1e 7014
ea29edf2
KH
7015 if (coding->encoder == encode_coding_raw_text)
7016 c = *src++, pos++;
7017 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
db274c7a 7018 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
4776e638 7019 else
f03caae0 7020 c = BYTE8_TO_CHAR (*src), src++, pos++;
4776e638 7021 }
df7492f9 7022 else
db274c7a 7023 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
df7492f9
KH
7024 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7025 c = '\n';
7026 if (! EQ (eol_type, Qunix))
aaaf0b1e 7027 {
df7492f9 7028 if (c == '\n')
aaaf0b1e 7029 {
df7492f9
KH
7030 if (EQ (eol_type, Qdos))
7031 *buf++ = '\r';
7032 else
7033 c = '\r';
aaaf0b1e
KH
7034 }
7035 }
433f7f87 7036
e6a54062 7037 trans = Qnil;
09ee6fdd 7038 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 7039 if (NILP (trans))
433f7f87
KH
7040 *buf++ = c;
7041 else
7042 {
7043 int from_nchars = 1, to_nchars = 1;
7044 int *lookup_buf_end;
7045 const unsigned char *p = src;
7046 int i;
7047
7048 lookup_buf[0] = c;
7049 for (i = 1; i < max_lookup && p < src_end; i++)
7050 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7051 lookup_buf_end = lookup_buf + i;
7052 trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
7053 &from_nchars, &to_nchars);
7054 if (EQ (trans, Qt)
7055 || buf + to_nchars > buf_end)
7056 break;
7057 *buf++ = *lookup_buf;
7058 for (i = 1; i < to_nchars; i++)
7059 *buf++ = XINT (AREF (trans, i));
7060 for (i = 1; i < from_nchars; i++, pos++)
7061 src += MULTIBYTE_LENGTH_NO_CHECK (src);
7062 }
aaaf0b1e 7063 }
ec6d2bb8 7064
df7492f9
KH
7065 coding->consumed = src - coding->source;
7066 coding->consumed_char = pos - coding->src_pos;
7067 coding->charbuf_used = buf - coding->charbuf;
7068 coding->chars_at_source = 0;
aaaf0b1e
KH
7069}
7070
4ed46869 7071
df7492f9
KH
7072/* Encode the text at CODING->src_object into CODING->dst_object.
7073 CODING->src_object is a buffer or a string.
7074 CODING->dst_object is a buffer or nil.
7075
7076 If CODING->src_object is a buffer, it must be the current buffer.
7077 In this case, if CODING->src_pos is positive, it is a position of
7078 the source text in the buffer, otherwise. the source text is in the
7079 gap area of the buffer, and coding->src_pos specifies the offset of
7080 the text from GPT (which must be the same as PT). If this is the
7081 same buffer as CODING->dst_object, CODING->src_pos must be
7082 negative and CODING should not have `pre-write-conversion'.
7083
7084 If CODING->src_object is a string, CODING should not have
7085 `pre-write-conversion'.
7086
7087 If CODING->dst_object is a buffer, the encoded data is inserted at
7088 the current point of that buffer.
7089
7090 If CODING->dst_object is nil, the encoded data is placed at the
7091 memory area specified by CODING->destination. */
7092
7093static int
7094encode_coding (coding)
4ed46869 7095 struct coding_system *coding;
4ed46869 7096{
df7492f9 7097 Lisp_Object attrs;
7d64c6ad 7098 Lisp_Object translation_table;
09ee6fdd 7099 int max_lookup;
9861e777 7100
df7492f9 7101 attrs = CODING_ID_ATTRS (coding->id);
ea29edf2
KH
7102 if (coding->encoder == encode_coding_raw_text)
7103 translation_table = Qnil, max_lookup = 0;
7104 else
7105 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 7106
df7492f9 7107 if (BUFFERP (coding->dst_object))
8844fa83 7108 {
df7492f9
KH
7109 set_buffer_internal (XBUFFER (coding->dst_object));
7110 coding->dst_multibyte
7111 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 7112 }
4ed46869 7113
b73bfc1c 7114 coding->consumed = coding->consumed_char = 0;
df7492f9 7115 coding->produced = coding->produced_char = 0;
065e3595 7116 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 7117 coding->errors = 0;
b73bfc1c 7118
df7492f9 7119 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 7120
df7492f9
KH
7121 do {
7122 coding_set_source (coding);
09ee6fdd 7123 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
7124 coding_set_destination (coding);
7125 (*(coding->encoder)) (coding);
7126 } while (coding->consumed_char < coding->src_chars);
7127
284201e4 7128 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
df7492f9
KH
7129 insert_from_gap (coding->produced_char, coding->produced);
7130
7131 return (coding->result);
ec6d2bb8
KH
7132}
7133
fb88bf2d 7134
24a73b0a
KH
7135/* Name (or base name) of work buffer for code conversion. */
7136static Lisp_Object Vcode_conversion_workbuf_name;
d46c5b12 7137
24a73b0a
KH
7138/* A working buffer used by the top level conversion. Once it is
7139 created, it is never destroyed. It has the name
7140 Vcode_conversion_workbuf_name. The other working buffers are
7141 destroyed after the use is finished, and their names are modified
7142 versions of Vcode_conversion_workbuf_name. */
7143static Lisp_Object Vcode_conversion_reused_workbuf;
b73bfc1c 7144
24a73b0a
KH
7145/* 1 iff Vcode_conversion_reused_workbuf is already in use. */
7146static int reused_workbuf_in_use;
4ed46869 7147
24a73b0a
KH
7148
7149/* Return a working buffer of code convesion. MULTIBYTE specifies the
7150 multibyteness of returning buffer. */
b73bfc1c 7151
f6cbaf43 7152static Lisp_Object
24a73b0a 7153make_conversion_work_buffer (multibyte)
f6cbaf43 7154 int multibyte;
df7492f9 7155{
24a73b0a
KH
7156 Lisp_Object name, workbuf;
7157 struct buffer *current;
4ed46869 7158
24a73b0a 7159 if (reused_workbuf_in_use++)
065e3595
KH
7160 {
7161 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7162 workbuf = Fget_buffer_create (name);
7163 }
df7492f9 7164 else
065e3595 7165 {
159bd5a2 7166 if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
a993c7a1
KH
7167 Vcode_conversion_reused_workbuf
7168 = Fget_buffer_create (Vcode_conversion_workbuf_name);
7169 workbuf = Vcode_conversion_reused_workbuf;
065e3595 7170 }
24a73b0a
KH
7171 current = current_buffer;
7172 set_buffer_internal (XBUFFER (workbuf));
df36ff1f
CY
7173 /* We can't allow modification hooks to run in the work buffer. For
7174 instance, directory_files_internal assumes that file decoding
7175 doesn't compile new regexps. */
7176 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
3ed051d4 7177 Ferase_buffer ();
df7492f9 7178 current_buffer->undo_list = Qt;
24a73b0a 7179 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
df7492f9 7180 set_buffer_internal (current);
24a73b0a 7181 return workbuf;
df7492f9 7182}
d46c5b12 7183
24a73b0a 7184
4776e638 7185static Lisp_Object
24a73b0a
KH
7186code_conversion_restore (arg)
7187 Lisp_Object arg;
4776e638 7188{
24a73b0a 7189 Lisp_Object current, workbuf;
948bdcf3 7190 struct gcpro gcpro1;
24a73b0a 7191
948bdcf3 7192 GCPRO1 (arg);
24a73b0a
KH
7193 current = XCAR (arg);
7194 workbuf = XCDR (arg);
7195 if (! NILP (workbuf))
7196 {
7197 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7198 reused_workbuf_in_use = 0;
7199 else if (! NILP (Fbuffer_live_p (workbuf)))
7200 Fkill_buffer (workbuf);
7201 }
7202 set_buffer_internal (XBUFFER (current));
948bdcf3 7203 UNGCPRO;
4776e638
KH
7204 return Qnil;
7205}
b73bfc1c 7206
24a73b0a
KH
7207Lisp_Object
7208code_conversion_save (with_work_buf, multibyte)
4776e638 7209 int with_work_buf, multibyte;
df7492f9 7210{
24a73b0a 7211 Lisp_Object workbuf = Qnil;
b73bfc1c 7212
4776e638 7213 if (with_work_buf)
24a73b0a
KH
7214 workbuf = make_conversion_work_buffer (multibyte);
7215 record_unwind_protect (code_conversion_restore,
7216 Fcons (Fcurrent_buffer (), workbuf));
4776e638 7217 return workbuf;
df7492f9 7218}
d46c5b12 7219
df7492f9
KH
7220int
7221decode_coding_gap (coding, chars, bytes)
7222 struct coding_system *coding;
7223 EMACS_INT chars, bytes;
7224{
7225 int count = specpdl_ptr - specpdl;
5e5c78be 7226 Lisp_Object attrs;
fb88bf2d 7227
24a73b0a 7228 code_conversion_save (0, 0);
ec6d2bb8 7229
24a73b0a 7230 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7231 coding->src_chars = chars;
7232 coding->src_bytes = bytes;
7233 coding->src_pos = -chars;
7234 coding->src_pos_byte = -bytes;
7235 coding->src_multibyte = chars < bytes;
24a73b0a 7236 coding->dst_object = coding->src_object;
df7492f9
KH
7237 coding->dst_pos = PT;
7238 coding->dst_pos_byte = PT_BYTE;
71c81426 7239 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869 7240
df7492f9
KH
7241 if (CODING_REQUIRE_DETECTION (coding))
7242 detect_coding (coding);
8f924df7 7243
9286b333 7244 coding->mode |= CODING_MODE_LAST_BLOCK;
287c57d7 7245 current_buffer->text->inhibit_shrinking = 1;
df7492f9 7246 decode_coding (coding);
287c57d7 7247 current_buffer->text->inhibit_shrinking = 0;
d46c5b12 7248
5e5c78be
KH
7249 attrs = CODING_ID_ATTRS (coding->id);
7250 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 7251 {
5e5c78be
KH
7252 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7253 Lisp_Object val;
7254
7255 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
7256 val = call1 (CODING_ATTR_POST_READ (attrs),
7257 make_number (coding->produced_char));
5e5c78be
KH
7258 CHECK_NATNUM (val);
7259 coding->produced_char += Z - prev_Z;
7260 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 7261 }
4ed46869 7262
df7492f9 7263 unbind_to (count, Qnil);
b73bfc1c
KH
7264 return coding->result;
7265}
52d41803 7266
4ed46869 7267int
df7492f9 7268encode_coding_gap (coding, chars, bytes)
4ed46869 7269 struct coding_system *coding;
df7492f9 7270 EMACS_INT chars, bytes;
4ed46869 7271{
df7492f9 7272 int count = specpdl_ptr - specpdl;
4ed46869 7273
24a73b0a 7274 code_conversion_save (0, 0);
4ed46869 7275
24a73b0a 7276 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7277 coding->src_chars = chars;
7278 coding->src_bytes = bytes;
7279 coding->src_pos = -chars;
7280 coding->src_pos_byte = -bytes;
7281 coding->src_multibyte = chars < bytes;
7282 coding->dst_object = coding->src_object;
7283 coding->dst_pos = PT;
7284 coding->dst_pos_byte = PT_BYTE;
4ed46869 7285
df7492f9 7286 encode_coding (coding);
b73bfc1c 7287
df7492f9
KH
7288 unbind_to (count, Qnil);
7289 return coding->result;
7290}
4ed46869 7291
d46c5b12 7292
df7492f9
KH
7293/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7294 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 7295
df7492f9 7296 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 7297
df7492f9
KH
7298 If it is a buffer, the text is at point of the buffer. FROM and TO
7299 are positions in the buffer.
b73bfc1c 7300
df7492f9
KH
7301 If it is a string, the text is at the beginning of the string.
7302 FROM and TO are indices to the string.
4ed46869 7303
df7492f9
KH
7304 If it is nil, the text is at coding->source. FROM and TO are
7305 indices to coding->source.
bb10be8b 7306
df7492f9 7307 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 7308
df7492f9
KH
7309 If it is a buffer, the decoded text is inserted at point of the
7310 buffer. If the buffer is the same as SRC_OBJECT, the source text
7311 is deleted.
4ed46869 7312
df7492f9
KH
7313 If it is Qt, a string is made from the decoded text, and
7314 set in CODING->dst_object.
d46c5b12 7315
df7492f9 7316 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 7317 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
7318 CODING->destination by xmalloc. If the decoded text is longer than
7319 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7320 */
d46c5b12 7321
df7492f9
KH
7322void
7323decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7324 dst_object)
d46c5b12 7325 struct coding_system *coding;
df7492f9
KH
7326 Lisp_Object src_object;
7327 EMACS_INT from, from_byte, to, to_byte;
7328 Lisp_Object dst_object;
d46c5b12 7329{
df7492f9
KH
7330 int count = specpdl_ptr - specpdl;
7331 unsigned char *destination;
7332 EMACS_INT dst_bytes;
7333 EMACS_INT chars = to - from;
7334 EMACS_INT bytes = to_byte - from_byte;
7335 Lisp_Object attrs;
4776e638 7336 int saved_pt = -1, saved_pt_byte;
64cedb0c 7337 int need_marker_adjustment = 0;
b3bfad50 7338 Lisp_Object old_deactivate_mark;
d46c5b12 7339
b3bfad50 7340 old_deactivate_mark = Vdeactivate_mark;
93dec019 7341
df7492f9 7342 if (NILP (dst_object))
d46c5b12 7343 {
df7492f9
KH
7344 destination = coding->destination;
7345 dst_bytes = coding->dst_bytes;
d46c5b12 7346 }
93dec019 7347
df7492f9
KH
7348 coding->src_object = src_object;
7349 coding->src_chars = chars;
7350 coding->src_bytes = bytes;
7351 coding->src_multibyte = chars < bytes;
70ad9fc4 7352
df7492f9 7353 if (STRINGP (src_object))
d46c5b12 7354 {
df7492f9
KH
7355 coding->src_pos = from;
7356 coding->src_pos_byte = from_byte;
d46c5b12 7357 }
df7492f9 7358 else if (BUFFERP (src_object))
88993dfd 7359 {
df7492f9
KH
7360 set_buffer_internal (XBUFFER (src_object));
7361 if (from != GPT)
7362 move_gap_both (from, from_byte);
7363 if (EQ (src_object, dst_object))
fb88bf2d 7364 {
64cedb0c
KH
7365 struct Lisp_Marker *tail;
7366
7367 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7368 {
7369 tail->need_adjustment
7370 = tail->charpos == (tail->insertion_type ? from : to);
7371 need_marker_adjustment |= tail->need_adjustment;
7372 }
4776e638 7373 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9 7374 TEMP_SET_PT_BOTH (from, from_byte);
f4a3cc44 7375 current_buffer->text->inhibit_shrinking = 1;
df7492f9
KH
7376 del_range_both (from, from_byte, to, to_byte, 1);
7377 coding->src_pos = -chars;
7378 coding->src_pos_byte = -bytes;
fb88bf2d 7379 }
df7492f9 7380 else
fb88bf2d 7381 {
df7492f9
KH
7382 coding->src_pos = from;
7383 coding->src_pos_byte = from_byte;
fb88bf2d 7384 }
88993dfd
KH
7385 }
7386
df7492f9
KH
7387 if (CODING_REQUIRE_DETECTION (coding))
7388 detect_coding (coding);
7389 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 7390
2cb26057
KH
7391 if (EQ (dst_object, Qt)
7392 || (! NILP (CODING_ATTR_POST_READ (attrs))
7393 && NILP (dst_object)))
b73bfc1c 7394 {
a1567c45
SM
7395 coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7396 coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
df7492f9
KH
7397 coding->dst_pos = BEG;
7398 coding->dst_pos_byte = BEG_BYTE;
b73bfc1c 7399 }
df7492f9 7400 else if (BUFFERP (dst_object))
d46c5b12 7401 {
24a73b0a 7402 code_conversion_save (0, 0);
df7492f9
KH
7403 coding->dst_object = dst_object;
7404 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7405 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7406 coding->dst_multibyte
7407 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
7408 }
7409 else
7410 {
24a73b0a 7411 code_conversion_save (0, 0);
df7492f9 7412 coding->dst_object = Qnil;
0154725e
SM
7413 /* Most callers presume this will return a multibyte result, and they
7414 won't use `binary' or `raw-text' anyway, so let's not worry about
7415 CODING_FOR_UNIBYTE. */
bb555731 7416 coding->dst_multibyte = 1;
d46c5b12
KH
7417 }
7418
df7492f9 7419 decode_coding (coding);
fa46990e 7420
df7492f9
KH
7421 if (BUFFERP (coding->dst_object))
7422 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 7423
df7492f9 7424 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 7425 {
b3bfad50 7426 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
df7492f9 7427 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 7428 Lisp_Object val;
d46c5b12 7429
c0cc7f7f 7430 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
b3bfad50
KH
7431 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7432 old_deactivate_mark);
d4850d67
KH
7433 val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7434 make_number (coding->produced_char));
df7492f9
KH
7435 UNGCPRO;
7436 CHECK_NATNUM (val);
7437 coding->produced_char += Z - prev_Z;
7438 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 7439 }
de79a6a5 7440
df7492f9 7441 if (EQ (dst_object, Qt))
ec6d2bb8 7442 {
df7492f9
KH
7443 coding->dst_object = Fbuffer_string ();
7444 }
7445 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7446 {
7447 set_buffer_internal (XBUFFER (coding->dst_object));
7448 if (dst_bytes < coding->produced)
7449 {
b3bfad50 7450 destination = xrealloc (destination, coding->produced);
df7492f9
KH
7451 if (! destination)
7452 {
065e3595
KH
7453 record_conversion_result (coding,
7454 CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
7455 unbind_to (count, Qnil);
7456 return;
7457 }
7458 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7459 move_gap_both (BEGV, BEGV_BYTE);
7460 bcopy (BEGV_ADDR, destination, coding->produced);
7461 coding->destination = destination;
d46c5b12 7462 }
ec6d2bb8 7463 }
b73bfc1c 7464
4776e638
KH
7465 if (saved_pt >= 0)
7466 {
7467 /* This is the case of:
7468 (BUFFERP (src_object) && EQ (src_object, dst_object))
7469 As we have moved PT while replacing the original buffer
7470 contents, we must recover it now. */
7471 set_buffer_internal (XBUFFER (src_object));
f4a3cc44 7472 current_buffer->text->inhibit_shrinking = 0;
4776e638
KH
7473 if (saved_pt < from)
7474 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7475 else if (saved_pt < from + chars)
7476 TEMP_SET_PT_BOTH (from, from_byte);
7477 else if (! NILP (current_buffer->enable_multibyte_characters))
7478 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7479 saved_pt_byte + (coding->produced - bytes));
7480 else
7481 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7482 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7483
7484 if (need_marker_adjustment)
7485 {
7486 struct Lisp_Marker *tail;
7487
7488 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7489 if (tail->need_adjustment)
7490 {
7491 tail->need_adjustment = 0;
7492 if (tail->insertion_type)
7493 {
7494 tail->bytepos = from_byte;
7495 tail->charpos = from;
7496 }
7497 else
7498 {
7499 tail->bytepos = from_byte + coding->produced;
7500 tail->charpos
7501 = (NILP (current_buffer->enable_multibyte_characters)
7502 ? tail->bytepos : from + coding->produced_char);
7503 }
7504 }
7505 }
d46c5b12 7506 }
4776e638 7507
b3bfad50 7508 Vdeactivate_mark = old_deactivate_mark;
065e3595 7509 unbind_to (count, coding->dst_object);
d46c5b12
KH
7510}
7511
d46c5b12 7512
df7492f9
KH
7513void
7514encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7515 dst_object)
d46c5b12 7516 struct coding_system *coding;
df7492f9
KH
7517 Lisp_Object src_object;
7518 EMACS_INT from, from_byte, to, to_byte;
7519 Lisp_Object dst_object;
d46c5b12 7520{
b73bfc1c 7521 int count = specpdl_ptr - specpdl;
df7492f9
KH
7522 EMACS_INT chars = to - from;
7523 EMACS_INT bytes = to_byte - from_byte;
7524 Lisp_Object attrs;
4776e638 7525 int saved_pt = -1, saved_pt_byte;
64cedb0c 7526 int need_marker_adjustment = 0;
c02d943b 7527 int kill_src_buffer = 0;
b3bfad50 7528 Lisp_Object old_deactivate_mark;
df7492f9 7529
b3bfad50 7530 old_deactivate_mark = Vdeactivate_mark;
df7492f9
KH
7531
7532 coding->src_object = src_object;
7533 coding->src_chars = chars;
7534 coding->src_bytes = bytes;
7535 coding->src_multibyte = chars < bytes;
7536
7537 attrs = CODING_ID_ATTRS (coding->id);
7538
64cedb0c
KH
7539 if (EQ (src_object, dst_object))
7540 {
7541 struct Lisp_Marker *tail;
7542
7543 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7544 {
7545 tail->need_adjustment
7546 = tail->charpos == (tail->insertion_type ? from : to);
7547 need_marker_adjustment |= tail->need_adjustment;
7548 }
7549 }
7550
df7492f9 7551 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 7552 {
24a73b0a 7553 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
7554 set_buffer_internal (XBUFFER (coding->src_object));
7555 if (STRINGP (src_object))
7556 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7557 else if (BUFFERP (src_object))
7558 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7559 else
7560 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 7561
df7492f9
KH
7562 if (EQ (src_object, dst_object))
7563 {
7564 set_buffer_internal (XBUFFER (src_object));
4776e638 7565 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
7566 del_range_both (from, from_byte, to, to_byte, 1);
7567 set_buffer_internal (XBUFFER (coding->src_object));
7568 }
7569
d4850d67
KH
7570 {
7571 Lisp_Object args[3];
b3bfad50 7572 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
d4850d67 7573
b3bfad50
KH
7574 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7575 old_deactivate_mark);
d4850d67
KH
7576 args[0] = CODING_ATTR_PRE_WRITE (attrs);
7577 args[1] = make_number (BEG);
7578 args[2] = make_number (Z);
7579 safe_call (3, args);
b3bfad50 7580 UNGCPRO;
d4850d67 7581 }
c02d943b
KH
7582 if (XBUFFER (coding->src_object) != current_buffer)
7583 kill_src_buffer = 1;
ac87bbef 7584 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
7585 if (BEG != GPT)
7586 move_gap_both (BEG, BEG_BYTE);
7587 coding->src_chars = Z - BEG;
7588 coding->src_bytes = Z_BYTE - BEG_BYTE;
7589 coding->src_pos = BEG;
7590 coding->src_pos_byte = BEG_BYTE;
7591 coding->src_multibyte = Z < Z_BYTE;
7592 }
7593 else if (STRINGP (src_object))
d46c5b12 7594 {
24a73b0a 7595 code_conversion_save (0, 0);
df7492f9
KH
7596 coding->src_pos = from;
7597 coding->src_pos_byte = from_byte;
b73bfc1c 7598 }
df7492f9 7599 else if (BUFFERP (src_object))
b73bfc1c 7600 {
24a73b0a 7601 code_conversion_save (0, 0);
df7492f9 7602 set_buffer_internal (XBUFFER (src_object));
df7492f9 7603 if (EQ (src_object, dst_object))
d46c5b12 7604 {
4776e638 7605 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
7606 coding->src_object = del_range_1 (from, to, 1, 1);
7607 coding->src_pos = 0;
7608 coding->src_pos_byte = 0;
d46c5b12 7609 }
df7492f9 7610 else
d46c5b12 7611 {
ff0dacd7
KH
7612 if (from < GPT && to >= GPT)
7613 move_gap_both (from, from_byte);
df7492f9
KH
7614 coding->src_pos = from;
7615 coding->src_pos_byte = from_byte;
d46c5b12 7616 }
d46c5b12 7617 }
4776e638 7618 else
24a73b0a 7619 code_conversion_save (0, 0);
d46c5b12 7620
df7492f9 7621 if (BUFFERP (dst_object))
88993dfd 7622 {
df7492f9 7623 coding->dst_object = dst_object;
28f67a95
KH
7624 if (EQ (src_object, dst_object))
7625 {
7626 coding->dst_pos = from;
7627 coding->dst_pos_byte = from_byte;
7628 }
7629 else
7630 {
319a3947
KH
7631 struct buffer *current = current_buffer;
7632
7633 set_buffer_temp (XBUFFER (dst_object));
7634 coding->dst_pos = PT;
7635 coding->dst_pos_byte = PT_BYTE;
7636 move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7637 set_buffer_temp (current);
28f67a95 7638 }
df7492f9
KH
7639 coding->dst_multibyte
7640 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 7641 }
df7492f9 7642 else if (EQ (dst_object, Qt))
d46c5b12 7643 {
df7492f9 7644 coding->dst_object = Qnil;
df7492f9 7645 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
7646 if (coding->dst_bytes == 0)
7647 coding->dst_bytes = 1;
7648 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 7649 coding->dst_multibyte = 0;
d46c5b12
KH
7650 }
7651 else
7652 {
df7492f9
KH
7653 coding->dst_object = Qnil;
7654 coding->dst_multibyte = 0;
d46c5b12
KH
7655 }
7656
df7492f9 7657 encode_coding (coding);
d46c5b12 7658
df7492f9 7659 if (EQ (dst_object, Qt))
d46c5b12 7660 {
df7492f9
KH
7661 if (BUFFERP (coding->dst_object))
7662 coding->dst_object = Fbuffer_string ();
7663 else
d46c5b12 7664 {
df7492f9
KH
7665 coding->dst_object
7666 = make_unibyte_string ((char *) coding->destination,
7667 coding->produced);
7668 xfree (coding->destination);
d46c5b12 7669 }
4ed46869 7670 }
d46c5b12 7671
4776e638
KH
7672 if (saved_pt >= 0)
7673 {
7674 /* This is the case of:
7675 (BUFFERP (src_object) && EQ (src_object, dst_object))
7676 As we have moved PT while replacing the original buffer
7677 contents, we must recover it now. */
7678 set_buffer_internal (XBUFFER (src_object));
7679 if (saved_pt < from)
7680 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7681 else if (saved_pt < from + chars)
7682 TEMP_SET_PT_BOTH (from, from_byte);
7683 else if (! NILP (current_buffer->enable_multibyte_characters))
7684 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7685 saved_pt_byte + (coding->produced - bytes));
d46c5b12 7686 else
4776e638
KH
7687 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7688 saved_pt_byte + (coding->produced - bytes));
64cedb0c
KH
7689
7690 if (need_marker_adjustment)
7691 {
7692 struct Lisp_Marker *tail;
7693
7694 for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7695 if (tail->need_adjustment)
7696 {
7697 tail->need_adjustment = 0;
7698 if (tail->insertion_type)
7699 {
7700 tail->bytepos = from_byte;
7701 tail->charpos = from;
7702 }
7703 else
7704 {
7705 tail->bytepos = from_byte + coding->produced;
7706 tail->charpos
7707 = (NILP (current_buffer->enable_multibyte_characters)
7708 ? tail->bytepos : from + coding->produced_char);
7709 }
7710 }
7711 }
4776e638
KH
7712 }
7713
c02d943b
KH
7714 if (kill_src_buffer)
7715 Fkill_buffer (coding->src_object);
b3bfad50
KH
7716
7717 Vdeactivate_mark = old_deactivate_mark;
df7492f9 7718 unbind_to (count, Qnil);
b73bfc1c
KH
7719}
7720
df7492f9 7721
b73bfc1c 7722Lisp_Object
df7492f9 7723preferred_coding_system ()
b73bfc1c 7724{
df7492f9 7725 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 7726
df7492f9 7727 return CODING_ID_NAME (id);
4ed46869
KH
7728}
7729
7730\f
7731#ifdef emacs
1397dc18 7732/*** 11. Emacs Lisp library functions ***/
4ed46869 7733
4ed46869 7734DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 7735 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 7736See the documentation of `define-coding-system' for information
48b0f3ae 7737about coding-system objects. */)
d4a1d553
JB
7738 (object)
7739 Lisp_Object object;
4ed46869 7740{
d4a1d553
JB
7741 if (NILP (object)
7742 || CODING_SYSTEM_ID (object) >= 0)
44e8490d 7743 return Qt;
d4a1d553
JB
7744 if (! SYMBOLP (object)
7745 || NILP (Fget (object, Qcoding_system_define_form)))
44e8490d
KH
7746 return Qnil;
7747 return Qt;
4ed46869
KH
7748}
7749
9d991de8
RS
7750DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7751 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
7752 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
7753 (prompt)
4ed46869
KH
7754 Lisp_Object prompt;
7755{
e0e989f6 7756 Lisp_Object val;
9d991de8
RS
7757 do
7758 {
4608c386
KH
7759 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7760 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 7761 }
8f924df7 7762 while (SCHARS (val) == 0);
e0e989f6 7763 return (Fintern (val, Qnil));
4ed46869
KH
7764}
7765
9b787f3e 7766DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae 7767 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
c7183fb8
GM
7768If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7769Ignores case when completing coding systems (all Emacs coding systems
7770are lower-case). */)
48b0f3ae 7771 (prompt, default_coding_system)
9b787f3e 7772 Lisp_Object prompt, default_coding_system;
4ed46869 7773{
f44d27ce 7774 Lisp_Object val;
c7183fb8
GM
7775 int count = SPECPDL_INDEX ();
7776
9b787f3e 7777 if (SYMBOLP (default_coding_system))
57d25e6f 7778 default_coding_system = SYMBOL_NAME (default_coding_system);
c7183fb8 7779 specbind (Qcompletion_ignore_case, Qt);
4608c386 7780 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
7781 Qt, Qnil, Qcoding_system_history,
7782 default_coding_system, Qnil);
c7183fb8 7783 unbind_to (count, Qnil);
8f924df7 7784 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
7785}
7786
7787DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7788 1, 1, 0,
48b0f3ae 7789 doc: /* Check validity of CODING-SYSTEM.
9ffd559c
KH
7790If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7791It is valid if it is nil or a symbol defined as a coding system by the
7792function `define-coding-system'. */)
df7492f9 7793 (coding_system)
4ed46869
KH
7794 Lisp_Object coding_system;
7795{
44e8490d
KH
7796 Lisp_Object define_form;
7797
7798 define_form = Fget (coding_system, Qcoding_system_define_form);
7799 if (! NILP (define_form))
7800 {
7801 Fput (coding_system, Qcoding_system_define_form, Qnil);
7802 safe_eval (define_form);
7803 }
4ed46869
KH
7804 if (!NILP (Fcoding_system_p (coding_system)))
7805 return coding_system;
fcad4ec4 7806 xsignal1 (Qcoding_system_error, coding_system);
4ed46869 7807}
df7492f9 7808
3a73fa5d 7809\f
89528eb3
KH
7810/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
7811 HIGHEST is nonzero, return the coding system of the highest
7812 priority among the detected coding systems. Otherwize return a
7813 list of detected coding systems sorted by their priorities. If
7814 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7815 multibyte form but contains only ASCII and eight-bit chars.
7816 Otherwise, the bytes are raw bytes.
7817
7818 CODING-SYSTEM controls the detection as below:
7819
7820 If it is nil, detect both text-format and eol-format. If the
7821 text-format part of CODING-SYSTEM is already specified
7822 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
7823 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7824 detect only text-format. */
7825
d46c5b12 7826Lisp_Object
24a73b0a
KH
7827detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7828 coding_system)
8f924df7 7829 const unsigned char *src;
13818c30
SM
7830 EMACS_INT src_chars, src_bytes;
7831 int highest;
0a28aafb 7832 int multibytep;
df7492f9 7833 Lisp_Object coding_system;
4ed46869 7834{
8f924df7 7835 const unsigned char *src_end = src + src_bytes;
df7492f9 7836 Lisp_Object attrs, eol_type;
4533845d 7837 Lisp_Object val = Qnil;
df7492f9 7838 struct coding_system coding;
89528eb3 7839 int id;
ff0dacd7 7840 struct coding_detection_info detect_info;
24a73b0a 7841 enum coding_category base_category;
2f3cbb32 7842 int null_byte_found = 0, eight_bit_found = 0;
b73bfc1c 7843
df7492f9
KH
7844 if (NILP (coding_system))
7845 coding_system = Qundecided;
7846 setup_coding_system (coding_system, &coding);
7847 attrs = CODING_ID_ATTRS (coding.id);
7848 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 7849 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 7850
df7492f9 7851 coding.source = src;
24a73b0a 7852 coding.src_chars = src_chars;
df7492f9
KH
7853 coding.src_bytes = src_bytes;
7854 coding.src_multibyte = multibytep;
7855 coding.consumed = 0;
89528eb3 7856 coding.mode |= CODING_MODE_LAST_BLOCK;
c0e16b14 7857 coding.head_ascii = 0;
d46c5b12 7858
ff0dacd7 7859 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 7860
89528eb3 7861 /* At first, detect text-format if necessary. */
24a73b0a
KH
7862 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7863 if (base_category == coding_category_undecided)
4ed46869 7864 {
ff0dacd7
KH
7865 enum coding_category category;
7866 struct coding_system *this;
7867 int c, i;
88993dfd 7868
24a73b0a 7869 /* Skip all ASCII bytes except for a few ISO2022 controls. */
2f3cbb32 7870 for (; src < src_end; src++)
4ed46869 7871 {
df7492f9 7872 c = *src;
6cb21a4f 7873 if (c & 0x80)
6cb21a4f 7874 {
2f3cbb32 7875 eight_bit_found = 1;
2f3cbb32
KH
7876 if (null_byte_found)
7877 break;
7878 }
c0e16b14 7879 else if (c < 0x20)
2f3cbb32
KH
7880 {
7881 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7882 && ! inhibit_iso_escape_detection
7883 && ! detect_info.checked)
6cb21a4f 7884 {
2f3cbb32
KH
7885 if (detect_coding_iso_2022 (&coding, &detect_info))
7886 {
7887 /* We have scanned the whole data. */
7888 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
c0e16b14
KH
7889 {
7890 /* We didn't find an 8-bit code. We may
7891 have found a null-byte, but it's very
7892 rare that a binary file confirm to
7893 ISO-2022. */
7894 src = src_end;
7895 coding.head_ascii = src - coding.source;
7896 }
7897 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
2f3cbb32
KH
7898 break;
7899 }
7900 }
97b1b294 7901 else if (! c && !inhibit_null_byte_detection)
2f3cbb32
KH
7902 {
7903 null_byte_found = 1;
7904 if (eight_bit_found)
7905 break;
6cb21a4f 7906 }
c006c0c8
KH
7907 if (! eight_bit_found)
7908 coding.head_ascii++;
6cb21a4f 7909 }
c006c0c8 7910 else if (! eight_bit_found)
c0e16b14 7911 coding.head_ascii++;
4ed46869 7912 }
88993dfd 7913
2f3cbb32
KH
7914 if (null_byte_found || eight_bit_found
7915 || coding.head_ascii < coding.src_bytes
6cb21a4f
KH
7916 || detect_info.found)
7917 {
2f3cbb32 7918 if (coding.head_ascii == coding.src_bytes)
6cb21a4f
KH
7919 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
7920 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7 7921 {
6cb21a4f 7922 category = coding_priorities[i];
c7266f4a 7923 this = coding_categories + category;
6cb21a4f 7924 if (detect_info.found & (1 << category))
ff0dacd7
KH
7925 break;
7926 }
6cb21a4f 7927 else
2f3cbb32
KH
7928 {
7929 if (null_byte_found)
7930 {
7931 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
7932 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
7933 }
7934 for (i = 0; i < coding_category_raw_text; i++)
7935 {
7936 category = coding_priorities[i];
7937 this = coding_categories + category;
6cb21a4f 7938
2f3cbb32
KH
7939 if (this->id < 0)
7940 {
7941 /* No coding system of this category is defined. */
7942 detect_info.rejected |= (1 << category);
7943 }
7944 else if (category >= coding_category_raw_text)
7945 continue;
7946 else if (detect_info.checked & (1 << category))
7947 {
7948 if (highest
7949 && (detect_info.found & (1 << category)))
6cb21a4f 7950 break;
2f3cbb32
KH
7951 }
7952 else if ((*(this->detector)) (&coding, &detect_info)
7953 && highest
7954 && (detect_info.found & (1 << category)))
7955 {
7956 if (category == coding_category_utf_16_auto)
7957 {
7958 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7959 category = coding_category_utf_16_le;
7960 else
7961 category = coding_category_utf_16_be;
7962 }
7963 break;
7964 }
7965 }
7966 }
6cb21a4f 7967 }
ec6d2bb8 7968
4cddb209
KH
7969 if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
7970 || null_byte_found)
ec6d2bb8 7971 {
ff0dacd7 7972 detect_info.found = CATEGORY_MASK_RAW_TEXT;
4cddb209 7973 id = CODING_SYSTEM_ID (Qno_conversion);
89528eb3
KH
7974 val = Fcons (make_number (id), Qnil);
7975 }
ff0dacd7 7976 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 7977 {
ff0dacd7 7978 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
7979 id = coding_categories[coding_category_undecided].id;
7980 val = Fcons (make_number (id), Qnil);
7981 }
7982 else if (highest)
7983 {
ff0dacd7 7984 if (detect_info.found)
ec6d2bb8 7985 {
ff0dacd7
KH
7986 detect_info.found = 1 << category;
7987 val = Fcons (make_number (this->id), Qnil);
7988 }
7989 else
7990 for (i = 0; i < coding_category_raw_text; i++)
7991 if (! (detect_info.rejected & (1 << coding_priorities[i])))
7992 {
7993 detect_info.found = 1 << coding_priorities[i];
7994 id = coding_categories[coding_priorities[i]].id;
7995 val = Fcons (make_number (id), Qnil);
7996 break;
7997 }
7998 }
89528eb3
KH
7999 else
8000 {
ff0dacd7
KH
8001 int mask = detect_info.rejected | detect_info.found;
8002 int found = 0;
ec6d2bb8 8003
89528eb3 8004 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
8005 {
8006 category = coding_priorities[i];
8007 if (! (mask & (1 << category)))
ec6d2bb8 8008 {
ff0dacd7
KH
8009 found |= 1 << category;
8010 id = coding_categories[category].id;
c7266f4a
KH
8011 if (id >= 0)
8012 val = Fcons (make_number (id), val);
ff0dacd7
KH
8013 }
8014 }
8015 for (i = coding_category_raw_text - 1; i >= 0; i--)
8016 {
8017 category = coding_priorities[i];
8018 if (detect_info.found & (1 << category))
8019 {
8020 id = coding_categories[category].id;
8021 val = Fcons (make_number (id), val);
ec6d2bb8 8022 }
ec6d2bb8 8023 }
ff0dacd7 8024 detect_info.found |= found;
ec6d2bb8 8025 }
ec6d2bb8 8026 }
a470d443
KH
8027 else if (base_category == coding_category_utf_8_auto)
8028 {
8029 if (detect_coding_utf_8 (&coding, &detect_info))
8030 {
8031 struct coding_system *this;
8032
8033 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8034 this = coding_categories + coding_category_utf_8_sig;
8035 else
8036 this = coding_categories + coding_category_utf_8_nosig;
8037 val = Fcons (make_number (this->id), Qnil);
8038 }
8039 }
24a73b0a
KH
8040 else if (base_category == coding_category_utf_16_auto)
8041 {
8042 if (detect_coding_utf_16 (&coding, &detect_info))
8043 {
24a73b0a
KH
8044 struct coding_system *this;
8045
8046 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8047 this = coding_categories + coding_category_utf_16_le;
8048 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8049 this = coding_categories + coding_category_utf_16_be;
8050 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8051 this = coding_categories + coding_category_utf_16_be_nosig;
8052 else
8053 this = coding_categories + coding_category_utf_16_le_nosig;
8054 val = Fcons (make_number (this->id), Qnil);
8055 }
8056 }
df7492f9
KH
8057 else
8058 {
ff0dacd7 8059 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 8060 val = Fcons (make_number (coding.id), Qnil);
4ed46869 8061 }
df7492f9 8062
89528eb3 8063 /* Then, detect eol-format if necessary. */
df7492f9 8064 {
4533845d 8065 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
df7492f9
KH
8066 Lisp_Object tail;
8067
89528eb3
KH
8068 if (VECTORP (eol_type))
8069 {
ff0dacd7 8070 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
2f3cbb32
KH
8071 {
8072 if (null_byte_found)
8073 normal_eol = EOL_SEEN_LF;
8074 else
8075 normal_eol = detect_eol (coding.source, src_bytes,
8076 coding_category_raw_text);
8077 }
ff0dacd7
KH
8078 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8079 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
8080 utf_16_be_eol = detect_eol (coding.source, src_bytes,
8081 coding_category_utf_16_be);
ff0dacd7
KH
8082 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8083 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
8084 utf_16_le_eol = detect_eol (coding.source, src_bytes,
8085 coding_category_utf_16_le);
8086 }
8087 else
8088 {
8089 if (EQ (eol_type, Qunix))
8090 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8091 else if (EQ (eol_type, Qdos))
8092 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8093 else
8094 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8095 }
8096
df7492f9
KH
8097 for (tail = val; CONSP (tail); tail = XCDR (tail))
8098 {
89528eb3 8099 enum coding_category category;
df7492f9 8100 int this_eol;
89528eb3
KH
8101
8102 id = XINT (XCAR (tail));
8103 attrs = CODING_ID_ATTRS (id);
8104 category = XINT (CODING_ATTR_CATEGORY (attrs));
8105 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
8106 if (VECTORP (eol_type))
8107 {
89528eb3
KH
8108 if (category == coding_category_utf_16_be
8109 || category == coding_category_utf_16_be_nosig)
8110 this_eol = utf_16_be_eol;
8111 else if (category == coding_category_utf_16_le
8112 || category == coding_category_utf_16_le_nosig)
8113 this_eol = utf_16_le_eol;
df7492f9 8114 else
89528eb3
KH
8115 this_eol = normal_eol;
8116
df7492f9
KH
8117 if (this_eol == EOL_SEEN_LF)
8118 XSETCAR (tail, AREF (eol_type, 0));
8119 else if (this_eol == EOL_SEEN_CRLF)
8120 XSETCAR (tail, AREF (eol_type, 1));
8121 else if (this_eol == EOL_SEEN_CR)
8122 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
8123 else
8124 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 8125 }
89528eb3
KH
8126 else
8127 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
8128 }
8129 }
ec6d2bb8 8130
4533845d 8131 return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
ec6d2bb8
KH
8132}
8133
ec6d2bb8 8134
d46c5b12
KH
8135DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8136 2, 3, 0,
48b0f3ae
PJ
8137 doc: /* Detect coding system of the text in the region between START and END.
8138Return a list of possible coding systems ordered by priority.
ec6d2bb8 8139
12e0131a 8140If only ASCII characters are found (except for such ISO-2022 control
d4a1d553
JB
8141characters as ESC), it returns a list of single element `undecided'
8142or its subsidiary coding system according to a detected end-of-line
8143format.
ec6d2bb8 8144
48b0f3ae
PJ
8145If optional argument HIGHEST is non-nil, return the coding system of
8146highest priority. */)
8147 (start, end, highest)
d46c5b12
KH
8148 Lisp_Object start, end, highest;
8149{
8150 int from, to;
8151 int from_byte, to_byte;
ec6d2bb8 8152
b7826503
PJ
8153 CHECK_NUMBER_COERCE_MARKER (start);
8154 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 8155
d46c5b12
KH
8156 validate_region (&start, &end);
8157 from = XINT (start), to = XINT (end);
8158 from_byte = CHAR_TO_BYTE (from);
8159 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 8160
d46c5b12
KH
8161 if (from < GPT && to >= GPT)
8162 move_gap_both (to, to_byte);
c210f766 8163
d46c5b12 8164 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 8165 to - from, to_byte - from_byte,
0a28aafb
KH
8166 !NILP (highest),
8167 !NILP (current_buffer
df7492f9
KH
8168 ->enable_multibyte_characters),
8169 Qnil);
ec6d2bb8
KH
8170}
8171
d46c5b12
KH
8172DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8173 1, 2, 0,
48b0f3ae
PJ
8174 doc: /* Detect coding system of the text in STRING.
8175Return a list of possible coding systems ordered by priority.
fb88bf2d 8176
12e0131a 8177If only ASCII characters are found (except for such ISO-2022 control
d4a1d553
JB
8178characters as ESC), it returns a list of single element `undecided'
8179or its subsidiary coding system according to a detected end-of-line
8180format.
d46c5b12 8181
48b0f3ae
PJ
8182If optional argument HIGHEST is non-nil, return the coding system of
8183highest priority. */)
8184 (string, highest)
d46c5b12
KH
8185 Lisp_Object string, highest;
8186{
b7826503 8187 CHECK_STRING (string);
b73bfc1c 8188
24a73b0a
KH
8189 return detect_coding_system (SDATA (string),
8190 SCHARS (string), SBYTES (string),
8f924df7 8191 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 8192 Qnil);
4ed46869 8193}
4ed46869 8194
b73bfc1c 8195
df7492f9
KH
8196static INLINE int
8197char_encodable_p (c, attrs)
8198 int c;
8199 Lisp_Object attrs;
05e6f5dc 8200{
df7492f9 8201 Lisp_Object tail;
df7492f9 8202 struct charset *charset;
7d64c6ad 8203 Lisp_Object translation_table;
d46c5b12 8204
7d64c6ad 8205 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 8206 if (! NILP (translation_table))
7d64c6ad 8207 c = translate_char (translation_table, c);
df7492f9
KH
8208 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8209 CONSP (tail); tail = XCDR (tail))
e133c8fa 8210 {
df7492f9
KH
8211 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8212 if (CHAR_CHARSET_P (c, charset))
8213 break;
e133c8fa 8214 }
df7492f9 8215 return (! NILP (tail));
05e6f5dc 8216}
83fa074f 8217
fb88bf2d 8218
df7492f9
KH
8219/* Return a list of coding systems that safely encode the text between
8220 START and END. If EXCLUDE is non-nil, it is a list of coding
8221 systems not to check. The returned list doesn't contain any such
48468dac 8222 coding systems. In any case, if the text contains only ASCII or is
df7492f9 8223 unibyte, return t. */
e077cc80 8224
df7492f9
KH
8225DEFUN ("find-coding-systems-region-internal",
8226 Ffind_coding_systems_region_internal,
8227 Sfind_coding_systems_region_internal, 2, 3, 0,
8228 doc: /* Internal use only. */)
8229 (start, end, exclude)
8230 Lisp_Object start, end, exclude;
8231{
8232 Lisp_Object coding_attrs_list, safe_codings;
8233 EMACS_INT start_byte, end_byte;
7c78e542 8234 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
8235 int c;
8236 Lisp_Object tail, elt;
d46c5b12 8237
df7492f9
KH
8238 if (STRINGP (start))
8239 {
8240 if (!STRING_MULTIBYTE (start)
8f924df7 8241 || SCHARS (start) == SBYTES (start))
df7492f9
KH
8242 return Qt;
8243 start_byte = 0;
8f924df7 8244 end_byte = SBYTES (start);
df7492f9
KH
8245 }
8246 else
d46c5b12 8247 {
df7492f9
KH
8248 CHECK_NUMBER_COERCE_MARKER (start);
8249 CHECK_NUMBER_COERCE_MARKER (end);
8250 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8251 args_out_of_range (start, end);
8252 if (NILP (current_buffer->enable_multibyte_characters))
8253 return Qt;
8254 start_byte = CHAR_TO_BYTE (XINT (start));
8255 end_byte = CHAR_TO_BYTE (XINT (end));
8256 if (XINT (end) - XINT (start) == end_byte - start_byte)
8257 return Qt;
d46c5b12 8258
e1c23804 8259 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 8260 {
e1c23804
DL
8261 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8262 move_gap_both (XINT (start), start_byte);
df7492f9 8263 else
e1c23804 8264 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
8265 }
8266 }
8267
df7492f9
KH
8268 coding_attrs_list = Qnil;
8269 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8270 if (NILP (exclude)
8271 || NILP (Fmemq (XCAR (tail), exclude)))
8272 {
8273 Lisp_Object attrs;
d46c5b12 8274
df7492f9
KH
8275 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8276 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8277 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
8278 {
8279 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 8280 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
8281 coding_attrs_list = Fcons (attrs, coding_attrs_list);
8282 }
df7492f9 8283 }
d46c5b12 8284
df7492f9 8285 if (STRINGP (start))
8f924df7 8286 p = pbeg = SDATA (start);
df7492f9
KH
8287 else
8288 p = pbeg = BYTE_POS_ADDR (start_byte);
8289 pend = p + (end_byte - start_byte);
b843d1ae 8290
df7492f9
KH
8291 while (p < pend && ASCII_BYTE_P (*p)) p++;
8292 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 8293
05e6f5dc 8294 while (p < pend)
72d1a715 8295 {
df7492f9
KH
8296 if (ASCII_BYTE_P (*p))
8297 p++;
72d1a715
RS
8298 else
8299 {
df7492f9 8300 c = STRING_CHAR_ADVANCE (p);
12410ef1 8301
df7492f9
KH
8302 charset_map_loaded = 0;
8303 for (tail = coding_attrs_list; CONSP (tail);)
8304 {
8305 elt = XCAR (tail);
8306 if (NILP (elt))
8307 tail = XCDR (tail);
8308 else if (char_encodable_p (c, elt))
8309 tail = XCDR (tail);
8310 else if (CONSP (XCDR (tail)))
8311 {
8312 XSETCAR (tail, XCAR (XCDR (tail)));
8313 XSETCDR (tail, XCDR (XCDR (tail)));
8314 }
8315 else
8316 {
8317 XSETCAR (tail, Qnil);
8318 tail = XCDR (tail);
8319 }
8320 }
8321 if (charset_map_loaded)
8322 {
8323 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 8324
df7492f9 8325 if (STRINGP (start))
8f924df7 8326 pbeg = SDATA (start);
df7492f9
KH
8327 else
8328 pbeg = BYTE_POS_ADDR (start_byte);
8329 p = pbeg + p_offset;
8330 pend = pbeg + pend_offset;
8331 }
8332 }
ec6d2bb8 8333 }
fb88bf2d 8334
988b3759 8335 safe_codings = list2 (Qraw_text, Qno_conversion);
df7492f9
KH
8336 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8337 if (! NILP (XCAR (tail)))
8338 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 8339
05e6f5dc
KH
8340 return safe_codings;
8341}
4956c225 8342
d46c5b12 8343
8f924df7
KH
8344DEFUN ("unencodable-char-position", Funencodable_char_position,
8345 Sunencodable_char_position, 3, 5, 0,
8346 doc: /*
8347Return position of first un-encodable character in a region.
d4a1d553 8348START and END specify the region and CODING-SYSTEM specifies the
8f924df7 8349encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 8350
8f924df7
KH
8351If optional 4th argument COUNT is non-nil, it specifies at most how
8352many un-encodable characters to search. In this case, the value is a
8353list of positions.
d46c5b12 8354
8f924df7
KH
8355If optional 5th argument STRING is non-nil, it is a string to search
8356for un-encodable characters. In that case, START and END are indexes
8357to the string. */)
8358 (start, end, coding_system, count, string)
8359 Lisp_Object start, end, coding_system, count, string;
8360{
8361 int n;
8362 struct coding_system coding;
7d64c6ad 8363 Lisp_Object attrs, charset_list, translation_table;
8f924df7
KH
8364 Lisp_Object positions;
8365 int from, to;
8366 const unsigned char *p, *stop, *pend;
8367 int ascii_compatible;
fb88bf2d 8368
8f924df7
KH
8369 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8370 attrs = CODING_ID_ATTRS (coding.id);
8371 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8372 return Qnil;
8373 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8374 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 8375 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 8376
8f924df7
KH
8377 if (NILP (string))
8378 {
8379 validate_region (&start, &end);
8380 from = XINT (start);
8381 to = XINT (end);
8382 if (NILP (current_buffer->enable_multibyte_characters)
8383 || (ascii_compatible
8384 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8385 return Qnil;
8386 p = CHAR_POS_ADDR (from);
8387 pend = CHAR_POS_ADDR (to);
8388 if (from < GPT && to >= GPT)
8389 stop = GPT_ADDR;
8390 else
8391 stop = pend;
8392 }
8393 else
8394 {
8395 CHECK_STRING (string);
8396 CHECK_NATNUM (start);
8397 CHECK_NATNUM (end);
8398 from = XINT (start);
8399 to = XINT (end);
8400 if (from > to
8401 || to > SCHARS (string))
8402 args_out_of_range_3 (string, start, end);
8403 if (! STRING_MULTIBYTE (string))
8404 return Qnil;
8405 p = SDATA (string) + string_char_to_byte (string, from);
8406 stop = pend = SDATA (string) + string_char_to_byte (string, to);
8407 if (ascii_compatible && (to - from) == (pend - p))
8408 return Qnil;
8409 }
f2558efd 8410
8f924df7
KH
8411 if (NILP (count))
8412 n = 1;
8413 else
b73bfc1c 8414 {
8f924df7
KH
8415 CHECK_NATNUM (count);
8416 n = XINT (count);
b73bfc1c
KH
8417 }
8418
8f924df7
KH
8419 positions = Qnil;
8420 while (1)
d46c5b12 8421 {
8f924df7 8422 int c;
ec6d2bb8 8423
8f924df7
KH
8424 if (ascii_compatible)
8425 while (p < stop && ASCII_BYTE_P (*p))
8426 p++, from++;
8427 if (p >= stop)
0e79d667 8428 {
8f924df7
KH
8429 if (p >= pend)
8430 break;
8431 stop = pend;
8432 p = GAP_END_ADDR;
0e79d667 8433 }
ec6d2bb8 8434
8f924df7
KH
8435 c = STRING_CHAR_ADVANCE (p);
8436 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
8437 && ! char_charset (translate_char (translation_table, c),
8438 charset_list, NULL))
ec6d2bb8 8439 {
8f924df7
KH
8440 positions = Fcons (make_number (from), positions);
8441 n--;
8442 if (n == 0)
8443 break;
ec6d2bb8
KH
8444 }
8445
8f924df7
KH
8446 from++;
8447 }
d46c5b12 8448
8f924df7
KH
8449 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8450}
d46c5b12 8451
d46c5b12 8452
df7492f9
KH
8453DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8454 Scheck_coding_systems_region, 3, 3, 0,
8455 doc: /* Check if the region is encodable by coding systems.
d46c5b12 8456
df7492f9
KH
8457START and END are buffer positions specifying the region.
8458CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 8459
df7492f9 8460The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
d4a1d553 8461CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
df7492f9
KH
8462whole region, POS0, POS1, ... are buffer positions where non-encodable
8463characters are found.
93dec019 8464
df7492f9
KH
8465If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8466value is nil.
93dec019 8467
df7492f9
KH
8468START may be a string. In that case, check if the string is
8469encodable, and the value contains indices to the string instead of
5704f39a
KH
8470buffer positions. END is ignored.
8471
4c1958f4 8472If the current buffer (or START if it is a string) is unibyte, the value
5704f39a 8473is nil. */)
df7492f9
KH
8474 (start, end, coding_system_list)
8475 Lisp_Object start, end, coding_system_list;
05e6f5dc 8476{
df7492f9
KH
8477 Lisp_Object list;
8478 EMACS_INT start_byte, end_byte;
8479 int pos;
7c78e542 8480 const unsigned char *p, *pbeg, *pend;
df7492f9 8481 int c;
7d64c6ad 8482 Lisp_Object tail, elt, attrs;
70ad9fc4 8483
05e6f5dc
KH
8484 if (STRINGP (start))
8485 {
df7492f9 8486 if (!STRING_MULTIBYTE (start)
4c1958f4 8487 || SCHARS (start) == SBYTES (start))
df7492f9
KH
8488 return Qnil;
8489 start_byte = 0;
8f924df7 8490 end_byte = SBYTES (start);
df7492f9 8491 pos = 0;
d46c5b12 8492 }
05e6f5dc 8493 else
b73bfc1c 8494 {
b7826503
PJ
8495 CHECK_NUMBER_COERCE_MARKER (start);
8496 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
8497 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8498 args_out_of_range (start, end);
8499 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
8500 return Qnil;
8501 start_byte = CHAR_TO_BYTE (XINT (start));
8502 end_byte = CHAR_TO_BYTE (XINT (end));
8503 if (XINT (end) - XINT (start) == end_byte - start_byte)
5704f39a 8504 return Qnil;
df7492f9 8505
e1c23804 8506 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 8507 {
e1c23804
DL
8508 if ((GPT - XINT (start)) < (XINT (end) - GPT))
8509 move_gap_both (XINT (start), start_byte);
df7492f9 8510 else
e1c23804 8511 move_gap_both (XINT (end), end_byte);
b73bfc1c 8512 }
e1c23804 8513 pos = XINT (start);
b73bfc1c 8514 }
7553d0e1 8515
df7492f9
KH
8516 list = Qnil;
8517 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 8518 {
df7492f9 8519 elt = XCAR (tail);
7d64c6ad 8520 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
8521 ASET (attrs, coding_attr_trans_tbl,
8522 get_translation_table (attrs, 1, NULL));
7d64c6ad 8523 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
8524 }
8525
df7492f9 8526 if (STRINGP (start))
8f924df7 8527 p = pbeg = SDATA (start);
72d1a715 8528 else
df7492f9
KH
8529 p = pbeg = BYTE_POS_ADDR (start_byte);
8530 pend = p + (end_byte - start_byte);
4ed46869 8531
df7492f9
KH
8532 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8533 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 8534
df7492f9 8535 while (p < pend)
d46c5b12 8536 {
df7492f9
KH
8537 if (ASCII_BYTE_P (*p))
8538 p++;
e133c8fa 8539 else
05e6f5dc 8540 {
df7492f9
KH
8541 c = STRING_CHAR_ADVANCE (p);
8542
8543 charset_map_loaded = 0;
8544 for (tail = list; CONSP (tail); tail = XCDR (tail))
8545 {
8546 elt = XCDR (XCAR (tail));
8547 if (! char_encodable_p (c, XCAR (elt)))
8548 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8549 }
8550 if (charset_map_loaded)
8551 {
8552 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8553
8554 if (STRINGP (start))
8f924df7 8555 pbeg = SDATA (start);
df7492f9
KH
8556 else
8557 pbeg = BYTE_POS_ADDR (start_byte);
8558 p = pbeg + p_offset;
8559 pend = pbeg + pend_offset;
8560 }
05e6f5dc 8561 }
df7492f9 8562 pos++;
d46c5b12 8563 }
4ed46869 8564
df7492f9
KH
8565 tail = list;
8566 list = Qnil;
8567 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 8568 {
df7492f9
KH
8569 elt = XCAR (tail);
8570 if (CONSP (XCDR (XCDR (elt))))
8571 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8572 list);
ec6d2bb8 8573 }
2b4f9037 8574
df7492f9 8575 return list;
d46c5b12
KH
8576}
8577
3fd9494b 8578
b73bfc1c 8579Lisp_Object
df7492f9
KH
8580code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8581 Lisp_Object start, end, coding_system, dst_object;
8582 int encodep, norecord;
4ed46869 8583{
3a73fa5d 8584 struct coding_system coding;
df7492f9
KH
8585 EMACS_INT from, from_byte, to, to_byte;
8586 Lisp_Object src_object;
4ed46869 8587
b7826503
PJ
8588 CHECK_NUMBER_COERCE_MARKER (start);
8589 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
8590 if (NILP (coding_system))
8591 coding_system = Qno_conversion;
8592 else
8593 CHECK_CODING_SYSTEM (coding_system);
8594 src_object = Fcurrent_buffer ();
8595 if (NILP (dst_object))
8596 dst_object = src_object;
8597 else if (! EQ (dst_object, Qt))
8598 CHECK_BUFFER (dst_object);
3a73fa5d 8599
d46c5b12
KH
8600 validate_region (&start, &end);
8601 from = XFASTINT (start);
df7492f9 8602 from_byte = CHAR_TO_BYTE (from);
d46c5b12 8603 to = XFASTINT (end);
df7492f9 8604 to_byte = CHAR_TO_BYTE (to);
764ca8da 8605
df7492f9
KH
8606 setup_coding_system (coding_system, &coding);
8607 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 8608
df7492f9
KH
8609 if (encodep)
8610 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8611 dst_object);
8612 else
8613 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8614 dst_object);
8615 if (! norecord)
8616 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 8617
df7492f9
KH
8618 return (BUFFERP (dst_object)
8619 ? make_number (coding.produced_char)
8620 : coding.dst_object);
4031e2bf 8621}
78108bcd 8622
4ed46869 8623
4031e2bf 8624DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 8625 3, 4, "r\nzCoding system: ",
48b0f3ae 8626 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
8627When called from a program, takes four arguments:
8628 START, END, CODING-SYSTEM, and DESTINATION.
8629START and END are buffer positions.
8844fa83 8630
df7492f9 8631Optional 4th arguments DESTINATION specifies where the decoded text goes.
2354b80b 8632If nil, the region between START and END is replaced by the decoded text.
1560f91a
EZ
8633If buffer, the decoded text is inserted in that buffer after point (point
8634does not move).
446dcd75 8635In those cases, the length of the decoded text is returned.
319a3947 8636If DESTINATION is t, the decoded text is returned.
8844fa83 8637
48b0f3ae
PJ
8638This function sets `last-coding-system-used' to the precise coding system
8639used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
319a3947 8640not fully specified.) */)
df7492f9
KH
8641 (start, end, coding_system, destination)
8642 Lisp_Object start, end, coding_system, destination;
4031e2bf 8643{
df7492f9 8644 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 8645}
8844fa83 8646
3a73fa5d 8647DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
8648 3, 4, "r\nzCoding system: ",
8649 doc: /* Encode the current region by specified coding system.
d4a1d553
JB
8650When called from a program, takes four arguments:
8651 START, END, CODING-SYSTEM and DESTINATION.
8652START and END are buffer positions.
d46c5b12 8653
df7492f9
KH
8654Optional 4th arguments DESTINATION specifies where the encoded text goes.
8655If nil, the region between START and END is replace by the encoded text.
1560f91a
EZ
8656If buffer, the encoded text is inserted in that buffer after point (point
8657does not move).
446dcd75 8658In those cases, the length of the encoded text is returned.
319a3947 8659If DESTINATION is t, the encoded text is returned.
2391eaa4 8660
48b0f3ae
PJ
8661This function sets `last-coding-system-used' to the precise coding system
8662used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
319a3947 8663not fully specified.) */)
df7492f9
KH
8664 (start, end, coding_system, destination)
8665 Lisp_Object start, end, coding_system, destination;
3a73fa5d 8666{
df7492f9 8667 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
8668}
8669
8670Lisp_Object
df7492f9
KH
8671code_convert_string (string, coding_system, dst_object,
8672 encodep, nocopy, norecord)
8673 Lisp_Object string, coding_system, dst_object;
8674 int encodep, nocopy, norecord;
b73bfc1c 8675{
4031e2bf 8676 struct coding_system coding;
df7492f9 8677 EMACS_INT chars, bytes;
ec6d2bb8 8678
b7826503 8679 CHECK_STRING (string);
d46c5b12 8680 if (NILP (coding_system))
4956c225 8681 {
df7492f9
KH
8682 if (! norecord)
8683 Vlast_coding_system_used = Qno_conversion;
8684 if (NILP (dst_object))
8685 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 8686 }
b73bfc1c 8687
df7492f9
KH
8688 if (NILP (coding_system))
8689 coding_system = Qno_conversion;
8690 else
8691 CHECK_CODING_SYSTEM (coding_system);
8692 if (NILP (dst_object))
8693 dst_object = Qt;
8694 else if (! EQ (dst_object, Qt))
8695 CHECK_BUFFER (dst_object);
73be902c 8696
df7492f9 8697 setup_coding_system (coding_system, &coding);
d46c5b12 8698 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
8699 chars = SCHARS (string);
8700 bytes = SBYTES (string);
df7492f9
KH
8701 if (encodep)
8702 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8703 else
8704 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8705 if (! norecord)
8706 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 8707
df7492f9
KH
8708 return (BUFFERP (dst_object)
8709 ? make_number (coding.produced_char)
8710 : coding.dst_object);
4ed46869 8711}
73be902c 8712
b73bfc1c 8713
ecec61c1 8714/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 8715 Do not set Vlast_coding_system_used.
4ed46869 8716
ec6d2bb8
KH
8717 This function is called only from macros DECODE_FILE and
8718 ENCODE_FILE, thus we ignore character composition. */
4ed46869 8719
ecec61c1
KH
8720Lisp_Object
8721code_convert_string_norecord (string, coding_system, encodep)
8722 Lisp_Object string, coding_system;
8723 int encodep;
4ed46869 8724{
0be8721c 8725 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
8726}
8727
4ed46869 8728
df7492f9
KH
8729DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8730 2, 4, 0,
8731 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8732
8733Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8734if the decoding operation is trivial.
ecec61c1 8735
d4a1d553 8736Optional fourth arg BUFFER non-nil means that the decoded text is
1560f91a
EZ
8737inserted in that buffer after point (point does not move). In this
8738case, the return value is the length of the decoded text.
ecec61c1 8739
df7492f9
KH
8740This function sets `last-coding-system-used' to the precise coding system
8741used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
d4a1d553 8742not fully specified.) */)
df7492f9
KH
8743 (string, coding_system, nocopy, buffer)
8744 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 8745{
df7492f9
KH
8746 return code_convert_string (string, coding_system, buffer,
8747 0, ! NILP (nocopy), 0);
4ed46869
KH
8748}
8749
df7492f9
KH
8750DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8751 2, 4, 0,
8752 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8753
8754Optional third arg NOCOPY non-nil means it is OK to return STRING
8755itself if the encoding operation is trivial.
8756
d4a1d553 8757Optional fourth arg BUFFER non-nil means that the encoded text is
1560f91a
EZ
8758inserted in that buffer after point (point does not move). In this
8759case, the return value is the length of the encoded text.
df7492f9
KH
8760
8761This function sets `last-coding-system-used' to the precise coding system
8762used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8763not fully specified.) */)
8764 (string, coding_system, nocopy, buffer)
8765 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 8766{
df7492f9 8767 return code_convert_string (string, coding_system, buffer,
c197f191 8768 1, ! NILP (nocopy), 1);
4ed46869 8769}
df7492f9 8770
3a73fa5d 8771\f
4ed46869 8772DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
8773 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8774Return the corresponding character. */)
8775 (code)
4ed46869 8776 Lisp_Object code;
4ed46869 8777{
df7492f9
KH
8778 Lisp_Object spec, attrs, val;
8779 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8780 int c;
4ed46869 8781
df7492f9
KH
8782 CHECK_NATNUM (code);
8783 c = XFASTINT (code);
8784 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8785 attrs = AREF (spec, 0);
4ed46869 8786
df7492f9
KH
8787 if (ASCII_BYTE_P (c)
8788 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8789 return code;
4ed46869 8790
df7492f9
KH
8791 val = CODING_ATTR_CHARSET_LIST (attrs);
8792 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
8793 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8794 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 8795
df7492f9
KH
8796 if (c <= 0x7F)
8797 charset = charset_roman;
8798 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 8799 {
df7492f9
KH
8800 charset = charset_kana;
8801 c -= 0x80;
4ed46869 8802 }
55ab7be3 8803 else
4ed46869 8804 {
004068e4 8805 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
8806
8807 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8808 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8809 error ("Invalid code: %d", code);
8810 SJIS_TO_JIS (c);
8811 charset = charset_kanji;
4ed46869 8812 }
df7492f9
KH
8813 c = DECODE_CHAR (charset, c);
8814 if (c < 0)
8815 error ("Invalid code: %d", code);
8816 return make_number (c);
93dec019 8817}
4ed46869 8818
48b0f3ae 8819
4ed46869 8820DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8acf0c0e 8821 doc: /* Encode a Japanese character CH to shift_jis encoding.
48b0f3ae
PJ
8822Return the corresponding code in SJIS. */)
8823 (ch)
df7492f9 8824 Lisp_Object ch;
4ed46869 8825{
df7492f9
KH
8826 Lisp_Object spec, attrs, charset_list;
8827 int c;
8828 struct charset *charset;
8829 unsigned code;
48b0f3ae 8830
df7492f9
KH
8831 CHECK_CHARACTER (ch);
8832 c = XFASTINT (ch);
8833 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8834 attrs = AREF (spec, 0);
8835
8836 if (ASCII_CHAR_P (c)
8837 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8838 return ch;
8839
8840 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8841 charset = char_charset (c, charset_list, &code);
8842 if (code == CHARSET_INVALID_CODE (charset))
8843 error ("Can't encode by shift_jis encoding: %d", c);
8844 JIS_TO_SJIS (code);
8845
8846 return make_number (code);
4ed46869
KH
8847}
8848
8849DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
8850 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8851Return the corresponding character. */)
8852 (code)
4ed46869 8853 Lisp_Object code;
d46c5b12 8854{
df7492f9
KH
8855 Lisp_Object spec, attrs, val;
8856 struct charset *charset_roman, *charset_big5, *charset;
8857 int c;
6289dd10 8858
df7492f9
KH
8859 CHECK_NATNUM (code);
8860 c = XFASTINT (code);
8861 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8862 attrs = AREF (spec, 0);
4ed46869 8863
df7492f9
KH
8864 if (ASCII_BYTE_P (c)
8865 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8866 return code;
6289dd10 8867
df7492f9
KH
8868 val = CODING_ATTR_CHARSET_LIST (attrs);
8869 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8870 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 8871
df7492f9
KH
8872 if (c <= 0x7F)
8873 charset = charset_roman;
c28a9453
KH
8874 else
8875 {
df7492f9
KH
8876 int b1 = c >> 8, b2 = c & 0x7F;
8877 if (b1 < 0xA1 || b1 > 0xFE
8878 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8879 error ("Invalid code: %d", code);
8880 charset = charset_big5;
c28a9453 8881 }
df7492f9
KH
8882 c = DECODE_CHAR (charset, (unsigned )c);
8883 if (c < 0)
8884 error ("Invalid code: %d", code);
8885 return make_number (c);
d46c5b12 8886}
6289dd10 8887
4ed46869 8888DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8acf0c0e 8889 doc: /* Encode the Big5 character CH to BIG5 coding system.
48b0f3ae
PJ
8890Return the corresponding character code in Big5. */)
8891 (ch)
4ed46869
KH
8892 Lisp_Object ch;
8893{
df7492f9
KH
8894 Lisp_Object spec, attrs, charset_list;
8895 struct charset *charset;
8896 int c;
8897 unsigned code;
8898
8899 CHECK_CHARACTER (ch);
8900 c = XFASTINT (ch);
8901 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8902 attrs = AREF (spec, 0);
8903 if (ASCII_CHAR_P (c)
8904 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8905 return ch;
8906
8907 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8908 charset = char_charset (c, charset_list, &code);
8909 if (code == CHARSET_INVALID_CODE (charset))
8910 error ("Can't encode by Big5 encoding: %d", c);
8911
8912 return make_number (code);
4ed46869 8913}
48b0f3ae 8914
3a73fa5d 8915\f
002fdb44 8916DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
68bba4e4 8917 Sset_terminal_coding_system_internal, 1, 2, 0,
48b0f3ae 8918 doc: /* Internal use only. */)
6ed8eeff 8919 (coding_system, terminal)
b74e4686 8920 Lisp_Object coding_system;
6ed8eeff 8921 Lisp_Object terminal;
4ed46869 8922{
6ed8eeff 8923 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
b7826503 8924 CHECK_SYMBOL (coding_system);
b8299c66 8925 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
70c22245 8926 /* We had better not send unsafe characters to terminal. */
c73bd236 8927 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
df7492f9 8928 /* Characer composition should be disabled. */
c73bd236 8929 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b8299c66
KL
8930 terminal_coding->src_multibyte = 1;
8931 terminal_coding->dst_multibyte = 0;
4ed46869
KH
8932 return Qnil;
8933}
8934
c4825358
KH
8935DEFUN ("set-safe-terminal-coding-system-internal",
8936 Fset_safe_terminal_coding_system_internal,
48b0f3ae 8937 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 8938 doc: /* Internal use only. */)
48b0f3ae 8939 (coding_system)
b74e4686 8940 Lisp_Object coding_system;
d46c5b12 8941{
b7826503 8942 CHECK_SYMBOL (coding_system);
c4825358
KH
8943 setup_coding_system (Fcheck_coding_system (coding_system),
8944 &safe_terminal_coding);
df7492f9
KH
8945 /* Characer composition should be disabled. */
8946 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
8947 safe_terminal_coding.src_multibyte = 1;
8948 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
8949 return Qnil;
8950}
4ed46869 8951
002fdb44 8952DEFUN ("terminal-coding-system", Fterminal_coding_system,
68bba4e4 8953 Sterminal_coding_system, 0, 1, 0,
6ed8eeff
KL
8954 doc: /* Return coding system specified for terminal output on the given terminal.
8955TERMINAL may be a terminal id, a frame, or nil for the selected
8956frame's terminal device. */)
8957 (terminal)
8958 Lisp_Object terminal;
4ed46869 8959{
985773c9
MB
8960 struct coding_system *terminal_coding
8961 = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8962 Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
ae6f73fa 8963
ae6f73fa 8964 /* For backward compatibility, return nil if it is `undecided'. */
f75c90a9 8965 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
4ed46869
KH
8966}
8967
002fdb44 8968DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
68bba4e4 8969 Sset_keyboard_coding_system_internal, 1, 2, 0,
48b0f3ae 8970 doc: /* Internal use only. */)
6ed8eeff 8971 (coding_system, terminal)
4ed46869 8972 Lisp_Object coding_system;
6ed8eeff 8973 Lisp_Object terminal;
4ed46869 8974{
6ed8eeff 8975 struct terminal *t = get_terminal (terminal, 1);
b7826503 8976 CHECK_SYMBOL (coding_system);
df7492f9 8977 setup_coding_system (Fcheck_coding_system (coding_system),
c73bd236 8978 TERMINAL_KEYBOARD_CODING (t));
df7492f9 8979 /* Characer composition should be disabled. */
c73bd236
MB
8980 TERMINAL_KEYBOARD_CODING (t)->common_flags
8981 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
8982 return Qnil;
8983}
8984
8985DEFUN ("keyboard-coding-system",
985773c9 8986 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
48b0f3ae 8987 doc: /* Return coding system specified for decoding keyboard input. */)
985773c9
MB
8988 (terminal)
8989 Lisp_Object terminal;
4ed46869 8990{
985773c9
MB
8991 return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8992 (get_terminal (terminal, 1))->id);
4ed46869
KH
8993}
8994
4ed46869 8995\f
a5d301df
KH
8996DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8997 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
8998 doc: /* Choose a coding system for an operation based on the target name.
8999The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9000DECODING-SYSTEM is the coding system to use for decoding
9001\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9002for encoding (in case OPERATION does encoding).
05e6f5dc 9003
48b0f3ae
PJ
9004The first argument OPERATION specifies an I/O primitive:
9005 For file I/O, `insert-file-contents' or `write-region'.
9006 For process I/O, `call-process', `call-process-region', or `start-process'.
9007 For network I/O, `open-network-stream'.
05e6f5dc 9008
48b0f3ae
PJ
9009The remaining arguments should be the same arguments that were passed
9010to the primitive. Depending on which primitive, one of those arguments
9011is selected as the TARGET. For example, if OPERATION does file I/O,
9012whichever argument specifies the file name is TARGET.
05e6f5dc 9013
48b0f3ae 9014TARGET has a meaning which depends on OPERATION:
b883cdb2 9015 For file I/O, TARGET is a file name (except for the special case below).
48b0f3ae 9016 For process I/O, TARGET is a process name.
d4a1d553 9017 For network I/O, TARGET is a service name or a port number.
05e6f5dc 9018
d4a1d553 9019This function looks up what is specified for TARGET in
48b0f3ae
PJ
9020`file-coding-system-alist', `process-coding-system-alist',
9021or `network-coding-system-alist' depending on OPERATION.
9022They may specify a coding system, a cons of coding systems,
9023or a function symbol to call.
9024In the last case, we call the function with one argument,
9025which is a list of all the arguments given to this function.
1011c487
MB
9026If the function can't decide a coding system, it can return
9027`undecided' so that the normal code-detection is performed.
48b0f3ae 9028
b883cdb2
MB
9029If OPERATION is `insert-file-contents', the argument corresponding to
9030TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
9031file name to look up, and BUFFER is a buffer that contains the file's
9032contents (not yet decoded). If `file-coding-system-alist' specifies a
9033function to call for FILENAME, that function should examine the
9034contents of BUFFER instead of reading the file.
9035
d918f936 9036usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
48b0f3ae 9037 (nargs, args)
4ed46869
KH
9038 int nargs;
9039 Lisp_Object *args;
6b89e3aa 9040{
4ed46869
KH
9041 Lisp_Object operation, target_idx, target, val;
9042 register Lisp_Object chain;
177c0ea7 9043
4ed46869
KH
9044 if (nargs < 2)
9045 error ("Too few arguments");
9046 operation = args[0];
9047 if (!SYMBOLP (operation)
9048 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
3ed051d4 9049 error ("Invalid first argument");
4ed46869
KH
9050 if (nargs < 1 + XINT (target_idx))
9051 error ("Too few arguments for operation: %s",
8f924df7 9052 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
9053 target = args[XINT (target_idx) + 1];
9054 if (!(STRINGP (target)
091a0ff0
KH
9055 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9056 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
4ed46869 9057 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 9058 error ("Invalid %dth argument", XINT (target_idx) + 1);
091a0ff0
KH
9059 if (CONSP (target))
9060 target = XCAR (target);
4ed46869 9061
2e34157c
RS
9062 chain = ((EQ (operation, Qinsert_file_contents)
9063 || EQ (operation, Qwrite_region))
02ba4723 9064 ? Vfile_coding_system_alist
2e34157c 9065 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
9066 ? Vnetwork_coding_system_alist
9067 : Vprocess_coding_system_alist));
4ed46869
KH
9068 if (NILP (chain))
9069 return Qnil;
9070
03699b14 9071 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 9072 {
f44d27ce 9073 Lisp_Object elt;
6b89e3aa 9074
df7492f9 9075 elt = XCAR (chain);
4ed46869
KH
9076 if (CONSP (elt)
9077 && ((STRINGP (target)
03699b14
KR
9078 && STRINGP (XCAR (elt))
9079 && fast_string_match (XCAR (elt), target) >= 0)
9080 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 9081 {
03699b14 9082 val = XCDR (elt);
b19fd4c5
KH
9083 /* Here, if VAL is both a valid coding system and a valid
9084 function symbol, we return VAL as a coding system. */
02ba4723
KH
9085 if (CONSP (val))
9086 return val;
9087 if (! SYMBOLP (val))
9088 return Qnil;
9089 if (! NILP (Fcoding_system_p (val)))
9090 return Fcons (val, val);
b19fd4c5 9091 if (! NILP (Ffboundp (val)))
6b89e3aa 9092 {
e2b97060
MB
9093 /* We use call1 rather than safe_call1
9094 so as to get bug reports about functions called here
9095 which don't handle the current interface. */
9096 val = call1 (val, Flist (nargs, args));
b19fd4c5
KH
9097 if (CONSP (val))
9098 return val;
9099 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9100 return Fcons (val, val);
6b89e3aa 9101 }
02ba4723 9102 return Qnil;
6b89e3aa
KH
9103 }
9104 }
4ed46869 9105 return Qnil;
6b89e3aa
KH
9106}
9107
df7492f9 9108DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 9109 Sset_coding_system_priority, 0, MANY, 0,
da7db224 9110 doc: /* Assign higher priority to the coding systems given as arguments.
d4a1d553 9111If multiple coding systems belong to the same category,
a3181084
DL
9112all but the first one are ignored.
9113
d4a1d553 9114usage: (set-coding-system-priority &rest coding-systems) */)
df7492f9
KH
9115 (nargs, args)
9116 int nargs;
9117 Lisp_Object *args;
9118{
9119 int i, j;
9120 int changed[coding_category_max];
9121 enum coding_category priorities[coding_category_max];
9122
9123 bzero (changed, sizeof changed);
6b89e3aa 9124
df7492f9 9125 for (i = j = 0; i < nargs; i++)
6b89e3aa 9126 {
df7492f9
KH
9127 enum coding_category category;
9128 Lisp_Object spec, attrs;
6b89e3aa 9129
df7492f9
KH
9130 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9131 attrs = AREF (spec, 0);
9132 category = XINT (CODING_ATTR_CATEGORY (attrs));
9133 if (changed[category])
9134 /* Ignore this coding system because a coding system of the
9135 same category already had a higher priority. */
9136 continue;
9137 changed[category] = 1;
9138 priorities[j++] = category;
9139 if (coding_categories[category].id >= 0
9140 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9141 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 9142 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 9143 }
6b89e3aa 9144
df7492f9
KH
9145 /* Now we have decided top J priorities. Reflect the order of the
9146 original priorities to the remaining priorities. */
6b89e3aa 9147
df7492f9 9148 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 9149 {
df7492f9
KH
9150 while (j < coding_category_max
9151 && changed[coding_priorities[j]])
9152 j++;
9153 if (j == coding_category_max)
9154 abort ();
9155 priorities[i] = coding_priorities[j];
9156 }
6b89e3aa 9157
df7492f9 9158 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 9159
ff563fce
KH
9160 /* Update `coding-category-list'. */
9161 Vcoding_category_list = Qnil;
9162 for (i = coding_category_max - 1; i >= 0; i--)
9163 Vcoding_category_list
9164 = Fcons (AREF (Vcoding_category_table, priorities[i]),
9165 Vcoding_category_list);
6b89e3aa 9166
df7492f9 9167 return Qnil;
6b89e3aa
KH
9168}
9169
df7492f9
KH
9170DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9171 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
9172 doc: /* Return a list of coding systems ordered by their priorities.
9173HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
9174 (highestp)
9175 Lisp_Object highestp;
d46c5b12
KH
9176{
9177 int i;
df7492f9 9178 Lisp_Object val;
6b89e3aa 9179
df7492f9 9180 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 9181 {
df7492f9
KH
9182 enum coding_category category = coding_priorities[i];
9183 int id = coding_categories[category].id;
9184 Lisp_Object attrs;
068a9dbd 9185
df7492f9
KH
9186 if (id < 0)
9187 continue;
9188 attrs = CODING_ID_ATTRS (id);
9189 if (! NILP (highestp))
9190 return CODING_ATTR_BASE_NAME (attrs);
9191 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9192 }
9193 return Fnreverse (val);
9194}
068a9dbd 9195
f0064e1f 9196static char *suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
9197
9198static Lisp_Object
df7492f9
KH
9199make_subsidiaries (base)
9200 Lisp_Object base;
068a9dbd 9201{
df7492f9 9202 Lisp_Object subsidiaries;
8f924df7 9203 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
9204 char *buf = (char *) alloca (base_name_len + 6);
9205 int i;
068a9dbd 9206
8f924df7 9207 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
9208 subsidiaries = Fmake_vector (make_number (3), Qnil);
9209 for (i = 0; i < 3; i++)
068a9dbd 9210 {
df7492f9
KH
9211 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9212 ASET (subsidiaries, i, intern (buf));
068a9dbd 9213 }
df7492f9 9214 return subsidiaries;
068a9dbd
KH
9215}
9216
9217
df7492f9
KH
/* Define a coding system from the argument vector ARGS (NARGS entries).
   Builds the attribute vector ATTRS from the common arguments, validates
   them, fills in the attributes specific to the coding type (charset,
   ccl, utf-16, iso-2022, emacs-mule, shift-jis, big5, raw-text, utf-8
   or undecided) and derives the coding category from it.  The new
   coding system -- plus its three eol subsidiaries when the eol-type
   argument is nil -- is then recorded in Vcoding_system_hash_table,
   Vcoding_system_list and Vcoding_system_alist.  Returns nil.  Signals
   wrong-number-of-arguments when NARGS is smaller than the coding type
   requires, and an error for invalid argument values.  */
9218DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9219 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
9220 doc: /* For internal use only.
9221usage: (define-coding-system-internal ...) */)
df7492f9
KH
9222 (nargs, args)
9223 int nargs;
9224 Lisp_Object *args;
068a9dbd 9225{
df7492f9
KH
9226 Lisp_Object name;
9227 Lisp_Object spec_vec; /* [ ATTRS ALIASES EOL_TYPE ] */
9228 Lisp_Object attrs; /* Vector of attributes. */
9229 Lisp_Object eol_type;
9230 Lisp_Object aliases;
9231 Lisp_Object coding_type, charset_list, safe_charsets;
9232 enum coding_category category;
9233 Lisp_Object tail, val;
9234 int max_charset_id = 0;
9235 int i;
068a9dbd 9236
df7492f9
KH
9237 if (nargs < coding_arg_max)
9238 goto short_args;
068a9dbd 9239
df7492f9 9240 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 9241
df7492f9
KH
9242 name = args[coding_arg_name];
9243 CHECK_SYMBOL (name);
9244 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 9245
df7492f9
KH
9246 val = args[coding_arg_mnemonic];
9247 if (! STRINGP (val))
9248 CHECK_CHARACTER (val);
9249 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 9250
df7492f9
KH
9251 coding_type = args[coding_arg_coding_type];
9252 CHECK_SYMBOL (coding_type);
9253 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 9254
df7492f9
KH
9255 charset_list = args[coding_arg_charset_list];
9256 if (SYMBOLP (charset_list))
9257 {
9258 if (EQ (charset_list, Qiso_2022))
9259 {
9260 if (! EQ (coding_type, Qiso_2022))
9261 error ("Invalid charset-list");
9262 charset_list = Viso_2022_charset_list;
9263 }
9264 else if (EQ (charset_list, Qemacs_mule))
9265 {
9266 if (! EQ (coding_type, Qemacs_mule))
9267 error ("Invalid charset-list");
9268 charset_list = Vemacs_mule_charset_list;
9269 }
9270 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9271 if (max_charset_id < XFASTINT (XCAR (tail)))
9272 max_charset_id = XFASTINT (XCAR (tail));
9273 }
068a9dbd
KH
9274 else
9275 {
df7492f9 9276 charset_list = Fcopy_sequence (charset_list);
985773c9 9277 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
068a9dbd 9278 {
df7492f9
KH
9279 struct charset *charset;
9280
985773c9 9281 val = XCAR (tail);
df7492f9
KH
9282 CHECK_CHARSET_GET_CHARSET (val, charset);
9283 if (EQ (coding_type, Qiso_2022)
9284 ? CHARSET_ISO_FINAL (charset) < 0
9285 : EQ (coding_type, Qemacs_mule)
9286 ? CHARSET_EMACS_MULE_ID (charset) < 0
9287 : 0)
9288 error ("Can't handle charset `%s'",
8f924df7 9289 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 9290
8f924df7 9291 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
9292 if (max_charset_id < charset->id)
9293 max_charset_id = charset->id;
068a9dbd
KH
9294 }
9295 }
df7492f9 9296 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 9297
/* SAFE_CHARSETS is a byte string indexed by charset ID: 0 for each
   charset in CHARSET_LIST, 255 for every other ID up to
   MAX_CHARSET_ID.  */
1b3b981b
AS
9298 safe_charsets = make_uninit_string (max_charset_id + 1);
9299 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
df7492f9 9300 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 9301 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 9302 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 9303
584948ac 9304 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 9305
df7492f9 9306 val = args[coding_arg_decode_translation_table];
a6f87d34 9307 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 9308 CHECK_SYMBOL (val);
df7492f9 9309 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 9310
df7492f9 9311 val = args[coding_arg_encode_translation_table];
a6f87d34 9312 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 9313 CHECK_SYMBOL (val);
df7492f9 9314 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 9315
df7492f9
KH
9316 val = args[coding_arg_post_read_conversion];
9317 CHECK_SYMBOL (val);
9318 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 9319
df7492f9
KH
9320 val = args[coding_arg_pre_write_conversion];
9321 CHECK_SYMBOL (val);
9322 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 9323
df7492f9
KH
9324 val = args[coding_arg_default_char];
9325 if (NILP (val))
9326 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9327 else
9328 {
8f924df7 9329 CHECK_CHARACTER (val);
df7492f9
KH
9330 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9331 }
4031e2bf 9332
8f924df7
KH
9333 val = args[coding_arg_for_unibyte];
9334 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 9335
df7492f9
KH
9336 val = args[coding_arg_plist];
9337 CHECK_LIST (val);
9338 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 9339
df7492f9
KH
9340 if (EQ (coding_type, Qcharset))
9341 {
c7c66a95
KH
9342 /* Generate a lisp vector of 256 elements. Each element is nil,
9343 integer, or a list of charset IDs.
3a73fa5d 9344
c7c66a95
KH
9345 If Nth element is nil, the byte code N is invalid in this
9346 coding system.
4ed46869 9347
c7c66a95
KH
9348 If Nth element is a number NUM, N is the first byte of a
9349 charset whose ID is NUM.
4ed46869 9350
c7c66a95
KH
9351 If Nth element is a list of charset IDs, N is the first byte
9352 of one of them. The list is sorted by dimensions of the
2bc515e4 9353 charsets. A charset of smaller dimension comes first. */
df7492f9 9354 val = Fmake_vector (make_number (256), Qnil);
4ed46869 9355
5c99c2e6 9356 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 9357 {
c7c66a95
KH
9358 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9359 int dim = CHARSET_DIMENSION (charset);
9360 int idx = (dim - 1) * 4;
4ed46869 9361
5c99c2e6 9362 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 9363 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 9364
15d143f7
KH
9365 for (i = charset->code_space[idx];
9366 i <= charset->code_space[idx + 1]; i++)
9367 {
c7c66a95
KH
9368 Lisp_Object tmp, tmp2;
9369 int dim2;
ec6d2bb8 9370
c7c66a95
KH
9371 tmp = AREF (val, i);
9372 if (NILP (tmp))
9373 tmp = XCAR (tail);
9374 else if (NUMBERP (tmp))
9375 {
9376 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9377 if (dim < dim2)
c7c66a95 9378 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
9379 else
9380 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 9381 }
15d143f7 9382 else
c7c66a95
KH
9383 {
9384 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9385 {
9386 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9387 if (dim < dim2)
9388 break;
9389 }
9390 if (NILP (tmp2))
9391 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9392 else
9393 {
9394 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9395 XSETCAR (tmp2, XCAR (tail));
9396 }
9397 }
9398 ASET (val, i, tmp);
15d143f7 9399 }
df7492f9
KH
9400 }
9401 ASET (attrs, coding_attr_charset_valids, val);
9402 category = coding_category_charset;
9403 }
9404 else if (EQ (coding_type, Qccl))
9405 {
9406 Lisp_Object valids;
ecec61c1 9407
df7492f9
KH
9408 if (nargs < coding_arg_ccl_max)
9409 goto short_args;
ecec61c1 9410
df7492f9
KH
9411 val = args[coding_arg_ccl_decoder];
9412 CHECK_CCL_PROGRAM (val);
9413 if (VECTORP (val))
9414 val = Fcopy_sequence (val);
9415 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 9416
df7492f9
KH
9417 val = args[coding_arg_ccl_encoder];
9418 CHECK_CCL_PROGRAM (val);
9419 if (VECTORP (val))
9420 val = Fcopy_sequence (val);
9421 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 9422
df7492f9
KH
9423 val = args[coding_arg_ccl_valids];
9424 valids = Fmake_string (make_number (256), make_number (0));
9425 for (tail = val; !NILP (tail); tail = Fcdr (tail))
9426 {
8dcbea82 9427 int from, to;
ecec61c1 9428
df7492f9
KH
9429 val = Fcar (tail);
9430 if (INTEGERP (val))
8dcbea82
KH
9431 {
9432 from = to = XINT (val);
9433 if (from < 0 || from > 255)
9434 args_out_of_range_3 (val, make_number (0), make_number (255));
9435 }
df7492f9
KH
9436 else
9437 {
df7492f9 9438 CHECK_CONS (val);
8f924df7
KH
9439 CHECK_NATNUM_CAR (val);
9440 CHECK_NATNUM_CDR (val);
df7492f9 9441 from = XINT (XCAR (val));
8f924df7 9442 if (from > 255)
8dcbea82
KH
9443 args_out_of_range_3 (XCAR (val),
9444 make_number (0), make_number (255));
df7492f9 9445 to = XINT (XCDR (val));
8dcbea82
KH
9446 if (to < from || to > 255)
9447 args_out_of_range_3 (XCDR (val),
9448 XCAR (val), make_number (255));
df7492f9 9449 }
8dcbea82 9450 for (i = from; i <= to; i++)
8f924df7 9451 SSET (valids, i, 1);
df7492f9
KH
9452 }
9453 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 9454
df7492f9 9455 category = coding_category_ccl;
55ab7be3 9456 }
df7492f9 9457 else if (EQ (coding_type, Qutf_16))
55ab7be3 9458 {
df7492f9 9459 Lisp_Object bom, endian;
4ed46869 9460
584948ac 9461 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 9462
df7492f9
KH
9463 if (nargs < coding_arg_utf16_max)
9464 goto short_args;
4ed46869 9465
df7492f9
KH
9466 bom = args[coding_arg_utf16_bom];
9467 if (! NILP (bom) && ! EQ (bom, Qt))
9468 {
9469 CHECK_CONS (bom);
8f924df7
KH
9470 val = XCAR (bom);
9471 CHECK_CODING_SYSTEM (val);
9472 val = XCDR (bom);
9473 CHECK_CODING_SYSTEM (val);
df7492f9 9474 }
a470d443 9475 ASET (attrs, coding_attr_utf_bom, bom);
df7492f9
KH
9476
9477 endian = args[coding_arg_utf16_endian];
b49a1807
KH
9478 CHECK_SYMBOL (endian);
9479 if (NILP (endian))
9480 endian = Qbig;
9481 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 9482 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
9483 ASET (attrs, coding_attr_utf_16_endian, endian);
9484
9485 category = (CONSP (bom)
9486 ? coding_category_utf_16_auto
9487 : NILP (bom)
b49a1807 9488 ? (EQ (endian, Qbig)
df7492f9
KH
9489 ? coding_category_utf_16_be_nosig
9490 : coding_category_utf_16_le_nosig)
b49a1807 9491 : (EQ (endian, Qbig)
df7492f9
KH
9492 ? coding_category_utf_16_be
9493 : coding_category_utf_16_le));
9494 }
9495 else if (EQ (coding_type, Qiso_2022))
9496 {
9497 Lisp_Object initial, reg_usage, request, flags;
4776e638 9498 int i;
1397dc18 9499
df7492f9
KH
9500 if (nargs < coding_arg_iso2022_max)
9501 goto short_args;
9502
9503 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9504 CHECK_VECTOR (initial);
9505 for (i = 0; i < 4; i++)
9506 {
9507 val = Faref (initial, make_number (i));
9508 if (! NILP (val))
9509 {
584948ac
KH
9510 struct charset *charset;
9511
9512 CHECK_CHARSET_GET_CHARSET (val, charset);
9513 ASET (initial, i, make_number (CHARSET_ID (charset)));
9514 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9515 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9516 }
9517 else
9518 ASET (initial, i, make_number (-1));
9519 }
9520
9521 reg_usage = args[coding_arg_iso2022_reg_usage];
9522 CHECK_CONS (reg_usage);
8f924df7
KH
9523 CHECK_NUMBER_CAR (reg_usage);
9524 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
9525
9526 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9527 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 9528 {
df7492f9 9529 int id;
8f924df7 9530 Lisp_Object tmp;
df7492f9
KH
9531
9532 val = Fcar (tail);
9533 CHECK_CONS (val);
8f924df7
KH
9534 tmp = XCAR (val);
9535 CHECK_CHARSET_GET_ID (tmp, id);
9536 CHECK_NATNUM_CDR (val);
df7492f9
KH
9537 if (XINT (XCDR (val)) >= 4)
9538 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 9539 XSETCAR (val, make_number (id));
1397dc18 9540 }
4ed46869 9541
df7492f9
KH
9542 flags = args[coding_arg_iso2022_flags];
9543 CHECK_NATNUM (flags);
9544 i = XINT (flags);
9545 if (EQ (args[coding_arg_charset_list], Qiso_2022))
9546 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9547
9548 ASET (attrs, coding_attr_iso_initial, initial);
9549 ASET (attrs, coding_attr_iso_usage, reg_usage);
9550 ASET (attrs, coding_attr_iso_request, request);
9551 ASET (attrs, coding_attr_iso_flags, flags);
9552 setup_iso_safe_charsets (attrs);
9553
9554 if (i & CODING_ISO_FLAG_SEVEN_BITS)
9555 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9556 | CODING_ISO_FLAG_SINGLE_SHIFT))
9557 ? coding_category_iso_7_else
9558 : EQ (args[coding_arg_charset_list], Qiso_2022)
9559 ? coding_category_iso_7
9560 : coding_category_iso_7_tight);
9561 else
9562 {
9563 int id = XINT (AREF (initial, 1));
9564
c6fb6e98 9565 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
9566 || EQ (args[coding_arg_charset_list], Qiso_2022)
9567 || id < 0)
9568 ? coding_category_iso_8_else
9569 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9570 ? coding_category_iso_8_1
9571 : coding_category_iso_8_2);
9572 }
0ce7886f
KH
9573 if (category != coding_category_iso_8_1
9574 && category != coding_category_iso_8_2)
9575 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
9576 }
9577 else if (EQ (coding_type, Qemacs_mule))
c28a9453 9578 {
df7492f9
KH
9579 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9580 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 9581 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 9582 category = coding_category_emacs_mule;
c28a9453 9583 }
df7492f9 9584 else if (EQ (coding_type, Qshift_jis))
c28a9453 9585 {
df7492f9
KH
9586
9587 struct charset *charset;
9588
7d64c6ad 9589 if (XINT (Flength (charset_list)) != 3
6e07c25f 9590 && XINT (Flength (charset_list)) != 4)
7d64c6ad 9591 error ("There should be three or four charsets");
df7492f9
KH
9592
9593 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9594 if (CHARSET_DIMENSION (charset) != 1)
9595 error ("Dimension of charset %s is not one",
8f924df7 9596 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9597 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9598 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9599
9600 charset_list = XCDR (charset_list);
9601 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9602 if (CHARSET_DIMENSION (charset) != 1)
9603 error ("Dimension of charset %s is not one",
8f924df7 9604 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
9605
9606 charset_list = XCDR (charset_list);
9607 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9608 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
9609 error ("Dimension of charset %s is not two",
9610 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9611
9612 charset_list = XCDR (charset_list);
2b917a06
KH
9613 if (! NILP (charset_list))
9614 {
9615 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9616 if (CHARSET_DIMENSION (charset) != 2)
9617 error ("Dimension of charset %s is not two",
9618 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9619 }
df7492f9
KH
9620
9621 category = coding_category_sjis;
9622 Vsjis_coding_system = name;
c28a9453 9623 }
df7492f9
KH
9624 else if (EQ (coding_type, Qbig5))
9625 {
9626 struct charset *charset;
4ed46869 9627
df7492f9
KH
9628 if (XINT (Flength (charset_list)) != 2)
9629 error ("There should be just two charsets");
9630
9631 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9632 if (CHARSET_DIMENSION (charset) != 1)
9633 error ("Dimension of charset %s is not one",
8f924df7 9634 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
9635 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9636 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
9637
9638 charset_list = XCDR (charset_list);
9639 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9640 if (CHARSET_DIMENSION (charset) != 2)
9641 error ("Dimension of charset %s is not two",
8f924df7 9642 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 9643
df7492f9
KH
9644 category = coding_category_big5;
9645 Vbig5_coding_system = name;
9646 }
9647 else if (EQ (coding_type, Qraw_text))
c28a9453 9648 {
584948ac
KH
9649 category = coding_category_raw_text;
9650 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 9651 }
df7492f9 9652 else if (EQ (coding_type, Qutf_8))
4ed46869 9653 {
a470d443
KH
9654 Lisp_Object bom;
9655
584948ac 9656 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
a470d443
KH
9657
9658 if (nargs < coding_arg_utf8_max)
9659 goto short_args;
9660
9661 bom = args[coding_arg_utf8_bom];
9662 if (! NILP (bom) && ! EQ (bom, Qt))
9663 {
9664 CHECK_CONS (bom);
9665 val = XCAR (bom);
9666 CHECK_CODING_SYSTEM (val);
9667 val = XCDR (bom);
9668 CHECK_CODING_SYSTEM (val);
9669 }
9670 ASET (attrs, coding_attr_utf_bom, bom);
9671
9672 category = (CONSP (bom) ? coding_category_utf_8_auto
9673 : NILP (bom) ? coding_category_utf_8_nosig
9674 : coding_category_utf_8_sig);
4ed46869 9675 }
df7492f9
KH
9676 else if (EQ (coding_type, Qundecided))
9677 category = coding_category_undecided;
4ed46869 9678 else
df7492f9 9679 error ("Invalid coding system type: %s",
8f924df7 9680 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 9681
df7492f9 9682 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
9683 CODING_ATTR_PLIST (attrs)
9684 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9685 CODING_ATTR_PLIST (attrs)));
35befdaa 9686 CODING_ATTR_PLIST (attrs)
3ed051d4 9687 = Fcons (QCascii_compatible_p,
35befdaa
KH
9688 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9689 CODING_ATTR_PLIST (attrs)));
c4825358 9690
df7492f9
KH
9691 eol_type = args[coding_arg_eol_type];
9692 if (! NILP (eol_type)
9693 && ! EQ (eol_type, Qunix)
9694 && ! EQ (eol_type, Qdos)
9695 && ! EQ (eol_type, Qmac))
9696 error ("Invalid eol-type");
4ed46869 9697
df7492f9 9698 aliases = Fcons (name, Qnil);
4ed46869 9699
/* No eol-type given: replace it by the vector of subsidiary names
   built by make_subsidiaries and register each subsidiary with its
   own spec vector; all three share ATTRS.  */
df7492f9
KH
9700 if (NILP (eol_type))
9701 {
9702 eol_type = make_subsidiaries (name);
9703 for (i = 0; i < 3; i++)
1397dc18 9704 {
df7492f9
KH
9705 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9706
9707 this_name = AREF (eol_type, i);
9708 this_aliases = Fcons (this_name, Qnil);
9709 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9710 this_spec = Fmake_vector (make_number (3), attrs);
9711 ASET (this_spec, 1, this_aliases);
9712 ASET (this_spec, 2, this_eol_type);
9713 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9714 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
583f71ca
KH
9715 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9716 if (NILP (val))
9717 Vcoding_system_alist
9718 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9719 Vcoding_system_alist);
1397dc18 9720 }
d46c5b12 9721 }
4ed46869 9722
df7492f9
KH
9723 spec_vec = Fmake_vector (make_number (3), attrs);
9724 ASET (spec_vec, 1, aliases);
9725 ASET (spec_vec, 2, eol_type);
48b0f3ae 9726
df7492f9
KH
9727 Fputhash (name, spec_vec, Vcoding_system_hash_table);
9728 Vcoding_system_list = Fcons (name, Vcoding_system_list);
583f71ca
KH
9729 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9730 if (NILP (val))
9731 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9732 Vcoding_system_alist);
48b0f3ae 9733
/* Set up the category's entry in coding_categories with NAME when the
   category has no coding system yet (id < 0), or when NAME is the one
   it already holds.  */
df7492f9
KH
9734 {
9735 int id = coding_categories[category].id;
48b0f3ae 9736
df7492f9
KH
9737 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9738 setup_coding_system (name, &coding_categories[category]);
9739 }
48b0f3ae 9740
d46c5b12 9741 return Qnil;
48b0f3ae 9742
df7492f9
KH
9743 short_args:
9744 return Fsignal (Qwrong_number_of_arguments,
9745 Fcons (intern ("define-coding-system-internal"),
9746 make_number (nargs)));
d46c5b12 9747}
4ed46869 9748
d6925f38 9749
a6f87d34
KH
9750DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9751 3, 3, 0,
9752 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
9753 (coding_system, prop, val)
9754 Lisp_Object coding_system, prop, val;
9755{
3dbe7859 9756 Lisp_Object spec, attrs;
a6f87d34
KH
9757
9758 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9759 attrs = AREF (spec, 0);
9760 if (EQ (prop, QCmnemonic))
9761 {
9762 if (! STRINGP (val))
9763 CHECK_CHARACTER (val);
9764 CODING_ATTR_MNEMONIC (attrs) = val;
9765 }
2133e2d1 9766 else if (EQ (prop, QCdefault_char))
a6f87d34
KH
9767 {
9768 if (NILP (val))
9769 val = make_number (' ');
9770 else
9771 CHECK_CHARACTER (val);
9772 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9773 }
9774 else if (EQ (prop, QCdecode_translation_table))
9775 {
9776 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9777 CHECK_SYMBOL (val);
9778 CODING_ATTR_DECODE_TBL (attrs) = val;
9779 }
9780 else if (EQ (prop, QCencode_translation_table))
9781 {
9782 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9783 CHECK_SYMBOL (val);
9784 CODING_ATTR_ENCODE_TBL (attrs) = val;
9785 }
9786 else if (EQ (prop, QCpost_read_conversion))
9787 {
9788 CHECK_SYMBOL (val);
9789 CODING_ATTR_POST_READ (attrs) = val;
9790 }
9791 else if (EQ (prop, QCpre_write_conversion))
9792 {
9793 CHECK_SYMBOL (val);
9794 CODING_ATTR_PRE_WRITE (attrs) = val;
9795 }
35befdaa
KH
9796 else if (EQ (prop, QCascii_compatible_p))
9797 {
9798 CODING_ATTR_ASCII_COMPAT (attrs) = val;
9799 }
a6f87d34
KH
9800
9801 CODING_ATTR_PLIST (attrs)
9802 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9803 return val;
9804}
9805
9806
df7492f9
KH
9807DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9808 Sdefine_coding_system_alias, 2, 2, 0,
9809 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
9810 (alias, coding_system)
9811 Lisp_Object alias, coding_system;
66cfb530 9812{
583f71ca 9813 Lisp_Object spec, aliases, eol_type, val;
4ed46869 9814
df7492f9
KH
9815 CHECK_SYMBOL (alias);
9816 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9817 aliases = AREF (spec, 1);
d4a1d553 9818 /* ALIASES should be a list of length more than zero, and the first
d6925f38
KH
9819 element is a base coding system. Append ALIAS at the tail of the
9820 list. */
df7492f9
KH
9821 while (!NILP (XCDR (aliases)))
9822 aliases = XCDR (aliases);
8f924df7 9823 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 9824
df7492f9
KH
9825 eol_type = AREF (spec, 2);
9826 if (VECTORP (eol_type))
4ed46869 9827 {
df7492f9
KH
9828 Lisp_Object subsidiaries;
9829 int i;
4ed46869 9830
df7492f9
KH
9831 subsidiaries = make_subsidiaries (alias);
9832 for (i = 0; i < 3; i++)
9833 Fdefine_coding_system_alias (AREF (subsidiaries, i),
9834 AREF (eol_type, i));
4ed46869 9835 }
df7492f9
KH
9836
9837 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 9838 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
583f71ca
KH
9839 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9840 if (NILP (val))
9841 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9842 Vcoding_system_alist);
66cfb530 9843
4ed46869
KH
9844 return Qnil;
9845}
9846
df7492f9
KH
9847DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9848 1, 1, 0,
9849 doc: /* Return the base of CODING-SYSTEM.
da7db224 9850Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
9851 (coding_system)
9852 Lisp_Object coding_system;
d46c5b12 9853{
df7492f9 9854 Lisp_Object spec, attrs;
d46c5b12 9855
df7492f9
KH
9856 if (NILP (coding_system))
9857 return (Qno_conversion);
9858 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9859 attrs = AREF (spec, 0);
9860 return CODING_ATTR_BASE_NAME (attrs);
9861}
1397dc18 9862
df7492f9
KH
9863DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9864 1, 1, 0,
9865 doc: "Return the property list of CODING-SYSTEM.")
9866 (coding_system)
9867 Lisp_Object coding_system;
9868{
9869 Lisp_Object spec, attrs;
1397dc18 9870
df7492f9
KH
9871 if (NILP (coding_system))
9872 coding_system = Qno_conversion;
9873 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9874 attrs = AREF (spec, 0);
9875 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
9876}
9877
df7492f9
KH
9878
9879DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9880 1, 1, 0,
da7db224 9881 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
9882 (coding_system)
9883 Lisp_Object coding_system;
66cfb530 9884{
df7492f9 9885 Lisp_Object spec;
84d60297 9886
df7492f9
KH
9887 if (NILP (coding_system))
9888 coding_system = Qno_conversion;
9889 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 9890 return AREF (spec, 1);
df7492f9 9891}
66cfb530 9892
df7492f9
KH
9893DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9894 Scoding_system_eol_type, 1, 1, 0,
9895 doc: /* Return eol-type of CODING-SYSTEM.
d4a1d553 9896An eol-type is an integer 0, 1, 2, or a vector of coding systems.
66cfb530 9897
df7492f9
KH
9898Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9899and CR respectively.
66cfb530 9900
df7492f9
KH
9901A vector value indicates that a format of end-of-line should be
9902detected automatically. Nth element of the vector is the subsidiary
9903coding system whose eol-type is N. */)
6b89e3aa
KH
9904 (coding_system)
9905 Lisp_Object coding_system;
9906{
df7492f9
KH
9907 Lisp_Object spec, eol_type;
9908 int n;
6b89e3aa 9909
df7492f9
KH
9910 if (NILP (coding_system))
9911 coding_system = Qno_conversion;
9912 if (! CODING_SYSTEM_P (coding_system))
9913 return Qnil;
9914 spec = CODING_SYSTEM_SPEC (coding_system);
9915 eol_type = AREF (spec, 2);
9916 if (VECTORP (eol_type))
9917 return Fcopy_sequence (eol_type);
9918 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9919 return make_number (n);
6b89e3aa
KH
9920}
9921
4ed46869
KH
9922#endif /* emacs */
9923
9924\f
1397dc18 9925/*** 12. Postamble ***/
4ed46869 9926
dfcf069d 9927void
4ed46869
KH
9928init_coding_once ()
9929{
9930 int i;
9931
df7492f9
KH
9932 for (i = 0; i < coding_category_max; i++)
9933 {
9934 coding_categories[i].id = -1;
9935 coding_priorities[i] = i;
9936 }
4ed46869
KH
9937
9938 /* ISO2022 specific initialize routine. */
9939 for (i = 0; i < 0x20; i++)
b73bfc1c 9940 iso_code_class[i] = ISO_control_0;
4ed46869
KH
9941 for (i = 0x21; i < 0x7F; i++)
9942 iso_code_class[i] = ISO_graphic_plane_0;
9943 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 9944 iso_code_class[i] = ISO_control_1;
4ed46869
KH
9945 for (i = 0xA1; i < 0xFF; i++)
9946 iso_code_class[i] = ISO_graphic_plane_1;
9947 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9948 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
9949 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9950 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9951 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9952 iso_code_class[ISO_CODE_ESC] = ISO_escape;
9953 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9954 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9955 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9956
df7492f9
KH
9957 for (i = 0; i < 256; i++)
9958 {
9959 emacs_mule_bytes[i] = 1;
9960 }
7c78e542
KH
9961 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9962 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9963 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9964 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
9965}
9966
9967#ifdef emacs
9968
dfcf069d 9969void
e0e989f6
KH
9970syms_of_coding ()
9971{
df7492f9 9972 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
9973 {
9974 Lisp_Object args[2];
9975 args[0] = QCtest;
9976 args[1] = Qeq;
9977 Vcoding_system_hash_table = Fmake_hash_table (2, args);
9978 }
df7492f9
KH
9979
9980 staticpro (&Vsjis_coding_system);
9981 Vsjis_coding_system = Qnil;
e0e989f6 9982
df7492f9
KH
9983 staticpro (&Vbig5_coding_system);
9984 Vbig5_coding_system = Qnil;
9985
24a73b0a
KH
9986 staticpro (&Vcode_conversion_reused_workbuf);
9987 Vcode_conversion_reused_workbuf = Qnil;
9988
9989 staticpro (&Vcode_conversion_workbuf_name);
9990 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
e0e989f6 9991
24a73b0a 9992 reused_workbuf_in_use = 0;
df7492f9
KH
9993
9994 DEFSYM (Qcharset, "charset");
9995 DEFSYM (Qtarget_idx, "target-idx");
9996 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
9997 Fset (Qcoding_system_history, Qnil);
9998
9ce27fde 9999 /* Target FILENAME is the first argument. */
e0e989f6 10000 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 10001 /* Target FILENAME is the third argument. */
e0e989f6
KH
10002 Fput (Qwrite_region, Qtarget_idx, make_number (2));
10003
df7492f9 10004 DEFSYM (Qcall_process, "call-process");
9ce27fde 10005 /* Target PROGRAM is the first argument. */
e0e989f6
KH
10006 Fput (Qcall_process, Qtarget_idx, make_number (0));
10007
df7492f9 10008 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 10009 /* Target PROGRAM is the third argument. */
e0e989f6
KH
10010 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10011
df7492f9 10012 DEFSYM (Qstart_process, "start-process");
9ce27fde 10013 /* Target PROGRAM is the third argument. */
e0e989f6
KH
10014 Fput (Qstart_process, Qtarget_idx, make_number (2));
10015
df7492f9 10016 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 10017 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
10018 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10019
df7492f9
KH
10020 DEFSYM (Qcoding_system, "coding-system");
10021 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 10022
df7492f9
KH
10023 DEFSYM (Qeol_type, "eol-type");
10024 DEFSYM (Qunix, "unix");
10025 DEFSYM (Qdos, "dos");
4ed46869 10026
df7492f9
KH
10027 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10028 DEFSYM (Qpost_read_conversion, "post-read-conversion");
10029 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10030 DEFSYM (Qdefault_char, "default-char");
10031 DEFSYM (Qundecided, "undecided");
10032 DEFSYM (Qno_conversion, "no-conversion");
10033 DEFSYM (Qraw_text, "raw-text");
4ed46869 10034
df7492f9 10035 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 10036
df7492f9 10037 DEFSYM (Qutf_8, "utf-8");
8f924df7 10038 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 10039
df7492f9 10040 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
10041 DEFSYM (Qbig, "big");
10042 DEFSYM (Qlittle, "little");
27901516 10043
df7492f9
KH
10044 DEFSYM (Qshift_jis, "shift-jis");
10045 DEFSYM (Qbig5, "big5");
4ed46869 10046
df7492f9 10047 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 10048
df7492f9 10049 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
10050 Fput (Qcoding_system_error, Qerror_conditions,
10051 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
10052 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 10053 build_string ("Invalid coding system"));
4ed46869 10054
05e6f5dc
KH
10055 /* Intern this now in case it isn't already done.
10056 Setting this variable twice is harmless.
10057 But don't staticpro it here--that is done in alloc.c. */
10058 Qchar_table_extra_slots = intern ("char-table-extra-slots");
70c22245 10059
df7492f9 10060 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 10061 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
10062 DEFSYM (Qtranslation_table_id, "translation-table-id");
10063 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10064 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 10065
df7492f9 10066 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 10067
df7492f9 10068 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 10069
01378f49 10070 DEFSYM (QCcategory, ":category");
a6f87d34 10071 DEFSYM (QCmnemonic, ":mnemonic");
2133e2d1 10072 DEFSYM (QCdefault_char, ":default-char");
a6f87d34
KH
10073 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10074 DEFSYM (QCencode_translation_table, ":encode-translation-table");
10075 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10076 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
35befdaa 10077 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
01378f49 10078
df7492f9
KH
10079 Vcoding_category_table
10080 = Fmake_vector (make_number (coding_category_max), Qnil);
10081 staticpro (&Vcoding_category_table);
10082 /* The following are targets of code detection. */
10083 ASET (Vcoding_category_table, coding_category_iso_7,
10084 intern ("coding-category-iso-7"));
10085 ASET (Vcoding_category_table, coding_category_iso_7_tight,
10086 intern ("coding-category-iso-7-tight"));
10087 ASET (Vcoding_category_table, coding_category_iso_8_1,
10088 intern ("coding-category-iso-8-1"));
10089 ASET (Vcoding_category_table, coding_category_iso_8_2,
10090 intern ("coding-category-iso-8-2"));
10091 ASET (Vcoding_category_table, coding_category_iso_7_else,
10092 intern ("coding-category-iso-7-else"));
10093 ASET (Vcoding_category_table, coding_category_iso_8_else,
10094 intern ("coding-category-iso-8-else"));
a470d443
KH
10095 ASET (Vcoding_category_table, coding_category_utf_8_auto,
10096 intern ("coding-category-utf-8-auto"));
10097 ASET (Vcoding_category_table, coding_category_utf_8_nosig,
df7492f9 10098 intern ("coding-category-utf-8"));
a470d443
KH
10099 ASET (Vcoding_category_table, coding_category_utf_8_sig,
10100 intern ("coding-category-utf-8-sig"));
df7492f9
KH
10101 ASET (Vcoding_category_table, coding_category_utf_16_be,
10102 intern ("coding-category-utf-16-be"));
ff563fce
KH
10103 ASET (Vcoding_category_table, coding_category_utf_16_auto,
10104 intern ("coding-category-utf-16-auto"));
df7492f9
KH
10105 ASET (Vcoding_category_table, coding_category_utf_16_le,
10106 intern ("coding-category-utf-16-le"));
10107 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10108 intern ("coding-category-utf-16-be-nosig"));
10109 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10110 intern ("coding-category-utf-16-le-nosig"));
10111 ASET (Vcoding_category_table, coding_category_charset,
10112 intern ("coding-category-charset"));
10113 ASET (Vcoding_category_table, coding_category_sjis,
10114 intern ("coding-category-sjis"));
10115 ASET (Vcoding_category_table, coding_category_big5,
10116 intern ("coding-category-big5"));
10117 ASET (Vcoding_category_table, coding_category_ccl,
10118 intern ("coding-category-ccl"));
10119 ASET (Vcoding_category_table, coding_category_emacs_mule,
10120 intern ("coding-category-emacs-mule"));
10121 /* The following are NOT targets of code detection. */
10122 ASET (Vcoding_category_table, coding_category_raw_text,
10123 intern ("coding-category-raw-text"));
10124 ASET (Vcoding_category_table, coding_category_undecided,
10125 intern ("coding-category-undecided"));
ecf488bc 10126
065e3595
KH
10127 DEFSYM (Qinsufficient_source, "insufficient-source");
10128 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10129 DEFSYM (Qinvalid_source, "invalid-source");
10130 DEFSYM (Qinterrupted, "interrupted");
10131 DEFSYM (Qinsufficient_memory, "insufficient-memory");
44e8490d 10132 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
065e3595 10133
4ed46869
KH
10134 defsubr (&Scoding_system_p);
10135 defsubr (&Sread_coding_system);
10136 defsubr (&Sread_non_nil_coding_system);
10137 defsubr (&Scheck_coding_system);
10138 defsubr (&Sdetect_coding_region);
d46c5b12 10139 defsubr (&Sdetect_coding_string);
05e6f5dc 10140 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 10141 defsubr (&Sunencodable_char_position);
df7492f9 10142 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
10143 defsubr (&Sdecode_coding_region);
10144 defsubr (&Sencode_coding_region);
10145 defsubr (&Sdecode_coding_string);
10146 defsubr (&Sencode_coding_string);
10147 defsubr (&Sdecode_sjis_char);
10148 defsubr (&Sencode_sjis_char);
10149 defsubr (&Sdecode_big5_char);
10150 defsubr (&Sencode_big5_char);
1ba9e4ab 10151 defsubr (&Sset_terminal_coding_system_internal);
c4825358 10152 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 10153 defsubr (&Sterminal_coding_system);
1ba9e4ab 10154 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 10155 defsubr (&Skeyboard_coding_system);
a5d301df 10156 defsubr (&Sfind_operation_coding_system);
df7492f9 10157 defsubr (&Sset_coding_system_priority);
6b89e3aa 10158 defsubr (&Sdefine_coding_system_internal);
df7492f9 10159 defsubr (&Sdefine_coding_system_alias);
a6f87d34 10160 defsubr (&Scoding_system_put);
df7492f9
KH
10161 defsubr (&Scoding_system_base);
10162 defsubr (&Scoding_system_plist);
10163 defsubr (&Scoding_system_aliases);
10164 defsubr (&Scoding_system_eol_type);
10165 defsubr (&Scoding_system_priority_list);
4ed46869 10166
4608c386 10167 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
10168 doc: /* List of coding systems.
10169
10170Do not alter the value of this variable manually. This variable should be
df7492f9 10171updated by the functions `define-coding-system' and
48b0f3ae 10172`define-coding-system-alias'. */);
4608c386
KH
10173 Vcoding_system_list = Qnil;
10174
10175 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
10176 doc: /* Alist of coding system names.
10177Each element is one element list of coding system name.
446dcd75 10178This variable is given to `completing-read' as COLLECTION argument.
48b0f3ae
PJ
10179
10180Do not alter the value of this variable manually. This variable should be
10181updated by the functions `define-coding-system' and
10182`define-coding-system-alias'. */);
4608c386
KH
10183 Vcoding_system_alist = Qnil;
10184
4ed46869 10185 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
10186 doc: /* List of coding-categories (symbols) ordered by priority.
10187
10188On detecting a coding system, Emacs tries code detection algorithms
10189associated with each coding-category one by one in this order. When
10190one algorithm agrees with a byte sequence of source text, the coding
0ec31faf
KH
10191system bound to the corresponding coding-category is selected.
10192
42205607 10193Don't modify this variable directly, but use `set-coding-system-priority'. */);
4ed46869
KH
10194 {
10195 int i;
10196
10197 Vcoding_category_list = Qnil;
df7492f9 10198 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 10199 Vcoding_category_list
d46c5b12
KH
10200 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10201 Vcoding_category_list);
4ed46869
KH
10202 }
10203
10204 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
10205 doc: /* Specify the coding system for read operations.
10206It is useful to bind this variable with `let', but do not set it globally.
10207If the value is a coding system, it is used for decoding on read operation.
446dcd75
JB
10208If not, an appropriate element is used from one of the coding system alists.
10209There are three such tables: `file-coding-system-alist',
48b0f3ae 10210`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
10211 Vcoding_system_for_read = Qnil;
10212
10213 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
10214 doc: /* Specify the coding system for write operations.
10215Programs bind this variable with `let', but you should not set it globally.
10216If the value is a coding system, it is used for encoding of output,
10217when writing it to a file and when sending it to a file or subprocess.
10218
10219If this does not specify a coding system, an appropriate element
446dcd75
JB
10220is used from one of the coding system alists.
10221There are three such tables: `file-coding-system-alist',
48b0f3ae
PJ
10222`process-coding-system-alist', and `network-coding-system-alist'.
10223For output to files, if the above procedure does not specify a coding system,
10224the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
10225 Vcoding_system_for_write = Qnil;
10226
10227 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
10228 doc: /*
10229Coding system used in the latest file or process I/O. */);
4ed46869
KH
10230 Vlast_coding_system_used = Qnil;
10231
065e3595
KH
10232 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10233 doc: /*
10234Error status of the last code conversion.
10235
10236When an error was detected in the last code conversion, this variable
10237is set to one of the following symbols.
10238 `insufficient-source'
10239 `inconsistent-eol'
10240 `invalid-source'
10241 `interrupted'
10242 `insufficient-memory'
10243When no error was detected, the value doesn't change. So, to check
10244the error status of a code conversion by this variable, you must
10245explicitly set this variable to nil before performing code
10246conversion. */);
10247 Vlast_code_conversion_error = Qnil;
10248
9ce27fde 10249 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
10250 doc: /*
10251*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
10252See info node `Coding Systems' and info node `Text and Binary' concerning
10253such conversion. */);
9ce27fde
KH
10254 inhibit_eol_conversion = 0;
10255
ed29121d 10256 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
10257 doc: /*
10258Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
10259Bind it to t if the process output is to be treated as if it were a file
10260read from some filesystem. */);
ed29121d
EZ
10261 inherit_process_coding_system = 0;
10262
02ba4723 10263 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
10264 doc: /*
10265Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
10266The format is ((PATTERN . VAL) ...),
10267where PATTERN is a regular expression matching a file name,
10268VAL is a coding system, a cons of coding systems, or a function symbol.
10269If VAL is a coding system, it is used for both decoding and encoding
10270the file contents.
10271If VAL is a cons of coding systems, the car part is used for decoding,
10272and the cdr part is used for encoding.
10273If VAL is a function symbol, the function must return a coding system
2c53e699
KH
10274or a cons of coding systems which are used as above. The function is
10275called with an argument that is a list of the arguments with which
5a0bbd9a
KH
10276`find-operation-coding-system' was called. If the function can't decide
10277a coding system, it can return `undecided' so that the normal
10278code-detection is performed.
48b0f3ae
PJ
10279
10280See also the function `find-operation-coding-system'
10281and the variable `auto-coding-alist'. */);
02ba4723
KH
10282 Vfile_coding_system_alist = Qnil;
10283
10284 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
10285 doc: /*
10286Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
10287The format is ((PATTERN . VAL) ...),
10288where PATTERN is a regular expression matching a program name,
10289VAL is a coding system, a cons of coding systems, or a function symbol.
10290If VAL is a coding system, it is used for both decoding what received
10291from the program and encoding what sent to the program.
10292If VAL is a cons of coding systems, the car part is used for decoding,
10293and the cdr part is used for encoding.
10294If VAL is a function symbol, the function must return a coding system
10295or a cons of coding systems which are used as above.
10296
10297See also the function `find-operation-coding-system'. */);
02ba4723
KH
10298 Vprocess_coding_system_alist = Qnil;
10299
10300 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
10301 doc: /*
10302Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
10303The format is ((PATTERN . VAL) ...),
10304where PATTERN is a regular expression matching a network service name
10305or is a port number to connect to,
10306VAL is a coding system, a cons of coding systems, or a function symbol.
10307If VAL is a coding system, it is used for both decoding what received
10308from the network stream and encoding what sent to the network stream.
10309If VAL is a cons of coding systems, the car part is used for decoding,
10310and the cdr part is used for encoding.
10311If VAL is a function symbol, the function must return a coding system
10312or a cons of coding systems which are used as above.
10313
10314See also the function `find-operation-coding-system'. */);
02ba4723 10315 Vnetwork_coding_system_alist = Qnil;
4ed46869 10316
68c45bf0 10317 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
10318 doc: /* Coding system to use with system messages.
10319Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
10320 Vlocale_coding_system = Qnil;
10321
005f0d35 10322 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 10323 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
10324 doc: /*
10325*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 10326 eol_mnemonic_unix = build_string (":");
4ed46869 10327
7722baf9 10328 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
10329 doc: /*
10330*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 10331 eol_mnemonic_dos = build_string ("\\");
4ed46869 10332
7722baf9 10333 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
10334 doc: /*
10335*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 10336 eol_mnemonic_mac = build_string ("/");
4ed46869 10337
7722baf9 10338 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
10339 doc: /*
10340*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 10341 eol_mnemonic_undecided = build_string (":");
4ed46869 10342
84fbb8a0 10343 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
10344 doc: /*
10345*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 10346 Venable_character_translation = Qt;
bdd9fb48 10347
f967223b 10348 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
10349 &Vstandard_translation_table_for_decode,
10350 doc: /* Table for translating characters while decoding. */);
f967223b 10351 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 10352
f967223b 10353 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
10354 &Vstandard_translation_table_for_encode,
10355 doc: /* Table for translating characters while encoding. */);
f967223b 10356 Vstandard_translation_table_for_encode = Qnil;
4ed46869 10357
df7492f9 10358 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
10359 doc: /* Alist of charsets vs revision numbers.
10360While encoding, if a charset (car part of an element) is found,
df7492f9
KH
10361designate it with the escape sequence identifying revision (cdr part
10362of the element). */);
10363 Vcharset_revision_table = Qnil;
02ba4723
KH
10364
10365 DEFVAR_LISP ("default-process-coding-system",
10366 &Vdefault_process_coding_system,
48b0f3ae
PJ
10367 doc: /* Cons of coding systems used for process I/O by default.
10368The car part is used for decoding a process output,
10369the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 10370 Vdefault_process_coding_system = Qnil;
c4825358 10371
3f003981 10372 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
10373 doc: /*
10374Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
10375This is a vector of length 256.
10376If Nth element is non-nil, the existence of code N in a file
10377\(or output of subprocess) doesn't prevent it from being detected as
10378a coding system of ISO 2022 variant which has a flag
10379`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10380or reading output of a subprocess.
446dcd75 10381Only 128th through 159th elements have a meaning. */);
3f003981 10382 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
10383
10384 DEFVAR_LISP ("select-safe-coding-system-function",
10385 &Vselect_safe_coding_system_function,
df7492f9
KH
10386 doc: /*
10387Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
10388
10389If set, this function is called to force a user to select a proper
10390coding system which can encode the text in the case that a default
fdecf907
GM
10391coding system used in each operation can't encode the text. The
10392function should take care that the buffer is not modified while
10393the coding system is being selected.
48b0f3ae
PJ
10394
10395The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
10396 Vselect_safe_coding_system_function = Qnil;
10397
5d5bf4d8
KH
10398 DEFVAR_BOOL ("coding-system-require-warning",
10399 &coding_system_require_warning,
10400 doc: /* Internal use only.
6b89e3aa
KH
10401If non-nil, on writing a file, `select-safe-coding-system-function' is
10402called even if `coding-system-for-write' is non-nil. The command
10403`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
10404 coding_system_require_warning = 0;
10405
10406
22ab2303 10407 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 10408 &inhibit_iso_escape_detection,
df7492f9 10409 doc: /*
97b1b294 10410If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
48b0f3ae 10411
97b1b294
EZ
10412When Emacs reads text, it tries to detect how the text is encoded.
10413This code detection is sensitive to escape sequences. If Emacs sees
10414a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10415of the ISO2022 encodings, and decodes text by the corresponding coding
10416system (e.g. `iso-2022-7bit').
48b0f3ae
PJ
10417
10418However, there may be a case that you want to read escape sequences in
10419a file as is. In such a case, you can set this variable to non-nil.
97b1b294
EZ
10420Then the code detection will ignore any escape sequences, and no text is
10421detected as encoded in some ISO-2022 encoding. The result is that all
48b0f3ae
PJ
10422escape sequences become visible in a buffer.
10423
10424The default value is nil, and it is strongly recommended not to change
10425it. That is because many Emacs Lisp source files that contain
10426non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10427in Emacs's distribution, and they won't be decoded correctly on
10428reading if you suppress escape sequence detection.
10429
10430The other way to read escape sequences in a file without decoding is
97b1b294 10431to explicitly specify some coding system that doesn't use ISO-2022
48b0f3ae 10432escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 10433 inhibit_iso_escape_detection = 0;
002fdb44 10434
97b1b294
EZ
10435 DEFVAR_BOOL ("inhibit-null-byte-detection",
10436 &inhibit_null_byte_detection,
10437 doc: /* If non-nil, Emacs ignores null bytes on code detection.
10438By default, Emacs treats it as binary data, and does not attempt to
10439decode it. The effect is as if you specified `no-conversion' for
10440reading that text.
10441
10442Set this to non-nil when a regular text happens to include null bytes.
10443Examples are Index nodes of Info files and null-byte delimited output
10444from GNU Find and GNU Grep. Emacs will then ignore the null bytes and
10445decode text as usual. */);
10446 inhibit_null_byte_detection = 0;
10447
002fdb44 10448 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1 10449 doc: /* Char table for translating self-inserting characters.
446dcd75 10450This is applied to the result of input methods, not their input.
8434d0b8
EZ
10451See also `keyboard-translate-table'.
10452
10453Use of this variable for character code unification was rendered
10454obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10455internal character representation. */);
002fdb44 10456 Vtranslation_table_for_input = Qnil;
8f924df7 10457
2c78b7e1
KH
10458 {
10459 Lisp_Object args[coding_arg_max];
8f924df7 10460 Lisp_Object plist[16];
2c78b7e1
KH
10461 int i;
10462
10463 for (i = 0; i < coding_arg_max; i++)
10464 args[i] = Qnil;
10465
10466 plist[0] = intern (":name");
10467 plist[1] = args[coding_arg_name] = Qno_conversion;
10468 plist[2] = intern (":mnemonic");
10469 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10470 plist[4] = intern (":coding-type");
10471 plist[5] = args[coding_arg_coding_type] = Qraw_text;
10472 plist[6] = intern (":ascii-compatible-p");
10473 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10474 plist[8] = intern (":default-char");
10475 plist[9] = args[coding_arg_default_char] = make_number (0);
8f924df7
KH
10476 plist[10] = intern (":for-unibyte");
10477 plist[11] = args[coding_arg_for_unibyte] = Qt;
10478 plist[12] = intern (":docstring");
10479 plist[13] = build_string ("Do no conversion.\n\
2c78b7e1
KH
10480\n\
10481When you visit a file with this coding, the file is read into a\n\
10482unibyte buffer as is, thus each byte of a file is treated as a\n\
10483character.");
8f924df7
KH
10484 plist[14] = intern (":eol-type");
10485 plist[15] = args[coding_arg_eol_type] = Qunix;
10486 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1 10487 Fdefine_coding_system_internal (coding_arg_max, args);
ae6f73fa
KH
10488
10489 plist[1] = args[coding_arg_name] = Qundecided;
10490 plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10491 plist[5] = args[coding_arg_coding_type] = Qundecided;
10492 /* This is already set.
35befdaa 10493 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
ae6f73fa
KH
10494 plist[8] = intern (":charset-list");
10495 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10496 plist[11] = args[coding_arg_for_unibyte] = Qnil;
10497 plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10498 plist[15] = args[coding_arg_eol_type] = Qnil;
10499 args[coding_arg_plist] = Flist (16, plist);
10500 Fdefine_coding_system_internal (coding_arg_max, args);
2c78b7e1
KH
10501 }
10502
2c78b7e1 10503 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
10504
10505 {
10506 int i;
10507
10508 for (i = 0; i < coding_category_max; i++)
10509 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10510 }
fcbcfb64
KH
10511#if defined (MSDOS) || defined (WINDOWSNT)
10512 system_eol_type = Qdos;
10513#else
10514 system_eol_type = Qunix;
10515#endif
10516 staticpro (&system_eol_type);
4ed46869
KH
10517}
10518
68c45bf0
PE
10519char *
10520emacs_strerror (error_number)
10521 int error_number;
10522{
10523 char *str;
10524
ca9c0567 10525 synchronize_system_messages_locale ();
68c45bf0
PE
10526 str = strerror (error_number);
10527
10528 if (! NILP (Vlocale_coding_system))
10529 {
10530 Lisp_Object dec = code_convert_string_norecord (build_string (str),
10531 Vlocale_coding_system,
10532 0);
d5db4077 10533 str = (char *) SDATA (dec);
68c45bf0
PE
10534 }
10535
10536 return str;
10537}
10538
4ed46869 10539#endif /* emacs */
9ffd559c
KH
10540
10541/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10542 (do not change this comment) */