(Ffind_operation_coding_system): Sync with HEAD.
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
aaef169d
TTN
2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
3 2006 Free Software Foundation, Inc.
ce03bf76
KH
4 Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
8f924df7 7 Copyright (C) 2003
df7492f9
KH
8 National Institute of Advanced Industrial Science and Technology (AIST)
9 Registration Number H13PRO009
4ed46869 10
369314dc
KH
11This file is part of GNU Emacs.
12
13GNU Emacs is free software; you can redistribute it and/or modify
14it under the terms of the GNU General Public License as published by
15the Free Software Foundation; either version 2, or (at your option)
16any later version.
4ed46869 17
369314dc
KH
18GNU Emacs is distributed in the hope that it will be useful,
19but WITHOUT ANY WARRANTY; without even the implied warranty of
20MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21GNU General Public License for more details.
4ed46869 22
369314dc
KH
23You should have received a copy of the GNU General Public License
24along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
25the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26Boston, MA 02110-1301, USA. */
4ed46869
KH
27
28/*** TABLE OF CONTENTS ***
29
b73bfc1c 30 0. General comments
4ed46869 31 1. Preamble
df7492f9
KH
32 2. Emacs' internal format (emacs-utf-8) handlers
33 3. UTF-8 handlers
34 4. UTF-16 handlers
35 5. Charset-base coding systems handlers
36 6. emacs-mule (old Emacs' internal format) handlers
37 7. ISO2022 handlers
38 8. Shift-JIS and BIG5 handlers
39 9. CCL handlers
40 10. C library functions
41 11. Emacs Lisp library functions
42 12. Postamble
4ed46869
KH
43
44*/
45
df7492f9 46/*** 0. General comments ***
b73bfc1c
KH
47
48
df7492f9 49CODING SYSTEM
4ed46869 50
5bad0796
DL
51 A coding system is an object for an encoding mechanism that contains
52 information about how to convert byte sequences to character
e19c3639
KH
53 sequences and vice versa. When we say "decode", it means converting
54 a byte sequence of a specific coding system into a character
55 sequence that is represented by Emacs' internal coding system
56 `emacs-utf-8', and when we say "encode", it means converting a
57 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 58 coding system.
4ed46869 59
e19c3639
KH
60 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
61 C level, a coding system is represented by a vector of attributes
5bad0796 62 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
63 coding system symbol to attributes vector is done by looking up
64 Vcoding_system_hash_table by the symbol.
4ed46869 65
e19c3639 66 Coding systems are classified into the following types depending on
5bad0796 67 the encoding mechanism. Here's a brief description of the types.
4ed46869 68
df7492f9
KH
69 o UTF-8
70
71 o UTF-16
72
73 o Charset-base coding system
74
75 A coding system defined by one or more (coded) character sets.
5bad0796 76 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
77 character set.
78
5bad0796 79 o Old Emacs internal format (emacs-mule)
df7492f9 80
5bad0796 81 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 82
df7492f9 83 o ISO2022-base coding system
4ed46869
KH
84
85 The most famous coding system for multiple character sets. X's
df7492f9
KH
86 Compound Text, various EUCs (Extended Unix Code), and coding systems
87 used in the Internet communication such as ISO-2022-JP are all
88 variants of ISO2022.
4ed46869 89
df7492f9 90 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 91
4ed46869
KH
92 A coding system to encode character sets: ASCII, JISX0201, and
93 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 94 section 8.
4ed46869 95
df7492f9 96 o BIG5
4ed46869 97
df7492f9 98 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 99 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
100 described in section 8. In this file, when we write "big5" (all
101 lowercase), we mean the coding system, and when we write "Big5"
102 (capitalized), we mean the character set.
4ed46869 103
df7492f9 104 o CCL
27901516 105
5bad0796 106 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
107 not listed above, he can supply a decoder and an encoder for it in
108 CCL (Code Conversion Language) programs. Emacs executes the CCL
109 program while decoding/encoding.
27901516 110
df7492f9 111 o Raw-text
4ed46869 112
5a936b46 113 A coding system for text containing raw eight-bit data. Emacs
5bad0796 114 treats each byte of source text as a character (except for
df7492f9 115 end-of-line conversion).
4ed46869 116
df7492f9
KH
117 o No-conversion
118
119 Like raw text, but don't do end-of-line conversion.
4ed46869 120
4ed46869 121
df7492f9 122END-OF-LINE FORMAT
4ed46869 123
5bad0796 124 How text end-of-line is encoded depends on the operating system. For
df7492f9 125 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 126 whereas DOS's format is a two-byte sequence of `carriage-return' and
d46c5b12
KH
127 `line-feed' codes. MacOS's format is usually one byte of
128 `carriage-return'.
4ed46869 129
cfb43547 130 Since text character encoding and end-of-line encoding are
df7492f9
KH
131 independent, any coding system described above can take any format
132 of end-of-line (except for no-conversion).
4ed46869 133
e19c3639
KH
134STRUCT CODING_SYSTEM
135
136 Before using a coding system for code conversion (i.e. decoding and
137 encoding), we set up a structure of type `struct coding_system'.
138 This structure keeps various information about a specific code
5bad0796 139 conversion (e.g. the location of source and destination data).
4ed46869
KH
140
141*/
142
df7492f9
KH
143/* COMMON MACROS */
144
145
4ed46869
KH
146/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
147
df7492f9 148 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
149 CODING conforms to the format of XXX, and update the members of
150 DETECT_INFO.
df7492f9 151
ff0dacd7 152 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
153
154 Below is the template of these functions. */
155
4ed46869 156#if 0
df7492f9 157static int
ff0dacd7 158detect_coding_XXX (coding, detect_info)
df7492f9 159 struct coding_system *coding;
ff0dacd7 160 struct coding_detection_info *detect_info;
4ed46869 161{
f1d34bca
MB
162 const unsigned char *src = coding->source;
163 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 164 int multibytep = coding->src_multibyte;
ff0dacd7 165 int consumed_chars = 0;
df7492f9
KH
166 int found = 0;
167 ...;
168
169 while (1)
170 {
171 /* Get one byte from the source. If the souce is exausted, jump
172 to no_more_source:. */
173 ONE_MORE_BYTE (c);
ff0dacd7
KH
174
175 if (! __C_conforms_to_XXX___ (c))
176 break;
177 if (! __C_strongly_suggests_XXX__ (c))
178 found = CATEGORY_MASK_XXX;
df7492f9 179 }
ff0dacd7
KH
180 /* The byte sequence is invalid for XXX. */
181 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 182 return 0;
ff0dacd7 183
df7492f9 184 no_more_source:
ff0dacd7
KH
185 /* The source exausted successfully. */
186 detect_info->found |= found;
df7492f9 187 return 1;
4ed46869
KH
188}
189#endif
190
191/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
df7492f9
KH
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
d46c5b12 197
df7492f9
KH
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
d46c5b12 202
df7492f9 203 Below is the template of these functions. */
d46c5b12 204
4ed46869 205#if 0
b73bfc1c 206static void
df7492f9 207decode_coding_XXXX (coding)
4ed46869 208 struct coding_system *coding;
4ed46869 209{
f1d34bca
MB
210 const unsigned char *src = coding->source + coding->consumed;
211 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
f1d34bca 215 const unsigned char *src_base;
df7492f9 216 /* A buffer to produce decoded characters. */
69a80ea3
KH
217 int *charbuf = coding->charbuf + coding->charbuf_used;
218 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
243}
244#endif
245
246/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
df7492f9
KH
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
d46c5b12 252
df7492f9
KH
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 257
df7492f9
KH
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce an encoded text until it
260 reaches the head of not-yet-encoded source text.
d46c5b12 261
df7492f9 262 Below is a template of these functions. */
4ed46869 263#if 0
b73bfc1c 264static void
df7492f9 265encode_coding_XXX (coding)
4ed46869 266 struct coding_system *coding;
4ed46869 267{
df7492f9
KH
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
4ed46869
KH
285}
286#endif
287
4ed46869
KH
288\f
289/*** 1. Preamble ***/
290
68c45bf0 291#include <config.h>
4ed46869
KH
292#include <stdio.h>
293
4ed46869
KH
294#include "lisp.h"
295#include "buffer.h"
df7492f9 296#include "character.h"
4ed46869
KH
297#include "charset.h"
298#include "ccl.h"
df7492f9 299#include "composite.h"
4ed46869
KH
300#include "coding.h"
301#include "window.h"
4ed46869 302
df7492f9 303Lisp_Object Vcoding_system_hash_table;
4ed46869 304
df7492f9 305Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
306Lisp_Object Qunix, Qdos;
307extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
308Lisp_Object Qbuffer_file_coding_system;
309Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 310Lisp_Object Qdefault_char;
27901516 311Lisp_Object Qno_conversion, Qundecided;
df7492f9 312Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 313Lisp_Object Qbig, Qlittle;
bb0115a2 314Lisp_Object Qcoding_system_history;
1397dc18 315Lisp_Object Qvalid_codes;
a6f87d34
KH
316Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
317Lisp_Object QCdecode_translation_table, QCencode_translation_table;
318Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
35befdaa 319Lisp_Object QCascii_compatible_p;
4ed46869
KH
320
321extern Lisp_Object Qinsert_file_contents, Qwrite_region;
387f6ba5 322Lisp_Object Qcall_process, Qcall_process_region;
4ed46869
KH
323Lisp_Object Qstart_process, Qopen_network_stream;
324Lisp_Object Qtarget_idx;
325
065e3595
KH
326Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
327Lisp_Object Qinterrupted, Qinsufficient_memory;
328
44e8490d
KH
329/* If a symbol has this property, evaluate the value to define the
330 symbol as a coding system. */
331static Lisp_Object Qcoding_system_define_form;
332
5d5bf4d8
KH
333int coding_system_require_warning;
334
d46c5b12
KH
335Lisp_Object Vselect_safe_coding_system_function;
336
7722baf9
EZ
337/* Mnemonic string for each format of end-of-line. */
338Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
339/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 340 decided. */
7722baf9 341Lisp_Object eol_mnemonic_undecided;
4ed46869 342
fcbcfb64
KH
343/* Format of end-of-line decided by system. This is Qunix on
344 Unix and Mac, Qdos on DOS/Windows.
345 This has an effect only for external encoding (i.e. for output to
346 file and process), not for in-buffer or Lisp string encoding. */
347static Lisp_Object system_eol_type;
348
4ed46869
KH
349#ifdef emacs
350
4608c386
KH
351Lisp_Object Vcoding_system_list, Vcoding_system_alist;
352
353Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 354
d46c5b12
KH
355/* Coding system emacs-mule and raw-text are for converting only
356 end-of-line format. */
357Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 358Lisp_Object Qutf_8_emacs;
ecf488bc 359
4ed46869
KH
360/* Coding-systems are handed between Emacs Lisp programs and C internal
361 routines by the following three variables. */
362/* Coding-system for reading files and receiving data from process. */
363Lisp_Object Vcoding_system_for_read;
364/* Coding-system for writing files and sending data to process. */
365Lisp_Object Vcoding_system_for_write;
366/* Coding-system actually used in the latest I/O. */
367Lisp_Object Vlast_coding_system_used;
065e3595
KH
368/* Set to non-nil when an error is detected while code conversion. */
369Lisp_Object Vlast_code_conversion_error;
c4825358 370/* A vector of length 256 which contains information about special
94487c4e 371 Latin codes (especially for dealing with Microsoft codes). */
3f003981 372Lisp_Object Vlatin_extra_code_table;
c4825358 373
9ce27fde
KH
374/* Flag to inhibit code conversion of end-of-line format. */
375int inhibit_eol_conversion;
376
74383408
KH
377/* Flag to inhibit ISO2022 escape sequence detection. */
378int inhibit_iso_escape_detection;
379
ed29121d
EZ
380/* Flag to make buffer-file-coding-system inherit from process-coding. */
381int inherit_process_coding_system;
382
c4825358 383/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
384struct coding_system terminal_coding;
385
c4825358
KH
386/* Coding system to be used to encode text for terminal display when
387 terminal coding system is nil. */
388struct coding_system safe_terminal_coding;
389
390/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
391struct coding_system keyboard_coding;
392
02ba4723
KH
393Lisp_Object Vfile_coding_system_alist;
394Lisp_Object Vprocess_coding_system_alist;
395Lisp_Object Vnetwork_coding_system_alist;
4ed46869 396
68c45bf0
PE
397Lisp_Object Vlocale_coding_system;
398
4ed46869
KH
399#endif /* emacs */
400
f967223b
KH
401/* Flag to tell if we look up translation table on character code
402 conversion. */
84fbb8a0 403Lisp_Object Venable_character_translation;
f967223b
KH
404/* Standard translation table to look up on decoding (reading). */
405Lisp_Object Vstandard_translation_table_for_decode;
406/* Standard translation table to look up on encoding (writing). */
407Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 408
f967223b
KH
409Lisp_Object Qtranslation_table;
410Lisp_Object Qtranslation_table_id;
411Lisp_Object Qtranslation_table_for_decode;
412Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
413
414/* Alist of charsets vs revision number. */
df7492f9 415static Lisp_Object Vcharset_revision_table;
4ed46869 416
02ba4723
KH
417/* Default coding systems used for process I/O. */
418Lisp_Object Vdefault_process_coding_system;
419
002fdb44
DL
420/* Char table for translating Quail and self-inserting input. */
421Lisp_Object Vtranslation_table_for_input;
422
df7492f9
KH
423/* Two special coding systems. */
424Lisp_Object Vsjis_coding_system;
425Lisp_Object Vbig5_coding_system;
426
df7492f9
KH
427/* ISO2022 section */
428
429#define CODING_ISO_INITIAL(coding, reg) \
430 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
431 coding_attr_iso_initial), \
432 reg)))
433
434
/* Return the entry for CHARSET_ID in CODING's safe_charsets table, or
   -1 when CHARSET_ID is greater than CODING's max_charset_id.  Only
   the upper bound is checked.  CHARSET_ID is evaluated twice, so it
   must not have side effects.  */
#define CODING_ISO_REQUEST(coding, charset_id)          \
  (((charset_id) <= (coding)->max_charset_id            \
    ? (coding)->safe_charsets[(charset_id)]             \
    : -1))
439
440
441#define CODING_ISO_FLAGS(coding) \
442 ((coding)->spec.iso_2022.flags)
443#define CODING_ISO_DESIGNATION(coding, reg) \
444 ((coding)->spec.iso_2022.current_designation[reg])
445#define CODING_ISO_INVOCATION(coding, plane) \
446 ((coding)->spec.iso_2022.current_invocation[plane])
447#define CODING_ISO_SINGLE_SHIFTING(coding) \
448 ((coding)->spec.iso_2022.single_shifting)
449#define CODING_ISO_BOL(coding) \
450 ((coding)->spec.iso_2022.bol)
451#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
452 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
453
454/* Control characters of ISO2022. */
455 /* code */ /* function */
456#define ISO_CODE_LF 0x0A /* line-feed */
457#define ISO_CODE_CR 0x0D /* carriage-return */
458#define ISO_CODE_SO 0x0E /* shift-out */
459#define ISO_CODE_SI 0x0F /* shift-in */
460#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
461#define ISO_CODE_ESC 0x1B /* escape */
462#define ISO_CODE_SS2 0x8E /* single-shift-2 */
463#define ISO_CODE_SS3 0x8F /* single-shift-3 */
464#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
465
466/* Every 1-byte code of ISO2022 is classified into one of the
467 following classes. */
468enum iso_code_class_type
469 {
470 ISO_control_0, /* Control codes in the range
471 0x00..0x1F and 0x7F, except for the
472 following 4 codes. */
df7492f9
KH
473 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
474 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
475 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
476 ISO_escape, /* ISO_CODE_ESC (0x1B) */
477 ISO_control_1, /* Control codes in the range
478 0x80..0x9F, except for the
479 following 3 codes. */
480 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
481 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
482 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
483 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
484 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
485 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
486 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
487 };
05e6f5dc 488
df7492f9
KH
489/** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
490 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 491
df7492f9
KH
492/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
493 instead of the correct short-form sequence (e.g. ESC $ A). */
494#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 495
df7492f9
KH
496/* If set, reset graphic planes and registers at end-of-line to the
497 initial state. */
498#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 499
df7492f9
KH
500/* If set, reset graphic planes and registers before any control
501 characters to the initial state. */
502#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 503
df7492f9
KH
504/* If set, encode by 7-bit environment. */
505#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 506
df7492f9
KH
507/* If set, use locking-shift function. */
508#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 509
df7492f9
KH
510/* If set, use single-shift function. Overwrite
511 CODING_ISO_FLAG_LOCKING_SHIFT. */
512#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 513
df7492f9
KH
514/* If set, use designation escape sequence. */
515#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 516
df7492f9
KH
517/* If set, produce revision number sequence. */
518#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 519
df7492f9
KH
520/* If set, produce ISO6429's direction specifying sequence. */
521#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 522
df7492f9
KH
523/* If set, assume designation states are reset at beginning of line on
524 output. */
525#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 526
df7492f9
KH
527/* If set, designation sequence should be placed at beginning of line
528 on output. */
529#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 530
df7492f9
KH
531/* If set, do not encode unsafe characters on output. */
532#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 533
df7492f9
KH
534/* If set, extra latin codes (128..159) are accepted as a valid code
535 on input. */
536#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 537
df7492f9 538#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 539
df7492f9 540#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 541
bf16eb23 542#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 543
bf16eb23 544#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 545
bf16eb23 546#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 547
df7492f9
KH
548/* A character to be produced on output if encoding of the original
549 character is prohibited by CODING_ISO_FLAG_SAFE. */
550#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 551
4ed46869 552
df7492f9
KH
553/* UTF-16 section */
554#define CODING_UTF_16_BOM(coding) \
555 ((coding)->spec.utf_16.bom)
4ed46869 556
df7492f9
KH
557#define CODING_UTF_16_ENDIAN(coding) \
558 ((coding)->spec.utf_16.endian)
4ed46869 559
df7492f9
KH
560#define CODING_UTF_16_SURROGATE(coding) \
561 ((coding)->spec.utf_16.surrogate)
4ed46869 562
4ed46869 563
df7492f9
KH
564/* CCL section */
565#define CODING_CCL_DECODER(coding) \
566 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
567#define CODING_CCL_ENCODER(coding) \
568 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
569#define CODING_CCL_VALIDS(coding) \
8f924df7 570 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 571
5a936b46 572/* Index for each coding category in `coding_categories' */
4ed46869 573
df7492f9
KH
574enum coding_category
575 {
576 coding_category_iso_7,
577 coding_category_iso_7_tight,
578 coding_category_iso_8_1,
579 coding_category_iso_8_2,
580 coding_category_iso_7_else,
581 coding_category_iso_8_else,
582 coding_category_utf_8,
583 coding_category_utf_16_auto,
584 coding_category_utf_16_be,
585 coding_category_utf_16_le,
586 coding_category_utf_16_be_nosig,
587 coding_category_utf_16_le_nosig,
588 coding_category_charset,
589 coding_category_sjis,
590 coding_category_big5,
591 coding_category_ccl,
592 coding_category_emacs_mule,
593 /* All above are targets of code detection. */
594 coding_category_raw_text,
595 coding_category_undecided,
596 coding_category_max
597 };
598
599/* Definitions of flag bits used in detect_coding_XXXX. */
600#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
601#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
602#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
603#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
604#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
605#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
606#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
b49a1807 607#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
608#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
609#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
610#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
611#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
612#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
613#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
614#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
615#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
616#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 617#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
618
619/* This value is returned if detect_coding_mask () finds nothing other
620 than ASCII characters. */
621#define CATEGORY_MASK_ANY \
622 (CATEGORY_MASK_ISO_7 \
623 | CATEGORY_MASK_ISO_7_TIGHT \
624 | CATEGORY_MASK_ISO_8_1 \
625 | CATEGORY_MASK_ISO_8_2 \
626 | CATEGORY_MASK_ISO_7_ELSE \
627 | CATEGORY_MASK_ISO_8_ELSE \
628 | CATEGORY_MASK_UTF_8 \
629 | CATEGORY_MASK_UTF_16_BE \
630 | CATEGORY_MASK_UTF_16_LE \
631 | CATEGORY_MASK_UTF_16_BE_NOSIG \
632 | CATEGORY_MASK_UTF_16_LE_NOSIG \
633 | CATEGORY_MASK_CHARSET \
634 | CATEGORY_MASK_SJIS \
635 | CATEGORY_MASK_BIG5 \
636 | CATEGORY_MASK_CCL \
637 | CATEGORY_MASK_EMACS_MULE)
638
639
640#define CATEGORY_MASK_ISO_7BIT \
641 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
642
643#define CATEGORY_MASK_ISO_8BIT \
644 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
645
646#define CATEGORY_MASK_ISO_ELSE \
647 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
648
649#define CATEGORY_MASK_ISO_ESCAPE \
650 (CATEGORY_MASK_ISO_7 \
651 | CATEGORY_MASK_ISO_7_TIGHT \
652 | CATEGORY_MASK_ISO_7_ELSE \
653 | CATEGORY_MASK_ISO_8_ELSE)
654
655#define CATEGORY_MASK_ISO \
656 ( CATEGORY_MASK_ISO_7BIT \
657 | CATEGORY_MASK_ISO_8BIT \
658 | CATEGORY_MASK_ISO_ELSE)
659
660#define CATEGORY_MASK_UTF_16 \
661 (CATEGORY_MASK_UTF_16_BE \
662 | CATEGORY_MASK_UTF_16_LE \
663 | CATEGORY_MASK_UTF_16_BE_NOSIG \
664 | CATEGORY_MASK_UTF_16_LE_NOSIG)
665
666
667/* List of symbols `coding-category-xxx' ordered by priority. This
668 variable is exposed to Emacs Lisp. */
669static Lisp_Object Vcoding_category_list;
670
671/* Table of coding categories (Lisp symbols). This variable is for
672 internal use oly. */
673static Lisp_Object Vcoding_category_table;
674
675/* Table of coding-categories ordered by priority. */
676static enum coding_category coding_priorities[coding_category_max];
677
678/* Nth element is a coding context for the coding system bound to the
679 Nth coding category. */
680static struct coding_system coding_categories[coding_category_max];
681
df7492f9
KH
682/*** Commonly used macros and functions ***/
683
684#ifndef min
685#define min(a, b) ((a) < (b) ? (a) : (b))
686#endif
687#ifndef max
688#define max(a, b) ((a) > (b) ? (a) : (b))
689#endif
4ed46869 690
24a73b0a
KH
691#define CODING_GET_INFO(coding, attrs, charset_list) \
692 do { \
693 (attrs) = CODING_ID_ATTRS ((coding)->id); \
694 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
df7492f9 695 } while (0)
4ed46869 696
4ed46869 697
df7492f9
KH
698/* Safely get one byte from the source text pointed by SRC which ends
699 at SRC_END, and set C to that byte. If there are not enough bytes
065e3595
KH
700 in the source, it jumps to `no_more_source'. If multibytep is
701 nonzero, and a multibyte character is found at SRC, set C to the
702 negative value of the character code. The caller should declare
703 and set these variables appropriately in advance:
704 src, src_base, src_end, multibytep, consumed_chars, coding */
aa72b389 705
065e3595
KH
/* ONE_MORE_BYTE is an end-of-source guard followed by
   ONE_MORE_BYTE_NO_CHECK.  When SRC has reached SRC_END, control jumps
   to the label `no_more_source'; if bytes were already taken since
   SRC_BASE, CODING_RESULT_INSUFFICIENT_SRC is recorded first.  */
#define ONE_MORE_BYTE(c)                                \
  do {                                                  \
    if (src == src_end)                                 \
      {                                                 \
        if (src_base < src)                             \
          record_conversion_result                      \
            (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
        goto no_more_source;                            \
      }                                                 \
    ONE_MORE_BYTE_NO_CHECK (c);                         \
  } while (0)
730
aa72b389 731
065e3595
KH
/* Fetch the next byte at SRC into C without testing SRC against
   SRC_END, and increment CONSUMED_CHARS.  When MULTIBYTEP is nonzero
   and the byte has its high bit set, a lead byte of 0xC0 or 0xC1 is
   merged with the byte that follows it (presumably the two-byte form
   of a raw eight-bit byte -- confirm against character.h).  Any other
   such byte is re-read with string_char, C is set to the negated
   result, and CODING_RESULT_INVALID_SRC is recorded.  */
#define ONE_MORE_BYTE_NO_CHECK(c)                       \
  do {                                                  \
    c = *src;                                           \
    src++;                                              \
    if (multibytep && (c & 0x80) != 0)                  \
      {                                                 \
        if ((c & 0xFE) != 0xC0)                         \
          {                                             \
            src--;                                      \
            c = - string_char (src, &src, NULL);        \
            record_conversion_result                    \
              (coding, CODING_RESULT_INVALID_SRC);      \
          }                                             \
        else                                            \
          {                                             \
            c = ((c & 1) << 6) | *src;                  \
            src++;                                      \
          }                                             \
      }                                                 \
    consumed_chars += 1;                                \
  } while (0)
749
aa72b389 750
df7492f9
KH
751/* Store a byte C in the place pointed by DST and increment DST to the
752 next free point, and increment PRODUCED_CHARS. The caller should
753 assure that C is 0..127, and declare and set the variable `dst'
754 appropriately in advance.
755*/
aa72b389
KH
756
757
df7492f9
KH
/* Count one produced character, write C at DST and step DST past it.  */
#define EMIT_ONE_ASCII_BYTE(c)  \
  do {                          \
    produced_chars += 1;        \
    *dst = (c);                 \
    dst += 1;                   \
  } while (0)
aa72b389
KH
763
764
df7492f9 765/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
aa72b389 766
df7492f9
KH
/* Count two produced characters, then write C1 and C2 at DST in that
   order, stepping DST past each byte as it is written.  */
#define EMIT_TWO_ASCII_BYTES(c1, c2)    \
  do {                                  \
    produced_chars += 2;                \
    *dst = (c1);                        \
    dst++;                              \
    *dst = (c2);                        \
    dst++;                              \
  } while (0)
aa72b389
KH
772
773
df7492f9
KH
774/* Store a byte C in the place pointed by DST and increment DST to the
775 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
776 nonzero, store in an appropriate multibyte form. The caller should
777 declare and set the variables `dst' and `multibytep' appropriately
778 in advance. */
779
780#define EMIT_ONE_BYTE(c) \
781 do { \
782 produced_chars++; \
783 if (multibytep) \
784 { \
785 int ch = (c); \
786 if (ch >= 0x80) \
787 ch = BYTE8_TO_CHAR (ch); \
788 CHAR_STRING_ADVANCE (ch, dst); \
789 } \
790 else \
791 *dst++ = (c); \
aa72b389 792 } while (0)
aa72b389 793
aa72b389 794
df7492f9 795/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 796
e19c3639
KH
/* Emit C1 and then C2, each exactly as EMIT_ONE_BYTE would: counted in
   PRODUCED_CHARS and, when MULTIBYTEP is nonzero, stored in multibyte
   form with bytes >= 0x80 first mapped through BYTE8_TO_CHAR.  */
#define EMIT_TWO_BYTES(c1, c2)  \
  do {                          \
    EMIT_ONE_BYTE (c1);         \
    EMIT_ONE_BYTE (c2);         \
  } while (0)
819
820
df7492f9
KH
821#define EMIT_THREE_BYTES(c1, c2, c3) \
822 do { \
823 EMIT_ONE_BYTE (c1); \
824 EMIT_TWO_BYTES (c2, c3); \
825 } while (0)
aa72b389 826
aa72b389 827
df7492f9
KH
828#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
829 do { \
830 EMIT_TWO_BYTES (c1, c2); \
831 EMIT_TWO_BYTES (c3, c4); \
832 } while (0)
aa72b389 833
aa72b389 834
f6cbaf43
KH
835/* Prototypes for static functions. */
836static void record_conversion_result P_ ((struct coding_system *coding,
837 enum coding_result_code result));
838static int detect_coding_utf_8 P_ ((struct coding_system *,
839 struct coding_detection_info *info));
840static void decode_coding_utf_8 P_ ((struct coding_system *));
841static int encode_coding_utf_8 P_ ((struct coding_system *));
842
843static int detect_coding_utf_16 P_ ((struct coding_system *,
844 struct coding_detection_info *info));
845static void decode_coding_utf_16 P_ ((struct coding_system *));
846static int encode_coding_utf_16 P_ ((struct coding_system *));
847
848static int detect_coding_iso_2022 P_ ((struct coding_system *,
849 struct coding_detection_info *info));
850static void decode_coding_iso_2022 P_ ((struct coding_system *));
851static int encode_coding_iso_2022 P_ ((struct coding_system *));
852
853static int detect_coding_emacs_mule P_ ((struct coding_system *,
854 struct coding_detection_info *info));
855static void decode_coding_emacs_mule P_ ((struct coding_system *));
856static int encode_coding_emacs_mule P_ ((struct coding_system *));
857
858static int detect_coding_sjis P_ ((struct coding_system *,
859 struct coding_detection_info *info));
860static void decode_coding_sjis P_ ((struct coding_system *));
861static int encode_coding_sjis P_ ((struct coding_system *));
862
863static int detect_coding_big5 P_ ((struct coding_system *,
864 struct coding_detection_info *info));
865static void decode_coding_big5 P_ ((struct coding_system *));
866static int encode_coding_big5 P_ ((struct coding_system *));
867
868static int detect_coding_ccl P_ ((struct coding_system *,
869 struct coding_detection_info *info));
870static void decode_coding_ccl P_ ((struct coding_system *));
871static int encode_coding_ccl P_ ((struct coding_system *));
872
873static void decode_coding_raw_text P_ ((struct coding_system *));
874static int encode_coding_raw_text P_ ((struct coding_system *));
875
876static void coding_set_source P_ ((struct coding_system *));
877static void coding_set_destination P_ ((struct coding_system *));
878static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
879static void coding_alloc_by_making_gap P_ ((struct coding_system *,
880 EMACS_INT));
881static unsigned char *alloc_destination P_ ((struct coding_system *,
882 EMACS_INT, unsigned char *));
883static void setup_iso_safe_charsets P_ ((Lisp_Object));
884static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
885 int *, int *,
886 unsigned char *));
887static int detect_eol P_ ((const unsigned char *,
888 EMACS_INT, enum coding_category));
889static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
890static void decode_eol P_ ((struct coding_system *));
891static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
892static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
893 int, int *, int *));
894static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
895static INLINE void produce_composition P_ ((struct coding_system *, int *,
896 EMACS_INT));
897static INLINE void produce_charset P_ ((struct coding_system *, int *,
898 EMACS_INT));
899static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
900static int decode_coding P_ ((struct coding_system *));
901static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
902 struct coding_system *,
903 int *, EMACS_INT *));
904static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
905 struct coding_system *,
906 int *, EMACS_INT *));
907static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
908static int encode_coding P_ ((struct coding_system *));
909static Lisp_Object make_conversion_work_buffer P_ ((int));
910static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
911static INLINE int char_encodable_p P_ ((int, Lisp_Object));
912static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
913
065e3595
KH
914static void
915record_conversion_result (struct coding_system *coding,
916 enum coding_result_code result)
917{
918 coding->result = result;
919 switch (result)
920 {
921 case CODING_RESULT_INSUFFICIENT_SRC:
922 Vlast_code_conversion_error = Qinsufficient_source;
923 break;
924 case CODING_RESULT_INCONSISTENT_EOL:
925 Vlast_code_conversion_error = Qinconsistent_eol;
926 break;
927 case CODING_RESULT_INVALID_SRC:
928 Vlast_code_conversion_error = Qinvalid_source;
929 break;
930 case CODING_RESULT_INTERRUPT:
931 Vlast_code_conversion_error = Qinterrupted;
932 break;
933 case CODING_RESULT_INSUFFICIENT_MEM:
934 Vlast_code_conversion_error = Qinsufficient_memory;
935 break;
35befdaa
KH
936 default:
937 Vlast_code_conversion_error = intern ("Unknown error");
065e3595
KH
938 }
939}
940
df7492f9
KH
/* Set C to the result of DECODE_CHAR (CHARSET, CODE).  If the flag
   charset_map_loaded is nonzero afterwards, refresh CODING->source
   with coding_set_source and shift the caller's pointers SRC,
   SRC_BASE and SRC_END by the distance CODING->source moved.  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                  \
    charset_map_loaded = 0;                                             \
    c = DECODE_CHAR (charset, code);                                    \
    if (charset_map_loaded)                                             \
      {                                                                 \
        /* Remember the old address before it is recomputed.  */       \
        const unsigned char *orig = coding->source;                     \
        EMACS_INT offset;                                               \
                                                                        \
        coding_set_source (coding);                                     \
        offset = coding->source - orig;                                 \
        src += offset;                                                  \
        src_base += offset;                                             \
        src_end += offset;                                              \
      }                                                                 \
  } while (0)
957
958
df7492f9
KH
/* Make sure more than BYTES bytes are free between DST and DST_END.
   If not, enlarge the destination with alloc_destination by BYTES
   plus the number of elements still pending in the caller's charbuf
   (CHARBUF_END - CHARBUF), then refresh DST and DST_END.  The caller
   must have `coding', `dst', `dst_end', `charbuf' and `charbuf_end'
   in scope.  */

#define ASSURE_DESTINATION(bytes)                               \
  do {                                                          \
    if (dst + (bytes) >= dst_end)                               \
      {                                                         \
        int more_bytes = charbuf_end - charbuf + (bytes);       \
                                                                \
        dst = alloc_destination (coding, more_bytes, dst);      \
        dst_end = coding->destination + coding->dst_bytes;      \
      }                                                         \
  } while (0)
aa72b389 969
aa72b389 970
aa72b389 971
df7492f9
KH
972static void
973coding_set_source (coding)
aa72b389 974 struct coding_system *coding;
aa72b389 975{
df7492f9
KH
976 if (BUFFERP (coding->src_object))
977 {
2cb26057 978 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 979
df7492f9 980 if (coding->src_pos < 0)
2cb26057 981 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 982 else
2cb26057 983 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 984 }
df7492f9 985 else if (STRINGP (coding->src_object))
aa72b389 986 {
8f924df7 987 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 988 }
df7492f9
KH
989 else
990 /* Otherwise, the source is C string and is never relocated
991 automatically. Thus we don't have to update anything. */
992 ;
993}
aa72b389 994
df7492f9
KH
995static void
996coding_set_destination (coding)
997 struct coding_system *coding;
998{
999 if (BUFFERP (coding->dst_object))
aa72b389 1000 {
df7492f9 1001 if (coding->src_pos < 0)
aa72b389 1002 {
28f67a95
KH
1003 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
1004 coding->dst_bytes = (GAP_END_ADDR
1005 - (coding->src_bytes - coding->consumed)
1006 - coding->destination);
aa72b389 1007 }
df7492f9 1008 else
28f67a95
KH
1009 {
1010 /* We are sure that coding->dst_pos_byte is before the gap
1011 of the buffer. */
1012 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1013 + coding->dst_pos_byte - 1);
1014 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1015 - coding->destination);
1016 }
df7492f9
KH
1017 }
1018 else
1019 /* Otherwise, the destination is C string and is never relocated
1020 automatically. Thus we don't have to update anything. */
1021 ;
1022}
1023
1024
1025static void
1026coding_alloc_by_realloc (coding, bytes)
1027 struct coding_system *coding;
1028 EMACS_INT bytes;
1029{
1030 coding->destination = (unsigned char *) xrealloc (coding->destination,
1031 coding->dst_bytes + bytes);
1032 coding->dst_bytes += bytes;
1033}
1034
1035static void
1036coding_alloc_by_making_gap (coding, bytes)
1037 struct coding_system *coding;
1038 EMACS_INT bytes;
1039{
2c78b7e1
KH
1040 if (BUFFERP (coding->dst_object)
1041 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
1042 {
1043 EMACS_INT add = coding->src_bytes - coding->consumed;
1044
1045 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1046 make_gap (bytes);
1047 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1048 }
730fff51 1049 else
df7492f9 1050 {
2c78b7e1
KH
1051 Lisp_Object this_buffer;
1052
1053 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1054 set_buffer_internal (XBUFFER (coding->dst_object));
1055 make_gap (bytes);
1056 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1057 }
df7492f9 1058}
8f924df7 1059
df7492f9
KH
1060
1061static unsigned char *
1062alloc_destination (coding, nbytes, dst)
1063 struct coding_system *coding;
3e139625 1064 EMACS_INT nbytes;
df7492f9
KH
1065 unsigned char *dst;
1066{
1067 EMACS_INT offset = dst - coding->destination;
1068
1069 if (BUFFERP (coding->dst_object))
1070 coding_alloc_by_making_gap (coding, nbytes);
aa72b389 1071 else
df7492f9 1072 coding_alloc_by_realloc (coding, nbytes);
065e3595 1073 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1074 coding_set_destination (coding);
1075 dst = coding->destination + offset;
1076 return dst;
1077}
aa72b389 1078
ff0dacd7
KH
1079/** Macros for annotations. */
1080
1081/* Maximum length of annotation data (sum of annotations for
1082 composition and charset). */
69a80ea3 1083#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
ff0dacd7
KH
1084
1085/* An annotation data is stored in the array coding->charbuf in this
1086 format:
69a80ea3 1087 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
ff0dacd7
KH
1088 LENGTH is the number of elements in the annotation.
1089 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
69a80ea3 1090 NCHARS is the number of characters in the text annotated.
ff0dacd7
KH
1091
1092 The format of the following elements depends on ANNOTATION_MASK.
1093
1094 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1095 follow:
1096 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1097 METHOD is one of enum composition_method.
1098 Optional COMPOSITION-COMPONENTS are characters and composition
1099 rules.
1100
1101 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1102 follows. */
1103
/* Store the three-element annotation header [ -LEN MASK NCHARS ] at
   BUF, advance BUF past it, and mark CODING as annotated.  BUF must
   be a modifiable lvalue of pointer-to-int type, and a variable
   `coding' must be in scope.

   The expansion deliberately does not end in a semicolon, so that
   the macro behaves as one statement and can be used in an if/else
   without braces.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)

/* Store a composition annotation of four elements at BUF: the header
   for NCHARS characters followed by the composition METHOD.  BUF is
   advanced past the annotation.  */

#define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method);                                                    \
  } while (0)


/* Store a charset annotation of four elements at BUF: the header for
   NCHARS characters followed by the charset ID.  BUF is advanced
   past the annotation.  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1124
df7492f9
KH
1125\f
1126/*** 2. Emacs' internal format (emacs-utf-8) ***/
1127
1128
1129
1130\f
1131/*** 3. UTF-8 ***/
1132
1133/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1134 Check if a text is encoded in UTF-8. If it is, return 1, else
1135 return 0. */
df7492f9
KH
1136
/* Predicates classifying one UTF-8 octet C by its high-order bits.
   UTF_8_OCTET_BITS_P is nonzero if the bits of C selected by MASK
   equal BITS.  */
#define UTF_8_OCTET_BITS_P(c, mask, bits) (((c) & (mask)) == (bits))

/* C is a complete one-octet sequence (below 0x80).  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* C is a trailing octet, 10xxxxxx.  */
#define UTF_8_EXTRA_OCTET_P(c)     UTF_8_OCTET_BITS_P (c, 0xC0, 0x80)
/* C leads a two-octet sequence, 110xxxxx.  */
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_OCTET_BITS_P (c, 0xE0, 0xC0)
/* C leads a three-octet sequence, 1110xxxx.  */
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_OCTET_BITS_P (c, 0xF0, 0xE0)
/* C leads a four-octet sequence, 11110xxx.  */
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_OCTET_BITS_P (c, 0xF8, 0xF0)
/* C leads a five-octet sequence, 111110xx.  */
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_OCTET_BITS_P (c, 0xFC, 0xF8)
1143
1144static int
ff0dacd7 1145detect_coding_utf_8 (coding, detect_info)
df7492f9 1146 struct coding_system *coding;
ff0dacd7 1147 struct coding_detection_info *detect_info;
df7492f9 1148{
065e3595 1149 const unsigned char *src = coding->source, *src_base;
8f924df7 1150 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1151 int multibytep = coding->src_multibyte;
1152 int consumed_chars = 0;
1153 int found = 0;
1154
ff0dacd7 1155 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1156 /* A coding system of this category is always ASCII compatible. */
1157 src += coding->head_ascii;
1158
1159 while (1)
aa72b389 1160 {
df7492f9 1161 int c, c1, c2, c3, c4;
aa72b389 1162
065e3595 1163 src_base = src;
df7492f9 1164 ONE_MORE_BYTE (c);
065e3595 1165 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1166 continue;
1167 ONE_MORE_BYTE (c1);
065e3595 1168 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1169 break;
1170 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1171 {
ff0dacd7 1172 found = CATEGORY_MASK_UTF_8;
df7492f9 1173 continue;
aa72b389 1174 }
df7492f9 1175 ONE_MORE_BYTE (c2);
065e3595 1176 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1177 break;
1178 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1179 {
ff0dacd7 1180 found = CATEGORY_MASK_UTF_8;
df7492f9 1181 continue;
aa72b389 1182 }
df7492f9 1183 ONE_MORE_BYTE (c3);
065e3595 1184 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1185 break;
1186 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1187 {
ff0dacd7 1188 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1189 continue;
1190 }
1191 ONE_MORE_BYTE (c4);
065e3595 1192 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1193 break;
1194 if (UTF_8_5_OCTET_LEADING_P (c))
1195 {
ff0dacd7 1196 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1197 continue;
1198 }
1199 break;
aa72b389 1200 }
ff0dacd7 1201 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1202 return 0;
aa72b389 1203
df7492f9 1204 no_more_source:
065e3595 1205 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1206 {
ff0dacd7 1207 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1208 return 0;
aa72b389 1209 }
ff0dacd7
KH
1210 detect_info->found |= found;
1211 return 1;
aa72b389
KH
1212}
1213
4ed46869 1214
b73bfc1c 1215static void
df7492f9 1216decode_coding_utf_8 (coding)
b73bfc1c 1217 struct coding_system *coding;
b73bfc1c 1218{
8f924df7
KH
1219 const unsigned char *src = coding->source + coding->consumed;
1220 const unsigned char *src_end = coding->source + coding->src_bytes;
1221 const unsigned char *src_base;
69a80ea3
KH
1222 int *charbuf = coding->charbuf + coding->charbuf_used;
1223 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1224 int consumed_chars = 0, consumed_chars_base;
1225 int multibytep = coding->src_multibyte;
24a73b0a 1226 Lisp_Object attr, charset_list;
4ed46869 1227
24a73b0a 1228 CODING_GET_INFO (coding, attr, charset_list);
df7492f9
KH
1229
1230 while (1)
b73bfc1c 1231 {
df7492f9 1232 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1233
df7492f9
KH
1234 src_base = src;
1235 consumed_chars_base = consumed_chars;
4af310db 1236
df7492f9
KH
1237 if (charbuf >= charbuf_end)
1238 break;
1239
1240 ONE_MORE_BYTE (c1);
065e3595
KH
1241 if (c1 < 0)
1242 {
1243 c = - c1;
1244 }
1245 else if (UTF_8_1_OCTET_P(c1))
df7492f9
KH
1246 {
1247 c = c1;
4af310db 1248 }
df7492f9 1249 else
4af310db 1250 {
df7492f9 1251 ONE_MORE_BYTE (c2);
065e3595 1252 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1253 goto invalid_code;
1254 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1255 {
b0edb2c5
DL
1256 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1257 /* Reject overlong sequences here and below. Encoders
1258 producing them are incorrect, they can be misleading,
1259 and they mess up read/write invariance. */
1260 if (c < 128)
1261 goto invalid_code;
4af310db 1262 }
df7492f9 1263 else
aa72b389 1264 {
df7492f9 1265 ONE_MORE_BYTE (c3);
065e3595 1266 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1267 goto invalid_code;
1268 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1269 {
1270 c = (((c1 & 0xF) << 12)
1271 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1272 if (c < 0x800
1273 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1274 goto invalid_code;
1275 }
df7492f9
KH
1276 else
1277 {
1278 ONE_MORE_BYTE (c4);
065e3595 1279 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1280 goto invalid_code;
1281 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1282 {
df7492f9
KH
1283 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1284 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1285 if (c < 0x10000)
1286 goto invalid_code;
1287 }
df7492f9
KH
1288 else
1289 {
1290 ONE_MORE_BYTE (c5);
065e3595 1291 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1292 goto invalid_code;
1293 if (UTF_8_5_OCTET_LEADING_P (c1))
1294 {
1295 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1296 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1297 | (c5 & 0x3F));
b0edb2c5 1298 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1299 goto invalid_code;
1300 }
1301 else
1302 goto invalid_code;
1303 }
1304 }
aa72b389 1305 }
b73bfc1c 1306 }
df7492f9
KH
1307
1308 *charbuf++ = c;
1309 continue;
1310
1311 invalid_code:
1312 src = src_base;
1313 consumed_chars = consumed_chars_base;
1314 ONE_MORE_BYTE (c);
1315 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1316 coding->errors++;
aa72b389
KH
1317 }
1318
df7492f9
KH
1319 no_more_source:
1320 coding->consumed_char += consumed_chars_base;
1321 coding->consumed = src_base - coding->source;
1322 coding->charbuf_used = charbuf - coding->charbuf;
1323}
1324
1325
1326static int
1327encode_coding_utf_8 (coding)
1328 struct coding_system *coding;
1329{
1330 int multibytep = coding->dst_multibyte;
1331 int *charbuf = coding->charbuf;
1332 int *charbuf_end = charbuf + coding->charbuf_used;
1333 unsigned char *dst = coding->destination + coding->produced;
1334 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1335 int produced_chars = 0;
df7492f9
KH
1336 int c;
1337
1338 if (multibytep)
aa72b389 1339 {
df7492f9
KH
1340 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1341
1342 while (charbuf < charbuf_end)
b73bfc1c 1343 {
df7492f9 1344 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1345
df7492f9
KH
1346 ASSURE_DESTINATION (safe_room);
1347 c = *charbuf++;
28f67a95
KH
1348 if (CHAR_BYTE8_P (c))
1349 {
1350 c = CHAR_TO_BYTE8 (c);
1351 EMIT_ONE_BYTE (c);
1352 }
1353 else
1354 {
1355 CHAR_STRING_ADVANCE (c, pend);
1356 for (p = str; p < pend; p++)
1357 EMIT_ONE_BYTE (*p);
1358 }
b73bfc1c 1359 }
aa72b389 1360 }
df7492f9
KH
1361 else
1362 {
1363 int safe_room = MAX_MULTIBYTE_LENGTH;
1364
1365 while (charbuf < charbuf_end)
b73bfc1c 1366 {
df7492f9
KH
1367 ASSURE_DESTINATION (safe_room);
1368 c = *charbuf++;
f03caae0
KH
1369 if (CHAR_BYTE8_P (c))
1370 *dst++ = CHAR_TO_BYTE8 (c);
1371 else
1372 dst += CHAR_STRING (c, dst);
df7492f9 1373 produced_chars++;
4ed46869
KH
1374 }
1375 }
065e3595 1376 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1377 coding->produced_char += produced_chars;
1378 coding->produced = dst - coding->destination;
1379 return 0;
4ed46869
KH
1380}
1381
b73bfc1c 1382
df7492f9 1383/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1384 Check if a text is encoded in one of UTF-16 based coding systems.
1385 If it is, return 1, else return 0. */
aa72b389 1386
df7492f9
KH
/* The bits of the 16-bit unit VAL that identify a surrogate block.  */
#define UTF_16_SURROGATE_BLOCK(val) ((val) & 0xFC00)

/* VAL is a high (leading) surrogate, 0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (UTF_16_SURROGATE_BLOCK (val) == 0xD800)

/* VAL is a low (trailing) surrogate, 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (UTF_16_SURROGATE_BLOCK (val) == 0xDC00)

/* VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
#define UTF_16_INVALID_P(val)           \
  (((val) == 0xFFFE)                    \
   || ((val) == 0xFFFF)                 \
   || UTF_16_LOW_SURROGATE_P (val))
aa72b389 1397
aa72b389 1398
df7492f9 1399static int
ff0dacd7 1400detect_coding_utf_16 (coding, detect_info)
aa72b389 1401 struct coding_system *coding;
ff0dacd7 1402 struct coding_detection_info *detect_info;
aa72b389 1403{
8f924df7
KH
1404 const unsigned char *src = coding->source, *src_base = src;
1405 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1406 int multibytep = coding->src_multibyte;
1407 int consumed_chars = 0;
1408 int c1, c2;
aa72b389 1409
ff0dacd7 1410 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1411 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1412 && (coding->src_chars & 1))
ff0dacd7
KH
1413 {
1414 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1415 return 0;
1416 }
24a73b0a 1417
df7492f9
KH
1418 ONE_MORE_BYTE (c1);
1419 ONE_MORE_BYTE (c2);
df7492f9 1420 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1421 {
b49a1807
KH
1422 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1423 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1424 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1425 | CATEGORY_MASK_UTF_16_BE_NOSIG
1426 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1427 }
df7492f9 1428 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1429 {
b49a1807
KH
1430 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1431 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1432 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1433 | CATEGORY_MASK_UTF_16_BE_NOSIG
1434 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1435 }
065e3595 1436 else if (c1 >= 0 && c2 >= 0)
24a73b0a 1437 {
24a73b0a
KH
1438 detect_info->rejected
1439 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
ff0dacd7 1440 }
df7492f9 1441 no_more_source:
ff0dacd7 1442 return 1;
df7492f9 1443}
aa72b389 1444
df7492f9
KH
1445static void
1446decode_coding_utf_16 (coding)
1447 struct coding_system *coding;
1448{
8f924df7
KH
1449 const unsigned char *src = coding->source + coding->consumed;
1450 const unsigned char *src_end = coding->source + coding->src_bytes;
1451 const unsigned char *src_base;
69a80ea3
KH
1452 int *charbuf = coding->charbuf + coding->charbuf_used;
1453 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1454 int consumed_chars = 0, consumed_chars_base;
1455 int multibytep = coding->src_multibyte;
1456 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1457 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1458 int surrogate = CODING_UTF_16_SURROGATE (coding);
24a73b0a 1459 Lisp_Object attr, charset_list;
df7492f9 1460
24a73b0a 1461 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1462
b49a1807 1463 if (bom == utf_16_with_bom)
aa72b389 1464 {
df7492f9 1465 int c, c1, c2;
4af310db 1466
aa72b389 1467 src_base = src;
df7492f9
KH
1468 ONE_MORE_BYTE (c1);
1469 ONE_MORE_BYTE (c2);
e19c3639 1470 c = (c1 << 8) | c2;
aa72b389 1471
b49a1807
KH
1472 if (endian == utf_16_big_endian
1473 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1474 {
b49a1807
KH
1475 /* The first two bytes are not BOM. Treat them as bytes
1476 for a normal character. */
1477 src = src_base;
1478 coding->errors++;
aa72b389 1479 }
b49a1807
KH
1480 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1481 }
1482 else if (bom == utf_16_detect_bom)
1483 {
1484 /* We have already tried to detect BOM and failed in
1485 detect_coding. */
1486 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
df7492f9 1487 }
aa72b389 1488
df7492f9
KH
1489 while (1)
1490 {
1491 int c, c1, c2;
1492
1493 src_base = src;
1494 consumed_chars_base = consumed_chars;
1495
1496 if (charbuf + 2 >= charbuf_end)
1497 break;
1498
1499 ONE_MORE_BYTE (c1);
065e3595
KH
1500 if (c1 < 0)
1501 {
1502 *charbuf++ = -c1;
1503 continue;
1504 }
df7492f9 1505 ONE_MORE_BYTE (c2);
065e3595
KH
1506 if (c2 < 0)
1507 {
1508 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1509 *charbuf++ = -c2;
1510 continue;
1511 }
df7492f9 1512 c = (endian == utf_16_big_endian
e19c3639 1513 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1514 if (surrogate)
fd3ae0b9 1515 {
df7492f9 1516 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1517 {
df7492f9
KH
1518 if (endian == utf_16_big_endian)
1519 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1520 else
1521 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1522 *charbuf++ = c1;
1523 *charbuf++ = c2;
1524 coding->errors++;
1525 if (UTF_16_HIGH_SURROGATE_P (c))
1526 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1527 else
df7492f9 1528 *charbuf++ = c;
fd3ae0b9
KH
1529 }
1530 else
df7492f9
KH
1531 {
1532 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1533 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1534 *charbuf++ = 0x10000 + c;
df7492f9 1535 }
fd3ae0b9 1536 }
aa72b389 1537 else
df7492f9
KH
1538 {
1539 if (UTF_16_HIGH_SURROGATE_P (c))
1540 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1541 else
1542 *charbuf++ = c;
8f924df7 1543 }
aa72b389 1544 }
df7492f9
KH
1545
1546 no_more_source:
1547 coding->consumed_char += consumed_chars_base;
1548 coding->consumed = src_base - coding->source;
1549 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1550}
b73bfc1c 1551
df7492f9
KH
1552static int
1553encode_coding_utf_16 (coding)
1554 struct coding_system *coding;
1555{
1556 int multibytep = coding->dst_multibyte;
1557 int *charbuf = coding->charbuf;
1558 int *charbuf_end = charbuf + coding->charbuf_used;
1559 unsigned char *dst = coding->destination + coding->produced;
1560 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1561 int safe_room = 8;
1562 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1563 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1564 int produced_chars = 0;
24a73b0a 1565 Lisp_Object attrs, charset_list;
df7492f9 1566 int c;
4ed46869 1567
24a73b0a 1568 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 1569
b49a1807 1570 if (bom != utf_16_without_bom)
df7492f9
KH
1571 {
1572 ASSURE_DESTINATION (safe_room);
1573 if (big_endian)
df7492f9 1574 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1575 else
1576 EMIT_TWO_BYTES (0xFF, 0xFE);
df7492f9
KH
1577 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1578 }
1579
1580 while (charbuf < charbuf_end)
1581 {
1582 ASSURE_DESTINATION (safe_room);
1583 c = *charbuf++;
e19c3639
KH
1584 if (c >= MAX_UNICODE_CHAR)
1585 c = coding->default_char;
df7492f9
KH
1586
1587 if (c < 0x10000)
1588 {
1589 if (big_endian)
1590 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1591 else
1592 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1593 }
1594 else
1595 {
1596 int c1, c2;
1597
1598 c -= 0x10000;
1599 c1 = (c >> 10) + 0xD800;
1600 c2 = (c & 0x3FF) + 0xDC00;
1601 if (big_endian)
1602 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1603 else
1604 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1605 }
1606 }
065e3595 1607 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1608 coding->produced = dst - coding->destination;
1609 coding->produced_char += produced_chars;
1610 return 0;
1611}
1612
1613\f
1614/*** 6. Old Emacs' internal format (emacs-mule) ***/
1615
1616/* Emacs' internal format for representation of multiple character
1617 sets is a kind of multi-byte encoding, i.e. characters are
1618 represented by variable-length sequences of one-byte codes.
1619
1620 ASCII characters and control characters (e.g. `tab', `newline') are
1621 represented by one-byte sequences which are their ASCII codes, in
1622 the range 0x00 through 0x7F.
1623
1624 8-bit characters of the range 0x80..0x9F are represented by
1625 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1626 code + 0x20).
1627
1628 8-bit characters of the range 0xA0..0xFF are represented by
1629 one-byte sequences which are their 8-bit code.
1630
1631 The other characters are represented by a sequence of `base
1632 leading-code', optional `extended leading-code', and one or two
1633 `position-code's. The length of the sequence is determined by the
1634 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1635 whereas extended leading-code and position-code take the range 0xA0
1636 through 0xFF. See `charset.h' for more details about leading-code
1637 and position-code.
1638
1639 --- CODE RANGE of Emacs' internal format ---
1640 character set range
1641 ------------- -----
1642 ascii 0x00..0x7F
1643 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1644 eight-bit-graphic 0xA0..0xBF
1645 ELSE 0x81..0x9D + [0xA0..0xFF]+
1646 ---------------------------------------------
1647
1648 As this is the internal character representation, the format is
1649 usually not used externally (i.e. in a file or in a data sent to a
1650 process). But, it is possible to have a text externally in this
1651 format (i.e. by encoding by the coding system `emacs-mule').
1652
1653 In that case, a sequence of one-byte codes has a slightly different
1654 form.
1655
1656 At first, all characters in eight-bit-control are represented by
1657 one-byte sequences which are their 8-bit code.
1658
1659 Next, character composition data are represented by the byte
1660 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1661 where,
1662 METHOD is 0xF0 plus one of composition method (enum
1663 composition_method),
1664
1665 BYTES is 0xA0 plus a byte length of this composition data,
1666
1667 CHARS is 0x20 plus a number of characters composed by this
1668 data,
1669
1670 COMPONENTs are characters of multibyte form or composition
1671 rules encoded by two bytes of ASCII codes.
1672
1673 In addition, for backward compatibility, the following formats are
1674 also recognized as composition data on decoding.
1675
1676 0x80 MSEQ ...
1677 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1678
1679 Here,
1680 MSEQ is a multibyte form but in these special format:
1681 ASCII: 0xA0 ASCII_CODE+0x80,
1682 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1683 RULE is a one byte code of the range 0xA0..0xF0 that
1684 represents a composition rule.
1685 */
1686
1687char emacs_mule_bytes[256];
1688
df7492f9 1689int
ff0dacd7 1690emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1691 struct coding_system *coding;
065e3595 1692 const unsigned char *src;
ff0dacd7 1693 int *nbytes, *nchars, *id;
df7492f9 1694{
8f924df7
KH
1695 const unsigned char *src_end = coding->source + coding->src_bytes;
1696 const unsigned char *src_base = src;
df7492f9 1697 int multibytep = coding->src_multibyte;
df7492f9
KH
1698 struct charset *charset;
1699 unsigned code;
1700 int c;
1701 int consumed_chars = 0;
1702
1703 ONE_MORE_BYTE (c);
065e3595 1704 if (c < 0)
df7492f9 1705 {
065e3595
KH
1706 c = -c;
1707 charset = emacs_mule_charset[0];
1708 }
1709 else
1710 {
4d41e8b7
KH
1711 if (c >= 0xA0)
1712 {
1713 /* Old style component character of a compostion. */
1714 if (c == 0xA0)
1715 {
1716 ONE_MORE_BYTE (c);
1717 c -= 0x80;
1718 }
1719 else
1720 c -= 0x20;
1721 }
1722
065e3595 1723 switch (emacs_mule_bytes[c])
b73bfc1c 1724 {
065e3595 1725 case 2:
df7492f9
KH
1726 if (! (charset = emacs_mule_charset[c]))
1727 goto invalid_code;
1728 ONE_MORE_BYTE (c);
9ffd559c 1729 if (c < 0xA0)
065e3595 1730 goto invalid_code;
df7492f9 1731 code = c & 0x7F;
065e3595
KH
1732 break;
1733
1734 case 3:
1735 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1736 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1737 {
1738 ONE_MORE_BYTE (c);
9ffd559c 1739 if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
065e3595
KH
1740 goto invalid_code;
1741 ONE_MORE_BYTE (c);
9ffd559c 1742 if (c < 0xA0)
065e3595
KH
1743 goto invalid_code;
1744 code = c & 0x7F;
1745 }
1746 else
1747 {
1748 if (! (charset = emacs_mule_charset[c]))
1749 goto invalid_code;
1750 ONE_MORE_BYTE (c);
9ffd559c 1751 if (c < 0xA0)
065e3595
KH
1752 goto invalid_code;
1753 code = (c & 0x7F) << 8;
1754 ONE_MORE_BYTE (c);
9ffd559c 1755 if (c < 0xA0)
065e3595
KH
1756 goto invalid_code;
1757 code |= c & 0x7F;
1758 }
1759 break;
1760
1761 case 4:
1762 ONE_MORE_BYTE (c);
1763 if (c < 0 || ! (charset = emacs_mule_charset[c]))
df7492f9
KH
1764 goto invalid_code;
1765 ONE_MORE_BYTE (c);
9ffd559c 1766 if (c < 0xA0)
065e3595 1767 goto invalid_code;
781d7a48 1768 code = (c & 0x7F) << 8;
df7492f9 1769 ONE_MORE_BYTE (c);
9ffd559c 1770 if (c < 0xA0)
065e3595 1771 goto invalid_code;
df7492f9 1772 code |= c & 0x7F;
065e3595 1773 break;
df7492f9 1774
065e3595
KH
1775 case 1:
1776 code = c;
1777 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1778 ? charset_ascii : charset_eight_bit);
1779 break;
df7492f9 1780
065e3595
KH
1781 default:
1782 abort ();
1783 }
1784 c = DECODE_CHAR (charset, code);
1785 if (c < 0)
1786 goto invalid_code;
df7492f9 1787 }
df7492f9
KH
1788 *nbytes = src - src_base;
1789 *nchars = consumed_chars;
ff0dacd7
KH
1790 if (id)
1791 *id = charset->id;
df7492f9
KH
1792 return c;
1793
1794 no_more_source:
1795 return -2;
1796
1797 invalid_code:
1798 return -1;
1799}
1800
1801
1802/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1803 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1804 else return 0. */
df7492f9
KH
1805
1806static int
ff0dacd7 1807detect_coding_emacs_mule (coding, detect_info)
df7492f9 1808 struct coding_system *coding;
ff0dacd7 1809 struct coding_detection_info *detect_info;
df7492f9 1810{
065e3595 1811 const unsigned char *src = coding->source, *src_base;
8f924df7 1812 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1813 int multibytep = coding->src_multibyte;
1814 int consumed_chars = 0;
1815 int c;
1816 int found = 0;
1817
ff0dacd7 1818 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1819 /* A coding system of this category is always ASCII compatible. */
1820 src += coding->head_ascii;
1821
1822 while (1)
1823 {
065e3595 1824 src_base = src;
df7492f9 1825 ONE_MORE_BYTE (c);
065e3595
KH
1826 if (c < 0)
1827 continue;
df7492f9
KH
1828 if (c == 0x80)
1829 {
1830 /* Perhaps the start of composite character. We simple skip
1831 it because analyzing it is too heavy for detecting. But,
1832 at least, we check that the composite character
1833 constitues of more than 4 bytes. */
8f924df7 1834 const unsigned char *src_base;
df7492f9
KH
1835
1836 repeat:
1837 src_base = src;
1838 do
1839 {
1840 ONE_MORE_BYTE (c);
1841 }
1842 while (c >= 0xA0);
1843
1844 if (src - src_base <= 4)
1845 break;
ff0dacd7 1846 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1847 if (c == 0x80)
1848 goto repeat;
b73bfc1c 1849 }
df7492f9
KH
1850
1851 if (c < 0x80)
b73bfc1c 1852 {
df7492f9
KH
1853 if (c < 0x20
1854 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1855 break;
1856 }
1857 else
1858 {
0e219d54 1859 int more_bytes = emacs_mule_bytes[*src_base] - 1;
df7492f9 1860
0e219d54 1861 while (more_bytes > 0)
df7492f9
KH
1862 {
1863 ONE_MORE_BYTE (c);
0e219d54
KH
1864 if (c < 0xA0)
1865 {
1866 src--; /* Unread the last byte. */
1867 break;
1868 }
1869 more_bytes--;
df7492f9 1870 }
0e219d54 1871 if (more_bytes != 0)
df7492f9 1872 break;
ff0dacd7 1873 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
1874 }
1875 }
ff0dacd7 1876 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1877 return 0;
1878
1879 no_more_source:
065e3595 1880 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 1881 {
ff0dacd7 1882 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
1883 return 0;
1884 }
ff0dacd7
KH
1885 detect_info->found |= found;
1886 return 1;
4ed46869
KH
1887}
1888
b73bfc1c 1889
df7492f9
KH
1890/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1891
/* Decode, by calling emacs_mule_char, one character that is a
   component of a composition sequence of Emacs 20/21 style at SRC.
   On success store the character at *BUF, increment BUF, and advance
   SRC and CONSUMED_CHARS past the character.

   If SRC has already reached SRC_END, or emacs_mule_char returns -2
   (the source ended in the middle of the character), execute `break'.
   If emacs_mule_char returns any other negative value, jump to the
   label `invalid_code' of the calling function.

   The body is wrapped in `if (1) { ... } else' instead of
   `do { ... } while (0)' so that those `break' statements apply to
   the loop (or switch) enclosing the macro call, not to the macro
   itself.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                         \
  if (1)                                                                \
    {                                                                   \
      int c;                                                            \
      int nbytes, nchars;                                               \
                                                                        \
      if (src == src_end)                                               \
        break;                                                          \
      c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);        \
      if (c == -2)                                                      \
        break;                                                          \
      if (c < 0)                                                        \
        goto invalid_code;                                              \
      *buf++ = c;                                                       \
      src += nbytes;                                                    \
      consumed_chars += nchars;                                         \
    }                                                                   \
  else
1919
1920
/* Decode a composition rule represented as a component of a
   composition sequence of Emacs 20 style at SRC.  The rule is a
   single byte whose value minus 0xA0 is GREF * 9 + NREF.  Store the
   rule, encoded by COMPOSITION_ENCODE_RULE, at *BUF and increment
   BUF.  If there is no byte left before SRC_END, or the byte is not
   in the range 0xA0..0xF0, jump to the label `invalid_code' of the
   calling function.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
  do {                                                  \
    int c, gref, nref;                                  \
                                                        \
    if (src >= src_end)                                 \
      goto invalid_code;                                \
    ONE_MORE_BYTE_NO_CHECK (c);                         \
    c -= 0xA0;                                          \
    if (! (0 <= c && c < 81))                           \
      goto invalid_code;                                \
                                                        \
    gref = c / 9;                                       \
    nref = c % 9;                                       \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
1940
1941
781d7a48
KH
/* Decode a composition rule represented as a component of a
   composition sequence of Emacs 21 style at SRC.  The rule occupies
   two bytes, GREF + 0x20 followed by NREF + 0x20.  Store the rule,
   encoded by COMPOSITION_ENCODE_RULE, at *BUF and increment BUF.  If
   fewer than two bytes are left before SRC_END, or either value is
   outside 0..80 after subtracting 0x20, jump to the label
   `invalid_code' of the calling function.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    if (src + 1 >= src_end)                             \
      goto invalid_code;                                \
    ONE_MORE_BYTE_NO_CHECK (gref);                      \
    ONE_MORE_BYTE_NO_CHECK (nref);                      \
    gref -= 0x20;                                       \
    nref -= 0x20;                                       \
    if (! (0 <= gref && gref < 81                       \
           && 0 <= nref && nref < 81))                  \
      goto invalid_code;                                \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
1962
1963
/* Decode the header (and, except for relative composition, the
   components) of a composition in Emacs 21 style format.  On entry C
   holds the byte METHOD + 0xF2 that has just been read; the next two
   bytes at SRC are BYTES + 0xA0 and CHARS + 0xA0, where BYTES is the
   byte length of this composition information and CHARS is the
   number of characters composed by this composition.

   An annotation is added to CHARBUF by ADD_COMPOSITION_DATA.  When
   METHOD is not COMPOSITION_RELATIVE, components are then decoded
   into CHARBUF until CONSUMED_CHARS reaches CONSUMED_CHARS_BASE +
   BYTES; every second component is a rule unless METHOD is
   COMPOSITION_WITH_ALTCHARS.  Finally the first element written by
   ADD_COMPOSITION_DATA (presumably the negated annotation length) is
   decreased by the number of components stored.

   Jump to `invalid_code' if the header is malformed or the
   components end early.  */

#define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int *charbuf_base = charbuf;                                        \
    int consumed_chars_limit;                                           \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    nbytes = c - 0xA0;                                                  \
    if (c < 0 || nbytes < 3)                                            \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nchars = c - 0xA0;                                                  \
    ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
    consumed_chars_limit = consumed_chars_base + nbytes;                \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int i;                                                          \
                                                                        \
        for (i = 0; consumed_chars < consumed_chars_limit; i++)         \
          {                                                             \
            if ((i & 1) && method != COMPOSITION_WITH_ALTCHARS)         \
              DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
            else                                                        \
              DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
          }                                                             \
        if (consumed_chars < consumed_chars_limit)                      \
          goto invalid_code;                                            \
        charbuf_base[0] -= i;                                           \
      }                                                                 \
  } while (0)
93dec019 2003
aa72b389 2004
d959f512
KH
/* Decode a relative composition in Emacs 20 style format: the byte
   0x80 followed by two or more characters whose leading bytes are in
   the range 0xA0..0xFF.  On entry two bytes (0x80 and the byte now
   in C) have been read; decoding restarts from SRC_BASE so that only
   the 0x80 is skipped.

   The decoded characters are collected in a local buffer, then an
   annotation is added to CHARBUF by ADD_COMPOSITION_DATA and the
   characters are appended after it.  Jump to `invalid_code' if fewer
   than two characters could be decoded.

   When SRC is rewound to SRC_BASE, CONSUMED_CHARS is rewound to
   CONSUMED_CHARS_BASE as well; otherwise the two bytes already read
   would be counted a second time.  The loop also checks
   SRC < SRC_END before looking at *SRC so that no byte beyond the
   end of the source is read (the loop would have stopped there
   anyway, because DECODE_EMACS_MULE_COMPOSITION_CHAR breaks when
   SRC == SRC_END).  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
  do {                                                                  \
    enum composition_method method = COMPOSITION_RELATIVE;              \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
    int *buf = components;                                              \
    int i, j;                                                           \
                                                                        \
    src = src_base;                                                     \
    consumed_chars = consumed_chars_base;                               \
    ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
    for (i = 0;                                                         \
         (i < MAX_COMPOSITION_COMPONENTS                                \
          && src < src_end && *src >= 0xA0);                            \
         i++)                                                           \
      DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
    if (i < 2)                                                          \
      goto invalid_code;                                                \
    ADD_COMPOSITION_DATA (charbuf, i, method);                          \
    for (j = 0; j < i; j++)                                             \
      *charbuf++ = components[j];                                       \
  } while (0)
2024
2025
/* Decode a rule-base composition in Emacs 20 style format: a
   character followed by one or more (rule, character) pairs, each
   rule being a byte in the range 0xA0..0xFF.

   The components are collected in a local buffer as CHAR RULE CHAR
   ...; I counts the characters.  Jump to `invalid_code' unless at
   least two characters were decoded and the number of collected
   components is odd.  Jump to `no_more_source' if CHARBUF does not
   have enough room left.  Otherwise add an annotation by
   ADD_COMPOSITION_DATA, append all components (characters and
   rules), decrease the first annotation element by the number of
   components, and finally append the characters alone.

   NOTE(review): the loop looks at *SRC without first checking
   SRC < SRC_END.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
  do {                                                          \
    enum composition_method method = COMPOSITION_WITH_RULE;     \
    int *charbuf_base = charbuf;                                \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
    int *buf = components;                                      \
    int i, j;                                                   \
                                                                \
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
    for (i = 1;                                                 \
         i < MAX_COMPOSITION_COMPONENTS && *src >= 0xA0;        \
         i++)                                                   \
      {                                                         \
        DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
      }                                                         \
    if (i < 2 || (buf - components) % 2 == 0)                   \
      goto invalid_code;                                        \
    if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
      goto no_more_source;                                      \
    ADD_COMPOSITION_DATA (charbuf, i, method);                  \
    i = 2 * i - 1;                                              \
    for (j = 0; j < i; j++)                                     \
      *charbuf++ = components[j];                               \
    charbuf_base[0] -= i;                                       \
    for (j = 0; j < i; j += 2)                                  \
      *charbuf++ = components[j];                               \
  } while (0)
2056
aa72b389
KH
2057
2058static void
df7492f9 2059decode_coding_emacs_mule (coding)
aa72b389 2060 struct coding_system *coding;
aa72b389 2061{
8f924df7
KH
2062 const unsigned char *src = coding->source + coding->consumed;
2063 const unsigned char *src_end = coding->source + coding->src_bytes;
2064 const unsigned char *src_base;
69a80ea3
KH
2065 int *charbuf = coding->charbuf + coding->charbuf_used;
2066 int *charbuf_end
2067 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 2068 int consumed_chars = 0, consumed_chars_base;
df7492f9 2069 int multibytep = coding->src_multibyte;
24a73b0a 2070 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2071 int char_offset = coding->produced_char;
2072 int last_offset = char_offset;
2073 int last_id = charset_ascii;
aa72b389 2074
24a73b0a 2075 CODING_GET_INFO (coding, attrs, charset_list);
aa72b389 2076
aa72b389
KH
2077 while (1)
2078 {
df7492f9
KH
2079 int c;
2080
aa72b389 2081 src_base = src;
df7492f9
KH
2082 consumed_chars_base = consumed_chars;
2083
2084 if (charbuf >= charbuf_end)
2085 break;
aa72b389 2086
df7492f9 2087 ONE_MORE_BYTE (c);
065e3595
KH
2088 if (c < 0)
2089 {
2090 *charbuf++ = -c;
2091 char_offset++;
2092 }
2093 else if (c < 0x80)
aa72b389 2094 {
df7492f9
KH
2095 *charbuf++ = c;
2096 char_offset++;
aa72b389 2097 }
df7492f9
KH
2098 else if (c == 0x80)
2099 {
df7492f9 2100 ONE_MORE_BYTE (c);
065e3595
KH
2101 if (c < 0)
2102 goto invalid_code;
781d7a48
KH
2103 if (c - 0xF2 >= COMPOSITION_RELATIVE
2104 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
2105 DECODE_EMACS_MULE_21_COMPOSITION (c);
2106 else if (c < 0xC0)
2107 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2108 else if (c == 0xFF)
2109 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2110 else
2111 goto invalid_code;
2112 }
2113 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2114 {
2115 int nbytes, nchars;
ff0dacd7
KH
2116 int id;
2117
781d7a48
KH
2118 src = src_base;
2119 consumed_chars = consumed_chars_base;
ff0dacd7 2120 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
2121 if (c < 0)
2122 {
2123 if (c == -2)
2124 break;
2125 goto invalid_code;
2126 }
ff0dacd7
KH
2127 if (last_id != id)
2128 {
2129 if (last_id != charset_ascii)
69a80ea3 2130 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
2131 last_id = id;
2132 last_offset = char_offset;
2133 }
df7492f9 2134 *charbuf++ = c;
781d7a48
KH
2135 src += nbytes;
2136 consumed_chars += nchars;
df7492f9
KH
2137 char_offset++;
2138 }
4d41e8b7
KH
2139 else
2140 goto invalid_code;
df7492f9
KH
2141 continue;
2142
2143 invalid_code:
2144 src = src_base;
2145 consumed_chars = consumed_chars_base;
2146 ONE_MORE_BYTE (c);
2147 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2148 char_offset++;
df7492f9
KH
2149 coding->errors++;
2150 }
2151
2152 no_more_source:
ff0dacd7 2153 if (last_id != charset_ascii)
69a80ea3 2154 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
2155 coding->consumed_char += consumed_chars_base;
2156 coding->consumed = src_base - coding->source;
2157 coding->charbuf_used = charbuf - coding->charbuf;
2158}
2159
2160
/* Store in CODES[0] and CODES[1] the leading code bytes for the
   charset whose emacs-mule ID is ID.  An ID below 0xA0 is itself the
   only leading code and CODES[1] is set to 0.  A larger ID is stored
   in CODES[1], preceded in CODES[0] by 0x9A (ID < 0xE0), 0x9B
   (ID < 0xF0), 0x9C (ID < 0xF5) or 0x9D (otherwise).

   ID is evaluated exactly once.  The expansion has no trailing
   semicolon: the caller supplies it, which lets the macro be used as
   the body of an `if' that is followed by `else'.  */

#define EMACS_MULE_LEADING_CODES(id, codes)                     \
  do {                                                          \
    int leading_id_ = (id);                                     \
                                                                \
    if (leading_id_ < 0xA0)                                     \
      (codes)[0] = leading_id_, (codes)[1] = 0;                 \
    else if (leading_id_ < 0xE0)                                \
      (codes)[0] = 0x9A, (codes)[1] = leading_id_;              \
    else if (leading_id_ < 0xF0)                                \
      (codes)[0] = 0x9B, (codes)[1] = leading_id_;              \
    else if (leading_id_ < 0xF5)                                \
      (codes)[0] = 0x9C, (codes)[1] = leading_id_;              \
    else                                                        \
      (codes)[0] = 0x9D, (codes)[1] = leading_id_;              \
  } while (0)
2174
aa72b389 2175
df7492f9
KH
2176static int
2177encode_coding_emacs_mule (coding)
2178 struct coding_system *coding;
2179{
2180 int multibytep = coding->dst_multibyte;
2181 int *charbuf = coding->charbuf;
2182 int *charbuf_end = charbuf + coding->charbuf_used;
2183 unsigned char *dst = coding->destination + coding->produced;
2184 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2185 int safe_room = 8;
df7492f9 2186 int produced_chars = 0;
24a73b0a 2187 Lisp_Object attrs, charset_list;
df7492f9 2188 int c;
ff0dacd7 2189 int preferred_charset_id = -1;
df7492f9 2190
24a73b0a 2191 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2192 if (! EQ (charset_list, Vemacs_mule_charset_list))
2193 {
2194 CODING_ATTR_CHARSET_LIST (attrs)
2195 = charset_list = Vemacs_mule_charset_list;
2196 }
df7492f9
KH
2197
2198 while (charbuf < charbuf_end)
2199 {
2200 ASSURE_DESTINATION (safe_room);
2201 c = *charbuf++;
ff0dacd7
KH
2202
2203 if (c < 0)
2204 {
2205 /* Handle an annotation. */
2206 switch (*charbuf)
2207 {
2208 case CODING_ANNOTATE_COMPOSITION_MASK:
2209 /* Not yet implemented. */
2210 break;
2211 case CODING_ANNOTATE_CHARSET_MASK:
2212 preferred_charset_id = charbuf[3];
2213 if (preferred_charset_id >= 0
2214 && NILP (Fmemq (make_number (preferred_charset_id),
2215 charset_list)))
2216 preferred_charset_id = -1;
2217 break;
2218 default:
2219 abort ();
2220 }
2221 charbuf += -c - 1;
2222 continue;
2223 }
2224
df7492f9
KH
2225 if (ASCII_CHAR_P (c))
2226 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2227 else if (CHAR_BYTE8_P (c))
2228 {
2229 c = CHAR_TO_BYTE8 (c);
2230 EMIT_ONE_BYTE (c);
2231 }
df7492f9 2232 else
aa72b389 2233 {
df7492f9
KH
2234 struct charset *charset;
2235 unsigned code;
2236 int dimension;
2237 int emacs_mule_id;
2238 unsigned char leading_codes[2];
2239
ff0dacd7
KH
2240 if (preferred_charset_id >= 0)
2241 {
2242 charset = CHARSET_FROM_ID (preferred_charset_id);
2243 if (! CHAR_CHARSET_P (c, charset))
2244 charset = char_charset (c, charset_list, NULL);
2245 }
2246 else
2247 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2248 if (! charset)
2249 {
2250 c = coding->default_char;
2251 if (ASCII_CHAR_P (c))
2252 {
2253 EMIT_ONE_ASCII_BYTE (c);
2254 continue;
2255 }
2256 charset = char_charset (c, charset_list, &code);
2257 }
2258 dimension = CHARSET_DIMENSION (charset);
2259 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2260 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2261 EMIT_ONE_BYTE (leading_codes[0]);
2262 if (leading_codes[1])
2263 EMIT_ONE_BYTE (leading_codes[1]);
2264 if (dimension == 1)
1fa663f9 2265 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2266 else
df7492f9 2267 {
1fa663f9 2268 code |= 0x8080;
df7492f9
KH
2269 EMIT_ONE_BYTE (code >> 8);
2270 EMIT_ONE_BYTE (code & 0xFF);
2271 }
aa72b389 2272 }
aa72b389 2273 }
065e3595 2274 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2275 coding->produced_char += produced_chars;
2276 coding->produced = dst - coding->destination;
2277 return 0;
aa72b389 2278}
b73bfc1c 2279
4ed46869 2280\f
df7492f9 2281/*** 7. ISO2022 handlers ***/
4ed46869
KH
2282
2283/* The following note describes the coding system ISO2022 briefly.
39787efd 2284 Since the intention of this note is to help understand the
5a936b46 2285 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2286 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2287 original document of ISO2022. This is equivalent to the standard
cfb43547 2288 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2289
2290 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2291 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2292 is encoded using bytes less than 128. This may make the encoded
2293 text a little bit longer, but the text passes more easily through
cfb43547 2294 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2295 Significant Bit).
b73bfc1c 2296
cfb43547
DL
2297 There are two kinds of character sets: control character sets and
2298 graphic character sets. The former contain control characters such
4ed46869 2299 as `newline' and `escape' to provide control functions (control
39787efd 2300 functions are also provided by escape sequences). The latter
cfb43547 2301 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2302 two control character sets and many graphic character sets.
2303
2304 Graphic character sets are classified into one of the following
39787efd
KH
2305 four classes, according to the number of bytes (DIMENSION) and
2306 number of characters in one dimension (CHARS) of the set:
2307 - DIMENSION1_CHARS94
2308 - DIMENSION1_CHARS96
2309 - DIMENSION2_CHARS94
2310 - DIMENSION2_CHARS96
2311
2312 In addition, each character set is assigned an identification tag,
cfb43547 2313 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2314 hereafter). The <F> of each character set is decided by ECMA(*)
2315 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2316 (0x30..0x3F are for private use only).
4ed46869
KH
2317
2318 Note (*): ECMA = European Computer Manufacturers Association
2319
cfb43547 2320 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2321 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2322 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2323 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2324 o DIMENSION2_CHARS96 -- none for the moment
2325
39787efd 2326 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2327 C0 [0x00..0x1F] -- control character plane 0
2328 GL [0x20..0x7F] -- graphic character plane 0
2329 C1 [0x80..0x9F] -- control character plane 1
2330 GR [0xA0..0xFF] -- graphic character plane 1
2331
2332 A control character set is directly designated and invoked to C0 or
39787efd
KH
2333 C1 by an escape sequence. The most common case is that:
2334 - ISO646's control character set is designated/invoked to C0, and
2335 - ISO6429's control character set is designated/invoked to C1,
2336 and usually these designations/invocations are omitted in encoded
2337 text. In a 7-bit environment, only C0 can be used, and a control
2338 character for C1 is encoded by an appropriate escape sequence to
2339 fit into the environment. All control characters for C1 are
2340 defined to have corresponding escape sequences.
4ed46869
KH
2341
2342 A graphic character set is at first designated to one of four
2343 graphic registers (G0 through G3), then these graphic registers are
2344 invoked to GL or GR. These designations and invocations can be
2345 done independently. The most common case is that G0 is invoked to
39787efd
KH
2346 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2347 these invocations and designations are omitted in encoded text.
2348 In a 7-bit environment, only GL can be used.
4ed46869 2349
39787efd
KH
2350 When a graphic character set of CHARS94 is invoked to GL, codes
2351 0x20 and 0x7F of the GL area work as control characters SPACE and
2352 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2353 be used.
4ed46869
KH
2354
2355 There are two ways of invocation: locking-shift and single-shift.
2356 With locking-shift, the invocation lasts until the next different
39787efd
KH
2357 invocation, whereas with single-shift, the invocation affects the
2358 following character only and doesn't affect the locking-shift
2359 state. Invocations are done by the following control characters or
2360 escape sequences:
4ed46869
KH
2361
2362 ----------------------------------------------------------------------
39787efd 2363 abbrev function cntrl escape seq description
4ed46869 2364 ----------------------------------------------------------------------
39787efd
KH
2365 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2366 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2367 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2368 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2369 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2370 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2371 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2372 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2373 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2374 ----------------------------------------------------------------------
39787efd
KH
2375 (*) These are not used by any known coding system.
2376
2377 Control characters for these functions are defined by macros
2378 ISO_CODE_XXX in `coding.h'.
4ed46869 2379
39787efd 2380 Designations are done by the following escape sequences:
4ed46869
KH
2381 ----------------------------------------------------------------------
2382 escape sequence description
2383 ----------------------------------------------------------------------
2384 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2385 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2386 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2387 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2388 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2389 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2390 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2391 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2392 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2393 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2394 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2395 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2396 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2397 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2398 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2399 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2400 ----------------------------------------------------------------------
2401
2402 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2403 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2404
2405 Note (*): Although these designations are not allowed in ISO2022,
2406 Emacs accepts them on decoding, and produces them on encoding
39787efd 2407 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2408 7-bit environment, non-locking-shift, and non-single-shift.
2409
2410 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2411 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2412
cfb43547 2413 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2414 same multilingual text in ISO2022. Actually, there exist many
2415 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2416 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2417 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2418 localized platforms), and all of these are variants of ISO2022.
2419
2420 In addition to the above, Emacs handles two more kinds of escape
2421 sequences: ISO6429's direction specification and Emacs' private
2422 sequence for specifying character composition.
2423
39787efd 2424 ISO6429's direction specification takes the following form:
4ed46869
KH
2425 o CSI ']' -- end of the current direction
2426 o CSI '0' ']' -- end of the current direction
2427 o CSI '1' ']' -- start of left-to-right text
2428 o CSI '2' ']' -- start of right-to-left text
2429 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2430 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2431
2432 Character composition specification takes the following form:
ec6d2bb8
KH
2433 o ESC '0' -- start relative composition
2434 o ESC '1' -- end composition
2435 o ESC '2' -- start rule-base composition (*)
2436 o ESC '3' -- start relative composition with alternate chars (**)
2437 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2438 Since these are not standard escape sequences of any ISO standard,
cfb43547 2439 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2440
5a936b46
DL
2441 (*) This form is used only in Emacs 20.7 and older versions,
2442 but newer versions can safely decode it.
cfb43547 2443 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2444 and older versions can't decode it.
ec6d2bb8 2445
cfb43547 2446 Here's a list of example usages of these composition escape
b73bfc1c 2447 sequences (categorized by `enum composition_method').
ec6d2bb8 2448
b73bfc1c 2449 COMPOSITION_RELATIVE:
ec6d2bb8 2450 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2451 COMPOSITION_WITH_RULE:
ec6d2bb8 2452 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2453 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2454 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2455 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2456 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2457
2458enum iso_code_class_type iso_code_class[256];
2459
df7492f9
KH
/* Nonzero if the charset whose ID is ID has an entry in the safe
   charset table of CODING (i.e. 0 <= ID <= CODING->max_charset_id)
   and that entry does not have its 8th bit set.

   The 8th bit is tested explicitly instead of comparing the entry
   with 0: the table is accessed through a `char *', and whether
   plain `char' is signed is implementation-defined, so `>= 0' would
   be always true where `char' is unsigned.  A negative ID is
   rejected instead of being used as an index.  */

#define SAFE_CHARSET_P(coding, id)                      \
  ((id) >= 0                                            \
   && (id) <= (coding)->max_charset_id                  \
   && ((coding)->safe_charsets[id] & 0x80) == 0)
2463
2464
/* Nonzero if CODING_ISO_INITIAL (..., 1) of the coding system
   registered in coding_categories for CATEGORY is not negative.
   NOTE(review): presumably that means some charset is initially
   designated to graphic register 1, so that a shift-out is
   meaningful -- confirm against CODING_ISO_INITIAL.  */

#define SHIFT_OUT_OK(category)  \
  (0 <= CODING_ISO_INITIAL (&coding_categories[category], 1))
2467
2468static void
f0064e1f
DL
2469setup_iso_safe_charsets (attrs)
2470 Lisp_Object attrs;
df7492f9
KH
2471{
2472 Lisp_Object charset_list, safe_charsets;
2473 Lisp_Object request;
2474 Lisp_Object reg_usage;
2475 Lisp_Object tail;
2476 int reg94, reg96;
2477 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2478 int max_charset_id;
2479
2480 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2481 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2482 && ! EQ (charset_list, Viso_2022_charset_list))
2483 {
2484 CODING_ATTR_CHARSET_LIST (attrs)
2485 = charset_list = Viso_2022_charset_list;
2486 ASET (attrs, coding_attr_safe_charsets, Qnil);
2487 }
2488
2489 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2490 return;
2491
2492 max_charset_id = 0;
2493 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2494 {
2495 int id = XINT (XCAR (tail));
2496 if (max_charset_id < id)
2497 max_charset_id = id;
2498 }
d46c5b12 2499
df7492f9
KH
2500 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2501 make_number (255));
2502 request = AREF (attrs, coding_attr_iso_request);
2503 reg_usage = AREF (attrs, coding_attr_iso_usage);
2504 reg94 = XINT (XCAR (reg_usage));
2505 reg96 = XINT (XCDR (reg_usage));
2506
2507 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2508 {
2509 Lisp_Object id;
2510 Lisp_Object reg;
2511 struct charset *charset;
2512
2513 id = XCAR (tail);
2514 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2515 reg = Fcdr (Fassq (id, request));
df7492f9 2516 if (! NILP (reg))
8f924df7 2517 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
2518 else if (charset->iso_chars_96)
2519 {
2520 if (reg96 < 4)
8f924df7 2521 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
2522 }
2523 else
2524 {
2525 if (reg94 < 4)
8f924df7 2526 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
2527 }
2528 }
2529 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2530}
d46c5b12 2531
b6871cc7 2532
4ed46869 2533/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2534 Check if a text is encoded in one of ISO-2022 based coding systems.
2535 If it is, return 1, else return 0. */
4ed46869 2536
0a28aafb 2537static int
ff0dacd7 2538detect_coding_iso_2022 (coding, detect_info)
df7492f9 2539 struct coding_system *coding;
ff0dacd7 2540 struct coding_detection_info *detect_info;
4ed46869 2541{
8f924df7
KH
2542 const unsigned char *src = coding->source, *src_base = src;
2543 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 2544 int multibytep = coding->src_multibyte;
ff0dacd7 2545 int single_shifting = 0;
df7492f9
KH
2546 int id;
2547 int c, c1;
2548 int consumed_chars = 0;
2549 int i;
ff0dacd7
KH
2550 int rejected = 0;
2551 int found = 0;
2552
2553 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2554
2555 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2556 {
2557 struct coding_system *this = &(coding_categories[i]);
2558 Lisp_Object attrs, val;
2559
2560 attrs = CODING_ID_ATTRS (this->id);
2561 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2562 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2563 setup_iso_safe_charsets (attrs);
2564 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
2565 this->max_charset_id = SCHARS (val) - 1;
2566 this->safe_charsets = (char *) SDATA (val);
df7492f9
KH
2567 }
2568
2569 /* A coding system of this category is always ASCII compatible. */
2570 src += coding->head_ascii;
3f003981 2571
ff0dacd7 2572 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2573 {
065e3595 2574 src_base = src;
df7492f9 2575 ONE_MORE_BYTE (c);
4ed46869
KH
2576 switch (c)
2577 {
2578 case ISO_CODE_ESC:
74383408
KH
2579 if (inhibit_iso_escape_detection)
2580 break;
f46869e4 2581 single_shifting = 0;
df7492f9 2582 ONE_MORE_BYTE (c);
d46c5b12 2583 if (c >= '(' && c <= '/')
4ed46869 2584 {
bf9cdd4e 2585 /* Designation sequence for a charset of dimension 1. */
df7492f9 2586 ONE_MORE_BYTE (c1);
d46c5b12 2587 if (c1 < ' ' || c1 >= 0x80
df7492f9 2588 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2589 /* Invalid designation sequence. Just ignore. */
2590 break;
bf9cdd4e
KH
2591 }
2592 else if (c == '$')
2593 {
2594 /* Designation sequence for a charset of dimension 2. */
df7492f9 2595 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2596 if (c >= '@' && c <= 'B')
2597 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2598 id = iso_charset_table[1][0][c];
bf9cdd4e 2599 else if (c >= '(' && c <= '/')
bcf26d6a 2600 {
df7492f9 2601 ONE_MORE_BYTE (c1);
d46c5b12 2602 if (c1 < ' ' || c1 >= 0x80
df7492f9 2603 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2604 /* Invalid designation sequence. Just ignore. */
2605 break;
bcf26d6a 2606 }
bf9cdd4e 2607 else
ff0dacd7 2608 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2609 break;
2610 }
ae9ff118 2611 else if (c == 'N' || c == 'O')
d46c5b12 2612 {
ae9ff118 2613 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2614 single_shifting = 1;
2615 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2616 break;
4ed46869 2617 }
ec6d2bb8
KH
2618 else if (c >= '0' && c <= '4')
2619 {
2620 /* ESC <Fp> for start/end composition. */
ff0dacd7 2621 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2622 break;
2623 }
bf9cdd4e 2624 else
df7492f9 2625 {
ff0dacd7 2626 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2627 break;
2628 }
d46c5b12
KH
2629
2630 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2631 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2632 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2633 id))
ff0dacd7 2634 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2635 else
ff0dacd7 2636 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2637 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2638 id))
ff0dacd7 2639 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2640 else
ff0dacd7 2641 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2642 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2643 id))
ff0dacd7 2644 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2645 else
ff0dacd7 2646 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2647 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2648 id))
ff0dacd7 2649 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2650 else
ff0dacd7 2651 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2652 break;
2653
4ed46869 2654 case ISO_CODE_SO:
d46c5b12 2655 case ISO_CODE_SI:
ff0dacd7 2656 /* Locking shift out/in. */
74383408
KH
2657 if (inhibit_iso_escape_detection)
2658 break;
f46869e4 2659 single_shifting = 0;
ff0dacd7
KH
2660 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2661 found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2662 break;
2663
4ed46869 2664 case ISO_CODE_CSI:
ff0dacd7 2665 /* Control sequence introducer. */
f46869e4 2666 single_shifting = 0;
ff0dacd7
KH
2667 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2668 found |= CATEGORY_MASK_ISO_8_ELSE;
2669 goto check_extra_latin;
2670
4ed46869
KH
2671 case ISO_CODE_SS2:
2672 case ISO_CODE_SS3:
ff0dacd7
KH
2673 /* Single shift. */
2674 if (inhibit_iso_escape_detection)
2675 break;
75e2a253 2676 single_shifting = 0;
ff0dacd7
KH
2677 rejected |= CATEGORY_MASK_ISO_7BIT;
2678 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2679 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253 2680 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
ff0dacd7
KH
2681 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2682 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253
KH
2683 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2684 if (single_shifting)
2685 break;
ff0dacd7 2686 goto check_extra_latin;
4ed46869
KH
2687
2688 default:
065e3595
KH
2689 if (c < 0)
2690 continue;
4ed46869 2691 if (c < 0x80)
f46869e4
KH
2692 {
2693 single_shifting = 0;
2694 break;
2695 }
ff0dacd7 2696 if (c >= 0xA0)
c4825358 2697 {
ff0dacd7
KH
2698 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2699 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2700 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2701 0xA0..0xFF. If the byte length is even, we include
2702 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2703 only when we are not single shifting. */
2704 if (! single_shifting
2705 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2706 {
e17de821 2707 int i = 1;
b73bfc1c
KH
2708 while (src < src_end)
2709 {
df7492f9 2710 ONE_MORE_BYTE (c);
b73bfc1c
KH
2711 if (c < 0xA0)
2712 break;
2713 i++;
2714 }
2715
2716 if (i & 1 && src < src_end)
ff0dacd7 2717 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2718 else
ff0dacd7 2719 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2720 }
ff0dacd7 2721 break;
4ed46869 2722 }
ff0dacd7
KH
2723 check_extra_latin:
2724 single_shifting = 0;
2725 if (! VECTORP (Vlatin_extra_code_table)
2726 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2727 {
2728 rejected = CATEGORY_MASK_ISO;
2729 break;
2730 }
2731 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2732 & CODING_ISO_FLAG_LATIN_EXTRA)
2733 found |= CATEGORY_MASK_ISO_8_1;
2734 else
2735 rejected |= CATEGORY_MASK_ISO_8_1;
75e2a253 2736 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2737 }
2738 }
ff0dacd7
KH
2739 detect_info->rejected |= CATEGORY_MASK_ISO;
2740 return 0;
4ed46869 2741
df7492f9 2742 no_more_source:
ff0dacd7
KH
2743 detect_info->rejected |= rejected;
2744 detect_info->found |= (found & ~rejected);
df7492f9 2745 return 1;
4ed46869 2746}
ec6d2bb8 2747
4ed46869 2748
/* Record in CODING that graphic register REG is designated to the
   charset looked up by ISO_CHARSET_TABLE (DIM, CHARS_96, FINAL).

   If FINAL is outside '0'..127, the lookup fails, or the charset is
   not safe for CODING, the designation of REG is set to -2 and
   CHARS_96 is overwritten with -1.  CHARS_96 is also set to -1 when
   the previous designation of REG was the invalid marker -2 and the
   new one is ASCII.  A CHARS_96 of -1 tells the caller that the
   escape sequence should be kept.  */

#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int id;                                                             \
                                                                        \
    if (final >= '0' && final < 128                                     \
        && (id = ISO_CHARSET_TABLE (dim, chars_96, final)) >= 0         \
        && SAFE_CHARSET_P (coding, id))                                 \
      {                                                                 \
        int prev = CODING_ISO_DESIGNATION (coding, reg);                \
                                                                        \
        /* Apply the Japanese charset substitutions requested by the   \
           coding system flags.  */                                     \
        if (id == charset_jisx0201_roman)                               \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)  \
              id = charset_ascii;                                       \
          }                                                             \
        else if (id == charset_jisx0208_1978)                           \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
              id = charset_jisx0208;                                    \
          }                                                             \
        CODING_ISO_DESIGNATION (coding, reg) = id;                      \
        /* If there was an invalid designation to REG previously, and  \
           this designation is ASCII to REG, we should keep this       \
           designation sequence.  */                                    \
        if (prev == -2 && id == charset_ascii)                          \
          chars_96 = -1;                                                \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* Unusable designation: mark REG invalid, keep the bytes.  */  \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
      }                                                                 \
  } while (0)
2781
d46c5b12 2782
/* If a composition is being collected, give it up: copy the
   characters gathered so far in `components' to CHARBUF as ordinary
   characters, advance CHAR_OFFSET by the number of characters copied,
   and reset COMPOSITION_STATE to COMPOSING_NO.  Do nothing when no
   composition is in progress.  Jump to `no_more_source' if CHARBUF
   cannot hold COMPONENT_IDX more elements.

   For COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS every entry
   of `components' is copied; for the other methods only every second
   entry (indices 0, 2, 4, ...) is copied.  */

#define MAYBE_FINISH_COMPOSITION()                                      \
  do {                                                                  \
    int i;                                                              \
    if (composition_state == COMPOSING_NO)                              \
      break;                                                            \
    /* It is assured that we have enough room for producing            \
       characters stored in the table `components'.  */                 \
    if (charbuf + component_idx > charbuf_end)                          \
      goto no_more_source;                                              \
    composition_state = COMPOSING_NO;                                   \
    if (method == COMPOSITION_RELATIVE                                  \
        || method == COMPOSITION_WITH_ALTCHARS)                         \
      {                                                                 \
        for (i = 0; i < component_idx; i++)                             \
          *charbuf++ = components[i];                                   \
        char_offset += component_idx;                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        for (i = 0; i < component_idx; i += 2)                          \
          *charbuf++ = components[i];                                   \
        /* The loop above emits ceil (component_idx / 2) characters;   \
           count exactly that many (the former `/ 2 + 1' overcounted   \
           by one whenever COMPONENT_IDX was even).  */                 \
        char_offset += (component_idx + 1) / 2;                         \
      }                                                                 \
  } while (0)
2807
d46c5b12 2808
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte following ESC.  When C1 is '0' and the state is
   COMPOSING_COMPONENT_RULE, the sequence only marks the end of the
   component list: COMPONENT_LEN is set and the state becomes
   COMPOSING_CHAR.  Otherwise any composition in progress is finished
   and a new one is started, provided that CHARBUF has room for
   MAX_COMPOSITION_COMPONENTS elements and that the terminating
   `ESC 1' is present in the remaining source; if not, control jumps
   to `no_more_source'.  */

#define DECODE_COMPOSITION_START(c1)                                    \
  do {                                                                  \
    if (c1 == '0'                                                       \
        && composition_state == COMPOSING_COMPONENT_RULE)               \
      {                                                                 \
        component_len = component_idx;                                  \
        composition_state = COMPOSING_CHAR;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        const unsigned char *p;                                         \
                                                                        \
        MAYBE_FINISH_COMPOSITION ();                                    \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
          goto no_more_source;                                          \
        /* Look ahead for the terminating `ESC 1'.  */                  \
        for (p = src; p < src_end - 1; p++)                             \
          if (*p == ISO_CODE_ESC && p[1] == '1')                        \
            break;                                                      \
        /* `>=' rather than `==': when SRC is already at SRC_END the   \
           loop above does not run and P is one past SRC_END - 1; that \
           too means no terminator was found.  */                       \
        if (p >= src_end - 1)                                           \
          {                                                             \
            /* The current composition doesn't end in the current      \
               source.  */                                              \
            record_conversion_result                                    \
              (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
            goto no_more_source;                                        \
          }                                                             \
                                                                        \
        /* This is surely the start of a composition.  */               \
        method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
                  : c1 == '2' ? COMPOSITION_WITH_RULE                   \
                  : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
                  : COMPOSITION_WITH_RULE_ALTCHARS);                    \
        composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
                             : COMPOSING_COMPONENT_CHAR);               \
        component_idx = component_len = 0;                              \
      }                                                                 \
  } while (0)
2853
ec6d2bb8 2854
/* Handle composition end sequence ESC 1.

   Emit a composition annotation (ADD_COMPOSITION_DATA) at CHARBUF,
   followed, for every method other than COMPOSITION_RELATIVE, by the
   component list, and finally by the composed characters themselves.
   CHAR_OFFSET is advanced once per composed character.  Afterwards
   coding->annotated is set and the state returns to COMPOSING_NO.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int nchars, i;                                                      \
    int *saved_charbuf = charbuf;                                       \
                                                                        \
    if (component_len > 0)                                              \
      nchars = component_idx - component_len;                           \
    else if (method == COMPOSITION_RELATIVE)                            \
      nchars = component_idx;                                           \
    else                                                                \
      nchars = (component_idx + 1) / 2;                                 \
                                                                        \
    ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        /* Without a separate component list (COMPONENT_LEN == 0)      \
           every collected entry is a component.  */                    \
        int ncomponents                                                 \
          = component_len == 0 ? component_idx : component_len;         \
                                                                        \
        for (i = 0; i < ncomponents; i++)                               \
          *charbuf++ = components[i];                                   \
        /* Store the (negative) distance from the current end of the   \
           annotation back to its first slot into that slot.  */        \
        *saved_charbuf = saved_charbuf - charbuf;                       \
      }                                                                 \
                                                                        \
    /* Now the characters: every second entry for rule-based           \
       composition, otherwise everything after the component list.  */  \
    i = method == COMPOSITION_WITH_RULE ? 0 : component_len;            \
    while (i < component_idx)                                           \
      {                                                                 \
        *charbuf++ = components[i];                                     \
        char_offset++;                                                  \
        i += method == COMPOSITION_WITH_RULE ? 2 : 1;                   \
      }                                                                 \
    coding->annotated = 1;                                              \
    composition_state = COMPOSING_NO;                                   \
  } while (0)
2885
df7492f9 2886
/* Decode a composition rule from the byte C1 (and maybe one more byte
   read from SRC into C2) and store the encoded composition rule back
   in C1.  C1 is set to 0 when the byte does not denote a rule in
   either format.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if (c1 >= 93)                                                       \
      c1 = 0;                                                           \
    else if (c1 >= 81)          /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (c1) / 9;                                            \
        int nref = (c1) % 9;                                            \
                                                                        \
        /* Reference point 4 of the old format is encoded as 10.  */    \
        if (gref == 4)                                                  \
          gref = 10;                                                    \
        if (nref == 4)                                                  \
          nref = 10;                                                    \
        c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
      }                                                                 \
  } while (0)
88993dfd 2910
d46c5b12 2911
4ed46869
KH
2912/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2913
b73bfc1c 2914static void
df7492f9 2915decode_coding_iso_2022 (coding)
4ed46869 2916 struct coding_system *coding;
4ed46869 2917{
8f924df7
KH
2918 const unsigned char *src = coding->source + coding->consumed;
2919 const unsigned char *src_end = coding->source + coding->src_bytes;
2920 const unsigned char *src_base;
69a80ea3 2921 int *charbuf = coding->charbuf + coding->charbuf_used;
ff0dacd7 2922 int *charbuf_end
69a80ea3 2923 = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 2924 int consumed_chars = 0, consumed_chars_base;
df7492f9 2925 int multibytep = coding->src_multibyte;
4ed46869 2926 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
2927 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2928 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 2929 int charset_id_2, charset_id_3;
df7492f9
KH
2930 struct charset *charset;
2931 int c;
2932 /* For handling composition sequence. */
2933#define COMPOSING_NO 0
2934#define COMPOSING_CHAR 1
2935#define COMPOSING_RULE 2
2936#define COMPOSING_COMPONENT_CHAR 3
2937#define COMPOSING_COMPONENT_RULE 4
2938
2939 int composition_state = COMPOSING_NO;
2940 enum composition_method method;
2941 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2942 int component_idx;
2943 int component_len;
24a73b0a 2944 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2945 int char_offset = coding->produced_char;
2946 int last_offset = char_offset;
2947 int last_id = charset_ascii;
df7492f9 2948
24a73b0a 2949 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 2950 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2951
2952 while (1)
4ed46869 2953 {
463f5630 2954 int c1, c2;
b73bfc1c
KH
2955
2956 src_base = src;
df7492f9
KH
2957 consumed_chars_base = consumed_chars;
2958
2959 if (charbuf >= charbuf_end)
2960 break;
2961
b73bfc1c 2962 ONE_MORE_BYTE (c1);
065e3595
KH
2963 if (c1 < 0)
2964 goto invalid_code;
4ed46869 2965
98725083 2966 /* We produce at most one character. */
4ed46869
KH
2967 switch (iso_code_class [c1])
2968 {
2969 case ISO_0x20_or_0x7F:
df7492f9 2970 if (composition_state != COMPOSING_NO)
ec6d2bb8 2971 {
df7492f9
KH
2972 if (composition_state == COMPOSING_RULE
2973 || composition_state == COMPOSING_COMPONENT_RULE)
2974 {
2975 DECODE_COMPOSITION_RULE (c1);
2976 components[component_idx++] = c1;
2977 composition_state--;
2978 continue;
2979 }
4ed46869 2980 }
df7492f9
KH
2981 if (charset_id_0 < 0
2982 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2983 /* This is SPACE or DEL. */
2984 charset = CHARSET_FROM_ID (charset_ascii);
2985 else
2986 charset = CHARSET_FROM_ID (charset_id_0);
2987 break;
4ed46869
KH
2988
2989 case ISO_graphic_plane_0:
781d7a48 2990 if (composition_state != COMPOSING_NO)
b73bfc1c 2991 {
781d7a48
KH
2992 if (composition_state == COMPOSING_RULE
2993 || composition_state == COMPOSING_COMPONENT_RULE)
2994 {
2995 DECODE_COMPOSITION_RULE (c1);
2996 components[component_idx++] = c1;
2997 composition_state--;
2998 continue;
2999 }
b73bfc1c 3000 }
134b9549
KH
3001 if (charset_id_0 < 0)
3002 charset = CHARSET_FROM_ID (charset_ascii);
3003 else
3004 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
3005 break;
3006
3007 case ISO_0xA0_or_0xFF:
df7492f9
KH
3008 if (charset_id_1 < 0
3009 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3010 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3011 goto invalid_code;
4ed46869
KH
3012 /* This is a graphic character, we fall down ... */
3013
3014 case ISO_graphic_plane_1:
df7492f9
KH
3015 if (charset_id_1 < 0)
3016 goto invalid_code;
3017 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
3018 break;
3019
df7492f9
KH
3020 case ISO_control_0:
3021 MAYBE_FINISH_COMPOSITION ();
3022 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
3023 break;
3024
df7492f9
KH
3025 case ISO_control_1:
3026 MAYBE_FINISH_COMPOSITION ();
3027 goto invalid_code;
3028
4ed46869 3029 case ISO_shift_out:
df7492f9
KH
3030 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3031 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3032 goto invalid_code;
3033 CODING_ISO_INVOCATION (coding, 0) = 1;
3034 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3035 continue;
4ed46869
KH
3036
3037 case ISO_shift_in:
df7492f9
KH
3038 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3039 goto invalid_code;
3040 CODING_ISO_INVOCATION (coding, 0) = 0;
3041 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3042 continue;
4ed46869
KH
3043
3044 case ISO_single_shift_2_7:
3045 case ISO_single_shift_2:
df7492f9
KH
3046 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3047 goto invalid_code;
4ed46869
KH
3048 /* SS2 is handled as an escape sequence of ESC 'N' */
3049 c1 = 'N';
3050 goto label_escape_sequence;
3051
3052 case ISO_single_shift_3:
df7492f9
KH
3053 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3054 goto invalid_code;
4ed46869
KH
3055 /* SS2 is handled as an escape sequence of ESC 'O' */
3056 c1 = 'O';
3057 goto label_escape_sequence;
3058
3059 case ISO_control_sequence_introducer:
3060 /* CSI is handled as an escape sequence of ESC '[' ... */
3061 c1 = '[';
3062 goto label_escape_sequence;
3063
3064 case ISO_escape:
3065 ONE_MORE_BYTE (c1);
3066 label_escape_sequence:
df7492f9 3067 /* Escape sequences handled here are invocation,
4ed46869
KH
3068 designation, direction specification, and character
3069 composition specification. */
3070 switch (c1)
3071 {
3072 case '&': /* revision of following character set */
3073 ONE_MORE_BYTE (c1);
3074 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3075 goto invalid_code;
4ed46869
KH
3076 ONE_MORE_BYTE (c1);
3077 if (c1 != ISO_CODE_ESC)
df7492f9 3078 goto invalid_code;
4ed46869
KH
3079 ONE_MORE_BYTE (c1);
3080 goto label_escape_sequence;
3081
3082 case '$': /* designation of 2-byte character set */
df7492f9
KH
3083 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3084 goto invalid_code;
134b9549
KH
3085 {
3086 int reg, chars96;
3087
3088 ONE_MORE_BYTE (c1);
3089 if (c1 >= '@' && c1 <= 'B')
3090 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3091 or JISX0208.1980 */
134b9549
KH
3092 reg = 0, chars96 = 0;
3093 }
3094 else if (c1 >= 0x28 && c1 <= 0x2B)
3095 { /* designation of DIMENSION2_CHARS94 character set */
3096 reg = c1 - 0x28, chars96 = 0;
3097 ONE_MORE_BYTE (c1);
3098 }
3099 else if (c1 >= 0x2C && c1 <= 0x2F)
3100 { /* designation of DIMENSION2_CHARS96 character set */
3101 reg = c1 - 0x2C, chars96 = 1;
3102 ONE_MORE_BYTE (c1);
3103 }
3104 else
3105 goto invalid_code;
3106 DECODE_DESIGNATION (reg, 2, chars96, c1);
3107 /* We must update these variables now. */
3108 if (reg == 0)
3109 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3110 else if (reg == 1)
3111 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3112 if (chars96 < 0)
3113 goto invalid_code;
3114 }
b73bfc1c 3115 continue;
4ed46869
KH
3116
3117 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3118 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3119 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3120 goto invalid_code;
3121 CODING_ISO_INVOCATION (coding, 0) = 2;
3122 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3123 continue;
4ed46869
KH
3124
3125 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3126 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3127 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3128 goto invalid_code;
3129 CODING_ISO_INVOCATION (coding, 0) = 3;
3130 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3131 continue;
4ed46869
KH
3132
3133 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3134 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3135 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3136 goto invalid_code;
134b9549
KH
3137 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3138 if (charset_id_2 < 0)
3139 charset = CHARSET_FROM_ID (charset_ascii);
3140 else
3141 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3142 ONE_MORE_BYTE (c1);
e7046a18 3143 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3144 goto invalid_code;
4ed46869
KH
3145 break;
3146
3147 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3148 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3149 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3150 goto invalid_code;
134b9549
KH
3151 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3152 if (charset_id_3 < 0)
3153 charset = CHARSET_FROM_ID (charset_ascii);
3154 else
3155 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3156 ONE_MORE_BYTE (c1);
e7046a18 3157 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3158 goto invalid_code;
4ed46869
KH
3159 break;
3160
ec6d2bb8 3161 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3162 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3163 goto invalid_code;
ec6d2bb8 3164 DECODE_COMPOSITION_START (c1);
b73bfc1c 3165 continue;
4ed46869 3166
ec6d2bb8 3167 case '1': /* end composition */
df7492f9
KH
3168 if (composition_state == COMPOSING_NO)
3169 goto invalid_code;
3170 DECODE_COMPOSITION_END ();
b73bfc1c 3171 continue;
4ed46869
KH
3172
3173 case '[': /* specification of direction */
df7492f9
KH
3174 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3175 goto invalid_code;
4ed46869 3176 /* For the moment, nested direction is not supported.
d46c5b12 3177 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3178 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3179 ONE_MORE_BYTE (c1);
3180 switch (c1)
3181 {
3182 case ']': /* end of the current direction */
d46c5b12 3183 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3184
3185 case '0': /* end of the current direction */
3186 case '1': /* start of left-to-right direction */
3187 ONE_MORE_BYTE (c1);
3188 if (c1 == ']')
d46c5b12 3189 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3190 else
df7492f9 3191 goto invalid_code;
4ed46869
KH
3192 break;
3193
3194 case '2': /* start of right-to-left direction */
3195 ONE_MORE_BYTE (c1);
3196 if (c1 == ']')
d46c5b12 3197 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3198 else
df7492f9 3199 goto invalid_code;
4ed46869
KH
3200 break;
3201
3202 default:
df7492f9 3203 goto invalid_code;
4ed46869 3204 }
b73bfc1c 3205 continue;
4ed46869 3206
103e0180 3207 case '%':
103e0180
KH
3208 ONE_MORE_BYTE (c1);
3209 if (c1 == '/')
3210 {
3211 /* CTEXT extended segment:
3212 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3213 We keep these bytes as is for the moment.
3214 They may be decoded by post-read-conversion. */
3215 int dim, M, L;
4776e638 3216 int size;
8f924df7 3217
103e0180
KH
3218 ONE_MORE_BYTE (dim);
3219 ONE_MORE_BYTE (M);
3220 ONE_MORE_BYTE (L);
3221 size = ((M - 128) * 128) + (L - 128);
4776e638
KH
3222 if (charbuf + 8 + size > charbuf_end)
3223 goto break_loop;
3224 *charbuf++ = ISO_CODE_ESC;
3225 *charbuf++ = '%';
3226 *charbuf++ = '/';
3227 *charbuf++ = dim;
3228 *charbuf++ = BYTE8_TO_CHAR (M);
3229 *charbuf++ = BYTE8_TO_CHAR (L);
103e0180
KH
3230 while (size-- > 0)
3231 {
3232 ONE_MORE_BYTE (c1);
4776e638 3233 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3234 }
103e0180
KH
3235 }
3236 else if (c1 == 'G')
3237 {
103e0180
KH
3238 /* XFree86 extension for embedding UTF-8 in CTEXT:
3239 ESC % G --UTF-8-BYTES-- ESC % @
3240 We keep these bytes as is for the moment.
3241 They may be decoded by post-read-conversion. */
4776e638
KH
3242 int *p = charbuf;
3243
3244 if (p + 6 > charbuf_end)
3245 goto break_loop;
3246 *p++ = ISO_CODE_ESC;
3247 *p++ = '%';
3248 *p++ = 'G';
3249 while (p < charbuf_end)
103e0180
KH
3250 {
3251 ONE_MORE_BYTE (c1);
3252 if (c1 == ISO_CODE_ESC
3253 && src + 1 < src_end
3254 && src[0] == '%'
3255 && src[1] == '@')
9ffd559c
KH
3256 {
3257 src += 2;
3258 break;
3259 }
4776e638 3260 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3261 }
4776e638
KH
3262 if (p + 3 > charbuf_end)
3263 goto break_loop;
3264 *p++ = ISO_CODE_ESC;
3265 *p++ = '%';
3266 *p++ = '@';
3267 charbuf = p;
103e0180
KH
3268 }
3269 else
4776e638 3270 goto invalid_code;
103e0180 3271 continue;
4776e638 3272 break;
103e0180 3273
4ed46869 3274 default:
df7492f9
KH
3275 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3276 goto invalid_code;
134b9549
KH
3277 {
3278 int reg, chars96;
3279
3280 if (c1 >= 0x28 && c1 <= 0x2B)
3281 { /* designation of DIMENSION1_CHARS94 character set */
3282 reg = c1 - 0x28, chars96 = 0;
3283 ONE_MORE_BYTE (c1);
3284 }
3285 else if (c1 >= 0x2C && c1 <= 0x2F)
3286 { /* designation of DIMENSION1_CHARS96 character set */
3287 reg = c1 - 0x2C, chars96 = 1;
3288 ONE_MORE_BYTE (c1);
3289 }
3290 else
3291 goto invalid_code;
3292 DECODE_DESIGNATION (reg, 1, chars96, c1);
3293 /* We must update these variables now. */
3294 if (reg == 0)
3295 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3296 else if (reg == 1)
3297 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3298 if (chars96 < 0)
3299 goto invalid_code;
3300 }
b73bfc1c 3301 continue;
4ed46869 3302 }
b73bfc1c 3303 }
4ed46869 3304
ff0dacd7
KH
3305 if (charset->id != charset_ascii
3306 && last_id != charset->id)
3307 {
3308 if (last_id != charset_ascii)
69a80ea3 3309 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
3310 last_id = charset->id;
3311 last_offset = char_offset;
3312 }
3313
b73bfc1c 3314 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3315 Produce a decoded character while getting 2nd position code
3316 C2 if necessary. */
3317 c1 &= 0x7F;
3318 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3319 {
3320 ONE_MORE_BYTE (c2);
df7492f9 3321 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3322 /* C2 is not in a valid range. */
df7492f9
KH
3323 goto invalid_code;
3324 c1 = (c1 << 8) | (c2 & 0x7F);
3325 if (CHARSET_DIMENSION (charset) > 2)
3326 {
3327 ONE_MORE_BYTE (c2);
3328 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3329 /* C2 is not in a valid range. */
3330 goto invalid_code;
3331 c1 = (c1 << 8) | (c2 & 0x7F);
3332 }
3333 }
3334
3335 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3336 if (c < 0)
3337 {
3338 MAYBE_FINISH_COMPOSITION ();
3339 for (; src_base < src; src_base++, char_offset++)
3340 {
3341 if (ASCII_BYTE_P (*src_base))
3342 *charbuf++ = *src_base;
3343 else
3344 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3345 }
3346 }
3347 else if (composition_state == COMPOSING_NO)
3348 {
3349 *charbuf++ = c;
3350 char_offset++;
4ed46869 3351 }
df7492f9 3352 else
781d7a48
KH
3353 {
3354 components[component_idx++] = c;
3355 if (method == COMPOSITION_WITH_RULE
3356 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3357 && composition_state == COMPOSING_COMPONENT_CHAR))
3358 composition_state++;
4ed46869
KH
3359 }
3360 continue;
3361
df7492f9
KH
3362 invalid_code:
3363 MAYBE_FINISH_COMPOSITION ();
4ed46869 3364 src = src_base;
df7492f9
KH
3365 consumed_chars = consumed_chars_base;
3366 ONE_MORE_BYTE (c);
065e3595 3367 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3368 char_offset++;
df7492f9 3369 coding->errors++;
4776e638
KH
3370 continue;
3371
3372 break_loop:
3373 break;
4ed46869 3374 }
fb88bf2d 3375
df7492f9 3376 no_more_source:
ff0dacd7 3377 if (last_id != charset_ascii)
69a80ea3 3378 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
3379 coding->consumed_char += consumed_chars_base;
3380 coding->consumed = src_base - coding->source;
3381 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3382}
3383
b73bfc1c 3384
f4dee582 3385/* ISO2022 encoding stuff. */
4ed46869
KH
3386
3387/*
f4dee582 3388 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3389 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3390 variant has the following specifications:
df7492f9 3391 1. Initial designation to G0 thru G3.
4ed46869
KH
3392 2. Allows short-form designation?
3393 3. ASCII should be designated to G0 before control characters?
3394 4. ASCII should be designated to G0 at end of line?
3395 5. 7-bit environment or 8-bit environment?
3396 6. Use locking-shift?
3397 7. Use Single-shift?
3398 And the following two are only for Japanese:
3399 8. Use ASCII in place of JIS0201-1976-Roman?
3400 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3401 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3402 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3403 details.
4ed46869
KH
3404*/
3405
/* Produce codes (escape sequence) for designating CHARSET to graphic
   register REG at DST, and increment DST.  If <final-char> of CHARSET is
   '@', 'A', or 'B' and the coding system CODING allows, produce
   designation sequence of short-form.

   The sequence is: an optional revision prefix `ESC & <'@'+revision>'
   (only when CODING has CODING_ISO_FLAG_REVISION and CHARSET has a
   non-negative revision), then ESC, `$' for a charset whose dimension
   is not 1, the intermediate byte selecting REG, and the final byte.
   The designation is also recorded in CODING.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    char *intermediate_char_94 = "()*+";                                \
    char *intermediate_char_96 = ",-./";                                \
    int revision                                                        \
      = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)         \
         ? CHARSET_ISO_REVISION (charset) : -1);                        \
    int c;                                                              \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          {                                                             \
            c = intermediate_char_96[reg];                              \
            EMIT_ONE_ASCII_BYTE (c);                                    \
          }                                                             \
        else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM  \
                 || reg != 0                                            \
                 || final_char < '@' || final_char > 'B')               \
          {                                                             \
            /* Only the short form may omit the intermediate byte.  */  \
            c = intermediate_char_94[reg];                              \
            EMIT_ONE_ASCII_BYTE (c);                                    \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          c = intermediate_char_96[reg];                                \
        else                                                            \
          c = intermediate_char_94[reg];                                \
        EMIT_ONE_ASCII_BYTE (c);                                        \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3453
df7492f9 3454
/* The following two macros produce codes (control character or escape
   sequence) for ISO2022 single-shift functions (single-shift-2 and
   single-shift-3).  With CODING_ISO_FLAG_SEVEN_BITS the two-byte
   escape sequence is produced, otherwise the single control byte.
   Both set the single-shifting state of CODING.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3477
df7492f9 3478
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one also
   records in CODING which graphic register is now invoked to graphic
   plane 0.  */

#define ENCODE_SHIFT_IN                                                 \
  do {                                                                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;                              \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                                  \
  } while (0)


#define ENCODE_SHIFT_OUT                                                \
  do {                                                                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;                              \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                                  \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                                          \
  do {                                                                  \
    CODING_ISO_INVOCATION (coding, 0) = 2;                              \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');                           \
  } while (0)
3502

/* Produce the locking-shift-3 escape sequence and record that graphic
   register 3 is now invoked to graphic plane 0.  The final byte must
   be 'o': `ESC n' is locking-shift-2, and the decoder in this file
   likewise treats `ESC o' as locking-shift-3.  */

#define ENCODE_LOCKING_SHIFT_3                                          \
  do {                                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');                           \
    CODING_ISO_INVOCATION (coding, 0) = 3;                              \
  } while (0)
3509
df7492f9 3510
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has CODING_ISO_FLAG_USE_ROMAN
   and CHARSET is ASCII, it is replaced by the JISX0201-Roman charset.
   C1 may be any integer expression; it is parenthesized wherever it
   is used.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single-shift code was just produced; this character       \
           completes it.  */                                            \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we    \
         must invoke it, or, at first, designate it to some graphic    \
         register.  Then repeat the loop to actually produce the       \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3553
df7492f9 3554
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS and CHARSET is JISX0208, it is replaced
   by the JISX0208.1978 charset.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
        && id == charset_jisx0208)                                      \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    /* Each case that can produce the character leaves the loop.  */    \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        else                                                            \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
    /* Since CHARSET is not yet invoked to any graphic planes, we      \
       must invoke it, or, at first, designate it to some graphic      \
       register.  Then repeat the loop to actually produce the         \
       character.  */                                                   \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
3597
05e6f5dc 3598
/* Produce codes for character C of CHARSET: obtain its code with
   ENCODE_CHAR and hand it to the encoder for a dimension-1 charset,
   or, for any other dimension, split it into a high part (CODE >> 8)
   and a low byte for the dimension-2 encoder.  */

#define ENCODE_ISO_CHARACTER(charset, c)                                \
  do {                                                                  \
    int code = ENCODE_CHAR ((charset),(c));                             \
                                                                        \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                \
  } while (0)
bdd9fb48 3608
05e6f5dc 3609
4ed46869 3610/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3611 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3612 Return new DST. */
3613
3614unsigned char *
df7492f9
KH
3615encode_invocation_designation (charset, coding, dst, p_nchars)
3616 struct charset *charset;
4ed46869
KH
3617 struct coding_system *coding;
3618 unsigned char *dst;
df7492f9 3619 int *p_nchars;
4ed46869 3620{
df7492f9
KH
3621 int multibytep = coding->dst_multibyte;
3622 int produced_chars = *p_nchars;
4ed46869 3623 int reg; /* graphic register number */
df7492f9 3624 int id = CHARSET_ID (charset);
4ed46869
KH
3625
3626 /* At first, check designations. */
3627 for (reg = 0; reg < 4; reg++)
df7492f9 3628 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3629 break;
3630
3631 if (reg >= 4)
3632 {
3633 /* CHARSET is not yet designated to any graphic registers. */
3634 /* At first check the requested designation. */
df7492f9
KH
3635 reg = CODING_ISO_REQUEST (coding, id);
3636 if (reg < 0)
1ba9e4ab
KH
3637 /* Since CHARSET requests no special designation, designate it
3638 to graphic register 0. */
4ed46869
KH
3639 reg = 0;
3640
3641 ENCODE_DESIGNATION (charset, reg, coding);
3642 }
3643
df7492f9
KH
3644 if (CODING_ISO_INVOCATION (coding, 0) != reg
3645 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3646 {
3647 /* Since the graphic register REG is not invoked to any graphic
3648 planes, invoke it to graphic plane 0. */
3649 switch (reg)
3650 {
3651 case 0: /* graphic register 0 */
3652 ENCODE_SHIFT_IN;
3653 break;
3654
3655 case 1: /* graphic register 1 */
3656 ENCODE_SHIFT_OUT;
3657 break;
3658
3659 case 2: /* graphic register 2 */
df7492f9 3660 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3661 ENCODE_SINGLE_SHIFT_2;
3662 else
3663 ENCODE_LOCKING_SHIFT_2;
3664 break;
3665
3666 case 3: /* graphic register 3 */
df7492f9 3667 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3668 ENCODE_SINGLE_SHIFT_3;
3669 else
3670 ENCODE_LOCKING_SHIFT_3;
3671 break;
3672 }
3673 }
b73bfc1c 3674
df7492f9 3675 *p_nchars = produced_chars;
4ed46869
KH
3676 return dst;
3677}
3678
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.  They rely on the caller's `coding' and on whatever the
   EMIT_* macros require (at least `dst').

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the two-byte form ESC [
   when CODING has the seven-bit flag set, and the single byte CSI
   otherwise.  The flag word holds several independent bits, so the
   seven-bit flag is tested with `&' as elsewhere in this file; an
   equality test would only match when no other flag is set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)


/* Announce right-to-left text: control sequence introducer followed
   by "2]".  ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like
   macro, so it is used without an argument list.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)


/* Announce left-to-right text: control sequence introducer followed
   by "0]".  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
4ed46869 3702
4ed46869
KH
3703
/* Produce the invocation and designation codes that bring the
   graphic planes and registers back to their initial state: shift in
   when graphic plane 0 does not hold register 0, then re-designate
   every register whose current charset differs from its (non-negative)
   initial charset.  Refers to the caller's `coding'.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        int initial_id = CODING_ISO_INITIAL (coding, reg);              \
                                                                        \
        if (initial_id >= 0                                             \
            && CODING_ISO_DESIGNATION (coding, reg) != initial_id)      \
          {                                                             \
            charset = CHARSET_FROM_ID (initial_id);                     \
            ENCODE_DESIGNATION (charset, reg, coding);                  \
          }                                                             \
      }                                                                 \
  } while (0)
3722
df7492f9 3723
bdd9fb48 3724/* Produce designation sequences of charsets in the line started from
b73bfc1c 3725 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3726
3727 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3728 find all the necessary designations. */
3729
b73bfc1c 3730static unsigned char *
df7492f9 3731encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3732 struct coding_system *coding;
df7492f9
KH
3733 int *charbuf, *charbuf_end;
3734 unsigned char *dst;
e0e989f6 3735{
df7492f9 3736 struct charset *charset;
bdd9fb48
KH
3737 /* Table of charsets to be designated to each graphic register. */
3738 int r[4];
df7492f9
KH
3739 int c, found = 0, reg;
3740 int produced_chars = 0;
3741 int multibytep = coding->dst_multibyte;
3742 Lisp_Object attrs;
3743 Lisp_Object charset_list;
3744
3745 attrs = CODING_ID_ATTRS (coding->id);
3746 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3747 if (EQ (charset_list, Qiso_2022))
3748 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3749
3750 for (reg = 0; reg < 4; reg++)
3751 r[reg] = -1;
3752
b73bfc1c 3753 while (found < 4)
e0e989f6 3754 {
df7492f9
KH
3755 int id;
3756
3757 c = *charbuf++;
b73bfc1c
KH
3758 if (c == '\n')
3759 break;
df7492f9
KH
3760 charset = char_charset (c, charset_list, NULL);
3761 id = CHARSET_ID (charset);
3762 reg = CODING_ISO_REQUEST (coding, id);
3763 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3764 {
3765 found++;
df7492f9 3766 r[reg] = id;
bdd9fb48 3767 }
bdd9fb48
KH
3768 }
3769
3770 if (found)
3771 {
3772 for (reg = 0; reg < 4; reg++)
3773 if (r[reg] >= 0
df7492f9
KH
3774 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3775 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3776 }
b73bfc1c
KH
3777
3778 return dst;
e0e989f6
KH
3779}
3780
4ed46869
KH
3781/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3782
df7492f9
KH
3783static int
3784encode_coding_iso_2022 (coding)
4ed46869 3785 struct coding_system *coding;
4ed46869 3786{
df7492f9
KH
3787 int multibytep = coding->dst_multibyte;
3788 int *charbuf = coding->charbuf;
3789 int *charbuf_end = charbuf + coding->charbuf_used;
3790 unsigned char *dst = coding->destination + coding->produced;
3791 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3792 int safe_room = 16;
3793 int bol_designation
3794 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3795 && CODING_ISO_BOL (coding));
3796 int produced_chars = 0;
3797 Lisp_Object attrs, eol_type, charset_list;
3798 int ascii_compatible;
b73bfc1c 3799 int c;
ff0dacd7 3800 int preferred_charset_id = -1;
05e6f5dc 3801
24a73b0a
KH
3802 CODING_GET_INFO (coding, attrs, charset_list);
3803 eol_type = CODING_ID_EOL_TYPE (coding->id);
3804 if (VECTORP (eol_type))
3805 eol_type = Qunix;
3806
004068e4 3807 setup_iso_safe_charsets (attrs);
ff0dacd7
KH
3808 /* Charset list may have been changed. */
3809 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
8f924df7 3810 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
0eecad43 3811
df7492f9 3812 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
bdd9fb48 3813
df7492f9 3814 while (charbuf < charbuf_end)
4ed46869 3815 {
df7492f9 3816 ASSURE_DESTINATION (safe_room);
b73bfc1c 3817
df7492f9 3818 if (bol_designation)
b73bfc1c 3819 {
df7492f9 3820 unsigned char *dst_prev = dst;
4ed46869 3821
bdd9fb48 3822 /* We have to produce designation sequences if any now. */
df7492f9
KH
3823 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3824 bol_designation = 0;
3825 /* We are sure that designation sequences are all ASCII bytes. */
3826 produced_chars += dst - dst_prev;
e0e989f6
KH
3827 }
3828
df7492f9 3829 c = *charbuf++;
ec6d2bb8 3830
ff0dacd7
KH
3831 if (c < 0)
3832 {
3833 /* Handle an annotation. */
3834 switch (*charbuf)
ec6d2bb8 3835 {
ff0dacd7
KH
3836 case CODING_ANNOTATE_COMPOSITION_MASK:
3837 /* Not yet implemented. */
3838 break;
3839 case CODING_ANNOTATE_CHARSET_MASK:
cf7dfdf5 3840 preferred_charset_id = charbuf[2];
ff0dacd7
KH
3841 if (preferred_charset_id >= 0
3842 && NILP (Fmemq (make_number (preferred_charset_id),
3843 charset_list)))
3844 preferred_charset_id = -1;
3845 break;
3846 default:
3847 abort ();
4ed46869 3848 }
ff0dacd7
KH
3849 charbuf += -c - 1;
3850 continue;
4ed46869 3851 }
ec6d2bb8 3852
b73bfc1c
KH
3853 /* Now encode the character C. */
3854 if (c < 0x20 || c == 0x7F)
3855 {
df7492f9
KH
3856 if (c == '\n'
3857 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3858 {
df7492f9
KH
3859 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3860 ENCODE_RESET_PLANE_AND_REGISTER ();
3861 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3862 {
df7492f9
KH
3863 int i;
3864
3865 for (i = 0; i < 4; i++)
3866 CODING_ISO_DESIGNATION (coding, i)
3867 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3868 }
df7492f9
KH
3869 bol_designation
3870 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3871 }
df7492f9
KH
3872 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3873 ENCODE_RESET_PLANE_AND_REGISTER ();
3874 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3875 }
df7492f9 3876 else if (ASCII_CHAR_P (c))
88993dfd 3877 {
df7492f9
KH
3878 if (ascii_compatible)
3879 EMIT_ONE_ASCII_BYTE (c);
93dec019 3880 else
19a8d9e0 3881 {
bf16eb23
KH
3882 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3883 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 3884 }
4ed46869 3885 }
16eafb5d 3886 else if (CHAR_BYTE8_P (c))
88993dfd 3887 {
16eafb5d
KH
3888 c = CHAR_TO_BYTE8 (c);
3889 EMIT_ONE_BYTE (c);
88993dfd 3890 }
b73bfc1c 3891 else
df7492f9 3892 {
ff0dacd7 3893 struct charset *charset;
b73bfc1c 3894
ff0dacd7
KH
3895 if (preferred_charset_id >= 0)
3896 {
3897 charset = CHARSET_FROM_ID (preferred_charset_id);
3898 if (! CHAR_CHARSET_P (c, charset))
3899 charset = char_charset (c, charset_list, NULL);
3900 }
3901 else
3902 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
3903 if (!charset)
3904 {
41cbe562
KH
3905 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3906 {
3907 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3908 charset = CHARSET_FROM_ID (charset_ascii);
3909 }
3910 else
3911 {
3912 c = coding->default_char;
3913 charset = char_charset (c, charset_list, NULL);
3914 }
df7492f9
KH
3915 }
3916 ENCODE_ISO_CHARACTER (charset, c);
3917 }
84fbb8a0 3918 }
b73bfc1c 3919
df7492f9
KH
3920 if (coding->mode & CODING_MODE_LAST_BLOCK
3921 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3922 {
3923 ASSURE_DESTINATION (safe_room);
3924 ENCODE_RESET_PLANE_AND_REGISTER ();
3925 }
065e3595 3926 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
3927 CODING_ISO_BOL (coding) = bol_designation;
3928 coding->produced_char += produced_chars;
3929 coding->produced = dst - coding->destination;
3930 return 0;
4ed46869
KH
3931}
3932
3933\f
df7492f9 3934/*** 8. Shift-JIS and BIG5 handlers ***/
4ed46869 3935
df7492f9 3936/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3937 quite widely. So, for the moment, Emacs supports them in the bare
3938 C code. But, in the future, they may be supported only by CCL. */
3939
3940/* SJIS is a coding system encoding three character sets: ASCII, right
3941 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3942 as is. A character of charset katakana-jisx0201 is encoded by
3943 "position-code + 0x80". A character of charset japanese-jisx0208
3944 is encoded in two bytes, but the two position-codes are divided and
df7492f9 3945 shifted so that they fit in the ranges below.
4ed46869
KH
3946
3947 --- CODE RANGE of SJIS ---
3948 (character set) (range)
3949 ASCII 0x00 .. 0x7F
df7492f9 3950 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3951 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3952 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3953 -------------------------------
3954
3955*/
3956
3957/* BIG5 is a coding system encoding two character sets: ASCII and
3958 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3959 character set and is encoded in two-byte.
4ed46869
KH
3960
3961 --- CODE RANGE of BIG5 ---
3962 (character set) (range)
3963 ASCII 0x00 .. 0x7F
3964 Big5 (1st byte) 0xA1 .. 0xFE
3965 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3966 --------------------------
3967
df7492f9 3968 */
4ed46869
KH
3969
3970/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3971 Check if a text is encoded in SJIS. If it is, return
df7492f9 3972 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3973
0a28aafb 3974static int
ff0dacd7 3975detect_coding_sjis (coding, detect_info)
df7492f9 3976 struct coding_system *coding;
ff0dacd7 3977 struct coding_detection_info *detect_info;
4ed46869 3978{
065e3595 3979 const unsigned char *src = coding->source, *src_base;
8f924df7 3980 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
3981 int multibytep = coding->src_multibyte;
3982 int consumed_chars = 0;
3983 int found = 0;
b73bfc1c 3984 int c;
df7492f9 3985
ff0dacd7 3986 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
3987 /* A coding system of this category is always ASCII compatible. */
3988 src += coding->head_ascii;
4ed46869 3989
b73bfc1c 3990 while (1)
4ed46869 3991 {
065e3595 3992 src_base = src;
df7492f9 3993 ONE_MORE_BYTE (c);
682169fe
KH
3994 if (c < 0x80)
3995 continue;
df7492f9 3996 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3997 {
df7492f9 3998 ONE_MORE_BYTE (c);
682169fe 3999 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 4000 break;
ff0dacd7 4001 found = CATEGORY_MASK_SJIS;
4ed46869 4002 }
df7492f9 4003 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 4004 found = CATEGORY_MASK_SJIS;
df7492f9
KH
4005 else
4006 break;
4ed46869 4007 }
ff0dacd7 4008 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
4009 return 0;
4010
4011 no_more_source:
065e3595 4012 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4013 {
ff0dacd7 4014 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 4015 return 0;
4ed46869 4016 }
ff0dacd7
KH
4017 detect_info->found |= found;
4018 return 1;
4ed46869
KH
4019}
4020
4021/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4022 Check if a text is encoded in BIG5. If it is, return
df7492f9 4023 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 4024
0a28aafb 4025static int
ff0dacd7 4026detect_coding_big5 (coding, detect_info)
df7492f9 4027 struct coding_system *coding;
ff0dacd7 4028 struct coding_detection_info *detect_info;
4ed46869 4029{
065e3595 4030 const unsigned char *src = coding->source, *src_base;
8f924df7 4031 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4032 int multibytep = coding->src_multibyte;
4033 int consumed_chars = 0;
4034 int found = 0;
b73bfc1c 4035 int c;
fa42c37f 4036
ff0dacd7 4037 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4038 /* A coding system of this category is always ASCII compatible. */
4039 src += coding->head_ascii;
fa42c37f 4040
b73bfc1c 4041 while (1)
fa42c37f 4042 {
065e3595 4043 src_base = src;
df7492f9
KH
4044 ONE_MORE_BYTE (c);
4045 if (c < 0x80)
fa42c37f 4046 continue;
df7492f9 4047 if (c >= 0xA1)
fa42c37f 4048 {
df7492f9
KH
4049 ONE_MORE_BYTE (c);
4050 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4051 return 0;
ff0dacd7 4052 found = CATEGORY_MASK_BIG5;
fa42c37f 4053 }
df7492f9
KH
4054 else
4055 break;
fa42c37f 4056 }
ff0dacd7 4057 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4058 return 0;
fa42c37f 4059
df7492f9 4060 no_more_source:
065e3595 4061 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4062 {
ff0dacd7 4063 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4064 return 0;
4065 }
ff0dacd7
KH
4066 detect_info->found |= found;
4067 return 1;
fa42c37f
KH
4068}
4069
4ed46869
KH
4070/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4071 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
fa42c37f 4072
b73bfc1c 4073static void
df7492f9 4074decode_coding_sjis (coding)
4ed46869 4075 struct coding_system *coding;
4ed46869 4076{
8f924df7
KH
4077 const unsigned char *src = coding->source + coding->consumed;
4078 const unsigned char *src_end = coding->source + coding->src_bytes;
4079 const unsigned char *src_base;
69a80ea3
KH
4080 int *charbuf = coding->charbuf + coding->charbuf_used;
4081 int *charbuf_end
4082 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4083 int consumed_chars = 0, consumed_chars_base;
4084 int multibytep = coding->src_multibyte;
4085 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4086 struct charset *charset_kanji2;
24a73b0a 4087 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4088 int char_offset = coding->produced_char;
4089 int last_offset = char_offset;
4090 int last_id = charset_ascii;
a5d301df 4091
24a73b0a 4092 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4093
4094 val = charset_list;
4095 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4096 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4097 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4098 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4099
b73bfc1c 4100 while (1)
4ed46869 4101 {
df7492f9 4102 int c, c1;
24a73b0a 4103 struct charset *charset;
fa42c37f 4104
b73bfc1c 4105 src_base = src;
df7492f9 4106 consumed_chars_base = consumed_chars;
fa42c37f 4107
df7492f9
KH
4108 if (charbuf >= charbuf_end)
4109 break;
4110
4111 ONE_MORE_BYTE (c);
065e3595
KH
4112 if (c < 0)
4113 goto invalid_code;
24a73b0a
KH
4114 if (c < 0x80)
4115 charset = charset_roman;
57a47f8a 4116 else if (c == 0x80 || c == 0xA0)
8e921c4b 4117 goto invalid_code;
57a47f8a
KH
4118 else if (c >= 0xA1 && c <= 0xDF)
4119 {
4120 /* SJIS -> JISX0201-Kana */
4121 c &= 0x7F;
4122 charset = charset_kana;
4123 }
4124 else if (c <= 0xEF)
df7492f9 4125 {
57a47f8a
KH
4126 /* SJIS -> JISX0208 */
4127 ONE_MORE_BYTE (c1);
4128 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4129 goto invalid_code;
57a47f8a
KH
4130 c = (c << 8) | c1;
4131 SJIS_TO_JIS (c);
4132 charset = charset_kanji;
4133 }
4134 else if (c <= 0xFC && charset_kanji2)
4135 {
c6876370 4136 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4137 ONE_MORE_BYTE (c1);
4138 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4139 goto invalid_code;
57a47f8a
KH
4140 c = (c << 8) | c1;
4141 SJIS_TO_JIS2 (c);
4142 charset = charset_kanji2;
df7492f9 4143 }
57a47f8a
KH
4144 else
4145 goto invalid_code;
24a73b0a
KH
4146 if (charset->id != charset_ascii
4147 && last_id != charset->id)
4148 {
4149 if (last_id != charset_ascii)
69a80ea3 4150 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4151 last_id = charset->id;
4152 last_offset = char_offset;
4153 }
4154 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4155 *charbuf++ = c;
ff0dacd7 4156 char_offset++;
df7492f9 4157 continue;
b73bfc1c 4158
df7492f9
KH
4159 invalid_code:
4160 src = src_base;
4161 consumed_chars = consumed_chars_base;
4162 ONE_MORE_BYTE (c);
065e3595 4163 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4164 char_offset++;
df7492f9
KH
4165 coding->errors++;
4166 }
fa42c37f 4167
df7492f9 4168 no_more_source:
ff0dacd7 4169 if (last_id != charset_ascii)
69a80ea3 4170 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4171 coding->consumed_char += consumed_chars_base;
4172 coding->consumed = src_base - coding->source;
4173 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4174}
4175
b73bfc1c 4176static void
df7492f9 4177decode_coding_big5 (coding)
4ed46869 4178 struct coding_system *coding;
4ed46869 4179{
8f924df7
KH
4180 const unsigned char *src = coding->source + coding->consumed;
4181 const unsigned char *src_end = coding->source + coding->src_bytes;
4182 const unsigned char *src_base;
69a80ea3
KH
4183 int *charbuf = coding->charbuf + coding->charbuf_used;
4184 int *charbuf_end
4185 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4186 int consumed_chars = 0, consumed_chars_base;
4187 int multibytep = coding->src_multibyte;
4188 struct charset *charset_roman, *charset_big5;
24a73b0a 4189 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4190 int char_offset = coding->produced_char;
4191 int last_offset = char_offset;
4192 int last_id = charset_ascii;
df7492f9 4193
24a73b0a 4194 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4195 val = charset_list;
4196 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4197 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4198
b73bfc1c 4199 while (1)
4ed46869 4200 {
df7492f9 4201 int c, c1;
24a73b0a 4202 struct charset *charset;
b73bfc1c
KH
4203
4204 src_base = src;
df7492f9
KH
4205 consumed_chars_base = consumed_chars;
4206
4207 if (charbuf >= charbuf_end)
4208 break;
4209
4210 ONE_MORE_BYTE (c);
b73bfc1c 4211
065e3595
KH
4212 if (c < 0)
4213 goto invalid_code;
24a73b0a
KH
4214 if (c < 0x80)
4215 charset = charset_roman;
4216 else
4ed46869 4217 {
24a73b0a
KH
4218 /* BIG5 -> Big5 */
4219 if (c < 0xA1 || c > 0xFE)
4220 goto invalid_code;
4221 ONE_MORE_BYTE (c1);
4222 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4223 goto invalid_code;
4224 c = c << 8 | c1;
4225 charset = charset_big5;
4ed46869 4226 }
24a73b0a
KH
4227 if (charset->id != charset_ascii
4228 && last_id != charset->id)
df7492f9 4229 {
24a73b0a 4230 if (last_id != charset_ascii)
69a80ea3 4231 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4232 last_id = charset->id;
4233 last_offset = char_offset;
4ed46869 4234 }
24a73b0a 4235 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4236 *charbuf++ = c;
ff0dacd7 4237 char_offset++;
fb88bf2d
KH
4238 continue;
4239
df7492f9 4240 invalid_code:
4ed46869 4241 src = src_base;
df7492f9
KH
4242 consumed_chars = consumed_chars_base;
4243 ONE_MORE_BYTE (c);
065e3595 4244 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4245 char_offset++;
df7492f9 4246 coding->errors++;
fb88bf2d 4247 }
d46c5b12 4248
df7492f9 4249 no_more_source:
ff0dacd7 4250 if (last_id != charset_ascii)
69a80ea3 4251 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4252 coding->consumed_char += consumed_chars_base;
4253 coding->consumed = src_base - coding->source;
4254 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4255}
4256
4257/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4258 This function can encode charsets `ascii', `katakana-jisx0201',
4259 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4260 are sure that all these charsets are registered as official charset
4ed46869
KH
4261 (i.e. do not have extended leading-codes). Characters of other
4262 charsets are produced without any encoding. If SJIS_P is 1, encode
4263 SJIS text, else encode BIG5 text. */
4264
df7492f9
KH
4265static int
4266encode_coding_sjis (coding)
4ed46869 4267 struct coding_system *coding;
4ed46869 4268{
df7492f9
KH
4269 int multibytep = coding->dst_multibyte;
4270 int *charbuf = coding->charbuf;
4271 int *charbuf_end = charbuf + coding->charbuf_used;
4272 unsigned char *dst = coding->destination + coding->produced;
4273 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4274 int safe_room = 4;
4275 int produced_chars = 0;
24a73b0a 4276 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4277 int ascii_compatible;
4278 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4279 struct charset *charset_kanji2;
df7492f9 4280 int c;
a5d301df 4281
24a73b0a 4282 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4283 val = charset_list;
4284 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4285 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4286 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4287 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4288
df7492f9 4289 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 4290
df7492f9
KH
4291 while (charbuf < charbuf_end)
4292 {
4293 ASSURE_DESTINATION (safe_room);
4294 c = *charbuf++;
b73bfc1c 4295 /* Now encode the character C. */
df7492f9
KH
4296 if (ASCII_CHAR_P (c) && ascii_compatible)
4297 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4298 else if (CHAR_BYTE8_P (c))
4299 {
4300 c = CHAR_TO_BYTE8 (c);
4301 EMIT_ONE_BYTE (c);
4302 }
df7492f9 4303 else
b73bfc1c 4304 {
df7492f9
KH
4305 unsigned code;
4306 struct charset *charset = char_charset (c, charset_list, &code);
4307
4308 if (!charset)
4ed46869 4309 {
41cbe562 4310 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4311 {
41cbe562
KH
4312 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4313 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4314 }
41cbe562 4315 else
b73bfc1c 4316 {
41cbe562
KH
4317 c = coding->default_char;
4318 charset = char_charset (c, charset_list, &code);
b73bfc1c 4319 }
b73bfc1c 4320 }
df7492f9
KH
4321 if (code == CHARSET_INVALID_CODE (charset))
4322 abort ();
4323 if (charset == charset_kanji)
4324 {
4325 int c1, c2;
4326 JIS_TO_SJIS (code);
4327 c1 = code >> 8, c2 = code & 0xFF;
4328 EMIT_TWO_BYTES (c1, c2);
4329 }
4330 else if (charset == charset_kana)
4331 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
4332 else if (charset_kanji2 && charset == charset_kanji2)
4333 {
4334 int c1, c2;
4335
4336 c1 = code >> 8;
4337 if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4338 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4339 {
4340 JIS_TO_SJIS2 (code);
4341 c1 = code >> 8, c2 = code & 0xFF;
4342 EMIT_TWO_BYTES (c1, c2);
4343 }
4344 else
4345 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4346 }
df7492f9
KH
4347 else
4348 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4349 }
4350 }
065e3595 4351 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4352 coding->produced_char += produced_chars;
4353 coding->produced = dst - coding->destination;
4354 return 0;
4355}
4356
4357static int
4358encode_coding_big5 (coding)
4359 struct coding_system *coding;
4360{
4361 int multibytep = coding->dst_multibyte;
4362 int *charbuf = coding->charbuf;
4363 int *charbuf_end = charbuf + coding->charbuf_used;
4364 unsigned char *dst = coding->destination + coding->produced;
4365 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4366 int safe_room = 4;
4367 int produced_chars = 0;
24a73b0a 4368 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4369 int ascii_compatible;
4370 struct charset *charset_roman, *charset_big5;
4371 int c;
4372
24a73b0a 4373 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4374 val = charset_list;
4375 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4376 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4377 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4378
4379 while (charbuf < charbuf_end)
4380 {
4381 ASSURE_DESTINATION (safe_room);
4382 c = *charbuf++;
4383 /* Now encode the character C. */
4384 if (ASCII_CHAR_P (c) && ascii_compatible)
4385 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4386 else if (CHAR_BYTE8_P (c))
4387 {
4388 c = CHAR_TO_BYTE8 (c);
4389 EMIT_ONE_BYTE (c);
b73bfc1c
KH
4390 }
4391 else
4392 {
df7492f9
KH
4393 unsigned code;
4394 struct charset *charset = char_charset (c, charset_list, &code);
4395
4396 if (! charset)
b73bfc1c 4397 {
41cbe562 4398 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4399 {
41cbe562
KH
4400 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4401 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4402 }
41cbe562 4403 else
0eecad43 4404 {
41cbe562
KH
4405 c = coding->default_char;
4406 charset = char_charset (c, charset_list, &code);
0eecad43 4407 }
4ed46869 4408 }
df7492f9
KH
4409 if (code == CHARSET_INVALID_CODE (charset))
4410 abort ();
4411 if (charset == charset_big5)
b73bfc1c 4412 {
df7492f9
KH
4413 int c1, c2;
4414
4415 c1 = code >> 8, c2 = code & 0xFF;
4416 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 4417 }
df7492f9
KH
4418 else
4419 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 4420 }
4ed46869 4421 }
065e3595 4422 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4423 coding->produced_char += produced_chars;
4424 coding->produced = dst - coding->destination;
4425 return 0;
4ed46869
KH
4426}
4427
4428\f
df7492f9 4429/*** 9. CCL handlers ***/
1397dc18
KH
4430
4431/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4432 Check if a text is encoded in a coding system of which
4433 encoder/decoder are written in CCL program. If it is, return
df7492f9 4434 CATEGORY_MASK_CCL, else return 0. */
1397dc18 4435
0a28aafb 4436static int
ff0dacd7 4437detect_coding_ccl (coding, detect_info)
df7492f9 4438 struct coding_system *coding;
ff0dacd7 4439 struct coding_detection_info *detect_info;
1397dc18 4440{
065e3595 4441 const unsigned char *src = coding->source, *src_base;
8f924df7 4442 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4443 int multibytep = coding->src_multibyte;
4444 int consumed_chars = 0;
4445 int found = 0;
0e219d54 4446 unsigned char *valids;
df7492f9
KH
4447 int head_ascii = coding->head_ascii;
4448 Lisp_Object attrs;
4449
ff0dacd7
KH
4450 detect_info->checked |= CATEGORY_MASK_CCL;
4451
df7492f9 4452 coding = &coding_categories[coding_category_ccl];
0e219d54 4453 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
4454 attrs = CODING_ID_ATTRS (coding->id);
4455 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4456 src += head_ascii;
1397dc18 4457
b73bfc1c 4458 while (1)
1397dc18 4459 {
df7492f9 4460 int c;
065e3595
KH
4461
4462 src_base = src;
df7492f9 4463 ONE_MORE_BYTE (c);
065e3595 4464 if (c < 0 || ! valids[c])
df7492f9 4465 break;
ff0dacd7
KH
4466 if ((valids[c] > 1))
4467 found = CATEGORY_MASK_CCL;
df7492f9 4468 }
ff0dacd7 4469 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4470 return 0;
4471
4472 no_more_source:
ff0dacd7
KH
4473 detect_info->found |= found;
4474 return 1;
df7492f9
KH
4475}
4476
4477static void
4478decode_coding_ccl (coding)
4479 struct coding_system *coding;
4480{
7c78e542 4481 const unsigned char *src = coding->source + coding->consumed;
8f924df7 4482 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
4483 int *charbuf = coding->charbuf + coding->charbuf_used;
4484 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
4485 int consumed_chars = 0;
4486 int multibytep = coding->src_multibyte;
4487 struct ccl_program ccl;
4488 int source_charbuf[1024];
4489 int source_byteidx[1024];
24a73b0a 4490 Lisp_Object attrs, charset_list;
df7492f9 4491
24a73b0a 4492 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4493 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4494
4495 while (src < src_end)
4496 {
7c78e542 4497 const unsigned char *p = src;
df7492f9
KH
4498 int *source, *source_end;
4499 int i = 0;
4500
4501 if (multibytep)
4502 while (i < 1024 && p < src_end)
4503 {
4504 source_byteidx[i] = p - src;
4505 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4506 }
4507 else
4508 while (i < 1024 && p < src_end)
4509 source_charbuf[i++] = *p++;
8f924df7 4510
df7492f9
KH
4511 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4512 ccl.last_block = 1;
4513
4514 source = source_charbuf;
4515 source_end = source + i;
4516 while (source < source_end)
4517 {
4518 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4519 source_end - source, charbuf_end - charbuf,
4520 charset_list);
df7492f9
KH
4521 source += ccl.consumed;
4522 charbuf += ccl.produced;
4523 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4524 break;
4525 }
4526 if (source < source_end)
4527 src += source_byteidx[source - source_charbuf];
4528 else
4529 src = p;
4530 consumed_chars += source - source_charbuf;
4531
4532 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4533 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4534 break;
4535 }
4536
4537 switch (ccl.status)
4538 {
4539 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4540 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4541 break;
4542 case CCL_STAT_SUSPEND_BY_DST:
4543 break;
4544 case CCL_STAT_QUIT:
4545 case CCL_STAT_INVALID_CMD:
065e3595 4546 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4547 break;
4548 default:
065e3595 4549 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4550 break;
4551 }
4552 coding->consumed_char += consumed_chars;
4553 coding->consumed = src - coding->source;
4554 coding->charbuf_used = charbuf - coding->charbuf;
4555}
4556
4557static int
4558encode_coding_ccl (coding)
4559 struct coding_system *coding;
4560{
4561 struct ccl_program ccl;
4562 int multibytep = coding->dst_multibyte;
4563 int *charbuf = coding->charbuf;
4564 int *charbuf_end = charbuf + coding->charbuf_used;
4565 unsigned char *dst = coding->destination + coding->produced;
4566 unsigned char *dst_end = coding->destination + coding->dst_bytes;
df7492f9
KH
4567 int destination_charbuf[1024];
4568 int i, produced_chars = 0;
24a73b0a 4569 Lisp_Object attrs, charset_list;
df7492f9 4570
24a73b0a 4571 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4572 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4573
4574 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4575 ccl.dst_multibyte = coding->dst_multibyte;
4576
8cffd3e7 4577 while (charbuf < charbuf_end)
df7492f9 4578 {
df7492f9 4579 ccl_driver (&ccl, charbuf, destination_charbuf,
8cffd3e7 4580 charbuf_end - charbuf, 1024, charset_list);
df7492f9 4581 if (multibytep)
8cffd3e7
KH
4582 {
4583 ASSURE_DESTINATION (ccl.produced * 2);
4584 for (i = 0; i < ccl.produced; i++)
4585 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4586 }
df7492f9
KH
4587 else
4588 {
8cffd3e7 4589 ASSURE_DESTINATION (ccl.produced);
df7492f9
KH
4590 for (i = 0; i < ccl.produced; i++)
4591 *dst++ = destination_charbuf[i] & 0xFF;
4592 produced_chars += ccl.produced;
4593 }
8cffd3e7
KH
4594 charbuf += ccl.consumed;
4595 if (ccl.status == CCL_STAT_QUIT
4596 || ccl.status == CCL_STAT_INVALID_CMD)
4597 break;
df7492f9
KH
4598 }
4599
4600 switch (ccl.status)
4601 {
4602 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4603 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4604 break;
4605 case CCL_STAT_SUSPEND_BY_DST:
065e3595 4606 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
4607 break;
4608 case CCL_STAT_QUIT:
4609 case CCL_STAT_INVALID_CMD:
065e3595 4610 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4611 break;
4612 default:
065e3595 4613 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4614 break;
1397dc18 4615 }
df7492f9
KH
4616
4617 coding->produced_char += produced_chars;
4618 coding->produced = dst - coding->destination;
4619 return 0;
1397dc18
KH
4620}
4621
df7492f9 4622
1397dc18 4623\f
df7492f9 4624/*** 10, 11. no-conversion handlers ***/
4ed46869 4625
b73bfc1c 4626/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4627
b73bfc1c 4628static void
df7492f9 4629decode_coding_raw_text (coding)
4ed46869 4630 struct coding_system *coding;
4ed46869 4631{
df7492f9 4632 coding->chars_at_source = 1;
2c78b7e1
KH
4633 coding->consumed_char = 0;
4634 coding->consumed = 0;
065e3595 4635 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4636}
4ed46869 4637
df7492f9
KH
4638static int
4639encode_coding_raw_text (coding)
4640 struct coding_system *coding;
4641{
4642 int multibytep = coding->dst_multibyte;
4643 int *charbuf = coding->charbuf;
4644 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4645 unsigned char *dst = coding->destination + coding->produced;
4646 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4647 int produced_chars = 0;
b73bfc1c
KH
4648 int c;
4649
df7492f9 4650 if (multibytep)
b73bfc1c 4651 {
df7492f9 4652 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4653
df7492f9
KH
4654 if (coding->src_multibyte)
4655 while (charbuf < charbuf_end)
4656 {
4657 ASSURE_DESTINATION (safe_room);
4658 c = *charbuf++;
4659 if (ASCII_CHAR_P (c))
4660 EMIT_ONE_ASCII_BYTE (c);
4661 else if (CHAR_BYTE8_P (c))
4662 {
4663 c = CHAR_TO_BYTE8 (c);
4664 EMIT_ONE_BYTE (c);
4665 }
4666 else
4667 {
4668 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4669
df7492f9
KH
4670 CHAR_STRING_ADVANCE (c, p1);
4671 while (p0 < p1)
9d123124
KH
4672 {
4673 EMIT_ONE_BYTE (*p0);
4674 p0++;
4675 }
df7492f9
KH
4676 }
4677 }
b73bfc1c 4678 else
df7492f9
KH
4679 while (charbuf < charbuf_end)
4680 {
4681 ASSURE_DESTINATION (safe_room);
4682 c = *charbuf++;
4683 EMIT_ONE_BYTE (c);
4684 }
4685 }
4686 else
4ed46869 4687 {
df7492f9 4688 if (coding->src_multibyte)
d46c5b12 4689 {
df7492f9
KH
4690 int safe_room = MAX_MULTIBYTE_LENGTH;
4691
4692 while (charbuf < charbuf_end)
d46c5b12 4693 {
df7492f9
KH
4694 ASSURE_DESTINATION (safe_room);
4695 c = *charbuf++;
4696 if (ASCII_CHAR_P (c))
4697 *dst++ = c;
4698 else if (CHAR_BYTE8_P (c))
4699 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4700 else
df7492f9
KH
4701 CHAR_STRING_ADVANCE (c, dst);
4702 produced_chars++;
d46c5b12
KH
4703 }
4704 }
df7492f9
KH
4705 else
4706 {
4707 ASSURE_DESTINATION (charbuf_end - charbuf);
4708 while (charbuf < charbuf_end && dst < dst_end)
4709 *dst++ = *charbuf++;
4710 produced_chars = dst - (coding->destination + coding->dst_bytes);
8f924df7 4711 }
4ed46869 4712 }
065e3595 4713 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4714 coding->produced_char += produced_chars;
4715 coding->produced = dst - coding->destination;
4716 return 0;
4ed46869
KH
4717}
4718
ff0dacd7
KH
4719/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4720 Check if a text is encoded in a charset-based coding system. If it
4721 is, return 1, else return 0. */
4722
0a28aafb 4723static int
ff0dacd7 4724detect_coding_charset (coding, detect_info)
df7492f9 4725 struct coding_system *coding;
ff0dacd7 4726 struct coding_detection_info *detect_info;
1397dc18 4727{
065e3595 4728 const unsigned char *src = coding->source, *src_base;
8f924df7 4729 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4730 int multibytep = coding->src_multibyte;
4731 int consumed_chars = 0;
4732 Lisp_Object attrs, valids;
584948ac 4733 int found = 0;
1397dc18 4734
ff0dacd7
KH
4735 detect_info->checked |= CATEGORY_MASK_CHARSET;
4736
df7492f9
KH
4737 coding = &coding_categories[coding_category_charset];
4738 attrs = CODING_ID_ATTRS (coding->id);
4739 valids = AREF (attrs, coding_attr_charset_valids);
4740
4741 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4742 src += coding->head_ascii;
1397dc18 4743
b73bfc1c 4744 while (1)
1397dc18 4745 {
df7492f9 4746 int c;
1397dc18 4747
065e3595 4748 src_base = src;
df7492f9 4749 ONE_MORE_BYTE (c);
065e3595
KH
4750 if (c < 0)
4751 continue;
df7492f9
KH
4752 if (NILP (AREF (valids, c)))
4753 break;
584948ac 4754 if (c >= 0x80)
ff0dacd7 4755 found = CATEGORY_MASK_CHARSET;
df7492f9 4756 }
ff0dacd7 4757 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4758 return 0;
4ed46869 4759
df7492f9 4760 no_more_source:
ff0dacd7
KH
4761 detect_info->found |= found;
4762 return 1;
df7492f9 4763}
b73bfc1c 4764
b73bfc1c 4765static void
df7492f9 4766decode_coding_charset (coding)
4ed46869 4767 struct coding_system *coding;
4ed46869 4768{
8f924df7
KH
4769 const unsigned char *src = coding->source + coding->consumed;
4770 const unsigned char *src_end = coding->source + coding->src_bytes;
4771 const unsigned char *src_base;
69a80ea3
KH
4772 int *charbuf = coding->charbuf + coding->charbuf_used;
4773 int *charbuf_end
4774 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4775 int consumed_chars = 0, consumed_chars_base;
4776 int multibytep = coding->src_multibyte;
24a73b0a 4777 Lisp_Object attrs, charset_list, valids;
ff0dacd7
KH
4778 int char_offset = coding->produced_char;
4779 int last_offset = char_offset;
4780 int last_id = charset_ascii;
df7492f9 4781
24a73b0a 4782 CODING_GET_INFO (coding, attrs, charset_list);
4eb6d3f1 4783 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4784
df7492f9 4785 while (1)
4ed46869 4786 {
4eb6d3f1 4787 int c;
24a73b0a
KH
4788 Lisp_Object val;
4789 struct charset *charset;
4790 int dim;
4791 int len = 1;
4792 unsigned code;
df7492f9
KH
4793
4794 src_base = src;
4795 consumed_chars_base = consumed_chars;
b73bfc1c 4796
df7492f9
KH
4797 if (charbuf >= charbuf_end)
4798 break;
4799
4eb6d3f1 4800 ONE_MORE_BYTE (c);
065e3595
KH
4801 if (c < 0)
4802 goto invalid_code;
24a73b0a
KH
4803 code = c;
4804
4805 val = AREF (valids, c);
4806 if (NILP (val))
4807 goto invalid_code;
4808 if (INTEGERP (val))
d46c5b12 4809 {
24a73b0a
KH
4810 charset = CHARSET_FROM_ID (XFASTINT (val));
4811 dim = CHARSET_DIMENSION (charset);
4812 while (len < dim)
b73bfc1c 4813 {
24a73b0a
KH
4814 ONE_MORE_BYTE (c);
4815 code = (code << 8) | c;
4816 len++;
b73bfc1c 4817 }
24a73b0a
KH
4818 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4819 charset, code, c);
d46c5b12 4820 }
df7492f9 4821 else
d46c5b12 4822 {
24a73b0a
KH
4823 /* VAL is a list of charset IDs. It is assured that the
4824 list is sorted by charset dimensions (smaller one
4825 comes first). */
4826 while (CONSP (val))
4eb6d3f1 4827 {
24a73b0a 4828 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 4829 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4830 while (len < dim)
4eb6d3f1 4831 {
acb2a965
KH
4832 ONE_MORE_BYTE (c);
4833 code = (code << 8) | c;
f9d71dcd 4834 len++;
4eb6d3f1 4835 }
24a73b0a
KH
4836 CODING_DECODE_CHAR (coding, src, src_base,
4837 src_end, charset, code, c);
4838 if (c >= 0)
4839 break;
4840 val = XCDR (val);
ff0dacd7 4841 }
d46c5b12 4842 }
24a73b0a
KH
4843 if (c < 0)
4844 goto invalid_code;
4845 if (charset->id != charset_ascii
4846 && last_id != charset->id)
4847 {
4848 if (last_id != charset_ascii)
69a80ea3 4849 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4850 last_id = charset->id;
4851 last_offset = char_offset;
4852 }
4853
df7492f9 4854 *charbuf++ = c;
ff0dacd7 4855 char_offset++;
df7492f9
KH
4856 continue;
4857
4858 invalid_code:
4859 src = src_base;
4860 consumed_chars = consumed_chars_base;
4861 ONE_MORE_BYTE (c);
065e3595 4862 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4863 char_offset++;
df7492f9 4864 coding->errors++;
4ed46869
KH
4865 }
4866
df7492f9 4867 no_more_source:
ff0dacd7 4868 if (last_id != charset_ascii)
69a80ea3 4869 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4870 coding->consumed_char += consumed_chars_base;
4871 coding->consumed = src_base - coding->source;
4872 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4873}
4874
df7492f9
KH
4875static int
4876encode_coding_charset (coding)
4ed46869 4877 struct coding_system *coding;
4ed46869 4878{
df7492f9
KH
4879 int multibytep = coding->dst_multibyte;
4880 int *charbuf = coding->charbuf;
4881 int *charbuf_end = charbuf + coding->charbuf_used;
4882 unsigned char *dst = coding->destination + coding->produced;
4883 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4884 int safe_room = MAX_MULTIBYTE_LENGTH;
4885 int produced_chars = 0;
24a73b0a 4886 Lisp_Object attrs, charset_list;
df7492f9 4887 int ascii_compatible;
b73bfc1c 4888 int c;
b73bfc1c 4889
24a73b0a 4890 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 4891 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4892
df7492f9 4893 while (charbuf < charbuf_end)
4ed46869 4894 {
4eb6d3f1 4895 struct charset *charset;
df7492f9 4896 unsigned code;
8f924df7 4897
df7492f9
KH
4898 ASSURE_DESTINATION (safe_room);
4899 c = *charbuf++;
4900 if (ascii_compatible && ASCII_CHAR_P (c))
4901 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 4902 else if (CHAR_BYTE8_P (c))
4ed46869 4903 {
16eafb5d
KH
4904 c = CHAR_TO_BYTE8 (c);
4905 EMIT_ONE_BYTE (c);
d46c5b12 4906 }
d46c5b12 4907 else
b73bfc1c 4908 {
4eb6d3f1
KH
4909 charset = char_charset (c, charset_list, &code);
4910 if (charset)
4911 {
4912 if (CHARSET_DIMENSION (charset) == 1)
4913 EMIT_ONE_BYTE (code);
4914 else if (CHARSET_DIMENSION (charset) == 2)
4915 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4916 else if (CHARSET_DIMENSION (charset) == 3)
4917 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4918 else
4919 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4920 (code >> 8) & 0xFF, code & 0xFF);
4921 }
4922 else
41cbe562
KH
4923 {
4924 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4925 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4926 else
4927 c = coding->default_char;
4928 EMIT_ONE_BYTE (c);
4929 }
4ed46869 4930 }
4ed46869
KH
4931 }
4932
065e3595 4933 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4934 coding->produced_char += produced_chars;
4935 coding->produced = dst - coding->destination;
4936 return 0;
4ed46869
KH
4937}
4938
4939\f
1397dc18 4940/*** 10. C library functions ***/
4ed46869 4941
df7492f9
KH
4942/* Setup coding context CODING from information about CODING_SYSTEM.
4943 If CODING_SYSTEM is nil, `undecided' is assumed. If
4944 CODING_SYSTEM is invalid, signal an error. */
4ed46869 4945
ec6d2bb8 4946void
e0e989f6
KH
4947setup_coding_system (coding_system, coding)
4948 Lisp_Object coding_system;
4ed46869
KH
4949 struct coding_system *coding;
4950{
df7492f9
KH
4951 Lisp_Object attrs;
4952 Lisp_Object eol_type;
4953 Lisp_Object coding_type;
4608c386 4954 Lisp_Object val;
4ed46869 4955
df7492f9 4956 if (NILP (coding_system))
ae6f73fa 4957 coding_system = Qundecided;
c07c8e12 4958
df7492f9 4959 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 4960
df7492f9
KH
4961 attrs = CODING_ID_ATTRS (coding->id);
4962 eol_type = CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 4963
df7492f9
KH
4964 coding->mode = 0;
4965 coding->head_ascii = -1;
4966 coding->common_flags
4967 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
5e5c78be
KH
4968 if (! NILP (CODING_ATTR_POST_READ (attrs)))
4969 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4970 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4971 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
4972 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4973 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 4974
df7492f9 4975 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
4976 coding->max_charset_id = SCHARS (val) - 1;
4977 coding->safe_charsets = (char *) SDATA (val);
df7492f9 4978 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4608c386 4979
df7492f9
KH
4980 coding_type = CODING_ATTR_TYPE (attrs);
4981 if (EQ (coding_type, Qundecided))
d46c5b12 4982 {
df7492f9
KH
4983 coding->detector = NULL;
4984 coding->decoder = decode_coding_raw_text;
4985 coding->encoder = encode_coding_raw_text;
4986 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 4987 }
df7492f9 4988 else if (EQ (coding_type, Qiso_2022))
d46c5b12 4989 {
df7492f9
KH
4990 int i;
4991 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4992
4993 /* Invoke graphic register 0 to plane 0. */
4994 CODING_ISO_INVOCATION (coding, 0) = 0;
4995 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4996 CODING_ISO_INVOCATION (coding, 1)
4997 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4998 /* Setup the initial status of designation. */
4999 for (i = 0; i < 4; i++)
5000 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5001 /* Not single shifting initially. */
5002 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5003 /* Beginning of buffer should also be regarded as bol. */
5004 CODING_ISO_BOL (coding) = 1;
5005 coding->detector = detect_coding_iso_2022;
5006 coding->decoder = decode_coding_iso_2022;
5007 coding->encoder = encode_coding_iso_2022;
5008 if (flags & CODING_ISO_FLAG_SAFE)
5009 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 5010 coding->common_flags
df7492f9
KH
5011 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5012 | CODING_REQUIRE_FLUSHING_MASK);
5013 if (flags & CODING_ISO_FLAG_COMPOSITION)
5014 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
5015 if (flags & CODING_ISO_FLAG_DESIGNATION)
5016 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
5017 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5018 {
5019 setup_iso_safe_charsets (attrs);
5020 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
5021 coding->max_charset_id = SCHARS (val) - 1;
5022 coding->safe_charsets = (char *) SDATA (val);
df7492f9
KH
5023 }
5024 CODING_ISO_FLAGS (coding) = flags;
d46c5b12 5025 }
df7492f9 5026 else if (EQ (coding_type, Qcharset))
d46c5b12 5027 {
df7492f9
KH
5028 coding->detector = detect_coding_charset;
5029 coding->decoder = decode_coding_charset;
5030 coding->encoder = encode_coding_charset;
d46c5b12 5031 coding->common_flags
df7492f9 5032 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5033 }
df7492f9 5034 else if (EQ (coding_type, Qutf_8))
d46c5b12 5035 {
df7492f9
KH
5036 coding->detector = detect_coding_utf_8;
5037 coding->decoder = decode_coding_utf_8;
5038 coding->encoder = encode_coding_utf_8;
5039 coding->common_flags
5040 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5041 }
5042 else if (EQ (coding_type, Qutf_16))
5043 {
5044 val = AREF (attrs, coding_attr_utf_16_bom);
5045 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
5046 : EQ (val, Qt) ? utf_16_with_bom
5047 : utf_16_without_bom);
5048 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5049 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5050 : utf_16_little_endian);
e19c3639 5051 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5052 coding->detector = detect_coding_utf_16;
5053 coding->decoder = decode_coding_utf_16;
5054 coding->encoder = encode_coding_utf_16;
5055 coding->common_flags
5056 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
b49a1807
KH
5057 if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5058 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5059 }
df7492f9 5060 else if (EQ (coding_type, Qccl))
4ed46869 5061 {
df7492f9
KH
5062 coding->detector = detect_coding_ccl;
5063 coding->decoder = decode_coding_ccl;
5064 coding->encoder = encode_coding_ccl;
c952af22 5065 coding->common_flags
df7492f9
KH
5066 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5067 | CODING_REQUIRE_FLUSHING_MASK);
5068 }
5069 else if (EQ (coding_type, Qemacs_mule))
5070 {
5071 coding->detector = detect_coding_emacs_mule;
5072 coding->decoder = decode_coding_emacs_mule;
5073 coding->encoder = encode_coding_emacs_mule;
c952af22 5074 coding->common_flags
df7492f9
KH
5075 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5076 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5077 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5078 {
5079 Lisp_Object tail, safe_charsets;
5080 int max_charset_id = 0;
5081
5082 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5083 tail = XCDR (tail))
5084 if (max_charset_id < XFASTINT (XCAR (tail)))
5085 max_charset_id = XFASTINT (XCAR (tail));
5086 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5087 make_number (255));
5088 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5089 tail = XCDR (tail))
8f924df7 5090 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5091 coding->max_charset_id = max_charset_id;
8f924df7 5092 coding->safe_charsets = (char *) SDATA (safe_charsets);
df7492f9
KH
5093 }
5094 }
5095 else if (EQ (coding_type, Qshift_jis))
5096 {
5097 coding->detector = detect_coding_sjis;
5098 coding->decoder = decode_coding_sjis;
5099 coding->encoder = encode_coding_sjis;
c952af22 5100 coding->common_flags
df7492f9
KH
5101 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5102 }
5103 else if (EQ (coding_type, Qbig5))
5104 {
5105 coding->detector = detect_coding_big5;
5106 coding->decoder = decode_coding_big5;
5107 coding->encoder = encode_coding_big5;
c952af22 5108 coding->common_flags
df7492f9
KH
5109 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5110 }
5111 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5112 {
df7492f9
KH
5113 coding->detector = NULL;
5114 coding->decoder = decode_coding_raw_text;
5115 coding->encoder = encode_coding_raw_text;
ea29edf2
KH
5116 if (! EQ (eol_type, Qunix))
5117 {
5118 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5119 if (! VECTORP (eol_type))
5120 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5121 }
5122
4ed46869 5123 }
4ed46869 5124
df7492f9 5125 return;
4ed46869
KH
5126}
5127
0ff61e78
KH
5128/* Return a list of charsets supported by CODING. */
5129
5130Lisp_Object
5131coding_charset_list (coding)
5132 struct coding_system *coding;
5133{
35befdaa 5134 Lisp_Object attrs, charset_list;
0ff61e78
KH
5135
5136 CODING_GET_INFO (coding, attrs, charset_list);
5137 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5138 {
5139 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5140
5141 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5142 charset_list = Viso_2022_charset_list;
5143 }
5144 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5145 {
5146 charset_list = Vemacs_mule_charset_list;
5147 }
5148 return charset_list;
5149}
5150
5151
df7492f9
KH
5152/* Return raw-text or one of its subsidiaries that has the same
5153 eol_type as CODING-SYSTEM. */
ec6d2bb8 5154
df7492f9
KH
5155Lisp_Object
5156raw_text_coding_system (coding_system)
5157 Lisp_Object coding_system;
ec6d2bb8 5158{
0be8721c 5159 Lisp_Object spec, attrs;
df7492f9 5160 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 5161
d3e4cb56
KH
5162 if (NILP (coding_system))
5163 return Qraw_text;
df7492f9
KH
5164 spec = CODING_SYSTEM_SPEC (coding_system);
5165 attrs = AREF (spec, 0);
ec6d2bb8 5166
df7492f9
KH
5167 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5168 return coding_system;
ec6d2bb8 5169
df7492f9
KH
5170 eol_type = AREF (spec, 2);
5171 if (VECTORP (eol_type))
5172 return Qraw_text;
5173 spec = CODING_SYSTEM_SPEC (Qraw_text);
5174 raw_text_eol_type = AREF (spec, 2);
5175 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5176 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5177 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
5178}
5179
54f78171 5180
df7492f9
KH
5181/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5182 does, return one of the subsidiary that has the same eol-spec as
fcbcfb64
KH
5183 PARENT. Otherwise, return CODING_SYSTEM. If PARENT is nil,
5184 inherit end-of-line format from the system's setting
5185 (system_eol_type). */
df7492f9
KH
5186
5187Lisp_Object
5188coding_inherit_eol_type (coding_system, parent)
b74e4686 5189 Lisp_Object coding_system, parent;
54f78171 5190{
3e139625 5191 Lisp_Object spec, eol_type;
54f78171 5192
d3e4cb56
KH
5193 if (NILP (coding_system))
5194 coding_system = Qraw_text;
df7492f9 5195 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 5196 eol_type = AREF (spec, 2);
fcbcfb64 5197 if (VECTORP (eol_type))
df7492f9 5198 {
df7492f9
KH
5199 Lisp_Object parent_eol_type;
5200
fcbcfb64
KH
5201 if (! NILP (parent))
5202 {
5203 Lisp_Object parent_spec;
5204
5205 parent_spec
5206 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5207 parent_eol_type = AREF (parent_spec, 2);
5208 }
5209 else
5210 parent_eol_type = system_eol_type;
df7492f9
KH
5211 if (EQ (parent_eol_type, Qunix))
5212 coding_system = AREF (eol_type, 0);
5213 else if (EQ (parent_eol_type, Qdos))
5214 coding_system = AREF (eol_type, 1);
5215 else if (EQ (parent_eol_type, Qmac))
5216 coding_system = AREF (eol_type, 2);
54f78171 5217 }
df7492f9 5218 return coding_system;
54f78171
KH
5219}
5220
4ed46869
KH
5221/* Emacs has a mechanism to automatically detect a coding system if it
5222 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5223 it's impossible to distinguish some coding systems accurately
5224 because they use the same range of codes. So, at first, coding
5225 systems are categorized into 7, those are:
5226
0ef69138 5227 o coding-category-emacs-mule
4ed46869
KH
5228
5229 The category for a coding system which has the same code range
5230 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5231 symbol) `emacs-mule' by default.
4ed46869
KH
5232
5233 o coding-category-sjis
5234
5235 The category for a coding system which has the same code range
5236 as SJIS. Assigned the coding-system (Lisp
7717c392 5237 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5238
5239 o coding-category-iso-7
5240
5241 The category for a coding system which has the same code range
7717c392 5242 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5243 shift and single shift functions. This can encode/decode all
5244 charsets. Assigned the coding-system (Lisp symbol)
5245 `iso-2022-7bit' by default.
5246
5247 o coding-category-iso-7-tight
5248
5249 Same as coding-category-iso-7 except that this can
5250 encode/decode only the specified charsets.
4ed46869
KH
5251
5252 o coding-category-iso-8-1
5253
5254 The category for a coding system which has the same code range
5255 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5256 for DIMENSION1 charset. This doesn't use any locking shift
5257 and single shift functions. Assigned the coding-system (Lisp
5258 symbol) `iso-latin-1' by default.
4ed46869
KH
5259
5260 o coding-category-iso-8-2
5261
5262 The category for a coding system which has the same code range
5263 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5264 for DIMENSION2 charset. This doesn't use any locking shift
5265 and single shift functions. Assigned the coding-system (Lisp
5266 symbol) `japanese-iso-8bit' by default.
4ed46869 5267
7717c392 5268 o coding-category-iso-7-else
4ed46869
KH
5269
5270 The category for a coding system which has the same code range
df7492f9 5271 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
5272 single shift functions. Assigned the coding-system (Lisp
5273 symbol) `iso-2022-7bit-lock' by default.
5274
5275 o coding-category-iso-8-else
5276
5277 The category for a coding system which has the same code range
df7492f9 5278 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
5279 single shift functions. Assigned the coding-system (Lisp
5280 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5281
5282 o coding-category-big5
5283
5284 The category for a coding system which has the same code range
5285 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5286 `cn-big5' by default.
4ed46869 5287
fa42c37f
KH
5288 o coding-category-utf-8
5289
5290 The category for a coding system which has the same code range
6e76ae91 5291 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
5292 symbol) `utf-8' by default.
5293
5294 o coding-category-utf-16-be
5295
5296 The category for a coding system in which a text has an
5297 Unicode signature (cf. Unicode Standard) in the order of BIG
5298 endian at the head. Assigned the coding-system (Lisp symbol)
5299 `utf-16-be' by default.
5300
5301 o coding-category-utf-16-le
5302
5303 The category for a coding system in which a text has an
5304 Unicode signature (cf. Unicode Standard) in the order of
5305 LITTLE endian at the head. Assigned the coding-system (Lisp
5306 symbol) `utf-16-le' by default.
5307
1397dc18
KH
5308 o coding-category-ccl
5309
5310 The category for a coding system of which encoder/decoder is
5311 written in CCL programs. The default value is nil, i.e., no
5312 coding system is assigned.
5313
4ed46869
KH
5314 o coding-category-binary
5315
5316 The category for a coding system not categorized in any of the
5317 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5318 `no-conversion' by default.
4ed46869
KH
5319
5320 Each of them is a Lisp symbol and the value is an actual
df7492f9 5321 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5322 What Emacs does actually is to detect a category of coding system.
5323 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5324 decide only one possible category, it selects a category of the
4ed46869
KH
5325 highest priority. Priorities of categories are also specified by a
5326 user in a Lisp variable `coding-category-list'.
5327
5328*/
5329
df7492f9
KH
5330#define EOL_SEEN_NONE 0
5331#define EOL_SEEN_LF 1
5332#define EOL_SEEN_CR 2
5333#define EOL_SEEN_CRLF 4
66cfb530 5334
ff0dacd7
KH
5335/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5336 SOURCE is encoded. If CATEGORY is one of
5337 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5338 two-byte, else they are encoded by one-byte.
5339
5340 Return one of EOL_SEEN_XXX. */
4ed46869 5341
bc4bc72a 5342#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
5343
5344static int
89528eb3 5345detect_eol (source, src_bytes, category)
f6cbaf43 5346 const unsigned char *source;
df7492f9 5347 EMACS_INT src_bytes;
89528eb3 5348 enum coding_category category;
4ed46869 5349{
f6cbaf43 5350 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5351 unsigned char c;
df7492f9
KH
5352 int total = 0;
5353 int eol_seen = EOL_SEEN_NONE;
4ed46869 5354
89528eb3 5355 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5356 {
df7492f9 5357 int msb, lsb;
fa42c37f 5358
89528eb3
KH
5359 msb = category == (coding_category_utf_16_le
5360 | coding_category_utf_16_le_nosig);
df7492f9 5361 lsb = 1 - msb;
fa42c37f 5362
df7492f9 5363 while (src + 1 < src_end)
fa42c37f 5364 {
df7492f9
KH
5365 c = src[lsb];
5366 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5367 {
df7492f9
KH
5368 int this_eol;
5369
5370 if (c == '\n')
5371 this_eol = EOL_SEEN_LF;
5372 else if (src + 3 >= src_end
5373 || src[msb + 2] != 0
5374 || src[lsb + 2] != '\n')
5375 this_eol = EOL_SEEN_CR;
fa42c37f 5376 else
8f924df7 5377 this_eol = EOL_SEEN_CRLF;
df7492f9
KH
5378
5379 if (eol_seen == EOL_SEEN_NONE)
5380 /* This is the first end-of-line. */
5381 eol_seen = this_eol;
5382 else if (eol_seen != this_eol)
fa42c37f 5383 {
df7492f9
KH
5384 /* The found type is different from what found before. */
5385 eol_seen = EOL_SEEN_LF;
5386 break;
fa42c37f 5387 }
df7492f9
KH
5388 if (++total == MAX_EOL_CHECK_COUNT)
5389 break;
fa42c37f 5390 }
df7492f9 5391 src += 2;
fa42c37f 5392 }
bcf26d6a 5393 }
d46c5b12 5394 else
c4825358 5395 {
df7492f9 5396 while (src < src_end)
27901516 5397 {
df7492f9
KH
5398 c = *src++;
5399 if (c == '\n' || c == '\r')
5400 {
5401 int this_eol;
d46c5b12 5402
df7492f9
KH
5403 if (c == '\n')
5404 this_eol = EOL_SEEN_LF;
5405 else if (src >= src_end || *src != '\n')
5406 this_eol = EOL_SEEN_CR;
5407 else
5408 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5409
df7492f9
KH
5410 if (eol_seen == EOL_SEEN_NONE)
5411 /* This is the first end-of-line. */
5412 eol_seen = this_eol;
5413 else if (eol_seen != this_eol)
5414 {
5415 /* The found type is different from what found before. */
5416 eol_seen = EOL_SEEN_LF;
5417 break;
5418 }
5419 if (++total == MAX_EOL_CHECK_COUNT)
5420 break;
5421 }
5422 }
73be902c 5423 }
df7492f9 5424 return eol_seen;
73be902c
KH
5425}
5426
df7492f9 5427
24a73b0a 5428static Lisp_Object
df7492f9
KH
5429adjust_coding_eol_type (coding, eol_seen)
5430 struct coding_system *coding;
5431 int eol_seen;
73be902c 5432{
0be8721c 5433 Lisp_Object eol_type;
8f924df7 5434
df7492f9
KH
5435 eol_type = CODING_ID_EOL_TYPE (coding->id);
5436 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
5437 {
5438 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5439 eol_type = Qunix;
5440 }
6f197c07 5441 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
5442 {
5443 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5444 eol_type = Qdos;
5445 }
6f197c07 5446 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
5447 {
5448 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5449 eol_type = Qmac;
5450 }
5451 return eol_type;
d46c5b12 5452}
4ed46869 5453
df7492f9
KH
5454/* Detect how a text specified in CODING is encoded. If a coding
5455 system is detected, update fields of CODING by the detected coding
5456 system. */
0a28aafb 5457
df7492f9
KH
5458void
5459detect_coding (coding)
d46c5b12 5460 struct coding_system *coding;
d46c5b12 5461{
8f924df7 5462 const unsigned char *src, *src_end;
d46c5b12 5463
df7492f9
KH
5464 coding->consumed = coding->consumed_char = 0;
5465 coding->produced = coding->produced_char = 0;
5466 coding_set_source (coding);
1c3478b0 5467
df7492f9 5468 src_end = coding->source + coding->src_bytes;
1c3478b0 5469
df7492f9
KH
5470 /* If we have not yet decided the text encoding type, detect it
5471 now. */
5472 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5473 {
df7492f9 5474 int c, i;
6cb21a4f 5475 struct coding_detection_info detect_info;
df7492f9 5476
6cb21a4f 5477 detect_info.checked = detect_info.found = detect_info.rejected = 0;
24a73b0a 5478 for (i = 0, src = coding->source; src < src_end; i++, src++)
d46c5b12 5479 {
df7492f9 5480 c = *src;
6cb21a4f 5481 if (c & 0x80)
df7492f9 5482 break;
6cb21a4f
KH
5483 if (c < 0x20
5484 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5485 && ! inhibit_iso_escape_detection
5486 && ! detect_info.checked)
5487 {
5488 coding->head_ascii = src - (coding->source + coding->consumed);
5489 if (detect_coding_iso_2022 (coding, &detect_info))
5490 {
5491 /* We have scanned the whole data. */
5492 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5493 /* We didn't find an 8-bit code. */
5494 src = src_end;
5495 break;
5496 }
5497 }
d46c5b12 5498 }
df7492f9
KH
5499 coding->head_ascii = src - (coding->source + coding->consumed);
5500
3aef54f3 5501 if (coding->head_ascii < coding->src_bytes
6cb21a4f 5502 || detect_info.found)
d46c5b12 5503 {
ff0dacd7
KH
5504 enum coding_category category;
5505 struct coding_system *this;
df7492f9 5506
6cb21a4f
KH
5507 if (coding->head_ascii == coding->src_bytes)
5508 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
5509 for (i = 0; i < coding_category_raw_text; i++)
5510 {
5511 category = coding_priorities[i];
5512 this = coding_categories + category;
5513 if (detect_info.found & (1 << category))
24a73b0a 5514 break;
6cb21a4f
KH
5515 }
5516 else
5517 for (i = 0; i < coding_category_raw_text; i++)
5518 {
5519 category = coding_priorities[i];
5520 this = coding_categories + category;
5521 if (this->id < 0)
5522 {
5523 /* No coding system of this category is defined. */
5524 detect_info.rejected |= (1 << category);
5525 }
5526 else if (category >= coding_category_raw_text)
5527 continue;
5528 else if (detect_info.checked & (1 << category))
5529 {
5530 if (detect_info.found & (1 << category))
5531 break;
5532 }
5533 else if ((*(this->detector)) (coding, &detect_info)
5534 && detect_info.found & (1 << category))
5535 {
5536 if (category == coding_category_utf_16_auto)
5537 {
5538 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5539 category = coding_category_utf_16_le;
5540 else
5541 category = coding_category_utf_16_be;
5542 }
5543 break;
5544 }
5545 }
5546
ff0dacd7
KH
5547 if (i < coding_category_raw_text)
5548 setup_coding_system (CODING_ID_NAME (this->id), coding);
5549 else if (detect_info.rejected == CATEGORY_MASK_ANY)
df7492f9 5550 setup_coding_system (Qraw_text, coding);
ff0dacd7 5551 else if (detect_info.rejected)
df7492f9 5552 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7
KH
5553 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5554 {
5555 this = coding_categories + coding_priorities[i];
5556 setup_coding_system (CODING_ID_NAME (this->id), coding);
5557 break;
5558 }
d46c5b12 5559 }
b73bfc1c 5560 }
24a73b0a
KH
5561 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5562 == coding_category_utf_16_auto)
b49a1807
KH
5563 {
5564 Lisp_Object coding_systems;
5565 struct coding_detection_info detect_info;
5566
5567 coding_systems
5568 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5569 detect_info.found = detect_info.rejected = 0;
5570 if (CONSP (coding_systems)
24a73b0a 5571 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
5572 {
5573 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5574 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 5575 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
5576 setup_coding_system (XCDR (coding_systems), coding);
5577 }
5578 }
4ed46869 5579}
4ed46869 5580
d46c5b12 5581
aaaf0b1e 5582static void
df7492f9 5583decode_eol (coding)
aaaf0b1e 5584 struct coding_system *coding;
aaaf0b1e 5585{
24a73b0a
KH
5586 Lisp_Object eol_type;
5587 unsigned char *p, *pbeg, *pend;
5588
5589 eol_type = CODING_ID_EOL_TYPE (coding->id);
5590 if (EQ (eol_type, Qunix))
5591 return;
5592
5593 if (NILP (coding->dst_object))
5594 pbeg = coding->destination;
5595 else
5596 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5597 pend = pbeg + coding->produced;
5598
5599 if (VECTORP (eol_type))
aaaf0b1e 5600 {
df7492f9 5601 int eol_seen = EOL_SEEN_NONE;
4ed46869 5602
24a73b0a 5603 for (p = pbeg; p < pend; p++)
aaaf0b1e 5604 {
df7492f9
KH
5605 if (*p == '\n')
5606 eol_seen |= EOL_SEEN_LF;
5607 else if (*p == '\r')
aaaf0b1e 5608 {
df7492f9 5609 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5610 {
df7492f9
KH
5611 eol_seen |= EOL_SEEN_CRLF;
5612 p++;
aaaf0b1e 5613 }
aaaf0b1e 5614 else
df7492f9 5615 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5616 }
aaaf0b1e 5617 }
24a73b0a
KH
5618 if (eol_seen != EOL_SEEN_NONE
5619 && eol_seen != EOL_SEEN_LF
5620 && eol_seen != EOL_SEEN_CRLF
5621 && eol_seen != EOL_SEEN_CR)
5622 eol_seen = EOL_SEEN_LF;
df7492f9 5623 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 5624 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5625 }
d46c5b12 5626
24a73b0a 5627 if (EQ (eol_type, Qmac))
27901516 5628 {
24a73b0a 5629 for (p = pbeg; p < pend; p++)
df7492f9
KH
5630 if (*p == '\r')
5631 *p = '\n';
4ed46869 5632 }
24a73b0a 5633 else if (EQ (eol_type, Qdos))
df7492f9 5634 {
24a73b0a 5635 int n = 0;
b73bfc1c 5636
24a73b0a
KH
5637 if (NILP (coding->dst_object))
5638 {
4347441b
KH
5639 /* Start deleting '\r' from the tail to minimize the memory
5640 movement. */
24a73b0a
KH
5641 for (p = pend - 2; p >= pbeg; p--)
5642 if (*p == '\r')
5643 {
5644 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5645 n++;
5646 }
5647 }
5648 else
5649 {
4347441b
KH
5650 int pos_byte = coding->dst_pos_byte;
5651 int pos = coding->dst_pos;
5652 int pos_end = pos + coding->produced_char - 1;
5653
5654 while (pos < pos_end)
5655 {
5656 p = BYTE_POS_ADDR (pos_byte);
5657 if (*p == '\r' && p[1] == '\n')
5658 {
5659 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5660 n++;
5661 pos_end--;
5662 }
5663 pos++;
5664 pos_byte += BYTES_BY_CHAR_HEAD (*p);
5665 }
24a73b0a
KH
5666 }
5667 coding->produced -= n;
5668 coding->produced_char -= n;
aaaf0b1e 5669 }
4ed46869
KH
5670}
5671
7d64c6ad 5672
a6f87d34
KH
5673/* Return a translation table (or list of them) from coding system
5674 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5675 decoding (ENCODEP is zero). */
7d64c6ad 5676
e6a54062 5677static Lisp_Object
09ee6fdd
KH
5678get_translation_table (attrs, encodep, max_lookup)
5679 Lisp_Object attrs;
5680 int encodep, *max_lookup;
7d64c6ad
KH
5681{
5682 Lisp_Object standard, translation_table;
09ee6fdd 5683 Lisp_Object val;
7d64c6ad
KH
5684
5685 if (encodep)
5686 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5687 standard = Vstandard_translation_table_for_encode;
5688 else
5689 translation_table = CODING_ATTR_DECODE_TBL (attrs),
5690 standard = Vstandard_translation_table_for_decode;
7d64c6ad 5691 if (NILP (translation_table))
09ee6fdd
KH
5692 translation_table = standard;
5693 else
a6f87d34 5694 {
09ee6fdd
KH
5695 if (SYMBOLP (translation_table))
5696 translation_table = Fget (translation_table, Qtranslation_table);
5697 else if (CONSP (translation_table))
5698 {
5699 translation_table = Fcopy_sequence (translation_table);
5700 for (val = translation_table; CONSP (val); val = XCDR (val))
5701 if (SYMBOLP (XCAR (val)))
5702 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5703 }
5704 if (CHAR_TABLE_P (standard))
5705 {
5706 if (CONSP (translation_table))
5707 translation_table = nconc2 (translation_table,
5708 Fcons (standard, Qnil));
5709 else
5710 translation_table = Fcons (translation_table,
5711 Fcons (standard, Qnil));
5712 }
a6f87d34 5713 }
2170c8f0
KH
5714
5715 if (max_lookup)
09ee6fdd 5716 {
2170c8f0
KH
5717 *max_lookup = 1;
5718 if (CHAR_TABLE_P (translation_table)
5719 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5720 {
5721 val = XCHAR_TABLE (translation_table)->extras[1];
5722 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5723 *max_lookup = XFASTINT (val);
5724 }
5725 else if (CONSP (translation_table))
5726 {
5727 Lisp_Object tail, val;
09ee6fdd 5728
2170c8f0
KH
5729 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5730 if (CHAR_TABLE_P (XCAR (tail))
5731 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5732 {
5733 val = XCHAR_TABLE (XCAR (tail))->extras[1];
5734 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5735 *max_lookup = XFASTINT (val);
5736 }
5737 }
a6f87d34 5738 }
7d64c6ad
KH
5739 return translation_table;
5740}
5741
/* Look up character C in TABLE, which is a char-table or a list of
   char-tables (anything else leaves TRANS as Qnil).

   When a table maps C to a character, C is replaced by that
   character and TRANS is reset to Qnil.  In a list the lookup then
   continues in the following tables with the new C; it stops at the
   first table yielding a non-nil, non-character value, which is
   left in TRANS.  C and TRANS must be lvalues; TABLE is evaluated
   more than once.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    trans = Qnil;                                                       \
    if (CHAR_TABLE_P (table))                                           \
      {                                                                 \
        trans = CHAR_TABLE_REF ((table), c);                            \
        if (CHARACTERP (trans))                                         \
          {                                                             \
            c = XFASTINT (trans);                                       \
            trans = Qnil;                                               \
          }                                                             \
      }                                                                 \
    else if (CONSP (table))                                             \
      {                                                                 \
        Lisp_Object tables_;                                            \
                                                                        \
        for (tables_ = (table); CONSP (tables_);                        \
             tables_ = XCDR (tables_))                                  \
          {                                                             \
            if (! CHAR_TABLE_P (XCAR (tables_)))                        \
              continue;                                                 \
            trans = CHAR_TABLE_REF (XCAR (tables_), c);                 \
            if (CHARACTERP (trans))                                     \
              {                                                         \
                c = XFASTINT (trans);                                   \
                trans = Qnil;                                           \
              }                                                         \
            else if (! NILP (trans))                                    \
              break;                                                    \
          }                                                             \
      }                                                                 \
  } while (0)
5766
7d64c6ad 5767
69a80ea3
KH
5768static Lisp_Object
5769get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5770 Lisp_Object val;
5771 int *buf, *buf_end;
5772 int last_block;
5773 int *from_nchars, *to_nchars;
5774{
433f7f87
KH
5775 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
5776 [TO-CHAR ...]. */
69a80ea3
KH
5777 if (CONSP (val))
5778 {
433f7f87 5779 Lisp_Object from, tail;
69a80ea3
KH
5780 int i, len;
5781
433f7f87 5782 for (tail = val; CONSP (tail); tail = XCDR (tail))
69a80ea3 5783 {
433f7f87
KH
5784 val = XCAR (tail);
5785 from = XCAR (val);
5786 len = ASIZE (from);
5787 for (i = 0; i < len; i++)
5788 {
5789 if (buf + i == buf_end)
5790 {
5791 if (! last_block)
5792 return Qt;
5793 break;
5794 }
5795 if (XINT (AREF (from, i)) != buf[i])
5796 break;
5797 }
5798 if (i == len)
5799 {
5800 val = XCDR (val);
5801 *from_nchars = len;
5802 break;
5803 }
69a80ea3 5804 }
433f7f87
KH
5805 if (! CONSP (tail))
5806 return Qnil;
69a80ea3
KH
5807 }
5808 if (VECTORP (val))
5809 *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
5810 else
5811 *buf = XINT (val);
5812 return val;
5813}
5814
5815
d46c5b12 5816static int
69a80ea3 5817produce_chars (coding, translation_table, last_block)
df7492f9 5818 struct coding_system *coding;
69a80ea3
KH
5819 Lisp_Object translation_table;
5820 int last_block;
4ed46869 5821{
df7492f9
KH
5822 unsigned char *dst = coding->destination + coding->produced;
5823 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5824 int produced;
5825 int produced_chars = 0;
69a80ea3 5826 int carryover = 0;
4ed46869 5827
df7492f9 5828 if (! coding->chars_at_source)
4ed46869 5829 {
df7492f9 5830 /* Characters are in coding->charbuf. */
fba4576f
AS
5831 int *buf = coding->charbuf;
5832 int *buf_end = buf + coding->charbuf_used;
4ed46869 5833
df7492f9
KH
5834 if (BUFFERP (coding->src_object)
5835 && EQ (coding->src_object, coding->dst_object))
8f924df7 5836 dst_end = ((unsigned char *) coding->source) + coding->consumed;
4ed46869 5837
df7492f9 5838 while (buf < buf_end)
4ed46869 5839 {
69a80ea3 5840 int c = *buf, i;
bc4bc72a 5841
df7492f9
KH
5842 if (c >= 0)
5843 {
69a80ea3
KH
5844 int from_nchars = 1, to_nchars = 1;
5845 Lisp_Object trans = Qnil;
5846
09ee6fdd 5847 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 5848 if (! NILP (trans))
69a80ea3
KH
5849 {
5850 trans = get_translation (trans, buf, buf_end, last_block,
5851 &from_nchars, &to_nchars);
5852 if (EQ (trans, Qt))
5853 break;
5854 c = *buf;
5855 }
5856
5857 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
5858 {
5859 dst = alloc_destination (coding,
5860 buf_end - buf
5861 + MAX_MULTIBYTE_LENGTH * to_nchars,
5862 dst);
5863 dst_end = coding->destination + coding->dst_bytes;
5864 }
5865
433f7f87 5866 for (i = 0; i < to_nchars; i++)
69a80ea3 5867 {
433f7f87
KH
5868 if (i > 0)
5869 c = XINT (AREF (trans, i));
69a80ea3
KH
5870 if (coding->dst_multibyte
5871 || ! CHAR_BYTE8_P (c))
5872 CHAR_STRING_ADVANCE (c, dst);
5873 else
5874 *dst++ = CHAR_TO_BYTE8 (c);
5875 }
5876 produced_chars += to_nchars;
5877 *buf++ = to_nchars;
5878 while (--from_nchars > 0)
5879 *buf++ = 0;
d46c5b12 5880 }
df7492f9 5881 else
69a80ea3
KH
5882 /* This is an annotation datum. (-C) is the length. */
5883 buf += -c;
4ed46869 5884 }
69a80ea3 5885 carryover = buf_end - buf;
4ed46869 5886 }
fa42c37f 5887 else
fa42c37f 5888 {
8f924df7
KH
5889 const unsigned char *src = coding->source;
5890 const unsigned char *src_end = src + coding->src_bytes;
df7492f9 5891 Lisp_Object eol_type;
fa42c37f 5892
df7492f9 5893 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5894
df7492f9 5895 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 5896 {
df7492f9 5897 if (coding->src_multibyte)
fa42c37f 5898 {
71c81426 5899 int multibytep = 1;
df7492f9 5900 int consumed_chars;
d46c5b12 5901
df7492f9
KH
5902 while (1)
5903 {
8f924df7 5904 const unsigned char *src_base = src;
df7492f9 5905 int c;
b73bfc1c 5906
df7492f9
KH
5907 ONE_MORE_BYTE (c);
5908 if (c == '\r')
5909 {
5910 if (EQ (eol_type, Qdos))
5911 {
98725083
KH
5912 if (src == src_end)
5913 {
065e3595
KH
5914 record_conversion_result
5915 (coding, CODING_RESULT_INSUFFICIENT_SRC);
98725083
KH
5916 goto no_more_source;
5917 }
5918 if (*src == '\n')
df7492f9
KH
5919 c = *src++;
5920 }
5921 else if (EQ (eol_type, Qmac))
5922 c = '\n';
5923 }
5924 if (dst == dst_end)
5925 {
2c78b7e1 5926 coding->consumed = src - coding->source;
b73bfc1c 5927
2c78b7e1 5928 if (EQ (coding->src_object, coding->dst_object))
8f924df7 5929 dst_end = (unsigned char *) src;
2c78b7e1
KH
5930 if (dst == dst_end)
5931 {
5932 dst = alloc_destination (coding, src_end - src + 1,
5933 dst);
5934 dst_end = coding->destination + coding->dst_bytes;
5935 coding_set_source (coding);
5936 src = coding->source + coding->consumed;
5937 src_end = coding->source + coding->src_bytes;
5938 }
df7492f9
KH
5939 }
5940 *dst++ = c;
5941 produced_chars++;
5942 }
5943 no_more_source:
5944 ;
fa42c37f
KH
5945 }
5946 else
df7492f9
KH
5947 while (src < src_end)
5948 {
71c81426 5949 int multibytep = 1;
df7492f9 5950 int c = *src++;
b73bfc1c 5951
df7492f9
KH
5952 if (c == '\r')
5953 {
5954 if (EQ (eol_type, Qdos))
5955 {
5956 if (src < src_end
5957 && *src == '\n')
5958 c = *src++;
5959 }
5960 else if (EQ (eol_type, Qmac))
5961 c = '\n';
5962 }
5963 if (dst >= dst_end - 1)
5964 {
2c78b7e1 5965 coding->consumed = src - coding->source;
df7492f9 5966
2c78b7e1 5967 if (EQ (coding->src_object, coding->dst_object))
8f924df7 5968 dst_end = (unsigned char *) src;
2c78b7e1
KH
5969 if (dst >= dst_end - 1)
5970 {
5971 dst = alloc_destination (coding, src_end - src + 2,
5972 dst);
5973 dst_end = coding->destination + coding->dst_bytes;
5974 coding_set_source (coding);
5975 src = coding->source + coding->consumed;
5976 src_end = coding->source + coding->src_bytes;
5977 }
df7492f9
KH
5978 }
5979 EMIT_ONE_BYTE (c);
5980 }
d46c5b12 5981 }
df7492f9
KH
5982 else
5983 {
5984 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 5985 {
df7492f9 5986 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5987
df7492f9 5988 if (require > 0)
fa42c37f 5989 {
df7492f9
KH
5990 EMACS_INT offset = src - coding->source;
5991
5992 dst = alloc_destination (coding, require, dst);
5993 coding_set_source (coding);
5994 src = coding->source + offset;
5995 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
5996 }
5997 }
df7492f9
KH
5998 produced_chars = coding->src_chars;
5999 while (src < src_end)
fa42c37f 6000 {
df7492f9
KH
6001 int c = *src++;
6002
6003 if (c == '\r')
6004 {
6005 if (EQ (eol_type, Qdos))
6006 {
6007 if (src < src_end
6008 && *src == '\n')
6009 c = *src++;
6010 produced_chars--;
6011 }
6012 else if (EQ (eol_type, Qmac))
6013 c = '\n';
6014 }
6015 *dst++ = c;
fa42c37f
KH
6016 }
6017 }
2c78b7e1
KH
6018 coding->consumed = coding->src_bytes;
6019 coding->consumed_char = coding->src_chars;
fa42c37f
KH
6020 }
6021
df7492f9
KH
6022 produced = dst - (coding->destination + coding->produced);
6023 if (BUFFERP (coding->dst_object))
6024 insert_from_gap (produced_chars, produced);
6025 coding->produced += produced;
6026 coding->produced_char += produced_chars;
69a80ea3 6027 return carryover;
fa42c37f
KH
6028}
6029
ff0dacd7
KH
6030/* Compose text in CODING->object according to the annotation data at
6031 CHARBUF. CHARBUF is an array:
6032 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 6033 */
4ed46869 6034
df7492f9 6035static INLINE void
69a80ea3 6036produce_composition (coding, charbuf, pos)
4ed46869 6037 struct coding_system *coding;
df7492f9 6038 int *charbuf;
69a80ea3 6039 EMACS_INT pos;
4ed46869 6040{
df7492f9 6041 int len;
69a80ea3 6042 EMACS_INT to;
df7492f9 6043 enum composition_method method;
df7492f9 6044 Lisp_Object components;
fa42c37f 6045
df7492f9 6046 len = -charbuf[0];
69a80ea3 6047 to = pos + charbuf[2];
9ffd559c
KH
6048 if (to <= pos)
6049 return;
69a80ea3 6050 method = (enum composition_method) (charbuf[3]);
d46c5b12 6051
df7492f9
KH
6052 if (method == COMPOSITION_RELATIVE)
6053 components = Qnil;
9ffd559c
KH
6054 else if (method >= COMPOSITION_WITH_RULE
6055 && method <= COMPOSITION_WITH_RULE_ALTCHARS)
d46c5b12 6056 {
df7492f9
KH
6057 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6058 int i;
b73bfc1c 6059
69a80ea3
KH
6060 len -= 4;
6061 charbuf += 4;
df7492f9 6062 for (i = 0; i < len; i++)
9ffd559c
KH
6063 {
6064 args[i] = make_number (charbuf[i]);
f75c90a9 6065 if (charbuf[i] < 0)
9ffd559c
KH
6066 return;
6067 }
df7492f9
KH
6068 components = (method == COMPOSITION_WITH_ALTCHARS
6069 ? Fstring (len, args) : Fvector (len, args));
d46c5b12 6070 }
9ffd559c
KH
6071 else
6072 return;
69a80ea3 6073 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
6074}
6075
d46c5b12 6076
ff0dacd7
KH
6077/* Put `charset' property on text in CODING->object according to
6078 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 6079 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 6080 */
d46c5b12 6081
ff0dacd7 6082static INLINE void
69a80ea3 6083produce_charset (coding, charbuf, pos)
d46c5b12 6084 struct coding_system *coding;
ff0dacd7 6085 int *charbuf;
69a80ea3 6086 EMACS_INT pos;
d46c5b12 6087{
69a80ea3
KH
6088 EMACS_INT from = pos - charbuf[2];
6089 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 6090
69a80ea3 6091 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
6092 Qcharset, CHARSET_NAME (charset),
6093 coding->dst_object);
d46c5b12
KH
6094}
6095
d46c5b12 6096
/* Number of ints requested first for the conversion work area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca, starting at CHARBUF_SIZE
   ints and halving the request while the allocation yields NULL and
   the size is still above 1024.  On success the size is recorded in
   CODING->charbuf_size.  On failure the conversion result
   CODING_RESULT_INSUFFICIENT_MEM is recorded and the *calling
   function* returns CODING->result.

   Because the area comes from alloca, this must be expanded directly
   in the function that uses the buffer.  */

#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    for (; size > 1024; size >>= 1)                                     \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        record_conversion_result ((coding),                             \
                                  CODING_RESULT_INSUFFICIENT_MEM);      \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
4ed46869 6118
d46c5b12
KH
6119
6120static void
69a80ea3 6121produce_annotation (coding, pos)
d46c5b12 6122 struct coding_system *coding;
69a80ea3 6123 EMACS_INT pos;
d46c5b12 6124{
df7492f9
KH
6125 int *charbuf = coding->charbuf;
6126 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 6127
ff0dacd7
KH
6128 if (NILP (coding->dst_object))
6129 return;
d46c5b12 6130
df7492f9 6131 while (charbuf < charbuf_end)
a84f1519 6132 {
df7492f9 6133 if (*charbuf >= 0)
69a80ea3 6134 pos += *charbuf++;
d46c5b12 6135 else
d46c5b12 6136 {
df7492f9 6137 int len = -*charbuf;
ff0dacd7 6138 switch (charbuf[1])
df7492f9
KH
6139 {
6140 case CODING_ANNOTATE_COMPOSITION_MASK:
69a80ea3 6141 produce_composition (coding, charbuf, pos);
df7492f9 6142 break;
ff0dacd7 6143 case CODING_ANNOTATE_CHARSET_MASK:
69a80ea3 6144 produce_charset (coding, charbuf, pos);
ff0dacd7 6145 break;
df7492f9
KH
6146 default:
6147 abort ();
6148 }
6149 charbuf += len;
d46c5b12 6150 }
a84f1519 6151 }
d46c5b12
KH
6152}
6153
df7492f9
KH
6154/* Decode the data at CODING->src_object into CODING->dst_object.
6155 CODING->src_object is a buffer, a string, or nil.
6156 CODING->dst_object is a buffer.
d46c5b12 6157
df7492f9
KH
6158 If CODING->src_object is a buffer, it must be the current buffer.
6159 In this case, if CODING->src_pos is positive, it is a position of
6160 the source text in the buffer, otherwise, the source text is in the
6161 gap area of the buffer, and CODING->src_pos specifies the offset of
6162 the text from GPT (which must be the same as PT). If this is the
6163 same buffer as CODING->dst_object, CODING->src_pos must be
6164 negative.
d46c5b12 6165
b6828792 6166 If CODING->src_object is a string, CODING->src_pos is an index to
df7492f9 6167 that string.
d46c5b12 6168
df7492f9
KH
6169 If CODING->src_object is nil, CODING->source must already point to
6170 the non-relocatable memory area. In this case, CODING->src_pos is
6171 an offset from CODING->source.
73be902c 6172
df7492f9
KH
6173 The decoded data is inserted at the current point of the buffer
6174 CODING->dst_object.
6175*/
d46c5b12 6176
df7492f9
KH
6177static int
6178decode_coding (coding)
d46c5b12 6179 struct coding_system *coding;
d46c5b12 6180{
df7492f9 6181 Lisp_Object attrs;
24a73b0a 6182 Lisp_Object undo_list;
7d64c6ad 6183 Lisp_Object translation_table;
69a80ea3
KH
6184 int carryover;
6185 int i;
d46c5b12 6186
df7492f9
KH
6187 if (BUFFERP (coding->src_object)
6188 && coding->src_pos > 0
6189 && coding->src_pos < GPT
6190 && coding->src_pos + coding->src_chars > GPT)
6191 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 6192
24a73b0a 6193 undo_list = Qt;
df7492f9 6194 if (BUFFERP (coding->dst_object))
1c3478b0 6195 {
df7492f9
KH
6196 if (current_buffer != XBUFFER (coding->dst_object))
6197 set_buffer_internal (XBUFFER (coding->dst_object));
6198 if (GPT != PT)
6199 move_gap_both (PT, PT_BYTE);
24a73b0a
KH
6200 undo_list = current_buffer->undo_list;
6201 current_buffer->undo_list = Qt;
1c3478b0
KH
6202 }
6203
df7492f9
KH
6204 coding->consumed = coding->consumed_char = 0;
6205 coding->produced = coding->produced_char = 0;
6206 coding->chars_at_source = 0;
065e3595 6207 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 6208 coding->errors = 0;
1c3478b0 6209
df7492f9
KH
6210 ALLOC_CONVERSION_WORK_AREA (coding);
6211
6212 attrs = CODING_ID_ATTRS (coding->id);
2170c8f0 6213 translation_table = get_translation_table (attrs, 0, NULL);
df7492f9 6214
69a80ea3 6215 carryover = 0;
df7492f9 6216 do
b73bfc1c 6217 {
69a80ea3
KH
6218 EMACS_INT pos = coding->dst_pos + coding->produced_char;
6219
df7492f9
KH
6220 coding_set_source (coding);
6221 coding->annotated = 0;
69a80ea3 6222 coding->charbuf_used = carryover;
df7492f9 6223 (*(coding->decoder)) (coding);
df7492f9 6224 coding_set_destination (coding);
69a80ea3 6225 carryover = produce_chars (coding, translation_table, 0);
df7492f9 6226 if (coding->annotated)
69a80ea3
KH
6227 produce_annotation (coding, pos);
6228 for (i = 0; i < carryover; i++)
6229 coding->charbuf[i]
6230 = coding->charbuf[coding->charbuf_used - carryover + i];
d46c5b12 6231 }
df7492f9 6232 while (coding->consumed < coding->src_bytes
54b367bb
KH
6233 && (coding->result == CODING_RESULT_SUCCESS
6234 || coding->result == CODING_RESULT_INVALID_SRC));
d46c5b12 6235
69a80ea3
KH
6236 if (carryover > 0)
6237 {
6238 coding_set_destination (coding);
6239 coding->charbuf_used = carryover;
6240 produce_chars (coding, translation_table, 1);
6241 }
6242
df7492f9
KH
6243 coding->carryover_bytes = 0;
6244 if (coding->consumed < coding->src_bytes)
d46c5b12 6245 {
df7492f9 6246 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 6247 const unsigned char *src;
df7492f9
KH
6248
6249 coding_set_source (coding);
6250 coding_set_destination (coding);
6251 src = coding->source + coding->consumed;
6252
6253 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 6254 {
df7492f9
KH
6255 /* Flush out unprocessed data as binary chars. We are sure
6256 that the number of data is less than the size of
6257 coding->charbuf. */
065e3595 6258 coding->charbuf_used = 0;
df7492f9 6259 while (nbytes-- > 0)
1c3478b0 6260 {
df7492f9 6261 int c = *src++;
98725083 6262
1c91457d
KH
6263 if (c & 0x80)
6264 c = BYTE8_TO_CHAR (c);
6265 coding->charbuf[coding->charbuf_used++] = c;
1c3478b0 6266 }
f6cbaf43 6267 produce_chars (coding, Qnil, 1);
d46c5b12 6268 }
d46c5b12 6269 else
df7492f9
KH
6270 {
6271 /* Record unprocessed bytes in coding->carryover. We are
6272 sure that the number of data is less than the size of
6273 coding->carryover. */
6274 unsigned char *p = coding->carryover;
6275
6276 coding->carryover_bytes = nbytes;
6277 while (nbytes-- > 0)
6278 *p++ = *src++;
1c3478b0 6279 }
df7492f9 6280 coding->consumed = coding->src_bytes;
b73bfc1c 6281 }
69f76525 6282
4347441b
KH
6283 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6284 decode_eol (coding);
24a73b0a
KH
6285 if (BUFFERP (coding->dst_object))
6286 {
6287 current_buffer->undo_list = undo_list;
6288 record_insert (coding->dst_pos, coding->produced_char);
6289 }
73be902c 6290 return coding->result;
4ed46869
KH
6291}
6292
aaaf0b1e 6293
e1c23804 6294/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
6295 ending before LIMIT of CODING->src_object (buffer or string), store
6296 the data in BUF, set *STOP to a starting position of the next
6297 composition (if any) or to LIMIT, and return the address of the
6298 next element of BUF.
6299
6300 If such an annotation is not found, set *STOP to a starting
6301 position of a composition after POS (if any) or to LIMIT, and
6302 return BUF. */
6303
6304static INLINE int *
6305handle_composition_annotation (pos, limit, coding, buf, stop)
6306 EMACS_INT pos, limit;
aaaf0b1e 6307 struct coding_system *coding;
ff0dacd7
KH
6308 int *buf;
6309 EMACS_INT *stop;
aaaf0b1e 6310{
ff0dacd7
KH
6311 EMACS_INT start, end;
6312 Lisp_Object prop;
aaaf0b1e 6313
ff0dacd7
KH
6314 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6315 || end > limit)
6316 *stop = limit;
6317 else if (start > pos)
6318 *stop = start;
6319 else
aaaf0b1e 6320 {
ff0dacd7 6321 if (start == pos)
aaaf0b1e 6322 {
ff0dacd7
KH
6323 /* We found a composition. Store the corresponding
6324 annotation data in BUF. */
6325 int *head = buf;
6326 enum composition_method method = COMPOSITION_METHOD (prop);
6327 int nchars = COMPOSITION_LENGTH (prop);
6328
69a80ea3 6329 ADD_COMPOSITION_DATA (buf, nchars, method);
ff0dacd7 6330 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 6331 {
ff0dacd7
KH
6332 Lisp_Object components;
6333 int len, i, i_byte;
6334
6335 components = COMPOSITION_COMPONENTS (prop);
6336 if (VECTORP (components))
aaaf0b1e 6337 {
ff0dacd7
KH
6338 len = XVECTOR (components)->size;
6339 for (i = 0; i < len; i++)
6340 *buf++ = XINT (AREF (components, i));
aaaf0b1e 6341 }
ff0dacd7 6342 else if (STRINGP (components))
aaaf0b1e 6343 {
8f924df7 6344 len = SCHARS (components);
ff0dacd7
KH
6345 i = i_byte = 0;
6346 while (i < len)
6347 {
6348 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6349 buf++;
6350 }
6351 }
6352 else if (INTEGERP (components))
6353 {
6354 len = 1;
6355 *buf++ = XINT (components);
6356 }
6357 else if (CONSP (components))
6358 {
6359 for (len = 0; CONSP (components);
6360 len++, components = XCDR (components))
6361 *buf++ = XINT (XCAR (components));
aaaf0b1e 6362 }
aaaf0b1e 6363 else
ff0dacd7
KH
6364 abort ();
6365 *head -= len;
aaaf0b1e 6366 }
aaaf0b1e 6367 }
ff0dacd7
KH
6368
6369 if (find_composition (end, limit, &start, &end, &prop,
6370 coding->src_object)
6371 && end <= limit)
6372 *stop = start;
6373 else
6374 *stop = limit;
aaaf0b1e 6375 }
ff0dacd7
KH
6376 return buf;
6377}
6378
6379
e1c23804 6380/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
6381 CODING->src_object (buffer of string), store the data in BUF, set
6382 *STOP to the position where the value of `charset' property changes
6383 (limiting by LIMIT), and return the address of the next element of
6384 BUF.
6385
6386 If the property value is nil, set *STOP to the position where the
6387 property value is non-nil (limiting by LIMIT), and return BUF. */
6388
6389static INLINE int *
6390handle_charset_annotation (pos, limit, coding, buf, stop)
6391 EMACS_INT pos, limit;
6392 struct coding_system *coding;
6393 int *buf;
6394 EMACS_INT *stop;
6395{
6396 Lisp_Object val, next;
6397 int id;
6398
6399 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6400 if (! NILP (val) && CHARSETP (val))
6401 id = XINT (CHARSET_SYMBOL_ID (val));
6402 else
6403 id = -1;
69a80ea3 6404 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
6405 next = Fnext_single_property_change (make_number (pos), Qcharset,
6406 coding->src_object,
6407 make_number (limit));
6408 *stop = XINT (next);
6409 return buf;
6410}
6411
6412
df7492f9 6413static void
09ee6fdd 6414consume_chars (coding, translation_table, max_lookup)
df7492f9 6415 struct coding_system *coding;
433f7f87 6416 Lisp_Object translation_table;
09ee6fdd 6417 int max_lookup;
df7492f9
KH
6418{
6419 int *buf = coding->charbuf;
ff0dacd7 6420 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 6421 const unsigned char *src = coding->source + coding->consumed;
4776e638 6422 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
6423 EMACS_INT pos = coding->src_pos + coding->consumed_char;
6424 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
6425 int multibytep = coding->src_multibyte;
6426 Lisp_Object eol_type;
6427 int c;
ff0dacd7 6428 EMACS_INT stop, stop_composition, stop_charset;
09ee6fdd 6429 int *lookup_buf = NULL;
433f7f87
KH
6430
6431 if (! NILP (translation_table))
09ee6fdd 6432 lookup_buf = alloca (sizeof (int) * max_lookup);
88993dfd 6433
df7492f9
KH
6434 eol_type = CODING_ID_EOL_TYPE (coding->id);
6435 if (VECTORP (eol_type))
6436 eol_type = Qunix;
88993dfd 6437
df7492f9
KH
6438 /* Note: composition handling is not yet implemented. */
6439 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 6440
0b5670c9
KH
6441 if (NILP (coding->src_object))
6442 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 6443 else
0b5670c9
KH
6444 {
6445 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6446 stop = stop_composition = pos;
6447 else
6448 stop = stop_composition = end_pos;
6449 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6450 stop = stop_charset = pos;
6451 else
6452 stop_charset = end_pos;
6453 }
ec6d2bb8 6454
24a73b0a 6455 /* Compensate for CRLF and conversion. */
ff0dacd7 6456 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 6457 while (buf < buf_end)
aaaf0b1e 6458 {
433f7f87
KH
6459 Lisp_Object trans;
6460
df7492f9 6461 if (pos == stop)
ec6d2bb8 6462 {
df7492f9
KH
6463 if (pos == end_pos)
6464 break;
ff0dacd7
KH
6465 if (pos == stop_composition)
6466 buf = handle_composition_annotation (pos, end_pos, coding,
6467 buf, &stop_composition);
6468 if (pos == stop_charset)
6469 buf = handle_charset_annotation (pos, end_pos, coding,
6470 buf, &stop_charset);
6471 stop = (stop_composition < stop_charset
6472 ? stop_composition : stop_charset);
df7492f9
KH
6473 }
6474
6475 if (! multibytep)
4776e638 6476 {
d3e4cb56 6477 EMACS_INT bytes;
aaaf0b1e 6478
ea29edf2
KH
6479 if (coding->encoder == encode_coding_raw_text)
6480 c = *src++, pos++;
6481 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
4776e638
KH
6482 c = STRING_CHAR_ADVANCE (src), pos += bytes;
6483 else
f03caae0 6484 c = BYTE8_TO_CHAR (*src), src++, pos++;
4776e638 6485 }
df7492f9 6486 else
4776e638 6487 c = STRING_CHAR_ADVANCE (src), pos++;
df7492f9
KH
6488 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6489 c = '\n';
6490 if (! EQ (eol_type, Qunix))
aaaf0b1e 6491 {
df7492f9 6492 if (c == '\n')
aaaf0b1e 6493 {
df7492f9
KH
6494 if (EQ (eol_type, Qdos))
6495 *buf++ = '\r';
6496 else
6497 c = '\r';
aaaf0b1e
KH
6498 }
6499 }
433f7f87 6500
e6a54062 6501 trans = Qnil;
09ee6fdd 6502 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6503 if (NILP (trans))
433f7f87
KH
6504 *buf++ = c;
6505 else
6506 {
6507 int from_nchars = 1, to_nchars = 1;
6508 int *lookup_buf_end;
6509 const unsigned char *p = src;
6510 int i;
6511
6512 lookup_buf[0] = c;
6513 for (i = 1; i < max_lookup && p < src_end; i++)
6514 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6515 lookup_buf_end = lookup_buf + i;
6516 trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6517 &from_nchars, &to_nchars);
6518 if (EQ (trans, Qt)
6519 || buf + to_nchars > buf_end)
6520 break;
6521 *buf++ = *lookup_buf;
6522 for (i = 1; i < to_nchars; i++)
6523 *buf++ = XINT (AREF (trans, i));
6524 for (i = 1; i < from_nchars; i++, pos++)
6525 src += MULTIBYTE_LENGTH_NO_CHECK (src);
6526 }
aaaf0b1e 6527 }
ec6d2bb8 6528
df7492f9
KH
6529 coding->consumed = src - coding->source;
6530 coding->consumed_char = pos - coding->src_pos;
6531 coding->charbuf_used = buf - coding->charbuf;
6532 coding->chars_at_source = 0;
aaaf0b1e
KH
6533}
6534
4ed46869 6535
df7492f9
KH
6536/* Encode the text at CODING->src_object into CODING->dst_object.
6537 CODING->src_object is a buffer or a string.
6538 CODING->dst_object is a buffer or nil.
6539
6540 If CODING->src_object is a buffer, it must be the current buffer.
6541 In this case, if CODING->src_pos is positive, it is a position of
6542 the source text in the buffer, otherwise. the source text is in the
6543 gap area of the buffer, and coding->src_pos specifies the offset of
6544 the text from GPT (which must be the same as PT). If this is the
6545 same buffer as CODING->dst_object, CODING->src_pos must be
6546 negative and CODING should not have `pre-write-conversion'.
6547
6548 If CODING->src_object is a string, CODING should not have
6549 `pre-write-conversion'.
6550
6551 If CODING->dst_object is a buffer, the encoded data is inserted at
6552 the current point of that buffer.
6553
6554 If CODING->dst_object is nil, the encoded data is placed at the
6555 memory area specified by CODING->destination. */
6556
6557static int
6558encode_coding (coding)
4ed46869 6559 struct coding_system *coding;
4ed46869 6560{
df7492f9 6561 Lisp_Object attrs;
7d64c6ad 6562 Lisp_Object translation_table;
09ee6fdd 6563 int max_lookup;
9861e777 6564
df7492f9 6565 attrs = CODING_ID_ATTRS (coding->id);
ea29edf2
KH
6566 if (coding->encoder == encode_coding_raw_text)
6567 translation_table = Qnil, max_lookup = 0;
6568 else
6569 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 6570
df7492f9 6571 if (BUFFERP (coding->dst_object))
8844fa83 6572 {
df7492f9
KH
6573 set_buffer_internal (XBUFFER (coding->dst_object));
6574 coding->dst_multibyte
6575 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 6576 }
4ed46869 6577
b73bfc1c 6578 coding->consumed = coding->consumed_char = 0;
df7492f9 6579 coding->produced = coding->produced_char = 0;
065e3595 6580 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 6581 coding->errors = 0;
b73bfc1c 6582
df7492f9 6583 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 6584
df7492f9
KH
6585 do {
6586 coding_set_source (coding);
09ee6fdd 6587 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
6588 coding_set_destination (coding);
6589 (*(coding->encoder)) (coding);
6590 } while (coding->consumed_char < coding->src_chars);
6591
6592 if (BUFFERP (coding->dst_object))
6593 insert_from_gap (coding->produced_char, coding->produced);
6594
6595 return (coding->result);
ec6d2bb8
KH
6596}
6597
fb88bf2d 6598
24a73b0a
KH
6599/* Name (or base name) of work buffer for code conversion. */
6600static Lisp_Object Vcode_conversion_workbuf_name;
d46c5b12 6601
24a73b0a
KH
6602/* A working buffer used by the top level conversion. Once it is
6603 created, it is never destroyed. It has the name
6604 Vcode_conversion_workbuf_name. The other working buffers are
6605 destroyed after the use is finished, and their names are modified
6606 versions of Vcode_conversion_workbuf_name. */
6607static Lisp_Object Vcode_conversion_reused_workbuf;
b73bfc1c 6608
24a73b0a
KH
6609/* Nonzero iff Vcode_conversion_reused_workbuf is already in use. */
6610static int reused_workbuf_in_use;
4ed46869 6611
24a73b0a
KH
6612
6613/* Return a working buffer of code convesion. MULTIBYTE specifies the
6614 multibyteness of returning buffer. */
b73bfc1c 6615
f6cbaf43 6616static Lisp_Object
24a73b0a 6617make_conversion_work_buffer (multibyte)
f6cbaf43 6618 int multibyte;
df7492f9 6619{
24a73b0a
KH
6620 Lisp_Object name, workbuf;
6621 struct buffer *current;
4ed46869 6622
24a73b0a 6623 if (reused_workbuf_in_use++)
065e3595
KH
6624 {
6625 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6626 workbuf = Fget_buffer_create (name);
6627 }
df7492f9 6628 else
065e3595
KH
6629 {
6630 name = Vcode_conversion_workbuf_name;
6631 workbuf = Fget_buffer_create (name);
6632 if (NILP (Vcode_conversion_reused_workbuf))
6633 Vcode_conversion_reused_workbuf = workbuf;
6634 }
24a73b0a
KH
6635 current = current_buffer;
6636 set_buffer_internal (XBUFFER (workbuf));
6637 Ferase_buffer ();
df7492f9 6638 current_buffer->undo_list = Qt;
24a73b0a 6639 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
df7492f9 6640 set_buffer_internal (current);
24a73b0a 6641 return workbuf;
df7492f9 6642}
d46c5b12 6643
24a73b0a 6644
4776e638 6645static Lisp_Object
24a73b0a
KH
6646code_conversion_restore (arg)
6647 Lisp_Object arg;
4776e638 6648{
24a73b0a 6649 Lisp_Object current, workbuf;
948bdcf3 6650 struct gcpro gcpro1;
24a73b0a 6651
948bdcf3 6652 GCPRO1 (arg);
24a73b0a
KH
6653 current = XCAR (arg);
6654 workbuf = XCDR (arg);
6655 if (! NILP (workbuf))
6656 {
6657 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6658 reused_workbuf_in_use = 0;
6659 else if (! NILP (Fbuffer_live_p (workbuf)))
6660 Fkill_buffer (workbuf);
6661 }
6662 set_buffer_internal (XBUFFER (current));
948bdcf3 6663 UNGCPRO;
4776e638
KH
6664 return Qnil;
6665}
b73bfc1c 6666
24a73b0a
KH
6667Lisp_Object
6668code_conversion_save (with_work_buf, multibyte)
4776e638 6669 int with_work_buf, multibyte;
df7492f9 6670{
24a73b0a 6671 Lisp_Object workbuf = Qnil;
b73bfc1c 6672
4776e638 6673 if (with_work_buf)
24a73b0a
KH
6674 workbuf = make_conversion_work_buffer (multibyte);
6675 record_unwind_protect (code_conversion_restore,
6676 Fcons (Fcurrent_buffer (), workbuf));
4776e638 6677 return workbuf;
df7492f9 6678}
d46c5b12 6679
df7492f9
KH
6680int
6681decode_coding_gap (coding, chars, bytes)
6682 struct coding_system *coding;
6683 EMACS_INT chars, bytes;
6684{
6685 int count = specpdl_ptr - specpdl;
5e5c78be 6686 Lisp_Object attrs;
fb88bf2d 6687
24a73b0a 6688 code_conversion_save (0, 0);
ec6d2bb8 6689
24a73b0a 6690 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6691 coding->src_chars = chars;
6692 coding->src_bytes = bytes;
6693 coding->src_pos = -chars;
6694 coding->src_pos_byte = -bytes;
6695 coding->src_multibyte = chars < bytes;
24a73b0a 6696 coding->dst_object = coding->src_object;
df7492f9
KH
6697 coding->dst_pos = PT;
6698 coding->dst_pos_byte = PT_BYTE;
71c81426 6699 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869 6700
df7492f9
KH
6701 if (CODING_REQUIRE_DETECTION (coding))
6702 detect_coding (coding);
8f924df7 6703
9286b333 6704 coding->mode |= CODING_MODE_LAST_BLOCK;
df7492f9 6705 decode_coding (coding);
d46c5b12 6706
5e5c78be
KH
6707 attrs = CODING_ID_ATTRS (coding->id);
6708 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 6709 {
5e5c78be
KH
6710 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6711 Lisp_Object val;
6712
6713 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
6714 val = call1 (CODING_ATTR_POST_READ (attrs),
6715 make_number (coding->produced_char));
5e5c78be
KH
6716 CHECK_NATNUM (val);
6717 coding->produced_char += Z - prev_Z;
6718 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 6719 }
4ed46869 6720
df7492f9 6721 unbind_to (count, Qnil);
b73bfc1c
KH
6722 return coding->result;
6723}
52d41803 6724
4ed46869 6725int
df7492f9 6726encode_coding_gap (coding, chars, bytes)
4ed46869 6727 struct coding_system *coding;
df7492f9 6728 EMACS_INT chars, bytes;
4ed46869 6729{
df7492f9 6730 int count = specpdl_ptr - specpdl;
4ed46869 6731
24a73b0a 6732 code_conversion_save (0, 0);
4ed46869 6733
24a73b0a 6734 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6735 coding->src_chars = chars;
6736 coding->src_bytes = bytes;
6737 coding->src_pos = -chars;
6738 coding->src_pos_byte = -bytes;
6739 coding->src_multibyte = chars < bytes;
6740 coding->dst_object = coding->src_object;
6741 coding->dst_pos = PT;
6742 coding->dst_pos_byte = PT_BYTE;
4ed46869 6743
df7492f9 6744 encode_coding (coding);
b73bfc1c 6745
df7492f9
KH
6746 unbind_to (count, Qnil);
6747 return coding->result;
6748}
4ed46869 6749
d46c5b12 6750
df7492f9
KH
6751/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6752 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 6753
df7492f9 6754 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 6755
df7492f9
KH
6756 If it is a buffer, the text is at point of the buffer. FROM and TO
6757 are positions in the buffer.
b73bfc1c 6758
df7492f9
KH
6759 If it is a string, the text is at the beginning of the string.
6760 FROM and TO are indices to the string.
4ed46869 6761
df7492f9
KH
6762 If it is nil, the text is at coding->source. FROM and TO are
6763 indices to coding->source.
bb10be8b 6764
df7492f9 6765 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 6766
df7492f9
KH
6767 If it is a buffer, the decoded text is inserted at point of the
6768 buffer. If the buffer is the same as SRC_OBJECT, the source text
6769 is deleted.
4ed46869 6770
df7492f9
KH
6771 If it is Qt, a string is made from the decoded text, and
6772 set in CODING->dst_object.
d46c5b12 6773
df7492f9 6774 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 6775 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
6776 CODING->destination by xmalloc. If the decoded text is longer than
6777 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6778 */
d46c5b12 6779
df7492f9
KH
6780void
6781decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6782 dst_object)
d46c5b12 6783 struct coding_system *coding;
df7492f9
KH
6784 Lisp_Object src_object;
6785 EMACS_INT from, from_byte, to, to_byte;
6786 Lisp_Object dst_object;
d46c5b12 6787{
df7492f9
KH
6788 int count = specpdl_ptr - specpdl;
6789 unsigned char *destination;
6790 EMACS_INT dst_bytes;
6791 EMACS_INT chars = to - from;
6792 EMACS_INT bytes = to_byte - from_byte;
6793 Lisp_Object attrs;
4776e638
KH
6794 Lisp_Object buffer;
6795 int saved_pt = -1, saved_pt_byte;
d46c5b12 6796
4776e638 6797 buffer = Fcurrent_buffer ();
93dec019 6798
df7492f9 6799 if (NILP (dst_object))
d46c5b12 6800 {
df7492f9
KH
6801 destination = coding->destination;
6802 dst_bytes = coding->dst_bytes;
d46c5b12 6803 }
93dec019 6804
df7492f9
KH
6805 coding->src_object = src_object;
6806 coding->src_chars = chars;
6807 coding->src_bytes = bytes;
6808 coding->src_multibyte = chars < bytes;
70ad9fc4 6809
df7492f9 6810 if (STRINGP (src_object))
d46c5b12 6811 {
df7492f9
KH
6812 coding->src_pos = from;
6813 coding->src_pos_byte = from_byte;
d46c5b12 6814 }
df7492f9 6815 else if (BUFFERP (src_object))
88993dfd 6816 {
df7492f9
KH
6817 set_buffer_internal (XBUFFER (src_object));
6818 if (from != GPT)
6819 move_gap_both (from, from_byte);
6820 if (EQ (src_object, dst_object))
fb88bf2d 6821 {
4776e638 6822 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6823 TEMP_SET_PT_BOTH (from, from_byte);
6824 del_range_both (from, from_byte, to, to_byte, 1);
6825 coding->src_pos = -chars;
6826 coding->src_pos_byte = -bytes;
fb88bf2d 6827 }
df7492f9 6828 else
fb88bf2d 6829 {
df7492f9
KH
6830 coding->src_pos = from;
6831 coding->src_pos_byte = from_byte;
fb88bf2d 6832 }
88993dfd
KH
6833 }
6834
df7492f9
KH
6835 if (CODING_REQUIRE_DETECTION (coding))
6836 detect_coding (coding);
6837 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 6838
2cb26057
KH
6839 if (EQ (dst_object, Qt)
6840 || (! NILP (CODING_ATTR_POST_READ (attrs))
6841 && NILP (dst_object)))
b73bfc1c 6842 {
24a73b0a 6843 coding->dst_object = code_conversion_save (1, 1);
df7492f9
KH
6844 coding->dst_pos = BEG;
6845 coding->dst_pos_byte = BEG_BYTE;
6846 coding->dst_multibyte = 1;
b73bfc1c 6847 }
df7492f9 6848 else if (BUFFERP (dst_object))
d46c5b12 6849 {
24a73b0a 6850 code_conversion_save (0, 0);
df7492f9
KH
6851 coding->dst_object = dst_object;
6852 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6853 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6854 coding->dst_multibyte
6855 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
6856 }
6857 else
6858 {
24a73b0a 6859 code_conversion_save (0, 0);
df7492f9
KH
6860 coding->dst_object = Qnil;
6861 coding->dst_multibyte = 1;
d46c5b12
KH
6862 }
6863
df7492f9 6864 decode_coding (coding);
fa46990e 6865
df7492f9
KH
6866 if (BUFFERP (coding->dst_object))
6867 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 6868
df7492f9 6869 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6870 {
df7492f9
KH
6871 struct gcpro gcpro1, gcpro2;
6872 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6873 Lisp_Object val;
d46c5b12 6874
c0cc7f7f 6875 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9 6876 GCPRO2 (coding->src_object, coding->dst_object);
d4850d67
KH
6877 val = safe_call1 (CODING_ATTR_POST_READ (attrs),
6878 make_number (coding->produced_char));
df7492f9
KH
6879 UNGCPRO;
6880 CHECK_NATNUM (val);
6881 coding->produced_char += Z - prev_Z;
6882 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6883 }
de79a6a5 6884
df7492f9 6885 if (EQ (dst_object, Qt))
ec6d2bb8 6886 {
df7492f9
KH
6887 coding->dst_object = Fbuffer_string ();
6888 }
6889 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6890 {
6891 set_buffer_internal (XBUFFER (coding->dst_object));
6892 if (dst_bytes < coding->produced)
6893 {
6894 destination
6895 = (unsigned char *) xrealloc (destination, coding->produced);
6896 if (! destination)
6897 {
065e3595
KH
6898 record_conversion_result (coding,
6899 CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
6900 unbind_to (count, Qnil);
6901 return;
6902 }
6903 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6904 move_gap_both (BEGV, BEGV_BYTE);
6905 bcopy (BEGV_ADDR, destination, coding->produced);
6906 coding->destination = destination;
d46c5b12 6907 }
ec6d2bb8 6908 }
b73bfc1c 6909
4776e638
KH
6910 if (saved_pt >= 0)
6911 {
6912 /* This is the case of:
6913 (BUFFERP (src_object) && EQ (src_object, dst_object))
6914 As we have moved PT while replacing the original buffer
6915 contents, we must recover it now. */
6916 set_buffer_internal (XBUFFER (src_object));
6917 if (saved_pt < from)
6918 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6919 else if (saved_pt < from + chars)
6920 TEMP_SET_PT_BOTH (from, from_byte);
6921 else if (! NILP (current_buffer->enable_multibyte_characters))
6922 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6923 saved_pt_byte + (coding->produced - bytes));
6924 else
6925 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6926 saved_pt_byte + (coding->produced - bytes));
d46c5b12 6927 }
4776e638 6928
065e3595 6929 unbind_to (count, coding->dst_object);
d46c5b12
KH
6930}
6931
d46c5b12 6932
df7492f9
KH
6933void
6934encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6935 dst_object)
d46c5b12 6936 struct coding_system *coding;
df7492f9
KH
6937 Lisp_Object src_object;
6938 EMACS_INT from, from_byte, to, to_byte;
6939 Lisp_Object dst_object;
d46c5b12 6940{
b73bfc1c 6941 int count = specpdl_ptr - specpdl;
df7492f9
KH
6942 EMACS_INT chars = to - from;
6943 EMACS_INT bytes = to_byte - from_byte;
6944 Lisp_Object attrs;
4776e638
KH
6945 Lisp_Object buffer;
6946 int saved_pt = -1, saved_pt_byte;
c02d943b 6947 int kill_src_buffer = 0;
df7492f9 6948
4776e638 6949 buffer = Fcurrent_buffer ();
df7492f9
KH
6950
6951 coding->src_object = src_object;
6952 coding->src_chars = chars;
6953 coding->src_bytes = bytes;
6954 coding->src_multibyte = chars < bytes;
6955
6956 attrs = CODING_ID_ATTRS (coding->id);
6957
6958 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6959 {
24a73b0a 6960 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
6961 set_buffer_internal (XBUFFER (coding->src_object));
6962 if (STRINGP (src_object))
6963 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6964 else if (BUFFERP (src_object))
6965 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6966 else
6967 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 6968
df7492f9
KH
6969 if (EQ (src_object, dst_object))
6970 {
6971 set_buffer_internal (XBUFFER (src_object));
4776e638 6972 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6973 del_range_both (from, from_byte, to, to_byte, 1);
6974 set_buffer_internal (XBUFFER (coding->src_object));
6975 }
6976
d4850d67
KH
6977 {
6978 Lisp_Object args[3];
6979
6980 args[0] = CODING_ATTR_PRE_WRITE (attrs);
6981 args[1] = make_number (BEG);
6982 args[2] = make_number (Z);
6983 safe_call (3, args);
6984 }
c02d943b
KH
6985 if (XBUFFER (coding->src_object) != current_buffer)
6986 kill_src_buffer = 1;
ac87bbef 6987 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6988 if (BEG != GPT)
6989 move_gap_both (BEG, BEG_BYTE);
6990 coding->src_chars = Z - BEG;
6991 coding->src_bytes = Z_BYTE - BEG_BYTE;
6992 coding->src_pos = BEG;
6993 coding->src_pos_byte = BEG_BYTE;
6994 coding->src_multibyte = Z < Z_BYTE;
6995 }
6996 else if (STRINGP (src_object))
d46c5b12 6997 {
24a73b0a 6998 code_conversion_save (0, 0);
df7492f9
KH
6999 coding->src_pos = from;
7000 coding->src_pos_byte = from_byte;
b73bfc1c 7001 }
df7492f9 7002 else if (BUFFERP (src_object))
b73bfc1c 7003 {
24a73b0a 7004 code_conversion_save (0, 0);
df7492f9 7005 set_buffer_internal (XBUFFER (src_object));
df7492f9 7006 if (EQ (src_object, dst_object))
d46c5b12 7007 {
4776e638 7008 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
7009 coding->src_object = del_range_1 (from, to, 1, 1);
7010 coding->src_pos = 0;
7011 coding->src_pos_byte = 0;
d46c5b12 7012 }
df7492f9 7013 else
d46c5b12 7014 {
ff0dacd7
KH
7015 if (from < GPT && to >= GPT)
7016 move_gap_both (from, from_byte);
df7492f9
KH
7017 coding->src_pos = from;
7018 coding->src_pos_byte = from_byte;
d46c5b12 7019 }
d46c5b12 7020 }
4776e638 7021 else
24a73b0a 7022 code_conversion_save (0, 0);
d46c5b12 7023
df7492f9 7024 if (BUFFERP (dst_object))
88993dfd 7025 {
df7492f9 7026 coding->dst_object = dst_object;
28f67a95
KH
7027 if (EQ (src_object, dst_object))
7028 {
7029 coding->dst_pos = from;
7030 coding->dst_pos_byte = from_byte;
7031 }
7032 else
7033 {
7034 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7035 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7036 }
df7492f9
KH
7037 coding->dst_multibyte
7038 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 7039 }
df7492f9 7040 else if (EQ (dst_object, Qt))
d46c5b12 7041 {
df7492f9 7042 coding->dst_object = Qnil;
df7492f9 7043 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
7044 if (coding->dst_bytes == 0)
7045 coding->dst_bytes = 1;
7046 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 7047 coding->dst_multibyte = 0;
d46c5b12
KH
7048 }
7049 else
7050 {
df7492f9
KH
7051 coding->dst_object = Qnil;
7052 coding->dst_multibyte = 0;
d46c5b12
KH
7053 }
7054
df7492f9 7055 encode_coding (coding);
d46c5b12 7056
df7492f9 7057 if (EQ (dst_object, Qt))
d46c5b12 7058 {
df7492f9
KH
7059 if (BUFFERP (coding->dst_object))
7060 coding->dst_object = Fbuffer_string ();
7061 else
d46c5b12 7062 {
df7492f9
KH
7063 coding->dst_object
7064 = make_unibyte_string ((char *) coding->destination,
7065 coding->produced);
7066 xfree (coding->destination);
d46c5b12 7067 }
4ed46869 7068 }
d46c5b12 7069
4776e638
KH
7070 if (saved_pt >= 0)
7071 {
7072 /* This is the case of:
7073 (BUFFERP (src_object) && EQ (src_object, dst_object))
7074 As we have moved PT while replacing the original buffer
7075 contents, we must recover it now. */
7076 set_buffer_internal (XBUFFER (src_object));
7077 if (saved_pt < from)
7078 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7079 else if (saved_pt < from + chars)
7080 TEMP_SET_PT_BOTH (from, from_byte);
7081 else if (! NILP (current_buffer->enable_multibyte_characters))
7082 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7083 saved_pt_byte + (coding->produced - bytes));
d46c5b12 7084 else
4776e638
KH
7085 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7086 saved_pt_byte + (coding->produced - bytes));
7087 }
7088
c02d943b
KH
7089 if (kill_src_buffer)
7090 Fkill_buffer (coding->src_object);
df7492f9 7091 unbind_to (count, Qnil);
b73bfc1c
KH
7092}
7093
df7492f9 7094
b73bfc1c 7095Lisp_Object
df7492f9 7096preferred_coding_system ()
b73bfc1c 7097{
df7492f9 7098 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 7099
df7492f9 7100 return CODING_ID_NAME (id);
4ed46869
KH
7101}
7102
7103\f
7104#ifdef emacs
1397dc18 7105/*** 11. Emacs Lisp library functions ***/
4ed46869 7106
4ed46869 7107DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 7108 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 7109See the documentation of `define-coding-system' for information
48b0f3ae
PJ
7110about coding-system objects. */)
7111 (obj)
4ed46869
KH
7112 Lisp_Object obj;
7113{
44e8490d
KH
7114 if (NILP (obj)
7115 || CODING_SYSTEM_ID (obj) >= 0)
7116 return Qt;
7117 if (! SYMBOLP (obj)
7118 || NILP (Fget (obj, Qcoding_system_define_form)))
7119 return Qnil;
7120 return Qt;
4ed46869
KH
7121}
7122
9d991de8
RS
7123DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7124 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
7125 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
7126 (prompt)
4ed46869
KH
7127 Lisp_Object prompt;
7128{
e0e989f6 7129 Lisp_Object val;
9d991de8
RS
7130 do
7131 {
4608c386
KH
7132 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7133 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 7134 }
8f924df7 7135 while (SCHARS (val) == 0);
e0e989f6 7136 return (Fintern (val, Qnil));
4ed46869
KH
7137}
7138
9b787f3e 7139DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
7140 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7141If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
7142 (prompt, default_coding_system)
9b787f3e 7143 Lisp_Object prompt, default_coding_system;
4ed46869 7144{
f44d27ce 7145 Lisp_Object val;
9b787f3e 7146 if (SYMBOLP (default_coding_system))
a3181084 7147 XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
4608c386 7148 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
7149 Qt, Qnil, Qcoding_system_history,
7150 default_coding_system, Qnil);
8f924df7 7151 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
7152}
7153
7154DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7155 1, 1, 0,
48b0f3ae 7156 doc: /* Check validity of CODING-SYSTEM.
9ffd559c
KH
7157If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7158It is valid if it is nil or a symbol defined as a coding system by the
7159function `define-coding-system'. */)
df7492f9 7160 (coding_system)
4ed46869
KH
7161 Lisp_Object coding_system;
7162{
44e8490d
KH
7163 Lisp_Object define_form;
7164
7165 define_form = Fget (coding_system, Qcoding_system_define_form);
7166 if (! NILP (define_form))
7167 {
7168 Fput (coding_system, Qcoding_system_define_form, Qnil);
7169 safe_eval (define_form);
7170 }
4ed46869
KH
7171 if (!NILP (Fcoding_system_p (coding_system)))
7172 return coding_system;
7173 while (1)
02ba4723 7174 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 7175}
df7492f9 7176
3a73fa5d 7177\f
89528eb3
KH
7178/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
7179 HIGHEST is nonzero, return the coding system of the highest
7180 priority among the detected coding systems. Otherwise return a
7181 list of detected coding systems sorted by their priorities. If
7182 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7183 multibyte form but contains only ASCII and eight-bit chars.
7184 Otherwise, the bytes are raw bytes.
7185
7186 CODING-SYSTEM controls the detection as below:
7187
7188 If it is nil, detect both text-format and eol-format. If the
7189 text-format part of CODING-SYSTEM is already specified
7190 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
7191 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7192 detect only text-format. */
7193
d46c5b12 7194Lisp_Object
24a73b0a
KH
7195detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7196 coding_system)
8f924df7 7197 const unsigned char *src;
24a73b0a 7198 int src_chars, src_bytes, highest;
0a28aafb 7199 int multibytep;
df7492f9 7200 Lisp_Object coding_system;
4ed46869 7201{
8f924df7 7202 const unsigned char *src_end = src + src_bytes;
df7492f9
KH
7203 Lisp_Object attrs, eol_type;
7204 Lisp_Object val;
7205 struct coding_system coding;
89528eb3 7206 int id;
ff0dacd7 7207 struct coding_detection_info detect_info;
24a73b0a 7208 enum coding_category base_category;
b73bfc1c 7209
df7492f9
KH
7210 if (NILP (coding_system))
7211 coding_system = Qundecided;
7212 setup_coding_system (coding_system, &coding);
7213 attrs = CODING_ID_ATTRS (coding.id);
7214 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 7215 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 7216
df7492f9 7217 coding.source = src;
24a73b0a 7218 coding.src_chars = src_chars;
df7492f9
KH
7219 coding.src_bytes = src_bytes;
7220 coding.src_multibyte = multibytep;
7221 coding.consumed = 0;
89528eb3 7222 coding.mode |= CODING_MODE_LAST_BLOCK;
d46c5b12 7223
ff0dacd7 7224 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 7225
89528eb3 7226 /* At first, detect text-format if necessary. */
24a73b0a
KH
7227 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7228 if (base_category == coding_category_undecided)
4ed46869 7229 {
ff0dacd7
KH
7230 enum coding_category category;
7231 struct coding_system *this;
7232 int c, i;
88993dfd 7233
24a73b0a
KH
7234 /* Skip all ASCII bytes except for a few ISO2022 controls. */
7235 for (i = 0; src < src_end; i++, src++)
4ed46869 7236 {
df7492f9 7237 c = *src;
6cb21a4f 7238 if (c & 0x80)
d46c5b12 7239 break;
6cb21a4f
KH
7240 if (c < 0x20
7241 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7242 && inhibit_iso_escape_detection)
7243 {
7244 coding.head_ascii = src - coding.source;
7245 if (detect_coding_iso_2022 (&coding, &detect_info))
7246 {
7247 /* We have scanned the whole data. */
7248 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7249 /* We didn't find an 8-bit code. */
7250 src = src_end;
7251 break;
7252 }
7253 }
4ed46869 7254 }
df7492f9 7255 coding.head_ascii = src - coding.source;
88993dfd 7256
6cb21a4f
KH
7257 if (src < src_end
7258 || detect_info.found)
7259 {
7260 if (src == src_end)
7261 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
7262 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7 7263 {
6cb21a4f
KH
7264 category = coding_priorities[i];
7265 if (detect_info.found & (1 << category))
ff0dacd7
KH
7266 break;
7267 }
6cb21a4f
KH
7268 else
7269 for (i = 0; i < coding_category_raw_text; i++)
df7492f9 7270 {
6cb21a4f
KH
7271 category = coding_priorities[i];
7272 this = coding_categories + category;
7273
7274 if (this->id < 0)
24a73b0a 7275 {
6cb21a4f
KH
7276 /* No coding system of this category is defined. */
7277 detect_info.rejected |= (1 << category);
7278 }
7279 else if (category >= coding_category_raw_text)
7280 continue;
7281 else if (detect_info.checked & (1 << category))
7282 {
7283 if (highest
7284 && (detect_info.found & (1 << category)))
7285 break;
7286 }
7287 else
7288 {
7289 if ((*(this->detector)) (&coding, &detect_info)
7290 && highest
7291 && (detect_info.found & (1 << category)))
24a73b0a 7292 {
6cb21a4f
KH
7293 if (category == coding_category_utf_16_auto)
7294 {
7295 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7296 category = coding_category_utf_16_le;
7297 else
7298 category = coding_category_utf_16_be;
7299 }
7300 break;
24a73b0a 7301 }
24a73b0a 7302 }
df7492f9 7303 }
6cb21a4f 7304 }
ec6d2bb8 7305
ff0dacd7 7306 if (detect_info.rejected == CATEGORY_MASK_ANY)
ec6d2bb8 7307 {
ff0dacd7 7308 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
7309 id = coding_categories[coding_category_raw_text].id;
7310 val = Fcons (make_number (id), Qnil);
7311 }
ff0dacd7 7312 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 7313 {
ff0dacd7 7314 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
7315 id = coding_categories[coding_category_undecided].id;
7316 val = Fcons (make_number (id), Qnil);
7317 }
7318 else if (highest)
7319 {
ff0dacd7 7320 if (detect_info.found)
ec6d2bb8 7321 {
ff0dacd7
KH
7322 detect_info.found = 1 << category;
7323 val = Fcons (make_number (this->id), Qnil);
7324 }
7325 else
7326 for (i = 0; i < coding_category_raw_text; i++)
7327 if (! (detect_info.rejected & (1 << coding_priorities[i])))
7328 {
7329 detect_info.found = 1 << coding_priorities[i];
7330 id = coding_categories[coding_priorities[i]].id;
7331 val = Fcons (make_number (id), Qnil);
7332 break;
7333 }
7334 }
89528eb3
KH
7335 else
7336 {
ff0dacd7
KH
7337 int mask = detect_info.rejected | detect_info.found;
7338 int found = 0;
89528eb3 7339 val = Qnil;
ec6d2bb8 7340
89528eb3 7341 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
7342 {
7343 category = coding_priorities[i];
7344 if (! (mask & (1 << category)))
ec6d2bb8 7345 {
ff0dacd7
KH
7346 found |= 1 << category;
7347 id = coding_categories[category].id;
7348 val = Fcons (make_number (id), val);
7349 }
7350 }
7351 for (i = coding_category_raw_text - 1; i >= 0; i--)
7352 {
7353 category = coding_priorities[i];
7354 if (detect_info.found & (1 << category))
7355 {
7356 id = coding_categories[category].id;
7357 val = Fcons (make_number (id), val);
ec6d2bb8 7358 }
ec6d2bb8 7359 }
ff0dacd7 7360 detect_info.found |= found;
ec6d2bb8 7361 }
ec6d2bb8 7362 }
24a73b0a
KH
7363 else if (base_category == coding_category_utf_16_auto)
7364 {
7365 if (detect_coding_utf_16 (&coding, &detect_info))
7366 {
24a73b0a
KH
7367 struct coding_system *this;
7368
7369 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7370 this = coding_categories + coding_category_utf_16_le;
7371 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7372 this = coding_categories + coding_category_utf_16_be;
7373 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7374 this = coding_categories + coding_category_utf_16_be_nosig;
7375 else
7376 this = coding_categories + coding_category_utf_16_le_nosig;
7377 val = Fcons (make_number (this->id), Qnil);
7378 }
7379 }
df7492f9
KH
7380 else
7381 {
ff0dacd7 7382 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 7383 val = Fcons (make_number (coding.id), Qnil);
4ed46869 7384 }
df7492f9 7385
89528eb3 7386 /* Then, detect eol-format if necessary. */
df7492f9 7387 {
89528eb3 7388 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
7389 Lisp_Object tail;
7390
89528eb3
KH
7391 if (VECTORP (eol_type))
7392 {
ff0dacd7 7393 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
89528eb3
KH
7394 normal_eol = detect_eol (coding.source, src_bytes,
7395 coding_category_raw_text);
ff0dacd7
KH
7396 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7397 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
7398 utf_16_be_eol = detect_eol (coding.source, src_bytes,
7399 coding_category_utf_16_be);
ff0dacd7
KH
7400 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7401 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
7402 utf_16_le_eol = detect_eol (coding.source, src_bytes,
7403 coding_category_utf_16_le);
7404 }
7405 else
7406 {
7407 if (EQ (eol_type, Qunix))
7408 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7409 else if (EQ (eol_type, Qdos))
7410 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7411 else
7412 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7413 }
7414
df7492f9
KH
7415 for (tail = val; CONSP (tail); tail = XCDR (tail))
7416 {
89528eb3 7417 enum coding_category category;
df7492f9 7418 int this_eol;
89528eb3
KH
7419
7420 id = XINT (XCAR (tail));
7421 attrs = CODING_ID_ATTRS (id);
7422 category = XINT (CODING_ATTR_CATEGORY (attrs));
7423 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
7424 if (VECTORP (eol_type))
7425 {
89528eb3
KH
7426 if (category == coding_category_utf_16_be
7427 || category == coding_category_utf_16_be_nosig)
7428 this_eol = utf_16_be_eol;
7429 else if (category == coding_category_utf_16_le
7430 || category == coding_category_utf_16_le_nosig)
7431 this_eol = utf_16_le_eol;
df7492f9 7432 else
89528eb3
KH
7433 this_eol = normal_eol;
7434
df7492f9
KH
7435 if (this_eol == EOL_SEEN_LF)
7436 XSETCAR (tail, AREF (eol_type, 0));
7437 else if (this_eol == EOL_SEEN_CRLF)
7438 XSETCAR (tail, AREF (eol_type, 1));
7439 else if (this_eol == EOL_SEEN_CR)
7440 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
7441 else
7442 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 7443 }
89528eb3
KH
7444 else
7445 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
7446 }
7447 }
ec6d2bb8 7448
03699b14 7449 return (highest ? XCAR (val) : val);
ec6d2bb8
KH
7450}
7451
ec6d2bb8 7452
d46c5b12
KH
7453DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7454 2, 3, 0,
48b0f3ae
PJ
7455 doc: /* Detect coding system of the text in the region between START and END.
7456Return a list of possible coding systems ordered by priority.
ec6d2bb8 7457
48b0f3ae
PJ
7458If only ASCII characters are found, it returns a list of single element
7459`undecided' or its subsidiary coding system according to a detected
7460end-of-line format.
ec6d2bb8 7461
48b0f3ae
PJ
7462If optional argument HIGHEST is non-nil, return the coding system of
7463highest priority. */)
7464 (start, end, highest)
d46c5b12
KH
7465 Lisp_Object start, end, highest;
7466{
7467 int from, to;
7468 int from_byte, to_byte;
ec6d2bb8 7469
b7826503
PJ
7470 CHECK_NUMBER_COERCE_MARKER (start);
7471 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 7472
d46c5b12
KH
7473 validate_region (&start, &end);
7474 from = XINT (start), to = XINT (end);
7475 from_byte = CHAR_TO_BYTE (from);
7476 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 7477
d46c5b12
KH
7478 if (from < GPT && to >= GPT)
7479 move_gap_both (to, to_byte);
c210f766 7480
d46c5b12 7481 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 7482 to - from, to_byte - from_byte,
0a28aafb
KH
7483 !NILP (highest),
7484 !NILP (current_buffer
df7492f9
KH
7485 ->enable_multibyte_characters),
7486 Qnil);
ec6d2bb8
KH
7487}
7488
d46c5b12
KH
7489DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7490 1, 2, 0,
48b0f3ae
PJ
7491 doc: /* Detect coding system of the text in STRING.
7492Return a list of possible coding systems ordered by priority.
fb88bf2d 7493
48b0f3ae
PJ
7494If only ASCII characters are found, it returns a list of single element
7495`undecided' or its subsidiary coding system according to a detected
7496end-of-line format.
d46c5b12 7497
48b0f3ae
PJ
7498If optional argument HIGHEST is non-nil, return the coding system of
7499highest priority. */)
7500 (string, highest)
d46c5b12
KH
7501 Lisp_Object string, highest;
7502{
b7826503 7503 CHECK_STRING (string);
b73bfc1c 7504
24a73b0a
KH
7505 return detect_coding_system (SDATA (string),
7506 SCHARS (string), SBYTES (string),
8f924df7 7507 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 7508 Qnil);
4ed46869 7509}
4ed46869 7510
b73bfc1c 7511
df7492f9
KH
7512static INLINE int
7513char_encodable_p (c, attrs)
7514 int c;
7515 Lisp_Object attrs;
05e6f5dc 7516{
df7492f9 7517 Lisp_Object tail;
df7492f9 7518 struct charset *charset;
7d64c6ad 7519 Lisp_Object translation_table;
d46c5b12 7520
7d64c6ad 7521 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 7522 if (! NILP (translation_table))
7d64c6ad 7523 c = translate_char (translation_table, c);
df7492f9
KH
7524 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7525 CONSP (tail); tail = XCDR (tail))
e133c8fa 7526 {
df7492f9
KH
7527 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7528 if (CHAR_CHARSET_P (c, charset))
7529 break;
e133c8fa 7530 }
df7492f9 7531 return (! NILP (tail));
05e6f5dc 7532}
83fa074f 7533
fb88bf2d 7534
df7492f9
KH
7535/* Return a list of coding systems that safely encode the text between
7536 START and END. If EXCLUDE is non-nil, it is a list of coding
7537 systems not to check. The returned list doesn't contain any such
48468dac 7538 coding systems. In any case, if the text contains only ASCII or is
df7492f9 7539 unibyte, return t. */
e077cc80 7540
df7492f9
KH
7541DEFUN ("find-coding-systems-region-internal",
7542 Ffind_coding_systems_region_internal,
7543 Sfind_coding_systems_region_internal, 2, 3, 0,
7544 doc: /* Internal use only. */)
7545 (start, end, exclude)
7546 Lisp_Object start, end, exclude;
7547{
7548 Lisp_Object coding_attrs_list, safe_codings;
7549 EMACS_INT start_byte, end_byte;
7c78e542 7550 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7551 int c;
7552 Lisp_Object tail, elt;
d46c5b12 7553
df7492f9
KH
7554 if (STRINGP (start))
7555 {
7556 if (!STRING_MULTIBYTE (start)
8f924df7 7557 || SCHARS (start) == SBYTES (start))
df7492f9
KH
7558 return Qt;
7559 start_byte = 0;
8f924df7 7560 end_byte = SBYTES (start);
df7492f9
KH
7561 }
7562 else
d46c5b12 7563 {
df7492f9
KH
7564 CHECK_NUMBER_COERCE_MARKER (start);
7565 CHECK_NUMBER_COERCE_MARKER (end);
7566 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7567 args_out_of_range (start, end);
7568 if (NILP (current_buffer->enable_multibyte_characters))
7569 return Qt;
7570 start_byte = CHAR_TO_BYTE (XINT (start));
7571 end_byte = CHAR_TO_BYTE (XINT (end));
7572 if (XINT (end) - XINT (start) == end_byte - start_byte)
7573 return Qt;
d46c5b12 7574
e1c23804 7575 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 7576 {
e1c23804
DL
7577 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7578 move_gap_both (XINT (start), start_byte);
df7492f9 7579 else
e1c23804 7580 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
7581 }
7582 }
7583
df7492f9
KH
7584 coding_attrs_list = Qnil;
7585 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7586 if (NILP (exclude)
7587 || NILP (Fmemq (XCAR (tail), exclude)))
7588 {
7589 Lisp_Object attrs;
d46c5b12 7590
df7492f9
KH
7591 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7592 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7593 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
7594 {
7595 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 7596 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
7597 coding_attrs_list = Fcons (attrs, coding_attrs_list);
7598 }
df7492f9 7599 }
d46c5b12 7600
df7492f9 7601 if (STRINGP (start))
8f924df7 7602 p = pbeg = SDATA (start);
df7492f9
KH
7603 else
7604 p = pbeg = BYTE_POS_ADDR (start_byte);
7605 pend = p + (end_byte - start_byte);
b843d1ae 7606
df7492f9
KH
7607 while (p < pend && ASCII_BYTE_P (*p)) p++;
7608 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 7609
05e6f5dc 7610 while (p < pend)
72d1a715 7611 {
df7492f9
KH
7612 if (ASCII_BYTE_P (*p))
7613 p++;
72d1a715
RS
7614 else
7615 {
df7492f9 7616 c = STRING_CHAR_ADVANCE (p);
12410ef1 7617
df7492f9
KH
7618 charset_map_loaded = 0;
7619 for (tail = coding_attrs_list; CONSP (tail);)
7620 {
7621 elt = XCAR (tail);
7622 if (NILP (elt))
7623 tail = XCDR (tail);
7624 else if (char_encodable_p (c, elt))
7625 tail = XCDR (tail);
7626 else if (CONSP (XCDR (tail)))
7627 {
7628 XSETCAR (tail, XCAR (XCDR (tail)));
7629 XSETCDR (tail, XCDR (XCDR (tail)));
7630 }
7631 else
7632 {
7633 XSETCAR (tail, Qnil);
7634 tail = XCDR (tail);
7635 }
7636 }
7637 if (charset_map_loaded)
7638 {
7639 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 7640
df7492f9 7641 if (STRINGP (start))
8f924df7 7642 pbeg = SDATA (start);
df7492f9
KH
7643 else
7644 pbeg = BYTE_POS_ADDR (start_byte);
7645 p = pbeg + p_offset;
7646 pend = pbeg + pend_offset;
7647 }
7648 }
ec6d2bb8 7649 }
fb88bf2d 7650
988b3759 7651 safe_codings = list2 (Qraw_text, Qno_conversion);
df7492f9
KH
7652 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7653 if (! NILP (XCAR (tail)))
7654 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 7655
05e6f5dc
KH
7656 return safe_codings;
7657}
4956c225 7658
d46c5b12 7659
8f924df7
KH
7660DEFUN ("unencodable-char-position", Funencodable_char_position,
7661 Sunencodable_char_position, 3, 5, 0,
7662 doc: /*
7663Return position of first un-encodable character in a region.
7664START and END specfiy the region and CODING-SYSTEM specifies the
7665encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 7666
8f924df7
KH
7667If optional 4th argument COUNT is non-nil, it specifies at most how
7668many un-encodable characters to search. In this case, the value is a
7669list of positions.
d46c5b12 7670
8f924df7
KH
7671If optional 5th argument STRING is non-nil, it is a string to search
7672for un-encodable characters. In that case, START and END are indexes
7673to the string. */)
7674 (start, end, coding_system, count, string)
7675 Lisp_Object start, end, coding_system, count, string;
7676{
7677 int n;
7678 struct coding_system coding;
7d64c6ad 7679 Lisp_Object attrs, charset_list, translation_table;
8f924df7
KH
7680 Lisp_Object positions;
7681 int from, to;
7682 const unsigned char *p, *stop, *pend;
7683 int ascii_compatible;
fb88bf2d 7684
8f924df7
KH
7685 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7686 attrs = CODING_ID_ATTRS (coding.id);
7687 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7688 return Qnil;
7689 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7690 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 7691 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 7692
8f924df7
KH
7693 if (NILP (string))
7694 {
7695 validate_region (&start, &end);
7696 from = XINT (start);
7697 to = XINT (end);
7698 if (NILP (current_buffer->enable_multibyte_characters)
7699 || (ascii_compatible
7700 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7701 return Qnil;
7702 p = CHAR_POS_ADDR (from);
7703 pend = CHAR_POS_ADDR (to);
7704 if (from < GPT && to >= GPT)
7705 stop = GPT_ADDR;
7706 else
7707 stop = pend;
7708 }
7709 else
7710 {
7711 CHECK_STRING (string);
7712 CHECK_NATNUM (start);
7713 CHECK_NATNUM (end);
7714 from = XINT (start);
7715 to = XINT (end);
7716 if (from > to
7717 || to > SCHARS (string))
7718 args_out_of_range_3 (string, start, end);
7719 if (! STRING_MULTIBYTE (string))
7720 return Qnil;
7721 p = SDATA (string) + string_char_to_byte (string, from);
7722 stop = pend = SDATA (string) + string_char_to_byte (string, to);
7723 if (ascii_compatible && (to - from) == (pend - p))
7724 return Qnil;
7725 }
f2558efd 7726
8f924df7
KH
7727 if (NILP (count))
7728 n = 1;
7729 else
b73bfc1c 7730 {
8f924df7
KH
7731 CHECK_NATNUM (count);
7732 n = XINT (count);
b73bfc1c
KH
7733 }
7734
8f924df7
KH
7735 positions = Qnil;
7736 while (1)
d46c5b12 7737 {
8f924df7 7738 int c;
ec6d2bb8 7739
8f924df7
KH
7740 if (ascii_compatible)
7741 while (p < stop && ASCII_BYTE_P (*p))
7742 p++, from++;
7743 if (p >= stop)
0e79d667 7744 {
8f924df7
KH
7745 if (p >= pend)
7746 break;
7747 stop = pend;
7748 p = GAP_END_ADDR;
0e79d667 7749 }
ec6d2bb8 7750
8f924df7
KH
7751 c = STRING_CHAR_ADVANCE (p);
7752 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
7753 && ! char_charset (translate_char (translation_table, c),
7754 charset_list, NULL))
ec6d2bb8 7755 {
8f924df7
KH
7756 positions = Fcons (make_number (from), positions);
7757 n--;
7758 if (n == 0)
7759 break;
ec6d2bb8
KH
7760 }
7761
8f924df7
KH
7762 from++;
7763 }
d46c5b12 7764
8f924df7
KH
7765 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7766}
d46c5b12 7767
d46c5b12 7768
df7492f9
KH
7769DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7770 Scheck_coding_systems_region, 3, 3, 0,
7771 doc: /* Check if the region is encodable by coding systems.
d46c5b12 7772
df7492f9
KH
7773START and END are buffer positions specifying the region.
7774CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 7775
df7492f9
KH
7776The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7777CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7778whole region, POS0, POS1, ... are buffer positions where non-encodable
7779characters are found.
93dec019 7780
df7492f9
KH
7781If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7782value is nil.
93dec019 7783
df7492f9
KH
7784START may be a string. In that case, check if the string is
7785encodable, and the value contains indices to the string instead of
7786buffer positions. END is ignored. */)
7787 (start, end, coding_system_list)
7788 Lisp_Object start, end, coding_system_list;
05e6f5dc 7789{
df7492f9
KH
7790 Lisp_Object list;
7791 EMACS_INT start_byte, end_byte;
7792 int pos;
7c78e542 7793 const unsigned char *p, *pbeg, *pend;
df7492f9 7794 int c;
7d64c6ad 7795 Lisp_Object tail, elt, attrs;
70ad9fc4 7796
05e6f5dc
KH
7797 if (STRINGP (start))
7798 {
df7492f9 7799 if (!STRING_MULTIBYTE (start)
8f924df7 7800 && SCHARS (start) != SBYTES (start))
df7492f9
KH
7801 return Qnil;
7802 start_byte = 0;
8f924df7 7803 end_byte = SBYTES (start);
df7492f9 7804 pos = 0;
d46c5b12 7805 }
05e6f5dc 7806 else
b73bfc1c 7807 {
b7826503
PJ
7808 CHECK_NUMBER_COERCE_MARKER (start);
7809 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
7810 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7811 args_out_of_range (start, end);
7812 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
7813 return Qnil;
7814 start_byte = CHAR_TO_BYTE (XINT (start));
7815 end_byte = CHAR_TO_BYTE (XINT (end));
7816 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 7817 return Qt;
df7492f9 7818
e1c23804 7819 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 7820 {
e1c23804
DL
7821 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7822 move_gap_both (XINT (start), start_byte);
df7492f9 7823 else
e1c23804 7824 move_gap_both (XINT (end), end_byte);
b73bfc1c 7825 }
e1c23804 7826 pos = XINT (start);
b73bfc1c 7827 }
7553d0e1 7828
df7492f9
KH
7829 list = Qnil;
7830 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 7831 {
df7492f9 7832 elt = XCAR (tail);
7d64c6ad 7833 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
7834 ASET (attrs, coding_attr_trans_tbl,
7835 get_translation_table (attrs, 1, NULL));
7d64c6ad 7836 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
7837 }
7838
df7492f9 7839 if (STRINGP (start))
8f924df7 7840 p = pbeg = SDATA (start);
72d1a715 7841 else
df7492f9
KH
7842 p = pbeg = BYTE_POS_ADDR (start_byte);
7843 pend = p + (end_byte - start_byte);
4ed46869 7844
df7492f9
KH
7845 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7846 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 7847
df7492f9 7848 while (p < pend)
d46c5b12 7849 {
df7492f9
KH
7850 if (ASCII_BYTE_P (*p))
7851 p++;
e133c8fa 7852 else
05e6f5dc 7853 {
df7492f9
KH
7854 c = STRING_CHAR_ADVANCE (p);
7855
7856 charset_map_loaded = 0;
7857 for (tail = list; CONSP (tail); tail = XCDR (tail))
7858 {
7859 elt = XCDR (XCAR (tail));
7860 if (! char_encodable_p (c, XCAR (elt)))
7861 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7862 }
7863 if (charset_map_loaded)
7864 {
7865 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7866
7867 if (STRINGP (start))
8f924df7 7868 pbeg = SDATA (start);
df7492f9
KH
7869 else
7870 pbeg = BYTE_POS_ADDR (start_byte);
7871 p = pbeg + p_offset;
7872 pend = pbeg + pend_offset;
7873 }
05e6f5dc 7874 }
df7492f9 7875 pos++;
d46c5b12 7876 }
4ed46869 7877
df7492f9
KH
7878 tail = list;
7879 list = Qnil;
7880 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 7881 {
df7492f9
KH
7882 elt = XCAR (tail);
7883 if (CONSP (XCDR (XCDR (elt))))
7884 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7885 list);
ec6d2bb8 7886 }
2b4f9037 7887
df7492f9 7888 return list;
d46c5b12
KH
7889}
7890
3fd9494b 7891
b73bfc1c 7892Lisp_Object
df7492f9
KH
7893code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7894 Lisp_Object start, end, coding_system, dst_object;
7895 int encodep, norecord;
4ed46869 7896{
3a73fa5d 7897 struct coding_system coding;
df7492f9
KH
7898 EMACS_INT from, from_byte, to, to_byte;
7899 Lisp_Object src_object;
4ed46869 7900
b7826503
PJ
7901 CHECK_NUMBER_COERCE_MARKER (start);
7902 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
7903 if (NILP (coding_system))
7904 coding_system = Qno_conversion;
7905 else
7906 CHECK_CODING_SYSTEM (coding_system);
7907 src_object = Fcurrent_buffer ();
7908 if (NILP (dst_object))
7909 dst_object = src_object;
7910 else if (! EQ (dst_object, Qt))
7911 CHECK_BUFFER (dst_object);
3a73fa5d 7912
d46c5b12
KH
7913 validate_region (&start, &end);
7914 from = XFASTINT (start);
df7492f9 7915 from_byte = CHAR_TO_BYTE (from);
d46c5b12 7916 to = XFASTINT (end);
df7492f9 7917 to_byte = CHAR_TO_BYTE (to);
764ca8da 7918
df7492f9
KH
7919 setup_coding_system (coding_system, &coding);
7920 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 7921
df7492f9
KH
7922 if (encodep)
7923 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7924 dst_object);
7925 else
7926 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7927 dst_object);
7928 if (! norecord)
7929 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 7930
df7492f9
KH
7931 return (BUFFERP (dst_object)
7932 ? make_number (coding.produced_char)
7933 : coding.dst_object);
4031e2bf 7934}
78108bcd 7935
4ed46869 7936
4031e2bf 7937DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 7938 3, 4, "r\nzCoding system: ",
48b0f3ae 7939 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
7940When called from a program, takes four arguments:
7941 START, END, CODING-SYSTEM, and DESTINATION.
7942START and END are buffer positions.
8844fa83 7943
df7492f9
KH
7944Optional 4th arguments DESTINATION specifies where the decoded text goes.
7945If nil, the region between START and END is replace by the decoded text.
7946If buffer, the decoded text is inserted in the buffer.
7947If t, the decoded text is returned.
8844fa83 7948
48b0f3ae
PJ
7949This function sets `last-coding-system-used' to the precise coding system
7950used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7951not fully specified.)
7952It returns the length of the decoded text. */)
df7492f9
KH
7953 (start, end, coding_system, destination)
7954 Lisp_Object start, end, coding_system, destination;
4031e2bf 7955{
df7492f9 7956 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 7957}
8844fa83 7958
3a73fa5d 7959DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
7960 3, 4, "r\nzCoding system: ",
7961 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
7962When called from a program, takes three arguments:
7963START, END, and CODING-SYSTEM. START and END are buffer positions.
d46c5b12 7964
df7492f9
KH
7965Optional 4th arguments DESTINATION specifies where the encoded text goes.
7966If nil, the region between START and END is replace by the encoded text.
7967If buffer, the encoded text is inserted in the buffer.
7968If t, the encoded text is returned.
2391eaa4 7969
48b0f3ae
PJ
7970This function sets `last-coding-system-used' to the precise coding system
7971used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7972not fully specified.)
7973It returns the length of the encoded text. */)
df7492f9
KH
7974 (start, end, coding_system, destination)
7975 Lisp_Object start, end, coding_system, destination;
3a73fa5d 7976{
df7492f9 7977 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
7978}
7979
7980Lisp_Object
df7492f9
KH
7981code_convert_string (string, coding_system, dst_object,
7982 encodep, nocopy, norecord)
7983 Lisp_Object string, coding_system, dst_object;
7984 int encodep, nocopy, norecord;
b73bfc1c 7985{
4031e2bf 7986 struct coding_system coding;
df7492f9 7987 EMACS_INT chars, bytes;
ec6d2bb8 7988
b7826503 7989 CHECK_STRING (string);
d46c5b12 7990 if (NILP (coding_system))
4956c225 7991 {
df7492f9
KH
7992 if (! norecord)
7993 Vlast_coding_system_used = Qno_conversion;
7994 if (NILP (dst_object))
7995 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 7996 }
b73bfc1c 7997
df7492f9
KH
7998 if (NILP (coding_system))
7999 coding_system = Qno_conversion;
8000 else
8001 CHECK_CODING_SYSTEM (coding_system);
8002 if (NILP (dst_object))
8003 dst_object = Qt;
8004 else if (! EQ (dst_object, Qt))
8005 CHECK_BUFFER (dst_object);
73be902c 8006
df7492f9 8007 setup_coding_system (coding_system, &coding);
d46c5b12 8008 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
8009 chars = SCHARS (string);
8010 bytes = SBYTES (string);
df7492f9
KH
8011 if (encodep)
8012 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8013 else
8014 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8015 if (! norecord)
8016 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 8017
df7492f9
KH
8018 return (BUFFERP (dst_object)
8019 ? make_number (coding.produced_char)
8020 : coding.dst_object);
4ed46869 8021}
73be902c 8022
b73bfc1c 8023
ecec61c1 8024/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 8025 Do not set Vlast_coding_system_used.
4ed46869 8026
ec6d2bb8
KH
8027 This function is called only from macros DECODE_FILE and
8028 ENCODE_FILE, thus we ignore character composition. */
4ed46869 8029
ecec61c1
KH
8030Lisp_Object
8031code_convert_string_norecord (string, coding_system, encodep)
8032 Lisp_Object string, coding_system;
8033 int encodep;
4ed46869 8034{
0be8721c 8035 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
8036}
8037
4ed46869 8038
df7492f9
KH
8039DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8040 2, 4, 0,
8041 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8042
8043Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8044if the decoding operation is trivial.
ecec61c1 8045
df7492f9 8046Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 8047inserted in BUFFER instead of returned as a string. In this case,
df7492f9 8048the return value is BUFFER.
ecec61c1 8049
df7492f9
KH
8050This function sets `last-coding-system-used' to the precise coding system
8051used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8052not fully specified. */)
8053 (string, coding_system, nocopy, buffer)
8054 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 8055{
df7492f9
KH
8056 return code_convert_string (string, coding_system, buffer,
8057 0, ! NILP (nocopy), 0);
4ed46869
KH
8058}
8059
df7492f9
KH
8060DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8061 2, 4, 0,
8062 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8063
8064Optional third arg NOCOPY non-nil means it is OK to return STRING
8065itself if the encoding operation is trivial.
8066
8067Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 8068inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
8069the return value is BUFFER.
8070
8071This function sets `last-coding-system-used' to the precise coding system
8072used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8073not fully specified.) */)
8074 (string, coding_system, nocopy, buffer)
8075 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 8076{
df7492f9 8077 return code_convert_string (string, coding_system, buffer,
c197f191 8078 1, ! NILP (nocopy), 1);
4ed46869 8079}
df7492f9 8080
3a73fa5d 8081\f
4ed46869 8082DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
8083 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8084Return the corresponding character. */)
8085 (code)
4ed46869 8086 Lisp_Object code;
4ed46869 8087{
df7492f9
KH
8088 Lisp_Object spec, attrs, val;
8089 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8090 int c;
4ed46869 8091
df7492f9
KH
8092 CHECK_NATNUM (code);
8093 c = XFASTINT (code);
8094 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8095 attrs = AREF (spec, 0);
4ed46869 8096
df7492f9
KH
8097 if (ASCII_BYTE_P (c)
8098 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8099 return code;
4ed46869 8100
df7492f9
KH
8101 val = CODING_ATTR_CHARSET_LIST (attrs);
8102 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
8103 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8104 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 8105
df7492f9
KH
8106 if (c <= 0x7F)
8107 charset = charset_roman;
8108 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 8109 {
df7492f9
KH
8110 charset = charset_kana;
8111 c -= 0x80;
4ed46869 8112 }
55ab7be3 8113 else
4ed46869 8114 {
004068e4 8115 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
8116
8117 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8118 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8119 error ("Invalid code: %d", code);
8120 SJIS_TO_JIS (c);
8121 charset = charset_kanji;
4ed46869 8122 }
df7492f9
KH
8123 c = DECODE_CHAR (charset, c);
8124 if (c < 0)
8125 error ("Invalid code: %d", code);
8126 return make_number (c);
93dec019 8127}
4ed46869 8128
48b0f3ae 8129
4ed46869 8130DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
8131 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
8132Return the corresponding code in SJIS. */)
8133 (ch)
df7492f9 8134 Lisp_Object ch;
4ed46869 8135{
df7492f9
KH
8136 Lisp_Object spec, attrs, charset_list;
8137 int c;
8138 struct charset *charset;
8139 unsigned code;
48b0f3ae 8140
df7492f9
KH
8141 CHECK_CHARACTER (ch);
8142 c = XFASTINT (ch);
8143 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8144 attrs = AREF (spec, 0);
8145
8146 if (ASCII_CHAR_P (c)
8147 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8148 return ch;
8149
8150 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8151 charset = char_charset (c, charset_list, &code);
8152 if (code == CHARSET_INVALID_CODE (charset))
8153 error ("Can't encode by shift_jis encoding: %d", c);
8154 JIS_TO_SJIS (code);
8155
8156 return make_number (code);
4ed46869
KH
8157}
8158
8159DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
8160 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8161Return the corresponding character. */)
8162 (code)
4ed46869 8163 Lisp_Object code;
d46c5b12 8164{
df7492f9
KH
8165 Lisp_Object spec, attrs, val;
8166 struct charset *charset_roman, *charset_big5, *charset;
8167 int c;
6289dd10 8168
df7492f9
KH
8169 CHECK_NATNUM (code);
8170 c = XFASTINT (code);
8171 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8172 attrs = AREF (spec, 0);
4ed46869 8173
df7492f9
KH
8174 if (ASCII_BYTE_P (c)
8175 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8176 return code;
6289dd10 8177
df7492f9
KH
8178 val = CODING_ATTR_CHARSET_LIST (attrs);
8179 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8180 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 8181
df7492f9
KH
8182 if (c <= 0x7F)
8183 charset = charset_roman;
c28a9453
KH
8184 else
8185 {
df7492f9
KH
8186 int b1 = c >> 8, b2 = c & 0x7F;
8187 if (b1 < 0xA1 || b1 > 0xFE
8188 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8189 error ("Invalid code: %d", code);
8190 charset = charset_big5;
c28a9453 8191 }
df7492f9
KH
8192 c = DECODE_CHAR (charset, (unsigned )c);
8193 if (c < 0)
8194 error ("Invalid code: %d", code);
8195 return make_number (c);
d46c5b12 8196}
6289dd10 8197
4ed46869 8198DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
8199 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
8200Return the corresponding character code in Big5. */)
8201 (ch)
4ed46869
KH
8202 Lisp_Object ch;
8203{
df7492f9
KH
8204 Lisp_Object spec, attrs, charset_list;
8205 struct charset *charset;
8206 int c;
8207 unsigned code;
8208
8209 CHECK_CHARACTER (ch);
8210 c = XFASTINT (ch);
8211 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8212 attrs = AREF (spec, 0);
8213 if (ASCII_CHAR_P (c)
8214 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8215 return ch;
8216
8217 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8218 charset = char_charset (c, charset_list, &code);
8219 if (code == CHARSET_INVALID_CODE (charset))
8220 error ("Can't encode by Big5 encoding: %d", c);
8221
8222 return make_number (code);
4ed46869 8223}
48b0f3ae 8224
3a73fa5d 8225\f
1ba9e4ab
KH
8226DEFUN ("set-terminal-coding-system-internal",
8227 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
8228 Sset_terminal_coding_system_internal, 1, 1, 0,
8229 doc: /* Internal use only. */)
8230 (coding_system)
b74e4686 8231 Lisp_Object coding_system;
4ed46869 8232{
b7826503 8233 CHECK_SYMBOL (coding_system);
df7492f9
KH
8234 setup_coding_system (Fcheck_coding_system (coding_system),
8235 &terminal_coding);
48b0f3ae 8236
70c22245 8237 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
8238 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
8239 /* Characer composition should be disabled. */
8240 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
8241 terminal_coding.src_multibyte = 1;
8242 terminal_coding.dst_multibyte = 0;
4ed46869
KH
8243 return Qnil;
8244}
8245
c4825358
KH
8246DEFUN ("set-safe-terminal-coding-system-internal",
8247 Fset_safe_terminal_coding_system_internal,
48b0f3ae 8248 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 8249 doc: /* Internal use only. */)
48b0f3ae 8250 (coding_system)
b74e4686 8251 Lisp_Object coding_system;
d46c5b12 8252{
b7826503 8253 CHECK_SYMBOL (coding_system);
c4825358
KH
8254 setup_coding_system (Fcheck_coding_system (coding_system),
8255 &safe_terminal_coding);
df7492f9
KH
8256 /* Characer composition should be disabled. */
8257 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
8258 safe_terminal_coding.src_multibyte = 1;
8259 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
8260 return Qnil;
8261}
4ed46869 8262
4ed46869
KH
8263DEFUN ("terminal-coding-system",
8264 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
8265 doc: /* Return coding system specified for terminal output. */)
8266 ()
4ed46869 8267{
ae6f73fa
KH
8268 Lisp_Object coding_system;
8269
8270 coding_system = CODING_ID_NAME (terminal_coding.id);
8271 /* For backward compatibility, return nil if it is `undecided'. */
f75c90a9 8272 return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
4ed46869
KH
8273}
8274
1ba9e4ab
KH
8275DEFUN ("set-keyboard-coding-system-internal",
8276 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
8277 Sset_keyboard_coding_system_internal, 1, 1, 0,
8278 doc: /* Internal use only. */)
8279 (coding_system)
4ed46869
KH
8280 Lisp_Object coding_system;
8281{
b7826503 8282 CHECK_SYMBOL (coding_system);
df7492f9
KH
8283 setup_coding_system (Fcheck_coding_system (coding_system),
8284 &keyboard_coding);
8285 /* Characer composition should be disabled. */
8286 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
8287 return Qnil;
8288}
8289
8290DEFUN ("keyboard-coding-system",
8291 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
8292 doc: /* Return coding system specified for decoding keyboard input. */)
8293 ()
4ed46869 8294{
df7492f9 8295 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
8296}
8297
4ed46869 8298\f
a5d301df
KH
8299DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8300 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
8301 doc: /* Choose a coding system for an operation based on the target name.
8302The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8303DECODING-SYSTEM is the coding system to use for decoding
8304\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8305for encoding (in case OPERATION does encoding).
05e6f5dc 8306
48b0f3ae
PJ
8307The first argument OPERATION specifies an I/O primitive:
8308 For file I/O, `insert-file-contents' or `write-region'.
8309 For process I/O, `call-process', `call-process-region', or `start-process'.
8310 For network I/O, `open-network-stream'.
05e6f5dc 8311
48b0f3ae
PJ
8312The remaining arguments should be the same arguments that were passed
8313to the primitive. Depending on which primitive, one of those arguments
8314is selected as the TARGET. For example, if OPERATION does file I/O,
8315whichever argument specifies the file name is TARGET.
05e6f5dc 8316
48b0f3ae 8317TARGET has a meaning which depends on OPERATION:
b883cdb2 8318 For file I/O, TARGET is a file name (except for the special case below).
48b0f3ae
PJ
8319 For process I/O, TARGET is a process name.
8320 For network I/O, TARGET is a service name or a port number
05e6f5dc 8321
48b0f3ae
PJ
8322This function looks up what specified for TARGET in,
8323`file-coding-system-alist', `process-coding-system-alist',
8324or `network-coding-system-alist' depending on OPERATION.
8325They may specify a coding system, a cons of coding systems,
8326or a function symbol to call.
8327In the last case, we call the function with one argument,
8328which is a list of all the arguments given to this function.
8329
b883cdb2
MB
8330If OPERATION is `insert-file-contents', the argument corresponding to
8331TARGET may be a cons (FILENAME . BUFFER). In that case, FILENAME is a
8332file name to look up, and BUFFER is a buffer that contains the file's
8333contents (not yet decoded). If `file-coding-system-alist' specifies a
8334function to call for FILENAME, that function should examine the
8335contents of BUFFER instead of reading the file.
8336
48b0f3ae
PJ
8337usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
8338 (nargs, args)
4ed46869
KH
8339 int nargs;
8340 Lisp_Object *args;
6b89e3aa 8341{
4ed46869
KH
8342 Lisp_Object operation, target_idx, target, val;
8343 register Lisp_Object chain;
177c0ea7 8344
4ed46869
KH
8345 if (nargs < 2)
8346 error ("Too few arguments");
8347 operation = args[0];
8348 if (!SYMBOLP (operation)
8349 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 8350 error ("Invalid first arguement");
4ed46869
KH
8351 if (nargs < 1 + XINT (target_idx))
8352 error ("Too few arguments for operation: %s",
8f924df7 8353 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
8354 target = args[XINT (target_idx) + 1];
8355 if (!(STRINGP (target)
091a0ff0
KH
8356 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8357 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
4ed46869 8358 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 8359 error ("Invalid %dth argument", XINT (target_idx) + 1);
091a0ff0
KH
8360 if (CONSP (target))
8361 target = XCAR (target);
4ed46869 8362
2e34157c
RS
8363 chain = ((EQ (operation, Qinsert_file_contents)
8364 || EQ (operation, Qwrite_region))
02ba4723 8365 ? Vfile_coding_system_alist
2e34157c 8366 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
8367 ? Vnetwork_coding_system_alist
8368 : Vprocess_coding_system_alist));
4ed46869
KH
8369 if (NILP (chain))
8370 return Qnil;
8371
03699b14 8372 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 8373 {
f44d27ce 8374 Lisp_Object elt;
6b89e3aa 8375
df7492f9 8376 elt = XCAR (chain);
4ed46869
KH
8377 if (CONSP (elt)
8378 && ((STRINGP (target)
03699b14
KR
8379 && STRINGP (XCAR (elt))
8380 && fast_string_match (XCAR (elt), target) >= 0)
8381 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 8382 {
03699b14 8383 val = XCDR (elt);
b19fd4c5
KH
8384 /* Here, if VAL is both a valid coding system and a valid
8385 function symbol, we return VAL as a coding system. */
02ba4723
KH
8386 if (CONSP (val))
8387 return val;
8388 if (! SYMBOLP (val))
8389 return Qnil;
8390 if (! NILP (Fcoding_system_p (val)))
8391 return Fcons (val, val);
b19fd4c5 8392 if (! NILP (Ffboundp (val)))
6b89e3aa 8393 {
b883cdb2 8394 val = safe_call1 (val, Flist (nargs, args));
b19fd4c5
KH
8395 if (CONSP (val))
8396 return val;
8397 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8398 return Fcons (val, val);
6b89e3aa 8399 }
02ba4723 8400 return Qnil;
6b89e3aa
KH
8401 }
8402 }
4ed46869 8403 return Qnil;
6b89e3aa
KH
8404}
8405
df7492f9 8406DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 8407 Sset_coding_system_priority, 0, MANY, 0,
da7db224 8408 doc: /* Assign higher priority to the coding systems given as arguments.
ff563fce 8409If multiple coding systems belongs to the same category,
a3181084
DL
8410all but the first one are ignored.
8411
8412usage: (set-coding-system-priority ...) */)
df7492f9
KH
8413 (nargs, args)
8414 int nargs;
8415 Lisp_Object *args;
8416{
8417 int i, j;
8418 int changed[coding_category_max];
8419 enum coding_category priorities[coding_category_max];
8420
8421 bzero (changed, sizeof changed);
6b89e3aa 8422
df7492f9 8423 for (i = j = 0; i < nargs; i++)
6b89e3aa 8424 {
df7492f9
KH
8425 enum coding_category category;
8426 Lisp_Object spec, attrs;
6b89e3aa 8427
df7492f9
KH
8428 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8429 attrs = AREF (spec, 0);
8430 category = XINT (CODING_ATTR_CATEGORY (attrs));
8431 if (changed[category])
8432 /* Ignore this coding system because a coding system of the
8433 same category already had a higher priority. */
8434 continue;
8435 changed[category] = 1;
8436 priorities[j++] = category;
8437 if (coding_categories[category].id >= 0
8438 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8439 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 8440 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 8441 }
6b89e3aa 8442
df7492f9
KH
8443 /* Now we have decided top J priorities. Reflect the order of the
8444 original priorities to the remaining priorities. */
6b89e3aa 8445
df7492f9 8446 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 8447 {
df7492f9
KH
8448 while (j < coding_category_max
8449 && changed[coding_priorities[j]])
8450 j++;
8451 if (j == coding_category_max)
8452 abort ();
8453 priorities[i] = coding_priorities[j];
8454 }
6b89e3aa 8455
df7492f9 8456 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 8457
ff563fce
KH
8458 /* Update `coding-category-list'. */
8459 Vcoding_category_list = Qnil;
8460 for (i = coding_category_max - 1; i >= 0; i--)
8461 Vcoding_category_list
8462 = Fcons (AREF (Vcoding_category_table, priorities[i]),
8463 Vcoding_category_list);
6b89e3aa 8464
df7492f9 8465 return Qnil;
6b89e3aa
KH
8466}
8467
df7492f9
KH
8468DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8469 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
8470 doc: /* Return a list of coding systems ordered by their priorities.
8471HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
8472 (highestp)
8473 Lisp_Object highestp;
d46c5b12
KH
8474{
8475 int i;
df7492f9 8476 Lisp_Object val;
6b89e3aa 8477
df7492f9 8478 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 8479 {
df7492f9
KH
8480 enum coding_category category = coding_priorities[i];
8481 int id = coding_categories[category].id;
8482 Lisp_Object attrs;
068a9dbd 8483
df7492f9
KH
8484 if (id < 0)
8485 continue;
8486 attrs = CODING_ID_ATTRS (id);
8487 if (! NILP (highestp))
8488 return CODING_ATTR_BASE_NAME (attrs);
8489 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8490 }
8491 return Fnreverse (val);
8492}
068a9dbd 8493
f0064e1f 8494static char *suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
8495
8496static Lisp_Object
df7492f9
KH
8497make_subsidiaries (base)
8498 Lisp_Object base;
068a9dbd 8499{
df7492f9 8500 Lisp_Object subsidiaries;
8f924df7 8501 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
8502 char *buf = (char *) alloca (base_name_len + 6);
8503 int i;
068a9dbd 8504
8f924df7 8505 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
8506 subsidiaries = Fmake_vector (make_number (3), Qnil);
8507 for (i = 0; i < 3; i++)
068a9dbd 8508 {
df7492f9
KH
8509 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8510 ASET (subsidiaries, i, intern (buf));
068a9dbd 8511 }
df7492f9 8512 return subsidiaries;
068a9dbd
KH
8513}
8514
8515
df7492f9
KH
/* Define a new coding system from the positional arguments in ARGS
   (indexed by the coding_arg_* constants).  The function fills a fresh
   attribute vector ATTRS, validates the extra arguments required by
   the coding type, chooses a coding category, and registers the coding
   system in Vcoding_system_hash_table, Vcoding_system_list and
   Vcoding_system_alist.  When the eol-type argument is nil, three
   subsidiary coding systems made by make_subsidiaries are registered
   as well.  Signals wrong-number-of-arguments when NARGS is too small
   for the coding type; otherwise returns nil.  */
8516DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8517 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
8518 doc: /* For internal use only.
8519usage: (define-coding-system-internal ...) */)
df7492f9
KH
8520 (nargs, args)
8521 int nargs;
8522 Lisp_Object *args;
068a9dbd 8523{
df7492f9
KH
8524 Lisp_Object name;
8525 Lisp_Object spec_vec; /* [ ATTRS ALIASES EOL_TYPE ] */
8526 Lisp_Object attrs; /* Vector of attributes. */
8527 Lisp_Object eol_type;
8528 Lisp_Object aliases;
8529 Lisp_Object coding_type, charset_list, safe_charsets;
8530 enum coding_category category;
8531 Lisp_Object tail, val;
8532 int max_charset_id = 0;
8533 int i;
068a9dbd 8534
df7492f9
KH
8535 if (nargs < coding_arg_max)
8536 goto short_args;
068a9dbd 8537
df7492f9 8538 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 8539
df7492f9
KH
8540 name = args[coding_arg_name];
8541 CHECK_SYMBOL (name);
8542 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 8543
/* The mnemonic may be either a string or a character. */
df7492f9
KH
8544 val = args[coding_arg_mnemonic];
8545 if (! STRINGP (val))
8546 CHECK_CHARACTER (val);
8547 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 8548
df7492f9
KH
8549 coding_type = args[coding_arg_coding_type];
8550 CHECK_SYMBOL (coding_type);
8551 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 8552
/* Resolve the charset list.  The symbols Qiso_2022 and Qemacs_mule
   select the corresponding global list (and require a matching coding
   type); any non-symbol value is copied and each element is replaced
   by its charset ID.  MAX_CHARSET_ID records the largest ID seen. */
df7492f9
KH
8553 charset_list = args[coding_arg_charset_list];
8554 if (SYMBOLP (charset_list))
8555 {
8556 if (EQ (charset_list, Qiso_2022))
8557 {
8558 if (! EQ (coding_type, Qiso_2022))
8559 error ("Invalid charset-list");
8560 charset_list = Viso_2022_charset_list;
8561 }
8562 else if (EQ (charset_list, Qemacs_mule))
8563 {
8564 if (! EQ (coding_type, Qemacs_mule))
8565 error ("Invalid charset-list");
8566 charset_list = Vemacs_mule_charset_list;
8567 }
8568 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8569 if (max_charset_id < XFASTINT (XCAR (tail)))
8570 max_charset_id = XFASTINT (XCAR (tail));
8571 }
068a9dbd
KH
8572 else
8573 {
df7492f9
KH
8574 charset_list = Fcopy_sequence (charset_list);
8575 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
068a9dbd 8576 {
df7492f9
KH
8577 struct charset *charset;
8578
8579 val = Fcar (tail);
8580 CHECK_CHARSET_GET_CHARSET (val, charset);
8581 if (EQ (coding_type, Qiso_2022)
8582 ? CHARSET_ISO_FINAL (charset) < 0
8583 : EQ (coding_type, Qemacs_mule)
8584 ? CHARSET_EMACS_MULE_ID (charset) < 0
8585 : 0)
8586 error ("Can't handle charset `%s'",
8f924df7 8587 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 8588
8f924df7 8589 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
8590 if (max_charset_id < charset->id)
8591 max_charset_id = charset->id;
068a9dbd
KH
8592 }
8593 }
df7492f9 8594 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 8595
/* Build a string indexed by charset ID: byte 0 for every charset in
   the list, 255 for every other ID up to MAX_CHARSET_ID. */
df7492f9
KH
8596 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8597 make_number (255));
8598 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 8599 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 8600 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 8601
584948ac 8602 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 8603
/* Translation tables may be char-tables, conses, or symbols. */
df7492f9 8604 val = args[coding_arg_decode_translation_table];
a6f87d34 8605 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8606 CHECK_SYMBOL (val);
df7492f9 8607 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 8608
df7492f9 8609 val = args[coding_arg_encode_translation_table];
a6f87d34 8610 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8611 CHECK_SYMBOL (val);
df7492f9 8612 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 8613
df7492f9
KH
8614 val = args[coding_arg_post_read_conversion];
8615 CHECK_SYMBOL (val);
8616 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 8617
df7492f9
KH
8618 val = args[coding_arg_pre_write_conversion];
8619 CHECK_SYMBOL (val);
8620 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 8621
/* A nil default character is stored as a space. */
df7492f9
KH
8622 val = args[coding_arg_default_char];
8623 if (NILP (val))
8624 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8625 else
8626 {
8f924df7 8627 CHECK_CHARACTER (val);
df7492f9
KH
8628 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8629 }
4031e2bf 8630
/* The for-unibyte flag is normalized to exactly nil or t. */
8f924df7
KH
8631 val = args[coding_arg_for_unibyte];
8632 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 8633
df7492f9
KH
8634 val = args[coding_arg_plist];
8635 CHECK_LIST (val);
8636 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 8637
/* From here on, each branch handles one coding type: it validates the
   type-specific arguments, stores type-specific attributes, and sets
   CATEGORY. */
df7492f9
KH
8638 if (EQ (coding_type, Qcharset))
8639 {
c7c66a95
KH
8640 /* Generate a lisp vector of 256 elements. Each element is nil,
8641 integer, or a list of charset IDs.
3a73fa5d 8642
c7c66a95
KH
8643 If Nth element is nil, the byte code N is invalid in this
8644 coding system.
4ed46869 8645
c7c66a95
KH
8646 If Nth element is a number NUM, N is the first byte of a
8647 charset whose ID is NUM.
4ed46869 8648
c7c66a95
KH
8649 If Nth element is a list of charset IDs, N is the first byte
8650 of one of them. The list is sorted by dimensions of the
2bc515e4 8651 charsets. A charset of smaller dimension comes first. */
df7492f9 8652 val = Fmake_vector (make_number (256), Qnil);
4ed46869 8653
5c99c2e6 8654 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 8655 {
c7c66a95
KH
8656 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8657 int dim = CHARSET_DIMENSION (charset);
8658 int idx = (dim - 1) * 4;
4ed46869 8659
5c99c2e6 8660 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 8661 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 8662
15d143f7
KH
8663 for (i = charset->code_space[idx];
8664 i <= charset->code_space[idx + 1]; i++)
8665 {
c7c66a95
KH
8666 Lisp_Object tmp, tmp2;
8667 int dim2;
ec6d2bb8 8668
c7c66a95
KH
8669 tmp = AREF (val, i);
8670 if (NILP (tmp))
8671 tmp = XCAR (tail);
8672 else if (NUMBERP (tmp))
8673 {
8674 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8675 if (dim < dim2)
c7c66a95 8676 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
8677 else
8678 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 8679 }
15d143f7 8680 else
c7c66a95
KH
8681 {
8682 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8683 {
8684 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8685 if (dim < dim2)
8686 break;
8687 }
8688 if (NILP (tmp2))
8689 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8690 else
8691 {
8692 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8693 XSETCAR (tmp2, XCAR (tail));
8694 }
8695 }
8696 ASET (val, i, tmp);
15d143f7 8697 }
df7492f9
KH
8698 }
8699 ASET (attrs, coding_attr_charset_valids, val);
8700 category = coding_category_charset;
8701 }
8702 else if (EQ (coding_type, Qccl))
8703 {
8704 Lisp_Object valids;
ecec61c1 8705
df7492f9
KH
8706 if (nargs < coding_arg_ccl_max)
8707 goto short_args;
ecec61c1 8708
df7492f9
KH
8709 val = args[coding_arg_ccl_decoder];
8710 CHECK_CCL_PROGRAM (val);
8711 if (VECTORP (val))
8712 val = Fcopy_sequence (val);
8713 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 8714
df7492f9
KH
8715 val = args[coding_arg_ccl_encoder];
8716 CHECK_CCL_PROGRAM (val);
8717 if (VECTORP (val))
8718 val = Fcopy_sequence (val);
8719 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 8720
/* Each element of the valids argument is either a single integer or a
   cons (FROM . TO), all within 0..255; the covered bytes are set to 1
   in a 256-byte string. */
df7492f9
KH
8721 val = args[coding_arg_ccl_valids];
8722 valids = Fmake_string (make_number (256), make_number (0));
8723 for (tail = val; !NILP (tail); tail = Fcdr (tail))
8724 {
8dcbea82 8725 int from, to;
ecec61c1 8726
df7492f9
KH
8727 val = Fcar (tail);
8728 if (INTEGERP (val))
8dcbea82
KH
8729 {
8730 from = to = XINT (val);
8731 if (from < 0 || from > 255)
8732 args_out_of_range_3 (val, make_number (0), make_number (255));
8733 }
df7492f9
KH
8734 else
8735 {
df7492f9 8736 CHECK_CONS (val);
8f924df7
KH
8737 CHECK_NATNUM_CAR (val);
8738 CHECK_NATNUM_CDR (val);
df7492f9 8739 from = XINT (XCAR (val));
8f924df7 8740 if (from > 255)
8dcbea82
KH
8741 args_out_of_range_3 (XCAR (val),
8742 make_number (0), make_number (255));
df7492f9 8743 to = XINT (XCDR (val));
8dcbea82
KH
8744 if (to < from || to > 255)
8745 args_out_of_range_3 (XCDR (val),
8746 XCAR (val), make_number (255));
df7492f9 8747 }
8dcbea82 8748 for (i = from; i <= to; i++)
8f924df7 8749 SSET (valids, i, 1);
df7492f9
KH
8750 }
8751 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 8752
df7492f9 8753 category = coding_category_ccl;
55ab7be3 8754 }
df7492f9 8755 else if (EQ (coding_type, Qutf_16))
55ab7be3 8756 {
df7492f9 8757 Lisp_Object bom, endian;
4ed46869 8758
584948ac 8759 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 8760
df7492f9
KH
8761 if (nargs < coding_arg_utf16_max)
8762 goto short_args;
4ed46869 8763
/* BOM is nil, t, or a cons of two coding systems. */
df7492f9
KH
8764 bom = args[coding_arg_utf16_bom];
8765 if (! NILP (bom) && ! EQ (bom, Qt))
8766 {
8767 CHECK_CONS (bom);
8f924df7
KH
8768 val = XCAR (bom);
8769 CHECK_CODING_SYSTEM (val);
8770 val = XCDR (bom);
8771 CHECK_CODING_SYSTEM (val);
df7492f9
KH
8772 }
8773 ASET (attrs, coding_attr_utf_16_bom, bom);
8774
/* A nil endian defaults to Qbig. */
8775 endian = args[coding_arg_utf16_endian];
b49a1807
KH
8776 CHECK_SYMBOL (endian);
8777 if (NILP (endian))
8778 endian = Qbig;
8779 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 8780 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
8781 ASET (attrs, coding_attr_utf_16_endian, endian);
8782
8783 category = (CONSP (bom)
8784 ? coding_category_utf_16_auto
8785 : NILP (bom)
b49a1807 8786 ? (EQ (endian, Qbig)
df7492f9
KH
8787 ? coding_category_utf_16_be_nosig
8788 : coding_category_utf_16_le_nosig)
b49a1807 8789 : (EQ (endian, Qbig)
df7492f9
KH
8790 ? coding_category_utf_16_be
8791 : coding_category_utf_16_le));
8792 }
8793 else if (EQ (coding_type, Qiso_2022))
8794 {
8795 Lisp_Object initial, reg_usage, request, flags;
4776e638 8796 int i;
1397dc18 8797
df7492f9
KH
8798 if (nargs < coding_arg_iso2022_max)
8799 goto short_args;
8800
/* Replace each of the four entries of the copied INITIAL vector by a
   charset ID, or by -1 when the entry is nil. */
8801 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8802 CHECK_VECTOR (initial);
8803 for (i = 0; i < 4; i++)
8804 {
8805 val = Faref (initial, make_number (i));
8806 if (! NILP (val))
8807 {
584948ac
KH
8808 struct charset *charset;
8809
8810 CHECK_CHARSET_GET_CHARSET (val, charset);
8811 ASET (initial, i, make_number (CHARSET_ID (charset)));
8812 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8813 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8814 }
8815 else
8816 ASET (initial, i, make_number (-1));
8817 }
8818
8819 reg_usage = args[coding_arg_iso2022_reg_usage];
8820 CHECK_CONS (reg_usage);
8f924df7
KH
8821 CHECK_NUMBER_CAR (reg_usage);
8822 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
8823
/* NOTE(review): Fcopy_sequence presumably copies only the list
   spine, so the XSETCAR below would modify cons cells shared with the
   caller's argument -- confirm whether that is intended. */
8824 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8825 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 8826 {
df7492f9 8827 int id;
8f924df7 8828 Lisp_Object tmp;
df7492f9
KH
8829
8830 val = Fcar (tail);
8831 CHECK_CONS (val);
8f924df7
KH
8832 tmp = XCAR (val);
8833 CHECK_CHARSET_GET_ID (tmp, id);
8834 CHECK_NATNUM_CDR (val);
df7492f9
KH
8835 if (XINT (XCDR (val)) >= 4)
8836 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 8837 XSETCAR (val, make_number (id));
1397dc18 8838 }
4ed46869 8839
df7492f9
KH
8840 flags = args[coding_arg_iso2022_flags];
8841 CHECK_NATNUM (flags);
8842 i = XINT (flags);
8843 if (EQ (args[coding_arg_charset_list], Qiso_2022))
8844 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8845
8846 ASET (attrs, coding_attr_iso_initial, initial);
8847 ASET (attrs, coding_attr_iso_usage, reg_usage);
8848 ASET (attrs, coding_attr_iso_request, request);
8849 ASET (attrs, coding_attr_iso_flags, flags);
8850 setup_iso_safe_charsets (attrs);
8851
/* I still holds the flags as given, without
   CODING_ISO_FLAG_FULL_SUPPORT. */
8852 if (i & CODING_ISO_FLAG_SEVEN_BITS)
8853 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8854 | CODING_ISO_FLAG_SINGLE_SHIFT))
8855 ? coding_category_iso_7_else
8856 : EQ (args[coding_arg_charset_list], Qiso_2022)
8857 ? coding_category_iso_7
8858 : coding_category_iso_7_tight);
8859 else
8860 {
8861 int id = XINT (AREF (initial, 1));
8862
c6fb6e98 8863 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
8864 || EQ (args[coding_arg_charset_list], Qiso_2022)
8865 || id < 0)
8866 ? coding_category_iso_8_else
8867 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8868 ? coding_category_iso_8_1
8869 : coding_category_iso_8_2);
8870 }
0ce7886f
KH
8871 if (category != coding_category_iso_8_1
8872 && category != coding_category_iso_8_2)
8873 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
8874 }
8875 else if (EQ (coding_type, Qemacs_mule))
c28a9453 8876 {
df7492f9
KH
8877 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8878 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 8879 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 8880 category = coding_category_emacs_mule;
c28a9453 8881 }
df7492f9 8882 else if (EQ (coding_type, Qshift_jis))
c28a9453 8883 {
df7492f9
KH
8884
8885 struct charset *charset;
8886
/* Expected charsets, in order: two of dimension 1, then one or two of
   dimension 2. */
7d64c6ad 8887 if (XINT (Flength (charset_list)) != 3
6e07c25f 8888 && XINT (Flength (charset_list)) != 4)
7d64c6ad 8889 error ("There should be three or four charsets");
df7492f9
KH
8890
8891 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8892 if (CHARSET_DIMENSION (charset) != 1)
8893 error ("Dimension of charset %s is not one",
8f924df7 8894 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
8895 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8896 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8897
8898 charset_list = XCDR (charset_list);
8899 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8900 if (CHARSET_DIMENSION (charset) != 1)
8901 error ("Dimension of charset %s is not one",
8f924df7 8902 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
8903
8904 charset_list = XCDR (charset_list);
8905 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8906 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
8907 error ("Dimension of charset %s is not two",
8908 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8909
8910 charset_list = XCDR (charset_list);
2b917a06
KH
8911 if (! NILP (charset_list))
8912 {
8913 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8914 if (CHARSET_DIMENSION (charset) != 2)
8915 error ("Dimension of charset %s is not two",
8916 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8917 }
df7492f9
KH
8918
8919 category = coding_category_sjis;
8920 Vsjis_coding_system = name;
c28a9453 8921 }
df7492f9
KH
8922 else if (EQ (coding_type, Qbig5))
8923 {
8924 struct charset *charset;
4ed46869 8925
/* Expected charsets, in order: one of dimension 1, one of dimension 2. */
df7492f9
KH
8926 if (XINT (Flength (charset_list)) != 2)
8927 error ("There should be just two charsets");
8928
8929 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8930 if (CHARSET_DIMENSION (charset) != 1)
8931 error ("Dimension of charset %s is not one",
8f924df7 8932 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
8933 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8934 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8935
8936 charset_list = XCDR (charset_list);
8937 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8938 if (CHARSET_DIMENSION (charset) != 2)
8939 error ("Dimension of charset %s is not two",
8f924df7 8940 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 8941
df7492f9
KH
8942 category = coding_category_big5;
8943 Vbig5_coding_system = name;
8944 }
8945 else if (EQ (coding_type, Qraw_text))
c28a9453 8946 {
584948ac
KH
8947 category = coding_category_raw_text;
8948 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 8949 }
df7492f9 8950 else if (EQ (coding_type, Qutf_8))
4ed46869 8951 {
584948ac
KH
8952 category = coding_category_utf_8;
8953 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4ed46869 8954 }
df7492f9
KH
8955 else if (EQ (coding_type, Qundecided))
8956 category = coding_category_undecided;
4ed46869 8957 else
df7492f9 8958 error ("Invalid coding system type: %s",
8f924df7 8959 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 8960
/* Record the category and prepend the category and the final
   ASCII-compatibility value to the property list. */
df7492f9 8961 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
8962 CODING_ATTR_PLIST (attrs)
8963 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8964 CODING_ATTR_PLIST (attrs)));
35befdaa
KH
8965 CODING_ATTR_PLIST (attrs)
8966 = Fcons (QCascii_compatible_p,
8967 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
8968 CODING_ATTR_PLIST (attrs)));
c4825358 8969
df7492f9
KH
8970 eol_type = args[coding_arg_eol_type];
8971 if (! NILP (eol_type)
8972 && ! EQ (eol_type, Qunix)
8973 && ! EQ (eol_type, Qdos)
8974 && ! EQ (eol_type, Qmac))
8975 error ("Invalid eol-type");
4ed46869 8976
df7492f9 8977 aliases = Fcons (name, Qnil);
4ed46869 8978
/* With a nil eol-type, register three subsidiary coding systems that
   share ATTRS, one each for Qunix, Qdos and Qmac; EOL_TYPE then
   becomes the vector of their names. */
df7492f9
KH
8979 if (NILP (eol_type))
8980 {
8981 eol_type = make_subsidiaries (name);
8982 for (i = 0; i < 3; i++)
1397dc18 8983 {
df7492f9
KH
8984 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8985
8986 this_name = AREF (eol_type, i);
8987 this_aliases = Fcons (this_name, Qnil);
8988 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8989 this_spec = Fmake_vector (make_number (3), attrs);
8990 ASET (this_spec, 1, this_aliases);
8991 ASET (this_spec, 2, this_eol_type);
8992 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8993 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
583f71ca
KH
8994 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
8995 if (NILP (val))
8996 Vcoding_system_alist
8997 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8998 Vcoding_system_alist);
1397dc18 8999 }
d46c5b12 9000 }
4ed46869 9001
/* Register the coding system itself.  Slot 0 of the spec vector is
   already ATTRS because Fmake_vector fills every slot with it. */
df7492f9
KH
9002 spec_vec = Fmake_vector (make_number (3), attrs);
9003 ASET (spec_vec, 1, aliases);
9004 ASET (spec_vec, 2, eol_type);
48b0f3ae 9005
df7492f9
KH
9006 Fputhash (name, spec_vec, Vcoding_system_hash_table);
9007 Vcoding_system_list = Fcons (name, Vcoding_system_list);
583f71ca
KH
9008 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9009 if (NILP (val))
9010 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9011 Vcoding_system_alist);
48b0f3ae 9012
/* (Re)load the category's coding system when the category's id is
   negative, or when NAME is already the coding system recorded for
   the category. */
df7492f9
KH
9013 {
9014 int id = coding_categories[category].id;
48b0f3ae 9015
df7492f9
KH
9016 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9017 setup_coding_system (name, &coding_categories[category]);
9018 }
48b0f3ae 9019
d46c5b12 9020 return Qnil;
48b0f3ae 9021
/* Reached when NARGS is smaller than the coding type requires. */
df7492f9
KH
9022 short_args:
9023 return Fsignal (Qwrong_number_of_arguments,
9024 Fcons (intern ("define-coding-system-internal"),
9025 make_number (nargs)));
d46c5b12 9026}
d6925f38 9028
a6f87d34
KH
9029DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9030 3, 3, 0,
9031 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
9032 (coding_system, prop, val)
9033 Lisp_Object coding_system, prop, val;
9034{
3dbe7859 9035 Lisp_Object spec, attrs;
a6f87d34
KH
9036
9037 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9038 attrs = AREF (spec, 0);
9039 if (EQ (prop, QCmnemonic))
9040 {
9041 if (! STRINGP (val))
9042 CHECK_CHARACTER (val);
9043 CODING_ATTR_MNEMONIC (attrs) = val;
9044 }
9045 else if (EQ (prop, QCdefalut_char))
9046 {
9047 if (NILP (val))
9048 val = make_number (' ');
9049 else
9050 CHECK_CHARACTER (val);
9051 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9052 }
9053 else if (EQ (prop, QCdecode_translation_table))
9054 {
9055 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9056 CHECK_SYMBOL (val);
9057 CODING_ATTR_DECODE_TBL (attrs) = val;
9058 }
9059 else if (EQ (prop, QCencode_translation_table))
9060 {
9061 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9062 CHECK_SYMBOL (val);
9063 CODING_ATTR_ENCODE_TBL (attrs) = val;
9064 }
9065 else if (EQ (prop, QCpost_read_conversion))
9066 {
9067 CHECK_SYMBOL (val);
9068 CODING_ATTR_POST_READ (attrs) = val;
9069 }
9070 else if (EQ (prop, QCpre_write_conversion))
9071 {
9072 CHECK_SYMBOL (val);
9073 CODING_ATTR_PRE_WRITE (attrs) = val;
9074 }
35befdaa
KH
9075 else if (EQ (prop, QCascii_compatible_p))
9076 {
9077 CODING_ATTR_ASCII_COMPAT (attrs) = val;
9078 }
a6f87d34
KH
9079
9080 CODING_ATTR_PLIST (attrs)
9081 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9082 return val;
9083}
9084
9085
df7492f9
KH
9086DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9087 Sdefine_coding_system_alias, 2, 2, 0,
9088 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
9089 (alias, coding_system)
9090 Lisp_Object alias, coding_system;
66cfb530 9091{
583f71ca 9092 Lisp_Object spec, aliases, eol_type, val;
4ed46869 9093
df7492f9
KH
9094 CHECK_SYMBOL (alias);
9095 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9096 aliases = AREF (spec, 1);
d6925f38
KH
9097 /* ALISES should be a list of length more than zero, and the first
9098 element is a base coding system. Append ALIAS at the tail of the
9099 list. */
df7492f9
KH
9100 while (!NILP (XCDR (aliases)))
9101 aliases = XCDR (aliases);
8f924df7 9102 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 9103
df7492f9
KH
9104 eol_type = AREF (spec, 2);
9105 if (VECTORP (eol_type))
4ed46869 9106 {
df7492f9
KH
9107 Lisp_Object subsidiaries;
9108 int i;
4ed46869 9109
df7492f9
KH
9110 subsidiaries = make_subsidiaries (alias);
9111 for (i = 0; i < 3; i++)
9112 Fdefine_coding_system_alias (AREF (subsidiaries, i),
9113 AREF (eol_type, i));
4ed46869 9114 }
df7492f9
KH
9115
9116 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 9117 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
583f71ca
KH
9118 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9119 if (NILP (val))
9120 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9121 Vcoding_system_alist);
66cfb530 9122
4ed46869
KH
9123 return Qnil;
9124}
9125
df7492f9
KH
9126DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9127 1, 1, 0,
9128 doc: /* Return the base of CODING-SYSTEM.
da7db224 9129Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
9130 (coding_system)
9131 Lisp_Object coding_system;
d46c5b12 9132{
df7492f9 9133 Lisp_Object spec, attrs;
d46c5b12 9134
df7492f9
KH
9135 if (NILP (coding_system))
9136 return (Qno_conversion);
9137 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9138 attrs = AREF (spec, 0);
9139 return CODING_ATTR_BASE_NAME (attrs);
9140}
1397dc18 9141
df7492f9
KH
9142DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9143 1, 1, 0,
9144 doc: "Return the property list of CODING-SYSTEM.")
9145 (coding_system)
9146 Lisp_Object coding_system;
9147{
9148 Lisp_Object spec, attrs;
1397dc18 9149
df7492f9
KH
9150 if (NILP (coding_system))
9151 coding_system = Qno_conversion;
9152 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9153 attrs = AREF (spec, 0);
9154 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
9155}
9156
df7492f9
KH
9157
9158DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9159 1, 1, 0,
da7db224 9160 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
9161 (coding_system)
9162 Lisp_Object coding_system;
66cfb530 9163{
df7492f9 9164 Lisp_Object spec;
84d60297 9165
df7492f9
KH
9166 if (NILP (coding_system))
9167 coding_system = Qno_conversion;
9168 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 9169 return AREF (spec, 1);
df7492f9 9170}
66cfb530 9171
df7492f9
KH
9172DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9173 Scoding_system_eol_type, 1, 1, 0,
9174 doc: /* Return eol-type of CODING-SYSTEM.
9175An eol-type is integer 0, 1, 2, or a vector of coding systems.
66cfb530 9176
df7492f9
KH
9177Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9178and CR respectively.
66cfb530 9179
df7492f9
KH
9180A vector value indicates that a format of end-of-line should be
9181detected automatically. Nth element of the vector is the subsidiary
9182coding system whose eol-type is N. */)
6b89e3aa
KH
9183 (coding_system)
9184 Lisp_Object coding_system;
9185{
df7492f9
KH
9186 Lisp_Object spec, eol_type;
9187 int n;
6b89e3aa 9188
df7492f9
KH
9189 if (NILP (coding_system))
9190 coding_system = Qno_conversion;
9191 if (! CODING_SYSTEM_P (coding_system))
9192 return Qnil;
9193 spec = CODING_SYSTEM_SPEC (coding_system);
9194 eol_type = AREF (spec, 2);
9195 if (VECTORP (eol_type))
9196 return Fcopy_sequence (eol_type);
9197 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9198 return make_number (n);
6b89e3aa
KH
9199}
9200
4ed46869
KH
9201#endif /* emacs */
9202
9203\f
1397dc18 9204/*** 12. Postamble ***/
4ed46869 9205
dfcf069d 9206void
4ed46869
KH
9207init_coding_once ()
9208{
9209 int i;
9210
df7492f9
KH
9211 for (i = 0; i < coding_category_max; i++)
9212 {
9213 coding_categories[i].id = -1;
9214 coding_priorities[i] = i;
9215 }
4ed46869
KH
9216
9217 /* ISO2022 specific initialize routine. */
9218 for (i = 0; i < 0x20; i++)
b73bfc1c 9219 iso_code_class[i] = ISO_control_0;
4ed46869
KH
9220 for (i = 0x21; i < 0x7F; i++)
9221 iso_code_class[i] = ISO_graphic_plane_0;
9222 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 9223 iso_code_class[i] = ISO_control_1;
4ed46869
KH
9224 for (i = 0xA1; i < 0xFF; i++)
9225 iso_code_class[i] = ISO_graphic_plane_1;
9226 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9227 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
9228 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9229 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9230 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9231 iso_code_class[ISO_CODE_ESC] = ISO_escape;
9232 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9233 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9234 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9235
df7492f9
KH
9236 for (i = 0; i < 256; i++)
9237 {
9238 emacs_mule_bytes[i] = 1;
9239 }
7c78e542
KH
9240 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9241 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9242 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9243 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
9244}
9245
9246#ifdef emacs
9247
dfcf069d 9248void
e0e989f6
KH
9249syms_of_coding ()
9250{
df7492f9 9251 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
9252 {
9253 Lisp_Object args[2];
9254 args[0] = QCtest;
9255 args[1] = Qeq;
9256 Vcoding_system_hash_table = Fmake_hash_table (2, args);
9257 }
df7492f9
KH
9258
9259 staticpro (&Vsjis_coding_system);
9260 Vsjis_coding_system = Qnil;
e0e989f6 9261
df7492f9
KH
9262 staticpro (&Vbig5_coding_system);
9263 Vbig5_coding_system = Qnil;
9264
24a73b0a
KH
9265 staticpro (&Vcode_conversion_reused_workbuf);
9266 Vcode_conversion_reused_workbuf = Qnil;
9267
9268 staticpro (&Vcode_conversion_workbuf_name);
9269 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
e0e989f6 9270
24a73b0a 9271 reused_workbuf_in_use = 0;
df7492f9
KH
9272
9273 DEFSYM (Qcharset, "charset");
9274 DEFSYM (Qtarget_idx, "target-idx");
9275 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
9276 Fset (Qcoding_system_history, Qnil);
9277
9ce27fde 9278 /* Target FILENAME is the first argument. */
e0e989f6 9279 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 9280 /* Target FILENAME is the third argument. */
e0e989f6
KH
9281 Fput (Qwrite_region, Qtarget_idx, make_number (2));
9282
df7492f9 9283 DEFSYM (Qcall_process, "call-process");
9ce27fde 9284 /* Target PROGRAM is the first argument. */
e0e989f6
KH
9285 Fput (Qcall_process, Qtarget_idx, make_number (0));
9286
df7492f9 9287 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 9288 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9289 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9290
df7492f9 9291 DEFSYM (Qstart_process, "start-process");
9ce27fde 9292 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9293 Fput (Qstart_process, Qtarget_idx, make_number (2));
9294
df7492f9 9295 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 9296 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
9297 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9298
df7492f9
KH
9299 DEFSYM (Qcoding_system, "coding-system");
9300 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 9301
df7492f9
KH
9302 DEFSYM (Qeol_type, "eol-type");
9303 DEFSYM (Qunix, "unix");
9304 DEFSYM (Qdos, "dos");
4ed46869 9305
df7492f9
KH
9306 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9307 DEFSYM (Qpost_read_conversion, "post-read-conversion");
9308 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9309 DEFSYM (Qdefault_char, "default-char");
9310 DEFSYM (Qundecided, "undecided");
9311 DEFSYM (Qno_conversion, "no-conversion");
9312 DEFSYM (Qraw_text, "raw-text");
4ed46869 9313
df7492f9 9314 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 9315
df7492f9 9316 DEFSYM (Qutf_8, "utf-8");
8f924df7 9317 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 9318
df7492f9 9319 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
9320 DEFSYM (Qbig, "big");
9321 DEFSYM (Qlittle, "little");
27901516 9322
df7492f9
KH
9323 DEFSYM (Qshift_jis, "shift-jis");
9324 DEFSYM (Qbig5, "big5");
4ed46869 9325
df7492f9 9326 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 9327
df7492f9 9328 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
9329 Fput (Qcoding_system_error, Qerror_conditions,
9330 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9331 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 9332 build_string ("Invalid coding system"));
4ed46869 9333
05e6f5dc
KH
9334 /* Intern this now in case it isn't already done.
9335 Setting this variable twice is harmless.
9336 But don't staticpro it here--that is done in alloc.c. */
9337 Qchar_table_extra_slots = intern ("char-table-extra-slots");
70c22245 9338
df7492f9 9339 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 9340 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
9341 DEFSYM (Qtranslation_table_id, "translation-table-id");
9342 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9343 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 9344
df7492f9 9345 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 9346
df7492f9 9347 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 9348
01378f49 9349 DEFSYM (QCcategory, ":category");
a6f87d34
KH
9350 DEFSYM (QCmnemonic, ":mnemonic");
9351 DEFSYM (QCdefalut_char, ":default-char");
9352 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9353 DEFSYM (QCencode_translation_table, ":encode-translation-table");
9354 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9355 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
35befdaa 9356 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
01378f49 9357
df7492f9
KH
9358 Vcoding_category_table
9359 = Fmake_vector (make_number (coding_category_max), Qnil);
9360 staticpro (&Vcoding_category_table);
9361 /* Followings are target of code detection. */
9362 ASET (Vcoding_category_table, coding_category_iso_7,
9363 intern ("coding-category-iso-7"));
9364 ASET (Vcoding_category_table, coding_category_iso_7_tight,
9365 intern ("coding-category-iso-7-tight"));
9366 ASET (Vcoding_category_table, coding_category_iso_8_1,
9367 intern ("coding-category-iso-8-1"));
9368 ASET (Vcoding_category_table, coding_category_iso_8_2,
9369 intern ("coding-category-iso-8-2"));
9370 ASET (Vcoding_category_table, coding_category_iso_7_else,
9371 intern ("coding-category-iso-7-else"));
9372 ASET (Vcoding_category_table, coding_category_iso_8_else,
9373 intern ("coding-category-iso-8-else"));
9374 ASET (Vcoding_category_table, coding_category_utf_8,
9375 intern ("coding-category-utf-8"));
9376 ASET (Vcoding_category_table, coding_category_utf_16_be,
9377 intern ("coding-category-utf-16-be"));
ff563fce
KH
9378 ASET (Vcoding_category_table, coding_category_utf_16_auto,
9379 intern ("coding-category-utf-16-auto"));
df7492f9
KH
9380 ASET (Vcoding_category_table, coding_category_utf_16_le,
9381 intern ("coding-category-utf-16-le"));
9382 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9383 intern ("coding-category-utf-16-be-nosig"));
9384 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9385 intern ("coding-category-utf-16-le-nosig"));
9386 ASET (Vcoding_category_table, coding_category_charset,
9387 intern ("coding-category-charset"));
9388 ASET (Vcoding_category_table, coding_category_sjis,
9389 intern ("coding-category-sjis"));
9390 ASET (Vcoding_category_table, coding_category_big5,
9391 intern ("coding-category-big5"));
9392 ASET (Vcoding_category_table, coding_category_ccl,
9393 intern ("coding-category-ccl"));
9394 ASET (Vcoding_category_table, coding_category_emacs_mule,
9395 intern ("coding-category-emacs-mule"));
9396 /* Followings are NOT target of code detection. */
9397 ASET (Vcoding_category_table, coding_category_raw_text,
9398 intern ("coding-category-raw-text"));
9399 ASET (Vcoding_category_table, coding_category_undecided,
9400 intern ("coding-category-undecided"));
ecf488bc 9401
065e3595
KH
9402 DEFSYM (Qinsufficient_source, "insufficient-source");
9403 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9404 DEFSYM (Qinvalid_source, "invalid-source");
9405 DEFSYM (Qinterrupted, "interrupted");
9406 DEFSYM (Qinsufficient_memory, "insufficient-memory");
44e8490d 9407 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
065e3595 9408
4ed46869
KH
9409 defsubr (&Scoding_system_p);
9410 defsubr (&Sread_coding_system);
9411 defsubr (&Sread_non_nil_coding_system);
9412 defsubr (&Scheck_coding_system);
9413 defsubr (&Sdetect_coding_region);
d46c5b12 9414 defsubr (&Sdetect_coding_string);
05e6f5dc 9415 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 9416 defsubr (&Sunencodable_char_position);
df7492f9 9417 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
9418 defsubr (&Sdecode_coding_region);
9419 defsubr (&Sencode_coding_region);
9420 defsubr (&Sdecode_coding_string);
9421 defsubr (&Sencode_coding_string);
9422 defsubr (&Sdecode_sjis_char);
9423 defsubr (&Sencode_sjis_char);
9424 defsubr (&Sdecode_big5_char);
9425 defsubr (&Sencode_big5_char);
1ba9e4ab 9426 defsubr (&Sset_terminal_coding_system_internal);
c4825358 9427 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 9428 defsubr (&Sterminal_coding_system);
1ba9e4ab 9429 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 9430 defsubr (&Skeyboard_coding_system);
a5d301df 9431 defsubr (&Sfind_operation_coding_system);
df7492f9 9432 defsubr (&Sset_coding_system_priority);
6b89e3aa 9433 defsubr (&Sdefine_coding_system_internal);
df7492f9 9434 defsubr (&Sdefine_coding_system_alias);
a6f87d34 9435 defsubr (&Scoding_system_put);
df7492f9
KH
9436 defsubr (&Scoding_system_base);
9437 defsubr (&Scoding_system_plist);
9438 defsubr (&Scoding_system_aliases);
9439 defsubr (&Scoding_system_eol_type);
9440 defsubr (&Scoding_system_priority_list);
4ed46869 9441
4608c386 9442 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
9443 doc: /* List of coding systems.
9444
9445Do not alter the value of this variable manually. This variable should be
df7492f9 9446updated by the functions `define-coding-system' and
48b0f3ae 9447`define-coding-system-alias'. */);
4608c386
KH
9448 Vcoding_system_list = Qnil;
9449
9450 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
9451 doc: /* Alist of coding system names.
9452Each element is one element list of coding system name.
9453This variable is given to `completing-read' as TABLE argument.
9454
9455Do not alter the value of this variable manually. This variable should be
9456updated by the functions `make-coding-system' and
9457`define-coding-system-alias'. */);
4608c386
KH
9458 Vcoding_system_alist = Qnil;
9459
4ed46869 9460 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
9461 doc: /* List of coding-categories (symbols) ordered by priority.
9462
9463On detecting a coding system, Emacs tries code detection algorithms
9464associated with each coding-category one by one in this order. When
9465one algorithm agrees with a byte sequence of source text, the coding
0ec31faf
KH
9466system bound to the corresponding coding-category is selected.
9467
42205607 9468Don't modify this variable directly, but use `set-coding-priority'. */);
4ed46869
KH
9469 {
9470 int i;
9471
9472 Vcoding_category_list = Qnil;
df7492f9 9473 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 9474 Vcoding_category_list
d46c5b12
KH
9475 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9476 Vcoding_category_list);
4ed46869
KH
9477 }
9478
9479 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
9480 doc: /* Specify the coding system for read operations.
9481It is useful to bind this variable with `let', but do not set it globally.
9482If the value is a coding system, it is used for decoding on read operation.
9483If not, an appropriate element is used from one of the coding system alists:
9484There are three such tables, `file-coding-system-alist',
9485`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
9486 Vcoding_system_for_read = Qnil;
9487
9488 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
9489 doc: /* Specify the coding system for write operations.
9490Programs bind this variable with `let', but you should not set it globally.
9491If the value is a coding system, it is used for encoding of output,
9492when writing it to a file and when sending it to a file or subprocess.
9493
9494If this does not specify a coding system, an appropriate element
9495is used from one of the coding system alists:
9496There are three such tables, `file-coding-system-alist',
9497`process-coding-system-alist', and `network-coding-system-alist'.
9498For output to files, if the above procedure does not specify a coding system,
9499the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
9500 Vcoding_system_for_write = Qnil;
9501
9502 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
9503 doc: /*
9504Coding system used in the latest file or process I/O. */);
4ed46869
KH
9505 Vlast_coding_system_used = Qnil;
9506
065e3595
KH
9507 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9508 doc: /*
9509Error status of the last code conversion.
9510
9511When an error was detected in the last code conversion, this variable
9512is set to one of the following symbols.
9513 `insufficient-source'
9514 `inconsistent-eol'
9515 `invalid-source'
9516 `interrupted'
9517 `insufficient-memory'
9518When no error was detected, the value doesn't change. So, to check
9519the error status of a code conversion by this variable, you must
9520explicitly set this variable to nil before performing code
9521conversion. */);
9522 Vlast_code_conversion_error = Qnil;
9523
9ce27fde 9524 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
9525 doc: /*
9526*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
9527See info node `Coding Systems' and info node `Text and Binary' concerning
9528such conversion. */);
9ce27fde
KH
9529 inhibit_eol_conversion = 0;
9530
ed29121d 9531 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
9532 doc: /*
9533Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
9534Bind it to t if the process output is to be treated as if it were a file
9535read from some filesystem. */);
ed29121d
EZ
9536 inherit_process_coding_system = 0;
9537
02ba4723 9538 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
9539 doc: /*
9540Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
9541The format is ((PATTERN . VAL) ...),
9542where PATTERN is a regular expression matching a file name,
9543VAL is a coding system, a cons of coding systems, or a function symbol.
9544If VAL is a coding system, it is used for both decoding and encoding
9545the file contents.
9546If VAL is a cons of coding systems, the car part is used for decoding,
9547and the cdr part is used for encoding.
9548If VAL is a function symbol, the function must return a coding system
0192762c
DL
9549or a cons of coding systems which are used as above. The function gets
9550the arguments with which `find-operation-coding-systems' was called.
48b0f3ae
PJ
9551
9552See also the function `find-operation-coding-system'
9553and the variable `auto-coding-alist'. */);
02ba4723
KH
9554 Vfile_coding_system_alist = Qnil;
9555
9556 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
9557 doc: /*
9558Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
9559The format is ((PATTERN . VAL) ...),
9560where PATTERN is a regular expression matching a program name,
9561VAL is a coding system, a cons of coding systems, or a function symbol.
9562If VAL is a coding system, it is used for both decoding what received
9563from the program and encoding what sent to the program.
9564If VAL is a cons of coding systems, the car part is used for decoding,
9565and the cdr part is used for encoding.
9566If VAL is a function symbol, the function must return a coding system
9567or a cons of coding systems which are used as above.
9568
9569See also the function `find-operation-coding-system'. */);
02ba4723
KH
9570 Vprocess_coding_system_alist = Qnil;
9571
9572 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
9573 doc: /*
9574Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
9575The format is ((PATTERN . VAL) ...),
9576where PATTERN is a regular expression matching a network service name
9577or is a port number to connect to,
9578VAL is a coding system, a cons of coding systems, or a function symbol.
9579If VAL is a coding system, it is used for both decoding what received
9580from the network stream and encoding what sent to the network stream.
9581If VAL is a cons of coding systems, the car part is used for decoding,
9582and the cdr part is used for encoding.
9583If VAL is a function symbol, the function must return a coding system
9584or a cons of coding systems which are used as above.
9585
9586See also the function `find-operation-coding-system'. */);
02ba4723 9587 Vnetwork_coding_system_alist = Qnil;
4ed46869 9588
68c45bf0 9589 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
9590 doc: /* Coding system to use with system messages.
9591Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
9592 Vlocale_coding_system = Qnil;
9593
005f0d35 9594 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 9595 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
9596 doc: /*
9597*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 9598 eol_mnemonic_unix = build_string (":");
4ed46869 9599
7722baf9 9600 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
9601 doc: /*
9602*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 9603 eol_mnemonic_dos = build_string ("\\");
4ed46869 9604
7722baf9 9605 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
9606 doc: /*
9607*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 9608 eol_mnemonic_mac = build_string ("/");
4ed46869 9609
7722baf9 9610 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
9611 doc: /*
9612*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 9613 eol_mnemonic_undecided = build_string (":");
4ed46869 9614
84fbb8a0 9615 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
9616 doc: /*
9617*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 9618 Venable_character_translation = Qt;
bdd9fb48 9619
f967223b 9620 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
9621 &Vstandard_translation_table_for_decode,
9622 doc: /* Table for translating characters while decoding. */);
f967223b 9623 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 9624
f967223b 9625 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
9626 &Vstandard_translation_table_for_encode,
9627 doc: /* Table for translating characters while encoding. */);
f967223b 9628 Vstandard_translation_table_for_encode = Qnil;
4ed46869 9629
df7492f9 9630 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
9631 doc: /* Alist of charsets vs revision numbers.
9632While encoding, if a charset (car part of an element) is found,
df7492f9
KH
9633designate it with the escape sequence identifying revision (cdr part
9634of the element). */);
9635 Vcharset_revision_table = Qnil;
02ba4723
KH
9636
9637 DEFVAR_LISP ("default-process-coding-system",
9638 &Vdefault_process_coding_system,
48b0f3ae
PJ
9639 doc: /* Cons of coding systems used for process I/O by default.
9640The car part is used for decoding a process output,
9641the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 9642 Vdefault_process_coding_system = Qnil;
c4825358 9643
3f003981 9644 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
9645 doc: /*
9646Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
9647This is a vector of length 256.
9648If Nth element is non-nil, the existence of code N in a file
9649\(or output of subprocess) doesn't prevent it to be detected as
9650a coding system of ISO 2022 variant which has a flag
9651`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9652or reading output of a subprocess.
9653Only 128th through 159th elements has a meaning. */);
3f003981 9654 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
9655
9656 DEFVAR_LISP ("select-safe-coding-system-function",
9657 &Vselect_safe_coding_system_function,
df7492f9
KH
9658 doc: /*
9659Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
9660
9661If set, this function is called to force a user to select a proper
9662coding system which can encode the text in the case that a default
9663coding system used in each operation can't encode the text.
9664
9665The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
9666 Vselect_safe_coding_system_function = Qnil;
9667
5d5bf4d8
KH
9668 DEFVAR_BOOL ("coding-system-require-warning",
9669 &coding_system_require_warning,
9670 doc: /* Internal use only.
6b89e3aa
KH
9671If non-nil, on writing a file, `select-safe-coding-system-function' is
9672called even if `coding-system-for-write' is non-nil. The command
9673`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
9674 coding_system_require_warning = 0;
9675
9676
22ab2303 9677 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 9678 &inhibit_iso_escape_detection,
df7492f9
KH
9679 doc: /*
9680If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
9681
9682By default, on reading a file, Emacs tries to detect how the text is
9683encoded. This code detection is sensitive to escape sequences. If
9684the sequence is valid as ISO2022, the code is determined as one of
9685the ISO2022 encodings, and the file is decoded by the corresponding
9686coding system (e.g. `iso-2022-7bit').
9687
9688However, there may be a case that you want to read escape sequences in
9689a file as is. In such a case, you can set this variable to non-nil.
9690Then, as the code detection ignores any escape sequences, no file is
9691detected as encoded in some ISO2022 encoding. The result is that all
9692escape sequences become visible in a buffer.
9693
9694The default value is nil, and it is strongly recommended not to change
9695it. That is because many Emacs Lisp source files that contain
9696non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9697in Emacs's distribution, and they won't be decoded correctly on
9698reading if you suppress escape sequence detection.
9699
9700The other way to read escape sequences in a file without decoding is
9701to explicitly specify some coding system that doesn't use ISO2022's
9702escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 9703 inhibit_iso_escape_detection = 0;
002fdb44
DL
9704
9705 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1
DL
9706 doc: /* Char table for translating self-inserting characters.
9707This is applied to the result of input methods, not their input. See also
9708`keyboard-translate-table'. */);
002fdb44 9709 Vtranslation_table_for_input = Qnil;
8f924df7 9710
2c78b7e1
KH
9711 {
9712 Lisp_Object args[coding_arg_max];
8f924df7 9713 Lisp_Object plist[16];
2c78b7e1
KH
9714 int i;
9715
9716 for (i = 0; i < coding_arg_max; i++)
9717 args[i] = Qnil;
9718
9719 plist[0] = intern (":name");
9720 plist[1] = args[coding_arg_name] = Qno_conversion;
9721 plist[2] = intern (":mnemonic");
9722 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9723 plist[4] = intern (":coding-type");
9724 plist[5] = args[coding_arg_coding_type] = Qraw_text;
9725 plist[6] = intern (":ascii-compatible-p");
9726 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9727 plist[8] = intern (":default-char");
9728 plist[9] = args[coding_arg_default_char] = make_number (0);
8f924df7
KH
9729 plist[10] = intern (":for-unibyte");
9730 plist[11] = args[coding_arg_for_unibyte] = Qt;
9731 plist[12] = intern (":docstring");
9732 plist[13] = build_string ("Do no conversion.\n\
2c78b7e1
KH
9733\n\
9734When you visit a file with this coding, the file is read into a\n\
9735unibyte buffer as is, thus each byte of a file is treated as a\n\
9736character.");
8f924df7
KH
9737 plist[14] = intern (":eol-type");
9738 plist[15] = args[coding_arg_eol_type] = Qunix;
9739 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1 9740 Fdefine_coding_system_internal (coding_arg_max, args);
ae6f73fa
KH
9741
9742 plist[1] = args[coding_arg_name] = Qundecided;
9743 plist[3] = args[coding_arg_mnemonic] = make_number ('-');
9744 plist[5] = args[coding_arg_coding_type] = Qundecided;
9745 /* This is already set.
35befdaa 9746 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
ae6f73fa
KH
9747 plist[8] = intern (":charset-list");
9748 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
9749 plist[11] = args[coding_arg_for_unibyte] = Qnil;
9750 plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
9751 plist[15] = args[coding_arg_eol_type] = Qnil;
9752 args[coding_arg_plist] = Flist (16, plist);
9753 Fdefine_coding_system_internal (coding_arg_max, args);
2c78b7e1
KH
9754 }
9755
9756 setup_coding_system (Qno_conversion, &keyboard_coding);
ae6f73fa 9757 setup_coding_system (Qundecided, &terminal_coding);
2c78b7e1 9758 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
9759
9760 {
9761 int i;
9762
9763 for (i = 0; i < coding_category_max; i++)
9764 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9765 }
fcbcfb64
KH
9766#if defined (MSDOS) || defined (WINDOWSNT)
9767 system_eol_type = Qdos;
9768#else
9769 system_eol_type = Qunix;
9770#endif
9771 staticpro (&system_eol_type);
4ed46869
KH
9772}
9773
68c45bf0
PE
9774char *
9775emacs_strerror (error_number)
9776 int error_number;
9777{
9778 char *str;
9779
ca9c0567 9780 synchronize_system_messages_locale ();
68c45bf0
PE
9781 str = strerror (error_number);
9782
9783 if (! NILP (Vlocale_coding_system))
9784 {
9785 Lisp_Object dec = code_convert_string_norecord (build_string (str),
9786 Vlocale_coding_system,
9787 0);
d5db4077 9788 str = (char *) SDATA (dec);
68c45bf0
PE
9789 }
9790
9791 return str;
9792}
9793
4ed46869 9794#endif /* emacs */
9ffd559c
KH
9795
9796/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
9797 (do not change this comment) */