Revision: emacs@sv.gnu.org/emacs--unicode--0--patch-11
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
0b5538bd 2 Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
ce03bf76
KH
3 Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005
4 National Institute of Advanced Industrial Science and Technology (AIST)
5 Registration Number H14PRO021
8f924df7 6 Copyright (C) 2003
df7492f9
KH
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H13PRO009
4ed46869 9
369314dc
KH
10This file is part of GNU Emacs.
11
12GNU Emacs is free software; you can redistribute it and/or modify
13it under the terms of the GNU General Public License as published by
14the Free Software Foundation; either version 2, or (at your option)
15any later version.
4ed46869 16
369314dc
KH
17GNU Emacs is distributed in the hope that it will be useful,
18but WITHOUT ANY WARRANTY; without even the implied warranty of
19MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20GNU General Public License for more details.
4ed46869 21
369314dc
KH
22You should have received a copy of the GNU General Public License
23along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
24the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25Boston, MA 02110-1301, USA. */
4ed46869
KH
26
27/*** TABLE OF CONTENTS ***
28
b73bfc1c 29 0. General comments
4ed46869 30 1. Preamble
df7492f9
KH
31 2. Emacs' internal format (emacs-utf-8) handlers
32 3. UTF-8 handlers
33 4. UTF-16 handlers
34 5. Charset-base coding systems handlers
35 6. emacs-mule (old Emacs' internal format) handlers
36 7. ISO2022 handlers
37 8. Shift-JIS and BIG5 handlers
38 9. CCL handlers
39 10. C library functions
40 11. Emacs Lisp library functions
41 12. Postamble
4ed46869
KH
42
43*/
44
df7492f9 45/*** 0. General comments ***
b73bfc1c
KH
46
47
df7492f9 48CODING SYSTEM
4ed46869 49
5bad0796
DL
50 A coding system is an object for an encoding mechanism that contains
51 information about how to convert byte sequences to character
e19c3639
KH
52 sequences and vice versa. When we say "decode", it means converting
53 a byte sequence of a specific coding system into a character
54 sequence that is represented by Emacs' internal coding system
55 `emacs-utf-8', and when we say "encode", it means converting a
56 character sequence of emacs-utf-8 to a byte sequence of a specific
0ef69138 57 coding system.
4ed46869 58
e19c3639
KH
59 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
60 C level, a coding system is represented by a vector of attributes
5bad0796 61 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
62 coding system symbol to attributes vector is done by looking up
63 Vcoding_system_hash_table by the symbol.
4ed46869 64
e19c3639 65 Coding systems are classified into the following types depending on
5bad0796 66 the encoding mechanism. Here's a brief description of the types.
4ed46869 67
df7492f9
KH
68 o UTF-8
69
70 o UTF-16
71
72 o Charset-base coding system
73
74 A coding system defined by one or more (coded) character sets.
5bad0796 75 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
76 character set.
77
5bad0796 78 o Old Emacs internal format (emacs-mule)
df7492f9 79
5bad0796 80 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 81
df7492f9 82 o ISO2022-base coding system
4ed46869
KH
83
84 The most famous coding system for multiple character sets. X's
df7492f9
KH
85 Compound Text, various EUCs (Extended Unix Code), and coding systems
86 used in the Internet communication such as ISO-2022-JP are all
87 variants of ISO2022.
4ed46869 88
df7492f9 89 o SJIS (or Shift-JIS or MS-Kanji-Code)
93dec019 90
4ed46869
KH
91 A coding system to encode character sets: ASCII, JISX0201, and
92 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 93 section 8.
4ed46869 94
df7492f9 95 o BIG5
4ed46869 96
df7492f9 97 A coding system to encode character sets: ASCII and Big5. Widely
cfb43547 98 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
99 described in section 8. In this file, when we write "big5" (all
100 lowercase), we mean the coding system, and when we write "Big5"
101 (capitalized), we mean the character set.
4ed46869 102
df7492f9 103 o CCL
27901516 104
5bad0796 105 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
106 not listed above, he can supply a decoder and an encoder for it in
107 CCL (Code Conversion Language) programs. Emacs executes the CCL
108 program while decoding/encoding.
27901516 109
df7492f9 110 o Raw-text
4ed46869 111
5a936b46 112 A coding system for text containing raw eight-bit data. Emacs
5bad0796 113 treats each byte of source text as a character (except for
df7492f9 114 end-of-line conversion).
4ed46869 115
df7492f9
KH
116 o No-conversion
117
118 Like raw text, but don't do end-of-line conversion.
4ed46869 119
4ed46869 120
df7492f9 121END-OF-LINE FORMAT
4ed46869 122
5bad0796 123 How text end-of-line is encoded depends on the operating system. For
df7492f9 124 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 125 whereas DOS's format is a two-byte sequence of `carriage-return' and
d46c5b12
KH
126 `line-feed' codes. MacOS's format is usually one byte of
127 `carriage-return'.
4ed46869 128
cfb43547 129 Since text character encoding and end-of-line encoding are
df7492f9
KH
130 independent, any coding system described above can take any format
131 of end-of-line (except for no-conversion).
4ed46869 132
e19c3639
KH
133STRUCT CODING_SYSTEM
134
135 Before using a coding system for code conversion (i.e. decoding and
136 encoding), we set up a structure of type `struct coding_system'.
137 This structure keeps various information about a specific code
5bad0796 138 conversion (e.g. the location of source and destination data).
4ed46869
KH
139
140*/
141
df7492f9
KH
142/* COMMON MACROS */
143
144
4ed46869
KH
145/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146
df7492f9 147 These functions check if a byte sequence specified as a source in
ff0dacd7
KH
148 CODING conforms to the format of XXX, and update the members of
149 DETECT_INFO.
df7492f9 150
ff0dacd7 151 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
df7492f9
KH
152
153 Below is the template of these functions. */
154
4ed46869 155#if 0
df7492f9 156static int
ff0dacd7 157detect_coding_XXX (coding, detect_info)
df7492f9 158 struct coding_system *coding;
ff0dacd7 159 struct coding_detection_info *detect_info;
4ed46869 160{
f1d34bca
MB
161 const unsigned char *src = coding->source;
162 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 163 int multibytep = coding->src_multibyte;
ff0dacd7 164 int consumed_chars = 0;
df7492f9
KH
165 int found = 0;
166 ...;
167
168 while (1)
169 {
170 /* Get one byte from the source. If the source is exhausted, jump
171 to no_more_source:. */
172 ONE_MORE_BYTE (c);
ff0dacd7
KH
173
174 if (! __C_conforms_to_XXX___ (c))
175 break;
176 if (! __C_strongly_suggests_XXX__ (c))
177 found = CATEGORY_MASK_XXX;
df7492f9 178 }
ff0dacd7
KH
179 /* The byte sequence is invalid for XXX. */
180 detect_info->rejected |= CATEGORY_MASK_XXX;
df7492f9 181 return 0;
ff0dacd7 182
df7492f9 183 no_more_source:
ff0dacd7
KH
184 /* The source was exhausted successfully. */
185 detect_info->found |= found;
df7492f9 186 return 1;
4ed46869
KH
187}
188#endif
189
190/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191
df7492f9
KH
192 These functions decode a byte sequence specified as a source by
193 CODING. The resulting multibyte text goes to a place pointed to by
194 CODING->charbuf, the length of which should not exceed
195 CODING->charbuf_size.
d46c5b12 196
df7492f9
KH
197 These functions set the information of original and decoded texts in
198 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
199 They also set CODING->result to one of CODING_RESULT_XXX indicating
200 how the decoding is finished.
d46c5b12 201
df7492f9 202 Below is the template of these functions. */
d46c5b12 203
4ed46869 204#if 0
b73bfc1c 205static void
df7492f9 206decode_coding_XXXX (coding)
4ed46869 207 struct coding_system *coding;
4ed46869 208{
f1d34bca
MB
209 const unsigned char *src = coding->source + coding->consumed;
210 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
211 /* SRC_BASE remembers the start position in source in each loop.
212 The loop will be exited when there's not enough source data, or
213 when there's no room in CHARBUF for a decoded character. */
f1d34bca 214 const unsigned char *src_base;
df7492f9 215 /* A buffer to produce decoded characters. */
69a80ea3
KH
216 int *charbuf = coding->charbuf + coding->charbuf_used;
217 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
218 int multibytep = coding->src_multibyte;
219
220 while (1)
221 {
222 src_base = src;
223 if (charbuf >= charbuf_end)
224 /* No more room to produce a decoded character. */
225 break;
226 ONE_MORE_BYTE (c);
227 /* Decode it. */
228 }
229
230 no_more_source:
231 if (src_base < src_end
232 && coding->mode & CODING_MODE_LAST_BLOCK)
233 /* If the source ends with a partial byte sequence of a character,
234 treat those bytes as eight-bit raw data. */
235 while (src_base < src_end && charbuf < charbuf_end)
236 *charbuf++ = *src_base++;
237 /* Remember how many bytes and characters we consumed. If the
238 source is multibyte, the bytes and chars are not identical. */
239 coding->consumed = coding->consumed_char = src_base - coding->source;
240 /* Remember how many characters we produced. */
241 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
242}
243#endif
244
245/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246
df7492f9
KH
247 These functions encode SRC_BYTES length text at SOURCE of Emacs'
248 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
249 goes to a place pointed to by DESTINATION, the length of which
250 should not exceed DST_BYTES.
d46c5b12 251
df7492f9
KH
252 These functions set the information of original and encoded texts in
253 the members produced, produced_char, consumed, and consumed_char of
254 the structure *CODING. They also set the member result to one of
255 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 256
df7492f9
KH
257 DST_BYTES zero means that source area and destination area are
258 overlapped, which means that we can produce an encoded text until it
259 reaches the head of the not-yet-encoded source text.
d46c5b12 260
df7492f9 261 Below is a template of these functions. */
4ed46869 262#if 0
b73bfc1c 263static void
df7492f9 264encode_coding_XXX (coding)
4ed46869 265 struct coding_system *coding;
4ed46869 266{
df7492f9
KH
267 int multibytep = coding->dst_multibyte;
268 int *charbuf = coding->charbuf;
269 int *charbuf_end = coding->charbuf + coding->charbuf_used;
270 unsigned char *dst = coding->destination + coding->produced;
271 unsigned char *dst_end = coding->destination + coding->dst_bytes;
272 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
273 int produced_chars = 0;
274
275 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
276 {
277 int c = *charbuf;
278 /* Encode C into DST, and increment DST. */
279 }
280 label_no_more_destination:
281 /* Record how many chars and bytes we produced. */
282 coding->produced_char += produced_chars;
283 coding->produced = dst - coding->destination;
4ed46869
KH
284}
285#endif
286
4ed46869
KH
287\f
288/*** 1. Preamble ***/
289
68c45bf0 290#include <config.h>
4ed46869
KH
291#include <stdio.h>
292
4ed46869
KH
293#include "lisp.h"
294#include "buffer.h"
df7492f9 295#include "character.h"
4ed46869
KH
296#include "charset.h"
297#include "ccl.h"
df7492f9 298#include "composite.h"
4ed46869
KH
299#include "coding.h"
300#include "window.h"
4ed46869 301
df7492f9 302Lisp_Object Vcoding_system_hash_table;
4ed46869 303
df7492f9 304Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
305Lisp_Object Qunix, Qdos;
306extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
307Lisp_Object Qbuffer_file_coding_system;
308Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 309Lisp_Object Qdefault_char;
27901516 310Lisp_Object Qno_conversion, Qundecided;
df7492f9 311Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
b49a1807 312Lisp_Object Qbig, Qlittle;
bb0115a2 313Lisp_Object Qcoding_system_history;
1397dc18 314Lisp_Object Qvalid_codes;
a6f87d34
KH
315Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
316Lisp_Object QCdecode_translation_table, QCencode_translation_table;
317Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
35befdaa 318Lisp_Object QCascii_compatible_p;
4ed46869
KH
319
320extern Lisp_Object Qinsert_file_contents, Qwrite_region;
387f6ba5 321Lisp_Object Qcall_process, Qcall_process_region;
4ed46869
KH
322Lisp_Object Qstart_process, Qopen_network_stream;
323Lisp_Object Qtarget_idx;
324
065e3595
KH
325Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
326Lisp_Object Qinterrupted, Qinsufficient_memory;
327
44e8490d
KH
328/* If a symbol has this property, evaluate the value to define the
329 symbol as a coding system. */
330static Lisp_Object Qcoding_system_define_form;
331
5d5bf4d8
KH
332int coding_system_require_warning;
333
d46c5b12
KH
334Lisp_Object Vselect_safe_coding_system_function;
335
7722baf9
EZ
336/* Mnemonic string for each format of end-of-line. */
337Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
338/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 339 decided. */
7722baf9 340Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
341
342#ifdef emacs
343
4608c386
KH
344Lisp_Object Vcoding_system_list, Vcoding_system_alist;
345
346Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 347
d46c5b12
KH
348/* Coding system emacs-mule and raw-text are for converting only
349 end-of-line format. */
350Lisp_Object Qemacs_mule, Qraw_text;
8f924df7 351Lisp_Object Qutf_8_emacs;
ecf488bc 352
4ed46869
KH
353/* Coding-systems are handed between Emacs Lisp programs and C internal
354 routines by the following three variables. */
355/* Coding-system for reading files and receiving data from process. */
356Lisp_Object Vcoding_system_for_read;
357/* Coding-system for writing files and sending data to process. */
358Lisp_Object Vcoding_system_for_write;
359/* Coding-system actually used in the latest I/O. */
360Lisp_Object Vlast_coding_system_used;
065e3595
KH
361/* Set to non-nil when an error is detected during code conversion. */
362Lisp_Object Vlast_code_conversion_error;
c4825358 363/* A vector of length 256 which contains information about special
94487c4e 364 Latin codes (especially for dealing with Microsoft codes). */
3f003981 365Lisp_Object Vlatin_extra_code_table;
c4825358 366
9ce27fde
KH
367/* Flag to inhibit code conversion of end-of-line format. */
368int inhibit_eol_conversion;
369
74383408
KH
370/* Flag to inhibit ISO2022 escape sequence detection. */
371int inhibit_iso_escape_detection;
372
ed29121d
EZ
373/* Flag to make buffer-file-coding-system inherit from process-coding. */
374int inherit_process_coding_system;
375
c4825358 376/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
377struct coding_system terminal_coding;
378
c4825358
KH
379/* Coding system to be used to encode text for terminal display when
380 terminal coding system is nil. */
381struct coding_system safe_terminal_coding;
382
383/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
384struct coding_system keyboard_coding;
385
02ba4723
KH
386Lisp_Object Vfile_coding_system_alist;
387Lisp_Object Vprocess_coding_system_alist;
388Lisp_Object Vnetwork_coding_system_alist;
4ed46869 389
68c45bf0
PE
390Lisp_Object Vlocale_coding_system;
391
4ed46869
KH
392#endif /* emacs */
393
f967223b
KH
394/* Flag to tell if we look up translation table on character code
395 conversion. */
84fbb8a0 396Lisp_Object Venable_character_translation;
f967223b
KH
397/* Standard translation table to look up on decoding (reading). */
398Lisp_Object Vstandard_translation_table_for_decode;
399/* Standard translation table to look up on encoding (writing). */
400Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 401
f967223b
KH
402Lisp_Object Qtranslation_table;
403Lisp_Object Qtranslation_table_id;
404Lisp_Object Qtranslation_table_for_decode;
405Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
406
407/* Alist of charsets vs revision number. */
df7492f9 408static Lisp_Object Vcharset_revision_table;
4ed46869 409
02ba4723
KH
410/* Default coding systems used for process I/O. */
411Lisp_Object Vdefault_process_coding_system;
412
002fdb44
DL
413/* Char table for translating Quail and self-inserting input. */
414Lisp_Object Vtranslation_table_for_input;
415
df7492f9
KH
416/* Two special coding systems. */
417Lisp_Object Vsjis_coding_system;
418Lisp_Object Vbig5_coding_system;
419
df7492f9
KH
420/* ISO2022 section */
421
422#define CODING_ISO_INITIAL(coding, reg) \
423 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
424 coding_attr_iso_initial), \
425 reg)))
426
427
428#define CODING_ISO_REQUEST(coding, charset_id) \
429 ((charset_id <= (coding)->max_charset_id \
430 ? (coding)->safe_charsets[charset_id] \
431 : -1))
432
433
434#define CODING_ISO_FLAGS(coding) \
435 ((coding)->spec.iso_2022.flags)
436#define CODING_ISO_DESIGNATION(coding, reg) \
437 ((coding)->spec.iso_2022.current_designation[reg])
438#define CODING_ISO_INVOCATION(coding, plane) \
439 ((coding)->spec.iso_2022.current_invocation[plane])
440#define CODING_ISO_SINGLE_SHIFTING(coding) \
441 ((coding)->spec.iso_2022.single_shifting)
442#define CODING_ISO_BOL(coding) \
443 ((coding)->spec.iso_2022.bol)
444#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
445 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
446
447/* Control characters of ISO2022. */
448 /* code */ /* function */
449#define ISO_CODE_LF 0x0A /* line-feed */
450#define ISO_CODE_CR 0x0D /* carriage-return */
451#define ISO_CODE_SO 0x0E /* shift-out */
452#define ISO_CODE_SI 0x0F /* shift-in */
453#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
454#define ISO_CODE_ESC 0x1B /* escape */
455#define ISO_CODE_SS2 0x8E /* single-shift-2 */
456#define ISO_CODE_SS3 0x8F /* single-shift-3 */
457#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
458
459/* Every (1-byte) code of ISO2022 is classified into one of the
460 following classes. */
461enum iso_code_class_type
462 {
463 ISO_control_0, /* Control codes in the range
464 0x00..0x1F and 0x7F, except for the
465 following 4 codes. */
df7492f9
KH
466 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
467 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
468 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
469 ISO_escape, /* ISO_CODE_ESC (0x1B) */
470 ISO_control_1, /* Control codes in the range
471 0x80..0x9F, except for the
472 following 3 codes. */
473 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
474 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
475 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
476 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
477 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
478 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
479 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
480 };
05e6f5dc 481
df7492f9
KH
482/** The macros CODING_ISO_FLAG_XXX define the flag bits of the
483 `iso-flags' attribute of an iso2022 coding system. */
05e6f5dc 484
df7492f9
KH
485/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
486 instead of the correct short-form sequence (e.g. ESC $ A). */
487#define CODING_ISO_FLAG_LONG_FORM 0x0001
93dec019 488
df7492f9
KH
489/* If set, reset graphic planes and registers at end-of-line to the
490 initial state. */
491#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 492
df7492f9
KH
493/* If set, reset graphic planes and registers before any control
494 characters to the initial state. */
495#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
05e6f5dc 496
df7492f9
KH
497/* If set, encode by 7-bit environment. */
498#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
4ed46869 499
df7492f9
KH
500/* If set, use locking-shift function. */
501#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 502
df7492f9
KH
503/* If set, use single-shift function. Overwrite
504 CODING_ISO_FLAG_LOCKING_SHIFT. */
505#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 506
df7492f9
KH
507/* If set, use designation escape sequence. */
508#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 509
df7492f9
KH
510/* If set, produce revision number sequence. */
511#define CODING_ISO_FLAG_REVISION 0x0080
b73bfc1c 512
df7492f9
KH
513/* If set, produce ISO6429's direction specifying sequence. */
514#define CODING_ISO_FLAG_DIRECTION 0x0100
f4dee582 515
df7492f9
KH
516/* If set, assume designation states are reset at beginning of line on
517 output. */
518#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
4ed46869 519
df7492f9
KH
520/* If set, designation sequence should be placed at beginning of line
521 on output. */
522#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 523
df7492f9
KH
524/* If set, do not encode unsafe characters on output. */
525#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 526
df7492f9
KH
527/* If set, extra latin codes (128..159) are accepted as a valid code
528 on input. */
529#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 530
df7492f9 531#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 532
df7492f9 533#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 534
bf16eb23 535#define CODING_ISO_FLAG_USE_ROMAN 0x8000
aa72b389 536
bf16eb23 537#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
aa72b389 538
bf16eb23 539#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 540
df7492f9
KH
541/* A character to be produced on output if encoding of the original
542 character is prohibited by CODING_ISO_FLAG_SAFE. */
543#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 544
4ed46869 545
df7492f9
KH
546/* UTF-16 section */
547#define CODING_UTF_16_BOM(coding) \
548 ((coding)->spec.utf_16.bom)
4ed46869 549
df7492f9
KH
550#define CODING_UTF_16_ENDIAN(coding) \
551 ((coding)->spec.utf_16.endian)
4ed46869 552
df7492f9
KH
553#define CODING_UTF_16_SURROGATE(coding) \
554 ((coding)->spec.utf_16.surrogate)
4ed46869 555
4ed46869 556
df7492f9
KH
557/* CCL section */
558#define CODING_CCL_DECODER(coding) \
559 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
560#define CODING_CCL_ENCODER(coding) \
561 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
562#define CODING_CCL_VALIDS(coding) \
8f924df7 563 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
4ed46869 564
5a936b46 565/* Index for each coding category in `coding_categories' */
4ed46869 566
df7492f9
KH
567enum coding_category
568 {
569 coding_category_iso_7,
570 coding_category_iso_7_tight,
571 coding_category_iso_8_1,
572 coding_category_iso_8_2,
573 coding_category_iso_7_else,
574 coding_category_iso_8_else,
575 coding_category_utf_8,
576 coding_category_utf_16_auto,
577 coding_category_utf_16_be,
578 coding_category_utf_16_le,
579 coding_category_utf_16_be_nosig,
580 coding_category_utf_16_le_nosig,
581 coding_category_charset,
582 coding_category_sjis,
583 coding_category_big5,
584 coding_category_ccl,
585 coding_category_emacs_mule,
586 /* All above are targets of code detection. */
587 coding_category_raw_text,
588 coding_category_undecided,
589 coding_category_max
590 };
591
592/* Definitions of flag bits used in detect_coding_XXXX. */
593#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
594#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
595#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
596#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
597#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
598#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
599#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
b49a1807 600#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
df7492f9
KH
601#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
602#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
603#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
604#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
605#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
606#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
607#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
608#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
609#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
ff0dacd7 610#define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
df7492f9
KH
611
612/* This value is returned if detect_coding_mask () finds nothing other
613 than ASCII characters. */
614#define CATEGORY_MASK_ANY \
615 (CATEGORY_MASK_ISO_7 \
616 | CATEGORY_MASK_ISO_7_TIGHT \
617 | CATEGORY_MASK_ISO_8_1 \
618 | CATEGORY_MASK_ISO_8_2 \
619 | CATEGORY_MASK_ISO_7_ELSE \
620 | CATEGORY_MASK_ISO_8_ELSE \
621 | CATEGORY_MASK_UTF_8 \
622 | CATEGORY_MASK_UTF_16_BE \
623 | CATEGORY_MASK_UTF_16_LE \
624 | CATEGORY_MASK_UTF_16_BE_NOSIG \
625 | CATEGORY_MASK_UTF_16_LE_NOSIG \
626 | CATEGORY_MASK_CHARSET \
627 | CATEGORY_MASK_SJIS \
628 | CATEGORY_MASK_BIG5 \
629 | CATEGORY_MASK_CCL \
630 | CATEGORY_MASK_EMACS_MULE)
631
632
633#define CATEGORY_MASK_ISO_7BIT \
634 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
635
636#define CATEGORY_MASK_ISO_8BIT \
637 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
638
639#define CATEGORY_MASK_ISO_ELSE \
640 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
641
642#define CATEGORY_MASK_ISO_ESCAPE \
643 (CATEGORY_MASK_ISO_7 \
644 | CATEGORY_MASK_ISO_7_TIGHT \
645 | CATEGORY_MASK_ISO_7_ELSE \
646 | CATEGORY_MASK_ISO_8_ELSE)
647
648#define CATEGORY_MASK_ISO \
649 ( CATEGORY_MASK_ISO_7BIT \
650 | CATEGORY_MASK_ISO_8BIT \
651 | CATEGORY_MASK_ISO_ELSE)
652
653#define CATEGORY_MASK_UTF_16 \
654 (CATEGORY_MASK_UTF_16_BE \
655 | CATEGORY_MASK_UTF_16_LE \
656 | CATEGORY_MASK_UTF_16_BE_NOSIG \
657 | CATEGORY_MASK_UTF_16_LE_NOSIG)
658
659
660/* List of symbols `coding-category-xxx' ordered by priority. This
661 variable is exposed to Emacs Lisp. */
662static Lisp_Object Vcoding_category_list;
663
664/* Table of coding categories (Lisp symbols). This variable is for
665 internal use only. */
666static Lisp_Object Vcoding_category_table;
667
668/* Table of coding-categories ordered by priority. */
669static enum coding_category coding_priorities[coding_category_max];
670
671/* Nth element is a coding context for the coding system bound to the
672 Nth coding category. */
673static struct coding_system coding_categories[coding_category_max];
674
df7492f9
KH
675/*** Commonly used macros and functions ***/
676
677#ifndef min
678#define min(a, b) ((a) < (b) ? (a) : (b))
679#endif
680#ifndef max
681#define max(a, b) ((a) > (b) ? (a) : (b))
682#endif
4ed46869 683
24a73b0a
KH
684#define CODING_GET_INFO(coding, attrs, charset_list) \
685 do { \
686 (attrs) = CODING_ID_ATTRS ((coding)->id); \
687 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
df7492f9 688 } while (0)
4ed46869 689
4ed46869 690
df7492f9
KH
691/* Safely get one byte from the source text pointed to by SRC, which ends
692 at SRC_END, and set C to that byte. If there are not enough bytes
065e3595
KH
693 in the source, it jumps to `no_more_source'. If multibytep is nonzero
694 and a multibyte character is found at SRC, set C to the negative value
695 of the character code and record CODING_RESULT_INVALID_SRC (a lead byte
696 0xC0 or 0xC1 is instead merged with the next byte). The caller must
697 declare and set: src, src_end, src_base, multibytep, consumed_chars, coding */
aa72b389 698
065e3595
KH
699#define ONE_MORE_BYTE(c) \
700 do { \
701 if (src == src_end) \
702 { \
703 if (src_base < src) \
704 record_conversion_result \
705 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
706 goto no_more_source; \
707 } \
708 c = *src++; \
709 if (multibytep && (c & 0x80)) \
710 { \
711 if ((c & 0xFE) == 0xC0) \
712 c = ((c & 1) << 6) | *src++; \
713 else \
714 { \
35befdaa
KH
715 src--; \
716 c = - string_char (src, &src, NULL); \
065e3595
KH
717 record_conversion_result \
718 (coding, CODING_RESULT_INVALID_SRC); \
719 } \
720 } \
721 consumed_chars++; \
aa72b389
KH
722 } while (0)
723
aa72b389 724
065e3595
KH
725#define ONE_MORE_BYTE_NO_CHECK(c) \
726 do { \
727 c = *src++; \
728 if (multibytep && (c & 0x80)) \
729 { \
730 if ((c & 0xFE) == 0xC0) \
731 c = ((c & 1) << 6) | *src++; \
732 else \
733 { \
35befdaa
KH
734 src--; \
735 c = - string_char (src, &src, NULL); \
065e3595
KH
736 record_conversion_result \
737 (coding, CODING_RESULT_INVALID_SRC); \
738 } \
739 } \
740 consumed_chars++; \
aa72b389
KH
741 } while (0)
742
aa72b389 743
df7492f9
KH
744/* Store a byte C in the place pointed by DST and increment DST to the
745 next free point, and increment PRODUCED_CHARS. The caller should
746 ensure that C is 0..127, and declare and set the variables `dst'
747 and `produced_chars' appropriately in advance.
748*/
aa72b389
KH
749
750
df7492f9
KH
751#define EMIT_ONE_ASCII_BYTE(c) \
752 do { \
753 produced_chars++; \
754 *dst++ = (c); \
b6871cc7 755 } while (0)
aa72b389
KH
756
757
df7492f9 758/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
aa72b389 759
df7492f9
KH
760#define EMIT_TWO_ASCII_BYTES(c1, c2) \
761 do { \
762 produced_chars += 2; \
763 *dst++ = (c1), *dst++ = (c2); \
764 } while (0)
aa72b389
KH
765
766
df7492f9
KH
767/* Store a byte C in the place pointed by DST and increment DST to the
768 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
769 nonzero, store in an appropriate multibyte from. The caller should
770 declare and set the variables `dst' and `multibytep' appropriately
771 in advance. */
772
773#define EMIT_ONE_BYTE(c) \
774 do { \
775 produced_chars++; \
776 if (multibytep) \
777 { \
778 int ch = (c); \
779 if (ch >= 0x80) \
780 ch = BYTE8_TO_CHAR (ch); \
781 CHAR_STRING_ADVANCE (ch, dst); \
782 } \
783 else \
784 *dst++ = (c); \
aa72b389 785 } while (0)
aa72b389 786
aa72b389 787
df7492f9 788/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 789
e19c3639
KH
790#define EMIT_TWO_BYTES(c1, c2) \
791 do { \
792 produced_chars += 2; \
793 if (multibytep) \
794 { \
795 int ch; \
796 \
797 ch = (c1); \
798 if (ch >= 0x80) \
799 ch = BYTE8_TO_CHAR (ch); \
800 CHAR_STRING_ADVANCE (ch, dst); \
801 ch = (c2); \
802 if (ch >= 0x80) \
803 ch = BYTE8_TO_CHAR (ch); \
804 CHAR_STRING_ADVANCE (ch, dst); \
805 } \
806 else \
807 { \
808 *dst++ = (c1); \
809 *dst++ = (c2); \
810 } \
aa72b389
KH
811 } while (0)
812
813
df7492f9
KH
814#define EMIT_THREE_BYTES(c1, c2, c3) \
815 do { \
816 EMIT_ONE_BYTE (c1); \
817 EMIT_TWO_BYTES (c2, c3); \
818 } while (0)
aa72b389 819
aa72b389 820
df7492f9
KH
821#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
822 do { \
823 EMIT_TWO_BYTES (c1, c2); \
824 EMIT_TWO_BYTES (c3, c4); \
825 } while (0)
aa72b389 826
aa72b389 827
f6cbaf43
KH
828/* Prototypes for static functions. */
829static void record_conversion_result P_ ((struct coding_system *coding,
830 enum coding_result_code result));
831static int detect_coding_utf_8 P_ ((struct coding_system *,
832 struct coding_detection_info *info));
833static void decode_coding_utf_8 P_ ((struct coding_system *));
834static int encode_coding_utf_8 P_ ((struct coding_system *));
835
836static int detect_coding_utf_16 P_ ((struct coding_system *,
837 struct coding_detection_info *info));
838static void decode_coding_utf_16 P_ ((struct coding_system *));
839static int encode_coding_utf_16 P_ ((struct coding_system *));
840
841static int detect_coding_iso_2022 P_ ((struct coding_system *,
842 struct coding_detection_info *info));
843static void decode_coding_iso_2022 P_ ((struct coding_system *));
844static int encode_coding_iso_2022 P_ ((struct coding_system *));
845
846static int detect_coding_emacs_mule P_ ((struct coding_system *,
847 struct coding_detection_info *info));
848static void decode_coding_emacs_mule P_ ((struct coding_system *));
849static int encode_coding_emacs_mule P_ ((struct coding_system *));
850
851static int detect_coding_sjis P_ ((struct coding_system *,
852 struct coding_detection_info *info));
853static void decode_coding_sjis P_ ((struct coding_system *));
854static int encode_coding_sjis P_ ((struct coding_system *));
855
856static int detect_coding_big5 P_ ((struct coding_system *,
857 struct coding_detection_info *info));
858static void decode_coding_big5 P_ ((struct coding_system *));
859static int encode_coding_big5 P_ ((struct coding_system *));
860
861static int detect_coding_ccl P_ ((struct coding_system *,
862 struct coding_detection_info *info));
863static void decode_coding_ccl P_ ((struct coding_system *));
864static int encode_coding_ccl P_ ((struct coding_system *));
865
866static void decode_coding_raw_text P_ ((struct coding_system *));
867static int encode_coding_raw_text P_ ((struct coding_system *));
868
869static void coding_set_source P_ ((struct coding_system *));
870static void coding_set_destination P_ ((struct coding_system *));
871static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
872static void coding_alloc_by_making_gap P_ ((struct coding_system *,
873 EMACS_INT));
874static unsigned char *alloc_destination P_ ((struct coding_system *,
875 EMACS_INT, unsigned char *));
876static void setup_iso_safe_charsets P_ ((Lisp_Object));
877static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
878 int *, int *,
879 unsigned char *));
880static int detect_eol P_ ((const unsigned char *,
881 EMACS_INT, enum coding_category));
882static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
883static void decode_eol P_ ((struct coding_system *));
884static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
885static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
886 int, int *, int *));
887static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
888static INLINE void produce_composition P_ ((struct coding_system *, int *,
889 EMACS_INT));
890static INLINE void produce_charset P_ ((struct coding_system *, int *,
891 EMACS_INT));
892static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
893static int decode_coding P_ ((struct coding_system *));
894static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
895 struct coding_system *,
896 int *, EMACS_INT *));
897static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
898 struct coding_system *,
899 int *, EMACS_INT *));
900static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
901static int encode_coding P_ ((struct coding_system *));
902static Lisp_Object make_conversion_work_buffer P_ ((int));
903static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
904static INLINE int char_encodable_p P_ ((int, Lisp_Object));
905static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
906
065e3595
KH
907static void
908record_conversion_result (struct coding_system *coding,
909 enum coding_result_code result)
910{
911 coding->result = result;
912 switch (result)
913 {
914 case CODING_RESULT_INSUFFICIENT_SRC:
915 Vlast_code_conversion_error = Qinsufficient_source;
916 break;
917 case CODING_RESULT_INCONSISTENT_EOL:
918 Vlast_code_conversion_error = Qinconsistent_eol;
919 break;
920 case CODING_RESULT_INVALID_SRC:
921 Vlast_code_conversion_error = Qinvalid_source;
922 break;
923 case CODING_RESULT_INTERRUPT:
924 Vlast_code_conversion_error = Qinterrupted;
925 break;
926 case CODING_RESULT_INSUFFICIENT_MEM:
927 Vlast_code_conversion_error = Qinsufficient_memory;
928 break;
35befdaa
KH
929 default:
930 Vlast_code_conversion_error = intern ("Unknown error");
065e3595
KH
931 }
932}
933
df7492f9
KH
/* Decode CODE of CHARSET into C with DECODE_CHAR.  If
   `charset_map_loaded' is nonzero afterwards, CODING's source
   address is refreshed with coding_set_source and SRC, SRC_BASE and
   SRC_END are shifted by however far CODING->source moved.  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    c = DECODE_CHAR (charset, code);                                         \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        const unsigned char *moved_from = coding->source;                    \
        EMACS_INT delta;                                                     \
                                                                             \
        coding_set_source (coding);                                          \
        delta = coding->source - moved_from;                                 \
        src += delta;                                                        \
        src_base += delta;                                                   \
        src_end += delta;                                                    \
      }                                                                      \
  } while (0)
950
951
df7492f9
KH
/* Make sure BYTES more bytes can be written at `dst'.  When
   `dst' + BYTES reaches or passes `dst_end', ask alloc_destination
   for room for BYTES plus one byte per element still pending in
   `charbuf', then refresh `dst' and `dst_end'.  Uses the caller's
   `coding', `dst', `dst_end', `charbuf' and `charbuf_end'.  */

#define ASSURE_DESTINATION(bytes)                               \
  do {                                                          \
    if (! (dst + (bytes) < dst_end))                            \
      {                                                         \
        int more_bytes = charbuf_end - charbuf + (bytes);       \
                                                                \
        dst = alloc_destination (coding, more_bytes, dst);      \
        dst_end = coding->destination + coding->dst_bytes;      \
      }                                                         \
  } while (0)
aa72b389 962
aa72b389 963
aa72b389 964
df7492f9
KH
965static void
966coding_set_source (coding)
aa72b389 967 struct coding_system *coding;
aa72b389 968{
df7492f9
KH
969 if (BUFFERP (coding->src_object))
970 {
2cb26057 971 struct buffer *buf = XBUFFER (coding->src_object);
aa72b389 972
df7492f9 973 if (coding->src_pos < 0)
2cb26057 974 coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
df7492f9 975 else
2cb26057 976 coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
aa72b389 977 }
df7492f9 978 else if (STRINGP (coding->src_object))
aa72b389 979 {
8f924df7 980 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
aa72b389 981 }
df7492f9
KH
982 else
983 /* Otherwise, the source is C string and is never relocated
984 automatically. Thus we don't have to update anything. */
985 ;
986}
aa72b389 987
df7492f9
KH
988static void
989coding_set_destination (coding)
990 struct coding_system *coding;
991{
992 if (BUFFERP (coding->dst_object))
aa72b389 993 {
df7492f9 994 if (coding->src_pos < 0)
aa72b389 995 {
28f67a95
KH
996 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
997 coding->dst_bytes = (GAP_END_ADDR
998 - (coding->src_bytes - coding->consumed)
999 - coding->destination);
aa72b389 1000 }
df7492f9 1001 else
28f67a95
KH
1002 {
1003 /* We are sure that coding->dst_pos_byte is before the gap
1004 of the buffer. */
1005 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1006 + coding->dst_pos_byte - 1);
1007 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1008 - coding->destination);
1009 }
df7492f9
KH
1010 }
1011 else
1012 /* Otherwise, the destination is C string and is never relocated
1013 automatically. Thus we don't have to update anything. */
1014 ;
1015}
1016
1017
1018static void
1019coding_alloc_by_realloc (coding, bytes)
1020 struct coding_system *coding;
1021 EMACS_INT bytes;
1022{
1023 coding->destination = (unsigned char *) xrealloc (coding->destination,
1024 coding->dst_bytes + bytes);
1025 coding->dst_bytes += bytes;
1026}
1027
1028static void
1029coding_alloc_by_making_gap (coding, bytes)
1030 struct coding_system *coding;
1031 EMACS_INT bytes;
1032{
2c78b7e1
KH
1033 if (BUFFERP (coding->dst_object)
1034 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
1035 {
1036 EMACS_INT add = coding->src_bytes - coding->consumed;
1037
1038 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1039 make_gap (bytes);
1040 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1041 }
307f5c57 1042 else if (c >= 0x80)
df7492f9 1043 {
2c78b7e1
KH
1044 Lisp_Object this_buffer;
1045
1046 this_buffer = Fcurrent_buffer ();
df7492f9
KH
1047 set_buffer_internal (XBUFFER (coding->dst_object));
1048 make_gap (bytes);
1049 set_buffer_internal (XBUFFER (this_buffer));
aa72b389 1050 }
df7492f9 1051}
8f924df7 1052
df7492f9
KH
1053
1054static unsigned char *
1055alloc_destination (coding, nbytes, dst)
1056 struct coding_system *coding;
3e139625 1057 EMACS_INT nbytes;
df7492f9
KH
1058 unsigned char *dst;
1059{
1060 EMACS_INT offset = dst - coding->destination;
1061
1062 if (BUFFERP (coding->dst_object))
1063 coding_alloc_by_making_gap (coding, nbytes);
aa72b389 1064 else
df7492f9 1065 coding_alloc_by_realloc (coding, nbytes);
065e3595 1066 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1067 coding_set_destination (coding);
1068 dst = coding->destination + offset;
1069 return dst;
1070}
aa72b389 1071
ff0dacd7
KH
1072/** Macros for annotations. */
1073
/* Upper bound on the number of charbuf elements taken by annotation
   data: the sum of the largest composition annotation and a charset
   annotation.  */
#define MAX_ANNOTATION_LENGTH \
  (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
ff0dacd7
KH
1077
1078/* An annotation data is stored in the array coding->charbuf in this
1079 format:
69a80ea3 1080 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
ff0dacd7
KH
1081 LENGTH is the number of elements in the annotation.
1082 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
69a80ea3 1083 NCHARS is the number of characters in the text annotated.
ff0dacd7
KH
1084
1085 The format of the following elements depends on ANNOTATION_MASK.
1086
1087 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1088 follow:
1089 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1090 METHOD is one of enum composition_method.
1091 Optional COMPOSITION-COMPONENTS are characters and composition
1092 rules.
1093
1094 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1095 follows. */
1096
/* Write the three-element annotation header [ -LEN MASK NCHARS ] at
   BUF, advancing BUF past it, and mark the caller's `coding' as
   annotated.

   The expansion deliberately has no trailing semicolon, so that
   `ADD_ANNOTATION_DATA (...);' is a single statement and can be used
   as the body of an unbraced if/else.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1104
69a80ea3
KH
/* Write a composition annotation at BUF: an annotation header of
   length 4 with CODING_ANNOTATE_COMPOSITION_MASK and NCHARS,
   followed by METHOD.  BUF is advanced past the four elements.  */

#define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method);                                                    \
  } while (0)
1110
1111
69a80ea3
KH
/* Write a charset annotation at BUF: an annotation header of length
   4 with CODING_ANNOTATE_CHARSET_MASK and NCHARS, followed by the
   charset ID.  BUF is advanced past the four elements.  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1117
df7492f9
KH
1118\f
1119/*** 2. Emacs' internal format (emacs-utf-8) ***/
1120
1121
1122
1123\f
1124/*** 3. UTF-8 ***/
1125
1126/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1127 Check if a text is encoded in UTF-8. If it is, return 1, else
1128 return 0. */
df7492f9
KH
1129
/* Classification of a single UTF-8 octet C by its high bits.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)             /* below 0x80 */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)   /* 10xxxxxx */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)   /* 110xxxxx */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)   /* 1110xxxx */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)   /* 11110xxx */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)   /* 111110xx */
1136
1137static int
ff0dacd7 1138detect_coding_utf_8 (coding, detect_info)
df7492f9 1139 struct coding_system *coding;
ff0dacd7 1140 struct coding_detection_info *detect_info;
df7492f9 1141{
065e3595 1142 const unsigned char *src = coding->source, *src_base;
8f924df7 1143 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1144 int multibytep = coding->src_multibyte;
1145 int consumed_chars = 0;
1146 int found = 0;
1147
ff0dacd7 1148 detect_info->checked |= CATEGORY_MASK_UTF_8;
df7492f9
KH
1149 /* A coding system of this category is always ASCII compatible. */
1150 src += coding->head_ascii;
1151
1152 while (1)
aa72b389 1153 {
df7492f9 1154 int c, c1, c2, c3, c4;
aa72b389 1155
065e3595 1156 src_base = src;
df7492f9 1157 ONE_MORE_BYTE (c);
065e3595 1158 if (c < 0 || UTF_8_1_OCTET_P (c))
df7492f9
KH
1159 continue;
1160 ONE_MORE_BYTE (c1);
065e3595 1161 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
df7492f9
KH
1162 break;
1163 if (UTF_8_2_OCTET_LEADING_P (c))
aa72b389 1164 {
ff0dacd7 1165 found = CATEGORY_MASK_UTF_8;
df7492f9 1166 continue;
aa72b389 1167 }
df7492f9 1168 ONE_MORE_BYTE (c2);
065e3595 1169 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1170 break;
1171 if (UTF_8_3_OCTET_LEADING_P (c))
aa72b389 1172 {
ff0dacd7 1173 found = CATEGORY_MASK_UTF_8;
df7492f9 1174 continue;
aa72b389 1175 }
df7492f9 1176 ONE_MORE_BYTE (c3);
065e3595 1177 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1178 break;
1179 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1180 {
ff0dacd7 1181 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1182 continue;
1183 }
1184 ONE_MORE_BYTE (c4);
065e3595 1185 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1186 break;
1187 if (UTF_8_5_OCTET_LEADING_P (c))
1188 {
ff0dacd7 1189 found = CATEGORY_MASK_UTF_8;
df7492f9
KH
1190 continue;
1191 }
1192 break;
aa72b389 1193 }
ff0dacd7 1194 detect_info->rejected |= CATEGORY_MASK_UTF_8;
df7492f9 1195 return 0;
aa72b389 1196
df7492f9 1197 no_more_source:
065e3595 1198 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
aa72b389 1199 {
ff0dacd7 1200 detect_info->rejected |= CATEGORY_MASK_UTF_8;
89528eb3 1201 return 0;
aa72b389 1202 }
ff0dacd7
KH
1203 detect_info->found |= found;
1204 return 1;
aa72b389
KH
1205}
1206
4ed46869 1207
b73bfc1c 1208static void
df7492f9 1209decode_coding_utf_8 (coding)
b73bfc1c 1210 struct coding_system *coding;
b73bfc1c 1211{
8f924df7
KH
1212 const unsigned char *src = coding->source + coding->consumed;
1213 const unsigned char *src_end = coding->source + coding->src_bytes;
1214 const unsigned char *src_base;
69a80ea3
KH
1215 int *charbuf = coding->charbuf + coding->charbuf_used;
1216 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1217 int consumed_chars = 0, consumed_chars_base;
1218 int multibytep = coding->src_multibyte;
24a73b0a 1219 Lisp_Object attr, charset_list;
4ed46869 1220
24a73b0a 1221 CODING_GET_INFO (coding, attr, charset_list);
df7492f9
KH
1222
1223 while (1)
b73bfc1c 1224 {
df7492f9 1225 int c, c1, c2, c3, c4, c5;
ec6d2bb8 1226
df7492f9
KH
1227 src_base = src;
1228 consumed_chars_base = consumed_chars;
4af310db 1229
df7492f9
KH
1230 if (charbuf >= charbuf_end)
1231 break;
1232
1233 ONE_MORE_BYTE (c1);
065e3595
KH
1234 if (c1 < 0)
1235 {
1236 c = - c1;
1237 }
1238 else if (UTF_8_1_OCTET_P(c1))
df7492f9
KH
1239 {
1240 c = c1;
4af310db 1241 }
df7492f9 1242 else
4af310db 1243 {
df7492f9 1244 ONE_MORE_BYTE (c2);
065e3595 1245 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
df7492f9
KH
1246 goto invalid_code;
1247 if (UTF_8_2_OCTET_LEADING_P (c1))
4af310db 1248 {
b0edb2c5
DL
1249 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1250 /* Reject overlong sequences here and below. Encoders
1251 producing them are incorrect, they can be misleading,
1252 and they mess up read/write invariance. */
1253 if (c < 128)
1254 goto invalid_code;
4af310db 1255 }
df7492f9 1256 else
aa72b389 1257 {
df7492f9 1258 ONE_MORE_BYTE (c3);
065e3595 1259 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
df7492f9
KH
1260 goto invalid_code;
1261 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1262 {
1263 c = (((c1 & 0xF) << 12)
1264 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
72fe1301
DL
1265 if (c < 0x800
1266 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
b0edb2c5
DL
1267 goto invalid_code;
1268 }
df7492f9
KH
1269 else
1270 {
1271 ONE_MORE_BYTE (c4);
065e3595 1272 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
df7492f9
KH
1273 goto invalid_code;
1274 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1275 {
df7492f9
KH
1276 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1277 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1278 if (c < 0x10000)
1279 goto invalid_code;
1280 }
df7492f9
KH
1281 else
1282 {
1283 ONE_MORE_BYTE (c5);
065e3595 1284 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
df7492f9
KH
1285 goto invalid_code;
1286 if (UTF_8_5_OCTET_LEADING_P (c1))
1287 {
1288 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1289 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1290 | (c5 & 0x3F));
b0edb2c5 1291 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1292 goto invalid_code;
1293 }
1294 else
1295 goto invalid_code;
1296 }
1297 }
aa72b389 1298 }
b73bfc1c 1299 }
df7492f9
KH
1300
1301 *charbuf++ = c;
1302 continue;
1303
1304 invalid_code:
1305 src = src_base;
1306 consumed_chars = consumed_chars_base;
1307 ONE_MORE_BYTE (c);
1308 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1309 coding->errors++;
aa72b389 1310 }
307f5c57
MB
1311 else
1312 return 0;
aa72b389 1313
df7492f9
KH
1314 no_more_source:
1315 coding->consumed_char += consumed_chars_base;
1316 coding->consumed = src_base - coding->source;
1317 coding->charbuf_used = charbuf - coding->charbuf;
1318}
1319
1320
1321static int
1322encode_coding_utf_8 (coding)
1323 struct coding_system *coding;
1324{
1325 int multibytep = coding->dst_multibyte;
1326 int *charbuf = coding->charbuf;
1327 int *charbuf_end = charbuf + coding->charbuf_used;
1328 unsigned char *dst = coding->destination + coding->produced;
1329 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1330 int produced_chars = 0;
df7492f9
KH
1331 int c;
1332
1333 if (multibytep)
aa72b389 1334 {
df7492f9
KH
1335 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1336
1337 while (charbuf < charbuf_end)
b73bfc1c 1338 {
df7492f9 1339 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
8f924df7 1340
df7492f9
KH
1341 ASSURE_DESTINATION (safe_room);
1342 c = *charbuf++;
28f67a95
KH
1343 if (CHAR_BYTE8_P (c))
1344 {
1345 c = CHAR_TO_BYTE8 (c);
1346 EMIT_ONE_BYTE (c);
1347 }
1348 else
1349 {
1350 CHAR_STRING_ADVANCE (c, pend);
1351 for (p = str; p < pend; p++)
1352 EMIT_ONE_BYTE (*p);
1353 }
b73bfc1c 1354 }
aa72b389 1355 }
df7492f9
KH
1356 else
1357 {
1358 int safe_room = MAX_MULTIBYTE_LENGTH;
1359
1360 while (charbuf < charbuf_end)
b73bfc1c 1361 {
df7492f9
KH
1362 ASSURE_DESTINATION (safe_room);
1363 c = *charbuf++;
f03caae0
KH
1364 if (CHAR_BYTE8_P (c))
1365 *dst++ = CHAR_TO_BYTE8 (c);
1366 else
1367 dst += CHAR_STRING (c, dst);
df7492f9 1368 produced_chars++;
4ed46869
KH
1369 }
1370 }
065e3595 1371 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1372 coding->produced_char += produced_chars;
1373 coding->produced = dst - coding->destination;
1374 return 0;
4ed46869
KH
1375}
1376
b73bfc1c 1377
df7492f9 1378/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1379 Check if a text is encoded in one of UTF-16 based coding systems.
1380 If it is, return 1, else return 0. */
aa72b389 1381
df7492f9
KH
/* Nonzero if the 16-bit unit VAL is a high surrogate, i.e. lies in
   0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit unit VAL is a low surrogate, i.e. lies in
   0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) (((val) & 0xFC00) == 0xDC00)

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
#define UTF_16_INVALID_P(val)           \
  (((val) == 0xFFFE)                    \
   || ((val) == 0xFFFF)                 \
   || UTF_16_LOW_SURROGATE_P (val))
aa72b389 1392
aa72b389 1393
df7492f9 1394static int
ff0dacd7 1395detect_coding_utf_16 (coding, detect_info)
aa72b389 1396 struct coding_system *coding;
ff0dacd7 1397 struct coding_detection_info *detect_info;
aa72b389 1398{
8f924df7
KH
1399 const unsigned char *src = coding->source, *src_base = src;
1400 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1401 int multibytep = coding->src_multibyte;
1402 int consumed_chars = 0;
1403 int c1, c2;
aa72b389 1404
ff0dacd7 1405 detect_info->checked |= CATEGORY_MASK_UTF_16;
ff0dacd7 1406 if (coding->mode & CODING_MODE_LAST_BLOCK
24a73b0a 1407 && (coding->src_chars & 1))
ff0dacd7
KH
1408 {
1409 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1410 return 0;
1411 }
24a73b0a 1412
df7492f9
KH
1413 ONE_MORE_BYTE (c1);
1414 ONE_MORE_BYTE (c2);
df7492f9 1415 if ((c1 == 0xFF) && (c2 == 0xFE))
aa72b389 1416 {
b49a1807
KH
1417 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1418 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1419 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1420 | CATEGORY_MASK_UTF_16_BE_NOSIG
1421 | CATEGORY_MASK_UTF_16_LE_NOSIG);
aa72b389 1422 }
df7492f9 1423 else if ((c1 == 0xFE) && (c2 == 0xFF))
ff0dacd7 1424 {
b49a1807
KH
1425 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1426 | CATEGORY_MASK_UTF_16_AUTO);
24a73b0a
KH
1427 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1428 | CATEGORY_MASK_UTF_16_BE_NOSIG
1429 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1430 }
065e3595 1431 else if (c1 >= 0 && c2 >= 0)
24a73b0a 1432 {
24a73b0a
KH
1433 detect_info->rejected
1434 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
ff0dacd7 1435 }
df7492f9 1436 no_more_source:
ff0dacd7 1437 return 1;
df7492f9 1438}
aa72b389 1439
df7492f9
KH
1440static void
1441decode_coding_utf_16 (coding)
1442 struct coding_system *coding;
1443{
8f924df7
KH
1444 const unsigned char *src = coding->source + coding->consumed;
1445 const unsigned char *src_end = coding->source + coding->src_bytes;
1446 const unsigned char *src_base;
69a80ea3
KH
1447 int *charbuf = coding->charbuf + coding->charbuf_used;
1448 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
1449 int consumed_chars = 0, consumed_chars_base;
1450 int multibytep = coding->src_multibyte;
1451 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1452 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1453 int surrogate = CODING_UTF_16_SURROGATE (coding);
24a73b0a 1454 Lisp_Object attr, charset_list;
df7492f9 1455
24a73b0a 1456 CODING_GET_INFO (coding, attr, charset_list);
df7492f9 1457
b49a1807 1458 if (bom == utf_16_with_bom)
aa72b389 1459 {
df7492f9 1460 int c, c1, c2;
4af310db 1461
aa72b389 1462 src_base = src;
df7492f9
KH
1463 ONE_MORE_BYTE (c1);
1464 ONE_MORE_BYTE (c2);
e19c3639 1465 c = (c1 << 8) | c2;
aa72b389 1466
b49a1807
KH
1467 if (endian == utf_16_big_endian
1468 ? c != 0xFEFF : c != 0xFFFE)
aa72b389 1469 {
b49a1807
KH
1470 /* The first two bytes are not BOM. Treat them as bytes
1471 for a normal character. */
1472 src = src_base;
1473 coding->errors++;
aa72b389 1474 }
b49a1807
KH
1475 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1476 }
1477 else if (bom == utf_16_detect_bom)
1478 {
1479 /* We have already tried to detect BOM and failed in
1480 detect_coding. */
1481 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
df7492f9 1482 }
aa72b389 1483
df7492f9
KH
1484 while (1)
1485 {
1486 int c, c1, c2;
1487
1488 src_base = src;
1489 consumed_chars_base = consumed_chars;
1490
1491 if (charbuf + 2 >= charbuf_end)
1492 break;
1493
1494 ONE_MORE_BYTE (c1);
065e3595
KH
1495 if (c1 < 0)
1496 {
1497 *charbuf++ = -c1;
1498 continue;
1499 }
df7492f9 1500 ONE_MORE_BYTE (c2);
065e3595
KH
1501 if (c2 < 0)
1502 {
1503 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1504 *charbuf++ = -c2;
1505 continue;
1506 }
df7492f9 1507 c = (endian == utf_16_big_endian
e19c3639 1508 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1509 if (surrogate)
fd3ae0b9 1510 {
df7492f9 1511 if (! UTF_16_LOW_SURROGATE_P (c))
fd3ae0b9 1512 {
df7492f9
KH
1513 if (endian == utf_16_big_endian)
1514 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1515 else
1516 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1517 *charbuf++ = c1;
1518 *charbuf++ = c2;
1519 coding->errors++;
1520 if (UTF_16_HIGH_SURROGATE_P (c))
1521 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
fd3ae0b9 1522 else
df7492f9 1523 *charbuf++ = c;
fd3ae0b9
KH
1524 }
1525 else
df7492f9
KH
1526 {
1527 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1528 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
29f7ffd0 1529 *charbuf++ = 0x10000 + c;
df7492f9 1530 }
fd3ae0b9 1531 }
aa72b389 1532 else
df7492f9
KH
1533 {
1534 if (UTF_16_HIGH_SURROGATE_P (c))
1535 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1536 else
1537 *charbuf++ = c;
8f924df7 1538 }
aa72b389 1539 }
df7492f9
KH
1540
1541 no_more_source:
1542 coding->consumed_char += consumed_chars_base;
1543 coding->consumed = src_base - coding->source;
1544 coding->charbuf_used = charbuf - coding->charbuf;
aa72b389 1545}
b73bfc1c 1546
df7492f9
KH
1547static int
1548encode_coding_utf_16 (coding)
1549 struct coding_system *coding;
1550{
1551 int multibytep = coding->dst_multibyte;
1552 int *charbuf = coding->charbuf;
1553 int *charbuf_end = charbuf + coding->charbuf_used;
1554 unsigned char *dst = coding->destination + coding->produced;
1555 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1556 int safe_room = 8;
1557 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1558 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1559 int produced_chars = 0;
24a73b0a 1560 Lisp_Object attrs, charset_list;
df7492f9 1561 int c;
4ed46869 1562
24a73b0a 1563 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 1564
b49a1807 1565 if (bom != utf_16_without_bom)
df7492f9
KH
1566 {
1567 ASSURE_DESTINATION (safe_room);
1568 if (big_endian)
df7492f9 1569 EMIT_TWO_BYTES (0xFE, 0xFF);
880cf180
KH
1570 else
1571 EMIT_TWO_BYTES (0xFF, 0xFE);
df7492f9
KH
1572 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1573 }
1574
1575 while (charbuf < charbuf_end)
1576 {
1577 ASSURE_DESTINATION (safe_room);
1578 c = *charbuf++;
e19c3639
KH
1579 if (c >= MAX_UNICODE_CHAR)
1580 c = coding->default_char;
df7492f9
KH
1581
1582 if (c < 0x10000)
1583 {
1584 if (big_endian)
1585 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1586 else
1587 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1588 }
1589 else
1590 {
1591 int c1, c2;
1592
1593 c -= 0x10000;
1594 c1 = (c >> 10) + 0xD800;
1595 c2 = (c & 0x3FF) + 0xDC00;
1596 if (big_endian)
1597 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1598 else
1599 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1600 }
1601 }
065e3595 1602 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
1603 coding->produced = dst - coding->destination;
1604 coding->produced_char += produced_chars;
1605 return 0;
1606}
1607
1608\f
1609/*** 6. Old Emacs' internal format (emacs-mule) ***/
1610
1611/* Emacs' internal format for representation of multiple character
1612 sets is a kind of multi-byte encoding, i.e. characters are
1613 represented by variable-length sequences of one-byte codes.
1614
1615 ASCII characters and control characters (e.g. `tab', `newline') are
1616 represented by one-byte sequences which are their ASCII codes, in
1617 the range 0x00 through 0x7F.
1618
1619 8-bit characters of the range 0x80..0x9F are represented by
1620 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1621 code + 0x20).
1622
1623 8-bit characters of the range 0xA0..0xFF are represented by
1624 one-byte sequences which are their 8-bit code.
1625
1626 The other characters are represented by a sequence of `base
1627 leading-code', optional `extended leading-code', and one or two
1628 `position-code's. The length of the sequence is determined by the
1629 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1630 whereas extended leading-code and position-code take the range 0xA0
1631 through 0xFF. See `charset.h' for more details about leading-code
1632 and position-code.
1633
1634 --- CODE RANGE of Emacs' internal format ---
1635 character set range
1636 ------------- -----
1637 ascii 0x00..0x7F
1638 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1639 eight-bit-graphic 0xA0..0xBF
1640 ELSE 0x81..0x9D + [0xA0..0xFF]+
1641 ---------------------------------------------
1642
1643 As this is the internal character representation, the format is
1644 usually not used externally (i.e. in a file or in a data sent to a
1645 process). But, it is possible to have a text externally in this
1646 format (i.e. by encoding by the coding system `emacs-mule').
1647
1648 In that case, a sequence of one-byte codes has a slightly different
1649 form.
1650
1651 At first, all characters in eight-bit-control are represented by
1652 one-byte sequences which are their 8-bit code.
1653
1654 Next, character composition data are represented by the byte
1655 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1656 where,
1657 METHOD is 0xF0 plus one of composition method (enum
1658 composition_method),
1659
1660 BYTES is 0xA0 plus a byte length of this composition data,
1661
1662 CHARS is 0x20 plus a number of characters composed by this
1663 data,
1664
1665 COMPONENTs are characters of multibyte form or composition
1666 rules encoded by two-byte of ASCII codes.
1667
1668 In addition, for backward compatibility, the following formats are
1669 also recognized as composition data on decoding.
1670
1671 0x80 MSEQ ...
1672 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1673
1674 Here,
1675 MSEQ is a multibyte form but in these special format:
1676 ASCII: 0xA0 ASCII_CODE+0x80,
1677 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1678 RULE is a one byte code of the range 0xA0..0xF0 that
1679 represents a composition rule.
1680 */
1681
/* Table indexed by the first byte of an emacs-mule sequence; the
   entry is the total number of bytes (1 to 4) in that sequence.  */
char emacs_mule_bytes[256];
1683
df7492f9 1684int
ff0dacd7 1685emacs_mule_char (coding, src, nbytes, nchars, id)
df7492f9 1686 struct coding_system *coding;
065e3595 1687 const unsigned char *src;
ff0dacd7 1688 int *nbytes, *nchars, *id;
df7492f9 1689{
8f924df7
KH
1690 const unsigned char *src_end = coding->source + coding->src_bytes;
1691 const unsigned char *src_base = src;
df7492f9 1692 int multibytep = coding->src_multibyte;
df7492f9
KH
1693 struct charset *charset;
1694 unsigned code;
1695 int c;
1696 int consumed_chars = 0;
1697
1698 ONE_MORE_BYTE (c);
065e3595 1699 if (c < 0)
df7492f9 1700 {
065e3595
KH
1701 c = -c;
1702 charset = emacs_mule_charset[0];
1703 }
1704 else
1705 {
1706 switch (emacs_mule_bytes[c])
b73bfc1c 1707 {
065e3595 1708 case 2:
df7492f9
KH
1709 if (! (charset = emacs_mule_charset[c]))
1710 goto invalid_code;
1711 ONE_MORE_BYTE (c);
9ffd559c 1712 if (c < 0xA0)
065e3595 1713 goto invalid_code;
df7492f9 1714 code = c & 0x7F;
065e3595
KH
1715 break;
1716
1717 case 3:
1718 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1719 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1720 {
1721 ONE_MORE_BYTE (c);
9ffd559c 1722 if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
065e3595
KH
1723 goto invalid_code;
1724 ONE_MORE_BYTE (c);
9ffd559c 1725 if (c < 0xA0)
065e3595
KH
1726 goto invalid_code;
1727 code = c & 0x7F;
1728 }
1729 else
1730 {
1731 if (! (charset = emacs_mule_charset[c]))
1732 goto invalid_code;
1733 ONE_MORE_BYTE (c);
9ffd559c 1734 if (c < 0xA0)
065e3595
KH
1735 goto invalid_code;
1736 code = (c & 0x7F) << 8;
1737 ONE_MORE_BYTE (c);
9ffd559c 1738 if (c < 0xA0)
065e3595
KH
1739 goto invalid_code;
1740 code |= c & 0x7F;
1741 }
1742 break;
1743
1744 case 4:
1745 ONE_MORE_BYTE (c);
1746 if (c < 0 || ! (charset = emacs_mule_charset[c]))
df7492f9
KH
1747 goto invalid_code;
1748 ONE_MORE_BYTE (c);
9ffd559c 1749 if (c < 0xA0)
065e3595 1750 goto invalid_code;
781d7a48 1751 code = (c & 0x7F) << 8;
df7492f9 1752 ONE_MORE_BYTE (c);
9ffd559c 1753 if (c < 0xA0)
065e3595 1754 goto invalid_code;
df7492f9 1755 code |= c & 0x7F;
065e3595 1756 break;
df7492f9 1757
065e3595
KH
1758 case 1:
1759 code = c;
1760 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1761 ? charset_ascii : charset_eight_bit);
1762 break;
df7492f9 1763
065e3595
KH
1764 default:
1765 abort ();
1766 }
1767 c = DECODE_CHAR (charset, code);
1768 if (c < 0)
1769 goto invalid_code;
df7492f9 1770 }
df7492f9
KH
1771 *nbytes = src - src_base;
1772 *nchars = consumed_chars;
ff0dacd7
KH
1773 if (id)
1774 *id = charset->id;
df7492f9
KH
1775 return c;
1776
1777 no_more_source:
1778 return -2;
1779
1780 invalid_code:
1781 return -1;
1782}
1783
1784
1785/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
1786 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1787 else return 0. */
df7492f9
KH
1788
1789static int
ff0dacd7 1790detect_coding_emacs_mule (coding, detect_info)
df7492f9 1791 struct coding_system *coding;
ff0dacd7 1792 struct coding_detection_info *detect_info;
df7492f9 1793{
065e3595 1794 const unsigned char *src = coding->source, *src_base;
8f924df7 1795 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
1796 int multibytep = coding->src_multibyte;
1797 int consumed_chars = 0;
1798 int c;
1799 int found = 0;
1800
ff0dacd7 1801 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1802 /* A coding system of this category is always ASCII compatible. */
1803 src += coding->head_ascii;
1804
1805 while (1)
1806 {
065e3595 1807 src_base = src;
df7492f9 1808 ONE_MORE_BYTE (c);
065e3595
KH
1809 if (c < 0)
1810 continue;
df7492f9
KH
1811 if (c == 0x80)
1812 {
1813 /* Perhaps the start of composite character. We simple skip
1814 it because analyzing it is too heavy for detecting. But,
1815 at least, we check that the composite character
1816 constitues of more than 4 bytes. */
8f924df7 1817 const unsigned char *src_base;
df7492f9
KH
1818
1819 repeat:
1820 src_base = src;
1821 do
1822 {
1823 ONE_MORE_BYTE (c);
1824 }
1825 while (c >= 0xA0);
1826
1827 if (src - src_base <= 4)
1828 break;
ff0dacd7 1829 found = CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1830 if (c == 0x80)
1831 goto repeat;
b73bfc1c 1832 }
df7492f9
KH
1833
1834 if (c < 0x80)
b73bfc1c 1835 {
df7492f9
KH
1836 if (c < 0x20
1837 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1838 break;
1839 }
1840 else
1841 {
0e219d54 1842 int more_bytes = emacs_mule_bytes[*src_base] - 1;
df7492f9 1843
0e219d54 1844 while (more_bytes > 0)
df7492f9
KH
1845 {
1846 ONE_MORE_BYTE (c);
0e219d54
KH
1847 if (c < 0xA0)
1848 {
1849 src--; /* Unread the last byte. */
1850 break;
1851 }
1852 more_bytes--;
df7492f9 1853 }
0e219d54 1854 if (more_bytes != 0)
df7492f9 1855 break;
ff0dacd7 1856 found = CATEGORY_MASK_EMACS_MULE;
4ed46869
KH
1857 }
1858 }
ff0dacd7 1859 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
df7492f9
KH
1860 return 0;
1861
1862 no_more_source:
065e3595 1863 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 1864 {
ff0dacd7 1865 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
89528eb3
KH
1866 return 0;
1867 }
ff0dacd7
KH
1868 detect_info->found |= found;
1869 return 1;
4ed46869
KH
1870}
1871
b73bfc1c 1872
df7492f9
KH
1873/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1874
/* Decode one character that is a component of an Emacs 20/21 style
   composition sequence at SRC by calling emacs_mule_char.  On
   success the character is stored in *BUF, BUF is incremented, and
   SRC and CONSUMED_CHARS are advanced by the byte and character
   counts that emacs_mule_char reported.

   The expansion is an `if (1) { ... } else' prefix, so the `;'
   written after an invocation becomes the empty else-branch.  It
   must be used directly inside a loop (or a `do ... while (0)'):
   it executes `break' when SRC has reached SRC_END or when
   emacs_mule_char returns -2, and jumps to `invalid_code' on any
   other negative result.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                         \
  if (1)                                                                \
    {                                                                   \
      int ch;                                                           \
      int ch_bytes, ch_chars;                                           \
                                                                        \
      if (src == src_end)                                               \
        break;                                                          \
      ch = emacs_mule_char (coding, src, &ch_bytes, &ch_chars, NULL);   \
      if (ch == -2)                                                     \
        break;                                                          \
      if (ch < 0)                                                       \
        goto invalid_code;                                              \
      *buf++ = ch;                                                      \
      src += ch_bytes;                                                  \
      consumed_chars += ch_chars;                                       \
    }                                                                   \
  else
1902
1903
/* Decode a composition rule that is a component of an Emacs 20 style
   composition sequence at SRC.  One byte is read; after subtracting
   0x20 it must be in the range 0..80, and is split into GREF (value
   / 9) and NREF (value % 9).  The rule encoded from them is stored
   in *BUF and BUF is incremented.  If SRC is already at SRC_END, or
   the byte is out of range, jump to `invalid_code'.

   NOTE(review): the 0x20 bias is what the code does; confirm it
   against the Emacs 20 on-disk format, which this chunk does not
   show.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)                      \
  do {                                                                  \
    int rule_code, gref, nref;                                          \
                                                                        \
    if (src >= src_end)                                                 \
      goto invalid_code;                                                \
    ONE_MORE_BYTE_NO_CHECK (rule_code);                                 \
    rule_code -= 0x20;                                                  \
    if (! (rule_code >= 0 && rule_code < 81))                           \
      goto invalid_code;                                                \
                                                                        \
    gref = rule_code / 9;                                               \
    nref = rule_code % 9;                                               \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);                      \
  } while (0)
1923
1924
781d7a48
KH
/* Decode a composition rule that is a component of an Emacs 21 style
   composition sequence at SRC.  Two bytes are read, GREF then NREF,
   each biased by 0x20 and each required to be in the range 0..80
   after the bias is removed.  The rule encoded from them is stored
   in *BUF and BUF is incremented.  If fewer than two bytes remain
   before SRC_END, or either value is out of range, jump to
   `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)                      \
  do {                                                                  \
    int gref, nref;                                                     \
                                                                        \
    if (src_end - src < 2)                                              \
      goto invalid_code;                                                \
    ONE_MORE_BYTE_NO_CHECK (gref);                                      \
    gref -= 0x20;                                                       \
    ONE_MORE_BYTE_NO_CHECK (nref);                                      \
    nref -= 0x20;                                                       \
    if (! (gref >= 0 && gref < 81                                       \
           && nref >= 0 && nref < 81))                                  \
      goto invalid_code;                                                \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);                      \
  } while (0)
1945
1946
#define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
  do {                                                                  \
    /* Emacs 21 style format.  C holds the first of three header       \
       bytes, (METHOD + 0xF2), (BYTES + 0xA0), (CHARS + 0xA0), where   \
       BYTES is the byte length of this composition information and    \
       CHARS is the number of characters composed by it.  The          \
       annotation header is appended to CHARBUF; for every method      \
       except COMPOSITION_RELATIVE the components that follow are      \
       appended as well and the first annotation element is lowered    \
       by their count.  Malformed input jumps to `invalid_code'.  */   \
    enum composition_method method = c - 0xF2;                          \
    int *annotation_head = charbuf;                                     \
    int chars_limit;                                                    \
    int info_bytes, composed_chars;                                     \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    info_bytes = c - 0xA0;                                              \
    if (info_bytes < 3)                                                 \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    composed_chars = c - 0xA0;                                          \
    ADD_COMPOSITION_DATA (charbuf, composed_chars, method);             \
    chars_limit = consumed_chars_base + info_bytes;                     \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int ncomponents;                                                \
                                                                        \
        /* Odd-numbered components are rules unless the method is      \
           COMPOSITION_WITH_ALTCHARS.  A `break' issued by the         \
           character decoder leaves this loop without counting the     \
           unfinished component.  */                                    \
        for (ncomponents = 0; consumed_chars < chars_limit;             \
             ncomponents++)                                             \
          {                                                             \
            if (ncomponents % 2 && method != COMPOSITION_WITH_ALTCHARS) \
              {                                                         \
                DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);        \
              }                                                         \
            else                                                        \
              {                                                         \
                DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);           \
              }                                                         \
          }                                                             \
        if (consumed_chars < chars_limit)                               \
          goto invalid_code;                                            \
        annotation_head[0] -= ncomponents;                              \
      }                                                                 \
  } while (0)
93dec019 1986
aa72b389 1987
df7492f9
KH
#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
  do {                                                                  \
    /* Emacs 20 style format for relative composition: 0x80 followed   \
       by the characters to be composed.  At least two and at most     \
       MAX_COMPOSITION_COMPONENTS characters are decoded into a local  \
       array; then a COMPOSITION_RELATIVE annotation and the           \
       characters themselves are appended to CHARBUF.  Fewer than two  \
       characters jumps to `invalid_code'.  */                          \
    enum composition_method method = COMPOSITION_RELATIVE;              \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
    int *buf = components;                                              \
    int i, j;                                                           \
                                                                        \
    /* Rewind to the head of the sequence.  CONSUMED_CHARS is rewound  \
       together with SRC, as at the other rewind sites of              \
       decode_coding_emacs_mule; otherwise the bytes read before the   \
       rewind would be counted twice.  */                               \
    src = src_base;                                                     \
    consumed_chars = consumed_chars_base;                               \
    ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)                    \
      DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
    if (i < 2)                                                          \
      goto invalid_code;                                                \
    ADD_COMPOSITION_DATA (charbuf, i, method);                          \
    for (j = 0; j < i; j++)                                             \
      *charbuf++ = components[j];                                       \
  } while (0)
2007
2008
#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)                    \
  do {                                                                  \
    /* Emacs 20 style format for rule-base composition: a character    \
       followed by (rule, character) pairs.  The components are        \
       collected in a local array; then a COMPOSITION_WITH_RULE        \
       annotation carrying all components is appended to CHARBUF,      \
       followed by the characters alone.                               \
       NOTE(review): the loop below only ends normally when it runs    \
       to its bound; stopping at the end of a shorter composition      \
       depends on how DECODE_EMACS_MULE_COMPOSITION_RULE_20            \
       recognizes a rule byte and is not addressed here.  */            \
    enum composition_method method = COMPOSITION_WITH_RULE;             \
    int *charbuf_base = charbuf;                                        \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
    int *buf = components;                                              \
    int i, j;                                                           \
                                                                        \
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                           \
    /* One character is already stored, so only                        \
       MAX_COMPOSITION_COMPONENTS - 1 pairs fit in COMPONENTS.  */      \
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS - 1; i++)                \
      {                                                                 \
        DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);                    \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                       \
      }                                                                 \
    if (i < 1 || (buf - components) % 2 == 0)                           \
      goto invalid_code;                                                \
    /* From here on I is the number of stored components (odd);        \
       (I + 1) / 2 of them are characters.  */                          \
    i = buf - components;                                               \
    /* Give up for now when CHARBUF lacks room for the components      \
       plus the characters.  */                                         \
    if (charbuf + i + (i / 2) + 1 >= charbuf_end)                       \
      goto no_more_source;                                              \
    ADD_COMPOSITION_DATA (charbuf, (i + 1) / 2, method);                \
    for (j = 0; j < i; j++)                                             \
      *charbuf++ = components[j];                                       \
    /* Account for the appended components in the first annotation     \
       element, as DECODE_EMACS_MULE_21_COMPOSITION does.  */           \
    charbuf_base[0] -= i;                                               \
    for (j = 0; j < i; j += 2)                                          \
      *charbuf++ = components[j];                                       \
  } while (0)
2034
aa72b389
KH
2035
2036static void
df7492f9 2037decode_coding_emacs_mule (coding)
aa72b389 2038 struct coding_system *coding;
aa72b389 2039{
8f924df7
KH
2040 const unsigned char *src = coding->source + coding->consumed;
2041 const unsigned char *src_end = coding->source + coding->src_bytes;
2042 const unsigned char *src_base;
69a80ea3
KH
2043 int *charbuf = coding->charbuf + coding->charbuf_used;
2044 int *charbuf_end
2045 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9 2046 int consumed_chars = 0, consumed_chars_base;
df7492f9 2047 int multibytep = coding->src_multibyte;
24a73b0a 2048 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2049 int char_offset = coding->produced_char;
2050 int last_offset = char_offset;
2051 int last_id = charset_ascii;
aa72b389 2052
24a73b0a 2053 CODING_GET_INFO (coding, attrs, charset_list);
aa72b389 2054
aa72b389
KH
2055 while (1)
2056 {
df7492f9
KH
2057 int c;
2058
aa72b389 2059 src_base = src;
df7492f9
KH
2060 consumed_chars_base = consumed_chars;
2061
2062 if (charbuf >= charbuf_end)
2063 break;
aa72b389 2064
df7492f9 2065 ONE_MORE_BYTE (c);
065e3595
KH
2066 if (c < 0)
2067 {
2068 *charbuf++ = -c;
2069 char_offset++;
2070 }
2071 else if (c < 0x80)
aa72b389 2072 {
df7492f9
KH
2073 *charbuf++ = c;
2074 char_offset++;
aa72b389 2075 }
df7492f9
KH
2076 else if (c == 0x80)
2077 {
df7492f9 2078 ONE_MORE_BYTE (c);
065e3595
KH
2079 if (c < 0)
2080 goto invalid_code;
781d7a48
KH
2081 if (c - 0xF2 >= COMPOSITION_RELATIVE
2082 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
2083 DECODE_EMACS_MULE_21_COMPOSITION (c);
2084 else if (c < 0xC0)
2085 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2086 else if (c == 0xFF)
2087 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2088 else
2089 goto invalid_code;
2090 }
2091 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2092 {
2093 int nbytes, nchars;
ff0dacd7
KH
2094 int id;
2095
781d7a48
KH
2096 src = src_base;
2097 consumed_chars = consumed_chars_base;
ff0dacd7 2098 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
df7492f9
KH
2099 if (c < 0)
2100 {
2101 if (c == -2)
2102 break;
2103 goto invalid_code;
2104 }
ff0dacd7
KH
2105 if (last_id != id)
2106 {
2107 if (last_id != charset_ascii)
69a80ea3 2108 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
2109 last_id = id;
2110 last_offset = char_offset;
2111 }
df7492f9 2112 *charbuf++ = c;
781d7a48
KH
2113 src += nbytes;
2114 consumed_chars += nchars;
df7492f9
KH
2115 char_offset++;
2116 }
2117 continue;
2118
2119 invalid_code:
2120 src = src_base;
2121 consumed_chars = consumed_chars_base;
2122 ONE_MORE_BYTE (c);
2123 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 2124 char_offset++;
df7492f9
KH
2125 coding->errors++;
2126 }
2127
2128 no_more_source:
ff0dacd7 2129 if (last_id != charset_ascii)
69a80ea3 2130 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
2131 coding->consumed_char += consumed_chars_base;
2132 coding->consumed = src_base - coding->source;
2133 coding->charbuf_used = charbuf - coding->charbuf;
2134}
2135
2136
/* Store in CODES[0] and CODES[1] the leading code(s) for the
   emacs-mule charset ID.  An ID below 0xA0 is its own leading code,
   and CODES[1] is set to 0.  Larger IDs get a two-byte prefix: 0x9A,
   0x9B, 0x9C or 0x9D (chosen by the range ID falls in) followed by
   ID itself.

   ID is evaluated exactly once.  The expansion is a single statement
   without a trailing `;', so an invocation can be used as the body
   of an `if' that has an `else'.  */

#define EMACS_MULE_LEADING_CODES(id, codes)                             \
  do {                                                                  \
    int leading_code_id_ = (id);                                        \
                                                                        \
    if (leading_code_id_ < 0xA0)                                        \
      (codes)[0] = leading_code_id_, (codes)[1] = 0;                    \
    else if (leading_code_id_ < 0xE0)                                   \
      (codes)[0] = 0x9A, (codes)[1] = leading_code_id_;                 \
    else if (leading_code_id_ < 0xF0)                                   \
      (codes)[0] = 0x9B, (codes)[1] = leading_code_id_;                 \
    else if (leading_code_id_ < 0xF5)                                   \
      (codes)[0] = 0x9C, (codes)[1] = leading_code_id_;                 \
    else                                                                \
      (codes)[0] = 0x9D, (codes)[1] = leading_code_id_;                 \
  } while (0)
2150
aa72b389 2151
df7492f9
KH
2152static int
2153encode_coding_emacs_mule (coding)
2154 struct coding_system *coding;
2155{
2156 int multibytep = coding->dst_multibyte;
2157 int *charbuf = coding->charbuf;
2158 int *charbuf_end = charbuf + coding->charbuf_used;
2159 unsigned char *dst = coding->destination + coding->produced;
2160 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2161 int safe_room = 8;
df7492f9 2162 int produced_chars = 0;
24a73b0a 2163 Lisp_Object attrs, charset_list;
df7492f9 2164 int c;
ff0dacd7 2165 int preferred_charset_id = -1;
df7492f9 2166
24a73b0a 2167 CODING_GET_INFO (coding, attrs, charset_list);
eccb6815
KH
2168 if (! EQ (charset_list, Vemacs_mule_charset_list))
2169 {
2170 CODING_ATTR_CHARSET_LIST (attrs)
2171 = charset_list = Vemacs_mule_charset_list;
2172 }
df7492f9
KH
2173
2174 while (charbuf < charbuf_end)
2175 {
2176 ASSURE_DESTINATION (safe_room);
2177 c = *charbuf++;
ff0dacd7
KH
2178
2179 if (c < 0)
2180 {
2181 /* Handle an annotation. */
2182 switch (*charbuf)
2183 {
2184 case CODING_ANNOTATE_COMPOSITION_MASK:
2185 /* Not yet implemented. */
2186 break;
2187 case CODING_ANNOTATE_CHARSET_MASK:
2188 preferred_charset_id = charbuf[3];
2189 if (preferred_charset_id >= 0
2190 && NILP (Fmemq (make_number (preferred_charset_id),
2191 charset_list)))
2192 preferred_charset_id = -1;
2193 break;
2194 default:
2195 abort ();
2196 }
2197 charbuf += -c - 1;
2198 continue;
2199 }
2200
df7492f9
KH
2201 if (ASCII_CHAR_P (c))
2202 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2203 else if (CHAR_BYTE8_P (c))
2204 {
2205 c = CHAR_TO_BYTE8 (c);
2206 EMIT_ONE_BYTE (c);
2207 }
df7492f9 2208 else
aa72b389 2209 {
df7492f9
KH
2210 struct charset *charset;
2211 unsigned code;
2212 int dimension;
2213 int emacs_mule_id;
2214 unsigned char leading_codes[2];
2215
ff0dacd7
KH
2216 if (preferred_charset_id >= 0)
2217 {
2218 charset = CHARSET_FROM_ID (preferred_charset_id);
2219 if (! CHAR_CHARSET_P (c, charset))
2220 charset = char_charset (c, charset_list, NULL);
2221 }
2222 else
2223 charset = char_charset (c, charset_list, &code);
df7492f9
KH
2224 if (! charset)
2225 {
2226 c = coding->default_char;
2227 if (ASCII_CHAR_P (c))
2228 {
2229 EMIT_ONE_ASCII_BYTE (c);
2230 continue;
2231 }
2232 charset = char_charset (c, charset_list, &code);
2233 }
2234 dimension = CHARSET_DIMENSION (charset);
2235 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2236 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2237 EMIT_ONE_BYTE (leading_codes[0]);
2238 if (leading_codes[1])
2239 EMIT_ONE_BYTE (leading_codes[1]);
2240 if (dimension == 1)
1fa663f9 2241 EMIT_ONE_BYTE (code | 0x80);
aa72b389 2242 else
df7492f9 2243 {
1fa663f9 2244 code |= 0x8080;
df7492f9
KH
2245 EMIT_ONE_BYTE (code >> 8);
2246 EMIT_ONE_BYTE (code & 0xFF);
2247 }
aa72b389 2248 }
aa72b389 2249 }
065e3595 2250 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
2251 coding->produced_char += produced_chars;
2252 coding->produced = dst - coding->destination;
2253 return 0;
aa72b389 2254}
b73bfc1c 2255
4ed46869 2256\f
df7492f9 2257/*** 7. ISO2022 handlers ***/
4ed46869
KH
2258
2259/* The following note describes the coding system ISO2022 briefly.
39787efd 2260 Since the intention of this note is to help understand the
5a936b46 2261 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2262 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46 2263 original document of ISO2022. This is equivalent to the standard
cfb43547 2264 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2265
2266 ISO2022 provides many mechanisms to encode several character sets
cfb43547 2267 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2268 is encoded using bytes less than 128. This may make the encoded
2269 text a little bit longer, but the text passes more easily through
cfb43547 2270 several types of gateway, some of which strip off the MSB (Most
8ca3766a 2271 Significant Bit).
b73bfc1c 2272
cfb43547
DL
2273 There are two kinds of character sets: control character sets and
2274 graphic character sets. The former contain control characters such
4ed46869 2275 as `newline' and `escape' to provide control functions (control
39787efd 2276 functions are also provided by escape sequences). The latter
cfb43547 2277 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2278 two control character sets and many graphic character sets.
2279
2280 Graphic character sets are classified into one of the following
39787efd
KH
2281 four classes, according to the number of bytes (DIMENSION) and
2282 number of characters in one dimension (CHARS) of the set:
2283 - DIMENSION1_CHARS94
2284 - DIMENSION1_CHARS96
2285 - DIMENSION2_CHARS94
2286 - DIMENSION2_CHARS96
2287
2288 In addition, each character set is assigned an identification tag,
cfb43547 2289 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2290 hereafter). The <F> of each character set is decided by ECMA(*)
2291 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2292 (0x30..0x3F are for private use only).
4ed46869
KH
2293
2294 Note (*): ECMA = European Computer Manufacturers Association
2295
cfb43547 2296 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2297 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2298 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2299 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2300 o DIMENSION2_CHARS96 -- none for the moment
2301
39787efd 2302 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2303 C0 [0x00..0x1F] -- control character plane 0
2304 GL [0x20..0x7F] -- graphic character plane 0
2305 C1 [0x80..0x9F] -- control character plane 1
2306 GR [0xA0..0xFF] -- graphic character plane 1
2307
2308 A control character set is directly designated and invoked to C0 or
39787efd
KH
2309 C1 by an escape sequence. The most common case is that:
2310 - ISO646's control character set is designated/invoked to C0, and
2311 - ISO6429's control character set is designated/invoked to C1,
2312 and usually these designations/invocations are omitted in encoded
2313 text. In a 7-bit environment, only C0 can be used, and a control
2314 character for C1 is encoded by an appropriate escape sequence to
2315 fit into the environment. All control characters for C1 are
2316 defined to have corresponding escape sequences.
4ed46869
KH
2317
2318 A graphic character set is at first designated to one of four
2319 graphic registers (G0 through G3), then these graphic registers are
2320 invoked to GL or GR. These designations and invocations can be
2321 done independently. The most common case is that G0 is invoked to
39787efd
KH
2322 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2323 these invocations and designations are omitted in encoded text.
2324 In a 7-bit environment, only GL can be used.
4ed46869 2325
39787efd
KH
2326 When a graphic character set of CHARS94 is invoked to GL, codes
2327 0x20 and 0x7F of the GL area work as control characters SPACE and
2328 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2329 be used.
4ed46869
KH
2330
2331 There are two ways of invocation: locking-shift and single-shift.
2332 With locking-shift, the invocation lasts until the next different
39787efd
KH
2333 invocation, whereas with single-shift, the invocation affects the
2334 following character only and doesn't affect the locking-shift
2335 state. Invocations are done by the following control characters or
2336 escape sequences:
4ed46869
KH
2337
2338 ----------------------------------------------------------------------
39787efd 2339 abbrev function cntrl escape seq description
4ed46869 2340 ----------------------------------------------------------------------
39787efd
KH
2341 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2342 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2343 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2344 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2345 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2346 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2347 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2348 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2349 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2350 ----------------------------------------------------------------------
39787efd
KH
2351 (*) These are not used by any known coding system.
2352
2353 Control characters for these functions are defined by macros
2354 ISO_CODE_XXX in `coding.h'.
4ed46869 2355
39787efd 2356 Designations are done by the following escape sequences:
4ed46869
KH
2357 ----------------------------------------------------------------------
2358 escape sequence description
2359 ----------------------------------------------------------------------
2360 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2361 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2362 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2363 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2364 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2365 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2366 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2367 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2368 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2369 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2370 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2371 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2372 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2373 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2374 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2375 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2376 ----------------------------------------------------------------------
2377
2378 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2379 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2380
2381 Note (*): Although these designations are not allowed in ISO2022,
2382 Emacs accepts them on decoding, and produces them on encoding
39787efd 2383 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2384 7-bit environment, non-locking-shift, and non-single-shift.
2385
2386 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2387 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2388
cfb43547 2389 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2390 same multilingual text in ISO2022. Actually, there exist many
2391 coding systems such as Compound Text (used in X11's inter client
8ca3766a
DL
2392 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2393 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2394 localized platforms), and all of these are variants of ISO2022.
2395
2396 In addition to the above, Emacs handles two more kinds of escape
2397 sequences: ISO6429's direction specification and Emacs' private
2398 sequence for specifying character composition.
2399
39787efd 2400 ISO6429's direction specification takes the following form:
4ed46869
KH
2401 o CSI ']' -- end of the current direction
2402 o CSI '0' ']' -- end of the current direction
2403 o CSI '1' ']' -- start of left-to-right text
2404 o CSI '2' ']' -- start of right-to-left text
2405 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2406 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2407
2408 Character composition specification takes the following form:
ec6d2bb8
KH
2409 o ESC '0' -- start relative composition
2410 o ESC '1' -- end composition
2411 o ESC '2' -- start rule-base composition (*)
2412 o ESC '3' -- start relative composition with alternate chars (**)
2413 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2414 Since these are not standard escape sequences of any ISO standard,
cfb43547 2415 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2416
5a936b46
DL
2417 (*) This form is used only in Emacs 20.7 and older versions,
2418 but newer versions can safely decode it.
cfb43547 2419 (**) This form is used only in Emacs 21.1 and newer versions,
5a936b46 2420 and older versions can't decode it.
ec6d2bb8 2421
cfb43547 2422 Here's a list of example usages of these composition escape
b73bfc1c 2423 sequences (categorized by `enum composition_method').
ec6d2bb8 2424
b73bfc1c 2425 COMPOSITION_RELATIVE:
ec6d2bb8 2426 ESC 0 CHAR [ CHAR ] ESC 1
8ca3766a 2427 COMPOSITION_WITH_RULE:
ec6d2bb8 2428 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2429 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2430 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2431 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2432 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2433
/* Table with one `enum iso_code_class_type' element per byte value
   (256 entries).  NOTE(review): presumably the ISO-2022 code class
   of each byte; it is neither filled nor read in this part of the
   file, so confirm against the code that initializes it.  */
enum iso_code_class_type iso_code_class[256];
2435
df7492f9
KH
/* Non-zero if the charset ID is safe for CODING, i.e. ID is within
   0..CODING->max_charset_id and its entry in CODING->safe_charsets
   is below 128.  The table is created filled with 255 (meaning "not
   safe") and safe entries hold a small register number.  Testing
   bit 7 instead of comparing with 0 gives the same answer whether
   plain `char' is signed or unsigned.  A negative ID is never safe
   and is rejected before the table is indexed.  */
#define SAFE_CHARSET_P(coding, id)                              \
  ((id) >= 0                                                    \
   && (id) <= (coding)->max_charset_id                          \
   && ((coding)->safe_charsets[id] & 0x80) == 0)
2439
2440
/* Non-zero if CODING_ISO_INITIAL (..., 1) of the coding system
   registered for CATEGORY is not negative.  NOTE(review): presumably
   this means a charset is initially designated to register G1, so
   that shift-out has something to invoke -- confirm against
   CODING_ISO_INITIAL.  */
#define SHIFT_OUT_OK(category)                                          \
  (0 <= CODING_ISO_INITIAL (&coding_categories[category], 1))
2443
2444static void
f0064e1f
DL
2445setup_iso_safe_charsets (attrs)
2446 Lisp_Object attrs;
df7492f9
KH
2447{
2448 Lisp_Object charset_list, safe_charsets;
2449 Lisp_Object request;
2450 Lisp_Object reg_usage;
2451 Lisp_Object tail;
2452 int reg94, reg96;
2453 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2454 int max_charset_id;
2455
2456 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2457 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2458 && ! EQ (charset_list, Viso_2022_charset_list))
2459 {
2460 CODING_ATTR_CHARSET_LIST (attrs)
2461 = charset_list = Viso_2022_charset_list;
2462 ASET (attrs, coding_attr_safe_charsets, Qnil);
2463 }
2464
2465 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2466 return;
2467
2468 max_charset_id = 0;
2469 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2470 {
2471 int id = XINT (XCAR (tail));
2472 if (max_charset_id < id)
2473 max_charset_id = id;
2474 }
d46c5b12 2475
df7492f9
KH
2476 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2477 make_number (255));
2478 request = AREF (attrs, coding_attr_iso_request);
2479 reg_usage = AREF (attrs, coding_attr_iso_usage);
2480 reg94 = XINT (XCAR (reg_usage));
2481 reg96 = XINT (XCDR (reg_usage));
2482
2483 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2484 {
2485 Lisp_Object id;
2486 Lisp_Object reg;
2487 struct charset *charset;
2488
2489 id = XCAR (tail);
2490 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2491 reg = Fcdr (Fassq (id, request));
df7492f9 2492 if (! NILP (reg))
8f924df7 2493 SSET (safe_charsets, XINT (id), XINT (reg));
df7492f9
KH
2494 else if (charset->iso_chars_96)
2495 {
2496 if (reg96 < 4)
8f924df7 2497 SSET (safe_charsets, XINT (id), reg96);
df7492f9
KH
2498 }
2499 else
2500 {
2501 if (reg94 < 4)
8f924df7 2502 SSET (safe_charsets, XINT (id), reg94);
df7492f9
KH
2503 }
2504 }
2505 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2506}
d46c5b12 2507
b6871cc7 2508
4ed46869 2509/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
ff0dacd7
KH
2510 Check if a text is encoded in one of ISO-2022 based codig systems.
2511 If it is, return 1, else return 0. */
4ed46869 2512
0a28aafb 2513static int
ff0dacd7 2514detect_coding_iso_2022 (coding, detect_info)
df7492f9 2515 struct coding_system *coding;
ff0dacd7 2516 struct coding_detection_info *detect_info;
4ed46869 2517{
8f924df7
KH
2518 const unsigned char *src = coding->source, *src_base = src;
2519 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9 2520 int multibytep = coding->src_multibyte;
ff0dacd7 2521 int single_shifting = 0;
df7492f9
KH
2522 int id;
2523 int c, c1;
2524 int consumed_chars = 0;
2525 int i;
ff0dacd7
KH
2526 int rejected = 0;
2527 int found = 0;
2528
2529 detect_info->checked |= CATEGORY_MASK_ISO;
df7492f9
KH
2530
2531 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2532 {
2533 struct coding_system *this = &(coding_categories[i]);
2534 Lisp_Object attrs, val;
2535
2536 attrs = CODING_ID_ATTRS (this->id);
2537 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2538 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2539 setup_iso_safe_charsets (attrs);
2540 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
2541 this->max_charset_id = SCHARS (val) - 1;
2542 this->safe_charsets = (char *) SDATA (val);
df7492f9
KH
2543 }
2544
2545 /* A coding system of this category is always ASCII compatible. */
2546 src += coding->head_ascii;
3f003981 2547
ff0dacd7 2548 while (rejected != CATEGORY_MASK_ISO)
4ed46869 2549 {
065e3595 2550 src_base = src;
df7492f9 2551 ONE_MORE_BYTE (c);
4ed46869
KH
2552 switch (c)
2553 {
2554 case ISO_CODE_ESC:
74383408
KH
2555 if (inhibit_iso_escape_detection)
2556 break;
f46869e4 2557 single_shifting = 0;
df7492f9 2558 ONE_MORE_BYTE (c);
d46c5b12 2559 if (c >= '(' && c <= '/')
4ed46869 2560 {
bf9cdd4e 2561 /* Designation sequence for a charset of dimension 1. */
df7492f9 2562 ONE_MORE_BYTE (c1);
d46c5b12 2563 if (c1 < ' ' || c1 >= 0x80
df7492f9 2564 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2565 /* Invalid designation sequence. Just ignore. */
2566 break;
bf9cdd4e
KH
2567 }
2568 else if (c == '$')
2569 {
2570 /* Designation sequence for a charset of dimension 2. */
df7492f9 2571 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2572 if (c >= '@' && c <= 'B')
2573 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
ff0dacd7 2574 id = iso_charset_table[1][0][c];
bf9cdd4e 2575 else if (c >= '(' && c <= '/')
bcf26d6a 2576 {
df7492f9 2577 ONE_MORE_BYTE (c1);
d46c5b12 2578 if (c1 < ' ' || c1 >= 0x80
df7492f9 2579 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2580 /* Invalid designation sequence. Just ignore. */
2581 break;
bcf26d6a 2582 }
bf9cdd4e 2583 else
ff0dacd7 2584 /* Invalid designation sequence. Just ignore it. */
d46c5b12
KH
2585 break;
2586 }
ae9ff118 2587 else if (c == 'N' || c == 'O')
d46c5b12 2588 {
ae9ff118 2589 /* ESC <Fe> for SS2 or SS3. */
ff0dacd7
KH
2590 single_shifting = 1;
2591 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
d46c5b12 2592 break;
4ed46869 2593 }
ec6d2bb8
KH
2594 else if (c >= '0' && c <= '4')
2595 {
2596 /* ESC <Fp> for start/end composition. */
ff0dacd7 2597 found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2598 break;
2599 }
bf9cdd4e 2600 else
df7492f9 2601 {
ff0dacd7 2602 /* Invalid escape sequence. Just ignore it. */
df7492f9
KH
2603 break;
2604 }
d46c5b12
KH
2605
2606 /* We found a valid designation sequence for CHARSET. */
ff0dacd7 2607 rejected |= CATEGORY_MASK_ISO_8BIT;
df7492f9
KH
2608 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2609 id))
ff0dacd7 2610 found |= CATEGORY_MASK_ISO_7;
d46c5b12 2611 else
ff0dacd7 2612 rejected |= CATEGORY_MASK_ISO_7;
df7492f9
KH
2613 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2614 id))
ff0dacd7 2615 found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2616 else
ff0dacd7 2617 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
df7492f9
KH
2618 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2619 id))
ff0dacd7 2620 found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2621 else
ff0dacd7 2622 rejected |= CATEGORY_MASK_ISO_7_ELSE;
df7492f9
KH
2623 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2624 id))
ff0dacd7 2625 found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2626 else
ff0dacd7 2627 rejected |= CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2628 break;
2629
4ed46869 2630 case ISO_CODE_SO:
d46c5b12 2631 case ISO_CODE_SI:
ff0dacd7 2632 /* Locking shift out/in. */
74383408
KH
2633 if (inhibit_iso_escape_detection)
2634 break;
f46869e4 2635 single_shifting = 0;
ff0dacd7
KH
2636 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2637 found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2638 break;
2639
4ed46869 2640 case ISO_CODE_CSI:
ff0dacd7 2641 /* Control sequence introducer. */
f46869e4 2642 single_shifting = 0;
ff0dacd7
KH
2643 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2644 found |= CATEGORY_MASK_ISO_8_ELSE;
2645 goto check_extra_latin;
2646
4ed46869
KH
2647 case ISO_CODE_SS2:
2648 case ISO_CODE_SS3:
ff0dacd7
KH
2649 /* Single shift. */
2650 if (inhibit_iso_escape_detection)
2651 break;
75e2a253 2652 single_shifting = 0;
ff0dacd7
KH
2653 rejected |= CATEGORY_MASK_ISO_7BIT;
2654 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2655 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253 2656 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
ff0dacd7
KH
2657 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2658 & CODING_ISO_FLAG_SINGLE_SHIFT)
75e2a253
KH
2659 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2660 if (single_shifting)
2661 break;
ff0dacd7 2662 goto check_extra_latin;
4ed46869
KH
2663
2664 default:
065e3595
KH
2665 if (c < 0)
2666 continue;
4ed46869 2667 if (c < 0x80)
f46869e4
KH
2668 {
2669 single_shifting = 0;
2670 break;
2671 }
ff0dacd7 2672 if (c >= 0xA0)
c4825358 2673 {
ff0dacd7
KH
2674 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2675 found |= CATEGORY_MASK_ISO_8_1;
f46869e4 2676 /* Check the length of succeeding codes of the range
ff0dacd7
KH
2677 0xA0..0FF. If the byte length is even, we include
2678 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2679 only when we are not single shifting. */
2680 if (! single_shifting
2681 && ! (rejected & CATEGORY_MASK_ISO_8_2))
f46869e4 2682 {
e17de821 2683 int i = 1;
b73bfc1c
KH
2684 while (src < src_end)
2685 {
df7492f9 2686 ONE_MORE_BYTE (c);
b73bfc1c
KH
2687 if (c < 0xA0)
2688 break;
2689 i++;
2690 }
2691
2692 if (i & 1 && src < src_end)
ff0dacd7 2693 rejected |= CATEGORY_MASK_ISO_8_2;
f46869e4 2694 else
ff0dacd7 2695 found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2696 }
ff0dacd7 2697 break;
4ed46869 2698 }
ff0dacd7
KH
2699 check_extra_latin:
2700 single_shifting = 0;
2701 if (! VECTORP (Vlatin_extra_code_table)
2702 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2703 {
2704 rejected = CATEGORY_MASK_ISO;
2705 break;
2706 }
2707 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2708 & CODING_ISO_FLAG_LATIN_EXTRA)
2709 found |= CATEGORY_MASK_ISO_8_1;
2710 else
2711 rejected |= CATEGORY_MASK_ISO_8_1;
75e2a253 2712 rejected |= CATEGORY_MASK_ISO_8_2;
4ed46869
KH
2713 }
2714 }
ff0dacd7
KH
2715 detect_info->rejected |= CATEGORY_MASK_ISO;
2716 return 0;
4ed46869 2717
df7492f9 2718 no_more_source:
ff0dacd7
KH
2719 detect_info->rejected |= rejected;
2720 detect_info->found |= (found & ~rejected);
df7492f9 2721 return 1;
4ed46869 2722}
ec6d2bb8 2723
4ed46869 2724
134b9549
KH
/* Record in CODING that the charset selected by DIM, CHARS_96 and the
   final byte FINAL is designated to graphic register REG.

   If FINAL is outside '0'..127, names no charset, or names a charset
   that is not safe for CODING, mark REG with -2 (invalid designation)
   and set CHARS_96 to -1 so that the caller keeps the escape
   sequence.  CHARS_96 is also set to -1 when ASCII is designated to a
   register that held such an invalid designation.  CHARS_96 must
   therefore be an lvalue.  */

#define DECODE_DESIGNATION(reg, dim, chars_96, final)			\
  do {									\
    int new_id_ = -1;							\
    int old_id_;							\
									\
    /* Look the charset up only for a plausible final byte.  */	\
    if (final >= '0' && final < 128)					\
      new_id_ = ISO_CHARSET_TABLE (dim, chars_96, final);		\
    if (new_id_ < 0 || ! SAFE_CHARSET_P (coding, new_id_))		\
      {									\
	CODING_ISO_DESIGNATION (coding, reg) = -2;			\
	chars_96 = -1;							\
	break;								\
      }									\
    old_id_ = CODING_ISO_DESIGNATION (coding, reg);			\
    /* Apply the Japanese substitutions requested by the flags.  */	\
    if (new_id_ == charset_jisx0201_roman)				\
      new_id_ = ((CODING_ISO_FLAGS (coding)				\
		  & CODING_ISO_FLAG_USE_ROMAN)				\
		 ? charset_ascii : new_id_);				\
    else if (new_id_ == charset_jisx0208_1978)				\
      new_id_ = ((CODING_ISO_FLAGS (coding)				\
		  & CODING_ISO_FLAG_USE_OLDJIS)				\
		 ? charset_jisx0208 : new_id_);				\
    CODING_ISO_DESIGNATION (coding, reg) = new_id_;			\
    /* ASCII designated over an invalid designation: keep this	\
       designation sequence too.  */					\
    if (old_id_ == -2 && new_id_ == charset_ascii)			\
      chars_96 = -1;							\
  } while (0)
2757
d46c5b12 2758
df7492f9
KH
/* If a composition is in progress, give it up: copy the characters
   collected so far in `components' to CHARBUF as plain characters,
   advance CHAR_OFFSET, and reset the state to COMPOSING_NO.  Jump to
   `no_more_source' when CHARBUF has no room for COMPONENT_IDX more
   elements.  Do nothing when no composition is in progress.  */

#define MAYBE_FINISH_COMPOSITION()				\
  do {								\
    if (composition_state != COMPOSING_NO)			\
      {								\
	int idx_;						\
	int step_;						\
								\
	if (charbuf + component_idx > charbuf_end)		\
	  goto no_more_source;					\
	composition_state = COMPOSING_NO;			\
	/* For the two methods without rules every slot is	\
	   copied; otherwise only every second slot.  */	\
	step_ = ((method == COMPOSITION_RELATIVE		\
		  || method == COMPOSITION_WITH_ALTCHARS)	\
		 ? 1 : 2);					\
	for (idx_ = 0; idx_ < component_idx; idx_ += step_)	\
	  *charbuf++ = components[idx_];			\
	if (step_ == 1)						\
	  char_offset += component_idx;				\
	else							\
	  char_offset += (component_idx / 2) + 1;		\
      }								\
  } while (0)
2783
d46c5b12 2784
aa72b389
KH
/* Handle a composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4;
   C1 is the byte that followed ESC.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   An ESC 0 seen in state COMPOSING_COMPONENT_RULE only records the
   number of components read so far and switches to COMPOSING_CHAR.
   Otherwise any composition in progress is finished first, and the
   new one is started only if the terminating ESC 1 is found in the
   remaining source; if it is not, jump to `invalid_code' (last
   block) or `no_more_source'.  */

#define DECODE_COMPOSITION_START(c1)					\
  do {									\
    if (c1 != '0'							\
	|| composition_state != COMPOSING_COMPONENT_RULE)		\
      {									\
	const unsigned char *scan_ = src;				\
									\
	MAYBE_FINISH_COMPOSITION ();					\
	if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)		\
	  goto no_more_source;						\
	/* Look ahead for the ESC 1 that ends the composition.  */	\
	while (scan_ < src_end - 1					\
	       && (*scan_ != ISO_CODE_ESC || scan_[1] != '1'))		\
	  scan_++;							\
	if (scan_ == src_end - 1)					\
	  {								\
	    if (coding->mode & CODING_MODE_LAST_BLOCK)			\
	      goto invalid_code;					\
	    goto no_more_source;					\
	  }								\
									\
	/* This is surely the start of a composition.  */		\
	if (c1 == '0')							\
	  method = COMPOSITION_RELATIVE;				\
	else if (c1 == '2')						\
	  method = COMPOSITION_WITH_RULE;				\
	else if (c1 == '3')						\
	  method = COMPOSITION_WITH_ALTCHARS;				\
	else								\
	  method = COMPOSITION_WITH_RULE_ALTCHARS;			\
	if (c1 <= '2')							\
	  composition_state = COMPOSING_CHAR;				\
	else								\
	  composition_state = COMPOSING_COMPONENT_CHAR;			\
	component_len = 0;						\
	component_idx = 0;						\
      }									\
    else								\
      {									\
	/* ESC 0 inside ESC 3/4: the component part ends here.  */	\
	component_len = component_idx;					\
	composition_state = COMPOSING_CHAR;				\
      }									\
  } while (0)
2827
ec6d2bb8 2828
df7492f9
KH
/* Handle the composition end sequence ESC 1: store a composition
   annotation followed by the composed characters into CHARBUF,
   advance CHAR_OFFSET by the number of characters stored, mark CODING
   as annotated, and reset the state to COMPOSING_NO.  */

#define DECODE_COMPOSITION_END()					\
  do {									\
    int nchars_;							\
    int k_, first_, stride_;						\
    int *annotation_ = charbuf;						\
									\
    if (component_len > 0)						\
      nchars_ = component_idx - component_len;				\
    else if (method == COMPOSITION_RELATIVE)				\
      nchars_ = component_idx;						\
    else								\
      nchars_ = (component_idx + 1) / 2;				\
									\
    ADD_COMPOSITION_DATA (charbuf, nchars_, method);			\
    if (method != COMPOSITION_RELATIVE)					\
      {									\
	/* Append the components to the annotation, then fix up	\
	   the first annotation element with the final length.  */	\
	int limit_ = (component_len == 0				\
		      ? component_idx : component_len);			\
									\
	for (k_ = 0; k_ < limit_; k_++)					\
	  *charbuf++ = components[k_];					\
	*annotation_ = annotation_ - charbuf;				\
      }									\
    if (method == COMPOSITION_WITH_RULE)				\
      first_ = 0, stride_ = 2;						\
    else								\
      first_ = component_len, stride_ = 1;				\
    for (k_ = first_; k_ < component_idx; k_ += stride_, char_offset++) \
      *charbuf++ = components[k_];					\
    coding->annotated = 1;						\
    composition_state = COMPOSING_NO;					\
  } while (0)
2859
df7492f9 2860
ec6d2bb8
KH
/* Decode a composition rule from the byte C1 (and, for the new
   format, one more byte read from SRC into C2) and leave the encoded
   composition rule in C1.  C1 is set to 0 when the byte is outside
   both formats.  C1 must be an lvalue.  */

#define DECODE_COMPOSITION_RULE(c1)					\
  do {									\
    (c1) -= 32;								\
    if (c1 >= 93)							\
      c1 = 0;								\
    else if (c1 >= 81)		/* new format (after ver.21) */		\
      {									\
	ONE_MORE_BYTE (c2);						\
	c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);		\
      }									\
    else			/* old format (before ver.21) */	\
      {									\
	int gref_ = (c1) / 9;						\
	int nref_ = (c1) % 9;						\
									\
	/* Reference point 4 of the old format is stored as 10.  */	\
	gref_ = (gref_ == 4 ? 10 : gref_);				\
	nref_ = (nref_ == 4 ? 10 : nref_);				\
	c1 = COMPOSITION_ENCODE_RULE (gref_, nref_);			\
      }									\
  } while (0)
88993dfd 2884
d46c5b12 2885
4ed46869
KH
2886/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2887
b73bfc1c 2888static void
df7492f9 2889decode_coding_iso_2022 (coding)
4ed46869 2890 struct coding_system *coding;
4ed46869 2891{
8f924df7
KH
2892 const unsigned char *src = coding->source + coding->consumed;
2893 const unsigned char *src_end = coding->source + coding->src_bytes;
2894 const unsigned char *src_base;
69a80ea3 2895 int *charbuf = coding->charbuf + coding->charbuf_used;
ff0dacd7 2896 int *charbuf_end
69a80ea3 2897 = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
df7492f9 2898 int consumed_chars = 0, consumed_chars_base;
df7492f9 2899 int multibytep = coding->src_multibyte;
4ed46869 2900 /* Charsets invoked to graphic plane 0 and 1 respectively. */
df7492f9
KH
2901 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2902 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
134b9549 2903 int charset_id_2, charset_id_3;
df7492f9
KH
2904 struct charset *charset;
2905 int c;
2906 /* For handling composition sequence. */
2907#define COMPOSING_NO 0
2908#define COMPOSING_CHAR 1
2909#define COMPOSING_RULE 2
2910#define COMPOSING_COMPONENT_CHAR 3
2911#define COMPOSING_COMPONENT_RULE 4
2912
2913 int composition_state = COMPOSING_NO;
2914 enum composition_method method;
2915 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2916 int component_idx;
2917 int component_len;
24a73b0a 2918 Lisp_Object attrs, charset_list;
ff0dacd7
KH
2919 int char_offset = coding->produced_char;
2920 int last_offset = char_offset;
2921 int last_id = charset_ascii;
df7492f9 2922
24a73b0a 2923 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 2924 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2925
2926 while (1)
4ed46869 2927 {
463f5630 2928 int c1, c2;
b73bfc1c
KH
2929
2930 src_base = src;
df7492f9
KH
2931 consumed_chars_base = consumed_chars;
2932
2933 if (charbuf >= charbuf_end)
2934 break;
2935
b73bfc1c 2936 ONE_MORE_BYTE (c1);
065e3595
KH
2937 if (c1 < 0)
2938 goto invalid_code;
4ed46869 2939
98725083 2940 /* We produce at most one character. */
4ed46869
KH
2941 switch (iso_code_class [c1])
2942 {
2943 case ISO_0x20_or_0x7F:
df7492f9 2944 if (composition_state != COMPOSING_NO)
ec6d2bb8 2945 {
df7492f9
KH
2946 if (composition_state == COMPOSING_RULE
2947 || composition_state == COMPOSING_COMPONENT_RULE)
2948 {
2949 DECODE_COMPOSITION_RULE (c1);
2950 components[component_idx++] = c1;
2951 composition_state--;
2952 continue;
2953 }
4ed46869 2954 }
df7492f9
KH
2955 if (charset_id_0 < 0
2956 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2957 /* This is SPACE or DEL. */
2958 charset = CHARSET_FROM_ID (charset_ascii);
2959 else
2960 charset = CHARSET_FROM_ID (charset_id_0);
2961 break;
4ed46869
KH
2962
2963 case ISO_graphic_plane_0:
781d7a48 2964 if (composition_state != COMPOSING_NO)
b73bfc1c 2965 {
781d7a48
KH
2966 if (composition_state == COMPOSING_RULE
2967 || composition_state == COMPOSING_COMPONENT_RULE)
2968 {
2969 DECODE_COMPOSITION_RULE (c1);
2970 components[component_idx++] = c1;
2971 composition_state--;
2972 continue;
2973 }
b73bfc1c 2974 }
134b9549
KH
2975 if (charset_id_0 < 0)
2976 charset = CHARSET_FROM_ID (charset_ascii);
2977 else
2978 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2979 break;
2980
2981 case ISO_0xA0_or_0xFF:
df7492f9
KH
2982 if (charset_id_1 < 0
2983 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2984 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2985 goto invalid_code;
4ed46869
KH
2986 /* This is a graphic character, we fall down ... */
2987
2988 case ISO_graphic_plane_1:
df7492f9
KH
2989 if (charset_id_1 < 0)
2990 goto invalid_code;
2991 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2992 break;
2993
df7492f9
KH
2994 case ISO_control_0:
2995 MAYBE_FINISH_COMPOSITION ();
2996 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2997 break;
2998
df7492f9
KH
2999 case ISO_control_1:
3000 MAYBE_FINISH_COMPOSITION ();
3001 goto invalid_code;
3002
4ed46869 3003 case ISO_shift_out:
df7492f9
KH
3004 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3005 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3006 goto invalid_code;
3007 CODING_ISO_INVOCATION (coding, 0) = 1;
3008 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3009 continue;
4ed46869
KH
3010
3011 case ISO_shift_in:
df7492f9
KH
3012 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3013 goto invalid_code;
3014 CODING_ISO_INVOCATION (coding, 0) = 0;
3015 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3016 continue;
4ed46869
KH
3017
3018 case ISO_single_shift_2_7:
3019 case ISO_single_shift_2:
df7492f9
KH
3020 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3021 goto invalid_code;
4ed46869
KH
3022 /* SS2 is handled as an escape sequence of ESC 'N' */
3023 c1 = 'N';
3024 goto label_escape_sequence;
3025
3026 case ISO_single_shift_3:
df7492f9
KH
3027 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3028 goto invalid_code;
4ed46869
KH
3029 /* SS2 is handled as an escape sequence of ESC 'O' */
3030 c1 = 'O';
3031 goto label_escape_sequence;
3032
3033 case ISO_control_sequence_introducer:
3034 /* CSI is handled as an escape sequence of ESC '[' ... */
3035 c1 = '[';
3036 goto label_escape_sequence;
3037
3038 case ISO_escape:
3039 ONE_MORE_BYTE (c1);
3040 label_escape_sequence:
df7492f9 3041 /* Escape sequences handled here are invocation,
4ed46869
KH
3042 designation, direction specification, and character
3043 composition specification. */
3044 switch (c1)
3045 {
3046 case '&': /* revision of following character set */
3047 ONE_MORE_BYTE (c1);
3048 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 3049 goto invalid_code;
4ed46869
KH
3050 ONE_MORE_BYTE (c1);
3051 if (c1 != ISO_CODE_ESC)
df7492f9 3052 goto invalid_code;
4ed46869
KH
3053 ONE_MORE_BYTE (c1);
3054 goto label_escape_sequence;
3055
3056 case '$': /* designation of 2-byte character set */
df7492f9
KH
3057 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3058 goto invalid_code;
134b9549
KH
3059 {
3060 int reg, chars96;
3061
3062 ONE_MORE_BYTE (c1);
3063 if (c1 >= '@' && c1 <= 'B')
3064 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 3065 or JISX0208.1980 */
134b9549
KH
3066 reg = 0, chars96 = 0;
3067 }
3068 else if (c1 >= 0x28 && c1 <= 0x2B)
3069 { /* designation of DIMENSION2_CHARS94 character set */
3070 reg = c1 - 0x28, chars96 = 0;
3071 ONE_MORE_BYTE (c1);
3072 }
3073 else if (c1 >= 0x2C && c1 <= 0x2F)
3074 { /* designation of DIMENSION2_CHARS96 character set */
3075 reg = c1 - 0x2C, chars96 = 1;
3076 ONE_MORE_BYTE (c1);
3077 }
3078 else
3079 goto invalid_code;
3080 DECODE_DESIGNATION (reg, 2, chars96, c1);
3081 /* We must update these variables now. */
3082 if (reg == 0)
3083 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3084 else if (reg == 1)
3085 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3086 if (chars96 < 0)
3087 goto invalid_code;
3088 }
b73bfc1c 3089 continue;
4ed46869
KH
3090
3091 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
3092 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3093 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3094 goto invalid_code;
3095 CODING_ISO_INVOCATION (coding, 0) = 2;
3096 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3097 continue;
4ed46869
KH
3098
3099 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
3100 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3101 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3102 goto invalid_code;
3103 CODING_ISO_INVOCATION (coding, 0) = 3;
3104 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 3105 continue;
4ed46869
KH
3106
3107 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
3108 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3109 || CODING_ISO_DESIGNATION (coding, 2) < 0)
3110 goto invalid_code;
134b9549
KH
3111 charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3112 if (charset_id_2 < 0)
3113 charset = CHARSET_FROM_ID (charset_ascii);
3114 else
3115 charset = CHARSET_FROM_ID (charset_id_2);
b73bfc1c 3116 ONE_MORE_BYTE (c1);
e7046a18 3117 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3118 goto invalid_code;
4ed46869
KH
3119 break;
3120
3121 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
3122 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3123 || CODING_ISO_DESIGNATION (coding, 3) < 0)
3124 goto invalid_code;
134b9549
KH
3125 charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3126 if (charset_id_3 < 0)
3127 charset = CHARSET_FROM_ID (charset_ascii);
3128 else
3129 charset = CHARSET_FROM_ID (charset_id_3);
b73bfc1c 3130 ONE_MORE_BYTE (c1);
e7046a18 3131 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 3132 goto invalid_code;
4ed46869
KH
3133 break;
3134
ec6d2bb8 3135 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
3136 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3137 goto invalid_code;
ec6d2bb8 3138 DECODE_COMPOSITION_START (c1);
b73bfc1c 3139 continue;
4ed46869 3140
ec6d2bb8 3141 case '1': /* end composition */
df7492f9
KH
3142 if (composition_state == COMPOSING_NO)
3143 goto invalid_code;
3144 DECODE_COMPOSITION_END ();
b73bfc1c 3145 continue;
4ed46869
KH
3146
3147 case '[': /* specification of direction */
df7492f9
KH
3148 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3149 goto invalid_code;
4ed46869 3150 /* For the moment, nested direction is not supported.
d46c5b12 3151 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 3152 left-to-right, and nozero means right-to-left. */
4ed46869
KH
3153 ONE_MORE_BYTE (c1);
3154 switch (c1)
3155 {
3156 case ']': /* end of the current direction */
d46c5b12 3157 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
3158
3159 case '0': /* end of the current direction */
3160 case '1': /* start of left-to-right direction */
3161 ONE_MORE_BYTE (c1);
3162 if (c1 == ']')
d46c5b12 3163 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 3164 else
df7492f9 3165 goto invalid_code;
4ed46869
KH
3166 break;
3167
3168 case '2': /* start of right-to-left direction */
3169 ONE_MORE_BYTE (c1);
3170 if (c1 == ']')
d46c5b12 3171 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3172 else
df7492f9 3173 goto invalid_code;
4ed46869
KH
3174 break;
3175
3176 default:
df7492f9 3177 goto invalid_code;
4ed46869 3178 }
b73bfc1c 3179 continue;
4ed46869 3180
103e0180 3181 case '%':
103e0180
KH
3182 ONE_MORE_BYTE (c1);
3183 if (c1 == '/')
3184 {
3185 /* CTEXT extended segment:
3186 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3187 We keep these bytes as is for the moment.
3188 They may be decoded by post-read-conversion. */
3189 int dim, M, L;
4776e638 3190 int size;
8f924df7 3191
103e0180
KH
3192 ONE_MORE_BYTE (dim);
3193 ONE_MORE_BYTE (M);
3194 ONE_MORE_BYTE (L);
3195 size = ((M - 128) * 128) + (L - 128);
4776e638
KH
3196 if (charbuf + 8 + size > charbuf_end)
3197 goto break_loop;
3198 *charbuf++ = ISO_CODE_ESC;
3199 *charbuf++ = '%';
3200 *charbuf++ = '/';
3201 *charbuf++ = dim;
3202 *charbuf++ = BYTE8_TO_CHAR (M);
3203 *charbuf++ = BYTE8_TO_CHAR (L);
103e0180
KH
3204 while (size-- > 0)
3205 {
3206 ONE_MORE_BYTE (c1);
4776e638 3207 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3208 }
103e0180
KH
3209 }
3210 else if (c1 == 'G')
3211 {
103e0180
KH
3212 /* XFree86 extension for embedding UTF-8 in CTEXT:
3213 ESC % G --UTF-8-BYTES-- ESC % @
3214 We keep these bytes as is for the moment.
3215 They may be decoded by post-read-conversion. */
4776e638
KH
3216 int *p = charbuf;
3217
3218 if (p + 6 > charbuf_end)
3219 goto break_loop;
3220 *p++ = ISO_CODE_ESC;
3221 *p++ = '%';
3222 *p++ = 'G';
3223 while (p < charbuf_end)
103e0180
KH
3224 {
3225 ONE_MORE_BYTE (c1);
3226 if (c1 == ISO_CODE_ESC
3227 && src + 1 < src_end
3228 && src[0] == '%'
3229 && src[1] == '@')
9ffd559c
KH
3230 {
3231 src += 2;
3232 break;
3233 }
4776e638 3234 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
103e0180 3235 }
4776e638
KH
3236 if (p + 3 > charbuf_end)
3237 goto break_loop;
3238 *p++ = ISO_CODE_ESC;
3239 *p++ = '%';
3240 *p++ = '@';
3241 charbuf = p;
103e0180
KH
3242 }
3243 else
4776e638 3244 goto invalid_code;
103e0180 3245 continue;
4776e638 3246 break;
103e0180 3247
4ed46869 3248 default:
df7492f9
KH
3249 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3250 goto invalid_code;
134b9549
KH
3251 {
3252 int reg, chars96;
3253
3254 if (c1 >= 0x28 && c1 <= 0x2B)
3255 { /* designation of DIMENSION1_CHARS94 character set */
3256 reg = c1 - 0x28, chars96 = 0;
3257 ONE_MORE_BYTE (c1);
3258 }
3259 else if (c1 >= 0x2C && c1 <= 0x2F)
3260 { /* designation of DIMENSION1_CHARS96 character set */
3261 reg = c1 - 0x2C, chars96 = 1;
3262 ONE_MORE_BYTE (c1);
3263 }
3264 else
3265 goto invalid_code;
3266 DECODE_DESIGNATION (reg, 1, chars96, c1);
3267 /* We must update these variables now. */
3268 if (reg == 0)
3269 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3270 else if (reg == 1)
3271 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3272 if (chars96 < 0)
3273 goto invalid_code;
3274 }
b73bfc1c 3275 continue;
4ed46869 3276 }
b73bfc1c 3277 }
4ed46869 3278
ff0dacd7
KH
3279 if (charset->id != charset_ascii
3280 && last_id != charset->id)
3281 {
3282 if (last_id != charset_ascii)
69a80ea3 3283 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
ff0dacd7
KH
3284 last_id = charset->id;
3285 last_offset = char_offset;
3286 }
3287
b73bfc1c 3288 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3289 Produce a decoded character while getting 2nd position code
3290 C2 if necessary. */
3291 c1 &= 0x7F;
3292 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3293 {
3294 ONE_MORE_BYTE (c2);
df7492f9 3295 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3296 /* C2 is not in a valid range. */
df7492f9
KH
3297 goto invalid_code;
3298 c1 = (c1 << 8) | (c2 & 0x7F);
3299 if (CHARSET_DIMENSION (charset) > 2)
3300 {
3301 ONE_MORE_BYTE (c2);
3302 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3303 /* C2 is not in a valid range. */
3304 goto invalid_code;
3305 c1 = (c1 << 8) | (c2 & 0x7F);
3306 }
3307 }
3308
3309 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3310 if (c < 0)
3311 {
3312 MAYBE_FINISH_COMPOSITION ();
3313 for (; src_base < src; src_base++, char_offset++)
3314 {
3315 if (ASCII_BYTE_P (*src_base))
3316 *charbuf++ = *src_base;
3317 else
3318 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3319 }
3320 }
3321 else if (composition_state == COMPOSING_NO)
3322 {
3323 *charbuf++ = c;
3324 char_offset++;
4ed46869 3325 }
df7492f9 3326 else
781d7a48
KH
3327 {
3328 components[component_idx++] = c;
3329 if (method == COMPOSITION_WITH_RULE
3330 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3331 && composition_state == COMPOSING_COMPONENT_CHAR))
3332 composition_state++;
4ed46869
KH
3333 }
3334 continue;
3335
df7492f9
KH
3336 invalid_code:
3337 MAYBE_FINISH_COMPOSITION ();
4ed46869 3338 src = src_base;
df7492f9
KH
3339 consumed_chars = consumed_chars_base;
3340 ONE_MORE_BYTE (c);
065e3595 3341 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 3342 char_offset++;
df7492f9 3343 coding->errors++;
4776e638
KH
3344 continue;
3345
3346 break_loop:
3347 break;
4ed46869 3348 }
fb88bf2d 3349
df7492f9 3350 no_more_source:
ff0dacd7 3351 if (last_id != charset_ascii)
69a80ea3 3352 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
3353 coding->consumed_char += consumed_chars_base;
3354 coding->consumed = src_base - coding->source;
3355 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3356}
3357
b73bfc1c 3358
f4dee582 3359/* ISO2022 encoding stuff. */
4ed46869
KH
3360
3361/*
f4dee582 3362 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3363 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3364 variant has the following specifications:
df7492f9 3365 1. Initial designation to G0 thru G3.
4ed46869
KH
3366 2. Allows short-form designation?
3367 3. ASCII should be designated to G0 before control characters?
3368 4. ASCII should be designated to G0 at end of line?
3369 5. 7-bit environment or 8-bit environment?
3370 6. Use locking-shift?
3371 7. Use Single-shift?
3372 And the following two are only for Japanese:
3373 8. Use ASCII in place of JIS0201-1976-Roman?
3374 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3375 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3376 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3377 details.
4ed46869
KH
3378*/
3379
/* Emit the escape sequence that designates CHARSET to graphic
   register REG, and record the designation in CODING.  When CODING
   has the revision flag and CHARSET has a revision number, the
   sequence is preceded by `ESC & <revision>'.  For a two-dimensional
   94-charset designated to register 0 whose <final-char> is '@', 'A',
   or 'B', the intermediate character is omitted (short form) unless
   CODING requests the long form.  */

#define ENCODE_DESIGNATION(charset, reg, coding)			\
  do {									\
    const char *inter_94_ = "()*+";					\
    const char *inter_96_ = ",-./";					\
    unsigned char final_ = CHARSET_ISO_FINAL (charset);			\
    int rev_ = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)	\
		? CHARSET_ISO_REVISION (charset) : -1);			\
    int inter_;								\
									\
    if (rev_ >= 0)							\
      {									\
	EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');			\
	EMIT_ONE_BYTE ('@' + rev_);					\
      }									\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);					\
    if (CHARSET_DIMENSION (charset) != 1)				\
      {									\
	EMIT_ONE_ASCII_BYTE ('$');					\
	if (CHARSET_ISO_CHARS_96 (charset))				\
	  {								\
	    EMIT_ONE_ASCII_BYTE (inter_96_[reg]);			\
	  }								\
	else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM	\
		 || reg != 0						\
		 || final_ < '@' || final_ > 'B')			\
	  {								\
	    EMIT_ONE_ASCII_BYTE (inter_94_[reg]);			\
	  }								\
      }									\
    else								\
      {									\
	inter_ = (CHARSET_ISO_CHARS_96 (charset)			\
		  ? inter_96_[reg] : inter_94_[reg]);			\
	EMIT_ONE_ASCII_BYTE (inter_);					\
      }									\
    EMIT_ONE_ASCII_BYTE (final_);					\
									\
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);	\
  } while (0)
3427
df7492f9 3428
4ed46869
KH
/* The following two macros emit the ISO2022 single-shift functions
   single-shift-2 and single-shift-3: the escape sequence (ESC 'N' or
   ESC 'O') when CODING has the seven-bits flag, otherwise the control
   character (SS2 or SS3).  Both mark CODING as single-shifting.  */

#define ENCODE_SINGLE_SHIFT_2						\
  do {									\
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))	\
      {									\
	EMIT_ONE_BYTE (ISO_CODE_SS2);					\
      }									\
    else								\
      {									\
	EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');			\
      }									\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)


#define ENCODE_SINGLE_SHIFT_3						\
  do {									\
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))	\
      {									\
	EMIT_ONE_BYTE (ISO_CODE_SS3);					\
      }									\
    else								\
      {									\
	EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');			\
      }									\
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;				\
  } while (0)
3451
df7492f9 3452
4ed46869
KH
/* The following four macros emit the ISO2022 locking-shift functions
   (shift-in, shift-out, locking-shift-2, and locking-shift-3) as a
   control character or an escape sequence, and record in CODING which
   graphic register is now invoked to graphic plane 0.  */

#define ENCODE_SHIFT_IN					\
  do {							\
    CODING_ISO_INVOCATION (coding, 0) = 0;		\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);			\
  } while (0)


#define ENCODE_SHIFT_OUT				\
  do {							\
    CODING_ISO_INVOCATION (coding, 0) = 1;		\
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);			\
  } while (0)


#define ENCODE_LOCKING_SHIFT_2				\
  do {							\
    CODING_ISO_INVOCATION (coding, 0) = 2;		\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');		\
  } while (0)
3476
df7492f9
KH
3477
/* Emit locking-shift-3, which invokes graphic register 3 to graphic
   plane 0, and record the invocation in CODING.  The sequence is
   ESC 'o'; ESC 'n' is locking-shift-2.  */
#define ENCODE_LOCKING_SHIFT_3				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');		\
    CODING_ISO_INVOCATION (coding, 0) = 3;		\
  } while (0)
3483
df7492f9 3484
f4dee582
RS
/* Emit a DIMENSION1 character whose character set is CHARSET and
   whose position-code is C1.  If CHARSET is not invoked to graphic
   plane 0 or 1 yet, designation and invocation sequences are emitted
   first and the attempt is repeated.  ASCII is replaced by
   JISX0201-Roman when CODING has the use-roman flag, in which case
   CHARSET itself is reassigned; CHARSET must therefore be an
   lvalue.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)			\
  do {									\
    int cs_id_ = CHARSET_ID (charset);					\
									\
    if (cs_id_ == charset_ascii						\
	&& (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN))	\
      {									\
	cs_id_ = charset_jisx0201_roman;				\
	charset = CHARSET_FROM_ID (cs_id_);				\
      }									\
									\
    /* A single shift was just emitted: this character uses it up.  */ \
    if (CODING_ISO_SINGLE_SHIFTING (coding))				\
      {									\
	if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
	  EMIT_ONE_ASCII_BYTE (c1 & 0x7F);				\
	else								\
	  EMIT_ONE_BYTE (c1 | 0x80);					\
	CODING_ISO_SINGLE_SHIFTING (coding) = 0;			\
	break;								\
      }									\
    if (cs_id_ == CODING_ISO_INVOKED_CHARSET (coding, 0))		\
      {									\
	EMIT_ONE_ASCII_BYTE (c1 & 0x7F);				\
	break;								\
      }									\
    if (cs_id_ == CODING_ISO_INVOKED_CHARSET (coding, 1))		\
      {									\
	EMIT_ONE_BYTE (c1 | 0x80);					\
	break;								\
      }									\
    /* Since CHARSET is not yet invoked to any graphic planes, we	\
       must invoke it, or, at first, designate it to some graphic	\
       register.  Then repeat the loop to actually produce the		\
       character.  */							\
    dst = encode_invocation_designation (charset, coding, dst,		\
					 &produced_chars);		\
  } while (1)
3527
df7492f9 3528
f4dee582
RS
/* Emit a DIMENSION2 character whose character set is CHARSET and
   whose position-codes are C1 and C2.  If CHARSET is not invoked to
   graphic plane 0 or 1 yet, designation and invocation sequences are
   emitted first and the attempt is repeated.  JISX0208 is replaced by
   JISX0208-1978 when CODING has the use-oldjis flag, in which case
   CHARSET itself is reassigned; CHARSET must therefore be an
   lvalue.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)		\
  do {									\
    int cs_id_ = CHARSET_ID (charset);					\
									\
    if (cs_id_ == charset_jisx0208					\
	&& (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))	\
      {									\
	cs_id_ = charset_jisx0208_1978;					\
	charset = CHARSET_FROM_ID (cs_id_);				\
      }									\
									\
    /* A single shift was just emitted: this character uses it up.  */ \
    if (CODING_ISO_SINGLE_SHIFTING (coding))				\
      {									\
	if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)	\
	  EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);		\
	else								\
	  EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);			\
	CODING_ISO_SINGLE_SHIFTING (coding) = 0;			\
	break;								\
      }									\
    if (cs_id_ == CODING_ISO_INVOKED_CHARSET (coding, 0))		\
      {									\
	EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);		\
	break;								\
      }									\
    if (cs_id_ == CODING_ISO_INVOKED_CHARSET (coding, 1))		\
      {									\
	EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);			\
	break;								\
      }									\
    /* Since CHARSET is not yet invoked to any graphic planes, we	\
       must invoke it, or, at first, designate it to some graphic	\
       register.  Then repeat the loop to actually produce the		\
       character.  */							\
    dst = encode_invocation_designation (charset, coding, dst,		\
					 &produced_chars);		\
  } while (1)
3571
05e6f5dc 3572
df7492f9
KH
/* Emit character C of CHARSET: encode it to a code point with
   ENCODE_CHAR and hand it to the DIMENSION1 or DIMENSION2 macro
   according to the dimension of CHARSET.  For DIMENSION2 the code
   point is split into `code >> 8' and `code & 0xFF'.  */

#define ENCODE_ISO_CHARACTER(charset, c)				\
  do {									\
    int position_code_ = ENCODE_CHAR ((charset), (c));			\
									\
    if (CHARSET_DIMENSION (charset) != 1)				\
      {									\
	ENCODE_ISO_CHARACTER_DIMENSION2 ((charset),			\
					 position_code_ >> 8,		\
					 position_code_ & 0xFF);	\
      }									\
    else								\
      {									\
	ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), position_code_);	\
      }									\
  } while (0)
bdd9fb48 3582
05e6f5dc 3583
4ed46869 3584/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3585 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3586 Return new DST. */
3587
3588unsigned char *
df7492f9
KH
3589encode_invocation_designation (charset, coding, dst, p_nchars)
3590 struct charset *charset;
4ed46869
KH
3591 struct coding_system *coding;
3592 unsigned char *dst;
df7492f9 3593 int *p_nchars;
4ed46869 3594{
df7492f9
KH
3595 int multibytep = coding->dst_multibyte;
3596 int produced_chars = *p_nchars;
4ed46869 3597 int reg; /* graphic register number */
df7492f9 3598 int id = CHARSET_ID (charset);
4ed46869
KH
3599
3600 /* At first, check designations. */
3601 for (reg = 0; reg < 4; reg++)
df7492f9 3602 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3603 break;
3604
3605 if (reg >= 4)
3606 {
3607 /* CHARSET is not yet designated to any graphic registers. */
3608 /* At first check the requested designation. */
df7492f9
KH
3609 reg = CODING_ISO_REQUEST (coding, id);
3610 if (reg < 0)
1ba9e4ab
KH
3611 /* Since CHARSET requests no special designation, designate it
3612 to graphic register 0. */
4ed46869
KH
3613 reg = 0;
3614
3615 ENCODE_DESIGNATION (charset, reg, coding);
3616 }
3617
df7492f9
KH
3618 if (CODING_ISO_INVOCATION (coding, 0) != reg
3619 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3620 {
3621 /* Since the graphic register REG is not invoked to any graphic
3622 planes, invoke it to graphic plane 0. */
3623 switch (reg)
3624 {
3625 case 0: /* graphic register 0 */
3626 ENCODE_SHIFT_IN;
3627 break;
3628
3629 case 1: /* graphic register 1 */
3630 ENCODE_SHIFT_OUT;
3631 break;
3632
3633 case 2: /* graphic register 2 */
df7492f9 3634 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3635 ENCODE_SINGLE_SHIFT_2;
3636 else
3637 ENCODE_LOCKING_SHIFT_2;
3638 break;
3639
3640 case 3: /* graphic register 3 */
df7492f9 3641 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3642 ENCODE_SINGLE_SHIFT_3;
3643 else
3644 ENCODE_LOCKING_SHIFT_3;
3645 break;
3646 }
3647 }
b73bfc1c 3648
df7492f9 3649 *p_nchars = produced_chars;
4ed46869
KH
3650 return dst;
3651}
3652
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.  They emit through EMIT_ONE_BYTE and EMIT_TWO_ASCII_BYTES
   and read the flags of the variable `coding' of the invoking
   function.  */

/* Produce a control sequence introducer: the two-byte form ESC [
   when the seven-bit flag of CODING is set, else the single byte
   ISO_CODE_CSI.  CODING_ISO_FLAGS is a bit mask, so the flag must be
   tested with `&'; comparing the whole mask with `==' matched only
   when no other flag happened to be set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)


/* Produce the sequence CSI 2 ] (right-to-left direction).  The
   introducer macro is object-like, so it is invoked without an
   argument list.  */
#define ENCODE_DIRECTION_R2L()                                          \
  do {                                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                                 \
    EMIT_TWO_ASCII_BYTES ('2', ']');                                    \
  } while (0)


/* Produce the sequence CSI 0 ] (left-to-right direction).  */
#define ENCODE_DIRECTION_L2R()                                          \
  do {                                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                                 \
    EMIT_TWO_ASCII_BYTES ('0', ']');                                    \
  } while (0)
4ed46869 3676
4ed46869
KH
3677
/* Produce the shift and designation codes needed to bring the
   graphic planes and registers of `coding' back to their initial
   state: shift in if graphic plane 0 does not invoke register 0, then
   re-designate every register that has an initial charset different
   from its current designation.  */

#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg = 0;                                                        \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    while (reg < 4)                                                     \
      {                                                                 \
        int initial_id = CODING_ISO_INITIAL (coding, reg);              \
                                                                        \
        if (initial_id >= 0                                             \
            && initial_id != CODING_ISO_DESIGNATION (coding, reg))      \
          {                                                             \
            charset = CHARSET_FROM_ID (initial_id);                     \
            ENCODE_DESIGNATION (charset, reg, coding);                  \
          }                                                             \
        reg++;                                                          \
      }                                                                 \
  } while (0)
3696
df7492f9 3697
bdd9fb48 3698/* Produce designation sequences of charsets in the line started from
b73bfc1c 3699 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3700
3701 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3702 find all the necessary designations. */
3703
b73bfc1c 3704static unsigned char *
df7492f9 3705encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3706 struct coding_system *coding;
df7492f9
KH
3707 int *charbuf, *charbuf_end;
3708 unsigned char *dst;
e0e989f6 3709{
df7492f9 3710 struct charset *charset;
bdd9fb48
KH
3711 /* Table of charsets to be designated to each graphic register. */
3712 int r[4];
df7492f9
KH
3713 int c, found = 0, reg;
3714 int produced_chars = 0;
3715 int multibytep = coding->dst_multibyte;
3716 Lisp_Object attrs;
3717 Lisp_Object charset_list;
3718
3719 attrs = CODING_ID_ATTRS (coding->id);
3720 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3721 if (EQ (charset_list, Qiso_2022))
3722 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3723
3724 for (reg = 0; reg < 4; reg++)
3725 r[reg] = -1;
3726
b73bfc1c 3727 while (found < 4)
e0e989f6 3728 {
df7492f9
KH
3729 int id;
3730
3731 c = *charbuf++;
b73bfc1c
KH
3732 if (c == '\n')
3733 break;
df7492f9
KH
3734 charset = char_charset (c, charset_list, NULL);
3735 id = CHARSET_ID (charset);
3736 reg = CODING_ISO_REQUEST (coding, id);
3737 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3738 {
3739 found++;
df7492f9 3740 r[reg] = id;
bdd9fb48 3741 }
bdd9fb48
KH
3742 }
3743
3744 if (found)
3745 {
3746 for (reg = 0; reg < 4; reg++)
3747 if (r[reg] >= 0
df7492f9
KH
3748 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3749 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3750 }
b73bfc1c
KH
3751
3752 return dst;
e0e989f6
KH
3753}
3754
4ed46869
KH
3755/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3756
df7492f9
KH
3757static int
3758encode_coding_iso_2022 (coding)
4ed46869 3759 struct coding_system *coding;
4ed46869 3760{
df7492f9
KH
3761 int multibytep = coding->dst_multibyte;
3762 int *charbuf = coding->charbuf;
3763 int *charbuf_end = charbuf + coding->charbuf_used;
3764 unsigned char *dst = coding->destination + coding->produced;
3765 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3766 int safe_room = 16;
3767 int bol_designation
3768 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3769 && CODING_ISO_BOL (coding));
3770 int produced_chars = 0;
3771 Lisp_Object attrs, eol_type, charset_list;
3772 int ascii_compatible;
b73bfc1c 3773 int c;
ff0dacd7 3774 int preferred_charset_id = -1;
05e6f5dc 3775
24a73b0a
KH
3776 CODING_GET_INFO (coding, attrs, charset_list);
3777 eol_type = CODING_ID_EOL_TYPE (coding->id);
3778 if (VECTORP (eol_type))
3779 eol_type = Qunix;
3780
004068e4 3781 setup_iso_safe_charsets (attrs);
ff0dacd7
KH
3782 /* Charset list may have been changed. */
3783 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
8f924df7 3784 coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
0eecad43 3785
df7492f9 3786 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
bdd9fb48 3787
df7492f9 3788 while (charbuf < charbuf_end)
4ed46869 3789 {
df7492f9 3790 ASSURE_DESTINATION (safe_room);
b73bfc1c 3791
df7492f9 3792 if (bol_designation)
b73bfc1c 3793 {
df7492f9 3794 unsigned char *dst_prev = dst;
4ed46869 3795
bdd9fb48 3796 /* We have to produce designation sequences if any now. */
df7492f9
KH
3797 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3798 bol_designation = 0;
3799 /* We are sure that designation sequences are all ASCII bytes. */
3800 produced_chars += dst - dst_prev;
e0e989f6
KH
3801 }
3802
df7492f9 3803 c = *charbuf++;
ec6d2bb8 3804
ff0dacd7
KH
3805 if (c < 0)
3806 {
3807 /* Handle an annotation. */
3808 switch (*charbuf)
ec6d2bb8 3809 {
ff0dacd7
KH
3810 case CODING_ANNOTATE_COMPOSITION_MASK:
3811 /* Not yet implemented. */
3812 break;
3813 case CODING_ANNOTATE_CHARSET_MASK:
cf7dfdf5 3814 preferred_charset_id = charbuf[2];
ff0dacd7
KH
3815 if (preferred_charset_id >= 0
3816 && NILP (Fmemq (make_number (preferred_charset_id),
3817 charset_list)))
3818 preferred_charset_id = -1;
3819 break;
3820 default:
3821 abort ();
4ed46869 3822 }
ff0dacd7
KH
3823 charbuf += -c - 1;
3824 continue;
4ed46869 3825 }
ec6d2bb8 3826
b73bfc1c
KH
3827 /* Now encode the character C. */
3828 if (c < 0x20 || c == 0x7F)
3829 {
df7492f9
KH
3830 if (c == '\n'
3831 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3832 {
df7492f9
KH
3833 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3834 ENCODE_RESET_PLANE_AND_REGISTER ();
3835 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3836 {
df7492f9
KH
3837 int i;
3838
3839 for (i = 0; i < 4; i++)
3840 CODING_ISO_DESIGNATION (coding, i)
3841 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3842 }
df7492f9
KH
3843 bol_designation
3844 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3845 }
df7492f9
KH
3846 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3847 ENCODE_RESET_PLANE_AND_REGISTER ();
3848 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3849 }
df7492f9 3850 else if (ASCII_CHAR_P (c))
88993dfd 3851 {
df7492f9
KH
3852 if (ascii_compatible)
3853 EMIT_ONE_ASCII_BYTE (c);
93dec019 3854 else
19a8d9e0 3855 {
bf16eb23
KH
3856 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3857 ENCODE_ISO_CHARACTER (charset, c);
19a8d9e0 3858 }
4ed46869 3859 }
16eafb5d 3860 else if (CHAR_BYTE8_P (c))
88993dfd 3861 {
16eafb5d
KH
3862 c = CHAR_TO_BYTE8 (c);
3863 EMIT_ONE_BYTE (c);
88993dfd 3864 }
b73bfc1c 3865 else
df7492f9 3866 {
ff0dacd7 3867 struct charset *charset;
b73bfc1c 3868
ff0dacd7
KH
3869 if (preferred_charset_id >= 0)
3870 {
3871 charset = CHARSET_FROM_ID (preferred_charset_id);
3872 if (! CHAR_CHARSET_P (c, charset))
3873 charset = char_charset (c, charset_list, NULL);
3874 }
3875 else
3876 charset = char_charset (c, charset_list, NULL);
df7492f9
KH
3877 if (!charset)
3878 {
41cbe562
KH
3879 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3880 {
3881 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3882 charset = CHARSET_FROM_ID (charset_ascii);
3883 }
3884 else
3885 {
3886 c = coding->default_char;
3887 charset = char_charset (c, charset_list, NULL);
3888 }
df7492f9
KH
3889 }
3890 ENCODE_ISO_CHARACTER (charset, c);
3891 }
84fbb8a0 3892 }
b73bfc1c 3893
df7492f9
KH
3894 if (coding->mode & CODING_MODE_LAST_BLOCK
3895 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3896 {
3897 ASSURE_DESTINATION (safe_room);
3898 ENCODE_RESET_PLANE_AND_REGISTER ();
3899 }
065e3595 3900 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
3901 CODING_ISO_BOL (coding) = bol_designation;
3902 coding->produced_char += produced_chars;
3903 coding->produced = dst - coding->destination;
3904 return 0;
4ed46869
KH
3905}
3906
3907\f
df7492f9 3908/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 3909
df7492f9 3910/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3911 quite widely. So, for the moment, Emacs supports them in the bare
3912 C code. But, in the future, they may be supported only by CCL. */
3913
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the ranges below.

   --- CODE RANGE of SJIS ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   KATAKANA-JISX0201    0xA0 .. 0xDF
   JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
            (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
   -------------------------------

*/
3930
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and each of its characters is encoded in two bytes.

   --- CODE RANGE of BIG5 ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   Big5 (1st byte)      0xA1 .. 0xFE
        (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
   --------------------------

 */
4ed46869
KH
3943
3944/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3945 Check if a text is encoded in SJIS. If it is, return
df7492f9 3946 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3947
0a28aafb 3948static int
ff0dacd7 3949detect_coding_sjis (coding, detect_info)
df7492f9 3950 struct coding_system *coding;
ff0dacd7 3951 struct coding_detection_info *detect_info;
4ed46869 3952{
065e3595 3953 const unsigned char *src = coding->source, *src_base;
8f924df7 3954 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
3955 int multibytep = coding->src_multibyte;
3956 int consumed_chars = 0;
3957 int found = 0;
b73bfc1c 3958 int c;
df7492f9 3959
ff0dacd7 3960 detect_info->checked |= CATEGORY_MASK_SJIS;
df7492f9
KH
3961 /* A coding system of this category is always ASCII compatible. */
3962 src += coding->head_ascii;
4ed46869 3963
b73bfc1c 3964 while (1)
4ed46869 3965 {
065e3595 3966 src_base = src;
df7492f9 3967 ONE_MORE_BYTE (c);
682169fe
KH
3968 if (c < 0x80)
3969 continue;
df7492f9 3970 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3971 {
df7492f9 3972 ONE_MORE_BYTE (c);
682169fe 3973 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9 3974 break;
ff0dacd7 3975 found = CATEGORY_MASK_SJIS;
4ed46869 3976 }
df7492f9 3977 else if (c >= 0xA0 && c < 0xE0)
ff0dacd7 3978 found = CATEGORY_MASK_SJIS;
df7492f9
KH
3979 else
3980 break;
4ed46869 3981 }
ff0dacd7 3982 detect_info->rejected |= CATEGORY_MASK_SJIS;
df7492f9
KH
3983 return 0;
3984
3985 no_more_source:
065e3595 3986 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 3987 {
ff0dacd7 3988 detect_info->rejected |= CATEGORY_MASK_SJIS;
89528eb3 3989 return 0;
4ed46869 3990 }
ff0dacd7
KH
3991 detect_info->found |= found;
3992 return 1;
4ed46869
KH
3993}
3994
3995/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3996 Check if a text is encoded in BIG5. If it is, return
df7492f9 3997 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3998
0a28aafb 3999static int
ff0dacd7 4000detect_coding_big5 (coding, detect_info)
df7492f9 4001 struct coding_system *coding;
ff0dacd7 4002 struct coding_detection_info *detect_info;
4ed46869 4003{
065e3595 4004 const unsigned char *src = coding->source, *src_base;
8f924df7 4005 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4006 int multibytep = coding->src_multibyte;
4007 int consumed_chars = 0;
4008 int found = 0;
b73bfc1c 4009 int c;
fa42c37f 4010
ff0dacd7 4011 detect_info->checked |= CATEGORY_MASK_BIG5;
df7492f9
KH
4012 /* A coding system of this category is always ASCII compatible. */
4013 src += coding->head_ascii;
fa42c37f 4014
b73bfc1c 4015 while (1)
fa42c37f 4016 {
065e3595 4017 src_base = src;
df7492f9
KH
4018 ONE_MORE_BYTE (c);
4019 if (c < 0x80)
fa42c37f 4020 continue;
df7492f9 4021 if (c >= 0xA1)
fa42c37f 4022 {
df7492f9
KH
4023 ONE_MORE_BYTE (c);
4024 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 4025 return 0;
ff0dacd7 4026 found = CATEGORY_MASK_BIG5;
fa42c37f 4027 }
df7492f9
KH
4028 else
4029 break;
fa42c37f 4030 }
ff0dacd7 4031 detect_info->rejected |= CATEGORY_MASK_BIG5;
fa42c37f 4032 return 0;
fa42c37f 4033
df7492f9 4034 no_more_source:
065e3595 4035 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
89528eb3 4036 {
ff0dacd7 4037 detect_info->rejected |= CATEGORY_MASK_BIG5;
89528eb3
KH
4038 return 0;
4039 }
ff0dacd7
KH
4040 detect_info->found |= found;
4041 return 1;
fa42c37f
KH
4042}
4043
4ed46869
KH
4044/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4045 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
fa42c37f 4046
b73bfc1c 4047static void
df7492f9 4048decode_coding_sjis (coding)
4ed46869 4049 struct coding_system *coding;
4ed46869 4050{
8f924df7
KH
4051 const unsigned char *src = coding->source + coding->consumed;
4052 const unsigned char *src_end = coding->source + coding->src_bytes;
4053 const unsigned char *src_base;
69a80ea3
KH
4054 int *charbuf = coding->charbuf + coding->charbuf_used;
4055 int *charbuf_end
4056 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4057 int consumed_chars = 0, consumed_chars_base;
4058 int multibytep = coding->src_multibyte;
4059 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4060 struct charset *charset_kanji2;
24a73b0a 4061 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4062 int char_offset = coding->produced_char;
4063 int last_offset = char_offset;
4064 int last_id = charset_ascii;
a5d301df 4065
24a73b0a 4066 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4067
4068 val = charset_list;
4069 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
89528eb3 4070 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4071 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4072 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 4073
b73bfc1c 4074 while (1)
4ed46869 4075 {
df7492f9 4076 int c, c1;
24a73b0a 4077 struct charset *charset;
fa42c37f 4078
b73bfc1c 4079 src_base = src;
df7492f9 4080 consumed_chars_base = consumed_chars;
fa42c37f 4081
df7492f9
KH
4082 if (charbuf >= charbuf_end)
4083 break;
4084
4085 ONE_MORE_BYTE (c);
065e3595
KH
4086 if (c < 0)
4087 goto invalid_code;
24a73b0a
KH
4088 if (c < 0x80)
4089 charset = charset_roman;
57a47f8a 4090 else if (c == 0x80 || c == 0xA0)
8e921c4b 4091 goto invalid_code;
57a47f8a
KH
4092 else if (c >= 0xA1 && c <= 0xDF)
4093 {
4094 /* SJIS -> JISX0201-Kana */
4095 c &= 0x7F;
4096 charset = charset_kana;
4097 }
4098 else if (c <= 0xEF)
df7492f9 4099 {
57a47f8a
KH
4100 /* SJIS -> JISX0208 */
4101 ONE_MORE_BYTE (c1);
4102 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4103 goto invalid_code;
57a47f8a
KH
4104 c = (c << 8) | c1;
4105 SJIS_TO_JIS (c);
4106 charset = charset_kanji;
4107 }
4108 else if (c <= 0xFC && charset_kanji2)
4109 {
c6876370 4110 /* SJIS -> JISX0213-2 */
57a47f8a
KH
4111 ONE_MORE_BYTE (c1);
4112 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
24a73b0a 4113 goto invalid_code;
57a47f8a
KH
4114 c = (c << 8) | c1;
4115 SJIS_TO_JIS2 (c);
4116 charset = charset_kanji2;
df7492f9 4117 }
57a47f8a
KH
4118 else
4119 goto invalid_code;
24a73b0a
KH
4120 if (charset->id != charset_ascii
4121 && last_id != charset->id)
4122 {
4123 if (last_id != charset_ascii)
69a80ea3 4124 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4125 last_id = charset->id;
4126 last_offset = char_offset;
4127 }
4128 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4129 *charbuf++ = c;
ff0dacd7 4130 char_offset++;
df7492f9 4131 continue;
b73bfc1c 4132
df7492f9
KH
4133 invalid_code:
4134 src = src_base;
4135 consumed_chars = consumed_chars_base;
4136 ONE_MORE_BYTE (c);
065e3595 4137 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4138 char_offset++;
df7492f9
KH
4139 coding->errors++;
4140 }
fa42c37f 4141
df7492f9 4142 no_more_source:
ff0dacd7 4143 if (last_id != charset_ascii)
69a80ea3 4144 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4145 coding->consumed_char += consumed_chars_base;
4146 coding->consumed = src_base - coding->source;
4147 coding->charbuf_used = charbuf - coding->charbuf;
fa42c37f
KH
4148}
4149
b73bfc1c 4150static void
df7492f9 4151decode_coding_big5 (coding)
4ed46869 4152 struct coding_system *coding;
4ed46869 4153{
8f924df7
KH
4154 const unsigned char *src = coding->source + coding->consumed;
4155 const unsigned char *src_end = coding->source + coding->src_bytes;
4156 const unsigned char *src_base;
69a80ea3
KH
4157 int *charbuf = coding->charbuf + coding->charbuf_used;
4158 int *charbuf_end
4159 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4160 int consumed_chars = 0, consumed_chars_base;
4161 int multibytep = coding->src_multibyte;
4162 struct charset *charset_roman, *charset_big5;
24a73b0a 4163 Lisp_Object attrs, charset_list, val;
ff0dacd7
KH
4164 int char_offset = coding->produced_char;
4165 int last_offset = char_offset;
4166 int last_id = charset_ascii;
df7492f9 4167
24a73b0a 4168 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4169 val = charset_list;
4170 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4171 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4172
b73bfc1c 4173 while (1)
4ed46869 4174 {
df7492f9 4175 int c, c1;
24a73b0a 4176 struct charset *charset;
b73bfc1c
KH
4177
4178 src_base = src;
df7492f9
KH
4179 consumed_chars_base = consumed_chars;
4180
4181 if (charbuf >= charbuf_end)
4182 break;
4183
4184 ONE_MORE_BYTE (c);
b73bfc1c 4185
065e3595
KH
4186 if (c < 0)
4187 goto invalid_code;
24a73b0a
KH
4188 if (c < 0x80)
4189 charset = charset_roman;
4190 else
4ed46869 4191 {
24a73b0a
KH
4192 /* BIG5 -> Big5 */
4193 if (c < 0xA1 || c > 0xFE)
4194 goto invalid_code;
4195 ONE_MORE_BYTE (c1);
4196 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4197 goto invalid_code;
4198 c = c << 8 | c1;
4199 charset = charset_big5;
4ed46869 4200 }
24a73b0a
KH
4201 if (charset->id != charset_ascii
4202 && last_id != charset->id)
df7492f9 4203 {
24a73b0a 4204 if (last_id != charset_ascii)
69a80ea3 4205 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4206 last_id = charset->id;
4207 last_offset = char_offset;
4ed46869 4208 }
24a73b0a 4209 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9 4210 *charbuf++ = c;
ff0dacd7 4211 char_offset++;
fb88bf2d
KH
4212 continue;
4213
df7492f9 4214 invalid_code:
4ed46869 4215 src = src_base;
df7492f9
KH
4216 consumed_chars = consumed_chars_base;
4217 ONE_MORE_BYTE (c);
065e3595 4218 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
ff0dacd7 4219 char_offset++;
df7492f9 4220 coding->errors++;
fb88bf2d 4221 }
d46c5b12 4222
df7492f9 4223 no_more_source:
ff0dacd7 4224 if (last_id != charset_ascii)
69a80ea3 4225 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4226 coding->consumed_char += consumed_chars_base;
4227 coding->consumed = src_base - coding->source;
4228 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4229}
4230
4231/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
b73bfc1c
KH
4232 This function can encode charsets `ascii', `katakana-jisx0201',
4233 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4234 are sure that all these charsets are registered as official charset
4ed46869
KH
4235 (i.e. do not have extended leading-codes). Characters of other
4236 charsets are produced without any encoding. If SJIS_P is 1, encode
4237 SJIS text, else encode BIG5 text. */
4238
df7492f9
KH
4239static int
4240encode_coding_sjis (coding)
4ed46869 4241 struct coding_system *coding;
4ed46869 4242{
df7492f9
KH
4243 int multibytep = coding->dst_multibyte;
4244 int *charbuf = coding->charbuf;
4245 int *charbuf_end = charbuf + coding->charbuf_used;
4246 unsigned char *dst = coding->destination + coding->produced;
4247 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4248 int safe_room = 4;
4249 int produced_chars = 0;
24a73b0a 4250 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4251 int ascii_compatible;
4252 struct charset *charset_roman, *charset_kanji, *charset_kana;
57a47f8a 4253 struct charset *charset_kanji2;
df7492f9 4254 int c;
a5d301df 4255
24a73b0a 4256 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4257 val = charset_list;
4258 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4259 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
57a47f8a
KH
4260 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4261 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 4262
df7492f9 4263 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
93dec019 4264
df7492f9
KH
4265 while (charbuf < charbuf_end)
4266 {
4267 ASSURE_DESTINATION (safe_room);
4268 c = *charbuf++;
b73bfc1c 4269 /* Now encode the character C. */
df7492f9
KH
4270 if (ASCII_CHAR_P (c) && ascii_compatible)
4271 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4272 else if (CHAR_BYTE8_P (c))
4273 {
4274 c = CHAR_TO_BYTE8 (c);
4275 EMIT_ONE_BYTE (c);
4276 }
df7492f9 4277 else
b73bfc1c 4278 {
df7492f9
KH
4279 unsigned code;
4280 struct charset *charset = char_charset (c, charset_list, &code);
4281
4282 if (!charset)
4ed46869 4283 {
41cbe562 4284 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4285 {
41cbe562
KH
4286 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4287 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4288 }
41cbe562 4289 else
b73bfc1c 4290 {
41cbe562
KH
4291 c = coding->default_char;
4292 charset = char_charset (c, charset_list, &code);
b73bfc1c 4293 }
b73bfc1c 4294 }
df7492f9
KH
4295 if (code == CHARSET_INVALID_CODE (charset))
4296 abort ();
4297 if (charset == charset_kanji)
4298 {
4299 int c1, c2;
4300 JIS_TO_SJIS (code);
4301 c1 = code >> 8, c2 = code & 0xFF;
4302 EMIT_TWO_BYTES (c1, c2);
4303 }
4304 else if (charset == charset_kana)
4305 EMIT_ONE_BYTE (code | 0x80);
57a47f8a
KH
4306 else if (charset_kanji2 && charset == charset_kanji2)
4307 {
4308 int c1, c2;
4309
4310 c1 = code >> 8;
4311 if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4312 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4313 {
4314 JIS_TO_SJIS2 (code);
4315 c1 = code >> 8, c2 = code & 0xFF;
4316 EMIT_TWO_BYTES (c1, c2);
4317 }
4318 else
4319 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4320 }
df7492f9
KH
4321 else
4322 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4323 }
4324 }
065e3595 4325 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4326 coding->produced_char += produced_chars;
4327 coding->produced = dst - coding->destination;
4328 return 0;
4329}
4330
4331static int
4332encode_coding_big5 (coding)
4333 struct coding_system *coding;
4334{
4335 int multibytep = coding->dst_multibyte;
4336 int *charbuf = coding->charbuf;
4337 int *charbuf_end = charbuf + coding->charbuf_used;
4338 unsigned char *dst = coding->destination + coding->produced;
4339 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4340 int safe_room = 4;
4341 int produced_chars = 0;
24a73b0a 4342 Lisp_Object attrs, charset_list, val;
df7492f9
KH
4343 int ascii_compatible;
4344 struct charset *charset_roman, *charset_big5;
4345 int c;
4346
24a73b0a 4347 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4348 val = charset_list;
4349 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4350 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4351 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4352
4353 while (charbuf < charbuf_end)
4354 {
4355 ASSURE_DESTINATION (safe_room);
4356 c = *charbuf++;
4357 /* Now encode the character C. */
4358 if (ASCII_CHAR_P (c) && ascii_compatible)
4359 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4360 else if (CHAR_BYTE8_P (c))
4361 {
4362 c = CHAR_TO_BYTE8 (c);
4363 EMIT_ONE_BYTE (c);
b73bfc1c
KH
4364 }
4365 else
4366 {
df7492f9
KH
4367 unsigned code;
4368 struct charset *charset = char_charset (c, charset_list, &code);
4369
4370 if (! charset)
b73bfc1c 4371 {
41cbe562 4372 if (coding->mode & CODING_MODE_SAFE_ENCODING)
b73bfc1c 4373 {
41cbe562
KH
4374 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4375 charset = CHARSET_FROM_ID (charset_ascii);
b73bfc1c 4376 }
41cbe562 4377 else
0eecad43 4378 {
41cbe562
KH
4379 c = coding->default_char;
4380 charset = char_charset (c, charset_list, &code);
0eecad43 4381 }
4ed46869 4382 }
df7492f9
KH
4383 if (code == CHARSET_INVALID_CODE (charset))
4384 abort ();
4385 if (charset == charset_big5)
b73bfc1c 4386 {
df7492f9
KH
4387 int c1, c2;
4388
4389 c1 = code >> 8, c2 = code & 0xFF;
4390 EMIT_TWO_BYTES (c1, c2);
b73bfc1c 4391 }
df7492f9
KH
4392 else
4393 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4ed46869 4394 }
4ed46869 4395 }
065e3595 4396 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4397 coding->produced_char += produced_chars;
4398 coding->produced = dst - coding->destination;
4399 return 0;
4ed46869
KH
4400}
4401
4402\f
df7492f9 4403/*** 10. CCL handlers ***/
1397dc18
KH
4404
4405/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4406 Check if a text is encoded in a coding system of which
4407 encoder/decoder are written in CCL program. If it is, return
df7492f9 4408 CATEGORY_MASK_CCL, else return 0. */
1397dc18 4409
0a28aafb 4410static int
ff0dacd7 4411detect_coding_ccl (coding, detect_info)
df7492f9 4412 struct coding_system *coding;
ff0dacd7 4413 struct coding_detection_info *detect_info;
1397dc18 4414{
065e3595 4415 const unsigned char *src = coding->source, *src_base;
8f924df7 4416 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4417 int multibytep = coding->src_multibyte;
4418 int consumed_chars = 0;
4419 int found = 0;
0e219d54 4420 unsigned char *valids;
df7492f9
KH
4421 int head_ascii = coding->head_ascii;
4422 Lisp_Object attrs;
4423
ff0dacd7
KH
4424 detect_info->checked |= CATEGORY_MASK_CCL;
4425
df7492f9 4426 coding = &coding_categories[coding_category_ccl];
0e219d54 4427 valids = CODING_CCL_VALIDS (coding);
df7492f9
KH
4428 attrs = CODING_ID_ATTRS (coding->id);
4429 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4430 src += head_ascii;
1397dc18 4431
b73bfc1c 4432 while (1)
1397dc18 4433 {
df7492f9 4434 int c;
065e3595
KH
4435
4436 src_base = src;
df7492f9 4437 ONE_MORE_BYTE (c);
065e3595 4438 if (c < 0 || ! valids[c])
df7492f9 4439 break;
ff0dacd7
KH
4440 if ((valids[c] > 1))
4441 found = CATEGORY_MASK_CCL;
df7492f9 4442 }
ff0dacd7 4443 detect_info->rejected |= CATEGORY_MASK_CCL;
df7492f9
KH
4444 return 0;
4445
4446 no_more_source:
ff0dacd7
KH
4447 detect_info->found |= found;
4448 return 1;
df7492f9
KH
4449}
4450
4451static void
4452decode_coding_ccl (coding)
4453 struct coding_system *coding;
4454{
7c78e542 4455 const unsigned char *src = coding->source + coding->consumed;
8f924df7 4456 const unsigned char *src_end = coding->source + coding->src_bytes;
69a80ea3
KH
4457 int *charbuf = coding->charbuf + coding->charbuf_used;
4458 int *charbuf_end = coding->charbuf + coding->charbuf_size;
df7492f9
KH
4459 int consumed_chars = 0;
4460 int multibytep = coding->src_multibyte;
4461 struct ccl_program ccl;
4462 int source_charbuf[1024];
4463 int source_byteidx[1024];
24a73b0a 4464 Lisp_Object attrs, charset_list;
df7492f9 4465
24a73b0a 4466 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4467 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4468
4469 while (src < src_end)
4470 {
7c78e542 4471 const unsigned char *p = src;
df7492f9
KH
4472 int *source, *source_end;
4473 int i = 0;
4474
4475 if (multibytep)
4476 while (i < 1024 && p < src_end)
4477 {
4478 source_byteidx[i] = p - src;
4479 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4480 }
4481 else
4482 while (i < 1024 && p < src_end)
4483 source_charbuf[i++] = *p++;
8f924df7 4484
df7492f9
KH
4485 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4486 ccl.last_block = 1;
4487
4488 source = source_charbuf;
4489 source_end = source + i;
4490 while (source < source_end)
4491 {
4492 ccl_driver (&ccl, source, charbuf,
8dcbea82
KH
4493 source_end - source, charbuf_end - charbuf,
4494 charset_list);
df7492f9
KH
4495 source += ccl.consumed;
4496 charbuf += ccl.produced;
4497 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4498 break;
4499 }
4500 if (source < source_end)
4501 src += source_byteidx[source - source_charbuf];
4502 else
4503 src = p;
4504 consumed_chars += source - source_charbuf;
4505
4506 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4507 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4508 break;
4509 }
4510
4511 switch (ccl.status)
4512 {
4513 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4514 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4515 break;
4516 case CCL_STAT_SUSPEND_BY_DST:
4517 break;
4518 case CCL_STAT_QUIT:
4519 case CCL_STAT_INVALID_CMD:
065e3595 4520 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4521 break;
4522 default:
065e3595 4523 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4524 break;
4525 }
4526 coding->consumed_char += consumed_chars;
4527 coding->consumed = src - coding->source;
4528 coding->charbuf_used = charbuf - coding->charbuf;
4529}
4530
4531static int
4532encode_coding_ccl (coding)
4533 struct coding_system *coding;
4534{
4535 struct ccl_program ccl;
4536 int multibytep = coding->dst_multibyte;
4537 int *charbuf = coding->charbuf;
4538 int *charbuf_end = charbuf + coding->charbuf_used;
4539 unsigned char *dst = coding->destination + coding->produced;
4540 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4541 unsigned char *adjusted_dst_end = dst_end - 1;
4542 int destination_charbuf[1024];
4543 int i, produced_chars = 0;
24a73b0a 4544 Lisp_Object attrs, charset_list;
df7492f9 4545
24a73b0a 4546 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9
KH
4547 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4548
4549 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4550 ccl.dst_multibyte = coding->dst_multibyte;
4551
4552 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4553 {
4554 int dst_bytes = dst_end - dst;
4555 if (dst_bytes > 1024)
4556 dst_bytes = 1024;
4557
4558 ccl_driver (&ccl, charbuf, destination_charbuf,
8dcbea82 4559 charbuf_end - charbuf, dst_bytes, charset_list);
df7492f9
KH
4560 charbuf += ccl.consumed;
4561 if (multibytep)
4562 for (i = 0; i < ccl.produced; i++)
4563 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4564 else
4565 {
4566 for (i = 0; i < ccl.produced; i++)
4567 *dst++ = destination_charbuf[i] & 0xFF;
4568 produced_chars += ccl.produced;
4569 }
4570 }
4571
4572 switch (ccl.status)
4573 {
4574 case CCL_STAT_SUSPEND_BY_SRC:
065e3595 4575 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
df7492f9
KH
4576 break;
4577 case CCL_STAT_SUSPEND_BY_DST:
065e3595 4578 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
4579 break;
4580 case CCL_STAT_QUIT:
4581 case CCL_STAT_INVALID_CMD:
065e3595 4582 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
df7492f9
KH
4583 break;
4584 default:
065e3595 4585 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4586 break;
1397dc18 4587 }
df7492f9
KH
4588
4589 coding->produced_char += produced_chars;
4590 coding->produced = dst - coding->destination;
4591 return 0;
1397dc18
KH
4592}
4593
df7492f9 4594
1397dc18 4595\f
df7492f9 4596/*** 10, 11. no-conversion handlers ***/
4ed46869 4597
b73bfc1c 4598/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4599
b73bfc1c 4600static void
df7492f9 4601decode_coding_raw_text (coding)
4ed46869 4602 struct coding_system *coding;
4ed46869 4603{
df7492f9 4604 coding->chars_at_source = 1;
2c78b7e1
KH
4605 coding->consumed_char = 0;
4606 coding->consumed = 0;
065e3595 4607 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 4608}
4ed46869 4609
df7492f9
KH
4610static int
4611encode_coding_raw_text (coding)
4612 struct coding_system *coding;
4613{
4614 int multibytep = coding->dst_multibyte;
4615 int *charbuf = coding->charbuf;
4616 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4617 unsigned char *dst = coding->destination + coding->produced;
4618 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4619 int produced_chars = 0;
b73bfc1c
KH
4620 int c;
4621
df7492f9 4622 if (multibytep)
b73bfc1c 4623 {
df7492f9 4624 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4625
df7492f9
KH
4626 if (coding->src_multibyte)
4627 while (charbuf < charbuf_end)
4628 {
4629 ASSURE_DESTINATION (safe_room);
4630 c = *charbuf++;
4631 if (ASCII_CHAR_P (c))
4632 EMIT_ONE_ASCII_BYTE (c);
4633 else if (CHAR_BYTE8_P (c))
4634 {
4635 c = CHAR_TO_BYTE8 (c);
4636 EMIT_ONE_BYTE (c);
4637 }
4638 else
4639 {
4640 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4641
df7492f9
KH
4642 CHAR_STRING_ADVANCE (c, p1);
4643 while (p0 < p1)
9d123124
KH
4644 {
4645 EMIT_ONE_BYTE (*p0);
4646 p0++;
4647 }
df7492f9
KH
4648 }
4649 }
b73bfc1c 4650 else
df7492f9
KH
4651 while (charbuf < charbuf_end)
4652 {
4653 ASSURE_DESTINATION (safe_room);
4654 c = *charbuf++;
4655 EMIT_ONE_BYTE (c);
4656 }
4657 }
4658 else
4ed46869 4659 {
df7492f9 4660 if (coding->src_multibyte)
d46c5b12 4661 {
df7492f9
KH
4662 int safe_room = MAX_MULTIBYTE_LENGTH;
4663
4664 while (charbuf < charbuf_end)
d46c5b12 4665 {
df7492f9
KH
4666 ASSURE_DESTINATION (safe_room);
4667 c = *charbuf++;
4668 if (ASCII_CHAR_P (c))
4669 *dst++ = c;
4670 else if (CHAR_BYTE8_P (c))
4671 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4672 else
df7492f9
KH
4673 CHAR_STRING_ADVANCE (c, dst);
4674 produced_chars++;
d46c5b12
KH
4675 }
4676 }
df7492f9
KH
4677 else
4678 {
4679 ASSURE_DESTINATION (charbuf_end - charbuf);
4680 while (charbuf < charbuf_end && dst < dst_end)
4681 *dst++ = *charbuf++;
4682 produced_chars = dst - (coding->destination + coding->dst_bytes);
8f924df7 4683 }
4ed46869 4684 }
065e3595 4685 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4686 coding->produced_char += produced_chars;
4687 coding->produced = dst - coding->destination;
4688 return 0;
4ed46869
KH
4689}
4690
ff0dacd7
KH
4691/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4692 Check if a text is encoded in a charset-based coding system. If it
4693 is, return 1, else return 0. */
4694
0a28aafb 4695static int
ff0dacd7 4696detect_coding_charset (coding, detect_info)
df7492f9 4697 struct coding_system *coding;
ff0dacd7 4698 struct coding_detection_info *detect_info;
1397dc18 4699{
065e3595 4700 const unsigned char *src = coding->source, *src_base;
8f924df7 4701 const unsigned char *src_end = coding->source + coding->src_bytes;
df7492f9
KH
4702 int multibytep = coding->src_multibyte;
4703 int consumed_chars = 0;
4704 Lisp_Object attrs, valids;
584948ac 4705 int found = 0;
1397dc18 4706
ff0dacd7
KH
4707 detect_info->checked |= CATEGORY_MASK_CHARSET;
4708
df7492f9
KH
4709 coding = &coding_categories[coding_category_charset];
4710 attrs = CODING_ID_ATTRS (coding->id);
4711 valids = AREF (attrs, coding_attr_charset_valids);
4712
4713 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4714 src += coding->head_ascii;
1397dc18 4715
b73bfc1c 4716 while (1)
1397dc18 4717 {
df7492f9 4718 int c;
1397dc18 4719
065e3595 4720 src_base = src;
df7492f9 4721 ONE_MORE_BYTE (c);
065e3595
KH
4722 if (c < 0)
4723 continue;
df7492f9
KH
4724 if (NILP (AREF (valids, c)))
4725 break;
584948ac 4726 if (c >= 0x80)
ff0dacd7 4727 found = CATEGORY_MASK_CHARSET;
df7492f9 4728 }
ff0dacd7 4729 detect_info->rejected |= CATEGORY_MASK_CHARSET;
df7492f9 4730 return 0;
4ed46869 4731
df7492f9 4732 no_more_source:
ff0dacd7
KH
4733 detect_info->found |= found;
4734 return 1;
df7492f9 4735}
b73bfc1c 4736
b73bfc1c 4737static void
df7492f9 4738decode_coding_charset (coding)
4ed46869 4739 struct coding_system *coding;
4ed46869 4740{
8f924df7
KH
4741 const unsigned char *src = coding->source + coding->consumed;
4742 const unsigned char *src_end = coding->source + coding->src_bytes;
4743 const unsigned char *src_base;
69a80ea3
KH
4744 int *charbuf = coding->charbuf + coding->charbuf_used;
4745 int *charbuf_end
4746 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
df7492f9
KH
4747 int consumed_chars = 0, consumed_chars_base;
4748 int multibytep = coding->src_multibyte;
24a73b0a 4749 Lisp_Object attrs, charset_list, valids;
ff0dacd7
KH
4750 int char_offset = coding->produced_char;
4751 int last_offset = char_offset;
4752 int last_id = charset_ascii;
df7492f9 4753
24a73b0a 4754 CODING_GET_INFO (coding, attrs, charset_list);
4eb6d3f1 4755 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4756
df7492f9 4757 while (1)
4ed46869 4758 {
4eb6d3f1 4759 int c;
24a73b0a
KH
4760 Lisp_Object val;
4761 struct charset *charset;
4762 int dim;
4763 int len = 1;
4764 unsigned code;
df7492f9
KH
4765
4766 src_base = src;
4767 consumed_chars_base = consumed_chars;
b73bfc1c 4768
df7492f9
KH
4769 if (charbuf >= charbuf_end)
4770 break;
4771
4eb6d3f1 4772 ONE_MORE_BYTE (c);
065e3595
KH
4773 if (c < 0)
4774 goto invalid_code;
24a73b0a
KH
4775 code = c;
4776
4777 val = AREF (valids, c);
4778 if (NILP (val))
4779 goto invalid_code;
4780 if (INTEGERP (val))
d46c5b12 4781 {
24a73b0a
KH
4782 charset = CHARSET_FROM_ID (XFASTINT (val));
4783 dim = CHARSET_DIMENSION (charset);
4784 while (len < dim)
b73bfc1c 4785 {
24a73b0a
KH
4786 ONE_MORE_BYTE (c);
4787 code = (code << 8) | c;
4788 len++;
b73bfc1c 4789 }
24a73b0a
KH
4790 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4791 charset, code, c);
d46c5b12 4792 }
df7492f9 4793 else
d46c5b12 4794 {
24a73b0a
KH
4795 /* VAL is a list of charset IDs. It is assured that the
4796 list is sorted by charset dimensions (smaller one
4797 comes first). */
4798 while (CONSP (val))
4eb6d3f1 4799 {
24a73b0a 4800 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
c7c66a95 4801 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4802 while (len < dim)
4eb6d3f1 4803 {
acb2a965
KH
4804 ONE_MORE_BYTE (c);
4805 code = (code << 8) | c;
f9d71dcd 4806 len++;
4eb6d3f1 4807 }
24a73b0a
KH
4808 CODING_DECODE_CHAR (coding, src, src_base,
4809 src_end, charset, code, c);
4810 if (c >= 0)
4811 break;
4812 val = XCDR (val);
ff0dacd7 4813 }
d46c5b12 4814 }
24a73b0a
KH
4815 if (c < 0)
4816 goto invalid_code;
4817 if (charset->id != charset_ascii
4818 && last_id != charset->id)
4819 {
4820 if (last_id != charset_ascii)
69a80ea3 4821 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
24a73b0a
KH
4822 last_id = charset->id;
4823 last_offset = char_offset;
4824 }
4825
df7492f9 4826 *charbuf++ = c;
ff0dacd7 4827 char_offset++;
df7492f9
KH
4828 continue;
4829
4830 invalid_code:
4831 src = src_base;
4832 consumed_chars = consumed_chars_base;
4833 ONE_MORE_BYTE (c);
065e3595 4834 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
ff0dacd7 4835 char_offset++;
df7492f9 4836 coding->errors++;
4ed46869
KH
4837 }
4838
df7492f9 4839 no_more_source:
ff0dacd7 4840 if (last_id != charset_ascii)
69a80ea3 4841 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
df7492f9
KH
4842 coding->consumed_char += consumed_chars_base;
4843 coding->consumed = src_base - coding->source;
4844 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4845}
4846
df7492f9
KH
4847static int
4848encode_coding_charset (coding)
4ed46869 4849 struct coding_system *coding;
4ed46869 4850{
df7492f9
KH
4851 int multibytep = coding->dst_multibyte;
4852 int *charbuf = coding->charbuf;
4853 int *charbuf_end = charbuf + coding->charbuf_used;
4854 unsigned char *dst = coding->destination + coding->produced;
4855 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4856 int safe_room = MAX_MULTIBYTE_LENGTH;
4857 int produced_chars = 0;
24a73b0a 4858 Lisp_Object attrs, charset_list;
df7492f9 4859 int ascii_compatible;
b73bfc1c 4860 int c;
b73bfc1c 4861
24a73b0a 4862 CODING_GET_INFO (coding, attrs, charset_list);
df7492f9 4863 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4864
df7492f9 4865 while (charbuf < charbuf_end)
4ed46869 4866 {
4eb6d3f1 4867 struct charset *charset;
df7492f9 4868 unsigned code;
8f924df7 4869
df7492f9
KH
4870 ASSURE_DESTINATION (safe_room);
4871 c = *charbuf++;
4872 if (ascii_compatible && ASCII_CHAR_P (c))
4873 EMIT_ONE_ASCII_BYTE (c);
16eafb5d 4874 else if (CHAR_BYTE8_P (c))
4ed46869 4875 {
16eafb5d
KH
4876 c = CHAR_TO_BYTE8 (c);
4877 EMIT_ONE_BYTE (c);
d46c5b12 4878 }
d46c5b12 4879 else
b73bfc1c 4880 {
4eb6d3f1
KH
4881 charset = char_charset (c, charset_list, &code);
4882 if (charset)
4883 {
4884 if (CHARSET_DIMENSION (charset) == 1)
4885 EMIT_ONE_BYTE (code);
4886 else if (CHARSET_DIMENSION (charset) == 2)
4887 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4888 else if (CHARSET_DIMENSION (charset) == 3)
4889 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4890 else
4891 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4892 (code >> 8) & 0xFF, code & 0xFF);
4893 }
4894 else
41cbe562
KH
4895 {
4896 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4897 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4898 else
4899 c = coding->default_char;
4900 EMIT_ONE_BYTE (c);
4901 }
4ed46869 4902 }
4ed46869
KH
4903 }
4904
065e3595 4905 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9
KH
4906 coding->produced_char += produced_chars;
4907 coding->produced = dst - coding->destination;
4908 return 0;
4ed46869
KH
4909}
4910
4911\f
1397dc18 4912/*** 10. C library functions ***/
4ed46869 4913
df7492f9
KH
4914/* Setup coding context CODING from information about CODING_SYSTEM.
4915 If CODING_SYSTEM is nil, `undecided' is assumed. If
4916 CODING_SYSTEM is invalid, signal an error. */
4ed46869 4917
ec6d2bb8 4918void
e0e989f6
KH
4919setup_coding_system (coding_system, coding)
4920 Lisp_Object coding_system;
4ed46869
KH
4921 struct coding_system *coding;
4922{
df7492f9
KH
4923 Lisp_Object attrs;
4924 Lisp_Object eol_type;
4925 Lisp_Object coding_type;
4608c386 4926 Lisp_Object val;
4ed46869 4927
df7492f9 4928 if (NILP (coding_system))
ae6f73fa 4929 coding_system = Qundecided;
c07c8e12 4930
df7492f9 4931 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
1f5dbf34 4932
df7492f9
KH
4933 attrs = CODING_ID_ATTRS (coding->id);
4934 eol_type = CODING_ID_EOL_TYPE (coding->id);
1f5dbf34 4935
df7492f9
KH
4936 coding->mode = 0;
4937 coding->head_ascii = -1;
4938 coding->common_flags
4939 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
5e5c78be
KH
4940 if (! NILP (CODING_ATTR_POST_READ (attrs)))
4941 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4942 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4943 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
8f924df7
KH
4944 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4945 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
1f5dbf34 4946
df7492f9 4947 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
4948 coding->max_charset_id = SCHARS (val) - 1;
4949 coding->safe_charsets = (char *) SDATA (val);
df7492f9 4950 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4608c386 4951
df7492f9
KH
4952 coding_type = CODING_ATTR_TYPE (attrs);
4953 if (EQ (coding_type, Qundecided))
d46c5b12 4954 {
df7492f9
KH
4955 coding->detector = NULL;
4956 coding->decoder = decode_coding_raw_text;
4957 coding->encoder = encode_coding_raw_text;
4958 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 4959 }
df7492f9 4960 else if (EQ (coding_type, Qiso_2022))
d46c5b12 4961 {
df7492f9
KH
4962 int i;
4963 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4964
4965 /* Invoke graphic register 0 to plane 0. */
4966 CODING_ISO_INVOCATION (coding, 0) = 0;
4967 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4968 CODING_ISO_INVOCATION (coding, 1)
4969 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4970 /* Setup the initial status of designation. */
4971 for (i = 0; i < 4; i++)
4972 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4973 /* Not single shifting initially. */
4974 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4975 /* Beginning of buffer should also be regarded as bol. */
4976 CODING_ISO_BOL (coding) = 1;
4977 coding->detector = detect_coding_iso_2022;
4978 coding->decoder = decode_coding_iso_2022;
4979 coding->encoder = encode_coding_iso_2022;
4980 if (flags & CODING_ISO_FLAG_SAFE)
4981 coding->mode |= CODING_MODE_SAFE_ENCODING;
d46c5b12 4982 coding->common_flags
df7492f9
KH
4983 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4984 | CODING_REQUIRE_FLUSHING_MASK);
4985 if (flags & CODING_ISO_FLAG_COMPOSITION)
4986 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
ff0dacd7
KH
4987 if (flags & CODING_ISO_FLAG_DESIGNATION)
4988 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
df7492f9
KH
4989 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4990 {
4991 setup_iso_safe_charsets (attrs);
4992 val = CODING_ATTR_SAFE_CHARSETS (attrs);
8f924df7
KH
4993 coding->max_charset_id = SCHARS (val) - 1;
4994 coding->safe_charsets = (char *) SDATA (val);
df7492f9
KH
4995 }
4996 CODING_ISO_FLAGS (coding) = flags;
d46c5b12 4997 }
df7492f9 4998 else if (EQ (coding_type, Qcharset))
d46c5b12 4999 {
df7492f9
KH
5000 coding->detector = detect_coding_charset;
5001 coding->decoder = decode_coding_charset;
5002 coding->encoder = encode_coding_charset;
d46c5b12 5003 coding->common_flags
df7492f9 5004 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
d46c5b12 5005 }
df7492f9 5006 else if (EQ (coding_type, Qutf_8))
d46c5b12 5007 {
df7492f9
KH
5008 coding->detector = detect_coding_utf_8;
5009 coding->decoder = decode_coding_utf_8;
5010 coding->encoder = encode_coding_utf_8;
5011 coding->common_flags
5012 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5013 }
5014 else if (EQ (coding_type, Qutf_16))
5015 {
5016 val = AREF (attrs, coding_attr_utf_16_bom);
5017 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
5018 : EQ (val, Qt) ? utf_16_with_bom
5019 : utf_16_without_bom);
5020 val = AREF (attrs, coding_attr_utf_16_endian);
b49a1807 5021 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
df7492f9 5022 : utf_16_little_endian);
e19c3639 5023 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
5024 coding->detector = detect_coding_utf_16;
5025 coding->decoder = decode_coding_utf_16;
5026 coding->encoder = encode_coding_utf_16;
5027 coding->common_flags
5028 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
b49a1807
KH
5029 if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5030 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
d46c5b12 5031 }
df7492f9 5032 else if (EQ (coding_type, Qccl))
4ed46869 5033 {
df7492f9
KH
5034 coding->detector = detect_coding_ccl;
5035 coding->decoder = decode_coding_ccl;
5036 coding->encoder = encode_coding_ccl;
c952af22 5037 coding->common_flags
df7492f9
KH
5038 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5039 | CODING_REQUIRE_FLUSHING_MASK);
5040 }
5041 else if (EQ (coding_type, Qemacs_mule))
5042 {
5043 coding->detector = detect_coding_emacs_mule;
5044 coding->decoder = decode_coding_emacs_mule;
5045 coding->encoder = encode_coding_emacs_mule;
c952af22 5046 coding->common_flags
df7492f9
KH
5047 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5048 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5049 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5050 {
5051 Lisp_Object tail, safe_charsets;
5052 int max_charset_id = 0;
5053
5054 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5055 tail = XCDR (tail))
5056 if (max_charset_id < XFASTINT (XCAR (tail)))
5057 max_charset_id = XFASTINT (XCAR (tail));
5058 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5059 make_number (255));
5060 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5061 tail = XCDR (tail))
8f924df7 5062 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 5063 coding->max_charset_id = max_charset_id;
8f924df7 5064 coding->safe_charsets = (char *) SDATA (safe_charsets);
df7492f9
KH
5065 }
5066 }
5067 else if (EQ (coding_type, Qshift_jis))
5068 {
5069 coding->detector = detect_coding_sjis;
5070 coding->decoder = decode_coding_sjis;
5071 coding->encoder = encode_coding_sjis;
c952af22 5072 coding->common_flags
df7492f9
KH
5073 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5074 }
5075 else if (EQ (coding_type, Qbig5))
5076 {
5077 coding->detector = detect_coding_big5;
5078 coding->decoder = decode_coding_big5;
5079 coding->encoder = encode_coding_big5;
c952af22 5080 coding->common_flags
df7492f9
KH
5081 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5082 }
5083 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 5084 {
df7492f9
KH
5085 coding->detector = NULL;
5086 coding->decoder = decode_coding_raw_text;
5087 coding->encoder = encode_coding_raw_text;
ea29edf2
KH
5088 if (! EQ (eol_type, Qunix))
5089 {
5090 coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5091 if (! VECTORP (eol_type))
5092 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5093 }
5094
4ed46869 5095 }
4ed46869 5096
df7492f9 5097 return;
4ed46869
KH
5098}
5099
0ff61e78
KH
5100/* Return a list of charsets supported by CODING. */
5101
5102Lisp_Object
5103coding_charset_list (coding)
5104 struct coding_system *coding;
5105{
35befdaa 5106 Lisp_Object attrs, charset_list;
0ff61e78
KH
5107
5108 CODING_GET_INFO (coding, attrs, charset_list);
5109 if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5110 {
5111 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5112
5113 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5114 charset_list = Viso_2022_charset_list;
5115 }
5116 else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5117 {
5118 charset_list = Vemacs_mule_charset_list;
5119 }
5120 return charset_list;
5121}
5122
5123
df7492f9
KH
5124/* Return raw-text or one of its subsidiaries that has the same
5125 eol_type as CODING-SYSTEM. */
ec6d2bb8 5126
df7492f9
KH
5127Lisp_Object
5128raw_text_coding_system (coding_system)
5129 Lisp_Object coding_system;
ec6d2bb8 5130{
0be8721c 5131 Lisp_Object spec, attrs;
df7492f9 5132 Lisp_Object eol_type, raw_text_eol_type;
ec6d2bb8 5133
d3e4cb56
KH
5134 if (NILP (coding_system))
5135 return Qraw_text;
df7492f9
KH
5136 spec = CODING_SYSTEM_SPEC (coding_system);
5137 attrs = AREF (spec, 0);
ec6d2bb8 5138
df7492f9
KH
5139 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5140 return coding_system;
ec6d2bb8 5141
df7492f9
KH
5142 eol_type = AREF (spec, 2);
5143 if (VECTORP (eol_type))
5144 return Qraw_text;
5145 spec = CODING_SYSTEM_SPEC (Qraw_text);
5146 raw_text_eol_type = AREF (spec, 2);
5147 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5148 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5149 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
5150}
5151
54f78171 5152
df7492f9
KH
5153/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5154 does, return one of the subsidiary that has the same eol-spec as
5155 PARENT. Otherwise, return CODING_SYSTEM. */
5156
5157Lisp_Object
5158coding_inherit_eol_type (coding_system, parent)
b74e4686 5159 Lisp_Object coding_system, parent;
54f78171 5160{
3e139625 5161 Lisp_Object spec, eol_type;
54f78171 5162
d3e4cb56
KH
5163 if (NILP (coding_system))
5164 coding_system = Qraw_text;
df7492f9 5165 spec = CODING_SYSTEM_SPEC (coding_system);
df7492f9 5166 eol_type = AREF (spec, 2);
d3e4cb56
KH
5167 if (VECTORP (eol_type)
5168 && ! NILP (parent))
df7492f9
KH
5169 {
5170 Lisp_Object parent_spec;
df7492f9
KH
5171 Lisp_Object parent_eol_type;
5172
5173 parent_spec
5174 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5175 parent_eol_type = AREF (parent_spec, 2);
5176 if (EQ (parent_eol_type, Qunix))
5177 coding_system = AREF (eol_type, 0);
5178 else if (EQ (parent_eol_type, Qdos))
5179 coding_system = AREF (eol_type, 1);
5180 else if (EQ (parent_eol_type, Qmac))
5181 coding_system = AREF (eol_type, 2);
54f78171 5182 }
df7492f9 5183 return coding_system;
54f78171
KH
5184}
5185
4ed46869
KH
5186/* Emacs has a mechanism to automatically detect a coding system if it
5187 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5188 it's impossible to distinguish some coding systems accurately
5189 because they use the same range of codes. So, at first, coding
5190 systems are categorized into 7, those are:
5191
0ef69138 5192 o coding-category-emacs-mule
4ed46869
KH
5193
5194 The category for a coding system which has the same code range
5195 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 5196 symbol) `emacs-mule' by default.
4ed46869
KH
5197
5198 o coding-category-sjis
5199
5200 The category for a coding system which has the same code range
5201 as SJIS. Assigned the coding-system (Lisp
7717c392 5202 symbol) `japanese-shift-jis' by default.
4ed46869
KH
5203
5204 o coding-category-iso-7
5205
5206 The category for a coding system which has the same code range
7717c392 5207 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
5208 shift and single shift functions. This can encode/decode all
5209 charsets. Assigned the coding-system (Lisp symbol)
5210 `iso-2022-7bit' by default.
5211
5212 o coding-category-iso-7-tight
5213
5214 Same as coding-category-iso-7 except that this can
5215 encode/decode only the specified charsets.
4ed46869
KH
5216
5217 o coding-category-iso-8-1
5218
5219 The category for a coding system which has the same code range
5220 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5221 for DIMENSION1 charset. This doesn't use any locking shift
5222 and single shift functions. Assigned the coding-system (Lisp
5223 symbol) `iso-latin-1' by default.
4ed46869
KH
5224
5225 o coding-category-iso-8-2
5226
5227 The category for a coding system which has the same code range
5228 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
5229 for DIMENSION2 charset. This doesn't use any locking shift
5230 and single shift functions. Assigned the coding-system (Lisp
5231 symbol) `japanese-iso-8bit' by default.
4ed46869 5232
7717c392 5233 o coding-category-iso-7-else
4ed46869
KH
5234
5235 The category for a coding system which has the same code range
df7492f9 5236 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
5237 single shift functions. Assigned the coding-system (Lisp
5238 symbol) `iso-2022-7bit-lock' by default.
5239
5240 o coding-category-iso-8-else
5241
5242 The category for a coding system which has the same code range
df7492f9 5243 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
5244 single shift functions. Assigned the coding-system (Lisp
5245 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
5246
5247 o coding-category-big5
5248
5249 The category for a coding system which has the same code range
5250 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 5251 `cn-big5' by default.
4ed46869 5252
fa42c37f
KH
5253 o coding-category-utf-8
5254
5255 The category for a coding system which has the same code range
6e76ae91 5256 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
fa42c37f
KH
5257 symbol) `utf-8' by default.
5258
5259 o coding-category-utf-16-be
5260
5261 The category for a coding system in which a text has an
5262 Unicode signature (cf. Unicode Standard) in the order of BIG
5263 endian at the head. Assigned the coding-system (Lisp symbol)
5264 `utf-16-be' by default.
5265
5266 o coding-category-utf-16-le
5267
5268 The category for a coding system in which a text has an
5269 Unicode signature (cf. Unicode Standard) in the order of
5270 LITTLE endian at the head. Assigned the coding-system (Lisp
5271 symbol) `utf-16-le' by default.
5272
1397dc18
KH
5273 o coding-category-ccl
5274
5275 The category for a coding system of which encoder/decoder is
5276 written in CCL programs. The default value is nil, i.e., no
5277 coding system is assigned.
5278
4ed46869
KH
5279 o coding-category-binary
5280
5281 The category for a coding system not categorized in any of the
5282 above. Assigned the coding-system (Lisp symbol)
e0e989f6 5283 `no-conversion' by default.
4ed46869
KH
5284
5285 Each of them is a Lisp symbol and the value is an actual
df7492f9 5286 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
5287 What Emacs does actually is to detect a category of coding system.
5288 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 5289 decide only one possible category, it selects a category of the
4ed46869
KH
5290 highest priority. Priorities of categories are also specified by a
5291 user in a Lisp variable `coding-category-list'.
5292
5293*/
5294
df7492f9
KH
5295#define EOL_SEEN_NONE 0
5296#define EOL_SEEN_LF 1
5297#define EOL_SEEN_CR 2
5298#define EOL_SEEN_CRLF 4
66cfb530 5299
ff0dacd7
KH
5300/* Detect how end-of-line of a text of length SRC_BYTES pointed by
5301 SOURCE is encoded. If CATEGORY is one of
5302 coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5303 two-byte, else they are encoded by one-byte.
5304
5305 Return one of EOL_SEEN_XXX. */
4ed46869 5306
bc4bc72a 5307#define MAX_EOL_CHECK_COUNT 3
d46c5b12
KH
5308
5309static int
89528eb3 5310detect_eol (source, src_bytes, category)
f6cbaf43 5311 const unsigned char *source;
df7492f9 5312 EMACS_INT src_bytes;
89528eb3 5313 enum coding_category category;
4ed46869 5314{
f6cbaf43 5315 const unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 5316 unsigned char c;
df7492f9
KH
5317 int total = 0;
5318 int eol_seen = EOL_SEEN_NONE;
4ed46869 5319
89528eb3 5320 if ((1 << category) & CATEGORY_MASK_UTF_16)
4ed46869 5321 {
df7492f9 5322 int msb, lsb;
fa42c37f 5323
89528eb3
KH
5324 msb = category == (coding_category_utf_16_le
5325 | coding_category_utf_16_le_nosig);
df7492f9 5326 lsb = 1 - msb;
fa42c37f 5327
df7492f9 5328 while (src + 1 < src_end)
fa42c37f 5329 {
df7492f9
KH
5330 c = src[lsb];
5331 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 5332 {
df7492f9
KH
5333 int this_eol;
5334
5335 if (c == '\n')
5336 this_eol = EOL_SEEN_LF;
5337 else if (src + 3 >= src_end
5338 || src[msb + 2] != 0
5339 || src[lsb + 2] != '\n')
5340 this_eol = EOL_SEEN_CR;
fa42c37f 5341 else
8f924df7 5342 this_eol = EOL_SEEN_CRLF;
df7492f9
KH
5343
5344 if (eol_seen == EOL_SEEN_NONE)
5345 /* This is the first end-of-line. */
5346 eol_seen = this_eol;
5347 else if (eol_seen != this_eol)
fa42c37f 5348 {
df7492f9
KH
5349 /* The found type is different from what found before. */
5350 eol_seen = EOL_SEEN_LF;
5351 break;
fa42c37f 5352 }
df7492f9
KH
5353 if (++total == MAX_EOL_CHECK_COUNT)
5354 break;
fa42c37f 5355 }
df7492f9 5356 src += 2;
fa42c37f 5357 }
bcf26d6a 5358 }
d46c5b12 5359 else
c4825358 5360 {
df7492f9 5361 while (src < src_end)
27901516 5362 {
df7492f9
KH
5363 c = *src++;
5364 if (c == '\n' || c == '\r')
5365 {
5366 int this_eol;
d46c5b12 5367
df7492f9
KH
5368 if (c == '\n')
5369 this_eol = EOL_SEEN_LF;
5370 else if (src >= src_end || *src != '\n')
5371 this_eol = EOL_SEEN_CR;
5372 else
5373 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5374
df7492f9
KH
5375 if (eol_seen == EOL_SEEN_NONE)
5376 /* This is the first end-of-line. */
5377 eol_seen = this_eol;
5378 else if (eol_seen != this_eol)
5379 {
5380 /* The found type is different from what found before. */
5381 eol_seen = EOL_SEEN_LF;
5382 break;
5383 }
5384 if (++total == MAX_EOL_CHECK_COUNT)
5385 break;
5386 }
5387 }
73be902c 5388 }
df7492f9 5389 return eol_seen;
73be902c
KH
5390}
5391
df7492f9 5392
24a73b0a 5393static Lisp_Object
df7492f9
KH
5394adjust_coding_eol_type (coding, eol_seen)
5395 struct coding_system *coding;
5396 int eol_seen;
73be902c 5397{
0be8721c 5398 Lisp_Object eol_type;
8f924df7 5399
df7492f9
KH
5400 eol_type = CODING_ID_EOL_TYPE (coding->id);
5401 if (eol_seen & EOL_SEEN_LF)
24a73b0a
KH
5402 {
5403 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5404 eol_type = Qunix;
5405 }
6f197c07 5406 else if (eol_seen & EOL_SEEN_CRLF)
24a73b0a
KH
5407 {
5408 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5409 eol_type = Qdos;
5410 }
6f197c07 5411 else if (eol_seen & EOL_SEEN_CR)
24a73b0a
KH
5412 {
5413 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5414 eol_type = Qmac;
5415 }
5416 return eol_type;
d46c5b12 5417}
4ed46869 5418
df7492f9
KH
5419/* Detect how a text specified in CODING is encoded. If a coding
5420 system is detected, update fields of CODING by the detected coding
5421 system. */
0a28aafb 5422
df7492f9
KH
5423void
5424detect_coding (coding)
d46c5b12 5425 struct coding_system *coding;
d46c5b12 5426{
8f924df7 5427 const unsigned char *src, *src_end;
d46c5b12 5428
df7492f9
KH
5429 coding->consumed = coding->consumed_char = 0;
5430 coding->produced = coding->produced_char = 0;
5431 coding_set_source (coding);
1c3478b0 5432
df7492f9 5433 src_end = coding->source + coding->src_bytes;
1c3478b0 5434
df7492f9
KH
5435 /* If we have not yet decided the text encoding type, detect it
5436 now. */
5437 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5438 {
df7492f9 5439 int c, i;
6cb21a4f 5440 struct coding_detection_info detect_info;
df7492f9 5441
6cb21a4f 5442 detect_info.checked = detect_info.found = detect_info.rejected = 0;
24a73b0a 5443 for (i = 0, src = coding->source; src < src_end; i++, src++)
d46c5b12 5444 {
df7492f9 5445 c = *src;
6cb21a4f 5446 if (c & 0x80)
df7492f9 5447 break;
6cb21a4f
KH
5448 if (c < 0x20
5449 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5450 && ! inhibit_iso_escape_detection
5451 && ! detect_info.checked)
5452 {
5453 coding->head_ascii = src - (coding->source + coding->consumed);
5454 if (detect_coding_iso_2022 (coding, &detect_info))
5455 {
5456 /* We have scanned the whole data. */
5457 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5458 /* We didn't find an 8-bit code. */
5459 src = src_end;
5460 break;
5461 }
5462 }
d46c5b12 5463 }
df7492f9
KH
5464 coding->head_ascii = src - (coding->source + coding->consumed);
5465
3aef54f3 5466 if (coding->head_ascii < coding->src_bytes
6cb21a4f 5467 || detect_info.found)
d46c5b12 5468 {
ff0dacd7
KH
5469 enum coding_category category;
5470 struct coding_system *this;
df7492f9 5471
6cb21a4f
KH
5472 if (coding->head_ascii == coding->src_bytes)
5473 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
5474 for (i = 0; i < coding_category_raw_text; i++)
5475 {
5476 category = coding_priorities[i];
5477 this = coding_categories + category;
5478 if (detect_info.found & (1 << category))
24a73b0a 5479 break;
6cb21a4f
KH
5480 }
5481 else
5482 for (i = 0; i < coding_category_raw_text; i++)
5483 {
5484 category = coding_priorities[i];
5485 this = coding_categories + category;
5486 if (this->id < 0)
5487 {
5488 /* No coding system of this category is defined. */
5489 detect_info.rejected |= (1 << category);
5490 }
5491 else if (category >= coding_category_raw_text)
5492 continue;
5493 else if (detect_info.checked & (1 << category))
5494 {
5495 if (detect_info.found & (1 << category))
5496 break;
5497 }
5498 else if ((*(this->detector)) (coding, &detect_info)
5499 && detect_info.found & (1 << category))
5500 {
5501 if (category == coding_category_utf_16_auto)
5502 {
5503 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5504 category = coding_category_utf_16_le;
5505 else
5506 category = coding_category_utf_16_be;
5507 }
5508 break;
5509 }
5510 }
5511
ff0dacd7
KH
5512 if (i < coding_category_raw_text)
5513 setup_coding_system (CODING_ID_NAME (this->id), coding);
5514 else if (detect_info.rejected == CATEGORY_MASK_ANY)
df7492f9 5515 setup_coding_system (Qraw_text, coding);
ff0dacd7 5516 else if (detect_info.rejected)
df7492f9 5517 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7
KH
5518 if (! (detect_info.rejected & (1 << coding_priorities[i])))
5519 {
5520 this = coding_categories + coding_priorities[i];
5521 setup_coding_system (CODING_ID_NAME (this->id), coding);
5522 break;
5523 }
d46c5b12 5524 }
b73bfc1c 5525 }
24a73b0a
KH
5526 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5527 == coding_category_utf_16_auto)
b49a1807
KH
5528 {
5529 Lisp_Object coding_systems;
5530 struct coding_detection_info detect_info;
5531
5532 coding_systems
5533 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5534 detect_info.found = detect_info.rejected = 0;
5535 if (CONSP (coding_systems)
24a73b0a 5536 && detect_coding_utf_16 (coding, &detect_info))
b49a1807
KH
5537 {
5538 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5539 setup_coding_system (XCAR (coding_systems), coding);
24a73b0a 5540 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
b49a1807
KH
5541 setup_coding_system (XCDR (coding_systems), coding);
5542 }
5543 }
4ed46869 5544}
4ed46869 5545
d46c5b12 5546
aaaf0b1e 5547static void
df7492f9 5548decode_eol (coding)
aaaf0b1e 5549 struct coding_system *coding;
aaaf0b1e 5550{
24a73b0a
KH
5551 Lisp_Object eol_type;
5552 unsigned char *p, *pbeg, *pend;
5553
5554 eol_type = CODING_ID_EOL_TYPE (coding->id);
5555 if (EQ (eol_type, Qunix))
5556 return;
5557
5558 if (NILP (coding->dst_object))
5559 pbeg = coding->destination;
5560 else
5561 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5562 pend = pbeg + coding->produced;
5563
5564 if (VECTORP (eol_type))
aaaf0b1e 5565 {
df7492f9 5566 int eol_seen = EOL_SEEN_NONE;
4ed46869 5567
24a73b0a 5568 for (p = pbeg; p < pend; p++)
aaaf0b1e 5569 {
df7492f9
KH
5570 if (*p == '\n')
5571 eol_seen |= EOL_SEEN_LF;
5572 else if (*p == '\r')
aaaf0b1e 5573 {
df7492f9 5574 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5575 {
df7492f9
KH
5576 eol_seen |= EOL_SEEN_CRLF;
5577 p++;
aaaf0b1e 5578 }
aaaf0b1e 5579 else
df7492f9 5580 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5581 }
aaaf0b1e 5582 }
24a73b0a
KH
5583 if (eol_seen != EOL_SEEN_NONE
5584 && eol_seen != EOL_SEEN_LF
5585 && eol_seen != EOL_SEEN_CRLF
5586 && eol_seen != EOL_SEEN_CR)
5587 eol_seen = EOL_SEEN_LF;
df7492f9 5588 if (eol_seen != EOL_SEEN_NONE)
24a73b0a 5589 eol_type = adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5590 }
d46c5b12 5591
24a73b0a 5592 if (EQ (eol_type, Qmac))
27901516 5593 {
24a73b0a 5594 for (p = pbeg; p < pend; p++)
df7492f9
KH
5595 if (*p == '\r')
5596 *p = '\n';
4ed46869 5597 }
24a73b0a 5598 else if (EQ (eol_type, Qdos))
df7492f9 5599 {
24a73b0a 5600 int n = 0;
b73bfc1c 5601
24a73b0a
KH
5602 if (NILP (coding->dst_object))
5603 {
5604 for (p = pend - 2; p >= pbeg; p--)
5605 if (*p == '\r')
5606 {
5607 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5608 n++;
5609 }
5610 }
5611 else
5612 {
5613 for (p = pend - 2; p >= pbeg; p--)
5614 if (*p == '\r')
5615 {
5616 int pos_byte = coding->dst_pos_byte + (p - pbeg);
5617 int pos = BYTE_TO_CHAR (pos_byte);
5618
5619 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5620 n++;
5621 }
5622 }
5623 coding->produced -= n;
5624 coding->produced_char -= n;
aaaf0b1e 5625 }
4ed46869
KH
5626}
5627
7d64c6ad 5628
a6f87d34
KH
5629/* Return a translation table (or list of them) from coding system
5630 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5631 decoding (ENCODEP is zero). */
7d64c6ad 5632
e6a54062 5633static Lisp_Object
09ee6fdd
KH
5634get_translation_table (attrs, encodep, max_lookup)
5635 Lisp_Object attrs;
5636 int encodep, *max_lookup;
7d64c6ad
KH
5637{
5638 Lisp_Object standard, translation_table;
09ee6fdd 5639 Lisp_Object val;
7d64c6ad
KH
5640
5641 if (encodep)
5642 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5643 standard = Vstandard_translation_table_for_encode;
5644 else
5645 translation_table = CODING_ATTR_DECODE_TBL (attrs),
5646 standard = Vstandard_translation_table_for_decode;
7d64c6ad 5647 if (NILP (translation_table))
09ee6fdd
KH
5648 translation_table = standard;
5649 else
a6f87d34 5650 {
09ee6fdd
KH
5651 if (SYMBOLP (translation_table))
5652 translation_table = Fget (translation_table, Qtranslation_table);
5653 else if (CONSP (translation_table))
5654 {
5655 translation_table = Fcopy_sequence (translation_table);
5656 for (val = translation_table; CONSP (val); val = XCDR (val))
5657 if (SYMBOLP (XCAR (val)))
5658 XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5659 }
5660 if (CHAR_TABLE_P (standard))
5661 {
5662 if (CONSP (translation_table))
5663 translation_table = nconc2 (translation_table,
5664 Fcons (standard, Qnil));
5665 else
5666 translation_table = Fcons (translation_table,
5667 Fcons (standard, Qnil));
5668 }
a6f87d34 5669 }
2170c8f0
KH
5670
5671 if (max_lookup)
09ee6fdd 5672 {
2170c8f0
KH
5673 *max_lookup = 1;
5674 if (CHAR_TABLE_P (translation_table)
5675 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5676 {
5677 val = XCHAR_TABLE (translation_table)->extras[1];
5678 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5679 *max_lookup = XFASTINT (val);
5680 }
5681 else if (CONSP (translation_table))
5682 {
5683 Lisp_Object tail, val;
09ee6fdd 5684
2170c8f0
KH
5685 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5686 if (CHAR_TABLE_P (XCAR (tail))
5687 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5688 {
5689 val = XCHAR_TABLE (XCAR (tail))->extras[1];
5690 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5691 *max_lookup = XFASTINT (val);
5692 }
5693 }
a6f87d34 5694 }
7d64c6ad
KH
5695 return translation_table;
5696}
5697
09ee6fdd
KH
/* Look up character C in TABLE, which is a char-table or a list of
   char-tables.  C and TRANS must be lvalues.

   When a table maps C to a character, C is replaced by that
   character and TRANS is reset to nil; with a list, the lookup then
   goes on in the following tables using the new C.  When a table
   gives any other non-nil value, that value is left in TRANS (and,
   with a list, the lookup stops there).  TRANS ends up nil when no
   such value is found.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)			\
  do {									\
    trans = Qnil;							\
    if (CHAR_TABLE_P (table))						\
      {									\
	trans = CHAR_TABLE_REF (table, c);				\
	if (CHARACTERP (trans))						\
	  {								\
	    c = XFASTINT (trans);					\
	    trans = Qnil;						\
	  }								\
      }									\
    else if (CONSP (table))						\
      {									\
	Lisp_Object tail;						\
									\
	tail = table;							\
	while (CONSP (tail))						\
	  {								\
	    if (CHAR_TABLE_P (XCAR (tail)))				\
	      {								\
		trans = CHAR_TABLE_REF (XCAR (tail), c);		\
		if (CHARACTERP (trans))					\
		  {							\
		    c = XFASTINT (trans);				\
		    trans = Qnil;					\
		  }							\
		else if (! NILP (trans))				\
		  break;						\
	      }								\
	    tail = XCDR (tail);						\
	  }								\
      }									\
  } while (0)
5722
7d64c6ad 5723
69a80ea3
KH
5724static Lisp_Object
5725get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5726 Lisp_Object val;
5727 int *buf, *buf_end;
5728 int last_block;
5729 int *from_nchars, *to_nchars;
5730{
433f7f87
KH
5731 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
5732 [TO-CHAR ...]. */
69a80ea3
KH
5733 if (CONSP (val))
5734 {
433f7f87 5735 Lisp_Object from, tail;
69a80ea3
KH
5736 int i, len;
5737
433f7f87 5738 for (tail = val; CONSP (tail); tail = XCDR (tail))
69a80ea3 5739 {
433f7f87
KH
5740 val = XCAR (tail);
5741 from = XCAR (val);
5742 len = ASIZE (from);
5743 for (i = 0; i < len; i++)
5744 {
5745 if (buf + i == buf_end)
5746 {
5747 if (! last_block)
5748 return Qt;
5749 break;
5750 }
5751 if (XINT (AREF (from, i)) != buf[i])
5752 break;
5753 }
5754 if (i == len)
5755 {
5756 val = XCDR (val);
5757 *from_nchars = len;
5758 break;
5759 }
69a80ea3 5760 }
433f7f87
KH
5761 if (! CONSP (tail))
5762 return Qnil;
69a80ea3
KH
5763 }
5764 if (VECTORP (val))
5765 *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
5766 else
5767 *buf = XINT (val);
5768 return val;
5769}
5770
5771
d46c5b12 5772static int
69a80ea3 5773produce_chars (coding, translation_table, last_block)
df7492f9 5774 struct coding_system *coding;
69a80ea3
KH
5775 Lisp_Object translation_table;
5776 int last_block;
4ed46869 5777{
df7492f9
KH
5778 unsigned char *dst = coding->destination + coding->produced;
5779 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5780 int produced;
5781 int produced_chars = 0;
69a80ea3 5782 int carryover = 0;
4ed46869 5783
df7492f9 5784 if (! coding->chars_at_source)
4ed46869 5785 {
df7492f9 5786 /* Characters are in coding->charbuf. */
fba4576f
AS
5787 int *buf = coding->charbuf;
5788 int *buf_end = buf + coding->charbuf_used;
4ed46869 5789
df7492f9
KH
5790 if (BUFFERP (coding->src_object)
5791 && EQ (coding->src_object, coding->dst_object))
8f924df7 5792 dst_end = ((unsigned char *) coding->source) + coding->consumed;
4ed46869 5793
df7492f9 5794 while (buf < buf_end)
4ed46869 5795 {
69a80ea3 5796 int c = *buf, i;
bc4bc72a 5797
df7492f9
KH
5798 if (c >= 0)
5799 {
69a80ea3
KH
5800 int from_nchars = 1, to_nchars = 1;
5801 Lisp_Object trans = Qnil;
5802
09ee6fdd 5803 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 5804 if (! NILP (trans))
69a80ea3
KH
5805 {
5806 trans = get_translation (trans, buf, buf_end, last_block,
5807 &from_nchars, &to_nchars);
5808 if (EQ (trans, Qt))
5809 break;
5810 c = *buf;
5811 }
5812
5813 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
5814 {
5815 dst = alloc_destination (coding,
5816 buf_end - buf
5817 + MAX_MULTIBYTE_LENGTH * to_nchars,
5818 dst);
5819 dst_end = coding->destination + coding->dst_bytes;
5820 }
5821
433f7f87 5822 for (i = 0; i < to_nchars; i++)
69a80ea3 5823 {
433f7f87
KH
5824 if (i > 0)
5825 c = XINT (AREF (trans, i));
69a80ea3
KH
5826 if (coding->dst_multibyte
5827 || ! CHAR_BYTE8_P (c))
5828 CHAR_STRING_ADVANCE (c, dst);
5829 else
5830 *dst++ = CHAR_TO_BYTE8 (c);
5831 }
5832 produced_chars += to_nchars;
5833 *buf++ = to_nchars;
5834 while (--from_nchars > 0)
5835 *buf++ = 0;
d46c5b12 5836 }
df7492f9 5837 else
69a80ea3
KH
5838 /* This is an annotation datum. (-C) is the length. */
5839 buf += -c;
4ed46869 5840 }
69a80ea3 5841 carryover = buf_end - buf;
4ed46869 5842 }
fa42c37f 5843 else
fa42c37f 5844 {
8f924df7
KH
5845 const unsigned char *src = coding->source;
5846 const unsigned char *src_end = src + coding->src_bytes;
df7492f9 5847 Lisp_Object eol_type;
fa42c37f 5848
df7492f9 5849 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5850
df7492f9 5851 if (coding->src_multibyte != coding->dst_multibyte)
fa42c37f 5852 {
df7492f9 5853 if (coding->src_multibyte)
fa42c37f 5854 {
71c81426 5855 int multibytep = 1;
df7492f9 5856 int consumed_chars;
d46c5b12 5857
df7492f9
KH
5858 while (1)
5859 {
8f924df7 5860 const unsigned char *src_base = src;
df7492f9 5861 int c;
b73bfc1c 5862
df7492f9
KH
5863 ONE_MORE_BYTE (c);
5864 if (c == '\r')
5865 {
5866 if (EQ (eol_type, Qdos))
5867 {
98725083
KH
5868 if (src == src_end)
5869 {
065e3595
KH
5870 record_conversion_result
5871 (coding, CODING_RESULT_INSUFFICIENT_SRC);
98725083
KH
5872 goto no_more_source;
5873 }
5874 if (*src == '\n')
df7492f9
KH
5875 c = *src++;
5876 }
5877 else if (EQ (eol_type, Qmac))
5878 c = '\n';
5879 }
5880 if (dst == dst_end)
5881 {
2c78b7e1 5882 coding->consumed = src - coding->source;
b73bfc1c 5883
2c78b7e1 5884 if (EQ (coding->src_object, coding->dst_object))
8f924df7 5885 dst_end = (unsigned char *) src;
2c78b7e1
KH
5886 if (dst == dst_end)
5887 {
5888 dst = alloc_destination (coding, src_end - src + 1,
5889 dst);
5890 dst_end = coding->destination + coding->dst_bytes;
5891 coding_set_source (coding);
5892 src = coding->source + coding->consumed;
5893 src_end = coding->source + coding->src_bytes;
5894 }
df7492f9
KH
5895 }
5896 *dst++ = c;
5897 produced_chars++;
5898 }
5899 no_more_source:
5900 ;
fa42c37f
KH
5901 }
5902 else
df7492f9
KH
5903 while (src < src_end)
5904 {
71c81426 5905 int multibytep = 1;
df7492f9 5906 int c = *src++;
b73bfc1c 5907
df7492f9
KH
5908 if (c == '\r')
5909 {
5910 if (EQ (eol_type, Qdos))
5911 {
5912 if (src < src_end
5913 && *src == '\n')
5914 c = *src++;
5915 }
5916 else if (EQ (eol_type, Qmac))
5917 c = '\n';
5918 }
5919 if (dst >= dst_end - 1)
5920 {
2c78b7e1 5921 coding->consumed = src - coding->source;
df7492f9 5922
2c78b7e1 5923 if (EQ (coding->src_object, coding->dst_object))
8f924df7 5924 dst_end = (unsigned char *) src;
2c78b7e1
KH
5925 if (dst >= dst_end - 1)
5926 {
5927 dst = alloc_destination (coding, src_end - src + 2,
5928 dst);
5929 dst_end = coding->destination + coding->dst_bytes;
5930 coding_set_source (coding);
5931 src = coding->source + coding->consumed;
5932 src_end = coding->source + coding->src_bytes;
5933 }
df7492f9
KH
5934 }
5935 EMIT_ONE_BYTE (c);
5936 }
d46c5b12 5937 }
df7492f9
KH
5938 else
5939 {
5940 if (!EQ (coding->src_object, coding->dst_object))
fa42c37f 5941 {
df7492f9 5942 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5943
df7492f9 5944 if (require > 0)
fa42c37f 5945 {
df7492f9
KH
5946 EMACS_INT offset = src - coding->source;
5947
5948 dst = alloc_destination (coding, require, dst);
5949 coding_set_source (coding);
5950 src = coding->source + offset;
5951 src_end = coding->source + coding->src_bytes;
fa42c37f
KH
5952 }
5953 }
df7492f9
KH
5954 produced_chars = coding->src_chars;
5955 while (src < src_end)
fa42c37f 5956 {
df7492f9
KH
5957 int c = *src++;
5958
5959 if (c == '\r')
5960 {
5961 if (EQ (eol_type, Qdos))
5962 {
5963 if (src < src_end
5964 && *src == '\n')
5965 c = *src++;
5966 produced_chars--;
5967 }
5968 else if (EQ (eol_type, Qmac))
5969 c = '\n';
5970 }
5971 *dst++ = c;
fa42c37f
KH
5972 }
5973 }
2c78b7e1
KH
5974 coding->consumed = coding->src_bytes;
5975 coding->consumed_char = coding->src_chars;
fa42c37f
KH
5976 }
5977
df7492f9
KH
5978 produced = dst - (coding->destination + coding->produced);
5979 if (BUFFERP (coding->dst_object))
5980 insert_from_gap (produced_chars, produced);
5981 coding->produced += produced;
5982 coding->produced_char += produced_chars;
69a80ea3 5983 return carryover;
fa42c37f
KH
5984}
5985
ff0dacd7
KH
5986/* Compose text in CODING->object according to the annotation data at
5987 CHARBUF. CHARBUF is an array:
5988 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
df7492f9 5989 */
4ed46869 5990
df7492f9 5991static INLINE void
69a80ea3 5992produce_composition (coding, charbuf, pos)
4ed46869 5993 struct coding_system *coding;
df7492f9 5994 int *charbuf;
69a80ea3 5995 EMACS_INT pos;
4ed46869 5996{
df7492f9 5997 int len;
69a80ea3 5998 EMACS_INT to;
df7492f9 5999 enum composition_method method;
df7492f9 6000 Lisp_Object components;
fa42c37f 6001
df7492f9 6002 len = -charbuf[0];
69a80ea3 6003 to = pos + charbuf[2];
9ffd559c
KH
6004 if (to <= pos)
6005 return;
69a80ea3 6006 method = (enum composition_method) (charbuf[3]);
d46c5b12 6007
df7492f9
KH
6008 if (method == COMPOSITION_RELATIVE)
6009 components = Qnil;
9ffd559c
KH
6010 else if (method >= COMPOSITION_WITH_RULE
6011 && method <= COMPOSITION_WITH_RULE_ALTCHARS)
d46c5b12 6012 {
df7492f9
KH
6013 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6014 int i;
b73bfc1c 6015
69a80ea3
KH
6016 len -= 4;
6017 charbuf += 4;
df7492f9 6018 for (i = 0; i < len; i++)
9ffd559c
KH
6019 {
6020 args[i] = make_number (charbuf[i]);
6021 if (args[i] < 0)
6022 return;
6023 }
df7492f9
KH
6024 components = (method == COMPOSITION_WITH_ALTCHARS
6025 ? Fstring (len, args) : Fvector (len, args));
d46c5b12 6026 }
9ffd559c
KH
6027 else
6028 return;
69a80ea3 6029 compose_text (pos, to, components, Qnil, coding->dst_object);
d46c5b12
KH
6030}
6031
d46c5b12 6032
ff0dacd7
KH
6033/* Put `charset' property on text in CODING->object according to
6034 the annotation data at CHARBUF. CHARBUF is an array:
69a80ea3 6035 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
ff0dacd7 6036 */
d46c5b12 6037
ff0dacd7 6038static INLINE void
69a80ea3 6039produce_charset (coding, charbuf, pos)
d46c5b12 6040 struct coding_system *coding;
ff0dacd7 6041 int *charbuf;
69a80ea3 6042 EMACS_INT pos;
d46c5b12 6043{
69a80ea3
KH
6044 EMACS_INT from = pos - charbuf[2];
6045 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
b73bfc1c 6046
69a80ea3 6047 Fput_text_property (make_number (from), make_number (pos),
ff0dacd7
KH
6048 Qcharset, CHARSET_NAME (charset),
6049 coding->dst_object);
d46c5b12
KH
6050}
6051
d46c5b12 6052
df7492f9
KH
/* Number of `int' elements first requested for the conversion work
   area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf on the stack of the calling function and
   record its size, in elements, in CODING->charbuf_size.

   CHARBUF_SIZE elements are tried first; the request is halved
   while alloca yields a null pointer and the size is still above
   1024.  If no memory was obtained, CODING_RESULT_INSUFFICIENT_MEM
   is recorded and the *calling function* returns CODING->result.

   Since the memory comes from alloca, it is only usable until the
   calling function returns.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)				\
  do {									\
    int size = CHARBUF_SIZE;						\
									\
    (coding)->charbuf = NULL;						\
    while (size > 1024)							\
      {									\
	(coding)->charbuf = (int *) alloca (sizeof (int) * size);	\
	if ((coding)->charbuf)						\
	  break;							\
	size >>= 1;							\
      }									\
    if (! (coding)->charbuf)						\
      {									\
	record_conversion_result ((coding),				\
				  CODING_RESULT_INSUFFICIENT_MEM);	\
	return (coding)->result;					\
      }									\
    (coding)->charbuf_size = size;					\
  } while (0)
4ed46869 6074
d46c5b12
KH
6075
6076static void
69a80ea3 6077produce_annotation (coding, pos)
d46c5b12 6078 struct coding_system *coding;
69a80ea3 6079 EMACS_INT pos;
d46c5b12 6080{
df7492f9
KH
6081 int *charbuf = coding->charbuf;
6082 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 6083
ff0dacd7
KH
6084 if (NILP (coding->dst_object))
6085 return;
d46c5b12 6086
df7492f9 6087 while (charbuf < charbuf_end)
a84f1519 6088 {
df7492f9 6089 if (*charbuf >= 0)
69a80ea3 6090 pos += *charbuf++;
d46c5b12 6091 else
d46c5b12 6092 {
df7492f9 6093 int len = -*charbuf;
ff0dacd7 6094 switch (charbuf[1])
df7492f9
KH
6095 {
6096 case CODING_ANNOTATE_COMPOSITION_MASK:
69a80ea3 6097 produce_composition (coding, charbuf, pos);
df7492f9 6098 break;
ff0dacd7 6099 case CODING_ANNOTATE_CHARSET_MASK:
69a80ea3 6100 produce_charset (coding, charbuf, pos);
ff0dacd7 6101 break;
df7492f9
KH
6102 default:
6103 abort ();
6104 }
6105 charbuf += len;
d46c5b12 6106 }
a84f1519 6107 }
d46c5b12
KH
6108}
6109
df7492f9
KH
6110/* Decode the data at CODING->src_object into CODING->dst_object.
6111 CODING->src_object is a buffer, a string, or nil.
6112 CODING->dst_object is a buffer.
d46c5b12 6113
df7492f9
KH
6114 If CODING->src_object is a buffer, it must be the current buffer.
6115 In this case, if CODING->src_pos is positive, it is a position of
6116 the source text in the buffer, otherwise, the source text is in the
6117 gap area of the buffer, and CODING->src_pos specifies the offset of
6118 the text from GPT (which must be the same as PT). If this is the
6119 same buffer as CODING->dst_object, CODING->src_pos must be
6120 negative.
d46c5b12 6121
df7492f9
KH
6122 If CODING->src_object is a string, CODING->src_pos in an index to
6123 that string.
d46c5b12 6124
df7492f9
KH
6125 If CODING->src_object is nil, CODING->source must already point to
6126 the non-relocatable memory area. In this case, CODING->src_pos is
6127 an offset from CODING->source.
73be902c 6128
df7492f9
KH
6129 The decoded data is inserted at the current point of the buffer
6130 CODING->dst_object.
6131*/
d46c5b12 6132
df7492f9
KH
6133static int
6134decode_coding (coding)
d46c5b12 6135 struct coding_system *coding;
d46c5b12 6136{
df7492f9 6137 Lisp_Object attrs;
24a73b0a 6138 Lisp_Object undo_list;
7d64c6ad 6139 Lisp_Object translation_table;
69a80ea3
KH
6140 int carryover;
6141 int i;
d46c5b12 6142
df7492f9
KH
6143 if (BUFFERP (coding->src_object)
6144 && coding->src_pos > 0
6145 && coding->src_pos < GPT
6146 && coding->src_pos + coding->src_chars > GPT)
6147 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 6148
24a73b0a 6149 undo_list = Qt;
df7492f9 6150 if (BUFFERP (coding->dst_object))
1c3478b0 6151 {
df7492f9
KH
6152 if (current_buffer != XBUFFER (coding->dst_object))
6153 set_buffer_internal (XBUFFER (coding->dst_object));
6154 if (GPT != PT)
6155 move_gap_both (PT, PT_BYTE);
24a73b0a
KH
6156 undo_list = current_buffer->undo_list;
6157 current_buffer->undo_list = Qt;
1c3478b0
KH
6158 }
6159
df7492f9
KH
6160 coding->consumed = coding->consumed_char = 0;
6161 coding->produced = coding->produced_char = 0;
6162 coding->chars_at_source = 0;
065e3595 6163 record_conversion_result (coding, CODING_RESULT_SUCCESS);
df7492f9 6164 coding->errors = 0;
1c3478b0 6165
df7492f9
KH
6166 ALLOC_CONVERSION_WORK_AREA (coding);
6167
6168 attrs = CODING_ID_ATTRS (coding->id);
2170c8f0 6169 translation_table = get_translation_table (attrs, 0, NULL);
df7492f9 6170
69a80ea3 6171 carryover = 0;
df7492f9 6172 do
b73bfc1c 6173 {
69a80ea3
KH
6174 EMACS_INT pos = coding->dst_pos + coding->produced_char;
6175
df7492f9
KH
6176 coding_set_source (coding);
6177 coding->annotated = 0;
69a80ea3 6178 coding->charbuf_used = carryover;
df7492f9 6179 (*(coding->decoder)) (coding);
df7492f9 6180 coding_set_destination (coding);
69a80ea3 6181 carryover = produce_chars (coding, translation_table, 0);
df7492f9 6182 if (coding->annotated)
69a80ea3
KH
6183 produce_annotation (coding, pos);
6184 for (i = 0; i < carryover; i++)
6185 coding->charbuf[i]
6186 = coding->charbuf[coding->charbuf_used - carryover + i];
d46c5b12 6187 }
df7492f9
KH
6188 while (coding->consumed < coding->src_bytes
6189 && ! coding->result);
d46c5b12 6190
69a80ea3
KH
6191 if (carryover > 0)
6192 {
6193 coding_set_destination (coding);
6194 coding->charbuf_used = carryover;
6195 produce_chars (coding, translation_table, 1);
6196 }
6197
df7492f9
KH
6198 coding->carryover_bytes = 0;
6199 if (coding->consumed < coding->src_bytes)
d46c5b12 6200 {
df7492f9 6201 int nbytes = coding->src_bytes - coding->consumed;
8f924df7 6202 const unsigned char *src;
df7492f9
KH
6203
6204 coding_set_source (coding);
6205 coding_set_destination (coding);
6206 src = coding->source + coding->consumed;
6207
6208 if (coding->mode & CODING_MODE_LAST_BLOCK)
1c3478b0 6209 {
df7492f9
KH
6210 /* Flush out unprocessed data as binary chars. We are sure
6211 that the number of data is less than the size of
6212 coding->charbuf. */
065e3595 6213 coding->charbuf_used = 0;
df7492f9 6214 while (nbytes-- > 0)
1c3478b0 6215 {
df7492f9 6216 int c = *src++;
98725083 6217
1c91457d
KH
6218 if (c & 0x80)
6219 c = BYTE8_TO_CHAR (c);
6220 coding->charbuf[coding->charbuf_used++] = c;
1c3478b0 6221 }
f6cbaf43 6222 produce_chars (coding, Qnil, 1);
d46c5b12 6223 }
d46c5b12 6224 else
df7492f9
KH
6225 {
6226 /* Record unprocessed bytes in coding->carryover. We are
6227 sure that the number of data is less than the size of
6228 coding->carryover. */
6229 unsigned char *p = coding->carryover;
6230
6231 coding->carryover_bytes = nbytes;
6232 while (nbytes-- > 0)
6233 *p++ = *src++;
1c3478b0 6234 }
df7492f9 6235 coding->consumed = coding->src_bytes;
b73bfc1c 6236 }
69f76525 6237
24a73b0a
KH
6238 if (BUFFERP (coding->dst_object))
6239 {
6240 current_buffer->undo_list = undo_list;
6241 record_insert (coding->dst_pos, coding->produced_char);
6242 }
6243 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6244 decode_eol (coding);
73be902c 6245 return coding->result;
4ed46869
KH
6246}
6247
aaaf0b1e 6248
e1c23804 6249/* Extract an annotation datum from a composition starting at POS and
ff0dacd7
KH
6250 ending before LIMIT of CODING->src_object (buffer or string), store
6251 the data in BUF, set *STOP to a starting position of the next
6252 composition (if any) or to LIMIT, and return the address of the
6253 next element of BUF.
6254
6255 If such an annotation is not found, set *STOP to a starting
6256 position of a composition after POS (if any) or to LIMIT, and
6257 return BUF. */
6258
6259static INLINE int *
6260handle_composition_annotation (pos, limit, coding, buf, stop)
6261 EMACS_INT pos, limit;
aaaf0b1e 6262 struct coding_system *coding;
ff0dacd7
KH
6263 int *buf;
6264 EMACS_INT *stop;
aaaf0b1e 6265{
ff0dacd7
KH
6266 EMACS_INT start, end;
6267 Lisp_Object prop;
aaaf0b1e 6268
ff0dacd7
KH
6269 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6270 || end > limit)
6271 *stop = limit;
6272 else if (start > pos)
6273 *stop = start;
6274 else
aaaf0b1e 6275 {
ff0dacd7 6276 if (start == pos)
aaaf0b1e 6277 {
ff0dacd7
KH
6278 /* We found a composition. Store the corresponding
6279 annotation data in BUF. */
6280 int *head = buf;
6281 enum composition_method method = COMPOSITION_METHOD (prop);
6282 int nchars = COMPOSITION_LENGTH (prop);
6283
69a80ea3 6284 ADD_COMPOSITION_DATA (buf, nchars, method);
ff0dacd7 6285 if (method != COMPOSITION_RELATIVE)
aaaf0b1e 6286 {
ff0dacd7
KH
6287 Lisp_Object components;
6288 int len, i, i_byte;
6289
6290 components = COMPOSITION_COMPONENTS (prop);
6291 if (VECTORP (components))
aaaf0b1e 6292 {
ff0dacd7
KH
6293 len = XVECTOR (components)->size;
6294 for (i = 0; i < len; i++)
6295 *buf++ = XINT (AREF (components, i));
aaaf0b1e 6296 }
ff0dacd7 6297 else if (STRINGP (components))
aaaf0b1e 6298 {
8f924df7 6299 len = SCHARS (components);
ff0dacd7
KH
6300 i = i_byte = 0;
6301 while (i < len)
6302 {
6303 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6304 buf++;
6305 }
6306 }
6307 else if (INTEGERP (components))
6308 {
6309 len = 1;
6310 *buf++ = XINT (components);
6311 }
6312 else if (CONSP (components))
6313 {
6314 for (len = 0; CONSP (components);
6315 len++, components = XCDR (components))
6316 *buf++ = XINT (XCAR (components));
aaaf0b1e 6317 }
aaaf0b1e 6318 else
ff0dacd7
KH
6319 abort ();
6320 *head -= len;
aaaf0b1e 6321 }
aaaf0b1e 6322 }
ff0dacd7
KH
6323
6324 if (find_composition (end, limit, &start, &end, &prop,
6325 coding->src_object)
6326 && end <= limit)
6327 *stop = start;
6328 else
6329 *stop = limit;
aaaf0b1e 6330 }
ff0dacd7
KH
6331 return buf;
6332}
6333
6334
e1c23804 6335/* Extract an annotation datum from a text property `charset' at POS of
ff0dacd7
KH
6336 CODING->src_object (buffer of string), store the data in BUF, set
6337 *STOP to the position where the value of `charset' property changes
6338 (limiting by LIMIT), and return the address of the next element of
6339 BUF.
6340
6341 If the property value is nil, set *STOP to the position where the
6342 property value is non-nil (limiting by LIMIT), and return BUF. */
6343
6344static INLINE int *
6345handle_charset_annotation (pos, limit, coding, buf, stop)
6346 EMACS_INT pos, limit;
6347 struct coding_system *coding;
6348 int *buf;
6349 EMACS_INT *stop;
6350{
6351 Lisp_Object val, next;
6352 int id;
6353
6354 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6355 if (! NILP (val) && CHARSETP (val))
6356 id = XINT (CHARSET_SYMBOL_ID (val));
6357 else
6358 id = -1;
69a80ea3 6359 ADD_CHARSET_DATA (buf, 0, id);
ff0dacd7
KH
6360 next = Fnext_single_property_change (make_number (pos), Qcharset,
6361 coding->src_object,
6362 make_number (limit));
6363 *stop = XINT (next);
6364 return buf;
6365}
6366
6367
df7492f9 6368static void
09ee6fdd 6369consume_chars (coding, translation_table, max_lookup)
df7492f9 6370 struct coding_system *coding;
433f7f87 6371 Lisp_Object translation_table;
09ee6fdd 6372 int max_lookup;
df7492f9
KH
6373{
6374 int *buf = coding->charbuf;
ff0dacd7 6375 int *buf_end = coding->charbuf + coding->charbuf_size;
7c78e542 6376 const unsigned char *src = coding->source + coding->consumed;
4776e638 6377 const unsigned char *src_end = coding->source + coding->src_bytes;
ff0dacd7
KH
6378 EMACS_INT pos = coding->src_pos + coding->consumed_char;
6379 EMACS_INT end_pos = coding->src_pos + coding->src_chars;
df7492f9
KH
6380 int multibytep = coding->src_multibyte;
6381 Lisp_Object eol_type;
6382 int c;
ff0dacd7 6383 EMACS_INT stop, stop_composition, stop_charset;
09ee6fdd 6384 int *lookup_buf = NULL;
433f7f87
KH
6385
6386 if (! NILP (translation_table))
09ee6fdd 6387 lookup_buf = alloca (sizeof (int) * max_lookup);
88993dfd 6388
df7492f9
KH
6389 eol_type = CODING_ID_EOL_TYPE (coding->id);
6390 if (VECTORP (eol_type))
6391 eol_type = Qunix;
88993dfd 6392
df7492f9
KH
6393 /* Note: composition handling is not yet implemented. */
6394 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 6395
0b5670c9
KH
6396 if (NILP (coding->src_object))
6397 stop = stop_composition = stop_charset = end_pos;
ff0dacd7 6398 else
0b5670c9
KH
6399 {
6400 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6401 stop = stop_composition = pos;
6402 else
6403 stop = stop_composition = end_pos;
6404 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6405 stop = stop_charset = pos;
6406 else
6407 stop_charset = end_pos;
6408 }
ec6d2bb8 6409
24a73b0a 6410 /* Compensate for CRLF and conversion. */
ff0dacd7 6411 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
df7492f9 6412 while (buf < buf_end)
aaaf0b1e 6413 {
433f7f87
KH
6414 Lisp_Object trans;
6415
df7492f9 6416 if (pos == stop)
ec6d2bb8 6417 {
df7492f9
KH
6418 if (pos == end_pos)
6419 break;
ff0dacd7
KH
6420 if (pos == stop_composition)
6421 buf = handle_composition_annotation (pos, end_pos, coding,
6422 buf, &stop_composition);
6423 if (pos == stop_charset)
6424 buf = handle_charset_annotation (pos, end_pos, coding,
6425 buf, &stop_charset);
6426 stop = (stop_composition < stop_charset
6427 ? stop_composition : stop_charset);
df7492f9
KH
6428 }
6429
6430 if (! multibytep)
4776e638 6431 {
d3e4cb56 6432 EMACS_INT bytes;
aaaf0b1e 6433
ea29edf2
KH
6434 if (coding->encoder == encode_coding_raw_text)
6435 c = *src++, pos++;
6436 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
4776e638
KH
6437 c = STRING_CHAR_ADVANCE (src), pos += bytes;
6438 else
f03caae0 6439 c = BYTE8_TO_CHAR (*src), src++, pos++;
4776e638 6440 }
df7492f9 6441 else
4776e638 6442 c = STRING_CHAR_ADVANCE (src), pos++;
df7492f9
KH
6443 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6444 c = '\n';
6445 if (! EQ (eol_type, Qunix))
aaaf0b1e 6446 {
df7492f9 6447 if (c == '\n')
aaaf0b1e 6448 {
df7492f9
KH
6449 if (EQ (eol_type, Qdos))
6450 *buf++ = '\r';
6451 else
6452 c = '\r';
aaaf0b1e
KH
6453 }
6454 }
433f7f87 6455
e6a54062 6456 trans = Qnil;
09ee6fdd 6457 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
e6a54062 6458 if (NILP (trans))
433f7f87
KH
6459 *buf++ = c;
6460 else
6461 {
6462 int from_nchars = 1, to_nchars = 1;
6463 int *lookup_buf_end;
6464 const unsigned char *p = src;
6465 int i;
6466
6467 lookup_buf[0] = c;
6468 for (i = 1; i < max_lookup && p < src_end; i++)
6469 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6470 lookup_buf_end = lookup_buf + i;
6471 trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6472 &from_nchars, &to_nchars);
6473 if (EQ (trans, Qt)
6474 || buf + to_nchars > buf_end)
6475 break;
6476 *buf++ = *lookup_buf;
6477 for (i = 1; i < to_nchars; i++)
6478 *buf++ = XINT (AREF (trans, i));
6479 for (i = 1; i < from_nchars; i++, pos++)
6480 src += MULTIBYTE_LENGTH_NO_CHECK (src);
6481 }
aaaf0b1e 6482 }
ec6d2bb8 6483
df7492f9
KH
6484 coding->consumed = src - coding->source;
6485 coding->consumed_char = pos - coding->src_pos;
6486 coding->charbuf_used = buf - coding->charbuf;
6487 coding->chars_at_source = 0;
aaaf0b1e
KH
6488}
6489
4ed46869 6490
df7492f9
KH
6491/* Encode the text at CODING->src_object into CODING->dst_object.
6492 CODING->src_object is a buffer or a string.
6493 CODING->dst_object is a buffer or nil.
6494
6495 If CODING->src_object is a buffer, it must be the current buffer.
6496 In this case, if CODING->src_pos is positive, it is a position of
6497 the source text in the buffer, otherwise. the source text is in the
6498 gap area of the buffer, and coding->src_pos specifies the offset of
6499 the text from GPT (which must be the same as PT). If this is the
6500 same buffer as CODING->dst_object, CODING->src_pos must be
6501 negative and CODING should not have `pre-write-conversion'.
6502
6503 If CODING->src_object is a string, CODING should not have
6504 `pre-write-conversion'.
6505
6506 If CODING->dst_object is a buffer, the encoded data is inserted at
6507 the current point of that buffer.
6508
6509 If CODING->dst_object is nil, the encoded data is placed at the
6510 memory area specified by CODING->destination. */
6511
6512static int
6513encode_coding (coding)
4ed46869 6514 struct coding_system *coding;
4ed46869 6515{
df7492f9 6516 Lisp_Object attrs;
7d64c6ad 6517 Lisp_Object translation_table;
09ee6fdd 6518 int max_lookup;
9861e777 6519
df7492f9 6520 attrs = CODING_ID_ATTRS (coding->id);
ea29edf2
KH
6521 if (coding->encoder == encode_coding_raw_text)
6522 translation_table = Qnil, max_lookup = 0;
6523 else
6524 translation_table = get_translation_table (attrs, 1, &max_lookup);
4ed46869 6525
df7492f9 6526 if (BUFFERP (coding->dst_object))
8844fa83 6527 {
df7492f9
KH
6528 set_buffer_internal (XBUFFER (coding->dst_object));
6529 coding->dst_multibyte
6530 = ! NILP (current_buffer->enable_multibyte_characters);
8844fa83 6531 }
4ed46869 6532
b73bfc1c 6533 coding->consumed = coding->consumed_char = 0;
df7492f9 6534 coding->produced = coding->produced_char = 0;
065e3595 6535 record_conversion_result (coding, CODING_RESULT_SUCCESS);
b73bfc1c 6536 coding->errors = 0;
b73bfc1c 6537
df7492f9 6538 ALLOC_CONVERSION_WORK_AREA (coding);
4ed46869 6539
df7492f9
KH
6540 do {
6541 coding_set_source (coding);
09ee6fdd 6542 consume_chars (coding, translation_table, max_lookup);
df7492f9
KH
6543 coding_set_destination (coding);
6544 (*(coding->encoder)) (coding);
6545 } while (coding->consumed_char < coding->src_chars);
6546
6547 if (BUFFERP (coding->dst_object))
6548 insert_from_gap (coding->produced_char, coding->produced);
6549
6550 return (coding->result);
ec6d2bb8
KH
6551}
6552
fb88bf2d 6553
24a73b0a
KH
6554/* Name (or base name) of work buffer for code conversion. */
6555static Lisp_Object Vcode_conversion_workbuf_name;
d46c5b12 6556
24a73b0a
KH
6557/* A working buffer used by the top level conversion. Once it is
6558 created, it is never destroyed. It has the name
6559 Vcode_conversion_workbuf_name. The other working buffers are
6560 destroyed after the use is finished, and their names are modified
6561 versions of Vcode_conversion_workbuf_name. */
6562static Lisp_Object Vcode_conversion_reused_workbuf;
b73bfc1c 6563
24a73b0a
KH
6564/* 1 iff Vcode_conversion_reused_workbuf is already in use. */
6565static int reused_workbuf_in_use;
4ed46869 6566
24a73b0a
KH
6567
6568/* Return a working buffer of code convesion. MULTIBYTE specifies the
6569 multibyteness of returning buffer. */
b73bfc1c 6570
f6cbaf43 6571static Lisp_Object
24a73b0a 6572make_conversion_work_buffer (multibyte)
f6cbaf43 6573 int multibyte;
df7492f9 6574{
24a73b0a
KH
6575 Lisp_Object name, workbuf;
6576 struct buffer *current;
4ed46869 6577
24a73b0a 6578 if (reused_workbuf_in_use++)
065e3595
KH
6579 {
6580 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6581 workbuf = Fget_buffer_create (name);
6582 }
df7492f9 6583 else
065e3595
KH
6584 {
6585 name = Vcode_conversion_workbuf_name;
6586 workbuf = Fget_buffer_create (name);
6587 if (NILP (Vcode_conversion_reused_workbuf))
6588 Vcode_conversion_reused_workbuf = workbuf;
6589 }
24a73b0a
KH
6590 current = current_buffer;
6591 set_buffer_internal (XBUFFER (workbuf));
6592 Ferase_buffer ();
df7492f9 6593 current_buffer->undo_list = Qt;
24a73b0a 6594 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
df7492f9 6595 set_buffer_internal (current);
24a73b0a 6596 return workbuf;
df7492f9 6597}
d46c5b12 6598
24a73b0a 6599
4776e638 6600static Lisp_Object
24a73b0a
KH
6601code_conversion_restore (arg)
6602 Lisp_Object arg;
4776e638 6603{
24a73b0a 6604 Lisp_Object current, workbuf;
948bdcf3 6605 struct gcpro gcpro1;
24a73b0a 6606
948bdcf3 6607 GCPRO1 (arg);
24a73b0a
KH
6608 current = XCAR (arg);
6609 workbuf = XCDR (arg);
6610 if (! NILP (workbuf))
6611 {
6612 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6613 reused_workbuf_in_use = 0;
6614 else if (! NILP (Fbuffer_live_p (workbuf)))
6615 Fkill_buffer (workbuf);
6616 }
6617 set_buffer_internal (XBUFFER (current));
948bdcf3 6618 UNGCPRO;
4776e638
KH
6619 return Qnil;
6620}
b73bfc1c 6621
24a73b0a
KH
6622Lisp_Object
6623code_conversion_save (with_work_buf, multibyte)
4776e638 6624 int with_work_buf, multibyte;
df7492f9 6625{
24a73b0a 6626 Lisp_Object workbuf = Qnil;
b73bfc1c 6627
4776e638 6628 if (with_work_buf)
24a73b0a
KH
6629 workbuf = make_conversion_work_buffer (multibyte);
6630 record_unwind_protect (code_conversion_restore,
6631 Fcons (Fcurrent_buffer (), workbuf));
4776e638 6632 return workbuf;
df7492f9 6633}
d46c5b12 6634
df7492f9
KH
6635int
6636decode_coding_gap (coding, chars, bytes)
6637 struct coding_system *coding;
6638 EMACS_INT chars, bytes;
6639{
6640 int count = specpdl_ptr - specpdl;
5e5c78be 6641 Lisp_Object attrs;
fb88bf2d 6642
24a73b0a 6643 code_conversion_save (0, 0);
ec6d2bb8 6644
24a73b0a 6645 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6646 coding->src_chars = chars;
6647 coding->src_bytes = bytes;
6648 coding->src_pos = -chars;
6649 coding->src_pos_byte = -bytes;
6650 coding->src_multibyte = chars < bytes;
24a73b0a 6651 coding->dst_object = coding->src_object;
df7492f9
KH
6652 coding->dst_pos = PT;
6653 coding->dst_pos_byte = PT_BYTE;
71c81426 6654 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
98725083 6655 coding->mode |= CODING_MODE_LAST_BLOCK;
4ed46869 6656
df7492f9
KH
6657 if (CODING_REQUIRE_DETECTION (coding))
6658 detect_coding (coding);
8f924df7 6659
df7492f9 6660 decode_coding (coding);
d46c5b12 6661
5e5c78be
KH
6662 attrs = CODING_ID_ATTRS (coding->id);
6663 if (! NILP (CODING_ATTR_POST_READ (attrs)))
b73bfc1c 6664 {
5e5c78be
KH
6665 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6666 Lisp_Object val;
6667
6668 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
5e5c78be
KH
6669 val = call1 (CODING_ATTR_POST_READ (attrs),
6670 make_number (coding->produced_char));
5e5c78be
KH
6671 CHECK_NATNUM (val);
6672 coding->produced_char += Z - prev_Z;
6673 coding->produced += Z_BYTE - prev_Z_BYTE;
b73bfc1c 6674 }
4ed46869 6675
df7492f9 6676 unbind_to (count, Qnil);
b73bfc1c
KH
6677 return coding->result;
6678}
52d41803 6679
4ed46869 6680int
df7492f9 6681encode_coding_gap (coding, chars, bytes)
4ed46869 6682 struct coding_system *coding;
df7492f9 6683 EMACS_INT chars, bytes;
4ed46869 6684{
df7492f9 6685 int count = specpdl_ptr - specpdl;
4ed46869 6686
24a73b0a 6687 code_conversion_save (0, 0);
4ed46869 6688
24a73b0a 6689 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6690 coding->src_chars = chars;
6691 coding->src_bytes = bytes;
6692 coding->src_pos = -chars;
6693 coding->src_pos_byte = -bytes;
6694 coding->src_multibyte = chars < bytes;
6695 coding->dst_object = coding->src_object;
6696 coding->dst_pos = PT;
6697 coding->dst_pos_byte = PT_BYTE;
4ed46869 6698
df7492f9 6699 encode_coding (coding);
b73bfc1c 6700
df7492f9
KH
6701 unbind_to (count, Qnil);
6702 return coding->result;
6703}
4ed46869 6704
d46c5b12 6705
df7492f9
KH
6706/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6707 SRC_OBJECT into DST_OBJECT by coding context CODING.
b73bfc1c 6708
df7492f9 6709 SRC_OBJECT is a buffer, a string, or Qnil.
b73bfc1c 6710
df7492f9
KH
6711 If it is a buffer, the text is at point of the buffer. FROM and TO
6712 are positions in the buffer.
b73bfc1c 6713
df7492f9
KH
6714 If it is a string, the text is at the beginning of the string.
6715 FROM and TO are indices to the string.
4ed46869 6716
df7492f9
KH
6717 If it is nil, the text is at coding->source. FROM and TO are
6718 indices to coding->source.
bb10be8b 6719
df7492f9 6720 DST_OBJECT is a buffer, Qt, or Qnil.
4ed46869 6721
df7492f9
KH
6722 If it is a buffer, the decoded text is inserted at point of the
6723 buffer. If the buffer is the same as SRC_OBJECT, the source text
6724 is deleted.
4ed46869 6725
df7492f9
KH
6726 If it is Qt, a string is made from the decoded text, and
6727 set in CODING->dst_object.
d46c5b12 6728
df7492f9 6729 If it is Qnil, the decoded text is stored at CODING->destination.
2cb26057 6730 The caller must allocate CODING->dst_bytes bytes at
df7492f9
KH
6731 CODING->destination by xmalloc. If the decoded text is longer than
6732 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6733 */
d46c5b12 6734
df7492f9
KH
6735void
6736decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6737 dst_object)
d46c5b12 6738 struct coding_system *coding;
df7492f9
KH
6739 Lisp_Object src_object;
6740 EMACS_INT from, from_byte, to, to_byte;
6741 Lisp_Object dst_object;
d46c5b12 6742{
df7492f9
KH
6743 int count = specpdl_ptr - specpdl;
6744 unsigned char *destination;
6745 EMACS_INT dst_bytes;
6746 EMACS_INT chars = to - from;
6747 EMACS_INT bytes = to_byte - from_byte;
6748 Lisp_Object attrs;
4776e638
KH
6749 Lisp_Object buffer;
6750 int saved_pt = -1, saved_pt_byte;
d46c5b12 6751
4776e638 6752 buffer = Fcurrent_buffer ();
93dec019 6753
df7492f9 6754 if (NILP (dst_object))
d46c5b12 6755 {
df7492f9
KH
6756 destination = coding->destination;
6757 dst_bytes = coding->dst_bytes;
d46c5b12 6758 }
93dec019 6759
df7492f9
KH
6760 coding->src_object = src_object;
6761 coding->src_chars = chars;
6762 coding->src_bytes = bytes;
6763 coding->src_multibyte = chars < bytes;
70ad9fc4 6764
df7492f9 6765 if (STRINGP (src_object))
d46c5b12 6766 {
df7492f9
KH
6767 coding->src_pos = from;
6768 coding->src_pos_byte = from_byte;
d46c5b12 6769 }
df7492f9 6770 else if (BUFFERP (src_object))
88993dfd 6771 {
df7492f9
KH
6772 set_buffer_internal (XBUFFER (src_object));
6773 if (from != GPT)
6774 move_gap_both (from, from_byte);
6775 if (EQ (src_object, dst_object))
fb88bf2d 6776 {
4776e638 6777 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6778 TEMP_SET_PT_BOTH (from, from_byte);
6779 del_range_both (from, from_byte, to, to_byte, 1);
6780 coding->src_pos = -chars;
6781 coding->src_pos_byte = -bytes;
fb88bf2d 6782 }
df7492f9 6783 else
fb88bf2d 6784 {
df7492f9
KH
6785 coding->src_pos = from;
6786 coding->src_pos_byte = from_byte;
fb88bf2d 6787 }
88993dfd
KH
6788 }
6789
df7492f9
KH
6790 if (CODING_REQUIRE_DETECTION (coding))
6791 detect_coding (coding);
6792 attrs = CODING_ID_ATTRS (coding->id);
d46c5b12 6793
2cb26057
KH
6794 if (EQ (dst_object, Qt)
6795 || (! NILP (CODING_ATTR_POST_READ (attrs))
6796 && NILP (dst_object)))
b73bfc1c 6797 {
24a73b0a 6798 coding->dst_object = code_conversion_save (1, 1);
df7492f9
KH
6799 coding->dst_pos = BEG;
6800 coding->dst_pos_byte = BEG_BYTE;
6801 coding->dst_multibyte = 1;
b73bfc1c 6802 }
df7492f9 6803 else if (BUFFERP (dst_object))
d46c5b12 6804 {
24a73b0a 6805 code_conversion_save (0, 0);
df7492f9
KH
6806 coding->dst_object = dst_object;
6807 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6808 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6809 coding->dst_multibyte
6810 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
d46c5b12
KH
6811 }
6812 else
6813 {
24a73b0a 6814 code_conversion_save (0, 0);
df7492f9
KH
6815 coding->dst_object = Qnil;
6816 coding->dst_multibyte = 1;
d46c5b12
KH
6817 }
6818
df7492f9 6819 decode_coding (coding);
fa46990e 6820
df7492f9
KH
6821 if (BUFFERP (coding->dst_object))
6822 set_buffer_internal (XBUFFER (coding->dst_object));
d46c5b12 6823
df7492f9 6824 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6825 {
df7492f9
KH
6826 struct gcpro gcpro1, gcpro2;
6827 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6828 Lisp_Object val;
d46c5b12 6829
c0cc7f7f 6830 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9 6831 GCPRO2 (coding->src_object, coding->dst_object);
d4850d67
KH
6832 val = safe_call1 (CODING_ATTR_POST_READ (attrs),
6833 make_number (coding->produced_char));
df7492f9
KH
6834 UNGCPRO;
6835 CHECK_NATNUM (val);
6836 coding->produced_char += Z - prev_Z;
6837 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6838 }
de79a6a5 6839
df7492f9 6840 if (EQ (dst_object, Qt))
ec6d2bb8 6841 {
df7492f9
KH
6842 coding->dst_object = Fbuffer_string ();
6843 }
6844 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6845 {
6846 set_buffer_internal (XBUFFER (coding->dst_object));
6847 if (dst_bytes < coding->produced)
6848 {
6849 destination
6850 = (unsigned char *) xrealloc (destination, coding->produced);
6851 if (! destination)
6852 {
065e3595
KH
6853 record_conversion_result (coding,
6854 CODING_RESULT_INSUFFICIENT_DST);
df7492f9
KH
6855 unbind_to (count, Qnil);
6856 return;
6857 }
6858 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6859 move_gap_both (BEGV, BEGV_BYTE);
6860 bcopy (BEGV_ADDR, destination, coding->produced);
6861 coding->destination = destination;
d46c5b12 6862 }
ec6d2bb8 6863 }
b73bfc1c 6864
4776e638
KH
6865 if (saved_pt >= 0)
6866 {
6867 /* This is the case of:
6868 (BUFFERP (src_object) && EQ (src_object, dst_object))
6869 As we have moved PT while replacing the original buffer
6870 contents, we must recover it now. */
6871 set_buffer_internal (XBUFFER (src_object));
6872 if (saved_pt < from)
6873 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6874 else if (saved_pt < from + chars)
6875 TEMP_SET_PT_BOTH (from, from_byte);
6876 else if (! NILP (current_buffer->enable_multibyte_characters))
6877 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6878 saved_pt_byte + (coding->produced - bytes));
6879 else
6880 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6881 saved_pt_byte + (coding->produced - bytes));
d46c5b12 6882 }
4776e638 6883
065e3595 6884 unbind_to (count, coding->dst_object);
d46c5b12
KH
6885}
6886
d46c5b12 6887
df7492f9
KH
6888void
6889encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6890 dst_object)
d46c5b12 6891 struct coding_system *coding;
df7492f9
KH
6892 Lisp_Object src_object;
6893 EMACS_INT from, from_byte, to, to_byte;
6894 Lisp_Object dst_object;
d46c5b12 6895{
b73bfc1c 6896 int count = specpdl_ptr - specpdl;
df7492f9
KH
6897 EMACS_INT chars = to - from;
6898 EMACS_INT bytes = to_byte - from_byte;
6899 Lisp_Object attrs;
4776e638
KH
6900 Lisp_Object buffer;
6901 int saved_pt = -1, saved_pt_byte;
c02d943b 6902 int kill_src_buffer = 0;
df7492f9 6903
4776e638 6904 buffer = Fcurrent_buffer ();
df7492f9
KH
6905
6906 coding->src_object = src_object;
6907 coding->src_chars = chars;
6908 coding->src_bytes = bytes;
6909 coding->src_multibyte = chars < bytes;
6910
6911 attrs = CODING_ID_ATTRS (coding->id);
6912
6913 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6914 {
24a73b0a 6915 coding->src_object = code_conversion_save (1, coding->src_multibyte);
df7492f9
KH
6916 set_buffer_internal (XBUFFER (coding->src_object));
6917 if (STRINGP (src_object))
6918 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6919 else if (BUFFERP (src_object))
6920 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6921 else
6922 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
d46c5b12 6923
df7492f9
KH
6924 if (EQ (src_object, dst_object))
6925 {
6926 set_buffer_internal (XBUFFER (src_object));
4776e638 6927 saved_pt = PT, saved_pt_byte = PT_BYTE;
df7492f9
KH
6928 del_range_both (from, from_byte, to, to_byte, 1);
6929 set_buffer_internal (XBUFFER (coding->src_object));
6930 }
6931
d4850d67
KH
6932 {
6933 Lisp_Object args[3];
6934
6935 args[0] = CODING_ATTR_PRE_WRITE (attrs);
6936 args[1] = make_number (BEG);
6937 args[2] = make_number (Z);
6938 safe_call (3, args);
6939 }
c02d943b
KH
6940 if (XBUFFER (coding->src_object) != current_buffer)
6941 kill_src_buffer = 1;
ac87bbef 6942 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6943 if (BEG != GPT)
6944 move_gap_both (BEG, BEG_BYTE);
6945 coding->src_chars = Z - BEG;
6946 coding->src_bytes = Z_BYTE - BEG_BYTE;
6947 coding->src_pos = BEG;
6948 coding->src_pos_byte = BEG_BYTE;
6949 coding->src_multibyte = Z < Z_BYTE;
6950 }
6951 else if (STRINGP (src_object))
d46c5b12 6952 {
24a73b0a 6953 code_conversion_save (0, 0);
df7492f9
KH
6954 coding->src_pos = from;
6955 coding->src_pos_byte = from_byte;
b73bfc1c 6956 }
df7492f9 6957 else if (BUFFERP (src_object))
b73bfc1c 6958 {
24a73b0a 6959 code_conversion_save (0, 0);
df7492f9 6960 set_buffer_internal (XBUFFER (src_object));
df7492f9 6961 if (EQ (src_object, dst_object))
d46c5b12 6962 {
4776e638 6963 saved_pt = PT, saved_pt_byte = PT_BYTE;
ff0dacd7
KH
6964 coding->src_object = del_range_1 (from, to, 1, 1);
6965 coding->src_pos = 0;
6966 coding->src_pos_byte = 0;
d46c5b12 6967 }
df7492f9 6968 else
d46c5b12 6969 {
ff0dacd7
KH
6970 if (from < GPT && to >= GPT)
6971 move_gap_both (from, from_byte);
df7492f9
KH
6972 coding->src_pos = from;
6973 coding->src_pos_byte = from_byte;
d46c5b12 6974 }
d46c5b12 6975 }
4776e638 6976 else
24a73b0a 6977 code_conversion_save (0, 0);
d46c5b12 6978
df7492f9 6979 if (BUFFERP (dst_object))
88993dfd 6980 {
df7492f9 6981 coding->dst_object = dst_object;
28f67a95
KH
6982 if (EQ (src_object, dst_object))
6983 {
6984 coding->dst_pos = from;
6985 coding->dst_pos_byte = from_byte;
6986 }
6987 else
6988 {
6989 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6990 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6991 }
df7492f9
KH
6992 coding->dst_multibyte
6993 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
88993dfd 6994 }
df7492f9 6995 else if (EQ (dst_object, Qt))
d46c5b12 6996 {
df7492f9 6997 coding->dst_object = Qnil;
df7492f9 6998 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6999 if (coding->dst_bytes == 0)
7000 coding->dst_bytes = 1;
7001 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 7002 coding->dst_multibyte = 0;
d46c5b12
KH
7003 }
7004 else
7005 {
df7492f9
KH
7006 coding->dst_object = Qnil;
7007 coding->dst_multibyte = 0;
d46c5b12
KH
7008 }
7009
df7492f9 7010 encode_coding (coding);
d46c5b12 7011
df7492f9 7012 if (EQ (dst_object, Qt))
d46c5b12 7013 {
df7492f9
KH
7014 if (BUFFERP (coding->dst_object))
7015 coding->dst_object = Fbuffer_string ();
7016 else
d46c5b12 7017 {
df7492f9
KH
7018 coding->dst_object
7019 = make_unibyte_string ((char *) coding->destination,
7020 coding->produced);
7021 xfree (coding->destination);
d46c5b12 7022 }
4ed46869 7023 }
d46c5b12 7024
4776e638
KH
7025 if (saved_pt >= 0)
7026 {
7027 /* This is the case of:
7028 (BUFFERP (src_object) && EQ (src_object, dst_object))
7029 As we have moved PT while replacing the original buffer
7030 contents, we must recover it now. */
7031 set_buffer_internal (XBUFFER (src_object));
7032 if (saved_pt < from)
7033 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7034 else if (saved_pt < from + chars)
7035 TEMP_SET_PT_BOTH (from, from_byte);
7036 else if (! NILP (current_buffer->enable_multibyte_characters))
7037 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7038 saved_pt_byte + (coding->produced - bytes));
d46c5b12 7039 else
4776e638
KH
7040 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7041 saved_pt_byte + (coding->produced - bytes));
7042 }
7043
c02d943b
KH
7044 if (kill_src_buffer)
7045 Fkill_buffer (coding->src_object);
df7492f9 7046 unbind_to (count, Qnil);
b73bfc1c
KH
7047}
7048
df7492f9 7049
b73bfc1c 7050Lisp_Object
df7492f9 7051preferred_coding_system ()
b73bfc1c 7052{
df7492f9 7053 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 7054
df7492f9 7055 return CODING_ID_NAME (id);
4ed46869
KH
7056}
7057
7058\f
7059#ifdef emacs
1397dc18 7060/*** 11. Emacs Lisp library functions ***/
4ed46869 7061
4ed46869 7062DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 7063 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 7064See the documentation of `define-coding-system' for information
48b0f3ae
PJ
7065about coding-system objects. */)
7066 (obj)
4ed46869
KH
7067 Lisp_Object obj;
7068{
44e8490d
KH
7069 if (NILP (obj)
7070 || CODING_SYSTEM_ID (obj) >= 0)
7071 return Qt;
7072 if (! SYMBOLP (obj)
7073 || NILP (Fget (obj, Qcoding_system_define_form)))
7074 return Qnil;
7075 return Qt;
4ed46869
KH
7076}
7077
9d991de8
RS
7078DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7079 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
7080 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
7081 (prompt)
4ed46869
KH
7082 Lisp_Object prompt;
7083{
e0e989f6 7084 Lisp_Object val;
9d991de8
RS
7085 do
7086 {
4608c386
KH
7087 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7088 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8 7089 }
8f924df7 7090 while (SCHARS (val) == 0);
e0e989f6 7091 return (Fintern (val, Qnil));
4ed46869
KH
7092}
7093
9b787f3e 7094DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
7095 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7096If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
7097 (prompt, default_coding_system)
9b787f3e 7098 Lisp_Object prompt, default_coding_system;
4ed46869 7099{
f44d27ce 7100 Lisp_Object val;
9b787f3e 7101 if (SYMBOLP (default_coding_system))
a3181084 7102 XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
4608c386 7103 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
7104 Qt, Qnil, Qcoding_system_history,
7105 default_coding_system, Qnil);
8f924df7 7106 return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
7107}
7108
7109DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7110 1, 1, 0,
48b0f3ae 7111 doc: /* Check validity of CODING-SYSTEM.
9ffd559c
KH
7112If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7113It is valid if it is nil or a symbol defined as a coding system by the
7114function `define-coding-system'. */)
df7492f9 7115 (coding_system)
4ed46869
KH
7116 Lisp_Object coding_system;
7117{
44e8490d
KH
7118 Lisp_Object define_form;
7119
7120 define_form = Fget (coding_system, Qcoding_system_define_form);
7121 if (! NILP (define_form))
7122 {
7123 Fput (coding_system, Qcoding_system_define_form, Qnil);
7124 safe_eval (define_form);
7125 }
4ed46869
KH
7126 if (!NILP (Fcoding_system_p (coding_system)))
7127 return coding_system;
7128 while (1)
02ba4723 7129 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 7130}
df7492f9 7131
3a73fa5d 7132\f
89528eb3
KH
7133/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
7134 HIGHEST is nonzero, return the coding system of the highest
7135 priority among the detected coding systems. Otherwize return a
7136 list of detected coding systems sorted by their priorities. If
7137 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7138 multibyte form but contains only ASCII and eight-bit chars.
7139 Otherwise, the bytes are raw bytes.
7140
7141 CODING-SYSTEM controls the detection as below:
7142
7143 If it is nil, detect both text-format and eol-format. If the
7144 text-format part of CODING-SYSTEM is already specified
7145 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
7146 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7147 detect only text-format. */
7148
d46c5b12 7149Lisp_Object
24a73b0a
KH
7150detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7151 coding_system)
8f924df7 7152 const unsigned char *src;
24a73b0a 7153 int src_chars, src_bytes, highest;
0a28aafb 7154 int multibytep;
df7492f9 7155 Lisp_Object coding_system;
4ed46869 7156{
8f924df7 7157 const unsigned char *src_end = src + src_bytes;
df7492f9
KH
7158 Lisp_Object attrs, eol_type;
7159 Lisp_Object val;
7160 struct coding_system coding;
89528eb3 7161 int id;
ff0dacd7 7162 struct coding_detection_info detect_info;
24a73b0a 7163 enum coding_category base_category;
b73bfc1c 7164
df7492f9
KH
7165 if (NILP (coding_system))
7166 coding_system = Qundecided;
7167 setup_coding_system (coding_system, &coding);
7168 attrs = CODING_ID_ATTRS (coding.id);
7169 eol_type = CODING_ID_EOL_TYPE (coding.id);
89528eb3 7170 coding_system = CODING_ATTR_BASE_NAME (attrs);
4ed46869 7171
df7492f9 7172 coding.source = src;
24a73b0a 7173 coding.src_chars = src_chars;
df7492f9
KH
7174 coding.src_bytes = src_bytes;
7175 coding.src_multibyte = multibytep;
7176 coding.consumed = 0;
89528eb3 7177 coding.mode |= CODING_MODE_LAST_BLOCK;
d46c5b12 7178
ff0dacd7 7179 detect_info.checked = detect_info.found = detect_info.rejected = 0;
d46c5b12 7180
89528eb3 7181 /* At first, detect text-format if necessary. */
24a73b0a
KH
7182 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7183 if (base_category == coding_category_undecided)
4ed46869 7184 {
ff0dacd7
KH
7185 enum coding_category category;
7186 struct coding_system *this;
7187 int c, i;
88993dfd 7188
24a73b0a
KH
7189 /* Skip all ASCII bytes except for a few ISO2022 controls. */
7190 for (i = 0; src < src_end; i++, src++)
4ed46869 7191 {
df7492f9 7192 c = *src;
6cb21a4f 7193 if (c & 0x80)
d46c5b12 7194 break;
6cb21a4f
KH
7195 if (c < 0x20
7196 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7197 && inhibit_iso_escape_detection)
7198 {
7199 coding.head_ascii = src - coding.source;
7200 if (detect_coding_iso_2022 (&coding, &detect_info))
7201 {
7202 /* We have scanned the whole data. */
7203 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7204 /* We didn't find an 8-bit code. */
7205 src = src_end;
7206 break;
7207 }
7208 }
4ed46869 7209 }
df7492f9 7210 coding.head_ascii = src - coding.source;
88993dfd 7211
6cb21a4f
KH
7212 if (src < src_end
7213 || detect_info.found)
7214 {
7215 if (src == src_end)
7216 /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
7217 for (i = 0; i < coding_category_raw_text; i++)
ff0dacd7 7218 {
6cb21a4f
KH
7219 category = coding_priorities[i];
7220 if (detect_info.found & (1 << category))
ff0dacd7
KH
7221 break;
7222 }
6cb21a4f
KH
7223 else
7224 for (i = 0; i < coding_category_raw_text; i++)
df7492f9 7225 {
6cb21a4f
KH
7226 category = coding_priorities[i];
7227 this = coding_categories + category;
7228
7229 if (this->id < 0)
24a73b0a 7230 {
6cb21a4f
KH
7231 /* No coding system of this category is defined. */
7232 detect_info.rejected |= (1 << category);
7233 }
7234 else if (category >= coding_category_raw_text)
7235 continue;
7236 else if (detect_info.checked & (1 << category))
7237 {
7238 if (highest
7239 && (detect_info.found & (1 << category)))
7240 break;
7241 }
7242 else
7243 {
7244 if ((*(this->detector)) (&coding, &detect_info)
7245 && highest
7246 && (detect_info.found & (1 << category)))
24a73b0a 7247 {
6cb21a4f
KH
7248 if (category == coding_category_utf_16_auto)
7249 {
7250 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7251 category = coding_category_utf_16_le;
7252 else
7253 category = coding_category_utf_16_be;
7254 }
7255 break;
24a73b0a 7256 }
24a73b0a 7257 }
df7492f9 7258 }
6cb21a4f 7259 }
ec6d2bb8 7260
ff0dacd7 7261 if (detect_info.rejected == CATEGORY_MASK_ANY)
ec6d2bb8 7262 {
ff0dacd7 7263 detect_info.found = CATEGORY_MASK_RAW_TEXT;
89528eb3
KH
7264 id = coding_categories[coding_category_raw_text].id;
7265 val = Fcons (make_number (id), Qnil);
7266 }
ff0dacd7 7267 else if (! detect_info.rejected && ! detect_info.found)
89528eb3 7268 {
ff0dacd7 7269 detect_info.found = CATEGORY_MASK_ANY;
89528eb3
KH
7270 id = coding_categories[coding_category_undecided].id;
7271 val = Fcons (make_number (id), Qnil);
7272 }
7273 else if (highest)
7274 {
ff0dacd7 7275 if (detect_info.found)
ec6d2bb8 7276 {
ff0dacd7
KH
7277 detect_info.found = 1 << category;
7278 val = Fcons (make_number (this->id), Qnil);
7279 }
7280 else
7281 for (i = 0; i < coding_category_raw_text; i++)
7282 if (! (detect_info.rejected & (1 << coding_priorities[i])))
7283 {
7284 detect_info.found = 1 << coding_priorities[i];
7285 id = coding_categories[coding_priorities[i]].id;
7286 val = Fcons (make_number (id), Qnil);
7287 break;
7288 }
7289 }
89528eb3
KH
7290 else
7291 {
ff0dacd7
KH
7292 int mask = detect_info.rejected | detect_info.found;
7293 int found = 0;
89528eb3 7294 val = Qnil;
ec6d2bb8 7295
89528eb3 7296 for (i = coding_category_raw_text - 1; i >= 0; i--)
ff0dacd7
KH
7297 {
7298 category = coding_priorities[i];
7299 if (! (mask & (1 << category)))
ec6d2bb8 7300 {
ff0dacd7
KH
7301 found |= 1 << category;
7302 id = coding_categories[category].id;
7303 val = Fcons (make_number (id), val);
7304 }
7305 }
7306 for (i = coding_category_raw_text - 1; i >= 0; i--)
7307 {
7308 category = coding_priorities[i];
7309 if (detect_info.found & (1 << category))
7310 {
7311 id = coding_categories[category].id;
7312 val = Fcons (make_number (id), val);
ec6d2bb8 7313 }
ec6d2bb8 7314 }
ff0dacd7 7315 detect_info.found |= found;
ec6d2bb8 7316 }
ec6d2bb8 7317 }
24a73b0a
KH
7318 else if (base_category == coding_category_utf_16_auto)
7319 {
7320 if (detect_coding_utf_16 (&coding, &detect_info))
7321 {
24a73b0a
KH
7322 struct coding_system *this;
7323
7324 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7325 this = coding_categories + coding_category_utf_16_le;
7326 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7327 this = coding_categories + coding_category_utf_16_be;
7328 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7329 this = coding_categories + coding_category_utf_16_be_nosig;
7330 else
7331 this = coding_categories + coding_category_utf_16_le_nosig;
7332 val = Fcons (make_number (this->id), Qnil);
7333 }
7334 }
df7492f9
KH
7335 else
7336 {
ff0dacd7 7337 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
89528eb3 7338 val = Fcons (make_number (coding.id), Qnil);
4ed46869 7339 }
df7492f9 7340
89528eb3 7341 /* Then, detect eol-format if necessary. */
df7492f9 7342 {
89528eb3 7343 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
df7492f9
KH
7344 Lisp_Object tail;
7345
89528eb3
KH
7346 if (VECTORP (eol_type))
7347 {
ff0dacd7 7348 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
89528eb3
KH
7349 normal_eol = detect_eol (coding.source, src_bytes,
7350 coding_category_raw_text);
ff0dacd7
KH
7351 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7352 | CATEGORY_MASK_UTF_16_BE_NOSIG))
89528eb3
KH
7353 utf_16_be_eol = detect_eol (coding.source, src_bytes,
7354 coding_category_utf_16_be);
ff0dacd7
KH
7355 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7356 | CATEGORY_MASK_UTF_16_LE_NOSIG))
89528eb3
KH
7357 utf_16_le_eol = detect_eol (coding.source, src_bytes,
7358 coding_category_utf_16_le);
7359 }
7360 else
7361 {
7362 if (EQ (eol_type, Qunix))
7363 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7364 else if (EQ (eol_type, Qdos))
7365 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7366 else
7367 normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7368 }
7369
df7492f9
KH
7370 for (tail = val; CONSP (tail); tail = XCDR (tail))
7371 {
89528eb3 7372 enum coding_category category;
df7492f9 7373 int this_eol;
89528eb3
KH
7374
7375 id = XINT (XCAR (tail));
7376 attrs = CODING_ID_ATTRS (id);
7377 category = XINT (CODING_ATTR_CATEGORY (attrs));
7378 eol_type = CODING_ID_EOL_TYPE (id);
df7492f9
KH
7379 if (VECTORP (eol_type))
7380 {
89528eb3
KH
7381 if (category == coding_category_utf_16_be
7382 || category == coding_category_utf_16_be_nosig)
7383 this_eol = utf_16_be_eol;
7384 else if (category == coding_category_utf_16_le
7385 || category == coding_category_utf_16_le_nosig)
7386 this_eol = utf_16_le_eol;
df7492f9 7387 else
89528eb3
KH
7388 this_eol = normal_eol;
7389
df7492f9
KH
7390 if (this_eol == EOL_SEEN_LF)
7391 XSETCAR (tail, AREF (eol_type, 0));
7392 else if (this_eol == EOL_SEEN_CRLF)
7393 XSETCAR (tail, AREF (eol_type, 1));
7394 else if (this_eol == EOL_SEEN_CR)
7395 XSETCAR (tail, AREF (eol_type, 2));
89528eb3
KH
7396 else
7397 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9 7398 }
89528eb3
KH
7399 else
7400 XSETCAR (tail, CODING_ID_NAME (id));
df7492f9
KH
7401 }
7402 }
ec6d2bb8 7403
03699b14 7404 return (highest ? XCAR (val) : val);
ec6d2bb8
KH
7405}
7406
ec6d2bb8 7407
d46c5b12
KH
7408DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7409 2, 3, 0,
48b0f3ae
PJ
7410 doc: /* Detect coding system of the text in the region between START and END.
7411Return a list of possible coding systems ordered by priority.
ec6d2bb8 7412
48b0f3ae
PJ
7413If only ASCII characters are found, it returns a list of single element
7414`undecided' or its subsidiary coding system according to a detected
7415end-of-line format.
ec6d2bb8 7416
48b0f3ae
PJ
7417If optional argument HIGHEST is non-nil, return the coding system of
7418highest priority. */)
7419 (start, end, highest)
d46c5b12
KH
7420 Lisp_Object start, end, highest;
7421{
7422 int from, to;
7423 int from_byte, to_byte;
ec6d2bb8 7424
b7826503
PJ
7425 CHECK_NUMBER_COERCE_MARKER (start);
7426 CHECK_NUMBER_COERCE_MARKER (end);
ec6d2bb8 7427
d46c5b12
KH
7428 validate_region (&start, &end);
7429 from = XINT (start), to = XINT (end);
7430 from_byte = CHAR_TO_BYTE (from);
7431 to_byte = CHAR_TO_BYTE (to);
ec6d2bb8 7432
d46c5b12
KH
7433 if (from < GPT && to >= GPT)
7434 move_gap_both (to, to_byte);
c210f766 7435
d46c5b12 7436 return detect_coding_system (BYTE_POS_ADDR (from_byte),
24a73b0a 7437 to - from, to_byte - from_byte,
0a28aafb
KH
7438 !NILP (highest),
7439 !NILP (current_buffer
df7492f9
KH
7440 ->enable_multibyte_characters),
7441 Qnil);
ec6d2bb8
KH
7442}
7443
d46c5b12
KH
7444DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7445 1, 2, 0,
48b0f3ae
PJ
7446 doc: /* Detect coding system of the text in STRING.
7447Return a list of possible coding systems ordered by priority.
fb88bf2d 7448
48b0f3ae
PJ
7449If only ASCII characters are found, it returns a list of single element
7450`undecided' or its subsidiary coding system according to a detected
7451end-of-line format.
d46c5b12 7452
48b0f3ae
PJ
7453If optional argument HIGHEST is non-nil, return the coding system of
7454highest priority. */)
7455 (string, highest)
d46c5b12
KH
7456 Lisp_Object string, highest;
7457{
b7826503 7458 CHECK_STRING (string);
b73bfc1c 7459
24a73b0a
KH
7460 return detect_coding_system (SDATA (string),
7461 SCHARS (string), SBYTES (string),
8f924df7 7462 !NILP (highest), STRING_MULTIBYTE (string),
df7492f9 7463 Qnil);
4ed46869 7464}
4ed46869 7465
b73bfc1c 7466
df7492f9
KH
7467static INLINE int
7468char_encodable_p (c, attrs)
7469 int c;
7470 Lisp_Object attrs;
05e6f5dc 7471{
df7492f9 7472 Lisp_Object tail;
df7492f9 7473 struct charset *charset;
7d64c6ad 7474 Lisp_Object translation_table;
d46c5b12 7475
7d64c6ad 7476 translation_table = CODING_ATTR_TRANS_TBL (attrs);
a6f87d34 7477 if (! NILP (translation_table))
7d64c6ad 7478 c = translate_char (translation_table, c);
df7492f9
KH
7479 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7480 CONSP (tail); tail = XCDR (tail))
e133c8fa 7481 {
df7492f9
KH
7482 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7483 if (CHAR_CHARSET_P (c, charset))
7484 break;
e133c8fa 7485 }
df7492f9 7486 return (! NILP (tail));
05e6f5dc 7487}
83fa074f 7488
fb88bf2d 7489
df7492f9
KH
7490/* Return a list of coding systems that safely encode the text between
7491 START and END. If EXCLUDE is non-nil, it is a list of coding
7492 systems not to check. The returned list doesn't contain any such
48468dac 7493 coding systems. In any case, if the text contains only ASCII or is
df7492f9 7494 unibyte, return t. */
e077cc80 7495
df7492f9
KH
7496DEFUN ("find-coding-systems-region-internal",
7497 Ffind_coding_systems_region_internal,
7498 Sfind_coding_systems_region_internal, 2, 3, 0,
7499 doc: /* Internal use only. */)
7500 (start, end, exclude)
7501 Lisp_Object start, end, exclude;
7502{
7503 Lisp_Object coding_attrs_list, safe_codings;
7504 EMACS_INT start_byte, end_byte;
7c78e542 7505 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
7506 int c;
7507 Lisp_Object tail, elt;
d46c5b12 7508
df7492f9
KH
7509 if (STRINGP (start))
7510 {
7511 if (!STRING_MULTIBYTE (start)
8f924df7 7512 || SCHARS (start) == SBYTES (start))
df7492f9
KH
7513 return Qt;
7514 start_byte = 0;
8f924df7 7515 end_byte = SBYTES (start);
df7492f9
KH
7516 }
7517 else
d46c5b12 7518 {
df7492f9
KH
7519 CHECK_NUMBER_COERCE_MARKER (start);
7520 CHECK_NUMBER_COERCE_MARKER (end);
7521 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7522 args_out_of_range (start, end);
7523 if (NILP (current_buffer->enable_multibyte_characters))
7524 return Qt;
7525 start_byte = CHAR_TO_BYTE (XINT (start));
7526 end_byte = CHAR_TO_BYTE (XINT (end));
7527 if (XINT (end) - XINT (start) == end_byte - start_byte)
7528 return Qt;
d46c5b12 7529
e1c23804 7530 if (XINT (start) < GPT && XINT (end) > GPT)
d46c5b12 7531 {
e1c23804
DL
7532 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7533 move_gap_both (XINT (start), start_byte);
df7492f9 7534 else
e1c23804 7535 move_gap_both (XINT (end), end_byte);
d46c5b12
KH
7536 }
7537 }
7538
df7492f9
KH
7539 coding_attrs_list = Qnil;
7540 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7541 if (NILP (exclude)
7542 || NILP (Fmemq (XCAR (tail), exclude)))
7543 {
7544 Lisp_Object attrs;
d46c5b12 7545
df7492f9
KH
7546 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7547 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7548 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7d64c6ad
KH
7549 {
7550 ASET (attrs, coding_attr_trans_tbl,
2170c8f0 7551 get_translation_table (attrs, 1, NULL));
7d64c6ad
KH
7552 coding_attrs_list = Fcons (attrs, coding_attrs_list);
7553 }
df7492f9 7554 }
d46c5b12 7555
df7492f9 7556 if (STRINGP (start))
8f924df7 7557 p = pbeg = SDATA (start);
df7492f9
KH
7558 else
7559 p = pbeg = BYTE_POS_ADDR (start_byte);
7560 pend = p + (end_byte - start_byte);
b843d1ae 7561
df7492f9
KH
7562 while (p < pend && ASCII_BYTE_P (*p)) p++;
7563 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
d46c5b12 7564
05e6f5dc 7565 while (p < pend)
72d1a715 7566 {
df7492f9
KH
7567 if (ASCII_BYTE_P (*p))
7568 p++;
72d1a715
RS
7569 else
7570 {
df7492f9 7571 c = STRING_CHAR_ADVANCE (p);
12410ef1 7572
df7492f9
KH
7573 charset_map_loaded = 0;
7574 for (tail = coding_attrs_list; CONSP (tail);)
7575 {
7576 elt = XCAR (tail);
7577 if (NILP (elt))
7578 tail = XCDR (tail);
7579 else if (char_encodable_p (c, elt))
7580 tail = XCDR (tail);
7581 else if (CONSP (XCDR (tail)))
7582 {
7583 XSETCAR (tail, XCAR (XCDR (tail)));
7584 XSETCDR (tail, XCDR (XCDR (tail)));
7585 }
7586 else
7587 {
7588 XSETCAR (tail, Qnil);
7589 tail = XCDR (tail);
7590 }
7591 }
7592 if (charset_map_loaded)
7593 {
7594 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 7595
df7492f9 7596 if (STRINGP (start))
8f924df7 7597 pbeg = SDATA (start);
df7492f9
KH
7598 else
7599 pbeg = BYTE_POS_ADDR (start_byte);
7600 p = pbeg + p_offset;
7601 pend = pbeg + pend_offset;
7602 }
7603 }
ec6d2bb8 7604 }
fb88bf2d 7605
988b3759 7606 safe_codings = list2 (Qraw_text, Qno_conversion);
df7492f9
KH
7607 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7608 if (! NILP (XCAR (tail)))
7609 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
ec6d2bb8 7610
05e6f5dc
KH
7611 return safe_codings;
7612}
4956c225 7613
d46c5b12 7614
8f924df7
KH
7615DEFUN ("unencodable-char-position", Funencodable_char_position,
7616 Sunencodable_char_position, 3, 5, 0,
7617 doc: /*
7618Return position of first un-encodable character in a region.
7619START and END specfiy the region and CODING-SYSTEM specifies the
7620encoding to check. Return nil if CODING-SYSTEM does encode the region.
d46c5b12 7621
8f924df7
KH
7622If optional 4th argument COUNT is non-nil, it specifies at most how
7623many un-encodable characters to search. In this case, the value is a
7624list of positions.
d46c5b12 7625
8f924df7
KH
7626If optional 5th argument STRING is non-nil, it is a string to search
7627for un-encodable characters. In that case, START and END are indexes
7628to the string. */)
7629 (start, end, coding_system, count, string)
7630 Lisp_Object start, end, coding_system, count, string;
7631{
7632 int n;
7633 struct coding_system coding;
7d64c6ad 7634 Lisp_Object attrs, charset_list, translation_table;
8f924df7
KH
7635 Lisp_Object positions;
7636 int from, to;
7637 const unsigned char *p, *stop, *pend;
7638 int ascii_compatible;
fb88bf2d 7639
8f924df7
KH
7640 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7641 attrs = CODING_ID_ATTRS (coding.id);
7642 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7643 return Qnil;
7644 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7645 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2170c8f0 7646 translation_table = get_translation_table (attrs, 1, NULL);
fb88bf2d 7647
8f924df7
KH
7648 if (NILP (string))
7649 {
7650 validate_region (&start, &end);
7651 from = XINT (start);
7652 to = XINT (end);
7653 if (NILP (current_buffer->enable_multibyte_characters)
7654 || (ascii_compatible
7655 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7656 return Qnil;
7657 p = CHAR_POS_ADDR (from);
7658 pend = CHAR_POS_ADDR (to);
7659 if (from < GPT && to >= GPT)
7660 stop = GPT_ADDR;
7661 else
7662 stop = pend;
7663 }
7664 else
7665 {
7666 CHECK_STRING (string);
7667 CHECK_NATNUM (start);
7668 CHECK_NATNUM (end);
7669 from = XINT (start);
7670 to = XINT (end);
7671 if (from > to
7672 || to > SCHARS (string))
7673 args_out_of_range_3 (string, start, end);
7674 if (! STRING_MULTIBYTE (string))
7675 return Qnil;
7676 p = SDATA (string) + string_char_to_byte (string, from);
7677 stop = pend = SDATA (string) + string_char_to_byte (string, to);
7678 if (ascii_compatible && (to - from) == (pend - p))
7679 return Qnil;
7680 }
f2558efd 7681
8f924df7
KH
7682 if (NILP (count))
7683 n = 1;
7684 else
b73bfc1c 7685 {
8f924df7
KH
7686 CHECK_NATNUM (count);
7687 n = XINT (count);
b73bfc1c
KH
7688 }
7689
8f924df7
KH
7690 positions = Qnil;
7691 while (1)
d46c5b12 7692 {
8f924df7 7693 int c;
ec6d2bb8 7694
8f924df7
KH
7695 if (ascii_compatible)
7696 while (p < stop && ASCII_BYTE_P (*p))
7697 p++, from++;
7698 if (p >= stop)
0e79d667 7699 {
8f924df7
KH
7700 if (p >= pend)
7701 break;
7702 stop = pend;
7703 p = GAP_END_ADDR;
0e79d667 7704 }
ec6d2bb8 7705
8f924df7
KH
7706 c = STRING_CHAR_ADVANCE (p);
7707 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7d64c6ad
KH
7708 && ! char_charset (translate_char (translation_table, c),
7709 charset_list, NULL))
ec6d2bb8 7710 {
8f924df7
KH
7711 positions = Fcons (make_number (from), positions);
7712 n--;
7713 if (n == 0)
7714 break;
ec6d2bb8
KH
7715 }
7716
8f924df7
KH
7717 from++;
7718 }
d46c5b12 7719
8f924df7
KH
7720 return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7721}
d46c5b12 7722
d46c5b12 7723
df7492f9
KH
7724DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7725 Scheck_coding_systems_region, 3, 3, 0,
7726 doc: /* Check if the region is encodable by coding systems.
d46c5b12 7727
df7492f9
KH
7728START and END are buffer positions specifying the region.
7729CODING-SYSTEM-LIST is a list of coding systems to check.
d46c5b12 7730
df7492f9
KH
7731The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7732CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7733whole region, POS0, POS1, ... are buffer positions where non-encodable
7734characters are found.
93dec019 7735
df7492f9
KH
7736If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7737value is nil.
93dec019 7738
df7492f9
KH
7739START may be a string. In that case, check if the string is
7740encodable, and the value contains indices to the string instead of
7741buffer positions. END is ignored. */)
7742 (start, end, coding_system_list)
7743 Lisp_Object start, end, coding_system_list;
05e6f5dc 7744{
df7492f9
KH
7745 Lisp_Object list;
7746 EMACS_INT start_byte, end_byte;
7747 int pos;
7c78e542 7748 const unsigned char *p, *pbeg, *pend;
df7492f9 7749 int c;
7d64c6ad 7750 Lisp_Object tail, elt, attrs;
70ad9fc4 7751
05e6f5dc
KH
7752 if (STRINGP (start))
7753 {
df7492f9 7754 if (!STRING_MULTIBYTE (start)
8f924df7 7755 && SCHARS (start) != SBYTES (start))
df7492f9
KH
7756 return Qnil;
7757 start_byte = 0;
8f924df7 7758 end_byte = SBYTES (start);
df7492f9 7759 pos = 0;
d46c5b12 7760 }
05e6f5dc 7761 else
b73bfc1c 7762 {
b7826503
PJ
7763 CHECK_NUMBER_COERCE_MARKER (start);
7764 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
7765 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7766 args_out_of_range (start, end);
7767 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
7768 return Qnil;
7769 start_byte = CHAR_TO_BYTE (XINT (start));
7770 end_byte = CHAR_TO_BYTE (XINT (end));
7771 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 7772 return Qt;
df7492f9 7773
e1c23804 7774 if (XINT (start) < GPT && XINT (end) > GPT)
b73bfc1c 7775 {
e1c23804
DL
7776 if ((GPT - XINT (start)) < (XINT (end) - GPT))
7777 move_gap_both (XINT (start), start_byte);
df7492f9 7778 else
e1c23804 7779 move_gap_both (XINT (end), end_byte);
b73bfc1c 7780 }
e1c23804 7781 pos = XINT (start);
b73bfc1c 7782 }
7553d0e1 7783
df7492f9
KH
7784 list = Qnil;
7785 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
12410ef1 7786 {
df7492f9 7787 elt = XCAR (tail);
7d64c6ad 7788 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
2170c8f0
KH
7789 ASET (attrs, coding_attr_trans_tbl,
7790 get_translation_table (attrs, 1, NULL));
7d64c6ad 7791 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
12410ef1
KH
7792 }
7793
df7492f9 7794 if (STRINGP (start))
8f924df7 7795 p = pbeg = SDATA (start);
72d1a715 7796 else
df7492f9
KH
7797 p = pbeg = BYTE_POS_ADDR (start_byte);
7798 pend = p + (end_byte - start_byte);
4ed46869 7799
df7492f9
KH
7800 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7801 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
ec6d2bb8 7802
df7492f9 7803 while (p < pend)
d46c5b12 7804 {
df7492f9
KH
7805 if (ASCII_BYTE_P (*p))
7806 p++;
e133c8fa 7807 else
05e6f5dc 7808 {
df7492f9
KH
7809 c = STRING_CHAR_ADVANCE (p);
7810
7811 charset_map_loaded = 0;
7812 for (tail = list; CONSP (tail); tail = XCDR (tail))
7813 {
7814 elt = XCDR (XCAR (tail));
7815 if (! char_encodable_p (c, XCAR (elt)))
7816 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7817 }
7818 if (charset_map_loaded)
7819 {
7820 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7821
7822 if (STRINGP (start))
8f924df7 7823 pbeg = SDATA (start);
df7492f9
KH
7824 else
7825 pbeg = BYTE_POS_ADDR (start_byte);
7826 p = pbeg + p_offset;
7827 pend = pbeg + pend_offset;
7828 }
05e6f5dc 7829 }
df7492f9 7830 pos++;
d46c5b12 7831 }
4ed46869 7832
df7492f9
KH
7833 tail = list;
7834 list = Qnil;
7835 for (; CONSP (tail); tail = XCDR (tail))
ec6d2bb8 7836 {
df7492f9
KH
7837 elt = XCAR (tail);
7838 if (CONSP (XCDR (XCDR (elt))))
7839 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7840 list);
ec6d2bb8 7841 }
2b4f9037 7842
df7492f9 7843 return list;
d46c5b12
KH
7844}
7845
3fd9494b 7846
b73bfc1c 7847Lisp_Object
df7492f9
KH
7848code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7849 Lisp_Object start, end, coding_system, dst_object;
7850 int encodep, norecord;
4ed46869 7851{
3a73fa5d 7852 struct coding_system coding;
df7492f9
KH
7853 EMACS_INT from, from_byte, to, to_byte;
7854 Lisp_Object src_object;
4ed46869 7855
b7826503
PJ
7856 CHECK_NUMBER_COERCE_MARKER (start);
7857 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
7858 if (NILP (coding_system))
7859 coding_system = Qno_conversion;
7860 else
7861 CHECK_CODING_SYSTEM (coding_system);
7862 src_object = Fcurrent_buffer ();
7863 if (NILP (dst_object))
7864 dst_object = src_object;
7865 else if (! EQ (dst_object, Qt))
7866 CHECK_BUFFER (dst_object);
3a73fa5d 7867
d46c5b12
KH
7868 validate_region (&start, &end);
7869 from = XFASTINT (start);
df7492f9 7870 from_byte = CHAR_TO_BYTE (from);
d46c5b12 7871 to = XFASTINT (end);
df7492f9 7872 to_byte = CHAR_TO_BYTE (to);
764ca8da 7873
df7492f9
KH
7874 setup_coding_system (coding_system, &coding);
7875 coding.mode |= CODING_MODE_LAST_BLOCK;
ec6d2bb8 7876
df7492f9
KH
7877 if (encodep)
7878 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7879 dst_object);
7880 else
7881 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7882 dst_object);
7883 if (! norecord)
7884 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 7885
df7492f9
KH
7886 return (BUFFERP (dst_object)
7887 ? make_number (coding.produced_char)
7888 : coding.dst_object);
4031e2bf 7889}
78108bcd 7890
4ed46869 7891
4031e2bf 7892DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 7893 3, 4, "r\nzCoding system: ",
48b0f3ae 7894 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
7895When called from a program, takes four arguments:
7896 START, END, CODING-SYSTEM, and DESTINATION.
7897START and END are buffer positions.
8844fa83 7898
df7492f9
KH
7899Optional 4th arguments DESTINATION specifies where the decoded text goes.
7900If nil, the region between START and END is replace by the decoded text.
7901If buffer, the decoded text is inserted in the buffer.
7902If t, the decoded text is returned.
8844fa83 7903
48b0f3ae
PJ
7904This function sets `last-coding-system-used' to the precise coding system
7905used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7906not fully specified.)
7907It returns the length of the decoded text. */)
df7492f9
KH
7908 (start, end, coding_system, destination)
7909 Lisp_Object start, end, coding_system, destination;
4031e2bf 7910{
df7492f9 7911 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d 7912}
8844fa83 7913
3a73fa5d 7914DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
7915 3, 4, "r\nzCoding system: ",
7916 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
7917When called from a program, takes three arguments:
7918START, END, and CODING-SYSTEM. START and END are buffer positions.
d46c5b12 7919
df7492f9
KH
7920Optional 4th arguments DESTINATION specifies where the encoded text goes.
7921If nil, the region between START and END is replace by the encoded text.
7922If buffer, the encoded text is inserted in the buffer.
7923If t, the encoded text is returned.
2391eaa4 7924
48b0f3ae
PJ
7925This function sets `last-coding-system-used' to the precise coding system
7926used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7927not fully specified.)
7928It returns the length of the encoded text. */)
df7492f9
KH
7929 (start, end, coding_system, destination)
7930 Lisp_Object start, end, coding_system, destination;
3a73fa5d 7931{
df7492f9 7932 return code_convert_region (start, end, coding_system, destination, 1, 0);
b73bfc1c
KH
7933}
7934
7935Lisp_Object
df7492f9
KH
7936code_convert_string (string, coding_system, dst_object,
7937 encodep, nocopy, norecord)
7938 Lisp_Object string, coding_system, dst_object;
7939 int encodep, nocopy, norecord;
b73bfc1c 7940{
4031e2bf 7941 struct coding_system coding;
df7492f9 7942 EMACS_INT chars, bytes;
ec6d2bb8 7943
b7826503 7944 CHECK_STRING (string);
d46c5b12 7945 if (NILP (coding_system))
4956c225 7946 {
df7492f9
KH
7947 if (! norecord)
7948 Vlast_coding_system_used = Qno_conversion;
7949 if (NILP (dst_object))
7950 return (nocopy ? Fcopy_sequence (string) : string);
4956c225 7951 }
b73bfc1c 7952
df7492f9
KH
7953 if (NILP (coding_system))
7954 coding_system = Qno_conversion;
7955 else
7956 CHECK_CODING_SYSTEM (coding_system);
7957 if (NILP (dst_object))
7958 dst_object = Qt;
7959 else if (! EQ (dst_object, Qt))
7960 CHECK_BUFFER (dst_object);
73be902c 7961
df7492f9 7962 setup_coding_system (coding_system, &coding);
d46c5b12 7963 coding.mode |= CODING_MODE_LAST_BLOCK;
8f924df7
KH
7964 chars = SCHARS (string);
7965 bytes = SBYTES (string);
df7492f9
KH
7966 if (encodep)
7967 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7968 else
7969 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7970 if (! norecord)
7971 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
73be902c 7972
df7492f9
KH
7973 return (BUFFERP (dst_object)
7974 ? make_number (coding.produced_char)
7975 : coding.dst_object);
4ed46869 7976}
73be902c 7977
b73bfc1c 7978
ecec61c1 7979/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8 7980 Do not set Vlast_coding_system_used.
4ed46869 7981
ec6d2bb8
KH
7982 This function is called only from macros DECODE_FILE and
7983 ENCODE_FILE, thus we ignore character composition. */
4ed46869 7984
ecec61c1
KH
7985Lisp_Object
7986code_convert_string_norecord (string, coding_system, encodep)
7987 Lisp_Object string, coding_system;
7988 int encodep;
4ed46869 7989{
0be8721c 7990 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
4ed46869
KH
7991}
7992
4ed46869 7993
df7492f9
KH
7994DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7995 2, 4, 0,
7996 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7997
7998Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7999if the decoding operation is trivial.
ecec61c1 8000
df7492f9 8001Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 8002inserted in BUFFER instead of returned as a string. In this case,
df7492f9 8003the return value is BUFFER.
ecec61c1 8004
df7492f9
KH
8005This function sets `last-coding-system-used' to the precise coding system
8006used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8007not fully specified. */)
8008 (string, coding_system, nocopy, buffer)
8009 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 8010{
df7492f9
KH
8011 return code_convert_string (string, coding_system, buffer,
8012 0, ! NILP (nocopy), 0);
4ed46869
KH
8013}
8014
df7492f9
KH
8015DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8016 2, 4, 0,
8017 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8018
8019Optional third arg NOCOPY non-nil means it is OK to return STRING
8020itself if the encoding operation is trivial.
8021
8022Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 8023inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
8024the return value is BUFFER.
8025
8026This function sets `last-coding-system-used' to the precise coding system
8027used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8028not fully specified.) */)
8029 (string, coding_system, nocopy, buffer)
8030 Lisp_Object string, coding_system, nocopy, buffer;
4ed46869 8031{
df7492f9 8032 return code_convert_string (string, coding_system, buffer,
c197f191 8033 1, ! NILP (nocopy), 1);
4ed46869 8034}
df7492f9 8035
3a73fa5d 8036\f
4ed46869 8037DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
8038 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8039Return the corresponding character. */)
8040 (code)
4ed46869 8041 Lisp_Object code;
4ed46869 8042{
df7492f9
KH
8043 Lisp_Object spec, attrs, val;
8044 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8045 int c;
4ed46869 8046
df7492f9
KH
8047 CHECK_NATNUM (code);
8048 c = XFASTINT (code);
8049 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8050 attrs = AREF (spec, 0);
4ed46869 8051
df7492f9
KH
8052 if (ASCII_BYTE_P (c)
8053 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8054 return code;
4ed46869 8055
df7492f9
KH
8056 val = CODING_ATTR_CHARSET_LIST (attrs);
8057 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
8058 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8059 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
fa42c37f 8060
df7492f9
KH
8061 if (c <= 0x7F)
8062 charset = charset_roman;
8063 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 8064 {
df7492f9
KH
8065 charset = charset_kana;
8066 c -= 0x80;
4ed46869 8067 }
55ab7be3 8068 else
4ed46869 8069 {
004068e4 8070 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
8071
8072 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8073 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8074 error ("Invalid code: %d", code);
8075 SJIS_TO_JIS (c);
8076 charset = charset_kanji;
4ed46869 8077 }
df7492f9
KH
8078 c = DECODE_CHAR (charset, c);
8079 if (c < 0)
8080 error ("Invalid code: %d", code);
8081 return make_number (c);
93dec019 8082}
4ed46869 8083
48b0f3ae 8084
4ed46869 8085DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
8086 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
8087Return the corresponding code in SJIS. */)
8088 (ch)
df7492f9 8089 Lisp_Object ch;
4ed46869 8090{
df7492f9
KH
8091 Lisp_Object spec, attrs, charset_list;
8092 int c;
8093 struct charset *charset;
8094 unsigned code;
48b0f3ae 8095
df7492f9
KH
8096 CHECK_CHARACTER (ch);
8097 c = XFASTINT (ch);
8098 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8099 attrs = AREF (spec, 0);
8100
8101 if (ASCII_CHAR_P (c)
8102 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8103 return ch;
8104
8105 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8106 charset = char_charset (c, charset_list, &code);
8107 if (code == CHARSET_INVALID_CODE (charset))
8108 error ("Can't encode by shift_jis encoding: %d", c);
8109 JIS_TO_SJIS (code);
8110
8111 return make_number (code);
4ed46869
KH
8112}
8113
8114DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
8115 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8116Return the corresponding character. */)
8117 (code)
4ed46869 8118 Lisp_Object code;
d46c5b12 8119{
df7492f9
KH
8120 Lisp_Object spec, attrs, val;
8121 struct charset *charset_roman, *charset_big5, *charset;
8122 int c;
6289dd10 8123
df7492f9
KH
8124 CHECK_NATNUM (code);
8125 c = XFASTINT (code);
8126 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8127 attrs = AREF (spec, 0);
4ed46869 8128
df7492f9
KH
8129 if (ASCII_BYTE_P (c)
8130 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8131 return code;
6289dd10 8132
df7492f9
KH
8133 val = CODING_ATTR_CHARSET_LIST (attrs);
8134 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8135 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
c210f766 8136
df7492f9
KH
8137 if (c <= 0x7F)
8138 charset = charset_roman;
c28a9453
KH
8139 else
8140 {
df7492f9
KH
8141 int b1 = c >> 8, b2 = c & 0x7F;
8142 if (b1 < 0xA1 || b1 > 0xFE
8143 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8144 error ("Invalid code: %d", code);
8145 charset = charset_big5;
c28a9453 8146 }
df7492f9
KH
8147 c = DECODE_CHAR (charset, (unsigned )c);
8148 if (c < 0)
8149 error ("Invalid code: %d", code);
8150 return make_number (c);
d46c5b12 8151}
6289dd10 8152
4ed46869 8153DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
8154 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
8155Return the corresponding character code in Big5. */)
8156 (ch)
4ed46869
KH
8157 Lisp_Object ch;
8158{
df7492f9
KH
8159 Lisp_Object spec, attrs, charset_list;
8160 struct charset *charset;
8161 int c;
8162 unsigned code;
8163
8164 CHECK_CHARACTER (ch);
8165 c = XFASTINT (ch);
8166 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8167 attrs = AREF (spec, 0);
8168 if (ASCII_CHAR_P (c)
8169 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8170 return ch;
8171
8172 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8173 charset = char_charset (c, charset_list, &code);
8174 if (code == CHARSET_INVALID_CODE (charset))
8175 error ("Can't encode by Big5 encoding: %d", c);
8176
8177 return make_number (code);
4ed46869 8178}
48b0f3ae 8179
3a73fa5d 8180\f
1ba9e4ab
KH
8181DEFUN ("set-terminal-coding-system-internal",
8182 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
8183 Sset_terminal_coding_system_internal, 1, 1, 0,
8184 doc: /* Internal use only. */)
8185 (coding_system)
b74e4686 8186 Lisp_Object coding_system;
4ed46869 8187{
b7826503 8188 CHECK_SYMBOL (coding_system);
df7492f9
KH
8189 setup_coding_system (Fcheck_coding_system (coding_system),
8190 &terminal_coding);
48b0f3ae 8191
70c22245 8192 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
8193 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
8194 /* Characer composition should be disabled. */
8195 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
8196 terminal_coding.src_multibyte = 1;
8197 terminal_coding.dst_multibyte = 0;
4ed46869
KH
8198 return Qnil;
8199}
8200
c4825358
KH
8201DEFUN ("set-safe-terminal-coding-system-internal",
8202 Fset_safe_terminal_coding_system_internal,
48b0f3ae 8203 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 8204 doc: /* Internal use only. */)
48b0f3ae 8205 (coding_system)
b74e4686 8206 Lisp_Object coding_system;
d46c5b12 8207{
b7826503 8208 CHECK_SYMBOL (coding_system);
c4825358
KH
8209 setup_coding_system (Fcheck_coding_system (coding_system),
8210 &safe_terminal_coding);
df7492f9
KH
8211 /* Characer composition should be disabled. */
8212 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
8213 safe_terminal_coding.src_multibyte = 1;
8214 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
8215 return Qnil;
8216}
4ed46869 8217
4ed46869
KH
8218DEFUN ("terminal-coding-system",
8219 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
8220 doc: /* Return coding system specified for terminal output. */)
8221 ()
4ed46869 8222{
ae6f73fa
KH
8223 Lisp_Object coding_system;
8224
8225 coding_system = CODING_ID_NAME (terminal_coding.id);
8226 /* For backward compatibility, return nil if it is `undecided'. */
8227 return (coding_system != Qundecided ? coding_system : Qnil);
4ed46869
KH
8228}
8229
1ba9e4ab
KH
8230DEFUN ("set-keyboard-coding-system-internal",
8231 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
8232 Sset_keyboard_coding_system_internal, 1, 1, 0,
8233 doc: /* Internal use only. */)
8234 (coding_system)
4ed46869
KH
8235 Lisp_Object coding_system;
8236{
b7826503 8237 CHECK_SYMBOL (coding_system);
df7492f9
KH
8238 setup_coding_system (Fcheck_coding_system (coding_system),
8239 &keyboard_coding);
8240 /* Characer composition should be disabled. */
8241 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
8242 return Qnil;
8243}
8244
8245DEFUN ("keyboard-coding-system",
8246 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
8247 doc: /* Return coding system specified for decoding keyboard input. */)
8248 ()
4ed46869 8249{
df7492f9 8250 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
8251}
8252
4ed46869 8253\f
a5d301df
KH
8254DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8255 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
8256 doc: /* Choose a coding system for an operation based on the target name.
8257The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8258DECODING-SYSTEM is the coding system to use for decoding
8259\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8260for encoding (in case OPERATION does encoding).
05e6f5dc 8261
48b0f3ae
PJ
8262The first argument OPERATION specifies an I/O primitive:
8263 For file I/O, `insert-file-contents' or `write-region'.
8264 For process I/O, `call-process', `call-process-region', or `start-process'.
8265 For network I/O, `open-network-stream'.
05e6f5dc 8266
48b0f3ae
PJ
8267The remaining arguments should be the same arguments that were passed
8268to the primitive. Depending on which primitive, one of those arguments
8269is selected as the TARGET. For example, if OPERATION does file I/O,
8270whichever argument specifies the file name is TARGET.
05e6f5dc 8271
48b0f3ae
PJ
8272TARGET has a meaning which depends on OPERATION:
8273 For file I/O, TARGET is a file name.
8274 For process I/O, TARGET is a process name.
8275 For network I/O, TARGET is a service name or a port number
05e6f5dc 8276
48b0f3ae
PJ
8277This function looks up what specified for TARGET in,
8278`file-coding-system-alist', `process-coding-system-alist',
8279or `network-coding-system-alist' depending on OPERATION.
8280They may specify a coding system, a cons of coding systems,
8281or a function symbol to call.
8282In the last case, we call the function with one argument,
8283which is a list of all the arguments given to this function.
8284
8285usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
8286 (nargs, args)
4ed46869
KH
8287 int nargs;
8288 Lisp_Object *args;
6b89e3aa 8289{
4ed46869
KH
8290 Lisp_Object operation, target_idx, target, val;
8291 register Lisp_Object chain;
177c0ea7 8292
4ed46869
KH
8293 if (nargs < 2)
8294 error ("Too few arguments");
8295 operation = args[0];
8296 if (!SYMBOLP (operation)
8297 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 8298 error ("Invalid first arguement");
4ed46869
KH
8299 if (nargs < 1 + XINT (target_idx))
8300 error ("Too few arguments for operation: %s",
8f924df7 8301 SDATA (SYMBOL_NAME (operation)));
4ed46869
KH
8302 target = args[XINT (target_idx) + 1];
8303 if (!(STRINGP (target)
8304 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 8305 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 8306
2e34157c
RS
8307 chain = ((EQ (operation, Qinsert_file_contents)
8308 || EQ (operation, Qwrite_region))
02ba4723 8309 ? Vfile_coding_system_alist
2e34157c 8310 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
8311 ? Vnetwork_coding_system_alist
8312 : Vprocess_coding_system_alist));
4ed46869
KH
8313 if (NILP (chain))
8314 return Qnil;
8315
03699b14 8316 for (; CONSP (chain); chain = XCDR (chain))
6b89e3aa 8317 {
f44d27ce 8318 Lisp_Object elt;
6b89e3aa 8319
df7492f9 8320 elt = XCAR (chain);
4ed46869
KH
8321 if (CONSP (elt)
8322 && ((STRINGP (target)
03699b14
KR
8323 && STRINGP (XCAR (elt))
8324 && fast_string_match (XCAR (elt), target) >= 0)
8325 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6b89e3aa 8326 {
03699b14 8327 val = XCDR (elt);
b19fd4c5
KH
8328 /* Here, if VAL is both a valid coding system and a valid
8329 function symbol, we return VAL as a coding system. */
02ba4723
KH
8330 if (CONSP (val))
8331 return val;
8332 if (! SYMBOLP (val))
8333 return Qnil;
8334 if (! NILP (Fcoding_system_p (val)))
8335 return Fcons (val, val);
b19fd4c5 8336 if (! NILP (Ffboundp (val)))
6b89e3aa 8337 {
b19fd4c5
KH
8338 val = call1 (val, Flist (nargs, args));
8339 if (CONSP (val))
8340 return val;
8341 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8342 return Fcons (val, val);
6b89e3aa 8343 }
02ba4723 8344 return Qnil;
6b89e3aa
KH
8345 }
8346 }
4ed46869 8347 return Qnil;
6b89e3aa
KH
8348}
8349
df7492f9 8350DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 8351 Sset_coding_system_priority, 0, MANY, 0,
da7db224 8352 doc: /* Assign higher priority to the coding systems given as arguments.
ff563fce 8353If multiple coding systems belongs to the same category,
a3181084
DL
8354all but the first one are ignored.
8355
8356usage: (set-coding-system-priority ...) */)
df7492f9
KH
8357 (nargs, args)
8358 int nargs;
8359 Lisp_Object *args;
8360{
8361 int i, j;
8362 int changed[coding_category_max];
8363 enum coding_category priorities[coding_category_max];
8364
8365 bzero (changed, sizeof changed);
6b89e3aa 8366
df7492f9 8367 for (i = j = 0; i < nargs; i++)
6b89e3aa 8368 {
df7492f9
KH
8369 enum coding_category category;
8370 Lisp_Object spec, attrs;
6b89e3aa 8371
df7492f9
KH
8372 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8373 attrs = AREF (spec, 0);
8374 category = XINT (CODING_ATTR_CATEGORY (attrs));
8375 if (changed[category])
8376 /* Ignore this coding system because a coding system of the
8377 same category already had a higher priority. */
8378 continue;
8379 changed[category] = 1;
8380 priorities[j++] = category;
8381 if (coding_categories[category].id >= 0
8382 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8383 setup_coding_system (args[i], &coding_categories[category]);
ff563fce 8384 Fset (AREF (Vcoding_category_table, category), args[i]);
df7492f9 8385 }
6b89e3aa 8386
df7492f9
KH
8387 /* Now we have decided top J priorities. Reflect the order of the
8388 original priorities to the remaining priorities. */
6b89e3aa 8389
df7492f9 8390 for (i = j, j = 0; i < coding_category_max; i++, j++)
6b89e3aa 8391 {
df7492f9
KH
8392 while (j < coding_category_max
8393 && changed[coding_priorities[j]])
8394 j++;
8395 if (j == coding_category_max)
8396 abort ();
8397 priorities[i] = coding_priorities[j];
8398 }
6b89e3aa 8399
df7492f9 8400 bcopy (priorities, coding_priorities, sizeof priorities);
177c0ea7 8401
ff563fce
KH
8402 /* Update `coding-category-list'. */
8403 Vcoding_category_list = Qnil;
8404 for (i = coding_category_max - 1; i >= 0; i--)
8405 Vcoding_category_list
8406 = Fcons (AREF (Vcoding_category_table, priorities[i]),
8407 Vcoding_category_list);
6b89e3aa 8408
df7492f9 8409 return Qnil;
6b89e3aa
KH
8410}
8411
df7492f9
KH
8412DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8413 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
8414 doc: /* Return a list of coding systems ordered by their priorities.
8415HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
8416 (highestp)
8417 Lisp_Object highestp;
d46c5b12
KH
8418{
8419 int i;
df7492f9 8420 Lisp_Object val;
6b89e3aa 8421
df7492f9 8422 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 8423 {
df7492f9
KH
8424 enum coding_category category = coding_priorities[i];
8425 int id = coding_categories[category].id;
8426 Lisp_Object attrs;
068a9dbd 8427
df7492f9
KH
8428 if (id < 0)
8429 continue;
8430 attrs = CODING_ID_ATTRS (id);
8431 if (! NILP (highestp))
8432 return CODING_ATTR_BASE_NAME (attrs);
8433 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8434 }
8435 return Fnreverse (val);
8436}
068a9dbd 8437
f0064e1f 8438static char *suffixes[] = { "-unix", "-dos", "-mac" };
068a9dbd
KH
8439
8440static Lisp_Object
df7492f9
KH
8441make_subsidiaries (base)
8442 Lisp_Object base;
068a9dbd 8443{
df7492f9 8444 Lisp_Object subsidiaries;
8f924df7 8445 int base_name_len = SBYTES (SYMBOL_NAME (base));
df7492f9
KH
8446 char *buf = (char *) alloca (base_name_len + 6);
8447 int i;
068a9dbd 8448
8f924df7 8449 bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
df7492f9
KH
8450 subsidiaries = Fmake_vector (make_number (3), Qnil);
8451 for (i = 0; i < 3; i++)
068a9dbd 8452 {
df7492f9
KH
8453 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8454 ASET (subsidiaries, i, intern (buf));
068a9dbd 8455 }
df7492f9 8456 return subsidiaries;
068a9dbd
KH
8457}
8458
8459
df7492f9
KH
/* Define a new coding system from ARGS, whose elements are indexed by
   the coding_arg_* constants.  Builds the attribute vector ATTRS,
   validates the arguments specific to the coding type, and registers
   the coding system (plus three eol-type subsidiaries when no
   eol-type is given) in Vcoding_system_hash_table,
   Vcoding_system_list and Vcoding_system_alist.  Signals an error on
   invalid arguments.  */
8460DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8461 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
8462 doc: /* For internal use only.
8463usage: (define-coding-system-internal ...) */)
df7492f9
KH
8464 (nargs, args)
8465 int nargs;
8466 Lisp_Object *args;
068a9dbd 8467{
df7492f9
KH
8468 Lisp_Object name;
8469 Lisp_Object spec_vec; /* [ ATTRS ALIASES EOL_TYPE ] */
8470 Lisp_Object attrs; /* Vector of attributes. */
8471 Lisp_Object eol_type;
8472 Lisp_Object aliases;
8473 Lisp_Object coding_type, charset_list, safe_charsets;
8474 enum coding_category category;
8475 Lisp_Object tail, val;
8476 int max_charset_id = 0;
8477 int i;
068a9dbd 8478
df7492f9
KH
8479 if (nargs < coding_arg_max)
8480 goto short_args;
068a9dbd 8481
df7492f9 8482 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
068a9dbd 8483
df7492f9
KH
8484 name = args[coding_arg_name];
8485 CHECK_SYMBOL (name);
8486 CODING_ATTR_BASE_NAME (attrs) = name;
068a9dbd 8487
df7492f9
KH
8488 val = args[coding_arg_mnemonic];
8489 if (! STRINGP (val))
8490 CHECK_CHARACTER (val);
8491 CODING_ATTR_MNEMONIC (attrs) = val;
068a9dbd 8492
df7492f9
KH
8493 coding_type = args[coding_arg_coding_type];
8494 CHECK_SYMBOL (coding_type);
8495 CODING_ATTR_TYPE (attrs) = coding_type;
068a9dbd 8496
/* The charset list is either a symbol (`iso-2022' or `emacs-mule'
   select the corresponding predefined list of charset IDs) or a
   sequence of charsets, which is copied and converted in place to
   charset IDs.  MAX_CHARSET_ID tracks the largest ID seen.  */
df7492f9
KH
8497 charset_list = args[coding_arg_charset_list];
8498 if (SYMBOLP (charset_list))
8499 {
8500 if (EQ (charset_list, Qiso_2022))
8501 {
8502 if (! EQ (coding_type, Qiso_2022))
8503 error ("Invalid charset-list");
8504 charset_list = Viso_2022_charset_list;
8505 }
8506 else if (EQ (charset_list, Qemacs_mule))
8507 {
8508 if (! EQ (coding_type, Qemacs_mule))
8509 error ("Invalid charset-list");
8510 charset_list = Vemacs_mule_charset_list;
8511 }
8512 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8513 if (max_charset_id < XFASTINT (XCAR (tail)))
8514 max_charset_id = XFASTINT (XCAR (tail));
8515 }
068a9dbd
KH
8516 else
8517 {
df7492f9
KH
8518 charset_list = Fcopy_sequence (charset_list);
8519 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
068a9dbd 8520 {
df7492f9
KH
8521 struct charset *charset;
8522
8523 val = Fcar (tail);
8524 CHECK_CHARSET_GET_CHARSET (val, charset);
8525 if (EQ (coding_type, Qiso_2022)
8526 ? CHARSET_ISO_FINAL (charset) < 0
8527 : EQ (coding_type, Qemacs_mule)
8528 ? CHARSET_EMACS_MULE_ID (charset) < 0
8529 : 0)
8530 error ("Can't handle charset `%s'",
8f924df7 8531 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9 8532
8f924df7 8533 XSETCAR (tail, make_number (charset->id));
df7492f9
KH
8534 if (max_charset_id < charset->id)
8535 max_charset_id = charset->id;
068a9dbd
KH
8536 }
8537 }
df7492f9 8538 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
068a9dbd 8539
/* SAFE_CHARSETS is a string indexed by charset ID: 0 for each
   charset in CHARSET_LIST, 255 for every other ID.  */
df7492f9
KH
8540 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8541 make_number (255));
8542 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8f924df7 8543 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
df7492f9 8544 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
068a9dbd 8545
584948ac 8546 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
3a73fa5d 8547
df7492f9 8548 val = args[coding_arg_decode_translation_table];
a6f87d34 8549 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8550 CHECK_SYMBOL (val);
df7492f9 8551 CODING_ATTR_DECODE_TBL (attrs) = val;
3a73fa5d 8552
df7492f9 8553 val = args[coding_arg_encode_translation_table];
a6f87d34 8554 if (! CHAR_TABLE_P (val) && ! CONSP (val))
7d64c6ad 8555 CHECK_SYMBOL (val);
df7492f9 8556 CODING_ATTR_ENCODE_TBL (attrs) = val;
d46c5b12 8557
df7492f9
KH
8558 val = args[coding_arg_post_read_conversion];
8559 CHECK_SYMBOL (val);
8560 CODING_ATTR_POST_READ (attrs) = val;
d46c5b12 8561
df7492f9
KH
8562 val = args[coding_arg_pre_write_conversion];
8563 CHECK_SYMBOL (val);
8564 CODING_ATTR_PRE_WRITE (attrs) = val;
3a73fa5d 8565
df7492f9
KH
8566 val = args[coding_arg_default_char];
8567 if (NILP (val))
8568 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8569 else
8570 {
8f924df7 8571 CHECK_CHARACTER (val);
df7492f9
KH
8572 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8573 }
4031e2bf 8574
8f924df7
KH
8575 val = args[coding_arg_for_unibyte];
8576 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
3a73fa5d 8577
df7492f9
KH
8578 val = args[coding_arg_plist];
8579 CHECK_LIST (val);
8580 CODING_ATTR_PLIST (attrs) = val;
3a73fa5d 8581
/* From here on, handle the arguments specific to each coding type
   and decide CATEGORY.  */
df7492f9
KH
8582 if (EQ (coding_type, Qcharset))
8583 {
c7c66a95
KH
8584 /* Generate a lisp vector of 256 elements. Each element is nil,
8585 integer, or a list of charset IDs.
3a73fa5d 8586
c7c66a95
KH
8587 If Nth element is nil, the byte code N is invalid in this
8588 coding system.
4ed46869 8589
c7c66a95
KH
8590 If Nth element is a number NUM, N is the first byte of a
8591 charset whose ID is NUM.
4ed46869 8592
c7c66a95
KH
8593 If Nth element is a list of charset IDs, N is the first byte
8594 of one of them. The list is sorted by dimensions of the
2bc515e4 8595 charsets. A charset of smaller dimension comes first. */
df7492f9 8596 val = Fmake_vector (make_number (256), Qnil);
4ed46869 8597
5c99c2e6 8598 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
df7492f9 8599 {
c7c66a95
KH
8600 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8601 int dim = CHARSET_DIMENSION (charset);
8602 int idx = (dim - 1) * 4;
4ed46869 8603
5c99c2e6 8604 if (CHARSET_ASCII_COMPATIBLE_P (charset))
584948ac 8605 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4031e2bf 8606
15d143f7
KH
8607 for (i = charset->code_space[idx];
8608 i <= charset->code_space[idx + 1]; i++)
8609 {
c7c66a95
KH
8610 Lisp_Object tmp, tmp2;
8611 int dim2;
ec6d2bb8 8612
c7c66a95
KH
8613 tmp = AREF (val, i);
8614 if (NILP (tmp))
8615 tmp = XCAR (tail);
8616 else if (NUMBERP (tmp))
8617 {
8618 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8619 if (dim < dim2)
c7c66a95 8620 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
8621 else
8622 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 8623 }
15d143f7 8624 else
c7c66a95
KH
8625 {
8626 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8627 {
8628 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8629 if (dim < dim2)
8630 break;
8631 }
8632 if (NILP (tmp2))
8633 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8634 else
8635 {
8636 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8637 XSETCAR (tmp2, XCAR (tail));
8638 }
8639 }
8640 ASET (val, i, tmp);
15d143f7 8641 }
df7492f9
KH
8642 }
8643 ASET (attrs, coding_attr_charset_valids, val);
8644 category = coding_category_charset;
8645 }
8646 else if (EQ (coding_type, Qccl))
8647 {
8648 Lisp_Object valids;
ecec61c1 8649
df7492f9
KH
8650 if (nargs < coding_arg_ccl_max)
8651 goto short_args;
ecec61c1 8652
df7492f9
KH
8653 val = args[coding_arg_ccl_decoder];
8654 CHECK_CCL_PROGRAM (val);
8655 if (VECTORP (val))
8656 val = Fcopy_sequence (val);
8657 ASET (attrs, coding_attr_ccl_decoder, val);
ecec61c1 8658
df7492f9
KH
8659 val = args[coding_arg_ccl_encoder];
8660 CHECK_CCL_PROGRAM (val);
8661 if (VECTORP (val))
8662 val = Fcopy_sequence (val);
8663 ASET (attrs, coding_attr_ccl_encoder, val);
ecec61c1 8664
/* VALIDS is a 256-byte string; byte N is set to 1 when N is listed,
   either as a single integer or inside a (FROM . TO) range.  */
df7492f9
KH
8665 val = args[coding_arg_ccl_valids];
8666 valids = Fmake_string (make_number (256), make_number (0));
8667 for (tail = val; !NILP (tail); tail = Fcdr (tail))
8668 {
8dcbea82 8669 int from, to;
ecec61c1 8670
df7492f9
KH
8671 val = Fcar (tail);
8672 if (INTEGERP (val))
8dcbea82
KH
8673 {
8674 from = to = XINT (val);
8675 if (from < 0 || from > 255)
8676 args_out_of_range_3 (val, make_number (0), make_number (255));
8677 }
df7492f9
KH
8678 else
8679 {
df7492f9 8680 CHECK_CONS (val);
8f924df7
KH
8681 CHECK_NATNUM_CAR (val);
8682 CHECK_NATNUM_CDR (val);
df7492f9 8683 from = XINT (XCAR (val));
8f924df7 8684 if (from > 255)
8dcbea82
KH
8685 args_out_of_range_3 (XCAR (val),
8686 make_number (0), make_number (255));
df7492f9 8687 to = XINT (XCDR (val));
8dcbea82
KH
8688 if (to < from || to > 255)
8689 args_out_of_range_3 (XCDR (val),
8690 XCAR (val), make_number (255));
df7492f9 8691 }
8dcbea82 8692 for (i = from; i <= to; i++)
8f924df7 8693 SSET (valids, i, 1);
df7492f9
KH
8694 }
8695 ASET (attrs, coding_attr_ccl_valids, valids);
4ed46869 8696
df7492f9 8697 category = coding_category_ccl;
55ab7be3 8698 }
df7492f9 8699 else if (EQ (coding_type, Qutf_16))
55ab7be3 8700 {
df7492f9 8701 Lisp_Object bom, endian;
4ed46869 8702
584948ac 8703 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
4ed46869 8704
df7492f9
KH
8705 if (nargs < coding_arg_utf16_max)
8706 goto short_args;
4ed46869 8707
/* BOM is nil, t, or a cons of two coding systems.  */
df7492f9
KH
8708 bom = args[coding_arg_utf16_bom];
8709 if (! NILP (bom) && ! EQ (bom, Qt))
8710 {
8711 CHECK_CONS (bom);
8f924df7
KH
8712 val = XCAR (bom);
8713 CHECK_CODING_SYSTEM (val);
8714 val = XCDR (bom);
8715 CHECK_CODING_SYSTEM (val);
df7492f9
KH
8716 }
8717 ASET (attrs, coding_attr_utf_16_bom, bom);
8718
8719 endian = args[coding_arg_utf16_endian];
b49a1807
KH
8720 CHECK_SYMBOL (endian);
8721 if (NILP (endian))
8722 endian = Qbig;
8723 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8f924df7 8724 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
df7492f9
KH
8725 ASET (attrs, coding_attr_utf_16_endian, endian);
8726
8727 category = (CONSP (bom)
8728 ? coding_category_utf_16_auto
8729 : NILP (bom)
b49a1807 8730 ? (EQ (endian, Qbig)
df7492f9
KH
8731 ? coding_category_utf_16_be_nosig
8732 : coding_category_utf_16_le_nosig)
b49a1807 8733 : (EQ (endian, Qbig)
df7492f9
KH
8734 ? coding_category_utf_16_be
8735 : coding_category_utf_16_le));
8736 }
8737 else if (EQ (coding_type, Qiso_2022))
8738 {
8739 Lisp_Object initial, reg_usage, request, flags;
/* NOTE(review): this `i' shadows the function-level `i'; within this
   branch it ends up holding the integer value of FLAGS.  */
4776e638 8740 int i;
1397dc18 8741
df7492f9
KH
8742 if (nargs < coding_arg_iso2022_max)
8743 goto short_args;
8744
8745 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8746 CHECK_VECTOR (initial);
8747 for (i = 0; i < 4; i++)
8748 {
8749 val = Faref (initial, make_number (i));
8750 if (! NILP (val))
8751 {
584948ac
KH
8752 struct charset *charset;
8753
8754 CHECK_CHARSET_GET_CHARSET (val, charset);
8755 ASET (initial, i, make_number (CHARSET_ID (charset)));
8756 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8757 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8758 }
8759 else
8760 ASET (initial, i, make_number (-1));
8761 }
8762
8763 reg_usage = args[coding_arg_iso2022_reg_usage];
8764 CHECK_CONS (reg_usage);
8f924df7
KH
8765 CHECK_NUMBER_CAR (reg_usage);
8766 CHECK_NUMBER_CDR (reg_usage);
df7492f9
KH
8767
8768 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8769 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 8770 {
df7492f9 8771 int id;
8f924df7 8772 Lisp_Object tmp;
df7492f9
KH
8773
8774 val = Fcar (tail);
8775 CHECK_CONS (val);
8f924df7
KH
8776 tmp = XCAR (val);
8777 CHECK_CHARSET_GET_ID (tmp, id);
8778 CHECK_NATNUM_CDR (val);
df7492f9
KH
8779 if (XINT (XCDR (val)) >= 4)
8780 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8f924df7 8781 XSETCAR (val, make_number (id));
1397dc18 8782 }
4ed46869 8783
df7492f9
KH
8784 flags = args[coding_arg_iso2022_flags];
8785 CHECK_NATNUM (flags);
8786 i = XINT (flags);
8787 if (EQ (args[coding_arg_charset_list], Qiso_2022))
8788 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8789
8790 ASET (attrs, coding_attr_iso_initial, initial);
8791 ASET (attrs, coding_attr_iso_usage, reg_usage);
8792 ASET (attrs, coding_attr_iso_request, request);
8793 ASET (attrs, coding_attr_iso_flags, flags);
8794 setup_iso_safe_charsets (attrs);
8795
8796 if (i & CODING_ISO_FLAG_SEVEN_BITS)
8797 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8798 | CODING_ISO_FLAG_SINGLE_SHIFT))
8799 ? coding_category_iso_7_else
8800 : EQ (args[coding_arg_charset_list], Qiso_2022)
8801 ? coding_category_iso_7
8802 : coding_category_iso_7_tight);
8803 else
8804 {
8805 int id = XINT (AREF (initial, 1));
8806
c6fb6e98 8807 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
8808 || EQ (args[coding_arg_charset_list], Qiso_2022)
8809 || id < 0)
8810 ? coding_category_iso_8_else
8811 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8812 ? coding_category_iso_8_1
8813 : coding_category_iso_8_2);
8814 }
0ce7886f
KH
8815 if (category != coding_category_iso_8_1
8816 && category != coding_category_iso_8_2)
8817 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
df7492f9
KH
8818 }
8819 else if (EQ (coding_type, Qemacs_mule))
c28a9453 8820 {
df7492f9
KH
8821 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8822 ASET (attrs, coding_attr_emacs_mule_full, Qt);
584948ac 8823 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9 8824 category = coding_category_emacs_mule;
c28a9453 8825 }
df7492f9 8826 else if (EQ (coding_type, Qshift_jis))
c28a9453 8827 {
df7492f9
KH
8828
8829 struct charset *charset;
8830
/* Required charsets, in order: dimension 1, dimension 1,
   dimension 2, and optionally a fourth of dimension 2.  */
7d64c6ad 8831 if (XINT (Flength (charset_list)) != 3
6e07c25f 8832 && XINT (Flength (charset_list)) != 4)
7d64c6ad 8833 error ("There should be three or four charsets");
df7492f9
KH
8834
8835 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8836 if (CHARSET_DIMENSION (charset) != 1)
8837 error ("Dimension of charset %s is not one",
8f924df7 8838 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
8839 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8840 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8841
8842 charset_list = XCDR (charset_list);
8843 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8844 if (CHARSET_DIMENSION (charset) != 1)
8845 error ("Dimension of charset %s is not one",
8f924df7 8846 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
df7492f9
KH
8847
8848 charset_list = XCDR (charset_list);
8849 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8850 if (CHARSET_DIMENSION (charset) != 2)
7d64c6ad
KH
8851 error ("Dimension of charset %s is not two",
8852 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8853
8854 charset_list = XCDR (charset_list);
2b917a06
KH
8855 if (! NILP (charset_list))
8856 {
8857 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8858 if (CHARSET_DIMENSION (charset) != 2)
8859 error ("Dimension of charset %s is not two",
8860 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8861 }
df7492f9
KH
8862
8863 category = coding_category_sjis;
8864 Vsjis_coding_system = name;
c28a9453 8865 }
df7492f9
KH
8866 else if (EQ (coding_type, Qbig5))
8867 {
8868 struct charset *charset;
4ed46869 8869
/* Required charsets, in order: dimension 1, then dimension 2.  */
df7492f9
KH
8870 if (XINT (Flength (charset_list)) != 2)
8871 error ("There should be just two charsets");
8872
8873 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8874 if (CHARSET_DIMENSION (charset) != 1)
8875 error ("Dimension of charset %s is not one",
8f924df7 8876 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
584948ac
KH
8877 if (CHARSET_ASCII_COMPATIBLE_P (charset))
8878 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
df7492f9
KH
8879
8880 charset_list = XCDR (charset_list);
8881 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8882 if (CHARSET_DIMENSION (charset) != 2)
8883 error ("Dimension of charset %s is not two",
8f924df7 8884 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
4ed46869 8885
df7492f9
KH
8886 category = coding_category_big5;
8887 Vbig5_coding_system = name;
8888 }
8889 else if (EQ (coding_type, Qraw_text))
c28a9453 8890 {
584948ac
KH
8891 category = coding_category_raw_text;
8892 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
c28a9453 8893 }
df7492f9 8894 else if (EQ (coding_type, Qutf_8))
4ed46869 8895 {
584948ac
KH
8896 category = coding_category_utf_8;
8897 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
4ed46869 8898 }
df7492f9
KH
8899 else if (EQ (coding_type, Qundecided))
8900 category = coding_category_undecided;
4ed46869 8901 else
df7492f9 8902 error ("Invalid coding system type: %s",
8f924df7 8903 SDATA (SYMBOL_NAME (coding_type)));
4ed46869 8904
/* Record the category in ATTRS, and push the category symbol and
   the ASCII-compatibility flag onto the front of the plist.  */
df7492f9 8905 CODING_ATTR_CATEGORY (attrs) = make_number (category);
01378f49
KH
8906 CODING_ATTR_PLIST (attrs)
8907 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8908 CODING_ATTR_PLIST (attrs)));
35befdaa
KH
8909 CODING_ATTR_PLIST (attrs)
8910 = Fcons (QCascii_compatible_p,
8911 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
8912 CODING_ATTR_PLIST (attrs)));
c4825358 8913
df7492f9
KH
8914 eol_type = args[coding_arg_eol_type];
8915 if (! NILP (eol_type)
8916 && ! EQ (eol_type, Qunix)
8917 && ! EQ (eol_type, Qdos)
8918 && ! EQ (eol_type, Qmac))
8919 error ("Invalid eol-type");
4ed46869 8920
df7492f9 8921 aliases = Fcons (name, Qnil);
4ed46869 8922
/* No eol-type given: create the three subsidiaries NAME-unix,
   NAME-dos and NAME-mac, all sharing ATTRS, register each of them,
   and let EOL_TYPE be the vector of their names.  */
df7492f9
KH
8923 if (NILP (eol_type))
8924 {
8925 eol_type = make_subsidiaries (name);
8926 for (i = 0; i < 3; i++)
1397dc18 8927 {
df7492f9
KH
8928 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8929
8930 this_name = AREF (eol_type, i);
8931 this_aliases = Fcons (this_name, Qnil);
8932 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8933 this_spec = Fmake_vector (make_number (3), attrs);
8934 ASET (this_spec, 1, this_aliases);
8935 ASET (this_spec, 2, this_eol_type);
8936 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8937 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
583f71ca
KH
8938 val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
8939 if (NILP (val))
8940 Vcoding_system_alist
8941 = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8942 Vcoding_system_alist);
1397dc18 8943 }
d46c5b12 8944 }
4ed46869 8945
/* Register the coding system itself: [ ATTRS ALIASES EOL_TYPE ].  */
df7492f9
KH
8946 spec_vec = Fmake_vector (make_number (3), attrs);
8947 ASET (spec_vec, 1, aliases);
8948 ASET (spec_vec, 2, eol_type);
48b0f3ae 8949
df7492f9
KH
8950 Fputhash (name, spec_vec, Vcoding_system_hash_table);
8951 Vcoding_system_list = Fcons (name, Vcoding_system_list);
583f71ca
KH
8952 val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
8953 if (NILP (val))
8954 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8955 Vcoding_system_alist);
48b0f3ae 8956
/* Install NAME as the coding system of its category when the
   category has none yet, or set it up again when NAME already is
   that coding system.  */
df7492f9
KH
8957 {
8958 int id = coding_categories[category].id;
48b0f3ae 8959
df7492f9
KH
8960 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8961 setup_coding_system (name, &coding_categories[category]);
8962 }
48b0f3ae 8963
d46c5b12 8964 return Qnil;
48b0f3ae 8965
/* Reached when fewer arguments were given than the coding type
   requires.  */
df7492f9
KH
8966 short_args:
8967 return Fsignal (Qwrong_number_of_arguments,
8968 Fcons (intern ("define-coding-system-internal"),
8969 make_number (nargs)));
d46c5b12 8970}
4ed46869 8971
d6925f38 8972
a6f87d34
KH
8973DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
8974 3, 3, 0,
8975 doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
8976 (coding_system, prop, val)
8977 Lisp_Object coding_system, prop, val;
8978{
3dbe7859 8979 Lisp_Object spec, attrs;
a6f87d34
KH
8980
8981 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8982 attrs = AREF (spec, 0);
8983 if (EQ (prop, QCmnemonic))
8984 {
8985 if (! STRINGP (val))
8986 CHECK_CHARACTER (val);
8987 CODING_ATTR_MNEMONIC (attrs) = val;
8988 }
8989 else if (EQ (prop, QCdefalut_char))
8990 {
8991 if (NILP (val))
8992 val = make_number (' ');
8993 else
8994 CHECK_CHARACTER (val);
8995 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8996 }
8997 else if (EQ (prop, QCdecode_translation_table))
8998 {
8999 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9000 CHECK_SYMBOL (val);
9001 CODING_ATTR_DECODE_TBL (attrs) = val;
9002 }
9003 else if (EQ (prop, QCencode_translation_table))
9004 {
9005 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9006 CHECK_SYMBOL (val);
9007 CODING_ATTR_ENCODE_TBL (attrs) = val;
9008 }
9009 else if (EQ (prop, QCpost_read_conversion))
9010 {
9011 CHECK_SYMBOL (val);
9012 CODING_ATTR_POST_READ (attrs) = val;
9013 }
9014 else if (EQ (prop, QCpre_write_conversion))
9015 {
9016 CHECK_SYMBOL (val);
9017 CODING_ATTR_PRE_WRITE (attrs) = val;
9018 }
35befdaa
KH
9019 else if (EQ (prop, QCascii_compatible_p))
9020 {
9021 CODING_ATTR_ASCII_COMPAT (attrs) = val;
9022 }
a6f87d34
KH
9023
9024 CODING_ATTR_PLIST (attrs)
9025 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9026 return val;
9027}
9028
9029
df7492f9
KH
9030DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9031 Sdefine_coding_system_alias, 2, 2, 0,
9032 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
9033 (alias, coding_system)
9034 Lisp_Object alias, coding_system;
66cfb530 9035{
583f71ca 9036 Lisp_Object spec, aliases, eol_type, val;
4ed46869 9037
df7492f9
KH
9038 CHECK_SYMBOL (alias);
9039 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9040 aliases = AREF (spec, 1);
d6925f38
KH
9041 /* ALISES should be a list of length more than zero, and the first
9042 element is a base coding system. Append ALIAS at the tail of the
9043 list. */
df7492f9
KH
9044 while (!NILP (XCDR (aliases)))
9045 aliases = XCDR (aliases);
8f924df7 9046 XSETCDR (aliases, Fcons (alias, Qnil));
4ed46869 9047
df7492f9
KH
9048 eol_type = AREF (spec, 2);
9049 if (VECTORP (eol_type))
4ed46869 9050 {
df7492f9
KH
9051 Lisp_Object subsidiaries;
9052 int i;
4ed46869 9053
df7492f9
KH
9054 subsidiaries = make_subsidiaries (alias);
9055 for (i = 0; i < 3; i++)
9056 Fdefine_coding_system_alias (AREF (subsidiaries, i),
9057 AREF (eol_type, i));
4ed46869 9058 }
df7492f9
KH
9059
9060 Fputhash (alias, spec, Vcoding_system_hash_table);
d6925f38 9061 Vcoding_system_list = Fcons (alias, Vcoding_system_list);
583f71ca
KH
9062 val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9063 if (NILP (val))
9064 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9065 Vcoding_system_alist);
66cfb530 9066
4ed46869
KH
9067 return Qnil;
9068}
9069
df7492f9
KH
9070DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9071 1, 1, 0,
9072 doc: /* Return the base of CODING-SYSTEM.
da7db224 9073Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
9074 (coding_system)
9075 Lisp_Object coding_system;
d46c5b12 9076{
df7492f9 9077 Lisp_Object spec, attrs;
d46c5b12 9078
df7492f9
KH
9079 if (NILP (coding_system))
9080 return (Qno_conversion);
9081 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9082 attrs = AREF (spec, 0);
9083 return CODING_ATTR_BASE_NAME (attrs);
9084}
1397dc18 9085
df7492f9
KH
9086DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9087 1, 1, 0,
9088 doc: "Return the property list of CODING-SYSTEM.")
9089 (coding_system)
9090 Lisp_Object coding_system;
9091{
9092 Lisp_Object spec, attrs;
1397dc18 9093
df7492f9
KH
9094 if (NILP (coding_system))
9095 coding_system = Qno_conversion;
9096 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9097 attrs = AREF (spec, 0);
9098 return CODING_ATTR_PLIST (attrs);
d46c5b12
KH
9099}
9100
df7492f9
KH
9101
9102DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9103 1, 1, 0,
da7db224 9104 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
9105 (coding_system)
9106 Lisp_Object coding_system;
66cfb530 9107{
df7492f9 9108 Lisp_Object spec;
84d60297 9109
df7492f9
KH
9110 if (NILP (coding_system))
9111 coding_system = Qno_conversion;
9112 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 9113 return AREF (spec, 1);
df7492f9 9114}
66cfb530 9115
df7492f9
KH
9116DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9117 Scoding_system_eol_type, 1, 1, 0,
9118 doc: /* Return eol-type of CODING-SYSTEM.
9119An eol-type is integer 0, 1, 2, or a vector of coding systems.
66cfb530 9120
df7492f9
KH
9121Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9122and CR respectively.
66cfb530 9123
df7492f9
KH
9124A vector value indicates that a format of end-of-line should be
9125detected automatically. Nth element of the vector is the subsidiary
9126coding system whose eol-type is N. */)
6b89e3aa
KH
9127 (coding_system)
9128 Lisp_Object coding_system;
9129{
df7492f9
KH
9130 Lisp_Object spec, eol_type;
9131 int n;
6b89e3aa 9132
df7492f9
KH
9133 if (NILP (coding_system))
9134 coding_system = Qno_conversion;
9135 if (! CODING_SYSTEM_P (coding_system))
9136 return Qnil;
9137 spec = CODING_SYSTEM_SPEC (coding_system);
9138 eol_type = AREF (spec, 2);
9139 if (VECTORP (eol_type))
9140 return Fcopy_sequence (eol_type);
9141 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9142 return make_number (n);
6b89e3aa
KH
9143}
9144
4ed46869
KH
9145#endif /* emacs */
9146
9147\f
1397dc18 9148/*** 12. Postamble ***/
4ed46869 9149
dfcf069d 9150void
4ed46869
KH
9151init_coding_once ()
9152{
9153 int i;
9154
df7492f9
KH
9155 for (i = 0; i < coding_category_max; i++)
9156 {
9157 coding_categories[i].id = -1;
9158 coding_priorities[i] = i;
9159 }
4ed46869
KH
9160
9161 /* ISO2022 specific initialize routine. */
9162 for (i = 0; i < 0x20; i++)
b73bfc1c 9163 iso_code_class[i] = ISO_control_0;
4ed46869
KH
9164 for (i = 0x21; i < 0x7F; i++)
9165 iso_code_class[i] = ISO_graphic_plane_0;
9166 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 9167 iso_code_class[i] = ISO_control_1;
4ed46869
KH
9168 for (i = 0xA1; i < 0xFF; i++)
9169 iso_code_class[i] = ISO_graphic_plane_1;
9170 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9171 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4ed46869
KH
9172 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9173 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9174 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9175 iso_code_class[ISO_CODE_ESC] = ISO_escape;
9176 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9177 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9178 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9179
df7492f9
KH
9180 for (i = 0; i < 256; i++)
9181 {
9182 emacs_mule_bytes[i] = 1;
9183 }
7c78e542
KH
9184 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9185 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9186 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9187 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
9188}
9189
9190#ifdef emacs
9191
dfcf069d 9192void
e0e989f6
KH
9193syms_of_coding ()
9194{
df7492f9 9195 staticpro (&Vcoding_system_hash_table);
8f924df7
KH
9196 {
9197 Lisp_Object args[2];
9198 args[0] = QCtest;
9199 args[1] = Qeq;
9200 Vcoding_system_hash_table = Fmake_hash_table (2, args);
9201 }
df7492f9
KH
9202
9203 staticpro (&Vsjis_coding_system);
9204 Vsjis_coding_system = Qnil;
e0e989f6 9205
df7492f9
KH
9206 staticpro (&Vbig5_coding_system);
9207 Vbig5_coding_system = Qnil;
9208
24a73b0a
KH
9209 staticpro (&Vcode_conversion_reused_workbuf);
9210 Vcode_conversion_reused_workbuf = Qnil;
9211
9212 staticpro (&Vcode_conversion_workbuf_name);
9213 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
e0e989f6 9214
24a73b0a 9215 reused_workbuf_in_use = 0;
df7492f9
KH
9216
9217 DEFSYM (Qcharset, "charset");
9218 DEFSYM (Qtarget_idx, "target-idx");
9219 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
9220 Fset (Qcoding_system_history, Qnil);
9221
9ce27fde 9222 /* Target FILENAME is the first argument. */
e0e989f6 9223 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 9224 /* Target FILENAME is the third argument. */
e0e989f6
KH
9225 Fput (Qwrite_region, Qtarget_idx, make_number (2));
9226
df7492f9 9227 DEFSYM (Qcall_process, "call-process");
9ce27fde 9228 /* Target PROGRAM is the first argument. */
e0e989f6
KH
9229 Fput (Qcall_process, Qtarget_idx, make_number (0));
9230
df7492f9 9231 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 9232 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9233 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9234
df7492f9 9235 DEFSYM (Qstart_process, "start-process");
9ce27fde 9236 /* Target PROGRAM is the third argument. */
e0e989f6
KH
9237 Fput (Qstart_process, Qtarget_idx, make_number (2));
9238
df7492f9 9239 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 9240 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
9241 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9242
df7492f9
KH
9243 DEFSYM (Qcoding_system, "coding-system");
9244 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 9245
df7492f9
KH
9246 DEFSYM (Qeol_type, "eol-type");
9247 DEFSYM (Qunix, "unix");
9248 DEFSYM (Qdos, "dos");
4ed46869 9249
df7492f9
KH
9250 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9251 DEFSYM (Qpost_read_conversion, "post-read-conversion");
9252 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9253 DEFSYM (Qdefault_char, "default-char");
9254 DEFSYM (Qundecided, "undecided");
9255 DEFSYM (Qno_conversion, "no-conversion");
9256 DEFSYM (Qraw_text, "raw-text");
4ed46869 9257
df7492f9 9258 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 9259
df7492f9 9260 DEFSYM (Qutf_8, "utf-8");
8f924df7 9261 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
27901516 9262
df7492f9 9263 DEFSYM (Qutf_16, "utf-16");
df7492f9
KH
9264 DEFSYM (Qbig, "big");
9265 DEFSYM (Qlittle, "little");
27901516 9266
df7492f9
KH
9267 DEFSYM (Qshift_jis, "shift-jis");
9268 DEFSYM (Qbig5, "big5");
4ed46869 9269
df7492f9 9270 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 9271
df7492f9 9272 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
9273 Fput (Qcoding_system_error, Qerror_conditions,
9274 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9275 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 9276 build_string ("Invalid coding system"));
4ed46869 9277
05e6f5dc
KH
9278 /* Intern this now in case it isn't already done.
9279 Setting this variable twice is harmless.
9280 But don't staticpro it here--that is done in alloc.c. */
9281 Qchar_table_extra_slots = intern ("char-table-extra-slots");
70c22245 9282
df7492f9 9283 DEFSYM (Qtranslation_table, "translation-table");
433f7f87 9284 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
df7492f9
KH
9285 DEFSYM (Qtranslation_table_id, "translation-table-id");
9286 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9287 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
1397dc18 9288
df7492f9 9289 DEFSYM (Qvalid_codes, "valid-codes");
9ce27fde 9290
df7492f9 9291 DEFSYM (Qemacs_mule, "emacs-mule");
d46c5b12 9292
01378f49 9293 DEFSYM (QCcategory, ":category");
a6f87d34
KH
9294 DEFSYM (QCmnemonic, ":mnemonic");
9295 DEFSYM (QCdefalut_char, ":default-char");
9296 DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9297 DEFSYM (QCencode_translation_table, ":encode-translation-table");
9298 DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9299 DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
35befdaa 9300 DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
01378f49 9301
df7492f9
KH
9302 Vcoding_category_table
9303 = Fmake_vector (make_number (coding_category_max), Qnil);
9304 staticpro (&Vcoding_category_table);
9305 /* Followings are target of code detection. */
9306 ASET (Vcoding_category_table, coding_category_iso_7,
9307 intern ("coding-category-iso-7"));
9308 ASET (Vcoding_category_table, coding_category_iso_7_tight,
9309 intern ("coding-category-iso-7-tight"));
9310 ASET (Vcoding_category_table, coding_category_iso_8_1,
9311 intern ("coding-category-iso-8-1"));
9312 ASET (Vcoding_category_table, coding_category_iso_8_2,
9313 intern ("coding-category-iso-8-2"));
9314 ASET (Vcoding_category_table, coding_category_iso_7_else,
9315 intern ("coding-category-iso-7-else"));
9316 ASET (Vcoding_category_table, coding_category_iso_8_else,
9317 intern ("coding-category-iso-8-else"));
9318 ASET (Vcoding_category_table, coding_category_utf_8,
9319 intern ("coding-category-utf-8"));
9320 ASET (Vcoding_category_table, coding_category_utf_16_be,
9321 intern ("coding-category-utf-16-be"));
ff563fce
KH
9322 ASET (Vcoding_category_table, coding_category_utf_16_auto,
9323 intern ("coding-category-utf-16-auto"));
df7492f9
KH
9324 ASET (Vcoding_category_table, coding_category_utf_16_le,
9325 intern ("coding-category-utf-16-le"));
9326 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9327 intern ("coding-category-utf-16-be-nosig"));
9328 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9329 intern ("coding-category-utf-16-le-nosig"));
9330 ASET (Vcoding_category_table, coding_category_charset,
9331 intern ("coding-category-charset"));
9332 ASET (Vcoding_category_table, coding_category_sjis,
9333 intern ("coding-category-sjis"));
9334 ASET (Vcoding_category_table, coding_category_big5,
9335 intern ("coding-category-big5"));
9336 ASET (Vcoding_category_table, coding_category_ccl,
9337 intern ("coding-category-ccl"));
9338 ASET (Vcoding_category_table, coding_category_emacs_mule,
9339 intern ("coding-category-emacs-mule"));
9340 /* Followings are NOT target of code detection. */
9341 ASET (Vcoding_category_table, coding_category_raw_text,
9342 intern ("coding-category-raw-text"));
9343 ASET (Vcoding_category_table, coding_category_undecided,
9344 intern ("coding-category-undecided"));
ecf488bc 9345
065e3595
KH
9346 DEFSYM (Qinsufficient_source, "insufficient-source");
9347 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9348 DEFSYM (Qinvalid_source, "invalid-source");
9349 DEFSYM (Qinterrupted, "interrupted");
9350 DEFSYM (Qinsufficient_memory, "insufficient-memory");
44e8490d 9351 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
065e3595 9352
4ed46869
KH
9353 defsubr (&Scoding_system_p);
9354 defsubr (&Sread_coding_system);
9355 defsubr (&Sread_non_nil_coding_system);
9356 defsubr (&Scheck_coding_system);
9357 defsubr (&Sdetect_coding_region);
d46c5b12 9358 defsubr (&Sdetect_coding_string);
05e6f5dc 9359 defsubr (&Sfind_coding_systems_region_internal);
068a9dbd 9360 defsubr (&Sunencodable_char_position);
df7492f9 9361 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
9362 defsubr (&Sdecode_coding_region);
9363 defsubr (&Sencode_coding_region);
9364 defsubr (&Sdecode_coding_string);
9365 defsubr (&Sencode_coding_string);
9366 defsubr (&Sdecode_sjis_char);
9367 defsubr (&Sencode_sjis_char);
9368 defsubr (&Sdecode_big5_char);
9369 defsubr (&Sencode_big5_char);
1ba9e4ab 9370 defsubr (&Sset_terminal_coding_system_internal);
c4825358 9371 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 9372 defsubr (&Sterminal_coding_system);
1ba9e4ab 9373 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 9374 defsubr (&Skeyboard_coding_system);
a5d301df 9375 defsubr (&Sfind_operation_coding_system);
df7492f9 9376 defsubr (&Sset_coding_system_priority);
6b89e3aa 9377 defsubr (&Sdefine_coding_system_internal);
df7492f9 9378 defsubr (&Sdefine_coding_system_alias);
a6f87d34 9379 defsubr (&Scoding_system_put);
df7492f9
KH
9380 defsubr (&Scoding_system_base);
9381 defsubr (&Scoding_system_plist);
9382 defsubr (&Scoding_system_aliases);
9383 defsubr (&Scoding_system_eol_type);
9384 defsubr (&Scoding_system_priority_list);
4ed46869 9385
4608c386 9386 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
9387 doc: /* List of coding systems.
9388
9389Do not alter the value of this variable manually. This variable should be
df7492f9 9390updated by the functions `define-coding-system' and
48b0f3ae 9391`define-coding-system-alias'. */);
4608c386
KH
9392 Vcoding_system_list = Qnil;
9393
9394 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
9395 doc: /* Alist of coding system names.
9396Each element is one element list of coding system name.
9397This variable is given to `completing-read' as TABLE argument.
9398
9399Do not alter the value of this variable manually. This variable should be
9400updated by the functions `make-coding-system' and
9401`define-coding-system-alias'. */);
4608c386
KH
9402 Vcoding_system_alist = Qnil;
9403
4ed46869 9404 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
9405 doc: /* List of coding-categories (symbols) ordered by priority.
9406
9407On detecting a coding system, Emacs tries code detection algorithms
9408associated with each coding-category one by one in this order. When
9409one algorithm agrees with a byte sequence of source text, the coding
0ec31faf
KH
9410system bound to the corresponding coding-category is selected.
9411
42205607 9412Don't modify this variable directly, but use `set-coding-priority'. */);
4ed46869
KH
9413 {
9414 int i;
9415
9416 Vcoding_category_list = Qnil;
df7492f9 9417 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 9418 Vcoding_category_list
d46c5b12
KH
9419 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9420 Vcoding_category_list);
4ed46869
KH
9421 }
9422
9423 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
9424 doc: /* Specify the coding system for read operations.
9425It is useful to bind this variable with `let', but do not set it globally.
9426If the value is a coding system, it is used for decoding on read operation.
9427If not, an appropriate element is used from one of the coding system alists:
9428There are three such tables, `file-coding-system-alist',
9429`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
9430 Vcoding_system_for_read = Qnil;
9431
9432 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
9433 doc: /* Specify the coding system for write operations.
9434Programs bind this variable with `let', but you should not set it globally.
9435If the value is a coding system, it is used for encoding of output,
9436when writing it to a file and when sending it to a file or subprocess.
9437
9438If this does not specify a coding system, an appropriate element
9439is used from one of the coding system alists:
9440There are three such tables, `file-coding-system-alist',
9441`process-coding-system-alist', and `network-coding-system-alist'.
9442For output to files, if the above procedure does not specify a coding system,
9443the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
9444 Vcoding_system_for_write = Qnil;
9445
9446 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
9447 doc: /*
9448Coding system used in the latest file or process I/O. */);
4ed46869
KH
9449 Vlast_coding_system_used = Qnil;
9450
065e3595
KH
9451 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9452 doc: /*
9453Error status of the last code conversion.
9454
9455When an error was detected in the last code conversion, this variable
9456is set to one of the following symbols.
9457 `insufficient-source'
9458 `inconsistent-eol'
9459 `invalid-source'
9460 `interrupted'
9461 `insufficient-memory'
9462When no error was detected, the value doesn't change. So, to check
9463the error status of a code conversion by this variable, you must
9464explicitly set this variable to nil before performing code
9465conversion. */);
9466 Vlast_code_conversion_error = Qnil;
9467
9ce27fde 9468 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
9469 doc: /*
9470*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
9471See info node `Coding Systems' and info node `Text and Binary' concerning
9472such conversion. */);
9ce27fde
KH
9473 inhibit_eol_conversion = 0;
9474
ed29121d 9475 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
9476 doc: /*
9477Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
9478Bind it to t if the process output is to be treated as if it were a file
9479read from some filesystem. */);
ed29121d
EZ
9480 inherit_process_coding_system = 0;
9481
02ba4723 9482 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
9483 doc: /*
9484Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
9485The format is ((PATTERN . VAL) ...),
9486where PATTERN is a regular expression matching a file name,
9487VAL is a coding system, a cons of coding systems, or a function symbol.
9488If VAL is a coding system, it is used for both decoding and encoding
9489the file contents.
9490If VAL is a cons of coding systems, the car part is used for decoding,
9491and the cdr part is used for encoding.
9492If VAL is a function symbol, the function must return a coding system
0192762c
DL
9493or a cons of coding systems which are used as above. The function gets
9494the arguments with which `find-operation-coding-systems' was called.
48b0f3ae
PJ
9495
9496See also the function `find-operation-coding-system'
9497and the variable `auto-coding-alist'. */);
02ba4723
KH
9498 Vfile_coding_system_alist = Qnil;
9499
9500 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
9501 doc: /*
9502Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
9503The format is ((PATTERN . VAL) ...),
9504where PATTERN is a regular expression matching a program name,
9505VAL is a coding system, a cons of coding systems, or a function symbol.
9506If VAL is a coding system, it is used for both decoding what received
9507from the program and encoding what sent to the program.
9508If VAL is a cons of coding systems, the car part is used for decoding,
9509and the cdr part is used for encoding.
9510If VAL is a function symbol, the function must return a coding system
9511or a cons of coding systems which are used as above.
9512
9513See also the function `find-operation-coding-system'. */);
02ba4723
KH
9514 Vprocess_coding_system_alist = Qnil;
9515
9516 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
9517 doc: /*
9518Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
9519The format is ((PATTERN . VAL) ...),
9520where PATTERN is a regular expression matching a network service name
9521or is a port number to connect to,
9522VAL is a coding system, a cons of coding systems, or a function symbol.
9523If VAL is a coding system, it is used for both decoding what received
9524from the network stream and encoding what sent to the network stream.
9525If VAL is a cons of coding systems, the car part is used for decoding,
9526and the cdr part is used for encoding.
9527If VAL is a function symbol, the function must return a coding system
9528or a cons of coding systems which are used as above.
9529
9530See also the function `find-operation-coding-system'. */);
02ba4723 9531 Vnetwork_coding_system_alist = Qnil;
4ed46869 9532
68c45bf0 9533 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
9534 doc: /* Coding system to use with system messages.
9535Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
9536 Vlocale_coding_system = Qnil;
9537
005f0d35 9538 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 9539 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
9540 doc: /*
9541*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 9542 eol_mnemonic_unix = build_string (":");
4ed46869 9543
7722baf9 9544 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
9545 doc: /*
9546*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 9547 eol_mnemonic_dos = build_string ("\\");
4ed46869 9548
7722baf9 9549 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
9550 doc: /*
9551*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 9552 eol_mnemonic_mac = build_string ("/");
4ed46869 9553
7722baf9 9554 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
9555 doc: /*
9556*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 9557 eol_mnemonic_undecided = build_string (":");
4ed46869 9558
84fbb8a0 9559 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
9560 doc: /*
9561*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 9562 Venable_character_translation = Qt;
bdd9fb48 9563
f967223b 9564 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
9565 &Vstandard_translation_table_for_decode,
9566 doc: /* Table for translating characters while decoding. */);
f967223b 9567 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 9568
f967223b 9569 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
9570 &Vstandard_translation_table_for_encode,
9571 doc: /* Table for translating characters while encoding. */);
f967223b 9572 Vstandard_translation_table_for_encode = Qnil;
4ed46869 9573
df7492f9 9574 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
9575 doc: /* Alist of charsets vs revision numbers.
9576While encoding, if a charset (car part of an element) is found,
df7492f9
KH
9577designate it with the escape sequence identifying revision (cdr part
9578of the element). */);
9579 Vcharset_revision_table = Qnil;
02ba4723
KH
9580
9581 DEFVAR_LISP ("default-process-coding-system",
9582 &Vdefault_process_coding_system,
48b0f3ae
PJ
9583 doc: /* Cons of coding systems used for process I/O by default.
9584The car part is used for decoding a process output,
9585the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 9586 Vdefault_process_coding_system = Qnil;
c4825358 9587
3f003981 9588 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
9589 doc: /*
9590Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
9591This is a vector of length 256.
9592If Nth element is non-nil, the existence of code N in a file
9593\(or output of subprocess) doesn't prevent it to be detected as
9594a coding system of ISO 2022 variant which has a flag
9595`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9596or reading output of a subprocess.
9597Only 128th through 159th elements has a meaning. */);
3f003981 9598 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
9599
9600 DEFVAR_LISP ("select-safe-coding-system-function",
9601 &Vselect_safe_coding_system_function,
df7492f9
KH
9602 doc: /*
9603Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
9604
9605If set, this function is called to force a user to select a proper
9606coding system which can encode the text in the case that a default
9607coding system used in each operation can't encode the text.
9608
9609The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
9610 Vselect_safe_coding_system_function = Qnil;
9611
5d5bf4d8
KH
9612 DEFVAR_BOOL ("coding-system-require-warning",
9613 &coding_system_require_warning,
9614 doc: /* Internal use only.
6b89e3aa
KH
9615If non-nil, on writing a file, `select-safe-coding-system-function' is
9616called even if `coding-system-for-write' is non-nil. The command
9617`universal-coding-system-argument' binds this variable to t temporarily. */);
5d5bf4d8
KH
9618 coding_system_require_warning = 0;
9619
9620
22ab2303 9621 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 9622 &inhibit_iso_escape_detection,
df7492f9
KH
9623 doc: /*
9624If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
9625
9626By default, on reading a file, Emacs tries to detect how the text is
9627encoded. This code detection is sensitive to escape sequences. If
9628the sequence is valid as ISO2022, the code is determined as one of
9629the ISO2022 encodings, and the file is decoded by the corresponding
9630coding system (e.g. `iso-2022-7bit').
9631
9632However, there may be a case that you want to read escape sequences in
9633a file as is. In such a case, you can set this variable to non-nil.
9634Then, as the code detection ignores any escape sequences, no file is
9635detected as encoded in some ISO2022 encoding. The result is that all
9636escape sequences become visible in a buffer.
9637
9638The default value is nil, and it is strongly recommended not to change
9639it. That is because many Emacs Lisp source files that contain
9640non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9641in Emacs's distribution, and they won't be decoded correctly on
9642reading if you suppress escape sequence detection.
9643
9644The other way to read escape sequences in a file without decoding is
9645to explicitly specify some coding system that doesn't use ISO2022's
9646escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 9647 inhibit_iso_escape_detection = 0;
002fdb44
DL
9648
9649 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
15c8f9d1
DL
9650 doc: /* Char table for translating self-inserting characters.
9651This is applied to the result of input methods, not their input. See also
9652`keyboard-translate-table'. */);
002fdb44 9653 Vtranslation_table_for_input = Qnil;
8f924df7 9654
2c78b7e1
KH
9655 {
9656 Lisp_Object args[coding_arg_max];
8f924df7 9657 Lisp_Object plist[16];
2c78b7e1
KH
9658 int i;
9659
9660 for (i = 0; i < coding_arg_max; i++)
9661 args[i] = Qnil;
9662
9663 plist[0] = intern (":name");
9664 plist[1] = args[coding_arg_name] = Qno_conversion;
9665 plist[2] = intern (":mnemonic");
9666 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9667 plist[4] = intern (":coding-type");
9668 plist[5] = args[coding_arg_coding_type] = Qraw_text;
9669 plist[6] = intern (":ascii-compatible-p");
9670 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9671 plist[8] = intern (":default-char");
9672 plist[9] = args[coding_arg_default_char] = make_number (0);
8f924df7
KH
9673 plist[10] = intern (":for-unibyte");
9674 plist[11] = args[coding_arg_for_unibyte] = Qt;
9675 plist[12] = intern (":docstring");
9676 plist[13] = build_string ("Do no conversion.\n\
2c78b7e1
KH
9677\n\
9678When you visit a file with this coding, the file is read into a\n\
9679unibyte buffer as is, thus each byte of a file is treated as a\n\
9680character.");
8f924df7
KH
9681 plist[14] = intern (":eol-type");
9682 plist[15] = args[coding_arg_eol_type] = Qunix;
9683 args[coding_arg_plist] = Flist (16, plist);
2c78b7e1 9684 Fdefine_coding_system_internal (coding_arg_max, args);
ae6f73fa
KH
9685
9686 plist[1] = args[coding_arg_name] = Qundecided;
9687 plist[3] = args[coding_arg_mnemonic] = make_number ('-');
9688 plist[5] = args[coding_arg_coding_type] = Qundecided;
9689 /* This is already set.
35befdaa 9690 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
ae6f73fa
KH
9691 plist[8] = intern (":charset-list");
9692 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
9693 plist[11] = args[coding_arg_for_unibyte] = Qnil;
9694 plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
9695 plist[15] = args[coding_arg_eol_type] = Qnil;
9696 args[coding_arg_plist] = Flist (16, plist);
9697 Fdefine_coding_system_internal (coding_arg_max, args);
2c78b7e1
KH
9698 }
9699
9700 setup_coding_system (Qno_conversion, &keyboard_coding);
ae6f73fa 9701 setup_coding_system (Qundecided, &terminal_coding);
2c78b7e1 9702 setup_coding_system (Qno_conversion, &safe_terminal_coding);
ff563fce
KH
9703
9704 {
9705 int i;
9706
9707 for (i = 0; i < coding_category_max; i++)
9708 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9709 }
4ed46869
KH
9710}
9711
68c45bf0
PE
9712char *
9713emacs_strerror (error_number)
9714 int error_number;
9715{
9716 char *str;
9717
ca9c0567 9718 synchronize_system_messages_locale ();
68c45bf0
PE
9719 str = strerror (error_number);
9720
9721 if (! NILP (Vlocale_coding_system))
9722 {
9723 Lisp_Object dec = code_convert_string_norecord (build_string (str),
9724 Vlocale_coding_system,
9725 0);
d5db4077 9726 str = (char *) SDATA (dec);
68c45bf0
PE
9727 }
9728
9729 return str;
9730}
9731
4ed46869 9732#endif /* emacs */
9ffd559c
KH
9733
9734/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
9735 (do not change this comment) */