(costs_set): Declare static, non-initialized for pcc.
[bpt/emacs.git] / src / coding.c
CommitLineData
4ed46869 1/* Coding system handler (conversion, detection, and etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
70ad9fc4 4 Copyright (C) 2001 Free Software Foundation, Inc.
df7492f9
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcharset_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
df7492f9
KH
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
93dec019 82
df7492f9
KH
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9
KH
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used by Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
df7492f9 111 A coding system for a text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
5bad0796 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
e19c3639 138
4ed46869
KH
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9
KH
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwize (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
151
152 It also resets some bits of an integer pointed by MASK. The macros
153 CATEGORY_MASK_XXX specifies each bit of this integer.
154
155 Below is the template of these functions. */
156
4ed46869 157#if 0
df7492f9
KH
158static int
159detect_coding_XXX (coding, mask)
160 struct coding_system *coding;
161 int *mask;
4ed46869 162{
df7492f9
KH
163 unsigned char *src = coding->source;
164 unsigned char *src_end = coding->source + coding->src_bytes;
165 int multibytep = coding->src_multibyte;
166 int c;
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
175 /* Check if it conforms to XXX. If not, break the loop. */
176 }
177 /* As the data is invalid for XXX, reset a proper bits. */
178 *mask &= ~CODING_CATEGORY_XXX;
179 return 0;
180 no_more_source:
181 /* The source exausted. */
182 if (!found)
183 /* ASCII characters only. */
184 return 0;
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask &= CODING_CATEGORY_XXX;
187 return 1;
4ed46869
KH
188}
189#endif
190
191/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
df7492f9
KH
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
d46c5b12 197
df7492f9
KH
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
d46c5b12 202
df7492f9 203 Below is the template of these functions. */
d46c5b12 204
4ed46869 205#if 0
b73bfc1c 206static void
df7492f9 207decode_coding_XXXX (coding)
4ed46869 208 struct coding_system *coding;
4ed46869 209{
df7492f9
KH
210 unsigned char *src = coding->source + coding->consumed;
211 unsigned char *src_end = coding->source + coding->src_bytes;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base;
216 /* A buffer to produce decoded characters. */
217 int *charbuf = coding->charbuf;
218 int *charbuf_end = charbuf + coding->charbuf_size;
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
243}
244#endif
245
246/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
df7492f9
KH
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
d46c5b12 252
df7492f9
KH
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 257
df7492f9
KH
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce a encoded text until it
260 reaches at the head of not-yet-encoded source text.
d46c5b12 261
df7492f9 262 Below is a template of these functions. */
4ed46869 263#if 0
b73bfc1c 264static void
df7492f9 265encode_coding_XXX (coding)
4ed46869 266 struct coding_system *coding;
4ed46869 267{
df7492f9
KH
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
4ed46869
KH
285}
286#endif
287
4ed46869
KH
288\f
289/*** 1. Preamble ***/
290
68c45bf0 291#include <config.h>
4ed46869
KH
292#include <stdio.h>
293
4ed46869
KH
294#include "lisp.h"
295#include "buffer.h"
df7492f9 296#include "character.h"
4ed46869
KH
297#include "charset.h"
298#include "ccl.h"
df7492f9 299#include "composite.h"
4ed46869
KH
300#include "coding.h"
301#include "window.h"
302
df7492f9 303Lisp_Object Vcoding_system_hash_table;
4ed46869 304
df7492f9
KH
305Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
306Lisp_Object Qunix, Qdos, Qmac;
4ed46869
KH
307Lisp_Object Qbuffer_file_coding_system;
308Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 309Lisp_Object Qdefault_char;
27901516 310Lisp_Object Qno_conversion, Qundecided;
df7492f9
KH
311Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
312Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
313Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
bb0115a2 314Lisp_Object Qcoding_system_history;
1397dc18 315Lisp_Object Qvalid_codes;
4ed46869
KH
316
317extern Lisp_Object Qinsert_file_contents, Qwrite_region;
318Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
319Lisp_Object Qstart_process, Qopen_network_stream;
320Lisp_Object Qtarget_idx;
321
d46c5b12
KH
322Lisp_Object Vselect_safe_coding_system_function;
323
7722baf9
EZ
324/* Mnemonic string for each format of end-of-line. */
325Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
326/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 327 decided. */
7722baf9 328Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
329
330#ifdef emacs
331
4608c386
KH
332Lisp_Object Vcoding_system_list, Vcoding_system_alist;
333
334Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 335
d46c5b12
KH
336/* Coding system emacs-mule and raw-text are for converting only
337 end-of-line format. */
338Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 339
4ed46869
KH
340/* Coding-systems are handed between Emacs Lisp programs and C internal
341 routines by the following three variables. */
342/* Coding-system for reading files and receiving data from process. */
343Lisp_Object Vcoding_system_for_read;
344/* Coding-system for writing files and sending data to process. */
345Lisp_Object Vcoding_system_for_write;
346/* Coding-system actually used in the latest I/O. */
347Lisp_Object Vlast_coding_system_used;
348
c4825358 349/* A vector of length 256 which contains information about special
94487c4e 350 Latin codes (especially for dealing with Microsoft codes). */
3f003981 351Lisp_Object Vlatin_extra_code_table;
c4825358 352
9ce27fde
KH
353/* Flag to inhibit code conversion of end-of-line format. */
354int inhibit_eol_conversion;
355
74383408
KH
356/* Flag to inhibit ISO2022 escape sequence detection. */
357int inhibit_iso_escape_detection;
358
ed29121d
EZ
359/* Flag to make buffer-file-coding-system inherit from process-coding. */
360int inherit_process_coding_system;
361
c4825358 362/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
363struct coding_system terminal_coding;
364
c4825358
KH
365/* Coding system to be used to encode text for terminal display when
366 terminal coding system is nil. */
367struct coding_system safe_terminal_coding;
368
369/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
370struct coding_system keyboard_coding;
371
02ba4723
KH
372Lisp_Object Vfile_coding_system_alist;
373Lisp_Object Vprocess_coding_system_alist;
374Lisp_Object Vnetwork_coding_system_alist;
4ed46869 375
68c45bf0
PE
376Lisp_Object Vlocale_coding_system;
377
4ed46869
KH
378#endif /* emacs */
379
f967223b
KH
380/* Flag to tell if we look up translation table on character code
381 conversion. */
84fbb8a0 382Lisp_Object Venable_character_translation;
f967223b
KH
383/* Standard translation table to look up on decoding (reading). */
384Lisp_Object Vstandard_translation_table_for_decode;
385/* Standard translation table to look up on encoding (writing). */
386Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 387
f967223b
KH
388Lisp_Object Qtranslation_table;
389Lisp_Object Qtranslation_table_id;
390Lisp_Object Qtranslation_table_for_decode;
391Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
392
393/* Alist of charsets vs revision number. */
df7492f9 394static Lisp_Object Vcharset_revision_table;
4ed46869 395
02ba4723
KH
396/* Default coding systems used for process I/O. */
397Lisp_Object Vdefault_process_coding_system;
398
b843d1ae
KH
399/* Global flag to tell that we can't call post-read-conversion and
400 pre-write-conversion functions. Usually the value is zero, but it
401 is set to 1 temporarily while such functions are running. This is
402 to avoid infinite recursive call. */
403static int inhibit_pre_post_conversion;
404
df7492f9
KH
405/* Two special coding systems. */
406Lisp_Object Vsjis_coding_system;
407Lisp_Object Vbig5_coding_system;
408
409
410static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
411static void decode_coding_utf_8 P_ ((struct coding_system *));
412static int encode_coding_utf_8 P_ ((struct coding_system *));
413
414static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
415static void decode_coding_utf_16 P_ ((struct coding_system *));
416static int encode_coding_utf_16 P_ ((struct coding_system *));
417
418static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
419static void decode_coding_iso_2022 P_ ((struct coding_system *));
420static int encode_coding_iso_2022 P_ ((struct coding_system *));
421
422static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
423static void decode_coding_emacs_mule P_ ((struct coding_system *));
424static int encode_coding_emacs_mule P_ ((struct coding_system *));
425
426static int detect_coding_sjis P_ ((struct coding_system *, int *));
427static void decode_coding_sjis P_ ((struct coding_system *));
428static int encode_coding_sjis P_ ((struct coding_system *));
429
430static int detect_coding_big5 P_ ((struct coding_system *, int *));
431static void decode_coding_big5 P_ ((struct coding_system *));
432static int encode_coding_big5 P_ ((struct coding_system *));
433
434static int detect_coding_ccl P_ ((struct coding_system *, int *));
435static void decode_coding_ccl P_ ((struct coding_system *));
436static int encode_coding_ccl P_ ((struct coding_system *));
437
438static void decode_coding_raw_text P_ ((struct coding_system *));
439static int encode_coding_raw_text P_ ((struct coding_system *));
440
441
442/* ISO2022 section */
443
444#define CODING_ISO_INITIAL(coding, reg) \
445 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
446 coding_attr_iso_initial), \
447 reg)))
448
449
450#define CODING_ISO_REQUEST(coding, charset_id) \
451 ((charset_id <= (coding)->max_charset_id \
452 ? (coding)->safe_charsets[charset_id] \
453 : -1))
454
455
456#define CODING_ISO_FLAGS(coding) \
457 ((coding)->spec.iso_2022.flags)
458#define CODING_ISO_DESIGNATION(coding, reg) \
459 ((coding)->spec.iso_2022.current_designation[reg])
460#define CODING_ISO_INVOCATION(coding, plane) \
461 ((coding)->spec.iso_2022.current_invocation[plane])
462#define CODING_ISO_SINGLE_SHIFTING(coding) \
463 ((coding)->spec.iso_2022.single_shifting)
464#define CODING_ISO_BOL(coding) \
465 ((coding)->spec.iso_2022.bol)
466#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
467 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
468
469/* Control characters of ISO2022. */
470 /* code */ /* function */
471#define ISO_CODE_LF 0x0A /* line-feed */
472#define ISO_CODE_CR 0x0D /* carriage-return */
473#define ISO_CODE_SO 0x0E /* shift-out */
474#define ISO_CODE_SI 0x0F /* shift-in */
475#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
476#define ISO_CODE_ESC 0x1B /* escape */
477#define ISO_CODE_SS2 0x8E /* single-shift-2 */
478#define ISO_CODE_SS3 0x8F /* single-shift-3 */
479#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
480
481/* All code (1-byte) of ISO2022 is classified into one of the
482 followings. */
483enum iso_code_class_type
484 {
485 ISO_control_0, /* Control codes in the range
486 0x00..0x1F and 0x7F, except for the
487 following 5 codes. */
488 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
489 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
490 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
491 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
492 ISO_escape, /* ISO_CODE_SO (0x1B) */
493 ISO_control_1, /* Control codes in the range
494 0x80..0x9F, except for the
495 following 3 codes. */
496 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
497 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
498 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
499 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
500 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
501 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
502 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
503 };
05e6f5dc 504
df7492f9
KH
505/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
506 `iso-flags' attribute of an iso2022 coding system. */
93dec019 507
df7492f9
KH
508/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
509 instead of the correct short-form sequence (e.g. ESC $ A). */
510#define CODING_ISO_FLAG_LONG_FORM 0x0001
05e6f5dc 511
df7492f9
KH
512/* If set, reset graphic planes and registers at end-of-line to the
513 initial state. */
514#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 515
df7492f9
KH
516/* If set, reset graphic planes and registers before any control
517 characters to the initial state. */
518#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
4ed46869 519
df7492f9
KH
520/* If set, encode by 7-bit environment. */
521#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
b73bfc1c 522
df7492f9
KH
523/* If set, use locking-shift function. */
524#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 525
df7492f9
KH
526/* If set, use single-shift function. Overwrite
527 CODING_ISO_FLAG_LOCKING_SHIFT. */
528#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 529
df7492f9
KH
530/* If set, use designation escape sequence. */
531#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 532
df7492f9
KH
533/* If set, produce revision number sequence. */
534#define CODING_ISO_FLAG_REVISION 0x0080
f4dee582 535
df7492f9
KH
536/* If set, produce ISO6429's direction specifying sequence. */
537#define CODING_ISO_FLAG_DIRECTION 0x0100
4ed46869 538
df7492f9
KH
539/* If set, assume designation states are reset at beginning of line on
540 output. */
541#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
aa72b389 542
df7492f9
KH
543/* If set, designation sequence should be placed at beginning of line
544 on output. */
545#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 546
df7492f9
KH
547/* If set, do not encode unsafe charactes on output. */
548#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 549
df7492f9
KH
550/* If set, extra latin codes (128..159) are accepted as a valid code
551 on input. */
552#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 553
df7492f9 554#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 555
df7492f9 556#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 557
df7492f9 558#define CODING_ISO_FLAG_FULL_SUPPORT 0x8000
aa72b389 559
df7492f9
KH
560/* A character to be produced on output if encoding of the original
561 character is prohibited by CODING_ISO_FLAG_SAFE. */
562#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 563
aa72b389 564
df7492f9
KH
565/* UTF-16 section */
566#define CODING_UTF_16_BOM(coding) \
567 ((coding)->spec.utf_16.bom)
4ed46869 568
df7492f9
KH
569#define CODING_UTF_16_ENDIAN(coding) \
570 ((coding)->spec.utf_16.endian)
4ed46869 571
df7492f9
KH
572#define CODING_UTF_16_SURROGATE(coding) \
573 ((coding)->spec.utf_16.surrogate)
4ed46869 574
4ed46869 575
df7492f9
KH
576/* CCL section */
577#define CODING_CCL_DECODER(coding) \
578 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
579#define CODING_CCL_ENCODER(coding) \
580 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
581#define CODING_CCL_VALIDS(coding) \
582 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
583 ->data)
4ed46869 584
df7492f9 585/* Index for each coding category in `coding_category_table' */
4ed46869 586
df7492f9
KH
587enum coding_category
588 {
589 coding_category_iso_7,
590 coding_category_iso_7_tight,
591 coding_category_iso_8_1,
592 coding_category_iso_8_2,
593 coding_category_iso_7_else,
594 coding_category_iso_8_else,
595 coding_category_utf_8,
596 coding_category_utf_16_auto,
597 coding_category_utf_16_be,
598 coding_category_utf_16_le,
599 coding_category_utf_16_be_nosig,
600 coding_category_utf_16_le_nosig,
601 coding_category_charset,
602 coding_category_sjis,
603 coding_category_big5,
604 coding_category_ccl,
605 coding_category_emacs_mule,
606 /* All above are targets of code detection. */
607 coding_category_raw_text,
608 coding_category_undecided,
609 coding_category_max
610 };
611
612/* Definitions of flag bits used in detect_coding_XXXX. */
613#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
614#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
615#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
616#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
617#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
618#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
619#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
620#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
621#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
622#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
623#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
624#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
625#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
626#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
627#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
628#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
629
630/* This value is returned if detect_coding_mask () find nothing other
631 than ASCII characters. */
632#define CATEGORY_MASK_ANY \
633 (CATEGORY_MASK_ISO_7 \
634 | CATEGORY_MASK_ISO_7_TIGHT \
635 | CATEGORY_MASK_ISO_8_1 \
636 | CATEGORY_MASK_ISO_8_2 \
637 | CATEGORY_MASK_ISO_7_ELSE \
638 | CATEGORY_MASK_ISO_8_ELSE \
639 | CATEGORY_MASK_UTF_8 \
640 | CATEGORY_MASK_UTF_16_BE \
641 | CATEGORY_MASK_UTF_16_LE \
642 | CATEGORY_MASK_UTF_16_BE_NOSIG \
643 | CATEGORY_MASK_UTF_16_LE_NOSIG \
644 | CATEGORY_MASK_CHARSET \
645 | CATEGORY_MASK_SJIS \
646 | CATEGORY_MASK_BIG5 \
647 | CATEGORY_MASK_CCL \
648 | CATEGORY_MASK_EMACS_MULE)
649
650
651#define CATEGORY_MASK_ISO_7BIT \
652 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
653
654#define CATEGORY_MASK_ISO_8BIT \
655 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
656
657#define CATEGORY_MASK_ISO_ELSE \
658 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
659
660#define CATEGORY_MASK_ISO_ESCAPE \
661 (CATEGORY_MASK_ISO_7 \
662 | CATEGORY_MASK_ISO_7_TIGHT \
663 | CATEGORY_MASK_ISO_7_ELSE \
664 | CATEGORY_MASK_ISO_8_ELSE)
665
666#define CATEGORY_MASK_ISO \
667 ( CATEGORY_MASK_ISO_7BIT \
668 | CATEGORY_MASK_ISO_8BIT \
669 | CATEGORY_MASK_ISO_ELSE)
670
671#define CATEGORY_MASK_UTF_16 \
672 (CATEGORY_MASK_UTF_16_BE \
673 | CATEGORY_MASK_UTF_16_LE \
674 | CATEGORY_MASK_UTF_16_BE_NOSIG \
675 | CATEGORY_MASK_UTF_16_LE_NOSIG)
676
677
678/* List of symbols `coding-category-xxx' ordered by priority. This
679 variable is exposed to Emacs Lisp. */
680static Lisp_Object Vcoding_category_list;
681
682/* Table of coding categories (Lisp symbols). This variable is for
683 internal use oly. */
684static Lisp_Object Vcoding_category_table;
685
686/* Table of coding-categories ordered by priority. */
687static enum coding_category coding_priorities[coding_category_max];
688
689/* Nth element is a coding context for the coding system bound to the
690 Nth coding category. */
691static struct coding_system coding_categories[coding_category_max];
692
693static int detected_mask[coding_category_raw_text] =
694 { CATEGORY_MASK_ISO,
695 CATEGORY_MASK_ISO,
696 CATEGORY_MASK_ISO,
697 CATEGORY_MASK_ISO,
698 CATEGORY_MASK_ISO,
699 CATEGORY_MASK_ISO,
700 CATEGORY_MASK_UTF_8,
701 CATEGORY_MASK_UTF_16,
702 CATEGORY_MASK_UTF_16,
703 CATEGORY_MASK_UTF_16,
704 CATEGORY_MASK_UTF_16,
705 CATEGORY_MASK_UTF_16,
706 CATEGORY_MASK_CHARSET,
707 CATEGORY_MASK_SJIS,
708 CATEGORY_MASK_BIG5,
709 CATEGORY_MASK_CCL,
710 CATEGORY_MASK_EMACS_MULE
711 };
712
713/*** Commonly used macros and functions ***/
714
715#ifndef min
716#define min(a, b) ((a) < (b) ? (a) : (b))
717#endif
718#ifndef max
719#define max(a, b) ((a) > (b) ? (a) : (b))
720#endif
4ed46869 721
df7492f9
KH
722#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
723 do { \
724 attrs = CODING_ID_ATTRS (coding->id); \
725 eol_type = CODING_ID_EOL_TYPE (coding->id); \
726 if (VECTORP (eol_type)) \
727 eol_type = Qunix; \
728 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
729 } while (0)
4ed46869 730
4ed46869 731
df7492f9
KH
732/* Safely get one byte from the source text pointed by SRC which ends
733 at SRC_END, and set C to that byte. If there are not enough bytes
734 in the source, it jumps to `no_more_source'. The caller
735 should declare and set these variables appropriately in advance:
736 src, src_end, multibytep
737*/
aa72b389 738
df7492f9 739#define ONE_MORE_BYTE(c) \
aa72b389 740 do { \
df7492f9
KH
741 if (src == src_end) \
742 { \
743 if (src_base < src) \
744 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
745 goto no_more_source; \
746 } \
747 c = *src++; \
748 if (multibytep && (c & 0x80)) \
749 { \
750 if ((c & 0xFE) != 0xC0) \
751 error ("Undecodable char found"); \
752 c = ((c & 1) << 6) | *src++; \
753 } \
754 consumed_chars++; \
aa72b389
KH
755 } while (0)
756
aa72b389 757
df7492f9
KH
758#define ONE_MORE_BYTE_NO_CHECK(c) \
759 do { \
760 c = *src++; \
761 if (multibytep && (c & 0x80)) \
762 { \
763 if ((c & 0xFE) != 0xC0) \
764 error ("Undecodable char found"); \
765 c = ((c & 1) << 6) | *src++; \
766 } \
781d7a48 767 consumed_chars++; \
aa72b389
KH
768 } while (0)
769
aa72b389 770
df7492f9
KH
771/* Store a byte C in the place pointed by DST and increment DST to the
772 next free point, and increment PRODUCED_CHARS. The caller should
773 assure that C is 0..127, and declare and set the variable `dst'
774 appropriately in advance.
775*/
aa72b389
KH
776
777
df7492f9
KH
778#define EMIT_ONE_ASCII_BYTE(c) \
779 do { \
780 produced_chars++; \
781 *dst++ = (c); \
782 } while (0)
aa72b389 783
aa72b389 784
df7492f9 785/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
aa72b389 786
df7492f9
KH
787#define EMIT_TWO_ASCII_BYTES(c1, c2) \
788 do { \
789 produced_chars += 2; \
790 *dst++ = (c1), *dst++ = (c2); \
791 } while (0)
aa72b389 792
df7492f9
KH
793
794/* Store a byte C in the place pointed by DST and increment DST to the
795 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
796 nonzero, store in an appropriate multibyte from. The caller should
797 declare and set the variables `dst' and `multibytep' appropriately
798 in advance. */
799
800#define EMIT_ONE_BYTE(c) \
801 do { \
802 produced_chars++; \
803 if (multibytep) \
804 { \
805 int ch = (c); \
806 if (ch >= 0x80) \
807 ch = BYTE8_TO_CHAR (ch); \
808 CHAR_STRING_ADVANCE (ch, dst); \
809 } \
810 else \
811 *dst++ = (c); \
aa72b389
KH
812 } while (0)
813
814
df7492f9 815/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 816
e19c3639
KH
817#define EMIT_TWO_BYTES(c1, c2) \
818 do { \
819 produced_chars += 2; \
820 if (multibytep) \
821 { \
822 int ch; \
823 \
824 ch = (c1); \
825 if (ch >= 0x80) \
826 ch = BYTE8_TO_CHAR (ch); \
827 CHAR_STRING_ADVANCE (ch, dst); \
828 ch = (c2); \
829 if (ch >= 0x80) \
830 ch = BYTE8_TO_CHAR (ch); \
831 CHAR_STRING_ADVANCE (ch, dst); \
832 } \
833 else \
834 { \
835 *dst++ = (c1); \
836 *dst++ = (c2); \
837 } \
aa72b389
KH
838 } while (0)
839
840
df7492f9
KH
841#define EMIT_THREE_BYTES(c1, c2, c3) \
842 do { \
843 EMIT_ONE_BYTE (c1); \
844 EMIT_TWO_BYTES (c2, c3); \
845 } while (0)
aa72b389 846
aa72b389 847
df7492f9
KH
848#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
849 do { \
850 EMIT_TWO_BYTES (c1, c2); \
851 EMIT_TWO_BYTES (c3, c4); \
852 } while (0)
aa72b389 853
aa72b389 854
df7492f9
KH
855#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
856 do { \
857 charset_map_loaded = 0; \
858 c = DECODE_CHAR (charset, code); \
859 if (charset_map_loaded) \
860 { \
861 unsigned char *orig = coding->source; \
862 EMACS_INT offset; \
863 \
864 coding_set_source (coding); \
865 offset = coding->source - orig; \
866 src += offset; \
867 src_base += offset; \
868 src_end += offset; \
869 } \
870 } while (0)
aa72b389 871
aa72b389 872
df7492f9
KH
873#define ASSURE_DESTINATION(bytes) \
874 do { \
875 if (dst + (bytes) >= dst_end) \
876 { \
877 int more_bytes = charbuf_end - charbuf + (bytes); \
878 \
879 dst = alloc_destination (coding, more_bytes, dst); \
880 dst_end = coding->destination + coding->dst_bytes; \
881 } \
882 } while (0)
b1887814 883
df7492f9
KH
884
885
886static void
887coding_set_source (coding)
888 struct coding_system *coding;
889{
890 if (BUFFERP (coding->src_object))
891 {
892 if (coding->src_pos < 0)
893 coding->source = GAP_END_ADDR + coding->src_pos_byte;
894 else
895 {
e19c3639 896 struct buffer *buf = XBUFFER (coding->src_object);
e19c3639
KH
897 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
898 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
899
900 coding->source = beg_addr + coding->src_pos_byte - 1;
901 if (coding->src_pos_byte >= gpt_byte)
902 coding->source += BUF_GAP_SIZE (buf);
aa72b389
KH
903 }
904 }
df7492f9 905 else if (STRINGP (coding->src_object))
aa72b389 906 {
df7492f9
KH
907 coding->source = (XSTRING (coding->src_object)->data
908 + coding->src_pos_byte);
909 }
910 else
911 /* Otherwise, the source is C string and is never relocated
912 automatically. Thus we don't have to update anything. */
913 ;
914}
915
916static void
917coding_set_destination (coding)
918 struct coding_system *coding;
919{
920 if (BUFFERP (coding->dst_object))
921 {
922 /* We are sure that coding->dst_pos_byte is before the gap of the
923 buffer. */
924 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
925 + coding->dst_pos_byte - 1);
926 if (coding->src_pos < 0)
df7492f9
KH
927 coding->dst_bytes = (GAP_END_ADDR
928 - (coding->src_bytes - coding->consumed)
929 - coding->destination);
930 else
931 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
932 - coding->destination);
933 }
934 else
935 /* Otherwise, the destination is C string and is never relocated
936 automatically. Thus we don't have to update anything. */
937 ;
938}
939
940
941static void
942coding_alloc_by_realloc (coding, bytes)
943 struct coding_system *coding;
944 EMACS_INT bytes;
945{
946 coding->destination = (unsigned char *) xrealloc (coding->destination,
947 coding->dst_bytes + bytes);
948 coding->dst_bytes += bytes;
949}
950
951static void
952coding_alloc_by_making_gap (coding, bytes)
953 struct coding_system *coding;
954 EMACS_INT bytes;
955{
2c78b7e1
KH
956 if (BUFFERP (coding->dst_object)
957 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
958 {
959 EMACS_INT add = coding->src_bytes - coding->consumed;
960
961 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
962 make_gap (bytes);
963 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
964 }
965 else
966 {
2c78b7e1
KH
967 Lisp_Object this_buffer;
968
969 this_buffer = Fcurrent_buffer ();
df7492f9
KH
970 set_buffer_internal (XBUFFER (coding->dst_object));
971 make_gap (bytes);
972 set_buffer_internal (XBUFFER (this_buffer));
973 }
974}
975
976
977static unsigned char *
978alloc_destination (coding, nbytes, dst)
979 struct coding_system *coding;
980 int nbytes;
981 unsigned char *dst;
982{
983 EMACS_INT offset = dst - coding->destination;
984
985 if (BUFFERP (coding->dst_object))
986 coding_alloc_by_making_gap (coding, nbytes);
987 else
988 coding_alloc_by_realloc (coding, nbytes);
989 coding->result = CODING_RESULT_SUCCESS;
990 coding_set_destination (coding);
991 dst = coding->destination + offset;
992 return dst;
993}
aa72b389 994
df7492f9
KH
995\f
996/*** 2. Emacs' internal format (emacs-utf-8) ***/
997
998
999
1000\f
1001/*** 3. UTF-8 ***/
1002
1003/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1004 Check if a text is encoded in UTF-8. If it is, return
1005 CATEGORY_MASK_UTF_8, else return 0. */
1006
1007#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1008#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1009#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1010#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1011#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1012#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1013
1014static int
1015detect_coding_utf_8 (coding, mask)
1016 struct coding_system *coding;
1017 int *mask;
1018{
1019 unsigned char *src = coding->source, *src_base = src;
1020 unsigned char *src_end = coding->source + coding->src_bytes;
1021 int multibytep = coding->src_multibyte;
1022 int consumed_chars = 0;
1023 int found = 0;
1024
1025 /* A coding system of this category is always ASCII compatible. */
1026 src += coding->head_ascii;
1027
1028 while (1)
1029 {
1030 int c, c1, c2, c3, c4;
1031
1032 ONE_MORE_BYTE (c);
1033 if (UTF_8_1_OCTET_P (c))
1034 continue;
1035 ONE_MORE_BYTE (c1);
1036 if (! UTF_8_EXTRA_OCTET_P (c1))
1037 break;
1038 if (UTF_8_2_OCTET_LEADING_P (c))
1039 {
1040 found++;
1041 continue;
1042 }
1043 ONE_MORE_BYTE (c2);
1044 if (! UTF_8_EXTRA_OCTET_P (c2))
1045 break;
1046 if (UTF_8_3_OCTET_LEADING_P (c))
1047 {
1048 found++;
1049 continue;
1050 }
1051 ONE_MORE_BYTE (c3);
1052 if (! UTF_8_EXTRA_OCTET_P (c3))
1053 break;
1054 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1055 {
df7492f9
KH
1056 found++;
1057 continue;
1058 }
1059 ONE_MORE_BYTE (c4);
1060 if (! UTF_8_EXTRA_OCTET_P (c4))
1061 break;
1062 if (UTF_8_5_OCTET_LEADING_P (c))
1063 {
1064 found++;
1065 continue;
1066 }
1067 break;
1068 }
1069 *mask &= ~CATEGORY_MASK_UTF_8;
1070 return 0;
1071
1072 no_more_source:
1073 if (! found)
1074 return 0;
1075 *mask &= CATEGORY_MASK_UTF_8;
1076 return 1;
1077}
1078
1079
1080static void
1081decode_coding_utf_8 (coding)
1082 struct coding_system *coding;
1083{
1084 unsigned char *src = coding->source + coding->consumed;
1085 unsigned char *src_end = coding->source + coding->src_bytes;
1086 unsigned char *src_base;
1087 int *charbuf = coding->charbuf;
1088 int *charbuf_end = charbuf + coding->charbuf_size;
1089 int consumed_chars = 0, consumed_chars_base;
1090 int multibytep = coding->src_multibyte;
1091 Lisp_Object attr, eol_type, charset_list;
1092
1093 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1094
1095 while (1)
1096 {
1097 int c, c1, c2, c3, c4, c5;
1098
1099 src_base = src;
1100 consumed_chars_base = consumed_chars;
1101
1102 if (charbuf >= charbuf_end)
1103 break;
1104
1105 ONE_MORE_BYTE (c1);
1106 if (UTF_8_1_OCTET_P(c1))
1107 {
1108 c = c1;
1109 if (c == '\r')
aa72b389 1110 {
df7492f9
KH
1111 if (EQ (eol_type, Qdos))
1112 {
1113 if (src == src_end)
1114 goto no_more_source;
1115 if (*src == '\n')
1116 ONE_MORE_BYTE (c);
1117 }
1118 else if (EQ (eol_type, Qmac))
1119 c = '\n';
aa72b389 1120 }
aa72b389 1121 }
df7492f9 1122 else
aa72b389 1123 {
df7492f9
KH
1124 ONE_MORE_BYTE (c2);
1125 if (! UTF_8_EXTRA_OCTET_P (c2))
1126 goto invalid_code;
1127 if (UTF_8_2_OCTET_LEADING_P (c1))
1128 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1129 else
aa72b389 1130 {
df7492f9
KH
1131 ONE_MORE_BYTE (c3);
1132 if (! UTF_8_EXTRA_OCTET_P (c3))
1133 goto invalid_code;
1134 if (UTF_8_3_OCTET_LEADING_P (c1))
1135 c = (((c1 & 0xF) << 12)
1136 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1137 else
1138 {
1139 ONE_MORE_BYTE (c4);
1140 if (! UTF_8_EXTRA_OCTET_P (c4))
1141 goto invalid_code;
1142 if (UTF_8_4_OCTET_LEADING_P (c1))
1143 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1144 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1145 else
1146 {
1147 ONE_MORE_BYTE (c5);
1148 if (! UTF_8_EXTRA_OCTET_P (c5))
1149 goto invalid_code;
1150 if (UTF_8_5_OCTET_LEADING_P (c1))
1151 {
1152 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1153 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1154 | (c5 & 0x3F));
1155 if (c > MAX_CHAR)
1156 goto invalid_code;
1157 }
1158 else
1159 goto invalid_code;
1160 }
1161 }
aa72b389 1162 }
aa72b389 1163 }
df7492f9
KH
1164
1165 *charbuf++ = c;
1166 continue;
1167
1168 invalid_code:
1169 src = src_base;
1170 consumed_chars = consumed_chars_base;
1171 ONE_MORE_BYTE (c);
1172 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1173 coding->errors++;
aa72b389
KH
1174 }
1175
df7492f9
KH
1176 no_more_source:
1177 coding->consumed_char += consumed_chars_base;
1178 coding->consumed = src_base - coding->source;
1179 coding->charbuf_used = charbuf - coding->charbuf;
1180}
1181
1182
1183static int
1184encode_coding_utf_8 (coding)
1185 struct coding_system *coding;
1186{
1187 int multibytep = coding->dst_multibyte;
1188 int *charbuf = coding->charbuf;
1189 int *charbuf_end = charbuf + coding->charbuf_used;
1190 unsigned char *dst = coding->destination + coding->produced;
1191 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1192 int produced_chars = 0;
df7492f9
KH
1193 int c;
1194
1195 if (multibytep)
aa72b389 1196 {
df7492f9
KH
1197 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1198
1199 while (charbuf < charbuf_end)
aa72b389 1200 {
df7492f9
KH
1201 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1202
1203 ASSURE_DESTINATION (safe_room);
1204 c = *charbuf++;
1205 CHAR_STRING_ADVANCE (c, pend);
1206 for (p = str; p < pend; p++)
1207 EMIT_ONE_BYTE (*p);
aa72b389 1208 }
aa72b389 1209 }
df7492f9
KH
1210 else
1211 {
1212 int safe_room = MAX_MULTIBYTE_LENGTH;
1213
1214 while (charbuf < charbuf_end)
1215 {
1216 ASSURE_DESTINATION (safe_room);
1217 c = *charbuf++;
1218 dst += CHAR_STRING (c, dst);
1219 produced_chars++;
1220 }
1221 }
1222 coding->result = CODING_RESULT_SUCCESS;
1223 coding->produced_char += produced_chars;
1224 coding->produced = dst - coding->destination;
1225 return 0;
aa72b389
KH
1226}
1227
4ed46869 1228
df7492f9
KH
1229/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1230 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1231 Little Endian (otherwise). If it is, return
1232 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
1233 else return 0. */
1234
1235#define UTF_16_HIGH_SURROGATE_P(val) \
1236 (((val) & 0xFC00) == 0xD800)
1237
1238#define UTF_16_LOW_SURROGATE_P(val) \
1239 (((val) & 0xFC00) == 0xDC00)
1240
1241#define UTF_16_INVALID_P(val) \
1242 (((val) == 0xFFFE) \
1243 || ((val) == 0xFFFF) \
1244 || UTF_16_LOW_SURROGATE_P (val))
1245
1246
1247static int
1248detect_coding_utf_16 (coding, mask)
b73bfc1c 1249 struct coding_system *coding;
df7492f9 1250 int *mask;
b73bfc1c 1251{
df7492f9
KH
1252 unsigned char *src = coding->source, *src_base = src;
1253 unsigned char *src_end = coding->source + coding->src_bytes;
1254 int multibytep = coding->src_multibyte;
1255 int consumed_chars = 0;
1256 int c1, c2;
1257
1258 ONE_MORE_BYTE (c1);
1259 ONE_MORE_BYTE (c2);
4ed46869 1260
df7492f9 1261 if ((c1 == 0xFF) && (c2 == 0xFE))
b73bfc1c 1262 {
df7492f9
KH
1263 *mask &= CATEGORY_MASK_UTF_16_LE;
1264 return 1;
1265 }
1266 else if ((c1 == 0xFE) && (c2 == 0xFF))
1267 {
1268 *mask &= CATEGORY_MASK_UTF_16_BE;
1269 return 1;
1270 }
1271 no_more_source:
1272 return 0;
1273}
ec6d2bb8 1274
df7492f9
KH
1275static void
1276decode_coding_utf_16 (coding)
1277 struct coding_system *coding;
1278{
1279 unsigned char *src = coding->source + coding->consumed;
1280 unsigned char *src_end = coding->source + coding->src_bytes;
0be8721c 1281 unsigned char *src_base;
df7492f9
KH
1282 int *charbuf = coding->charbuf;
1283 int *charbuf_end = charbuf + coding->charbuf_size;
1284 int consumed_chars = 0, consumed_chars_base;
1285 int multibytep = coding->src_multibyte;
1286 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1287 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1288 int surrogate = CODING_UTF_16_SURROGATE (coding);
1289 Lisp_Object attr, eol_type, charset_list;
1290
1291 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1292
1293 if (bom != utf_16_without_bom)
1294 {
1295 int c, c1, c2;
4af310db 1296
df7492f9
KH
1297 src_base = src;
1298 ONE_MORE_BYTE (c1);
1299 ONE_MORE_BYTE (c2);
e19c3639 1300 c = (c1 << 8) | c2;
df7492f9
KH
1301 if (bom == utf_16_with_bom)
1302 {
1303 if (endian == utf_16_big_endian
1304 ? c != 0xFFFE : c != 0xFEFF)
4af310db 1305 {
df7492f9
KH
1306 /* We are sure that there's enouph room at CHARBUF. */
1307 *charbuf++ = c1;
1308 *charbuf++ = c2;
1309 coding->errors++;
4af310db 1310 }
4af310db 1311 }
df7492f9 1312 else
4af310db 1313 {
df7492f9
KH
1314 if (c == 0xFFFE)
1315 CODING_UTF_16_ENDIAN (coding)
1316 = endian = utf_16_big_endian;
1317 else if (c == 0xFEFF)
1318 CODING_UTF_16_ENDIAN (coding)
1319 = endian = utf_16_little_endian;
1320 else
4af310db 1321 {
df7492f9
KH
1322 CODING_UTF_16_ENDIAN (coding)
1323 = endian = utf_16_big_endian;
1324 src = src_base;
4af310db 1325 }
4af310db 1326 }
df7492f9
KH
1327 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1328 }
1329
1330 while (1)
1331 {
1332 int c, c1, c2;
1333
1334 src_base = src;
1335 consumed_chars_base = consumed_chars;
1336
1337 if (charbuf + 2 >= charbuf_end)
1338 break;
1339
1340 ONE_MORE_BYTE (c1);
1341 ONE_MORE_BYTE (c2);
1342 c = (endian == utf_16_big_endian
e19c3639 1343 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1344 if (surrogate)
aa72b389 1345 {
df7492f9 1346 if (! UTF_16_LOW_SURROGATE_P (c))
aa72b389 1347 {
df7492f9
KH
1348 if (endian == utf_16_big_endian)
1349 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1350 else
1351 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1352 *charbuf++ = c1;
1353 *charbuf++ = c2;
1354 coding->errors++;
1355 if (UTF_16_HIGH_SURROGATE_P (c))
1356 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1357 else
1358 *charbuf++ = c;
aa72b389 1359 }
df7492f9
KH
1360 else
1361 {
1362 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1363 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1364 *charbuf++ = c;
1365 }
1366 }
1367 else
1368 {
1369 if (UTF_16_HIGH_SURROGATE_P (c))
1370 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1371 else
1372 *charbuf++ = c;
1373 }
1374 }
1375
1376 no_more_source:
1377 coding->consumed_char += consumed_chars_base;
1378 coding->consumed = src_base - coding->source;
1379 coding->charbuf_used = charbuf - coding->charbuf;
1380}
1381
1382static int
1383encode_coding_utf_16 (coding)
1384 struct coding_system *coding;
1385{
1386 int multibytep = coding->dst_multibyte;
1387 int *charbuf = coding->charbuf;
1388 int *charbuf_end = charbuf + coding->charbuf_used;
1389 unsigned char *dst = coding->destination + coding->produced;
1390 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1391 int safe_room = 8;
1392 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1393 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1394 int produced_chars = 0;
1395 Lisp_Object attrs, eol_type, charset_list;
1396 int c;
1397
1398 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1399
1400 if (bom == utf_16_with_bom)
1401 {
1402 ASSURE_DESTINATION (safe_room);
1403 if (big_endian)
1404 EMIT_TWO_BYTES (0xFF, 0xFE);
1405 else
1406 EMIT_TWO_BYTES (0xFE, 0xFF);
1407 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1408 }
1409
1410 while (charbuf < charbuf_end)
1411 {
1412 ASSURE_DESTINATION (safe_room);
1413 c = *charbuf++;
e19c3639
KH
1414 if (c >= MAX_UNICODE_CHAR)
1415 c = coding->default_char;
df7492f9
KH
1416
1417 if (c < 0x10000)
1418 {
1419 if (big_endian)
1420 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1421 else
1422 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1423 }
1424 else
1425 {
1426 int c1, c2;
1427
1428 c -= 0x10000;
1429 c1 = (c >> 10) + 0xD800;
1430 c2 = (c & 0x3FF) + 0xDC00;
1431 if (big_endian)
1432 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1433 else
1434 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1435 }
1436 }
1437 coding->result = CODING_RESULT_SUCCESS;
1438 coding->produced = dst - coding->destination;
1439 coding->produced_char += produced_chars;
1440 return 0;
1441}
1442
1443\f
1444/*** 6. Old Emacs' internal format (emacs-mule) ***/
1445
1446/* Emacs' internal format for representation of multiple character
1447 sets is a kind of multi-byte encoding, i.e. characters are
1448 represented by variable-length sequences of one-byte codes.
1449
1450 ASCII characters and control characters (e.g. `tab', `newline') are
1451 represented by one-byte sequences which are their ASCII codes, in
1452 the range 0x00 through 0x7F.
1453
1454 8-bit characters of the range 0x80..0x9F are represented by
1455 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1456 code + 0x20).
1457
1458 8-bit characters of the range 0xA0..0xFF are represented by
1459 one-byte sequences which are their 8-bit code.
1460
1461 The other characters are represented by a sequence of `base
1462 leading-code', optional `extended leading-code', and one or two
1463 `position-code's. The length of the sequence is determined by the
1464 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1465 whereas extended leading-code and position-code take the range 0xA0
1466 through 0xFF. See `charset.h' for more details about leading-code
1467 and position-code.
1468
1469 --- CODE RANGE of Emacs' internal format ---
1470 character set range
1471 ------------- -----
1472 ascii 0x00..0x7F
1473 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1474 eight-bit-graphic 0xA0..0xBF
1475 ELSE 0x81..0x9D + [0xA0..0xFF]+
1476 ---------------------------------------------
1477
1478 As this is the internal character representation, the format is
1479 usually not used externally (i.e. in a file or in a data sent to a
1480 process). But, it is possible to have a text externally in this
1481 format (i.e. by encoding by the coding system `emacs-mule').
1482
1483 In that case, a sequence of one-byte codes has a slightly different
1484 form.
1485
1486 At first, all characters in eight-bit-control are represented by
1487 one-byte sequences which are their 8-bit code.
1488
1489 Next, character composition data are represented by the byte
1490 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1491 where,
1492 METHOD is 0xF0 plus one of composition method (enum
1493 composition_method),
1494
1495 BYTES is 0xA0 plus a byte length of this composition data,
1496
1497 CHARS is 0x20 plus a number of characters composed by this
1498 data,
1499
1500 COMPONENTs are characters of multibye form or composition
1501 rules encoded by two-byte of ASCII codes.
1502
1503 In addition, for backward compatibility, the following formats are
1504 also recognized as composition data on decoding.
1505
1506 0x80 MSEQ ...
1507 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1508
1509 Here,
1510 MSEQ is a multibyte form but in these special format:
1511 ASCII: 0xA0 ASCII_CODE+0x80,
1512 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1513 RULE is a one byte code of the range 0xA0..0xF0 that
1514 represents a composition rule.
1515 */
1516
1517char emacs_mule_bytes[256];
1518
1519/* Leading-code followed by extended leading-code. */
1520#define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
1521#define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
1522#define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1523#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1524
1525
1526int
781d7a48 1527emacs_mule_char (coding, src, nbytes, nchars)
df7492f9 1528 struct coding_system *coding;
781d7a48 1529 unsigned char *src;
df7492f9
KH
1530 int *nbytes, *nchars;
1531{
df7492f9
KH
1532 unsigned char *src_end = coding->source + coding->src_bytes;
1533 int multibytep = coding->src_multibyte;
1534 unsigned char *src_base = src;
1535 struct charset *charset;
1536 unsigned code;
1537 int c;
1538 int consumed_chars = 0;
1539
1540 ONE_MORE_BYTE (c);
df7492f9
KH
1541 switch (emacs_mule_bytes[c])
1542 {
1543 case 2:
1544 if (! (charset = emacs_mule_charset[c]))
1545 goto invalid_code;
1546 ONE_MORE_BYTE (c);
1547 code = c & 0x7F;
1548 break;
1549
1550 case 3:
1551 if (c == LEADING_CODE_PRIVATE_11
1552 || c == LEADING_CODE_PRIVATE_12)
b73bfc1c 1553 {
df7492f9
KH
1554 ONE_MORE_BYTE (c);
1555 if (! (charset = emacs_mule_charset[c]))
1556 goto invalid_code;
1557 ONE_MORE_BYTE (c);
1558 code = c & 0x7F;
b73bfc1c
KH
1559 }
1560 else
1561 {
df7492f9
KH
1562 if (! (charset = emacs_mule_charset[c]))
1563 goto invalid_code;
1564 ONE_MORE_BYTE (c);
781d7a48 1565 code = (c & 0x7F) << 8;
df7492f9
KH
1566 ONE_MORE_BYTE (c);
1567 code |= c & 0x7F;
1568 }
1569 break;
1570
1571 case 4:
781d7a48 1572 ONE_MORE_BYTE (c);
df7492f9
KH
1573 if (! (charset = emacs_mule_charset[c]))
1574 goto invalid_code;
1575 ONE_MORE_BYTE (c);
781d7a48 1576 code = (c & 0x7F) << 8;
df7492f9
KH
1577 ONE_MORE_BYTE (c);
1578 code |= c & 0x7F;
1579 break;
1580
1581 case 1:
1582 code = c;
1583 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) ? charset_ascii
1584 : code < 0xA0 ? charset_8_bit_control
1585 : charset_8_bit_graphic);
1586 break;
1587
1588 default:
1589 abort ();
1590 }
1591 c = DECODE_CHAR (charset, code);
1592 if (c < 0)
1593 goto invalid_code;
1594 *nbytes = src - src_base;
1595 *nchars = consumed_chars;
1596 return c;
1597
1598 no_more_source:
1599 return -2;
1600
1601 invalid_code:
1602 return -1;
1603}
1604
1605
1606/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1607 Check if a text is encoded in `emacs-mule'. */
1608
1609static int
1610detect_coding_emacs_mule (coding, mask)
1611 struct coding_system *coding;
1612 int *mask;
1613{
1614 unsigned char *src = coding->source, *src_base = src;
1615 unsigned char *src_end = coding->source + coding->src_bytes;
1616 int multibytep = coding->src_multibyte;
1617 int consumed_chars = 0;
1618 int c;
1619 int found = 0;
1620
1621 /* A coding system of this category is always ASCII compatible. */
1622 src += coding->head_ascii;
1623
1624 while (1)
1625 {
1626 ONE_MORE_BYTE (c);
1627
1628 if (c == 0x80)
1629 {
1630 /* Perhaps the start of composite character. We simple skip
1631 it because analyzing it is too heavy for detecting. But,
1632 at least, we check that the composite character
1633 constitues of more than 4 bytes. */
1634 unsigned char *src_base;
1635
1636 repeat:
1637 src_base = src;
1638 do
1639 {
1640 ONE_MORE_BYTE (c);
1641 }
1642 while (c >= 0xA0);
1643
1644 if (src - src_base <= 4)
1645 break;
1646 found = 1;
1647 if (c == 0x80)
1648 goto repeat;
b73bfc1c 1649 }
df7492f9
KH
1650
1651 if (c < 0x80)
b73bfc1c 1652 {
df7492f9
KH
1653 if (c < 0x20
1654 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1655 break;
1656 }
1657 else
1658 {
1659 unsigned char *src_base = src - 1;
1660
1661 do
1662 {
1663 ONE_MORE_BYTE (c);
1664 }
1665 while (c >= 0xA0);
1666 if (src - src_base != emacs_mule_bytes[*src_base])
1667 break;
1668 found = 1;
4ed46869
KH
1669 }
1670 }
df7492f9
KH
1671 *mask &= ~CATEGORY_MASK_EMACS_MULE;
1672 return 0;
1673
1674 no_more_source:
1675 if (!found)
1676 return 0;
1677 *mask &= CATEGORY_MASK_EMACS_MULE;
1678 return 1;
4ed46869
KH
1679}
1680
b73bfc1c 1681
df7492f9
KH
1682/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1683
1684/* Decode a character represented as a component of composition
1685 sequence of Emacs 20/21 style at SRC. Set C to that character and
1686 update SRC to the head of next character (or an encoded composition
1687 rule). If SRC doesn't points a composition component, set C to -1.
1688 If SRC points an invalid byte sequence, global exit by a return
1689 value 0. */
1690
1691#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1692 if (1) \
1693 { \
1694 int c; \
1695 int nbytes, nchars; \
1696 \
1697 if (src == src_end) \
1698 break; \
781d7a48 1699 c = emacs_mule_char (coding, src, &nbytes, &nchars); \
df7492f9
KH
1700 if (c < 0) \
1701 { \
1702 if (c == -2) \
1703 break; \
1704 goto invalid_code; \
1705 } \
1706 *buf++ = c; \
1707 src += nbytes; \
1708 consumed_chars += nchars; \
1709 } \
1710 else
1711
1712
1713/* Decode a composition rule represented as a component of composition
781d7a48
KH
1714 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1715 and increment BUF. If SRC points an invalid byte sequence, set C
1716 to -1. */
df7492f9 1717
781d7a48 1718#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
df7492f9
KH
1719 do { \
1720 int c, gref, nref; \
1721 \
781d7a48 1722 if (src >= src_end) \
df7492f9
KH
1723 goto invalid_code; \
1724 ONE_MORE_BYTE_NO_CHECK (c); \
781d7a48 1725 c -= 0x20; \
df7492f9
KH
1726 if (c < 0 || c >= 81) \
1727 goto invalid_code; \
1728 \
1729 gref = c / 9, nref = c % 9; \
1730 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1731 } while (0)
1732
1733
781d7a48
KH
1734/* Decode a composition rule represented as a component of composition
1735 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1736 and increment BUF. If SRC points an invalid byte sequence, set C
1737 to -1. */
1738
1739#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1740 do { \
1741 int gref, nref; \
1742 \
1743 if (src + 1>= src_end) \
1744 goto invalid_code; \
1745 ONE_MORE_BYTE_NO_CHECK (gref); \
1746 gref -= 0x20; \
1747 ONE_MORE_BYTE_NO_CHECK (nref); \
1748 nref -= 0x20; \
1749 if (gref < 0 || gref >= 81 \
1750 || nref < 0 || nref >= 81) \
1751 goto invalid_code; \
1752 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1753 } while (0)
1754
1755
df7492f9
KH
1756#define ADD_COMPOSITION_DATA(buf, method, nchars) \
1757 do { \
1758 *buf++ = -5; \
1759 *buf++ = coding->produced_char + char_offset; \
1760 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1761 *buf++ = method; \
1762 *buf++ = nchars; \
1763 } while (0)
aa72b389 1764
df7492f9
KH
1765
1766#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
aa72b389 1767 do { \
df7492f9 1768 /* Emacs 21 style format. The first three bytes at SRC are \
781d7a48 1769 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
df7492f9
KH
1770 the byte length of this composition information, CHARS is the \
1771 number of characters composed by this composition. */ \
781d7a48
KH
1772 enum composition_method method = c - 0xF2; \
1773 int *charbuf_base = charbuf; \
df7492f9
KH
1774 int consumed_chars_limit; \
1775 int nbytes, nchars; \
1776 \
1777 ONE_MORE_BYTE (c); \
1778 nbytes = c - 0xA0; \
1779 if (nbytes < 3) \
1780 goto invalid_code; \
1781 ONE_MORE_BYTE (c); \
1782 nchars = c - 0xA0; \
1783 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1784 consumed_chars_limit = consumed_chars_base + nbytes; \
1785 if (method != COMPOSITION_RELATIVE) \
aa72b389 1786 { \
df7492f9
KH
1787 int i = 0; \
1788 while (consumed_chars < consumed_chars_limit) \
aa72b389 1789 { \
df7492f9 1790 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
781d7a48 1791 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
df7492f9
KH
1792 else \
1793 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
781d7a48 1794 i++; \
aa72b389 1795 } \
df7492f9
KH
1796 if (consumed_chars < consumed_chars_limit) \
1797 goto invalid_code; \
781d7a48 1798 charbuf_base[0] -= i; \
aa72b389
KH
1799 } \
1800 } while (0)
93dec019 1801
aa72b389 1802
df7492f9
KH
1803#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1804 do { \
1805 /* Emacs 20 style format for relative composition. */ \
1806 /* Store multibyte form of characters to be composed. */ \
1807 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1808 int *buf = components; \
1809 int i, j; \
1810 \
1811 src = src_base; \
1812 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1813 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1814 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1815 if (i < 2) \
1816 goto invalid_code; \
1817 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1818 for (j = 0; j < i; j++) \
1819 *charbuf++ = components[j]; \
1820 } while (0)
1821
1822
1823#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1824 do { \
1825 /* Emacs 20 style format for rule-base composition. */ \
1826 /* Store multibyte form of characters to be composed. */ \
1827 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1828 int *buf = components; \
1829 int i, j; \
1830 \
1831 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1832 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1833 { \
781d7a48 1834 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
df7492f9
KH
1835 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1836 } \
1837 if (i < 1 || (buf - components) % 2 == 0) \
1838 goto invalid_code; \
1839 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1840 goto no_more_source; \
1841 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1842 for (j = 0; j < i; j++) \
1843 *charbuf++ = components[j]; \
1844 for (j = 0; j < i; j += 2) \
1845 *charbuf++ = components[j]; \
1846 } while (0)
1847
aa72b389
KH
1848
1849static void
df7492f9 1850decode_coding_emacs_mule (coding)
aa72b389 1851 struct coding_system *coding;
aa72b389 1852{
df7492f9
KH
1853 unsigned char *src = coding->source + coding->consumed;
1854 unsigned char *src_end = coding->source + coding->src_bytes;
aa72b389 1855 unsigned char *src_base;
df7492f9
KH
1856 int *charbuf = coding->charbuf;
1857 int *charbuf_end = charbuf + coding->charbuf_size;
1858 int consumed_chars = 0, consumed_chars_base;
1859 int char_offset = 0;
1860 int multibytep = coding->src_multibyte;
1861 Lisp_Object attrs, eol_type, charset_list;
aa72b389 1862
df7492f9 1863 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1864
aa72b389
KH
1865 while (1)
1866 {
df7492f9
KH
1867 int c;
1868
aa72b389 1869 src_base = src;
df7492f9
KH
1870 consumed_chars_base = consumed_chars;
1871
1872 if (charbuf >= charbuf_end)
1873 break;
aa72b389 1874
df7492f9
KH
1875 ONE_MORE_BYTE (c);
1876
1877 if (c < 0x80)
aa72b389 1878 {
df7492f9
KH
1879 if (c == '\r')
1880 {
1881 if (EQ (eol_type, Qdos))
1882 {
1883 if (src == src_end)
1884 goto no_more_source;
1885 if (*src == '\n')
1886 ONE_MORE_BYTE (c);
1887 }
1888 else if (EQ (eol_type, Qmac))
1889 c = '\n';
1890 }
1891 *charbuf++ = c;
1892 char_offset++;
aa72b389 1893 }
df7492f9
KH
1894 else if (c == 0x80)
1895 {
1896 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1897 break;
1898 ONE_MORE_BYTE (c);
781d7a48
KH
1899 if (c - 0xF2 >= COMPOSITION_RELATIVE
1900 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
1901 DECODE_EMACS_MULE_21_COMPOSITION (c);
1902 else if (c < 0xC0)
1903 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1904 else if (c == 0xFF)
1905 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1906 else
1907 goto invalid_code;
781d7a48 1908 coding->annotated = 1;
df7492f9
KH
1909 }
1910 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1911 {
1912 int nbytes, nchars;
781d7a48
KH
1913 src = src_base;
1914 consumed_chars = consumed_chars_base;
1915 c = emacs_mule_char (coding, src, &nbytes, &nchars);
df7492f9
KH
1916 if (c < 0)
1917 {
1918 if (c == -2)
1919 break;
1920 goto invalid_code;
1921 }
1922 *charbuf++ = c;
781d7a48
KH
1923 src += nbytes;
1924 consumed_chars += nchars;
df7492f9
KH
1925 char_offset++;
1926 }
1927 continue;
1928
1929 invalid_code:
1930 src = src_base;
1931 consumed_chars = consumed_chars_base;
1932 ONE_MORE_BYTE (c);
1933 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1934 coding->errors++;
1935 }
1936
1937 no_more_source:
1938 coding->consumed_char += consumed_chars_base;
1939 coding->consumed = src_base - coding->source;
1940 coding->charbuf_used = charbuf - coding->charbuf;
1941}
1942
1943
1944#define EMACS_MULE_LEADING_CODES(id, codes) \
1945 do { \
1946 if (id < 0xA0) \
1947 codes[0] = id, codes[1] = 0; \
1948 else if (id < 0xE0) \
1949 codes[0] = 0x9A, codes[1] = id; \
1950 else if (id < 0xF0) \
1951 codes[0] = 0x9B, codes[1] = id; \
1952 else if (id < 0xF5) \
1953 codes[0] = 0x9C, codes[1] = id; \
1954 else \
1955 codes[0] = 0x9D, codes[1] = id; \
1956 } while (0);
1957
aa72b389 1958
df7492f9
KH
1959static int
1960encode_coding_emacs_mule (coding)
1961 struct coding_system *coding;
1962{
1963 int multibytep = coding->dst_multibyte;
1964 int *charbuf = coding->charbuf;
1965 int *charbuf_end = charbuf + coding->charbuf_used;
1966 unsigned char *dst = coding->destination + coding->produced;
1967 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1968 int safe_room = 8;
df7492f9
KH
1969 int produced_chars = 0;
1970 Lisp_Object attrs, eol_type, charset_list;
1971 int c;
1972
1973 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1974
1975 while (charbuf < charbuf_end)
1976 {
1977 ASSURE_DESTINATION (safe_room);
1978 c = *charbuf++;
1979 if (ASCII_CHAR_P (c))
1980 EMIT_ONE_ASCII_BYTE (c);
1981 else
aa72b389 1982 {
df7492f9
KH
1983 struct charset *charset;
1984 unsigned code;
1985 int dimension;
1986 int emacs_mule_id;
1987 unsigned char leading_codes[2];
1988
1989 charset = char_charset (c, charset_list, &code);
1990 if (! charset)
1991 {
1992 c = coding->default_char;
1993 if (ASCII_CHAR_P (c))
1994 {
1995 EMIT_ONE_ASCII_BYTE (c);
1996 continue;
1997 }
1998 charset = char_charset (c, charset_list, &code);
1999 }
2000 dimension = CHARSET_DIMENSION (charset);
2001 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2002 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2003 EMIT_ONE_BYTE (leading_codes[0]);
2004 if (leading_codes[1])
2005 EMIT_ONE_BYTE (leading_codes[1]);
2006 if (dimension == 1)
2007 EMIT_ONE_BYTE (code);
aa72b389 2008 else
df7492f9
KH
2009 {
2010 EMIT_ONE_BYTE (code >> 8);
2011 EMIT_ONE_BYTE (code & 0xFF);
2012 }
aa72b389 2013 }
aa72b389 2014 }
df7492f9
KH
2015 coding->result = CODING_RESULT_SUCCESS;
2016 coding->produced_char += produced_chars;
2017 coding->produced = dst - coding->destination;
2018 return 0;
aa72b389 2019}
b73bfc1c 2020
4ed46869 2021\f
df7492f9 2022/*** 7. ISO2022 handlers ***/
4ed46869
KH
2023
2024/* The following note describes the coding system ISO2022 briefly.
39787efd 2025 Since the intention of this note is to help understand the
df7492f9 2026 functions in this file, some parts are NOT ACCURATE or OVERLY
39787efd 2027 SIMPLIFIED. For thorough understanding, please refer to the
df7492f9 2028 original document of ISO2022.
4ed46869
KH
2029
2030 ISO2022 provides many mechanisms to encode several character sets
df7492f9 2031 in 7-bit and 8-bit environments. For 7-bite environments, all text
39787efd
KH
2032 is encoded using bytes less than 128. This may make the encoded
2033 text a little bit longer, but the text passes more easily through
df7492f9 2034 several gateways, some of which strip off MSB (Most Signigant Bit).
b73bfc1c 2035
df7492f9
KH
2036 There are two kinds of character sets: control character set and
2037 graphic character set. The former contains control characters such
4ed46869 2038 as `newline' and `escape' to provide control functions (control
39787efd 2039 functions are also provided by escape sequences). The latter
df7492f9 2040 contains graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2041 two control character sets and many graphic character sets.
2042
2043 Graphic character sets are classified into one of the following
39787efd
KH
2044 four classes, according to the number of bytes (DIMENSION) and
2045 number of characters in one dimension (CHARS) of the set:
2046 - DIMENSION1_CHARS94
2047 - DIMENSION1_CHARS96
2048 - DIMENSION2_CHARS94
2049 - DIMENSION2_CHARS96
2050
2051 In addition, each character set is assigned an identification tag,
df7492f9 2052 unique for each set, called "final character" (denoted as <F>
39787efd
KH
2053 hereafter). The <F> of each character set is decided by ECMA(*)
2054 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2055 (0x30..0x3F are for private use only).
4ed46869
KH
2056
2057 Note (*): ECMA = European Computer Manufacturers Association
2058
df7492f9 2059 Here are examples of graphic character set [NAME(<F>)]:
4ed46869
KH
2060 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2061 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2062 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2063 o DIMENSION2_CHARS96 -- none for the moment
2064
39787efd 2065 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2066 C0 [0x00..0x1F] -- control character plane 0
2067 GL [0x20..0x7F] -- graphic character plane 0
2068 C1 [0x80..0x9F] -- control character plane 1
2069 GR [0xA0..0xFF] -- graphic character plane 1
2070
2071 A control character set is directly designated and invoked to C0 or
39787efd
KH
2072 C1 by an escape sequence. The most common case is that:
2073 - ISO646's control character set is designated/invoked to C0, and
2074 - ISO6429's control character set is designated/invoked to C1,
2075 and usually these designations/invocations are omitted in encoded
2076 text. In a 7-bit environment, only C0 can be used, and a control
2077 character for C1 is encoded by an appropriate escape sequence to
2078 fit into the environment. All control characters for C1 are
2079 defined to have corresponding escape sequences.
4ed46869
KH
2080
2081 A graphic character set is at first designated to one of four
2082 graphic registers (G0 through G3), then these graphic registers are
2083 invoked to GL or GR. These designations and invocations can be
2084 done independently. The most common case is that G0 is invoked to
39787efd
KH
2085 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2086 these invocations and designations are omitted in encoded text.
2087 In a 7-bit environment, only GL can be used.
4ed46869 2088
39787efd
KH
2089 When a graphic character set of CHARS94 is invoked to GL, codes
2090 0x20 and 0x7F of the GL area work as control characters SPACE and
2091 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2092 be used.
4ed46869
KH
2093
2094 There are two ways of invocation: locking-shift and single-shift.
2095 With locking-shift, the invocation lasts until the next different
39787efd
KH
2096 invocation, whereas with single-shift, the invocation affects the
2097 following character only and doesn't affect the locking-shift
2098 state. Invocations are done by the following control characters or
2099 escape sequences:
4ed46869
KH
2100
2101 ----------------------------------------------------------------------
39787efd 2102 abbrev function cntrl escape seq description
4ed46869 2103 ----------------------------------------------------------------------
39787efd
KH
2104 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2105 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2106 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2107 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2108 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2109 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2110 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2111 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2112 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2113 ----------------------------------------------------------------------
39787efd
KH
2114 (*) These are not used by any known coding system.
2115
2116 Control characters for these functions are defined by macros
2117 ISO_CODE_XXX in `coding.h'.
4ed46869 2118
39787efd 2119 Designations are done by the following escape sequences:
4ed46869
KH
2120 ----------------------------------------------------------------------
2121 escape sequence description
2122 ----------------------------------------------------------------------
2123 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2124 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2125 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2126 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2127 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2128 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2129 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2130 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2131 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2132 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2133 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2134 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2135 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2136 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2137 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2138 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2139 ----------------------------------------------------------------------
2140
2141 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2142 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2143
2144 Note (*): Although these designations are not allowed in ISO2022,
2145 Emacs accepts them on decoding, and produces them on encoding
39787efd 2146 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2147 7-bit environment, non-locking-shift, and non-single-shift.
2148
2149 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2150 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2151
df7492f9 2152 Now you may notice that there are a lot of ways for encoding the
39787efd
KH
2153 same multilingual text in ISO2022. Actually, there exist many
2154 coding systems such as Compound Text (used in X11's inter client
df7492f9
KH
2155 communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
2156 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2157 localized platforms), and all of these are variants of ISO2022.
2158
2159 In addition to the above, Emacs handles two more kinds of escape
2160 sequences: ISO6429's direction specification and Emacs' private
2161 sequence for specifying character composition.
2162
39787efd 2163 ISO6429's direction specification takes the following form:
4ed46869
KH
2164 o CSI ']' -- end of the current direction
2165 o CSI '0' ']' -- end of the current direction
2166 o CSI '1' ']' -- start of left-to-right text
2167 o CSI '2' ']' -- start of right-to-left text
2168 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2169 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2170
2171 Character composition specification takes the following form:
ec6d2bb8
KH
2172 o ESC '0' -- start relative composition
2173 o ESC '1' -- end composition
2174 o ESC '2' -- start rule-base composition (*)
2175 o ESC '3' -- start relative composition with alternate chars (**)
2176 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2177 Since these are not standard escape sequences of any ISO standard,
df7492f9 2178 the use of them for these meaning is restricted to Emacs only.
ec6d2bb8 2179
df7492f9 2180 (*) This form is used only in Emacs 20.5 and the older versions,
b73bfc1c 2181 but the newer versions can safely decode it.
df7492f9 2182 (**) This form is used only in Emacs 21.1 and the newer versions,
b73bfc1c 2183 and the older versions can't decode it.
ec6d2bb8 2184
df7492f9 2185 Here's a list of examples usages of these composition escape
b73bfc1c 2186 sequences (categorized by `enum composition_method').
ec6d2bb8 2187
b73bfc1c 2188 COMPOSITION_RELATIVE:
ec6d2bb8 2189 ESC 0 CHAR [ CHAR ] ESC 1
df7492f9 2190 COMPOSITOIN_WITH_RULE:
ec6d2bb8 2191 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2192 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2193 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2194 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2195 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2196
2197enum iso_code_class_type iso_code_class[256];
2198
df7492f9
KH
2199#define SAFE_CHARSET_P(coding, id) \
2200 ((id) <= (coding)->max_charset_id \
2201 && (coding)->safe_charsets[id] >= 0)
2202
2203
2204#define SHIFT_OUT_OK(category) \
2205 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2206
2207static void
2208setup_iso_safe_charsets (Lisp_Object attrs)
2209{
2210 Lisp_Object charset_list, safe_charsets;
2211 Lisp_Object request;
2212 Lisp_Object reg_usage;
2213 Lisp_Object tail;
2214 int reg94, reg96;
2215 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2216 int max_charset_id;
2217
2218 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2219 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2220 && ! EQ (charset_list, Viso_2022_charset_list))
2221 {
2222 CODING_ATTR_CHARSET_LIST (attrs)
2223 = charset_list = Viso_2022_charset_list;
2224 ASET (attrs, coding_attr_safe_charsets, Qnil);
2225 }
2226
2227 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2228 return;
2229
2230 max_charset_id = 0;
2231 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2232 {
2233 int id = XINT (XCAR (tail));
2234 if (max_charset_id < id)
2235 max_charset_id = id;
2236 }
2237
2238 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2239 make_number (255));
2240 request = AREF (attrs, coding_attr_iso_request);
2241 reg_usage = AREF (attrs, coding_attr_iso_usage);
2242 reg94 = XINT (XCAR (reg_usage));
2243 reg96 = XINT (XCDR (reg_usage));
2244
2245 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2246 {
2247 Lisp_Object id;
2248 Lisp_Object reg;
2249 struct charset *charset;
2250
2251 id = XCAR (tail);
2252 charset = CHARSET_FROM_ID (XINT (id));
2253 reg = Fcdr (Fassq (request, id));
2254 if (! NILP (reg))
2255 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2256 else if (charset->iso_chars_96)
2257 {
2258 if (reg96 < 4)
2259 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2260 }
2261 else
2262 {
2263 if (reg94 < 4)
2264 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2265 }
2266 }
2267 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2268}
d46c5b12 2269
d46c5b12 2270
4ed46869 2271/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
df7492f9 2272 Check if a text is encoded in ISO2022. If it is, returns an
4ed46869 2273 integer in which appropriate flag bits any of:
df7492f9
KH
2274 CATEGORY_MASK_ISO_7
2275 CATEGORY_MASK_ISO_7_TIGHT
2276 CATEGORY_MASK_ISO_8_1
2277 CATEGORY_MASK_ISO_8_2
2278 CATEGORY_MASK_ISO_7_ELSE
2279 CATEGORY_MASK_ISO_8_ELSE
4ed46869
KH
2280 are set. If a code which should never appear in ISO2022 is found,
2281 returns 0. */
2282
0a28aafb 2283static int
df7492f9
KH
2284detect_coding_iso_2022 (coding, mask)
2285 struct coding_system *coding;
2286 int *mask;
4ed46869 2287{
df7492f9
KH
2288 unsigned char *src = coding->source, *src_base = src;
2289 unsigned char *src_end = coding->source + coding->src_bytes;
2290 int multibytep = coding->src_multibyte;
2291 int mask_iso = CATEGORY_MASK_ISO;
2292 int mask_found = 0, mask_8bit_found = 0;
f46869e4 2293 int reg[4], shift_out = 0, single_shifting = 0;
df7492f9
KH
2294 int id;
2295 int c, c1;
2296 int consumed_chars = 0;
2297 int i;
2298
2299 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2300 {
2301 struct coding_system *this = &(coding_categories[i]);
2302 Lisp_Object attrs, val;
2303
2304 attrs = CODING_ID_ATTRS (this->id);
2305 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2306 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2307 setup_iso_safe_charsets (attrs);
2308 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2309 this->max_charset_id = XSTRING (val)->size - 1;
2310 this->safe_charsets = (char *) XSTRING (val)->data;
2311 }
2312
2313 /* A coding system of this category is always ASCII compatible. */
2314 src += coding->head_ascii;
3f003981 2315
df7492f9
KH
2316 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2317 while (mask_iso && src < src_end)
4ed46869 2318 {
df7492f9 2319 ONE_MORE_BYTE (c);
4ed46869
KH
2320 switch (c)
2321 {
2322 case ISO_CODE_ESC:
74383408
KH
2323 if (inhibit_iso_escape_detection)
2324 break;
f46869e4 2325 single_shifting = 0;
df7492f9 2326 ONE_MORE_BYTE (c);
d46c5b12 2327 if (c >= '(' && c <= '/')
4ed46869 2328 {
bf9cdd4e 2329 /* Designation sequence for a charset of dimension 1. */
df7492f9 2330 ONE_MORE_BYTE (c1);
d46c5b12 2331 if (c1 < ' ' || c1 >= 0x80
df7492f9 2332 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2333 /* Invalid designation sequence. Just ignore. */
2334 break;
df7492f9 2335 reg[(c - '(') % 4] = id;
bf9cdd4e
KH
2336 }
2337 else if (c == '$')
2338 {
2339 /* Designation sequence for a charset of dimension 2. */
df7492f9 2340 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2341 if (c >= '@' && c <= 'B')
2342 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
df7492f9 2343 reg[0] = id = iso_charset_table[1][0][c];
bf9cdd4e 2344 else if (c >= '(' && c <= '/')
bcf26d6a 2345 {
df7492f9 2346 ONE_MORE_BYTE (c1);
d46c5b12 2347 if (c1 < ' ' || c1 >= 0x80
df7492f9 2348 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2349 /* Invalid designation sequence. Just ignore. */
2350 break;
df7492f9 2351 reg[(c - '(') % 4] = id;
bcf26d6a 2352 }
bf9cdd4e 2353 else
d46c5b12
KH
2354 /* Invalid designation sequence. Just ignore. */
2355 break;
2356 }
ae9ff118 2357 else if (c == 'N' || c == 'O')
d46c5b12 2358 {
ae9ff118 2359 /* ESC <Fe> for SS2 or SS3. */
df7492f9 2360 mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
d46c5b12 2361 break;
4ed46869 2362 }
ec6d2bb8
KH
2363 else if (c >= '0' && c <= '4')
2364 {
2365 /* ESC <Fp> for start/end composition. */
df7492f9 2366 mask_found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2367 break;
2368 }
bf9cdd4e 2369 else
df7492f9
KH
2370 {
2371 /* Invalid escape sequence. */
2372 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2373 break;
2374 }
d46c5b12
KH
2375
2376 /* We found a valid designation sequence for CHARSET. */
df7492f9
KH
2377 mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2378 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2379 id))
2380 mask_found |= CATEGORY_MASK_ISO_7;
d46c5b12 2381 else
df7492f9
KH
2382 mask_iso &= ~CATEGORY_MASK_ISO_7;
2383 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2384 id))
2385 mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2386 else
df7492f9
KH
2387 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2388 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2389 id))
2390 mask_found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2391 else
df7492f9
KH
2392 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2393 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2394 id))
2395 mask_found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2396 else
df7492f9 2397 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2398 break;
2399
4ed46869 2400 case ISO_CODE_SO:
74383408
KH
2401 if (inhibit_iso_escape_detection)
2402 break;
f46869e4 2403 single_shifting = 0;
d46c5b12
KH
2404 if (shift_out == 0
2405 && (reg[1] >= 0
df7492f9
KH
2406 || SHIFT_OUT_OK (coding_category_iso_7_else)
2407 || SHIFT_OUT_OK (coding_category_iso_8_else)))
d46c5b12
KH
2408 {
2409 /* Locking shift out. */
df7492f9
KH
2410 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2411 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12 2412 }
e0e989f6 2413 break;
df7492f9 2414
d46c5b12 2415 case ISO_CODE_SI:
74383408
KH
2416 if (inhibit_iso_escape_detection)
2417 break;
f46869e4 2418 single_shifting = 0;
d46c5b12
KH
2419 if (shift_out == 1)
2420 {
2421 /* Locking shift in. */
df7492f9
KH
2422 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2423 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2424 }
2425 break;
2426
4ed46869 2427 case ISO_CODE_CSI:
f46869e4 2428 single_shifting = 0;
4ed46869
KH
2429 case ISO_CODE_SS2:
2430 case ISO_CODE_SS3:
3f003981 2431 {
df7492f9 2432 int newmask = CATEGORY_MASK_ISO_8_ELSE;
3f003981 2433
74383408
KH
2434 if (inhibit_iso_escape_detection)
2435 break;
70c22245
KH
2436 if (c != ISO_CODE_CSI)
2437 {
df7492f9
KH
2438 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2439 & CODING_ISO_FLAG_SINGLE_SHIFT)
2440 newmask |= CATEGORY_MASK_ISO_8_1;
2441 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2442 & CODING_ISO_FLAG_SINGLE_SHIFT)
2443 newmask |= CATEGORY_MASK_ISO_8_2;
f46869e4 2444 single_shifting = 1;
70c22245 2445 }
3f003981
KH
2446 if (VECTORP (Vlatin_extra_code_table)
2447 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2448 {
df7492f9
KH
2449 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2450 & CODING_ISO_FLAG_LATIN_EXTRA)
2451 newmask |= CATEGORY_MASK_ISO_8_1;
2452 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2453 & CODING_ISO_FLAG_LATIN_EXTRA)
2454 newmask |= CATEGORY_MASK_ISO_8_2;
3f003981 2455 }
df7492f9 2456 mask_iso &= newmask;
d46c5b12 2457 mask_found |= newmask;
3f003981
KH
2458 }
2459 break;
4ed46869
KH
2460
2461 default:
2462 if (c < 0x80)
f46869e4
KH
2463 {
2464 single_shifting = 0;
2465 break;
2466 }
4ed46869 2467 else if (c < 0xA0)
c4825358 2468 {
f46869e4 2469 single_shifting = 0;
df7492f9 2470 mask_8bit_found = 1;
3f003981
KH
2471 if (VECTORP (Vlatin_extra_code_table)
2472 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
c4825358 2473 {
3f003981
KH
2474 int newmask = 0;
2475
df7492f9
KH
2476 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2477 & CODING_ISO_FLAG_LATIN_EXTRA)
2478 newmask |= CATEGORY_MASK_ISO_8_1;
2479 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2480 & CODING_ISO_FLAG_LATIN_EXTRA)
2481 newmask |= CATEGORY_MASK_ISO_8_2;
2482 mask_iso &= newmask;
d46c5b12 2483 mask_found |= newmask;
c4825358 2484 }
3f003981
KH
2485 else
2486 return 0;
c4825358 2487 }
4ed46869
KH
2488 else
2489 {
df7492f9
KH
2490 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2491 | CATEGORY_MASK_ISO_7_ELSE);
2492 mask_found |= CATEGORY_MASK_ISO_8_1;
2493 mask_8bit_found = 1;
f46869e4
KH
2494 /* Check the length of succeeding codes of the range
2495 0xA0..0FF. If the byte length is odd, we exclude
df7492f9 2496 CATEGORY_MASK_ISO_8_2. We can check this only
f46869e4 2497 when we are not single shifting. */
b73bfc1c 2498 if (!single_shifting
df7492f9 2499 && mask_iso & CATEGORY_MASK_ISO_8_2)
f46869e4 2500 {
e17de821 2501 int i = 1;
b73bfc1c
KH
2502 while (src < src_end)
2503 {
df7492f9 2504 ONE_MORE_BYTE (c);
b73bfc1c
KH
2505 if (c < 0xA0)
2506 break;
2507 i++;
2508 }
2509
2510 if (i & 1 && src < src_end)
df7492f9 2511 mask_iso &= ~CATEGORY_MASK_ISO_8_2;
f46869e4 2512 else
df7492f9 2513 mask_found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2514 }
4ed46869
KH
2515 }
2516 break;
2517 }
2518 }
df7492f9
KH
2519 no_more_source:
2520 if (!mask_iso)
2521 {
2522 *mask &= ~CATEGORY_MASK_ISO;
2523 return 0;
2524 }
2525 if (!mask_found)
2526 return 0;
2527 *mask &= mask_iso & mask_found;
2528 if (! mask_8bit_found)
2529 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2530 return 1;
4ed46869
KH
2531}
2532
4ed46869
KH
2533
2534/* Set designation state into CODING. */
df7492f9
KH
2535#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2536 do { \
2537 int id, prev; \
2538 \
2539 if (final < '0' || final >= 128 \
2540 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2541 || !SAFE_CHARSET_P (coding, id)) \
2542 { \
2543 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2544 goto invalid_code; \
2545 } \
2546 prev = CODING_ISO_DESIGNATION (coding, reg); \
2547 CODING_ISO_DESIGNATION (coding, reg) = id; \
2548 /* If there was an invalid designation to REG previously, and this \
2549 designation is ASCII to REG, we should keep this designation \
2550 sequence. */ \
2551 if (prev == -2 && id == charset_ascii) \
2552 goto invalid_code; \
4ed46869
KH
2553 } while (0)
2554
d46c5b12 2555
df7492f9
KH
2556#define MAYBE_FINISH_COMPOSITION() \
2557 do { \
2558 int i; \
2559 if (composition_state == COMPOSING_NO) \
2560 break; \
2561 /* It is assured that we have enough room for producing \
2562 characters stored in the table `components'. */ \
2563 if (charbuf + component_idx > charbuf_end) \
2564 goto no_more_source; \
2565 composition_state = COMPOSING_NO; \
2566 if (method == COMPOSITION_RELATIVE \
2567 || method == COMPOSITION_WITH_ALTCHARS) \
2568 { \
2569 for (i = 0; i < component_idx; i++) \
2570 *charbuf++ = components[i]; \
2571 char_offset += component_idx; \
2572 } \
2573 else \
2574 { \
2575 for (i = 0; i < component_idx; i += 2) \
2576 *charbuf++ = components[i]; \
2577 char_offset += (component_idx / 2) + 1; \
2578 } \
2579 } while (0)
2580
d46c5b12 2581
aa72b389
KH
2582/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2583 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2584 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
df7492f9
KH
2585 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2586 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
aa72b389 2587 */
ec6d2bb8 2588
df7492f9
KH
2589#define DECODE_COMPOSITION_START(c1) \
2590 do { \
2591 if (c1 == '0' \
781d7a48 2592 && composition_state == COMPOSING_COMPONENT_RULE) \
df7492f9
KH
2593 { \
2594 component_len = component_idx; \
2595 composition_state = COMPOSING_CHAR; \
2596 } \
2597 else \
2598 { \
2599 unsigned char *p; \
2600 \
2601 MAYBE_FINISH_COMPOSITION (); \
2602 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2603 goto no_more_source; \
2604 for (p = src; p < src_end - 1; p++) \
2605 if (*p == ISO_CODE_ESC && p[1] == '1') \
2606 break; \
2607 if (p == src_end - 1) \
2608 { \
2609 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2610 goto invalid_code; \
2611 goto no_more_source; \
2612 } \
2613 \
2614 /* This is surely the start of a composition. */ \
2615 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2616 : c1 == '2' ? COMPOSITION_WITH_RULE \
2617 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2618 : COMPOSITION_WITH_RULE_ALTCHARS); \
2619 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2620 : COMPOSING_COMPONENT_CHAR); \
2621 component_idx = component_len = 0; \
2622 } \
ec6d2bb8
KH
2623 } while (0)
2624
ec6d2bb8 2625
df7492f9
KH
2626/* Handle compositoin end sequence ESC 1. */
2627
2628#define DECODE_COMPOSITION_END() \
ec6d2bb8 2629 do { \
df7492f9
KH
2630 int nchars = (component_len > 0 ? component_idx - component_len \
2631 : method == COMPOSITION_RELATIVE ? component_idx \
2632 : (component_idx + 1) / 2); \
2633 int i; \
2634 int *saved_charbuf = charbuf; \
2635 \
2636 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2637 if (method != COMPOSITION_RELATIVE) \
ec6d2bb8 2638 { \
df7492f9
KH
2639 if (component_len == 0) \
2640 for (i = 0; i < component_idx; i++) \
2641 *charbuf++ = components[i]; \
2642 else \
2643 for (i = 0; i < component_len; i++) \
2644 *charbuf++ = components[i]; \
2645 *saved_charbuf = saved_charbuf - charbuf; \
ec6d2bb8 2646 } \
df7492f9
KH
2647 if (method == COMPOSITION_WITH_RULE) \
2648 for (i = 0; i < component_idx; i += 2, char_offset++) \
2649 *charbuf++ = components[i]; \
ec6d2bb8 2650 else \
df7492f9
KH
2651 for (i = component_len; i < component_idx; i++, char_offset++) \
2652 *charbuf++ = components[i]; \
2653 coding->annotated = 1; \
2654 composition_state = COMPOSING_NO; \
ec6d2bb8
KH
2655 } while (0)
2656
df7492f9 2657
ec6d2bb8
KH
2658/* Decode a composition rule from the byte C1 (and maybe one more byte
2659 from SRC) and store one encoded composition rule in
2660 coding->cmp_data. */
2661
2662#define DECODE_COMPOSITION_RULE(c1) \
2663 do { \
ec6d2bb8
KH
2664 (c1) -= 32; \
2665 if (c1 < 81) /* old format (before ver.21) */ \
2666 { \
2667 int gref = (c1) / 9; \
2668 int nref = (c1) % 9; \
2669 if (gref == 4) gref = 10; \
2670 if (nref == 4) nref = 10; \
df7492f9 2671 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
ec6d2bb8 2672 } \
b73bfc1c 2673 else if (c1 < 93) /* new format (after ver.21) */ \
ec6d2bb8
KH
2674 { \
2675 ONE_MORE_BYTE (c2); \
df7492f9 2676 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
ec6d2bb8 2677 } \
df7492f9
KH
2678 else \
2679 c1 = 0; \
ec6d2bb8 2680 } while (0)
88993dfd 2681
d46c5b12 2682
4ed46869
KH
2683/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2684
b73bfc1c 2685static void
df7492f9 2686decode_coding_iso_2022 (coding)
4ed46869 2687 struct coding_system *coding;
4ed46869 2688{
df7492f9
KH
2689 unsigned char *src = coding->source + coding->consumed;
2690 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 2691 unsigned char *src_base;
df7492f9
KH
2692 int *charbuf = coding->charbuf;
2693 int *charbuf_end = charbuf + coding->charbuf_size - 4;
2694 int consumed_chars = 0, consumed_chars_base;
2695 int char_offset = 0;
2696 int multibytep = coding->src_multibyte;
2697 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2698 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2699 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2700 struct charset *charset;
2701 int c;
2702 /* For handling composition sequence. */
2703#define COMPOSING_NO 0
2704#define COMPOSING_CHAR 1
2705#define COMPOSING_RULE 2
2706#define COMPOSING_COMPONENT_CHAR 3
2707#define COMPOSING_COMPONENT_RULE 4
2708
2709 int composition_state = COMPOSING_NO;
2710 enum composition_method method;
2711 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2712 int component_idx;
2713 int component_len;
2714 Lisp_Object attrs, eol_type, charset_list;
2715
2716 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2717 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2718
2719 while (1)
4ed46869 2720 {
b73bfc1c
KH
2721 int c1, c2;
2722
2723 src_base = src;
df7492f9
KH
2724 consumed_chars_base = consumed_chars;
2725
2726 if (charbuf >= charbuf_end)
2727 break;
2728
b73bfc1c 2729 ONE_MORE_BYTE (c1);
4ed46869 2730
ec6d2bb8 2731 /* We produce no character or one character. */
4ed46869
KH
2732 switch (iso_code_class [c1])
2733 {
2734 case ISO_0x20_or_0x7F:
df7492f9 2735 if (composition_state != COMPOSING_NO)
ec6d2bb8 2736 {
df7492f9
KH
2737 if (composition_state == COMPOSING_RULE
2738 || composition_state == COMPOSING_COMPONENT_RULE)
2739 {
2740 DECODE_COMPOSITION_RULE (c1);
2741 components[component_idx++] = c1;
2742 composition_state--;
2743 continue;
2744 }
ec6d2bb8 2745 }
df7492f9
KH
2746 if (charset_id_0 < 0
2747 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2748 /* This is SPACE or DEL. */
2749 charset = CHARSET_FROM_ID (charset_ascii);
2750 else
2751 charset = CHARSET_FROM_ID (charset_id_0);
2752 break;
4ed46869
KH
2753
2754 case ISO_graphic_plane_0:
781d7a48 2755 if (composition_state != COMPOSING_NO)
b73bfc1c 2756 {
781d7a48
KH
2757 if (composition_state == COMPOSING_RULE
2758 || composition_state == COMPOSING_COMPONENT_RULE)
2759 {
2760 DECODE_COMPOSITION_RULE (c1);
2761 components[component_idx++] = c1;
2762 composition_state--;
2763 continue;
2764 }
b73bfc1c 2765 }
df7492f9 2766 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2767 break;
2768
2769 case ISO_0xA0_or_0xFF:
df7492f9
KH
2770 if (charset_id_1 < 0
2771 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2772 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2773 goto invalid_code;
4ed46869
KH
2774 /* This is a graphic character, we fall down ... */
2775
2776 case ISO_graphic_plane_1:
df7492f9
KH
2777 if (charset_id_1 < 0)
2778 goto invalid_code;
2779 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2780 break;
2781
2782 case ISO_carriage_return:
df7492f9 2783 if (c1 == '\r')
4ed46869 2784 {
df7492f9 2785 if (EQ (eol_type, Qdos))
4ed46869 2786 {
df7492f9
KH
2787 if (src == src_end)
2788 goto no_more_source;
2789 if (*src == '\n')
2790 ONE_MORE_BYTE (c1);
4ed46869 2791 }
df7492f9
KH
2792 else if (EQ (eol_type, Qmac))
2793 c1 = '\n';
4ed46869 2794 }
df7492f9
KH
2795 /* fall through */
2796
2797 case ISO_control_0:
2798 MAYBE_FINISH_COMPOSITION ();
2799 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2800 break;
2801
df7492f9
KH
2802 case ISO_control_1:
2803 MAYBE_FINISH_COMPOSITION ();
2804 goto invalid_code;
2805
4ed46869 2806 case ISO_shift_out:
df7492f9
KH
2807 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2808 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2809 goto invalid_code;
2810 CODING_ISO_INVOCATION (coding, 0) = 1;
2811 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2812 continue;
4ed46869
KH
2813
2814 case ISO_shift_in:
df7492f9
KH
2815 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2816 goto invalid_code;
2817 CODING_ISO_INVOCATION (coding, 0) = 0;
2818 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2819 continue;
4ed46869
KH
2820
2821 case ISO_single_shift_2_7:
2822 case ISO_single_shift_2:
df7492f9
KH
2823 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2824 goto invalid_code;
4ed46869
KH
2825 /* SS2 is handled as an escape sequence of ESC 'N' */
2826 c1 = 'N';
2827 goto label_escape_sequence;
2828
2829 case ISO_single_shift_3:
df7492f9
KH
2830 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2831 goto invalid_code;
4ed46869
KH
2832 /* SS2 is handled as an escape sequence of ESC 'O' */
2833 c1 = 'O';
2834 goto label_escape_sequence;
2835
2836 case ISO_control_sequence_introducer:
2837 /* CSI is handled as an escape sequence of ESC '[' ... */
2838 c1 = '[';
2839 goto label_escape_sequence;
2840
2841 case ISO_escape:
2842 ONE_MORE_BYTE (c1);
2843 label_escape_sequence:
df7492f9 2844 /* Escape sequences handled here are invocation,
4ed46869
KH
2845 designation, direction specification, and character
2846 composition specification. */
2847 switch (c1)
2848 {
2849 case '&': /* revision of following character set */
2850 ONE_MORE_BYTE (c1);
2851 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2852 goto invalid_code;
4ed46869
KH
2853 ONE_MORE_BYTE (c1);
2854 if (c1 != ISO_CODE_ESC)
df7492f9 2855 goto invalid_code;
4ed46869
KH
2856 ONE_MORE_BYTE (c1);
2857 goto label_escape_sequence;
2858
2859 case '$': /* designation of 2-byte character set */
df7492f9
KH
2860 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2861 goto invalid_code;
4ed46869
KH
2862 ONE_MORE_BYTE (c1);
2863 if (c1 >= '@' && c1 <= 'B')
2864 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2865 or JISX0208.1980 */
df7492f9 2866 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2867 }
2868 else if (c1 >= 0x28 && c1 <= 0x2B)
2869 { /* designation of DIMENSION2_CHARS94 character set */
2870 ONE_MORE_BYTE (c2);
df7492f9 2871 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
2872 }
2873 else if (c1 >= 0x2C && c1 <= 0x2F)
2874 { /* designation of DIMENSION2_CHARS96 character set */
2875 ONE_MORE_BYTE (c2);
df7492f9 2876 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
2877 }
2878 else
df7492f9 2879 goto invalid_code;
b73bfc1c 2880 /* We must update these variables now. */
df7492f9
KH
2881 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2882 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2883 continue;
4ed46869
KH
2884
2885 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
2886 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2887 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2888 goto invalid_code;
2889 CODING_ISO_INVOCATION (coding, 0) = 2;
2890 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2891 continue;
4ed46869
KH
2892
2893 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
2894 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2895 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2896 goto invalid_code;
2897 CODING_ISO_INVOCATION (coding, 0) = 3;
2898 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2899 continue;
4ed46869
KH
2900
2901 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
2902 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2903 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2904 goto invalid_code;
2905 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 2906 ONE_MORE_BYTE (c1);
e7046a18 2907 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2908 goto invalid_code;
4ed46869
KH
2909 break;
2910
2911 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
2912 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2913 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2914 goto invalid_code;
2915 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 2916 ONE_MORE_BYTE (c1);
e7046a18 2917 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2918 goto invalid_code;
4ed46869
KH
2919 break;
2920
ec6d2bb8 2921 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
2922 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2923 goto invalid_code;
ec6d2bb8 2924 DECODE_COMPOSITION_START (c1);
b73bfc1c 2925 continue;
4ed46869 2926
ec6d2bb8 2927 case '1': /* end composition */
df7492f9
KH
2928 if (composition_state == COMPOSING_NO)
2929 goto invalid_code;
2930 DECODE_COMPOSITION_END ();
b73bfc1c 2931 continue;
4ed46869
KH
2932
2933 case '[': /* specification of direction */
df7492f9
KH
2934 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2935 goto invalid_code;
4ed46869 2936 /* For the moment, nested direction is not supported.
d46c5b12 2937 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 2938 left-to-right, and nozero means right-to-left. */
4ed46869
KH
2939 ONE_MORE_BYTE (c1);
2940 switch (c1)
2941 {
2942 case ']': /* end of the current direction */
d46c5b12 2943 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
2944
2945 case '0': /* end of the current direction */
2946 case '1': /* start of left-to-right direction */
2947 ONE_MORE_BYTE (c1);
2948 if (c1 == ']')
d46c5b12 2949 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 2950 else
df7492f9 2951 goto invalid_code;
4ed46869
KH
2952 break;
2953
2954 case '2': /* start of right-to-left direction */
2955 ONE_MORE_BYTE (c1);
2956 if (c1 == ']')
d46c5b12 2957 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 2958 else
df7492f9 2959 goto invalid_code;
4ed46869
KH
2960 break;
2961
2962 default:
df7492f9 2963 goto invalid_code;
4ed46869 2964 }
b73bfc1c 2965 continue;
4ed46869
KH
2966
2967 default:
df7492f9
KH
2968 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2969 goto invalid_code;
4ed46869
KH
2970 if (c1 >= 0x28 && c1 <= 0x2B)
2971 { /* designation of DIMENSION1_CHARS94 character set */
2972 ONE_MORE_BYTE (c2);
df7492f9 2973 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
2974 }
2975 else if (c1 >= 0x2C && c1 <= 0x2F)
2976 { /* designation of DIMENSION1_CHARS96 character set */
2977 ONE_MORE_BYTE (c2);
df7492f9 2978 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
2979 }
2980 else
df7492f9 2981 goto invalid_code;
b73bfc1c 2982 /* We must update these variables now. */
df7492f9
KH
2983 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2984 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2985 continue;
4ed46869 2986 }
b73bfc1c 2987 }
4ed46869 2988
b73bfc1c 2989 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
2990 Produce a decoded character while getting 2nd position code
2991 C2 if necessary. */
2992 c1 &= 0x7F;
2993 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
2994 {
2995 ONE_MORE_BYTE (c2);
df7492f9 2996 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 2997 /* C2 is not in a valid range. */
df7492f9
KH
2998 goto invalid_code;
2999 c1 = (c1 << 8) | (c2 & 0x7F);
3000 if (CHARSET_DIMENSION (charset) > 2)
3001 {
3002 ONE_MORE_BYTE (c2);
3003 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3004 /* C2 is not in a valid range. */
3005 goto invalid_code;
3006 c1 = (c1 << 8) | (c2 & 0x7F);
3007 }
3008 }
3009
3010 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3011 if (c < 0)
3012 {
3013 MAYBE_FINISH_COMPOSITION ();
3014 for (; src_base < src; src_base++, char_offset++)
3015 {
3016 if (ASCII_BYTE_P (*src_base))
3017 *charbuf++ = *src_base;
3018 else
3019 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3020 }
3021 }
3022 else if (composition_state == COMPOSING_NO)
3023 {
3024 *charbuf++ = c;
3025 char_offset++;
4ed46869 3026 }
df7492f9 3027 else
781d7a48
KH
3028 {
3029 components[component_idx++] = c;
3030 if (method == COMPOSITION_WITH_RULE
3031 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3032 && composition_state == COMPOSING_COMPONENT_CHAR))
3033 composition_state++;
3034 }
4ed46869
KH
3035 continue;
3036
df7492f9
KH
3037 invalid_code:
3038 MAYBE_FINISH_COMPOSITION ();
4ed46869 3039 src = src_base;
df7492f9
KH
3040 consumed_chars = consumed_chars_base;
3041 ONE_MORE_BYTE (c);
3042 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3043 coding->errors++;
4ed46869 3044 }
fb88bf2d 3045
df7492f9
KH
3046 no_more_source:
3047 coding->consumed_char += consumed_chars_base;
3048 coding->consumed = src_base - coding->source;
3049 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3050}
3051
b73bfc1c 3052
f4dee582 3053/* ISO2022 encoding stuff. */
4ed46869
KH
3054
3055/*
f4dee582 3056 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3057 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3058 variant has the following specifications:
df7492f9 3059 1. Initial designation to G0 thru G3.
4ed46869
KH
3060 2. Allows short-form designation?
3061 3. ASCII should be designated to G0 before control characters?
3062 4. ASCII should be designated to G0 at end of line?
3063 5. 7-bit environment or 8-bit environment?
3064 6. Use locking-shift?
3065 7. Use Single-shift?
3066 And the following two are only for Japanese:
3067 8. Use ASCII in place of JIS0201-1976-Roman?
3068 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3069 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3070 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3071 details.
4ed46869
KH
3072*/
3073
3074/* Produce codes (escape sequence) for designating CHARSET to graphic
b73bfc1c
KH
3075 register REG at DST, and increment DST. If <final-char> of CHARSET is
3076 '@', 'A', or 'B' and the coding system CODING allows, produce
3077 designation sequence of short-form. */
4ed46869
KH
3078
3079#define ENCODE_DESIGNATION(charset, reg, coding) \
3080 do { \
df7492f9 3081 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
4ed46869
KH
3082 char *intermediate_char_94 = "()*+"; \
3083 char *intermediate_char_96 = ",-./"; \
df7492f9
KH
3084 int revision = -1; \
3085 int c; \
3086 \
3087 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3088 revision = XINT (CHARSET_ISO_REVISION (charset)); \
3089 \
3090 if (revision >= 0) \
70c22245 3091 { \
df7492f9
KH
3092 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3093 EMIT_ONE_BYTE ('@' + revision); \
4ed46869 3094 } \
df7492f9 3095 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
4ed46869
KH
3096 if (CHARSET_DIMENSION (charset) == 1) \
3097 { \
df7492f9
KH
3098 if (! CHARSET_ISO_CHARS_96 (charset)) \
3099 c = intermediate_char_94[reg]; \
4ed46869 3100 else \
df7492f9
KH
3101 c = intermediate_char_96[reg]; \
3102 EMIT_ONE_ASCII_BYTE (c); \
4ed46869
KH
3103 } \
3104 else \
3105 { \
df7492f9
KH
3106 EMIT_ONE_ASCII_BYTE ('$'); \
3107 if (! CHARSET_ISO_CHARS_96 (charset)) \
4ed46869 3108 { \
df7492f9 3109 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
b73bfc1c
KH
3110 || reg != 0 \
3111 || final_char < '@' || final_char > 'B') \
df7492f9 3112 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
4ed46869
KH
3113 } \
3114 else \
df7492f9 3115 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
4ed46869 3116 } \
df7492f9
KH
3117 EMIT_ONE_ASCII_BYTE (final_char); \
3118 \
3119 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
4ed46869
KH
3120 } while (0)
3121
df7492f9 3122
4ed46869
KH
3123/* The following two macros produce codes (control character or escape
3124 sequence) for ISO2022 single-shift functions (single-shift-2 and
3125 single-shift-3). */
3126
df7492f9
KH
3127#define ENCODE_SINGLE_SHIFT_2 \
3128 do { \
3129 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3130 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3131 else \
3132 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3133 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3134 } while (0)
3135
df7492f9
KH
3136
3137#define ENCODE_SINGLE_SHIFT_3 \
3138 do { \
3139 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3140 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3141 else \
3142 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3143 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3144 } while (0)
3145
df7492f9 3146
4ed46869
KH
3147/* The following four macros produce codes (control character or
3148 escape sequence) for ISO2022 locking-shift functions (shift-in,
3149 shift-out, locking-shift-2, and locking-shift-3). */
3150
df7492f9
KH
3151#define ENCODE_SHIFT_IN \
3152 do { \
3153 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3154 CODING_ISO_INVOCATION (coding, 0) = 0; \
4ed46869
KH
3155 } while (0)
3156
df7492f9
KH
3157
3158#define ENCODE_SHIFT_OUT \
3159 do { \
3160 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3161 CODING_ISO_INVOCATION (coding, 0) = 1; \
4ed46869
KH
3162 } while (0)
3163
df7492f9
KH
3164
3165#define ENCODE_LOCKING_SHIFT_2 \
3166 do { \
3167 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3168 CODING_ISO_INVOCATION (coding, 0) = 2; \
4ed46869
KH
3169 } while (0)
3170
df7492f9
KH
3171
3172#define ENCODE_LOCKING_SHIFT_3 \
3173 do { \
3174 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3175 CODING_ISO_INVOCATION (coding, 0) = 3; \
4ed46869
KH
3176 } while (0)
3177
df7492f9 3178
f4dee582
RS
3179/* Produce codes for a DIMENSION1 character whose character set is
3180 CHARSET and whose position-code is C1. Designation and invocation
4ed46869
KH
3181 sequences are also produced in advance if necessary. */
3182
6e85d753
KH
3183#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3184 do { \
df7492f9
KH
3185 int id = CHARSET_ID (charset); \
3186 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3187 { \
df7492f9
KH
3188 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3189 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753 3190 else \
df7492f9
KH
3191 EMIT_ONE_BYTE (c1 | 0x80); \
3192 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3193 break; \
3194 } \
df7492f9 3195 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3196 { \
df7492f9 3197 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753
KH
3198 break; \
3199 } \
df7492f9 3200 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3201 { \
df7492f9 3202 EMIT_ONE_BYTE (c1 | 0x80); \
6e85d753
KH
3203 break; \
3204 } \
6e85d753
KH
3205 else \
3206 /* Since CHARSET is not yet invoked to any graphic planes, we \
3207 must invoke it, or, at first, designate it to some graphic \
3208 register. Then repeat the loop to actually produce the \
3209 character. */ \
df7492f9
KH
3210 dst = encode_invocation_designation (charset, coding, dst, \
3211 &produced_chars); \
4ed46869
KH
3212 } while (1)
3213
df7492f9 3214
f4dee582
RS
3215/* Produce codes for a DIMENSION2 character whose character set is
3216 CHARSET and whose position-codes are C1 and C2. Designation and
4ed46869
KH
3217 invocation codes are also produced in advance if necessary. */
3218
6e85d753
KH
3219#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3220 do { \
df7492f9
KH
3221 int id = CHARSET_ID (charset); \
3222 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3223 { \
df7492f9
KH
3224 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3225 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753 3226 else \
df7492f9
KH
3227 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3228 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3229 break; \
3230 } \
df7492f9 3231 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3232 { \
df7492f9 3233 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753
KH
3234 break; \
3235 } \
df7492f9 3236 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3237 { \
df7492f9 3238 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
6e85d753
KH
3239 break; \
3240 } \
6e85d753
KH
3241 else \
3242 /* Since CHARSET is not yet invoked to any graphic planes, we \
3243 must invoke it, or, at first, designate it to some graphic \
3244 register. Then repeat the loop to actually produce the \
3245 character. */ \
df7492f9
KH
3246 dst = encode_invocation_designation (charset, coding, dst, \
3247 &produced_chars); \
4ed46869
KH
3248 } while (1)
3249
05e6f5dc 3250
df7492f9
KH
3251#define ENCODE_ISO_CHARACTER(charset, c) \
3252 do { \
3253 int code = ENCODE_CHAR ((charset),(c)); \
3254 \
3255 if (CHARSET_DIMENSION (charset) == 1) \
3256 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3257 else \
3258 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
84fbb8a0 3259 } while (0)
bdd9fb48 3260
05e6f5dc 3261
4ed46869 3262/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3263 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3264 Return new DST. */
3265
3266unsigned char *
df7492f9
KH
3267encode_invocation_designation (charset, coding, dst, p_nchars)
3268 struct charset *charset;
4ed46869
KH
3269 struct coding_system *coding;
3270 unsigned char *dst;
df7492f9 3271 int *p_nchars;
4ed46869 3272{
df7492f9
KH
3273 int multibytep = coding->dst_multibyte;
3274 int produced_chars = *p_nchars;
4ed46869 3275 int reg; /* graphic register number */
df7492f9 3276 int id = CHARSET_ID (charset);
4ed46869
KH
3277
3278 /* At first, check designations. */
3279 for (reg = 0; reg < 4; reg++)
df7492f9 3280 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3281 break;
3282
3283 if (reg >= 4)
3284 {
3285 /* CHARSET is not yet designated to any graphic registers. */
3286 /* At first check the requested designation. */
df7492f9
KH
3287 reg = CODING_ISO_REQUEST (coding, id);
3288 if (reg < 0)
1ba9e4ab
KH
3289 /* Since CHARSET requests no special designation, designate it
3290 to graphic register 0. */
4ed46869
KH
3291 reg = 0;
3292
3293 ENCODE_DESIGNATION (charset, reg, coding);
3294 }
3295
df7492f9
KH
3296 if (CODING_ISO_INVOCATION (coding, 0) != reg
3297 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3298 {
3299 /* Since the graphic register REG is not invoked to any graphic
3300 planes, invoke it to graphic plane 0. */
3301 switch (reg)
3302 {
3303 case 0: /* graphic register 0 */
3304 ENCODE_SHIFT_IN;
3305 break;
3306
3307 case 1: /* graphic register 1 */
3308 ENCODE_SHIFT_OUT;
3309 break;
3310
3311 case 2: /* graphic register 2 */
df7492f9 3312 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3313 ENCODE_SINGLE_SHIFT_2;
3314 else
3315 ENCODE_LOCKING_SHIFT_2;
3316 break;
3317
3318 case 3: /* graphic register 3 */
df7492f9 3319 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3320 ENCODE_SINGLE_SHIFT_3;
3321 else
3322 ENCODE_LOCKING_SHIFT_3;
3323 break;
3324 }
3325 }
b73bfc1c 3326
df7492f9 3327 *p_nchars = produced_chars;
4ed46869
KH
3328 return dst;
3329}
3330
df7492f9
KH
3331/* The following three macros produce codes for indicating direction
3332 of text. */
3333#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
ec6d2bb8 3334 do { \
df7492f9
KH
3335 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3336 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
ec6d2bb8 3337 else \
df7492f9 3338 EMIT_ONE_BYTE (ISO_CODE_CSI); \
ec6d2bb8
KH
3339 } while (0)
3340
ec6d2bb8 3341
df7492f9
KH
3342#define ENCODE_DIRECTION_R2L() \
3343 do { \
3344 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3345 EMIT_TWO_ASCII_BYTES ('2', ']'); \
ec6d2bb8
KH
3346 } while (0)
3347
ec6d2bb8 3348
df7492f9 3349#define ENCODE_DIRECTION_L2R() \
ec6d2bb8 3350 do { \
df7492f9
KH
3351 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3352 EMIT_TWO_ASCII_BYTES ('0', ']'); \
4ed46869
KH
3353 } while (0)
3354
4ed46869
KH
3355
3356/* Produce codes for designation and invocation to reset the graphic
3357 planes and registers to initial state. */
df7492f9
KH
3358#define ENCODE_RESET_PLANE_AND_REGISTER() \
3359 do { \
3360 int reg; \
3361 struct charset *charset; \
3362 \
3363 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3364 ENCODE_SHIFT_IN; \
3365 for (reg = 0; reg < 4; reg++) \
3366 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3367 && (CODING_ISO_DESIGNATION (coding, reg) \
3368 != CODING_ISO_INITIAL (coding, reg))) \
3369 { \
3370 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3371 ENCODE_DESIGNATION (charset, reg, coding); \
3372 } \
4ed46869
KH
3373 } while (0)
3374
df7492f9 3375
bdd9fb48 3376/* Produce designation sequences of charsets in the line started from
b73bfc1c 3377 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3378
3379 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3380 find all the necessary designations. */
3381
b73bfc1c 3382static unsigned char *
df7492f9 3383encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3384 struct coding_system *coding;
df7492f9
KH
3385 int *charbuf, *charbuf_end;
3386 unsigned char *dst;
e0e989f6 3387{
df7492f9 3388 struct charset *charset;
bdd9fb48
KH
3389 /* Table of charsets to be designated to each graphic register. */
3390 int r[4];
df7492f9
KH
3391 int c, found = 0, reg;
3392 int produced_chars = 0;
3393 int multibytep = coding->dst_multibyte;
3394 Lisp_Object attrs;
3395 Lisp_Object charset_list;
3396
3397 attrs = CODING_ID_ATTRS (coding->id);
3398 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3399 if (EQ (charset_list, Qiso_2022))
3400 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3401
3402 for (reg = 0; reg < 4; reg++)
3403 r[reg] = -1;
3404
b73bfc1c 3405 while (found < 4)
e0e989f6 3406 {
df7492f9
KH
3407 int id;
3408
3409 c = *charbuf++;
b73bfc1c
KH
3410 if (c == '\n')
3411 break;
df7492f9
KH
3412 charset = char_charset (c, charset_list, NULL);
3413 id = CHARSET_ID (charset);
3414 reg = CODING_ISO_REQUEST (coding, id);
3415 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3416 {
3417 found++;
df7492f9 3418 r[reg] = id;
bdd9fb48 3419 }
bdd9fb48
KH
3420 }
3421
3422 if (found)
3423 {
3424 for (reg = 0; reg < 4; reg++)
3425 if (r[reg] >= 0
df7492f9
KH
3426 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3427 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3428 }
b73bfc1c
KH
3429
3430 return dst;
e0e989f6
KH
3431}
3432
4ed46869
KH
3433/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3434
df7492f9
KH
3435static int
3436encode_coding_iso_2022 (coding)
4ed46869 3437 struct coding_system *coding;
4ed46869 3438{
df7492f9
KH
3439 int multibytep = coding->dst_multibyte;
3440 int *charbuf = coding->charbuf;
3441 int *charbuf_end = charbuf + coding->charbuf_used;
3442 unsigned char *dst = coding->destination + coding->produced;
3443 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3444 int safe_room = 16;
3445 int bol_designation
3446 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3447 && CODING_ISO_BOL (coding));
3448 int produced_chars = 0;
3449 Lisp_Object attrs, eol_type, charset_list;
3450 int ascii_compatible;
b73bfc1c 3451 int c;
05e6f5dc 3452
df7492f9 3453 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
004068e4
KH
3454 setup_iso_safe_charsets (attrs);
3455 coding->safe_charsets
3456 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
bdd9fb48 3457
df7492f9 3458 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4ed46869 3459
df7492f9 3460 while (charbuf < charbuf_end)
4ed46869 3461 {
df7492f9 3462 ASSURE_DESTINATION (safe_room);
b73bfc1c 3463
df7492f9 3464 if (bol_designation)
b73bfc1c 3465 {
df7492f9 3466 unsigned char *dst_prev = dst;
4ed46869 3467
bdd9fb48 3468 /* We have to produce designation sequences if any now. */
df7492f9
KH
3469 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3470 bol_designation = 0;
3471 /* We are sure that designation sequences are all ASCII bytes. */
3472 produced_chars += dst - dst_prev;
4ed46869 3473 }
ec6d2bb8 3474
df7492f9 3475 c = *charbuf++;
4ed46869 3476
b73bfc1c
KH
3477 /* Now encode the character C. */
3478 if (c < 0x20 || c == 0x7F)
3479 {
df7492f9
KH
3480 if (c == '\n'
3481 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3482 {
df7492f9
KH
3483 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3484 ENCODE_RESET_PLANE_AND_REGISTER ();
3485 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3486 {
df7492f9
KH
3487 int i;
3488
3489 for (i = 0; i < 4; i++)
3490 CODING_ISO_DESIGNATION (coding, i)
3491 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3492 }
df7492f9
KH
3493 bol_designation
3494 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3495 }
df7492f9
KH
3496 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3497 ENCODE_RESET_PLANE_AND_REGISTER ();
3498 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3499 }
df7492f9 3500 else if (ASCII_CHAR_P (c))
88993dfd 3501 {
df7492f9
KH
3502 if (ascii_compatible)
3503 EMIT_ONE_ASCII_BYTE (c);
3504 else
3505 ENCODE_ISO_CHARACTER (CHARSET_FROM_ID (charset_ascii), c);
88993dfd 3506 }
b73bfc1c 3507 else
df7492f9
KH
3508 {
3509 struct charset *charset = char_charset (c, charset_list, NULL);
b73bfc1c 3510
df7492f9
KH
3511 if (!charset)
3512 {
41cbe562
KH
3513 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3514 {
3515 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3516 charset = CHARSET_FROM_ID (charset_ascii);
3517 }
3518 else
3519 {
3520 c = coding->default_char;
3521 charset = char_charset (c, charset_list, NULL);
3522 }
df7492f9
KH
3523 }
3524 ENCODE_ISO_CHARACTER (charset, c);
3525 }
84fbb8a0 3526 }
b73bfc1c 3527
df7492f9
KH
3528 if (coding->mode & CODING_MODE_LAST_BLOCK
3529 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3530 {
3531 ASSURE_DESTINATION (safe_room);
3532 ENCODE_RESET_PLANE_AND_REGISTER ();
3533 }
3534 coding->result = CODING_RESULT_SUCCESS;
3535 CODING_ISO_BOL (coding) = bol_designation;
3536 coding->produced_char += produced_chars;
3537 coding->produced = dst - coding->destination;
3538 return 0;
4ed46869
KH
3539}
3540
3541\f
df7492f9 3542/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 3543
df7492f9 3544/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3545 quite widely. So, for the moment, Emacs supports them in the bare
3546 C code. But, in the future, they may be supported only by CCL. */
3547
3548/* SJIS is a coding system encoding three character sets: ASCII, right
3549 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3550 as is. A character of charset katakana-jisx0201 is encoded by
3551 "position-code + 0x80". A character of charset japanese-jisx0208
3552 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 3553 so that it fit in the range below.
4ed46869
KH
3554
3555 --- CODE RANGE of SJIS ---
3556 (character set) (range)
3557 ASCII 0x00 .. 0x7F
df7492f9 3558 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3559 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3560 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3561 -------------------------------
3562
3563*/
3564
3565/* BIG5 is a coding system encoding two character sets: ASCII and
3566 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3567 character set and is encoded in two-byte.
4ed46869
KH
3568
3569 --- CODE RANGE of BIG5 ---
3570 (character set) (range)
3571 ASCII 0x00 .. 0x7F
3572 Big5 (1st byte) 0xA1 .. 0xFE
3573 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3574 --------------------------
3575
df7492f9 3576 */
4ed46869
KH
3577
3578/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3579 Check if a text is encoded in SJIS. If it is, return
df7492f9 3580 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3581
0a28aafb 3582static int
df7492f9
KH
3583detect_coding_sjis (coding, mask)
3584 struct coding_system *coding;
3585 int *mask;
4ed46869 3586{
df7492f9
KH
3587 unsigned char *src = coding->source, *src_base = src;
3588 unsigned char *src_end = coding->source + coding->src_bytes;
3589 int multibytep = coding->src_multibyte;
3590 int consumed_chars = 0;
3591 int found = 0;
b73bfc1c 3592 int c;
df7492f9
KH
3593
3594 /* A coding system of this category is always ASCII compatible. */
3595 src += coding->head_ascii;
4ed46869 3596
b73bfc1c 3597 while (1)
4ed46869 3598 {
df7492f9 3599 ONE_MORE_BYTE (c);
682169fe
KH
3600 if (c < 0x80)
3601 continue;
df7492f9 3602 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3603 {
df7492f9 3604 ONE_MORE_BYTE (c);
682169fe 3605 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9
KH
3606 break;
3607 found = 1;
4ed46869 3608 }
df7492f9
KH
3609 else if (c >= 0xA0 && c < 0xE0)
3610 found = 1;
3611 else
3612 break;
4ed46869 3613 }
df7492f9
KH
3614 *mask &= ~CATEGORY_MASK_SJIS;
3615 return 0;
3616
3617 no_more_source:
3618 if (!found)
3619 return 0;
3620 *mask &= CATEGORY_MASK_SJIS;
3621 return 1;
4ed46869
KH
3622}
3623
3624/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3625 Check if a text is encoded in BIG5. If it is, return
df7492f9 3626 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3627
0a28aafb 3628static int
df7492f9
KH
3629detect_coding_big5 (coding, mask)
3630 struct coding_system *coding;
3631 int *mask;
4ed46869 3632{
df7492f9
KH
3633 unsigned char *src = coding->source, *src_base = src;
3634 unsigned char *src_end = coding->source + coding->src_bytes;
3635 int multibytep = coding->src_multibyte;
3636 int consumed_chars = 0;
3637 int found = 0;
b73bfc1c 3638 int c;
fa42c37f 3639
df7492f9
KH
3640 /* A coding system of this category is always ASCII compatible. */
3641 src += coding->head_ascii;
fa42c37f 3642
b73bfc1c 3643 while (1)
fa42c37f 3644 {
df7492f9
KH
3645 ONE_MORE_BYTE (c);
3646 if (c < 0x80)
fa42c37f 3647 continue;
df7492f9 3648 if (c >= 0xA1)
fa42c37f 3649 {
df7492f9
KH
3650 ONE_MORE_BYTE (c);
3651 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3652 return 0;
df7492f9 3653 found = 1;
fa42c37f 3654 }
df7492f9
KH
3655 else
3656 break;
fa42c37f 3657 }
df7492f9 3658 *mask &= ~CATEGORY_MASK_BIG5;
fa42c37f 3659 return 0;
df7492f9
KH
3660
3661 no_more_source:
3662 if (!found)
3663 return 0;
3664 *mask &= CATEGORY_MASK_BIG5;
3665 return 1;
fa42c37f
KH
3666}
3667
4ed46869
KH
3668/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3669 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3670
b73bfc1c 3671static void
df7492f9 3672decode_coding_sjis (coding)
4ed46869 3673 struct coding_system *coding;
4ed46869 3674{
df7492f9
KH
3675 unsigned char *src = coding->source + coding->consumed;
3676 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 3677 unsigned char *src_base;
df7492f9
KH
3678 int *charbuf = coding->charbuf;
3679 int *charbuf_end = charbuf + coding->charbuf_size;
3680 int consumed_chars = 0, consumed_chars_base;
3681 int multibytep = coding->src_multibyte;
3682 struct charset *charset_roman, *charset_kanji, *charset_kana;
3683 Lisp_Object attrs, eol_type, charset_list, val;
a5d301df 3684
df7492f9
KH
3685 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3686
3687 val = charset_list;
3688 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3689 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3690 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 3691
b73bfc1c 3692 while (1)
4ed46869 3693 {
df7492f9 3694 int c, c1;
b73bfc1c
KH
3695
3696 src_base = src;
df7492f9
KH
3697 consumed_chars_base = consumed_chars;
3698
3699 if (charbuf >= charbuf_end)
3700 break;
3701
3702 ONE_MORE_BYTE (c);
b73bfc1c 3703
df7492f9 3704 if (c == '\r')
4ed46869 3705 {
df7492f9 3706 if (EQ (eol_type, Qdos))
4ed46869 3707 {
df7492f9
KH
3708 if (src == src_end)
3709 goto no_more_source;
3710 if (*src == '\n')
3711 ONE_MORE_BYTE (c);
4ed46869 3712 }
df7492f9
KH
3713 else if (EQ (eol_type, Qmac))
3714 c = '\n';
4ed46869 3715 }
54f78171 3716 else
df7492f9
KH
3717 {
3718 struct charset *charset;
3719
3720 if (c < 0x80)
3721 charset = charset_roman;
3722 else
4ed46869 3723 {
df7492f9
KH
3724 if (c >= 0xF0)
3725 goto invalid_code;
3726 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3727 {
54f78171 3728 /* SJIS -> JISX0208 */
df7492f9
KH
3729 ONE_MORE_BYTE (c1);
3730 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3731 goto invalid_code;
3732 c = (c << 8) | c1;
3733 SJIS_TO_JIS (c);
3734 charset = charset_kanji;
5e34de15 3735 }
fb88bf2d 3736 else
b73bfc1c 3737 /* SJIS -> JISX0201-Kana */
df7492f9
KH
3738 charset = charset_kana;
3739 }
3740 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3741 }
3742 *charbuf++ = c;
3743 continue;
3744
3745 invalid_code:
3746 src = src_base;
3747 consumed_chars = consumed_chars_base;
3748 ONE_MORE_BYTE (c);
3749 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3750 coding->errors++;
3751 }
3752
3753 no_more_source:
3754 coding->consumed_char += consumed_chars_base;
3755 coding->consumed = src_base - coding->source;
3756 coding->charbuf_used = charbuf - coding->charbuf;
3757}
3758
3759static void
3760decode_coding_big5 (coding)
3761 struct coding_system *coding;
3762{
3763 unsigned char *src = coding->source + coding->consumed;
3764 unsigned char *src_end = coding->source + coding->src_bytes;
3765 unsigned char *src_base;
3766 int *charbuf = coding->charbuf;
3767 int *charbuf_end = charbuf + coding->charbuf_size;
3768 int consumed_chars = 0, consumed_chars_base;
3769 int multibytep = coding->src_multibyte;
3770 struct charset *charset_roman, *charset_big5;
3771 Lisp_Object attrs, eol_type, charset_list, val;
3772
3773 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3774 val = charset_list;
3775 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3776 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3777
3778 while (1)
3779 {
3780 int c, c1;
3781
3782 src_base = src;
3783 consumed_chars_base = consumed_chars;
3784
3785 if (charbuf >= charbuf_end)
3786 break;
3787
3788 ONE_MORE_BYTE (c);
3789
3790 if (c == '\r')
3791 {
3792 if (EQ (eol_type, Qdos))
3793 {
3794 if (src == src_end)
3795 goto no_more_source;
3796 if (*src == '\n')
3797 ONE_MORE_BYTE (c);
4ed46869 3798 }
df7492f9
KH
3799 else if (EQ (eol_type, Qmac))
3800 c = '\n';
3801 }
3802 else
3803 {
3804 struct charset *charset;
3805 if (c < 0x80)
3806 charset = charset_roman;
fb88bf2d 3807 else
fb88bf2d 3808 {
54f78171 3809 /* BIG5 -> Big5 */
df7492f9
KH
3810 if (c < 0xA1 || c > 0xFE)
3811 goto invalid_code;
3812 ONE_MORE_BYTE (c1);
3813 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3814 goto invalid_code;
3815 c = c << 8 | c1;
3816 charset = charset_big5;
4ed46869 3817 }
df7492f9 3818 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 3819 }
4ed46869 3820
df7492f9 3821 *charbuf++ = c;
fb88bf2d
KH
3822 continue;
3823
df7492f9 3824 invalid_code:
4ed46869 3825 src = src_base;
df7492f9
KH
3826 consumed_chars = consumed_chars_base;
3827 ONE_MORE_BYTE (c);
3828 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3829 coding->errors++;
fb88bf2d 3830 }
d46c5b12 3831
df7492f9
KH
3832 no_more_source:
3833 coding->consumed_char += consumed_chars_base;
3834 coding->consumed = src_base - coding->source;
3835 coding->charbuf_used = charbuf - coding->charbuf;
3836}
3837
3838/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3839 This function can encode charsets `ascii', `katakana-jisx0201',
3840 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3841 are sure that all these charsets are registered as official charset
3842 (i.e. do not have extended leading-codes). Characters of other
3843 charsets are produced without any encoding. If SJIS_P is 1, encode
3844 SJIS text, else encode BIG5 text. */
3845
3846static int
3847encode_coding_sjis (coding)
3848 struct coding_system *coding;
3849{
3850 int multibytep = coding->dst_multibyte;
3851 int *charbuf = coding->charbuf;
3852 int *charbuf_end = charbuf + coding->charbuf_used;
3853 unsigned char *dst = coding->destination + coding->produced;
3854 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3855 int safe_room = 4;
3856 int produced_chars = 0;
3857 Lisp_Object attrs, eol_type, charset_list, val;
3858 int ascii_compatible;
3859 struct charset *charset_roman, *charset_kanji, *charset_kana;
3860 int c;
3861
3862 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3863 val = charset_list;
3864 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3865 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3866 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3867
3868 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3869
3870 while (charbuf < charbuf_end)
3871 {
3872 ASSURE_DESTINATION (safe_room);
3873 c = *charbuf++;
3874 /* Now encode the character C. */
3875 if (ASCII_CHAR_P (c) && ascii_compatible)
3876 EMIT_ONE_ASCII_BYTE (c);
3877 else
3878 {
3879 unsigned code;
3880 struct charset *charset = char_charset (c, charset_list, &code);
3881
3882 if (!charset)
3883 {
41cbe562
KH
3884 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3885 {
3886 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3887 charset = CHARSET_FROM_ID (charset_ascii);
3888 }
3889 else
3890 {
3891 c = coding->default_char;
3892 charset = char_charset (c, charset_list, &code);
3893 }
df7492f9
KH
3894 }
3895 if (code == CHARSET_INVALID_CODE (charset))
3896 abort ();
3897 if (charset == charset_kanji)
3898 {
3899 int c1, c2;
3900 JIS_TO_SJIS (code);
3901 c1 = code >> 8, c2 = code & 0xFF;
3902 EMIT_TWO_BYTES (c1, c2);
3903 }
3904 else if (charset == charset_kana)
3905 EMIT_ONE_BYTE (code | 0x80);
3906 else
3907 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3908 }
3909 }
3910 coding->result = CODING_RESULT_SUCCESS;
3911 coding->produced_char += produced_chars;
3912 coding->produced = dst - coding->destination;
3913 return 0;
3914}
3915
3916static int
3917encode_coding_big5 (coding)
3918 struct coding_system *coding;
3919{
3920 int multibytep = coding->dst_multibyte;
3921 int *charbuf = coding->charbuf;
3922 int *charbuf_end = charbuf + coding->charbuf_used;
3923 unsigned char *dst = coding->destination + coding->produced;
3924 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3925 int safe_room = 4;
3926 int produced_chars = 0;
3927 Lisp_Object attrs, eol_type, charset_list, val;
3928 int ascii_compatible;
3929 struct charset *charset_roman, *charset_big5;
3930 int c;
3931
3932 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3933 val = charset_list;
3934 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3935 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3936 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3937
3938 while (charbuf < charbuf_end)
3939 {
3940 ASSURE_DESTINATION (safe_room);
3941 c = *charbuf++;
3942 /* Now encode the character C. */
3943 if (ASCII_CHAR_P (c) && ascii_compatible)
3944 EMIT_ONE_ASCII_BYTE (c);
3945 else
3946 {
3947 unsigned code;
3948 struct charset *charset = char_charset (c, charset_list, &code);
3949
3950 if (! charset)
3951 {
41cbe562
KH
3952 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3953 {
3954 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3955 charset = CHARSET_FROM_ID (charset_ascii);
3956 }
3957 else
3958 {
3959 c = coding->default_char;
3960 charset = char_charset (c, charset_list, &code);
3961 }
df7492f9
KH
3962 }
3963 if (code == CHARSET_INVALID_CODE (charset))
3964 abort ();
3965 if (charset == charset_big5)
3966 {
3967 int c1, c2;
3968
3969 c1 = code >> 8, c2 = code & 0xFF;
3970 EMIT_TWO_BYTES (c1, c2);
3971 }
3972 else
3973 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3974 }
3975 }
3976 coding->result = CODING_RESULT_SUCCESS;
3977 coding->produced_char += produced_chars;
3978 coding->produced = dst - coding->destination;
3979 return 0;
3980}
3981
3982\f
3983/*** 10. CCL handlers ***/
3984
3985/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3986 Check if a text is encoded in a coding system of which
3987 encoder/decoder are written in CCL program. If it is, return
3988 CATEGORY_MASK_CCL, else return 0. */
3989
3990static int
3991detect_coding_ccl (coding, mask)
3992 struct coding_system *coding;
3993 int *mask;
3994{
3995 unsigned char *src = coding->source, *src_base = src;
3996 unsigned char *src_end = coding->source + coding->src_bytes;
3997 int multibytep = coding->src_multibyte;
3998 int consumed_chars = 0;
3999 int found = 0;
4000 unsigned char *valids = CODING_CCL_VALIDS (coding);
4001 int head_ascii = coding->head_ascii;
4002 Lisp_Object attrs;
4003
4004 coding = &coding_categories[coding_category_ccl];
4005 attrs = CODING_ID_ATTRS (coding->id);
4006 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4007 src += head_ascii;
4008
4009 while (1)
4010 {
4011 int c;
4012 ONE_MORE_BYTE (c);
4013 if (! valids[c])
4014 break;
4015 if (!found && valids[c] > 1)
4016 found = 1;
4017 }
4018 *mask &= ~CATEGORY_MASK_CCL;
4019 return 0;
4020
4021 no_more_source:
4022 if (!found)
4023 return 0;
4024 *mask &= CATEGORY_MASK_CCL;
4025 return 1;
4026}
4027
4028static void
4029decode_coding_ccl (coding)
4030 struct coding_system *coding;
4031{
4032 unsigned char *src = coding->source + coding->consumed;
4033 unsigned char *src_end = coding->source + coding->src_bytes;
4034 int *charbuf = coding->charbuf;
4035 int *charbuf_end = charbuf + coding->charbuf_size;
4036 int consumed_chars = 0;
4037 int multibytep = coding->src_multibyte;
4038 struct ccl_program ccl;
4039 int source_charbuf[1024];
4040 int source_byteidx[1024];
4041
4042 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4043
4044 while (src < src_end)
4045 {
4046 unsigned char *p = src;
4047 int *source, *source_end;
4048 int i = 0;
4049
4050 if (multibytep)
4051 while (i < 1024 && p < src_end)
4052 {
4053 source_byteidx[i] = p - src;
4054 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4055 }
4056 else
4057 while (i < 1024 && p < src_end)
4058 source_charbuf[i++] = *p++;
4059
4060 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4061 ccl.last_block = 1;
4062
4063 source = source_charbuf;
4064 source_end = source + i;
4065 while (source < source_end)
4066 {
4067 ccl_driver (&ccl, source, charbuf,
4068 source_end - source, charbuf_end - charbuf);
4069 source += ccl.consumed;
4070 charbuf += ccl.produced;
4071 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4072 break;
4073 }
4074 if (source < source_end)
4075 src += source_byteidx[source - source_charbuf];
4076 else
4077 src = p;
4078 consumed_chars += source - source_charbuf;
4079
4080 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4081 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4082 break;
4083 }
4084
4085 switch (ccl.status)
4086 {
4087 case CCL_STAT_SUSPEND_BY_SRC:
4088 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4089 break;
4090 case CCL_STAT_SUSPEND_BY_DST:
4091 break;
4092 case CCL_STAT_QUIT:
4093 case CCL_STAT_INVALID_CMD:
4094 coding->result = CODING_RESULT_INTERRUPT;
4095 break;
4096 default:
4097 coding->result = CODING_RESULT_SUCCESS;
4098 break;
4099 }
4100 coding->consumed_char += consumed_chars;
4101 coding->consumed = src - coding->source;
4102 coding->charbuf_used = charbuf - coding->charbuf;
4103}
4104
4105static int
4106encode_coding_ccl (coding)
4107 struct coding_system *coding;
4108{
4109 struct ccl_program ccl;
4110 int multibytep = coding->dst_multibyte;
4111 int *charbuf = coding->charbuf;
4112 int *charbuf_end = charbuf + coding->charbuf_used;
4113 unsigned char *dst = coding->destination + coding->produced;
4114 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4115 unsigned char *adjusted_dst_end = dst_end - 1;
4116 int destination_charbuf[1024];
4117 int i, produced_chars = 0;
4118
4119 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4120
4121 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4122 ccl.dst_multibyte = coding->dst_multibyte;
4123
4124 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4125 {
4126 int dst_bytes = dst_end - dst;
4127 if (dst_bytes > 1024)
4128 dst_bytes = 1024;
4129
4130 ccl_driver (&ccl, charbuf, destination_charbuf,
4131 charbuf_end - charbuf, dst_bytes);
4132 charbuf += ccl.consumed;
4133 if (multibytep)
4134 for (i = 0; i < ccl.produced; i++)
4135 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4136 else
4137 {
4138 for (i = 0; i < ccl.produced; i++)
4139 *dst++ = destination_charbuf[i] & 0xFF;
4140 produced_chars += ccl.produced;
4141 }
4142 }
4143
4144 switch (ccl.status)
4145 {
4146 case CCL_STAT_SUSPEND_BY_SRC:
4147 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4148 break;
4149 case CCL_STAT_SUSPEND_BY_DST:
4150 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4151 break;
4152 case CCL_STAT_QUIT:
4153 case CCL_STAT_INVALID_CMD:
4154 coding->result = CODING_RESULT_INTERRUPT;
4155 break;
4156 default:
4157 coding->result = CODING_RESULT_SUCCESS;
4158 break;
4159 }
4160
4161 coding->produced_char += produced_chars;
4162 coding->produced = dst - coding->destination;
4163 return 0;
4ed46869
KH
4164}
4165
df7492f9
KH
4166
4167\f
4168/*** 10, 11. no-conversion handlers ***/
4169
4170/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4171
b73bfc1c 4172static void
df7492f9 4173decode_coding_raw_text (coding)
4ed46869 4174 struct coding_system *coding;
4ed46869 4175{
df7492f9 4176 coding->chars_at_source = 1;
2c78b7e1
KH
4177 coding->consumed_char = 0;
4178 coding->consumed = 0;
df7492f9
KH
4179 coding->result = CODING_RESULT_SUCCESS;
4180}
4ed46869 4181
df7492f9
KH
4182static int
4183encode_coding_raw_text (coding)
4184 struct coding_system *coding;
4185{
4186 int multibytep = coding->dst_multibyte;
4187 int *charbuf = coding->charbuf;
4188 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4189 unsigned char *dst = coding->destination + coding->produced;
4190 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4191 int produced_chars = 0;
4192 int c;
a5d301df 4193
df7492f9 4194 if (multibytep)
b73bfc1c 4195 {
df7492f9 4196 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4197
df7492f9
KH
4198 if (coding->src_multibyte)
4199 while (charbuf < charbuf_end)
4200 {
4201 ASSURE_DESTINATION (safe_room);
4202 c = *charbuf++;
4203 if (ASCII_CHAR_P (c))
4204 EMIT_ONE_ASCII_BYTE (c);
4205 else if (CHAR_BYTE8_P (c))
4206 {
4207 c = CHAR_TO_BYTE8 (c);
4208 EMIT_ONE_BYTE (c);
4209 }
4210 else
4211 {
4212 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4213
df7492f9
KH
4214 CHAR_STRING_ADVANCE (c, p1);
4215 while (p0 < p1)
4216 EMIT_ONE_BYTE (*p0);
4217 }
4218 }
b73bfc1c 4219 else
df7492f9
KH
4220 while (charbuf < charbuf_end)
4221 {
4222 ASSURE_DESTINATION (safe_room);
4223 c = *charbuf++;
4224 EMIT_ONE_BYTE (c);
4225 }
4226 }
4227 else
4228 {
4229 if (coding->src_multibyte)
b73bfc1c 4230 {
df7492f9
KH
4231 int safe_room = MAX_MULTIBYTE_LENGTH;
4232
4233 while (charbuf < charbuf_end)
b73bfc1c 4234 {
df7492f9
KH
4235 ASSURE_DESTINATION (safe_room);
4236 c = *charbuf++;
4237 if (ASCII_CHAR_P (c))
4238 *dst++ = c;
4239 else if (CHAR_BYTE8_P (c))
4240 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4241 else
df7492f9
KH
4242 CHAR_STRING_ADVANCE (c, dst);
4243 produced_chars++;
b73bfc1c 4244 }
4ed46869 4245 }
df7492f9
KH
4246 else
4247 {
4248 ASSURE_DESTINATION (charbuf_end - charbuf);
4249 while (charbuf < charbuf_end && dst < dst_end)
4250 *dst++ = *charbuf++;
4251 produced_chars = dst - (coding->destination + coding->dst_bytes);
4252 }
4ed46869 4253 }
df7492f9
KH
4254 coding->result = CODING_RESULT_SUCCESS;
4255 coding->produced_char += produced_chars;
4256 coding->produced = dst - coding->destination;
4257 return 0;
4ed46869
KH
4258}
4259
0a28aafb 4260static int
df7492f9
KH
4261detect_coding_charset (coding, mask)
4262 struct coding_system *coding;
4263 int *mask;
1397dc18 4264{
df7492f9
KH
4265 unsigned char *src = coding->source, *src_base = src;
4266 unsigned char *src_end = coding->source + coding->src_bytes;
4267 int multibytep = coding->src_multibyte;
4268 int consumed_chars = 0;
4269 Lisp_Object attrs, valids;
1397dc18 4270
df7492f9
KH
4271 coding = &coding_categories[coding_category_charset];
4272 attrs = CODING_ID_ATTRS (coding->id);
4273 valids = AREF (attrs, coding_attr_charset_valids);
4274
4275 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4276 src += coding->head_ascii;
1397dc18 4277
b73bfc1c 4278 while (1)
1397dc18 4279 {
df7492f9 4280 int c;
1397dc18 4281
df7492f9
KH
4282 ONE_MORE_BYTE (c);
4283 if (NILP (AREF (valids, c)))
4284 break;
4285 }
4286 *mask &= ~CATEGORY_MASK_CHARSET;
4287 return 0;
4ed46869 4288
df7492f9
KH
4289 no_more_source:
4290 *mask &= CATEGORY_MASK_CHARSET;
4291 return 1;
4292}
4ed46869 4293
b73bfc1c 4294static void
df7492f9 4295decode_coding_charset (coding)
4ed46869 4296 struct coding_system *coding;
4ed46869 4297{
df7492f9
KH
4298 unsigned char *src = coding->source + coding->consumed;
4299 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 4300 unsigned char *src_base;
df7492f9
KH
4301 int *charbuf = coding->charbuf;
4302 int *charbuf_end = charbuf + coding->charbuf_size;
4303 int consumed_chars = 0, consumed_chars_base;
4304 int multibytep = coding->src_multibyte;
4eb6d3f1 4305 Lisp_Object attrs, eol_type, charset_list, valids;
df7492f9
KH
4306
4307 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4308 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4309
df7492f9 4310 while (1)
4ed46869 4311 {
4eb6d3f1 4312 int c;
df7492f9
KH
4313
4314 src_base = src;
4315 consumed_chars_base = consumed_chars;
b73bfc1c 4316
df7492f9
KH
4317 if (charbuf >= charbuf_end)
4318 break;
4319
4eb6d3f1 4320 ONE_MORE_BYTE (c);
df7492f9 4321 if (c == '\r')
d46c5b12 4322 {
c7c66a95
KH
4323 /* Here we assume that no charset maps '\r' to something
4324 else. */
df7492f9 4325 if (EQ (eol_type, Qdos))
b73bfc1c 4326 {
4eb6d3f1
KH
4327 if (src < src_end
4328 && *src == '\n')
df7492f9 4329 ONE_MORE_BYTE (c);
b73bfc1c 4330 }
df7492f9 4331 else if (EQ (eol_type, Qmac))
b73bfc1c 4332 c = '\n';
d46c5b12 4333 }
df7492f9 4334 else
d46c5b12 4335 {
4eb6d3f1
KH
4336 Lisp_Object val;
4337 struct charset *charset;
c7c66a95 4338 int dim;
acb2a965
KH
4339 int len = 1;
4340 unsigned code = c;
4eb6d3f1
KH
4341
4342 val = AREF (valids, c);
4343 if (NILP (val))
4344 goto invalid_code;
c7c66a95 4345 if (INTEGERP (val))
4eb6d3f1 4346 {
c7c66a95
KH
4347 charset = CHARSET_FROM_ID (XFASTINT (val));
4348 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4349 while (len < dim)
4eb6d3f1 4350 {
acb2a965
KH
4351 ONE_MORE_BYTE (c);
4352 code = (code << 8) | c;
f9d71dcd 4353 len++;
4eb6d3f1 4354 }
c7c66a95
KH
4355 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4356 charset, code, c);
4357 }
4358 else
4359 {
4360 /* VAL is a list of charset IDs. It is assured that the
4361 list is sorted by charset dimensions (smaller one
4362 comes first). */
c7c66a95
KH
4363 while (CONSP (val))
4364 {
4365 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4366 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4367 while (len < dim)
c7c66a95 4368 {
acb2a965
KH
4369 ONE_MORE_BYTE (c);
4370 code = (code << 8) | c;
f9d71dcd 4371 len++;
c7c66a95 4372 }
c7c66a95
KH
4373 CODING_DECODE_CHAR (coding, src, src_base,
4374 src_end, charset, code, c);
4375 if (c >= 0)
4376 break;
4377 val = XCDR (val);
4378 }
4eb6d3f1 4379 }
df7492f9
KH
4380 if (c < 0)
4381 goto invalid_code;
d46c5b12 4382 }
df7492f9
KH
4383 *charbuf++ = c;
4384 continue;
4385
4386 invalid_code:
4387 src = src_base;
4388 consumed_chars = consumed_chars_base;
4389 ONE_MORE_BYTE (c);
4390 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4391 coding->errors++;
4ed46869
KH
4392 }
4393
df7492f9
KH
4394 no_more_source:
4395 coding->consumed_char += consumed_chars_base;
4396 coding->consumed = src_base - coding->source;
4397 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4398}
4399
df7492f9
KH
4400static int
4401encode_coding_charset (coding)
4ed46869 4402 struct coding_system *coding;
4ed46869 4403{
df7492f9
KH
4404 int multibytep = coding->dst_multibyte;
4405 int *charbuf = coding->charbuf;
4406 int *charbuf_end = charbuf + coding->charbuf_used;
4407 unsigned char *dst = coding->destination + coding->produced;
4408 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4409 int safe_room = MAX_MULTIBYTE_LENGTH;
4410 int produced_chars = 0;
df7492f9
KH
4411 Lisp_Object attrs, eol_type, charset_list;
4412 int ascii_compatible;
b73bfc1c 4413 int c;
b73bfc1c 4414
df7492f9 4415 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4416 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4417
df7492f9 4418 while (charbuf < charbuf_end)
4ed46869 4419 {
4eb6d3f1 4420 struct charset *charset;
df7492f9
KH
4421 unsigned code;
4422
4423 ASSURE_DESTINATION (safe_room);
4424 c = *charbuf++;
4425 if (ascii_compatible && ASCII_CHAR_P (c))
4426 EMIT_ONE_ASCII_BYTE (c);
d46c5b12 4427 else
4eb6d3f1
KH
4428 {
4429 charset = char_charset (c, charset_list, &code);
4430 if (charset)
4431 {
4432 if (CHARSET_DIMENSION (charset) == 1)
4433 EMIT_ONE_BYTE (code);
4434 else if (CHARSET_DIMENSION (charset) == 2)
4435 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4436 else if (CHARSET_DIMENSION (charset) == 3)
4437 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4438 else
4439 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4440 (code >> 8) & 0xFF, code & 0xFF);
4441 }
4442 else
41cbe562
KH
4443 {
4444 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4445 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4446 else
4447 c = coding->default_char;
4448 EMIT_ONE_BYTE (c);
4449 }
4eb6d3f1 4450 }
4ed46869
KH
4451 }
4452
df7492f9
KH
4453 coding->result = CODING_RESULT_SUCCESS;
4454 coding->produced_char += produced_chars;
4455 coding->produced = dst - coding->destination;
4456 return 0;
4ed46869
KH
4457}
4458
4459\f
1397dc18 4460/*** 7. C library functions ***/
4ed46869 4461
df7492f9 4462/* In Emacs Lisp, coding system is represented by a Lisp symbol which
4ed46869 4463 has a property `coding-system'. The value of this property is a
df7492f9 4464 vector of length 5 (called as coding-vector). Among elements of
4ed46869
KH
4465 this vector, the first (element[0]) and the fifth (element[4])
4466 carry important information for decoding/encoding. Before
4467 decoding/encoding, this information should be set in fields of a
4468 structure of type `coding_system'.
4469
df7492f9 4470 A value of property `coding-system' can be a symbol of another
4ed46869
KH
4471 subsidiary coding-system. In that case, Emacs gets coding-vector
4472 from that symbol.
4473
4474 `element[0]' contains information to be set in `coding->type'. The
4475 value and its meaning is as follows:
4476
0ef69138
KH
4477 0 -- coding_type_emacs_mule
4478 1 -- coding_type_sjis
df7492f9 4479 2 -- coding_type_iso_2022
0ef69138
KH
4480 3 -- coding_type_big5
4481 4 -- coding_type_ccl encoder/decoder written in CCL
4482 nil -- coding_type_no_conversion
4483 t -- coding_type_undecided (automatic conversion on decoding,
4484 no-conversion on encoding)
4ed46869
KH
4485
4486 `element[4]' contains information to be set in `coding->flags' and
4487 `coding->spec'. The meaning varies by `coding->type'.
4488
df7492f9 4489 If `coding->type' is `coding_type_iso_2022', element[4] is a vector
4ed46869
KH
4490 of length 32 (of which the first 13 sub-elements are used now).
4491 Meanings of these sub-elements are:
4492
df7492f9
KH
4493 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso_2022'
4494 If the value is an integer of valid charset, the charset is
4495 assumed to be designated to graphic register N initially.
4ed46869 4496
df7492f9
KH
4497 If the value is minus, it is a minus value of charset which
4498 reserves graphic register N, which means that the charset is
4499 not designated initially but should be designated to graphic
4500 register N just before encoding a character in that charset.
1397dc18 4501
df7492f9
KH
4502 If the value is nil, graphic register N is never used on
4503 encoding.
4504
4505 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
4506 Each value takes t or nil. See the section ISO2022 of
4507 `coding.h' for more information.
1397dc18 4508
df7492f9
KH
4509 If `coding->type' is `coding_type_big5', element[4] is t to denote
4510 BIG5-ETen or nil to denote BIG5-HKU.
4ed46869 4511
df7492f9 4512 If `coding->type' takes the other value, element[4] is ignored.
27901516 4513
df7492f9
KH
4514 Emacs Lisp's coding system also carries information about format of
4515 end-of-line in a value of property `eol-type'. If the value is
4516 integer, 0 means eol_lf, 1 means eol_crlf, and 2 means eol_cr. If
4517 it is not integer, it should be a vector of subsidiary coding
4518 systems of which property `eol-type' has one of above values.
4ed46869 4519
df7492f9 4520*/
4ed46869 4521
df7492f9
KH
4522/* Setup coding context CODING from information about CODING_SYSTEM.
4523 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4524 CODING_SYSTEM is invalid, signal an error. */
ec6d2bb8
KH
4525
4526void
df7492f9
KH
4527setup_coding_system (coding_system, coding)
4528 Lisp_Object coding_system;
ec6d2bb8
KH
4529 struct coding_system *coding;
4530{
df7492f9
KH
4531 Lisp_Object attrs;
4532 Lisp_Object eol_type;
4533 Lisp_Object coding_type;
4534 Lisp_Object val;
ec6d2bb8 4535
df7492f9
KH
4536 if (NILP (coding_system))
4537 coding_system = Qno_conversion;
4538
4539 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4540
4541 attrs = CODING_ID_ATTRS (coding->id);
4542 eol_type = CODING_ID_EOL_TYPE (coding->id);
4543
4544 coding->mode = 0;
4545 coding->head_ascii = -1;
4546 coding->common_flags
4547 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4548
4549 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4550 coding->max_charset_id = XSTRING (val)->size - 1;
4551 coding->safe_charsets = (char *) XSTRING (val)->data;
4552 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4553
4554 coding_type = CODING_ATTR_TYPE (attrs);
4555 if (EQ (coding_type, Qundecided))
4556 {
4557 coding->detector = NULL;
4558 coding->decoder = decode_coding_raw_text;
4559 coding->encoder = encode_coding_raw_text;
4560 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4561 }
4562 else if (EQ (coding_type, Qiso_2022))
4563 {
4564 int i;
4565 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4566
4567 /* Invoke graphic register 0 to plane 0. */
4568 CODING_ISO_INVOCATION (coding, 0) = 0;
4569 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4570 CODING_ISO_INVOCATION (coding, 1)
4571 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4572 /* Setup the initial status of designation. */
4573 for (i = 0; i < 4; i++)
4574 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4575 /* Not single shifting initially. */
4576 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4577 /* Beginning of buffer should also be regarded as bol. */
4578 CODING_ISO_BOL (coding) = 1;
4579 coding->detector = detect_coding_iso_2022;
4580 coding->decoder = decode_coding_iso_2022;
4581 coding->encoder = encode_coding_iso_2022;
4582 if (flags & CODING_ISO_FLAG_SAFE)
4583 coding->mode |= CODING_MODE_SAFE_ENCODING;
4584 coding->common_flags
4585 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4586 | CODING_REQUIRE_FLUSHING_MASK);
4587 if (flags & CODING_ISO_FLAG_COMPOSITION)
4588 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4589 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4590 {
4591 setup_iso_safe_charsets (attrs);
4592 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4593 coding->max_charset_id = XSTRING (val)->size - 1;
4594 coding->safe_charsets = (char *) XSTRING (val)->data;
4595 }
4596 CODING_ISO_FLAGS (coding) = flags;
4597 }
4598 else if (EQ (coding_type, Qcharset))
4599 {
4600 coding->detector = detect_coding_charset;
4601 coding->decoder = decode_coding_charset;
4602 coding->encoder = encode_coding_charset;
4603 coding->common_flags
4604 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4605 }
4606 else if (EQ (coding_type, Qutf_8))
4607 {
4608 coding->detector = detect_coding_utf_8;
4609 coding->decoder = decode_coding_utf_8;
4610 coding->encoder = encode_coding_utf_8;
4611 coding->common_flags
4612 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4613 }
4614 else if (EQ (coding_type, Qutf_16))
4615 {
4616 val = AREF (attrs, coding_attr_utf_16_bom);
4617 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4618 : EQ (val, Qt) ? utf_16_with_bom
4619 : utf_16_without_bom);
4620 val = AREF (attrs, coding_attr_utf_16_endian);
4621 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4622 : utf_16_little_endian);
e19c3639 4623 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4624 coding->detector = detect_coding_utf_16;
4625 coding->decoder = decode_coding_utf_16;
4626 coding->encoder = encode_coding_utf_16;
4627 coding->common_flags
4628 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4629 }
4630 else if (EQ (coding_type, Qccl))
4631 {
4632 coding->detector = detect_coding_ccl;
4633 coding->decoder = decode_coding_ccl;
4634 coding->encoder = encode_coding_ccl;
4635 coding->common_flags
4636 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4637 | CODING_REQUIRE_FLUSHING_MASK);
4638 }
4639 else if (EQ (coding_type, Qemacs_mule))
4640 {
4641 coding->detector = detect_coding_emacs_mule;
4642 coding->decoder = decode_coding_emacs_mule;
4643 coding->encoder = encode_coding_emacs_mule;
4644 coding->common_flags
4645 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4646 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4647 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4648 {
4649 Lisp_Object tail, safe_charsets;
4650 int max_charset_id = 0;
4651
4652 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4653 tail = XCDR (tail))
4654 if (max_charset_id < XFASTINT (XCAR (tail)))
4655 max_charset_id = XFASTINT (XCAR (tail));
4656 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4657 make_number (255));
4658 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4659 tail = XCDR (tail))
4660 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4661 coding->max_charset_id = max_charset_id;
4662 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4663 }
4664 }
4665 else if (EQ (coding_type, Qshift_jis))
4666 {
4667 coding->detector = detect_coding_sjis;
4668 coding->decoder = decode_coding_sjis;
4669 coding->encoder = encode_coding_sjis;
4670 coding->common_flags
4671 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4672 }
4673 else if (EQ (coding_type, Qbig5))
4674 {
4675 coding->detector = detect_coding_big5;
4676 coding->decoder = decode_coding_big5;
4677 coding->encoder = encode_coding_big5;
4678 coding->common_flags
4679 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4680 }
4681 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4682 {
df7492f9
KH
4683 coding->detector = NULL;
4684 coding->decoder = decode_coding_raw_text;
4685 coding->encoder = encode_coding_raw_text;
4686 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
ec6d2bb8 4687 }
df7492f9
KH
4688
4689 return;
ec6d2bb8
KH
4690}
4691
df7492f9
KH
4692/* Return raw-text or one of its subsidiaries that has the same
4693 eol_type as CODING-SYSTEM. */
ec6d2bb8 4694
df7492f9
KH
4695Lisp_Object
4696raw_text_coding_system (coding_system)
4697 Lisp_Object coding_system;
ec6d2bb8 4698{
0be8721c 4699 Lisp_Object spec, attrs;
df7492f9
KH
4700 Lisp_Object eol_type, raw_text_eol_type;
4701
4702 spec = CODING_SYSTEM_SPEC (coding_system);
4703 attrs = AREF (spec, 0);
4704
4705 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4706 return coding_system;
ec6d2bb8 4707
df7492f9
KH
4708 eol_type = AREF (spec, 2);
4709 if (VECTORP (eol_type))
4710 return Qraw_text;
4711 spec = CODING_SYSTEM_SPEC (Qraw_text);
4712 raw_text_eol_type = AREF (spec, 2);
4713 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4714 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4715 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
4716}
4717
54f78171 4718
df7492f9
KH
4719/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4720 does, return one of the subsidiary that has the same eol-spec as
4721 PARENT. Otherwise, return CODING_SYSTEM. */
4722
4723Lisp_Object
4724coding_inherit_eol_type (coding_system, parent)
b74e4686 4725 Lisp_Object coding_system, parent;
54f78171 4726{
df7492f9 4727 Lisp_Object spec, attrs, eol_type;
54f78171 4728
df7492f9
KH
4729 spec = CODING_SYSTEM_SPEC (coding_system);
4730 attrs = AREF (spec, 0);
4731 eol_type = AREF (spec, 2);
4732 if (VECTORP (eol_type))
4733 {
4734 Lisp_Object parent_spec;
df7492f9
KH
4735 Lisp_Object parent_eol_type;
4736
4737 parent_spec
4738 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4739 parent_eol_type = AREF (parent_spec, 2);
4740 if (EQ (parent_eol_type, Qunix))
4741 coding_system = AREF (eol_type, 0);
4742 else if (EQ (parent_eol_type, Qdos))
4743 coding_system = AREF (eol_type, 1);
4744 else if (EQ (parent_eol_type, Qmac))
4745 coding_system = AREF (eol_type, 2);
54f78171 4746 }
df7492f9 4747 return coding_system;
54f78171
KH
4748}
4749
4ed46869
KH
4750/* Emacs has a mechanism to automatically detect a coding system if it
4751 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4752 it's impossible to distinguish some coding systems accurately
4753 because they use the same range of codes. So, at first, coding
4754 systems are categorized into 7, those are:
4755
0ef69138 4756 o coding-category-emacs-mule
4ed46869
KH
4757
4758 The category for a coding system which has the same code range
4759 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 4760 symbol) `emacs-mule' by default.
4ed46869
KH
4761
4762 o coding-category-sjis
4763
4764 The category for a coding system which has the same code range
4765 as SJIS. Assigned the coding-system (Lisp
7717c392 4766 symbol) `japanese-shift-jis' by default.
4ed46869
KH
4767
4768 o coding-category-iso-7
4769
4770 The category for a coding system which has the same code range
7717c392 4771 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
4772 shift and single shift functions. This can encode/decode all
4773 charsets. Assigned the coding-system (Lisp symbol)
4774 `iso-2022-7bit' by default.
4775
4776 o coding-category-iso-7-tight
4777
4778 Same as coding-category-iso-7 except that this can
4779 encode/decode only the specified charsets.
4ed46869
KH
4780
4781 o coding-category-iso-8-1
4782
4783 The category for a coding system which has the same code range
4784 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4785 for DIMENSION1 charset. This doesn't use any locking shift
4786 and single shift functions. Assigned the coding-system (Lisp
4787 symbol) `iso-latin-1' by default.
4ed46869
KH
4788
4789 o coding-category-iso-8-2
4790
4791 The category for a coding system which has the same code range
4792 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4793 for DIMENSION2 charset. This doesn't use any locking shift
4794 and single shift functions. Assigned the coding-system (Lisp
4795 symbol) `japanese-iso-8bit' by default.
4ed46869 4796
7717c392 4797 o coding-category-iso-7-else
4ed46869
KH
4798
4799 The category for a coding system which has the same code range
df7492f9 4800 as ISO2022 of 7-bit environemnt but uses locking shift or
7717c392
KH
4801 single shift functions. Assigned the coding-system (Lisp
4802 symbol) `iso-2022-7bit-lock' by default.
4803
4804 o coding-category-iso-8-else
4805
4806 The category for a coding system which has the same code range
df7492f9 4807 as ISO2022 of 8-bit environemnt but uses locking shift or
7717c392
KH
4808 single shift functions. Assigned the coding-system (Lisp
4809 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
4810
4811 o coding-category-big5
4812
4813 The category for a coding system which has the same code range
4814 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 4815 `cn-big5' by default.
4ed46869 4816
fa42c37f
KH
4817 o coding-category-utf-8
4818
4819 The category for a coding system which has the same code range
4820 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4821 symbol) `utf-8' by default.
4822
4823 o coding-category-utf-16-be
4824
4825 The category for a coding system in which a text has an
4826 Unicode signature (cf. Unicode Standard) in the order of BIG
4827 endian at the head. Assigned the coding-system (Lisp symbol)
4828 `utf-16-be' by default.
4829
4830 o coding-category-utf-16-le
4831
4832 The category for a coding system in which a text has an
4833 Unicode signature (cf. Unicode Standard) in the order of
4834 LITTLE endian at the head. Assigned the coding-system (Lisp
4835 symbol) `utf-16-le' by default.
4836
1397dc18
KH
4837 o coding-category-ccl
4838
4839 The category for a coding system of which encoder/decoder is
4840 written in CCL programs. The default value is nil, i.e., no
4841 coding system is assigned.
4842
4ed46869
KH
4843 o coding-category-binary
4844
4845 The category for a coding system not categorized in any of the
4846 above. Assigned the coding-system (Lisp symbol)
e0e989f6 4847 `no-conversion' by default.
4ed46869
KH
4848
4849 Each of them is a Lisp symbol and the value is an actual
df7492f9 4850 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
4851 What Emacs does actually is to detect a category of coding system.
4852 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 4853 decide only one possible category, it selects a category of the
4ed46869
KH
4854 highest priority. Priorities of categories are also specified by a
4855 user in a Lisp variable `coding-category-list'.
4856
4857*/
4858
df7492f9
KH
4859#define EOL_SEEN_NONE 0
4860#define EOL_SEEN_LF 1
4861#define EOL_SEEN_CR 2
4862#define EOL_SEEN_CRLF 4
4ed46869 4863
df7492f9
KH
4864/* Detect how end-of-line of a text of length CODING->src_bytes
4865 pointed by CODING->source is encoded. Return one of
4866 EOL_SEEN_XXX. */
4ed46869 4867
bc4bc72a
RS
4868#define MAX_EOL_CHECK_COUNT 3
4869
d46c5b12 4870static int
df7492f9
KH
4871detect_eol (coding, source, src_bytes)
4872 struct coding_system *coding;
d46c5b12 4873 unsigned char *source;
df7492f9 4874 EMACS_INT src_bytes;
4ed46869 4875{
df7492f9 4876 Lisp_Object attrs, coding_type;
d46c5b12 4877 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 4878 unsigned char c;
df7492f9
KH
4879 int total = 0;
4880 int eol_seen = EOL_SEEN_NONE;
4ed46869 4881
df7492f9
KH
4882 attrs = CODING_ID_ATTRS (coding->id);
4883 coding_type = CODING_ATTR_TYPE (attrs);
d46c5b12 4884
df7492f9 4885 if (EQ (coding_type, Qccl))
4ed46869 4886 {
df7492f9 4887 int msb, lsb;
fa42c37f 4888
df7492f9
KH
4889 msb = coding->spec.utf_16.endian == utf_16_little_endian;
4890 lsb = 1 - msb;
fa42c37f 4891
df7492f9 4892 while (src + 1 < src_end)
fa42c37f 4893 {
df7492f9
KH
4894 c = src[lsb];
4895 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 4896 {
df7492f9
KH
4897 int this_eol;
4898
4899 if (c == '\n')
4900 this_eol = EOL_SEEN_LF;
4901 else if (src + 3 >= src_end
4902 || src[msb + 2] != 0
4903 || src[lsb + 2] != '\n')
4904 this_eol = EOL_SEEN_CR;
fa42c37f 4905 else
df7492f9
KH
4906 this_eol = EOL_SEEN_CRLF;
4907
4908 if (eol_seen == EOL_SEEN_NONE)
4909 /* This is the first end-of-line. */
4910 eol_seen = this_eol;
4911 else if (eol_seen != this_eol)
fa42c37f 4912 {
df7492f9
KH
4913 /* The found type is different from what found before. */
4914 eol_seen = EOL_SEEN_LF;
4915 break;
fa42c37f 4916 }
df7492f9
KH
4917 if (++total == MAX_EOL_CHECK_COUNT)
4918 break;
fa42c37f 4919 }
df7492f9 4920 src += 2;
fa42c37f 4921 }
df7492f9 4922 }
d46c5b12 4923 else
27901516 4924 {
df7492f9 4925 while (src < src_end)
27901516 4926 {
df7492f9
KH
4927 c = *src++;
4928 if (c == '\n' || c == '\r')
4929 {
4930 int this_eol;
d46c5b12 4931
df7492f9
KH
4932 if (c == '\n')
4933 this_eol = EOL_SEEN_LF;
4934 else if (src >= src_end || *src != '\n')
4935 this_eol = EOL_SEEN_CR;
4936 else
4937 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 4938
df7492f9
KH
4939 if (eol_seen == EOL_SEEN_NONE)
4940 /* This is the first end-of-line. */
4941 eol_seen = this_eol;
4942 else if (eol_seen != this_eol)
4943 {
4944 /* The found type is different from what found before. */
4945 eol_seen = EOL_SEEN_LF;
4946 break;
4947 }
4948 if (++total == MAX_EOL_CHECK_COUNT)
4949 break;
4950 }
4951 }
73be902c 4952 }
df7492f9 4953 return eol_seen;
73be902c
KH
4954}
4955
df7492f9 4956
73be902c 4957static void
df7492f9
KH
4958adjust_coding_eol_type (coding, eol_seen)
4959 struct coding_system *coding;
4960 int eol_seen;
73be902c 4961{
0be8721c 4962 Lisp_Object eol_type;
df7492f9
KH
4963
4964 eol_type = CODING_ID_EOL_TYPE (coding->id);
4965 if (eol_seen & EOL_SEEN_LF)
4966 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
4967 else if (eol_type & EOL_SEEN_CRLF)
4968 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
4969 else if (eol_type & EOL_SEEN_CR)
4970 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12
KH
4971}
4972
df7492f9
KH
4973/* Detect how a text specified in CODING is encoded. If a coding
4974 system is detected, update fields of CODING by the detected coding
4975 system. */
4976
4977void
4978detect_coding (coding)
d46c5b12 4979 struct coding_system *coding;
d46c5b12 4980{
df7492f9
KH
4981 unsigned char *src, *src_end;
4982 Lisp_Object attrs, coding_type;
d46c5b12 4983
df7492f9
KH
4984 coding->consumed = coding->consumed_char = 0;
4985 coding->produced = coding->produced_char = 0;
4986 coding_set_source (coding);
1c3478b0 4987
df7492f9 4988 src_end = coding->source + coding->src_bytes;
1c3478b0 4989
df7492f9
KH
4990 /* If we have not yet decided the text encoding type, detect it
4991 now. */
4992 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 4993 {
df7492f9
KH
4994 int mask = CATEGORY_MASK_ANY;
4995 int c, i;
4996
4997 for (src = coding->source; src < src_end; src++)
4998 {
4999 c = *src;
5000 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5001 || c == ISO_CODE_SI
5002 || c == ISO_CODE_SO)))
5003 break;
5004 }
5005 coding->head_ascii = src - (coding->source + coding->consumed);
5006
5007 if (coding->head_ascii < coding->src_bytes)
1c3478b0 5008 {
df7492f9
KH
5009 int detected = 0;
5010
5011 for (i = 0; i < coding_category_raw_text; i++)
1c3478b0 5012 {
df7492f9
KH
5013 enum coding_category category = coding_priorities[i];
5014 struct coding_system *this = coding_categories + category;
5015
5016 if (category >= coding_category_raw_text
5017 || detected & (1 << category))
5018 continue;
5019
5020 if (this->id < 0)
1c3478b0 5021 {
df7492f9
KH
5022 /* No coding system of this category is defined. */
5023 mask &= ~(1 << category);
5024 }
5025 else
5026 {
5027 detected |= detected_mask[category];
5028 if ((*(this->detector)) (coding, &mask))
5029 break;
1c3478b0
KH
5030 }
5031 }
df7492f9
KH
5032 if (! mask)
5033 setup_coding_system (Qraw_text, coding);
5034 else if (mask != CATEGORY_MASK_ANY)
5035 for (i = 0; i < coding_category_raw_text; i++)
5036 {
5037 enum coding_category category = coding_priorities[i];
5038 struct coding_system *this = coding_categories + category;
5039
5040 if (mask & (1 << category))
5041 {
5042 setup_coding_system (CODING_ID_NAME (this->id), coding);
5043 break;
5044 }
5045 }
1c3478b0 5046 }
b73bfc1c 5047 }
69f76525 5048
df7492f9
KH
5049 attrs = CODING_ID_ATTRS (coding->id);
5050 coding_type = CODING_ATTR_TYPE (attrs);
5051
5052 /* If we have not yet decided the EOL type, detect it now. But, the
5053 detection is impossible for a CCL based coding system, in which
5054 case, we detct the EOL type after decoding. */
5055 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5056 && ! EQ (coding_type, Qccl))
d46c5b12 5057 {
df7492f9
KH
5058 int eol_seen = detect_eol (coding, coding->source, coding->src_bytes);
5059
5060 if (eol_seen != EOL_SEEN_NONE)
5061 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 5062 }
4ed46869
KH
5063}
5064
aaaf0b1e
KH
5065
5066static void
df7492f9 5067decode_eol (coding)
aaaf0b1e 5068 struct coding_system *coding;
aaaf0b1e 5069{
df7492f9 5070 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 5071 {
df7492f9
KH
5072 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5073 unsigned char *pend = p + coding->produced;
5074 int eol_seen = EOL_SEEN_NONE;
aaaf0b1e 5075
df7492f9 5076 for (; p < pend; p++)
aaaf0b1e 5077 {
df7492f9
KH
5078 if (*p == '\n')
5079 eol_seen |= EOL_SEEN_LF;
5080 else if (*p == '\r')
aaaf0b1e 5081 {
df7492f9 5082 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5083 {
df7492f9
KH
5084 eol_seen |= EOL_SEEN_CRLF;
5085 p++;
aaaf0b1e 5086 }
aaaf0b1e 5087 else
df7492f9 5088 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5089 }
aaaf0b1e 5090 }
df7492f9
KH
5091 if (eol_seen != EOL_SEEN_NONE)
5092 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5093 }
aaaf0b1e 5094
df7492f9
KH
5095 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5096 {
5097 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5098 unsigned char *pend = p + coding->produced;
5099
5100 for (; p < pend; p++)
5101 if (*p == '\r')
5102 *p = '\n';
5103 }
5104 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5105 {
5106 unsigned char *p, *pbeg, *pend;
5107 Lisp_Object undo_list;
5108
5109 move_gap_both (coding->dst_pos + coding->produced_char,
5110 coding->dst_pos_byte + coding->produced);
5111 undo_list = current_buffer->undo_list;
5112 current_buffer->undo_list = Qt;
5113 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, Qnil);
5114 current_buffer->undo_list = undo_list;
5115 pbeg = GPT_ADDR;
5116 pend = pbeg + coding->produced;
5117
5118 for (p = pend - 1; p >= pbeg; p--)
5119 if (*p == '\r')
5120 {
5121 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5122 pend--;
5123 }
5124 coding->produced_char -= coding->produced - (pend - pbeg);
5125 coding->produced = pend - pbeg;
5126 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e
KH
5127 }
5128}
5129
df7492f9
KH
5130static void
5131translate_chars (coding, table)
4ed46869 5132 struct coding_system *coding;
df7492f9 5133 Lisp_Object table;
4ed46869 5134{
df7492f9
KH
5135 int *charbuf = coding->charbuf;
5136 int *charbuf_end = charbuf + coding->charbuf_used;
5137 int c;
5138
5139 if (coding->chars_at_source)
5140 return;
4ed46869 5141
df7492f9 5142 while (charbuf < charbuf_end)
8844fa83 5143 {
df7492f9
KH
5144 c = *charbuf;
5145 if (c < 0)
5146 charbuf += c;
5147 else
5148 *charbuf++ = translate_char (table, c);
8844fa83 5149 }
df7492f9 5150}
4ed46869 5151
df7492f9
KH
5152static int
5153produce_chars (coding)
5154 struct coding_system *coding;
5155{
5156 unsigned char *dst = coding->destination + coding->produced;
5157 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5158 int produced;
5159 int produced_chars = 0;
b73bfc1c 5160
df7492f9 5161 if (! coding->chars_at_source)
4ed46869 5162 {
df7492f9
KH
5163 /* Characters are in coding->charbuf. */
5164 int *buf = coding->charbuf;
5165 int *buf_end = buf + coding->charbuf_used;
5166 unsigned char *adjusted_dst_end;
4ed46869 5167
df7492f9
KH
5168 if (BUFFERP (coding->src_object)
5169 && EQ (coding->src_object, coding->dst_object))
5170 dst_end = coding->source + coding->consumed;
5171 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5172
df7492f9
KH
5173 while (buf < buf_end)
5174 {
5175 int c = *buf++;
5176
5177 if (dst >= adjusted_dst_end)
5178 {
5179 dst = alloc_destination (coding,
5180 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5181 dst);
5182 dst_end = coding->destination + coding->dst_bytes;
5183 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5184 }
5185 if (c >= 0)
5186 {
5187 if (coding->dst_multibyte
5188 || ! CHAR_BYTE8_P (c))
5189 CHAR_STRING_ADVANCE (c, dst);
5190 else
5191 *dst++ = CHAR_TO_BYTE8 (c);
5192 produced_chars++;
5193 }
5194 else
5195 /* This is an annotation data. */
5196 buf -= c + 1;
5197 }
5198 }
5199 else
5200 {
df7492f9
KH
5201 unsigned char *src = coding->source;
5202 unsigned char *src_end = src + coding->src_bytes;
5203 Lisp_Object eol_type;
b73bfc1c 5204
df7492f9 5205 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5206
df7492f9 5207 if (coding->src_multibyte != coding->dst_multibyte)
aaaf0b1e 5208 {
df7492f9
KH
5209 if (coding->src_multibyte)
5210 {
71c81426 5211 int multibytep = 1;
df7492f9 5212 int consumed_chars;
d46c5b12 5213
df7492f9
KH
5214 while (1)
5215 {
5216 unsigned char *src_base = src;
5217 int c;
b73bfc1c 5218
df7492f9
KH
5219 ONE_MORE_BYTE (c);
5220 if (c == '\r')
5221 {
5222 if (EQ (eol_type, Qdos))
5223 {
5224 if (src < src_end
5225 && *src == '\n')
5226 c = *src++;
5227 }
5228 else if (EQ (eol_type, Qmac))
5229 c = '\n';
5230 }
5231 if (dst == dst_end)
5232 {
2c78b7e1 5233 coding->consumed = src - coding->source;
b73bfc1c 5234
2c78b7e1
KH
5235 if (EQ (coding->src_object, coding->dst_object))
5236 dst_end = src;
5237 if (dst == dst_end)
5238 {
5239 dst = alloc_destination (coding, src_end - src + 1,
5240 dst);
5241 dst_end = coding->destination + coding->dst_bytes;
5242 coding_set_source (coding);
5243 src = coding->source + coding->consumed;
5244 src_end = coding->source + coding->src_bytes;
5245 }
df7492f9
KH
5246 }
5247 *dst++ = c;
5248 produced_chars++;
5249 }
5250 no_more_source:
5251 ;
5252 }
5253 else
5254 while (src < src_end)
5255 {
71c81426 5256 int multibytep = 1;
df7492f9 5257 int c = *src++;
b73bfc1c 5258
df7492f9
KH
5259 if (c == '\r')
5260 {
5261 if (EQ (eol_type, Qdos))
5262 {
5263 if (src < src_end
5264 && *src == '\n')
5265 c = *src++;
5266 }
5267 else if (EQ (eol_type, Qmac))
5268 c = '\n';
5269 }
5270 if (dst >= dst_end - 1)
5271 {
2c78b7e1 5272 coding->consumed = src - coding->source;
df7492f9 5273
2c78b7e1
KH
5274 if (EQ (coding->src_object, coding->dst_object))
5275 dst_end = src;
5276 if (dst >= dst_end - 1)
5277 {
5278 dst = alloc_destination (coding, src_end - src + 2,
5279 dst);
5280 dst_end = coding->destination + coding->dst_bytes;
5281 coding_set_source (coding);
5282 src = coding->source + coding->consumed;
5283 src_end = coding->source + coding->src_bytes;
5284 }
df7492f9
KH
5285 }
5286 EMIT_ONE_BYTE (c);
5287 }
d46c5b12 5288 }
df7492f9
KH
5289 else
5290 {
5291 if (!EQ (coding->src_object, coding->dst_object))
5292 {
5293 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5294
df7492f9
KH
5295 if (require > 0)
5296 {
5297 EMACS_INT offset = src - coding->source;
5298
5299 dst = alloc_destination (coding, require, dst);
5300 coding_set_source (coding);
5301 src = coding->source + offset;
5302 src_end = coding->source + coding->src_bytes;
5303 }
5304 }
5305 produced_chars = coding->src_chars;
5306 while (src < src_end)
5307 {
5308 int c = *src++;
5309
5310 if (c == '\r')
5311 {
5312 if (EQ (eol_type, Qdos))
5313 {
5314 if (src < src_end
5315 && *src == '\n')
5316 c = *src++;
5317 produced_chars--;
5318 }
5319 else if (EQ (eol_type, Qmac))
5320 c = '\n';
5321 }
5322 *dst++ = c;
5323 }
5324 }
2c78b7e1
KH
5325 coding->consumed = coding->src_bytes;
5326 coding->consumed_char = coding->src_chars;
b73bfc1c 5327 }
4ed46869 5328
df7492f9
KH
5329 produced = dst - (coding->destination + coding->produced);
5330 if (BUFFERP (coding->dst_object))
5331 insert_from_gap (produced_chars, produced);
5332 coding->produced += produced;
5333 coding->produced_char += produced_chars;
5334 return produced_chars;
b73bfc1c 5335}
52d41803 5336
df7492f9
KH
5337/* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5338 or
5339 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5340 */
4ed46869 5341
df7492f9
KH
5342static INLINE void
5343produce_composition (coding, charbuf)
4ed46869 5344 struct coding_system *coding;
df7492f9 5345 int *charbuf;
4ed46869 5346{
df7492f9
KH
5347 Lisp_Object buffer;
5348 int len;
5349 EMACS_INT pos;
5350 enum composition_method method;
5351 int cmp_len;
5352 Lisp_Object components;
5353
5354 buffer = coding->dst_object;
5355 len = -charbuf[0];
5356 pos = coding->dst_pos + charbuf[1];
5357 method = (enum composition_method) (charbuf[3]);
5358 cmp_len = charbuf[4];
5359
5360 if (method == COMPOSITION_RELATIVE)
5361 components = Qnil;
5362 else
d46c5b12 5363 {
df7492f9
KH
5364 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5365 int i;
4ed46869 5366
df7492f9
KH
5367 len -= 5;
5368 charbuf += 5;
5369 for (i = 0; i < len; i++)
5370 args[i] = make_number (charbuf[i]);
5371 components = (method == COMPOSITION_WITH_ALTCHARS
5372 ? Fstring (len, args) : Fvector (len, args));
5373 }
5374 compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5375}
b73bfc1c 5376
df7492f9
KH
5377static int *
5378save_composition_data (buf, buf_end, prop)
5379 int *buf, *buf_end;
5380 Lisp_Object prop;
5381{
5382 enum composition_method method = COMPOSITION_METHOD (prop);
5383 int cmp_len = COMPOSITION_LENGTH (prop);
4ed46869 5384
df7492f9
KH
5385 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5386 return NULL;
d46c5b12 5387
df7492f9
KH
5388 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5389 buf[2] = method;
5390 buf[3] = cmp_len;
b73bfc1c 5391
df7492f9
KH
5392 if (method == COMPOSITION_RELATIVE)
5393 buf[0] = 4;
5394 else
b73bfc1c 5395 {
df7492f9
KH
5396 Lisp_Object components;
5397 int len, i;
b73bfc1c 5398
df7492f9
KH
5399 components = COMPOSITION_COMPONENTS (prop);
5400 if (VECTORP (components))
d46c5b12 5401 {
df7492f9
KH
5402 len = XVECTOR (components)->size;
5403 for (i = 0; i < len; i++)
5404 buf[4 + i] = XINT (AREF (components, i));
5405 }
5406 else if (STRINGP (components))
5407 {
5408 int i_byte;
b73bfc1c 5409
df7492f9
KH
5410 len = XSTRING (components)->size;
5411 i = i_byte = 0;
5412 while (i < len)
5413 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5414 }
5415 else if (INTEGERP (components))
5416 {
5417 len = 1;
5418 buf[4] = XINT (components);
5419 }
5420 else if (CONSP (components))
5421 {
5422 for (len = 0; CONSP (components);
5423 len++, components = XCDR (components))
5424 buf[4 + len] = XINT (XCAR (components));
d46c5b12 5425 }
df7492f9
KH
5426 else
5427 abort ();
5428 buf[0] = 4 + len;
4ed46869 5429 }
df7492f9 5430 return (buf + buf[0]);
4ed46869
KH
5431}
5432
df7492f9
KH
5433#define CHARBUF_SIZE 0x4000
5434
5435#define ALLOC_CONVERSION_WORK_AREA(coding) \
5436 do { \
5437 int size = CHARBUF_SIZE;; \
5438 \
5439 coding->charbuf = NULL; \
5440 while (size > 1024) \
5441 { \
5442 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5443 if (coding->charbuf) \
5444 break; \
5445 size >>= 1; \
5446 } \
5447 if (! coding->charbuf) \
5448 { \
5449 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5450 return coding->result; \
5451 } \
5452 coding->charbuf_size = size; \
5453 } while (0)
4ed46869 5454
d46c5b12
KH
5455
5456static void
df7492f9 5457produce_annotation (coding)
d46c5b12 5458 struct coding_system *coding;
d46c5b12 5459{
df7492f9
KH
5460 int *charbuf = coding->charbuf;
5461 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5462
df7492f9 5463 while (charbuf < charbuf_end)
d46c5b12 5464 {
df7492f9
KH
5465 if (*charbuf >= 0)
5466 charbuf++;
d46c5b12 5467 else
d46c5b12 5468 {
df7492f9
KH
5469 int len = -*charbuf;
5470 switch (charbuf[2])
5471 {
5472 case CODING_ANNOTATE_COMPOSITION_MASK:
5473 produce_composition (coding, charbuf);
5474 break;
5475 default:
5476 abort ();
5477 }
5478 charbuf += len;
d46c5b12 5479 }
df7492f9
KH
5480 }
5481}
d46c5b12 5482
df7492f9
KH
5483/* Decode the data at CODING->src_object into CODING->dst_object.
5484 CODING->src_object is a buffer, a string, or nil.
5485 CODING->dst_object is a buffer.
de79a6a5 5486
df7492f9
KH
5487 If CODING->src_object is a buffer, it must be the current buffer.
5488 In this case, if CODING->src_pos is positive, it is a position of
5489 the source text in the buffer, otherwise, the source text is in the
5490 gap area of the buffer, and CODING->src_pos specifies the offset of
5491 the text from GPT (which must be the same as PT). If this is the
5492 same buffer as CODING->dst_object, CODING->src_pos must be
5493 negative.
b73bfc1c 5494
df7492f9
KH
5495 If CODING->src_object is a string, CODING->src_pos in an index to
5496 that string.
d46c5b12 5497
df7492f9
KH
5498 If CODING->src_object is nil, CODING->source must already point to
5499 the non-relocatable memory area. In this case, CODING->src_pos is
5500 an offset from CODING->source.
d46c5b12 5501
df7492f9
KH
5502 The decoded data is inserted at the current point of the buffer
5503 CODING->dst_object.
5504*/
5505
5506static int
5507decode_coding (coding)
d46c5b12 5508 struct coding_system *coding;
d46c5b12 5509{
df7492f9 5510 Lisp_Object attrs;
d46c5b12 5511
df7492f9
KH
5512 if (BUFFERP (coding->src_object)
5513 && coding->src_pos > 0
5514 && coding->src_pos < GPT
5515 && coding->src_pos + coding->src_chars > GPT)
5516 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5517
df7492f9 5518 if (BUFFERP (coding->dst_object))
88993dfd 5519 {
df7492f9
KH
5520 if (current_buffer != XBUFFER (coding->dst_object))
5521 set_buffer_internal (XBUFFER (coding->dst_object));
5522 if (GPT != PT)
5523 move_gap_both (PT, PT_BYTE);
88993dfd
KH
5524 }
5525
df7492f9
KH
5526 coding->consumed = coding->consumed_char = 0;
5527 coding->produced = coding->produced_char = 0;
5528 coding->chars_at_source = 0;
5529 coding->result = CODING_RESULT_SUCCESS;
5530 coding->errors = 0;
5531
5532 ALLOC_CONVERSION_WORK_AREA (coding);
5533
5534 attrs = CODING_ID_ATTRS (coding->id);
5535
5536 do
d46c5b12 5537 {
df7492f9
KH
5538 coding_set_source (coding);
5539 coding->annotated = 0;
5540 (*(coding->decoder)) (coding);
5541 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5542 translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
5543 coding_set_destination (coding);
5544 produce_chars (coding);
5545 if (coding->annotated)
5546 produce_annotation (coding);
d46c5b12 5547 }
df7492f9
KH
5548 while (coding->consumed < coding->src_bytes
5549 && ! coding->result);
d46c5b12 5550
df7492f9
KH
5551 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5552 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5553 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5554 decode_eol (coding);
d46c5b12 5555
df7492f9
KH
5556 coding->carryover_bytes = 0;
5557 if (coding->consumed < coding->src_bytes)
d46c5b12 5558 {
df7492f9
KH
5559 int nbytes = coding->src_bytes - coding->consumed;
5560 unsigned char *src;
5561
5562 coding_set_source (coding);
5563 coding_set_destination (coding);
5564 src = coding->source + coding->consumed;
5565
5566 if (coding->mode & CODING_MODE_LAST_BLOCK)
d46c5b12 5567 {
df7492f9
KH
5568 /* Flush out unprocessed data as binary chars. We are sure
5569 that the number of data is less than the size of
5570 coding->charbuf. */
5571 int *charbuf = coding->charbuf;
5572
5573 while (nbytes-- > 0)
d46c5b12 5574 {
df7492f9
KH
5575 int c = *src++;
5576 *charbuf++ = (c & 0x80 ? - c : c);
d46c5b12 5577 }
df7492f9 5578 produce_chars (coding);
d46c5b12 5579 }
d46c5b12 5580 else
df7492f9
KH
5581 {
5582 /* Record unprocessed bytes in coding->carryover. We are
5583 sure that the number of data is less than the size of
5584 coding->carryover. */
5585 unsigned char *p = coding->carryover;
5586
5587 coding->carryover_bytes = nbytes;
5588 while (nbytes-- > 0)
5589 *p++ = *src++;
5590 }
5591 coding->consumed = coding->src_bytes;
5592 }
b73bfc1c 5593
df7492f9 5594 return coding->result;
d46c5b12
KH
5595}
5596
df7492f9
KH
5597static void
5598consume_chars (coding)
5599 struct coding_system *coding;
5600{
5601 int *buf = coding->charbuf;
5602 /* -1 is to compensate for CRLF. */
5603 int *buf_end = coding->charbuf + coding->charbuf_size - 1;
5604 unsigned char *src = coding->source + coding->consumed;
5605 int pos = coding->src_pos + coding->consumed_char;
5606 int end_pos = coding->src_pos + coding->src_chars;
5607 int multibytep = coding->src_multibyte;
5608 Lisp_Object eol_type;
5609 int c;
5610 int start, end, stop;
5611 Lisp_Object object, prop;
88993dfd 5612
df7492f9
KH
5613 eol_type = CODING_ID_EOL_TYPE (coding->id);
5614 if (VECTORP (eol_type))
5615 eol_type = Qunix;
88993dfd 5616
df7492f9 5617 object = coding->src_object;
b843d1ae 5618
df7492f9
KH
5619 /* Note: composition handling is not yet implemented. */
5620 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 5621
df7492f9
KH
5622 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5623 && find_composition (pos, end_pos, &start, &end, &prop, object)
5624 && end <= end_pos
5625 && (start >= pos
5626 || (find_composition (end, end_pos, &start, &end, &prop, object)
5627 && end <= end_pos)))
5628 stop = start;
5629 else
5630 stop = end_pos;
ec6d2bb8 5631
df7492f9 5632 while (buf < buf_end)
ec6d2bb8 5633 {
df7492f9 5634 if (pos == stop)
ec6d2bb8 5635 {
df7492f9 5636 int *p;
ec6d2bb8 5637
df7492f9
KH
5638 if (pos == end_pos)
5639 break;
5640 p = save_composition_data (buf, buf_end, prop);
5641 if (p == NULL)
5642 break;
5643 buf = p;
5644 if (find_composition (end, end_pos, &start, &end, &prop, object)
5645 && end <= end_pos)
5646 stop = start;
5647 else
5648 stop = end_pos;
5649 }
5650
5651 if (! multibytep)
5652 c = *src++;
5653 else
5654 c = STRING_CHAR_ADVANCE (src);
5655 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5656 c = '\n';
5657 if (! EQ (eol_type, Qunix))
5658 {
5659 if (c == '\n')
5660 {
5661 if (EQ (eol_type, Qdos))
5662 *buf++ = '\r';
5663 else
5664 c = '\r';
ec6d2bb8 5665 }
ec6d2bb8 5666 }
df7492f9
KH
5667 *buf++ = c;
5668 pos++;
ec6d2bb8 5669 }
ec6d2bb8 5670
df7492f9
KH
5671 coding->consumed = src - coding->source;
5672 coding->consumed_char = pos - coding->src_pos;
5673 coding->charbuf_used = buf - coding->charbuf;
5674 coding->chars_at_source = 0;
ec6d2bb8
KH
5675}
5676
ec6d2bb8 5677
df7492f9
KH
5678/* Encode the text at CODING->src_object into CODING->dst_object.
5679 CODING->src_object is a buffer or a string.
5680 CODING->dst_object is a buffer or nil.
5681
5682 If CODING->src_object is a buffer, it must be the current buffer.
5683 In this case, if CODING->src_pos is positive, it is a position of
5684 the source text in the buffer, otherwise. the source text is in the
5685 gap area of the buffer, and coding->src_pos specifies the offset of
5686 the text from GPT (which must be the same as PT). If this is the
5687 same buffer as CODING->dst_object, CODING->src_pos must be
5688 negative and CODING should not have `pre-write-conversion'.
5689
5690 If CODING->src_object is a string, CODING should not have
5691 `pre-write-conversion'.
5692
5693 If CODING->dst_object is a buffer, the encoded data is inserted at
5694 the current point of that buffer.
5695
5696 If CODING->dst_object is nil, the encoded data is placed at the
5697 memory area specified by CODING->destination. */
5698
5699static int
5700encode_coding (coding)
ec6d2bb8 5701 struct coding_system *coding;
ec6d2bb8 5702{
df7492f9 5703 Lisp_Object attrs;
ec6d2bb8 5704
df7492f9 5705 attrs = CODING_ID_ATTRS (coding->id);
ec6d2bb8 5706
df7492f9 5707 if (BUFFERP (coding->dst_object))
ec6d2bb8 5708 {
df7492f9
KH
5709 set_buffer_internal (XBUFFER (coding->dst_object));
5710 coding->dst_multibyte
5711 = ! NILP (current_buffer->enable_multibyte_characters);
5712 }
ec6d2bb8 5713
df7492f9
KH
5714 coding->consumed = coding->consumed_char = 0;
5715 coding->produced = coding->produced_char = 0;
5716 coding->result = CODING_RESULT_SUCCESS;
5717 coding->errors = 0;
ec6d2bb8 5718
df7492f9 5719 ALLOC_CONVERSION_WORK_AREA (coding);
ec6d2bb8 5720
df7492f9
KH
5721 do {
5722 coding_set_source (coding);
5723 consume_chars (coding);
5724
5725 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
5726 translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
5727
5728 coding_set_destination (coding);
5729 (*(coding->encoder)) (coding);
5730 } while (coding->consumed_char < coding->src_chars);
5731
5732 if (BUFFERP (coding->dst_object))
5733 insert_from_gap (coding->produced_char, coding->produced);
5734
5735 return (coding->result);
ec6d2bb8
KH
5736}
5737
df7492f9 5738/* Work buffer */
fb88bf2d 5739
df7492f9
KH
5740/* List of currently used working buffer. */
5741Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 5742
df7492f9
KH
5743/* A working buffer used by the top level conversion. */
5744Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 5745
4ed46869 5746
df7492f9
KH
5747/* Return a working buffer that can be freely used by the following
5748 code conversion. MULTIBYTEP specifies the multibyteness of the
5749 buffer. */
b73bfc1c 5750
df7492f9
KH
5751Lisp_Object
5752make_conversion_work_buffer (multibytep)
5753 int multibytep;
5754{
5755 struct buffer *current = current_buffer;
5756 Lisp_Object buf;
d46c5b12 5757
df7492f9 5758 if (NILP (Vcode_conversion_work_buf_list))
e133c8fa 5759 {
df7492f9
KH
5760 if (NILP (Vcode_conversion_reused_work_buf))
5761 Vcode_conversion_reused_work_buf
5762 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5763 Vcode_conversion_work_buf_list
5764 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
e133c8fa 5765 }
df7492f9 5766 else
d46c5b12 5767 {
df7492f9
KH
5768 int depth = Flength (Vcode_conversion_work_buf_list);
5769 char str[128];
e077cc80 5770
df7492f9
KH
5771 sprintf (str, " *code-conversion-work*<%d>", depth);
5772 Vcode_conversion_work_buf_list
5773 = Fcons (Fget_buffer_create (build_string (str)),
5774 Vcode_conversion_work_buf_list);
d46c5b12 5775 }
d46c5b12 5776
df7492f9
KH
5777 buf = XCAR (Vcode_conversion_work_buf_list);
5778 set_buffer_internal (XBUFFER (buf));
5779 current_buffer->undo_list = Qt;
5780 Ferase_buffer ();
5781 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
5782 set_buffer_internal (current);
5783 return buf;
5784}
d46c5b12 5785
df7492f9 5786static struct coding_system *saved_coding;
d46c5b12 5787
df7492f9
KH
5788Lisp_Object
5789code_conversion_restore (info)
5790 Lisp_Object info;
5791{
5792 int depth = Flength (Vcode_conversion_work_buf_list);
5793 Lisp_Object buf;
d46c5b12 5794
df7492f9 5795 if (depth > 0)
d46c5b12 5796 {
df7492f9
KH
5797 buf = XCAR (Vcode_conversion_work_buf_list);
5798 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5799 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5800 Fkill_buffer (buf);
5801 }
d46c5b12 5802
df7492f9
KH
5803 if (saved_coding->dst_object == Qt
5804 && saved_coding->destination)
5805 xfree (saved_coding->destination);
b843d1ae 5806
df7492f9
KH
5807 return save_excursion_restore (info);
5808}
d46c5b12 5809
12410ef1 5810
df7492f9
KH
5811int
5812decode_coding_gap (coding, chars, bytes)
5813 struct coding_system *coding;
5814 EMACS_INT chars, bytes;
5815{
5816 int count = specpdl_ptr - specpdl;
fb88bf2d 5817
df7492f9
KH
5818 saved_coding = coding;
5819 record_unwind_protect (code_conversion_restore, save_excursion_save ());
ec6d2bb8 5820
df7492f9
KH
5821 coding->src_object = Fcurrent_buffer ();
5822 coding->src_chars = chars;
5823 coding->src_bytes = bytes;
5824 coding->src_pos = -chars;
5825 coding->src_pos_byte = -bytes;
5826 coding->src_multibyte = chars < bytes;
5827 coding->dst_object = coding->src_object;
5828 coding->dst_pos = PT;
5829 coding->dst_pos_byte = PT_BYTE;
71c81426 5830 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4956c225 5831
df7492f9
KH
5832 if (CODING_REQUIRE_DETECTION (coding))
5833 detect_coding (coding);
5834
5835 decode_coding (coding);
d46c5b12 5836
df7492f9
KH
5837 unbind_to (count, Qnil);
5838 return coding->result;
5839}
d46c5b12 5840
df7492f9
KH
5841int
5842encode_coding_gap (coding, chars, bytes)
5843 struct coding_system *coding;
5844 EMACS_INT chars, bytes;
5845{
5846 int count = specpdl_ptr - specpdl;
5847 Lisp_Object buffer;
d46c5b12 5848
df7492f9
KH
5849 saved_coding = coding;
5850 record_unwind_protect (code_conversion_restore, save_excursion_save ());
fb88bf2d 5851
df7492f9
KH
5852 buffer = Fcurrent_buffer ();
5853 coding->src_object = buffer;
5854 coding->src_chars = chars;
5855 coding->src_bytes = bytes;
5856 coding->src_pos = -chars;
5857 coding->src_pos_byte = -bytes;
5858 coding->src_multibyte = chars < bytes;
5859 coding->dst_object = coding->src_object;
5860 coding->dst_pos = PT;
5861 coding->dst_pos_byte = PT_BYTE;
fb88bf2d 5862
df7492f9 5863 encode_coding (coding);
f2558efd 5864
df7492f9
KH
5865 unbind_to (count, Qnil);
5866 return coding->result;
5867}
b73bfc1c 5868
d46c5b12 5869
df7492f9
KH
5870/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5871 SRC_OBJECT into DST_OBJECT by coding context CODING.
ec6d2bb8 5872
df7492f9 5873 SRC_OBJECT is a buffer, a string, or Qnil.
ec6d2bb8 5874
df7492f9
KH
5875 If it is a buffer, the text is at point of the buffer. FROM and TO
5876 are positions in the buffer.
ec6d2bb8 5877
df7492f9
KH
5878 If it is a string, the text is at the beginning of the string.
5879 FROM and TO are indices to the string.
ec6d2bb8 5880
df7492f9
KH
5881 If it is nil, the text is at coding->source. FROM and TO are
5882 indices to coding->source.
ec6d2bb8 5883
df7492f9 5884 DST_OBJECT is a buffer, Qt, or Qnil.
d46c5b12 5885
df7492f9
KH
5886 If it is a buffer, the decoded text is inserted at point of the
5887 buffer. If the buffer is the same as SRC_OBJECT, the source text
5888 is deleted.
d46c5b12 5889
df7492f9
KH
5890 If it is Qt, a string is made from the decoded text, and
5891 set in CODING->dst_object.
d46c5b12 5892
df7492f9
KH
5893 If it is Qnil, the decoded text is stored at CODING->destination.
5894 The called must allocate CODING->dst_bytes bytes at
5895 CODING->destination by xmalloc. If the decoded text is longer than
5896 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5897 */
d46c5b12 5898
df7492f9
KH
5899void
5900decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5901 dst_object)
5902 struct coding_system *coding;
5903 Lisp_Object src_object;
5904 EMACS_INT from, from_byte, to, to_byte;
5905 Lisp_Object dst_object;
5906{
5907 int count = specpdl_ptr - specpdl;
5908 unsigned char *destination;
5909 EMACS_INT dst_bytes;
5910 EMACS_INT chars = to - from;
5911 EMACS_INT bytes = to_byte - from_byte;
5912 Lisp_Object attrs;
d46c5b12 5913
df7492f9
KH
5914 saved_coding = coding;
5915 record_unwind_protect (code_conversion_restore, save_excursion_save ());
93dec019 5916
df7492f9
KH
5917 if (NILP (dst_object))
5918 {
5919 destination = coding->destination;
5920 dst_bytes = coding->dst_bytes;
5921 }
93dec019 5922
df7492f9
KH
5923 coding->src_object = src_object;
5924 coding->src_chars = chars;
5925 coding->src_bytes = bytes;
5926 coding->src_multibyte = chars < bytes;
70ad9fc4 5927
df7492f9
KH
5928 if (STRINGP (src_object))
5929 {
5930 coding->src_pos = from;
5931 coding->src_pos_byte = from_byte;
5932 }
5933 else if (BUFFERP (src_object))
5934 {
5935 set_buffer_internal (XBUFFER (src_object));
5936 if (from != GPT)
5937 move_gap_both (from, from_byte);
5938 if (EQ (src_object, dst_object))
fb88bf2d 5939 {
df7492f9
KH
5940 TEMP_SET_PT_BOTH (from, from_byte);
5941 del_range_both (from, from_byte, to, to_byte, 1);
5942 coding->src_pos = -chars;
5943 coding->src_pos_byte = -bytes;
fb88bf2d 5944 }
df7492f9 5945 else
fb88bf2d 5946 {
df7492f9
KH
5947 coding->src_pos = from;
5948 coding->src_pos_byte = from_byte;
fb88bf2d 5949 }
d46c5b12 5950 }
fb88bf2d 5951
df7492f9
KH
5952 if (CODING_REQUIRE_DETECTION (coding))
5953 detect_coding (coding);
5954 attrs = CODING_ID_ATTRS (coding->id);
5955
5956 if (! NILP (CODING_ATTR_POST_READ (attrs))
5957 || EQ (dst_object, Qt))
b73bfc1c 5958 {
df7492f9
KH
5959 coding->dst_object = make_conversion_work_buffer (1);
5960 coding->dst_pos = BEG;
5961 coding->dst_pos_byte = BEG_BYTE;
5962 coding->dst_multibyte = 1;
b73bfc1c 5963 }
df7492f9 5964 else if (BUFFERP (dst_object))
12410ef1 5965 {
df7492f9
KH
5966 coding->dst_object = dst_object;
5967 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
5968 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
5969 coding->dst_multibyte
5970 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
12410ef1 5971 }
72d1a715 5972 else
df7492f9
KH
5973 {
5974 coding->dst_object = Qnil;
5975 coding->dst_multibyte = 1;
5976 }
5977
5978 decode_coding (coding);
4ed46869 5979
df7492f9
KH
5980 if (BUFFERP (coding->dst_object))
5981 set_buffer_internal (XBUFFER (coding->dst_object));
ec6d2bb8 5982
df7492f9 5983 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 5984 {
df7492f9
KH
5985 struct gcpro gcpro1, gcpro2;
5986 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 5987 Lisp_Object val;
4ed46869 5988
c0cc7f7f 5989 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
5990 GCPRO2 (coding->src_object, coding->dst_object);
5991 val = call1 (CODING_ATTR_POST_READ (attrs),
5992 make_number (coding->produced_char));
5993 UNGCPRO;
5994 CHECK_NATNUM (val);
5995 coding->produced_char += Z - prev_Z;
5996 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 5997 }
4ed46869 5998
df7492f9 5999 if (EQ (dst_object, Qt))
ec6d2bb8 6000 {
df7492f9
KH
6001 coding->dst_object = Fbuffer_string ();
6002 }
6003 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6004 {
6005 set_buffer_internal (XBUFFER (coding->dst_object));
6006 if (dst_bytes < coding->produced)
6007 {
6008 destination
6009 = (unsigned char *) xrealloc (destination, coding->produced);
6010 if (! destination)
6011 {
6012 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6013 unbind_to (count, Qnil);
6014 return;
6015 }
6016 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6017 move_gap_both (BEGV, BEGV_BYTE);
6018 bcopy (BEGV_ADDR, destination, coding->produced);
6019 coding->destination = destination;
6020 }
ec6d2bb8 6021 }
2b4f9037 6022
df7492f9 6023 unbind_to (count, Qnil);
d46c5b12
KH
6024}
6025
df7492f9
KH
6026
6027void
6028encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6029 dst_object)
b73bfc1c 6030 struct coding_system *coding;
df7492f9
KH
6031 Lisp_Object src_object;
6032 EMACS_INT from, from_byte, to, to_byte;
6033 Lisp_Object dst_object;
b73bfc1c
KH
6034{
6035 int count = specpdl_ptr - specpdl;
df7492f9
KH
6036 EMACS_INT chars = to - from;
6037 EMACS_INT bytes = to_byte - from_byte;
6038 Lisp_Object attrs;
6039
6040 saved_coding = coding;
6041 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6042
6043 coding->src_object = src_object;
6044 coding->src_chars = chars;
6045 coding->src_bytes = bytes;
6046 coding->src_multibyte = chars < bytes;
6047
6048 attrs = CODING_ID_ATTRS (coding->id);
6049
6050 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6051 {
df7492f9
KH
6052 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6053 set_buffer_internal (XBUFFER (coding->src_object));
6054 if (STRINGP (src_object))
6055 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6056 else if (BUFFERP (src_object))
6057 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6058 else
6059 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6060
6061 if (EQ (src_object, dst_object))
6062 {
6063 set_buffer_internal (XBUFFER (src_object));
6064 del_range_both (from, from_byte, to, to_byte, 1);
6065 set_buffer_internal (XBUFFER (coding->src_object));
6066 }
6067
ac87bbef
KH
6068 call2 (CODING_ATTR_PRE_WRITE (attrs),
6069 make_number (BEG), make_number (Z));
6070 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6071 if (BEG != GPT)
6072 move_gap_both (BEG, BEG_BYTE);
6073 coding->src_chars = Z - BEG;
6074 coding->src_bytes = Z_BYTE - BEG_BYTE;
6075 coding->src_pos = BEG;
6076 coding->src_pos_byte = BEG_BYTE;
6077 coding->src_multibyte = Z < Z_BYTE;
6078 }
6079 else if (STRINGP (src_object))
6080 {
6081 coding->src_pos = from;
6082 coding->src_pos_byte = from_byte;
6083 }
6084 else if (BUFFERP (src_object))
d46c5b12 6085 {
df7492f9
KH
6086 set_buffer_internal (XBUFFER (src_object));
6087 if (from != GPT)
6088 move_gap_both (from, from_byte);
6089 if (EQ (src_object, dst_object))
d46c5b12 6090 {
df7492f9
KH
6091 del_range_both (from, from_byte, to, to_byte, 1);
6092 coding->src_pos = -chars;
6093 coding->src_pos_byte = -bytes;
d46c5b12 6094 }
df7492f9 6095 else
d46c5b12 6096 {
df7492f9
KH
6097 coding->src_pos = from;
6098 coding->src_pos_byte = from_byte;
d46c5b12
KH
6099 }
6100 }
4ed46869 6101
df7492f9 6102 if (BUFFERP (dst_object))
d46c5b12 6103 {
df7492f9
KH
6104 coding->dst_object = dst_object;
6105 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6106 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6107 coding->dst_multibyte
6108 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
b73bfc1c 6109 }
df7492f9 6110 else if (EQ (dst_object, Qt))
4956c225 6111 {
df7492f9 6112 coding->dst_object = Qnil;
df7492f9 6113 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6114 if (coding->dst_bytes == 0)
6115 coding->dst_bytes = 1;
6116 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6117 coding->dst_multibyte = 0;
4956c225 6118 }
df7492f9 6119 else
78108bcd 6120 {
df7492f9
KH
6121 coding->dst_object = Qnil;
6122 coding->dst_multibyte = 0;
78108bcd
KH
6123 }
6124
df7492f9 6125 encode_coding (coding);
4ed46869 6126
df7492f9 6127 if (EQ (dst_object, Qt))
4ed46869 6128 {
df7492f9
KH
6129 if (BUFFERP (coding->dst_object))
6130 coding->dst_object = Fbuffer_string ();
6131 else
73be902c 6132 {
df7492f9
KH
6133 coding->dst_object
6134 = make_unibyte_string ((char *) coding->destination,
6135 coding->produced);
6136 xfree (coding->destination);
73be902c 6137 }
4ed46869 6138 }
d46c5b12 6139
df7492f9 6140 unbind_to (count, Qnil);
b73bfc1c
KH
6141}
6142
df7492f9 6143
b73bfc1c 6144Lisp_Object
df7492f9 6145preferred_coding_system ()
b73bfc1c 6146{
df7492f9 6147 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6148
df7492f9 6149 return CODING_ID_NAME (id);
4ed46869
KH
6150}
6151
6152\f
6153#ifdef emacs
1397dc18 6154/*** 8. Emacs Lisp library functions ***/
4ed46869 6155
4ed46869 6156DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6157 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6158See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6159about coding-system objects. */)
6160 (obj)
4ed46869
KH
6161 Lisp_Object obj;
6162{
df7492f9 6163 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6164}
6165
9d991de8
RS
6166DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6167 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6168 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6169 (prompt)
4ed46869
KH
6170 Lisp_Object prompt;
6171{
e0e989f6 6172 Lisp_Object val;
9d991de8
RS
6173 do
6174 {
4608c386
KH
6175 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6176 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
6177 }
6178 while (XSTRING (val)->size == 0);
e0e989f6 6179 return (Fintern (val, Qnil));
4ed46869
KH
6180}
6181
9b787f3e 6182DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6183 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6184If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6185 (prompt, default_coding_system)
9b787f3e 6186 Lisp_Object prompt, default_coding_system;
4ed46869 6187{
f44d27ce 6188 Lisp_Object val;
9b787f3e
RS
6189 if (SYMBOLP (default_coding_system))
6190 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 6191 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6192 Qt, Qnil, Qcoding_system_history,
6193 default_coding_system, Qnil);
e0e989f6 6194 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6195}
6196
6197DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6198 1, 1, 0,
48b0f3ae
PJ
6199 doc: /* Check validity of CODING-SYSTEM.
6200If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6201It is valid if it is a symbol with a non-nil `coding-system' property.
6202The value of property should be a vector of length 5. */)
df7492f9 6203 (coding_system)
4ed46869
KH
6204 Lisp_Object coding_system;
6205{
b7826503 6206 CHECK_SYMBOL (coding_system);
4ed46869
KH
6207 if (!NILP (Fcoding_system_p (coding_system)))
6208 return coding_system;
6209 while (1)
02ba4723 6210 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6211}
df7492f9 6212
3a73fa5d 6213\f
d46c5b12 6214Lisp_Object
df7492f9 6215detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
d46c5b12
KH
6216 unsigned char *src;
6217 int src_bytes, highest;
0a28aafb 6218 int multibytep;
df7492f9 6219 Lisp_Object coding_system;
4ed46869 6220{
df7492f9
KH
6221 unsigned char *src_end = src + src_bytes;
6222 int mask = CATEGORY_MASK_ANY;
6223 int detected = 0;
6224 int c, i;
6225 Lisp_Object attrs, eol_type;
6226 Lisp_Object val;
6227 struct coding_system coding;
6228
6229 if (NILP (coding_system))
6230 coding_system = Qundecided;
6231 setup_coding_system (coding_system, &coding);
6232 attrs = CODING_ID_ATTRS (coding.id);
6233 eol_type = CODING_ID_EOL_TYPE (coding.id);
4ed46869 6234
df7492f9
KH
6235 coding.source = src;
6236 coding.src_bytes = src_bytes;
6237 coding.src_multibyte = multibytep;
6238 coding.consumed = 0;
4ed46869 6239
df7492f9 6240 if (XINT (CODING_ATTR_CATEGORY (attrs)) != coding_category_undecided)
4ed46869 6241 {
df7492f9 6242 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
4ed46869 6243 }
df7492f9 6244 else
4ed46869 6245 {
df7492f9
KH
6246 coding_system = Qnil;
6247 for (; src < src_end; src++)
4ed46869 6248 {
df7492f9
KH
6249 c = *src;
6250 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6251 || c == ISO_CODE_SI
6252 || c == ISO_CODE_SO)))
d46c5b12 6253 break;
4ed46869 6254 }
df7492f9
KH
6255 coding.head_ascii = src - coding.source;
6256
6257 if (src < src_end)
6258 for (i = 0; i < coding_category_raw_text; i++)
6259 {
6260 enum coding_category category = coding_priorities[i];
6261 struct coding_system *this = coding_categories + category;
6262
6263 if (category >= coding_category_raw_text
6264 || detected & (1 << category))
6265 continue;
6266
6267 if (this->id < 0)
6268 {
6269 /* No coding system of this category is defined. */
6270 mask &= ~(1 << category);
6271 }
6272 else
6273 {
6274 detected |= detected_mask[category];
6275 if ((*(coding_categories[category].detector)) (&coding, &mask)
6276 && highest)
6277 {
6278 mask &= detected_mask[category];
6279 break;
6280 }
6281 }
6282 }
4ed46869 6283 }
4ed46869 6284
df7492f9
KH
6285 if (!mask)
6286 val = Fcons (make_number (coding_category_raw_text), Qnil);
6287 else if (mask == CATEGORY_MASK_ANY)
6288 val = Fcons (make_number (coding_category_undecided), Qnil);
6289 else if (highest)
4ed46869 6290 {
df7492f9
KH
6291 for (i = 0; i < coding_category_raw_text; i++)
6292 if (mask & (1 << coding_priorities[i]))
6293 {
6294 val = Fcons (make_number (coding_priorities[i]), Qnil);
6295 break;
6296 }
6297 }
6298 else
6299 {
6300 val = Qnil;
6301 for (i = coding_category_raw_text - 1; i >= 0; i--)
6302 if (mask & (1 << coding_priorities[i]))
6303 val = Fcons (make_number (coding_priorities[i]), val);
4ed46869 6304 }
df7492f9
KH
6305
6306 {
6307 int one_byte_eol = -1, two_byte_eol = -1;
6308 Lisp_Object tail;
6309
6310 for (tail = val; CONSP (tail); tail = XCDR (tail))
6311 {
6312 struct coding_system *this
6313 = (NILP (coding_system) ? coding_categories + XINT (XCAR (tail))
6314 : &coding);
6315 int this_eol;
6316
6317 attrs = CODING_ID_ATTRS (this->id);
6318 eol_type = CODING_ID_EOL_TYPE (this->id);
6319 XSETCAR (tail, CODING_ID_NAME (this->id));
6320 if (VECTORP (eol_type))
6321 {
6322 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_16))
6323 {
6324 if (two_byte_eol < 0)
6325 two_byte_eol = detect_eol (this, coding.source, src_bytes);
6326 this_eol = two_byte_eol;
6327 }
6328 else
6329 {
6330 if (one_byte_eol < 0)
6331 one_byte_eol =detect_eol (this, coding.source, src_bytes);
6332 this_eol = one_byte_eol;
6333 }
6334 if (this_eol == EOL_SEEN_LF)
6335 XSETCAR (tail, AREF (eol_type, 0));
6336 else if (this_eol == EOL_SEEN_CRLF)
6337 XSETCAR (tail, AREF (eol_type, 1));
6338 else if (this_eol == EOL_SEEN_CR)
6339 XSETCAR (tail, AREF (eol_type, 2));
6340 }
6341 }
6342 }
6343
03699b14 6344 return (highest ? XCAR (val) : val);
93dec019 6345}
4ed46869 6346
df7492f9 6347
d46c5b12
KH
6348DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6349 2, 3, 0,
48b0f3ae
PJ
6350 doc: /* Detect coding system of the text in the region between START and END.
6351Return a list of possible coding systems ordered by priority.
6352
6353If only ASCII characters are found, it returns a list of single element
6354`undecided' or its subsidiary coding system according to a detected
6355end-of-line format.
6356
6357If optional argument HIGHEST is non-nil, return the coding system of
6358highest priority. */)
6359 (start, end, highest)
d46c5b12
KH
6360 Lisp_Object start, end, highest;
6361{
6362 int from, to;
6363 int from_byte, to_byte;
6289dd10 6364
b7826503
PJ
6365 CHECK_NUMBER_COERCE_MARKER (start);
6366 CHECK_NUMBER_COERCE_MARKER (end);
4ed46869 6367
d46c5b12
KH
6368 validate_region (&start, &end);
6369 from = XINT (start), to = XINT (end);
6370 from_byte = CHAR_TO_BYTE (from);
6371 to_byte = CHAR_TO_BYTE (to);
6289dd10 6372
d46c5b12
KH
6373 if (from < GPT && to >= GPT)
6374 move_gap_both (to, to_byte);
c210f766 6375
d46c5b12 6376 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6377 to_byte - from_byte,
0a28aafb
KH
6378 !NILP (highest),
6379 !NILP (current_buffer
df7492f9
KH
6380 ->enable_multibyte_characters),
6381 Qnil);
d46c5b12 6382}
6289dd10 6383
d46c5b12
KH
6384DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6385 1, 2, 0,
48b0f3ae
PJ
6386 doc: /* Detect coding system of the text in STRING.
6387Return a list of possible coding systems ordered by priority.
6388
6389If only ASCII characters are found, it returns a list of single element
6390`undecided' or its subsidiary coding system according to a detected
6391end-of-line format.
6392
6393If optional argument HIGHEST is non-nil, return the coding system of
6394highest priority. */)
6395 (string, highest)
d46c5b12
KH
6396 Lisp_Object string, highest;
6397{
b7826503 6398 CHECK_STRING (string);
4ed46869 6399
d46c5b12 6400 return detect_coding_system (XSTRING (string)->data,
df7492f9 6401 STRING_BYTES (XSTRING (string)),
0a28aafb 6402 !NILP (highest),
df7492f9
KH
6403 STRING_MULTIBYTE (string),
6404 Qnil);
4ed46869
KH
6405}
6406
05e6f5dc 6407
df7492f9
KH
6408static INLINE int
6409char_encodable_p (c, attrs)
6410 int c;
6411 Lisp_Object attrs;
05e6f5dc 6412{
df7492f9 6413 Lisp_Object tail;
df7492f9 6414 struct charset *charset;
05e6f5dc 6415
df7492f9
KH
6416 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6417 CONSP (tail); tail = XCDR (tail))
05e6f5dc 6418 {
df7492f9
KH
6419 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6420 if (CHAR_CHARSET_P (c, charset))
6421 break;
05e6f5dc 6422 }
df7492f9 6423 return (! NILP (tail));
05e6f5dc
KH
6424}
6425
6426
df7492f9
KH
6427/* Return a list of coding systems that safely encode the text between
6428 START and END. If EXCLUDE is non-nil, it is a list of coding
6429 systems not to check. The returned list doesn't contain any such
6430 coding systems. In any case, If the text contains only ASCII or is
6431 unibyte, return t. */
6432
6433DEFUN ("find-coding-systems-region-internal",
6434 Ffind_coding_systems_region_internal,
6435 Sfind_coding_systems_region_internal, 2, 3, 0,
6436 doc: /* Internal use only. */)
6437 (start, end, exclude)
6438 Lisp_Object start, end, exclude;
6439{
6440 Lisp_Object coding_attrs_list, safe_codings;
6441 EMACS_INT start_byte, end_byte;
6442 unsigned char *p, *pbeg, *pend;
6443 int c;
6444 Lisp_Object tail, elt;
05e6f5dc 6445
df7492f9
KH
6446 if (STRINGP (start))
6447 {
6448 if (!STRING_MULTIBYTE (start)
6449 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6450 return Qt;
6451 start_byte = 0;
6452 end_byte = STRING_BYTES (XSTRING (start));
6453 }
6454 else
6455 {
6456 CHECK_NUMBER_COERCE_MARKER (start);
6457 CHECK_NUMBER_COERCE_MARKER (end);
6458 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6459 args_out_of_range (start, end);
6460 if (NILP (current_buffer->enable_multibyte_characters))
6461 return Qt;
6462 start_byte = CHAR_TO_BYTE (XINT (start));
6463 end_byte = CHAR_TO_BYTE (XINT (end));
6464 if (XINT (end) - XINT (start) == end_byte - start_byte)
6465 return Qt;
05e6f5dc 6466
df7492f9
KH
6467 if (start < GPT && end > GPT)
6468 {
6469 if ((GPT - start) < (end - GPT))
6470 move_gap_both (start, start_byte);
6471 else
6472 move_gap_both (end, end_byte);
6473 }
6474 }
05e6f5dc 6475
df7492f9
KH
6476 coding_attrs_list = Qnil;
6477 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6478 if (NILP (exclude)
6479 || NILP (Fmemq (XCAR (tail), exclude)))
6480 {
6481 Lisp_Object attrs;
05e6f5dc 6482
df7492f9
KH
6483 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6484 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6485 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6486 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6487 }
6488
6489 if (STRINGP (start))
6490 p = pbeg = XSTRING (start)->data;
6491 else
6492 p = pbeg = BYTE_POS_ADDR (start_byte);
6493 pend = p + (end_byte - start_byte);
6494
6495 while (p < pend && ASCII_BYTE_P (*p)) p++;
6496 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
05e6f5dc
KH
6497
6498 while (p < pend)
6499 {
df7492f9
KH
6500 if (ASCII_BYTE_P (*p))
6501 p++;
6502 else
6503 {
6504 c = STRING_CHAR_ADVANCE (p);
6505
6506 charset_map_loaded = 0;
6507 for (tail = coding_attrs_list; CONSP (tail);)
6508 {
6509 elt = XCAR (tail);
6510 if (NILP (elt))
6511 tail = XCDR (tail);
6512 else if (char_encodable_p (c, elt))
6513 tail = XCDR (tail);
6514 else if (CONSP (XCDR (tail)))
6515 {
6516 XSETCAR (tail, XCAR (XCDR (tail)));
6517 XSETCDR (tail, XCDR (XCDR (tail)));
6518 }
6519 else
6520 {
6521 XSETCAR (tail, Qnil);
6522 tail = XCDR (tail);
6523 }
6524 }
6525 if (charset_map_loaded)
6526 {
6527 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 6528
df7492f9
KH
6529 if (STRINGP (start))
6530 pbeg = XSTRING (start)->data;
6531 else
6532 pbeg = BYTE_POS_ADDR (start_byte);
6533 p = pbeg + p_offset;
6534 pend = pbeg + pend_offset;
6535 }
6536 }
05e6f5dc 6537 }
df7492f9
KH
6538
6539 safe_codings = Qnil;
6540 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6541 if (! NILP (XCAR (tail)))
6542 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6543
05e6f5dc
KH
6544 return safe_codings;
6545}
6546
6547
df7492f9
KH
6548DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6549 Scheck_coding_systems_region, 3, 3, 0,
6550 doc: /* Check if the region is encodable by coding systems.
05e6f5dc 6551
df7492f9
KH
6552START and END are buffer positions specifying the region.
6553CODING-SYSTEM-LIST is a list of coding systems to check.
6554
6555The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6556CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6557whole region, POS0, POS1, ... are buffer positions where non-encodable
6558characters are found.
6559
6560If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6561value is nil.
6562
6563START may be a string. In that case, check if the string is
6564encodable, and the value contains indices to the string instead of
6565buffer positions. END is ignored. */)
6566 (start, end, coding_system_list)
6567 Lisp_Object start, end, coding_system_list;
05e6f5dc 6568{
df7492f9
KH
6569 Lisp_Object list;
6570 EMACS_INT start_byte, end_byte;
6571 int pos;
6572 unsigned char *p, *pbeg, *pend;
6573 int c;
6574 Lisp_Object tail, elt;
05e6f5dc
KH
6575
6576 if (STRINGP (start))
6577 {
df7492f9
KH
6578 if (!STRING_MULTIBYTE (start)
6579 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6580 return Qnil;
6581 start_byte = 0;
6582 end_byte = STRING_BYTES (XSTRING (start));
6583 pos = 0;
05e6f5dc
KH
6584 }
6585 else
6586 {
b7826503
PJ
6587 CHECK_NUMBER_COERCE_MARKER (start);
6588 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
6589 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6590 args_out_of_range (start, end);
6591 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
6592 return Qnil;
6593 start_byte = CHAR_TO_BYTE (XINT (start));
6594 end_byte = CHAR_TO_BYTE (XINT (end));
6595 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 6596 return Qt;
df7492f9
KH
6597
6598 if (start < GPT && end > GPT)
6599 {
6600 if ((GPT - start) < (end - GPT))
6601 move_gap_both (start, start_byte);
6602 else
6603 move_gap_both (end, end_byte);
6604 }
6605 pos = start;
6606 }
6607
6608 list = Qnil;
6609 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6610 {
6611 elt = XCAR (tail);
6612 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6613 Qnil)),
6614 list);
05e6f5dc
KH
6615 }
6616
df7492f9
KH
6617 if (STRINGP (start))
6618 p = pbeg = XSTRING (start)->data;
6619 else
6620 p = pbeg = BYTE_POS_ADDR (start_byte);
6621 pend = p + (end_byte - start_byte);
6622
6623 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6624 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6625
6626 while (p < pend)
05e6f5dc 6627 {
df7492f9
KH
6628 if (ASCII_BYTE_P (*p))
6629 p++;
6630 else
05e6f5dc 6631 {
df7492f9
KH
6632 c = STRING_CHAR_ADVANCE (p);
6633
6634 charset_map_loaded = 0;
6635 for (tail = list; CONSP (tail); tail = XCDR (tail))
6636 {
6637 elt = XCDR (XCAR (tail));
6638 if (! char_encodable_p (c, XCAR (elt)))
6639 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6640 }
6641 if (charset_map_loaded)
6642 {
6643 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6644
6645 if (STRINGP (start))
6646 pbeg = XSTRING (start)->data;
6647 else
6648 pbeg = BYTE_POS_ADDR (start_byte);
6649 p = pbeg + p_offset;
6650 pend = pbeg + pend_offset;
6651 }
05e6f5dc 6652 }
df7492f9 6653 pos++;
05e6f5dc
KH
6654 }
6655
df7492f9
KH
6656 tail = list;
6657 list = Qnil;
6658 for (; CONSP (tail); tail = XCDR (tail))
05e6f5dc 6659 {
df7492f9
KH
6660 elt = XCAR (tail);
6661 if (CONSP (XCDR (XCDR (elt))))
6662 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6663 list);
05e6f5dc 6664 }
df7492f9
KH
6665
6666 return list;
05e6f5dc
KH
6667}
6668
6669
df7492f9 6670
4031e2bf 6671Lisp_Object
df7492f9
KH
6672code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6673 Lisp_Object start, end, coding_system, dst_object;
6674 int encodep, norecord;
3a73fa5d
RS
6675{
6676 struct coding_system coding;
df7492f9
KH
6677 EMACS_INT from, from_byte, to, to_byte;
6678 Lisp_Object src_object;
3a73fa5d 6679
b7826503
PJ
6680 CHECK_NUMBER_COERCE_MARKER (start);
6681 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
6682 if (NILP (coding_system))
6683 coding_system = Qno_conversion;
6684 else
6685 CHECK_CODING_SYSTEM (coding_system);
6686 src_object = Fcurrent_buffer ();
6687 if (NILP (dst_object))
6688 dst_object = src_object;
6689 else if (! EQ (dst_object, Qt))
6690 CHECK_BUFFER (dst_object);
3a73fa5d 6691
d46c5b12
KH
6692 validate_region (&start, &end);
6693 from = XFASTINT (start);
df7492f9 6694 from_byte = CHAR_TO_BYTE (from);
d46c5b12 6695 to = XFASTINT (end);
df7492f9 6696 to_byte = CHAR_TO_BYTE (to);
d46c5b12 6697
df7492f9
KH
6698 setup_coding_system (coding_system, &coding);
6699 coding.mode |= CODING_MODE_LAST_BLOCK;
6700
6701 if (encodep)
6702 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6703 dst_object);
6704 else
6705 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6706 dst_object);
6707 if (! norecord)
6708 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
d46c5b12 6709
df7492f9
KH
6710 if (coding.result != CODING_RESULT_SUCCESS)
6711 error ("Code conversion error: %d", coding.result);
3a73fa5d 6712
df7492f9
KH
6713 return (BUFFERP (dst_object)
6714 ? make_number (coding.produced_char)
6715 : coding.dst_object);
4031e2bf
KH
6716}
6717
df7492f9 6718
4031e2bf 6719DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 6720 3, 4, "r\nzCoding system: ",
48b0f3ae 6721 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
6722When called from a program, takes four arguments:
6723 START, END, CODING-SYSTEM, and DESTINATION.
6724START and END are buffer positions.
6725
6726Optional 4th arguments DESTINATION specifies where the decoded text goes.
6727If nil, the region between START and END is replace by the decoded text.
6728If buffer, the decoded text is inserted in the buffer.
6729If t, the decoded text is returned.
6730
48b0f3ae
PJ
6731This function sets `last-coding-system-used' to the precise coding system
6732used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6733not fully specified.)
6734It returns the length of the decoded text. */)
df7492f9
KH
6735 (start, end, coding_system, destination)
6736 Lisp_Object start, end, coding_system, destination;
4031e2bf 6737{
df7492f9 6738 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d
RS
6739}
6740
6741DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
6742 3, 4, "r\nzCoding system: ",
6743 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
6744When called from a program, takes three arguments:
6745START, END, and CODING-SYSTEM. START and END are buffer positions.
df7492f9
KH
6746
6747Optional 4th arguments DESTINATION specifies where the encoded text goes.
6748If nil, the region between START and END is replace by the encoded text.
6749If buffer, the encoded text is inserted in the buffer.
6750If t, the encoded text is returned.
6751
48b0f3ae
PJ
6752This function sets `last-coding-system-used' to the precise coding system
6753used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6754not fully specified.)
6755It returns the length of the encoded text. */)
df7492f9
KH
6756 (start, end, coding_system, destination)
6757 Lisp_Object start, end, coding_system, destination;
3a73fa5d 6758{
df7492f9 6759 return code_convert_region (start, end, coding_system, destination, 1, 0);
4031e2bf 6760}
3a73fa5d 6761
4031e2bf 6762Lisp_Object
df7492f9
KH
6763code_convert_string (string, coding_system, dst_object,
6764 encodep, nocopy, norecord)
6765 Lisp_Object string, coding_system, dst_object;
6766 int encodep, nocopy, norecord;
4031e2bf
KH
6767{
6768 struct coding_system coding;
df7492f9 6769 EMACS_INT chars, bytes;
3a73fa5d 6770
b7826503 6771 CHECK_STRING (string);
d46c5b12 6772 if (NILP (coding_system))
df7492f9
KH
6773 {
6774 if (! norecord)
6775 Vlast_coding_system_used = Qno_conversion;
6776 if (NILP (dst_object))
6777 return (nocopy ? Fcopy_sequence (string) : string);
6778 }
4ed46869 6779
df7492f9
KH
6780 if (NILP (coding_system))
6781 coding_system = Qno_conversion;
6782 else
6783 CHECK_CODING_SYSTEM (coding_system);
6784 if (NILP (dst_object))
6785 dst_object = Qt;
6786 else if (! EQ (dst_object, Qt))
6787 CHECK_BUFFER (dst_object);
5f1cd180 6788
df7492f9 6789 setup_coding_system (coding_system, &coding);
d46c5b12 6790 coding.mode |= CODING_MODE_LAST_BLOCK;
df7492f9
KH
6791 chars = XSTRING (string)->size;
6792 bytes = STRING_BYTES (XSTRING (string));
6793 if (encodep)
6794 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6795 else
6796 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6797 if (! norecord)
6798 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 6799
df7492f9
KH
6800 if (coding.result != CODING_RESULT_SUCCESS)
6801 error ("Code conversion error: %d", coding.result);
4ed46869 6802
df7492f9
KH
6803 return (BUFFERP (dst_object)
6804 ? make_number (coding.produced_char)
6805 : coding.dst_object);
4ed46869
KH
6806}
6807
4031e2bf 6808
ecec61c1 6809/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
6810 Do not set Vlast_coding_system_used.
6811
6812 This function is called only from macros DECODE_FILE and
6813 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
6814
6815Lisp_Object
6816code_convert_string_norecord (string, coding_system, encodep)
6817 Lisp_Object string, coding_system;
6818 int encodep;
6819{
0be8721c 6820 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
df7492f9 6821}
ecec61c1 6822
ecec61c1 6823
df7492f9
KH
6824DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6825 2, 4, 0,
6826 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6827
6828Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6829if the decoding operation is trivial.
ecec61c1 6830
df7492f9
KH
6831Optional fourth arg BUFFER non-nil meant that the decoded text is
6832inserted in BUFFER instead of returned as a astring. In this case,
6833the return value is BUFFER.
ecec61c1 6834
df7492f9
KH
6835This function sets `last-coding-system-used' to the precise coding system
6836used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6837not fully specified. */)
6838 (string, coding_system, nocopy, buffer)
6839 Lisp_Object string, coding_system, nocopy, buffer;
6840{
6841 return code_convert_string (string, coding_system, buffer,
6842 0, ! NILP (nocopy), 0);
6843}
6844
6845DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6846 2, 4, 0,
6847 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6848
6849Optional third arg NOCOPY non-nil means it is OK to return STRING
6850itself if the encoding operation is trivial.
6851
6852Optional fourth arg BUFFER non-nil meant that the encoded text is
6853inserted in BUFFER instead of returned as a astring. In this case,
6854the return value is BUFFER.
6855
6856This function sets `last-coding-system-used' to the precise coding system
6857used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6858not fully specified.) */)
6859 (string, coding_system, nocopy, buffer)
6860 Lisp_Object string, coding_system, nocopy, buffer;
6861{
6862 return code_convert_string (string, coding_system, buffer,
6863 nocopy, ! NILP (nocopy), 1);
ecec61c1 6864}
df7492f9 6865
3a73fa5d 6866\f
4ed46869 6867DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6868 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6869Return the corresponding character. */)
6870 (code)
4ed46869
KH
6871 Lisp_Object code;
6872{
df7492f9
KH
6873 Lisp_Object spec, attrs, val;
6874 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6875 int c;
6876
6877 CHECK_NATNUM (code);
6878 c = XFASTINT (code);
6879 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6880 attrs = AREF (spec, 0);
4ed46869 6881
df7492f9
KH
6882 if (ASCII_BYTE_P (c)
6883 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6884 return code;
6885
6886 val = CODING_ATTR_CHARSET_LIST (attrs);
6887 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
6888 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6889 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
df7492f9
KH
6890
6891 if (c <= 0x7F)
6892 charset = charset_roman;
6893 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 6894 {
df7492f9
KH
6895 charset = charset_kana;
6896 c -= 0x80;
55ab7be3
KH
6897 }
6898 else
6899 {
004068e4 6900 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
6901
6902 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
6903 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
6904 error ("Invalid code: %d", code);
6905 SJIS_TO_JIS (c);
6906 charset = charset_kanji;
55ab7be3 6907 }
df7492f9
KH
6908 c = DECODE_CHAR (charset, c);
6909 if (c < 0)
6910 error ("Invalid code: %d", code);
6911 return make_number (c);
4ed46869
KH
6912}
6913
df7492f9 6914
4ed46869 6915DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6916 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6917Return the corresponding code in SJIS. */)
6918 (ch)
df7492f9 6919 Lisp_Object ch;
4ed46869 6920{
df7492f9
KH
6921 Lisp_Object spec, attrs, charset_list;
6922 int c;
6923 struct charset *charset;
6924 unsigned code;
4ed46869 6925
df7492f9
KH
6926 CHECK_CHARACTER (ch);
6927 c = XFASTINT (ch);
6928 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6929 attrs = AREF (spec, 0);
6930
6931 if (ASCII_CHAR_P (c)
6932 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6933 return ch;
6934
6935 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6936 charset = char_charset (c, charset_list, &code);
6937 if (code == CHARSET_INVALID_CODE (charset))
6938 error ("Can't encode by shift_jis encoding: %d", c);
6939 JIS_TO_SJIS (code);
6940
6941 return make_number (code);
4ed46869
KH
6942}
6943
6944DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
6945 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6946Return the corresponding character. */)
6947 (code)
4ed46869
KH
6948 Lisp_Object code;
6949{
df7492f9
KH
6950 Lisp_Object spec, attrs, val;
6951 struct charset *charset_roman, *charset_big5, *charset;
6952 int c;
4ed46869 6953
df7492f9
KH
6954 CHECK_NATNUM (code);
6955 c = XFASTINT (code);
6956 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6957 attrs = AREF (spec, 0);
6958
6959 if (ASCII_BYTE_P (c)
6960 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6961 return code;
6962
6963 val = CODING_ATTR_CHARSET_LIST (attrs);
6964 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6965 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
6966
6967 if (c <= 0x7F)
6968 charset = charset_roman;
c28a9453
KH
6969 else
6970 {
df7492f9
KH
6971 int b1 = c >> 8, b2 = c & 0x7F;
6972 if (b1 < 0xA1 || b1 > 0xFE
6973 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
6974 error ("Invalid code: %d", code);
6975 charset = charset_big5;
c28a9453 6976 }
df7492f9
KH
6977 c = DECODE_CHAR (charset, (unsigned )c);
6978 if (c < 0)
6979 error ("Invalid code: %d", code);
6980 return make_number (c);
4ed46869
KH
6981}
6982
6983DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
6984 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
6985Return the corresponding character code in Big5. */)
6986 (ch)
4ed46869
KH
6987 Lisp_Object ch;
6988{
df7492f9
KH
6989 Lisp_Object spec, attrs, charset_list;
6990 struct charset *charset;
6991 int c;
6992 unsigned code;
6993
6994 CHECK_CHARACTER (ch);
6995 c = XFASTINT (ch);
6996 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6997 attrs = AREF (spec, 0);
6998 if (ASCII_CHAR_P (c)
6999 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7000 return ch;
7001
7002 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7003 charset = char_charset (c, charset_list, &code);
7004 if (code == CHARSET_INVALID_CODE (charset))
7005 error ("Can't encode by Big5 encoding: %d", c);
7006
7007 return make_number (code);
4ed46869 7008}
df7492f9 7009
3a73fa5d 7010\f
1ba9e4ab
KH
7011DEFUN ("set-terminal-coding-system-internal",
7012 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
7013 Sset_terminal_coding_system_internal, 1, 1, 0,
7014 doc: /* Internal use only. */)
7015 (coding_system)
b74e4686 7016 Lisp_Object coding_system;
4ed46869 7017{
b7826503 7018 CHECK_SYMBOL (coding_system);
df7492f9
KH
7019 setup_coding_system (Fcheck_coding_system (coding_system),
7020 &terminal_coding);
7021
70c22245 7022 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
7023 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7024 /* Characer composition should be disabled. */
7025 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7026 terminal_coding.src_multibyte = 1;
7027 terminal_coding.dst_multibyte = 0;
4ed46869
KH
7028 return Qnil;
7029}
7030
c4825358
KH
7031DEFUN ("set-safe-terminal-coding-system-internal",
7032 Fset_safe_terminal_coding_system_internal,
48b0f3ae 7033 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7034 doc: /* Internal use only. */)
48b0f3ae 7035 (coding_system)
b74e4686 7036 Lisp_Object coding_system;
c4825358 7037{
b7826503 7038 CHECK_SYMBOL (coding_system);
c4825358
KH
7039 setup_coding_system (Fcheck_coding_system (coding_system),
7040 &safe_terminal_coding);
df7492f9
KH
7041 /* Characer composition should be disabled. */
7042 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7043 safe_terminal_coding.src_multibyte = 1;
7044 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7045 return Qnil;
7046}
7047
4ed46869
KH
7048DEFUN ("terminal-coding-system",
7049 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
7050 doc: /* Return coding system specified for terminal output. */)
7051 ()
4ed46869 7052{
df7492f9 7053 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
7054}
7055
1ba9e4ab
KH
7056DEFUN ("set-keyboard-coding-system-internal",
7057 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
7058 Sset_keyboard_coding_system_internal, 1, 1, 0,
7059 doc: /* Internal use only. */)
7060 (coding_system)
4ed46869
KH
7061 Lisp_Object coding_system;
7062{
b7826503 7063 CHECK_SYMBOL (coding_system);
df7492f9
KH
7064 setup_coding_system (Fcheck_coding_system (coding_system),
7065 &keyboard_coding);
7066 /* Characer composition should be disabled. */
7067 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
7068 return Qnil;
7069}
7070
7071DEFUN ("keyboard-coding-system",
7072 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
7073 doc: /* Return coding system specified for decoding keyboard input. */)
7074 ()
4ed46869 7075{
df7492f9 7076 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7077}
7078
7079\f
a5d301df
KH
7080DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7081 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7082 doc: /* Choose a coding system for an operation based on the target name.
7083The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7084DECODING-SYSTEM is the coding system to use for decoding
7085\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7086for encoding (in case OPERATION does encoding).
7087
7088The first argument OPERATION specifies an I/O primitive:
7089 For file I/O, `insert-file-contents' or `write-region'.
7090 For process I/O, `call-process', `call-process-region', or `start-process'.
7091 For network I/O, `open-network-stream'.
7092
7093The remaining arguments should be the same arguments that were passed
7094to the primitive. Depending on which primitive, one of those arguments
7095is selected as the TARGET. For example, if OPERATION does file I/O,
7096whichever argument specifies the file name is TARGET.
7097
7098TARGET has a meaning which depends on OPERATION:
7099 For file I/O, TARGET is a file name.
7100 For process I/O, TARGET is a process name.
7101 For network I/O, TARGET is a service name or a port number
7102
7103This function looks up what specified for TARGET in,
7104`file-coding-system-alist', `process-coding-system-alist',
7105or `network-coding-system-alist' depending on OPERATION.
7106They may specify a coding system, a cons of coding systems,
7107or a function symbol to call.
7108In the last case, we call the function with one argument,
7109which is a list of all the arguments given to this function.
7110
7111usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7112 (nargs, args)
4ed46869
KH
7113 int nargs;
7114 Lisp_Object *args;
7115{
7116 Lisp_Object operation, target_idx, target, val;
7117 register Lisp_Object chain;
7118
7119 if (nargs < 2)
7120 error ("Too few arguments");
7121 operation = args[0];
7122 if (!SYMBOLP (operation)
7123 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7124 error ("Invalid first arguement");
4ed46869
KH
7125 if (nargs < 1 + XINT (target_idx))
7126 error ("Too few arguments for operation: %s",
7127 XSYMBOL (operation)->name->data);
7128 target = args[XINT (target_idx) + 1];
7129 if (!(STRINGP (target)
7130 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7131 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7132
2e34157c
RS
7133 chain = ((EQ (operation, Qinsert_file_contents)
7134 || EQ (operation, Qwrite_region))
02ba4723 7135 ? Vfile_coding_system_alist
2e34157c 7136 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7137 ? Vnetwork_coding_system_alist
7138 : Vprocess_coding_system_alist));
4ed46869
KH
7139 if (NILP (chain))
7140 return Qnil;
7141
03699b14 7142 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 7143 {
f44d27ce 7144 Lisp_Object elt;
4ed46869 7145
df7492f9 7146 elt = XCAR (chain);
4ed46869
KH
7147 if (CONSP (elt)
7148 && ((STRINGP (target)
03699b14
KR
7149 && STRINGP (XCAR (elt))
7150 && fast_string_match (XCAR (elt), target) >= 0)
7151 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 7152 {
03699b14 7153 val = XCDR (elt);
b19fd4c5
KH
7154 /* Here, if VAL is both a valid coding system and a valid
7155 function symbol, we return VAL as a coding system. */
02ba4723
KH
7156 if (CONSP (val))
7157 return val;
7158 if (! SYMBOLP (val))
7159 return Qnil;
7160 if (! NILP (Fcoding_system_p (val)))
7161 return Fcons (val, val);
b19fd4c5
KH
7162 if (! NILP (Ffboundp (val)))
7163 {
7164 val = call1 (val, Flist (nargs, args));
7165 if (CONSP (val))
7166 return val;
7167 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7168 return Fcons (val, val);
7169 }
02ba4723
KH
7170 return Qnil;
7171 }
4ed46869
KH
7172 }
7173 return Qnil;
7174}
7175
df7492f9
KH
7176DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7177 Sset_coding_system_priority, 1, MANY, 0,
1fcd6c8b
DL
7178 doc: /* Assign higher priority to coding systems given as arguments.
7179usage: (set-coding-system-priority CODING-SYSTEM ...) */)
df7492f9
KH
7180 (nargs, args)
7181 int nargs;
7182 Lisp_Object *args;
7183{
7184 int i, j;
7185 int changed[coding_category_max];
7186 enum coding_category priorities[coding_category_max];
7187
7188 bzero (changed, sizeof changed);
7189
7190 for (i = j = 0; i < nargs; i++)
7191 {
7192 enum coding_category category;
7193 Lisp_Object spec, attrs;
7194
7195 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7196 attrs = AREF (spec, 0);
7197 category = XINT (CODING_ATTR_CATEGORY (attrs));
7198 if (changed[category])
7199 /* Ignore this coding system because a coding system of the
7200 same category already had a higher priority. */
7201 continue;
7202 changed[category] = 1;
7203 priorities[j++] = category;
7204 if (coding_categories[category].id >= 0
7205 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7206 setup_coding_system (args[i], &coding_categories[category]);
7207 }
7208
7209 /* Now we have decided top J priorities. Reflect the order of the
7210 original priorities to the remaining priorities. */
7211
7212 for (i = j, j = 0; i < coding_category_max; i++, j++)
7213 {
7214 while (j < coding_category_max
7215 && changed[coding_priorities[j]])
7216 j++;
7217 if (j == coding_category_max)
7218 abort ();
7219 priorities[i] = coding_priorities[j];
7220 }
7221
7222 bcopy (priorities, coding_priorities, sizeof priorities);
7223 return Qnil;
7224}
7225
7226DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7227 Scoding_system_priority_list, 0, 1, 0,
7228 doc: /* Return a list of coding systems ordered by their priorities. */)
7229 (highestp)
7230 Lisp_Object highestp;
d46c5b12
KH
7231{
7232 int i;
df7492f9 7233 Lisp_Object val;
d46c5b12 7234
df7492f9 7235 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7236 {
df7492f9
KH
7237 enum coding_category category = coding_priorities[i];
7238 int id = coding_categories[category].id;
7239 Lisp_Object attrs;
7240
7241 if (id < 0)
7242 continue;
7243 attrs = CODING_ID_ATTRS (id);
7244 if (! NILP (highestp))
7245 return CODING_ATTR_BASE_NAME (attrs);
7246 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7247 }
7248 return Fnreverse (val);
7249}
7250
7251static Lisp_Object
7252make_subsidiaries (base)
7253 Lisp_Object base;
7254{
7255 Lisp_Object subsidiaries;
7256 char *suffixes[] = { "-unix", "-dos", "-mac" };
7257 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7258 char *buf = (char *) alloca (base_name_len + 6);
7259 int i;
7260
7261 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7262 subsidiaries = Fmake_vector (make_number (3), Qnil);
7263 for (i = 0; i < 3; i++)
7264 {
7265 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7266 ASET (subsidiaries, i, intern (buf));
7267 }
7268 return subsidiaries;
7269}
7270
7271
7272DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7273 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
7274 doc: /* For internal use only.
7275usage: (define-coding-system-internal ...) */)
df7492f9
KH
7276 (nargs, args)
7277 int nargs;
7278 Lisp_Object *args;
7279{
7280 Lisp_Object name;
7281 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7282 Lisp_Object attrs; /* Vector of attributes. */
7283 Lisp_Object eol_type;
7284 Lisp_Object aliases;
7285 Lisp_Object coding_type, charset_list, safe_charsets;
7286 enum coding_category category;
7287 Lisp_Object tail, val;
7288 int max_charset_id = 0;
7289 int i;
7290
7291 if (nargs < coding_arg_max)
7292 goto short_args;
7293
7294 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7295
7296 name = args[coding_arg_name];
7297 CHECK_SYMBOL (name);
7298 CODING_ATTR_BASE_NAME (attrs) = name;
7299
7300 val = args[coding_arg_mnemonic];
7301 if (! STRINGP (val))
7302 CHECK_CHARACTER (val);
7303 CODING_ATTR_MNEMONIC (attrs) = val;
7304
7305 coding_type = args[coding_arg_coding_type];
7306 CHECK_SYMBOL (coding_type);
7307 CODING_ATTR_TYPE (attrs) = coding_type;
7308
7309 charset_list = args[coding_arg_charset_list];
7310 if (SYMBOLP (charset_list))
7311 {
7312 if (EQ (charset_list, Qiso_2022))
7313 {
7314 if (! EQ (coding_type, Qiso_2022))
7315 error ("Invalid charset-list");
7316 charset_list = Viso_2022_charset_list;
7317 }
7318 else if (EQ (charset_list, Qemacs_mule))
7319 {
7320 if (! EQ (coding_type, Qemacs_mule))
7321 error ("Invalid charset-list");
7322 charset_list = Vemacs_mule_charset_list;
7323 }
7324 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7325 if (max_charset_id < XFASTINT (XCAR (tail)))
7326 max_charset_id = XFASTINT (XCAR (tail));
7327 }
7328 else
7329 {
7330 charset_list = Fcopy_sequence (charset_list);
7331 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7332 {
7333 struct charset *charset;
7334
7335 val = Fcar (tail);
7336 CHECK_CHARSET_GET_CHARSET (val, charset);
7337 if (EQ (coding_type, Qiso_2022)
7338 ? CHARSET_ISO_FINAL (charset) < 0
7339 : EQ (coding_type, Qemacs_mule)
7340 ? CHARSET_EMACS_MULE_ID (charset) < 0
7341 : 0)
7342 error ("Can't handle charset `%s'",
7343 XSYMBOL (CHARSET_NAME (charset))->name->data);
7344
7345 XCAR (tail) = make_number (charset->id);
7346 if (max_charset_id < charset->id)
7347 max_charset_id = charset->id;
7348 }
7349 }
7350 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7351
7352 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7353 make_number (255));
7354 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7355 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7356 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7357
7358 val = args[coding_arg_decode_translation_table];
7359 if (! NILP (val))
7360 CHECK_CHAR_TABLE (val);
7361 CODING_ATTR_DECODE_TBL (attrs) = val;
7362
7363 val = args[coding_arg_encode_translation_table];
7364 if (! NILP (val))
7365 CHECK_CHAR_TABLE (val);
7366 CODING_ATTR_ENCODE_TBL (attrs) = val;
7367
7368 val = args[coding_arg_post_read_conversion];
7369 CHECK_SYMBOL (val);
7370 CODING_ATTR_POST_READ (attrs) = val;
7371
7372 val = args[coding_arg_pre_write_conversion];
7373 CHECK_SYMBOL (val);
7374 CODING_ATTR_PRE_WRITE (attrs) = val;
7375
7376 val = args[coding_arg_default_char];
7377 if (NILP (val))
7378 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7379 else
7380 {
7381 CHECK_CHARACTER (val);
7382 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7383 }
7384
7385 val = args[coding_arg_plist];
7386 CHECK_LIST (val);
7387 CODING_ATTR_PLIST (attrs) = val;
7388
7389 if (EQ (coding_type, Qcharset))
7390 {
c7c66a95
KH
7391 /* Generate a lisp vector of 256 elements. Each element is nil,
7392 integer, or a list of charset IDs.
7393
7394 If Nth element is nil, the byte code N is invalid in this
7395 coding system.
7396
7397 If Nth element is a number NUM, N is the first byte of a
7398 charset whose ID is NUM.
7399
7400 If Nth element is a list of charset IDs, N is the first byte
7401 of one of them. The list is sorted by dimensions of the
7402 charsets. A charset of smaller dimension comes firtst.
7403 */
df7492f9
KH
7404 val = Fmake_vector (make_number (256), Qnil);
7405
7406 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7407 {
c7c66a95
KH
7408 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7409 int dim = CHARSET_DIMENSION (charset);
7410 int idx = (dim - 1) * 4;
7411
15d143f7
KH
7412 for (i = charset->code_space[idx];
7413 i <= charset->code_space[idx + 1]; i++)
7414 {
c7c66a95
KH
7415 Lisp_Object tmp, tmp2;
7416 int dim2;
7417
7418 tmp = AREF (val, i);
7419 if (NILP (tmp))
7420 tmp = XCAR (tail);
7421 else if (NUMBERP (tmp))
7422 {
7423 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7424 if (dim < dim2)
c7c66a95 7425 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
7426 else
7427 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 7428 }
15d143f7 7429 else
c7c66a95
KH
7430 {
7431 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7432 {
7433 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7434 if (dim < dim2)
7435 break;
7436 }
7437 if (NILP (tmp2))
7438 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7439 else
7440 {
7441 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7442 XSETCAR (tmp2, XCAR (tail));
7443 }
7444 }
7445 ASET (val, i, tmp);
15d143f7 7446 }
df7492f9
KH
7447 }
7448 ASET (attrs, coding_attr_charset_valids, val);
7449 category = coding_category_charset;
7450 }
7451 else if (EQ (coding_type, Qccl))
7452 {
7453 Lisp_Object valids;
7454
7455 if (nargs < coding_arg_ccl_max)
7456 goto short_args;
7457
7458 val = args[coding_arg_ccl_decoder];
7459 CHECK_CCL_PROGRAM (val);
7460 if (VECTORP (val))
7461 val = Fcopy_sequence (val);
7462 ASET (attrs, coding_attr_ccl_decoder, val);
7463
7464 val = args[coding_arg_ccl_encoder];
7465 CHECK_CCL_PROGRAM (val);
7466 if (VECTORP (val))
7467 val = Fcopy_sequence (val);
7468 ASET (attrs, coding_attr_ccl_encoder, val);
7469
7470 val = args[coding_arg_ccl_valids];
7471 valids = Fmake_string (make_number (256), make_number (0));
7472 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7473 {
7474 val = Fcar (tail);
7475 if (INTEGERP (val))
7476 ASET (valids, XINT (val), 1);
7477 else
7478 {
7479 int from, to;
7480
7481 CHECK_CONS (val);
7482 CHECK_NUMBER (XCAR (val));
7483 CHECK_NUMBER (XCDR (val));
7484 from = XINT (XCAR (val));
7485 to = XINT (XCDR (val));
7486 for (i = from; i <= to; i++)
7487 ASET (valids, i, 1);
7488 }
7489 }
7490 ASET (attrs, coding_attr_ccl_valids, valids);
7491
7492 category = coding_category_ccl;
7493 }
7494 else if (EQ (coding_type, Qutf_16))
7495 {
7496 Lisp_Object bom, endian;
7497
7498 if (nargs < coding_arg_utf16_max)
7499 goto short_args;
7500
7501 bom = args[coding_arg_utf16_bom];
7502 if (! NILP (bom) && ! EQ (bom, Qt))
7503 {
7504 CHECK_CONS (bom);
7505 CHECK_CODING_SYSTEM (XCAR (bom));
7506 CHECK_CODING_SYSTEM (XCDR (bom));
7507 }
7508 ASET (attrs, coding_attr_utf_16_bom, bom);
7509
7510 endian = args[coding_arg_utf16_endian];
7511 ASET (attrs, coding_attr_utf_16_endian, endian);
7512
7513 category = (CONSP (bom)
7514 ? coding_category_utf_16_auto
7515 : NILP (bom)
7516 ? (NILP (endian)
7517 ? coding_category_utf_16_be_nosig
7518 : coding_category_utf_16_le_nosig)
7519 : (NILP (endian)
7520 ? coding_category_utf_16_be
7521 : coding_category_utf_16_le));
7522 }
7523 else if (EQ (coding_type, Qiso_2022))
7524 {
7525 Lisp_Object initial, reg_usage, request, flags;
0be8721c 7526 int i, id;
1397dc18 7527
df7492f9
KH
7528 if (nargs < coding_arg_iso2022_max)
7529 goto short_args;
7530
7531 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7532 CHECK_VECTOR (initial);
7533 for (i = 0; i < 4; i++)
7534 {
7535 val = Faref (initial, make_number (i));
7536 if (! NILP (val))
7537 {
7538 CHECK_CHARSET_GET_ID (val, id);
7539 ASET (initial, i, make_number (id));
7540 }
7541 else
7542 ASET (initial, i, make_number (-1));
7543 }
7544
7545 reg_usage = args[coding_arg_iso2022_reg_usage];
7546 CHECK_CONS (reg_usage);
7547 CHECK_NATNUM (XCAR (reg_usage));
7548 CHECK_NATNUM (XCDR (reg_usage));
7549
7550 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7551 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 7552 {
df7492f9
KH
7553 int id;
7554
7555 val = Fcar (tail);
7556 CHECK_CONS (val);
7557 CHECK_CHARSET_GET_ID (XCAR (val), id);
7558 CHECK_NATNUM (XCDR (val));
7559 if (XINT (XCDR (val)) >= 4)
7560 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7561 XCAR (val) = make_number (id);
1397dc18 7562 }
df7492f9
KH
7563
7564 flags = args[coding_arg_iso2022_flags];
7565 CHECK_NATNUM (flags);
7566 i = XINT (flags);
7567 if (EQ (args[coding_arg_charset_list], Qiso_2022))
7568 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7569
7570 ASET (attrs, coding_attr_iso_initial, initial);
7571 ASET (attrs, coding_attr_iso_usage, reg_usage);
7572 ASET (attrs, coding_attr_iso_request, request);
7573 ASET (attrs, coding_attr_iso_flags, flags);
7574 setup_iso_safe_charsets (attrs);
7575
7576 if (i & CODING_ISO_FLAG_SEVEN_BITS)
7577 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7578 | CODING_ISO_FLAG_SINGLE_SHIFT))
7579 ? coding_category_iso_7_else
7580 : EQ (args[coding_arg_charset_list], Qiso_2022)
7581 ? coding_category_iso_7
7582 : coding_category_iso_7_tight);
7583 else
7584 {
7585 int id = XINT (AREF (initial, 1));
7586
7587 category = (((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7588 | CODING_ISO_FLAG_SINGLE_SHIFT))
7589 || EQ (args[coding_arg_charset_list], Qiso_2022)
7590 || id < 0)
7591 ? coding_category_iso_8_else
7592 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7593 ? coding_category_iso_8_1
7594 : coding_category_iso_8_2);
7595 }
7596 }
7597 else if (EQ (coding_type, Qemacs_mule))
7598 {
7599 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7600 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7601
7602 category = coding_category_emacs_mule;
7603 }
7604 else if (EQ (coding_type, Qshift_jis))
7605 {
7606
7607 struct charset *charset;
7608
7609 if (XINT (Flength (charset_list)) != 3)
7610 error ("There should be just three charsets");
7611
7612 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7613 if (CHARSET_DIMENSION (charset) != 1)
7614 error ("Dimension of charset %s is not one",
7615 XSYMBOL (CHARSET_NAME (charset))->name->data);
7616
7617 charset_list = XCDR (charset_list);
7618 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7619 if (CHARSET_DIMENSION (charset) != 1)
7620 error ("Dimension of charset %s is not one",
7621 XSYMBOL (CHARSET_NAME (charset))->name->data);
7622
7623 charset_list = XCDR (charset_list);
7624 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7625 if (CHARSET_DIMENSION (charset) != 2)
7626 error ("Dimension of charset %s is not two",
7627 XSYMBOL (CHARSET_NAME (charset))->name->data);
7628
7629 category = coding_category_sjis;
7630 Vsjis_coding_system = name;
7631 }
7632 else if (EQ (coding_type, Qbig5))
7633 {
7634 struct charset *charset;
7635
7636 if (XINT (Flength (charset_list)) != 2)
7637 error ("There should be just two charsets");
7638
7639 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7640 if (CHARSET_DIMENSION (charset) != 1)
7641 error ("Dimension of charset %s is not one",
7642 XSYMBOL (CHARSET_NAME (charset))->name->data);
7643
7644 charset_list = XCDR (charset_list);
7645 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7646 if (CHARSET_DIMENSION (charset) != 2)
7647 error ("Dimension of charset %s is not two",
7648 XSYMBOL (CHARSET_NAME (charset))->name->data);
7649
7650 category = coding_category_big5;
7651 Vbig5_coding_system = name;
7652 }
7653 else if (EQ (coding_type, Qraw_text))
7654 category = coding_category_raw_text;
7655 else if (EQ (coding_type, Qutf_8))
7656 category = coding_category_utf_8;
7657 else if (EQ (coding_type, Qundecided))
7658 category = coding_category_undecided;
7659 else
7660 error ("Invalid coding system type: %s",
7661 XSYMBOL (coding_type)->name->data);
7662
7663 CODING_ATTR_CATEGORY (attrs) = make_number (category);
7664
7665 eol_type = args[coding_arg_eol_type];
7666 if (! NILP (eol_type)
7667 && ! EQ (eol_type, Qunix)
7668 && ! EQ (eol_type, Qdos)
7669 && ! EQ (eol_type, Qmac))
7670 error ("Invalid eol-type");
7671
7672 aliases = Fcons (name, Qnil);
7673
7674 if (NILP (eol_type))
7675 {
7676 eol_type = make_subsidiaries (name);
7677 for (i = 0; i < 3; i++)
1397dc18 7678 {
df7492f9
KH
7679 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7680
7681 this_name = AREF (eol_type, i);
7682 this_aliases = Fcons (this_name, Qnil);
7683 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7684 this_spec = Fmake_vector (make_number (3), attrs);
7685 ASET (this_spec, 1, this_aliases);
7686 ASET (this_spec, 2, this_eol_type);
7687 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7688 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7689 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7690 Vcoding_system_alist);
1397dc18 7691 }
d46c5b12 7692 }
1397dc18 7693
df7492f9
KH
7694 spec_vec = Fmake_vector (make_number (3), attrs);
7695 ASET (spec_vec, 1, aliases);
7696 ASET (spec_vec, 2, eol_type);
7697
7698 Fputhash (name, spec_vec, Vcoding_system_hash_table);
7699 Vcoding_system_list = Fcons (name, Vcoding_system_list);
7700 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7701 Vcoding_system_alist);
7702
7703 {
7704 int id = coding_categories[category].id;
7705
7706 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7707 setup_coding_system (name, &coding_categories[category]);
7708 }
7709
d46c5b12 7710 return Qnil;
df7492f9
KH
7711
7712 short_args:
7713 return Fsignal (Qwrong_number_of_arguments,
7714 Fcons (intern ("define-coding-system-internal"),
7715 make_number (nargs)));
d46c5b12
KH
7716}
7717
df7492f9
KH
7718DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7719 Sdefine_coding_system_alias, 2, 2, 0,
7720 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7721 (alias, coding_system)
7722 Lisp_Object alias, coding_system;
66cfb530 7723{
df7492f9 7724 Lisp_Object spec, aliases, eol_type;
84d60297 7725
df7492f9
KH
7726 CHECK_SYMBOL (alias);
7727 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7728 aliases = AREF (spec, 1);
7729 while (!NILP (XCDR (aliases)))
7730 aliases = XCDR (aliases);
7731 XCDR (aliases) = Fcons (alias, Qnil);
66cfb530 7732
df7492f9
KH
7733 eol_type = AREF (spec, 2);
7734 if (VECTORP (eol_type))
66cfb530 7735 {
df7492f9
KH
7736 Lisp_Object subsidiaries;
7737 int i;
7738
7739 subsidiaries = make_subsidiaries (alias);
7740 for (i = 0; i < 3; i++)
7741 Fdefine_coding_system_alias (AREF (subsidiaries, i),
7742 AREF (eol_type, i));
7743
7744 ASET (spec, 2, subsidiaries);
66cfb530 7745 }
df7492f9
KH
7746
7747 Fputhash (alias, spec, Vcoding_system_hash_table);
5bad0796
DL
7748 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
7749 Vcoding_system_alist);
66cfb530
KH
7750
7751 return Qnil;
7752}
7753
df7492f9
KH
7754DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7755 1, 1, 0,
7756 doc: /* Return the base of CODING-SYSTEM.
7757Any alias or subsidiary coding systems are not base coding system. */)
7758 (coding_system)
7759 Lisp_Object coding_system;
7760{
7761 Lisp_Object spec, attrs;
7762
7763 if (NILP (coding_system))
7764 return (Qno_conversion);
7765 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7766 attrs = AREF (spec, 0);
7767 return CODING_ATTR_BASE_NAME (attrs);
7768}
7769
7770DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7771 1, 1, 0,
7772 doc: "Return the property list of CODING-SYSTEM.")
7773 (coding_system)
7774 Lisp_Object coding_system;
7775{
7776 Lisp_Object spec, attrs;
7777
7778 if (NILP (coding_system))
7779 coding_system = Qno_conversion;
7780 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7781 attrs = AREF (spec, 0);
7782 return CODING_ATTR_PLIST (attrs);
7783}
7784
7785
7786DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7787 1, 1, 0,
7788 doc: /* Return the list of aliases of CODING-SYSTEM.
7789A base coding system is what made by `define-coding-system'.
7790Any alias nor subsidiary coding systems are not base coding system. */)
7791 (coding_system)
7792 Lisp_Object coding_system;
7793{
7794 Lisp_Object spec;
7795
7796 if (NILP (coding_system))
7797 coding_system = Qno_conversion;
7798 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7799 return AREF (spec, 2);
7800}
7801
7802DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7803 Scoding_system_eol_type, 1, 1, 0,
7804 doc: /* Return eol-type of CODING-SYSTEM.
7805An eol-type is integer 0, 1, 2, or a vector of coding systems.
7806
7807Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7808and CR respectively.
7809
7810A vector value indicates that a format of end-of-line should be
7811detected automatically. Nth element of the vector is the subsidiary
7812coding system whose eol-type is N. */)
7813 (coding_system)
7814 Lisp_Object coding_system;
7815{
7816 Lisp_Object spec, eol_type;
7817 int n;
7818
7819 if (NILP (coding_system))
7820 coding_system = Qno_conversion;
7821 if (! CODING_SYSTEM_P (coding_system))
7822 return Qnil;
7823 spec = CODING_SYSTEM_SPEC (coding_system);
7824 eol_type = AREF (spec, 2);
7825 if (VECTORP (eol_type))
7826 return Fcopy_sequence (eol_type);
7827 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7828 return make_number (n);
7829}
7830
4ed46869
KH
7831#endif /* emacs */
7832
7833\f
1397dc18 7834/*** 9. Post-amble ***/
4ed46869 7835
dfcf069d 7836void
4ed46869
KH
7837init_coding_once ()
7838{
7839 int i;
7840
df7492f9
KH
7841 for (i = 0; i < coding_category_max; i++)
7842 {
7843 coding_categories[i].id = -1;
7844 coding_priorities[i] = i;
7845 }
4ed46869
KH
7846
7847 /* ISO2022 specific initialize routine. */
7848 for (i = 0; i < 0x20; i++)
b73bfc1c 7849 iso_code_class[i] = ISO_control_0;
4ed46869
KH
7850 for (i = 0x21; i < 0x7F; i++)
7851 iso_code_class[i] = ISO_graphic_plane_0;
7852 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 7853 iso_code_class[i] = ISO_control_1;
4ed46869
KH
7854 for (i = 0xA1; i < 0xFF; i++)
7855 iso_code_class[i] = ISO_graphic_plane_1;
7856 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7857 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7858 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7859 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7860 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7861 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7862 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7863 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7864 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7865 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7866
b843d1ae 7867 inhibit_pre_post_conversion = 0;
df7492f9
KH
7868
7869 for (i = 0; i < 256; i++)
7870 {
7871 emacs_mule_bytes[i] = 1;
7872 }
781d7a48
KH
7873 emacs_mule_bytes[LEADING_CODE_PRIVATE_11] = 3;
7874 emacs_mule_bytes[LEADING_CODE_PRIVATE_12] = 3;
7875 emacs_mule_bytes[LEADING_CODE_PRIVATE_21] = 4;
7876 emacs_mule_bytes[LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
7877}
7878
7879#ifdef emacs
7880
dfcf069d 7881void
e0e989f6
KH
7882syms_of_coding ()
7883{
df7492f9
KH
7884 staticpro (&Vcoding_system_hash_table);
7885 Vcoding_system_hash_table = Fmakehash (Qeq);
7886
7887 staticpro (&Vsjis_coding_system);
7888 Vsjis_coding_system = Qnil;
7889
7890 staticpro (&Vbig5_coding_system);
7891 Vbig5_coding_system = Qnil;
7892
7893 staticpro (&Vcode_conversion_work_buf_list);
7894 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 7895
df7492f9
KH
7896 staticpro (&Vcode_conversion_reused_work_buf);
7897 Vcode_conversion_reused_work_buf = Qnil;
7898
7899 DEFSYM (Qcharset, "charset");
7900 DEFSYM (Qtarget_idx, "target-idx");
7901 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
7902 Fset (Qcoding_system_history, Qnil);
7903
9ce27fde 7904 /* Target FILENAME is the first argument. */
e0e989f6 7905 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 7906 /* Target FILENAME is the third argument. */
e0e989f6
KH
7907 Fput (Qwrite_region, Qtarget_idx, make_number (2));
7908
df7492f9 7909 DEFSYM (Qcall_process, "call-process");
9ce27fde 7910 /* Target PROGRAM is the first argument. */
e0e989f6
KH
7911 Fput (Qcall_process, Qtarget_idx, make_number (0));
7912
df7492f9 7913 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 7914 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7915 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7916
df7492f9 7917 DEFSYM (Qstart_process, "start-process");
9ce27fde 7918 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7919 Fput (Qstart_process, Qtarget_idx, make_number (2));
7920
df7492f9 7921 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 7922 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
7923 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7924
df7492f9
KH
7925 DEFSYM (Qcoding_system, "coding-system");
7926 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 7927
df7492f9
KH
7928 DEFSYM (Qeol_type, "eol-type");
7929 DEFSYM (Qunix, "unix");
7930 DEFSYM (Qdos, "dos");
7931 DEFSYM (Qmac, "mac");
4ed46869 7932
df7492f9
KH
7933 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
7934 DEFSYM (Qpost_read_conversion, "post-read-conversion");
7935 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
7936 DEFSYM (Qdefault_char, "default-char");
7937 DEFSYM (Qundecided, "undecided");
7938 DEFSYM (Qno_conversion, "no-conversion");
7939 DEFSYM (Qraw_text, "raw-text");
4ed46869 7940
df7492f9 7941 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 7942
df7492f9 7943 DEFSYM (Qutf_8, "utf-8");
27901516 7944
df7492f9
KH
7945 DEFSYM (Qutf_16, "utf-16");
7946 DEFSYM (Qutf_16_be, "utf-16-be");
7947 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
7948 DEFSYM (Qutf_16_le, "utf-16-l3");
7949 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
7950 DEFSYM (Qsignature, "signature");
7951 DEFSYM (Qendian, "endian");
7952 DEFSYM (Qbig, "big");
7953 DEFSYM (Qlittle, "little");
27901516 7954
df7492f9
KH
7955 DEFSYM (Qshift_jis, "shift-jis");
7956 DEFSYM (Qbig5, "big5");
4ed46869 7957
df7492f9 7958 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 7959
df7492f9 7960 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
7961 Fput (Qcoding_system_error, Qerror_conditions,
7962 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7963 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 7964 build_string ("Invalid coding system"));
4ed46869 7965
df7492f9
KH
7966 /* Intern this now in case it isn't already done.
7967 Setting this variable twice is harmless.
7968 But don't staticpro it here--that is done in alloc.c. */
7969 Qchar_table_extra_slots = intern ("char-table-extra-slots");
4ed46869 7970
df7492f9 7971 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 7972 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
7973 DEFSYM (Qtranslation_table_id, "translation-table-id");
7974 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
7975 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
bdd9fb48 7976
df7492f9 7977 DEFSYM (Qvalid_codes, "valid-codes");
05e6f5dc 7978
df7492f9 7979 DEFSYM (Qemacs_mule, "emacs-mule");
05e6f5dc 7980
df7492f9
KH
7981 Vcoding_category_table
7982 = Fmake_vector (make_number (coding_category_max), Qnil);
7983 staticpro (&Vcoding_category_table);
7984 /* Followings are target of code detection. */
7985 ASET (Vcoding_category_table, coding_category_iso_7,
7986 intern ("coding-category-iso-7"));
7987 ASET (Vcoding_category_table, coding_category_iso_7_tight,
7988 intern ("coding-category-iso-7-tight"));
7989 ASET (Vcoding_category_table, coding_category_iso_8_1,
7990 intern ("coding-category-iso-8-1"));
7991 ASET (Vcoding_category_table, coding_category_iso_8_2,
7992 intern ("coding-category-iso-8-2"));
7993 ASET (Vcoding_category_table, coding_category_iso_7_else,
7994 intern ("coding-category-iso-7-else"));
7995 ASET (Vcoding_category_table, coding_category_iso_8_else,
7996 intern ("coding-category-iso-8-else"));
7997 ASET (Vcoding_category_table, coding_category_utf_8,
7998 intern ("coding-category-utf-8"));
7999 ASET (Vcoding_category_table, coding_category_utf_16_be,
8000 intern ("coding-category-utf-16-be"));
8001 ASET (Vcoding_category_table, coding_category_utf_16_le,
8002 intern ("coding-category-utf-16-le"));
8003 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8004 intern ("coding-category-utf-16-be-nosig"));
8005 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8006 intern ("coding-category-utf-16-le-nosig"));
8007 ASET (Vcoding_category_table, coding_category_charset,
8008 intern ("coding-category-charset"));
8009 ASET (Vcoding_category_table, coding_category_sjis,
8010 intern ("coding-category-sjis"));
8011 ASET (Vcoding_category_table, coding_category_big5,
8012 intern ("coding-category-big5"));
8013 ASET (Vcoding_category_table, coding_category_ccl,
8014 intern ("coding-category-ccl"));
8015 ASET (Vcoding_category_table, coding_category_emacs_mule,
8016 intern ("coding-category-emacs-mule"));
8017 /* Followings are NOT target of code detection. */
8018 ASET (Vcoding_category_table, coding_category_raw_text,
8019 intern ("coding-category-raw-text"));
8020 ASET (Vcoding_category_table, coding_category_undecided,
8021 intern ("coding-category-undecided"));
70c22245 8022
4ed46869
KH
8023 defsubr (&Scoding_system_p);
8024 defsubr (&Sread_coding_system);
8025 defsubr (&Sread_non_nil_coding_system);
8026 defsubr (&Scheck_coding_system);
8027 defsubr (&Sdetect_coding_region);
d46c5b12 8028 defsubr (&Sdetect_coding_string);
05e6f5dc 8029 defsubr (&Sfind_coding_systems_region_internal);
df7492f9 8030 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
8031 defsubr (&Sdecode_coding_region);
8032 defsubr (&Sencode_coding_region);
8033 defsubr (&Sdecode_coding_string);
8034 defsubr (&Sencode_coding_string);
8035 defsubr (&Sdecode_sjis_char);
8036 defsubr (&Sencode_sjis_char);
8037 defsubr (&Sdecode_big5_char);
8038 defsubr (&Sencode_big5_char);
1ba9e4ab 8039 defsubr (&Sset_terminal_coding_system_internal);
c4825358 8040 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 8041 defsubr (&Sterminal_coding_system);
1ba9e4ab 8042 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 8043 defsubr (&Skeyboard_coding_system);
a5d301df 8044 defsubr (&Sfind_operation_coding_system);
df7492f9
KH
8045 defsubr (&Sset_coding_system_priority);
8046 defsubr (&Sdefine_coding_system_internal);
8047 defsubr (&Sdefine_coding_system_alias);
8048 defsubr (&Scoding_system_base);
8049 defsubr (&Scoding_system_plist);
8050 defsubr (&Scoding_system_aliases);
8051 defsubr (&Scoding_system_eol_type);
8052 defsubr (&Scoding_system_priority_list);
4ed46869 8053
4608c386 8054 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
8055 doc: /* List of coding systems.
8056
8057Do not alter the value of this variable manually. This variable should be
df7492f9 8058updated by the functions `define-coding-system' and
48b0f3ae 8059`define-coding-system-alias'. */);
4608c386
KH
8060 Vcoding_system_list = Qnil;
8061
8062 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
8063 doc: /* Alist of coding system names.
8064Each element is one element list of coding system name.
8065This variable is given to `completing-read' as TABLE argument.
8066
8067Do not alter the value of this variable manually. This variable should be
8068updated by the functions `make-coding-system' and
8069`define-coding-system-alias'. */);
4608c386
KH
8070 Vcoding_system_alist = Qnil;
8071
4ed46869 8072 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
8073 doc: /* List of coding-categories (symbols) ordered by priority.
8074
8075On detecting a coding system, Emacs tries code detection algorithms
8076associated with each coding-category one by one in this order. When
8077one algorithm agrees with a byte sequence of source text, the coding
8078system bound to the corresponding coding-category is selected. */);
4ed46869
KH
8079 {
8080 int i;
8081
8082 Vcoding_category_list = Qnil;
df7492f9 8083 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 8084 Vcoding_category_list
d46c5b12
KH
8085 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8086 Vcoding_category_list);
4ed46869
KH
8087 }
8088
8089 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
8090 doc: /* Specify the coding system for read operations.
8091It is useful to bind this variable with `let', but do not set it globally.
8092If the value is a coding system, it is used for decoding on read operation.
8093If not, an appropriate element is used from one of the coding system alists:
8094There are three such tables, `file-coding-system-alist',
8095`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
8096 Vcoding_system_for_read = Qnil;
8097
8098 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
8099 doc: /* Specify the coding system for write operations.
8100Programs bind this variable with `let', but you should not set it globally.
8101If the value is a coding system, it is used for encoding of output,
8102when writing it to a file and when sending it to a file or subprocess.
8103
8104If this does not specify a coding system, an appropriate element
8105is used from one of the coding system alists:
8106There are three such tables, `file-coding-system-alist',
8107`process-coding-system-alist', and `network-coding-system-alist'.
8108For output to files, if the above procedure does not specify a coding system,
8109the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
8110 Vcoding_system_for_write = Qnil;
8111
8112 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
8113 doc: /*
8114Coding system used in the latest file or process I/O. */);
4ed46869
KH
8115 Vlast_coding_system_used = Qnil;
8116
9ce27fde 8117 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
8118 doc: /*
8119*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
8120See info node `Coding Systems' and info node `Text and Binary' concerning
8121such conversion. */);
9ce27fde
KH
8122 inhibit_eol_conversion = 0;
8123
ed29121d 8124 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8125 doc: /*
8126Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8127Bind it to t if the process output is to be treated as if it were a file
8128read from some filesystem. */);
ed29121d
EZ
8129 inherit_process_coding_system = 0;
8130
02ba4723 8131 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8132 doc: /*
8133Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8134The format is ((PATTERN . VAL) ...),
8135where PATTERN is a regular expression matching a file name,
8136VAL is a coding system, a cons of coding systems, or a function symbol.
8137If VAL is a coding system, it is used for both decoding and encoding
8138the file contents.
8139If VAL is a cons of coding systems, the car part is used for decoding,
8140and the cdr part is used for encoding.
8141If VAL is a function symbol, the function must return a coding system
0192762c
DL
8142or a cons of coding systems which are used as above. The function gets
8143the arguments with which `find-operation-coding-systems' was called.
48b0f3ae
PJ
8144
8145See also the function `find-operation-coding-system'
8146and the variable `auto-coding-alist'. */);
02ba4723
KH
8147 Vfile_coding_system_alist = Qnil;
8148
8149 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8150 doc: /*
8151Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8152The format is ((PATTERN . VAL) ...),
8153where PATTERN is a regular expression matching a program name,
8154VAL is a coding system, a cons of coding systems, or a function symbol.
8155If VAL is a coding system, it is used for both decoding what received
8156from the program and encoding what sent to the program.
8157If VAL is a cons of coding systems, the car part is used for decoding,
8158and the cdr part is used for encoding.
8159If VAL is a function symbol, the function must return a coding system
8160or a cons of coding systems which are used as above.
8161
8162See also the function `find-operation-coding-system'. */);
02ba4723
KH
8163 Vprocess_coding_system_alist = Qnil;
8164
8165 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8166 doc: /*
8167Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8168The format is ((PATTERN . VAL) ...),
8169where PATTERN is a regular expression matching a network service name
8170or is a port number to connect to,
8171VAL is a coding system, a cons of coding systems, or a function symbol.
8172If VAL is a coding system, it is used for both decoding what received
8173from the network stream and encoding what sent to the network stream.
8174If VAL is a cons of coding systems, the car part is used for decoding,
8175and the cdr part is used for encoding.
8176If VAL is a function symbol, the function must return a coding system
8177or a cons of coding systems which are used as above.
8178
8179See also the function `find-operation-coding-system'. */);
02ba4723 8180 Vnetwork_coding_system_alist = Qnil;
4ed46869 8181
68c45bf0 8182 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8183 doc: /* Coding system to use with system messages.
8184Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8185 Vlocale_coding_system = Qnil;
8186
005f0d35 8187 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8188 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8189 doc: /*
8190*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8191 eol_mnemonic_unix = build_string (":");
4ed46869 8192
7722baf9 8193 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8194 doc: /*
8195*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8196 eol_mnemonic_dos = build_string ("\\");
4ed46869 8197
7722baf9 8198 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8199 doc: /*
8200*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8201 eol_mnemonic_mac = build_string ("/");
4ed46869 8202
7722baf9 8203 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8204 doc: /*
8205*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8206 eol_mnemonic_undecided = build_string (":");
4ed46869 8207
84fbb8a0 8208 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8209 doc: /*
8210*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8211 Venable_character_translation = Qt;
bdd9fb48 8212
f967223b 8213 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8214 &Vstandard_translation_table_for_decode,
8215 doc: /* Table for translating characters while decoding. */);
f967223b 8216 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8217
f967223b 8218 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8219 &Vstandard_translation_table_for_encode,
8220 doc: /* Table for translating characters while encoding. */);
f967223b 8221 Vstandard_translation_table_for_encode = Qnil;
4ed46869 8222
df7492f9 8223 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
8224 doc: /* Alist of charsets vs revision numbers.
8225While encoding, if a charset (car part of an element) is found,
df7492f9
KH
8226designate it with the escape sequence identifying revision (cdr part
8227of the element). */);
8228 Vcharset_revision_table = Qnil;
02ba4723
KH
8229
8230 DEFVAR_LISP ("default-process-coding-system",
8231 &Vdefault_process_coding_system,
48b0f3ae
PJ
8232 doc: /* Cons of coding systems used for process I/O by default.
8233The car part is used for decoding a process output,
8234the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 8235 Vdefault_process_coding_system = Qnil;
c4825358 8236
3f003981 8237 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
8238 doc: /*
8239Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
8240This is a vector of length 256.
8241If Nth element is non-nil, the existence of code N in a file
8242\(or output of subprocess) doesn't prevent it to be detected as
8243a coding system of ISO 2022 variant which has a flag
8244`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8245or reading output of a subprocess.
8246Only 128th through 159th elements has a meaning. */);
3f003981 8247 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
8248
8249 DEFVAR_LISP ("select-safe-coding-system-function",
8250 &Vselect_safe_coding_system_function,
df7492f9
KH
8251 doc: /*
8252Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
8253
8254If set, this function is called to force a user to select a proper
8255coding system which can encode the text in the case that a default
8256coding system used in each operation can't encode the text.
8257
8258The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
8259 Vselect_safe_coding_system_function = Qnil;
8260
22ab2303 8261 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 8262 &inhibit_iso_escape_detection,
df7492f9
KH
8263 doc: /*
8264If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
8265
8266By default, on reading a file, Emacs tries to detect how the text is
8267encoded. This code detection is sensitive to escape sequences. If
8268the sequence is valid as ISO2022, the code is determined as one of
8269the ISO2022 encodings, and the file is decoded by the corresponding
8270coding system (e.g. `iso-2022-7bit').
8271
8272However, there may be a case that you want to read escape sequences in
8273a file as is. In such a case, you can set this variable to non-nil.
8274Then, as the code detection ignores any escape sequences, no file is
8275detected as encoded in some ISO2022 encoding. The result is that all
8276escape sequences become visible in a buffer.
8277
8278The default value is nil, and it is strongly recommended not to change
8279it. That is because many Emacs Lisp source files that contain
8280non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8281in Emacs's distribution, and they won't be decoded correctly on
8282reading if you suppress escape sequence detection.
8283
8284The other way to read escape sequences in a file without decoding is
8285to explicitly specify some coding system that doesn't use ISO2022's
8286escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 8287 inhibit_iso_escape_detection = 0;
2c78b7e1
KH
8288
8289 {
8290 Lisp_Object args[coding_arg_max];
8291 Lisp_Object plist[14];
8292 int i;
8293
8294 for (i = 0; i < coding_arg_max; i++)
8295 args[i] = Qnil;
8296
8297 plist[0] = intern (":name");
8298 plist[1] = args[coding_arg_name] = Qno_conversion;
8299 plist[2] = intern (":mnemonic");
8300 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8301 plist[4] = intern (":coding-type");
8302 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8303 plist[6] = intern (":ascii-compatible-p");
8304 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8305 plist[8] = intern (":default-char");
8306 plist[9] = args[coding_arg_default_char] = make_number (0);
8307 plist[10] = intern (":docstring");
8308 plist[11] = build_string ("Do no conversion.\n\
8309\n\
8310When you visit a file with this coding, the file is read into a\n\
8311unibyte buffer as is, thus each byte of a file is treated as a\n\
8312character.");
8313 plist[12] = intern (":eol-type");
8314 plist[13] = args[coding_arg_eol_type] = Qunix;
8315 args[coding_arg_plist] = Flist (14, plist);
8316 Fdefine_coding_system_internal (coding_arg_max, args);
8317 }
8318
8319 setup_coding_system (Qno_conversion, &keyboard_coding);
8320 setup_coding_system (Qno_conversion, &terminal_coding);
8321 setup_coding_system (Qno_conversion, &safe_terminal_coding);
4ed46869
KH
8322}
8323
68c45bf0
PE
8324char *
8325emacs_strerror (error_number)
8326 int error_number;
8327{
8328 char *str;
8329
ca9c0567 8330 synchronize_system_messages_locale ();
68c45bf0
PE
8331 str = strerror (error_number);
8332
8333 if (! NILP (Vlocale_coding_system))
8334 {
8335 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8336 Vlocale_coding_system,
8337 0);
8338 str = (char *) XSTRING (dec)->data;
8339 }
8340
8341 return str;
8342}
8343
4ed46869 8344#endif /* emacs */