(generate_ascii_font): Fix return type.
[bpt/emacs.git] / src / coding.c
CommitLineData
4ed46869 1/* Coding system handler (conversion, detection, and etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
70ad9fc4 4 Copyright (C) 2001 Free Software Foundation, Inc.
df7492f9
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
e19c3639
KH
49 Coding system is an object for a encoding mechanism that contains
50 information about how to convert byte sequence to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcharset_hash_table. The conversion from a
61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
65 the mechanism of encoding. Here's a brief descrition about type.
df7492f9
KH
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by code converter defined for each
75 character set.
76
77 o Old Emacs' internal format (emacs-mule)
78
79 The coding system adopted by an old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
93dec019 82
df7492f9
KH
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9
KH
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used by Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
df7492f9
KH
104 If a user wants to decode/encode a text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
df7492f9
KH
111 A coding system for a text containing raw eight-bit data. Emacs
112 treat each byte of source text as a character (except for
113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
df7492f9
KH
122 How end-of-line of a text is encoded depends on a system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
df7492f9
KH
128 Since text characters encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
138
4ed46869
KH
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9
KH
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwize (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
151
152 It also resets some bits of an integer pointed by MASK. The macros
153 CATEGORY_MASK_XXX specifies each bit of this integer.
154
155 Below is the template of these functions. */
156
4ed46869 157#if 0
df7492f9
KH
158static int
159detect_coding_XXX (coding, mask)
160 struct coding_system *coding;
161 int *mask;
4ed46869 162{
df7492f9
KH
163 unsigned char *src = coding->source;
164 unsigned char *src_end = coding->source + coding->src_bytes;
165 int multibytep = coding->src_multibyte;
166 int c;
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
175 /* Check if it conforms to XXX. If not, break the loop. */
176 }
177 /* As the data is invalid for XXX, reset a proper bits. */
178 *mask &= ~CODING_CATEGORY_XXX;
179 return 0;
180 no_more_source:
181 /* The source exausted. */
182 if (!found)
183 /* ASCII characters only. */
184 return 0;
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask &= CODING_CATEGORY_XXX;
187 return 1;
4ed46869
KH
188}
189#endif
190
191/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
df7492f9
KH
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
d46c5b12 197
df7492f9
KH
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
d46c5b12 202
df7492f9 203 Below is the template of these functions. */
d46c5b12 204
4ed46869 205#if 0
b73bfc1c 206static void
df7492f9 207decode_coding_XXXX (coding)
4ed46869 208 struct coding_system *coding;
4ed46869 209{
df7492f9
KH
210 unsigned char *src = coding->source + coding->consumed;
211 unsigned char *src_end = coding->source + coding->src_bytes;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base;
216 /* A buffer to produce decoded characters. */
217 int *charbuf = coding->charbuf;
218 int *charbuf_end = charbuf + coding->charbuf_size;
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
243}
244#endif
245
246/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
df7492f9
KH
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
d46c5b12 252
df7492f9
KH
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 257
df7492f9
KH
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce a encoded text until it
260 reaches at the head of not-yet-encoded source text.
d46c5b12 261
df7492f9 262 Below is a template of these functions. */
4ed46869 263#if 0
b73bfc1c 264static void
df7492f9 265encode_coding_XXX (coding)
4ed46869 266 struct coding_system *coding;
4ed46869 267{
df7492f9
KH
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
4ed46869
KH
285}
286#endif
287
4ed46869
KH
288\f
289/*** 1. Preamble ***/
290
68c45bf0 291#include <config.h>
4ed46869
KH
292#include <stdio.h>
293
4ed46869
KH
294#include "lisp.h"
295#include "buffer.h"
df7492f9 296#include "character.h"
4ed46869
KH
297#include "charset.h"
298#include "ccl.h"
df7492f9 299#include "composite.h"
4ed46869
KH
300#include "coding.h"
301#include "window.h"
302
df7492f9 303Lisp_Object Vcoding_system_hash_table;
4ed46869 304
df7492f9
KH
305Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
306Lisp_Object Qunix, Qdos, Qmac;
4ed46869
KH
307Lisp_Object Qbuffer_file_coding_system;
308Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 309Lisp_Object Qdefault_char;
27901516 310Lisp_Object Qno_conversion, Qundecided;
df7492f9
KH
311Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
312Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
313Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
bb0115a2 314Lisp_Object Qcoding_system_history;
1397dc18 315Lisp_Object Qvalid_codes;
4ed46869
KH
316
317extern Lisp_Object Qinsert_file_contents, Qwrite_region;
318Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
319Lisp_Object Qstart_process, Qopen_network_stream;
320Lisp_Object Qtarget_idx;
321
d46c5b12
KH
322Lisp_Object Vselect_safe_coding_system_function;
323
7722baf9
EZ
324/* Mnemonic string for each format of end-of-line. */
325Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
326/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 327 decided. */
7722baf9 328Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
329
330#ifdef emacs
331
4608c386
KH
332Lisp_Object Vcoding_system_list, Vcoding_system_alist;
333
334Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 335
d46c5b12
KH
336/* Coding system emacs-mule and raw-text are for converting only
337 end-of-line format. */
338Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 339
4ed46869
KH
340/* Coding-systems are handed between Emacs Lisp programs and C internal
341 routines by the following three variables. */
342/* Coding-system for reading files and receiving data from process. */
343Lisp_Object Vcoding_system_for_read;
344/* Coding-system for writing files and sending data to process. */
345Lisp_Object Vcoding_system_for_write;
346/* Coding-system actually used in the latest I/O. */
347Lisp_Object Vlast_coding_system_used;
348
c4825358 349/* A vector of length 256 which contains information about special
94487c4e 350 Latin codes (especially for dealing with Microsoft codes). */
3f003981 351Lisp_Object Vlatin_extra_code_table;
c4825358 352
9ce27fde
KH
353/* Flag to inhibit code conversion of end-of-line format. */
354int inhibit_eol_conversion;
355
74383408
KH
356/* Flag to inhibit ISO2022 escape sequence detection. */
357int inhibit_iso_escape_detection;
358
ed29121d
EZ
359/* Flag to make buffer-file-coding-system inherit from process-coding. */
360int inherit_process_coding_system;
361
c4825358 362/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
363struct coding_system terminal_coding;
364
c4825358
KH
365/* Coding system to be used to encode text for terminal display when
366 terminal coding system is nil. */
367struct coding_system safe_terminal_coding;
368
369/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
370struct coding_system keyboard_coding;
371
02ba4723
KH
372Lisp_Object Vfile_coding_system_alist;
373Lisp_Object Vprocess_coding_system_alist;
374Lisp_Object Vnetwork_coding_system_alist;
4ed46869 375
68c45bf0
PE
376Lisp_Object Vlocale_coding_system;
377
4ed46869
KH
378#endif /* emacs */
379
f967223b
KH
380/* Flag to tell if we look up translation table on character code
381 conversion. */
84fbb8a0 382Lisp_Object Venable_character_translation;
f967223b
KH
383/* Standard translation table to look up on decoding (reading). */
384Lisp_Object Vstandard_translation_table_for_decode;
385/* Standard translation table to look up on encoding (writing). */
386Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 387
f967223b
KH
388Lisp_Object Qtranslation_table;
389Lisp_Object Qtranslation_table_id;
390Lisp_Object Qtranslation_table_for_decode;
391Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
392
393/* Alist of charsets vs revision number. */
df7492f9 394static Lisp_Object Vcharset_revision_table;
4ed46869 395
02ba4723
KH
396/* Default coding systems used for process I/O. */
397Lisp_Object Vdefault_process_coding_system;
398
b843d1ae
KH
399/* Global flag to tell that we can't call post-read-conversion and
400 pre-write-conversion functions. Usually the value is zero, but it
401 is set to 1 temporarily while such functions are running. This is
402 to avoid infinite recursive call. */
403static int inhibit_pre_post_conversion;
404
05e6f5dc
KH
405/* Char-table containing safe coding systems of each character. */
406Lisp_Object Vchar_coding_system_table;
407Lisp_Object Qchar_coding_system;
408
df7492f9
KH
409/* Two special coding systems. */
410Lisp_Object Vsjis_coding_system;
411Lisp_Object Vbig5_coding_system;
412
413
414static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
415static void decode_coding_utf_8 P_ ((struct coding_system *));
416static int encode_coding_utf_8 P_ ((struct coding_system *));
417
418static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
419static void decode_coding_utf_16 P_ ((struct coding_system *));
420static int encode_coding_utf_16 P_ ((struct coding_system *));
421
422static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
423static void decode_coding_iso_2022 P_ ((struct coding_system *));
424static int encode_coding_iso_2022 P_ ((struct coding_system *));
425
426static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
427static void decode_coding_emacs_mule P_ ((struct coding_system *));
428static int encode_coding_emacs_mule P_ ((struct coding_system *));
429
430static int detect_coding_sjis P_ ((struct coding_system *, int *));
431static void decode_coding_sjis P_ ((struct coding_system *));
432static int encode_coding_sjis P_ ((struct coding_system *));
433
434static int detect_coding_big5 P_ ((struct coding_system *, int *));
435static void decode_coding_big5 P_ ((struct coding_system *));
436static int encode_coding_big5 P_ ((struct coding_system *));
437
438static int detect_coding_ccl P_ ((struct coding_system *, int *));
439static void decode_coding_ccl P_ ((struct coding_system *));
440static int encode_coding_ccl P_ ((struct coding_system *));
441
442static void decode_coding_raw_text P_ ((struct coding_system *));
443static int encode_coding_raw_text P_ ((struct coding_system *));
444
445
446/* ISO2022 section */
447
448#define CODING_ISO_INITIAL(coding, reg) \
449 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
450 coding_attr_iso_initial), \
451 reg)))
452
453
454#define CODING_ISO_REQUEST(coding, charset_id) \
455 ((charset_id <= (coding)->max_charset_id \
456 ? (coding)->safe_charsets[charset_id] \
457 : -1))
458
459
460#define CODING_ISO_FLAGS(coding) \
461 ((coding)->spec.iso_2022.flags)
462#define CODING_ISO_DESIGNATION(coding, reg) \
463 ((coding)->spec.iso_2022.current_designation[reg])
464#define CODING_ISO_INVOCATION(coding, plane) \
465 ((coding)->spec.iso_2022.current_invocation[plane])
466#define CODING_ISO_SINGLE_SHIFTING(coding) \
467 ((coding)->spec.iso_2022.single_shifting)
468#define CODING_ISO_BOL(coding) \
469 ((coding)->spec.iso_2022.bol)
470#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
471 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
472
473/* Control characters of ISO2022. */
474 /* code */ /* function */
475#define ISO_CODE_LF 0x0A /* line-feed */
476#define ISO_CODE_CR 0x0D /* carriage-return */
477#define ISO_CODE_SO 0x0E /* shift-out */
478#define ISO_CODE_SI 0x0F /* shift-in */
479#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
480#define ISO_CODE_ESC 0x1B /* escape */
481#define ISO_CODE_SS2 0x8E /* single-shift-2 */
482#define ISO_CODE_SS3 0x8F /* single-shift-3 */
483#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
484
485/* All code (1-byte) of ISO2022 is classified into one of the
486 followings. */
487enum iso_code_class_type
488 {
489 ISO_control_0, /* Control codes in the range
490 0x00..0x1F and 0x7F, except for the
491 following 5 codes. */
492 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
493 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
494 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
495 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
496 ISO_escape, /* ISO_CODE_SO (0x1B) */
497 ISO_control_1, /* Control codes in the range
498 0x80..0x9F, except for the
499 following 3 codes. */
500 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
501 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
502 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
503 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
504 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
505 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
506 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
507 };
05e6f5dc 508
df7492f9
KH
509/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
510 `iso-flags' attribute of an iso2022 coding system. */
93dec019 511
df7492f9
KH
512/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
513 instead of the correct short-form sequence (e.g. ESC $ A). */
514#define CODING_ISO_FLAG_LONG_FORM 0x0001
05e6f5dc 515
df7492f9
KH
516/* If set, reset graphic planes and registers at end-of-line to the
517 initial state. */
518#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 519
df7492f9
KH
520/* If set, reset graphic planes and registers before any control
521 characters to the initial state. */
522#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
4ed46869 523
df7492f9
KH
524/* If set, encode by 7-bit environment. */
525#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
b73bfc1c 526
df7492f9
KH
527/* If set, use locking-shift function. */
528#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 529
df7492f9
KH
530/* If set, use single-shift function. Overwrite
531 CODING_ISO_FLAG_LOCKING_SHIFT. */
532#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 533
df7492f9
KH
534/* If set, use designation escape sequence. */
535#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 536
df7492f9
KH
537/* If set, produce revision number sequence. */
538#define CODING_ISO_FLAG_REVISION 0x0080
f4dee582 539
df7492f9
KH
540/* If set, produce ISO6429's direction specifying sequence. */
541#define CODING_ISO_FLAG_DIRECTION 0x0100
4ed46869 542
df7492f9
KH
543/* If set, assume designation states are reset at beginning of line on
544 output. */
545#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
aa72b389 546
df7492f9
KH
547/* If set, designation sequence should be placed at beginning of line
548 on output. */
549#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 550
df7492f9
KH
551/* If set, do not encode unsafe charactes on output. */
552#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 553
df7492f9
KH
554/* If set, extra latin codes (128..159) are accepted as a valid code
555 on input. */
556#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 557
df7492f9 558#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 559
df7492f9 560#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 561
df7492f9 562#define CODING_ISO_FLAG_FULL_SUPPORT 0x8000
aa72b389 563
df7492f9
KH
564/* A character to be produced on output if encoding of the original
565 character is prohibited by CODING_ISO_FLAG_SAFE. */
566#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 567
aa72b389 568
df7492f9
KH
569/* UTF-16 section */
570#define CODING_UTF_16_BOM(coding) \
571 ((coding)->spec.utf_16.bom)
4ed46869 572
df7492f9
KH
573#define CODING_UTF_16_ENDIAN(coding) \
574 ((coding)->spec.utf_16.endian)
4ed46869 575
df7492f9
KH
576#define CODING_UTF_16_SURROGATE(coding) \
577 ((coding)->spec.utf_16.surrogate)
4ed46869 578
4ed46869 579
df7492f9
KH
580/* CCL section */
581#define CODING_CCL_DECODER(coding) \
582 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
583#define CODING_CCL_ENCODER(coding) \
584 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
585#define CODING_CCL_VALIDS(coding) \
586 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
587 ->data)
4ed46869 588
df7492f9 589/* Index for each coding category in `coding_category_table' */
4ed46869 590
df7492f9
KH
591enum coding_category
592 {
593 coding_category_iso_7,
594 coding_category_iso_7_tight,
595 coding_category_iso_8_1,
596 coding_category_iso_8_2,
597 coding_category_iso_7_else,
598 coding_category_iso_8_else,
599 coding_category_utf_8,
600 coding_category_utf_16_auto,
601 coding_category_utf_16_be,
602 coding_category_utf_16_le,
603 coding_category_utf_16_be_nosig,
604 coding_category_utf_16_le_nosig,
605 coding_category_charset,
606 coding_category_sjis,
607 coding_category_big5,
608 coding_category_ccl,
609 coding_category_emacs_mule,
610 /* All above are targets of code detection. */
611 coding_category_raw_text,
612 coding_category_undecided,
613 coding_category_max
614 };
615
616/* Definitions of flag bits used in detect_coding_XXXX. */
617#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
618#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
619#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
620#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
621#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
622#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
623#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
624#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
625#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
626#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
627#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
628#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
629#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
630#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
631#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
632#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
633
634/* This value is returned if detect_coding_mask () find nothing other
635 than ASCII characters. */
636#define CATEGORY_MASK_ANY \
637 (CATEGORY_MASK_ISO_7 \
638 | CATEGORY_MASK_ISO_7_TIGHT \
639 | CATEGORY_MASK_ISO_8_1 \
640 | CATEGORY_MASK_ISO_8_2 \
641 | CATEGORY_MASK_ISO_7_ELSE \
642 | CATEGORY_MASK_ISO_8_ELSE \
643 | CATEGORY_MASK_UTF_8 \
644 | CATEGORY_MASK_UTF_16_BE \
645 | CATEGORY_MASK_UTF_16_LE \
646 | CATEGORY_MASK_UTF_16_BE_NOSIG \
647 | CATEGORY_MASK_UTF_16_LE_NOSIG \
648 | CATEGORY_MASK_CHARSET \
649 | CATEGORY_MASK_SJIS \
650 | CATEGORY_MASK_BIG5 \
651 | CATEGORY_MASK_CCL \
652 | CATEGORY_MASK_EMACS_MULE)
653
654
655#define CATEGORY_MASK_ISO_7BIT \
656 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
657
658#define CATEGORY_MASK_ISO_8BIT \
659 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
660
661#define CATEGORY_MASK_ISO_ELSE \
662 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
663
664#define CATEGORY_MASK_ISO_ESCAPE \
665 (CATEGORY_MASK_ISO_7 \
666 | CATEGORY_MASK_ISO_7_TIGHT \
667 | CATEGORY_MASK_ISO_7_ELSE \
668 | CATEGORY_MASK_ISO_8_ELSE)
669
670#define CATEGORY_MASK_ISO \
671 ( CATEGORY_MASK_ISO_7BIT \
672 | CATEGORY_MASK_ISO_8BIT \
673 | CATEGORY_MASK_ISO_ELSE)
674
675#define CATEGORY_MASK_UTF_16 \
676 (CATEGORY_MASK_UTF_16_BE \
677 | CATEGORY_MASK_UTF_16_LE \
678 | CATEGORY_MASK_UTF_16_BE_NOSIG \
679 | CATEGORY_MASK_UTF_16_LE_NOSIG)
680
681
682/* List of symbols `coding-category-xxx' ordered by priority. This
683 variable is exposed to Emacs Lisp. */
684static Lisp_Object Vcoding_category_list;
685
686/* Table of coding categories (Lisp symbols). This variable is for
687 internal use oly. */
688static Lisp_Object Vcoding_category_table;
689
690/* Table of coding-categories ordered by priority. */
691static enum coding_category coding_priorities[coding_category_max];
692
693/* Nth element is a coding context for the coding system bound to the
694 Nth coding category. */
695static struct coding_system coding_categories[coding_category_max];
696
697static int detected_mask[coding_category_raw_text] =
698 { CATEGORY_MASK_ISO,
699 CATEGORY_MASK_ISO,
700 CATEGORY_MASK_ISO,
701 CATEGORY_MASK_ISO,
702 CATEGORY_MASK_ISO,
703 CATEGORY_MASK_ISO,
704 CATEGORY_MASK_UTF_8,
705 CATEGORY_MASK_UTF_16,
706 CATEGORY_MASK_UTF_16,
707 CATEGORY_MASK_UTF_16,
708 CATEGORY_MASK_UTF_16,
709 CATEGORY_MASK_UTF_16,
710 CATEGORY_MASK_CHARSET,
711 CATEGORY_MASK_SJIS,
712 CATEGORY_MASK_BIG5,
713 CATEGORY_MASK_CCL,
714 CATEGORY_MASK_EMACS_MULE
715 };
716
717/*** Commonly used macros and functions ***/
718
719#ifndef min
720#define min(a, b) ((a) < (b) ? (a) : (b))
721#endif
722#ifndef max
723#define max(a, b) ((a) > (b) ? (a) : (b))
724#endif
4ed46869 725
df7492f9
KH
726#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
727 do { \
728 attrs = CODING_ID_ATTRS (coding->id); \
729 eol_type = CODING_ID_EOL_TYPE (coding->id); \
730 if (VECTORP (eol_type)) \
731 eol_type = Qunix; \
732 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
733 } while (0)
4ed46869 734
4ed46869 735
df7492f9
KH
736/* Safely get one byte from the source text pointed by SRC which ends
737 at SRC_END, and set C to that byte. If there are not enough bytes
738 in the source, it jumps to `no_more_source'. The caller
739 should declare and set these variables appropriately in advance:
740 src, src_end, multibytep
741*/
aa72b389 742
df7492f9 743#define ONE_MORE_BYTE(c) \
aa72b389 744 do { \
df7492f9
KH
745 if (src == src_end) \
746 { \
747 if (src_base < src) \
748 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
749 goto no_more_source; \
750 } \
751 c = *src++; \
752 if (multibytep && (c & 0x80)) \
753 { \
754 if ((c & 0xFE) != 0xC0) \
755 error ("Undecodable char found"); \
756 c = ((c & 1) << 6) | *src++; \
757 } \
758 consumed_chars++; \
aa72b389
KH
759 } while (0)
760
aa72b389 761
df7492f9
KH
762#define ONE_MORE_BYTE_NO_CHECK(c) \
763 do { \
764 c = *src++; \
765 if (multibytep && (c & 0x80)) \
766 { \
767 if ((c & 0xFE) != 0xC0) \
768 error ("Undecodable char found"); \
769 c = ((c & 1) << 6) | *src++; \
770 } \
aa72b389
KH
771 } while (0)
772
aa72b389 773
df7492f9
KH
774/* Store a byte C in the place pointed by DST and increment DST to the
775 next free point, and increment PRODUCED_CHARS. The caller should
776 assure that C is 0..127, and declare and set the variable `dst'
777 appropriately in advance.
778*/
aa72b389
KH
779
780
df7492f9
KH
781#define EMIT_ONE_ASCII_BYTE(c) \
782 do { \
783 produced_chars++; \
784 *dst++ = (c); \
785 } while (0)
aa72b389 786
aa72b389 787
df7492f9 788/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
aa72b389 789
df7492f9
KH
790#define EMIT_TWO_ASCII_BYTES(c1, c2) \
791 do { \
792 produced_chars += 2; \
793 *dst++ = (c1), *dst++ = (c2); \
794 } while (0)
aa72b389 795
df7492f9
KH
796
797/* Store a byte C in the place pointed by DST and increment DST to the
798 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
799 nonzero, store in an appropriate multibyte from. The caller should
800 declare and set the variables `dst' and `multibytep' appropriately
801 in advance. */
802
803#define EMIT_ONE_BYTE(c) \
804 do { \
805 produced_chars++; \
806 if (multibytep) \
807 { \
808 int ch = (c); \
809 if (ch >= 0x80) \
810 ch = BYTE8_TO_CHAR (ch); \
811 CHAR_STRING_ADVANCE (ch, dst); \
812 } \
813 else \
814 *dst++ = (c); \
aa72b389
KH
815 } while (0)
816
817
df7492f9 818/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 819
e19c3639
KH
820#define EMIT_TWO_BYTES(c1, c2) \
821 do { \
822 produced_chars += 2; \
823 if (multibytep) \
824 { \
825 int ch; \
826 \
827 ch = (c1); \
828 if (ch >= 0x80) \
829 ch = BYTE8_TO_CHAR (ch); \
830 CHAR_STRING_ADVANCE (ch, dst); \
831 ch = (c2); \
832 if (ch >= 0x80) \
833 ch = BYTE8_TO_CHAR (ch); \
834 CHAR_STRING_ADVANCE (ch, dst); \
835 } \
836 else \
837 { \
838 *dst++ = (c1); \
839 *dst++ = (c2); \
840 } \
aa72b389
KH
841 } while (0)
842
843
df7492f9
KH
844#define EMIT_THREE_BYTES(c1, c2, c3) \
845 do { \
846 EMIT_ONE_BYTE (c1); \
847 EMIT_TWO_BYTES (c2, c3); \
848 } while (0)
aa72b389 849
aa72b389 850
df7492f9
KH
851#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
852 do { \
853 EMIT_TWO_BYTES (c1, c2); \
854 EMIT_TWO_BYTES (c3, c4); \
855 } while (0)
aa72b389 856
aa72b389 857
df7492f9
KH
858#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
859 do { \
860 charset_map_loaded = 0; \
861 c = DECODE_CHAR (charset, code); \
862 if (charset_map_loaded) \
863 { \
864 unsigned char *orig = coding->source; \
865 EMACS_INT offset; \
866 \
867 coding_set_source (coding); \
868 offset = coding->source - orig; \
869 src += offset; \
870 src_base += offset; \
871 src_end += offset; \
872 } \
873 } while (0)
aa72b389 874
aa72b389 875
df7492f9
KH
876#define ASSURE_DESTINATION(bytes) \
877 do { \
878 if (dst + (bytes) >= dst_end) \
879 { \
880 int more_bytes = charbuf_end - charbuf + (bytes); \
881 \
882 dst = alloc_destination (coding, more_bytes, dst); \
883 dst_end = coding->destination + coding->dst_bytes; \
884 } \
885 } while (0)
b1887814 886
df7492f9
KH
887
888
889static void
890coding_set_source (coding)
891 struct coding_system *coding;
892{
893 if (BUFFERP (coding->src_object))
894 {
895 if (coding->src_pos < 0)
896 coding->source = GAP_END_ADDR + coding->src_pos_byte;
897 else
898 {
e19c3639
KH
899 struct buffer *buf = XBUFFER (coding->src_object);
900 EMACS_INT beg_byte = BUF_BEG_BYTE (buf);
901 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
902 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
903
904 coding->source = beg_addr + coding->src_pos_byte - 1;
905 if (coding->src_pos_byte >= gpt_byte)
906 coding->source += BUF_GAP_SIZE (buf);
aa72b389
KH
907 }
908 }
df7492f9 909 else if (STRINGP (coding->src_object))
aa72b389 910 {
df7492f9
KH
911 coding->source = (XSTRING (coding->src_object)->data
912 + coding->src_pos_byte);
913 }
914 else
915 /* Otherwise, the source is C string and is never relocated
916 automatically. Thus we don't have to update anything. */
917 ;
918}
919
920static void
921coding_set_destination (coding)
922 struct coding_system *coding;
923{
924 if (BUFFERP (coding->dst_object))
925 {
926 /* We are sure that coding->dst_pos_byte is before the gap of the
927 buffer. */
928 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
929 + coding->dst_pos_byte - 1);
930 if (coding->src_pos < 0)
df7492f9
KH
931 coding->dst_bytes = (GAP_END_ADDR
932 - (coding->src_bytes - coding->consumed)
933 - coding->destination);
934 else
935 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
936 - coding->destination);
937 }
938 else
939 /* Otherwise, the destination is C string and is never relocated
940 automatically. Thus we don't have to update anything. */
941 ;
942}
943
944
945static void
946coding_alloc_by_realloc (coding, bytes)
947 struct coding_system *coding;
948 EMACS_INT bytes;
949{
950 coding->destination = (unsigned char *) xrealloc (coding->destination,
951 coding->dst_bytes + bytes);
952 coding->dst_bytes += bytes;
953}
954
955static void
956coding_alloc_by_making_gap (coding, bytes)
957 struct coding_system *coding;
958 EMACS_INT bytes;
959{
2c78b7e1
KH
960 if (BUFFERP (coding->dst_object)
961 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
962 {
963 EMACS_INT add = coding->src_bytes - coding->consumed;
964
965 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
966 make_gap (bytes);
967 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
968 }
969 else
970 {
2c78b7e1
KH
971 Lisp_Object this_buffer;
972
973 this_buffer = Fcurrent_buffer ();
df7492f9
KH
974 set_buffer_internal (XBUFFER (coding->dst_object));
975 make_gap (bytes);
976 set_buffer_internal (XBUFFER (this_buffer));
977 }
978}
979
980
981static unsigned char *
982alloc_destination (coding, nbytes, dst)
983 struct coding_system *coding;
984 int nbytes;
985 unsigned char *dst;
986{
987 EMACS_INT offset = dst - coding->destination;
988
989 if (BUFFERP (coding->dst_object))
990 coding_alloc_by_making_gap (coding, nbytes);
991 else
992 coding_alloc_by_realloc (coding, nbytes);
993 coding->result = CODING_RESULT_SUCCESS;
994 coding_set_destination (coding);
995 dst = coding->destination + offset;
996 return dst;
997}
aa72b389 998
df7492f9
KH
999\f
1000/*** 2. Emacs' internal format (emacs-utf-8) ***/
1001
1002
1003
1004\f
1005/*** 3. UTF-8 ***/
1006
1007/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1008 Check if a text is encoded in UTF-8. If it is, return
1009 CATEGORY_MASK_UTF_8, else return 0. */
1010
1011#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1012#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1013#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1014#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1015#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1016#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1017
1018static int
1019detect_coding_utf_8 (coding, mask)
1020 struct coding_system *coding;
1021 int *mask;
1022{
1023 unsigned char *src = coding->source, *src_base = src;
1024 unsigned char *src_end = coding->source + coding->src_bytes;
1025 int multibytep = coding->src_multibyte;
1026 int consumed_chars = 0;
1027 int found = 0;
1028
1029 /* A coding system of this category is always ASCII compatible. */
1030 src += coding->head_ascii;
1031
1032 while (1)
1033 {
1034 int c, c1, c2, c3, c4;
1035
1036 ONE_MORE_BYTE (c);
1037 if (UTF_8_1_OCTET_P (c))
1038 continue;
1039 ONE_MORE_BYTE (c1);
1040 if (! UTF_8_EXTRA_OCTET_P (c1))
1041 break;
1042 if (UTF_8_2_OCTET_LEADING_P (c))
1043 {
1044 found++;
1045 continue;
1046 }
1047 ONE_MORE_BYTE (c2);
1048 if (! UTF_8_EXTRA_OCTET_P (c2))
1049 break;
1050 if (UTF_8_3_OCTET_LEADING_P (c))
1051 {
1052 found++;
1053 continue;
1054 }
1055 ONE_MORE_BYTE (c3);
1056 if (! UTF_8_EXTRA_OCTET_P (c3))
1057 break;
1058 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1059 {
df7492f9
KH
1060 found++;
1061 continue;
1062 }
1063 ONE_MORE_BYTE (c4);
1064 if (! UTF_8_EXTRA_OCTET_P (c4))
1065 break;
1066 if (UTF_8_5_OCTET_LEADING_P (c))
1067 {
1068 found++;
1069 continue;
1070 }
1071 break;
1072 }
1073 *mask &= ~CATEGORY_MASK_UTF_8;
1074 return 0;
1075
1076 no_more_source:
1077 if (! found)
1078 return 0;
1079 *mask &= CATEGORY_MASK_UTF_8;
1080 return 1;
1081}
1082
1083
1084static void
1085decode_coding_utf_8 (coding)
1086 struct coding_system *coding;
1087{
1088 unsigned char *src = coding->source + coding->consumed;
1089 unsigned char *src_end = coding->source + coding->src_bytes;
1090 unsigned char *src_base;
1091 int *charbuf = coding->charbuf;
1092 int *charbuf_end = charbuf + coding->charbuf_size;
1093 int consumed_chars = 0, consumed_chars_base;
1094 int multibytep = coding->src_multibyte;
1095 Lisp_Object attr, eol_type, charset_list;
1096
1097 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1098
1099 while (1)
1100 {
1101 int c, c1, c2, c3, c4, c5;
1102
1103 src_base = src;
1104 consumed_chars_base = consumed_chars;
1105
1106 if (charbuf >= charbuf_end)
1107 break;
1108
1109 ONE_MORE_BYTE (c1);
1110 if (UTF_8_1_OCTET_P(c1))
1111 {
1112 c = c1;
1113 if (c == '\r')
aa72b389 1114 {
df7492f9
KH
1115 if (EQ (eol_type, Qdos))
1116 {
1117 if (src == src_end)
1118 goto no_more_source;
1119 if (*src == '\n')
1120 ONE_MORE_BYTE (c);
1121 }
1122 else if (EQ (eol_type, Qmac))
1123 c = '\n';
aa72b389 1124 }
aa72b389 1125 }
df7492f9 1126 else
aa72b389 1127 {
df7492f9
KH
1128 ONE_MORE_BYTE (c2);
1129 if (! UTF_8_EXTRA_OCTET_P (c2))
1130 goto invalid_code;
1131 if (UTF_8_2_OCTET_LEADING_P (c1))
1132 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1133 else
aa72b389 1134 {
df7492f9
KH
1135 ONE_MORE_BYTE (c3);
1136 if (! UTF_8_EXTRA_OCTET_P (c3))
1137 goto invalid_code;
1138 if (UTF_8_3_OCTET_LEADING_P (c1))
1139 c = (((c1 & 0xF) << 12)
1140 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1141 else
1142 {
1143 ONE_MORE_BYTE (c4);
1144 if (! UTF_8_EXTRA_OCTET_P (c4))
1145 goto invalid_code;
1146 if (UTF_8_4_OCTET_LEADING_P (c1))
1147 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1148 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1149 else
1150 {
1151 ONE_MORE_BYTE (c5);
1152 if (! UTF_8_EXTRA_OCTET_P (c5))
1153 goto invalid_code;
1154 if (UTF_8_5_OCTET_LEADING_P (c1))
1155 {
1156 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1157 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1158 | (c5 & 0x3F));
1159 if (c > MAX_CHAR)
1160 goto invalid_code;
1161 }
1162 else
1163 goto invalid_code;
1164 }
1165 }
aa72b389 1166 }
aa72b389 1167 }
df7492f9
KH
1168
1169 *charbuf++ = c;
1170 continue;
1171
1172 invalid_code:
1173 src = src_base;
1174 consumed_chars = consumed_chars_base;
1175 ONE_MORE_BYTE (c);
1176 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1177 coding->errors++;
aa72b389
KH
1178 }
1179
df7492f9
KH
1180 no_more_source:
1181 coding->consumed_char += consumed_chars_base;
1182 coding->consumed = src_base - coding->source;
1183 coding->charbuf_used = charbuf - coding->charbuf;
1184}
1185
1186
1187static int
1188encode_coding_utf_8 (coding)
1189 struct coding_system *coding;
1190{
1191 int multibytep = coding->dst_multibyte;
1192 int *charbuf = coding->charbuf;
1193 int *charbuf_end = charbuf + coding->charbuf_used;
1194 unsigned char *dst = coding->destination + coding->produced;
1195 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1196 int produced_chars = 0;
df7492f9
KH
1197 int c;
1198
1199 if (multibytep)
aa72b389 1200 {
df7492f9
KH
1201 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1202
1203 while (charbuf < charbuf_end)
aa72b389 1204 {
df7492f9
KH
1205 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1206
1207 ASSURE_DESTINATION (safe_room);
1208 c = *charbuf++;
1209 CHAR_STRING_ADVANCE (c, pend);
1210 for (p = str; p < pend; p++)
1211 EMIT_ONE_BYTE (*p);
aa72b389 1212 }
aa72b389 1213 }
df7492f9
KH
1214 else
1215 {
1216 int safe_room = MAX_MULTIBYTE_LENGTH;
1217
1218 while (charbuf < charbuf_end)
1219 {
1220 ASSURE_DESTINATION (safe_room);
1221 c = *charbuf++;
1222 dst += CHAR_STRING (c, dst);
1223 produced_chars++;
1224 }
1225 }
1226 coding->result = CODING_RESULT_SUCCESS;
1227 coding->produced_char += produced_chars;
1228 coding->produced = dst - coding->destination;
1229 return 0;
aa72b389
KH
1230}
1231
4ed46869 1232
df7492f9
KH
1233/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1234 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1235 Little Endian (otherwise). If it is, return
1236 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
1237 else return 0. */
1238
1239#define UTF_16_HIGH_SURROGATE_P(val) \
1240 (((val) & 0xFC00) == 0xD800)
1241
1242#define UTF_16_LOW_SURROGATE_P(val) \
1243 (((val) & 0xFC00) == 0xDC00)
1244
1245#define UTF_16_INVALID_P(val) \
1246 (((val) == 0xFFFE) \
1247 || ((val) == 0xFFFF) \
1248 || UTF_16_LOW_SURROGATE_P (val))
1249
1250
1251static int
1252detect_coding_utf_16 (coding, mask)
b73bfc1c 1253 struct coding_system *coding;
df7492f9 1254 int *mask;
b73bfc1c 1255{
df7492f9
KH
1256 unsigned char *src = coding->source, *src_base = src;
1257 unsigned char *src_end = coding->source + coding->src_bytes;
1258 int multibytep = coding->src_multibyte;
1259 int consumed_chars = 0;
1260 int c1, c2;
1261
1262 ONE_MORE_BYTE (c1);
1263 ONE_MORE_BYTE (c2);
4ed46869 1264
df7492f9 1265 if ((c1 == 0xFF) && (c2 == 0xFE))
b73bfc1c 1266 {
df7492f9
KH
1267 *mask &= CATEGORY_MASK_UTF_16_LE;
1268 return 1;
1269 }
1270 else if ((c1 == 0xFE) && (c2 == 0xFF))
1271 {
1272 *mask &= CATEGORY_MASK_UTF_16_BE;
1273 return 1;
1274 }
1275 no_more_source:
1276 return 0;
1277}
ec6d2bb8 1278
df7492f9
KH
1279static void
1280decode_coding_utf_16 (coding)
1281 struct coding_system *coding;
1282{
1283 unsigned char *src = coding->source + coding->consumed;
1284 unsigned char *src_end = coding->source + coding->src_bytes;
0be8721c 1285 unsigned char *src_base;
df7492f9
KH
1286 int *charbuf = coding->charbuf;
1287 int *charbuf_end = charbuf + coding->charbuf_size;
1288 int consumed_chars = 0, consumed_chars_base;
1289 int multibytep = coding->src_multibyte;
1290 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1291 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1292 int surrogate = CODING_UTF_16_SURROGATE (coding);
1293 Lisp_Object attr, eol_type, charset_list;
1294
1295 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1296
1297 if (bom != utf_16_without_bom)
1298 {
1299 int c, c1, c2;
4af310db 1300
df7492f9
KH
1301 src_base = src;
1302 ONE_MORE_BYTE (c1);
1303 ONE_MORE_BYTE (c2);
e19c3639 1304 c = (c1 << 8) | c2;
df7492f9
KH
1305 if (bom == utf_16_with_bom)
1306 {
1307 if (endian == utf_16_big_endian
1308 ? c != 0xFFFE : c != 0xFEFF)
4af310db 1309 {
df7492f9
KH
1310 /* We are sure that there's enouph room at CHARBUF. */
1311 *charbuf++ = c1;
1312 *charbuf++ = c2;
1313 coding->errors++;
4af310db 1314 }
4af310db 1315 }
df7492f9 1316 else
4af310db 1317 {
df7492f9
KH
1318 if (c == 0xFFFE)
1319 CODING_UTF_16_ENDIAN (coding)
1320 = endian = utf_16_big_endian;
1321 else if (c == 0xFEFF)
1322 CODING_UTF_16_ENDIAN (coding)
1323 = endian = utf_16_little_endian;
1324 else
4af310db 1325 {
df7492f9
KH
1326 CODING_UTF_16_ENDIAN (coding)
1327 = endian = utf_16_big_endian;
1328 src = src_base;
4af310db 1329 }
4af310db 1330 }
df7492f9
KH
1331 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1332 }
1333
1334 while (1)
1335 {
1336 int c, c1, c2;
1337
1338 src_base = src;
1339 consumed_chars_base = consumed_chars;
1340
1341 if (charbuf + 2 >= charbuf_end)
1342 break;
1343
1344 ONE_MORE_BYTE (c1);
1345 ONE_MORE_BYTE (c2);
1346 c = (endian == utf_16_big_endian
e19c3639 1347 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1348 if (surrogate)
aa72b389 1349 {
df7492f9 1350 if (! UTF_16_LOW_SURROGATE_P (c))
aa72b389 1351 {
df7492f9
KH
1352 if (endian == utf_16_big_endian)
1353 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1354 else
1355 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1356 *charbuf++ = c1;
1357 *charbuf++ = c2;
1358 coding->errors++;
1359 if (UTF_16_HIGH_SURROGATE_P (c))
1360 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1361 else
1362 *charbuf++ = c;
aa72b389 1363 }
df7492f9
KH
1364 else
1365 {
1366 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1367 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1368 *charbuf++ = c;
1369 }
1370 }
1371 else
1372 {
1373 if (UTF_16_HIGH_SURROGATE_P (c))
1374 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1375 else
1376 *charbuf++ = c;
1377 }
1378 }
1379
1380 no_more_source:
1381 coding->consumed_char += consumed_chars_base;
1382 coding->consumed = src_base - coding->source;
1383 coding->charbuf_used = charbuf - coding->charbuf;
1384}
1385
1386static int
1387encode_coding_utf_16 (coding)
1388 struct coding_system *coding;
1389{
1390 int multibytep = coding->dst_multibyte;
1391 int *charbuf = coding->charbuf;
1392 int *charbuf_end = charbuf + coding->charbuf_used;
1393 unsigned char *dst = coding->destination + coding->produced;
1394 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1395 int safe_room = 8;
1396 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1397 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1398 int produced_chars = 0;
1399 Lisp_Object attrs, eol_type, charset_list;
1400 int c;
1401
1402 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1403
1404 if (bom == utf_16_with_bom)
1405 {
1406 ASSURE_DESTINATION (safe_room);
1407 if (big_endian)
1408 EMIT_TWO_BYTES (0xFF, 0xFE);
1409 else
1410 EMIT_TWO_BYTES (0xFE, 0xFF);
1411 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1412 }
1413
1414 while (charbuf < charbuf_end)
1415 {
1416 ASSURE_DESTINATION (safe_room);
1417 c = *charbuf++;
e19c3639
KH
1418 if (c >= MAX_UNICODE_CHAR)
1419 c = coding->default_char;
df7492f9
KH
1420
1421 if (c < 0x10000)
1422 {
1423 if (big_endian)
1424 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1425 else
1426 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1427 }
1428 else
1429 {
1430 int c1, c2;
1431
1432 c -= 0x10000;
1433 c1 = (c >> 10) + 0xD800;
1434 c2 = (c & 0x3FF) + 0xDC00;
1435 if (big_endian)
1436 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1437 else
1438 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1439 }
1440 }
1441 coding->result = CODING_RESULT_SUCCESS;
1442 coding->produced = dst - coding->destination;
1443 coding->produced_char += produced_chars;
1444 return 0;
1445}
1446
1447\f
1448/*** 6. Old Emacs' internal format (emacs-mule) ***/
1449
1450/* Emacs' internal format for representation of multiple character
1451 sets is a kind of multi-byte encoding, i.e. characters are
1452 represented by variable-length sequences of one-byte codes.
1453
1454 ASCII characters and control characters (e.g. `tab', `newline') are
1455 represented by one-byte sequences which are their ASCII codes, in
1456 the range 0x00 through 0x7F.
1457
1458 8-bit characters of the range 0x80..0x9F are represented by
1459 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1460 code + 0x20).
1461
1462 8-bit characters of the range 0xA0..0xFF are represented by
1463 one-byte sequences which are their 8-bit code.
1464
1465 The other characters are represented by a sequence of `base
1466 leading-code', optional `extended leading-code', and one or two
1467 `position-code's. The length of the sequence is determined by the
1468 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1469 whereas extended leading-code and position-code take the range 0xA0
1470 through 0xFF. See `charset.h' for more details about leading-code
1471 and position-code.
1472
1473 --- CODE RANGE of Emacs' internal format ---
1474 character set range
1475 ------------- -----
1476 ascii 0x00..0x7F
1477 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1478 eight-bit-graphic 0xA0..0xBF
1479 ELSE 0x81..0x9D + [0xA0..0xFF]+
1480 ---------------------------------------------
1481
1482 As this is the internal character representation, the format is
1483 usually not used externally (i.e. in a file or in a data sent to a
1484 process). But, it is possible to have a text externally in this
1485 format (i.e. by encoding by the coding system `emacs-mule').
1486
1487 In that case, a sequence of one-byte codes has a slightly different
1488 form.
1489
1490 At first, all characters in eight-bit-control are represented by
1491 one-byte sequences which are their 8-bit code.
1492
1493 Next, character composition data are represented by the byte
1494 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1495 where,
1496 METHOD is 0xF0 plus one of composition method (enum
1497 composition_method),
1498
1499 BYTES is 0xA0 plus a byte length of this composition data,
1500
1501 CHARS is 0x20 plus a number of characters composed by this
1502 data,
1503
1504 COMPONENTs are characters of multibye form or composition
1505 rules encoded by two-byte of ASCII codes.
1506
1507 In addition, for backward compatibility, the following formats are
1508 also recognized as composition data on decoding.
1509
1510 0x80 MSEQ ...
1511 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1512
1513 Here,
1514 MSEQ is a multibyte form but in these special format:
1515 ASCII: 0xA0 ASCII_CODE+0x80,
1516 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1517 RULE is a one byte code of the range 0xA0..0xF0 that
1518 represents a composition rule.
1519 */
1520
1521char emacs_mule_bytes[256];
1522
1523/* Leading-code followed by extended leading-code. */
1524#define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
1525#define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
1526#define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1527#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1528
1529
1530int
1531emacs_mule_char (coding, composition, nbytes, nchars)
1532 struct coding_system *coding;
1533 int composition;
1534 int *nbytes, *nchars;
1535{
1536 unsigned char *src = coding->source + coding->consumed;
1537 unsigned char *src_end = coding->source + coding->src_bytes;
1538 int multibytep = coding->src_multibyte;
1539 unsigned char *src_base = src;
1540 struct charset *charset;
1541 unsigned code;
1542 int c;
1543 int consumed_chars = 0;
1544
1545 ONE_MORE_BYTE (c);
1546 if (composition)
1547 {
1548 c -= 0x20;
1549 if (c == 0x80)
1550 {
1551 ONE_MORE_BYTE (c);
1552 if (c < 0xA0)
1553 goto invalid_code;
1554 *nbytes = src - src_base;
1555 *nchars = consumed_chars;
1556 return (c - 0x80);
aa72b389 1557 }
df7492f9
KH
1558 }
1559
1560 switch (emacs_mule_bytes[c])
1561 {
1562 case 2:
1563 if (! (charset = emacs_mule_charset[c]))
1564 goto invalid_code;
1565 ONE_MORE_BYTE (c);
1566 code = c & 0x7F;
1567 break;
1568
1569 case 3:
1570 if (c == LEADING_CODE_PRIVATE_11
1571 || c == LEADING_CODE_PRIVATE_12)
b73bfc1c 1572 {
df7492f9
KH
1573 ONE_MORE_BYTE (c);
1574 if (! (charset = emacs_mule_charset[c]))
1575 goto invalid_code;
1576 ONE_MORE_BYTE (c);
1577 code = c & 0x7F;
b73bfc1c
KH
1578 }
1579 else
1580 {
df7492f9
KH
1581 if (! (charset = emacs_mule_charset[c]))
1582 goto invalid_code;
1583 ONE_MORE_BYTE (c);
1584 code = (c & 0x7F) << 7;
1585 ONE_MORE_BYTE (c);
1586 code |= c & 0x7F;
1587 }
1588 break;
1589
1590 case 4:
1591 if (! (charset = emacs_mule_charset[c]))
1592 goto invalid_code;
1593 ONE_MORE_BYTE (c);
1594 code = (c & 0x7F) << 7;
1595 ONE_MORE_BYTE (c);
1596 code |= c & 0x7F;
1597 break;
1598
1599 case 1:
1600 code = c;
1601 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) ? charset_ascii
1602 : code < 0xA0 ? charset_8_bit_control
1603 : charset_8_bit_graphic);
1604 break;
1605
1606 default:
1607 abort ();
1608 }
1609 c = DECODE_CHAR (charset, code);
1610 if (c < 0)
1611 goto invalid_code;
1612 *nbytes = src - src_base;
1613 *nchars = consumed_chars;
1614 return c;
1615
1616 no_more_source:
1617 return -2;
1618
1619 invalid_code:
1620 return -1;
1621}
1622
1623
1624/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1625 Check if a text is encoded in `emacs-mule'. */
1626
1627static int
1628detect_coding_emacs_mule (coding, mask)
1629 struct coding_system *coding;
1630 int *mask;
1631{
1632 unsigned char *src = coding->source, *src_base = src;
1633 unsigned char *src_end = coding->source + coding->src_bytes;
1634 int multibytep = coding->src_multibyte;
1635 int consumed_chars = 0;
1636 int c;
1637 int found = 0;
1638
1639 /* A coding system of this category is always ASCII compatible. */
1640 src += coding->head_ascii;
1641
1642 while (1)
1643 {
1644 ONE_MORE_BYTE (c);
1645
1646 if (c == 0x80)
1647 {
1648 /* Perhaps the start of composite character. We simple skip
1649 it because analyzing it is too heavy for detecting. But,
1650 at least, we check that the composite character
1651 constitues of more than 4 bytes. */
1652 unsigned char *src_base;
1653
1654 repeat:
1655 src_base = src;
1656 do
1657 {
1658 ONE_MORE_BYTE (c);
1659 }
1660 while (c >= 0xA0);
1661
1662 if (src - src_base <= 4)
1663 break;
1664 found = 1;
1665 if (c == 0x80)
1666 goto repeat;
b73bfc1c 1667 }
df7492f9
KH
1668
1669 if (c < 0x80)
b73bfc1c 1670 {
df7492f9
KH
1671 if (c < 0x20
1672 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1673 break;
1674 }
1675 else
1676 {
1677 unsigned char *src_base = src - 1;
1678
1679 do
1680 {
1681 ONE_MORE_BYTE (c);
1682 }
1683 while (c >= 0xA0);
1684 if (src - src_base != emacs_mule_bytes[*src_base])
1685 break;
1686 found = 1;
4ed46869
KH
1687 }
1688 }
df7492f9
KH
1689 *mask &= ~CATEGORY_MASK_EMACS_MULE;
1690 return 0;
1691
1692 no_more_source:
1693 if (!found)
1694 return 0;
1695 *mask &= CATEGORY_MASK_EMACS_MULE;
1696 return 1;
4ed46869
KH
1697}
1698
b73bfc1c 1699
df7492f9
KH
1700/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1701
1702/* Decode a character represented as a component of composition
1703 sequence of Emacs 20/21 style at SRC. Set C to that character and
1704 update SRC to the head of next character (or an encoded composition
1705 rule). If SRC doesn't points a composition component, set C to -1.
1706 If SRC points an invalid byte sequence, global exit by a return
1707 value 0. */
1708
1709#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1710 if (1) \
1711 { \
1712 int c; \
1713 int nbytes, nchars; \
1714 \
1715 if (src == src_end) \
1716 break; \
1717 c = emacs_mule_char (coding, 1, &nbytes, &nchars); \
1718 if (c < 0) \
1719 { \
1720 if (c == -2) \
1721 break; \
1722 goto invalid_code; \
1723 } \
1724 *buf++ = c; \
1725 src += nbytes; \
1726 consumed_chars += nchars; \
1727 } \
1728 else
1729
1730
1731/* Decode a composition rule represented as a component of composition
1732 sequence of Emacs 20 style at SRC. Set C to the rule. If SRC
1733 points an invalid byte sequence, set C to -1. */
1734
1735#define DECODE_EMACS_MULE_COMPOSITION_RULE(buf) \
1736 do { \
1737 int c, gref, nref; \
1738 \
1739 if (src < src_end) \
1740 goto invalid_code; \
1741 ONE_MORE_BYTE_NO_CHECK (c); \
1742 c -= 0xA0; \
1743 if (c < 0 || c >= 81) \
1744 goto invalid_code; \
1745 \
1746 gref = c / 9, nref = c % 9; \
1747 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1748 } while (0)
1749
1750
1751#define ADD_COMPOSITION_DATA(buf, method, nchars) \
1752 do { \
1753 *buf++ = -5; \
1754 *buf++ = coding->produced_char + char_offset; \
1755 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1756 *buf++ = method; \
1757 *buf++ = nchars; \
1758 } while (0)
aa72b389 1759
df7492f9
KH
1760
1761#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
aa72b389 1762 do { \
df7492f9
KH
1763 /* Emacs 21 style format. The first three bytes at SRC are \
1764 (METHOD - 0xF0), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1765 the byte length of this composition information, CHARS is the \
1766 number of characters composed by this composition. */ \
1767 enum composition_method method = c - 0xF0; \
1768 int consumed_chars_limit; \
1769 int nbytes, nchars; \
1770 \
1771 ONE_MORE_BYTE (c); \
1772 nbytes = c - 0xA0; \
1773 if (nbytes < 3) \
1774 goto invalid_code; \
1775 ONE_MORE_BYTE (c); \
1776 nchars = c - 0xA0; \
1777 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1778 consumed_chars_limit = consumed_chars_base + nbytes; \
1779 if (method != COMPOSITION_RELATIVE) \
aa72b389 1780 { \
df7492f9
KH
1781 int i = 0; \
1782 while (consumed_chars < consumed_chars_limit) \
aa72b389 1783 { \
df7492f9
KH
1784 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1785 DECODE_EMACS_MULE_COMPOSITION_RULE (charbuf); \
1786 else \
1787 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
aa72b389 1788 } \
df7492f9
KH
1789 if (consumed_chars < consumed_chars_limit) \
1790 goto invalid_code; \
aa72b389
KH
1791 } \
1792 } while (0)
93dec019 1793
aa72b389 1794
df7492f9
KH
1795#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1796 do { \
1797 /* Emacs 20 style format for relative composition. */ \
1798 /* Store multibyte form of characters to be composed. */ \
1799 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1800 int *buf = components; \
1801 int i, j; \
1802 \
1803 src = src_base; \
1804 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1805 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1806 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1807 if (i < 2) \
1808 goto invalid_code; \
1809 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1810 for (j = 0; j < i; j++) \
1811 *charbuf++ = components[j]; \
1812 } while (0)
1813
1814
1815#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1816 do { \
1817 /* Emacs 20 style format for rule-base composition. */ \
1818 /* Store multibyte form of characters to be composed. */ \
1819 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1820 int *buf = components; \
1821 int i, j; \
1822 \
1823 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1824 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1825 { \
1826 DECODE_EMACS_MULE_COMPOSITION_RULE (buf); \
1827 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1828 } \
1829 if (i < 1 || (buf - components) % 2 == 0) \
1830 goto invalid_code; \
1831 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1832 goto no_more_source; \
1833 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1834 for (j = 0; j < i; j++) \
1835 *charbuf++ = components[j]; \
1836 for (j = 0; j < i; j += 2) \
1837 *charbuf++ = components[j]; \
1838 } while (0)
1839
aa72b389
KH
1840
1841static void
df7492f9 1842decode_coding_emacs_mule (coding)
aa72b389 1843 struct coding_system *coding;
aa72b389 1844{
df7492f9
KH
1845 unsigned char *src = coding->source + coding->consumed;
1846 unsigned char *src_end = coding->source + coding->src_bytes;
aa72b389 1847 unsigned char *src_base;
df7492f9
KH
1848 int *charbuf = coding->charbuf;
1849 int *charbuf_end = charbuf + coding->charbuf_size;
1850 int consumed_chars = 0, consumed_chars_base;
1851 int char_offset = 0;
1852 int multibytep = coding->src_multibyte;
1853 Lisp_Object attrs, eol_type, charset_list;
aa72b389 1854
df7492f9 1855 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1856
aa72b389
KH
1857 while (1)
1858 {
df7492f9
KH
1859 int c;
1860
aa72b389 1861 src_base = src;
df7492f9
KH
1862 consumed_chars_base = consumed_chars;
1863
1864 if (charbuf >= charbuf_end)
1865 break;
aa72b389 1866
df7492f9
KH
1867 ONE_MORE_BYTE (c);
1868
1869 if (c < 0x80)
aa72b389 1870 {
df7492f9
KH
1871 if (c == '\r')
1872 {
1873 if (EQ (eol_type, Qdos))
1874 {
1875 if (src == src_end)
1876 goto no_more_source;
1877 if (*src == '\n')
1878 ONE_MORE_BYTE (c);
1879 }
1880 else if (EQ (eol_type, Qmac))
1881 c = '\n';
1882 }
1883 *charbuf++ = c;
1884 char_offset++;
aa72b389 1885 }
df7492f9
KH
1886 else if (c == 0x80)
1887 {
1888 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1889 break;
1890 ONE_MORE_BYTE (c);
1891 if (c - 0xF0 >= COMPOSITION_RELATIVE
1892 && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
1893 DECODE_EMACS_MULE_21_COMPOSITION (c);
1894 else if (c < 0xC0)
1895 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1896 else if (c == 0xFF)
1897 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1898 else
1899 goto invalid_code;
1900 }
1901 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1902 {
1903 int nbytes, nchars;
1904 src--;
1905 c = emacs_mule_char (coding, 0, &nbytes, &nchars);
1906 if (c < 0)
1907 {
1908 if (c == -2)
1909 break;
1910 goto invalid_code;
1911 }
1912 *charbuf++ = c;
1913 char_offset++;
1914 }
1915 continue;
1916
1917 invalid_code:
1918 src = src_base;
1919 consumed_chars = consumed_chars_base;
1920 ONE_MORE_BYTE (c);
1921 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1922 coding->errors++;
1923 }
1924
1925 no_more_source:
1926 coding->consumed_char += consumed_chars_base;
1927 coding->consumed = src_base - coding->source;
1928 coding->charbuf_used = charbuf - coding->charbuf;
1929}
1930
1931
1932#define EMACS_MULE_LEADING_CODES(id, codes) \
1933 do { \
1934 if (id < 0xA0) \
1935 codes[0] = id, codes[1] = 0; \
1936 else if (id < 0xE0) \
1937 codes[0] = 0x9A, codes[1] = id; \
1938 else if (id < 0xF0) \
1939 codes[0] = 0x9B, codes[1] = id; \
1940 else if (id < 0xF5) \
1941 codes[0] = 0x9C, codes[1] = id; \
1942 else \
1943 codes[0] = 0x9D, codes[1] = id; \
1944 } while (0);
1945
aa72b389 1946
df7492f9
KH
1947static int
1948encode_coding_emacs_mule (coding)
1949 struct coding_system *coding;
1950{
1951 int multibytep = coding->dst_multibyte;
1952 int *charbuf = coding->charbuf;
1953 int *charbuf_end = charbuf + coding->charbuf_used;
1954 unsigned char *dst = coding->destination + coding->produced;
1955 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1956 int safe_room = 8;
df7492f9
KH
1957 int produced_chars = 0;
1958 Lisp_Object attrs, eol_type, charset_list;
1959 int c;
1960
1961 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1962
1963 while (charbuf < charbuf_end)
1964 {
1965 ASSURE_DESTINATION (safe_room);
1966 c = *charbuf++;
1967 if (ASCII_CHAR_P (c))
1968 EMIT_ONE_ASCII_BYTE (c);
1969 else
aa72b389 1970 {
df7492f9
KH
1971 struct charset *charset;
1972 unsigned code;
1973 int dimension;
1974 int emacs_mule_id;
1975 unsigned char leading_codes[2];
1976
1977 charset = char_charset (c, charset_list, &code);
1978 if (! charset)
1979 {
1980 c = coding->default_char;
1981 if (ASCII_CHAR_P (c))
1982 {
1983 EMIT_ONE_ASCII_BYTE (c);
1984 continue;
1985 }
1986 charset = char_charset (c, charset_list, &code);
1987 }
1988 dimension = CHARSET_DIMENSION (charset);
1989 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
1990 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
1991 EMIT_ONE_BYTE (leading_codes[0]);
1992 if (leading_codes[1])
1993 EMIT_ONE_BYTE (leading_codes[1]);
1994 if (dimension == 1)
1995 EMIT_ONE_BYTE (code);
aa72b389 1996 else
df7492f9
KH
1997 {
1998 EMIT_ONE_BYTE (code >> 8);
1999 EMIT_ONE_BYTE (code & 0xFF);
2000 }
aa72b389 2001 }
aa72b389 2002 }
df7492f9
KH
2003 coding->result = CODING_RESULT_SUCCESS;
2004 coding->produced_char += produced_chars;
2005 coding->produced = dst - coding->destination;
2006 return 0;
aa72b389 2007}
b73bfc1c 2008
4ed46869 2009\f
df7492f9 2010/*** 7. ISO2022 handlers ***/
4ed46869
KH
2011
2012/* The following note describes the coding system ISO2022 briefly.
39787efd 2013 Since the intention of this note is to help understand the
df7492f9 2014 functions in this file, some parts are NOT ACCURATE or OVERLY
39787efd 2015 SIMPLIFIED. For thorough understanding, please refer to the
df7492f9 2016 original document of ISO2022.
4ed46869
KH
2017
2018 ISO2022 provides many mechanisms to encode several character sets
df7492f9 2019 in 7-bit and 8-bit environments. For 7-bite environments, all text
39787efd
KH
2020 is encoded using bytes less than 128. This may make the encoded
2021 text a little bit longer, but the text passes more easily through
df7492f9 2022 several gateways, some of which strip off MSB (Most Signigant Bit).
b73bfc1c 2023
df7492f9
KH
2024 There are two kinds of character sets: control character set and
2025 graphic character set. The former contains control characters such
4ed46869 2026 as `newline' and `escape' to provide control functions (control
39787efd 2027 functions are also provided by escape sequences). The latter
df7492f9 2028 contains graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2029 two control character sets and many graphic character sets.
2030
2031 Graphic character sets are classified into one of the following
39787efd
KH
2032 four classes, according to the number of bytes (DIMENSION) and
2033 number of characters in one dimension (CHARS) of the set:
2034 - DIMENSION1_CHARS94
2035 - DIMENSION1_CHARS96
2036 - DIMENSION2_CHARS94
2037 - DIMENSION2_CHARS96
2038
2039 In addition, each character set is assigned an identification tag,
df7492f9 2040 unique for each set, called "final character" (denoted as <F>
39787efd
KH
2041 hereafter). The <F> of each character set is decided by ECMA(*)
2042 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2043 (0x30..0x3F are for private use only).
4ed46869
KH
2044
2045 Note (*): ECMA = European Computer Manufacturers Association
2046
df7492f9 2047 Here are examples of graphic character set [NAME(<F>)]:
4ed46869
KH
2048 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2049 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2050 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2051 o DIMENSION2_CHARS96 -- none for the moment
2052
39787efd 2053 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2054 C0 [0x00..0x1F] -- control character plane 0
2055 GL [0x20..0x7F] -- graphic character plane 0
2056 C1 [0x80..0x9F] -- control character plane 1
2057 GR [0xA0..0xFF] -- graphic character plane 1
2058
2059 A control character set is directly designated and invoked to C0 or
39787efd
KH
2060 C1 by an escape sequence. The most common case is that:
2061 - ISO646's control character set is designated/invoked to C0, and
2062 - ISO6429's control character set is designated/invoked to C1,
2063 and usually these designations/invocations are omitted in encoded
2064 text. In a 7-bit environment, only C0 can be used, and a control
2065 character for C1 is encoded by an appropriate escape sequence to
2066 fit into the environment. All control characters for C1 are
2067 defined to have corresponding escape sequences.
4ed46869
KH
2068
2069 A graphic character set is at first designated to one of four
2070 graphic registers (G0 through G3), then these graphic registers are
2071 invoked to GL or GR. These designations and invocations can be
2072 done independently. The most common case is that G0 is invoked to
39787efd
KH
2073 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2074 these invocations and designations are omitted in encoded text.
2075 In a 7-bit environment, only GL can be used.
4ed46869 2076
39787efd
KH
2077 When a graphic character set of CHARS94 is invoked to GL, codes
2078 0x20 and 0x7F of the GL area work as control characters SPACE and
2079 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2080 be used.
4ed46869
KH
2081
2082 There are two ways of invocation: locking-shift and single-shift.
2083 With locking-shift, the invocation lasts until the next different
39787efd
KH
2084 invocation, whereas with single-shift, the invocation affects the
2085 following character only and doesn't affect the locking-shift
2086 state. Invocations are done by the following control characters or
2087 escape sequences:
4ed46869
KH
2088
2089 ----------------------------------------------------------------------
39787efd 2090 abbrev function cntrl escape seq description
4ed46869 2091 ----------------------------------------------------------------------
39787efd
KH
2092 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2093 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2094 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2095 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2096 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2097 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2098 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2099 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2100 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2101 ----------------------------------------------------------------------
39787efd
KH
2102 (*) These are not used by any known coding system.
2103
2104 Control characters for these functions are defined by macros
2105 ISO_CODE_XXX in `coding.h'.
4ed46869 2106
39787efd 2107 Designations are done by the following escape sequences:
4ed46869
KH
2108 ----------------------------------------------------------------------
2109 escape sequence description
2110 ----------------------------------------------------------------------
2111 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2112 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2113 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2114 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2115 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2116 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2117 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2118 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2119 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2120 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2121 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2122 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2123 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2124 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2125 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2126 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2127 ----------------------------------------------------------------------
2128
2129 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2130 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2131
2132 Note (*): Although these designations are not allowed in ISO2022,
2133 Emacs accepts them on decoding, and produces them on encoding
39787efd 2134 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2135 7-bit environment, non-locking-shift, and non-single-shift.
2136
2137 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2138 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2139
df7492f9 2140 Now you may notice that there are a lot of ways for encoding the
39787efd
KH
2141 same multilingual text in ISO2022. Actually, there exist many
2142 coding systems such as Compound Text (used in X11's inter client
df7492f9
KH
2143 communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
2144 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2145 localized platforms), and all of these are variants of ISO2022.
2146
2147 In addition to the above, Emacs handles two more kinds of escape
2148 sequences: ISO6429's direction specification and Emacs' private
2149 sequence for specifying character composition.
2150
39787efd 2151 ISO6429's direction specification takes the following form:
4ed46869
KH
2152 o CSI ']' -- end of the current direction
2153 o CSI '0' ']' -- end of the current direction
2154 o CSI '1' ']' -- start of left-to-right text
2155 o CSI '2' ']' -- start of right-to-left text
2156 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2157 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2158
2159 Character composition specification takes the following form:
ec6d2bb8
KH
2160 o ESC '0' -- start relative composition
2161 o ESC '1' -- end composition
2162 o ESC '2' -- start rule-base composition (*)
2163 o ESC '3' -- start relative composition with alternate chars (**)
2164 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2165 Since these are not standard escape sequences of any ISO standard,
df7492f9 2166 the use of them for these meaning is restricted to Emacs only.
ec6d2bb8 2167
df7492f9 2168 (*) This form is used only in Emacs 20.5 and the older versions,
b73bfc1c 2169 but the newer versions can safely decode it.
df7492f9 2170 (**) This form is used only in Emacs 21.1 and the newer versions,
b73bfc1c 2171 and the older versions can't decode it.
ec6d2bb8 2172
df7492f9 2173 Here's a list of examples usages of these composition escape
b73bfc1c 2174 sequences (categorized by `enum composition_method').
ec6d2bb8 2175
b73bfc1c 2176 COMPOSITION_RELATIVE:
ec6d2bb8 2177 ESC 0 CHAR [ CHAR ] ESC 1
df7492f9 2178 COMPOSITOIN_WITH_RULE:
ec6d2bb8 2179 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2180 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2181 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2182 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2183 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2184
2185enum iso_code_class_type iso_code_class[256];
2186
df7492f9
KH
2187#define SAFE_CHARSET_P(coding, id) \
2188 ((id) <= (coding)->max_charset_id \
2189 && (coding)->safe_charsets[id] >= 0)
2190
2191
2192#define SHIFT_OUT_OK(category) \
2193 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2194
2195static void
2196setup_iso_safe_charsets (Lisp_Object attrs)
2197{
2198 Lisp_Object charset_list, safe_charsets;
2199 Lisp_Object request;
2200 Lisp_Object reg_usage;
2201 Lisp_Object tail;
2202 int reg94, reg96;
2203 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2204 int max_charset_id;
2205
2206 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2207 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2208 && ! EQ (charset_list, Viso_2022_charset_list))
2209 {
2210 CODING_ATTR_CHARSET_LIST (attrs)
2211 = charset_list = Viso_2022_charset_list;
2212 ASET (attrs, coding_attr_safe_charsets, Qnil);
2213 }
2214
2215 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2216 return;
2217
2218 max_charset_id = 0;
2219 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2220 {
2221 int id = XINT (XCAR (tail));
2222 if (max_charset_id < id)
2223 max_charset_id = id;
2224 }
2225
2226 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2227 make_number (255));
2228 request = AREF (attrs, coding_attr_iso_request);
2229 reg_usage = AREF (attrs, coding_attr_iso_usage);
2230 reg94 = XINT (XCAR (reg_usage));
2231 reg96 = XINT (XCDR (reg_usage));
2232
2233 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2234 {
2235 Lisp_Object id;
2236 Lisp_Object reg;
2237 struct charset *charset;
2238
2239 id = XCAR (tail);
2240 charset = CHARSET_FROM_ID (XINT (id));
2241 reg = Fcdr (Fassq (request, id));
2242 if (! NILP (reg))
2243 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2244 else if (charset->iso_chars_96)
2245 {
2246 if (reg96 < 4)
2247 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2248 }
2249 else
2250 {
2251 if (reg94 < 4)
2252 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2253 }
2254 }
2255 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2256}
d46c5b12 2257
d46c5b12 2258
4ed46869 2259/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
df7492f9 2260 Check if a text is encoded in ISO2022. If it is, returns an
4ed46869 2261 integer in which appropriate flag bits any of:
df7492f9
KH
2262 CATEGORY_MASK_ISO_7
2263 CATEGORY_MASK_ISO_7_TIGHT
2264 CATEGORY_MASK_ISO_8_1
2265 CATEGORY_MASK_ISO_8_2
2266 CATEGORY_MASK_ISO_7_ELSE
2267 CATEGORY_MASK_ISO_8_ELSE
4ed46869
KH
2268 are set. If a code which should never appear in ISO2022 is found,
2269 returns 0. */
2270
0a28aafb 2271static int
df7492f9
KH
2272detect_coding_iso_2022 (coding, mask)
2273 struct coding_system *coding;
2274 int *mask;
4ed46869 2275{
df7492f9
KH
2276 unsigned char *src = coding->source, *src_base = src;
2277 unsigned char *src_end = coding->source + coding->src_bytes;
2278 int multibytep = coding->src_multibyte;
2279 int mask_iso = CATEGORY_MASK_ISO;
2280 int mask_found = 0, mask_8bit_found = 0;
f46869e4 2281 int reg[4], shift_out = 0, single_shifting = 0;
df7492f9
KH
2282 int id;
2283 int c, c1;
2284 int consumed_chars = 0;
2285 int i;
2286
2287 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2288 {
2289 struct coding_system *this = &(coding_categories[i]);
2290 Lisp_Object attrs, val;
2291
2292 attrs = CODING_ID_ATTRS (this->id);
2293 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2294 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2295 setup_iso_safe_charsets (attrs);
2296 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2297 this->max_charset_id = XSTRING (val)->size - 1;
2298 this->safe_charsets = (char *) XSTRING (val)->data;
2299 }
2300
2301 /* A coding system of this category is always ASCII compatible. */
2302 src += coding->head_ascii;
3f003981 2303
df7492f9
KH
2304 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2305 while (mask_iso && src < src_end)
4ed46869 2306 {
df7492f9 2307 ONE_MORE_BYTE (c);
4ed46869
KH
2308 switch (c)
2309 {
2310 case ISO_CODE_ESC:
74383408
KH
2311 if (inhibit_iso_escape_detection)
2312 break;
f46869e4 2313 single_shifting = 0;
df7492f9 2314 ONE_MORE_BYTE (c);
d46c5b12 2315 if (c >= '(' && c <= '/')
4ed46869 2316 {
bf9cdd4e 2317 /* Designation sequence for a charset of dimension 1. */
df7492f9 2318 ONE_MORE_BYTE (c1);
d46c5b12 2319 if (c1 < ' ' || c1 >= 0x80
df7492f9 2320 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2321 /* Invalid designation sequence. Just ignore. */
2322 break;
df7492f9 2323 reg[(c - '(') % 4] = id;
bf9cdd4e
KH
2324 }
2325 else if (c == '$')
2326 {
2327 /* Designation sequence for a charset of dimension 2. */
df7492f9 2328 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2329 if (c >= '@' && c <= 'B')
2330 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
df7492f9 2331 reg[0] = id = iso_charset_table[1][0][c];
bf9cdd4e 2332 else if (c >= '(' && c <= '/')
bcf26d6a 2333 {
df7492f9 2334 ONE_MORE_BYTE (c1);
d46c5b12 2335 if (c1 < ' ' || c1 >= 0x80
df7492f9 2336 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2337 /* Invalid designation sequence. Just ignore. */
2338 break;
df7492f9 2339 reg[(c - '(') % 4] = id;
bcf26d6a 2340 }
bf9cdd4e 2341 else
d46c5b12
KH
2342 /* Invalid designation sequence. Just ignore. */
2343 break;
2344 }
ae9ff118 2345 else if (c == 'N' || c == 'O')
d46c5b12 2346 {
ae9ff118 2347 /* ESC <Fe> for SS2 or SS3. */
df7492f9 2348 mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
d46c5b12 2349 break;
4ed46869 2350 }
ec6d2bb8
KH
2351 else if (c >= '0' && c <= '4')
2352 {
2353 /* ESC <Fp> for start/end composition. */
df7492f9 2354 mask_found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2355 break;
2356 }
bf9cdd4e 2357 else
df7492f9
KH
2358 {
2359 /* Invalid escape sequence. */
2360 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2361 break;
2362 }
d46c5b12
KH
2363
2364 /* We found a valid designation sequence for CHARSET. */
df7492f9
KH
2365 mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2366 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2367 id))
2368 mask_found |= CATEGORY_MASK_ISO_7;
d46c5b12 2369 else
df7492f9
KH
2370 mask_iso &= ~CATEGORY_MASK_ISO_7;
2371 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2372 id))
2373 mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2374 else
df7492f9
KH
2375 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2376 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2377 id))
2378 mask_found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2379 else
df7492f9
KH
2380 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2381 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2382 id))
2383 mask_found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2384 else
df7492f9 2385 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2386 break;
2387
4ed46869 2388 case ISO_CODE_SO:
74383408
KH
2389 if (inhibit_iso_escape_detection)
2390 break;
f46869e4 2391 single_shifting = 0;
d46c5b12
KH
2392 if (shift_out == 0
2393 && (reg[1] >= 0
df7492f9
KH
2394 || SHIFT_OUT_OK (coding_category_iso_7_else)
2395 || SHIFT_OUT_OK (coding_category_iso_8_else)))
d46c5b12
KH
2396 {
2397 /* Locking shift out. */
df7492f9
KH
2398 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2399 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12 2400 }
e0e989f6 2401 break;
df7492f9 2402
d46c5b12 2403 case ISO_CODE_SI:
74383408
KH
2404 if (inhibit_iso_escape_detection)
2405 break;
f46869e4 2406 single_shifting = 0;
d46c5b12
KH
2407 if (shift_out == 1)
2408 {
2409 /* Locking shift in. */
df7492f9
KH
2410 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2411 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2412 }
2413 break;
2414
4ed46869 2415 case ISO_CODE_CSI:
f46869e4 2416 single_shifting = 0;
4ed46869
KH
2417 case ISO_CODE_SS2:
2418 case ISO_CODE_SS3:
3f003981 2419 {
df7492f9 2420 int newmask = CATEGORY_MASK_ISO_8_ELSE;
3f003981 2421
74383408
KH
2422 if (inhibit_iso_escape_detection)
2423 break;
70c22245
KH
2424 if (c != ISO_CODE_CSI)
2425 {
df7492f9
KH
2426 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2427 & CODING_ISO_FLAG_SINGLE_SHIFT)
2428 newmask |= CATEGORY_MASK_ISO_8_1;
2429 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2430 & CODING_ISO_FLAG_SINGLE_SHIFT)
2431 newmask |= CATEGORY_MASK_ISO_8_2;
f46869e4 2432 single_shifting = 1;
70c22245 2433 }
3f003981
KH
2434 if (VECTORP (Vlatin_extra_code_table)
2435 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2436 {
df7492f9
KH
2437 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2438 & CODING_ISO_FLAG_LATIN_EXTRA)
2439 newmask |= CATEGORY_MASK_ISO_8_1;
2440 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2441 & CODING_ISO_FLAG_LATIN_EXTRA)
2442 newmask |= CATEGORY_MASK_ISO_8_2;
3f003981 2443 }
df7492f9 2444 mask_iso &= newmask;
d46c5b12 2445 mask_found |= newmask;
3f003981
KH
2446 }
2447 break;
4ed46869
KH
2448
2449 default:
2450 if (c < 0x80)
f46869e4
KH
2451 {
2452 single_shifting = 0;
2453 break;
2454 }
4ed46869 2455 else if (c < 0xA0)
c4825358 2456 {
f46869e4 2457 single_shifting = 0;
df7492f9 2458 mask_8bit_found = 1;
3f003981
KH
2459 if (VECTORP (Vlatin_extra_code_table)
2460 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
c4825358 2461 {
3f003981
KH
2462 int newmask = 0;
2463
df7492f9
KH
2464 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2465 & CODING_ISO_FLAG_LATIN_EXTRA)
2466 newmask |= CATEGORY_MASK_ISO_8_1;
2467 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2468 & CODING_ISO_FLAG_LATIN_EXTRA)
2469 newmask |= CATEGORY_MASK_ISO_8_2;
2470 mask_iso &= newmask;
d46c5b12 2471 mask_found |= newmask;
c4825358 2472 }
3f003981
KH
2473 else
2474 return 0;
c4825358 2475 }
4ed46869
KH
2476 else
2477 {
df7492f9
KH
2478 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2479 | CATEGORY_MASK_ISO_7_ELSE);
2480 mask_found |= CATEGORY_MASK_ISO_8_1;
2481 mask_8bit_found = 1;
f46869e4
KH
2482 /* Check the length of succeeding codes of the range
2483 0xA0..0FF. If the byte length is odd, we exclude
df7492f9 2484 CATEGORY_MASK_ISO_8_2. We can check this only
f46869e4 2485 when we are not single shifting. */
b73bfc1c 2486 if (!single_shifting
df7492f9 2487 && mask_iso & CATEGORY_MASK_ISO_8_2)
f46869e4 2488 {
e17de821 2489 int i = 1;
b73bfc1c
KH
2490 while (src < src_end)
2491 {
df7492f9 2492 ONE_MORE_BYTE (c);
b73bfc1c
KH
2493 if (c < 0xA0)
2494 break;
2495 i++;
2496 }
2497
2498 if (i & 1 && src < src_end)
df7492f9 2499 mask_iso &= ~CATEGORY_MASK_ISO_8_2;
f46869e4 2500 else
df7492f9 2501 mask_found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2502 }
4ed46869
KH
2503 }
2504 break;
2505 }
2506 }
df7492f9
KH
2507 no_more_source:
2508 if (!mask_iso)
2509 {
2510 *mask &= ~CATEGORY_MASK_ISO;
2511 return 0;
2512 }
2513 if (!mask_found)
2514 return 0;
2515 *mask &= mask_iso & mask_found;
2516 if (! mask_8bit_found)
2517 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2518 return 1;
4ed46869
KH
2519}
2520
4ed46869
KH
2521
2522/* Set designation state into CODING. */
df7492f9
KH
2523#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2524 do { \
2525 int id, prev; \
2526 \
2527 if (final < '0' || final >= 128 \
2528 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2529 || !SAFE_CHARSET_P (coding, id)) \
2530 { \
2531 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2532 goto invalid_code; \
2533 } \
2534 prev = CODING_ISO_DESIGNATION (coding, reg); \
2535 CODING_ISO_DESIGNATION (coding, reg) = id; \
2536 /* If there was an invalid designation to REG previously, and this \
2537 designation is ASCII to REG, we should keep this designation \
2538 sequence. */ \
2539 if (prev == -2 && id == charset_ascii) \
2540 goto invalid_code; \
4ed46869
KH
2541 } while (0)
2542
d46c5b12 2543
df7492f9
KH
2544#define MAYBE_FINISH_COMPOSITION() \
2545 do { \
2546 int i; \
2547 if (composition_state == COMPOSING_NO) \
2548 break; \
2549 /* It is assured that we have enough room for producing \
2550 characters stored in the table `components'. */ \
2551 if (charbuf + component_idx > charbuf_end) \
2552 goto no_more_source; \
2553 composition_state = COMPOSING_NO; \
2554 if (method == COMPOSITION_RELATIVE \
2555 || method == COMPOSITION_WITH_ALTCHARS) \
2556 { \
2557 for (i = 0; i < component_idx; i++) \
2558 *charbuf++ = components[i]; \
2559 char_offset += component_idx; \
2560 } \
2561 else \
2562 { \
2563 for (i = 0; i < component_idx; i += 2) \
2564 *charbuf++ = components[i]; \
2565 char_offset += (component_idx / 2) + 1; \
2566 } \
2567 } while (0)
2568
d46c5b12 2569
aa72b389
KH
2570/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2571 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2572 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
df7492f9
KH
2573 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2574 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
aa72b389 2575 */
ec6d2bb8 2576
df7492f9
KH
2577#define DECODE_COMPOSITION_START(c1) \
2578 do { \
2579 if (c1 == '0' \
2580 && composition_state == COMPOSING_COMPONENT_CHAR) \
2581 { \
2582 component_len = component_idx; \
2583 composition_state = COMPOSING_CHAR; \
2584 } \
2585 else \
2586 { \
2587 unsigned char *p; \
2588 \
2589 MAYBE_FINISH_COMPOSITION (); \
2590 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2591 goto no_more_source; \
2592 for (p = src; p < src_end - 1; p++) \
2593 if (*p == ISO_CODE_ESC && p[1] == '1') \
2594 break; \
2595 if (p == src_end - 1) \
2596 { \
2597 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2598 goto invalid_code; \
2599 goto no_more_source; \
2600 } \
2601 \
2602 /* This is surely the start of a composition. */ \
2603 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2604 : c1 == '2' ? COMPOSITION_WITH_RULE \
2605 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2606 : COMPOSITION_WITH_RULE_ALTCHARS); \
2607 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2608 : COMPOSING_COMPONENT_CHAR); \
2609 component_idx = component_len = 0; \
2610 } \
ec6d2bb8
KH
2611 } while (0)
2612
ec6d2bb8 2613
df7492f9
KH
2614/* Handle compositoin end sequence ESC 1. */
2615
2616#define DECODE_COMPOSITION_END() \
ec6d2bb8 2617 do { \
df7492f9
KH
2618 int nchars = (component_len > 0 ? component_idx - component_len \
2619 : method == COMPOSITION_RELATIVE ? component_idx \
2620 : (component_idx + 1) / 2); \
2621 int i; \
2622 int *saved_charbuf = charbuf; \
2623 \
2624 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2625 if (method != COMPOSITION_RELATIVE) \
ec6d2bb8 2626 { \
df7492f9
KH
2627 if (component_len == 0) \
2628 for (i = 0; i < component_idx; i++) \
2629 *charbuf++ = components[i]; \
2630 else \
2631 for (i = 0; i < component_len; i++) \
2632 *charbuf++ = components[i]; \
2633 *saved_charbuf = saved_charbuf - charbuf; \
ec6d2bb8 2634 } \
df7492f9
KH
2635 if (method == COMPOSITION_WITH_RULE) \
2636 for (i = 0; i < component_idx; i += 2, char_offset++) \
2637 *charbuf++ = components[i]; \
ec6d2bb8 2638 else \
df7492f9
KH
2639 for (i = component_len; i < component_idx; i++, char_offset++) \
2640 *charbuf++ = components[i]; \
2641 coding->annotated = 1; \
2642 composition_state = COMPOSING_NO; \
ec6d2bb8
KH
2643 } while (0)
2644
df7492f9 2645
ec6d2bb8
KH
2646/* Decode a composition rule from the byte C1 (and maybe one more byte
2647 from SRC) and store one encoded composition rule in
2648 coding->cmp_data. */
2649
2650#define DECODE_COMPOSITION_RULE(c1) \
2651 do { \
ec6d2bb8
KH
2652 (c1) -= 32; \
2653 if (c1 < 81) /* old format (before ver.21) */ \
2654 { \
2655 int gref = (c1) / 9; \
2656 int nref = (c1) % 9; \
2657 if (gref == 4) gref = 10; \
2658 if (nref == 4) nref = 10; \
df7492f9 2659 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
ec6d2bb8 2660 } \
b73bfc1c 2661 else if (c1 < 93) /* new format (after ver.21) */ \
ec6d2bb8
KH
2662 { \
2663 ONE_MORE_BYTE (c2); \
df7492f9 2664 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
ec6d2bb8 2665 } \
df7492f9
KH
2666 else \
2667 c1 = 0; \
ec6d2bb8 2668 } while (0)
88993dfd 2669
d46c5b12 2670
4ed46869
KH
2671/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2672
b73bfc1c 2673static void
df7492f9 2674decode_coding_iso_2022 (coding)
4ed46869 2675 struct coding_system *coding;
4ed46869 2676{
df7492f9
KH
2677 unsigned char *src = coding->source + coding->consumed;
2678 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 2679 unsigned char *src_base;
df7492f9
KH
2680 int *charbuf = coding->charbuf;
2681 int *charbuf_end = charbuf + coding->charbuf_size - 4;
2682 int consumed_chars = 0, consumed_chars_base;
2683 int char_offset = 0;
2684 int multibytep = coding->src_multibyte;
2685 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2686 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2687 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2688 struct charset *charset;
2689 int c;
2690 /* For handling composition sequence. */
2691#define COMPOSING_NO 0
2692#define COMPOSING_CHAR 1
2693#define COMPOSING_RULE 2
2694#define COMPOSING_COMPONENT_CHAR 3
2695#define COMPOSING_COMPONENT_RULE 4
2696
2697 int composition_state = COMPOSING_NO;
2698 enum composition_method method;
2699 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2700 int component_idx;
2701 int component_len;
2702 Lisp_Object attrs, eol_type, charset_list;
2703
2704 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2705 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2706
2707 while (1)
4ed46869 2708 {
b73bfc1c
KH
2709 int c1, c2;
2710
2711 src_base = src;
df7492f9
KH
2712 consumed_chars_base = consumed_chars;
2713
2714 if (charbuf >= charbuf_end)
2715 break;
2716
b73bfc1c 2717 ONE_MORE_BYTE (c1);
4ed46869 2718
ec6d2bb8 2719 /* We produce no character or one character. */
4ed46869
KH
2720 switch (iso_code_class [c1])
2721 {
2722 case ISO_0x20_or_0x7F:
df7492f9 2723 if (composition_state != COMPOSING_NO)
ec6d2bb8 2724 {
df7492f9
KH
2725 if (composition_state == COMPOSING_RULE
2726 || composition_state == COMPOSING_COMPONENT_RULE)
2727 {
2728 DECODE_COMPOSITION_RULE (c1);
2729 components[component_idx++] = c1;
2730 composition_state--;
2731 continue;
2732 }
2733 else if (method == COMPOSITION_WITH_RULE)
2734 composition_state = COMPOSING_RULE;
2735 else if (method == COMPOSITION_WITH_RULE_ALTCHARS
2736 && composition_state == COMPOSING_COMPONENT_CHAR)
2737 composition_state = COMPOSING_COMPONENT_CHAR;
ec6d2bb8 2738 }
df7492f9
KH
2739 if (charset_id_0 < 0
2740 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
4ed46869
KH
2741 {
2742 /* This is SPACE or DEL. */
df7492f9 2743 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2744 break;
2745 }
2746 /* This is a graphic character, we fall down ... */
2747
2748 case ISO_graphic_plane_0:
df7492f9 2749 if (composition_state == COMPOSING_RULE)
b73bfc1c
KH
2750 {
2751 DECODE_COMPOSITION_RULE (c1);
df7492f9
KH
2752 components[component_idx++] = c1;
2753 composition_state = COMPOSING_CHAR;
b73bfc1c 2754 }
df7492f9 2755 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2756 break;
2757
2758 case ISO_0xA0_or_0xFF:
df7492f9
KH
2759 if (charset_id_1 < 0
2760 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2761 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2762 goto invalid_code;
4ed46869
KH
2763 /* This is a graphic character, we fall down ... */
2764
2765 case ISO_graphic_plane_1:
df7492f9
KH
2766 if (charset_id_1 < 0)
2767 goto invalid_code;
2768 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2769 break;
2770
2771 case ISO_carriage_return:
df7492f9 2772 if (c1 == '\r')
4ed46869 2773 {
df7492f9 2774 if (EQ (eol_type, Qdos))
4ed46869 2775 {
df7492f9
KH
2776 if (src == src_end)
2777 goto no_more_source;
2778 if (*src == '\n')
2779 ONE_MORE_BYTE (c1);
4ed46869 2780 }
df7492f9
KH
2781 else if (EQ (eol_type, Qmac))
2782 c1 = '\n';
4ed46869 2783 }
df7492f9
KH
2784 /* fall through */
2785
2786 case ISO_control_0:
2787 MAYBE_FINISH_COMPOSITION ();
2788 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2789 break;
2790
df7492f9
KH
2791 case ISO_control_1:
2792 MAYBE_FINISH_COMPOSITION ();
2793 goto invalid_code;
2794
4ed46869 2795 case ISO_shift_out:
df7492f9
KH
2796 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2797 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2798 goto invalid_code;
2799 CODING_ISO_INVOCATION (coding, 0) = 1;
2800 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2801 continue;
4ed46869
KH
2802
2803 case ISO_shift_in:
df7492f9
KH
2804 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2805 goto invalid_code;
2806 CODING_ISO_INVOCATION (coding, 0) = 0;
2807 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2808 continue;
4ed46869
KH
2809
2810 case ISO_single_shift_2_7:
2811 case ISO_single_shift_2:
df7492f9
KH
2812 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2813 goto invalid_code;
4ed46869
KH
2814 /* SS2 is handled as an escape sequence of ESC 'N' */
2815 c1 = 'N';
2816 goto label_escape_sequence;
2817
2818 case ISO_single_shift_3:
df7492f9
KH
2819 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2820 goto invalid_code;
4ed46869
KH
2821 /* SS2 is handled as an escape sequence of ESC 'O' */
2822 c1 = 'O';
2823 goto label_escape_sequence;
2824
2825 case ISO_control_sequence_introducer:
2826 /* CSI is handled as an escape sequence of ESC '[' ... */
2827 c1 = '[';
2828 goto label_escape_sequence;
2829
2830 case ISO_escape:
2831 ONE_MORE_BYTE (c1);
2832 label_escape_sequence:
df7492f9 2833 /* Escape sequences handled here are invocation,
4ed46869
KH
2834 designation, direction specification, and character
2835 composition specification. */
2836 switch (c1)
2837 {
2838 case '&': /* revision of following character set */
2839 ONE_MORE_BYTE (c1);
2840 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2841 goto invalid_code;
4ed46869
KH
2842 ONE_MORE_BYTE (c1);
2843 if (c1 != ISO_CODE_ESC)
df7492f9 2844 goto invalid_code;
4ed46869
KH
2845 ONE_MORE_BYTE (c1);
2846 goto label_escape_sequence;
2847
2848 case '$': /* designation of 2-byte character set */
df7492f9
KH
2849 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2850 goto invalid_code;
4ed46869
KH
2851 ONE_MORE_BYTE (c1);
2852 if (c1 >= '@' && c1 <= 'B')
2853 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2854 or JISX0208.1980 */
df7492f9 2855 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2856 }
2857 else if (c1 >= 0x28 && c1 <= 0x2B)
2858 { /* designation of DIMENSION2_CHARS94 character set */
2859 ONE_MORE_BYTE (c2);
df7492f9 2860 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
2861 }
2862 else if (c1 >= 0x2C && c1 <= 0x2F)
2863 { /* designation of DIMENSION2_CHARS96 character set */
2864 ONE_MORE_BYTE (c2);
df7492f9 2865 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
2866 }
2867 else
df7492f9 2868 goto invalid_code;
b73bfc1c 2869 /* We must update these variables now. */
df7492f9
KH
2870 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2871 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2872 continue;
4ed46869
KH
2873
2874 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
2875 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2876 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2877 goto invalid_code;
2878 CODING_ISO_INVOCATION (coding, 0) = 2;
2879 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2880 continue;
4ed46869
KH
2881
2882 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
2883 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2884 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2885 goto invalid_code;
2886 CODING_ISO_INVOCATION (coding, 0) = 3;
2887 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2888 continue;
4ed46869
KH
2889
2890 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
2891 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2892 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2893 goto invalid_code;
2894 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 2895 ONE_MORE_BYTE (c1);
e7046a18 2896 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2897 goto invalid_code;
4ed46869
KH
2898 break;
2899
2900 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
2901 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2902 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2903 goto invalid_code;
2904 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 2905 ONE_MORE_BYTE (c1);
e7046a18 2906 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2907 goto invalid_code;
4ed46869
KH
2908 break;
2909
ec6d2bb8 2910 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
2911 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2912 goto invalid_code;
ec6d2bb8 2913 DECODE_COMPOSITION_START (c1);
b73bfc1c 2914 continue;
4ed46869 2915
ec6d2bb8 2916 case '1': /* end composition */
df7492f9
KH
2917 if (composition_state == COMPOSING_NO)
2918 goto invalid_code;
2919 DECODE_COMPOSITION_END ();
b73bfc1c 2920 continue;
4ed46869
KH
2921
2922 case '[': /* specification of direction */
df7492f9
KH
2923 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2924 goto invalid_code;
4ed46869 2925 /* For the moment, nested direction is not supported.
d46c5b12 2926 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 2927 left-to-right, and nozero means right-to-left. */
4ed46869
KH
2928 ONE_MORE_BYTE (c1);
2929 switch (c1)
2930 {
2931 case ']': /* end of the current direction */
d46c5b12 2932 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
2933
2934 case '0': /* end of the current direction */
2935 case '1': /* start of left-to-right direction */
2936 ONE_MORE_BYTE (c1);
2937 if (c1 == ']')
d46c5b12 2938 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 2939 else
df7492f9 2940 goto invalid_code;
4ed46869
KH
2941 break;
2942
2943 case '2': /* start of right-to-left direction */
2944 ONE_MORE_BYTE (c1);
2945 if (c1 == ']')
d46c5b12 2946 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 2947 else
df7492f9 2948 goto invalid_code;
4ed46869
KH
2949 break;
2950
2951 default:
df7492f9 2952 goto invalid_code;
4ed46869 2953 }
b73bfc1c 2954 continue;
4ed46869
KH
2955
2956 default:
df7492f9
KH
2957 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2958 goto invalid_code;
4ed46869
KH
2959 if (c1 >= 0x28 && c1 <= 0x2B)
2960 { /* designation of DIMENSION1_CHARS94 character set */
2961 ONE_MORE_BYTE (c2);
df7492f9 2962 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
2963 }
2964 else if (c1 >= 0x2C && c1 <= 0x2F)
2965 { /* designation of DIMENSION1_CHARS96 character set */
2966 ONE_MORE_BYTE (c2);
df7492f9 2967 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
2968 }
2969 else
df7492f9 2970 goto invalid_code;
b73bfc1c 2971 /* We must update these variables now. */
df7492f9
KH
2972 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2973 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2974 continue;
4ed46869 2975 }
b73bfc1c 2976 }
4ed46869 2977
b73bfc1c 2978 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
2979 Produce a decoded character while getting 2nd position code
2980 C2 if necessary. */
2981 c1 &= 0x7F;
2982 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
2983 {
2984 ONE_MORE_BYTE (c2);
df7492f9 2985 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 2986 /* C2 is not in a valid range. */
df7492f9
KH
2987 goto invalid_code;
2988 c1 = (c1 << 8) | (c2 & 0x7F);
2989 if (CHARSET_DIMENSION (charset) > 2)
2990 {
2991 ONE_MORE_BYTE (c2);
2992 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
2993 /* C2 is not in a valid range. */
2994 goto invalid_code;
2995 c1 = (c1 << 8) | (c2 & 0x7F);
2996 }
2997 }
2998
2999 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3000 if (c < 0)
3001 {
3002 MAYBE_FINISH_COMPOSITION ();
3003 for (; src_base < src; src_base++, char_offset++)
3004 {
3005 if (ASCII_BYTE_P (*src_base))
3006 *charbuf++ = *src_base;
3007 else
3008 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3009 }
3010 }
3011 else if (composition_state == COMPOSING_NO)
3012 {
3013 *charbuf++ = c;
3014 char_offset++;
4ed46869 3015 }
df7492f9
KH
3016 else
3017 components[component_idx++] = c;
4ed46869
KH
3018 continue;
3019
df7492f9
KH
3020 invalid_code:
3021 MAYBE_FINISH_COMPOSITION ();
4ed46869 3022 src = src_base;
df7492f9
KH
3023 consumed_chars = consumed_chars_base;
3024 ONE_MORE_BYTE (c);
3025 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3026 coding->errors++;
4ed46869 3027 }
fb88bf2d 3028
df7492f9
KH
3029 no_more_source:
3030 coding->consumed_char += consumed_chars_base;
3031 coding->consumed = src_base - coding->source;
3032 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3033}
3034
b73bfc1c 3035
f4dee582 3036/* ISO2022 encoding stuff. */
4ed46869
KH
3037
3038/*
f4dee582 3039 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3040 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3041 variant has the following specifications:
df7492f9 3042 1. Initial designation to G0 thru G3.
4ed46869
KH
3043 2. Allows short-form designation?
3044 3. ASCII should be designated to G0 before control characters?
3045 4. ASCII should be designated to G0 at end of line?
3046 5. 7-bit environment or 8-bit environment?
3047 6. Use locking-shift?
3048 7. Use Single-shift?
3049 And the following two are only for Japanese:
3050 8. Use ASCII in place of JIS0201-1976-Roman?
3051 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3052 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3053 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3054 details.
4ed46869
KH
3055*/
3056
3057/* Produce codes (escape sequence) for designating CHARSET to graphic
b73bfc1c
KH
3058 register REG at DST, and increment DST. If <final-char> of CHARSET is
3059 '@', 'A', or 'B' and the coding system CODING allows, produce
3060 designation sequence of short-form. */
4ed46869
KH
3061
3062#define ENCODE_DESIGNATION(charset, reg, coding) \
3063 do { \
df7492f9 3064 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
4ed46869
KH
3065 char *intermediate_char_94 = "()*+"; \
3066 char *intermediate_char_96 = ",-./"; \
df7492f9
KH
3067 int revision = -1; \
3068 int c; \
3069 \
3070 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3071 revision = XINT (CHARSET_ISO_REVISION (charset)); \
3072 \
3073 if (revision >= 0) \
70c22245 3074 { \
df7492f9
KH
3075 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3076 EMIT_ONE_BYTE ('@' + revision); \
4ed46869 3077 } \
df7492f9 3078 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
4ed46869
KH
3079 if (CHARSET_DIMENSION (charset) == 1) \
3080 { \
df7492f9
KH
3081 if (! CHARSET_ISO_CHARS_96 (charset)) \
3082 c = intermediate_char_94[reg]; \
4ed46869 3083 else \
df7492f9
KH
3084 c = intermediate_char_96[reg]; \
3085 EMIT_ONE_ASCII_BYTE (c); \
4ed46869
KH
3086 } \
3087 else \
3088 { \
df7492f9
KH
3089 EMIT_ONE_ASCII_BYTE ('$'); \
3090 if (! CHARSET_ISO_CHARS_96 (charset)) \
4ed46869 3091 { \
df7492f9 3092 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
b73bfc1c
KH
3093 || reg != 0 \
3094 || final_char < '@' || final_char > 'B') \
df7492f9 3095 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
4ed46869
KH
3096 } \
3097 else \
df7492f9 3098 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
4ed46869 3099 } \
df7492f9
KH
3100 EMIT_ONE_ASCII_BYTE (final_char); \
3101 \
3102 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
4ed46869
KH
3103 } while (0)
3104
df7492f9 3105
4ed46869
KH
3106/* The following two macros produce codes (control character or escape
3107 sequence) for ISO2022 single-shift functions (single-shift-2 and
3108 single-shift-3). */
3109
df7492f9
KH
3110#define ENCODE_SINGLE_SHIFT_2 \
3111 do { \
3112 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3113 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3114 else \
3115 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3116 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3117 } while (0)
3118
df7492f9
KH
3119
3120#define ENCODE_SINGLE_SHIFT_3 \
3121 do { \
3122 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3123 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3124 else \
3125 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3126 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3127 } while (0)
3128
df7492f9 3129
4ed46869
KH
3130/* The following four macros produce codes (control character or
3131 escape sequence) for ISO2022 locking-shift functions (shift-in,
3132 shift-out, locking-shift-2, and locking-shift-3). */
3133
df7492f9
KH
3134#define ENCODE_SHIFT_IN \
3135 do { \
3136 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3137 CODING_ISO_INVOCATION (coding, 0) = 0; \
4ed46869
KH
3138 } while (0)
3139
df7492f9
KH
3140
3141#define ENCODE_SHIFT_OUT \
3142 do { \
3143 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3144 CODING_ISO_INVOCATION (coding, 0) = 1; \
4ed46869
KH
3145 } while (0)
3146
df7492f9
KH
3147
3148#define ENCODE_LOCKING_SHIFT_2 \
3149 do { \
3150 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3151 CODING_ISO_INVOCATION (coding, 0) = 2; \
4ed46869
KH
3152 } while (0)
3153
df7492f9
KH
3154
3155#define ENCODE_LOCKING_SHIFT_3 \
3156 do { \
3157 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3158 CODING_ISO_INVOCATION (coding, 0) = 3; \
4ed46869
KH
3159 } while (0)
3160
df7492f9 3161
f4dee582
RS
3162/* Produce codes for a DIMENSION1 character whose character set is
3163 CHARSET and whose position-code is C1. Designation and invocation
4ed46869
KH
3164 sequences are also produced in advance if necessary. */
3165
6e85d753
KH
3166#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3167 do { \
df7492f9
KH
3168 int id = CHARSET_ID (charset); \
3169 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3170 { \
df7492f9
KH
3171 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3172 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753 3173 else \
df7492f9
KH
3174 EMIT_ONE_BYTE (c1 | 0x80); \
3175 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3176 break; \
3177 } \
df7492f9 3178 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3179 { \
df7492f9 3180 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753
KH
3181 break; \
3182 } \
df7492f9 3183 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3184 { \
df7492f9 3185 EMIT_ONE_BYTE (c1 | 0x80); \
6e85d753
KH
3186 break; \
3187 } \
6e85d753
KH
3188 else \
3189 /* Since CHARSET is not yet invoked to any graphic planes, we \
3190 must invoke it, or, at first, designate it to some graphic \
3191 register. Then repeat the loop to actually produce the \
3192 character. */ \
df7492f9
KH
3193 dst = encode_invocation_designation (charset, coding, dst, \
3194 &produced_chars); \
4ed46869
KH
3195 } while (1)
3196
df7492f9 3197
f4dee582
RS
3198/* Produce codes for a DIMENSION2 character whose character set is
3199 CHARSET and whose position-codes are C1 and C2. Designation and
4ed46869
KH
3200 invocation codes are also produced in advance if necessary. */
3201
6e85d753
KH
3202#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3203 do { \
df7492f9
KH
3204 int id = CHARSET_ID (charset); \
3205 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3206 { \
df7492f9
KH
3207 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3208 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753 3209 else \
df7492f9
KH
3210 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3211 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3212 break; \
3213 } \
df7492f9 3214 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3215 { \
df7492f9 3216 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753
KH
3217 break; \
3218 } \
df7492f9 3219 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3220 { \
df7492f9 3221 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
6e85d753
KH
3222 break; \
3223 } \
6e85d753
KH
3224 else \
3225 /* Since CHARSET is not yet invoked to any graphic planes, we \
3226 must invoke it, or, at first, designate it to some graphic \
3227 register. Then repeat the loop to actually produce the \
3228 character. */ \
df7492f9
KH
3229 dst = encode_invocation_designation (charset, coding, dst, \
3230 &produced_chars); \
4ed46869
KH
3231 } while (1)
3232
05e6f5dc 3233
df7492f9
KH
3234#define ENCODE_ISO_CHARACTER(charset, c) \
3235 do { \
3236 int code = ENCODE_CHAR ((charset),(c)); \
3237 \
3238 if (CHARSET_DIMENSION (charset) == 1) \
3239 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3240 else \
3241 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
84fbb8a0 3242 } while (0)
bdd9fb48 3243
05e6f5dc 3244
4ed46869 3245/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3246 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3247 Return new DST. */
3248
3249unsigned char *
df7492f9
KH
3250encode_invocation_designation (charset, coding, dst, p_nchars)
3251 struct charset *charset;
4ed46869
KH
3252 struct coding_system *coding;
3253 unsigned char *dst;
df7492f9 3254 int *p_nchars;
4ed46869 3255{
df7492f9
KH
3256 int multibytep = coding->dst_multibyte;
3257 int produced_chars = *p_nchars;
4ed46869 3258 int reg; /* graphic register number */
df7492f9 3259 int id = CHARSET_ID (charset);
4ed46869
KH
3260
3261 /* At first, check designations. */
3262 for (reg = 0; reg < 4; reg++)
df7492f9 3263 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3264 break;
3265
3266 if (reg >= 4)
3267 {
3268 /* CHARSET is not yet designated to any graphic registers. */
3269 /* At first check the requested designation. */
df7492f9
KH
3270 reg = CODING_ISO_REQUEST (coding, id);
3271 if (reg < 0)
1ba9e4ab
KH
3272 /* Since CHARSET requests no special designation, designate it
3273 to graphic register 0. */
4ed46869
KH
3274 reg = 0;
3275
3276 ENCODE_DESIGNATION (charset, reg, coding);
3277 }
3278
df7492f9
KH
3279 if (CODING_ISO_INVOCATION (coding, 0) != reg
3280 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3281 {
3282 /* Since the graphic register REG is not invoked to any graphic
3283 planes, invoke it to graphic plane 0. */
3284 switch (reg)
3285 {
3286 case 0: /* graphic register 0 */
3287 ENCODE_SHIFT_IN;
3288 break;
3289
3290 case 1: /* graphic register 1 */
3291 ENCODE_SHIFT_OUT;
3292 break;
3293
3294 case 2: /* graphic register 2 */
df7492f9 3295 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3296 ENCODE_SINGLE_SHIFT_2;
3297 else
3298 ENCODE_LOCKING_SHIFT_2;
3299 break;
3300
3301 case 3: /* graphic register 3 */
df7492f9 3302 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3303 ENCODE_SINGLE_SHIFT_3;
3304 else
3305 ENCODE_LOCKING_SHIFT_3;
3306 break;
3307 }
3308 }
b73bfc1c 3309
df7492f9 3310 *p_nchars = produced_chars;
4ed46869
KH
3311 return dst;
3312}
3313
df7492f9
KH
3314/* The following three macros produce codes for indicating direction
3315 of text. */
3316#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
ec6d2bb8 3317 do { \
df7492f9
KH
3318 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3319 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
ec6d2bb8 3320 else \
df7492f9 3321 EMIT_ONE_BYTE (ISO_CODE_CSI); \
ec6d2bb8
KH
3322 } while (0)
3323
ec6d2bb8 3324
df7492f9
KH
3325#define ENCODE_DIRECTION_R2L() \
3326 do { \
3327 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3328 EMIT_TWO_ASCII_BYTES ('2', ']'); \
ec6d2bb8
KH
3329 } while (0)
3330
ec6d2bb8 3331
df7492f9 3332#define ENCODE_DIRECTION_L2R() \
ec6d2bb8 3333 do { \
df7492f9
KH
3334 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3335 EMIT_TWO_ASCII_BYTES ('0', ']'); \
4ed46869
KH
3336 } while (0)
3337
4ed46869
KH
3338
3339/* Produce codes for designation and invocation to reset the graphic
3340 planes and registers to initial state. */
df7492f9
KH
3341#define ENCODE_RESET_PLANE_AND_REGISTER() \
3342 do { \
3343 int reg; \
3344 struct charset *charset; \
3345 \
3346 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3347 ENCODE_SHIFT_IN; \
3348 for (reg = 0; reg < 4; reg++) \
3349 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3350 && (CODING_ISO_DESIGNATION (coding, reg) \
3351 != CODING_ISO_INITIAL (coding, reg))) \
3352 { \
3353 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3354 ENCODE_DESIGNATION (charset, reg, coding); \
3355 } \
4ed46869
KH
3356 } while (0)
3357
df7492f9 3358
bdd9fb48 3359/* Produce designation sequences of charsets in the line started from
b73bfc1c 3360 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3361
3362 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3363 find all the necessary designations. */
3364
b73bfc1c 3365static unsigned char *
df7492f9 3366encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3367 struct coding_system *coding;
df7492f9
KH
3368 int *charbuf, *charbuf_end;
3369 unsigned char *dst;
e0e989f6 3370{
df7492f9 3371 struct charset *charset;
bdd9fb48
KH
3372 /* Table of charsets to be designated to each graphic register. */
3373 int r[4];
df7492f9
KH
3374 int c, found = 0, reg;
3375 int produced_chars = 0;
3376 int multibytep = coding->dst_multibyte;
3377 Lisp_Object attrs;
3378 Lisp_Object charset_list;
3379
3380 attrs = CODING_ID_ATTRS (coding->id);
3381 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3382 if (EQ (charset_list, Qiso_2022))
3383 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3384
3385 for (reg = 0; reg < 4; reg++)
3386 r[reg] = -1;
3387
b73bfc1c 3388 while (found < 4)
e0e989f6 3389 {
df7492f9
KH
3390 int id;
3391
3392 c = *charbuf++;
b73bfc1c
KH
3393 if (c == '\n')
3394 break;
df7492f9
KH
3395 charset = char_charset (c, charset_list, NULL);
3396 id = CHARSET_ID (charset);
3397 reg = CODING_ISO_REQUEST (coding, id);
3398 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3399 {
3400 found++;
df7492f9 3401 r[reg] = id;
bdd9fb48 3402 }
bdd9fb48
KH
3403 }
3404
3405 if (found)
3406 {
3407 for (reg = 0; reg < 4; reg++)
3408 if (r[reg] >= 0
df7492f9
KH
3409 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3410 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3411 }
b73bfc1c
KH
3412
3413 return dst;
e0e989f6
KH
3414}
3415
4ed46869
KH
3416/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3417
df7492f9
KH
3418static int
3419encode_coding_iso_2022 (coding)
4ed46869 3420 struct coding_system *coding;
4ed46869 3421{
df7492f9
KH
3422 int multibytep = coding->dst_multibyte;
3423 int *charbuf = coding->charbuf;
3424 int *charbuf_end = charbuf + coding->charbuf_used;
3425 unsigned char *dst = coding->destination + coding->produced;
3426 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3427 int safe_room = 16;
3428 int bol_designation
3429 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3430 && CODING_ISO_BOL (coding));
3431 int produced_chars = 0;
3432 Lisp_Object attrs, eol_type, charset_list;
3433 int ascii_compatible;
b73bfc1c 3434 int c;
05e6f5dc 3435
df7492f9 3436 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
bdd9fb48 3437
df7492f9 3438 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4ed46869 3439
df7492f9 3440 while (charbuf < charbuf_end)
4ed46869 3441 {
df7492f9 3442 ASSURE_DESTINATION (safe_room);
b73bfc1c 3443
df7492f9 3444 if (bol_designation)
b73bfc1c 3445 {
df7492f9 3446 unsigned char *dst_prev = dst;
4ed46869 3447
bdd9fb48 3448 /* We have to produce designation sequences if any now. */
df7492f9
KH
3449 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3450 bol_designation = 0;
3451 /* We are sure that designation sequences are all ASCII bytes. */
3452 produced_chars += dst - dst_prev;
4ed46869 3453 }
ec6d2bb8 3454
df7492f9 3455 c = *charbuf++;
4ed46869 3456
b73bfc1c
KH
3457 /* Now encode the character C. */
3458 if (c < 0x20 || c == 0x7F)
3459 {
df7492f9
KH
3460 if (c == '\n'
3461 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3462 {
df7492f9
KH
3463 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3464 ENCODE_RESET_PLANE_AND_REGISTER ();
3465 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3466 {
df7492f9
KH
3467 int i;
3468
3469 for (i = 0; i < 4; i++)
3470 CODING_ISO_DESIGNATION (coding, i)
3471 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3472 }
df7492f9
KH
3473 bol_designation
3474 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3475 }
df7492f9
KH
3476 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3477 ENCODE_RESET_PLANE_AND_REGISTER ();
3478 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3479 }
df7492f9 3480 else if (ASCII_CHAR_P (c))
88993dfd 3481 {
df7492f9
KH
3482 if (ascii_compatible)
3483 EMIT_ONE_ASCII_BYTE (c);
3484 else
3485 ENCODE_ISO_CHARACTER (CHARSET_FROM_ID (charset_ascii), c);
88993dfd 3486 }
b73bfc1c 3487 else
df7492f9
KH
3488 {
3489 struct charset *charset = char_charset (c, charset_list, NULL);
b73bfc1c 3490
df7492f9
KH
3491 if (!charset)
3492 {
3493 c = coding->default_char;
3494 charset = char_charset (c, charset_list, NULL);
3495 }
3496 ENCODE_ISO_CHARACTER (charset, c);
3497 }
84fbb8a0 3498 }
b73bfc1c 3499
df7492f9
KH
3500 if (coding->mode & CODING_MODE_LAST_BLOCK
3501 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3502 {
3503 ASSURE_DESTINATION (safe_room);
3504 ENCODE_RESET_PLANE_AND_REGISTER ();
3505 }
3506 coding->result = CODING_RESULT_SUCCESS;
3507 CODING_ISO_BOL (coding) = bol_designation;
3508 coding->produced_char += produced_chars;
3509 coding->produced = dst - coding->destination;
3510 return 0;
4ed46869
KH
3511}
3512
3513\f
df7492f9 3514/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 3515
df7492f9 3516/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3517 quite widely. So, for the moment, Emacs supports them in the bare
3518 C code. But, in the future, they may be supported only by CCL. */
3519
3520/* SJIS is a coding system encoding three character sets: ASCII, right
3521 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3522 as is. A character of charset katakana-jisx0201 is encoded by
3523 "position-code + 0x80". A character of charset japanese-jisx0208
3524 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 3525 so that it fit in the range below.
4ed46869
KH
3526
3527 --- CODE RANGE of SJIS ---
3528 (character set) (range)
3529 ASCII 0x00 .. 0x7F
df7492f9 3530 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3531 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3532 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3533 -------------------------------
3534
3535*/
3536
3537/* BIG5 is a coding system encoding two character sets: ASCII and
3538 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3539 character set and is encoded in two-byte.
4ed46869
KH
3540
3541 --- CODE RANGE of BIG5 ---
3542 (character set) (range)
3543 ASCII 0x00 .. 0x7F
3544 Big5 (1st byte) 0xA1 .. 0xFE
3545 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3546 --------------------------
3547
df7492f9 3548 */
4ed46869
KH
3549
3550/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3551 Check if a text is encoded in SJIS. If it is, return
df7492f9 3552 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3553
0a28aafb 3554static int
df7492f9
KH
3555detect_coding_sjis (coding, mask)
3556 struct coding_system *coding;
3557 int *mask;
4ed46869 3558{
df7492f9
KH
3559 unsigned char *src = coding->source, *src_base = src;
3560 unsigned char *src_end = coding->source + coding->src_bytes;
3561 int multibytep = coding->src_multibyte;
3562 int consumed_chars = 0;
3563 int found = 0;
b73bfc1c 3564 int c;
df7492f9
KH
3565
3566 /* A coding system of this category is always ASCII compatible. */
3567 src += coding->head_ascii;
4ed46869 3568
b73bfc1c 3569 while (1)
4ed46869 3570 {
df7492f9 3571 ONE_MORE_BYTE (c);
682169fe
KH
3572 if (c < 0x80)
3573 continue;
df7492f9 3574 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3575 {
df7492f9 3576 ONE_MORE_BYTE (c);
682169fe 3577 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9
KH
3578 break;
3579 found = 1;
4ed46869 3580 }
df7492f9
KH
3581 else if (c >= 0xA0 && c < 0xE0)
3582 found = 1;
3583 else
3584 break;
4ed46869 3585 }
df7492f9
KH
3586 *mask &= ~CATEGORY_MASK_SJIS;
3587 return 0;
3588
3589 no_more_source:
3590 if (!found)
3591 return 0;
3592 *mask &= CATEGORY_MASK_SJIS;
3593 return 1;
4ed46869
KH
3594}
3595
3596/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3597 Check if a text is encoded in BIG5. If it is, return
df7492f9 3598 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3599
0a28aafb 3600static int
df7492f9
KH
3601detect_coding_big5 (coding, mask)
3602 struct coding_system *coding;
3603 int *mask;
4ed46869 3604{
df7492f9
KH
3605 unsigned char *src = coding->source, *src_base = src;
3606 unsigned char *src_end = coding->source + coding->src_bytes;
3607 int multibytep = coding->src_multibyte;
3608 int consumed_chars = 0;
3609 int found = 0;
b73bfc1c 3610 int c;
fa42c37f 3611
df7492f9
KH
3612 /* A coding system of this category is always ASCII compatible. */
3613 src += coding->head_ascii;
fa42c37f 3614
b73bfc1c 3615 while (1)
fa42c37f 3616 {
df7492f9
KH
3617 ONE_MORE_BYTE (c);
3618 if (c < 0x80)
fa42c37f 3619 continue;
df7492f9 3620 if (c >= 0xA1)
fa42c37f 3621 {
df7492f9
KH
3622 ONE_MORE_BYTE (c);
3623 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3624 return 0;
df7492f9 3625 found = 1;
fa42c37f 3626 }
df7492f9
KH
3627 else
3628 break;
fa42c37f 3629 }
df7492f9 3630 *mask &= ~CATEGORY_MASK_BIG5;
fa42c37f 3631 return 0;
df7492f9
KH
3632
3633 no_more_source:
3634 if (!found)
3635 return 0;
3636 *mask &= CATEGORY_MASK_BIG5;
3637 return 1;
fa42c37f
KH
3638}
3639
4ed46869
KH
3640/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3641 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3642
b73bfc1c 3643static void
df7492f9 3644decode_coding_sjis (coding)
4ed46869 3645 struct coding_system *coding;
4ed46869 3646{
df7492f9
KH
3647 unsigned char *src = coding->source + coding->consumed;
3648 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 3649 unsigned char *src_base;
df7492f9
KH
3650 int *charbuf = coding->charbuf;
3651 int *charbuf_end = charbuf + coding->charbuf_size;
3652 int consumed_chars = 0, consumed_chars_base;
3653 int multibytep = coding->src_multibyte;
3654 struct charset *charset_roman, *charset_kanji, *charset_kana;
3655 Lisp_Object attrs, eol_type, charset_list, val;
a5d301df 3656
df7492f9
KH
3657 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3658
3659 val = charset_list;
3660 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3661 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3662 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 3663
b73bfc1c 3664 while (1)
4ed46869 3665 {
df7492f9 3666 int c, c1;
b73bfc1c
KH
3667
3668 src_base = src;
df7492f9
KH
3669 consumed_chars_base = consumed_chars;
3670
3671 if (charbuf >= charbuf_end)
3672 break;
3673
3674 ONE_MORE_BYTE (c);
b73bfc1c 3675
df7492f9 3676 if (c == '\r')
4ed46869 3677 {
df7492f9 3678 if (EQ (eol_type, Qdos))
4ed46869 3679 {
df7492f9
KH
3680 if (src == src_end)
3681 goto no_more_source;
3682 if (*src == '\n')
3683 ONE_MORE_BYTE (c);
4ed46869 3684 }
df7492f9
KH
3685 else if (EQ (eol_type, Qmac))
3686 c = '\n';
4ed46869 3687 }
54f78171 3688 else
df7492f9
KH
3689 {
3690 struct charset *charset;
3691
3692 if (c < 0x80)
3693 charset = charset_roman;
3694 else
4ed46869 3695 {
df7492f9
KH
3696 if (c >= 0xF0)
3697 goto invalid_code;
3698 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3699 {
54f78171 3700 /* SJIS -> JISX0208 */
df7492f9
KH
3701 ONE_MORE_BYTE (c1);
3702 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3703 goto invalid_code;
3704 c = (c << 8) | c1;
3705 SJIS_TO_JIS (c);
3706 charset = charset_kanji;
5e34de15 3707 }
fb88bf2d 3708 else
b73bfc1c 3709 /* SJIS -> JISX0201-Kana */
df7492f9
KH
3710 charset = charset_kana;
3711 }
3712 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3713 }
3714 *charbuf++ = c;
3715 continue;
3716
3717 invalid_code:
3718 src = src_base;
3719 consumed_chars = consumed_chars_base;
3720 ONE_MORE_BYTE (c);
3721 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3722 coding->errors++;
3723 }
3724
3725 no_more_source:
3726 coding->consumed_char += consumed_chars_base;
3727 coding->consumed = src_base - coding->source;
3728 coding->charbuf_used = charbuf - coding->charbuf;
3729}
3730
3731static void
3732decode_coding_big5 (coding)
3733 struct coding_system *coding;
3734{
3735 unsigned char *src = coding->source + coding->consumed;
3736 unsigned char *src_end = coding->source + coding->src_bytes;
3737 unsigned char *src_base;
3738 int *charbuf = coding->charbuf;
3739 int *charbuf_end = charbuf + coding->charbuf_size;
3740 int consumed_chars = 0, consumed_chars_base;
3741 int multibytep = coding->src_multibyte;
3742 struct charset *charset_roman, *charset_big5;
3743 Lisp_Object attrs, eol_type, charset_list, val;
3744
3745 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3746 val = charset_list;
3747 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3748 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3749
3750 while (1)
3751 {
3752 int c, c1;
3753
3754 src_base = src;
3755 consumed_chars_base = consumed_chars;
3756
3757 if (charbuf >= charbuf_end)
3758 break;
3759
3760 ONE_MORE_BYTE (c);
3761
3762 if (c == '\r')
3763 {
3764 if (EQ (eol_type, Qdos))
3765 {
3766 if (src == src_end)
3767 goto no_more_source;
3768 if (*src == '\n')
3769 ONE_MORE_BYTE (c);
4ed46869 3770 }
df7492f9
KH
3771 else if (EQ (eol_type, Qmac))
3772 c = '\n';
3773 }
3774 else
3775 {
3776 struct charset *charset;
3777 if (c < 0x80)
3778 charset = charset_roman;
fb88bf2d 3779 else
fb88bf2d 3780 {
54f78171 3781 /* BIG5 -> Big5 */
df7492f9
KH
3782 if (c < 0xA1 || c > 0xFE)
3783 goto invalid_code;
3784 ONE_MORE_BYTE (c1);
3785 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3786 goto invalid_code;
3787 c = c << 8 | c1;
3788 charset = charset_big5;
4ed46869 3789 }
df7492f9 3790 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 3791 }
4ed46869 3792
df7492f9 3793 *charbuf++ = c;
fb88bf2d
KH
3794 continue;
3795
df7492f9 3796 invalid_code:
4ed46869 3797 src = src_base;
df7492f9
KH
3798 consumed_chars = consumed_chars_base;
3799 ONE_MORE_BYTE (c);
3800 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3801 coding->errors++;
fb88bf2d 3802 }
d46c5b12 3803
df7492f9
KH
3804 no_more_source:
3805 coding->consumed_char += consumed_chars_base;
3806 coding->consumed = src_base - coding->source;
3807 coding->charbuf_used = charbuf - coding->charbuf;
3808}
3809
3810/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3811 This function can encode charsets `ascii', `katakana-jisx0201',
3812 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3813 are sure that all these charsets are registered as official charset
3814 (i.e. do not have extended leading-codes). Characters of other
3815 charsets are produced without any encoding. If SJIS_P is 1, encode
3816 SJIS text, else encode BIG5 text. */
3817
3818static int
3819encode_coding_sjis (coding)
3820 struct coding_system *coding;
3821{
3822 int multibytep = coding->dst_multibyte;
3823 int *charbuf = coding->charbuf;
3824 int *charbuf_end = charbuf + coding->charbuf_used;
3825 unsigned char *dst = coding->destination + coding->produced;
3826 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3827 int safe_room = 4;
3828 int produced_chars = 0;
3829 Lisp_Object attrs, eol_type, charset_list, val;
3830 int ascii_compatible;
3831 struct charset *charset_roman, *charset_kanji, *charset_kana;
3832 int c;
3833
3834 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3835 val = charset_list;
3836 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3837 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3838 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3839
3840 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3841
3842 while (charbuf < charbuf_end)
3843 {
3844 ASSURE_DESTINATION (safe_room);
3845 c = *charbuf++;
3846 /* Now encode the character C. */
3847 if (ASCII_CHAR_P (c) && ascii_compatible)
3848 EMIT_ONE_ASCII_BYTE (c);
3849 else
3850 {
3851 unsigned code;
3852 struct charset *charset = char_charset (c, charset_list, &code);
3853
3854 if (!charset)
3855 {
3856 c = coding->default_char;
3857 charset = char_charset (c, charset_list, &code);
3858 }
3859 if (code == CHARSET_INVALID_CODE (charset))
3860 abort ();
3861 if (charset == charset_kanji)
3862 {
3863 int c1, c2;
3864 JIS_TO_SJIS (code);
3865 c1 = code >> 8, c2 = code & 0xFF;
3866 EMIT_TWO_BYTES (c1, c2);
3867 }
3868 else if (charset == charset_kana)
3869 EMIT_ONE_BYTE (code | 0x80);
3870 else
3871 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3872 }
3873 }
3874 coding->result = CODING_RESULT_SUCCESS;
3875 coding->produced_char += produced_chars;
3876 coding->produced = dst - coding->destination;
3877 return 0;
3878}
3879
3880static int
3881encode_coding_big5 (coding)
3882 struct coding_system *coding;
3883{
3884 int multibytep = coding->dst_multibyte;
3885 int *charbuf = coding->charbuf;
3886 int *charbuf_end = charbuf + coding->charbuf_used;
3887 unsigned char *dst = coding->destination + coding->produced;
3888 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3889 int safe_room = 4;
3890 int produced_chars = 0;
3891 Lisp_Object attrs, eol_type, charset_list, val;
3892 int ascii_compatible;
3893 struct charset *charset_roman, *charset_big5;
3894 int c;
3895
3896 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3897 val = charset_list;
3898 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3899 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3900 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3901
3902 while (charbuf < charbuf_end)
3903 {
3904 ASSURE_DESTINATION (safe_room);
3905 c = *charbuf++;
3906 /* Now encode the character C. */
3907 if (ASCII_CHAR_P (c) && ascii_compatible)
3908 EMIT_ONE_ASCII_BYTE (c);
3909 else
3910 {
3911 unsigned code;
3912 struct charset *charset = char_charset (c, charset_list, &code);
3913
3914 if (! charset)
3915 {
3916 c = coding->default_char;
3917 charset = char_charset (c, charset_list, &code);
3918 }
3919 if (code == CHARSET_INVALID_CODE (charset))
3920 abort ();
3921 if (charset == charset_big5)
3922 {
3923 int c1, c2;
3924
3925 c1 = code >> 8, c2 = code & 0xFF;
3926 EMIT_TWO_BYTES (c1, c2);
3927 }
3928 else
3929 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3930 }
3931 }
3932 coding->result = CODING_RESULT_SUCCESS;
3933 coding->produced_char += produced_chars;
3934 coding->produced = dst - coding->destination;
3935 return 0;
3936}
3937
3938\f
3939/*** 10. CCL handlers ***/
3940
3941/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3942 Check if a text is encoded in a coding system of which
3943 encoder/decoder are written in CCL program. If it is, return
3944 CATEGORY_MASK_CCL, else return 0. */
3945
3946static int
3947detect_coding_ccl (coding, mask)
3948 struct coding_system *coding;
3949 int *mask;
3950{
3951 unsigned char *src = coding->source, *src_base = src;
3952 unsigned char *src_end = coding->source + coding->src_bytes;
3953 int multibytep = coding->src_multibyte;
3954 int consumed_chars = 0;
3955 int found = 0;
3956 unsigned char *valids = CODING_CCL_VALIDS (coding);
3957 int head_ascii = coding->head_ascii;
3958 Lisp_Object attrs;
3959
3960 coding = &coding_categories[coding_category_ccl];
3961 attrs = CODING_ID_ATTRS (coding->id);
3962 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
3963 src += head_ascii;
3964
3965 while (1)
3966 {
3967 int c;
3968 ONE_MORE_BYTE (c);
3969 if (! valids[c])
3970 break;
3971 if (!found && valids[c] > 1)
3972 found = 1;
3973 }
3974 *mask &= ~CATEGORY_MASK_CCL;
3975 return 0;
3976
3977 no_more_source:
3978 if (!found)
3979 return 0;
3980 *mask &= CATEGORY_MASK_CCL;
3981 return 1;
3982}
3983
3984static void
3985decode_coding_ccl (coding)
3986 struct coding_system *coding;
3987{
3988 unsigned char *src = coding->source + coding->consumed;
3989 unsigned char *src_end = coding->source + coding->src_bytes;
3990 int *charbuf = coding->charbuf;
3991 int *charbuf_end = charbuf + coding->charbuf_size;
3992 int consumed_chars = 0;
3993 int multibytep = coding->src_multibyte;
3994 struct ccl_program ccl;
3995 int source_charbuf[1024];
3996 int source_byteidx[1024];
3997
3998 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
3999
4000 while (src < src_end)
4001 {
4002 unsigned char *p = src;
4003 int *source, *source_end;
4004 int i = 0;
4005
4006 if (multibytep)
4007 while (i < 1024 && p < src_end)
4008 {
4009 source_byteidx[i] = p - src;
4010 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4011 }
4012 else
4013 while (i < 1024 && p < src_end)
4014 source_charbuf[i++] = *p++;
4015
4016 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4017 ccl.last_block = 1;
4018
4019 source = source_charbuf;
4020 source_end = source + i;
4021 while (source < source_end)
4022 {
4023 ccl_driver (&ccl, source, charbuf,
4024 source_end - source, charbuf_end - charbuf);
4025 source += ccl.consumed;
4026 charbuf += ccl.produced;
4027 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4028 break;
4029 }
4030 if (source < source_end)
4031 src += source_byteidx[source - source_charbuf];
4032 else
4033 src = p;
4034 consumed_chars += source - source_charbuf;
4035
4036 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4037 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4038 break;
4039 }
4040
4041 switch (ccl.status)
4042 {
4043 case CCL_STAT_SUSPEND_BY_SRC:
4044 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4045 break;
4046 case CCL_STAT_SUSPEND_BY_DST:
4047 break;
4048 case CCL_STAT_QUIT:
4049 case CCL_STAT_INVALID_CMD:
4050 coding->result = CODING_RESULT_INTERRUPT;
4051 break;
4052 default:
4053 coding->result = CODING_RESULT_SUCCESS;
4054 break;
4055 }
4056 coding->consumed_char += consumed_chars;
4057 coding->consumed = src - coding->source;
4058 coding->charbuf_used = charbuf - coding->charbuf;
4059}
4060
4061static int
4062encode_coding_ccl (coding)
4063 struct coding_system *coding;
4064{
4065 struct ccl_program ccl;
4066 int multibytep = coding->dst_multibyte;
4067 int *charbuf = coding->charbuf;
4068 int *charbuf_end = charbuf + coding->charbuf_used;
4069 unsigned char *dst = coding->destination + coding->produced;
4070 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4071 unsigned char *adjusted_dst_end = dst_end - 1;
4072 int destination_charbuf[1024];
4073 int i, produced_chars = 0;
4074
4075 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4076
4077 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4078 ccl.dst_multibyte = coding->dst_multibyte;
4079
4080 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4081 {
4082 int dst_bytes = dst_end - dst;
4083 if (dst_bytes > 1024)
4084 dst_bytes = 1024;
4085
4086 ccl_driver (&ccl, charbuf, destination_charbuf,
4087 charbuf_end - charbuf, dst_bytes);
4088 charbuf += ccl.consumed;
4089 if (multibytep)
4090 for (i = 0; i < ccl.produced; i++)
4091 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4092 else
4093 {
4094 for (i = 0; i < ccl.produced; i++)
4095 *dst++ = destination_charbuf[i] & 0xFF;
4096 produced_chars += ccl.produced;
4097 }
4098 }
4099
4100 switch (ccl.status)
4101 {
4102 case CCL_STAT_SUSPEND_BY_SRC:
4103 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4104 break;
4105 case CCL_STAT_SUSPEND_BY_DST:
4106 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4107 break;
4108 case CCL_STAT_QUIT:
4109 case CCL_STAT_INVALID_CMD:
4110 coding->result = CODING_RESULT_INTERRUPT;
4111 break;
4112 default:
4113 coding->result = CODING_RESULT_SUCCESS;
4114 break;
4115 }
4116
4117 coding->produced_char += produced_chars;
4118 coding->produced = dst - coding->destination;
4119 return 0;
4ed46869
KH
4120}
4121
df7492f9
KH
4122
4123\f
4124/*** 10, 11. no-conversion handlers ***/
4125
4126/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4127
b73bfc1c 4128static void
df7492f9 4129decode_coding_raw_text (coding)
4ed46869 4130 struct coding_system *coding;
4ed46869 4131{
df7492f9 4132 coding->chars_at_source = 1;
2c78b7e1
KH
4133 coding->consumed_char = 0;
4134 coding->consumed = 0;
df7492f9
KH
4135 coding->result = CODING_RESULT_SUCCESS;
4136}
4ed46869 4137
df7492f9
KH
4138static int
4139encode_coding_raw_text (coding)
4140 struct coding_system *coding;
4141{
4142 int multibytep = coding->dst_multibyte;
4143 int *charbuf = coding->charbuf;
4144 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4145 unsigned char *dst = coding->destination + coding->produced;
4146 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4147 int produced_chars = 0;
4148 int c;
a5d301df 4149
df7492f9 4150 if (multibytep)
b73bfc1c 4151 {
df7492f9 4152 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4153
df7492f9
KH
4154 if (coding->src_multibyte)
4155 while (charbuf < charbuf_end)
4156 {
4157 ASSURE_DESTINATION (safe_room);
4158 c = *charbuf++;
4159 if (ASCII_CHAR_P (c))
4160 EMIT_ONE_ASCII_BYTE (c);
4161 else if (CHAR_BYTE8_P (c))
4162 {
4163 c = CHAR_TO_BYTE8 (c);
4164 EMIT_ONE_BYTE (c);
4165 }
4166 else
4167 {
4168 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4169
df7492f9
KH
4170 CHAR_STRING_ADVANCE (c, p1);
4171 while (p0 < p1)
4172 EMIT_ONE_BYTE (*p0);
4173 }
4174 }
b73bfc1c 4175 else
df7492f9
KH
4176 while (charbuf < charbuf_end)
4177 {
4178 ASSURE_DESTINATION (safe_room);
4179 c = *charbuf++;
4180 EMIT_ONE_BYTE (c);
4181 }
4182 }
4183 else
4184 {
4185 if (coding->src_multibyte)
b73bfc1c 4186 {
df7492f9
KH
4187 int safe_room = MAX_MULTIBYTE_LENGTH;
4188
4189 while (charbuf < charbuf_end)
b73bfc1c 4190 {
df7492f9
KH
4191 ASSURE_DESTINATION (safe_room);
4192 c = *charbuf++;
4193 if (ASCII_CHAR_P (c))
4194 *dst++ = c;
4195 else if (CHAR_BYTE8_P (c))
4196 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4197 else
df7492f9
KH
4198 CHAR_STRING_ADVANCE (c, dst);
4199 produced_chars++;
b73bfc1c 4200 }
4ed46869 4201 }
df7492f9
KH
4202 else
4203 {
4204 ASSURE_DESTINATION (charbuf_end - charbuf);
4205 while (charbuf < charbuf_end && dst < dst_end)
4206 *dst++ = *charbuf++;
4207 produced_chars = dst - (coding->destination + coding->dst_bytes);
4208 }
4ed46869 4209 }
df7492f9
KH
4210 coding->result = CODING_RESULT_SUCCESS;
4211 coding->produced_char += produced_chars;
4212 coding->produced = dst - coding->destination;
4213 return 0;
4ed46869
KH
4214}
4215
0a28aafb 4216static int
df7492f9
KH
4217detect_coding_charset (coding, mask)
4218 struct coding_system *coding;
4219 int *mask;
1397dc18 4220{
df7492f9
KH
4221 unsigned char *src = coding->source, *src_base = src;
4222 unsigned char *src_end = coding->source + coding->src_bytes;
4223 int multibytep = coding->src_multibyte;
4224 int consumed_chars = 0;
4225 Lisp_Object attrs, valids;
1397dc18 4226
df7492f9
KH
4227 coding = &coding_categories[coding_category_charset];
4228 attrs = CODING_ID_ATTRS (coding->id);
4229 valids = AREF (attrs, coding_attr_charset_valids);
4230
4231 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4232 src += coding->head_ascii;
1397dc18 4233
b73bfc1c 4234 while (1)
1397dc18 4235 {
df7492f9 4236 int c;
1397dc18 4237
df7492f9
KH
4238 ONE_MORE_BYTE (c);
4239 if (NILP (AREF (valids, c)))
4240 break;
4241 }
4242 *mask &= ~CATEGORY_MASK_CHARSET;
4243 return 0;
4ed46869 4244
df7492f9
KH
4245 no_more_source:
4246 *mask &= CATEGORY_MASK_CHARSET;
4247 return 1;
4248}
4ed46869 4249
b73bfc1c 4250static void
df7492f9 4251decode_coding_charset (coding)
4ed46869 4252 struct coding_system *coding;
4ed46869 4253{
df7492f9
KH
4254 unsigned char *src = coding->source + coding->consumed;
4255 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 4256 unsigned char *src_base;
df7492f9
KH
4257 int *charbuf = coding->charbuf;
4258 int *charbuf_end = charbuf + coding->charbuf_size;
4259 int consumed_chars = 0, consumed_chars_base;
4260 int multibytep = coding->src_multibyte;
4eb6d3f1 4261 Lisp_Object attrs, eol_type, charset_list, valids;
df7492f9
KH
4262
4263 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4264 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4265
df7492f9 4266 while (1)
4ed46869 4267 {
4eb6d3f1 4268 int c;
df7492f9
KH
4269
4270 src_base = src;
4271 consumed_chars_base = consumed_chars;
b73bfc1c 4272
df7492f9
KH
4273 if (charbuf >= charbuf_end)
4274 break;
4275
4eb6d3f1 4276 ONE_MORE_BYTE (c);
df7492f9 4277 if (c == '\r')
d46c5b12 4278 {
df7492f9 4279 if (EQ (eol_type, Qdos))
b73bfc1c 4280 {
4eb6d3f1
KH
4281 if (src < src_end
4282 && *src == '\n')
df7492f9 4283 ONE_MORE_BYTE (c);
b73bfc1c 4284 }
df7492f9 4285 else if (EQ (eol_type, Qmac))
b73bfc1c 4286 c = '\n';
d46c5b12 4287 }
df7492f9 4288 else
d46c5b12 4289 {
4eb6d3f1
KH
4290 Lisp_Object val;
4291 struct charset *charset;
4292 int c1;
4293
4294 val = AREF (valids, c);
4295 if (NILP (val))
4296 goto invalid_code;
4297 charset = CHARSET_FROM_ID (XFASTINT (val));
4298 if (CHARSET_DIMENSION (charset) > 1)
4299 {
4300 ONE_MORE_BYTE (c1);
4301 c = (c << 8) | c1;
4302 if (CHARSET_DIMENSION (charset) > 2)
4303 {
4304 ONE_MORE_BYTE (c1);
4305 c = (c << 8) | c1;
4306 if (CHARSET_DIMENSION (charset) > 3)
4307 {
4308 ONE_MORE_BYTE (c1);
4309 c = (c << 8) | c1;
4310 }
4311 }
4312 }
4313 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
df7492f9
KH
4314 if (c < 0)
4315 goto invalid_code;
d46c5b12 4316 }
df7492f9
KH
4317 *charbuf++ = c;
4318 continue;
4319
4320 invalid_code:
4321 src = src_base;
4322 consumed_chars = consumed_chars_base;
4323 ONE_MORE_BYTE (c);
4324 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4325 coding->errors++;
4ed46869
KH
4326 }
4327
df7492f9
KH
4328 no_more_source:
4329 coding->consumed_char += consumed_chars_base;
4330 coding->consumed = src_base - coding->source;
4331 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4332}
4333
df7492f9
KH
4334static int
4335encode_coding_charset (coding)
4ed46869 4336 struct coding_system *coding;
4ed46869 4337{
df7492f9
KH
4338 int multibytep = coding->dst_multibyte;
4339 int *charbuf = coding->charbuf;
4340 int *charbuf_end = charbuf + coding->charbuf_used;
4341 unsigned char *dst = coding->destination + coding->produced;
4342 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4343 int safe_room = MAX_MULTIBYTE_LENGTH;
4344 int produced_chars = 0;
4345 struct charset *charset;
4346 Lisp_Object attrs, eol_type, charset_list;
4347 int ascii_compatible;
b73bfc1c 4348 int c;
b73bfc1c 4349
df7492f9 4350 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4351 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4352
df7492f9 4353 while (charbuf < charbuf_end)
4ed46869 4354 {
4eb6d3f1 4355 struct charset *charset;
df7492f9
KH
4356 unsigned code;
4357
4358 ASSURE_DESTINATION (safe_room);
4359 c = *charbuf++;
4360 if (ascii_compatible && ASCII_CHAR_P (c))
4361 EMIT_ONE_ASCII_BYTE (c);
d46c5b12 4362 else
4eb6d3f1
KH
4363 {
4364 charset = char_charset (c, charset_list, &code);
4365 if (charset)
4366 {
4367 if (CHARSET_DIMENSION (charset) == 1)
4368 EMIT_ONE_BYTE (code);
4369 else if (CHARSET_DIMENSION (charset) == 2)
4370 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4371 else if (CHARSET_DIMENSION (charset) == 3)
4372 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4373 else
4374 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4375 (code >> 8) & 0xFF, code & 0xFF);
4376 }
4377 else
4378 EMIT_ONE_BYTE (coding->default_char);
4379 }
4ed46869
KH
4380 }
4381
df7492f9
KH
4382 coding->result = CODING_RESULT_SUCCESS;
4383 coding->produced_char += produced_chars;
4384 coding->produced = dst - coding->destination;
4385 return 0;
4ed46869
KH
4386}
4387
4388\f
1397dc18 4389/*** 7. C library functions ***/
4ed46869 4390
df7492f9 4391/* In Emacs Lisp, coding system is represented by a Lisp symbol which
4ed46869 4392 has a property `coding-system'. The value of this property is a
df7492f9 4393 vector of length 5 (called as coding-vector). Among elements of
4ed46869
KH
4394 this vector, the first (element[0]) and the fifth (element[4])
4395 carry important information for decoding/encoding. Before
4396 decoding/encoding, this information should be set in fields of a
4397 structure of type `coding_system'.
4398
df7492f9 4399 A value of property `coding-system' can be a symbol of another
4ed46869
KH
4400 subsidiary coding-system. In that case, Emacs gets coding-vector
4401 from that symbol.
4402
4403 `element[0]' contains information to be set in `coding->type'. The
4404 value and its meaning is as follows:
4405
0ef69138
KH
4406 0 -- coding_type_emacs_mule
4407 1 -- coding_type_sjis
df7492f9 4408 2 -- coding_type_iso_2022
0ef69138
KH
4409 3 -- coding_type_big5
4410 4 -- coding_type_ccl encoder/decoder written in CCL
4411 nil -- coding_type_no_conversion
4412 t -- coding_type_undecided (automatic conversion on decoding,
4413 no-conversion on encoding)
4ed46869
KH
4414
4415 `element[4]' contains information to be set in `coding->flags' and
4416 `coding->spec'. The meaning varies by `coding->type'.
4417
df7492f9 4418 If `coding->type' is `coding_type_iso_2022', element[4] is a vector
4ed46869
KH
4419 of length 32 (of which the first 13 sub-elements are used now).
4420 Meanings of these sub-elements are:
4421
df7492f9
KH
4422 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso_2022'
4423 If the value is an integer of valid charset, the charset is
4424 assumed to be designated to graphic register N initially.
4ed46869 4425
df7492f9
KH
4426 If the value is minus, it is a minus value of charset which
4427 reserves graphic register N, which means that the charset is
4428 not designated initially but should be designated to graphic
4429 register N just before encoding a character in that charset.
1397dc18 4430
df7492f9
KH
4431 If the value is nil, graphic register N is never used on
4432 encoding.
4433
4434 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
4435 Each value takes t or nil. See the section ISO2022 of
4436 `coding.h' for more information.
1397dc18 4437
df7492f9
KH
4438 If `coding->type' is `coding_type_big5', element[4] is t to denote
4439 BIG5-ETen or nil to denote BIG5-HKU.
4ed46869 4440
df7492f9 4441 If `coding->type' takes the other value, element[4] is ignored.
27901516 4442
df7492f9
KH
4443 Emacs Lisp's coding system also carries information about format of
4444 end-of-line in a value of property `eol-type'. If the value is
4445 integer, 0 means eol_lf, 1 means eol_crlf, and 2 means eol_cr. If
4446 it is not integer, it should be a vector of subsidiary coding
4447 systems of which property `eol-type' has one of above values.
4ed46869 4448
df7492f9 4449*/
4ed46869 4450
df7492f9
KH
4451/* Setup coding context CODING from information about CODING_SYSTEM.
4452 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4453 CODING_SYSTEM is invalid, signal an error. */
ec6d2bb8
KH
4454
4455void
df7492f9
KH
4456setup_coding_system (coding_system, coding)
4457 Lisp_Object coding_system;
ec6d2bb8
KH
4458 struct coding_system *coding;
4459{
df7492f9
KH
4460 Lisp_Object attrs;
4461 Lisp_Object eol_type;
4462 Lisp_Object coding_type;
4463 Lisp_Object val;
ec6d2bb8 4464
df7492f9
KH
4465 if (NILP (coding_system))
4466 coding_system = Qno_conversion;
4467
4468 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4469
4470 attrs = CODING_ID_ATTRS (coding->id);
4471 eol_type = CODING_ID_EOL_TYPE (coding->id);
4472
4473 coding->mode = 0;
4474 coding->head_ascii = -1;
4475 coding->common_flags
4476 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4477
4478 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4479 coding->max_charset_id = XSTRING (val)->size - 1;
4480 coding->safe_charsets = (char *) XSTRING (val)->data;
4481 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4482
4483 coding_type = CODING_ATTR_TYPE (attrs);
4484 if (EQ (coding_type, Qundecided))
4485 {
4486 coding->detector = NULL;
4487 coding->decoder = decode_coding_raw_text;
4488 coding->encoder = encode_coding_raw_text;
4489 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4490 }
4491 else if (EQ (coding_type, Qiso_2022))
4492 {
4493 int i;
4494 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4495
4496 /* Invoke graphic register 0 to plane 0. */
4497 CODING_ISO_INVOCATION (coding, 0) = 0;
4498 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4499 CODING_ISO_INVOCATION (coding, 1)
4500 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4501 /* Setup the initial status of designation. */
4502 for (i = 0; i < 4; i++)
4503 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4504 /* Not single shifting initially. */
4505 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4506 /* Beginning of buffer should also be regarded as bol. */
4507 CODING_ISO_BOL (coding) = 1;
4508 coding->detector = detect_coding_iso_2022;
4509 coding->decoder = decode_coding_iso_2022;
4510 coding->encoder = encode_coding_iso_2022;
4511 if (flags & CODING_ISO_FLAG_SAFE)
4512 coding->mode |= CODING_MODE_SAFE_ENCODING;
4513 coding->common_flags
4514 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4515 | CODING_REQUIRE_FLUSHING_MASK);
4516 if (flags & CODING_ISO_FLAG_COMPOSITION)
4517 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4518 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4519 {
4520 setup_iso_safe_charsets (attrs);
4521 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4522 coding->max_charset_id = XSTRING (val)->size - 1;
4523 coding->safe_charsets = (char *) XSTRING (val)->data;
4524 }
4525 CODING_ISO_FLAGS (coding) = flags;
4526 }
4527 else if (EQ (coding_type, Qcharset))
4528 {
4529 coding->detector = detect_coding_charset;
4530 coding->decoder = decode_coding_charset;
4531 coding->encoder = encode_coding_charset;
4532 coding->common_flags
4533 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4534 }
4535 else if (EQ (coding_type, Qutf_8))
4536 {
4537 coding->detector = detect_coding_utf_8;
4538 coding->decoder = decode_coding_utf_8;
4539 coding->encoder = encode_coding_utf_8;
4540 coding->common_flags
4541 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4542 }
4543 else if (EQ (coding_type, Qutf_16))
4544 {
4545 val = AREF (attrs, coding_attr_utf_16_bom);
4546 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4547 : EQ (val, Qt) ? utf_16_with_bom
4548 : utf_16_without_bom);
4549 val = AREF (attrs, coding_attr_utf_16_endian);
4550 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4551 : utf_16_little_endian);
e19c3639 4552 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4553 coding->detector = detect_coding_utf_16;
4554 coding->decoder = decode_coding_utf_16;
4555 coding->encoder = encode_coding_utf_16;
4556 coding->common_flags
4557 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4558 }
4559 else if (EQ (coding_type, Qccl))
4560 {
4561 coding->detector = detect_coding_ccl;
4562 coding->decoder = decode_coding_ccl;
4563 coding->encoder = encode_coding_ccl;
4564 coding->common_flags
4565 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4566 | CODING_REQUIRE_FLUSHING_MASK);
4567 }
4568 else if (EQ (coding_type, Qemacs_mule))
4569 {
4570 coding->detector = detect_coding_emacs_mule;
4571 coding->decoder = decode_coding_emacs_mule;
4572 coding->encoder = encode_coding_emacs_mule;
4573 coding->common_flags
4574 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4575 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4576 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4577 {
4578 Lisp_Object tail, safe_charsets;
4579 int max_charset_id = 0;
4580
4581 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4582 tail = XCDR (tail))
4583 if (max_charset_id < XFASTINT (XCAR (tail)))
4584 max_charset_id = XFASTINT (XCAR (tail));
4585 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4586 make_number (255));
4587 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4588 tail = XCDR (tail))
4589 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4590 coding->max_charset_id = max_charset_id;
4591 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4592 }
4593 }
4594 else if (EQ (coding_type, Qshift_jis))
4595 {
4596 coding->detector = detect_coding_sjis;
4597 coding->decoder = decode_coding_sjis;
4598 coding->encoder = encode_coding_sjis;
4599 coding->common_flags
4600 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4601 }
4602 else if (EQ (coding_type, Qbig5))
4603 {
4604 coding->detector = detect_coding_big5;
4605 coding->decoder = decode_coding_big5;
4606 coding->encoder = encode_coding_big5;
4607 coding->common_flags
4608 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4609 }
4610 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4611 {
df7492f9
KH
4612 coding->detector = NULL;
4613 coding->decoder = decode_coding_raw_text;
4614 coding->encoder = encode_coding_raw_text;
4615 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
ec6d2bb8 4616 }
df7492f9
KH
4617
4618 return;
ec6d2bb8
KH
4619}
4620
df7492f9
KH
4621/* Return raw-text or one of its subsidiaries that has the same
4622 eol_type as CODING-SYSTEM. */
ec6d2bb8 4623
df7492f9
KH
4624Lisp_Object
4625raw_text_coding_system (coding_system)
4626 Lisp_Object coding_system;
ec6d2bb8 4627{
0be8721c 4628 Lisp_Object spec, attrs;
df7492f9
KH
4629 Lisp_Object eol_type, raw_text_eol_type;
4630
4631 spec = CODING_SYSTEM_SPEC (coding_system);
4632 attrs = AREF (spec, 0);
4633
4634 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4635 return coding_system;
ec6d2bb8 4636
df7492f9
KH
4637 eol_type = AREF (spec, 2);
4638 if (VECTORP (eol_type))
4639 return Qraw_text;
4640 spec = CODING_SYSTEM_SPEC (Qraw_text);
4641 raw_text_eol_type = AREF (spec, 2);
4642 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4643 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4644 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
4645}
4646
54f78171 4647
df7492f9
KH
4648/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4649 does, return one of the subsidiary that has the same eol-spec as
4650 PARENT. Otherwise, return CODING_SYSTEM. */
4651
4652Lisp_Object
4653coding_inherit_eol_type (coding_system, parent)
54f78171 4654{
df7492f9 4655 Lisp_Object spec, attrs, eol_type;
54f78171 4656
df7492f9
KH
4657 spec = CODING_SYSTEM_SPEC (coding_system);
4658 attrs = AREF (spec, 0);
4659 eol_type = AREF (spec, 2);
4660 if (VECTORP (eol_type))
4661 {
4662 Lisp_Object parent_spec;
df7492f9
KH
4663 Lisp_Object parent_eol_type;
4664
4665 parent_spec
4666 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4667 parent_eol_type = AREF (parent_spec, 2);
4668 if (EQ (parent_eol_type, Qunix))
4669 coding_system = AREF (eol_type, 0);
4670 else if (EQ (parent_eol_type, Qdos))
4671 coding_system = AREF (eol_type, 1);
4672 else if (EQ (parent_eol_type, Qmac))
4673 coding_system = AREF (eol_type, 2);
54f78171 4674 }
df7492f9 4675 return coding_system;
54f78171
KH
4676}
4677
4ed46869
KH
4678/* Emacs has a mechanism to automatically detect a coding system if it
4679 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4680 it's impossible to distinguish some coding systems accurately
4681 because they use the same range of codes. So, at first, coding
4682 systems are categorized into 7, those are:
4683
0ef69138 4684 o coding-category-emacs-mule
4ed46869
KH
4685
4686 The category for a coding system which has the same code range
4687 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 4688 symbol) `emacs-mule' by default.
4ed46869
KH
4689
4690 o coding-category-sjis
4691
4692 The category for a coding system which has the same code range
4693 as SJIS. Assigned the coding-system (Lisp
7717c392 4694 symbol) `japanese-shift-jis' by default.
4ed46869
KH
4695
4696 o coding-category-iso-7
4697
4698 The category for a coding system which has the same code range
7717c392 4699 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
4700 shift and single shift functions. This can encode/decode all
4701 charsets. Assigned the coding-system (Lisp symbol)
4702 `iso-2022-7bit' by default.
4703
4704 o coding-category-iso-7-tight
4705
4706 Same as coding-category-iso-7 except that this can
4707 encode/decode only the specified charsets.
4ed46869
KH
4708
4709 o coding-category-iso-8-1
4710
4711 The category for a coding system which has the same code range
4712 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4713 for DIMENSION1 charset. This doesn't use any locking shift
4714 and single shift functions. Assigned the coding-system (Lisp
4715 symbol) `iso-latin-1' by default.
4ed46869
KH
4716
4717 o coding-category-iso-8-2
4718
4719 The category for a coding system which has the same code range
4720 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4721 for DIMENSION2 charset. This doesn't use any locking shift
4722 and single shift functions. Assigned the coding-system (Lisp
4723 symbol) `japanese-iso-8bit' by default.
4ed46869 4724
7717c392 4725 o coding-category-iso-7-else
4ed46869
KH
4726
4727 The category for a coding system which has the same code range
df7492f9 4728 as ISO2022 of 7-bit environemnt but uses locking shift or
7717c392
KH
4729 single shift functions. Assigned the coding-system (Lisp
4730 symbol) `iso-2022-7bit-lock' by default.
4731
4732 o coding-category-iso-8-else
4733
4734 The category for a coding system which has the same code range
df7492f9 4735 as ISO2022 of 8-bit environemnt but uses locking shift or
7717c392
KH
4736 single shift functions. Assigned the coding-system (Lisp
4737 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
4738
4739 o coding-category-big5
4740
4741 The category for a coding system which has the same code range
4742 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 4743 `cn-big5' by default.
4ed46869 4744
fa42c37f
KH
4745 o coding-category-utf-8
4746
4747 The category for a coding system which has the same code range
4748 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4749 symbol) `utf-8' by default.
4750
4751 o coding-category-utf-16-be
4752
4753 The category for a coding system in which a text has an
4754 Unicode signature (cf. Unicode Standard) in the order of BIG
4755 endian at the head. Assigned the coding-system (Lisp symbol)
4756 `utf-16-be' by default.
4757
4758 o coding-category-utf-16-le
4759
4760 The category for a coding system in which a text has an
4761 Unicode signature (cf. Unicode Standard) in the order of
4762 LITTLE endian at the head. Assigned the coding-system (Lisp
4763 symbol) `utf-16-le' by default.
4764
1397dc18
KH
4765 o coding-category-ccl
4766
4767 The category for a coding system of which encoder/decoder is
4768 written in CCL programs. The default value is nil, i.e., no
4769 coding system is assigned.
4770
4ed46869
KH
4771 o coding-category-binary
4772
4773 The category for a coding system not categorized in any of the
4774 above. Assigned the coding-system (Lisp symbol)
e0e989f6 4775 `no-conversion' by default.
4ed46869
KH
4776
4777 Each of them is a Lisp symbol and the value is an actual
df7492f9 4778 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
4779 What Emacs does actually is to detect a category of coding system.
4780 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 4781 decide only one possible category, it selects a category of the
4ed46869
KH
4782 highest priority. Priorities of categories are also specified by a
4783 user in a Lisp variable `coding-category-list'.
4784
4785*/
4786
df7492f9
KH
4787#define EOL_SEEN_NONE 0
4788#define EOL_SEEN_LF 1
4789#define EOL_SEEN_CR 2
4790#define EOL_SEEN_CRLF 4
4ed46869 4791
df7492f9
KH
4792/* Detect how end-of-line of a text of length CODING->src_bytes
4793 pointed by CODING->source is encoded. Return one of
4794 EOL_SEEN_XXX. */
4ed46869 4795
bc4bc72a
RS
4796#define MAX_EOL_CHECK_COUNT 3
4797
d46c5b12 4798static int
df7492f9
KH
4799detect_eol (coding, source, src_bytes)
4800 struct coding_system *coding;
d46c5b12 4801 unsigned char *source;
df7492f9 4802 EMACS_INT src_bytes;
4ed46869 4803{
df7492f9 4804 Lisp_Object attrs, coding_type;
d46c5b12 4805 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 4806 unsigned char c;
df7492f9
KH
4807 int total = 0;
4808 int eol_seen = EOL_SEEN_NONE;
4ed46869 4809
df7492f9
KH
4810 attrs = CODING_ID_ATTRS (coding->id);
4811 coding_type = CODING_ATTR_TYPE (attrs);
d46c5b12 4812
df7492f9 4813 if (EQ (coding_type, Qccl))
4ed46869 4814 {
df7492f9 4815 int msb, lsb;
fa42c37f 4816
df7492f9
KH
4817 msb = coding->spec.utf_16.endian == utf_16_little_endian;
4818 lsb = 1 - msb;
fa42c37f 4819
df7492f9 4820 while (src + 1 < src_end)
fa42c37f 4821 {
df7492f9
KH
4822 c = src[lsb];
4823 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 4824 {
df7492f9
KH
4825 int this_eol;
4826
4827 if (c == '\n')
4828 this_eol = EOL_SEEN_LF;
4829 else if (src + 3 >= src_end
4830 || src[msb + 2] != 0
4831 || src[lsb + 2] != '\n')
4832 this_eol = EOL_SEEN_CR;
fa42c37f 4833 else
df7492f9
KH
4834 this_eol = EOL_SEEN_CRLF;
4835
4836 if (eol_seen == EOL_SEEN_NONE)
4837 /* This is the first end-of-line. */
4838 eol_seen = this_eol;
4839 else if (eol_seen != this_eol)
fa42c37f 4840 {
df7492f9
KH
4841 /* The found type is different from what found before. */
4842 eol_seen = EOL_SEEN_LF;
4843 break;
fa42c37f 4844 }
df7492f9
KH
4845 if (++total == MAX_EOL_CHECK_COUNT)
4846 break;
fa42c37f 4847 }
df7492f9 4848 src += 2;
fa42c37f 4849 }
df7492f9 4850 }
d46c5b12 4851 else
27901516 4852 {
df7492f9 4853 while (src < src_end)
27901516 4854 {
df7492f9
KH
4855 c = *src++;
4856 if (c == '\n' || c == '\r')
4857 {
4858 int this_eol;
d46c5b12 4859
df7492f9
KH
4860 if (c == '\n')
4861 this_eol = EOL_SEEN_LF;
4862 else if (src >= src_end || *src != '\n')
4863 this_eol = EOL_SEEN_CR;
4864 else
4865 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 4866
df7492f9
KH
4867 if (eol_seen == EOL_SEEN_NONE)
4868 /* This is the first end-of-line. */
4869 eol_seen = this_eol;
4870 else if (eol_seen != this_eol)
4871 {
4872 /* The found type is different from what found before. */
4873 eol_seen = EOL_SEEN_LF;
4874 break;
4875 }
4876 if (++total == MAX_EOL_CHECK_COUNT)
4877 break;
4878 }
4879 }
73be902c 4880 }
df7492f9 4881 return eol_seen;
73be902c
KH
4882}
4883
df7492f9 4884
73be902c 4885static void
df7492f9
KH
4886adjust_coding_eol_type (coding, eol_seen)
4887 struct coding_system *coding;
4888 int eol_seen;
73be902c 4889{
0be8721c 4890 Lisp_Object eol_type;
df7492f9
KH
4891
4892 eol_type = CODING_ID_EOL_TYPE (coding->id);
4893 if (eol_seen & EOL_SEEN_LF)
4894 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
4895 else if (eol_type & EOL_SEEN_CRLF)
4896 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
4897 else if (eol_type & EOL_SEEN_CR)
4898 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12
KH
4899}
4900
df7492f9
KH
4901/* Detect how a text specified in CODING is encoded. If a coding
4902 system is detected, update fields of CODING by the detected coding
4903 system. */
4904
4905void
4906detect_coding (coding)
d46c5b12 4907 struct coding_system *coding;
d46c5b12 4908{
df7492f9
KH
4909 unsigned char *src, *src_end;
4910 Lisp_Object attrs, coding_type;
d46c5b12 4911
df7492f9
KH
4912 coding->consumed = coding->consumed_char = 0;
4913 coding->produced = coding->produced_char = 0;
4914 coding_set_source (coding);
1c3478b0 4915
df7492f9 4916 src_end = coding->source + coding->src_bytes;
1c3478b0 4917
df7492f9
KH
4918 /* If we have not yet decided the text encoding type, detect it
4919 now. */
4920 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 4921 {
df7492f9
KH
4922 int mask = CATEGORY_MASK_ANY;
4923 int c, i;
4924
4925 for (src = coding->source; src < src_end; src++)
4926 {
4927 c = *src;
4928 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
4929 || c == ISO_CODE_SI
4930 || c == ISO_CODE_SO)))
4931 break;
4932 }
4933 coding->head_ascii = src - (coding->source + coding->consumed);
4934
4935 if (coding->head_ascii < coding->src_bytes)
1c3478b0 4936 {
df7492f9
KH
4937 int detected = 0;
4938
4939 for (i = 0; i < coding_category_raw_text; i++)
1c3478b0 4940 {
df7492f9
KH
4941 enum coding_category category = coding_priorities[i];
4942 struct coding_system *this = coding_categories + category;
4943
4944 if (category >= coding_category_raw_text
4945 || detected & (1 << category))
4946 continue;
4947
4948 if (this->id < 0)
1c3478b0 4949 {
df7492f9
KH
4950 /* No coding system of this category is defined. */
4951 mask &= ~(1 << category);
4952 }
4953 else
4954 {
4955 detected |= detected_mask[category];
4956 if ((*(this->detector)) (coding, &mask))
4957 break;
1c3478b0
KH
4958 }
4959 }
df7492f9
KH
4960 if (! mask)
4961 setup_coding_system (Qraw_text, coding);
4962 else if (mask != CATEGORY_MASK_ANY)
4963 for (i = 0; i < coding_category_raw_text; i++)
4964 {
4965 enum coding_category category = coding_priorities[i];
4966 struct coding_system *this = coding_categories + category;
4967
4968 if (mask & (1 << category))
4969 {
4970 setup_coding_system (CODING_ID_NAME (this->id), coding);
4971 break;
4972 }
4973 }
1c3478b0 4974 }
b73bfc1c 4975 }
69f76525 4976
df7492f9
KH
4977 attrs = CODING_ID_ATTRS (coding->id);
4978 coding_type = CODING_ATTR_TYPE (attrs);
4979
4980 /* If we have not yet decided the EOL type, detect it now. But, the
4981 detection is impossible for a CCL based coding system, in which
4982 case, we detct the EOL type after decoding. */
4983 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
4984 && ! EQ (coding_type, Qccl))
d46c5b12 4985 {
df7492f9
KH
4986 int eol_seen = detect_eol (coding, coding->source, coding->src_bytes);
4987
4988 if (eol_seen != EOL_SEEN_NONE)
4989 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 4990 }
4ed46869
KH
4991}
4992
aaaf0b1e
KH
4993
4994static void
df7492f9 4995decode_eol (coding)
aaaf0b1e 4996 struct coding_system *coding;
aaaf0b1e 4997{
df7492f9 4998 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 4999 {
df7492f9
KH
5000 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5001 unsigned char *pend = p + coding->produced;
5002 int eol_seen = EOL_SEEN_NONE;
aaaf0b1e 5003
df7492f9 5004 for (; p < pend; p++)
aaaf0b1e 5005 {
df7492f9
KH
5006 if (*p == '\n')
5007 eol_seen |= EOL_SEEN_LF;
5008 else if (*p == '\r')
aaaf0b1e 5009 {
df7492f9 5010 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5011 {
df7492f9
KH
5012 eol_seen |= EOL_SEEN_CRLF;
5013 p++;
aaaf0b1e 5014 }
aaaf0b1e 5015 else
df7492f9 5016 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5017 }
aaaf0b1e 5018 }
df7492f9
KH
5019 if (eol_seen != EOL_SEEN_NONE)
5020 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5021 }
aaaf0b1e 5022
df7492f9
KH
5023 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5024 {
5025 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5026 unsigned char *pend = p + coding->produced;
5027
5028 for (; p < pend; p++)
5029 if (*p == '\r')
5030 *p = '\n';
5031 }
5032 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5033 {
5034 unsigned char *p, *pbeg, *pend;
5035 Lisp_Object undo_list;
5036
5037 move_gap_both (coding->dst_pos + coding->produced_char,
5038 coding->dst_pos_byte + coding->produced);
5039 undo_list = current_buffer->undo_list;
5040 current_buffer->undo_list = Qt;
5041 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, Qnil);
5042 current_buffer->undo_list = undo_list;
5043 pbeg = GPT_ADDR;
5044 pend = pbeg + coding->produced;
5045
5046 for (p = pend - 1; p >= pbeg; p--)
5047 if (*p == '\r')
5048 {
5049 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5050 pend--;
5051 }
5052 coding->produced_char -= coding->produced - (pend - pbeg);
5053 coding->produced = pend - pbeg;
5054 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e
KH
5055 }
5056}
5057
df7492f9
KH
5058static void
5059translate_chars (coding, table)
4ed46869 5060 struct coding_system *coding;
df7492f9 5061 Lisp_Object table;
4ed46869 5062{
df7492f9
KH
5063 int *charbuf = coding->charbuf;
5064 int *charbuf_end = charbuf + coding->charbuf_used;
5065 int c;
5066
5067 if (coding->chars_at_source)
5068 return;
4ed46869 5069
df7492f9 5070 while (charbuf < charbuf_end)
8844fa83 5071 {
df7492f9
KH
5072 c = *charbuf;
5073 if (c < 0)
5074 charbuf += c;
5075 else
5076 *charbuf++ = translate_char (table, c);
8844fa83 5077 }
df7492f9 5078}
4ed46869 5079
df7492f9
KH
5080static int
5081produce_chars (coding)
5082 struct coding_system *coding;
5083{
5084 unsigned char *dst = coding->destination + coding->produced;
5085 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5086 int produced;
5087 int produced_chars = 0;
b73bfc1c 5088
df7492f9 5089 if (! coding->chars_at_source)
4ed46869 5090 {
df7492f9
KH
5091 /* Characters are in coding->charbuf. */
5092 int *buf = coding->charbuf;
5093 int *buf_end = buf + coding->charbuf_used;
5094 unsigned char *adjusted_dst_end;
4ed46869 5095
df7492f9
KH
5096 if (BUFFERP (coding->src_object)
5097 && EQ (coding->src_object, coding->dst_object))
5098 dst_end = coding->source + coding->consumed;
5099 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5100
df7492f9
KH
5101 while (buf < buf_end)
5102 {
5103 int c = *buf++;
5104
5105 if (dst >= adjusted_dst_end)
5106 {
5107 dst = alloc_destination (coding,
5108 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5109 dst);
5110 dst_end = coding->destination + coding->dst_bytes;
5111 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5112 }
5113 if (c >= 0)
5114 {
5115 if (coding->dst_multibyte
5116 || ! CHAR_BYTE8_P (c))
5117 CHAR_STRING_ADVANCE (c, dst);
5118 else
5119 *dst++ = CHAR_TO_BYTE8 (c);
5120 produced_chars++;
5121 }
5122 else
5123 /* This is an annotation data. */
5124 buf -= c + 1;
5125 }
5126 }
5127 else
5128 {
df7492f9
KH
5129 unsigned char *src = coding->source;
5130 unsigned char *src_end = src + coding->src_bytes;
5131 Lisp_Object eol_type;
b73bfc1c 5132
df7492f9 5133 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5134
df7492f9 5135 if (coding->src_multibyte != coding->dst_multibyte)
aaaf0b1e 5136 {
df7492f9
KH
5137 if (coding->src_multibyte)
5138 {
71c81426 5139 int multibytep = 1;
df7492f9 5140 int consumed_chars;
d46c5b12 5141
df7492f9
KH
5142 while (1)
5143 {
5144 unsigned char *src_base = src;
5145 int c;
b73bfc1c 5146
df7492f9
KH
5147 ONE_MORE_BYTE (c);
5148 if (c == '\r')
5149 {
5150 if (EQ (eol_type, Qdos))
5151 {
5152 if (src < src_end
5153 && *src == '\n')
5154 c = *src++;
5155 }
5156 else if (EQ (eol_type, Qmac))
5157 c = '\n';
5158 }
5159 if (dst == dst_end)
5160 {
2c78b7e1 5161 coding->consumed = src - coding->source;
b73bfc1c 5162
2c78b7e1
KH
5163 if (EQ (coding->src_object, coding->dst_object))
5164 dst_end = src;
5165 if (dst == dst_end)
5166 {
5167 dst = alloc_destination (coding, src_end - src + 1,
5168 dst);
5169 dst_end = coding->destination + coding->dst_bytes;
5170 coding_set_source (coding);
5171 src = coding->source + coding->consumed;
5172 src_end = coding->source + coding->src_bytes;
5173 }
df7492f9
KH
5174 }
5175 *dst++ = c;
5176 produced_chars++;
5177 }
5178 no_more_source:
5179 ;
5180 }
5181 else
5182 while (src < src_end)
5183 {
71c81426 5184 int multibytep = 1;
df7492f9 5185 int c = *src++;
b73bfc1c 5186
df7492f9
KH
5187 if (c == '\r')
5188 {
5189 if (EQ (eol_type, Qdos))
5190 {
5191 if (src < src_end
5192 && *src == '\n')
5193 c = *src++;
5194 }
5195 else if (EQ (eol_type, Qmac))
5196 c = '\n';
5197 }
5198 if (dst >= dst_end - 1)
5199 {
2c78b7e1 5200 coding->consumed = src - coding->source;
df7492f9 5201
2c78b7e1
KH
5202 if (EQ (coding->src_object, coding->dst_object))
5203 dst_end = src;
5204 if (dst >= dst_end - 1)
5205 {
5206 dst = alloc_destination (coding, src_end - src + 2,
5207 dst);
5208 dst_end = coding->destination + coding->dst_bytes;
5209 coding_set_source (coding);
5210 src = coding->source + coding->consumed;
5211 src_end = coding->source + coding->src_bytes;
5212 }
df7492f9
KH
5213 }
5214 EMIT_ONE_BYTE (c);
5215 }
d46c5b12 5216 }
df7492f9
KH
5217 else
5218 {
5219 if (!EQ (coding->src_object, coding->dst_object))
5220 {
5221 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5222
df7492f9
KH
5223 if (require > 0)
5224 {
5225 EMACS_INT offset = src - coding->source;
5226
5227 dst = alloc_destination (coding, require, dst);
5228 coding_set_source (coding);
5229 src = coding->source + offset;
5230 src_end = coding->source + coding->src_bytes;
5231 }
5232 }
5233 produced_chars = coding->src_chars;
5234 while (src < src_end)
5235 {
5236 int c = *src++;
5237
5238 if (c == '\r')
5239 {
5240 if (EQ (eol_type, Qdos))
5241 {
5242 if (src < src_end
5243 && *src == '\n')
5244 c = *src++;
5245 produced_chars--;
5246 }
5247 else if (EQ (eol_type, Qmac))
5248 c = '\n';
5249 }
5250 *dst++ = c;
5251 }
5252 }
2c78b7e1
KH
5253 coding->consumed = coding->src_bytes;
5254 coding->consumed_char = coding->src_chars;
b73bfc1c 5255 }
4ed46869 5256
df7492f9
KH
5257 produced = dst - (coding->destination + coding->produced);
5258 if (BUFFERP (coding->dst_object))
5259 insert_from_gap (produced_chars, produced);
5260 coding->produced += produced;
5261 coding->produced_char += produced_chars;
5262 return produced_chars;
b73bfc1c 5263}
52d41803 5264
df7492f9
KH
5265/* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5266 or
5267 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5268 */
4ed46869 5269
df7492f9
KH
5270static INLINE void
5271produce_composition (coding, charbuf)
4ed46869 5272 struct coding_system *coding;
df7492f9 5273 int *charbuf;
4ed46869 5274{
df7492f9
KH
5275 Lisp_Object buffer;
5276 int len;
5277 EMACS_INT pos;
5278 enum composition_method method;
5279 int cmp_len;
5280 Lisp_Object components;
5281
5282 buffer = coding->dst_object;
5283 len = -charbuf[0];
5284 pos = coding->dst_pos + charbuf[1];
5285 method = (enum composition_method) (charbuf[3]);
5286 cmp_len = charbuf[4];
5287
5288 if (method == COMPOSITION_RELATIVE)
5289 components = Qnil;
5290 else
d46c5b12 5291 {
df7492f9
KH
5292 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5293 int i;
4ed46869 5294
df7492f9
KH
5295 len -= 5;
5296 charbuf += 5;
5297 for (i = 0; i < len; i++)
5298 args[i] = make_number (charbuf[i]);
5299 components = (method == COMPOSITION_WITH_ALTCHARS
5300 ? Fstring (len, args) : Fvector (len, args));
5301 }
5302 compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5303}
b73bfc1c 5304
df7492f9
KH
5305static int *
5306save_composition_data (buf, buf_end, prop)
5307 int *buf, *buf_end;
5308 Lisp_Object prop;
5309{
5310 enum composition_method method = COMPOSITION_METHOD (prop);
5311 int cmp_len = COMPOSITION_LENGTH (prop);
4ed46869 5312
df7492f9
KH
5313 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5314 return NULL;
d46c5b12 5315
df7492f9
KH
5316 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5317 buf[2] = method;
5318 buf[3] = cmp_len;
b73bfc1c 5319
df7492f9
KH
5320 if (method == COMPOSITION_RELATIVE)
5321 buf[0] = 4;
5322 else
b73bfc1c 5323 {
df7492f9
KH
5324 Lisp_Object components;
5325 int len, i;
b73bfc1c 5326
df7492f9
KH
5327 components = COMPOSITION_COMPONENTS (prop);
5328 if (VECTORP (components))
d46c5b12 5329 {
df7492f9
KH
5330 len = XVECTOR (components)->size;
5331 for (i = 0; i < len; i++)
5332 buf[4 + i] = XINT (AREF (components, i));
5333 }
5334 else if (STRINGP (components))
5335 {
5336 int i_byte;
b73bfc1c 5337
df7492f9
KH
5338 len = XSTRING (components)->size;
5339 i = i_byte = 0;
5340 while (i < len)
5341 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5342 }
5343 else if (INTEGERP (components))
5344 {
5345 len = 1;
5346 buf[4] = XINT (components);
5347 }
5348 else if (CONSP (components))
5349 {
5350 for (len = 0; CONSP (components);
5351 len++, components = XCDR (components))
5352 buf[4 + len] = XINT (XCAR (components));
d46c5b12 5353 }
df7492f9
KH
5354 else
5355 abort ();
5356 buf[0] = 4 + len;
4ed46869 5357 }
df7492f9 5358 return (buf + buf[0]);
4ed46869
KH
5359}
5360
df7492f9
KH
5361#define CHARBUF_SIZE 0x4000
5362
5363#define ALLOC_CONVERSION_WORK_AREA(coding) \
5364 do { \
5365 int size = CHARBUF_SIZE;; \
5366 \
5367 coding->charbuf = NULL; \
5368 while (size > 1024) \
5369 { \
5370 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5371 if (coding->charbuf) \
5372 break; \
5373 size >>= 1; \
5374 } \
5375 if (! coding->charbuf) \
5376 { \
5377 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5378 return coding->result; \
5379 } \
5380 coding->charbuf_size = size; \
5381 } while (0)
4ed46869 5382
d46c5b12
KH
5383
5384static void
df7492f9 5385produce_annotation (coding)
d46c5b12 5386 struct coding_system *coding;
d46c5b12 5387{
df7492f9
KH
5388 int *charbuf = coding->charbuf;
5389 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5390
df7492f9 5391 while (charbuf < charbuf_end)
d46c5b12 5392 {
df7492f9
KH
5393 if (*charbuf >= 0)
5394 charbuf++;
d46c5b12 5395 else
d46c5b12 5396 {
df7492f9
KH
5397 int len = -*charbuf;
5398 switch (charbuf[2])
5399 {
5400 case CODING_ANNOTATE_COMPOSITION_MASK:
5401 produce_composition (coding, charbuf);
5402 break;
5403 default:
5404 abort ();
5405 }
5406 charbuf += len;
d46c5b12 5407 }
df7492f9
KH
5408 }
5409}
d46c5b12 5410
df7492f9
KH
5411/* Decode the data at CODING->src_object into CODING->dst_object.
5412 CODING->src_object is a buffer, a string, or nil.
5413 CODING->dst_object is a buffer.
de79a6a5 5414
df7492f9
KH
5415 If CODING->src_object is a buffer, it must be the current buffer.
5416 In this case, if CODING->src_pos is positive, it is a position of
5417 the source text in the buffer, otherwise, the source text is in the
5418 gap area of the buffer, and CODING->src_pos specifies the offset of
5419 the text from GPT (which must be the same as PT). If this is the
5420 same buffer as CODING->dst_object, CODING->src_pos must be
5421 negative.
b73bfc1c 5422
df7492f9
KH
5423 If CODING->src_object is a string, CODING->src_pos in an index to
5424 that string.
d46c5b12 5425
df7492f9
KH
5426 If CODING->src_object is nil, CODING->source must already point to
5427 the non-relocatable memory area. In this case, CODING->src_pos is
5428 an offset from CODING->source.
d46c5b12 5429
df7492f9
KH
5430 The decoded data is inserted at the current point of the buffer
5431 CODING->dst_object.
5432*/
5433
5434static int
5435decode_coding (coding)
d46c5b12 5436 struct coding_system *coding;
d46c5b12 5437{
df7492f9 5438 Lisp_Object attrs;
d46c5b12 5439
df7492f9
KH
5440 if (BUFFERP (coding->src_object)
5441 && coding->src_pos > 0
5442 && coding->src_pos < GPT
5443 && coding->src_pos + coding->src_chars > GPT)
5444 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5445
df7492f9 5446 if (BUFFERP (coding->dst_object))
88993dfd 5447 {
df7492f9
KH
5448 if (current_buffer != XBUFFER (coding->dst_object))
5449 set_buffer_internal (XBUFFER (coding->dst_object));
5450 if (GPT != PT)
5451 move_gap_both (PT, PT_BYTE);
88993dfd
KH
5452 }
5453
df7492f9
KH
5454 coding->consumed = coding->consumed_char = 0;
5455 coding->produced = coding->produced_char = 0;
5456 coding->chars_at_source = 0;
5457 coding->result = CODING_RESULT_SUCCESS;
5458 coding->errors = 0;
5459
5460 ALLOC_CONVERSION_WORK_AREA (coding);
5461
5462 attrs = CODING_ID_ATTRS (coding->id);
5463
5464 do
d46c5b12 5465 {
df7492f9
KH
5466 coding_set_source (coding);
5467 coding->annotated = 0;
5468 (*(coding->decoder)) (coding);
5469 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5470 translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
5471 coding_set_destination (coding);
5472 produce_chars (coding);
5473 if (coding->annotated)
5474 produce_annotation (coding);
d46c5b12 5475 }
df7492f9
KH
5476 while (coding->consumed < coding->src_bytes
5477 && ! coding->result);
d46c5b12 5478
df7492f9
KH
5479 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5480 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5481 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5482 decode_eol (coding);
d46c5b12 5483
df7492f9
KH
5484 coding->carryover_bytes = 0;
5485 if (coding->consumed < coding->src_bytes)
d46c5b12 5486 {
df7492f9
KH
5487 int nbytes = coding->src_bytes - coding->consumed;
5488 unsigned char *src;
5489
5490 coding_set_source (coding);
5491 coding_set_destination (coding);
5492 src = coding->source + coding->consumed;
5493
5494 if (coding->mode & CODING_MODE_LAST_BLOCK)
d46c5b12 5495 {
df7492f9
KH
5496 /* Flush out unprocessed data as binary chars. We are sure
5497 that the number of data is less than the size of
5498 coding->charbuf. */
5499 int *charbuf = coding->charbuf;
5500
5501 while (nbytes-- > 0)
d46c5b12 5502 {
df7492f9
KH
5503 int c = *src++;
5504 *charbuf++ = (c & 0x80 ? - c : c);
d46c5b12 5505 }
df7492f9 5506 produce_chars (coding);
d46c5b12 5507 }
d46c5b12 5508 else
df7492f9
KH
5509 {
5510 /* Record unprocessed bytes in coding->carryover. We are
5511 sure that the number of data is less than the size of
5512 coding->carryover. */
5513 unsigned char *p = coding->carryover;
5514
5515 coding->carryover_bytes = nbytes;
5516 while (nbytes-- > 0)
5517 *p++ = *src++;
5518 }
5519 coding->consumed = coding->src_bytes;
5520 }
b73bfc1c 5521
df7492f9 5522 return coding->result;
d46c5b12
KH
5523}
5524
df7492f9
KH
5525static void
5526consume_chars (coding)
5527 struct coding_system *coding;
5528{
5529 int *buf = coding->charbuf;
5530 /* -1 is to compensate for CRLF. */
5531 int *buf_end = coding->charbuf + coding->charbuf_size - 1;
5532 unsigned char *src = coding->source + coding->consumed;
5533 int pos = coding->src_pos + coding->consumed_char;
5534 int end_pos = coding->src_pos + coding->src_chars;
5535 int multibytep = coding->src_multibyte;
5536 Lisp_Object eol_type;
5537 int c;
5538 int start, end, stop;
5539 Lisp_Object object, prop;
88993dfd 5540
df7492f9
KH
5541 eol_type = CODING_ID_EOL_TYPE (coding->id);
5542 if (VECTORP (eol_type))
5543 eol_type = Qunix;
88993dfd 5544
df7492f9 5545 object = coding->src_object;
b843d1ae 5546
df7492f9
KH
5547 /* Note: composition handling is not yet implemented. */
5548 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 5549
df7492f9
KH
5550 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5551 && find_composition (pos, end_pos, &start, &end, &prop, object)
5552 && end <= end_pos
5553 && (start >= pos
5554 || (find_composition (end, end_pos, &start, &end, &prop, object)
5555 && end <= end_pos)))
5556 stop = start;
5557 else
5558 stop = end_pos;
ec6d2bb8 5559
df7492f9 5560 while (buf < buf_end)
ec6d2bb8 5561 {
df7492f9 5562 if (pos == stop)
ec6d2bb8 5563 {
df7492f9 5564 int *p;
ec6d2bb8 5565
df7492f9
KH
5566 if (pos == end_pos)
5567 break;
5568 p = save_composition_data (buf, buf_end, prop);
5569 if (p == NULL)
5570 break;
5571 buf = p;
5572 if (find_composition (end, end_pos, &start, &end, &prop, object)
5573 && end <= end_pos)
5574 stop = start;
5575 else
5576 stop = end_pos;
5577 }
5578
5579 if (! multibytep)
5580 c = *src++;
5581 else
5582 c = STRING_CHAR_ADVANCE (src);
5583 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5584 c = '\n';
5585 if (! EQ (eol_type, Qunix))
5586 {
5587 if (c == '\n')
5588 {
5589 if (EQ (eol_type, Qdos))
5590 *buf++ = '\r';
5591 else
5592 c = '\r';
ec6d2bb8 5593 }
ec6d2bb8 5594 }
df7492f9
KH
5595 *buf++ = c;
5596 pos++;
ec6d2bb8 5597 }
ec6d2bb8 5598
df7492f9
KH
5599 coding->consumed = src - coding->source;
5600 coding->consumed_char = pos - coding->src_pos;
5601 coding->charbuf_used = buf - coding->charbuf;
5602 coding->chars_at_source = 0;
ec6d2bb8
KH
5603}
5604
ec6d2bb8 5605
df7492f9
KH
5606/* Encode the text at CODING->src_object into CODING->dst_object.
5607 CODING->src_object is a buffer or a string.
5608 CODING->dst_object is a buffer or nil.
5609
5610 If CODING->src_object is a buffer, it must be the current buffer.
5611 In this case, if CODING->src_pos is positive, it is a position of
5612 the source text in the buffer, otherwise. the source text is in the
5613 gap area of the buffer, and coding->src_pos specifies the offset of
5614 the text from GPT (which must be the same as PT). If this is the
5615 same buffer as CODING->dst_object, CODING->src_pos must be
5616 negative and CODING should not have `pre-write-conversion'.
5617
5618 If CODING->src_object is a string, CODING should not have
5619 `pre-write-conversion'.
5620
5621 If CODING->dst_object is a buffer, the encoded data is inserted at
5622 the current point of that buffer.
5623
5624 If CODING->dst_object is nil, the encoded data is placed at the
5625 memory area specified by CODING->destination. */
5626
5627static int
5628encode_coding (coding)
ec6d2bb8 5629 struct coding_system *coding;
ec6d2bb8 5630{
df7492f9 5631 Lisp_Object attrs;
ec6d2bb8 5632
df7492f9 5633 attrs = CODING_ID_ATTRS (coding->id);
ec6d2bb8 5634
df7492f9 5635 if (BUFFERP (coding->dst_object))
ec6d2bb8 5636 {
df7492f9
KH
5637 set_buffer_internal (XBUFFER (coding->dst_object));
5638 coding->dst_multibyte
5639 = ! NILP (current_buffer->enable_multibyte_characters);
5640 }
ec6d2bb8 5641
df7492f9
KH
5642 coding->consumed = coding->consumed_char = 0;
5643 coding->produced = coding->produced_char = 0;
5644 coding->result = CODING_RESULT_SUCCESS;
5645 coding->errors = 0;
ec6d2bb8 5646
df7492f9 5647 ALLOC_CONVERSION_WORK_AREA (coding);
ec6d2bb8 5648
df7492f9
KH
5649 do {
5650 coding_set_source (coding);
5651 consume_chars (coding);
5652
5653 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
5654 translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
5655
5656 coding_set_destination (coding);
5657 (*(coding->encoder)) (coding);
5658 } while (coding->consumed_char < coding->src_chars);
5659
5660 if (BUFFERP (coding->dst_object))
5661 insert_from_gap (coding->produced_char, coding->produced);
5662
5663 return (coding->result);
ec6d2bb8
KH
5664}
5665
df7492f9 5666/* Work buffer */
fb88bf2d 5667
df7492f9
KH
5668/* List of currently used working buffer. */
5669Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 5670
df7492f9
KH
5671/* A working buffer used by the top level conversion. */
5672Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 5673
4ed46869 5674
df7492f9
KH
5675/* Return a working buffer that can be freely used by the following
5676 code conversion. MULTIBYTEP specifies the multibyteness of the
5677 buffer. */
b73bfc1c 5678
df7492f9
KH
5679Lisp_Object
5680make_conversion_work_buffer (multibytep)
5681 int multibytep;
5682{
5683 struct buffer *current = current_buffer;
5684 Lisp_Object buf;
d46c5b12 5685
df7492f9 5686 if (NILP (Vcode_conversion_work_buf_list))
e133c8fa 5687 {
df7492f9
KH
5688 if (NILP (Vcode_conversion_reused_work_buf))
5689 Vcode_conversion_reused_work_buf
5690 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5691 Vcode_conversion_work_buf_list
5692 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
e133c8fa 5693 }
df7492f9 5694 else
d46c5b12 5695 {
df7492f9
KH
5696 int depth = Flength (Vcode_conversion_work_buf_list);
5697 char str[128];
e077cc80 5698
df7492f9
KH
5699 sprintf (str, " *code-conversion-work*<%d>", depth);
5700 Vcode_conversion_work_buf_list
5701 = Fcons (Fget_buffer_create (build_string (str)),
5702 Vcode_conversion_work_buf_list);
d46c5b12 5703 }
d46c5b12 5704
df7492f9
KH
5705 buf = XCAR (Vcode_conversion_work_buf_list);
5706 set_buffer_internal (XBUFFER (buf));
5707 current_buffer->undo_list = Qt;
5708 Ferase_buffer ();
5709 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
5710 set_buffer_internal (current);
5711 return buf;
5712}
d46c5b12 5713
df7492f9 5714static struct coding_system *saved_coding;
d46c5b12 5715
df7492f9
KH
5716Lisp_Object
5717code_conversion_restore (info)
5718 Lisp_Object info;
5719{
5720 int depth = Flength (Vcode_conversion_work_buf_list);
5721 Lisp_Object buf;
d46c5b12 5722
df7492f9 5723 if (depth > 0)
d46c5b12 5724 {
df7492f9
KH
5725 buf = XCAR (Vcode_conversion_work_buf_list);
5726 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5727 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5728 Fkill_buffer (buf);
5729 }
d46c5b12 5730
df7492f9
KH
5731 if (saved_coding->dst_object == Qt
5732 && saved_coding->destination)
5733 xfree (saved_coding->destination);
b843d1ae 5734
df7492f9
KH
5735 return save_excursion_restore (info);
5736}
d46c5b12 5737
12410ef1 5738
df7492f9
KH
5739int
5740decode_coding_gap (coding, chars, bytes)
5741 struct coding_system *coding;
5742 EMACS_INT chars, bytes;
5743{
5744 int count = specpdl_ptr - specpdl;
fb88bf2d 5745
df7492f9
KH
5746 saved_coding = coding;
5747 record_unwind_protect (code_conversion_restore, save_excursion_save ());
ec6d2bb8 5748
df7492f9
KH
5749 coding->src_object = Fcurrent_buffer ();
5750 coding->src_chars = chars;
5751 coding->src_bytes = bytes;
5752 coding->src_pos = -chars;
5753 coding->src_pos_byte = -bytes;
5754 coding->src_multibyte = chars < bytes;
5755 coding->dst_object = coding->src_object;
5756 coding->dst_pos = PT;
5757 coding->dst_pos_byte = PT_BYTE;
71c81426 5758 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4956c225 5759
df7492f9
KH
5760 if (CODING_REQUIRE_DETECTION (coding))
5761 detect_coding (coding);
5762
5763 decode_coding (coding);
d46c5b12 5764
df7492f9
KH
5765 unbind_to (count, Qnil);
5766 return coding->result;
5767}
d46c5b12 5768
df7492f9
KH
5769int
5770encode_coding_gap (coding, chars, bytes)
5771 struct coding_system *coding;
5772 EMACS_INT chars, bytes;
5773{
5774 int count = specpdl_ptr - specpdl;
5775 Lisp_Object buffer;
d46c5b12 5776
df7492f9
KH
5777 saved_coding = coding;
5778 record_unwind_protect (code_conversion_restore, save_excursion_save ());
fb88bf2d 5779
df7492f9
KH
5780 buffer = Fcurrent_buffer ();
5781 coding->src_object = buffer;
5782 coding->src_chars = chars;
5783 coding->src_bytes = bytes;
5784 coding->src_pos = -chars;
5785 coding->src_pos_byte = -bytes;
5786 coding->src_multibyte = chars < bytes;
5787 coding->dst_object = coding->src_object;
5788 coding->dst_pos = PT;
5789 coding->dst_pos_byte = PT_BYTE;
fb88bf2d 5790
df7492f9 5791 encode_coding (coding);
f2558efd 5792
df7492f9
KH
5793 unbind_to (count, Qnil);
5794 return coding->result;
5795}
b73bfc1c 5796
d46c5b12 5797
df7492f9
KH
5798/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5799 SRC_OBJECT into DST_OBJECT by coding context CODING.
ec6d2bb8 5800
df7492f9 5801 SRC_OBJECT is a buffer, a string, or Qnil.
ec6d2bb8 5802
df7492f9
KH
5803 If it is a buffer, the text is at point of the buffer. FROM and TO
5804 are positions in the buffer.
ec6d2bb8 5805
df7492f9
KH
5806 If it is a string, the text is at the beginning of the string.
5807 FROM and TO are indices to the string.
ec6d2bb8 5808
df7492f9
KH
5809 If it is nil, the text is at coding->source. FROM and TO are
5810 indices to coding->source.
ec6d2bb8 5811
df7492f9 5812 DST_OBJECT is a buffer, Qt, or Qnil.
d46c5b12 5813
df7492f9
KH
5814 If it is a buffer, the decoded text is inserted at point of the
5815 buffer. If the buffer is the same as SRC_OBJECT, the source text
5816 is deleted.
d46c5b12 5817
df7492f9
KH
5818 If it is Qt, a string is made from the decoded text, and
5819 set in CODING->dst_object.
d46c5b12 5820
df7492f9
KH
5821 If it is Qnil, the decoded text is stored at CODING->destination.
5822 The called must allocate CODING->dst_bytes bytes at
5823 CODING->destination by xmalloc. If the decoded text is longer than
5824 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5825 */
d46c5b12 5826
df7492f9
KH
5827void
5828decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5829 dst_object)
5830 struct coding_system *coding;
5831 Lisp_Object src_object;
5832 EMACS_INT from, from_byte, to, to_byte;
5833 Lisp_Object dst_object;
5834{
5835 int count = specpdl_ptr - specpdl;
5836 unsigned char *destination;
5837 EMACS_INT dst_bytes;
5838 EMACS_INT chars = to - from;
5839 EMACS_INT bytes = to_byte - from_byte;
5840 Lisp_Object attrs;
d46c5b12 5841
df7492f9
KH
5842 saved_coding = coding;
5843 record_unwind_protect (code_conversion_restore, save_excursion_save ());
93dec019 5844
df7492f9
KH
5845 if (NILP (dst_object))
5846 {
5847 destination = coding->destination;
5848 dst_bytes = coding->dst_bytes;
5849 }
93dec019 5850
df7492f9
KH
5851 coding->src_object = src_object;
5852 coding->src_chars = chars;
5853 coding->src_bytes = bytes;
5854 coding->src_multibyte = chars < bytes;
70ad9fc4 5855
df7492f9
KH
5856 if (STRINGP (src_object))
5857 {
5858 coding->src_pos = from;
5859 coding->src_pos_byte = from_byte;
5860 }
5861 else if (BUFFERP (src_object))
5862 {
5863 set_buffer_internal (XBUFFER (src_object));
5864 if (from != GPT)
5865 move_gap_both (from, from_byte);
5866 if (EQ (src_object, dst_object))
fb88bf2d 5867 {
df7492f9
KH
5868 TEMP_SET_PT_BOTH (from, from_byte);
5869 del_range_both (from, from_byte, to, to_byte, 1);
5870 coding->src_pos = -chars;
5871 coding->src_pos_byte = -bytes;
fb88bf2d 5872 }
df7492f9 5873 else
fb88bf2d 5874 {
df7492f9
KH
5875 coding->src_pos = from;
5876 coding->src_pos_byte = from_byte;
fb88bf2d 5877 }
d46c5b12 5878 }
fb88bf2d 5879
df7492f9
KH
5880 if (CODING_REQUIRE_DETECTION (coding))
5881 detect_coding (coding);
5882 attrs = CODING_ID_ATTRS (coding->id);
5883
5884 if (! NILP (CODING_ATTR_POST_READ (attrs))
5885 || EQ (dst_object, Qt))
b73bfc1c 5886 {
df7492f9
KH
5887 coding->dst_object = make_conversion_work_buffer (1);
5888 coding->dst_pos = BEG;
5889 coding->dst_pos_byte = BEG_BYTE;
5890 coding->dst_multibyte = 1;
b73bfc1c 5891 }
df7492f9 5892 else if (BUFFERP (dst_object))
12410ef1 5893 {
df7492f9
KH
5894 coding->dst_object = dst_object;
5895 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
5896 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
5897 coding->dst_multibyte
5898 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
12410ef1 5899 }
72d1a715 5900 else
df7492f9
KH
5901 {
5902 coding->dst_object = Qnil;
5903 coding->dst_multibyte = 1;
5904 }
5905
5906 decode_coding (coding);
4ed46869 5907
df7492f9
KH
5908 if (BUFFERP (coding->dst_object))
5909 set_buffer_internal (XBUFFER (coding->dst_object));
ec6d2bb8 5910
df7492f9 5911 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 5912 {
df7492f9
KH
5913 struct gcpro gcpro1, gcpro2;
5914 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 5915 Lisp_Object val;
4ed46869 5916
df7492f9
KH
5917 GCPRO2 (coding->src_object, coding->dst_object);
5918 val = call1 (CODING_ATTR_POST_READ (attrs),
5919 make_number (coding->produced_char));
5920 UNGCPRO;
5921 CHECK_NATNUM (val);
5922 coding->produced_char += Z - prev_Z;
5923 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 5924 }
4ed46869 5925
df7492f9 5926 if (EQ (dst_object, Qt))
ec6d2bb8 5927 {
df7492f9
KH
5928 coding->dst_object = Fbuffer_string ();
5929 }
5930 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
5931 {
5932 set_buffer_internal (XBUFFER (coding->dst_object));
5933 if (dst_bytes < coding->produced)
5934 {
5935 destination
5936 = (unsigned char *) xrealloc (destination, coding->produced);
5937 if (! destination)
5938 {
5939 coding->result = CODING_RESULT_INSUFFICIENT_DST;
5940 unbind_to (count, Qnil);
5941 return;
5942 }
5943 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
5944 move_gap_both (BEGV, BEGV_BYTE);
5945 bcopy (BEGV_ADDR, destination, coding->produced);
5946 coding->destination = destination;
5947 }
ec6d2bb8 5948 }
2b4f9037 5949
df7492f9 5950 unbind_to (count, Qnil);
d46c5b12
KH
5951}
5952
df7492f9
KH
5953
5954void
5955encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5956 dst_object)
b73bfc1c 5957 struct coding_system *coding;
df7492f9
KH
5958 Lisp_Object src_object;
5959 EMACS_INT from, from_byte, to, to_byte;
5960 Lisp_Object dst_object;
b73bfc1c
KH
5961{
5962 int count = specpdl_ptr - specpdl;
df7492f9
KH
5963 EMACS_INT chars = to - from;
5964 EMACS_INT bytes = to_byte - from_byte;
5965 Lisp_Object attrs;
5966
5967 saved_coding = coding;
5968 record_unwind_protect (code_conversion_restore, save_excursion_save ());
5969
5970 coding->src_object = src_object;
5971 coding->src_chars = chars;
5972 coding->src_bytes = bytes;
5973 coding->src_multibyte = chars < bytes;
5974
5975 attrs = CODING_ID_ATTRS (coding->id);
5976
5977 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 5978 {
df7492f9 5979 Lisp_Object val;
b73bfc1c 5980
df7492f9
KH
5981 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
5982 set_buffer_internal (XBUFFER (coding->src_object));
5983 if (STRINGP (src_object))
5984 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
5985 else if (BUFFERP (src_object))
5986 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
5987 else
5988 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
5989
5990 if (EQ (src_object, dst_object))
5991 {
5992 set_buffer_internal (XBUFFER (src_object));
5993 del_range_both (from, from_byte, to, to_byte, 1);
5994 set_buffer_internal (XBUFFER (coding->src_object));
5995 }
5996
5997 val = call2 (CODING_ATTR_PRE_WRITE (attrs),
5998 make_number (1), make_number (chars));
5999 CHECK_NATNUM (val);
6000 if (BEG != GPT)
6001 move_gap_both (BEG, BEG_BYTE);
6002 coding->src_chars = Z - BEG;
6003 coding->src_bytes = Z_BYTE - BEG_BYTE;
6004 coding->src_pos = BEG;
6005 coding->src_pos_byte = BEG_BYTE;
6006 coding->src_multibyte = Z < Z_BYTE;
6007 }
6008 else if (STRINGP (src_object))
6009 {
6010 coding->src_pos = from;
6011 coding->src_pos_byte = from_byte;
6012 }
6013 else if (BUFFERP (src_object))
d46c5b12 6014 {
df7492f9
KH
6015 set_buffer_internal (XBUFFER (src_object));
6016 if (from != GPT)
6017 move_gap_both (from, from_byte);
6018 if (EQ (src_object, dst_object))
d46c5b12 6019 {
df7492f9
KH
6020 del_range_both (from, from_byte, to, to_byte, 1);
6021 coding->src_pos = -chars;
6022 coding->src_pos_byte = -bytes;
d46c5b12 6023 }
df7492f9 6024 else
d46c5b12 6025 {
df7492f9
KH
6026 coding->src_pos = from;
6027 coding->src_pos_byte = from_byte;
d46c5b12
KH
6028 }
6029 }
4ed46869 6030
df7492f9 6031 if (BUFFERP (dst_object))
d46c5b12 6032 {
df7492f9
KH
6033 coding->dst_object = dst_object;
6034 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6035 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6036 coding->dst_multibyte
6037 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
b73bfc1c 6038 }
df7492f9 6039 else if (EQ (dst_object, Qt))
4956c225 6040 {
df7492f9
KH
6041 coding->dst_object = Qnil;
6042 coding->destination = (unsigned char *) xmalloc (coding->src_chars);
6043 coding->dst_bytes = coding->src_chars;
6044 coding->dst_multibyte = 0;
4956c225 6045 }
df7492f9 6046 else
78108bcd 6047 {
df7492f9
KH
6048 coding->dst_object = Qnil;
6049 coding->dst_multibyte = 0;
78108bcd
KH
6050 }
6051
df7492f9 6052 encode_coding (coding);
4ed46869 6053
df7492f9 6054 if (EQ (dst_object, Qt))
4ed46869 6055 {
df7492f9
KH
6056 if (BUFFERP (coding->dst_object))
6057 coding->dst_object = Fbuffer_string ();
6058 else
73be902c 6059 {
df7492f9
KH
6060 coding->dst_object
6061 = make_unibyte_string ((char *) coding->destination,
6062 coding->produced);
6063 xfree (coding->destination);
73be902c 6064 }
4ed46869 6065 }
d46c5b12 6066
df7492f9 6067 unbind_to (count, Qnil);
b73bfc1c
KH
6068}
6069
df7492f9 6070
b73bfc1c 6071Lisp_Object
df7492f9 6072preferred_coding_system ()
b73bfc1c 6073{
df7492f9 6074 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6075
df7492f9 6076 return CODING_ID_NAME (id);
4ed46869
KH
6077}
6078
6079\f
6080#ifdef emacs
1397dc18 6081/*** 8. Emacs Lisp library functions ***/
4ed46869 6082
4ed46869 6083DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6084 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6085See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6086about coding-system objects. */)
6087 (obj)
4ed46869
KH
6088 Lisp_Object obj;
6089{
df7492f9 6090 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6091}
6092
9d991de8
RS
6093DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6094 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6095 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6096 (prompt)
4ed46869
KH
6097 Lisp_Object prompt;
6098{
e0e989f6 6099 Lisp_Object val;
9d991de8
RS
6100 do
6101 {
4608c386
KH
6102 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6103 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
6104 }
6105 while (XSTRING (val)->size == 0);
e0e989f6 6106 return (Fintern (val, Qnil));
4ed46869
KH
6107}
6108
9b787f3e 6109DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6110 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6111If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6112 (prompt, default_coding_system)
9b787f3e 6113 Lisp_Object prompt, default_coding_system;
4ed46869 6114{
f44d27ce 6115 Lisp_Object val;
9b787f3e
RS
6116 if (SYMBOLP (default_coding_system))
6117 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 6118 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6119 Qt, Qnil, Qcoding_system_history,
6120 default_coding_system, Qnil);
e0e989f6 6121 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6122}
6123
6124DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6125 1, 1, 0,
48b0f3ae
PJ
6126 doc: /* Check validity of CODING-SYSTEM.
6127If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6128It is valid if it is a symbol with a non-nil `coding-system' property.
6129The value of property should be a vector of length 5. */)
df7492f9 6130 (coding_system)
4ed46869
KH
6131 Lisp_Object coding_system;
6132{
b7826503 6133 CHECK_SYMBOL (coding_system);
4ed46869
KH
6134 if (!NILP (Fcoding_system_p (coding_system)))
6135 return coding_system;
6136 while (1)
02ba4723 6137 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6138}
df7492f9 6139
3a73fa5d 6140\f
d46c5b12 6141Lisp_Object
df7492f9 6142detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
d46c5b12
KH
6143 unsigned char *src;
6144 int src_bytes, highest;
0a28aafb 6145 int multibytep;
df7492f9 6146 Lisp_Object coding_system;
4ed46869 6147{
df7492f9
KH
6148 unsigned char *src_end = src + src_bytes;
6149 int mask = CATEGORY_MASK_ANY;
6150 int detected = 0;
6151 int c, i;
6152 Lisp_Object attrs, eol_type;
6153 Lisp_Object val;
6154 struct coding_system coding;
6155
6156 if (NILP (coding_system))
6157 coding_system = Qundecided;
6158 setup_coding_system (coding_system, &coding);
6159 attrs = CODING_ID_ATTRS (coding.id);
6160 eol_type = CODING_ID_EOL_TYPE (coding.id);
4ed46869 6161
df7492f9
KH
6162 coding.source = src;
6163 coding.src_bytes = src_bytes;
6164 coding.src_multibyte = multibytep;
6165 coding.consumed = 0;
4ed46869 6166
df7492f9 6167 if (XINT (CODING_ATTR_CATEGORY (attrs)) != coding_category_undecided)
4ed46869 6168 {
df7492f9 6169 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
4ed46869 6170 }
df7492f9 6171 else
4ed46869 6172 {
df7492f9
KH
6173 coding_system = Qnil;
6174 for (; src < src_end; src++)
4ed46869 6175 {
df7492f9
KH
6176 c = *src;
6177 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6178 || c == ISO_CODE_SI
6179 || c == ISO_CODE_SO)))
d46c5b12 6180 break;
4ed46869 6181 }
df7492f9
KH
6182 coding.head_ascii = src - coding.source;
6183
6184 if (src < src_end)
6185 for (i = 0; i < coding_category_raw_text; i++)
6186 {
6187 enum coding_category category = coding_priorities[i];
6188 struct coding_system *this = coding_categories + category;
6189
6190 if (category >= coding_category_raw_text
6191 || detected & (1 << category))
6192 continue;
6193
6194 if (this->id < 0)
6195 {
6196 /* No coding system of this category is defined. */
6197 mask &= ~(1 << category);
6198 }
6199 else
6200 {
6201 detected |= detected_mask[category];
6202 if ((*(coding_categories[category].detector)) (&coding, &mask)
6203 && highest)
6204 {
6205 mask &= detected_mask[category];
6206 break;
6207 }
6208 }
6209 }
4ed46869 6210 }
4ed46869 6211
df7492f9
KH
6212 if (!mask)
6213 val = Fcons (make_number (coding_category_raw_text), Qnil);
6214 else if (mask == CATEGORY_MASK_ANY)
6215 val = Fcons (make_number (coding_category_undecided), Qnil);
6216 else if (highest)
4ed46869 6217 {
df7492f9
KH
6218 for (i = 0; i < coding_category_raw_text; i++)
6219 if (mask & (1 << coding_priorities[i]))
6220 {
6221 val = Fcons (make_number (coding_priorities[i]), Qnil);
6222 break;
6223 }
6224 }
6225 else
6226 {
6227 val = Qnil;
6228 for (i = coding_category_raw_text - 1; i >= 0; i--)
6229 if (mask & (1 << coding_priorities[i]))
6230 val = Fcons (make_number (coding_priorities[i]), val);
4ed46869 6231 }
df7492f9
KH
6232
6233 {
6234 int one_byte_eol = -1, two_byte_eol = -1;
6235 Lisp_Object tail;
6236
6237 for (tail = val; CONSP (tail); tail = XCDR (tail))
6238 {
6239 struct coding_system *this
6240 = (NILP (coding_system) ? coding_categories + XINT (XCAR (tail))
6241 : &coding);
6242 int this_eol;
6243
6244 attrs = CODING_ID_ATTRS (this->id);
6245 eol_type = CODING_ID_EOL_TYPE (this->id);
6246 XSETCAR (tail, CODING_ID_NAME (this->id));
6247 if (VECTORP (eol_type))
6248 {
6249 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_16))
6250 {
6251 if (two_byte_eol < 0)
6252 two_byte_eol = detect_eol (this, coding.source, src_bytes);
6253 this_eol = two_byte_eol;
6254 }
6255 else
6256 {
6257 if (one_byte_eol < 0)
6258 one_byte_eol =detect_eol (this, coding.source, src_bytes);
6259 this_eol = one_byte_eol;
6260 }
6261 if (this_eol == EOL_SEEN_LF)
6262 XSETCAR (tail, AREF (eol_type, 0));
6263 else if (this_eol == EOL_SEEN_CRLF)
6264 XSETCAR (tail, AREF (eol_type, 1));
6265 else if (this_eol == EOL_SEEN_CR)
6266 XSETCAR (tail, AREF (eol_type, 2));
6267 }
6268 }
6269 }
6270
03699b14 6271 return (highest ? XCAR (val) : val);
93dec019 6272}
4ed46869 6273
df7492f9 6274
d46c5b12
KH
6275DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6276 2, 3, 0,
48b0f3ae
PJ
6277 doc: /* Detect coding system of the text in the region between START and END.
6278Return a list of possible coding systems ordered by priority.
6279
6280If only ASCII characters are found, it returns a list of single element
6281`undecided' or its subsidiary coding system according to a detected
6282end-of-line format.
6283
6284If optional argument HIGHEST is non-nil, return the coding system of
6285highest priority. */)
6286 (start, end, highest)
d46c5b12
KH
6287 Lisp_Object start, end, highest;
6288{
6289 int from, to;
6290 int from_byte, to_byte;
6289dd10 6291
b7826503
PJ
6292 CHECK_NUMBER_COERCE_MARKER (start);
6293 CHECK_NUMBER_COERCE_MARKER (end);
4ed46869 6294
d46c5b12
KH
6295 validate_region (&start, &end);
6296 from = XINT (start), to = XINT (end);
6297 from_byte = CHAR_TO_BYTE (from);
6298 to_byte = CHAR_TO_BYTE (to);
6289dd10 6299
d46c5b12
KH
6300 if (from < GPT && to >= GPT)
6301 move_gap_both (to, to_byte);
c210f766 6302
d46c5b12 6303 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6304 to_byte - from_byte,
0a28aafb
KH
6305 !NILP (highest),
6306 !NILP (current_buffer
df7492f9
KH
6307 ->enable_multibyte_characters),
6308 Qnil);
d46c5b12 6309}
6289dd10 6310
d46c5b12
KH
6311DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6312 1, 2, 0,
48b0f3ae
PJ
6313 doc: /* Detect coding system of the text in STRING.
6314Return a list of possible coding systems ordered by priority.
6315
6316If only ASCII characters are found, it returns a list of single element
6317`undecided' or its subsidiary coding system according to a detected
6318end-of-line format.
6319
6320If optional argument HIGHEST is non-nil, return the coding system of
6321highest priority. */)
6322 (string, highest)
d46c5b12
KH
6323 Lisp_Object string, highest;
6324{
b7826503 6325 CHECK_STRING (string);
4ed46869 6326
d46c5b12 6327 return detect_coding_system (XSTRING (string)->data,
df7492f9 6328 STRING_BYTES (XSTRING (string)),
0a28aafb 6329 !NILP (highest),
df7492f9
KH
6330 STRING_MULTIBYTE (string),
6331 Qnil);
4ed46869
KH
6332}
6333
05e6f5dc 6334
df7492f9
KH
6335static INLINE int
6336char_encodable_p (c, attrs)
6337 int c;
6338 Lisp_Object attrs;
05e6f5dc 6339{
df7492f9 6340 Lisp_Object tail;
df7492f9 6341 struct charset *charset;
05e6f5dc 6342
df7492f9
KH
6343 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6344 CONSP (tail); tail = XCDR (tail))
05e6f5dc 6345 {
df7492f9
KH
6346 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6347 if (CHAR_CHARSET_P (c, charset))
6348 break;
05e6f5dc 6349 }
df7492f9 6350 return (! NILP (tail));
05e6f5dc
KH
6351}
6352
6353
df7492f9
KH
6354/* Return a list of coding systems that safely encode the text between
6355 START and END. If EXCLUDE is non-nil, it is a list of coding
6356 systems not to check. The returned list doesn't contain any such
6357 coding systems. In any case, If the text contains only ASCII or is
6358 unibyte, return t. */
6359
6360DEFUN ("find-coding-systems-region-internal",
6361 Ffind_coding_systems_region_internal,
6362 Sfind_coding_systems_region_internal, 2, 3, 0,
6363 doc: /* Internal use only. */)
6364 (start, end, exclude)
6365 Lisp_Object start, end, exclude;
6366{
6367 Lisp_Object coding_attrs_list, safe_codings;
6368 EMACS_INT start_byte, end_byte;
6369 unsigned char *p, *pbeg, *pend;
6370 int c;
6371 Lisp_Object tail, elt;
05e6f5dc 6372
df7492f9
KH
6373 if (STRINGP (start))
6374 {
6375 if (!STRING_MULTIBYTE (start)
6376 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6377 return Qt;
6378 start_byte = 0;
6379 end_byte = STRING_BYTES (XSTRING (start));
6380 }
6381 else
6382 {
6383 CHECK_NUMBER_COERCE_MARKER (start);
6384 CHECK_NUMBER_COERCE_MARKER (end);
6385 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6386 args_out_of_range (start, end);
6387 if (NILP (current_buffer->enable_multibyte_characters))
6388 return Qt;
6389 start_byte = CHAR_TO_BYTE (XINT (start));
6390 end_byte = CHAR_TO_BYTE (XINT (end));
6391 if (XINT (end) - XINT (start) == end_byte - start_byte)
6392 return Qt;
05e6f5dc 6393
df7492f9
KH
6394 if (start < GPT && end > GPT)
6395 {
6396 if ((GPT - start) < (end - GPT))
6397 move_gap_both (start, start_byte);
6398 else
6399 move_gap_both (end, end_byte);
6400 }
6401 }
05e6f5dc 6402
df7492f9
KH
6403 coding_attrs_list = Qnil;
6404 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6405 if (NILP (exclude)
6406 || NILP (Fmemq (XCAR (tail), exclude)))
6407 {
6408 Lisp_Object attrs;
05e6f5dc 6409
df7492f9
KH
6410 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6411 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6412 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6413 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6414 }
6415
6416 if (STRINGP (start))
6417 p = pbeg = XSTRING (start)->data;
6418 else
6419 p = pbeg = BYTE_POS_ADDR (start_byte);
6420 pend = p + (end_byte - start_byte);
6421
6422 while (p < pend && ASCII_BYTE_P (*p)) p++;
6423 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
05e6f5dc
KH
6424
6425 while (p < pend)
6426 {
df7492f9
KH
6427 if (ASCII_BYTE_P (*p))
6428 p++;
6429 else
6430 {
6431 c = STRING_CHAR_ADVANCE (p);
6432
6433 charset_map_loaded = 0;
6434 for (tail = coding_attrs_list; CONSP (tail);)
6435 {
6436 elt = XCAR (tail);
6437 if (NILP (elt))
6438 tail = XCDR (tail);
6439 else if (char_encodable_p (c, elt))
6440 tail = XCDR (tail);
6441 else if (CONSP (XCDR (tail)))
6442 {
6443 XSETCAR (tail, XCAR (XCDR (tail)));
6444 XSETCDR (tail, XCDR (XCDR (tail)));
6445 }
6446 else
6447 {
6448 XSETCAR (tail, Qnil);
6449 tail = XCDR (tail);
6450 }
6451 }
6452 if (charset_map_loaded)
6453 {
6454 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 6455
df7492f9
KH
6456 if (STRINGP (start))
6457 pbeg = XSTRING (start)->data;
6458 else
6459 pbeg = BYTE_POS_ADDR (start_byte);
6460 p = pbeg + p_offset;
6461 pend = pbeg + pend_offset;
6462 }
6463 }
05e6f5dc 6464 }
df7492f9
KH
6465
6466 safe_codings = Qnil;
6467 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6468 if (! NILP (XCAR (tail)))
6469 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6470
05e6f5dc
KH
6471 return safe_codings;
6472}
6473
6474
df7492f9
KH
6475DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6476 Scheck_coding_systems_region, 3, 3, 0,
6477 doc: /* Check if the region is encodable by coding systems.
05e6f5dc 6478
df7492f9
KH
6479START and END are buffer positions specifying the region.
6480CODING-SYSTEM-LIST is a list of coding systems to check.
6481
6482The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6483CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6484whole region, POS0, POS1, ... are buffer positions where non-encodable
6485characters are found.
6486
6487If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6488value is nil.
6489
6490START may be a string. In that case, check if the string is
6491encodable, and the value contains indices to the string instead of
6492buffer positions. END is ignored. */)
6493 (start, end, coding_system_list)
6494 Lisp_Object start, end, coding_system_list;
05e6f5dc 6495{
df7492f9
KH
6496 Lisp_Object list;
6497 EMACS_INT start_byte, end_byte;
6498 int pos;
6499 unsigned char *p, *pbeg, *pend;
6500 int c;
6501 Lisp_Object tail, elt;
05e6f5dc
KH
6502
6503 if (STRINGP (start))
6504 {
df7492f9
KH
6505 if (!STRING_MULTIBYTE (start)
6506 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6507 return Qnil;
6508 start_byte = 0;
6509 end_byte = STRING_BYTES (XSTRING (start));
6510 pos = 0;
05e6f5dc
KH
6511 }
6512 else
6513 {
b7826503
PJ
6514 CHECK_NUMBER_COERCE_MARKER (start);
6515 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
6516 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6517 args_out_of_range (start, end);
6518 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
6519 return Qnil;
6520 start_byte = CHAR_TO_BYTE (XINT (start));
6521 end_byte = CHAR_TO_BYTE (XINT (end));
6522 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 6523 return Qt;
df7492f9
KH
6524
6525 if (start < GPT && end > GPT)
6526 {
6527 if ((GPT - start) < (end - GPT))
6528 move_gap_both (start, start_byte);
6529 else
6530 move_gap_both (end, end_byte);
6531 }
6532 pos = start;
6533 }
6534
6535 list = Qnil;
6536 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6537 {
6538 elt = XCAR (tail);
6539 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6540 Qnil)),
6541 list);
05e6f5dc
KH
6542 }
6543
df7492f9
KH
6544 if (STRINGP (start))
6545 p = pbeg = XSTRING (start)->data;
6546 else
6547 p = pbeg = BYTE_POS_ADDR (start_byte);
6548 pend = p + (end_byte - start_byte);
6549
6550 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6551 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6552
6553 while (p < pend)
05e6f5dc 6554 {
df7492f9
KH
6555 if (ASCII_BYTE_P (*p))
6556 p++;
6557 else
05e6f5dc 6558 {
df7492f9
KH
6559 c = STRING_CHAR_ADVANCE (p);
6560
6561 charset_map_loaded = 0;
6562 for (tail = list; CONSP (tail); tail = XCDR (tail))
6563 {
6564 elt = XCDR (XCAR (tail));
6565 if (! char_encodable_p (c, XCAR (elt)))
6566 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6567 }
6568 if (charset_map_loaded)
6569 {
6570 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6571
6572 if (STRINGP (start))
6573 pbeg = XSTRING (start)->data;
6574 else
6575 pbeg = BYTE_POS_ADDR (start_byte);
6576 p = pbeg + p_offset;
6577 pend = pbeg + pend_offset;
6578 }
05e6f5dc 6579 }
df7492f9 6580 pos++;
05e6f5dc
KH
6581 }
6582
df7492f9
KH
6583 tail = list;
6584 list = Qnil;
6585 for (; CONSP (tail); tail = XCDR (tail))
05e6f5dc 6586 {
df7492f9
KH
6587 elt = XCAR (tail);
6588 if (CONSP (XCDR (XCDR (elt))))
6589 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6590 list);
05e6f5dc 6591 }
df7492f9
KH
6592
6593 return list;
05e6f5dc
KH
6594}
6595
6596
df7492f9 6597
4031e2bf 6598Lisp_Object
df7492f9
KH
6599code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6600 Lisp_Object start, end, coding_system, dst_object;
6601 int encodep, norecord;
3a73fa5d
RS
6602{
6603 struct coding_system coding;
df7492f9
KH
6604 EMACS_INT from, from_byte, to, to_byte;
6605 Lisp_Object src_object;
3a73fa5d 6606
b7826503
PJ
6607 CHECK_NUMBER_COERCE_MARKER (start);
6608 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
6609 if (NILP (coding_system))
6610 coding_system = Qno_conversion;
6611 else
6612 CHECK_CODING_SYSTEM (coding_system);
6613 src_object = Fcurrent_buffer ();
6614 if (NILP (dst_object))
6615 dst_object = src_object;
6616 else if (! EQ (dst_object, Qt))
6617 CHECK_BUFFER (dst_object);
3a73fa5d 6618
d46c5b12
KH
6619 validate_region (&start, &end);
6620 from = XFASTINT (start);
df7492f9 6621 from_byte = CHAR_TO_BYTE (from);
d46c5b12 6622 to = XFASTINT (end);
df7492f9 6623 to_byte = CHAR_TO_BYTE (to);
d46c5b12 6624
df7492f9
KH
6625 setup_coding_system (coding_system, &coding);
6626 coding.mode |= CODING_MODE_LAST_BLOCK;
6627
6628 if (encodep)
6629 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6630 dst_object);
6631 else
6632 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6633 dst_object);
6634 if (! norecord)
6635 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
d46c5b12 6636
df7492f9
KH
6637 if (coding.result != CODING_RESULT_SUCCESS)
6638 error ("Code conversion error: %d", coding.result);
3a73fa5d 6639
df7492f9
KH
6640 return (BUFFERP (dst_object)
6641 ? make_number (coding.produced_char)
6642 : coding.dst_object);
4031e2bf
KH
6643}
6644
df7492f9 6645
4031e2bf 6646DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 6647 3, 4, "r\nzCoding system: ",
48b0f3ae 6648 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
6649When called from a program, takes four arguments:
6650 START, END, CODING-SYSTEM, and DESTINATION.
6651START and END are buffer positions.
6652
6653Optional 4th arguments DESTINATION specifies where the decoded text goes.
6654If nil, the region between START and END is replace by the decoded text.
6655If buffer, the decoded text is inserted in the buffer.
6656If t, the decoded text is returned.
6657
48b0f3ae
PJ
6658This function sets `last-coding-system-used' to the precise coding system
6659used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6660not fully specified.)
6661It returns the length of the decoded text. */)
df7492f9
KH
6662 (start, end, coding_system, destination)
6663 Lisp_Object start, end, coding_system, destination;
4031e2bf 6664{
df7492f9 6665 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d
RS
6666}
6667
6668DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
6669 3, 4, "r\nzCoding system: ",
6670 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
6671When called from a program, takes three arguments:
6672START, END, and CODING-SYSTEM. START and END are buffer positions.
df7492f9
KH
6673
6674Optional 4th arguments DESTINATION specifies where the encoded text goes.
6675If nil, the region between START and END is replace by the encoded text.
6676If buffer, the encoded text is inserted in the buffer.
6677If t, the encoded text is returned.
6678
48b0f3ae
PJ
6679This function sets `last-coding-system-used' to the precise coding system
6680used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6681not fully specified.)
6682It returns the length of the encoded text. */)
df7492f9
KH
6683 (start, end, coding_system, destination)
6684 Lisp_Object start, end, coding_system, destination;
3a73fa5d 6685{
df7492f9 6686 return code_convert_region (start, end, coding_system, destination, 1, 0);
4031e2bf 6687}
3a73fa5d 6688
4031e2bf 6689Lisp_Object
df7492f9
KH
6690code_convert_string (string, coding_system, dst_object,
6691 encodep, nocopy, norecord)
6692 Lisp_Object string, coding_system, dst_object;
6693 int encodep, nocopy, norecord;
4031e2bf
KH
6694{
6695 struct coding_system coding;
df7492f9 6696 EMACS_INT chars, bytes;
3a73fa5d 6697
b7826503 6698 CHECK_STRING (string);
d46c5b12 6699 if (NILP (coding_system))
df7492f9
KH
6700 {
6701 if (! norecord)
6702 Vlast_coding_system_used = Qno_conversion;
6703 if (NILP (dst_object))
6704 return (nocopy ? Fcopy_sequence (string) : string);
6705 }
4ed46869 6706
df7492f9
KH
6707 if (NILP (coding_system))
6708 coding_system = Qno_conversion;
6709 else
6710 CHECK_CODING_SYSTEM (coding_system);
6711 if (NILP (dst_object))
6712 dst_object = Qt;
6713 else if (! EQ (dst_object, Qt))
6714 CHECK_BUFFER (dst_object);
5f1cd180 6715
df7492f9 6716 setup_coding_system (coding_system, &coding);
d46c5b12 6717 coding.mode |= CODING_MODE_LAST_BLOCK;
df7492f9
KH
6718 chars = XSTRING (string)->size;
6719 bytes = STRING_BYTES (XSTRING (string));
6720 if (encodep)
6721 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6722 else
6723 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6724 if (! norecord)
6725 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 6726
df7492f9
KH
6727 if (coding.result != CODING_RESULT_SUCCESS)
6728 error ("Code conversion error: %d", coding.result);
4ed46869 6729
df7492f9
KH
6730 return (BUFFERP (dst_object)
6731 ? make_number (coding.produced_char)
6732 : coding.dst_object);
4ed46869
KH
6733}
6734
4031e2bf 6735
ecec61c1 6736/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
6737 Do not set Vlast_coding_system_used.
6738
6739 This function is called only from macros DECODE_FILE and
6740 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
6741
6742Lisp_Object
6743code_convert_string_norecord (string, coding_system, encodep)
6744 Lisp_Object string, coding_system;
6745 int encodep;
6746{
0be8721c 6747 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
df7492f9 6748}
ecec61c1 6749
ecec61c1 6750
df7492f9
KH
6751DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6752 2, 4, 0,
6753 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6754
6755Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6756if the decoding operation is trivial.
ecec61c1 6757
df7492f9
KH
6758Optional fourth arg BUFFER non-nil meant that the decoded text is
6759inserted in BUFFER instead of returned as a astring. In this case,
6760the return value is BUFFER.
ecec61c1 6761
df7492f9
KH
6762This function sets `last-coding-system-used' to the precise coding system
6763used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6764not fully specified. */)
6765 (string, coding_system, nocopy, buffer)
6766 Lisp_Object string, coding_system, nocopy, buffer;
6767{
6768 return code_convert_string (string, coding_system, buffer,
6769 0, ! NILP (nocopy), 0);
6770}
6771
6772DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6773 2, 4, 0,
6774 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6775
6776Optional third arg NOCOPY non-nil means it is OK to return STRING
6777itself if the encoding operation is trivial.
6778
6779Optional fourth arg BUFFER non-nil meant that the encoded text is
6780inserted in BUFFER instead of returned as a astring. In this case,
6781the return value is BUFFER.
6782
6783This function sets `last-coding-system-used' to the precise coding system
6784used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6785not fully specified.) */)
6786 (string, coding_system, nocopy, buffer)
6787 Lisp_Object string, coding_system, nocopy, buffer;
6788{
6789 return code_convert_string (string, coding_system, buffer,
6790 nocopy, ! NILP (nocopy), 1);
ecec61c1 6791}
df7492f9 6792
3a73fa5d 6793\f
4ed46869 6794DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6795 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6796Return the corresponding character. */)
6797 (code)
4ed46869
KH
6798 Lisp_Object code;
6799{
df7492f9
KH
6800 Lisp_Object spec, attrs, val;
6801 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6802 int c;
6803
6804 CHECK_NATNUM (code);
6805 c = XFASTINT (code);
6806 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6807 attrs = AREF (spec, 0);
4ed46869 6808
df7492f9
KH
6809 if (ASCII_BYTE_P (c)
6810 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6811 return code;
6812
6813 val = CODING_ATTR_CHARSET_LIST (attrs);
6814 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6815 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6816 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
6817
6818 if (c <= 0x7F)
6819 charset = charset_roman;
6820 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 6821 {
df7492f9
KH
6822 charset = charset_kana;
6823 c -= 0x80;
55ab7be3
KH
6824 }
6825 else
6826 {
df7492f9
KH
6827 int s1 = c >> 8, s2 = c & 0x7F;
6828
6829 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
6830 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
6831 error ("Invalid code: %d", code);
6832 SJIS_TO_JIS (c);
6833 charset = charset_kanji;
55ab7be3 6834 }
df7492f9
KH
6835 c = DECODE_CHAR (charset, c);
6836 if (c < 0)
6837 error ("Invalid code: %d", code);
6838 return make_number (c);
4ed46869
KH
6839}
6840
df7492f9 6841
4ed46869 6842DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6843 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6844Return the corresponding code in SJIS. */)
6845 (ch)
df7492f9 6846 Lisp_Object ch;
4ed46869 6847{
df7492f9
KH
6848 Lisp_Object spec, attrs, charset_list;
6849 int c;
6850 struct charset *charset;
6851 unsigned code;
4ed46869 6852
df7492f9
KH
6853 CHECK_CHARACTER (ch);
6854 c = XFASTINT (ch);
6855 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6856 attrs = AREF (spec, 0);
6857
6858 if (ASCII_CHAR_P (c)
6859 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6860 return ch;
6861
6862 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6863 charset = char_charset (c, charset_list, &code);
6864 if (code == CHARSET_INVALID_CODE (charset))
6865 error ("Can't encode by shift_jis encoding: %d", c);
6866 JIS_TO_SJIS (code);
6867
6868 return make_number (code);
4ed46869
KH
6869}
6870
6871DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
6872 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6873Return the corresponding character. */)
6874 (code)
4ed46869
KH
6875 Lisp_Object code;
6876{
df7492f9
KH
6877 Lisp_Object spec, attrs, val;
6878 struct charset *charset_roman, *charset_big5, *charset;
6879 int c;
4ed46869 6880
df7492f9
KH
6881 CHECK_NATNUM (code);
6882 c = XFASTINT (code);
6883 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6884 attrs = AREF (spec, 0);
6885
6886 if (ASCII_BYTE_P (c)
6887 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6888 return code;
6889
6890 val = CODING_ATTR_CHARSET_LIST (attrs);
6891 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6892 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
6893
6894 if (c <= 0x7F)
6895 charset = charset_roman;
c28a9453
KH
6896 else
6897 {
df7492f9
KH
6898 int b1 = c >> 8, b2 = c & 0x7F;
6899 if (b1 < 0xA1 || b1 > 0xFE
6900 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
6901 error ("Invalid code: %d", code);
6902 charset = charset_big5;
c28a9453 6903 }
df7492f9
KH
6904 c = DECODE_CHAR (charset, (unsigned )c);
6905 if (c < 0)
6906 error ("Invalid code: %d", code);
6907 return make_number (c);
4ed46869
KH
6908}
6909
6910DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
6911 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
6912Return the corresponding character code in Big5. */)
6913 (ch)
4ed46869
KH
6914 Lisp_Object ch;
6915{
df7492f9
KH
6916 Lisp_Object spec, attrs, charset_list;
6917 struct charset *charset;
6918 int c;
6919 unsigned code;
6920
6921 CHECK_CHARACTER (ch);
6922 c = XFASTINT (ch);
6923 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6924 attrs = AREF (spec, 0);
6925 if (ASCII_CHAR_P (c)
6926 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6927 return ch;
6928
6929 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6930 charset = char_charset (c, charset_list, &code);
6931 if (code == CHARSET_INVALID_CODE (charset))
6932 error ("Can't encode by Big5 encoding: %d", c);
6933
6934 return make_number (code);
4ed46869 6935}
df7492f9 6936
3a73fa5d 6937\f
1ba9e4ab
KH
6938DEFUN ("set-terminal-coding-system-internal",
6939 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
6940 Sset_terminal_coding_system_internal, 1, 1, 0,
6941 doc: /* Internal use only. */)
6942 (coding_system)
4ed46869 6943{
b7826503 6944 CHECK_SYMBOL (coding_system);
df7492f9
KH
6945 setup_coding_system (Fcheck_coding_system (coding_system),
6946 &terminal_coding);
6947
70c22245 6948 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
6949 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
6950 /* Characer composition should be disabled. */
6951 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
6952 terminal_coding.src_multibyte = 1;
6953 terminal_coding.dst_multibyte = 0;
4ed46869
KH
6954 return Qnil;
6955}
6956
c4825358
KH
6957DEFUN ("set-safe-terminal-coding-system-internal",
6958 Fset_safe_terminal_coding_system_internal,
48b0f3ae 6959 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 6960 doc: /* Internal use only. */)
48b0f3ae 6961 (coding_system)
c4825358 6962{
b7826503 6963 CHECK_SYMBOL (coding_system);
c4825358
KH
6964 setup_coding_system (Fcheck_coding_system (coding_system),
6965 &safe_terminal_coding);
df7492f9
KH
6966 /* Characer composition should be disabled. */
6967 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
6968 safe_terminal_coding.src_multibyte = 1;
6969 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
6970 return Qnil;
6971}
6972
4ed46869
KH
6973DEFUN ("terminal-coding-system",
6974 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
6975 doc: /* Return coding system specified for terminal output. */)
6976 ()
4ed46869 6977{
df7492f9 6978 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
6979}
6980
1ba9e4ab
KH
6981DEFUN ("set-keyboard-coding-system-internal",
6982 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
6983 Sset_keyboard_coding_system_internal, 1, 1, 0,
6984 doc: /* Internal use only. */)
6985 (coding_system)
4ed46869
KH
6986 Lisp_Object coding_system;
6987{
b7826503 6988 CHECK_SYMBOL (coding_system);
df7492f9
KH
6989 setup_coding_system (Fcheck_coding_system (coding_system),
6990 &keyboard_coding);
6991 /* Characer composition should be disabled. */
6992 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
6993 return Qnil;
6994}
6995
6996DEFUN ("keyboard-coding-system",
6997 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
6998 doc: /* Return coding system specified for decoding keyboard input. */)
6999 ()
4ed46869 7000{
df7492f9 7001 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7002}
7003
7004\f
a5d301df
KH
7005DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7006 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7007 doc: /* Choose a coding system for an operation based on the target name.
7008The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7009DECODING-SYSTEM is the coding system to use for decoding
7010\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7011for encoding (in case OPERATION does encoding).
7012
7013The first argument OPERATION specifies an I/O primitive:
7014 For file I/O, `insert-file-contents' or `write-region'.
7015 For process I/O, `call-process', `call-process-region', or `start-process'.
7016 For network I/O, `open-network-stream'.
7017
7018The remaining arguments should be the same arguments that were passed
7019to the primitive. Depending on which primitive, one of those arguments
7020is selected as the TARGET. For example, if OPERATION does file I/O,
7021whichever argument specifies the file name is TARGET.
7022
7023TARGET has a meaning which depends on OPERATION:
7024 For file I/O, TARGET is a file name.
7025 For process I/O, TARGET is a process name.
7026 For network I/O, TARGET is a service name or a port number
7027
7028This function looks up what specified for TARGET in,
7029`file-coding-system-alist', `process-coding-system-alist',
7030or `network-coding-system-alist' depending on OPERATION.
7031They may specify a coding system, a cons of coding systems,
7032or a function symbol to call.
7033In the last case, we call the function with one argument,
7034which is a list of all the arguments given to this function.
7035
7036usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7037 (nargs, args)
4ed46869
KH
7038 int nargs;
7039 Lisp_Object *args;
7040{
7041 Lisp_Object operation, target_idx, target, val;
7042 register Lisp_Object chain;
7043
7044 if (nargs < 2)
7045 error ("Too few arguments");
7046 operation = args[0];
7047 if (!SYMBOLP (operation)
7048 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7049 error ("Invalid first arguement");
4ed46869
KH
7050 if (nargs < 1 + XINT (target_idx))
7051 error ("Too few arguments for operation: %s",
7052 XSYMBOL (operation)->name->data);
7053 target = args[XINT (target_idx) + 1];
7054 if (!(STRINGP (target)
7055 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7056 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7057
2e34157c
RS
7058 chain = ((EQ (operation, Qinsert_file_contents)
7059 || EQ (operation, Qwrite_region))
02ba4723 7060 ? Vfile_coding_system_alist
2e34157c 7061 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7062 ? Vnetwork_coding_system_alist
7063 : Vprocess_coding_system_alist));
4ed46869
KH
7064 if (NILP (chain))
7065 return Qnil;
7066
03699b14 7067 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 7068 {
f44d27ce 7069 Lisp_Object elt;
4ed46869 7070
df7492f9 7071 elt = XCAR (chain);
4ed46869
KH
7072 if (CONSP (elt)
7073 && ((STRINGP (target)
03699b14
KR
7074 && STRINGP (XCAR (elt))
7075 && fast_string_match (XCAR (elt), target) >= 0)
7076 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 7077 {
03699b14 7078 val = XCDR (elt);
b19fd4c5
KH
7079 /* Here, if VAL is both a valid coding system and a valid
7080 function symbol, we return VAL as a coding system. */
02ba4723
KH
7081 if (CONSP (val))
7082 return val;
7083 if (! SYMBOLP (val))
7084 return Qnil;
7085 if (! NILP (Fcoding_system_p (val)))
7086 return Fcons (val, val);
b19fd4c5
KH
7087 if (! NILP (Ffboundp (val)))
7088 {
7089 val = call1 (val, Flist (nargs, args));
7090 if (CONSP (val))
7091 return val;
7092 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7093 return Fcons (val, val);
7094 }
02ba4723
KH
7095 return Qnil;
7096 }
4ed46869
KH
7097 }
7098 return Qnil;
7099}
7100
df7492f9
KH
7101DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7102 Sset_coding_system_priority, 1, MANY, 0,
7103 doc: /* Put higher priority to coding systems of the arguments. */)
7104 (nargs, args)
7105 int nargs;
7106 Lisp_Object *args;
7107{
7108 int i, j;
7109 int changed[coding_category_max];
7110 enum coding_category priorities[coding_category_max];
7111
7112 bzero (changed, sizeof changed);
7113
7114 for (i = j = 0; i < nargs; i++)
7115 {
7116 enum coding_category category;
7117 Lisp_Object spec, attrs;
7118
7119 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7120 attrs = AREF (spec, 0);
7121 category = XINT (CODING_ATTR_CATEGORY (attrs));
7122 if (changed[category])
7123 /* Ignore this coding system because a coding system of the
7124 same category already had a higher priority. */
7125 continue;
7126 changed[category] = 1;
7127 priorities[j++] = category;
7128 if (coding_categories[category].id >= 0
7129 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7130 setup_coding_system (args[i], &coding_categories[category]);
7131 }
7132
7133 /* Now we have decided top J priorities. Reflect the order of the
7134 original priorities to the remaining priorities. */
7135
7136 for (i = j, j = 0; i < coding_category_max; i++, j++)
7137 {
7138 while (j < coding_category_max
7139 && changed[coding_priorities[j]])
7140 j++;
7141 if (j == coding_category_max)
7142 abort ();
7143 priorities[i] = coding_priorities[j];
7144 }
7145
7146 bcopy (priorities, coding_priorities, sizeof priorities);
7147 return Qnil;
7148}
7149
7150DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7151 Scoding_system_priority_list, 0, 1, 0,
7152 doc: /* Return a list of coding systems ordered by their priorities. */)
7153 (highestp)
7154 Lisp_Object highestp;
d46c5b12
KH
7155{
7156 int i;
df7492f9 7157 Lisp_Object val;
d46c5b12 7158
df7492f9 7159 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7160 {
df7492f9
KH
7161 enum coding_category category = coding_priorities[i];
7162 int id = coding_categories[category].id;
7163 Lisp_Object attrs;
7164
7165 if (id < 0)
7166 continue;
7167 attrs = CODING_ID_ATTRS (id);
7168 if (! NILP (highestp))
7169 return CODING_ATTR_BASE_NAME (attrs);
7170 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7171 }
7172 return Fnreverse (val);
7173}
7174
7175static Lisp_Object
7176make_subsidiaries (base)
7177 Lisp_Object base;
7178{
7179 Lisp_Object subsidiaries;
7180 char *suffixes[] = { "-unix", "-dos", "-mac" };
7181 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7182 char *buf = (char *) alloca (base_name_len + 6);
7183 int i;
7184
7185 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7186 subsidiaries = Fmake_vector (make_number (3), Qnil);
7187 for (i = 0; i < 3; i++)
7188 {
7189 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7190 ASET (subsidiaries, i, intern (buf));
7191 }
7192 return subsidiaries;
7193}
7194
7195
7196DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7197 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7198 doc: /* For internal use only. */)
7199 (nargs, args)
7200 int nargs;
7201 Lisp_Object *args;
7202{
7203 Lisp_Object name;
7204 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7205 Lisp_Object attrs; /* Vector of attributes. */
7206 Lisp_Object eol_type;
7207 Lisp_Object aliases;
7208 Lisp_Object coding_type, charset_list, safe_charsets;
7209 enum coding_category category;
7210 Lisp_Object tail, val;
7211 int max_charset_id = 0;
7212 int i;
7213
7214 if (nargs < coding_arg_max)
7215 goto short_args;
7216
7217 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7218
7219 name = args[coding_arg_name];
7220 CHECK_SYMBOL (name);
7221 CODING_ATTR_BASE_NAME (attrs) = name;
7222
7223 val = args[coding_arg_mnemonic];
7224 if (! STRINGP (val))
7225 CHECK_CHARACTER (val);
7226 CODING_ATTR_MNEMONIC (attrs) = val;
7227
7228 coding_type = args[coding_arg_coding_type];
7229 CHECK_SYMBOL (coding_type);
7230 CODING_ATTR_TYPE (attrs) = coding_type;
7231
7232 charset_list = args[coding_arg_charset_list];
7233 if (SYMBOLP (charset_list))
7234 {
7235 if (EQ (charset_list, Qiso_2022))
7236 {
7237 if (! EQ (coding_type, Qiso_2022))
7238 error ("Invalid charset-list");
7239 charset_list = Viso_2022_charset_list;
7240 }
7241 else if (EQ (charset_list, Qemacs_mule))
7242 {
7243 if (! EQ (coding_type, Qemacs_mule))
7244 error ("Invalid charset-list");
7245 charset_list = Vemacs_mule_charset_list;
7246 }
7247 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7248 if (max_charset_id < XFASTINT (XCAR (tail)))
7249 max_charset_id = XFASTINT (XCAR (tail));
7250 }
7251 else
7252 {
7253 charset_list = Fcopy_sequence (charset_list);
7254 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7255 {
7256 struct charset *charset;
7257
7258 val = Fcar (tail);
7259 CHECK_CHARSET_GET_CHARSET (val, charset);
7260 if (EQ (coding_type, Qiso_2022)
7261 ? CHARSET_ISO_FINAL (charset) < 0
7262 : EQ (coding_type, Qemacs_mule)
7263 ? CHARSET_EMACS_MULE_ID (charset) < 0
7264 : 0)
7265 error ("Can't handle charset `%s'",
7266 XSYMBOL (CHARSET_NAME (charset))->name->data);
7267
7268 XCAR (tail) = make_number (charset->id);
7269 if (max_charset_id < charset->id)
7270 max_charset_id = charset->id;
7271 }
7272 }
7273 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7274
7275 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7276 make_number (255));
7277 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7278 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7279 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7280
7281 val = args[coding_arg_decode_translation_table];
7282 if (! NILP (val))
7283 CHECK_CHAR_TABLE (val);
7284 CODING_ATTR_DECODE_TBL (attrs) = val;
7285
7286 val = args[coding_arg_encode_translation_table];
7287 if (! NILP (val))
7288 CHECK_CHAR_TABLE (val);
7289 CODING_ATTR_ENCODE_TBL (attrs) = val;
7290
7291 val = args[coding_arg_post_read_conversion];
7292 CHECK_SYMBOL (val);
7293 CODING_ATTR_POST_READ (attrs) = val;
7294
7295 val = args[coding_arg_pre_write_conversion];
7296 CHECK_SYMBOL (val);
7297 CODING_ATTR_PRE_WRITE (attrs) = val;
7298
7299 val = args[coding_arg_default_char];
7300 if (NILP (val))
7301 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7302 else
7303 {
7304 CHECK_CHARACTER (val);
7305 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7306 }
7307
7308 val = args[coding_arg_plist];
7309 CHECK_LIST (val);
7310 CODING_ATTR_PLIST (attrs) = val;
7311
7312 if (EQ (coding_type, Qcharset))
7313 {
7314 val = Fmake_vector (make_number (256), Qnil);
7315
7316 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7317 {
7318 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7319
7320 for (i = charset->code_space[0]; i <= charset->code_space[1]; i++)
7321 if (NILP (AREF (val, i)))
7322 ASET (val, i, XCAR (tail));
7323 }
7324 ASET (attrs, coding_attr_charset_valids, val);
7325 category = coding_category_charset;
7326 }
7327 else if (EQ (coding_type, Qccl))
7328 {
7329 Lisp_Object valids;
7330
7331 if (nargs < coding_arg_ccl_max)
7332 goto short_args;
7333
7334 val = args[coding_arg_ccl_decoder];
7335 CHECK_CCL_PROGRAM (val);
7336 if (VECTORP (val))
7337 val = Fcopy_sequence (val);
7338 ASET (attrs, coding_attr_ccl_decoder, val);
7339
7340 val = args[coding_arg_ccl_encoder];
7341 CHECK_CCL_PROGRAM (val);
7342 if (VECTORP (val))
7343 val = Fcopy_sequence (val);
7344 ASET (attrs, coding_attr_ccl_encoder, val);
7345
7346 val = args[coding_arg_ccl_valids];
7347 valids = Fmake_string (make_number (256), make_number (0));
7348 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7349 {
7350 val = Fcar (tail);
7351 if (INTEGERP (val))
7352 ASET (valids, XINT (val), 1);
7353 else
7354 {
7355 int from, to;
7356
7357 CHECK_CONS (val);
7358 CHECK_NUMBER (XCAR (val));
7359 CHECK_NUMBER (XCDR (val));
7360 from = XINT (XCAR (val));
7361 to = XINT (XCDR (val));
7362 for (i = from; i <= to; i++)
7363 ASET (valids, i, 1);
7364 }
7365 }
7366 ASET (attrs, coding_attr_ccl_valids, valids);
7367
7368 category = coding_category_ccl;
7369 }
7370 else if (EQ (coding_type, Qutf_16))
7371 {
7372 Lisp_Object bom, endian;
7373
7374 if (nargs < coding_arg_utf16_max)
7375 goto short_args;
7376
7377 bom = args[coding_arg_utf16_bom];
7378 if (! NILP (bom) && ! EQ (bom, Qt))
7379 {
7380 CHECK_CONS (bom);
7381 CHECK_CODING_SYSTEM (XCAR (bom));
7382 CHECK_CODING_SYSTEM (XCDR (bom));
7383 }
7384 ASET (attrs, coding_attr_utf_16_bom, bom);
7385
7386 endian = args[coding_arg_utf16_endian];
7387 ASET (attrs, coding_attr_utf_16_endian, endian);
7388
7389 category = (CONSP (bom)
7390 ? coding_category_utf_16_auto
7391 : NILP (bom)
7392 ? (NILP (endian)
7393 ? coding_category_utf_16_be_nosig
7394 : coding_category_utf_16_le_nosig)
7395 : (NILP (endian)
7396 ? coding_category_utf_16_be
7397 : coding_category_utf_16_le));
7398 }
7399 else if (EQ (coding_type, Qiso_2022))
7400 {
7401 Lisp_Object initial, reg_usage, request, flags;
7402 struct charset *charset;
0be8721c 7403 int i, id;
1397dc18 7404
df7492f9
KH
7405 if (nargs < coding_arg_iso2022_max)
7406 goto short_args;
7407
7408 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7409 CHECK_VECTOR (initial);
7410 for (i = 0; i < 4; i++)
7411 {
7412 val = Faref (initial, make_number (i));
7413 if (! NILP (val))
7414 {
7415 CHECK_CHARSET_GET_ID (val, id);
7416 ASET (initial, i, make_number (id));
7417 }
7418 else
7419 ASET (initial, i, make_number (-1));
7420 }
7421
7422 reg_usage = args[coding_arg_iso2022_reg_usage];
7423 CHECK_CONS (reg_usage);
7424 CHECK_NATNUM (XCAR (reg_usage));
7425 CHECK_NATNUM (XCDR (reg_usage));
7426
7427 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7428 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 7429 {
df7492f9
KH
7430 int id;
7431
7432 val = Fcar (tail);
7433 CHECK_CONS (val);
7434 CHECK_CHARSET_GET_ID (XCAR (val), id);
7435 CHECK_NATNUM (XCDR (val));
7436 if (XINT (XCDR (val)) >= 4)
7437 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7438 XCAR (val) = make_number (id);
1397dc18 7439 }
df7492f9
KH
7440
7441 flags = args[coding_arg_iso2022_flags];
7442 CHECK_NATNUM (flags);
7443 i = XINT (flags);
7444 if (EQ (args[coding_arg_charset_list], Qiso_2022))
7445 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7446
7447 ASET (attrs, coding_attr_iso_initial, initial);
7448 ASET (attrs, coding_attr_iso_usage, reg_usage);
7449 ASET (attrs, coding_attr_iso_request, request);
7450 ASET (attrs, coding_attr_iso_flags, flags);
7451 setup_iso_safe_charsets (attrs);
7452
7453 if (i & CODING_ISO_FLAG_SEVEN_BITS)
7454 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7455 | CODING_ISO_FLAG_SINGLE_SHIFT))
7456 ? coding_category_iso_7_else
7457 : EQ (args[coding_arg_charset_list], Qiso_2022)
7458 ? coding_category_iso_7
7459 : coding_category_iso_7_tight);
7460 else
7461 {
7462 int id = XINT (AREF (initial, 1));
7463
7464 category = (((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7465 | CODING_ISO_FLAG_SINGLE_SHIFT))
7466 || EQ (args[coding_arg_charset_list], Qiso_2022)
7467 || id < 0)
7468 ? coding_category_iso_8_else
7469 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7470 ? coding_category_iso_8_1
7471 : coding_category_iso_8_2);
7472 }
7473 }
7474 else if (EQ (coding_type, Qemacs_mule))
7475 {
7476 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7477 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7478
7479 category = coding_category_emacs_mule;
7480 }
7481 else if (EQ (coding_type, Qshift_jis))
7482 {
7483
7484 struct charset *charset;
7485
7486 if (XINT (Flength (charset_list)) != 3)
7487 error ("There should be just three charsets");
7488
7489 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7490 if (CHARSET_DIMENSION (charset) != 1)
7491 error ("Dimension of charset %s is not one",
7492 XSYMBOL (CHARSET_NAME (charset))->name->data);
7493
7494 charset_list = XCDR (charset_list);
7495 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7496 if (CHARSET_DIMENSION (charset) != 1)
7497 error ("Dimension of charset %s is not one",
7498 XSYMBOL (CHARSET_NAME (charset))->name->data);
7499
7500 charset_list = XCDR (charset_list);
7501 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7502 if (CHARSET_DIMENSION (charset) != 2)
7503 error ("Dimension of charset %s is not two",
7504 XSYMBOL (CHARSET_NAME (charset))->name->data);
7505
7506 category = coding_category_sjis;
7507 Vsjis_coding_system = name;
7508 }
7509 else if (EQ (coding_type, Qbig5))
7510 {
7511 struct charset *charset;
7512
7513 if (XINT (Flength (charset_list)) != 2)
7514 error ("There should be just two charsets");
7515
7516 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7517 if (CHARSET_DIMENSION (charset) != 1)
7518 error ("Dimension of charset %s is not one",
7519 XSYMBOL (CHARSET_NAME (charset))->name->data);
7520
7521 charset_list = XCDR (charset_list);
7522 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7523 if (CHARSET_DIMENSION (charset) != 2)
7524 error ("Dimension of charset %s is not two",
7525 XSYMBOL (CHARSET_NAME (charset))->name->data);
7526
7527 category = coding_category_big5;
7528 Vbig5_coding_system = name;
7529 }
7530 else if (EQ (coding_type, Qraw_text))
7531 category = coding_category_raw_text;
7532 else if (EQ (coding_type, Qutf_8))
7533 category = coding_category_utf_8;
7534 else if (EQ (coding_type, Qundecided))
7535 category = coding_category_undecided;
7536 else
7537 error ("Invalid coding system type: %s",
7538 XSYMBOL (coding_type)->name->data);
7539
7540 CODING_ATTR_CATEGORY (attrs) = make_number (category);
7541
7542 eol_type = args[coding_arg_eol_type];
7543 if (! NILP (eol_type)
7544 && ! EQ (eol_type, Qunix)
7545 && ! EQ (eol_type, Qdos)
7546 && ! EQ (eol_type, Qmac))
7547 error ("Invalid eol-type");
7548
7549 aliases = Fcons (name, Qnil);
7550
7551 if (NILP (eol_type))
7552 {
7553 eol_type = make_subsidiaries (name);
7554 for (i = 0; i < 3; i++)
1397dc18 7555 {
df7492f9
KH
7556 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7557
7558 this_name = AREF (eol_type, i);
7559 this_aliases = Fcons (this_name, Qnil);
7560 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7561 this_spec = Fmake_vector (make_number (3), attrs);
7562 ASET (this_spec, 1, this_aliases);
7563 ASET (this_spec, 2, this_eol_type);
7564 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7565 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7566 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7567 Vcoding_system_alist);
1397dc18 7568 }
d46c5b12 7569 }
1397dc18 7570
df7492f9
KH
7571 spec_vec = Fmake_vector (make_number (3), attrs);
7572 ASET (spec_vec, 1, aliases);
7573 ASET (spec_vec, 2, eol_type);
7574
7575 Fputhash (name, spec_vec, Vcoding_system_hash_table);
7576 Vcoding_system_list = Fcons (name, Vcoding_system_list);
7577 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7578 Vcoding_system_alist);
7579
7580 {
7581 int id = coding_categories[category].id;
7582
7583 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7584 setup_coding_system (name, &coding_categories[category]);
7585 }
7586
d46c5b12 7587 return Qnil;
df7492f9
KH
7588
7589 short_args:
7590 return Fsignal (Qwrong_number_of_arguments,
7591 Fcons (intern ("define-coding-system-internal"),
7592 make_number (nargs)));
d46c5b12
KH
7593}
7594
df7492f9
KH
7595DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7596 Sdefine_coding_system_alias, 2, 2, 0,
7597 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7598 (alias, coding_system)
7599 Lisp_Object alias, coding_system;
66cfb530 7600{
df7492f9 7601 Lisp_Object spec, aliases, eol_type;
84d60297 7602
df7492f9
KH
7603 CHECK_SYMBOL (alias);
7604 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7605 aliases = AREF (spec, 1);
7606 while (!NILP (XCDR (aliases)))
7607 aliases = XCDR (aliases);
7608 XCDR (aliases) = Fcons (alias, Qnil);
66cfb530 7609
df7492f9
KH
7610 eol_type = AREF (spec, 2);
7611 if (VECTORP (eol_type))
66cfb530 7612 {
df7492f9
KH
7613 Lisp_Object subsidiaries;
7614 int i;
7615
7616 subsidiaries = make_subsidiaries (alias);
7617 for (i = 0; i < 3; i++)
7618 Fdefine_coding_system_alias (AREF (subsidiaries, i),
7619 AREF (eol_type, i));
7620
7621 ASET (spec, 2, subsidiaries);
66cfb530 7622 }
df7492f9
KH
7623
7624 Fputhash (alias, spec, Vcoding_system_hash_table);
2c78b7e1 7625 Vcoding_system_alist = Fcons (Fcons (alias, Qnil), Vcoding_system_alist);
66cfb530
KH
7626
7627 return Qnil;
7628}
7629
df7492f9
KH
7630DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7631 1, 1, 0,
7632 doc: /* Return the base of CODING-SYSTEM.
7633Any alias or subsidiary coding systems are not base coding system. */)
7634 (coding_system)
7635 Lisp_Object coding_system;
7636{
7637 Lisp_Object spec, attrs;
7638
7639 if (NILP (coding_system))
7640 return (Qno_conversion);
7641 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7642 attrs = AREF (spec, 0);
7643 return CODING_ATTR_BASE_NAME (attrs);
7644}
7645
7646DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7647 1, 1, 0,
7648 doc: "Return the property list of CODING-SYSTEM.")
7649 (coding_system)
7650 Lisp_Object coding_system;
7651{
7652 Lisp_Object spec, attrs;
7653
7654 if (NILP (coding_system))
7655 coding_system = Qno_conversion;
7656 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7657 attrs = AREF (spec, 0);
7658 return CODING_ATTR_PLIST (attrs);
7659}
7660
7661
7662DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7663 1, 1, 0,
7664 doc: /* Return the list of aliases of CODING-SYSTEM.
7665A base coding system is what made by `define-coding-system'.
7666Any alias nor subsidiary coding systems are not base coding system. */)
7667 (coding_system)
7668 Lisp_Object coding_system;
7669{
7670 Lisp_Object spec;
7671
7672 if (NILP (coding_system))
7673 coding_system = Qno_conversion;
7674 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7675 return AREF (spec, 2);
7676}
7677
7678DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7679 Scoding_system_eol_type, 1, 1, 0,
7680 doc: /* Return eol-type of CODING-SYSTEM.
7681An eol-type is integer 0, 1, 2, or a vector of coding systems.
7682
7683Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7684and CR respectively.
7685
7686A vector value indicates that a format of end-of-line should be
7687detected automatically. Nth element of the vector is the subsidiary
7688coding system whose eol-type is N. */)
7689 (coding_system)
7690 Lisp_Object coding_system;
7691{
7692 Lisp_Object spec, eol_type;
7693 int n;
7694
7695 if (NILP (coding_system))
7696 coding_system = Qno_conversion;
7697 if (! CODING_SYSTEM_P (coding_system))
7698 return Qnil;
7699 spec = CODING_SYSTEM_SPEC (coding_system);
7700 eol_type = AREF (spec, 2);
7701 if (VECTORP (eol_type))
7702 return Fcopy_sequence (eol_type);
7703 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7704 return make_number (n);
7705}
7706
4ed46869
KH
7707#endif /* emacs */
7708
7709\f
1397dc18 7710/*** 9. Post-amble ***/
4ed46869 7711
dfcf069d 7712void
4ed46869
KH
7713init_coding_once ()
7714{
7715 int i;
7716
df7492f9
KH
7717 for (i = 0; i < coding_category_max; i++)
7718 {
7719 coding_categories[i].id = -1;
7720 coding_priorities[i] = i;
7721 }
4ed46869
KH
7722
7723 /* ISO2022 specific initialize routine. */
7724 for (i = 0; i < 0x20; i++)
b73bfc1c 7725 iso_code_class[i] = ISO_control_0;
4ed46869
KH
7726 for (i = 0x21; i < 0x7F; i++)
7727 iso_code_class[i] = ISO_graphic_plane_0;
7728 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 7729 iso_code_class[i] = ISO_control_1;
4ed46869
KH
7730 for (i = 0xA1; i < 0xFF; i++)
7731 iso_code_class[i] = ISO_graphic_plane_1;
7732 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7733 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7734 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7735 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7736 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7737 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7738 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7739 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7740 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7741 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7742
b843d1ae 7743 inhibit_pre_post_conversion = 0;
df7492f9
KH
7744
7745 for (i = 0; i < 256; i++)
7746 {
7747 emacs_mule_bytes[i] = 1;
7748 }
e0e989f6
KH
7749}
7750
7751#ifdef emacs
7752
dfcf069d 7753void
e0e989f6
KH
7754syms_of_coding ()
7755{
df7492f9
KH
7756 staticpro (&Vcoding_system_hash_table);
7757 Vcoding_system_hash_table = Fmakehash (Qeq);
7758
7759 staticpro (&Vsjis_coding_system);
7760 Vsjis_coding_system = Qnil;
7761
7762 staticpro (&Vbig5_coding_system);
7763 Vbig5_coding_system = Qnil;
7764
7765 staticpro (&Vcode_conversion_work_buf_list);
7766 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 7767
df7492f9
KH
7768 staticpro (&Vcode_conversion_reused_work_buf);
7769 Vcode_conversion_reused_work_buf = Qnil;
7770
7771 DEFSYM (Qcharset, "charset");
7772 DEFSYM (Qtarget_idx, "target-idx");
7773 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
7774 Fset (Qcoding_system_history, Qnil);
7775
9ce27fde 7776 /* Target FILENAME is the first argument. */
e0e989f6 7777 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 7778 /* Target FILENAME is the third argument. */
e0e989f6
KH
7779 Fput (Qwrite_region, Qtarget_idx, make_number (2));
7780
df7492f9 7781 DEFSYM (Qcall_process, "call-process");
9ce27fde 7782 /* Target PROGRAM is the first argument. */
e0e989f6
KH
7783 Fput (Qcall_process, Qtarget_idx, make_number (0));
7784
df7492f9 7785 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 7786 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7787 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7788
df7492f9 7789 DEFSYM (Qstart_process, "start-process");
9ce27fde 7790 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7791 Fput (Qstart_process, Qtarget_idx, make_number (2));
7792
df7492f9 7793 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 7794 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
7795 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7796
df7492f9
KH
7797 DEFSYM (Qcoding_system, "coding-system");
7798 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 7799
df7492f9
KH
7800 DEFSYM (Qeol_type, "eol-type");
7801 DEFSYM (Qunix, "unix");
7802 DEFSYM (Qdos, "dos");
7803 DEFSYM (Qmac, "mac");
4ed46869 7804
df7492f9
KH
7805 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
7806 DEFSYM (Qpost_read_conversion, "post-read-conversion");
7807 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
7808 DEFSYM (Qdefault_char, "default-char");
7809 DEFSYM (Qundecided, "undecided");
7810 DEFSYM (Qno_conversion, "no-conversion");
7811 DEFSYM (Qraw_text, "raw-text");
4ed46869 7812
df7492f9 7813 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 7814
df7492f9 7815 DEFSYM (Qutf_8, "utf-8");
27901516 7816
df7492f9
KH
7817 DEFSYM (Qutf_16, "utf-16");
7818 DEFSYM (Qutf_16_be, "utf-16-be");
7819 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
7820 DEFSYM (Qutf_16_le, "utf-16-l3");
7821 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
7822 DEFSYM (Qsignature, "signature");
7823 DEFSYM (Qendian, "endian");
7824 DEFSYM (Qbig, "big");
7825 DEFSYM (Qlittle, "little");
27901516 7826
df7492f9
KH
7827 DEFSYM (Qshift_jis, "shift-jis");
7828 DEFSYM (Qbig5, "big5");
4ed46869 7829
df7492f9 7830 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 7831
df7492f9 7832 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
7833 Fput (Qcoding_system_error, Qerror_conditions,
7834 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7835 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 7836 build_string ("Invalid coding system"));
4ed46869 7837
df7492f9
KH
7838 /* Intern this now in case it isn't already done.
7839 Setting this variable twice is harmless.
7840 But don't staticpro it here--that is done in alloc.c. */
7841 Qchar_table_extra_slots = intern ("char-table-extra-slots");
4ed46869 7842
df7492f9 7843 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 7844 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
7845 DEFSYM (Qtranslation_table_id, "translation-table-id");
7846 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
7847 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
bdd9fb48 7848
df7492f9 7849 DEFSYM (Qchar_coding_system, "char-coding-system");
84fbb8a0 7850
df7492f9 7851 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2));
a5d301df 7852
df7492f9 7853 DEFSYM (Qvalid_codes, "valid-codes");
05e6f5dc 7854
df7492f9 7855 DEFSYM (Qemacs_mule, "emacs-mule");
05e6f5dc 7856
df7492f9
KH
7857 Vcoding_category_table
7858 = Fmake_vector (make_number (coding_category_max), Qnil);
7859 staticpro (&Vcoding_category_table);
7860 /* Followings are target of code detection. */
7861 ASET (Vcoding_category_table, coding_category_iso_7,
7862 intern ("coding-category-iso-7"));
7863 ASET (Vcoding_category_table, coding_category_iso_7_tight,
7864 intern ("coding-category-iso-7-tight"));
7865 ASET (Vcoding_category_table, coding_category_iso_8_1,
7866 intern ("coding-category-iso-8-1"));
7867 ASET (Vcoding_category_table, coding_category_iso_8_2,
7868 intern ("coding-category-iso-8-2"));
7869 ASET (Vcoding_category_table, coding_category_iso_7_else,
7870 intern ("coding-category-iso-7-else"));
7871 ASET (Vcoding_category_table, coding_category_iso_8_else,
7872 intern ("coding-category-iso-8-else"));
7873 ASET (Vcoding_category_table, coding_category_utf_8,
7874 intern ("coding-category-utf-8"));
7875 ASET (Vcoding_category_table, coding_category_utf_16_be,
7876 intern ("coding-category-utf-16-be"));
7877 ASET (Vcoding_category_table, coding_category_utf_16_le,
7878 intern ("coding-category-utf-16-le"));
7879 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
7880 intern ("coding-category-utf-16-be-nosig"));
7881 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
7882 intern ("coding-category-utf-16-le-nosig"));
7883 ASET (Vcoding_category_table, coding_category_charset,
7884 intern ("coding-category-charset"));
7885 ASET (Vcoding_category_table, coding_category_sjis,
7886 intern ("coding-category-sjis"));
7887 ASET (Vcoding_category_table, coding_category_big5,
7888 intern ("coding-category-big5"));
7889 ASET (Vcoding_category_table, coding_category_ccl,
7890 intern ("coding-category-ccl"));
7891 ASET (Vcoding_category_table, coding_category_emacs_mule,
7892 intern ("coding-category-emacs-mule"));
7893 /* Followings are NOT target of code detection. */
7894 ASET (Vcoding_category_table, coding_category_raw_text,
7895 intern ("coding-category-raw-text"));
7896 ASET (Vcoding_category_table, coding_category_undecided,
7897 intern ("coding-category-undecided"));
70c22245 7898
4ed46869
KH
7899 defsubr (&Scoding_system_p);
7900 defsubr (&Sread_coding_system);
7901 defsubr (&Sread_non_nil_coding_system);
7902 defsubr (&Scheck_coding_system);
7903 defsubr (&Sdetect_coding_region);
d46c5b12 7904 defsubr (&Sdetect_coding_string);
05e6f5dc 7905 defsubr (&Sfind_coding_systems_region_internal);
df7492f9 7906 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
7907 defsubr (&Sdecode_coding_region);
7908 defsubr (&Sencode_coding_region);
7909 defsubr (&Sdecode_coding_string);
7910 defsubr (&Sencode_coding_string);
7911 defsubr (&Sdecode_sjis_char);
7912 defsubr (&Sencode_sjis_char);
7913 defsubr (&Sdecode_big5_char);
7914 defsubr (&Sencode_big5_char);
1ba9e4ab 7915 defsubr (&Sset_terminal_coding_system_internal);
c4825358 7916 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 7917 defsubr (&Sterminal_coding_system);
1ba9e4ab 7918 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 7919 defsubr (&Skeyboard_coding_system);
a5d301df 7920 defsubr (&Sfind_operation_coding_system);
df7492f9
KH
7921 defsubr (&Sset_coding_system_priority);
7922 defsubr (&Sdefine_coding_system_internal);
7923 defsubr (&Sdefine_coding_system_alias);
7924 defsubr (&Scoding_system_base);
7925 defsubr (&Scoding_system_plist);
7926 defsubr (&Scoding_system_aliases);
7927 defsubr (&Scoding_system_eol_type);
7928 defsubr (&Scoding_system_priority_list);
4ed46869 7929
4608c386 7930 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
7931 doc: /* List of coding systems.
7932
7933Do not alter the value of this variable manually. This variable should be
df7492f9 7934updated by the functions `define-coding-system' and
48b0f3ae 7935`define-coding-system-alias'. */);
4608c386
KH
7936 Vcoding_system_list = Qnil;
7937
7938 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
7939 doc: /* Alist of coding system names.
7940Each element is one element list of coding system name.
7941This variable is given to `completing-read' as TABLE argument.
7942
7943Do not alter the value of this variable manually. This variable should be
7944updated by the functions `make-coding-system' and
7945`define-coding-system-alias'. */);
4608c386
KH
7946 Vcoding_system_alist = Qnil;
7947
4ed46869 7948 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
7949 doc: /* List of coding-categories (symbols) ordered by priority.
7950
7951On detecting a coding system, Emacs tries code detection algorithms
7952associated with each coding-category one by one in this order. When
7953one algorithm agrees with a byte sequence of source text, the coding
7954system bound to the corresponding coding-category is selected. */);
4ed46869
KH
7955 {
7956 int i;
7957
7958 Vcoding_category_list = Qnil;
df7492f9 7959 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 7960 Vcoding_category_list
d46c5b12
KH
7961 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7962 Vcoding_category_list);
4ed46869
KH
7963 }
7964
7965 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
7966 doc: /* Specify the coding system for read operations.
7967It is useful to bind this variable with `let', but do not set it globally.
7968If the value is a coding system, it is used for decoding on read operation.
7969If not, an appropriate element is used from one of the coding system alists:
7970There are three such tables, `file-coding-system-alist',
7971`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
7972 Vcoding_system_for_read = Qnil;
7973
7974 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
7975 doc: /* Specify the coding system for write operations.
7976Programs bind this variable with `let', but you should not set it globally.
7977If the value is a coding system, it is used for encoding of output,
7978when writing it to a file and when sending it to a file or subprocess.
7979
7980If this does not specify a coding system, an appropriate element
7981is used from one of the coding system alists:
7982There are three such tables, `file-coding-system-alist',
7983`process-coding-system-alist', and `network-coding-system-alist'.
7984For output to files, if the above procedure does not specify a coding system,
7985the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
7986 Vcoding_system_for_write = Qnil;
7987
7988 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
7989 doc: /*
7990Coding system used in the latest file or process I/O. */);
4ed46869
KH
7991 Vlast_coding_system_used = Qnil;
7992
9ce27fde 7993 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
7994 doc: /*
7995*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
7996See info node `Coding Systems' and info node `Text and Binary' concerning
7997such conversion. */);
9ce27fde
KH
7998 inhibit_eol_conversion = 0;
7999
ed29121d 8000 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8001 doc: /*
8002Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8003Bind it to t if the process output is to be treated as if it were a file
8004read from some filesystem. */);
ed29121d
EZ
8005 inherit_process_coding_system = 0;
8006
02ba4723 8007 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8008 doc: /*
8009Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8010The format is ((PATTERN . VAL) ...),
8011where PATTERN is a regular expression matching a file name,
8012VAL is a coding system, a cons of coding systems, or a function symbol.
8013If VAL is a coding system, it is used for both decoding and encoding
8014the file contents.
8015If VAL is a cons of coding systems, the car part is used for decoding,
8016and the cdr part is used for encoding.
8017If VAL is a function symbol, the function must return a coding system
0192762c
DL
8018or a cons of coding systems which are used as above. The function gets
8019the arguments with which `find-operation-coding-systems' was called.
48b0f3ae
PJ
8020
8021See also the function `find-operation-coding-system'
8022and the variable `auto-coding-alist'. */);
02ba4723
KH
8023 Vfile_coding_system_alist = Qnil;
8024
8025 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8026 doc: /*
8027Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8028The format is ((PATTERN . VAL) ...),
8029where PATTERN is a regular expression matching a program name,
8030VAL is a coding system, a cons of coding systems, or a function symbol.
8031If VAL is a coding system, it is used for both decoding what received
8032from the program and encoding what sent to the program.
8033If VAL is a cons of coding systems, the car part is used for decoding,
8034and the cdr part is used for encoding.
8035If VAL is a function symbol, the function must return a coding system
8036or a cons of coding systems which are used as above.
8037
8038See also the function `find-operation-coding-system'. */);
02ba4723
KH
8039 Vprocess_coding_system_alist = Qnil;
8040
8041 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8042 doc: /*
8043Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8044The format is ((PATTERN . VAL) ...),
8045where PATTERN is a regular expression matching a network service name
8046or is a port number to connect to,
8047VAL is a coding system, a cons of coding systems, or a function symbol.
8048If VAL is a coding system, it is used for both decoding what received
8049from the network stream and encoding what sent to the network stream.
8050If VAL is a cons of coding systems, the car part is used for decoding,
8051and the cdr part is used for encoding.
8052If VAL is a function symbol, the function must return a coding system
8053or a cons of coding systems which are used as above.
8054
8055See also the function `find-operation-coding-system'. */);
02ba4723 8056 Vnetwork_coding_system_alist = Qnil;
4ed46869 8057
68c45bf0 8058 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8059 doc: /* Coding system to use with system messages.
8060Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8061 Vlocale_coding_system = Qnil;
8062
005f0d35 8063 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8064 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8065 doc: /*
8066*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8067 eol_mnemonic_unix = build_string (":");
4ed46869 8068
7722baf9 8069 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8070 doc: /*
8071*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8072 eol_mnemonic_dos = build_string ("\\");
4ed46869 8073
7722baf9 8074 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8075 doc: /*
8076*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8077 eol_mnemonic_mac = build_string ("/");
4ed46869 8078
7722baf9 8079 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8080 doc: /*
8081*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8082 eol_mnemonic_undecided = build_string (":");
4ed46869 8083
84fbb8a0 8084 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8085 doc: /*
8086*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8087 Venable_character_translation = Qt;
bdd9fb48 8088
f967223b 8089 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8090 &Vstandard_translation_table_for_decode,
8091 doc: /* Table for translating characters while decoding. */);
f967223b 8092 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8093
f967223b 8094 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8095 &Vstandard_translation_table_for_encode,
8096 doc: /* Table for translating characters while encoding. */);
f967223b 8097 Vstandard_translation_table_for_encode = Qnil;
4ed46869 8098
df7492f9 8099 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
8100 doc: /* Alist of charsets vs revision numbers.
8101While encoding, if a charset (car part of an element) is found,
df7492f9
KH
8102designate it with the escape sequence identifying revision (cdr part
8103of the element). */);
8104 Vcharset_revision_table = Qnil;
02ba4723
KH
8105
8106 DEFVAR_LISP ("default-process-coding-system",
8107 &Vdefault_process_coding_system,
48b0f3ae
PJ
8108 doc: /* Cons of coding systems used for process I/O by default.
8109The car part is used for decoding a process output,
8110the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 8111 Vdefault_process_coding_system = Qnil;
c4825358 8112
3f003981 8113 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
8114 doc: /*
8115Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
8116This is a vector of length 256.
8117If Nth element is non-nil, the existence of code N in a file
8118\(or output of subprocess) doesn't prevent it to be detected as
8119a coding system of ISO 2022 variant which has a flag
8120`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8121or reading output of a subprocess.
8122Only 128th through 159th elements has a meaning. */);
3f003981 8123 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
8124
8125 DEFVAR_LISP ("select-safe-coding-system-function",
8126 &Vselect_safe_coding_system_function,
df7492f9
KH
8127 doc: /*
8128Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
8129
8130If set, this function is called to force a user to select a proper
8131coding system which can encode the text in the case that a default
8132coding system used in each operation can't encode the text.
8133
8134The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
8135 Vselect_safe_coding_system_function = Qnil;
8136
05e6f5dc 8137 DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
df7492f9
KH
8138 doc: /*
8139Char-table containing safe coding systems of each characters.
48b0f3ae
PJ
8140Each element doesn't include such generic coding systems that can
8141encode any characters. They are in the first extra slot. */);
05e6f5dc
KH
8142 Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
8143
22ab2303 8144 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 8145 &inhibit_iso_escape_detection,
df7492f9
KH
8146 doc: /*
8147If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
8148
8149By default, on reading a file, Emacs tries to detect how the text is
8150encoded. This code detection is sensitive to escape sequences. If
8151the sequence is valid as ISO2022, the code is determined as one of
8152the ISO2022 encodings, and the file is decoded by the corresponding
8153coding system (e.g. `iso-2022-7bit').
8154
8155However, there may be a case that you want to read escape sequences in
8156a file as is. In such a case, you can set this variable to non-nil.
8157Then, as the code detection ignores any escape sequences, no file is
8158detected as encoded in some ISO2022 encoding. The result is that all
8159escape sequences become visible in a buffer.
8160
8161The default value is nil, and it is strongly recommended not to change
8162it. That is because many Emacs Lisp source files that contain
8163non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8164in Emacs's distribution, and they won't be decoded correctly on
8165reading if you suppress escape sequence detection.
8166
8167The other way to read escape sequences in a file without decoding is
8168to explicitly specify some coding system that doesn't use ISO2022's
8169escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 8170 inhibit_iso_escape_detection = 0;
2c78b7e1
KH
8171
8172 {
8173 Lisp_Object args[coding_arg_max];
8174 Lisp_Object plist[14];
8175 int i;
8176
8177 for (i = 0; i < coding_arg_max; i++)
8178 args[i] = Qnil;
8179
8180 plist[0] = intern (":name");
8181 plist[1] = args[coding_arg_name] = Qno_conversion;
8182 plist[2] = intern (":mnemonic");
8183 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8184 plist[4] = intern (":coding-type");
8185 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8186 plist[6] = intern (":ascii-compatible-p");
8187 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8188 plist[8] = intern (":default-char");
8189 plist[9] = args[coding_arg_default_char] = make_number (0);
8190 plist[10] = intern (":docstring");
8191 plist[11] = build_string ("Do no conversion.\n\
8192\n\
8193When you visit a file with this coding, the file is read into a\n\
8194unibyte buffer as is, thus each byte of a file is treated as a\n\
8195character.");
8196 plist[12] = intern (":eol-type");
8197 plist[13] = args[coding_arg_eol_type] = Qunix;
8198 args[coding_arg_plist] = Flist (14, plist);
8199 Fdefine_coding_system_internal (coding_arg_max, args);
8200 }
8201
8202 setup_coding_system (Qno_conversion, &keyboard_coding);
8203 setup_coding_system (Qno_conversion, &terminal_coding);
8204 setup_coding_system (Qno_conversion, &safe_terminal_coding);
4ed46869
KH
8205}
8206
68c45bf0
PE
8207char *
8208emacs_strerror (error_number)
8209 int error_number;
8210{
8211 char *str;
8212
ca9c0567 8213 synchronize_system_messages_locale ();
68c45bf0
PE
8214 str = strerror (error_number);
8215
8216 if (! NILP (Vlocale_coding_system))
8217 {
8218 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8219 Vlocale_coding_system,
8220 0);
8221 str = (char *) XSTRING (dec)->data;
8222 }
8223
8224 return str;
8225}
8226
4ed46869 8227#endif /* emacs */