(encode_coding_emacs_mule): Pay attention to raw-8-bit chars.
[bpt/emacs.git] / src / coding.c
CommitLineData
4ed46869 1/* Coding system handler (conversion, detection, and etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
70ad9fc4 4 Copyright (C) 2001 Free Software Foundation, Inc.
df7492f9
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
df7492f9
KH
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
93dec019 82
df7492f9
KH
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9
KH
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used by Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
df7492f9 111 A coding system for a text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
5bad0796 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
e19c3639 138
4ed46869
KH
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9
KH
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwise (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
151
152 It also resets some bits of an integer pointed by MASK. The macros
153 CATEGORY_MASK_XXX specifies each bit of this integer.
154
155 Below is the template of these functions. */
156
4ed46869 157#if 0
df7492f9
KH
158static int
159detect_coding_XXX (coding, mask)
160 struct coding_system *coding;
161 int *mask;
4ed46869 162{
df7492f9
KH
163 unsigned char *src = coding->source;
164 unsigned char *src_end = coding->source + coding->src_bytes;
165 int multibytep = coding->src_multibyte;
166 int c;
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
175 /* Check if it conforms to XXX. If not, break the loop. */
176 }
177 /* As the data is invalid for XXX, reset a proper bits. */
178 *mask &= ~CODING_CATEGORY_XXX;
179 return 0;
180 no_more_source:
181 /* The source exausted. */
182 if (!found)
183 /* ASCII characters only. */
184 return 0;
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask &= CODING_CATEGORY_XXX;
187 return 1;
4ed46869
KH
188}
189#endif
190
191/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
df7492f9
KH
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
d46c5b12 197
df7492f9
KH
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
d46c5b12 202
df7492f9 203 Below is the template of these functions. */
d46c5b12 204
4ed46869 205#if 0
b73bfc1c 206static void
df7492f9 207decode_coding_XXXX (coding)
4ed46869 208 struct coding_system *coding;
4ed46869 209{
df7492f9
KH
210 unsigned char *src = coding->source + coding->consumed;
211 unsigned char *src_end = coding->source + coding->src_bytes;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base;
216 /* A buffer to produce decoded characters. */
217 int *charbuf = coding->charbuf;
218 int *charbuf_end = charbuf + coding->charbuf_size;
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
243}
244#endif
245
246/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
df7492f9
KH
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
d46c5b12 252
df7492f9
KH
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 257
df7492f9
KH
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce an encoded text until it
260 reaches at the head of not-yet-encoded source text.
d46c5b12 261
df7492f9 262 Below is a template of these functions. */
4ed46869 263#if 0
b73bfc1c 264static void
df7492f9 265encode_coding_XXX (coding)
4ed46869 266 struct coding_system *coding;
4ed46869 267{
df7492f9
KH
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
4ed46869
KH
285}
286#endif
287
4ed46869
KH
288\f
289/*** 1. Preamble ***/
290
68c45bf0 291#include <config.h>
4ed46869
KH
292#include <stdio.h>
293
4ed46869
KH
294#include "lisp.h"
295#include "buffer.h"
df7492f9 296#include "character.h"
4ed46869
KH
297#include "charset.h"
298#include "ccl.h"
df7492f9 299#include "composite.h"
4ed46869
KH
300#include "coding.h"
301#include "window.h"
302
df7492f9 303Lisp_Object Vcoding_system_hash_table;
4ed46869 304
df7492f9 305Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
306Lisp_Object Qunix, Qdos;
307extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
308Lisp_Object Qbuffer_file_coding_system;
309Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 310Lisp_Object Qdefault_char;
27901516 311Lisp_Object Qno_conversion, Qundecided;
df7492f9
KH
312Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
313Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
314Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
bb0115a2 315Lisp_Object Qcoding_system_history;
1397dc18 316Lisp_Object Qvalid_codes;
4ed46869
KH
317
318extern Lisp_Object Qinsert_file_contents, Qwrite_region;
319Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
320Lisp_Object Qstart_process, Qopen_network_stream;
321Lisp_Object Qtarget_idx;
322
d46c5b12
KH
323Lisp_Object Vselect_safe_coding_system_function;
324
7722baf9
EZ
325/* Mnemonic string for each format of end-of-line. */
326Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
327/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 328 decided. */
7722baf9 329Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
330
331#ifdef emacs
332
4608c386
KH
333Lisp_Object Vcoding_system_list, Vcoding_system_alist;
334
335Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 336
d46c5b12
KH
337/* Coding system emacs-mule and raw-text are for converting only
338 end-of-line format. */
339Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 340
4ed46869
KH
341/* Coding-systems are handed between Emacs Lisp programs and C internal
342 routines by the following three variables. */
343/* Coding-system for reading files and receiving data from process. */
344Lisp_Object Vcoding_system_for_read;
345/* Coding-system for writing files and sending data to process. */
346Lisp_Object Vcoding_system_for_write;
347/* Coding-system actually used in the latest I/O. */
348Lisp_Object Vlast_coding_system_used;
349
c4825358 350/* A vector of length 256 which contains information about special
94487c4e 351 Latin codes (especially for dealing with Microsoft codes). */
3f003981 352Lisp_Object Vlatin_extra_code_table;
c4825358 353
9ce27fde
KH
354/* Flag to inhibit code conversion of end-of-line format. */
355int inhibit_eol_conversion;
356
74383408
KH
357/* Flag to inhibit ISO2022 escape sequence detection. */
358int inhibit_iso_escape_detection;
359
ed29121d
EZ
360/* Flag to make buffer-file-coding-system inherit from process-coding. */
361int inherit_process_coding_system;
362
c4825358 363/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
364struct coding_system terminal_coding;
365
c4825358
KH
366/* Coding system to be used to encode text for terminal display when
367 terminal coding system is nil. */
368struct coding_system safe_terminal_coding;
369
370/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
371struct coding_system keyboard_coding;
372
02ba4723
KH
373Lisp_Object Vfile_coding_system_alist;
374Lisp_Object Vprocess_coding_system_alist;
375Lisp_Object Vnetwork_coding_system_alist;
4ed46869 376
68c45bf0
PE
377Lisp_Object Vlocale_coding_system;
378
4ed46869
KH
379#endif /* emacs */
380
f967223b
KH
381/* Flag to tell if we look up translation table on character code
382 conversion. */
84fbb8a0 383Lisp_Object Venable_character_translation;
f967223b
KH
384/* Standard translation table to look up on decoding (reading). */
385Lisp_Object Vstandard_translation_table_for_decode;
386/* Standard translation table to look up on encoding (writing). */
387Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 388
f967223b
KH
389Lisp_Object Qtranslation_table;
390Lisp_Object Qtranslation_table_id;
391Lisp_Object Qtranslation_table_for_decode;
392Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
393
394/* Alist of charsets vs revision number. */
df7492f9 395static Lisp_Object Vcharset_revision_table;
4ed46869 396
02ba4723
KH
397/* Default coding systems used for process I/O. */
398Lisp_Object Vdefault_process_coding_system;
399
b843d1ae
KH
400/* Global flag to tell that we can't call post-read-conversion and
401 pre-write-conversion functions. Usually the value is zero, but it
402 is set to 1 temporarily while such functions are running. This is
403 to avoid infinite recursive call. */
404static int inhibit_pre_post_conversion;
405
df7492f9
KH
406/* Two special coding systems. */
407Lisp_Object Vsjis_coding_system;
408Lisp_Object Vbig5_coding_system;
409
410
411static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
412static void decode_coding_utf_8 P_ ((struct coding_system *));
413static int encode_coding_utf_8 P_ ((struct coding_system *));
414
415static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
416static void decode_coding_utf_16 P_ ((struct coding_system *));
417static int encode_coding_utf_16 P_ ((struct coding_system *));
418
419static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
420static void decode_coding_iso_2022 P_ ((struct coding_system *));
421static int encode_coding_iso_2022 P_ ((struct coding_system *));
422
423static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
424static void decode_coding_emacs_mule P_ ((struct coding_system *));
425static int encode_coding_emacs_mule P_ ((struct coding_system *));
426
427static int detect_coding_sjis P_ ((struct coding_system *, int *));
428static void decode_coding_sjis P_ ((struct coding_system *));
429static int encode_coding_sjis P_ ((struct coding_system *));
430
431static int detect_coding_big5 P_ ((struct coding_system *, int *));
432static void decode_coding_big5 P_ ((struct coding_system *));
433static int encode_coding_big5 P_ ((struct coding_system *));
434
435static int detect_coding_ccl P_ ((struct coding_system *, int *));
436static void decode_coding_ccl P_ ((struct coding_system *));
437static int encode_coding_ccl P_ ((struct coding_system *));
438
439static void decode_coding_raw_text P_ ((struct coding_system *));
440static int encode_coding_raw_text P_ ((struct coding_system *));
441
442
/* ISO2022 section */

/* The integer stored at index REG of the vector found in the
   `coding_attr_iso_initial' slot of CODING's attribute vector.  */
#define CODING_ISO_INITIAL(coding, reg)                 \
  (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
                     coding_attr_iso_initial),          \
               (reg))))

/* CODING->safe_charsets[CHARSET_ID] if CHARSET_ID does not exceed
   CODING->max_charset_id, otherwise -1.  CHARSET_ID is evaluated
   twice, so it must not have side effects.  */
#define CODING_ISO_REQUEST(coding, charset_id)          \
  (((charset_id) <= (coding)->max_charset_id            \
    ? (coding)->safe_charsets[(charset_id)]             \
    : -1))

/* Accessors for the ISO2022-specific members of CODING.  Each one
   expands to an lvalue.  */
#define CODING_ISO_FLAGS(coding)                        \
  ((coding)->spec.iso_2022.flags)
#define CODING_ISO_DESIGNATION(coding, reg)             \
  ((coding)->spec.iso_2022.current_designation[(reg)])
#define CODING_ISO_INVOCATION(coding, plane)            \
  ((coding)->spec.iso_2022.current_invocation[(plane)])
#define CODING_ISO_SINGLE_SHIFTING(coding)              \
  ((coding)->spec.iso_2022.single_shifting)
#define CODING_ISO_BOL(coding)                          \
  ((coding)->spec.iso_2022.bol)
/* The designation of the register currently invoked to PLANE.  */
#define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
  CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
469
/* Control characters of ISO2022.  */

#define ISO_CODE_LF     0x0A    /* line-feed */
#define ISO_CODE_CR     0x0D    /* carriage-return */
#define ISO_CODE_SO     0x0E    /* shift-out */
#define ISO_CODE_SI     0x0F    /* shift-in */
#define ISO_CODE_SS2_7  0x19    /* single-shift-2 for 7-bit code */
#define ISO_CODE_ESC    0x1B    /* escape */
#define ISO_CODE_SS2    0x8E    /* single-shift-2 */
#define ISO_CODE_SS3    0x8F    /* single-shift-3 */
#define ISO_CODE_CSI    0x9B    /* control-sequence-introducer */

/* Every 1-byte code of ISO2022 belongs to exactly one of the
   following classes.  */
enum iso_code_class_type
  {
    /* Control codes in the range 0x00..0x1F and 0x7F, except for the
       following 5 codes.  */
    ISO_control_0,
    ISO_carriage_return,                /* ISO_CODE_CR (0x0D) */
    ISO_shift_out,                      /* ISO_CODE_SO (0x0E) */
    ISO_shift_in,                       /* ISO_CODE_SI (0x0F) */
    ISO_single_shift_2_7,               /* ISO_CODE_SS2_7 (0x19) */
    ISO_escape,                         /* ISO_CODE_ESC (0x1B) */
    /* Control codes in the range 0x80..0x9F, except for the
       following 3 codes.  */
    ISO_control_1,
    ISO_single_shift_2,                 /* ISO_CODE_SS2 (0x8E) */
    ISO_single_shift_3,                 /* ISO_CODE_SS3 (0x8F) */
    ISO_control_sequence_introducer,    /* ISO_CODE_CSI (0x9B) */
    ISO_0x20_or_0x7F,                   /* Codes of the values 0x20 or 0x7F.  */
    ISO_graphic_plane_0,                /* Graphic codes in the range 0x21..0x7E.  */
    ISO_0xA0_or_0xFF,                   /* Codes of the values 0xA0 or 0xFF.  */
    ISO_graphic_plane_1                 /* Graphic codes in the range 0xA1..0xFE.  */
  };
05e6f5dc 505
df7492f9
KH
/** Each macro CODING_ISO_FLAG_XXX defines one flag bit of the
    `iso-flags' attribute of an iso2022 coding system.  */

/* Produce the long-form designation sequence (e.g. ESC $ ( A)
   instead of the correct short-form sequence (e.g. ESC $ A).  */
#define CODING_ISO_FLAG_LONG_FORM       0x0001

/* Reset graphic planes and registers to the initial state at
   end-of-line.  */
#define CODING_ISO_FLAG_RESET_AT_EOL    0x0002

/* Reset graphic planes and registers to the initial state before any
   control characters.  */
#define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004

/* Encode by 7-bit environment.  */
#define CODING_ISO_FLAG_SEVEN_BITS      0x0008

/* Use locking-shift function.  */
#define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010

/* Use single-shift function.  Overwrites
   CODING_ISO_FLAG_LOCKING_SHIFT.  */
#define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020

/* Use designation escape sequence.  */
#define CODING_ISO_FLAG_DESIGNATION     0x0040

/* Produce revision number sequence.  */
#define CODING_ISO_FLAG_REVISION        0x0080

/* Produce ISO6429's direction specifying sequence.  */
#define CODING_ISO_FLAG_DIRECTION       0x0100

/* Assume designation states are reset at beginning of line on
   output.  */
#define CODING_ISO_FLAG_INIT_AT_BOL     0x0200

/* Designation sequence should be placed at beginning of line on
   output.  */
#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400

/* Do not encode unsafe characters on output.  */
#define CODING_ISO_FLAG_SAFE            0x0800

/* Extra latin codes (128..159) are accepted as a valid code on
   input.  */
#define CODING_ISO_FLAG_LATIN_EXTRA     0x1000

/* NOTE(review): the remaining flags carry no description in this
   part of the file; their meaning has to be taken from their users.  */
#define CODING_ISO_FLAG_COMPOSITION     0x2000

#define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000

#define CODING_ISO_FLAG_USE_ROMAN       0x8000

#define CODING_ISO_FLAG_USE_OLDJIS      0x10000

#define CODING_ISO_FLAG_FULL_SUPPORT    0x100000

/* A character to be produced on output if encoding of the original
   character is prohibited by CODING_ISO_FLAG_SAFE.  */
#define CODING_INHIBIT_CHARACTER_SUBSTITUTION   '?'
aa72b389 568
aa72b389 569
df7492f9
KH
/* UTF-16 section */

/* Accessors for the UTF-16-specific members of CODING.  Each one
   expands to an lvalue.  */
#define CODING_UTF_16_BOM(coding)       ((coding)->spec.utf_16.bom)
#define CODING_UTF_16_ENDIAN(coding)    ((coding)->spec.utf_16.endian)
#define CODING_UTF_16_SURROGATE(coding) ((coding)->spec.utf_16.surrogate)


/* CCL section */

/* The `coding_attr_ccl_decoder' / `coding_attr_ccl_encoder' slot of
   CODING's attribute vector.  */
#define CODING_CCL_DECODER(coding)      \
  AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
#define CODING_CCL_ENCODER(coding)      \
  AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
/* The data of the Lisp string kept in the `coding_attr_ccl_valids'
   slot of CODING's attribute vector.  */
#define CODING_CCL_VALIDS(coding)       \
  (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
   ->data)
4ed46869 589
/* Index for each coding category in `coding_category_table'.  */

enum coding_category
  {
    coding_category_iso_7,
    coding_category_iso_7_tight,
    coding_category_iso_8_1,
    coding_category_iso_8_2,
    coding_category_iso_7_else,
    coding_category_iso_8_else,
    coding_category_utf_8,
    coding_category_utf_16_auto,
    coding_category_utf_16_be,
    coding_category_utf_16_le,
    coding_category_utf_16_be_nosig,
    coding_category_utf_16_le_nosig,
    coding_category_charset,
    coding_category_sjis,
    coding_category_big5,
    coding_category_ccl,
    coding_category_emacs_mule,
    /* All above are targets of code detection.  */
    coding_category_raw_text,
    coding_category_undecided,
    coding_category_max
  };

/* Flag bits used in detect_coding_XXXX: one bit per category, at the
   position given by the category's enum value.  No mask is defined
   for coding_category_utf_16_auto.  */
#define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
#define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
#define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
#define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
#define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
#define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
#define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
#define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
#define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
#define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
#define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
#define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
#define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
#define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
#define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
#define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)

/* Groups of ISO2022 categories.  */
#define CATEGORY_MASK_ISO_7BIT  \
  (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)

#define CATEGORY_MASK_ISO_8BIT  \
  (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)

#define CATEGORY_MASK_ISO_ELSE  \
  (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)

/* Same bits as ISO_7, ISO_7_TIGHT, ISO_7_ELSE and ISO_8_ELSE.  */
#define CATEGORY_MASK_ISO_ESCAPE        \
  (CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_ELSE)

#define CATEGORY_MASK_ISO       \
  (CATEGORY_MASK_ISO_7BIT       \
   | CATEGORY_MASK_ISO_8BIT     \
   | CATEGORY_MASK_ISO_ELSE)

/* All UTF-16 categories that have a mask bit.  */
#define CATEGORY_MASK_UTF_16            \
  (CATEGORY_MASK_UTF_16_BE              \
   | CATEGORY_MASK_UTF_16_LE            \
   | CATEGORY_MASK_UTF_16_BE_NOSIG      \
   | CATEGORY_MASK_UTF_16_LE_NOSIG)

/* This value is returned if detect_coding_mask () finds nothing other
   than ASCII characters.  It is the union of every mask bit defined
   above.  */
#define CATEGORY_MASK_ANY       \
  (CATEGORY_MASK_ISO            \
   | CATEGORY_MASK_UTF_8        \
   | CATEGORY_MASK_UTF_16       \
   | CATEGORY_MASK_CHARSET      \
   | CATEGORY_MASK_SJIS         \
   | CATEGORY_MASK_BIG5         \
   | CATEGORY_MASK_CCL          \
   | CATEGORY_MASK_EMACS_MULE)
681
682
683/* List of symbols `coding-category-xxx' ordered by priority. This
684 variable is exposed to Emacs Lisp. */
685static Lisp_Object Vcoding_category_list;
686
687/* Table of coding categories (Lisp symbols). This variable is for
688 internal use oly. */
689static Lisp_Object Vcoding_category_table;
690
691/* Table of coding-categories ordered by priority. */
692static enum coding_category coding_priorities[coding_category_max];
693
694/* Nth element is a coding context for the coding system bound to the
695 Nth coding category. */
696static struct coding_system coding_categories[coding_category_max];
697
698static int detected_mask[coding_category_raw_text] =
699 { CATEGORY_MASK_ISO,
700 CATEGORY_MASK_ISO,
701 CATEGORY_MASK_ISO,
702 CATEGORY_MASK_ISO,
703 CATEGORY_MASK_ISO,
704 CATEGORY_MASK_ISO,
705 CATEGORY_MASK_UTF_8,
706 CATEGORY_MASK_UTF_16,
707 CATEGORY_MASK_UTF_16,
708 CATEGORY_MASK_UTF_16,
709 CATEGORY_MASK_UTF_16,
710 CATEGORY_MASK_UTF_16,
711 CATEGORY_MASK_CHARSET,
712 CATEGORY_MASK_SJIS,
713 CATEGORY_MASK_BIG5,
714 CATEGORY_MASK_CCL,
715 CATEGORY_MASK_EMACS_MULE
716 };
717
718/*** Commonly used macros and functions ***/
719
/* Smaller / larger of A and B, unless already provided elsewhere.
   Both arguments may be evaluated twice.  */
#if !defined (min)
# define min(a, b)      ((a) < (b) ? (a) : (b))
#endif
#if !defined (max)
# define max(a, b)      ((a) > (b) ? (a) : (b))
#endif
4ed46869 726
df7492f9
KH
/* Fetch commonly needed information about CODING into the lvalues
   ATTRS, EOL_TYPE and CHARSET_LIST:
     ATTRS        <- CODING_ID_ATTRS of CODING's id,
     EOL_TYPE     <- CODING_ID_EOL_TYPE of CODING's id, replaced by
                     Qunix when that value is a vector,
     CHARSET_LIST <- CODING_ATTR_CHARSET_LIST of ATTRS.
   CODING may be any expression yielding a `struct coding_system *';
   it is evaluated twice.  */
#define CODING_GET_INFO(coding, attrs, eol_type, charset_list)  \
  do {                                                          \
    attrs = CODING_ID_ATTRS ((coding)->id);                     \
    eol_type = CODING_ID_EOL_TYPE ((coding)->id);               \
    if (VECTORP (eol_type))                                     \
      eol_type = Qunix;                                         \
    charset_list = CODING_ATTR_CHARSET_LIST (attrs);            \
  } while (0)
4ed46869 735
4ed46869 736
df7492f9
KH
/* Fetch one byte from the source text at SRC into C, advance SRC, and
   increment CONSUMED_CHARS, without checking for the end of source.

   When MULTIBYTEP is nonzero and the fetched byte has its high bit
   set, the byte must be 0xC0 or 0xC1 (otherwise `error' is called);
   the following byte is then fetched too and both are folded into a
   single value ((lead & 1) << 6) | trail.

   The caller should declare and set these variables appropriately in
   advance:
        src, multibytep, consumed_chars
*/

#define ONE_MORE_BYTE_NO_CHECK(c)                       \
  do {                                                  \
    c = *src++;                                         \
    if (multibytep && (c & 0x80))                       \
      {                                                 \
        if ((c & 0xFE) != 0xC0)                         \
          error ("Undecodable char found");             \
        c = ((c & 1) << 6) | *src++;                    \
      }                                                 \
    consumed_chars++;                                   \
  } while (0)


/* Safely get one byte from the source text pointed by SRC which ends
   at SRC_END, and set C to that byte, exactly as
   ONE_MORE_BYTE_NO_CHECK does.  If SRC has already reached SRC_END,
   jump to the label `no_more_source' instead; in that case
   CODING->result is set to CODING_RESULT_INSUFFICIENT_SRC when
   SRC_BASE is before SRC.

   The caller should declare and set these variables appropriately in
   advance:
        coding, src, src_base, src_end, multibytep, consumed_chars
   and must provide the label `no_more_source'.
*/

#define ONE_MORE_BYTE(c)                                        \
  do {                                                          \
    if (src == src_end)                                         \
      {                                                         \
        if (src_base < src)                                     \
          coding->result = CODING_RESULT_INSUFFICIENT_SRC;      \
        goto no_more_source;                                    \
      }                                                         \
    ONE_MORE_BYTE_NO_CHECK (c);                                 \
  } while (0)
774
aa72b389 775
df7492f9
KH
/* Store a byte C in the place pointed by DST and increment DST to the
   next free point, and increment PRODUCED_CHARS.  The caller should
   assure that C is 0..127, and declare and set the variables `dst'
   and `produced_chars' appropriately in advance.  */

#define EMIT_ONE_ASCII_BYTE(c)  \
  do {                          \
    produced_chars++;           \
    *dst++ = (c);               \
  } while (0)


/* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2.  */

#define EMIT_TWO_ASCII_BYTES(c1, c2)    \
  do {                                  \
    produced_chars += 2;                \
    *dst++ = (c1);                      \
    *dst++ = (c2);                      \
  } while (0)


/* Store a byte C in the place pointed by DST and increment DST to the
   next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
   nonzero, store in an appropriate multibyte form: a value of 0x80 or
   more is first converted by BYTE8_TO_CHAR, and the result is written
   by CHAR_STRING_ADVANCE.  The caller should declare and set the
   variables `dst', `multibytep' and `produced_chars' appropriately in
   advance.  */

#define EMIT_ONE_BYTE(c)                        \
  do {                                          \
    produced_chars++;                           \
    if (! multibytep)                           \
      *dst++ = (c);                             \
    else                                        \
      {                                         \
        int ch = (c);                           \
        if (ch >= 0x80)                         \
          ch = BYTE8_TO_CHAR (ch);              \
        CHAR_STRING_ADVANCE (ch, dst);          \
      }                                         \
  } while (0)


/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */

#define EMIT_TWO_BYTES(c1, c2)  \
  do {                          \
    EMIT_ONE_BYTE (c1);         \
    EMIT_ONE_BYTE (c2);         \
  } while (0)


/* Like EMIT_ONE_BYTE, but emit three bytes; C1, C2 and C3.  */

#define EMIT_THREE_BYTES(c1, c2, c3)    \
  do {                                  \
    EMIT_ONE_BYTE (c1);                 \
    EMIT_ONE_BYTE (c2);                 \
    EMIT_ONE_BYTE (c3);                 \
  } while (0)


/* Like EMIT_ONE_BYTE, but emit four bytes; C1, C2, C3 and C4.  */

#define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
  do {                                          \
    EMIT_TWO_BYTES (c1, c2);                    \
    EMIT_TWO_BYTES (c3, c4);                    \
  } while (0)
aa72b389 858
aa72b389 859
df7492f9
KH
/* Set C to DECODE_CHAR (CHARSET, CODE).  If `charset_map_loaded' was
   set during that call, refresh CODING->source with coding_set_source
   and shift SRC, SRC_BASE and SRC_END by the distance the source
   address moved (presumably because loading a map can relocate the
   source text -- confirm against DECODE_CHAR).  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {									\
    charset_map_loaded = 0;						\
    c = DECODE_CHAR (charset, code);					\
    if (charset_map_loaded)						\
      {									\
	unsigned char *old_source = coding->source;			\
	EMACS_INT shift;						\
									\
	coding_set_source (coding);					\
	shift = coding->source - old_source;				\
	src += shift;							\
	src_base += shift;						\
	src_end += shift;						\
      }									\
  } while (0)
aa72b389 876
aa72b389 877
df7492f9
KH
/* Make sure BYTES more bytes can be stored at DST.  When DST + BYTES
   reaches DST_END, grow the destination through alloc_destination by
   BYTES plus the number of elements still unread in the character
   buffer, then refresh DST and DST_END.  Expects `coding', `dst',
   `dst_end', `charbuf' and `charbuf_end' in scope.  */

#define ASSURE_DESTINATION(bytes)					\
  do {									\
    if (! (dst + (bytes) < dst_end))					\
      {									\
	int needed = charbuf_end - charbuf + (bytes);			\
									\
	dst = alloc_destination (coding, needed, dst);			\
	dst_end = coding->destination + coding->dst_bytes;		\
      }									\
  } while (0)
b1887814 888
df7492f9
KH
889
890
891static void
892coding_set_source (coding)
893 struct coding_system *coding;
894{
895 if (BUFFERP (coding->src_object))
896 {
897 if (coding->src_pos < 0)
898 coding->source = GAP_END_ADDR + coding->src_pos_byte;
899 else
900 {
e19c3639 901 struct buffer *buf = XBUFFER (coding->src_object);
e19c3639
KH
902 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
903 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
904
905 coding->source = beg_addr + coding->src_pos_byte - 1;
906 if (coding->src_pos_byte >= gpt_byte)
907 coding->source += BUF_GAP_SIZE (buf);
aa72b389
KH
908 }
909 }
df7492f9 910 else if (STRINGP (coding->src_object))
aa72b389 911 {
df7492f9
KH
912 coding->source = (XSTRING (coding->src_object)->data
913 + coding->src_pos_byte);
914 }
915 else
916 /* Otherwise, the source is C string and is never relocated
917 automatically. Thus we don't have to update anything. */
918 ;
919}
920
921static void
922coding_set_destination (coding)
923 struct coding_system *coding;
924{
925 if (BUFFERP (coding->dst_object))
926 {
927 /* We are sure that coding->dst_pos_byte is before the gap of the
928 buffer. */
929 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
930 + coding->dst_pos_byte - 1);
931 if (coding->src_pos < 0)
df7492f9
KH
932 coding->dst_bytes = (GAP_END_ADDR
933 - (coding->src_bytes - coding->consumed)
934 - coding->destination);
935 else
936 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
937 - coding->destination);
938 }
939 else
940 /* Otherwise, the destination is C string and is never relocated
941 automatically. Thus we don't have to update anything. */
942 ;
943}
944
945
946static void
947coding_alloc_by_realloc (coding, bytes)
948 struct coding_system *coding;
949 EMACS_INT bytes;
950{
951 coding->destination = (unsigned char *) xrealloc (coding->destination,
952 coding->dst_bytes + bytes);
953 coding->dst_bytes += bytes;
954}
955
956static void
957coding_alloc_by_making_gap (coding, bytes)
958 struct coding_system *coding;
959 EMACS_INT bytes;
960{
2c78b7e1
KH
961 if (BUFFERP (coding->dst_object)
962 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
963 {
964 EMACS_INT add = coding->src_bytes - coding->consumed;
965
966 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
967 make_gap (bytes);
968 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
969 }
970 else
971 {
2c78b7e1
KH
972 Lisp_Object this_buffer;
973
974 this_buffer = Fcurrent_buffer ();
df7492f9
KH
975 set_buffer_internal (XBUFFER (coding->dst_object));
976 make_gap (bytes);
977 set_buffer_internal (XBUFFER (this_buffer));
978 }
979}
980
981
982static unsigned char *
983alloc_destination (coding, nbytes, dst)
984 struct coding_system *coding;
985 int nbytes;
986 unsigned char *dst;
987{
988 EMACS_INT offset = dst - coding->destination;
989
990 if (BUFFERP (coding->dst_object))
991 coding_alloc_by_making_gap (coding, nbytes);
992 else
993 coding_alloc_by_realloc (coding, nbytes);
994 coding->result = CODING_RESULT_SUCCESS;
995 coding_set_destination (coding);
996 dst = coding->destination + offset;
997 return dst;
998}
aa72b389 999
df7492f9
KH
1000\f
1001/*** 2. Emacs' internal format (emacs-utf-8) ***/
1002
1003
1004
1005\f
1006/*** 3. UTF-8 ***/
1007
1008/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1009 Check if a text is encoded in UTF-8. If it is, return
1010 CATEGORY_MASK_UTF_8, else return 0. */
1011
/* Byte classification predicates for UTF-8, each testing the
   high-order bits of C.  */

/* C is below 0x80: a complete one-octet sequence.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* C has the bit pattern 10xxxxxx: a trailing octet.  */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
/* C has the bit pattern 110xxxxx: leads a two-octet sequence.  */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* C has the bit pattern 1110xxxx: leads a three-octet sequence.  */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* C has the bit pattern 11110xxx: leads a four-octet sequence.  */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* C has the bit pattern 111110xx: leads a five-octet sequence.  */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1018
1019static int
1020detect_coding_utf_8 (coding, mask)
1021 struct coding_system *coding;
1022 int *mask;
1023{
1024 unsigned char *src = coding->source, *src_base = src;
1025 unsigned char *src_end = coding->source + coding->src_bytes;
1026 int multibytep = coding->src_multibyte;
1027 int consumed_chars = 0;
1028 int found = 0;
1029
1030 /* A coding system of this category is always ASCII compatible. */
1031 src += coding->head_ascii;
1032
1033 while (1)
1034 {
1035 int c, c1, c2, c3, c4;
1036
1037 ONE_MORE_BYTE (c);
1038 if (UTF_8_1_OCTET_P (c))
1039 continue;
1040 ONE_MORE_BYTE (c1);
1041 if (! UTF_8_EXTRA_OCTET_P (c1))
1042 break;
1043 if (UTF_8_2_OCTET_LEADING_P (c))
1044 {
1045 found++;
1046 continue;
1047 }
1048 ONE_MORE_BYTE (c2);
1049 if (! UTF_8_EXTRA_OCTET_P (c2))
1050 break;
1051 if (UTF_8_3_OCTET_LEADING_P (c))
1052 {
1053 found++;
1054 continue;
1055 }
1056 ONE_MORE_BYTE (c3);
1057 if (! UTF_8_EXTRA_OCTET_P (c3))
1058 break;
1059 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1060 {
df7492f9
KH
1061 found++;
1062 continue;
1063 }
1064 ONE_MORE_BYTE (c4);
1065 if (! UTF_8_EXTRA_OCTET_P (c4))
1066 break;
1067 if (UTF_8_5_OCTET_LEADING_P (c))
1068 {
1069 found++;
1070 continue;
1071 }
1072 break;
1073 }
1074 *mask &= ~CATEGORY_MASK_UTF_8;
1075 return 0;
1076
1077 no_more_source:
1078 if (! found)
1079 return 0;
1080 *mask &= CATEGORY_MASK_UTF_8;
1081 return 1;
1082}
1083
1084
b0edb2c5 1085/* Fixme: deal with surrogates? */
df7492f9
KH
1086static void
1087decode_coding_utf_8 (coding)
1088 struct coding_system *coding;
1089{
1090 unsigned char *src = coding->source + coding->consumed;
1091 unsigned char *src_end = coding->source + coding->src_bytes;
1092 unsigned char *src_base;
1093 int *charbuf = coding->charbuf;
1094 int *charbuf_end = charbuf + coding->charbuf_size;
1095 int consumed_chars = 0, consumed_chars_base;
1096 int multibytep = coding->src_multibyte;
1097 Lisp_Object attr, eol_type, charset_list;
1098
1099 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1100
1101 while (1)
1102 {
1103 int c, c1, c2, c3, c4, c5;
1104
1105 src_base = src;
1106 consumed_chars_base = consumed_chars;
1107
1108 if (charbuf >= charbuf_end)
1109 break;
1110
1111 ONE_MORE_BYTE (c1);
1112 if (UTF_8_1_OCTET_P(c1))
1113 {
1114 c = c1;
1115 if (c == '\r')
aa72b389 1116 {
df7492f9
KH
1117 if (EQ (eol_type, Qdos))
1118 {
1119 if (src == src_end)
1120 goto no_more_source;
1121 if (*src == '\n')
1122 ONE_MORE_BYTE (c);
1123 }
1124 else if (EQ (eol_type, Qmac))
1125 c = '\n';
aa72b389 1126 }
aa72b389 1127 }
df7492f9 1128 else
aa72b389 1129 {
df7492f9
KH
1130 ONE_MORE_BYTE (c2);
1131 if (! UTF_8_EXTRA_OCTET_P (c2))
1132 goto invalid_code;
1133 if (UTF_8_2_OCTET_LEADING_P (c1))
b0edb2c5
DL
1134 {
1135 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1136 /* Reject overlong sequences here and below. Encoders
1137 producing them are incorrect, they can be misleading,
1138 and they mess up read/write invariance. */
1139 if (c < 128)
1140 goto invalid_code;
1141 }
df7492f9 1142 else
aa72b389 1143 {
df7492f9
KH
1144 ONE_MORE_BYTE (c3);
1145 if (! UTF_8_EXTRA_OCTET_P (c3))
1146 goto invalid_code;
1147 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1148 {
1149 c = (((c1 & 0xF) << 12)
1150 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1151 if (c < 0x800)
1152 goto invalid_code;
1153 }
df7492f9
KH
1154 else
1155 {
1156 ONE_MORE_BYTE (c4);
1157 if (! UTF_8_EXTRA_OCTET_P (c4))
1158 goto invalid_code;
1159 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1160 {
df7492f9
KH
1161 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1162 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1163 if (c < 0x10000)
1164 goto invalid_code;
1165 }
df7492f9
KH
1166 else
1167 {
1168 ONE_MORE_BYTE (c5);
1169 if (! UTF_8_EXTRA_OCTET_P (c5))
1170 goto invalid_code;
1171 if (UTF_8_5_OCTET_LEADING_P (c1))
1172 {
1173 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1174 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1175 | (c5 & 0x3F));
b0edb2c5 1176 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1177 goto invalid_code;
1178 }
1179 else
1180 goto invalid_code;
1181 }
1182 }
aa72b389 1183 }
aa72b389 1184 }
df7492f9
KH
1185
1186 *charbuf++ = c;
1187 continue;
1188
1189 invalid_code:
1190 src = src_base;
1191 consumed_chars = consumed_chars_base;
1192 ONE_MORE_BYTE (c);
1193 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1194 coding->errors++;
aa72b389
KH
1195 }
1196
df7492f9
KH
1197 no_more_source:
1198 coding->consumed_char += consumed_chars_base;
1199 coding->consumed = src_base - coding->source;
1200 coding->charbuf_used = charbuf - coding->charbuf;
1201}
1202
1203
1204static int
1205encode_coding_utf_8 (coding)
1206 struct coding_system *coding;
1207{
1208 int multibytep = coding->dst_multibyte;
1209 int *charbuf = coding->charbuf;
1210 int *charbuf_end = charbuf + coding->charbuf_used;
1211 unsigned char *dst = coding->destination + coding->produced;
1212 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1213 int produced_chars = 0;
df7492f9
KH
1214 int c;
1215
1216 if (multibytep)
aa72b389 1217 {
df7492f9
KH
1218 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1219
1220 while (charbuf < charbuf_end)
aa72b389 1221 {
df7492f9
KH
1222 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1223
1224 ASSURE_DESTINATION (safe_room);
1225 c = *charbuf++;
1226 CHAR_STRING_ADVANCE (c, pend);
1227 for (p = str; p < pend; p++)
1228 EMIT_ONE_BYTE (*p);
aa72b389 1229 }
aa72b389 1230 }
df7492f9
KH
1231 else
1232 {
1233 int safe_room = MAX_MULTIBYTE_LENGTH;
1234
1235 while (charbuf < charbuf_end)
1236 {
1237 ASSURE_DESTINATION (safe_room);
1238 c = *charbuf++;
1239 dst += CHAR_STRING (c, dst);
1240 produced_chars++;
1241 }
1242 }
1243 coding->result = CODING_RESULT_SUCCESS;
1244 coding->produced_char += produced_chars;
1245 coding->produced = dst - coding->destination;
1246 return 0;
aa72b389
KH
1247}
1248
4ed46869 1249
df7492f9
KH
1250/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1251 Check if a text starts with a UTF-16 byte order mark. If the first
1252 two bytes are 0xFE 0xFF (Big Endian) or 0xFF 0xFE (Little Endian),
1253 leave only CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE
1254 in *MASK and return 1, else return 0. */
1255
/* VAL is a 16-bit code unit in 0xD800..0xDBFF, the first half of a
   surrogate pair.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* VAL is a 16-bit code unit in 0xDC00..0xDFFF, the second half of a
   surrogate pair.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)

/* VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
#define UTF_16_INVALID_P(val)	\
  (((val) == 0xFFFE)		\
   || ((val) == 0xFFFF)		\
   || UTF_16_LOW_SURROGATE_P (val))
1266
1267
1268static int
1269detect_coding_utf_16 (coding, mask)
b73bfc1c 1270 struct coding_system *coding;
df7492f9 1271 int *mask;
b73bfc1c 1272{
df7492f9
KH
1273 unsigned char *src = coding->source, *src_base = src;
1274 unsigned char *src_end = coding->source + coding->src_bytes;
1275 int multibytep = coding->src_multibyte;
1276 int consumed_chars = 0;
1277 int c1, c2;
1278
1279 ONE_MORE_BYTE (c1);
1280 ONE_MORE_BYTE (c2);
4ed46869 1281
df7492f9 1282 if ((c1 == 0xFF) && (c2 == 0xFE))
b73bfc1c 1283 {
df7492f9
KH
1284 *mask &= CATEGORY_MASK_UTF_16_LE;
1285 return 1;
1286 }
1287 else if ((c1 == 0xFE) && (c2 == 0xFF))
1288 {
1289 *mask &= CATEGORY_MASK_UTF_16_BE;
1290 return 1;
1291 }
1292 no_more_source:
1293 return 0;
1294}
ec6d2bb8 1295
df7492f9
KH
1296static void
1297decode_coding_utf_16 (coding)
1298 struct coding_system *coding;
1299{
1300 unsigned char *src = coding->source + coding->consumed;
1301 unsigned char *src_end = coding->source + coding->src_bytes;
0be8721c 1302 unsigned char *src_base;
df7492f9
KH
1303 int *charbuf = coding->charbuf;
1304 int *charbuf_end = charbuf + coding->charbuf_size;
1305 int consumed_chars = 0, consumed_chars_base;
1306 int multibytep = coding->src_multibyte;
1307 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1308 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1309 int surrogate = CODING_UTF_16_SURROGATE (coding);
1310 Lisp_Object attr, eol_type, charset_list;
1311
1312 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1313
1314 if (bom != utf_16_without_bom)
1315 {
1316 int c, c1, c2;
4af310db 1317
df7492f9
KH
1318 src_base = src;
1319 ONE_MORE_BYTE (c1);
1320 ONE_MORE_BYTE (c2);
e19c3639 1321 c = (c1 << 8) | c2;
df7492f9
KH
1322 if (bom == utf_16_with_bom)
1323 {
1324 if (endian == utf_16_big_endian
1325 ? c != 0xFFFE : c != 0xFEFF)
4af310db 1326 {
df7492f9
KH
1327 /* We are sure that there's enouph room at CHARBUF. */
1328 *charbuf++ = c1;
1329 *charbuf++ = c2;
1330 coding->errors++;
4af310db 1331 }
4af310db 1332 }
df7492f9 1333 else
4af310db 1334 {
df7492f9
KH
1335 if (c == 0xFFFE)
1336 CODING_UTF_16_ENDIAN (coding)
1337 = endian = utf_16_big_endian;
1338 else if (c == 0xFEFF)
1339 CODING_UTF_16_ENDIAN (coding)
1340 = endian = utf_16_little_endian;
1341 else
4af310db 1342 {
df7492f9
KH
1343 CODING_UTF_16_ENDIAN (coding)
1344 = endian = utf_16_big_endian;
1345 src = src_base;
4af310db 1346 }
4af310db 1347 }
df7492f9
KH
1348 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1349 }
1350
1351 while (1)
1352 {
1353 int c, c1, c2;
1354
1355 src_base = src;
1356 consumed_chars_base = consumed_chars;
1357
1358 if (charbuf + 2 >= charbuf_end)
1359 break;
1360
1361 ONE_MORE_BYTE (c1);
1362 ONE_MORE_BYTE (c2);
1363 c = (endian == utf_16_big_endian
e19c3639 1364 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1365 if (surrogate)
aa72b389 1366 {
df7492f9 1367 if (! UTF_16_LOW_SURROGATE_P (c))
aa72b389 1368 {
df7492f9
KH
1369 if (endian == utf_16_big_endian)
1370 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1371 else
1372 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1373 *charbuf++ = c1;
1374 *charbuf++ = c2;
1375 coding->errors++;
1376 if (UTF_16_HIGH_SURROGATE_P (c))
1377 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1378 else
1379 *charbuf++ = c;
aa72b389 1380 }
df7492f9
KH
1381 else
1382 {
1383 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1384 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1385 *charbuf++ = c;
1386 }
1387 }
1388 else
1389 {
1390 if (UTF_16_HIGH_SURROGATE_P (c))
1391 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1392 else
1393 *charbuf++ = c;
1394 }
1395 }
1396
1397 no_more_source:
1398 coding->consumed_char += consumed_chars_base;
1399 coding->consumed = src_base - coding->source;
1400 coding->charbuf_used = charbuf - coding->charbuf;
1401}
1402
1403static int
1404encode_coding_utf_16 (coding)
1405 struct coding_system *coding;
1406{
1407 int multibytep = coding->dst_multibyte;
1408 int *charbuf = coding->charbuf;
1409 int *charbuf_end = charbuf + coding->charbuf_used;
1410 unsigned char *dst = coding->destination + coding->produced;
1411 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1412 int safe_room = 8;
1413 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1414 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1415 int produced_chars = 0;
1416 Lisp_Object attrs, eol_type, charset_list;
1417 int c;
1418
1419 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1420
1421 if (bom == utf_16_with_bom)
1422 {
1423 ASSURE_DESTINATION (safe_room);
1424 if (big_endian)
1425 EMIT_TWO_BYTES (0xFF, 0xFE);
1426 else
1427 EMIT_TWO_BYTES (0xFE, 0xFF);
1428 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1429 }
1430
1431 while (charbuf < charbuf_end)
1432 {
1433 ASSURE_DESTINATION (safe_room);
1434 c = *charbuf++;
e19c3639
KH
1435 if (c >= MAX_UNICODE_CHAR)
1436 c = coding->default_char;
df7492f9
KH
1437
1438 if (c < 0x10000)
1439 {
1440 if (big_endian)
1441 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1442 else
1443 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1444 }
1445 else
1446 {
1447 int c1, c2;
1448
1449 c -= 0x10000;
1450 c1 = (c >> 10) + 0xD800;
1451 c2 = (c & 0x3FF) + 0xDC00;
1452 if (big_endian)
1453 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1454 else
1455 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1456 }
1457 }
1458 coding->result = CODING_RESULT_SUCCESS;
1459 coding->produced = dst - coding->destination;
1460 coding->produced_char += produced_chars;
1461 return 0;
1462}
1463
1464\f
1465/*** 6. Old Emacs' internal format (emacs-mule) ***/
1466
1467/* Emacs' internal format for representation of multiple character
1468 sets is a kind of multi-byte encoding, i.e. characters are
1469 represented by variable-length sequences of one-byte codes.
1470
1471 ASCII characters and control characters (e.g. `tab', `newline') are
1472 represented by one-byte sequences which are their ASCII codes, in
1473 the range 0x00 through 0x7F.
1474
1475 8-bit characters of the range 0x80..0x9F are represented by
1476 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1477 code + 0x20).
1478
1479 8-bit characters of the range 0xA0..0xFF are represented by
1480 one-byte sequences which are their 8-bit code.
1481
1482 The other characters are represented by a sequence of `base
1483 leading-code', optional `extended leading-code', and one or two
1484 `position-code's. The length of the sequence is determined by the
1485 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1486 whereas extended leading-code and position-code take the range 0xA0
1487 through 0xFF. See `charset.h' for more details about leading-code
1488 and position-code.
1489
1490 --- CODE RANGE of Emacs' internal format ---
1491 character set range
1492 ------------- -----
1493 ascii 0x00..0x7F
1494 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1495 eight-bit-graphic 0xA0..0xFF
1496 ELSE 0x81..0x9D + [0xA0..0xFF]+
1497 ---------------------------------------------
1498
1499 As this is the internal character representation, the format is
1500 usually not used externally (i.e. in a file or in a data sent to a
1501 process). But, it is possible to have a text externally in this
1502 format (i.e. by encoding by the coding system `emacs-mule').
1503
1504 In that case, a sequence of one-byte codes has a slightly different
1505 form.
1506
1507 At first, all characters in eight-bit-control are represented by
1508 one-byte sequences which are their 8-bit code.
1509
1510 Next, character composition data are represented by the byte
1511 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1512 where,
1513 METHOD is 0xF2 plus one of composition method (enum
1514 composition_method),
1515
1516 BYTES is 0xA0 plus a byte length of this composition data,
1517
1518 CHARS is 0xA0 plus a number of characters composed by this
1519 data,
1520
1521 COMPONENTs are characters of multibyte form or composition
1522 rules encoded by two-byte of ASCII codes.
1523
1524 In addition, for backward compatibility, the following formats are
1525 also recognized as composition data on decoding.
1526
1527 0x80 MSEQ ...
1528 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1529
1530 Here,
1531 MSEQ is a multibyte form but in these special format:
1532 ASCII: 0xA0 ASCII_CODE+0x80,
1533 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1534 RULE is a one byte code of the range 0xA0..0xF0 that
1535 represents a composition rule.
1536 */
1537
/* Table indexed by the first byte of an emacs-mule sequence.
   emacs_mule_char switches on the entry (1 through 4) to decide how
   many bytes the sequence has, and detect_coding_emacs_mule compares
   the entry against the length it actually found.  */
char emacs_mule_bytes[256];

/* Leading-code followed by extended leading-code.  */
#define LEADING_CODE_PRIVATE_11	0x9A /* for private DIMENSION1 of 1-column */
#define LEADING_CODE_PRIVATE_12	0x9B /* for private DIMENSION1 of 2-column */
#define LEADING_CODE_PRIVATE_21	0x9C /* for private DIMENSION2 of 1-column */
#define LEADING_CODE_PRIVATE_22	0x9D /* for private DIMENSION2 of 2-column */
1545
1546
1547int
781d7a48 1548emacs_mule_char (coding, src, nbytes, nchars)
df7492f9 1549 struct coding_system *coding;
781d7a48 1550 unsigned char *src;
df7492f9
KH
1551 int *nbytes, *nchars;
1552{
df7492f9
KH
1553 unsigned char *src_end = coding->source + coding->src_bytes;
1554 int multibytep = coding->src_multibyte;
1555 unsigned char *src_base = src;
1556 struct charset *charset;
1557 unsigned code;
1558 int c;
1559 int consumed_chars = 0;
1560
1561 ONE_MORE_BYTE (c);
df7492f9
KH
1562 switch (emacs_mule_bytes[c])
1563 {
1564 case 2:
1565 if (! (charset = emacs_mule_charset[c]))
1566 goto invalid_code;
1567 ONE_MORE_BYTE (c);
1568 code = c & 0x7F;
1569 break;
1570
1571 case 3:
1572 if (c == LEADING_CODE_PRIVATE_11
1573 || c == LEADING_CODE_PRIVATE_12)
b73bfc1c 1574 {
df7492f9
KH
1575 ONE_MORE_BYTE (c);
1576 if (! (charset = emacs_mule_charset[c]))
1577 goto invalid_code;
1578 ONE_MORE_BYTE (c);
1579 code = c & 0x7F;
b73bfc1c
KH
1580 }
1581 else
1582 {
df7492f9
KH
1583 if (! (charset = emacs_mule_charset[c]))
1584 goto invalid_code;
1585 ONE_MORE_BYTE (c);
781d7a48 1586 code = (c & 0x7F) << 8;
df7492f9
KH
1587 ONE_MORE_BYTE (c);
1588 code |= c & 0x7F;
1589 }
1590 break;
1591
1592 case 4:
781d7a48 1593 ONE_MORE_BYTE (c);
df7492f9
KH
1594 if (! (charset = emacs_mule_charset[c]))
1595 goto invalid_code;
1596 ONE_MORE_BYTE (c);
781d7a48 1597 code = (c & 0x7F) << 8;
df7492f9
KH
1598 ONE_MORE_BYTE (c);
1599 code |= c & 0x7F;
1600 break;
1601
1602 case 1:
1603 code = c;
1604 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) ? charset_ascii
1605 : code < 0xA0 ? charset_8_bit_control
1606 : charset_8_bit_graphic);
1607 break;
1608
1609 default:
1610 abort ();
1611 }
1612 c = DECODE_CHAR (charset, code);
1613 if (c < 0)
1614 goto invalid_code;
1615 *nbytes = src - src_base;
1616 *nchars = consumed_chars;
1617 return c;
1618
1619 no_more_source:
1620 return -2;
1621
1622 invalid_code:
1623 return -1;
1624}
1625
1626
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in `emacs-mule'.

   On a byte sequence that cannot be emacs-mule, clear
   CATEGORY_MASK_EMACS_MULE in *MASK and return 0.  If the source ends
   without any multibyte sequence or composition having been seen,
   return 0 and leave *MASK alone.  Otherwise reduce *MASK to its
   CATEGORY_MASK_EMACS_MULE bit and return 1.  */

static int
detect_coding_emacs_mule (coding, mask)
     struct coding_system *coding;
     int *mask;
{
  unsigned char *src = coding->source, *src_base = src;
  unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  int c;
  int found = 0;

  /* A coding system of this category is always ASCII compatible.  */
  src += coding->head_ascii;

  while (1)
    {
      ONE_MORE_BYTE (c);

      if (c == 0x80)
	{
	  /* Perhaps the start of a composite character.  We simply
	     skip it because analyzing it is too heavy for detecting.
	     But, at least, we check that the composite character
	     consists of more than 4 bytes.  */
	  unsigned char *src_base;

	repeat:
	  src_base = src;
	  /* Skip the run of bytes >= 0xA0; C ends up holding the first
	     byte below 0xA0 after the run.  */
	  do
	    {
	      ONE_MORE_BYTE (c);
	    }
	  while (c >= 0xA0);

	  if (src - src_base <= 4)
	    break;
	  found = 1;
	  if (c == 0x80)
	    goto repeat;
	}

      /* From here C is either the byte read at the top of the loop or
	 the byte that ended a composition above.  */
      if (c < 0x80)
	{
	  /* ESC, SI and SO are taken as evidence against emacs-mule.  */
	  if (c < 0x20
	      && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
	    break;
	}
      else
	{
	  unsigned char *src_base = src - 1;

	  do
	    {
	      ONE_MORE_BYTE (c);
	    }
	  while (c >= 0xA0);
	  /* The distance scanned must equal the length recorded for
	     the leading byte.  */
	  if (src - src_base != emacs_mule_bytes[*src_base])
	    break;
	  found = 1;
	}
    }
  *mask &= ~CATEGORY_MASK_EMACS_MULE;
  return 0;

 no_more_source:
  if (!found)
    return 0;
  *mask &= CATEGORY_MASK_EMACS_MULE;
  return 1;
}
1701
b73bfc1c 1702
df7492f9
KH
1703/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1704
/* Decode a character represented as a component of composition
   sequence of Emacs 20/21 style at SRC with emacs_mule_char.  On
   success store the character in *BUF, increment BUF, and advance SRC
   and CONSUMED_CHARS past it.

   If SRC is already at SRC_END, or emacs_mule_char reports that the
   source ends inside the character (-2), execute `break'; the macro
   is therefore meant to be expanded inside the loop (or do-while
   block) that should be left in that case.  For any other negative
   result jump to `invalid_code'.

   The `if (1) ... else' wrapper makes the expansion a single
   statement while still letting that `break' reach the enclosing
   loop, which a `do ... while (0)' wrapper would not.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)			\
  if (1)							\
    {								\
      int c;							\
      int nbytes, nchars;					\
								\
      if (src == src_end)					\
	break;							\
      c = emacs_mule_char (coding, src, &nbytes, &nchars);	\
      if (c < 0)						\
	{							\
	  if (c == -2)						\
	    break;						\
	  goto invalid_code;					\
	}							\
      *buf++ = c;						\
      src += nbytes;						\
      consumed_chars += nchars;					\
    }								\
  else
1732
1733
/* Decode a composition rule represented as a component of composition
   sequence of Emacs 20 style at SRC.  The rule is one byte; after
   subtracting 0x20 it must be in 0..80 and is split into GREF (value
   / 9) and NREF (value % 9).  Store the rule encoded by
   COMPOSITION_ENCODE_RULE in *BUF and increment BUF.  If SRC is at
   SRC_END or the byte is out of range, jump to `invalid_code'.

   NOTE(review): the format description earlier in this file gives
   the RULE byte range as 0xA0..0xF0, which would correspond to an
   offset of 0xA0 rather than 0x20 -- confirm which one is intended.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)	\
  do {							\
    int c, gref, nref;					\
							\
    if (src >= src_end)					\
      goto invalid_code;				\
    ONE_MORE_BYTE_NO_CHECK (c);				\
    c -= 0x20;						\
    if (c < 0 || c >= 81)				\
      goto invalid_code;				\
							\
    gref = c / 9, nref = c % 9;				\
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);	\
  } while (0)
1753
1754
781d7a48
KH
/* Decode a composition rule represented as a component of composition
   sequence of Emacs 21 style at SRC.  The rule is two bytes: GREF and
   NREF, each stored with 0x20 added.  Store the rule encoded by
   COMPOSITION_ENCODE_RULE in *BUF and increment BUF.  If fewer than
   two bytes are available before SRC_END, or either value is outside
   0..80 after subtracting 0x20, jump to `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)	\
  do {							\
    int gref, nref;					\
							\
    if (src + 1>= src_end)				\
      goto invalid_code;				\
    ONE_MORE_BYTE_NO_CHECK (gref);			\
    gref -= 0x20;					\
    ONE_MORE_BYTE_NO_CHECK (nref);			\
    nref -= 0x20;					\
    if (gref < 0 || gref >= 81				\
	|| nref < 0 || nref >= 81)			\
      goto invalid_code;				\
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);	\
  } while (0)
1775
1776
df7492f9
KH
/* Append a five-element composition annotation at BUF and advance
   BUF past it.  The elements are: -5; the character position
   CODING->produced_char + CHAR_OFFSET; CODING_ANNOTATE_COMPOSITION_MASK;
   METHOD; and NCHARS.  Expects `coding' and `char_offset' in scope.
   (The leading -5 presumably marks the entry as an annotation and
   carries its length negated -- confirm against the code that
   consumes the character buffer.)  */

#define ADD_COMPOSITION_DATA(buf, method, nchars)	\
  do {							\
    *buf++ = -5;					\
    *buf++ = coding->produced_char + char_offset;	\
    *buf++ = CODING_ANNOTATE_COMPOSITION_MASK;		\
    *buf++ = method;					\
    *buf++ = nchars;					\
  } while (0)
aa72b389 1785
df7492f9
KH
1786
/* Decode an Emacs 21 style composition whose METHOD byte has already
   been read into C.  Reads the BYTES and CHARS bytes, appends a
   composition annotation to CHARBUF, and, unless the method is
   COMPOSITION_RELATIVE, decodes the components that follow into
   CHARBUF until CONSUMED_CHARS reaches CONSUMED_CHARS_BASE + BYTES.
   Odd-numbered components are decoded as rules unless the method is
   COMPOSITION_WITH_ALTCHARS.  Finally the number of components
   decoded is subtracted from the first element of the annotation.

   Jumps to `invalid_code' when BYTES is below 3, when a component is
   malformed, or when the component loop stops before the limit.  */

#define DECODE_EMACS_MULE_21_COMPOSITION(c)				\
  do {									\
    /* Emacs 21 style format.  The first three bytes at SRC are		\
       (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is	\
       the byte length of this composition information, CHARS is the	\
       number of characters composed by this composition.  */		\
    enum composition_method method = c - 0xF2;				\
    int *charbuf_base = charbuf;					\
    int consumed_chars_limit;						\
    int nbytes, nchars;							\
									\
    ONE_MORE_BYTE (c);							\
    nbytes = c - 0xA0;							\
    if (nbytes < 3)							\
      goto invalid_code;						\
    ONE_MORE_BYTE (c);							\
    nchars = c - 0xA0;							\
    ADD_COMPOSITION_DATA (charbuf, method, nchars);			\
    consumed_chars_limit = consumed_chars_base + nbytes;		\
    if (method != COMPOSITION_RELATIVE)					\
      {									\
	int i = 0;							\
	while (consumed_chars < consumed_chars_limit)			\
	  {								\
	    if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)		\
	      DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);		\
	    else							\
	      DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);		\
	    i++;							\
	  }								\
	if (consumed_chars < consumed_chars_limit)			\
	  goto invalid_code;						\
	charbuf_base[0] -= i;						\
      }									\
  } while (0)
93dec019 1822
aa72b389 1823
df7492f9
KH
/* Decode an Emacs 20 style relative composition.  SRC is moved back
   to SRC_BASE and the leading 0x80 is read again; then up to
   MAX_COMPOSITION_COMPONENTS characters are decoded into a local
   array.  The loop ends early through the `break' inside
   DECODE_EMACS_MULE_COMPOSITION_CHAR when the source runs out.  If
   fewer than two characters were decoded, jump to `invalid_code';
   otherwise append a COMPOSITION_RELATIVE annotation followed by the
   characters to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)		\
  do {								\
    /* Emacs 20 style format for relative composition.  */	\
    /* Store multibyte form of characters to be composed.  */	\
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];		\
    int *buf = components;					\
    int i, j;							\
								\
    src = src_base;						\
    ONE_MORE_BYTE (c);		/* skip 0x80 */			\
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)		\
      DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);			\
    if (i < 2)							\
      goto invalid_code;					\
    ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i);	\
    for (j = 0; j < i; j++)					\
      *charbuf++ = components[j];				\
  } while (0)
1842
1843
/* Decode an Emacs 20 style rule-base composition: one character
   followed by (rule, character) pairs, all collected in a local
   array.  The pair loop ends early through the `break' inside
   DECODE_EMACS_MULE_COMPOSITION_CHAR when the source runs out.  The
   loop is limited to MAX_COMPOSITION_COMPONENTS - 1 pairs so that the
   first character plus all pairs fit in the array.

   If no pair was decoded, or the array does not end on a character,
   jump to `invalid_code'.  If CHARBUF cannot take the five-element
   annotation plus the elements stored below, jump to
   `no_more_source'.  Otherwise append a COMPOSITION_WITH_RULE
   annotation and the components to CHARBUF.

   NOTE(review): if the very first DECODE_EMACS_MULE_COMPOSITION_CHAR
   executes its `break', the whole do-while block is left without
   storing anything.  Also, I counts pairs while the two copy loops
   below use it as an element count; both are kept as they were --
   confirm the intended layout against the consumer of CHARBUF.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)		\
  do {								\
    /* Emacs 20 style format for rule-base composition.  */	\
    /* Store multibyte form of characters to be composed.  */	\
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];		\
    int *buf = components;					\
    int i, j;							\
								\
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);			\
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS - 1; i++)	\
      {								\
	DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);		\
	DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);		\
      }								\
    if (i < 1 || (buf - components) % 2 == 0)			\
      goto invalid_code;					\
    if (charbuf + 5 + i + (i / 2) + 1 > charbuf_end)		\
      goto no_more_source;					\
    ADD_COMPOSITION_DATA (charbuf, COMPOSITION_WITH_RULE, i);	\
    for (j = 0; j < i; j++)					\
      *charbuf++ = components[j];				\
    for (j = 0; j < i; j += 2)					\
      *charbuf++ = components[j];				\
  } while (0)
1868
aa72b389
KH
1869
1870static void
df7492f9 1871decode_coding_emacs_mule (coding)
aa72b389 1872 struct coding_system *coding;
aa72b389 1873{
df7492f9
KH
1874 unsigned char *src = coding->source + coding->consumed;
1875 unsigned char *src_end = coding->source + coding->src_bytes;
aa72b389 1876 unsigned char *src_base;
df7492f9
KH
1877 int *charbuf = coding->charbuf;
1878 int *charbuf_end = charbuf + coding->charbuf_size;
1879 int consumed_chars = 0, consumed_chars_base;
1880 int char_offset = 0;
1881 int multibytep = coding->src_multibyte;
1882 Lisp_Object attrs, eol_type, charset_list;
aa72b389 1883
df7492f9 1884 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1885
aa72b389
KH
1886 while (1)
1887 {
df7492f9
KH
1888 int c;
1889
aa72b389 1890 src_base = src;
df7492f9
KH
1891 consumed_chars_base = consumed_chars;
1892
1893 if (charbuf >= charbuf_end)
1894 break;
aa72b389 1895
df7492f9
KH
1896 ONE_MORE_BYTE (c);
1897
1898 if (c < 0x80)
aa72b389 1899 {
df7492f9
KH
1900 if (c == '\r')
1901 {
1902 if (EQ (eol_type, Qdos))
1903 {
1904 if (src == src_end)
1905 goto no_more_source;
1906 if (*src == '\n')
1907 ONE_MORE_BYTE (c);
1908 }
1909 else if (EQ (eol_type, Qmac))
1910 c = '\n';
1911 }
1912 *charbuf++ = c;
1913 char_offset++;
aa72b389 1914 }
df7492f9
KH
1915 else if (c == 0x80)
1916 {
1917 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1918 break;
1919 ONE_MORE_BYTE (c);
781d7a48
KH
1920 if (c - 0xF2 >= COMPOSITION_RELATIVE
1921 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
1922 DECODE_EMACS_MULE_21_COMPOSITION (c);
1923 else if (c < 0xC0)
1924 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1925 else if (c == 0xFF)
1926 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1927 else
1928 goto invalid_code;
781d7a48 1929 coding->annotated = 1;
df7492f9
KH
1930 }
1931 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1932 {
1933 int nbytes, nchars;
781d7a48
KH
1934 src = src_base;
1935 consumed_chars = consumed_chars_base;
1936 c = emacs_mule_char (coding, src, &nbytes, &nchars);
df7492f9
KH
1937 if (c < 0)
1938 {
1939 if (c == -2)
1940 break;
1941 goto invalid_code;
1942 }
1943 *charbuf++ = c;
781d7a48
KH
1944 src += nbytes;
1945 consumed_chars += nchars;
df7492f9
KH
1946 char_offset++;
1947 }
1948 continue;
1949
1950 invalid_code:
1951 src = src_base;
1952 consumed_chars = consumed_chars_base;
1953 ONE_MORE_BYTE (c);
1954 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1955 coding->errors++;
1956 }
1957
1958 no_more_source:
1959 coding->consumed_char += consumed_chars_base;
1960 coding->consumed = src_base - coding->source;
1961 coding->charbuf_used = charbuf - coding->charbuf;
1962}
1963
1964
/* Store in CODES (an array of at least two bytes) the leading code
   byte(s) that precede the position codes of a character whose
   charset has the emacs-mule id ID:

     ID < 0xA0          CODES = { ID,   0  }   (no second leading code)
     0xA0 <= ID < 0xE0  CODES = { 0x9A, ID }
     0xE0 <= ID < 0xF0  CODES = { 0x9B, ID }
     0xF0 <= ID < 0xF5  CODES = { 0x9C, ID }
     0xF5 <= ID         CODES = { 0x9D, ID }

   CODES[1] == 0 therefore means "only one leading code".

   The expansion is a single statement WITHOUT a trailing semicolon,
   so that `EMACS_MULE_LEADING_CODES (id, codes);' can be used
   anywhere a statement is allowed, including the body of an unbraced
   `if' that is followed by `else'.  ID and CODES are evaluated more
   than once; do not pass expressions with side effects.  */

#define EMACS_MULE_LEADING_CODES(id, codes)	\
  do {						\
    if ((id) < 0xA0)				\
      (codes)[0] = (id), (codes)[1] = 0;	\
    else if ((id) < 0xE0)			\
      (codes)[0] = 0x9A, (codes)[1] = (id);	\
    else if ((id) < 0xF0)			\
      (codes)[0] = 0x9B, (codes)[1] = (id);	\
    else if ((id) < 0xF5)			\
      (codes)[0] = 0x9C, (codes)[1] = (id);	\
    else					\
      (codes)[0] = 0x9D, (codes)[1] = (id);	\
  } while (0)
1978
aa72b389 1979
df7492f9
KH
/* Encode the characters held in CODING->charbuf (CODING->charbuf_used
   entries) into the emacs-mule byte format, writing the bytes to
   CODING->destination starting at offset CODING->produced.

   Each character is handled as follows:
     - an ASCII character is emitted as one byte;
     - a raw 8-bit character (CHAR_BYTE8_P) is converted with
       CHAR_TO_BYTE8 and emitted as one byte;
     - any other character is looked up in the coding system's charset
       list with char_charset; if no charset contains it, it is
       replaced by CODING->default_char.  The output is then one or
       two leading codes (see EMACS_MULE_LEADING_CODES) followed by
       one code byte for a dimension-1 charset, or by the high and low
       bytes of the code otherwise.

   On leaving the loop normally, CODING->result is set to
   CODING_RESULT_SUCCESS, CODING->produced_char is increased by the
   number of characters counted in `produced_chars', CODING->produced
   is set to the number of bytes now in the destination, and 0 is
   returned.

   The locals multibytep, dst, dst_end and produced_chars are not all
   referenced directly below; presumably the ASSURE_DESTINATION and
   EMIT_* macros (defined elsewhere) use them by name -- confirm
   before renaming any of them.  */
1980static int
1981encode_coding_emacs_mule (coding)
1982 struct coding_system *coding;
1983{
1984 int multibytep = coding->dst_multibyte;
1985 int *charbuf = coding->charbuf;
1986 int *charbuf_end = charbuf + coding->charbuf_used;
1987 unsigned char *dst = coding->destination + coding->produced;
1988 unsigned char *dst_end = coding->destination + coding->dst_bytes;
/* Room requested from ASSURE_DESTINATION before each character.  At
   most four EMIT_ONE_BYTE calls are made per character below;
   presumably 8 allows two destination bytes per call -- TODO confirm
   against EMIT_ONE_BYTE.  */
1989 int safe_room = 8;
df7492f9
KH
1990 int produced_chars = 0;
1991 Lisp_Object attrs, eol_type, charset_list;
1992 int c;
1993
1994 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1995
1996 while (charbuf < charbuf_end)
1997 {
1998 ASSURE_DESTINATION (safe_room);
1999 c = *charbuf++;
2000 if (ASCII_CHAR_P (c))
2001 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2002 else if (CHAR_BYTE8_P (c))
2003 {
/* A raw 8-bit character goes out as the single byte it stands for.  */
2004 c = CHAR_TO_BYTE8 (c);
2005 EMIT_ONE_BYTE (c);
2006 }
df7492f9 2007 else
aa72b389 2008 {
df7492f9
KH
2009 struct charset *charset;
2010 unsigned code;
2011 int dimension;
2012 int emacs_mule_id;
2013 unsigned char leading_codes[2];
2014
2015 charset = char_charset (c, charset_list, &code);
2016 if (! charset)
2017 {
/* C is in none of the charsets of this coding system: encode the
   coding system's default character instead.  */
2018 c = coding->default_char;
2019 if (ASCII_CHAR_P (c))
2020 {
2021 EMIT_ONE_ASCII_BYTE (c);
2022 continue;
2023 }
/* NOTE(review): the result of this second lookup is not checked; if
   it can fail too, CHARSET is null when CHARSET_DIMENSION is applied
   to it below -- verify that default_char is always encodable.  */
2024 charset = char_charset (c, charset_list, &code);
2025 }
2026 dimension = CHARSET_DIMENSION (charset);
2027 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2028 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2029 EMIT_ONE_BYTE (leading_codes[0]);
/* leading_codes[1] is zero when the charset needs only one leading
   code.  */
2030 if (leading_codes[1])
2031 EMIT_ONE_BYTE (leading_codes[1]);
2032 if (dimension == 1)
2033 EMIT_ONE_BYTE (code);
aa72b389 2034 else
df7492f9
KH
2035 {
/* Two position codes: high byte first, then low byte.  */
2036 EMIT_ONE_BYTE (code >> 8);
2037 EMIT_ONE_BYTE (code & 0xFF);
2038 }
aa72b389 2039 }
aa72b389 2040 }
df7492f9
KH
2041 coding->result = CODING_RESULT_SUCCESS;
2042 coding->produced_char += produced_chars;
2043 coding->produced = dst - coding->destination;
2044 return 0;
aa72b389 2045}
b73bfc1c 2046
4ed46869 2047\f
df7492f9 2048/*** 7. ISO2022 handlers ***/
4ed46869
KH
2049
2050/* The following note describes the coding system ISO2022 briefly.
39787efd 2051 Since the intention of this note is to help understand the
df7492f9 2052 functions in this file, some parts are NOT ACCURATE or OVERLY
39787efd 2053 SIMPLIFIED. For thorough understanding, please refer to the
df7492f9 2054 original document of ISO2022.
4ed46869
KH
2055
2056 ISO2022 provides many mechanisms to encode several character sets
df7492f9 2057 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2058 is encoded using bytes less than 128. This may make the encoded
2059 text a little bit longer, but the text passes more easily through
df7492f9 2060 several gateways, some of which strip off the MSB (Most Significant Bit).
b73bfc1c 2061
df7492f9
KH
2062 There are two kinds of character sets: control character set and
2063 graphic character set. The former contains control characters such
4ed46869 2064 as `newline' and `escape' to provide control functions (control
39787efd 2065 functions are also provided by escape sequences). The latter
df7492f9 2066 contains graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2067 two control character sets and many graphic character sets.
2068
2069 Graphic character sets are classified into one of the following
39787efd
KH
2070 four classes, according to the number of bytes (DIMENSION) and
2071 number of characters in one dimension (CHARS) of the set:
2072 - DIMENSION1_CHARS94
2073 - DIMENSION1_CHARS96
2074 - DIMENSION2_CHARS94
2075 - DIMENSION2_CHARS96
2076
2077 In addition, each character set is assigned an identification tag,
df7492f9 2078 unique for each set, called "final character" (denoted as <F>
39787efd
KH
2079 hereafter). The <F> of each character set is decided by ECMA(*)
2080 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2081 (0x30..0x3F are for private use only).
4ed46869
KH
2082
2083 Note (*): ECMA = European Computer Manufacturers Association
2084
df7492f9 2085 Here are examples of graphic character set [NAME(<F>)]:
4ed46869
KH
2086 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2087 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2088 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2089 o DIMENSION2_CHARS96 -- none for the moment
2090
39787efd 2091 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2092 C0 [0x00..0x1F] -- control character plane 0
2093 GL [0x20..0x7F] -- graphic character plane 0
2094 C1 [0x80..0x9F] -- control character plane 1
2095 GR [0xA0..0xFF] -- graphic character plane 1
2096
2097 A control character set is directly designated and invoked to C0 or
39787efd
KH
2098 C1 by an escape sequence. The most common case is that:
2099 - ISO646's control character set is designated/invoked to C0, and
2100 - ISO6429's control character set is designated/invoked to C1,
2101 and usually these designations/invocations are omitted in encoded
2102 text. In a 7-bit environment, only C0 can be used, and a control
2103 character for C1 is encoded by an appropriate escape sequence to
2104 fit into the environment. All control characters for C1 are
2105 defined to have corresponding escape sequences.
4ed46869
KH
2106
2107 A graphic character set is at first designated to one of four
2108 graphic registers (G0 through G3), then these graphic registers are
2109 invoked to GL or GR. These designations and invocations can be
2110 done independently. The most common case is that G0 is invoked to
39787efd
KH
2111 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2112 these invocations and designations are omitted in encoded text.
2113 In a 7-bit environment, only GL can be used.
4ed46869 2114
39787efd
KH
2115 When a graphic character set of CHARS94 is invoked to GL, codes
2116 0x20 and 0x7F of the GL area work as control characters SPACE and
2117 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2118 be used.
4ed46869
KH
2119
2120 There are two ways of invocation: locking-shift and single-shift.
2121 With locking-shift, the invocation lasts until the next different
39787efd
KH
2122 invocation, whereas with single-shift, the invocation affects the
2123 following character only and doesn't affect the locking-shift
2124 state. Invocations are done by the following control characters or
2125 escape sequences:
4ed46869
KH
2126
2127 ----------------------------------------------------------------------
39787efd 2128 abbrev function cntrl escape seq description
4ed46869 2129 ----------------------------------------------------------------------
39787efd
KH
2130 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2131 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2132 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2133 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2134 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2135 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2136 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2137 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2138 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2139 ----------------------------------------------------------------------
39787efd
KH
2140 (*) These are not used by any known coding system.
2141
2142 Control characters for these functions are defined by macros
2143 ISO_CODE_XXX in `coding.h'.
4ed46869 2144
39787efd 2145 Designations are done by the following escape sequences:
4ed46869
KH
2146 ----------------------------------------------------------------------
2147 escape sequence description
2148 ----------------------------------------------------------------------
2149 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2150 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2151 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2152 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2153 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2154 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2155 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2156 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2157 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2158 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2159 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2160 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2161 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2162 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2163 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2164 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2165 ----------------------------------------------------------------------
2166
2167 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2168 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2169
2170 Note (*): Although these designations are not allowed in ISO2022,
2171 Emacs accepts them on decoding, and produces them on encoding
39787efd 2172 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2173 7-bit environment, non-locking-shift, and non-single-shift.
2174
2175 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2176 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2177
df7492f9 2178 Now you may notice that there are a lot of ways for encoding the
39787efd
KH
2179 same multilingual text in ISO2022. Actually, there exist many
2180 coding systems such as Compound Text (used in X11's inter client
df7492f9
KH
2181 communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
2182 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2183 localized platforms), and all of these are variants of ISO2022.
2184
2185 In addition to the above, Emacs handles two more kinds of escape
2186 sequences: ISO6429's direction specification and Emacs' private
2187 sequence for specifying character composition.
2188
39787efd 2189 ISO6429's direction specification takes the following form:
4ed46869
KH
2190 o CSI ']' -- end of the current direction
2191 o CSI '0' ']' -- end of the current direction
2192 o CSI '1' ']' -- start of left-to-right text
2193 o CSI '2' ']' -- start of right-to-left text
2194 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2195 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2196
2197 Character composition specification takes the following form:
ec6d2bb8
KH
2198 o ESC '0' -- start relative composition
2199 o ESC '1' -- end composition
2200 o ESC '2' -- start rule-base composition (*)
2201 o ESC '3' -- start relative composition with alternate chars (**)
2202 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2203 Since these are not standard escape sequences of any ISO standard,
df7492f9 2204 the use of them for these meanings is restricted to Emacs only.
ec6d2bb8 2205
df7492f9 2206 (*) This form is used only in Emacs 20.5 and the older versions,
b73bfc1c 2207 but the newer versions can safely decode it.
df7492f9 2208 (**) This form is used only in Emacs 21.1 and the newer versions,
b73bfc1c 2209 and the older versions can't decode it.
ec6d2bb8 2210
df7492f9 2211 Here's a list of example usages of these composition escape
b73bfc1c 2212 sequences (categorized by `enum composition_method').
ec6d2bb8 2213
b73bfc1c 2214 COMPOSITION_RELATIVE:
ec6d2bb8 2215 ESC 0 CHAR [ CHAR ] ESC 1
df7492f9 2216 COMPOSITION_WITH_RULE:
ec6d2bb8 2217 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2218 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2219 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2220 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2221 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2222
/* Table giving the ISO-2022 code class of each of the 256 possible
   byte values.  decode_coding_iso_2022 dispatches on
   iso_code_class[C1] for every byte C1 it reads.  There is no
   initializer here, so the entries must be filled in elsewhere (not
   visible in this part of the file).  */
2223enum iso_code_class_type iso_code_class[256];
2224
df7492f9
KH
/* Nonzero if the charset whose id is ID is safe for CODING, i.e. ID
   is within CODING's safe-charsets table and the entry for ID holds a
   graphic register number rather than the "not safe" marker.

   The table is created by setup_iso_safe_charsets as a string filled
   with the byte 255, in which the entries of usable charsets are then
   overwritten with a small register number.  It is read through a
   `char *', so the entry is converted to `unsigned char' before the
   test: comparing a plain `char' with 0 only detects the 255 marker
   where `char' is signed, and is always true where `char' is
   unsigned.  Testing `< 0x80' on the unsigned value accepts exactly
   the entries that are non-negative as a signed char.

   ID is evaluated twice and must not be negative.  */
#define SAFE_CHARSET_P(coding, id)					\
  ((id) <= (coding)->max_charset_id					\
   && (unsigned char) (coding)->safe_charsets[id] < 0x80)
2228
2229
/* Nonzero if CODING_ISO_INITIAL, applied to the coding system
   registered for coding category CATEGORY and to register number 1,
   yields a non-negative value.  Presumably this means that some
   charset is initially designated to G1, so that a shift-out byte is
   meaningful even before any designation sequence has been seen --
   confirm against the definition of CODING_ISO_INITIAL.  */
2230#define SHIFT_OUT_OK(category) \
2231 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2232
2233static void
f0064e1f
DL
2234setup_iso_safe_charsets (attrs)
2235 Lisp_Object attrs;
df7492f9
KH
2236{
2237 Lisp_Object charset_list, safe_charsets;
2238 Lisp_Object request;
2239 Lisp_Object reg_usage;
2240 Lisp_Object tail;
2241 int reg94, reg96;
2242 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2243 int max_charset_id;
2244
2245 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2246 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2247 && ! EQ (charset_list, Viso_2022_charset_list))
2248 {
2249 CODING_ATTR_CHARSET_LIST (attrs)
2250 = charset_list = Viso_2022_charset_list;
2251 ASET (attrs, coding_attr_safe_charsets, Qnil);
2252 }
2253
2254 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2255 return;
2256
2257 max_charset_id = 0;
2258 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2259 {
2260 int id = XINT (XCAR (tail));
2261 if (max_charset_id < id)
2262 max_charset_id = id;
2263 }
2264
2265 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2266 make_number (255));
2267 request = AREF (attrs, coding_attr_iso_request);
2268 reg_usage = AREF (attrs, coding_attr_iso_usage);
2269 reg94 = XINT (XCAR (reg_usage));
2270 reg96 = XINT (XCDR (reg_usage));
2271
2272 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2273 {
2274 Lisp_Object id;
2275 Lisp_Object reg;
2276 struct charset *charset;
2277
2278 id = XCAR (tail);
2279 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2280 reg = Fcdr (Fassq (id, request));
df7492f9
KH
2281 if (! NILP (reg))
2282 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2283 else if (charset->iso_chars_96)
2284 {
2285 if (reg96 < 4)
2286 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2287 }
2288 else
2289 {
2290 if (reg94 < 4)
2291 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2292 }
2293 }
2294 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2295}
d46c5b12 2296
d46c5b12 2297
4ed46869 2298/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
df7492f9 2299 Check if a text is encoded in ISO2022. The candidates are the
4ed46869 2300 ISO2022 coding categories, whose mask bits include:
df7492f9
KH
2301 CATEGORY_MASK_ISO_7
2302 CATEGORY_MASK_ISO_7_TIGHT
2303 CATEGORY_MASK_ISO_8_1
2304 CATEGORY_MASK_ISO_8_2
2305 CATEGORY_MASK_ISO_7_ELSE
2306 CATEGORY_MASK_ISO_8_ELSE
4ed46869
KH
2307 The text examined is CODING->source (CODING->src_bytes bytes),
2308 starting after the first CODING->head_ascii bytes.

   The verdict is reported through *MASK, not through the return
   value:

     - Return 1 if at least one ISO2022 category was positively found
       and not all of them were ruled out.  *MASK is then ANDed with
       the set of categories that were both found and not ruled out;
       in addition, if no byte >= 0x80 was seen, the bits
       CATEGORY_MASK_ISO_8BIT and CATEGORY_MASK_ISO_8_ELSE are cleared
       from *MASK.
     - Return 0 after clearing the CATEGORY_MASK_ISO bits from *MASK
       if every ISO2022 category was ruled out.
     - Return 0 leaving *MASK untouched if nothing ISO2022 specific
       was found, or as soon as a byte in the range 0x80..0x9F is met
       that has no case of its own in the switch below and is not
       registered in Vlatin_extra_code_table.

   Bytes are fetched with ONE_MORE_BYTE, which is defined elsewhere;
   presumably it jumps to the label `no_more_source' when the source
   is exhausted -- confirm against its definition.  */
2309
0a28aafb 2310static int
df7492f9
KH
2311detect_coding_iso_2022 (coding, mask)
2312 struct coding_system *coding;
2313 int *mask;
4ed46869 2314{
df7492f9
KH
2315 unsigned char *src = coding->source, *src_base = src;
2316 unsigned char *src_end = coding->source + coding->src_bytes;
2317 int multibytep = coding->src_multibyte;
2318 int mask_iso = CATEGORY_MASK_ISO;
2319 int mask_found = 0, mask_8bit_found = 0;
f46869e4 2320 int reg[4], shift_out = 0, single_shifting = 0;
df7492f9
KH
2321 int id;
2322 int c, c1;
2323 int consumed_chars = 0;
2324 int i;
2325
/* Point each ISO2022 category's coding system at its safe-charsets
   table (building the table first when needed), so that
   SAFE_CHARSET_P can be applied to it below.  */
2326 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2327 {
2328 struct coding_system *this = &(coding_categories[i]);
2329 Lisp_Object attrs, val;
2330
2331 attrs = CODING_ID_ATTRS (this->id);
2332 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2333 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2334 setup_iso_safe_charsets (attrs);
2335 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2336 this->max_charset_id = XSTRING (val)->size - 1;
2337 this->safe_charsets = (char *) XSTRING (val)->data;
2338 }
2339
2340 /* A coding system of this category is always ASCII compatible. */
2341 src += coding->head_ascii;
3f003981 2342
df7492f9
KH
/* reg[N] is the id of the charset currently designated to graphic
   register N, or -1 if none.  */
2343 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2344 while (mask_iso && src < src_end)
4ed46869 2345 {
df7492f9 2346 ONE_MORE_BYTE (c);
4ed46869
KH
2347 switch (c)
2348 {
2349 case ISO_CODE_ESC:
74383408
KH
2350 if (inhibit_iso_escape_detection)
2351 break;
f46869e4 2352 single_shifting = 0;
df7492f9 2353 ONE_MORE_BYTE (c);
d46c5b12 2354 if (c >= '(' && c <= '/')
4ed46869 2355 {
bf9cdd4e 2356 /* Designation sequence for a charset of dimension 1. */
df7492f9 2357 ONE_MORE_BYTE (c1);
d46c5b12 2358 if (c1 < ' ' || c1 >= 0x80
df7492f9 2359 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2360 /* Invalid designation sequence. Just ignore. */
2361 break;
df7492f9 2362 reg[(c - '(') % 4] = id;
bf9cdd4e
KH
2363 }
2364 else if (c == '$')
2365 {
2366 /* Designation sequence for a charset of dimension 2. */
df7492f9 2367 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2368 if (c >= '@' && c <= 'B')
2369 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
df7492f9 2370 reg[0] = id = iso_charset_table[1][0][c];
bf9cdd4e 2371 else if (c >= '(' && c <= '/')
bcf26d6a 2372 {
df7492f9 2373 ONE_MORE_BYTE (c1);
d46c5b12 2374 if (c1 < ' ' || c1 >= 0x80
df7492f9 2375 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2376 /* Invalid designation sequence. Just ignore. */
2377 break;
df7492f9 2378 reg[(c - '(') % 4] = id;
bcf26d6a 2379 }
bf9cdd4e 2380 else
d46c5b12
KH
2381 /* Invalid designation sequence. Just ignore. */
2382 break;
2383 }
ae9ff118 2384 else if (c == 'N' || c == 'O')
d46c5b12 2385 {
ae9ff118 2386 /* ESC <Fe> for SS2 or SS3. */
df7492f9 2387 mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
d46c5b12 2388 break;
4ed46869 2389 }
ec6d2bb8
KH
2390 else if (c >= '0' && c <= '4')
2391 {
2392 /* ESC <Fp> for start/end composition. */
df7492f9 2393 mask_found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2394 break;
2395 }
bf9cdd4e 2396 else
df7492f9
KH
2397 {
2398 /* Invalid escape sequence. */
2399 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2400 break;
2401 }
d46c5b12
KH
2402
2403 /* We found a valid designation sequence for CHARSET. */
/* Only the two designation branches above reach this point, with ID
   set to the designated charset.  An explicit designation rules out
   the CATEGORY_MASK_ISO_8BIT categories; each remaining category is
   recorded as found if it can handle the charset, and ruled out
   otherwise.  */
df7492f9
KH
2404 mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2405 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2406 id))
2407 mask_found |= CATEGORY_MASK_ISO_7;
d46c5b12 2408 else
df7492f9
KH
2409 mask_iso &= ~CATEGORY_MASK_ISO_7;
2410 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2411 id))
2412 mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2413 else
df7492f9
KH
2414 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2415 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2416 id))
2417 mask_found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2418 else
df7492f9
KH
2419 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2420 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2421 id))
2422 mask_found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2423 else
df7492f9 2424 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2425 break;
2426
4ed46869 2427 case ISO_CODE_SO:
74383408
KH
2428 if (inhibit_iso_escape_detection)
2429 break;
f46869e4 2430 single_shifting = 0;
d46c5b12
KH
2431 if (shift_out == 0
2432 && (reg[1] >= 0
df7492f9
KH
2433 || SHIFT_OUT_OK (coding_category_iso_7_else)
2434 || SHIFT_OUT_OK (coding_category_iso_8_else)))
d46c5b12
KH
2435 {
2436 /* Locking shift out. */
df7492f9
KH
2437 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2438 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12 2439 }
e0e989f6 2440 break;
df7492f9 2441
d46c5b12 2442 case ISO_CODE_SI:
74383408
KH
2443 if (inhibit_iso_escape_detection)
2444 break;
f46869e4 2445 single_shifting = 0;
/* NOTE(review): shift_out is initialized to 0 and never assigned
   anywhere in this function (the SO case above only reads it), so as
   written the condition below cannot be true -- verify whether the SO
   case was meant to set shift_out to 1.  */
d46c5b12
KH
2446 if (shift_out == 1)
2447 {
2448 /* Locking shift in. */
df7492f9
KH
2449 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2450 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2451 }
2452 break;
2453
4ed46869 2454 case ISO_CODE_CSI:
f46869e4 2455 single_shifting = 0;
/* Fall through: CSI shares the handling of SS2 and SS3 below, except
   for the part guarded by `c != ISO_CODE_CSI'.  */
4ed46869
KH
2456 case ISO_CODE_SS2:
2457 case ISO_CODE_SS3:
3f003981 2458 {
df7492f9 2459 int newmask = CATEGORY_MASK_ISO_8_ELSE;
3f003981 2460
74383408
KH
2461 if (inhibit_iso_escape_detection)
2462 break;
70c22245
KH
2463 if (c != ISO_CODE_CSI)
2464 {
df7492f9
KH
2465 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2466 & CODING_ISO_FLAG_SINGLE_SHIFT)
2467 newmask |= CATEGORY_MASK_ISO_8_1;
2468 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2469 & CODING_ISO_FLAG_SINGLE_SHIFT)
2470 newmask |= CATEGORY_MASK_ISO_8_2;
f46869e4 2471 single_shifting = 1;
70c22245 2472 }
3f003981
KH
2473 if (VECTORP (Vlatin_extra_code_table)
2474 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2475 {
df7492f9
KH
2476 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2477 & CODING_ISO_FLAG_LATIN_EXTRA)
2478 newmask |= CATEGORY_MASK_ISO_8_1;
2479 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2480 & CODING_ISO_FLAG_LATIN_EXTRA)
2481 newmask |= CATEGORY_MASK_ISO_8_2;
3f003981 2482 }
df7492f9 2483 mask_iso &= newmask;
d46c5b12 2484 mask_found |= newmask;
3f003981
KH
2485 }
2486 break;
4ed46869
KH
2487
2488 default:
2489 if (c < 0x80)
f46869e4
KH
2490 {
2491 single_shifting = 0;
2492 break;
2493 }
4ed46869 2494 else if (c < 0xA0)
c4825358 2495 {
f46869e4 2496 single_shifting = 0;
df7492f9 2497 mask_8bit_found = 1;
3f003981
KH
2498 if (VECTORP (Vlatin_extra_code_table)
2499 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
c4825358 2500 {
3f003981
KH
2501 int newmask = 0;
2502
df7492f9
KH
2503 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2504 & CODING_ISO_FLAG_LATIN_EXTRA)
2505 newmask |= CATEGORY_MASK_ISO_8_1;
2506 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2507 & CODING_ISO_FLAG_LATIN_EXTRA)
2508 newmask |= CATEGORY_MASK_ISO_8_2;
2509 mask_iso &= newmask;
d46c5b12 2510 mask_found |= newmask;
c4825358 2511 }
3f003981
KH
2512 else
2513 return 0;
c4825358 2514 }
4ed46869
KH
2515 else
2516 {
df7492f9
KH
2517 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2518 | CATEGORY_MASK_ISO_7_ELSE);
2519 mask_found |= CATEGORY_MASK_ISO_8_1;
2520 mask_8bit_found = 1;
f46869e4
KH
2521 /* Check the length of succeeding codes of the range
2522 0xA0..0xFF. If the byte length is odd, we exclude
df7492f9 2523 CATEGORY_MASK_ISO_8_2. We can check this only
f46869e4 2524 when we are not single shifting. */
b73bfc1c 2525 if (!single_shifting
df7492f9 2526 && mask_iso & CATEGORY_MASK_ISO_8_2)
f46869e4 2527 {
e17de821 2528 int i = 1;
b73bfc1c
KH
2529 while (src < src_end)
2530 {
df7492f9 2531 ONE_MORE_BYTE (c);
b73bfc1c
KH
2532 if (c < 0xA0)
2533 break;
2534 i++;
2535 }
2536
2537 if (i & 1 && src < src_end)
df7492f9 2538 mask_iso &= ~CATEGORY_MASK_ISO_8_2;
f46869e4 2539 else
df7492f9 2540 mask_found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2541 }
4ed46869
KH
2542 }
2543 break;
2544 }
2545 }
df7492f9
KH
2546 no_more_source:
2547 if (!mask_iso)
2548 {
2549 *mask &= ~CATEGORY_MASK_ISO;
2550 return 0;
2551 }
2552 if (!mask_found)
2553 return 0;
2554 *mask &= mask_iso & mask_found;
2555 if (! mask_8bit_found)
2556 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2557 return 1;
4ed46869
KH
2558}
2559
4ed46869
KH
2560
2561/* Set designation state into CODING. */
df7492f9
KH
/* Record in `coding' (a variable of the expansion site) that the
   charset identified by dimension DIM, the chars-96 flag CHARS_96 and
   final character FINAL is now designated to graphic register REG.

   The designation is rejected -- the register is marked with -2 and
   control jumps to the label `invalid_code' of the expansion site --
   if FINAL is below '0' or not below 128, if ISO_CHARSET_TABLE has no
   charset for (DIM, CHARS_96, FINAL), or if that charset is not safe
   for `coding' according to SAFE_CHARSET_P.

   Otherwise the charset id is stored for REG, after two
   substitutions: charset_jisx0201_roman is replaced by charset_ascii
   when the flag CODING_ISO_FLAG_USE_ROMAN is set, and
   charset_jisx0208_1978 by charset_jisx0208 when the flag
   CODING_ISO_FLAG_USE_OLDJIS is set.

   Even after a successful designation, control jumps to
   `invalid_code' when the previous designation of REG was the invalid
   mark -2 and the new charset is ASCII (see the comment inside the
   macro).  */
2562#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2563 do { \
2564 int id, prev; \
2565 \
2566 if (final < '0' || final >= 128 \
2567 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2568 || !SAFE_CHARSET_P (coding, id)) \
2569 { \
2570 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2571 goto invalid_code; \
2572 } \
2573 prev = CODING_ISO_DESIGNATION (coding, reg); \
bf16eb23
KH
2574 if (id == charset_jisx0201_roman) \
2575 { \
2576 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2577 id = charset_ascii; \
2578 } \
2579 else if (id == charset_jisx0208_1978) \
2580 { \
2581 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2582 id = charset_jisx0208; \
2583 } \
df7492f9
KH
2584 CODING_ISO_DESIGNATION (coding, reg) = id; \
2585 /* If there was an invalid designation to REG previously, and this \
2586 designation is ASCII to REG, we should keep this designation \
2587 sequence. */ \
2588 if (prev == -2 && id == charset_ascii) \
2589 goto invalid_code; \
4ed46869
KH
2590 } while (0)
2591
d46c5b12 2592
df7492f9
KH
/* If a composition is being decoded, abandon it and output what has
   been collected so far as plain characters; do nothing when
   composition_state is COMPOSING_NO.

   The macro uses these variables of the expansion site:
   composition_state, method, components, component_idx, charbuf,
   charbuf_end and char_offset, and the label `no_more_source', which
   it jumps to when charbuf has no room for component_idx more
   entries.

   For COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS every entry
   of `components' is copied to charbuf.  For the other methods only
   the entries at even indices are copied; presumably the odd ones
   hold composition rules -- confirm against the code that fills
   `components'.

   NOTE(review): in the latter case the loop stores
   (component_idx + 1) / 2 characters while char_offset is advanced by
   component_idx / 2 + 1; the two differ whenever component_idx is
   even.  Verify which one is intended.  */
2593#define MAYBE_FINISH_COMPOSITION() \
2594 do { \
2595 int i; \
2596 if (composition_state == COMPOSING_NO) \
2597 break; \
2598 /* It is assured that we have enough room for producing \
2599 characters stored in the table `components'. */ \
2600 if (charbuf + component_idx > charbuf_end) \
2601 goto no_more_source; \
2602 composition_state = COMPOSING_NO; \
2603 if (method == COMPOSITION_RELATIVE \
2604 || method == COMPOSITION_WITH_ALTCHARS) \
2605 { \
2606 for (i = 0; i < component_idx; i++) \
2607 *charbuf++ = components[i]; \
2608 char_offset += component_idx; \
2609 } \
2610 else \
2611 { \
2612 for (i = 0; i < component_idx; i += 2) \
2613 *charbuf++ = components[i]; \
2614 char_offset += (component_idx / 2) + 1; \
2615 } \
2616 } while (0)
2617
d46c5b12 2618
aa72b389
KH
2619/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2620 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2621 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
df7492f9
KH
2622 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2623 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
aa72b389 2624 */
ec6d2bb8 2625
df7492f9
KH
/* Start decoding a composition; C1 is the character that followed
   ESC ('0', '2', '3' or '4').

   ESC 0 met while composition_state is COMPOSING_COMPONENT_RULE does
   not start a new composition: it ends the list of alternate
   components of the current one.  component_len remembers how many
   entries of `components' belong to that list, and the state becomes
   COMPOSING_CHAR.
   NOTE(review): only the state COMPOSING_COMPONENT_RULE is tested
   here; verify that the ESC 3 form (alternate chars without rules)
   also reaches that state before its ESC 0.

   In all other cases any composition in progress is finished with
   MAYBE_FINISH_COMPOSITION, and then:
     - if charbuf has no room for MAX_COMPOSITION_COMPONENTS more
       entries, control jumps to `no_more_source';
     - the rest of the source is scanned for the terminator ESC 1; if
       the scan stops at src_end - 1 without finding it, control jumps
       to `invalid_code' when this is the last block
       (CODING_MODE_LAST_BLOCK), and to `no_more_source' otherwise;
     - otherwise `method' and composition_state are set according to
       C1, and component_idx and component_len are reset to 0.
   NOTE(review): the "not found" test is `p == src_end - 1'.  When the
   macro is expanded with src == src_end, the scan loop does not run
   and P stays equal to src_end, so the sequence is accepted as if a
   terminator had been found -- verify.

   Uses these names of the expansion site: coding, src, src_end,
   charbuf, charbuf_end, method, composition_state, component_idx and
   component_len, and the labels `invalid_code' and
   `no_more_source'.  */
2626#define DECODE_COMPOSITION_START(c1) \
2627 do { \
2628 if (c1 == '0' \
781d7a48 2629 && composition_state == COMPOSING_COMPONENT_RULE) \
df7492f9
KH
2630 { \
2631 component_len = component_idx; \
2632 composition_state = COMPOSING_CHAR; \
2633 } \
2634 else \
2635 { \
2636 unsigned char *p; \
2637 \
2638 MAYBE_FINISH_COMPOSITION (); \
2639 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2640 goto no_more_source; \
2641 for (p = src; p < src_end - 1; p++) \
2642 if (*p == ISO_CODE_ESC && p[1] == '1') \
2643 break; \
2644 if (p == src_end - 1) \
2645 { \
2646 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2647 goto invalid_code; \
2648 goto no_more_source; \
2649 } \
2650 \
2651 /* This is surely the start of a composition. */ \
2652 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2653 : c1 == '2' ? COMPOSITION_WITH_RULE \
2654 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2655 : COMPOSITION_WITH_RULE_ALTCHARS); \
2656 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2657 : COMPOSING_COMPONENT_CHAR); \
2658 component_idx = component_len = 0; \
2659 } \
ec6d2bb8
KH
2660 } while (0)
2661
ec6d2bb8 2662
df7492f9
KH
2663/* Handle composition end sequence ESC 1. */
2664
/* Output the composition collected in `components' to charbuf and
   leave the composing state.

   The number of composed characters is taken to be
   component_idx - component_len when alternate components were given
   (component_len > 0), component_idx for COMPOSITION_RELATIVE, and
   (component_idx + 1) / 2 otherwise.

   The output consists of:
     1. whatever ADD_COMPOSITION_DATA (defined elsewhere) stores for
        `method' and that character count; presumably it advances
        charbuf past an annotation header -- confirm;
     2. unless the method is COMPOSITION_RELATIVE, the components (all
        of `components' when component_len is 0, otherwise its first
        component_len entries), after which the entry at the saved
        start position is overwritten with saved_charbuf - charbuf, a
        non-positive count of the entries written since then;
     3. the characters themselves: the even-indexed entries for
        COMPOSITION_WITH_RULE, otherwise the entries from
        component_len up to component_idx.  char_offset is incremented
        once per character.

   Finally coding->annotated is set and composition_state returns to
   COMPOSING_NO.  No check is made here that charbuf has room for all
   this.  */
2665#define DECODE_COMPOSITION_END() \
ec6d2bb8 2666 do { \
df7492f9
KH
2667 int nchars = (component_len > 0 ? component_idx - component_len \
2668 : method == COMPOSITION_RELATIVE ? component_idx \
2669 : (component_idx + 1) / 2); \
2670 int i; \
2671 int *saved_charbuf = charbuf; \
2672 \
2673 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2674 if (method != COMPOSITION_RELATIVE) \
ec6d2bb8 2675 { \
df7492f9
KH
2676 if (component_len == 0) \
2677 for (i = 0; i < component_idx; i++) \
2678 *charbuf++ = components[i]; \
2679 else \
2680 for (i = 0; i < component_len; i++) \
2681 *charbuf++ = components[i]; \
2682 *saved_charbuf = saved_charbuf - charbuf; \
ec6d2bb8 2683 } \
df7492f9
KH
2684 if (method == COMPOSITION_WITH_RULE) \
2685 for (i = 0; i < component_idx; i += 2, char_offset++) \
2686 *charbuf++ = components[i]; \
ec6d2bb8 2687 else \
df7492f9
KH
2688 for (i = component_len; i < component_idx; i++, char_offset++) \
2689 *charbuf++ = components[i]; \
2690 coding->annotated = 1; \
2691 composition_state = COMPOSING_NO; \
ec6d2bb8
KH
2692 } while (0)
2693
df7492f9 2694
ec6d2bb8
KH
2695/* Decode a composition rule from the byte C1 (and maybe one more byte
2696 from SRC) and replace the value of C1 with the encoded composition
2697 rule. C1 must be a modifiable variable.

   C1 is first reduced by 32.  Then:
     - below 81, it is a rule in the old one-byte format: the global
       and new reference points are C1 / 9 and C1 % 9, where a value
       of 4 is remapped to 10;
     - from 81 up to 92, it is the first byte of the new two-byte
       format: one more byte is read with ONE_MORE_BYTE into `c2', a
       variable of the expansion site, and the reference points are
       C1 - 81 and c2 - 32;
     - otherwise C1 is set to 0.
   In the first two cases the result is built with
   COMPOSITION_ENCODE_RULE.  */
2698
2699#define DECODE_COMPOSITION_RULE(c1) \
2700 do { \
ec6d2bb8
KH
2701 (c1) -= 32; \
2702 if (c1 < 81) /* old format (before ver.21) */ \
2703 { \
2704 int gref = (c1) / 9; \
2705 int nref = (c1) % 9; \
2706 if (gref == 4) gref = 10; \
2707 if (nref == 4) nref = 10; \
df7492f9 2708 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
ec6d2bb8 2709 } \
b73bfc1c 2710 else if (c1 < 93) /* new format (after ver.21) */ \
ec6d2bb8
KH
2711 { \
2712 ONE_MORE_BYTE (c2); \
df7492f9 2713 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
ec6d2bb8 2714 } \
df7492f9
KH
2715 else \
2716 c1 = 0; \
ec6d2bb8 2717 } while (0)
88993dfd 2718
d46c5b12 2719
4ed46869
KH
2720/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2721
b73bfc1c 2722static void
df7492f9 2723decode_coding_iso_2022 (coding)
4ed46869 2724 struct coding_system *coding;
4ed46869 2725{
df7492f9
KH
2726 unsigned char *src = coding->source + coding->consumed;
2727 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 2728 unsigned char *src_base;
df7492f9
KH
2729 int *charbuf = coding->charbuf;
2730 int *charbuf_end = charbuf + coding->charbuf_size - 4;
2731 int consumed_chars = 0, consumed_chars_base;
2732 int char_offset = 0;
2733 int multibytep = coding->src_multibyte;
2734 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2735 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2736 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2737 struct charset *charset;
2738 int c;
2739 /* For handling composition sequence. */
2740#define COMPOSING_NO 0
2741#define COMPOSING_CHAR 1
2742#define COMPOSING_RULE 2
2743#define COMPOSING_COMPONENT_CHAR 3
2744#define COMPOSING_COMPONENT_RULE 4
2745
2746 int composition_state = COMPOSING_NO;
2747 enum composition_method method;
2748 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2749 int component_idx;
2750 int component_len;
2751 Lisp_Object attrs, eol_type, charset_list;
2752
2753 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2754 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2755
2756 while (1)
4ed46869 2757 {
b73bfc1c
KH
2758 int c1, c2;
2759
2760 src_base = src;
df7492f9
KH
2761 consumed_chars_base = consumed_chars;
2762
2763 if (charbuf >= charbuf_end)
2764 break;
2765
b73bfc1c 2766 ONE_MORE_BYTE (c1);
4ed46869 2767
ec6d2bb8 2768 /* We produce no character or one character. */
4ed46869
KH
2769 switch (iso_code_class [c1])
2770 {
2771 case ISO_0x20_or_0x7F:
df7492f9 2772 if (composition_state != COMPOSING_NO)
ec6d2bb8 2773 {
df7492f9
KH
2774 if (composition_state == COMPOSING_RULE
2775 || composition_state == COMPOSING_COMPONENT_RULE)
2776 {
2777 DECODE_COMPOSITION_RULE (c1);
2778 components[component_idx++] = c1;
2779 composition_state--;
2780 continue;
2781 }
ec6d2bb8 2782 }
df7492f9
KH
2783 if (charset_id_0 < 0
2784 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2785 /* This is SPACE or DEL. */
2786 charset = CHARSET_FROM_ID (charset_ascii);
2787 else
2788 charset = CHARSET_FROM_ID (charset_id_0);
2789 break;
4ed46869
KH
2790
2791 case ISO_graphic_plane_0:
781d7a48 2792 if (composition_state != COMPOSING_NO)
b73bfc1c 2793 {
781d7a48
KH
2794 if (composition_state == COMPOSING_RULE
2795 || composition_state == COMPOSING_COMPONENT_RULE)
2796 {
2797 DECODE_COMPOSITION_RULE (c1);
2798 components[component_idx++] = c1;
2799 composition_state--;
2800 continue;
2801 }
b73bfc1c 2802 }
df7492f9 2803 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2804 break;
2805
2806 case ISO_0xA0_or_0xFF:
df7492f9
KH
2807 if (charset_id_1 < 0
2808 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2809 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2810 goto invalid_code;
4ed46869
KH
2811 /* This is a graphic character, we fall down ... */
2812
2813 case ISO_graphic_plane_1:
df7492f9
KH
2814 if (charset_id_1 < 0)
2815 goto invalid_code;
2816 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2817 break;
2818
2819 case ISO_carriage_return:
df7492f9 2820 if (c1 == '\r')
4ed46869 2821 {
df7492f9 2822 if (EQ (eol_type, Qdos))
4ed46869 2823 {
df7492f9
KH
2824 if (src == src_end)
2825 goto no_more_source;
2826 if (*src == '\n')
2827 ONE_MORE_BYTE (c1);
4ed46869 2828 }
df7492f9
KH
2829 else if (EQ (eol_type, Qmac))
2830 c1 = '\n';
4ed46869 2831 }
df7492f9
KH
2832 /* fall through */
2833
2834 case ISO_control_0:
2835 MAYBE_FINISH_COMPOSITION ();
2836 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2837 break;
2838
df7492f9
KH
2839 case ISO_control_1:
2840 MAYBE_FINISH_COMPOSITION ();
2841 goto invalid_code;
2842
4ed46869 2843 case ISO_shift_out:
df7492f9
KH
2844 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2845 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2846 goto invalid_code;
2847 CODING_ISO_INVOCATION (coding, 0) = 1;
2848 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2849 continue;
4ed46869
KH
2850
2851 case ISO_shift_in:
df7492f9
KH
2852 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2853 goto invalid_code;
2854 CODING_ISO_INVOCATION (coding, 0) = 0;
2855 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2856 continue;
4ed46869
KH
2857
2858 case ISO_single_shift_2_7:
2859 case ISO_single_shift_2:
df7492f9
KH
2860 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2861 goto invalid_code;
4ed46869
KH
2862 /* SS2 is handled as an escape sequence of ESC 'N' */
2863 c1 = 'N';
2864 goto label_escape_sequence;
2865
2866 case ISO_single_shift_3:
df7492f9
KH
2867 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2868 goto invalid_code;
4ed46869
KH
2869 /* SS2 is handled as an escape sequence of ESC 'O' */
2870 c1 = 'O';
2871 goto label_escape_sequence;
2872
2873 case ISO_control_sequence_introducer:
2874 /* CSI is handled as an escape sequence of ESC '[' ... */
2875 c1 = '[';
2876 goto label_escape_sequence;
2877
2878 case ISO_escape:
2879 ONE_MORE_BYTE (c1);
2880 label_escape_sequence:
df7492f9 2881 /* Escape sequences handled here are invocation,
4ed46869
KH
2882 designation, direction specification, and character
2883 composition specification. */
2884 switch (c1)
2885 {
2886 case '&': /* revision of following character set */
2887 ONE_MORE_BYTE (c1);
2888 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2889 goto invalid_code;
4ed46869
KH
2890 ONE_MORE_BYTE (c1);
2891 if (c1 != ISO_CODE_ESC)
df7492f9 2892 goto invalid_code;
4ed46869
KH
2893 ONE_MORE_BYTE (c1);
2894 goto label_escape_sequence;
2895
2896 case '$': /* designation of 2-byte character set */
df7492f9
KH
2897 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2898 goto invalid_code;
4ed46869
KH
2899 ONE_MORE_BYTE (c1);
2900 if (c1 >= '@' && c1 <= 'B')
2901 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2902 or JISX0208.1980 */
df7492f9 2903 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2904 }
2905 else if (c1 >= 0x28 && c1 <= 0x2B)
2906 { /* designation of DIMENSION2_CHARS94 character set */
2907 ONE_MORE_BYTE (c2);
df7492f9 2908 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
2909 }
2910 else if (c1 >= 0x2C && c1 <= 0x2F)
2911 { /* designation of DIMENSION2_CHARS96 character set */
2912 ONE_MORE_BYTE (c2);
df7492f9 2913 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
2914 }
2915 else
df7492f9 2916 goto invalid_code;
b73bfc1c 2917 /* We must update these variables now. */
df7492f9
KH
2918 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2919 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2920 continue;
4ed46869
KH
2921
2922 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
2923 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2924 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2925 goto invalid_code;
2926 CODING_ISO_INVOCATION (coding, 0) = 2;
2927 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2928 continue;
4ed46869
KH
2929
2930 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
2931 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2932 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2933 goto invalid_code;
2934 CODING_ISO_INVOCATION (coding, 0) = 3;
2935 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2936 continue;
4ed46869
KH
2937
2938 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
2939 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2940 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2941 goto invalid_code;
2942 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 2943 ONE_MORE_BYTE (c1);
e7046a18 2944 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2945 goto invalid_code;
4ed46869
KH
2946 break;
2947
2948 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
2949 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2950 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2951 goto invalid_code;
2952 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 2953 ONE_MORE_BYTE (c1);
e7046a18 2954 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2955 goto invalid_code;
4ed46869
KH
2956 break;
2957
ec6d2bb8 2958 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
2959 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2960 goto invalid_code;
ec6d2bb8 2961 DECODE_COMPOSITION_START (c1);
b73bfc1c 2962 continue;
4ed46869 2963
ec6d2bb8 2964 case '1': /* end composition */
df7492f9
KH
2965 if (composition_state == COMPOSING_NO)
2966 goto invalid_code;
2967 DECODE_COMPOSITION_END ();
b73bfc1c 2968 continue;
4ed46869
KH
2969
2970 case '[': /* specification of direction */
df7492f9
KH
2971 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2972 goto invalid_code;
4ed46869 2973 /* For the moment, nested direction is not supported.
d46c5b12 2974 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 2975 left-to-right, and nozero means right-to-left. */
4ed46869
KH
2976 ONE_MORE_BYTE (c1);
2977 switch (c1)
2978 {
2979 case ']': /* end of the current direction */
d46c5b12 2980 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
2981
2982 case '0': /* end of the current direction */
2983 case '1': /* start of left-to-right direction */
2984 ONE_MORE_BYTE (c1);
2985 if (c1 == ']')
d46c5b12 2986 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 2987 else
df7492f9 2988 goto invalid_code;
4ed46869
KH
2989 break;
2990
2991 case '2': /* start of right-to-left direction */
2992 ONE_MORE_BYTE (c1);
2993 if (c1 == ']')
d46c5b12 2994 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 2995 else
df7492f9 2996 goto invalid_code;
4ed46869
KH
2997 break;
2998
2999 default:
df7492f9 3000 goto invalid_code;
4ed46869 3001 }
b73bfc1c 3002 continue;
4ed46869
KH
3003
3004 default:
df7492f9
KH
3005 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3006 goto invalid_code;
4ed46869
KH
3007 if (c1 >= 0x28 && c1 <= 0x2B)
3008 { /* designation of DIMENSION1_CHARS94 character set */
3009 ONE_MORE_BYTE (c2);
df7492f9 3010 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
3011 }
3012 else if (c1 >= 0x2C && c1 <= 0x2F)
3013 { /* designation of DIMENSION1_CHARS96 character set */
3014 ONE_MORE_BYTE (c2);
df7492f9 3015 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
3016 }
3017 else
df7492f9 3018 goto invalid_code;
b73bfc1c 3019 /* We must update these variables now. */
df7492f9
KH
3020 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3021 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3022 continue;
4ed46869 3023 }
b73bfc1c 3024 }
4ed46869 3025
b73bfc1c 3026 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3027 Produce a decoded character while getting 2nd position code
3028 C2 if necessary. */
3029 c1 &= 0x7F;
3030 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3031 {
3032 ONE_MORE_BYTE (c2);
df7492f9 3033 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3034 /* C2 is not in a valid range. */
df7492f9
KH
3035 goto invalid_code;
3036 c1 = (c1 << 8) | (c2 & 0x7F);
3037 if (CHARSET_DIMENSION (charset) > 2)
3038 {
3039 ONE_MORE_BYTE (c2);
3040 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3041 /* C2 is not in a valid range. */
3042 goto invalid_code;
3043 c1 = (c1 << 8) | (c2 & 0x7F);
3044 }
3045 }
3046
3047 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3048 if (c < 0)
3049 {
3050 MAYBE_FINISH_COMPOSITION ();
3051 for (; src_base < src; src_base++, char_offset++)
3052 {
3053 if (ASCII_BYTE_P (*src_base))
3054 *charbuf++ = *src_base;
3055 else
3056 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3057 }
3058 }
3059 else if (composition_state == COMPOSING_NO)
3060 {
3061 *charbuf++ = c;
3062 char_offset++;
4ed46869 3063 }
df7492f9 3064 else
781d7a48
KH
3065 {
3066 components[component_idx++] = c;
3067 if (method == COMPOSITION_WITH_RULE
3068 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3069 && composition_state == COMPOSING_COMPONENT_CHAR))
3070 composition_state++;
3071 }
4ed46869
KH
3072 continue;
3073
df7492f9
KH
3074 invalid_code:
3075 MAYBE_FINISH_COMPOSITION ();
4ed46869 3076 src = src_base;
df7492f9
KH
3077 consumed_chars = consumed_chars_base;
3078 ONE_MORE_BYTE (c);
3079 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3080 coding->errors++;
4ed46869 3081 }
fb88bf2d 3082
df7492f9
KH
3083 no_more_source:
3084 coding->consumed_char += consumed_chars_base;
3085 coding->consumed = src_base - coding->source;
3086 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3087}
3088
b73bfc1c 3089
f4dee582 3090/* ISO2022 encoding stuff. */
4ed46869
KH
3091
3092/*
f4dee582 3093 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3094 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3095 variant has the following specifications:
df7492f9 3096 1. Initial designation to G0 thru G3.
4ed46869
KH
3097 2. Allows short-form designation?
3098 3. ASCII should be designated to G0 before control characters?
3099 4. ASCII should be designated to G0 at end of line?
3100 5. 7-bit environment or 8-bit environment?
3101 6. Use locking-shift?
3102 7. Use Single-shift?
3103 And the following two are only for Japanese:
3104 8. Use ASCII in place of JIS0201-1976-Roman?
3105 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3106 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3107 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3108 details.
4ed46869
KH
3109*/
3110
/* Emit, through the EMIT_* macros, the escape sequence that
   designates CHARSET to graphic register REG of CODING.

   If CODING has CODING_ISO_FLAG_REVISION set and the revision of
   CHARSET is non-negative, the sequence is preceded by
   `ESC & <'@' + revision>'.  For a charset of dimension 1 the
   sequence is `ESC <intermediate> <final>'; otherwise it is
   `ESC $ [<intermediate>] <final>', where the intermediate character
   of a 94-charset is omitted (short form) unless CODING requests the
   long form, REG is not 0, or <final> is outside '@'..'B'.

   Finally CHARSET is recorded as the designation of REG in CODING.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    char *intermediate_char_94 = "()*+";                                \
    char *intermediate_char_96 = ",-./";                                \
    int revision                                                        \
      = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)         \
         ? XINT (CHARSET_ISO_REVISION (charset))                        \
         : -1);                                                         \
    int c;                                                              \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          {                                                             \
            EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);            \
          }                                                             \
        else if ((CODING_ISO_FLAGS (coding)                             \
                  & CODING_ISO_FLAG_LONG_FORM)                          \
                 || reg != 0                                            \
                 || final_char < '@' || final_char > 'B')               \
          {                                                             \
            EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);            \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        c = (CHARSET_ISO_CHARS_96 (charset)                             \
             ? intermediate_char_96[reg]                                \
             : intermediate_char_94[reg]);                              \
        EMIT_ONE_ASCII_BYTE (c);                                        \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3158
df7492f9 3159
4ed46869
KH
/* Emitters for the ISO2022 single-shift functions.
   ENCODE_SINGLE_SHIFT_2 produces single-shift-2 and
   ENCODE_SINGLE_SHIFT_3 produces single-shift-3: the one-byte control
   code (ISO_CODE_SS2 / ISO_CODE_SS3) normally, or the escape sequence
   (ESC 'N' / ESC 'O') when the variable `coding' in scope has
   CODING_ISO_FLAG_SEVEN_BITS set.  Both then mark that coding system
   as single-shifting.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS2);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS3);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3182
df7492f9 3183
4ed46869
KH
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3), and record in the
   variable `coding' in scope which graphic register (0, 1, 2, or 3
   respectively) is now invoked to graphic plane 0.

   The emitted codes are SI, SO, ESC 'n', and ESC 'o'; ESC 'n' and
   ESC 'o' are the same sequences that the ISO2022 decoder in this
   file accepts for locking-shift-2 and locking-shift-3.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* Locking-shift-3 is ESC 'o'; ESC 'n' would invoke G2, not G3.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
3214
df7492f9 3215
f4dee582
RS
/* Emit the code for a DIMENSION1 character of CHARSET whose
   position-code is C1, first emitting whatever designation and
   invocation sequences are needed.

   The body is a loop: while CHARSET is not invoked to graphic plane 0
   or 1 (and no single shift is pending), encode_invocation_designation
   is called and the tests are repeated.  If the coding system has
   CODING_ISO_FLAG_USE_ROMAN set, ASCII is replaced by JISX0201-Roman,
   and CHARSET (which must therefore be an lvalue) is reassigned.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_ascii                                             \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN))     \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single shift was just emitted; this character uses it. */  \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          {                                                             \
            EMIT_ONE_BYTE (c1 | 0x80);                                  \
          }                                                             \
        else                                                            \
          {                                                             \
            EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                            \
          }                                                             \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        EMIT_ONE_BYTE (c1 | 0x80);                                      \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane yet: invoke it       \
       (designating it to a graphic register first if necessary) and    \
       go around again to emit the character itself.  */                \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
3258
df7492f9 3259
f4dee582
RS
/* Emit the codes for a DIMENSION2 character of CHARSET whose
   position-codes are C1 and C2, first emitting whatever designation
   and invocation sequences are needed.

   The body is a loop: while CHARSET is not invoked to graphic plane 0
   or 1 (and no single shift is pending), encode_invocation_designation
   is called and the tests are repeated.  If the coding system has
   CODING_ISO_FLAG_USE_OLDJIS set, charset_jisx0208 is replaced by
   charset_jisx0208_1978, and CHARSET (which must therefore be an
   lvalue) is reassigned.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single shift was just emitted; this character uses it. */  \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          {                                                             \
            EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                  \
          }                                                             \
        else                                                            \
          {                                                             \
            EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);            \
          }                                                             \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane yet: invoke it       \
       (designating it to a graphic register first if necessary) and    \
       go around again to emit the character itself.  */                \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
3302
05e6f5dc 3303
df7492f9
KH
/* Encode character C with CHARSET and emit the result.  The code
   returned by ENCODE_CHAR is emitted as one byte when CHARSET has
   dimension 1; for any other dimension it is split into its two
   low-order bytes and emitted as a two-byte character.  */

#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset),(c));                                \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
  } while (0)
bdd9fb48 3313
05e6f5dc 3314
4ed46869 3315/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3316 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3317 Return new DST. */
3318
3319unsigned char *
df7492f9
KH
3320encode_invocation_designation (charset, coding, dst, p_nchars)
3321 struct charset *charset;
4ed46869
KH
3322 struct coding_system *coding;
3323 unsigned char *dst;
df7492f9 3324 int *p_nchars;
4ed46869 3325{
df7492f9
KH
3326 int multibytep = coding->dst_multibyte;
3327 int produced_chars = *p_nchars;
4ed46869 3328 int reg; /* graphic register number */
df7492f9 3329 int id = CHARSET_ID (charset);
4ed46869
KH
3330
3331 /* At first, check designations. */
3332 for (reg = 0; reg < 4; reg++)
df7492f9 3333 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3334 break;
3335
3336 if (reg >= 4)
3337 {
3338 /* CHARSET is not yet designated to any graphic registers. */
3339 /* At first check the requested designation. */
df7492f9
KH
3340 reg = CODING_ISO_REQUEST (coding, id);
3341 if (reg < 0)
1ba9e4ab
KH
3342 /* Since CHARSET requests no special designation, designate it
3343 to graphic register 0. */
4ed46869
KH
3344 reg = 0;
3345
3346 ENCODE_DESIGNATION (charset, reg, coding);
3347 }
3348
df7492f9
KH
3349 if (CODING_ISO_INVOCATION (coding, 0) != reg
3350 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3351 {
3352 /* Since the graphic register REG is not invoked to any graphic
3353 planes, invoke it to graphic plane 0. */
3354 switch (reg)
3355 {
3356 case 0: /* graphic register 0 */
3357 ENCODE_SHIFT_IN;
3358 break;
3359
3360 case 1: /* graphic register 1 */
3361 ENCODE_SHIFT_OUT;
3362 break;
3363
3364 case 2: /* graphic register 2 */
df7492f9 3365 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3366 ENCODE_SINGLE_SHIFT_2;
3367 else
3368 ENCODE_LOCKING_SHIFT_2;
3369 break;
3370
3371 case 3: /* graphic register 3 */
df7492f9 3372 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3373 ENCODE_SINGLE_SHIFT_3;
3374 else
3375 ENCODE_LOCKING_SHIFT_3;
3376 break;
3377 }
3378 }
b73bfc1c 3379
df7492f9 3380 *p_nchars = produced_chars;
4ed46869
KH
3381 return dst;
3382}
3383
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the control sequence
   introducer: ESC '[' when the variable `coding' in scope has
   CODING_ISO_FLAG_SEVEN_BITS set (tested as a bit, since the flags
   word carries other bits as well), the single byte ISO_CODE_CSI
   otherwise.  It is an object-like macro and takes no argument.

   ENCODE_DIRECTION_R2L emits CSI '2' ']' and ENCODE_DIRECTION_L2R
   emits CSI '0' ']', the sequences the ISO2022 decoder in this file
   reads as the start of right-to-left text and the end of the current
   direction respectively.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)


#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)


#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
3407
4ed46869
KH
3408
/* Emit the codes that bring the graphic planes and registers of the
   variable `coding' in scope back to their initial state: a shift-in
   if graphic plane 0 does not currently invoke register 0, then, for
   each of the four registers that has an initial charset differing
   from its current designation, a designation of that initial
   charset.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        int initial_id = CODING_ISO_INITIAL (coding, reg);              \
                                                                        \
        /* Nothing to do for registers without an initial charset or    \
           already holding it.  */                                      \
        if (initial_id < 0                                              \
            || CODING_ISO_DESIGNATION (coding, reg) == initial_id)      \
          continue;                                                     \
        charset = CHARSET_FROM_ID (initial_id);                         \
        ENCODE_DESIGNATION (charset, reg, coding);                      \
      }                                                                 \
  } while (0)
3427
df7492f9 3428
bdd9fb48 3429/* Produce designation sequences of charsets in the line started from
b73bfc1c 3430 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3431
3432 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3433 find all the necessary designations. */
3434
b73bfc1c 3435static unsigned char *
df7492f9 3436encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3437 struct coding_system *coding;
df7492f9
KH
3438 int *charbuf, *charbuf_end;
3439 unsigned char *dst;
e0e989f6 3440{
df7492f9 3441 struct charset *charset;
bdd9fb48
KH
3442 /* Table of charsets to be designated to each graphic register. */
3443 int r[4];
df7492f9
KH
3444 int c, found = 0, reg;
3445 int produced_chars = 0;
3446 int multibytep = coding->dst_multibyte;
3447 Lisp_Object attrs;
3448 Lisp_Object charset_list;
3449
3450 attrs = CODING_ID_ATTRS (coding->id);
3451 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3452 if (EQ (charset_list, Qiso_2022))
3453 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3454
3455 for (reg = 0; reg < 4; reg++)
3456 r[reg] = -1;
3457
b73bfc1c 3458 while (found < 4)
e0e989f6 3459 {
df7492f9
KH
3460 int id;
3461
3462 c = *charbuf++;
b73bfc1c
KH
3463 if (c == '\n')
3464 break;
df7492f9
KH
3465 charset = char_charset (c, charset_list, NULL);
3466 id = CHARSET_ID (charset);
3467 reg = CODING_ISO_REQUEST (coding, id);
3468 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3469 {
3470 found++;
df7492f9 3471 r[reg] = id;
bdd9fb48 3472 }
bdd9fb48
KH
3473 }
3474
3475 if (found)
3476 {
3477 for (reg = 0; reg < 4; reg++)
3478 if (r[reg] >= 0
df7492f9
KH
3479 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3480 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3481 }
b73bfc1c
KH
3482
3483 return dst;
e0e989f6
KH
3484}
3485
4ed46869
KH
3486/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3487
df7492f9
KH
3488static int
3489encode_coding_iso_2022 (coding)
4ed46869 3490 struct coding_system *coding;
4ed46869 3491{
df7492f9
KH
3492 int multibytep = coding->dst_multibyte;
3493 int *charbuf = coding->charbuf;
3494 int *charbuf_end = charbuf + coding->charbuf_used;
3495 unsigned char *dst = coding->destination + coding->produced;
3496 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3497 int safe_room = 16;
3498 int bol_designation
3499 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3500 && CODING_ISO_BOL (coding));
3501 int produced_chars = 0;
3502 Lisp_Object attrs, eol_type, charset_list;
3503 int ascii_compatible;
b73bfc1c 3504 int c;
05e6f5dc 3505
df7492f9 3506 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
004068e4
KH
3507 setup_iso_safe_charsets (attrs);
3508 coding->safe_charsets
3509 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
bdd9fb48 3510
df7492f9 3511 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4ed46869 3512
df7492f9 3513 while (charbuf < charbuf_end)
4ed46869 3514 {
df7492f9 3515 ASSURE_DESTINATION (safe_room);
b73bfc1c 3516
df7492f9 3517 if (bol_designation)
b73bfc1c 3518 {
df7492f9 3519 unsigned char *dst_prev = dst;
4ed46869 3520
bdd9fb48 3521 /* We have to produce designation sequences if any now. */
df7492f9
KH
3522 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3523 bol_designation = 0;
3524 /* We are sure that designation sequences are all ASCII bytes. */
3525 produced_chars += dst - dst_prev;
4ed46869 3526 }
ec6d2bb8 3527
df7492f9 3528 c = *charbuf++;
4ed46869 3529
b73bfc1c
KH
3530 /* Now encode the character C. */
3531 if (c < 0x20 || c == 0x7F)
3532 {
df7492f9
KH
3533 if (c == '\n'
3534 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3535 {
df7492f9
KH
3536 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3537 ENCODE_RESET_PLANE_AND_REGISTER ();
3538 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3539 {
df7492f9
KH
3540 int i;
3541
3542 for (i = 0; i < 4; i++)
3543 CODING_ISO_DESIGNATION (coding, i)
3544 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3545 }
df7492f9
KH
3546 bol_designation
3547 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3548 }
df7492f9
KH
3549 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3550 ENCODE_RESET_PLANE_AND_REGISTER ();
3551 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3552 }
df7492f9 3553 else if (ASCII_CHAR_P (c))
88993dfd 3554 {
df7492f9
KH
3555 if (ascii_compatible)
3556 EMIT_ONE_ASCII_BYTE (c);
3557 else
bf16eb23
KH
3558 {
3559 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3560 ENCODE_ISO_CHARACTER (charset, c);
3561 }
88993dfd 3562 }
16eafb5d
KH
3563 else if (CHAR_BYTE8_P (c))
3564 {
3565 c = CHAR_TO_BYTE8 (c);
3566 EMIT_ONE_BYTE (c);
3567 }
b73bfc1c 3568 else
df7492f9
KH
3569 {
3570 struct charset *charset = char_charset (c, charset_list, NULL);
b73bfc1c 3571
df7492f9
KH
3572 if (!charset)
3573 {
41cbe562
KH
3574 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3575 {
3576 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3577 charset = CHARSET_FROM_ID (charset_ascii);
3578 }
3579 else
3580 {
3581 c = coding->default_char;
3582 charset = char_charset (c, charset_list, NULL);
3583 }
df7492f9
KH
3584 }
3585 ENCODE_ISO_CHARACTER (charset, c);
3586 }
84fbb8a0 3587 }
b73bfc1c 3588
df7492f9
KH
3589 if (coding->mode & CODING_MODE_LAST_BLOCK
3590 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3591 {
3592 ASSURE_DESTINATION (safe_room);
3593 ENCODE_RESET_PLANE_AND_REGISTER ();
3594 }
3595 coding->result = CODING_RESULT_SUCCESS;
3596 CODING_ISO_BOL (coding) = bol_designation;
3597 coding->produced_char += produced_chars;
3598 coding->produced = dst - coding->destination;
3599 return 0;
4ed46869
KH
3600}
3601
3602\f
/*** 8. SJIS and BIG5 handlers ***/
4ed46869 3604
df7492f9 3605/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3606 quite widely. So, for the moment, Emacs supports them in the bare
3607 C code. But, in the future, they may be supported only by CCL. */
3608
3609/* SJIS is a coding system encoding three character sets: ASCII, right
3610 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3611 as is. A character of charset katakana-jisx0201 is encoded by
3612 "position-code + 0x80". A character of charset japanese-jisx0208
3613 is encoded in 2-byte but two position-codes are divided and shifted
   so that they fit in the ranges below.
4ed46869
KH
3615
3616 --- CODE RANGE of SJIS ---
3617 (character set) (range)
3618 ASCII 0x00 .. 0x7F
df7492f9 3619 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3620 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3621 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3622 -------------------------------
3623
3624*/
3625
3626/* BIG5 is a coding system encoding two character sets: ASCII and
3627 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3628 character set and is encoded in two-byte.
4ed46869
KH
3629
3630 --- CODE RANGE of BIG5 ---
3631 (character set) (range)
3632 ASCII 0x00 .. 0x7F
3633 Big5 (1st byte) 0xA1 .. 0xFE
3634 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3635 --------------------------
3636
df7492f9 3637 */
4ed46869
KH
3638
3639/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3640 Check if a text is encoded in SJIS. If it is, return
df7492f9 3641 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3642
0a28aafb 3643static int
df7492f9
KH
3644detect_coding_sjis (coding, mask)
3645 struct coding_system *coding;
3646 int *mask;
4ed46869 3647{
df7492f9
KH
3648 unsigned char *src = coding->source, *src_base = src;
3649 unsigned char *src_end = coding->source + coding->src_bytes;
3650 int multibytep = coding->src_multibyte;
3651 int consumed_chars = 0;
3652 int found = 0;
b73bfc1c 3653 int c;
df7492f9
KH
3654
3655 /* A coding system of this category is always ASCII compatible. */
3656 src += coding->head_ascii;
4ed46869 3657
b73bfc1c 3658 while (1)
4ed46869 3659 {
df7492f9 3660 ONE_MORE_BYTE (c);
682169fe
KH
3661 if (c < 0x80)
3662 continue;
df7492f9 3663 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3664 {
df7492f9 3665 ONE_MORE_BYTE (c);
682169fe 3666 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9
KH
3667 break;
3668 found = 1;
4ed46869 3669 }
df7492f9
KH
3670 else if (c >= 0xA0 && c < 0xE0)
3671 found = 1;
3672 else
3673 break;
4ed46869 3674 }
df7492f9
KH
3675 *mask &= ~CATEGORY_MASK_SJIS;
3676 return 0;
3677
3678 no_more_source:
3679 if (!found)
3680 return 0;
3681 *mask &= CATEGORY_MASK_SJIS;
3682 return 1;
4ed46869
KH
3683}
3684
3685/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3686 Check if a text is encoded in BIG5. If it is, return
df7492f9 3687 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3688
0a28aafb 3689static int
df7492f9
KH
3690detect_coding_big5 (coding, mask)
3691 struct coding_system *coding;
3692 int *mask;
4ed46869 3693{
df7492f9
KH
3694 unsigned char *src = coding->source, *src_base = src;
3695 unsigned char *src_end = coding->source + coding->src_bytes;
3696 int multibytep = coding->src_multibyte;
3697 int consumed_chars = 0;
3698 int found = 0;
b73bfc1c 3699 int c;
fa42c37f 3700
df7492f9
KH
3701 /* A coding system of this category is always ASCII compatible. */
3702 src += coding->head_ascii;
fa42c37f 3703
b73bfc1c 3704 while (1)
fa42c37f 3705 {
df7492f9
KH
3706 ONE_MORE_BYTE (c);
3707 if (c < 0x80)
fa42c37f 3708 continue;
df7492f9 3709 if (c >= 0xA1)
fa42c37f 3710 {
df7492f9
KH
3711 ONE_MORE_BYTE (c);
3712 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3713 return 0;
df7492f9 3714 found = 1;
fa42c37f 3715 }
df7492f9
KH
3716 else
3717 break;
fa42c37f 3718 }
df7492f9 3719 *mask &= ~CATEGORY_MASK_BIG5;
fa42c37f 3720 return 0;
df7492f9
KH
3721
3722 no_more_source:
3723 if (!found)
3724 return 0;
3725 *mask &= CATEGORY_MASK_BIG5;
3726 return 1;
fa42c37f
KH
3727}
3728
4ed46869
KH
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   Decode SJIS text.  */
3731
b73bfc1c 3732static void
df7492f9 3733decode_coding_sjis (coding)
4ed46869 3734 struct coding_system *coding;
4ed46869 3735{
df7492f9
KH
3736 unsigned char *src = coding->source + coding->consumed;
3737 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 3738 unsigned char *src_base;
df7492f9
KH
3739 int *charbuf = coding->charbuf;
3740 int *charbuf_end = charbuf + coding->charbuf_size;
3741 int consumed_chars = 0, consumed_chars_base;
3742 int multibytep = coding->src_multibyte;
3743 struct charset *charset_roman, *charset_kanji, *charset_kana;
3744 Lisp_Object attrs, eol_type, charset_list, val;
a5d301df 3745
df7492f9
KH
3746 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3747
3748 val = charset_list;
3749 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3750 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3751 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 3752
b73bfc1c 3753 while (1)
4ed46869 3754 {
df7492f9 3755 int c, c1;
b73bfc1c
KH
3756
3757 src_base = src;
df7492f9
KH
3758 consumed_chars_base = consumed_chars;
3759
3760 if (charbuf >= charbuf_end)
3761 break;
3762
3763 ONE_MORE_BYTE (c);
b73bfc1c 3764
df7492f9 3765 if (c == '\r')
4ed46869 3766 {
df7492f9 3767 if (EQ (eol_type, Qdos))
4ed46869 3768 {
df7492f9
KH
3769 if (src == src_end)
3770 goto no_more_source;
3771 if (*src == '\n')
3772 ONE_MORE_BYTE (c);
4ed46869 3773 }
df7492f9
KH
3774 else if (EQ (eol_type, Qmac))
3775 c = '\n';
4ed46869 3776 }
54f78171 3777 else
df7492f9
KH
3778 {
3779 struct charset *charset;
3780
3781 if (c < 0x80)
3782 charset = charset_roman;
3783 else
4ed46869 3784 {
df7492f9
KH
3785 if (c >= 0xF0)
3786 goto invalid_code;
3787 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3788 {
54f78171 3789 /* SJIS -> JISX0208 */
df7492f9
KH
3790 ONE_MORE_BYTE (c1);
3791 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3792 goto invalid_code;
3793 c = (c << 8) | c1;
3794 SJIS_TO_JIS (c);
3795 charset = charset_kanji;
5e34de15 3796 }
fb88bf2d 3797 else
b73bfc1c 3798 /* SJIS -> JISX0201-Kana */
df7492f9
KH
3799 charset = charset_kana;
3800 }
3801 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3802 }
3803 *charbuf++ = c;
3804 continue;
3805
3806 invalid_code:
3807 src = src_base;
3808 consumed_chars = consumed_chars_base;
3809 ONE_MORE_BYTE (c);
3810 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3811 coding->errors++;
3812 }
3813
3814 no_more_source:
3815 coding->consumed_char += consumed_chars_base;
3816 coding->consumed = src_base - coding->source;
3817 coding->charbuf_used = charbuf - coding->charbuf;
3818}
3819
3820static void
3821decode_coding_big5 (coding)
3822 struct coding_system *coding;
3823{
3824 unsigned char *src = coding->source + coding->consumed;
3825 unsigned char *src_end = coding->source + coding->src_bytes;
3826 unsigned char *src_base;
3827 int *charbuf = coding->charbuf;
3828 int *charbuf_end = charbuf + coding->charbuf_size;
3829 int consumed_chars = 0, consumed_chars_base;
3830 int multibytep = coding->src_multibyte;
3831 struct charset *charset_roman, *charset_big5;
3832 Lisp_Object attrs, eol_type, charset_list, val;
3833
3834 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3835 val = charset_list;
3836 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3837 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3838
3839 while (1)
3840 {
3841 int c, c1;
3842
3843 src_base = src;
3844 consumed_chars_base = consumed_chars;
3845
3846 if (charbuf >= charbuf_end)
3847 break;
3848
3849 ONE_MORE_BYTE (c);
3850
3851 if (c == '\r')
3852 {
3853 if (EQ (eol_type, Qdos))
3854 {
3855 if (src == src_end)
3856 goto no_more_source;
3857 if (*src == '\n')
3858 ONE_MORE_BYTE (c);
4ed46869 3859 }
df7492f9
KH
3860 else if (EQ (eol_type, Qmac))
3861 c = '\n';
3862 }
3863 else
3864 {
3865 struct charset *charset;
3866 if (c < 0x80)
3867 charset = charset_roman;
fb88bf2d 3868 else
fb88bf2d 3869 {
54f78171 3870 /* BIG5 -> Big5 */
df7492f9
KH
3871 if (c < 0xA1 || c > 0xFE)
3872 goto invalid_code;
3873 ONE_MORE_BYTE (c1);
3874 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3875 goto invalid_code;
3876 c = c << 8 | c1;
3877 charset = charset_big5;
4ed46869 3878 }
df7492f9 3879 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 3880 }
4ed46869 3881
df7492f9 3882 *charbuf++ = c;
fb88bf2d
KH
3883 continue;
3884
df7492f9 3885 invalid_code:
4ed46869 3886 src = src_base;
df7492f9
KH
3887 consumed_chars = consumed_chars_base;
3888 ONE_MORE_BYTE (c);
3889 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3890 coding->errors++;
fb88bf2d 3891 }
d46c5b12 3892
df7492f9
KH
3893 no_more_source:
3894 coding->consumed_char += consumed_chars_base;
3895 coding->consumed = src_base - coding->source;
3896 coding->charbuf_used = charbuf - coding->charbuf;
3897}
3898
3899/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3900 This function can encode charsets `ascii', `katakana-jisx0201',
3901 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3902 are sure that all these charsets are registered as official charset
3903 (i.e. do not have extended leading-codes). Characters of other
3904 charsets are produced without any encoding. If SJIS_P is 1, encode
3905 SJIS text, else encode BIG5 text. */
3906
3907static int
3908encode_coding_sjis (coding)
3909 struct coding_system *coding;
3910{
3911 int multibytep = coding->dst_multibyte;
3912 int *charbuf = coding->charbuf;
3913 int *charbuf_end = charbuf + coding->charbuf_used;
3914 unsigned char *dst = coding->destination + coding->produced;
3915 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3916 int safe_room = 4;
3917 int produced_chars = 0;
3918 Lisp_Object attrs, eol_type, charset_list, val;
3919 int ascii_compatible;
3920 struct charset *charset_roman, *charset_kanji, *charset_kana;
3921 int c;
3922
3923 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3924 val = charset_list;
3925 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3926 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3927 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3928
3929 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3930
3931 while (charbuf < charbuf_end)
3932 {
3933 ASSURE_DESTINATION (safe_room);
3934 c = *charbuf++;
3935 /* Now encode the character C. */
3936 if (ASCII_CHAR_P (c) && ascii_compatible)
3937 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
3938 else if (CHAR_BYTE8_P (c))
3939 {
3940 c = CHAR_TO_BYTE8 (c);
3941 EMIT_ONE_BYTE (c);
3942 }
df7492f9
KH
3943 else
3944 {
3945 unsigned code;
3946 struct charset *charset = char_charset (c, charset_list, &code);
3947
3948 if (!charset)
3949 {
41cbe562
KH
3950 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3951 {
3952 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3953 charset = CHARSET_FROM_ID (charset_ascii);
3954 }
3955 else
3956 {
3957 c = coding->default_char;
3958 charset = char_charset (c, charset_list, &code);
3959 }
df7492f9
KH
3960 }
3961 if (code == CHARSET_INVALID_CODE (charset))
3962 abort ();
3963 if (charset == charset_kanji)
3964 {
3965 int c1, c2;
3966 JIS_TO_SJIS (code);
3967 c1 = code >> 8, c2 = code & 0xFF;
3968 EMIT_TWO_BYTES (c1, c2);
3969 }
3970 else if (charset == charset_kana)
3971 EMIT_ONE_BYTE (code | 0x80);
3972 else
3973 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3974 }
3975 }
3976 coding->result = CODING_RESULT_SUCCESS;
3977 coding->produced_char += produced_chars;
3978 coding->produced = dst - coding->destination;
3979 return 0;
3980}
3981
3982static int
3983encode_coding_big5 (coding)
3984 struct coding_system *coding;
3985{
3986 int multibytep = coding->dst_multibyte;
3987 int *charbuf = coding->charbuf;
3988 int *charbuf_end = charbuf + coding->charbuf_used;
3989 unsigned char *dst = coding->destination + coding->produced;
3990 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3991 int safe_room = 4;
3992 int produced_chars = 0;
3993 Lisp_Object attrs, eol_type, charset_list, val;
3994 int ascii_compatible;
3995 struct charset *charset_roman, *charset_big5;
3996 int c;
3997
3998 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3999 val = charset_list;
4000 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4001 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4002 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4003
4004 while (charbuf < charbuf_end)
4005 {
4006 ASSURE_DESTINATION (safe_room);
4007 c = *charbuf++;
4008 /* Now encode the character C. */
4009 if (ASCII_CHAR_P (c) && ascii_compatible)
4010 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4011 else if (CHAR_BYTE8_P (c))
4012 {
4013 c = CHAR_TO_BYTE8 (c);
4014 EMIT_ONE_BYTE (c);
4015 }
df7492f9
KH
4016 else
4017 {
4018 unsigned code;
4019 struct charset *charset = char_charset (c, charset_list, &code);
4020
4021 if (! charset)
4022 {
41cbe562
KH
4023 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4024 {
4025 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4026 charset = CHARSET_FROM_ID (charset_ascii);
4027 }
4028 else
4029 {
4030 c = coding->default_char;
4031 charset = char_charset (c, charset_list, &code);
4032 }
df7492f9
KH
4033 }
4034 if (code == CHARSET_INVALID_CODE (charset))
4035 abort ();
4036 if (charset == charset_big5)
4037 {
4038 int c1, c2;
4039
4040 c1 = code >> 8, c2 = code & 0xFF;
4041 EMIT_TWO_BYTES (c1, c2);
4042 }
4043 else
4044 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4045 }
4046 }
4047 coding->result = CODING_RESULT_SUCCESS;
4048 coding->produced_char += produced_chars;
4049 coding->produced = dst - coding->destination;
4050 return 0;
4051}
4052
4053\f
4054/*** 9. CCL handlers ***/
4055
4056/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4057 Check if a text is encoded in a coding system of which
4058 encoder/decoder are written in CCL program. If it is, return
4059 CATEGORY_MASK_CCL, else return 0. */
4060
4061static int
4062detect_coding_ccl (coding, mask)
4063 struct coding_system *coding;
4064 int *mask;
4065{
4066 unsigned char *src = coding->source, *src_base = src;
4067 unsigned char *src_end = coding->source + coding->src_bytes;
4068 int multibytep = coding->src_multibyte;
4069 int consumed_chars = 0;
4070 int found = 0;
4071 unsigned char *valids = CODING_CCL_VALIDS (coding);
4072 int head_ascii = coding->head_ascii;
4073 Lisp_Object attrs;
4074
4075 coding = &coding_categories[coding_category_ccl];
4076 attrs = CODING_ID_ATTRS (coding->id);
4077 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4078 src += head_ascii;
4079
4080 while (1)
4081 {
4082 int c;
4083 ONE_MORE_BYTE (c);
4084 if (! valids[c])
4085 break;
4086 if (!found && valids[c] > 1)
4087 found = 1;
4088 }
4089 *mask &= ~CATEGORY_MASK_CCL;
4090 return 0;
4091
4092 no_more_source:
4093 if (!found)
4094 return 0;
4095 *mask &= CATEGORY_MASK_CCL;
4096 return 1;
4097}
4098
4099static void
4100decode_coding_ccl (coding)
4101 struct coding_system *coding;
4102{
4103 unsigned char *src = coding->source + coding->consumed;
4104 unsigned char *src_end = coding->source + coding->src_bytes;
4105 int *charbuf = coding->charbuf;
4106 int *charbuf_end = charbuf + coding->charbuf_size;
4107 int consumed_chars = 0;
4108 int multibytep = coding->src_multibyte;
4109 struct ccl_program ccl;
4110 int source_charbuf[1024];
4111 int source_byteidx[1024];
4112
4113 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4114
4115 while (src < src_end)
4116 {
4117 unsigned char *p = src;
4118 int *source, *source_end;
4119 int i = 0;
4120
4121 if (multibytep)
4122 while (i < 1024 && p < src_end)
4123 {
4124 source_byteidx[i] = p - src;
4125 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4126 }
4127 else
4128 while (i < 1024 && p < src_end)
4129 source_charbuf[i++] = *p++;
4130
4131 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4132 ccl.last_block = 1;
4133
4134 source = source_charbuf;
4135 source_end = source + i;
4136 while (source < source_end)
4137 {
4138 ccl_driver (&ccl, source, charbuf,
4139 source_end - source, charbuf_end - charbuf);
4140 source += ccl.consumed;
4141 charbuf += ccl.produced;
4142 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4143 break;
4144 }
4145 if (source < source_end)
4146 src += source_byteidx[source - source_charbuf];
4147 else
4148 src = p;
4149 consumed_chars += source - source_charbuf;
4150
4151 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4152 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4153 break;
4154 }
4155
4156 switch (ccl.status)
4157 {
4158 case CCL_STAT_SUSPEND_BY_SRC:
4159 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4160 break;
4161 case CCL_STAT_SUSPEND_BY_DST:
4162 break;
4163 case CCL_STAT_QUIT:
4164 case CCL_STAT_INVALID_CMD:
4165 coding->result = CODING_RESULT_INTERRUPT;
4166 break;
4167 default:
4168 coding->result = CODING_RESULT_SUCCESS;
4169 break;
4170 }
4171 coding->consumed_char += consumed_chars;
4172 coding->consumed = src - coding->source;
4173 coding->charbuf_used = charbuf - coding->charbuf;
4174}
4175
4176static int
4177encode_coding_ccl (coding)
4178 struct coding_system *coding;
4179{
4180 struct ccl_program ccl;
4181 int multibytep = coding->dst_multibyte;
4182 int *charbuf = coding->charbuf;
4183 int *charbuf_end = charbuf + coding->charbuf_used;
4184 unsigned char *dst = coding->destination + coding->produced;
4185 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4186 unsigned char *adjusted_dst_end = dst_end - 1;
4187 int destination_charbuf[1024];
4188 int i, produced_chars = 0;
4189
4190 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4191
4192 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4193 ccl.dst_multibyte = coding->dst_multibyte;
4194
4195 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4196 {
4197 int dst_bytes = dst_end - dst;
4198 if (dst_bytes > 1024)
4199 dst_bytes = 1024;
4200
4201 ccl_driver (&ccl, charbuf, destination_charbuf,
4202 charbuf_end - charbuf, dst_bytes);
4203 charbuf += ccl.consumed;
4204 if (multibytep)
4205 for (i = 0; i < ccl.produced; i++)
4206 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4207 else
4208 {
4209 for (i = 0; i < ccl.produced; i++)
4210 *dst++ = destination_charbuf[i] & 0xFF;
4211 produced_chars += ccl.produced;
4212 }
4213 }
4214
4215 switch (ccl.status)
4216 {
4217 case CCL_STAT_SUSPEND_BY_SRC:
4218 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4219 break;
4220 case CCL_STAT_SUSPEND_BY_DST:
4221 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4222 break;
4223 case CCL_STAT_QUIT:
4224 case CCL_STAT_INVALID_CMD:
4225 coding->result = CODING_RESULT_INTERRUPT;
4226 break;
4227 default:
4228 coding->result = CODING_RESULT_SUCCESS;
4229 break;
4230 }
4231
4232 coding->produced_char += produced_chars;
4233 coding->produced = dst - coding->destination;
4234 return 0;
4ed46869
KH
4235}
4236
df7492f9
KH
4237
4238\f
4239/*** 10, 11. no-conversion handlers ***/
4240
4241/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4242
b73bfc1c 4243static void
df7492f9 4244decode_coding_raw_text (coding)
4ed46869 4245 struct coding_system *coding;
4ed46869 4246{
df7492f9 4247 coding->chars_at_source = 1;
2c78b7e1
KH
4248 coding->consumed_char = 0;
4249 coding->consumed = 0;
df7492f9
KH
4250 coding->result = CODING_RESULT_SUCCESS;
4251}
4ed46869 4252
df7492f9
KH
4253static int
4254encode_coding_raw_text (coding)
4255 struct coding_system *coding;
4256{
4257 int multibytep = coding->dst_multibyte;
4258 int *charbuf = coding->charbuf;
4259 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4260 unsigned char *dst = coding->destination + coding->produced;
4261 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4262 int produced_chars = 0;
4263 int c;
a5d301df 4264
df7492f9 4265 if (multibytep)
b73bfc1c 4266 {
df7492f9 4267 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4268
df7492f9
KH
4269 if (coding->src_multibyte)
4270 while (charbuf < charbuf_end)
4271 {
4272 ASSURE_DESTINATION (safe_room);
4273 c = *charbuf++;
4274 if (ASCII_CHAR_P (c))
4275 EMIT_ONE_ASCII_BYTE (c);
4276 else if (CHAR_BYTE8_P (c))
4277 {
4278 c = CHAR_TO_BYTE8 (c);
4279 EMIT_ONE_BYTE (c);
4280 }
4281 else
4282 {
4283 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4284
df7492f9
KH
4285 CHAR_STRING_ADVANCE (c, p1);
4286 while (p0 < p1)
4287 EMIT_ONE_BYTE (*p0);
4288 }
4289 }
b73bfc1c 4290 else
df7492f9
KH
4291 while (charbuf < charbuf_end)
4292 {
4293 ASSURE_DESTINATION (safe_room);
4294 c = *charbuf++;
4295 EMIT_ONE_BYTE (c);
4296 }
4297 }
4298 else
4299 {
4300 if (coding->src_multibyte)
b73bfc1c 4301 {
df7492f9
KH
4302 int safe_room = MAX_MULTIBYTE_LENGTH;
4303
4304 while (charbuf < charbuf_end)
b73bfc1c 4305 {
df7492f9
KH
4306 ASSURE_DESTINATION (safe_room);
4307 c = *charbuf++;
4308 if (ASCII_CHAR_P (c))
4309 *dst++ = c;
4310 else if (CHAR_BYTE8_P (c))
4311 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4312 else
df7492f9
KH
4313 CHAR_STRING_ADVANCE (c, dst);
4314 produced_chars++;
b73bfc1c 4315 }
4ed46869 4316 }
df7492f9
KH
4317 else
4318 {
4319 ASSURE_DESTINATION (charbuf_end - charbuf);
4320 while (charbuf < charbuf_end && dst < dst_end)
4321 *dst++ = *charbuf++;
4322 produced_chars = dst - (coding->destination + coding->dst_bytes);
4323 }
4ed46869 4324 }
df7492f9
KH
4325 coding->result = CODING_RESULT_SUCCESS;
4326 coding->produced_char += produced_chars;
4327 coding->produced = dst - coding->destination;
4328 return 0;
4ed46869
KH
4329}
4330
0a28aafb 4331static int
df7492f9
KH
4332detect_coding_charset (coding, mask)
4333 struct coding_system *coding;
4334 int *mask;
1397dc18 4335{
df7492f9
KH
4336 unsigned char *src = coding->source, *src_base = src;
4337 unsigned char *src_end = coding->source + coding->src_bytes;
4338 int multibytep = coding->src_multibyte;
4339 int consumed_chars = 0;
4340 Lisp_Object attrs, valids;
1397dc18 4341
df7492f9
KH
4342 coding = &coding_categories[coding_category_charset];
4343 attrs = CODING_ID_ATTRS (coding->id);
4344 valids = AREF (attrs, coding_attr_charset_valids);
4345
4346 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4347 src += coding->head_ascii;
1397dc18 4348
b73bfc1c 4349 while (1)
1397dc18 4350 {
df7492f9 4351 int c;
1397dc18 4352
df7492f9
KH
4353 ONE_MORE_BYTE (c);
4354 if (NILP (AREF (valids, c)))
4355 break;
4356 }
4357 *mask &= ~CATEGORY_MASK_CHARSET;
4358 return 0;
4ed46869 4359
df7492f9
KH
4360 no_more_source:
4361 *mask &= CATEGORY_MASK_CHARSET;
4362 return 1;
4363}
4ed46869 4364
b73bfc1c 4365static void
df7492f9 4366decode_coding_charset (coding)
4ed46869 4367 struct coding_system *coding;
4ed46869 4368{
df7492f9
KH
4369 unsigned char *src = coding->source + coding->consumed;
4370 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 4371 unsigned char *src_base;
df7492f9
KH
4372 int *charbuf = coding->charbuf;
4373 int *charbuf_end = charbuf + coding->charbuf_size;
4374 int consumed_chars = 0, consumed_chars_base;
4375 int multibytep = coding->src_multibyte;
4eb6d3f1 4376 Lisp_Object attrs, eol_type, charset_list, valids;
df7492f9
KH
4377
4378 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4379 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4380
df7492f9 4381 while (1)
4ed46869 4382 {
4eb6d3f1 4383 int c;
df7492f9
KH
4384
4385 src_base = src;
4386 consumed_chars_base = consumed_chars;
b73bfc1c 4387
df7492f9
KH
4388 if (charbuf >= charbuf_end)
4389 break;
4390
4eb6d3f1 4391 ONE_MORE_BYTE (c);
df7492f9 4392 if (c == '\r')
d46c5b12 4393 {
c7c66a95
KH
4394 /* Here we assume that no charset maps '\r' to something
4395 else. */
df7492f9 4396 if (EQ (eol_type, Qdos))
b73bfc1c 4397 {
4eb6d3f1
KH
4398 if (src < src_end
4399 && *src == '\n')
df7492f9 4400 ONE_MORE_BYTE (c);
b73bfc1c 4401 }
df7492f9 4402 else if (EQ (eol_type, Qmac))
b73bfc1c 4403 c = '\n';
d46c5b12 4404 }
df7492f9 4405 else
d46c5b12 4406 {
4eb6d3f1
KH
4407 Lisp_Object val;
4408 struct charset *charset;
c7c66a95 4409 int dim;
acb2a965
KH
4410 int len = 1;
4411 unsigned code = c;
4eb6d3f1
KH
4412
4413 val = AREF (valids, c);
4414 if (NILP (val))
4415 goto invalid_code;
c7c66a95 4416 if (INTEGERP (val))
4eb6d3f1 4417 {
c7c66a95
KH
4418 charset = CHARSET_FROM_ID (XFASTINT (val));
4419 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4420 while (len < dim)
4eb6d3f1 4421 {
acb2a965
KH
4422 ONE_MORE_BYTE (c);
4423 code = (code << 8) | c;
f9d71dcd 4424 len++;
4eb6d3f1 4425 }
c7c66a95
KH
4426 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4427 charset, code, c);
4428 }
4429 else
4430 {
4431 /* VAL is a list of charset IDs. It is assured that the
4432 list is sorted by charset dimensions (smaller one
4433 comes first). */
c7c66a95
KH
4434 while (CONSP (val))
4435 {
4436 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4437 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4438 while (len < dim)
c7c66a95 4439 {
acb2a965
KH
4440 ONE_MORE_BYTE (c);
4441 code = (code << 8) | c;
f9d71dcd 4442 len++;
c7c66a95 4443 }
c7c66a95
KH
4444 CODING_DECODE_CHAR (coding, src, src_base,
4445 src_end, charset, code, c);
4446 if (c >= 0)
4447 break;
4448 val = XCDR (val);
4449 }
4eb6d3f1 4450 }
df7492f9
KH
4451 if (c < 0)
4452 goto invalid_code;
d46c5b12 4453 }
df7492f9
KH
4454 *charbuf++ = c;
4455 continue;
4456
4457 invalid_code:
4458 src = src_base;
4459 consumed_chars = consumed_chars_base;
4460 ONE_MORE_BYTE (c);
4461 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4462 coding->errors++;
4ed46869
KH
4463 }
4464
df7492f9
KH
4465 no_more_source:
4466 coding->consumed_char += consumed_chars_base;
4467 coding->consumed = src_base - coding->source;
4468 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4469}
4470
df7492f9
KH
4471static int
4472encode_coding_charset (coding)
4ed46869 4473 struct coding_system *coding;
4ed46869 4474{
df7492f9
KH
4475 int multibytep = coding->dst_multibyte;
4476 int *charbuf = coding->charbuf;
4477 int *charbuf_end = charbuf + coding->charbuf_used;
4478 unsigned char *dst = coding->destination + coding->produced;
4479 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4480 int safe_room = MAX_MULTIBYTE_LENGTH;
4481 int produced_chars = 0;
df7492f9
KH
4482 Lisp_Object attrs, eol_type, charset_list;
4483 int ascii_compatible;
b73bfc1c 4484 int c;
b73bfc1c 4485
df7492f9 4486 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4487 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4488
df7492f9 4489 while (charbuf < charbuf_end)
4ed46869 4490 {
4eb6d3f1 4491 struct charset *charset;
df7492f9
KH
4492 unsigned code;
4493
4494 ASSURE_DESTINATION (safe_room);
4495 c = *charbuf++;
4496 if (ascii_compatible && ASCII_CHAR_P (c))
4497 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4498 else if (CHAR_BYTE8_P (c))
4499 {
4500 c = CHAR_TO_BYTE8 (c);
4501 EMIT_ONE_BYTE (c);
4502 }
d46c5b12 4503 else
4eb6d3f1
KH
4504 {
4505 charset = char_charset (c, charset_list, &code);
4506 if (charset)
4507 {
4508 if (CHARSET_DIMENSION (charset) == 1)
4509 EMIT_ONE_BYTE (code);
4510 else if (CHARSET_DIMENSION (charset) == 2)
4511 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4512 else if (CHARSET_DIMENSION (charset) == 3)
4513 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4514 else
4515 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4516 (code >> 8) & 0xFF, code & 0xFF);
4517 }
4518 else
41cbe562
KH
4519 {
4520 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4521 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4522 else
4523 c = coding->default_char;
4524 EMIT_ONE_BYTE (c);
4525 }
4eb6d3f1 4526 }
4ed46869
KH
4527 }
4528
df7492f9
KH
4529 coding->result = CODING_RESULT_SUCCESS;
4530 coding->produced_char += produced_chars;
4531 coding->produced = dst - coding->destination;
4532 return 0;
4ed46869
KH
4533}
4534
4535\f
1397dc18 4536/*** 10. C library functions ***/
4ed46869 4537
df7492f9 4538/* In Emacs Lisp, coding system is represented by a Lisp symbol which
4ed46869 4539 has a property `coding-system'. The value of this property is a
df7492f9 4540 vector of length 5 (called as coding-vector). Among elements of
4ed46869
KH
4541 this vector, the first (element[0]) and the fifth (element[4])
4542 carry important information for decoding/encoding. Before
4543 decoding/encoding, this information should be set in fields of a
4544 structure of type `coding_system'.
4545
df7492f9 4546 A value of property `coding-system' can be a symbol of another
4ed46869
KH
4547 subsidiary coding-system. In that case, Emacs gets coding-vector
4548 from that symbol.
4549
4550 `element[0]' contains information to be set in `coding->type'. The
4551 value and its meaning is as follows:
4552
0ef69138
KH
4553 0 -- coding_type_emacs_mule
4554 1 -- coding_type_sjis
df7492f9 4555 2 -- coding_type_iso_2022
0ef69138
KH
4556 3 -- coding_type_big5
4557 4 -- coding_type_ccl encoder/decoder written in CCL
4558 nil -- coding_type_no_conversion
4559 t -- coding_type_undecided (automatic conversion on decoding,
4560 no-conversion on encoding)
4ed46869
KH
4561
4562 `element[4]' contains information to be set in `coding->flags' and
4563 `coding->spec'. The meaning varies by `coding->type'.
4564
df7492f9 4565 If `coding->type' is `coding_type_iso_2022', element[4] is a vector
4ed46869
KH
4566 of length 32 (of which the first 13 sub-elements are used now).
4567 Meanings of these sub-elements are:
4568
df7492f9
KH
4569 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso_2022'
4570 If the value is an integer of valid charset, the charset is
4571 assumed to be designated to graphic register N initially.
4ed46869 4572
df7492f9
KH
4573 If the value is minus, it is a minus value of charset which
4574 reserves graphic register N, which means that the charset is
4575 not designated initially but should be designated to graphic
4576 register N just before encoding a character in that charset.
1397dc18 4577
df7492f9
KH
4578 If the value is nil, graphic register N is never used on
4579 encoding.
4580
4581 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
4582 Each value takes t or nil. See the section ISO2022 of
4583 `coding.h' for more information.
1397dc18 4584
df7492f9
KH
4585 If `coding->type' is `coding_type_big5', element[4] is t to denote
4586 BIG5-ETen or nil to denote BIG5-HKU.
4ed46869 4587
df7492f9 4588 If `coding->type' takes the other value, element[4] is ignored.
27901516 4589
df7492f9
KH
4590 Emacs Lisp's coding system also carries information about format of
4591 end-of-line in a value of property `eol-type'. If the value is
4592 integer, 0 means eol_lf, 1 means eol_crlf, and 2 means eol_cr. If
4593 it is not integer, it should be a vector of subsidiary coding
4594 systems of which property `eol-type' has one of above values.
4ed46869 4595
df7492f9 4596*/
4ed46869 4597
df7492f9
KH
4598/* Setup coding context CODING from information about CODING_SYSTEM.
4599 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4600 CODING_SYSTEM is invalid, signal an error. */
ec6d2bb8
KH
4601
4602void
df7492f9
KH
4603setup_coding_system (coding_system, coding)
4604 Lisp_Object coding_system;
ec6d2bb8
KH
4605 struct coding_system *coding;
4606{
df7492f9
KH
4607 Lisp_Object attrs;
4608 Lisp_Object eol_type;
4609 Lisp_Object coding_type;
4610 Lisp_Object val;
ec6d2bb8 4611
df7492f9
KH
4612 if (NILP (coding_system))
4613 coding_system = Qno_conversion;
4614
4615 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4616
4617 attrs = CODING_ID_ATTRS (coding->id);
4618 eol_type = CODING_ID_EOL_TYPE (coding->id);
4619
4620 coding->mode = 0;
4621 coding->head_ascii = -1;
4622 coding->common_flags
4623 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4624
4625 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4626 coding->max_charset_id = XSTRING (val)->size - 1;
4627 coding->safe_charsets = (char *) XSTRING (val)->data;
4628 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4629
4630 coding_type = CODING_ATTR_TYPE (attrs);
4631 if (EQ (coding_type, Qundecided))
4632 {
4633 coding->detector = NULL;
4634 coding->decoder = decode_coding_raw_text;
4635 coding->encoder = encode_coding_raw_text;
4636 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4637 }
4638 else if (EQ (coding_type, Qiso_2022))
4639 {
4640 int i;
4641 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4642
4643 /* Invoke graphic register 0 to plane 0. */
4644 CODING_ISO_INVOCATION (coding, 0) = 0;
4645 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4646 CODING_ISO_INVOCATION (coding, 1)
4647 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4648 /* Setup the initial status of designation. */
4649 for (i = 0; i < 4; i++)
4650 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4651 /* Not single shifting initially. */
4652 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4653 /* Beginning of buffer should also be regarded as bol. */
4654 CODING_ISO_BOL (coding) = 1;
4655 coding->detector = detect_coding_iso_2022;
4656 coding->decoder = decode_coding_iso_2022;
4657 coding->encoder = encode_coding_iso_2022;
4658 if (flags & CODING_ISO_FLAG_SAFE)
4659 coding->mode |= CODING_MODE_SAFE_ENCODING;
4660 coding->common_flags
4661 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4662 | CODING_REQUIRE_FLUSHING_MASK);
4663 if (flags & CODING_ISO_FLAG_COMPOSITION)
4664 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4665 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4666 {
4667 setup_iso_safe_charsets (attrs);
4668 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4669 coding->max_charset_id = XSTRING (val)->size - 1;
4670 coding->safe_charsets = (char *) XSTRING (val)->data;
4671 }
4672 CODING_ISO_FLAGS (coding) = flags;
4673 }
4674 else if (EQ (coding_type, Qcharset))
4675 {
4676 coding->detector = detect_coding_charset;
4677 coding->decoder = decode_coding_charset;
4678 coding->encoder = encode_coding_charset;
4679 coding->common_flags
4680 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4681 }
4682 else if (EQ (coding_type, Qutf_8))
4683 {
4684 coding->detector = detect_coding_utf_8;
4685 coding->decoder = decode_coding_utf_8;
4686 coding->encoder = encode_coding_utf_8;
4687 coding->common_flags
4688 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4689 }
4690 else if (EQ (coding_type, Qutf_16))
4691 {
4692 val = AREF (attrs, coding_attr_utf_16_bom);
4693 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4694 : EQ (val, Qt) ? utf_16_with_bom
4695 : utf_16_without_bom);
4696 val = AREF (attrs, coding_attr_utf_16_endian);
4697 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4698 : utf_16_little_endian);
e19c3639 4699 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4700 coding->detector = detect_coding_utf_16;
4701 coding->decoder = decode_coding_utf_16;
4702 coding->encoder = encode_coding_utf_16;
4703 coding->common_flags
4704 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4705 }
4706 else if (EQ (coding_type, Qccl))
4707 {
4708 coding->detector = detect_coding_ccl;
4709 coding->decoder = decode_coding_ccl;
4710 coding->encoder = encode_coding_ccl;
4711 coding->common_flags
4712 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4713 | CODING_REQUIRE_FLUSHING_MASK);
4714 }
4715 else if (EQ (coding_type, Qemacs_mule))
4716 {
4717 coding->detector = detect_coding_emacs_mule;
4718 coding->decoder = decode_coding_emacs_mule;
4719 coding->encoder = encode_coding_emacs_mule;
4720 coding->common_flags
4721 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4722 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4723 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4724 {
4725 Lisp_Object tail, safe_charsets;
4726 int max_charset_id = 0;
4727
4728 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4729 tail = XCDR (tail))
4730 if (max_charset_id < XFASTINT (XCAR (tail)))
4731 max_charset_id = XFASTINT (XCAR (tail));
4732 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4733 make_number (255));
4734 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4735 tail = XCDR (tail))
4736 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4737 coding->max_charset_id = max_charset_id;
4738 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4739 }
4740 }
4741 else if (EQ (coding_type, Qshift_jis))
4742 {
4743 coding->detector = detect_coding_sjis;
4744 coding->decoder = decode_coding_sjis;
4745 coding->encoder = encode_coding_sjis;
4746 coding->common_flags
4747 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4748 }
4749 else if (EQ (coding_type, Qbig5))
4750 {
4751 coding->detector = detect_coding_big5;
4752 coding->decoder = decode_coding_big5;
4753 coding->encoder = encode_coding_big5;
4754 coding->common_flags
4755 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4756 }
4757 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4758 {
df7492f9
KH
4759 coding->detector = NULL;
4760 coding->decoder = decode_coding_raw_text;
4761 coding->encoder = encode_coding_raw_text;
4762 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
ec6d2bb8 4763 }
df7492f9
KH
4764
4765 return;
ec6d2bb8
KH
4766}
4767
df7492f9
KH
4768/* Return raw-text or one of its subsidiaries that has the same
4769 eol_type as CODING-SYSTEM. */
ec6d2bb8 4770
df7492f9
KH
4771Lisp_Object
4772raw_text_coding_system (coding_system)
4773 Lisp_Object coding_system;
ec6d2bb8 4774{
0be8721c 4775 Lisp_Object spec, attrs;
df7492f9
KH
4776 Lisp_Object eol_type, raw_text_eol_type;
4777
4778 spec = CODING_SYSTEM_SPEC (coding_system);
4779 attrs = AREF (spec, 0);
4780
4781 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4782 return coding_system;
ec6d2bb8 4783
df7492f9
KH
4784 eol_type = AREF (spec, 2);
4785 if (VECTORP (eol_type))
4786 return Qraw_text;
4787 spec = CODING_SYSTEM_SPEC (Qraw_text);
4788 raw_text_eol_type = AREF (spec, 2);
4789 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4790 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4791 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
4792}
4793
54f78171 4794
df7492f9
KH
4795/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4796 does, return one of the subsidiary that has the same eol-spec as
4797 PARENT. Otherwise, return CODING_SYSTEM. */
4798
4799Lisp_Object
4800coding_inherit_eol_type (coding_system, parent)
b74e4686 4801 Lisp_Object coding_system, parent;
54f78171 4802{
df7492f9 4803 Lisp_Object spec, attrs, eol_type;
54f78171 4804
df7492f9
KH
4805 spec = CODING_SYSTEM_SPEC (coding_system);
4806 attrs = AREF (spec, 0);
4807 eol_type = AREF (spec, 2);
4808 if (VECTORP (eol_type))
4809 {
4810 Lisp_Object parent_spec;
df7492f9
KH
4811 Lisp_Object parent_eol_type;
4812
4813 parent_spec
4814 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4815 parent_eol_type = AREF (parent_spec, 2);
4816 if (EQ (parent_eol_type, Qunix))
4817 coding_system = AREF (eol_type, 0);
4818 else if (EQ (parent_eol_type, Qdos))
4819 coding_system = AREF (eol_type, 1);
4820 else if (EQ (parent_eol_type, Qmac))
4821 coding_system = AREF (eol_type, 2);
54f78171 4822 }
df7492f9 4823 return coding_system;
54f78171
KH
4824}
4825
4ed46869
KH
4826/* Emacs has a mechanism to automatically detect a coding system if it
4827 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4828 it's impossible to distinguish some coding systems accurately
4829 because they use the same range of codes. So, at first, coding
4830 systems are categorized as follows:
4831
0ef69138 4832 o coding-category-emacs-mule
4ed46869
KH
4833
4834 The category for a coding system which has the same code range
4835 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 4836 symbol) `emacs-mule' by default.
4ed46869
KH
4837
4838 o coding-category-sjis
4839
4840 The category for a coding system which has the same code range
4841 as SJIS. Assigned the coding-system (Lisp
7717c392 4842 symbol) `japanese-shift-jis' by default.
4ed46869
KH
4843
4844 o coding-category-iso-7
4845
4846 The category for a coding system which has the same code range
7717c392 4847 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
4848 shift and single shift functions. This can encode/decode all
4849 charsets. Assigned the coding-system (Lisp symbol)
4850 `iso-2022-7bit' by default.
4851
4852 o coding-category-iso-7-tight
4853
4854 Same as coding-category-iso-7 except that this can
4855 encode/decode only the specified charsets.
4ed46869
KH
4856
4857 o coding-category-iso-8-1
4858
4859 The category for a coding system which has the same code range
4860 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4861 for DIMENSION1 charset. This doesn't use any locking shift
4862 and single shift functions. Assigned the coding-system (Lisp
4863 symbol) `iso-latin-1' by default.
4ed46869
KH
4864
4865 o coding-category-iso-8-2
4866
4867 The category for a coding system which has the same code range
4868 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4869 for DIMENSION2 charset. This doesn't use any locking shift
4870 and single shift functions. Assigned the coding-system (Lisp
4871 symbol) `japanese-iso-8bit' by default.
4ed46869 4872
7717c392 4873 o coding-category-iso-7-else
4ed46869
KH
4874
4875 The category for a coding system which has the same code range
df7492f9 4876 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
4877 single shift functions. Assigned the coding-system (Lisp
4878 symbol) `iso-2022-7bit-lock' by default.
4879
4880 o coding-category-iso-8-else
4881
4882 The category for a coding system which has the same code range
df7492f9 4883 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
4884 single shift functions. Assigned the coding-system (Lisp
4885 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
4886
4887 o coding-category-big5
4888
4889 The category for a coding system which has the same code range
4890 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 4891 `cn-big5' by default.
4ed46869 4892
fa42c37f
KH
4893 o coding-category-utf-8
4894
4895 The category for a coding system which has the same code range
4896 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4897 symbol) `utf-8' by default.
4898
4899 o coding-category-utf-16-be
4900
4901 The category for a coding system in which a text has an
4902 Unicode signature (cf. Unicode Standard) in the order of BIG
4903 endian at the head. Assigned the coding-system (Lisp symbol)
4904 `utf-16-be' by default.
4905
4906 o coding-category-utf-16-le
4907
4908 The category for a coding system in which a text has an
4909 Unicode signature (cf. Unicode Standard) in the order of
4910 LITTLE endian at the head. Assigned the coding-system (Lisp
4911 symbol) `utf-16-le' by default.
4912
1397dc18
KH
4913 o coding-category-ccl
4914
4915 The category for a coding system of which encoder/decoder is
4916 written in CCL programs. The default value is nil, i.e., no
4917 coding system is assigned.
4918
4ed46869
KH
4919 o coding-category-binary
4920
4921 The category for a coding system not categorized in any of the
4922 above. Assigned the coding-system (Lisp symbol)
e0e989f6 4923 `no-conversion' by default.
4ed46869
KH
4924
4925 Each of them is a Lisp symbol and the value is an actual
df7492f9 4926 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
4927 What Emacs does actually is to detect a category of coding system.
4928 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 4929 decide only one possible category, it selects a category of the
4ed46869
KH
4930 highest priority. Priorities of categories are also specified by a
4931 user in a Lisp variable `coding-category-list'.
4932
4933*/
4934
df7492f9
KH
4935#define EOL_SEEN_NONE 0
4936#define EOL_SEEN_LF 1
4937#define EOL_SEEN_CR 2
4938#define EOL_SEEN_CRLF 4
4ed46869 4939
df7492f9
KH
4940/* Detect how end-of-line of a text of length CODING->src_bytes
4941 pointed by CODING->source is encoded. Return one of
4942 EOL_SEEN_XXX. */
4ed46869 4943
bc4bc72a
RS
4944#define MAX_EOL_CHECK_COUNT 3
4945
d46c5b12 4946static int
df7492f9
KH
4947detect_eol (coding, source, src_bytes)
4948 struct coding_system *coding;
d46c5b12 4949 unsigned char *source;
df7492f9 4950 EMACS_INT src_bytes;
4ed46869 4951{
df7492f9 4952 Lisp_Object attrs, coding_type;
d46c5b12 4953 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 4954 unsigned char c;
df7492f9
KH
4955 int total = 0;
4956 int eol_seen = EOL_SEEN_NONE;
4ed46869 4957
df7492f9
KH
4958 attrs = CODING_ID_ATTRS (coding->id);
4959 coding_type = CODING_ATTR_TYPE (attrs);
d46c5b12 4960
df7492f9 4961 if (EQ (coding_type, Qccl))
4ed46869 4962 {
df7492f9 4963 int msb, lsb;
fa42c37f 4964
df7492f9
KH
4965 msb = coding->spec.utf_16.endian == utf_16_little_endian;
4966 lsb = 1 - msb;
fa42c37f 4967
df7492f9 4968 while (src + 1 < src_end)
fa42c37f 4969 {
df7492f9
KH
4970 c = src[lsb];
4971 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 4972 {
df7492f9
KH
4973 int this_eol;
4974
4975 if (c == '\n')
4976 this_eol = EOL_SEEN_LF;
4977 else if (src + 3 >= src_end
4978 || src[msb + 2] != 0
4979 || src[lsb + 2] != '\n')
4980 this_eol = EOL_SEEN_CR;
fa42c37f 4981 else
df7492f9
KH
4982 this_eol = EOL_SEEN_CRLF;
4983
4984 if (eol_seen == EOL_SEEN_NONE)
4985 /* This is the first end-of-line. */
4986 eol_seen = this_eol;
4987 else if (eol_seen != this_eol)
fa42c37f 4988 {
df7492f9
KH
4989 /* The found type is different from what found before. */
4990 eol_seen = EOL_SEEN_LF;
4991 break;
fa42c37f 4992 }
df7492f9
KH
4993 if (++total == MAX_EOL_CHECK_COUNT)
4994 break;
fa42c37f 4995 }
df7492f9 4996 src += 2;
fa42c37f 4997 }
df7492f9 4998 }
d46c5b12 4999 else
27901516 5000 {
df7492f9 5001 while (src < src_end)
27901516 5002 {
df7492f9
KH
5003 c = *src++;
5004 if (c == '\n' || c == '\r')
5005 {
5006 int this_eol;
d46c5b12 5007
df7492f9
KH
5008 if (c == '\n')
5009 this_eol = EOL_SEEN_LF;
5010 else if (src >= src_end || *src != '\n')
5011 this_eol = EOL_SEEN_CR;
5012 else
5013 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 5014
df7492f9
KH
5015 if (eol_seen == EOL_SEEN_NONE)
5016 /* This is the first end-of-line. */
5017 eol_seen = this_eol;
5018 else if (eol_seen != this_eol)
5019 {
5020 /* The found type is different from what found before. */
5021 eol_seen = EOL_SEEN_LF;
5022 break;
5023 }
5024 if (++total == MAX_EOL_CHECK_COUNT)
5025 break;
5026 }
5027 }
73be902c 5028 }
df7492f9 5029 return eol_seen;
73be902c
KH
5030}
5031
df7492f9 5032
73be902c 5033static void
df7492f9
KH
5034adjust_coding_eol_type (coding, eol_seen)
5035 struct coding_system *coding;
5036 int eol_seen;
73be902c 5037{
0be8721c 5038 Lisp_Object eol_type;
df7492f9
KH
5039
5040 eol_type = CODING_ID_EOL_TYPE (coding->id);
5041 if (eol_seen & EOL_SEEN_LF)
5042 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5043 else if (eol_type & EOL_SEEN_CRLF)
5044 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5045 else if (eol_type & EOL_SEEN_CR)
5046 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12
KH
5047}
5048
df7492f9
KH
5049/* Detect how a text specified in CODING is encoded. If a coding
5050 system is detected, update fields of CODING by the detected coding
5051 system. */
5052
5053void
5054detect_coding (coding)
d46c5b12 5055 struct coding_system *coding;
d46c5b12 5056{
df7492f9
KH
5057 unsigned char *src, *src_end;
5058 Lisp_Object attrs, coding_type;
d46c5b12 5059
df7492f9
KH
5060 coding->consumed = coding->consumed_char = 0;
5061 coding->produced = coding->produced_char = 0;
5062 coding_set_source (coding);
1c3478b0 5063
df7492f9 5064 src_end = coding->source + coding->src_bytes;
1c3478b0 5065
df7492f9
KH
5066 /* If we have not yet decided the text encoding type, detect it
5067 now. */
5068 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5069 {
df7492f9
KH
5070 int mask = CATEGORY_MASK_ANY;
5071 int c, i;
5072
5073 for (src = coding->source; src < src_end; src++)
5074 {
5075 c = *src;
5076 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5077 || c == ISO_CODE_SI
5078 || c == ISO_CODE_SO)))
5079 break;
5080 }
5081 coding->head_ascii = src - (coding->source + coding->consumed);
5082
5083 if (coding->head_ascii < coding->src_bytes)
1c3478b0 5084 {
df7492f9
KH
5085 int detected = 0;
5086
5087 for (i = 0; i < coding_category_raw_text; i++)
1c3478b0 5088 {
df7492f9
KH
5089 enum coding_category category = coding_priorities[i];
5090 struct coding_system *this = coding_categories + category;
5091
5092 if (category >= coding_category_raw_text
5093 || detected & (1 << category))
5094 continue;
5095
5096 if (this->id < 0)
1c3478b0 5097 {
df7492f9
KH
5098 /* No coding system of this category is defined. */
5099 mask &= ~(1 << category);
5100 }
5101 else
5102 {
5103 detected |= detected_mask[category];
5104 if ((*(this->detector)) (coding, &mask))
5105 break;
1c3478b0
KH
5106 }
5107 }
df7492f9
KH
5108 if (! mask)
5109 setup_coding_system (Qraw_text, coding);
5110 else if (mask != CATEGORY_MASK_ANY)
5111 for (i = 0; i < coding_category_raw_text; i++)
5112 {
5113 enum coding_category category = coding_priorities[i];
5114 struct coding_system *this = coding_categories + category;
5115
5116 if (mask & (1 << category))
5117 {
5118 setup_coding_system (CODING_ID_NAME (this->id), coding);
5119 break;
5120 }
5121 }
1c3478b0 5122 }
b73bfc1c 5123 }
69f76525 5124
df7492f9
KH
5125 attrs = CODING_ID_ATTRS (coding->id);
5126 coding_type = CODING_ATTR_TYPE (attrs);
5127
5128 /* If we have not yet decided the EOL type, detect it now. But, the
5129 detection is impossible for a CCL based coding system, in which
5130 case, we detct the EOL type after decoding. */
5131 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5132 && ! EQ (coding_type, Qccl))
d46c5b12 5133 {
df7492f9
KH
5134 int eol_seen = detect_eol (coding, coding->source, coding->src_bytes);
5135
5136 if (eol_seen != EOL_SEEN_NONE)
5137 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 5138 }
4ed46869
KH
5139}
5140
aaaf0b1e
KH
5141
5142static void
df7492f9 5143decode_eol (coding)
aaaf0b1e 5144 struct coding_system *coding;
aaaf0b1e 5145{
df7492f9 5146 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 5147 {
df7492f9
KH
5148 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5149 unsigned char *pend = p + coding->produced;
5150 int eol_seen = EOL_SEEN_NONE;
aaaf0b1e 5151
df7492f9 5152 for (; p < pend; p++)
aaaf0b1e 5153 {
df7492f9
KH
5154 if (*p == '\n')
5155 eol_seen |= EOL_SEEN_LF;
5156 else if (*p == '\r')
aaaf0b1e 5157 {
df7492f9 5158 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5159 {
df7492f9
KH
5160 eol_seen |= EOL_SEEN_CRLF;
5161 p++;
aaaf0b1e 5162 }
aaaf0b1e 5163 else
df7492f9 5164 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5165 }
aaaf0b1e 5166 }
df7492f9
KH
5167 if (eol_seen != EOL_SEEN_NONE)
5168 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5169 }
aaaf0b1e 5170
df7492f9
KH
5171 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5172 {
5173 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5174 unsigned char *pend = p + coding->produced;
5175
5176 for (; p < pend; p++)
5177 if (*p == '\r')
5178 *p = '\n';
5179 }
5180 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5181 {
5182 unsigned char *p, *pbeg, *pend;
5183 Lisp_Object undo_list;
5184
5185 move_gap_both (coding->dst_pos + coding->produced_char,
5186 coding->dst_pos_byte + coding->produced);
5187 undo_list = current_buffer->undo_list;
5188 current_buffer->undo_list = Qt;
5189 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, Qnil);
5190 current_buffer->undo_list = undo_list;
5191 pbeg = GPT_ADDR;
5192 pend = pbeg + coding->produced;
5193
5194 for (p = pend - 1; p >= pbeg; p--)
5195 if (*p == '\r')
5196 {
5197 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5198 pend--;
5199 }
5200 coding->produced_char -= coding->produced - (pend - pbeg);
5201 coding->produced = pend - pbeg;
5202 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e
KH
5203 }
5204}
5205
df7492f9
KH
5206static void
5207translate_chars (coding, table)
4ed46869 5208 struct coding_system *coding;
df7492f9 5209 Lisp_Object table;
4ed46869 5210{
df7492f9
KH
5211 int *charbuf = coding->charbuf;
5212 int *charbuf_end = charbuf + coding->charbuf_used;
5213 int c;
5214
5215 if (coding->chars_at_source)
5216 return;
4ed46869 5217
df7492f9 5218 while (charbuf < charbuf_end)
8844fa83 5219 {
df7492f9
KH
5220 c = *charbuf;
5221 if (c < 0)
5222 charbuf += c;
5223 else
5224 *charbuf++ = translate_char (table, c);
8844fa83 5225 }
df7492f9 5226}
4ed46869 5227
df7492f9
KH
5228static int
5229produce_chars (coding)
5230 struct coding_system *coding;
5231{
5232 unsigned char *dst = coding->destination + coding->produced;
5233 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5234 int produced;
5235 int produced_chars = 0;
b73bfc1c 5236
df7492f9 5237 if (! coding->chars_at_source)
4ed46869 5238 {
df7492f9
KH
5239 /* Characters are in coding->charbuf. */
5240 int *buf = coding->charbuf;
5241 int *buf_end = buf + coding->charbuf_used;
5242 unsigned char *adjusted_dst_end;
4ed46869 5243
df7492f9
KH
5244 if (BUFFERP (coding->src_object)
5245 && EQ (coding->src_object, coding->dst_object))
5246 dst_end = coding->source + coding->consumed;
5247 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5248
df7492f9
KH
5249 while (buf < buf_end)
5250 {
5251 int c = *buf++;
5252
5253 if (dst >= adjusted_dst_end)
5254 {
5255 dst = alloc_destination (coding,
5256 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5257 dst);
5258 dst_end = coding->destination + coding->dst_bytes;
5259 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5260 }
5261 if (c >= 0)
5262 {
5263 if (coding->dst_multibyte
5264 || ! CHAR_BYTE8_P (c))
5265 CHAR_STRING_ADVANCE (c, dst);
5266 else
5267 *dst++ = CHAR_TO_BYTE8 (c);
5268 produced_chars++;
5269 }
5270 else
5271 /* This is an annotation data. */
5272 buf -= c + 1;
5273 }
5274 }
5275 else
5276 {
df7492f9
KH
5277 unsigned char *src = coding->source;
5278 unsigned char *src_end = src + coding->src_bytes;
5279 Lisp_Object eol_type;
b73bfc1c 5280
df7492f9 5281 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5282
df7492f9 5283 if (coding->src_multibyte != coding->dst_multibyte)
aaaf0b1e 5284 {
df7492f9
KH
5285 if (coding->src_multibyte)
5286 {
71c81426 5287 int multibytep = 1;
df7492f9 5288 int consumed_chars;
d46c5b12 5289
df7492f9
KH
5290 while (1)
5291 {
5292 unsigned char *src_base = src;
5293 int c;
b73bfc1c 5294
df7492f9
KH
5295 ONE_MORE_BYTE (c);
5296 if (c == '\r')
5297 {
5298 if (EQ (eol_type, Qdos))
5299 {
5300 if (src < src_end
5301 && *src == '\n')
5302 c = *src++;
5303 }
5304 else if (EQ (eol_type, Qmac))
5305 c = '\n';
5306 }
5307 if (dst == dst_end)
5308 {
2c78b7e1 5309 coding->consumed = src - coding->source;
b73bfc1c 5310
2c78b7e1
KH
5311 if (EQ (coding->src_object, coding->dst_object))
5312 dst_end = src;
5313 if (dst == dst_end)
5314 {
5315 dst = alloc_destination (coding, src_end - src + 1,
5316 dst);
5317 dst_end = coding->destination + coding->dst_bytes;
5318 coding_set_source (coding);
5319 src = coding->source + coding->consumed;
5320 src_end = coding->source + coding->src_bytes;
5321 }
df7492f9
KH
5322 }
5323 *dst++ = c;
5324 produced_chars++;
5325 }
5326 no_more_source:
5327 ;
5328 }
5329 else
5330 while (src < src_end)
5331 {
71c81426 5332 int multibytep = 1;
df7492f9 5333 int c = *src++;
b73bfc1c 5334
df7492f9
KH
5335 if (c == '\r')
5336 {
5337 if (EQ (eol_type, Qdos))
5338 {
5339 if (src < src_end
5340 && *src == '\n')
5341 c = *src++;
5342 }
5343 else if (EQ (eol_type, Qmac))
5344 c = '\n';
5345 }
5346 if (dst >= dst_end - 1)
5347 {
2c78b7e1 5348 coding->consumed = src - coding->source;
df7492f9 5349
2c78b7e1
KH
5350 if (EQ (coding->src_object, coding->dst_object))
5351 dst_end = src;
5352 if (dst >= dst_end - 1)
5353 {
5354 dst = alloc_destination (coding, src_end - src + 2,
5355 dst);
5356 dst_end = coding->destination + coding->dst_bytes;
5357 coding_set_source (coding);
5358 src = coding->source + coding->consumed;
5359 src_end = coding->source + coding->src_bytes;
5360 }
df7492f9
KH
5361 }
5362 EMIT_ONE_BYTE (c);
5363 }
d46c5b12 5364 }
df7492f9
KH
5365 else
5366 {
5367 if (!EQ (coding->src_object, coding->dst_object))
5368 {
5369 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5370
df7492f9
KH
5371 if (require > 0)
5372 {
5373 EMACS_INT offset = src - coding->source;
5374
5375 dst = alloc_destination (coding, require, dst);
5376 coding_set_source (coding);
5377 src = coding->source + offset;
5378 src_end = coding->source + coding->src_bytes;
5379 }
5380 }
5381 produced_chars = coding->src_chars;
5382 while (src < src_end)
5383 {
5384 int c = *src++;
5385
5386 if (c == '\r')
5387 {
5388 if (EQ (eol_type, Qdos))
5389 {
5390 if (src < src_end
5391 && *src == '\n')
5392 c = *src++;
5393 produced_chars--;
5394 }
5395 else if (EQ (eol_type, Qmac))
5396 c = '\n';
5397 }
5398 *dst++ = c;
5399 }
5400 }
2c78b7e1
KH
5401 coding->consumed = coding->src_bytes;
5402 coding->consumed_char = coding->src_chars;
b73bfc1c 5403 }
4ed46869 5404
df7492f9
KH
5405 produced = dst - (coding->destination + coding->produced);
5406 if (BUFFERP (coding->dst_object))
5407 insert_from_gap (produced_chars, produced);
5408 coding->produced += produced;
5409 coding->produced_char += produced_chars;
5410 return produced_chars;
b73bfc1c 5411}
52d41803 5412
df7492f9
KH
5413/* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5414 or
5415 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5416 */
4ed46869 5417
df7492f9
KH
5418static INLINE void
5419produce_composition (coding, charbuf)
4ed46869 5420 struct coding_system *coding;
df7492f9 5421 int *charbuf;
4ed46869 5422{
df7492f9
KH
5423 Lisp_Object buffer;
5424 int len;
5425 EMACS_INT pos;
5426 enum composition_method method;
5427 int cmp_len;
5428 Lisp_Object components;
5429
5430 buffer = coding->dst_object;
5431 len = -charbuf[0];
5432 pos = coding->dst_pos + charbuf[1];
5433 method = (enum composition_method) (charbuf[3]);
5434 cmp_len = charbuf[4];
5435
5436 if (method == COMPOSITION_RELATIVE)
5437 components = Qnil;
5438 else
d46c5b12 5439 {
df7492f9
KH
5440 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5441 int i;
4ed46869 5442
df7492f9
KH
5443 len -= 5;
5444 charbuf += 5;
5445 for (i = 0; i < len; i++)
5446 args[i] = make_number (charbuf[i]);
5447 components = (method == COMPOSITION_WITH_ALTCHARS
5448 ? Fstring (len, args) : Fvector (len, args));
5449 }
5450 compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5451}
b73bfc1c 5452
df7492f9
KH
5453static int *
5454save_composition_data (buf, buf_end, prop)
5455 int *buf, *buf_end;
5456 Lisp_Object prop;
5457{
5458 enum composition_method method = COMPOSITION_METHOD (prop);
5459 int cmp_len = COMPOSITION_LENGTH (prop);
4ed46869 5460
df7492f9
KH
5461 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5462 return NULL;
d46c5b12 5463
df7492f9
KH
5464 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5465 buf[2] = method;
5466 buf[3] = cmp_len;
b73bfc1c 5467
df7492f9
KH
5468 if (method == COMPOSITION_RELATIVE)
5469 buf[0] = 4;
5470 else
b73bfc1c 5471 {
df7492f9
KH
5472 Lisp_Object components;
5473 int len, i;
b73bfc1c 5474
df7492f9
KH
5475 components = COMPOSITION_COMPONENTS (prop);
5476 if (VECTORP (components))
d46c5b12 5477 {
df7492f9
KH
5478 len = XVECTOR (components)->size;
5479 for (i = 0; i < len; i++)
5480 buf[4 + i] = XINT (AREF (components, i));
5481 }
5482 else if (STRINGP (components))
5483 {
5484 int i_byte;
b73bfc1c 5485
df7492f9
KH
5486 len = XSTRING (components)->size;
5487 i = i_byte = 0;
5488 while (i < len)
5489 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5490 }
5491 else if (INTEGERP (components))
5492 {
5493 len = 1;
5494 buf[4] = XINT (components);
5495 }
5496 else if (CONSP (components))
5497 {
5498 for (len = 0; CONSP (components);
5499 len++, components = XCDR (components))
5500 buf[4 + len] = XINT (XCAR (components));
d46c5b12 5501 }
df7492f9
KH
5502 else
5503 abort ();
5504 buf[0] = 4 + len;
4ed46869 5505 }
df7492f9 5506 return (buf + buf[0]);
4ed46869
KH
5507}
5508
df7492f9
KH
/* Preferred number of elements of the conversion work area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the work area CODING->charbuf with alloca and record its
   size (in elements) in CODING->charbuf_size.

   CHARBUF_SIZE elements are tried first; should alloca report
   failure by returning NULL, the request is halved and retried as
   long as it stays above 1024 elements.  If no area could be
   obtained, CODING->result is set to CODING_RESULT_INSUFFICIENT_MEM
   and that value is RETURNED FROM THE ENCLOSING FUNCTION, so this
   macro may only be used in functions returning int.

   Because the area comes from alloca, it lives in the stack frame of
   the function using this macro and vanishes when that function
   returns.  CODING may be any expression of type `struct
   coding_system *'; it is evaluated several times.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        (coding)->result = CODING_RESULT_INSUFFICIENT_MEM;              \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
4ed46869 5530
d46c5b12
KH
5531
5532static void
df7492f9 5533produce_annotation (coding)
d46c5b12 5534 struct coding_system *coding;
d46c5b12 5535{
df7492f9
KH
5536 int *charbuf = coding->charbuf;
5537 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5538
df7492f9 5539 while (charbuf < charbuf_end)
d46c5b12 5540 {
df7492f9
KH
5541 if (*charbuf >= 0)
5542 charbuf++;
d46c5b12 5543 else
d46c5b12 5544 {
df7492f9
KH
5545 int len = -*charbuf;
5546 switch (charbuf[2])
5547 {
5548 case CODING_ANNOTATE_COMPOSITION_MASK:
5549 produce_composition (coding, charbuf);
5550 break;
5551 default:
5552 abort ();
5553 }
5554 charbuf += len;
d46c5b12 5555 }
df7492f9
KH
5556 }
5557}
d46c5b12 5558
df7492f9
KH
5559/* Decode the data at CODING->src_object into CODING->dst_object.
5560 CODING->src_object is a buffer, a string, or nil.
5561 CODING->dst_object is a buffer.
de79a6a5 5562
df7492f9
KH
5563 If CODING->src_object is a buffer, it must be the current buffer.
5564 In this case, if CODING->src_pos is positive, it is a position of
5565 the source text in the buffer, otherwise, the source text is in the
5566 gap area of the buffer, and CODING->src_pos specifies the offset of
5567 the text from GPT (which must be the same as PT). If this is the
5568 same buffer as CODING->dst_object, CODING->src_pos must be
5569 negative.
b73bfc1c 5570
df7492f9
KH
5571 If CODING->src_object is a string, CODING->src_pos in an index to
5572 that string.
d46c5b12 5573
df7492f9
KH
5574 If CODING->src_object is nil, CODING->source must already point to
5575 the non-relocatable memory area. In this case, CODING->src_pos is
5576 an offset from CODING->source.
d46c5b12 5577
df7492f9
KH
5578 The decoded data is inserted at the current point of the buffer
5579 CODING->dst_object.
5580*/
5581
5582static int
5583decode_coding (coding)
d46c5b12 5584 struct coding_system *coding;
d46c5b12 5585{
df7492f9 5586 Lisp_Object attrs;
d46c5b12 5587
df7492f9
KH
5588 if (BUFFERP (coding->src_object)
5589 && coding->src_pos > 0
5590 && coding->src_pos < GPT
5591 && coding->src_pos + coding->src_chars > GPT)
5592 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5593
df7492f9 5594 if (BUFFERP (coding->dst_object))
88993dfd 5595 {
df7492f9
KH
5596 if (current_buffer != XBUFFER (coding->dst_object))
5597 set_buffer_internal (XBUFFER (coding->dst_object));
5598 if (GPT != PT)
5599 move_gap_both (PT, PT_BYTE);
88993dfd
KH
5600 }
5601
df7492f9
KH
5602 coding->consumed = coding->consumed_char = 0;
5603 coding->produced = coding->produced_char = 0;
5604 coding->chars_at_source = 0;
5605 coding->result = CODING_RESULT_SUCCESS;
5606 coding->errors = 0;
5607
5608 ALLOC_CONVERSION_WORK_AREA (coding);
5609
5610 attrs = CODING_ID_ATTRS (coding->id);
5611
5612 do
d46c5b12 5613 {
df7492f9
KH
5614 coding_set_source (coding);
5615 coding->annotated = 0;
5616 (*(coding->decoder)) (coding);
5617 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5618 translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
5619 coding_set_destination (coding);
5620 produce_chars (coding);
5621 if (coding->annotated)
5622 produce_annotation (coding);
d46c5b12 5623 }
df7492f9
KH
5624 while (coding->consumed < coding->src_bytes
5625 && ! coding->result);
d46c5b12 5626
df7492f9
KH
5627 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5628 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5629 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5630 decode_eol (coding);
d46c5b12 5631
df7492f9
KH
5632 coding->carryover_bytes = 0;
5633 if (coding->consumed < coding->src_bytes)
d46c5b12 5634 {
df7492f9
KH
5635 int nbytes = coding->src_bytes - coding->consumed;
5636 unsigned char *src;
5637
5638 coding_set_source (coding);
5639 coding_set_destination (coding);
5640 src = coding->source + coding->consumed;
5641
5642 if (coding->mode & CODING_MODE_LAST_BLOCK)
d46c5b12 5643 {
df7492f9
KH
5644 /* Flush out unprocessed data as binary chars. We are sure
5645 that the number of data is less than the size of
5646 coding->charbuf. */
5647 int *charbuf = coding->charbuf;
5648
5649 while (nbytes-- > 0)
d46c5b12 5650 {
df7492f9
KH
5651 int c = *src++;
5652 *charbuf++ = (c & 0x80 ? - c : c);
d46c5b12 5653 }
df7492f9 5654 produce_chars (coding);
d46c5b12 5655 }
d46c5b12 5656 else
df7492f9
KH
5657 {
5658 /* Record unprocessed bytes in coding->carryover. We are
5659 sure that the number of data is less than the size of
5660 coding->carryover. */
5661 unsigned char *p = coding->carryover;
5662
5663 coding->carryover_bytes = nbytes;
5664 while (nbytes-- > 0)
5665 *p++ = *src++;
5666 }
5667 coding->consumed = coding->src_bytes;
5668 }
b73bfc1c 5669
df7492f9 5670 return coding->result;
d46c5b12
KH
5671}
5672
df7492f9
KH
5673static void
5674consume_chars (coding)
5675 struct coding_system *coding;
5676{
5677 int *buf = coding->charbuf;
5678 /* -1 is to compensate for CRLF. */
5679 int *buf_end = coding->charbuf + coding->charbuf_size - 1;
5680 unsigned char *src = coding->source + coding->consumed;
5681 int pos = coding->src_pos + coding->consumed_char;
5682 int end_pos = coding->src_pos + coding->src_chars;
5683 int multibytep = coding->src_multibyte;
5684 Lisp_Object eol_type;
5685 int c;
5686 int start, end, stop;
5687 Lisp_Object object, prop;
88993dfd 5688
df7492f9
KH
5689 eol_type = CODING_ID_EOL_TYPE (coding->id);
5690 if (VECTORP (eol_type))
5691 eol_type = Qunix;
88993dfd 5692
df7492f9 5693 object = coding->src_object;
b843d1ae 5694
df7492f9
KH
5695 /* Note: composition handling is not yet implemented. */
5696 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 5697
df7492f9
KH
5698 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5699 && find_composition (pos, end_pos, &start, &end, &prop, object)
5700 && end <= end_pos
5701 && (start >= pos
5702 || (find_composition (end, end_pos, &start, &end, &prop, object)
5703 && end <= end_pos)))
5704 stop = start;
5705 else
5706 stop = end_pos;
ec6d2bb8 5707
df7492f9 5708 while (buf < buf_end)
ec6d2bb8 5709 {
df7492f9 5710 if (pos == stop)
ec6d2bb8 5711 {
df7492f9 5712 int *p;
ec6d2bb8 5713
df7492f9
KH
5714 if (pos == end_pos)
5715 break;
5716 p = save_composition_data (buf, buf_end, prop);
5717 if (p == NULL)
5718 break;
5719 buf = p;
5720 if (find_composition (end, end_pos, &start, &end, &prop, object)
5721 && end <= end_pos)
5722 stop = start;
5723 else
5724 stop = end_pos;
5725 }
5726
5727 if (! multibytep)
5728 c = *src++;
5729 else
5730 c = STRING_CHAR_ADVANCE (src);
5731 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5732 c = '\n';
5733 if (! EQ (eol_type, Qunix))
5734 {
5735 if (c == '\n')
5736 {
5737 if (EQ (eol_type, Qdos))
5738 *buf++ = '\r';
5739 else
5740 c = '\r';
ec6d2bb8 5741 }
ec6d2bb8 5742 }
df7492f9
KH
5743 *buf++ = c;
5744 pos++;
ec6d2bb8 5745 }
ec6d2bb8 5746
df7492f9
KH
5747 coding->consumed = src - coding->source;
5748 coding->consumed_char = pos - coding->src_pos;
5749 coding->charbuf_used = buf - coding->charbuf;
5750 coding->chars_at_source = 0;
ec6d2bb8
KH
5751}
5752
ec6d2bb8 5753
df7492f9
KH
5754/* Encode the text at CODING->src_object into CODING->dst_object.
5755 CODING->src_object is a buffer or a string.
5756 CODING->dst_object is a buffer or nil.
5757
5758 If CODING->src_object is a buffer, it must be the current buffer.
5759 In this case, if CODING->src_pos is positive, it is a position of
5760 the source text in the buffer, otherwise. the source text is in the
5761 gap area of the buffer, and coding->src_pos specifies the offset of
5762 the text from GPT (which must be the same as PT). If this is the
5763 same buffer as CODING->dst_object, CODING->src_pos must be
5764 negative and CODING should not have `pre-write-conversion'.
5765
5766 If CODING->src_object is a string, CODING should not have
5767 `pre-write-conversion'.
5768
5769 If CODING->dst_object is a buffer, the encoded data is inserted at
5770 the current point of that buffer.
5771
5772 If CODING->dst_object is nil, the encoded data is placed at the
5773 memory area specified by CODING->destination. */
5774
5775static int
5776encode_coding (coding)
ec6d2bb8 5777 struct coding_system *coding;
ec6d2bb8 5778{
df7492f9 5779 Lisp_Object attrs;
ec6d2bb8 5780
df7492f9 5781 attrs = CODING_ID_ATTRS (coding->id);
ec6d2bb8 5782
df7492f9 5783 if (BUFFERP (coding->dst_object))
ec6d2bb8 5784 {
df7492f9
KH
5785 set_buffer_internal (XBUFFER (coding->dst_object));
5786 coding->dst_multibyte
5787 = ! NILP (current_buffer->enable_multibyte_characters);
5788 }
ec6d2bb8 5789
df7492f9
KH
5790 coding->consumed = coding->consumed_char = 0;
5791 coding->produced = coding->produced_char = 0;
5792 coding->result = CODING_RESULT_SUCCESS;
5793 coding->errors = 0;
ec6d2bb8 5794
df7492f9 5795 ALLOC_CONVERSION_WORK_AREA (coding);
ec6d2bb8 5796
df7492f9
KH
5797 do {
5798 coding_set_source (coding);
5799 consume_chars (coding);
5800
5801 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
5802 translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
5803
5804 coding_set_destination (coding);
5805 (*(coding->encoder)) (coding);
5806 } while (coding->consumed_char < coding->src_chars);
5807
5808 if (BUFFERP (coding->dst_object))
5809 insert_from_gap (coding->produced_char, coding->produced);
5810
5811 return (coding->result);
ec6d2bb8
KH
5812}
5813
df7492f9 5814/* Work buffer */
fb88bf2d 5815
df7492f9
KH
5816/* List of currently used working buffer. */
5817Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 5818
df7492f9
KH
5819/* A working buffer used by the top level conversion. */
5820Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 5821
4ed46869 5822
df7492f9
KH
5823/* Return a working buffer that can be freely used by the following
5824 code conversion. MULTIBYTEP specifies the multibyteness of the
5825 buffer. */
b73bfc1c 5826
df7492f9
KH
5827Lisp_Object
5828make_conversion_work_buffer (multibytep)
5829 int multibytep;
5830{
5831 struct buffer *current = current_buffer;
5832 Lisp_Object buf;
d46c5b12 5833
df7492f9 5834 if (NILP (Vcode_conversion_work_buf_list))
e133c8fa 5835 {
df7492f9
KH
5836 if (NILP (Vcode_conversion_reused_work_buf))
5837 Vcode_conversion_reused_work_buf
5838 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5839 Vcode_conversion_work_buf_list
5840 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
e133c8fa 5841 }
df7492f9 5842 else
d46c5b12 5843 {
df7492f9
KH
5844 int depth = Flength (Vcode_conversion_work_buf_list);
5845 char str[128];
e077cc80 5846
df7492f9
KH
5847 sprintf (str, " *code-conversion-work*<%d>", depth);
5848 Vcode_conversion_work_buf_list
5849 = Fcons (Fget_buffer_create (build_string (str)),
5850 Vcode_conversion_work_buf_list);
d46c5b12 5851 }
d46c5b12 5852
df7492f9
KH
5853 buf = XCAR (Vcode_conversion_work_buf_list);
5854 set_buffer_internal (XBUFFER (buf));
5855 current_buffer->undo_list = Qt;
5856 Ferase_buffer ();
5857 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
5858 set_buffer_internal (current);
5859 return buf;
5860}
d46c5b12 5861
df7492f9 5862static struct coding_system *saved_coding;
d46c5b12 5863
df7492f9
KH
5864Lisp_Object
5865code_conversion_restore (info)
5866 Lisp_Object info;
5867{
5868 int depth = Flength (Vcode_conversion_work_buf_list);
5869 Lisp_Object buf;
d46c5b12 5870
df7492f9 5871 if (depth > 0)
d46c5b12 5872 {
df7492f9
KH
5873 buf = XCAR (Vcode_conversion_work_buf_list);
5874 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5875 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5876 Fkill_buffer (buf);
5877 }
d46c5b12 5878
df7492f9
KH
5879 if (saved_coding->dst_object == Qt
5880 && saved_coding->destination)
5881 xfree (saved_coding->destination);
b843d1ae 5882
df7492f9
KH
5883 return save_excursion_restore (info);
5884}
d46c5b12 5885
12410ef1 5886
df7492f9
KH
5887int
5888decode_coding_gap (coding, chars, bytes)
5889 struct coding_system *coding;
5890 EMACS_INT chars, bytes;
5891{
5892 int count = specpdl_ptr - specpdl;
fb88bf2d 5893
df7492f9
KH
5894 saved_coding = coding;
5895 record_unwind_protect (code_conversion_restore, save_excursion_save ());
ec6d2bb8 5896
df7492f9
KH
5897 coding->src_object = Fcurrent_buffer ();
5898 coding->src_chars = chars;
5899 coding->src_bytes = bytes;
5900 coding->src_pos = -chars;
5901 coding->src_pos_byte = -bytes;
5902 coding->src_multibyte = chars < bytes;
5903 coding->dst_object = coding->src_object;
5904 coding->dst_pos = PT;
5905 coding->dst_pos_byte = PT_BYTE;
71c81426 5906 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4956c225 5907
df7492f9
KH
5908 if (CODING_REQUIRE_DETECTION (coding))
5909 detect_coding (coding);
5910
5911 decode_coding (coding);
d46c5b12 5912
df7492f9
KH
5913 unbind_to (count, Qnil);
5914 return coding->result;
5915}
d46c5b12 5916
df7492f9
KH
5917int
5918encode_coding_gap (coding, chars, bytes)
5919 struct coding_system *coding;
5920 EMACS_INT chars, bytes;
5921{
5922 int count = specpdl_ptr - specpdl;
5923 Lisp_Object buffer;
d46c5b12 5924
df7492f9
KH
5925 saved_coding = coding;
5926 record_unwind_protect (code_conversion_restore, save_excursion_save ());
fb88bf2d 5927
df7492f9
KH
5928 buffer = Fcurrent_buffer ();
5929 coding->src_object = buffer;
5930 coding->src_chars = chars;
5931 coding->src_bytes = bytes;
5932 coding->src_pos = -chars;
5933 coding->src_pos_byte = -bytes;
5934 coding->src_multibyte = chars < bytes;
5935 coding->dst_object = coding->src_object;
5936 coding->dst_pos = PT;
5937 coding->dst_pos_byte = PT_BYTE;
fb88bf2d 5938
df7492f9 5939 encode_coding (coding);
f2558efd 5940
df7492f9
KH
5941 unbind_to (count, Qnil);
5942 return coding->result;
5943}
b73bfc1c 5944
d46c5b12 5945
df7492f9
KH
5946/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5947 SRC_OBJECT into DST_OBJECT by coding context CODING.
ec6d2bb8 5948
df7492f9 5949 SRC_OBJECT is a buffer, a string, or Qnil.
ec6d2bb8 5950
df7492f9
KH
5951 If it is a buffer, the text is at point of the buffer. FROM and TO
5952 are positions in the buffer.
ec6d2bb8 5953
df7492f9
KH
5954 If it is a string, the text is at the beginning of the string.
5955 FROM and TO are indices to the string.
ec6d2bb8 5956
df7492f9
KH
5957 If it is nil, the text is at coding->source. FROM and TO are
5958 indices to coding->source.
ec6d2bb8 5959
df7492f9 5960 DST_OBJECT is a buffer, Qt, or Qnil.
d46c5b12 5961
df7492f9
KH
5962 If it is a buffer, the decoded text is inserted at point of the
5963 buffer. If the buffer is the same as SRC_OBJECT, the source text
5964 is deleted.
d46c5b12 5965
df7492f9
KH
5966 If it is Qt, a string is made from the decoded text, and
5967 set in CODING->dst_object.
d46c5b12 5968
df7492f9
KH
5969 If it is Qnil, the decoded text is stored at CODING->destination.
5970 The called must allocate CODING->dst_bytes bytes at
5971 CODING->destination by xmalloc. If the decoded text is longer than
5972 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5973 */
d46c5b12 5974
df7492f9
KH
5975void
5976decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5977 dst_object)
5978 struct coding_system *coding;
5979 Lisp_Object src_object;
5980 EMACS_INT from, from_byte, to, to_byte;
5981 Lisp_Object dst_object;
5982{
5983 int count = specpdl_ptr - specpdl;
5984 unsigned char *destination;
5985 EMACS_INT dst_bytes;
5986 EMACS_INT chars = to - from;
5987 EMACS_INT bytes = to_byte - from_byte;
5988 Lisp_Object attrs;
d46c5b12 5989
df7492f9
KH
5990 saved_coding = coding;
5991 record_unwind_protect (code_conversion_restore, save_excursion_save ());
93dec019 5992
df7492f9
KH
5993 if (NILP (dst_object))
5994 {
5995 destination = coding->destination;
5996 dst_bytes = coding->dst_bytes;
5997 }
93dec019 5998
df7492f9
KH
5999 coding->src_object = src_object;
6000 coding->src_chars = chars;
6001 coding->src_bytes = bytes;
6002 coding->src_multibyte = chars < bytes;
70ad9fc4 6003
df7492f9
KH
6004 if (STRINGP (src_object))
6005 {
6006 coding->src_pos = from;
6007 coding->src_pos_byte = from_byte;
6008 }
6009 else if (BUFFERP (src_object))
6010 {
6011 set_buffer_internal (XBUFFER (src_object));
6012 if (from != GPT)
6013 move_gap_both (from, from_byte);
6014 if (EQ (src_object, dst_object))
fb88bf2d 6015 {
df7492f9
KH
6016 TEMP_SET_PT_BOTH (from, from_byte);
6017 del_range_both (from, from_byte, to, to_byte, 1);
6018 coding->src_pos = -chars;
6019 coding->src_pos_byte = -bytes;
fb88bf2d 6020 }
df7492f9 6021 else
fb88bf2d 6022 {
df7492f9
KH
6023 coding->src_pos = from;
6024 coding->src_pos_byte = from_byte;
fb88bf2d 6025 }
d46c5b12 6026 }
fb88bf2d 6027
df7492f9
KH
6028 if (CODING_REQUIRE_DETECTION (coding))
6029 detect_coding (coding);
6030 attrs = CODING_ID_ATTRS (coding->id);
6031
6032 if (! NILP (CODING_ATTR_POST_READ (attrs))
6033 || EQ (dst_object, Qt))
b73bfc1c 6034 {
df7492f9
KH
6035 coding->dst_object = make_conversion_work_buffer (1);
6036 coding->dst_pos = BEG;
6037 coding->dst_pos_byte = BEG_BYTE;
6038 coding->dst_multibyte = 1;
b73bfc1c 6039 }
df7492f9 6040 else if (BUFFERP (dst_object))
12410ef1 6041 {
df7492f9
KH
6042 coding->dst_object = dst_object;
6043 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6044 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6045 coding->dst_multibyte
6046 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
12410ef1 6047 }
72d1a715 6048 else
df7492f9
KH
6049 {
6050 coding->dst_object = Qnil;
6051 coding->dst_multibyte = 1;
6052 }
6053
6054 decode_coding (coding);
4ed46869 6055
df7492f9
KH
6056 if (BUFFERP (coding->dst_object))
6057 set_buffer_internal (XBUFFER (coding->dst_object));
ec6d2bb8 6058
df7492f9 6059 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6060 {
df7492f9
KH
6061 struct gcpro gcpro1, gcpro2;
6062 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6063 Lisp_Object val;
4ed46869 6064
c0cc7f7f 6065 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6066 GCPRO2 (coding->src_object, coding->dst_object);
6067 val = call1 (CODING_ATTR_POST_READ (attrs),
6068 make_number (coding->produced_char));
6069 UNGCPRO;
6070 CHECK_NATNUM (val);
6071 coding->produced_char += Z - prev_Z;
6072 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6073 }
4ed46869 6074
df7492f9 6075 if (EQ (dst_object, Qt))
ec6d2bb8 6076 {
df7492f9
KH
6077 coding->dst_object = Fbuffer_string ();
6078 }
6079 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6080 {
6081 set_buffer_internal (XBUFFER (coding->dst_object));
6082 if (dst_bytes < coding->produced)
6083 {
6084 destination
6085 = (unsigned char *) xrealloc (destination, coding->produced);
6086 if (! destination)
6087 {
6088 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6089 unbind_to (count, Qnil);
6090 return;
6091 }
6092 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6093 move_gap_both (BEGV, BEGV_BYTE);
6094 bcopy (BEGV_ADDR, destination, coding->produced);
6095 coding->destination = destination;
6096 }
ec6d2bb8 6097 }
2b4f9037 6098
df7492f9 6099 unbind_to (count, Qnil);
d46c5b12
KH
6100}
6101
df7492f9
KH
6102
6103void
6104encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6105 dst_object)
b73bfc1c 6106 struct coding_system *coding;
df7492f9
KH
6107 Lisp_Object src_object;
6108 EMACS_INT from, from_byte, to, to_byte;
6109 Lisp_Object dst_object;
b73bfc1c
KH
6110{
6111 int count = specpdl_ptr - specpdl;
df7492f9
KH
6112 EMACS_INT chars = to - from;
6113 EMACS_INT bytes = to_byte - from_byte;
6114 Lisp_Object attrs;
6115
6116 saved_coding = coding;
6117 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6118
6119 coding->src_object = src_object;
6120 coding->src_chars = chars;
6121 coding->src_bytes = bytes;
6122 coding->src_multibyte = chars < bytes;
6123
6124 attrs = CODING_ID_ATTRS (coding->id);
6125
6126 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6127 {
df7492f9
KH
6128 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6129 set_buffer_internal (XBUFFER (coding->src_object));
6130 if (STRINGP (src_object))
6131 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6132 else if (BUFFERP (src_object))
6133 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6134 else
6135 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6136
6137 if (EQ (src_object, dst_object))
6138 {
6139 set_buffer_internal (XBUFFER (src_object));
6140 del_range_both (from, from_byte, to, to_byte, 1);
6141 set_buffer_internal (XBUFFER (coding->src_object));
6142 }
6143
ac87bbef
KH
6144 call2 (CODING_ATTR_PRE_WRITE (attrs),
6145 make_number (BEG), make_number (Z));
6146 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6147 if (BEG != GPT)
6148 move_gap_both (BEG, BEG_BYTE);
6149 coding->src_chars = Z - BEG;
6150 coding->src_bytes = Z_BYTE - BEG_BYTE;
6151 coding->src_pos = BEG;
6152 coding->src_pos_byte = BEG_BYTE;
6153 coding->src_multibyte = Z < Z_BYTE;
6154 }
6155 else if (STRINGP (src_object))
6156 {
6157 coding->src_pos = from;
6158 coding->src_pos_byte = from_byte;
6159 }
6160 else if (BUFFERP (src_object))
d46c5b12 6161 {
df7492f9
KH
6162 set_buffer_internal (XBUFFER (src_object));
6163 if (from != GPT)
6164 move_gap_both (from, from_byte);
6165 if (EQ (src_object, dst_object))
d46c5b12 6166 {
df7492f9
KH
6167 del_range_both (from, from_byte, to, to_byte, 1);
6168 coding->src_pos = -chars;
6169 coding->src_pos_byte = -bytes;
d46c5b12 6170 }
df7492f9 6171 else
d46c5b12 6172 {
df7492f9
KH
6173 coding->src_pos = from;
6174 coding->src_pos_byte = from_byte;
d46c5b12
KH
6175 }
6176 }
4ed46869 6177
df7492f9 6178 if (BUFFERP (dst_object))
d46c5b12 6179 {
df7492f9
KH
6180 coding->dst_object = dst_object;
6181 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6182 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6183 coding->dst_multibyte
6184 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
b73bfc1c 6185 }
df7492f9 6186 else if (EQ (dst_object, Qt))
4956c225 6187 {
df7492f9 6188 coding->dst_object = Qnil;
df7492f9 6189 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6190 if (coding->dst_bytes == 0)
6191 coding->dst_bytes = 1;
6192 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6193 coding->dst_multibyte = 0;
4956c225 6194 }
df7492f9 6195 else
78108bcd 6196 {
df7492f9
KH
6197 coding->dst_object = Qnil;
6198 coding->dst_multibyte = 0;
78108bcd
KH
6199 }
6200
df7492f9 6201 encode_coding (coding);
4ed46869 6202
df7492f9 6203 if (EQ (dst_object, Qt))
4ed46869 6204 {
df7492f9
KH
6205 if (BUFFERP (coding->dst_object))
6206 coding->dst_object = Fbuffer_string ();
6207 else
73be902c 6208 {
df7492f9
KH
6209 coding->dst_object
6210 = make_unibyte_string ((char *) coding->destination,
6211 coding->produced);
6212 xfree (coding->destination);
73be902c 6213 }
4ed46869 6214 }
d46c5b12 6215
df7492f9 6216 unbind_to (count, Qnil);
b73bfc1c
KH
6217}
6218
df7492f9 6219
b73bfc1c 6220Lisp_Object
df7492f9 6221preferred_coding_system ()
b73bfc1c 6222{
df7492f9 6223 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6224
df7492f9 6225 return CODING_ID_NAME (id);
4ed46869
KH
6226}
6227
6228\f
6229#ifdef emacs
/*** 11. Emacs Lisp library functions ***/
4ed46869 6231
4ed46869 6232DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6233 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6234See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6235about coding-system objects. */)
6236 (obj)
4ed46869
KH
6237 Lisp_Object obj;
6238{
df7492f9 6239 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6240}
6241
9d991de8
RS
6242DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6243 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6244 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6245 (prompt)
4ed46869
KH
6246 Lisp_Object prompt;
6247{
e0e989f6 6248 Lisp_Object val;
9d991de8
RS
6249 do
6250 {
4608c386
KH
6251 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6252 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
6253 }
6254 while (XSTRING (val)->size == 0);
e0e989f6 6255 return (Fintern (val, Qnil));
4ed46869
KH
6256}
6257
9b787f3e 6258DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6259 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6260If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6261 (prompt, default_coding_system)
9b787f3e 6262 Lisp_Object prompt, default_coding_system;
4ed46869 6263{
f44d27ce 6264 Lisp_Object val;
9b787f3e
RS
6265 if (SYMBOLP (default_coding_system))
6266 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 6267 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6268 Qt, Qnil, Qcoding_system_history,
6269 default_coding_system, Qnil);
e0e989f6 6270 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6271}
6272
6273DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6274 1, 1, 0,
48b0f3ae
PJ
6275 doc: /* Check validity of CODING-SYSTEM.
6276If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6277It is valid if it is a symbol with a non-nil `coding-system' property.
6278The value of property should be a vector of length 5. */)
df7492f9 6279 (coding_system)
4ed46869
KH
6280 Lisp_Object coding_system;
6281{
b7826503 6282 CHECK_SYMBOL (coding_system);
4ed46869
KH
6283 if (!NILP (Fcoding_system_p (coding_system)))
6284 return coding_system;
6285 while (1)
02ba4723 6286 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6287}
df7492f9 6288
3a73fa5d 6289\f
d46c5b12 6290Lisp_Object
df7492f9 6291detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
d46c5b12
KH
6292 unsigned char *src;
6293 int src_bytes, highest;
0a28aafb 6294 int multibytep;
df7492f9 6295 Lisp_Object coding_system;
4ed46869 6296{
df7492f9
KH
6297 unsigned char *src_end = src + src_bytes;
6298 int mask = CATEGORY_MASK_ANY;
6299 int detected = 0;
6300 int c, i;
6301 Lisp_Object attrs, eol_type;
6302 Lisp_Object val;
6303 struct coding_system coding;
6304
6305 if (NILP (coding_system))
6306 coding_system = Qundecided;
6307 setup_coding_system (coding_system, &coding);
6308 attrs = CODING_ID_ATTRS (coding.id);
6309 eol_type = CODING_ID_EOL_TYPE (coding.id);
4ed46869 6310
df7492f9
KH
6311 coding.source = src;
6312 coding.src_bytes = src_bytes;
6313 coding.src_multibyte = multibytep;
6314 coding.consumed = 0;
4ed46869 6315
df7492f9 6316 if (XINT (CODING_ATTR_CATEGORY (attrs)) != coding_category_undecided)
4ed46869 6317 {
df7492f9 6318 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
4ed46869 6319 }
df7492f9 6320 else
4ed46869 6321 {
df7492f9
KH
6322 coding_system = Qnil;
6323 for (; src < src_end; src++)
4ed46869 6324 {
df7492f9
KH
6325 c = *src;
6326 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6327 || c == ISO_CODE_SI
6328 || c == ISO_CODE_SO)))
d46c5b12 6329 break;
4ed46869 6330 }
df7492f9
KH
6331 coding.head_ascii = src - coding.source;
6332
6333 if (src < src_end)
6334 for (i = 0; i < coding_category_raw_text; i++)
6335 {
6336 enum coding_category category = coding_priorities[i];
6337 struct coding_system *this = coding_categories + category;
6338
6339 if (category >= coding_category_raw_text
6340 || detected & (1 << category))
6341 continue;
6342
6343 if (this->id < 0)
6344 {
6345 /* No coding system of this category is defined. */
6346 mask &= ~(1 << category);
6347 }
6348 else
6349 {
6350 detected |= detected_mask[category];
6351 if ((*(coding_categories[category].detector)) (&coding, &mask)
6352 && highest)
6353 {
6354 mask &= detected_mask[category];
6355 break;
6356 }
6357 }
6358 }
4ed46869 6359 }
4ed46869 6360
df7492f9
KH
6361 if (!mask)
6362 val = Fcons (make_number (coding_category_raw_text), Qnil);
6363 else if (mask == CATEGORY_MASK_ANY)
6364 val = Fcons (make_number (coding_category_undecided), Qnil);
6365 else if (highest)
4ed46869 6366 {
df7492f9
KH
6367 for (i = 0; i < coding_category_raw_text; i++)
6368 if (mask & (1 << coding_priorities[i]))
6369 {
6370 val = Fcons (make_number (coding_priorities[i]), Qnil);
6371 break;
6372 }
6373 }
6374 else
6375 {
6376 val = Qnil;
6377 for (i = coding_category_raw_text - 1; i >= 0; i--)
6378 if (mask & (1 << coding_priorities[i]))
6379 val = Fcons (make_number (coding_priorities[i]), val);
4ed46869 6380 }
df7492f9
KH
6381
6382 {
6383 int one_byte_eol = -1, two_byte_eol = -1;
6384 Lisp_Object tail;
6385
6386 for (tail = val; CONSP (tail); tail = XCDR (tail))
6387 {
6388 struct coding_system *this
6389 = (NILP (coding_system) ? coding_categories + XINT (XCAR (tail))
6390 : &coding);
6391 int this_eol;
6392
6393 attrs = CODING_ID_ATTRS (this->id);
6394 eol_type = CODING_ID_EOL_TYPE (this->id);
6395 XSETCAR (tail, CODING_ID_NAME (this->id));
6396 if (VECTORP (eol_type))
6397 {
6398 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_16))
6399 {
6400 if (two_byte_eol < 0)
6401 two_byte_eol = detect_eol (this, coding.source, src_bytes);
6402 this_eol = two_byte_eol;
6403 }
6404 else
6405 {
6406 if (one_byte_eol < 0)
6407 one_byte_eol =detect_eol (this, coding.source, src_bytes);
6408 this_eol = one_byte_eol;
6409 }
6410 if (this_eol == EOL_SEEN_LF)
6411 XSETCAR (tail, AREF (eol_type, 0));
6412 else if (this_eol == EOL_SEEN_CRLF)
6413 XSETCAR (tail, AREF (eol_type, 1));
6414 else if (this_eol == EOL_SEEN_CR)
6415 XSETCAR (tail, AREF (eol_type, 2));
6416 }
6417 }
6418 }
6419
03699b14 6420 return (highest ? XCAR (val) : val);
93dec019 6421}
4ed46869 6422
df7492f9 6423
d46c5b12
KH
6424DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6425 2, 3, 0,
48b0f3ae
PJ
6426 doc: /* Detect coding system of the text in the region between START and END.
6427Return a list of possible coding systems ordered by priority.
6428
6429If only ASCII characters are found, it returns a list of single element
6430`undecided' or its subsidiary coding system according to a detected
6431end-of-line format.
6432
6433If optional argument HIGHEST is non-nil, return the coding system of
6434highest priority. */)
6435 (start, end, highest)
d46c5b12
KH
6436 Lisp_Object start, end, highest;
6437{
6438 int from, to;
6439 int from_byte, to_byte;
6289dd10 6440
b7826503
PJ
6441 CHECK_NUMBER_COERCE_MARKER (start);
6442 CHECK_NUMBER_COERCE_MARKER (end);
4ed46869 6443
d46c5b12
KH
6444 validate_region (&start, &end);
6445 from = XINT (start), to = XINT (end);
6446 from_byte = CHAR_TO_BYTE (from);
6447 to_byte = CHAR_TO_BYTE (to);
6289dd10 6448
d46c5b12
KH
6449 if (from < GPT && to >= GPT)
6450 move_gap_both (to, to_byte);
c210f766 6451
d46c5b12 6452 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6453 to_byte - from_byte,
0a28aafb
KH
6454 !NILP (highest),
6455 !NILP (current_buffer
df7492f9
KH
6456 ->enable_multibyte_characters),
6457 Qnil);
d46c5b12 6458}
6289dd10 6459
d46c5b12
KH
6460DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6461 1, 2, 0,
48b0f3ae
PJ
6462 doc: /* Detect coding system of the text in STRING.
6463Return a list of possible coding systems ordered by priority.
6464
6465If only ASCII characters are found, it returns a list of single element
6466`undecided' or its subsidiary coding system according to a detected
6467end-of-line format.
6468
6469If optional argument HIGHEST is non-nil, return the coding system of
6470highest priority. */)
6471 (string, highest)
d46c5b12
KH
6472 Lisp_Object string, highest;
6473{
b7826503 6474 CHECK_STRING (string);
4ed46869 6475
d46c5b12 6476 return detect_coding_system (XSTRING (string)->data,
df7492f9 6477 STRING_BYTES (XSTRING (string)),
0a28aafb 6478 !NILP (highest),
df7492f9
KH
6479 STRING_MULTIBYTE (string),
6480 Qnil);
4ed46869
KH
6481}
6482
05e6f5dc 6483
df7492f9
KH
6484static INLINE int
6485char_encodable_p (c, attrs)
6486 int c;
6487 Lisp_Object attrs;
05e6f5dc 6488{
df7492f9 6489 Lisp_Object tail;
df7492f9 6490 struct charset *charset;
05e6f5dc 6491
df7492f9
KH
6492 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6493 CONSP (tail); tail = XCDR (tail))
05e6f5dc 6494 {
df7492f9
KH
6495 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6496 if (CHAR_CHARSET_P (c, charset))
6497 break;
05e6f5dc 6498 }
df7492f9 6499 return (! NILP (tail));
05e6f5dc
KH
6500}
6501
6502
df7492f9
KH
6503/* Return a list of coding systems that safely encode the text between
6504 START and END. If EXCLUDE is non-nil, it is a list of coding
6505 systems not to check. The returned list doesn't contain any such
6506 coding systems. In any case, If the text contains only ASCII or is
6507 unibyte, return t. */
6508
6509DEFUN ("find-coding-systems-region-internal",
6510 Ffind_coding_systems_region_internal,
6511 Sfind_coding_systems_region_internal, 2, 3, 0,
6512 doc: /* Internal use only. */)
6513 (start, end, exclude)
6514 Lisp_Object start, end, exclude;
6515{
6516 Lisp_Object coding_attrs_list, safe_codings;
6517 EMACS_INT start_byte, end_byte;
6518 unsigned char *p, *pbeg, *pend;
6519 int c;
6520 Lisp_Object tail, elt;
05e6f5dc 6521
df7492f9
KH
6522 if (STRINGP (start))
6523 {
6524 if (!STRING_MULTIBYTE (start)
6525 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6526 return Qt;
6527 start_byte = 0;
6528 end_byte = STRING_BYTES (XSTRING (start));
6529 }
6530 else
6531 {
6532 CHECK_NUMBER_COERCE_MARKER (start);
6533 CHECK_NUMBER_COERCE_MARKER (end);
6534 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6535 args_out_of_range (start, end);
6536 if (NILP (current_buffer->enable_multibyte_characters))
6537 return Qt;
6538 start_byte = CHAR_TO_BYTE (XINT (start));
6539 end_byte = CHAR_TO_BYTE (XINT (end));
6540 if (XINT (end) - XINT (start) == end_byte - start_byte)
6541 return Qt;
05e6f5dc 6542
df7492f9
KH
6543 if (start < GPT && end > GPT)
6544 {
6545 if ((GPT - start) < (end - GPT))
6546 move_gap_both (start, start_byte);
6547 else
6548 move_gap_both (end, end_byte);
6549 }
6550 }
05e6f5dc 6551
df7492f9
KH
6552 coding_attrs_list = Qnil;
6553 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6554 if (NILP (exclude)
6555 || NILP (Fmemq (XCAR (tail), exclude)))
6556 {
6557 Lisp_Object attrs;
05e6f5dc 6558
df7492f9
KH
6559 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6560 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6561 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6562 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6563 }
6564
6565 if (STRINGP (start))
6566 p = pbeg = XSTRING (start)->data;
6567 else
6568 p = pbeg = BYTE_POS_ADDR (start_byte);
6569 pend = p + (end_byte - start_byte);
6570
6571 while (p < pend && ASCII_BYTE_P (*p)) p++;
6572 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
05e6f5dc
KH
6573
6574 while (p < pend)
6575 {
df7492f9
KH
6576 if (ASCII_BYTE_P (*p))
6577 p++;
6578 else
6579 {
6580 c = STRING_CHAR_ADVANCE (p);
6581
6582 charset_map_loaded = 0;
6583 for (tail = coding_attrs_list; CONSP (tail);)
6584 {
6585 elt = XCAR (tail);
6586 if (NILP (elt))
6587 tail = XCDR (tail);
6588 else if (char_encodable_p (c, elt))
6589 tail = XCDR (tail);
6590 else if (CONSP (XCDR (tail)))
6591 {
6592 XSETCAR (tail, XCAR (XCDR (tail)));
6593 XSETCDR (tail, XCDR (XCDR (tail)));
6594 }
6595 else
6596 {
6597 XSETCAR (tail, Qnil);
6598 tail = XCDR (tail);
6599 }
6600 }
6601 if (charset_map_loaded)
6602 {
6603 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 6604
df7492f9
KH
6605 if (STRINGP (start))
6606 pbeg = XSTRING (start)->data;
6607 else
6608 pbeg = BYTE_POS_ADDR (start_byte);
6609 p = pbeg + p_offset;
6610 pend = pbeg + pend_offset;
6611 }
6612 }
05e6f5dc 6613 }
df7492f9
KH
6614
6615 safe_codings = Qnil;
6616 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6617 if (! NILP (XCAR (tail)))
6618 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6619
05e6f5dc
KH
6620 return safe_codings;
6621}
6622
6623
df7492f9
KH
6624DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6625 Scheck_coding_systems_region, 3, 3, 0,
6626 doc: /* Check if the region is encodable by coding systems.
05e6f5dc 6627
df7492f9
KH
6628START and END are buffer positions specifying the region.
6629CODING-SYSTEM-LIST is a list of coding systems to check.
6630
6631The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6632CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6633whole region, POS0, POS1, ... are buffer positions where non-encodable
6634characters are found.
6635
6636If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6637value is nil.
6638
6639START may be a string. In that case, check if the string is
6640encodable, and the value contains indices to the string instead of
6641buffer positions. END is ignored. */)
6642 (start, end, coding_system_list)
6643 Lisp_Object start, end, coding_system_list;
05e6f5dc 6644{
df7492f9
KH
6645 Lisp_Object list;
6646 EMACS_INT start_byte, end_byte;
6647 int pos;
6648 unsigned char *p, *pbeg, *pend;
6649 int c;
6650 Lisp_Object tail, elt;
05e6f5dc
KH
6651
6652 if (STRINGP (start))
6653 {
df7492f9
KH
6654 if (!STRING_MULTIBYTE (start)
6655 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6656 return Qnil;
6657 start_byte = 0;
6658 end_byte = STRING_BYTES (XSTRING (start));
6659 pos = 0;
05e6f5dc
KH
6660 }
6661 else
6662 {
b7826503
PJ
6663 CHECK_NUMBER_COERCE_MARKER (start);
6664 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
6665 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6666 args_out_of_range (start, end);
6667 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
6668 return Qnil;
6669 start_byte = CHAR_TO_BYTE (XINT (start));
6670 end_byte = CHAR_TO_BYTE (XINT (end));
6671 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 6672 return Qt;
df7492f9
KH
6673
6674 if (start < GPT && end > GPT)
6675 {
6676 if ((GPT - start) < (end - GPT))
6677 move_gap_both (start, start_byte);
6678 else
6679 move_gap_both (end, end_byte);
6680 }
6681 pos = start;
6682 }
6683
6684 list = Qnil;
6685 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6686 {
6687 elt = XCAR (tail);
6688 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6689 Qnil)),
6690 list);
05e6f5dc
KH
6691 }
6692
df7492f9
KH
6693 if (STRINGP (start))
6694 p = pbeg = XSTRING (start)->data;
6695 else
6696 p = pbeg = BYTE_POS_ADDR (start_byte);
6697 pend = p + (end_byte - start_byte);
6698
6699 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6700 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6701
6702 while (p < pend)
05e6f5dc 6703 {
df7492f9
KH
6704 if (ASCII_BYTE_P (*p))
6705 p++;
6706 else
05e6f5dc 6707 {
df7492f9
KH
6708 c = STRING_CHAR_ADVANCE (p);
6709
6710 charset_map_loaded = 0;
6711 for (tail = list; CONSP (tail); tail = XCDR (tail))
6712 {
6713 elt = XCDR (XCAR (tail));
6714 if (! char_encodable_p (c, XCAR (elt)))
6715 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6716 }
6717 if (charset_map_loaded)
6718 {
6719 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6720
6721 if (STRINGP (start))
6722 pbeg = XSTRING (start)->data;
6723 else
6724 pbeg = BYTE_POS_ADDR (start_byte);
6725 p = pbeg + p_offset;
6726 pend = pbeg + pend_offset;
6727 }
05e6f5dc 6728 }
df7492f9 6729 pos++;
05e6f5dc
KH
6730 }
6731
df7492f9
KH
6732 tail = list;
6733 list = Qnil;
6734 for (; CONSP (tail); tail = XCDR (tail))
05e6f5dc 6735 {
df7492f9
KH
6736 elt = XCAR (tail);
6737 if (CONSP (XCDR (XCDR (elt))))
6738 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6739 list);
05e6f5dc 6740 }
df7492f9
KH
6741
6742 return list;
05e6f5dc
KH
6743}
6744
6745
df7492f9 6746
4031e2bf 6747Lisp_Object
df7492f9
KH
6748code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6749 Lisp_Object start, end, coding_system, dst_object;
6750 int encodep, norecord;
3a73fa5d
RS
6751{
6752 struct coding_system coding;
df7492f9
KH
6753 EMACS_INT from, from_byte, to, to_byte;
6754 Lisp_Object src_object;
3a73fa5d 6755
b7826503
PJ
6756 CHECK_NUMBER_COERCE_MARKER (start);
6757 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
6758 if (NILP (coding_system))
6759 coding_system = Qno_conversion;
6760 else
6761 CHECK_CODING_SYSTEM (coding_system);
6762 src_object = Fcurrent_buffer ();
6763 if (NILP (dst_object))
6764 dst_object = src_object;
6765 else if (! EQ (dst_object, Qt))
6766 CHECK_BUFFER (dst_object);
3a73fa5d 6767
d46c5b12
KH
6768 validate_region (&start, &end);
6769 from = XFASTINT (start);
df7492f9 6770 from_byte = CHAR_TO_BYTE (from);
d46c5b12 6771 to = XFASTINT (end);
df7492f9 6772 to_byte = CHAR_TO_BYTE (to);
d46c5b12 6773
df7492f9
KH
6774 setup_coding_system (coding_system, &coding);
6775 coding.mode |= CODING_MODE_LAST_BLOCK;
6776
6777 if (encodep)
6778 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6779 dst_object);
6780 else
6781 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6782 dst_object);
6783 if (! norecord)
6784 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
d46c5b12 6785
df7492f9
KH
6786 if (coding.result != CODING_RESULT_SUCCESS)
6787 error ("Code conversion error: %d", coding.result);
3a73fa5d 6788
df7492f9
KH
6789 return (BUFFERP (dst_object)
6790 ? make_number (coding.produced_char)
6791 : coding.dst_object);
4031e2bf
KH
6792}
6793
df7492f9 6794
4031e2bf 6795DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 6796 3, 4, "r\nzCoding system: ",
48b0f3ae 6797 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
6798When called from a program, takes four arguments:
6799 START, END, CODING-SYSTEM, and DESTINATION.
6800START and END are buffer positions.
6801
6802Optional 4th arguments DESTINATION specifies where the decoded text goes.
6803If nil, the region between START and END is replace by the decoded text.
6804If buffer, the decoded text is inserted in the buffer.
6805If t, the decoded text is returned.
6806
48b0f3ae
PJ
6807This function sets `last-coding-system-used' to the precise coding system
6808used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6809not fully specified.)
6810It returns the length of the decoded text. */)
df7492f9
KH
6811 (start, end, coding_system, destination)
6812 Lisp_Object start, end, coding_system, destination;
4031e2bf 6813{
df7492f9 6814 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d
RS
6815}
6816
6817DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
6818 3, 4, "r\nzCoding system: ",
6819 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
6820When called from a program, takes three arguments:
6821START, END, and CODING-SYSTEM. START and END are buffer positions.
df7492f9
KH
6822
6823Optional 4th arguments DESTINATION specifies where the encoded text goes.
6824If nil, the region between START and END is replace by the encoded text.
6825If buffer, the encoded text is inserted in the buffer.
6826If t, the encoded text is returned.
6827
48b0f3ae
PJ
6828This function sets `last-coding-system-used' to the precise coding system
6829used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6830not fully specified.)
6831It returns the length of the encoded text. */)
df7492f9
KH
6832 (start, end, coding_system, destination)
6833 Lisp_Object start, end, coding_system, destination;
3a73fa5d 6834{
df7492f9 6835 return code_convert_region (start, end, coding_system, destination, 1, 0);
4031e2bf 6836}
3a73fa5d 6837
4031e2bf 6838Lisp_Object
df7492f9
KH
6839code_convert_string (string, coding_system, dst_object,
6840 encodep, nocopy, norecord)
6841 Lisp_Object string, coding_system, dst_object;
6842 int encodep, nocopy, norecord;
4031e2bf
KH
6843{
6844 struct coding_system coding;
df7492f9 6845 EMACS_INT chars, bytes;
3a73fa5d 6846
b7826503 6847 CHECK_STRING (string);
d46c5b12 6848 if (NILP (coding_system))
df7492f9
KH
6849 {
6850 if (! norecord)
6851 Vlast_coding_system_used = Qno_conversion;
6852 if (NILP (dst_object))
6853 return (nocopy ? Fcopy_sequence (string) : string);
6854 }
4ed46869 6855
df7492f9
KH
6856 if (NILP (coding_system))
6857 coding_system = Qno_conversion;
6858 else
6859 CHECK_CODING_SYSTEM (coding_system);
6860 if (NILP (dst_object))
6861 dst_object = Qt;
6862 else if (! EQ (dst_object, Qt))
6863 CHECK_BUFFER (dst_object);
5f1cd180 6864
df7492f9 6865 setup_coding_system (coding_system, &coding);
d46c5b12 6866 coding.mode |= CODING_MODE_LAST_BLOCK;
df7492f9
KH
6867 chars = XSTRING (string)->size;
6868 bytes = STRING_BYTES (XSTRING (string));
6869 if (encodep)
6870 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6871 else
6872 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6873 if (! norecord)
6874 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 6875
df7492f9
KH
6876 if (coding.result != CODING_RESULT_SUCCESS)
6877 error ("Code conversion error: %d", coding.result);
4ed46869 6878
df7492f9
KH
6879 return (BUFFERP (dst_object)
6880 ? make_number (coding.produced_char)
6881 : coding.dst_object);
4ed46869
KH
6882}
6883
4031e2bf 6884
ecec61c1 6885/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
6886 Do not set Vlast_coding_system_used.
6887
6888 This function is called only from macros DECODE_FILE and
6889 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
6890
6891Lisp_Object
6892code_convert_string_norecord (string, coding_system, encodep)
6893 Lisp_Object string, coding_system;
6894 int encodep;
6895{
0be8721c 6896 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
df7492f9 6897}
ecec61c1 6898
ecec61c1 6899
df7492f9
KH
6900DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6901 2, 4, 0,
6902 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6903
6904Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6905if the decoding operation is trivial.
ecec61c1 6906
df7492f9
KH
6907Optional fourth arg BUFFER non-nil meant that the decoded text is
6908inserted in BUFFER instead of returned as a astring. In this case,
6909the return value is BUFFER.
ecec61c1 6910
df7492f9
KH
6911This function sets `last-coding-system-used' to the precise coding system
6912used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6913not fully specified. */)
6914 (string, coding_system, nocopy, buffer)
6915 Lisp_Object string, coding_system, nocopy, buffer;
6916{
6917 return code_convert_string (string, coding_system, buffer,
6918 0, ! NILP (nocopy), 0);
6919}
6920
6921DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6922 2, 4, 0,
6923 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6924
6925Optional third arg NOCOPY non-nil means it is OK to return STRING
6926itself if the encoding operation is trivial.
6927
6928Optional fourth arg BUFFER non-nil meant that the encoded text is
6929inserted in BUFFER instead of returned as a astring. In this case,
6930the return value is BUFFER.
6931
6932This function sets `last-coding-system-used' to the precise coding system
6933used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6934not fully specified.) */)
6935 (string, coding_system, nocopy, buffer)
6936 Lisp_Object string, coding_system, nocopy, buffer;
6937{
6938 return code_convert_string (string, coding_system, buffer,
6939 nocopy, ! NILP (nocopy), 1);
ecec61c1 6940}
df7492f9 6941
3a73fa5d 6942\f
4ed46869 6943DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6944 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6945Return the corresponding character. */)
6946 (code)
4ed46869
KH
6947 Lisp_Object code;
6948{
df7492f9
KH
6949 Lisp_Object spec, attrs, val;
6950 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6951 int c;
6952
6953 CHECK_NATNUM (code);
6954 c = XFASTINT (code);
6955 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6956 attrs = AREF (spec, 0);
4ed46869 6957
df7492f9
KH
6958 if (ASCII_BYTE_P (c)
6959 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6960 return code;
6961
6962 val = CODING_ATTR_CHARSET_LIST (attrs);
6963 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
6964 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6965 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
df7492f9
KH
6966
6967 if (c <= 0x7F)
6968 charset = charset_roman;
6969 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 6970 {
df7492f9
KH
6971 charset = charset_kana;
6972 c -= 0x80;
55ab7be3
KH
6973 }
6974 else
6975 {
004068e4 6976 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
6977
6978 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
6979 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
6980 error ("Invalid code: %d", code);
6981 SJIS_TO_JIS (c);
6982 charset = charset_kanji;
55ab7be3 6983 }
df7492f9
KH
6984 c = DECODE_CHAR (charset, c);
6985 if (c < 0)
6986 error ("Invalid code: %d", code);
6987 return make_number (c);
4ed46869
KH
6988}
6989
df7492f9 6990
4ed46869 6991DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6992 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6993Return the corresponding code in SJIS. */)
6994 (ch)
df7492f9 6995 Lisp_Object ch;
4ed46869 6996{
df7492f9
KH
6997 Lisp_Object spec, attrs, charset_list;
6998 int c;
6999 struct charset *charset;
7000 unsigned code;
4ed46869 7001
df7492f9
KH
7002 CHECK_CHARACTER (ch);
7003 c = XFASTINT (ch);
7004 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7005 attrs = AREF (spec, 0);
7006
7007 if (ASCII_CHAR_P (c)
7008 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7009 return ch;
7010
7011 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7012 charset = char_charset (c, charset_list, &code);
7013 if (code == CHARSET_INVALID_CODE (charset))
7014 error ("Can't encode by shift_jis encoding: %d", c);
7015 JIS_TO_SJIS (code);
7016
7017 return make_number (code);
4ed46869
KH
7018}
7019
7020DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7021 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7022Return the corresponding character. */)
7023 (code)
4ed46869
KH
7024 Lisp_Object code;
7025{
df7492f9
KH
7026 Lisp_Object spec, attrs, val;
7027 struct charset *charset_roman, *charset_big5, *charset;
7028 int c;
4ed46869 7029
df7492f9
KH
7030 CHECK_NATNUM (code);
7031 c = XFASTINT (code);
7032 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7033 attrs = AREF (spec, 0);
7034
7035 if (ASCII_BYTE_P (c)
7036 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7037 return code;
7038
7039 val = CODING_ATTR_CHARSET_LIST (attrs);
7040 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7041 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7042
7043 if (c <= 0x7F)
7044 charset = charset_roman;
c28a9453
KH
7045 else
7046 {
df7492f9
KH
7047 int b1 = c >> 8, b2 = c & 0x7F;
7048 if (b1 < 0xA1 || b1 > 0xFE
7049 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7050 error ("Invalid code: %d", code);
7051 charset = charset_big5;
c28a9453 7052 }
df7492f9
KH
7053 c = DECODE_CHAR (charset, (unsigned )c);
7054 if (c < 0)
7055 error ("Invalid code: %d", code);
7056 return make_number (c);
4ed46869
KH
7057}
7058
7059DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7060 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7061Return the corresponding character code in Big5. */)
7062 (ch)
4ed46869
KH
7063 Lisp_Object ch;
7064{
df7492f9
KH
7065 Lisp_Object spec, attrs, charset_list;
7066 struct charset *charset;
7067 int c;
7068 unsigned code;
7069
7070 CHECK_CHARACTER (ch);
7071 c = XFASTINT (ch);
7072 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7073 attrs = AREF (spec, 0);
7074 if (ASCII_CHAR_P (c)
7075 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7076 return ch;
7077
7078 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7079 charset = char_charset (c, charset_list, &code);
7080 if (code == CHARSET_INVALID_CODE (charset))
7081 error ("Can't encode by Big5 encoding: %d", c);
7082
7083 return make_number (code);
4ed46869 7084}
df7492f9 7085
3a73fa5d 7086\f
1ba9e4ab
KH
7087DEFUN ("set-terminal-coding-system-internal",
7088 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
7089 Sset_terminal_coding_system_internal, 1, 1, 0,
7090 doc: /* Internal use only. */)
7091 (coding_system)
b74e4686 7092 Lisp_Object coding_system;
4ed46869 7093{
b7826503 7094 CHECK_SYMBOL (coding_system);
df7492f9
KH
7095 setup_coding_system (Fcheck_coding_system (coding_system),
7096 &terminal_coding);
7097
70c22245 7098 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
7099 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7100 /* Characer composition should be disabled. */
7101 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7102 terminal_coding.src_multibyte = 1;
7103 terminal_coding.dst_multibyte = 0;
4ed46869
KH
7104 return Qnil;
7105}
7106
c4825358
KH
7107DEFUN ("set-safe-terminal-coding-system-internal",
7108 Fset_safe_terminal_coding_system_internal,
48b0f3ae 7109 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7110 doc: /* Internal use only. */)
48b0f3ae 7111 (coding_system)
b74e4686 7112 Lisp_Object coding_system;
c4825358 7113{
b7826503 7114 CHECK_SYMBOL (coding_system);
c4825358
KH
7115 setup_coding_system (Fcheck_coding_system (coding_system),
7116 &safe_terminal_coding);
df7492f9
KH
7117 /* Characer composition should be disabled. */
7118 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7119 safe_terminal_coding.src_multibyte = 1;
7120 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7121 return Qnil;
7122}
7123
4ed46869
KH
7124DEFUN ("terminal-coding-system",
7125 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
7126 doc: /* Return coding system specified for terminal output. */)
7127 ()
4ed46869 7128{
df7492f9 7129 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
7130}
7131
1ba9e4ab
KH
7132DEFUN ("set-keyboard-coding-system-internal",
7133 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
7134 Sset_keyboard_coding_system_internal, 1, 1, 0,
7135 doc: /* Internal use only. */)
7136 (coding_system)
4ed46869
KH
7137 Lisp_Object coding_system;
7138{
b7826503 7139 CHECK_SYMBOL (coding_system);
df7492f9
KH
7140 setup_coding_system (Fcheck_coding_system (coding_system),
7141 &keyboard_coding);
7142 /* Characer composition should be disabled. */
7143 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
7144 return Qnil;
7145}
7146
7147DEFUN ("keyboard-coding-system",
7148 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
7149 doc: /* Return coding system specified for decoding keyboard input. */)
7150 ()
4ed46869 7151{
df7492f9 7152 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7153}
7154
7155\f
a5d301df
KH
7156DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7157 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7158 doc: /* Choose a coding system for an operation based on the target name.
7159The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7160DECODING-SYSTEM is the coding system to use for decoding
7161\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7162for encoding (in case OPERATION does encoding).
7163
7164The first argument OPERATION specifies an I/O primitive:
7165 For file I/O, `insert-file-contents' or `write-region'.
7166 For process I/O, `call-process', `call-process-region', or `start-process'.
7167 For network I/O, `open-network-stream'.
7168
7169The remaining arguments should be the same arguments that were passed
7170to the primitive. Depending on which primitive, one of those arguments
7171is selected as the TARGET. For example, if OPERATION does file I/O,
7172whichever argument specifies the file name is TARGET.
7173
7174TARGET has a meaning which depends on OPERATION:
7175 For file I/O, TARGET is a file name.
7176 For process I/O, TARGET is a process name.
7177 For network I/O, TARGET is a service name or a port number
7178
7179This function looks up what specified for TARGET in,
7180`file-coding-system-alist', `process-coding-system-alist',
7181or `network-coding-system-alist' depending on OPERATION.
7182They may specify a coding system, a cons of coding systems,
7183or a function symbol to call.
7184In the last case, we call the function with one argument,
7185which is a list of all the arguments given to this function.
7186
7187usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7188 (nargs, args)
4ed46869
KH
7189 int nargs;
7190 Lisp_Object *args;
7191{
7192 Lisp_Object operation, target_idx, target, val;
7193 register Lisp_Object chain;
7194
7195 if (nargs < 2)
7196 error ("Too few arguments");
7197 operation = args[0];
7198 if (!SYMBOLP (operation)
7199 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7200 error ("Invalid first arguement");
4ed46869
KH
7201 if (nargs < 1 + XINT (target_idx))
7202 error ("Too few arguments for operation: %s",
7203 XSYMBOL (operation)->name->data);
7204 target = args[XINT (target_idx) + 1];
7205 if (!(STRINGP (target)
7206 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7207 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7208
2e34157c
RS
7209 chain = ((EQ (operation, Qinsert_file_contents)
7210 || EQ (operation, Qwrite_region))
02ba4723 7211 ? Vfile_coding_system_alist
2e34157c 7212 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7213 ? Vnetwork_coding_system_alist
7214 : Vprocess_coding_system_alist));
4ed46869
KH
7215 if (NILP (chain))
7216 return Qnil;
7217
03699b14 7218 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 7219 {
f44d27ce 7220 Lisp_Object elt;
4ed46869 7221
df7492f9 7222 elt = XCAR (chain);
4ed46869
KH
7223 if (CONSP (elt)
7224 && ((STRINGP (target)
03699b14
KR
7225 && STRINGP (XCAR (elt))
7226 && fast_string_match (XCAR (elt), target) >= 0)
7227 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 7228 {
03699b14 7229 val = XCDR (elt);
b19fd4c5
KH
7230 /* Here, if VAL is both a valid coding system and a valid
7231 function symbol, we return VAL as a coding system. */
02ba4723
KH
7232 if (CONSP (val))
7233 return val;
7234 if (! SYMBOLP (val))
7235 return Qnil;
7236 if (! NILP (Fcoding_system_p (val)))
7237 return Fcons (val, val);
b19fd4c5
KH
7238 if (! NILP (Ffboundp (val)))
7239 {
7240 val = call1 (val, Flist (nargs, args));
7241 if (CONSP (val))
7242 return val;
7243 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7244 return Fcons (val, val);
7245 }
02ba4723
KH
7246 return Qnil;
7247 }
4ed46869
KH
7248 }
7249 return Qnil;
7250}
7251
df7492f9
KH
7252DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7253 Sset_coding_system_priority, 1, MANY, 0,
da7db224 7254 doc: /* Assign higher priority to the coding systems given as arguments.
1fcd6c8b 7255usage: (set-coding-system-priority CODING-SYSTEM ...) */)
df7492f9
KH
7256 (nargs, args)
7257 int nargs;
7258 Lisp_Object *args;
7259{
7260 int i, j;
7261 int changed[coding_category_max];
7262 enum coding_category priorities[coding_category_max];
7263
7264 bzero (changed, sizeof changed);
7265
7266 for (i = j = 0; i < nargs; i++)
7267 {
7268 enum coding_category category;
7269 Lisp_Object spec, attrs;
7270
7271 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7272 attrs = AREF (spec, 0);
7273 category = XINT (CODING_ATTR_CATEGORY (attrs));
7274 if (changed[category])
7275 /* Ignore this coding system because a coding system of the
7276 same category already had a higher priority. */
7277 continue;
7278 changed[category] = 1;
7279 priorities[j++] = category;
7280 if (coding_categories[category].id >= 0
7281 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7282 setup_coding_system (args[i], &coding_categories[category]);
7283 }
7284
7285 /* Now we have decided top J priorities. Reflect the order of the
7286 original priorities to the remaining priorities. */
7287
7288 for (i = j, j = 0; i < coding_category_max; i++, j++)
7289 {
7290 while (j < coding_category_max
7291 && changed[coding_priorities[j]])
7292 j++;
7293 if (j == coding_category_max)
7294 abort ();
7295 priorities[i] = coding_priorities[j];
7296 }
7297
7298 bcopy (priorities, coding_priorities, sizeof priorities);
7299 return Qnil;
7300}
7301
7302DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7303 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
7304 doc: /* Return a list of coding systems ordered by their priorities.
7305HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
7306 (highestp)
7307 Lisp_Object highestp;
d46c5b12
KH
7308{
7309 int i;
df7492f9 7310 Lisp_Object val;
d46c5b12 7311
df7492f9 7312 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7313 {
df7492f9
KH
7314 enum coding_category category = coding_priorities[i];
7315 int id = coding_categories[category].id;
7316 Lisp_Object attrs;
7317
7318 if (id < 0)
7319 continue;
7320 attrs = CODING_ID_ATTRS (id);
7321 if (! NILP (highestp))
7322 return CODING_ATTR_BASE_NAME (attrs);
7323 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7324 }
7325 return Fnreverse (val);
7326}
7327
f0064e1f
DL
7328static char *suffixes[] = { "-unix", "-dos", "-mac" };
7329
df7492f9
KH
7330static Lisp_Object
7331make_subsidiaries (base)
7332 Lisp_Object base;
7333{
7334 Lisp_Object subsidiaries;
df7492f9
KH
7335 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7336 char *buf = (char *) alloca (base_name_len + 6);
7337 int i;
7338
7339 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7340 subsidiaries = Fmake_vector (make_number (3), Qnil);
7341 for (i = 0; i < 3; i++)
7342 {
7343 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7344 ASET (subsidiaries, i, intern (buf));
7345 }
7346 return subsidiaries;
7347}
7348
7349
7350DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7351 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
7352 doc: /* For internal use only.
7353usage: (define-coding-system-internal ...) */)
df7492f9
KH
7354 (nargs, args)
7355 int nargs;
7356 Lisp_Object *args;
7357{
7358 Lisp_Object name;
7359 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7360 Lisp_Object attrs; /* Vector of attributes. */
7361 Lisp_Object eol_type;
7362 Lisp_Object aliases;
7363 Lisp_Object coding_type, charset_list, safe_charsets;
7364 enum coding_category category;
7365 Lisp_Object tail, val;
7366 int max_charset_id = 0;
7367 int i;
7368
7369 if (nargs < coding_arg_max)
7370 goto short_args;
7371
7372 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7373
7374 name = args[coding_arg_name];
7375 CHECK_SYMBOL (name);
7376 CODING_ATTR_BASE_NAME (attrs) = name;
7377
7378 val = args[coding_arg_mnemonic];
7379 if (! STRINGP (val))
7380 CHECK_CHARACTER (val);
7381 CODING_ATTR_MNEMONIC (attrs) = val;
7382
7383 coding_type = args[coding_arg_coding_type];
7384 CHECK_SYMBOL (coding_type);
7385 CODING_ATTR_TYPE (attrs) = coding_type;
7386
7387 charset_list = args[coding_arg_charset_list];
7388 if (SYMBOLP (charset_list))
7389 {
7390 if (EQ (charset_list, Qiso_2022))
7391 {
7392 if (! EQ (coding_type, Qiso_2022))
7393 error ("Invalid charset-list");
7394 charset_list = Viso_2022_charset_list;
7395 }
7396 else if (EQ (charset_list, Qemacs_mule))
7397 {
7398 if (! EQ (coding_type, Qemacs_mule))
7399 error ("Invalid charset-list");
7400 charset_list = Vemacs_mule_charset_list;
7401 }
7402 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7403 if (max_charset_id < XFASTINT (XCAR (tail)))
7404 max_charset_id = XFASTINT (XCAR (tail));
7405 }
7406 else
7407 {
7408 charset_list = Fcopy_sequence (charset_list);
7409 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7410 {
7411 struct charset *charset;
7412
7413 val = Fcar (tail);
7414 CHECK_CHARSET_GET_CHARSET (val, charset);
7415 if (EQ (coding_type, Qiso_2022)
7416 ? CHARSET_ISO_FINAL (charset) < 0
7417 : EQ (coding_type, Qemacs_mule)
7418 ? CHARSET_EMACS_MULE_ID (charset) < 0
7419 : 0)
7420 error ("Can't handle charset `%s'",
7421 XSYMBOL (CHARSET_NAME (charset))->name->data);
7422
7423 XCAR (tail) = make_number (charset->id);
7424 if (max_charset_id < charset->id)
7425 max_charset_id = charset->id;
7426 }
7427 }
7428 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7429
7430 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7431 make_number (255));
7432 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7433 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7434 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7435
7436 val = args[coding_arg_decode_translation_table];
7437 if (! NILP (val))
7438 CHECK_CHAR_TABLE (val);
7439 CODING_ATTR_DECODE_TBL (attrs) = val;
7440
7441 val = args[coding_arg_encode_translation_table];
7442 if (! NILP (val))
7443 CHECK_CHAR_TABLE (val);
7444 CODING_ATTR_ENCODE_TBL (attrs) = val;
7445
7446 val = args[coding_arg_post_read_conversion];
7447 CHECK_SYMBOL (val);
7448 CODING_ATTR_POST_READ (attrs) = val;
7449
7450 val = args[coding_arg_pre_write_conversion];
7451 CHECK_SYMBOL (val);
7452 CODING_ATTR_PRE_WRITE (attrs) = val;
7453
7454 val = args[coding_arg_default_char];
7455 if (NILP (val))
7456 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7457 else
7458 {
7459 CHECK_CHARACTER (val);
7460 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7461 }
7462
7463 val = args[coding_arg_plist];
7464 CHECK_LIST (val);
7465 CODING_ATTR_PLIST (attrs) = val;
7466
7467 if (EQ (coding_type, Qcharset))
7468 {
c7c66a95
KH
7469 /* Generate a lisp vector of 256 elements. Each element is nil,
7470 integer, or a list of charset IDs.
7471
7472 If Nth element is nil, the byte code N is invalid in this
7473 coding system.
7474
7475 If Nth element is a number NUM, N is the first byte of a
7476 charset whose ID is NUM.
7477
7478 If Nth element is a list of charset IDs, N is the first byte
7479 of one of them. The list is sorted by dimensions of the
7480 charsets. A charset of smaller dimension comes firtst.
7481 */
df7492f9
KH
7482 val = Fmake_vector (make_number (256), Qnil);
7483
7484 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7485 {
c7c66a95
KH
7486 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7487 int dim = CHARSET_DIMENSION (charset);
7488 int idx = (dim - 1) * 4;
7489
15d143f7
KH
7490 for (i = charset->code_space[idx];
7491 i <= charset->code_space[idx + 1]; i++)
7492 {
c7c66a95
KH
7493 Lisp_Object tmp, tmp2;
7494 int dim2;
7495
7496 tmp = AREF (val, i);
7497 if (NILP (tmp))
7498 tmp = XCAR (tail);
7499 else if (NUMBERP (tmp))
7500 {
7501 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7502 if (dim < dim2)
c7c66a95 7503 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
7504 else
7505 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 7506 }
15d143f7 7507 else
c7c66a95
KH
7508 {
7509 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7510 {
7511 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7512 if (dim < dim2)
7513 break;
7514 }
7515 if (NILP (tmp2))
7516 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7517 else
7518 {
7519 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7520 XSETCAR (tmp2, XCAR (tail));
7521 }
7522 }
7523 ASET (val, i, tmp);
15d143f7 7524 }
df7492f9
KH
7525 }
7526 ASET (attrs, coding_attr_charset_valids, val);
7527 category = coding_category_charset;
7528 }
7529 else if (EQ (coding_type, Qccl))
7530 {
7531 Lisp_Object valids;
7532
7533 if (nargs < coding_arg_ccl_max)
7534 goto short_args;
7535
7536 val = args[coding_arg_ccl_decoder];
7537 CHECK_CCL_PROGRAM (val);
7538 if (VECTORP (val))
7539 val = Fcopy_sequence (val);
7540 ASET (attrs, coding_attr_ccl_decoder, val);
7541
7542 val = args[coding_arg_ccl_encoder];
7543 CHECK_CCL_PROGRAM (val);
7544 if (VECTORP (val))
7545 val = Fcopy_sequence (val);
7546 ASET (attrs, coding_attr_ccl_encoder, val);
7547
7548 val = args[coding_arg_ccl_valids];
7549 valids = Fmake_string (make_number (256), make_number (0));
7550 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7551 {
7552 val = Fcar (tail);
7553 if (INTEGERP (val))
7554 ASET (valids, XINT (val), 1);
7555 else
7556 {
7557 int from, to;
7558
7559 CHECK_CONS (val);
7560 CHECK_NUMBER (XCAR (val));
7561 CHECK_NUMBER (XCDR (val));
7562 from = XINT (XCAR (val));
7563 to = XINT (XCDR (val));
7564 for (i = from; i <= to; i++)
7565 ASET (valids, i, 1);
7566 }
7567 }
7568 ASET (attrs, coding_attr_ccl_valids, valids);
7569
7570 category = coding_category_ccl;
7571 }
7572 else if (EQ (coding_type, Qutf_16))
7573 {
7574 Lisp_Object bom, endian;
7575
7576 if (nargs < coding_arg_utf16_max)
7577 goto short_args;
7578
7579 bom = args[coding_arg_utf16_bom];
7580 if (! NILP (bom) && ! EQ (bom, Qt))
7581 {
7582 CHECK_CONS (bom);
7583 CHECK_CODING_SYSTEM (XCAR (bom));
7584 CHECK_CODING_SYSTEM (XCDR (bom));
7585 }
7586 ASET (attrs, coding_attr_utf_16_bom, bom);
7587
7588 endian = args[coding_arg_utf16_endian];
7589 ASET (attrs, coding_attr_utf_16_endian, endian);
7590
7591 category = (CONSP (bom)
7592 ? coding_category_utf_16_auto
7593 : NILP (bom)
7594 ? (NILP (endian)
7595 ? coding_category_utf_16_be_nosig
7596 : coding_category_utf_16_le_nosig)
7597 : (NILP (endian)
7598 ? coding_category_utf_16_be
7599 : coding_category_utf_16_le));
7600 }
7601 else if (EQ (coding_type, Qiso_2022))
7602 {
7603 Lisp_Object initial, reg_usage, request, flags;
0be8721c 7604 int i, id;
1397dc18 7605
df7492f9
KH
7606 if (nargs < coding_arg_iso2022_max)
7607 goto short_args;
7608
7609 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7610 CHECK_VECTOR (initial);
7611 for (i = 0; i < 4; i++)
7612 {
7613 val = Faref (initial, make_number (i));
7614 if (! NILP (val))
7615 {
7616 CHECK_CHARSET_GET_ID (val, id);
7617 ASET (initial, i, make_number (id));
7618 }
7619 else
7620 ASET (initial, i, make_number (-1));
7621 }
7622
7623 reg_usage = args[coding_arg_iso2022_reg_usage];
7624 CHECK_CONS (reg_usage);
7625 CHECK_NATNUM (XCAR (reg_usage));
7626 CHECK_NATNUM (XCDR (reg_usage));
7627
7628 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7629 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 7630 {
df7492f9
KH
7631 int id;
7632
7633 val = Fcar (tail);
7634 CHECK_CONS (val);
7635 CHECK_CHARSET_GET_ID (XCAR (val), id);
7636 CHECK_NATNUM (XCDR (val));
7637 if (XINT (XCDR (val)) >= 4)
7638 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7639 XCAR (val) = make_number (id);
1397dc18 7640 }
df7492f9
KH
7641
7642 flags = args[coding_arg_iso2022_flags];
7643 CHECK_NATNUM (flags);
7644 i = XINT (flags);
7645 if (EQ (args[coding_arg_charset_list], Qiso_2022))
7646 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7647
7648 ASET (attrs, coding_attr_iso_initial, initial);
7649 ASET (attrs, coding_attr_iso_usage, reg_usage);
7650 ASET (attrs, coding_attr_iso_request, request);
7651 ASET (attrs, coding_attr_iso_flags, flags);
7652 setup_iso_safe_charsets (attrs);
7653
7654 if (i & CODING_ISO_FLAG_SEVEN_BITS)
7655 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7656 | CODING_ISO_FLAG_SINGLE_SHIFT))
7657 ? coding_category_iso_7_else
7658 : EQ (args[coding_arg_charset_list], Qiso_2022)
7659 ? coding_category_iso_7
7660 : coding_category_iso_7_tight);
7661 else
7662 {
7663 int id = XINT (AREF (initial, 1));
7664
7665 category = (((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7666 | CODING_ISO_FLAG_SINGLE_SHIFT))
7667 || EQ (args[coding_arg_charset_list], Qiso_2022)
7668 || id < 0)
7669 ? coding_category_iso_8_else
7670 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7671 ? coding_category_iso_8_1
7672 : coding_category_iso_8_2);
7673 }
7674 }
7675 else if (EQ (coding_type, Qemacs_mule))
7676 {
7677 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7678 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7679
7680 category = coding_category_emacs_mule;
7681 }
7682 else if (EQ (coding_type, Qshift_jis))
7683 {
7684
7685 struct charset *charset;
7686
7687 if (XINT (Flength (charset_list)) != 3)
7688 error ("There should be just three charsets");
7689
7690 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7691 if (CHARSET_DIMENSION (charset) != 1)
7692 error ("Dimension of charset %s is not one",
7693 XSYMBOL (CHARSET_NAME (charset))->name->data);
7694
7695 charset_list = XCDR (charset_list);
7696 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7697 if (CHARSET_DIMENSION (charset) != 1)
7698 error ("Dimension of charset %s is not one",
7699 XSYMBOL (CHARSET_NAME (charset))->name->data);
7700
7701 charset_list = XCDR (charset_list);
7702 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7703 if (CHARSET_DIMENSION (charset) != 2)
7704 error ("Dimension of charset %s is not two",
7705 XSYMBOL (CHARSET_NAME (charset))->name->data);
7706
7707 category = coding_category_sjis;
7708 Vsjis_coding_system = name;
7709 }
7710 else if (EQ (coding_type, Qbig5))
7711 {
7712 struct charset *charset;
7713
7714 if (XINT (Flength (charset_list)) != 2)
7715 error ("There should be just two charsets");
7716
7717 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7718 if (CHARSET_DIMENSION (charset) != 1)
7719 error ("Dimension of charset %s is not one",
7720 XSYMBOL (CHARSET_NAME (charset))->name->data);
7721
7722 charset_list = XCDR (charset_list);
7723 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7724 if (CHARSET_DIMENSION (charset) != 2)
7725 error ("Dimension of charset %s is not two",
7726 XSYMBOL (CHARSET_NAME (charset))->name->data);
7727
7728 category = coding_category_big5;
7729 Vbig5_coding_system = name;
7730 }
7731 else if (EQ (coding_type, Qraw_text))
7732 category = coding_category_raw_text;
7733 else if (EQ (coding_type, Qutf_8))
7734 category = coding_category_utf_8;
7735 else if (EQ (coding_type, Qundecided))
7736 category = coding_category_undecided;
7737 else
7738 error ("Invalid coding system type: %s",
7739 XSYMBOL (coding_type)->name->data);
7740
7741 CODING_ATTR_CATEGORY (attrs) = make_number (category);
7742
7743 eol_type = args[coding_arg_eol_type];
7744 if (! NILP (eol_type)
7745 && ! EQ (eol_type, Qunix)
7746 && ! EQ (eol_type, Qdos)
7747 && ! EQ (eol_type, Qmac))
7748 error ("Invalid eol-type");
7749
7750 aliases = Fcons (name, Qnil);
7751
7752 if (NILP (eol_type))
7753 {
7754 eol_type = make_subsidiaries (name);
7755 for (i = 0; i < 3; i++)
1397dc18 7756 {
df7492f9
KH
7757 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7758
7759 this_name = AREF (eol_type, i);
7760 this_aliases = Fcons (this_name, Qnil);
7761 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7762 this_spec = Fmake_vector (make_number (3), attrs);
7763 ASET (this_spec, 1, this_aliases);
7764 ASET (this_spec, 2, this_eol_type);
7765 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7766 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7767 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7768 Vcoding_system_alist);
1397dc18 7769 }
d46c5b12 7770 }
1397dc18 7771
df7492f9
KH
7772 spec_vec = Fmake_vector (make_number (3), attrs);
7773 ASET (spec_vec, 1, aliases);
7774 ASET (spec_vec, 2, eol_type);
7775
7776 Fputhash (name, spec_vec, Vcoding_system_hash_table);
7777 Vcoding_system_list = Fcons (name, Vcoding_system_list);
7778 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7779 Vcoding_system_alist);
7780
7781 {
7782 int id = coding_categories[category].id;
7783
7784 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7785 setup_coding_system (name, &coding_categories[category]);
7786 }
7787
d46c5b12 7788 return Qnil;
df7492f9
KH
7789
7790 short_args:
7791 return Fsignal (Qwrong_number_of_arguments,
7792 Fcons (intern ("define-coding-system-internal"),
7793 make_number (nargs)));
d46c5b12
KH
7794}
7795
da7db224
DL
7796/* Fixme: should this record the alias relationships for
7797 diagnostics? */
df7492f9
KH
7798DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7799 Sdefine_coding_system_alias, 2, 2, 0,
7800 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7801 (alias, coding_system)
7802 Lisp_Object alias, coding_system;
66cfb530 7803{
df7492f9 7804 Lisp_Object spec, aliases, eol_type;
84d60297 7805
df7492f9
KH
7806 CHECK_SYMBOL (alias);
7807 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7808 aliases = AREF (spec, 1);
7809 while (!NILP (XCDR (aliases)))
7810 aliases = XCDR (aliases);
7811 XCDR (aliases) = Fcons (alias, Qnil);
66cfb530 7812
df7492f9
KH
7813 eol_type = AREF (spec, 2);
7814 if (VECTORP (eol_type))
66cfb530 7815 {
df7492f9
KH
7816 Lisp_Object subsidiaries;
7817 int i;
7818
7819 subsidiaries = make_subsidiaries (alias);
7820 for (i = 0; i < 3; i++)
7821 Fdefine_coding_system_alias (AREF (subsidiaries, i),
7822 AREF (eol_type, i));
7823
7824 ASET (spec, 2, subsidiaries);
66cfb530 7825 }
df7492f9
KH
7826
7827 Fputhash (alias, spec, Vcoding_system_hash_table);
5bad0796
DL
7828 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
7829 Vcoding_system_alist);
66cfb530
KH
7830
7831 return Qnil;
7832}
7833
df7492f9
KH
7834DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7835 1, 1, 0,
7836 doc: /* Return the base of CODING-SYSTEM.
da7db224 7837Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
7838 (coding_system)
7839 Lisp_Object coding_system;
7840{
7841 Lisp_Object spec, attrs;
7842
7843 if (NILP (coding_system))
7844 return (Qno_conversion);
7845 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7846 attrs = AREF (spec, 0);
7847 return CODING_ATTR_BASE_NAME (attrs);
7848}
7849
7850DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7851 1, 1, 0,
7852 doc: "Return the property list of CODING-SYSTEM.")
7853 (coding_system)
7854 Lisp_Object coding_system;
7855{
7856 Lisp_Object spec, attrs;
7857
7858 if (NILP (coding_system))
7859 coding_system = Qno_conversion;
7860 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7861 attrs = AREF (spec, 0);
7862 return CODING_ATTR_PLIST (attrs);
7863}
7864
7865
7866DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7867 1, 1, 0,
da7db224 7868 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
7869 (coding_system)
7870 Lisp_Object coding_system;
7871{
7872 Lisp_Object spec;
7873
7874 if (NILP (coding_system))
7875 coding_system = Qno_conversion;
7876 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 7877 return AREF (spec, 1);
df7492f9
KH
7878}
7879
7880DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7881 Scoding_system_eol_type, 1, 1, 0,
7882 doc: /* Return eol-type of CODING-SYSTEM.
7883An eol-type is integer 0, 1, 2, or a vector of coding systems.
7884
7885Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7886and CR respectively.
7887
7888A vector value indicates that a format of end-of-line should be
7889detected automatically. Nth element of the vector is the subsidiary
7890coding system whose eol-type is N. */)
7891 (coding_system)
7892 Lisp_Object coding_system;
7893{
7894 Lisp_Object spec, eol_type;
7895 int n;
7896
7897 if (NILP (coding_system))
7898 coding_system = Qno_conversion;
7899 if (! CODING_SYSTEM_P (coding_system))
7900 return Qnil;
7901 spec = CODING_SYSTEM_SPEC (coding_system);
7902 eol_type = AREF (spec, 2);
7903 if (VECTORP (eol_type))
7904 return Fcopy_sequence (eol_type);
7905 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7906 return make_number (n);
7907}
7908
4ed46869
KH
7909#endif /* emacs */
7910
7911\f
1397dc18 7912/*** 12. Postamble ***/
4ed46869 7913
dfcf069d 7914void
4ed46869
KH
7915init_coding_once ()
7916{
7917 int i;
7918
df7492f9
KH
7919 for (i = 0; i < coding_category_max; i++)
7920 {
7921 coding_categories[i].id = -1;
7922 coding_priorities[i] = i;
7923 }
4ed46869
KH
7924
7925 /* ISO2022 specific initialize routine. */
7926 for (i = 0; i < 0x20; i++)
b73bfc1c 7927 iso_code_class[i] = ISO_control_0;
4ed46869
KH
7928 for (i = 0x21; i < 0x7F; i++)
7929 iso_code_class[i] = ISO_graphic_plane_0;
7930 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 7931 iso_code_class[i] = ISO_control_1;
4ed46869
KH
7932 for (i = 0xA1; i < 0xFF; i++)
7933 iso_code_class[i] = ISO_graphic_plane_1;
7934 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7935 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7936 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7937 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7938 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7939 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7940 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7941 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7942 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7943 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7944
b843d1ae 7945 inhibit_pre_post_conversion = 0;
df7492f9
KH
7946
7947 for (i = 0; i < 256; i++)
7948 {
7949 emacs_mule_bytes[i] = 1;
7950 }
781d7a48
KH
7951 emacs_mule_bytes[LEADING_CODE_PRIVATE_11] = 3;
7952 emacs_mule_bytes[LEADING_CODE_PRIVATE_12] = 3;
7953 emacs_mule_bytes[LEADING_CODE_PRIVATE_21] = 4;
7954 emacs_mule_bytes[LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
7955}
7956
7957#ifdef emacs
7958
dfcf069d 7959void
e0e989f6
KH
7960syms_of_coding ()
7961{
df7492f9
KH
7962 staticpro (&Vcoding_system_hash_table);
7963 Vcoding_system_hash_table = Fmakehash (Qeq);
7964
7965 staticpro (&Vsjis_coding_system);
7966 Vsjis_coding_system = Qnil;
7967
7968 staticpro (&Vbig5_coding_system);
7969 Vbig5_coding_system = Qnil;
7970
7971 staticpro (&Vcode_conversion_work_buf_list);
7972 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 7973
df7492f9
KH
7974 staticpro (&Vcode_conversion_reused_work_buf);
7975 Vcode_conversion_reused_work_buf = Qnil;
7976
7977 DEFSYM (Qcharset, "charset");
7978 DEFSYM (Qtarget_idx, "target-idx");
7979 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
7980 Fset (Qcoding_system_history, Qnil);
7981
9ce27fde 7982 /* Target FILENAME is the first argument. */
e0e989f6 7983 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 7984 /* Target FILENAME is the third argument. */
e0e989f6
KH
7985 Fput (Qwrite_region, Qtarget_idx, make_number (2));
7986
df7492f9 7987 DEFSYM (Qcall_process, "call-process");
9ce27fde 7988 /* Target PROGRAM is the first argument. */
e0e989f6
KH
7989 Fput (Qcall_process, Qtarget_idx, make_number (0));
7990
df7492f9 7991 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 7992 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7993 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7994
df7492f9 7995 DEFSYM (Qstart_process, "start-process");
9ce27fde 7996 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7997 Fput (Qstart_process, Qtarget_idx, make_number (2));
7998
df7492f9 7999 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 8000 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
8001 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8002
df7492f9
KH
8003 DEFSYM (Qcoding_system, "coding-system");
8004 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 8005
df7492f9
KH
8006 DEFSYM (Qeol_type, "eol-type");
8007 DEFSYM (Qunix, "unix");
8008 DEFSYM (Qdos, "dos");
4ed46869 8009
df7492f9
KH
8010 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8011 DEFSYM (Qpost_read_conversion, "post-read-conversion");
8012 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8013 DEFSYM (Qdefault_char, "default-char");
8014 DEFSYM (Qundecided, "undecided");
8015 DEFSYM (Qno_conversion, "no-conversion");
8016 DEFSYM (Qraw_text, "raw-text");
4ed46869 8017
df7492f9 8018 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 8019
df7492f9 8020 DEFSYM (Qutf_8, "utf-8");
27901516 8021
df7492f9
KH
8022 DEFSYM (Qutf_16, "utf-16");
8023 DEFSYM (Qutf_16_be, "utf-16-be");
8024 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
8025 DEFSYM (Qutf_16_le, "utf-16-l3");
8026 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
8027 DEFSYM (Qsignature, "signature");
8028 DEFSYM (Qendian, "endian");
8029 DEFSYM (Qbig, "big");
8030 DEFSYM (Qlittle, "little");
27901516 8031
df7492f9
KH
8032 DEFSYM (Qshift_jis, "shift-jis");
8033 DEFSYM (Qbig5, "big5");
4ed46869 8034
df7492f9 8035 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 8036
df7492f9 8037 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
8038 Fput (Qcoding_system_error, Qerror_conditions,
8039 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8040 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 8041 build_string ("Invalid coding system"));
4ed46869 8042
df7492f9
KH
8043 /* Intern this now in case it isn't already done.
8044 Setting this variable twice is harmless.
8045 But don't staticpro it here--that is done in alloc.c. */
8046 Qchar_table_extra_slots = intern ("char-table-extra-slots");
4ed46869 8047
df7492f9 8048 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 8049 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
8050 DEFSYM (Qtranslation_table_id, "translation-table-id");
8051 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8052 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
bdd9fb48 8053
df7492f9 8054 DEFSYM (Qvalid_codes, "valid-codes");
05e6f5dc 8055
df7492f9 8056 DEFSYM (Qemacs_mule, "emacs-mule");
05e6f5dc 8057
df7492f9
KH
8058 Vcoding_category_table
8059 = Fmake_vector (make_number (coding_category_max), Qnil);
8060 staticpro (&Vcoding_category_table);
8061 /* Followings are target of code detection. */
8062 ASET (Vcoding_category_table, coding_category_iso_7,
8063 intern ("coding-category-iso-7"));
8064 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8065 intern ("coding-category-iso-7-tight"));
8066 ASET (Vcoding_category_table, coding_category_iso_8_1,
8067 intern ("coding-category-iso-8-1"));
8068 ASET (Vcoding_category_table, coding_category_iso_8_2,
8069 intern ("coding-category-iso-8-2"));
8070 ASET (Vcoding_category_table, coding_category_iso_7_else,
8071 intern ("coding-category-iso-7-else"));
8072 ASET (Vcoding_category_table, coding_category_iso_8_else,
8073 intern ("coding-category-iso-8-else"));
8074 ASET (Vcoding_category_table, coding_category_utf_8,
8075 intern ("coding-category-utf-8"));
8076 ASET (Vcoding_category_table, coding_category_utf_16_be,
8077 intern ("coding-category-utf-16-be"));
8078 ASET (Vcoding_category_table, coding_category_utf_16_le,
8079 intern ("coding-category-utf-16-le"));
8080 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8081 intern ("coding-category-utf-16-be-nosig"));
8082 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8083 intern ("coding-category-utf-16-le-nosig"));
8084 ASET (Vcoding_category_table, coding_category_charset,
8085 intern ("coding-category-charset"));
8086 ASET (Vcoding_category_table, coding_category_sjis,
8087 intern ("coding-category-sjis"));
8088 ASET (Vcoding_category_table, coding_category_big5,
8089 intern ("coding-category-big5"));
8090 ASET (Vcoding_category_table, coding_category_ccl,
8091 intern ("coding-category-ccl"));
8092 ASET (Vcoding_category_table, coding_category_emacs_mule,
8093 intern ("coding-category-emacs-mule"));
8094 /* Followings are NOT target of code detection. */
8095 ASET (Vcoding_category_table, coding_category_raw_text,
8096 intern ("coding-category-raw-text"));
8097 ASET (Vcoding_category_table, coding_category_undecided,
8098 intern ("coding-category-undecided"));
70c22245 8099
4ed46869
KH
8100 defsubr (&Scoding_system_p);
8101 defsubr (&Sread_coding_system);
8102 defsubr (&Sread_non_nil_coding_system);
8103 defsubr (&Scheck_coding_system);
8104 defsubr (&Sdetect_coding_region);
d46c5b12 8105 defsubr (&Sdetect_coding_string);
05e6f5dc 8106 defsubr (&Sfind_coding_systems_region_internal);
df7492f9 8107 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
8108 defsubr (&Sdecode_coding_region);
8109 defsubr (&Sencode_coding_region);
8110 defsubr (&Sdecode_coding_string);
8111 defsubr (&Sencode_coding_string);
8112 defsubr (&Sdecode_sjis_char);
8113 defsubr (&Sencode_sjis_char);
8114 defsubr (&Sdecode_big5_char);
8115 defsubr (&Sencode_big5_char);
1ba9e4ab 8116 defsubr (&Sset_terminal_coding_system_internal);
c4825358 8117 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 8118 defsubr (&Sterminal_coding_system);
1ba9e4ab 8119 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 8120 defsubr (&Skeyboard_coding_system);
a5d301df 8121 defsubr (&Sfind_operation_coding_system);
df7492f9
KH
8122 defsubr (&Sset_coding_system_priority);
8123 defsubr (&Sdefine_coding_system_internal);
8124 defsubr (&Sdefine_coding_system_alias);
8125 defsubr (&Scoding_system_base);
8126 defsubr (&Scoding_system_plist);
8127 defsubr (&Scoding_system_aliases);
8128 defsubr (&Scoding_system_eol_type);
8129 defsubr (&Scoding_system_priority_list);
4ed46869 8130
4608c386 8131 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
8132 doc: /* List of coding systems.
8133
8134Do not alter the value of this variable manually. This variable should be
df7492f9 8135updated by the functions `define-coding-system' and
48b0f3ae 8136`define-coding-system-alias'. */);
4608c386
KH
8137 Vcoding_system_list = Qnil;
8138
8139 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
8140 doc: /* Alist of coding system names.
8141Each element is one element list of coding system name.
8142This variable is given to `completing-read' as TABLE argument.
8143
8144Do not alter the value of this variable manually. This variable should be
8145updated by the functions `make-coding-system' and
8146`define-coding-system-alias'. */);
4608c386
KH
8147 Vcoding_system_alist = Qnil;
8148
4ed46869 8149 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
8150 doc: /* List of coding-categories (symbols) ordered by priority.
8151
8152On detecting a coding system, Emacs tries code detection algorithms
8153associated with each coding-category one by one in this order. When
8154one algorithm agrees with a byte sequence of source text, the coding
8155system bound to the corresponding coding-category is selected. */);
4ed46869
KH
8156 {
8157 int i;
8158
8159 Vcoding_category_list = Qnil;
df7492f9 8160 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 8161 Vcoding_category_list
d46c5b12
KH
8162 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8163 Vcoding_category_list);
4ed46869
KH
8164 }
8165
8166 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
8167 doc: /* Specify the coding system for read operations.
8168It is useful to bind this variable with `let', but do not set it globally.
8169If the value is a coding system, it is used for decoding on read operation.
8170If not, an appropriate element is used from one of the coding system alists:
8171There are three such tables, `file-coding-system-alist',
8172`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
8173 Vcoding_system_for_read = Qnil;
8174
8175 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
8176 doc: /* Specify the coding system for write operations.
8177Programs bind this variable with `let', but you should not set it globally.
8178If the value is a coding system, it is used for encoding of output,
8179when writing it to a file and when sending it to a file or subprocess.
8180
8181If this does not specify a coding system, an appropriate element
8182is used from one of the coding system alists:
8183There are three such tables, `file-coding-system-alist',
8184`process-coding-system-alist', and `network-coding-system-alist'.
8185For output to files, if the above procedure does not specify a coding system,
8186the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
8187 Vcoding_system_for_write = Qnil;
8188
8189 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
8190 doc: /*
8191Coding system used in the latest file or process I/O. */);
4ed46869
KH
8192 Vlast_coding_system_used = Qnil;
8193
9ce27fde 8194 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
8195 doc: /*
8196*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
8197See info node `Coding Systems' and info node `Text and Binary' concerning
8198such conversion. */);
9ce27fde
KH
8199 inhibit_eol_conversion = 0;
8200
ed29121d 8201 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8202 doc: /*
8203Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8204Bind it to t if the process output is to be treated as if it were a file
8205read from some filesystem. */);
ed29121d
EZ
8206 inherit_process_coding_system = 0;
8207
02ba4723 8208 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8209 doc: /*
8210Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8211The format is ((PATTERN . VAL) ...),
8212where PATTERN is a regular expression matching a file name,
8213VAL is a coding system, a cons of coding systems, or a function symbol.
8214If VAL is a coding system, it is used for both decoding and encoding
8215the file contents.
8216If VAL is a cons of coding systems, the car part is used for decoding,
8217and the cdr part is used for encoding.
8218If VAL is a function symbol, the function must return a coding system
0192762c
DL
8219or a cons of coding systems which are used as above. The function gets
8220the arguments with which `find-operation-coding-systems' was called.
48b0f3ae
PJ
8221
8222See also the function `find-operation-coding-system'
8223and the variable `auto-coding-alist'. */);
02ba4723
KH
8224 Vfile_coding_system_alist = Qnil;
8225
8226 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8227 doc: /*
8228Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8229The format is ((PATTERN . VAL) ...),
8230where PATTERN is a regular expression matching a program name,
8231VAL is a coding system, a cons of coding systems, or a function symbol.
8232If VAL is a coding system, it is used for both decoding what received
8233from the program and encoding what sent to the program.
8234If VAL is a cons of coding systems, the car part is used for decoding,
8235and the cdr part is used for encoding.
8236If VAL is a function symbol, the function must return a coding system
8237or a cons of coding systems which are used as above.
8238
8239See also the function `find-operation-coding-system'. */);
02ba4723
KH
8240 Vprocess_coding_system_alist = Qnil;
8241
8242 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8243 doc: /*
8244Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8245The format is ((PATTERN . VAL) ...),
8246where PATTERN is a regular expression matching a network service name
8247or is a port number to connect to,
8248VAL is a coding system, a cons of coding systems, or a function symbol.
8249If VAL is a coding system, it is used for both decoding what received
8250from the network stream and encoding what sent to the network stream.
8251If VAL is a cons of coding systems, the car part is used for decoding,
8252and the cdr part is used for encoding.
8253If VAL is a function symbol, the function must return a coding system
8254or a cons of coding systems which are used as above.
8255
8256See also the function `find-operation-coding-system'. */);
02ba4723 8257 Vnetwork_coding_system_alist = Qnil;
4ed46869 8258
68c45bf0 8259 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8260 doc: /* Coding system to use with system messages.
8261Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8262 Vlocale_coding_system = Qnil;
8263
005f0d35 8264 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8265 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8266 doc: /*
8267*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8268 eol_mnemonic_unix = build_string (":");
4ed46869 8269
7722baf9 8270 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8271 doc: /*
8272*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8273 eol_mnemonic_dos = build_string ("\\");
4ed46869 8274
7722baf9 8275 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8276 doc: /*
8277*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8278 eol_mnemonic_mac = build_string ("/");
4ed46869 8279
7722baf9 8280 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8281 doc: /*
8282*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8283 eol_mnemonic_undecided = build_string (":");
4ed46869 8284
84fbb8a0 8285 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8286 doc: /*
8287*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8288 Venable_character_translation = Qt;
bdd9fb48 8289
f967223b 8290 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8291 &Vstandard_translation_table_for_decode,
8292 doc: /* Table for translating characters while decoding. */);
f967223b 8293 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8294
f967223b 8295 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8296 &Vstandard_translation_table_for_encode,
8297 doc: /* Table for translating characters while encoding. */);
f967223b 8298 Vstandard_translation_table_for_encode = Qnil;
4ed46869 8299
df7492f9 8300 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
8301 doc: /* Alist of charsets vs revision numbers.
8302While encoding, if a charset (car part of an element) is found,
df7492f9
KH
8303designate it with the escape sequence identifying revision (cdr part
8304of the element). */);
8305 Vcharset_revision_table = Qnil;
02ba4723
KH
8306
8307 DEFVAR_LISP ("default-process-coding-system",
8308 &Vdefault_process_coding_system,
48b0f3ae
PJ
8309 doc: /* Cons of coding systems used for process I/O by default.
8310The car part is used for decoding a process output,
8311the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 8312 Vdefault_process_coding_system = Qnil;
c4825358 8313
3f003981 8314 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
8315 doc: /*
8316Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
8317This is a vector of length 256.
8318If Nth element is non-nil, the existence of code N in a file
8319\(or output of subprocess) doesn't prevent it to be detected as
8320a coding system of ISO 2022 variant which has a flag
8321`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8322or reading output of a subprocess.
8323Only 128th through 159th elements has a meaning. */);
3f003981 8324 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
8325
8326 DEFVAR_LISP ("select-safe-coding-system-function",
8327 &Vselect_safe_coding_system_function,
df7492f9
KH
8328 doc: /*
8329Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
8330
8331If set, this function is called to force a user to select a proper
8332coding system which can encode the text in the case that a default
8333coding system used in each operation can't encode the text.
8334
8335The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
8336 Vselect_safe_coding_system_function = Qnil;
8337
22ab2303 8338 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 8339 &inhibit_iso_escape_detection,
df7492f9
KH
8340 doc: /*
8341If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
8342
8343By default, on reading a file, Emacs tries to detect how the text is
8344encoded. This code detection is sensitive to escape sequences. If
8345the sequence is valid as ISO2022, the code is determined as one of
8346the ISO2022 encodings, and the file is decoded by the corresponding
8347coding system (e.g. `iso-2022-7bit').
8348
8349However, there may be a case that you want to read escape sequences in
8350a file as is. In such a case, you can set this variable to non-nil.
8351Then, as the code detection ignores any escape sequences, no file is
8352detected as encoded in some ISO2022 encoding. The result is that all
8353escape sequences become visible in a buffer.
8354
8355The default value is nil, and it is strongly recommended not to change
8356it. That is because many Emacs Lisp source files that contain
8357non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8358in Emacs's distribution, and they won't be decoded correctly on
8359reading if you suppress escape sequence detection.
8360
8361The other way to read escape sequences in a file without decoding is
8362to explicitly specify some coding system that doesn't use ISO2022's
8363escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 8364 inhibit_iso_escape_detection = 0;
2c78b7e1
KH
8365
8366 {
8367 Lisp_Object args[coding_arg_max];
8368 Lisp_Object plist[14];
8369 int i;
8370
8371 for (i = 0; i < coding_arg_max; i++)
8372 args[i] = Qnil;
8373
8374 plist[0] = intern (":name");
8375 plist[1] = args[coding_arg_name] = Qno_conversion;
8376 plist[2] = intern (":mnemonic");
8377 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8378 plist[4] = intern (":coding-type");
8379 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8380 plist[6] = intern (":ascii-compatible-p");
8381 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8382 plist[8] = intern (":default-char");
8383 plist[9] = args[coding_arg_default_char] = make_number (0);
8384 plist[10] = intern (":docstring");
8385 plist[11] = build_string ("Do no conversion.\n\
8386\n\
8387When you visit a file with this coding, the file is read into a\n\
8388unibyte buffer as is, thus each byte of a file is treated as a\n\
8389character.");
8390 plist[12] = intern (":eol-type");
8391 plist[13] = args[coding_arg_eol_type] = Qunix;
8392 args[coding_arg_plist] = Flist (14, plist);
8393 Fdefine_coding_system_internal (coding_arg_max, args);
8394 }
8395
8396 setup_coding_system (Qno_conversion, &keyboard_coding);
8397 setup_coding_system (Qno_conversion, &terminal_coding);
8398 setup_coding_system (Qno_conversion, &safe_terminal_coding);
4ed46869
KH
8399}
8400
68c45bf0
PE
8401char *
8402emacs_strerror (error_number)
8403 int error_number;
8404{
8405 char *str;
8406
ca9c0567 8407 synchronize_system_messages_locale ();
68c45bf0
PE
8408 str = strerror (error_number);
8409
8410 if (! NILP (Vlocale_coding_system))
8411 {
8412 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8413 Vlocale_coding_system,
8414 0);
8415 str = (char *) XSTRING (dec)->data;
8416 }
8417
8418 return str;
8419}
8420
4ed46869 8421#endif /* emacs */