*** empty log message ***
[bpt/emacs.git] / src / coding.c
CommitLineData
9542cb1f 1/* Coding system handler (conversion, detection, etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
6f197c07 4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
df7492f9
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcoding_system_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
df7492f9
KH
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
93dec019 82
df7492f9
KH
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
5a936b46 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on the operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is a two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
5bad0796 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we set up a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
e19c3639 138
4ed46869
KH
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9
KH
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwise (i.e. the data contains only ASCII
150 characters or an invalid sequence) return 0.
151
152 It also resets some bits of an integer pointed to by MASK. The macros
153 CATEGORY_MASK_XXX specify each bit of this integer.
154
155 Below is the template of these functions. */
156
4ed46869 157#if 0
df7492f9
KH
158static int
159detect_coding_XXX (coding, mask)
160 struct coding_system *coding;
161 int *mask;
4ed46869 162{
df7492f9
KH
163 unsigned char *src = coding->source;
164 unsigned char *src_end = coding->source + coding->src_bytes;
165 int multibytep = coding->src_multibyte;
166 int c;
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
175 /* Check if it conforms to XXX. If not, break the loop. */
176 }
177 /* As the data is invalid for XXX, reset a proper bits. */
178 *mask &= ~CODING_CATEGORY_XXX;
179 return 0;
180 no_more_source:
181 /* The source exausted. */
182 if (!found)
183 /* ASCII characters only. */
184 return 0;
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask &= CODING_CATEGORY_XXX;
187 return 1;
4ed46869
KH
188}
189#endif
190
191/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
df7492f9
KH
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
d46c5b12 197
df7492f9
KH
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
d46c5b12 202
df7492f9 203 Below is the template of these functions. */
d46c5b12 204
4ed46869 205#if 0
b73bfc1c 206static void
df7492f9 207decode_coding_XXXX (coding)
4ed46869 208 struct coding_system *coding;
4ed46869 209{
df7492f9
KH
210 unsigned char *src = coding->source + coding->consumed;
211 unsigned char *src_end = coding->source + coding->src_bytes;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base;
216 /* A buffer to produce decoded characters. */
217 int *charbuf = coding->charbuf;
218 int *charbuf_end = charbuf + coding->charbuf_size;
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
243}
244#endif
245
246/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
df7492f9
KH
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
d46c5b12 252
df7492f9
KH
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 257
df7492f9
KH
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce encoded text until it
260 reaches the head of the not-yet-encoded source text.
d46c5b12 261
df7492f9 262 Below is a template of these functions. */
4ed46869 263#if 0
b73bfc1c 264static void
df7492f9 265encode_coding_XXX (coding)
4ed46869 266 struct coding_system *coding;
4ed46869 267{
df7492f9
KH
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
4ed46869
KH
285}
286#endif
287
4ed46869
KH
288\f
289/*** 1. Preamble ***/
290
68c45bf0 291#include <config.h>
4ed46869
KH
292#include <stdio.h>
293
4ed46869
KH
294#include "lisp.h"
295#include "buffer.h"
df7492f9 296#include "character.h"
4ed46869
KH
297#include "charset.h"
298#include "ccl.h"
df7492f9 299#include "composite.h"
4ed46869
KH
300#include "coding.h"
301#include "window.h"
302
df7492f9 303Lisp_Object Vcoding_system_hash_table;
4ed46869 304
df7492f9 305Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
306Lisp_Object Qunix, Qdos;
307extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
308Lisp_Object Qbuffer_file_coding_system;
309Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 310Lisp_Object Qdefault_char;
27901516 311Lisp_Object Qno_conversion, Qundecided;
df7492f9
KH
312Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
313Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
314Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
bb0115a2 315Lisp_Object Qcoding_system_history;
1397dc18 316Lisp_Object Qvalid_codes;
4ed46869
KH
317
318extern Lisp_Object Qinsert_file_contents, Qwrite_region;
319Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
320Lisp_Object Qstart_process, Qopen_network_stream;
321Lisp_Object Qtarget_idx;
322
d46c5b12
KH
323Lisp_Object Vselect_safe_coding_system_function;
324
7722baf9
EZ
325/* Mnemonic string for each format of end-of-line. */
326Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
327/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 328 decided. */
7722baf9 329Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
330
#ifdef emacs

Lisp_Object Vcoding_system_list;
Lisp_Object Vcoding_system_alist;

Lisp_Object Qcoding_system_p;
Lisp_Object Qcoding_system_error;

/* The coding systems emacs-mule and raw-text convert only the
   end-of-line format.  */
Lisp_Object Qemacs_mule;
Lisp_Object Qraw_text;

/* Coding systems are handed between Emacs Lisp programs and the C
   internal routines through the following three variables.  */

/* Coding system for reading files and receiving data from a process.  */
Lisp_Object Vcoding_system_for_read;

/* Coding system for writing files and sending data to a process.  */
Lisp_Object Vcoding_system_for_write;

/* Coding system actually used in the latest I/O.  */
Lisp_Object Vlast_coding_system_used;

/* A vector of length 256 holding information about special Latin
   codes (especially for dealing with Microsoft codes).  */
Lisp_Object Vlatin_extra_code_table;

/* Flag to inhibit code conversion of end-of-line format.  */
int inhibit_eol_conversion;

/* Flag to inhibit ISO2022 escape sequence detection.  */
int inhibit_iso_escape_detection;

/* Flag to make buffer-file-coding-system inherit from process-coding.  */
int inherit_process_coding_system;

/* Coding system used to encode text for terminal display.  */
struct coding_system terminal_coding;

/* Coding system used to encode text for terminal display when the
   terminal coding system is nil.  */
struct coding_system safe_terminal_coding;

/* Coding system of what is sent from the terminal keyboard.  */
struct coding_system keyboard_coding;

Lisp_Object Vfile_coding_system_alist;
Lisp_Object Vprocess_coding_system_alist;
Lisp_Object Vnetwork_coding_system_alist;

Lisp_Object Vlocale_coding_system;

#endif /* emacs */
380
f967223b
KH
381/* Flag to tell if we look up translation table on character code
382 conversion. */
84fbb8a0 383Lisp_Object Venable_character_translation;
f967223b
KH
384/* Standard translation table to look up on decoding (reading). */
385Lisp_Object Vstandard_translation_table_for_decode;
386/* Standard translation table to look up on encoding (writing). */
387Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 388
f967223b
KH
389Lisp_Object Qtranslation_table;
390Lisp_Object Qtranslation_table_id;
391Lisp_Object Qtranslation_table_for_decode;
392Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
393
394/* Alist of charsets vs revision number. */
df7492f9 395static Lisp_Object Vcharset_revision_table;
4ed46869 396
02ba4723
KH
397/* Default coding systems used for process I/O. */
398Lisp_Object Vdefault_process_coding_system;
399
b843d1ae
KH
400/* Global flag to tell that we can't call post-read-conversion and
401 pre-write-conversion functions. Usually the value is zero, but it
402 is set to 1 temporarily while such functions are running. This is
403 to avoid infinite recursive call. */
404static int inhibit_pre_post_conversion;
405
df7492f9
KH
406/* Two special coding systems. */
407Lisp_Object Vsjis_coding_system;
408Lisp_Object Vbig5_coding_system;
409
410
411static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
412static void decode_coding_utf_8 P_ ((struct coding_system *));
413static int encode_coding_utf_8 P_ ((struct coding_system *));
414
415static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
416static void decode_coding_utf_16 P_ ((struct coding_system *));
417static int encode_coding_utf_16 P_ ((struct coding_system *));
418
419static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
420static void decode_coding_iso_2022 P_ ((struct coding_system *));
421static int encode_coding_iso_2022 P_ ((struct coding_system *));
422
423static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
424static void decode_coding_emacs_mule P_ ((struct coding_system *));
425static int encode_coding_emacs_mule P_ ((struct coding_system *));
426
427static int detect_coding_sjis P_ ((struct coding_system *, int *));
428static void decode_coding_sjis P_ ((struct coding_system *));
429static int encode_coding_sjis P_ ((struct coding_system *));
430
431static int detect_coding_big5 P_ ((struct coding_system *, int *));
432static void decode_coding_big5 P_ ((struct coding_system *));
433static int encode_coding_big5 P_ ((struct coding_system *));
434
435static int detect_coding_ccl P_ ((struct coding_system *, int *));
436static void decode_coding_ccl P_ ((struct coding_system *));
437static int encode_coding_ccl P_ ((struct coding_system *));
438
439static void decode_coding_raw_text P_ ((struct coding_system *));
440static int encode_coding_raw_text P_ ((struct coding_system *));
441
442
443/* ISO2022 section */
444
/* Integer stored at index REG of the `coding_attr_iso_initial'
   attribute of CODING's attribute vector.  */
#define CODING_ISO_INITIAL(coding, reg)                         \
  (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),            \
                     coding_attr_iso_initial),                  \
               reg)))
449
450
/* Value of CODING->safe_charsets[CHARSET_ID], or -1 when CHARSET_ID
   is greater than CODING->max_charset_id.  CHARSET_ID is
   parenthesized so that an expression argument is compared and
   indexed as a whole; it is evaluated twice, so it must not have side
   effects.  */
#define CODING_ISO_REQUEST(coding, charset_id)          \
  (((charset_id) <= (coding)->max_charset_id            \
    ? (coding)->safe_charsets[(charset_id)]             \
    : -1))
455
456
/* Accessors for the ISO-2022 specific part of a coding system,
   CODING->spec.iso_2022.  Each one expands to an lvalue.  */

/* The `flags' member.  */
#define CODING_ISO_FLAGS(coding)                \
  ((coding)->spec.iso_2022.flags)

/* Element REG of the `current_designation' array.  */
#define CODING_ISO_DESIGNATION(coding, reg)     \
  ((coding)->spec.iso_2022.current_designation[reg])

/* Element PLANE of the `current_invocation' array.  */
#define CODING_ISO_INVOCATION(coding, plane)    \
  ((coding)->spec.iso_2022.current_invocation[plane])

/* The `single_shifting' member.  */
#define CODING_ISO_SINGLE_SHIFTING(coding)      \
  ((coding)->spec.iso_2022.single_shifting)

/* The `bol' member.  */
#define CODING_ISO_BOL(coding)                  \
  ((coding)->spec.iso_2022.bol)

/* Designation of the register currently invoked to PLANE.  */
#define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
  CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
469
/* Control characters of ISO2022: name, code, function.  */
#define ISO_CODE_LF     0x0A    /* line-feed */
#define ISO_CODE_CR     0x0D    /* carriage-return */
#define ISO_CODE_SO     0x0E    /* shift-out */
#define ISO_CODE_SI     0x0F    /* shift-in */
#define ISO_CODE_SS2_7  0x19    /* single-shift-2 for 7-bit code */
#define ISO_CODE_ESC    0x1B    /* escape */
#define ISO_CODE_SS2    0x8E    /* single-shift-2 */
#define ISO_CODE_SS3    0x8F    /* single-shift-3 */
#define ISO_CODE_CSI    0x9B    /* control-sequence-introducer */
481
/* Every one-byte code of ISO2022 is classified into one of the
   following classes.  */
enum iso_code_class_type
  {
    /* Control codes in the range 0x00..0x1F and 0x7F, except for the
       following 5 codes.  */
    ISO_control_0,
    ISO_carriage_return,        /* ISO_CODE_CR (0x0D) */
    ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
    ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
    ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
    ISO_escape,                 /* ISO_CODE_ESC (0x1B) */

    /* Control codes in the range 0x80..0x9F, except for the following
       3 codes.  */
    ISO_control_1,
    ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
    ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
    ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */

    ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
    ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
    ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
    ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
  };
05e6f5dc 505
df7492f9
KH
/** Each macro CODING_ISO_FLAG_XXX defines one flag bit of the
    `iso-flags' attribute of an iso2022 coding system.  */

/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
   instead of the correct short-form sequence (e.g. ESC $ A).  */
#define CODING_ISO_FLAG_LONG_FORM       0x0001

/* If set, reset graphic planes and registers at end-of-line to the
   initial state.  */
#define CODING_ISO_FLAG_RESET_AT_EOL    0x0002

/* If set, reset graphic planes and registers before any control
   characters to the initial state.  */
#define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004

/* If set, encode by 7-bit environment.  */
#define CODING_ISO_FLAG_SEVEN_BITS      0x0008

/* If set, use locking-shift function.  */
#define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010

/* If set, use single-shift function.  Overwrite
   CODING_ISO_FLAG_LOCKING_SHIFT.  */
#define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020

/* If set, use designation escape sequence.  */
#define CODING_ISO_FLAG_DESIGNATION     0x0040

/* If set, produce revision number sequence.  */
#define CODING_ISO_FLAG_REVISION        0x0080

/* If set, produce ISO6429's direction specifying sequence.  */
#define CODING_ISO_FLAG_DIRECTION       0x0100

/* If set, assume designation states are reset at beginning of line on
   output.  */
#define CODING_ISO_FLAG_INIT_AT_BOL     0x0200

/* If set, designation sequence should be placed at beginning of line
   on output.  */
#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400

/* If set, do not encode unsafe characters on output.  */
#define CODING_ISO_FLAG_SAFE            0x0800

/* If set, extra latin codes (128..159) are accepted as a valid code
   on input.  */
#define CODING_ISO_FLAG_LATIN_EXTRA     0x1000

/* NOTE(review): the five flags below carry no description in this
   file; document them once their users have been checked.  Also note
   the jump from 0x10000 to 0x100000.  */
#define CODING_ISO_FLAG_COMPOSITION     0x2000

#define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000

#define CODING_ISO_FLAG_USE_ROMAN       0x8000

#define CODING_ISO_FLAG_USE_OLDJIS      0x10000

#define CODING_ISO_FLAG_FULL_SUPPORT    0x100000

/* A character to be produced on output if encoding of the original
   character is prohibited by CODING_ISO_FLAG_SAFE.  */
#define CODING_INHIBIT_CHARACTER_SUBSTITUTION   '?'
aa72b389 568
aa72b389 569
df7492f9
KH
/* UTF-16 section */

/* Accessors for the UTF-16 specific part of a coding system,
   CODING->spec.utf_16.  Each one expands to an lvalue.  */

/* The `bom' member.  */
#define CODING_UTF_16_BOM(coding)       \
  ((coding)->spec.utf_16.bom)

/* The `endian' member.  */
#define CODING_UTF_16_ENDIAN(coding)    \
  ((coding)->spec.utf_16.endian)

/* The `surrogate' member.  */
#define CODING_UTF_16_SURROGATE(coding) \
  ((coding)->spec.utf_16.surrogate)
4ed46869 579
4ed46869 580
df7492f9
KH
/* CCL section */

/* The `coding_attr_ccl_decoder' attribute of CODING.  */
#define CODING_CCL_DECODER(coding)      \
  AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)

/* The `coding_attr_ccl_encoder' attribute of CODING.  */
#define CODING_CCL_ENCODER(coding)      \
  AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)

/* Data pointer of the string held in the `coding_attr_ccl_valids'
   attribute of CODING.  */
#define CODING_CCL_VALIDS(coding)                                          \
  (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
   ->data)
4ed46869 589
/* Index of each coding category in `coding_categories'.  */

enum coding_category
  {
    /* ISO2022 variants.  */
    coding_category_iso_7,
    coding_category_iso_7_tight,
    coding_category_iso_8_1,
    coding_category_iso_8_2,
    coding_category_iso_7_else,
    coding_category_iso_8_else,

    /* Unicode encodings.  */
    coding_category_utf_8,
    coding_category_utf_16_auto,
    coding_category_utf_16_be,
    coding_category_utf_16_le,
    coding_category_utf_16_be_nosig,
    coding_category_utf_16_le_nosig,

    coding_category_charset,
    coding_category_sjis,
    coding_category_big5,
    coding_category_ccl,
    coding_category_emacs_mule,

    /* All above are targets of code detection.  */
    coding_category_raw_text,
    coding_category_undecided,
    coding_category_max
  };
616
/* Flag bits used in detect_coding_XXXX: one bit per coding category,
   at the position given by `enum coding_category'.

   NOTE(review): there is no mask for coding_category_utf_16_auto, so
   that bit is in neither CATEGORY_MASK_UTF_16 nor CATEGORY_MASK_ANY
   -- confirm whether that is intended.  */
#define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
#define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
#define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
#define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
#define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
#define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
#define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
#define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
#define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
#define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
#define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
#define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
#define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
#define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
#define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
#define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)

/* Groups of ISO2022 categories.  */
#define CATEGORY_MASK_ISO_7BIT  \
  (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)

#define CATEGORY_MASK_ISO_8BIT  \
  (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)

#define CATEGORY_MASK_ISO_ELSE  \
  (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)

/* ISO_7, ISO_7_TIGHT, ISO_7_ELSE and ISO_8_ELSE.  */
#define CATEGORY_MASK_ISO_ESCAPE        \
  (CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_ELSE)

/* All six ISO2022 categories.  */
#define CATEGORY_MASK_ISO       \
  (CATEGORY_MASK_ISO_7BIT       \
   | CATEGORY_MASK_ISO_8BIT     \
   | CATEGORY_MASK_ISO_ELSE)

/* The four UTF-16 categories with an explicit byte order.  */
#define CATEGORY_MASK_UTF_16            \
  (CATEGORY_MASK_UTF_16_BE              \
   | CATEGORY_MASK_UTF_16_LE            \
   | CATEGORY_MASK_UTF_16_BE_NOSIG      \
   | CATEGORY_MASK_UTF_16_LE_NOSIG)

/* This value is returned if detect_coding_mask () finds nothing other
   than ASCII characters.  It is the union of every mask defined
   above.  */
#define CATEGORY_MASK_ANY       \
  (CATEGORY_MASK_ISO            \
   | CATEGORY_MASK_UTF_8        \
   | CATEGORY_MASK_UTF_16       \
   | CATEGORY_MASK_CHARSET      \
   | CATEGORY_MASK_SJIS         \
   | CATEGORY_MASK_BIG5         \
   | CATEGORY_MASK_CCL          \
   | CATEGORY_MASK_EMACS_MULE)
681
682
683/* List of symbols `coding-category-xxx' ordered by priority. This
684 variable is exposed to Emacs Lisp. */
685static Lisp_Object Vcoding_category_list;
686
687/* Table of coding categories (Lisp symbols). This variable is for
688 internal use oly. */
689static Lisp_Object Vcoding_category_table;
690
691/* Table of coding-categories ordered by priority. */
692static enum coding_category coding_priorities[coding_category_max];
693
694/* Nth element is a coding context for the coding system bound to the
695 Nth coding category. */
696static struct coding_system coding_categories[coding_category_max];
697
698static int detected_mask[coding_category_raw_text] =
699 { CATEGORY_MASK_ISO,
700 CATEGORY_MASK_ISO,
701 CATEGORY_MASK_ISO,
702 CATEGORY_MASK_ISO,
703 CATEGORY_MASK_ISO,
704 CATEGORY_MASK_ISO,
705 CATEGORY_MASK_UTF_8,
706 CATEGORY_MASK_UTF_16,
707 CATEGORY_MASK_UTF_16,
708 CATEGORY_MASK_UTF_16,
709 CATEGORY_MASK_UTF_16,
710 CATEGORY_MASK_UTF_16,
711 CATEGORY_MASK_CHARSET,
712 CATEGORY_MASK_SJIS,
713 CATEGORY_MASK_BIG5,
714 CATEGORY_MASK_CCL,
715 CATEGORY_MASK_EMACS_MULE
716 };
717
718/*** Commonly used macros and functions ***/
719
/* Smaller and larger of two values, unless already provided.  Each
   argument may be evaluated twice, so avoid side effects.  */
#ifndef min
# define min(a, b) ((a) < (b) ? (a) : (b))
#endif

#ifndef max
# define max(a, b) ((a) > (b) ? (a) : (b))
#endif
4ed46869 726
df7492f9
KH
/* Fetch frequently used information about CODING into the caller's
   variables: ATTRS receives the attribute vector, EOL_TYPE the
   end-of-line type (replaced by Qunix when the stored value is a
   vector), and CHARSET_LIST the charset list taken from ATTRS.
   CODING is parenthesized so that any pointer expression may be
   passed.  */
#define CODING_GET_INFO(coding, attrs, eol_type, charset_list)  \
  do {                                                          \
    attrs = CODING_ID_ATTRS ((coding)->id);                     \
    eol_type = CODING_ID_EOL_TYPE ((coding)->id);               \
    if (VECTORP (eol_type))                                     \
      eol_type = Qunix;                                         \
    charset_list = CODING_ATTR_CHARSET_LIST (attrs);            \
  } while (0)
4ed46869 735
4ed46869 736
df7492f9
KH
/* Fetch the next source byte into C and advance SRC.

   When SRC has reached SRC_END, jump to the label `no_more_source';
   in that case CODING->result is set to
   CODING_RESULT_INSUFFICIENT_SRC first if SRC_BASE < SRC.

   When MULTIBYTEP is nonzero and the fetched byte has its high bit
   set, the byte must be 0xC0 or 0xC1 (otherwise an error is
   signaled); it is combined with the byte that follows into a single
   value, and that following byte is read WITHOUT a bound check.

   CONSUMED_CHARS is incremented once per call.

   The caller must provide the variables src, src_end, src_base,
   multibytep, consumed_chars and coding, and the label
   no_more_source.  */

#define ONE_MORE_BYTE(c)                                        \
  do {                                                          \
    if (src == src_end)                                         \
      {                                                         \
        if (src_base < src)                                     \
          coding->result = CODING_RESULT_INSUFFICIENT_SRC;      \
        goto no_more_source;                                    \
      }                                                         \
    c = *src++;                                                 \
    if (multibytep && (c & 0x80))                               \
      {                                                         \
        if ((c & 0xFE) != 0xC0)                                 \
          error ("Undecodable char found");                     \
        c = ((c & 1) << 6) | *src++;                            \
      }                                                         \
    consumed_chars++;                                           \
  } while (0)
761
aa72b389 762
df7492f9
KH
/* Like ONE_MORE_BYTE, but without testing SRC against SRC_END first;
   the caller guarantees that enough source is available.  The caller
   must provide the variables src, multibytep and consumed_chars.  */
#define ONE_MORE_BYTE_NO_CHECK(c)                       \
  do {                                                  \
    c = *src++;                                         \
    if (multibytep && (c & 0x80))                       \
      {                                                 \
        if ((c & 0xFE) != 0xC0)                         \
          error ("Undecodable char found");             \
        c = ((c & 1) << 6) | *src++;                    \
      }                                                 \
    consumed_chars++;                                   \
  } while (0)
774
aa72b389 775
df7492f9
KH
/* Store the byte C at DST, advance DST to the next free position and
   increment PRODUCED_CHARS.  The caller guarantees that C is in the
   range 0..127 and must provide the variables `dst' and
   `produced_chars'.  */

#define EMIT_ONE_ASCII_BYTE(c)  \
  do {                          \
    produced_chars++;           \
    *dst++ = (c);               \
  } while (0)
aa72b389 788
aa72b389 789
/* Like EMIT_ONE_ASCII_BYTE, but store two bytes, C1 then C2, and add
   2 to PRODUCED_CHARS.  */

#define EMIT_TWO_ASCII_BYTES(c1, c2)    \
  do {                                  \
    produced_chars += 2;                \
    *dst++ = (c1), *dst++ = (c2);       \
  } while (0)
aa72b389 797
df7492f9
KH
798
/* Store the byte C at DST, advance DST to the next free position and
   increment PRODUCED_CHARS.  If MULTIBYTEP is nonzero, store C in
   multibyte form instead: a value of 0x80 or more is first converted
   with BYTE8_TO_CHAR, and the result is written with
   CHAR_STRING_ADVANCE.  The caller must provide the variables `dst',
   `multibytep' and `produced_chars'.  */

#define EMIT_ONE_BYTE(c)                        \
  do {                                          \
    produced_chars++;                           \
    if (multibytep)                             \
      {                                         \
        int ch = (c);                           \
        if (ch >= 0x80)                         \
          ch = BYTE8_TO_CHAR (ch);              \
        CHAR_STRING_ADVANCE (ch, dst);          \
      }                                         \
    else                                        \
      *dst++ = (c);                             \
  } while (0)
818
819
/* Like EMIT_ONE_BYTE, but emit two bytes, C1 then C2, and add 2 to
   PRODUCED_CHARS.  */

#define EMIT_TWO_BYTES(c1, c2)                  \
  do {                                          \
    produced_chars += 2;                        \
    if (multibytep)                             \
      {                                         \
        int ch;                                 \
                                                \
        ch = (c1);                              \
        if (ch >= 0x80)                         \
          ch = BYTE8_TO_CHAR (ch);              \
        CHAR_STRING_ADVANCE (ch, dst);          \
        ch = (c2);                              \
        if (ch >= 0x80)                         \
          ch = BYTE8_TO_CHAR (ch);              \
        CHAR_STRING_ADVANCE (ch, dst);          \
      }                                         \
    else                                        \
      {                                         \
        *dst++ = (c1);                          \
        *dst++ = (c2);                          \
      }                                         \
  } while (0)
844
845
df7492f9
KH
/* Emit the three bytes C1, C2 and C3 in this order: C1 through
   EMIT_ONE_BYTE, then C2 and C3 through EMIT_TWO_BYTES.  */
#define EMIT_THREE_BYTES(c1, c2, c3)    \
  do {                                  \
    EMIT_ONE_BYTE (c1);                 \
    EMIT_TWO_BYTES (c2, c3);            \
  } while (0)
aa72b389 851
aa72b389 852
df7492f9
KH
/* Emit the four bytes C1, C2, C3 and C4 in this order, as two
   consecutive EMIT_TWO_BYTES.  */
#define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
  do {                                          \
    EMIT_TWO_BYTES (c1, c2);                    \
    EMIT_TWO_BYTES (c3, c4);                    \
  } while (0)
aa72b389 858
aa72b389 859
df7492f9
KH
/* Set C to DECODE_CHAR (CHARSET, CODE).  If `charset_map_loaded' is
   nonzero afterwards, refresh CODING->source with coding_set_source
   and shift the source pointers SRC, SRC_BASE and SRC_END by however
   far the source address moved.  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                  \
    charset_map_loaded = 0;                                             \
    c = DECODE_CHAR (charset, code);                                    \
    if (charset_map_loaded)                                             \
      {                                                                 \
        unsigned char *source_before = coding->source;                  \
        EMACS_INT source_shift;                                         \
                                                                        \
        coding_set_source (coding);                                     \
        source_shift = coding->source - source_before;                  \
        src += source_shift;                                            \
        src_base += source_shift;                                       \
        src_end += source_shift;                                        \
      }                                                                 \
  } while (0)
aa72b389 876
aa72b389 877
df7492f9
KH
/* Make sure BYTES more bytes can be stored at `dst'.  When `dst' plus
   BYTES reaches `dst_end', ask alloc_destination for BYTES plus one
   byte per remaining `charbuf' element, then reload `dst' and
   `dst_end' from the result.  */

#define ASSURE_DESTINATION(bytes)                                       \
  do {                                                                  \
    if (dst_end <= dst + (bytes))                                       \
      {                                                                 \
        int wanted_bytes = (bytes) + (charbuf_end - charbuf);           \
                                                                        \
        dst = alloc_destination (coding, wanted_bytes, dst);            \
        dst_end = coding->destination + coding->dst_bytes;              \
      }                                                                 \
  } while (0)
b1887814 888
df7492f9
KH
889
890
891static void
892coding_set_source (coding)
893 struct coding_system *coding;
894{
895 if (BUFFERP (coding->src_object))
896 {
897 if (coding->src_pos < 0)
898 coding->source = GAP_END_ADDR + coding->src_pos_byte;
899 else
900 {
e19c3639 901 struct buffer *buf = XBUFFER (coding->src_object);
e19c3639
KH
902 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
903 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
904
905 coding->source = beg_addr + coding->src_pos_byte - 1;
906 if (coding->src_pos_byte >= gpt_byte)
907 coding->source += BUF_GAP_SIZE (buf);
aa72b389
KH
908 }
909 }
df7492f9 910 else if (STRINGP (coding->src_object))
aa72b389 911 {
df7492f9
KH
912 coding->source = (XSTRING (coding->src_object)->data
913 + coding->src_pos_byte);
914 }
915 else
916 /* Otherwise, the source is C string and is never relocated
917 automatically. Thus we don't have to update anything. */
918 ;
919}
920
921static void
922coding_set_destination (coding)
923 struct coding_system *coding;
924{
925 if (BUFFERP (coding->dst_object))
926 {
df7492f9 927 if (coding->src_pos < 0)
28f67a95
KH
928 {
929 coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
930 coding->dst_bytes = (GAP_END_ADDR
931 - (coding->src_bytes - coding->consumed)
932 - coding->destination);
933 }
df7492f9 934 else
28f67a95
KH
935 {
936 /* We are sure that coding->dst_pos_byte is before the gap
937 of the buffer. */
938 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
939 + coding->dst_pos_byte - 1);
940 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
941 - coding->destination);
942 }
df7492f9
KH
943 }
944 else
945 /* Otherwise, the destination is C string and is never relocated
946 automatically. Thus we don't have to update anything. */
947 ;
948}
949
950
951static void
952coding_alloc_by_realloc (coding, bytes)
953 struct coding_system *coding;
954 EMACS_INT bytes;
955{
956 coding->destination = (unsigned char *) xrealloc (coding->destination,
957 coding->dst_bytes + bytes);
958 coding->dst_bytes += bytes;
959}
960
961static void
962coding_alloc_by_making_gap (coding, bytes)
963 struct coding_system *coding;
964 EMACS_INT bytes;
965{
2c78b7e1
KH
966 if (BUFFERP (coding->dst_object)
967 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
968 {
969 EMACS_INT add = coding->src_bytes - coding->consumed;
970
971 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
972 make_gap (bytes);
973 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
974 }
975 else
976 {
2c78b7e1
KH
977 Lisp_Object this_buffer;
978
979 this_buffer = Fcurrent_buffer ();
df7492f9
KH
980 set_buffer_internal (XBUFFER (coding->dst_object));
981 make_gap (bytes);
982 set_buffer_internal (XBUFFER (this_buffer));
983 }
984}
985
986
987static unsigned char *
988alloc_destination (coding, nbytes, dst)
989 struct coding_system *coding;
990 int nbytes;
991 unsigned char *dst;
992{
993 EMACS_INT offset = dst - coding->destination;
994
995 if (BUFFERP (coding->dst_object))
996 coding_alloc_by_making_gap (coding, nbytes);
997 else
998 coding_alloc_by_realloc (coding, nbytes);
999 coding->result = CODING_RESULT_SUCCESS;
1000 coding_set_destination (coding);
1001 dst = coding->destination + offset;
1002 return dst;
1003}
aa72b389 1004
df7492f9
KH
1005\f
1006/*** 2. Emacs' internal format (emacs-utf-8) ***/
1007
1008
1009
1010\f
1011/*** 3. UTF-8 ***/
1012
1013/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1014 Check if a text is encoded in UTF-8. If it is, return
1015 CATEGORY_MASK_UTF_8, else return 0. */
1016
/* Byte classification for UTF-8, by bit pattern of the byte C.  */

/* 0xxxxxxx: a complete one-octet sequence.  */
#define UTF_8_1_OCTET_P(c) \
  ((c) < 0x80)

/* 10xxxxxx: a trailing octet.  */
#define UTF_8_EXTRA_OCTET_P(c) \
  (((c) & 0xC0) == 0x80)

/* 110xxxxx: leading octet of a two-octet sequence.  */
#define UTF_8_2_OCTET_LEADING_P(c) \
  (((c) & 0xE0) == 0xC0)

/* 1110xxxx: leading octet of a three-octet sequence.  */
#define UTF_8_3_OCTET_LEADING_P(c) \
  (((c) & 0xF0) == 0xE0)

/* 11110xxx: leading octet of a four-octet sequence.  */
#define UTF_8_4_OCTET_LEADING_P(c) \
  (((c) & 0xF8) == 0xF0)

/* 111110xx: leading octet of a five-octet sequence.  */
#define UTF_8_5_OCTET_LEADING_P(c) \
  (((c) & 0xFC) == 0xF8)
1023
1024static int
1025detect_coding_utf_8 (coding, mask)
1026 struct coding_system *coding;
1027 int *mask;
1028{
1029 unsigned char *src = coding->source, *src_base = src;
1030 unsigned char *src_end = coding->source + coding->src_bytes;
1031 int multibytep = coding->src_multibyte;
1032 int consumed_chars = 0;
1033 int found = 0;
1034
1035 /* A coding system of this category is always ASCII compatible. */
1036 src += coding->head_ascii;
1037
1038 while (1)
1039 {
1040 int c, c1, c2, c3, c4;
1041
1042 ONE_MORE_BYTE (c);
1043 if (UTF_8_1_OCTET_P (c))
1044 continue;
1045 ONE_MORE_BYTE (c1);
1046 if (! UTF_8_EXTRA_OCTET_P (c1))
1047 break;
1048 if (UTF_8_2_OCTET_LEADING_P (c))
1049 {
1050 found++;
1051 continue;
1052 }
1053 ONE_MORE_BYTE (c2);
1054 if (! UTF_8_EXTRA_OCTET_P (c2))
1055 break;
1056 if (UTF_8_3_OCTET_LEADING_P (c))
1057 {
1058 found++;
1059 continue;
1060 }
1061 ONE_MORE_BYTE (c3);
1062 if (! UTF_8_EXTRA_OCTET_P (c3))
1063 break;
1064 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1065 {
df7492f9
KH
1066 found++;
1067 continue;
1068 }
1069 ONE_MORE_BYTE (c4);
1070 if (! UTF_8_EXTRA_OCTET_P (c4))
1071 break;
1072 if (UTF_8_5_OCTET_LEADING_P (c))
1073 {
1074 found++;
1075 continue;
1076 }
1077 break;
1078 }
1079 *mask &= ~CATEGORY_MASK_UTF_8;
1080 return 0;
1081
1082 no_more_source:
1083 if (! found)
1084 return 0;
1085 *mask &= CATEGORY_MASK_UTF_8;
1086 return 1;
1087}
1088
1089
b0edb2c5 1090/* Fixme: deal with surrogates? */
df7492f9
KH
1091static void
1092decode_coding_utf_8 (coding)
1093 struct coding_system *coding;
1094{
1095 unsigned char *src = coding->source + coding->consumed;
1096 unsigned char *src_end = coding->source + coding->src_bytes;
1097 unsigned char *src_base;
1098 int *charbuf = coding->charbuf;
1099 int *charbuf_end = charbuf + coding->charbuf_size;
1100 int consumed_chars = 0, consumed_chars_base;
1101 int multibytep = coding->src_multibyte;
1102 Lisp_Object attr, eol_type, charset_list;
1103
1104 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1105
1106 while (1)
1107 {
1108 int c, c1, c2, c3, c4, c5;
1109
1110 src_base = src;
1111 consumed_chars_base = consumed_chars;
1112
1113 if (charbuf >= charbuf_end)
1114 break;
1115
1116 ONE_MORE_BYTE (c1);
1117 if (UTF_8_1_OCTET_P(c1))
1118 {
1119 c = c1;
1120 if (c == '\r')
aa72b389 1121 {
df7492f9
KH
1122 if (EQ (eol_type, Qdos))
1123 {
1124 if (src == src_end)
1125 goto no_more_source;
1126 if (*src == '\n')
1127 ONE_MORE_BYTE (c);
1128 }
1129 else if (EQ (eol_type, Qmac))
1130 c = '\n';
aa72b389 1131 }
aa72b389 1132 }
df7492f9 1133 else
aa72b389 1134 {
df7492f9
KH
1135 ONE_MORE_BYTE (c2);
1136 if (! UTF_8_EXTRA_OCTET_P (c2))
1137 goto invalid_code;
1138 if (UTF_8_2_OCTET_LEADING_P (c1))
b0edb2c5
DL
1139 {
1140 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1141 /* Reject overlong sequences here and below. Encoders
1142 producing them are incorrect, they can be misleading,
1143 and they mess up read/write invariance. */
1144 if (c < 128)
1145 goto invalid_code;
1146 }
df7492f9 1147 else
aa72b389 1148 {
df7492f9
KH
1149 ONE_MORE_BYTE (c3);
1150 if (! UTF_8_EXTRA_OCTET_P (c3))
1151 goto invalid_code;
1152 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1153 {
1154 c = (((c1 & 0xF) << 12)
1155 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1156 if (c < 0x800)
1157 goto invalid_code;
1158 }
df7492f9
KH
1159 else
1160 {
1161 ONE_MORE_BYTE (c4);
1162 if (! UTF_8_EXTRA_OCTET_P (c4))
1163 goto invalid_code;
1164 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1165 {
df7492f9
KH
1166 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1167 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1168 if (c < 0x10000)
1169 goto invalid_code;
1170 }
df7492f9
KH
1171 else
1172 {
1173 ONE_MORE_BYTE (c5);
1174 if (! UTF_8_EXTRA_OCTET_P (c5))
1175 goto invalid_code;
1176 if (UTF_8_5_OCTET_LEADING_P (c1))
1177 {
1178 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1179 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1180 | (c5 & 0x3F));
b0edb2c5 1181 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1182 goto invalid_code;
1183 }
1184 else
1185 goto invalid_code;
1186 }
1187 }
aa72b389 1188 }
aa72b389 1189 }
df7492f9
KH
1190
1191 *charbuf++ = c;
1192 continue;
1193
1194 invalid_code:
1195 src = src_base;
1196 consumed_chars = consumed_chars_base;
1197 ONE_MORE_BYTE (c);
1198 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1199 coding->errors++;
aa72b389
KH
1200 }
1201
df7492f9
KH
1202 no_more_source:
1203 coding->consumed_char += consumed_chars_base;
1204 coding->consumed = src_base - coding->source;
1205 coding->charbuf_used = charbuf - coding->charbuf;
1206}
1207
1208
1209static int
1210encode_coding_utf_8 (coding)
1211 struct coding_system *coding;
1212{
1213 int multibytep = coding->dst_multibyte;
1214 int *charbuf = coding->charbuf;
1215 int *charbuf_end = charbuf + coding->charbuf_used;
1216 unsigned char *dst = coding->destination + coding->produced;
1217 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1218 int produced_chars = 0;
df7492f9
KH
1219 int c;
1220
1221 if (multibytep)
aa72b389 1222 {
df7492f9
KH
1223 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1224
1225 while (charbuf < charbuf_end)
aa72b389 1226 {
df7492f9
KH
1227 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1228
1229 ASSURE_DESTINATION (safe_room);
1230 c = *charbuf++;
28f67a95
KH
1231 if (CHAR_BYTE8_P (c))
1232 {
1233 c = CHAR_TO_BYTE8 (c);
1234 EMIT_ONE_BYTE (c);
1235 }
1236 else
1237 {
1238 CHAR_STRING_ADVANCE (c, pend);
1239 for (p = str; p < pend; p++)
1240 EMIT_ONE_BYTE (*p);
1241 }
aa72b389 1242 }
aa72b389 1243 }
df7492f9
KH
1244 else
1245 {
1246 int safe_room = MAX_MULTIBYTE_LENGTH;
1247
1248 while (charbuf < charbuf_end)
1249 {
1250 ASSURE_DESTINATION (safe_room);
1251 c = *charbuf++;
1252 dst += CHAR_STRING (c, dst);
1253 produced_chars++;
1254 }
1255 }
1256 coding->result = CODING_RESULT_SUCCESS;
1257 coding->produced_char += produced_chars;
1258 coding->produced = dst - coding->destination;
1259 return 0;
aa72b389
KH
1260}
1261
4ed46869 1262
df7492f9
KH
1263/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1264 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1265 Little Endian (otherwise). If it is, return
1266 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
1267 else return 0. */
1268
/* VAL is a 16-bit UTF-16 code unit.  */

/* 0xD800..0xDBFF: first unit of a surrogate pair.  */
#define UTF_16_HIGH_SURROGATE_P(val) (((val) & 0xFC00) == 0xD800)

/* 0xDC00..0xDFFF: second unit of a surrogate pair.  */
#define UTF_16_LOW_SURROGATE_P(val) (((val) & 0xFC00) == 0xDC00)

/* A unit that cannot start a character: a low surrogate, 0xFFFE or
   0xFFFF.  */
#define UTF_16_INVALID_P(val)           \
  (UTF_16_LOW_SURROGATE_P (val)         \
   || ((val) == 0xFFFE)                 \
   || ((val) == 0xFFFF))
1279
1280
1281static int
1282detect_coding_utf_16 (coding, mask)
b73bfc1c 1283 struct coding_system *coding;
df7492f9 1284 int *mask;
b73bfc1c 1285{
df7492f9
KH
1286 unsigned char *src = coding->source, *src_base = src;
1287 unsigned char *src_end = coding->source + coding->src_bytes;
1288 int multibytep = coding->src_multibyte;
1289 int consumed_chars = 0;
1290 int c1, c2;
1291
1292 ONE_MORE_BYTE (c1);
1293 ONE_MORE_BYTE (c2);
4ed46869 1294
df7492f9 1295 if ((c1 == 0xFF) && (c2 == 0xFE))
b73bfc1c 1296 {
df7492f9
KH
1297 *mask &= CATEGORY_MASK_UTF_16_LE;
1298 return 1;
1299 }
1300 else if ((c1 == 0xFE) && (c2 == 0xFF))
1301 {
1302 *mask &= CATEGORY_MASK_UTF_16_BE;
1303 return 1;
1304 }
1305 no_more_source:
1306 return 0;
1307}
ec6d2bb8 1308
df7492f9
KH
1309static void
1310decode_coding_utf_16 (coding)
1311 struct coding_system *coding;
1312{
1313 unsigned char *src = coding->source + coding->consumed;
1314 unsigned char *src_end = coding->source + coding->src_bytes;
0be8721c 1315 unsigned char *src_base;
df7492f9
KH
1316 int *charbuf = coding->charbuf;
1317 int *charbuf_end = charbuf + coding->charbuf_size;
1318 int consumed_chars = 0, consumed_chars_base;
1319 int multibytep = coding->src_multibyte;
1320 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1321 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1322 int surrogate = CODING_UTF_16_SURROGATE (coding);
1323 Lisp_Object attr, eol_type, charset_list;
1324
1325 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1326
1327 if (bom != utf_16_without_bom)
1328 {
1329 int c, c1, c2;
4af310db 1330
df7492f9
KH
1331 src_base = src;
1332 ONE_MORE_BYTE (c1);
1333 ONE_MORE_BYTE (c2);
e19c3639 1334 c = (c1 << 8) | c2;
df7492f9
KH
1335 if (bom == utf_16_with_bom)
1336 {
1337 if (endian == utf_16_big_endian
1338 ? c != 0xFFFE : c != 0xFEFF)
4af310db 1339 {
df7492f9
KH
1340 /* We are sure that there's enouph room at CHARBUF. */
1341 *charbuf++ = c1;
1342 *charbuf++ = c2;
1343 coding->errors++;
4af310db 1344 }
4af310db 1345 }
df7492f9 1346 else
4af310db 1347 {
df7492f9
KH
1348 if (c == 0xFFFE)
1349 CODING_UTF_16_ENDIAN (coding)
1350 = endian = utf_16_big_endian;
1351 else if (c == 0xFEFF)
1352 CODING_UTF_16_ENDIAN (coding)
1353 = endian = utf_16_little_endian;
1354 else
4af310db 1355 {
df7492f9
KH
1356 CODING_UTF_16_ENDIAN (coding)
1357 = endian = utf_16_big_endian;
1358 src = src_base;
4af310db 1359 }
4af310db 1360 }
df7492f9
KH
1361 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1362 }
1363
1364 while (1)
1365 {
1366 int c, c1, c2;
1367
1368 src_base = src;
1369 consumed_chars_base = consumed_chars;
1370
1371 if (charbuf + 2 >= charbuf_end)
1372 break;
1373
1374 ONE_MORE_BYTE (c1);
1375 ONE_MORE_BYTE (c2);
1376 c = (endian == utf_16_big_endian
e19c3639 1377 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1378 if (surrogate)
aa72b389 1379 {
df7492f9 1380 if (! UTF_16_LOW_SURROGATE_P (c))
aa72b389 1381 {
df7492f9
KH
1382 if (endian == utf_16_big_endian)
1383 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1384 else
1385 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1386 *charbuf++ = c1;
1387 *charbuf++ = c2;
1388 coding->errors++;
1389 if (UTF_16_HIGH_SURROGATE_P (c))
1390 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1391 else
1392 *charbuf++ = c;
aa72b389 1393 }
df7492f9
KH
1394 else
1395 {
1396 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1397 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1398 *charbuf++ = c;
1399 }
1400 }
1401 else
1402 {
1403 if (UTF_16_HIGH_SURROGATE_P (c))
1404 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1405 else
1406 *charbuf++ = c;
1407 }
1408 }
1409
1410 no_more_source:
1411 coding->consumed_char += consumed_chars_base;
1412 coding->consumed = src_base - coding->source;
1413 coding->charbuf_used = charbuf - coding->charbuf;
1414}
1415
1416static int
1417encode_coding_utf_16 (coding)
1418 struct coding_system *coding;
1419{
1420 int multibytep = coding->dst_multibyte;
1421 int *charbuf = coding->charbuf;
1422 int *charbuf_end = charbuf + coding->charbuf_used;
1423 unsigned char *dst = coding->destination + coding->produced;
1424 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1425 int safe_room = 8;
1426 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1427 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1428 int produced_chars = 0;
1429 Lisp_Object attrs, eol_type, charset_list;
1430 int c;
1431
1432 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1433
1434 if (bom == utf_16_with_bom)
1435 {
1436 ASSURE_DESTINATION (safe_room);
1437 if (big_endian)
1438 EMIT_TWO_BYTES (0xFF, 0xFE);
1439 else
1440 EMIT_TWO_BYTES (0xFE, 0xFF);
1441 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1442 }
1443
1444 while (charbuf < charbuf_end)
1445 {
1446 ASSURE_DESTINATION (safe_room);
1447 c = *charbuf++;
e19c3639
KH
1448 if (c >= MAX_UNICODE_CHAR)
1449 c = coding->default_char;
df7492f9
KH
1450
1451 if (c < 0x10000)
1452 {
1453 if (big_endian)
1454 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1455 else
1456 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1457 }
1458 else
1459 {
1460 int c1, c2;
1461
1462 c -= 0x10000;
1463 c1 = (c >> 10) + 0xD800;
1464 c2 = (c & 0x3FF) + 0xDC00;
1465 if (big_endian)
1466 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1467 else
1468 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1469 }
1470 }
1471 coding->result = CODING_RESULT_SUCCESS;
1472 coding->produced = dst - coding->destination;
1473 coding->produced_char += produced_chars;
1474 return 0;
1475}
1476
1477\f
1478/*** 6. Old Emacs' internal format (emacs-mule) ***/
1479
1480/* Emacs' internal format for representation of multiple character
1481 sets is a kind of multi-byte encoding, i.e. characters are
1482 represented by variable-length sequences of one-byte codes.
1483
1484 ASCII characters and control characters (e.g. `tab', `newline') are
1485 represented by one-byte sequences which are their ASCII codes, in
1486 the range 0x00 through 0x7F.
1487
1488 8-bit characters of the range 0x80..0x9F are represented by
1489 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1490 code + 0x20).
1491
1492 8-bit characters of the range 0xA0..0xFF are represented by
1493 one-byte sequences which are their 8-bit code.
1494
1495 The other characters are represented by a sequence of `base
1496 leading-code', optional `extended leading-code', and one or two
1497 `position-code's. The length of the sequence is determined by the
1498 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1499 whereas extended leading-code and position-code take the range 0xA0
1500 through 0xFF. See `charset.h' for more details about leading-code
1501 and position-code.
1502
1503 --- CODE RANGE of Emacs' internal format ---
1504 character set range
1505 ------------- -----
1506 ascii 0x00..0x7F
1507 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1508 eight-bit-graphic 0xA0..0xFF
1509 ELSE 0x81..0x9D + [0xA0..0xFF]+
1510 ---------------------------------------------
1511
1512 As this is the internal character representation, the format is
1513 usually not used externally (i.e. in a file or in a data sent to a
1514 process). But, it is possible to have a text externally in this
1515 format (i.e. by encoding by the coding system `emacs-mule').
1516
1517 In that case, a sequence of one-byte codes has a slightly different
1518 form.
1519
1520 At first, all characters in eight-bit-control are represented by
1521 one-byte sequences which are their 8-bit code.
1522
1523 Next, character composition data are represented by the byte
1524 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1525 where,
1526 METHOD is 0xF2 plus one of composition method (enum
1527 composition_method),
1528
1529 BYTES is 0xA0 plus a byte length of this composition data,
1530
1531 CHARS is 0xA0 plus a number of characters composed by this
1532 data,
1533
1534 COMPONENTs are characters of multibyte form or composition
1535 rules encoded by two-byte of ASCII codes.
1536
1537 In addition, for backward compatibility, the following formats are
1538 also recognized as composition data on decoding.
1539
1540 0x80 MSEQ ...
1541 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1542
1543 Here,
1544 MSEQ is a multibyte form but in these special format:
1545 ASCII: 0xA0 ASCII_CODE+0x80,
1546 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1547 RULE is a one byte code of the range 0xA0..0xF0 that
1548 represents a composition rule.
1549 */
1550
1551char emacs_mule_bytes[256];
1552
df7492f9 1553int
781d7a48 1554emacs_mule_char (coding, src, nbytes, nchars)
df7492f9 1555 struct coding_system *coding;
781d7a48 1556 unsigned char *src;
df7492f9
KH
1557 int *nbytes, *nchars;
1558{
df7492f9
KH
1559 unsigned char *src_end = coding->source + coding->src_bytes;
1560 int multibytep = coding->src_multibyte;
1561 unsigned char *src_base = src;
1562 struct charset *charset;
1563 unsigned code;
1564 int c;
1565 int consumed_chars = 0;
1566
1567 ONE_MORE_BYTE (c);
df7492f9
KH
1568 switch (emacs_mule_bytes[c])
1569 {
1570 case 2:
1571 if (! (charset = emacs_mule_charset[c]))
1572 goto invalid_code;
1573 ONE_MORE_BYTE (c);
1574 code = c & 0x7F;
1575 break;
1576
1577 case 3:
7c78e542
KH
1578 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1579 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
b73bfc1c 1580 {
df7492f9
KH
1581 ONE_MORE_BYTE (c);
1582 if (! (charset = emacs_mule_charset[c]))
1583 goto invalid_code;
1584 ONE_MORE_BYTE (c);
1585 code = c & 0x7F;
b73bfc1c
KH
1586 }
1587 else
1588 {
df7492f9
KH
1589 if (! (charset = emacs_mule_charset[c]))
1590 goto invalid_code;
1591 ONE_MORE_BYTE (c);
781d7a48 1592 code = (c & 0x7F) << 8;
df7492f9
KH
1593 ONE_MORE_BYTE (c);
1594 code |= c & 0x7F;
1595 }
1596 break;
1597
1598 case 4:
781d7a48 1599 ONE_MORE_BYTE (c);
df7492f9
KH
1600 if (! (charset = emacs_mule_charset[c]))
1601 goto invalid_code;
1602 ONE_MORE_BYTE (c);
781d7a48 1603 code = (c & 0x7F) << 8;
df7492f9
KH
1604 ONE_MORE_BYTE (c);
1605 code |= c & 0x7F;
1606 break;
1607
1608 case 1:
1609 code = c;
9d123124
KH
1610 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1611 ? charset_ascii : charset_eight_bit);
df7492f9
KH
1612 break;
1613
1614 default:
1615 abort ();
1616 }
1617 c = DECODE_CHAR (charset, code);
1618 if (c < 0)
1619 goto invalid_code;
1620 *nbytes = src - src_base;
1621 *nchars = consumed_chars;
1622 return c;
1623
1624 no_more_source:
1625 return -2;
1626
1627 invalid_code:
1628 return -1;
1629}
1630
1631
1632/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1633 Check if a text is encoded in `emacs-mule'. */
1634
1635static int
1636detect_coding_emacs_mule (coding, mask)
1637 struct coding_system *coding;
1638 int *mask;
1639{
1640 unsigned char *src = coding->source, *src_base = src;
1641 unsigned char *src_end = coding->source + coding->src_bytes;
1642 int multibytep = coding->src_multibyte;
1643 int consumed_chars = 0;
1644 int c;
1645 int found = 0;
1646
1647 /* A coding system of this category is always ASCII compatible. */
1648 src += coding->head_ascii;
1649
1650 while (1)
1651 {
1652 ONE_MORE_BYTE (c);
1653
1654 if (c == 0x80)
1655 {
1656 /* Perhaps the start of composite character. We simple skip
1657 it because analyzing it is too heavy for detecting. But,
1658 at least, we check that the composite character
1659 constitues of more than 4 bytes. */
1660 unsigned char *src_base;
1661
1662 repeat:
1663 src_base = src;
1664 do
1665 {
1666 ONE_MORE_BYTE (c);
1667 }
1668 while (c >= 0xA0);
1669
1670 if (src - src_base <= 4)
1671 break;
1672 found = 1;
1673 if (c == 0x80)
1674 goto repeat;
b73bfc1c 1675 }
df7492f9
KH
1676
1677 if (c < 0x80)
b73bfc1c 1678 {
df7492f9
KH
1679 if (c < 0x20
1680 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1681 break;
1682 }
1683 else
1684 {
1685 unsigned char *src_base = src - 1;
1686
1687 do
1688 {
1689 ONE_MORE_BYTE (c);
1690 }
1691 while (c >= 0xA0);
1692 if (src - src_base != emacs_mule_bytes[*src_base])
1693 break;
1694 found = 1;
4ed46869
KH
1695 }
1696 }
df7492f9
KH
1697 *mask &= ~CATEGORY_MASK_EMACS_MULE;
1698 return 0;
1699
1700 no_more_source:
1701 if (!found)
1702 return 0;
1703 *mask &= CATEGORY_MASK_EMACS_MULE;
1704 return 1;
4ed46869
KH
1705}
1706
b73bfc1c 1707
df7492f9
KH
1708/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1709
1710/* Decode a character represented as a component of composition
1711 sequence of Emacs 20/21 style at SRC. Set C to that character and
1712 update SRC to the head of next character (or an encoded composition
1713 rule). If SRC doesn't points a composition component, set C to -1.
1714 If SRC points an invalid byte sequence, global exit by a return
1715 value 0. */
1716
/* Decode one character at `src' with emacs_mule_char, store it at
   *BUF and advance BUF, `src' and `consumed_chars'.

   This expands to an `if (1) { ... } else' statement and not to a
   `do ... while (0)' so that the `break' inside acts on the loop
   enclosing the macro call: it is taken when `src' has reached
   `src_end' or emacs_mule_char reports that the source ran out (-2).
   Any other negative result jumps to `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                         \
  if (1)                                                                \
    {                                                                   \
      int c;                                                            \
      int nbytes, nchars;                                               \
                                                                        \
      if (src == src_end)                                               \
        break;                                                          \
      c = emacs_mule_char (coding, src, &nbytes, &nchars);              \
      if (c == -2)                                                      \
        break;                                                          \
      if (c < 0)                                                        \
        goto invalid_code;                                              \
      *buf++ = c;                                                       \
      src += nbytes;                                                    \
      consumed_chars += nchars;                                         \
    }                                                                   \
  else
1737
1738
1739/* Decode a composition rule represented as a component of composition
781d7a48
KH
1740 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1741 and increment BUF. If SRC points an invalid byte sequence, set C
1742 to -1. */
df7492f9 1743
/* Read one byte at `src' as an Emacs 20 style composition rule and
   store the decoded rule at *BUF, advancing BUF.  The byte minus 0x20
   must be in 0..80; it is split into GREF (quotient by 9) and NREF
   (remainder).  Jump to `invalid_code' when no byte is left or the
   value is out of range.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)                      \
  do {                                                                  \
    int rule, gref, nref;                                               \
                                                                        \
    if (src >= src_end)                                                 \
      goto invalid_code;                                                \
    ONE_MORE_BYTE_NO_CHECK (rule);                                      \
    rule -= 0x20;                                                       \
    if (! (rule >= 0 && rule < 81))                                     \
      goto invalid_code;                                                \
                                                                        \
    gref = rule / 9;                                                    \
    nref = rule % 9;                                                    \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);                      \
  } while (0)
1758
1759
781d7a48
KH
1760/* Decode a composition rule represented as a component of composition
1761 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1762 and increment BUF. If SRC points an invalid byte sequence, set C
1763 to -1. */
1764
/* Read two bytes at `src' as an Emacs 21 style composition rule: the
   first is GREF plus 0x20, the second NREF plus 0x20.  Store the
   decoded rule at *BUF, advancing BUF.  Jump to `invalid_code' when
   fewer than two bytes are left before the last source byte or when
   either value is outside 0..80.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)                      \
  do {                                                                  \
    int gref, nref;                                                     \
                                                                        \
    if (src + 1 >= src_end)                                             \
      goto invalid_code;                                                \
    ONE_MORE_BYTE_NO_CHECK (gref);                                      \
    gref -= 0x20;                                                       \
    ONE_MORE_BYTE_NO_CHECK (nref);                                      \
    nref -= 0x20;                                                       \
    if (! (gref >= 0 && gref < 81)                                      \
        || ! (nref >= 0 && nref < 81))                                  \
      goto invalid_code;                                                \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);                      \
  } while (0)
1780
1781
df7492f9
KH
/* Store a five-element composition annotation at BUF and advance BUF
   past it.  The elements are, in order: -5, the character position
   (`coding->produced_char' plus `char_offset'),
   CODING_ANNOTATE_COMPOSITION_MASK, METHOD and NCHARS.  */

#define ADD_COMPOSITION_DATA(buf, method, nchars)               \
  do {                                                          \
    (buf)[0] = -5;                                              \
    (buf)[1] = coding->produced_char + char_offset;             \
    (buf)[2] = CODING_ANNOTATE_COMPOSITION_MASK;                \
    (buf)[3] = method;                                          \
    (buf)[4] = nchars;                                          \
    (buf) += 5;                                                 \
  } while (0)
aa72b389 1790
df7492f9
KH
1791
/* Decode the header of an Emacs 21 style composition.  C is the byte
   after 0x80, i.e. the composition method plus 0xF2.  The next two
   source bytes are the byte length of the composition data plus 0xA0
   and the number of composed characters plus 0xA0.

   An annotation is added to `charbuf'.  For every method but
   COMPOSITION_RELATIVE the components are then decoded into `charbuf'
   until `consumed_chars' reaches `consumed_chars_base' plus the byte
   length.  Odd-numbered components are rules unless the method is
   COMPOSITION_WITH_ALTCHARS.  The number of components is finally
   subtracted from the first annotation element.  */

#define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int *annotation = charbuf;                                          \
    int consumed_chars_limit;                                           \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    ADD_COMPOSITION_DATA (charbuf, method, nchars);                     \
    consumed_chars_limit = consumed_chars_base + nbytes;                \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int i = 0;                                                      \
                                                                        \
        while (consumed_chars < consumed_chars_limit)                   \
          {                                                             \
            if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
              DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
            else                                                        \
              DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
            i++;                                                        \
          }                                                             \
        /* The loop was left early: the source ran out.  */             \
        if (consumed_chars < consumed_chars_limit)                      \
          goto invalid_code;                                            \
        annotation[0] -= i;                                             \
      }                                                                 \
  } while (0)
93dec019 1827
aa72b389 1828
df7492f9
KH
/* Decode an Emacs 20 style relative composition.  The sequence is
   re-read from `src_base': the 0x80 byte is skipped and up to
   MAX_COMPOSITION_COMPONENTS characters are decoded.  Fewer than two
   characters jump to `invalid_code'.  Otherwise an annotation for a
   COMPOSITION_RELATIVE composition and then the characters
   themselves are appended to `charbuf'.

   `consumed_chars' is rewound together with `src', so that the bytes
   already read by the caller are not counted twice.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
  do {                                                          \
    /* Emacs 20 style format for relative composition.  */      \
    /* Store multibyte form of characters to be composed.  */   \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
    int *buf = components;                                      \
    int i, j;                                                   \
                                                                \
    src = src_base;                                             \
    consumed_chars = consumed_chars_base;                       \
    ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
      DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
    if (i < 2)                                                  \
      goto invalid_code;                                        \
    ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i);    \
    for (j = 0; j < i; j++)                                     \
      *charbuf++ = components[j];                               \
  } while (0)
1847
1848
/* Decode an Emacs 20 style rule-base composition.  `src' points just
   after the bytes 0x80 0xFF.  A first character and then RULE
   CHARACTER pairs are decoded into a local array.  I counts the
   pairs.  No pair, or a rule without its character, jumps to
   `invalid_code'.  If `charbuf' has no room for what follows, jump
   to `no_more_source'.  Otherwise an annotation and the collected
   elements are appended to `charbuf'.

   At most MAX_COMPOSITION_COMPONENTS - 1 pairs are read, so that the
   first character plus the pairs fit the
   MAX_COMPOSITION_COMPONENTS * 2 - 1 elements of the array.

   NOTE(review): when the very first character cannot be read because
   the source ran out, the `break' in
   DECODE_EMACS_MULE_COMPOSITION_CHAR leaves this macro without
   storing anything; confirm that callers expect this.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
  do {                                                          \
    /* Emacs 20 style format for rule-base composition.  */     \
    /* Store multibyte form of characters to be composed.  */   \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
    int *buf = components;                                      \
    int i, j;                                                   \
                                                                \
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS - 1; i++)        \
      {                                                         \
        DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
      }                                                         \
    if (i < 1 || (buf - components) % 2 == 0)                   \
      goto invalid_code;                                        \
    /* Five annotation elements, then the two copies below.  */ \
    if (charbuf + 5 + i + (i / 2) + 1 > charbuf_end)            \
      goto no_more_source;                                      \
    ADD_COMPOSITION_DATA (charbuf, COMPOSITION_WITH_RULE, i);   \
    for (j = 0; j < i; j++)                                     \
      *charbuf++ = components[j];                               \
    for (j = 0; j < i; j += 2)                                  \
      *charbuf++ = components[j];                               \
  } while (0)
1873
aa72b389
KH
1874
1875static void
df7492f9 1876decode_coding_emacs_mule (coding)
aa72b389 1877 struct coding_system *coding;
aa72b389 1878{
df7492f9
KH
1879 unsigned char *src = coding->source + coding->consumed;
1880 unsigned char *src_end = coding->source + coding->src_bytes;
aa72b389 1881 unsigned char *src_base;
df7492f9
KH
1882 int *charbuf = coding->charbuf;
1883 int *charbuf_end = charbuf + coding->charbuf_size;
1884 int consumed_chars = 0, consumed_chars_base;
1885 int char_offset = 0;
1886 int multibytep = coding->src_multibyte;
1887 Lisp_Object attrs, eol_type, charset_list;
aa72b389 1888
df7492f9 1889 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1890
aa72b389
KH
1891 while (1)
1892 {
df7492f9
KH
1893 int c;
1894
aa72b389 1895 src_base = src;
df7492f9
KH
1896 consumed_chars_base = consumed_chars;
1897
1898 if (charbuf >= charbuf_end)
1899 break;
aa72b389 1900
df7492f9
KH
1901 ONE_MORE_BYTE (c);
1902
1903 if (c < 0x80)
aa72b389 1904 {
df7492f9
KH
1905 if (c == '\r')
1906 {
1907 if (EQ (eol_type, Qdos))
1908 {
1909 if (src == src_end)
1910 goto no_more_source;
1911 if (*src == '\n')
1912 ONE_MORE_BYTE (c);
1913 }
1914 else if (EQ (eol_type, Qmac))
1915 c = '\n';
1916 }
1917 *charbuf++ = c;
1918 char_offset++;
aa72b389 1919 }
df7492f9
KH
1920 else if (c == 0x80)
1921 {
1922 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1923 break;
1924 ONE_MORE_BYTE (c);
781d7a48
KH
1925 if (c - 0xF2 >= COMPOSITION_RELATIVE
1926 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
1927 DECODE_EMACS_MULE_21_COMPOSITION (c);
1928 else if (c < 0xC0)
1929 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1930 else if (c == 0xFF)
1931 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1932 else
1933 goto invalid_code;
781d7a48 1934 coding->annotated = 1;
df7492f9
KH
1935 }
1936 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1937 {
1938 int nbytes, nchars;
781d7a48
KH
1939 src = src_base;
1940 consumed_chars = consumed_chars_base;
1941 c = emacs_mule_char (coding, src, &nbytes, &nchars);
df7492f9
KH
1942 if (c < 0)
1943 {
1944 if (c == -2)
1945 break;
1946 goto invalid_code;
1947 }
1948 *charbuf++ = c;
781d7a48
KH
1949 src += nbytes;
1950 consumed_chars += nchars;
df7492f9
KH
1951 char_offset++;
1952 }
1953 continue;
1954
1955 invalid_code:
1956 src = src_base;
1957 consumed_chars = consumed_chars_base;
1958 ONE_MORE_BYTE (c);
1959 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1960 coding->errors++;
1961 }
1962
1963 no_more_source:
1964 coding->consumed_char += consumed_chars_base;
1965 coding->consumed = src_base - coding->source;
1966 coding->charbuf_used = charbuf - coding->charbuf;
1967}
1968
1969
/* Store in CODES[0] and CODES[1] the leading bytes that introduce a
   character of the charset whose emacs-mule id is ID.

   An id below 0xA0 is its own leading code and CODES[1] is set to 0.
   A larger id is stored in CODES[1], preceded in CODES[0] by 0x9A
   (id below 0xE0), 0x9B (below 0xF0), 0x9C (below 0xF5) or 0x9D
   (otherwise).

   The expansion has no trailing semicolon, so the macro can be used
   as a single statement, including as the body of an `if' that has
   an `else'.  */

#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    if ((id) < 0xA0)                                    \
      (codes)[0] = (id), (codes)[1] = 0;                \
    else if ((id) < 0xE0)                               \
      (codes)[0] = 0x9A, (codes)[1] = (id);             \
    else if ((id) < 0xF0)                               \
      (codes)[0] = 0x9B, (codes)[1] = (id);             \
    else if ((id) < 0xF5)                               \
      (codes)[0] = 0x9C, (codes)[1] = (id);             \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = (id);             \
  } while (0)
1983
aa72b389 1984
df7492f9
KH
1985static int
1986encode_coding_emacs_mule (coding)
1987 struct coding_system *coding;
1988{
1989 int multibytep = coding->dst_multibyte;
1990 int *charbuf = coding->charbuf;
1991 int *charbuf_end = charbuf + coding->charbuf_used;
1992 unsigned char *dst = coding->destination + coding->produced;
1993 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1994 int safe_room = 8;
df7492f9
KH
1995 int produced_chars = 0;
1996 Lisp_Object attrs, eol_type, charset_list;
1997 int c;
1998
1999 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2000
2001 while (charbuf < charbuf_end)
2002 {
2003 ASSURE_DESTINATION (safe_room);
2004 c = *charbuf++;
2005 if (ASCII_CHAR_P (c))
2006 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2007 else if (CHAR_BYTE8_P (c))
2008 {
2009 c = CHAR_TO_BYTE8 (c);
2010 EMIT_ONE_BYTE (c);
2011 }
df7492f9 2012 else
aa72b389 2013 {
df7492f9
KH
2014 struct charset *charset;
2015 unsigned code;
2016 int dimension;
2017 int emacs_mule_id;
2018 unsigned char leading_codes[2];
2019
2020 charset = char_charset (c, charset_list, &code);
2021 if (! charset)
2022 {
2023 c = coding->default_char;
2024 if (ASCII_CHAR_P (c))
2025 {
2026 EMIT_ONE_ASCII_BYTE (c);
2027 continue;
2028 }
2029 charset = char_charset (c, charset_list, &code);
2030 }
2031 dimension = CHARSET_DIMENSION (charset);
2032 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2033 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2034 EMIT_ONE_BYTE (leading_codes[0]);
2035 if (leading_codes[1])
2036 EMIT_ONE_BYTE (leading_codes[1]);
2037 if (dimension == 1)
2038 EMIT_ONE_BYTE (code);
aa72b389 2039 else
df7492f9
KH
2040 {
2041 EMIT_ONE_BYTE (code >> 8);
2042 EMIT_ONE_BYTE (code & 0xFF);
2043 }
aa72b389 2044 }
aa72b389 2045 }
df7492f9
KH
2046 coding->result = CODING_RESULT_SUCCESS;
2047 coding->produced_char += produced_chars;
2048 coding->produced = dst - coding->destination;
2049 return 0;
aa72b389 2050}
b73bfc1c 2051
4ed46869 2052\f
df7492f9 2053/*** 7. ISO2022 handlers ***/
4ed46869
KH
2054
2055/* The following note describes the coding system ISO2022 briefly.
39787efd 2056 Since the intention of this note is to help understand the
5a936b46 2057 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2058 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46
DL
2059 original document of ISO2022. This is equivalent to the standard
2060 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2061
2062 ISO2022 provides many mechanisms to encode several character sets
5a936b46 2063 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2064 is encoded using bytes less than 128. This may make the encoded
2065 text a little bit longer, but the text passes more easily through
5a936b46
DL
2066 several types of gateway, some of which strip off the MSB (Most
2067 Significant Bit).
b73bfc1c 2068
5a936b46
DL
2069 There are two kinds of character sets: control character sets and
2070 graphic character sets. The former contain control characters such
4ed46869 2071 as `newline' and `escape' to provide control functions (control
39787efd 2072 functions are also provided by escape sequences). The latter
5a936b46 2073 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2074 two control character sets and many graphic character sets.
2075
2076 Graphic character sets are classified into one of the following
39787efd
KH
2077 four classes, according to the number of bytes (DIMENSION) and
2078 number of characters in one dimension (CHARS) of the set:
2079 - DIMENSION1_CHARS94
2080 - DIMENSION1_CHARS96
2081 - DIMENSION2_CHARS94
2082 - DIMENSION2_CHARS96
2083
2084 In addition, each character set is assigned an identification tag,
5a936b46 2085 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2086 hereafter). The <F> of each character set is decided by ECMA(*)
2087 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2088 (0x30..0x3F are for private use only).
4ed46869
KH
2089
2090 Note (*): ECMA = European Computer Manufacturers Association
2091
5a936b46 2092 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2093 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2094 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2095 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2096 o DIMENSION2_CHARS96 -- none for the moment
2097
39787efd 2098 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2099 C0 [0x00..0x1F] -- control character plane 0
2100 GL [0x20..0x7F] -- graphic character plane 0
2101 C1 [0x80..0x9F] -- control character plane 1
2102 GR [0xA0..0xFF] -- graphic character plane 1
2103
2104 A control character set is directly designated and invoked to C0 or
39787efd
KH
2105 C1 by an escape sequence. The most common case is that:
2106 - ISO646's control character set is designated/invoked to C0, and
2107 - ISO6429's control character set is designated/invoked to C1,
2108 and usually these designations/invocations are omitted in encoded
2109 text. In a 7-bit environment, only C0 can be used, and a control
2110 character for C1 is encoded by an appropriate escape sequence to
2111 fit into the environment. All control characters for C1 are
2112 defined to have corresponding escape sequences.
4ed46869
KH
2113
2114 A graphic character set is at first designated to one of four
2115 graphic registers (G0 through G3), then these graphic registers are
2116 invoked to GL or GR. These designations and invocations can be
2117 done independently. The most common case is that G0 is invoked to
39787efd
KH
2118 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2119 these invocations and designations are omitted in encoded text.
2120 In a 7-bit environment, only GL can be used.
4ed46869 2121
39787efd
KH
2122 When a graphic character set of CHARS94 is invoked to GL, codes
2123 0x20 and 0x7F of the GL area work as control characters SPACE and
2124 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2125 be used.
4ed46869
KH
2126
2127 There are two ways of invocation: locking-shift and single-shift.
2128 With locking-shift, the invocation lasts until the next different
39787efd
KH
2129 invocation, whereas with single-shift, the invocation affects the
2130 following character only and doesn't affect the locking-shift
2131 state. Invocations are done by the following control characters or
2132 escape sequences:
4ed46869
KH
2133
2134 ----------------------------------------------------------------------
39787efd 2135 abbrev function cntrl escape seq description
4ed46869 2136 ----------------------------------------------------------------------
39787efd
KH
2137 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2138 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2139 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2140 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2141 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2142 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2143 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2144 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2145 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2146 ----------------------------------------------------------------------
39787efd
KH
2147 (*) These are not used by any known coding system.
2148
2149 Control characters for these functions are defined by macros
2150 ISO_CODE_XXX in `coding.h'.
4ed46869 2151
39787efd 2152 Designations are done by the following escape sequences:
4ed46869
KH
2153 ----------------------------------------------------------------------
2154 escape sequence description
2155 ----------------------------------------------------------------------
2156 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2157 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2158 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2159 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2160 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2161 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2162 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2163 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2164 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2165 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2166 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2167 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2168 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2169 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2170 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2171 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2172 ----------------------------------------------------------------------
2173
2174 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2175 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2176
2177 Note (*): Although these designations are not allowed in ISO2022,
2178 Emacs accepts them on decoding, and produces them on encoding
39787efd 2179 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2180 7-bit environment, non-locking-shift, and non-single-shift.
2181
2182 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2183 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2184
5a936b46 2185 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2186 same multilingual text in ISO2022. Actually, there exist many
2187 coding systems such as Compound Text (used in X11's inter client
5a936b46
DL
2188 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2189 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2190 localized platforms), and all of these are variants of ISO2022.
2191
2192 In addition to the above, Emacs handles two more kinds of escape
2193 sequences: ISO6429's direction specification and Emacs' private
2194 sequence for specifying character composition.
2195
39787efd 2196 ISO6429's direction specification takes the following form:
4ed46869
KH
2197 o CSI ']' -- end of the current direction
2198 o CSI '0' ']' -- end of the current direction
2199 o CSI '1' ']' -- start of left-to-right text
2200 o CSI '2' ']' -- start of right-to-left text
2201 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2202 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2203
2204 Character composition specification takes the following form:
ec6d2bb8
KH
2205 o ESC '0' -- start relative composition
2206 o ESC '1' -- end composition
2207 o ESC '2' -- start rule-base composition (*)
2208 o ESC '3' -- start relative composition with alternate chars (**)
2209 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2210 Since these are not standard escape sequences of any ISO standard,
5a936b46 2211 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2212
5a936b46
DL
2213 (*) This form is used only in Emacs 20.7 and older versions,
2214 but newer versions can safely decode it.
2215 (**) This form is used only in Emacs 21.1 and newer versions,
2216 and older versions can't decode it.
ec6d2bb8 2217
5a936b46 2218 Here's a list of example usages of these composition escape
b73bfc1c 2219 sequences (categorized by `enum composition_method').
ec6d2bb8 2220
b73bfc1c 2221 COMPOSITION_RELATIVE:
ec6d2bb8 2222 ESC 0 CHAR [ CHAR ] ESC 1
5a936b46 2223 COMPOSITION_WITH_RULE:
ec6d2bb8 2224 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2225 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2226 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2227 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2228 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2229
/* Table with one `enum iso_code_class_type' entry per byte value
   (0..255).  decode_coding_iso_2022 dispatches on
   iso_code_class[BYTE] for each source byte it reads.  */
2230enum iso_code_class_type iso_code_class[256];
2231
df7492f9
KH
/* Nonzero if the charset whose id is ID is marked safe in CODING,
   i.e. ID is within 0..CODING->max_charset_id and the byte stored at
   CODING->safe_charsets[ID] is below 0x80.  (setup_iso_safe_charsets
   fills the table with 255 and then stores a graphic register number
   for each supported charset.)

   A negative ID is rejected before indexing, and the table byte is
   compared as an unsigned value so that the result does not depend
   on whether plain `char' is signed on the host.  */

#define SAFE_CHARSET_P(coding, id)				\
  ((id) >= 0							\
   && (id) <= (coding)->max_charset_id				\
   && (unsigned char) (coding)->safe_charsets[id] < 0x80)
2235
2236
/* Nonzero if CODING_ISO_INITIAL for index 1 of the coding system
   registered for category CAT is non-negative.  NOTE(review):
   presumably index 1 is graphic register G1, the one a shift-out
   invokes into GL -- confirm against coding.h.  */

#define SHIFT_OUT_OK(cat)	\
  (0 <= CODING_ISO_INITIAL (coding_categories + (cat), 1))
2239
2240static void
f0064e1f
DL
2241setup_iso_safe_charsets (attrs)
2242 Lisp_Object attrs;
df7492f9
KH
2243{
2244 Lisp_Object charset_list, safe_charsets;
2245 Lisp_Object request;
2246 Lisp_Object reg_usage;
2247 Lisp_Object tail;
2248 int reg94, reg96;
2249 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2250 int max_charset_id;
2251
2252 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2253 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2254 && ! EQ (charset_list, Viso_2022_charset_list))
2255 {
2256 CODING_ATTR_CHARSET_LIST (attrs)
2257 = charset_list = Viso_2022_charset_list;
2258 ASET (attrs, coding_attr_safe_charsets, Qnil);
2259 }
2260
2261 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2262 return;
2263
2264 max_charset_id = 0;
2265 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2266 {
2267 int id = XINT (XCAR (tail));
2268 if (max_charset_id < id)
2269 max_charset_id = id;
2270 }
2271
2272 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2273 make_number (255));
2274 request = AREF (attrs, coding_attr_iso_request);
2275 reg_usage = AREF (attrs, coding_attr_iso_usage);
2276 reg94 = XINT (XCAR (reg_usage));
2277 reg96 = XINT (XCDR (reg_usage));
2278
2279 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2280 {
2281 Lisp_Object id;
2282 Lisp_Object reg;
2283 struct charset *charset;
2284
2285 id = XCAR (tail);
2286 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2287 reg = Fcdr (Fassq (id, request));
df7492f9
KH
2288 if (! NILP (reg))
2289 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2290 else if (charset->iso_chars_96)
2291 {
2292 if (reg96 < 4)
2293 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2294 }
2295 else
2296 {
2297 if (reg94 < 4)
2298 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2299 }
2300 }
2301 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2302}
d46c5b12 2303
d46c5b12 2304
4ed46869 2305/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
df7492f9 2306 Check if a text is encoded in ISO2022. If it is, returns an
4ed46869 2307 integer in which appropriate flag bits any of:
df7492f9
KH
2308 CATEGORY_MASK_ISO_7
2309 CATEGORY_MASK_ISO_7_TIGHT
2310 CATEGORY_MASK_ISO_8_1
2311 CATEGORY_MASK_ISO_8_2
2312 CATEGORY_MASK_ISO_7_ELSE
2313 CATEGORY_MASK_ISO_8_ELSE
4ed46869
KH
2314 are set. If a code which should never appear in ISO2022 is found,
2315 returns 0. */
2316
0a28aafb 2317static int
df7492f9
KH
2318detect_coding_iso_2022 (coding, mask)
2319 struct coding_system *coding;
2320 int *mask;
4ed46869 2321{
df7492f9
KH
2322 unsigned char *src = coding->source, *src_base = src;
2323 unsigned char *src_end = coding->source + coding->src_bytes;
2324 int multibytep = coding->src_multibyte;
2325 int mask_iso = CATEGORY_MASK_ISO;
2326 int mask_found = 0, mask_8bit_found = 0;
f46869e4 2327 int reg[4], shift_out = 0, single_shifting = 0;
df7492f9
KH
2328 int id;
2329 int c, c1;
2330 int consumed_chars = 0;
2331 int i;
2332
2333 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2334 {
2335 struct coding_system *this = &(coding_categories[i]);
2336 Lisp_Object attrs, val;
2337
2338 attrs = CODING_ID_ATTRS (this->id);
2339 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2340 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2341 setup_iso_safe_charsets (attrs);
2342 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2343 this->max_charset_id = XSTRING (val)->size - 1;
2344 this->safe_charsets = (char *) XSTRING (val)->data;
2345 }
2346
2347 /* A coding system of this category is always ASCII compatible. */
2348 src += coding->head_ascii;
3f003981 2349
df7492f9
KH
2350 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2351 while (mask_iso && src < src_end)
4ed46869 2352 {
df7492f9 2353 ONE_MORE_BYTE (c);
4ed46869
KH
2354 switch (c)
2355 {
2356 case ISO_CODE_ESC:
74383408
KH
2357 if (inhibit_iso_escape_detection)
2358 break;
f46869e4 2359 single_shifting = 0;
df7492f9 2360 ONE_MORE_BYTE (c);
d46c5b12 2361 if (c >= '(' && c <= '/')
4ed46869 2362 {
bf9cdd4e 2363 /* Designation sequence for a charset of dimension 1. */
df7492f9 2364 ONE_MORE_BYTE (c1);
d46c5b12 2365 if (c1 < ' ' || c1 >= 0x80
df7492f9 2366 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2367 /* Invalid designation sequence. Just ignore. */
2368 break;
df7492f9 2369 reg[(c - '(') % 4] = id;
bf9cdd4e
KH
2370 }
2371 else if (c == '$')
2372 {
2373 /* Designation sequence for a charset of dimension 2. */
df7492f9 2374 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2375 if (c >= '@' && c <= 'B')
2376 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
df7492f9 2377 reg[0] = id = iso_charset_table[1][0][c];
bf9cdd4e 2378 else if (c >= '(' && c <= '/')
bcf26d6a 2379 {
df7492f9 2380 ONE_MORE_BYTE (c1);
d46c5b12 2381 if (c1 < ' ' || c1 >= 0x80
df7492f9 2382 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2383 /* Invalid designation sequence. Just ignore. */
2384 break;
df7492f9 2385 reg[(c - '(') % 4] = id;
bcf26d6a 2386 }
bf9cdd4e 2387 else
d46c5b12
KH
2388 /* Invalid designation sequence. Just ignore. */
2389 break;
2390 }
ae9ff118 2391 else if (c == 'N' || c == 'O')
d46c5b12 2392 {
ae9ff118 2393 /* ESC <Fe> for SS2 or SS3. */
df7492f9 2394 mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
d46c5b12 2395 break;
4ed46869 2396 }
ec6d2bb8
KH
2397 else if (c >= '0' && c <= '4')
2398 {
2399 /* ESC <Fp> for start/end composition. */
df7492f9 2400 mask_found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2401 break;
2402 }
bf9cdd4e 2403 else
df7492f9
KH
2404 {
2405 /* Invalid escape sequence. */
2406 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2407 break;
2408 }
d46c5b12
KH
2409
2410 /* We found a valid designation sequence for CHARSET. */
df7492f9
KH
2411 mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2412 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2413 id))
2414 mask_found |= CATEGORY_MASK_ISO_7;
d46c5b12 2415 else
df7492f9
KH
2416 mask_iso &= ~CATEGORY_MASK_ISO_7;
2417 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2418 id))
2419 mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2420 else
df7492f9
KH
2421 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2422 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2423 id))
2424 mask_found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2425 else
df7492f9
KH
2426 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2427 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2428 id))
2429 mask_found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2430 else
df7492f9 2431 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2432 break;
2433
4ed46869 2434 case ISO_CODE_SO:
74383408
KH
2435 if (inhibit_iso_escape_detection)
2436 break;
f46869e4 2437 single_shifting = 0;
d46c5b12
KH
2438 if (shift_out == 0
2439 && (reg[1] >= 0
df7492f9
KH
2440 || SHIFT_OUT_OK (coding_category_iso_7_else)
2441 || SHIFT_OUT_OK (coding_category_iso_8_else)))
d46c5b12
KH
2442 {
2443 /* Locking shift out. */
df7492f9
KH
2444 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2445 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12 2446 }
e0e989f6 2447 break;
df7492f9 2448
d46c5b12 2449 case ISO_CODE_SI:
74383408
KH
2450 if (inhibit_iso_escape_detection)
2451 break;
f46869e4 2452 single_shifting = 0;
d46c5b12
KH
2453 if (shift_out == 1)
2454 {
2455 /* Locking shift in. */
df7492f9
KH
2456 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2457 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2458 }
2459 break;
2460
4ed46869 2461 case ISO_CODE_CSI:
f46869e4 2462 single_shifting = 0;
4ed46869
KH
2463 case ISO_CODE_SS2:
2464 case ISO_CODE_SS3:
3f003981 2465 {
df7492f9 2466 int newmask = CATEGORY_MASK_ISO_8_ELSE;
3f003981 2467
74383408
KH
2468 if (inhibit_iso_escape_detection)
2469 break;
70c22245
KH
2470 if (c != ISO_CODE_CSI)
2471 {
df7492f9
KH
2472 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2473 & CODING_ISO_FLAG_SINGLE_SHIFT)
2474 newmask |= CATEGORY_MASK_ISO_8_1;
2475 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2476 & CODING_ISO_FLAG_SINGLE_SHIFT)
2477 newmask |= CATEGORY_MASK_ISO_8_2;
f46869e4 2478 single_shifting = 1;
70c22245 2479 }
3f003981
KH
2480 if (VECTORP (Vlatin_extra_code_table)
2481 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2482 {
df7492f9
KH
2483 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2484 & CODING_ISO_FLAG_LATIN_EXTRA)
2485 newmask |= CATEGORY_MASK_ISO_8_1;
2486 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2487 & CODING_ISO_FLAG_LATIN_EXTRA)
2488 newmask |= CATEGORY_MASK_ISO_8_2;
3f003981 2489 }
df7492f9 2490 mask_iso &= newmask;
d46c5b12 2491 mask_found |= newmask;
3f003981
KH
2492 }
2493 break;
4ed46869
KH
2494
2495 default:
2496 if (c < 0x80)
f46869e4
KH
2497 {
2498 single_shifting = 0;
2499 break;
2500 }
4ed46869 2501 else if (c < 0xA0)
c4825358 2502 {
f46869e4 2503 single_shifting = 0;
df7492f9 2504 mask_8bit_found = 1;
3f003981
KH
2505 if (VECTORP (Vlatin_extra_code_table)
2506 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
c4825358 2507 {
3f003981
KH
2508 int newmask = 0;
2509
df7492f9
KH
2510 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2511 & CODING_ISO_FLAG_LATIN_EXTRA)
2512 newmask |= CATEGORY_MASK_ISO_8_1;
2513 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2514 & CODING_ISO_FLAG_LATIN_EXTRA)
2515 newmask |= CATEGORY_MASK_ISO_8_2;
2516 mask_iso &= newmask;
d46c5b12 2517 mask_found |= newmask;
c4825358 2518 }
3f003981
KH
2519 else
2520 return 0;
c4825358 2521 }
4ed46869
KH
2522 else
2523 {
df7492f9
KH
2524 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2525 | CATEGORY_MASK_ISO_7_ELSE);
2526 mask_found |= CATEGORY_MASK_ISO_8_1;
2527 mask_8bit_found = 1;
f46869e4
KH
2528 /* Check the length of succeeding codes of the range
2529 0xA0..0FF. If the byte length is odd, we exclude
df7492f9 2530 CATEGORY_MASK_ISO_8_2. We can check this only
f46869e4 2531 when we are not single shifting. */
b73bfc1c 2532 if (!single_shifting
df7492f9 2533 && mask_iso & CATEGORY_MASK_ISO_8_2)
f46869e4 2534 {
e17de821 2535 int i = 1;
b73bfc1c
KH
2536 while (src < src_end)
2537 {
df7492f9 2538 ONE_MORE_BYTE (c);
b73bfc1c
KH
2539 if (c < 0xA0)
2540 break;
2541 i++;
2542 }
2543
2544 if (i & 1 && src < src_end)
df7492f9 2545 mask_iso &= ~CATEGORY_MASK_ISO_8_2;
f46869e4 2546 else
df7492f9 2547 mask_found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2548 }
4ed46869
KH
2549 }
2550 break;
2551 }
2552 }
df7492f9
KH
2553 no_more_source:
2554 if (!mask_iso)
2555 {
2556 *mask &= ~CATEGORY_MASK_ISO;
2557 return 0;
2558 }
2559 if (!mask_found)
2560 return 0;
2561 *mask &= mask_iso & mask_found;
2562 if (! mask_8bit_found)
2563 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2564 return 1;
4ed46869
KH
2565}
2566
4ed46869
KH
2567
/* Set designation state into CODING: record that the charset selected
   by dimension DIM, CHARS_96 and final byte FINAL is designated to
   graphic register REG.

   If FINAL is outside '0'..127, no charset is registered for the
   combination, or the charset is not safe for CODING, the register
   is marked with -2 and control jumps to the label `invalid_code'.
   JISX0201-Roman is replaced by ASCII when CODING_ISO_FLAG_USE_ROMAN
   is set, and JISX0208.1978 by JISX0208 when
   CODING_ISO_FLAG_USE_OLDJIS is set.

   Uses the variable `coding' and the label `invalid_code' of the
   expansion site.  */

#define DECODE_DESIGNATION(reg, dim, chars_96, final)			\
  do {									\
    int designated, previous;						\
									\
    if ((final) < '0' || (final) > 127					\
	|| (designated = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0	\
	|| ! SAFE_CHARSET_P (coding, designated))			\
      {									\
	/* Remember that REG received an invalid designation.  */	\
	CODING_ISO_DESIGNATION (coding, reg) = -2;			\
	goto invalid_code;						\
      }									\
    previous = CODING_ISO_DESIGNATION (coding, reg);			\
    if (designated == charset_jisx0201_roman)				\
      designated = ((CODING_ISO_FLAGS (coding)				\
		     & CODING_ISO_FLAG_USE_ROMAN)			\
		    ? charset_ascii : designated);			\
    else if (designated == charset_jisx0208_1978)			\
      designated = ((CODING_ISO_FLAGS (coding)				\
		     & CODING_ISO_FLAG_USE_OLDJIS)			\
		    ? charset_jisx0208 : designated);			\
    CODING_ISO_DESIGNATION (coding, reg) = designated;			\
    /* When REG held an invalid designation and is now set back to	\
       ASCII, this designation sequence itself should be kept, so	\
       it is routed through `invalid_code' as well.  */			\
    if (previous == -2 && designated == charset_ascii)			\
      goto invalid_code;						\
  } while (0)
2598
d46c5b12 2599
df7492f9
KH
/* If a composition is in progress, abandon it: copy the characters
   collected so far in `components' to `charbuf' as ordinary
   characters and reset `composition_state' to COMPOSING_NO.  For
   COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS every entry is
   copied; for the other methods only every second entry is (the
   entries in between hold rules).

   Jumps to `no_more_source' when `charbuf' cannot hold
   `component_idx' more entries.  Does nothing when no composition
   is in progress.  Uses many local variables of the expansion
   site.  */

#define MAYBE_FINISH_COMPOSITION()					\
  do {									\
    if (composition_state != COMPOSING_NO)				\
      {									\
	int k, stride;							\
									\
	/* Room for everything stored in `components' is needed.  */	\
	if (charbuf_end < charbuf + component_idx)			\
	  goto no_more_source;						\
	composition_state = COMPOSING_NO;				\
	if (method == COMPOSITION_RELATIVE				\
	    || method == COMPOSITION_WITH_ALTCHARS)			\
	  {								\
	    stride = 1;							\
	    char_offset += component_idx;				\
	  }								\
	else								\
	  {								\
	    stride = 2;							\
	    char_offset += (component_idx / 2) + 1;			\
	  }								\
	for (k = 0; k < component_idx; k += stride)			\
	  *charbuf++ = components[k];					\
      }									\
  } while (0)
2624
d46c5b12 2625
aa72b389
KH
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar  composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte following ESC.  When C1 is '0' and the state is
   COMPOSING_COMPONENT_RULE, the sequence only separates the
   component list from the composed characters.  Otherwise any
   unfinished composition is flushed and a new one is started, but
   only if the terminating ESC 1 is present in the remaining source.
   If it is not, control jumps to `invalid_code' when this is the
   last block and to `no_more_source' otherwise.  That includes the
   case where no source bytes remain at all, which the scan loop
   below does not enter.

   Uses many local variables and labels of the expansion site.  */

#define DECODE_COMPOSITION_START(c1)					\
  do {									\
    if ((c1) == '0'							\
	&& composition_state == COMPOSING_COMPONENT_RULE)		\
      {									\
	component_len = component_idx;					\
	composition_state = COMPOSING_CHAR;				\
      }									\
    else								\
      {									\
	unsigned char *p;						\
									\
	MAYBE_FINISH_COMPOSITION ();					\
	if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)		\
	  goto no_more_source;						\
	for (p = src; p < src_end - 1; p++)				\
	  if (*p == ISO_CODE_ESC && p[1] == '1')			\
	    break;							\
	/* `>=' rather than `==': when SRC is already at SRC_END the	\
	   loop above leaves P at SRC_END, which also means that no	\
	   ESC 1 was found.  */						\
	if (p >= src_end - 1)						\
	  {								\
	    if (coding->mode & CODING_MODE_LAST_BLOCK)			\
	      goto invalid_code;					\
	    goto no_more_source;					\
	  }								\
									\
	/* This is surely the start of a composition.  */		\
	method = ((c1) == '0' ? COMPOSITION_RELATIVE			\
		  : (c1) == '2' ? COMPOSITION_WITH_RULE			\
		  : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS		\
		  : COMPOSITION_WITH_RULE_ALTCHARS);			\
	composition_state = ((c1) <= '2' ? COMPOSING_CHAR		\
			     : COMPOSING_COMPONENT_CHAR);		\
	component_idx = component_len = 0;				\
      }									\
  } while (0)
2668
ec6d2bb8 2669
df7492f9
KH
/* Handle composition end sequence ESC 1.

   Emits the composition annotation with ADD_COMPOSITION_DATA.  For
   every method except COMPOSITION_RELATIVE the collected components
   follow (the first `component_len' entries of `components', or all
   `component_idx' entries when `component_len' is 0), and the first
   annotation element is then overwritten with the negated number of
   elements written since it.  Finally the composed characters are
   emitted, `char_offset' is advanced once per character,
   CODING->annotated is set, and the state returns to COMPOSING_NO.

   Uses many local variables of the expansion site.  */

#define DECODE_COMPOSITION_END()					\
  do {									\
    int *annotation_head = charbuf;					\
    int k, nchars;							\
									\
    if (component_len > 0)						\
      nchars = component_idx - component_len;				\
    else if (method == COMPOSITION_RELATIVE)				\
      nchars = component_idx;						\
    else								\
      nchars = (component_idx + 1) / 2;					\
									\
    ADD_COMPOSITION_DATA (charbuf, method, nchars);			\
    if (method != COMPOSITION_RELATIVE)					\
      {									\
	int ncomponents = (component_len == 0				\
			   ? component_idx : component_len);		\
									\
	for (k = 0; k < ncomponents; k++)				\
	  *charbuf++ = components[k];					\
	*annotation_head = annotation_head - charbuf;			\
      }									\
    if (method == COMPOSITION_WITH_RULE)				\
      for (k = 0; k < component_idx; k += 2, char_offset++)		\
	*charbuf++ = components[k];					\
    else								\
      for (k = component_len; k < component_idx; k++, char_offset++)	\
	*charbuf++ = components[k];					\
    coding->annotated = 1;						\
    composition_state = COMPOSING_NO;					\
  } while (0)
2700
df7492f9 2701
ec6d2bb8
KH
/* Decode a composition rule from the byte C1 (and maybe one more byte
   read from the source into `c2') and store the encoded rule back
   into C1.

   After subtracting 32, a value below 81 is the old one-byte format
   (before ver.21): it holds two references as VALUE / 9 and
   VALUE % 9, and a reference of 4 is mapped to 10.  A value of
   81..92 is the new two-byte format (after ver.21): the second
   reference comes from the next byte.  Any other value yields 0.

   Uses the variable `c2' and ONE_MORE_BYTE of the expansion site.  */

#define DECODE_COMPOSITION_RULE(c1)					\
  do {									\
    (c1) -= 32;								\
    if ((c1) >= 93)							\
      (c1) = 0;								\
    else if ((c1) >= 81)	/* new format (after ver.21) */		\
      {									\
	ONE_MORE_BYTE (c2);						\
	(c1) = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32);		\
      }									\
    else			/* old format (before ver.21) */	\
      {									\
	int gref = (c1) / 9;						\
	int nref = (c1) % 9;						\
									\
	gref = (gref == 4 ? 10 : gref);					\
	nref = (nref == 4 ? 10 : nref);					\
	(c1) = COMPOSITION_ENCODE_RULE (gref, nref);			\
      }									\
  } while (0)
88993dfd 2725
d46c5b12 2726
4ed46869
KH
2727/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2728
b73bfc1c 2729static void
df7492f9 2730decode_coding_iso_2022 (coding)
4ed46869 2731 struct coding_system *coding;
4ed46869 2732{
df7492f9
KH
2733 unsigned char *src = coding->source + coding->consumed;
2734 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 2735 unsigned char *src_base;
df7492f9
KH
2736 int *charbuf = coding->charbuf;
2737 int *charbuf_end = charbuf + coding->charbuf_size - 4;
2738 int consumed_chars = 0, consumed_chars_base;
2739 int char_offset = 0;
2740 int multibytep = coding->src_multibyte;
2741 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2742 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2743 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2744 struct charset *charset;
2745 int c;
2746 /* For handling composition sequence. */
2747#define COMPOSING_NO 0
2748#define COMPOSING_CHAR 1
2749#define COMPOSING_RULE 2
2750#define COMPOSING_COMPONENT_CHAR 3
2751#define COMPOSING_COMPONENT_RULE 4
2752
2753 int composition_state = COMPOSING_NO;
2754 enum composition_method method;
2755 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2756 int component_idx;
2757 int component_len;
2758 Lisp_Object attrs, eol_type, charset_list;
2759
2760 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2761 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2762
2763 while (1)
4ed46869 2764 {
b73bfc1c
KH
2765 int c1, c2;
2766
2767 src_base = src;
df7492f9
KH
2768 consumed_chars_base = consumed_chars;
2769
2770 if (charbuf >= charbuf_end)
2771 break;
2772
b73bfc1c 2773 ONE_MORE_BYTE (c1);
4ed46869 2774
ec6d2bb8 2775 /* We produce no character or one character. */
4ed46869
KH
2776 switch (iso_code_class [c1])
2777 {
2778 case ISO_0x20_or_0x7F:
df7492f9 2779 if (composition_state != COMPOSING_NO)
ec6d2bb8 2780 {
df7492f9
KH
2781 if (composition_state == COMPOSING_RULE
2782 || composition_state == COMPOSING_COMPONENT_RULE)
2783 {
2784 DECODE_COMPOSITION_RULE (c1);
2785 components[component_idx++] = c1;
2786 composition_state--;
2787 continue;
2788 }
ec6d2bb8 2789 }
df7492f9
KH
2790 if (charset_id_0 < 0
2791 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2792 /* This is SPACE or DEL. */
2793 charset = CHARSET_FROM_ID (charset_ascii);
2794 else
2795 charset = CHARSET_FROM_ID (charset_id_0);
2796 break;
4ed46869
KH
2797
2798 case ISO_graphic_plane_0:
781d7a48 2799 if (composition_state != COMPOSING_NO)
b73bfc1c 2800 {
781d7a48
KH
2801 if (composition_state == COMPOSING_RULE
2802 || composition_state == COMPOSING_COMPONENT_RULE)
2803 {
2804 DECODE_COMPOSITION_RULE (c1);
2805 components[component_idx++] = c1;
2806 composition_state--;
2807 continue;
2808 }
b73bfc1c 2809 }
df7492f9 2810 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2811 break;
2812
2813 case ISO_0xA0_or_0xFF:
df7492f9
KH
2814 if (charset_id_1 < 0
2815 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2816 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2817 goto invalid_code;
4ed46869
KH
2818 /* This is a graphic character, we fall down ... */
2819
2820 case ISO_graphic_plane_1:
df7492f9
KH
2821 if (charset_id_1 < 0)
2822 goto invalid_code;
2823 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2824 break;
2825
2826 case ISO_carriage_return:
df7492f9 2827 if (c1 == '\r')
4ed46869 2828 {
df7492f9 2829 if (EQ (eol_type, Qdos))
4ed46869 2830 {
df7492f9
KH
2831 if (src == src_end)
2832 goto no_more_source;
2833 if (*src == '\n')
2834 ONE_MORE_BYTE (c1);
4ed46869 2835 }
df7492f9
KH
2836 else if (EQ (eol_type, Qmac))
2837 c1 = '\n';
4ed46869 2838 }
df7492f9
KH
2839 /* fall through */
2840
2841 case ISO_control_0:
2842 MAYBE_FINISH_COMPOSITION ();
2843 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2844 break;
2845
df7492f9
KH
2846 case ISO_control_1:
2847 MAYBE_FINISH_COMPOSITION ();
2848 goto invalid_code;
2849
4ed46869 2850 case ISO_shift_out:
df7492f9
KH
2851 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2852 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2853 goto invalid_code;
2854 CODING_ISO_INVOCATION (coding, 0) = 1;
2855 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2856 continue;
4ed46869
KH
2857
2858 case ISO_shift_in:
df7492f9
KH
2859 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2860 goto invalid_code;
2861 CODING_ISO_INVOCATION (coding, 0) = 0;
2862 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2863 continue;
4ed46869
KH
2864
2865 case ISO_single_shift_2_7:
2866 case ISO_single_shift_2:
df7492f9
KH
2867 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2868 goto invalid_code;
4ed46869
KH
2869 /* SS2 is handled as an escape sequence of ESC 'N' */
2870 c1 = 'N';
2871 goto label_escape_sequence;
2872
2873 case ISO_single_shift_3:
df7492f9
KH
2874 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2875 goto invalid_code;
4ed46869
KH
2876 /* SS2 is handled as an escape sequence of ESC 'O' */
2877 c1 = 'O';
2878 goto label_escape_sequence;
2879
2880 case ISO_control_sequence_introducer:
2881 /* CSI is handled as an escape sequence of ESC '[' ... */
2882 c1 = '[';
2883 goto label_escape_sequence;
2884
2885 case ISO_escape:
2886 ONE_MORE_BYTE (c1);
2887 label_escape_sequence:
df7492f9 2888 /* Escape sequences handled here are invocation,
4ed46869
KH
2889 designation, direction specification, and character
2890 composition specification. */
2891 switch (c1)
2892 {
2893 case '&': /* revision of following character set */
2894 ONE_MORE_BYTE (c1);
2895 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2896 goto invalid_code;
4ed46869
KH
2897 ONE_MORE_BYTE (c1);
2898 if (c1 != ISO_CODE_ESC)
df7492f9 2899 goto invalid_code;
4ed46869
KH
2900 ONE_MORE_BYTE (c1);
2901 goto label_escape_sequence;
2902
2903 case '$': /* designation of 2-byte character set */
df7492f9
KH
2904 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2905 goto invalid_code;
4ed46869
KH
2906 ONE_MORE_BYTE (c1);
2907 if (c1 >= '@' && c1 <= 'B')
2908 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2909 or JISX0208.1980 */
df7492f9 2910 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2911 }
2912 else if (c1 >= 0x28 && c1 <= 0x2B)
2913 { /* designation of DIMENSION2_CHARS94 character set */
2914 ONE_MORE_BYTE (c2);
df7492f9 2915 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
2916 }
2917 else if (c1 >= 0x2C && c1 <= 0x2F)
2918 { /* designation of DIMENSION2_CHARS96 character set */
2919 ONE_MORE_BYTE (c2);
df7492f9 2920 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
2921 }
2922 else
df7492f9 2923 goto invalid_code;
b73bfc1c 2924 /* We must update these variables now. */
df7492f9
KH
2925 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2926 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2927 continue;
4ed46869
KH
2928
2929 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
2930 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2931 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2932 goto invalid_code;
2933 CODING_ISO_INVOCATION (coding, 0) = 2;
2934 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2935 continue;
4ed46869
KH
2936
2937 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
2938 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2939 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2940 goto invalid_code;
2941 CODING_ISO_INVOCATION (coding, 0) = 3;
2942 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2943 continue;
4ed46869
KH
2944
2945 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
2946 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2947 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2948 goto invalid_code;
2949 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 2950 ONE_MORE_BYTE (c1);
e7046a18 2951 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2952 goto invalid_code;
4ed46869
KH
2953 break;
2954
2955 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
2956 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2957 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2958 goto invalid_code;
2959 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 2960 ONE_MORE_BYTE (c1);
e7046a18 2961 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2962 goto invalid_code;
4ed46869
KH
2963 break;
2964
ec6d2bb8 2965 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
2966 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2967 goto invalid_code;
ec6d2bb8 2968 DECODE_COMPOSITION_START (c1);
b73bfc1c 2969 continue;
4ed46869 2970
ec6d2bb8 2971 case '1': /* end composition */
df7492f9
KH
2972 if (composition_state == COMPOSING_NO)
2973 goto invalid_code;
2974 DECODE_COMPOSITION_END ();
b73bfc1c 2975 continue;
4ed46869
KH
2976
2977 case '[': /* specification of direction */
df7492f9
KH
2978 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2979 goto invalid_code;
4ed46869 2980 /* For the moment, nested direction is not supported.
d46c5b12 2981 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 2982 left-to-right, and nozero means right-to-left. */
4ed46869
KH
2983 ONE_MORE_BYTE (c1);
2984 switch (c1)
2985 {
2986 case ']': /* end of the current direction */
d46c5b12 2987 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
2988
2989 case '0': /* end of the current direction */
2990 case '1': /* start of left-to-right direction */
2991 ONE_MORE_BYTE (c1);
2992 if (c1 == ']')
d46c5b12 2993 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 2994 else
df7492f9 2995 goto invalid_code;
4ed46869
KH
2996 break;
2997
2998 case '2': /* start of right-to-left direction */
2999 ONE_MORE_BYTE (c1);
3000 if (c1 == ']')
d46c5b12 3001 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 3002 else
df7492f9 3003 goto invalid_code;
4ed46869
KH
3004 break;
3005
3006 default:
df7492f9 3007 goto invalid_code;
4ed46869 3008 }
b73bfc1c 3009 continue;
4ed46869
KH
3010
3011 default:
df7492f9
KH
3012 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3013 goto invalid_code;
4ed46869
KH
3014 if (c1 >= 0x28 && c1 <= 0x2B)
3015 { /* designation of DIMENSION1_CHARS94 character set */
3016 ONE_MORE_BYTE (c2);
df7492f9 3017 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
3018 }
3019 else if (c1 >= 0x2C && c1 <= 0x2F)
3020 { /* designation of DIMENSION1_CHARS96 character set */
3021 ONE_MORE_BYTE (c2);
df7492f9 3022 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
3023 }
3024 else
df7492f9 3025 goto invalid_code;
b73bfc1c 3026 /* We must update these variables now. */
df7492f9
KH
3027 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3028 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3029 continue;
4ed46869 3030 }
b73bfc1c 3031 }
4ed46869 3032
b73bfc1c 3033 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3034 Produce a decoded character while getting 2nd position code
3035 C2 if necessary. */
3036 c1 &= 0x7F;
3037 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3038 {
3039 ONE_MORE_BYTE (c2);
df7492f9 3040 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3041 /* C2 is not in a valid range. */
df7492f9
KH
3042 goto invalid_code;
3043 c1 = (c1 << 8) | (c2 & 0x7F);
3044 if (CHARSET_DIMENSION (charset) > 2)
3045 {
3046 ONE_MORE_BYTE (c2);
3047 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3048 /* C2 is not in a valid range. */
3049 goto invalid_code;
3050 c1 = (c1 << 8) | (c2 & 0x7F);
3051 }
3052 }
3053
3054 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3055 if (c < 0)
3056 {
3057 MAYBE_FINISH_COMPOSITION ();
3058 for (; src_base < src; src_base++, char_offset++)
3059 {
3060 if (ASCII_BYTE_P (*src_base))
3061 *charbuf++ = *src_base;
3062 else
3063 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3064 }
3065 }
3066 else if (composition_state == COMPOSING_NO)
3067 {
3068 *charbuf++ = c;
3069 char_offset++;
4ed46869 3070 }
df7492f9 3071 else
781d7a48
KH
3072 {
3073 components[component_idx++] = c;
3074 if (method == COMPOSITION_WITH_RULE
3075 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3076 && composition_state == COMPOSING_COMPONENT_CHAR))
3077 composition_state++;
3078 }
4ed46869
KH
3079 continue;
3080
df7492f9
KH
3081 invalid_code:
3082 MAYBE_FINISH_COMPOSITION ();
4ed46869 3083 src = src_base;
df7492f9
KH
3084 consumed_chars = consumed_chars_base;
3085 ONE_MORE_BYTE (c);
3086 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3087 coding->errors++;
4ed46869 3088 }
fb88bf2d 3089
df7492f9
KH
3090 no_more_source:
3091 coding->consumed_char += consumed_chars_base;
3092 coding->consumed = src_base - coding->source;
3093 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3094}
3095
b73bfc1c 3096
f4dee582 3097/* ISO2022 encoding stuff. */
4ed46869
KH
3098
3099/*
f4dee582 3100 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3101 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3102 variant has the following specifications:
df7492f9 3103 1. Initial designation to G0 thru G3.
4ed46869
KH
3104 2. Allows short-form designation?
3105 3. ASCII should be designated to G0 before control characters?
3106 4. ASCII should be designated to G0 at end of line?
3107 5. 7-bit environment or 8-bit environment?
3108 6. Use locking-shift?
3109 7. Use Single-shift?
3110 And the following two are only for Japanese:
3111 8. Use ASCII in place of JIS0201-1976-Roman?
3112 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3113 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3114 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3115 details.
4ed46869
KH
3116*/
3117
/* Emit the escape sequence that designates CHARSET to graphic
   register REG, then record that designation in CODING.

   The sequence is, in order: an optional revision prefix (ESC '&'
   and '@' + revision, only when CODING has CODING_ISO_FLAG_REVISION
   and CHARSET has a non-negative revision), ESC, '$' for charsets
   whose dimension is not 1, an intermediate character selected by
   REG (from "()*+" for 94-char sets, ",-./" for 96-char sets), and
   the final character of CHARSET.

   The intermediate character is left out (short form) only for a
   multi-dimension 94-char set designated to register 0 whose final
   character is '@', 'A', or 'B', and only when CODING does not have
   CODING_ISO_FLAG_LONG_FORM.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    int chars_96 = CHARSET_ISO_CHARS_96 (charset) ? 1 : 0;              \
    int dimension_1 = CHARSET_DIMENSION (charset) == 1;                 \
    int revision                                                        \
      = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION           \
         ? CHARSET_ISO_REVISION (charset)                               \
         : -1);                                                         \
    /* Intermediate character for REG in the table that matches the    \
       charset size.  */                                                \
    int c = (chars_96 ? ",-./" : "()*+")[reg];                          \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (! dimension_1)                                                  \
      EMIT_ONE_ASCII_BYTE ('$');                                        \
    /* Everything except the short-form case carries the intermediate  \
       character.  */                                                   \
    if (dimension_1                                                     \
        || chars_96                                                     \
        || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM        \
        || reg != 0                                                     \
        || final_char < '@' || final_char > 'B')                        \
      EMIT_ONE_ASCII_BYTE (c);                                          \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3165
df7492f9 3166
4ed46869
KH
/* The two macros below emit the code for an ISO2022 single-shift
   function: single-shift-2 and single-shift-3 respectively.  When
   the coding system has CODING_ISO_FLAG_SEVEN_BITS the two-byte
   escape sequence (ESC 'N' / ESC 'O') is emitted, otherwise the
   single control byte (ISO_CODE_SS2 / ISO_CODE_SS3).  Either way the
   single-shifting state of CODING is set to 1 afterwards.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS2);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS3);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3189
df7492f9 3190
4ed46869
KH
/* The following four macros produce codes (control character or
   escape sequence) for the ISO2022 locking-shift functions shift-in,
   shift-out, locking-shift-2, and locking-shift-3.  Each macro emits
   the shift code and then records in CODING the number of the
   graphic register that is now invoked to graphic plane 0.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* Locking-shift-3 is ESC 'o'; ESC 'n' is locking-shift-2.  This must
   match what decode_coding_iso_2022 accepts for each function.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
3221
df7492f9 3222
f4dee582
RS
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be a modifiable lvalue: when CODING has
   CODING_ISO_FLAG_USE_ROMAN and CHARSET is ASCII, it is replaced by
   the charset whose ID is charset_jisx0201_roman.

   The body loops: if CHARSET is neither being single-shifted nor
   invoked to graphic plane 0 or 1, encode_invocation_designation is
   called to emit the needed designation/shift and the checks are
   repeated.  The loop is left as soon as the character has been
   emitted.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single-shift code was just emitted: this one character    \
           follows it and the single-shift state ends.  */              \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        /* Invoked to graphic plane 0: emit with the high bit clear.  */\
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        /* Invoked to graphic plane 1: emit with the high bit set.  */  \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3265
df7492f9 3266
f4dee582
RS
/* Emit a DIMENSION2 character of CHARSET whose two position-codes are
   C1 and C2, first emitting whatever designation and invocation codes
   are needed to make CHARSET available.

   CHARSET must be a modifiable lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS and CHARSET's ID is charset_jisx0208, it
   is replaced by the charset whose ID is charset_jisx0208_1978.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (! CODING_ISO_SINGLE_SHIFTING (coding))                          \
      {                                                                 \
        if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))               \
          {                                                             \
            /* Graphic plane 0: both bytes with the high bit clear.  */ \
            EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);            \
            break;                                                      \
          }                                                             \
        if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))               \
          {                                                             \
            /* Graphic plane 1: both bytes with the high bit set.  */   \
            EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                  \
            break;                                                      \
          }                                                             \
        /* CHARSET is not invoked to either graphic plane yet.  Emit    \
           the designation and/or shift it needs, then go round again   \
           to emit the character itself.  */                            \
        dst = encode_invocation_designation (charset, coding, dst,      \
                                             &produced_chars);          \
        continue;                                                       \
      }                                                                 \
                                                                        \
    /* A single-shift code precedes this character; the single-shift   \
       state ends once the character is out.  */                        \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 0;                            \
    break;                                                              \
  } while (1)
3309
05e6f5dc 3310
df7492f9
KH
/* Encode character C of CHARSET.  The code point is obtained with
   ENCODE_CHAR and handed to the DIMENSION1 macro when CHARSET has
   dimension 1; otherwise it is split into (code >> 8) and
   (code & 0xFF) and handed to the DIMENSION2 macro.  */

#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset),(c));                                \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      {                                                                    \
        ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                 \
      }                                                                    \
  } while (0)
bdd9fb48 3320
05e6f5dc 3321
4ed46869 3322/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3323 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3324 Return new DST. */
3325
3326unsigned char *
df7492f9
KH
3327encode_invocation_designation (charset, coding, dst, p_nchars)
3328 struct charset *charset;
4ed46869
KH
3329 struct coding_system *coding;
3330 unsigned char *dst;
df7492f9 3331 int *p_nchars;
4ed46869 3332{
df7492f9
KH
3333 int multibytep = coding->dst_multibyte;
3334 int produced_chars = *p_nchars;
4ed46869 3335 int reg; /* graphic register number */
df7492f9 3336 int id = CHARSET_ID (charset);
4ed46869
KH
3337
3338 /* At first, check designations. */
3339 for (reg = 0; reg < 4; reg++)
df7492f9 3340 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3341 break;
3342
3343 if (reg >= 4)
3344 {
3345 /* CHARSET is not yet designated to any graphic registers. */
3346 /* At first check the requested designation. */
df7492f9
KH
3347 reg = CODING_ISO_REQUEST (coding, id);
3348 if (reg < 0)
1ba9e4ab
KH
3349 /* Since CHARSET requests no special designation, designate it
3350 to graphic register 0. */
4ed46869
KH
3351 reg = 0;
3352
3353 ENCODE_DESIGNATION (charset, reg, coding);
3354 }
3355
df7492f9
KH
3356 if (CODING_ISO_INVOCATION (coding, 0) != reg
3357 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3358 {
3359 /* Since the graphic register REG is not invoked to any graphic
3360 planes, invoke it to graphic plane 0. */
3361 switch (reg)
3362 {
3363 case 0: /* graphic register 0 */
3364 ENCODE_SHIFT_IN;
3365 break;
3366
3367 case 1: /* graphic register 1 */
3368 ENCODE_SHIFT_OUT;
3369 break;
3370
3371 case 2: /* graphic register 2 */
df7492f9 3372 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3373 ENCODE_SINGLE_SHIFT_2;
3374 else
3375 ENCODE_LOCKING_SHIFT_2;
3376 break;
3377
3378 case 3: /* graphic register 3 */
df7492f9 3379 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3380 ENCODE_SINGLE_SHIFT_3;
3381 else
3382 ENCODE_LOCKING_SHIFT_3;
3383 break;
3384 }
3385 }
b73bfc1c 3386
df7492f9 3387 *p_nchars = produced_chars;
4ed46869
KH
3388 return dst;
3389}
3390
df7492f9
KH
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the control sequence
   introducer: ESC '[' when the coding system has
   CODING_ISO_FLAG_SEVEN_BITS set, otherwise the single byte
   ISO_CODE_CSI.  It is an object-like macro and takes no argument.

   ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R emit the introducer
   followed by "2]" and "0]" respectively.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    /* Test the flag bit; CODING_ISO_FLAGS holds other bits too.  */    \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)


#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)


#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
3414
4ed46869
KH
3415
/* Emit the codes that bring the graphic planes and registers back to
   their initial state: a shift-in when graphic plane 0 is not
   invoking register 0, followed by a designation for every register
   that has a non-negative initial charset different from the one
   currently designated to it.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      {                                                                 \
        ENCODE_SHIFT_IN;                                                \
      }                                                                 \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        int initial_id = CODING_ISO_INITIAL (coding, reg);              \
                                                                        \
        /* Nothing to do for a register with no initial charset or     \
           one that still holds its initial charset.  */                \
        if (initial_id < 0                                              \
            || CODING_ISO_DESIGNATION (coding, reg) == initial_id)      \
          continue;                                                     \
        charset = CHARSET_FROM_ID (initial_id);                         \
        ENCODE_DESIGNATION (charset, reg, coding);                      \
      }                                                                 \
  } while (0)
3434
df7492f9 3435
bdd9fb48 3436/* Produce designation sequences of charsets in the line started from
b73bfc1c 3437 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3438
3439 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3440 find all the necessary designations. */
3441
b73bfc1c 3442static unsigned char *
df7492f9 3443encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3444 struct coding_system *coding;
df7492f9
KH
3445 int *charbuf, *charbuf_end;
3446 unsigned char *dst;
e0e989f6 3447{
df7492f9 3448 struct charset *charset;
bdd9fb48
KH
3449 /* Table of charsets to be designated to each graphic register. */
3450 int r[4];
df7492f9
KH
3451 int c, found = 0, reg;
3452 int produced_chars = 0;
3453 int multibytep = coding->dst_multibyte;
3454 Lisp_Object attrs;
3455 Lisp_Object charset_list;
3456
3457 attrs = CODING_ID_ATTRS (coding->id);
3458 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3459 if (EQ (charset_list, Qiso_2022))
3460 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3461
3462 for (reg = 0; reg < 4; reg++)
3463 r[reg] = -1;
3464
b73bfc1c 3465 while (found < 4)
e0e989f6 3466 {
df7492f9
KH
3467 int id;
3468
3469 c = *charbuf++;
b73bfc1c
KH
3470 if (c == '\n')
3471 break;
df7492f9
KH
3472 charset = char_charset (c, charset_list, NULL);
3473 id = CHARSET_ID (charset);
3474 reg = CODING_ISO_REQUEST (coding, id);
3475 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3476 {
3477 found++;
df7492f9 3478 r[reg] = id;
bdd9fb48 3479 }
bdd9fb48
KH
3480 }
3481
3482 if (found)
3483 {
3484 for (reg = 0; reg < 4; reg++)
3485 if (r[reg] >= 0
df7492f9
KH
3486 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3487 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3488 }
b73bfc1c
KH
3489
3490 return dst;
e0e989f6
KH
3491}
3492
4ed46869
KH
3493/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3494
df7492f9
KH
3495static int
3496encode_coding_iso_2022 (coding)
4ed46869 3497 struct coding_system *coding;
4ed46869 3498{
df7492f9
KH
3499 int multibytep = coding->dst_multibyte;
3500 int *charbuf = coding->charbuf;
3501 int *charbuf_end = charbuf + coding->charbuf_used;
3502 unsigned char *dst = coding->destination + coding->produced;
3503 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3504 int safe_room = 16;
3505 int bol_designation
3506 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3507 && CODING_ISO_BOL (coding));
3508 int produced_chars = 0;
3509 Lisp_Object attrs, eol_type, charset_list;
3510 int ascii_compatible;
b73bfc1c 3511 int c;
05e6f5dc 3512
df7492f9 3513 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
004068e4
KH
3514 setup_iso_safe_charsets (attrs);
3515 coding->safe_charsets
3516 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
bdd9fb48 3517
df7492f9 3518 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4ed46869 3519
df7492f9 3520 while (charbuf < charbuf_end)
4ed46869 3521 {
df7492f9 3522 ASSURE_DESTINATION (safe_room);
b73bfc1c 3523
df7492f9 3524 if (bol_designation)
b73bfc1c 3525 {
df7492f9 3526 unsigned char *dst_prev = dst;
4ed46869 3527
bdd9fb48 3528 /* We have to produce designation sequences if any now. */
df7492f9
KH
3529 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3530 bol_designation = 0;
3531 /* We are sure that designation sequences are all ASCII bytes. */
3532 produced_chars += dst - dst_prev;
4ed46869 3533 }
ec6d2bb8 3534
df7492f9 3535 c = *charbuf++;
4ed46869 3536
b73bfc1c
KH
3537 /* Now encode the character C. */
3538 if (c < 0x20 || c == 0x7F)
3539 {
df7492f9
KH
3540 if (c == '\n'
3541 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3542 {
df7492f9
KH
3543 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3544 ENCODE_RESET_PLANE_AND_REGISTER ();
3545 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3546 {
df7492f9
KH
3547 int i;
3548
3549 for (i = 0; i < 4; i++)
3550 CODING_ISO_DESIGNATION (coding, i)
3551 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3552 }
df7492f9
KH
3553 bol_designation
3554 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3555 }
df7492f9
KH
3556 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3557 ENCODE_RESET_PLANE_AND_REGISTER ();
3558 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3559 }
df7492f9 3560 else if (ASCII_CHAR_P (c))
88993dfd 3561 {
df7492f9
KH
3562 if (ascii_compatible)
3563 EMIT_ONE_ASCII_BYTE (c);
3564 else
bf16eb23
KH
3565 {
3566 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3567 ENCODE_ISO_CHARACTER (charset, c);
3568 }
88993dfd 3569 }
16eafb5d
KH
3570 else if (CHAR_BYTE8_P (c))
3571 {
3572 c = CHAR_TO_BYTE8 (c);
3573 EMIT_ONE_BYTE (c);
3574 }
b73bfc1c 3575 else
df7492f9
KH
3576 {
3577 struct charset *charset = char_charset (c, charset_list, NULL);
b73bfc1c 3578
df7492f9
KH
3579 if (!charset)
3580 {
41cbe562
KH
3581 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3582 {
3583 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3584 charset = CHARSET_FROM_ID (charset_ascii);
3585 }
3586 else
3587 {
3588 c = coding->default_char;
3589 charset = char_charset (c, charset_list, NULL);
3590 }
df7492f9
KH
3591 }
3592 ENCODE_ISO_CHARACTER (charset, c);
3593 }
84fbb8a0 3594 }
b73bfc1c 3595
df7492f9
KH
3596 if (coding->mode & CODING_MODE_LAST_BLOCK
3597 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3598 {
3599 ASSURE_DESTINATION (safe_room);
3600 ENCODE_RESET_PLANE_AND_REGISTER ();
3601 }
3602 coding->result = CODING_RESULT_SUCCESS;
3603 CODING_ISO_BOL (coding) = bol_designation;
3604 coding->produced_char += produced_chars;
3605 coding->produced = dst - coding->destination;
3606 return 0;
4ed46869
KH
3607}
3608
3609\f
df7492f9 3610/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 3611
df7492f9 3612/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3613 quite widely. So, for the moment, Emacs supports them in the bare
3614 C code. But, in the future, they may be supported only by CCL. */
3615
3616/* SJIS is a coding system encoding three character sets: ASCII, right
3617 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3618 as is. A character of charset katakana-jisx0201 is encoded by
3619 "position-code + 0x80". A character of charset japanese-jisx0208
3620 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 3621 so that it fit in the range below.
4ed46869
KH
3622
3623 --- CODE RANGE of SJIS ---
3624 (character set) (range)
3625 ASCII 0x00 .. 0x7F
df7492f9 3626 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3627 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3628 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3629 -------------------------------
3630
3631*/
3632
3633/* BIG5 is a coding system encoding two character sets: ASCII and
3634 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3635 character set and is encoded in two-byte.
4ed46869
KH
3636
3637 --- CODE RANGE of BIG5 ---
3638 (character set) (range)
3639 ASCII 0x00 .. 0x7F
3640 Big5 (1st byte) 0xA1 .. 0xFE
3641 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3642 --------------------------
3643
df7492f9 3644 */
4ed46869
KH
3645
3646/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3647 Check if a text is encoded in SJIS. If it is, return
df7492f9 3648 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3649
0a28aafb 3650static int
df7492f9
KH
3651detect_coding_sjis (coding, mask)
3652 struct coding_system *coding;
3653 int *mask;
4ed46869 3654{
df7492f9
KH
3655 unsigned char *src = coding->source, *src_base = src;
3656 unsigned char *src_end = coding->source + coding->src_bytes;
3657 int multibytep = coding->src_multibyte;
3658 int consumed_chars = 0;
3659 int found = 0;
b73bfc1c 3660 int c;
df7492f9
KH
3661
3662 /* A coding system of this category is always ASCII compatible. */
3663 src += coding->head_ascii;
4ed46869 3664
b73bfc1c 3665 while (1)
4ed46869 3666 {
df7492f9 3667 ONE_MORE_BYTE (c);
682169fe
KH
3668 if (c < 0x80)
3669 continue;
df7492f9 3670 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3671 {
df7492f9 3672 ONE_MORE_BYTE (c);
682169fe 3673 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9
KH
3674 break;
3675 found = 1;
4ed46869 3676 }
df7492f9
KH
3677 else if (c >= 0xA0 && c < 0xE0)
3678 found = 1;
3679 else
3680 break;
4ed46869 3681 }
df7492f9
KH
3682 *mask &= ~CATEGORY_MASK_SJIS;
3683 return 0;
3684
3685 no_more_source:
3686 if (!found)
3687 return 0;
3688 *mask &= CATEGORY_MASK_SJIS;
3689 return 1;
4ed46869
KH
3690}
3691
3692/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3693 Check if a text is encoded in BIG5. If it is, return
df7492f9 3694 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3695
0a28aafb 3696static int
df7492f9
KH
3697detect_coding_big5 (coding, mask)
3698 struct coding_system *coding;
3699 int *mask;
4ed46869 3700{
df7492f9
KH
3701 unsigned char *src = coding->source, *src_base = src;
3702 unsigned char *src_end = coding->source + coding->src_bytes;
3703 int multibytep = coding->src_multibyte;
3704 int consumed_chars = 0;
3705 int found = 0;
b73bfc1c 3706 int c;
fa42c37f 3707
df7492f9
KH
3708 /* A coding system of this category is always ASCII compatible. */
3709 src += coding->head_ascii;
fa42c37f 3710
b73bfc1c 3711 while (1)
fa42c37f 3712 {
df7492f9
KH
3713 ONE_MORE_BYTE (c);
3714 if (c < 0x80)
fa42c37f 3715 continue;
df7492f9 3716 if (c >= 0xA1)
fa42c37f 3717 {
df7492f9
KH
3718 ONE_MORE_BYTE (c);
3719 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3720 return 0;
df7492f9 3721 found = 1;
fa42c37f 3722 }
df7492f9
KH
3723 else
3724 break;
fa42c37f 3725 }
df7492f9 3726 *mask &= ~CATEGORY_MASK_BIG5;
fa42c37f 3727 return 0;
df7492f9
KH
3728
3729 no_more_source:
3730 if (!found)
3731 return 0;
3732 *mask &= CATEGORY_MASK_BIG5;
3733 return 1;
fa42c37f
KH
3734}
3735
4ed46869
KH
3736/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3737 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3738
b73bfc1c 3739static void
df7492f9 3740decode_coding_sjis (coding)
4ed46869 3741 struct coding_system *coding;
4ed46869 3742{
df7492f9
KH
3743 unsigned char *src = coding->source + coding->consumed;
3744 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 3745 unsigned char *src_base;
df7492f9
KH
3746 int *charbuf = coding->charbuf;
3747 int *charbuf_end = charbuf + coding->charbuf_size;
3748 int consumed_chars = 0, consumed_chars_base;
3749 int multibytep = coding->src_multibyte;
3750 struct charset *charset_roman, *charset_kanji, *charset_kana;
3751 Lisp_Object attrs, eol_type, charset_list, val;
a5d301df 3752
df7492f9
KH
3753 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3754
3755 val = charset_list;
3756 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3757 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3758 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 3759
b73bfc1c 3760 while (1)
4ed46869 3761 {
df7492f9 3762 int c, c1;
b73bfc1c
KH
3763
3764 src_base = src;
df7492f9
KH
3765 consumed_chars_base = consumed_chars;
3766
3767 if (charbuf >= charbuf_end)
3768 break;
3769
3770 ONE_MORE_BYTE (c);
b73bfc1c 3771
df7492f9 3772 if (c == '\r')
4ed46869 3773 {
df7492f9 3774 if (EQ (eol_type, Qdos))
4ed46869 3775 {
df7492f9
KH
3776 if (src == src_end)
3777 goto no_more_source;
3778 if (*src == '\n')
3779 ONE_MORE_BYTE (c);
4ed46869 3780 }
df7492f9
KH
3781 else if (EQ (eol_type, Qmac))
3782 c = '\n';
4ed46869 3783 }
54f78171 3784 else
df7492f9
KH
3785 {
3786 struct charset *charset;
3787
3788 if (c < 0x80)
3789 charset = charset_roman;
3790 else
4ed46869 3791 {
df7492f9
KH
3792 if (c >= 0xF0)
3793 goto invalid_code;
3794 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3795 {
54f78171 3796 /* SJIS -> JISX0208 */
df7492f9
KH
3797 ONE_MORE_BYTE (c1);
3798 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3799 goto invalid_code;
3800 c = (c << 8) | c1;
3801 SJIS_TO_JIS (c);
3802 charset = charset_kanji;
5e34de15 3803 }
fb88bf2d 3804 else
b73bfc1c 3805 /* SJIS -> JISX0201-Kana */
df7492f9
KH
3806 charset = charset_kana;
3807 }
3808 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3809 }
3810 *charbuf++ = c;
3811 continue;
3812
3813 invalid_code:
3814 src = src_base;
3815 consumed_chars = consumed_chars_base;
3816 ONE_MORE_BYTE (c);
3817 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3818 coding->errors++;
3819 }
3820
3821 no_more_source:
3822 coding->consumed_char += consumed_chars_base;
3823 coding->consumed = src_base - coding->source;
3824 coding->charbuf_used = charbuf - coding->charbuf;
3825}
3826
3827static void
3828decode_coding_big5 (coding)
3829 struct coding_system *coding;
3830{
3831 unsigned char *src = coding->source + coding->consumed;
3832 unsigned char *src_end = coding->source + coding->src_bytes;
3833 unsigned char *src_base;
3834 int *charbuf = coding->charbuf;
3835 int *charbuf_end = charbuf + coding->charbuf_size;
3836 int consumed_chars = 0, consumed_chars_base;
3837 int multibytep = coding->src_multibyte;
3838 struct charset *charset_roman, *charset_big5;
3839 Lisp_Object attrs, eol_type, charset_list, val;
3840
3841 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3842 val = charset_list;
3843 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3844 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3845
3846 while (1)
3847 {
3848 int c, c1;
3849
3850 src_base = src;
3851 consumed_chars_base = consumed_chars;
3852
3853 if (charbuf >= charbuf_end)
3854 break;
3855
3856 ONE_MORE_BYTE (c);
3857
3858 if (c == '\r')
3859 {
3860 if (EQ (eol_type, Qdos))
3861 {
3862 if (src == src_end)
3863 goto no_more_source;
3864 if (*src == '\n')
3865 ONE_MORE_BYTE (c);
4ed46869 3866 }
df7492f9
KH
3867 else if (EQ (eol_type, Qmac))
3868 c = '\n';
3869 }
3870 else
3871 {
3872 struct charset *charset;
3873 if (c < 0x80)
3874 charset = charset_roman;
fb88bf2d 3875 else
fb88bf2d 3876 {
54f78171 3877 /* BIG5 -> Big5 */
df7492f9
KH
3878 if (c < 0xA1 || c > 0xFE)
3879 goto invalid_code;
3880 ONE_MORE_BYTE (c1);
3881 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3882 goto invalid_code;
3883 c = c << 8 | c1;
3884 charset = charset_big5;
4ed46869 3885 }
df7492f9 3886 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 3887 }
4ed46869 3888
df7492f9 3889 *charbuf++ = c;
fb88bf2d
KH
3890 continue;
3891
df7492f9 3892 invalid_code:
4ed46869 3893 src = src_base;
df7492f9
KH
3894 consumed_chars = consumed_chars_base;
3895 ONE_MORE_BYTE (c);
3896 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3897 coding->errors++;
fb88bf2d 3898 }
d46c5b12 3899
df7492f9
KH
3900 no_more_source:
3901 coding->consumed_char += consumed_chars_base;
3902 coding->consumed = src_base - coding->source;
3903 coding->charbuf_used = charbuf - coding->charbuf;
3904}
3905
3906/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3907 This function can encode charsets `ascii', `katakana-jisx0201',
3908 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3909 are sure that all these charsets are registered as official charset
3910 (i.e. do not have extended leading-codes). Characters of other
3911 charsets are produced without any encoding. If SJIS_P is 1, encode
3912 SJIS text, else encode BIG5 text. */
3913
3914static int
3915encode_coding_sjis (coding)
3916 struct coding_system *coding;
3917{
3918 int multibytep = coding->dst_multibyte;
3919 int *charbuf = coding->charbuf;
3920 int *charbuf_end = charbuf + coding->charbuf_used;
3921 unsigned char *dst = coding->destination + coding->produced;
3922 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3923 int safe_room = 4;
3924 int produced_chars = 0;
3925 Lisp_Object attrs, eol_type, charset_list, val;
3926 int ascii_compatible;
3927 struct charset *charset_roman, *charset_kanji, *charset_kana;
3928 int c;
3929
3930 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3931 val = charset_list;
3932 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3933 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3934 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3935
3936 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3937
3938 while (charbuf < charbuf_end)
3939 {
3940 ASSURE_DESTINATION (safe_room);
3941 c = *charbuf++;
3942 /* Now encode the character C. */
3943 if (ASCII_CHAR_P (c) && ascii_compatible)
3944 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
3945 else if (CHAR_BYTE8_P (c))
3946 {
3947 c = CHAR_TO_BYTE8 (c);
3948 EMIT_ONE_BYTE (c);
3949 }
df7492f9
KH
3950 else
3951 {
3952 unsigned code;
3953 struct charset *charset = char_charset (c, charset_list, &code);
3954
3955 if (!charset)
3956 {
41cbe562
KH
3957 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3958 {
3959 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3960 charset = CHARSET_FROM_ID (charset_ascii);
3961 }
3962 else
3963 {
3964 c = coding->default_char;
3965 charset = char_charset (c, charset_list, &code);
3966 }
df7492f9
KH
3967 }
3968 if (code == CHARSET_INVALID_CODE (charset))
3969 abort ();
3970 if (charset == charset_kanji)
3971 {
3972 int c1, c2;
3973 JIS_TO_SJIS (code);
3974 c1 = code >> 8, c2 = code & 0xFF;
3975 EMIT_TWO_BYTES (c1, c2);
3976 }
3977 else if (charset == charset_kana)
3978 EMIT_ONE_BYTE (code | 0x80);
3979 else
3980 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3981 }
3982 }
3983 coding->result = CODING_RESULT_SUCCESS;
3984 coding->produced_char += produced_chars;
3985 coding->produced = dst - coding->destination;
3986 return 0;
3987}
3988
3989static int
3990encode_coding_big5 (coding)
3991 struct coding_system *coding;
3992{
3993 int multibytep = coding->dst_multibyte;
3994 int *charbuf = coding->charbuf;
3995 int *charbuf_end = charbuf + coding->charbuf_used;
3996 unsigned char *dst = coding->destination + coding->produced;
3997 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3998 int safe_room = 4;
3999 int produced_chars = 0;
4000 Lisp_Object attrs, eol_type, charset_list, val;
4001 int ascii_compatible;
4002 struct charset *charset_roman, *charset_big5;
4003 int c;
4004
4005 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4006 val = charset_list;
4007 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4008 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4009 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4010
4011 while (charbuf < charbuf_end)
4012 {
4013 ASSURE_DESTINATION (safe_room);
4014 c = *charbuf++;
4015 /* Now encode the character C. */
4016 if (ASCII_CHAR_P (c) && ascii_compatible)
4017 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4018 else if (CHAR_BYTE8_P (c))
4019 {
4020 c = CHAR_TO_BYTE8 (c);
4021 EMIT_ONE_BYTE (c);
4022 }
df7492f9
KH
4023 else
4024 {
4025 unsigned code;
4026 struct charset *charset = char_charset (c, charset_list, &code);
4027
4028 if (! charset)
4029 {
41cbe562
KH
4030 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4031 {
4032 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4033 charset = CHARSET_FROM_ID (charset_ascii);
4034 }
4035 else
4036 {
4037 c = coding->default_char;
4038 charset = char_charset (c, charset_list, &code);
4039 }
df7492f9
KH
4040 }
4041 if (code == CHARSET_INVALID_CODE (charset))
4042 abort ();
4043 if (charset == charset_big5)
4044 {
4045 int c1, c2;
4046
4047 c1 = code >> 8, c2 = code & 0xFF;
4048 EMIT_TWO_BYTES (c1, c2);
4049 }
4050 else
4051 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4052 }
4053 }
4054 coding->result = CODING_RESULT_SUCCESS;
4055 coding->produced_char += produced_chars;
4056 coding->produced = dst - coding->destination;
4057 return 0;
4058}
4059
4060\f
/*** 9. CCL handlers ***/
4062
4063/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4064 Check if a text is encoded in a coding system of which
4065 encoder/decoder are written in CCL program. If it is, return
4066 CATEGORY_MASK_CCL, else return 0. */
4067
4068static int
4069detect_coding_ccl (coding, mask)
4070 struct coding_system *coding;
4071 int *mask;
4072{
4073 unsigned char *src = coding->source, *src_base = src;
4074 unsigned char *src_end = coding->source + coding->src_bytes;
4075 int multibytep = coding->src_multibyte;
4076 int consumed_chars = 0;
4077 int found = 0;
4078 unsigned char *valids = CODING_CCL_VALIDS (coding);
4079 int head_ascii = coding->head_ascii;
4080 Lisp_Object attrs;
4081
4082 coding = &coding_categories[coding_category_ccl];
4083 attrs = CODING_ID_ATTRS (coding->id);
4084 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4085 src += head_ascii;
4086
4087 while (1)
4088 {
4089 int c;
4090 ONE_MORE_BYTE (c);
4091 if (! valids[c])
4092 break;
4093 if (!found && valids[c] > 1)
4094 found = 1;
4095 }
4096 *mask &= ~CATEGORY_MASK_CCL;
4097 return 0;
4098
4099 no_more_source:
4100 if (!found)
4101 return 0;
4102 *mask &= CATEGORY_MASK_CCL;
4103 return 1;
4104}
4105
4106static void
4107decode_coding_ccl (coding)
4108 struct coding_system *coding;
4109{
7c78e542 4110 const unsigned char *src = coding->source + coding->consumed;
df7492f9
KH
4111 unsigned char *src_end = coding->source + coding->src_bytes;
4112 int *charbuf = coding->charbuf;
4113 int *charbuf_end = charbuf + coding->charbuf_size;
4114 int consumed_chars = 0;
4115 int multibytep = coding->src_multibyte;
4116 struct ccl_program ccl;
4117 int source_charbuf[1024];
4118 int source_byteidx[1024];
4119
4120 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4121
4122 while (src < src_end)
4123 {
7c78e542 4124 const unsigned char *p = src;
df7492f9
KH
4125 int *source, *source_end;
4126 int i = 0;
4127
4128 if (multibytep)
4129 while (i < 1024 && p < src_end)
4130 {
4131 source_byteidx[i] = p - src;
4132 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4133 }
4134 else
4135 while (i < 1024 && p < src_end)
4136 source_charbuf[i++] = *p++;
4137
4138 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4139 ccl.last_block = 1;
4140
4141 source = source_charbuf;
4142 source_end = source + i;
4143 while (source < source_end)
4144 {
4145 ccl_driver (&ccl, source, charbuf,
4146 source_end - source, charbuf_end - charbuf);
4147 source += ccl.consumed;
4148 charbuf += ccl.produced;
4149 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4150 break;
4151 }
4152 if (source < source_end)
4153 src += source_byteidx[source - source_charbuf];
4154 else
4155 src = p;
4156 consumed_chars += source - source_charbuf;
4157
4158 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4159 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4160 break;
4161 }
4162
4163 switch (ccl.status)
4164 {
4165 case CCL_STAT_SUSPEND_BY_SRC:
4166 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4167 break;
4168 case CCL_STAT_SUSPEND_BY_DST:
4169 break;
4170 case CCL_STAT_QUIT:
4171 case CCL_STAT_INVALID_CMD:
4172 coding->result = CODING_RESULT_INTERRUPT;
4173 break;
4174 default:
4175 coding->result = CODING_RESULT_SUCCESS;
4176 break;
4177 }
4178 coding->consumed_char += consumed_chars;
4179 coding->consumed = src - coding->source;
4180 coding->charbuf_used = charbuf - coding->charbuf;
4181}
4182
4183static int
4184encode_coding_ccl (coding)
4185 struct coding_system *coding;
4186{
4187 struct ccl_program ccl;
4188 int multibytep = coding->dst_multibyte;
4189 int *charbuf = coding->charbuf;
4190 int *charbuf_end = charbuf + coding->charbuf_used;
4191 unsigned char *dst = coding->destination + coding->produced;
4192 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4193 unsigned char *adjusted_dst_end = dst_end - 1;
4194 int destination_charbuf[1024];
4195 int i, produced_chars = 0;
4196
4197 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4198
4199 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4200 ccl.dst_multibyte = coding->dst_multibyte;
4201
4202 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4203 {
4204 int dst_bytes = dst_end - dst;
4205 if (dst_bytes > 1024)
4206 dst_bytes = 1024;
4207
4208 ccl_driver (&ccl, charbuf, destination_charbuf,
4209 charbuf_end - charbuf, dst_bytes);
4210 charbuf += ccl.consumed;
4211 if (multibytep)
4212 for (i = 0; i < ccl.produced; i++)
4213 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4214 else
4215 {
4216 for (i = 0; i < ccl.produced; i++)
4217 *dst++ = destination_charbuf[i] & 0xFF;
4218 produced_chars += ccl.produced;
4219 }
4220 }
4221
4222 switch (ccl.status)
4223 {
4224 case CCL_STAT_SUSPEND_BY_SRC:
4225 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4226 break;
4227 case CCL_STAT_SUSPEND_BY_DST:
4228 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4229 break;
4230 case CCL_STAT_QUIT:
4231 case CCL_STAT_INVALID_CMD:
4232 coding->result = CODING_RESULT_INTERRUPT;
4233 break;
4234 default:
4235 coding->result = CODING_RESULT_SUCCESS;
4236 break;
4237 }
4238
4239 coding->produced_char += produced_chars;
4240 coding->produced = dst - coding->destination;
4241 return 0;
4ed46869
KH
4242}
4243
df7492f9
KH
4244
4245\f
4246/*** 10, 11. no-conversion handlers ***/
4247
4248/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4249
b73bfc1c 4250static void
df7492f9 4251decode_coding_raw_text (coding)
4ed46869 4252 struct coding_system *coding;
4ed46869 4253{
df7492f9 4254 coding->chars_at_source = 1;
2c78b7e1
KH
4255 coding->consumed_char = 0;
4256 coding->consumed = 0;
df7492f9
KH
4257 coding->result = CODING_RESULT_SUCCESS;
4258}
4ed46869 4259
df7492f9
KH
4260static int
4261encode_coding_raw_text (coding)
4262 struct coding_system *coding;
4263{
4264 int multibytep = coding->dst_multibyte;
4265 int *charbuf = coding->charbuf;
4266 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4267 unsigned char *dst = coding->destination + coding->produced;
4268 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4269 int produced_chars = 0;
4270 int c;
a5d301df 4271
df7492f9 4272 if (multibytep)
b73bfc1c 4273 {
df7492f9 4274 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4275
df7492f9
KH
4276 if (coding->src_multibyte)
4277 while (charbuf < charbuf_end)
4278 {
4279 ASSURE_DESTINATION (safe_room);
4280 c = *charbuf++;
4281 if (ASCII_CHAR_P (c))
4282 EMIT_ONE_ASCII_BYTE (c);
4283 else if (CHAR_BYTE8_P (c))
4284 {
4285 c = CHAR_TO_BYTE8 (c);
4286 EMIT_ONE_BYTE (c);
4287 }
4288 else
4289 {
4290 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4291
df7492f9
KH
4292 CHAR_STRING_ADVANCE (c, p1);
4293 while (p0 < p1)
9d123124
KH
4294 {
4295 EMIT_ONE_BYTE (*p0);
4296 p0++;
4297 }
df7492f9
KH
4298 }
4299 }
b73bfc1c 4300 else
df7492f9
KH
4301 while (charbuf < charbuf_end)
4302 {
4303 ASSURE_DESTINATION (safe_room);
4304 c = *charbuf++;
4305 EMIT_ONE_BYTE (c);
4306 }
4307 }
4308 else
4309 {
4310 if (coding->src_multibyte)
b73bfc1c 4311 {
df7492f9
KH
4312 int safe_room = MAX_MULTIBYTE_LENGTH;
4313
4314 while (charbuf < charbuf_end)
b73bfc1c 4315 {
df7492f9
KH
4316 ASSURE_DESTINATION (safe_room);
4317 c = *charbuf++;
4318 if (ASCII_CHAR_P (c))
4319 *dst++ = c;
4320 else if (CHAR_BYTE8_P (c))
4321 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4322 else
df7492f9
KH
4323 CHAR_STRING_ADVANCE (c, dst);
4324 produced_chars++;
b73bfc1c 4325 }
4ed46869 4326 }
df7492f9
KH
4327 else
4328 {
4329 ASSURE_DESTINATION (charbuf_end - charbuf);
4330 while (charbuf < charbuf_end && dst < dst_end)
4331 *dst++ = *charbuf++;
4332 produced_chars = dst - (coding->destination + coding->dst_bytes);
4333 }
4ed46869 4334 }
df7492f9
KH
4335 coding->result = CODING_RESULT_SUCCESS;
4336 coding->produced_char += produced_chars;
4337 coding->produced = dst - coding->destination;
4338 return 0;
4ed46869
KH
4339}
4340
0a28aafb 4341static int
df7492f9
KH
4342detect_coding_charset (coding, mask)
4343 struct coding_system *coding;
4344 int *mask;
1397dc18 4345{
df7492f9
KH
4346 unsigned char *src = coding->source, *src_base = src;
4347 unsigned char *src_end = coding->source + coding->src_bytes;
4348 int multibytep = coding->src_multibyte;
4349 int consumed_chars = 0;
4350 Lisp_Object attrs, valids;
1397dc18 4351
df7492f9
KH
4352 coding = &coding_categories[coding_category_charset];
4353 attrs = CODING_ID_ATTRS (coding->id);
4354 valids = AREF (attrs, coding_attr_charset_valids);
4355
4356 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4357 src += coding->head_ascii;
1397dc18 4358
b73bfc1c 4359 while (1)
1397dc18 4360 {
df7492f9 4361 int c;
1397dc18 4362
df7492f9
KH
4363 ONE_MORE_BYTE (c);
4364 if (NILP (AREF (valids, c)))
4365 break;
4366 }
4367 *mask &= ~CATEGORY_MASK_CHARSET;
4368 return 0;
4ed46869 4369
df7492f9
KH
4370 no_more_source:
4371 *mask &= CATEGORY_MASK_CHARSET;
4372 return 1;
4373}
4ed46869 4374
b73bfc1c 4375static void
df7492f9 4376decode_coding_charset (coding)
4ed46869 4377 struct coding_system *coding;
4ed46869 4378{
df7492f9
KH
4379 unsigned char *src = coding->source + coding->consumed;
4380 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 4381 unsigned char *src_base;
df7492f9
KH
4382 int *charbuf = coding->charbuf;
4383 int *charbuf_end = charbuf + coding->charbuf_size;
4384 int consumed_chars = 0, consumed_chars_base;
4385 int multibytep = coding->src_multibyte;
4eb6d3f1 4386 Lisp_Object attrs, eol_type, charset_list, valids;
df7492f9
KH
4387
4388 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4389 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4390
df7492f9 4391 while (1)
4ed46869 4392 {
4eb6d3f1 4393 int c;
df7492f9
KH
4394
4395 src_base = src;
4396 consumed_chars_base = consumed_chars;
b73bfc1c 4397
df7492f9
KH
4398 if (charbuf >= charbuf_end)
4399 break;
4400
4eb6d3f1 4401 ONE_MORE_BYTE (c);
df7492f9 4402 if (c == '\r')
d46c5b12 4403 {
c7c66a95
KH
4404 /* Here we assume that no charset maps '\r' to something
4405 else. */
df7492f9 4406 if (EQ (eol_type, Qdos))
b73bfc1c 4407 {
4eb6d3f1
KH
4408 if (src < src_end
4409 && *src == '\n')
df7492f9 4410 ONE_MORE_BYTE (c);
b73bfc1c 4411 }
df7492f9 4412 else if (EQ (eol_type, Qmac))
b73bfc1c 4413 c = '\n';
d46c5b12 4414 }
df7492f9 4415 else
d46c5b12 4416 {
4eb6d3f1
KH
4417 Lisp_Object val;
4418 struct charset *charset;
c7c66a95 4419 int dim;
acb2a965
KH
4420 int len = 1;
4421 unsigned code = c;
4eb6d3f1
KH
4422
4423 val = AREF (valids, c);
4424 if (NILP (val))
4425 goto invalid_code;
c7c66a95 4426 if (INTEGERP (val))
4eb6d3f1 4427 {
c7c66a95
KH
4428 charset = CHARSET_FROM_ID (XFASTINT (val));
4429 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4430 while (len < dim)
4eb6d3f1 4431 {
acb2a965
KH
4432 ONE_MORE_BYTE (c);
4433 code = (code << 8) | c;
f9d71dcd 4434 len++;
4eb6d3f1 4435 }
c7c66a95
KH
4436 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4437 charset, code, c);
4438 }
4439 else
4440 {
4441 /* VAL is a list of charset IDs. It is assured that the
4442 list is sorted by charset dimensions (smaller one
4443 comes first). */
c7c66a95
KH
4444 while (CONSP (val))
4445 {
4446 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4447 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4448 while (len < dim)
c7c66a95 4449 {
acb2a965
KH
4450 ONE_MORE_BYTE (c);
4451 code = (code << 8) | c;
f9d71dcd 4452 len++;
c7c66a95 4453 }
c7c66a95
KH
4454 CODING_DECODE_CHAR (coding, src, src_base,
4455 src_end, charset, code, c);
4456 if (c >= 0)
4457 break;
4458 val = XCDR (val);
4459 }
4eb6d3f1 4460 }
df7492f9
KH
4461 if (c < 0)
4462 goto invalid_code;
d46c5b12 4463 }
df7492f9
KH
4464 *charbuf++ = c;
4465 continue;
4466
4467 invalid_code:
4468 src = src_base;
4469 consumed_chars = consumed_chars_base;
4470 ONE_MORE_BYTE (c);
4471 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4472 coding->errors++;
4ed46869
KH
4473 }
4474
df7492f9
KH
4475 no_more_source:
4476 coding->consumed_char += consumed_chars_base;
4477 coding->consumed = src_base - coding->source;
4478 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4479}
4480
df7492f9
KH
4481static int
4482encode_coding_charset (coding)
4ed46869 4483 struct coding_system *coding;
4ed46869 4484{
df7492f9
KH
4485 int multibytep = coding->dst_multibyte;
4486 int *charbuf = coding->charbuf;
4487 int *charbuf_end = charbuf + coding->charbuf_used;
4488 unsigned char *dst = coding->destination + coding->produced;
4489 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4490 int safe_room = MAX_MULTIBYTE_LENGTH;
4491 int produced_chars = 0;
df7492f9
KH
4492 Lisp_Object attrs, eol_type, charset_list;
4493 int ascii_compatible;
b73bfc1c 4494 int c;
b73bfc1c 4495
df7492f9 4496 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4497 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4498
df7492f9 4499 while (charbuf < charbuf_end)
4ed46869 4500 {
4eb6d3f1 4501 struct charset *charset;
df7492f9
KH
4502 unsigned code;
4503
4504 ASSURE_DESTINATION (safe_room);
4505 c = *charbuf++;
4506 if (ascii_compatible && ASCII_CHAR_P (c))
4507 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4508 else if (CHAR_BYTE8_P (c))
4509 {
4510 c = CHAR_TO_BYTE8 (c);
4511 EMIT_ONE_BYTE (c);
4512 }
d46c5b12 4513 else
4eb6d3f1
KH
4514 {
4515 charset = char_charset (c, charset_list, &code);
4516 if (charset)
4517 {
4518 if (CHARSET_DIMENSION (charset) == 1)
4519 EMIT_ONE_BYTE (code);
4520 else if (CHARSET_DIMENSION (charset) == 2)
4521 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4522 else if (CHARSET_DIMENSION (charset) == 3)
4523 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4524 else
4525 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4526 (code >> 8) & 0xFF, code & 0xFF);
4527 }
4528 else
41cbe562
KH
4529 {
4530 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4531 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4532 else
4533 c = coding->default_char;
4534 EMIT_ONE_BYTE (c);
4535 }
4eb6d3f1 4536 }
4ed46869
KH
4537 }
4538
df7492f9
KH
4539 coding->result = CODING_RESULT_SUCCESS;
4540 coding->produced_char += produced_chars;
4541 coding->produced = dst - coding->destination;
4542 return 0;
4ed46869
KH
4543}
4544
4545\f
/*** 10. C library functions ***/
4ed46869 4547
df7492f9
KH
4548/* Setup coding context CODING from information about CODING_SYSTEM.
4549 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4550 CODING_SYSTEM is invalid, signal an error. */
ec6d2bb8
KH
4551
4552void
df7492f9
KH
4553setup_coding_system (coding_system, coding)
4554 Lisp_Object coding_system;
ec6d2bb8
KH
4555 struct coding_system *coding;
4556{
df7492f9
KH
4557 Lisp_Object attrs;
4558 Lisp_Object eol_type;
4559 Lisp_Object coding_type;
4560 Lisp_Object val;
ec6d2bb8 4561
df7492f9
KH
4562 if (NILP (coding_system))
4563 coding_system = Qno_conversion;
4564
4565 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4566
4567 attrs = CODING_ID_ATTRS (coding->id);
4568 eol_type = CODING_ID_EOL_TYPE (coding->id);
4569
4570 coding->mode = 0;
4571 coding->head_ascii = -1;
4572 coding->common_flags
4573 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4574
4575 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4576 coding->max_charset_id = XSTRING (val)->size - 1;
4577 coding->safe_charsets = (char *) XSTRING (val)->data;
4578 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4579
4580 coding_type = CODING_ATTR_TYPE (attrs);
4581 if (EQ (coding_type, Qundecided))
4582 {
4583 coding->detector = NULL;
4584 coding->decoder = decode_coding_raw_text;
4585 coding->encoder = encode_coding_raw_text;
4586 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4587 }
4588 else if (EQ (coding_type, Qiso_2022))
4589 {
4590 int i;
4591 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4592
4593 /* Invoke graphic register 0 to plane 0. */
4594 CODING_ISO_INVOCATION (coding, 0) = 0;
4595 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4596 CODING_ISO_INVOCATION (coding, 1)
4597 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4598 /* Setup the initial status of designation. */
4599 for (i = 0; i < 4; i++)
4600 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4601 /* Not single shifting initially. */
4602 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4603 /* Beginning of buffer should also be regarded as bol. */
4604 CODING_ISO_BOL (coding) = 1;
4605 coding->detector = detect_coding_iso_2022;
4606 coding->decoder = decode_coding_iso_2022;
4607 coding->encoder = encode_coding_iso_2022;
4608 if (flags & CODING_ISO_FLAG_SAFE)
4609 coding->mode |= CODING_MODE_SAFE_ENCODING;
4610 coding->common_flags
4611 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4612 | CODING_REQUIRE_FLUSHING_MASK);
4613 if (flags & CODING_ISO_FLAG_COMPOSITION)
4614 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4615 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4616 {
4617 setup_iso_safe_charsets (attrs);
4618 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4619 coding->max_charset_id = XSTRING (val)->size - 1;
4620 coding->safe_charsets = (char *) XSTRING (val)->data;
4621 }
4622 CODING_ISO_FLAGS (coding) = flags;
4623 }
4624 else if (EQ (coding_type, Qcharset))
4625 {
4626 coding->detector = detect_coding_charset;
4627 coding->decoder = decode_coding_charset;
4628 coding->encoder = encode_coding_charset;
4629 coding->common_flags
4630 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4631 }
4632 else if (EQ (coding_type, Qutf_8))
4633 {
4634 coding->detector = detect_coding_utf_8;
4635 coding->decoder = decode_coding_utf_8;
4636 coding->encoder = encode_coding_utf_8;
4637 coding->common_flags
4638 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4639 }
4640 else if (EQ (coding_type, Qutf_16))
4641 {
4642 val = AREF (attrs, coding_attr_utf_16_bom);
4643 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4644 : EQ (val, Qt) ? utf_16_with_bom
4645 : utf_16_without_bom);
4646 val = AREF (attrs, coding_attr_utf_16_endian);
4647 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4648 : utf_16_little_endian);
e19c3639 4649 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4650 coding->detector = detect_coding_utf_16;
4651 coding->decoder = decode_coding_utf_16;
4652 coding->encoder = encode_coding_utf_16;
4653 coding->common_flags
4654 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4655 }
4656 else if (EQ (coding_type, Qccl))
4657 {
4658 coding->detector = detect_coding_ccl;
4659 coding->decoder = decode_coding_ccl;
4660 coding->encoder = encode_coding_ccl;
4661 coding->common_flags
4662 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4663 | CODING_REQUIRE_FLUSHING_MASK);
4664 }
4665 else if (EQ (coding_type, Qemacs_mule))
4666 {
4667 coding->detector = detect_coding_emacs_mule;
4668 coding->decoder = decode_coding_emacs_mule;
4669 coding->encoder = encode_coding_emacs_mule;
4670 coding->common_flags
4671 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4672 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4673 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4674 {
4675 Lisp_Object tail, safe_charsets;
4676 int max_charset_id = 0;
4677
4678 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4679 tail = XCDR (tail))
4680 if (max_charset_id < XFASTINT (XCAR (tail)))
4681 max_charset_id = XFASTINT (XCAR (tail));
4682 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4683 make_number (255));
4684 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4685 tail = XCDR (tail))
4686 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4687 coding->max_charset_id = max_charset_id;
4688 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4689 }
4690 }
4691 else if (EQ (coding_type, Qshift_jis))
4692 {
4693 coding->detector = detect_coding_sjis;
4694 coding->decoder = decode_coding_sjis;
4695 coding->encoder = encode_coding_sjis;
4696 coding->common_flags
4697 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4698 }
4699 else if (EQ (coding_type, Qbig5))
4700 {
4701 coding->detector = detect_coding_big5;
4702 coding->decoder = decode_coding_big5;
4703 coding->encoder = encode_coding_big5;
4704 coding->common_flags
4705 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4706 }
4707 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4708 {
df7492f9
KH
4709 coding->detector = NULL;
4710 coding->decoder = decode_coding_raw_text;
4711 coding->encoder = encode_coding_raw_text;
4712 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
ec6d2bb8 4713 }
df7492f9
KH
4714
4715 return;
ec6d2bb8
KH
4716}
4717
df7492f9
KH
4718/* Return raw-text or one of its subsidiaries that has the same
4719 eol_type as CODING-SYSTEM. */
ec6d2bb8 4720
df7492f9
KH
4721Lisp_Object
4722raw_text_coding_system (coding_system)
4723 Lisp_Object coding_system;
ec6d2bb8 4724{
0be8721c 4725 Lisp_Object spec, attrs;
df7492f9
KH
4726 Lisp_Object eol_type, raw_text_eol_type;
4727
4728 spec = CODING_SYSTEM_SPEC (coding_system);
4729 attrs = AREF (spec, 0);
4730
4731 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4732 return coding_system;
ec6d2bb8 4733
df7492f9
KH
4734 eol_type = AREF (spec, 2);
4735 if (VECTORP (eol_type))
4736 return Qraw_text;
4737 spec = CODING_SYSTEM_SPEC (Qraw_text);
4738 raw_text_eol_type = AREF (spec, 2);
4739 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4740 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4741 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
4742}
4743
54f78171 4744
df7492f9
KH
4745/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4746 does, return one of the subsidiary that has the same eol-spec as
4747 PARENT. Otherwise, return CODING_SYSTEM. */
4748
4749Lisp_Object
4750coding_inherit_eol_type (coding_system, parent)
b74e4686 4751 Lisp_Object coding_system, parent;
54f78171 4752{
df7492f9 4753 Lisp_Object spec, attrs, eol_type;
54f78171 4754
df7492f9
KH
4755 spec = CODING_SYSTEM_SPEC (coding_system);
4756 attrs = AREF (spec, 0);
4757 eol_type = AREF (spec, 2);
4758 if (VECTORP (eol_type))
4759 {
4760 Lisp_Object parent_spec;
df7492f9
KH
4761 Lisp_Object parent_eol_type;
4762
4763 parent_spec
4764 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4765 parent_eol_type = AREF (parent_spec, 2);
4766 if (EQ (parent_eol_type, Qunix))
4767 coding_system = AREF (eol_type, 0);
4768 else if (EQ (parent_eol_type, Qdos))
4769 coding_system = AREF (eol_type, 1);
4770 else if (EQ (parent_eol_type, Qmac))
4771 coding_system = AREF (eol_type, 2);
54f78171 4772 }
df7492f9 4773 return coding_system;
54f78171
KH
4774}
4775
4ed46869
KH
4776/* Emacs has a mechanism to automatically detect a coding system if it
4777 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4778 it's impossible to distinguish some coding systems accurately
4779 because they use the same range of codes. So, at first, coding
4780 systems are categorized into the following categories:
4781
0ef69138 4782 o coding-category-emacs-mule
4ed46869
KH
4783
4784 The category for a coding system which has the same code range
4785 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 4786 symbol) `emacs-mule' by default.
4ed46869
KH
4787
4788 o coding-category-sjis
4789
4790 The category for a coding system which has the same code range
4791 as SJIS. Assigned the coding-system (Lisp
7717c392 4792 symbol) `japanese-shift-jis' by default.
4ed46869
KH
4793
4794 o coding-category-iso-7
4795
4796 The category for a coding system which has the same code range
7717c392 4797 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
4798 shift and single shift functions. This can encode/decode all
4799 charsets. Assigned the coding-system (Lisp symbol)
4800 `iso-2022-7bit' by default.
4801
4802 o coding-category-iso-7-tight
4803
4804 Same as coding-category-iso-7 except that this can
4805 encode/decode only the specified charsets.
4ed46869
KH
4806
4807 o coding-category-iso-8-1
4808
4809 The category for a coding system which has the same code range
4810 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4811 for DIMENSION1 charset. This doesn't use any locking shift
4812 and single shift functions. Assigned the coding-system (Lisp
4813 symbol) `iso-latin-1' by default.
4ed46869
KH
4814
4815 o coding-category-iso-8-2
4816
4817 The category for a coding system which has the same code range
4818 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4819 for DIMENSION2 charset. This doesn't use any locking shift
4820 and single shift functions. Assigned the coding-system (Lisp
4821 symbol) `japanese-iso-8bit' by default.
4ed46869 4822
7717c392 4823 o coding-category-iso-7-else
4ed46869
KH
4824
4825 The category for a coding system which has the same code range
df7492f9 4826 as ISO2022 of 7-bit environment but uses locking shift or
7717c392
KH
4827 single shift functions. Assigned the coding-system (Lisp
4828 symbol) `iso-2022-7bit-lock' by default.
4829
4830 o coding-category-iso-8-else
4831
4832 The category for a coding system which has the same code range
df7492f9 4833 as ISO2022 of 8-bit environment but uses locking shift or
7717c392
KH
4834 single shift functions. Assigned the coding-system (Lisp
4835 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
4836
4837 o coding-category-big5
4838
4839 The category for a coding system which has the same code range
4840 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 4841 `cn-big5' by default.
4ed46869 4842
fa42c37f
KH
4843 o coding-category-utf-8
4844
4845 The category for a coding system which has the same code range
4846 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4847 symbol) `utf-8' by default.
4848
4849 o coding-category-utf-16-be
4850
4851 The category for a coding system in which a text has an
4852 Unicode signature (cf. Unicode Standard) in the order of BIG
4853 endian at the head. Assigned the coding-system (Lisp symbol)
4854 `utf-16-be' by default.
4855
4856 o coding-category-utf-16-le
4857
4858 The category for a coding system in which a text has an
4859 Unicode signature (cf. Unicode Standard) in the order of
4860 LITTLE endian at the head. Assigned the coding-system (Lisp
4861 symbol) `utf-16-le' by default.
4862
1397dc18
KH
4863 o coding-category-ccl
4864
4865 The category for a coding system of which encoder/decoder is
4866 written in CCL programs. The default value is nil, i.e., no
4867 coding system is assigned.
4868
4ed46869
KH
4869 o coding-category-binary
4870
4871 The category for a coding system not categorized in any of the
4872 above. Assigned the coding-system (Lisp symbol)
e0e989f6 4873 `no-conversion' by default.
4ed46869
KH
4874
4875 Each of them is a Lisp symbol and the value is an actual
df7492f9 4876 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
4877 What Emacs does actually is to detect a category of coding system.
4878 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 4879 decide only one possible category, it selects a category of the
4ed46869
KH
4880 highest priority. Priorities of categories are also specified by a
4881 user in a Lisp variable `coding-category-list'.
4882
4883*/
4884
df7492f9
KH
4885#define EOL_SEEN_NONE 0
4886#define EOL_SEEN_LF 1
4887#define EOL_SEEN_CR 2
4888#define EOL_SEEN_CRLF 4
4ed46869 4889
df7492f9
KH
4890/* Detect how end-of-line of a text of length CODING->src_bytes
4891 pointed by CODING->source is encoded. Return one of
4892 EOL_SEEN_XXX. */
4ed46869 4893
bc4bc72a
RS
4894#define MAX_EOL_CHECK_COUNT 3
4895
d46c5b12 4896static int
df7492f9
KH
4897detect_eol (coding, source, src_bytes)
4898 struct coding_system *coding;
d46c5b12 4899 unsigned char *source;
df7492f9 4900 EMACS_INT src_bytes;
4ed46869 4901{
df7492f9 4902 Lisp_Object attrs, coding_type;
d46c5b12 4903 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 4904 unsigned char c;
df7492f9
KH
4905 int total = 0;
4906 int eol_seen = EOL_SEEN_NONE;
4ed46869 4907
df7492f9
KH
4908 attrs = CODING_ID_ATTRS (coding->id);
4909 coding_type = CODING_ATTR_TYPE (attrs);
d46c5b12 4910
df7492f9 4911 if (EQ (coding_type, Qccl))
4ed46869 4912 {
df7492f9 4913 int msb, lsb;
fa42c37f 4914
df7492f9
KH
4915 msb = coding->spec.utf_16.endian == utf_16_little_endian;
4916 lsb = 1 - msb;
fa42c37f 4917
df7492f9 4918 while (src + 1 < src_end)
fa42c37f 4919 {
df7492f9
KH
4920 c = src[lsb];
4921 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 4922 {
df7492f9
KH
4923 int this_eol;
4924
4925 if (c == '\n')
4926 this_eol = EOL_SEEN_LF;
4927 else if (src + 3 >= src_end
4928 || src[msb + 2] != 0
4929 || src[lsb + 2] != '\n')
4930 this_eol = EOL_SEEN_CR;
fa42c37f 4931 else
df7492f9
KH
4932 this_eol = EOL_SEEN_CRLF;
4933
4934 if (eol_seen == EOL_SEEN_NONE)
4935 /* This is the first end-of-line. */
4936 eol_seen = this_eol;
4937 else if (eol_seen != this_eol)
fa42c37f 4938 {
df7492f9
KH
4939 /* The found type is different from what found before. */
4940 eol_seen = EOL_SEEN_LF;
4941 break;
fa42c37f 4942 }
df7492f9
KH
4943 if (++total == MAX_EOL_CHECK_COUNT)
4944 break;
fa42c37f 4945 }
df7492f9 4946 src += 2;
fa42c37f 4947 }
df7492f9 4948 }
d46c5b12 4949 else
27901516 4950 {
df7492f9 4951 while (src < src_end)
27901516 4952 {
df7492f9
KH
4953 c = *src++;
4954 if (c == '\n' || c == '\r')
4955 {
4956 int this_eol;
d46c5b12 4957
df7492f9
KH
4958 if (c == '\n')
4959 this_eol = EOL_SEEN_LF;
4960 else if (src >= src_end || *src != '\n')
4961 this_eol = EOL_SEEN_CR;
4962 else
4963 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 4964
df7492f9
KH
4965 if (eol_seen == EOL_SEEN_NONE)
4966 /* This is the first end-of-line. */
4967 eol_seen = this_eol;
4968 else if (eol_seen != this_eol)
4969 {
4970 /* The found type is different from what found before. */
4971 eol_seen = EOL_SEEN_LF;
4972 break;
4973 }
4974 if (++total == MAX_EOL_CHECK_COUNT)
4975 break;
4976 }
4977 }
73be902c 4978 }
df7492f9 4979 return eol_seen;
73be902c
KH
4980}
4981
df7492f9 4982
73be902c 4983static void
df7492f9
KH
4984adjust_coding_eol_type (coding, eol_seen)
4985 struct coding_system *coding;
4986 int eol_seen;
73be902c 4987{
0be8721c 4988 Lisp_Object eol_type;
df7492f9
KH
4989
4990 eol_type = CODING_ID_EOL_TYPE (coding->id);
4991 if (eol_seen & EOL_SEEN_LF)
4992 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6f197c07 4993 else if (eol_seen & EOL_SEEN_CRLF)
df7492f9 4994 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6f197c07 4995 else if (eol_seen & EOL_SEEN_CR)
df7492f9 4996 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12
KH
4997}
4998
df7492f9
KH
4999/* Detect how a text specified in CODING is encoded. If a coding
5000 system is detected, update fields of CODING by the detected coding
5001 system. */
5002
5003void
5004detect_coding (coding)
d46c5b12 5005 struct coding_system *coding;
d46c5b12 5006{
df7492f9
KH
5007 unsigned char *src, *src_end;
5008 Lisp_Object attrs, coding_type;
d46c5b12 5009
df7492f9
KH
5010 coding->consumed = coding->consumed_char = 0;
5011 coding->produced = coding->produced_char = 0;
5012 coding_set_source (coding);
1c3478b0 5013
df7492f9 5014 src_end = coding->source + coding->src_bytes;
1c3478b0 5015
df7492f9
KH
5016 /* If we have not yet decided the text encoding type, detect it
5017 now. */
5018 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5019 {
df7492f9
KH
5020 int mask = CATEGORY_MASK_ANY;
5021 int c, i;
5022
5023 for (src = coding->source; src < src_end; src++)
5024 {
5025 c = *src;
5026 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5027 || c == ISO_CODE_SI
5028 || c == ISO_CODE_SO)))
5029 break;
5030 }
5031 coding->head_ascii = src - (coding->source + coding->consumed);
5032
5033 if (coding->head_ascii < coding->src_bytes)
1c3478b0 5034 {
df7492f9
KH
5035 int detected = 0;
5036
5037 for (i = 0; i < coding_category_raw_text; i++)
1c3478b0 5038 {
df7492f9
KH
5039 enum coding_category category = coding_priorities[i];
5040 struct coding_system *this = coding_categories + category;
5041
5042 if (category >= coding_category_raw_text
5043 || detected & (1 << category))
5044 continue;
5045
5046 if (this->id < 0)
1c3478b0 5047 {
df7492f9
KH
5048 /* No coding system of this category is defined. */
5049 mask &= ~(1 << category);
5050 }
5051 else
5052 {
5053 detected |= detected_mask[category];
5054 if ((*(this->detector)) (coding, &mask))
5055 break;
1c3478b0
KH
5056 }
5057 }
df7492f9
KH
5058 if (! mask)
5059 setup_coding_system (Qraw_text, coding);
5060 else if (mask != CATEGORY_MASK_ANY)
5061 for (i = 0; i < coding_category_raw_text; i++)
5062 {
5063 enum coding_category category = coding_priorities[i];
5064 struct coding_system *this = coding_categories + category;
5065
5066 if (mask & (1 << category))
5067 {
5068 setup_coding_system (CODING_ID_NAME (this->id), coding);
5069 break;
5070 }
5071 }
1c3478b0 5072 }
b73bfc1c 5073 }
69f76525 5074
df7492f9
KH
5075 attrs = CODING_ID_ATTRS (coding->id);
5076 coding_type = CODING_ATTR_TYPE (attrs);
5077
5078 /* If we have not yet decided the EOL type, detect it now. But, the
5079 detection is impossible for a CCL based coding system, in which
5080 case, we detct the EOL type after decoding. */
5081 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5082 && ! EQ (coding_type, Qccl))
d46c5b12 5083 {
df7492f9
KH
5084 int eol_seen = detect_eol (coding, coding->source, coding->src_bytes);
5085
5086 if (eol_seen != EOL_SEEN_NONE)
5087 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 5088 }
4ed46869
KH
5089}
5090
aaaf0b1e
KH
5091
5092static void
df7492f9 5093decode_eol (coding)
aaaf0b1e 5094 struct coding_system *coding;
aaaf0b1e 5095{
df7492f9 5096 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 5097 {
df7492f9
KH
5098 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5099 unsigned char *pend = p + coding->produced;
5100 int eol_seen = EOL_SEEN_NONE;
aaaf0b1e 5101
df7492f9 5102 for (; p < pend; p++)
aaaf0b1e 5103 {
df7492f9
KH
5104 if (*p == '\n')
5105 eol_seen |= EOL_SEEN_LF;
5106 else if (*p == '\r')
aaaf0b1e 5107 {
df7492f9 5108 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5109 {
df7492f9
KH
5110 eol_seen |= EOL_SEEN_CRLF;
5111 p++;
aaaf0b1e 5112 }
aaaf0b1e 5113 else
df7492f9 5114 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5115 }
aaaf0b1e 5116 }
df7492f9
KH
5117 if (eol_seen != EOL_SEEN_NONE)
5118 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5119 }
aaaf0b1e 5120
df7492f9
KH
5121 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5122 {
5123 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5124 unsigned char *pend = p + coding->produced;
5125
5126 for (; p < pend; p++)
5127 if (*p == '\r')
5128 *p = '\n';
5129 }
5130 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5131 {
5132 unsigned char *p, *pbeg, *pend;
5133 Lisp_Object undo_list;
5134
5135 move_gap_both (coding->dst_pos + coding->produced_char,
5136 coding->dst_pos_byte + coding->produced);
5137 undo_list = current_buffer->undo_list;
5138 current_buffer->undo_list = Qt;
c197f191 5139 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
df7492f9
KH
5140 current_buffer->undo_list = undo_list;
5141 pbeg = GPT_ADDR;
5142 pend = pbeg + coding->produced;
5143
5144 for (p = pend - 1; p >= pbeg; p--)
5145 if (*p == '\r')
5146 {
5147 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5148 pend--;
5149 }
5150 coding->produced_char -= coding->produced - (pend - pbeg);
5151 coding->produced = pend - pbeg;
5152 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e
KH
5153 }
5154}
5155
df7492f9
KH
5156static void
5157translate_chars (coding, table)
4ed46869 5158 struct coding_system *coding;
df7492f9 5159 Lisp_Object table;
4ed46869 5160{
df7492f9
KH
5161 int *charbuf = coding->charbuf;
5162 int *charbuf_end = charbuf + coding->charbuf_used;
5163 int c;
5164
5165 if (coding->chars_at_source)
5166 return;
4ed46869 5167
df7492f9 5168 while (charbuf < charbuf_end)
8844fa83 5169 {
df7492f9
KH
5170 c = *charbuf;
5171 if (c < 0)
5172 charbuf += c;
5173 else
5174 *charbuf++ = translate_char (table, c);
8844fa83 5175 }
df7492f9 5176}
4ed46869 5177
df7492f9
KH
5178static int
5179produce_chars (coding)
5180 struct coding_system *coding;
5181{
5182 unsigned char *dst = coding->destination + coding->produced;
5183 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5184 int produced;
5185 int produced_chars = 0;
b73bfc1c 5186
df7492f9 5187 if (! coding->chars_at_source)
4ed46869 5188 {
df7492f9
KH
5189 /* Characters are in coding->charbuf. */
5190 int *buf = coding->charbuf;
5191 int *buf_end = buf + coding->charbuf_used;
5192 unsigned char *adjusted_dst_end;
4ed46869 5193
df7492f9
KH
5194 if (BUFFERP (coding->src_object)
5195 && EQ (coding->src_object, coding->dst_object))
5196 dst_end = coding->source + coding->consumed;
5197 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5198
df7492f9
KH
5199 while (buf < buf_end)
5200 {
5201 int c = *buf++;
5202
5203 if (dst >= adjusted_dst_end)
5204 {
5205 dst = alloc_destination (coding,
5206 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5207 dst);
5208 dst_end = coding->destination + coding->dst_bytes;
5209 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5210 }
5211 if (c >= 0)
5212 {
5213 if (coding->dst_multibyte
5214 || ! CHAR_BYTE8_P (c))
5215 CHAR_STRING_ADVANCE (c, dst);
5216 else
5217 *dst++ = CHAR_TO_BYTE8 (c);
5218 produced_chars++;
5219 }
5220 else
5221 /* This is an annotation data. */
5222 buf -= c + 1;
5223 }
5224 }
5225 else
5226 {
df7492f9
KH
5227 unsigned char *src = coding->source;
5228 unsigned char *src_end = src + coding->src_bytes;
5229 Lisp_Object eol_type;
b73bfc1c 5230
df7492f9 5231 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5232
df7492f9 5233 if (coding->src_multibyte != coding->dst_multibyte)
aaaf0b1e 5234 {
df7492f9
KH
5235 if (coding->src_multibyte)
5236 {
71c81426 5237 int multibytep = 1;
df7492f9 5238 int consumed_chars;
d46c5b12 5239
df7492f9
KH
5240 while (1)
5241 {
5242 unsigned char *src_base = src;
5243 int c;
b73bfc1c 5244
df7492f9
KH
5245 ONE_MORE_BYTE (c);
5246 if (c == '\r')
5247 {
5248 if (EQ (eol_type, Qdos))
5249 {
5250 if (src < src_end
5251 && *src == '\n')
5252 c = *src++;
5253 }
5254 else if (EQ (eol_type, Qmac))
5255 c = '\n';
5256 }
5257 if (dst == dst_end)
5258 {
2c78b7e1 5259 coding->consumed = src - coding->source;
b73bfc1c 5260
2c78b7e1
KH
5261 if (EQ (coding->src_object, coding->dst_object))
5262 dst_end = src;
5263 if (dst == dst_end)
5264 {
5265 dst = alloc_destination (coding, src_end - src + 1,
5266 dst);
5267 dst_end = coding->destination + coding->dst_bytes;
5268 coding_set_source (coding);
5269 src = coding->source + coding->consumed;
5270 src_end = coding->source + coding->src_bytes;
5271 }
df7492f9
KH
5272 }
5273 *dst++ = c;
5274 produced_chars++;
5275 }
5276 no_more_source:
5277 ;
5278 }
5279 else
5280 while (src < src_end)
5281 {
71c81426 5282 int multibytep = 1;
df7492f9 5283 int c = *src++;
b73bfc1c 5284
df7492f9
KH
5285 if (c == '\r')
5286 {
5287 if (EQ (eol_type, Qdos))
5288 {
5289 if (src < src_end
5290 && *src == '\n')
5291 c = *src++;
5292 }
5293 else if (EQ (eol_type, Qmac))
5294 c = '\n';
5295 }
5296 if (dst >= dst_end - 1)
5297 {
2c78b7e1 5298 coding->consumed = src - coding->source;
df7492f9 5299
2c78b7e1
KH
5300 if (EQ (coding->src_object, coding->dst_object))
5301 dst_end = src;
5302 if (dst >= dst_end - 1)
5303 {
5304 dst = alloc_destination (coding, src_end - src + 2,
5305 dst);
5306 dst_end = coding->destination + coding->dst_bytes;
5307 coding_set_source (coding);
5308 src = coding->source + coding->consumed;
5309 src_end = coding->source + coding->src_bytes;
5310 }
df7492f9
KH
5311 }
5312 EMIT_ONE_BYTE (c);
5313 }
d46c5b12 5314 }
df7492f9
KH
5315 else
5316 {
5317 if (!EQ (coding->src_object, coding->dst_object))
5318 {
5319 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5320
df7492f9
KH
5321 if (require > 0)
5322 {
5323 EMACS_INT offset = src - coding->source;
5324
5325 dst = alloc_destination (coding, require, dst);
5326 coding_set_source (coding);
5327 src = coding->source + offset;
5328 src_end = coding->source + coding->src_bytes;
5329 }
5330 }
5331 produced_chars = coding->src_chars;
5332 while (src < src_end)
5333 {
5334 int c = *src++;
5335
5336 if (c == '\r')
5337 {
5338 if (EQ (eol_type, Qdos))
5339 {
5340 if (src < src_end
5341 && *src == '\n')
5342 c = *src++;
5343 produced_chars--;
5344 }
5345 else if (EQ (eol_type, Qmac))
5346 c = '\n';
5347 }
5348 *dst++ = c;
5349 }
5350 }
2c78b7e1
KH
5351 coding->consumed = coding->src_bytes;
5352 coding->consumed_char = coding->src_chars;
b73bfc1c 5353 }
4ed46869 5354
df7492f9
KH
5355 produced = dst - (coding->destination + coding->produced);
5356 if (BUFFERP (coding->dst_object))
5357 insert_from_gap (produced_chars, produced);
5358 coding->produced += produced;
5359 coding->produced_char += produced_chars;
5360 return produced_chars;
b73bfc1c 5361}
52d41803 5362
df7492f9
KH
5363/* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5364 or
5365 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5366 */
4ed46869 5367
df7492f9
KH
5368static INLINE void
5369produce_composition (coding, charbuf)
4ed46869 5370 struct coding_system *coding;
df7492f9 5371 int *charbuf;
4ed46869 5372{
df7492f9
KH
5373 Lisp_Object buffer;
5374 int len;
5375 EMACS_INT pos;
5376 enum composition_method method;
5377 int cmp_len;
5378 Lisp_Object components;
5379
5380 buffer = coding->dst_object;
5381 len = -charbuf[0];
5382 pos = coding->dst_pos + charbuf[1];
5383 method = (enum composition_method) (charbuf[3]);
5384 cmp_len = charbuf[4];
5385
5386 if (method == COMPOSITION_RELATIVE)
5387 components = Qnil;
5388 else
d46c5b12 5389 {
df7492f9
KH
5390 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5391 int i;
4ed46869 5392
df7492f9
KH
5393 len -= 5;
5394 charbuf += 5;
5395 for (i = 0; i < len; i++)
5396 args[i] = make_number (charbuf[i]);
5397 components = (method == COMPOSITION_WITH_ALTCHARS
5398 ? Fstring (len, args) : Fvector (len, args));
5399 }
5400 compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5401}
b73bfc1c 5402
df7492f9
KH
5403static int *
5404save_composition_data (buf, buf_end, prop)
5405 int *buf, *buf_end;
5406 Lisp_Object prop;
5407{
5408 enum composition_method method = COMPOSITION_METHOD (prop);
5409 int cmp_len = COMPOSITION_LENGTH (prop);
4ed46869 5410
df7492f9
KH
5411 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5412 return NULL;
d46c5b12 5413
df7492f9
KH
5414 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5415 buf[2] = method;
5416 buf[3] = cmp_len;
b73bfc1c 5417
df7492f9
KH
5418 if (method == COMPOSITION_RELATIVE)
5419 buf[0] = 4;
5420 else
b73bfc1c 5421 {
df7492f9
KH
5422 Lisp_Object components;
5423 int len, i;
b73bfc1c 5424
df7492f9
KH
5425 components = COMPOSITION_COMPONENTS (prop);
5426 if (VECTORP (components))
d46c5b12 5427 {
df7492f9
KH
5428 len = XVECTOR (components)->size;
5429 for (i = 0; i < len; i++)
5430 buf[4 + i] = XINT (AREF (components, i));
5431 }
5432 else if (STRINGP (components))
5433 {
5434 int i_byte;
b73bfc1c 5435
df7492f9
KH
5436 len = XSTRING (components)->size;
5437 i = i_byte = 0;
5438 while (i < len)
5439 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5440 }
5441 else if (INTEGERP (components))
5442 {
5443 len = 1;
5444 buf[4] = XINT (components);
5445 }
5446 else if (CONSP (components))
5447 {
5448 for (len = 0; CONSP (components);
5449 len++, components = XCDR (components))
5450 buf[4 + len] = XINT (XCAR (components));
d46c5b12 5451 }
df7492f9
KH
5452 else
5453 abort ();
5454 buf[0] = 4 + len;
4ed46869 5455 }
df7492f9 5456 return (buf + buf[0]);
4ed46869
KH
5457}
5458
df7492f9
KH
/* Number of elements first tried for the conversion work area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate, with alloca, the work area CODING->charbuf for the
   function that expands this macro, and record its element count in
   CODING->charbuf_size.  The size starts at CHARBUF_SIZE and is
   halved after each failed attempt while it stays above 1024.

   If no area could be obtained, CODING->result is set to
   CODING_RESULT_INSUFFICIENT_MEM and the *enclosing function*
   returns that value, so the macro may only be used in functions
   returning int.

   CODING is parenthesized at every use so that any pointer
   expression may be passed.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        (coding)->result = CODING_RESULT_INSUFFICIENT_MEM;              \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
4ed46869 5480
d46c5b12
KH
5481
5482static void
df7492f9 5483produce_annotation (coding)
d46c5b12 5484 struct coding_system *coding;
d46c5b12 5485{
df7492f9
KH
5486 int *charbuf = coding->charbuf;
5487 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5488
df7492f9 5489 while (charbuf < charbuf_end)
d46c5b12 5490 {
df7492f9
KH
5491 if (*charbuf >= 0)
5492 charbuf++;
d46c5b12 5493 else
d46c5b12 5494 {
df7492f9
KH
5495 int len = -*charbuf;
5496 switch (charbuf[2])
5497 {
5498 case CODING_ANNOTATE_COMPOSITION_MASK:
5499 produce_composition (coding, charbuf);
5500 break;
5501 default:
5502 abort ();
5503 }
5504 charbuf += len;
d46c5b12 5505 }
df7492f9
KH
5506 }
5507}
d46c5b12 5508
df7492f9
KH
5509/* Decode the data at CODING->src_object into CODING->dst_object.
5510 CODING->src_object is a buffer, a string, or nil.
5511 CODING->dst_object is a buffer.
de79a6a5 5512
df7492f9
KH
5513 If CODING->src_object is a buffer, it must be the current buffer.
5514 In this case, if CODING->src_pos is positive, it is a position of
5515 the source text in the buffer, otherwise, the source text is in the
5516 gap area of the buffer, and CODING->src_pos specifies the offset of
5517 the text from GPT (which must be the same as PT). If this is the
5518 same buffer as CODING->dst_object, CODING->src_pos must be
5519 negative.
b73bfc1c 5520
df7492f9
KH
5521 If CODING->src_object is a string, CODING->src_pos in an index to
5522 that string.
d46c5b12 5523
df7492f9
KH
5524 If CODING->src_object is nil, CODING->source must already point to
5525 the non-relocatable memory area. In this case, CODING->src_pos is
5526 an offset from CODING->source.
d46c5b12 5527
df7492f9
KH
5528 The decoded data is inserted at the current point of the buffer
5529 CODING->dst_object.
5530*/
5531
5532static int
5533decode_coding (coding)
d46c5b12 5534 struct coding_system *coding;
d46c5b12 5535{
df7492f9 5536 Lisp_Object attrs;
d46c5b12 5537
df7492f9
KH
5538 if (BUFFERP (coding->src_object)
5539 && coding->src_pos > 0
5540 && coding->src_pos < GPT
5541 && coding->src_pos + coding->src_chars > GPT)
5542 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5543
df7492f9 5544 if (BUFFERP (coding->dst_object))
88993dfd 5545 {
df7492f9
KH
5546 if (current_buffer != XBUFFER (coding->dst_object))
5547 set_buffer_internal (XBUFFER (coding->dst_object));
5548 if (GPT != PT)
5549 move_gap_both (PT, PT_BYTE);
88993dfd
KH
5550 }
5551
df7492f9
KH
5552 coding->consumed = coding->consumed_char = 0;
5553 coding->produced = coding->produced_char = 0;
5554 coding->chars_at_source = 0;
5555 coding->result = CODING_RESULT_SUCCESS;
5556 coding->errors = 0;
5557
5558 ALLOC_CONVERSION_WORK_AREA (coding);
5559
5560 attrs = CODING_ID_ATTRS (coding->id);
5561
5562 do
d46c5b12 5563 {
df7492f9
KH
5564 coding_set_source (coding);
5565 coding->annotated = 0;
5566 (*(coding->decoder)) (coding);
5567 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5568 translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
5569 coding_set_destination (coding);
5570 produce_chars (coding);
5571 if (coding->annotated)
5572 produce_annotation (coding);
d46c5b12 5573 }
df7492f9
KH
5574 while (coding->consumed < coding->src_bytes
5575 && ! coding->result);
d46c5b12 5576
df7492f9
KH
5577 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5578 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5579 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5580 decode_eol (coding);
d46c5b12 5581
df7492f9
KH
5582 coding->carryover_bytes = 0;
5583 if (coding->consumed < coding->src_bytes)
d46c5b12 5584 {
df7492f9
KH
5585 int nbytes = coding->src_bytes - coding->consumed;
5586 unsigned char *src;
5587
5588 coding_set_source (coding);
5589 coding_set_destination (coding);
5590 src = coding->source + coding->consumed;
5591
5592 if (coding->mode & CODING_MODE_LAST_BLOCK)
d46c5b12 5593 {
df7492f9
KH
5594 /* Flush out unprocessed data as binary chars. We are sure
5595 that the number of data is less than the size of
5596 coding->charbuf. */
5597 int *charbuf = coding->charbuf;
5598
5599 while (nbytes-- > 0)
d46c5b12 5600 {
df7492f9
KH
5601 int c = *src++;
5602 *charbuf++ = (c & 0x80 ? - c : c);
d46c5b12 5603 }
df7492f9 5604 produce_chars (coding);
d46c5b12 5605 }
d46c5b12 5606 else
df7492f9
KH
5607 {
5608 /* Record unprocessed bytes in coding->carryover. We are
5609 sure that the number of data is less than the size of
5610 coding->carryover. */
5611 unsigned char *p = coding->carryover;
5612
5613 coding->carryover_bytes = nbytes;
5614 while (nbytes-- > 0)
5615 *p++ = *src++;
5616 }
5617 coding->consumed = coding->src_bytes;
5618 }
b73bfc1c 5619
df7492f9 5620 return coding->result;
d46c5b12
KH
5621}
5622
df7492f9
KH
5623static void
5624consume_chars (coding)
5625 struct coding_system *coding;
5626{
5627 int *buf = coding->charbuf;
5628 /* -1 is to compensate for CRLF. */
5629 int *buf_end = coding->charbuf + coding->charbuf_size - 1;
7c78e542 5630 const unsigned char *src = coding->source + coding->consumed;
df7492f9
KH
5631 int pos = coding->src_pos + coding->consumed_char;
5632 int end_pos = coding->src_pos + coding->src_chars;
5633 int multibytep = coding->src_multibyte;
5634 Lisp_Object eol_type;
5635 int c;
5636 int start, end, stop;
5637 Lisp_Object object, prop;
88993dfd 5638
df7492f9
KH
5639 eol_type = CODING_ID_EOL_TYPE (coding->id);
5640 if (VECTORP (eol_type))
5641 eol_type = Qunix;
88993dfd 5642
df7492f9 5643 object = coding->src_object;
b843d1ae 5644
df7492f9
KH
5645 /* Note: composition handling is not yet implemented. */
5646 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 5647
df7492f9
KH
5648 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5649 && find_composition (pos, end_pos, &start, &end, &prop, object)
5650 && end <= end_pos
5651 && (start >= pos
5652 || (find_composition (end, end_pos, &start, &end, &prop, object)
5653 && end <= end_pos)))
5654 stop = start;
5655 else
5656 stop = end_pos;
ec6d2bb8 5657
df7492f9 5658 while (buf < buf_end)
ec6d2bb8 5659 {
df7492f9 5660 if (pos == stop)
ec6d2bb8 5661 {
df7492f9 5662 int *p;
ec6d2bb8 5663
df7492f9
KH
5664 if (pos == end_pos)
5665 break;
5666 p = save_composition_data (buf, buf_end, prop);
5667 if (p == NULL)
5668 break;
5669 buf = p;
5670 if (find_composition (end, end_pos, &start, &end, &prop, object)
5671 && end <= end_pos)
5672 stop = start;
5673 else
5674 stop = end_pos;
5675 }
5676
5677 if (! multibytep)
5678 c = *src++;
5679 else
5680 c = STRING_CHAR_ADVANCE (src);
5681 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5682 c = '\n';
5683 if (! EQ (eol_type, Qunix))
5684 {
5685 if (c == '\n')
5686 {
5687 if (EQ (eol_type, Qdos))
5688 *buf++ = '\r';
5689 else
5690 c = '\r';
ec6d2bb8 5691 }
ec6d2bb8 5692 }
df7492f9
KH
5693 *buf++ = c;
5694 pos++;
ec6d2bb8 5695 }
ec6d2bb8 5696
df7492f9
KH
5697 coding->consumed = src - coding->source;
5698 coding->consumed_char = pos - coding->src_pos;
5699 coding->charbuf_used = buf - coding->charbuf;
5700 coding->chars_at_source = 0;
ec6d2bb8
KH
5701}
5702
ec6d2bb8 5703
df7492f9
KH
5704/* Encode the text at CODING->src_object into CODING->dst_object.
5705 CODING->src_object is a buffer or a string.
5706 CODING->dst_object is a buffer or nil.
5707
5708 If CODING->src_object is a buffer, it must be the current buffer.
5709 In this case, if CODING->src_pos is positive, it is a position of
5710 the source text in the buffer, otherwise. the source text is in the
5711 gap area of the buffer, and coding->src_pos specifies the offset of
5712 the text from GPT (which must be the same as PT). If this is the
5713 same buffer as CODING->dst_object, CODING->src_pos must be
5714 negative and CODING should not have `pre-write-conversion'.
5715
5716 If CODING->src_object is a string, CODING should not have
5717 `pre-write-conversion'.
5718
5719 If CODING->dst_object is a buffer, the encoded data is inserted at
5720 the current point of that buffer.
5721
5722 If CODING->dst_object is nil, the encoded data is placed at the
5723 memory area specified by CODING->destination. */
5724
5725static int
5726encode_coding (coding)
ec6d2bb8 5727 struct coding_system *coding;
ec6d2bb8 5728{
df7492f9 5729 Lisp_Object attrs;
ec6d2bb8 5730
df7492f9 5731 attrs = CODING_ID_ATTRS (coding->id);
ec6d2bb8 5732
df7492f9 5733 if (BUFFERP (coding->dst_object))
ec6d2bb8 5734 {
df7492f9
KH
5735 set_buffer_internal (XBUFFER (coding->dst_object));
5736 coding->dst_multibyte
5737 = ! NILP (current_buffer->enable_multibyte_characters);
5738 }
ec6d2bb8 5739
df7492f9
KH
5740 coding->consumed = coding->consumed_char = 0;
5741 coding->produced = coding->produced_char = 0;
5742 coding->result = CODING_RESULT_SUCCESS;
5743 coding->errors = 0;
ec6d2bb8 5744
df7492f9 5745 ALLOC_CONVERSION_WORK_AREA (coding);
ec6d2bb8 5746
df7492f9
KH
5747 do {
5748 coding_set_source (coding);
5749 consume_chars (coding);
5750
5751 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
5752 translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
5753
5754 coding_set_destination (coding);
5755 (*(coding->encoder)) (coding);
5756 } while (coding->consumed_char < coding->src_chars);
5757
5758 if (BUFFERP (coding->dst_object))
5759 insert_from_gap (coding->produced_char, coding->produced);
5760
5761 return (coding->result);
ec6d2bb8
KH
5762}
5763
df7492f9 5764/* Work buffer */
fb88bf2d 5765
df7492f9
KH
5766/* List of currently used working buffer. */
5767Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 5768
df7492f9
KH
5769/* A working buffer used by the top level conversion. */
5770Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 5771
4ed46869 5772
df7492f9
KH
5773/* Return a working buffer that can be freely used by the following
5774 code conversion. MULTIBYTEP specifies the multibyteness of the
5775 buffer. */
b73bfc1c 5776
df7492f9
KH
5777Lisp_Object
5778make_conversion_work_buffer (multibytep)
5779 int multibytep;
5780{
5781 struct buffer *current = current_buffer;
5782 Lisp_Object buf;
d46c5b12 5783
df7492f9 5784 if (NILP (Vcode_conversion_work_buf_list))
e133c8fa 5785 {
df7492f9
KH
5786 if (NILP (Vcode_conversion_reused_work_buf))
5787 Vcode_conversion_reused_work_buf
5788 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5789 Vcode_conversion_work_buf_list
5790 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
e133c8fa 5791 }
df7492f9 5792 else
d46c5b12 5793 {
c197f191 5794 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
df7492f9 5795 char str[128];
e077cc80 5796
df7492f9
KH
5797 sprintf (str, " *code-conversion-work*<%d>", depth);
5798 Vcode_conversion_work_buf_list
5799 = Fcons (Fget_buffer_create (build_string (str)),
5800 Vcode_conversion_work_buf_list);
d46c5b12 5801 }
d46c5b12 5802
df7492f9
KH
5803 buf = XCAR (Vcode_conversion_work_buf_list);
5804 set_buffer_internal (XBUFFER (buf));
5805 current_buffer->undo_list = Qt;
5806 Ferase_buffer ();
9d123124 5807 Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil);
df7492f9
KH
5808 set_buffer_internal (current);
5809 return buf;
5810}
d46c5b12 5811
/* The coding_system of the conversion most recently started by
   decode_coding_gap, encode_coding_gap, decode_coding_object or
   encode_coding_object.  code_conversion_restore reads it to decide
   whether to free the conversion's destination memory.
   NOTE(review): this is a single slot, so a conversion started while
   another one is still in progress overwrites it -- confirm that
   nested conversions cannot leave it pointing at a stale object.  */
df7492f9 5812static struct coding_system *saved_coding;
d46c5b12 5813
df7492f9
KH
5814Lisp_Object
5815code_conversion_restore (info)
5816 Lisp_Object info;
5817{
c197f191 5818 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
df7492f9 5819 Lisp_Object buf;
d46c5b12 5820
df7492f9 5821 if (depth > 0)
d46c5b12 5822 {
df7492f9
KH
5823 buf = XCAR (Vcode_conversion_work_buf_list);
5824 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5825 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5826 Fkill_buffer (buf);
5827 }
d46c5b12 5828
c197f191 5829 if (EQ (saved_coding->dst_object, Qt)
df7492f9
KH
5830 && saved_coding->destination)
5831 xfree (saved_coding->destination);
b843d1ae 5832
df7492f9
KH
5833 return save_excursion_restore (info);
5834}
d46c5b12 5835
12410ef1 5836
df7492f9
KH
5837int
5838decode_coding_gap (coding, chars, bytes)
5839 struct coding_system *coding;
5840 EMACS_INT chars, bytes;
5841{
5842 int count = specpdl_ptr - specpdl;
fb88bf2d 5843
df7492f9
KH
5844 saved_coding = coding;
5845 record_unwind_protect (code_conversion_restore, save_excursion_save ());
ec6d2bb8 5846
df7492f9
KH
5847 coding->src_object = Fcurrent_buffer ();
5848 coding->src_chars = chars;
5849 coding->src_bytes = bytes;
5850 coding->src_pos = -chars;
5851 coding->src_pos_byte = -bytes;
5852 coding->src_multibyte = chars < bytes;
5853 coding->dst_object = coding->src_object;
5854 coding->dst_pos = PT;
5855 coding->dst_pos_byte = PT_BYTE;
71c81426 5856 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4956c225 5857
df7492f9
KH
5858 if (CODING_REQUIRE_DETECTION (coding))
5859 detect_coding (coding);
5860
5861 decode_coding (coding);
d46c5b12 5862
df7492f9
KH
5863 unbind_to (count, Qnil);
5864 return coding->result;
5865}
d46c5b12 5866
df7492f9
KH
5867int
5868encode_coding_gap (coding, chars, bytes)
5869 struct coding_system *coding;
5870 EMACS_INT chars, bytes;
5871{
5872 int count = specpdl_ptr - specpdl;
5873 Lisp_Object buffer;
d46c5b12 5874
df7492f9
KH
5875 saved_coding = coding;
5876 record_unwind_protect (code_conversion_restore, save_excursion_save ());
fb88bf2d 5877
df7492f9
KH
5878 buffer = Fcurrent_buffer ();
5879 coding->src_object = buffer;
5880 coding->src_chars = chars;
5881 coding->src_bytes = bytes;
5882 coding->src_pos = -chars;
5883 coding->src_pos_byte = -bytes;
5884 coding->src_multibyte = chars < bytes;
5885 coding->dst_object = coding->src_object;
5886 coding->dst_pos = PT;
5887 coding->dst_pos_byte = PT_BYTE;
fb88bf2d 5888
df7492f9 5889 encode_coding (coding);
f2558efd 5890
df7492f9
KH
5891 unbind_to (count, Qnil);
5892 return coding->result;
5893}
b73bfc1c 5894
d46c5b12 5895
df7492f9
KH
5896/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5897 SRC_OBJECT into DST_OBJECT by coding context CODING.
ec6d2bb8 5898
df7492f9 5899 SRC_OBJECT is a buffer, a string, or Qnil.
ec6d2bb8 5900
df7492f9
KH
5901 If it is a buffer, the text is at point of the buffer. FROM and TO
5902 are positions in the buffer.
ec6d2bb8 5903
df7492f9
KH
5904 If it is a string, the text is at the beginning of the string.
5905 FROM and TO are indices to the string.
ec6d2bb8 5906
df7492f9
KH
5907 If it is nil, the text is at coding->source. FROM and TO are
5908 indices to coding->source.
ec6d2bb8 5909
df7492f9 5910 DST_OBJECT is a buffer, Qt, or Qnil.
d46c5b12 5911
df7492f9
KH
5912 If it is a buffer, the decoded text is inserted at point of the
5913 buffer. If the buffer is the same as SRC_OBJECT, the source text
5914 is deleted.
d46c5b12 5915
df7492f9
KH
5916 If it is Qt, a string is made from the decoded text, and
5917 set in CODING->dst_object.
d46c5b12 5918
df7492f9
KH
5919 If it is Qnil, the decoded text is stored at CODING->destination.
5920 The called must allocate CODING->dst_bytes bytes at
5921 CODING->destination by xmalloc. If the decoded text is longer than
5922 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5923 */
d46c5b12 5924
df7492f9
KH
5925void
5926decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5927 dst_object)
5928 struct coding_system *coding;
5929 Lisp_Object src_object;
5930 EMACS_INT from, from_byte, to, to_byte;
5931 Lisp_Object dst_object;
5932{
5933 int count = specpdl_ptr - specpdl;
5934 unsigned char *destination;
5935 EMACS_INT dst_bytes;
5936 EMACS_INT chars = to - from;
5937 EMACS_INT bytes = to_byte - from_byte;
5938 Lisp_Object attrs;
d46c5b12 5939
df7492f9
KH
5940 saved_coding = coding;
5941 record_unwind_protect (code_conversion_restore, save_excursion_save ());
93dec019 5942
df7492f9
KH
5943 if (NILP (dst_object))
5944 {
5945 destination = coding->destination;
5946 dst_bytes = coding->dst_bytes;
5947 }
93dec019 5948
df7492f9
KH
5949 coding->src_object = src_object;
5950 coding->src_chars = chars;
5951 coding->src_bytes = bytes;
5952 coding->src_multibyte = chars < bytes;
70ad9fc4 5953
df7492f9
KH
5954 if (STRINGP (src_object))
5955 {
5956 coding->src_pos = from;
5957 coding->src_pos_byte = from_byte;
5958 }
5959 else if (BUFFERP (src_object))
5960 {
5961 set_buffer_internal (XBUFFER (src_object));
5962 if (from != GPT)
5963 move_gap_both (from, from_byte);
5964 if (EQ (src_object, dst_object))
fb88bf2d 5965 {
df7492f9
KH
5966 TEMP_SET_PT_BOTH (from, from_byte);
5967 del_range_both (from, from_byte, to, to_byte, 1);
5968 coding->src_pos = -chars;
5969 coding->src_pos_byte = -bytes;
fb88bf2d 5970 }
df7492f9 5971 else
fb88bf2d 5972 {
df7492f9
KH
5973 coding->src_pos = from;
5974 coding->src_pos_byte = from_byte;
fb88bf2d 5975 }
d46c5b12 5976 }
fb88bf2d 5977
df7492f9
KH
5978 if (CODING_REQUIRE_DETECTION (coding))
5979 detect_coding (coding);
5980 attrs = CODING_ID_ATTRS (coding->id);
5981
5982 if (! NILP (CODING_ATTR_POST_READ (attrs))
5983 || EQ (dst_object, Qt))
b73bfc1c 5984 {
df7492f9
KH
5985 coding->dst_object = make_conversion_work_buffer (1);
5986 coding->dst_pos = BEG;
5987 coding->dst_pos_byte = BEG_BYTE;
5988 coding->dst_multibyte = 1;
b73bfc1c 5989 }
df7492f9 5990 else if (BUFFERP (dst_object))
12410ef1 5991 {
df7492f9
KH
5992 coding->dst_object = dst_object;
5993 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
5994 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
5995 coding->dst_multibyte
5996 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
12410ef1 5997 }
72d1a715 5998 else
df7492f9
KH
5999 {
6000 coding->dst_object = Qnil;
6001 coding->dst_multibyte = 1;
6002 }
6003
6004 decode_coding (coding);
4ed46869 6005
df7492f9
KH
6006 if (BUFFERP (coding->dst_object))
6007 set_buffer_internal (XBUFFER (coding->dst_object));
ec6d2bb8 6008
df7492f9 6009 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6010 {
df7492f9
KH
6011 struct gcpro gcpro1, gcpro2;
6012 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6013 Lisp_Object val;
4ed46869 6014
c0cc7f7f 6015 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6016 GCPRO2 (coding->src_object, coding->dst_object);
6017 val = call1 (CODING_ATTR_POST_READ (attrs),
6018 make_number (coding->produced_char));
6019 UNGCPRO;
6020 CHECK_NATNUM (val);
6021 coding->produced_char += Z - prev_Z;
6022 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6023 }
4ed46869 6024
df7492f9 6025 if (EQ (dst_object, Qt))
ec6d2bb8 6026 {
df7492f9
KH
6027 coding->dst_object = Fbuffer_string ();
6028 }
6029 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6030 {
6031 set_buffer_internal (XBUFFER (coding->dst_object));
6032 if (dst_bytes < coding->produced)
6033 {
6034 destination
6035 = (unsigned char *) xrealloc (destination, coding->produced);
6036 if (! destination)
6037 {
6038 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6039 unbind_to (count, Qnil);
6040 return;
6041 }
6042 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6043 move_gap_both (BEGV, BEGV_BYTE);
6044 bcopy (BEGV_ADDR, destination, coding->produced);
6045 coding->destination = destination;
6046 }
ec6d2bb8 6047 }
2b4f9037 6048
df7492f9 6049 unbind_to (count, Qnil);
d46c5b12
KH
6050}
6051
df7492f9
KH
6052
6053void
6054encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6055 dst_object)
b73bfc1c 6056 struct coding_system *coding;
df7492f9
KH
6057 Lisp_Object src_object;
6058 EMACS_INT from, from_byte, to, to_byte;
6059 Lisp_Object dst_object;
b73bfc1c
KH
6060{
6061 int count = specpdl_ptr - specpdl;
df7492f9
KH
6062 EMACS_INT chars = to - from;
6063 EMACS_INT bytes = to_byte - from_byte;
6064 Lisp_Object attrs;
6065
6066 saved_coding = coding;
6067 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6068
6069 coding->src_object = src_object;
6070 coding->src_chars = chars;
6071 coding->src_bytes = bytes;
6072 coding->src_multibyte = chars < bytes;
6073
6074 attrs = CODING_ID_ATTRS (coding->id);
6075
6076 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6077 {
df7492f9
KH
6078 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6079 set_buffer_internal (XBUFFER (coding->src_object));
6080 if (STRINGP (src_object))
6081 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6082 else if (BUFFERP (src_object))
6083 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6084 else
6085 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6086
6087 if (EQ (src_object, dst_object))
6088 {
6089 set_buffer_internal (XBUFFER (src_object));
6090 del_range_both (from, from_byte, to, to_byte, 1);
6091 set_buffer_internal (XBUFFER (coding->src_object));
6092 }
6093
ac87bbef
KH
6094 call2 (CODING_ATTR_PRE_WRITE (attrs),
6095 make_number (BEG), make_number (Z));
6096 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6097 if (BEG != GPT)
6098 move_gap_both (BEG, BEG_BYTE);
6099 coding->src_chars = Z - BEG;
6100 coding->src_bytes = Z_BYTE - BEG_BYTE;
6101 coding->src_pos = BEG;
6102 coding->src_pos_byte = BEG_BYTE;
6103 coding->src_multibyte = Z < Z_BYTE;
6104 }
6105 else if (STRINGP (src_object))
6106 {
6107 coding->src_pos = from;
6108 coding->src_pos_byte = from_byte;
6109 }
6110 else if (BUFFERP (src_object))
d46c5b12 6111 {
df7492f9
KH
6112 set_buffer_internal (XBUFFER (src_object));
6113 if (from != GPT)
6114 move_gap_both (from, from_byte);
6115 if (EQ (src_object, dst_object))
d46c5b12 6116 {
df7492f9
KH
6117 del_range_both (from, from_byte, to, to_byte, 1);
6118 coding->src_pos = -chars;
6119 coding->src_pos_byte = -bytes;
d46c5b12 6120 }
df7492f9 6121 else
d46c5b12 6122 {
df7492f9
KH
6123 coding->src_pos = from;
6124 coding->src_pos_byte = from_byte;
d46c5b12
KH
6125 }
6126 }
4ed46869 6127
df7492f9 6128 if (BUFFERP (dst_object))
d46c5b12 6129 {
df7492f9 6130 coding->dst_object = dst_object;
28f67a95
KH
6131 if (EQ (src_object, dst_object))
6132 {
6133 coding->dst_pos = from;
6134 coding->dst_pos_byte = from_byte;
6135 }
6136 else
6137 {
6138 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6139 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6140 }
df7492f9
KH
6141 coding->dst_multibyte
6142 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
b73bfc1c 6143 }
df7492f9 6144 else if (EQ (dst_object, Qt))
4956c225 6145 {
df7492f9 6146 coding->dst_object = Qnil;
df7492f9 6147 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6148 if (coding->dst_bytes == 0)
6149 coding->dst_bytes = 1;
6150 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6151 coding->dst_multibyte = 0;
4956c225 6152 }
df7492f9 6153 else
78108bcd 6154 {
df7492f9
KH
6155 coding->dst_object = Qnil;
6156 coding->dst_multibyte = 0;
78108bcd
KH
6157 }
6158
df7492f9 6159 encode_coding (coding);
4ed46869 6160
df7492f9 6161 if (EQ (dst_object, Qt))
4ed46869 6162 {
df7492f9
KH
6163 if (BUFFERP (coding->dst_object))
6164 coding->dst_object = Fbuffer_string ();
6165 else
73be902c 6166 {
df7492f9
KH
6167 coding->dst_object
6168 = make_unibyte_string ((char *) coding->destination,
6169 coding->produced);
6170 xfree (coding->destination);
73be902c 6171 }
4ed46869 6172 }
d46c5b12 6173
df7492f9 6174 unbind_to (count, Qnil);
b73bfc1c
KH
6175}
6176
df7492f9 6177
b73bfc1c 6178Lisp_Object
df7492f9 6179preferred_coding_system ()
b73bfc1c 6180{
df7492f9 6181 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6182
df7492f9 6183 return CODING_ID_NAME (id);
4ed46869
KH
6184}
6185
6186\f
6187#ifdef emacs
1397dc18 6188/*** 8. Emacs Lisp library functions ***/
4ed46869 6189
4ed46869 6190DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6191 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6192See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6193about coding-system objects. */)
6194 (obj)
4ed46869
KH
6195 Lisp_Object obj;
6196{
df7492f9 6197 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6198}
6199
9d991de8
RS
6200DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6201 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6202 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6203 (prompt)
4ed46869
KH
6204 Lisp_Object prompt;
6205{
e0e989f6 6206 Lisp_Object val;
9d991de8
RS
6207 do
6208 {
4608c386
KH
6209 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6210 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
6211 }
6212 while (XSTRING (val)->size == 0);
e0e989f6 6213 return (Fintern (val, Qnil));
4ed46869
KH
6214}
6215
9b787f3e 6216DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6217 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6218If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6219 (prompt, default_coding_system)
9b787f3e 6220 Lisp_Object prompt, default_coding_system;
4ed46869 6221{
f44d27ce 6222 Lisp_Object val;
9b787f3e
RS
6223 if (SYMBOLP (default_coding_system))
6224 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 6225 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6226 Qt, Qnil, Qcoding_system_history,
6227 default_coding_system, Qnil);
e0e989f6 6228 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6229}
6230
6231DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6232 1, 1, 0,
48b0f3ae
PJ
6233 doc: /* Check validity of CODING-SYSTEM.
6234If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6235It is valid if it is a symbol with a non-nil `coding-system' property.
6236The value of property should be a vector of length 5. */)
df7492f9 6237 (coding_system)
4ed46869
KH
6238 Lisp_Object coding_system;
6239{
b7826503 6240 CHECK_SYMBOL (coding_system);
4ed46869
KH
6241 if (!NILP (Fcoding_system_p (coding_system)))
6242 return coding_system;
6243 while (1)
02ba4723 6244 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6245}
df7492f9 6246
3a73fa5d 6247\f
d46c5b12 6248Lisp_Object
df7492f9 6249detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
d46c5b12
KH
6250 unsigned char *src;
6251 int src_bytes, highest;
0a28aafb 6252 int multibytep;
df7492f9 6253 Lisp_Object coding_system;
4ed46869 6254{
df7492f9
KH
6255 unsigned char *src_end = src + src_bytes;
6256 int mask = CATEGORY_MASK_ANY;
6257 int detected = 0;
6258 int c, i;
6259 Lisp_Object attrs, eol_type;
6260 Lisp_Object val;
6261 struct coding_system coding;
6262
6263 if (NILP (coding_system))
6264 coding_system = Qundecided;
6265 setup_coding_system (coding_system, &coding);
6266 attrs = CODING_ID_ATTRS (coding.id);
6267 eol_type = CODING_ID_EOL_TYPE (coding.id);
4ed46869 6268
df7492f9
KH
6269 coding.source = src;
6270 coding.src_bytes = src_bytes;
6271 coding.src_multibyte = multibytep;
6272 coding.consumed = 0;
4ed46869 6273
df7492f9 6274 if (XINT (CODING_ATTR_CATEGORY (attrs)) != coding_category_undecided)
4ed46869 6275 {
df7492f9 6276 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
4ed46869 6277 }
df7492f9 6278 else
4ed46869 6279 {
df7492f9
KH
6280 coding_system = Qnil;
6281 for (; src < src_end; src++)
4ed46869 6282 {
df7492f9
KH
6283 c = *src;
6284 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6285 || c == ISO_CODE_SI
6286 || c == ISO_CODE_SO)))
d46c5b12 6287 break;
4ed46869 6288 }
df7492f9
KH
6289 coding.head_ascii = src - coding.source;
6290
6291 if (src < src_end)
6292 for (i = 0; i < coding_category_raw_text; i++)
6293 {
6294 enum coding_category category = coding_priorities[i];
6295 struct coding_system *this = coding_categories + category;
6296
6297 if (category >= coding_category_raw_text
6298 || detected & (1 << category))
6299 continue;
6300
6301 if (this->id < 0)
6302 {
6303 /* No coding system of this category is defined. */
6304 mask &= ~(1 << category);
6305 }
6306 else
6307 {
6308 detected |= detected_mask[category];
6309 if ((*(coding_categories[category].detector)) (&coding, &mask)
6310 && highest)
6311 {
6312 mask &= detected_mask[category];
6313 break;
6314 }
6315 }
6316 }
4ed46869 6317 }
4ed46869 6318
df7492f9
KH
6319 if (!mask)
6320 val = Fcons (make_number (coding_category_raw_text), Qnil);
6321 else if (mask == CATEGORY_MASK_ANY)
6322 val = Fcons (make_number (coding_category_undecided), Qnil);
6323 else if (highest)
4ed46869 6324 {
df7492f9
KH
6325 for (i = 0; i < coding_category_raw_text; i++)
6326 if (mask & (1 << coding_priorities[i]))
6327 {
6328 val = Fcons (make_number (coding_priorities[i]), Qnil);
6329 break;
6330 }
6331 }
6332 else
6333 {
6334 val = Qnil;
6335 for (i = coding_category_raw_text - 1; i >= 0; i--)
6336 if (mask & (1 << coding_priorities[i]))
6337 val = Fcons (make_number (coding_priorities[i]), val);
4ed46869 6338 }
df7492f9
KH
6339
6340 {
6341 int one_byte_eol = -1, two_byte_eol = -1;
6342 Lisp_Object tail;
6343
6344 for (tail = val; CONSP (tail); tail = XCDR (tail))
6345 {
6346 struct coding_system *this
6347 = (NILP (coding_system) ? coding_categories + XINT (XCAR (tail))
6348 : &coding);
6349 int this_eol;
6350
6351 attrs = CODING_ID_ATTRS (this->id);
6352 eol_type = CODING_ID_EOL_TYPE (this->id);
6353 XSETCAR (tail, CODING_ID_NAME (this->id));
6354 if (VECTORP (eol_type))
6355 {
6356 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_16))
6357 {
6358 if (two_byte_eol < 0)
6359 two_byte_eol = detect_eol (this, coding.source, src_bytes);
6360 this_eol = two_byte_eol;
6361 }
6362 else
6363 {
6364 if (one_byte_eol < 0)
6365 one_byte_eol =detect_eol (this, coding.source, src_bytes);
6366 this_eol = one_byte_eol;
6367 }
6368 if (this_eol == EOL_SEEN_LF)
6369 XSETCAR (tail, AREF (eol_type, 0));
6370 else if (this_eol == EOL_SEEN_CRLF)
6371 XSETCAR (tail, AREF (eol_type, 1));
6372 else if (this_eol == EOL_SEEN_CR)
6373 XSETCAR (tail, AREF (eol_type, 2));
6374 }
6375 }
6376 }
6377
03699b14 6378 return (highest ? XCAR (val) : val);
93dec019 6379}
4ed46869 6380
df7492f9 6381
d46c5b12
KH
6382DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6383 2, 3, 0,
48b0f3ae
PJ
6384 doc: /* Detect coding system of the text in the region between START and END.
6385Return a list of possible coding systems ordered by priority.
6386
6387If only ASCII characters are found, it returns a list of single element
6388`undecided' or its subsidiary coding system according to a detected
6389end-of-line format.
6390
6391If optional argument HIGHEST is non-nil, return the coding system of
6392highest priority. */)
6393 (start, end, highest)
d46c5b12
KH
6394 Lisp_Object start, end, highest;
6395{
6396 int from, to;
6397 int from_byte, to_byte;
6289dd10 6398
b7826503
PJ
6399 CHECK_NUMBER_COERCE_MARKER (start);
6400 CHECK_NUMBER_COERCE_MARKER (end);
4ed46869 6401
d46c5b12
KH
6402 validate_region (&start, &end);
6403 from = XINT (start), to = XINT (end);
6404 from_byte = CHAR_TO_BYTE (from);
6405 to_byte = CHAR_TO_BYTE (to);
6289dd10 6406
d46c5b12
KH
6407 if (from < GPT && to >= GPT)
6408 move_gap_both (to, to_byte);
c210f766 6409
d46c5b12 6410 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6411 to_byte - from_byte,
0a28aafb
KH
6412 !NILP (highest),
6413 !NILP (current_buffer
df7492f9
KH
6414 ->enable_multibyte_characters),
6415 Qnil);
d46c5b12 6416}
6289dd10 6417
d46c5b12
KH
6418DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6419 1, 2, 0,
48b0f3ae
PJ
6420 doc: /* Detect coding system of the text in STRING.
6421Return a list of possible coding systems ordered by priority.
6422
6423If only ASCII characters are found, it returns a list of single element
6424`undecided' or its subsidiary coding system according to a detected
6425end-of-line format.
6426
6427If optional argument HIGHEST is non-nil, return the coding system of
6428highest priority. */)
6429 (string, highest)
d46c5b12
KH
6430 Lisp_Object string, highest;
6431{
b7826503 6432 CHECK_STRING (string);
4ed46869 6433
d46c5b12 6434 return detect_coding_system (XSTRING (string)->data,
df7492f9 6435 STRING_BYTES (XSTRING (string)),
0a28aafb 6436 !NILP (highest),
df7492f9
KH
6437 STRING_MULTIBYTE (string),
6438 Qnil);
4ed46869
KH
6439}
6440
05e6f5dc 6441
df7492f9
KH
6442static INLINE int
6443char_encodable_p (c, attrs)
6444 int c;
6445 Lisp_Object attrs;
05e6f5dc 6446{
df7492f9 6447 Lisp_Object tail;
df7492f9 6448 struct charset *charset;
05e6f5dc 6449
df7492f9
KH
6450 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6451 CONSP (tail); tail = XCDR (tail))
05e6f5dc 6452 {
df7492f9
KH
6453 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6454 if (CHAR_CHARSET_P (c, charset))
6455 break;
05e6f5dc 6456 }
df7492f9 6457 return (! NILP (tail));
05e6f5dc
KH
6458}
6459
6460
df7492f9
KH
6461/* Return a list of coding systems that safely encode the text between
6462 START and END. If EXCLUDE is non-nil, it is a list of coding
6463 systems not to check. The returned list doesn't contain any such
48468dac 6464 coding systems. In any case, if the text contains only ASCII or is
df7492f9
KH
6465 unibyte, return t. */
6466
6467DEFUN ("find-coding-systems-region-internal",
6468 Ffind_coding_systems_region_internal,
6469 Sfind_coding_systems_region_internal, 2, 3, 0,
6470 doc: /* Internal use only. */)
6471 (start, end, exclude)
6472 Lisp_Object start, end, exclude;
6473{
6474 Lisp_Object coding_attrs_list, safe_codings;
6475 EMACS_INT start_byte, end_byte;
7c78e542 6476 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
6477 int c;
6478 Lisp_Object tail, elt;
05e6f5dc 6479
df7492f9
KH
6480 if (STRINGP (start))
6481 {
6482 if (!STRING_MULTIBYTE (start)
48468dac 6483 || XSTRING (start)->size == STRING_BYTES (XSTRING (start)))
df7492f9
KH
6484 return Qt;
6485 start_byte = 0;
6486 end_byte = STRING_BYTES (XSTRING (start));
6487 }
6488 else
6489 {
6490 CHECK_NUMBER_COERCE_MARKER (start);
6491 CHECK_NUMBER_COERCE_MARKER (end);
6492 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6493 args_out_of_range (start, end);
6494 if (NILP (current_buffer->enable_multibyte_characters))
6495 return Qt;
6496 start_byte = CHAR_TO_BYTE (XINT (start));
6497 end_byte = CHAR_TO_BYTE (XINT (end));
6498 if (XINT (end) - XINT (start) == end_byte - start_byte)
6499 return Qt;
05e6f5dc 6500
df7492f9
KH
6501 if (start < GPT && end > GPT)
6502 {
6503 if ((GPT - start) < (end - GPT))
6504 move_gap_both (start, start_byte);
6505 else
6506 move_gap_both (end, end_byte);
6507 }
6508 }
05e6f5dc 6509
df7492f9
KH
6510 coding_attrs_list = Qnil;
6511 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6512 if (NILP (exclude)
6513 || NILP (Fmemq (XCAR (tail), exclude)))
6514 {
6515 Lisp_Object attrs;
05e6f5dc 6516
df7492f9
KH
6517 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6518 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6519 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6520 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6521 }
6522
6523 if (STRINGP (start))
6524 p = pbeg = XSTRING (start)->data;
6525 else
6526 p = pbeg = BYTE_POS_ADDR (start_byte);
6527 pend = p + (end_byte - start_byte);
6528
6529 while (p < pend && ASCII_BYTE_P (*p)) p++;
6530 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
05e6f5dc
KH
6531
6532 while (p < pend)
6533 {
df7492f9
KH
6534 if (ASCII_BYTE_P (*p))
6535 p++;
6536 else
6537 {
6538 c = STRING_CHAR_ADVANCE (p);
6539
6540 charset_map_loaded = 0;
6541 for (tail = coding_attrs_list; CONSP (tail);)
6542 {
6543 elt = XCAR (tail);
6544 if (NILP (elt))
6545 tail = XCDR (tail);
6546 else if (char_encodable_p (c, elt))
6547 tail = XCDR (tail);
6548 else if (CONSP (XCDR (tail)))
6549 {
6550 XSETCAR (tail, XCAR (XCDR (tail)));
6551 XSETCDR (tail, XCDR (XCDR (tail)));
6552 }
6553 else
6554 {
6555 XSETCAR (tail, Qnil);
6556 tail = XCDR (tail);
6557 }
6558 }
6559 if (charset_map_loaded)
6560 {
6561 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 6562
df7492f9
KH
6563 if (STRINGP (start))
6564 pbeg = XSTRING (start)->data;
6565 else
6566 pbeg = BYTE_POS_ADDR (start_byte);
6567 p = pbeg + p_offset;
6568 pend = pbeg + pend_offset;
6569 }
6570 }
05e6f5dc 6571 }
df7492f9
KH
6572
6573 safe_codings = Qnil;
6574 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6575 if (! NILP (XCAR (tail)))
6576 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6577
05e6f5dc
KH
6578 return safe_codings;
6579}
6580
6581
df7492f9
KH
6582DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6583 Scheck_coding_systems_region, 3, 3, 0,
6584 doc: /* Check if the region is encodable by coding systems.
05e6f5dc 6585
df7492f9
KH
6586START and END are buffer positions specifying the region.
6587CODING-SYSTEM-LIST is a list of coding systems to check.
6588
6589The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6590CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6591whole region, POS0, POS1, ... are buffer positions where non-encodable
6592characters are found.
6593
6594If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6595value is nil.
6596
6597START may be a string. In that case, check if the string is
6598encodable, and the value contains indices to the string instead of
6599buffer positions. END is ignored. */)
6600 (start, end, coding_system_list)
6601 Lisp_Object start, end, coding_system_list;
05e6f5dc 6602{
df7492f9
KH
6603 Lisp_Object list;
6604 EMACS_INT start_byte, end_byte;
6605 int pos;
7c78e542 6606 const unsigned char *p, *pbeg, *pend;
df7492f9
KH
6607 int c;
6608 Lisp_Object tail, elt;
05e6f5dc
KH
6609
6610 if (STRINGP (start))
6611 {
df7492f9
KH
6612 if (!STRING_MULTIBYTE (start)
6613 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6614 return Qnil;
6615 start_byte = 0;
6616 end_byte = STRING_BYTES (XSTRING (start));
6617 pos = 0;
05e6f5dc
KH
6618 }
6619 else
6620 {
b7826503
PJ
6621 CHECK_NUMBER_COERCE_MARKER (start);
6622 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
6623 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6624 args_out_of_range (start, end);
6625 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
6626 return Qnil;
6627 start_byte = CHAR_TO_BYTE (XINT (start));
6628 end_byte = CHAR_TO_BYTE (XINT (end));
6629 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 6630 return Qt;
df7492f9
KH
6631
6632 if (start < GPT && end > GPT)
6633 {
6634 if ((GPT - start) < (end - GPT))
6635 move_gap_both (start, start_byte);
6636 else
6637 move_gap_both (end, end_byte);
6638 }
6639 pos = start;
6640 }
6641
6642 list = Qnil;
6643 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6644 {
6645 elt = XCAR (tail);
6646 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6647 Qnil)),
6648 list);
05e6f5dc
KH
6649 }
6650
df7492f9
KH
6651 if (STRINGP (start))
6652 p = pbeg = XSTRING (start)->data;
6653 else
6654 p = pbeg = BYTE_POS_ADDR (start_byte);
6655 pend = p + (end_byte - start_byte);
6656
6657 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6658 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6659
6660 while (p < pend)
05e6f5dc 6661 {
df7492f9
KH
6662 if (ASCII_BYTE_P (*p))
6663 p++;
6664 else
05e6f5dc 6665 {
df7492f9
KH
6666 c = STRING_CHAR_ADVANCE (p);
6667
6668 charset_map_loaded = 0;
6669 for (tail = list; CONSP (tail); tail = XCDR (tail))
6670 {
6671 elt = XCDR (XCAR (tail));
6672 if (! char_encodable_p (c, XCAR (elt)))
6673 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6674 }
6675 if (charset_map_loaded)
6676 {
6677 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6678
6679 if (STRINGP (start))
6680 pbeg = XSTRING (start)->data;
6681 else
6682 pbeg = BYTE_POS_ADDR (start_byte);
6683 p = pbeg + p_offset;
6684 pend = pbeg + pend_offset;
6685 }
05e6f5dc 6686 }
df7492f9 6687 pos++;
05e6f5dc
KH
6688 }
6689
df7492f9
KH
6690 tail = list;
6691 list = Qnil;
6692 for (; CONSP (tail); tail = XCDR (tail))
05e6f5dc 6693 {
df7492f9
KH
6694 elt = XCAR (tail);
6695 if (CONSP (XCDR (XCDR (elt))))
6696 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6697 list);
05e6f5dc 6698 }
df7492f9
KH
6699
6700 return list;
05e6f5dc
KH
6701}
6702
6703
df7492f9 6704
4031e2bf 6705Lisp_Object
df7492f9
KH
6706code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6707 Lisp_Object start, end, coding_system, dst_object;
6708 int encodep, norecord;
3a73fa5d
RS
6709{
6710 struct coding_system coding;
df7492f9
KH
6711 EMACS_INT from, from_byte, to, to_byte;
6712 Lisp_Object src_object;
3a73fa5d 6713
b7826503
PJ
6714 CHECK_NUMBER_COERCE_MARKER (start);
6715 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
6716 if (NILP (coding_system))
6717 coding_system = Qno_conversion;
6718 else
6719 CHECK_CODING_SYSTEM (coding_system);
6720 src_object = Fcurrent_buffer ();
6721 if (NILP (dst_object))
6722 dst_object = src_object;
6723 else if (! EQ (dst_object, Qt))
6724 CHECK_BUFFER (dst_object);
3a73fa5d 6725
d46c5b12
KH
6726 validate_region (&start, &end);
6727 from = XFASTINT (start);
df7492f9 6728 from_byte = CHAR_TO_BYTE (from);
d46c5b12 6729 to = XFASTINT (end);
df7492f9 6730 to_byte = CHAR_TO_BYTE (to);
d46c5b12 6731
df7492f9
KH
6732 setup_coding_system (coding_system, &coding);
6733 coding.mode |= CODING_MODE_LAST_BLOCK;
6734
6735 if (encodep)
6736 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6737 dst_object);
6738 else
6739 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6740 dst_object);
6741 if (! norecord)
6742 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
d46c5b12 6743
df7492f9
KH
6744 if (coding.result != CODING_RESULT_SUCCESS)
6745 error ("Code conversion error: %d", coding.result);
3a73fa5d 6746
df7492f9
KH
6747 return (BUFFERP (dst_object)
6748 ? make_number (coding.produced_char)
6749 : coding.dst_object);
4031e2bf
KH
6750}
6751
df7492f9 6752
4031e2bf 6753DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 6754 3, 4, "r\nzCoding system: ",
48b0f3ae 6755 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
6756When called from a program, takes four arguments:
6757 START, END, CODING-SYSTEM, and DESTINATION.
6758START and END are buffer positions.
6759
6760Optional 4th arguments DESTINATION specifies where the decoded text goes.
6761If nil, the region between START and END is replace by the decoded text.
6762If buffer, the decoded text is inserted in the buffer.
6763If t, the decoded text is returned.
6764
48b0f3ae
PJ
6765This function sets `last-coding-system-used' to the precise coding system
6766used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6767not fully specified.)
6768It returns the length of the decoded text. */)
df7492f9
KH
6769 (start, end, coding_system, destination)
6770 Lisp_Object start, end, coding_system, destination;
4031e2bf 6771{
df7492f9 6772 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d
RS
6773}
6774
6775DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
6776 3, 4, "r\nzCoding system: ",
6777 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
6778When called from a program, takes three arguments:
6779START, END, and CODING-SYSTEM. START and END are buffer positions.
df7492f9
KH
6780
6781Optional 4th arguments DESTINATION specifies where the encoded text goes.
6782If nil, the region between START and END is replace by the encoded text.
6783If buffer, the encoded text is inserted in the buffer.
6784If t, the encoded text is returned.
6785
48b0f3ae
PJ
6786This function sets `last-coding-system-used' to the precise coding system
6787used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6788not fully specified.)
6789It returns the length of the encoded text. */)
df7492f9
KH
6790 (start, end, coding_system, destination)
6791 Lisp_Object start, end, coding_system, destination;
3a73fa5d 6792{
df7492f9 6793 return code_convert_region (start, end, coding_system, destination, 1, 0);
4031e2bf 6794}
3a73fa5d 6795
4031e2bf 6796Lisp_Object
df7492f9
KH
6797code_convert_string (string, coding_system, dst_object,
6798 encodep, nocopy, norecord)
6799 Lisp_Object string, coding_system, dst_object;
6800 int encodep, nocopy, norecord;
4031e2bf
KH
6801{
6802 struct coding_system coding;
df7492f9 6803 EMACS_INT chars, bytes;
3a73fa5d 6804
b7826503 6805 CHECK_STRING (string);
d46c5b12 6806 if (NILP (coding_system))
df7492f9
KH
6807 {
6808 if (! norecord)
6809 Vlast_coding_system_used = Qno_conversion;
6810 if (NILP (dst_object))
6811 return (nocopy ? Fcopy_sequence (string) : string);
6812 }
4ed46869 6813
df7492f9
KH
6814 if (NILP (coding_system))
6815 coding_system = Qno_conversion;
6816 else
6817 CHECK_CODING_SYSTEM (coding_system);
6818 if (NILP (dst_object))
6819 dst_object = Qt;
6820 else if (! EQ (dst_object, Qt))
6821 CHECK_BUFFER (dst_object);
5f1cd180 6822
df7492f9 6823 setup_coding_system (coding_system, &coding);
d46c5b12 6824 coding.mode |= CODING_MODE_LAST_BLOCK;
df7492f9
KH
6825 chars = XSTRING (string)->size;
6826 bytes = STRING_BYTES (XSTRING (string));
6827 if (encodep)
6828 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6829 else
6830 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6831 if (! norecord)
6832 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 6833
df7492f9
KH
6834 if (coding.result != CODING_RESULT_SUCCESS)
6835 error ("Code conversion error: %d", coding.result);
4ed46869 6836
df7492f9
KH
6837 return (BUFFERP (dst_object)
6838 ? make_number (coding.produced_char)
6839 : coding.dst_object);
4ed46869
KH
6840}
6841
4031e2bf 6842
ecec61c1 6843/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
6844 Do not set Vlast_coding_system_used.
6845
6846 This function is called only from macros DECODE_FILE and
6847 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
6848
6849Lisp_Object
6850code_convert_string_norecord (string, coding_system, encodep)
6851 Lisp_Object string, coding_system;
6852 int encodep;
6853{
0be8721c 6854 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
df7492f9 6855}
ecec61c1 6856
ecec61c1 6857
df7492f9
KH
6858DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6859 2, 4, 0,
6860 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6861
6862Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6863if the decoding operation is trivial.
ecec61c1 6864
df7492f9 6865Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 6866inserted in BUFFER instead of returned as a string. In this case,
df7492f9 6867the return value is BUFFER.
ecec61c1 6868
df7492f9
KH
6869This function sets `last-coding-system-used' to the precise coding system
6870used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6871not fully specified. */)
6872 (string, coding_system, nocopy, buffer)
6873 Lisp_Object string, coding_system, nocopy, buffer;
6874{
6875 return code_convert_string (string, coding_system, buffer,
6876 0, ! NILP (nocopy), 0);
6877}
6878
6879DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6880 2, 4, 0,
6881 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6882
6883Optional third arg NOCOPY non-nil means it is OK to return STRING
6884itself if the encoding operation is trivial.
6885
6886Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 6887inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
6888the return value is BUFFER.
6889
6890This function sets `last-coding-system-used' to the precise coding system
6891used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6892not fully specified.) */)
6893 (string, coding_system, nocopy, buffer)
6894 Lisp_Object string, coding_system, nocopy, buffer;
6895{
6896 return code_convert_string (string, coding_system, buffer,
c197f191 6897 1, ! NILP (nocopy), 1);
ecec61c1 6898}
df7492f9 6899
3a73fa5d 6900\f
4ed46869 6901DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6902 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6903Return the corresponding character. */)
6904 (code)
4ed46869
KH
6905 Lisp_Object code;
6906{
df7492f9
KH
6907 Lisp_Object spec, attrs, val;
6908 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6909 int c;
6910
6911 CHECK_NATNUM (code);
6912 c = XFASTINT (code);
6913 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6914 attrs = AREF (spec, 0);
4ed46869 6915
df7492f9
KH
6916 if (ASCII_BYTE_P (c)
6917 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6918 return code;
6919
6920 val = CODING_ATTR_CHARSET_LIST (attrs);
6921 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
6922 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6923 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
df7492f9
KH
6924
6925 if (c <= 0x7F)
6926 charset = charset_roman;
6927 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 6928 {
df7492f9
KH
6929 charset = charset_kana;
6930 c -= 0x80;
55ab7be3
KH
6931 }
6932 else
6933 {
004068e4 6934 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
6935
6936 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
6937 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
6938 error ("Invalid code: %d", code);
6939 SJIS_TO_JIS (c);
6940 charset = charset_kanji;
55ab7be3 6941 }
df7492f9
KH
6942 c = DECODE_CHAR (charset, c);
6943 if (c < 0)
6944 error ("Invalid code: %d", code);
6945 return make_number (c);
4ed46869
KH
6946}
6947
df7492f9 6948
4ed46869 6949DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6950 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6951Return the corresponding code in SJIS. */)
6952 (ch)
df7492f9 6953 Lisp_Object ch;
4ed46869 6954{
df7492f9
KH
6955 Lisp_Object spec, attrs, charset_list;
6956 int c;
6957 struct charset *charset;
6958 unsigned code;
4ed46869 6959
df7492f9
KH
6960 CHECK_CHARACTER (ch);
6961 c = XFASTINT (ch);
6962 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6963 attrs = AREF (spec, 0);
6964
6965 if (ASCII_CHAR_P (c)
6966 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6967 return ch;
6968
6969 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6970 charset = char_charset (c, charset_list, &code);
6971 if (code == CHARSET_INVALID_CODE (charset))
6972 error ("Can't encode by shift_jis encoding: %d", c);
6973 JIS_TO_SJIS (code);
6974
6975 return make_number (code);
4ed46869
KH
6976}
6977
6978DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
6979 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6980Return the corresponding character. */)
6981 (code)
4ed46869
KH
6982 Lisp_Object code;
6983{
df7492f9
KH
6984 Lisp_Object spec, attrs, val;
6985 struct charset *charset_roman, *charset_big5, *charset;
6986 int c;
4ed46869 6987
df7492f9
KH
6988 CHECK_NATNUM (code);
6989 c = XFASTINT (code);
6990 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6991 attrs = AREF (spec, 0);
6992
6993 if (ASCII_BYTE_P (c)
6994 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6995 return code;
6996
6997 val = CODING_ATTR_CHARSET_LIST (attrs);
6998 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6999 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7000
7001 if (c <= 0x7F)
7002 charset = charset_roman;
c28a9453
KH
7003 else
7004 {
df7492f9
KH
7005 int b1 = c >> 8, b2 = c & 0x7F;
7006 if (b1 < 0xA1 || b1 > 0xFE
7007 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7008 error ("Invalid code: %d", code);
7009 charset = charset_big5;
c28a9453 7010 }
df7492f9
KH
7011 c = DECODE_CHAR (charset, (unsigned )c);
7012 if (c < 0)
7013 error ("Invalid code: %d", code);
7014 return make_number (c);
4ed46869
KH
7015}
7016
7017DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7018 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7019Return the corresponding character code in Big5. */)
7020 (ch)
4ed46869
KH
7021 Lisp_Object ch;
7022{
df7492f9
KH
7023 Lisp_Object spec, attrs, charset_list;
7024 struct charset *charset;
7025 int c;
7026 unsigned code;
7027
7028 CHECK_CHARACTER (ch);
7029 c = XFASTINT (ch);
7030 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7031 attrs = AREF (spec, 0);
7032 if (ASCII_CHAR_P (c)
7033 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7034 return ch;
7035
7036 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7037 charset = char_charset (c, charset_list, &code);
7038 if (code == CHARSET_INVALID_CODE (charset))
7039 error ("Can't encode by Big5 encoding: %d", c);
7040
7041 return make_number (code);
4ed46869 7042}
df7492f9 7043
3a73fa5d 7044\f
1ba9e4ab
KH
7045DEFUN ("set-terminal-coding-system-internal",
7046 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
7047 Sset_terminal_coding_system_internal, 1, 1, 0,
7048 doc: /* Internal use only. */)
7049 (coding_system)
b74e4686 7050 Lisp_Object coding_system;
4ed46869 7051{
b7826503 7052 CHECK_SYMBOL (coding_system);
df7492f9
KH
7053 setup_coding_system (Fcheck_coding_system (coding_system),
7054 &terminal_coding);
7055
70c22245 7056 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
7057 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7058 /* Characer composition should be disabled. */
7059 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7060 terminal_coding.src_multibyte = 1;
7061 terminal_coding.dst_multibyte = 0;
4ed46869
KH
7062 return Qnil;
7063}
7064
c4825358
KH
7065DEFUN ("set-safe-terminal-coding-system-internal",
7066 Fset_safe_terminal_coding_system_internal,
48b0f3ae 7067 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7068 doc: /* Internal use only. */)
48b0f3ae 7069 (coding_system)
b74e4686 7070 Lisp_Object coding_system;
c4825358 7071{
b7826503 7072 CHECK_SYMBOL (coding_system);
c4825358
KH
7073 setup_coding_system (Fcheck_coding_system (coding_system),
7074 &safe_terminal_coding);
df7492f9
KH
7075 /* Characer composition should be disabled. */
7076 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7077 safe_terminal_coding.src_multibyte = 1;
7078 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7079 return Qnil;
7080}
7081
4ed46869
KH
7082DEFUN ("terminal-coding-system",
7083 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
7084 doc: /* Return coding system specified for terminal output. */)
7085 ()
4ed46869 7086{
df7492f9 7087 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
7088}
7089
1ba9e4ab
KH
7090DEFUN ("set-keyboard-coding-system-internal",
7091 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
7092 Sset_keyboard_coding_system_internal, 1, 1, 0,
7093 doc: /* Internal use only. */)
7094 (coding_system)
4ed46869
KH
7095 Lisp_Object coding_system;
7096{
b7826503 7097 CHECK_SYMBOL (coding_system);
df7492f9
KH
7098 setup_coding_system (Fcheck_coding_system (coding_system),
7099 &keyboard_coding);
7100 /* Characer composition should be disabled. */
7101 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
7102 return Qnil;
7103}
7104
7105DEFUN ("keyboard-coding-system",
7106 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
7107 doc: /* Return coding system specified for decoding keyboard input. */)
7108 ()
4ed46869 7109{
df7492f9 7110 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7111}
7112
7113\f
a5d301df
KH
7114DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7115 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7116 doc: /* Choose a coding system for an operation based on the target name.
7117The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7118DECODING-SYSTEM is the coding system to use for decoding
7119\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7120for encoding (in case OPERATION does encoding).
7121
7122The first argument OPERATION specifies an I/O primitive:
7123 For file I/O, `insert-file-contents' or `write-region'.
7124 For process I/O, `call-process', `call-process-region', or `start-process'.
7125 For network I/O, `open-network-stream'.
7126
7127The remaining arguments should be the same arguments that were passed
7128to the primitive. Depending on which primitive, one of those arguments
7129is selected as the TARGET. For example, if OPERATION does file I/O,
7130whichever argument specifies the file name is TARGET.
7131
7132TARGET has a meaning which depends on OPERATION:
7133 For file I/O, TARGET is a file name.
7134 For process I/O, TARGET is a process name.
7135 For network I/O, TARGET is a service name or a port number
7136
7137This function looks up what specified for TARGET in,
7138`file-coding-system-alist', `process-coding-system-alist',
7139or `network-coding-system-alist' depending on OPERATION.
7140They may specify a coding system, a cons of coding systems,
7141or a function symbol to call.
7142In the last case, we call the function with one argument,
7143which is a list of all the arguments given to this function.
7144
7145usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7146 (nargs, args)
4ed46869
KH
7147 int nargs;
7148 Lisp_Object *args;
7149{
7150 Lisp_Object operation, target_idx, target, val;
7151 register Lisp_Object chain;
7152
7153 if (nargs < 2)
7154 error ("Too few arguments");
7155 operation = args[0];
7156 if (!SYMBOLP (operation)
7157 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7158 error ("Invalid first arguement");
4ed46869
KH
7159 if (nargs < 1 + XINT (target_idx))
7160 error ("Too few arguments for operation: %s",
7161 XSYMBOL (operation)->name->data);
7162 target = args[XINT (target_idx) + 1];
7163 if (!(STRINGP (target)
7164 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7165 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7166
2e34157c
RS
7167 chain = ((EQ (operation, Qinsert_file_contents)
7168 || EQ (operation, Qwrite_region))
02ba4723 7169 ? Vfile_coding_system_alist
2e34157c 7170 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7171 ? Vnetwork_coding_system_alist
7172 : Vprocess_coding_system_alist));
4ed46869
KH
7173 if (NILP (chain))
7174 return Qnil;
7175
03699b14 7176 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 7177 {
f44d27ce 7178 Lisp_Object elt;
4ed46869 7179
df7492f9 7180 elt = XCAR (chain);
4ed46869
KH
7181 if (CONSP (elt)
7182 && ((STRINGP (target)
03699b14
KR
7183 && STRINGP (XCAR (elt))
7184 && fast_string_match (XCAR (elt), target) >= 0)
7185 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 7186 {
03699b14 7187 val = XCDR (elt);
b19fd4c5
KH
7188 /* Here, if VAL is both a valid coding system and a valid
7189 function symbol, we return VAL as a coding system. */
02ba4723
KH
7190 if (CONSP (val))
7191 return val;
7192 if (! SYMBOLP (val))
7193 return Qnil;
7194 if (! NILP (Fcoding_system_p (val)))
7195 return Fcons (val, val);
b19fd4c5
KH
7196 if (! NILP (Ffboundp (val)))
7197 {
7198 val = call1 (val, Flist (nargs, args));
7199 if (CONSP (val))
7200 return val;
7201 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7202 return Fcons (val, val);
7203 }
02ba4723
KH
7204 return Qnil;
7205 }
4ed46869
KH
7206 }
7207 return Qnil;
7208}
7209
df7492f9 7210DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 7211 Sset_coding_system_priority, 0, MANY, 0,
da7db224 7212 doc: /* Assign higher priority to the coding systems given as arguments.
1fcd6c8b 7213usage: (set-coding-system-priority CODING-SYSTEM ...) */)
df7492f9
KH
7214 (nargs, args)
7215 int nargs;
7216 Lisp_Object *args;
7217{
7218 int i, j;
7219 int changed[coding_category_max];
7220 enum coding_category priorities[coding_category_max];
7221
7222 bzero (changed, sizeof changed);
7223
7224 for (i = j = 0; i < nargs; i++)
7225 {
7226 enum coding_category category;
7227 Lisp_Object spec, attrs;
7228
7229 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7230 attrs = AREF (spec, 0);
7231 category = XINT (CODING_ATTR_CATEGORY (attrs));
7232 if (changed[category])
7233 /* Ignore this coding system because a coding system of the
7234 same category already had a higher priority. */
7235 continue;
7236 changed[category] = 1;
7237 priorities[j++] = category;
7238 if (coding_categories[category].id >= 0
7239 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7240 setup_coding_system (args[i], &coding_categories[category]);
7241 }
7242
7243 /* Now we have decided top J priorities. Reflect the order of the
7244 original priorities to the remaining priorities. */
7245
7246 for (i = j, j = 0; i < coding_category_max; i++, j++)
7247 {
7248 while (j < coding_category_max
7249 && changed[coding_priorities[j]])
7250 j++;
7251 if (j == coding_category_max)
7252 abort ();
7253 priorities[i] = coding_priorities[j];
7254 }
7255
7256 bcopy (priorities, coding_priorities, sizeof priorities);
7257 return Qnil;
7258}
7259
7260DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7261 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
7262 doc: /* Return a list of coding systems ordered by their priorities.
7263HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
7264 (highestp)
7265 Lisp_Object highestp;
d46c5b12
KH
7266{
7267 int i;
df7492f9 7268 Lisp_Object val;
d46c5b12 7269
df7492f9 7270 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7271 {
df7492f9
KH
7272 enum coding_category category = coding_priorities[i];
7273 int id = coding_categories[category].id;
7274 Lisp_Object attrs;
7275
7276 if (id < 0)
7277 continue;
7278 attrs = CODING_ID_ATTRS (id);
7279 if (! NILP (highestp))
7280 return CODING_ATTR_BASE_NAME (attrs);
7281 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7282 }
7283 return Fnreverse (val);
7284}
7285
f0064e1f
DL
7286static char *suffixes[] = { "-unix", "-dos", "-mac" };
7287
df7492f9
KH
7288static Lisp_Object
7289make_subsidiaries (base)
7290 Lisp_Object base;
7291{
7292 Lisp_Object subsidiaries;
df7492f9
KH
7293 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7294 char *buf = (char *) alloca (base_name_len + 6);
7295 int i;
7296
7297 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7298 subsidiaries = Fmake_vector (make_number (3), Qnil);
7299 for (i = 0; i < 3; i++)
7300 {
7301 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7302 ASET (subsidiaries, i, intern (buf));
7303 }
7304 return subsidiaries;
7305}
7306
7307
7308DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7309 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
7310 doc: /* For internal use only.
7311usage: (define-coding-system-internal ...) */)
df7492f9
KH
7312 (nargs, args)
7313 int nargs;
7314 Lisp_Object *args;
7315{
7316 Lisp_Object name;
7317 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7318 Lisp_Object attrs; /* Vector of attributes. */
7319 Lisp_Object eol_type;
7320 Lisp_Object aliases;
7321 Lisp_Object coding_type, charset_list, safe_charsets;
7322 enum coding_category category;
7323 Lisp_Object tail, val;
7324 int max_charset_id = 0;
7325 int i;
7326
7327 if (nargs < coding_arg_max)
7328 goto short_args;
7329
7330 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7331
7332 name = args[coding_arg_name];
7333 CHECK_SYMBOL (name);
7334 CODING_ATTR_BASE_NAME (attrs) = name;
7335
7336 val = args[coding_arg_mnemonic];
7337 if (! STRINGP (val))
7338 CHECK_CHARACTER (val);
7339 CODING_ATTR_MNEMONIC (attrs) = val;
7340
7341 coding_type = args[coding_arg_coding_type];
7342 CHECK_SYMBOL (coding_type);
7343 CODING_ATTR_TYPE (attrs) = coding_type;
7344
7345 charset_list = args[coding_arg_charset_list];
7346 if (SYMBOLP (charset_list))
7347 {
7348 if (EQ (charset_list, Qiso_2022))
7349 {
7350 if (! EQ (coding_type, Qiso_2022))
7351 error ("Invalid charset-list");
7352 charset_list = Viso_2022_charset_list;
7353 }
7354 else if (EQ (charset_list, Qemacs_mule))
7355 {
7356 if (! EQ (coding_type, Qemacs_mule))
7357 error ("Invalid charset-list");
7358 charset_list = Vemacs_mule_charset_list;
7359 }
7360 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7361 if (max_charset_id < XFASTINT (XCAR (tail)))
7362 max_charset_id = XFASTINT (XCAR (tail));
7363 }
7364 else
7365 {
7366 charset_list = Fcopy_sequence (charset_list);
7367 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7368 {
7369 struct charset *charset;
7370
7371 val = Fcar (tail);
7372 CHECK_CHARSET_GET_CHARSET (val, charset);
7373 if (EQ (coding_type, Qiso_2022)
7374 ? CHARSET_ISO_FINAL (charset) < 0
7375 : EQ (coding_type, Qemacs_mule)
7376 ? CHARSET_EMACS_MULE_ID (charset) < 0
7377 : 0)
7378 error ("Can't handle charset `%s'",
7379 XSYMBOL (CHARSET_NAME (charset))->name->data);
7380
7381 XCAR (tail) = make_number (charset->id);
7382 if (max_charset_id < charset->id)
7383 max_charset_id = charset->id;
7384 }
7385 }
7386 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7387
7388 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7389 make_number (255));
7390 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7391 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7392 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7393
7394 val = args[coding_arg_decode_translation_table];
7395 if (! NILP (val))
7396 CHECK_CHAR_TABLE (val);
7397 CODING_ATTR_DECODE_TBL (attrs) = val;
7398
7399 val = args[coding_arg_encode_translation_table];
7400 if (! NILP (val))
7401 CHECK_CHAR_TABLE (val);
7402 CODING_ATTR_ENCODE_TBL (attrs) = val;
7403
7404 val = args[coding_arg_post_read_conversion];
7405 CHECK_SYMBOL (val);
7406 CODING_ATTR_POST_READ (attrs) = val;
7407
7408 val = args[coding_arg_pre_write_conversion];
7409 CHECK_SYMBOL (val);
7410 CODING_ATTR_PRE_WRITE (attrs) = val;
7411
7412 val = args[coding_arg_default_char];
7413 if (NILP (val))
7414 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7415 else
7416 {
7417 CHECK_CHARACTER (val);
7418 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7419 }
7420
7421 val = args[coding_arg_plist];
7422 CHECK_LIST (val);
7423 CODING_ATTR_PLIST (attrs) = val;
7424
7425 if (EQ (coding_type, Qcharset))
7426 {
c7c66a95
KH
7427 /* Generate a lisp vector of 256 elements. Each element is nil,
7428 integer, or a list of charset IDs.
7429
7430 If Nth element is nil, the byte code N is invalid in this
7431 coding system.
7432
7433 If Nth element is a number NUM, N is the first byte of a
7434 charset whose ID is NUM.
7435
7436 If Nth element is a list of charset IDs, N is the first byte
7437 of one of them. The list is sorted by dimensions of the
7438 charsets. A charset of smaller dimension comes firtst.
7439 */
df7492f9
KH
7440 val = Fmake_vector (make_number (256), Qnil);
7441
7442 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7443 {
c7c66a95
KH
7444 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7445 int dim = CHARSET_DIMENSION (charset);
7446 int idx = (dim - 1) * 4;
7447
15d143f7
KH
7448 for (i = charset->code_space[idx];
7449 i <= charset->code_space[idx + 1]; i++)
7450 {
c7c66a95
KH
7451 Lisp_Object tmp, tmp2;
7452 int dim2;
7453
7454 tmp = AREF (val, i);
7455 if (NILP (tmp))
7456 tmp = XCAR (tail);
7457 else if (NUMBERP (tmp))
7458 {
7459 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7460 if (dim < dim2)
c7c66a95 7461 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
7462 else
7463 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 7464 }
15d143f7 7465 else
c7c66a95
KH
7466 {
7467 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7468 {
7469 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7470 if (dim < dim2)
7471 break;
7472 }
7473 if (NILP (tmp2))
7474 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7475 else
7476 {
7477 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7478 XSETCAR (tmp2, XCAR (tail));
7479 }
7480 }
7481 ASET (val, i, tmp);
15d143f7 7482 }
df7492f9
KH
7483 }
7484 ASET (attrs, coding_attr_charset_valids, val);
7485 category = coding_category_charset;
7486 }
7487 else if (EQ (coding_type, Qccl))
7488 {
7489 Lisp_Object valids;
7490
7491 if (nargs < coding_arg_ccl_max)
7492 goto short_args;
7493
7494 val = args[coding_arg_ccl_decoder];
7495 CHECK_CCL_PROGRAM (val);
7496 if (VECTORP (val))
7497 val = Fcopy_sequence (val);
7498 ASET (attrs, coding_attr_ccl_decoder, val);
7499
7500 val = args[coding_arg_ccl_encoder];
7501 CHECK_CCL_PROGRAM (val);
7502 if (VECTORP (val))
7503 val = Fcopy_sequence (val);
7504 ASET (attrs, coding_attr_ccl_encoder, val);
7505
7506 val = args[coding_arg_ccl_valids];
7507 valids = Fmake_string (make_number (256), make_number (0));
7508 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7509 {
7510 val = Fcar (tail);
7511 if (INTEGERP (val))
c197f191 7512 ASET (valids, XINT (val), make_number (1));
df7492f9
KH
7513 else
7514 {
7515 int from, to;
7516
7517 CHECK_CONS (val);
7518 CHECK_NUMBER (XCAR (val));
7519 CHECK_NUMBER (XCDR (val));
7520 from = XINT (XCAR (val));
7521 to = XINT (XCDR (val));
7522 for (i = from; i <= to; i++)
c197f191 7523 ASET (valids, i, make_number (1));
df7492f9
KH
7524 }
7525 }
7526 ASET (attrs, coding_attr_ccl_valids, valids);
7527
7528 category = coding_category_ccl;
7529 }
7530 else if (EQ (coding_type, Qutf_16))
7531 {
7532 Lisp_Object bom, endian;
7533
7534 if (nargs < coding_arg_utf16_max)
7535 goto short_args;
7536
7537 bom = args[coding_arg_utf16_bom];
7538 if (! NILP (bom) && ! EQ (bom, Qt))
7539 {
7540 CHECK_CONS (bom);
7541 CHECK_CODING_SYSTEM (XCAR (bom));
7542 CHECK_CODING_SYSTEM (XCDR (bom));
7543 }
7544 ASET (attrs, coding_attr_utf_16_bom, bom);
7545
7546 endian = args[coding_arg_utf16_endian];
7547 ASET (attrs, coding_attr_utf_16_endian, endian);
7548
7549 category = (CONSP (bom)
7550 ? coding_category_utf_16_auto
7551 : NILP (bom)
7552 ? (NILP (endian)
7553 ? coding_category_utf_16_be_nosig
7554 : coding_category_utf_16_le_nosig)
7555 : (NILP (endian)
7556 ? coding_category_utf_16_be
7557 : coding_category_utf_16_le));
7558 }
7559 else if (EQ (coding_type, Qiso_2022))
7560 {
7561 Lisp_Object initial, reg_usage, request, flags;
0be8721c 7562 int i, id;
1397dc18 7563
df7492f9
KH
7564 if (nargs < coding_arg_iso2022_max)
7565 goto short_args;
7566
7567 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7568 CHECK_VECTOR (initial);
7569 for (i = 0; i < 4; i++)
7570 {
7571 val = Faref (initial, make_number (i));
7572 if (! NILP (val))
7573 {
7574 CHECK_CHARSET_GET_ID (val, id);
7575 ASET (initial, i, make_number (id));
7576 }
7577 else
7578 ASET (initial, i, make_number (-1));
7579 }
7580
7581 reg_usage = args[coding_arg_iso2022_reg_usage];
7582 CHECK_CONS (reg_usage);
7583 CHECK_NATNUM (XCAR (reg_usage));
7584 CHECK_NATNUM (XCDR (reg_usage));
7585
7586 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7587 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 7588 {
df7492f9
KH
7589 int id;
7590
7591 val = Fcar (tail);
7592 CHECK_CONS (val);
7593 CHECK_CHARSET_GET_ID (XCAR (val), id);
7594 CHECK_NATNUM (XCDR (val));
7595 if (XINT (XCDR (val)) >= 4)
7596 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7597 XCAR (val) = make_number (id);
1397dc18 7598 }
df7492f9
KH
7599
7600 flags = args[coding_arg_iso2022_flags];
7601 CHECK_NATNUM (flags);
7602 i = XINT (flags);
7603 if (EQ (args[coding_arg_charset_list], Qiso_2022))
7604 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7605
7606 ASET (attrs, coding_attr_iso_initial, initial);
7607 ASET (attrs, coding_attr_iso_usage, reg_usage);
7608 ASET (attrs, coding_attr_iso_request, request);
7609 ASET (attrs, coding_attr_iso_flags, flags);
7610 setup_iso_safe_charsets (attrs);
7611
7612 if (i & CODING_ISO_FLAG_SEVEN_BITS)
7613 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7614 | CODING_ISO_FLAG_SINGLE_SHIFT))
7615 ? coding_category_iso_7_else
7616 : EQ (args[coding_arg_charset_list], Qiso_2022)
7617 ? coding_category_iso_7
7618 : coding_category_iso_7_tight);
7619 else
7620 {
7621 int id = XINT (AREF (initial, 1));
7622
c6fb6e98 7623 category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
df7492f9
KH
7624 || EQ (args[coding_arg_charset_list], Qiso_2022)
7625 || id < 0)
7626 ? coding_category_iso_8_else
7627 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7628 ? coding_category_iso_8_1
7629 : coding_category_iso_8_2);
7630 }
7631 }
7632 else if (EQ (coding_type, Qemacs_mule))
7633 {
7634 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7635 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7636
7637 category = coding_category_emacs_mule;
7638 }
7639 else if (EQ (coding_type, Qshift_jis))
7640 {
7641
7642 struct charset *charset;
7643
7644 if (XINT (Flength (charset_list)) != 3)
7645 error ("There should be just three charsets");
7646
7647 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7648 if (CHARSET_DIMENSION (charset) != 1)
7649 error ("Dimension of charset %s is not one",
7650 XSYMBOL (CHARSET_NAME (charset))->name->data);
7651
7652 charset_list = XCDR (charset_list);
7653 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7654 if (CHARSET_DIMENSION (charset) != 1)
7655 error ("Dimension of charset %s is not one",
7656 XSYMBOL (CHARSET_NAME (charset))->name->data);
7657
7658 charset_list = XCDR (charset_list);
7659 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7660 if (CHARSET_DIMENSION (charset) != 2)
7661 error ("Dimension of charset %s is not two",
7662 XSYMBOL (CHARSET_NAME (charset))->name->data);
7663
7664 category = coding_category_sjis;
7665 Vsjis_coding_system = name;
7666 }
7667 else if (EQ (coding_type, Qbig5))
7668 {
7669 struct charset *charset;
7670
7671 if (XINT (Flength (charset_list)) != 2)
7672 error ("There should be just two charsets");
7673
7674 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7675 if (CHARSET_DIMENSION (charset) != 1)
7676 error ("Dimension of charset %s is not one",
7677 XSYMBOL (CHARSET_NAME (charset))->name->data);
7678
7679 charset_list = XCDR (charset_list);
7680 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7681 if (CHARSET_DIMENSION (charset) != 2)
7682 error ("Dimension of charset %s is not two",
7683 XSYMBOL (CHARSET_NAME (charset))->name->data);
7684
7685 category = coding_category_big5;
7686 Vbig5_coding_system = name;
7687 }
7688 else if (EQ (coding_type, Qraw_text))
7689 category = coding_category_raw_text;
7690 else if (EQ (coding_type, Qutf_8))
7691 category = coding_category_utf_8;
7692 else if (EQ (coding_type, Qundecided))
7693 category = coding_category_undecided;
7694 else
7695 error ("Invalid coding system type: %s",
7696 XSYMBOL (coding_type)->name->data);
7697
7698 CODING_ATTR_CATEGORY (attrs) = make_number (category);
7699
7700 eol_type = args[coding_arg_eol_type];
7701 if (! NILP (eol_type)
7702 && ! EQ (eol_type, Qunix)
7703 && ! EQ (eol_type, Qdos)
7704 && ! EQ (eol_type, Qmac))
7705 error ("Invalid eol-type");
7706
7707 aliases = Fcons (name, Qnil);
7708
7709 if (NILP (eol_type))
7710 {
7711 eol_type = make_subsidiaries (name);
7712 for (i = 0; i < 3; i++)
1397dc18 7713 {
df7492f9
KH
7714 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7715
7716 this_name = AREF (eol_type, i);
7717 this_aliases = Fcons (this_name, Qnil);
7718 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7719 this_spec = Fmake_vector (make_number (3), attrs);
7720 ASET (this_spec, 1, this_aliases);
7721 ASET (this_spec, 2, this_eol_type);
7722 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7723 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7724 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7725 Vcoding_system_alist);
1397dc18 7726 }
d46c5b12 7727 }
1397dc18 7728
df7492f9
KH
7729 spec_vec = Fmake_vector (make_number (3), attrs);
7730 ASET (spec_vec, 1, aliases);
7731 ASET (spec_vec, 2, eol_type);
7732
7733 Fputhash (name, spec_vec, Vcoding_system_hash_table);
7734 Vcoding_system_list = Fcons (name, Vcoding_system_list);
7735 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7736 Vcoding_system_alist);
7737
7738 {
7739 int id = coding_categories[category].id;
7740
7741 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7742 setup_coding_system (name, &coding_categories[category]);
7743 }
7744
d46c5b12 7745 return Qnil;
df7492f9
KH
7746
7747 short_args:
7748 return Fsignal (Qwrong_number_of_arguments,
7749 Fcons (intern ("define-coding-system-internal"),
7750 make_number (nargs)));
d46c5b12
KH
7751}
7752
da7db224
DL
7753/* Fixme: should this record the alias relationships for
7754 diagnostics? */
df7492f9
KH
7755DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7756 Sdefine_coding_system_alias, 2, 2, 0,
7757 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7758 (alias, coding_system)
7759 Lisp_Object alias, coding_system;
66cfb530 7760{
df7492f9 7761 Lisp_Object spec, aliases, eol_type;
84d60297 7762
df7492f9
KH
7763 CHECK_SYMBOL (alias);
7764 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7765 aliases = AREF (spec, 1);
7766 while (!NILP (XCDR (aliases)))
7767 aliases = XCDR (aliases);
7768 XCDR (aliases) = Fcons (alias, Qnil);
66cfb530 7769
df7492f9
KH
7770 eol_type = AREF (spec, 2);
7771 if (VECTORP (eol_type))
66cfb530 7772 {
df7492f9
KH
7773 Lisp_Object subsidiaries;
7774 int i;
7775
7776 subsidiaries = make_subsidiaries (alias);
7777 for (i = 0; i < 3; i++)
7778 Fdefine_coding_system_alias (AREF (subsidiaries, i),
7779 AREF (eol_type, i));
7780
7781 ASET (spec, 2, subsidiaries);
66cfb530 7782 }
df7492f9
KH
7783
7784 Fputhash (alias, spec, Vcoding_system_hash_table);
5bad0796
DL
7785 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
7786 Vcoding_system_alist);
66cfb530
KH
7787
7788 return Qnil;
7789}
7790
df7492f9
KH
7791DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7792 1, 1, 0,
7793 doc: /* Return the base of CODING-SYSTEM.
da7db224 7794Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
7795 (coding_system)
7796 Lisp_Object coding_system;
7797{
7798 Lisp_Object spec, attrs;
7799
7800 if (NILP (coding_system))
7801 return (Qno_conversion);
7802 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7803 attrs = AREF (spec, 0);
7804 return CODING_ATTR_BASE_NAME (attrs);
7805}
7806
7807DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7808 1, 1, 0,
7809 doc: "Return the property list of CODING-SYSTEM.")
7810 (coding_system)
7811 Lisp_Object coding_system;
7812{
7813 Lisp_Object spec, attrs;
7814
7815 if (NILP (coding_system))
7816 coding_system = Qno_conversion;
7817 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7818 attrs = AREF (spec, 0);
7819 return CODING_ATTR_PLIST (attrs);
7820}
7821
7822
7823DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7824 1, 1, 0,
da7db224 7825 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
7826 (coding_system)
7827 Lisp_Object coding_system;
7828{
7829 Lisp_Object spec;
7830
7831 if (NILP (coding_system))
7832 coding_system = Qno_conversion;
7833 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 7834 return AREF (spec, 1);
df7492f9
KH
7835}
7836
7837DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7838 Scoding_system_eol_type, 1, 1, 0,
7839 doc: /* Return eol-type of CODING-SYSTEM.
7840An eol-type is integer 0, 1, 2, or a vector of coding systems.
7841
7842Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7843and CR respectively.
7844
7845A vector value indicates that a format of end-of-line should be
7846detected automatically. Nth element of the vector is the subsidiary
7847coding system whose eol-type is N. */)
7848 (coding_system)
7849 Lisp_Object coding_system;
7850{
7851 Lisp_Object spec, eol_type;
7852 int n;
7853
7854 if (NILP (coding_system))
7855 coding_system = Qno_conversion;
7856 if (! CODING_SYSTEM_P (coding_system))
7857 return Qnil;
7858 spec = CODING_SYSTEM_SPEC (coding_system);
7859 eol_type = AREF (spec, 2);
7860 if (VECTORP (eol_type))
7861 return Fcopy_sequence (eol_type);
7862 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7863 return make_number (n);
7864}
7865
4ed46869
KH
7866#endif /* emacs */
7867
7868\f
1397dc18 7869/*** 12. Postamble ***/
4ed46869 7870
dfcf069d 7871void
4ed46869
KH
7872init_coding_once ()
7873{
7874 int i;
7875
df7492f9
KH
7876 for (i = 0; i < coding_category_max; i++)
7877 {
7878 coding_categories[i].id = -1;
7879 coding_priorities[i] = i;
7880 }
4ed46869
KH
7881
7882 /* ISO2022 specific initialize routine. */
7883 for (i = 0; i < 0x20; i++)
b73bfc1c 7884 iso_code_class[i] = ISO_control_0;
4ed46869
KH
7885 for (i = 0x21; i < 0x7F; i++)
7886 iso_code_class[i] = ISO_graphic_plane_0;
7887 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 7888 iso_code_class[i] = ISO_control_1;
4ed46869
KH
7889 for (i = 0xA1; i < 0xFF; i++)
7890 iso_code_class[i] = ISO_graphic_plane_1;
7891 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7892 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7893 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7894 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7895 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7896 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7897 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7898 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7899 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7900 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7901
b843d1ae 7902 inhibit_pre_post_conversion = 0;
df7492f9
KH
7903
7904 for (i = 0; i < 256; i++)
7905 {
7906 emacs_mule_bytes[i] = 1;
7907 }
7c78e542
KH
7908 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
7909 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
7910 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
7911 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
7912}
7913
7914#ifdef emacs
7915
dfcf069d 7916void
e0e989f6
KH
7917syms_of_coding ()
7918{
df7492f9
KH
7919 staticpro (&Vcoding_system_hash_table);
7920 Vcoding_system_hash_table = Fmakehash (Qeq);
7921
7922 staticpro (&Vsjis_coding_system);
7923 Vsjis_coding_system = Qnil;
7924
7925 staticpro (&Vbig5_coding_system);
7926 Vbig5_coding_system = Qnil;
7927
7928 staticpro (&Vcode_conversion_work_buf_list);
7929 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 7930
df7492f9
KH
7931 staticpro (&Vcode_conversion_reused_work_buf);
7932 Vcode_conversion_reused_work_buf = Qnil;
7933
7934 DEFSYM (Qcharset, "charset");
7935 DEFSYM (Qtarget_idx, "target-idx");
7936 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
7937 Fset (Qcoding_system_history, Qnil);
7938
9ce27fde 7939 /* Target FILENAME is the first argument. */
e0e989f6 7940 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 7941 /* Target FILENAME is the third argument. */
e0e989f6
KH
7942 Fput (Qwrite_region, Qtarget_idx, make_number (2));
7943
df7492f9 7944 DEFSYM (Qcall_process, "call-process");
9ce27fde 7945 /* Target PROGRAM is the first argument. */
e0e989f6
KH
7946 Fput (Qcall_process, Qtarget_idx, make_number (0));
7947
df7492f9 7948 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 7949 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7950 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7951
df7492f9 7952 DEFSYM (Qstart_process, "start-process");
9ce27fde 7953 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7954 Fput (Qstart_process, Qtarget_idx, make_number (2));
7955
df7492f9 7956 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 7957 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
7958 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7959
df7492f9
KH
7960 DEFSYM (Qcoding_system, "coding-system");
7961 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 7962
df7492f9
KH
7963 DEFSYM (Qeol_type, "eol-type");
7964 DEFSYM (Qunix, "unix");
7965 DEFSYM (Qdos, "dos");
4ed46869 7966
df7492f9
KH
7967 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
7968 DEFSYM (Qpost_read_conversion, "post-read-conversion");
7969 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
7970 DEFSYM (Qdefault_char, "default-char");
7971 DEFSYM (Qundecided, "undecided");
7972 DEFSYM (Qno_conversion, "no-conversion");
7973 DEFSYM (Qraw_text, "raw-text");
4ed46869 7974
df7492f9 7975 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 7976
df7492f9 7977 DEFSYM (Qutf_8, "utf-8");
27901516 7978
df7492f9
KH
7979 DEFSYM (Qutf_16, "utf-16");
7980 DEFSYM (Qutf_16_be, "utf-16-be");
7981 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
7982 DEFSYM (Qutf_16_le, "utf-16-l3");
7983 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
7984 DEFSYM (Qsignature, "signature");
7985 DEFSYM (Qendian, "endian");
7986 DEFSYM (Qbig, "big");
7987 DEFSYM (Qlittle, "little");
27901516 7988
df7492f9
KH
7989 DEFSYM (Qshift_jis, "shift-jis");
7990 DEFSYM (Qbig5, "big5");
4ed46869 7991
df7492f9 7992 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 7993
df7492f9 7994 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
7995 Fput (Qcoding_system_error, Qerror_conditions,
7996 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7997 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 7998 build_string ("Invalid coding system"));
4ed46869 7999
df7492f9
KH
8000 /* Intern this now in case it isn't already done.
8001 Setting this variable twice is harmless.
8002 But don't staticpro it here--that is done in alloc.c. */
8003 Qchar_table_extra_slots = intern ("char-table-extra-slots");
4ed46869 8004
df7492f9 8005 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 8006 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
8007 DEFSYM (Qtranslation_table_id, "translation-table-id");
8008 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8009 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
bdd9fb48 8010
df7492f9 8011 DEFSYM (Qvalid_codes, "valid-codes");
05e6f5dc 8012
df7492f9 8013 DEFSYM (Qemacs_mule, "emacs-mule");
05e6f5dc 8014
df7492f9
KH
8015 Vcoding_category_table
8016 = Fmake_vector (make_number (coding_category_max), Qnil);
8017 staticpro (&Vcoding_category_table);
8018 /* Followings are target of code detection. */
8019 ASET (Vcoding_category_table, coding_category_iso_7,
8020 intern ("coding-category-iso-7"));
8021 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8022 intern ("coding-category-iso-7-tight"));
8023 ASET (Vcoding_category_table, coding_category_iso_8_1,
8024 intern ("coding-category-iso-8-1"));
8025 ASET (Vcoding_category_table, coding_category_iso_8_2,
8026 intern ("coding-category-iso-8-2"));
8027 ASET (Vcoding_category_table, coding_category_iso_7_else,
8028 intern ("coding-category-iso-7-else"));
8029 ASET (Vcoding_category_table, coding_category_iso_8_else,
8030 intern ("coding-category-iso-8-else"));
8031 ASET (Vcoding_category_table, coding_category_utf_8,
8032 intern ("coding-category-utf-8"));
8033 ASET (Vcoding_category_table, coding_category_utf_16_be,
8034 intern ("coding-category-utf-16-be"));
8035 ASET (Vcoding_category_table, coding_category_utf_16_le,
8036 intern ("coding-category-utf-16-le"));
8037 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8038 intern ("coding-category-utf-16-be-nosig"));
8039 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8040 intern ("coding-category-utf-16-le-nosig"));
8041 ASET (Vcoding_category_table, coding_category_charset,
8042 intern ("coding-category-charset"));
8043 ASET (Vcoding_category_table, coding_category_sjis,
8044 intern ("coding-category-sjis"));
8045 ASET (Vcoding_category_table, coding_category_big5,
8046 intern ("coding-category-big5"));
8047 ASET (Vcoding_category_table, coding_category_ccl,
8048 intern ("coding-category-ccl"));
8049 ASET (Vcoding_category_table, coding_category_emacs_mule,
8050 intern ("coding-category-emacs-mule"));
8051 /* Followings are NOT target of code detection. */
8052 ASET (Vcoding_category_table, coding_category_raw_text,
8053 intern ("coding-category-raw-text"));
8054 ASET (Vcoding_category_table, coding_category_undecided,
8055 intern ("coding-category-undecided"));
70c22245 8056
4ed46869
KH
8057 defsubr (&Scoding_system_p);
8058 defsubr (&Sread_coding_system);
8059 defsubr (&Sread_non_nil_coding_system);
8060 defsubr (&Scheck_coding_system);
8061 defsubr (&Sdetect_coding_region);
d46c5b12 8062 defsubr (&Sdetect_coding_string);
05e6f5dc 8063 defsubr (&Sfind_coding_systems_region_internal);
df7492f9 8064 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
8065 defsubr (&Sdecode_coding_region);
8066 defsubr (&Sencode_coding_region);
8067 defsubr (&Sdecode_coding_string);
8068 defsubr (&Sencode_coding_string);
8069 defsubr (&Sdecode_sjis_char);
8070 defsubr (&Sencode_sjis_char);
8071 defsubr (&Sdecode_big5_char);
8072 defsubr (&Sencode_big5_char);
1ba9e4ab 8073 defsubr (&Sset_terminal_coding_system_internal);
c4825358 8074 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 8075 defsubr (&Sterminal_coding_system);
1ba9e4ab 8076 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 8077 defsubr (&Skeyboard_coding_system);
a5d301df 8078 defsubr (&Sfind_operation_coding_system);
df7492f9
KH
8079 defsubr (&Sset_coding_system_priority);
8080 defsubr (&Sdefine_coding_system_internal);
8081 defsubr (&Sdefine_coding_system_alias);
8082 defsubr (&Scoding_system_base);
8083 defsubr (&Scoding_system_plist);
8084 defsubr (&Scoding_system_aliases);
8085 defsubr (&Scoding_system_eol_type);
8086 defsubr (&Scoding_system_priority_list);
4ed46869 8087
4608c386 8088 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
8089 doc: /* List of coding systems.
8090
8091Do not alter the value of this variable manually. This variable should be
df7492f9 8092updated by the functions `define-coding-system' and
48b0f3ae 8093`define-coding-system-alias'. */);
4608c386
KH
8094 Vcoding_system_list = Qnil;
8095
8096 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
8097 doc: /* Alist of coding system names.
8098Each element is one element list of coding system name.
8099This variable is given to `completing-read' as TABLE argument.
8100
8101Do not alter the value of this variable manually. This variable should be
8102updated by the functions `make-coding-system' and
8103`define-coding-system-alias'. */);
4608c386
KH
8104 Vcoding_system_alist = Qnil;
8105
4ed46869 8106 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
8107 doc: /* List of coding-categories (symbols) ordered by priority.
8108
8109On detecting a coding system, Emacs tries code detection algorithms
8110associated with each coding-category one by one in this order. When
8111one algorithm agrees with a byte sequence of source text, the coding
8112system bound to the corresponding coding-category is selected. */);
4ed46869
KH
8113 {
8114 int i;
8115
8116 Vcoding_category_list = Qnil;
df7492f9 8117 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 8118 Vcoding_category_list
d46c5b12
KH
8119 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8120 Vcoding_category_list);
4ed46869
KH
8121 }
8122
8123 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
8124 doc: /* Specify the coding system for read operations.
8125It is useful to bind this variable with `let', but do not set it globally.
8126If the value is a coding system, it is used for decoding on read operation.
8127If not, an appropriate element is used from one of the coding system alists:
8128There are three such tables, `file-coding-system-alist',
8129`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
8130 Vcoding_system_for_read = Qnil;
8131
8132 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
8133 doc: /* Specify the coding system for write operations.
8134Programs bind this variable with `let', but you should not set it globally.
8135If the value is a coding system, it is used for encoding of output,
8136when writing it to a file and when sending it to a file or subprocess.
8137
8138If this does not specify a coding system, an appropriate element
8139is used from one of the coding system alists:
8140There are three such tables, `file-coding-system-alist',
8141`process-coding-system-alist', and `network-coding-system-alist'.
8142For output to files, if the above procedure does not specify a coding system,
8143the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
8144 Vcoding_system_for_write = Qnil;
8145
8146 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
8147 doc: /*
8148Coding system used in the latest file or process I/O. */);
4ed46869
KH
8149 Vlast_coding_system_used = Qnil;
8150
9ce27fde 8151 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
8152 doc: /*
8153*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
8154See info node `Coding Systems' and info node `Text and Binary' concerning
8155such conversion. */);
9ce27fde
KH
8156 inhibit_eol_conversion = 0;
8157
ed29121d 8158 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8159 doc: /*
8160Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8161Bind it to t if the process output is to be treated as if it were a file
8162read from some filesystem. */);
ed29121d
EZ
8163 inherit_process_coding_system = 0;
8164
02ba4723 8165 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8166 doc: /*
8167Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8168The format is ((PATTERN . VAL) ...),
8169where PATTERN is a regular expression matching a file name,
8170VAL is a coding system, a cons of coding systems, or a function symbol.
8171If VAL is a coding system, it is used for both decoding and encoding
8172the file contents.
8173If VAL is a cons of coding systems, the car part is used for decoding,
8174and the cdr part is used for encoding.
8175If VAL is a function symbol, the function must return a coding system
0192762c
DL
8176or a cons of coding systems which are used as above. The function gets
8177the arguments with which `find-operation-coding-systems' was called.
48b0f3ae
PJ
8178
8179See also the function `find-operation-coding-system'
8180and the variable `auto-coding-alist'. */);
02ba4723
KH
8181 Vfile_coding_system_alist = Qnil;
8182
8183 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8184 doc: /*
8185Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8186The format is ((PATTERN . VAL) ...),
8187where PATTERN is a regular expression matching a program name,
8188VAL is a coding system, a cons of coding systems, or a function symbol.
8189If VAL is a coding system, it is used for both decoding what received
8190from the program and encoding what sent to the program.
8191If VAL is a cons of coding systems, the car part is used for decoding,
8192and the cdr part is used for encoding.
8193If VAL is a function symbol, the function must return a coding system
8194or a cons of coding systems which are used as above.
8195
8196See also the function `find-operation-coding-system'. */);
02ba4723
KH
8197 Vprocess_coding_system_alist = Qnil;
8198
8199 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8200 doc: /*
8201Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8202The format is ((PATTERN . VAL) ...),
8203where PATTERN is a regular expression matching a network service name
8204or is a port number to connect to,
8205VAL is a coding system, a cons of coding systems, or a function symbol.
8206If VAL is a coding system, it is used for both decoding what received
8207from the network stream and encoding what sent to the network stream.
8208If VAL is a cons of coding systems, the car part is used for decoding,
8209and the cdr part is used for encoding.
8210If VAL is a function symbol, the function must return a coding system
8211or a cons of coding systems which are used as above.
8212
8213See also the function `find-operation-coding-system'. */);
02ba4723 8214 Vnetwork_coding_system_alist = Qnil;
4ed46869 8215
68c45bf0 8216 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8217 doc: /* Coding system to use with system messages.
8218Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8219 Vlocale_coding_system = Qnil;
8220
005f0d35 8221 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8222 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8223 doc: /*
8224*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8225 eol_mnemonic_unix = build_string (":");
4ed46869 8226
7722baf9 8227 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8228 doc: /*
8229*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8230 eol_mnemonic_dos = build_string ("\\");
4ed46869 8231
7722baf9 8232 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8233 doc: /*
8234*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8235 eol_mnemonic_mac = build_string ("/");
4ed46869 8236
7722baf9 8237 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8238 doc: /*
8239*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8240 eol_mnemonic_undecided = build_string (":");
4ed46869 8241
84fbb8a0 8242 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8243 doc: /*
8244*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8245 Venable_character_translation = Qt;
bdd9fb48 8246
f967223b 8247 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8248 &Vstandard_translation_table_for_decode,
8249 doc: /* Table for translating characters while decoding. */);
f967223b 8250 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8251
f967223b 8252 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8253 &Vstandard_translation_table_for_encode,
8254 doc: /* Table for translating characters while encoding. */);
f967223b 8255 Vstandard_translation_table_for_encode = Qnil;
4ed46869 8256
df7492f9 8257 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
8258 doc: /* Alist of charsets vs revision numbers.
8259While encoding, if a charset (car part of an element) is found,
df7492f9
KH
8260designate it with the escape sequence identifying revision (cdr part
8261of the element). */);
8262 Vcharset_revision_table = Qnil;
02ba4723
KH
8263
8264 DEFVAR_LISP ("default-process-coding-system",
8265 &Vdefault_process_coding_system,
48b0f3ae
PJ
8266 doc: /* Cons of coding systems used for process I/O by default.
8267The car part is used for decoding a process output,
8268the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 8269 Vdefault_process_coding_system = Qnil;
c4825358 8270
3f003981 8271 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
8272 doc: /*
8273Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
8274This is a vector of length 256.
8275If Nth element is non-nil, the existence of code N in a file
8276\(or output of subprocess) doesn't prevent it to be detected as
8277a coding system of ISO 2022 variant which has a flag
8278`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8279or reading output of a subprocess.
8280Only 128th through 159th elements has a meaning. */);
3f003981 8281 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
8282
8283 DEFVAR_LISP ("select-safe-coding-system-function",
8284 &Vselect_safe_coding_system_function,
df7492f9
KH
8285 doc: /*
8286Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
8287
8288If set, this function is called to force a user to select a proper
8289coding system which can encode the text in the case that a default
8290coding system used in each operation can't encode the text.
8291
8292The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
8293 Vselect_safe_coding_system_function = Qnil;
8294
22ab2303 8295 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 8296 &inhibit_iso_escape_detection,
df7492f9
KH
8297 doc: /*
8298If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
8299
8300By default, on reading a file, Emacs tries to detect how the text is
8301encoded. This code detection is sensitive to escape sequences. If
8302the sequence is valid as ISO2022, the code is determined as one of
8303the ISO2022 encodings, and the file is decoded by the corresponding
8304coding system (e.g. `iso-2022-7bit').
8305
8306However, there may be a case that you want to read escape sequences in
8307a file as is. In such a case, you can set this variable to non-nil.
8308Then, as the code detection ignores any escape sequences, no file is
8309detected as encoded in some ISO2022 encoding. The result is that all
8310escape sequences become visible in a buffer.
8311
8312The default value is nil, and it is strongly recommended not to change
8313it. That is because many Emacs Lisp source files that contain
8314non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8315in Emacs's distribution, and they won't be decoded correctly on
8316reading if you suppress escape sequence detection.
8317
8318The other way to read escape sequences in a file without decoding is
8319to explicitly specify some coding system that doesn't use ISO2022's
8320escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 8321 inhibit_iso_escape_detection = 0;
2c78b7e1
KH
8322
8323 {
8324 Lisp_Object args[coding_arg_max];
8325 Lisp_Object plist[14];
8326 int i;
8327
8328 for (i = 0; i < coding_arg_max; i++)
8329 args[i] = Qnil;
8330
8331 plist[0] = intern (":name");
8332 plist[1] = args[coding_arg_name] = Qno_conversion;
8333 plist[2] = intern (":mnemonic");
8334 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8335 plist[4] = intern (":coding-type");
8336 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8337 plist[6] = intern (":ascii-compatible-p");
8338 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8339 plist[8] = intern (":default-char");
8340 plist[9] = args[coding_arg_default_char] = make_number (0);
8341 plist[10] = intern (":docstring");
8342 plist[11] = build_string ("Do no conversion.\n\
8343\n\
8344When you visit a file with this coding, the file is read into a\n\
8345unibyte buffer as is, thus each byte of a file is treated as a\n\
8346character.");
8347 plist[12] = intern (":eol-type");
8348 plist[13] = args[coding_arg_eol_type] = Qunix;
8349 args[coding_arg_plist] = Flist (14, plist);
8350 Fdefine_coding_system_internal (coding_arg_max, args);
8351 }
8352
8353 setup_coding_system (Qno_conversion, &keyboard_coding);
8354 setup_coding_system (Qno_conversion, &terminal_coding);
8355 setup_coding_system (Qno_conversion, &safe_terminal_coding);
4ed46869
KH
8356}
8357
68c45bf0
PE
8358char *
8359emacs_strerror (error_number)
8360 int error_number;
8361{
8362 char *str;
8363
ca9c0567 8364 synchronize_system_messages_locale ();
68c45bf0
PE
8365 str = strerror (error_number);
8366
8367 if (! NILP (Vlocale_coding_system))
8368 {
8369 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8370 Vlocale_coding_system,
8371 0);
8372 str = (char *) XSTRING (dec)->data;
8373 }
8374
8375 return str;
8376}
8377
4ed46869 8378#endif /* emacs */