(ENCODE_DESIGNATION, decode_eol)
[bpt/emacs.git] / src / coding.c
CommitLineData
4ed46869 1/* Coding system handler (conversion, detection, and etc).
4a2f9c6a 2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
203cb916 3 Licensed to the Free Software Foundation.
70ad9fc4 4 Copyright (C) 2001 Free Software Foundation, Inc.
df7492f9
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869
KH
25
26/*** TABLE OF CONTENTS ***
27
b73bfc1c 28 0. General comments
4ed46869 29 1. Preamble
df7492f9
KH
30 2. Emacs' internal format (emacs-utf-8) handlers
31 3. UTF-8 handlers
32 4. UTF-16 handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
35 7. ISO2022 handlers
36 8. Shift-JIS and BIG5 handlers
37 9. CCL handlers
38 10. C library functions
39 11. Emacs Lisp library functions
40 12. Postamble
4ed46869
KH
41
42*/
43
df7492f9 44/*** 0. General comments ***
b73bfc1c
KH
45
46
df7492f9 47CODING SYSTEM
4ed46869 48
5bad0796
DL
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
e19c3639
KH
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system.
57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
5bad0796 60 stored in the hash table Vcharset_hash_table. The conversion from
e19c3639
KH
61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol.
63
64 Coding systems are classified into the following types depending on
5bad0796 65 the encoding mechanism. Here's a brief description of the types.
df7492f9
KH
66
67 o UTF-8
68
69 o UTF-16
70
71 o Charset-base coding system
72
73 A coding system defined by one or more (coded) character sets.
5bad0796 74 Decoding and encoding are done by a code converter defined for each
df7492f9
KH
75 character set.
76
5bad0796 77 o Old Emacs internal format (emacs-mule)
df7492f9 78
5bad0796 79 The coding system adopted by old versions of Emacs (20 and 21).
4ed46869 80
df7492f9 81 o ISO2022-base coding system
93dec019 82
df7492f9
KH
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
86 variants of ISO2022.
87
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
89
4ed46869
KH
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
df7492f9 92 section 8.
4ed46869 93
df7492f9 94 o BIG5
4ed46869 95
df7492f9 96 A coding system to encode character sets: ASCII and Big5. Widely
5a936b46 97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
df7492f9
KH
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
4ed46869 101
df7492f9 102 o CCL
27901516 103
5bad0796 104 If a user wants to decode/encode text encoded in a coding system
df7492f9
KH
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
27901516 108
df7492f9 109 o Raw-text
4ed46869 110
5a936b46 111 A coding system for text containing raw eight-bit data. Emacs
5bad0796 112 treats each byte of source text as a character (except for
df7492f9 113 end-of-line conversion).
4ed46869 114
df7492f9
KH
115 o No-conversion
116
117 Like raw text, but don't do end-of-line conversion.
4ed46869 118
4ed46869 119
df7492f9 120END-OF-LINE FORMAT
4ed46869 121
5bad0796 122 How text end-of-line is encoded depends on operating system. For
df7492f9 123 instance, Unix's format is just one byte of LF (line-feed) code,
f4dee582 124 whereas DOS's format is two-byte sequence of `carriage-return' and
d46c5b12
KH
125 `line-feed' codes. MacOS's format is usually one byte of
126 `carriage-return'.
4ed46869 127
5bad0796 128 Since text character encoding and end-of-line encoding are
df7492f9
KH
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
4ed46869 131
e19c3639
KH
132STRUCT CODING_SYSTEM
133
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
5bad0796 137 conversion (e.g. the location of source and destination data).
e19c3639 138
4ed46869
KH
139*/
140
df7492f9
KH
141/* COMMON MACROS */
142
143
4ed46869
KH
144/*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
145
df7492f9
KH
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwize (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
151
152 It also resets some bits of an integer pointed by MASK. The macros
153 CATEGORY_MASK_XXX specifies each bit of this integer.
154
155 Below is the template of these functions. */
156
4ed46869 157#if 0
df7492f9
KH
158static int
159detect_coding_XXX (coding, mask)
160 struct coding_system *coding;
161 int *mask;
4ed46869 162{
df7492f9
KH
163 unsigned char *src = coding->source;
164 unsigned char *src_end = coding->source + coding->src_bytes;
165 int multibytep = coding->src_multibyte;
166 int c;
167 int found = 0;
168 ...;
169
170 while (1)
171 {
172 /* Get one byte from the source. If the souce is exausted, jump
173 to no_more_source:. */
174 ONE_MORE_BYTE (c);
175 /* Check if it conforms to XXX. If not, break the loop. */
176 }
177 /* As the data is invalid for XXX, reset a proper bits. */
178 *mask &= ~CODING_CATEGORY_XXX;
179 return 0;
180 no_more_source:
181 /* The source exausted. */
182 if (!found)
183 /* ASCII characters only. */
184 return 0;
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask &= CODING_CATEGORY_XXX;
187 return 1;
4ed46869
KH
188}
189#endif
190
191/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
192
df7492f9
KH
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
d46c5b12 197
df7492f9
KH
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
d46c5b12 202
df7492f9 203 Below is the template of these functions. */
d46c5b12 204
4ed46869 205#if 0
b73bfc1c 206static void
df7492f9 207decode_coding_XXXX (coding)
4ed46869 208 struct coding_system *coding;
4ed46869 209{
df7492f9
KH
210 unsigned char *src = coding->source + coding->consumed;
211 unsigned char *src_end = coding->source + coding->src_bytes;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base;
216 /* A buffer to produce decoded characters. */
217 int *charbuf = coding->charbuf;
218 int *charbuf_end = charbuf + coding->charbuf_size;
219 int multibytep = coding->src_multibyte;
220
221 while (1)
222 {
223 src_base = src;
224 if (charbuf < charbuf_end)
225 /* No more room to produce a decoded character. */
226 break;
227 ONE_MORE_BYTE (c);
228 /* Decode it. */
229 }
230
231 no_more_source:
232 if (src_base < src_end
233 && coding->mode & CODING_MODE_LAST_BLOCK)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base < src_end && charbuf < charbuf_end)
237 *charbuf++ = *src_base++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding->consumed = coding->consumed_char = src_base - coding->source;
241 /* Remember how many characters we produced. */
242 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
243}
244#endif
245
246/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
247
df7492f9
KH
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
b73bfc1c
KH
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
d46c5b12 252
df7492f9
KH
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
d46c5b12 257
df7492f9
KH
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce a encoded text until it
260 reaches at the head of not-yet-encoded source text.
d46c5b12 261
df7492f9 262 Below is a template of these functions. */
4ed46869 263#if 0
b73bfc1c 264static void
df7492f9 265encode_coding_XXX (coding)
4ed46869 266 struct coding_system *coding;
4ed46869 267{
df7492f9
KH
268 int multibytep = coding->dst_multibyte;
269 int *charbuf = coding->charbuf;
270 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
271 unsigned char *dst = coding->destination + coding->produced;
272 unsigned char *dst_end = coding->destination + coding->dst_bytes;
273 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
274 int produced_chars = 0;
275
276 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
277 {
278 int c = *charbuf;
279 /* Encode C into DST, and increment DST. */
280 }
281 label_no_more_destination:
282 /* How many chars and bytes we produced. */
283 coding->produced_char += produced_chars;
284 coding->produced = dst - coding->destination;
4ed46869
KH
285}
286#endif
287
4ed46869
KH
288\f
289/*** 1. Preamble ***/
290
68c45bf0 291#include <config.h>
4ed46869
KH
292#include <stdio.h>
293
4ed46869
KH
294#include "lisp.h"
295#include "buffer.h"
df7492f9 296#include "character.h"
4ed46869
KH
297#include "charset.h"
298#include "ccl.h"
df7492f9 299#include "composite.h"
4ed46869
KH
300#include "coding.h"
301#include "window.h"
302
df7492f9 303Lisp_Object Vcoding_system_hash_table;
4ed46869 304
df7492f9 305Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
1965cb73
DL
306Lisp_Object Qunix, Qdos;
307extern Lisp_Object Qmac; /* frame.c */
4ed46869
KH
308Lisp_Object Qbuffer_file_coding_system;
309Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
df7492f9 310Lisp_Object Qdefault_char;
27901516 311Lisp_Object Qno_conversion, Qundecided;
df7492f9
KH
312Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
313Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
314Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
bb0115a2 315Lisp_Object Qcoding_system_history;
1397dc18 316Lisp_Object Qvalid_codes;
4ed46869
KH
317
318extern Lisp_Object Qinsert_file_contents, Qwrite_region;
319Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
320Lisp_Object Qstart_process, Qopen_network_stream;
321Lisp_Object Qtarget_idx;
322
d46c5b12
KH
323Lisp_Object Vselect_safe_coding_system_function;
324
7722baf9
EZ
325/* Mnemonic string for each format of end-of-line. */
326Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
327/* Mnemonic string to indicate format of end-of-line is not yet
4ed46869 328 decided. */
7722baf9 329Lisp_Object eol_mnemonic_undecided;
4ed46869
KH
330
331#ifdef emacs
332
4608c386
KH
333Lisp_Object Vcoding_system_list, Vcoding_system_alist;
334
335Lisp_Object Qcoding_system_p, Qcoding_system_error;
4ed46869 336
d46c5b12
KH
337/* Coding system emacs-mule and raw-text are for converting only
338 end-of-line format. */
339Lisp_Object Qemacs_mule, Qraw_text;
9ce27fde 340
4ed46869
KH
341/* Coding-systems are handed between Emacs Lisp programs and C internal
342 routines by the following three variables. */
343/* Coding-system for reading files and receiving data from process. */
344Lisp_Object Vcoding_system_for_read;
345/* Coding-system for writing files and sending data to process. */
346Lisp_Object Vcoding_system_for_write;
347/* Coding-system actually used in the latest I/O. */
348Lisp_Object Vlast_coding_system_used;
349
c4825358 350/* A vector of length 256 which contains information about special
94487c4e 351 Latin codes (especially for dealing with Microsoft codes). */
3f003981 352Lisp_Object Vlatin_extra_code_table;
c4825358 353
9ce27fde
KH
354/* Flag to inhibit code conversion of end-of-line format. */
355int inhibit_eol_conversion;
356
74383408
KH
357/* Flag to inhibit ISO2022 escape sequence detection. */
358int inhibit_iso_escape_detection;
359
ed29121d
EZ
360/* Flag to make buffer-file-coding-system inherit from process-coding. */
361int inherit_process_coding_system;
362
c4825358 363/* Coding system to be used to encode text for terminal display. */
4ed46869
KH
364struct coding_system terminal_coding;
365
c4825358
KH
366/* Coding system to be used to encode text for terminal display when
367 terminal coding system is nil. */
368struct coding_system safe_terminal_coding;
369
370/* Coding system of what is sent from terminal keyboard. */
4ed46869
KH
371struct coding_system keyboard_coding;
372
02ba4723
KH
373Lisp_Object Vfile_coding_system_alist;
374Lisp_Object Vprocess_coding_system_alist;
375Lisp_Object Vnetwork_coding_system_alist;
4ed46869 376
68c45bf0
PE
377Lisp_Object Vlocale_coding_system;
378
4ed46869
KH
379#endif /* emacs */
380
f967223b
KH
381/* Flag to tell if we look up translation table on character code
382 conversion. */
84fbb8a0 383Lisp_Object Venable_character_translation;
f967223b
KH
384/* Standard translation table to look up on decoding (reading). */
385Lisp_Object Vstandard_translation_table_for_decode;
386/* Standard translation table to look up on encoding (writing). */
387Lisp_Object Vstandard_translation_table_for_encode;
84fbb8a0 388
f967223b
KH
389Lisp_Object Qtranslation_table;
390Lisp_Object Qtranslation_table_id;
391Lisp_Object Qtranslation_table_for_decode;
392Lisp_Object Qtranslation_table_for_encode;
4ed46869
KH
393
394/* Alist of charsets vs revision number. */
df7492f9 395static Lisp_Object Vcharset_revision_table;
4ed46869 396
02ba4723
KH
397/* Default coding systems used for process I/O. */
398Lisp_Object Vdefault_process_coding_system;
399
b843d1ae
KH
400/* Global flag to tell that we can't call post-read-conversion and
401 pre-write-conversion functions. Usually the value is zero, but it
402 is set to 1 temporarily while such functions are running. This is
403 to avoid infinite recursive call. */
404static int inhibit_pre_post_conversion;
405
df7492f9
KH
406/* Two special coding systems. */
407Lisp_Object Vsjis_coding_system;
408Lisp_Object Vbig5_coding_system;
409
410
411static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
412static void decode_coding_utf_8 P_ ((struct coding_system *));
413static int encode_coding_utf_8 P_ ((struct coding_system *));
414
415static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
416static void decode_coding_utf_16 P_ ((struct coding_system *));
417static int encode_coding_utf_16 P_ ((struct coding_system *));
418
419static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
420static void decode_coding_iso_2022 P_ ((struct coding_system *));
421static int encode_coding_iso_2022 P_ ((struct coding_system *));
422
423static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
424static void decode_coding_emacs_mule P_ ((struct coding_system *));
425static int encode_coding_emacs_mule P_ ((struct coding_system *));
426
427static int detect_coding_sjis P_ ((struct coding_system *, int *));
428static void decode_coding_sjis P_ ((struct coding_system *));
429static int encode_coding_sjis P_ ((struct coding_system *));
430
431static int detect_coding_big5 P_ ((struct coding_system *, int *));
432static void decode_coding_big5 P_ ((struct coding_system *));
433static int encode_coding_big5 P_ ((struct coding_system *));
434
435static int detect_coding_ccl P_ ((struct coding_system *, int *));
436static void decode_coding_ccl P_ ((struct coding_system *));
437static int encode_coding_ccl P_ ((struct coding_system *));
438
439static void decode_coding_raw_text P_ ((struct coding_system *));
440static int encode_coding_raw_text P_ ((struct coding_system *));
441
442
443/* ISO2022 section */
444
445#define CODING_ISO_INITIAL(coding, reg) \
446 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
447 coding_attr_iso_initial), \
448 reg)))
449
450
451#define CODING_ISO_REQUEST(coding, charset_id) \
452 ((charset_id <= (coding)->max_charset_id \
453 ? (coding)->safe_charsets[charset_id] \
454 : -1))
455
456
457#define CODING_ISO_FLAGS(coding) \
458 ((coding)->spec.iso_2022.flags)
459#define CODING_ISO_DESIGNATION(coding, reg) \
460 ((coding)->spec.iso_2022.current_designation[reg])
461#define CODING_ISO_INVOCATION(coding, plane) \
462 ((coding)->spec.iso_2022.current_invocation[plane])
463#define CODING_ISO_SINGLE_SHIFTING(coding) \
464 ((coding)->spec.iso_2022.single_shifting)
465#define CODING_ISO_BOL(coding) \
466 ((coding)->spec.iso_2022.bol)
467#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
468 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
469
470/* Control characters of ISO2022. */
471 /* code */ /* function */
472#define ISO_CODE_LF 0x0A /* line-feed */
473#define ISO_CODE_CR 0x0D /* carriage-return */
474#define ISO_CODE_SO 0x0E /* shift-out */
475#define ISO_CODE_SI 0x0F /* shift-in */
476#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
477#define ISO_CODE_ESC 0x1B /* escape */
478#define ISO_CODE_SS2 0x8E /* single-shift-2 */
479#define ISO_CODE_SS3 0x8F /* single-shift-3 */
480#define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
481
482/* All code (1-byte) of ISO2022 is classified into one of the
483 followings. */
484enum iso_code_class_type
485 {
486 ISO_control_0, /* Control codes in the range
487 0x00..0x1F and 0x7F, except for the
488 following 5 codes. */
489 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
490 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
491 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
492 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
493 ISO_escape, /* ISO_CODE_SO (0x1B) */
494 ISO_control_1, /* Control codes in the range
495 0x80..0x9F, except for the
496 following 3 codes. */
497 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
498 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
499 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
500 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
501 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
502 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
503 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
504 };
05e6f5dc 505
df7492f9
KH
506/** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
507 `iso-flags' attribute of an iso2022 coding system. */
93dec019 508
df7492f9
KH
509/* If set, produce long-form designation sequence (e.g. ESC $ ( A)
510 instead of the correct short-form sequence (e.g. ESC $ A). */
511#define CODING_ISO_FLAG_LONG_FORM 0x0001
05e6f5dc 512
df7492f9
KH
513/* If set, reset graphic planes and registers at end-of-line to the
514 initial state. */
515#define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
05e6f5dc 516
df7492f9
KH
517/* If set, reset graphic planes and registers before any control
518 characters to the initial state. */
519#define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
4ed46869 520
df7492f9
KH
521/* If set, encode by 7-bit environment. */
522#define CODING_ISO_FLAG_SEVEN_BITS 0x0008
b73bfc1c 523
df7492f9
KH
524/* If set, use locking-shift function. */
525#define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
b73bfc1c 526
df7492f9
KH
527/* If set, use single-shift function. Overwrite
528 CODING_ISO_FLAG_LOCKING_SHIFT. */
529#define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
b73bfc1c 530
df7492f9
KH
531/* If set, use designation escape sequence. */
532#define CODING_ISO_FLAG_DESIGNATION 0x0040
b73bfc1c 533
df7492f9
KH
534/* If set, produce revision number sequence. */
535#define CODING_ISO_FLAG_REVISION 0x0080
f4dee582 536
df7492f9
KH
537/* If set, produce ISO6429's direction specifying sequence. */
538#define CODING_ISO_FLAG_DIRECTION 0x0100
4ed46869 539
df7492f9
KH
540/* If set, assume designation states are reset at beginning of line on
541 output. */
542#define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
aa72b389 543
df7492f9
KH
544/* If set, designation sequence should be placed at beginning of line
545 on output. */
546#define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
aa72b389 547
df7492f9
KH
548/* If set, do not encode unsafe charactes on output. */
549#define CODING_ISO_FLAG_SAFE 0x0800
aa72b389 550
df7492f9
KH
551/* If set, extra latin codes (128..159) are accepted as a valid code
552 on input. */
553#define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
aa72b389 554
df7492f9 555#define CODING_ISO_FLAG_COMPOSITION 0x2000
aa72b389 556
df7492f9 557#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
aa72b389 558
bf16eb23
KH
559#define CODING_ISO_FLAG_USE_ROMAN 0x8000
560
561#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
562
563#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
aa72b389 564
df7492f9
KH
565/* A character to be produced on output if encoding of the original
566 character is prohibited by CODING_ISO_FLAG_SAFE. */
567#define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
aa72b389 568
aa72b389 569
df7492f9
KH
570/* UTF-16 section */
571#define CODING_UTF_16_BOM(coding) \
572 ((coding)->spec.utf_16.bom)
4ed46869 573
df7492f9
KH
574#define CODING_UTF_16_ENDIAN(coding) \
575 ((coding)->spec.utf_16.endian)
4ed46869 576
df7492f9
KH
577#define CODING_UTF_16_SURROGATE(coding) \
578 ((coding)->spec.utf_16.surrogate)
4ed46869 579
4ed46869 580
df7492f9
KH
581/* CCL section */
582#define CODING_CCL_DECODER(coding) \
583 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
584#define CODING_CCL_ENCODER(coding) \
585 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
586#define CODING_CCL_VALIDS(coding) \
587 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
588 ->data)
4ed46869 589
5a936b46 590/* Index for each coding category in `coding_categories' */
4ed46869 591
df7492f9
KH
592enum coding_category
593 {
594 coding_category_iso_7,
595 coding_category_iso_7_tight,
596 coding_category_iso_8_1,
597 coding_category_iso_8_2,
598 coding_category_iso_7_else,
599 coding_category_iso_8_else,
600 coding_category_utf_8,
601 coding_category_utf_16_auto,
602 coding_category_utf_16_be,
603 coding_category_utf_16_le,
604 coding_category_utf_16_be_nosig,
605 coding_category_utf_16_le_nosig,
606 coding_category_charset,
607 coding_category_sjis,
608 coding_category_big5,
609 coding_category_ccl,
610 coding_category_emacs_mule,
611 /* All above are targets of code detection. */
612 coding_category_raw_text,
613 coding_category_undecided,
614 coding_category_max
615 };
616
617/* Definitions of flag bits used in detect_coding_XXXX. */
618#define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
619#define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
620#define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
621#define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
622#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
623#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
624#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
625#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
626#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
627#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
628#define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
629#define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
630#define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
631#define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
632#define CATEGORY_MASK_CCL (1 << coding_category_ccl)
633#define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
634
635/* This value is returned if detect_coding_mask () find nothing other
636 than ASCII characters. */
637#define CATEGORY_MASK_ANY \
638 (CATEGORY_MASK_ISO_7 \
639 | CATEGORY_MASK_ISO_7_TIGHT \
640 | CATEGORY_MASK_ISO_8_1 \
641 | CATEGORY_MASK_ISO_8_2 \
642 | CATEGORY_MASK_ISO_7_ELSE \
643 | CATEGORY_MASK_ISO_8_ELSE \
644 | CATEGORY_MASK_UTF_8 \
645 | CATEGORY_MASK_UTF_16_BE \
646 | CATEGORY_MASK_UTF_16_LE \
647 | CATEGORY_MASK_UTF_16_BE_NOSIG \
648 | CATEGORY_MASK_UTF_16_LE_NOSIG \
649 | CATEGORY_MASK_CHARSET \
650 | CATEGORY_MASK_SJIS \
651 | CATEGORY_MASK_BIG5 \
652 | CATEGORY_MASK_CCL \
653 | CATEGORY_MASK_EMACS_MULE)
654
655
656#define CATEGORY_MASK_ISO_7BIT \
657 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
658
659#define CATEGORY_MASK_ISO_8BIT \
660 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
661
662#define CATEGORY_MASK_ISO_ELSE \
663 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
664
665#define CATEGORY_MASK_ISO_ESCAPE \
666 (CATEGORY_MASK_ISO_7 \
667 | CATEGORY_MASK_ISO_7_TIGHT \
668 | CATEGORY_MASK_ISO_7_ELSE \
669 | CATEGORY_MASK_ISO_8_ELSE)
670
671#define CATEGORY_MASK_ISO \
672 ( CATEGORY_MASK_ISO_7BIT \
673 | CATEGORY_MASK_ISO_8BIT \
674 | CATEGORY_MASK_ISO_ELSE)
675
676#define CATEGORY_MASK_UTF_16 \
677 (CATEGORY_MASK_UTF_16_BE \
678 | CATEGORY_MASK_UTF_16_LE \
679 | CATEGORY_MASK_UTF_16_BE_NOSIG \
680 | CATEGORY_MASK_UTF_16_LE_NOSIG)
681
682
683/* List of symbols `coding-category-xxx' ordered by priority. This
684 variable is exposed to Emacs Lisp. */
685static Lisp_Object Vcoding_category_list;
686
687/* Table of coding categories (Lisp symbols). This variable is for
688 internal use oly. */
689static Lisp_Object Vcoding_category_table;
690
691/* Table of coding-categories ordered by priority. */
692static enum coding_category coding_priorities[coding_category_max];
693
694/* Nth element is a coding context for the coding system bound to the
695 Nth coding category. */
696static struct coding_system coding_categories[coding_category_max];
697
698static int detected_mask[coding_category_raw_text] =
699 { CATEGORY_MASK_ISO,
700 CATEGORY_MASK_ISO,
701 CATEGORY_MASK_ISO,
702 CATEGORY_MASK_ISO,
703 CATEGORY_MASK_ISO,
704 CATEGORY_MASK_ISO,
705 CATEGORY_MASK_UTF_8,
706 CATEGORY_MASK_UTF_16,
707 CATEGORY_MASK_UTF_16,
708 CATEGORY_MASK_UTF_16,
709 CATEGORY_MASK_UTF_16,
710 CATEGORY_MASK_UTF_16,
711 CATEGORY_MASK_CHARSET,
712 CATEGORY_MASK_SJIS,
713 CATEGORY_MASK_BIG5,
714 CATEGORY_MASK_CCL,
715 CATEGORY_MASK_EMACS_MULE
716 };
717
718/*** Commonly used macros and functions ***/
719
720#ifndef min
721#define min(a, b) ((a) < (b) ? (a) : (b))
722#endif
723#ifndef max
724#define max(a, b) ((a) > (b) ? (a) : (b))
725#endif
4ed46869 726
df7492f9
KH
727#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
728 do { \
729 attrs = CODING_ID_ATTRS (coding->id); \
730 eol_type = CODING_ID_EOL_TYPE (coding->id); \
731 if (VECTORP (eol_type)) \
732 eol_type = Qunix; \
733 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
734 } while (0)
4ed46869 735
4ed46869 736
df7492f9
KH
737/* Safely get one byte from the source text pointed by SRC which ends
738 at SRC_END, and set C to that byte. If there are not enough bytes
739 in the source, it jumps to `no_more_source'. The caller
740 should declare and set these variables appropriately in advance:
741 src, src_end, multibytep
742*/
aa72b389 743
df7492f9 744#define ONE_MORE_BYTE(c) \
aa72b389 745 do { \
df7492f9
KH
746 if (src == src_end) \
747 { \
748 if (src_base < src) \
749 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
750 goto no_more_source; \
751 } \
752 c = *src++; \
753 if (multibytep && (c & 0x80)) \
754 { \
755 if ((c & 0xFE) != 0xC0) \
756 error ("Undecodable char found"); \
757 c = ((c & 1) << 6) | *src++; \
758 } \
759 consumed_chars++; \
aa72b389
KH
760 } while (0)
761
aa72b389 762
df7492f9
KH
763#define ONE_MORE_BYTE_NO_CHECK(c) \
764 do { \
765 c = *src++; \
766 if (multibytep && (c & 0x80)) \
767 { \
768 if ((c & 0xFE) != 0xC0) \
769 error ("Undecodable char found"); \
770 c = ((c & 1) << 6) | *src++; \
771 } \
781d7a48 772 consumed_chars++; \
aa72b389
KH
773 } while (0)
774
aa72b389 775
df7492f9
KH
776/* Store a byte C in the place pointed by DST and increment DST to the
777 next free point, and increment PRODUCED_CHARS. The caller should
778 assure that C is 0..127, and declare and set the variable `dst'
779 appropriately in advance.
780*/
aa72b389
KH
781
782
df7492f9
KH
783#define EMIT_ONE_ASCII_BYTE(c) \
784 do { \
785 produced_chars++; \
786 *dst++ = (c); \
787 } while (0)
aa72b389 788
aa72b389 789
df7492f9 790/* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */
aa72b389 791
df7492f9
KH
792#define EMIT_TWO_ASCII_BYTES(c1, c2) \
793 do { \
794 produced_chars += 2; \
795 *dst++ = (c1), *dst++ = (c2); \
796 } while (0)
aa72b389 797
df7492f9
KH
798
799/* Store a byte C in the place pointed by DST and increment DST to the
800 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
801 nonzero, store in an appropriate multibyte from. The caller should
802 declare and set the variables `dst' and `multibytep' appropriately
803 in advance. */
804
805#define EMIT_ONE_BYTE(c) \
806 do { \
807 produced_chars++; \
808 if (multibytep) \
809 { \
810 int ch = (c); \
811 if (ch >= 0x80) \
812 ch = BYTE8_TO_CHAR (ch); \
813 CHAR_STRING_ADVANCE (ch, dst); \
814 } \
815 else \
816 *dst++ = (c); \
aa72b389
KH
817 } while (0)
818
819
df7492f9 820/* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
aa72b389 821
e19c3639
KH
822#define EMIT_TWO_BYTES(c1, c2) \
823 do { \
824 produced_chars += 2; \
825 if (multibytep) \
826 { \
827 int ch; \
828 \
829 ch = (c1); \
830 if (ch >= 0x80) \
831 ch = BYTE8_TO_CHAR (ch); \
832 CHAR_STRING_ADVANCE (ch, dst); \
833 ch = (c2); \
834 if (ch >= 0x80) \
835 ch = BYTE8_TO_CHAR (ch); \
836 CHAR_STRING_ADVANCE (ch, dst); \
837 } \
838 else \
839 { \
840 *dst++ = (c1); \
841 *dst++ = (c2); \
842 } \
aa72b389
KH
843 } while (0)
844
845
df7492f9
KH
846#define EMIT_THREE_BYTES(c1, c2, c3) \
847 do { \
848 EMIT_ONE_BYTE (c1); \
849 EMIT_TWO_BYTES (c2, c3); \
850 } while (0)
aa72b389 851
aa72b389 852
df7492f9
KH
853#define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
854 do { \
855 EMIT_TWO_BYTES (c1, c2); \
856 EMIT_TWO_BYTES (c3, c4); \
857 } while (0)
aa72b389 858
aa72b389 859
df7492f9
KH
860#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
861 do { \
862 charset_map_loaded = 0; \
863 c = DECODE_CHAR (charset, code); \
864 if (charset_map_loaded) \
865 { \
866 unsigned char *orig = coding->source; \
867 EMACS_INT offset; \
868 \
869 coding_set_source (coding); \
870 offset = coding->source - orig; \
871 src += offset; \
872 src_base += offset; \
873 src_end += offset; \
874 } \
875 } while (0)
aa72b389 876
aa72b389 877
df7492f9
KH
878#define ASSURE_DESTINATION(bytes) \
879 do { \
880 if (dst + (bytes) >= dst_end) \
881 { \
882 int more_bytes = charbuf_end - charbuf + (bytes); \
883 \
884 dst = alloc_destination (coding, more_bytes, dst); \
885 dst_end = coding->destination + coding->dst_bytes; \
886 } \
887 } while (0)
b1887814 888
df7492f9
KH
889
890
891static void
892coding_set_source (coding)
893 struct coding_system *coding;
894{
895 if (BUFFERP (coding->src_object))
896 {
897 if (coding->src_pos < 0)
898 coding->source = GAP_END_ADDR + coding->src_pos_byte;
899 else
900 {
e19c3639 901 struct buffer *buf = XBUFFER (coding->src_object);
e19c3639
KH
902 EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
903 unsigned char *beg_addr = BUF_BEG_ADDR (buf);
904
905 coding->source = beg_addr + coding->src_pos_byte - 1;
906 if (coding->src_pos_byte >= gpt_byte)
907 coding->source += BUF_GAP_SIZE (buf);
aa72b389
KH
908 }
909 }
df7492f9 910 else if (STRINGP (coding->src_object))
aa72b389 911 {
df7492f9
KH
912 coding->source = (XSTRING (coding->src_object)->data
913 + coding->src_pos_byte);
914 }
915 else
916 /* Otherwise, the source is C string and is never relocated
917 automatically. Thus we don't have to update anything. */
918 ;
919}
920
921static void
922coding_set_destination (coding)
923 struct coding_system *coding;
924{
925 if (BUFFERP (coding->dst_object))
926 {
927 /* We are sure that coding->dst_pos_byte is before the gap of the
928 buffer. */
929 coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
930 + coding->dst_pos_byte - 1);
931 if (coding->src_pos < 0)
df7492f9
KH
932 coding->dst_bytes = (GAP_END_ADDR
933 - (coding->src_bytes - coding->consumed)
934 - coding->destination);
935 else
936 coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
937 - coding->destination);
938 }
939 else
940 /* Otherwise, the destination is C string and is never relocated
941 automatically. Thus we don't have to update anything. */
942 ;
943}
944
945
946static void
947coding_alloc_by_realloc (coding, bytes)
948 struct coding_system *coding;
949 EMACS_INT bytes;
950{
951 coding->destination = (unsigned char *) xrealloc (coding->destination,
952 coding->dst_bytes + bytes);
953 coding->dst_bytes += bytes;
954}
955
956static void
957coding_alloc_by_making_gap (coding, bytes)
958 struct coding_system *coding;
959 EMACS_INT bytes;
960{
2c78b7e1
KH
961 if (BUFFERP (coding->dst_object)
962 && EQ (coding->src_object, coding->dst_object))
df7492f9
KH
963 {
964 EMACS_INT add = coding->src_bytes - coding->consumed;
965
966 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
967 make_gap (bytes);
968 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
969 }
970 else
971 {
2c78b7e1
KH
972 Lisp_Object this_buffer;
973
974 this_buffer = Fcurrent_buffer ();
df7492f9
KH
975 set_buffer_internal (XBUFFER (coding->dst_object));
976 make_gap (bytes);
977 set_buffer_internal (XBUFFER (this_buffer));
978 }
979}
980
981
982static unsigned char *
983alloc_destination (coding, nbytes, dst)
984 struct coding_system *coding;
985 int nbytes;
986 unsigned char *dst;
987{
988 EMACS_INT offset = dst - coding->destination;
989
990 if (BUFFERP (coding->dst_object))
991 coding_alloc_by_making_gap (coding, nbytes);
992 else
993 coding_alloc_by_realloc (coding, nbytes);
994 coding->result = CODING_RESULT_SUCCESS;
995 coding_set_destination (coding);
996 dst = coding->destination + offset;
997 return dst;
998}
aa72b389 999
df7492f9
KH
1000\f
1001/*** 2. Emacs' internal format (emacs-utf-8) ***/
1002
1003
1004
1005\f
1006/*** 3. UTF-8 ***/
1007
1008/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1009 Check if a text is encoded in UTF-8. If it is, return
1010 CATEGORY_MASK_UTF_8, else return 0. */
1011
1012#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1013#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1014#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1015#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1016#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1017#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1018
1019static int
1020detect_coding_utf_8 (coding, mask)
1021 struct coding_system *coding;
1022 int *mask;
1023{
1024 unsigned char *src = coding->source, *src_base = src;
1025 unsigned char *src_end = coding->source + coding->src_bytes;
1026 int multibytep = coding->src_multibyte;
1027 int consumed_chars = 0;
1028 int found = 0;
1029
1030 /* A coding system of this category is always ASCII compatible. */
1031 src += coding->head_ascii;
1032
1033 while (1)
1034 {
1035 int c, c1, c2, c3, c4;
1036
1037 ONE_MORE_BYTE (c);
1038 if (UTF_8_1_OCTET_P (c))
1039 continue;
1040 ONE_MORE_BYTE (c1);
1041 if (! UTF_8_EXTRA_OCTET_P (c1))
1042 break;
1043 if (UTF_8_2_OCTET_LEADING_P (c))
1044 {
1045 found++;
1046 continue;
1047 }
1048 ONE_MORE_BYTE (c2);
1049 if (! UTF_8_EXTRA_OCTET_P (c2))
1050 break;
1051 if (UTF_8_3_OCTET_LEADING_P (c))
1052 {
1053 found++;
1054 continue;
1055 }
1056 ONE_MORE_BYTE (c3);
1057 if (! UTF_8_EXTRA_OCTET_P (c3))
1058 break;
1059 if (UTF_8_4_OCTET_LEADING_P (c))
aa72b389 1060 {
df7492f9
KH
1061 found++;
1062 continue;
1063 }
1064 ONE_MORE_BYTE (c4);
1065 if (! UTF_8_EXTRA_OCTET_P (c4))
1066 break;
1067 if (UTF_8_5_OCTET_LEADING_P (c))
1068 {
1069 found++;
1070 continue;
1071 }
1072 break;
1073 }
1074 *mask &= ~CATEGORY_MASK_UTF_8;
1075 return 0;
1076
1077 no_more_source:
1078 if (! found)
1079 return 0;
1080 *mask &= CATEGORY_MASK_UTF_8;
1081 return 1;
1082}
1083
1084
b0edb2c5 1085/* Fixme: deal with surrogates? */
df7492f9
KH
1086static void
1087decode_coding_utf_8 (coding)
1088 struct coding_system *coding;
1089{
1090 unsigned char *src = coding->source + coding->consumed;
1091 unsigned char *src_end = coding->source + coding->src_bytes;
1092 unsigned char *src_base;
1093 int *charbuf = coding->charbuf;
1094 int *charbuf_end = charbuf + coding->charbuf_size;
1095 int consumed_chars = 0, consumed_chars_base;
1096 int multibytep = coding->src_multibyte;
1097 Lisp_Object attr, eol_type, charset_list;
1098
1099 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1100
1101 while (1)
1102 {
1103 int c, c1, c2, c3, c4, c5;
1104
1105 src_base = src;
1106 consumed_chars_base = consumed_chars;
1107
1108 if (charbuf >= charbuf_end)
1109 break;
1110
1111 ONE_MORE_BYTE (c1);
1112 if (UTF_8_1_OCTET_P(c1))
1113 {
1114 c = c1;
1115 if (c == '\r')
aa72b389 1116 {
df7492f9
KH
1117 if (EQ (eol_type, Qdos))
1118 {
1119 if (src == src_end)
1120 goto no_more_source;
1121 if (*src == '\n')
1122 ONE_MORE_BYTE (c);
1123 }
1124 else if (EQ (eol_type, Qmac))
1125 c = '\n';
aa72b389 1126 }
aa72b389 1127 }
df7492f9 1128 else
aa72b389 1129 {
df7492f9
KH
1130 ONE_MORE_BYTE (c2);
1131 if (! UTF_8_EXTRA_OCTET_P (c2))
1132 goto invalid_code;
1133 if (UTF_8_2_OCTET_LEADING_P (c1))
b0edb2c5
DL
1134 {
1135 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1136 /* Reject overlong sequences here and below. Encoders
1137 producing them are incorrect, they can be misleading,
1138 and they mess up read/write invariance. */
1139 if (c < 128)
1140 goto invalid_code;
1141 }
df7492f9 1142 else
aa72b389 1143 {
df7492f9
KH
1144 ONE_MORE_BYTE (c3);
1145 if (! UTF_8_EXTRA_OCTET_P (c3))
1146 goto invalid_code;
1147 if (UTF_8_3_OCTET_LEADING_P (c1))
b0edb2c5
DL
1148 {
1149 c = (((c1 & 0xF) << 12)
1150 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1151 if (c < 0x800)
1152 goto invalid_code;
1153 }
df7492f9
KH
1154 else
1155 {
1156 ONE_MORE_BYTE (c4);
1157 if (! UTF_8_EXTRA_OCTET_P (c4))
1158 goto invalid_code;
1159 if (UTF_8_4_OCTET_LEADING_P (c1))
b0edb2c5 1160 {
df7492f9
KH
1161 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1162 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
b0edb2c5
DL
1163 if (c < 0x10000)
1164 goto invalid_code;
1165 }
df7492f9
KH
1166 else
1167 {
1168 ONE_MORE_BYTE (c5);
1169 if (! UTF_8_EXTRA_OCTET_P (c5))
1170 goto invalid_code;
1171 if (UTF_8_5_OCTET_LEADING_P (c1))
1172 {
1173 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1174 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1175 | (c5 & 0x3F));
b0edb2c5 1176 if ((c > MAX_CHAR) || (c < 0x200000))
df7492f9
KH
1177 goto invalid_code;
1178 }
1179 else
1180 goto invalid_code;
1181 }
1182 }
aa72b389 1183 }
aa72b389 1184 }
df7492f9
KH
1185
1186 *charbuf++ = c;
1187 continue;
1188
1189 invalid_code:
1190 src = src_base;
1191 consumed_chars = consumed_chars_base;
1192 ONE_MORE_BYTE (c);
1193 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1194 coding->errors++;
aa72b389
KH
1195 }
1196
df7492f9
KH
1197 no_more_source:
1198 coding->consumed_char += consumed_chars_base;
1199 coding->consumed = src_base - coding->source;
1200 coding->charbuf_used = charbuf - coding->charbuf;
1201}
1202
1203
1204static int
1205encode_coding_utf_8 (coding)
1206 struct coding_system *coding;
1207{
1208 int multibytep = coding->dst_multibyte;
1209 int *charbuf = coding->charbuf;
1210 int *charbuf_end = charbuf + coding->charbuf_used;
1211 unsigned char *dst = coding->destination + coding->produced;
1212 unsigned char *dst_end = coding->destination + coding->dst_bytes;
e19c3639 1213 int produced_chars = 0;
df7492f9
KH
1214 int c;
1215
1216 if (multibytep)
aa72b389 1217 {
df7492f9
KH
1218 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1219
1220 while (charbuf < charbuf_end)
aa72b389 1221 {
df7492f9
KH
1222 unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1223
1224 ASSURE_DESTINATION (safe_room);
1225 c = *charbuf++;
1226 CHAR_STRING_ADVANCE (c, pend);
1227 for (p = str; p < pend; p++)
1228 EMIT_ONE_BYTE (*p);
aa72b389 1229 }
aa72b389 1230 }
df7492f9
KH
1231 else
1232 {
1233 int safe_room = MAX_MULTIBYTE_LENGTH;
1234
1235 while (charbuf < charbuf_end)
1236 {
1237 ASSURE_DESTINATION (safe_room);
1238 c = *charbuf++;
1239 dst += CHAR_STRING (c, dst);
1240 produced_chars++;
1241 }
1242 }
1243 coding->result = CODING_RESULT_SUCCESS;
1244 coding->produced_char += produced_chars;
1245 coding->produced = dst - coding->destination;
1246 return 0;
aa72b389
KH
1247}
1248
4ed46869 1249
df7492f9
KH
1250/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1251 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1252 Little Endian (otherwise). If it is, return
1253 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
1254 else return 0. */
1255
1256#define UTF_16_HIGH_SURROGATE_P(val) \
1257 (((val) & 0xFC00) == 0xD800)
1258
1259#define UTF_16_LOW_SURROGATE_P(val) \
1260 (((val) & 0xFC00) == 0xDC00)
1261
1262#define UTF_16_INVALID_P(val) \
1263 (((val) == 0xFFFE) \
1264 || ((val) == 0xFFFF) \
1265 || UTF_16_LOW_SURROGATE_P (val))
1266
1267
1268static int
1269detect_coding_utf_16 (coding, mask)
b73bfc1c 1270 struct coding_system *coding;
df7492f9 1271 int *mask;
b73bfc1c 1272{
df7492f9
KH
1273 unsigned char *src = coding->source, *src_base = src;
1274 unsigned char *src_end = coding->source + coding->src_bytes;
1275 int multibytep = coding->src_multibyte;
1276 int consumed_chars = 0;
1277 int c1, c2;
1278
1279 ONE_MORE_BYTE (c1);
1280 ONE_MORE_BYTE (c2);
4ed46869 1281
df7492f9 1282 if ((c1 == 0xFF) && (c2 == 0xFE))
b73bfc1c 1283 {
df7492f9
KH
1284 *mask &= CATEGORY_MASK_UTF_16_LE;
1285 return 1;
1286 }
1287 else if ((c1 == 0xFE) && (c2 == 0xFF))
1288 {
1289 *mask &= CATEGORY_MASK_UTF_16_BE;
1290 return 1;
1291 }
1292 no_more_source:
1293 return 0;
1294}
ec6d2bb8 1295
df7492f9
KH
1296static void
1297decode_coding_utf_16 (coding)
1298 struct coding_system *coding;
1299{
1300 unsigned char *src = coding->source + coding->consumed;
1301 unsigned char *src_end = coding->source + coding->src_bytes;
0be8721c 1302 unsigned char *src_base;
df7492f9
KH
1303 int *charbuf = coding->charbuf;
1304 int *charbuf_end = charbuf + coding->charbuf_size;
1305 int consumed_chars = 0, consumed_chars_base;
1306 int multibytep = coding->src_multibyte;
1307 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1308 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1309 int surrogate = CODING_UTF_16_SURROGATE (coding);
1310 Lisp_Object attr, eol_type, charset_list;
1311
1312 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1313
1314 if (bom != utf_16_without_bom)
1315 {
1316 int c, c1, c2;
4af310db 1317
df7492f9
KH
1318 src_base = src;
1319 ONE_MORE_BYTE (c1);
1320 ONE_MORE_BYTE (c2);
e19c3639 1321 c = (c1 << 8) | c2;
df7492f9
KH
1322 if (bom == utf_16_with_bom)
1323 {
1324 if (endian == utf_16_big_endian
1325 ? c != 0xFFFE : c != 0xFEFF)
4af310db 1326 {
df7492f9
KH
1327 /* We are sure that there's enouph room at CHARBUF. */
1328 *charbuf++ = c1;
1329 *charbuf++ = c2;
1330 coding->errors++;
4af310db 1331 }
4af310db 1332 }
df7492f9 1333 else
4af310db 1334 {
df7492f9
KH
1335 if (c == 0xFFFE)
1336 CODING_UTF_16_ENDIAN (coding)
1337 = endian = utf_16_big_endian;
1338 else if (c == 0xFEFF)
1339 CODING_UTF_16_ENDIAN (coding)
1340 = endian = utf_16_little_endian;
1341 else
4af310db 1342 {
df7492f9
KH
1343 CODING_UTF_16_ENDIAN (coding)
1344 = endian = utf_16_big_endian;
1345 src = src_base;
4af310db 1346 }
4af310db 1347 }
df7492f9
KH
1348 CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1349 }
1350
1351 while (1)
1352 {
1353 int c, c1, c2;
1354
1355 src_base = src;
1356 consumed_chars_base = consumed_chars;
1357
1358 if (charbuf + 2 >= charbuf_end)
1359 break;
1360
1361 ONE_MORE_BYTE (c1);
1362 ONE_MORE_BYTE (c2);
1363 c = (endian == utf_16_big_endian
e19c3639 1364 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
df7492f9 1365 if (surrogate)
aa72b389 1366 {
df7492f9 1367 if (! UTF_16_LOW_SURROGATE_P (c))
aa72b389 1368 {
df7492f9
KH
1369 if (endian == utf_16_big_endian)
1370 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1371 else
1372 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1373 *charbuf++ = c1;
1374 *charbuf++ = c2;
1375 coding->errors++;
1376 if (UTF_16_HIGH_SURROGATE_P (c))
1377 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1378 else
1379 *charbuf++ = c;
aa72b389 1380 }
df7492f9
KH
1381 else
1382 {
1383 c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1384 CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1385 *charbuf++ = c;
1386 }
1387 }
1388 else
1389 {
1390 if (UTF_16_HIGH_SURROGATE_P (c))
1391 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1392 else
1393 *charbuf++ = c;
1394 }
1395 }
1396
1397 no_more_source:
1398 coding->consumed_char += consumed_chars_base;
1399 coding->consumed = src_base - coding->source;
1400 coding->charbuf_used = charbuf - coding->charbuf;
1401}
1402
1403static int
1404encode_coding_utf_16 (coding)
1405 struct coding_system *coding;
1406{
1407 int multibytep = coding->dst_multibyte;
1408 int *charbuf = coding->charbuf;
1409 int *charbuf_end = charbuf + coding->charbuf_used;
1410 unsigned char *dst = coding->destination + coding->produced;
1411 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1412 int safe_room = 8;
1413 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1414 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1415 int produced_chars = 0;
1416 Lisp_Object attrs, eol_type, charset_list;
1417 int c;
1418
1419 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1420
1421 if (bom == utf_16_with_bom)
1422 {
1423 ASSURE_DESTINATION (safe_room);
1424 if (big_endian)
1425 EMIT_TWO_BYTES (0xFF, 0xFE);
1426 else
1427 EMIT_TWO_BYTES (0xFE, 0xFF);
1428 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1429 }
1430
1431 while (charbuf < charbuf_end)
1432 {
1433 ASSURE_DESTINATION (safe_room);
1434 c = *charbuf++;
e19c3639
KH
1435 if (c >= MAX_UNICODE_CHAR)
1436 c = coding->default_char;
df7492f9
KH
1437
1438 if (c < 0x10000)
1439 {
1440 if (big_endian)
1441 EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1442 else
1443 EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1444 }
1445 else
1446 {
1447 int c1, c2;
1448
1449 c -= 0x10000;
1450 c1 = (c >> 10) + 0xD800;
1451 c2 = (c & 0x3FF) + 0xDC00;
1452 if (big_endian)
1453 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1454 else
1455 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1456 }
1457 }
1458 coding->result = CODING_RESULT_SUCCESS;
1459 coding->produced = dst - coding->destination;
1460 coding->produced_char += produced_chars;
1461 return 0;
1462}
1463
1464\f
1465/*** 6. Old Emacs' internal format (emacs-mule) ***/
1466
1467/* Emacs' internal format for representation of multiple character
1468 sets is a kind of multi-byte encoding, i.e. characters are
1469 represented by variable-length sequences of one-byte codes.
1470
1471 ASCII characters and control characters (e.g. `tab', `newline') are
1472 represented by one-byte sequences which are their ASCII codes, in
1473 the range 0x00 through 0x7F.
1474
1475 8-bit characters of the range 0x80..0x9F are represented by
1476 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1477 code + 0x20).
1478
1479 8-bit characters of the range 0xA0..0xFF are represented by
1480 one-byte sequences which are their 8-bit code.
1481
1482 The other characters are represented by a sequence of `base
1483 leading-code', optional `extended leading-code', and one or two
1484 `position-code's. The length of the sequence is determined by the
1485 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1486 whereas extended leading-code and position-code take the range 0xA0
1487 through 0xFF. See `charset.h' for more details about leading-code
1488 and position-code.
1489
1490 --- CODE RANGE of Emacs' internal format ---
1491 character set range
1492 ------------- -----
1493 ascii 0x00..0x7F
1494 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1495 eight-bit-graphic 0xA0..0xBF
1496 ELSE 0x81..0x9D + [0xA0..0xFF]+
1497 ---------------------------------------------
1498
1499 As this is the internal character representation, the format is
1500 usually not used externally (i.e. in a file or in a data sent to a
1501 process). But, it is possible to have a text externally in this
1502 format (i.e. by encoding by the coding system `emacs-mule').
1503
1504 In that case, a sequence of one-byte codes has a slightly different
1505 form.
1506
1507 At first, all characters in eight-bit-control are represented by
1508 one-byte sequences which are their 8-bit code.
1509
1510 Next, character composition data are represented by the byte
1511 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1512 where,
1513 METHOD is 0xF0 plus one of composition method (enum
1514 composition_method),
1515
1516 BYTES is 0xA0 plus a byte length of this composition data,
1517
1518 CHARS is 0x20 plus a number of characters composed by this
1519 data,
1520
1521 COMPONENTs are characters of multibye form or composition
1522 rules encoded by two-byte of ASCII codes.
1523
1524 In addition, for backward compatibility, the following formats are
1525 also recognized as composition data on decoding.
1526
1527 0x80 MSEQ ...
1528 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1529
1530 Here,
1531 MSEQ is a multibyte form but in these special format:
1532 ASCII: 0xA0 ASCII_CODE+0x80,
1533 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1534 RULE is a one byte code of the range 0xA0..0xF0 that
1535 represents a composition rule.
1536 */
1537
1538char emacs_mule_bytes[256];
1539
1540/* Leading-code followed by extended leading-code. */
1541#define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
1542#define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
1543#define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1544#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1545
1546
1547int
781d7a48 1548emacs_mule_char (coding, src, nbytes, nchars)
df7492f9 1549 struct coding_system *coding;
781d7a48 1550 unsigned char *src;
df7492f9
KH
1551 int *nbytes, *nchars;
1552{
df7492f9
KH
1553 unsigned char *src_end = coding->source + coding->src_bytes;
1554 int multibytep = coding->src_multibyte;
1555 unsigned char *src_base = src;
1556 struct charset *charset;
1557 unsigned code;
1558 int c;
1559 int consumed_chars = 0;
1560
1561 ONE_MORE_BYTE (c);
df7492f9
KH
1562 switch (emacs_mule_bytes[c])
1563 {
1564 case 2:
1565 if (! (charset = emacs_mule_charset[c]))
1566 goto invalid_code;
1567 ONE_MORE_BYTE (c);
1568 code = c & 0x7F;
1569 break;
1570
1571 case 3:
1572 if (c == LEADING_CODE_PRIVATE_11
1573 || c == LEADING_CODE_PRIVATE_12)
b73bfc1c 1574 {
df7492f9
KH
1575 ONE_MORE_BYTE (c);
1576 if (! (charset = emacs_mule_charset[c]))
1577 goto invalid_code;
1578 ONE_MORE_BYTE (c);
1579 code = c & 0x7F;
b73bfc1c
KH
1580 }
1581 else
1582 {
df7492f9
KH
1583 if (! (charset = emacs_mule_charset[c]))
1584 goto invalid_code;
1585 ONE_MORE_BYTE (c);
781d7a48 1586 code = (c & 0x7F) << 8;
df7492f9
KH
1587 ONE_MORE_BYTE (c);
1588 code |= c & 0x7F;
1589 }
1590 break;
1591
1592 case 4:
781d7a48 1593 ONE_MORE_BYTE (c);
df7492f9
KH
1594 if (! (charset = emacs_mule_charset[c]))
1595 goto invalid_code;
1596 ONE_MORE_BYTE (c);
781d7a48 1597 code = (c & 0x7F) << 8;
df7492f9
KH
1598 ONE_MORE_BYTE (c);
1599 code |= c & 0x7F;
1600 break;
1601
1602 case 1:
1603 code = c;
1604 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) ? charset_ascii
1605 : code < 0xA0 ? charset_8_bit_control
1606 : charset_8_bit_graphic);
1607 break;
1608
1609 default:
1610 abort ();
1611 }
1612 c = DECODE_CHAR (charset, code);
1613 if (c < 0)
1614 goto invalid_code;
1615 *nbytes = src - src_base;
1616 *nchars = consumed_chars;
1617 return c;
1618
1619 no_more_source:
1620 return -2;
1621
1622 invalid_code:
1623 return -1;
1624}
1625
1626
1627/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1628 Check if a text is encoded in `emacs-mule'. */
1629
1630static int
1631detect_coding_emacs_mule (coding, mask)
1632 struct coding_system *coding;
1633 int *mask;
1634{
1635 unsigned char *src = coding->source, *src_base = src;
1636 unsigned char *src_end = coding->source + coding->src_bytes;
1637 int multibytep = coding->src_multibyte;
1638 int consumed_chars = 0;
1639 int c;
1640 int found = 0;
1641
1642 /* A coding system of this category is always ASCII compatible. */
1643 src += coding->head_ascii;
1644
1645 while (1)
1646 {
1647 ONE_MORE_BYTE (c);
1648
1649 if (c == 0x80)
1650 {
1651 /* Perhaps the start of composite character. We simple skip
1652 it because analyzing it is too heavy for detecting. But,
1653 at least, we check that the composite character
1654 constitues of more than 4 bytes. */
1655 unsigned char *src_base;
1656
1657 repeat:
1658 src_base = src;
1659 do
1660 {
1661 ONE_MORE_BYTE (c);
1662 }
1663 while (c >= 0xA0);
1664
1665 if (src - src_base <= 4)
1666 break;
1667 found = 1;
1668 if (c == 0x80)
1669 goto repeat;
b73bfc1c 1670 }
df7492f9
KH
1671
1672 if (c < 0x80)
b73bfc1c 1673 {
df7492f9
KH
1674 if (c < 0x20
1675 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1676 break;
1677 }
1678 else
1679 {
1680 unsigned char *src_base = src - 1;
1681
1682 do
1683 {
1684 ONE_MORE_BYTE (c);
1685 }
1686 while (c >= 0xA0);
1687 if (src - src_base != emacs_mule_bytes[*src_base])
1688 break;
1689 found = 1;
4ed46869
KH
1690 }
1691 }
df7492f9
KH
1692 *mask &= ~CATEGORY_MASK_EMACS_MULE;
1693 return 0;
1694
1695 no_more_source:
1696 if (!found)
1697 return 0;
1698 *mask &= CATEGORY_MASK_EMACS_MULE;
1699 return 1;
4ed46869
KH
1700}
1701
b73bfc1c 1702
df7492f9
KH
1703/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1704
1705/* Decode a character represented as a component of composition
1706 sequence of Emacs 20/21 style at SRC. Set C to that character and
1707 update SRC to the head of next character (or an encoded composition
1708 rule). If SRC doesn't points a composition component, set C to -1.
1709 If SRC points an invalid byte sequence, global exit by a return
1710 value 0. */
1711
1712#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1713 if (1) \
1714 { \
1715 int c; \
1716 int nbytes, nchars; \
1717 \
1718 if (src == src_end) \
1719 break; \
781d7a48 1720 c = emacs_mule_char (coding, src, &nbytes, &nchars); \
df7492f9
KH
1721 if (c < 0) \
1722 { \
1723 if (c == -2) \
1724 break; \
1725 goto invalid_code; \
1726 } \
1727 *buf++ = c; \
1728 src += nbytes; \
1729 consumed_chars += nchars; \
1730 } \
1731 else
1732
1733
1734/* Decode a composition rule represented as a component of composition
781d7a48
KH
1735 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1736 and increment BUF. If SRC points an invalid byte sequence, set C
1737 to -1. */
df7492f9 1738
781d7a48 1739#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
df7492f9
KH
1740 do { \
1741 int c, gref, nref; \
1742 \
781d7a48 1743 if (src >= src_end) \
df7492f9
KH
1744 goto invalid_code; \
1745 ONE_MORE_BYTE_NO_CHECK (c); \
781d7a48 1746 c -= 0x20; \
df7492f9
KH
1747 if (c < 0 || c >= 81) \
1748 goto invalid_code; \
1749 \
1750 gref = c / 9, nref = c % 9; \
1751 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1752 } while (0)
1753
1754
781d7a48
KH
1755/* Decode a composition rule represented as a component of composition
1756 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1757 and increment BUF. If SRC points an invalid byte sequence, set C
1758 to -1. */
1759
1760#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1761 do { \
1762 int gref, nref; \
1763 \
1764 if (src + 1>= src_end) \
1765 goto invalid_code; \
1766 ONE_MORE_BYTE_NO_CHECK (gref); \
1767 gref -= 0x20; \
1768 ONE_MORE_BYTE_NO_CHECK (nref); \
1769 nref -= 0x20; \
1770 if (gref < 0 || gref >= 81 \
1771 || nref < 0 || nref >= 81) \
1772 goto invalid_code; \
1773 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1774 } while (0)
1775
1776
df7492f9
KH
1777#define ADD_COMPOSITION_DATA(buf, method, nchars) \
1778 do { \
1779 *buf++ = -5; \
1780 *buf++ = coding->produced_char + char_offset; \
1781 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1782 *buf++ = method; \
1783 *buf++ = nchars; \
1784 } while (0)
aa72b389 1785
df7492f9
KH
1786
1787#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
aa72b389 1788 do { \
df7492f9 1789 /* Emacs 21 style format. The first three bytes at SRC are \
781d7a48 1790 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
df7492f9
KH
1791 the byte length of this composition information, CHARS is the \
1792 number of characters composed by this composition. */ \
781d7a48
KH
1793 enum composition_method method = c - 0xF2; \
1794 int *charbuf_base = charbuf; \
df7492f9
KH
1795 int consumed_chars_limit; \
1796 int nbytes, nchars; \
1797 \
1798 ONE_MORE_BYTE (c); \
1799 nbytes = c - 0xA0; \
1800 if (nbytes < 3) \
1801 goto invalid_code; \
1802 ONE_MORE_BYTE (c); \
1803 nchars = c - 0xA0; \
1804 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1805 consumed_chars_limit = consumed_chars_base + nbytes; \
1806 if (method != COMPOSITION_RELATIVE) \
aa72b389 1807 { \
df7492f9
KH
1808 int i = 0; \
1809 while (consumed_chars < consumed_chars_limit) \
aa72b389 1810 { \
df7492f9 1811 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
781d7a48 1812 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
df7492f9
KH
1813 else \
1814 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
781d7a48 1815 i++; \
aa72b389 1816 } \
df7492f9
KH
1817 if (consumed_chars < consumed_chars_limit) \
1818 goto invalid_code; \
781d7a48 1819 charbuf_base[0] -= i; \
aa72b389
KH
1820 } \
1821 } while (0)
93dec019 1822
aa72b389 1823
df7492f9
KH
1824#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1825 do { \
1826 /* Emacs 20 style format for relative composition. */ \
1827 /* Store multibyte form of characters to be composed. */ \
1828 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1829 int *buf = components; \
1830 int i, j; \
1831 \
1832 src = src_base; \
1833 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1834 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1835 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1836 if (i < 2) \
1837 goto invalid_code; \
1838 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1839 for (j = 0; j < i; j++) \
1840 *charbuf++ = components[j]; \
1841 } while (0)
1842
1843
1844#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1845 do { \
1846 /* Emacs 20 style format for rule-base composition. */ \
1847 /* Store multibyte form of characters to be composed. */ \
1848 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1849 int *buf = components; \
1850 int i, j; \
1851 \
1852 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1853 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1854 { \
781d7a48 1855 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
df7492f9
KH
1856 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1857 } \
1858 if (i < 1 || (buf - components) % 2 == 0) \
1859 goto invalid_code; \
1860 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1861 goto no_more_source; \
1862 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1863 for (j = 0; j < i; j++) \
1864 *charbuf++ = components[j]; \
1865 for (j = 0; j < i; j += 2) \
1866 *charbuf++ = components[j]; \
1867 } while (0)
1868
aa72b389
KH
1869
1870static void
df7492f9 1871decode_coding_emacs_mule (coding)
aa72b389 1872 struct coding_system *coding;
aa72b389 1873{
df7492f9
KH
1874 unsigned char *src = coding->source + coding->consumed;
1875 unsigned char *src_end = coding->source + coding->src_bytes;
aa72b389 1876 unsigned char *src_base;
df7492f9
KH
1877 int *charbuf = coding->charbuf;
1878 int *charbuf_end = charbuf + coding->charbuf_size;
1879 int consumed_chars = 0, consumed_chars_base;
1880 int char_offset = 0;
1881 int multibytep = coding->src_multibyte;
1882 Lisp_Object attrs, eol_type, charset_list;
aa72b389 1883
df7492f9 1884 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
aa72b389 1885
aa72b389
KH
1886 while (1)
1887 {
df7492f9
KH
1888 int c;
1889
aa72b389 1890 src_base = src;
df7492f9
KH
1891 consumed_chars_base = consumed_chars;
1892
1893 if (charbuf >= charbuf_end)
1894 break;
aa72b389 1895
df7492f9
KH
1896 ONE_MORE_BYTE (c);
1897
1898 if (c < 0x80)
aa72b389 1899 {
df7492f9
KH
1900 if (c == '\r')
1901 {
1902 if (EQ (eol_type, Qdos))
1903 {
1904 if (src == src_end)
1905 goto no_more_source;
1906 if (*src == '\n')
1907 ONE_MORE_BYTE (c);
1908 }
1909 else if (EQ (eol_type, Qmac))
1910 c = '\n';
1911 }
1912 *charbuf++ = c;
1913 char_offset++;
aa72b389 1914 }
df7492f9
KH
1915 else if (c == 0x80)
1916 {
1917 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1918 break;
1919 ONE_MORE_BYTE (c);
781d7a48
KH
1920 if (c - 0xF2 >= COMPOSITION_RELATIVE
1921 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
df7492f9
KH
1922 DECODE_EMACS_MULE_21_COMPOSITION (c);
1923 else if (c < 0xC0)
1924 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1925 else if (c == 0xFF)
1926 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1927 else
1928 goto invalid_code;
781d7a48 1929 coding->annotated = 1;
df7492f9
KH
1930 }
1931 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1932 {
1933 int nbytes, nchars;
781d7a48
KH
1934 src = src_base;
1935 consumed_chars = consumed_chars_base;
1936 c = emacs_mule_char (coding, src, &nbytes, &nchars);
df7492f9
KH
1937 if (c < 0)
1938 {
1939 if (c == -2)
1940 break;
1941 goto invalid_code;
1942 }
1943 *charbuf++ = c;
781d7a48
KH
1944 src += nbytes;
1945 consumed_chars += nchars;
df7492f9
KH
1946 char_offset++;
1947 }
1948 continue;
1949
1950 invalid_code:
1951 src = src_base;
1952 consumed_chars = consumed_chars_base;
1953 ONE_MORE_BYTE (c);
1954 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1955 coding->errors++;
1956 }
1957
1958 no_more_source:
1959 coding->consumed_char += consumed_chars_base;
1960 coding->consumed = src_base - coding->source;
1961 coding->charbuf_used = charbuf - coding->charbuf;
1962}
1963
1964
1965#define EMACS_MULE_LEADING_CODES(id, codes) \
1966 do { \
1967 if (id < 0xA0) \
1968 codes[0] = id, codes[1] = 0; \
1969 else if (id < 0xE0) \
1970 codes[0] = 0x9A, codes[1] = id; \
1971 else if (id < 0xF0) \
1972 codes[0] = 0x9B, codes[1] = id; \
1973 else if (id < 0xF5) \
1974 codes[0] = 0x9C, codes[1] = id; \
1975 else \
1976 codes[0] = 0x9D, codes[1] = id; \
1977 } while (0);
1978
aa72b389 1979
df7492f9
KH
1980static int
1981encode_coding_emacs_mule (coding)
1982 struct coding_system *coding;
1983{
1984 int multibytep = coding->dst_multibyte;
1985 int *charbuf = coding->charbuf;
1986 int *charbuf_end = charbuf + coding->charbuf_used;
1987 unsigned char *dst = coding->destination + coding->produced;
1988 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1989 int safe_room = 8;
df7492f9
KH
1990 int produced_chars = 0;
1991 Lisp_Object attrs, eol_type, charset_list;
1992 int c;
1993
1994 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1995
1996 while (charbuf < charbuf_end)
1997 {
1998 ASSURE_DESTINATION (safe_room);
1999 c = *charbuf++;
2000 if (ASCII_CHAR_P (c))
2001 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
2002 else if (CHAR_BYTE8_P (c))
2003 {
2004 c = CHAR_TO_BYTE8 (c);
2005 EMIT_ONE_BYTE (c);
2006 }
df7492f9 2007 else
aa72b389 2008 {
df7492f9
KH
2009 struct charset *charset;
2010 unsigned code;
2011 int dimension;
2012 int emacs_mule_id;
2013 unsigned char leading_codes[2];
2014
2015 charset = char_charset (c, charset_list, &code);
2016 if (! charset)
2017 {
2018 c = coding->default_char;
2019 if (ASCII_CHAR_P (c))
2020 {
2021 EMIT_ONE_ASCII_BYTE (c);
2022 continue;
2023 }
2024 charset = char_charset (c, charset_list, &code);
2025 }
2026 dimension = CHARSET_DIMENSION (charset);
2027 emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2028 EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2029 EMIT_ONE_BYTE (leading_codes[0]);
2030 if (leading_codes[1])
2031 EMIT_ONE_BYTE (leading_codes[1]);
2032 if (dimension == 1)
2033 EMIT_ONE_BYTE (code);
aa72b389 2034 else
df7492f9
KH
2035 {
2036 EMIT_ONE_BYTE (code >> 8);
2037 EMIT_ONE_BYTE (code & 0xFF);
2038 }
aa72b389 2039 }
aa72b389 2040 }
df7492f9
KH
2041 coding->result = CODING_RESULT_SUCCESS;
2042 coding->produced_char += produced_chars;
2043 coding->produced = dst - coding->destination;
2044 return 0;
aa72b389 2045}
b73bfc1c 2046
4ed46869 2047\f
df7492f9 2048/*** 7. ISO2022 handlers ***/
4ed46869
KH
2049
2050/* The following note describes the coding system ISO2022 briefly.
39787efd 2051 Since the intention of this note is to help understand the
5a936b46 2052 functions in this file, some parts are NOT ACCURATE or are OVERLY
39787efd 2053 SIMPLIFIED. For thorough understanding, please refer to the
5a936b46
DL
2054 original document of ISO2022. This is equivalent to the standard
2055 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
4ed46869
KH
2056
2057 ISO2022 provides many mechanisms to encode several character sets
5a936b46 2058 in 7-bit and 8-bit environments. For 7-bit environments, all text
39787efd
KH
2059 is encoded using bytes less than 128. This may make the encoded
2060 text a little bit longer, but the text passes more easily through
5a936b46
DL
2061 several types of gateway, some of which strip off the MSB (Most
2062 Significant Bit).
b73bfc1c 2063
5a936b46
DL
2064 There are two kinds of character sets: control character sets and
2065 graphic character sets. The former contain control characters such
4ed46869 2066 as `newline' and `escape' to provide control functions (control
39787efd 2067 functions are also provided by escape sequences). The latter
5a936b46 2068 contain graphic characters such as 'A' and '-'. Emacs recognizes
4ed46869
KH
2069 two control character sets and many graphic character sets.
2070
2071 Graphic character sets are classified into one of the following
39787efd
KH
2072 four classes, according to the number of bytes (DIMENSION) and
2073 number of characters in one dimension (CHARS) of the set:
2074 - DIMENSION1_CHARS94
2075 - DIMENSION1_CHARS96
2076 - DIMENSION2_CHARS94
2077 - DIMENSION2_CHARS96
2078
2079 In addition, each character set is assigned an identification tag,
5a936b46 2080 unique for each set, called the "final character" (denoted as <F>
39787efd
KH
2081 hereafter). The <F> of each character set is decided by ECMA(*)
2082 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2083 (0x30..0x3F are for private use only).
4ed46869
KH
2084
2085 Note (*): ECMA = European Computer Manufacturers Association
2086
5a936b46 2087 Here are examples of graphic character sets [NAME(<F>)]:
4ed46869
KH
2088 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2089 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2090 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2091 o DIMENSION2_CHARS96 -- none for the moment
2092
39787efd 2093 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
4ed46869
KH
2094 C0 [0x00..0x1F] -- control character plane 0
2095 GL [0x20..0x7F] -- graphic character plane 0
2096 C1 [0x80..0x9F] -- control character plane 1
2097 GR [0xA0..0xFF] -- graphic character plane 1
2098
2099 A control character set is directly designated and invoked to C0 or
39787efd
KH
2100 C1 by an escape sequence. The most common case is that:
2101 - ISO646's control character set is designated/invoked to C0, and
2102 - ISO6429's control character set is designated/invoked to C1,
2103 and usually these designations/invocations are omitted in encoded
2104 text. In a 7-bit environment, only C0 can be used, and a control
2105 character for C1 is encoded by an appropriate escape sequence to
2106 fit into the environment. All control characters for C1 are
2107 defined to have corresponding escape sequences.
4ed46869
KH
2108
2109 A graphic character set is at first designated to one of four
2110 graphic registers (G0 through G3), then these graphic registers are
2111 invoked to GL or GR. These designations and invocations can be
2112 done independently. The most common case is that G0 is invoked to
39787efd
KH
2113 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2114 these invocations and designations are omitted in encoded text.
2115 In a 7-bit environment, only GL can be used.
4ed46869 2116
39787efd
KH
2117 When a graphic character set of CHARS94 is invoked to GL, codes
2118 0x20 and 0x7F of the GL area work as control characters SPACE and
2119 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2120 be used.
4ed46869
KH
2121
2122 There are two ways of invocation: locking-shift and single-shift.
2123 With locking-shift, the invocation lasts until the next different
39787efd
KH
2124 invocation, whereas with single-shift, the invocation affects the
2125 following character only and doesn't affect the locking-shift
2126 state. Invocations are done by the following control characters or
2127 escape sequences:
4ed46869
KH
2128
2129 ----------------------------------------------------------------------
39787efd 2130 abbrev function cntrl escape seq description
4ed46869 2131 ----------------------------------------------------------------------
39787efd
KH
2132 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2133 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2134 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2135 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2136 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2137 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2138 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2139 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2140 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
4ed46869 2141 ----------------------------------------------------------------------
39787efd
KH
2142 (*) These are not used by any known coding system.
2143
2144 Control characters for these functions are defined by macros
2145 ISO_CODE_XXX in `coding.h'.
4ed46869 2146
39787efd 2147 Designations are done by the following escape sequences:
4ed46869
KH
2148 ----------------------------------------------------------------------
2149 escape sequence description
2150 ----------------------------------------------------------------------
2151 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2152 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2153 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2154 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2155 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2156 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2157 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2158 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2159 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2160 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2161 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2162 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2163 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2164 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2165 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2166 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2167 ----------------------------------------------------------------------
2168
2169 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
39787efd 2170 of dimension 1, chars 94, and final character <F>, etc...
4ed46869
KH
2171
2172 Note (*): Although these designations are not allowed in ISO2022,
2173 Emacs accepts them on decoding, and produces them on encoding
39787efd 2174 CHARS96 character sets in a coding system which is characterized as
4ed46869
KH
2175 7-bit environment, non-locking-shift, and non-single-shift.
2176
2177 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
df7492f9 2178 '(' must be omitted. We refer to this as "short-form" hereafter.
4ed46869 2179
5a936b46 2180 Now you may notice that there are a lot of ways of encoding the
39787efd
KH
2181 same multilingual text in ISO2022. Actually, there exist many
2182 coding systems such as Compound Text (used in X11's inter client
5a936b46
DL
2183 communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2184 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
4ed46869
KH
2185 localized platforms), and all of these are variants of ISO2022.
2186
2187 In addition to the above, Emacs handles two more kinds of escape
2188 sequences: ISO6429's direction specification and Emacs' private
2189 sequence for specifying character composition.
2190
39787efd 2191 ISO6429's direction specification takes the following form:
4ed46869
KH
2192 o CSI ']' -- end of the current direction
2193 o CSI '0' ']' -- end of the current direction
2194 o CSI '1' ']' -- start of left-to-right text
2195 o CSI '2' ']' -- start of right-to-left text
2196 The control character CSI (0x9B: control sequence introducer) is
39787efd
KH
2197 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2198
2199 Character composition specification takes the following form:
ec6d2bb8
KH
2200 o ESC '0' -- start relative composition
2201 o ESC '1' -- end composition
2202 o ESC '2' -- start rule-base composition (*)
2203 o ESC '3' -- start relative composition with alternate chars (**)
2204 o ESC '4' -- start rule-base composition with alternate chars (**)
b73bfc1c 2205 Since these are not standard escape sequences of any ISO standard,
5a936b46 2206 the use of them with these meanings is restricted to Emacs only.
ec6d2bb8 2207
5a936b46
DL
2208 (*) This form is used only in Emacs 20.7 and older versions,
2209 but newer versions can safely decode it.
2210 (**) This form is used only in Emacs 21.1 and newer versions,
2211 and older versions can't decode it.
ec6d2bb8 2212
5a936b46 2213 Here's a list of example usages of these composition escape
b73bfc1c 2214 sequences (categorized by `enum composition_method').
ec6d2bb8 2215
b73bfc1c 2216 COMPOSITION_RELATIVE:
ec6d2bb8 2217 ESC 0 CHAR [ CHAR ] ESC 1
5a936b46 2218 COMPOSITION_WITH_RULE:
ec6d2bb8 2219 ESC 2 CHAR [ RULE CHAR ] ESC 1
b73bfc1c 2220 COMPOSITION_WITH_ALTCHARS:
ec6d2bb8 2221 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
b73bfc1c 2222 COMPOSITION_WITH_RULE_ALTCHARS:
ec6d2bb8 2223 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
4ed46869
KH
2224
2225enum iso_code_class_type iso_code_class[256];
2226
df7492f9
KH
2227#define SAFE_CHARSET_P(coding, id) \
2228 ((id) <= (coding)->max_charset_id \
2229 && (coding)->safe_charsets[id] >= 0)
2230
2231
2232#define SHIFT_OUT_OK(category) \
2233 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2234
2235static void
f0064e1f
DL
2236setup_iso_safe_charsets (attrs)
2237 Lisp_Object attrs;
df7492f9
KH
2238{
2239 Lisp_Object charset_list, safe_charsets;
2240 Lisp_Object request;
2241 Lisp_Object reg_usage;
2242 Lisp_Object tail;
2243 int reg94, reg96;
2244 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2245 int max_charset_id;
2246
2247 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2248 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2249 && ! EQ (charset_list, Viso_2022_charset_list))
2250 {
2251 CODING_ATTR_CHARSET_LIST (attrs)
2252 = charset_list = Viso_2022_charset_list;
2253 ASET (attrs, coding_attr_safe_charsets, Qnil);
2254 }
2255
2256 if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2257 return;
2258
2259 max_charset_id = 0;
2260 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2261 {
2262 int id = XINT (XCAR (tail));
2263 if (max_charset_id < id)
2264 max_charset_id = id;
2265 }
2266
2267 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2268 make_number (255));
2269 request = AREF (attrs, coding_attr_iso_request);
2270 reg_usage = AREF (attrs, coding_attr_iso_usage);
2271 reg94 = XINT (XCAR (reg_usage));
2272 reg96 = XINT (XCDR (reg_usage));
2273
2274 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2275 {
2276 Lisp_Object id;
2277 Lisp_Object reg;
2278 struct charset *charset;
2279
2280 id = XCAR (tail);
2281 charset = CHARSET_FROM_ID (XINT (id));
bf16eb23 2282 reg = Fcdr (Fassq (id, request));
df7492f9
KH
2283 if (! NILP (reg))
2284 XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2285 else if (charset->iso_chars_96)
2286 {
2287 if (reg96 < 4)
2288 XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2289 }
2290 else
2291 {
2292 if (reg94 < 4)
2293 XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2294 }
2295 }
2296 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2297}
d46c5b12 2298
d46c5b12 2299
4ed46869 2300/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
df7492f9 2301 Check if a text is encoded in ISO2022. If it is, returns an
4ed46869 2302 integer in which appropriate flag bits any of:
df7492f9
KH
2303 CATEGORY_MASK_ISO_7
2304 CATEGORY_MASK_ISO_7_TIGHT
2305 CATEGORY_MASK_ISO_8_1
2306 CATEGORY_MASK_ISO_8_2
2307 CATEGORY_MASK_ISO_7_ELSE
2308 CATEGORY_MASK_ISO_8_ELSE
4ed46869
KH
2309 are set. If a code which should never appear in ISO2022 is found,
2310 returns 0. */
2311
0a28aafb 2312static int
df7492f9
KH
2313detect_coding_iso_2022 (coding, mask)
2314 struct coding_system *coding;
2315 int *mask;
4ed46869 2316{
df7492f9
KH
2317 unsigned char *src = coding->source, *src_base = src;
2318 unsigned char *src_end = coding->source + coding->src_bytes;
2319 int multibytep = coding->src_multibyte;
2320 int mask_iso = CATEGORY_MASK_ISO;
2321 int mask_found = 0, mask_8bit_found = 0;
f46869e4 2322 int reg[4], shift_out = 0, single_shifting = 0;
df7492f9
KH
2323 int id;
2324 int c, c1;
2325 int consumed_chars = 0;
2326 int i;
2327
2328 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2329 {
2330 struct coding_system *this = &(coding_categories[i]);
2331 Lisp_Object attrs, val;
2332
2333 attrs = CODING_ID_ATTRS (this->id);
2334 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2335 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2336 setup_iso_safe_charsets (attrs);
2337 val = CODING_ATTR_SAFE_CHARSETS (attrs);
2338 this->max_charset_id = XSTRING (val)->size - 1;
2339 this->safe_charsets = (char *) XSTRING (val)->data;
2340 }
2341
2342 /* A coding system of this category is always ASCII compatible. */
2343 src += coding->head_ascii;
3f003981 2344
df7492f9
KH
2345 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2346 while (mask_iso && src < src_end)
4ed46869 2347 {
df7492f9 2348 ONE_MORE_BYTE (c);
4ed46869
KH
2349 switch (c)
2350 {
2351 case ISO_CODE_ESC:
74383408
KH
2352 if (inhibit_iso_escape_detection)
2353 break;
f46869e4 2354 single_shifting = 0;
df7492f9 2355 ONE_MORE_BYTE (c);
d46c5b12 2356 if (c >= '(' && c <= '/')
4ed46869 2357 {
bf9cdd4e 2358 /* Designation sequence for a charset of dimension 1. */
df7492f9 2359 ONE_MORE_BYTE (c1);
d46c5b12 2360 if (c1 < ' ' || c1 >= 0x80
df7492f9 2361 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
d46c5b12
KH
2362 /* Invalid designation sequence. Just ignore. */
2363 break;
df7492f9 2364 reg[(c - '(') % 4] = id;
bf9cdd4e
KH
2365 }
2366 else if (c == '$')
2367 {
2368 /* Designation sequence for a charset of dimension 2. */
df7492f9 2369 ONE_MORE_BYTE (c);
bf9cdd4e
KH
2370 if (c >= '@' && c <= 'B')
2371 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
df7492f9 2372 reg[0] = id = iso_charset_table[1][0][c];
bf9cdd4e 2373 else if (c >= '(' && c <= '/')
bcf26d6a 2374 {
df7492f9 2375 ONE_MORE_BYTE (c1);
d46c5b12 2376 if (c1 < ' ' || c1 >= 0x80
df7492f9 2377 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
d46c5b12
KH
2378 /* Invalid designation sequence. Just ignore. */
2379 break;
df7492f9 2380 reg[(c - '(') % 4] = id;
bcf26d6a 2381 }
bf9cdd4e 2382 else
d46c5b12
KH
2383 /* Invalid designation sequence. Just ignore. */
2384 break;
2385 }
ae9ff118 2386 else if (c == 'N' || c == 'O')
d46c5b12 2387 {
ae9ff118 2388 /* ESC <Fe> for SS2 or SS3. */
df7492f9 2389 mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
d46c5b12 2390 break;
4ed46869 2391 }
ec6d2bb8
KH
2392 else if (c >= '0' && c <= '4')
2393 {
2394 /* ESC <Fp> for start/end composition. */
df7492f9 2395 mask_found |= CATEGORY_MASK_ISO;
ec6d2bb8
KH
2396 break;
2397 }
bf9cdd4e 2398 else
df7492f9
KH
2399 {
2400 /* Invalid escape sequence. */
2401 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2402 break;
2403 }
d46c5b12
KH
2404
2405 /* We found a valid designation sequence for CHARSET. */
df7492f9
KH
2406 mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2407 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2408 id))
2409 mask_found |= CATEGORY_MASK_ISO_7;
d46c5b12 2410 else
df7492f9
KH
2411 mask_iso &= ~CATEGORY_MASK_ISO_7;
2412 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2413 id))
2414 mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
d46c5b12 2415 else
df7492f9
KH
2416 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2417 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2418 id))
2419 mask_found |= CATEGORY_MASK_ISO_7_ELSE;
ae9ff118 2420 else
df7492f9
KH
2421 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2422 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2423 id))
2424 mask_found |= CATEGORY_MASK_ISO_8_ELSE;
ae9ff118 2425 else
df7492f9 2426 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
4ed46869
KH
2427 break;
2428
4ed46869 2429 case ISO_CODE_SO:
74383408
KH
2430 if (inhibit_iso_escape_detection)
2431 break;
f46869e4 2432 single_shifting = 0;
d46c5b12
KH
2433 if (shift_out == 0
2434 && (reg[1] >= 0
df7492f9
KH
2435 || SHIFT_OUT_OK (coding_category_iso_7_else)
2436 || SHIFT_OUT_OK (coding_category_iso_8_else)))
d46c5b12
KH
2437 {
2438 /* Locking shift out. */
df7492f9
KH
2439 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2440 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12 2441 }
e0e989f6 2442 break;
df7492f9 2443
d46c5b12 2444 case ISO_CODE_SI:
74383408
KH
2445 if (inhibit_iso_escape_detection)
2446 break;
f46869e4 2447 single_shifting = 0;
d46c5b12
KH
2448 if (shift_out == 1)
2449 {
2450 /* Locking shift in. */
df7492f9
KH
2451 mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2452 mask_found |= CATEGORY_MASK_ISO_ELSE;
d46c5b12
KH
2453 }
2454 break;
2455
4ed46869 2456 case ISO_CODE_CSI:
f46869e4 2457 single_shifting = 0;
4ed46869
KH
2458 case ISO_CODE_SS2:
2459 case ISO_CODE_SS3:
3f003981 2460 {
df7492f9 2461 int newmask = CATEGORY_MASK_ISO_8_ELSE;
3f003981 2462
74383408
KH
2463 if (inhibit_iso_escape_detection)
2464 break;
70c22245
KH
2465 if (c != ISO_CODE_CSI)
2466 {
df7492f9
KH
2467 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2468 & CODING_ISO_FLAG_SINGLE_SHIFT)
2469 newmask |= CATEGORY_MASK_ISO_8_1;
2470 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2471 & CODING_ISO_FLAG_SINGLE_SHIFT)
2472 newmask |= CATEGORY_MASK_ISO_8_2;
f46869e4 2473 single_shifting = 1;
70c22245 2474 }
3f003981
KH
2475 if (VECTORP (Vlatin_extra_code_table)
2476 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2477 {
df7492f9
KH
2478 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2479 & CODING_ISO_FLAG_LATIN_EXTRA)
2480 newmask |= CATEGORY_MASK_ISO_8_1;
2481 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2482 & CODING_ISO_FLAG_LATIN_EXTRA)
2483 newmask |= CATEGORY_MASK_ISO_8_2;
3f003981 2484 }
df7492f9 2485 mask_iso &= newmask;
d46c5b12 2486 mask_found |= newmask;
3f003981
KH
2487 }
2488 break;
4ed46869
KH
2489
2490 default:
2491 if (c < 0x80)
f46869e4
KH
2492 {
2493 single_shifting = 0;
2494 break;
2495 }
4ed46869 2496 else if (c < 0xA0)
c4825358 2497 {
f46869e4 2498 single_shifting = 0;
df7492f9 2499 mask_8bit_found = 1;
3f003981
KH
2500 if (VECTORP (Vlatin_extra_code_table)
2501 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
c4825358 2502 {
3f003981
KH
2503 int newmask = 0;
2504
df7492f9
KH
2505 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2506 & CODING_ISO_FLAG_LATIN_EXTRA)
2507 newmask |= CATEGORY_MASK_ISO_8_1;
2508 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2509 & CODING_ISO_FLAG_LATIN_EXTRA)
2510 newmask |= CATEGORY_MASK_ISO_8_2;
2511 mask_iso &= newmask;
d46c5b12 2512 mask_found |= newmask;
c4825358 2513 }
3f003981
KH
2514 else
2515 return 0;
c4825358 2516 }
4ed46869
KH
2517 else
2518 {
df7492f9
KH
2519 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2520 | CATEGORY_MASK_ISO_7_ELSE);
2521 mask_found |= CATEGORY_MASK_ISO_8_1;
2522 mask_8bit_found = 1;
f46869e4
KH
2523 /* Check the length of succeeding codes of the range
2524 0xA0..0FF. If the byte length is odd, we exclude
df7492f9 2525 CATEGORY_MASK_ISO_8_2. We can check this only
f46869e4 2526 when we are not single shifting. */
b73bfc1c 2527 if (!single_shifting
df7492f9 2528 && mask_iso & CATEGORY_MASK_ISO_8_2)
f46869e4 2529 {
e17de821 2530 int i = 1;
b73bfc1c
KH
2531 while (src < src_end)
2532 {
df7492f9 2533 ONE_MORE_BYTE (c);
b73bfc1c
KH
2534 if (c < 0xA0)
2535 break;
2536 i++;
2537 }
2538
2539 if (i & 1 && src < src_end)
df7492f9 2540 mask_iso &= ~CATEGORY_MASK_ISO_8_2;
f46869e4 2541 else
df7492f9 2542 mask_found |= CATEGORY_MASK_ISO_8_2;
f46869e4 2543 }
4ed46869
KH
2544 }
2545 break;
2546 }
2547 }
df7492f9
KH
2548 no_more_source:
2549 if (!mask_iso)
2550 {
2551 *mask &= ~CATEGORY_MASK_ISO;
2552 return 0;
2553 }
2554 if (!mask_found)
2555 return 0;
2556 *mask &= mask_iso & mask_found;
2557 if (! mask_8bit_found)
2558 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2559 return 1;
4ed46869
KH
2560}
2561
4ed46869
KH
2562
2563/* Set designation state into CODING. */
df7492f9
KH
2564#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2565 do { \
2566 int id, prev; \
2567 \
2568 if (final < '0' || final >= 128 \
2569 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2570 || !SAFE_CHARSET_P (coding, id)) \
2571 { \
2572 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2573 goto invalid_code; \
2574 } \
2575 prev = CODING_ISO_DESIGNATION (coding, reg); \
bf16eb23
KH
2576 if (id == charset_jisx0201_roman) \
2577 { \
2578 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2579 id = charset_ascii; \
2580 } \
2581 else if (id == charset_jisx0208_1978) \
2582 { \
2583 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2584 id = charset_jisx0208; \
2585 } \
df7492f9
KH
2586 CODING_ISO_DESIGNATION (coding, reg) = id; \
2587 /* If there was an invalid designation to REG previously, and this \
2588 designation is ASCII to REG, we should keep this designation \
2589 sequence. */ \
2590 if (prev == -2 && id == charset_ascii) \
2591 goto invalid_code; \
4ed46869
KH
2592 } while (0)
2593
d46c5b12 2594
df7492f9
KH
2595#define MAYBE_FINISH_COMPOSITION() \
2596 do { \
2597 int i; \
2598 if (composition_state == COMPOSING_NO) \
2599 break; \
2600 /* It is assured that we have enough room for producing \
2601 characters stored in the table `components'. */ \
2602 if (charbuf + component_idx > charbuf_end) \
2603 goto no_more_source; \
2604 composition_state = COMPOSING_NO; \
2605 if (method == COMPOSITION_RELATIVE \
2606 || method == COMPOSITION_WITH_ALTCHARS) \
2607 { \
2608 for (i = 0; i < component_idx; i++) \
2609 *charbuf++ = components[i]; \
2610 char_offset += component_idx; \
2611 } \
2612 else \
2613 { \
2614 for (i = 0; i < component_idx; i += 2) \
2615 *charbuf++ = components[i]; \
2616 char_offset += (component_idx / 2) + 1; \
2617 } \
2618 } while (0)
2619
d46c5b12 2620
aa72b389
KH
2621/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2622 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2623 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
df7492f9
KH
2624 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2625 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
aa72b389 2626 */
ec6d2bb8 2627
df7492f9
KH
2628#define DECODE_COMPOSITION_START(c1) \
2629 do { \
2630 if (c1 == '0' \
781d7a48 2631 && composition_state == COMPOSING_COMPONENT_RULE) \
df7492f9
KH
2632 { \
2633 component_len = component_idx; \
2634 composition_state = COMPOSING_CHAR; \
2635 } \
2636 else \
2637 { \
2638 unsigned char *p; \
2639 \
2640 MAYBE_FINISH_COMPOSITION (); \
2641 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2642 goto no_more_source; \
2643 for (p = src; p < src_end - 1; p++) \
2644 if (*p == ISO_CODE_ESC && p[1] == '1') \
2645 break; \
2646 if (p == src_end - 1) \
2647 { \
2648 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2649 goto invalid_code; \
2650 goto no_more_source; \
2651 } \
2652 \
2653 /* This is surely the start of a composition. */ \
2654 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2655 : c1 == '2' ? COMPOSITION_WITH_RULE \
2656 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2657 : COMPOSITION_WITH_RULE_ALTCHARS); \
2658 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2659 : COMPOSING_COMPONENT_CHAR); \
2660 component_idx = component_len = 0; \
2661 } \
ec6d2bb8
KH
2662 } while (0)
2663
ec6d2bb8 2664
df7492f9
KH
2665/* Handle compositoin end sequence ESC 1. */
2666
2667#define DECODE_COMPOSITION_END() \
ec6d2bb8 2668 do { \
df7492f9
KH
2669 int nchars = (component_len > 0 ? component_idx - component_len \
2670 : method == COMPOSITION_RELATIVE ? component_idx \
2671 : (component_idx + 1) / 2); \
2672 int i; \
2673 int *saved_charbuf = charbuf; \
2674 \
2675 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2676 if (method != COMPOSITION_RELATIVE) \
ec6d2bb8 2677 { \
df7492f9
KH
2678 if (component_len == 0) \
2679 for (i = 0; i < component_idx; i++) \
2680 *charbuf++ = components[i]; \
2681 else \
2682 for (i = 0; i < component_len; i++) \
2683 *charbuf++ = components[i]; \
2684 *saved_charbuf = saved_charbuf - charbuf; \
ec6d2bb8 2685 } \
df7492f9
KH
2686 if (method == COMPOSITION_WITH_RULE) \
2687 for (i = 0; i < component_idx; i += 2, char_offset++) \
2688 *charbuf++ = components[i]; \
ec6d2bb8 2689 else \
df7492f9
KH
2690 for (i = component_len; i < component_idx; i++, char_offset++) \
2691 *charbuf++ = components[i]; \
2692 coding->annotated = 1; \
2693 composition_state = COMPOSING_NO; \
ec6d2bb8
KH
2694 } while (0)
2695
df7492f9 2696
ec6d2bb8
KH
2697/* Decode a composition rule from the byte C1 (and maybe one more byte
2698 from SRC) and store one encoded composition rule in
2699 coding->cmp_data. */
2700
2701#define DECODE_COMPOSITION_RULE(c1) \
2702 do { \
ec6d2bb8
KH
2703 (c1) -= 32; \
2704 if (c1 < 81) /* old format (before ver.21) */ \
2705 { \
2706 int gref = (c1) / 9; \
2707 int nref = (c1) % 9; \
2708 if (gref == 4) gref = 10; \
2709 if (nref == 4) nref = 10; \
df7492f9 2710 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
ec6d2bb8 2711 } \
b73bfc1c 2712 else if (c1 < 93) /* new format (after ver.21) */ \
ec6d2bb8
KH
2713 { \
2714 ONE_MORE_BYTE (c2); \
df7492f9 2715 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
ec6d2bb8 2716 } \
df7492f9
KH
2717 else \
2718 c1 = 0; \
ec6d2bb8 2719 } while (0)
88993dfd 2720
d46c5b12 2721
4ed46869
KH
2722/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2723
b73bfc1c 2724static void
df7492f9 2725decode_coding_iso_2022 (coding)
4ed46869 2726 struct coding_system *coding;
4ed46869 2727{
df7492f9
KH
2728 unsigned char *src = coding->source + coding->consumed;
2729 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 2730 unsigned char *src_base;
df7492f9
KH
2731 int *charbuf = coding->charbuf;
2732 int *charbuf_end = charbuf + coding->charbuf_size - 4;
2733 int consumed_chars = 0, consumed_chars_base;
2734 int char_offset = 0;
2735 int multibytep = coding->src_multibyte;
2736 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2737 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2738 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2739 struct charset *charset;
2740 int c;
2741 /* For handling composition sequence. */
2742#define COMPOSING_NO 0
2743#define COMPOSING_CHAR 1
2744#define COMPOSING_RULE 2
2745#define COMPOSING_COMPONENT_CHAR 3
2746#define COMPOSING_COMPONENT_RULE 4
2747
2748 int composition_state = COMPOSING_NO;
2749 enum composition_method method;
2750 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2751 int component_idx;
2752 int component_len;
2753 Lisp_Object attrs, eol_type, charset_list;
2754
2755 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2756 setup_iso_safe_charsets (attrs);
b73bfc1c
KH
2757
2758 while (1)
4ed46869 2759 {
b73bfc1c
KH
2760 int c1, c2;
2761
2762 src_base = src;
df7492f9
KH
2763 consumed_chars_base = consumed_chars;
2764
2765 if (charbuf >= charbuf_end)
2766 break;
2767
b73bfc1c 2768 ONE_MORE_BYTE (c1);
4ed46869 2769
ec6d2bb8 2770 /* We produce no character or one character. */
4ed46869
KH
2771 switch (iso_code_class [c1])
2772 {
2773 case ISO_0x20_or_0x7F:
df7492f9 2774 if (composition_state != COMPOSING_NO)
ec6d2bb8 2775 {
df7492f9
KH
2776 if (composition_state == COMPOSING_RULE
2777 || composition_state == COMPOSING_COMPONENT_RULE)
2778 {
2779 DECODE_COMPOSITION_RULE (c1);
2780 components[component_idx++] = c1;
2781 composition_state--;
2782 continue;
2783 }
ec6d2bb8 2784 }
df7492f9
KH
2785 if (charset_id_0 < 0
2786 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
781d7a48
KH
2787 /* This is SPACE or DEL. */
2788 charset = CHARSET_FROM_ID (charset_ascii);
2789 else
2790 charset = CHARSET_FROM_ID (charset_id_0);
2791 break;
4ed46869
KH
2792
2793 case ISO_graphic_plane_0:
781d7a48 2794 if (composition_state != COMPOSING_NO)
b73bfc1c 2795 {
781d7a48
KH
2796 if (composition_state == COMPOSING_RULE
2797 || composition_state == COMPOSING_COMPONENT_RULE)
2798 {
2799 DECODE_COMPOSITION_RULE (c1);
2800 components[component_idx++] = c1;
2801 composition_state--;
2802 continue;
2803 }
b73bfc1c 2804 }
df7492f9 2805 charset = CHARSET_FROM_ID (charset_id_0);
4ed46869
KH
2806 break;
2807
2808 case ISO_0xA0_or_0xFF:
df7492f9
KH
2809 if (charset_id_1 < 0
2810 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2811 || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2812 goto invalid_code;
4ed46869
KH
2813 /* This is a graphic character, we fall down ... */
2814
2815 case ISO_graphic_plane_1:
df7492f9
KH
2816 if (charset_id_1 < 0)
2817 goto invalid_code;
2818 charset = CHARSET_FROM_ID (charset_id_1);
4ed46869
KH
2819 break;
2820
2821 case ISO_carriage_return:
df7492f9 2822 if (c1 == '\r')
4ed46869 2823 {
df7492f9 2824 if (EQ (eol_type, Qdos))
4ed46869 2825 {
df7492f9
KH
2826 if (src == src_end)
2827 goto no_more_source;
2828 if (*src == '\n')
2829 ONE_MORE_BYTE (c1);
4ed46869 2830 }
df7492f9
KH
2831 else if (EQ (eol_type, Qmac))
2832 c1 = '\n';
4ed46869 2833 }
df7492f9
KH
2834 /* fall through */
2835
2836 case ISO_control_0:
2837 MAYBE_FINISH_COMPOSITION ();
2838 charset = CHARSET_FROM_ID (charset_ascii);
4ed46869
KH
2839 break;
2840
df7492f9
KH
2841 case ISO_control_1:
2842 MAYBE_FINISH_COMPOSITION ();
2843 goto invalid_code;
2844
4ed46869 2845 case ISO_shift_out:
df7492f9
KH
2846 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2847 || CODING_ISO_DESIGNATION (coding, 1) < 0)
2848 goto invalid_code;
2849 CODING_ISO_INVOCATION (coding, 0) = 1;
2850 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2851 continue;
4ed46869
KH
2852
2853 case ISO_shift_in:
df7492f9
KH
2854 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2855 goto invalid_code;
2856 CODING_ISO_INVOCATION (coding, 0) = 0;
2857 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2858 continue;
4ed46869
KH
2859
2860 case ISO_single_shift_2_7:
2861 case ISO_single_shift_2:
df7492f9
KH
2862 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2863 goto invalid_code;
4ed46869
KH
2864 /* SS2 is handled as an escape sequence of ESC 'N' */
2865 c1 = 'N';
2866 goto label_escape_sequence;
2867
2868 case ISO_single_shift_3:
df7492f9
KH
2869 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2870 goto invalid_code;
4ed46869
KH
2871 /* SS2 is handled as an escape sequence of ESC 'O' */
2872 c1 = 'O';
2873 goto label_escape_sequence;
2874
2875 case ISO_control_sequence_introducer:
2876 /* CSI is handled as an escape sequence of ESC '[' ... */
2877 c1 = '[';
2878 goto label_escape_sequence;
2879
2880 case ISO_escape:
2881 ONE_MORE_BYTE (c1);
2882 label_escape_sequence:
df7492f9 2883 /* Escape sequences handled here are invocation,
4ed46869
KH
2884 designation, direction specification, and character
2885 composition specification. */
2886 switch (c1)
2887 {
2888 case '&': /* revision of following character set */
2889 ONE_MORE_BYTE (c1);
2890 if (!(c1 >= '@' && c1 <= '~'))
df7492f9 2891 goto invalid_code;
4ed46869
KH
2892 ONE_MORE_BYTE (c1);
2893 if (c1 != ISO_CODE_ESC)
df7492f9 2894 goto invalid_code;
4ed46869
KH
2895 ONE_MORE_BYTE (c1);
2896 goto label_escape_sequence;
2897
2898 case '$': /* designation of 2-byte character set */
df7492f9
KH
2899 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2900 goto invalid_code;
4ed46869
KH
2901 ONE_MORE_BYTE (c1);
2902 if (c1 >= '@' && c1 <= 'B')
2903 { /* designation of JISX0208.1978, GB2312.1980,
88993dfd 2904 or JISX0208.1980 */
df7492f9 2905 DECODE_DESIGNATION (0, 2, 0, c1);
4ed46869
KH
2906 }
2907 else if (c1 >= 0x28 && c1 <= 0x2B)
2908 { /* designation of DIMENSION2_CHARS94 character set */
2909 ONE_MORE_BYTE (c2);
df7492f9 2910 DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
4ed46869
KH
2911 }
2912 else if (c1 >= 0x2C && c1 <= 0x2F)
2913 { /* designation of DIMENSION2_CHARS96 character set */
2914 ONE_MORE_BYTE (c2);
df7492f9 2915 DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
4ed46869
KH
2916 }
2917 else
df7492f9 2918 goto invalid_code;
b73bfc1c 2919 /* We must update these variables now. */
df7492f9
KH
2920 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2921 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 2922 continue;
4ed46869
KH
2923
2924 case 'n': /* invocation of locking-shift-2 */
df7492f9
KH
2925 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2926 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2927 goto invalid_code;
2928 CODING_ISO_INVOCATION (coding, 0) = 2;
2929 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2930 continue;
4ed46869
KH
2931
2932 case 'o': /* invocation of locking-shift-3 */
df7492f9
KH
2933 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2934 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2935 goto invalid_code;
2936 CODING_ISO_INVOCATION (coding, 0) = 3;
2937 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
b73bfc1c 2938 continue;
4ed46869
KH
2939
2940 case 'N': /* invocation of single-shift-2 */
df7492f9
KH
2941 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2942 || CODING_ISO_DESIGNATION (coding, 2) < 0)
2943 goto invalid_code;
2944 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
b73bfc1c 2945 ONE_MORE_BYTE (c1);
e7046a18 2946 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2947 goto invalid_code;
4ed46869
KH
2948 break;
2949
2950 case 'O': /* invocation of single-shift-3 */
df7492f9
KH
2951 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2952 || CODING_ISO_DESIGNATION (coding, 3) < 0)
2953 goto invalid_code;
2954 charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
b73bfc1c 2955 ONE_MORE_BYTE (c1);
e7046a18 2956 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
df7492f9 2957 goto invalid_code;
4ed46869
KH
2958 break;
2959
ec6d2bb8 2960 case '0': case '2': case '3': case '4': /* start composition */
df7492f9
KH
2961 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2962 goto invalid_code;
ec6d2bb8 2963 DECODE_COMPOSITION_START (c1);
b73bfc1c 2964 continue;
4ed46869 2965
ec6d2bb8 2966 case '1': /* end composition */
df7492f9
KH
2967 if (composition_state == COMPOSING_NO)
2968 goto invalid_code;
2969 DECODE_COMPOSITION_END ();
b73bfc1c 2970 continue;
4ed46869
KH
2971
2972 case '[': /* specification of direction */
df7492f9
KH
2973 if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2974 goto invalid_code;
4ed46869 2975 /* For the moment, nested direction is not supported.
d46c5b12 2976 So, `coding->mode & CODING_MODE_DIRECTION' zero means
df7492f9 2977 left-to-right, and nozero means right-to-left. */
4ed46869
KH
2978 ONE_MORE_BYTE (c1);
2979 switch (c1)
2980 {
2981 case ']': /* end of the current direction */
d46c5b12 2982 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869
KH
2983
2984 case '0': /* end of the current direction */
2985 case '1': /* start of left-to-right direction */
2986 ONE_MORE_BYTE (c1);
2987 if (c1 == ']')
d46c5b12 2988 coding->mode &= ~CODING_MODE_DIRECTION;
4ed46869 2989 else
df7492f9 2990 goto invalid_code;
4ed46869
KH
2991 break;
2992
2993 case '2': /* start of right-to-left direction */
2994 ONE_MORE_BYTE (c1);
2995 if (c1 == ']')
d46c5b12 2996 coding->mode |= CODING_MODE_DIRECTION;
4ed46869 2997 else
df7492f9 2998 goto invalid_code;
4ed46869
KH
2999 break;
3000
3001 default:
df7492f9 3002 goto invalid_code;
4ed46869 3003 }
b73bfc1c 3004 continue;
4ed46869
KH
3005
3006 default:
df7492f9
KH
3007 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3008 goto invalid_code;
4ed46869
KH
3009 if (c1 >= 0x28 && c1 <= 0x2B)
3010 { /* designation of DIMENSION1_CHARS94 character set */
3011 ONE_MORE_BYTE (c2);
df7492f9 3012 DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
4ed46869
KH
3013 }
3014 else if (c1 >= 0x2C && c1 <= 0x2F)
3015 { /* designation of DIMENSION1_CHARS96 character set */
3016 ONE_MORE_BYTE (c2);
df7492f9 3017 DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
4ed46869
KH
3018 }
3019 else
df7492f9 3020 goto invalid_code;
b73bfc1c 3021 /* We must update these variables now. */
df7492f9
KH
3022 charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3023 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
b73bfc1c 3024 continue;
4ed46869 3025 }
b73bfc1c 3026 }
4ed46869 3027
b73bfc1c 3028 /* Now we know CHARSET and 1st position code C1 of a character.
df7492f9
KH
3029 Produce a decoded character while getting 2nd position code
3030 C2 if necessary. */
3031 c1 &= 0x7F;
3032 if (CHARSET_DIMENSION (charset) > 1)
b73bfc1c
KH
3033 {
3034 ONE_MORE_BYTE (c2);
df7492f9 3035 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
b73bfc1c 3036 /* C2 is not in a valid range. */
df7492f9
KH
3037 goto invalid_code;
3038 c1 = (c1 << 8) | (c2 & 0x7F);
3039 if (CHARSET_DIMENSION (charset) > 2)
3040 {
3041 ONE_MORE_BYTE (c2);
3042 if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3043 /* C2 is not in a valid range. */
3044 goto invalid_code;
3045 c1 = (c1 << 8) | (c2 & 0x7F);
3046 }
3047 }
3048
3049 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3050 if (c < 0)
3051 {
3052 MAYBE_FINISH_COMPOSITION ();
3053 for (; src_base < src; src_base++, char_offset++)
3054 {
3055 if (ASCII_BYTE_P (*src_base))
3056 *charbuf++ = *src_base;
3057 else
3058 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3059 }
3060 }
3061 else if (composition_state == COMPOSING_NO)
3062 {
3063 *charbuf++ = c;
3064 char_offset++;
4ed46869 3065 }
df7492f9 3066 else
781d7a48
KH
3067 {
3068 components[component_idx++] = c;
3069 if (method == COMPOSITION_WITH_RULE
3070 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3071 && composition_state == COMPOSING_COMPONENT_CHAR))
3072 composition_state++;
3073 }
4ed46869
KH
3074 continue;
3075
df7492f9
KH
3076 invalid_code:
3077 MAYBE_FINISH_COMPOSITION ();
4ed46869 3078 src = src_base;
df7492f9
KH
3079 consumed_chars = consumed_chars_base;
3080 ONE_MORE_BYTE (c);
3081 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3082 coding->errors++;
4ed46869 3083 }
fb88bf2d 3084
df7492f9
KH
3085 no_more_source:
3086 coding->consumed_char += consumed_chars_base;
3087 coding->consumed = src_base - coding->source;
3088 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
3089}
3090
b73bfc1c 3091
f4dee582 3092/* ISO2022 encoding stuff. */
4ed46869
KH
3093
3094/*
f4dee582 3095 It is not enough to say just "ISO2022" on encoding, we have to
df7492f9 3096 specify more details. In Emacs, each coding system of ISO2022
4ed46869 3097 variant has the following specifications:
df7492f9 3098 1. Initial designation to G0 thru G3.
4ed46869
KH
3099 2. Allows short-form designation?
3100 3. ASCII should be designated to G0 before control characters?
3101 4. ASCII should be designated to G0 at end of line?
3102 5. 7-bit environment or 8-bit environment?
3103 6. Use locking-shift?
3104 7. Use Single-shift?
3105 And the following two are only for Japanese:
3106 8. Use ASCII in place of JIS0201-1976-Roman?
3107 9. Use JISX0208-1983 in place of JISX0208-1978?
df7492f9
KH
3108 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3109 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
f4dee582 3110 details.
4ed46869
KH
3111*/
3112
3113/* Produce codes (escape sequence) for designating CHARSET to graphic
b73bfc1c
KH
3114 register REG at DST, and increment DST. If <final-char> of CHARSET is
3115 '@', 'A', or 'B' and the coding system CODING allows, produce
3116 designation sequence of short-form. */
4ed46869
KH
3117
3118#define ENCODE_DESIGNATION(charset, reg, coding) \
3119 do { \
df7492f9 3120 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
4ed46869
KH
3121 char *intermediate_char_94 = "()*+"; \
3122 char *intermediate_char_96 = ",-./"; \
df7492f9
KH
3123 int revision = -1; \
3124 int c; \
3125 \
3126 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
c197f191 3127 revision = CHARSET_ISO_REVISION (charset); \
df7492f9
KH
3128 \
3129 if (revision >= 0) \
70c22245 3130 { \
df7492f9
KH
3131 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3132 EMIT_ONE_BYTE ('@' + revision); \
4ed46869 3133 } \
df7492f9 3134 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
4ed46869
KH
3135 if (CHARSET_DIMENSION (charset) == 1) \
3136 { \
df7492f9
KH
3137 if (! CHARSET_ISO_CHARS_96 (charset)) \
3138 c = intermediate_char_94[reg]; \
4ed46869 3139 else \
df7492f9
KH
3140 c = intermediate_char_96[reg]; \
3141 EMIT_ONE_ASCII_BYTE (c); \
4ed46869
KH
3142 } \
3143 else \
3144 { \
df7492f9
KH
3145 EMIT_ONE_ASCII_BYTE ('$'); \
3146 if (! CHARSET_ISO_CHARS_96 (charset)) \
4ed46869 3147 { \
df7492f9 3148 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
b73bfc1c
KH
3149 || reg != 0 \
3150 || final_char < '@' || final_char > 'B') \
df7492f9 3151 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
4ed46869
KH
3152 } \
3153 else \
df7492f9 3154 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
4ed46869 3155 } \
df7492f9
KH
3156 EMIT_ONE_ASCII_BYTE (final_char); \
3157 \
3158 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
4ed46869
KH
3159 } while (0)
3160
df7492f9 3161
4ed46869
KH
3162/* The following two macros produce codes (control character or escape
3163 sequence) for ISO2022 single-shift functions (single-shift-2 and
3164 single-shift-3). */
3165
df7492f9
KH
3166#define ENCODE_SINGLE_SHIFT_2 \
3167 do { \
3168 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3169 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3170 else \
3171 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3172 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3173 } while (0)
3174
df7492f9
KH
3175
3176#define ENCODE_SINGLE_SHIFT_3 \
3177 do { \
3178 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3179 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3180 else \
3181 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3182 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
4ed46869
KH
3183 } while (0)
3184
df7492f9 3185
4ed46869
KH
3186/* The following four macros produce codes (control character or
3187 escape sequence) for ISO2022 locking-shift functions (shift-in,
3188 shift-out, locking-shift-2, and locking-shift-3). */
3189
df7492f9
KH
3190#define ENCODE_SHIFT_IN \
3191 do { \
3192 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3193 CODING_ISO_INVOCATION (coding, 0) = 0; \
4ed46869
KH
3194 } while (0)
3195
df7492f9
KH
3196
3197#define ENCODE_SHIFT_OUT \
3198 do { \
3199 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3200 CODING_ISO_INVOCATION (coding, 0) = 1; \
4ed46869
KH
3201 } while (0)
3202
df7492f9
KH
3203
3204#define ENCODE_LOCKING_SHIFT_2 \
3205 do { \
3206 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3207 CODING_ISO_INVOCATION (coding, 0) = 2; \
4ed46869
KH
3208 } while (0)
3209
df7492f9
KH
3210
3211#define ENCODE_LOCKING_SHIFT_3 \
3212 do { \
3213 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3214 CODING_ISO_INVOCATION (coding, 0) = 3; \
4ed46869
KH
3215 } while (0)
3216
df7492f9 3217
f4dee582
RS
3218/* Produce codes for a DIMENSION1 character whose character set is
3219 CHARSET and whose position-code is C1. Designation and invocation
4ed46869
KH
3220 sequences are also produced in advance if necessary. */
3221
6e85d753
KH
3222#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3223 do { \
df7492f9 3224 int id = CHARSET_ID (charset); \
bf16eb23
KH
3225 \
3226 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3227 && id == charset_ascii) \
3228 { \
3229 id = charset_jisx0201_roman; \
3230 charset = CHARSET_FROM_ID (id); \
3231 } \
3232 \
df7492f9 3233 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3234 { \
df7492f9
KH
3235 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3236 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753 3237 else \
df7492f9
KH
3238 EMIT_ONE_BYTE (c1 | 0x80); \
3239 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3240 break; \
3241 } \
df7492f9 3242 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3243 { \
df7492f9 3244 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
6e85d753
KH
3245 break; \
3246 } \
df7492f9 3247 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3248 { \
df7492f9 3249 EMIT_ONE_BYTE (c1 | 0x80); \
6e85d753
KH
3250 break; \
3251 } \
6e85d753
KH
3252 else \
3253 /* Since CHARSET is not yet invoked to any graphic planes, we \
3254 must invoke it, or, at first, designate it to some graphic \
3255 register. Then repeat the loop to actually produce the \
3256 character. */ \
df7492f9
KH
3257 dst = encode_invocation_designation (charset, coding, dst, \
3258 &produced_chars); \
4ed46869
KH
3259 } while (1)
3260
df7492f9 3261
f4dee582
RS
3262/* Produce codes for a DIMENSION2 character whose character set is
3263 CHARSET and whose position-codes are C1 and C2. Designation and
4ed46869
KH
3264 invocation codes are also produced in advance if necessary. */
3265
6e85d753
KH
3266#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3267 do { \
df7492f9 3268 int id = CHARSET_ID (charset); \
bf16eb23
KH
3269 \
3270 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3271 && id == charset_jisx0208) \
3272 { \
3273 id = charset_jisx0208_1978; \
3274 charset = CHARSET_FROM_ID (id); \
3275 } \
3276 \
df7492f9 3277 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
6e85d753 3278 { \
df7492f9
KH
3279 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3280 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753 3281 else \
df7492f9
KH
3282 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3283 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
6e85d753
KH
3284 break; \
3285 } \
df7492f9 3286 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
6e85d753 3287 { \
df7492f9 3288 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
6e85d753
KH
3289 break; \
3290 } \
df7492f9 3291 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
6e85d753 3292 { \
df7492f9 3293 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
6e85d753
KH
3294 break; \
3295 } \
6e85d753
KH
3296 else \
3297 /* Since CHARSET is not yet invoked to any graphic planes, we \
3298 must invoke it, or, at first, designate it to some graphic \
3299 register. Then repeat the loop to actually produce the \
3300 character. */ \
df7492f9
KH
3301 dst = encode_invocation_designation (charset, coding, dst, \
3302 &produced_chars); \
4ed46869
KH
3303 } while (1)
3304
05e6f5dc 3305
df7492f9
KH
3306#define ENCODE_ISO_CHARACTER(charset, c) \
3307 do { \
3308 int code = ENCODE_CHAR ((charset),(c)); \
3309 \
3310 if (CHARSET_DIMENSION (charset) == 1) \
3311 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3312 else \
3313 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
84fbb8a0 3314 } while (0)
bdd9fb48 3315
05e6f5dc 3316
4ed46869 3317/* Produce designation and invocation codes at a place pointed by DST
df7492f9 3318 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4ed46869
KH
3319 Return new DST. */
3320
3321unsigned char *
df7492f9
KH
3322encode_invocation_designation (charset, coding, dst, p_nchars)
3323 struct charset *charset;
4ed46869
KH
3324 struct coding_system *coding;
3325 unsigned char *dst;
df7492f9 3326 int *p_nchars;
4ed46869 3327{
df7492f9
KH
3328 int multibytep = coding->dst_multibyte;
3329 int produced_chars = *p_nchars;
4ed46869 3330 int reg; /* graphic register number */
df7492f9 3331 int id = CHARSET_ID (charset);
4ed46869
KH
3332
3333 /* At first, check designations. */
3334 for (reg = 0; reg < 4; reg++)
df7492f9 3335 if (id == CODING_ISO_DESIGNATION (coding, reg))
4ed46869
KH
3336 break;
3337
3338 if (reg >= 4)
3339 {
3340 /* CHARSET is not yet designated to any graphic registers. */
3341 /* At first check the requested designation. */
df7492f9
KH
3342 reg = CODING_ISO_REQUEST (coding, id);
3343 if (reg < 0)
1ba9e4ab
KH
3344 /* Since CHARSET requests no special designation, designate it
3345 to graphic register 0. */
4ed46869
KH
3346 reg = 0;
3347
3348 ENCODE_DESIGNATION (charset, reg, coding);
3349 }
3350
df7492f9
KH
3351 if (CODING_ISO_INVOCATION (coding, 0) != reg
3352 && CODING_ISO_INVOCATION (coding, 1) != reg)
4ed46869
KH
3353 {
3354 /* Since the graphic register REG is not invoked to any graphic
3355 planes, invoke it to graphic plane 0. */
3356 switch (reg)
3357 {
3358 case 0: /* graphic register 0 */
3359 ENCODE_SHIFT_IN;
3360 break;
3361
3362 case 1: /* graphic register 1 */
3363 ENCODE_SHIFT_OUT;
3364 break;
3365
3366 case 2: /* graphic register 2 */
df7492f9 3367 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3368 ENCODE_SINGLE_SHIFT_2;
3369 else
3370 ENCODE_LOCKING_SHIFT_2;
3371 break;
3372
3373 case 3: /* graphic register 3 */
df7492f9 3374 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4ed46869
KH
3375 ENCODE_SINGLE_SHIFT_3;
3376 else
3377 ENCODE_LOCKING_SHIFT_3;
3378 break;
3379 }
3380 }
b73bfc1c 3381
df7492f9 3382 *p_nchars = produced_chars;
4ed46869
KH
3383 return dst;
3384}
3385
df7492f9
KH
3386/* The following three macros produce codes for indicating direction
3387 of text. */
3388#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
ec6d2bb8 3389 do { \
df7492f9
KH
3390 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
3391 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
ec6d2bb8 3392 else \
df7492f9 3393 EMIT_ONE_BYTE (ISO_CODE_CSI); \
ec6d2bb8
KH
3394 } while (0)
3395
ec6d2bb8 3396
df7492f9
KH
3397#define ENCODE_DIRECTION_R2L() \
3398 do { \
3399 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3400 EMIT_TWO_ASCII_BYTES ('2', ']'); \
ec6d2bb8
KH
3401 } while (0)
3402
ec6d2bb8 3403
df7492f9 3404#define ENCODE_DIRECTION_L2R() \
ec6d2bb8 3405 do { \
df7492f9
KH
3406 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3407 EMIT_TWO_ASCII_BYTES ('0', ']'); \
4ed46869
KH
3408 } while (0)
3409
4ed46869
KH
3410
3411/* Produce codes for designation and invocation to reset the graphic
3412 planes and registers to initial state. */
df7492f9
KH
3413#define ENCODE_RESET_PLANE_AND_REGISTER() \
3414 do { \
3415 int reg; \
3416 struct charset *charset; \
3417 \
3418 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3419 ENCODE_SHIFT_IN; \
3420 for (reg = 0; reg < 4; reg++) \
3421 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3422 && (CODING_ISO_DESIGNATION (coding, reg) \
3423 != CODING_ISO_INITIAL (coding, reg))) \
3424 { \
3425 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3426 ENCODE_DESIGNATION (charset, reg, coding); \
3427 } \
4ed46869
KH
3428 } while (0)
3429
df7492f9 3430
bdd9fb48 3431/* Produce designation sequences of charsets in the line started from
b73bfc1c 3432 SRC to a place pointed by DST, and return updated DST.
bdd9fb48
KH
3433
3434 If the current block ends before any end-of-line, we may fail to
d46c5b12
KH
3435 find all the necessary designations. */
3436
b73bfc1c 3437static unsigned char *
df7492f9 3438encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
e0e989f6 3439 struct coding_system *coding;
df7492f9
KH
3440 int *charbuf, *charbuf_end;
3441 unsigned char *dst;
e0e989f6 3442{
df7492f9 3443 struct charset *charset;
bdd9fb48
KH
3444 /* Table of charsets to be designated to each graphic register. */
3445 int r[4];
df7492f9
KH
3446 int c, found = 0, reg;
3447 int produced_chars = 0;
3448 int multibytep = coding->dst_multibyte;
3449 Lisp_Object attrs;
3450 Lisp_Object charset_list;
3451
3452 attrs = CODING_ID_ATTRS (coding->id);
3453 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3454 if (EQ (charset_list, Qiso_2022))
3455 charset_list = Viso_2022_charset_list;
bdd9fb48
KH
3456
3457 for (reg = 0; reg < 4; reg++)
3458 r[reg] = -1;
3459
b73bfc1c 3460 while (found < 4)
e0e989f6 3461 {
df7492f9
KH
3462 int id;
3463
3464 c = *charbuf++;
b73bfc1c
KH
3465 if (c == '\n')
3466 break;
df7492f9
KH
3467 charset = char_charset (c, charset_list, NULL);
3468 id = CHARSET_ID (charset);
3469 reg = CODING_ISO_REQUEST (coding, id);
3470 if (reg >= 0 && r[reg] < 0)
bdd9fb48
KH
3471 {
3472 found++;
df7492f9 3473 r[reg] = id;
bdd9fb48 3474 }
bdd9fb48
KH
3475 }
3476
3477 if (found)
3478 {
3479 for (reg = 0; reg < 4; reg++)
3480 if (r[reg] >= 0
df7492f9
KH
3481 && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3482 ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
e0e989f6 3483 }
b73bfc1c
KH
3484
3485 return dst;
e0e989f6
KH
3486}
3487
4ed46869
KH
3488/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3489
df7492f9
KH
3490static int
3491encode_coding_iso_2022 (coding)
4ed46869 3492 struct coding_system *coding;
4ed46869 3493{
df7492f9
KH
3494 int multibytep = coding->dst_multibyte;
3495 int *charbuf = coding->charbuf;
3496 int *charbuf_end = charbuf + coding->charbuf_used;
3497 unsigned char *dst = coding->destination + coding->produced;
3498 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3499 int safe_room = 16;
3500 int bol_designation
3501 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3502 && CODING_ISO_BOL (coding));
3503 int produced_chars = 0;
3504 Lisp_Object attrs, eol_type, charset_list;
3505 int ascii_compatible;
b73bfc1c 3506 int c;
05e6f5dc 3507
df7492f9 3508 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
004068e4
KH
3509 setup_iso_safe_charsets (attrs);
3510 coding->safe_charsets
3511 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
bdd9fb48 3512
df7492f9 3513 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4ed46869 3514
df7492f9 3515 while (charbuf < charbuf_end)
4ed46869 3516 {
df7492f9 3517 ASSURE_DESTINATION (safe_room);
b73bfc1c 3518
df7492f9 3519 if (bol_designation)
b73bfc1c 3520 {
df7492f9 3521 unsigned char *dst_prev = dst;
4ed46869 3522
bdd9fb48 3523 /* We have to produce designation sequences if any now. */
df7492f9
KH
3524 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3525 bol_designation = 0;
3526 /* We are sure that designation sequences are all ASCII bytes. */
3527 produced_chars += dst - dst_prev;
4ed46869 3528 }
ec6d2bb8 3529
df7492f9 3530 c = *charbuf++;
4ed46869 3531
b73bfc1c
KH
3532 /* Now encode the character C. */
3533 if (c < 0x20 || c == 0x7F)
3534 {
df7492f9
KH
3535 if (c == '\n'
3536 || (c == '\r' && EQ (eol_type, Qmac)))
19a8d9e0 3537 {
df7492f9
KH
3538 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3539 ENCODE_RESET_PLANE_AND_REGISTER ();
3540 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
b73bfc1c 3541 {
df7492f9
KH
3542 int i;
3543
3544 for (i = 0; i < 4; i++)
3545 CODING_ISO_DESIGNATION (coding, i)
3546 = CODING_ISO_INITIAL (coding, i);
b73bfc1c 3547 }
df7492f9
KH
3548 bol_designation
3549 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
19a8d9e0 3550 }
df7492f9
KH
3551 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3552 ENCODE_RESET_PLANE_AND_REGISTER ();
3553 EMIT_ONE_ASCII_BYTE (c);
4ed46869 3554 }
df7492f9 3555 else if (ASCII_CHAR_P (c))
88993dfd 3556 {
df7492f9
KH
3557 if (ascii_compatible)
3558 EMIT_ONE_ASCII_BYTE (c);
3559 else
bf16eb23
KH
3560 {
3561 struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3562 ENCODE_ISO_CHARACTER (charset, c);
3563 }
88993dfd 3564 }
16eafb5d
KH
3565 else if (CHAR_BYTE8_P (c))
3566 {
3567 c = CHAR_TO_BYTE8 (c);
3568 EMIT_ONE_BYTE (c);
3569 }
b73bfc1c 3570 else
df7492f9
KH
3571 {
3572 struct charset *charset = char_charset (c, charset_list, NULL);
b73bfc1c 3573
df7492f9
KH
3574 if (!charset)
3575 {
41cbe562
KH
3576 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3577 {
3578 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3579 charset = CHARSET_FROM_ID (charset_ascii);
3580 }
3581 else
3582 {
3583 c = coding->default_char;
3584 charset = char_charset (c, charset_list, NULL);
3585 }
df7492f9
KH
3586 }
3587 ENCODE_ISO_CHARACTER (charset, c);
3588 }
84fbb8a0 3589 }
b73bfc1c 3590
df7492f9
KH
3591 if (coding->mode & CODING_MODE_LAST_BLOCK
3592 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3593 {
3594 ASSURE_DESTINATION (safe_room);
3595 ENCODE_RESET_PLANE_AND_REGISTER ();
3596 }
3597 coding->result = CODING_RESULT_SUCCESS;
3598 CODING_ISO_BOL (coding) = bol_designation;
3599 coding->produced_char += produced_chars;
3600 coding->produced = dst - coding->destination;
3601 return 0;
4ed46869
KH
3602}
3603
3604\f
df7492f9 3605/*** 8,9. SJIS and BIG5 handlers ***/
4ed46869 3606
df7492f9 3607/* Although SJIS and BIG5 are not ISO's coding system, they are used
4ed46869
KH
3608 quite widely. So, for the moment, Emacs supports them in the bare
3609 C code. But, in the future, they may be supported only by CCL. */
3610
3611/* SJIS is a coding system encoding three character sets: ASCII, right
3612 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3613 as is. A character of charset katakana-jisx0201 is encoded by
3614 "position-code + 0x80". A character of charset japanese-jisx0208
3615 is encoded in 2-byte but two position-codes are divided and shifted
df7492f9 3616 so that it fit in the range below.
4ed46869
KH
3617
3618 --- CODE RANGE of SJIS ---
3619 (character set) (range)
3620 ASCII 0x00 .. 0x7F
df7492f9 3621 KATAKANA-JISX0201 0xA0 .. 0xDF
c28a9453 3622 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
d14d03ac 3623 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
4ed46869
KH
3624 -------------------------------
3625
3626*/
3627
3628/* BIG5 is a coding system encoding two character sets: ASCII and
3629 Big5. An ASCII character is encoded as is. Big5 is a two-byte
df7492f9 3630 character set and is encoded in two-byte.
4ed46869
KH
3631
3632 --- CODE RANGE of BIG5 ---
3633 (character set) (range)
3634 ASCII 0x00 .. 0x7F
3635 Big5 (1st byte) 0xA1 .. 0xFE
3636 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3637 --------------------------
3638
df7492f9 3639 */
4ed46869
KH
3640
3641/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3642 Check if a text is encoded in SJIS. If it is, return
df7492f9 3643 CATEGORY_MASK_SJIS, else return 0. */
4ed46869 3644
0a28aafb 3645static int
df7492f9
KH
3646detect_coding_sjis (coding, mask)
3647 struct coding_system *coding;
3648 int *mask;
4ed46869 3649{
df7492f9
KH
3650 unsigned char *src = coding->source, *src_base = src;
3651 unsigned char *src_end = coding->source + coding->src_bytes;
3652 int multibytep = coding->src_multibyte;
3653 int consumed_chars = 0;
3654 int found = 0;
b73bfc1c 3655 int c;
df7492f9
KH
3656
3657 /* A coding system of this category is always ASCII compatible. */
3658 src += coding->head_ascii;
4ed46869 3659
b73bfc1c 3660 while (1)
4ed46869 3661 {
df7492f9 3662 ONE_MORE_BYTE (c);
682169fe
KH
3663 if (c < 0x80)
3664 continue;
df7492f9 3665 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4ed46869 3666 {
df7492f9 3667 ONE_MORE_BYTE (c);
682169fe 3668 if (c < 0x40 || c == 0x7F || c > 0xFC)
df7492f9
KH
3669 break;
3670 found = 1;
4ed46869 3671 }
df7492f9
KH
3672 else if (c >= 0xA0 && c < 0xE0)
3673 found = 1;
3674 else
3675 break;
4ed46869 3676 }
df7492f9
KH
3677 *mask &= ~CATEGORY_MASK_SJIS;
3678 return 0;
3679
3680 no_more_source:
3681 if (!found)
3682 return 0;
3683 *mask &= CATEGORY_MASK_SJIS;
3684 return 1;
4ed46869
KH
3685}
3686
3687/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3688 Check if a text is encoded in BIG5. If it is, return
df7492f9 3689 CATEGORY_MASK_BIG5, else return 0. */
4ed46869 3690
0a28aafb 3691static int
df7492f9
KH
3692detect_coding_big5 (coding, mask)
3693 struct coding_system *coding;
3694 int *mask;
4ed46869 3695{
df7492f9
KH
3696 unsigned char *src = coding->source, *src_base = src;
3697 unsigned char *src_end = coding->source + coding->src_bytes;
3698 int multibytep = coding->src_multibyte;
3699 int consumed_chars = 0;
3700 int found = 0;
b73bfc1c 3701 int c;
fa42c37f 3702
df7492f9
KH
3703 /* A coding system of this category is always ASCII compatible. */
3704 src += coding->head_ascii;
fa42c37f 3705
b73bfc1c 3706 while (1)
fa42c37f 3707 {
df7492f9
KH
3708 ONE_MORE_BYTE (c);
3709 if (c < 0x80)
fa42c37f 3710 continue;
df7492f9 3711 if (c >= 0xA1)
fa42c37f 3712 {
df7492f9
KH
3713 ONE_MORE_BYTE (c);
3714 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
fa42c37f 3715 return 0;
df7492f9 3716 found = 1;
fa42c37f 3717 }
df7492f9
KH
3718 else
3719 break;
fa42c37f 3720 }
df7492f9 3721 *mask &= ~CATEGORY_MASK_BIG5;
fa42c37f 3722 return 0;
df7492f9
KH
3723
3724 no_more_source:
3725 if (!found)
3726 return 0;
3727 *mask &= CATEGORY_MASK_BIG5;
3728 return 1;
fa42c37f
KH
3729}
3730
4ed46869
KH
3731/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3732 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
3733
b73bfc1c 3734static void
df7492f9 3735decode_coding_sjis (coding)
4ed46869 3736 struct coding_system *coding;
4ed46869 3737{
df7492f9
KH
3738 unsigned char *src = coding->source + coding->consumed;
3739 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 3740 unsigned char *src_base;
df7492f9
KH
3741 int *charbuf = coding->charbuf;
3742 int *charbuf_end = charbuf + coding->charbuf_size;
3743 int consumed_chars = 0, consumed_chars_base;
3744 int multibytep = coding->src_multibyte;
3745 struct charset *charset_roman, *charset_kanji, *charset_kana;
3746 Lisp_Object attrs, eol_type, charset_list, val;
a5d301df 3747
df7492f9
KH
3748 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3749
3750 val = charset_list;
3751 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3752 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3753 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
4ed46869 3754
b73bfc1c 3755 while (1)
4ed46869 3756 {
df7492f9 3757 int c, c1;
b73bfc1c
KH
3758
3759 src_base = src;
df7492f9
KH
3760 consumed_chars_base = consumed_chars;
3761
3762 if (charbuf >= charbuf_end)
3763 break;
3764
3765 ONE_MORE_BYTE (c);
b73bfc1c 3766
df7492f9 3767 if (c == '\r')
4ed46869 3768 {
df7492f9 3769 if (EQ (eol_type, Qdos))
4ed46869 3770 {
df7492f9
KH
3771 if (src == src_end)
3772 goto no_more_source;
3773 if (*src == '\n')
3774 ONE_MORE_BYTE (c);
4ed46869 3775 }
df7492f9
KH
3776 else if (EQ (eol_type, Qmac))
3777 c = '\n';
4ed46869 3778 }
54f78171 3779 else
df7492f9
KH
3780 {
3781 struct charset *charset;
3782
3783 if (c < 0x80)
3784 charset = charset_roman;
3785 else
4ed46869 3786 {
df7492f9
KH
3787 if (c >= 0xF0)
3788 goto invalid_code;
3789 if (c < 0xA0 || c >= 0xE0)
fb88bf2d 3790 {
54f78171 3791 /* SJIS -> JISX0208 */
df7492f9
KH
3792 ONE_MORE_BYTE (c1);
3793 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3794 goto invalid_code;
3795 c = (c << 8) | c1;
3796 SJIS_TO_JIS (c);
3797 charset = charset_kanji;
5e34de15 3798 }
fb88bf2d 3799 else
b73bfc1c 3800 /* SJIS -> JISX0201-Kana */
df7492f9
KH
3801 charset = charset_kana;
3802 }
3803 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3804 }
3805 *charbuf++ = c;
3806 continue;
3807
3808 invalid_code:
3809 src = src_base;
3810 consumed_chars = consumed_chars_base;
3811 ONE_MORE_BYTE (c);
3812 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3813 coding->errors++;
3814 }
3815
3816 no_more_source:
3817 coding->consumed_char += consumed_chars_base;
3818 coding->consumed = src_base - coding->source;
3819 coding->charbuf_used = charbuf - coding->charbuf;
3820}
3821
3822static void
3823decode_coding_big5 (coding)
3824 struct coding_system *coding;
3825{
3826 unsigned char *src = coding->source + coding->consumed;
3827 unsigned char *src_end = coding->source + coding->src_bytes;
3828 unsigned char *src_base;
3829 int *charbuf = coding->charbuf;
3830 int *charbuf_end = charbuf + coding->charbuf_size;
3831 int consumed_chars = 0, consumed_chars_base;
3832 int multibytep = coding->src_multibyte;
3833 struct charset *charset_roman, *charset_big5;
3834 Lisp_Object attrs, eol_type, charset_list, val;
3835
3836 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3837 val = charset_list;
3838 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3839 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3840
3841 while (1)
3842 {
3843 int c, c1;
3844
3845 src_base = src;
3846 consumed_chars_base = consumed_chars;
3847
3848 if (charbuf >= charbuf_end)
3849 break;
3850
3851 ONE_MORE_BYTE (c);
3852
3853 if (c == '\r')
3854 {
3855 if (EQ (eol_type, Qdos))
3856 {
3857 if (src == src_end)
3858 goto no_more_source;
3859 if (*src == '\n')
3860 ONE_MORE_BYTE (c);
4ed46869 3861 }
df7492f9
KH
3862 else if (EQ (eol_type, Qmac))
3863 c = '\n';
3864 }
3865 else
3866 {
3867 struct charset *charset;
3868 if (c < 0x80)
3869 charset = charset_roman;
fb88bf2d 3870 else
fb88bf2d 3871 {
54f78171 3872 /* BIG5 -> Big5 */
df7492f9
KH
3873 if (c < 0xA1 || c > 0xFE)
3874 goto invalid_code;
3875 ONE_MORE_BYTE (c1);
3876 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3877 goto invalid_code;
3878 c = c << 8 | c1;
3879 charset = charset_big5;
4ed46869 3880 }
df7492f9 3881 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4ed46869 3882 }
4ed46869 3883
df7492f9 3884 *charbuf++ = c;
fb88bf2d
KH
3885 continue;
3886
df7492f9 3887 invalid_code:
4ed46869 3888 src = src_base;
df7492f9
KH
3889 consumed_chars = consumed_chars_base;
3890 ONE_MORE_BYTE (c);
3891 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3892 coding->errors++;
fb88bf2d 3893 }
d46c5b12 3894
df7492f9
KH
3895 no_more_source:
3896 coding->consumed_char += consumed_chars_base;
3897 coding->consumed = src_base - coding->source;
3898 coding->charbuf_used = charbuf - coding->charbuf;
3899}
3900
3901/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3902 This function can encode charsets `ascii', `katakana-jisx0201',
3903 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3904 are sure that all these charsets are registered as official charset
3905 (i.e. do not have extended leading-codes). Characters of other
3906 charsets are produced without any encoding. If SJIS_P is 1, encode
3907 SJIS text, else encode BIG5 text. */
3908
3909static int
3910encode_coding_sjis (coding)
3911 struct coding_system *coding;
3912{
3913 int multibytep = coding->dst_multibyte;
3914 int *charbuf = coding->charbuf;
3915 int *charbuf_end = charbuf + coding->charbuf_used;
3916 unsigned char *dst = coding->destination + coding->produced;
3917 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3918 int safe_room = 4;
3919 int produced_chars = 0;
3920 Lisp_Object attrs, eol_type, charset_list, val;
3921 int ascii_compatible;
3922 struct charset *charset_roman, *charset_kanji, *charset_kana;
3923 int c;
3924
3925 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3926 val = charset_list;
3927 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3928 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3929 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3930
3931 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3932
3933 while (charbuf < charbuf_end)
3934 {
3935 ASSURE_DESTINATION (safe_room);
3936 c = *charbuf++;
3937 /* Now encode the character C. */
3938 if (ASCII_CHAR_P (c) && ascii_compatible)
3939 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
3940 else if (CHAR_BYTE8_P (c))
3941 {
3942 c = CHAR_TO_BYTE8 (c);
3943 EMIT_ONE_BYTE (c);
3944 }
df7492f9
KH
3945 else
3946 {
3947 unsigned code;
3948 struct charset *charset = char_charset (c, charset_list, &code);
3949
3950 if (!charset)
3951 {
41cbe562
KH
3952 if (coding->mode & CODING_MODE_SAFE_ENCODING)
3953 {
3954 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3955 charset = CHARSET_FROM_ID (charset_ascii);
3956 }
3957 else
3958 {
3959 c = coding->default_char;
3960 charset = char_charset (c, charset_list, &code);
3961 }
df7492f9
KH
3962 }
3963 if (code == CHARSET_INVALID_CODE (charset))
3964 abort ();
3965 if (charset == charset_kanji)
3966 {
3967 int c1, c2;
3968 JIS_TO_SJIS (code);
3969 c1 = code >> 8, c2 = code & 0xFF;
3970 EMIT_TWO_BYTES (c1, c2);
3971 }
3972 else if (charset == charset_kana)
3973 EMIT_ONE_BYTE (code | 0x80);
3974 else
3975 EMIT_ONE_ASCII_BYTE (code & 0x7F);
3976 }
3977 }
3978 coding->result = CODING_RESULT_SUCCESS;
3979 coding->produced_char += produced_chars;
3980 coding->produced = dst - coding->destination;
3981 return 0;
3982}
3983
3984static int
3985encode_coding_big5 (coding)
3986 struct coding_system *coding;
3987{
3988 int multibytep = coding->dst_multibyte;
3989 int *charbuf = coding->charbuf;
3990 int *charbuf_end = charbuf + coding->charbuf_used;
3991 unsigned char *dst = coding->destination + coding->produced;
3992 unsigned char *dst_end = coding->destination + coding->dst_bytes;
3993 int safe_room = 4;
3994 int produced_chars = 0;
3995 Lisp_Object attrs, eol_type, charset_list, val;
3996 int ascii_compatible;
3997 struct charset *charset_roman, *charset_big5;
3998 int c;
3999
4000 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4001 val = charset_list;
4002 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4003 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4004 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4005
4006 while (charbuf < charbuf_end)
4007 {
4008 ASSURE_DESTINATION (safe_room);
4009 c = *charbuf++;
4010 /* Now encode the character C. */
4011 if (ASCII_CHAR_P (c) && ascii_compatible)
4012 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4013 else if (CHAR_BYTE8_P (c))
4014 {
4015 c = CHAR_TO_BYTE8 (c);
4016 EMIT_ONE_BYTE (c);
4017 }
df7492f9
KH
4018 else
4019 {
4020 unsigned code;
4021 struct charset *charset = char_charset (c, charset_list, &code);
4022
4023 if (! charset)
4024 {
41cbe562
KH
4025 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4026 {
4027 code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4028 charset = CHARSET_FROM_ID (charset_ascii);
4029 }
4030 else
4031 {
4032 c = coding->default_char;
4033 charset = char_charset (c, charset_list, &code);
4034 }
df7492f9
KH
4035 }
4036 if (code == CHARSET_INVALID_CODE (charset))
4037 abort ();
4038 if (charset == charset_big5)
4039 {
4040 int c1, c2;
4041
4042 c1 = code >> 8, c2 = code & 0xFF;
4043 EMIT_TWO_BYTES (c1, c2);
4044 }
4045 else
4046 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4047 }
4048 }
4049 coding->result = CODING_RESULT_SUCCESS;
4050 coding->produced_char += produced_chars;
4051 coding->produced = dst - coding->destination;
4052 return 0;
4053}
4054
4055\f
4056/*** 10. CCL handlers ***/
4057
4058/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4059 Check if a text is encoded in a coding system of which
4060 encoder/decoder are written in CCL program. If it is, return
4061 CATEGORY_MASK_CCL, else return 0. */
4062
4063static int
4064detect_coding_ccl (coding, mask)
4065 struct coding_system *coding;
4066 int *mask;
4067{
4068 unsigned char *src = coding->source, *src_base = src;
4069 unsigned char *src_end = coding->source + coding->src_bytes;
4070 int multibytep = coding->src_multibyte;
4071 int consumed_chars = 0;
4072 int found = 0;
4073 unsigned char *valids = CODING_CCL_VALIDS (coding);
4074 int head_ascii = coding->head_ascii;
4075 Lisp_Object attrs;
4076
4077 coding = &coding_categories[coding_category_ccl];
4078 attrs = CODING_ID_ATTRS (coding->id);
4079 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4080 src += head_ascii;
4081
4082 while (1)
4083 {
4084 int c;
4085 ONE_MORE_BYTE (c);
4086 if (! valids[c])
4087 break;
4088 if (!found && valids[c] > 1)
4089 found = 1;
4090 }
4091 *mask &= ~CATEGORY_MASK_CCL;
4092 return 0;
4093
4094 no_more_source:
4095 if (!found)
4096 return 0;
4097 *mask &= CATEGORY_MASK_CCL;
4098 return 1;
4099}
4100
4101static void
4102decode_coding_ccl (coding)
4103 struct coding_system *coding;
4104{
4105 unsigned char *src = coding->source + coding->consumed;
4106 unsigned char *src_end = coding->source + coding->src_bytes;
4107 int *charbuf = coding->charbuf;
4108 int *charbuf_end = charbuf + coding->charbuf_size;
4109 int consumed_chars = 0;
4110 int multibytep = coding->src_multibyte;
4111 struct ccl_program ccl;
4112 int source_charbuf[1024];
4113 int source_byteidx[1024];
4114
4115 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4116
4117 while (src < src_end)
4118 {
4119 unsigned char *p = src;
4120 int *source, *source_end;
4121 int i = 0;
4122
4123 if (multibytep)
4124 while (i < 1024 && p < src_end)
4125 {
4126 source_byteidx[i] = p - src;
4127 source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4128 }
4129 else
4130 while (i < 1024 && p < src_end)
4131 source_charbuf[i++] = *p++;
4132
4133 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4134 ccl.last_block = 1;
4135
4136 source = source_charbuf;
4137 source_end = source + i;
4138 while (source < source_end)
4139 {
4140 ccl_driver (&ccl, source, charbuf,
4141 source_end - source, charbuf_end - charbuf);
4142 source += ccl.consumed;
4143 charbuf += ccl.produced;
4144 if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4145 break;
4146 }
4147 if (source < source_end)
4148 src += source_byteidx[source - source_charbuf];
4149 else
4150 src = p;
4151 consumed_chars += source - source_charbuf;
4152
4153 if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4154 && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4155 break;
4156 }
4157
4158 switch (ccl.status)
4159 {
4160 case CCL_STAT_SUSPEND_BY_SRC:
4161 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4162 break;
4163 case CCL_STAT_SUSPEND_BY_DST:
4164 break;
4165 case CCL_STAT_QUIT:
4166 case CCL_STAT_INVALID_CMD:
4167 coding->result = CODING_RESULT_INTERRUPT;
4168 break;
4169 default:
4170 coding->result = CODING_RESULT_SUCCESS;
4171 break;
4172 }
4173 coding->consumed_char += consumed_chars;
4174 coding->consumed = src - coding->source;
4175 coding->charbuf_used = charbuf - coding->charbuf;
4176}
4177
4178static int
4179encode_coding_ccl (coding)
4180 struct coding_system *coding;
4181{
4182 struct ccl_program ccl;
4183 int multibytep = coding->dst_multibyte;
4184 int *charbuf = coding->charbuf;
4185 int *charbuf_end = charbuf + coding->charbuf_used;
4186 unsigned char *dst = coding->destination + coding->produced;
4187 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4188 unsigned char *adjusted_dst_end = dst_end - 1;
4189 int destination_charbuf[1024];
4190 int i, produced_chars = 0;
4191
4192 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4193
4194 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4195 ccl.dst_multibyte = coding->dst_multibyte;
4196
4197 while (charbuf < charbuf_end && dst < adjusted_dst_end)
4198 {
4199 int dst_bytes = dst_end - dst;
4200 if (dst_bytes > 1024)
4201 dst_bytes = 1024;
4202
4203 ccl_driver (&ccl, charbuf, destination_charbuf,
4204 charbuf_end - charbuf, dst_bytes);
4205 charbuf += ccl.consumed;
4206 if (multibytep)
4207 for (i = 0; i < ccl.produced; i++)
4208 EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4209 else
4210 {
4211 for (i = 0; i < ccl.produced; i++)
4212 *dst++ = destination_charbuf[i] & 0xFF;
4213 produced_chars += ccl.produced;
4214 }
4215 }
4216
4217 switch (ccl.status)
4218 {
4219 case CCL_STAT_SUSPEND_BY_SRC:
4220 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4221 break;
4222 case CCL_STAT_SUSPEND_BY_DST:
4223 coding->result = CODING_RESULT_INSUFFICIENT_DST;
4224 break;
4225 case CCL_STAT_QUIT:
4226 case CCL_STAT_INVALID_CMD:
4227 coding->result = CODING_RESULT_INTERRUPT;
4228 break;
4229 default:
4230 coding->result = CODING_RESULT_SUCCESS;
4231 break;
4232 }
4233
4234 coding->produced_char += produced_chars;
4235 coding->produced = dst - coding->destination;
4236 return 0;
4ed46869
KH
4237}
4238
df7492f9
KH
4239
4240\f
4241/*** 10, 11. no-conversion handlers ***/
4242
4243/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4ed46869 4244
b73bfc1c 4245static void
df7492f9 4246decode_coding_raw_text (coding)
4ed46869 4247 struct coding_system *coding;
4ed46869 4248{
df7492f9 4249 coding->chars_at_source = 1;
2c78b7e1
KH
4250 coding->consumed_char = 0;
4251 coding->consumed = 0;
df7492f9
KH
4252 coding->result = CODING_RESULT_SUCCESS;
4253}
4ed46869 4254
df7492f9
KH
4255static int
4256encode_coding_raw_text (coding)
4257 struct coding_system *coding;
4258{
4259 int multibytep = coding->dst_multibyte;
4260 int *charbuf = coding->charbuf;
4261 int *charbuf_end = coding->charbuf + coding->charbuf_used;
4262 unsigned char *dst = coding->destination + coding->produced;
4263 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4264 int produced_chars = 0;
4265 int c;
a5d301df 4266
df7492f9 4267 if (multibytep)
b73bfc1c 4268 {
df7492f9 4269 int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4ed46869 4270
df7492f9
KH
4271 if (coding->src_multibyte)
4272 while (charbuf < charbuf_end)
4273 {
4274 ASSURE_DESTINATION (safe_room);
4275 c = *charbuf++;
4276 if (ASCII_CHAR_P (c))
4277 EMIT_ONE_ASCII_BYTE (c);
4278 else if (CHAR_BYTE8_P (c))
4279 {
4280 c = CHAR_TO_BYTE8 (c);
4281 EMIT_ONE_BYTE (c);
4282 }
4283 else
4284 {
4285 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
93dec019 4286
df7492f9
KH
4287 CHAR_STRING_ADVANCE (c, p1);
4288 while (p0 < p1)
4289 EMIT_ONE_BYTE (*p0);
4290 }
4291 }
b73bfc1c 4292 else
df7492f9
KH
4293 while (charbuf < charbuf_end)
4294 {
4295 ASSURE_DESTINATION (safe_room);
4296 c = *charbuf++;
4297 EMIT_ONE_BYTE (c);
4298 }
4299 }
4300 else
4301 {
4302 if (coding->src_multibyte)
b73bfc1c 4303 {
df7492f9
KH
4304 int safe_room = MAX_MULTIBYTE_LENGTH;
4305
4306 while (charbuf < charbuf_end)
b73bfc1c 4307 {
df7492f9
KH
4308 ASSURE_DESTINATION (safe_room);
4309 c = *charbuf++;
4310 if (ASCII_CHAR_P (c))
4311 *dst++ = c;
4312 else if (CHAR_BYTE8_P (c))
4313 *dst++ = CHAR_TO_BYTE8 (c);
b73bfc1c 4314 else
df7492f9
KH
4315 CHAR_STRING_ADVANCE (c, dst);
4316 produced_chars++;
b73bfc1c 4317 }
4ed46869 4318 }
df7492f9
KH
4319 else
4320 {
4321 ASSURE_DESTINATION (charbuf_end - charbuf);
4322 while (charbuf < charbuf_end && dst < dst_end)
4323 *dst++ = *charbuf++;
4324 produced_chars = dst - (coding->destination + coding->dst_bytes);
4325 }
4ed46869 4326 }
df7492f9
KH
4327 coding->result = CODING_RESULT_SUCCESS;
4328 coding->produced_char += produced_chars;
4329 coding->produced = dst - coding->destination;
4330 return 0;
4ed46869
KH
4331}
4332
0a28aafb 4333static int
df7492f9
KH
4334detect_coding_charset (coding, mask)
4335 struct coding_system *coding;
4336 int *mask;
1397dc18 4337{
df7492f9
KH
4338 unsigned char *src = coding->source, *src_base = src;
4339 unsigned char *src_end = coding->source + coding->src_bytes;
4340 int multibytep = coding->src_multibyte;
4341 int consumed_chars = 0;
4342 Lisp_Object attrs, valids;
1397dc18 4343
df7492f9
KH
4344 coding = &coding_categories[coding_category_charset];
4345 attrs = CODING_ID_ATTRS (coding->id);
4346 valids = AREF (attrs, coding_attr_charset_valids);
4347
4348 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4349 src += coding->head_ascii;
1397dc18 4350
b73bfc1c 4351 while (1)
1397dc18 4352 {
df7492f9 4353 int c;
1397dc18 4354
df7492f9
KH
4355 ONE_MORE_BYTE (c);
4356 if (NILP (AREF (valids, c)))
4357 break;
4358 }
4359 *mask &= ~CATEGORY_MASK_CHARSET;
4360 return 0;
4ed46869 4361
df7492f9
KH
4362 no_more_source:
4363 *mask &= CATEGORY_MASK_CHARSET;
4364 return 1;
4365}
4ed46869 4366
b73bfc1c 4367static void
df7492f9 4368decode_coding_charset (coding)
4ed46869 4369 struct coding_system *coding;
4ed46869 4370{
df7492f9
KH
4371 unsigned char *src = coding->source + coding->consumed;
4372 unsigned char *src_end = coding->source + coding->src_bytes;
b73bfc1c 4373 unsigned char *src_base;
df7492f9
KH
4374 int *charbuf = coding->charbuf;
4375 int *charbuf_end = charbuf + coding->charbuf_size;
4376 int consumed_chars = 0, consumed_chars_base;
4377 int multibytep = coding->src_multibyte;
4eb6d3f1 4378 Lisp_Object attrs, eol_type, charset_list, valids;
df7492f9
KH
4379
4380 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4eb6d3f1 4381 valids = AREF (attrs, coding_attr_charset_valids);
b73bfc1c 4382
df7492f9 4383 while (1)
4ed46869 4384 {
4eb6d3f1 4385 int c;
df7492f9
KH
4386
4387 src_base = src;
4388 consumed_chars_base = consumed_chars;
b73bfc1c 4389
df7492f9
KH
4390 if (charbuf >= charbuf_end)
4391 break;
4392
4eb6d3f1 4393 ONE_MORE_BYTE (c);
df7492f9 4394 if (c == '\r')
d46c5b12 4395 {
c7c66a95
KH
4396 /* Here we assume that no charset maps '\r' to something
4397 else. */
df7492f9 4398 if (EQ (eol_type, Qdos))
b73bfc1c 4399 {
4eb6d3f1
KH
4400 if (src < src_end
4401 && *src == '\n')
df7492f9 4402 ONE_MORE_BYTE (c);
b73bfc1c 4403 }
df7492f9 4404 else if (EQ (eol_type, Qmac))
b73bfc1c 4405 c = '\n';
d46c5b12 4406 }
df7492f9 4407 else
d46c5b12 4408 {
4eb6d3f1
KH
4409 Lisp_Object val;
4410 struct charset *charset;
c7c66a95 4411 int dim;
acb2a965
KH
4412 int len = 1;
4413 unsigned code = c;
4eb6d3f1
KH
4414
4415 val = AREF (valids, c);
4416 if (NILP (val))
4417 goto invalid_code;
c7c66a95 4418 if (INTEGERP (val))
4eb6d3f1 4419 {
c7c66a95
KH
4420 charset = CHARSET_FROM_ID (XFASTINT (val));
4421 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4422 while (len < dim)
4eb6d3f1 4423 {
acb2a965
KH
4424 ONE_MORE_BYTE (c);
4425 code = (code << 8) | c;
f9d71dcd 4426 len++;
4eb6d3f1 4427 }
c7c66a95
KH
4428 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4429 charset, code, c);
4430 }
4431 else
4432 {
4433 /* VAL is a list of charset IDs. It is assured that the
4434 list is sorted by charset dimensions (smaller one
4435 comes first). */
c7c66a95
KH
4436 while (CONSP (val))
4437 {
4438 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4439 dim = CHARSET_DIMENSION (charset);
f9d71dcd 4440 while (len < dim)
c7c66a95 4441 {
acb2a965
KH
4442 ONE_MORE_BYTE (c);
4443 code = (code << 8) | c;
f9d71dcd 4444 len++;
c7c66a95 4445 }
c7c66a95
KH
4446 CODING_DECODE_CHAR (coding, src, src_base,
4447 src_end, charset, code, c);
4448 if (c >= 0)
4449 break;
4450 val = XCDR (val);
4451 }
4eb6d3f1 4452 }
df7492f9
KH
4453 if (c < 0)
4454 goto invalid_code;
d46c5b12 4455 }
df7492f9
KH
4456 *charbuf++ = c;
4457 continue;
4458
4459 invalid_code:
4460 src = src_base;
4461 consumed_chars = consumed_chars_base;
4462 ONE_MORE_BYTE (c);
4463 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4464 coding->errors++;
4ed46869
KH
4465 }
4466
df7492f9
KH
4467 no_more_source:
4468 coding->consumed_char += consumed_chars_base;
4469 coding->consumed = src_base - coding->source;
4470 coding->charbuf_used = charbuf - coding->charbuf;
4ed46869
KH
4471}
4472
df7492f9
KH
4473static int
4474encode_coding_charset (coding)
4ed46869 4475 struct coding_system *coding;
4ed46869 4476{
df7492f9
KH
4477 int multibytep = coding->dst_multibyte;
4478 int *charbuf = coding->charbuf;
4479 int *charbuf_end = charbuf + coding->charbuf_used;
4480 unsigned char *dst = coding->destination + coding->produced;
4481 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4482 int safe_room = MAX_MULTIBYTE_LENGTH;
4483 int produced_chars = 0;
df7492f9
KH
4484 Lisp_Object attrs, eol_type, charset_list;
4485 int ascii_compatible;
b73bfc1c 4486 int c;
b73bfc1c 4487
df7492f9 4488 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
df7492f9 4489 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
fb88bf2d 4490
df7492f9 4491 while (charbuf < charbuf_end)
4ed46869 4492 {
4eb6d3f1 4493 struct charset *charset;
df7492f9
KH
4494 unsigned code;
4495
4496 ASSURE_DESTINATION (safe_room);
4497 c = *charbuf++;
4498 if (ascii_compatible && ASCII_CHAR_P (c))
4499 EMIT_ONE_ASCII_BYTE (c);
16eafb5d
KH
4500 else if (CHAR_BYTE8_P (c))
4501 {
4502 c = CHAR_TO_BYTE8 (c);
4503 EMIT_ONE_BYTE (c);
4504 }
d46c5b12 4505 else
4eb6d3f1
KH
4506 {
4507 charset = char_charset (c, charset_list, &code);
4508 if (charset)
4509 {
4510 if (CHARSET_DIMENSION (charset) == 1)
4511 EMIT_ONE_BYTE (code);
4512 else if (CHARSET_DIMENSION (charset) == 2)
4513 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4514 else if (CHARSET_DIMENSION (charset) == 3)
4515 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4516 else
4517 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4518 (code >> 8) & 0xFF, code & 0xFF);
4519 }
4520 else
41cbe562
KH
4521 {
4522 if (coding->mode & CODING_MODE_SAFE_ENCODING)
4523 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4524 else
4525 c = coding->default_char;
4526 EMIT_ONE_BYTE (c);
4527 }
4eb6d3f1 4528 }
4ed46869
KH
4529 }
4530
df7492f9
KH
4531 coding->result = CODING_RESULT_SUCCESS;
4532 coding->produced_char += produced_chars;
4533 coding->produced = dst - coding->destination;
4534 return 0;
4ed46869
KH
4535}
4536
4537\f
1397dc18 4538/*** 7. C library functions ***/
4ed46869 4539
df7492f9
KH
4540/* Setup coding context CODING from information about CODING_SYSTEM.
4541 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4542 CODING_SYSTEM is invalid, signal an error. */
ec6d2bb8
KH
4543
4544void
df7492f9
KH
4545setup_coding_system (coding_system, coding)
4546 Lisp_Object coding_system;
ec6d2bb8
KH
4547 struct coding_system *coding;
4548{
df7492f9
KH
4549 Lisp_Object attrs;
4550 Lisp_Object eol_type;
4551 Lisp_Object coding_type;
4552 Lisp_Object val;
ec6d2bb8 4553
df7492f9
KH
4554 if (NILP (coding_system))
4555 coding_system = Qno_conversion;
4556
4557 CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4558
4559 attrs = CODING_ID_ATTRS (coding->id);
4560 eol_type = CODING_ID_EOL_TYPE (coding->id);
4561
4562 coding->mode = 0;
4563 coding->head_ascii = -1;
4564 coding->common_flags
4565 = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4566
4567 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4568 coding->max_charset_id = XSTRING (val)->size - 1;
4569 coding->safe_charsets = (char *) XSTRING (val)->data;
4570 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4571
4572 coding_type = CODING_ATTR_TYPE (attrs);
4573 if (EQ (coding_type, Qundecided))
4574 {
4575 coding->detector = NULL;
4576 coding->decoder = decode_coding_raw_text;
4577 coding->encoder = encode_coding_raw_text;
4578 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4579 }
4580 else if (EQ (coding_type, Qiso_2022))
4581 {
4582 int i;
4583 int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4584
4585 /* Invoke graphic register 0 to plane 0. */
4586 CODING_ISO_INVOCATION (coding, 0) = 0;
4587 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4588 CODING_ISO_INVOCATION (coding, 1)
4589 = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4590 /* Setup the initial status of designation. */
4591 for (i = 0; i < 4; i++)
4592 CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4593 /* Not single shifting initially. */
4594 CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4595 /* Beginning of buffer should also be regarded as bol. */
4596 CODING_ISO_BOL (coding) = 1;
4597 coding->detector = detect_coding_iso_2022;
4598 coding->decoder = decode_coding_iso_2022;
4599 coding->encoder = encode_coding_iso_2022;
4600 if (flags & CODING_ISO_FLAG_SAFE)
4601 coding->mode |= CODING_MODE_SAFE_ENCODING;
4602 coding->common_flags
4603 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4604 | CODING_REQUIRE_FLUSHING_MASK);
4605 if (flags & CODING_ISO_FLAG_COMPOSITION)
4606 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4607 if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4608 {
4609 setup_iso_safe_charsets (attrs);
4610 val = CODING_ATTR_SAFE_CHARSETS (attrs);
4611 coding->max_charset_id = XSTRING (val)->size - 1;
4612 coding->safe_charsets = (char *) XSTRING (val)->data;
4613 }
4614 CODING_ISO_FLAGS (coding) = flags;
4615 }
4616 else if (EQ (coding_type, Qcharset))
4617 {
4618 coding->detector = detect_coding_charset;
4619 coding->decoder = decode_coding_charset;
4620 coding->encoder = encode_coding_charset;
4621 coding->common_flags
4622 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4623 }
4624 else if (EQ (coding_type, Qutf_8))
4625 {
4626 coding->detector = detect_coding_utf_8;
4627 coding->decoder = decode_coding_utf_8;
4628 coding->encoder = encode_coding_utf_8;
4629 coding->common_flags
4630 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4631 }
4632 else if (EQ (coding_type, Qutf_16))
4633 {
4634 val = AREF (attrs, coding_attr_utf_16_bom);
4635 CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4636 : EQ (val, Qt) ? utf_16_with_bom
4637 : utf_16_without_bom);
4638 val = AREF (attrs, coding_attr_utf_16_endian);
4639 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4640 : utf_16_little_endian);
e19c3639 4641 CODING_UTF_16_SURROGATE (coding) = 0;
df7492f9
KH
4642 coding->detector = detect_coding_utf_16;
4643 coding->decoder = decode_coding_utf_16;
4644 coding->encoder = encode_coding_utf_16;
4645 coding->common_flags
4646 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4647 }
4648 else if (EQ (coding_type, Qccl))
4649 {
4650 coding->detector = detect_coding_ccl;
4651 coding->decoder = decode_coding_ccl;
4652 coding->encoder = encode_coding_ccl;
4653 coding->common_flags
4654 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4655 | CODING_REQUIRE_FLUSHING_MASK);
4656 }
4657 else if (EQ (coding_type, Qemacs_mule))
4658 {
4659 coding->detector = detect_coding_emacs_mule;
4660 coding->decoder = decode_coding_emacs_mule;
4661 coding->encoder = encode_coding_emacs_mule;
4662 coding->common_flags
4663 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4664 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4665 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4666 {
4667 Lisp_Object tail, safe_charsets;
4668 int max_charset_id = 0;
4669
4670 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4671 tail = XCDR (tail))
4672 if (max_charset_id < XFASTINT (XCAR (tail)))
4673 max_charset_id = XFASTINT (XCAR (tail));
4674 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4675 make_number (255));
4676 for (tail = Vemacs_mule_charset_list; CONSP (tail);
4677 tail = XCDR (tail))
4678 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4679 coding->max_charset_id = max_charset_id;
4680 coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4681 }
4682 }
4683 else if (EQ (coding_type, Qshift_jis))
4684 {
4685 coding->detector = detect_coding_sjis;
4686 coding->decoder = decode_coding_sjis;
4687 coding->encoder = encode_coding_sjis;
4688 coding->common_flags
4689 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4690 }
4691 else if (EQ (coding_type, Qbig5))
4692 {
4693 coding->detector = detect_coding_big5;
4694 coding->decoder = decode_coding_big5;
4695 coding->encoder = encode_coding_big5;
4696 coding->common_flags
4697 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4698 }
4699 else /* EQ (coding_type, Qraw_text) */
ec6d2bb8 4700 {
df7492f9
KH
4701 coding->detector = NULL;
4702 coding->decoder = decode_coding_raw_text;
4703 coding->encoder = encode_coding_raw_text;
4704 coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
ec6d2bb8 4705 }
df7492f9
KH
4706
4707 return;
ec6d2bb8
KH
4708}
4709
df7492f9
KH
4710/* Return raw-text or one of its subsidiaries that has the same
4711 eol_type as CODING-SYSTEM. */
ec6d2bb8 4712
df7492f9
KH
4713Lisp_Object
4714raw_text_coding_system (coding_system)
4715 Lisp_Object coding_system;
ec6d2bb8 4716{
0be8721c 4717 Lisp_Object spec, attrs;
df7492f9
KH
4718 Lisp_Object eol_type, raw_text_eol_type;
4719
4720 spec = CODING_SYSTEM_SPEC (coding_system);
4721 attrs = AREF (spec, 0);
4722
4723 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4724 return coding_system;
ec6d2bb8 4725
df7492f9
KH
4726 eol_type = AREF (spec, 2);
4727 if (VECTORP (eol_type))
4728 return Qraw_text;
4729 spec = CODING_SYSTEM_SPEC (Qraw_text);
4730 raw_text_eol_type = AREF (spec, 2);
4731 return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4732 : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4733 : AREF (raw_text_eol_type, 2));
ec6d2bb8
KH
4734}
4735
54f78171 4736
df7492f9
KH
4737/* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4738 does, return one of the subsidiary that has the same eol-spec as
4739 PARENT. Otherwise, return CODING_SYSTEM. */
4740
4741Lisp_Object
4742coding_inherit_eol_type (coding_system, parent)
b74e4686 4743 Lisp_Object coding_system, parent;
54f78171 4744{
df7492f9 4745 Lisp_Object spec, attrs, eol_type;
54f78171 4746
df7492f9
KH
4747 spec = CODING_SYSTEM_SPEC (coding_system);
4748 attrs = AREF (spec, 0);
4749 eol_type = AREF (spec, 2);
4750 if (VECTORP (eol_type))
4751 {
4752 Lisp_Object parent_spec;
df7492f9
KH
4753 Lisp_Object parent_eol_type;
4754
4755 parent_spec
4756 = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4757 parent_eol_type = AREF (parent_spec, 2);
4758 if (EQ (parent_eol_type, Qunix))
4759 coding_system = AREF (eol_type, 0);
4760 else if (EQ (parent_eol_type, Qdos))
4761 coding_system = AREF (eol_type, 1);
4762 else if (EQ (parent_eol_type, Qmac))
4763 coding_system = AREF (eol_type, 2);
54f78171 4764 }
df7492f9 4765 return coding_system;
54f78171
KH
4766}
4767
4ed46869
KH
4768/* Emacs has a mechanism to automatically detect a coding system if it
4769 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4770 it's impossible to distinguish some coding systems accurately
4771 because they use the same range of codes. So, at first, coding
4772 systems are categorized into 7, those are:
4773
0ef69138 4774 o coding-category-emacs-mule
4ed46869
KH
4775
4776 The category for a coding system which has the same code range
4777 as Emacs' internal format. Assigned the coding-system (Lisp
0ef69138 4778 symbol) `emacs-mule' by default.
4ed46869
KH
4779
4780 o coding-category-sjis
4781
4782 The category for a coding system which has the same code range
4783 as SJIS. Assigned the coding-system (Lisp
7717c392 4784 symbol) `japanese-shift-jis' by default.
4ed46869
KH
4785
4786 o coding-category-iso-7
4787
4788 The category for a coding system which has the same code range
7717c392 4789 as ISO2022 of 7-bit environment. This doesn't use any locking
d46c5b12
KH
4790 shift and single shift functions. This can encode/decode all
4791 charsets. Assigned the coding-system (Lisp symbol)
4792 `iso-2022-7bit' by default.
4793
4794 o coding-category-iso-7-tight
4795
4796 Same as coding-category-iso-7 except that this can
4797 encode/decode only the specified charsets.
4ed46869
KH
4798
4799 o coding-category-iso-8-1
4800
4801 The category for a coding system which has the same code range
4802 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4803 for DIMENSION1 charset. This doesn't use any locking shift
4804 and single shift functions. Assigned the coding-system (Lisp
4805 symbol) `iso-latin-1' by default.
4ed46869
KH
4806
4807 o coding-category-iso-8-2
4808
4809 The category for a coding system which has the same code range
4810 as ISO2022 of 8-bit environment and graphic plane 1 used only
7717c392
KH
4811 for DIMENSION2 charset. This doesn't use any locking shift
4812 and single shift functions. Assigned the coding-system (Lisp
4813 symbol) `japanese-iso-8bit' by default.
4ed46869 4814
7717c392 4815 o coding-category-iso-7-else
4ed46869
KH
4816
4817 The category for a coding system which has the same code range
df7492f9 4818 as ISO2022 of 7-bit environemnt but uses locking shift or
7717c392
KH
4819 single shift functions. Assigned the coding-system (Lisp
4820 symbol) `iso-2022-7bit-lock' by default.
4821
4822 o coding-category-iso-8-else
4823
4824 The category for a coding system which has the same code range
df7492f9 4825 as ISO2022 of 8-bit environemnt but uses locking shift or
7717c392
KH
4826 single shift functions. Assigned the coding-system (Lisp
4827 symbol) `iso-2022-8bit-ss2' by default.
4ed46869
KH
4828
4829 o coding-category-big5
4830
4831 The category for a coding system which has the same code range
4832 as BIG5. Assigned the coding-system (Lisp symbol)
e0e989f6 4833 `cn-big5' by default.
4ed46869 4834
fa42c37f
KH
4835 o coding-category-utf-8
4836
4837 The category for a coding system which has the same code range
4838 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4839 symbol) `utf-8' by default.
4840
4841 o coding-category-utf-16-be
4842
4843 The category for a coding system in which a text has an
4844 Unicode signature (cf. Unicode Standard) in the order of BIG
4845 endian at the head. Assigned the coding-system (Lisp symbol)
4846 `utf-16-be' by default.
4847
4848 o coding-category-utf-16-le
4849
4850 The category for a coding system in which a text has an
4851 Unicode signature (cf. Unicode Standard) in the order of
4852 LITTLE endian at the head. Assigned the coding-system (Lisp
4853 symbol) `utf-16-le' by default.
4854
1397dc18
KH
4855 o coding-category-ccl
4856
4857 The category for a coding system of which encoder/decoder is
4858 written in CCL programs. The default value is nil, i.e., no
4859 coding system is assigned.
4860
4ed46869
KH
4861 o coding-category-binary
4862
4863 The category for a coding system not categorized in any of the
4864 above. Assigned the coding-system (Lisp symbol)
e0e989f6 4865 `no-conversion' by default.
4ed46869
KH
4866
4867 Each of them is a Lisp symbol and the value is an actual
df7492f9 4868 `coding-system's (this is also a Lisp symbol) assigned by a user.
4ed46869
KH
4869 What Emacs does actually is to detect a category of coding system.
4870 Then, it uses a `coding-system' assigned to it. If Emacs can't
df7492f9 4871 decide only one possible category, it selects a category of the
4ed46869
KH
4872 highest priority. Priorities of categories are also specified by a
4873 user in a Lisp variable `coding-category-list'.
4874
4875*/
4876
df7492f9
KH
4877#define EOL_SEEN_NONE 0
4878#define EOL_SEEN_LF 1
4879#define EOL_SEEN_CR 2
4880#define EOL_SEEN_CRLF 4
4ed46869 4881
df7492f9
KH
4882/* Detect how end-of-line of a text of length CODING->src_bytes
4883 pointed by CODING->source is encoded. Return one of
4884 EOL_SEEN_XXX. */
4ed46869 4885
bc4bc72a
RS
4886#define MAX_EOL_CHECK_COUNT 3
4887
d46c5b12 4888static int
df7492f9
KH
4889detect_eol (coding, source, src_bytes)
4890 struct coding_system *coding;
d46c5b12 4891 unsigned char *source;
df7492f9 4892 EMACS_INT src_bytes;
4ed46869 4893{
df7492f9 4894 Lisp_Object attrs, coding_type;
d46c5b12 4895 unsigned char *src = source, *src_end = src + src_bytes;
4ed46869 4896 unsigned char c;
df7492f9
KH
4897 int total = 0;
4898 int eol_seen = EOL_SEEN_NONE;
4ed46869 4899
df7492f9
KH
4900 attrs = CODING_ID_ATTRS (coding->id);
4901 coding_type = CODING_ATTR_TYPE (attrs);
d46c5b12 4902
df7492f9 4903 if (EQ (coding_type, Qccl))
4ed46869 4904 {
df7492f9 4905 int msb, lsb;
fa42c37f 4906
df7492f9
KH
4907 msb = coding->spec.utf_16.endian == utf_16_little_endian;
4908 lsb = 1 - msb;
fa42c37f 4909
df7492f9 4910 while (src + 1 < src_end)
fa42c37f 4911 {
df7492f9
KH
4912 c = src[lsb];
4913 if (src[msb] == 0 && (c == '\n' || c == '\r'))
fa42c37f 4914 {
df7492f9
KH
4915 int this_eol;
4916
4917 if (c == '\n')
4918 this_eol = EOL_SEEN_LF;
4919 else if (src + 3 >= src_end
4920 || src[msb + 2] != 0
4921 || src[lsb + 2] != '\n')
4922 this_eol = EOL_SEEN_CR;
fa42c37f 4923 else
df7492f9
KH
4924 this_eol = EOL_SEEN_CRLF;
4925
4926 if (eol_seen == EOL_SEEN_NONE)
4927 /* This is the first end-of-line. */
4928 eol_seen = this_eol;
4929 else if (eol_seen != this_eol)
fa42c37f 4930 {
df7492f9
KH
4931 /* The found type is different from what found before. */
4932 eol_seen = EOL_SEEN_LF;
4933 break;
fa42c37f 4934 }
df7492f9
KH
4935 if (++total == MAX_EOL_CHECK_COUNT)
4936 break;
fa42c37f 4937 }
df7492f9 4938 src += 2;
fa42c37f 4939 }
df7492f9 4940 }
d46c5b12 4941 else
27901516 4942 {
df7492f9 4943 while (src < src_end)
27901516 4944 {
df7492f9
KH
4945 c = *src++;
4946 if (c == '\n' || c == '\r')
4947 {
4948 int this_eol;
d46c5b12 4949
df7492f9
KH
4950 if (c == '\n')
4951 this_eol = EOL_SEEN_LF;
4952 else if (src >= src_end || *src != '\n')
4953 this_eol = EOL_SEEN_CR;
4954 else
4955 this_eol = EOL_SEEN_CRLF, src++;
d46c5b12 4956
df7492f9
KH
4957 if (eol_seen == EOL_SEEN_NONE)
4958 /* This is the first end-of-line. */
4959 eol_seen = this_eol;
4960 else if (eol_seen != this_eol)
4961 {
4962 /* The found type is different from what found before. */
4963 eol_seen = EOL_SEEN_LF;
4964 break;
4965 }
4966 if (++total == MAX_EOL_CHECK_COUNT)
4967 break;
4968 }
4969 }
73be902c 4970 }
df7492f9 4971 return eol_seen;
73be902c
KH
4972}
4973
df7492f9 4974
73be902c 4975static void
df7492f9
KH
4976adjust_coding_eol_type (coding, eol_seen)
4977 struct coding_system *coding;
4978 int eol_seen;
73be902c 4979{
0be8721c 4980 Lisp_Object eol_type;
df7492f9
KH
4981
4982 eol_type = CODING_ID_EOL_TYPE (coding->id);
4983 if (eol_seen & EOL_SEEN_LF)
4984 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
4985 else if (eol_type & EOL_SEEN_CRLF)
4986 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
4987 else if (eol_type & EOL_SEEN_CR)
4988 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
d46c5b12
KH
4989}
4990
df7492f9
KH
4991/* Detect how a text specified in CODING is encoded. If a coding
4992 system is detected, update fields of CODING by the detected coding
4993 system. */
4994
4995void
4996detect_coding (coding)
d46c5b12 4997 struct coding_system *coding;
d46c5b12 4998{
df7492f9
KH
4999 unsigned char *src, *src_end;
5000 Lisp_Object attrs, coding_type;
d46c5b12 5001
df7492f9
KH
5002 coding->consumed = coding->consumed_char = 0;
5003 coding->produced = coding->produced_char = 0;
5004 coding_set_source (coding);
1c3478b0 5005
df7492f9 5006 src_end = coding->source + coding->src_bytes;
1c3478b0 5007
df7492f9
KH
5008 /* If we have not yet decided the text encoding type, detect it
5009 now. */
5010 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
b73bfc1c 5011 {
df7492f9
KH
5012 int mask = CATEGORY_MASK_ANY;
5013 int c, i;
5014
5015 for (src = coding->source; src < src_end; src++)
5016 {
5017 c = *src;
5018 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5019 || c == ISO_CODE_SI
5020 || c == ISO_CODE_SO)))
5021 break;
5022 }
5023 coding->head_ascii = src - (coding->source + coding->consumed);
5024
5025 if (coding->head_ascii < coding->src_bytes)
1c3478b0 5026 {
df7492f9
KH
5027 int detected = 0;
5028
5029 for (i = 0; i < coding_category_raw_text; i++)
1c3478b0 5030 {
df7492f9
KH
5031 enum coding_category category = coding_priorities[i];
5032 struct coding_system *this = coding_categories + category;
5033
5034 if (category >= coding_category_raw_text
5035 || detected & (1 << category))
5036 continue;
5037
5038 if (this->id < 0)
1c3478b0 5039 {
df7492f9
KH
5040 /* No coding system of this category is defined. */
5041 mask &= ~(1 << category);
5042 }
5043 else
5044 {
5045 detected |= detected_mask[category];
5046 if ((*(this->detector)) (coding, &mask))
5047 break;
1c3478b0
KH
5048 }
5049 }
df7492f9
KH
5050 if (! mask)
5051 setup_coding_system (Qraw_text, coding);
5052 else if (mask != CATEGORY_MASK_ANY)
5053 for (i = 0; i < coding_category_raw_text; i++)
5054 {
5055 enum coding_category category = coding_priorities[i];
5056 struct coding_system *this = coding_categories + category;
5057
5058 if (mask & (1 << category))
5059 {
5060 setup_coding_system (CODING_ID_NAME (this->id), coding);
5061 break;
5062 }
5063 }
1c3478b0 5064 }
b73bfc1c 5065 }
69f76525 5066
df7492f9
KH
5067 attrs = CODING_ID_ATTRS (coding->id);
5068 coding_type = CODING_ATTR_TYPE (attrs);
5069
5070 /* If we have not yet decided the EOL type, detect it now. But, the
5071 detection is impossible for a CCL based coding system, in which
5072 case, we detct the EOL type after decoding. */
5073 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5074 && ! EQ (coding_type, Qccl))
d46c5b12 5075 {
df7492f9
KH
5076 int eol_seen = detect_eol (coding, coding->source, coding->src_bytes);
5077
5078 if (eol_seen != EOL_SEEN_NONE)
5079 adjust_coding_eol_type (coding, eol_seen);
d46c5b12 5080 }
4ed46869
KH
5081}
5082
aaaf0b1e
KH
5083
5084static void
df7492f9 5085decode_eol (coding)
aaaf0b1e 5086 struct coding_system *coding;
aaaf0b1e 5087{
df7492f9 5088 if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
aaaf0b1e 5089 {
df7492f9
KH
5090 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5091 unsigned char *pend = p + coding->produced;
5092 int eol_seen = EOL_SEEN_NONE;
aaaf0b1e 5093
df7492f9 5094 for (; p < pend; p++)
aaaf0b1e 5095 {
df7492f9
KH
5096 if (*p == '\n')
5097 eol_seen |= EOL_SEEN_LF;
5098 else if (*p == '\r')
aaaf0b1e 5099 {
df7492f9 5100 if (p + 1 < pend && *(p + 1) == '\n')
aaaf0b1e 5101 {
df7492f9
KH
5102 eol_seen |= EOL_SEEN_CRLF;
5103 p++;
aaaf0b1e 5104 }
aaaf0b1e 5105 else
df7492f9 5106 eol_seen |= EOL_SEEN_CR;
aaaf0b1e 5107 }
aaaf0b1e 5108 }
df7492f9
KH
5109 if (eol_seen != EOL_SEEN_NONE)
5110 adjust_coding_eol_type (coding, eol_seen);
aaaf0b1e 5111 }
aaaf0b1e 5112
df7492f9
KH
5113 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5114 {
5115 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5116 unsigned char *pend = p + coding->produced;
5117
5118 for (; p < pend; p++)
5119 if (*p == '\r')
5120 *p = '\n';
5121 }
5122 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5123 {
5124 unsigned char *p, *pbeg, *pend;
5125 Lisp_Object undo_list;
5126
5127 move_gap_both (coding->dst_pos + coding->produced_char,
5128 coding->dst_pos_byte + coding->produced);
5129 undo_list = current_buffer->undo_list;
5130 current_buffer->undo_list = Qt;
c197f191 5131 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
df7492f9
KH
5132 current_buffer->undo_list = undo_list;
5133 pbeg = GPT_ADDR;
5134 pend = pbeg + coding->produced;
5135
5136 for (p = pend - 1; p >= pbeg; p--)
5137 if (*p == '\r')
5138 {
5139 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5140 pend--;
5141 }
5142 coding->produced_char -= coding->produced - (pend - pbeg);
5143 coding->produced = pend - pbeg;
5144 insert_from_gap (coding->produced_char, coding->produced);
aaaf0b1e
KH
5145 }
5146}
5147
df7492f9
KH
5148static void
5149translate_chars (coding, table)
4ed46869 5150 struct coding_system *coding;
df7492f9 5151 Lisp_Object table;
4ed46869 5152{
df7492f9
KH
5153 int *charbuf = coding->charbuf;
5154 int *charbuf_end = charbuf + coding->charbuf_used;
5155 int c;
5156
5157 if (coding->chars_at_source)
5158 return;
4ed46869 5159
df7492f9 5160 while (charbuf < charbuf_end)
8844fa83 5161 {
df7492f9
KH
5162 c = *charbuf;
5163 if (c < 0)
5164 charbuf += c;
5165 else
5166 *charbuf++ = translate_char (table, c);
8844fa83 5167 }
df7492f9 5168}
4ed46869 5169
df7492f9
KH
5170static int
5171produce_chars (coding)
5172 struct coding_system *coding;
5173{
5174 unsigned char *dst = coding->destination + coding->produced;
5175 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5176 int produced;
5177 int produced_chars = 0;
b73bfc1c 5178
df7492f9 5179 if (! coding->chars_at_source)
4ed46869 5180 {
df7492f9
KH
5181 /* Characters are in coding->charbuf. */
5182 int *buf = coding->charbuf;
5183 int *buf_end = buf + coding->charbuf_used;
5184 unsigned char *adjusted_dst_end;
4ed46869 5185
df7492f9
KH
5186 if (BUFFERP (coding->src_object)
5187 && EQ (coding->src_object, coding->dst_object))
5188 dst_end = coding->source + coding->consumed;
5189 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
4ed46869 5190
df7492f9
KH
5191 while (buf < buf_end)
5192 {
5193 int c = *buf++;
5194
5195 if (dst >= adjusted_dst_end)
5196 {
5197 dst = alloc_destination (coding,
5198 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5199 dst);
5200 dst_end = coding->destination + coding->dst_bytes;
5201 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5202 }
5203 if (c >= 0)
5204 {
5205 if (coding->dst_multibyte
5206 || ! CHAR_BYTE8_P (c))
5207 CHAR_STRING_ADVANCE (c, dst);
5208 else
5209 *dst++ = CHAR_TO_BYTE8 (c);
5210 produced_chars++;
5211 }
5212 else
5213 /* This is an annotation data. */
5214 buf -= c + 1;
5215 }
5216 }
5217 else
5218 {
df7492f9
KH
5219 unsigned char *src = coding->source;
5220 unsigned char *src_end = src + coding->src_bytes;
5221 Lisp_Object eol_type;
b73bfc1c 5222
df7492f9 5223 eol_type = CODING_ID_EOL_TYPE (coding->id);
4ed46869 5224
df7492f9 5225 if (coding->src_multibyte != coding->dst_multibyte)
aaaf0b1e 5226 {
df7492f9
KH
5227 if (coding->src_multibyte)
5228 {
71c81426 5229 int multibytep = 1;
df7492f9 5230 int consumed_chars;
d46c5b12 5231
df7492f9
KH
5232 while (1)
5233 {
5234 unsigned char *src_base = src;
5235 int c;
b73bfc1c 5236
df7492f9
KH
5237 ONE_MORE_BYTE (c);
5238 if (c == '\r')
5239 {
5240 if (EQ (eol_type, Qdos))
5241 {
5242 if (src < src_end
5243 && *src == '\n')
5244 c = *src++;
5245 }
5246 else if (EQ (eol_type, Qmac))
5247 c = '\n';
5248 }
5249 if (dst == dst_end)
5250 {
2c78b7e1 5251 coding->consumed = src - coding->source;
b73bfc1c 5252
2c78b7e1
KH
5253 if (EQ (coding->src_object, coding->dst_object))
5254 dst_end = src;
5255 if (dst == dst_end)
5256 {
5257 dst = alloc_destination (coding, src_end - src + 1,
5258 dst);
5259 dst_end = coding->destination + coding->dst_bytes;
5260 coding_set_source (coding);
5261 src = coding->source + coding->consumed;
5262 src_end = coding->source + coding->src_bytes;
5263 }
df7492f9
KH
5264 }
5265 *dst++ = c;
5266 produced_chars++;
5267 }
5268 no_more_source:
5269 ;
5270 }
5271 else
5272 while (src < src_end)
5273 {
71c81426 5274 int multibytep = 1;
df7492f9 5275 int c = *src++;
b73bfc1c 5276
df7492f9
KH
5277 if (c == '\r')
5278 {
5279 if (EQ (eol_type, Qdos))
5280 {
5281 if (src < src_end
5282 && *src == '\n')
5283 c = *src++;
5284 }
5285 else if (EQ (eol_type, Qmac))
5286 c = '\n';
5287 }
5288 if (dst >= dst_end - 1)
5289 {
2c78b7e1 5290 coding->consumed = src - coding->source;
df7492f9 5291
2c78b7e1
KH
5292 if (EQ (coding->src_object, coding->dst_object))
5293 dst_end = src;
5294 if (dst >= dst_end - 1)
5295 {
5296 dst = alloc_destination (coding, src_end - src + 2,
5297 dst);
5298 dst_end = coding->destination + coding->dst_bytes;
5299 coding_set_source (coding);
5300 src = coding->source + coding->consumed;
5301 src_end = coding->source + coding->src_bytes;
5302 }
df7492f9
KH
5303 }
5304 EMIT_ONE_BYTE (c);
5305 }
d46c5b12 5306 }
df7492f9
KH
5307 else
5308 {
5309 if (!EQ (coding->src_object, coding->dst_object))
5310 {
5311 int require = coding->src_bytes - coding->dst_bytes;
4ed46869 5312
df7492f9
KH
5313 if (require > 0)
5314 {
5315 EMACS_INT offset = src - coding->source;
5316
5317 dst = alloc_destination (coding, require, dst);
5318 coding_set_source (coding);
5319 src = coding->source + offset;
5320 src_end = coding->source + coding->src_bytes;
5321 }
5322 }
5323 produced_chars = coding->src_chars;
5324 while (src < src_end)
5325 {
5326 int c = *src++;
5327
5328 if (c == '\r')
5329 {
5330 if (EQ (eol_type, Qdos))
5331 {
5332 if (src < src_end
5333 && *src == '\n')
5334 c = *src++;
5335 produced_chars--;
5336 }
5337 else if (EQ (eol_type, Qmac))
5338 c = '\n';
5339 }
5340 *dst++ = c;
5341 }
5342 }
2c78b7e1
KH
5343 coding->consumed = coding->src_bytes;
5344 coding->consumed_char = coding->src_chars;
b73bfc1c 5345 }
4ed46869 5346
df7492f9
KH
5347 produced = dst - (coding->destination + coding->produced);
5348 if (BUFFERP (coding->dst_object))
5349 insert_from_gap (produced_chars, produced);
5350 coding->produced += produced;
5351 coding->produced_char += produced_chars;
5352 return produced_chars;
b73bfc1c 5353}
52d41803 5354
df7492f9
KH
5355/* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5356 or
5357 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5358 */
4ed46869 5359
df7492f9
KH
5360static INLINE void
5361produce_composition (coding, charbuf)
4ed46869 5362 struct coding_system *coding;
df7492f9 5363 int *charbuf;
4ed46869 5364{
df7492f9
KH
5365 Lisp_Object buffer;
5366 int len;
5367 EMACS_INT pos;
5368 enum composition_method method;
5369 int cmp_len;
5370 Lisp_Object components;
5371
5372 buffer = coding->dst_object;
5373 len = -charbuf[0];
5374 pos = coding->dst_pos + charbuf[1];
5375 method = (enum composition_method) (charbuf[3]);
5376 cmp_len = charbuf[4];
5377
5378 if (method == COMPOSITION_RELATIVE)
5379 components = Qnil;
5380 else
d46c5b12 5381 {
df7492f9
KH
5382 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5383 int i;
4ed46869 5384
df7492f9
KH
5385 len -= 5;
5386 charbuf += 5;
5387 for (i = 0; i < len; i++)
5388 args[i] = make_number (charbuf[i]);
5389 components = (method == COMPOSITION_WITH_ALTCHARS
5390 ? Fstring (len, args) : Fvector (len, args));
5391 }
5392 compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5393}
b73bfc1c 5394
df7492f9
KH
5395static int *
5396save_composition_data (buf, buf_end, prop)
5397 int *buf, *buf_end;
5398 Lisp_Object prop;
5399{
5400 enum composition_method method = COMPOSITION_METHOD (prop);
5401 int cmp_len = COMPOSITION_LENGTH (prop);
4ed46869 5402
df7492f9
KH
5403 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5404 return NULL;
d46c5b12 5405
df7492f9
KH
5406 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5407 buf[2] = method;
5408 buf[3] = cmp_len;
b73bfc1c 5409
df7492f9
KH
5410 if (method == COMPOSITION_RELATIVE)
5411 buf[0] = 4;
5412 else
b73bfc1c 5413 {
df7492f9
KH
5414 Lisp_Object components;
5415 int len, i;
b73bfc1c 5416
df7492f9
KH
5417 components = COMPOSITION_COMPONENTS (prop);
5418 if (VECTORP (components))
d46c5b12 5419 {
df7492f9
KH
5420 len = XVECTOR (components)->size;
5421 for (i = 0; i < len; i++)
5422 buf[4 + i] = XINT (AREF (components, i));
5423 }
5424 else if (STRINGP (components))
5425 {
5426 int i_byte;
b73bfc1c 5427
df7492f9
KH
5428 len = XSTRING (components)->size;
5429 i = i_byte = 0;
5430 while (i < len)
5431 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5432 }
5433 else if (INTEGERP (components))
5434 {
5435 len = 1;
5436 buf[4] = XINT (components);
5437 }
5438 else if (CONSP (components))
5439 {
5440 for (len = 0; CONSP (components);
5441 len++, components = XCDR (components))
5442 buf[4 + len] = XINT (XCAR (components));
d46c5b12 5443 }
df7492f9
KH
5444 else
5445 abort ();
5446 buf[0] = 4 + len;
4ed46869 5447 }
df7492f9 5448 return (buf + buf[0]);
4ed46869
KH
5449}
5450
df7492f9
KH
5451#define CHARBUF_SIZE 0x4000
5452
5453#define ALLOC_CONVERSION_WORK_AREA(coding) \
5454 do { \
5455 int size = CHARBUF_SIZE;; \
5456 \
5457 coding->charbuf = NULL; \
5458 while (size > 1024) \
5459 { \
5460 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5461 if (coding->charbuf) \
5462 break; \
5463 size >>= 1; \
5464 } \
5465 if (! coding->charbuf) \
5466 { \
5467 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5468 return coding->result; \
5469 } \
5470 coding->charbuf_size = size; \
5471 } while (0)
4ed46869 5472
d46c5b12
KH
5473
5474static void
df7492f9 5475produce_annotation (coding)
d46c5b12 5476 struct coding_system *coding;
d46c5b12 5477{
df7492f9
KH
5478 int *charbuf = coding->charbuf;
5479 int *charbuf_end = charbuf + coding->charbuf_used;
d46c5b12 5480
df7492f9 5481 while (charbuf < charbuf_end)
d46c5b12 5482 {
df7492f9
KH
5483 if (*charbuf >= 0)
5484 charbuf++;
d46c5b12 5485 else
d46c5b12 5486 {
df7492f9
KH
5487 int len = -*charbuf;
5488 switch (charbuf[2])
5489 {
5490 case CODING_ANNOTATE_COMPOSITION_MASK:
5491 produce_composition (coding, charbuf);
5492 break;
5493 default:
5494 abort ();
5495 }
5496 charbuf += len;
d46c5b12 5497 }
df7492f9
KH
5498 }
5499}
d46c5b12 5500
df7492f9
KH
5501/* Decode the data at CODING->src_object into CODING->dst_object.
5502 CODING->src_object is a buffer, a string, or nil.
5503 CODING->dst_object is a buffer.
de79a6a5 5504
df7492f9
KH
5505 If CODING->src_object is a buffer, it must be the current buffer.
5506 In this case, if CODING->src_pos is positive, it is a position of
5507 the source text in the buffer, otherwise, the source text is in the
5508 gap area of the buffer, and CODING->src_pos specifies the offset of
5509 the text from GPT (which must be the same as PT). If this is the
5510 same buffer as CODING->dst_object, CODING->src_pos must be
5511 negative.
b73bfc1c 5512
df7492f9
KH
5513 If CODING->src_object is a string, CODING->src_pos in an index to
5514 that string.
d46c5b12 5515
df7492f9
KH
5516 If CODING->src_object is nil, CODING->source must already point to
5517 the non-relocatable memory area. In this case, CODING->src_pos is
5518 an offset from CODING->source.
d46c5b12 5519
df7492f9
KH
5520 The decoded data is inserted at the current point of the buffer
5521 CODING->dst_object.
5522*/
5523
5524static int
5525decode_coding (coding)
d46c5b12 5526 struct coding_system *coding;
d46c5b12 5527{
df7492f9 5528 Lisp_Object attrs;
d46c5b12 5529
df7492f9
KH
5530 if (BUFFERP (coding->src_object)
5531 && coding->src_pos > 0
5532 && coding->src_pos < GPT
5533 && coding->src_pos + coding->src_chars > GPT)
5534 move_gap_both (coding->src_pos, coding->src_pos_byte);
d46c5b12 5535
df7492f9 5536 if (BUFFERP (coding->dst_object))
88993dfd 5537 {
df7492f9
KH
5538 if (current_buffer != XBUFFER (coding->dst_object))
5539 set_buffer_internal (XBUFFER (coding->dst_object));
5540 if (GPT != PT)
5541 move_gap_both (PT, PT_BYTE);
88993dfd
KH
5542 }
5543
df7492f9
KH
5544 coding->consumed = coding->consumed_char = 0;
5545 coding->produced = coding->produced_char = 0;
5546 coding->chars_at_source = 0;
5547 coding->result = CODING_RESULT_SUCCESS;
5548 coding->errors = 0;
5549
5550 ALLOC_CONVERSION_WORK_AREA (coding);
5551
5552 attrs = CODING_ID_ATTRS (coding->id);
5553
5554 do
d46c5b12 5555 {
df7492f9
KH
5556 coding_set_source (coding);
5557 coding->annotated = 0;
5558 (*(coding->decoder)) (coding);
5559 if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5560 translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
5561 coding_set_destination (coding);
5562 produce_chars (coding);
5563 if (coding->annotated)
5564 produce_annotation (coding);
d46c5b12 5565 }
df7492f9
KH
5566 while (coding->consumed < coding->src_bytes
5567 && ! coding->result);
d46c5b12 5568
df7492f9
KH
5569 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5570 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5571 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5572 decode_eol (coding);
d46c5b12 5573
df7492f9
KH
5574 coding->carryover_bytes = 0;
5575 if (coding->consumed < coding->src_bytes)
d46c5b12 5576 {
df7492f9
KH
5577 int nbytes = coding->src_bytes - coding->consumed;
5578 unsigned char *src;
5579
5580 coding_set_source (coding);
5581 coding_set_destination (coding);
5582 src = coding->source + coding->consumed;
5583
5584 if (coding->mode & CODING_MODE_LAST_BLOCK)
d46c5b12 5585 {
df7492f9
KH
5586 /* Flush out unprocessed data as binary chars. We are sure
5587 that the number of data is less than the size of
5588 coding->charbuf. */
5589 int *charbuf = coding->charbuf;
5590
5591 while (nbytes-- > 0)
d46c5b12 5592 {
df7492f9
KH
5593 int c = *src++;
5594 *charbuf++ = (c & 0x80 ? - c : c);
d46c5b12 5595 }
df7492f9 5596 produce_chars (coding);
d46c5b12 5597 }
d46c5b12 5598 else
df7492f9
KH
5599 {
5600 /* Record unprocessed bytes in coding->carryover. We are
5601 sure that the number of data is less than the size of
5602 coding->carryover. */
5603 unsigned char *p = coding->carryover;
5604
5605 coding->carryover_bytes = nbytes;
5606 while (nbytes-- > 0)
5607 *p++ = *src++;
5608 }
5609 coding->consumed = coding->src_bytes;
5610 }
b73bfc1c 5611
df7492f9 5612 return coding->result;
d46c5b12
KH
5613}
5614
df7492f9
KH
5615static void
5616consume_chars (coding)
5617 struct coding_system *coding;
5618{
5619 int *buf = coding->charbuf;
5620 /* -1 is to compensate for CRLF. */
5621 int *buf_end = coding->charbuf + coding->charbuf_size - 1;
5622 unsigned char *src = coding->source + coding->consumed;
5623 int pos = coding->src_pos + coding->consumed_char;
5624 int end_pos = coding->src_pos + coding->src_chars;
5625 int multibytep = coding->src_multibyte;
5626 Lisp_Object eol_type;
5627 int c;
5628 int start, end, stop;
5629 Lisp_Object object, prop;
88993dfd 5630
df7492f9
KH
5631 eol_type = CODING_ID_EOL_TYPE (coding->id);
5632 if (VECTORP (eol_type))
5633 eol_type = Qunix;
88993dfd 5634
df7492f9 5635 object = coding->src_object;
b843d1ae 5636
df7492f9
KH
5637 /* Note: composition handling is not yet implemented. */
5638 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
ec6d2bb8 5639
df7492f9
KH
5640 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5641 && find_composition (pos, end_pos, &start, &end, &prop, object)
5642 && end <= end_pos
5643 && (start >= pos
5644 || (find_composition (end, end_pos, &start, &end, &prop, object)
5645 && end <= end_pos)))
5646 stop = start;
5647 else
5648 stop = end_pos;
ec6d2bb8 5649
df7492f9 5650 while (buf < buf_end)
ec6d2bb8 5651 {
df7492f9 5652 if (pos == stop)
ec6d2bb8 5653 {
df7492f9 5654 int *p;
ec6d2bb8 5655
df7492f9
KH
5656 if (pos == end_pos)
5657 break;
5658 p = save_composition_data (buf, buf_end, prop);
5659 if (p == NULL)
5660 break;
5661 buf = p;
5662 if (find_composition (end, end_pos, &start, &end, &prop, object)
5663 && end <= end_pos)
5664 stop = start;
5665 else
5666 stop = end_pos;
5667 }
5668
5669 if (! multibytep)
5670 c = *src++;
5671 else
5672 c = STRING_CHAR_ADVANCE (src);
5673 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5674 c = '\n';
5675 if (! EQ (eol_type, Qunix))
5676 {
5677 if (c == '\n')
5678 {
5679 if (EQ (eol_type, Qdos))
5680 *buf++ = '\r';
5681 else
5682 c = '\r';
ec6d2bb8 5683 }
ec6d2bb8 5684 }
df7492f9
KH
5685 *buf++ = c;
5686 pos++;
ec6d2bb8 5687 }
ec6d2bb8 5688
df7492f9
KH
5689 coding->consumed = src - coding->source;
5690 coding->consumed_char = pos - coding->src_pos;
5691 coding->charbuf_used = buf - coding->charbuf;
5692 coding->chars_at_source = 0;
ec6d2bb8
KH
5693}
5694
ec6d2bb8 5695
df7492f9
KH
5696/* Encode the text at CODING->src_object into CODING->dst_object.
5697 CODING->src_object is a buffer or a string.
5698 CODING->dst_object is a buffer or nil.
5699
5700 If CODING->src_object is a buffer, it must be the current buffer.
5701 In this case, if CODING->src_pos is positive, it is a position of
5702 the source text in the buffer, otherwise. the source text is in the
5703 gap area of the buffer, and coding->src_pos specifies the offset of
5704 the text from GPT (which must be the same as PT). If this is the
5705 same buffer as CODING->dst_object, CODING->src_pos must be
5706 negative and CODING should not have `pre-write-conversion'.
5707
5708 If CODING->src_object is a string, CODING should not have
5709 `pre-write-conversion'.
5710
5711 If CODING->dst_object is a buffer, the encoded data is inserted at
5712 the current point of that buffer.
5713
5714 If CODING->dst_object is nil, the encoded data is placed at the
5715 memory area specified by CODING->destination. */
5716
5717static int
5718encode_coding (coding)
ec6d2bb8 5719 struct coding_system *coding;
ec6d2bb8 5720{
df7492f9 5721 Lisp_Object attrs;
ec6d2bb8 5722
df7492f9 5723 attrs = CODING_ID_ATTRS (coding->id);
ec6d2bb8 5724
df7492f9 5725 if (BUFFERP (coding->dst_object))
ec6d2bb8 5726 {
df7492f9
KH
5727 set_buffer_internal (XBUFFER (coding->dst_object));
5728 coding->dst_multibyte
5729 = ! NILP (current_buffer->enable_multibyte_characters);
5730 }
ec6d2bb8 5731
df7492f9
KH
5732 coding->consumed = coding->consumed_char = 0;
5733 coding->produced = coding->produced_char = 0;
5734 coding->result = CODING_RESULT_SUCCESS;
5735 coding->errors = 0;
ec6d2bb8 5736
df7492f9 5737 ALLOC_CONVERSION_WORK_AREA (coding);
ec6d2bb8 5738
df7492f9
KH
5739 do {
5740 coding_set_source (coding);
5741 consume_chars (coding);
5742
5743 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
5744 translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
5745
5746 coding_set_destination (coding);
5747 (*(coding->encoder)) (coding);
5748 } while (coding->consumed_char < coding->src_chars);
5749
5750 if (BUFFERP (coding->dst_object))
5751 insert_from_gap (coding->produced_char, coding->produced);
5752
5753 return (coding->result);
ec6d2bb8
KH
5754}
5755
df7492f9 5756/* Work buffer */
fb88bf2d 5757
df7492f9
KH
5758/* List of currently used working buffer. */
5759Lisp_Object Vcode_conversion_work_buf_list;
d46c5b12 5760
df7492f9
KH
5761/* A working buffer used by the top level conversion. */
5762Lisp_Object Vcode_conversion_reused_work_buf;
b73bfc1c 5763
4ed46869 5764
df7492f9
KH
5765/* Return a working buffer that can be freely used by the following
5766 code conversion. MULTIBYTEP specifies the multibyteness of the
5767 buffer. */
b73bfc1c 5768
df7492f9
KH
5769Lisp_Object
5770make_conversion_work_buffer (multibytep)
5771 int multibytep;
5772{
5773 struct buffer *current = current_buffer;
5774 Lisp_Object buf;
d46c5b12 5775
df7492f9 5776 if (NILP (Vcode_conversion_work_buf_list))
e133c8fa 5777 {
df7492f9
KH
5778 if (NILP (Vcode_conversion_reused_work_buf))
5779 Vcode_conversion_reused_work_buf
5780 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5781 Vcode_conversion_work_buf_list
5782 = Fcons (Vcode_conversion_reused_work_buf, Qnil);
e133c8fa 5783 }
df7492f9 5784 else
d46c5b12 5785 {
c197f191 5786 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
df7492f9 5787 char str[128];
e077cc80 5788
df7492f9
KH
5789 sprintf (str, " *code-conversion-work*<%d>", depth);
5790 Vcode_conversion_work_buf_list
5791 = Fcons (Fget_buffer_create (build_string (str)),
5792 Vcode_conversion_work_buf_list);
d46c5b12 5793 }
d46c5b12 5794
df7492f9
KH
5795 buf = XCAR (Vcode_conversion_work_buf_list);
5796 set_buffer_internal (XBUFFER (buf));
5797 current_buffer->undo_list = Qt;
5798 Ferase_buffer ();
5799 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
5800 set_buffer_internal (current);
5801 return buf;
5802}
d46c5b12 5803
df7492f9 5804static struct coding_system *saved_coding;
d46c5b12 5805
df7492f9
KH
5806Lisp_Object
5807code_conversion_restore (info)
5808 Lisp_Object info;
5809{
c197f191 5810 int depth = XINT (Flength (Vcode_conversion_work_buf_list));
df7492f9 5811 Lisp_Object buf;
d46c5b12 5812
df7492f9 5813 if (depth > 0)
d46c5b12 5814 {
df7492f9
KH
5815 buf = XCAR (Vcode_conversion_work_buf_list);
5816 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5817 if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5818 Fkill_buffer (buf);
5819 }
d46c5b12 5820
c197f191 5821 if (EQ (saved_coding->dst_object, Qt)
df7492f9
KH
5822 && saved_coding->destination)
5823 xfree (saved_coding->destination);
b843d1ae 5824
df7492f9
KH
5825 return save_excursion_restore (info);
5826}
d46c5b12 5827
12410ef1 5828
df7492f9
KH
5829int
5830decode_coding_gap (coding, chars, bytes)
5831 struct coding_system *coding;
5832 EMACS_INT chars, bytes;
5833{
5834 int count = specpdl_ptr - specpdl;
fb88bf2d 5835
df7492f9
KH
5836 saved_coding = coding;
5837 record_unwind_protect (code_conversion_restore, save_excursion_save ());
ec6d2bb8 5838
df7492f9
KH
5839 coding->src_object = Fcurrent_buffer ();
5840 coding->src_chars = chars;
5841 coding->src_bytes = bytes;
5842 coding->src_pos = -chars;
5843 coding->src_pos_byte = -bytes;
5844 coding->src_multibyte = chars < bytes;
5845 coding->dst_object = coding->src_object;
5846 coding->dst_pos = PT;
5847 coding->dst_pos_byte = PT_BYTE;
71c81426 5848 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4956c225 5849
df7492f9
KH
5850 if (CODING_REQUIRE_DETECTION (coding))
5851 detect_coding (coding);
5852
5853 decode_coding (coding);
d46c5b12 5854
df7492f9
KH
5855 unbind_to (count, Qnil);
5856 return coding->result;
5857}
d46c5b12 5858
df7492f9
KH
5859int
5860encode_coding_gap (coding, chars, bytes)
5861 struct coding_system *coding;
5862 EMACS_INT chars, bytes;
5863{
5864 int count = specpdl_ptr - specpdl;
5865 Lisp_Object buffer;
d46c5b12 5866
df7492f9
KH
5867 saved_coding = coding;
5868 record_unwind_protect (code_conversion_restore, save_excursion_save ());
fb88bf2d 5869
df7492f9
KH
5870 buffer = Fcurrent_buffer ();
5871 coding->src_object = buffer;
5872 coding->src_chars = chars;
5873 coding->src_bytes = bytes;
5874 coding->src_pos = -chars;
5875 coding->src_pos_byte = -bytes;
5876 coding->src_multibyte = chars < bytes;
5877 coding->dst_object = coding->src_object;
5878 coding->dst_pos = PT;
5879 coding->dst_pos_byte = PT_BYTE;
fb88bf2d 5880
df7492f9 5881 encode_coding (coding);
f2558efd 5882
df7492f9
KH
5883 unbind_to (count, Qnil);
5884 return coding->result;
5885}
b73bfc1c 5886
d46c5b12 5887
df7492f9
KH
5888/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5889 SRC_OBJECT into DST_OBJECT by coding context CODING.
ec6d2bb8 5890
df7492f9 5891 SRC_OBJECT is a buffer, a string, or Qnil.
ec6d2bb8 5892
df7492f9
KH
5893 If it is a buffer, the text is at point of the buffer. FROM and TO
5894 are positions in the buffer.
ec6d2bb8 5895
df7492f9
KH
5896 If it is a string, the text is at the beginning of the string.
5897 FROM and TO are indices to the string.
ec6d2bb8 5898
df7492f9
KH
5899 If it is nil, the text is at coding->source. FROM and TO are
5900 indices to coding->source.
ec6d2bb8 5901
df7492f9 5902 DST_OBJECT is a buffer, Qt, or Qnil.
d46c5b12 5903
df7492f9
KH
5904 If it is a buffer, the decoded text is inserted at point of the
5905 buffer. If the buffer is the same as SRC_OBJECT, the source text
5906 is deleted.
d46c5b12 5907
df7492f9
KH
5908 If it is Qt, a string is made from the decoded text, and
5909 set in CODING->dst_object.
d46c5b12 5910
df7492f9
KH
5911 If it is Qnil, the decoded text is stored at CODING->destination.
5912 The called must allocate CODING->dst_bytes bytes at
5913 CODING->destination by xmalloc. If the decoded text is longer than
5914 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5915 */
d46c5b12 5916
df7492f9
KH
5917void
5918decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5919 dst_object)
5920 struct coding_system *coding;
5921 Lisp_Object src_object;
5922 EMACS_INT from, from_byte, to, to_byte;
5923 Lisp_Object dst_object;
5924{
5925 int count = specpdl_ptr - specpdl;
5926 unsigned char *destination;
5927 EMACS_INT dst_bytes;
5928 EMACS_INT chars = to - from;
5929 EMACS_INT bytes = to_byte - from_byte;
5930 Lisp_Object attrs;
d46c5b12 5931
df7492f9
KH
5932 saved_coding = coding;
5933 record_unwind_protect (code_conversion_restore, save_excursion_save ());
93dec019 5934
df7492f9
KH
5935 if (NILP (dst_object))
5936 {
5937 destination = coding->destination;
5938 dst_bytes = coding->dst_bytes;
5939 }
93dec019 5940
df7492f9
KH
5941 coding->src_object = src_object;
5942 coding->src_chars = chars;
5943 coding->src_bytes = bytes;
5944 coding->src_multibyte = chars < bytes;
70ad9fc4 5945
df7492f9
KH
5946 if (STRINGP (src_object))
5947 {
5948 coding->src_pos = from;
5949 coding->src_pos_byte = from_byte;
5950 }
5951 else if (BUFFERP (src_object))
5952 {
5953 set_buffer_internal (XBUFFER (src_object));
5954 if (from != GPT)
5955 move_gap_both (from, from_byte);
5956 if (EQ (src_object, dst_object))
fb88bf2d 5957 {
df7492f9
KH
5958 TEMP_SET_PT_BOTH (from, from_byte);
5959 del_range_both (from, from_byte, to, to_byte, 1);
5960 coding->src_pos = -chars;
5961 coding->src_pos_byte = -bytes;
fb88bf2d 5962 }
df7492f9 5963 else
fb88bf2d 5964 {
df7492f9
KH
5965 coding->src_pos = from;
5966 coding->src_pos_byte = from_byte;
fb88bf2d 5967 }
d46c5b12 5968 }
fb88bf2d 5969
df7492f9
KH
5970 if (CODING_REQUIRE_DETECTION (coding))
5971 detect_coding (coding);
5972 attrs = CODING_ID_ATTRS (coding->id);
5973
5974 if (! NILP (CODING_ATTR_POST_READ (attrs))
5975 || EQ (dst_object, Qt))
b73bfc1c 5976 {
df7492f9
KH
5977 coding->dst_object = make_conversion_work_buffer (1);
5978 coding->dst_pos = BEG;
5979 coding->dst_pos_byte = BEG_BYTE;
5980 coding->dst_multibyte = 1;
b73bfc1c 5981 }
df7492f9 5982 else if (BUFFERP (dst_object))
12410ef1 5983 {
df7492f9
KH
5984 coding->dst_object = dst_object;
5985 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
5986 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
5987 coding->dst_multibyte
5988 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
12410ef1 5989 }
72d1a715 5990 else
df7492f9
KH
5991 {
5992 coding->dst_object = Qnil;
5993 coding->dst_multibyte = 1;
5994 }
5995
5996 decode_coding (coding);
4ed46869 5997
df7492f9
KH
5998 if (BUFFERP (coding->dst_object))
5999 set_buffer_internal (XBUFFER (coding->dst_object));
ec6d2bb8 6000
df7492f9 6001 if (! NILP (CODING_ATTR_POST_READ (attrs)))
d46c5b12 6002 {
df7492f9
KH
6003 struct gcpro gcpro1, gcpro2;
6004 EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
2b4f9037 6005 Lisp_Object val;
4ed46869 6006
c0cc7f7f 6007 TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
df7492f9
KH
6008 GCPRO2 (coding->src_object, coding->dst_object);
6009 val = call1 (CODING_ATTR_POST_READ (attrs),
6010 make_number (coding->produced_char));
6011 UNGCPRO;
6012 CHECK_NATNUM (val);
6013 coding->produced_char += Z - prev_Z;
6014 coding->produced += Z_BYTE - prev_Z_BYTE;
d46c5b12 6015 }
4ed46869 6016
df7492f9 6017 if (EQ (dst_object, Qt))
ec6d2bb8 6018 {
df7492f9
KH
6019 coding->dst_object = Fbuffer_string ();
6020 }
6021 else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6022 {
6023 set_buffer_internal (XBUFFER (coding->dst_object));
6024 if (dst_bytes < coding->produced)
6025 {
6026 destination
6027 = (unsigned char *) xrealloc (destination, coding->produced);
6028 if (! destination)
6029 {
6030 coding->result = CODING_RESULT_INSUFFICIENT_DST;
6031 unbind_to (count, Qnil);
6032 return;
6033 }
6034 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6035 move_gap_both (BEGV, BEGV_BYTE);
6036 bcopy (BEGV_ADDR, destination, coding->produced);
6037 coding->destination = destination;
6038 }
ec6d2bb8 6039 }
2b4f9037 6040
df7492f9 6041 unbind_to (count, Qnil);
d46c5b12
KH
6042}
6043
df7492f9
KH
6044
6045void
6046encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6047 dst_object)
b73bfc1c 6048 struct coding_system *coding;
df7492f9
KH
6049 Lisp_Object src_object;
6050 EMACS_INT from, from_byte, to, to_byte;
6051 Lisp_Object dst_object;
b73bfc1c
KH
6052{
6053 int count = specpdl_ptr - specpdl;
df7492f9
KH
6054 EMACS_INT chars = to - from;
6055 EMACS_INT bytes = to_byte - from_byte;
6056 Lisp_Object attrs;
6057
6058 saved_coding = coding;
6059 record_unwind_protect (code_conversion_restore, save_excursion_save ());
6060
6061 coding->src_object = src_object;
6062 coding->src_chars = chars;
6063 coding->src_bytes = bytes;
6064 coding->src_multibyte = chars < bytes;
6065
6066 attrs = CODING_ID_ATTRS (coding->id);
6067
6068 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6bac5b12 6069 {
df7492f9
KH
6070 coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6071 set_buffer_internal (XBUFFER (coding->src_object));
6072 if (STRINGP (src_object))
6073 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6074 else if (BUFFERP (src_object))
6075 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6076 else
6077 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6078
6079 if (EQ (src_object, dst_object))
6080 {
6081 set_buffer_internal (XBUFFER (src_object));
6082 del_range_both (from, from_byte, to, to_byte, 1);
6083 set_buffer_internal (XBUFFER (coding->src_object));
6084 }
6085
ac87bbef
KH
6086 call2 (CODING_ATTR_PRE_WRITE (attrs),
6087 make_number (BEG), make_number (Z));
6088 coding->src_object = Fcurrent_buffer ();
df7492f9
KH
6089 if (BEG != GPT)
6090 move_gap_both (BEG, BEG_BYTE);
6091 coding->src_chars = Z - BEG;
6092 coding->src_bytes = Z_BYTE - BEG_BYTE;
6093 coding->src_pos = BEG;
6094 coding->src_pos_byte = BEG_BYTE;
6095 coding->src_multibyte = Z < Z_BYTE;
6096 }
6097 else if (STRINGP (src_object))
6098 {
6099 coding->src_pos = from;
6100 coding->src_pos_byte = from_byte;
6101 }
6102 else if (BUFFERP (src_object))
d46c5b12 6103 {
df7492f9
KH
6104 set_buffer_internal (XBUFFER (src_object));
6105 if (from != GPT)
6106 move_gap_both (from, from_byte);
6107 if (EQ (src_object, dst_object))
d46c5b12 6108 {
df7492f9
KH
6109 del_range_both (from, from_byte, to, to_byte, 1);
6110 coding->src_pos = -chars;
6111 coding->src_pos_byte = -bytes;
d46c5b12 6112 }
df7492f9 6113 else
d46c5b12 6114 {
df7492f9
KH
6115 coding->src_pos = from;
6116 coding->src_pos_byte = from_byte;
d46c5b12
KH
6117 }
6118 }
4ed46869 6119
df7492f9 6120 if (BUFFERP (dst_object))
d46c5b12 6121 {
df7492f9
KH
6122 coding->dst_object = dst_object;
6123 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6124 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6125 coding->dst_multibyte
6126 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
b73bfc1c 6127 }
df7492f9 6128 else if (EQ (dst_object, Qt))
4956c225 6129 {
df7492f9 6130 coding->dst_object = Qnil;
df7492f9 6131 coding->dst_bytes = coding->src_chars;
ac87bbef
KH
6132 if (coding->dst_bytes == 0)
6133 coding->dst_bytes = 1;
6134 coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
df7492f9 6135 coding->dst_multibyte = 0;
4956c225 6136 }
df7492f9 6137 else
78108bcd 6138 {
df7492f9
KH
6139 coding->dst_object = Qnil;
6140 coding->dst_multibyte = 0;
78108bcd
KH
6141 }
6142
df7492f9 6143 encode_coding (coding);
4ed46869 6144
df7492f9 6145 if (EQ (dst_object, Qt))
4ed46869 6146 {
df7492f9
KH
6147 if (BUFFERP (coding->dst_object))
6148 coding->dst_object = Fbuffer_string ();
6149 else
73be902c 6150 {
df7492f9
KH
6151 coding->dst_object
6152 = make_unibyte_string ((char *) coding->destination,
6153 coding->produced);
6154 xfree (coding->destination);
73be902c 6155 }
4ed46869 6156 }
d46c5b12 6157
df7492f9 6158 unbind_to (count, Qnil);
b73bfc1c
KH
6159}
6160
df7492f9 6161
b73bfc1c 6162Lisp_Object
df7492f9 6163preferred_coding_system ()
b73bfc1c 6164{
df7492f9 6165 int id = coding_categories[coding_priorities[0]].id;
2391eaa4 6166
df7492f9 6167 return CODING_ID_NAME (id);
4ed46869
KH
6168}
6169
6170\f
6171#ifdef emacs
1397dc18 6172/*** 8. Emacs Lisp library functions ***/
4ed46869 6173
4ed46869 6174DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
48b0f3ae 6175 doc: /* Return t if OBJECT is nil or a coding-system.
df7492f9 6176See the documentation of `define-coding-system' for information
48b0f3ae
PJ
6177about coding-system objects. */)
6178 (obj)
4ed46869
KH
6179 Lisp_Object obj;
6180{
df7492f9 6181 return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
4ed46869
KH
6182}
6183
9d991de8
RS
6184DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6185 Sread_non_nil_coding_system, 1, 1, 0,
48b0f3ae
PJ
6186 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6187 (prompt)
4ed46869
KH
6188 Lisp_Object prompt;
6189{
e0e989f6 6190 Lisp_Object val;
9d991de8
RS
6191 do
6192 {
4608c386
KH
6193 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6194 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
9d991de8
RS
6195 }
6196 while (XSTRING (val)->size == 0);
e0e989f6 6197 return (Fintern (val, Qnil));
4ed46869
KH
6198}
6199
9b787f3e 6200DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
48b0f3ae
PJ
6201 doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6202If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6203 (prompt, default_coding_system)
9b787f3e 6204 Lisp_Object prompt, default_coding_system;
4ed46869 6205{
f44d27ce 6206 Lisp_Object val;
9b787f3e
RS
6207 if (SYMBOLP (default_coding_system))
6208 XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4608c386 6209 val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
9b787f3e
RS
6210 Qt, Qnil, Qcoding_system_history,
6211 default_coding_system, Qnil);
e0e989f6 6212 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4ed46869
KH
6213}
6214
6215DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6216 1, 1, 0,
48b0f3ae
PJ
6217 doc: /* Check validity of CODING-SYSTEM.
6218If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6219It is valid if it is a symbol with a non-nil `coding-system' property.
6220The value of property should be a vector of length 5. */)
df7492f9 6221 (coding_system)
4ed46869
KH
6222 Lisp_Object coding_system;
6223{
b7826503 6224 CHECK_SYMBOL (coding_system);
4ed46869
KH
6225 if (!NILP (Fcoding_system_p (coding_system)))
6226 return coding_system;
6227 while (1)
02ba4723 6228 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4ed46869 6229}
df7492f9 6230
3a73fa5d 6231\f
d46c5b12 6232Lisp_Object
df7492f9 6233detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
d46c5b12
KH
6234 unsigned char *src;
6235 int src_bytes, highest;
0a28aafb 6236 int multibytep;
df7492f9 6237 Lisp_Object coding_system;
4ed46869 6238{
df7492f9
KH
6239 unsigned char *src_end = src + src_bytes;
6240 int mask = CATEGORY_MASK_ANY;
6241 int detected = 0;
6242 int c, i;
6243 Lisp_Object attrs, eol_type;
6244 Lisp_Object val;
6245 struct coding_system coding;
6246
6247 if (NILP (coding_system))
6248 coding_system = Qundecided;
6249 setup_coding_system (coding_system, &coding);
6250 attrs = CODING_ID_ATTRS (coding.id);
6251 eol_type = CODING_ID_EOL_TYPE (coding.id);
4ed46869 6252
df7492f9
KH
6253 coding.source = src;
6254 coding.src_bytes = src_bytes;
6255 coding.src_multibyte = multibytep;
6256 coding.consumed = 0;
4ed46869 6257
df7492f9 6258 if (XINT (CODING_ATTR_CATEGORY (attrs)) != coding_category_undecided)
4ed46869 6259 {
df7492f9 6260 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
4ed46869 6261 }
df7492f9 6262 else
4ed46869 6263 {
df7492f9
KH
6264 coding_system = Qnil;
6265 for (; src < src_end; src++)
4ed46869 6266 {
df7492f9
KH
6267 c = *src;
6268 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6269 || c == ISO_CODE_SI
6270 || c == ISO_CODE_SO)))
d46c5b12 6271 break;
4ed46869 6272 }
df7492f9
KH
6273 coding.head_ascii = src - coding.source;
6274
6275 if (src < src_end)
6276 for (i = 0; i < coding_category_raw_text; i++)
6277 {
6278 enum coding_category category = coding_priorities[i];
6279 struct coding_system *this = coding_categories + category;
6280
6281 if (category >= coding_category_raw_text
6282 || detected & (1 << category))
6283 continue;
6284
6285 if (this->id < 0)
6286 {
6287 /* No coding system of this category is defined. */
6288 mask &= ~(1 << category);
6289 }
6290 else
6291 {
6292 detected |= detected_mask[category];
6293 if ((*(coding_categories[category].detector)) (&coding, &mask)
6294 && highest)
6295 {
6296 mask &= detected_mask[category];
6297 break;
6298 }
6299 }
6300 }
4ed46869 6301 }
4ed46869 6302
df7492f9
KH
6303 if (!mask)
6304 val = Fcons (make_number (coding_category_raw_text), Qnil);
6305 else if (mask == CATEGORY_MASK_ANY)
6306 val = Fcons (make_number (coding_category_undecided), Qnil);
6307 else if (highest)
4ed46869 6308 {
df7492f9
KH
6309 for (i = 0; i < coding_category_raw_text; i++)
6310 if (mask & (1 << coding_priorities[i]))
6311 {
6312 val = Fcons (make_number (coding_priorities[i]), Qnil);
6313 break;
6314 }
6315 }
6316 else
6317 {
6318 val = Qnil;
6319 for (i = coding_category_raw_text - 1; i >= 0; i--)
6320 if (mask & (1 << coding_priorities[i]))
6321 val = Fcons (make_number (coding_priorities[i]), val);
4ed46869 6322 }
df7492f9
KH
6323
6324 {
6325 int one_byte_eol = -1, two_byte_eol = -1;
6326 Lisp_Object tail;
6327
6328 for (tail = val; CONSP (tail); tail = XCDR (tail))
6329 {
6330 struct coding_system *this
6331 = (NILP (coding_system) ? coding_categories + XINT (XCAR (tail))
6332 : &coding);
6333 int this_eol;
6334
6335 attrs = CODING_ID_ATTRS (this->id);
6336 eol_type = CODING_ID_EOL_TYPE (this->id);
6337 XSETCAR (tail, CODING_ID_NAME (this->id));
6338 if (VECTORP (eol_type))
6339 {
6340 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_16))
6341 {
6342 if (two_byte_eol < 0)
6343 two_byte_eol = detect_eol (this, coding.source, src_bytes);
6344 this_eol = two_byte_eol;
6345 }
6346 else
6347 {
6348 if (one_byte_eol < 0)
6349 one_byte_eol =detect_eol (this, coding.source, src_bytes);
6350 this_eol = one_byte_eol;
6351 }
6352 if (this_eol == EOL_SEEN_LF)
6353 XSETCAR (tail, AREF (eol_type, 0));
6354 else if (this_eol == EOL_SEEN_CRLF)
6355 XSETCAR (tail, AREF (eol_type, 1));
6356 else if (this_eol == EOL_SEEN_CR)
6357 XSETCAR (tail, AREF (eol_type, 2));
6358 }
6359 }
6360 }
6361
03699b14 6362 return (highest ? XCAR (val) : val);
93dec019 6363}
4ed46869 6364
df7492f9 6365
d46c5b12
KH
6366DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6367 2, 3, 0,
48b0f3ae
PJ
6368 doc: /* Detect coding system of the text in the region between START and END.
6369Return a list of possible coding systems ordered by priority.
6370
6371If only ASCII characters are found, it returns a list of single element
6372`undecided' or its subsidiary coding system according to a detected
6373end-of-line format.
6374
6375If optional argument HIGHEST is non-nil, return the coding system of
6376highest priority. */)
6377 (start, end, highest)
d46c5b12
KH
6378 Lisp_Object start, end, highest;
6379{
6380 int from, to;
6381 int from_byte, to_byte;
6289dd10 6382
b7826503
PJ
6383 CHECK_NUMBER_COERCE_MARKER (start);
6384 CHECK_NUMBER_COERCE_MARKER (end);
4ed46869 6385
d46c5b12
KH
6386 validate_region (&start, &end);
6387 from = XINT (start), to = XINT (end);
6388 from_byte = CHAR_TO_BYTE (from);
6389 to_byte = CHAR_TO_BYTE (to);
6289dd10 6390
d46c5b12
KH
6391 if (from < GPT && to >= GPT)
6392 move_gap_both (to, to_byte);
c210f766 6393
d46c5b12 6394 return detect_coding_system (BYTE_POS_ADDR (from_byte),
df7492f9 6395 to_byte - from_byte,
0a28aafb
KH
6396 !NILP (highest),
6397 !NILP (current_buffer
df7492f9
KH
6398 ->enable_multibyte_characters),
6399 Qnil);
d46c5b12 6400}
6289dd10 6401
d46c5b12
KH
6402DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6403 1, 2, 0,
48b0f3ae
PJ
6404 doc: /* Detect coding system of the text in STRING.
6405Return a list of possible coding systems ordered by priority.
6406
6407If only ASCII characters are found, it returns a list of single element
6408`undecided' or its subsidiary coding system according to a detected
6409end-of-line format.
6410
6411If optional argument HIGHEST is non-nil, return the coding system of
6412highest priority. */)
6413 (string, highest)
d46c5b12
KH
6414 Lisp_Object string, highest;
6415{
b7826503 6416 CHECK_STRING (string);
4ed46869 6417
d46c5b12 6418 return detect_coding_system (XSTRING (string)->data,
df7492f9 6419 STRING_BYTES (XSTRING (string)),
0a28aafb 6420 !NILP (highest),
df7492f9
KH
6421 STRING_MULTIBYTE (string),
6422 Qnil);
4ed46869
KH
6423}
6424
05e6f5dc 6425
df7492f9
KH
6426static INLINE int
6427char_encodable_p (c, attrs)
6428 int c;
6429 Lisp_Object attrs;
05e6f5dc 6430{
df7492f9 6431 Lisp_Object tail;
df7492f9 6432 struct charset *charset;
05e6f5dc 6433
df7492f9
KH
6434 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6435 CONSP (tail); tail = XCDR (tail))
05e6f5dc 6436 {
df7492f9
KH
6437 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6438 if (CHAR_CHARSET_P (c, charset))
6439 break;
05e6f5dc 6440 }
df7492f9 6441 return (! NILP (tail));
05e6f5dc
KH
6442}
6443
6444
df7492f9
KH
6445/* Return a list of coding systems that safely encode the text between
6446 START and END. If EXCLUDE is non-nil, it is a list of coding
6447 systems not to check. The returned list doesn't contain any such
6448 coding systems. In any case, If the text contains only ASCII or is
6449 unibyte, return t. */
6450
6451DEFUN ("find-coding-systems-region-internal",
6452 Ffind_coding_systems_region_internal,
6453 Sfind_coding_systems_region_internal, 2, 3, 0,
6454 doc: /* Internal use only. */)
6455 (start, end, exclude)
6456 Lisp_Object start, end, exclude;
6457{
6458 Lisp_Object coding_attrs_list, safe_codings;
6459 EMACS_INT start_byte, end_byte;
6460 unsigned char *p, *pbeg, *pend;
6461 int c;
6462 Lisp_Object tail, elt;
05e6f5dc 6463
df7492f9
KH
6464 if (STRINGP (start))
6465 {
6466 if (!STRING_MULTIBYTE (start)
6467 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6468 return Qt;
6469 start_byte = 0;
6470 end_byte = STRING_BYTES (XSTRING (start));
6471 }
6472 else
6473 {
6474 CHECK_NUMBER_COERCE_MARKER (start);
6475 CHECK_NUMBER_COERCE_MARKER (end);
6476 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6477 args_out_of_range (start, end);
6478 if (NILP (current_buffer->enable_multibyte_characters))
6479 return Qt;
6480 start_byte = CHAR_TO_BYTE (XINT (start));
6481 end_byte = CHAR_TO_BYTE (XINT (end));
6482 if (XINT (end) - XINT (start) == end_byte - start_byte)
6483 return Qt;
05e6f5dc 6484
df7492f9
KH
6485 if (start < GPT && end > GPT)
6486 {
6487 if ((GPT - start) < (end - GPT))
6488 move_gap_both (start, start_byte);
6489 else
6490 move_gap_both (end, end_byte);
6491 }
6492 }
05e6f5dc 6493
df7492f9
KH
6494 coding_attrs_list = Qnil;
6495 for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6496 if (NILP (exclude)
6497 || NILP (Fmemq (XCAR (tail), exclude)))
6498 {
6499 Lisp_Object attrs;
05e6f5dc 6500
df7492f9
KH
6501 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6502 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6503 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6504 coding_attrs_list = Fcons (attrs, coding_attrs_list);
6505 }
6506
6507 if (STRINGP (start))
6508 p = pbeg = XSTRING (start)->data;
6509 else
6510 p = pbeg = BYTE_POS_ADDR (start_byte);
6511 pend = p + (end_byte - start_byte);
6512
6513 while (p < pend && ASCII_BYTE_P (*p)) p++;
6514 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
05e6f5dc
KH
6515
6516 while (p < pend)
6517 {
df7492f9
KH
6518 if (ASCII_BYTE_P (*p))
6519 p++;
6520 else
6521 {
6522 c = STRING_CHAR_ADVANCE (p);
6523
6524 charset_map_loaded = 0;
6525 for (tail = coding_attrs_list; CONSP (tail);)
6526 {
6527 elt = XCAR (tail);
6528 if (NILP (elt))
6529 tail = XCDR (tail);
6530 else if (char_encodable_p (c, elt))
6531 tail = XCDR (tail);
6532 else if (CONSP (XCDR (tail)))
6533 {
6534 XSETCAR (tail, XCAR (XCDR (tail)));
6535 XSETCDR (tail, XCDR (XCDR (tail)));
6536 }
6537 else
6538 {
6539 XSETCAR (tail, Qnil);
6540 tail = XCDR (tail);
6541 }
6542 }
6543 if (charset_map_loaded)
6544 {
6545 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
05e6f5dc 6546
df7492f9
KH
6547 if (STRINGP (start))
6548 pbeg = XSTRING (start)->data;
6549 else
6550 pbeg = BYTE_POS_ADDR (start_byte);
6551 p = pbeg + p_offset;
6552 pend = pbeg + pend_offset;
6553 }
6554 }
05e6f5dc 6555 }
df7492f9
KH
6556
6557 safe_codings = Qnil;
6558 for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6559 if (! NILP (XCAR (tail)))
6560 safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6561
05e6f5dc
KH
6562 return safe_codings;
6563}
6564
6565
df7492f9
KH
6566DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6567 Scheck_coding_systems_region, 3, 3, 0,
6568 doc: /* Check if the region is encodable by coding systems.
05e6f5dc 6569
df7492f9
KH
6570START and END are buffer positions specifying the region.
6571CODING-SYSTEM-LIST is a list of coding systems to check.
6572
6573The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6574CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6575whole region, POS0, POS1, ... are buffer positions where non-encodable
6576characters are found.
6577
6578If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6579value is nil.
6580
6581START may be a string. In that case, check if the string is
6582encodable, and the value contains indices to the string instead of
6583buffer positions. END is ignored. */)
6584 (start, end, coding_system_list)
6585 Lisp_Object start, end, coding_system_list;
05e6f5dc 6586{
df7492f9
KH
6587 Lisp_Object list;
6588 EMACS_INT start_byte, end_byte;
6589 int pos;
6590 unsigned char *p, *pbeg, *pend;
6591 int c;
6592 Lisp_Object tail, elt;
05e6f5dc
KH
6593
6594 if (STRINGP (start))
6595 {
df7492f9
KH
6596 if (!STRING_MULTIBYTE (start)
6597 && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6598 return Qnil;
6599 start_byte = 0;
6600 end_byte = STRING_BYTES (XSTRING (start));
6601 pos = 0;
05e6f5dc
KH
6602 }
6603 else
6604 {
b7826503
PJ
6605 CHECK_NUMBER_COERCE_MARKER (start);
6606 CHECK_NUMBER_COERCE_MARKER (end);
05e6f5dc
KH
6607 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6608 args_out_of_range (start, end);
6609 if (NILP (current_buffer->enable_multibyte_characters))
df7492f9
KH
6610 return Qnil;
6611 start_byte = CHAR_TO_BYTE (XINT (start));
6612 end_byte = CHAR_TO_BYTE (XINT (end));
6613 if (XINT (end) - XINT (start) == end_byte - start_byte)
05e6f5dc 6614 return Qt;
df7492f9
KH
6615
6616 if (start < GPT && end > GPT)
6617 {
6618 if ((GPT - start) < (end - GPT))
6619 move_gap_both (start, start_byte);
6620 else
6621 move_gap_both (end, end_byte);
6622 }
6623 pos = start;
6624 }
6625
6626 list = Qnil;
6627 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6628 {
6629 elt = XCAR (tail);
6630 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6631 Qnil)),
6632 list);
05e6f5dc
KH
6633 }
6634
df7492f9
KH
6635 if (STRINGP (start))
6636 p = pbeg = XSTRING (start)->data;
6637 else
6638 p = pbeg = BYTE_POS_ADDR (start_byte);
6639 pend = p + (end_byte - start_byte);
6640
6641 while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6642 while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6643
6644 while (p < pend)
05e6f5dc 6645 {
df7492f9
KH
6646 if (ASCII_BYTE_P (*p))
6647 p++;
6648 else
05e6f5dc 6649 {
df7492f9
KH
6650 c = STRING_CHAR_ADVANCE (p);
6651
6652 charset_map_loaded = 0;
6653 for (tail = list; CONSP (tail); tail = XCDR (tail))
6654 {
6655 elt = XCDR (XCAR (tail));
6656 if (! char_encodable_p (c, XCAR (elt)))
6657 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6658 }
6659 if (charset_map_loaded)
6660 {
6661 EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6662
6663 if (STRINGP (start))
6664 pbeg = XSTRING (start)->data;
6665 else
6666 pbeg = BYTE_POS_ADDR (start_byte);
6667 p = pbeg + p_offset;
6668 pend = pbeg + pend_offset;
6669 }
05e6f5dc 6670 }
df7492f9 6671 pos++;
05e6f5dc
KH
6672 }
6673
df7492f9
KH
6674 tail = list;
6675 list = Qnil;
6676 for (; CONSP (tail); tail = XCDR (tail))
05e6f5dc 6677 {
df7492f9
KH
6678 elt = XCAR (tail);
6679 if (CONSP (XCDR (XCDR (elt))))
6680 list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6681 list);
05e6f5dc 6682 }
df7492f9
KH
6683
6684 return list;
05e6f5dc
KH
6685}
6686
6687
df7492f9 6688
4031e2bf 6689Lisp_Object
df7492f9
KH
6690code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6691 Lisp_Object start, end, coding_system, dst_object;
6692 int encodep, norecord;
3a73fa5d
RS
6693{
6694 struct coding_system coding;
df7492f9
KH
6695 EMACS_INT from, from_byte, to, to_byte;
6696 Lisp_Object src_object;
3a73fa5d 6697
b7826503
PJ
6698 CHECK_NUMBER_COERCE_MARKER (start);
6699 CHECK_NUMBER_COERCE_MARKER (end);
df7492f9
KH
6700 if (NILP (coding_system))
6701 coding_system = Qno_conversion;
6702 else
6703 CHECK_CODING_SYSTEM (coding_system);
6704 src_object = Fcurrent_buffer ();
6705 if (NILP (dst_object))
6706 dst_object = src_object;
6707 else if (! EQ (dst_object, Qt))
6708 CHECK_BUFFER (dst_object);
3a73fa5d 6709
d46c5b12
KH
6710 validate_region (&start, &end);
6711 from = XFASTINT (start);
df7492f9 6712 from_byte = CHAR_TO_BYTE (from);
d46c5b12 6713 to = XFASTINT (end);
df7492f9 6714 to_byte = CHAR_TO_BYTE (to);
d46c5b12 6715
df7492f9
KH
6716 setup_coding_system (coding_system, &coding);
6717 coding.mode |= CODING_MODE_LAST_BLOCK;
6718
6719 if (encodep)
6720 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6721 dst_object);
6722 else
6723 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6724 dst_object);
6725 if (! norecord)
6726 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
d46c5b12 6727
df7492f9
KH
6728 if (coding.result != CODING_RESULT_SUCCESS)
6729 error ("Code conversion error: %d", coding.result);
3a73fa5d 6730
df7492f9
KH
6731 return (BUFFERP (dst_object)
6732 ? make_number (coding.produced_char)
6733 : coding.dst_object);
4031e2bf
KH
6734}
6735
df7492f9 6736
4031e2bf 6737DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
df7492f9 6738 3, 4, "r\nzCoding system: ",
48b0f3ae 6739 doc: /* Decode the current region from the specified coding system.
df7492f9
KH
6740When called from a program, takes four arguments:
6741 START, END, CODING-SYSTEM, and DESTINATION.
6742START and END are buffer positions.
6743
6744Optional 4th arguments DESTINATION specifies where the decoded text goes.
6745If nil, the region between START and END is replace by the decoded text.
6746If buffer, the decoded text is inserted in the buffer.
6747If t, the decoded text is returned.
6748
48b0f3ae
PJ
6749This function sets `last-coding-system-used' to the precise coding system
6750used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6751not fully specified.)
6752It returns the length of the decoded text. */)
df7492f9
KH
6753 (start, end, coding_system, destination)
6754 Lisp_Object start, end, coding_system, destination;
4031e2bf 6755{
df7492f9 6756 return code_convert_region (start, end, coding_system, destination, 0, 0);
3a73fa5d
RS
6757}
6758
6759DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
df7492f9
KH
6760 3, 4, "r\nzCoding system: ",
6761 doc: /* Encode the current region by specified coding system.
48b0f3ae
PJ
6762When called from a program, takes three arguments:
6763START, END, and CODING-SYSTEM. START and END are buffer positions.
df7492f9
KH
6764
6765Optional 4th arguments DESTINATION specifies where the encoded text goes.
6766If nil, the region between START and END is replace by the encoded text.
6767If buffer, the encoded text is inserted in the buffer.
6768If t, the encoded text is returned.
6769
48b0f3ae
PJ
6770This function sets `last-coding-system-used' to the precise coding system
6771used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6772not fully specified.)
6773It returns the length of the encoded text. */)
df7492f9
KH
6774 (start, end, coding_system, destination)
6775 Lisp_Object start, end, coding_system, destination;
3a73fa5d 6776{
df7492f9 6777 return code_convert_region (start, end, coding_system, destination, 1, 0);
4031e2bf 6778}
3a73fa5d 6779
4031e2bf 6780Lisp_Object
df7492f9
KH
6781code_convert_string (string, coding_system, dst_object,
6782 encodep, nocopy, norecord)
6783 Lisp_Object string, coding_system, dst_object;
6784 int encodep, nocopy, norecord;
4031e2bf
KH
6785{
6786 struct coding_system coding;
df7492f9 6787 EMACS_INT chars, bytes;
3a73fa5d 6788
b7826503 6789 CHECK_STRING (string);
d46c5b12 6790 if (NILP (coding_system))
df7492f9
KH
6791 {
6792 if (! norecord)
6793 Vlast_coding_system_used = Qno_conversion;
6794 if (NILP (dst_object))
6795 return (nocopy ? Fcopy_sequence (string) : string);
6796 }
4ed46869 6797
df7492f9
KH
6798 if (NILP (coding_system))
6799 coding_system = Qno_conversion;
6800 else
6801 CHECK_CODING_SYSTEM (coding_system);
6802 if (NILP (dst_object))
6803 dst_object = Qt;
6804 else if (! EQ (dst_object, Qt))
6805 CHECK_BUFFER (dst_object);
5f1cd180 6806
df7492f9 6807 setup_coding_system (coding_system, &coding);
d46c5b12 6808 coding.mode |= CODING_MODE_LAST_BLOCK;
df7492f9
KH
6809 chars = XSTRING (string)->size;
6810 bytes = STRING_BYTES (XSTRING (string));
6811 if (encodep)
6812 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6813 else
6814 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6815 if (! norecord)
6816 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
ec6d2bb8 6817
df7492f9
KH
6818 if (coding.result != CODING_RESULT_SUCCESS)
6819 error ("Code conversion error: %d", coding.result);
4ed46869 6820
df7492f9
KH
6821 return (BUFFERP (dst_object)
6822 ? make_number (coding.produced_char)
6823 : coding.dst_object);
4ed46869
KH
6824}
6825
4031e2bf 6826
ecec61c1 6827/* Encode or decode STRING according to CODING_SYSTEM.
ec6d2bb8
KH
6828 Do not set Vlast_coding_system_used.
6829
6830 This function is called only from macros DECODE_FILE and
6831 ENCODE_FILE, thus we ignore character composition. */
ecec61c1
KH
6832
6833Lisp_Object
6834code_convert_string_norecord (string, coding_system, encodep)
6835 Lisp_Object string, coding_system;
6836 int encodep;
6837{
0be8721c 6838 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
df7492f9 6839}
ecec61c1 6840
ecec61c1 6841
df7492f9
KH
6842DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6843 2, 4, 0,
6844 doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6845
6846Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6847if the decoding operation is trivial.
ecec61c1 6848
df7492f9 6849Optional fourth arg BUFFER non-nil meant that the decoded text is
a3f6ee6d 6850inserted in BUFFER instead of returned as a string. In this case,
df7492f9 6851the return value is BUFFER.
ecec61c1 6852
df7492f9
KH
6853This function sets `last-coding-system-used' to the precise coding system
6854used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6855not fully specified. */)
6856 (string, coding_system, nocopy, buffer)
6857 Lisp_Object string, coding_system, nocopy, buffer;
6858{
6859 return code_convert_string (string, coding_system, buffer,
6860 0, ! NILP (nocopy), 0);
6861}
6862
6863DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6864 2, 4, 0,
6865 doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6866
6867Optional third arg NOCOPY non-nil means it is OK to return STRING
6868itself if the encoding operation is trivial.
6869
6870Optional fourth arg BUFFER non-nil meant that the encoded text is
a3f6ee6d 6871inserted in BUFFER instead of returned as a string. In this case,
df7492f9
KH
6872the return value is BUFFER.
6873
6874This function sets `last-coding-system-used' to the precise coding system
6875used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6876not fully specified.) */)
6877 (string, coding_system, nocopy, buffer)
6878 Lisp_Object string, coding_system, nocopy, buffer;
6879{
6880 return code_convert_string (string, coding_system, buffer,
c197f191 6881 1, ! NILP (nocopy), 1);
ecec61c1 6882}
df7492f9 6883
3a73fa5d 6884\f
4ed46869 6885DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6886 doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6887Return the corresponding character. */)
6888 (code)
4ed46869
KH
6889 Lisp_Object code;
6890{
df7492f9
KH
6891 Lisp_Object spec, attrs, val;
6892 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6893 int c;
6894
6895 CHECK_NATNUM (code);
6896 c = XFASTINT (code);
6897 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6898 attrs = AREF (spec, 0);
4ed46869 6899
df7492f9
KH
6900 if (ASCII_BYTE_P (c)
6901 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6902 return code;
6903
6904 val = CODING_ATTR_CHARSET_LIST (attrs);
6905 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
004068e4
KH
6906 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6907 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
df7492f9
KH
6908
6909 if (c <= 0x7F)
6910 charset = charset_roman;
6911 else if (c >= 0xA0 && c < 0xDF)
55ab7be3 6912 {
df7492f9
KH
6913 charset = charset_kana;
6914 c -= 0x80;
55ab7be3
KH
6915 }
6916 else
6917 {
004068e4 6918 int s1 = c >> 8, s2 = c & 0xFF;
df7492f9
KH
6919
6920 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
6921 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
6922 error ("Invalid code: %d", code);
6923 SJIS_TO_JIS (c);
6924 charset = charset_kanji;
55ab7be3 6925 }
df7492f9
KH
6926 c = DECODE_CHAR (charset, c);
6927 if (c < 0)
6928 error ("Invalid code: %d", code);
6929 return make_number (c);
4ed46869
KH
6930}
6931
df7492f9 6932
4ed46869 6933DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
48b0f3ae
PJ
6934 doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6935Return the corresponding code in SJIS. */)
6936 (ch)
df7492f9 6937 Lisp_Object ch;
4ed46869 6938{
df7492f9
KH
6939 Lisp_Object spec, attrs, charset_list;
6940 int c;
6941 struct charset *charset;
6942 unsigned code;
4ed46869 6943
df7492f9
KH
6944 CHECK_CHARACTER (ch);
6945 c = XFASTINT (ch);
6946 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6947 attrs = AREF (spec, 0);
6948
6949 if (ASCII_CHAR_P (c)
6950 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6951 return ch;
6952
6953 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6954 charset = char_charset (c, charset_list, &code);
6955 if (code == CHARSET_INVALID_CODE (charset))
6956 error ("Can't encode by shift_jis encoding: %d", c);
6957 JIS_TO_SJIS (code);
6958
6959 return make_number (code);
4ed46869
KH
6960}
6961
6962DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
48b0f3ae
PJ
6963 doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6964Return the corresponding character. */)
6965 (code)
4ed46869
KH
6966 Lisp_Object code;
6967{
df7492f9
KH
6968 Lisp_Object spec, attrs, val;
6969 struct charset *charset_roman, *charset_big5, *charset;
6970 int c;
4ed46869 6971
df7492f9
KH
6972 CHECK_NATNUM (code);
6973 c = XFASTINT (code);
6974 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6975 attrs = AREF (spec, 0);
6976
6977 if (ASCII_BYTE_P (c)
6978 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6979 return code;
6980
6981 val = CODING_ATTR_CHARSET_LIST (attrs);
6982 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6983 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
6984
6985 if (c <= 0x7F)
6986 charset = charset_roman;
c28a9453
KH
6987 else
6988 {
df7492f9
KH
6989 int b1 = c >> 8, b2 = c & 0x7F;
6990 if (b1 < 0xA1 || b1 > 0xFE
6991 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
6992 error ("Invalid code: %d", code);
6993 charset = charset_big5;
c28a9453 6994 }
df7492f9
KH
6995 c = DECODE_CHAR (charset, (unsigned )c);
6996 if (c < 0)
6997 error ("Invalid code: %d", code);
6998 return make_number (c);
4ed46869
KH
6999}
7000
7001DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
48b0f3ae
PJ
7002 doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7003Return the corresponding character code in Big5. */)
7004 (ch)
4ed46869
KH
7005 Lisp_Object ch;
7006{
df7492f9
KH
7007 Lisp_Object spec, attrs, charset_list;
7008 struct charset *charset;
7009 int c;
7010 unsigned code;
7011
7012 CHECK_CHARACTER (ch);
7013 c = XFASTINT (ch);
7014 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7015 attrs = AREF (spec, 0);
7016 if (ASCII_CHAR_P (c)
7017 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7018 return ch;
7019
7020 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7021 charset = char_charset (c, charset_list, &code);
7022 if (code == CHARSET_INVALID_CODE (charset))
7023 error ("Can't encode by Big5 encoding: %d", c);
7024
7025 return make_number (code);
4ed46869 7026}
df7492f9 7027
3a73fa5d 7028\f
1ba9e4ab
KH
7029DEFUN ("set-terminal-coding-system-internal",
7030 Fset_terminal_coding_system_internal,
48b0f3ae
PJ
7031 Sset_terminal_coding_system_internal, 1, 1, 0,
7032 doc: /* Internal use only. */)
7033 (coding_system)
b74e4686 7034 Lisp_Object coding_system;
4ed46869 7035{
b7826503 7036 CHECK_SYMBOL (coding_system);
df7492f9
KH
7037 setup_coding_system (Fcheck_coding_system (coding_system),
7038 &terminal_coding);
7039
70c22245 7040 /* We had better not send unsafe characters to terminal. */
df7492f9
KH
7041 terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7042 /* Characer composition should be disabled. */
7043 terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7044 terminal_coding.src_multibyte = 1;
7045 terminal_coding.dst_multibyte = 0;
4ed46869
KH
7046 return Qnil;
7047}
7048
c4825358
KH
7049DEFUN ("set-safe-terminal-coding-system-internal",
7050 Fset_safe_terminal_coding_system_internal,
48b0f3ae 7051 Sset_safe_terminal_coding_system_internal, 1, 1, 0,
ddb67bdc 7052 doc: /* Internal use only. */)
48b0f3ae 7053 (coding_system)
b74e4686 7054 Lisp_Object coding_system;
c4825358 7055{
b7826503 7056 CHECK_SYMBOL (coding_system);
c4825358
KH
7057 setup_coding_system (Fcheck_coding_system (coding_system),
7058 &safe_terminal_coding);
df7492f9
KH
7059 /* Characer composition should be disabled. */
7060 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
b73bfc1c
KH
7061 safe_terminal_coding.src_multibyte = 1;
7062 safe_terminal_coding.dst_multibyte = 0;
c4825358
KH
7063 return Qnil;
7064}
7065
4ed46869
KH
7066DEFUN ("terminal-coding-system",
7067 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
48b0f3ae
PJ
7068 doc: /* Return coding system specified for terminal output. */)
7069 ()
4ed46869 7070{
df7492f9 7071 return CODING_ID_NAME (terminal_coding.id);
4ed46869
KH
7072}
7073
1ba9e4ab
KH
7074DEFUN ("set-keyboard-coding-system-internal",
7075 Fset_keyboard_coding_system_internal,
48b0f3ae
PJ
7076 Sset_keyboard_coding_system_internal, 1, 1, 0,
7077 doc: /* Internal use only. */)
7078 (coding_system)
4ed46869
KH
7079 Lisp_Object coding_system;
7080{
b7826503 7081 CHECK_SYMBOL (coding_system);
df7492f9
KH
7082 setup_coding_system (Fcheck_coding_system (coding_system),
7083 &keyboard_coding);
7084 /* Characer composition should be disabled. */
7085 keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
4ed46869
KH
7086 return Qnil;
7087}
7088
7089DEFUN ("keyboard-coding-system",
7090 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
48b0f3ae
PJ
7091 doc: /* Return coding system specified for decoding keyboard input. */)
7092 ()
4ed46869 7093{
df7492f9 7094 return CODING_ID_NAME (keyboard_coding.id);
4ed46869
KH
7095}
7096
7097\f
a5d301df
KH
7098DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7099 Sfind_operation_coding_system, 1, MANY, 0,
48b0f3ae
PJ
7100 doc: /* Choose a coding system for an operation based on the target name.
7101The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7102DECODING-SYSTEM is the coding system to use for decoding
7103\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7104for encoding (in case OPERATION does encoding).
7105
7106The first argument OPERATION specifies an I/O primitive:
7107 For file I/O, `insert-file-contents' or `write-region'.
7108 For process I/O, `call-process', `call-process-region', or `start-process'.
7109 For network I/O, `open-network-stream'.
7110
7111The remaining arguments should be the same arguments that were passed
7112to the primitive. Depending on which primitive, one of those arguments
7113is selected as the TARGET. For example, if OPERATION does file I/O,
7114whichever argument specifies the file name is TARGET.
7115
7116TARGET has a meaning which depends on OPERATION:
7117 For file I/O, TARGET is a file name.
7118 For process I/O, TARGET is a process name.
7119 For network I/O, TARGET is a service name or a port number
7120
7121This function looks up what specified for TARGET in,
7122`file-coding-system-alist', `process-coding-system-alist',
7123or `network-coding-system-alist' depending on OPERATION.
7124They may specify a coding system, a cons of coding systems,
7125or a function symbol to call.
7126In the last case, we call the function with one argument,
7127which is a list of all the arguments given to this function.
7128
7129usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7130 (nargs, args)
4ed46869
KH
7131 int nargs;
7132 Lisp_Object *args;
7133{
7134 Lisp_Object operation, target_idx, target, val;
7135 register Lisp_Object chain;
7136
7137 if (nargs < 2)
7138 error ("Too few arguments");
7139 operation = args[0];
7140 if (!SYMBOLP (operation)
7141 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
df7492f9 7142 error ("Invalid first arguement");
4ed46869
KH
7143 if (nargs < 1 + XINT (target_idx))
7144 error ("Too few arguments for operation: %s",
7145 XSYMBOL (operation)->name->data);
7146 target = args[XINT (target_idx) + 1];
7147 if (!(STRINGP (target)
7148 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
df7492f9 7149 error ("Invalid %dth argument", XINT (target_idx) + 1);
4ed46869 7150
2e34157c
RS
7151 chain = ((EQ (operation, Qinsert_file_contents)
7152 || EQ (operation, Qwrite_region))
02ba4723 7153 ? Vfile_coding_system_alist
2e34157c 7154 : (EQ (operation, Qopen_network_stream)
02ba4723
KH
7155 ? Vnetwork_coding_system_alist
7156 : Vprocess_coding_system_alist));
4ed46869
KH
7157 if (NILP (chain))
7158 return Qnil;
7159
03699b14 7160 for (; CONSP (chain); chain = XCDR (chain))
4ed46869 7161 {
f44d27ce 7162 Lisp_Object elt;
4ed46869 7163
df7492f9 7164 elt = XCAR (chain);
4ed46869
KH
7165 if (CONSP (elt)
7166 && ((STRINGP (target)
03699b14
KR
7167 && STRINGP (XCAR (elt))
7168 && fast_string_match (XCAR (elt), target) >= 0)
7169 || (INTEGERP (target) && EQ (target, XCAR (elt)))))
02ba4723 7170 {
03699b14 7171 val = XCDR (elt);
b19fd4c5
KH
7172 /* Here, if VAL is both a valid coding system and a valid
7173 function symbol, we return VAL as a coding system. */
02ba4723
KH
7174 if (CONSP (val))
7175 return val;
7176 if (! SYMBOLP (val))
7177 return Qnil;
7178 if (! NILP (Fcoding_system_p (val)))
7179 return Fcons (val, val);
b19fd4c5
KH
7180 if (! NILP (Ffboundp (val)))
7181 {
7182 val = call1 (val, Flist (nargs, args));
7183 if (CONSP (val))
7184 return val;
7185 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7186 return Fcons (val, val);
7187 }
02ba4723
KH
7188 return Qnil;
7189 }
4ed46869
KH
7190 }
7191 return Qnil;
7192}
7193
df7492f9 7194DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
a3f6ee6d 7195 Sset_coding_system_priority, 0, MANY, 0,
da7db224 7196 doc: /* Assign higher priority to the coding systems given as arguments.
1fcd6c8b 7197usage: (set-coding-system-priority CODING-SYSTEM ...) */)
df7492f9
KH
7198 (nargs, args)
7199 int nargs;
7200 Lisp_Object *args;
7201{
7202 int i, j;
7203 int changed[coding_category_max];
7204 enum coding_category priorities[coding_category_max];
7205
7206 bzero (changed, sizeof changed);
7207
7208 for (i = j = 0; i < nargs; i++)
7209 {
7210 enum coding_category category;
7211 Lisp_Object spec, attrs;
7212
7213 CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7214 attrs = AREF (spec, 0);
7215 category = XINT (CODING_ATTR_CATEGORY (attrs));
7216 if (changed[category])
7217 /* Ignore this coding system because a coding system of the
7218 same category already had a higher priority. */
7219 continue;
7220 changed[category] = 1;
7221 priorities[j++] = category;
7222 if (coding_categories[category].id >= 0
7223 && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7224 setup_coding_system (args[i], &coding_categories[category]);
7225 }
7226
7227 /* Now we have decided top J priorities. Reflect the order of the
7228 original priorities to the remaining priorities. */
7229
7230 for (i = j, j = 0; i < coding_category_max; i++, j++)
7231 {
7232 while (j < coding_category_max
7233 && changed[coding_priorities[j]])
7234 j++;
7235 if (j == coding_category_max)
7236 abort ();
7237 priorities[i] = coding_priorities[j];
7238 }
7239
7240 bcopy (priorities, coding_priorities, sizeof priorities);
7241 return Qnil;
7242}
7243
7244DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7245 Scoding_system_priority_list, 0, 1, 0,
da7db224
DL
7246 doc: /* Return a list of coding systems ordered by their priorities.
7247HIGHESTP non-nil means just return the highest priority one. */)
df7492f9
KH
7248 (highestp)
7249 Lisp_Object highestp;
d46c5b12
KH
7250{
7251 int i;
df7492f9 7252 Lisp_Object val;
d46c5b12 7253
df7492f9 7254 for (i = 0, val = Qnil; i < coding_category_max; i++)
d46c5b12 7255 {
df7492f9
KH
7256 enum coding_category category = coding_priorities[i];
7257 int id = coding_categories[category].id;
7258 Lisp_Object attrs;
7259
7260 if (id < 0)
7261 continue;
7262 attrs = CODING_ID_ATTRS (id);
7263 if (! NILP (highestp))
7264 return CODING_ATTR_BASE_NAME (attrs);
7265 val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7266 }
7267 return Fnreverse (val);
7268}
7269
f0064e1f
DL
7270static char *suffixes[] = { "-unix", "-dos", "-mac" };
7271
df7492f9
KH
7272static Lisp_Object
7273make_subsidiaries (base)
7274 Lisp_Object base;
7275{
7276 Lisp_Object subsidiaries;
df7492f9
KH
7277 int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7278 char *buf = (char *) alloca (base_name_len + 6);
7279 int i;
7280
7281 bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7282 subsidiaries = Fmake_vector (make_number (3), Qnil);
7283 for (i = 0; i < 3; i++)
7284 {
7285 bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7286 ASET (subsidiaries, i, intern (buf));
7287 }
7288 return subsidiaries;
7289}
7290
7291
7292DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7293 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
1fcd6c8b
DL
7294 doc: /* For internal use only.
7295usage: (define-coding-system-internal ...) */)
df7492f9
KH
7296 (nargs, args)
7297 int nargs;
7298 Lisp_Object *args;
7299{
7300 Lisp_Object name;
7301 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
7302 Lisp_Object attrs; /* Vector of attributes. */
7303 Lisp_Object eol_type;
7304 Lisp_Object aliases;
7305 Lisp_Object coding_type, charset_list, safe_charsets;
7306 enum coding_category category;
7307 Lisp_Object tail, val;
7308 int max_charset_id = 0;
7309 int i;
7310
7311 if (nargs < coding_arg_max)
7312 goto short_args;
7313
7314 attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7315
7316 name = args[coding_arg_name];
7317 CHECK_SYMBOL (name);
7318 CODING_ATTR_BASE_NAME (attrs) = name;
7319
7320 val = args[coding_arg_mnemonic];
7321 if (! STRINGP (val))
7322 CHECK_CHARACTER (val);
7323 CODING_ATTR_MNEMONIC (attrs) = val;
7324
7325 coding_type = args[coding_arg_coding_type];
7326 CHECK_SYMBOL (coding_type);
7327 CODING_ATTR_TYPE (attrs) = coding_type;
7328
7329 charset_list = args[coding_arg_charset_list];
7330 if (SYMBOLP (charset_list))
7331 {
7332 if (EQ (charset_list, Qiso_2022))
7333 {
7334 if (! EQ (coding_type, Qiso_2022))
7335 error ("Invalid charset-list");
7336 charset_list = Viso_2022_charset_list;
7337 }
7338 else if (EQ (charset_list, Qemacs_mule))
7339 {
7340 if (! EQ (coding_type, Qemacs_mule))
7341 error ("Invalid charset-list");
7342 charset_list = Vemacs_mule_charset_list;
7343 }
7344 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7345 if (max_charset_id < XFASTINT (XCAR (tail)))
7346 max_charset_id = XFASTINT (XCAR (tail));
7347 }
7348 else
7349 {
7350 charset_list = Fcopy_sequence (charset_list);
7351 for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7352 {
7353 struct charset *charset;
7354
7355 val = Fcar (tail);
7356 CHECK_CHARSET_GET_CHARSET (val, charset);
7357 if (EQ (coding_type, Qiso_2022)
7358 ? CHARSET_ISO_FINAL (charset) < 0
7359 : EQ (coding_type, Qemacs_mule)
7360 ? CHARSET_EMACS_MULE_ID (charset) < 0
7361 : 0)
7362 error ("Can't handle charset `%s'",
7363 XSYMBOL (CHARSET_NAME (charset))->name->data);
7364
7365 XCAR (tail) = make_number (charset->id);
7366 if (max_charset_id < charset->id)
7367 max_charset_id = charset->id;
7368 }
7369 }
7370 CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7371
7372 safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7373 make_number (255));
7374 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7375 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7376 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7377
7378 val = args[coding_arg_decode_translation_table];
7379 if (! NILP (val))
7380 CHECK_CHAR_TABLE (val);
7381 CODING_ATTR_DECODE_TBL (attrs) = val;
7382
7383 val = args[coding_arg_encode_translation_table];
7384 if (! NILP (val))
7385 CHECK_CHAR_TABLE (val);
7386 CODING_ATTR_ENCODE_TBL (attrs) = val;
7387
7388 val = args[coding_arg_post_read_conversion];
7389 CHECK_SYMBOL (val);
7390 CODING_ATTR_POST_READ (attrs) = val;
7391
7392 val = args[coding_arg_pre_write_conversion];
7393 CHECK_SYMBOL (val);
7394 CODING_ATTR_PRE_WRITE (attrs) = val;
7395
7396 val = args[coding_arg_default_char];
7397 if (NILP (val))
7398 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7399 else
7400 {
7401 CHECK_CHARACTER (val);
7402 CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7403 }
7404
7405 val = args[coding_arg_plist];
7406 CHECK_LIST (val);
7407 CODING_ATTR_PLIST (attrs) = val;
7408
7409 if (EQ (coding_type, Qcharset))
7410 {
c7c66a95
KH
7411 /* Generate a lisp vector of 256 elements. Each element is nil,
7412 integer, or a list of charset IDs.
7413
7414 If Nth element is nil, the byte code N is invalid in this
7415 coding system.
7416
7417 If Nth element is a number NUM, N is the first byte of a
7418 charset whose ID is NUM.
7419
7420 If Nth element is a list of charset IDs, N is the first byte
7421 of one of them. The list is sorted by dimensions of the
7422 charsets. A charset of smaller dimension comes firtst.
7423 */
df7492f9
KH
7424 val = Fmake_vector (make_number (256), Qnil);
7425
7426 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7427 {
c7c66a95
KH
7428 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7429 int dim = CHARSET_DIMENSION (charset);
7430 int idx = (dim - 1) * 4;
7431
15d143f7
KH
7432 for (i = charset->code_space[idx];
7433 i <= charset->code_space[idx + 1]; i++)
7434 {
c7c66a95
KH
7435 Lisp_Object tmp, tmp2;
7436 int dim2;
7437
7438 tmp = AREF (val, i);
7439 if (NILP (tmp))
7440 tmp = XCAR (tail);
7441 else if (NUMBERP (tmp))
7442 {
7443 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7444 if (dim < dim2)
c7c66a95 7445 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
f9d71dcd
KH
7446 else
7447 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
c7c66a95 7448 }
15d143f7 7449 else
c7c66a95
KH
7450 {
7451 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7452 {
7453 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7454 if (dim < dim2)
7455 break;
7456 }
7457 if (NILP (tmp2))
7458 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7459 else
7460 {
7461 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7462 XSETCAR (tmp2, XCAR (tail));
7463 }
7464 }
7465 ASET (val, i, tmp);
15d143f7 7466 }
df7492f9
KH
7467 }
7468 ASET (attrs, coding_attr_charset_valids, val);
7469 category = coding_category_charset;
7470 }
7471 else if (EQ (coding_type, Qccl))
7472 {
7473 Lisp_Object valids;
7474
7475 if (nargs < coding_arg_ccl_max)
7476 goto short_args;
7477
7478 val = args[coding_arg_ccl_decoder];
7479 CHECK_CCL_PROGRAM (val);
7480 if (VECTORP (val))
7481 val = Fcopy_sequence (val);
7482 ASET (attrs, coding_attr_ccl_decoder, val);
7483
7484 val = args[coding_arg_ccl_encoder];
7485 CHECK_CCL_PROGRAM (val);
7486 if (VECTORP (val))
7487 val = Fcopy_sequence (val);
7488 ASET (attrs, coding_attr_ccl_encoder, val);
7489
7490 val = args[coding_arg_ccl_valids];
7491 valids = Fmake_string (make_number (256), make_number (0));
7492 for (tail = val; !NILP (tail); tail = Fcdr (tail))
7493 {
7494 val = Fcar (tail);
7495 if (INTEGERP (val))
c197f191 7496 ASET (valids, XINT (val), make_number (1));
df7492f9
KH
7497 else
7498 {
7499 int from, to;
7500
7501 CHECK_CONS (val);
7502 CHECK_NUMBER (XCAR (val));
7503 CHECK_NUMBER (XCDR (val));
7504 from = XINT (XCAR (val));
7505 to = XINT (XCDR (val));
7506 for (i = from; i <= to; i++)
c197f191 7507 ASET (valids, i, make_number (1));
df7492f9
KH
7508 }
7509 }
7510 ASET (attrs, coding_attr_ccl_valids, valids);
7511
7512 category = coding_category_ccl;
7513 }
7514 else if (EQ (coding_type, Qutf_16))
7515 {
7516 Lisp_Object bom, endian;
7517
7518 if (nargs < coding_arg_utf16_max)
7519 goto short_args;
7520
7521 bom = args[coding_arg_utf16_bom];
7522 if (! NILP (bom) && ! EQ (bom, Qt))
7523 {
7524 CHECK_CONS (bom);
7525 CHECK_CODING_SYSTEM (XCAR (bom));
7526 CHECK_CODING_SYSTEM (XCDR (bom));
7527 }
7528 ASET (attrs, coding_attr_utf_16_bom, bom);
7529
7530 endian = args[coding_arg_utf16_endian];
7531 ASET (attrs, coding_attr_utf_16_endian, endian);
7532
7533 category = (CONSP (bom)
7534 ? coding_category_utf_16_auto
7535 : NILP (bom)
7536 ? (NILP (endian)
7537 ? coding_category_utf_16_be_nosig
7538 : coding_category_utf_16_le_nosig)
7539 : (NILP (endian)
7540 ? coding_category_utf_16_be
7541 : coding_category_utf_16_le));
7542 }
7543 else if (EQ (coding_type, Qiso_2022))
7544 {
7545 Lisp_Object initial, reg_usage, request, flags;
0be8721c 7546 int i, id;
1397dc18 7547
df7492f9
KH
7548 if (nargs < coding_arg_iso2022_max)
7549 goto short_args;
7550
7551 initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7552 CHECK_VECTOR (initial);
7553 for (i = 0; i < 4; i++)
7554 {
7555 val = Faref (initial, make_number (i));
7556 if (! NILP (val))
7557 {
7558 CHECK_CHARSET_GET_ID (val, id);
7559 ASET (initial, i, make_number (id));
7560 }
7561 else
7562 ASET (initial, i, make_number (-1));
7563 }
7564
7565 reg_usage = args[coding_arg_iso2022_reg_usage];
7566 CHECK_CONS (reg_usage);
7567 CHECK_NATNUM (XCAR (reg_usage));
7568 CHECK_NATNUM (XCDR (reg_usage));
7569
7570 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7571 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
1397dc18 7572 {
df7492f9
KH
7573 int id;
7574
7575 val = Fcar (tail);
7576 CHECK_CONS (val);
7577 CHECK_CHARSET_GET_ID (XCAR (val), id);
7578 CHECK_NATNUM (XCDR (val));
7579 if (XINT (XCDR (val)) >= 4)
7580 error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7581 XCAR (val) = make_number (id);
1397dc18 7582 }
df7492f9
KH
7583
7584 flags = args[coding_arg_iso2022_flags];
7585 CHECK_NATNUM (flags);
7586 i = XINT (flags);
7587 if (EQ (args[coding_arg_charset_list], Qiso_2022))
7588 flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7589
7590 ASET (attrs, coding_attr_iso_initial, initial);
7591 ASET (attrs, coding_attr_iso_usage, reg_usage);
7592 ASET (attrs, coding_attr_iso_request, request);
7593 ASET (attrs, coding_attr_iso_flags, flags);
7594 setup_iso_safe_charsets (attrs);
7595
7596 if (i & CODING_ISO_FLAG_SEVEN_BITS)
7597 category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7598 | CODING_ISO_FLAG_SINGLE_SHIFT))
7599 ? coding_category_iso_7_else
7600 : EQ (args[coding_arg_charset_list], Qiso_2022)
7601 ? coding_category_iso_7
7602 : coding_category_iso_7_tight);
7603 else
7604 {
7605 int id = XINT (AREF (initial, 1));
7606
7607 category = (((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7608 | CODING_ISO_FLAG_SINGLE_SHIFT))
7609 || EQ (args[coding_arg_charset_list], Qiso_2022)
7610 || id < 0)
7611 ? coding_category_iso_8_else
7612 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7613 ? coding_category_iso_8_1
7614 : coding_category_iso_8_2);
7615 }
7616 }
7617 else if (EQ (coding_type, Qemacs_mule))
7618 {
7619 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7620 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7621
7622 category = coding_category_emacs_mule;
7623 }
7624 else if (EQ (coding_type, Qshift_jis))
7625 {
7626
7627 struct charset *charset;
7628
7629 if (XINT (Flength (charset_list)) != 3)
7630 error ("There should be just three charsets");
7631
7632 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7633 if (CHARSET_DIMENSION (charset) != 1)
7634 error ("Dimension of charset %s is not one",
7635 XSYMBOL (CHARSET_NAME (charset))->name->data);
7636
7637 charset_list = XCDR (charset_list);
7638 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7639 if (CHARSET_DIMENSION (charset) != 1)
7640 error ("Dimension of charset %s is not one",
7641 XSYMBOL (CHARSET_NAME (charset))->name->data);
7642
7643 charset_list = XCDR (charset_list);
7644 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7645 if (CHARSET_DIMENSION (charset) != 2)
7646 error ("Dimension of charset %s is not two",
7647 XSYMBOL (CHARSET_NAME (charset))->name->data);
7648
7649 category = coding_category_sjis;
7650 Vsjis_coding_system = name;
7651 }
7652 else if (EQ (coding_type, Qbig5))
7653 {
7654 struct charset *charset;
7655
7656 if (XINT (Flength (charset_list)) != 2)
7657 error ("There should be just two charsets");
7658
7659 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7660 if (CHARSET_DIMENSION (charset) != 1)
7661 error ("Dimension of charset %s is not one",
7662 XSYMBOL (CHARSET_NAME (charset))->name->data);
7663
7664 charset_list = XCDR (charset_list);
7665 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7666 if (CHARSET_DIMENSION (charset) != 2)
7667 error ("Dimension of charset %s is not two",
7668 XSYMBOL (CHARSET_NAME (charset))->name->data);
7669
7670 category = coding_category_big5;
7671 Vbig5_coding_system = name;
7672 }
7673 else if (EQ (coding_type, Qraw_text))
7674 category = coding_category_raw_text;
7675 else if (EQ (coding_type, Qutf_8))
7676 category = coding_category_utf_8;
7677 else if (EQ (coding_type, Qundecided))
7678 category = coding_category_undecided;
7679 else
7680 error ("Invalid coding system type: %s",
7681 XSYMBOL (coding_type)->name->data);
7682
7683 CODING_ATTR_CATEGORY (attrs) = make_number (category);
7684
7685 eol_type = args[coding_arg_eol_type];
7686 if (! NILP (eol_type)
7687 && ! EQ (eol_type, Qunix)
7688 && ! EQ (eol_type, Qdos)
7689 && ! EQ (eol_type, Qmac))
7690 error ("Invalid eol-type");
7691
7692 aliases = Fcons (name, Qnil);
7693
7694 if (NILP (eol_type))
7695 {
7696 eol_type = make_subsidiaries (name);
7697 for (i = 0; i < 3; i++)
1397dc18 7698 {
df7492f9
KH
7699 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7700
7701 this_name = AREF (eol_type, i);
7702 this_aliases = Fcons (this_name, Qnil);
7703 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7704 this_spec = Fmake_vector (make_number (3), attrs);
7705 ASET (this_spec, 1, this_aliases);
7706 ASET (this_spec, 2, this_eol_type);
7707 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7708 Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7709 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7710 Vcoding_system_alist);
1397dc18 7711 }
d46c5b12 7712 }
1397dc18 7713
df7492f9
KH
7714 spec_vec = Fmake_vector (make_number (3), attrs);
7715 ASET (spec_vec, 1, aliases);
7716 ASET (spec_vec, 2, eol_type);
7717
7718 Fputhash (name, spec_vec, Vcoding_system_hash_table);
7719 Vcoding_system_list = Fcons (name, Vcoding_system_list);
7720 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7721 Vcoding_system_alist);
7722
7723 {
7724 int id = coding_categories[category].id;
7725
7726 if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7727 setup_coding_system (name, &coding_categories[category]);
7728 }
7729
d46c5b12 7730 return Qnil;
df7492f9
KH
7731
7732 short_args:
7733 return Fsignal (Qwrong_number_of_arguments,
7734 Fcons (intern ("define-coding-system-internal"),
7735 make_number (nargs)));
d46c5b12
KH
7736}
7737
da7db224
DL
7738/* Fixme: should this record the alias relationships for
7739 diagnostics? */
df7492f9
KH
7740DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7741 Sdefine_coding_system_alias, 2, 2, 0,
7742 doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7743 (alias, coding_system)
7744 Lisp_Object alias, coding_system;
66cfb530 7745{
df7492f9 7746 Lisp_Object spec, aliases, eol_type;
84d60297 7747
df7492f9
KH
7748 CHECK_SYMBOL (alias);
7749 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7750 aliases = AREF (spec, 1);
7751 while (!NILP (XCDR (aliases)))
7752 aliases = XCDR (aliases);
7753 XCDR (aliases) = Fcons (alias, Qnil);
66cfb530 7754
df7492f9
KH
7755 eol_type = AREF (spec, 2);
7756 if (VECTORP (eol_type))
66cfb530 7757 {
df7492f9
KH
7758 Lisp_Object subsidiaries;
7759 int i;
7760
7761 subsidiaries = make_subsidiaries (alias);
7762 for (i = 0; i < 3; i++)
7763 Fdefine_coding_system_alias (AREF (subsidiaries, i),
7764 AREF (eol_type, i));
7765
7766 ASET (spec, 2, subsidiaries);
66cfb530 7767 }
df7492f9
KH
7768
7769 Fputhash (alias, spec, Vcoding_system_hash_table);
5bad0796
DL
7770 Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
7771 Vcoding_system_alist);
66cfb530
KH
7772
7773 return Qnil;
7774}
7775
df7492f9
KH
7776DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7777 1, 1, 0,
7778 doc: /* Return the base of CODING-SYSTEM.
da7db224 7779Any alias or subsidiary coding system is not a base coding system. */)
df7492f9
KH
7780 (coding_system)
7781 Lisp_Object coding_system;
7782{
7783 Lisp_Object spec, attrs;
7784
7785 if (NILP (coding_system))
7786 return (Qno_conversion);
7787 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7788 attrs = AREF (spec, 0);
7789 return CODING_ATTR_BASE_NAME (attrs);
7790}
7791
7792DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7793 1, 1, 0,
7794 doc: "Return the property list of CODING-SYSTEM.")
7795 (coding_system)
7796 Lisp_Object coding_system;
7797{
7798 Lisp_Object spec, attrs;
7799
7800 if (NILP (coding_system))
7801 coding_system = Qno_conversion;
7802 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7803 attrs = AREF (spec, 0);
7804 return CODING_ATTR_PLIST (attrs);
7805}
7806
7807
7808DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7809 1, 1, 0,
da7db224 7810 doc: /* Return the list of aliases of CODING-SYSTEM. */)
df7492f9
KH
7811 (coding_system)
7812 Lisp_Object coding_system;
7813{
7814 Lisp_Object spec;
7815
7816 if (NILP (coding_system))
7817 coding_system = Qno_conversion;
7818 CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
da7db224 7819 return AREF (spec, 1);
df7492f9
KH
7820}
7821
7822DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7823 Scoding_system_eol_type, 1, 1, 0,
7824 doc: /* Return eol-type of CODING-SYSTEM.
7825An eol-type is integer 0, 1, 2, or a vector of coding systems.
7826
7827Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7828and CR respectively.
7829
7830A vector value indicates that a format of end-of-line should be
7831detected automatically. Nth element of the vector is the subsidiary
7832coding system whose eol-type is N. */)
7833 (coding_system)
7834 Lisp_Object coding_system;
7835{
7836 Lisp_Object spec, eol_type;
7837 int n;
7838
7839 if (NILP (coding_system))
7840 coding_system = Qno_conversion;
7841 if (! CODING_SYSTEM_P (coding_system))
7842 return Qnil;
7843 spec = CODING_SYSTEM_SPEC (coding_system);
7844 eol_type = AREF (spec, 2);
7845 if (VECTORP (eol_type))
7846 return Fcopy_sequence (eol_type);
7847 n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7848 return make_number (n);
7849}
7850
4ed46869
KH
7851#endif /* emacs */
7852
7853\f
1397dc18 7854/*** 9. Post-amble ***/
4ed46869 7855
dfcf069d 7856void
4ed46869
KH
7857init_coding_once ()
7858{
7859 int i;
7860
df7492f9
KH
7861 for (i = 0; i < coding_category_max; i++)
7862 {
7863 coding_categories[i].id = -1;
7864 coding_priorities[i] = i;
7865 }
4ed46869
KH
7866
7867 /* ISO2022 specific initialize routine. */
7868 for (i = 0; i < 0x20; i++)
b73bfc1c 7869 iso_code_class[i] = ISO_control_0;
4ed46869
KH
7870 for (i = 0x21; i < 0x7F; i++)
7871 iso_code_class[i] = ISO_graphic_plane_0;
7872 for (i = 0x80; i < 0xA0; i++)
b73bfc1c 7873 iso_code_class[i] = ISO_control_1;
4ed46869
KH
7874 for (i = 0xA1; i < 0xFF; i++)
7875 iso_code_class[i] = ISO_graphic_plane_1;
7876 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7877 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7878 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7879 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7880 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7881 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7882 iso_code_class[ISO_CODE_ESC] = ISO_escape;
7883 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7884 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7885 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7886
b843d1ae 7887 inhibit_pre_post_conversion = 0;
df7492f9
KH
7888
7889 for (i = 0; i < 256; i++)
7890 {
7891 emacs_mule_bytes[i] = 1;
7892 }
781d7a48
KH
7893 emacs_mule_bytes[LEADING_CODE_PRIVATE_11] = 3;
7894 emacs_mule_bytes[LEADING_CODE_PRIVATE_12] = 3;
7895 emacs_mule_bytes[LEADING_CODE_PRIVATE_21] = 4;
7896 emacs_mule_bytes[LEADING_CODE_PRIVATE_22] = 4;
e0e989f6
KH
7897}
7898
7899#ifdef emacs
7900
dfcf069d 7901void
e0e989f6
KH
7902syms_of_coding ()
7903{
df7492f9
KH
7904 staticpro (&Vcoding_system_hash_table);
7905 Vcoding_system_hash_table = Fmakehash (Qeq);
7906
7907 staticpro (&Vsjis_coding_system);
7908 Vsjis_coding_system = Qnil;
7909
7910 staticpro (&Vbig5_coding_system);
7911 Vbig5_coding_system = Qnil;
7912
7913 staticpro (&Vcode_conversion_work_buf_list);
7914 Vcode_conversion_work_buf_list = Qnil;
e0e989f6 7915
df7492f9
KH
7916 staticpro (&Vcode_conversion_reused_work_buf);
7917 Vcode_conversion_reused_work_buf = Qnil;
7918
7919 DEFSYM (Qcharset, "charset");
7920 DEFSYM (Qtarget_idx, "target-idx");
7921 DEFSYM (Qcoding_system_history, "coding-system-history");
bb0115a2
RS
7922 Fset (Qcoding_system_history, Qnil);
7923
9ce27fde 7924 /* Target FILENAME is the first argument. */
e0e989f6 7925 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9ce27fde 7926 /* Target FILENAME is the third argument. */
e0e989f6
KH
7927 Fput (Qwrite_region, Qtarget_idx, make_number (2));
7928
df7492f9 7929 DEFSYM (Qcall_process, "call-process");
9ce27fde 7930 /* Target PROGRAM is the first argument. */
e0e989f6
KH
7931 Fput (Qcall_process, Qtarget_idx, make_number (0));
7932
df7492f9 7933 DEFSYM (Qcall_process_region, "call-process-region");
9ce27fde 7934 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7935 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7936
df7492f9 7937 DEFSYM (Qstart_process, "start-process");
9ce27fde 7938 /* Target PROGRAM is the third argument. */
e0e989f6
KH
7939 Fput (Qstart_process, Qtarget_idx, make_number (2));
7940
df7492f9 7941 DEFSYM (Qopen_network_stream, "open-network-stream");
9ce27fde 7942 /* Target SERVICE is the fourth argument. */
e0e989f6
KH
7943 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7944
df7492f9
KH
7945 DEFSYM (Qcoding_system, "coding-system");
7946 DEFSYM (Qcoding_aliases, "coding-aliases");
4ed46869 7947
df7492f9
KH
7948 DEFSYM (Qeol_type, "eol-type");
7949 DEFSYM (Qunix, "unix");
7950 DEFSYM (Qdos, "dos");
4ed46869 7951
df7492f9
KH
7952 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
7953 DEFSYM (Qpost_read_conversion, "post-read-conversion");
7954 DEFSYM (Qpre_write_conversion, "pre-write-conversion");
7955 DEFSYM (Qdefault_char, "default-char");
7956 DEFSYM (Qundecided, "undecided");
7957 DEFSYM (Qno_conversion, "no-conversion");
7958 DEFSYM (Qraw_text, "raw-text");
4ed46869 7959
df7492f9 7960 DEFSYM (Qiso_2022, "iso-2022");
4ed46869 7961
df7492f9 7962 DEFSYM (Qutf_8, "utf-8");
27901516 7963
df7492f9
KH
7964 DEFSYM (Qutf_16, "utf-16");
7965 DEFSYM (Qutf_16_be, "utf-16-be");
7966 DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
7967 DEFSYM (Qutf_16_le, "utf-16-l3");
7968 DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
7969 DEFSYM (Qsignature, "signature");
7970 DEFSYM (Qendian, "endian");
7971 DEFSYM (Qbig, "big");
7972 DEFSYM (Qlittle, "little");
27901516 7973
df7492f9
KH
7974 DEFSYM (Qshift_jis, "shift-jis");
7975 DEFSYM (Qbig5, "big5");
4ed46869 7976
df7492f9 7977 DEFSYM (Qcoding_system_p, "coding-system-p");
4ed46869 7978
df7492f9 7979 DEFSYM (Qcoding_system_error, "coding-system-error");
4ed46869
KH
7980 Fput (Qcoding_system_error, Qerror_conditions,
7981 Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7982 Fput (Qcoding_system_error, Qerror_message,
9ce27fde 7983 build_string ("Invalid coding system"));
4ed46869 7984
df7492f9
KH
7985 /* Intern this now in case it isn't already done.
7986 Setting this variable twice is harmless.
7987 But don't staticpro it here--that is done in alloc.c. */
7988 Qchar_table_extra_slots = intern ("char-table-extra-slots");
4ed46869 7989
df7492f9 7990 DEFSYM (Qtranslation_table, "translation-table");
1397dc18 7991 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
df7492f9
KH
7992 DEFSYM (Qtranslation_table_id, "translation-table-id");
7993 DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
7994 DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
bdd9fb48 7995
df7492f9 7996 DEFSYM (Qvalid_codes, "valid-codes");
05e6f5dc 7997
df7492f9 7998 DEFSYM (Qemacs_mule, "emacs-mule");
05e6f5dc 7999
df7492f9
KH
8000 Vcoding_category_table
8001 = Fmake_vector (make_number (coding_category_max), Qnil);
8002 staticpro (&Vcoding_category_table);
8003 /* Followings are target of code detection. */
8004 ASET (Vcoding_category_table, coding_category_iso_7,
8005 intern ("coding-category-iso-7"));
8006 ASET (Vcoding_category_table, coding_category_iso_7_tight,
8007 intern ("coding-category-iso-7-tight"));
8008 ASET (Vcoding_category_table, coding_category_iso_8_1,
8009 intern ("coding-category-iso-8-1"));
8010 ASET (Vcoding_category_table, coding_category_iso_8_2,
8011 intern ("coding-category-iso-8-2"));
8012 ASET (Vcoding_category_table, coding_category_iso_7_else,
8013 intern ("coding-category-iso-7-else"));
8014 ASET (Vcoding_category_table, coding_category_iso_8_else,
8015 intern ("coding-category-iso-8-else"));
8016 ASET (Vcoding_category_table, coding_category_utf_8,
8017 intern ("coding-category-utf-8"));
8018 ASET (Vcoding_category_table, coding_category_utf_16_be,
8019 intern ("coding-category-utf-16-be"));
8020 ASET (Vcoding_category_table, coding_category_utf_16_le,
8021 intern ("coding-category-utf-16-le"));
8022 ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8023 intern ("coding-category-utf-16-be-nosig"));
8024 ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8025 intern ("coding-category-utf-16-le-nosig"));
8026 ASET (Vcoding_category_table, coding_category_charset,
8027 intern ("coding-category-charset"));
8028 ASET (Vcoding_category_table, coding_category_sjis,
8029 intern ("coding-category-sjis"));
8030 ASET (Vcoding_category_table, coding_category_big5,
8031 intern ("coding-category-big5"));
8032 ASET (Vcoding_category_table, coding_category_ccl,
8033 intern ("coding-category-ccl"));
8034 ASET (Vcoding_category_table, coding_category_emacs_mule,
8035 intern ("coding-category-emacs-mule"));
8036 /* Followings are NOT target of code detection. */
8037 ASET (Vcoding_category_table, coding_category_raw_text,
8038 intern ("coding-category-raw-text"));
8039 ASET (Vcoding_category_table, coding_category_undecided,
8040 intern ("coding-category-undecided"));
70c22245 8041
4ed46869
KH
8042 defsubr (&Scoding_system_p);
8043 defsubr (&Sread_coding_system);
8044 defsubr (&Sread_non_nil_coding_system);
8045 defsubr (&Scheck_coding_system);
8046 defsubr (&Sdetect_coding_region);
d46c5b12 8047 defsubr (&Sdetect_coding_string);
05e6f5dc 8048 defsubr (&Sfind_coding_systems_region_internal);
df7492f9 8049 defsubr (&Scheck_coding_systems_region);
4ed46869
KH
8050 defsubr (&Sdecode_coding_region);
8051 defsubr (&Sencode_coding_region);
8052 defsubr (&Sdecode_coding_string);
8053 defsubr (&Sencode_coding_string);
8054 defsubr (&Sdecode_sjis_char);
8055 defsubr (&Sencode_sjis_char);
8056 defsubr (&Sdecode_big5_char);
8057 defsubr (&Sencode_big5_char);
1ba9e4ab 8058 defsubr (&Sset_terminal_coding_system_internal);
c4825358 8059 defsubr (&Sset_safe_terminal_coding_system_internal);
4ed46869 8060 defsubr (&Sterminal_coding_system);
1ba9e4ab 8061 defsubr (&Sset_keyboard_coding_system_internal);
4ed46869 8062 defsubr (&Skeyboard_coding_system);
a5d301df 8063 defsubr (&Sfind_operation_coding_system);
df7492f9
KH
8064 defsubr (&Sset_coding_system_priority);
8065 defsubr (&Sdefine_coding_system_internal);
8066 defsubr (&Sdefine_coding_system_alias);
8067 defsubr (&Scoding_system_base);
8068 defsubr (&Scoding_system_plist);
8069 defsubr (&Scoding_system_aliases);
8070 defsubr (&Scoding_system_eol_type);
8071 defsubr (&Scoding_system_priority_list);
4ed46869 8072
4608c386 8073 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
48b0f3ae
PJ
8074 doc: /* List of coding systems.
8075
8076Do not alter the value of this variable manually. This variable should be
df7492f9 8077updated by the functions `define-coding-system' and
48b0f3ae 8078`define-coding-system-alias'. */);
4608c386
KH
8079 Vcoding_system_list = Qnil;
8080
8081 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
48b0f3ae
PJ
8082 doc: /* Alist of coding system names.
8083Each element is one element list of coding system name.
8084This variable is given to `completing-read' as TABLE argument.
8085
8086Do not alter the value of this variable manually. This variable should be
8087updated by the functions `make-coding-system' and
8088`define-coding-system-alias'. */);
4608c386
KH
8089 Vcoding_system_alist = Qnil;
8090
4ed46869 8091 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
48b0f3ae
PJ
8092 doc: /* List of coding-categories (symbols) ordered by priority.
8093
8094On detecting a coding system, Emacs tries code detection algorithms
8095associated with each coding-category one by one in this order. When
8096one algorithm agrees with a byte sequence of source text, the coding
8097system bound to the corresponding coding-category is selected. */);
4ed46869
KH
8098 {
8099 int i;
8100
8101 Vcoding_category_list = Qnil;
df7492f9 8102 for (i = coding_category_max - 1; i >= 0; i--)
4ed46869 8103 Vcoding_category_list
d46c5b12
KH
8104 = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8105 Vcoding_category_list);
4ed46869
KH
8106 }
8107
8108 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
48b0f3ae
PJ
8109 doc: /* Specify the coding system for read operations.
8110It is useful to bind this variable with `let', but do not set it globally.
8111If the value is a coding system, it is used for decoding on read operation.
8112If not, an appropriate element is used from one of the coding system alists:
8113There are three such tables, `file-coding-system-alist',
8114`process-coding-system-alist', and `network-coding-system-alist'. */);
4ed46869
KH
8115 Vcoding_system_for_read = Qnil;
8116
8117 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
48b0f3ae
PJ
8118 doc: /* Specify the coding system for write operations.
8119Programs bind this variable with `let', but you should not set it globally.
8120If the value is a coding system, it is used for encoding of output,
8121when writing it to a file and when sending it to a file or subprocess.
8122
8123If this does not specify a coding system, an appropriate element
8124is used from one of the coding system alists:
8125There are three such tables, `file-coding-system-alist',
8126`process-coding-system-alist', and `network-coding-system-alist'.
8127For output to files, if the above procedure does not specify a coding system,
8128the value of `buffer-file-coding-system' is used. */);
4ed46869
KH
8129 Vcoding_system_for_write = Qnil;
8130
8131 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
df7492f9
KH
8132 doc: /*
8133Coding system used in the latest file or process I/O. */);
4ed46869
KH
8134 Vlast_coding_system_used = Qnil;
8135
9ce27fde 8136 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
df7492f9
KH
8137 doc: /*
8138*Non-nil means always inhibit code conversion of end-of-line format.
48b0f3ae
PJ
8139See info node `Coding Systems' and info node `Text and Binary' concerning
8140such conversion. */);
9ce27fde
KH
8141 inhibit_eol_conversion = 0;
8142
ed29121d 8143 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
df7492f9
KH
8144 doc: /*
8145Non-nil means process buffer inherits coding system of process output.
48b0f3ae
PJ
8146Bind it to t if the process output is to be treated as if it were a file
8147read from some filesystem. */);
ed29121d
EZ
8148 inherit_process_coding_system = 0;
8149
02ba4723 8150 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
df7492f9
KH
8151 doc: /*
8152Alist to decide a coding system to use for a file I/O operation.
48b0f3ae
PJ
8153The format is ((PATTERN . VAL) ...),
8154where PATTERN is a regular expression matching a file name,
8155VAL is a coding system, a cons of coding systems, or a function symbol.
8156If VAL is a coding system, it is used for both decoding and encoding
8157the file contents.
8158If VAL is a cons of coding systems, the car part is used for decoding,
8159and the cdr part is used for encoding.
8160If VAL is a function symbol, the function must return a coding system
0192762c
DL
8161or a cons of coding systems which are used as above. The function gets
8162the arguments with which `find-operation-coding-systems' was called.
48b0f3ae
PJ
8163
8164See also the function `find-operation-coding-system'
8165and the variable `auto-coding-alist'. */);
02ba4723
KH
8166 Vfile_coding_system_alist = Qnil;
8167
8168 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
df7492f9
KH
8169 doc: /*
8170Alist to decide a coding system to use for a process I/O operation.
48b0f3ae
PJ
8171The format is ((PATTERN . VAL) ...),
8172where PATTERN is a regular expression matching a program name,
8173VAL is a coding system, a cons of coding systems, or a function symbol.
8174If VAL is a coding system, it is used for both decoding what received
8175from the program and encoding what sent to the program.
8176If VAL is a cons of coding systems, the car part is used for decoding,
8177and the cdr part is used for encoding.
8178If VAL is a function symbol, the function must return a coding system
8179or a cons of coding systems which are used as above.
8180
8181See also the function `find-operation-coding-system'. */);
02ba4723
KH
8182 Vprocess_coding_system_alist = Qnil;
8183
8184 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
df7492f9
KH
8185 doc: /*
8186Alist to decide a coding system to use for a network I/O operation.
48b0f3ae
PJ
8187The format is ((PATTERN . VAL) ...),
8188where PATTERN is a regular expression matching a network service name
8189or is a port number to connect to,
8190VAL is a coding system, a cons of coding systems, or a function symbol.
8191If VAL is a coding system, it is used for both decoding what received
8192from the network stream and encoding what sent to the network stream.
8193If VAL is a cons of coding systems, the car part is used for decoding,
8194and the cdr part is used for encoding.
8195If VAL is a function symbol, the function must return a coding system
8196or a cons of coding systems which are used as above.
8197
8198See also the function `find-operation-coding-system'. */);
02ba4723 8199 Vnetwork_coding_system_alist = Qnil;
4ed46869 8200
68c45bf0 8201 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
75205970
RS
8202 doc: /* Coding system to use with system messages.
8203Also used for decoding keyboard input on X Window system. */);
68c45bf0
PE
8204 Vlocale_coding_system = Qnil;
8205
005f0d35 8206 /* The eol mnemonics are reset in startup.el system-dependently. */
7722baf9 8207 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
df7492f9
KH
8208 doc: /*
8209*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
7722baf9 8210 eol_mnemonic_unix = build_string (":");
4ed46869 8211
7722baf9 8212 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
df7492f9
KH
8213 doc: /*
8214*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
7722baf9 8215 eol_mnemonic_dos = build_string ("\\");
4ed46869 8216
7722baf9 8217 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
df7492f9
KH
8218 doc: /*
8219*String displayed in mode line for MAC-like (CR) end-of-line format. */);
7722baf9 8220 eol_mnemonic_mac = build_string ("/");
4ed46869 8221
7722baf9 8222 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
df7492f9
KH
8223 doc: /*
8224*String displayed in mode line when end-of-line format is not yet determined. */);
7722baf9 8225 eol_mnemonic_undecided = build_string (":");
4ed46869 8226
84fbb8a0 8227 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
df7492f9
KH
8228 doc: /*
8229*Non-nil enables character translation while encoding and decoding. */);
84fbb8a0 8230 Venable_character_translation = Qt;
bdd9fb48 8231
f967223b 8232 DEFVAR_LISP ("standard-translation-table-for-decode",
48b0f3ae
PJ
8233 &Vstandard_translation_table_for_decode,
8234 doc: /* Table for translating characters while decoding. */);
f967223b 8235 Vstandard_translation_table_for_decode = Qnil;
bdd9fb48 8236
f967223b 8237 DEFVAR_LISP ("standard-translation-table-for-encode",
48b0f3ae
PJ
8238 &Vstandard_translation_table_for_encode,
8239 doc: /* Table for translating characters while encoding. */);
f967223b 8240 Vstandard_translation_table_for_encode = Qnil;
4ed46869 8241
df7492f9 8242 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
48b0f3ae
PJ
8243 doc: /* Alist of charsets vs revision numbers.
8244While encoding, if a charset (car part of an element) is found,
df7492f9
KH
8245designate it with the escape sequence identifying revision (cdr part
8246of the element). */);
8247 Vcharset_revision_table = Qnil;
02ba4723
KH
8248
8249 DEFVAR_LISP ("default-process-coding-system",
8250 &Vdefault_process_coding_system,
48b0f3ae
PJ
8251 doc: /* Cons of coding systems used for process I/O by default.
8252The car part is used for decoding a process output,
8253the cdr part is used for encoding a text to be sent to a process. */);
02ba4723 8254 Vdefault_process_coding_system = Qnil;
c4825358 8255
3f003981 8256 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
df7492f9
KH
8257 doc: /*
8258Table of extra Latin codes in the range 128..159 (inclusive).
48b0f3ae
PJ
8259This is a vector of length 256.
8260If Nth element is non-nil, the existence of code N in a file
8261\(or output of subprocess) doesn't prevent it to be detected as
8262a coding system of ISO 2022 variant which has a flag
8263`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8264or reading output of a subprocess.
8265Only 128th through 159th elements has a meaning. */);
3f003981 8266 Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
d46c5b12
KH
8267
8268 DEFVAR_LISP ("select-safe-coding-system-function",
8269 &Vselect_safe_coding_system_function,
df7492f9
KH
8270 doc: /*
8271Function to call to select safe coding system for encoding a text.
48b0f3ae
PJ
8272
8273If set, this function is called to force a user to select a proper
8274coding system which can encode the text in the case that a default
8275coding system used in each operation can't encode the text.
8276
8277The default value is `select-safe-coding-system' (which see). */);
d46c5b12
KH
8278 Vselect_safe_coding_system_function = Qnil;
8279
22ab2303 8280 DEFVAR_BOOL ("inhibit-iso-escape-detection",
74383408 8281 &inhibit_iso_escape_detection,
df7492f9
KH
8282 doc: /*
8283If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
48b0f3ae
PJ
8284
8285By default, on reading a file, Emacs tries to detect how the text is
8286encoded. This code detection is sensitive to escape sequences. If
8287the sequence is valid as ISO2022, the code is determined as one of
8288the ISO2022 encodings, and the file is decoded by the corresponding
8289coding system (e.g. `iso-2022-7bit').
8290
8291However, there may be a case that you want to read escape sequences in
8292a file as is. In such a case, you can set this variable to non-nil.
8293Then, as the code detection ignores any escape sequences, no file is
8294detected as encoded in some ISO2022 encoding. The result is that all
8295escape sequences become visible in a buffer.
8296
8297The default value is nil, and it is strongly recommended not to change
8298it. That is because many Emacs Lisp source files that contain
8299non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8300in Emacs's distribution, and they won't be decoded correctly on
8301reading if you suppress escape sequence detection.
8302
8303The other way to read escape sequences in a file without decoding is
8304to explicitly specify some coding system that doesn't use ISO2022's
8305escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
74383408 8306 inhibit_iso_escape_detection = 0;
2c78b7e1
KH
8307
8308 {
8309 Lisp_Object args[coding_arg_max];
8310 Lisp_Object plist[14];
8311 int i;
8312
8313 for (i = 0; i < coding_arg_max; i++)
8314 args[i] = Qnil;
8315
8316 plist[0] = intern (":name");
8317 plist[1] = args[coding_arg_name] = Qno_conversion;
8318 plist[2] = intern (":mnemonic");
8319 plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8320 plist[4] = intern (":coding-type");
8321 plist[5] = args[coding_arg_coding_type] = Qraw_text;
8322 plist[6] = intern (":ascii-compatible-p");
8323 plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8324 plist[8] = intern (":default-char");
8325 plist[9] = args[coding_arg_default_char] = make_number (0);
8326 plist[10] = intern (":docstring");
8327 plist[11] = build_string ("Do no conversion.\n\
8328\n\
8329When you visit a file with this coding, the file is read into a\n\
8330unibyte buffer as is, thus each byte of a file is treated as a\n\
8331character.");
8332 plist[12] = intern (":eol-type");
8333 plist[13] = args[coding_arg_eol_type] = Qunix;
8334 args[coding_arg_plist] = Flist (14, plist);
8335 Fdefine_coding_system_internal (coding_arg_max, args);
8336 }
8337
8338 setup_coding_system (Qno_conversion, &keyboard_coding);
8339 setup_coding_system (Qno_conversion, &terminal_coding);
8340 setup_coding_system (Qno_conversion, &safe_terminal_coding);
4ed46869
KH
8341}
8342
68c45bf0
PE
8343char *
8344emacs_strerror (error_number)
8345 int error_number;
8346{
8347 char *str;
8348
ca9c0567 8349 synchronize_system_messages_locale ();
68c45bf0
PE
8350 str = strerror (error_number);
8351
8352 if (! NILP (Vlocale_coding_system))
8353 {
8354 Lisp_Object dec = code_convert_string_norecord (build_string (str),
8355 Vlocale_coding_system,
8356 0);
8357 str = (char *) XSTRING (dec)->data;
8358 }
8359
8360 return str;
8361}
8362
4ed46869 8363#endif /* emacs */